diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h index 3b14e2bfc..74bc7ce87 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/AgentDeviceRunnerUITests-Bridging-Header.h @@ -1 +1,2 @@ #import "RunnerObjCExceptionCatcher.h" +#import "RunnerSynthesizedGesture.h" diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerSynthesizedGesture.h b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerSynthesizedGesture.h new file mode 100644 index 000000000..1e1fb4a9e --- /dev/null +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerSynthesizedGesture.h @@ -0,0 +1,19 @@ +#import + +NS_ASSUME_NONNULL_BEGIN + /** Entry point for synthesizing a combined two-pointer transform gesture; the implementation resolves the XCTest classes it needs by name at runtime. */ +@interface RunnerSynthesizedGesture : NSObject + /** Synthesizes one transform gesture against application. x and y give the starting center, dx and dy the total center translation, scale and degrees the requested zoom and rotation, radius the base distance of each pointer from the center, and durationMs the gesture length in milliseconds. Returns nil on success, otherwise a message describing why synthesis was unavailable or failed. */ ++ (NSString * _Nullable)synthesizeTransformWithApplication:(id)application + x:(double)x + y:(double)y + dx:(double)dx + dy:(double)dy + scale:(double)scale + degrees:(double)degrees + radius:(double)radius + durationMs:(double)durationMs; + +@end + +NS_ASSUME_NONNULL_END diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerSynthesizedGesture.m b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerSynthesizedGesture.m new file mode 100644 index 000000000..483f70864 --- /dev/null +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerSynthesizedGesture.m @@ -0,0 +1,297 @@ +#import "RunnerSynthesizedGesture.h" + +#import +#import +#import + /* Function-pointer signatures used to cast objc_msgSend for each dynamically resolved selector. */ +typedef NSInteger (*RunnerMsgSendInteger)(id, SEL); +typedef id (*RunnerMsgSendInitRecord)(id, SEL, NSString *, NSInteger); +typedef id (*RunnerMsgSendInitPath)(id, SEL, CGPoint, NSTimeInterval); +typedef void (*RunnerMsgSendPathMove)(id, SEL, CGPoint, NSTimeInterval); +typedef void (*RunnerMsgSendPathOffset)(id, SEL, NSTimeInterval); 
+typedef void (*RunnerMsgSendAddPath)(id, SEL, id); +typedef void (*RunnerMsgSendSetInteger)(id, SEL, NSInteger); +typedef BOOL (*RunnerMsgSendSynthesize)(id, SEL, NSError **); + /* Classes and selectors looked up at runtime; populated by RunnerResolveXCTestEventBridge and then passed around by pointer. */ +typedef struct { + Class recordClass; + Class pathClass; + SEL initRecordSelector; + SEL addPathSelector; + SEL setTargetProcessIDSelector; + SEL synthesizeSelector; + SEL interfaceOrientationSelector; + SEL processIDSelector; + SEL initPathSelector; + SEL moveSelector; + SEL liftSelector; +} RunnerXCTestEventBridge; + /* Forward declarations for the file-local helpers defined after the implementation. */ +static NSString * _Nullable RunnerResolveXCTestEventBridge( + id application, + RunnerXCTestEventBridge *bridge +); +static NSString * _Nullable RunnerRequireClass(Class cls, NSString *className); +static NSString * _Nullable RunnerRequireSelector(Class cls, SEL selector, NSString *selectorName); +static NSString * _Nullable RunnerRequireApplicationSelector(id application, SEL selector, NSString *selectorName); +static id RunnerPointerPath( + const RunnerXCTestEventBridge *bridge, + CGPoint start, + double x, + double y, + double dx, + double dy, + double scale, + double degrees, + double radius, + double durationMs, + double side +); +static CGPoint RunnerPointerPointAt( + double x, + double y, + double dx, + double dy, + double scale, + double degrees, + double baseRadius, + double t, + double side +); + +@implementation RunnerSynthesizedGesture + /* Public entry point. Forwards every argument to trySynthesizeTransformWithApplication and, if an NSException is raised along the way, returns it as a message string instead of letting it propagate. Returns nil on success. */ ++ (NSString * _Nullable)synthesizeTransformWithApplication:(id)application + x:(double)x + y:(double)y + dx:(double)dx + dy:(double)dy + scale:(double)scale + degrees:(double)degrees + radius:(double)radius + durationMs:(double)durationMs { + @try { + return [self trySynthesizeTransformWithApplication:application + x:x + y:y + dx:dx + dy:dy + scale:scale + degrees:degrees + radius:radius + durationMs:durationMs]; + } @catch (NSException *exception) { + /* Fall back to fixed text when the exception carries no name or no reason. */ NSString *name = exception.name ?: @"NSException"; + NSString *reason = exception.reason ?: @"private XCTest event synthesis failed"; + return [NSString stringWithFormat:@"%@: 
%@", name, reason]; + } +} + /* Does the actual work: resolves the runtime bridge, creates one event record targeted at the application process, attaches two pointer paths (side 1.0 and side -1.0) and asks the record to synthesize itself. Returns nil on success, otherwise a message naming the step that failed. */ ++ (NSString * _Nullable)trySynthesizeTransformWithApplication:(id)application + x:(double)x + y:(double)y + dx:(double)dx + dy:(double)dy + scale:(double)scale + degrees:(double)degrees + radius:(double)radius + durationMs:(double)durationMs { + RunnerXCTestEventBridge bridge; + NSString *missing = RunnerResolveXCTestEventBridge(application, &bridge); + if (missing != nil) { + return missing; + } + /* NOTE(review): both getters are invoked through a cast that returns NSInteger; confirm the declared return types of interfaceOrientation and processID are that wide. */ + NSInteger interfaceOrientation = + ((RunnerMsgSendInteger)objc_msgSend)(application, bridge.interfaceOrientationSelector); + NSInteger targetProcessID = ((RunnerMsgSendInteger)objc_msgSend)(application, bridge.processIDSelector); + if (targetProcessID <= 0) { + return @"private XCTest event synthesis unavailable: could not resolve target process ID"; + } + + id record = ((RunnerMsgSendInitRecord)objc_msgSend)( + [bridge.recordClass alloc], + bridge.initRecordSelector, + @"agent-device-transform", + interfaceOrientation + ); + if (record == nil) { + return @"private XCTest event synthesis failed: could not create event record"; + } + ((RunnerMsgSendSetInteger)objc_msgSend)(record, bridge.setTargetProcessIDSelector, targetProcessID); + /* One path per pointer; the side factor mirrors the second pointer through the gesture center. Each path starts at the point computed for t = 0.0. */ + double sides[] = {1.0, -1.0}; + for (int index = 0; index < 2; index += 1) { + double side = sides[index]; + id path = RunnerPointerPath( + &bridge, + RunnerPointerPointAt(x, y, dx, dy, scale, degrees, radius, 0.0, side), + x, + y, + dx, + dy, + scale, + degrees, + radius, + durationMs, + side + ); + if (path == nil) { + return @"private XCTest event synthesis failed: could not create pointer path"; + } + ((RunnerMsgSendAddPath)objc_msgSend)(record, bridge.addPathSelector, path); + } + /* Success is decided by the BOOL result; the error object is only consulted to build the failure message. */ + NSError *error = nil; + BOOL ok = ((RunnerMsgSendSynthesize)objc_msgSend)(record, bridge.synthesizeSelector, &error); + if (!ok) { + NSString *detail = error.localizedDescription ?: @"synthesizeWithError returned false"; + return [NSString stringWithFormat:@"private XCTest event synthesis failed: %@", detail]; + } + return 
nil; +} + /* Looks up the two XCTest classes and all selectors by name, then verifies that each one is available. Returns nil once everything has been found, otherwise a message naming the first missing class or selector. */ +static NSString * _Nullable RunnerResolveXCTestEventBridge( + id application, + RunnerXCTestEventBridge *bridge +) { + Class recordClass = NSClassFromString(@"XCSynthesizedEventRecord"); + Class pathClass = NSClassFromString(@"XCPointerEventPath"); + SEL initRecordSelector = NSSelectorFromString(@"initWithName:interfaceOrientation:"); + SEL addPathSelector = NSSelectorFromString(@"addPointerEventPath:"); + SEL setTargetProcessIDSelector = NSSelectorFromString(@"setTargetProcessID:"); + SEL synthesizeSelector = NSSelectorFromString(@"synthesizeWithError:"); + SEL interfaceOrientationSelector = NSSelectorFromString(@"interfaceOrientation"); + SEL processIDSelector = NSSelectorFromString(@"processID"); + SEL initPathSelector = NSSelectorFromString(@"initForTouchAtPoint:offset:"); + SEL moveSelector = NSSelectorFromString(@"moveToPoint:atOffset:"); + SEL liftSelector = NSSelectorFromString(@"liftUpAtOffset:"); + /* Check the classes first, then the instance selectors on each class; stop at the first check that fails. */ + NSString *missing = RunnerRequireClass(recordClass, @"XCSynthesizedEventRecord"); + if (missing != nil) return missing; + missing = RunnerRequireClass(pathClass, @"XCPointerEventPath"); + if (missing != nil) return missing; + missing = RunnerRequireSelector(recordClass, initRecordSelector, @"initWithName:interfaceOrientation:"); + if (missing != nil) return missing; + missing = RunnerRequireSelector(recordClass, addPathSelector, @"addPointerEventPath:"); + if (missing != nil) return missing; + missing = RunnerRequireSelector(recordClass, setTargetProcessIDSelector, @"setTargetProcessID:"); + if (missing != nil) return missing; + missing = RunnerRequireSelector(recordClass, synthesizeSelector, @"synthesizeWithError:"); + if (missing != nil) return missing; + missing = RunnerRequireSelector(pathClass, initPathSelector, @"initForTouchAtPoint:offset:"); + if (missing != nil) return missing; + missing = RunnerRequireSelector(pathClass, moveSelector, @"moveToPoint:atOffset:"); + if (missing != nil) return missing; + missing = 
RunnerRequireSelector(pathClass, liftSelector, @"liftUpAtOffset:"); + if (missing != nil) return missing; + missing = RunnerRequireApplicationSelector(application, interfaceOrientationSelector, @"interfaceOrientation"); + if (missing != nil) return missing; + missing = RunnerRequireApplicationSelector(application, processIDSelector, @"processID"); + if (missing != nil) return missing; + /* The output struct is written only after every check has passed, so a failed resolve leaves the caller struct unmodified. */ + *bridge = (RunnerXCTestEventBridge){ + .recordClass = recordClass, + .pathClass = pathClass, + .initRecordSelector = initRecordSelector, + .addPathSelector = addPathSelector, + .setTargetProcessIDSelector = setTargetProcessIDSelector, + .synthesizeSelector = synthesizeSelector, + .interfaceOrientationSelector = interfaceOrientationSelector, + .processIDSelector = processIDSelector, + .initPathSelector = initPathSelector, + .moveSelector = moveSelector, + .liftSelector = liftSelector, + }; + return nil; +} + /* Returns nil when cls is a real class, otherwise a message reporting className as missing. */ +static NSString * _Nullable RunnerRequireClass(Class cls, NSString *className) { + if (cls == Nil) { + return [NSString stringWithFormat:@"private XCTest event synthesis unavailable: missing %@", className]; + } + return nil; +} + /* Returns nil when instances of cls respond to selector, otherwise a message naming the class and selectorName. */ +static NSString * _Nullable RunnerRequireSelector(Class cls, SEL selector, NSString *selectorName) { + if (![cls instancesRespondToSelector:selector]) { + return [NSString stringWithFormat: + @"private XCTest event synthesis unavailable: %@ missing %@", + NSStringFromClass(cls), + selectorName + ]; + } + return nil; +} + /* Same check as RunnerRequireSelector but performed on the application object itself rather than on a class; the message always labels the receiver as XCUIApplication. */ +static NSString * _Nullable RunnerRequireApplicationSelector( + id application, + SEL selector, + NSString *selectorName +) { + if (![application respondsToSelector:selector]) { + return [NSString stringWithFormat: + @"private XCTest event synthesis unavailable: XCUIApplication missing %@", + selectorName + ]; + } + return nil; +} + /* Builds the event path for one pointer: a touch at start with offset 0.0, a series of moves sampled along the gesture, and a lift at the full duration. Returns nil when the path object cannot be created. */ +static id RunnerPointerPath( + const RunnerXCTestEventBridge *bridge, + CGPoint start, + double x, + double y, + double dx, + double dy, + double scale, + double degrees, + double radius, + double 
durationMs, + double side +) { + id path = + ((RunnerMsgSendInitPath)objc_msgSend)([bridge->pathClass alloc], bridge->initPathSelector, start, 0.0); + if (path == nil) { + return nil; + } + /* At least 3 move samples, otherwise one per 16 ms of requested duration. NOTE(review): durationMs reaches the int cast unclamped, so a very large, negative or non-finite value is not bounded here; confirm callers limit it. */ + int frameCount = MAX(3, (int)(durationMs / 16.0)); + NSTimeInterval durationSeconds = durationMs / 1000.0; + for (int index = 1; index <= frameCount; index += 1) { + double t = (double)index / (double)frameCount; + CGPoint point = RunnerPointerPointAt(x, y, dx, dy, scale, degrees, radius, t, side); + NSTimeInterval offset = durationSeconds * t; + ((RunnerMsgSendPathMove)objc_msgSend)(path, bridge->moveSelector, point, offset); + } + /* The last move lands at t = 1, so the lift is issued at the same offset as the final sample. */ + ((RunnerMsgSendPathOffset)objc_msgSend)(path, bridge->liftSelector, durationSeconds); + return path; +} + /* Position of one pointer at progress t (callers in this file pass 0.0 through 1.0). The center moves linearly from (x, y) by (dx, dy). The radius is interpolated linearly: for scale of 1.0 or more it grows from baseRadius divided by scale up to baseRadius, for scale below 1.0 it shrinks from baseRadius to baseRadius times scale. The angle starts at minus pi over two and advances by degrees (converted to radians) over the gesture. side multiplies the offset from the center, so 1.0 and -1.0 give diametrically opposite points. */ +static CGPoint RunnerPointerPointAt( + double x, + double y, + double dx, + double dy, + double scale, + double degrees, + double baseRadius, + double t, + double side +) { + double centerX = x + dx * t; + double centerY = y + dy * t; + double startRadius = baseRadius / MAX(scale, 1.0); + double endRadius = baseRadius; + if (scale < 1.0) { + startRadius = baseRadius; + endRadius = baseRadius * scale; + } + double radius = startRadius + (endRadius - startRadius) * t; + double angle = (-M_PI_2) + (degrees * M_PI / 180.0) * t; + return CGPointMake(centerX + cos(angle) * radius * side, centerY + sin(angle) * radius * side); +} + +@end diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift index 778b81a13..efeae75a9 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift @@ -1281,28 +1281,21 @@ extension RunnerTests { durationMs: Double ) -> RunnerInteractionOutcome { #if os(iOS) - let holdDuration = max(0.02, min(durationMs / 1000.0, 10.0) / 3.0) - let panOutcome = performCoordinateDrag( - 
app: app, + let target = interactionRoot(app: app) + if let message = RunnerSynthesizedGesture.synthesizeTransform( + withApplication: app, x: x, y: y, - x2: x + dx, - y2: y + dy, - holdDuration: holdDuration - ) - guard case .performed = panOutcome else { - return panOutcome - } - - let target = gestureElement(app: app, x: x, y: y) - target.pinch(withScale: CGFloat(scale), velocity: CGFloat(scale >= 1.0 ? 1.0 : -1.0)) - return performCoordinateRotateGesture( - app: app, + dx: dx, + dy: dy, + scale: scale, degrees: degrees, - x: x, - y: y, - velocity: degrees >= 0 ? 1.0 : -1.0 - ) + radius: transformGestureRadius(frame: target.frame, scale: scale), + durationMs: durationMs + ) { + return .unsupported(message) + } + return .performed #elseif os(tvOS) return .unsupported("transformGesture is not supported on tvOS") #else @@ -1310,6 +1303,14 @@ extension RunnerTests { #endif } + /* Chooses the pointer radius from the target frame: 20 percent of the shorter side, widened for scale below 1.0 so that radius times scale is at least 8 percent of the shorter side, then raised to at least 48.0 and finally capped at 35 percent of the shorter side. The cap is applied last, so it wins over both the widening and the 48.0 floor. */ private func transformGestureRadius(frame: CGRect, scale: Double) -> Double { + let shorterSide = Double(min(frame.width, frame.height)) + let frameRadius = shorterSide * 0.20 + let minimumEndRadius = shorterSide * 0.08 + let scaleAdjustedRadius = scale < 1.0 ? max(frameRadius, minimumEndRadius / scale) : frameRadius + return min(max(scaleAdjustedRadius, 48.0), shorterSide * 0.35) + } + private func performCoordinatePinch(app: XCUIApplication, scale: Double, x: Double?, y: Double?) 
-> RunnerInteractionOutcome { #if os(tvOS) return .unsupported("pinch is not supported on tvOS") @@ -1361,21 +1362,6 @@ extension RunnerTests { #endif } -#if os(iOS) - private func gestureElement(app: XCUIApplication, x: Double, y: Double) -> XCUIElement { - let point = CGPoint(x: x, y: y) - let matches = app.descendants(matching: .any).allElementsBoundByIndex.filter { element in - element.exists && element.frame.contains(point) && !element.frame.isEmpty - } - if let smallest = matches.min(by: { left, right in - (left.frame.width * left.frame.height) < (right.frame.width * right.frame.height) - }) { - return smallest - } - return interactionRoot(app: app) - } -#endif - private func interactionRoot(app: XCUIApplication) -> XCUIElement { let windows = app.windows.allElementsBoundByIndex if let window = windows.first(where: { $0.exists && !$0.frame.isEmpty }) { diff --git a/src/utils/__tests__/args.test.ts b/src/utils/__tests__/args.test.ts index a88aa1780..0e355a489 100644 --- a/src/utils/__tests__/args.test.ts +++ b/src/utils/__tests__/args.test.ts @@ -965,6 +965,10 @@ test('usageForCommand resolves workflow help topic', () => { ); assert.match(help, /agent-device clipboard write "some text"/); assert.match(help, /For gesture-heavy iOS simulator proof videos, prefer --hide-touches/); + assert.match( + help, + /iOS simulator transform uses private XCTest synthesis for a continuous two-finger pan\/scale\/rotation path/, + ); assert.match(help, /Android Gboard handwriting\/stylus UI can capture text/); assert.match(help, /targetInput\/actualInput details/); assert.match(help, /Do not keep retrying fill\/type against the same field/); diff --git a/src/utils/command-schema.ts b/src/utils/command-schema.ts index a557bda99..d36de8e93 100644 --- a/src/utils/command-schema.ts +++ b/src/utils/command-schema.ts @@ -236,7 +236,7 @@ Command shape: Snapshot refs look like @e12. After snapshot -i, use the exact @eN ref from that output. 
If the exact ref is not known yet, first output snapshot -i, then use a concrete example shape like press @e12 in the next command; do not write @, @ref, @Label_Name, or @eN placeholders. Close means agent-device close. App-owned back means back; system back means back --system. - Taps are press or click. Gestures use swipe, longpress, or gesture . Android pinch, rotate, and transform use provider-native touch injection when available, then the bundled multi-touch helper. + Taps are press or click. Gestures use swipe, longpress, or gesture . Android pinch, rotate, and transform use provider-native touch injection when available, then the bundled multi-touch helper. iOS simulator transform uses private XCTest synthesis for a continuous two-finger pan/scale/rotation path; otherwise it reports UNSUPPORTED_OPERATION. Bootstrap: agent-device devices --platform ios @@ -323,7 +323,7 @@ Navigation and gestures: agent-device gesture pinch 0.5 200 400 agent-device gesture rotate 35 200 420 agent-device gesture transform 200 420 80 -40 2 35 700 - iOS simulator transform uses XCTest gesture primitives; verify app metrics instead of assuming requested degrees map exactly to recognizer output. + iOS simulator transform uses private XCTest synthesis for a continuous two-finger pan/scale/rotation path; verify app metrics instead of assuming requested values map exactly to recognizer output. Android transform injects a geometric two-finger path; app recognizers may report non-exact pan/scale/rotation. For Android combined transforms, verify qualitative state such as "pan changed yes" / "pinch changed yes" / "rotate changed yes" unless the app explicitly promises exact centroid metrics. If Android needs exact app-state values, prefer isolated gesture pan, gesture pinch, or gesture rotate commands over one combined transform. 
diff --git a/test/skillgym/suites/agent-device-smoke-suite.ts b/test/skillgym/suites/agent-device-smoke-suite.ts index 493617877..e5e2d9925 100644 --- a/test/skillgym/suites/agent-device-smoke-suite.ts +++ b/test/skillgym/suites/agent-device-smoke-suite.ts @@ -1525,7 +1525,7 @@ const SKILL_GUIDANCE_CASES: Case[] = [ 'Platform: Android', 'Current screen: gesture lab', 'Target center is x=200 y=420', - 'Need one continuous two-finger gesture without lifting fingers', + 'Need the direct transform command rather than separate gesture commands', 'Pan delta is dx=80 dy=-40', 'Zoom scale is 2', 'Rotation is 35 degrees', @@ -1551,6 +1551,28 @@ const SKILL_GUIDANCE_CASES: Case[] = [ /wait\s+["']?rotate\s+\d/i, ], }), + makeCase({ + id: 'ios-simulator-gesture-transform', + contract: [ + 'Platform: iOS simulator', + 'Current screen: gesture lab', + 'Target center is x=200 y=420', + 'Need one continuous two-finger gesture without lifting fingers', + 'Pan delta is dx=80 dy=-40', + 'Zoom scale is 2', + 'Rotation is 35 degrees', + 'Duration is 700ms', + ], + task: 'Plan the direct agent-device command for the combined pan, zoom, and rotate gesture.', + outputs: [plannedCommand('gesture transform'), /200\s+420\s+80\s+-40\s+2\s+35\s+700/i], + forbiddenOutputs: [ + plannedCommand('gesture pan'), + plannedCommand('gesture pinch'), + plannedCommand('gesture rotate'), + plannedCommand('rotate-gesture'), + plannedCommand('swipe'), + ], + }), makeCase({ id: 'settings-animation-stabilizer', contract: [ diff --git a/website/docs/docs/commands.md b/website/docs/docs/commands.md index c1d922c2a..1deef5955 100644 --- a/website/docs/docs/commands.md +++ b/website/docs/docs/commands.md @@ -280,7 +280,7 @@ On iOS, swipe duration is clamped to a safe range (`16..60ms`) to avoid longpres `gesture fling` accepts `up|down|left|right x y [distance] [durationMs]` for fast directional throws. 
`gesture rotate` accepts `degrees [x] [y] [velocity]`; the degree sign controls direction and velocity controls speed. `gesture transform` accepts `x y dx dy scale degrees [durationMs]` for one combined pan/zoom/rotate gesture on Android and iOS simulators. -On iOS simulators it is implemented with XCTest gesture primitives, so verify app-level metrics instead of assuming the requested degrees map exactly to recognizer output. +On iOS simulators it uses private XCTest synthesis for a continuous two-finger pan/scale/rotation path, so verify app-level metrics instead of assuming the requested values map exactly to recognizer output. On Android, `gesture transform` injects a geometric two-finger path. App recognizers may report non-exact pan, scale, and rotation values, so verify qualitative state such as `pan changed yes`, `pinch changed yes`, and `rotate changed yes` unless the app explicitly promises exact centroid metrics. If exact app-state values matter, prefer isolated `gesture pan`, `gesture pinch`, or `gesture rotate` commands. `scroll` accepts either a relative amount (`0.5` means roughly half of the viewport on that axis) or `--pixels ` for a fixed-distance gesture. Large distances are clamped to the usable drag band so the gesture stays reliable across Android, iOS, and macOS. Default snapshot text output is visible-first, so off-screen interactive content is summarized instead of shown as tappable refs.