From e3ca3c23093caef4ed02b8f511a3662627f26915 Mon Sep 17 00:00:00 2001 From: DjDeveloperr Date: Wed, 6 May 2026 14:06:49 -0400 Subject: [PATCH] Improve Android WebRTC input and encoding performance --- README.md | 16 +- cli/native/XCWNativeBridge.h | 5 + cli/native/XCWNativeBridge.m | 234 +++ client/src/api/types.ts | 7 + client/src/app/AppShell.tsx | 6 +- .../features/simulators/simulatorDisplay.ts | 3 + client/src/features/viewport/DeviceChrome.tsx | 51 +- .../features/viewport/SimulatorViewport.tsx | 3 + client/src/styles/components.css | 48 + docs/api/rest.md | 65 +- docs/cli/commands.md | 15 +- docs/extensions/browser-client.md | 6 +- docs/guide/architecture.md | 18 +- docs/guide/installation.md | 1 + server/Cargo.lock | 286 +++- server/Cargo.toml | 3 + server/src/android.rs | 1355 +++++++++++++++++ server/src/api/routes.rs | 692 ++++++++- server/src/main.rs | 193 ++- server/src/native/ffi.rs | 17 + server/src/transport/webrtc.rs | 662 +++++++- skills/simdeck/SKILL.md | 11 +- 22 files changed, 3568 insertions(+), 129 deletions(-) create mode 100644 server/src/android.rs diff --git a/README.md b/README.md index afb7a70f..60217875 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@

SimDeck is a developer tool built for streamlining mobile app development for coding agents. - Drive Simulator from the CLI using agents, browser, and automated tests on macOS. + Drive iOS Simulators and Android emulators from the CLI using agents, browser, and automated tests on macOS.

@@ -35,8 +35,9 @@ view inside the editor. ## Features -- Local simulator video stream over browser-native WebRTC H.264 with H.264 WebSocket fallback -- Full simulator control & inspection using private accessibility APIs - available using `simdeck` CLI +- Local iOS Simulator and Android emulator video over browser-native WebRTC H.264 with H.264 WebSocket fallback +- Android emulator frames are sourced from emulator gRPC and encoded through macOS VideoToolbox +- Full simulator control & inspection using private iOS accessibility APIs and Android UIAutomator - available using `simdeck` CLI - Real-time screen `describe` command using accessibility view tree - available in token-efficient format for agents - CoreSimulator chrome asset rendering for device bezels - NativeScript, React Native, UIKit and SwiftUI runtime inspector plugins to view app's view hierarchy live @@ -138,6 +139,7 @@ simdeck boot simdeck shutdown simdeck erase simdeck install /path/to/App.app +simdeck install android: /path/to/app.apk simdeck uninstall com.example.App simdeck open-url https://example.com simdeck launch com.apple.Preferences @@ -179,6 +181,14 @@ simdeck logs --seconds 30 --limit 200 without launching Simulator.app, then falls back to `xcrun simctl` when private booting is unavailable. +Android emulators appear in `simdeck list` with IDs like +`android:SimDeck_Pixel_8_API_36`. For Android IDs, lifecycle, install, launch, +URL, screenshot, logs, UIAutomator `describe`, tap, swipe, text, key, home, app +switcher, rotation, pasteboard, and browser live view route through the Android +SDK tools (`emulator` and `adb`) plus the emulator gRPC screenshot stream for +live video. `simdeck stream` remains iOS-only because it writes the iOS Simulator's +Annex B H.264 elementary stream. + `stream` writes an Annex B H.264 elementary stream to stdout for diagnostics or external tools such as `ffplay`. 
diff --git a/cli/native/XCWNativeBridge.h b/cli/native/XCWNativeBridge.h index a7d81d61..ff4ac7f1 100644 --- a/cli/native/XCWNativeBridge.h +++ b/cli/native/XCWNativeBridge.h @@ -89,6 +89,11 @@ bool xcw_native_session_rotate_right(void * _Nonnull handle, char * _Nullable * bool xcw_native_session_rotate_left(void * _Nonnull handle, char * _Nullable * _Nullable error_message); void xcw_native_session_set_frame_callback(void * _Nonnull handle, xcw_native_frame_callback _Nullable callback, void * _Nullable user_data); +void * _Nullable xcw_native_h264_encoder_create(xcw_native_frame_callback _Nullable callback, void * _Nullable user_data, char * _Nullable * _Nullable error_message); +void xcw_native_h264_encoder_destroy(void * _Nullable handle); +bool xcw_native_h264_encoder_encode_rgba(void * _Nonnull handle, const uint8_t * _Nonnull rgba, size_t length, uint32_t width, uint32_t height, uint64_t timestamp_us, char * _Nullable * _Nullable error_message); +void xcw_native_h264_encoder_request_keyframe(void * _Nonnull handle); + void xcw_native_free_string(char * _Nullable value); void xcw_native_free_bytes(xcw_native_owned_bytes bytes); void xcw_native_release_shared_bytes(xcw_native_shared_bytes bytes); diff --git a/cli/native/XCWNativeBridge.m b/cli/native/XCWNativeBridge.m index 3fc2376a..e93241e7 100644 --- a/cli/native/XCWNativeBridge.m +++ b/cli/native/XCWNativeBridge.m @@ -3,11 +3,13 @@ #import "DFPrivateSimulatorDisplayBridge.h" #import "XCWAccessibilityBridge.h" #import "XCWChromeRenderer.h" +#import "XCWH264Encoder.h" #import "XCWNativeSession.h" #import "XCWSimctl.h" #import #import +#import #include #include @@ -63,10 +65,190 @@ static xcw_native_owned_bytes XCWOwnedBytesFromData(NSData *data) { return bytes; } +static xcw_native_shared_bytes XCWSharedBytesFromData(NSData *data) { + if (data.length == 0) { + return (xcw_native_shared_bytes){0}; + } + + CFTypeRef owner = CFRetain((__bridge CFTypeRef)data); + return (xcw_native_shared_bytes){ + .data = 
data.bytes, + .length = data.length, + .owner = (const void *)owner, + }; +} + static XCWNativeSession *XCWNativeSessionFromHandle(void *handle) { return (__bridge XCWNativeSession *)handle; } +@interface XCWNativeH264Encoder : NSObject + +- (instancetype)initWithFrameCallback:(xcw_native_frame_callback)callback + userData:(void *)userData; +- (BOOL)encodeRGBA:(const uint8_t *)rgba + length:(size_t)length + width:(uint32_t)width + height:(uint32_t)height + error:(NSError * _Nullable __autoreleasing *)error; +- (void)requestKeyFrame; +- (void)invalidate; + +@end + +@implementation XCWNativeH264Encoder { + XCWH264Encoder *_encoder; + xcw_native_frame_callback _callback; + void *_callbackUserData; + uint64_t _frameSequence; +} + +- (instancetype)initWithFrameCallback:(xcw_native_frame_callback)callback + userData:(void *)userData { + self = [super init]; + if (self == nil) { + return nil; + } + + _callback = callback; + _callbackUserData = userData; + __weak typeof(self) weakSelf = self; + @synchronized (XCWNativeH264Encoder.class) { + const char *previousCodec = getenv("SIMDECK_VIDEO_CODEC"); + char *previousCodecCopy = previousCodec != NULL ? 
strdup(previousCodec) : NULL; + const char *androidCodec = getenv("SIMDECK_ANDROID_VIDEO_CODEC"); + if (androidCodec == NULL || strlen(androidCodec) == 0) { + androidCodec = "software"; + } + setenv("SIMDECK_VIDEO_CODEC", androidCodec, 1); + _encoder = [[XCWH264Encoder alloc] initWithOutputHandler:^(NSData *sampleData, + uint64_t timestampUs, + BOOL isKeyFrame, + NSString * _Nullable codec, + NSData * _Nullable decoderConfig, + CGSize dimensions) { + __strong typeof(weakSelf) strongSelf = weakSelf; + if (strongSelf == nil || strongSelf->_callback == NULL || sampleData.length == 0) { + return; + } + strongSelf->_frameSequence += 1; + xcw_native_frame frame = { + .frame_sequence = strongSelf->_frameSequence, + .timestamp_us = timestampUs, + .is_keyframe = isKeyFrame, + .width = (uint32_t)llround(dimensions.width), + .height = (uint32_t)llround(dimensions.height), + .codec = codec.UTF8String, + .description = XCWSharedBytesFromData(decoderConfig), + .data = XCWSharedBytesFromData(sampleData), + }; + strongSelf->_callback(&frame, strongSelf->_callbackUserData); + }]; + if (previousCodecCopy != NULL) { + setenv("SIMDECK_VIDEO_CODEC", previousCodecCopy, 1); + free(previousCodecCopy); + } else { + unsetenv("SIMDECK_VIDEO_CODEC"); + } + } + return self; +} + +- (void)dealloc { + [self invalidate]; +} + +- (BOOL)encodeRGBA:(const uint8_t *)rgba + length:(size_t)length + width:(uint32_t)width + height:(uint32_t)height + error:(NSError * _Nullable __autoreleasing *)error { + if (rgba == NULL || width == 0 || height == 0) { + if (error != NULL) { + *error = [NSError errorWithDomain:@"SimDeck.NativeH264Encoder" + code:1 + userInfo:@{ NSLocalizedDescriptionKey: @"RGBA frame input was empty." }]; + } + return NO; + } + size_t expectedLength = (size_t)width * (size_t)height * 4; + if (length < expectedLength) { + if (error != NULL) { + *error = [NSError errorWithDomain:@"SimDeck.NativeH264Encoder" + code:2 + userInfo:@{ NSLocalizedDescriptionKey: @"RGBA frame input was truncated." 
}]; + } + return NO; + } + + NSDictionary *attributes = @{ + (__bridge NSString *)kCVPixelBufferPixelFormatTypeKey: @(kCVPixelFormatType_32BGRA), + (__bridge NSString *)kCVPixelBufferWidthKey: @(width), + (__bridge NSString *)kCVPixelBufferHeightKey: @(height), + (__bridge NSString *)kCVPixelBufferIOSurfacePropertiesKey: @{}, + }; + CVPixelBufferRef pixelBuffer = NULL; + CVReturn createStatus = CVPixelBufferCreate(kCFAllocatorDefault, + (size_t)width, + (size_t)height, + kCVPixelFormatType_32BGRA, + (__bridge CFDictionaryRef)attributes, + &pixelBuffer); + if (createStatus != kCVReturnSuccess || pixelBuffer == NULL) { + if (error != NULL) { + *error = [NSError errorWithDomain:@"SimDeck.NativeH264Encoder" + code:createStatus + userInfo:@{ NSLocalizedDescriptionKey: @"Unable to allocate a VideoToolbox pixel buffer." }]; + } + return NO; + } + + CVReturn lockStatus = CVPixelBufferLockBaseAddress(pixelBuffer, 0); + if (lockStatus != kCVReturnSuccess) { + CVPixelBufferRelease(pixelBuffer); + if (error != NULL) { + *error = [NSError errorWithDomain:@"SimDeck.NativeH264Encoder" + code:lockStatus + userInfo:@{ NSLocalizedDescriptionKey: @"Unable to lock a VideoToolbox pixel buffer." 
}]; + } + return NO; + } + + uint8_t *dst = CVPixelBufferGetBaseAddress(pixelBuffer); + size_t dstRowBytes = CVPixelBufferGetBytesPerRow(pixelBuffer); + size_t srcRowBytes = (size_t)width * 4; + for (uint32_t y = 0; y < height; y += 1) { + const uint8_t *srcRow = rgba + ((size_t)y * srcRowBytes); + uint8_t *dstRow = dst + ((size_t)y * dstRowBytes); + for (uint32_t x = 0; x < width; x += 1) { + const uint8_t *src = srcRow + ((size_t)x * 4); + uint8_t *pixel = dstRow + ((size_t)x * 4); + pixel[0] = src[2]; + pixel[1] = src[1]; + pixel[2] = src[0]; + pixel[3] = src[3]; + } + } + CVPixelBufferUnlockBaseAddress(pixelBuffer, 0); + [_encoder encodePixelBuffer:pixelBuffer]; + CVPixelBufferRelease(pixelBuffer); + return YES; +} + +- (void)requestKeyFrame { + [_encoder requestKeyFrame]; +} + +- (void)invalidate { + [_encoder invalidate]; +} + +@end + +static XCWNativeH264Encoder *XCWNativeH264EncoderFromHandle(void *handle) { + return (__bridge XCWNativeH264Encoder *)handle; +} + static BOOL XCWPerformSimctlAction(char **errorMessage, BOOL (^action)(XCWSimctl *simctl, NSError **error)) { XCWSimctl *simctl = [[XCWSimctl alloc] init]; NSError *error = nil; @@ -889,6 +1071,58 @@ void xcw_native_session_set_frame_callback(void *handle, xcw_native_frame_callba } } +void *xcw_native_h264_encoder_create(xcw_native_frame_callback callback, void *user_data, char **error_message) { + @autoreleasepool { + XCWNativeH264Encoder *encoder = [[XCWNativeH264Encoder alloc] initWithFrameCallback:callback + userData:user_data]; + if (encoder == nil) { + if (error_message != NULL) { + *error_message = XCWCopyCString(@"Unable to create the native H.264 encoder."); + } + return NULL; + } + return (__bridge_retained void *)encoder; + } +} + +void xcw_native_h264_encoder_destroy(void *handle) { + if (handle == NULL) { + return; + } + @autoreleasepool { + XCWNativeH264Encoder *encoder = CFBridgingRelease(handle); + [encoder invalidate]; + } +} + +bool xcw_native_h264_encoder_encode_rgba(void *handle, 
+ const uint8_t *rgba, + size_t length, + uint32_t width, + uint32_t height, + uint64_t timestamp_us, + char **error_message) { + (void)timestamp_us; + @autoreleasepool { + NSError *error = nil; + BOOL ok = [XCWNativeH264EncoderFromHandle(handle) encodeRGBA:rgba + length:length + width:width + height:height + error:&error]; + if (!ok) { + XCWSetErrorMessage(error_message, error); + } + return ok; + } +} + +void xcw_native_h264_encoder_request_keyframe(void *handle) { + @autoreleasepool { + [XCWNativeH264EncoderFromHandle(handle) requestKeyFrame]; + } +} + void xcw_native_free_string(char *value) { if (value != NULL) { free(value); diff --git a/client/src/api/types.ts b/client/src/api/types.ts index e45e625f..a45bd0e8 100644 --- a/client/src/api/types.ts +++ b/client/src/api/types.ts @@ -28,11 +28,17 @@ export interface PrivateDisplayInfo { export interface SimulatorMetadata { udid: string; name: string; + platform?: "ios-simulator" | "android-emulator" | string; runtimeName?: string; runtimeIdentifier?: string; deviceTypeName?: string; deviceTypeIdentifier?: string; isBooted: boolean; + android?: { + avdName?: string; + grpcPort?: number; + serial?: string; + }; privateDisplay?: PrivateDisplayInfo; } @@ -61,6 +67,7 @@ export interface ChromeProfile { screenWidth: number; screenHeight: number; cornerRadius: number; + chromeStyle?: "asset" | "css-android" | string; hasScreenMask?: boolean; buttons?: ChromeButtonProfile[]; } diff --git a/client/src/app/AppShell.tsx b/client/src/app/AppShell.tsx index 16cd0b20..a0231ed2 100644 --- a/client/src/app/AppShell.tsx +++ b/client/src/app/AppShell.tsx @@ -737,9 +737,12 @@ export function AppShell({ : "", [selectedSimulator?.udid, streamStamp], ); + const chromeUsesAsset = Boolean( + viewportChromeProfile && viewportChromeProfile.chromeStyle !== "css-android", + ); const chromeRequired = Boolean( (shouldRenderChrome && !chromeProfileReady) || - (viewportChromeProfile && chromeUrl), + (chromeUsesAsset && chromeUrl), ); const 
simulatorRotationQuarterTurns = normalizeSimulatorRotationQuarterTurns(selectedSimulator); @@ -1935,6 +1938,7 @@ export function AppShell({ chromeProfile={viewportChromeProfile} chromeRequired={chromeRequired} chromeScreenStyle={viewportScreenStyle} + chromeStyle={viewportChromeProfile?.chromeStyle} chromeUrl={chromeUrl} chromeButtonUrl={chromeButtonUrl} debugPanel={ diff --git a/client/src/features/simulators/simulatorDisplay.ts b/client/src/features/simulators/simulatorDisplay.ts index 0837ef20..dc2cd995 100644 --- a/client/src/features/simulators/simulatorDisplay.ts +++ b/client/src/features/simulators/simulatorDisplay.ts @@ -14,6 +14,9 @@ export function simulatorRuntimeLabel(simulator: SimulatorMetadata): string { export function shouldRenderNativeChrome( simulator: SimulatorMetadata, ): boolean { + if (simulator.platform === "android-emulator") { + return true; + } const identifier = simulator.deviceTypeIdentifier ?? ""; const name = simulator.name ?? ""; const deviceTypeName = simulator.deviceTypeName ?? ""; diff --git a/client/src/features/viewport/DeviceChrome.tsx b/client/src/features/viewport/DeviceChrome.tsx index 9ac199de..b5455c38 100644 --- a/client/src/features/viewport/DeviceChrome.tsx +++ b/client/src/features/viewport/DeviceChrome.tsx @@ -17,6 +17,7 @@ interface DeviceChromeProps { accessibilitySelectedId: string; chromeProfile: ChromeProfile | null; chromeScreenStyle: CSSProperties | null; + chromeStyle?: string; chromeUrl: string; chromeButtonUrl: (button: string, pressed?: boolean) => string; hasFrame: boolean; @@ -60,6 +61,7 @@ export function DeviceChrome({ accessibilitySelectedId, chromeProfile, chromeScreenStyle, + chromeStyle, chromeUrl, chromeButtonUrl, hasFrame, @@ -91,35 +93,42 @@ export function DeviceChrome({ useChromeProfile, }: DeviceChromeProps) { if (useChromeProfile) { + const useCssAndroidChrome = chromeStyle === "css-android"; return (
- - - + {useCssAndroidChrome ? ( +