From 14eaf6eeea92d057cf94f9d08176c972f4428d39 Mon Sep 17 00:00:00 2001
From: Carlo Zottmann <carlo@zottmann.dev>
Date: Sat, 28 Feb 2026 16:30:41 +0100
Subject: [PATCH] fix: classify stderr warnings correctly and prioritize
 xcresult output

When xcresult parsing succeeds and tests actually ran (totalTestCount > 0),
stderr lines are redundant noise (e.g. "multiple matching destinations")
and are filtered out. The xcresult summary is placed first in the response.

When xcresult reports 0 tests (build failed before tests could run), the
xcresult is meaningless and stderr is preserved since it contains the
actual compilation errors.

Fixes #231
---
 CHANGELOG.md                                  |   6 +
 .../device/__tests__/test_device.test.ts      |  84 +++++++++--
 src/mcp/tools/device/test_device.ts           |  32 +++-
 .../tools/macos/__tests__/test_macos.test.ts  | 137 ++++++++++++++++++
 src/mcp/tools/macos/test_macos.ts             |  33 ++++-
 src/utils/test-common.ts                      |  31 +++-
 6 files changed, 295 insertions(+), 28 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 33ee66a5..3629949c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## [Unreleased]
+
+### Fixed
+
+- Fixed stderr warnings (e.g. "multiple matching destinations") hiding actual test failures by prioritizing xcresult output when available ([#231](https://github.com/getsentry/XcodeBuildMCP/issues/231))
+
 ## [2.1.0]
 
 ### Added
diff --git a/src/mcp/tools/device/__tests__/test_device.test.ts b/src/mcp/tools/device/__tests__/test_device.test.ts
index 1179dda4..6851a35b 100644
--- a/src/mcp/tools/device/__tests__/test_device.test.ts
+++ b/src/mcp/tools/device/__tests__/test_device.test.ts
@@ -168,9 +168,9 @@ describe('test_device plugin', () => {
       );
 
       expect(result.content).toHaveLength(2);
-      expect(result.content[0].text).toContain('✅');
-      expect(result.content[1].text).toContain('Test Results Summary:');
-      expect(result.content[1].text).toContain('MyScheme Tests');
+      expect(result.content[0].text).toContain('Test Results Summary:');
+      expect(result.content[0].text).toContain('MyScheme Tests');
+      expect(result.content[1].text).toContain('✅');
     });
 
     it('should handle test failure scenarios', async () => {
@@ -214,8 +214,8 @@ describe('test_device plugin', () => {
       );
 
       expect(result.content).toHaveLength(2);
-      expect(result.content[1].text).toContain('Test Failures:');
-      expect(result.content[1].text).toContain('testExample');
+      expect(result.content[0].text).toContain('Test Failures:');
+      expect(result.content[0].text).toContain('testExample');
     });
 
     it('should handle xcresult parsing failures gracefully', async () => {
@@ -264,6 +264,70 @@ describe('test_device plugin', () => {
       expect(result.content[0].text).toContain('✅');
     });
 
+    it('should preserve stderr when xcresult reports zero tests (build failure)', async () => {
+      // When the build fails, xcresult exists but has totalTestCount: 0.
+      // stderr contains the actual compilation errors and must be preserved.
+      let callCount = 0;
+      const mockExecutor = async (
+        _args: string[],
+        _description?: string,
+        _useShell?: boolean,
+        _opts?: { cwd?: string },
+        _detached?: boolean,
+      ) => {
+        callCount++;
+
+        // First call: xcodebuild test fails with compilation error
+        if (callCount === 1) {
+          return createMockCommandResponse({
+            success: false,
+            output: '',
+            error: 'error: missing argument for parameter in call',
+          });
+        }
+
+        // Second call: xcresulttool succeeds but reports 0 tests
+        return createMockCommandResponse({
+          success: true,
+          output: JSON.stringify({
+            title: 'Test Results',
+            result: 'unknown',
+            totalTestCount: 0,
+            passedTests: 0,
+            failedTests: 0,
+            skippedTests: 0,
+            expectedFailures: 0,
+          }),
+        });
+      };
+
+      const result = await testDeviceLogic(
+        {
+          projectPath: '/path/to/project.xcodeproj',
+          scheme: 'MyScheme',
+          deviceId: 'test-device-123',
+          configuration: 'Debug',
+          preferXcodebuild: false,
+          platform: 'iOS',
+        },
+        mockExecutor,
+        createMockFileSystemExecutor({
+          mkdtemp: async () => '/tmp/xcodebuild-test-buildfail',
+          tmpdir: () => '/tmp',
+          stat: async () => ({ isDirectory: () => false, mtimeMs: 0 }),
+          rm: async () => {},
+        }),
+      );
+
+      // stderr with compilation error must be preserved
+      const allText = result.content.map((c) => c.text).join('\n');
+      expect(allText).toContain('[stderr]');
+      expect(allText).toContain('missing argument');
+
+      // xcresult summary should NOT be present
+      expect(allText).not.toContain('Test Results Summary:');
+    });
+
     it('should support different platforms', async () => {
       // Mock xcresulttool output
       const mockExecutor = createMockExecutor({
@@ -298,7 +362,7 @@ describe('test_device plugin', () => {
       );
 
       expect(result.content).toHaveLength(2);
-      expect(result.content[1].text).toContain('WatchApp Tests');
+      expect(result.content[0].text).toContain('WatchApp Tests');
     });
 
     it('should handle optional parameters', async () => {
@@ -337,7 +401,7 @@ describe('test_device plugin', () => {
       );
 
       expect(result.content).toHaveLength(2);
-      expect(result.content[0].text).toContain('✅');
+      expect(result.content[1].text).toContain('✅');
     });
 
     it('should handle workspace testing successfully', async () => {
@@ -374,9 +438,9 @@ describe('test_device plugin', () => {
       );
 
       expect(result.content).toHaveLength(2);
-      expect(result.content[0].text).toContain('✅');
-      expect(result.content[1].text).toContain('Test Results Summary:');
-      expect(result.content[1].text).toContain('WorkspaceScheme Tests');
+      expect(result.content[0].text).toContain('Test Results Summary:');
+      expect(result.content[0].text).toContain('WorkspaceScheme Tests');
+      expect(result.content[1].text).toContain('✅');
     });
   });
 });
diff --git a/src/mcp/tools/device/test_device.ts b/src/mcp/tools/device/test_device.ts
index 257c7724..a6e9d042 100644
--- a/src/mcp/tools/device/test_device.ts
+++ b/src/mcp/tools/device/test_device.ts
@@ -76,13 +76,18 @@ const publicSchemaObject = baseSchemaObject.omit({
  * (JavaScript implementation - no actual interface, this is just documentation)
  */
 
+interface XcresultSummary {
+  formatted: string;
+  totalTestCount: number;
+}
+
 /**
  * Parse xcresult bundle using xcrun xcresulttool
  */
 async function parseXcresultBundle(
   resultBundlePath: string,
   executor: CommandExecutor = getDefaultCommandExecutor(),
-): Promise<string> {
+): Promise<XcresultSummary> {
   try {
     // Use injected executor for testing
     const result = await executor(
@@ -98,7 +103,11 @@ async function parseXcresultBundle(
 
     // Parse JSON response and format as human-readable
     const summaryData = JSON.parse(result.output) as Record<string, unknown>;
-    return formatTestSummary(summaryData);
+    return {
+      formatted: formatTestSummary(summaryData),
+      totalTestCount:
+        typeof summaryData.totalTestCount === 'number' ? summaryData.totalTestCount : 0,
+    };
   } catch (error) {
     const errorMessage = error instanceof Error ? error.message : String(error);
     log('error', `Error parsing xcresult bundle: ${errorMessage}`);
@@ -252,20 +261,31 @@ export async function testDeviceLogic(
         throw new Error(`xcresult bundle not found at ${resultBundlePath}`);
       }
 
-      const testSummary = await parseXcresultBundle(resultBundlePath, executor);
+      const xcresult = await parseXcresultBundle(resultBundlePath, executor);
       log('info', 'Successfully parsed xcresult bundle');
 
       // Clean up temporary directory
       await cleanup();
 
-      // Return combined result - preserve isError from testResult (test failures should be marked as errors)
+      // If no tests ran (e.g. build failed), the xcresult is empty/meaningless.
+      // Fall back to the original response which contains the actual build errors.
+      if (xcresult.totalTestCount === 0) {
+        log('info', 'xcresult reports 0 tests — falling back to raw build output');
+        return testResult;
+      }
+
+      // When xcresult has real test data, it's the authoritative source.
+      // Drop stderr lines — they're redundant noise (e.g. "multiple matching destinations").
+      const filteredContent = (testResult.content || []).filter(
+        (item) => item.type !== 'text' || !item.text.includes('[stderr]'),
+      );
       return {
         content: [
-          ...(testResult.content || []),
           {
             type: 'text',
-            text: '\nTest Results Summary:\n' + testSummary,
+            text: '\nTest Results Summary:\n' + xcresult.formatted,
           },
+          ...filteredContent,
         ],
         isError: testResult.isError,
       };
diff --git a/src/mcp/tools/macos/__tests__/test_macos.test.ts b/src/mcp/tools/macos/__tests__/test_macos.test.ts
index 71f6d562..75463ba5 100644
--- a/src/mcp/tools/macos/__tests__/test_macos.test.ts
+++ b/src/mcp/tools/macos/__tests__/test_macos.test.ts
@@ -507,6 +507,143 @@ describe('test_macos plugin (unified)', () => {
       );
     });
 
+    it('should filter out stderr lines when xcresult data is available', async () => {
+      // Regression test for #231: stderr warnings (e.g. "multiple matching destinations")
+      // should be dropped when xcresult parsing succeeds, since xcresult is authoritative.
+      let callCount = 0;
+      const mockExecutor = async (
+        command: string[],
+        logPrefix?: string,
+        useShell?: boolean,
+        opts?: { env?: Record<string, string> },
+        detached?: boolean,
+      ) => {
+        callCount++;
+        void logPrefix;
+        void useShell;
+        void opts;
+        void detached;
+
+        // First call: xcodebuild test fails with stderr warning
+        if (callCount === 1) {
+          return createMockCommandResponse({
+            success: false,
+            output: '',
+            error:
+              'WARNING: multiple matching destinations, using first match\n' + 'error: Test failed',
+          });
+        }
+
+        // Second call: xcresulttool succeeds
+        if (command.includes('xcresulttool')) {
+          return createMockCommandResponse({
+            success: true,
+            output: JSON.stringify({
+              title: 'Test Results',
+              result: 'FAILED',
+              totalTestCount: 5,
+              passedTests: 3,
+              failedTests: 2,
+              skippedTests: 0,
+              expectedFailures: 0,
+            }),
+          });
+        }
+
+        return createMockCommandResponse({ success: true, output: '' });
+      };
+
+      const mockFileSystemExecutor = createTestFileSystemExecutor({
+        mkdtemp: async () => '/tmp/xcodebuild-test-stderr',
+      });
+
+      const result = await testMacosLogic(
+        {
+          workspacePath: '/path/to/MyProject.xcworkspace',
+          scheme: 'MyScheme',
+        },
+        mockExecutor,
+        mockFileSystemExecutor,
+      );
+
+      // stderr lines should be filtered out
+      const allText = result.content.map((c) => c.text).join('\n');
+      expect(allText).not.toContain('[stderr]');
+
+      // xcresult summary should be present and first
+      expect(result.content[0].text).toContain('Test Results Summary:');
+
+      // Build status line should still be present
+      expect(allText).toContain('Test Run test failed for scheme MyScheme');
+    });
+
+    it('should preserve stderr when xcresult reports zero tests (build failure)', async () => {
+      // When the build fails, xcresult exists but has totalTestCount: 0.
+      // In that case stderr contains the actual compilation errors and must be preserved.
+      let callCount = 0;
+      const mockExecutor = async (
+        command: string[],
+        logPrefix?: string,
+        useShell?: boolean,
+        opts?: { env?: Record<string, string> },
+        detached?: boolean,
+      ) => {
+        callCount++;
+        void logPrefix;
+        void useShell;
+        void opts;
+        void detached;
+
+        // First call: xcodebuild test fails with compilation error on stderr
+        if (callCount === 1) {
+          return createMockCommandResponse({
+            success: false,
+            output: '',
+            error: 'error: missing argument for parameter in call',
+          });
+        }
+
+        // Second call: xcresulttool succeeds but reports 0 tests
+        if (command.includes('xcresulttool')) {
+          return createMockCommandResponse({
+            success: true,
+            output: JSON.stringify({
+              title: 'Test Results',
+              result: 'unknown',
+              totalTestCount: 0,
+              passedTests: 0,
+              failedTests: 0,
+              skippedTests: 0,
+              expectedFailures: 0,
+            }),
+          });
+        }
+
+        return createMockCommandResponse({ success: true, output: '' });
+      };
+
+      const mockFileSystemExecutor = createTestFileSystemExecutor({
+        mkdtemp: async () => '/tmp/xcodebuild-test-buildfail',
+      });
+
+      const result = await testMacosLogic(
+        {
+          workspacePath: '/path/to/MyProject.xcworkspace',
+          scheme: 'MyScheme',
+        },
+        mockExecutor,
+        mockFileSystemExecutor,
+      );
+
+      // stderr with compilation error must be preserved (not filtered)
+      const allText = result.content.map((c) => c.text).join('\n');
+      expect(allText).toContain('[stderr]');
+      expect(allText).toContain('missing argument');
+
+      // xcresult summary should NOT be present (it's meaningless with 0 tests)
+      expect(allText).not.toContain('Test Results Summary:');
+    });
+
     it('should return exact exception handling response', async () => {
       // Mock executor (won't be called due to mkdtemp failure)
       const mockExecutor = createMockExecutor({
diff --git a/src/mcp/tools/macos/test_macos.ts b/src/mcp/tools/macos/test_macos.ts
index 09120fd9..67790683 100644
--- a/src/mcp/tools/macos/test_macos.ts
+++ b/src/mcp/tools/macos/test_macos.ts
@@ -86,10 +86,15 @@ export type TestMacosParams = z.infer<typeof testMacosSchema>;
 /**
  * Parse xcresult bundle using xcrun xcresulttool
  */
+interface XcresultSummary {
+  formatted: string;
+  totalTestCount: number;
+}
+
 async function parseXcresultBundle(
   resultBundlePath: string,
   executor: CommandExecutor = getDefaultCommandExecutor(),
-): Promise<string> {
+): Promise<XcresultSummary> {
   try {
     const result = await executor(
       ['xcrun', 'xcresulttool', 'get', 'test-results', 'summary', '--path', resultBundlePath],
@@ -113,7 +118,12 @@ async function parseXcresultBundle(
       throw new Error('Invalid JSON output: expected object');
     }
 
-    return formatTestSummary(summary as Record<string, unknown>);
+    const summaryRecord = summary as Record<string, unknown>;
+    return {
+      formatted: formatTestSummary(summaryRecord),
+      totalTestCount:
+        typeof summaryRecord.totalTestCount === 'number' ? summaryRecord.totalTestCount : 0,
+    };
   } catch (error) {
     const errorMessage = error instanceof Error ? error.message : String(error);
     log('error', `Error parsing xcresult bundle: ${errorMessage}`);
@@ -287,20 +297,31 @@ export async function testMacosLogic(
         throw new Error(`xcresult bundle not found at ${resultBundlePath}`);
       }
 
-      const testSummary = await parseXcresultBundle(resultBundlePath, executor);
+      const xcresult = await parseXcresultBundle(resultBundlePath, executor);
       log('info', 'Successfully parsed xcresult bundle');
 
       // Clean up temporary directory
       await fileSystemExecutor.rm(tempDir, { recursive: true, force: true });
 
-      // Return combined result - preserve isError from testResult (test failures should be marked as errors)
+      // If no tests ran (e.g. build failed), the xcresult is empty/meaningless.
+      // Fall back to the original response which contains the actual build errors.
+      if (xcresult.totalTestCount === 0) {
+        log('info', 'xcresult reports 0 tests — falling back to raw build output');
+        return testResult;
+      }
+
+      // When xcresult has real test data, it's the authoritative source.
+      // Drop stderr lines — they're redundant noise (e.g. "multiple matching destinations").
+      const filteredContent = (testResult.content ?? []).filter(
+        (item) => item.type !== 'text' || !item.text.includes('[stderr]'),
+      );
       return {
         content: [
-          ...(testResult.content ?? []),
           {
             type: 'text',
-            text: '\nTest Results Summary:\n' + testSummary,
+            text: '\nTest Results Summary:\n' + xcresult.formatted,
           },
+          ...filteredContent,
         ],
         isError: testResult.isError,
       };
diff --git a/src/utils/test-common.ts b/src/utils/test-common.ts
index 8703f134..ab60a15d 100644
--- a/src/utils/test-common.ts
+++ b/src/utils/test-common.ts
@@ -56,10 +56,15 @@ interface TestSummary {
   }>;
 }
 
+interface XcresultSummary {
+  formatted: string;
+  totalTestCount: number;
+}
+
 /**
  * Parse xcresult bundle using xcrun xcresulttool
  */
-export async function parseXcresultBundle(resultBundlePath: string): Promise<string> {
+export async function parseXcresultBundle(resultBundlePath: string): Promise<XcresultSummary> {
   try {
     const execAsync = promisify(exec);
     const { stdout } = await execAsync(
@@ -68,7 +73,10 @@ export async function parseXcresultBundle(resultBundlePath: string): Promise<str
 
     // Parse JSON response and format as human-readable
     const summary = JSON.parse(stdout) as TestSummary;
-    return formatTestSummary(summary);
+    return {
+      formatted: formatTestSummary(summary),
+      totalTestCount: summary.totalTestCount ?? 0,
+    };
   } catch (error) {
     const errorMessage = error instanceof Error ? error.message : String(error);
     log('error', `Error parsing xcresult bundle: ${errorMessage}`);
@@ -216,20 +224,31 @@ export async function handleTestLogic(
         throw new Error(`xcresult bundle not found at ${resultBundlePath}`);
       }
 
-      const testSummary = await parseXcresultBundle(resultBundlePath);
+      const xcresult = await parseXcresultBundle(resultBundlePath);
       log('info', 'Successfully parsed xcresult bundle');
 
       // Clean up temporary directory
       await rm(tempDir, { recursive: true, force: true });
 
-      // Return combined result - preserve isError from testResult (test failures should be marked as errors)
+      // If no tests ran (e.g. build failed), the xcresult is empty/meaningless.
+      // Fall back to the original response which contains the actual build errors.
+      if (xcresult.totalTestCount === 0) {
+        log('info', 'xcresult reports 0 tests — falling back to raw build output');
+        return consolidateContentForClaudeCode(testResult);
+      }
+
+      // When xcresult has real test data, it's the authoritative source.
+      // Drop stderr lines — they're redundant noise (e.g. "multiple matching destinations").
+      const filteredContent = (testResult.content || []).filter(
+        (item) => item.type !== 'text' || !item.text.includes('[stderr]'),
+      );
       const combinedResponse: ToolResponse = {
         content: [
-          ...(testResult.content || []),
           {
             type: 'text',
-            text: '\nTest Results Summary:\n' + testSummary,
+            text: '\nTest Results Summary:\n' + xcresult.formatted,
           },
+          ...filteredContent,
         ],
         isError: testResult.isError,
       };