diff --git a/agents-api.php b/agents-api.php index ded0a48..bc942e0 100644 --- a/agents-api.php +++ b/agents-api.php @@ -93,6 +93,7 @@ require_once AGENTS_API_PATH . 'src/Runtime/class-wp-agent-transcript-persister.php'; require_once AGENTS_API_PATH . 'src/Runtime/class-wp-agent-null-transcript-persister.php'; require_once AGENTS_API_PATH . 'src/Runtime/class-wp-agent-conversation-compaction.php'; +require_once AGENTS_API_PATH . 'src/Runtime/class-wp-agent-tool-pair-validator.php'; require_once AGENTS_API_PATH . 'src/Runtime/class-wp-agent-markdown-section-compaction-adapter.php'; require_once AGENTS_API_PATH . 'src/Runtime/class-wp-agent-iteration-budget.php'; require_once AGENTS_API_PATH . 'src/Runtime/class-wp-agent-conversation-result.php'; diff --git a/composer.json b/composer.json index 4225695..87b8b19 100644 --- a/composer.json +++ b/composer.json @@ -42,6 +42,7 @@ "php tests/conversation-runner-contracts-smoke.php", "php tests/conversation-transcript-lock-smoke.php", "php tests/conversation-compaction-smoke.php", + "php tests/tool-pair-validator-smoke.php", "php tests/markdown-section-compaction-smoke.php", "php tests/context-registry-smoke.php", "php tests/conversation-loop-smoke.php", diff --git a/src/Runtime/class-wp-agent-tool-pair-validator.php b/src/Runtime/class-wp-agent-tool-pair-validator.php new file mode 100644 index 0000000..b1cb5e4 --- /dev/null +++ b/src/Runtime/class-wp-agent-tool-pair-validator.php @@ -0,0 +1,213 @@ + i` with the same `payload.tool_name`. Matching is FIFO + * per tool name — the oldest unmatched tool_call wins, mirroring how providers + * resolve tool-use IDs positionally when multiple calls share a name. 
+ */
+class WP_Agent_Tool_Pair_Validator {
+
+	public const KIND_ORPHAN_TOOL_CALL = 'orphan_tool_call'; // Report kind: a tool_call with no later matching tool_result.
+	public const KIND_ORPHAN_TOOL_RESULT = 'orphan_tool_result'; // Report kind: a tool_result with no earlier unmatched tool_call.
+
+	public const EVENT_VALIDATED = 'tool_pair_validated'; // Event type returned by prune() when no orphans were found.
+	public const EVENT_PRUNED = 'tool_pair_pruned'; // Event type returned by prune() when orphans were dropped.
+
+	/**
+	 * Inspect a message list and return any orphan tool_call / tool_result envelopes.
+	 *
+	 * The list is re-indexed with array_values() before scanning, so every reported
+	 * `index` is the zero-based position of the message in the list, not its
+	 * original array key. Each message is passed through
+	 * WP_Agent_Message::normalize() and classified by the envelope's `type`.
+	 *
+	 * A tool_result is paired with the oldest still-unmatched tool_call that
+	 * precedes it and carries the same tool name. A tool_result with no such call
+	 * is reported as an orphan result; every tool_call still unmatched once the
+	 * scan ends is reported as an orphan call. Messages of any other type are
+	 * ignored.
+	 *
+	 * Returned reports are sorted by ascending message index. Each entry has the
+	 * shape `{ index, kind, type, tool_name }`, where `kind` is one of the KIND_*
+	 * constants of this class and `type` is the matching WP_Agent_Message type
+	 * constant.
+	 *
+	 * @param array<int, array<string, mixed>> $messages Raw or normalized messages.
+	 * @return array<int, array<string, mixed>> Orphan reports; empty when every tool message is paired.
+	 */
+	public static function validate( array $messages ): array {
+		$orphans = array();
+		$pending = array(); // tool_calls seen so far that no tool_result has consumed yet.
+
+		foreach ( array_values( $messages ) as $index => $message ) {
+			$envelope = WP_Agent_Message::normalize( $message );
+			$type = $envelope['type'];
+
+			if ( WP_Agent_Message::TYPE_TOOL_CALL === $type ) {
+				$pending[] = array(
+					'index' => $index,
+					'tool_name' => self::tool_name( $envelope ),
+				);
+				continue;
+			}
+
+			if ( WP_Agent_Message::TYPE_TOOL_RESULT !== $type ) {
+				continue;
+			}
+
+			$tool_name = self::tool_name( $envelope );
+			$matched_pos = self::match_pending( $pending, $tool_name );
+
+			if ( null === $matched_pos ) {
+				$orphans[] = array(
+					'index' => $index,
+					'kind' => self::KIND_ORPHAN_TOOL_RESULT,
+					'type' => WP_Agent_Message::TYPE_TOOL_RESULT,
+					'tool_name' => $tool_name,
+				);
+				continue;
+			}
+
+			array_splice( $pending, $matched_pos, 1 ); // Consume the matched call; array_splice() also re-indexes $pending.
+		}
+
+		foreach ( $pending as $pending_call ) {
+			$orphans[] = array(
+				'index' => $pending_call['index'],
+				'kind' => self::KIND_ORPHAN_TOOL_CALL,
+				'type' => WP_Agent_Message::TYPE_TOOL_CALL,
+				'tool_name' => $pending_call['tool_name'],
+			);
+		}
+
+		usort( // Orphan calls were appended after the orphan results, so restore message order.
+			$orphans,
+			static function ( array $a, array $b ): int {
+				return $a['index'] <=> $b['index'];
+			}
+		);
+
+		return $orphans;
+	}
+
+	/**
+	 * Convenience predicate: does the transcript have zero orphans?
+	 *
+	 * Runs validate() on the list and compares its result with an empty array.
+	 *
+	 * @param array<int, array<string, mixed>> $messages Messages.
+	 * @return bool True when validate() reports no orphans (including for an empty list).
+	 */
+	public static function is_paired( array $messages ): bool {
+		return array() === self::validate( $messages );
+	}
+
+	/**
+	 * Drop orphan tool_call / tool_result envelopes from the message list.
+	 *
+	 * Non-tool messages and properly paired tool messages are preserved. The
+	 * returned events array follows the same `{type, metadata}` shape used by
+	 * the compaction lifecycle so consumers can forward both through a single
+	 * event sink.
+	 *
+	 * The input is re-indexed with array_values() before validation, and retained
+	 * messages are returned exactly as they were passed in (they are not replaced
+	 * by their normalized envelopes).
+	 *
+	 * Exactly one event is returned:
+	 * - `tool_pair_validated` when nothing was removed; its metadata holds
+	 *   `total_messages` and `orphan_count` (0).
+	 * - `tool_pair_pruned` otherwise; its metadata holds `total_messages`,
+	 *   `orphan_count`, `retained_count`, `removed_indices` and the full
+	 *   `orphans` report list from validate().
+	 *
+	 * @param array<int, array<string, mixed>> $messages Messages.
+	 * @return array{messages: array<int, array<string, mixed>>, removed: array<int, array<string, mixed>>, events: array<int, array<string, mixed>>}
+	 */
+	public static function prune( array $messages ): array {
+		$source = array_values( $messages );
+		$orphans = self::validate( $source );
+
+		if ( array() === $orphans ) {
+			return array(
+				'messages' => $source,
+				'removed' => array(),
+				'events' => array(
+					self::event(
+						self::EVENT_VALIDATED,
+						array(
+							'total_messages' => count( $source ),
+							'orphan_count' => 0,
+						)
+					),
+				),
+			);
+		}
+
+		$drop_indices = array(); // Used as a set: message index => true, for isset() lookups below.
+		foreach ( $orphans as $orphan ) {
+			$drop_indices[ (int) $orphan['index'] ] = true;
+		}
+
+		$retained = array();
+		foreach ( $source as $index => $message ) {
+			if ( isset( $drop_indices[ $index ] ) ) {
+				continue;
+			}
+			$retained[] = $message;
+		}
+
+		$event = self::event(
+			self::EVENT_PRUNED,
+			array(
+				'total_messages' => count( $source ),
+				'orphan_count' => count( $orphans ),
+				'retained_count' => count( $retained ),
+				'removed_indices' => array_map( 'intval', array_keys( $drop_indices ) ),
+				'orphans' => $orphans,
+			)
+		);
+
+		return array(
+			'messages' => $retained,
+			'removed' => $orphans,
+			'events' => array( $event ),
+		);
+	}
+
+	/**
+	 * Find the FIFO-oldest pending tool_call with the given name.
+	 *
+	 * Scans $pending in order and returns the position of the first entry whose
+	 * `tool_name` is strictly equal to $tool_name. The list is received by value
+	 * and is not modified here; removing the matched entry is left to the caller.
+	 *
+	 * @param array<int, array{index: int, tool_name: string}> $pending Pending list.
+	 * @param string $tool_name Tool name to match.
+	 * @return int|null Index in $pending or null when no match.
+	 */
+	private static function match_pending( array $pending, string $tool_name ): ?int {
+		foreach ( $pending as $position => $candidate ) {
+			if ( ( $candidate['tool_name'] ?? '' ) === $tool_name ) {
+				return $position;
+			}
+		}
+
+		return null;
+	}
+
+	/**
+	 * Read a normalized envelope's payload tool name.
+	 *
+	 * Reads `$envelope['payload']['tool_name']` and falls back to an empty string
+	 * when the key is missing or the value is not a string.
+	 *
+	 * @param array $envelope Normalized envelope.
+	 * @return string Tool name, or '' when none is available.
+	 */
+	private static function tool_name( array $envelope ): string {
+		$name = $envelope['payload']['tool_name'] ?? '';
+		return is_string( $name ) ? $name : '';
+	}
+
+	/**
+	 * Build a lifecycle event payload.
+	 *
+	 * @param string $type Event type.
+	 * @param array $data Event data.
+	 * @return array Array with the event type under `type` and the data under `metadata`.
+	 */
+	private static function event( string $type, array $data ): array {
+		return array(
+			'type' => $type,
+			'metadata' => $data,
+		);
+	}
+}
diff --git a/tests/tool-pair-validator-smoke.php b/tests/tool-pair-validator-smoke.php
new file mode 100644
index 0000000..6fc3edd
--- /dev/null
+++ b/tests/tool-pair-validator-smoke.php
@@ -0,0 +1,106 @@
+ 'user', 'content' => 'hello' );
+$assistant = array( 'role' => 'assistant', 'content' => 'hi' );
+
+$call_search = WP_Agent_Message::toolCall( 'searching', 'search', array( 'q' => 'foo' ), 1 );
+$result_search = WP_Agent_Message::toolResult( 'results', 'search', array( 'success' => true, 'tool_data' => array( 'hits' => 0 ) ) );
+
+$call_fetch = WP_Agent_Message::toolCall( 'fetching', 'fetch', array( 'url' => 'https://example.com' ), 2 );
+$result_fetch = WP_Agent_Message::toolResult( 'fetched', 'fetch', array( 'success' => true, 'tool_data' => array( 'status' => 200 ) ) );
+
+echo "\n[1] Empty transcript has no orphans:\n";
+agents_api_smoke_assert_equals( array(), WP_Agent_Tool_Pair_Validator::validate( array() ), 'empty transcript validates clean', $failures, $passes );
+agents_api_smoke_assert_equals( true, WP_Agent_Tool_Pair_Validator::is_paired( array() ), 'empty transcript is paired', $failures, $passes );
+
+echo "\n[2] Transcript without tool messages has no orphans:\n";
+$plain = array( $user_message, $assistant, $user_message );
+agents_api_smoke_assert_equals( array(), WP_Agent_Tool_Pair_Validator::validate( $plain ), 'plain transcript has no orphans', $failures, $passes );
+agents_api_smoke_assert_equals( true, WP_Agent_Tool_Pair_Validator::is_paired( $plain ), 'plain transcript is paired', $failures, $passes );
+
+echo "\n[3] Properly paired tool_call + tool_result is clean:\n";
+$paired = array( $user_message, $call_search, $result_search, $assistant );
+agents_api_smoke_assert_equals( array(), WP_Agent_Tool_Pair_Validator::validate( $paired ), 'paired call+result has no orphans', $failures, $passes );
+
+echo "\n[4] Tool_call with no matching tool_result is flagged:\n";
+$orphan_call = array( $user_message, $call_search, $assistant );
+$orphans = WP_Agent_Tool_Pair_Validator::validate( $orphan_call );
+agents_api_smoke_assert_equals( 1, count( $orphans ), 'orphan call produces one report', $failures, $passes );
+agents_api_smoke_assert_equals( 1, $orphans[0]['index'], 'orphan call index points at the call', $failures, $passes );
+agents_api_smoke_assert_equals( WP_Agent_Tool_Pair_Validator::KIND_ORPHAN_TOOL_CALL, $orphans[0]['kind'], 'orphan call kind is correct', $failures, $passes );
+agents_api_smoke_assert_equals( 'search', $orphans[0]['tool_name'], 'orphan call tool_name is preserved', $failures, $passes );
+
+echo "\n[5] Tool_result with no matching tool_call is flagged:\n";
+$orphan_result = array( $user_message, $result_search, $assistant );
+$orphans = WP_Agent_Tool_Pair_Validator::validate( $orphan_result );
+agents_api_smoke_assert_equals( 1, count( $orphans ), 'orphan result produces one report', $failures, $passes );
+agents_api_smoke_assert_equals( 1, $orphans[0]['index'], 'orphan result index points at the result', $failures, $passes );
+agents_api_smoke_assert_equals( WP_Agent_Tool_Pair_Validator::KIND_ORPHAN_TOOL_RESULT, $orphans[0]['kind'], 'orphan result kind is correct', $failures, $passes );
+agents_api_smoke_assert_equals( 'search', $orphans[0]['tool_name'], 'orphan result tool_name is preserved', $failures, $passes );
+
+echo "\n[6] Multiple interleaved calls match FIFO by tool name:\n";
+$multi = array(
+	$call_search,
+	$call_fetch,
+	$result_fetch,
+	$result_search,
+);
+agents_api_smoke_assert_equals( array(), WP_Agent_Tool_Pair_Validator::validate( $multi ), 'interleaved pairs validate clean', $failures, $passes );
+
+echo "\n[7] Two calls for the same tool with one result leaves the second call orphan:\n";
+$double_call = array( $call_search, $call_search, $result_search );
+$orphans = WP_Agent_Tool_Pair_Validator::validate( $double_call );
+agents_api_smoke_assert_equals( 1, count( $orphans ), 'double-call leaves one orphan', $failures, $passes );
+agents_api_smoke_assert_equals( 1, $orphans[0]['index'], 'second call is the orphan (FIFO matches first)', $failures, $passes );
+agents_api_smoke_assert_equals( WP_Agent_Tool_Pair_Validator::KIND_ORPHAN_TOOL_CALL, $orphans[0]['kind'], 'orphan kind is tool_call', $failures, $passes );
+
+echo "\n[8] Result for a different tool name does not consume an unrelated pending call:\n";
+$crossed = array( $call_search, $result_fetch );
+$orphans = WP_Agent_Tool_Pair_Validator::validate( $crossed );
+agents_api_smoke_assert_equals( 2, count( $orphans ), 'crossed names produce two orphans', $failures, $passes );
+agents_api_smoke_assert_equals( 0, $orphans[0]['index'], 'first orphan (by index) is the call at 0', $failures, $passes );
+agents_api_smoke_assert_equals( WP_Agent_Tool_Pair_Validator::KIND_ORPHAN_TOOL_CALL, $orphans[0]['kind'], 'first orphan kind is tool_call', $failures, $passes );
+agents_api_smoke_assert_equals( 1, $orphans[1]['index'], 'second orphan is the result at 1', $failures, $passes );
+agents_api_smoke_assert_equals( WP_Agent_Tool_Pair_Validator::KIND_ORPHAN_TOOL_RESULT, $orphans[1]['kind'], 'second orphan kind is tool_result', $failures, $passes );
+
+echo "\n[9] prune() drops orphans and emits a lifecycle event:\n";
+$messy = array( $user_message, $call_search, $result_fetch, $assistant );
+$pruned = WP_Agent_Tool_Pair_Validator::prune( $messy );
+agents_api_smoke_assert_equals( 2, count( $pruned['messages'] ), 'pruned transcript drops both orphans', $failures, $passes );
+agents_api_smoke_assert_equals( 'user', $pruned['messages'][0]['role'], 'first retained message is the user turn', $failures, $passes );
+agents_api_smoke_assert_equals( 'assistant', $pruned['messages'][1]['role'], 'second retained message is the assistant turn', $failures, $passes );
+agents_api_smoke_assert_equals( 2, count( $pruned['removed'] ), 'two orphans are reported as removed', $failures, $passes );
+agents_api_smoke_assert_equals( 1, count( $pruned['events'] ), 'prune emits a single lifecycle event', $failures, $passes );
+agents_api_smoke_assert_equals( WP_Agent_Tool_Pair_Validator::EVENT_PRUNED, $pruned['events'][0]['type'], 'lifecycle event type is tool_pair_pruned', $failures, $passes );
+agents_api_smoke_assert_equals( 2, $pruned['events'][0]['metadata']['orphan_count'], 'event metadata records orphan_count', $failures, $passes );
+
+echo "\n[10] prune() on a clean transcript is a no-op with a validated event:\n";
+$clean_pruned = WP_Agent_Tool_Pair_Validator::prune( $paired );
+agents_api_smoke_assert_equals( 4, count( $clean_pruned['messages'] ), 'clean prune retains all messages', $failures, $passes );
+agents_api_smoke_assert_equals( array(), $clean_pruned['removed'], 'clean prune removes nothing', $failures, $passes );
+agents_api_smoke_assert_equals( WP_Agent_Tool_Pair_Validator::EVENT_VALIDATED, $clean_pruned['events'][0]['type'], 'clean prune emits validated event', $failures, $passes );
+agents_api_smoke_assert_equals( 0, $clean_pruned['events'][0]['metadata']['orphan_count'], 'clean prune event reports orphan_count=0', $failures, $passes );
+
+agents_api_smoke_finish( 'tool-pair-validator', $failures, $passes );