Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions agents-api.php
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
require_once AGENTS_API_PATH . 'src/Runtime/class-wp-agent-transcript-persister.php';
require_once AGENTS_API_PATH . 'src/Runtime/class-wp-agent-null-transcript-persister.php';
require_once AGENTS_API_PATH . 'src/Runtime/class-wp-agent-conversation-compaction.php';
require_once AGENTS_API_PATH . 'src/Runtime/class-wp-agent-tool-pair-validator.php';
require_once AGENTS_API_PATH . 'src/Runtime/class-wp-agent-markdown-section-compaction-adapter.php';
require_once AGENTS_API_PATH . 'src/Runtime/class-wp-agent-iteration-budget.php';
require_once AGENTS_API_PATH . 'src/Runtime/class-wp-agent-conversation-result.php';
Expand Down
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
"php tests/conversation-runner-contracts-smoke.php",
"php tests/conversation-transcript-lock-smoke.php",
"php tests/conversation-compaction-smoke.php",
"php tests/tool-pair-validator-smoke.php",
"php tests/markdown-section-compaction-smoke.php",
"php tests/context-registry-smoke.php",
"php tests/conversation-loop-smoke.php",
Expand Down
213 changes: 213 additions & 0 deletions src/Runtime/class-wp-agent-tool-pair-validator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
<?php
/**
* Tool-call / tool-result pair validator.
*
* @package AgentsAPI
*/

namespace AgentsAPI\AI;

if ( ! defined( 'ABSPATH' ) ) {
exit;
}

/**
* Detects and removes orphan tool_call / tool_result messages in a transcript.
*
* Provider request shapes (Anthropic-style tool_use/tool_result blocks, OpenAI-style
* tool_calls/tool messages) require every tool call to be paired with a result. A
* transcript with an orphan tool_call or tool_result is a provider 400 waiting to
* happen. This validator gives consumers a substrate-level helper to detect or
* scrub such transcripts before dispatch.
*
* Pairing rule: a tool_call envelope at index `i` matches the next tool_result
* envelope at index `j > i` with the same `payload.tool_name`. Matching is FIFO
* per tool name — the oldest unmatched tool_call wins, mirroring how providers
* resolve tool-use IDs positionally when multiple calls share a name.
*/
class WP_Agent_Tool_Pair_Validator {

	public const KIND_ORPHAN_TOOL_CALL   = 'orphan_tool_call';
	public const KIND_ORPHAN_TOOL_RESULT = 'orphan_tool_result';

	public const EVENT_VALIDATED = 'tool_pair_validated';
	public const EVENT_PRUNED    = 'tool_pair_pruned';

	/**
	 * Scan a transcript and report every tool_call / tool_result that has no partner.
	 *
	 * Messages are re-indexed from zero before scanning, so reported indices refer
	 * to positions in `array_values( $messages )`. A tool_result is paired with the
	 * earliest still-open tool_call carrying the same tool name; anything left
	 * unpaired is reported. Reports come back ordered by ascending index, each with
	 * the keys `index`, `kind`, `type` and `tool_name`.
	 *
	 * @param array<int, array<string, mixed>> $messages Raw or normalized messages.
	 * @return array<int, array<string, mixed>> Orphan reports.
	 */
	public static function validate( array $messages ): array {
		// Tool calls still waiting for a result, kept in encounter order.
		$open_calls = array();
		// Orphan reports keyed by message index; each message yields at most one.
		$reports = array();

		foreach ( array_values( $messages ) as $position => $raw ) {
			$normalized = WP_Agent_Message::normalize( $raw );
			$kind       = $normalized['type'];
			$is_call    = WP_Agent_Message::TYPE_TOOL_CALL === $kind;

			// Anything that is neither a tool call nor a tool result is irrelevant here.
			if ( ! $is_call && WP_Agent_Message::TYPE_TOOL_RESULT !== $kind ) {
				continue;
			}

			$name = self::payload_tool_name( $normalized );

			if ( $is_call ) {
				$open_calls[] = array(
					'index'     => $position,
					'tool_name' => $name,
				);
				continue;
			}

			$slot = self::oldest_open_call( $open_calls, $name );

			if ( null !== $slot ) {
				// Paired: close the call. array_splice() re-indexes the list.
				array_splice( $open_calls, $slot, 1 );
				continue;
			}

			$reports[ $position ] = self::report(
				$position,
				self::KIND_ORPHAN_TOOL_RESULT,
				WP_Agent_Message::TYPE_TOOL_RESULT,
				$name
			);
		}

		// Whatever is still open never received a result.
		foreach ( $open_calls as $call ) {
			$reports[ $call['index'] ] = self::report(
				$call['index'],
				self::KIND_ORPHAN_TOOL_CALL,
				WP_Agent_Message::TYPE_TOOL_CALL,
				$call['tool_name']
			);
		}

		// Keys are the message indices, so sorting by key orders reports by index.
		ksort( $reports );

		return array_values( $reports );
	}

	/**
	 * Tell whether validate() finds no orphans in the transcript.
	 *
	 * @param array<int, array<string, mixed>> $messages Messages.
	 * @return bool True when there are no orphan reports.
	 */
	public static function is_paired( array $messages ): bool {
		return 0 === count( self::validate( $messages ) );
	}

	/**
	 * Return the transcript with every orphan tool_call / tool_result removed.
	 *
	 * The result always carries three keys: `messages` (the re-indexed survivors),
	 * `removed` (the orphan reports from validate()) and `events` (a single
	 * `{type, metadata}` entry: EVENT_VALIDATED when nothing was dropped,
	 * EVENT_PRUNED otherwise).
	 *
	 * @param array<int, array<string, mixed>> $messages Messages.
	 * @return array{messages: array<int, array<string, mixed>>, removed: array<int, array<string, mixed>>, events: array<int, array<string, mixed>>}
	 */
	public static function prune( array $messages ): array {
		$list    = array_values( $messages );
		$reports = self::validate( $list );
		$total   = count( $list );

		if ( 0 === count( $reports ) ) {
			$validated = self::event(
				self::EVENT_VALIDATED,
				array(
					'total_messages' => $total,
					'orphan_count'   => 0,
				)
			);

			return array(
				'messages' => $list,
				'removed'  => array(),
				'events'   => array( $validated ),
			);
		}

		// Indices to drop, in the same (ascending) order as the reports.
		$removed_indices = array_map( 'intval', array_column( $reports, 'index' ) );

		// Strip those positions, then close the gaps so the result is a list again.
		$survivors = array_values( array_diff_key( $list, array_flip( $removed_indices ) ) );

		$pruned = self::event(
			self::EVENT_PRUNED,
			array(
				'total_messages'  => $total,
				'orphan_count'    => count( $reports ),
				'retained_count'  => count( $survivors ),
				'removed_indices' => $removed_indices,
				'orphans'         => $reports,
			)
		);

		return array(
			'messages' => $survivors,
			'removed'  => $reports,
			'events'   => array( $pruned ),
		);
	}

	/**
	 * Locate the earliest open tool_call whose name equals the given one.
	 *
	 * @param array<int, array<string, mixed>> $open_calls Open calls, as a zero-based list.
	 * @param string                           $tool_name  Tool name to look for.
	 * @return int|null Position inside $open_calls, or null when nothing matches.
	 */
	private static function oldest_open_call( array $open_calls, string $tool_name ): ?int {
		// array_search() returns the first hit, i.e. the oldest call for that name.
		$found = array_search( $tool_name, array_column( $open_calls, 'tool_name' ), true );

		return false === $found ? null : $found;
	}

	/**
	 * Extract `payload.tool_name` from an envelope, falling back to an empty string
	 * when it is missing or not a string.
	 *
	 * @param array<string, mixed> $envelope Normalized envelope.
	 * @return string
	 */
	private static function payload_tool_name( array $envelope ): string {
		$candidate = $envelope['payload']['tool_name'] ?? '';

		if ( ! is_string( $candidate ) ) {
			return '';
		}

		return $candidate;
	}

	/**
	 * Assemble one orphan report.
	 *
	 * @param int    $index     Message index within the re-indexed transcript.
	 * @param string $kind      One of the KIND_* constants.
	 * @param string $type      Message type of the orphan envelope.
	 * @param string $tool_name Tool name read from the envelope payload.
	 * @return array<string, mixed>
	 */
	private static function report( int $index, string $kind, string $type, string $tool_name ): array {
		return array(
			'index'     => $index,
			'kind'      => $kind,
			'type'      => $type,
			'tool_name' => $tool_name,
		);
	}

	/**
	 * Wrap event data in the `{type, metadata}` shape returned under `events`.
	 *
	 * @param string               $type Event type.
	 * @param array<string, mixed> $data Event data.
	 * @return array<string, mixed>
	 */
	private static function event( string $type, array $data ): array {
		return array(
			'type'     => $type,
			'metadata' => $data,
		);
	}
}
106 changes: 106 additions & 0 deletions tests/tool-pair-validator-smoke.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
<?php
/**
* Pure-PHP smoke test for the tool-call / tool-result pair validator.
*
* Run with: php tests/tool-pair-validator-smoke.php
*
* @package AgentsAPI\Tests
*/

// The validator class file exits when ABSPATH is undefined, so define a
// stand-in value before anything is loaded.
if ( ! defined( 'ABSPATH' ) ) {
define( 'ABSPATH', __DIR__ . '/' );
}

// Accumulators handed to every assertion call below and to the final
// agents_api_smoke_finish() call.
$failures = array();
$passes = 0;

echo "agents-api-tool-pair-validator-smoke\n";

// The helper file supplies the agents_api_smoke_* functions used in this script.
// NOTE(review): agents_api_smoke_require_module() presumably loads the plugin
// classes; its definition is not visible here — confirm in the helpers file.
require_once __DIR__ . '/agents-api-smoke-helpers.php';
agents_api_smoke_require_module();

use AgentsAPI\AI\WP_Agent_Message;
use AgentsAPI\AI\WP_Agent_Tool_Pair_Validator;

// Plain (non-tool) messages used as filler around the tool envelopes.
$user_message = array( 'role' => 'user', 'content' => 'hello' );
$assistant = array( 'role' => 'assistant', 'content' => 'hi' );

// A call/result pair for the 'search' tool. The second argument is the tool
// name that the cases below expect back in the orphan reports.
$call_search = WP_Agent_Message::toolCall( 'searching', 'search', array( 'q' => 'foo' ), 1 );
$result_search = WP_Agent_Message::toolResult( 'results', 'search', array( 'success' => true, 'tool_data' => array( 'hits' => 0 ) ) );

// A second pair for a different tool name ('fetch'), used to test name mismatches
// and interleaving.
$call_fetch = WP_Agent_Message::toolCall( 'fetching', 'fetch', array( 'url' => 'https://example.com' ), 2 );
$result_fetch = WP_Agent_Message::toolResult( 'fetched', 'fetch', array( 'success' => true, 'tool_data' => array( 'status' => 200 ) ) );

// Cases [1]-[8] exercise validate() / is_paired(); each case prints its heading
// and then records its assertions into $failures / $passes.
echo "\n[1] Empty transcript has no orphans:\n";
agents_api_smoke_assert_equals( array(), WP_Agent_Tool_Pair_Validator::validate( array() ), 'empty transcript validates clean', $failures, $passes );
agents_api_smoke_assert_equals( true, WP_Agent_Tool_Pair_Validator::is_paired( array() ), 'empty transcript is paired', $failures, $passes );

echo "\n[2] Transcript without tool messages has no orphans:\n";
$plain = array( $user_message, $assistant, $user_message );
agents_api_smoke_assert_equals( array(), WP_Agent_Tool_Pair_Validator::validate( $plain ), 'plain transcript has no orphans', $failures, $passes );
agents_api_smoke_assert_equals( true, WP_Agent_Tool_Pair_Validator::is_paired( $plain ), 'plain transcript is paired', $failures, $passes );

echo "\n[3] Properly paired tool_call + tool_result is clean:\n";
// $paired is reused by case [10] further down.
$paired = array( $user_message, $call_search, $result_search, $assistant );
agents_api_smoke_assert_equals( array(), WP_Agent_Tool_Pair_Validator::validate( $paired ), 'paired call+result has no orphans', $failures, $passes );

echo "\n[4] Tool_call with no matching tool_result is flagged:\n";
// The call sits at index 1 (after the user message), so the report must point there.
$orphan_call = array( $user_message, $call_search, $assistant );
$orphans = WP_Agent_Tool_Pair_Validator::validate( $orphan_call );
agents_api_smoke_assert_equals( 1, count( $orphans ), 'orphan call produces one report', $failures, $passes );
agents_api_smoke_assert_equals( 1, $orphans[0]['index'], 'orphan call index points at the call', $failures, $passes );
agents_api_smoke_assert_equals( WP_Agent_Tool_Pair_Validator::KIND_ORPHAN_TOOL_CALL, $orphans[0]['kind'], 'orphan call kind is correct', $failures, $passes );
agents_api_smoke_assert_equals( 'search', $orphans[0]['tool_name'], 'orphan call tool_name is preserved', $failures, $passes );

echo "\n[5] Tool_result with no matching tool_call is flagged:\n";
// Mirror of case [4]: a result at index 1 with no earlier call to pair with.
$orphan_result = array( $user_message, $result_search, $assistant );
$orphans = WP_Agent_Tool_Pair_Validator::validate( $orphan_result );
agents_api_smoke_assert_equals( 1, count( $orphans ), 'orphan result produces one report', $failures, $passes );
agents_api_smoke_assert_equals( 1, $orphans[0]['index'], 'orphan result index points at the result', $failures, $passes );
agents_api_smoke_assert_equals( WP_Agent_Tool_Pair_Validator::KIND_ORPHAN_TOOL_RESULT, $orphans[0]['kind'], 'orphan result kind is correct', $failures, $passes );
agents_api_smoke_assert_equals( 'search', $orphans[0]['tool_name'], 'orphan result tool_name is preserved', $failures, $passes );

echo "\n[6] Multiple interleaved calls match FIFO by tool name:\n";
// Results arrive in the opposite order to the calls; pairing is by tool name,
// so both pairs still resolve.
$multi = array(
$call_search,
$call_fetch,
$result_fetch,
$result_search,
);
agents_api_smoke_assert_equals( array(), WP_Agent_Tool_Pair_Validator::validate( $multi ), 'interleaved pairs validate clean', $failures, $passes );

echo "\n[7] Two calls for the same tool with one result leaves the second call orphan:\n";
// The single result pairs with the oldest call (index 0), leaving index 1 open.
$double_call = array( $call_search, $call_search, $result_search );
$orphans = WP_Agent_Tool_Pair_Validator::validate( $double_call );
agents_api_smoke_assert_equals( 1, count( $orphans ), 'double-call leaves one orphan', $failures, $passes );
agents_api_smoke_assert_equals( 1, $orphans[0]['index'], 'second call is the orphan (FIFO matches first)', $failures, $passes );
agents_api_smoke_assert_equals( WP_Agent_Tool_Pair_Validator::KIND_ORPHAN_TOOL_CALL, $orphans[0]['kind'], 'orphan kind is tool_call', $failures, $passes );

echo "\n[8] Result for a different tool name does not consume an unrelated pending call:\n";
// 'search' call vs 'fetch' result: neither pairs. Reports are ordered by index,
// so the call (0) is listed before the result (1).
$crossed = array( $call_search, $result_fetch );
$orphans = WP_Agent_Tool_Pair_Validator::validate( $crossed );
agents_api_smoke_assert_equals( 2, count( $orphans ), 'crossed names produce two orphans', $failures, $passes );
agents_api_smoke_assert_equals( 0, $orphans[0]['index'], 'first orphan (by index) is the call at 0', $failures, $passes );
agents_api_smoke_assert_equals( WP_Agent_Tool_Pair_Validator::KIND_ORPHAN_TOOL_CALL, $orphans[0]['kind'], 'first orphan kind is tool_call', $failures, $passes );
agents_api_smoke_assert_equals( 1, $orphans[1]['index'], 'second orphan is the result at 1', $failures, $passes );
agents_api_smoke_assert_equals( WP_Agent_Tool_Pair_Validator::KIND_ORPHAN_TOOL_RESULT, $orphans[1]['kind'], 'second orphan kind is tool_result', $failures, $passes );

// Cases [9]-[10] exercise prune(): the returned 'messages', 'removed' and
// 'events' entries are each checked.
echo "\n[9] prune() drops orphans and emits a lifecycle event:\n";
// A 'search' call followed by a 'fetch' result: the names differ, so both are
// orphans and only the user and assistant messages should survive.
$messy = array( $user_message, $call_search, $result_fetch, $assistant );
$pruned = WP_Agent_Tool_Pair_Validator::prune( $messy );
agents_api_smoke_assert_equals( 2, count( $pruned['messages'] ), 'pruned transcript drops both orphans', $failures, $passes );
agents_api_smoke_assert_equals( 'user', $pruned['messages'][0]['role'], 'first retained message is the user turn', $failures, $passes );
agents_api_smoke_assert_equals( 'assistant', $pruned['messages'][1]['role'], 'second retained message is the assistant turn', $failures, $passes );
agents_api_smoke_assert_equals( 2, count( $pruned['removed'] ), 'two orphans are reported as removed', $failures, $passes );
agents_api_smoke_assert_equals( 1, count( $pruned['events'] ), 'prune emits a single lifecycle event', $failures, $passes );
agents_api_smoke_assert_equals( WP_Agent_Tool_Pair_Validator::EVENT_PRUNED, $pruned['events'][0]['type'], 'lifecycle event type is tool_pair_pruned', $failures, $passes );
agents_api_smoke_assert_equals( 2, $pruned['events'][0]['metadata']['orphan_count'], 'event metadata records orphan_count', $failures, $passes );

echo "\n[10] prune() on a clean transcript is a no-op with a validated event:\n";
// $paired is the four-message transcript built in case [3]; nothing should be dropped.
$clean_pruned = WP_Agent_Tool_Pair_Validator::prune( $paired );
agents_api_smoke_assert_equals( 4, count( $clean_pruned['messages'] ), 'clean prune retains all messages', $failures, $passes );
agents_api_smoke_assert_equals( array(), $clean_pruned['removed'], 'clean prune removes nothing', $failures, $passes );
agents_api_smoke_assert_equals( WP_Agent_Tool_Pair_Validator::EVENT_VALIDATED, $clean_pruned['events'][0]['type'], 'clean prune emits validated event', $failures, $passes );
agents_api_smoke_assert_equals( 0, $clean_pruned['events'][0]['metadata']['orphan_count'], 'clean prune event reports orphan_count=0', $failures, $passes );

// Hand the accumulated counters to the shared finish helper.
// NOTE(review): presumably prints a summary and sets the exit status — confirm in the helpers file.
agents_api_smoke_finish( 'tool-pair-validator', $failures, $passes );
Loading