diff --git a/docs/runtime-and-tools.md b/docs/runtime-and-tools.md
index 42b381f..20e6eb7 100644
--- a/docs/runtime-and-tools.md
+++ b/docs/runtime-and-tools.md
@@ -34,6 +34,7 @@ Representative message roles include user, assistant, tool-call, and tool-result
 array(
     'messages'               => $messages,
     'tool_execution_results' => $tool_results,
+    'tool_audit_events'      => $tool_audit_events,
     'events'                 => $events,
     'turn_count'             => $turns_run,
     'final_content'          => $last_assistant_text,
@@ -161,6 +162,41 @@ Failure modes are normalized rather than thrown to the loop:
 - executor exceptions are caught and returned as tool errors;
 - executor arrays without `success` are wrapped as successful results.
 
+## Tool Audit Events
+
+When the conversation loop mediates tool calls, the result includes
+`tool_audit_events` alongside the backwards-compatible `tool_execution_results`.
+The audit events are the safe replay surface for generic observers: they include
+stable hashes and normalized status, but do not include raw tool parameters.
+
+Representative event shape:
+
+```php
+array(
+    'schema_version'      => 1,
+    'type'                => 'tool_call',
+    'turn_count'          => 1,
+    'tool_name'           => 'client/search_docs',
+    'tool_source'         => 'client',
+    'parameters_sha256'   => 'sha256:...',
+    'parameters_redacted' => true,
+    'success'             => true,
+    'result_status'       => 'success',
+    'result_sha256'       => 'sha256:...',
+)
+```
+
+Failed calls include `error_type` when the loop can classify the failure. The
+core classifications are `tool_not_found`, `missing_required_parameters`, and
+`executor_exception`.
+
+Sensitive parameter keys such as `token`, `secret`, `password`, `authorization`,
+`cookie`, `credential`, `nonce`, and `api_key` are redacted before hashing. Hosts
+can customize deterministic redaction with the
+`agents_api_tool_audit_parameters` filter. The legacy `tool_execution_results`
+field still contains raw parameters for existing callers and should be treated as
+caller-owned runtime data, not as the generic replay artifact surface.
+
 ## Visibility and action policy
 
 The tool policy layer resolves which tools are visible and how each tool may execute. Public policy classes include:
diff --git a/src/Runtime/class-wp-agent-conversation-loop.php b/src/Runtime/class-wp-agent-conversation-loop.php
index b6f5486..48a1c63 100644
--- a/src/Runtime/class-wp-agent-conversation-loop.php
+++ b/src/Runtime/class-wp-agent-conversation-loop.php
@@ -87,6 +87,7 @@ public static function run( array $messages, callable $turn_runner, array $optio
 		$messages              = WP_Agent_Message::normalize_many( $messages );
 		$events                = array();
 		$tool_results          = array();
+		$tool_audit_events     = array();
 		$conversation_complete = false;
 		$exceeded_budget       = null;
 
@@ -198,6 +199,7 @@ public static function run( array $messages, callable $turn_runner, array $optio
 
 				$messages              = $mediation_result['messages'];
 				$tool_results          = array_merge( $tool_results, $mediation_result['tool_execution_results'] );
+				$tool_audit_events     = array_merge( $tool_audit_events, $mediation_result['tool_audit_events'] );
 				$events                = array_merge( $events, $mediation_result['events'] );
 				$conversation_complete = $mediation_result['conversation_complete'];
 				$exceeded_budget       = $mediation_result['exceeded_budget'];
@@ -206,7 +208,10 @@ public static function run( array $messages, callable $turn_runner, array $optio
 				$result       = WP_Agent_Conversation_Result::normalize( $result );
 				$messages     = $result['messages'];
 				$tool_results = array_merge( $tool_results, $result['tool_execution_results'] );
-				$events       = array_merge( $events, self::normalize_events( $result['events'] ?? array() ) );
+				if ( isset( $result['tool_audit_events'] ) && is_array( $result['tool_audit_events'] ) ) {
+					$tool_audit_events = array_merge( $tool_audit_events, $result['tool_audit_events'] );
+				}
+				$events = array_merge( $events, self::normalize_events( $result['events'] ?? array() ) );
 				if ( isset( $result['request_metadata'] ) && is_array( $result['request_metadata'] ) ) {
 					$last_request_metadata = $result['request_metadata'];
 				}
@@ -261,6 +266,7 @@ public static function run( array $messages, callable $turn_runner, array $optio
 		$final_result_data = array(
 			'messages'               => $messages,
 			'tool_execution_results' => $tool_results,
+			'tool_audit_events'      => $tool_audit_events,
 			'events'                 => $events,
 			'turn_count'             => $turns_run,
 			'final_content'          => self::extract_final_content( $messages ),
@@ -311,7 +317,7 @@ public static function run( array $messages, callable $turn_runner, array $optio
 	 * @param int           $turn     Current turn number.
 	 * @param callable|null $on_event Event sink.
 	 * @param array         $budgets  Named iteration budgets.
-	 * @return array{messages: array, tool_execution_results: array, events: array, conversation_complete: bool, exceeded_budget: string|null}
+	 * @return array{messages: array, tool_execution_results: array, tool_audit_events: array, events: array, conversation_complete: bool, exceeded_budget: string|null}
 	 */
 	private static function mediate_tool_calls(
 		array $result,
@@ -329,6 +335,7 @@ private static function mediate_tool_calls(
 			: array();
 		$tool_calls             = $result['tool_calls'];
 		$tool_execution_results = array();
+		$tool_audit_events      = array();
 		$events                 = array();
 		$complete               = false;
 		$exceeded_budget        = null;
@@ -385,6 +392,15 @@ private static function mediate_tool_calls(
 				'turn_count' => $turn,
 			);
 
+			$tool_audit_events[] = self::tool_audit_event(
+				$tool_name,
+				is_array( $parameters ) ? $parameters : array(),
+				$exec_result,
+				is_array( $tool_def ) ? $tool_def : null,
+				$turn_context,
+				$turn
+			);
+
 			// Add tool-result message to transcript.
 			$result_content = ( $exec_result['success'] ?? false )
 				? self::json_encode_safe( $exec_result['result'] ?? array() )
@@ -441,6 +457,7 @@ private static function mediate_tool_calls(
 		return array(
 			'messages'               => $messages,
 			'tool_execution_results' => $tool_execution_results,
+			'tool_audit_events'      => $tool_audit_events,
 			'events'                 => $events,
 			'conversation_complete'  => $complete,
 			'exceeded_budget'        => $exceeded_budget,
@@ -538,6 +555,184 @@ private static function emit_event( ?callable $on_event, string $event, array $p
 		}
 	}
 
+	/**
+	 * Build a stable, safe audit entry for a mediated tool call.
+	 *
+	 * The legacy `tool_execution_results` field intentionally keeps raw
+	 * parameters for existing callers. Audit events avoid raw parameter storage by
+	 * default so transcripts can be used for replay attestation without leaking
+	 * secrets into generic observers.
+	 *
+	 * @param string     $tool_name       Tool identifier.
+	 * @param array      $parameters      Runtime tool-call parameters.
+	 * @param array      $result          Normalized tool execution result.
+	 * @param array|null $tool_definition Tool declaration, when available.
+	 * @param array      $context         Turn context.
+	 * @param int        $turn            Turn number.
+	 * @return array Audit event.
+	 */
+	private static function tool_audit_event( string $tool_name, array $parameters, array $result, ?array $tool_definition, array $context, int $turn ): array {
+		$safe_parameters = self::redact_tool_audit_parameters( $parameters, $tool_name, $tool_definition, $context );
+		$metadata        = isset( $result['metadata'] ) && is_array( $result['metadata'] ) ? $result['metadata'] : array();
+		$error_type      = isset( $metadata['error_type'] ) && is_string( $metadata['error_type'] ) ? $metadata['error_type'] : '';
+
+		$audit_event = array(
+			'schema_version'      => 1,
+			'type'                => 'tool_call',
+			'turn_count'          => $turn,
+			'tool_name'           => $tool_name,
+			'tool_source'         => is_array( $tool_definition ) && is_string( $tool_definition['source'] ?? null ) ? $tool_definition['source'] : '',
+			'parameters_sha256'   => self::stable_sha256( $safe_parameters ),
+			'parameters_redacted' => true,
+			'success'             => (bool) ( $result['success'] ?? false ),
+			'result_status'       => ! empty( $result['success'] ) ? 'success' : 'error',
+			'result_sha256'       => self::stable_sha256( self::audit_result_summary( $result ) ),
+		);
+
+		if ( '' !== $error_type ) {
+			$audit_event['error_type'] = $error_type;
+		}
+
+		return array_filter(
+			$audit_event,
+			static fn( $value ): bool => '' !== $value
+		);
+	}
+
+	/**
+	 * Redact tool parameters before hashing them for audit events.
+	 *
+	 * Built-in key-based redaction runs first; hosts may then refine the result via
+	 * the `agents_api_tool_audit_parameters` filter. A filter that throws or returns
+	 * a non-array cannot affect the loop: the default redaction is used instead.
+	 *
+	 * @param array      $parameters      Raw tool-call parameters.
+	 * @param string     $tool_name       Tool identifier.
+	 * @param array|null $tool_definition Tool declaration, when available.
+	 * @param array      $context         Turn context.
+	 * @return array Redacted parameters.
+	 */
+	private static function redact_tool_audit_parameters( array $parameters, string $tool_name, ?array $tool_definition, array $context ): array {
+		$default = self::redact_sensitive_values( $parameters );
+
+		if ( ! function_exists( 'apply_filters' ) ) {
+			return $default;
+		}
+
+		try {
+			/**
+			 * Filters parameters before Agents API hashes them into tool audit events.
+			 *
+			 * Callers can remove or normalize product-specific sensitive fields while
+			 * keeping deterministic replay hashes. Returning a non-array falls back to
+			 * the default redacted parameters.
+			 *
+			 * @param array      $redacted        Default redacted parameters.
+			 * @param array      $parameters      Raw tool-call parameters.
+			 * @param string     $tool_name       Tool identifier.
+			 * @param array|null $tool_definition Tool declaration, when available.
+			 * @param array      $context         Turn context.
+			 */
+			$filtered = apply_filters( 'agents_api_tool_audit_parameters', $default, $parameters, $tool_name, $tool_definition, $context );
+		} catch ( \Throwable $error ) {
+			// Audit redaction filters must not change loop results.
+			unset( $error );
+			return $default;
+		}
+
+		// The declared `array` return type would raise a TypeError on anything else.
+		return is_array( $filtered ) ? $filtered : $default;
+	}
+
+	/**
+	 * Redact values stored under obviously sensitive keys in nested parameter arrays.
+	 *
+	 * The entire value under a sensitive key is replaced, including nested arrays and
+	 * lists, so that shapes such as `'tokens' => array( '...' )` never reach the hash.
+	 *
+	 * @param mixed  $value Value to redact.
+	 * @param string $key   Current key.
+	 * @return mixed Redacted value.
+	 */
+	private static function redact_sensitive_values( $value, string $key = '' ) {
+		// Check the key first so array values under sensitive keys are redacted too.
+		if ( '' !== $key && preg_match( '/(api[_-]?key|authorization|cookie|credential|nonce|password|secret|token)/i', $key ) ) {
+			return '[redacted]';
+		}
+
+		if ( ! is_array( $value ) ) {
+			return $value;
+		}
+
+		$redacted = array();
+		foreach ( $value as $item_key => $item_value ) {
+			$redacted[ $item_key ] = self::redact_sensitive_values( $item_value, is_string( $item_key ) ? $item_key : '' );
+		}
+
+		return $redacted;
+	}
+
+	/**
+	 * Keep the audit result hash focused on normalized status, not raw payloads.
+	 *
+	 * @param array $result Normalized tool result.
+	 * @return array Hashable result summary.
+	 */
+	private static function audit_result_summary( array $result ): array {
+		$metadata = isset( $result['metadata'] ) && is_array( $result['metadata'] ) ? $result['metadata'] : array();
+
+		$summary = array(
+			'success'   => (bool) ( $result['success'] ?? false ),
+			'tool_name' => is_string( $result['tool_name'] ?? null ) ? $result['tool_name'] : '',
+			'metadata'  => $metadata,
+		);
+
+		if ( empty( $result['success'] ) ) {
+			$summary['error_sha256'] = self::stable_sha256( is_string( $result['error'] ?? null ) ? $result['error'] : 'Tool execution failed.' );
+		}
+
+		return $summary;
+	}
+
+	/**
+	 * Hash data after recursively sorting array keys for deterministic output.
+	 *
+	 * @param mixed $data Data to hash.
+	 * @return string sha256-prefixed hash.
+	 */
+	private static function stable_sha256( $data ): string {
+		$normalized = self::sort_for_hash( $data );
+		$encoded    = self::json_encode_safe( $normalized );
+		if ( false === $encoded ) {
+			$encoded = '';
+		}
+
+		return 'sha256:' . hash( 'sha256', (string) $encoded );
+	}
+
+	/**
+	 * Recursively sort associative arrays before hashing.
+	 *
+	 * @param mixed $value Value to normalize.
+	 * @return mixed Normalized value.
+	 */
+	private static function sort_for_hash( $value ) {
+		if ( ! is_array( $value ) ) {
+			return $value;
+		}
+
+		$normalized = array();
+		foreach ( $value as $key => $item ) {
+			$normalized[ $key ] = self::sort_for_hash( $item );
+		}
+
+		if ( array() !== $normalized && array_keys( $normalized ) !== range( 0, count( $normalized ) - 1 ) ) {
+			ksort( $normalized );
+		}
+
+		return $normalized;
+	}
+
 	/**
 	 * Resolve the tool executor from options.
 	 *
diff --git a/src/Runtime/class-wp-agent-conversation-result.php b/src/Runtime/class-wp-agent-conversation-result.php
index 0ca41f5..e2a955f 100644
--- a/src/Runtime/class-wp-agent-conversation-result.php
+++ b/src/Runtime/class-wp-agent-conversation-result.php
@@ -56,10 +56,18 @@ public static function normalize( array $result ): array {
 			$result['tool_execution_results'] = array();
 		}
 
+		if ( ! array_key_exists( 'tool_audit_events', $result ) ) {
+			$result['tool_audit_events'] = array();
+		}
+
 		if ( ! is_array( $result['tool_execution_results'] ) ) {
 			throw self::invalid( 'tool_execution_results', 'must be an array' );
 		}
 
+		if ( ! is_array( $result['tool_audit_events'] ) ) {
+			throw self::invalid( 'tool_audit_events', 'must be an array' );
+		}
+
 		foreach ( $result['tool_execution_results'] as $index => $tool_result ) {
 			$path = 'tool_execution_results[' . $index . ']';
 
@@ -88,6 +96,36 @@ public static function normalize( array $result ): array {
 			}
 		}
 
+		foreach ( $result['tool_audit_events'] as $index => $audit_event ) {
+			$path = 'tool_audit_events[' . $index . ']';
+
+			if ( ! is_array( $audit_event ) ) {
+				throw self::invalid( $path, 'must be an array' );
+			}
+
+			foreach ( array( 'type', 'tool_name', 'parameters_sha256', 'result_sha256' ) as $field ) {
+				if ( ! array_key_exists( $field, $audit_event ) || ! is_string( $audit_event[ $field ] ) || '' === $audit_event[ $field ] ) {
+					throw self::invalid( $path . '.' . $field, 'must be a non-empty string' );
+				}
+			}
+
+			if ( ! array_key_exists( 'schema_version', $audit_event ) || ! is_int( $audit_event['schema_version'] ) ) {
+				throw self::invalid( $path . '.schema_version', 'must be an integer' );
+			}
+
+			if ( ! array_key_exists( 'turn_count', $audit_event ) || ! is_int( $audit_event['turn_count'] ) ) {
+				throw self::invalid( $path . '.turn_count', 'must be an integer' );
+			}
+
+			if ( ! array_key_exists( 'success', $audit_event ) || ! is_bool( $audit_event['success'] ) ) {
+				throw self::invalid( $path . '.success', 'must be a boolean' );
+			}
+
+			if ( array_key_exists( 'error_type', $audit_event ) && ! is_string( $audit_event['error_type'] ) ) {
+				throw self::invalid( $path . '.error_type', 'must be a string when present' );
+			}
+		}
+
 		// Validate optional budget-exceeded status fields.
 		if ( array_key_exists( 'status', $result ) && ! is_string( $result['status'] ) ) {
 			throw self::invalid( 'status', 'must be a string when present' );
diff --git a/src/Tools/class-wp-agent-tool-execution-core.php b/src/Tools/class-wp-agent-tool-execution-core.php
index 46fc619..4547d82 100644
--- a/src/Tools/class-wp-agent-tool-execution-core.php
+++ b/src/Tools/class-wp-agent-tool-execution-core.php
@@ -30,7 +30,7 @@ public function prepareWP_Agent_Tool_Call( string $tool_name, array $tool_parame
 		if ( ! is_array( $tool_definition ) ) {
 			return array_merge(
 				array( 'ready' => false ),
-				WP_Agent_Tool_Result::error( $tool_name, "Tool '{$tool_name}' not found" )
+				WP_Agent_Tool_Result::error( $tool_name, "Tool '{$tool_name}' not found", array( 'error_type' => 'tool_not_found' ) )
 			);
 		}
 
@@ -42,7 +42,10 @@ public function prepareWP_Agent_Tool_Call( string $tool_name, array $tool_parame
 				WP_Agent_Tool_Result::error(
 					$tool_name,
 					sprintf( 'Tool "%s" requires the following parameters: %s.', $tool_name, implode( ', ', $validation['missing'] ) ),
-					array( 'missing_parameters' => $validation['missing'] )
+					array(
+						'error_type'         => 'missing_required_parameters',
+						'missing_parameters' => $validation['missing'],
+					)
 				)
 			);
 		}
@@ -76,7 +79,7 @@ public function executePreparedTool( array $tool_call, array $tool_definition, W
 		try {
 			$result = $executor->executeWP_Agent_Tool_Call( $tool_call, $tool_definition, $context );
 		} catch ( \Throwable $throwable ) {
-			return WP_Agent_Tool_Result::error( $tool_call['tool_name'], $throwable->getMessage() );
+			return WP_Agent_Tool_Result::error( $tool_call['tool_name'], $throwable->getMessage(), array( 'error_type' => 'executor_exception' ) );
 		}
 
 		if ( ! array_key_exists( 'success', $result ) ) {
diff --git a/tests/conversation-loop-tool-execution-smoke.php b/tests/conversation-loop-tool-execution-smoke.php
index 55a6bf9..c38fdaa 100644
--- a/tests/conversation-loop-tool-execution-smoke.php
+++ b/tests/conversation-loop-tool-execution-smoke.php
@@ -85,6 +85,12 @@ static function ( array $messages, array $context ): array {
 agents_api_smoke_assert_equals( 1, count( $result['tool_execution_results'] ), 'result contains one tool execution result', $failures, $passes );
 agents_api_smoke_assert_equals( 'client/summarize', $result['tool_execution_results'][0]['tool_name'], 'tool execution result has correct tool name', $failures, $passes );
 agents_api_smoke_assert_equals( 'HELLO WORLD', $result['tool_execution_results'][0]['result']['result']['summary'], 'tool execution result carries executor payload', $failures, $passes );
+agents_api_smoke_assert_equals( 1, count( $result['tool_audit_events'] ), 'result contains one tool audit event', $failures, $passes );
+agents_api_smoke_assert_equals( 'tool_call', $result['tool_audit_events'][0]['type'], 'tool audit event has stable type', $failures, $passes );
+agents_api_smoke_assert_equals( 'client/summarize', $result['tool_audit_events'][0]['tool_name'], 'tool audit event has correct tool name', $failures, $passes );
+agents_api_smoke_assert_equals( true, $result['tool_audit_events'][0]['success'], 'tool audit event records success', $failures, $passes );
+agents_api_smoke_assert_equals( true, str_starts_with( $result['tool_audit_events'][0]['parameters_sha256'], 'sha256:' ), 'tool audit event hashes parameters', $failures, $passes );
+agents_api_smoke_assert_equals( true, ! array_key_exists( 'parameters', $result['tool_audit_events'][0] ), 'tool audit event omits raw parameters', $failures, $passes );
 
 // Messages should contain: user, assistant text, tool_call, tool_result.
 $message_count = count( $result['messages'] );
@@ -183,6 +189,7 @@ static function ( array $messages ): array {
 agents_api_smoke_assert_equals( 0, count( $executor->executed ), 'executor was not called for invalid tool call', $failures, $passes );
 agents_api_smoke_assert_equals( 1, count( $validation_result['tool_execution_results'] ), 'validation error is recorded as tool result', $failures, $passes );
 agents_api_smoke_assert_equals( false, $validation_result['tool_execution_results'][0]['result']['success'], 'validation error marks result as failed', $failures, $passes );
+agents_api_smoke_assert_equals( 'missing_required_parameters', $validation_result['tool_audit_events'][0]['error_type'], 'validation audit event records missing parameter error type', $failures, $passes );
 
 echo "\n[5] Multi-turn mediation runs without an explicit should_continue option:\n";
 $executor->executed = array();
@@ -253,4 +260,60 @@ static function ( array $messages, array $context ) use ( &$caller_managed_defau
 agents_api_smoke_assert_equals( 1, $caller_managed_default_count, 'caller-managed path still breaks after one turn without should_continue', $failures, $passes );
 agents_api_smoke_assert_equals( 2, count( $caller_managed_default_result['messages'] ), 'caller-managed transcript has user + one assistant message', $failures, $passes );
 
+echo "\n[7] Missing tools and executor exceptions produce safe audit events:\n";
+$executor->executed = array();
+
+$missing_tool_result = AgentsAPI\AI\WP_Agent_Conversation_Loop::run(
+	array( array( 'role' => 'user', 'content' => 'test' ) ),
+	static function ( array $messages ): array {
+		return array(
+			'messages'   => $messages,
+			'tool_calls' => array(
+				array(
+					'name'       => 'client/missing',
+					'parameters' => array( 'token' => 'secret-value' ),
+				),
+			),
+		);
+	},
+	array(
+		'max_turns'         => 1,
+		'tool_executor'     => $executor,
+		'tool_declarations' => $tools,
+	)
+);
+
+agents_api_smoke_assert_equals( 0, count( $executor->executed ), 'executor was not called for missing tool', $failures, $passes );
+agents_api_smoke_assert_equals( 'tool_not_found', $missing_tool_result['tool_audit_events'][0]['error_type'], 'missing tool audit event records error type', $failures, $passes );
+agents_api_smoke_assert_equals( true, ! str_contains( wp_json_encode( $missing_tool_result['tool_audit_events'][0] ), 'secret-value' ), 'missing tool audit event does not expose raw secret parameter', $failures, $passes );
+
+$throwing_executor = new class() implements AgentsAPI\AI\Tools\WP_Agent_Tool_Executor {
+	public function executeWP_Agent_Tool_Call( array $tool_call, array $tool_definition, array $context = array() ): array {
+		throw new RuntimeException( 'executor exploded' );
+	}
+};
+
+$exception_result = AgentsAPI\AI\WP_Agent_Conversation_Loop::run(
+	array( array( 'role' => 'user', 'content' => 'test' ) ),
+	static function ( array $messages ): array {
+		return array(
+			'messages'   => $messages,
+			'tool_calls' => array(
+				array(
+					'name'       => 'client/summarize',
+					'parameters' => array( 'text' => 'hello' ),
+				),
+			),
+		);
+	},
+	array(
+		'max_turns'         => 1,
+		'tool_executor'     => $throwing_executor,
+		'tool_declarations' => $tools,
+	)
+);
+
+agents_api_smoke_assert_equals( 'executor_exception', $exception_result['tool_audit_events'][0]['error_type'], 'executor exception audit event records error type', $failures, $passes );
+agents_api_smoke_assert_equals( false, $exception_result['tool_audit_events'][0]['success'], 'executor exception audit event records failure', $failures, $passes );
+
 agents_api_smoke_finish( 'Agents API conversation loop tool execution', $failures, $passes );