diff --git a/agent-schema.json b/agent-schema.json index 7622d8b86..ce8598912 100644 --- a/agent-schema.json +++ b/agent-schema.json @@ -545,7 +545,7 @@ }, "provider_opts": { "type": "object", - "description": "Provider-specific options. Sampling parameters: top_k (integer, supported by anthropic, google, amazon-bedrock, and custom OpenAI-compatible providers like vLLM/Ollama), repetition_penalty (float, forwarded to custom OpenAI-compatible providers), min_p (float, forwarded to custom providers), seed (integer, forwarded to OpenAI). Infrastructure options: dmr: runtime_flags. anthropic/amazon-bedrock (Claude): interleaved_thinking (boolean, default true). openai: transport ('sse' or 'websocket') to choose between SSE and WebSocket streaming for the Responses API. openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance).", + "description": "Provider-specific options. Sampling parameters: top_k (integer, supported by anthropic, google, amazon-bedrock, and custom OpenAI-compatible providers like vLLM/Ollama), repetition_penalty (float, forwarded to custom OpenAI-compatible providers), min_p (float, forwarded to custom providers), seed (integer, forwarded to OpenAI). Infrastructure options: dmr: runtime_flags. anthropic/amazon-bedrock (Claude): interleaved_thinking (boolean, default true). openai: transport ('sse' or 'websocket') to choose between SSE and WebSocket streaming for the Responses API. openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance). 
Google: google_search (boolean) enables Google Search grounding, google_maps (boolean) enables Google Maps grounding, code_execution (boolean) enables server-side code execution.", "additionalProperties": true }, "track_usage": { diff --git a/docs/providers/google/index.md b/docs/providers/google/index.md index d39a23e21..cff067ba4 100644 --- a/docs/providers/google/index.md +++ b/docs/providers/google/index.md @@ -89,3 +89,25 @@ models: model: gemini-3-flash thinking_budget: medium # default for Flash: minimal | low | medium | high ``` + +## Built-in Tools (Grounding) + +Gemini models support built-in tools that let the model use Google Search, Google Maps, and +server-side code execution directly during generation. Enable them via `provider_opts`: + +```yaml +models: + gemini-grounded: + provider: google + model: gemini-2.5-flash + provider_opts: + google_search: true + google_maps: true + code_execution: true +``` + +| Option | Description | +| ---------------- | ---------------------------------------------------- | +| `google_search` | Enables Google Search grounding for up-to-date info | +| `google_maps` | Enables Google Maps grounding for location queries | +| `code_execution` | Enables server-side code execution for computations | diff --git a/examples/google_search_grounding.yaml b/examples/google_search_grounding.yaml new file mode 100644 index 000000000..e3dffa1f4 --- /dev/null +++ b/examples/google_search_grounding.yaml @@ -0,0 +1,16 @@ +#!/usr/bin/env docker agent run + +models: + gemini: + provider: google + model: gemini-3.1-flash-lite-preview + provider_opts: + google_search: true + +agents: + root: + model: gemini + description: Gemini with Google Search + instruction: | + You are a helpful assistant with access to the latest information via Google Search. + Use grounded search results to provide accurate, up-to-date answers. 
diff --git a/pkg/model/provider/gemini/client.go b/pkg/model/provider/gemini/client.go index 60e54c6ea..e48e6f6e4 100644 --- a/pkg/model/provider/gemini/client.go +++ b/pkg/model/provider/gemini/client.go @@ -442,6 +442,28 @@ func (c *Client) applyGemini25ThinkingBudget(config *genai.GenerateContentConfig slog.Debug("Gemini request using thinking_budget", "budget_tokens", tokens) } +// builtInTools returns Gemini built-in tools (Google Search, Google Maps, +// Code Execution) enabled via provider_opts. +func (c *Client) builtInTools() []*genai.Tool { + entries := []struct { + key string + tool *genai.Tool + }{ + {"google_search", &genai.Tool{GoogleSearch: &genai.GoogleSearch{}}}, + {"google_maps", &genai.Tool{GoogleMaps: &genai.GoogleMaps{}}}, + {"code_execution", &genai.Tool{CodeExecution: &genai.ToolCodeExecution{}}}, + } + + var builtIn []*genai.Tool + for _, e := range entries { + if enabled, ok := providerutil.GetProviderOptBool(c.ModelConfig.ProviderOpts, e.key); ok && enabled { + builtIn = append(builtIn, e.tool) + slog.Debug("Gemini built-in tool enabled", "key", e.key) + } + } + return builtIn +} + // convertToolsToGemini converts tools to Gemini format func convertToolsToGemini(requestTools []tools.Tool) ([]*genai.Tool, error) { if len(requestTools) == 0 { @@ -533,6 +555,9 @@ func (c *Client) CreateChatCompletionStream( config := c.buildConfig() + // Start with Google built-in tools (search, maps, code execution) from provider_opts + config.Tools = c.builtInTools() + // Add tools to config if provided if len(requestTools) > 0 { allTools, err := convertToolsToGemini(requestTools) @@ -541,7 +566,7 @@ func (c *Client) CreateChatCompletionStream( return nil, err } - config.Tools = allTools + config.Tools = append(config.Tools, allTools...) 
// Enable function calling config.ToolConfig = &genai.ToolConfig{ @@ -550,6 +575,11 @@ func (c *Client) CreateChatCompletionStream( }, } + // When mixing built-in tools with function calling, Gemini requires this flag + if len(config.Tools) > len(allTools) { + config.ToolConfig.IncludeServerSideToolInvocations = new(true) + } + // Debug: Log the tools we're sending slog.Debug("Gemini tools config", "tools", config.Tools) for _, tool := range config.Tools { diff --git a/pkg/model/provider/gemini/client_test.go b/pkg/model/provider/gemini/client_test.go index 2159bbcbf..2f81688bf 100644 --- a/pkg/model/provider/gemini/client_test.go +++ b/pkg/model/provider/gemini/client_test.go @@ -379,6 +379,98 @@ func TestConvertMessagesToGemini_ThoughtSignature(t *testing.T) { } } +func TestBuiltInTools(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + providerOpts map[string]any + wantCount int + wantSearch bool + wantMaps bool + wantCodeExec bool + }{ + { + name: "no built-in tools by default", + providerOpts: nil, + wantCount: 0, + }, + { + name: "google_search enabled", + providerOpts: map[string]any{"google_search": true}, + wantCount: 1, + wantSearch: true, + }, + { + name: "google_maps enabled", + providerOpts: map[string]any{"google_maps": true}, + wantCount: 1, + wantMaps: true, + }, + { + name: "both enabled", + providerOpts: map[string]any{"google_search": true, "google_maps": true}, + wantCount: 2, + wantSearch: true, + wantMaps: true, + }, + { + name: "explicitly disabled", + providerOpts: map[string]any{"google_search": false, "google_maps": false}, + wantCount: 0, + }, + { + name: "code_execution enabled", + providerOpts: map[string]any{"code_execution": true}, + wantCount: 1, + wantCodeExec: true, + }, + { + name: "all three enabled", + providerOpts: map[string]any{"google_search": true, "google_maps": true, "code_execution": true}, + wantCount: 3, + wantSearch: true, + wantMaps: true, + wantCodeExec: true, + }, + } + + for _, tt := range 
// GetProviderOptBool looks up key in opts and returns its value as a bool.
//
// The second result reports whether a usable bool was found. It is false when
// opts is nil, when key is absent, or when the stored value is not a bool; in
// the last case a debug message describing the mismatch is logged and the
// value is ignored.
func GetProviderOptBool(opts map[string]any, key string) (bool, bool) {
	// Indexing a nil map is safe and simply reports the key as absent.
	raw, present := opts[key]
	if !present {
		return false, false
	}

	if flag, isBool := raw.(bool); isBool {
		return flag, true
	}

	slog.Debug("provider_opts type mismatch, ignoring",
		"key", key,
		"expected_type", "bool",
		"actual_type", fmt.Sprintf("%T", raw),
		"value", raw)
	return false, false
}
diff --git a/pkg/model/provider/providerutil/provider_opts_test.go b/pkg/model/provider/providerutil/provider_opts_test.go index bbd27d9c8..7b59af121 100644 --- a/pkg/model/provider/providerutil/provider_opts_test.go +++ b/pkg/model/provider/providerutil/provider_opts_test.go @@ -33,6 +33,32 @@ func TestGetProviderOptFloat64(t *testing.T) { } } +func TestGetProviderOptBool(t *testing.T) { + tests := []struct { + name string + opts map[string]any + key string + want bool + wantOK bool + }{ + {"nil opts", nil, "google_search", false, false}, + {"missing key", map[string]any{}, "google_search", false, false}, + {"true value", map[string]any{"google_search": true}, "google_search", true, true}, + {"false value", map[string]any{"google_search": false}, "google_search", false, true}, + {"string value", map[string]any{"google_search": "true"}, "google_search", false, false}, + {"int value", map[string]any{"google_search": 1}, "google_search", false, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, ok := GetProviderOptBool(tt.opts, tt.key) + assert.Equal(t, tt.wantOK, ok) + if ok { + assert.Equal(t, tt.want, got) + } + }) + } +} + func TestGetProviderOptInt64(t *testing.T) { tests := []struct { name string