diff --git a/packages/core/src/flag/flag.ts b/packages/core/src/flag/flag.ts
index f76d1aaf9d2..fd2e0199e08 100644
--- a/packages/core/src/flag/flag.ts
+++ b/packages/core/src/flag/flag.ts
@@ -86,6 +86,7 @@ export const Flag = {
   OPENCODE_EXPERIMENTAL_WORKSPACES: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_WORKSPACES"),
   OPENCODE_EXPERIMENTAL_EVENT_SYSTEM: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_EVENT_SYSTEM"),
   OPENCODE_EXPERIMENTAL_SESSION_SWITCHING: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_SESSION_SWITCHING"),
+  OPENCODE_EXPERIMENTAL_CACHE_AUDIT: truthy("OPENCODE_EXPERIMENTAL_CACHE_AUDIT"),

   // Evaluated at access time (not module load) because tests, the CLI, and
   // external tooling set these env vars at runtime.
diff --git a/packages/opencode/src/cli/cmd/tui/feature-plugins/sidebar/context.tsx b/packages/opencode/src/cli/cmd/tui/feature-plugins/sidebar/context.tsx
index 405e8c1458a..dc76001c0e9 100644
--- a/packages/opencode/src/cli/cmd/tui/feature-plugins/sidebar/context.tsx
+++ b/packages/opencode/src/cli/cmd/tui/feature-plugins/sidebar/context.tsx
@@ -1,7 +1,8 @@
 import type { AssistantMessage } from "@opencode-ai/sdk/v2"
 import type { TuiPlugin, TuiPluginApi } from "@opencode-ai/plugin/tui"
+import { Flag } from "@opencode-ai/core/flag/flag"
 import type { InternalTuiPlugin } from "../../plugin/internal"
-import { createMemo } from "solid-js"
+import { Show, createMemo } from "solid-js"

 const id = "internal:sidebar-context"

@@ -22,27 +23,54 @@ function View(props: { api: TuiPluginApi; session_id: string }) {
       return {
         tokens: 0,
         percent: null,
+        cacheInput: 0,
+        cacheNew: 0,
+        cacheRead: 0,
+        cacheWrite: 0,
+        cacheHitPercent: null,
+        cacheOutput: 0,
       }
     }
     const tokens = last.tokens.input + last.tokens.output + last.tokens.reasoning + last.tokens.cache.read + last.tokens.cache.write
     const model = props.api.state.provider.find((item) => item.id === last.providerID)?.models[last.modelID]
+    const cacheInput = last.tokens.input + last.tokens.cache.read + last.tokens.cache.write
+    const cacheHitPercent = cacheInput > 0 ? ((last.tokens.cache.read / cacheInput) * 100).toFixed(1) : null
     return {
       tokens,
       percent: model?.limit.context ? Math.round((tokens / model.limit.context) * 100) : null,
+      cacheInput,
+      cacheNew: last.tokens.input,
+      cacheRead: last.tokens.cache.read,
+      cacheWrite: last.tokens.cache.write,
+      cacheHitPercent,
+      cacheOutput: last.tokens.output,
     }
   })

   return (
-
-
-      Context
-
-      {state().tokens.toLocaleString()} tokens
-      {state().percent ?? 0}% used
-      {money.format(cost())} spent
-
+    <>
+
+
+        Context
+
+        {state().tokens.toLocaleString()} tokens
+        {state().percent ?? 0}% used
+        {money.format(cost())} spent
+
+      <Show when={Flag.OPENCODE_EXPERIMENTAL_CACHE_AUDIT}>
+
+          Cache Audit
+          {state().cacheInput.toLocaleString()} input tokens
+          {state().cacheNew.toLocaleString()} new
+          {state().cacheRead.toLocaleString()} cache read
+          {state().cacheWrite.toLocaleString()} cache write
+          {state().cacheHitPercent}% hit rate
+          {state().cacheOutput.toLocaleString()} output tokens
+
+      </Show>
+    </>
   )
 }
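Both the sidebar memo above and the processor hook below compute the same cache metric, so it is worth spelling out: per the diff's own "new" label, `tokens.input` counts only input tokens that were *not* served from the provider cache, so the prompt-side total is `input + cache.read + cache.write`, and the hit rate is the share of that total covered by `cache.read`. Below is a minimal sketch of the math with a worked example; the `Tokens` shape and helper names are illustrative stand-ins, not part of the diff.

```ts
// Illustrative stand-in for the SDK's token-usage shape.
interface Tokens {
  input: number // "new" input tokens, i.e. not served from cache
  output: number
  cache: { read: number; write: number }
}

// Prompt-side total: fresh input plus everything that touched the cache.
function cacheInput(t: Tokens): number {
  return t.input + t.cache.read + t.cache.write
}

// Hit rate as a one-decimal percentage string, or null before any input
// exists (the sidebar renders null; the processor log substitutes "0.0").
function cacheHitPercent(t: Tokens): string | null {
  const total = cacheInput(t)
  return total > 0 ? ((t.cache.read / total) * 100).toFixed(1) : null
}

// Worked example: 9,000 of 10,000 prompt-side tokens were cache reads.
const sample: Tokens = { input: 800, output: 350, cache: { read: 9000, write: 200 } }
console.log(cacheInput(sample)) // 10000
console.log(cacheHitPercent(sample)) // "90.0"
```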
diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index 9765175e9e1..1bbf860172e 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -25,6 +25,7 @@ import { SyncEvent } from "@/sync"
 import { SessionEvent } from "@/v2/session-event"
 import { ModelV2 } from "@opencode-ai/core/model"
 import { ProviderV2 } from "@opencode-ai/core/provider"
+import { Flag } from "@opencode-ai/core/flag/flag"
 import * as DateTime from "effect/DateTime"
 import { RuntimeFlags } from "@/effect/runtime-flags"

@@ -510,6 +511,13 @@
             usage: value.usage,
             metadata: value.providerMetadata,
           })
+          if (Flag.OPENCODE_EXPERIMENTAL_CACHE_AUDIT) {
+            const totalInputTokens = usage.tokens.input + usage.tokens.cache.read + usage.tokens.cache.write
+            const cacheHitPercent = totalInputTokens > 0 ? ((usage.tokens.cache.read / totalInputTokens) * 100).toFixed(1) : "0.0"
+            slog.info(
+              `[CACHE] ${ctx.model.id} input=${totalInputTokens} (cache_read=${usage.tokens.cache.read} cache_write=${usage.tokens.cache.write} new=${usage.tokens.input}) hit=${cacheHitPercent}% output=${usage.tokens.output} total=${usage.tokens.total ?? 0}`,
+            )
+          }
           if (!ctx.assistantMessage.summary) {
             // TODO(v2): Temporary dual-write while migrating session messages to v2 events.
             if (flags.experimentalEventSystem) {
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index bc58fbdf356..e4c612b7384 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -1579,7 +1579,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the
         text: nextPrompt.text.join("\n"),
         files: nextPrompt.files,
         agents: nextPrompt.agents,
-        references: nextPrompt.references,
+
       },
     })
   }
diff --git a/packages/opencode/src/session/session.ts b/packages/opencode/src/session/session.ts
index 85486480aa4..dbf255964cb 100644
--- a/packages/opencode/src/session/session.ts
+++ b/packages/opencode/src/session/session.ts
@@ -419,6 +419,7 @@ export const getUsage = (input: { model: Provider.Model; usage: LanguageModelUsa
   }

   const contextTokens = inputTokens
+  const rawInputTokens = inputTokens
   const costInfo = input.model.cost?.tiers
     ?.filter((item) => item.tier.type === "context" && contextTokens > item.tier.size)
@@ -439,6 +440,7 @@
         .toNumber(),
     ),
     tokens,
+    rawInputTokens,
   }
 }
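Note that the new flag, unlike its siblings in flag.ts, is not implied by the umbrella `OPENCODE_EXPERIMENTAL`; it only turns on when `OPENCODE_EXPERIMENTAL_CACHE_AUDIT` itself is set in the environment. When it is, the processor emits one `[CACHE]` line per completed response. Below is a sketch of that line as a pure function, reusing the numbers from the worked example above; `formatCacheAudit` and the model id are hypothetical, while the template string mirrors the one added in processor.ts.

```ts
// Illustrative stand-in for the processor's `usage` value.
interface Usage {
  tokens: {
    input: number
    output: number
    total?: number
    cache: { read: number; write: number }
  }
}

// Mirrors the template literal added in processor.ts; `modelID` plays
// the role of `ctx.model.id`.
function formatCacheAudit(modelID: string, usage: Usage): string {
  const t = usage.tokens
  const totalInputTokens = t.input + t.cache.read + t.cache.write
  const cacheHitPercent = totalInputTokens > 0 ? ((t.cache.read / totalInputTokens) * 100).toFixed(1) : "0.0"
  return `[CACHE] ${modelID} input=${totalInputTokens} (cache_read=${t.cache.read} cache_write=${t.cache.write} new=${t.input}) hit=${cacheHitPercent}% output=${t.output} total=${t.total ?? 0}`
}

// Prints:
// [CACHE] example-model input=10000 (cache_read=9000 cache_write=200 new=800) hit=90.0% output=350 total=10350
console.log(
  formatCacheAudit("example-model", {
    tokens: { input: 800, output: 350, total: 10350, cache: { read: 9000, write: 200 } },
  }),
)
```

The `rawInputTokens` field added to `getUsage` in session.ts captures the input count before the context-tier cost lookup runs, presumably so audit consumers can see the unadjusted figure alongside `tokens`.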