diff --git a/packages/core/src/flag/flag.ts b/packages/core/src/flag/flag.ts
index f76d1aaf9d2..fd2e0199e08 100644
--- a/packages/core/src/flag/flag.ts
+++ b/packages/core/src/flag/flag.ts
@@ -86,6 +86,8 @@ export const Flag = {
OPENCODE_EXPERIMENTAL_WORKSPACES: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_WORKSPACES"),
OPENCODE_EXPERIMENTAL_EVENT_SYSTEM: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_EVENT_SYSTEM"),
OPENCODE_EXPERIMENTAL_SESSION_SWITCHING: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_SESSION_SWITCHING"),
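+  // Opt-in only: unlike the experimental flags above, this one is not implied by the blanket OPENCODE_EXPERIMENTAL switch.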
+ OPENCODE_EXPERIMENTAL_CACHE_AUDIT: truthy("OPENCODE_EXPERIMENTAL_CACHE_AUDIT"),
// Evaluated at access time (not module load) because tests, the CLI, and
// external tooling set these env vars at runtime.
diff --git a/packages/opencode/src/cli/cmd/tui/feature-plugins/sidebar/context.tsx b/packages/opencode/src/cli/cmd/tui/feature-plugins/sidebar/context.tsx
index 405e8c1458a..dc76001c0e9 100644
--- a/packages/opencode/src/cli/cmd/tui/feature-plugins/sidebar/context.tsx
+++ b/packages/opencode/src/cli/cmd/tui/feature-plugins/sidebar/context.tsx
@@ -1,7 +1,8 @@
import type { AssistantMessage } from "@opencode-ai/sdk/v2"
import type { TuiPlugin, TuiPluginApi } from "@opencode-ai/plugin/tui"
+import { Flag } from "@opencode-ai/core/flag/flag"
import type { InternalTuiPlugin } from "../../plugin/internal"
-import { createMemo } from "solid-js"
+import { Show, createMemo } from "solid-js"
const id = "internal:sidebar-context"
@@ -22,27 +23,56 @@ function View(props: { api: TuiPluginApi; session_id: string }) {
return {
tokens: 0,
percent: null,
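+        // Cache-audit fields default to zero until a usage-bearing assistant message exists.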
+ cacheInput: 0,
+ cacheNew: 0,
+ cacheRead: 0,
+ cacheWrite: 0,
+ cacheHitPercent: null,
+ cacheOutput: 0,
}
}
const tokens =
last.tokens.input + last.tokens.output + last.tokens.reasoning + last.tokens.cache.read + last.tokens.cache.write
const model = props.api.state.provider.find((item) => item.id === last.providerID)?.models[last.modelID]
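+    // Prompt-side total = fresh input + cache reads + cache writes; hit rate is the cached share of that total.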
+ const cacheInput = last.tokens.input + last.tokens.cache.read + last.tokens.cache.write
+ const cacheHitPercent = cacheInput > 0 ? ((last.tokens.cache.read / cacheInput) * 100).toFixed(1) : null
return {
tokens,
percent: model?.limit.context ? Math.round((tokens / model.limit.context) * 100) : null,
+ cacheInput,
+ cacheNew: last.tokens.input,
+ cacheRead: last.tokens.cache.read,
+ cacheWrite: last.tokens.cache.write,
+ cacheHitPercent,
+ cacheOutput: last.tokens.output,
}
})
return (
-    <box>
-      <text>
-        Context
-      </text>
-      <text>{state().tokens.toLocaleString()} tokens</text>
-      <text>{state().percent ?? 0}% used</text>
-      <text>{money.format(cost())} spent</text>
-    </box>
+    <>
+      <box>
+        <text>
+          Context
+        </text>
+        <text>{state().tokens.toLocaleString()} tokens</text>
+        <text>{state().percent ?? 0}% used</text>
+        <text>{money.format(cost())} spent</text>
+      </box>
+      <Show when={Flag.OPENCODE_EXPERIMENTAL_CACHE_AUDIT}>
+        <box>
+          <text>Cache Audit</text>
+          <text>{state().cacheInput.toLocaleString()} input tokens</text>
+          <text>{state().cacheNew.toLocaleString()} new</text>
+          <text>{state().cacheRead.toLocaleString()} cache read</text>
+          <text>{state().cacheWrite.toLocaleString()} cache write</text>
+          <text>{state().cacheHitPercent ?? "0.0"}% hit rate</text>
+          <text>{state().cacheOutput.toLocaleString()} output tokens</text>
+        </box>
+      </Show>
+    </>
)
}
diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts
index 9765175e9e1..1bbf860172e 100644
--- a/packages/opencode/src/session/processor.ts
+++ b/packages/opencode/src/session/processor.ts
@@ -25,6 +25,7 @@ import { SyncEvent } from "@/sync"
import { SessionEvent } from "@/v2/session-event"
import { ModelV2 } from "@opencode-ai/core/model"
import { ProviderV2 } from "@opencode-ai/core/provider"
+import { Flag } from "@opencode-ai/core/flag/flag"
import * as DateTime from "effect/DateTime"
import { RuntimeFlags } from "@/effect/runtime-flags"
@@ -510,6 +511,14 @@ export const layer: Layer.Layer<
usage: value.usage,
metadata: value.providerMetadata,
})
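+          // Per-request cache summary: fresh vs. cached input tokens and the resulting hit rate.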
+ if (Flag.OPENCODE_EXPERIMENTAL_CACHE_AUDIT) {
+ const totalInputTokens = usage.tokens.input + usage.tokens.cache.read + usage.tokens.cache.write
+ const cacheHitPercent = totalInputTokens > 0 ? ((usage.tokens.cache.read / totalInputTokens) * 100).toFixed(1) : "0.0"
+ slog.info(
+ `[CACHE] ${ctx.model.id} input=${totalInputTokens} (cache_read=${usage.tokens.cache.read} cache_write=${usage.tokens.cache.write} new=${usage.tokens.input}) hit=${cacheHitPercent}% output=${usage.tokens.output} total=${usage.tokens.total ?? 0}`,
+ )
+ }
if (!ctx.assistantMessage.summary) {
// TODO(v2): Temporary dual-write while migrating session messages to v2 events.
if (flags.experimentalEventSystem) {
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index bc58fbdf356..e4c612b7384 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -1579,7 +1579,6 @@ NOTE: At any point in time through this workflow you should feel free to ask the
text: nextPrompt.text.join("\n"),
files: nextPrompt.files,
agents: nextPrompt.agents,
- references: nextPrompt.references,
},
})
}
diff --git a/packages/opencode/src/session/session.ts b/packages/opencode/src/session/session.ts
index 85486480aa4..dbf255964cb 100644
--- a/packages/opencode/src/session/session.ts
+++ b/packages/opencode/src/session/session.ts
@@ -419,6 +419,8 @@ export const getUsage = (input: { model: Provider.Model; usage: LanguageModelUsa
}
const contextTokens = inputTokens
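+  // Snapshot the input-token count here so callers get it back unchanged alongside the derived totals.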
+ const rawInputTokens = inputTokens
const costInfo =
input.model.cost?.tiers
?.filter((item) => item.tier.type === "context" && contextTokens > item.tier.size)
@@ -439,6 +441,7 @@
.toNumber(),
),
tokens,
+ rawInputTokens,
}
}