diff --git a/.gitignore b/.gitignore
index bf78c046d4b..1efa395e597 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,10 @@ a.out
target
.scripts
.direnv/
+.venv-docling/
+.rag/
+__pycache__/
+*.pyc
# Local dev files
opencode-dev
diff --git a/.opencode/plugins/rag_context.ts b/.opencode/plugins/rag_context.ts
new file mode 100644
index 00000000000..7191740c3d5
--- /dev/null
+++ b/.opencode/plugins/rag_context.ts
@@ -0,0 +1,338 @@
+/// <reference types="@opencode-ai/plugin" />
+import type { Plugin } from "@opencode-ai/plugin"
+import path from "path"
+import {
+ allow,
+ allowExpand,
+ audit,
+ base,
+ cluster,
+ collection,
+ decide,
+ key,
+ model,
+ parse,
+ py,
+ reset,
+ rewriteMode,
+ rewriteModel,
+ rewriteQueries,
+ reuseSec,
+ root,
+ row,
+ session,
+ stateBlock,
+ strip,
+ summary,
+ topk,
+ chars,
+ db,
+} from "../rag"
+
+type Msg = {
+ info?: {
+ role?: string
+ id?: string
+ sessionID?: string
+ sessionId?: string
+ }
+ parts?: Array<{
+ type?: string
+ text?: string
+ synthetic?: boolean
+ }>
+}
+
+function sid(msgs: Msg[], idx: number) {
+ const direct = msgs[idx]?.info?.sessionID || msgs[idx]?.info?.sessionId
+ if (direct) return String(direct)
+ for (let i = idx; i >= 0; i--) {
+ const v = msgs[i]?.info?.sessionID || msgs[i]?.info?.sessionId
+ if (v) return String(v)
+ }
+ return "default"
+}
+
+function uid(msgs: Msg[], idx: number) {
+ const v = msgs[idx]?.info?.id
+ if (!v) return ""
+ return String(v)
+}
+
+function next(status: string) {
+ if (status === "new_evidence") return "call_rag_search_delta_if_needed"
+ if (status === "weak_match") return "call_rag_search_delta_or_refine_query"
+ if (status === "no_new_evidence") return "reuse_known_state_or_call_rag_search_state"
+ if (status === "cluster_throttled") return "avoid_repeating_same_search"
+ if (status === "retrieval_error") return "retry_or_check_rag_backend"
+ return "refine_query_or_call_rag_search"
+}
+
+function mark(
+ hit: ReturnType<typeof row>,
+ input: { query: string; status: string; reason: string; total?: number; rewrites?: string[] },
+) {
+ hit.last_query = input.query
+ hit.last_status = input.status
+ hit.last_reason = input.reason
+ hit.last_checked = Date.now()
+ hit.total_hits = input.total || 0
+ hit.delta = []
+ hit.hits = []
+ hit.top = []
+ hit.overlap = 0
+ hit.rewrites = input.rewrites || [input.query]
+}
+
+const RagContextPlugin: Plugin = async ({ worktree, $ }) => {
+ return {
+ "tool.definition": async (input, output) => {
+ if (input.toolID !== "rag_search") return
+ output.description = [
+ output.description,
+ "",
+ "Call this tool with valid JSON arguments only.",
+ 'Use query as a plain string value. Do not insert extra quotes inside the query string.',
+ 'Valid example: {"query":"luckfox-pico zero 传输文件方式","mode":"delta","node_type":"text","top_k":3}',
+ 'Invalid example: {"query":"luck"fox-pico zero","mode":"brief"}',
+ ].join("\n")
+ },
+ "tool.execute.before": async (input, output) => {
+ if (input.tool !== "rag_search") return
+ if (allowExpand()) return
+ if (output.args?.mode !== "expand") return
+ output.args = {
+ ...output.args,
+ mode: "delta",
+ top_k: Math.min(Number(output.args?.top_k || 3), 3),
+ }
+ },
+ "experimental.chat.messages.transform": async (_input, output) => {
+ if (process.env.RAG_AUTO_INJECT === "0") return
+ const msgs = output.messages as Msg[]
+ if (!Array.isArray(msgs) || !msgs.length) return
+ let idx = -1
+ for (let i = msgs.length - 1; i >= 0; i--) {
+ if (msgs[i].info?.role === "user") {
+ idx = i
+ break
+ }
+ }
+ if (idx < 0) return
+ const loop = msgs.slice(idx + 1).some((msg) => msg.info?.role === "assistant")
+ const parts = Array.isArray(msgs[idx].parts) ? msgs[idx].parts : []
+ let textPart: { type?: string; text?: string; synthetic?: boolean } | undefined
+ for (let i = parts.length - 1; i >= 0; i--) {
+ const part = parts[i]
+ if (part?.type === "text" && typeof part.text === "string" && !part.synthetic) {
+ textPart = part
+ break
+ }
+ }
+ if (!textPart?.text) return
+
+ const clean = strip(textPart.text)
+ const query = clean.trim().slice(0, 800)
+ if (!query) return
+
+ const sessionID = sid(msgs, idx)
+ const userID = uid(msgs, idx)
+ const st = session(sessionID)
+ const keyName = cluster(query)
+ const hit = row(st, keyName)
+ const now = Date.now()
+ const baseDir = root(worktree)
+ const python = py(baseDir)
+ const script = path.join(baseDir, "script", "rag", "search-vector-index.py")
+ const dbPath = db(baseDir)
+ const same = st.last_user_id === userID && st.last_query === query && st.last_cluster === keyName
+ const cached = !!hit.last_status && (loop || (same && now - hit.last_checked <= reuseSec() * 1000))
+
+ if (cached) {
+ textPart.text = `${clean}\n\n${stateBlock(keyName, hit, next(hit.last_status))}`
+ await audit(worktree, {
+ channel: "rag_context",
+ event: "context_meta",
+ sessionID,
+ userID,
+ query,
+ cluster: keyName,
+ loop,
+ used_cache: true,
+ status: hit.last_status,
+ reason: hit.last_reason,
+ total_hits: hit.total_hits,
+ delta_hits: hit.delta.length,
+ known_hits: hit.known_hits,
+ overlap: hit.overlap,
+ rewrites: hit.rewrites,
+ top_hits: summary(hit.top, 3),
+ emitted_context: false,
+ })
+ return
+ }
+
+ if (!allow(hit)) {
+ mark(hit, {
+ query,
+ status: "cluster_throttled",
+ reason: "cluster_window_limit",
+ })
+ st.last_user_id = userID
+ st.last_query = query
+ st.last_cluster = keyName
+ textPart.text = `${clean}\n\n${stateBlock(keyName, hit, next(hit.last_status))}`
+ await audit(worktree, {
+ channel: "rag_context",
+ event: "context_meta",
+ sessionID,
+ userID,
+ query,
+ cluster: keyName,
+ loop,
+ used_cache: false,
+ status: hit.last_status,
+ reason: hit.last_reason,
+ total_hits: hit.total_hits,
+ delta_hits: hit.delta.length,
+ known_hits: hit.known_hits,
+ overlap: hit.overlap,
+ rewrites: hit.rewrites,
+ top_hits: [],
+ emitted_context: false,
+ })
+ return
+ }
+
+ const res =
+ await $`${python} ${script} --query ${query} --db-path ${dbPath} --collection ${collection()} --model ${model()} --top-k ${topk()} --node-type text --show-text-chars ${chars()} --base-url ${base()} --api-key ${key()} --format json --rewrite ${rewriteMode()} --rewrite-model ${rewriteModel()} --rewrite-queries ${rewriteQueries()}`
+ .quiet()
+ .nothrow()
+ const raw = res.stdout.toString()
+
+ if (res.exitCode !== 0) {
+ mark(hit, {
+ query,
+ status: "retrieval_error",
+ reason: "backend_error",
+ })
+ st.last_user_id = userID
+ st.last_query = query
+ st.last_cluster = keyName
+ textPart.text = `${clean}\n\n${stateBlock(keyName, hit, next(hit.last_status))}`
+ await audit(worktree, {
+ channel: "rag_context",
+ event: "search_fail",
+ sessionID,
+ userID,
+ query,
+ cluster: keyName,
+ loop,
+ code: res.exitCode,
+ stderr: res.stderr.toString().slice(0, 1200),
+ status: hit.last_status,
+ reason: hit.last_reason,
+ emitted_context: false,
+ })
+ return
+ }
+
+ let resData = { hits: [], rewrites: [query], keywords: [], rewrite_mode: "none" } as ReturnType<typeof parse>
+ try {
+ resData = parse(raw)
+ } catch {
+ mark(hit, {
+ query,
+ status: "retrieval_error",
+ reason: "parse_error",
+ })
+ st.last_user_id = userID
+ st.last_query = query
+ st.last_cluster = keyName
+ textPart.text = `${clean}\n\n${stateBlock(keyName, hit, next(hit.last_status))}`
+ await audit(worktree, {
+ channel: "rag_context",
+ event: "parse_fail",
+ sessionID,
+ userID,
+ query,
+ cluster: keyName,
+ loop,
+ raw: raw.slice(0, 1200),
+ status: hit.last_status,
+ reason: hit.last_reason,
+ emitted_context: false,
+ })
+ return
+ }
+
+ const out = decide(hit, resData.hits, query, resData.rewrites)
+ st.last_user_id = userID
+ st.last_query = query
+ st.last_cluster = keyName
+ textPart.text = `${clean}\n\n${stateBlock(keyName, hit, out.next)}`
+ await audit(worktree, {
+ channel: "rag_context",
+ event: "context_search",
+ sessionID,
+ userID,
+ query,
+ cluster: keyName,
+ loop,
+ used_cache: false,
+ status: out.status,
+ reason: out.reason,
+ total_hits: out.total,
+ delta_hits: out.delta.length,
+ known_hits: out.known,
+ overlap: out.overlap,
+ rewrite_mode: resData.rewrite_mode,
+ rewrites: hit.rewrites,
+ keywords: resData.keywords,
+ top_hits: summary(hit.top, 3),
+ delta_fps: out.delta.map((x) => ({
+ fp: `${x.text_file || x.source_url || ""}#${x.chunk_id || x.image_id || x.section_title || ""}`,
+ source_url: x.source_url || "",
+ section_title: x.section_title || "",
+ chunk_id: x.chunk_id || "",
+ })),
+ emitted_context: false,
+ })
+ },
+ "experimental.chat.system.transform": async (_input, output) => {
+ if (process.env.RAG_AUTO_INJECT === "0") return
+ output.system.push("RAG protocol: parse on every model step. rag_context injects retrieval meta only, not full evidence.")
+ output.system.push(
+ "If rag_state status=new_evidence and you still need facts, call rag_search with mode=delta first. Use mode=brief only when delta is insufficient.",
+ )
+ output.system.push(
+ "If rag_state status=no_new_evidence, reuse current state. Do not repeat the same retrieval unless the query becomes more specific.",
+ )
+ output.system.push(
+ "Do not call rag_search mode=expand in normal QA. Use expand only for explicit debugging or evidence inspection.",
+ )
+ output.system.push(
+ "Do not execute script/rag/search-vector-index.py directly from shell for QA retrieval. Use rag_search only.",
+ )
+ output.system.push(
+ 'When calling rag_search, emit valid JSON arguments. query must be one plain string value, without nested or broken quotation marks.',
+ )
+ output.system.push(
+ "For long or noisy questions, trust rag_state rewrite metadata and prefer rag_search results derived from rewritten retrieval queries.",
+ )
+ },
+ "experimental.session.compacting": async (input, output) => {
+ const id = String((input as { sessionID?: string })?.sessionID || "default")
+ const st = reset(id)
+ await audit(worktree, {
+ channel: "rag_context",
+ event: "state_reset",
+ sessionID: id,
+ epoch: st.epoch,
+ })
+ return output
+ },
+ }
+}
+
+export default RagContextPlugin
diff --git a/.opencode/rag.ts b/.opencode/rag.ts
new file mode 100644
index 00000000000..042399e9831
--- /dev/null
+++ b/.opencode/rag.ts
@@ -0,0 +1,428 @@
+import path from "path"
+import { appendFile, mkdir } from "node:fs/promises"
+
+export type Hit = {
+ score?: number
+ rerank_score?: number
+ source_url?: string
+ section_title?: string
+ text_preview?: string
+ chunk_id?: string
+ image_id?: string
+ text_file?: string
+ matched_queries?: string[]
+ hit_count?: number
+}
+
+export type SearchResult = {
+ hits: Hit[]
+ rewrites: string[]
+ keywords: string[]
+ rewrite_mode: string
+}
+
+type Row = {
+ seen: Set<string>
+ window: number[]
+ last_query: string
+ last_status: string
+ last_reason: string
+ last_checked: number
+ total_hits: number
+ known_hits: number
+ overlap: number
+ delta: Hit[]
+ hits: Hit[]
+ top: Hit[]
+ rewrites: string[]
+}
+
+type Session = {
+ epoch: number
+ last_user_id: string
+ last_query: string
+ last_cluster: string
+ rows: Map<string, Row>
+}
+
+const STORE = new Map<string, Session>()
+const STOP = new Set([
+ "的",
+ "了",
+ "和",
+ "是",
+ "怎么",
+ "如何",
+ "请问",
+ "一下",
+ "关于",
+ "教程",
+ "方法",
+ "方式",
+ "what",
+ "how",
+ "the",
+ "a",
+ "an",
+ "to",
+ "for",
+ "of",
+ "in",
+])
+const SYN: Record<string, string> = {
+ flash: "烧录",
+ burn: "烧录",
+ firmware: "固件",
+ image: "镜像",
+ electerm: "electerm",
+ luckfox: "luckfox",
+ pico: "pico",
+ zero: "zero",
+}
+
+export function topk() {
+ const n = Number.parseInt(process.env.RAG_TOP_K ?? "4", 10)
+ if (Number.isFinite(n) && n > 0) return n
+ return 4
+}
+
+export function use() {
+ const n = Number.parseInt(process.env.RAG_CONTEXT_HITS ?? "2", 10)
+ if (Number.isFinite(n) && n > 0) return n
+ return 2
+}
+
+export function chars() {
+ const n = Number.parseInt(process.env.RAG_CONTEXT_CHARS ?? "120", 10)
+ if (Number.isFinite(n) && n >= 40) return n
+ return 120
+}
+
+export function expandChars() {
+ const n = Number.parseInt(process.env.RAG_EXPAND_CHARS ?? "420", 10)
+ if (Number.isFinite(n) && n >= 120) return n
+ return 420
+}
+
+export function simCut() {
+ const n = Number.parseFloat(process.env.RAG_OVERLAP_THRESHOLD ?? "0.8")
+ if (Number.isFinite(n) && n > 0 && n <= 1) return n
+ return 0.8
+}
+
+export function weakCut() {
+ const n = Number.parseFloat(process.env.RAG_WEAK_SCORE ?? "0.42")
+ if (Number.isFinite(n) && n > 0 && n < 1) return n
+ return 0.42
+}
+
+export function clusterWindowSec() {
+ const n = Number.parseInt(process.env.RAG_CLUSTER_WINDOW_SEC ?? "30", 10)
+ if (Number.isFinite(n) && n > 0) return n
+ return 30
+}
+
+export function clusterMax() {
+ const n = Number.parseInt(process.env.RAG_CLUSTER_MAX_FULL ?? "2", 10)
+ if (Number.isFinite(n) && n > 0) return n
+ return 2
+}
+
+export function reuseSec() {
+ const n = Number.parseInt(process.env.RAG_REUSE_SEC ?? "8", 10)
+ if (Number.isFinite(n) && n >= 0) return n
+ return 8
+}
+
+export function model() {
+ const v = process.env.RAG_EMBED_MODEL
+ if (v) return v
+ return "qwen3-embedding:4b"
+}
+
+export function rewriteMode() {
+ const v = process.env.RAG_REWRITE_MODE
+ if (v) return v
+ return "auto"
+}
+
+export function rewriteModel() {
+ const v = process.env.RAG_REWRITE_MODEL
+ if (v) return v
+ return process.env.RAG_STRUCT_MODEL || process.env.OPENAI_MODEL || "gpt-4o-mini"
+}
+
+export function rewriteQueries() {
+ const n = Number.parseInt(process.env.RAG_REWRITE_QUERIES ?? "3", 10)
+ if (Number.isFinite(n) && n > 0) return n
+ return 3
+}
+
+export function collection() {
+ const v = process.env.RAG_COLLECTION
+ if (v) return v
+ return "rag_chunks"
+}
+
+export function base() {
+ const v = process.env.RAG_BASE_URL || process.env.OPENAI_BASE_URL
+ if (v) return v
+ return "http://127.0.0.1:11434/v1"
+}
+
+export function key() {
+ const v = process.env.RAG_API_KEY || process.env.OPENAI_API_KEY || process.env.MINIMAX_API_KEY
+ if (v) return v
+ return "ollama"
+}
+
+export function debug() {
+ return process.env.RAG_DEBUG_LOG === "1" || process.env.RAG_DEBUG === "1"
+}
+
+export function allowExpand() {
+ return process.env.RAG_ALLOW_EXPAND_TOOL === "1"
+}
+
+export function root(input: string) {
+ const env = process.env.RAG_WORKTREE
+ if (env) return env
+ if (input && input !== "/") return input
+ return process.cwd()
+}
+
+export function py(rootDir: string) {
+ const env = process.env.RAG_DOCLING_PYTHON_BIN
+ if (env) return env
+ return path.join(rootDir, ".venv-docling", "bin", "python")
+}
+
+export function db(rootDir: string) {
+ const env = process.env.RAG_DB_PATH
+ if (env) return env
+ return path.join(rootDir, ".rag", "vector", "qdrant")
+}
+
+export function clip(text: string, n: number) {
+ const s = String(text || "").replace(/\s+/g, " ").trim()
+ if (s.length <= n) return s
+ return `${s.slice(0, n).trim()} ...`
+}
+
+export function strip(text: string) {
+ return text
+ .replace(/\n*<rag_context[^>]*>[\s\S]*?<\/rag_context>\n*/g, "\n")
+ .replace(/\n*<rag_state[^>]*>[\s\S]*?<\/rag_state>\n*/g, "\n")
+ .replace(/\n{3,}/g, "\n\n")
+ .trim()
+}
+
+export function terms(query: string) {
+ const rows = (query.toLowerCase().match(/[\p{Script=Han}]+|[a-z0-9_-]+/gu) || [])
+ .map((x) => x.trim())
+ .filter(Boolean)
+ const out: string[] = []
+ for (const raw of rows) {
+ const v = SYN[raw] || raw
+ if (!v || STOP.has(v)) continue
+ out.push(v)
+ }
+ return [...new Set(out)].sort()
+}
+
+export function cluster(query: string) {
+ const rows = terms(query)
+ if (!rows.length) return `q:${clip(query.toLowerCase(), 48)}`
+ return rows.slice(0, 8).join("|")
+}
+
+export function fp(hit: Hit) {
+ const src = hit.text_file || hit.source_url || ""
+ const id = hit.chunk_id || hit.image_id || hit.section_title || clip(String(hit.text_preview || ""), 36)
+ return `${src}#${id}`
+}
+
+export function parse(raw: string) {
+ const data = JSON.parse(raw)
+ const hits = Array.isArray(data?.hits) ? data.hits : []
+ const rewrites = Array.isArray(data?.rewrite?.queries) ? data.rewrite.queries.filter((x: unknown) => typeof x === "string") : []
+ const keywords = Array.isArray(data?.rewrite?.keywords) ? data.rewrite.keywords.filter((x: unknown) => typeof x === "string") : []
+ return {
+ hits: hits as Hit[],
+ rewrites,
+ keywords,
+ rewrite_mode: String(data?.rewrite?.mode || "none"),
+ } as SearchResult
+}
+
+export function session(id: string) {
+ const cur = STORE.get(id)
+ if (cur) return cur
+ const next: Session = {
+ epoch: 0,
+ last_user_id: "",
+ last_query: "",
+ last_cluster: "",
+ rows: new Map(),
+ }
+ STORE.set(id, next)
+ return next
+}
+
+export function row(st: Session, key: string) {
+ const cur = st.rows.get(key)
+ if (cur) return cur
+ const next: Row = {
+ seen: new Set(),
+ window: [],
+ last_query: "",
+ last_status: "",
+ last_reason: "",
+ last_checked: 0,
+ total_hits: 0,
+ known_hits: 0,
+ overlap: 0,
+ delta: [],
+ hits: [],
+ top: [],
+ rewrites: [],
+ }
+ st.rows.set(key, next)
+ return next
+}
+
+export function allow(row: Row) {
+ const now = Date.now()
+ const win = clusterWindowSec() * 1000
+ row.window = row.window.filter((x) => now - x <= win)
+ if (row.window.length >= clusterMax()) return false
+ row.window.push(now)
+ return true
+}
+
+export function decide(row: Row, hits: Hit[], query: string, rewrites?: string[]) {
+ const keys = hits.map(fp)
+ const fresh = hits.filter((hit) => !row.seen.has(fp(hit)))
+ const shared = keys.filter((key) => row.seen.has(key)).length
+ const ov = keys.length ? shared / keys.length : 0
+ const top = Number(hits[0]?.score || 0)
+ const status = !hits.length
+ ? "need_refine"
+ : !fresh.length && ov >= simCut()
+ ? "no_new_evidence"
+ : top < weakCut()
+ ? "weak_match"
+ : "new_evidence"
+ const reason = !hits.length
+ ? "empty_hits"
+ : !fresh.length && ov >= simCut()
+ ? "high_overlap"
+ : top < weakCut()
+ ? "low_score"
+ : fresh.length < hits.length
+ ? "delta_available"
+ : "fresh_hits"
+ const next =
+ status === "need_refine"
+ ? "refine_query_or_call_rag_search"
+ : status === "no_new_evidence"
+ ? "reuse_known_evidence_or_call_rag_search_state"
+ : status === "weak_match"
+ ? "call_rag_search_delta_or_refine_query"
+ : "call_rag_search_delta_if_more_detail_needed"
+ for (const key of keys) row.seen.add(key)
+ row.last_query = query
+ row.last_status = status
+ row.last_reason = reason
+ row.last_checked = Date.now()
+ row.total_hits = hits.length
+ row.known_hits = row.seen.size
+ row.overlap = ov
+ row.delta = fresh
+ row.hits = hits
+ row.top = hits.slice(0, 3)
+ row.rewrites = rewrites && rewrites.length ? rewrites : [query]
+ return { status, reason, next, overlap: ov, delta: fresh, hits, known: row.known_hits, total: hits.length }
+}
+
+export function stateBlock(key: string, row: Row, next?: string) {
+ const top = row.top[0]
+ return [
+ "",
+ `status=${row.last_status || "need_refine"}`,
+ `reason=${row.last_reason || "empty_hits"}`,
+ `cluster=${key}`,
+ `total_hits=${row.total_hits}`,
+ `delta_hits=${row.delta.length}`,
+ `known_hits=${row.known_hits}`,
+ `overlap=${Number(row.overlap || 0).toFixed(4)}`,
+ `top_source=${top?.source_url || ""}`,
+ `top_section=${clip(top?.section_title || "", 48)}`,
+ `rewrite_queries=${JSON.stringify(row.rewrites)}`,
+ `next_action=${next || "call_rag_search_delta_if_needed"}`,
+ "",
+ ].join("\n")
+}
+
+export function brief(hits: Hit[], limit: number) {
+ if (!hits.length) return "no_rag_hit"
+ return hits
+ .slice(0, Math.max(1, limit))
+ .map((hit, i) =>
+ [
+ `[${i + 1}]`,
+ `source=${hit.source_url || ""}`,
+ `section=${clip(hit.section_title || "", 48)}`,
+ `summary=${clip(hit.text_preview || "", chars())}`,
+ ].join(" "),
+ )
+ .join("\n")
+}
+
+export function expand(hits: Hit[], limit: number) {
+ if (!hits.length) return "no_rag_hit"
+ return hits
+ .slice(0, Math.max(1, limit))
+ .map((hit, i) =>
+ [
+ `[${i + 1}] score=${Number(hit.score || 0).toFixed(4)}`,
+ `source=${hit.source_url || ""}`,
+ `section=${hit.section_title || ""}`,
+ `chunk=${hit.chunk_id || hit.image_id || ""}`,
+ `text=${clip(hit.text_preview || "", expandChars())}`,
+ ].join("\n"),
+ )
+ .join("\n\n")
+}
+
+export function summary(hits: Hit[], limit: number) {
+ return hits.slice(0, Math.max(1, limit)).map((hit) => ({
+ score: Number(hit.score || 0),
+ rerank_score: Number(hit.rerank_score || 0),
+ source_url: hit.source_url || "",
+ section_title: hit.section_title || "",
+ chunk_id: hit.chunk_id || "",
+ image_id: hit.image_id || "",
+ text_preview: clip(hit.text_preview || "", chars()),
+ fp: fp(hit),
+ matched_queries: Array.isArray(hit.matched_queries) ? hit.matched_queries : [],
+ hit_count: Number(hit.hit_count || 0),
+ }))
+}
+
+export async function audit(worktree: string, data: Record<string, unknown>) {
+ if (!debug()) return
+ const dir = path.join(root(worktree), ".rag", "log")
+ await mkdir(dir, { recursive: true })
+ await appendFile(path.join(dir, "rag_debug.jsonl"), `${JSON.stringify({ ts: new Date().toISOString(), ...data })}\n`, "utf-8")
+}
+
+export function reset(id: string) {
+ const st = session(id)
+ st.epoch += 1
+ st.rows.clear()
+ st.last_user_id = ""
+ st.last_query = ""
+ st.last_cluster = ""
+ return st
+}
diff --git a/.opencode/skills/rag-pipeline/SKILL.md b/.opencode/skills/rag-pipeline/SKILL.md
new file mode 100644
index 00000000000..5eb4081f8a7
--- /dev/null
+++ b/.opencode/skills/rag-pipeline/SKILL.md
@@ -0,0 +1,93 @@
+---
+name: rag-pipeline
+description: Run standardized rag init/update pipeline with minimal options and manifest-based sync
+compatibility: opencode
+---
+
+## Goal
+
+Use two commands only:
+
+1. `rag-init` for first build
+2. `rag-update` for incremental sync
+
+If the target repo does not contain this pipeline yet, bootstrap first:
+
+```bash
+bash script/rag/cmd/rag-bootstrap.sh --target <target-repo-dir>
+```
+
+## Required Inputs
+
+1. source type: `structured` | `dir` | `url`
+2. source path (or url list)
+3. embedding model
+4. collection name
+
+## Exposed Options
+
+Only expose these options to users by default:
+
+1. `--source`
+2. `--struct-mode` + `--struct-model`
+3. `--embed-model`
+4. `--url` / `--url-file` / `--input-dir` / `--scan-dir`
+5. `--collection`
+
+Keep low-level knobs hidden unless users ask explicitly:
+
+1. chunk size / overlap
+2. OCR engine internals
+3. retry/backoff internals
+
+## Commands
+
+### Initial build
+
+Structured-only init:
+
+```bash
+bash script/rag/cmd/rag-init.sh --source structured --scan-dir .rag/text --glob "**/*.structured.json" --embed-model qwen3-embedding:4b --collection rag_chunks
+```
+
+Directory init:
+
+```bash
+bash script/rag/cmd/rag-init.sh --source dir --input-dir <input-dir> --text-out-dir .rag/text/dir --embed-model qwen3-embedding:4b --collection rag_chunks
+```
+
+URL init:
+
+```bash
+bash script/rag/cmd/rag-init.sh --source url --url <url> --ocr-images --image-inline marker --url-text-dir .rag/text/url --embed-model qwen3-embedding:4b --collection rag_chunks
+```
+
+### Incremental update
+
+```bash
+bash script/rag/cmd/rag-update.sh --source structured --scan-dir .rag/text --glob "**/*.structured.json" --embed-model qwen3-embedding:4b --collection rag_chunks
+```
+
+## Behavior Rules
+
+1. Do not expose chunk-size/overlap or low-level OCR internals unless user explicitly asks.
+2. Keep defaults:
+ - `--struct-mode llamaindex`
+ - `--inline-ocr strip`
+ - `--image-inline marker`
+3. If collection or embedding model changes, allow full rebuild.
+4. Keep state in `--manifest` (default `.rag/state/manifest.json`) to support incremental update.
+5. Runtime retrieval policy:
+ - prefer plugin auto-inject with `<rag_state>` meta on every model step
+ - use `rag_search` to progressively reveal evidence text
+ - avoid repeated retrieval in the same query cluster unless new evidence appears
+ - use `rag_search` mode progressively: `state` -> `delta` -> `brief`
+ - use `expand` only for explicit debugging or when the user asks to inspect evidence details
+6. Debugging:
+ - enable with `RAG_DEBUG=1`
+ - inspect `.rag/log/rag_debug.jsonl`
+ - summarize quickly with `python script/rag/debug-rag-state.py --tail 100`
+7. On failure, return:
+ - exact command
+ - stderr summary
+ - recovery action
diff --git a/.opencode/tool/rag_search.ts b/.opencode/tool/rag_search.ts
new file mode 100644
index 00000000000..1dfd760133f
--- /dev/null
+++ b/.opencode/tool/rag_search.ts
@@ -0,0 +1,149 @@
+/// <reference types="@opencode-ai/plugin" />
+import { tool } from "@opencode-ai/plugin"
+import path from "path"
+import DESCRIPTION from "./rag_search.txt"
+import {
+ allowExpand,
+ audit,
+ base,
+ brief,
+ chars,
+ cluster,
+ collection,
+ db,
+ decide,
+ expand,
+ expandChars,
+ key,
+ model,
+ parse,
+ py,
+ rewriteMode,
+ rewriteModel,
+ rewriteQueries,
+ root,
+ row,
+ session,
+ stateBlock,
+ summary,
+} from "../rag"
+
+export default tool({
+ description: DESCRIPTION,
+ args: {
+ query: tool.schema.string().describe("Search query text"),
+ top_k: tool.schema.number().describe("Maximum hits to return").default(3),
+ node_type: tool.schema.enum(["any", "text", "image"]).describe("Filter node type").default("text"),
+ mode: tool.schema.enum(["state", "delta", "brief", "expand"]).describe("Result disclosure mode").default("delta"),
+ },
+ async execute(args, ctx) {
+ const baseDir = root(ctx?.worktree || ctx?.directory || process.cwd())
+ const python = py(baseDir)
+ const script = path.join(baseDir, "script", "rag", "search-vector-index.py")
+ const dbPath = db(baseDir)
+ const show = args.mode === "expand" ? expandChars() : chars()
+ const res =
+ await Bun.$`${python} ${script} --query ${args.query} --db-path ${dbPath} --collection ${collection()} --model ${model()} --top-k ${args.top_k} --node-type ${args.node_type} --show-text-chars ${show} --base-url ${base()} --api-key ${key()} --format json --rewrite ${rewriteMode()} --rewrite-model ${rewriteModel()} --rewrite-queries ${rewriteQueries()}`
+ .quiet()
+ .nothrow()
+ const out = res.stdout.toString().trim()
+ const sessionID = String(ctx?.sessionID || ctx?.sessionId || baseDir)
+ const keyName = cluster(args.query)
+ const st = session(sessionID)
+ const hit = row(st, keyName)
+
+ if (res.exitCode !== 0) {
+ const err = res.stderr.toString().trim()
+ await audit(baseDir, {
+ channel: "rag_search",
+ event: "tool_error",
+ sessionID,
+ query: args.query,
+ cluster: keyName,
+ mode: args.mode,
+ code: res.exitCode,
+ stderr: err.slice(0, 1200),
+ stdout: out.slice(0, 1200),
+ })
+ return JSON.stringify(
+ {
+ error: "rag_search_failed",
+ exit_code: res.exitCode,
+ worktree: baseDir,
+ python,
+ script,
+ db_path: dbPath,
+ collection: collection(),
+ model: model(),
+ base_url: base(),
+ mode: args.mode,
+ stderr: err.slice(0, 1200),
+ stdout: out.slice(0, 1200),
+ hint: "verify OPENAI_BASE_URL/OPENAI_API_KEY, collection exists, and venv has openai/qdrant-client",
+ },
+ null,
+ 2,
+ )
+ }
+
+ let dataRes = { hits: [], rewrites: [args.query], keywords: [], rewrite_mode: "none" } as ReturnType<typeof parse>
+ try {
+ dataRes = parse(out)
+ } catch {
+ await audit(baseDir, {
+ channel: "rag_search",
+ event: "tool_parse_fail",
+ sessionID,
+ query: args.query,
+ cluster: keyName,
+ mode: args.mode,
+ raw: out.slice(0, 1200),
+ })
+ return out.slice(0, 1000)
+ }
+
+ const data = decide(hit, dataRes.hits, args.query, dataRes.rewrites)
+ const head = stateBlock(keyName, hit, data.next)
+ const body =
+ args.mode === "state"
+ ? ""
+ : args.mode === "expand"
+ ? allowExpand()
+ ? expand(dataRes.hits, args.top_k)
+ : "expand_blocked=1\nhint=use mode=delta or mode=brief unless debugging with RAG_ALLOW_EXPAND_TOOL=1"
+ : args.mode === "brief"
+ ? brief(dataRes.hits, args.top_k)
+ : data.delta.length
+ ? brief(data.delta, args.top_k)
+ : "no_new_delta"
+
+ await audit(baseDir, {
+ channel: "rag_search",
+ event: "tool_search",
+ sessionID,
+ query: args.query,
+ cluster: keyName,
+ mode: args.mode,
+ node_type: args.node_type,
+ status: data.status,
+ reason: data.reason,
+ total_hits: data.total,
+ delta_hits: data.delta.length,
+ known_hits: data.known,
+ overlap: data.overlap,
+ rewrite_mode: dataRes.rewrite_mode,
+ top_hits: summary(hit.top, 3),
+ delta_fps: data.delta.map((x) => ({
+ fp: `${x.text_file || x.source_url || ""}#${x.chunk_id || x.image_id || x.section_title || ""}`,
+ source_url: x.source_url || "",
+ section_title: x.section_title || "",
+ chunk_id: x.chunk_id || "",
+ })),
+ emitted_context: args.mode !== "state",
+ rewrites: hit.rewrites,
+ keywords: dataRes.keywords,
+ })
+
+ return body ? `${head}\n${body}` : head
+ },
+})
diff --git a/.opencode/tool/rag_search.txt b/.opencode/tool/rag_search.txt
new file mode 100644
index 00000000000..d38d2b8e510
--- /dev/null
+++ b/.opencode/tool/rag_search.txt
@@ -0,0 +1,17 @@
+Search local RAG vector index and return ranked evidence snippets for the current query.
+
+Use this tool when:
+- the user asks about project docs, internal wiki, SOP, or known indexed materials
+- you need grounded context before answering
+
+Behavior:
+- reads local qdrant index under .rag/vector/qdrant
+- may rewrite long queries into multiple focused retrieval queries before searching
+- shares the same session/cluster state used by `rag_context`
+- supports progressive disclosure via mode:
+ - `state`: retrieval state only, no evidence body
+ - `delta`: only new evidence within current query cluster (default)
+ - `brief`: short evidence list for current hits
+ - `expand`: richer per-hit details for follow-up drilling
+- default output is compact and should not dump full raw retrieval payload
+- do not use `expand` in normal QA unless the user explicitly asks to inspect evidence details
diff --git a/script/rag/build-offline-bundle.sh b/script/rag/build-offline-bundle.sh
new file mode 100755
index 00000000000..68353a56caa
--- /dev/null
+++ b/script/rag/build-offline-bundle.sh
@@ -0,0 +1,132 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)
+OUT=${RAG_OFFLINE_OUT:-"$ROOT/.rag/offline/bundle"}
+PY=${RAG_DOCLING_PYTHON:-python3}
+LANGS=${RAG_TESS_LANGS:-"eng chi-sim"}
+DOC_REQ=${RAG_DOCLING_REQUIREMENTS:-"$ROOT/script/rag/requirements-docling.txt"}
+LLM_REQ=${RAG_LLAMA_REQUIREMENTS:-"$ROOT/script/rag/requirements-llamaindex.txt"}
+VECTOR_REQ=${RAG_VECTOR_REQUIREMENTS:-"$ROOT/script/rag/requirements-vector.txt"}
+INCLUDE_LLM=false
+INCLUDE_VECTOR=false
+
+usage() {
+ cat <<'EOF'
+Build an offline bundle for Ubuntu hosts with limited mirror/network access.
+
+Usage:
+ script/rag/build-offline-bundle.sh [--out DIR] [--python BIN] [--langs "eng chi-sim"] [--include-llamaindex] [--include-vectordb]
+
+Options:
+ --out DIR Bundle output directory (default: ./.rag/offline/bundle)
+ --python BIN Python executable used for wheel download (default: python3)
+ --langs "a b" Tesseract language packs (default: "eng chi-sim")
+ --include-llamaindex Also download llamaindex wheels
+ --include-vectordb Also download vector db wheels (qdrant-client/openai)
+ -h, --help Show help
+EOF
+}
+
+while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --out)
+ OUT="$2"
+ shift 2
+ ;;
+ --python)
+ PY="$2"
+ shift 2
+ ;;
+ --langs)
+ LANGS="$2"
+ shift 2
+ ;;
+ --include-llamaindex)
+ INCLUDE_LLM=true
+ shift
+ ;;
+ --include-vectordb)
+ INCLUDE_VECTOR=true
+ shift
+ ;;
+ -h|--help)
+ usage
+ exit 0
+ ;;
+ *)
+ echo "unknown argument: $1" >&2
+ usage
+ exit 1
+ ;;
+ esac
+done
+
+if ! command -v "$PY" >/dev/null 2>&1; then
+ echo "python executable not found: $PY" >&2
+ exit 1
+fi
+if ! command -v apt-get >/dev/null 2>&1; then
+ echo "apt-get not found, this script targets Debian/Ubuntu" >&2
+ exit 1
+fi
+
+rm -rf "$OUT"
+mkdir -p "$OUT/wheelhouse" "$OUT/deb" "$OUT/script/rag"
+
+TMP=$(mktemp -d)
+trap 'rm -rf "$TMP"' EXIT
+"$PY" -m venv "$TMP/venv"
+
+"$TMP/venv/bin/python" -m pip install -U pip
+"$TMP/venv/bin/pip" download -r "$DOC_REQ" -d "$OUT/wheelhouse"
+
+if [[ "$INCLUDE_LLM" == "true" && -f "$LLM_REQ" ]]; then
+ "$TMP/venv/bin/pip" download -r "$LLM_REQ" -d "$OUT/wheelhouse"
+fi
+if [[ "$INCLUDE_VECTOR" == "true" && -f "$VECTOR_REQ" ]]; then
+ "$TMP/venv/bin/pip" download -r "$VECTOR_REQ" -d "$OUT/wheelhouse"
+fi
+
+declare -a PKGS=("tesseract-ocr")
+read -ra ITEMS <<<"$LANGS"
+for l in "${ITEMS[@]}"; do
+ [[ -z "$l" ]] && continue
+ PKGS+=("tesseract-ocr-${l//_/-}")
+done
+
+if command -v apt-rdepends >/dev/null 2>&1; then
+ mapfile -t ALL < <(
+ apt-rdepends "${PKGS[@]}" 2>/dev/null |
+ awk '/^[a-zA-Z0-9]/ { print $1 }' |
+ rg -v '^(Reading|Building|Depends|PreDepends|Recommends|Suggests)$' |
+ sort -u
+ )
+else
+ echo "warning: apt-rdepends not installed, only top-level tesseract packages will be downloaded." >&2
+ ALL=("${PKGS[@]}")
+fi
+
+(
+ cd "$OUT/deb"
+ apt-get download "${ALL[@]}"
+)
+
+cp "$ROOT/script/rag/install-docling.sh" "$OUT/script/rag/"
+cp "$ROOT/script/rag/install-tesseract.sh" "$OUT/script/rag/"
+cp "$ROOT/script/rag/install-vector.sh" "$OUT/script/rag/"
+cp "$ROOT/script/rag/install-offline-bundle.sh" "$OUT/script/rag/" 2>/dev/null || true
+cp "$ROOT/script/rag/build-vector-index.py" "$OUT/script/rag/" 2>/dev/null || true
+cp "$ROOT/script/rag/search-vector-index.py" "$OUT/script/rag/" 2>/dev/null || true
+cp "$ROOT/script/rag/requirements-docling.txt" "$OUT/script/rag/"
+if [[ -f "$LLM_REQ" ]]; then
+ cp "$LLM_REQ" "$OUT/script/rag/"
+fi
+if [[ -f "$VECTOR_REQ" ]]; then
+ cp "$VECTOR_REQ" "$OUT/script/rag/"
+fi
+
+sha256sum "$OUT"/wheelhouse/* "$OUT"/deb/* >"$OUT/SHA256SUMS.txt"
+tar -C "$(dirname "$OUT")" -czf "${OUT%/}.tar.gz" "$(basename "$OUT")"
+echo "bundle directory: $OUT"
+echo "bundle archive: ${OUT%/}.tar.gz"
diff --git a/script/rag/build-vector-index.py b/script/rag/build-vector-index.py
new file mode 100755
index 00000000000..37ea3debc54
--- /dev/null
+++ b/script/rag/build-vector-index.py
@@ -0,0 +1,402 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import re
+import sys
+import time
+import uuid
+from pathlib import Path
+
# Matches inline OCR payloads of the form [IMAGE_OCR]...[/IMAGE_OCR]
# (non-greedy, spanning newlines via [\s\S]).
IMAGE_OCR_RE = re.compile(r"\[IMAGE_OCR\][\s\S]*?\[/IMAGE_OCR\]")


def clean(text: str) -> str:
    """Collapse every whitespace run to a single space and trim the ends."""
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
+
+
def normalize_text(text: str, strip_inline_ocr: bool) -> str:
    """Clean chunk text for embedding, optionally dropping inline [IMAGE_OCR] blocks first."""
    raw = text or ""
    return clean(IMAGE_OCR_RE.sub(" ", raw) if strip_inline_ocr else raw)
+
+
def is_rate_limit_error(e: Exception) -> bool:
    """Heuristically detect rate-limit failures from the exception message."""
    msg = str(e).lower()
    return any(token in msg for token in ("rate limit", "too many requests", "429"))
+
+
def embed_texts(
    client,
    model: str,
    texts: list[str],
    max_retries: int,
    retry_initial: float,
) -> list[list[float]]:
    """Embed `texts` via an OpenAI-compatible client, retrying rate limits with exponential backoff.

    Non-rate-limit errors, or exhausting `max_retries`, re-raise immediately.
    The backoff delay starts at max(0.2, retry_initial) and doubles up to 30s.
    """
    attempts = 0
    wait = max(0.2, retry_initial)
    while True:
        try:
            resp = client.embeddings.create(model=model, input=texts)
        except Exception as e:
            if not is_rate_limit_error(e) or attempts >= max_retries:
                raise
            attempts += 1
            print(
                f"[embed] rate limit; retry {attempts}/{max_retries} after {wait:.1f}s",
                file=sys.stderr,
            )
            time.sleep(wait)
            wait = min(wait * 2, 30)
        else:
            return [item.embedding for item in resp.data]
+
+
def list_inputs(paths: list[str], input_dir: str, glob: str) -> list[Path]:
    """Resolve explicit paths plus an optional directory glob into a deduplicated list of existing files."""
    candidates = [Path(p) for p in paths]
    if input_dir:
        candidates += sorted(Path(input_dir).glob(glob))
    result: list[Path] = []
    seen: set[Path] = set()
    for candidate in candidates:
        resolved = candidate.resolve()
        if resolved in seen:
            continue
        seen.add(resolved)
        # Directories and missing paths are silently skipped.
        if resolved.is_file():
            result.append(resolved)
    return result
+
+
def doc_key(path: Path, root: Path) -> str:
    """Key a document by its path relative to `root`, falling back to the absolute path."""
    resolved = path.resolve()
    try:
        return str(resolved.relative_to(root.resolve()))
    except ValueError:
        # Path lives outside root; use the absolute form.
        return str(resolved)
+
+
def delete_keys(direct: list[str], file_path: str) -> list[str]:
    """Collect doc keys to delete from an explicit list plus an optional newline-delimited file.

    Blank entries are dropped; the result is deduplicated and sorted.
    """
    keys = [k for k in direct if k]
    if file_path:
        listing = Path(file_path)
        if listing.exists():
            text = listing.read_text(encoding="utf-8", errors="ignore")
            keys.extend(stripped for stripped in (s.strip() for s in text.splitlines()) if stripped)
    return sorted(set(keys))
+
+
def merge_images(data: dict) -> list[dict]:
    """Normalize image metadata into uniform rows.

    Prefers an explicit top-level `image_nodes` list; otherwise merges the
    per-section `images` entries, accumulating section ids and filling each
    text field from the first non-empty occurrence.
    """
    explicit = data.get("image_nodes")
    if isinstance(explicit, list):
        rows = []
        for node in explicit:
            node_id = node.get("image_id") or node.get("id")
            if not node_id:
                continue
            rows.append(
                {
                    "id": node_id,
                    "section_ids": node.get("section_ids", []),
                    "source_url": node.get("source_url", ""),
                    "alt": node.get("alt", ""),
                    "ocr_text": node.get("ocr_text", ""),
                }
            )
        return rows

    merged: dict = {}
    for section in data.get("sections", []):
        section_id = section.get("id")
        for img in section.get("images", []):
            img_id = img.get("id")
            if not img_id:
                continue
            entry = merged.get(img_id)
            if entry is None:
                entry = {
                    "id": img_id,
                    "section_ids": [],
                    "source_url": img.get("url", ""),
                    "alt": img.get("alt", ""),
                    "ocr_text": img.get("ocr_text", ""),
                }
            if section_id and section_id not in entry["section_ids"]:
                entry["section_ids"].append(section_id)
            # First-non-empty wins for each text field.
            for field, src_key in (("source_url", "url"), ("alt", "alt"), ("ocr_text", "ocr_text")):
                if not entry[field]:
                    entry[field] = img.get(src_key, "")
            merged[img_id] = entry
    return list(merged.values())
+
+
def load_nodes(
    paths: list[Path],
    include_images: bool,
    strip_inline_ocr: bool,
    image_min_chars: int,
    root: Path,
) -> list[dict]:
    """Flatten structured-JSON documents into embeddable rows.

    Produces one "text" row per non-empty chunk and, when include_images is
    True, one "image" row per merged image whose alt+OCR text reaches
    image_min_chars. Each row is {"id", "text", "payload"}; the id is a
    deterministic UUIDv5 of "<path>:<chunk id>" (or the image variant), so
    re-indexing the same inputs upserts instead of duplicating points.
    """
    rows = []
    for path in paths:
        data = json.loads(path.read_text(encoding="utf-8", errors="ignore"))
        source_url = data.get("source_url", "")
        text_file = data.get("text_file", str(path))
        key = doc_key(path, root)
        for i, item in enumerate(data.get("chunks", [])):
            # Optionally drop inline [IMAGE_OCR] blocks before embedding.
            text = normalize_text(item.get("text", ""), strip_inline_ocr)
            if not text:
                continue
            # Stable raw id: falls back to the chunk's list index when it has no id.
            raw = f"{path}:{item.get('id', i)}"
            pid = str(uuid.uuid5(uuid.NAMESPACE_URL, raw))
            meta = item.get("metadata") or {}
            rows.append(
                {
                    "id": pid,
                    "text": text,
                    "payload": {
                        "node_type": "text",
                        "chunk_id": item.get("id", f"chunk-{i}"),
                        "section_id": item.get("section_id", ""),
                        "section_title": item.get("section_title", ""),
                        # Chunk-level metadata overrides the document-level value.
                        "source_url": meta.get("source_url") or source_url,
                        "text_file": meta.get("text_file") or text_file,
                        "doc_key": key,
                        "image_ids": item.get("image_ids", []),
                        "char_len": meta.get("char_len", len(text)),
                        # Text is duplicated into the payload so search hits can be shown verbatim.
                        "text": text,
                        "raw_id": raw,
                    },
                }
            )
        if not include_images:
            continue
        for i, item in enumerate(merge_images(data)):
            iid = item.get("id")
            # Embeddable text: marker line plus whatever alt/OCR text exists.
            txt = clean(
                "\n".join(
                    x
                    for x in [
                        f"[IMAGE:{iid}]",
                        item.get("alt", ""),
                        item.get("ocr_text", ""),
                    ]
                    if x
                )
            )
            # Skip images whose human-readable text (alt + OCR, marker excluded)
            # is too short to be a useful retrieval target.
            if len(clean((item.get("alt", "") + " " + item.get("ocr_text", "")).strip())) < image_min_chars:
                continue
            raw = f"{path}:image:{iid}:{i}"
            pid = str(uuid.uuid5(uuid.NAMESPACE_URL, raw))
            rows.append(
                {
                    "id": pid,
                    "text": txt,
                    "payload": {
                        "node_type": "image",
                        "image_id": iid,
                        "section_ids": item.get("section_ids", []),
                        "section_title": "",
                        "source_url": item.get("source_url", "") or source_url,
                        "text_file": text_file,
                        "doc_key": key,
                        "image_ids": [iid],
                        "char_len": len(txt),
                        "text": txt,
                        "alt": item.get("alt", ""),
                        "ocr_text": item.get("ocr_text", ""),
                        "raw_id": raw,
                    },
                }
            )
    return rows
+
+
def has_collection(client, name: str) -> bool:
    """Return True if the Qdrant collection `name` exists.

    `client` is a qdrant_client.QdrantClient. The annotation is intentionally
    omitted: QdrantClient is imported lazily inside main(), so the previous
    module-level `client: QdrantClient` annotation named a symbol that does
    not exist at module scope — harmless only because of
    `from __future__ import annotations`, and broken for any runtime hint
    resolution (e.g. typing.get_type_hints).

    Prefers the modern collection_exists() API and falls back to scanning
    get_collections() for older client versions.
    """
    if hasattr(client, "collection_exists"):
        return bool(client.collection_exists(name))
    cols = client.get_collections().collections
    return any(c.name == name for c in cols)
+
+
def delete_doc_keys(client, models, collection: str, keys: list[str]) -> int:
    """Delete every point whose `doc_key` payload matches one of `keys`.

    No-ops (returns 0) when there are no keys or the collection is absent;
    otherwise returns the number of keys processed.
    """
    if not keys or not has_collection(client, collection):
        return 0
    for doc in keys:
        selector = models.Filter(
            must=[models.FieldCondition(key="doc_key", match=models.MatchValue(value=doc))]
        )
        client.delete(collection_name=collection, points_selector=selector, wait=True)
    return len(keys)
+
+
def main() -> None:
    """CLI entry point: embed structured-JSON chunks and upsert them into a local Qdrant collection.

    Supports pure-deletion runs (only --delete-doc-key/--delete-doc-keys-file),
    incremental upserts (deterministic point ids from load_nodes), and full
    rebuilds (--recreate). Prints a JSON summary to stdout.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--input", action="append", default=[])
    p.add_argument("--input-dir", default="")
    p.add_argument("--glob", default="*.structured.json")
    p.add_argument("--db-path", default=".rag/vector/qdrant")
    p.add_argument("--collection", default="rag_chunks")
    p.add_argument("--model", default="nomic-embed-text")
    p.add_argument("--base-url", default="")
    p.add_argument("--api-key", default="")
    p.add_argument("--batch-size", type=int, default=16)
    p.add_argument("--max-retries", type=int, default=6)
    p.add_argument("--retry-initial", type=float, default=1.5)
    p.add_argument("--no-image-nodes", action="store_true")
    p.add_argument("--keep-inline-ocr", action="store_true")
    p.add_argument("--image-min-chars", type=int, default=2)
    p.add_argument("--root", default=".")
    p.add_argument("--delete-doc-key", action="append", default=[])
    p.add_argument("--delete-doc-keys-file", default="")
    p.add_argument("--recreate", action="store_true")
    args = p.parse_args()

    # Lazy imports so argument parsing/--help work without the optional deps,
    # and so a missing dependency produces an actionable message.
    try:
        from openai import OpenAI
        from qdrant_client import QdrantClient, models
    except ModuleNotFoundError as e:
        raise SystemExit(
            f"missing dependency: {e.name}. run: bash script/rag/install-vector.sh"
        ) from e

    inputs = list_inputs(args.input, args.input_dir, args.glob)
    root = Path(args.root)
    del_keys = delete_keys(args.delete_doc_key, args.delete_doc_keys_file)

    rows = (
        load_nodes(
            inputs,
            include_images=not args.no_image_nodes,
            strip_inline_ocr=not args.keep_inline_ocr,
            image_min_chars=max(0, args.image_min_chars),
            root=root,
        )
        if inputs
        else []
    )
    if not rows and not del_keys:
        raise SystemExit("no input files and no delete doc keys; nothing to do")

    # Credential/base-URL precedence: CLI flag > env var > local Ollama default.
    # "ollama" is a placeholder key for servers that do not check auth.
    key = args.api_key or os.getenv("OPENAI_API_KEY") or os.getenv("MINIMAX_API_KEY") or "ollama"
    base = args.base_url or os.getenv("OPENAI_BASE_URL") or "http://127.0.0.1:11434/v1"
    # Only build an embedding client when there is something to embed.
    embed = OpenAI(api_key=key, base_url=base) if rows else None

    db_path = Path(args.db_path)
    db_path.mkdir(parents=True, exist_ok=True)
    qdrant = QdrantClient(path=str(db_path))
    deleted = 0

    if args.recreate and has_collection(qdrant, args.collection):
        qdrant.delete_collection(collection_name=args.collection)
    if del_keys:
        deleted = delete_doc_keys(qdrant, models, args.collection, del_keys)

    # Pure-deletion run: report and stop before touching the embedding backend.
    if not rows:
        count = qdrant.count(collection_name=args.collection, exact=True).count if has_collection(qdrant, args.collection) else 0
        print(
            json.dumps(
                {
                    "db_path": str(db_path),
                    "collection": args.collection,
                    "input_files": 0,
                    "inserted": 0,
                    "deleted_doc_keys": deleted,
                    "collection_count": count,
                    "text_nodes": 0,
                    "image_nodes": 0,
                    "vector_size": 0,
                    "embedding_model": args.model,
                    "embedding_base_url": base,
                },
                ensure_ascii=False,
                indent=2,
            )
        )
        return

    # Embed the first row alone to discover the vector dimension before the
    # collection is (possibly) created.
    vec0 = embed_texts(
        embed,
        args.model,
        [rows[0]["text"]],
        args.max_retries,
        args.retry_initial,
    )[0]
    dim = len(vec0)
    if dim <= 0:
        raise SystemExit("embedding result is empty")
    if not has_collection(qdrant, args.collection):
        qdrant.create_collection(
            collection_name=args.collection,
            vectors_config=models.VectorParams(size=dim, distance=models.Distance.COSINE),
        )

    # Embed the remaining rows in batches; row 0 is already embedded above,
    # so the loop starts at index 1.
    batch_size = max(1, args.batch_size)
    total = 0
    batch = [{"id": rows[0]["id"], "vector": vec0, "payload": rows[0]["payload"]}]
    for i in range(1, len(rows), batch_size):
        seg = rows[i : i + batch_size]
        vecs = embed_texts(
            embed,
            args.model,
            [x["text"] for x in seg],
            args.max_retries,
            args.retry_initial,
        )
        batch.extend(
            {
                "id": seg[j]["id"],
                "vector": vecs[j],
                "payload": seg[j]["payload"],
            }
            for j in range(len(seg))
        )

    # Upsert (insert-or-replace by id) in the same batch size.
    for i in range(0, len(batch), batch_size):
        seg = batch[i : i + batch_size]
        qdrant.upsert(
            collection_name=args.collection,
            points=[
                models.PointStruct(id=item["id"], vector=item["vector"], payload=item["payload"])
                for item in seg
            ],
            wait=True,
        )
        total += len(seg)

    # Final summary for the indexing run.
    count = qdrant.count(collection_name=args.collection, exact=True).count
    text_nodes = sum(1 for x in rows if x["payload"].get("node_type") == "text")
    image_nodes = sum(1 for x in rows if x["payload"].get("node_type") == "image")
    print(
        json.dumps(
            {
                "db_path": str(db_path),
                "collection": args.collection,
                "input_files": len(inputs),
                "inserted": total,
                "deleted_doc_keys": deleted,
                "collection_count": count,
                "text_nodes": text_nodes,
                "image_nodes": image_nodes,
                "vector_size": dim,
                "embedding_model": args.model,
                "embedding_base_url": base,
            },
            ensure_ascii=False,
            indent=2,
        )
    )


if __name__ == "__main__":
    main()
diff --git a/script/rag/clean-text.py b/script/rag/clean-text.py
new file mode 100755
index 00000000000..72c9ea64134
--- /dev/null
+++ b/script/rag/clean-text.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import re
+from pathlib import Path
+
+
def normalize(text: str) -> str:
    """Unify line endings, squeeze horizontal whitespace, cap blank-line runs at one, ensure a trailing newline."""
    unified = text.replace("\r\n", "\n").replace("\r", "\n")
    unified = re.sub(r"[ \t]+", " ", unified)
    unified = re.sub(r"\n{3,}", "\n\n", unified)
    return unified.strip() + "\n"
+
+
def drop_noise(lines: list[str]) -> list[str]:
    """Filter navigation/link noise and long duplicate lines from extracted text.

    Keeps blank lines (normalized to ""), drops pagination markers
    ("[上一页 ...", "[下一页 ..."), markdown bullets that are purely links
    ("- [...](...)"), and any line whose whitespace-squeezed form is longer
    than 80 chars and has already been seen. Short repeats (headings, table
    labels) are intentionally kept.
    """
    out = []
    seen = set()
    for line in lines:
        row = line.strip()
        if not row:
            # Preserve paragraph breaks as empty strings.
            out.append("")
            continue
        # Scraped-page pagination markers.
        if row.startswith("[上一页 ") or row.startswith("[下一页 "):
            continue
        # Markdown bullet that is purely a link: "- [text](url)".
        if row.startswith("- [") and row.endswith(")"):
            continue
        # (Removed unreachable `if row == "": continue` — row is already
        # stripped and the falsy case is handled above.)
        key = re.sub(r"\s+", " ", row)
        # Deduplicate only long lines; the whitespace-squeezed form is the key.
        if key in seen and len(key) > 80:
            continue
        seen.add(key)
        out.append(line)
    return out
+
+
def main() -> None:
    """CLI: read --input, strip noise lines, normalize whitespace, write --output."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True)
    parser.add_argument("--output", required=True)
    opts = parser.parse_args()

    raw = Path(opts.input).read_text(encoding="utf-8", errors="ignore")
    kept = drop_noise(raw.splitlines())
    Path(opts.output).write_text(normalize("\n".join(kept)), encoding="utf-8")


if __name__ == "__main__":
    main()
+
diff --git a/script/rag/cmd/rag-bootstrap.sh b/script/rag/cmd/rag-bootstrap.sh
new file mode 100755
index 00000000000..722983be004
--- /dev/null
+++ b/script/rag/cmd/rag-bootstrap.sh
@@ -0,0 +1,82 @@
#!/usr/bin/env bash
set -euo pipefail

# Repo root: this script lives in script/rag/cmd/, three levels below it.
ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)
TARGET=""           # destination project root (required, via --target)
WITH_OPENCODE=true  # also copy .opencode assets unless --no-opencode

# Print CLI help (heredoc content is user-facing output, kept verbatim).
usage() {
  cat <<'EOF'
Copy RAG pipeline scripts and optional OpenCode assets to another project.

Usage:
  bash script/rag/cmd/rag-bootstrap.sh --target /path/to/target [--no-opencode]

Options:
  --target DIR    Target project root
  --no-opencode   Do not copy .opencode plugin/tool/skill files
  -h, --help      Show help
EOF
}
+
# Copy a directory tree, excluding __pycache__ directories and *.pyc files.
# Prefers rsync; otherwise falls back to a find|cp loop.
copy_dir() {
  local src="$1"
  local dst="$2"
  mkdir -p "$dst"
  if command -v rsync >/dev/null 2>&1; then
    rsync -a --exclude '__pycache__' --exclude '*.pyc' "$src"/ "$dst"/
    return
  fi
  # NUL-delimited stream so filenames containing spaces, leading/trailing
  # whitespace, or newlines survive the loop (the previous newline-delimited
  # `read` mangled such names).
  find "$src" -type d -name "__pycache__" -prune -o -type f ! -name '*.pyc' -print0 |
    while IFS= read -r -d '' file; do
      rel=${file#"$src"/}
      mkdir -p "$dst/$(dirname "$rel")"
      cp -f "$file" "$dst/$rel"
    done
}
+
# CLI parsing: value flags consume two positional args.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --target)
      TARGET="$2"
      shift 2
      ;;
    --no-opencode)
      WITH_OPENCODE=false
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "unknown argument: $1" >&2
      usage
      exit 1
      ;;
  esac
done

if [[ -z "$TARGET" ]]; then
  echo "--target is required" >&2
  usage
  exit 1
fi

# Always copy the pipeline scripts themselves.
mkdir -p "$TARGET/script"
copy_dir "$ROOT/script/rag" "$TARGET/script/rag"

# Optionally copy the OpenCode tool/plugin/skill assets that wire the
# pipeline into the editor.
if [[ "$WITH_OPENCODE" == "true" ]]; then
  mkdir -p "$TARGET/.opencode/tool" "$TARGET/.opencode/plugins" "$TARGET/.opencode/skills/rag-pipeline"
  cp -f "$ROOT/.opencode/tool/rag_search.ts" "$TARGET/.opencode/tool/rag_search.ts"
  cp -f "$ROOT/.opencode/tool/rag_search.txt" "$TARGET/.opencode/tool/rag_search.txt"
  cp -f "$ROOT/.opencode/plugins/rag_context.ts" "$TARGET/.opencode/plugins/rag_context.ts"
  cp -f "$ROOT/.opencode/skills/rag-pipeline/SKILL.md" "$TARGET/.opencode/skills/rag-pipeline/SKILL.md"
  cp -f "$ROOT/.opencode/rag.ts" "$TARGET/.opencode/rag.ts"
fi

# Summary plus the follow-up steps to run inside the target project.
echo "bootstrap_done target=$TARGET with_opencode=$WITH_OPENCODE"
echo "next:"
echo " 1) cd $TARGET"
echo " 2) bash script/rag/install-docling.sh"
echo " 3) bash script/rag/install-vector.sh"
echo " 4) bash script/rag/cmd/rag-init.sh --help"
diff --git a/script/rag/cmd/rag-init.sh b/script/rag/cmd/rag-init.sh
new file mode 100644
index 00000000000..bfb728b640c
--- /dev/null
+++ b/script/rag/cmd/rag-init.sh
@@ -0,0 +1,14 @@
#!/usr/bin/env bash
set -euo pipefail

# Thin launcher for the "init" subcommand of the RAG pipeline.
ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)

# Interpreter preference: env override > docling venv > system python3.
PY="${RAG_DOCLING_PYTHON_BIN:-}"
if [[ -z "$PY" && -x "$ROOT/.venv-docling/bin/python" ]]; then
  PY="$ROOT/.venv-docling/bin/python"
fi
PY="${PY:-python3}"

exec "$PY" "$ROOT/script/rag/rag-pipeline.py" init "$@"
diff --git a/script/rag/cmd/rag-update.sh b/script/rag/cmd/rag-update.sh
new file mode 100644
index 00000000000..1c518a8879e
--- /dev/null
+++ b/script/rag/cmd/rag-update.sh
@@ -0,0 +1,14 @@
#!/usr/bin/env bash
set -euo pipefail

# Thin launcher for the "update" subcommand of the RAG pipeline.
ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)

# Interpreter preference: env override > docling venv > system python3.
PY="${RAG_DOCLING_PYTHON_BIN:-}"
if [[ -z "$PY" && -x "$ROOT/.venv-docling/bin/python" ]]; then
  PY="$ROOT/.venv-docling/bin/python"
fi
PY="${PY:-python3}"

exec "$PY" "$ROOT/script/rag/rag-pipeline.py" update "$@"
diff --git a/script/rag/compare-structured.py b/script/rag/compare-structured.py
new file mode 100755
index 00000000000..623eecb7ce9
--- /dev/null
+++ b/script/rag/compare-structured.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import re
+from pathlib import Path
+
# Inline OCR payloads: [IMAGE_OCR]...[/IMAGE_OCR], non-greedy, across newlines.
OCR_RE = re.compile(r"\[IMAGE_OCR\][\s\S]*?\[/IMAGE_OCR\]")


def load(path: Path) -> dict:
    """Read and parse one structured-JSON document."""
    return json.loads(path.read_text(encoding="utf-8", errors="ignore"))


def metrics(data: dict) -> dict:
    """Compute count and size statistics for a structured document."""
    chunks = data.get("chunks", [])
    texts = [c.get("text", "") for c in chunks]
    lengths = [len(t) for t in texts]
    return {
        "chunks": len(chunks),
        "sections": len(data.get("sections", [])),
        "image_nodes": len(data.get("image_nodes", [])),
        "nodes": len(data.get("nodes", [])),
        "chunks_with_image_refs": sum(1 for c in chunks if (c.get("image_ids") or [])),
        "chunks_with_inline_ocr": sum(1 for t in texts if "[IMAGE_OCR]" in t),
        "inline_ocr_blocks_in_chunks": sum(len(OCR_RE.findall(t)) for t in texts),
        "avg_chunk_chars": round(sum(lengths) / len(lengths), 2) if lengths else 0,
    }
+
+
def main() -> None:
    """CLI: print metrics for two structured JSON files plus their new-minus-old delta."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--old", required=True)
    parser.add_argument("--new", required=True)
    opts = parser.parse_args()

    before = metrics(load(Path(opts.old)))
    after = metrics(load(Path(opts.new)))
    all_keys = sorted(set(before) | set(after))
    delta = {k: after.get(k, 0) - before.get(k, 0) for k in all_keys}
    print(json.dumps({"old": before, "new": after, "delta_new_minus_old": delta}, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()
diff --git a/script/rag/convert-dir-to-text.sh b/script/rag/convert-dir-to-text.sh
new file mode 100755
index 00000000000..43118755a3f
--- /dev/null
+++ b/script/rag/convert-dir-to-text.sh
@@ -0,0 +1,136 @@
#!/usr/bin/env bash
set -euo pipefail

# Batch-convert documents under a directory to plain text via the docling CLI.
ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)
DOC=${RAG_DOCLING_BIN:-"$ROOT/.venv-docling/bin/docling"}
IN=""
OUT=${RAG_TEXT_FILES_OUTPUT:-"$ROOT/.rag/text/files"}
EXT="pdf docx pptx html htm md txt csv xls xlsx xml"

usage() {
  cat <<'EOF'
Convert supported files in a directory to text with docling.

Usage:
  script/rag/convert-dir-to-text.sh --input DIR [--output DIR] [--ext "pdf docx html"]

Options:
  --input DIR         Source directory (required)
  --output DIR        Text output directory (default: ./.rag/text/files)
  --ext "a b c"       Extensions to include (default: pdf docx pptx html htm md txt csv xls xlsx xml)
  --docling-bin PATH  docling executable (default: ./.venv-docling/bin/docling)
  -h, --help          Show help
EOF
}

# CLI parsing: value flags consume two positional args.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --input)
      IN="$2"
      shift 2
      ;;
    --output)
      OUT="$2"
      shift 2
      ;;
    --ext)
      EXT="$2"
      shift 2
      ;;
    --docling-bin)
      DOC="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "unknown argument: $1" >&2
      usage
      exit 1
      ;;
  esac
done

if [[ -z "$IN" ]]; then
  echo "--input is required" >&2
  usage
  exit 1
fi

if [[ ! -d "$IN" ]]; then
  echo "input directory not found: $IN" >&2
  exit 1
fi

if [[ ! -x "$DOC" ]]; then
  echo "docling not found: $DOC" >&2
  exit 1
fi

# Truncate per-run logs up front so each run reports only its own results.
mkdir -p "$OUT"
SUCCESS_LOG="$OUT/_success.log"
FAIL_LOG="$OUT/_failed.log"
RUN_LOG="$OUT/_run.log"
: >"$SUCCESS_LOG"
: >"$FAIL_LOG"
: >"$RUN_LOG"

# Build a find(1) OR-expression: -iname "*.ext1" -o -iname "*.ext2" ...
declare -a FIND_EXPR=()
read -ra PARTS <<<"$EXT"
for i in "${!PARTS[@]}"; do
  e="${PARTS[$i]}"
  [[ -z "$e" ]] && continue
  if [[ "$i" -gt 0 ]]; then
    FIND_EXPR+=("-o")
  fi
  FIND_EXPR+=("-iname" "*.$e")
done

if [[ "${#FIND_EXPR[@]}" -eq 0 ]]; then
  echo "no valid extensions in --ext" >&2
  exit 1
fi

TMP=$(mktemp -d)
trap 'rm -rf "$TMP"' EXIT

mapfile -t FILES < <(find "$IN" -type f \( "${FIND_EXPR[@]}" \) | sort)
if [[ "${#FILES[@]}" -eq 0 ]]; then
  echo "no files matched in: $IN"
  exit 0
fi

OK=0
BAD=0

for f in "${FILES[@]}"; do
  # Mirror the input's relative layout under $OUT, swapping the extension to .txt.
  rel=${f#"$IN"/}
  target="$OUT/${rel%.*}.txt"
  mkdir -p "$(dirname "$target")"

  # docling writes into a scratch dir; start clean for each file.
  work="$TMP/out"
  rm -rf "$work"
  mkdir -p "$work"

  if "$DOC" "$f" --to text --output "$work" --abort-on-error >>"$RUN_LOG" 2>&1; then
    # docling names its output after the input's basename.
    b=$(basename "${f%.*}")
    src="$work/$b.txt"
    if [[ -f "$src" ]]; then
      mv "$src" "$target"
      printf '%s\n' "$target" >>"$SUCCESS_LOG"
      OK=$((OK + 1))
      continue
    fi
  fi

  # Reached on docling failure OR a missing output file.
  printf '%s\n' "$f" >>"$FAIL_LOG"
  BAD=$((BAD + 1))
done

echo "done: total=${#FILES[@]} success=$OK failed=$BAD"
echo "success log: $SUCCESS_LOG"
echo "failed log: $FAIL_LOG"
echo "run log: $RUN_LOG"
+
diff --git a/script/rag/debug-rag-state.py b/script/rag/debug-rag-state.py
new file mode 100755
index 00000000000..962014f0ade
--- /dev/null
+++ b/script/rag/debug-rag-state.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+from collections import Counter
+from pathlib import Path
+
+
def read_rows(path: Path) -> list[dict]:
    """Load JSONL rows from `path`, skipping blank and unparsable lines; [] if the file is missing."""
    rows: list[dict] = []
    if not path.exists():
        return rows
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        for raw in handle:
            candidate = raw.strip()
            if not candidate:
                continue
            try:
                parsed = json.loads(candidate)
            except Exception:
                # Tolerate torn/corrupt lines in the debug log.
                continue
            rows.append(parsed)
    return rows
+
+
def main() -> None:
    """Summarize the RAG plugin's JSONL debug log: event/status/cluster counts plus the last rows."""
    p = argparse.ArgumentParser()
    p.add_argument("--log", default=".rag/log/rag_debug.jsonl")
    p.add_argument("--tail", type=int, default=80)
    p.add_argument("--session", default="")
    p.add_argument("--channel", default="")
    p.add_argument("--full", action="store_true")
    args = p.parse_args()

    path = Path(args.log)
    rows = read_rows(path)
    # Optional exact-match filters applied before any aggregation.
    if args.session:
        rows = [x for x in rows if str(x.get("sessionID", "")) == args.session]
    if args.channel:
        rows = [x for x in rows if str(x.get("channel", "")) == args.channel]
    if not rows:
        raise SystemExit(f"no debug rows found in: {path}")

    # Aggregate over the tail window only (always at least one row).
    view = rows[-max(1, args.tail) :]
    events = Counter(str(x.get("event", "")) for x in view)
    statuses = Counter(str(x.get("status", "")) for x in view if x.get("status"))
    clusters = Counter(str(x.get("cluster", "")) for x in view if x.get("cluster"))
    channels = Counter(str(x.get("channel", "")) for x in view if x.get("channel"))
    modes = Counter(str(x.get("mode", "")) for x in view if x.get("mode"))

    print(json.dumps({
        "log": str(path),
        "rows_total": len(rows),
        "rows_view": len(view),
        "channels": dict(channels),
        "events": dict(events),
        "statuses": dict(statuses),
        "modes": dict(modes),
        "top_clusters": clusters.most_common(10),
    }, ensure_ascii=False, indent=2))

    # Echo the 20 most recent rows: --full prints each raw row, otherwise a
    # fixed projection of the commonly inspected fields.
    print("\nlast_rows:")
    for item in view[-20:]:
        keep = item if args.full else {
            "ts": item.get("ts", ""),
            "channel": item.get("channel", ""),
            "event": item.get("event", ""),
            "sessionID": item.get("sessionID", ""),
            "query": item.get("query", ""),
            "cluster": item.get("cluster", ""),
            "mode": item.get("mode", ""),
            "loop": item.get("loop", ""),
            "used_cache": item.get("used_cache", ""),
            "status": item.get("status", ""),
            "reason": item.get("reason", ""),
            "rewrite_mode": item.get("rewrite_mode", ""),
            "keywords": item.get("keywords", []),
            "total_hits": item.get("total_hits", ""),
            "delta_hits": item.get("delta_hits", ""),
            "known_hits": item.get("known_hits", ""),
            "overlap": item.get("overlap", ""),
            "top_hits": item.get("top_hits", []),
            "delta_fps": item.get("delta_fps", []),
            "rewrites": item.get("rewrites", []),
            "emitted_context": item.get("emitted_context", ""),
        }
        print(json.dumps(keep, ensure_ascii=False))


if __name__ == "__main__":
    main()
diff --git a/script/rag/install-docling.sh b/script/rag/install-docling.sh
new file mode 100755
index 00000000000..53870ccdee9
--- /dev/null
+++ b/script/rag/install-docling.sh
@@ -0,0 +1,91 @@
#!/usr/bin/env bash
set -euo pipefail

# Defaults are overridable via env vars or the CLI flags parsed below.
ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)
VENV=${RAG_DOCLING_VENV:-"$ROOT/.venv-docling"}
PY=${RAG_DOCLING_PYTHON:-python3}
REQ=${RAG_DOCLING_REQUIREMENTS:-"$ROOT/script/rag/requirements-docling.txt"}
WHEEL=${RAG_DOCLING_WHEELHOUSE:-}

usage() {
  cat <<'EOF'
Install docling into a dedicated virtual environment.

Usage:
  script/rag/install-docling.sh [--venv PATH] [--python BIN] [--requirements FILE] [--wheelhouse DIR]

Options:
  --venv PATH          Virtualenv path (default: ./.venv-docling)
  --python BIN         Python executable (default: python3)
  --requirements FILE  Requirements file (default: script/rag/requirements-docling.txt)
  --wheelhouse DIR     Offline wheels directory, enables --no-index install
  -h, --help           Show help
EOF
}

# CLI parsing: value flags consume two positional args.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --venv)
      VENV="$2"
      shift 2
      ;;
    --python)
      PY="$2"
      shift 2
      ;;
    --requirements)
      REQ="$2"
      shift 2
      ;;
    --wheelhouse)
      WHEEL="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "unknown argument: $1" >&2
      usage
      exit 1
      ;;
  esac
done

if ! command -v "$PY" >/dev/null 2>&1; then
  echo "python executable not found: $PY" >&2
  exit 1
fi

# Create the venv only if it does not already exist (idempotent re-runs).
if [[ ! -d "$VENV" ]]; then
  "$PY" -m venv "$VENV"
fi

declare -a PIP=("$VENV/bin/python" "-m" "pip" "--disable-pip-version-check")

# Offline path: install strictly from the wheelhouse, never the network.
# Falls back to the bare "docling" package when no requirements file exists.
if [[ -n "$WHEEL" ]]; then
  if [[ ! -d "$WHEEL" ]]; then
    echo "wheelhouse directory not found: $WHEEL" >&2
    exit 1
  fi
  if [[ -f "$REQ" ]]; then
    "${PIP[@]}" install --no-index --find-links "$WHEEL" -r "$REQ"
  else
    "${PIP[@]}" install --no-index --find-links "$WHEEL" docling
  fi
  "$VENV/bin/docling" --version
  echo "docling installed in: $VENV"
  exit 0
fi

# Online path: refresh build tooling first, then install docling.
"${PIP[@]}" install -U pip setuptools wheel

if [[ -f "$REQ" ]]; then
  "${PIP[@]}" install -r "$REQ"
else
  "${PIP[@]}" install docling
fi

# Smoke-test the installed CLI before declaring success.
"$VENV/bin/docling" --version
echo "docling installed in: $VENV"
diff --git a/script/rag/install-offline-bundle.sh b/script/rag/install-offline-bundle.sh
new file mode 100755
index 00000000000..3055716a097
--- /dev/null
+++ b/script/rag/install-offline-bundle.sh
@@ -0,0 +1,95 @@
#!/usr/bin/env bash
set -euo pipefail

# Install the pre-built offline bundle (apt .debs + python wheelhouse).
ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)
BUNDLE=${RAG_OFFLINE_BUNDLE:-"$ROOT/.rag/offline/bundle"}
VENV=${RAG_DOCLING_VENV:-"$ROOT/.venv-docling"}
INSTALL_LLM=false
INSTALL_VECTOR=false

usage() {
  cat <<'EOF'
Install docling+tesseract from an offline bundle.

Usage:
  script/rag/install-offline-bundle.sh [--bundle DIR] [--venv PATH] [--install-llamaindex] [--install-vectordb]

Options:
  --bundle DIR          Offline bundle directory (default: ./.rag/offline/bundle)
  --venv PATH           Venv install path (default: ./.venv-docling)
  --install-llamaindex  Install llamaindex wheels if available in bundle
  --install-vectordb    Install vector db wheels if available in bundle
  -h, --help            Show help
EOF
}

# CLI parsing.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --bundle)
      BUNDLE="$2"
      shift 2
      ;;
    --venv)
      VENV="$2"
      shift 2
      ;;
    --install-llamaindex)
      INSTALL_LLM=true
      shift
      ;;
    --install-vectordb)
      INSTALL_VECTOR=true
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "unknown argument: $1" >&2
      usage
      exit 1
      ;;
  esac
done

# The wheelhouse is the minimum viable bundle content.
if [[ ! -d "$BUNDLE" ]]; then
  echo "bundle directory not found: $BUNDLE" >&2
  exit 1
fi
if [[ ! -d "$BUNDLE/wheelhouse" ]]; then
  echo "wheelhouse not found: $BUNDLE/wheelhouse" >&2
  exit 1
fi

# Escalate via sudo only when not already root.
SUDO=""
if [[ "${EUID:-$(id -u)}" -ne 0 ]]; then
  if command -v sudo >/dev/null 2>&1; then
    SUDO="sudo"
  else
    echo "sudo not found and current user is not root." >&2
    exit 1
  fi
fi

# System packages (tesseract + dependencies) from the bundled .deb files.
if ls "$BUNDLE/deb/"*.deb >/dev/null 2>&1; then
  $SUDO apt-get install -y "$BUNDLE"/deb/*.deb
fi

# docling from the wheelhouse; the installer goes fully offline (--no-index)
# because --wheelhouse is passed.
bash "$ROOT/script/rag/install-docling.sh" \
  --venv "$VENV" \
  --requirements "$BUNDLE/script/rag/requirements-docling.txt" \
  --wheelhouse "$BUNDLE/wheelhouse"

# Optional extras: only when requested AND present in the bundle.
if [[ "$INSTALL_LLM" == "true" && -f "$BUNDLE/script/rag/requirements-llamaindex.txt" ]]; then
  "$VENV/bin/python" -m pip --disable-pip-version-check install \
    --no-index --find-links "$BUNDLE/wheelhouse" \
    -r "$BUNDLE/script/rag/requirements-llamaindex.txt"
fi
if [[ "$INSTALL_VECTOR" == "true" && -f "$BUNDLE/script/rag/requirements-vector.txt" ]]; then
  "$VENV/bin/python" -m pip --disable-pip-version-check install \
    --no-index --find-links "$BUNDLE/wheelhouse" \
    -r "$BUNDLE/script/rag/requirements-vector.txt"
fi

echo "offline install completed"
diff --git a/script/rag/install-tesseract.sh b/script/rag/install-tesseract.sh
new file mode 100755
index 00000000000..c7d2241bd37
--- /dev/null
+++ b/script/rag/install-tesseract.sh
@@ -0,0 +1,72 @@
#!/usr/bin/env bash
set -euo pipefail

# Space-separated tesseract language packs (apt package name suffixes).
LANGS=${RAG_TESS_LANGS:-"eng chi-sim"}
NO_UPDATE=false

usage() {
  cat <<'EOF'
Install tesseract OCR and language packs on Debian/Ubuntu.

Usage:
  script/rag/install-tesseract.sh [--langs "eng chi-sim"] [--no-update]

Options:
  --langs "a b"  Language packs to install (default: "eng chi-sim")
  --no-update    Skip apt update
  -h, --help     Show help
EOF
}

# CLI parsing.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --langs)
      LANGS="$2"
      shift 2
      ;;
    --no-update)
      NO_UPDATE=true
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "unknown argument: $1" >&2
      usage
      exit 1
      ;;
  esac
done

# Everything below drives apt, so bail out early on non-Debian systems.
if ! command -v apt-get >/dev/null 2>&1; then
  echo "apt-get not found. This script currently supports Debian/Ubuntu only." >&2
  exit 1
fi

# Escalate via sudo only when not already root.
SUDO=""
if [[ "${EUID:-$(id -u)}" -ne 0 ]]; then
  if command -v sudo >/dev/null 2>&1; then
    SUDO="sudo"
  else
    echo "sudo not found and current user is not root." >&2
    exit 1
  fi
fi

# Base OCR engine plus one language pack per requested language
# (underscores normalized to dashes, e.g. chi_sim -> tesseract-ocr-chi-sim).
declare -a PKGS=("tesseract-ocr")
read -ra ITEMS <<<"$LANGS"
for l in "${ITEMS[@]}"; do
  [[ -z "$l" ]] && continue
  PKGS+=("tesseract-ocr-${l//_/-}")
done

if [[ "$NO_UPDATE" != "true" ]]; then
  $SUDO apt-get update
fi
$SUDO apt-get install -y "${PKGS[@]}"

# Smoke test: show the version and the first page of installed languages.
tesseract --version | head -n 2
tesseract --list-langs | sed -n '1,40p'
echo "tesseract installed"
diff --git a/script/rag/install-vector.sh b/script/rag/install-vector.sh
new file mode 100755
index 00000000000..881786e23b2
--- /dev/null
+++ b/script/rag/install-vector.sh
@@ -0,0 +1,79 @@
#!/usr/bin/env bash
set -euo pipefail

# Defaults are overridable via env vars or the CLI flags parsed below.
# Shares the docling venv so the whole pipeline runs from one interpreter.
ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)
VENV=${RAG_DOCLING_VENV:-"$ROOT/.venv-docling"}
PY=${RAG_DOCLING_PYTHON:-python3}
REQ=${RAG_VECTOR_REQUIREMENTS:-"$ROOT/script/rag/requirements-vector.txt"}
WHEEL=${RAG_DOCLING_WHEELHOUSE:-}

usage() {
  cat <<'EOF'
Install vector database dependencies into the existing rag virtual environment.

Usage:
  script/rag/install-vector.sh [--venv PATH] [--python BIN] [--requirements FILE] [--wheelhouse DIR]

Options:
  --venv PATH          Virtualenv path (default: ./.venv-docling)
  --python BIN         Python executable (default: python3)
  --requirements FILE  Requirements file (default: script/rag/requirements-vector.txt)
  --wheelhouse DIR     Offline wheels directory, enables --no-index install
  -h, --help           Show help
EOF
}

# CLI parsing: value flags consume two positional args.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --venv)
      VENV="$2"
      shift 2
      ;;
    --python)
      PY="$2"
      shift 2
      ;;
    --requirements)
      REQ="$2"
      shift 2
      ;;
    --wheelhouse)
      WHEEL="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "unknown argument: $1" >&2
      usage
      exit 1
      ;;
  esac
done

if ! command -v "$PY" >/dev/null 2>&1; then
  echo "python executable not found: $PY" >&2
  exit 1
fi

# Create the venv only if it does not already exist (idempotent re-runs).
if [[ ! -d "$VENV" ]]; then
  "$PY" -m venv "$VENV"
fi

declare -a PIP=("$VENV/bin/python" "-m" "pip" "--disable-pip-version-check")

# Offline path: install strictly from the wheelhouse, never the network.
# NOTE(review): unlike install-docling.sh there is no fallback when $REQ is
# missing — pip will fail with its own error; confirm that is intended.
if [[ -n "$WHEEL" ]]; then
  if [[ ! -d "$WHEEL" ]]; then
    echo "wheelhouse directory not found: $WHEEL" >&2
    exit 1
  fi
  "${PIP[@]}" install --no-index --find-links "$WHEEL" -r "$REQ"
  echo "vector dependencies installed in: $VENV"
  exit 0
fi

# Online path: refresh build tooling first, then install the requirements.
"${PIP[@]}" install -U pip setuptools wheel
"${PIP[@]}" install -r "$REQ"
echo "vector dependencies installed in: $VENV"
diff --git a/script/rag/merge-image-ocr.py b/script/rag/merge-image-ocr.py
new file mode 100755
index 00000000000..57de0fb886c
--- /dev/null
+++ b/script/rag/merge-image-ocr.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import re
+from datetime import datetime, timezone
+from pathlib import Path
+
+
def read(path: Path) -> str:
    """Return the file's text, or '' when the file does not exist.

    Undecodable bytes are dropped (errors='ignore').
    """
    if path.exists():
        return path.read_text(encoding="utf-8", errors="ignore")
    return ""
+
+
def clean(text: str) -> str:
    """Collapse whitespace runs to single spaces and trim the ends."""
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
+
+
def snippet(text: str, n: int) -> str:
    """Truncate text to at most n chars, appending ' ...' when cut."""
    if len(text) > n:
        return text[:n].rstrip() + " ..."
    return text
+
+
def inline_block(image_id: str, text: str, limit: int, mode: str) -> str:
    """Render the inline replacement for one image placeholder.

    mode 'none' drops the image entirely; mode 'marker' (or missing OCR
    text) emits just the [IMAGE:id] tag; mode 'ocr' additionally embeds
    the truncated OCR text inside an [IMAGE_OCR] block.
    """
    if mode == "none":
        return ""
    tag = f"[IMAGE:{image_id}]"
    if mode == "marker" or not text:
        return tag
    return f"{tag}\n[IMAGE_OCR]\n{snippet(text, limit)}\n[/IMAGE_OCR]"
+
+
def main() -> None:
    """Merge per-image OCR output back into a docling text export.

    Replaces each docling image placeholder in --text with an inline
    block for the corresponding --meta row (matched by document order),
    and writes a JSON sidecar describing every image's OCR status.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--text", required=True)
    p.add_argument("--meta", required=True)
    p.add_argument("--ocr-dir", required=True)
    p.add_argument("--sidecar", required=True)
    p.add_argument("--source-url", required=True)
    p.add_argument("--raw", required=False, default="")
    p.add_argument("--inline-limit", type=int, default=2000)
    p.add_argument("--inline-mode", choices=["ocr", "marker", "none"], default="marker")
    args = p.parse_args()

    text_path = Path(args.text)
    meta_path = Path(args.meta)
    ocr_dir = Path(args.ocr_dir)
    sidecar_path = Path(args.sidecar)

    raw = read(text_path)
    if args.raw:
        # Preserve an untouched copy of the original text when requested.
        Path(args.raw).write_text(raw, encoding="utf-8")

    rows = json.loads(read(meta_path) or "[]")
    items = []
    for i, row in enumerate(rows):
        image_id = row.get("id") or f"img-{i}"
        # OCR output files are named <image_id>*.txt; take the first match.
        files = sorted(ocr_dir.glob(f"{image_id}*.txt"))
        ocr_text = clean(read(files[0])) if files else ""
        items.append(
            {
                "id": image_id,
                "index": i,
                "url": row.get("url", ""),
                "alt": row.get("alt", ""),
                "ocr_text": ocr_text,
                "ocr_chars": len(ocr_text),
                "status": "ok" if ocr_text else "empty",
            }
        )

    # BUG FIX: the pattern was empty (re.compile(r"")), which matches at
    # every position, so each count=1 substitution prepended the block at
    # offset 0 instead of replacing a placeholder. Docling's markdown
    # export marks pictures with "<!-- image -->" placeholders; match that.
    # NOTE(review): confirm the upstream export really emits this marker.
    marker = re.compile(r"<!--\s*image\s*-->")
    text = raw
    # Replace placeholders in document order, one per metadata row.
    n = min(len(items), len(marker.findall(raw)))
    for i in range(n):
        block = inline_block(items[i]["id"], items[i]["ocr_text"], args.inline_limit, args.inline_mode)
        # Callable replacement so backslashes in block are not treated as
        # regex group references by re.sub.
        text = marker.sub(lambda _: block, text, count=1)

    text_path.write_text(text, encoding="utf-8")

    sidecar = {
        "source_url": args.source_url,
        "text_file": str(text_path),
        "raw_file": args.raw,
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "images": items,
    }
    sidecar_path.write_text(json.dumps(sidecar, ensure_ascii=False, indent=2), encoding="utf-8")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/script/rag/rag-pipeline.py b/script/rag/rag-pipeline.py
new file mode 100644
index 00000000000..f3572e3b4fc
--- /dev/null
+++ b/script/rag/rag-pipeline.py
@@ -0,0 +1,427 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import hashlib
+import json
+import os
+import subprocess
+import sys
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+
+
def now() -> str:
    """Current UTC time as an ISO-8601 string (with +00:00 offset)."""
    stamp = datetime.now(timezone.utc)
    return stamp.isoformat()
+
+
def sha(path: Path) -> str:
    """SHA-256 hex digest of a file, streamed in 1 MiB chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as handle:
        while True:
            block = handle.read(1024 * 1024)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
+
+
def rel(path: Path, root: Path) -> str:
    """Path relative to root when possible, otherwise the absolute path."""
    resolved = path.resolve()
    try:
        return str(resolved.relative_to(root.resolve()))
    except ValueError:
        # path lives outside root; keep the absolute form.
        return str(resolved)
+
+
def run(cmd: list[str], *, capture: bool = False) -> str:
    """Run cmd, raising CalledProcessError on failure.

    Returns stdout when capture=True, otherwise '' (output inherited).
    """
    if not capture:
        subprocess.run(cmd, check=True)
        return ""
    proc = subprocess.run(cmd, check=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return proc.stdout
+
+
def urls(args) -> list[str]:
    """Collect URLs from --url flags plus an optional --url-file.

    Blank lines and '#' comments in the file are skipped; the result is
    de-duplicated while preserving first-seen order.
    """
    collected = [u for u in args.url if u]
    if args.url_file:
        source = Path(args.url_file)
        if source.exists():
            for line in source.read_text(encoding="utf-8", errors="ignore").splitlines():
                entry = line.strip()
                if entry and not entry.startswith("#"):
                    collected.append(entry)
    # dict.fromkeys keeps insertion order while dropping duplicates.
    return list(dict.fromkeys(collected))
+
+
def txt_files(dir_path: Path) -> list[Path]:
    """All convertible .txt files under dir_path, sorted.

    Derived artifacts (*.clean.txt, *.raw.txt) are excluded. The log-name
    set is defensive only: rglob('*.txt') cannot match *.log names.
    """
    skip_names = {"_success.log", "_failed.log", "_run.log"}
    result: list[Path] = []
    for candidate in sorted(dir_path.rglob("*.txt")):
        base = candidate.name
        if base in skip_names:
            continue
        if base.endswith((".clean.txt", ".raw.txt")):
            continue
        result.append(candidate)
    return result
+
+
def structured_files(scan_dir: Path, glob: str) -> list[Path]:
    """Regular files under scan_dir matching glob, sorted."""
    matches = [p for p in scan_dir.glob(glob) if p.is_file()]
    return sorted(matches)
+
+
def clean_path(txt: Path) -> Path:
    """Sibling path for the cleaned text: foo.txt -> foo.clean.txt."""
    stem = txt.name[:-4] if txt.name.endswith(".txt") else txt.name
    return txt.with_name(stem + ".clean.txt")
+
+
def structured_path(txt: Path) -> Path:
    """Sibling path for structured output: foo.txt -> foo.structured.json."""
    stem = txt.name[:-4] if txt.name.endswith(".txt") else txt.name
    return txt.with_name(stem + ".structured.json")
+
+
@dataclass
class Env:
    # Resolved locations of the pipeline's helper scripts and interpreter.
    root: Path  # repository root
    py: Path  # python interpreter used to run the helper scripts
    url_to_text: Path  # fetches one URL and converts it to text
    convert_dir: Path  # converts a directory of documents to text
    clean_text: Path  # whitespace/noise cleaner
    structure_text: Path  # section/chunk structurer
    build_index: Path  # vector index builder
+
+
def env(root: Path, py: str) -> Env:
    """Build the Env for a repository root and python interpreter path."""
    scripts = root / "script" / "rag"
    return Env(
        root=root,
        py=Path(py),
        url_to_text=scripts / "url-to-text.sh",
        convert_dir=scripts / "convert-dir-to-text.sh",
        clean_text=scripts / "clean-text.py",
        structure_text=scripts / "structure-text.py",
        build_index=scripts / "build-vector-index.py",
    )
+
+
def process_txt(e: Env, txt: Path, args, source_url: str = "") -> Path:
    """Clean one text file and build its structured JSON.

    Runs clean-text.py then structure-text.py via the configured python,
    forwarding the images sidecar (when present), the source URL, and —
    in llamaindex mode — the summarization model. Returns the structured
    output path.
    """
    c = clean_path(txt)
    s = structured_path(txt)
    run([str(e.py), str(e.clean_text), "--input", str(txt), "--output", str(c)])
    cmd = [
        str(e.py),
        str(e.structure_text),
        "--text",
        str(c),
        "--output",
        str(s),
        "--mode",
        args.struct_mode,
        "--inline-ocr",
        args.inline_ocr,
    ]
    # Sidecar written by the URL fetcher: <stem>.images.json next to the txt.
    img = txt.with_name(txt.name[:-4] + ".images.json") if txt.name.endswith(".txt") else txt.with_name(txt.name + ".images.json")
    if img.exists():
        cmd.extend(["--images", str(img)])
    if source_url:
        cmd.extend(["--source-url", source_url])
    if args.struct_mode == "llamaindex":
        cmd.extend(["--model", args.struct_model])
    run(cmd)
    return s
+
+
def refresh_dir(e: Env, args) -> list[Path]:
    """Convert every document under --input-dir to text, then clean and
    structure each produced .txt; returns the structured JSON paths."""
    src = Path(args.input_dir)
    out = Path(args.text_out_dir)
    out.mkdir(parents=True, exist_ok=True)
    run(["bash", str(e.convert_dir), "--input", str(src), "--output", str(out)])
    return [process_txt(e, txt, args) for txt in txt_files(out)]
+
+
def pick_txt(stdout: str) -> Path:
    """Last non-blank line of url-to-text output, as a Path.

    Raises SystemExit when the output contains no usable line.
    """
    lines = [ln.strip() for ln in stdout.splitlines() if ln.strip()]
    if not lines:
        raise SystemExit("url-to-text returned empty output")
    return Path(lines[-1])
+
+
def refresh_url(e: Env, args) -> list[Path]:
    """Fetch each configured URL to text and structure it.

    Returns the structured JSON paths; exits when no URL was supplied.
    """
    all_urls = urls(args)
    if not all_urls:
        raise SystemExit("no url provided: use --url or --url-file")
    out = []
    for url in all_urls:
        cmd = [
            "bash",
            str(e.url_to_text),
            "--url",
            url,
            "--output",
            args.url_text_dir,
            "--image-inline",
            args.image_inline,
        ]
        if args.ocr_images:
            cmd.append("--ocr-images")
        # url-to-text.sh prints the produced txt path as its last line.
        txt = pick_txt(run(cmd, capture=True))
        out.append(process_txt(e, txt, args, source_url=url))
    return out
+
+
def manifest(paths: list[Path], root: Path, args) -> dict:
    """Build the manifest dict describing the indexed structured files.

    Each docs entry records the root-relative path, content sha256 and
    source URL; top-level fields capture the indexing configuration so a
    later update run can detect incompatible changes.
    """
    docs = {}
    for p in paths:
        key = rel(p, root)
        data = json.loads(p.read_text(encoding="utf-8", errors="ignore"))
        docs[key] = {
            "path": key,
            "sha256": sha(p),
            "source_url": data.get("source_url", ""),
            "updated_at": now(),
        }
    return {
        "version": 1,
        "generated_at": now(),
        "root": str(root.resolve()),
        "collection": args.collection,
        "embedding_model": args.embed_model,
        "struct_mode": args.struct_mode,
        "struct_model": args.struct_model,
        "docs": docs,
    }
+
+
def load_manifest(path: Path) -> dict:
    """Parse the manifest JSON; return {} when missing or unreadable."""
    if not path.exists():
        return {}
    try:
        raw = path.read_text(encoding="utf-8", errors="ignore")
        return json.loads(raw)
    except Exception:
        # A corrupt manifest simply forces a fresh diff baseline.
        return {}
+
+
def write_manifest(path: Path, data: dict) -> None:
    """Serialize data as pretty JSON, creating parent directories."""
    path.parent.mkdir(parents=True, exist_ok=True)
    body = json.dumps(data, ensure_ascii=False, indent=2)
    path.write_text(body, encoding="utf-8")
+
+
def index(e: Env, args, files: list[Path], recreate: bool, delete_keys: list[str]) -> dict:
    """Invoke build-vector-index.py and return its parsed JSON report.

    files are (re)indexed, delete_keys are removed from the collection
    first, and recreate drops the whole collection before indexing.
    """
    cmd = [
        str(e.py),
        str(e.build_index),
        "--db-path",
        args.db_path,
        "--collection",
        args.collection,
        "--model",
        args.embed_model,
        "--root",
        str(args.root),
    ]
    for f in files:
        cmd.extend(["--input", str(f)])
    for key in delete_keys:
        cmd.extend(["--delete-doc-key", key])
    if recreate:
        cmd.append("--recreate")
    out = run(cmd, capture=True)
    return json.loads(out)
+
+
def scan_all(args) -> list[Path]:
    """All structured JSON files under --scan-dir matching --glob."""
    return structured_files(Path(args.scan_dir), args.glob)
+
+
def init_cmd(e: Env, args) -> None:
    """Full (re)build: gather structured files per --source, recreate the
    vector collection, and write a fresh manifest. Prints a JSON report."""
    if args.source == "dir":
        files = refresh_dir(e, args)
    elif args.source == "url":
        files = refresh_url(e, args)
    else:
        # source "structured": reuse whatever structured JSON already exists.
        files = scan_all(args)
    if not files:
        raise SystemExit("no structured files found for init")
    res = index(e, args, files, recreate=True, delete_keys=[])
    man = manifest(files, args.root, args)
    write_manifest(Path(args.manifest), man)
    print(
        json.dumps(
            {
                "mode": "init",
                "files": len(files),
                "manifest": args.manifest,
                "index": res,
            },
            ensure_ascii=False,
            indent=2,
        )
    )
+
+
def update_cmd(e: Env, args) -> None:
    """Incremental update: refresh sources, diff against the stored
    manifest by sha256, and reindex only changed/removed docs.

    Falls back to a full rebuild when the collection name or embedding
    model no longer matches the manifest. Prints a JSON report.
    """
    if args.source == "dir":
        refresh_dir(e, args)
    elif args.source == "url":
        refresh_url(e, args)

    files = scan_all(args)
    old = load_manifest(Path(args.manifest))
    old_docs = old.get("docs", {})
    if not files:
        # Nothing on disk anymore: purge every previously indexed doc.
        new = manifest([], args.root, args)
        removed = sorted(old_docs.keys())
        res = None
        if removed:
            res = index(e, args, [], recreate=False, delete_keys=removed)
        write_manifest(Path(args.manifest), new)
        print(
            json.dumps(
                {
                    "mode": "update",
                    "changed": 0,
                    "removed": len(removed),
                    "manifest": args.manifest,
                    "index": res,
                },
                ensure_ascii=False,
                indent=2,
            )
        )
        return

    new = manifest(files, args.root, args)
    new_docs = new.get("docs", {})

    # A different collection or embedding model invalidates all vectors.
    force_full = False
    if old:
        if old.get("collection") != args.collection or old.get("embedding_model") != args.embed_model:
            force_full = True

    if force_full:
        res = index(e, args, files, recreate=True, delete_keys=[])
        write_manifest(Path(args.manifest), new)
        print(
            json.dumps(
                {
                    "mode": "update",
                    "reason": "collection_or_embedding_changed",
                    "files": len(files),
                    "manifest": args.manifest,
                    "index": res,
                },
                ensure_ascii=False,
                indent=2,
            )
        )
        return

    # changed: new or hash-differing docs; removed: docs gone from disk.
    changed = [k for k, v in new_docs.items() if old_docs.get(k, {}).get("sha256") != v.get("sha256")]
    removed = [k for k in old_docs if k not in new_docs]
    if not changed and not removed:
        write_manifest(Path(args.manifest), new)
        print(
            json.dumps(
                {
                    "mode": "update",
                    "changed": 0,
                    "removed": 0,
                    "manifest": args.manifest,
                    "index": None,
                },
                ensure_ascii=False,
                indent=2,
            )
        )
        return

    pick = {k: Path(args.root) / new_docs[k]["path"] for k in changed}
    # Changed keys are also passed as deletes so stale chunks cannot linger.
    res = index(e, args, [p for p in pick.values() if p.exists()], recreate=False, delete_keys=sorted(set(changed + removed)))
    write_manifest(Path(args.manifest), new)
    print(
        json.dumps(
            {
                "mode": "update",
                "changed": len(changed),
                "removed": len(removed),
                "manifest": args.manifest,
                "index": res,
            },
            ensure_ascii=False,
            indent=2,
        )
    )
+
+
def add_common(sp) -> None:
    """Register the flags shared by the init and update subcommands."""
    # RAG_STRUCT_MODE may preselect the structuring backend; any value
    # other than the two known modes falls back to "llamaindex".
    default_mode = os.getenv("RAG_STRUCT_MODE", "llamaindex")
    if default_mode not in {"rule", "llamaindex"}:
        default_mode = "llamaindex"
    sp.add_argument("--root", default=".")
    sp.add_argument("--python", default="./.venv-docling/bin/python")
    sp.add_argument("--source", choices=["structured", "dir", "url"], default="structured")
    sp.add_argument("--scan-dir", default=".rag/text")
    sp.add_argument("--glob", default="**/*.structured.json")
    sp.add_argument("--input-dir", default="")
    sp.add_argument("--text-out-dir", default=".rag/text/dir")
    sp.add_argument("--url", action="append", default=[])
    sp.add_argument("--url-file", default="")
    sp.add_argument("--url-text-dir", default=".rag/text/url")
    sp.add_argument("--ocr-images", action="store_true")
    sp.add_argument("--image-inline", choices=["marker", "ocr", "none"], default="marker")
    sp.add_argument("--struct-mode", choices=["rule", "llamaindex"], default=default_mode)
    sp.add_argument("--struct-model", default=os.getenv("RAG_STRUCT_MODEL", "gpt-4o-mini"))
    sp.add_argument("--inline-ocr", choices=["strip", "keep"], default="strip")
    sp.add_argument("--embed-model", default="qwen3-embedding:4b")
    sp.add_argument("--db-path", default=".rag/vector/qdrant")
    sp.add_argument("--collection", default="rag_chunks")
    sp.add_argument("--manifest", default=".rag/state/manifest.json")
+
+
def main() -> None:
    """CLI entry point: dispatch to the init or update subcommand."""
    parser = argparse.ArgumentParser()
    sub = parser.add_subparsers(dest="cmd", required=True)
    add_common(sub.add_parser("init"))
    add_common(sub.add_parser("update"))
    args = parser.parse_args()
    args.root = Path(args.root).resolve()
    runtime = env(args.root, args.python)

    if args.cmd == "init":
        init_cmd(runtime, args)
    elif args.cmd == "update":
        update_cmd(runtime, args)
    else:
        # Unreachable with required=True, kept as a safety net.
        raise SystemExit("unknown cmd")
+
+
+if __name__ == "__main__":
+ try:
+ main()
+ except subprocess.CalledProcessError as e:
+ print(
+ json.dumps(
+ {
+ "error": "command_failed",
+ "cmd": e.cmd,
+ "code": e.returncode,
+ "stdout": e.stdout if isinstance(e.stdout, str) else "",
+ "stderr": e.stderr if isinstance(e.stderr, str) else "",
+ },
+ ensure_ascii=False,
+ indent=2,
+ ),
+ file=sys.stderr,
+ )
+ raise SystemExit(e.returncode)
diff --git a/script/rag/requirements-docling.txt b/script/rag/requirements-docling.txt
new file mode 100644
index 00000000000..e195be9fd62
--- /dev/null
+++ b/script/rag/requirements-docling.txt
@@ -0,0 +1 @@
+docling==2.77.0
diff --git a/script/rag/requirements-llamaindex.txt b/script/rag/requirements-llamaindex.txt
new file mode 100644
index 00000000000..7aaa93fb77e
--- /dev/null
+++ b/script/rag/requirements-llamaindex.txt
@@ -0,0 +1,2 @@
+llama-index
+llama-index-llms-openai
diff --git a/script/rag/requirements-vector.txt b/script/rag/requirements-vector.txt
new file mode 100644
index 00000000000..e21b5db2afa
--- /dev/null
+++ b/script/rag/requirements-vector.txt
@@ -0,0 +1,2 @@
+qdrant-client
+openai
diff --git a/script/rag/search-vector-index.py b/script/rag/search-vector-index.py
new file mode 100644
index 00000000000..0b7d6185158
--- /dev/null
+++ b/script/rag/search-vector-index.py
@@ -0,0 +1,365 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import os
+from pathlib import Path
+
+
def clip(text: str, n: int) -> str:
    """Whitespace-normalize text and truncate to n chars with ' ...'."""
    normalized = " ".join(str(text or "").split())
    if len(normalized) <= n:
        return normalized
    return normalized[:n].rstrip() + " ..."
+
+
def uniq(rows: list[str]) -> list[str]:
    """Strip entries and drop blanks/duplicates, preserving order."""
    seen: set = set()
    result: list = []
    for entry in rows:
        cleaned = str(entry or "").strip()
        if cleaned and cleaned not in seen:
            seen.add(cleaned)
            result.append(cleaned)
    return result
+
+
def pick_json(text: str) -> dict:
    """Parse the outermost {...} object embedded in an LLM response.

    Raises ValueError when no brace pair is found; json decoding errors
    propagate from json.loads.
    """
    start = text.find("{")
    end = text.rfind("}")
    if start < 0 or end <= start:
        raise ValueError("no json object found in rewrite response")
    return json.loads(text[start : end + 1])
+
+
def render_state(query: str, hits: list[dict], rewrite: dict) -> str:
    """Render the compact key=value "state" block for a search result.

    NOTE(review): the joined list begins and ends with "" — these only
    emit blank delimiter lines. They look like wrapper markup (e.g. an
    opening/closing tag) that may have been lost upstream; confirm what
    the consumer of the state format actually expects.
    """
    top = hits[0] if hits else {}
    status = "new_evidence" if hits else "need_refine"
    reason = "top_hits_available" if hits else "empty_hits"
    next_action = "use_delta_or_brief_only_if_needed" if hits else "refine_query_with_device_or_step"
    return "\n".join(
        [
            "",
            f"query={clip(query, 80)}",
            f"status={status}",
            f"reason={reason}",
            f"total_hits={len(hits)}",
            f"top_source={top.get('source_url', '')}",
            f"top_section={clip(top.get('section_title', ''), 48)}",
            f"rewrite_mode={rewrite.get('mode', 'none')}",
            f"rewrite_queries={json.dumps(rewrite.get('queries', []), ensure_ascii=False)}",
            f"next_action={next_action}",
            "",
        ]
    )
+
+
def render_brief(query: str, hits: list[dict], rewrite: dict, top_k: int) -> str:
    """State block plus one summary line per top hit (brief format)."""
    state = render_state(query, hits, rewrite)
    if not hits:
        return state
    body = []
    for i, item in enumerate(hits[: max(1, top_k)], start=1):
        body.append(
            " ".join(
                [
                    f"[{i}]",
                    f"source={item.get('source_url', '')}",
                    f"section={clip(item.get('section_title', ''), 48)}",
                    f"summary={clip(item.get('text_preview', ''), 120)}",
                ]
            )
        )
    return state + "\n" + "\n".join(body)
+
+
def auto_format(value: str) -> str:
    """Resolve format 'auto': 'state' inside opencode (OPENCODE=1),
    'json' otherwise; explicit values pass through unchanged."""
    if value != "auto":
        return value
    return "state" if os.getenv("OPENCODE") == "1" else "json"
+
+
def need_rewrite(query: str) -> bool:
    """Heuristic: long or multi-clause questions benefit from rewriting."""
    text = str(query or "").strip()
    if len(text) >= 48 or text.count(" ") >= 5:
        return True
    marks = ["并且", "以及", "同时", "还有", "怎么", "如何", "步骤", "方式", "版本", "命令"]
    # Two or more topic markers suggest a compound question.
    return sum(1 for mark in marks if mark in text) >= 2
+
+
def auto_rewrite(value: str, model: str, query: str) -> str:
    """Resolve rewrite 'auto': use the LLM only when a model is configured
    and the query looks complex; explicit values pass through."""
    if value != "auto":
        return value
    return "llm" if model and need_rewrite(query) else "off"
+
+
def embed_query(client, model: str, text: str) -> list[float]:
    """Embed one query string via the OpenAI-compatible embeddings API."""
    r = client.embeddings.create(model=model, input=[text])
    return r.data[0].embedding
+
+
def rewrite_query(client, model: str, query: str, limit: int) -> dict:
    """Ask the LLM to distill a verbose question into short retrieval
    queries and keywords.

    Returns {"mode", "queries", "keywords"}. mode records the outcome:
    "off" (no model), "llm" (success), "llm_error" (request failed) or
    "llm_fallback" (response had no parseable JSON). The original query
    is always kept as the first entry of queries.
    """
    if not model:
        return {"mode": "off", "queries": [query], "keywords": []}
    prompt = "\n".join(
        [
            "你是RAG检索改写器。",
            "目标:从长问题中提取真正的检索目标,去掉语义噪声。",
            "输出必须是 JSON 对象,不要输出解释。",
            f"最多给出 {max(1, limit)} 条 queries。",
            '返回格式:{"queries":["..."],"keywords":["..."]}',
            "要求:queries 应短、准、可用于 embedding 检索;keywords 只保留设备名、动作、文档对象、错误码、版本等关键信息。",
            f"原始问题:{query}",
        ]
    )
    try:
        res = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
        )
        text = res.choices[0].message.content or ""
    except Exception:
        # Network/provider failure: degrade to the original query.
        return {"mode": "llm_error", "queries": [query], "keywords": []}
    try:
        data = pick_json(text)
    except Exception:
        # Unparseable response: same degradation, different mode tag.
        return {"mode": "llm_fallback", "queries": [query], "keywords": []}
    queries = uniq([str(x) for x in data.get("queries", [])])[: max(1, limit)]
    if query not in queries:
        queries.insert(0, query)
    keywords = uniq([str(x) for x in data.get("keywords", [])])[:8]
    return {
        "mode": "llm",
        "queries": uniq(queries)[: max(1, limit)],
        "keywords": keywords,
    }
+
+
def related_images(qdrant, models, collection: str, ids: list[str], text_chars: int) -> list[dict]:
    """Look up the stored image node for each image id and return small
    previews (source URL plus truncated image text).

    Ids with no matching point are silently skipped; at most one point
    per id is read via a filtered scroll.
    """
    out = []
    for iid in ids:
        flt = models.Filter(
            must=[
                models.FieldCondition(key="node_type", match=models.MatchValue(value="image")),
                models.FieldCondition(key="image_id", match=models.MatchValue(value=iid)),
            ]
        )
        points, _ = qdrant.scroll(
            collection_name=collection,
            scroll_filter=flt,
            with_payload=True,
            limit=1,
        )
        if not points:
            continue
        payload = points[0].payload or {}
        text = str(payload.get("text", ""))
        # Never truncate below 20 chars so previews stay meaningful.
        n = max(20, text_chars)
        preview = text if len(text) <= n else text[:n].rstrip() + " ..."
        out.append(
            {
                "image_id": iid,
                "source_url": payload.get("source_url", ""),
                "text_preview": preview,
            }
        )
    return out
+
+
def search(qdrant, models, collection: str, vec: list[float], limit: int, node_type: str):
    """Vector-search the collection, optionally filtered by node_type.

    Prefers the newer query_points API when the installed qdrant-client
    provides it, falling back to the older search method otherwise.
    """
    flt = None
    if node_type != "any":
        flt = models.Filter(
            must=[models.FieldCondition(key="node_type", match=models.MatchValue(value=node_type))]
        )
    if hasattr(qdrant, "query_points"):
        res = qdrant.query_points(
            collection_name=collection,
            query=vec,
            limit=max(1, limit),
            with_payload=True,
            query_filter=flt,
        )
        return res.points
    return qdrant.search(
        collection_name=collection,
        query_vector=vec,
        limit=max(1, limit),
        with_payload=True,
        query_filter=flt,
    )
+
+
def fp(payload: dict) -> str:
    """Stable fingerprint for a hit: '<file-or-url>#<chunk/image/section>'."""
    src = str(payload.get("text_file", "") or payload.get("source_url", ""))
    ident = str(
        payload.get("chunk_id", "")
        or payload.get("image_id", "")
        or payload.get("section_title", "")
    )
    return f"{src}#{ident}"
+
+
def collect(points, qdrant, models, args, query: str) -> list[dict]:
    """Convert raw qdrant points into flat hit dicts for one query.

    Each hit carries its rank/score, payload fields, a truncated text
    preview and (unless --no-related-images) previews of the image nodes
    referenced by the chunk's image_ids.
    """
    out = []
    for rank, item in enumerate(points, start=1):
        payload = item.payload or {}
        text = str(payload.get("text", ""))
        # Never truncate below 20 chars so previews stay meaningful.
        n = max(20, args.show_text_chars)
        preview = text if len(text) <= n else text[:n].rstrip() + " ..."
        ids = payload.get("image_ids", [])
        if not isinstance(ids, list):
            ids = []
        ext = (
            []
            if args.no_related_images
            else related_images(
                qdrant,
                models,
                args.collection,
                [str(x) for x in ids if x],
                args.show_text_chars,
            )
        )
        out.append(
            {
                "fp": fp(payload),
                "query": query,
                "rank": rank,
                "score": float(item.score),
                "node_type": payload.get("node_type", "text"),
                "image_id": payload.get("image_id", ""),
                "chunk_id": payload.get("chunk_id", ""),
                "section_title": payload.get("section_title", ""),
                "source_url": payload.get("source_url", ""),
                "text_file": payload.get("text_file", ""),
                "image_ids": ids,
                "related_images": ext,
                "text_preview": preview,
            }
        )
    return out
+
+
def merge_hits(rows: list[list[dict]], primary: str, top_k: int) -> list[dict]:
    """Fuse per-query hit lists into one ranked list.

    Hits sharing a fingerprint are merged: the best-scoring occurrence
    supplies the display fields, while the hit count, an RRF-style rank
    sum, the max score, and a primary-query bonus are combined into
    rerank_score. Scratch fields are dropped before returning the top_k
    entries sorted by (rerank_score, score) descending.
    """
    display_fields = (
        "node_type", "image_id", "chunk_id", "section_title", "source_url",
        "text_file", "image_ids", "related_images", "text_preview",
    )
    fused: dict = {}
    for batch in rows:
        for hit in batch:
            rrf_part = 1.0 / (60 + int(hit["rank"]))
            entry = fused.get(hit["fp"])
            if entry is None:
                fused[hit["fp"]] = {
                    **hit,
                    "matched_queries": [hit["query"]],
                    "hit_count": 1,
                    "max_score": float(hit["score"]),
                    "rrf": rrf_part,
                    "primary_match": 1 if hit["query"] == primary else 0,
                }
                continue
            if hit["query"] not in entry["matched_queries"]:
                entry["matched_queries"].append(hit["query"])
            entry["hit_count"] += 1
            entry["max_score"] = max(float(entry["max_score"]), float(hit["score"]))
            entry["rrf"] += rrf_part
            if hit["query"] == primary:
                entry["primary_match"] = 1
            if float(hit["score"]) > float(entry["score"]):
                # The stronger occurrence wins the display fields.
                entry["score"] = float(hit["score"])
                for field in display_fields:
                    entry[field] = hit[field]
    ranked = []
    for entry in fused.values():
        entry["rerank_score"] = (
            0.45 * float(entry["max_score"])
            + 0.35 * float(entry["rrf"])
            + 0.12 * float(entry["hit_count"])
            + 0.08 * float(entry["primary_match"])
        )
        for scratch in ("fp", "query", "rank", "max_score", "rrf", "primary_match"):
            entry.pop(scratch, None)
        ranked.append(entry)
    ranked.sort(key=lambda e: (float(e.get("rerank_score", 0)), float(e.get("score", 0))), reverse=True)
    return ranked[: max(1, top_k)]
+
+
def main() -> None:
    """CLI: embed the query (plus optional LLM rewrites), search the
    local qdrant store once per query, fuse the hit lists, and print the
    result in json/state/brief format."""
    p = argparse.ArgumentParser()
    p.add_argument("--query", required=True)
    p.add_argument("--db-path", default=".rag/vector/qdrant")
    p.add_argument("--collection", default="rag_chunks")
    p.add_argument("--model", default="nomic-embed-text")
    p.add_argument("--base-url", default="")
    p.add_argument("--api-key", default="")
    p.add_argument("--top-k", type=int, default=5)
    p.add_argument("--per-query-k", type=int, default=5)
    p.add_argument("--show-text-chars", type=int, default=240)
    p.add_argument("--node-type", choices=["any", "text", "image"], default="any")
    p.add_argument("--no-related-images", action="store_true")
    p.add_argument("--format", choices=["auto", "json", "state", "brief"], default="auto")
    p.add_argument("--rewrite", choices=["auto", "off", "llm"], default="auto")
    p.add_argument("--rewrite-model", default=os.getenv("RAG_REWRITE_MODEL", ""))
    p.add_argument("--rewrite-queries", type=int, default=int(os.getenv("RAG_REWRITE_QUERIES", "3")))
    args = p.parse_args()

    # Import lazily so --help works without the optional dependencies.
    try:
        from openai import OpenAI
        from qdrant_client import QdrantClient, models
    except ModuleNotFoundError as e:
        raise SystemExit(
            f"missing dependency: {e.name}. run: bash script/rag/install-vector.sh"
        ) from e

    # Defaults target a local Ollama OpenAI-compatible endpoint.
    key = args.api_key or os.getenv("OPENAI_API_KEY") or os.getenv("MINIMAX_API_KEY") or "ollama"
    base = args.base_url or os.getenv("OPENAI_BASE_URL") or "http://127.0.0.1:11434/v1"
    client = OpenAI(api_key=key, base_url=base)
    rewrite_mode = auto_rewrite(args.rewrite, args.rewrite_model, args.query)
    rewrite = (
        rewrite_query(client, args.rewrite_model, args.query, max(1, args.rewrite_queries))
        if rewrite_mode == "llm"
        else {"mode": "off", "queries": [args.query], "keywords": []}
    )
    # The original query always searches first; rewrites follow.
    queries = uniq([args.query, *rewrite.get("queries", [])])[: max(1, args.rewrite_queries)]

    db = Path(args.db_path)
    if not db.exists():
        raise SystemExit(f"db path not found: {db}")

    qdrant = QdrantClient(path=str(db))
    rows = []
    for query in queries:
        vec = embed_query(client, args.model, query)
        points = search(qdrant, models, args.collection, vec, max(args.top_k, args.per_query_k), args.node_type)
        rows.append(collect(points, qdrant, models, args, query))

    out = merge_hits(rows, queries[0], args.top_k)
    rewrite["queries"] = queries
    fmt = auto_format(args.format)
    if fmt == "state":
        print(render_state(args.query, out, rewrite))
        return
    if fmt == "brief":
        print(render_brief(args.query, out, rewrite, args.top_k))
        return
    print(json.dumps({"query": args.query, "rewrite": rewrite, "hits": out}, ensure_ascii=False, indent=2))
+
+
+if __name__ == "__main__":
+ main()
diff --git a/script/rag/structure-text.py b/script/rag/structure-text.py
new file mode 100755
index 00000000000..3d158cb80c4
--- /dev/null
+++ b/script/rag/structure-text.py
@@ -0,0 +1,307 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import importlib.util
+import json
+import os
+import re
+import sys
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+
# time.monotonic() timestamp of the most recent LLM call; used by throttle().
LAST_LLM_AT = 0.0
# Inline image marker: [IMAGE:<id>]; group 1 captures the id.
IMAGE_ID_RE = re.compile(r"\[IMAGE:([^\]]+)\]")
# A full inline OCR block including its delimiters (non-greedy body).
IMAGE_OCR_RE = re.compile(r"\[IMAGE_OCR\][\s\S]*?\[/IMAGE_OCR\]")
+
+
def read(path: Path) -> str:
    """Return the file's contents, ignoring undecodable bytes."""
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        return handle.read()
+
+
def clean(text: str) -> str:
    """Collapse whitespace runs into single spaces and trim the ends."""
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
+
+
def strip_inline_ocr(text: str) -> str:
    """Remove [IMAGE_OCR]...[/IMAGE_OCR] blocks and squeeze the runs of
    blank lines the removal leaves behind."""
    without_ocr = re.sub(r"\[IMAGE_OCR\][\s\S]*?\[/IMAGE_OCR\]", "", text)
    squeezed = re.sub(r"\n{3,}", "\n\n", without_ocr)
    return squeezed.strip()
+
+
def image_ids(text: str) -> list[str]:
    """Unique [IMAGE:<id>] ids referenced in text, sorted alphabetically."""
    found = re.findall(r"\[IMAGE:([^\]]+)\]", text)
    return sorted(set(found))
+
+
def split_sections(text: str) -> list[dict]:
    """Split markdown-ish text into sections at #-style headings.

    Content before the first heading is titled 'document'. Sections whose
    body is empty (e.g. consecutive headings) are dropped, and the
    dropped heading's title is lost.
    """
    heading = re.compile(r"^#{1,6}\s+")
    sections: list = []
    current_title = "document"
    pending: list = []

    def flush() -> None:
        # Emit the accumulated body (if any) under the current title.
        body = "\n".join(pending).strip()
        if body:
            sections.append({"title": current_title, "text": body})

    for line in text.splitlines():
        if heading.match(line):
            flush()
            current_title = heading.sub("", line).strip()
            pending = []
        else:
            pending.append(line)
    flush()
    return sections
+
+
def chunk_text(text: str, size: int, overlap: int) -> list[str]:
    """Cut text into windows of `size` chars overlapping by `overlap`.

    Text that already fits in one window is returned as a single-element
    list unchanged.
    """
    if len(text) <= size:
        return [text]
    step = max(1, size - overlap)
    pieces = []
    start = 0
    while start < len(text):
        pieces.append(text[start : start + size])
        if start + size >= len(text):
            break
        start += step
    return pieces
+
+
def rule_summary(text: str, n: int = 280) -> str:
    """Cheap extractive summary: normalized text truncated to n chars."""
    normalized = re.sub(r"\s+", " ", text).strip()
    if len(normalized) <= n:
        return normalized
    return normalized[:n].rstrip() + " ..."
+
+
def throttle(interval: float) -> None:
    """Sleep just long enough to keep at least `interval` seconds between
    LLM calls; no-op when interval <= 0. Updates the shared LAST_LLM_AT
    timestamp (module global, single-threaded use assumed)."""
    global LAST_LLM_AT
    if interval <= 0:
        return
    now = time.monotonic()
    # Remaining time until the next call is allowed.
    wait = LAST_LLM_AT + interval - now
    if wait > 0:
        time.sleep(wait)
    LAST_LLM_AT = time.monotonic()
+
+
def is_rate_limit_error(e: Exception) -> bool:
    """Best-effort detection of provider rate-limit errors by message."""
    message = str(e).lower()
    needles = ("rate limit", "too many requests", "429")
    return any(needle in message for needle in needles)
+
+
def with_retry(
    fn,
    *,
    min_interval: float,
    max_retries: int,
    retry_initial: float,
) -> str:
    """Call fn with inter-call throttling and exponential backoff on
    rate-limit errors.

    Non-rate-limit exceptions, or exhausting max_retries, propagate. The
    backoff starts at retry_initial (floored to 0.1s), doubles per retry,
    and is capped at 30 seconds.
    """
    delay = max(0.1, retry_initial)
    n = 0
    while True:
        throttle(min_interval)
        try:
            return fn()
        except Exception as e:
            if not is_rate_limit_error(e) or n >= max_retries:
                raise
            n += 1
            print(
                f"[llm] rate limit; retry {n}/{max_retries} after {delay:.1f}s",
                file=sys.stderr,
            )
            time.sleep(delay)
            delay = min(delay * 2, 30)
+
+
def llama_summary(
    text: str,
    model: str,
    *,
    min_interval: float,
    max_retries: int,
    retry_initial: float,
) -> str:
    """Summarize text via llama-index's OpenAI LLM, with throttling and
    rate-limit retries.

    Falls back to a raw OpenAI-compatible chat call when llama-index
    rejects the model name but OPENAI_BASE_URL points at a custom
    endpoint. Raises SystemExit when llama-index or an API key is absent.
    """
    if importlib.util.find_spec("llama_index.llms.openai") is None:
        raise SystemExit(
            "llama-index is not installed in this Python environment. "
            "Use ./.venv-docling/bin/python -m pip install -r script/rag/requirements-llamaindex.txt"
        )

    # Cap the prompt body so requests stay small and predictable in cost.
    prompt = (
        "Summarize the following text in Chinese, keep factual key points in 3 sentences max.\n\n"
        f"{text[:6000]}"
    )

    def key() -> str:
        # Resolved lazily so the error names the accepted variables.
        k = os.getenv("OPENAI_API_KEY") or os.getenv("MINIMAX_API_KEY")
        if k:
            return k
        raise SystemExit(
            "OPENAI_API_KEY is required for --mode llamaindex "
            "(MINIMAX_API_KEY is also accepted)."
        )

    def compat() -> str:
        # Plain OpenAI-compatible chat call that bypasses llama-index.
        from openai import OpenAI as OpenAIClient

        client = OpenAIClient(
            api_key=key(),
            base_url=os.getenv("OPENAI_BASE_URL") or None,
        )
        res = client.chat.completions.create(
            model=model,
            temperature=0,
            messages=[{"role": "user", "content": prompt}],
        )
        msg = res.choices[0].message.content if res.choices else ""
        return clean(msg or "")

    from llama_index.llms.openai import OpenAI

    try:
        return with_retry(
            lambda: clean(
                OpenAI(
                    model=model,
                    temperature=0,
                    api_base=os.getenv("OPENAI_BASE_URL"),
                    api_key=key(),
                ).complete(prompt).text
            ),
            min_interval=min_interval,
            max_retries=max_retries,
            retry_initial=retry_initial,
        )
    except ValueError as e:
        # llama-index raises ValueError("Unknown model ...") for model
        # names it cannot map; anything else is a real error.
        if "Unknown model" not in str(e):
            raise
        if not os.getenv("OPENAI_BASE_URL"):
            raise SystemExit(
                f"Unknown model '{model}'. Set OPENAI_BASE_URL to your compatible endpoint, "
                "for example: https://api.minimaxi.com/v1"
            )
        return with_retry(
            compat,
            min_interval=min_interval,
            max_retries=max_retries,
            retry_initial=retry_initial,
        )
+
+
def main() -> None:
    """Build the structured JSON (sections, chunks, image nodes) for one
    cleaned text file and write it to --output."""
    p = argparse.ArgumentParser()
    p.add_argument("--text", required=True)
    p.add_argument("--images", required=False, default="")
    p.add_argument("--output", required=True)
    p.add_argument("--source-url", required=False, default="")
    p.add_argument("--mode", choices=["rule", "llamaindex"], default="rule")
    p.add_argument("--model", default="gpt-4o-mini")
    p.add_argument("--llm-min-interval", type=float, default=1.0)
    p.add_argument("--llm-max-retries", type=int, default=6)
    p.add_argument("--llm-retry-initial", type=float, default=1.5)
    p.add_argument("--inline-ocr", choices=["strip", "keep"], default="strip")
    p.add_argument("--chunk-size", type=int, default=1600)
    p.add_argument("--chunk-overlap", type=int, default=200)
    args = p.parse_args()

    text_path = Path(args.text)
    src = read(text_path)
    sections = split_sections(src)

    # Optional images sidecar (from merge-image-ocr): id -> metadata row.
    image_rows = []
    image_map = {}
    if args.images:
        rows = json.loads(read(Path(args.images)))
        image_rows = rows.get("images", [])
        for item in image_rows:
            image_map[item["id"]] = item

    out_sections = []
    chunks = []
    nodes = []
    for si, sec in enumerate(sections):
        body = strip_inline_ocr(sec["text"]) if args.inline_ocr == "strip" else sec["text"]
        ids = image_ids(body)
        # Cheap rule summary first; replaced by the LLM in llamaindex mode.
        summary = rule_summary(body)
        if args.mode == "llamaindex":
            summary = llama_summary(
                body,
                args.model,
                min_interval=args.llm_min_interval,
                max_retries=args.llm_max_retries,
                retry_initial=args.llm_retry_initial,
            )

        out_sections.append(
            {
                "id": f"sec-{si}",
                "title": sec["title"],
                "summary": summary,
                "image_ids": ids,
                "images": [image_map[i] for i in ids if i in image_map],
                "text": body,
            }
        )

        # NOTE(review): the inner loop variable reuses the name `body`,
        # shadowing the section body. Harmless today — the section body is
        # not used after this point — but fragile to future edits.
        parts = chunk_text(body, args.chunk_size, args.chunk_overlap)
        for ci, body in enumerate(parts):
            ids2 = image_ids(body)
            chunk = {
                "id": f"sec-{si}-chunk-{ci}",
                "type": "text",
                "section_id": f"sec-{si}",
                "section_title": sec["title"],
                "text": body,
                "image_ids": ids2,
                "metadata": {
                    "source_url": args.source_url,
                    "text_file": str(text_path),
                    "char_len": len(body),
                },
            }
            chunks.append(chunk)
            nodes.append(chunk)

    # One retrieval node per image, linked back to referencing sections.
    image_nodes = []
    for item in image_rows:
        iid = item.get("id")
        if not iid:
            continue
        refs = [sec["id"] for sec in out_sections if iid in sec["image_ids"]]
        # Embeddable text for the image: alt text plus OCR text, normalized.
        text = clean("\n".join(x for x in [item.get("alt", ""), item.get("ocr_text", "")] if x))
        image = {
            "id": f"image-{iid}",
            "type": "image",
            "image_id": iid,
            "section_ids": refs,
            "source_url": item.get("url", ""),
            "alt": item.get("alt", ""),
            "ocr_text": item.get("ocr_text", ""),
            "text": text,
            "metadata": {
                "source_url": args.source_url,
                "text_file": str(text_path),
                "ocr_chars": item.get("ocr_chars", len(item.get("ocr_text", "") or "")),
                "status": item.get("status", ""),
            },
        }
        image_nodes.append(image)
        nodes.append(image)

    out = {
        "source_url": args.source_url,
        "text_file": str(text_path),
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "mode": args.mode,
        "inline_ocr": args.inline_ocr,
        "sections": out_sections,
        "chunks": chunks,
        "image_nodes": image_nodes,
        "nodes": nodes,
    }
    Path(args.output).write_text(json.dumps(out, ensure_ascii=False, indent=2), encoding="utf-8")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/script/rag/url-to-text.sh b/script/rag/url-to-text.sh
new file mode 100755
index 00000000000..bd600008225
--- /dev/null
+++ b/script/rag/url-to-text.sh
@@ -0,0 +1,449 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)
+DOC=${RAG_DOCLING_BIN:-"$ROOT/.venv-docling/bin/docling"}
+PY=${RAG_DOCLING_PYTHON_BIN:-"$ROOT/.venv-docling/bin/python"}
+OUT=${RAG_TEXT_URL_OUTPUT:-"$ROOT/.rag/text/url"}
+HTML=${RAG_TEXT_URL_HTML:-"$ROOT/.rag/html/url"}
+URL=""
+NAME=""
+KEEP_HTML=false
+OCR_IMAGES=false
+IMAGE_LIMIT=${RAG_TEXT_URL_IMAGE_LIMIT:-30}
+OCR_ENGINE=${RAG_TEXT_URL_OCR_ENGINE:-}
+OCR_LANG=${RAG_TEXT_URL_OCR_LANG:-}
+OCR_ARTIFACTS=${RAG_TEXT_URL_OCR_ARTIFACTS:-}
+OCR_PSM=${RAG_TEXT_URL_OCR_PSM:-}
+IMAGE_INLINE=${RAG_TEXT_URL_IMAGE_INLINE:-marker}
+USER=${RAG_TEXT_URL_USER:-}
+PASS=${RAG_TEXT_URL_PASSWORD:-}
+COOKIE=${RAG_TEXT_URL_COOKIE:-}
+COOKIE_FILE=${RAG_TEXT_URL_COOKIE_FILE:-}
+PROXY=${RAG_TEXT_URL_PROXY:-}
+NO_PROXY_MODE=false
+INSECURE=false
+declare -a HDR=()
+
+usage() {
+ cat <<'EOF'
+Fetch one URL as HTML, then convert it to plain text with docling.
+
+Usage:
+ script/rag/url-to-text.sh --url URL [--name NAME] [--output DIR] [--html-dir DIR] [--header "K: V"] [--user USER --password PASS] [--cookie "a=b"] [--cookie-file FILE] [--proxy URL] [--no-proxy] [--insecure] [--keep-html] [--ocr-images] [--image-limit N] [--ocr-engine NAME] [--ocr-lang CODE] [--psm N] [--image-inline MODE]
+
+Options:
+ --url URL Source URL to fetch
+ --name NAME Output file stem (default: generated from URL)
+ --output DIR Text output directory (default: ./.rag/text/url)
+ --html-dir DIR Downloaded HTML directory (default: ./.rag/html/url)
+ --header "K: V" Extra request header for curl (repeatable)
+ --user USER HTTP auth username for URL fetch
+ --password PASS HTTP auth password for URL fetch (or set RAG_TEXT_URL_PASSWORD)
+ --cookie "k=v;..." Cookie header value
+ --cookie-file FILE Netscape cookie file used by curl
+ --proxy URL Proxy for curl requests
+ --no-proxy Bypass proxy for all hosts (adds --noproxy "*")
+ --insecure Allow insecure TLS for intranet/self-signed cert
+ --keep-html Keep downloaded HTML file
+  --ocr-images         OCR text in <img> resources and append to output txt
+ --image-limit N Max images to OCR when --ocr-images is enabled (default: 30)
+ --ocr-engine NAME OCR engine for image OCR (for example: tesseract, rapidocr, auto)
+ --ocr-lang CODE OCR language list (for example: eng or eng,chi_sim)
+ --psm N OCR page segmentation mode, 0-13 (useful for tesseract)
+ --image-inline MODE Inline image strategy: marker|ocr|none (default: marker)
+ --artifacts-path PATH Local docling artifacts path for OCR-related models
+ --docling-bin PATH docling executable (default: ./.venv-docling/bin/docling)
+ --python-bin PATH python executable used to parse html img tags (default: ./.venv-docling/bin/python)
+ -h, --help Show help
+EOF
+}
+
+slug() {
+ printf '%s' "$1" |
+ sed -E 's#https?://##; s#[^a-zA-Z0-9._-]+#-#g; s#-+#-#g; s#(^-|-$)##g' |
+ cut -c1-120
+}
+
+while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --url)
+ URL="$2"
+ shift 2
+ ;;
+ --name)
+ NAME="$2"
+ shift 2
+ ;;
+ --output)
+ OUT="$2"
+ shift 2
+ ;;
+ --html-dir)
+ HTML="$2"
+ shift 2
+ ;;
+ --header)
+ HDR+=("$2")
+ shift 2
+ ;;
+ --user)
+ USER="$2"
+ shift 2
+ ;;
+ --password)
+ PASS="$2"
+ shift 2
+ ;;
+ --cookie)
+ COOKIE="$2"
+ shift 2
+ ;;
+ --cookie-file)
+ COOKIE_FILE="$2"
+ shift 2
+ ;;
+ --proxy)
+ PROXY="$2"
+ shift 2
+ ;;
+ --no-proxy)
+ NO_PROXY_MODE=true
+ shift
+ ;;
+ --insecure)
+ INSECURE=true
+ shift
+ ;;
+ --keep-html)
+ KEEP_HTML=true
+ shift
+ ;;
+ --ocr-images)
+ OCR_IMAGES=true
+ shift
+ ;;
+ --image-limit)
+ IMAGE_LIMIT="$2"
+ shift 2
+ ;;
+ --ocr-engine)
+ OCR_ENGINE="$2"
+ shift 2
+ ;;
+ --ocr-lang)
+ OCR_LANG="$2"
+ shift 2
+ ;;
+ --psm)
+ OCR_PSM="$2"
+ shift 2
+ ;;
+ --image-inline)
+ IMAGE_INLINE="$2"
+ shift 2
+ ;;
+ --artifacts-path)
+ OCR_ARTIFACTS="$2"
+ shift 2
+ ;;
+ --docling-bin)
+ DOC="$2"
+ shift 2
+ ;;
+ --python-bin)
+ PY="$2"
+ shift 2
+ ;;
+ -h|--help)
+ usage
+ exit 0
+ ;;
+ *)
+ echo "unknown argument: $1" >&2
+ usage
+ exit 1
+ ;;
+ esac
+done
+
+if [[ -z "$URL" ]]; then
+ echo "--url is required" >&2
+ usage
+ exit 1
+fi
+
+if [[ ! -x "$DOC" ]]; then
+ echo "docling not found: $DOC" >&2
+ exit 1
+fi
+
+if ! command -v curl >/dev/null 2>&1; then
+ echo "curl not found" >&2
+ exit 1
+fi
+
+if [[ -n "$COOKIE_FILE" && ! -f "$COOKIE_FILE" ]]; then
+ echo "cookie file not found: $COOKIE_FILE" >&2
+ exit 1
+fi
+
+if [[ "$OCR_IMAGES" == "true" && ! -x "$PY" ]]; then
+ echo "python not found or not executable: $PY" >&2
+ exit 1
+fi
+
+if [[ "$OCR_IMAGES" == "true" ]]; then
+ if [[ -z "$OCR_ENGINE" ]]; then
+ if command -v tesseract >/dev/null 2>&1; then
+ OCR_ENGINE="tesseract"
+ if [[ -z "$OCR_LANG" ]]; then
+ OCR_LANG="eng,chi_sim"
+ fi
+ echo "image OCR engine selected: tesseract" >&2
+ else
+ OCR_ENGINE="auto"
+ echo "image OCR engine selected: auto (tesseract not found)" >&2
+ fi
+ fi
+
+ if [[ "$OCR_ENGINE" == "tesseract" ]]; then
+ if ! command -v tesseract >/dev/null 2>&1; then
+ echo "tesseract not found, install it first: sudo apt install -y tesseract-ocr tesseract-ocr-eng tesseract-ocr-chi-sim" >&2
+ exit 1
+ fi
+ if [[ -z "$OCR_LANG" ]]; then
+ OCR_LANG="eng,chi_sim"
+ fi
+ if [[ -z "$OCR_PSM" ]]; then
+ OCR_PSM="6"
+ fi
+ fi
+ echo "image OCR config: engine=$OCR_ENGINE lang=${OCR_LANG:-} psm=${OCR_PSM:-}" >&2
+fi
+
+if [[ -n "$OCR_PSM" ]] && ! [[ "$OCR_PSM" =~ ^[0-9]+$ ]]; then
+ echo "invalid --psm: $OCR_PSM" >&2
+ exit 1
+fi
+if [[ "$IMAGE_INLINE" != "marker" && "$IMAGE_INLINE" != "ocr" && "$IMAGE_INLINE" != "none" ]]; then
+ echo "invalid --image-inline: $IMAGE_INLINE (expected marker|ocr|none)" >&2
+ exit 1
+fi
+
+if [[ -z "$NAME" ]]; then
+ NAME=$(slug "$URL")
+fi
+
+if [[ -z "$NAME" ]]; then
+ NAME="page-$(date +%Y%m%d-%H%M%S)"
+fi
+
+mkdir -p "$OUT" "$HTML"
+HTML_FILE="$HTML/$NAME.html"
+
+declare -a CURL_CMD=("curl" "-fsSL")
+if [[ "$NO_PROXY_MODE" == "true" ]]; then
+ CURL_CMD+=("--noproxy" "*")
+elif [[ -n "$PROXY" ]]; then
+ CURL_CMD+=("--proxy" "$PROXY")
+fi
+if [[ "$INSECURE" == "true" ]]; then
+ CURL_CMD+=("-k")
+fi
+if [[ -n "$USER" ]]; then
+ CURL_CMD+=("-u" "$USER:$PASS")
+fi
+if [[ -n "$COOKIE" ]]; then
+ CURL_CMD+=("-H" "Cookie: $COOKIE")
+fi
+if [[ -n "$COOKIE_FILE" ]]; then
+ CURL_CMD+=("-b" "$COOKIE_FILE")
+fi
+CURL_CMD+=("$URL" "-o" "$HTML_FILE")
+for h in "${HDR[@]}"; do
+ CURL_CMD+=("-H" "$h")
+done
+"${CURL_CMD[@]}"
+
+"$DOC" "$HTML_FILE" --from html --to text --output "$OUT" --abort-on-error
+
+TXT_FILE="$OUT/$NAME.txt"
+if [[ ! -f "$TXT_FILE" ]]; then
+ FALLBACK=$(find "$OUT" -maxdepth 1 -type f -name "$NAME*.txt" | head -n 1 || true)
+ if [[ -n "$FALLBACK" ]]; then
+ TXT_FILE="$FALLBACK"
+ fi
+fi
+
+if [[ ! -f "$TXT_FILE" ]]; then
+ echo "docling conversion finished but no txt was found for: $NAME" >&2
+ exit 1
+fi
+
+if [[ "$OCR_IMAGES" == "true" ]]; then
+ TMP=$(mktemp -d)
+ trap 'rm -rf "$TMP"' EXIT
+ IMG_LIST="$TMP/image_urls.txt"
+ IMG_META="$TMP/image_meta.json"
+ IMG_DIR="$TMP/images"
+ OCR_DIR="$TMP/ocr"
+ mkdir -p "$IMG_DIR" "$OCR_DIR"
+
+ "$PY" - "$URL" "$HTML_FILE" "$IMG_LIST" "$IMG_META" <<'PY'
+import json
+import pathlib
+import sys
+from urllib.parse import urljoin
+from bs4 import BeautifulSoup
+
+base = sys.argv[1]
+html_path = pathlib.Path(sys.argv[2])
+out = pathlib.Path(sys.argv[3])
+meta = pathlib.Path(sys.argv[4])
+raw = html_path.read_text(encoding="utf-8", errors="ignore")
+soup = BeautifulSoup(raw, "html.parser")
+seen = set()
+rows = []
+for n in soup.find_all("img"):
+ src = (n.get("src") or n.get("data-src") or n.get("data-original") or "").strip()
+ if not src:
+ continue
+ if src.startswith("data:"):
+ continue
+ u = urljoin(base, src)
+ if not u or u in seen:
+ continue
+ seen.add(u)
+ rows.append(
+ {
+ "id": f"img-{len(rows)}",
+ "url": u,
+ "alt": (n.get("alt") or "").strip(),
+ }
+ )
+out.write_text("\n".join(row["url"] for row in rows), encoding="utf-8")
+meta.write_text(json.dumps(rows, ensure_ascii=False, indent=2), encoding="utf-8")
+PY
+
+ mapfile -t IMAGES <"$IMG_LIST"
+ MAX="$IMAGE_LIMIT"
+ if ! [[ "$MAX" =~ ^[0-9]+$ ]]; then
+ echo "invalid --image-limit: $MAX" >&2
+ exit 1
+ fi
+
+ OCR_OK=0
+ OCR_BAD=0
+ OCR_DONE=0
+ OCR_LOG="$OUT/$NAME.image_ocr.log"
+ : >"$OCR_LOG"
+ for u in "${IMAGES[@]}"; do
+ if [[ "$OCR_DONE" -ge "$MAX" ]]; then
+ break
+ fi
+ clean="${u%%\?*}"
+ ext="${clean##*.}"
+ if [[ "$ext" == "$clean" ]] || [[ ! "$ext" =~ ^[A-Za-z0-9]{1,6}$ ]]; then
+ ext="img"
+ fi
+ f="$IMG_DIR/img-$OCR_DONE.$ext"
+ declare -a CURL_IMAGE=("curl" "-fsSL")
+ if [[ "$NO_PROXY_MODE" == "true" ]]; then
+ CURL_IMAGE+=("--noproxy" "*")
+ elif [[ -n "$PROXY" ]]; then
+ CURL_IMAGE+=("--proxy" "$PROXY")
+ fi
+ if [[ "$INSECURE" == "true" ]]; then
+ CURL_IMAGE+=("-k")
+ fi
+ if [[ -n "$USER" ]]; then
+ CURL_IMAGE+=("-u" "$USER:$PASS")
+ fi
+ if [[ -n "$COOKIE" ]]; then
+ CURL_IMAGE+=("-H" "Cookie: $COOKIE")
+ fi
+ if [[ -n "$COOKIE_FILE" ]]; then
+ CURL_IMAGE+=("-b" "$COOKIE_FILE")
+ fi
+ CURL_IMAGE+=("$u" "-o" "$f" "-H" "Referer: $URL")
+ for h in "${HDR[@]}"; do
+ CURL_IMAGE+=("-H" "$h")
+ done
+ if ! "${CURL_IMAGE[@]}" >/dev/null 2>&1; then
+ OCR_BAD=$((OCR_BAD + 1))
+ OCR_DONE=$((OCR_DONE + 1))
+ continue
+ fi
+
+ t="$OCR_DIR/$(basename "$f").txt"
+ if [[ "$OCR_ENGINE" == "tesseract" ]]; then
+ declare -a TESS=("tesseract" "$f" "stdout")
+ if [[ -n "$OCR_LANG" ]]; then
+ TESS+=("-l" "${OCR_LANG//,/+}")
+ fi
+ if [[ -n "$OCR_PSM" ]]; then
+ TESS+=("--psm" "$OCR_PSM")
+ fi
+ if "${TESS[@]}" >"$t" 2>>"$OCR_LOG"; then
+ :
+ else
+ OCR_BAD=$((OCR_BAD + 1))
+ OCR_DONE=$((OCR_DONE + 1))
+ continue
+ fi
+ else
+ declare -a OCR_CMD=("$DOC" "$f" "--from" "image" "--to" "text" "--output" "$OCR_DIR" "--ocr" "--force-ocr" "--abort-on-error")
+ if [[ -n "$OCR_ENGINE" ]]; then
+ OCR_CMD+=("--ocr-engine" "$OCR_ENGINE")
+ fi
+ if [[ -n "$OCR_LANG" ]]; then
+ OCR_CMD+=("--ocr-lang" "$OCR_LANG")
+ fi
+ if [[ -n "$OCR_ARTIFACTS" ]]; then
+ OCR_CMD+=("--artifacts-path" "$OCR_ARTIFACTS")
+ fi
+ if [[ -n "$OCR_PSM" ]]; then
+ OCR_CMD+=("--psm" "$OCR_PSM")
+ fi
+ if "${OCR_CMD[@]}" >>"$OCR_LOG" 2>&1; then
+ :
+ else
+ OCR_BAD=$((OCR_BAD + 1))
+ OCR_DONE=$((OCR_DONE + 1))
+ continue
+ fi
+ fi
+
+ if [[ -s "$t" ]] && grep -q '[^[:space:]]' "$t"; then
+ OCR_OK=$((OCR_OK + 1))
+ else
+ OCR_BAD=$((OCR_BAD + 1))
+ fi
+ OCR_DONE=$((OCR_DONE + 1))
+ done
+
+ SIDECAR="$OUT/$NAME.images.json"
+ RAW_TXT="$OUT/$NAME.raw.txt"
+ "$PY" "$ROOT/script/rag/merge-image-ocr.py" \
+ --text "$TXT_FILE" \
+ --meta "$IMG_META" \
+ --ocr-dir "$OCR_DIR" \
+ --sidecar "$SIDECAR" \
+ --raw "$RAW_TXT" \
+ --inline-mode "$IMAGE_INLINE" \
+ --source-url "$URL"
+
+ echo "image_ocr_total=${#IMAGES[@]} scanned=$OCR_DONE success=$OCR_OK failed=$OCR_BAD" >&2
+ echo "image_sidecar=$SIDECAR" >&2
+ if [[ "${#IMAGES[@]}" -gt 0 && "$OCR_OK" -eq 0 ]]; then
+ echo "image OCR produced no text; inspect log: $OCR_LOG" >&2
+ echo "hint: try --ocr-lang chi_sim or eng,chi_sim with --psm 6; if page images are tiny/icons, OCR may return empty." >&2
+ fi
+fi
+
+if [[ "$KEEP_HTML" != "true" ]]; then
+ rm -f "$HTML_FILE"
+fi
+
+echo "$TXT_FILE"
diff --git a/specs/rag-docling-deploy.zh.md b/specs/rag-docling-deploy.zh.md
new file mode 100644
index 00000000000..9e17aa8adcf
--- /dev/null
+++ b/specs/rag-docling-deploy.zh.md
@@ -0,0 +1,507 @@
+# RAG 文本化部署手册(Docling)
+
+本手册记录从环境准备到文本产出的完整步骤,适合在本地或内网机器复用。
+
+## 1. 环境准备
+
+在 Debian/Ubuntu 上安装 Python 虚拟环境能力:
+
+```bash
+sudo apt update
+sudo apt install -y python3 python3-venv python3-full curl
+```
+
+验证版本:
+
+```bash
+python3 --version
+curl --version | head -n 1
+```
+
+## 2. 安装 Docling(隔离 venv)
+
+在仓库根目录执行:
+
+```bash
+cd /home/zhang/01-my_code/09-my-opencode/opencode-worktrees/rag-enhance
+bash script/rag/install-docling.sh
+```
+
+脚本行为:
+
+1. 创建 `./.venv-docling`
+2. 升级 `pip/setuptools/wheel`
+3. 安装 `script/rag/requirements-docling.txt` 中的 `docling`
+4. 输出 `docling --version` 作为健康检查
+
+可选参数:
+
+```bash
+bash script/rag/install-docling.sh \
+ --venv /opt/rag/.venv-docling \
+ --python python3 \
+ --requirements script/rag/requirements-docling.txt
+```
+
+内网离线安装(本地 wheel 仓):
+
+```bash
+bash script/rag/install-docling.sh \
+ --venv /opt/rag/.venv-docling \
+ --requirements script/rag/requirements-docling.txt \
+ --wheelhouse /opt/rag/docling-wheelhouse
+```
+
+## 3. 激活环境(可选)
+
+脚本默认直接调用绝对路径,不强制激活;如需手动调试可激活:
+
+```bash
+source .venv-docling/bin/activate
+docling --version
+```
+
+## 3.1 安装 Tesseract(方案 A,推荐内网)
+
+在 Debian/Ubuntu 上执行:
+
+```bash
+bash script/rag/install-tesseract.sh
+```
+
+默认安装:
+
+- `tesseract-ocr`
+- `tesseract-ocr-eng`
+- `tesseract-ocr-chi-sim`
+
+可自定义语言包:
+
+```bash
+bash script/rag/install-tesseract.sh --langs "eng chi-sim"
+```
+
+## 4. URL 抓取 HTML 并转换为 text
+
+单 URL:
+
+```bash
+bash script/rag/url-to-text.sh \
+ --url "https://example.com"
+```
+
+开启图片 OCR(识别页面 `img` 里的文字):
+
+```bash
+bash script/rag/url-to-text.sh \
+ --url "https://example.com" \
+ --ocr-images \
+ --image-limit 30 \
+ --image-inline marker
+```
+
+说明:当 `--ocr-images` 启用且系统存在 `tesseract` 时,脚本会默认优先使用 `tesseract`(更适合内网离线)。
+且该路径会直接调用系统 `tesseract`,避免 docling 的 OSD 包装层导致的部分图片误报失败。
+
+`--image-inline` 说明:
+
+1. `marker`:仅保留 `[IMAGE:img-x]` 占位,OCR 文本只放 sidecar(推荐,避免污染 chunk)
+2. `ocr`:将 OCR 内联到正文(老行为)
+3. `none`:移除图片占位
+
+指定 OCR 引擎/语言:
+
+```bash
+bash script/rag/url-to-text.sh \
+ --url "https://example.com" \
+ --ocr-images \
+ --ocr-engine tesseract \
+ --ocr-lang eng,chi_sim \
+ --psm 6
+```
+
+代理控制(避免被错误代理拦住):
+
+```bash
+# 强制绕过代理
+bash script/rag/url-to-text.sh --url "https://example.com" --no-proxy
+
+# 显式指定代理
+bash script/rag/url-to-text.sh --url "https://example.com" --proxy "http://proxy.local:7890"
+```
+
+输出默认为:
+
+- HTML 暂存目录:`./.rag/html/url/`
+- 文本目录:`./.rag/text/url/`
+
+带认证头示例:
+
+```bash
+bash script/rag/url-to-text.sh \
+ --url "https://intranet.example.local/doc?id=123" \
+  --header "Authorization: Bearer <TOKEN>" \
+  --header "Cookie: session=<SESSION_ID>" \
+ --name "intranet-doc-123" \
+ --ocr-images \
+ --keep-html
+```
+
+账号密码认证(Basic/Digest 场景):
+
+```bash
+bash script/rag/url-to-text.sh \
+ --url "https://intranet.example.local/doc/123" \
+ --user "your_user" \
+ --password "your_password" \
+ --ocr-images
+```
+
+Cookie 文件认证(SSO 登录后导出的 cookie):
+
+```bash
+bash script/rag/url-to-text.sh \
+ --url "https://intranet.example.local/doc/123" \
+ --cookie-file /path/to/cookies.txt \
+ --ocr-images
+```
+
+LDAP/SSO 场景说明:
+
+1. LDAP 只负责身份认证,`url-to-text.sh` 不能直接“输入 LDAP”完成网页表单登录
+2. 脚本本质是 `curl` 抓取,通常需要有效 session(Cookie)或网关支持 Basic Auth
+3. 你的内网若是 LDAP + SSO(CAS/OIDC/SAML),推荐先在浏览器登录,再导出 `cookies.txt` 给 `--cookie-file`
+
+命令标准输出会打印生成的 `.txt` 路径,可直接接入后续 embedding 流程。
+
+图片相关输出文件(`--ocr-images`):
+
+1. 主文本:`<name>.txt`(`<img>` 会被替换为 `[IMAGE:img-x]` + 就地 OCR)
+2. 原始文本备份:`<name>.raw.txt`
+3. 图片 sidecar:`<name>.images.json`(包含 `id/url/alt/ocr_text/status`)
+4. OCR 运行日志:`<name>.image_ocr.log`
+
+说明:
+
+1. 默认只提取 HTML 可见文本,不做图片 OCR
+2. `--ocr-images` 会解析页面 `<img>` 链接并逐张 OCR,并就地写回到图片占位符附近
+3. 若页面是前端渲染(图片不在原始 HTML),需要先用浏览器渲染后再抓取 HTML 或导出 PDF 再转文本
+
+### 图片 OCR 常见问题
+
+如果你看到“图片无法识别”或 `image_ocr_total` 有值但 `success=0`,通常是 OCR 模型未就绪:
+
+1. `docling` 的 `rapidocr/auto` 首次运行可能需要联网下载模型
+2. 内网环境需预下载模型并同步缓存,或改用本机 `tesseract`
+
+你给的日志 `wiki.luckfox.com-zh-Luckfox-Pico-Zero-Overview.image_ocr.log` 显示:
+
+1. 模型下载是成功的(`Successfully saved`)
+2. 失败原因是 `RapidOCR returned empty result`(检测不到文字)
+3. 因此该问题不只是“无法访问”,更像是该页面图片内容对 RapidOCR 不友好
+4. 当前切换到 tesseract 后,报错多为 `OSD failed / Too few characters`,可通过 `--psm 6` 降低此类问题
+
+推荐排查顺序:
+
+```bash
+# 1) 查看脚本 stderr 给出的 image_ocr.log(默认在输出目录,如 ./.rag/text/url/<name>.image_ocr.log)
+
+# 2) 若能用系统 OCR,安装 tesseract 后强制使用
+sudo apt install -y tesseract-ocr tesseract-ocr-eng
+bash script/rag/url-to-text.sh --url "https://example.com" --ocr-images --ocr-engine tesseract --ocr-lang eng
+
+# 3) 若必须用 docling 默认 OCR,则在可联网机器先完成一次图片 OCR 预热,
+# 再把相关缓存目录复制到内网机器(例如 ~/.cache/rapidocr、~/.cache/docling)
+```
+
+## 5. 批量目录转 text
+
+把资料目录递归转换成文本,并保持子目录结构:
+
+```bash
+bash script/rag/convert-dir-to-text.sh \
+ --input /data/rag/raw \
+ --output /data/rag/text
+```
+
+默认处理扩展名:
+
+`pdf docx pptx html htm md txt csv xls xlsx xml`
+
+自定义扩展名:
+
+```bash
+bash script/rag/convert-dir-to-text.sh \
+ --input /data/rag/raw \
+ --output /data/rag/text \
+ --ext "pdf docx html"
+```
+
+转换日志:
+
+- 成功清单:`/data/rag/text/_success.log`
+- 失败清单:`/data/rag/text/_failed.log`
+- 运行日志:`/data/rag/text/_run.log`
+
+## 6. 内网离线打包与安装(Ubuntu 22.04)
+
+在可联网机器打包:
+
+```bash
+bash script/rag/build-offline-bundle.sh \
+ --out /tmp/rag-offline-bundle \
+ --langs "eng chi-sim" \
+ --include-llamaindex \
+ --include-vectordb
+```
+
+产物:
+
+1. 目录:`/tmp/rag-offline-bundle`
+2. 压缩包:`/tmp/rag-offline-bundle.tar.gz`
+
+拷贝到内网目标机后安装:
+
+```bash
+tar -xzf rag-offline-bundle.tar.gz
+bash script/rag/install-offline-bundle.sh \
+ --bundle ./rag-offline-bundle \
+ --venv ./.venv-docling \
+ --install-llamaindex \
+ --install-vectordb
+```
+
+## 7. 数据清洗与结构化
+
+清洗文本:
+
+```bash
+./.venv-docling/bin/python script/rag/clean-text.py \
+  --input .rag/text/url/<name>.txt \
+  --output .rag/text/url/<name>.clean.txt
+```
+
+结构化输出(规则模式):
+
+```bash
+./.venv-docling/bin/python script/rag/structure-text.py \
+  --text .rag/text/url/<name>.clean.txt \
+  --images .rag/text/url/<name>.images.json \
+  --output .rag/text/url/<name>.structured.json \
+ --source-url "https://example.com" \
+ --mode rule \
+ --inline-ocr strip
+```
+
+结构化输出(LlamaIndex):
+
+```bash
+export OPENAI_API_KEY=...
+./.venv-docling/bin/python script/rag/structure-text.py \
+  --text .rag/text/url/<name>.clean.txt \
+  --images .rag/text/url/<name>.images.json \
+  --output .rag/text/url/<name>.structured.json \
+ --source-url "https://example.com" \
+ --mode llamaindex \
+ --model gpt-4o-mini
+```
+
+结构化结果包含:
+
+1. `sections`:章节级标题、摘要、正文、关联图片 metadata
+2. `chunks`:可直接喂 embedding 的分块 + `image_ids` + 来源 metadata
+
+## 8. 备用离线方式(wheelhouse 手工流程)
+
+若内网机器不能直接访问公网,建议在可联网机器提前准备 wheel 包:
+
+```bash
+mkdir -p /tmp/docling-wheelhouse
+python3 -m venv /tmp/docling-venv
+/tmp/docling-venv/bin/python -m pip install -U pip
+/tmp/docling-venv/bin/pip download -r script/rag/requirements-docling.txt -d /tmp/docling-wheelhouse
+tar -C /tmp -czf docling-wheelhouse.tar.gz docling-wheelhouse
+```
+
+将 `docling-wheelhouse.tar.gz` 拷贝到内网机器后:
+
+```bash
+tar -xzf docling-wheelhouse.tar.gz
+python3 -m venv .venv-docling
+.venv-docling/bin/python -m pip install -U pip
+.venv-docling/bin/pip install --no-index --find-links ./docling-wheelhouse -r script/rag/requirements-docling.txt
+```
+
+## 9. 最小验收
+
+```bash
+./.venv-docling/bin/docling --version
+bash script/rag/url-to-text.sh --url "https://example.com"
+```
+
+满足以下条件即通过:
+
+1. `docling --version` 正常返回版本信息
+2. URL 转换命令输出一个 `.txt` 文件路径
+3. 对应 `.txt` 文件可读取并包含页面正文
+
+## 10. 向量库落地(Qdrant 本地持久化 + Ollama Embedding)
+
+安装向量依赖:
+
+```bash
+bash script/rag/install-vector.sh
+```
+
+准备 Ollama embedding 模型(建议):
+
+```bash
+ollama pull nomic-embed-text
+```
+
+设置 OpenAI 兼容环境变量(Ollama):
+
+```bash
+export OPENAI_BASE_URL="http://127.0.0.1:11434/v1"
+export OPENAI_API_KEY="ollama"
+```
+
+构建向量索引(单文件):
+
+```bash
+./.venv-docling/bin/python script/rag/build-vector-index.py \
+  --input .rag/text/url/<name>.structured.json \
+ --db-path .rag/vector/qdrant \
+ --collection rag_chunks \
+ --model nomic-embed-text \
+ --recreate
+```
+
+构建向量索引(目录批量):
+
+```bash
+./.venv-docling/bin/python script/rag/build-vector-index.py \
+ --input-dir .rag/text/url \
+ --glob "*.structured.json" \
+ --db-path .rag/vector/qdrant \
+ --collection rag_chunks \
+ --model nomic-embed-text
+```
+
+检索验证:
+
+```bash
+./.venv-docling/bin/python script/rag/search-vector-index.py \
+ --query "如何刷写镜像到 Luckfox Pico Zero" \
+ --db-path .rag/vector/qdrant \
+ --collection rag_chunks \
+ --model nomic-embed-text \
+ --top-k 5
+```
+
+向量脚本产物说明:
+
+1. 向量库目录:`.rag/vector/qdrant`
+2. 集合名:默认 `rag_chunks`
+3. 每条向量 payload 包含:`node_type(text/image)`、`chunk_id`、`section_title`、`source_url`、`image_ids`、`text`
+
+## 11. OpenCode 注入 RAG 上下文
+
+已提供两种接入方式:
+
+1. 自定义工具:`.opencode/tool/rag_search.ts`(手动调用)
+2. 自动注入插件:`.opencode/plugins/rag_context.ts`(每轮用户消息前自动检索 top-k 注入 `<rag_context>`)
+
+建议环境变量:
+
+```bash
+export OPENAI_BASE_URL="http://192.168.0.99:11434/v1"
+export OPENAI_API_KEY="ollama"
+export RAG_STRUCT_MODE="llamaindex"
+export RAG_STRUCT_MODEL="gpt-4o-mini"
+export RAG_EMBED_MODEL="qwen3-embedding:4b"
+export RAG_COLLECTION="rag_chunks"
+export RAG_TOP_K=4
+export RAG_CONTEXT_HITS=2
+export RAG_CONTEXT_CHARS=120
+export RAG_AUTO_INJECT=1
+```
+
+关闭自动注入:
+
+```bash
+export RAG_AUTO_INJECT=0
+```
+
+可选调试(排查“是否注入成功”):
+
+```bash
+export RAG_DEBUG_LOG=1
+```
+
+插件会写入:`.rag/log/rag_context.log`
+
+可选覆盖(当 OpenAI 兼容地址或密钥与默认环境不同):
+
+```bash
+export RAG_BASE_URL="http://192.168.0.99:11434/v1"
+export RAG_API_KEY="ollama"
+```
+
+## 12. Agent 一键编排(Skill)
+
+已新增技能文件:`.opencode/skills/rag-pipeline/SKILL.md`
+
+建议通过统一入口命令执行:
+
+初始化(首建):
+
+```bash
+bash script/rag/cmd/rag-init.sh --source structured --scan-dir .rag/text --glob "**/*.structured.json" --embed-model qwen3-embedding:4b --collection rag_chunks
+```
+
+增量更新:
+
+```bash
+bash script/rag/cmd/rag-update.sh --source structured --scan-dir .rag/text --glob "**/*.structured.json" --embed-model qwen3-embedding:4b --collection rag_chunks
+```
+
+该流程会维护 manifest(默认 `.rag/state/manifest.json`)用于判断:
+
+1. `changed`:内容 hash 变化,执行“先删旧 doc_key,再 upsert 新向量”
+2. `removed`:文件消失,执行按 doc_key 删除
+3. embedding 模型或 collection 变化,自动触发全量重建
+
+建议只暴露这些高层选项给用户:
+
+1. `--source`
+2. `--struct-mode`/`--struct-model`
+3. `--embed-model`
+4. 数据来源参数(`--url`/`--url-file`/`--input-dir`/`--scan-dir`)
+5. `--collection`
+
+其余算法细节(chunk、重试、OCR 引擎细节)默认不暴露。
+
+## 13. 迁移到其他项目
+
+在当前仓库执行:
+
+```bash
+bash script/rag/cmd/rag-bootstrap.sh --target /path/to/target-project
+```
+
+默认会复制:
+
+1. `script/rag/*`(安装、转换、结构化、索引、检索、init/update)
+2. `.opencode/tool/rag_search.*`
+3. `.opencode/plugins/rag_context.ts`
+4. `.opencode/skills/rag-pipeline/SKILL.md`
+
+目标项目里继续执行:
+
+```bash
+cd /path/to/target-project
+bash script/rag/install-docling.sh
+bash script/rag/install-vector.sh
+bash script/rag/cmd/rag-init.sh --help
+```
diff --git a/specs/rag-enhance-architecture.zh.md b/specs/rag-enhance-architecture.zh.md
new file mode 100644
index 00000000000..261f5a851df
--- /dev/null
+++ b/specs/rag-enhance-architecture.zh.md
@@ -0,0 +1,266 @@
+# RAG Enhance 架构设计说明(rag-enhance)
+
+## 1. 目标与设计原则
+
+### 1.1 目标
+
+1. 在 OpenCode 对话中提供稳定的本地 RAG 能力(内网可部署)
+2. 降低重复检索与重复注入导致的推理循环
+3. 控制上下文窗口占用,优先增量披露
+4. 提供可观测调试手段,便于快速定位问题
+
+### 1.2 原则
+
+1. 优先改插件与脚本,不侵入 opencode core
+2. 结构化协议先行(`<rag_state>` + `<rag_context>`)
+3. 去重与增量优先于硬编码“单次限制”
+4. 参数可配置,默认值保守
+
+## 2. 总体架构
+
+### 2.1 模块分层
+
+1. 数据准备层:`script/rag/url-to-text.sh`、`convert-dir-to-text.sh`、`clean-text.py`
+2. 结构化层:`script/rag/structure-text.py`(rule/llamaindex)
+3. 向量索引层:`script/rag/build-vector-index.py` + Qdrant local
+4. 检索层:`script/rag/search-vector-index.py`
+5. 编排层:`script/rag/rag-pipeline.py` + `cmd/rag-init.sh`/`cmd/rag-update.sh`
+6. 交互层:
+ - 自动注入插件:`.opencode/plugins/rag_context.ts`
+ - 手动工具:`.opencode/tool/rag_search.ts`
+ - 共享状态模块:`.opencode/rag.ts`
+
+### 2.2 运行路径
+
+1. 离线/内网数据进入文本化
+2. 文本结构化为 section/chunk/image 节点
+3. embedding 写入 Qdrant(payload 包含 source、section、doc_key 等)
+4. 对话时:插件读取检索结果并注入状态 meta
+5. 长 query 会先执行 rewrite + multi-query retrieval + merge/rerank
+6. 模型必要时再调用 `rag_search(mode=state|delta|brief|expand)` 渐进补证据
+
+## 3. 文档处理与切分策略
+
+### 3.1 当前切分策略
+
+1. 按 Markdown 标题(`#`)拆 section
+2. section 内按固定窗口切 chunk(默认 `chunk_size=1600`, `chunk_overlap=200`)
+3. 图片 OCR 独立为 image node,避免污染正文 chunk
+
+说明:当前不是句法感知切分,`overlap` 用于缓解边界截断,但不能完全消除语义断裂。
+
+### 3.2 结构化与 LLM
+
+1. `structure-text.py` 直接执行时默认 `mode=rule`
+2. `rag-pipeline.py` 默认 `RAG_STRUCT_MODE=llamaindex`
+3. llamaindex 模式下调用 OpenAI 兼容接口做 section summary
+
+## 4. 检索交互协议(RAG-LLM)
+
+### 4.1 注入块
+
+插件当前向用户消息注入一个主逻辑块:
+
+1. `<rag_state>`:检索状态协议(短)
+
+说明:正文证据当前主要通过 `rag_search` 渐进披露,不再由自动注入直接提供。
+
+示例:
+
+```text
+<rag_state>
+status=no_new_evidence
+reason=high_overlap
+cluster=luckfox|zero|烧录
+delta_hits=0
+known_hits=3
+next_action=reuse_known_evidence_or_refine_query
+</rag_state>
+```
+
+### 4.2 status 枚举
+
+1. `new_evidence`
+2. `no_new_evidence`
+3. `weak_match`
+4. `need_refine`
+5. `cluster_throttled`
+6. `retrieval_error`
+7. `state_reset`
+
+### 4.3 reason 典型值
+
+1. `fresh_hits`
+2. `delta_available`
+3. `high_overlap`
+4. `low_score`
+5. `empty_hits`
+6. `cluster_window_limit`
+7. `backend_error`
+8. `parse_error`
+9. `compaction_epoch_changed`
+10. `cached_recent_result`
+
+## 5. 去重、增量与局部限流
+
+### 5.1 Query Cluster
+
+`query_cluster` 为“检索意图簇”,由 query 规范化词项生成(停用词过滤+同义词归一+排序)。
+
+用途:
+
+1. 将近义 query 归为同簇
+2. 对同簇做局部预算与节流
+3. 避免全局限流误伤其他主题
+
+### 5.2 重复检测
+
+1. 命中 fingerprint:`text_file/source + chunk_id/image_id/section`
+2. overlap = 交集 / 当前命中数
+3. `overlap >= RAG_OVERLAP_THRESHOLD` 且无新增时,标记 `no_new_evidence`
+
+### 5.3 增量注入
+
+1. 仅注入“未见过”的 delta hits
+2. 无 delta 时只注入 `<rag_state>`,不重复注入上下文正文
+3. 同 query 的短时间重复触发走缓存复用(`RAG_REUSE_SEC`)
+
+### 5.4 局部限流
+
+1. 仅针对同一 cluster
+2. 时间窗:`RAG_CLUSTER_WINDOW_SEC`
+3. 上限:`RAG_CLUSTER_MAX_FULL`
+4. 超限状态:`cluster_throttled`
+
+## 6. 渐进式披露
+
+`rag_search` 支持模式:
+
+1. `state`:只返回检索状态
+2. `delta`:同 query cluster 仅新增证据(默认)
+3. `brief`:当前命中的短摘要
+4. `expand`:扩展细节(用于二次追问)
+
+策略:
+
+1. 默认由插件持续注入 `rag_state`
+2. 模型需要证据时优先 `delta`
+3. `brief`/`expand` 仅在需要更多正文时使用
+
+## 7. 会话生命周期与 compact
+
+### 7.1 loop 触发
+
+OpenCode loop 每步都会触发 `experimental.chat.messages.transform`,因此插件必须具备状态机去重能力。
+
+### 7.2 compaction 重置
+
+插件实现 `experimental.session.compacting`:
+
+1. session `epoch + 1`
+2. 清空 seen hit 与 cluster 窗口
+3. 标记 `state_reset`
+
+目的:防止 compaction 后继续引用旧上下文状态。
+
+## 8. 配置参数
+
+### 8.1 基础连接
+
+1. `OPENAI_BASE_URL` / `OPENAI_API_KEY`
+2. `RAG_BASE_URL` / `RAG_API_KEY`(覆盖)
+3. `RAG_WORKTREE`
+4. `RAG_DOCLING_PYTHON_BIN`
+5. `RAG_DB_PATH`
+
+### 8.2 检索与注入
+
+1. `RAG_TOP_K`(默认 4)
+2. `RAG_CONTEXT_HITS`(默认 2)
+3. `RAG_CONTEXT_CHARS`(默认 120)
+4. `RAG_EXPAND_CHARS`(默认 420)
+5. `RAG_REWRITE_MODE`(默认 `auto`)
+6. `RAG_REWRITE_MODEL`
+7. `RAG_REWRITE_QUERIES`(默认 3)
+
+### 8.3 控制与阈值
+
+1. `RAG_AUTO_INJECT`(`0` 关闭)
+2. `RAG_OVERLAP_THRESHOLD`(默认 0.8)
+3. `RAG_WEAK_SCORE`(默认 0.42)
+4. `RAG_CLUSTER_WINDOW_SEC`(默认 30)
+5. `RAG_CLUSTER_MAX_FULL`(默认 2)
+6. `RAG_REUSE_SEC`(默认 8)
+
+### 8.4 调试
+
+1. `RAG_DEBUG=1` 或 `RAG_DEBUG_LOG=1`
+2. 日志:`.rag/log/rag_debug.jsonl`
+3. 查看:`script/rag/debug-rag-state.py --tail 100`
+
+## 9. 典型问题与解决方案
+
+### 9.1 问题:循环检索与重复思考
+
+原因:loop 多步触发 + 命中不充分 + 无状态去重。
+
+解决:
+
+1. `query_cluster` 局部限流
+2. overlap 去重
+3. delta 注入
+4. cache reuse
+
+### 9.2 问题:TUI 回显过多
+
+原因:工具多轮调用 + 大块文本注入。
+
+解决:
+
+1. 默认 `brief`
+2. `RAG_CONTEXT_HITS` 降低
+3. 强制“禁止 dump 原始 JSON/rag_context”系统提示
+4. 必要时仅保留 plugin,禁用显式 `rag_search`
+
+### 9.3 问题:手工命令成功但插件失败
+
+常见:worktree 识别为 `/`。
+
+解决:
+
+1. 显式配置 `RAG_WORKTREE`
+2. 显式配置 `RAG_DOCLING_PYTHON_BIN`
+3. 显式配置 `RAG_DB_PATH`
+
+### 9.4 问题:compaction 后行为异常
+
+原因:检索状态与压缩后消息不一致。
+
+解决:
+
+1. 在 `experimental.session.compacting` 事件重置 RAG 状态
+
+## 10. 运维与回归检查清单
+
+1. 检索可用:`search-vector-index.py` 手工命令返回 hits
+2. 集合存在:Qdrant `rag_chunks` 可见
+3. 插件注入:日志出现 `event=inject`
+4. 无新增命中:出现 `status=no_new_evidence`
+5. 局部限流触发:出现 `event=cluster_throttled`
+6. compact 后:出现 `event=state_reset`
+
+## 11. 代码锚点(便于回溯)
+
+1. 自动注入状态机:`.opencode/plugins/rag_context.ts`
+2. 工具渐进披露:`.opencode/tool/rag_search.ts`
+3. 调试脚本:`script/rag/debug-rag-state.py`
+4. 结构化切分:`script/rag/structure-text.py`
+5. 编排入口:`script/rag/rag-pipeline.py`
+
+## 12. 后续可演进方向
+
+1. 语义切分(句法/段落边界)替代纯字符窗口
+2. query cluster 从词法升级到 embedding 聚类
+3. reranker 引入(重排 top-k)
+4. `expand` 模式支持按 `chunk_id` 精确拉取
+5. 将状态机下沉到独立模块,支持单元测试
diff --git a/specs/rag-llm-prompt-protocol.zh.md b/specs/rag-llm-prompt-protocol.zh.md
new file mode 100644
index 00000000000..d5f5e867f20
--- /dev/null
+++ b/specs/rag-llm-prompt-protocol.zh.md
@@ -0,0 +1,309 @@
+# RAG 输出给 LLM 的当前协议
+
+## 1. 范围
+
+这份文档只描述当前代码里真正输出给 LLM 的内容,不描述 debug 日志,也不描述理想设计。
+
+当前协议由三部分组成:
+
+1. 自动注入的 `<rag_state>`
+2. 系统提示里的 RAG 协议说明
+3. `rag_search` 工具定义与工具返回
+
+相关实现文件:
+
+1. `.opencode/rag.ts`
+2. `.opencode/plugins/rag_context.ts`
+3. `.opencode/tool/rag_search.ts`
+
+## 2. 自动注入块
+
+### 2.1 注入位置
+
+`rag_context` 会在 `experimental.chat.messages.transform` 阶段,把 `<rag_state>` 注入到当前最新的 user text 中。
+
+当前默认行为:
+
+1. 自动注入只注入检索 meta
+2. 不自动注入正文 `<rag_context>`
+3. 正文证据主要由 `rag_search` 按需补充
+
+### 2.2 当前字段
+
+当前注入给 LLM 的 `<rag_state>` 字段来自 `.opencode/rag.ts` 的 `stateBlock()`:
+
+```text
+<rag_state>
+status=...
+reason=...
+cluster=...
+total_hits=...
+delta_hits=...
+known_hits=...
+overlap=...
+top_source=...
+top_section=...
+rewrite_queries=...
+next_action=...
+</rag_state>
+```
+
+字段含义:
+
+1. `status`
+ 当前 `session + cluster` 最近一次有效检索状态
+2. `reason`
+ 对应状态的原因
+3. `cluster`
+ 当前 query 归一化后的检索意图簇
+4. `total_hits`
+ 当前最近一次检索返回的总命中数
+5. `delta_hits`
+ 相对当前 cluster 已知证据,本轮新增命中数
+6. `known_hits`
+ 当前 cluster 已记录的累计命中数
+7. `overlap`
+ 本轮结果和已知命中的重合比例
+8. `top_source`
+ 当前 top hit 的来源 URL
+9. `top_section`
+ 当前 top hit 的 section 标题
+10. `rewrite_queries`
+ 当前底层检索实际使用的 rewrite query 列表
+11. `next_action`
+ 给 LLM 的下一步建议动作
+
+### 2.3 当前不输出给 LLM 的字段
+
+下面这些字段当前只写入 debug 日志,不直接注入给 LLM:
+
+1. `event`
+2. `channel`
+3. `loop`
+4. `used_cache`
+5. `rewrite_mode`
+6. `keywords`
+7. `top_hits`
+8. `delta_fps`
+9. `emitted_context`
+
+因此,LLM 不会直接看到“这一步是 `context_search` 还是 `context_meta`”,也不会直接看到完整 hit 列表。
+
+## 3. 系统提示协议
+
+`rag_context` 还会在 `experimental.chat.system.transform` 中追加 RAG 协议说明。
+
+当前系统提示的核心约束是:
+
+1. 每一步先解析 `<rag_state>`
+2. `rag_context` 只注入 retrieval meta,不注入正文
+3. 如果 `status=new_evidence` 且仍需要事实细节,优先调用 `rag_search mode=delta`
+4. 如果 `status=no_new_evidence`,优先复用当前状态,不要重复检索
+5. 普通问答不要调用 `mode=expand`
+6. 不要直接通过 shell 执行 `script/rag/search-vector-index.py` 做问答检索
+7. 调用 `rag_search` 时,参数必须是合法 JSON
+8. 对于长 query 或噪声 query,优先信任 rewrite 后的检索结果
+
+这部分不是结构化字段,而是对 LLM 的操作协议说明。
+
+## 4. `rag_search` 工具协议
+
+### 4.1 工具入参
+
+当前 `rag_search` 暴露给 LLM 的主要入参是:
+
+1. `query`
+2. `top_k`
+3. `node_type`
+4. `mode`
+
+其中:
+
+1. `query` 是普通字符串
+2. `top_k` 是返回条数
+3. `node_type` 目前主要是 `text` 或 `image`
+4. `mode` 控制渐进式披露层级
+
+### 4.2 工具模式
+
+当前支持的模式:
+
+1. `state`
+2. `delta`
+3. `brief`
+4. `expand`
+
+推荐顺序:
+
+1. `state`
+2. `delta`
+3. `brief`
+4. `expand`
+
+默认约束:
+
+1. 普通 QA 下优先 `delta`
+2. `expand` 默认受限,仅用于调试或显式证据展开
+
+### 4.3 工具返回
+
+`rag_search` 的返回不是原始 JSON,而是给 LLM 的文本协议。
+
+当前工具返回的第一部分始终是:
+
+1. `<rag_state>`
+
+然后按 `mode` 决定是否追加正文:
+
+1. `state`
+   只返回 `<rag_state>`
+2. `delta`
+   返回 `<rag_state>` + 本轮新增命中的短摘要
+3. `brief`
+   返回 `<rag_state>` + 当前命中的短摘要
+4. `expand`
+   返回 `<rag_state>` + 更长文本
+
+### 4.4 摘要格式
+
+`brief` 和 `delta` 当前使用 `.opencode/rag.ts` 里的 `brief()` 生成,格式类似:
+
+```text
+[1] source=... section=... summary=...
+[2] source=... section=... summary=...
+```
+
+`expand` 当前使用 `.opencode/rag.ts` 里的 `expand()`,会给更长的 `score/source/section/text`。
+
+## 5. LLM 实际看到的内容
+
+从 prompt 协议角度看,LLM 当前会看到三类信息:
+
+1. 用户原始问题
+2. 自动注入的 `<rag_state>`
+3. 系统提示里的 RAG 使用规则
+
+如果模型主动调用 `rag_search`,还会额外看到:
+
+1. 工具参数 schema
+2. 工具返回的 `<rag_state>`
+3. 工具返回的摘要或扩展正文
+
+因此当前架构下:
+
+1. 自动注入负责给状态
+2. 工具调用负责给正文
+
+## 6. 当前典型工作流
+
+### 6.1 自动注入阶段
+
+模型先看到:
+
+```text
+用户问题
+
+<rag_state>
+status=new_evidence
+reason=fresh_hits
+cluster=luckfox|文件传输
+total_hits=4
+delta_hits=4
+known_hits=4
+overlap=0.0000
+top_source=https://wiki.luckfox.com/...
+top_section=ADB 传输文件
+rewrite_queries=["Luckfox Pico Zero 文件传输","adb 文件传输"]
+next_action=call_rag_search_delta_if_more_detail_needed
+</rag_state>
+```
+
+这时模型应该先基于状态判断:
+
+1. 是否已有足够信息直接回答
+2. 是否需要调用 `rag_search mode=delta`
+3. 是否应该缩小或改写 query
+
+### 6.2 工具补充阶段
+
+如果模型调用:
+
+```json
+{"query":"Luckfox Pico Zero 文件传输方式","mode":"delta","node_type":"text","top_k":4}
+```
+
+它会看到类似返回:
+
+```text
+<rag_state>
+status=new_evidence
+reason=delta_available
+cluster=luckfox|文件传输方式
+total_hits=4
+delta_hits=2
+known_hits=6
+overlap=0.5000
+top_source=https://wiki.luckfox.com/...
+top_section=ADB 传输文件
+rewrite_queries=["Luckfox Pico Zero 文件传输方式","adb push pull 文件传输"]
+next_action=call_rag_search_delta_if_more_detail_needed
+</rag_state>
+[1] source=https://wiki.luckfox.com/... section=ADB 传输文件 summary=...
+[2] source=https://wiki.luckfox.com/... section=SCP 传输文件 summary=...
+```
+
+这时模型拿到的就不只是状态,还有正文摘要。
+
+## 7. 当前语义边界
+
+### 7.1 `status` 的语义
+
+当前 `<rag_state>.status` 表示:
+
+1. 当前 `session + cluster` 最近一次有效检索结果的状态
+
+它不等价于:
+
+1. “当前这一个 loop step 刚刚重新搜索得到的新状态”
+
+因此,如果当前 step 只是复用了缓存状态,LLM 看到的 `status=new_evidence`,实际语义更接近:
+
+1. 当前 cluster 的已知状态是 `new_evidence`
+
+而不是:
+
+1. 本 step 又重新找到了新证据
+
+### 7.2 `next_action` 的语义
+
+`next_action` 是建议,不是硬约束。
+
+LLM 仍然可以:
+
+1. 直接回答
+2. 选择更具体的 query
+3. 调 `rag_search`
+4. 放弃继续检索
+
+但系统提示已经对推荐行为做了收敛。
+
+## 8. 当前已知限制
+
+1. `event/context_meta/context_search` 只在 debug 日志里,LLM 不可见
+2. LLM 不能直接看到完整命中列表,除非主动调用 `rag_search`
+3. `status` 当前更接近 cluster 持久状态,不是严格的 step 状态
+4. 自动注入与工具调用虽然共享状态,但 query cluster 仍可能因为 agent rewrite 而不同
+
+## 9. 结论
+
+当前真正输出给 LLM 的协议可以概括为:
+
+1. 自动注入 `<rag_state>` 提供检索 meta
+2. 系统提示解释如何使用这些 meta
+3. `rag_search` 提供分层的正文证据披露
+
+因此,当前系统不是“自动把所有 RAG 内容都塞进 prompt”,而是:
+
+1. 先给状态
+2. 再由模型按需索取正文
+
diff --git a/specs/rag-progressive-disclosure.zh.md b/specs/rag-progressive-disclosure.zh.md
new file mode 100644
index 00000000000..0931a86568f
--- /dev/null
+++ b/specs/rag-progressive-disclosure.zh.md
@@ -0,0 +1,365 @@
+# RAG 渐进式披露当前实现说明
+
+## 1. 范围
+
+这份文档描述当前代码里的真实实现,不是理想设计。
+
+当前“渐进式披露”系统由三部分组成:
+
+1. 自动注入:`.opencode/plugins/rag_context.ts`
+2. 显式检索工具:`.opencode/tool/rag_search.ts`
+3. 共享状态与公共逻辑:`.opencode/rag.ts`
+
+底层检索脚本仍然是:
+
+1. `script/rag/search-vector-index.py`
+
+## 2. 当前目标
+
+当前实现要解决的是:
+
+1. 在 ReAct 式 loop 中持续给模型提供检索状态
+2. 不在每一轮 loop 中重复注入相同正文
+3. 把 `rag_context` 和 `rag_search` 统一为同一套渐进式披露系统
+4. 提供可追踪的 JSONL 调试日志
+
+## 3. 当前架构
+
+### 3.1 自动注入链路
+
+`rag_context` 当前只负责注入检索 meta 信息,不再自动注入正文摘要。
+
+它每次在 `experimental.chat.messages.transform` 被调用时会:
+
+1. 找到当前会话里最新的 user text
+2. 去掉旧的 `<rag_state>` / `<rag_context>`
+3. 生成 query cluster
+4. 查询共享状态
+5. 必要时调用底层检索脚本
+6. 只把 `<rag_state>` 注回用户消息
+
+这意味着:
+
+1. 模型在 loop 中每一步都能看到当前的 RAG 状态
+2. 是否继续调 `rag_search`,由模型自己判断
+
+### 3.2 显式工具链路
+
+`rag_search` 当前负责渐进式补充证据。
+
+支持模式:
+
+1. `state`
+2. `delta`
+3. `brief`
+4. `expand`
+
+推荐顺序:
+
+1. `state`
+2. `delta`
+3. `brief`
+4. `expand`
+
+其中:
+
+1. `state` 只返回状态
+2. `delta` 只返回新增证据
+3. `brief` 返回短摘要
+4. `expand` 返回扩展文本,默认受限
+
+### 3.3 共享状态
+
+自动注入和显式工具现在都使用同一个共享状态模块:
+
+1. `.opencode/rag.ts`
+
+共享状态粒度是:
+
+1. `session`
+2. `cluster`
+
+每个 cluster 当前维护的信息包括:
+
+1. `seen`
+2. `window`
+3. `last_query`
+4. `last_status`
+5. `last_reason`
+6. `last_checked`
+7. `total_hits`
+8. `known_hits`
+9. `overlap`
+10. `delta`
+11. `hits`
+12. `top`
+13. `rewrites`
+
+因此当前 `rag_context` 和 `rag_search` 已经不是两套独立状态机,而是同一状态系统的两个入口。
+
+## 4. 自动注入的当前规则
+
+### 4.1 注入内容
+
+自动注入当前只注入:
+
+1. `<rag_state>`
+
+不再自动注入正文 `<rag_context>`。
+
+这样做的目的:
+
+1. 让模型在每一步都能看到检索状态
+2. 把正文披露权交给 `rag_search`
+3. 避免 loop 中重复刷证据文本
+
+### 4.2 何时触发
+
+自动注入不是只在“用户第一次提问”时触发。
+
+当前实现里,只要:
+
+1. `experimental.chat.messages.transform` 被调用
+2. 最新 user text 还存在
+
+插件就会再次运行。
+
+区别在于:
+
+1. 首次进入当前 query 时,通常会实际检索
+2. 后续 loop 更常见的是复用共享状态,只重新注入 `<rag_state>`
+
+### 4.3 缓存与复用
+
+自动注入会优先复用共享状态,条件包括:
+
+1. 同一 user query
+2. 同一 cluster
+3. 在 `RAG_REUSE_SEC` 时间窗内
+4. 或已经进入 assistant loop 阶段
+
+如果命中缓存,插件不会重新检索,而是直接注入当前 cluster 的状态。
+
+### 4.4 局部限流
+
+每个 cluster 单独维护时间窗:
+
+1. `RAG_CLUSTER_WINDOW_SEC`
+2. `RAG_CLUSTER_MAX_FULL`
+
+超过上限后,状态会变成:
+
+1. `cluster_throttled`
+
+## 5. 当前状态机
+
+当前状态枚举:
+
+1. `new_evidence`
+2. `no_new_evidence`
+3. `weak_match`
+4. `need_refine`
+5. `cluster_throttled`
+6. `retrieval_error`
+7. `state_reset`
+
+典型 reason:
+
+1. `fresh_hits`
+2. `delta_available`
+3. `high_overlap`
+4. `low_score`
+5. `empty_hits`
+6. `cluster_window_limit`
+7. `backend_error`
+8. `parse_error`
+9. `cached_recent_result`
+10. `compaction_epoch_changed`
+
+## 6. 什么叫“渐进式披露”
+
+### 6.1 自动注入侧
+
+自动注入侧的渐进式披露体现在:
+
+1. 首轮只建立状态并记录 hits
+2. 后续 loop 主要复用状态
+3. 自动注入不再负责正文披露
+
+换句话说,当前自动注入承担的是:
+
+1. 渐进提供 meta
+
+而不是:
+
+1. 渐进提供正文
+
+### 6.2 工具侧
+
+显式工具侧的渐进式披露体现在:
+
+1. `state` 只给状态
+2. `delta` 只给新增证据
+3. `brief` 给短摘要
+4. `expand` 给更多文本
+
+这才是当前正文证据的主要披露链路。
+
+## 7. Query Cluster
+
+当前 cluster 生成方式:
+
+1. query 小写化
+2. 中英文词项切分
+3. 去停用词
+4. 同义词归一
+5. 排序拼接
+
+作用:
+
+1. 把近义问题归到同一局部检索意图
+2. 支持同 cluster 去重
+3. 支持同 cluster 限流
+
+## 8. 底层检索脚本的当前角色
+
+`search-vector-index.py` 仍然只负责:
+
+1. embedding query
+2. 检索向量库
+3. 返回 hits
+
+当前输出格式支持:
+
+1. `json`
+2. `state`
+3. `brief`
+4. `auto`
+
+当前约束:
+
+1. `rag_context` 强制 `--format json`
+2. `rag_search` 也强制 `--format json`
+3. 只有 shell 直接运行脚本时,`OPENCODE=1` 下默认输出 `state`
+
+这样做是为了:
+
+1. 插件和工具都自己控制披露层级
+2. 终端里不要直接泄漏 hits 正文
+
+### 8.1 当前 rewrite 与 multi-query 检索
+
+当前底层检索脚本已经支持:
+
+1. LLM query rewrite
+2. multi-query retrieval
+3. merge 去重
+4. simple rerank
+
+流程如下:
+
+1. 原始 query 输入
+2. LLM 产出 `queries` 和 `keywords`
+3. 每个 rewrite query 单独向量检索
+4. 多路结果按 fingerprint merge
+5. 用简单规则做 rerank
+6. 输出最终 `top_k`
+
+当前 rerank 不是独立 reranker 模型,而是规则组合:
+
+1. `max_score`
+2. `reciprocal_rank`
+3. `hit_count`
+4. `primary_match`
+
+## 9. 调试日志
+
+### 9.1 日志文件
+
+当前统一日志:
+
+1. `.rag/log/rag_debug.jsonl`
+
+### 9.2 当前记录的链路
+
+现在会同时记录:
+
+1. `rag_context`
+2. `rag_search`
+
+通过字段区分:
+
+1. `channel`
+2. `event`
+
+### 9.3 当前重点字段
+
+当前日志里重点字段包括:
+
+1. `channel`
+2. `event`
+3. `sessionID`
+4. `query`
+5. `cluster`
+6. `mode`
+7. `loop`
+8. `used_cache`
+9. `status`
+10. `reason`
+11. `total_hits`
+12. `delta_hits`
+13. `known_hits`
+14. `overlap`
+15. `rewrites`
+16. `keywords`
+17. `rewrite_mode`
+18. `top_hits`
+19. `delta_fps`
+20. `emitted_context`
+
+### 9.4 当前怎么判断渐进式披露生效
+
+看同一 `sessionID + cluster` 的连续日志:
+
+1. 首次检索:
+ - `status=new_evidence`
+ - `delta_hits>0`
+2. 后续 loop:
+ - `channel=rag_context`
+ - `event=context_meta`
+ - `used_cache=true`
+3. 后续主动补证据:
+ - `channel=rag_search`
+ - `event=tool_search`
+ - `mode=delta|brief|expand`
+
+这说明当前系统是在“先提供状态,再按需补正文”。
+
+## 10. 终端与 TUI 控制
+
+当前实现已经做了三层控制:
+
+1. 检索子进程使用 `.quiet()`
+2. shell 直接跑脚本时默认只输出 `state`
+3. `expand` 默认受限
+
+当前目标不是完全隐藏检索,而是:
+
+1. 不让底层脚本 stdout 直接污染终端
+2. 不让自动注入链路在 loop 中刷大段正文
+
+## 11. 当前限制
+
+1. 自动注入只提供 meta,不提供正文,需要模型自行决定是否调 `rag_search`
+2. 还没有 decomposition
+3. 当前 rerank 还是简单规则,不是专门 reranker 模型
+4. debug 已能看到 top hits 和 delta 指纹,但还没有记录 assistant reasoning 原文
+5. 多模态 embedding 还未接入当前渐进披露链路
+
+## 12. 关键代码锚点
+
+1. 共享状态:`.opencode/rag.ts`
+2. 自动注入:`.opencode/plugins/rag_context.ts`
+3. 渐进检索工具:`.opencode/tool/rag_search.ts`
+4. 底层检索:`script/rag/search-vector-index.py`
+5. 调试查看:`script/rag/debug-rag-state.py`
diff --git a/specs/rag-updates-history.zh.md b/specs/rag-updates-history.zh.md
new file mode 100644
index 00000000000..de35f2ca049
--- /dev/null
+++ b/specs/rag-updates-history.zh.md
@@ -0,0 +1,226 @@
+# RAG Enhance 变更回溯记录
+
+## 1. 目的
+
+这份文档用于记录本分支上 RAG 增强相关的关键演进,方便后续回溯问题来源、定位设计变更和重新部署时核对差异。
+
+## 2. 第一阶段:基础 RAG 流水线落地
+
+这一阶段完成了基础数据链路:
+
+1. 文档转文本
+2. 文本清洗
+3. 结构化输出
+4. embedding 落库
+5. 本地向量检索
+
+主要脚本:
+
+1. `script/rag/url-to-text.sh`
+2. `script/rag/convert-dir-to-text.sh`
+3. `script/rag/clean-text.py`
+4. `script/rag/structure-text.py`
+5. `script/rag/build-vector-index.py`
+6. `script/rag/search-vector-index.py`
+
+## 3. 第二阶段:OpenCode 插件化接入
+
+这一阶段引入了 OpenCode 集成层:
+
+1. 自动注入插件:`.opencode/plugins/rag_context.ts`
+2. 手动工具:`.opencode/tool/rag_search.ts`
+3. skill:`.opencode/skills/rag-pipeline/SKILL.md`
+
+目标是:
+
+1. 让 agent 在对话中可使用本地 RAG
+2. 支持插件迁移到其他项目
+3. 用 `rag-bootstrap.sh` / `install.sh` 完成交付
+
+## 4. 第三阶段:图片 OCR 与结构化关联
+
+这一阶段处理了图片与正文的关联问题:
+
+1. 图片 OCR 从纯追加文本改成与 image node 关联
+2. 结构化输出中保留 image metadata
+3. 向量检索命中正文时,可挂出 `related_images`
+
+目标是:
+
+1. 不直接污染正文 section
+2. 在命中 chunk 时仍然能关联图片信息
+
+## 5. 第四阶段:初版渐进式披露
+
+这一阶段第一次引入:
+
+1. `<rag_state>`
+2. `<rag_context>`
+3. overlap 去重
+4. cluster 局部限流
+5. debug 日志
+
+初版实现特点:
+
+1. 自动注入会注入状态和正文摘要
+2. `rag_search` 自己维护一套独立状态
+3. debug 主要看状态,证据可见性较弱
+
+当时解决的问题:
+
+1. 检索循环
+2. 重复注入
+3. context 窗口浪费
+
+## 6. 第五阶段:终端/TUI 回显治理
+
+这一阶段重点修了“检索输出污染终端/TUI”的问题。
+
+核心修复:
+
+1. `rag_search.ts` 和 `rag_context.ts` 调检索脚本时补 `.quiet()`
+2. 两条链路都强制 `search-vector-index.py --format json`
+3. `search-vector-index.py` 在 `OPENCODE=1` 下默认只输出 `state`
+4. `rag_search expand` 默认拦截
+
+目标是:
+
+1. 检索子进程不再把 stdout 直接打印到终端
+2. 工具链路不再因为 parse fail 回退成整段文本回显
+
+## 7. 第六阶段:非法 JSON tool args 缓解
+
+这一阶段修复了模型调用 `rag_search` 时偶发生成坏 JSON 的问题。
+
+核心修复:
+
+1. 在 `tool.definition` 中补充合法/非法 JSON 示例
+2. 在 system prompt 中明确要求 `query` 必须是单个普通字符串
+
+目标是:
+
+1. 降低模型把 query 引号拼坏的概率
+
+注意:
+
+1. 这类问题是模型生成错误,无法 100% 从代码层彻底消除
+
+## 8. 第七阶段:共享状态统一
+
+这一阶段把 `rag_context` 和 `rag_search` 统一进同一套共享状态系统。
+
+新增文件:
+
+1. `.opencode/rag.ts`
+
+统一后:
+
+1. 两条链路共享 session/cluster 状态
+2. 共享 `seen`
+3. 共享 `total_hits / known_hits / overlap`
+4. 共享 `top_hits`
+5. 共享 `rewrites`
+
+这一阶段的设计变化很关键:
+
+1. `rag_context` 不再自动注入正文,只注入检索 meta
+2. `rag_search` 成为正文证据的渐进式补充入口
+
+## 9. 第八阶段:ReAct loop 对齐
+
+这一阶段是为适配 OpenCode 的 ReAct 式 loop。
+
+变化点:
+
+1. `rag_context` 不再只在“第一次用户提问前”工作
+2. 在 loop 中也会再次运行
+3. 但后续更常见的是复用缓存状态,只重复注入 `<rag_state>`
+
+目标是:
+
+1. 在推理过程中让模型持续看到当前检索状态
+2. 由模型自行决定是否继续调用 `rag_search`
+
+## 10. 第九阶段:debug 日志增强
+
+这一阶段把 debug 从“状态日志”增强成“过程日志”。
+
+现在统一记录到:
+
+1. `.rag/log/rag_debug.jsonl`
+
+日志覆盖:
+
+1. `rag_context`
+2. `rag_search`
+
+主要新增字段:
+
+1. `channel`
+2. `mode`
+3. `loop`
+4. `used_cache`
+5. `top_hits`
+6. `delta_fps`
+7. `rewrites`
+8. `emitted_context`
+
+目的:
+
+1. 可追踪每一次状态注入
+2. 可追踪每一次显式检索
+3. 可回溯当前 cluster 的命中情况
+
+## 11. 第十阶段:query rewrite 与 multi-query retrieval
+
+这一阶段在底层检索脚本里加入了:
+
+1. LLM query rewrite
+2. 多 query 独立召回
+3. merge 去重
+4. simple rerank
+
+当前实现方式:
+
+1. LLM 输出 `queries` 和 `keywords`
+2. 每个 query 单独做 embedding 检索
+3. 按 chunk fingerprint 合并候选
+4. 结合 `max_score / reciprocal_rank / hit_count / primary_match` 做重排
+
+目标是:
+
+1. 降低长 query 的语义噪声
+2. 提高多视角召回能力
+3. 给后续 decomposition 留出接口
+
+## 12. 当前结论
+
+到当前版本为止,系统已经形成了下面的职责分离:
+
+1. `rag_context`
+ - 持续注入 RAG meta
+ - 在 loop 中复用共享状态
+ - 不主动注入正文
+
+2. `rag_search`
+ - 按 `state -> delta -> brief -> expand` 渐进补证据
+ - 与自动注入共享同一状态
+
+3. `debug`
+ - 统一记录自动注入与显式检索
+ - 便于后续对 query、cluster、命中和状态做回放
+
+## 13. 仍未完成的方向
+
+当前明确还没有完成的方向:
+
+1. decomposition
+2. 专门 reranker
+3. assistant reasoning 原文级别的日志追踪
+4. 多模态 embedding 接入当前渐进式披露系统
+
+## 14. 对应文档
+
+1. 当前实现说明:`specs/rag-progressive-disclosure.zh.md`
+2. 总体架构:`specs/rag-enhance-architecture.zh.md`
+3. 本回溯文档:`specs/rag-updates-history.zh.md`