diff --git a/.claude/skills/.gitignore b/.claude/skills/.gitignore index 229f4495ee3..2dd55eba801 100644 --- a/.claude/skills/.gitignore +++ b/.claude/skills/.gitignore @@ -8,3 +8,5 @@ !test/** !btrace-perfetto/ !btrace-perfetto/** +!check-code-attribution/ +!check-code-attribution/** diff --git a/.claude/skills/check-code-attribution/SKILL.md b/.claude/skills/check-code-attribution/SKILL.md new file mode 100644 index 00000000000..82e31bc2683 --- /dev/null +++ b/.claude/skills/check-code-attribution/SKILL.md @@ -0,0 +1,295 @@ +--- +name: check-code-attribution +description: Per-file check of vendored code attribution in the current branch diff, including license headers, THIRD_PARTY_NOTICES.md entries, and compatibility with Sentry's licensing policy +allowed-tools: Bash Read Grep Glob +--- + +**Maintainers:** Only edit files in `.claude/skills/check-code-attribution` (the committed file) and run `npx @sentry/dotagents sync` from the command line to automatically update the matching files in `.agents/skills/check-code-attribution`. + +# Check Code Attribution + +You are reviewing changed files for third-party code attribution compliance in **sentry-java**, an MIT-licensed repository. + +## Local runs — discover changed files first + +When running locally (not via Warden), determine which files changed on this branch: + +```bash +MB=$(git merge-base HEAD origin/main 2>/dev/null || git merge-base HEAD main) +git diff --name-only "${MB}"..HEAD +``` + +Then run the Quick triage and subsequent checks on **every** file in that list. Warden's `ignorePaths` in `warden.toml` lists the paths to skip — apply the same exclusions locally. + +### Warden CLI (optional local parity check) + +Warden does **not** use Cursor auth. Before running Warden locally, configure a provider (same model family as `warden.toml`, or override with `-m`): + +```bash +# Option A: Anthropic API key (matches CI model in warden.toml) +export WARDEN_ANTHROPIC_API_KEY=sk-ant-... 
# or: export ANTHROPIC_API_KEY=sk-ant-... + +# Option B: Pi OAuth / API key store (~/.pi/agent/auth.json) +npx pi # then run /login and pick Anthropic (or another provider) + +# Option C: Different provider for a one-off run +export WARDEN_OPENAI_API_KEY=sk-... +npx @sentry/warden origin/main..HEAD --skill check-code-attribution -m openai/gpt-5.5 -vv +``` + +```bash +npx @sentry/warden origin/main..HEAD --skill check-code-attribution -vv +``` + +If you only need attribution review in the IDE, `/check-code-attribution` in Cursor does not require Warden credentials. + +When running via Warden, the changed file is already provided — skip branch-wide discovery, but follow **Warden execution** below. + +## Warden execution + +Warden analyzes one changed file per run (whole-file mode). Complete every Quick triage step — the diff alone is not sufficient. + +**Mandatory on every run (do not skip):** + +1. `Read` the first 50 lines of the changed file. +2. `Grep` `THIRD_PARTY_NOTICES.md` for the class name (filename without extension, e.g. `ANRWatchDog` for `ANRWatchDog.java`). On renames, also grep the old basename and read Scope sections (see Quick triage). +3. When Bash is available, compare the merge-base header: + ```bash + MB=$(git merge-base HEAD origin/main 2>/dev/null || git merge-base HEAD main) + git show "${MB}:<path/to/changed-file>" | head -50 + ``` + +**Do not dismiss findings because:** + +- A `THIRD_PARTY_NOTICES.md` entry exists — file headers are still required; NOTICES does not replace them. +- The diff only removes a header comment block — if removed `-` lines include a **required field** (see below) or vendoring language ("adapted from", etc.), attribution was stripped. Removing boilerplate alone is not stripping. +- The header says "Adapted from …" but omits copyright holder or license name — flag missing header fields. 
+- The file header has all four required fields — a missing THIRD_PARTY_NOTICES.md entry is independently required and is ⚠️ medium regardless of header completeness. + +For `THIRD_PARTY_NOTICES.md` runs: for every **removed** entry in the diff, use `Read` or `Glob` to confirm whether Scope files still exist with attribution headers. If they do, the entry must not be removed. + +## Quick triage + +Sentry's own files carry **no** copyright headers — any copyright/license line indicates third-party code. Every file that reaches this skill is in scope — do not skip files based on extension. + +If this file is `THIRD_PARTY_NOTICES.md`, go to the THIRD_PARTY_NOTICES section below. + +For all other files, perform these checks **before** deciding whether to proceed: + +1. **Read the file header** — use the Read tool to read the first 50 lines of the file. Look for vendored-code signals: `Copyright`, `Licensed under`, `SPDX-License-Identifier`, or vendoring language ("adapted from", "backported from", "based on", "copied from", "derived from", "inspired by", "ported from", "translated from", "vendored"). +2. **Check THIRD_PARTY_NOTICES.md** — use Grep to search `THIRD_PARTY_NOTICES.md` for the file name without extension (e.g., search for `ANRWatchDog` when reviewing `ANRWatchDog.java`). A match means this is a known vendored file. **Renames:** if the diff is a rename (`similarity index` / `rename from` in the diff, or a delete of one path and add of another with the same content), also Grep for the **old** basename and read **Scope** sections in matching entries — NOTICES may still reference the previous class or path name. + > **A complete NOTICES entry does NOT end the check.** It confirms the file is vendored and that the NOTICES requirement is satisfied. The file header is a separate, additional requirement — continue to header verification regardless of NOTICES completeness. +3. 
**Scan the diff** — check for vendored-code signals on both added (`+`) and **removed (`-`)** lines. Removed lines that drop a **required field** (copyright, license name, source URL, vendoring origin) ARE signals. Removed disclaimer/boilerplate lines alone are not. + +**A signal in ANY of these three sources means this is vendored code — proceed to the vendored source file section.** + +A file referenced in THIRD_PARTY_NOTICES.md is ALWAYS vendored, even if its current header has no attribution. + +**If none of the three sources have signals, report no findings and stop.** + +--- + +## If this file is `THIRD_PARTY_NOTICES.md` + +Validate the changed entries using the diff context: + +1. For each added or modified entry, verify it has all required fields: **Source URL**, **License name**, **Copyright**, **Scope** (file paths), and **full license text** in a fenced code block. +2. For each Scope path, verify the file(s) exist (use Glob or Read). +3. Flag new license types using the same license-tier table as for source files: weak copyleft (LGPL, MPL, EPL) → 🚨 **high**, strong copyleft (GPL) → 🚨 **high**, AGPL → 🚨 **high** (absolute ban, must be removed). Do not use low or medium for copyleft or AGPL. +4. Flag orphaned entries whose Scope files no longer exist. +5. For **removed** entries (lines prefixed with `-` in the diff), use Read to check whether the Scope files still exist and still have attribution headers. If they do, the entry must not be removed. +6. Check **copyright consistency** — the Copyright field must match the copyright line inside the embedded license text. Flag mismatches. + +--- + +## If this is a vendored file + +### 1. Check attribution header + +Check each of the following by reading the file header — not NOTICES. Each is an independent yes/no; a "no" is ⚠️ medium regardless of NOTICES completeness: + +- [ ] **Vendoring origin phrase** — explicit wording such as `Adapted from …`, `Based on …`, `Vendored from …`, or a library name. 
+- [ ] **Copyright line** — e.g. `Copyright (c) 2016 …`, `Copyright 2010 Square, Inc.` +- [ ] **License name** — e.g. `Licensed under the Apache License, Version 2.0`, `The MIT License` +- [ ] **Source URL** — e.g. `https://github.com/…` + +Exact wording and comment style may vary. **Do not flag** missing or changed content that is not one of these four fields. + +**Each field must be physically present in the file header. A complete `THIRD_PARTY_NOTICES.md` entry does not satisfy any required field — both are independently required. Check each of the four fields by reading the file header, not by reasoning from NOTICES.** + +**Not required in the file header** (full text belongs in `THIRD_PARTY_NOTICES.md`, not in every source file): + +- Full license boilerplate (MIT permission paragraph, Apache "Unless required by applicable law…" disclaimer, ASF contributor grant preamble) +- Wording differences vs the NOTICES embedded license text (e.g. shortened Apache header vs canonical ASF phrasing) +- Comment style (`//` vs `/* */`), line wrapping, or extra Sentry modification notes + +Compare the current header against the NOTICES entry **only for the four required fields** — e.g. if NOTICES says MIT by "Salomon BRYS" but the header has no copyright or license name, flag it. If both have copyright + license name but the header omits the Apache disclaimer while NOTICES still has the full text, **do not flag**. + +When Bash is available (local runs), also compare against the merge-base version for additional context: +```bash +MB=$(git merge-base HEAD origin/main 2>/dev/null || git merge-base HEAD main) +git show "${MB}:<path/to/changed-file>" | head -50 +``` + +Flag these issues: +- **Header stripped** — file is in NOTICES but current header has none of the four required fields +- **Header truncated** — one or more **required** fields were removed (e.g. 
copyright line or `Licensed under …` removed) while the file remains vendored +- **Header inconsistent** — a **required** field contradicts NOTICES (wrong copyright holder/year, wrong license name) — not boilerplate or phrasing differences +- **Diff removes required attribution** — removed `-` lines drop a required field or vendoring origin (`Adapted from`, etc.); removing disclaimer/boilerplate lines alone is **not** this + +**Do not report** (no finding — prefer silence): + +- Apache/MIT disclaimer or permission paragraphs removed but all four required fields remain +- Header reworded to a shorter permissive-license form with the same copyright holder and license name +- Header and NOTICES differ only in full license body text (wording or boilerplate, not missing required fields) + +These exceptions apply only when an entry already exists in NOTICES and only to header-vs-NOTICES wording differences. A **missing** NOTICES entry is ⚠️ medium per section 2 — never covered by these exceptions. + +### 2. Check THIRD_PARTY_NOTICES.md entry + +**Severity: always `medium`. Do not output `severity: "low"` for a missing entry even if the attribution header is complete.** + +`THIRD_PARTY_NOTICES.md` is a mandatory legal exhibit that Sentry ships with every SDK distribution. It must enumerate all vendored code regardless of what the source file header says. A missing entry is a distribution-level compliance failure, not a nit. A complete file header does not satisfy the NOTICES requirement — both are mandatory. + +From the Grep in Quick triage: if no matching entry exists, output `severity: "medium"` and flag as ⚠️ Missing THIRD_PARTY_NOTICES.md entry. A valid entry needs: Source URL, License name, Copyright, Scope, full license text. + +### 3. 
Check license compatibility + +Classify the license per Sentry's Open Source Legal Policy (https://open.sentry.io/licensing/): + +| Tier | Examples | Finding | +|-----------------|-------------------------------------------------|---------------------------------------------| +| Permissive | MIT, BSD, Apache 2.0, ISC, CC0, Unlicense, Zlib | None — license is compatible | +| Weak copyleft | LGPL, MPL, EPL, CDDL | 🚨 **high** — requires review | +| Strong copyleft | GPL, QPL, Sleepycat, OSL | 🚨 **high** — requires legal review | +| AGPL | — | 🚨 **high** — absolute ban, must be removed | +| No license | — | 🚨 **high** — assume no permission | + +**Permissive licenses:** do not report a finding solely because the license is MIT/BSD/Apache/etc. Only flag missing or stripped **required** header fields, or missing/inconsistent `THIRD_PARTY_NOTICES.md` entry. Do not flag disclaimer/boilerplate-only diffs. Copyleft and unlicensed code still get 🚨 findings per the table. + +--- + +## If this is a deleted vendored file + +If the diff deletes a file and the removed lines contained attribution headers, check whether `THIRD_PARTY_NOTICES.md` still references it — the entry should be updated or removed. + +--- + +## Severity guide + +| Level | Use for | +|------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **high** | 🚨 License violations: AGPL, copyleft, unlicensed, no-license code | +| **medium** | ⚠️ Missing **required** header fields, stripped required fields, missing/inconsistent NOTICES entries (even when header is complete), deleted/renamed vendored files needing NOTICES update | +| **low** | 👀 Cosmetic/style differences only (shortened license wording, comment style). **Never** use for a missing NOTICES entry or missing header field — those are always medium. 
| + +Warden relies on these severity levels when deciding whether to comment on PRs or require changes. Put the severity emoji **only on the finding title** (see Output) so reviewers can triage at a glance. + +## Output + +**No issues → empty response (say nothing).** + +Otherwise, report each finding ordered by severity (most severe first). + +### Emoji placement (required) + +Use the emoji from the severity guide (🚨, ⚠️, or 👀) — not the word `high`, `medium`, or `low`. + +| Field | Emoji? | Example | +|-------------------|--------------------------|----------------------------------------------------------------------------------------------------------------------------------------| +| **Title** | Yes — once, at the start | `⚠️ Copyright line stripped from vendored file header` | +| **Description** | **No** | `**io.sentry.cache.tape.FileObjectQueue** — The Copyright (C) 2010 Square, Inc. line was removed…` (see **Description subject** below) | +| **Verification** | **No** | Evidence steps only | +| **Suggested fix** | **No** | Fix text only | + +**Good (Warden PR comment):** + +``` +Title: ⚠️ Copyright line stripped from vendored file header +Description: **io.sentry.cache.tape.FileObjectQueue** — The `Copyright (C) 2010 Square, Inc.` line was removed from this vendored file's header. Please restore the copyright line. +``` + +**Bad — emoji in the description (never do this):** + +``` +Title: ⚠️ Copyright line stripped from vendored file header +Description: ⚠️ The `Copyright (C) 2010 Square, Inc.` line was removed… +``` + +**Bad — emoji before the class name:** + +``` +Title: ⚠️ Copyright line stripped from vendored file header +Description: ⚠️ **io.sentry.cache.tape.FileObjectQueue** — The copyright line was removed… +``` + +### Description subject (required) + +Every description **must** start with `**<subject>** —` (bold subject, space, em dash, space). 
Pick **one** subject by file type: + +| File type | Subject format | Example | +|-------------------------------------------------------------------------------------------|----------------------------------------------------------------------|----------------------------------------------------------------| +| Java / Kotlin source (`.java`, `.kt`) with a top-level type | Fully qualified class name (FQCN) | `**io.sentry.CircularFifoQueue** —` | +| Java / Kotlin with no single clear type (multiple top-level types, unclear which changed) | FQCN of the primary type under review, or repo-relative path if none | `**sentry/src/.../Foo.kt** —` | +| `THIRD_PARTY_NOTICES.md` | `THIRD_PARTY_NOTICES.md — <entry heading>` | `**THIRD_PARTY_NOTICES.md — Square — Seismic (Apache 2.0)** —` | +| Gradle / other scripts (e.g. `.kts`, `.gradle`) | Repo-relative path from repository root | `**build.gradle.kts** —` | + +- Prefer **FQCN** for `.java` / `.kt` vendored source (derive from `package` + primary public top-level class). Do not use file paths when a FQCN is clear. +- For license-tier / policy issues, include https://open.sentry.io/licensing/ in the description body. + +### Warden runs + +For each finding, set these fields exactly: + +| Field | Value | +|------------------|-------------------------------------------------------------------------------------------------------------------| +| **severity** | `high`, `medium`, or `low` — **never** put emoji here; Warden maps severity from this field, not from the title | +| **title** | `<severity-emoji> <short-title>` — emoji allowed **only** here (imperative, no class name) | +| **description** | `**<subject>** — <details>` — **plain text only**; subject per **Description subject** above | +| **verification** | Optional evidence steps — plain text only | + +**Description rules (Warden):** + +- **Must** match `**<subject>** — …` using the table in **Description subject**. +- **Must not** contain 🚨, ⚠️, 👀, or the words `high`, `medium`, or `low` as severity labels. 
+- **Must not** repeat the title or paraphrase it with an emoji prefix. + +**Good (NOTICES entry removed while scope files remain):** + +``` +Title: ⚠️ NOTICES entry removed for vendored code still in tree +Description: **THIRD_PARTY_NOTICES.md — Square — Seismic (Apache 2.0)** — The Seismic entry was removed but `io.sentry.android.core.SentryShakeDetector` still has an attribution header. Restore the entry or remove attribution from the scope files. +``` + +**Before submitting findings:** For every finding, confirm `description` does not match `[🚨⚠️👀]` and matches `^\*\*.+\*\* — `. If it contains any emoji, rewrite the description without it. + +### Local / IDE runs + +Use this numbered format — same title vs description split as above: + +``` +1\. <severity-emoji> **<short-title>** + **<subject>** — <details> + +2\. <severity-emoji> **<short-title>** + **<subject>** — <details> +``` + +Rules: + +- Put the severity emoji **only** on the title line (`1\. ⚠️ **…**`), never on the description line. +- The description line uses `**<subject>** —` per **Description subject** and must not contain 🚨, ⚠️, or 👀. +- **Escape the period** after the number (`1\.` not `1.`) so markdown does not collapse entries into a tight list. +- Leave an empty line between each numbered finding. + +## Validation (maintainers) + +Test samples live under `validation-tests/` and are excluded from normal runs via `.claude/**` in `warden.toml`. + +```bash +.claude/skills/check-code-attribution/validation-tests/check-code-attribution-tests.sh +``` + +Expected outcomes are in `validation-tests/EXPECTED.json`. The script creates isolated git worktrees, runs Warden with `--report-on medium --json`, and asserts per-scenario pass/fail. Scenarios marked `"isolated": true` in `EXPECTED.json` each get their own worktree to avoid Anthropic prompt-cache priming that can suppress findings below medium in concurrent batches. Exit 0 = all pass. 
+ +When manually reviewing a file under `validation-tests/scenarios/`, grep `validation-tests/THIRD_PARTY_NOTICES.catalog.md` in addition to root `THIRD_PARTY_NOTICES.md` in Quick triage step 2. See `validation-tests/README.md`. diff --git a/.claude/skills/check-code-attribution/validation-tests/EXPECTED.json b/.claude/skills/check-code-attribution/validation-tests/EXPECTED.json new file mode 100644 index 00000000000..a82637b84e2 --- /dev/null +++ b/.claude/skills/check-code-attribution/validation-tests/EXPECTED.json @@ -0,0 +1,53 @@ +[ + { + "id": "header-complete-and-notice-present", + "file": "HeaderCompleteAndNoticePresent.java", + "expectFinding": false, + "notes": "Header matches catalog entry" + }, + { + "id": "header-complete-but-notice-missing", + "file": "HeaderCompleteButNoticeMissing.java", + "expectFinding": true, + "isolated": true, + "notes": "Full header; no catalog / root NOTICES entry. Isolated: prompt-cache priming in a concurrent batch suppresses the missing-NOTICES finding below medium." + }, + { + "id": "header-missing-but-notice-present", + "file": "HeaderMissingButNoticePresent.java", + "expectFinding": true, + "isolated": true, + "notes": "NOTICES entry claims file is vendored but file has no attribution header. Isolated: a complete NOTICES entry suppresses the missing-header finding in a concurrent batch." 
+ }, + { + "id": "header-fully-stripped", + "file": "HeaderFullyStripped.java", + "expectFinding": true, + "notes": "Header has no required attribution fields" + }, + { + "id": "header-partially-stripped", + "file": "HeaderPartiallyStripped.java", + "expectFinding": true, + "notes": "Adapted from + URL only; no copyright or license name" + }, + { + "id": "header-missing-non-essential-info", + "file": "HeaderMissingNonEssentialInfo.java", + "expectFinding": false, + "notes": "All four required fields present; no license boilerplate — boilerplate is not required in the header" + }, + { + "id": "header-vs-notice-mismatch", + "file": "THIRD_PARTY_NOTICES.md", + "expectFinding": true, + "isolated": true, + "notes": "Copyright in metadata field does not match embedded license text. Isolated: mismatch finding needs an independent assertion free of interference from other NOTICES changes." + }, + { + "id": "new-license-type", + "file": "NewLicenseType.java", + "expectFinding": true, + "notes": "AGPL v3 license in file header — absolute ban, must be removed" + } +] diff --git a/.claude/skills/check-code-attribution/validation-tests/README.md b/.claude/skills/check-code-attribution/validation-tests/README.md new file mode 100644 index 00000000000..d1217dcb0c3 --- /dev/null +++ b/.claude/skills/check-code-attribution/validation-tests/README.md @@ -0,0 +1,41 @@ +# Attribution skill validation tests + +Self-contained samples for validating `check-code-attribution` without touching production SDK sources. + + +## Run the tests + +```bash +./check-code-attribution-tests.sh +``` + +Requires Node.js and an Anthropic API key (`WARDEN_ANTHROPIC_API_KEY` or `ANTHROPIC_API_KEY`). See SKILL.md "Warden CLI" section for all auth options. + +In practice, straight command line runs tend to be a bit flakier than asking Claude Code to run the tests for you. + +## Layout + +- `EXPECTED.json` — scenario IDs and expected outcomes (single source of truth). 
+- `THIRD_PARTY_NOTICES.catalog.md` — NOTICES-style entries for validation class names. +- `scenarios/` — `.java` files and `THIRD_PARTY_NOTICES.mismatch-snippet.md` (copyright-mismatch fixture). +- `check-code-attribution-tests.sh` — runs Warden on a temp branch and asserts per-scenario pass/fail. +- `assert-scenarios.mjs` — validation driver (`validate`, `list-isolated`, `list-main-java`, `routing-set`, `assert` subcommands); parses Warden JSONL and checks outcomes from `EXPECTED.json`. + +### assert-scenarios.mjs commands + +```bash +node assert-scenarios.mjs validate EXPECTED.json scenarios/ # pre-flight (no API); run automatically by the shell script +node assert-scenarios.mjs list-isolated EXPECTED.json # id and file per isolated scenario +node assert-scenarios.mjs list-main-java EXPECTED.json scenarios/ # .java files for the main Warden batch +node assert-scenarios.mjs routing-set routing.json <id> <jsonl-path> # update id → Warden JSONL path +node assert-scenarios.mjs assert EXPECTED.json routing.json +``` + +Warden runs are limited to 300s. On macOS the script uses `gtimeout` (from `brew install coreutils`) when available, otherwise GNU `timeout`, otherwise `perl` with `alarm`. + +## Add a scenario + +1. Add `scenarios/<ClassName>.java`. +2. Add or omit a catalog entry in `THIRD_PARTY_NOTICES.catalog.md`. +3. Add an entry to `EXPECTED.json`. +4. **Isolation (if needed):** If the scenario relies on a finding that could be suppressed by Anthropic prompt-cache priming when analyzed alongside many other files (e.g. a missing-NOTICES entry, or a missing header on a file that has a complete NOTICES entry), add `"isolated": true` to its `EXPECTED.json` entry. The test script creates a dedicated worktree for each isolated scenario automatically — no changes to the script itself are needed. 
diff --git a/.claude/skills/check-code-attribution/validation-tests/THIRD_PARTY_NOTICES.catalog.md b/.claude/skills/check-code-attribution/validation-tests/THIRD_PARTY_NOTICES.catalog.md new file mode 100644 index 00000000000..0b9a9af364b --- /dev/null +++ b/.claude/skills/check-code-attribution/validation-tests/THIRD_PARTY_NOTICES.catalog.md @@ -0,0 +1,130 @@ +# Test THIRD_PARTY_NOTICES catalog (not shipped) + +Used only when validating `check-code-attribution` against `validation-tests/scenarios/**`. +Grep this file in addition to the repository root `THIRD_PARTY_NOTICES.md`. + +--- + +## Example — HeaderFullyStripped (MIT) + +**Source:** https://github.com/example/attribution-fixtures
+**License:** MIT License
+**Copyright:** Copyright (c) 2016 Example Author + +### Scope + +Attribution validation sample. The code resides in `io.sentry.skills.verification.HeaderFullyStripped` (`validation-tests/scenarios/HeaderFullyStripped.java`). + +``` +MIT License + +Copyright (c) 2016 Example Author + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +``` + +--- + +## Example — HeaderMissingButNoticePresent (Apache 2.0) + +**Source:** https://github.com/example/notices-without-header
+**License:** Apache License 2.0
+**Copyright:** Copyright 2023 Example Corp. + +### Scope + +Attribution validation sample. The code resides in `io.sentry.skills.verification.HeaderMissingButNoticePresent`. + +``` +Copyright 2023 Example Corp. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +``` + +--- + +## Example — HeaderMissingNonEssentialInfo (MIT) + +**Source:** https://github.com/example/examplelib
+**License:** MIT License
+**Copyright:** Copyright 2020 Example Corp. + +### Scope + +Attribution validation sample. The code resides in `io.sentry.skills.verification.HeaderMissingNonEssentialInfo`. + +``` +MIT License + +Copyright (c) 2020 Example Corp. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +``` + +--- + +## Example — HeaderCompleteAndNoticePresent (Apache 2.0) + +**Source:** https://github.com/example/complete-with-notices
+**License:** Apache License 2.0
+**Copyright:** Copyright 2020 Example Authors + +### Scope + +Attribution validation sample. The code resides in `io.sentry.skills.verification.HeaderCompleteAndNoticePresent`. + +``` +Copyright 2020 Example Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +``` diff --git a/.claude/skills/check-code-attribution/validation-tests/assert-scenarios.mjs b/.claude/skills/check-code-attribution/validation-tests/assert-scenarios.mjs new file mode 100755 index 00000000000..118d99f10d7 --- /dev/null +++ b/.claude/skills/check-code-attribution/validation-tests/assert-scenarios.mjs @@ -0,0 +1,377 @@ +#!/usr/bin/env node +/** + * Validation driver for check-code-attribution scenario tests. + * + * Usage: + * node assert-scenarios.mjs validate <expected.json> <scenarios-dir> + * node assert-scenarios.mjs list-isolated <expected.json> + * node assert-scenarios.mjs list-main-java <expected.json> <scenarios-dir> + * node assert-scenarios.mjs routing-set <routing.json> <id> <jsonl-path> + * node assert-scenarios.mjs assert <expected.json> <routing.json> + * + * routing.json maps scenario id to Warden JSONL output path, e.g. { "main": "/tmp/..." }. + * Non-isolated scenarios use the "main" entry when no dedicated id is present. 
+ */ + +import fs from 'node:fs'; +import path from 'node:path'; +import { pathToFileURL } from 'node:url'; + +const ISOLATED_FILE_JAVA = /\.java$/i; +const ISOLATED_FILE_NOTICES = 'THIRD_PARTY_NOTICES.md'; + +export function loadExpected(expectedPath) { + return JSON.parse(fs.readFileSync(expectedPath, 'utf8')); +} + +export function listIsolated(scenarios) { + return scenarios.filter((s) => s.isolated); +} + +/** Repo-relative path normalization for Warden JSONL matching. */ +export function normalizeRepoPath(filePath) { + if (!filePath) return filePath; + return filePath.replace(/\\/g, '/').replace(/^\.\//, '').replace(/\/+/g, '/'); +} + +/** True when a Warden-reported path refers to the expected scenario file. */ +export function pathMatchesWardenFile(reportedPath, wardenFile) { + const reported = normalizeRepoPath(reportedPath); + const expected = normalizeRepoPath(wardenFile); + if (reported === expected) return true; + const base = expected.split('/').pop(); + return base != null && reported.endsWith(`/${base}`); +} + +export function findingCountForFile(fileMap, wardenFile) { + const expected = normalizeRepoPath(wardenFile); + if (fileMap[expected] != null) return fileMap[expected]; + for (const [key, count] of Object.entries(fileMap)) { + if (pathMatchesWardenFile(key, wardenFile)) return count; + } + return 0; +} + +export function findingsForFile(findings, wardenFile) { + return findings.filter( + (f) => f.location && pathMatchesWardenFile(f.location.path, wardenFile), + ); +} + +export function listMainBatchJava(scenarios, scenariosDir) { + const isolatedJava = new Set( + listIsolated(scenarios) + .map((s) => s.file) + .filter((file) => ISOLATED_FILE_JAVA.test(file)), + ); + return fs + .readdirSync(scenariosDir) + .filter((name) => name.endsWith('.java') && !isolatedJava.has(name)) + .sort(); +} + +/** + * @returns {string[]} validation error messages (empty = ok) + */ +export function validateExpected(scenarios, scenariosDir) { + const errors = []; + 
+ if (!Array.isArray(scenarios)) { + return ['EXPECTED.json must be a JSON array']; + } + + const ids = new Set(); + const expectedJava = new Set(); + + for (const [index, s] of scenarios.entries()) { + const label = `entry ${index}`; + if (!s || typeof s !== 'object') { + errors.push(`${label}: must be an object`); + continue; + } + if (typeof s.id !== 'string' || !s.id) { + errors.push(`${label}: missing or empty "id"`); + } else { + if (ids.has(s.id)) errors.push(`duplicate id "${s.id}"`); + ids.add(s.id); + if (s.id === 'main') { + errors.push(`id "main" is reserved for routing.json`); + } + } + if (typeof s.file !== 'string' || !s.file) { + errors.push(`${label}: missing or empty "file"`); + } else if (ISOLATED_FILE_JAVA.test(s.file)) { + expectedJava.add(s.file); + const onDisk = path.join(scenariosDir, s.file); + if (!fs.existsSync(onDisk)) { + errors.push(`${s.id}: scenarios/${s.file} does not exist`); + } + } else if (s.file !== ISOLATED_FILE_NOTICES) { + errors.push( + `${s.id}: unsupported file "${s.file}" (use *.java or ${ISOLATED_FILE_NOTICES})`, + ); + } + if (typeof s.expectFinding !== 'boolean') { + errors.push(`${s.id ?? 
label}: "expectFinding" must be a boolean`); + } + if (s.isolated) { + if ( + !ISOLATED_FILE_JAVA.test(s.file) && + s.file !== ISOLATED_FILE_NOTICES + ) { + errors.push( + `${s.id}: isolated scenarios must use *.java or ${ISOLATED_FILE_NOTICES}`, + ); + } + } + } + + let diskJava = []; + try { + diskJava = fs.readdirSync(scenariosDir).filter((n) => n.endsWith('.java')); + } catch (e) { + errors.push(`cannot read scenarios dir ${scenariosDir}: ${e.message}`); + return errors; + } + + for (const name of diskJava) { + if (!expectedJava.has(name)) { + errors.push(`scenarios/${name} has no matching entry in EXPECTED.json`); + } + } + + if (listMainBatchJava(scenarios, scenariosDir).length === 0) { + errors.push('main Warden batch needs at least one non-isolated .java scenario'); + } + + return errors; +} + +export function parseWardenJsonl(jsonlPath) { + /** @type {Record<string, number>} */ + const fileMap = {}; + const allFindings = []; + try { + const raw = fs.readFileSync(jsonlPath, 'utf8').trim(); + if (!raw) return { fileMap, findings: [] }; + const records = raw + .split('\n') + .filter((l) => l.trim()) + .map((l) => JSON.parse(l)); + for (const record of records) { + const file = record.chunk && record.chunk.file; + if (!file) continue; + const normalized = normalizeRepoPath(file); + const recordFindings = record.findings || []; + fileMap[normalized] = (fileMap[normalized] || 0) + recordFindings.length; + for (const f of recordFindings) { + allFindings.push({ + ...f, + location: f.location || { path: normalized, startLine: 1 }, + }); + } + } + } catch (e) { + console.error( + 'ERROR: Could not parse Warden output from ' + jsonlPath + ':', + e.message, + ); + process.exit(2); + } + return { fileMap, findings: allFindings }; +} + +export function routingSet(routingPath, id, jsonlPath) { + const routing = JSON.parse(fs.readFileSync(routingPath, 'utf8')); + routing[id] = jsonlPath; + fs.writeFileSync(routingPath, JSON.stringify(routing)); +} + +function 
wardenFileForScenario(destPkg, scenario) { + return scenario.file === ISOLATED_FILE_NOTICES + ? ISOLATED_FILE_NOTICES + : `${destPkg}/${scenario.file}`; +} + +function loadRouting(routingPath) { + /** @type {Record<string, string>} */ + let routing; + try { + routing = JSON.parse(fs.readFileSync(routingPath, 'utf8')); + } catch (e) { + console.error(`ERROR: Could not read routing file ${routingPath}:`, e.message); + process.exit(2); + } + + if (typeof routing.main !== 'string' || !routing.main) { + console.error('ERROR: routing.json must include a non-empty "main" JSONL path.'); + process.exit(2); + } + return routing; +} + +function cmdValidate(expectedPath, scenariosDir) { + if (!expectedPath || !scenariosDir) { + console.error( + 'Usage: node assert-scenarios.mjs validate <expected.json> <scenarios-dir>', + ); + process.exit(2); + } + const errors = validateExpected(loadExpected(expectedPath), scenariosDir); + if (errors.length > 0) { + console.error('EXPECTED.json validation failed:'); + for (const err of errors) console.error(` - ${err}`); + process.exit(1); + } + console.log('EXPECTED.json OK'); +} + +function cmdListIsolated(expectedPath) { + for (const s of listIsolated(loadExpected(expectedPath))) { + process.stdout.write(`${s.id}\t${s.file}\n`); + } +} + +function cmdListMainJava(expectedPath, scenariosDir) { + if (!expectedPath || !scenariosDir) { + console.error( + 'Usage: node assert-scenarios.mjs list-main-java <expected.json> <scenarios-dir>', + ); + process.exit(2); + } + for (const name of listMainBatchJava(loadExpected(expectedPath), scenariosDir)) { + process.stdout.write(`${name}\n`); + } +} + +function cmdRoutingSet(routingPath, id, jsonlPath) { + if (!routingPath || !id || !jsonlPath) { + console.error( + 'Usage: node assert-scenarios.mjs routing-set <routing.json> <id> <jsonl-path>', + ); + process.exit(2); + } + routingSet(routingPath, id, jsonlPath); +} + +function cmdAssert(expectedPath, destPkg, routingPath) { + if (!expectedPath || !destPkg || !routingPath) { + console.error( + 'Usage: node assert-scenarios.mjs assert <expected.json> <dest-package-path> <routing.json>', + ); + process.exit(2); 
+ } + + const routing = loadRouting(routingPath); + const scenarios = loadExpected(expectedPath); + + /** @type {Record<string, ReturnType<typeof parseWardenJsonl>>} */ + const parsed = {}; + function getSource(id) { + const jsonlPath = routing[id] ?? routing.main; + if (!parsed[jsonlPath]) parsed[jsonlPath] = parseWardenJsonl(jsonlPath); + return parsed[jsonlPath]; + } + + const GREEN = '\x1b[32m'; + const RED = '\x1b[31m'; + const RESET = '\x1b[0m'; + + const failures = []; + let pass = 0; + + for (const s of scenarios) { + if (s.isolated && !routing[s.id]) { + console.error( + `ERROR: isolated scenario "${s.id}" has no routing entry (missing Warden run?)`, + ); + process.exit(2); + } + + const wardenFile = wardenFileForScenario(destPkg, s); + const source = getSource(s.id); + const count = findingCountForFile(source.fileMap, wardenFile); + const passed = s.expectFinding ? count > 0 : count === 0; + + if (passed) { + console.log(`${GREEN}PASS${RESET} ${s.id}`); + pass++; + } else { + const reason = s.expectFinding + ? 'expected finding (>= medium), got none' + : `expected no finding (>= medium), got ${count}`; + console.log(`${RED}FAIL${RESET} ${s.id} (${reason})`); + + failures.push({ + id: s.id, + findings: findingsForFile(source.findings, wardenFile), + }); + } + } + + const total = scenarios.length; + console.log(''); + console.log(`${total} scenarios: ${pass} passed, ${total - pass} failed`); + + if (failures.length > 0) { + console.log(''); + console.log('Warden output'); + console.log('══════════════════════'); + + for (const { id, findings } of failures) { + console.log(''); + console.log(id); + console.log('-'.repeat(id.length)); + if (findings.length === 0) { + console.log('(Warden produced no findings for this file)'); + } else { + for (const f of findings) { + console.log(f.title); + if (f.description) console.log(f.description); + if (f.verification) console.log('\nVerification: ' + f.verification); + console.log(''); + } + } + } + + process.exit(1); + } +} + +function usage() { + 
console.error(`Usage: + node assert-scenarios.mjs validate <expected.json> <scenarios-dir> + node assert-scenarios.mjs list-isolated <expected.json> + node assert-scenarios.mjs list-main-java <expected.json> <scenarios-dir> + node assert-scenarios.mjs routing-set <routing.json> <id> <jsonl-path> + node assert-scenarios.mjs assert <expected.json> <dest-package-path> <routing.json>`); + process.exit(2); +} + +function main() { + const [, , cmd, ...args] = process.argv; + switch (cmd) { + case 'validate': + cmdValidate(args[0], args[1]); + break; + case 'list-isolated': + if (!args[0]) usage(); + cmdListIsolated(args[0]); + break; + case 'list-main-java': + cmdListMainJava(args[0], args[1]); + break; + case 'routing-set': + cmdRoutingSet(args[0], args[1], args[2]); + break; + case 'assert': + cmdAssert(args[0], args[1], args[2]); + break; + default: + usage(); + } +} + +if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) { + main(); +} diff --git a/.claude/skills/check-code-attribution/validation-tests/check-code-attribution-tests.sh b/.claude/skills/check-code-attribution/validation-tests/check-code-attribution-tests.sh new file mode 100755 index 00000000000..713815734a2 --- /dev/null +++ b/.claude/skills/check-code-attribution/validation-tests/check-code-attribution-tests.sh @@ -0,0 +1,246 @@ +#!/usr/bin/env bash +# check-code-attribution-tests.sh — Validate the check-code-attribution skill against synthetic scenarios. +# +# Usage: +# ./check-code-attribution-tests.sh [--help] +# +# What it does: +# 1. Validates EXPECTED.json and scenario fixtures (no API calls). +# 2. Creates an isolated git worktree on a temp branch from HEAD. +# 3. Creates a diff (non-isolated .java files, NOTICES catalog, mismatch snippet), +# commits, and runs Warden on the main batch. +# 4. Scenarios marked "isolated" in EXPECTED.json each get their own worktree and Warden +# run to avoid prompt-cache priming that can suppress findings in concurrent batches. +# 5. Asserts per-scenario pass/fail against EXPECTED.json (>= medium findings only). +# 6. Prints Warden's actual output for each failing scenario. +# 7. 
Cleans up all worktrees. +# +# Requires: +# - Node.js / npx +# - One of: WARDEN_ANTHROPIC_API_KEY, ANTHROPIC_API_KEY, or Pi OAuth config +# (see SKILL.md "Warden CLI" section for setup options) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../../../.." && pwd)" +SCENARIOS_DIR="$SCRIPT_DIR/scenarios" +CATALOG="$SCRIPT_DIR/THIRD_PARTY_NOTICES.catalog.md" +EXPECTED_JSON="$SCRIPT_DIR/EXPECTED.json" +VALIDATION="$SCRIPT_DIR/assert-scenarios.mjs" +MISMATCH_SNIPPET="$SCENARIOS_DIR/THIRD_PARTY_NOTICES.mismatch-snippet.md" + +# Destination path inside the worktree — must not appear in warden.toml ignorePaths. +DEST_PACKAGE_PATH="sentry/src/test/java/io/sentry/skills/verification" + +# Warden wall-clock limit (seconds). +TIMEOUT_SEC=300 + +die() { echo "ERROR: $*" >&2; exit 1; } + +show_usage() { + cat <<'EOF' +Usage: check-code-attribution-tests.sh [--help] + +Validates the check-code-attribution skill against all scenarios in EXPECTED.json. +Runs Warden on a temporary branch and asserts per-scenario pass/fail (>= medium findings). + +Prerequisites: + - Node.js (npx) + - API key: WARDEN_ANTHROPIC_API_KEY or ANTHROPIC_API_KEY + (or Pi OAuth: npx pi && /login — see SKILL.md "Warden CLI" section) + - Wall-clock limit: gtimeout (brew install coreutils), GNU timeout, or perl +EOF +} + +[[ "${1:-}" == "--help" || "${1:-}" == "-h" ]] && { show_usage; exit 0; } + +# --- prereq checks --- + +command -v node >/dev/null 2>&1 || die "node not found — install Node.js." +command -v npx >/dev/null 2>&1 || die "npx not found — install Node.js." +command -v git >/dev/null 2>&1 || die "git not found." + +# macOS: GNU timeout is `gtimeout` from coreutils; fall back to perl alarm. 
+TIMEOUT_CMD=() +if command -v gtimeout >/dev/null 2>&1; then + TIMEOUT_CMD=(gtimeout "$TIMEOUT_SEC") +elif command -v timeout >/dev/null 2>&1; then + TIMEOUT_CMD=(timeout "$TIMEOUT_SEC") +elif command -v perl >/dev/null 2>&1; then + TIMEOUT_CMD=(perl -e 'alarm shift; exec @ARGV' "$TIMEOUT_SEC") +else + die "Need gtimeout (brew install coreutils), GNU timeout, or perl for Warden wall-clock limit" +fi + +if [[ -z "${WARDEN_ANTHROPIC_API_KEY:-}" && -z "${ANTHROPIC_API_KEY:-}" ]]; then + if [[ ! -f "$HOME/.pi/agent/auth.json" ]]; then + die "No API key found. Set WARDEN_ANTHROPIC_API_KEY, ANTHROPIC_API_KEY, or run: npx pi && /login" + fi +fi + +node "$VALIDATION" validate "$EXPECTED_JSON" "$SCENARIOS_DIR" + +# --- cleanup tracking --- + +declare -a WORKTREES=() +declare -a BRANCHES=() +declare -a JSON_FILES=() + +cleanup() { + for wt in "${WORKTREES[@]+"${WORKTREES[@]}"}"; do + git -C "$REPO_ROOT" worktree remove --force "$wt" 2>/dev/null || true + done + for b in "${BRANCHES[@]+"${BRANCHES[@]}"}"; do + git -C "$REPO_ROOT" branch -D "$b" 2>/dev/null || true + done + (( ${#JSON_FILES[@]} )) && rm -f "${JSON_FILES[@]}" +} +trap cleanup EXIT + +# --- resolve base commit --- +# Branch from HEAD so the worktree includes the current skill definition. + +BASE=$(git -C "$REPO_ROOT" rev-parse HEAD || die "Cannot resolve HEAD.") +TS=$(date +%s) + +# --- helpers --- + +# Commits paths in a validation worktree with consistent author metadata. +# Usage: git_commit_in_worktree <worktree> <message> [path...] +git_commit_in_worktree() { + local worktree="$1" message="$2" + shift 2 + if (($# > 0)); then + git -C "$worktree" add "$@" + fi + git -C "$worktree" \ + -c user.email="ci@sentry.io" \ + -c user.name="Validation Test" \ + commit --quiet -m "$message" +} + +# Creates a git worktree from $BASE and commits the NOTICES catalog as the Warden +# analysis base — so only fixture changes appear in the diff Warden analyzes. +# Prints the catalog-commit SHA to stdout. 
+setup_catalog_base() { + local worktree="$1" branch="$2" + git -C "$REPO_ROOT" worktree add --quiet "$worktree" "$BASE" -b "$branch" + printf '\n' >> "$worktree/THIRD_PARTY_NOTICES.md" + sed "s|validation-tests/scenarios/|${DEST_PACKAGE_PATH}/|g" \ + "$CATALOG" >> "$worktree/THIRD_PARTY_NOTICES.md" + git_commit_in_worktree "$worktree" "test: apply NOTICES catalog [skip ci]" \ + THIRD_PARTY_NOTICES.md + git -C "$worktree" rev-parse HEAD +} + +# Appends the mismatch snippet to THIRD_PARTY_NOTICES.md, stripping the fixture's +# prose header so only the NOTICES entry itself lands in the file. +append_mismatch_snippet() { + local worktree="$1" + printf '\n' >> "$worktree/THIRD_PARTY_NOTICES.md" + sed '1,/^---$/d' "$MISMATCH_SNIPPET" >> "$worktree/THIRD_PARTY_NOTICES.md" +} + +# Runs Warden and writes JSON output to the given file. +run_warden() { + local base="$1" worktree="$2" json_out="$3" label="$4" + echo "Running Warden on ${base:0:7}..HEAD ($label)..." + : > "$json_out" + if ! "${TIMEOUT_CMD[@]}" npx @sentry/warden "${base}..HEAD" \ + --skill check-code-attribution \ + --fail-on off \ + --report-on medium \ + --json \ + -C "$worktree" \ + > "$json_out"; then + if [[ ! -s "$json_out" ]]; then + die "Warden failed for $label with no JSON output (check API key, network, and Warden logs)." + fi + die "Warden exited with an error for $label but left partial JSON in $json_out." + fi + [[ -s "$json_out" ]] || die "Warden succeeded but produced no JSON output for $label." +} + +# --- main worktree: non-isolated scenarios --- +# Isolated .java files are omitted here; they get dedicated worktrees below. + +echo "Creating worktrees from $(git -C "$REPO_ROOT" rev-parse --short "$BASE")..." 
+echo "" + +MAIN_WORKTREE=$(mktemp -d) +MAIN_BRANCH="validation-main-${TS}" +MAIN_JSON=$(mktemp) +ROUTING_JSON_FILE=$(mktemp) +echo '{}' > "$ROUTING_JSON_FILE" +WORKTREES+=("$MAIN_WORKTREE") +BRANCHES+=("$MAIN_BRANCH") +JSON_FILES+=("$MAIN_JSON" "$ROUTING_JSON_FILE") + +MAIN_BASE=$(setup_catalog_base "$MAIN_WORKTREE" "$MAIN_BRANCH") + +DEST_DIR="$MAIN_WORKTREE/$DEST_PACKAGE_PATH" +mkdir -p "$DEST_DIR" + +shopt -s nullglob +copied=0 +while IFS= read -r java_file; do + cp "$SCENARIOS_DIR/$java_file" "$DEST_DIR/" + copied=$((copied + 1)) +done < <(node "$VALIDATION" list-main-java "$EXPECTED_JSON" "$SCENARIOS_DIR") +echo "Copied ${copied} scenario files → $DEST_PACKAGE_PATH/ (non-isolated batch)" +append_mismatch_snippet "$MAIN_WORKTREE" +git_commit_in_worktree "$MAIN_WORKTREE" \ + "test: add check-code-attribution validation fixtures [skip ci]" \ + "$DEST_PACKAGE_PATH" THIRD_PARTY_NOTICES.md + +run_warden "$MAIN_BASE" "$MAIN_WORKTREE" "$MAIN_JSON" "main" +node "$VALIDATION" routing-set "$ROUTING_JSON_FILE" main "$MAIN_JSON" + +# --- isolated worktrees: one per scenario marked "isolated" in EXPECTED.json --- +# +# Scenarios where Anthropic prompt-cache priming can suppress findings in a concurrent +# batch get their own worktree and Warden run. EXPECTED.json is the single source of +# truth for which scenarios need isolation — add "isolated": true there, not here. +# Java isolates omit the mismatch snippet; the NOTICES mismatch scenario adds it alone. 
+ +while IFS=$'\t' read -r id file; do + worktree=$(mktemp -d) + branch="validation-isolated-${TS}-${id//[^a-zA-Z0-9]/-}" + json=$(mktemp) + WORKTREES+=("$worktree") + BRANCHES+=("$branch") + JSON_FILES+=("$json") + + base=$(setup_catalog_base "$worktree" "$branch") + + commit_paths=() + if [[ "$file" == *.java ]]; then + dest_dir="$worktree/$DEST_PACKAGE_PATH" + mkdir -p "$dest_dir" + cp "$SCENARIOS_DIR/$file" "$dest_dir/" + commit_paths=("$DEST_PACKAGE_PATH") + elif [[ "$file" == "THIRD_PARTY_NOTICES.md" ]]; then + append_mismatch_snippet "$worktree" + commit_paths=(THIRD_PARTY_NOTICES.md) + else + die "Unsupported isolated scenario file: $file (id: $id)" + fi + + git_commit_in_worktree "$worktree" "test: isolated fixture for $id [skip ci]" \ + "${commit_paths[@]}" + + echo "" + run_warden "$base" "$worktree" "$json" "$id" + node "$VALIDATION" routing-set "$ROUTING_JSON_FILE" "$id" "$json" + +done < <(node "$VALIDATION" list-isolated "$EXPECTED_JSON") + +echo "" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + +# --- assert per-scenario --- +# +# ROUTING_JSON_FILE maps scenario id → Warden JSONL path; non-isolated scenarios use "main". + +node "$VALIDATION" assert "$EXPECTED_JSON" "$DEST_PACKAGE_PATH" "$ROUTING_JSON_FILE" diff --git a/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderCompleteAndNoticePresent.java b/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderCompleteAndNoticePresent.java new file mode 100644 index 00000000000..4dca9ad3603 --- /dev/null +++ b/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderCompleteAndNoticePresent.java @@ -0,0 +1,21 @@ +/* + * Adapted from https://github.com/example + * + * Copyright 2020 Example Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * https://github.com/example/something + */ +package io.sentry.skills.verification; + +public final class HeaderCompleteAndNoticePresent { + + public int sum(int a, int b) { + return a + b; + } +} diff --git a/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderCompleteButNoticeMissing.java b/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderCompleteButNoticeMissing.java new file mode 100644 index 00000000000..081d1848300 --- /dev/null +++ b/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderCompleteButNoticeMissing.java @@ -0,0 +1,17 @@ +/* + * Adapted from https://github.com/example + * + * Copyright 2024 Example Authors + * + * Licensed under the MIT License + * + * https://github.com/example/something + */ +package io.sentry.skills.verification; + +public final class HeaderCompleteButNoticeMissing { + + public boolean ok() { + return true; + } +} diff --git a/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderFullyStripped.java b/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderFullyStripped.java new file mode 100644 index 00000000000..6973848c61e --- /dev/null +++ b/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderFullyStripped.java @@ -0,0 +1,7 @@ +/* Attribution stripped — fixture for check-code-attribution validation only. 
*/ +package io.sentry.skills.verification; + +public final class HeaderFullyStripped { + + public void run() {} +} diff --git a/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderMissingButNoticePresent.java b/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderMissingButNoticePresent.java new file mode 100644 index 00000000000..5c4953ea3ad --- /dev/null +++ b/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderMissingButNoticePresent.java @@ -0,0 +1,8 @@ +package io.sentry.skills.verification; + +public final class HeaderMissingButNoticePresent { + + public int compute(int x) { + return x * 2; + } +} diff --git a/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderMissingNonEssentialInfo.java b/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderMissingNonEssentialInfo.java new file mode 100644 index 00000000000..c524a2593a4 --- /dev/null +++ b/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderMissingNonEssentialInfo.java @@ -0,0 +1,12 @@ +// Adapted from ExampleLib. +// Copyright 2020 Example Corp. +// Licensed under the MIT License. +// https://github.com/example/examplelib +package io.sentry.skills.verification; + +public final class HeaderMissingNonEssentialInfo { + + public int compute(int x) { + return x + 1; + } +} diff --git a/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderPartiallyStripped.java b/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderPartiallyStripped.java new file mode 100644 index 00000000000..0389934d94a --- /dev/null +++ b/.claude/skills/check-code-attribution/validation-tests/scenarios/HeaderPartiallyStripped.java @@ -0,0 +1,10 @@ +// Adapted from Example RateLimiter. 
+// https://github.com/example +package io.sentry.skills.verification; + +public final class HeaderPartiallyStripped { + + public synchronized boolean tryAcquire() { + return true; + } +} diff --git a/.claude/skills/check-code-attribution/validation-tests/scenarios/NewLicenseType.java b/.claude/skills/check-code-attribution/validation-tests/scenarios/NewLicenseType.java new file mode 100644 index 00000000000..e148f5a1a4f --- /dev/null +++ b/.claude/skills/check-code-attribution/validation-tests/scenarios/NewLicenseType.java @@ -0,0 +1,10 @@ +// Adapted from ExampleLib. +// Copyright 2020 Example Corp. +// Licensed under the GNU Affero General Public License v3.0. +// https://github.com/example/agpl-lib +package io.sentry.skills.verification; + +public final class NewLicenseType { + + public void run() {} +} diff --git a/.claude/skills/check-code-attribution/validation-tests/scenarios/THIRD_PARTY_NOTICES.mismatch-snippet.md b/.claude/skills/check-code-attribution/validation-tests/scenarios/THIRD_PARTY_NOTICES.mismatch-snippet.md new file mode 100644 index 00000000000..5a9b87285df --- /dev/null +++ b/.claude/skills/check-code-attribution/validation-tests/scenarios/THIRD_PARTY_NOTICES.mismatch-snippet.md @@ -0,0 +1,37 @@ +# Snippet fixture — MismatchLib entry for the isolated mismatch worktree. +# header-vs-notice-mismatch: copyright in metadata field does not match embedded license text. + +--- + +## Example — MismatchLib (MIT) + +**Source:** https://github.com/example/mismatch
+**License:** MIT License
+**Copyright:** Copyright (c) 2020 Wrong Holder + +### Scope + +Validation sample only. The code resides in `io.sentry.skills.verification.MismatchLib`. + +``` +MIT License + +Copyright (c) 2016 Correct Holder + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +``` diff --git a/.gitignore b/.gitignore index a7899736a86..f252087a5ab 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,6 @@ spy.log # Auto-generated by dotagents — do not commit these files. agents.lock .agents/.gitignore + +# Warden local run logs +.warden/logs/ diff --git a/AGENTS.md b/AGENTS.md index 1784e4f950e..e6e49477d6a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -154,6 +154,8 @@ When adapting code from third-party libraries: ``` 2. Add a full attribution entry to `THIRD_PARTY_NOTICES.md` following the existing format (Source, License, Copyright, Scope, full license text) +3. 
Run the `check-code-attribution` skill locally or wait for it to be auto-run against your PR to check for required fields and verify new licenses against [Sentry's Open Source Legal Policy](https://open.sentry.io/licensing/). The skill definition lives at `.claude/skills/check-code-attribution/SKILL.md` (registered in `agents.toml`; `.agents/skills/` is a symlink to `.claude/skills/`). + ### Getting PR Information Use `gh pr view` to get PR details from the current branch. This is needed when adding changelog entries, which require the PR number. diff --git a/agents.toml b/agents.toml index b4c9e091b70..d9770ee7df5 100644 --- a/agents.toml +++ b/agents.toml @@ -35,3 +35,7 @@ source = "path:.agents/skills/test" [[skills]] name = "btrace-perfetto" source = "path:.agents/skills/btrace-perfetto" + +[[skills]] +name = "check-code-attribution" +source = "path:.agents/skills/check-code-attribution" diff --git a/warden.toml b/warden.toml new file mode 100644 index 00000000000..7bb2865b747 --- /dev/null +++ b/warden.toml @@ -0,0 +1,100 @@ +version = 1 + +[defaults] +model = "anthropic/claude-sonnet-4-6" + +# Warden's schema does not support per-skill verification config; this is the only +# placement available. Disabled for attribution policy checks: a second verifier +# pass often rejects valid header/NOTICES mismatches (e.g. "NOTICES still documents it"). +[defaults.verification] +enabled = false + +# Warden's schema does not support per-skill chunking config; these patterns apply +# globally but are tuned for check-code-attribution. Attribution checks need the full +# file header and a NOTICES cross-check — not isolated diff hunks. 
+[[defaults.chunking.filePatterns]] +pattern = "**/*.api" +mode = "skip" + +[[defaults.chunking.filePatterns]] +pattern = "**/gradlew" +mode = "skip" + +[[defaults.chunking.filePatterns]] +pattern = "**/gradlew.bat" +mode = "skip" + +[[defaults.chunking.filePatterns]] +pattern = "**/*.java" +mode = "whole-file" + +[[defaults.chunking.filePatterns]] +pattern = "**/*.kt" +mode = "whole-file" + +[[defaults.chunking.filePatterns]] +pattern = "**/*.kts" +mode = "whole-file" + +[[defaults.chunking.filePatterns]] +pattern = "THIRD_PARTY_NOTICES.md" +mode = "whole-file" + +# Coalesce hunks aggressively for any remaining per-hunk files +[defaults.chunking.coalesce] +enabled = true +maxGapLines = 100 +maxChunkSize = 16000 + +[[skills]] +name = "check-code-attribution" +maxTurns = 30 +# Phase 1: report only — Warden comments on PRs but does not block merges. +# Tighten to failOn = "medium" / requestChanges = true once the false-positive baseline is established. +failOn = "off" +reportOn = "medium" +ignorePaths = [ + # Infrastructure directories + ".agents/**", + ".claude/**", + ".cursor/**", + ".github/**", + ".gradle/**", + ".idea/**", + ".mvn/**", + "gradle/**", + # Generated files + "**/*.aidl", + "**/*.api", + "**/*.g.kt", + "**/*.interp", + "**/*.pb.java", + "**/*.tokens", + "**/databinding/*Binding.java", + "**/generated/**", + "**/gradlew", + "**/gradlew.bat", + "**/grpc/*Grpc.java", + "**/ksp/**", + "**/mvnw", + "**/mvnw.cmd", + # Binary files + "**/*.jar", + # Repo docs (attribution examples in prose, not vendored code) + "AGENTS.md", + "CHANGELOG.md", + "CLAUDE.md", + "**/README.md", + # Warden infrastructure + ".warden/**", + "warden.toml", +] + +[[skills.triggers]] +type = "pull_request" +actions = ["opened", "synchronize"] +requestChanges = false +failCheck = false + +[[skills.triggers]] +type = "local"