diff --git a/.github/actions/comment_ai_review/action.yml b/.github/actions/comment_ai_review/action.yml
new file mode 100644
index 00000000..c4e378be
--- /dev/null
+++ b/.github/actions/comment_ai_review/action.yml
@@ -0,0 +1,94 @@
+name: Comment AI Review
+description: Create or update a single PR comment with the Azure OpenAI code review, keyed by a header marker.
+
+inputs:
+  has_review:
+    description: Whether a review was produced (True/False). If False, any existing AI review comment is deleted.
+    required: true
+  comment:
+    description: The base64-encoded markdown comment body
+    required: false
+  marker:
+    description: The header marker used to find an existing comment to update
+    required: true
+  pr:
+    description: ID of the PR to comment on
+    required: true
+  github_token:
+    description: Token used to read/write PR comments
+    required: true
+
+runs:
+  using: 'composite'
+  steps:
+    - id: comment-ai-review
+      name: Comment AI Review
+      uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
+      env:
+        # Inputs reach the script through the environment rather than being
+        # expression-expanded into the JavaScript source, so a value containing
+        # quotes or backticks can neither break the script nor run as code.
+        PR_NUMBER: ${{ inputs.pr }}
+        HAS_REVIEW: ${{ inputs.has_review }}
+        MARKER: ${{ inputs.marker }}
+        COMMENT_B64: ${{ inputs.comment }}
+      with:
+        github-token: ${{ inputs.github_token }}
+        script: |
+          const prInput = (process.env.PR_NUMBER || '').trim();
+          const hasReview = process.env.HAS_REVIEW === 'True';
+          const marker = process.env.MARKER || '';
+
+          if (!prInput || prInput === 'null' || prInput === 'undefined') {
+            console.log('No PR number provided, skipping');
+            return;
+          }
+          if (!/^[0-9]+$/.test(prInput)) {
+            core.setFailed(`Invalid PR number: ${prInput}`);
+            return;
+          }
+          if (!marker) {
+            // An empty marker would match every comment on the PR.
+            core.setFailed('The marker input must not be empty');
+            return;
+          }
+
+          const { owner, repo } = context.repo;
+          const issue_number = Number(prInput);
+
+          // Collect every page of PR comments so a prior review is found even on busy PRs.
+          const comments = await github.paginate(github.rest.issues.listComments, {
+            owner,
+            repo,
+            issue_number,
+            per_page: 100,
+          });
+
+          // Only a comment that starts with the marker is ours; a reply that merely
+          // quotes the header must not be overwritten or deleted.
+          const existing = comments.find(c => c.body && c.body.trimStart().startsWith(marker));
+
+          if (!hasReview) {
+            if (existing) {
+              console.log(`No review produced; deleting prior AI review comment ${existing.id}`);
+              await github.rest.issues.deleteComment({ owner, repo, comment_id: existing.id });
+            } else {
+              console.log('No review produced and no prior comment to delete');
+            }
+            return;
+          }
+
+          const body = Buffer.from(process.env.COMMENT_B64 || '', 'base64').toString('utf-8');
+          if (!body.trim()) {
+            // An empty body cannot form a useful comment; fail with a clear message instead.
+            core.setFailed('has_review is True but the comment input is empty');
+            return;
+          }
+
+          if (existing) {
+            console.log(`Updating existing AI review comment ${existing.id}`);
+            await github.rest.issues.updateComment({ owner, repo, comment_id: existing.id, body });
+          } else {
+            console.log('Creating new AI review comment');
+            await github.rest.issues.createComment({ owner, repo, issue_number, body });
+          }
diff --git a/.github/scripts/review_pr_diff.py b/.github/scripts/review_pr_diff.py
new file mode 100644
index 00000000..ddc9ba69
--- /dev/null
+++ b/.github/scripts/review_pr_diff.py
@@ -0,0 +1,169 @@
+"""Generate an Azure OpenAI code review for a GitHub PR diff.
+
+Reads the unified PR diff from stdin (piped from `gh api`), calls the
+configured Azure OpenAI deployment with a code-review system prompt, and
+writes a base64-encoded markdown comment body to $GITHUB_OUTPUT under the
+key `comment_result`, alongside `has_review` (True/False). The composite
+action `comment_ai_review` then posts or updates a single PR comment keyed
+by the header marker, or deletes it when no review was produced.
+
+Usage: review_pr_diff.py <repo> <pr_number> <workflow_run_link> < pr.diff
+"""
+
+import base64
+import os
+import sys
+
+from openai import AzureOpenAI
+
+# The marker used by comment_ai_review to find/update/delete its own comment.
+HEADER_MARKER = "## Azure OpenAI Code Review"
+
+# Hard cap on diff size sent to the model. Diffs larger than this are
+# truncated; the comment notes the truncation. ~60k chars ≈ 15k tokens, well
+# inside gpt-5 input limits while leaving headroom for the system prompt.
+MAX_DIFF_CHARS = 60000
+
+# Azure OpenAI REST API version the client is pinned to.
+API_VERSION = "2024-12-01-preview"
+
+SYSTEM_PROMPT = """You are a senior software engineer reviewing a pull request for the UID2/EUID project. The codebase is primarily Java (Maven) with some Python tooling.
+
+Review the unified diff below and produce a focused, actionable review. Concentrate on:
+
+1. **Correctness** — logic errors, off-by-one, null handling, concurrency, resource leaks.
+2. **Security** — input validation, secret/credential handling, SQL/command injection, OWASP top 10, unsafe deserialization.
+3. **Error handling** — swallowed exceptions, missing logging, retry/backoff, partial failure modes.
+4. **Test coverage** — note untested code paths the diff introduces, especially around new branches, error handling, and edge cases.
+5. **API/contract changes** — anything that breaks consumers, changes wire format, or alters persisted data shape.
+
+Format the review as GitHub-flavoured markdown:
+
+- Start with a one-paragraph **Summary** of what the PR does.
+- Then a **Findings** section, grouped under bold severity headers: `### 🔴 Blocking`, `### 🟡 Suggestions`, `### 🟢 Nits`. Omit any header with no findings.
+- Each finding: a short bold title, one or two sentences explaining the issue, and a `file.ext:line` reference where possible. Quote no more than 3 lines of code per finding.
+
+Be terse. Prefer fewer high-signal findings over an exhaustive list. If the diff looks fine, say so plainly under a single **Summary** and skip Findings. Do not restate what the diff already shows.
+"""
+
+
+def _append_outputs(lines: list[str]) -> bool:
+    """Append `key=value` lines to $GITHUB_OUTPUT; return False when it is unset."""
+    github_output = os.environ.get("GITHUB_OUTPUT")
+    if not github_output:
+        return False
+    with open(github_output, "a", encoding="utf-8") as f:
+        f.writelines(f"{line}\n" for line in lines)
+    return True
+
+
+def write_no_review(reason: str) -> None:
+    """Tell the composite action there is no review, so any prior comment is removed."""
+    print(f"No review produced: {reason}", file=sys.stderr)
+    _append_outputs(["has_review=False", "comment_result="])
+
+
+def read_diff() -> str:
+    """Read the whole diff from stdin as text."""
+    # Decode explicitly: a PR may touch files that are not valid UTF-8, and a
+    # strict decode of stdin would abort the whole review on the first bad byte.
+    return sys.stdin.buffer.read().decode("utf-8", errors="replace")
+
+
+def request_review(client: AzureOpenAI, deployment: str, diff: str) -> str:
+    """Send the diff to the deployment and return the markdown review text."""
+    completion = client.chat.completions.create(
+        model=deployment,
+        messages=[
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": diff},
+        ],
+        # The penalty and stream arguments are omitted: the values previously
+        # passed were the API defaults, and reasoning-model deployments are
+        # documented as not accepting the penalty parameters.
+        max_completion_tokens=2000,
+    )
+    choice = completion.choices[0]
+    review_body = choice.message.content or "_(no review produced)_"
+    if choice.finish_reason == "length":
+        # The completion-token budget ran out; flag it instead of posting a
+        # silently clipped review.
+        review_body += "\n\n> _Note: the model reached its output token limit, so this review may be incomplete._"
+    return review_body
+
+
+def build_comment(
+    review_body: str,
+    deployment: str,
+    truncated: bool,
+    repo: str,
+    pr_number: str,
+    workflow_run_link: str,
+) -> str:
+    """Wrap the review text in the marker header, disclaimer and run footer."""
+    truncation_note = (
+        f"\n> _Note: the diff exceeded {MAX_DIFF_CHARS:,} characters and was truncated before review._\n"
+        if truncated else ""
+    )
+    return f"""{HEADER_MARKER}
+
+_Powered by Azure OpenAI ({deployment}). This is an automated review intended as a starting point — human review is still required._
+{truncation_note}
+{review_body}
+
+---
+
+Workflow run: {workflow_run_link} · Repo: {repo} · PR: #{pr_number}
+"""
+
+
+def main(argv: list[str]) -> int:
+    """Run the review end to end and return the process exit code."""
+    if len(argv) < 4:
+        print(
+            "usage: review_pr_diff.py <repo> <pr_number> <workflow_run_link> < pr.diff",
+            file=sys.stderr,
+        )
+        return 2
+    # e.g. IABTechLab/uid2-admin, 1234, and a link back to the Actions run.
+    repo, pr_number, workflow_run_link = argv[1:4]
+
+    diff = read_diff()
+    if not diff.strip():
+        write_no_review("empty diff")
+        return 0
+
+    truncated = len(diff) > MAX_DIFF_CHARS
+    if truncated:
+        diff = diff[:MAX_DIFF_CHARS]
+
+    endpoint = os.getenv("AZURE_OPENAI_ENDPOINT", "")
+    deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT", "gpt-5")
+    api_key = os.getenv("AZURE_OPENAI_API_KEY", "")
+
+    if not api_key:
+        write_no_review("AZURE_OPENAI_API_KEY is not set")
+        return 0
+    if not endpoint:
+        write_no_review("AZURE_OPENAI_ENDPOINT is not set")
+        return 0
+
+    client = AzureOpenAI(
+        azure_endpoint=endpoint,
+        api_key=api_key,
+        api_version=API_VERSION,
+    )
+    review_body = request_review(client, deployment, diff)
+    comment = build_comment(
+        review_body, deployment, truncated, repo, pr_number, workflow_run_link
+    )
+    print(comment)
+
+    encoded = base64.b64encode(comment.encode("utf-8")).decode("utf-8")
+    if not _append_outputs([f"comment_result={encoded}", "has_review=True"]):
+        print("GITHUB_OUTPUT not set; comment will not be posted.", file=sys.stderr)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
diff --git a/.github/workflows/ai-code-review.yaml b/.github/workflows/ai-code-review.yaml
new file mode 100644
index 00000000..648bb754
--- /dev/null
+++ b/.github/workflows/ai-code-review.yaml
@@ -0,0 +1,96 @@
+name: AI Code Review
+
+# Posts an Azure OpenAI–generated code review as a single PR comment, keyed by
+# a header marker so subsequent pushes update (rather than append to) the same
+# comment.
+#
+# Prerequisites: AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT must be
+# available to this workflow, either as org-level secrets on IABTechLab
+# (scoped to uid2-admin) or as repo-level secrets. If either is absent, the
+# script logs and exits cleanly, no review is posted, and any review comment
+# left by an earlier run is deleted.
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: PR number to (re-)review
+        required: true
+        type: string
+
+permissions:
+  contents: read
+  pull-requests: write
+
+concurrency:
+  group: ai-code-review-${{ github.event.pull_request.number || github.event.inputs.pr_number }}
+  cancel-in-progress: true
+
+jobs:
+  review:
+    runs-on: ubuntu-latest
+    # Skip drafts and forks (forks don't get the secret anyway).
+    if: ${{ github.event_name == 'workflow_dispatch' || (github.event.pull_request.draft == false && github.event.pull_request.head.repo.full_name == github.repository) }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
+
+      - name: Set up Python
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
+        with:
+          python-version: '3.12'
+
+      - name: Install dependencies
+        run: pip install --no-cache-dir 'openai>=1.40,<2'
+
+      - name: Resolve PR number
+        id: pr
+        env:
+          EVENT_PR: ${{ github.event.pull_request.number }}
+          INPUT_PR: ${{ github.event.inputs.pr_number }}
+        run: |
+          number="${EVENT_PR:-$INPUT_PR}"
+          # The manual-dispatch input is free text; accept digits only before it
+          # is written to the step output and reused by later steps.
+          if ! [[ "$number" =~ ^[0-9]+$ ]]; then
+            echo "::error::Invalid PR number: '${number}'"
+            exit 1
+          fi
+          echo "number=${number}" >> "$GITHUB_OUTPUT"
+
+      - name: Fetch PR diff
+        id: diff
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
+        run: |
+          gh api \
+            -H "Accept: application/vnd.github.v3.diff" \
+            "repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \
+            > pr.diff
+          echo "size=$(wc -c < pr.diff)" >> "$GITHUB_OUTPUT"
+
+      - name: Generate AI review
+        id: review
+        env:
+          AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
+          AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
+        run: |
+          python ./.github/scripts/review_pr_diff.py \
+            "${GITHUB_REPOSITORY}" \
+            "${PR_NUMBER}" \
+            "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" \
+            < pr.diff
+
+      - name: Post, update, or delete review comment
+        uses: ./.github/actions/comment_ai_review
+        with:
+          has_review: ${{ steps.review.outputs.has_review }}
+          comment: ${{ steps.review.outputs.comment_result }}
+          # Must match HEADER_MARKER in .github/scripts/review_pr_diff.py.
+          marker: '## Azure OpenAI Code Review'
+          pr: ${{ steps.pr.outputs.number }}
+          github_token: ${{ github.token }}