Skip to content

feat: add full OpenAPI semantic analysis to API analyzer #7

feat: add full OpenAPI semantic analysis to API analyzer

feat: add full OpenAPI semantic analysis to API analyzer #7

---
name: SDK Diff Analyzer

# Analyzes the gdc-nas repository for REST API changes that may require SDK updates.
# Uses the GitHub API - no clone of gdc-nas is required.
# Reports are uploaded as artifacts for use by the sdk-analyze and jira-sync skills.

on:
  schedule:
    - cron: '0 */6 * * *'  # Every 6 hours
  push:
    branches:
      - feature/sdk-diff-analyzer  # Temporary: for testing
  workflow_dispatch:
    inputs:
      commits_to_analyze:
        description: 'Number of commits to analyze (default: 200)'
        type: string
        required: false
        default: '200'
      since_commit:
        # The stored position lives in STATE_FILE (see env below), not in a tag.
        description: 'Analyze since this commit SHA (overrides state file)'
        type: string
        required: false
        default: ''
      debug:
        description: 'Enable verbose debug logging'
        type: boolean
        required: false
        # Boolean inputs take a real boolean default, not the string 'true'.
        default: true

# The job commits and pushes STATE_FILE at the end of a run. Serialize runs per
# ref so a scheduled run and a manual/push run cannot race on that push.
concurrency:
  group: sdk-diff-analyzer-${{ github.ref }}
  cancel-in-progress: false

env:
  # Use fork for testing, main repo in production
  GDC_NAS_REPO: ${{ github.repository_owner == 'gooddata' && 'gooddata/gdc-nas' || 'tychtjan/gdc-nas' }}
  # File in this repo that stores the last analyzed gdc-nas commit SHA
  STATE_FILE: '.github/gdc-nas-last-analyzed.txt'
jobs:
  analyze:
    name: Analyze gdc-nas changes
    # gooddata/gooddata-python-sdk runs on the self-hosted runner pool; any
    # other repository (forks, testing) gets a GitHub-hosted Ubuntu runner.
    runs-on: ${{ github.repository == 'gooddata/gooddata-python-sdk' && 'infra1-runners-arc' || 'ubuntu-latest' }}
    permissions:
      actions: write
      # The "Update state file" step commits and pushes to this repository.
      contents: write
    env:
      # Token picked up by the gh CLI: the dedicated gdc-nas PAT when the
      # secret is configured, otherwise the workflow's own GITHUB_TOKEN.
      GH_TOKEN: ${{ secrets.GDC_NAS_READ_TOKEN || secrets.GITHUB_TOKEN }}
    steps:
# Prints basic run context and the effective workflow inputs to the job log.
- name: Debug - Environment info
  env:
    # Hand user-controlled inputs to the script through the environment.
    # Expanding ${{ inputs.* }} inside `run:` would paste the raw text into the
    # shell script, letting a crafted value execute arbitrary commands.
    COMMITS_TO_ANALYZE: ${{ inputs.commits_to_analyze || '200' }}
    SINCE_COMMIT: ${{ inputs.since_commit || '(not set)' }}
  run: |
    echo "=== Environment ==="
    # GITHUB_REPOSITORY, GITHUB_EVENT_NAME and RUNNER_OS are default runner
    # variables carrying the same values as the github.* / runner.* contexts.
    echo "Repository: $GITHUB_REPOSITORY"
    echo "Event: $GITHUB_EVENT_NAME"
    echo "Runner: $RUNNER_OS"
    echo "GH CLI version: $(gh --version | head -1)"
    echo ""
    echo "=== Inputs ==="
    echo "commits_to_analyze: $COMMITS_TO_ANALYZE"
    echo "since_commit: $SINCE_COMMIT"
# Fails fast when the configured token cannot read the gdc-nas repository.
- name: Verify gdc-nas access
  run: |
    echo "=== Testing API access to $GDC_NAS_REPO ==="
    # On success gh prints the repository's full name; its stderr is discarded.
    if ! gh api "repos/$GDC_NAS_REPO" --jq '.full_name' 2>/dev/null; then
      echo "❌ Cannot access $GDC_NAS_REPO"
      echo ""
      echo "To fix: Add a Personal Access Token with 'repo' scope as secret GDC_NAS_READ_TOKEN"
      exit 1
    fi
    echo "✅ Access verified"
# Shallow checkout of this repository; later steps run scripts/ from it and
# commit the state file back to it.
- name: Checkout SDK repository
  uses: actions/checkout@v4
  with:
    fetch-depth: 1
# Interpreter used to run scripts/gdc_nas_api_analyzer.py.
- name: Setup Python
  uses: actions/setup-python@v5
  with:
    # Quoted so the version is read as a string, not a float.
    python-version: "3.12"
# Resolves the commit SHA gdc-nas HEAD currently points at and exposes it as
# the `head_sha` step output.
- name: Get current gdc-nas HEAD
  id: gdc_nas
  run: |
    head_sha="$(gh api "repos/$GDC_NAS_REPO/commits/HEAD" --jq '.sha')"
    echo "head_sha=$head_sha" >> "$GITHUB_OUTPUT"
    echo "gdc-nas HEAD: $head_sha"
# Decides what the analyzer should look at and publishes it as step outputs:
#   since   - commit SHA to analyze from (empty when analyzing by count)
#   mode    - manual | incremental | fallback | initial
#   commits - number of commits to analyze (only set when `since` is empty)
- name: Determine analysis range
  id: range
  env:
    # Inputs are passed via the environment instead of being expanded into the
    # script text, so their content can never be executed as shell code.
    COMMITS: ${{ inputs.commits_to_analyze || '200' }}
    SINCE_INPUT: ${{ inputs.since_commit }}
  run: |
    echo "=== Determining range ==="

    # Both values end up on the analyzer's command line; reject anything that
    # is not a plain number / commit SHA before going further.
    if ! [[ "$COMMITS" =~ ^[0-9]+$ ]]; then
      echo "::error::commits_to_analyze must be a whole number, got '$COMMITS'"
      exit 1
    fi
    if [ -n "$SINCE_INPUT" ] && ! [[ "$SINCE_INPUT" =~ ^[0-9a-fA-F]{7,40}$ ]]; then
      echo "::error::since_commit must be a 7-40 character hexadecimal commit SHA"
      exit 1
    fi

    # Publish "analyze the last $COMMITS commits" under the given mode label.
    analyze_by_count() {
      {
        echo "since="
        echo "mode=$1"
        echo "commits=$COMMITS"
      } >> "$GITHUB_OUTPUT"
    }

    if [ -n "$SINCE_INPUT" ]; then
      # User provided explicit since commit
      echo "Using user-provided since commit: $SINCE_INPUT"
      {
        echo "since=$SINCE_INPUT"
        echo "mode=manual"
      } >> "$GITHUB_OUTPUT"
    elif [ -f "$STATE_FILE" ]; then
      # Read from state file
      LAST_SHA="$(tr -d '[:space:]' < "$STATE_FILE")"
      echo "Found state file with SHA: $LAST_SHA"
      # Verify commit exists. The non-empty check matters: with an empty SHA
      # the request below would hit the commit *list* endpoint and succeed,
      # reporting incremental mode with nothing to start from.
      if [ -n "$LAST_SHA" ] \
        && gh api "repos/$GDC_NAS_REPO/commits/$LAST_SHA" --jq '.sha' >/dev/null 2>&1; then
        echo "Commit verified, using incremental mode"
        {
          echo "since=$LAST_SHA"
          echo "mode=incremental"
        } >> "$GITHUB_OUTPUT"
      else
        echo "Stored commit not found, falling back to last $COMMITS commits"
        analyze_by_count fallback
      fi
    else
      echo "No state file, analyzing last $COMMITS commits"
      analyze_by_count initial
    fi
# Runs scripts/gdc_nas_api_analyzer.py against gdc-nas, writing reports to
# ./reports and a full log to analyzer.log. Exposes the number of generated
# reports (excluding the summary) as the `sdk_reports` step output.
- name: Run API-based analyzer
  id: analyze
  env:
    # Step outputs are passed via the environment rather than expanded into the
    # script text, so unusual values cannot alter the script.
    RANGE_MODE: ${{ steps.range.outputs.mode }}
    RANGE_SINCE: ${{ steps.range.outputs.since }}
    RANGE_COMMITS: ${{ steps.range.outputs.commits || '200' }}
  run: |
    # The default `run` shell is `bash -e` without pipefail. Without this, the
    # exit status of `analyzer | tee` is tee's, so a crashed analyzer would be
    # reported as success and the state file would still be advanced.
    set -o pipefail

    mkdir -p reports
    echo "=== Running Analyzer ==="
    echo "Mode: $RANGE_MODE"

    # Build the argument list as an array so every value stays a single word.
    args=(--repo "$GDC_NAS_REPO" --output-dir ./reports)
    if [ -n "$RANGE_SINCE" ]; then
      args+=(--since "$RANGE_SINCE")
    else
      args+=(--commits "$RANGE_COMMITS")
    fi

    echo "Running: python3 scripts/gdc_nas_api_analyzer.py ${args[*]}"
    python3 scripts/gdc_nas_api_analyzer.py "${args[@]}" 2>&1 | tee analyzer.log

    echo ""
    echo "=== Reports ==="
    ls -la reports/

    # Count SDK-relevant reports (excluding summary). find is used instead of
    # parsing `ls | grep -v`, which returns non-zero when nothing is left and
    # would abort the step now that pipefail is on.
    report_count="$(find reports -maxdepth 1 -name '*.md' ! -name '*00-summary*' | wc -l | tr -d '[:space:]')"
    echo "sdk_reports=$report_count" >> "$GITHUB_OUTPUT"
# Publishes everything under reports/ as a run-numbered artifact, kept 30 days.
- name: Upload analysis reports
  uses: actions/upload-artifact@v4
  with:
    path: reports/
    name: sdk-diff-reports-${{ github.run_number }}
    retention-days: 30
# Publishes analyzer.log even when an earlier step failed. A missing log is
# tolerated, and the artifact is kept for 7 days.
- name: Upload analyzer log
  if: always()
  uses: actions/upload-artifact@v4
  with:
    path: analyzer.log
    name: analyzer-log-${{ github.run_number }}
    if-no-files-found: ignore
    retention-days: 7
# Records the gdc-nas HEAD resolved earlier in this run into STATE_FILE and
# pushes the change, so the next run can continue from that commit.
- name: Update state file
  env:
    # Passed via the environment instead of being expanded into the script.
    HEAD_SHA: ${{ steps.gdc_nas.outputs.head_sha }}
  run: |
    echo "=== Updating state ==="

    # Never overwrite the stored position with an empty or malformed value;
    # a bad state file would silently change what later runs analyze.
    if ! [[ "$HEAD_SHA" =~ ^[0-9a-f]{40}$ ]]; then
      echo "::error::Refusing to update state: '$HEAD_SHA' is not a full commit SHA"
      exit 1
    fi

    printf '%s\n' "$HEAD_SHA" > "$STATE_FILE"
    echo "Stored SHA: $HEAD_SHA"

    # Commit and push state file
    git config user.name "github-actions[bot]"
    git config user.email "github-actions[bot]@users.noreply.github.com"
    git add "$STATE_FILE"
    # `git diff --staged --quiet` exits 0 when nothing is staged, i.e. the
    # stored SHA is already the current HEAD.
    if git diff --staged --quiet; then
      echo "No changes to state file"
    else
      git commit -m "chore: update gdc-nas analyzer state to ${HEAD_SHA:0:12}"
      git push
      echo "State file updated and pushed"
    fi
# Writes a results table to the job summary page, followed by the analyzer's
# own summary report when one was produced. Runs even after a failure.
- name: Job summary
  if: always()
  run: |
    summary_file="$GITHUB_STEP_SUMMARY"

    # The ${{ }} expressions are filled in before the shell runs; the quoted
    # heredoc delimiter stops the shell from expanding anything further.
    cat >> "$summary_file" << 'EOF'
    ## SDK Diff Analyzer Results
    | Parameter | Value |
    |-----------|-------|
    | Mode | ${{ steps.range.outputs.mode }} |
    | Since | `${{ steps.range.outputs.since || 'N/A' }}` |
    | gdc-nas HEAD | `${{ steps.gdc_nas.outputs.head_sha }}` |
    | SDK-relevant commits | ${{ steps.analyze.outputs.sdk_reports }} |
    EOF

    # Nothing more to add when the analyzer produced no summary report.
    [ -f reports/00-summary.md ] || exit 0

    {
      echo "### Report Summary"
      echo ""
      cat reports/00-summary.md
    } >> "$summary_file"