feat: add report state tracking for workflow management #13
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
name: SDK Diff Analyzer

# Scans the gdc-nas repository for REST API changes that may call for SDK updates.
# All access goes through the GitHub API, so gdc-nas is never cloned.
# The generated reports are published as artifacts for the sdk-analyze and jira-sync skills.

on:
  schedule:
    # Every 6 hours
    - cron: '0 */6 * * *'
  push:
    branches:
      # Temporary: for testing
      - feature/sdk-diff-analyzer
  workflow_dispatch:
    inputs:
      commits_to_analyze:
        description: 'Number of commits to analyze (default: 200)'
        default: '200'
      since_commit:
        description: 'Analyze since this commit SHA (overrides state tag)'
        default: ''
      enable_clustering:
        description: 'Enable report clustering (groups similar changes)'
        default: 'true'
        type: boolean
      use_claude_clustering:
        description: 'Use Claude for enhanced clustering (requires ANTHROPIC_API_KEY)'
        default: 'false'
        type: boolean
      debug:
        description: 'Enable verbose debug logging'
        default: 'true'
        type: boolean

env:
  # Repositories owned by `gooddata` read the main gdc-nas repo; any other owner
  # reads the fork that is used for testing.
  GDC_NAS_REPO: ${{ github.repository_owner == 'gooddata' && 'gooddata/gdc-nas' || 'tychtjan/gdc-nas' }}
  # Path, inside this repository, of the file holding the last analyzed gdc-nas commit SHA
  STATE_FILE: '.github/gdc-nas-last-analyzed.txt'
jobs:
  # Single job: determine the gdc-nas commit range, run the analyzer (and the
  # optional clustering pass), publish the reports as artifacts, then record
  # the analyzed HEAD in the state file so the next run is incremental.
  analyze:
    name: Analyze gdc-nas changes
    # Use self-hosted runners in production, GitHub-hosted for forks/testing
    runs-on: ${{ github.repository == 'gooddata/gooddata-python-sdk' && 'infra1-runners-arc' || 'ubuntu-latest' }}
    permissions:
      contents: write
      actions: write
    env:
      # Use PAT for gdc-nas access, falls back to GITHUB_TOKEN
      GH_TOKEN: ${{ secrets.GDC_NAS_READ_TOKEN || secrets.GITHUB_TOKEN }}
    steps:
      - name: Debug - Environment info
        env:
          # User-supplied inputs are handed over as environment variables instead
          # of being expanded into the script text, so their content can never be
          # interpreted as shell code.
          COMMITS_INPUT: ${{ inputs.commits_to_analyze || '200' }}
          SINCE_INPUT: ${{ inputs.since_commit || '(not set)' }}
        run: |
          echo "=== Environment ==="
          echo "Repository: ${{ github.repository }}"
          echo "Event: ${{ github.event_name }}"
          echo "Runner: ${{ runner.os }}"
          echo "GH CLI version: $(gh --version | head -1)"
          echo ""
          echo "=== Inputs ==="
          echo "commits_to_analyze: $COMMITS_INPUT"
          echo "since_commit: $SINCE_INPUT"

      - name: Verify gdc-nas access
        run: |
          echo "=== Testing API access to $GDC_NAS_REPO ==="
          if gh api "repos/$GDC_NAS_REPO" --jq '.full_name' 2>/dev/null; then
            echo "✅ Access verified"
          else
            echo "❌ Cannot access $GDC_NAS_REPO"
            echo ""
            echo "To fix: Add a Personal Access Token with 'repo' scope as secret GDC_NAS_READ_TOKEN"
            exit 1
          fi

      - name: Checkout SDK repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Get current gdc-nas HEAD
        id: gdc_nas
        run: |
          HEAD_SHA=$(gh api "repos/$GDC_NAS_REPO/commits/HEAD" --jq '.sha')
          echo "head_sha=$HEAD_SHA" >> "$GITHUB_OUTPUT"
          echo "gdc-nas HEAD: $HEAD_SHA"

      # Outputs: `since` (start commit, empty when counting commits instead),
      # `mode` (manual | incremental | fallback | initial) and, for the two
      # count-based modes, `commits`.
      - name: Determine analysis range
        id: range
        env:
          COMMITS: ${{ inputs.commits_to_analyze || '200' }}
          SINCE_INPUT: ${{ inputs.since_commit }}
        run: |
          echo "=== Determining range ==="
          if [ -n "$SINCE_INPUT" ]; then
            # User provided explicit since commit
            echo "Using user-provided since commit: $SINCE_INPUT"
            {
              echo "since=$SINCE_INPUT"
              echo "mode=manual"
            } >> "$GITHUB_OUTPUT"
          elif [ -f "$STATE_FILE" ]; then
            # Read from state file
            LAST_SHA=$(tr -d '[:space:]' < "$STATE_FILE")
            echo "Found state file with SHA: $LAST_SHA"
            # Verify commit exists. The emptiness check matters: with an empty
            # SHA the request would hit the commit *list* endpoint instead of
            # looking up a single commit.
            if [ -n "$LAST_SHA" ] && gh api "repos/$GDC_NAS_REPO/commits/$LAST_SHA" --jq '.sha' >/dev/null 2>&1; then
              echo "Commit verified, using incremental mode"
              {
                echo "since=$LAST_SHA"
                echo "mode=incremental"
              } >> "$GITHUB_OUTPUT"
            else
              echo "Stored commit not found, falling back to last $COMMITS commits"
              {
                echo "since="
                echo "mode=fallback"
                echo "commits=$COMMITS"
              } >> "$GITHUB_OUTPUT"
            fi
          else
            echo "No state file, analyzing last $COMMITS commits"
            {
              echo "since="
              echo "mode=initial"
              echo "commits=$COMMITS"
            } >> "$GITHUB_OUTPUT"
          fi

      - name: Run API-based analyzer
        id: analyze
        env:
          MODE: ${{ steps.range.outputs.mode }}
          SINCE: ${{ steps.range.outputs.since }}
          COMMITS: ${{ steps.range.outputs.commits || '200' }}
        run: |
          # The default run shell is `bash -e` without pipefail, so the status of
          # `analyzer | tee` would be tee's. Enable pipefail so that a failing
          # analyzer fails this step and the state file is not advanced past
          # commits that were never analyzed.
          # NOTE(review): assumes the analyzer exits 0 on a normal run - confirm.
          set -o pipefail
          mkdir -p reports
          echo "=== Running Analyzer ==="
          echo "Mode: $MODE"
          ARGS=(--repo "$GDC_NAS_REPO" --output-dir ./reports)
          if [ -n "$SINCE" ]; then
            ARGS+=(--since "$SINCE")
          else
            ARGS+=(--commits "$COMMITS")
          fi
          echo "Running: python3 scripts/gdc_nas_api_analyzer.py ${ARGS[*]}"
          python3 scripts/gdc_nas_api_analyzer.py "${ARGS[@]}" 2>&1 | tee analyzer.log
          echo ""
          echo "=== Reports ==="
          ls -la reports/
          # Count SDK-relevant reports (excluding summary). `find` prints nothing
          # and still exits 0 when no report exists, which keeps the count
          # correct with pipefail enabled.
          REPORT_COUNT=$(find reports -maxdepth 1 -name '*.md' ! -name '*00-summary*' | wc -l)
          echo "sdk_reports=$REPORT_COUNT" >> "$GITHUB_OUTPUT"

      - name: Cluster similar reports
        id: cluster
        # `inputs.enable_clustering` is a real boolean for workflow_dispatch runs,
        # so it is tested as one; comparing it with the string 'false' is never
        # equal. Schedule and push runs have no inputs and always cluster.
        if: steps.analyze.outputs.sdk_reports > 1 && (github.event_name != 'workflow_dispatch' || inputs.enable_clustering)
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          REPORT_COUNT: ${{ steps.analyze.outputs.sdk_reports }}
          USE_CLAUDE: ${{ inputs.use_claude_clustering }}
        run: |
          # Propagate a clustering failure through the `| tee` pipeline below.
          # NOTE(review): assumes the clustering script exits 0 on a normal run - confirm.
          set -o pipefail
          echo "=== Clustering Reports ==="
          echo "Reports to cluster: $REPORT_COUNT"
          mkdir -p clustered
          # Build clustering arguments
          CLUSTER_ARGS=(--input-dir ./reports --output-dir ./clustered)
          # Check if Claude enhancement is requested and API key is available
          if [ "$USE_CLAUDE" = "true" ] && [ -n "$ANTHROPIC_API_KEY" ]; then
            echo "Claude enhancement enabled"
            pip install anthropic --quiet
            CLUSTER_ARGS+=(--use-claude)
          elif [ "$USE_CLAUDE" = "true" ]; then
            echo "Warning: Claude clustering requested but ANTHROPIC_API_KEY not set"
            echo "Falling back to heuristic clustering"
          fi
          echo "Running: python3 scripts/cluster_sdk_reports.py ${CLUSTER_ARGS[*]}"
          python3 scripts/cluster_sdk_reports.py "${CLUSTER_ARGS[@]}" 2>&1 | tee clustering.log
          # Count clusters
          CLUSTER_COUNT=$(find clustered -maxdepth 1 -name 'cluster-*.md' | wc -l)
          echo "clusters_created=$CLUSTER_COUNT" >> "$GITHUB_OUTPUT"
          # Guard on the file that is actually printed, so a missing summary
          # cannot abort the step.
          if [ -f clustered/00-clusters.md ]; then
            echo ""
            echo "=== Cluster Summary ==="
            cat clustered/00-clusters.md
          fi

      - name: Sync report state
        id: state
        if: steps.analyze.outputs.sdk_reports > 0
        run: |
          echo "=== Syncing Report State ==="
          # Sync state with new reports and clusters
          SYNC_ARGS=(--reports-dir ./reports)
          if [ -d "./clustered" ]; then
            SYNC_ARGS+=(--clusters-dir ./clustered)
          fi
          python3 scripts/sdk_report_state.py sync "${SYNC_ARGS[@]}"
          # Show what needs attention
          echo ""
          python3 scripts/sdk_report_state.py needs-attention
          # Output counts for summary. Only the first "new" line after each
          # section header is used, so the value written to GITHUB_OUTPUT is
          # always a single line; an absent value becomes 0.
          SUMMARY=$(python3 scripts/sdk_report_state.py summary 2>&1)
          NEW_REPORTS=$(echo "$SUMMARY" | grep -A10 "REPORTS:" | grep -m1 "new" | awk '{print $NF}' || true)
          NEW_CLUSTERS=$(echo "$SUMMARY" | grep -A10 "CLUSTERS:" | grep -m1 "new" | awk '{print $NF}' || true)
          echo "new_reports=${NEW_REPORTS:-0}" >> "$GITHUB_OUTPUT"
          echo "new_clusters=${NEW_CLUSTERS:-0}" >> "$GITHUB_OUTPUT"

      - name: Upload analysis reports
        uses: actions/upload-artifact@v4
        with:
          name: sdk-diff-reports-${{ github.run_number }}
          path: reports/
          retention-days: 30

      - name: Upload clustered reports
        if: steps.cluster.outputs.clusters_created > 0
        uses: actions/upload-artifact@v4
        with:
          name: sdk-clustered-reports-${{ github.run_number }}
          path: clustered/
          retention-days: 30

      - name: Upload analyzer log
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: analyzer-log-${{ github.run_number }}
          path: |
            analyzer.log
            clustering.log
          retention-days: 7
          if-no-files-found: ignore

      - name: Update state files
        env:
          HEAD_SHA: ${{ steps.gdc_nas.outputs.head_sha }}
          NEW_REPORTS: ${{ steps.state.outputs.new_reports || '0' }}
          NEW_CLUSTERS: ${{ steps.state.outputs.new_clusters || '0' }}
        run: |
          echo "=== Updating state ==="
          # Never overwrite the stored SHA with an empty value.
          if [ -z "$HEAD_SHA" ]; then
            echo "gdc-nas HEAD SHA is empty, refusing to update state"
            exit 1
          fi
          # Update gdc-nas commit state
          echo "$HEAD_SHA" > "$STATE_FILE"
          echo "Stored gdc-nas SHA: $HEAD_SHA"
          # Commit and push state files
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          # Add both state files
          git add "$STATE_FILE"
          git add ".github/sdk-report-state.json" 2>/dev/null || true
          if git diff --staged --quiet; then
            echo "No changes to state files"
          else
            # Two -m flags produce a subject and a separate body paragraph.
            COMMIT_BODY=$(printf 'gdc-nas HEAD: %s\nNew reports: %s\nNew clusters: %s' \
              "$HEAD_SHA" "$NEW_REPORTS" "$NEW_CLUSTERS")
            git commit \
              -m "chore: update analyzer state to ${HEAD_SHA:0:12}" \
              -m "$COMMIT_BODY"
            git push
            echo "State files updated and pushed"
          fi

      - name: Job summary
        if: always()
        env:
          # Values are passed through the environment: the heredoc below is
          # unquoted, so text expanded directly into it would be subject to
          # command substitution.
          MODE: ${{ steps.range.outputs.mode }}
          SINCE: ${{ steps.range.outputs.since || 'N/A' }}
          HEAD_SHA: ${{ steps.gdc_nas.outputs.head_sha }}
          SDK_REPORTS: ${{ steps.analyze.outputs.sdk_reports }}
          CLUSTERS_CREATED: ${{ steps.cluster.outputs.clusters_created || '0' }}
          NEW_REPORTS: ${{ steps.state.outputs.new_reports || '0' }}
          NEW_CLUSTERS: ${{ steps.state.outputs.new_clusters || '0' }}
        run: |
          cat >> "$GITHUB_STEP_SUMMARY" << EOF
          ## SDK Diff Analyzer Results

          | Parameter | Value |
          |-----------|-------|
          | Mode | $MODE |
          | Since | \`$SINCE\` |
          | gdc-nas HEAD | \`$HEAD_SHA\` |
          | SDK-relevant commits | $SDK_REPORTS |
          | Clusters created | $CLUSTERS_CREATED |
          | **New reports** | **$NEW_REPORTS** |
          | **New clusters** | **$NEW_CLUSTERS** |
          EOF
          # Show items needing attention. The whole group is redirected, so the
          # command's stdout, its stderr and the failure note all end up in the
          # step summary.
          if [ -f ".github/sdk-report-state.json" ]; then
            {
              echo ""
              echo "### 🔔 Items Needing Attention"
              echo ""
              echo '```'
              python3 scripts/sdk_report_state.py needs-attention 2>&1 || echo "State check failed"
              echo '```'
            } >> "$GITHUB_STEP_SUMMARY"
          fi
          if [ -f reports/00-summary.md ]; then
            {
              echo ""
              echo "### Report Summary"
              echo ""
              cat reports/00-summary.md
            } >> "$GITHUB_STEP_SUMMARY"
          fi
          if [ -f clustered/00-clusters.md ]; then
            {
              echo ""
              echo "### Clustering Summary"
              echo ""
              cat clustered/00-clusters.md
            } >> "$GITHUB_STEP_SUMMARY"
          fi