# Fix curated resources search #860
name: Staging Aggregate Deployment

# ==========================================
# Staging Aggregate Deployment Workflow
# ==========================================
# Purpose:  Merge open PRs into one aggregate branch, build the site from it
#           and publish the result to the staging site.
# Triggers: PRs against master, a monthly schedule, or a manual dispatch.
# Features: multi-PR aggregation, single-PR mode, and a force-deploy option.
on:
  pull_request:
    branches:
      - master
    types:
      - opened
      - synchronize
      - reopened
      - ready_for_review
  schedule:
    # 01:00 UTC on the 1st day of every month
    - cron: "0 1 1 * *"
  workflow_dispatch:
    inputs:
      force_deploy:
        description: "Force deployment even if no PRs found"
        type: boolean
        required: false
        default: false
      single_pr:
        description: "Deploy a single PR only (enter PR number, e.g. 123)"
        type: string
        required: false
        default: ""
      create_summary:
        description: "Create deployment summary (and monthly report if 1st of month)"
        type: boolean
        required: false
        default: false

# All runs share one concurrency group and a running build is never
# cancelled, so a newer run waits for it instead of replacing it.
concurrency:
  group: staging-aggregate
  cancel-in-progress: false
jobs:
  # Builds a throwaway branch from master plus every PR that merges cleanly.
  aggregate-prs:
    name: Aggregate PRs for Staging
    runs-on: ubuntu-22.04
    # Draft PRs are skipped entirely; they must not trigger staging builds.
    if: github.event.pull_request.draft != true
    timeout-minutes: 10  # Prevent blocking the queue
    permissions:
      pull-requests: read
      contents: write
    # Values published by the `aggregate` step for the downstream jobs.
    outputs:
      has-prs: ${{ steps.aggregate.outputs.has_prs }}
      total-prs: ${{ steps.aggregate.outputs.total_prs }}
      attempted-prs: ${{ steps.aggregate.outputs.attempted_prs }}
      included-prs: ${{ steps.aggregate.outputs.included_prs }}
      aggregated-branch: ${{ steps.aggregate.outputs.branch }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.STAGING_GITHUB_TOKEN }}
          # 0 = fetch the complete history instead of a shallow clone.
          fetch-depth: 0

      # Commit identity used by the merge commits created during aggregation.
      - name: Configure Git
        run: |
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
# ----- aggregate-prs: merge every candidate PR into a fresh branch -----
      # Outputs: branch, included_prs, attempted_prs, total_prs, has_prs.
      # PR lists are comma-separated PR numbers.
      - name: Aggregate PRs
        id: aggregate
        env:
          GH_TOKEN: ${{ secrets.STAGING_GITHUB_TOKEN }}
          # Handed over through the environment instead of being expanded
          # inside the script, so the free-text dispatch input can never be
          # parsed as shell code.
          SINGLE_PR: ${{ github.event.inputs.single_pr }}
          TRIGGERING_PR: ${{ github.event.pull_request.number }}
        run: |
          # Emit the outputs for a run that produced no aggregate branch.
          #   $1 - value for attempted_prs (may be empty)
          write_empty_outputs() {
            {
              echo "branch=master"
              echo "included_prs="
              echo "attempted_prs=$1"
              echo "total_prs=0"
              echo "has_prs=false"
            } >> "$GITHUB_OUTPUT"
          }

          # Print the comma-separated list $1 with item $2 appended.
          append_csv() {
            if [ -z "$1" ]; then
              echo "$2"
            else
              echo "$1,$2"
            fi
          }

          # Debug: Show trigger information
          echo "🔍 Workflow triggered by: $GITHUB_EVENT_NAME"
          if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
            echo "🔍 Triggering PR: #$TRIGGERING_PR"
          fi

          # Check if single PR mode is enabled
          if [ -n "$SINGLE_PR" ]; then
            MODE="Single PR"
            echo "🎯 Single PR mode: deploying only PR #$SINGLE_PR"
            # Only a plain number is accepted: later steps build API paths
            # and comment targets from this value.
            case "$SINGLE_PR" in
              *[!0-9]*)
                echo "❌ single_pr must be a PR number, got: $SINGLE_PR"
                write_empty_outputs ""
                exit 1
                ;;
            esac
            # Validate PR exists and is open
            if ! gh pr view "$SINGLE_PR" --json state --jq '.state' | grep -q "OPEN"; then
              echo "❌ PR #$SINGLE_PR is not open or doesn't exist"
              write_empty_outputs "$SINGLE_PR"
              exit 1
            fi
            PRS="$SINGLE_PR"
            TOTAL_PRS=1
          else
            MODE="Aggregate"
            # Get all open, non-draft PRs targeting master (regardless of CI status)
            # NOTE(review): `gh pr list` returns at most 30 PRs unless --limit
            # is given - confirm that is enough for this repository.
            PRS=$(gh pr list --base master --state open --json number,title,headRefName,isDraft --jq '.[] | select(.isDraft == false) | .number')
            if [ -z "$PRS" ]; then
              echo "No open non-draft PRs found"
              write_empty_outputs ""
              exit 0
            fi
            echo "Found PRs: $PRS"
            TOTAL_PRS=$(echo "$PRS" | wc -l)
          fi
          {
            echo "has_prs=true"
            echo "total_prs=$TOTAL_PRS"
          } >> "$GITHUB_OUTPUT"

          # Create a new branch for aggregation starting from master
          AGGREGATE_BRANCH="staging-aggregate-$(date +%Y%m%d-%H%M%S)"
          git checkout master
          git checkout -b "$AGGREGATE_BRANCH"

          INCLUDED_PRS=""
          ATTEMPTED_PRS=""
          # Try to merge each PR in order. $PRS is deliberately unquoted:
          # it holds one PR number per line and is split into words here.
          for pr in $PRS; do
            echo "Attempting to merge PR #$pr"
            # Track all attempted PRs (including the triggering PR, which is
            # no longer skipped so that the latest PR is always included)
            ATTEMPTED_PRS=$(append_csv "$ATTEMPTED_PRS" "$pr")

            # Get PR details
            PR_DATA=$(gh pr view "$pr" --json headRefName,headRepositoryOwner,title)
            HEAD_REF=$(echo "$PR_DATA" | jq -r '.headRefName')
            HEAD_OWNER=$(echo "$PR_DATA" | jq -r '.headRepositoryOwner.login')
            TITLE=$(echo "$PR_DATA" | jq -r '.title')
            echo "Processing PR #$pr: $TITLE"

            # Add remote if it's a fork
            # NOTE(review): assumes the fork kept the upstream repository
            # name; a renamed fork would fail to fetch - confirm.
            if [ "$HEAD_OWNER" != "forrtproject" ]; then
              git remote add "pr-$pr" "https://github.com/$HEAD_OWNER/forrtproject.github.io.git"
              git fetch "pr-$pr" "$HEAD_REF"
              MERGE_REF="pr-$pr/$HEAD_REF"
            else
              git fetch origin "$HEAD_REF"
              MERGE_REF="origin/$HEAD_REF"
            fi

            # Try to merge into our aggregate branch
            if git merge "$MERGE_REF" --no-edit; then
              echo "✅ Successfully merged PR #$pr"
              INCLUDED_PRS=$(append_csv "$INCLUDED_PRS" "$pr")
            else
              echo "⚠️ Merge conflict with PR #$pr - skipping and continuing with next PR"
              # Only abort if there's an active merge
              if [ -f .git/MERGE_HEAD ]; then
                git merge --abort
              fi
              # Continue with next PR instead of stopping
            fi
          done

          # Push the aggregate branch
          git push origin "$AGGREGATE_BRANCH"

          # Clean up old staging branches, keeping only the 2 most recent
          echo "🧹 Cleaning up old staging branches..."
          # Branch names end in a YYYYMMDD-HHMMSS timestamp, so a reverse
          # lexicographic sort is newest-first; `tail -n +3` drops the two
          # newest and leaves the ones to delete.
          OLD_BRANCHES=$(git ls-remote --heads origin 'refs/heads/staging-aggregate-*' | \
            awk '{print $2}' | \
            sed 's|refs/heads/||' | \
            sort -r | \
            tail -n +3)
          if [ -n "$OLD_BRANCHES" ]; then
            echo "Found $(echo "$OLD_BRANCHES" | wc -l) old staging branches to delete"
            for branch in $OLD_BRANCHES; do
              echo " Deleting: $branch"
              git push origin --delete "$branch" || echo " ⚠️ Failed to delete $branch (may already be deleted)"
            done
            echo "✅ Cleanup completed"
          else
            echo "No old staging branches to delete (keeping 2 most recent)"
          fi

          {
            echo "branch=$AGGREGATE_BRANCH"
            echo "included_prs=$INCLUDED_PRS"
            echo "attempted_prs=$ATTEMPTED_PRS"
          } >> "$GITHUB_OUTPUT"

          echo "✅ Aggregation completed"
          echo "📊 Summary:"
          # MODE is set where the mode is decided; the earlier inline test
          # with escaped quotes was always true and always printed "Single PR".
          echo " - Mode: $MODE"
          echo " - Total PRs found: $TOTAL_PRS"
          echo " - PRs attempted: $ATTEMPTED_PRS"
          echo " - PRs successfully merged: $INCLUDED_PRS"
          echo " - Workflow trigger: $GITHUB_EVENT_NAME"
          if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
            echo " - Triggering PR: #$TRIGGERING_PR"
          fi
# ----- build job: render the aggregated branch with Hugo -----
  build:
    name: Build
    needs:
      - aggregate-prs
    runs-on: ubuntu-22.04
    # always() keeps this job eligible even when aggregate-prs did not
    # succeed; it then runs only if PRs were aggregated or a forced deploy
    # was requested.
    if: always() && (needs.aggregate-prs.outputs.has-prs == 'true' || github.event.inputs.force_deploy == 'true')
    timeout-minutes: 20  # Prevent blocking the queue
    permissions:
      actions: read  # Needed for artifact access
      contents: read
    env:
      HUGO_EXTENDED: "true"
      HUGO_VERSION: "0.158.0"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Falls back to master when no aggregate branch was reported.
          ref: ${{ needs.aggregate-prs.outputs.aggregated-branch || 'master' }}

      - name: Configure Git
        run: |
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
# =======================
# Data Artifact Retrieval
# =======================
      # Fetches `data-artifact` produced by data-processing.yml into the
      # workspace root. continue-on-error keeps the job alive when the
      # download fails; the fallback steps of this job key off this step's
      # `outcome`.
      - name: Try to download data artifact
        id: download-artifact
        uses: dawidd6/action-download-artifact@v6
        continue-on-error: true
        with:
          workflow: data-processing.yml
          name: data-artifact
          path: .
          github_token: ${{ secrets.GITHUB_TOKEN }}
          search_artifacts: true
          # Must be `fail`: with `warn` a missing artifact only logs a
          # warning and the step still ends as "success", so the
          # `outcome == 'failure'` conditions on the fallback steps could
          # never match.
          if_no_artifact_found: fail
# =======================
# Data Processing (Fallback)
# =======================
      # Dispatches data-processing.yml and waits for that run to finish.
      # Outputs:
      #   data_processing_triggered - 'true' once a dispatch was sent,
      #                               'false' for pull_request events
      #   run_id                    - id of the successful run (set on success)
      - name: Run data processing if artifact missing
        id: data-processing
        if: steps.download-artifact.outcome == 'failure'
        env:
          GITHUB_TOKEN: ${{ secrets.FORRT_PAT }}
        run: |
          set -e

          WORKFLOW_API="https://api.github.com/repos/$GITHUB_REPOSITORY/actions/workflows/data-processing.yml"

          # Print the API response listing the newest workflow_dispatch run
          # of data-processing.yml on branch $1. Never fails: on a transport
          # error the output is empty and the caller just polls again.
          latest_dispatch_run() {
            curl -s \
              -H "Authorization: Bearer $GITHUB_TOKEN" \
              -H "Accept: application/vnd.github+json" \
              "$WORKFLOW_API/runs?event=workflow_dispatch&branch=$1&per_page=1" || true
          }

          # Print field $2 of the first run in response $1, or nothing when
          # the field is missing/null or the response is not valid JSON.
          run_field() {
            echo "$1" | jq -r ".workflow_runs[0].$2 // empty" 2>/dev/null || true
          }

          # Pull-request runs do not dispatch the external workflow; they
          # warn and let the later steps handle the missing data.
          if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
            echo "::warning::Data artifact missing in PR. Skipping trigger."
            echo "data_processing_triggered=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # Determine branch ref to dispatch (must be a branch or tag)
          REF="${GITHUB_REF#refs/heads/}"
          if [ -z "$REF" ]; then
            REF="master"
          fi

          # Remember the newest run that already exists, so an older,
          # already completed run is not mistaken for the one dispatched
          # below while the new run has not been created yet.
          previous_run_id=$(run_field "$(latest_dispatch_run "$REF")" id)

          echo "🚀 Dispatching data-processing workflow for ref: $REF because artifact was missing"
          # --fail turns an HTTP 4xx/5xx answer into a non-zero exit status;
          # without it a rejected dispatch never reached the error handler.
          curl -sS --fail -X POST \
            -H "Authorization: Bearer $GITHUB_TOKEN" \
            -H "Accept: application/vnd.github+json" \
            "$WORKFLOW_API/dispatches" \
            -d "{\"ref\":\"$REF\", \"inputs\": {\"skip_deploy\": \"true\"}}" \
            || { echo "::error::Failed to dispatch data-processing workflow"; exit 1; }
          echo "data_processing_triggered=true" >> "$GITHUB_OUTPUT"

          echo "⏳ Waiting for data-processing workflow run to start and complete..."
          attempts=0
          max_attempts=90
          interval=10
          # Wait a bit for the run to be created
          sleep 10
          while [ "$attempts" -lt "$max_attempts" ]; do
            runs_json=$(latest_dispatch_run "$REF")
            run_id=$(run_field "$runs_json" id)
            status=$(run_field "$runs_json" status)
            conclusion=$(run_field "$runs_json" conclusion)
            # Only a run newer than the one seen before the dispatch counts.
            if [ -n "$run_id" ] && [ "$run_id" != "$previous_run_id" ]; then
              echo "Tracking data-processing run: $run_id (Status: $status)"
              if [ "$status" = "completed" ]; then
                if [ "$conclusion" = "success" ]; then
                  echo "✅ data-processing workflow completed successfully"
                  echo "run_id=$run_id" >> "$GITHUB_OUTPUT"
                  exit 0
                else
                  echo "::error::data-processing workflow completed with conclusion: $conclusion"
                  exit 1
                fi
              fi
            fi
            attempts=$((attempts + 1))
            sleep "$interval"
          done
          echo "::error::Timed out waiting for data-processing workflow"
          exit 1
# =======================
# Data Artifact Retrieval (Retry)
# =======================
      # Second download attempt, pinned to the run id reported by the
      # `data-processing` step. Runs only after a failed first download
      # that led to a dispatched data-processing run.
      - name: Download data artifact (Retry)
        id: download-artifact-retry
        uses: dawidd6/action-download-artifact@v6
        if: steps.download-artifact.outcome == 'failure' && steps.data-processing.outputs.data_processing_triggered == 'true'
        with:
          run_id: ${{ steps.data-processing.outputs.run_id }}
          workflow: data-processing.yml
          name: data-artifact
          path: .
          github_token: ${{ secrets.GITHUB_TOKEN }}
# ----- build job: generate the data files inside this job -----
      # Runs when the first download failed and no data-processing run was
      # dispatched (the pull_request path of the fallback step).
      - name: Run data processing if needed
        if: steps.download-artifact.outcome == 'failure' && steps.data-processing.outputs.data_processing_triggered != 'true'
        env:
          # NOTE(review): PYTHON_VERSION is not read by any command in this
          # step - confirm whether something else consumes it.
          PYTHON_VERSION: "3.11"
          GA_API_CREDENTIALS: ${{ secrets.GA_API_CREDENTIALS }}
          GA_PROPERTY_ID: ${{ secrets.GA_PROPERTY_ID }}
        run: |
          # Install Python dependencies
          python3 -m pip install -r ./requirements.txt

          # Generate data files
          python3 scripts/forrt_contribs/tenzing.py
          python3 content/resources/resource.py
          mv scripts/forrt_contribs/tenzing.md content/contributors/tenzing.md

          # Download GA data. Uses python3 like every other call here: the
          # dependencies above were installed for that interpreter, and a
          # bare `python` is not guaranteed to be the same one.
          python3 scripts/download_ga_data.py

          # Quick validation of GA data structure
          if [ -f "data/ga_data.json" ]; then
            python3 -c "import json; data = json.load(open('data/ga_data.json')); print('✅ GA data:', len(data.get('regions', [])), 'countries,', len(data.get('top_pages', [])), 'pages')"
          fi
# ----- build job: install Hugo, render the site, publish the artifact -----
      # Action pinned to a commit SHA; version and flavour come from the
      # job-level env.
      - name: Setup Hugo
        uses: peaceiris/actions-hugo@75d2e84710de30f6ff7268e08f310b60ef14033f
        with:
          extended: ${{ env.HUGO_EXTENDED }}
          hugo-version: ${{ env.HUGO_VERSION }}

      # Renders into ./public with the staging base URL.
      - name: Build site
        env:
          HUGO_ENV: staging
        run: |
          hugo --gc --minify --cleanDestinationDir --destination public --baseURL https://staging.forrt.org

      # The artifact name carries the run number; the deploy job downloads
      # it under the same name.
      - name: Upload site artifact
        uses: actions/upload-artifact@v4
        with:
          path: public/
          name: forrt-website-aggregate-${{ github.run_number }}
          retention-days: 1
# ----- deploy-staging job: publish the built site and report on it -----
  deploy-staging:
    name: Deploy - Staging
    needs:
      - build
      - aggregate-prs
    runs-on: ubuntu-22.04
    # Run on successful build OR on schedule (for monthly reports even when no PRs)
    if: always() && (needs.build.result == 'success' || github.event_name == 'schedule')
    timeout-minutes: 10  # Prevent blocking the queue
    concurrency:
      group: staging
    permissions:
      contents: write
      issues: write
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.STAGING_GITHUB_TOKEN }}

      # Skipped when there is no build output (schedule run without a
      # successful build).
      - name: Download Artifact - Website
        if: needs.build.result == 'success'
        uses: actions/download-artifact@v4
        with:
          path: ${{ github.repository }}/forrt-website
          name: forrt-website-aggregate-${{ github.run_number }}
# ----- deploy-staging: write deployment-summary.md -----
      # Produces deployment-summary.md in the workspace from the outputs of
      # the aggregate-prs job. Manual dispatches only get a summary when
      # create_summary is set.
      - name: Create deployment summary
        if: github.event_name != 'workflow_dispatch' || github.event.inputs.create_summary == 'true'
        env:
          AGGREGATED_BRANCH: ${{ needs.aggregate-prs.outputs.aggregated-branch }}
          TOTAL_PRS: ${{ needs.aggregate-prs.outputs.total-prs }}
          ATTEMPTED_PRS: ${{ needs.aggregate-prs.outputs.attempted-prs }}
          INCLUDED_PRS: ${{ needs.aggregate-prs.outputs.included-prs }}
        run: |
          # Both lists arrive as comma-separated PR numbers; an empty value
          # yields an empty array.
          IFS=',' read -ra ATTEMPTED_ARRAY <<< "$ATTEMPTED_PRS"
          IFS=',' read -ra SUCCESSFUL_ARRAY <<< "$INCLUDED_PRS"

          # Skipped = attempted but not merged. Computed regardless of
          # whether anything merged, so a run in which every PR conflicted
          # still lists them.
          SKIPPED_ARRAY=()
          for attempted_pr in "${ATTEMPTED_ARRAY[@]}"; do
            MERGED=false
            for successful_pr in "${SUCCESSFUL_ARRAY[@]}"; do
              if [ "$attempted_pr" = "$successful_pr" ]; then
                MERGED=true
                break
              fi
            done
            if [ "$MERGED" = false ]; then
              SKIPPED_ARRAY+=("$attempted_pr")
            fi
          done

          {
            echo "## 🚀 Staging Deployment Summary"
            echo ""
            echo "**Deployment Time:** $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
            echo ""
            echo "**Aggregated Branch:** $AGGREGATED_BRANCH"
            echo ""
            echo "**Total PRs Found:** $TOTAL_PRS"
            echo "**PRs Attempted:** $ATTEMPTED_PRS"
            echo ""
            if [ "${#SUCCESSFUL_ARRAY[@]}" -gt 0 ]; then
              echo "### ✅ PRs Successfully Merged:"
              for pr in "${SUCCESSFUL_ARRAY[@]}"; do
                echo "- PR #$pr"
              done
              echo ""
            fi
            # The heading is only written when at least one PR was skipped.
            if [ "${#SKIPPED_ARRAY[@]}" -gt 0 ]; then
              echo "### ⚠️ PRs Attempted but Skipped (Conflicts):"
              for skipped_pr in "${SKIPPED_ARRAY[@]}"; do
                echo "- PR #$skipped_pr (merge conflicts)"
              done
              echo ""
            fi
            echo "**Staging URL:** https://staging.forrt.org"
            echo ""
            echo "### Aggregation Strategy:"
            echo "- ✅ All open PRs attempted"
            echo "- ✅ Conflicting PRs skipped"
            echo "- ✅ Shows combined state of all compatible PRs"
          } > deployment-summary.md
# ----- deploy-staging: push the rendered site to the staging repository -----
      # Publishes the downloaded site to the `staging` branch of
      # forrtproject/webpage-staging; only runs when the build succeeded.
      - name: Deploy - GitHub Pages
        uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e
        if: needs.build.result == 'success'
        with:
          external_repository: forrtproject/webpage-staging
          publish_branch: staging
          publish_dir: ${{ github.repository }}/forrt-website
          cname: staging.forrt.org
          personal_token: ${{ secrets.STAGING_GITHUB_TOKEN }}
# ----- deploy-staging: report the outcome on every attempted PR -----
      # Creates or updates one marker-tagged comment per attempted PR.
      # Requires a successful build: this job also runs on schedule after a
      # failed build, and in that case nothing was deployed, so a
      # "successfully deployed" comment would be wrong.
      - name: Comment on PRs
        if: needs.build.result == 'success' && needs.aggregate-prs.outputs.attempted-prs != ''
        env:
          GH_TOKEN: ${{ secrets.STAGING_GITHUB_TOKEN }}
          ATTEMPTED_PRS: ${{ needs.aggregate-prs.outputs.attempted-prs }}
          INCLUDED_PRS: ${{ needs.aggregate-prs.outputs.included-prs }}
        run: |
          DEPLOYMENT_TIME=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
          # Hidden marker used to find this workflow's earlier comment
          COMMENT_IDENTIFIER="<!-- staging-deployment-comment -->"

          # Comma-separated PR numbers; parsed once, outside the loop.
          IFS=',' read -ra PR_ARRAY <<< "$ATTEMPTED_PRS"
          IFS=',' read -ra SUCCESSFUL_ARRAY <<< "$INCLUDED_PRS"

          for pr in "${PR_ARRAY[@]}"; do
            # Check if this PR was successfully merged
            SUCCESSFUL_MERGE=false
            for successful_pr in "${SUCCESSFUL_ARRAY[@]}"; do
              if [ "$successful_pr" = "$pr" ]; then
                SUCCESSFUL_MERGE=true
                break
              fi
            done

            # The body lines below stay at the script's base indentation on
            # purpose: any extra indentation would become part of the
            # comment text.
            if [ "$SUCCESSFUL_MERGE" = true ]; then
              COMMENT_BODY="${COMMENT_IDENTIFIER}
          ✅ **Staging Deployment Status**
          This PR has been successfully deployed to staging as part of an aggregated deployment.
          **Deployed at:** ${DEPLOYMENT_TIME}
          **Staging URL:** https://staging.forrt.org
          The staging site shows the combined state of all compatible open PRs."
            else
              COMMENT_BODY="${COMMENT_IDENTIFIER}
          ⚠️ **Staging Deployment Status**
          This PR was attempted for staging deployment but had merge conflicts and was skipped.
          **Attempted at:** ${DEPLOYMENT_TIME}
          **Staging URL:** https://staging.forrt.org
          Please resolve conflicts with the base branch and the deployment will be retried automatically."
            fi

            # Find existing staging deployment comment (search all comments)
            # Note: Don't filter by user since STAGING_GITHUB_TOKEN is a PAT
            EXISTING_COMMENT_ID=$(gh api "repos/$GITHUB_REPOSITORY/issues/${pr}/comments" \
              --paginate \
              --jq '.[] | select(.body | contains("<!-- staging-deployment-comment -->")) | .id' 2>/dev/null | head -1)

            if [ -n "$EXISTING_COMMENT_ID" ]; then
              echo "💬 Updating existing comment on PR #$pr (comment ID: $EXISTING_COMMENT_ID)"
              # Try to update the comment, fall back to creating a new one if update fails
              if ! UPDATE_OUTPUT=$(gh api "repos/$GITHUB_REPOSITORY/issues/comments/${EXISTING_COMMENT_ID}" \
                -X PATCH \
                -f body="$COMMENT_BODY" 2>&1); then
                echo "⚠️ Failed to update comment: $UPDATE_OUTPUT"
                echo "⚠️ Creating a new comment instead"
                gh pr comment "$pr" --body "$COMMENT_BODY"
              fi
            else
              echo "💬 Creating new comment on PR #$pr"
              gh pr comment "$pr" --body "$COMMENT_BODY"
            fi
          done
# ----- deploy-staging: file the monthly report issue -----
      # Runs on schedule, or on a manual dispatch with create_summary set.
      - name: Create monthly deployment report
        if: github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.create_summary == 'true')
        env:
          GH_TOKEN: ${{ secrets.STAGING_GITHUB_TOKEN }}
        run: |
          # An issue is only filed when the runner's date is the 1st of the
          # month; on any other day the step logs a notice and stops.
          CURRENT_DAY=$(date +%d)
          echo "🔍 Current day: $CURRENT_DAY, Event: ${{ github.event_name }}"
          if [ "$CURRENT_DAY" != "01" ]; then
            echo "ℹ️ Skipping issue creation (not 1st of month, current day: $CURRENT_DAY)"
            exit 0
          fi

          MONTH=$(date +"%B %Y")
          ISSUE_TITLE="📊 Monthly Staging Deployment Report - $MONTH"
          ISSUE_LABELS="deployment,staging,monthly-report"
          echo "📊 Creating monthly deployment report for $MONTH"

          # Use the generated summary as the issue body when it exists,
          # otherwise fall back to a fixed one-line body.
          if [ -f "deployment-summary.md" ]; then
            gh issue create \
              --title "$ISSUE_TITLE" \
              --body-file deployment-summary.md \
              --label "$ISSUE_LABELS"
            echo "✅ Monthly deployment report created for $MONTH"
          else
            echo "⚠️ deployment-summary.md not found, creating basic report"
            gh issue create \
              --title "$ISSUE_TITLE" \
              --body "Monthly staging deployment report for $MONTH. No PRs were processed this month." \
              --label "$ISSUE_LABELS"
            echo "✅ Basic monthly deployment report created for $MONTH"
          fi