From 05c00a007ab547c30250b2b5ee0f400d3cfd54b4 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Fri, 22 May 2026 12:11:06 -0700 Subject: [PATCH 1/9] Validate all labels assigned to failures --- .../scripts/validate_results.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/.github/skills/ci-pipeline-monitor/scripts/validate_results.py b/.github/skills/ci-pipeline-monitor/scripts/validate_results.py index b2c8d59aba75ba..d9e91fca6b2a21 100644 --- a/.github/skills/ci-pipeline-monitor/scripts/validate_results.py +++ b/.github/skills/ci-pipeline-monitor/scripts/validate_results.py @@ -18,6 +18,7 @@ import urllib.parse import urllib.request +LABELS_API_ENDPOINT = "https://api.github.com/repos/dotnet/runtime/labels" def check(name, passed, detail="", warn_only=False): if not passed and warn_only: @@ -574,6 +575,45 @@ def _build_sig(error_message, stack_trace): if not ok: failures += 1 + # 16h. Every failure's suggested labels are valid labels on the dotnet/runtime repo + # Otherwise, update_github will fail to file the issue + + all_label_names = set() + # TODO: If dotnet/runtime ever has more than 999 labels, increase this page limit. At time of script authoring + # we have around 300 so this is more than enough. + for page_index in range(10): + req = urllib.request.Request(f"{LABELS_API_ENDPOINT}?per_page=100&page={page_index}", headers={ + "Accept": "application/vnd.github+json", + "User-Agent": "ci-pipeline-monitor-validator", + }) + with urllib.request.urlopen(req, timeout=15) as resp: + data = json.loads(resp.read()) + page_label_names = set([item["name"] for item in data]) + # Don't bother querying additional empty pages after this one. + if len(page_label_names) <= 0: + break + + for name in page_label_names: + all_label_names.add(name) + + all_failure_rows = conn.execute(""" + SELECT id, labels FROM failures + """).fetchall() + bad_labels = [] + for failure_row in all_failure_rows: + failure_labels = failure_row["labels"].split(',') + for failure_label in failure_labels: + failure_label = failure_label.strip() + if not (failure_label in all_label_names): + print(f" [FAIL] Invalid label '{failure_label}' for failure {failure_row['id']}") + bad_labels.append(failure_row) + + ok = check("All failure labels are valid labels from dotnet/runtime repo", + len(bad_labels) == 0, + f"{len(bad_labels)} label(s) were invalid (see above)" if len(bad_labels) else "") + if not ok: + failures += 1 + # Report checks (only if --report provided) if args.report: print("\n=== Report Sanity ===") From c7628d7c400a39ac740bcf536bdbbee96d5ee851 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Fri, 22 May 2026 12:11:18 -0700 Subject: [PATCH 2/9] Always apply the blocking-clean-ci-optional label to new issues --- .github/skills/ci-pipeline-monitor/scripts/update_github.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/skills/ci-pipeline-monitor/scripts/update_github.py b/.github/skills/ci-pipeline-monitor/scripts/update_github.py index a18e96e1c6f2ea..60a73e419ca66c 100644 --- a/.github/skills/ci-pipeline-monitor/scripts/update_github.py +++ b/.github/skills/ci-pipeline-monitor/scripts/update_github.py @@ -85,6 +85,8 @@ def _one_failure(self, fail, go): else: print("GitHub Issue: NEW — creating new issue") gh_issue_command.append("create") + gh_issue_command.append("--label") + gh_issue_command.append("blocking-clean-ci-optional") creating_new_issue = True if fail["labels"]: From 574591799d3292ceb01f5b41a2fc843ab12586ba Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Fri, 22 May 2026 12:15:10 -0700 Subject: [PATCH 3/9] Make label validation case insensitive --- .../skills/ci-pipeline-monitor/scripts/validate_results.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/skills/ci-pipeline-monitor/scripts/validate_results.py b/.github/skills/ci-pipeline-monitor/scripts/validate_results.py index d9e91fca6b2a21..111df903eb8b2c 100644 --- a/.github/skills/ci-pipeline-monitor/scripts/validate_results.py +++ b/.github/skills/ci-pipeline-monitor/scripts/validate_results.py @@ -594,7 +594,7 @@ def _build_sig(error_message, stack_trace): break for name in page_label_names: - all_label_names.add(name) + all_label_names.add(name.lower()) all_failure_rows = conn.execute(""" SELECT id, labels FROM failures @@ -603,7 +603,7 @@ def _build_sig(error_message, stack_trace): for failure_row in all_failure_rows: failure_labels = failure_row["labels"].split(',') for failure_label in failure_labels: - failure_label = failure_label.strip() + failure_label = failure_label.strip().lower() if not (failure_label in all_label_names): print(f" [FAIL] Invalid label '{failure_label}' for failure {failure_row['id']}") bad_labels.append(failure_row) From 958504a2cf8b2fcc4ea98b1f865db1a882d88d19 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Fri, 22 May 2026 12:22:46 -0700 Subject: [PATCH 4/9] Print list of all valid labels if there is a label error, for the agent to use for fixes Address copilot feedback --- .../ci-pipeline-monitor/scripts/validate_results.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/skills/ci-pipeline-monitor/scripts/validate_results.py b/.github/skills/ci-pipeline-monitor/scripts/validate_results.py index 111df903eb8b2c..c5dd1c0fc6b75c 100644 --- a/.github/skills/ci-pipeline-monitor/scripts/validate_results.py +++ b/.github/skills/ci-pipeline-monitor/scripts/validate_results.py @@ -578,10 +578,11 @@ def _build_sig(error_message, stack_trace): # 16h. Every failure's suggested labels are valid labels on the dotnet/runtime repo # Otherwise, update_github will fail to file the issue + total += 1 all_label_names = set() # TODO: If dotnet/runtime ever has more than 999 labels, increase this page limit. At time of script authoring # we have around 300 so this is more than enough. - for page_index in range(10): + for page_index in range(1, 10): req = urllib.request.Request(f"{LABELS_API_ENDPOINT}?per_page=100&page={page_index}", headers={ "Accept": "application/vnd.github+json", "User-Agent": "ci-pipeline-monitor-validator", @@ -601,9 +602,12 @@ def _build_sig(error_message, stack_trace): """).fetchall() bad_labels = [] for failure_row in all_failure_rows: - failure_labels = failure_row["labels"].split(',') + raw_labels = failure_row["labels"] + failure_labels = raw_labels.split(',') if raw_labels else [] for failure_label in failure_labels: failure_label = failure_label.strip().lower() + if not failure_label: + continue if not (failure_label in all_label_names): print(f" [FAIL] Invalid label '{failure_label}' for failure {failure_row['id']}") bad_labels.append(failure_row) @@ -612,6 +616,8 @@ def _build_sig(error_message, stack_trace): len(bad_labels) == 0, f"{len(bad_labels)} label(s) were invalid (see above)" if len(bad_labels) else "") if not ok: + print("Full set of valid labels follows:") + print(", ".join(sorted(all_label_names))) failures += 1 # Report checks (only if --report provided) From 51ba238e3080723ba1433f4549d392a2b5cd1dd8 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Fri, 22 May 2026 12:46:18 -0700 Subject: [PATCH 5/9] Maintain a cache of known good labels so that repeated validation runs will succeed even if we hit a rate limit, as long as one validation run has succeeded --- .github/skills/ci-pipeline-monitor/.gitignore | 1 + .../scripts/validate_results.py | 41 ++++++++++++------- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/.github/skills/ci-pipeline-monitor/.gitignore b/.github/skills/ci-pipeline-monitor/.gitignore index ff595e326d8e23..336d4ad83fb8af 100644 --- a/.github/skills/ci-pipeline-monitor/.gitignore +++ b/.github/skills/ci-pipeline-monitor/.gitignore @@ -13,3 +13,4 @@ temp/ # Intermediate JSON output files (piped between scripts) failing_builds.json failed_tests.json +cached_labels.json diff --git a/.github/skills/ci-pipeline-monitor/scripts/validate_results.py b/.github/skills/ci-pipeline-monitor/scripts/validate_results.py index c5dd1c0fc6b75c..e3c957dc1f127c 100644 --- a/.github/skills/ci-pipeline-monitor/scripts/validate_results.py +++ b/.github/skills/ci-pipeline-monitor/scripts/validate_results.py @@ -19,6 +19,7 @@ import urllib.request LABELS_API_ENDPOINT = "https://api.github.com/repos/dotnet/runtime/labels" +LABELS_CACHE_FILE = os.path.join(__file__, "..", "cached_labels.json") def check(name, passed, detail="", warn_only=False): if not passed and warn_only: @@ -580,22 +581,32 @@ def _build_sig(error_message, stack_trace): total += 1 all_label_names = set() - # TODO: If dotnet/runtime ever has more than 999 labels, increase this page limit. At time of script authoring - # we have around 300 so this is more than enough. - for page_index in range(1, 10): - req = urllib.request.Request(f"{LABELS_API_ENDPOINT}?per_page=100&page={page_index}", headers={ - "Accept": "application/vnd.github+json", - "User-Agent": "ci-pipeline-monitor-validator", - }) - with urllib.request.urlopen(req, timeout=15) as resp: - data = json.loads(resp.read()) - page_label_names = set([item["name"] for item in data]) - # Don't bother querying additional empty pages after this one. - if len(page_label_names) <= 0: - break + try: + # TODO: If dotnet/runtime ever has more than 999 labels, increase this page limit. At time of script authoring + # we have around 300 so this is more than enough. + for page_index in range(1, 10): + req = urllib.request.Request(f"{LABELS_API_ENDPOINT}?per_page=100&page={page_index}", headers={ + "Accept": "application/vnd.github+json", + "User-Agent": "ci-pipeline-monitor-validator", + }) + with urllib.request.urlopen(req, timeout=15) as resp: + data = json.loads(resp.read()) + page_label_names = set([item["name"] for item in data]) + # Don't bother querying additional empty pages after this one. + if len(page_label_names) <= 0: + break - for name in page_label_names: - all_label_names.add(name.lower()) + for name in page_label_names: + all_label_names.add(name.lower()) + + with open(LABELS_CACHE_FILE) as f: + f.write(json.dumps(sorted(all_label_names))) + except urllib.error.HTTPError: + print(" [WARN] Failed to fetch labels list from GitHub due to an HTTP error. Loading cached labels list.") + with open(LABELS_CACHE_FILE) as f: + labels_json = f.read() + labels_list = json.loads(labels_json) + all_label_names = set(labels_list) all_failure_rows = conn.execute(""" SELECT id, labels FROM failures From 3bca0d6a9afdd7cc164136e4530526c8928470f9 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Fri, 22 May 2026 12:50:00 -0700 Subject: [PATCH 6/9] Don't apply blocking-clean-ci-optional twice --- .github/skills/ci-pipeline-monitor/scripts/update_github.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/skills/ci-pipeline-monitor/scripts/update_github.py b/.github/skills/ci-pipeline-monitor/scripts/update_github.py index 60a73e419ca66c..71e0736a9545d6 100644 --- a/.github/skills/ci-pipeline-monitor/scripts/update_github.py +++ b/.github/skills/ci-pipeline-monitor/scripts/update_github.py @@ -93,7 +93,7 @@ def _one_failure(self, fail, go): if creating_new_issue: for label in fail["labels"].split(','): stripped_label = label.strip() - if stripped_label: + if stripped_label and (stripped_label != "blocking-clean-ci-optional"): gh_issue_command.append('--label') gh_issue_command.append(stripped_label) From 8d738ea167259acfd224b4f8462d8bd4a8e75f01 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Fri, 22 May 2026 13:00:50 -0700 Subject: [PATCH 7/9] Fix cache write --- .github/skills/ci-pipeline-monitor/scripts/validate_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/skills/ci-pipeline-monitor/scripts/validate_results.py b/.github/skills/ci-pipeline-monitor/scripts/validate_results.py index e3c957dc1f127c..577b385db6ea8a 100644 --- a/.github/skills/ci-pipeline-monitor/scripts/validate_results.py +++ b/.github/skills/ci-pipeline-monitor/scripts/validate_results.py @@ -599,7 +599,7 @@ def _build_sig(error_message, stack_trace): for name in page_label_names: all_label_names.add(name.lower()) - with open(LABELS_CACHE_FILE) as f: + with open(LABELS_CACHE_FILE, "w") as f: f.write(json.dumps(sorted(all_label_names))) except urllib.error.HTTPError: print(" [WARN] Failed to fetch labels list from GitHub due to an HTTP error. Loading cached labels list.") From f0673429c2a96162a7baaa0fcf904f84879027c6 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Fri, 22 May 2026 13:02:37 -0700 Subject: [PATCH 8/9] Fix off-by-one --- .github/skills/ci-pipeline-monitor/scripts/validate_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/skills/ci-pipeline-monitor/scripts/validate_results.py b/.github/skills/ci-pipeline-monitor/scripts/validate_results.py index 577b385db6ea8a..385de9691a3a0d 100644 --- a/.github/skills/ci-pipeline-monitor/scripts/validate_results.py +++ b/.github/skills/ci-pipeline-monitor/scripts/validate_results.py @@ -584,7 +584,7 @@ def _build_sig(error_message, stack_trace): try: # TODO: If dotnet/runtime ever has more than 999 labels, increase this page limit. At time of script authoring # we have around 300 so this is more than enough. - for page_index in range(1, 10): + for page_index in range(1, 11): req = urllib.request.Request(f"{LABELS_API_ENDPOINT}?per_page=100&page={page_index}", headers={ "Accept": "application/vnd.github+json", "User-Agent": "ci-pipeline-monitor-validator", From 72628c2c5464b48a5b469ec4c3a863307cf882e5 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Sat, 23 May 2026 17:04:40 -0700 Subject: [PATCH 9/9] Better approach to ensuring the blocking clean ci optional label is applied --- .../scripts/update_github.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/.github/skills/ci-pipeline-monitor/scripts/update_github.py b/.github/skills/ci-pipeline-monitor/scripts/update_github.py index 71e0736a9545d6..4538c42982cd1d 100644 --- a/.github/skills/ci-pipeline-monitor/scripts/update_github.py +++ b/.github/skills/ci-pipeline-monitor/scripts/update_github.py @@ -85,19 +85,16 @@ def _one_failure(self, fail, go): else: print("GitHub Issue: NEW — creating new issue") gh_issue_command.append("create") - gh_issue_command.append("--label") - gh_issue_command.append("blocking-clean-ci-optional") creating_new_issue = True - if fail["labels"]: - if creating_new_issue: - for label in fail["labels"].split(','): - stripped_label = label.strip() - if stripped_label and (stripped_label != "blocking-clean-ci-optional"): - gh_issue_command.append('--label') - gh_issue_command.append(stripped_label) - # Title / Labels / Milestone for issue filing + if fail["labels"] and creating_new_issue: + label_set = set([l.strip().lower() for l in fail["labels"].split(',')]) + label_set.add("blocking-clean-ci-optional") + for label in sorted(label_set): + gh_issue_command.append('--label') + gh_issue_command.append(label) + test_name = fail["test_name"] if creating_new_issue: gh_issue_command.append('--title')