From 6d95999b028910621c49a3b4be69f7efda3a16ca Mon Sep 17 00:00:00 2001 From: Pulkit Chauhan Date: Wed, 11 Mar 2026 20:03:33 +0530 Subject: [PATCH 1/2] added artifacts for ai --- install/ci-vm/ci-linux/ci/runCI | 68 ++++++++- mod_ci/controllers.py | 10 +- mod_test/controllers.py | 257 +++++++++++++++++++++++++++++++- utility.py | 7 +- 4 files changed, 327 insertions(+), 15 deletions(-) diff --git a/install/ci-vm/ci-linux/ci/runCI b/install/ci-vm/ci-linux/ci/runCI index 092d425e..d1d84765 100644 --- a/install/ci-vm/ci-linux/ci/runCI +++ b/install/ci-vm/ci-linux/ci/runCI @@ -7,6 +7,11 @@ DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) +# Enable coredump capture +ulimit -c unlimited +mkdir -p /tmp/coredumps +echo "/tmp/coredumps/core.%e.%p" | sudo tee /proc/sys/kernel/core_pattern > /dev/null + if [ ! -f "$DIR/variables" ]; then # No variable file defined sudo shutdown -h now @@ -123,10 +128,69 @@ if [ -e "${dstDir}/ccextractor" ]; then echo "=== CCExtractor Binary Version ===" >> "${logFile}" ./ccextractor --version >> "${logFile}" 2>&1 echo "=== End Version Info ===" >> "${logFile}" - postStatus "testing" "Running tests" + + + ccextractor_path="$(pwd)/ccextractor" + combined_stdout="/tmp/combined_stdout.log" + : > "${combined_stdout}" + + # Create a wrapper script that tees stdout/stderr to a combined log + wrapper_path="$(pwd)/ccextractor_wrapper" + cat > "${wrapper_path}" << 'WRAPPER_EOF' +#!/bin/bash +COMBINED_LOG="/tmp/combined_stdout.log" +REAL_BINARY="PLACEHOLDER_BINARY" +EXIT_CODE_FILE="/tmp/.wrapper_exit_code" +echo "=== TEST INVOCATION: $@ ===" >> "$COMBINED_LOG" +{ "$REAL_BINARY" "$@" 2>&1; echo $? 
> "$EXIT_CODE_FILE"; } | tee -a "$COMBINED_LOG" +exit_code=$(cat "$EXIT_CODE_FILE") +echo "=== EXIT CODE: ${exit_code} ===" >> "$COMBINED_LOG" +echo "" >> "$COMBINED_LOG" +exit $exit_code +WRAPPER_EOF + sed -i "s|PLACEHOLDER_BINARY|${ccextractor_path}|" "${wrapper_path}" + chmod +x "${wrapper_path}" + executeCommand cd ${suiteDstDir} - executeCommand ${tester} --debug --entries "${testFile}" --executable "ccextractor" --tempfolder "${tempFolder}" --timeout 600 --reportfolder "${reportFolder}" --resultfolder "${resultFolder}" --samplefolder "${sampleFolder}" --method Server --url "${reportURL}" + executeCommand ${tester} --debug --entries "${testFile}" --executable "${wrapper_path}" --tempfolder "${tempFolder}" --timeout 600 --reportfolder "${reportFolder}" --resultfolder "${resultFolder}" --samplefolder "${sampleFolder}" --method Server --url "${reportURL}" + + # Upload AI artifacts to GCS + gcs_bucket=$(curl -s "http://metadata/computeMetadata/v1/instance/attributes/bucket" -H "Metadata-Flavor: Google") + test_id=$(curl -s "http://metadata/computeMetadata/v1/instance/attributes/testID" -H "Metadata-Flavor: Google") + token=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" -H "Metadata-Flavor: Google" | python3 -c "import sys,json; print(json.load(sys.stdin)['access_token'])") + + upload_artifact() { + local file_path="$1" + local dest_path="$2" + if [ -f "$file_path" ]; then + local http_code + http_code=$(curl -s -X POST --data-binary @"$file_path" \ + -H "Authorization: Bearer $token" \ + -H "Content-Type: application/octet-stream" \ + -w "%{http_code}" \ + -o /dev/null \ + "https://storage.googleapis.com/upload/storage/v1/b/${gcs_bucket}/o?uploadType=media&name=${dest_path}") + if [ -z "$http_code" ] || [ "$http_code" -ne 200 ]; then + echo "GCS upload failed for ${dest_path}: HTTP ${http_code:-no_response}" >> "${logFile}" + fi + fi + } + + upload_artifact "$ccextractor_path" 
"test_artifacts/${test_id}/ccextractor" + + # Upload combined stdout log + upload_artifact "${combined_stdout}" "test_artifacts/${test_id}/combined_stdout.log" + + # Upload coredumps if any + for core_file in /tmp/coredumps/core.*; do + if [ -f "$core_file" ]; then + upload_artifact "$core_file" "test_artifacts/${test_id}/coredump" + break + fi + done + sendLogFile + upload_artifact "${logFile}" "test_artifacts/${test_id}/full_output.log" postStatus "completed" "Ran all tests" sudo shutdown -h now diff --git a/mod_ci/controllers.py b/mod_ci/controllers.py index d625103a..879d5aca 100755 --- a/mod_ci/controllers.py +++ b/mod_ci/controllers.py @@ -1193,7 +1193,8 @@ def create_instance(compute, project, zone, test, reportURL) -> Dict: metadata_items = [ {'key': 'startup-script', 'value': startup_script}, {'key': 'reportURL', 'value': reportURL}, - {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')} + {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')}, + {'key': 'testID', 'value': str(test.id)} ] elif test.platform == TestPlatform.windows: image_response = compute.images().getFromFamily(project=config.get('WINDOWS_INSTANCE_PROJECT_NAME', ''), @@ -1209,7 +1210,8 @@ def create_instance(compute, project, zone, test, reportURL) -> Dict: {'key': 'service_account', 'value': service_account}, {'key': 'rclone_conf', 'value': rclone_conf}, {'key': 'reportURL', 'value': reportURL}, - {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')} + {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')}, + {'key': 'testID', 'value': str(test.id)} ] source_disk_image = image_response['selfLink'] @@ -2625,7 +2627,7 @@ def upload_log_type_request(log, test_id, repo_folder, test, request) -> bool: uploaded_file.save(temp_path) final_path = os.path.join(repo_folder, 'LogFiles', f"{test.id}.txt") - os.rename(temp_path, final_path) + os.replace(temp_path, final_path) log.debug("Stored log file") return True @@ -2671,7 +2673,7 @@ def upload_type_request(log, 
test_id, repo_folder, test, request) -> bool: results_dir = os.path.join(repo_folder, 'TestResults') os.makedirs(results_dir, exist_ok=True) final_path = os.path.join(results_dir, f'{file_hash}{file_extension}') - os.rename(temp_path, final_path) + os.replace(temp_path, final_path) rto = RegressionTestOutput.query.filter( RegressionTestOutput.id == request.form['test_file_id']).first() result_file = TestResultFile(test.id, request.form['test_id'], rto.id, rto.correct, file_hash) diff --git a/mod_test/controllers.py b/mod_test/controllers.py index d5e91c9e..e910f8a0 100644 --- a/mod_test/controllers.py +++ b/mod_test/controllers.py @@ -367,7 +367,6 @@ def generate_diff(test_id: int, regression_test_id: int, output_id: int, to_view @mod_test.route('/log-files/') -@login_required def download_build_log_file(test_id): """ Serve download of build log. @@ -379,15 +378,17 @@ def download_build_log_file(test_id): :return: build log text file :rtype: Flask response """ - from run import config + from run import config, storage_client_bucket test = Test.query.filter(Test.id == test_id).first() + from flask import send_from_directory + if test is not None: file_name = f"{test_id}.txt" - log_file_path = os.path.join(config.get('SAMPLE_REPOSITORY', ''), 'LogFiles', file_name) - + log_dir = os.path.join(config.get('SAMPLE_REPOSITORY', ''), 'LogFiles') + log_file_path = os.path.join(log_dir, file_name) if os.path.isfile(log_file_path): - return serve_file_download(file_name, 'LogFiles') + return send_from_directory(log_dir, file_name, as_attachment=True) raise TestNotFoundException(f"Build log for Test {test_id} not found") @@ -442,3 +443,249 @@ def stop_test(test_id): g.db.commit() g.log.info(f"test with id: {test_id} stopped") return redirect(url_for('.by_id', test_id=test.id)) + + +def _artifact_redirect(test_id, blob_path, filename='artifact'): + """Generate a signed URL for a GCS artifact and redirect, or 404.""" + from datetime import timedelta + + from run import config, 
storage_client_bucket
+
+    blob = storage_client_bucket.blob(blob_path)
+    if not blob.exists():
+        abort(404)
+    url = blob.generate_signed_url(
+        version="v4",
+        expiration=timedelta(minutes=config.get('GCS_SIGNED_URL_EXPIRY_LIMIT', 30)),
+        method="GET",
+        response_disposition=f'attachment; filename="{filename}"'
+    )
+    return redirect(url)
+
+
+@mod_test.route('/<test_id>/binary')
+def download_binary(test_id):
+    """Download the ccextractor binary used in a test (linux or windows)."""
+    from run import storage_client_bucket
+    # Try linux name first, then windows
+    for name in ['ccextractor', 'ccextractor.exe']:
+        blob_path = f'test_artifacts/{test_id}/{name}'
+        if storage_client_bucket.blob(blob_path).exists():
+            return _artifact_redirect(test_id, blob_path, filename=name)
+    abort(404)
+
+
+@mod_test.route('/<test_id>/coredump')
+def download_coredump(test_id):
+    """Download the coredump from a test, if one was produced."""
+    return _artifact_redirect(
+        test_id,
+        f'test_artifacts/{test_id}/coredump',
+        filename=f'coredump-{test_id}'
+    )
+
+
+@mod_test.route('/<test_id>/combined-stdout')
+def download_combined_stdout(test_id):
+    """Download the combined stdout/stderr log from all test invocations."""
+    return _artifact_redirect(
+        test_id,
+        f'test_artifacts/{test_id}/combined_stdout.log',
+        filename=f'combined_stdout-{test_id}.log'
+    )
+
+
+@mod_test.route('/<test_id>/regression/<regression_test_id>/<output_id>/output-got')
+def download_output_got(test_id, regression_test_id, output_id):
+    """Download the actual output file from TestResults using DB hash."""
+    rf = TestResultFile.query.filter(and_(
+        TestResultFile.test_id == test_id,
+        TestResultFile.regression_test_id == regression_test_id,
+        TestResultFile.regression_test_output_id == output_id
+    )).first()
+    if rf is None or rf.got is None:
+        abort(404)
+    import os
+    ext = os.path.splitext(rf.regression_test_output.filename_correct)[1]
+    return _artifact_redirect(
+        test_id,
+        f'TestResults/{rf.got}{ext}',
+        filename=f'output_got_{regression_test_id}_{output_id}{ext}'
+    )
+
+
+@mod_test.route('/<test_id>/regression/<regression_test_id>/<output_id>/output-expected')
+def download_output_expected(test_id, regression_test_id, output_id):
+    """Download the expected output file from TestResults using DB hash."""
+    rf = TestResultFile.query.filter(and_(
+        TestResultFile.test_id == test_id,
+        TestResultFile.regression_test_id == regression_test_id,
+        TestResultFile.regression_test_output_id == output_id
+    )).first()
+    if rf is None:
+        abort(404)
+    import os
+    ext = os.path.splitext(rf.regression_test_output.filename_correct)[1]
+    return _artifact_redirect(
+        test_id,
+        f'TestResults/{rf.expected}{ext}',
+        filename=f'output_expected_{regression_test_id}_{output_id}{ext}'
+    )
+@mod_test.route('/<test_id>/sample/<sample_id>')
+def download_sample_ai(test_id, sample_id):
+    """Download the sample file for a regression test (no auth required for AI workflow)."""
+    from mod_sample.models import Sample
+    sample = Sample.query.filter(Sample.id == sample_id).first()
+    if sample is None:
+        abort(404)
+    return _artifact_redirect(
+        test_id,
+        f'TestFiles/{sample.filename}',
+        filename=sample.original_name
+    )
+
+
+@mod_test.route('/<test_id>/ai.json')
+def ai_json_endpoint(test_id):
+    """Structured JSON with download URLs for all artifacts — for AI agents."""
+    from run import storage_client_bucket
+
+    test = Test.query.filter(Test.id == test_id).first()
+    if test is None:
+        return jsonify({'error': f'Test {test_id} not found'}), 404
+
+    def blob_exists(path):
+        return storage_client_bucket.blob(path).exists()
+
+    has_binary = (
+        blob_exists(f'test_artifacts/{test_id}/ccextractor') or
+        blob_exists(f'test_artifacts/{test_id}/ccextractor.exe')
+    )
+    has_coredump = blob_exists(f'test_artifacts/{test_id}/coredump')
+    has_combined_stdout = blob_exists(f'test_artifacts/{test_id}/combined_stdout.log')
+
+    results = get_test_results(test)
+    test_cases = []
+    total = 0
+    passed = 0
+    failed = 0
+
+    for category in results:
+        for t_data in category['tests']:
+            total += 1
+            rt = t_data['test']
+            result = t_data['result']
+            is_error 
= t_data.get('error', False) + result_files = t_data['files'] + + if is_error: + failed += 1 + else: + passed += 1 + + outputs = [] + for expected_output in rt.output_files: + if expected_output.ignore: + continue + + matched_rf = None + for rf in result_files: + if rf.test_id != -1 and rf.regression_test_output_id == expected_output.id: + matched_rf = rf + break + + got_url = None + diff_url = None + + if matched_rf and matched_rf.got is not None: + got_url = url_for( + '.download_output_got', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + _external=True + ) + diff_url = url_for( + '.generate_diff', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + to_view=0, + _external=True + ) + else: + # If test passed, got and expected match exactly. + got_url = url_for( + '.download_output_expected', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + _external=True + ) + + output_entry = { + 'output_id': expected_output.id, + 'correct_extension': expected_output.correct_extension, + 'expected_url': url_for( + '.download_output_expected', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + _external=True + ), + 'got_url': got_url, + 'diff_url': diff_url, + } + outputs.append(output_entry) + + test_cases.append({ + 'regression_test_id': rt.id, + 'category': category['category'].name, + 'sample_filename': rt.sample.original_name, + 'sample_url': url_for( + '.download_sample_ai', + test_id=test_id, + sample_id=rt.sample.id, + _external=True + ), + 'arguments': rt.command, + 'result': 'Fail' if is_error else 'Pass', + 'exit_code': result.exit_code if result else None, + 'expected_exit_code': result.expected_rc if result else None, + 'runtime_ms': result.runtime if result else None, + 'outputs': outputs, + 'how_to_reproduce': f'./ccextractor {rt.command} {rt.sample.original_name}', + }) + + report = { + 'test_id': test.id, + 'commit': test.commit, + 
'platform': test.platform.value, + 'branch': test.branch, + 'status': 'completed' if test.finished else 'running', + 'binary_url': url_for( + '.download_binary', test_id=test_id, _external=True + ) if has_binary else None, + 'coredump_url': url_for( + '.download_coredump', test_id=test_id, _external=True + ) if has_coredump else None, + 'log_url': url_for( + '.download_build_log_file', test_id=test_id, _external=True + ), + 'combined_stdout_url': url_for( + '.download_combined_stdout', test_id=test_id, _external=True + ) if has_combined_stdout else None, + 'summary': { + 'total': total, + 'passed': passed, + 'failed': failed, + }, + 'test_cases': test_cases, + 'how_to_reproduce': ( + 'Download the binary and sample, then run: ' + + ('./ccextractor {arguments} {sample_filename}' if test.platform.value == 'linux' + else 'ccextractorwinfull.exe {arguments} {sample_filename}') + ), + } + + return jsonify(report) diff --git a/utility.py b/utility.py index 96308e41..e8680f14 100644 --- a/utility.py +++ b/utility.py @@ -30,14 +30,13 @@ def serve_file_download(file_name, file_folder, file_sub_folder='') -> werkzeug. 
""" from run import config, storage_client_bucket - file_path = path.join(file_folder, file_sub_folder, file_name) + file_path = '/'.join(filter(None, [file_folder, file_sub_folder, file_name])) blob = storage_client_bucket.blob(file_path) - blob.content_disposition = f'attachment; filename="{file_name}"' - blob.patch() url = blob.generate_signed_url( version="v4", - expiration=timedelta(minutes=config.get('GCS_SIGNED_URL_EXPIRY_LIMIT', '')), + expiration=timedelta(minutes=config.get('GCS_SIGNED_URL_EXPIRY_LIMIT', 30)), method="GET", + response_disposition=f'attachment; filename="{file_name}"' ) return redirect(url) From 0e183d4d85cdbc3a7372be9b89e11a8e8e16c98f Mon Sep 17 00:00:00 2001 From: Pulkit Chauhan Date: Wed, 11 Mar 2026 21:23:30 +0530 Subject: [PATCH 2/2] minor cleanup --- install/ci-vm/ci-linux/ci/runCI | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/install/ci-vm/ci-linux/ci/runCI b/install/ci-vm/ci-linux/ci/runCI index d1d84765..521ac2aa 100644 --- a/install/ci-vm/ci-linux/ci/runCI +++ b/install/ci-vm/ci-linux/ci/runCI @@ -128,8 +128,7 @@ if [ -e "${dstDir}/ccextractor" ]; then echo "=== CCExtractor Binary Version ===" >> "${logFile}" ./ccextractor --version >> "${logFile}" 2>&1 echo "=== End Version Info ===" >> "${logFile}" - - + postStatus "testing" "Running tests" ccextractor_path="$(pwd)/ccextractor" combined_stdout="/tmp/combined_stdout.log" : > "${combined_stdout}" @@ -190,7 +189,6 @@ WRAPPER_EOF done sendLogFile - upload_artifact "${logFile}" "test_artifacts/${test_id}/full_output.log" postStatus "completed" "Ran all tests" sudo shutdown -h now