From 6d95999b028910621c49a3b4be69f7efda3a16ca Mon Sep 17 00:00:00 2001 From: Pulkit Chauhan Date: Wed, 11 Mar 2026 20:03:33 +0530 Subject: [PATCH 1/2] added artifacts for ai --- install/ci-vm/ci-linux/ci/runCI | 68 ++++++++- mod_ci/controllers.py | 10 +- mod_test/controllers.py | 257 +++++++++++++++++++++++++++++++- utility.py | 7 +- 4 files changed, 327 insertions(+), 15 deletions(-) diff --git a/install/ci-vm/ci-linux/ci/runCI b/install/ci-vm/ci-linux/ci/runCI index 092d425e..d1d84765 100644 --- a/install/ci-vm/ci-linux/ci/runCI +++ b/install/ci-vm/ci-linux/ci/runCI @@ -7,6 +7,11 @@ DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) +# Enable coredump capture +ulimit -c unlimited +mkdir -p /tmp/coredumps +echo "/tmp/coredumps/core.%e.%p" | sudo tee /proc/sys/kernel/core_pattern > /dev/null + if [ ! -f "$DIR/variables" ]; then # No variable file defined sudo shutdown -h now @@ -123,10 +128,69 @@ if [ -e "${dstDir}/ccextractor" ]; then echo "=== CCExtractor Binary Version ===" >> "${logFile}" ./ccextractor --version >> "${logFile}" 2>&1 echo "=== End Version Info ===" >> "${logFile}" - postStatus "testing" "Running tests" + + + ccextractor_path="$(pwd)/ccextractor" + combined_stdout="/tmp/combined_stdout.log" + : > "${combined_stdout}" + + # Create a wrapper script that tees stdout/stderr to a combined log + wrapper_path="$(pwd)/ccextractor_wrapper" + cat > "${wrapper_path}" << 'WRAPPER_EOF' +#!/bin/bash +COMBINED_LOG="/tmp/combined_stdout.log" +REAL_BINARY="PLACEHOLDER_BINARY" +EXIT_CODE_FILE="/tmp/.wrapper_exit_code" +echo "=== TEST INVOCATION: $@ ===" >> "$COMBINED_LOG" +{ "$REAL_BINARY" "$@" 2>&1; echo $? 
> "$EXIT_CODE_FILE"; } | tee -a "$COMBINED_LOG" +exit_code=$(cat "$EXIT_CODE_FILE") +echo "=== EXIT CODE: ${exit_code} ===" >> "$COMBINED_LOG" +echo "" >> "$COMBINED_LOG" +exit $exit_code +WRAPPER_EOF + sed -i "s|PLACEHOLDER_BINARY|${ccextractor_path}|" "${wrapper_path}" + chmod +x "${wrapper_path}" + executeCommand cd ${suiteDstDir} - executeCommand ${tester} --debug --entries "${testFile}" --executable "ccextractor" --tempfolder "${tempFolder}" --timeout 600 --reportfolder "${reportFolder}" --resultfolder "${resultFolder}" --samplefolder "${sampleFolder}" --method Server --url "${reportURL}" + executeCommand ${tester} --debug --entries "${testFile}" --executable "${wrapper_path}" --tempfolder "${tempFolder}" --timeout 600 --reportfolder "${reportFolder}" --resultfolder "${resultFolder}" --samplefolder "${sampleFolder}" --method Server --url "${reportURL}" + + # Upload AI artifacts to GCS + gcs_bucket=$(curl -s "http://metadata/computeMetadata/v1/instance/attributes/bucket" -H "Metadata-Flavor: Google") + test_id=$(curl -s "http://metadata/computeMetadata/v1/instance/attributes/testID" -H "Metadata-Flavor: Google") + token=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" -H "Metadata-Flavor: Google" | python3 -c "import sys,json; print(json.load(sys.stdin)['access_token'])") + + upload_artifact() { + local file_path="$1" + local dest_path="$2" + if [ -f "$file_path" ]; then + local http_code + http_code=$(curl -s -X POST --data-binary @"$file_path" \ + -H "Authorization: Bearer $token" \ + -H "Content-Type: application/octet-stream" \ + -w "%{http_code}" \ + -o /dev/null \ + "https://storage.googleapis.com/upload/storage/v1/b/${gcs_bucket}/o?uploadType=media&name=${dest_path}") + if [ -z "$http_code" ] || [ "$http_code" -ne 200 ]; then + echo "GCS upload failed for ${dest_path}: HTTP ${http_code:-no_response}" >> "${logFile}" + fi + fi + } + + upload_artifact "$ccextractor_path" 
"test_artifacts/${test_id}/ccextractor" + + # Upload combined stdout log + upload_artifact "${combined_stdout}" "test_artifacts/${test_id}/combined_stdout.log" + + # Upload coredumps if any + for core_file in /tmp/coredumps/core.*; do + if [ -f "$core_file" ]; then + upload_artifact "$core_file" "test_artifacts/${test_id}/coredump" + break + fi + done + sendLogFile + upload_artifact "${logFile}" "test_artifacts/${test_id}/full_output.log" postStatus "completed" "Ran all tests" sudo shutdown -h now diff --git a/mod_ci/controllers.py b/mod_ci/controllers.py index d625103a..879d5aca 100755 --- a/mod_ci/controllers.py +++ b/mod_ci/controllers.py @@ -1193,7 +1193,8 @@ def create_instance(compute, project, zone, test, reportURL) -> Dict: metadata_items = [ {'key': 'startup-script', 'value': startup_script}, {'key': 'reportURL', 'value': reportURL}, - {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')} + {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')}, + {'key': 'testID', 'value': str(test.id)} ] elif test.platform == TestPlatform.windows: image_response = compute.images().getFromFamily(project=config.get('WINDOWS_INSTANCE_PROJECT_NAME', ''), @@ -1209,7 +1210,8 @@ def create_instance(compute, project, zone, test, reportURL) -> Dict: {'key': 'service_account', 'value': service_account}, {'key': 'rclone_conf', 'value': rclone_conf}, {'key': 'reportURL', 'value': reportURL}, - {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')} + {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')}, + {'key': 'testID', 'value': str(test.id)} ] source_disk_image = image_response['selfLink'] @@ -2625,7 +2627,7 @@ def upload_log_type_request(log, test_id, repo_folder, test, request) -> bool: uploaded_file.save(temp_path) final_path = os.path.join(repo_folder, 'LogFiles', f"{test.id}.txt") - os.rename(temp_path, final_path) + os.replace(temp_path, final_path) log.debug("Stored log file") return True @@ -2671,7 +2673,7 @@ def upload_type_request(log, 
test_id, repo_folder, test, request) -> bool: results_dir = os.path.join(repo_folder, 'TestResults') os.makedirs(results_dir, exist_ok=True) final_path = os.path.join(results_dir, f'{file_hash}{file_extension}') - os.rename(temp_path, final_path) + os.replace(temp_path, final_path) rto = RegressionTestOutput.query.filter( RegressionTestOutput.id == request.form['test_file_id']).first() result_file = TestResultFile(test.id, request.form['test_id'], rto.id, rto.correct, file_hash) diff --git a/mod_test/controllers.py b/mod_test/controllers.py index d5e91c9e..e910f8a0 100644 --- a/mod_test/controllers.py +++ b/mod_test/controllers.py @@ -367,7 +367,6 @@ def generate_diff(test_id: int, regression_test_id: int, output_id: int, to_view @mod_test.route('/log-files/') -@login_required def download_build_log_file(test_id): """ Serve download of build log. @@ -379,15 +378,17 @@ def download_build_log_file(test_id): :return: build log text file :rtype: Flask response """ - from run import config + from run import config, storage_client_bucket test = Test.query.filter(Test.id == test_id).first() + from flask import send_from_directory + if test is not None: file_name = f"{test_id}.txt" - log_file_path = os.path.join(config.get('SAMPLE_REPOSITORY', ''), 'LogFiles', file_name) - + log_dir = os.path.join(config.get('SAMPLE_REPOSITORY', ''), 'LogFiles') + log_file_path = os.path.join(log_dir, file_name) if os.path.isfile(log_file_path): - return serve_file_download(file_name, 'LogFiles') + return send_from_directory(log_dir, file_name, as_attachment=True) raise TestNotFoundException(f"Build log for Test {test_id} not found") @@ -442,3 +443,249 @@ def stop_test(test_id): g.db.commit() g.log.info(f"test with id: {test_id} stopped") return redirect(url_for('.by_id', test_id=test.id)) + + +def _artifact_redirect(test_id, blob_path, filename='artifact'): + """Generate a signed URL for a GCS artifact and redirect, or 404.""" + from datetime import timedelta + + from run import config, 
storage_client_bucket
+
+    blob = storage_client_bucket.blob(blob_path)
+    if not blob.exists():
+        abort(404)
+    url = blob.generate_signed_url(
+        version="v4",
+        expiration=timedelta(minutes=config.get('GCS_SIGNED_URL_EXPIRY_LIMIT', 30)),
+        method="GET",
+        response_disposition=f'attachment; filename="{filename}"'
+    )
+    return redirect(url)
+
+
+@mod_test.route('/<test_id>/binary')
+def download_binary(test_id):
+    """Download the ccextractor binary used in a test (linux or windows)."""
+    from run import storage_client_bucket
+    # Try linux name first, then windows
+    for name in ['ccextractor', 'ccextractor.exe']:
+        blob_path = f'test_artifacts/{test_id}/{name}'
+        if storage_client_bucket.blob(blob_path).exists():
+            return _artifact_redirect(test_id, blob_path, filename=name)
+    abort(404)
+
+
+@mod_test.route('/<test_id>/coredump')
+def download_coredump(test_id):
+    """Download the coredump from a test, if one was produced."""
+    return _artifact_redirect(
+        test_id,
+        f'test_artifacts/{test_id}/coredump',
+        filename=f'coredump-{test_id}'
+    )
+
+
+@mod_test.route('/<test_id>/combined-stdout')
+def download_combined_stdout(test_id):
+    """Download the combined stdout/stderr log from all test invocations."""
+    return _artifact_redirect(
+        test_id,
+        f'test_artifacts/{test_id}/combined_stdout.log',
+        filename=f'combined_stdout-{test_id}.log'
+    )
+
+
+@mod_test.route('/<test_id>/regression/<regression_test_id>/<output_id>/output-got')
+def download_output_got(test_id, regression_test_id, output_id):
+    """Download the actual output file from TestResults using DB hash."""
+    rf = TestResultFile.query.filter(and_(
+        TestResultFile.test_id == test_id,
+        TestResultFile.regression_test_id == regression_test_id,
+        TestResultFile.regression_test_output_id == output_id
+    )).first()
+    if rf is None or rf.got is None:
+        abort(404)
+    import os
+    ext = os.path.splitext(rf.regression_test_output.filename_correct)[1]
+    return _artifact_redirect(
+        test_id,
+        f'TestResults/{rf.got}{ext}',
+        filename=f'output_got_{regression_test_id}_{output_id}{ext}'
+    )
+
+
+@mod_test.route('/<test_id>/regression/<regression_test_id>/<output_id>/output-expected')
+def download_output_expected(test_id, regression_test_id, output_id):
+    """Download the expected output file from TestResults using DB hash."""
+    rf = TestResultFile.query.filter(and_(
+        TestResultFile.test_id == test_id,
+        TestResultFile.regression_test_id == regression_test_id,
+        TestResultFile.regression_test_output_id == output_id
+    )).first()
+    if rf is None:
+        abort(404)
+    import os
+    ext = os.path.splitext(rf.regression_test_output.filename_correct)[1]
+    return _artifact_redirect(
+        test_id,
+        f'TestResults/{rf.expected}{ext}',
+        filename=f'output_expected_{regression_test_id}_{output_id}{ext}'
+    )
+@mod_test.route('/<test_id>/sample/<sample_id>')
+def download_sample_ai(test_id, sample_id):
+    """Download the sample file for a regression test (no auth required for AI workflow)."""
+    from mod_sample.models import Sample
+    sample = Sample.query.filter(Sample.id == sample_id).first()
+    if sample is None:
+        abort(404)
+    return _artifact_redirect(
+        test_id,
+        f'TestFiles/{sample.filename}',
+        filename=sample.original_name
+    )
+
+
+@mod_test.route('/<test_id>/ai.json')
+def ai_json_endpoint(test_id):
+    """Structured JSON with download URLs for all artifacts — for AI agents."""
+    from run import storage_client_bucket
+
+    test = Test.query.filter(Test.id == test_id).first()
+    if test is None:
+        return jsonify({'error': f'Test {test_id} not found'}), 404
+
+    def blob_exists(path):
+        return storage_client_bucket.blob(path).exists()
+
+    has_binary = (
+        blob_exists(f'test_artifacts/{test_id}/ccextractor') or
+        blob_exists(f'test_artifacts/{test_id}/ccextractor.exe')
+    )
+    has_coredump = blob_exists(f'test_artifacts/{test_id}/coredump')
+    has_combined_stdout = blob_exists(f'test_artifacts/{test_id}/combined_stdout.log')
+
+    results = get_test_results(test)
+    test_cases = []
+    total = 0
+    passed = 0
+    failed = 0
+
+    for category in results:
+        for t_data in category['tests']:
+            total += 1
+            rt = t_data['test']
+            result = t_data['result']
+            is_error 
= t_data.get('error', False) + result_files = t_data['files'] + + if is_error: + failed += 1 + else: + passed += 1 + + outputs = [] + for expected_output in rt.output_files: + if expected_output.ignore: + continue + + matched_rf = None + for rf in result_files: + if rf.test_id != -1 and rf.regression_test_output_id == expected_output.id: + matched_rf = rf + break + + got_url = None + diff_url = None + + if matched_rf and matched_rf.got is not None: + got_url = url_for( + '.download_output_got', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + _external=True + ) + diff_url = url_for( + '.generate_diff', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + to_view=0, + _external=True + ) + else: + # If test passed, got and expected match exactly. + got_url = url_for( + '.download_output_expected', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + _external=True + ) + + output_entry = { + 'output_id': expected_output.id, + 'correct_extension': expected_output.correct_extension, + 'expected_url': url_for( + '.download_output_expected', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + _external=True + ), + 'got_url': got_url, + 'diff_url': diff_url, + } + outputs.append(output_entry) + + test_cases.append({ + 'regression_test_id': rt.id, + 'category': category['category'].name, + 'sample_filename': rt.sample.original_name, + 'sample_url': url_for( + '.download_sample_ai', + test_id=test_id, + sample_id=rt.sample.id, + _external=True + ), + 'arguments': rt.command, + 'result': 'Fail' if is_error else 'Pass', + 'exit_code': result.exit_code if result else None, + 'expected_exit_code': result.expected_rc if result else None, + 'runtime_ms': result.runtime if result else None, + 'outputs': outputs, + 'how_to_reproduce': f'./ccextractor {rt.command} {rt.sample.original_name}', + }) + + report = { + 'test_id': test.id, + 'commit': test.commit, + 
'platform': test.platform.value, + 'branch': test.branch, + 'status': 'completed' if test.finished else 'running', + 'binary_url': url_for( + '.download_binary', test_id=test_id, _external=True + ) if has_binary else None, + 'coredump_url': url_for( + '.download_coredump', test_id=test_id, _external=True + ) if has_coredump else None, + 'log_url': url_for( + '.download_build_log_file', test_id=test_id, _external=True + ), + 'combined_stdout_url': url_for( + '.download_combined_stdout', test_id=test_id, _external=True + ) if has_combined_stdout else None, + 'summary': { + 'total': total, + 'passed': passed, + 'failed': failed, + }, + 'test_cases': test_cases, + 'how_to_reproduce': ( + 'Download the binary and sample, then run: ' + + ('./ccextractor {arguments} {sample_filename}' if test.platform.value == 'linux' + else 'ccextractorwinfull.exe {arguments} {sample_filename}') + ), + } + + return jsonify(report) diff --git a/utility.py b/utility.py index 96308e41..e8680f14 100644 --- a/utility.py +++ b/utility.py @@ -30,14 +30,13 @@ def serve_file_download(file_name, file_folder, file_sub_folder='') -> werkzeug. 
""" from run import config, storage_client_bucket - file_path = path.join(file_folder, file_sub_folder, file_name) + file_path = '/'.join(filter(None, [file_folder, file_sub_folder, file_name])) blob = storage_client_bucket.blob(file_path) - blob.content_disposition = f'attachment; filename="{file_name}"' - blob.patch() url = blob.generate_signed_url( version="v4", - expiration=timedelta(minutes=config.get('GCS_SIGNED_URL_EXPIRY_LIMIT', '')), + expiration=timedelta(minutes=config.get('GCS_SIGNED_URL_EXPIRY_LIMIT', 30)), method="GET", + response_disposition=f'attachment; filename="{file_name}"' ) return redirect(url) From 0e183d4d85cdbc3a7372be9b89e11a8e8e16c98f Mon Sep 17 00:00:00 2001 From: Pulkit Chauhan Date: Wed, 11 Mar 2026 21:23:30 +0530 Subject: [PATCH 2/2] minor cleanup --- install/ci-vm/ci-linux/ci/runCI | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/install/ci-vm/ci-linux/ci/runCI b/install/ci-vm/ci-linux/ci/runCI index d1d84765..521ac2aa 100644 --- a/install/ci-vm/ci-linux/ci/runCI +++ b/install/ci-vm/ci-linux/ci/runCI @@ -128,8 +128,7 @@ if [ -e "${dstDir}/ccextractor" ]; then echo "=== CCExtractor Binary Version ===" >> "${logFile}" ./ccextractor --version >> "${logFile}" 2>&1 echo "=== End Version Info ===" >> "${logFile}" - - + postStatus "testing" "Running tests" ccextractor_path="$(pwd)/ccextractor" combined_stdout="/tmp/combined_stdout.log" : > "${combined_stdout}" @@ -190,7 +189,6 @@ WRAPPER_EOF done sendLogFile - upload_artifact "${logFile}" "test_artifacts/${test_id}/full_output.log" postStatus "completed" "Ran all tests" sudo shutdown -h now