9595 python3 ${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py --discard 2 --input /root/${BENCHMARK_CSV} --output /root/${BENCHMARK_CSV}
9696
9797 - name : Profiler - Nsight Compute
98- if : ${{ matrix.vendor == 'nvidia' }}
98+ if : ${{ matrix.name == 'nvidia-h100' }}
9999 run : |
100100 dnf install -y cuda-nsight-compute-13-1
101101 source /etc/profile.d/modules.sh
@@ -104,17 +104,31 @@ jobs:
104104 ncu --set none --metrics gpu__time_duration.avg --export ${{ matrix.name }} --clock-control none --force-overwrite ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1 # Generates ${{ matrix.name }}.ncu-rep
105105 ncu --import ${STANDALONE_DIR}/${{ matrix.name }}.ncu-rep --print-units base --csv > /root/${PROFILER_CSV}
106106 rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
107- python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_${{ matrix.vendor }}.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
107+ python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_ncu.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
108+
109+ - name : Profiler - Nsight Systems
110+ if : ${{ matrix.name == 'nvidia-l40s' }}
111+ run : |
112+ curl -fL --retry 3 -o ${STANDALONE_DIR}/nsys.rpm https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2026_2/NsightSystems-linux-cli-public-2026.2.1.210-3763964.rpm
113+ dnf install -y ${STANDALONE_DIR}/nsys.rpm
114+ rm -f ${STANDALONE_DIR}/nsys.rpm
115+ source /etc/profile.d/modules.sh
116+ module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
117+ cd ${STANDALONE_DIR}
118+ nsys profile -o ${{ matrix.name }} ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1 # Generates ${{ matrix.name }}.nsys-rep
119+ nsys stats --report cuda_gpu_kern_sum --timeunit us --force-export=true --format csv ${{ matrix.name }}.nsys-rep > /root/${PROFILER_CSV}
120+ rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
121+ python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_nsys.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
108122
109123 - name : Profiler - rocprofv2
110- if : ${{ matrix.vendor == 'amd' }}
124+ if : ${{ matrix.name == 'amd-mi300x' || matrix.name == 'amd-w7900' }}
111125 run : |
112126 source /etc/profile.d/modules.sh
113127 module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
114128 cd ${STANDALONE_DIR}
115129 rocprofv2 --output-directory /root --output-file-name ${{ matrix.name }} ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1 # Generates results_${{ matrix.name }}.csv == ${PROFILER_CSV}
116130 rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
117- python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_${{ matrix.vendor }}.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
131+ python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_rocprofv2.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
118132
119133 - name : Upload Artifact
120134 uses : actions/upload-artifact@v6
@@ -127,8 +141,7 @@ jobs:
127141 mkdir -p ${STANDALONE_DIR}/baseline
128142 curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${PROFILER_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${PROFILER_CSV}
129143 curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${BENCHMARK_CSV}
130- #source /etc/profile.d/modules.sh
131- #module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
144+ python3 -m pip install --no-cache-dir --user tabulate
132145 python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${PROFILER_CSV} --current /root/${PROFILER_CSV} >> ${GITHUB_STEP_SUMMARY}
133146 echo -e "\n\n" >> ${GITHUB_STEP_SUMMARY}
134147 python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} --current /root/${BENCHMARK_CSV} >> ${GITHUB_STEP_SUMMARY}
0 commit comments