Skip to content

Commit b2d85cd

Browse files
Use nsys for L40s only
1 parent f20dbba commit b2d85cd

4 files changed

Lines changed: 57 additions & 6 deletions

File tree

.github/scripts/profiler_nsys.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import argparse
2+
import csv
3+
import statistics
4+
5+
# Only kernels whose name starts with this prefix are reported; the prefix
# itself is stripped from the name written to the output.
KERNEL_PREFIX = "krnl_"

# Number of leading lines of the input that are skipped before data rows.
PREAMBLE_LINES = 3

# Column indices in the `nsys stats --report cuda_gpu_kern_sum` CSV, whose
# layout is: Time (%), Total Time, Instances, Avg, Med, Min, Max, StdDev, Name.
COL_TOTAL_TIME = 1
COL_INSTANCES = 2
COL_STDDEV = 7
COL_NAME = 8


def read_kernels(path):
    """Read the kernel summary CSV at *path*.

    Returns a list of ``(name, total_time, sigma, instances)`` tuples sorted
    by name, one per row whose kernel name starts with ``KERNEL_PREFIX`` and
    has at least one character after it. Empty rows are ignored.
    """
    kernels = []
    # newline='' lets the csv module handle line endings itself.
    with open(path, newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        # Tolerate inputs shorter than the preamble instead of dying with a
        # bare StopIteration; such inputs simply yield no kernels.
        for _ in range(PREAMBLE_LINES):
            next(csv_reader, None)
        for row in csv_reader:
            if not row:
                continue
            full_name = row[COL_NAME]
            if not full_name.startswith(KERNEL_PREFIX):
                continue
            name = full_name[len(KERNEL_PREFIX):]
            if not name:
                continue
            kernels.append((
                name,
                float(row[COL_TOTAL_TIME]),
                float(row[COL_STDDEV]),
                int(row[COL_INSTANCES]),
            ))
    kernels.sort(key=lambda kernel: kernel[0])
    return kernels


def summarize(kernels, runs):
    """Convert per-kernel totals into per-run rows.

    *kernels* is an iterable of ``(name, total_time, sigma, instances)``.
    Returns a list of rows starting with the header ``name, time, stdev``.
    """
    data = [["name", "time", "stdev"]]
    for name, total_time, sigma, instances in kernels:
        # Average number of launches of this kernel in a single run.
        count = instances / runs
        # total_time is already summed over every instance of every run, so
        # the per-run time is total / runs. Multiplying the total by `count`
        # would overcount by the number of instances per run.
        mean = int(total_time / runs)
        stdev = sigma * count
        data.append([name, mean, stdev])
    return data


def main(argv=None):
    """Command-line entry point; *argv* defaults to ``sys.argv[1:]``."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--runs', type=int, required=True, help='Number of runs')
    parser.add_argument('-i', '--input', required=True, help='Input CSV file')
    parser.add_argument('-o', '--output', required=True, help='Output CSV file')
    args = parser.parse_args(argv)
    if args.runs < 1:
        # Avoids a ZeroDivisionError (or negative times) further down.
        parser.error('--runs must be a positive integer')

    # The input is read completely before the output is opened, because both
    # options may name the same file.
    data = summarize(read_kernels(args.input), args.runs)

    with open(args.output, 'w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerows(data)


if __name__ == "__main__":
    main()

.github/workflows/standalone-benchmark.yml

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ jobs:
9595
python3 ${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py --discard 2 --input /root/${BENCHMARK_CSV} --output /root/${BENCHMARK_CSV}
9696
9797
- name: Profiler - Nsight Compute
98-
if: ${{ matrix.vendor == 'nvidia' }}
98+
if: ${{ matrix.name == 'nvidia-h100' }}
9999
run: |
100100
dnf install -y cuda-nsight-compute-13-1
101101
source /etc/profile.d/modules.sh
@@ -104,17 +104,31 @@ jobs:
104104
ncu --set none --metrics gpu__time_duration.avg --export ${{ matrix.name }} --clock-control none --force-overwrite ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1 # Generates ${{ matrix.name }}.ncu-rep
105105
ncu --import ${STANDALONE_DIR}/${{ matrix.name }}.ncu-rep --print-units base --csv > /root/${PROFILER_CSV}
106106
rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
107-
python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_${{ matrix.vendor }}.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
107+
python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_ncu.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
108+
109+
- name: Profiler - Nsight Systems
110+
if: ${{ matrix.name == 'nvidia-l40s' }}
111+
run: |
112+
curl -fL --retry 3 -o ${STANDALONE_DIR}/nsys.rpm https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2026_2/NsightSystems-linux-cli-public-2026.2.1.210-3763964.rpm
113+
dnf install -y ${STANDALONE_DIR}/nsys.rpm
114+
rm -f ${STANDALONE_DIR}/nsys.rpm
115+
source /etc/profile.d/modules.sh
116+
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
117+
cd ${STANDALONE_DIR}
118+
nsys profile -o ${{ matrix.name }} ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1 # Generates ${{ matrix.name }}.nsys-rep
119+
nsys stats --report cuda_gpu_kern_sum --timeunit us --force-export=true --format csv ${{ matrix.name }}.nsys-rep > /root/${PROFILER_CSV}
120+
rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
121+
python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_nsys.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
108122
109123
- name: Profiler - rocprofv2
110-
if: ${{ matrix.vendor == 'amd' }}
124+
if: ${{ matrix.name == 'amd-mi300x' || matrix.name == 'amd-w7900' }}
111125
run: |
112126
source /etc/profile.d/modules.sh
113127
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
114128
cd ${STANDALONE_DIR}
115129
rocprofv2 --output-directory /root --output-file-name ${{ matrix.name }} ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1 # Generates results_${{ matrix.name }}.csv == ${PROFILER_CSV}
116130
rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
117-
python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_${{ matrix.vendor }}.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
131+
python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_rocprofv2.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
118132
119133
- name: Upload Artifact
120134
uses: actions/upload-artifact@v6
@@ -127,8 +141,7 @@ jobs:
127141
mkdir -p ${STANDALONE_DIR}/baseline
128142
curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${PROFILER_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${PROFILER_CSV}
129143
curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${BENCHMARK_CSV}
130-
#source /etc/profile.d/modules.sh
131-
#module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
144+
python3 -m pip install --no-cache-dir --user tabulate
132145
python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${PROFILER_CSV} --current /root/${PROFILER_CSV} >> ${GITHUB_STEP_SUMMARY}
133146
echo -e "\n\n" >> ${GITHUB_STEP_SUMMARY}
134147
python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} --current /root/${BENCHMARK_CSV} >> ${GITHUB_STEP_SUMMARY}

0 commit comments

Comments
 (0)