diff --git a/benchmarks.yml b/benchmarks.yml
index 5b61fd2f..8e94439f 100644
--- a/benchmarks.yml
+++ b/benchmarks.yml
@@ -74,6 +74,11 @@ binarytrees:
 blurhash:
   desc: blurhash (blurred preview image) calculation
   ractor: true
+gcbench:
+  desc: Ellis-Kovac-Boehm GCBench builds binary trees of various depths to exercise GC marking, sweeping, and write barriers.
+  category: gc
+  single_file: true
+  default_harness: harness-gc
 erubi:
   desc: erubi compiles a simple Erb template into a method with erubi, then times evaluating that method.
   ractor: true
diff --git a/benchmarks/gcbench.rb b/benchmarks/gcbench.rb
new file mode 100644
index 00000000..be6399fc
--- /dev/null
+++ b/benchmarks/gcbench.rb
@@ -0,0 +1,90 @@
+# Ellis-Kovac-Boehm GCBench
+#
+# Adapted from the benchmark by John Ellis and Pete Kovac (Post Communications),
+# modified by Hans Boehm (Silicon Graphics), translated to Ruby by Noel Padavan
+# and Chris Seaton. Adapted for yjit-bench by Matt Valentine-House.
+#
+# Builds balanced binary trees of various depths to generate objects with a range
+# of lifetimes. Two long-lived structures (a tree and a float array) are kept
+# alive throughout to model applications that maintain persistent heap data.
+#
+# Tree construction uses both top-down (populate — creates old-to-young pointers,
+# exercises write barriers) and bottom-up (make_tree — young-to-young only).
+
+require_relative '../harness/loader'
+
+class GCBench
+  class Node
+    attr_accessor :left, :right, :i, :j
+
+    def initialize(left = nil, right = nil)
+      @left = left
+      @right = right
+      @i = 0
+      @j = 0
+    end
+  end
+
+  STRETCH_TREE_DEPTH = 18
+  LONG_LIVED_TREE_DEPTH = 16
+  ARRAY_SIZE = 500_000
+  MIN_TREE_DEPTH = 4
+  MAX_TREE_DEPTH = 16
+
+  def self.tree_size(depth)
+    (1 << (depth + 1)) - 1
+  end
+
+  def self.num_iters(depth)
+    2 * tree_size(STRETCH_TREE_DEPTH) / tree_size(depth)
+  end
+
+  # Top-down: assigns children to an existing (older) node — old-to-young pointers.
+  def self.populate(depth, node)
+    if depth > 0
+      depth -= 1
+      node.left = Node.new
+      node.right = Node.new
+      populate(depth, node.left)
+      populate(depth, node.right)
+    end
+  end
+
+  # Bottom-up: children allocated before parent — young-to-young pointers only.
+  def self.make_tree(depth)
+    if depth <= 0
+      Node.new
+    else
+      Node.new(make_tree(depth - 1), make_tree(depth - 1))
+    end
+  end
+
+  def self.time_construction(depth)
+    n = num_iters(depth)
+
+    n.times do
+      node = Node.new
+      populate(depth, node)
+    end
+
+    n.times do
+      make_tree(depth)
+    end
+  end
+end
+
+# Stretch the heap before measurement
+GCBench.make_tree(GCBench::STRETCH_TREE_DEPTH)
+
+# Long-lived objects that persist across all iterations
+long_lived_tree = GCBench::Node.new
+GCBench.populate(GCBench::LONG_LIVED_TREE_DEPTH, long_lived_tree)
+
+long_lived_array = Array.new(GCBench::ARRAY_SIZE)
+(GCBench::ARRAY_SIZE / 2).times { |i| long_lived_array[i + 1] = 1.0 / (i + 1) }
+
+run_benchmark(10) do
+  GCBench::MIN_TREE_DEPTH.step(GCBench::MAX_TREE_DEPTH, 2) do |depth|
+    GCBench.time_construction(depth)
+  end
+end
diff --git a/harness-gc/harness.rb b/harness-gc/harness.rb
new file mode 100644
index 00000000..22f54527
--- /dev/null
+++ b/harness-gc/harness.rb
@@ -0,0 +1,112 @@
+require_relative "../harness/harness-common"
+
+WARMUP_ITRS = Integer(ENV.fetch('WARMUP_ITRS', 15))
+MIN_BENCH_ITRS = Integer(ENV.fetch('MIN_BENCH_ITRS', 10))
+MIN_BENCH_TIME = Integer(ENV.fetch('MIN_BENCH_TIME', 10))
+
+puts RUBY_DESCRIPTION
+
+def realtime
+  r0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+  yield
+  Process.clock_gettime(Process::CLOCK_MONOTONIC) - r0
+end
+
+def gc_stat_heap_snapshot
+  return {} unless GC.respond_to?(:stat_heap)
+  GC.stat_heap
+end
+
+def gc_stat_heap_delta(before, after)
+  delta = {}
+  after.each do |heap_idx, after_stats|
+    before_stats = before[heap_idx] || {}
+    heap_delta = {}
+    after_stats.each do |key, val|
+      next unless val.is_a?(Numeric) && before_stats.key?(key)
+      heap_delta[key] = val - before_stats[key]
+    end
+    delta[heap_idx] = heap_delta unless heap_delta.empty?
+  end
+  delta
+end
+
+def run_benchmark(_num_itrs_hint, **, &block)
+  times = []
+  marking_times = []
+  sweeping_times = []
+  gc_counts = []
+  gc_heap_deltas = []
+  total_time = 0
+  num_itrs = 0
+
+  has_marking = GC.stat.key?(:marking_time)
+  has_sweeping = GC.stat.key?(:sweeping_time)
+
+  header = "itr: time"
+  header << " marking" if has_marking
+  header << " sweeping" if has_sweeping
+  header << " gc_count"
+  puts header
+
+  begin
+    gc_before = GC.stat
+    heap_before = gc_stat_heap_snapshot
+
+    time = realtime(&block)
+    num_itrs += 1
+
+    gc_after = GC.stat
+    heap_after = gc_stat_heap_snapshot
+
+    time_ms = (1000 * time).to_i
+    mark_delta = has_marking ? gc_after[:marking_time] - gc_before[:marking_time] : 0
+    sweep_delta = has_sweeping ? gc_after[:sweeping_time] - gc_before[:sweeping_time] : 0
+    count_delta = gc_after[:count] - gc_before[:count]
+
+    itr_str = "%4s %6s" % ["##{num_itrs}:", "#{time_ms}ms"]
+    itr_str << " %9.1fms" % mark_delta if has_marking
+    itr_str << " %9.1fms" % sweep_delta if has_sweeping
+    itr_str << " %9d" % count_delta
+    puts itr_str
+
+    times << time
+    marking_times << mark_delta
+    sweeping_times << sweep_delta
+    gc_counts << count_delta
+    gc_heap_deltas << gc_stat_heap_delta(heap_before, heap_after)
+    total_time += time
+  end until num_itrs >= WARMUP_ITRS + MIN_BENCH_ITRS and total_time >= MIN_BENCH_TIME
+
+  warmup_range = 0...WARMUP_ITRS
+  bench_range = WARMUP_ITRS..-1
+
+  extra = {}
+  extra["gc_marking_time_warmup"] = marking_times[warmup_range]
+  extra["gc_marking_time_bench"] = marking_times[bench_range]
+  extra["gc_sweeping_time_warmup"] = sweeping_times[warmup_range]
+  extra["gc_sweeping_time_bench"] = sweeping_times[bench_range]
+  extra["gc_count_warmup"] = gc_counts[warmup_range]
+  extra["gc_count_bench"] = gc_counts[bench_range]
+  extra["gc_stat_heap_deltas"] = gc_heap_deltas[bench_range]
+
+  return_results(times[warmup_range], times[bench_range], **extra)
+
+  non_warmups = times[bench_range]
+  if non_warmups.size > 1
+    non_warmups_ms = ((non_warmups.sum / non_warmups.size) * 1000.0).to_i
+    puts "Average of last #{non_warmups.size}, non-warmup iters: #{non_warmups_ms}ms"
+
+    if has_marking
+      mark_bench = marking_times[bench_range]
+      avg_mark = mark_bench.sum.fdiv(mark_bench.size) # GC.stat deltas are Integer ms; plain `/` would truncate
+      puts "Average marking time: %.1fms" % avg_mark
+    end
+
+    if has_sweeping
+      sweep_bench = sweeping_times[bench_range]
+      avg_sweep = sweep_bench.sum.fdiv(sweep_bench.size) # GC.stat deltas are Integer ms; plain `/` would truncate
+      puts "Average sweeping time: %.1fms" % avg_sweep
+    end
+  end
+end
diff --git a/harness/harness-common.rb b/harness/harness-common.rb
index 05b90a9f..6fd0e888 100644
--- a/harness/harness-common.rb
+++ b/harness/harness-common.rb
@@ -75,8 +75,13 @@ def get_rss
     # Collect our own peak mem usage as soon as reasonable after finishing the last iteration.
     # This method is only accurate to kilobytes, but is nicely portable and doesn't require
     # any extra gems/dependencies.
-    mem = `ps -o rss= -p #{Process.pid}`
-    1024 * Integer(mem)
+    begin
+      mem = `ps -o rss= -p #{Process.pid}`
+      1024 * Integer(mem)
+    rescue ArgumentError, Errno::ENOENT
+      # ps failed (e.g. Nix procps on macOS). Fall back to peak RSS via getrusage.
+      get_maxrss || 0
+    end
   end
 end
 
@@ -135,11 +140,12 @@ def get_maxrss
 yb_env_var = ENV.fetch("RESULT_JSON_PATH", default_path)
 YB_OUTPUT_FILE = File.expand_path yb_env_var
 
-def return_results(warmup_iterations, bench_iterations)
+def return_results(warmup_iterations, bench_iterations, **extra)
   ruby_bench_results = {
     "RUBY_DESCRIPTION" => RUBY_DESCRIPTION,
     "warmup" => warmup_iterations,
     "bench" => bench_iterations,
+    **extra,
   }
 
   # Collect JIT stats before loading any additional code.
diff --git a/lib/benchmark_runner.rb b/lib/benchmark_runner.rb
index a5a85cc8..c50bedea 100644
--- a/lib/benchmark_runner.rb
+++ b/lib/benchmark_runner.rb
@@ -48,7 +48,7 @@ def write_csv(output_path, ruby_descriptions, table)
     end
 
     # Build output text string with metadata, table, and legend
-    def build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: false)
+    def build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: false, include_gc: false)
       base_name, *other_names = ruby_descriptions.keys
 
       output_str = +""
@@ -68,6 +68,10 @@ def build_output_text(ruby_descriptions, table, format, bench_failures, include_
           if include_rss
             output_str << "- RSS #{base_name}/#{name}: ratio of #{base_name}/#{name} RSS. Higher is better for #{name}. Above 1 means lower memory usage.\n"
           end
+          if include_gc
+            output_str << "- mark #{base_name}/#{name}: ratio of GC marking time. Higher is better for #{name}. Above 1 represents faster marking.\n"
+            output_str << "- sweep #{base_name}/#{name}: ratio of GC sweeping time. Higher is better for #{name}. Above 1 represents faster sweeping.\n"
+          end
         end
         output_str << "- ***: p < 0.001, **: p < 0.01, *: p < 0.05 (Welch's t-test)\n"
       end
diff --git a/lib/benchmark_runner/cli.rb b/lib/benchmark_runner/cli.rb
index 8db164c2..44127776 100644
--- a/lib/benchmark_runner/cli.rb
+++ b/lib/benchmark_runner/cli.rb
@@ -109,7 +109,7 @@ def run
       BenchmarkRunner.write_csv(output_path, ruby_descriptions, table)
 
       # Save the output in a text file that we can easily refer to
-      output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: args.rss)
+      output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: args.rss, include_gc: builder.include_gc?)
       out_txt_path = output_path + ".txt"
       File.open(out_txt_path, "w") { |f| f.write output_str }
 
diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb
index f9d90868..a08c7494 100644
--- a/lib/results_table_builder.rb
+++ b/lib/results_table_builder.rb
@@ -10,11 +10,16 @@ def initialize(executable_names:, bench_data:, include_rss: false, include_pvalu
     @bench_data = bench_data
     @include_rss = include_rss
     @include_pvalue = include_pvalue
+    @include_gc = detect_gc_data(bench_data)
     @base_name = executable_names.first
     @other_names = executable_names[1..]
     @bench_names = compute_bench_names
   end
 
+  def include_gc?
+    @include_gc
+  end
+
   def build
     table = [build_header]
     format = build_format
@@ -41,6 +46,10 @@ def build_header
     @executable_names.each do |name|
       header << "#{name} (ms)"
       header << "RSS (MiB)" if @include_rss
+      if @include_gc
+        header << "#{name} mark (ms)"
+        header << "#{name} sweep (ms)"
+      end
     end
 
     @other_names.each do |name|
@@ -60,6 +69,13 @@ def build_header
       end
     end
 
+    if @include_gc
+      @other_names.each do |name|
+        header << "mark #{@base_name}/#{name}"
+        header << "sweep #{@base_name}/#{name}"
+      end
+    end
+
     header
   end
 
@@ -69,6 +85,10 @@ def build_format
     @executable_names.each do |_name|
       format << "%s"
       format << "%.1f" if @include_rss
+      if @include_gc
+        format << "%s"
+        format << "%s"
+      end
     end
 
     @other_names.each do |_name|
@@ -88,6 +108,13 @@ def build_format
       end
     end
 
+    if @include_gc
+      @other_names.each do |_name|
+        format << "%s"
+        format << "%s"
+      end
+    end
+
     format
   end
 
@@ -100,28 +127,45 @@ def build_row(bench_name)
     base_t, *other_ts = times_no_warmup
     base_rss, *other_rsss = rsss
 
+    if @include_gc
+      marking_times = extract_gc_times(bench_name, 'gc_marking_time_bench')
+      sweeping_times = extract_gc_times(bench_name, 'gc_sweeping_time_bench')
+      base_mark, *other_marks = marking_times
+      base_sweep, *other_sweeps = sweeping_times
+    end
+
     row = [bench_name]
-    build_base_columns(row, base_t, base_rss)
-    build_comparison_columns(row, other_ts, other_rsss)
+    build_base_columns(row, base_t, base_rss, base_mark, base_sweep)
+    build_comparison_columns(row, other_ts, other_rsss, other_marks, other_sweeps)
     build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)
     build_rss_ratio_columns(row, base_rss, other_rsss)
+    build_gc_ratio_columns(row, base_mark, other_marks, base_sweep, other_sweeps)
 
     row
   end
 
-  def build_base_columns(row, base_t, base_rss)
+  def build_base_columns(row, base_t, base_rss, base_mark, base_sweep)
     row << format_time_with_stddev(base_t)
     row << base_rss if @include_rss
+    if @include_gc
+      row << format_time_with_stddev(base_mark)
+      row << format_time_with_stddev(base_sweep)
+    end
   end
 
-  def build_comparison_columns(row, other_ts, other_rsss)
-    other_ts.zip(other_rsss).each do |other_t, other_rss|
+  def build_comparison_columns(row, other_ts, other_rsss, other_marks, other_sweeps)
+    other_ts.each_with_index do |other_t, i|
       row << format_time_with_stddev(other_t)
-      row << other_rss if @include_rss
+      row << other_rsss[i] if @include_rss
+      if @include_gc
+        row << format_time_with_stddev(other_marks[i])
+        row << format_time_with_stddev(other_sweeps[i])
+      end
     end
   end
 
   def format_time_with_stddev(values)
+    return "N/A" if values.nil? || values.empty?
     "%.1f ± %.1f%%" % [mean(values), stddev_percent(values)]
   end
 
@@ -147,6 +191,26 @@ def build_rss_ratio_columns(row, base_rss, other_rsss)
     end
   end
 
+  def build_gc_ratio_columns(row, base_mark, other_marks, base_sweep, other_sweeps)
+    return unless @include_gc
+
+    # Emit mark then sweep per executable so cells line up with build_header/build_format.
+    sweeps = other_sweeps || []
+    (other_marks || []).each_with_index do |other_mark, i|
+      row << gc_ratio(base_mark, other_mark)
+      row << gc_ratio(base_sweep, sweeps[i])
+    end
+  end
+
+  def gc_ratio(base, other)
+    if base.nil? || base.empty? || other.nil? || other.empty? ||
+       mean(other) == 0.0
+      return "N/A"
+    end
+    pval = Stats.welch_p_value(base, other)
+    format_ratio(mean(base) / mean(other), pval)
+  end
+
   def format_ratio(ratio, pval)
     sym = significance_symbol(pval)
     formatted = "%.3f" % ratio
@@ -211,6 +275,16 @@ def extract_rss_values(bench_name)
     end
   end
 
+  def extract_gc_times(bench_name, key)
+    @executable_names.map do |name|
+      bench_data_for(name, bench_name)[key] || []
+    end
+  end
+
+  def detect_gc_data(bench_data)
+    bench_data.values.any? { |benchmarks| benchmarks.values.any? { |d| d.is_a?(Hash) && d.key?('gc_marking_time_bench') } }
+  end
+
   def bench_data_for(name, bench_name)
     @bench_data[name][bench_name]
   end
diff --git a/test/run_benchmarks_integration_test.rb b/test/run_benchmarks_integration_test.rb
index 9cf5a888..5f0aa7b4 100644
--- a/test/run_benchmarks_integration_test.rb
+++ b/test/run_benchmarks_integration_test.rb
@@ -56,7 +56,7 @@
     it 'benchmarks.yml has valid category values' do
       require 'yaml'
       data = YAML.load_file(@benchmarks_yml)
-      valid_categories = ['headline', 'micro', 'other']
+      valid_categories = ['headline', 'micro', 'gc', 'other']
 
       data.each do |name, metadata|
         if metadata['category']