Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ binarytrees:
blurhash:
desc: blurhash (blurred preview image) calculation
ractor: true
gcbench:
desc: Ellis-Kovac-Boehm GCBench builds binary trees of various depths to exercise GC marking, sweeping, and write barriers.
category: gc
single_file: true
default_harness: harness-gc
erubi:
desc: erubi compiles a simple Erb template into a method with erubi, then times evaluating that method.
ractor: true
Expand Down
90 changes: 90 additions & 0 deletions benchmarks/gcbench.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Ellis-Kovac-Boehm GCBench
#
# Adapted from the benchmark by John Ellis and Pete Kovac (Post Communications),
# modified by Hans Boehm (Silicon Graphics), translated to Ruby by Noel Padavan
# and Chris Seaton. Adapted for yjit-bench by Matt Valentine-House.
#
# Builds balanced binary trees of various depths to generate objects with a range
# of lifetimes. Two long-lived structures (a tree and a float array) are kept
# alive throughout to model applications that maintain persistent heap data.
#
# Tree construction uses both top-down (populate — creates old-to-young pointers,
# exercises write barriers) and bottom-up (make_tree — young-to-young only).

require_relative '../harness/loader'

# Namespace for the GCBench workload: tree node type, sizing constants and the
# tree-building routines driven by the benchmark script.
class GCBench
  # A binary tree node carrying two child links and two integer payload slots.
  class Node
    attr_accessor :left, :right, :i, :j

    # Children default to nil (a leaf); both payload slots start at zero.
    def initialize(left = nil, right = nil)
      @left = left
      @right = right
      @i = 0
      @j = 0
    end
  end

  # Depth of the throw-away tree used to stretch the heap.
  STRETCH_TREE_DEPTH = 18
  # Depth of the tree that is kept alive for the whole run.
  LONG_LIVED_TREE_DEPTH = 16
  # Number of slots in the long-lived array.
  ARRAY_SIZE = 500_000
  # Smallest and largest depths of the short-lived trees.
  MIN_TREE_DEPTH = 4
  MAX_TREE_DEPTH = 16

  class << self
    # Node count of a complete binary tree of the given depth: 2^(depth + 1) - 1.
    def tree_size(depth)
      (1 << (depth + 1)) - 1
    end

    # How many trees of +depth+ to build: twice the stretch tree's node count
    # divided (integer division) by the node count of one such tree.
    def num_iters(depth)
      stretch_nodes = tree_size(STRETCH_TREE_DEPTH)
      2 * stretch_nodes / tree_size(depth)
    end

    # Top-down construction: fresh children are attached to a node that already
    # exists, so the parent is always older than the objects it points to.
    def populate(depth, node)
      return unless depth > 0

      remaining = depth - 1
      node.left = Node.new
      node.right = Node.new
      populate(remaining, node.left)
      populate(remaining, node.right)
    end

    # Bottom-up construction: both subtrees are built first and the parent is
    # allocated last, so every reference points to an object allocated earlier.
    def make_tree(depth)
      return Node.new if depth <= 0

      left_subtree = make_tree(depth - 1)
      right_subtree = make_tree(depth - 1)
      Node.new(left_subtree, right_subtree)
    end

    # Builds num_iters(depth) trees top-down and then the same number bottom-up,
    # discarding each tree as soon as it is complete.
    def time_construction(depth)
      iterations = num_iters(depth)

      iterations.times do
        root = Node.new
        populate(depth, root)
      end

      iterations.times { make_tree(depth) }
    end
  end
end

# Build and drop one large tree up front so the heap is stretched before any
# measured iteration runs.
GCBench.make_tree(GCBench::STRETCH_TREE_DEPTH)

# Data that stays reachable for the entire run: a populated tree...
persistent_tree = GCBench::Node.new
GCBench.populate(GCBench::LONG_LIVED_TREE_DEPTH, persistent_tree)

# ...and an array whose slots 1..ARRAY_SIZE/2 hold the reciprocals 1.0/slot
# (every other slot stays nil).
persistent_floats = Array.new(GCBench::ARRAY_SIZE)
1.upto(GCBench::ARRAY_SIZE / 2) { |slot| persistent_floats[slot] = 1.0 / slot }

# Each benchmark iteration builds short-lived trees for every second depth
# from MIN_TREE_DEPTH up to MAX_TREE_DEPTH.
run_benchmark(10) do
  GCBench::MIN_TREE_DEPTH.step(GCBench::MAX_TREE_DEPTH, 2) { |depth| GCBench.time_construction(depth) }
end
112 changes: 112 additions & 0 deletions harness-gc/harness.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
require_relative "../harness/harness-common"

# Harness budget, each overridable through the environment variable of the same
# name: number of warmup iterations, minimum number of measured iterations, and
# minimum accumulated benchmark time.
WARMUP_ITRS = Integer(ENV.fetch('WARMUP_ITRS') { 15 })
MIN_BENCH_ITRS = Integer(ENV.fetch('MIN_BENCH_ITRS') { 10 })
MIN_BENCH_TIME = Integer(ENV.fetch('MIN_BENCH_TIME') { 10 })

# Identify the interpreter under test at the top of the output.
puts RUBY_DESCRIPTION

# Runs the given block once and returns the elapsed time, measured on the
# monotonic clock, as the difference between the readings taken right after
# and right before the block.
def realtime
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  yield
  finished_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  finished_at - started_at
end

# Returns the current GC.stat_heap result, or an empty hash when this Ruby
# does not provide GC.stat_heap.
def gc_stat_heap_snapshot
  GC.respond_to?(:stat_heap) ? GC.stat_heap : {}
end

# Computes per-heap differences between two heap-stat snapshots.
#
# For every heap in +after+, each Numeric stat that also exists in the matching
# +before+ entry is reported as (after - before). Non-numeric stats and stats
# missing from +before+ are skipped; heaps with no reportable stats are omitted.
def gc_stat_heap_delta(before, after)
  after.each_with_object({}) do |(heap_key, stats_now), result|
    stats_then = before[heap_key] || {}

    changes = stats_now.each_with_object({}) do |(stat_name, value_now), acc|
      comparable = value_now.is_a?(Numeric) && stats_then.key?(stat_name)
      acc[stat_name] = value_now - stats_then[stat_name] if comparable
    end

    result[heap_key] = changes unless changes.empty?
  end
end

# Runs the benchmark block repeatedly, recording wall-clock time and GC
# activity for every iteration, then reports the collected data.
#
# _num_itrs_hint - ignored by this harness; the iteration count is governed by
#                  WARMUP_ITRS, MIN_BENCH_ITRS and MIN_BENCH_TIME instead.
# **             - any keyword arguments are accepted and ignored.
# block          - the workload to measure; invoked once per iteration.
#
# The block keeps being invoked until at least WARMUP_ITRS + MIN_BENCH_ITRS
# iterations have run AND the accumulated time has reached MIN_BENCH_TIME
# seconds. The first WARMUP_ITRS iterations are treated as warmup, the rest as
# benchmark iterations.
#
# Side effects: prints one line per iteration plus a summary to stdout, and
# hands the timing series and the GC series to return_results.
def run_benchmark(_num_itrs_hint, **, &block)
  times = []
  marking_times = []
  sweeping_times = []
  gc_counts = []
  gc_heap_deltas = []
  total_time = 0
  num_itrs = 0

  # Marking/sweeping timings are only reported when this Ruby exposes them.
  has_marking = GC.stat.key?(:marking_time)
  has_sweeping = GC.stat.key?(:sweeping_time)

  header = "itr: time"
  header << " marking" if has_marking
  header << " sweeping" if has_sweeping
  header << " gc_count"
  puts header

  # do-while: the block always runs at least once before the exit condition
  # is evaluated.
  begin
    gc_before = GC.stat
    heap_before = gc_stat_heap_snapshot

    time = realtime(&block)
    num_itrs += 1

    gc_after = GC.stat
    heap_after = gc_stat_heap_snapshot

    # GC cost of this iteration = difference of the cumulative counters.
    time_ms = (1000 * time).to_i
    mark_delta = has_marking ? gc_after[:marking_time] - gc_before[:marking_time] : 0
    sweep_delta = has_sweeping ? gc_after[:sweeping_time] - gc_before[:sweeping_time] : 0
    count_delta = gc_after[:count] - gc_before[:count]

    itr_str = "%4s %6s" % ["##{num_itrs}:", "#{time_ms}ms"]
    itr_str << " %9.1fms" % mark_delta if has_marking
    itr_str << " %9.1fms" % sweep_delta if has_sweeping
    itr_str << " %9d" % count_delta
    puts itr_str

    times << time
    marking_times << mark_delta
    sweeping_times << sweep_delta
    gc_counts << count_delta
    gc_heap_deltas << gc_stat_heap_delta(heap_before, heap_after)
    total_time += time
  end until num_itrs >= WARMUP_ITRS + MIN_BENCH_ITRS && total_time >= MIN_BENCH_TIME

  warmup_range = 0...WARMUP_ITRS
  bench_range = WARMUP_ITRS..-1

  # Extra GC series reported alongside the timings. Keys are strings because
  # they are passed through to return_results as-is.
  extra = {}
  extra["gc_marking_time_warmup"] = marking_times[warmup_range]
  extra["gc_marking_time_bench"] = marking_times[bench_range]
  extra["gc_sweeping_time_warmup"] = sweeping_times[warmup_range]
  extra["gc_sweeping_time_bench"] = sweeping_times[bench_range]
  extra["gc_count_warmup"] = gc_counts[warmup_range]
  extra["gc_count_bench"] = gc_counts[bench_range]
  extra["gc_stat_heap_deltas"] = gc_heap_deltas[bench_range]

  return_results(times[warmup_range], times[bench_range], **extra)

  # The summary is only printed when there is more than one benchmark
  # iteration to average over.
  non_warmups = times[bench_range]
  if non_warmups.size > 1
    non_warmups_ms = ((non_warmups.sum / non_warmups.size) * 1000.0).to_i
    puts "Average of last #{non_warmups.size}, non-warmup iters: #{non_warmups_ms}ms"

    if has_marking
      mark_bench = marking_times[bench_range]
      # GC.stat timings can be Integers (whole milliseconds); divide as Float
      # so the average is not truncated before being printed with one decimal.
      avg_mark = mark_bench.sum.to_f / mark_bench.size
      puts "Average marking time: %.1fms" % avg_mark
    end

    if has_sweeping
      sweep_bench = sweeping_times[bench_range]
      # Same Float division as for the marking average above.
      avg_sweep = sweep_bench.sum.to_f / sweep_bench.size
      puts "Average sweeping time: %.1fms" % avg_sweep
    end
  end
end
12 changes: 9 additions & 3 deletions harness/harness-common.rb
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,13 @@ def get_rss
# Collect our own peak mem usage as soon as reasonable after finishing the last iteration.
# This method is only accurate to kilobytes, but is nicely portable and doesn't require
# any extra gems/dependencies.
mem = `ps -o rss= -p #{Process.pid}`
1024 * Integer(mem)
begin
mem = `ps -o rss= -p #{Process.pid}`
1024 * Integer(mem)
rescue ArgumentError, Errno::ENOENT
# ps failed (e.g. Nix procps on macOS). Fall back to peak RSS via getrusage.
get_maxrss || 0
end
end
end

Expand Down Expand Up @@ -135,11 +140,12 @@ def get_maxrss
yb_env_var = ENV.fetch("RESULT_JSON_PATH", default_path)
YB_OUTPUT_FILE = File.expand_path yb_env_var

def return_results(warmup_iterations, bench_iterations)
def return_results(warmup_iterations, bench_iterations, **extra)
ruby_bench_results = {
"RUBY_DESCRIPTION" => RUBY_DESCRIPTION,
"warmup" => warmup_iterations,
"bench" => bench_iterations,
**extra,
}

# Collect JIT stats before loading any additional code.
Expand Down
6 changes: 5 additions & 1 deletion lib/benchmark_runner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def write_csv(output_path, ruby_descriptions, table)
end

# Build output text string with metadata, table, and legend
def build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: false)
def build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: false, include_gc: false)
base_name, *other_names = ruby_descriptions.keys

output_str = +""
Expand All @@ -68,6 +68,10 @@ def build_output_text(ruby_descriptions, table, format, bench_failures, include_
if include_rss
output_str << "- RSS #{base_name}/#{name}: ratio of #{base_name}/#{name} RSS. Higher is better for #{name}. Above 1 means lower memory usage.\n"
end
if include_gc
output_str << "- mark #{base_name}/#{name}: ratio of GC marking time. Higher is better for #{name}. Above 1 represents faster marking.\n"
output_str << "- sweep #{base_name}/#{name}: ratio of GC sweeping time. Higher is better for #{name}. Above 1 represents faster sweeping.\n"
end
end
output_str << "- ***: p < 0.001, **: p < 0.01, *: p < 0.05 (Welch's t-test)\n"
end
Expand Down
2 changes: 1 addition & 1 deletion lib/benchmark_runner/cli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def run
BenchmarkRunner.write_csv(output_path, ruby_descriptions, table)

# Save the output in a text file that we can easily refer to
output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: args.rss)
output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: args.rss, include_gc: builder.include_gc?)
out_txt_path = output_path + ".txt"
File.open(out_txt_path, "w") { |f| f.write output_str }

Expand Down
Loading
Loading