From 49ae7748e4a95afa1fd9d08f4886952dfc1deca4 Mon Sep 17 00:00:00 2001 From: Sadeep Madurange Date: Tue, 21 Apr 2026 12:54:31 +0800 Subject: Rebase benchmark. --- bm/BM_HISTORY_1000_100.txt | 41 +++++------ bm/BM_REBASE_1000_100.txt | 62 +++++++++++++++++ bm/bm_history.pl | 90 +++++++++++++++--------- bm/bm_rebase.pl | 167 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 306 insertions(+), 54 deletions(-) create mode 100644 bm/BM_REBASE_1000_100.txt create mode 100644 bm/bm_rebase.pl diff --git a/bm/BM_HISTORY_1000_100.txt b/bm/BM_HISTORY_1000_100.txt index 00243fb..a31c179 100644 --- a/bm/BM_HISTORY_1000_100.txt +++ b/bm/BM_HISTORY_1000_100.txt @@ -6,54 +6,55 @@ SNAPSHOT: Commit #20 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.34s | 0.18s +Time | 0.43s | 0.08s Max RSS | 0.02 MB | 0.01 MB -Page Faults | Maj:0/Min:0 | Maj:0/Min:0 -Inodes | 1302 | 2122 -Repo Size | 19220 KB | 21944 KB +Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 +Inodes | 1299 | 2113 +Repo size | 19384 KB | 22056 KB ------------------------------------------------------------- SNAPSHOT: Commit #40 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.41s | 0.11s +Time | 0.42s | 0.11s Max RSS | 0.02 MB | 0.01 MB -Page Faults | Maj:0/Min:0 | Maj:0/Min:0 -Inodes | 1342 | 2924 -Repo Size | 19380 KB | 28848 KB +Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 +Inodes | 1339 | 2908 +Repo size | 19544 KB | 29020 KB ------------------------------------------------------------- SNAPSHOT: Commit #60 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.41s | 0.09s +Time | 0.42s | 0.10s Max RSS | 0.02 MB | 0.01 MB -Page Faults | Maj:0/Min:0 | Maj:0/Min:0 -Inodes | 1383 | 3719 -Repo Size | 19544 KB | 35796 KB +Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 +Inodes | 1379 | 3711 +Repo size | 19704 KB | 35920 KB ------------------------------------------------------------- SNAPSHOT: Commit #80 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.42s | 0.11s +Time | 0.41s | 0.11s Max RSS | 0.02 MB | 0.01 MB -Page Faults | Maj:0/Min:0 | Maj:0/Min:0 -Inodes | 1424 | 4532 -Repo Size | 19708 KB | 42868 KB +Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 +Inodes | 1419 | 4520 +Repo size | 19864 KB | 43176 KB ------------------------------------------------------------- SNAPSHOT: Commit #100 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.40s | 0.10s +Time | 0.42s | 0.11s Max RSS | 0.02 MB | 0.01 MB -Page Faults | Maj:0/Min:0 | Maj:0/Min:0 -Inodes | 1464 | 5341 -Repo Size | 19868 KB | 49840 KB +Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 +Inodes | 1459 | 5324 +Repo size | 20024 KB | 50232 KB ------------------------------------------------------------- +TOTAL URN REBASES: 0 diff --git a/bm/BM_REBASE_1000_100.txt b/bm/BM_REBASE_1000_100.txt new file mode 100644 index 0000000..7875767 --- /dev/null +++ b/bm/BM_REBASE_1000_100.txt @@ -0,0 +1,62 @@ +============================================================= + REBASE BENCHMARK: 1000 files (100 commits) + CONDITIONS: Depth=2, Files Mod=5%, Change=50% + INITIAL RAW DATA SIZE: 16976 KB +============================================================= + +SNAPSHOT: Commit #20 +------------------------------------------------------------- +METRIC | URN | GIT +----------------+----------------------+--------------------- +Time | 0.29s | 0.05s +Max RSS | 0.02 MB | 0.01 MB +Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 +Inodes | 1578 | 2334 +Repo size | 20404 KB | 19380 KB +------------------------------------------------------------- + +SNAPSHOT: Commit #40 +------------------------------------------------------------- +METRIC | URN | GIT +----------------+----------------------+--------------------- +Time | 0.54s | 0.05s +Max RSS | 0.02 MB | 0.01 MB +Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 +Inodes | 1607 | 3374 +Repo size | 20520 KB | 23788 KB +------------------------------------------------------------- + +SNAPSHOT: Commit #60 +------------------------------------------------------------- +METRIC | URN | GIT +----------------+----------------------+--------------------- +Time | 0.31s | 0.05s +Max RSS | 0.02 MB | 0.01 MB +Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 +Inodes | 1635 | 4414 +Repo size | 20632 KB | 28196 KB +------------------------------------------------------------- + +SNAPSHOT: Commit #80 +------------------------------------------------------------- +METRIC | URN | GIT +----------------+----------------------+--------------------- +Time | 0.29s | 0.05s +Max RSS | 0.02 MB | 0.01 MB +Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 +Inodes | 1664 | 5454 +Repo size | 20748 KB | 32596 KB +------------------------------------------------------------- + +SNAPSHOT: Commit #100 +------------------------------------------------------------- +METRIC | URN | GIT +----------------+----------------------+--------------------- +Time | 0.54s | 0.10s +Max RSS | 0.02 MB | 0.01 MB +Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 +Inodes | 1693 | 6495 +Repo size | 20864 KB | 37008 KB +------------------------------------------------------------- + +TOTAL URN REBASES: 273 diff --git a/bm/bm_history.pl b/bm/bm_history.pl index 5ebf7e1..5f2ea4a 100644 --- a/bm/bm_history.pl +++ b/bm/bm_history.pl @@ -19,6 +19,9 @@ my $bm_repo = "bm_repo"; my $sample_rate = int($total_commits / 5) || 1; my %results; +my $rebase_count = 0; +my %last_base_hashes; + sub get_size { my $dir = shift; return "0 KB" unless -d $dir; @@ -35,71 +38,89 @@ sub count_inodes { return $count || 0; } +sub track_rebases { + return unless -f ".urn/index"; + open(my $fh, '<', ".urn/index") or return; + while (<$fh>) { + chomp; + my @cols = split(/\t/); + next unless @cols >= 6; + my ($b_hash, $path) = ($cols[2], $cols[5]); + if (exists $last_base_hashes{$path} && $last_base_hashes{$path} ne $b_hash) { + $rebase_count++; + } + $last_base_hashes{$path} = $b_hash; + } + close($fh); +} + sub run_history_benchmark { my ($tool_name) = @_; print ">>> Starting History Benchmark: $tool_name\n"; remove_tree($bm_repo) if -d $bm_repo; - system("perl $seed_bin $files $depth > /dev/null 2>&1") == 0 - or die "FATAL: seed.pl failed.\n"; + system("perl $seed_bin $files $depth > /dev/null 2>&1"); - chdir($bm_repo) or die "FATAL: Could not enter $bm_repo: $!\n"; + chdir($bm_repo) or die $!; my $init_cmd = ($tool_name eq "URN") ? "perl $urn_bin init" : "git init"; + my $add_cmd = ($tool_name eq "URN") ? "perl $urn_bin add ." : "git add ."; + system("$init_cmd > /dev/null 2>&1"); + system("$add_cmd > /dev/null 2>&1"); + system(($tool_name eq "URN" ? "perl $urn_bin" : "git") . " commit -m 'initial' > /dev/null 2>&1"); + + track_rebases() if $tool_name eq "URN"; my @file_list; - find(sub { - push @file_list, $File::Find::name if -f $_ && $File::Find::name !~ m{\.(git|urn)}; - }, "."); - - my $count = scalar(@file_list); - die "FATAL: No files found in $bm_repo after seeding!" if $count == 0; - - # Use explicit add + commit to ensure untracked files are caught - my $initial_cmd = ($tool_name eq "URN") ? "perl $urn_bin add . && perl $urn_bin commit -m 'initial'" - : "git add . && git commit -m 'initial'"; - system("$initial_cmd > /dev/null 2>&1"); + find(sub { push @file_list, $File::Find::name if -f $_ && $File::Find::name !~ /\.(git|urn)/ }, "."); for my $i (1 .. $total_commits) { my $to_modify = int($files * 0.02) || 1; for (1 .. $to_modify) { my $target = $file_list[rand @file_list]; if (open(my $fh, '>>', $target)) { - print $fh "Commit $i: Mod for $tool_name test.\n"; + print $fh "Churn $i\n"; close($fh); } } - my $msg = "commit_$i"; - my $run_cmd = ($tool_name eq "URN") ? "perl $urn_bin add . && perl $urn_bin commit -m '$msg'" - : "git add . && git commit -m '$msg'"; - + my $cmd = ($tool_name eq "URN") + ? "perl $urn_bin add . && perl $urn_bin commit -m 'c$i'" + : "git add . && git commit -m 'c$i'"; + if ($i % $sample_rate == 0 || $i == $total_commits) { - # Run add and commit together inside the time wrapper - my $raw = `(/usr/bin/time -l sh -c "$run_cmd" > /dev/null) 2>&1`; + # Capture hardware metrics via /usr/bin/time -l + my $stats = `/usr/bin/time -l sh -c "$cmd" 2>&1 > /dev/null`; - my ($real) = $raw =~ /(\d+\.\d+)\s+real/; - my ($rss) = $raw =~ /(\d+)\s+maximum resident set size/; - my ($maj) = $raw =~ /(\d+)\s+page faults caused by physical I\/O/; - my ($min) = $raw =~ /(\d+)\s+page reclaims by virtual memory/; + my ($real, $rss, $maj, $min) = (0, 0, 0, 0); + if ($stats =~ /(\d+\.\d+)\s+real/) { $real = $1; } + if ($stats =~ /(\d+)\s+maximum resident set size/) { $rss = sprintf("%.2f MB", $1 / 1024 / 1024); } + if ($stats =~ /(\d+)\s+page reclaims/) { $min = $1; } + if ($stats =~ /(\d+)\s+page faults/) { $maj = $1; } + + if ($tool_name eq "URN") { track_rebases(); } my $meta = ($tool_name eq "URN") ? ".urn" : ".git"; $results{$i}{$tool_name} = { - real => $real // "0.00", - rss => $rss ? sprintf("%.2f MB", $rss / 1024 / 1024) : "0.00 MB", - faults => sprintf("Maj:%d/Min:%d", $maj // 0, $min // 0), + real => $real . "s", + rss => $rss, + faults => "Maj:$maj / Min:$min", inodes => count_inodes($meta), size => get_size($meta), }; print " [Commit $i] $tool_name sampled.\n"; } else { - system("$run_cmd > /dev/null 2>&1"); + system("$cmd > /dev/null 2>&1"); + if ($tool_name eq "URN") { track_rebases(); } } } chdir($base_dir); + remove_tree($bm_repo); } +$rebase_count = 0; +%last_base_hashes = (); run_history_benchmark("URN"); run_history_benchmark("GIT"); @@ -116,15 +137,16 @@ foreach my $i (sort { $a <=> $b } keys %results) { print $res "-------------------------------------------------------------\n"; printf $res "%-15s | %-20s | %-20s\n", "METRIC", "URN", "GIT"; print $res "----------------+----------------------+---------------------\n"; - printf $res "%-15s | %20s | %20s\n", "Time", $u->{real}."s", $g->{real}."s"; + printf $res "%-15s | %20s | %20s\n", "Time", $u->{real}, $g->{real}; printf $res "%-15s | %20s | %20s\n", "Max RSS", $u->{rss}, $g->{rss}; - printf $res "%-15s | %20s | %20s\n", "Page Faults", $u->{faults}, $g->{faults}; + printf $res "%-15s | %20s | %20s\n", "Page faults", $u->{faults}, $g->{faults}; printf $res "%-15s | %20s | %20s\n", "Inodes", $u->{inodes}, $g->{inodes}; - printf $res "%-15s | %20s | %20s\n", "Repo Size", $u->{size}, $g->{size}; + printf $res "%-15s | %20s | %20s\n", "Repo size", $u->{size}, $g->{size}; print $res "-------------------------------------------------------------\n\n"; } +print $res "TOTAL URN REBASES: $rebase_count\n"; close($res); -print "\nRun complete! Opening results...\n"; -system($ENV{PAGER} || 'less', $out_file); +my $pager = $ENV{PAGER} || 'less'; +exec $pager, $out_file; diff --git a/bm/bm_rebase.pl b/bm/bm_rebase.pl new file mode 100644 index 0000000..3cb0fd5 --- /dev/null +++ b/bm/bm_rebase.pl @@ -0,0 +1,167 @@ +#!/usr/bin/perl +use strict; +use warnings; +use File::Spec; +use File::Path qw(remove_tree); +use File::Find; +use Cwd qw(getcwd abs_path); + +my ($files, $depth, $total_commits, $file_perc, $change_perc) = @ARGV; +if (!defined $files || !defined $depth || !defined $total_commits || !defined $file_perc || !defined $change_perc) { + die "Usage: perl bm_rebase.pl \n"; +} + +my $base_dir = getcwd(); +my $urn_bin = abs_path(File::Spec->catfile("..", "urn")); +my $seed_bin = abs_path("seed.pl"); +my $bm_repo = "bm_repo"; + +my $sample_rate = int($total_commits / 5) || 1; +my %results; +my $initial_repo_size = "0 KB"; + +my $rebase_count = 0; +my %last_base_hashes; + +sub get_size { + my $dir = shift; + return "0 KB" unless -d $dir; + my $size = `du -sk $dir 2>/dev/null`; + $size =~ /^(\d+)/; + return ($1 || 0) . " KB"; +} + +sub count_inodes { + my $dir = shift; + return 0 unless -d $dir; + my $count = `find $dir 2>/dev/null | wc -l`; + $count =~ s/\s+//g; + return $count || 0; +} + +sub track_rebases { + return unless -f ".urn/index"; + open(my $fh, '<', ".urn/index") or return; + while (<$fh>) { + chomp; + my @cols = split(/\t/); + next unless @cols >= 6; + my ($b_hash, $path) = ($cols[2], $cols[5]); + if (exists $last_base_hashes{$path} && $last_base_hashes{$path} ne $b_hash) { + $rebase_count++; + } + $last_base_hashes{$path} = $b_hash; + } + close($fh); +} + +sub run_rebase_benchmark { + my ($tool_name) = @_; + print ">>> Starting REBASE BENCHMARK: $tool_name\n"; + + remove_tree($bm_repo) if -d $bm_repo; + system("perl $seed_bin $files $depth > /dev/null 2>&1"); + + # Capture raw directory size before VCS initialization + if ($initial_repo_size eq "0 KB") { + $initial_repo_size = get_size($bm_repo); + } + + chdir($bm_repo) or die $!; + + my $init_cmd = ($tool_name eq "URN") ? "perl $urn_bin init" : "git init"; + my $add_cmd = ($tool_name eq "URN") ? "perl $urn_bin add ." : "git add ."; + + system("$init_cmd > /dev/null 2>&1"); + system("$add_cmd > /dev/null 2>&1"); + system(($tool_name eq "URN" ? "perl $urn_bin" : "git") . " commit -m 'initial' > /dev/null 2>&1"); + + track_rebases() if $tool_name eq "URN"; + + my @file_list; + find(sub { push @file_list, $File::Find::name if -f $_ && $File::Find::name !~ /\.(git|urn)/ }, "."); + + my $num_to_change = int(scalar(@file_list) * ($file_perc / 100)) || 1; + my @target_files = @file_list[0 .. $num_to_change - 1]; + + for my $i (1 .. $total_commits) { + for my $target (@target_files) { + open(my $fh, '<', $target) or next; + my @lines = <$fh>; + close($fh); + + my $lines_to_change = int(scalar(@lines) * ($change_perc / 100)) || 1; + splice(@lines, 0, $lines_to_change, "Commit $i: block change\n"); + + if (open(my $out, '>', $target)) { + print $out @lines; + close($out); + } + } + + my $cmd = ($tool_name eq "URN") + ? "perl $urn_bin add . && perl $urn_bin commit -m 'c$i'" + : "git add . && git commit -m 'c$i'"; + + if ($i % $sample_rate == 0 || $i == $total_commits) { + my $stats = `/usr/bin/time -l sh -c "$cmd" 2>&1 > /dev/null`; + my ($real, $rss, $maj, $min) = (0, 0, 0, 0); + if ($stats =~ /(\d+\.\d+)\s+real/) { $real = $1; } + if ($stats =~ /(\d+)\s+maximum resident set size/) { $rss = sprintf("%.2f MB", $1 / 1024 / 1024); } + if ($stats =~ /(\d+)\s+page reclaims/) { $min = $1; } + if ($stats =~ /(\d+)\s+page faults/) { $maj = $1; } + + if ($tool_name eq "URN") { track_rebases(); } + + my $meta = ($tool_name eq "URN") ? ".urn" : ".git"; + $results{$i}{$tool_name} = { + real => $real . "s", + rss => $rss, + faults => "Maj:$maj / Min:$min", + inodes => count_inodes($meta), + size => get_size($meta), + }; + print " [Commit $i] $tool_name sampled.\n"; + } else { + system("$cmd > /dev/null 2>&1"); + if ($tool_name eq "URN") { track_rebases(); } + } + } + chdir($base_dir); + remove_tree($bm_repo); +} + +$rebase_count = 0; +%last_base_hashes = (); +run_rebase_benchmark("URN"); +run_rebase_benchmark("GIT"); + +my $out_file = "BM_REBASE_${files}_${total_commits}.txt"; +open(my $res, '>', $out_file) or die $!; +print $res "=============================================================\n"; +print $res " REBASE BENCHMARK: $files files ($total_commits commits)\n"; +print $res " CONDITIONS: Depth=$depth, Files Mod=$file_perc%, Change=$change_perc%\n"; +print $res " INITIAL RAW DATA SIZE: $initial_repo_size\n"; +print $res "=============================================================\n\n"; + +foreach my $i (sort { $a <=> $b } keys %results) { + my $u = $results{$i}{"URN"}; + my $g = $results{$i}{"GIT"}; + print $res "SNAPSHOT: Commit #$i\n"; + print $res "-------------------------------------------------------------\n"; + printf $res "%-15s | %-20s | %-20s\n", "METRIC", "URN", "GIT"; + print $res "----------------+----------------------+---------------------\n"; + printf $res "%-15s | %20s | %20s\n", "Time", $u->{real}, $g->{real}; + printf $res "%-15s | %20s | %20s\n", "Max RSS", $u->{rss}, $g->{rss}; + printf $res "%-15s | %20s | %20s\n", "Page faults", $u->{faults}, $g->{faults}; + printf $res "%-15s | %20s | %20s\n", "Inodes", $u->{inodes}, $g->{inodes}; + printf $res "%-15s | %20s | %20s\n", "Repo size", $u->{size}, $g->{size}; + print $res "-------------------------------------------------------------\n\n"; +} + +print $res "TOTAL URN REBASES: $rebase_count\n"; +close($res); + +my $pager = $ENV{PAGER} || 'less'; +exec $pager, $out_file; + -- cgit v1.2.3