summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Sadeep Madurange <sadeep@asciimx.com> 2026-04-21 12:54:31 +0800
committer: Sadeep Madurange <sadeep@asciimx.com> 2026-04-21 12:54:31 +0800
commit: 49ae7748e4a95afa1fd9d08f4886952dfc1deca4 (patch)
tree: 17d6d63647e0d3a9ef19fb10331fe4e5c51c9be1
parent: 57eb41d13914c2fdadcb863d36d73848a5fd589b (diff)
download: urn-49ae7748e4a95afa1fd9d08f4886952dfc1deca4.tar.gz
Rebase benchmark.
-rw-r--r-- bm/BM_HISTORY_1000_100.txt 41
-rw-r--r-- bm/BM_REBASE_1000_100.txt 62
-rw-r--r-- bm/bm_history.pl 90
-rw-r--r-- bm/bm_rebase.pl 167
4 files changed, 306 insertions, 54 deletions
diff --git a/bm/BM_HISTORY_1000_100.txt b/bm/BM_HISTORY_1000_100.txt
index 00243fb..a31c179 100644
--- a/bm/BM_HISTORY_1000_100.txt
+++ b/bm/BM_HISTORY_1000_100.txt
@@ -6,54 +6,55 @@ SNAPSHOT: Commit #20
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.34s | 0.18s
+Time | 0.43s | 0.08s
Max RSS | 0.02 MB | 0.01 MB
-Page Faults | Maj:0/Min:0 | Maj:0/Min:0
-Inodes | 1302 | 2122
-Repo Size | 19220 KB | 21944 KB
+Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
+Inodes | 1299 | 2113
+Repo size | 19384 KB | 22056 KB
-------------------------------------------------------------
SNAPSHOT: Commit #40
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.41s | 0.11s
+Time | 0.42s | 0.11s
Max RSS | 0.02 MB | 0.01 MB
-Page Faults | Maj:0/Min:0 | Maj:0/Min:0
-Inodes | 1342 | 2924
-Repo Size | 19380 KB | 28848 KB
+Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
+Inodes | 1339 | 2908
+Repo size | 19544 KB | 29020 KB
-------------------------------------------------------------
SNAPSHOT: Commit #60
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.41s | 0.09s
+Time | 0.42s | 0.10s
Max RSS | 0.02 MB | 0.01 MB
-Page Faults | Maj:0/Min:0 | Maj:0/Min:0
-Inodes | 1383 | 3719
-Repo Size | 19544 KB | 35796 KB
+Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
+Inodes | 1379 | 3711
+Repo size | 19704 KB | 35920 KB
-------------------------------------------------------------
SNAPSHOT: Commit #80
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.42s | 0.11s
+Time | 0.41s | 0.11s
Max RSS | 0.02 MB | 0.01 MB
-Page Faults | Maj:0/Min:0 | Maj:0/Min:0
-Inodes | 1424 | 4532
-Repo Size | 19708 KB | 42868 KB
+Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
+Inodes | 1419 | 4520
+Repo size | 19864 KB | 43176 KB
-------------------------------------------------------------
SNAPSHOT: Commit #100
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.40s | 0.10s
+Time | 0.42s | 0.11s
Max RSS | 0.02 MB | 0.01 MB
-Page Faults | Maj:0/Min:0 | Maj:0/Min:0
-Inodes | 1464 | 5341
-Repo Size | 19868 KB | 49840 KB
+Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
+Inodes | 1459 | 5324
+Repo size | 20024 KB | 50232 KB
-------------------------------------------------------------
+TOTAL URN REBASES: 0
diff --git a/bm/BM_REBASE_1000_100.txt b/bm/BM_REBASE_1000_100.txt
new file mode 100644
index 0000000..7875767
--- /dev/null
+++ b/bm/BM_REBASE_1000_100.txt
@@ -0,0 +1,62 @@
+=============================================================
+ REBASE BENCHMARK: 1000 files (100 commits)
+ CONDITIONS: Depth=2, Files Mod=5%, Change=50%
+ INITIAL RAW DATA SIZE: 16976 KB
+=============================================================
+
+SNAPSHOT: Commit #20
+-------------------------------------------------------------
+METRIC | URN | GIT
+----------------+----------------------+---------------------
+Time | 0.29s | 0.05s
+Max RSS | 0.02 MB | 0.01 MB
+Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
+Inodes | 1578 | 2334
+Repo size | 20404 KB | 19380 KB
+-------------------------------------------------------------
+
+SNAPSHOT: Commit #40
+-------------------------------------------------------------
+METRIC | URN | GIT
+----------------+----------------------+---------------------
+Time | 0.54s | 0.05s
+Max RSS | 0.02 MB | 0.01 MB
+Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
+Inodes | 1607 | 3374
+Repo size | 20520 KB | 23788 KB
+-------------------------------------------------------------
+
+SNAPSHOT: Commit #60
+-------------------------------------------------------------
+METRIC | URN | GIT
+----------------+----------------------+---------------------
+Time | 0.31s | 0.05s
+Max RSS | 0.02 MB | 0.01 MB
+Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
+Inodes | 1635 | 4414
+Repo size | 20632 KB | 28196 KB
+-------------------------------------------------------------
+
+SNAPSHOT: Commit #80
+-------------------------------------------------------------
+METRIC | URN | GIT
+----------------+----------------------+---------------------
+Time | 0.29s | 0.05s
+Max RSS | 0.02 MB | 0.01 MB
+Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
+Inodes | 1664 | 5454
+Repo size | 20748 KB | 32596 KB
+-------------------------------------------------------------
+
+SNAPSHOT: Commit #100
+-------------------------------------------------------------
+METRIC | URN | GIT
+----------------+----------------------+---------------------
+Time | 0.54s | 0.10s
+Max RSS | 0.02 MB | 0.01 MB
+Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
+Inodes | 1693 | 6495
+Repo size | 20864 KB | 37008 KB
+-------------------------------------------------------------
+
+TOTAL URN REBASES: 273
diff --git a/bm/bm_history.pl b/bm/bm_history.pl
index 5ebf7e1..5f2ea4a 100644
--- a/bm/bm_history.pl
+++ b/bm/bm_history.pl
@@ -19,6 +19,9 @@ my $bm_repo = "bm_repo";
my $sample_rate = int($total_commits / 5) || 1;
my %results;
+my $rebase_count = 0;
+my %last_base_hashes;
+
sub get_size {
my $dir = shift;
return "0 KB" unless -d $dir;
@@ -35,71 +38,89 @@ sub count_inodes {
return $count || 0;
}
+sub track_rebases {
+ return unless -f ".urn/index";
+ open(my $fh, '<', ".urn/index") or return;
+ while (<$fh>) {
+ chomp;
+ my @cols = split(/\t/);
+ next unless @cols >= 6;
+ my ($b_hash, $path) = ($cols[2], $cols[5]);
+ if (exists $last_base_hashes{$path} && $last_base_hashes{$path} ne $b_hash) {
+ $rebase_count++;
+ }
+ $last_base_hashes{$path} = $b_hash;
+ }
+ close($fh);
+}
+
sub run_history_benchmark {
my ($tool_name) = @_;
print ">>> Starting History Benchmark: $tool_name\n";
remove_tree($bm_repo) if -d $bm_repo;
- system("perl $seed_bin $files $depth > /dev/null 2>&1") == 0
- or die "FATAL: seed.pl failed.\n";
+ system("perl $seed_bin $files $depth > /dev/null 2>&1");
- chdir($bm_repo) or die "FATAL: Could not enter $bm_repo: $!\n";
+ chdir($bm_repo) or die $!;
my $init_cmd = ($tool_name eq "URN") ? "perl $urn_bin init" : "git init";
+ my $add_cmd = ($tool_name eq "URN") ? "perl $urn_bin add ." : "git add .";
+
system("$init_cmd > /dev/null 2>&1");
+ system("$add_cmd > /dev/null 2>&1");
+ system(($tool_name eq "URN" ? "perl $urn_bin" : "git") . " commit -m 'initial' > /dev/null 2>&1");
+
+ track_rebases() if $tool_name eq "URN";
my @file_list;
- find(sub {
- push @file_list, $File::Find::name if -f $_ && $File::Find::name !~ m{\.(git|urn)};
- }, ".");
-
- my $count = scalar(@file_list);
- die "FATAL: No files found in $bm_repo after seeding!" if $count == 0;
-
- # Use explicit add + commit to ensure untracked files are caught
- my $initial_cmd = ($tool_name eq "URN") ? "perl $urn_bin add . && perl $urn_bin commit -m 'initial'"
- : "git add . && git commit -m 'initial'";
- system("$initial_cmd > /dev/null 2>&1");
+ find(sub { push @file_list, $File::Find::name if -f $_ && $File::Find::name !~ /\.(git|urn)/ }, ".");
for my $i (1 .. $total_commits) {
my $to_modify = int($files * 0.02) || 1;
for (1 .. $to_modify) {
my $target = $file_list[rand @file_list];
if (open(my $fh, '>>', $target)) {
- print $fh "Commit $i: Mod for $tool_name test.\n";
+ print $fh "Churn $i\n";
close($fh);
}
}
- my $msg = "commit_$i";
- my $run_cmd = ($tool_name eq "URN") ? "perl $urn_bin add . && perl $urn_bin commit -m '$msg'"
- : "git add . && git commit -m '$msg'";
-
+ my $cmd = ($tool_name eq "URN")
+ ? "perl $urn_bin add . && perl $urn_bin commit -m 'c$i'"
+ : "git add . && git commit -m 'c$i'";
+
if ($i % $sample_rate == 0 || $i == $total_commits) {
- # Run add and commit together inside the time wrapper
- my $raw = `(/usr/bin/time -l sh -c "$run_cmd" > /dev/null) 2>&1`;
+ # Capture hardware metrics via /usr/bin/time -l
+ my $stats = `/usr/bin/time -l sh -c "$cmd" 2>&1 > /dev/null`;
- my ($real) = $raw =~ /(\d+\.\d+)\s+real/;
- my ($rss) = $raw =~ /(\d+)\s+maximum resident set size/;
- my ($maj) = $raw =~ /(\d+)\s+page faults caused by physical I\/O/;
- my ($min) = $raw =~ /(\d+)\s+page reclaims by virtual memory/;
+ my ($real, $rss, $maj, $min) = (0, 0, 0, 0);
+ if ($stats =~ /(\d+\.\d+)\s+real/) { $real = $1; }
+ if ($stats =~ /(\d+)\s+maximum resident set size/) { $rss = sprintf("%.2f MB", $1 / 1024 / 1024); }
+ if ($stats =~ /(\d+)\s+page reclaims/) { $min = $1; }
+ if ($stats =~ /(\d+)\s+page faults/) { $maj = $1; }
+
+ if ($tool_name eq "URN") { track_rebases(); }
my $meta = ($tool_name eq "URN") ? ".urn" : ".git";
$results{$i}{$tool_name} = {
- real => $real // "0.00",
- rss => $rss ? sprintf("%.2f MB", $rss / 1024 / 1024) : "0.00 MB",
- faults => sprintf("Maj:%d/Min:%d", $maj // 0, $min // 0),
+ real => $real . "s",
+ rss => $rss,
+ faults => "Maj:$maj / Min:$min",
inodes => count_inodes($meta),
size => get_size($meta),
};
print " [Commit $i] $tool_name sampled.\n";
} else {
- system("$run_cmd > /dev/null 2>&1");
+ system("$cmd > /dev/null 2>&1");
+ if ($tool_name eq "URN") { track_rebases(); }
}
}
chdir($base_dir);
+ remove_tree($bm_repo);
}
+$rebase_count = 0;
+%last_base_hashes = ();
run_history_benchmark("URN");
run_history_benchmark("GIT");
@@ -116,15 +137,16 @@ foreach my $i (sort { $a <=> $b } keys %results) {
print $res "-------------------------------------------------------------\n";
printf $res "%-15s | %-20s | %-20s\n", "METRIC", "URN", "GIT";
print $res "----------------+----------------------+---------------------\n";
- printf $res "%-15s | %20s | %20s\n", "Time", $u->{real}."s", $g->{real}."s";
+ printf $res "%-15s | %20s | %20s\n", "Time", $u->{real}, $g->{real};
printf $res "%-15s | %20s | %20s\n", "Max RSS", $u->{rss}, $g->{rss};
- printf $res "%-15s | %20s | %20s\n", "Page Faults", $u->{faults}, $g->{faults};
+ printf $res "%-15s | %20s | %20s\n", "Page faults", $u->{faults}, $g->{faults};
printf $res "%-15s | %20s | %20s\n", "Inodes", $u->{inodes}, $g->{inodes};
- printf $res "%-15s | %20s | %20s\n", "Repo Size", $u->{size}, $g->{size};
+ printf $res "%-15s | %20s | %20s\n", "Repo size", $u->{size}, $g->{size};
print $res "-------------------------------------------------------------\n\n";
}
+print $res "TOTAL URN REBASES: $rebase_count\n";
close($res);
-print "\nRun complete! Opening results...\n";
-system($ENV{PAGER} || 'less', $out_file);
+my $pager = $ENV{PAGER} || 'less';
+exec $pager, $out_file;
diff --git a/bm/bm_rebase.pl b/bm/bm_rebase.pl
new file mode 100644
index 0000000..3cb0fd5
--- /dev/null
+++ b/bm/bm_rebase.pl
@@ -0,0 +1,167 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use File::Spec;
+use File::Path qw(remove_tree);
+use File::Find;
+use Cwd qw(getcwd abs_path);
+
+my ($files, $depth, $total_commits, $file_perc, $change_perc) = @ARGV;
+if (!defined $files || !defined $depth || !defined $total_commits || !defined $file_perc || !defined $change_perc) {
+ die "Usage: perl bm_rebase.pl <file_count> <depth> <total_commits> <file_perc> <change_perc>\n";
+}
+
+my $base_dir = getcwd();
+my $urn_bin = abs_path(File::Spec->catfile("..", "urn"));
+my $seed_bin = abs_path("seed.pl");
+my $bm_repo = "bm_repo";
+
+my $sample_rate = int($total_commits / 5) || 1;
+my %results;
+my $initial_repo_size = "0 KB";
+
+my $rebase_count = 0;
+my %last_base_hashes;
+
+sub get_size {
+ my $dir = shift;
+ return "0 KB" unless -d $dir;
+ my $size = `du -sk $dir 2>/dev/null`;
+ $size =~ /^(\d+)/;
+ return ($1 || 0) . " KB";
+}
+
+sub count_inodes {
+ my $dir = shift;
+ return 0 unless -d $dir;
+ my $count = `find $dir 2>/dev/null | wc -l`;
+ $count =~ s/\s+//g;
+ return $count || 0;
+}
+
+sub track_rebases {
+ return unless -f ".urn/index";
+ open(my $fh, '<', ".urn/index") or return;
+ while (<$fh>) {
+ chomp;
+ my @cols = split(/\t/);
+ next unless @cols >= 6;
+ my ($b_hash, $path) = ($cols[2], $cols[5]);
+ if (exists $last_base_hashes{$path} && $last_base_hashes{$path} ne $b_hash) {
+ $rebase_count++;
+ }
+ $last_base_hashes{$path} = $b_hash;
+ }
+ close($fh);
+}
+
+sub run_rebase_benchmark {
+ my ($tool_name) = @_;
+ print ">>> Starting REBASE BENCHMARK: $tool_name\n";
+
+ remove_tree($bm_repo) if -d $bm_repo;
+ system("perl $seed_bin $files $depth > /dev/null 2>&1");
+
+ # Capture raw directory size before VCS initialization
+ if ($initial_repo_size eq "0 KB") {
+ $initial_repo_size = get_size($bm_repo);
+ }
+
+ chdir($bm_repo) or die $!;
+
+ my $init_cmd = ($tool_name eq "URN") ? "perl $urn_bin init" : "git init";
+ my $add_cmd = ($tool_name eq "URN") ? "perl $urn_bin add ." : "git add .";
+
+ system("$init_cmd > /dev/null 2>&1");
+ system("$add_cmd > /dev/null 2>&1");
+ system(($tool_name eq "URN" ? "perl $urn_bin" : "git") . " commit -m 'initial' > /dev/null 2>&1");
+
+ track_rebases() if $tool_name eq "URN";
+
+ my @file_list;
+ find(sub { push @file_list, $File::Find::name if -f $_ && $File::Find::name !~ /\.(git|urn)/ }, ".");
+
+ my $num_to_change = int(scalar(@file_list) * ($file_perc / 100)) || 1;
+ my @target_files = @file_list[0 .. $num_to_change - 1];
+
+ for my $i (1 .. $total_commits) {
+ for my $target (@target_files) {
+ open(my $fh, '<', $target) or next;
+ my @lines = <$fh>;
+ close($fh);
+
+ my $lines_to_change = int(scalar(@lines) * ($change_perc / 100)) || 1;
+ splice(@lines, 0, $lines_to_change, "Commit $i: block change\n");
+
+ if (open(my $out, '>', $target)) {
+ print $out @lines;
+ close($out);
+ }
+ }
+
+ my $cmd = ($tool_name eq "URN")
+ ? "perl $urn_bin add . && perl $urn_bin commit -m 'c$i'"
+ : "git add . && git commit -m 'c$i'";
+
+ if ($i % $sample_rate == 0 || $i == $total_commits) {
+ my $stats = `/usr/bin/time -l sh -c "$cmd" 2>&1 > /dev/null`;
+ my ($real, $rss, $maj, $min) = (0, 0, 0, 0);
+ if ($stats =~ /(\d+\.\d+)\s+real/) { $real = $1; }
+ if ($stats =~ /(\d+)\s+maximum resident set size/) { $rss = sprintf("%.2f MB", $1 / 1024 / 1024); }
+ if ($stats =~ /(\d+)\s+page reclaims/) { $min = $1; }
+ if ($stats =~ /(\d+)\s+page faults/) { $maj = $1; }
+
+ if ($tool_name eq "URN") { track_rebases(); }
+
+ my $meta = ($tool_name eq "URN") ? ".urn" : ".git";
+ $results{$i}{$tool_name} = {
+ real => $real . "s",
+ rss => $rss,
+ faults => "Maj:$maj / Min:$min",
+ inodes => count_inodes($meta),
+ size => get_size($meta),
+ };
+ print " [Commit $i] $tool_name sampled.\n";
+ } else {
+ system("$cmd > /dev/null 2>&1");
+ if ($tool_name eq "URN") { track_rebases(); }
+ }
+ }
+ chdir($base_dir);
+ remove_tree($bm_repo);
+}
+
+$rebase_count = 0;
+%last_base_hashes = ();
+run_rebase_benchmark("URN");
+run_rebase_benchmark("GIT");
+
+my $out_file = "BM_REBASE_${files}_${total_commits}.txt";
+open(my $res, '>', $out_file) or die $!;
+print $res "=============================================================\n";
+print $res " REBASE BENCHMARK: $files files ($total_commits commits)\n";
+print $res " CONDITIONS: Depth=$depth, Files Mod=$file_perc%, Change=$change_perc%\n";
+print $res " INITIAL RAW DATA SIZE: $initial_repo_size\n";
+print $res "=============================================================\n\n";
+
+foreach my $i (sort { $a <=> $b } keys %results) {
+ my $u = $results{$i}{"URN"};
+ my $g = $results{$i}{"GIT"};
+ print $res "SNAPSHOT: Commit #$i\n";
+ print $res "-------------------------------------------------------------\n";
+ printf $res "%-15s | %-20s | %-20s\n", "METRIC", "URN", "GIT";
+ print $res "----------------+----------------------+---------------------\n";
+ printf $res "%-15s | %20s | %20s\n", "Time", $u->{real}, $g->{real};
+ printf $res "%-15s | %20s | %20s\n", "Max RSS", $u->{rss}, $g->{rss};
+ printf $res "%-15s | %20s | %20s\n", "Page faults", $u->{faults}, $g->{faults};
+ printf $res "%-15s | %20s | %20s\n", "Inodes", $u->{inodes}, $g->{inodes};
+ printf $res "%-15s | %20s | %20s\n", "Repo size", $u->{size}, $g->{size};
+ print $res "-------------------------------------------------------------\n\n";
+}
+
+print $res "TOTAL URN REBASES: $rebase_count\n";
+close($res);
+
+my $pager = $ENV{PAGER} || 'less';
+exec $pager, $out_file;
+