summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSadeep Madurange <sadeep@asciimx.com>2026-04-29 14:45:41 +0800
committerSadeep Madurange <sadeep@asciimx.com>2026-04-30 13:03:34 +0800
commit79d9ec2bdef0a82172fa0aa56f12004bef206c04 (patch)
treea67429a13b094145823bb5669d49b0f37401a6d4
parent1b64a87816c06526229f9dc03b12f1092541ae59 (diff)
downloadurn-79d9ec2bdef0a82172fa0aa56f12004bef206c04.tar.gz
Compress base files, use c-style strings in benchmarks.HEADmaster
-rw-r--r--bm/BM_COMMIT_1000_100.txt (renamed from bm/BM_HISTORY_1000_100.txt)40
-rw-r--r--bm/BM_REBASE_1000_100.txt61
-rw-r--r--bm/BM_REPO_SIZE_200_20.txt16
-rw-r--r--bm/BM_REPO_SIZE_5000_50.txt14
-rw-r--r--bm/bm_commit.pl (renamed from bm/bm_rebase.pl)151
-rw-r--r--bm/bm_history.pl152
-rw-r--r--bm/bm_size.pl2
-rw-r--r--bm/seed.pl69
-rw-r--r--urn91
9 files changed, 250 insertions, 346 deletions
diff --git a/bm/BM_HISTORY_1000_100.txt b/bm/BM_COMMIT_1000_100.txt
index 2885405..73a2b5a 100644
--- a/bm/BM_HISTORY_1000_100.txt
+++ b/bm/BM_COMMIT_1000_100.txt
@@ -1,60 +1,68 @@
=============================================================
- HISTORY BENCHMARK: 1000 files (100 commits)
+ COMMIT BENCHMARK: 1000 files (100 commits)
+ CONDITIONS: Depth=2, Files Mod=0.5%, Line Mod=5%
+ INITIAL REPO SIZE: 17332 KB
=============================================================
SNAPSHOT: Commit #20
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.35s | 0.09s
+Time | 0.29s | 0.03s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1301 | 2121
-Repo size | 18868 KB | 22076 KB
+Inodes | 1300 | 1425
+Repo size | 6836 KB | 8296 KB
-------------------------------------------------------------
SNAPSHOT: Commit #40
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.46s | 0.11s
+Time | 0.29s | 0.03s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1341 | 2929
-Repo size | 19028 KB | 29136 KB
+Inodes | 1340 | 1566
+Repo size | 7332 KB | 9268 KB
-------------------------------------------------------------
SNAPSHOT: Commit #60
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.43s | 0.12s
+Time | 0.35s | 0.03s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1381 | 3732
-Repo size | 19188 KB | 36088 KB
+Inodes | 1381 | 1706
+Repo size | 7896 KB | 10236 KB
-------------------------------------------------------------
SNAPSHOT: Commit #80
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.45s | 0.08s
+Time | 0.35s | 0.03s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1421 | 4538
-Repo size | 19348 KB | 43104 KB
+Inodes | 1421 | 1847
+Repo size | 8456 KB | 11200 KB
-------------------------------------------------------------
SNAPSHOT: Commit #100
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.44s | 0.10s
+Time | 0.35s | 0.03s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1462 | 5346
-Repo size | 19512 KB | 49980 KB
+Inodes | 1462 | 1987
+Repo size | 9020 KB | 12168 KB
+-------------------------------------------------------------
+
+AFTER GIT GC
+-------------------------------------------------------------
+Final Size | 9020 KB | 3812 KB
+Final Inodes | 1462 | 41
-------------------------------------------------------------
TOTAL URN REBASES: 0
diff --git a/bm/BM_REBASE_1000_100.txt b/bm/BM_REBASE_1000_100.txt
deleted file mode 100644
index 7550570..0000000
--- a/bm/BM_REBASE_1000_100.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-=============================================================
- REBASE BENCHMARK: 1000 files (100 commits)
- CONDITIONS: Depth=2, Files Mod=5%, Change=50%
-=============================================================
-
-SNAPSHOT: Commit #20
--------------------------------------------------------------
-METRIC | URN | GIT
-----------------+----------------------+---------------------
-Time | 0.65s | 0.08s
-Max RSS | 0.02 MB | 0.01 MB
-Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 2272 | 2282
-Repo size | 38504 KB | 22700 KB
--------------------------------------------------------------
-
-SNAPSHOT: Commit #40
--------------------------------------------------------------
-METRIC | URN | GIT
-----------------+----------------------+---------------------
-Time | 0.59s | 0.08s
-Max RSS | 0.02 MB | 0.01 MB
-Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 3332 | 3322
-Repo size | 59384 KB | 31188 KB
--------------------------------------------------------------
-
-SNAPSHOT: Commit #60
--------------------------------------------------------------
-METRIC | URN | GIT
-----------------+----------------------+---------------------
-Time | 0.57s | 0.08s
-Max RSS | 0.02 MB | 0.01 MB
-Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 4392 | 4362
-Repo size | 80264 KB | 39676 KB
--------------------------------------------------------------
-
-SNAPSHOT: Commit #80
--------------------------------------------------------------
-METRIC | URN | GIT
-----------------+----------------------+---------------------
-Time | 0.57s | 0.08s
-Max RSS | 0.02 MB | 0.01 MB
-Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 5452 | 5402
-Repo size | 101144 KB | 48156 KB
--------------------------------------------------------------
-
-SNAPSHOT: Commit #100
--------------------------------------------------------------
-METRIC | URN | GIT
-----------------+----------------------+---------------------
-Time | 0.57s | 0.08s
-Max RSS | 0.02 MB | 0.01 MB
-Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 6512 | 6442
-Repo size | 122024 KB | 56644 KB
--------------------------------------------------------------
-
-TOTAL URN REBASES: 4950
diff --git a/bm/BM_REPO_SIZE_200_20.txt b/bm/BM_REPO_SIZE_200_20.txt
index 69531c5..6c055bd 100644
--- a/bm/BM_REPO_SIZE_200_20.txt
+++ b/bm/BM_REPO_SIZE_200_20.txt
@@ -17,22 +17,22 @@ ACTION: Add
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.16s | 0.17s
+Time | 0.17s | 0.17s
Max RSS | 0.02 MB | 0.00 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 225 | 360
-Repo size | 3700 KB | 3348 KB
+Inodes | 225 | 370
+Repo size | 3608 KB | 3472 KB
-------------------------------------------------------------
ACTION: Commit
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.17s | 0.04s
+Time | 0.17s | 0.05s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 347 | 397
-Repo size | 4212 KB | 3496 KB
+Inodes | 352 | 408
+Repo size | 4140 KB | 3624 KB
-------------------------------------------------------------
ACTION: Status(Clean)
@@ -42,7 +42,7 @@ METRIC | URN | GIT
Time | 0.10s | 0.01s
Max RSS | 0.02 MB | 0.00 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 347 | 397
-Repo size | 4212 KB | 3496 KB
+Inodes | 352 | 408
+Repo size | 4140 KB | 3624 KB
-------------------------------------------------------------
diff --git a/bm/BM_REPO_SIZE_5000_50.txt b/bm/BM_REPO_SIZE_5000_50.txt
index cd90798..e194153 100644
--- a/bm/BM_REPO_SIZE_5000_50.txt
+++ b/bm/BM_REPO_SIZE_5000_50.txt
@@ -6,7 +6,7 @@ ACTION: Status
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.26s | 0.00s
+Time | 0.27s | 0.00s
Max RSS | 0.02 MB | 0.00 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
Inodes | 6 | 27
@@ -17,32 +17,32 @@ ACTION: Add
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 2.82s | 4.62s
+Time | 2.90s | 4.54s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
Inodes | 5055 | 5284
-Repo size | 89444 KB | 70360 KB
+Repo size | 89996 KB | 69844 KB
-------------------------------------------------------------
ACTION: Commit
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 1.18s | 0.93s
+Time | 1.60s | 0.93s
Max RSS | 0.03 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
Inodes | 5264 | 5342
-Repo size | 91620 KB | 70592 KB
+Repo size | 92172 KB | 70076 KB
-------------------------------------------------------------
ACTION: Status(Clean)
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.34s | 0.10s
+Time | 0.28s | 0.11s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
Inodes | 5264 | 5342
-Repo size | 91620 KB | 70592 KB
+Repo size | 92172 KB | 70076 KB
-------------------------------------------------------------
diff --git a/bm/bm_rebase.pl b/bm/bm_commit.pl
index e89e7f5..841325d 100644
--- a/bm/bm_rebase.pl
+++ b/bm/bm_commit.pl
@@ -1,4 +1,6 @@
#!/usr/bin/perl
+# Usage: perl bm_commit.pl <file_count> <depth> <total_commits> <file_perc> <line_perc>
+# Example: perl bm_commit.pl 100 5 50 10 2
use strict;
use warnings;
use File::Copy;
@@ -9,9 +11,9 @@ use File::Basename;
use Cwd qw(getcwd abs_path);
use Time::HiRes qw(time);
-my ($files, $depth, $total_commits, $file_perc, $change_perc) = @ARGV;
-if (!defined $files || !defined $depth || !defined $total_commits || !defined $file_perc || !defined $change_perc) {
- die "Usage: perl bm_rebase.pl <file_count> <depth> <total_commits> <file_perc> <change_perc>\n";
+my ($files, $depth, $total_commits, $file_perc, $line_perc) = @ARGV;
+if (!defined $line_perc) {
+ die "Usage: perl bm_commit.pl <file_count> <depth> <total_commits> <file_perc> <line_perc>\n";
}
my $base_dir = getcwd();
@@ -21,12 +23,11 @@ my $bm_repo = "sandbox";
my $sample_rate = int($total_commits / 5) || 1;
my %results;
+my %final_stats = ( URN => { size => "N/A", inodes => 0 }, GIT => { size => "N/A", inodes => 0 } );
my $initial_repo_size = "0 KB";
my $rebase_count = 0;
my %last_base_hashes;
-my $global_tick = 0;
-
sub get_size {
my $dir = shift;
return "0 KB" unless -d $dir;
@@ -38,35 +39,56 @@ sub get_size {
sub count_inodes {
my $dir = shift;
return 0 unless -d $dir;
- my $count = `find $dir 2>/dev/null | wc -l`;
- $count =~ s/\s+//g;
- return $count || 0;
+ my %inodes;
+ find(sub { my @s = lstat($_); $inodes{$s[1]} = 1 if @s; }, $dir);
+ return scalar(keys %inodes);
}
sub track_rebases {
return unless -f ".urn/index";
open(my $fh, '<', ".urn/index") or return;
while (<$fh>) {
- chomp;
- my @cols = split(/\t/);
- next unless @cols >= 6;
+ chomp; my @cols = split(/\t/); next unless @cols >= 6;
my ($b_hash, $path) = ($cols[2], $cols[5]);
- if (exists $last_base_hashes{$path} && $last_base_hashes{$path} ne $b_hash) {
- $rebase_count++;
- }
+ $rebase_count++ if exists $last_base_hashes{$path} && $last_base_hashes{$path} ne $b_hash;
$last_base_hashes{$path} = $b_hash;
}
close($fh);
}
-sub run_rebase_benchmark {
+sub generate_surgical_line {
+ my ($original_line) = @_;
+ my @types = qw(int char float double bool uint32_t);
+ my @vars = qw(offset limit buffer status count ptr index);
+
+ $original_line =~ /^(\s*)/;
+ my $indent = $1 || " ";
+
+ my $type = $types[rand @types];
+ my $var = $vars[rand @vars] . "_" . int(rand(100));
+ my $val = int(rand(1000));
+
+ my $new_line = "$indent$type $var = $val;\n";
+
+ # Byte matching: pad with spaces before the newline to keep file size identical
+ my $target_len = length($original_line);
+ if (length($new_line) < $target_len) {
+ substr($new_line, -1, 0, " " x ($target_len - length($new_line)));
+ } elsif (length($new_line) > $target_len) {
+ $new_line = substr($new_line, 0, $target_len - 2) . ";\n";
+ }
+
+ return $new_line;
+}
+
+sub run_commit_benchmark {
my ($tool_name) = @_;
- print ">>> Starting REBASE BENCHMARK: $tool_name\n";
+ print ">>> Starting COMMIT BENCHMARK: $tool_name\n";
remove_tree($bm_repo) if -d $bm_repo;
system("perl $seed_bin $files $depth > /dev/null 2>&1");
- if ($initial_repo_size eq "0 KB") { $initial_repo_size = get_size($bm_repo); }
+ $initial_repo_size = get_size($bm_repo) if $initial_repo_size eq "0 KB";
chdir($bm_repo) or die $!;
@@ -86,36 +108,31 @@ sub run_rebase_benchmark {
my @target_files = @file_list[0 .. $num_to_change - 1];
for my $i (2 .. $total_commits) {
- my $debug_dir = "/tmp/urn/commit_$i";
- make_path($debug_dir) unless -d $debug_dir;
-
for my $target (@target_files) {
- open(my $fh, '<:raw', $target) or die "Read fail: $target - $!";
- my $content = do { local $/; <$fh> };
- close($fh);
-
- $global_tick++;
- my $ts = time();
- my $header = "C$i-T$ts-N$global_tick ";
-
- my $total_bytes = length($content);
- my $min_len = length($header);
- my $to_change = int($total_bytes * ($change_perc / 100));
-
- $to_change = $min_len if $to_change < $min_len;
- $to_change = $total_bytes if $to_change > $total_bytes;
-
- my $new_segment = substr($header . ("." x $to_change), 0, $to_change);
- substr($content, 0, $to_change, $new_segment);
-
- open(my $out, '>:raw', $target) or die "Write fail: $target - $!";
- print $out $content;
- close($out);
-
- my $target_name = basename($target);
- copy($target, "$debug_dir/$target_name") or warn "Backup failed: $!";
-
- utime(undef, undef, $target);
+ open(my $fh, '<', $target) or die "Read fail: $target - $!";
+ my @lines = <$fh>;
+ close($fh);
+
+ # Find lines that are simple variable assignments to maintain surgical diffs
+ my @eligible = grep { $lines[$_] =~ /^\s*\w+ \w+ = \d+;/ } (0 .. $#lines);
+
+ if (@eligible) {
+ # Calculate count based on percentage of eligible lines
+ my $to_mod = int(scalar(@eligible) * ($line_perc / 100));
+ $to_mod = 1 if $to_mod == 0;
+
+ my @indices = (sort { rand() <=> rand() } @eligible)[0 .. $to_mod - 1];
+
+ for my $idx (@indices) {
+ next unless defined $idx;
+ $lines[$idx] = generate_surgical_line($lines[$idx]);
+ }
+
+ open(my $out, '>', $target) or die "Write fail: $target - $!";
+ print $out join('', @lines);
+ close($out);
+ utime(undef, undef, $target);
+ }
}
my $cmd = ($tool_name eq "URN")
@@ -124,10 +141,7 @@ sub run_rebase_benchmark {
if ($i % $sample_rate == 0 || $i == $total_commits) {
my $raw_output = `/usr/bin/time -l sh -c "$cmd" 2>&1`;
-
- if ($tool_name eq "URN") {
- track_rebases();
- }
+ track_rebases() if $tool_name eq "URN";
my ($real, $rss, $maj, $min) = (0, 0, 0, 0);
$real = $1 if $raw_output =~ /(\d+\.\d+)\s+real/;
@@ -138,7 +152,7 @@ sub run_rebase_benchmark {
my $meta = ($tool_name eq "URN") ? ".urn" : ".git";
$results{$i}{$tool_name} = {
real => $real . "s",
- rss => $rss,
+ rss => $rss || "0 MB",
faults => "Maj:$maj / Min:$min",
inodes => count_inodes($meta),
size => get_size($meta),
@@ -149,24 +163,35 @@ sub run_rebase_benchmark {
track_rebases() if $tool_name eq "URN";
}
}
+
+ system("git gc --prune=now --quiet") if $tool_name eq "GIT";
+
+ my $final_meta = ($tool_name eq "URN") ? ".urn" : ".git";
+ $final_stats{$tool_name} = {
+ size => get_size($final_meta) || "0 KB",
+ inodes => count_inodes($final_meta) || 0,
+ };
+
chdir($base_dir);
+ remove_tree($bm_repo);
}
-$rebase_count = 0;
-%last_base_hashes = ();
-run_rebase_benchmark("URN");
-run_rebase_benchmark("GIT");
+run_commit_benchmark("URN");
+run_commit_benchmark("GIT");
-my $out_file = "BM_REBASE_${files}_${total_commits}.txt";
+my $out_file = "BM_COMMIT_${files}_${total_commits}.txt";
open(my $res, '>', $out_file) or die $!;
print $res "=============================================================\n";
-print $res " REBASE BENCHMARK: $files files ($total_commits commits)\n";
-print $res " CONDITIONS: Depth=$depth, Files Mod=$file_perc%, Change=$change_perc%\n";
+print $res " COMMIT BENCHMARK: $files files ($total_commits commits)\n";
+print $res " CONDITIONS: Depth=$depth, Files Mod=$file_perc%, Line Mod=$line_perc%\n";
+print $res " INITIAL REPO SIZE: $initial_repo_size\n";
print $res "=============================================================\n\n";
foreach my $i (sort { $a <=> $b } keys %results) {
my $u = $results{$i}{"URN"};
my $g = $results{$i}{"GIT"};
+ next unless defined $u && defined $g;
+
print $res "SNAPSHOT: Commit #$i\n";
print $res "-------------------------------------------------------------\n";
printf $res "%-15s | %-20s | %-20s\n", "METRIC", "URN", "GIT";
@@ -178,7 +203,15 @@ foreach my $i (sort { $a <=> $b } keys %results) {
printf $res "%-15s | %20s | %20s\n", "Repo size", $u->{size}, $g->{size};
print $res "-------------------------------------------------------------\n\n";
}
+
+print $res "AFTER GIT GC\n";
+print $res "-------------------------------------------------------------\n";
+printf $res "%-15s | %20s | %20s\n", "Final Size", $final_stats{URN}{size}, $final_stats{GIT}{size};
+printf $res "%-15s | %20s | %20s\n", "Final Inodes", $final_stats{URN}{inodes}, $final_stats{GIT}{inodes};
+print $res "-------------------------------------------------------------\n\n";
+
print $res "TOTAL URN REBASES: $rebase_count\n";
close($res);
-exec "less $out_file";
+my $pager = $ENV{PAGER} || 'less';
+system("$pager $out_file");
diff --git a/bm/bm_history.pl b/bm/bm_history.pl
deleted file mode 100644
index 5f2ea4a..0000000
--- a/bm/bm_history.pl
+++ /dev/null
@@ -1,152 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-use File::Spec;
-use File::Path qw(remove_tree);
-use File::Find;
-use Cwd qw(getcwd abs_path);
-
-my ($files, $depth, $total_commits) = @ARGV;
-if (!defined $files || !defined $depth || !defined $total_commits) {
- die "Usage: perl bm_history.pl <file_count> <depth> <total_commits>\n";
-}
-
-my $base_dir = getcwd();
-my $urn_bin = abs_path(File::Spec->catfile("..", "urn"));
-my $seed_bin = abs_path("seed.pl");
-my $bm_repo = "bm_repo";
-
-my $sample_rate = int($total_commits / 5) || 1;
-my %results;
-
-my $rebase_count = 0;
-my %last_base_hashes;
-
-sub get_size {
- my $dir = shift;
- return "0 KB" unless -d $dir;
- my $size = `du -sk $dir 2>/dev/null`;
- $size =~ /^(\d+)/;
- return ($1 || 0) . " KB";
-}
-
-sub count_inodes {
- my $dir = shift;
- return 0 unless -d $dir;
- my $count = `find $dir 2>/dev/null | wc -l`;
- $count =~ s/\s+//g;
- return $count || 0;
-}
-
-sub track_rebases {
- return unless -f ".urn/index";
- open(my $fh, '<', ".urn/index") or return;
- while (<$fh>) {
- chomp;
- my @cols = split(/\t/);
- next unless @cols >= 6;
- my ($b_hash, $path) = ($cols[2], $cols[5]);
- if (exists $last_base_hashes{$path} && $last_base_hashes{$path} ne $b_hash) {
- $rebase_count++;
- }
- $last_base_hashes{$path} = $b_hash;
- }
- close($fh);
-}
-
-sub run_history_benchmark {
- my ($tool_name) = @_;
- print ">>> Starting History Benchmark: $tool_name\n";
-
- remove_tree($bm_repo) if -d $bm_repo;
- system("perl $seed_bin $files $depth > /dev/null 2>&1");
-
- chdir($bm_repo) or die $!;
-
- my $init_cmd = ($tool_name eq "URN") ? "perl $urn_bin init" : "git init";
- my $add_cmd = ($tool_name eq "URN") ? "perl $urn_bin add ." : "git add .";
-
- system("$init_cmd > /dev/null 2>&1");
- system("$add_cmd > /dev/null 2>&1");
- system(($tool_name eq "URN" ? "perl $urn_bin" : "git") . " commit -m 'initial' > /dev/null 2>&1");
-
- track_rebases() if $tool_name eq "URN";
-
- my @file_list;
- find(sub { push @file_list, $File::Find::name if -f $_ && $File::Find::name !~ /\.(git|urn)/ }, ".");
-
- for my $i (1 .. $total_commits) {
- my $to_modify = int($files * 0.02) || 1;
- for (1 .. $to_modify) {
- my $target = $file_list[rand @file_list];
- if (open(my $fh, '>>', $target)) {
- print $fh "Churn $i\n";
- close($fh);
- }
- }
-
- my $cmd = ($tool_name eq "URN")
- ? "perl $urn_bin add . && perl $urn_bin commit -m 'c$i'"
- : "git add . && git commit -m 'c$i'";
-
- if ($i % $sample_rate == 0 || $i == $total_commits) {
- # Capture hardware metrics via /usr/bin/time -l
- my $stats = `/usr/bin/time -l sh -c "$cmd" 2>&1 > /dev/null`;
-
- my ($real, $rss, $maj, $min) = (0, 0, 0, 0);
- if ($stats =~ /(\d+\.\d+)\s+real/) { $real = $1; }
- if ($stats =~ /(\d+)\s+maximum resident set size/) { $rss = sprintf("%.2f MB", $1 / 1024 / 1024); }
- if ($stats =~ /(\d+)\s+page reclaims/) { $min = $1; }
- if ($stats =~ /(\d+)\s+page faults/) { $maj = $1; }
-
- if ($tool_name eq "URN") { track_rebases(); }
-
- my $meta = ($tool_name eq "URN") ? ".urn" : ".git";
- $results{$i}{$tool_name} = {
- real => $real . "s",
- rss => $rss,
- faults => "Maj:$maj / Min:$min",
- inodes => count_inodes($meta),
- size => get_size($meta),
- };
- print " [Commit $i] $tool_name sampled.\n";
- } else {
- system("$cmd > /dev/null 2>&1");
- if ($tool_name eq "URN") { track_rebases(); }
- }
- }
- chdir($base_dir);
- remove_tree($bm_repo);
-}
-
-$rebase_count = 0;
-%last_base_hashes = ();
-run_history_benchmark("URN");
-run_history_benchmark("GIT");
-
-my $out_file = "BM_HISTORY_${files}_${total_commits}.txt";
-open(my $res, '>', $out_file) or die $!;
-print $res "=============================================================\n";
-print $res " HISTORY BENCHMARK: $files files ($total_commits commits)\n";
-print $res "=============================================================\n\n";
-
-foreach my $i (sort { $a <=> $b } keys %results) {
- my $u = $results{$i}{"URN"};
- my $g = $results{$i}{"GIT"};
- print $res "SNAPSHOT: Commit #$i\n";
- print $res "-------------------------------------------------------------\n";
- printf $res "%-15s | %-20s | %-20s\n", "METRIC", "URN", "GIT";
- print $res "----------------+----------------------+---------------------\n";
- printf $res "%-15s | %20s | %20s\n", "Time", $u->{real}, $g->{real};
- printf $res "%-15s | %20s | %20s\n", "Max RSS", $u->{rss}, $g->{rss};
- printf $res "%-15s | %20s | %20s\n", "Page faults", $u->{faults}, $g->{faults};
- printf $res "%-15s | %20s | %20s\n", "Inodes", $u->{inodes}, $g->{inodes};
- printf $res "%-15s | %20s | %20s\n", "Repo size", $u->{size}, $g->{size};
- print $res "-------------------------------------------------------------\n\n";
-}
-print $res "TOTAL URN REBASES: $rebase_count\n";
-close($res);
-
-my $pager = $ENV{PAGER} || 'less';
-exec $pager, $out_file;
-
diff --git a/bm/bm_size.pl b/bm/bm_size.pl
index d21cfd6..d3d10d7 100644
--- a/bm/bm_size.pl
+++ b/bm/bm_size.pl
@@ -13,7 +13,7 @@ if (!defined $files || !defined $depth) {
my $base_dir = getcwd();
my $urn_bin = abs_path(File::Spec->catfile("..", "urn"));
my $seed_bin = abs_path("seed.pl");
-my $bm_repo = "bm_repo";
+my $bm_repo = "sandbox";
my %results;
my @actions = qw(Status Add Commit Status(Clean));
diff --git a/bm/seed.pl b/bm/seed.pl
index 81c70dd..fdf3884 100644
--- a/bm/seed.pl
+++ b/bm/seed.pl
@@ -13,20 +13,53 @@ if (!defined $total_files || !defined $max_depth) {
my $target_root = "sandbox";
my $files_created = 0;
-# Helper to generate random "code-like" text
sub generate_content {
- my $size_kb = 5 + int(rand(21)); # 5-25 KB
- my $bytes = $size_kb * 1024;
+ my $size_kb = 5 + int(rand(21));
+ my $target_bytes = $size_kb * 1024;
my $content = "";
- my @chars = ('a'..'z', 'A'..'Z', '0'..'9');
-
- while (length($content) < $bytes) {
- my $line_len = 20 + int(rand(60));
- $content .= "\t" if rand() > 0.5; # Random indentation
- for (1..$line_len) { $content .= $chars[rand @chars]; }
- $content .= (rand() > 0.7) ? " " : ""; # Random spaces
- $content .= "\n";
+
+ my @types = qw(int char void float double uint32_t bool);
+ my @ops = qw(= += -= == != < >);
+ my @vars = qw(count index status buffer ptr limit offset);
+
+ $content .= "#include <stdio.h>\n#include <stdlib.h>\n\n";
+
+ while (length($content) < $target_bytes) {
+ my $roll = rand();
+
+ if ($roll < 0.15) {
+ # Function definition (no indent)
+ my $type = $types[rand @types];
+ my $name = "process_data_" . int(rand(100));
+ $content .= "\n$type $name() {\n";
+ }
+ elsif ($roll < 0.60) {
+ # Standard assignment (1 tab)
+ my $type = $types[rand @types];
+ my $var = $vars[rand @vars] . "_" . int(rand(50));
+ my $val = int(rand(1000));
+ $content .= "\t$type $var = $val;\n";
+ }
+ elsif ($roll < 0.80) {
+ # If block with nested logic (1-2 tabs)
+ my $var = $vars[rand @vars];
+ $content .= "\tif ($var > " . int(rand(100)) . ") {\n";
+ $content .= "\t\t$var " . $ops[rand @ops] . " 1;\n";
+ $content .= "\t\treturn $var;\n";
+ $content .= "\t}\n";
+ }
+ elsif ($roll < 0.95) {
+ # Closing brace (no indent)
+ $content .= "}\n\n";
+ }
+ else {
+ # Comment line (1 tab)
+ $content .= "\t/* Update state " . int(rand(1000)) . " */\n";
+ }
}
+
+ # Cleanup: Ensure we close the last function block
+ $content .= "\n}\n" unless $content =~ /}\n\s*$/;
return $content;
}
@@ -34,8 +67,6 @@ sub seed_tree {
my ($current_path, $depth, $files_left) = @_;
return if $files_left <= 0;
- # Determine how many files to put in THIS directory
- # Heuristic: Spread them out, but ensure at least 1 if depth remains
my $files_here = ($depth == $max_depth)
? $files_left
: int($files_left / ($max_depth - $depth + 1)) + int(rand(3));
@@ -45,20 +76,20 @@ sub seed_tree {
make_path($current_path) unless -d $current_path;
for (1..$files_here) {
- my $filename = "file_" . sprintf("%04d", ++$files_created) . ".txt";
+ my $filename = "src_" . sprintf("%04d", ++$files_created) . ".c";
my $full_path = File::Spec->catfile($current_path, $filename);
- open(my $fh, '>', $full_path) or die $!;
+ open(my $fh, '>', $full_path) or die "Could not open $full_path: $!";
print $fh generate_content();
close($fh);
}
- # If we have depth and files remaining, go deeper
if ($depth < $max_depth && ($files_left - $files_here) > 0) {
- my $next_dir = File::Spec->catdir($current_path, "depth_" . ($depth + 1));
+ my $next_dir = File::Spec->catdir($current_path, "subdir_" . ($depth + 1));
seed_tree($next_dir, $depth + 1, $files_left - $files_here);
}
}
-print "Seeding $total_files files across $max_depth levels...\n";
+print "Seeding $total_files files (C-style) with tab indents...\n";
seed_tree($target_root, 1, $total_files);
-print "Done. Created $files_created files in '$target_root'.\n";
+print "Success. Created $files_created files in '$target_root'.\n";
+
diff --git a/urn b/urn
index c8ddbce..5fe2f8a 100644
--- a/urn
+++ b/urn
@@ -3,6 +3,8 @@
use strict;
use warnings;
use IO::Handle;
+use IO::Compress::Gzip qw(gzip $GzipError);
+use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use File::Path qw(make_path);
use File::Copy qw(copy);
use File::Basename;
@@ -26,6 +28,10 @@ use constant CHUNK_LEN => 8192;
use constant MEM_LIMIT => 64 * 1024 * 1024;
use constant IO_LAYER => ":raw:perlio(layer=" . CHUNK_LEN . ")";
+use constant GZIP_LEVEL => 6;
+use constant GZIP_THRESHOLD => 256;
+use constant REBASE_THRESHOLD => 1.4;
+
Getopt::Long::Configure("bundling");
my $cmd = shift @ARGV // '';
@@ -261,6 +267,8 @@ sub run_commit {
my $use_disk_patch = 0;
my ($pt_fh, $pt_path);
+ my $tar_size = 0;
+
my ($tmp_idx_fh, $tmp_idx_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
binmode $tmp_idx_fh, ":raw";
@@ -276,26 +284,26 @@ sub run_commit {
$out_c = $out_s;
$out_b = $out_s;
- my $obj_path = get_obj_path($out_b);
my $stg_file = File::Spec->catfile(TMP_DIR, $out_p);
- if (!-e $obj_path) {
- rename($stg_file, $obj_path) or copy($stg_file, $obj_path);
- }
+ store_base_file($stg_file, $out_b);
$idx = $it_idx->();
}
elsif ($cmp == 0) { # Modified or Unchanged
($out_p, $out_s, $out_m, $out_z) = ($idx->{path}, $idx->{s_hash}, $idx->{mtime}, $idx->{size});
if ($idx->{s_hash} ne ($idx->{c_hash} // "-")) {
- my $base_obj = get_obj_path($old->{hash});
+ my $base_obj = get_base_file($old->{hash});
my $stg_file = File::Spec->catfile(TMP_DIR, $out_p);
my $patch = (-T $stg_file)
? qx(diff '$base_obj' '$stg_file')
: make_bin_patch($stg_file, $base_obj);
- if (defined $patch && length($patch) <= $out_z) {
- if (!$use_disk_patch && ($patch_mem_size + length($patch)) > MEM_LIMIT) {
+ my $patch_len = length($patch);
+ $tar_size += $patch_len;
+
+ if (defined $patch && $patch_len <= ($out_z * REBASE_THRESHOLD)) {
+ if (!$use_disk_patch && ($patch_mem_size + $patch_len) > MEM_LIMIT) {
($pt_fh, $pt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
my $tar = Archive::Tar->new;
$tar->add_data($_, $patches{$_}) for keys %patches;
@@ -309,16 +317,13 @@ sub run_commit {
$tar->write($pt_path);
} else {
$patches{"$out_p.patch"} = $patch;
- $patch_mem_size += length($patch);
+ $patch_mem_size += $patch_len;
}
$out_b = $old->{hash};
unlink($stg_file);
} else {
$out_b = $out_s;
- my $obj_path = get_obj_path($out_b);
- if (!-e $obj_path) {
- rename($stg_file, $obj_path) or copy($stg_file, $obj_path);
- }
+ store_object_file($stg_file, $out_b);
}
$out_c = $out_s;
} else {
@@ -364,7 +369,7 @@ sub run_commit {
my $tar = Archive::Tar->new;
if ($use_disk_patch) { $tar->read($pt_path); unlink $pt_path; }
$tar->add_data($_, $patches{$_}) for keys %patches;
- $tar->write($bundle_tmp, COMPRESS_GZIP);
+ $tar->write($bundle_tmp, ($tar_size >= GZIP_THRESHOLD ? GZIP_LEVEL : 0));
$patch_bundle_hash = hash_file_content($bundle_tmp);
rename($bundle_tmp, get_obj_path($patch_bundle_hash));
}
@@ -437,11 +442,9 @@ sub run_show {
open(my $pipe, "| $pager") or die "Can't pipe to $pager: $!";
my $old_fh = select($pipe);
- # 2. Use existing logic to get the content
my $v = get_file_version($rev_id, $file_path);
die "Error: Could not resolve '$file_path' at revision $rev_id.\n" unless defined $v;
- # 3. Output content
if (ref($v) eq 'SCALAR') {
binmode STDOUT, ":raw";
print $$v;
@@ -493,7 +496,7 @@ sub run_diff {
}
}
} else {
- # Full Tree Walk
+ # Full tree walk
my $s_id = (lc($src // '') eq 'head') ? read_head() : ($src // '');
my $th; if (open my $rf, '<', File::Spec->catfile(REV_DIR, $s_id)) {
while (<$rf>) { $th = $1 if /^tree:(.*)$/ } close $rf;
@@ -815,7 +818,7 @@ sub is_revision {
sub get_file_version {
my ($source, $path) = @_;
- # Handle Workspace
+ # Handle workspace
if (!defined $source) {
return undef unless -e $path;
if ((-s $path // 0) > MEM_LIMIT) {
@@ -828,7 +831,7 @@ sub get_file_version {
return \$data;
}
- # Resolve Revision
+ # Resolve revision
my $rev_id = (lc($source) eq 'head') ? read_head() : $source;
my $rev_file = File::Spec->catfile(REV_DIR, $rev_id);
return undef unless -f $rev_file;
@@ -852,10 +855,10 @@ sub get_file_version {
}
return undef unless $node;
- my $obj_path = get_obj_path($node->{hash} // '');
+ my $obj_path = get_base_file($node->{hash});
return undef unless -f $obj_path;
- # Extract Base Object to Temp File
+ # Extract base object to temp file
my ($tfh, $tpath) = tempfile(DIR => TMP_DIR, UNLINK => 1);
binmode $tfh, IO_LAYER;
@@ -863,9 +866,9 @@ sub get_file_version {
binmode $ofh, IO_LAYER;
while (read($ofh, my $buf, CHUNK_LEN)) { print $tfh $buf; }
close $ofh;
- close $tfh;
+ close $tfh;
- # Apply Patch via Streaming Bundle
+ # Apply patch via streaming bundle
if ($patch_bundle_hash) {
my $bundle_path = get_obj_path($patch_bundle_hash);
if (-f $bundle_path && -s $bundle_path > 0) {
@@ -891,7 +894,7 @@ sub get_file_version {
}
}
- # Final Output decision based on result size
+ # Final output decision based on result size
if ((-s $tpath // 0) > MEM_LIMIT) {
return $tpath;
} else {
@@ -900,3 +903,45 @@ sub get_file_version {
}
}
+sub get_base_file {
+ my ($obj_hash) = @_;
+ my $obj_path = get_obj_path($obj_hash);
+ return undef unless -f $obj_path;
+
+ # Peek at the first two bytes for GZIP header
+ open my $fh, '<:raw', $obj_path or return $obj_path;
+ read($fh, my $header, 2);
+ close $fh;
+
+ # If it's not gzipped, return the original path in the object store
+ return $obj_path unless (defined $header && $header eq "\x1f\x8b");
+
+ # If gzipped, decompress to a temporary file in the staging area
+ my ($tfh, $tpath) = tempfile(DIR => TMP_DIR, UNLINK => 1);
+ close $tfh;
+
+ gunzip $obj_path => $tpath
+ or die "Failed to decompress base $obj_path: $GunzipError";
+
+ return $tpath;
+}
+
+sub store_base_file {
+ my ($stg_file, $obj_hash) = @_;
+ my $obj_path = get_obj_path($obj_hash);
+
+ # If the object already exists, we don't need to do anything
+ return $obj_path if -e $obj_path;
+
+ if (!-l $stg_file && -s $stg_file > GZIP_THRESHOLD) {
+ gzip $stg_file => $obj_path, Level => GZIP_LEVEL
+ or die "gzip failed for $obj_path: $GzipError";
+ unlink($stg_file);
+ }
+ else {
+ rename($stg_file, $obj_path) or copy($stg_file, $obj_path);
+ }
+
+ return $obj_path;
+}
+