diff options
| author | Sadeep Madurange <sadeep@asciimx.com> | 2026-04-29 14:05:46 +0800 |
|---|---|---|
| committer | Sadeep Madurange <sadeep@asciimx.com> | 2026-04-29 14:05:46 +0800 |
| commit | 1b64a87816c06526229f9dc03b12f1092541ae59 (patch) | |
| tree | 354deb4fa8cfc2ec54e1ddfca5eca6422cfce4f9 | |
| parent | 49ae7748e4a95afa1fd9d08f4886952dfc1deca4 (diff) | |
| download | urn-1b64a87816c06526229f9dc03b12f1092541ae59.tar.gz | |
Fix bug where a file change made within <1s (same mtime as the index) was not detected.
| -rw-r--r-- | bm/BM_HISTORY_1000_100.txt | 30 | ||||
| -rw-r--r-- | bm/BM_REBASE_1000_100.txt | 33 | ||||
| -rw-r--r-- | bm/bm_rebase.pl | 83 | ||||
| -rw-r--r-- | bm/seed.pl | 2 | ||||
| -rw-r--r-- | urn | 15 |
5 files changed, 92 insertions, 71 deletions
diff --git a/bm/BM_HISTORY_1000_100.txt b/bm/BM_HISTORY_1000_100.txt index a31c179..2885405 100644 --- a/bm/BM_HISTORY_1000_100.txt +++ b/bm/BM_HISTORY_1000_100.txt @@ -6,55 +6,55 @@ SNAPSHOT: Commit #20 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.43s | 0.08s +Time | 0.35s | 0.09s Max RSS | 0.02 MB | 0.01 MB Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 -Inodes | 1299 | 2113 -Repo size | 19384 KB | 22056 KB +Inodes | 1301 | 2121 +Repo size | 18868 KB | 22076 KB ------------------------------------------------------------- SNAPSHOT: Commit #40 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.42s | 0.11s +Time | 0.46s | 0.11s Max RSS | 0.02 MB | 0.01 MB Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 -Inodes | 1339 | 2908 -Repo size | 19544 KB | 29020 KB +Inodes | 1341 | 2929 +Repo size | 19028 KB | 29136 KB ------------------------------------------------------------- SNAPSHOT: Commit #60 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.42s | 0.10s +Time | 0.43s | 0.12s Max RSS | 0.02 MB | 0.01 MB Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 -Inodes | 1379 | 3711 -Repo size | 19704 KB | 35920 KB +Inodes | 1381 | 3732 +Repo size | 19188 KB | 36088 KB ------------------------------------------------------------- SNAPSHOT: Commit #80 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.41s | 0.11s +Time | 0.45s | 0.08s Max RSS | 0.02 MB | 0.01 MB Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 -Inodes | 1419 | 4520 -Repo size | 19864 KB | 43176 KB +Inodes | 1421 | 4538 +Repo size | 19348 KB | 43104 KB ------------------------------------------------------------- 
SNAPSHOT: Commit #100 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.42s | 0.11s +Time | 0.44s | 0.10s Max RSS | 0.02 MB | 0.01 MB Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 -Inodes | 1459 | 5324 -Repo size | 20024 KB | 50232 KB +Inodes | 1462 | 5346 +Repo size | 19512 KB | 49980 KB ------------------------------------------------------------- TOTAL URN REBASES: 0 diff --git a/bm/BM_REBASE_1000_100.txt b/bm/BM_REBASE_1000_100.txt index 7875767..7550570 100644 --- a/bm/BM_REBASE_1000_100.txt +++ b/bm/BM_REBASE_1000_100.txt @@ -1,62 +1,61 @@ ============================================================= REBASE BENCHMARK: 1000 files (100 commits) CONDITIONS: Depth=2, Files Mod=5%, Change=50% - INITIAL RAW DATA SIZE: 16976 KB ============================================================= SNAPSHOT: Commit #20 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.29s | 0.05s +Time | 0.65s | 0.08s Max RSS | 0.02 MB | 0.01 MB Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 -Inodes | 1578 | 2334 -Repo size | 20404 KB | 19380 KB +Inodes | 2272 | 2282 +Repo size | 38504 KB | 22700 KB ------------------------------------------------------------- SNAPSHOT: Commit #40 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.54s | 0.05s +Time | 0.59s | 0.08s Max RSS | 0.02 MB | 0.01 MB Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 -Inodes | 1607 | 3374 -Repo size | 20520 KB | 23788 KB +Inodes | 3332 | 3322 +Repo size | 59384 KB | 31188 KB ------------------------------------------------------------- SNAPSHOT: Commit #60 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.31s | 0.05s +Time | 
0.57s | 0.08s Max RSS | 0.02 MB | 0.01 MB Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 -Inodes | 1635 | 4414 -Repo size | 20632 KB | 28196 KB +Inodes | 4392 | 4362 +Repo size | 80264 KB | 39676 KB ------------------------------------------------------------- SNAPSHOT: Commit #80 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.29s | 0.05s +Time | 0.57s | 0.08s Max RSS | 0.02 MB | 0.01 MB Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 -Inodes | 1664 | 5454 -Repo size | 20748 KB | 32596 KB +Inodes | 5452 | 5402 +Repo size | 101144 KB | 48156 KB ------------------------------------------------------------- SNAPSHOT: Commit #100 ------------------------------------------------------------- METRIC | URN | GIT ----------------+----------------------+--------------------- -Time | 0.54s | 0.10s +Time | 0.57s | 0.08s Max RSS | 0.02 MB | 0.01 MB Page faults | Maj:0 / Min:0 | Maj:0 / Min:0 -Inodes | 1693 | 6495 -Repo size | 20864 KB | 37008 KB +Inodes | 6512 | 6442 +Repo size | 122024 KB | 56644 KB ------------------------------------------------------------- -TOTAL URN REBASES: 273 +TOTAL URN REBASES: 4950 diff --git a/bm/bm_rebase.pl b/bm/bm_rebase.pl index 3cb0fd5..e89e7f5 100644 --- a/bm/bm_rebase.pl +++ b/bm/bm_rebase.pl @@ -1,10 +1,13 @@ #!/usr/bin/perl use strict; use warnings; +use File::Copy; use File::Spec; -use File::Path qw(remove_tree); +use File::Path qw(remove_tree make_path); use File::Find; +use File::Basename; use Cwd qw(getcwd abs_path); +use Time::HiRes qw(time); my ($files, $depth, $total_commits, $file_perc, $change_perc) = @ARGV; if (!defined $files || !defined $depth || !defined $total_commits || !defined $file_perc || !defined $change_perc) { @@ -14,15 +17,16 @@ if (!defined $files || !defined $depth || !defined $total_commits || !defined $f my $base_dir = getcwd(); my $urn_bin = abs_path(File::Spec->catfile("..", "urn")); my $seed_bin = 
abs_path("seed.pl"); -my $bm_repo = "bm_repo"; +my $bm_repo = "sandbox"; my $sample_rate = int($total_commits / 5) || 1; my %results; my $initial_repo_size = "0 KB"; - my $rebase_count = 0; my %last_base_hashes; +my $global_tick = 0; + sub get_size { my $dir = shift; return "0 KB" unless -d $dir; @@ -62,10 +66,7 @@ sub run_rebase_benchmark { remove_tree($bm_repo) if -d $bm_repo; system("perl $seed_bin $files $depth > /dev/null 2>&1"); - # Capture raw directory size before VCS initialization - if ($initial_repo_size eq "0 KB") { - $initial_repo_size = get_size($bm_repo); - } + if ($initial_repo_size eq "0 KB") { $initial_repo_size = get_size($bm_repo); } chdir($bm_repo) or die $!; @@ -84,19 +85,37 @@ sub run_rebase_benchmark { my $num_to_change = int(scalar(@file_list) * ($file_perc / 100)) || 1; my @target_files = @file_list[0 .. $num_to_change - 1]; - for my $i (1 .. $total_commits) { - for my $target (@target_files) { - open(my $fh, '<', $target) or next; - my @lines = <$fh>; - close($fh); + for my $i (2 .. $total_commits) { + my $debug_dir = "/tmp/urn/commit_$i"; + make_path($debug_dir) unless -d $debug_dir; - my $lines_to_change = int(scalar(@lines) * ($change_perc / 100)) || 1; - splice(@lines, 0, $lines_to_change, "Commit $i: block change\n"); - - if (open(my $out, '>', $target)) { - print $out @lines; - close($out); - } + for my $target (@target_files) { + open(my $fh, '<:raw', $target) or die "Read fail: $target - $!"; + my $content = do { local $/; <$fh> }; + close($fh); + + $global_tick++; + my $ts = time(); + my $header = "C$i-T$ts-N$global_tick "; + + my $total_bytes = length($content); + my $min_len = length($header); + my $to_change = int($total_bytes * ($change_perc / 100)); + + $to_change = $min_len if $to_change < $min_len; + $to_change = $total_bytes if $to_change > $total_bytes; + + my $new_segment = substr($header . ("." 
x $to_change), 0, $to_change); + substr($content, 0, $to_change, $new_segment); + + open(my $out, '>:raw', $target) or die "Write fail: $target - $!"; + print $out $content; + close($out); + + my $target_name = basename($target); + copy($target, "$debug_dir/$target_name") or warn "Backup failed: $!"; + + utime(undef, undef, $target); } my $cmd = ($tool_name eq "URN") @@ -104,14 +123,17 @@ sub run_rebase_benchmark { : "git add . && git commit -m 'c$i'"; if ($i % $sample_rate == 0 || $i == $total_commits) { - my $stats = `/usr/bin/time -l sh -c "$cmd" 2>&1 > /dev/null`; - my ($real, $rss, $maj, $min) = (0, 0, 0, 0); - if ($stats =~ /(\d+\.\d+)\s+real/) { $real = $1; } - if ($stats =~ /(\d+)\s+maximum resident set size/) { $rss = sprintf("%.2f MB", $1 / 1024 / 1024); } - if ($stats =~ /(\d+)\s+page reclaims/) { $min = $1; } - if ($stats =~ /(\d+)\s+page faults/) { $maj = $1; } + my $raw_output = `/usr/bin/time -l sh -c "$cmd" 2>&1`; + + if ($tool_name eq "URN") { + track_rebases(); + } - if ($tool_name eq "URN") { track_rebases(); } + my ($real, $rss, $maj, $min) = (0, 0, 0, 0); + $real = $1 if $raw_output =~ /(\d+\.\d+)\s+real/; + $rss = sprintf("%.2f MB", $1 / 1024 / 1024) if $raw_output =~ /(\d+)\s+maximum resident set size/; + $min = $1 if $raw_output =~ /(\d+)\s+page reclaims/; + $maj = $1 if $raw_output =~ /(\d+)\s+page faults/; my $meta = ($tool_name eq "URN") ? 
".urn" : ".git"; $results{$i}{$tool_name} = { @@ -124,11 +146,10 @@ sub run_rebase_benchmark { print " [Commit $i] $tool_name sampled.\n"; } else { system("$cmd > /dev/null 2>&1"); - if ($tool_name eq "URN") { track_rebases(); } + track_rebases() if $tool_name eq "URN"; } } chdir($base_dir); - remove_tree($bm_repo); } $rebase_count = 0; @@ -141,7 +162,6 @@ open(my $res, '>', $out_file) or die $!; print $res "=============================================================\n"; print $res " REBASE BENCHMARK: $files files ($total_commits commits)\n"; print $res " CONDITIONS: Depth=$depth, Files Mod=$file_perc%, Change=$change_perc%\n"; -print $res " INITIAL RAW DATA SIZE: $initial_repo_size\n"; print $res "=============================================================\n\n"; foreach my $i (sort { $a <=> $b } keys %results) { @@ -158,10 +178,7 @@ foreach my $i (sort { $a <=> $b } keys %results) { printf $res "%-15s | %20s | %20s\n", "Repo size", $u->{size}, $g->{size}; print $res "-------------------------------------------------------------\n\n"; } - print $res "TOTAL URN REBASES: $rebase_count\n"; close($res); -my $pager = $ENV{PAGER} || 'less'; -exec $pager, $out_file; - +exec "less $out_file"; @@ -10,7 +10,7 @@ if (!defined $total_files || !defined $max_depth) { die "Usage: perl seed.pl <total_files> <max_depth>\nExample: perl seed.pl 200 20\n"; } -my $target_root = "bm_repo"; +my $target_root = "sandbox"; my $files_created = 0; # Helper to generate random "code-like" text @@ -178,19 +178,24 @@ sub run_add { : $idx_entry->{path} cmp $wrk_entry->{path}; if ($cmp == 0) { + my $idx_mtime = (stat(INDEX))[9]; # Needed if a mtime change <1s if ($idx_entry->{mtime} == $wrk_entry->{mtime} && - $idx_entry->{size} == $wrk_entry->{size}) { + $idx_entry->{size} == $wrk_entry->{size} && + $idx_entry->{mtime} != $idx_mtime) { # No change: Preserve all 3 hashes and metadata printf $out "%-40s\t%-40s\t%-40s\t%-12d\t%-10d\t%s\n", $idx_entry->{s_hash}, $idx_entry->{c_hash}, 
$idx_entry->{b_hash}, $idx_entry->{mtime}, $idx_entry->{size}, $idx_entry->{path}; } else { my $p = $wrk_entry->{path}; + my $last_hash = $idx_entry->{c_hash}; my $current_hash = hash_file_content($p); - my $stg_path = File::Spec->catfile(TMP_DIR, $p); - make_path(dirname($stg_path)); - - (-l $p) ? symlink(readlink($p), $stg_path) : copy($p, $stg_path); + + if ($last_hash ne $current_hash) { + my $stg_path = File::Spec->catfile(TMP_DIR, $p); + make_path(dirname($stg_path)); + (-l $p) ? symlink(readlink($p), $stg_path) : copy($p, $stg_path); + } # Update staged hash, preserve committed and base hashes printf $out "%-40s\t%-40s\t%-40s\t%-12d\t%-10d\t%s\n", |
