summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Sadeep Madurange <sadeep@asciimx.com> 2026-04-29 14:05:46 +0800
committer Sadeep Madurange <sadeep@asciimx.com> 2026-04-29 14:05:46 +0800
commit 1b64a87816c06526229f9dc03b12f1092541ae59 (patch)
tree 354deb4fa8cfc2ec54e1ddfca5eca6422cfce4f9
parent 49ae7748e4a95afa1fd9d08f4886952dfc1deca4 (diff)
download urn-1b64a87816c06526229f9dc03b12f1092541ae59.tar.gz
Fix mtime change <1s bug.
-rw-r--r-- bm/BM_HISTORY_1000_100.txt 30
-rw-r--r-- bm/BM_REBASE_1000_100.txt 33
-rw-r--r-- bm/bm_rebase.pl 83
-rw-r--r-- bm/seed.pl 2
-rw-r--r-- urn 15
5 files changed, 92 insertions, 71 deletions
diff --git a/bm/BM_HISTORY_1000_100.txt b/bm/BM_HISTORY_1000_100.txt
index a31c179..2885405 100644
--- a/bm/BM_HISTORY_1000_100.txt
+++ b/bm/BM_HISTORY_1000_100.txt
@@ -6,55 +6,55 @@ SNAPSHOT: Commit #20
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.43s | 0.08s
+Time | 0.35s | 0.09s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1299 | 2113
-Repo size | 19384 KB | 22056 KB
+Inodes | 1301 | 2121
+Repo size | 18868 KB | 22076 KB
-------------------------------------------------------------
SNAPSHOT: Commit #40
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.42s | 0.11s
+Time | 0.46s | 0.11s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1339 | 2908
-Repo size | 19544 KB | 29020 KB
+Inodes | 1341 | 2929
+Repo size | 19028 KB | 29136 KB
-------------------------------------------------------------
SNAPSHOT: Commit #60
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.42s | 0.10s
+Time | 0.43s | 0.12s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1379 | 3711
-Repo size | 19704 KB | 35920 KB
+Inodes | 1381 | 3732
+Repo size | 19188 KB | 36088 KB
-------------------------------------------------------------
SNAPSHOT: Commit #80
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.41s | 0.11s
+Time | 0.45s | 0.08s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1419 | 4520
-Repo size | 19864 KB | 43176 KB
+Inodes | 1421 | 4538
+Repo size | 19348 KB | 43104 KB
-------------------------------------------------------------
SNAPSHOT: Commit #100
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.42s | 0.11s
+Time | 0.44s | 0.10s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1459 | 5324
-Repo size | 20024 KB | 50232 KB
+Inodes | 1462 | 5346
+Repo size | 19512 KB | 49980 KB
-------------------------------------------------------------
TOTAL URN REBASES: 0
diff --git a/bm/BM_REBASE_1000_100.txt b/bm/BM_REBASE_1000_100.txt
index 7875767..7550570 100644
--- a/bm/BM_REBASE_1000_100.txt
+++ b/bm/BM_REBASE_1000_100.txt
@@ -1,62 +1,61 @@
=============================================================
REBASE BENCHMARK: 1000 files (100 commits)
CONDITIONS: Depth=2, Files Mod=5%, Change=50%
- INITIAL RAW DATA SIZE: 16976 KB
=============================================================
SNAPSHOT: Commit #20
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.29s | 0.05s
+Time | 0.65s | 0.08s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1578 | 2334
-Repo size | 20404 KB | 19380 KB
+Inodes | 2272 | 2282
+Repo size | 38504 KB | 22700 KB
-------------------------------------------------------------
SNAPSHOT: Commit #40
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.54s | 0.05s
+Time | 0.59s | 0.08s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1607 | 3374
-Repo size | 20520 KB | 23788 KB
+Inodes | 3332 | 3322
+Repo size | 59384 KB | 31188 KB
-------------------------------------------------------------
SNAPSHOT: Commit #60
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.31s | 0.05s
+Time | 0.57s | 0.08s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1635 | 4414
-Repo size | 20632 KB | 28196 KB
+Inodes | 4392 | 4362
+Repo size | 80264 KB | 39676 KB
-------------------------------------------------------------
SNAPSHOT: Commit #80
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.29s | 0.05s
+Time | 0.57s | 0.08s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1664 | 5454
-Repo size | 20748 KB | 32596 KB
+Inodes | 5452 | 5402
+Repo size | 101144 KB | 48156 KB
-------------------------------------------------------------
SNAPSHOT: Commit #100
-------------------------------------------------------------
METRIC | URN | GIT
----------------+----------------------+---------------------
-Time | 0.54s | 0.10s
+Time | 0.57s | 0.08s
Max RSS | 0.02 MB | 0.01 MB
Page faults | Maj:0 / Min:0 | Maj:0 / Min:0
-Inodes | 1693 | 6495
-Repo size | 20864 KB | 37008 KB
+Inodes | 6512 | 6442
+Repo size | 122024 KB | 56644 KB
-------------------------------------------------------------
-TOTAL URN REBASES: 273
+TOTAL URN REBASES: 4950
diff --git a/bm/bm_rebase.pl b/bm/bm_rebase.pl
index 3cb0fd5..e89e7f5 100644
--- a/bm/bm_rebase.pl
+++ b/bm/bm_rebase.pl
@@ -1,10 +1,13 @@
#!/usr/bin/perl
use strict;
use warnings;
+use File::Copy;
use File::Spec;
-use File::Path qw(remove_tree);
+use File::Path qw(remove_tree make_path);
use File::Find;
+use File::Basename;
use Cwd qw(getcwd abs_path);
+use Time::HiRes qw(time);
my ($files, $depth, $total_commits, $file_perc, $change_perc) = @ARGV;
if (!defined $files || !defined $depth || !defined $total_commits || !defined $file_perc || !defined $change_perc) {
@@ -14,15 +17,16 @@ if (!defined $files || !defined $depth || !defined $total_commits || !defined $f
my $base_dir = getcwd();
my $urn_bin = abs_path(File::Spec->catfile("..", "urn"));
my $seed_bin = abs_path("seed.pl");
-my $bm_repo = "bm_repo";
+my $bm_repo = "sandbox";
my $sample_rate = int($total_commits / 5) || 1;
my %results;
my $initial_repo_size = "0 KB";
-
my $rebase_count = 0;
my %last_base_hashes;
+my $global_tick = 0;
+
sub get_size {
my $dir = shift;
return "0 KB" unless -d $dir;
@@ -62,10 +66,7 @@ sub run_rebase_benchmark {
remove_tree($bm_repo) if -d $bm_repo;
system("perl $seed_bin $files $depth > /dev/null 2>&1");
- # Capture raw directory size before VCS initialization
- if ($initial_repo_size eq "0 KB") {
- $initial_repo_size = get_size($bm_repo);
- }
+ if ($initial_repo_size eq "0 KB") { $initial_repo_size = get_size($bm_repo); }
chdir($bm_repo) or die $!;
@@ -84,19 +85,37 @@ sub run_rebase_benchmark {
my $num_to_change = int(scalar(@file_list) * ($file_perc / 100)) || 1;
my @target_files = @file_list[0 .. $num_to_change - 1];
- for my $i (1 .. $total_commits) {
- for my $target (@target_files) {
- open(my $fh, '<', $target) or next;
- my @lines = <$fh>;
- close($fh);
+ for my $i (2 .. $total_commits) {
+ my $debug_dir = "/tmp/urn/commit_$i";
+ make_path($debug_dir) unless -d $debug_dir;
- my $lines_to_change = int(scalar(@lines) * ($change_perc / 100)) || 1;
- splice(@lines, 0, $lines_to_change, "Commit $i: block change\n");
-
- if (open(my $out, '>', $target)) {
- print $out @lines;
- close($out);
- }
+ for my $target (@target_files) {
+ open(my $fh, '<:raw', $target) or die "Read fail: $target - $!";
+ my $content = do { local $/; <$fh> };
+ close($fh);
+
+ $global_tick++;
+ my $ts = time();
+ my $header = "C$i-T$ts-N$global_tick ";
+
+ my $total_bytes = length($content);
+ my $min_len = length($header);
+ my $to_change = int($total_bytes * ($change_perc / 100));
+
+ $to_change = $min_len if $to_change < $min_len;
+ $to_change = $total_bytes if $to_change > $total_bytes;
+
+ my $new_segment = substr($header . ("." x $to_change), 0, $to_change);
+ substr($content, 0, $to_change, $new_segment);
+
+ open(my $out, '>:raw', $target) or die "Write fail: $target - $!";
+ print $out $content;
+ close($out);
+
+ my $target_name = basename($target);
+ copy($target, "$debug_dir/$target_name") or warn "Backup failed: $!";
+
+ utime(undef, undef, $target);
}
my $cmd = ($tool_name eq "URN")
@@ -104,14 +123,17 @@ sub run_rebase_benchmark {
: "git add . && git commit -m 'c$i'";
if ($i % $sample_rate == 0 || $i == $total_commits) {
- my $stats = `/usr/bin/time -l sh -c "$cmd" 2>&1 > /dev/null`;
- my ($real, $rss, $maj, $min) = (0, 0, 0, 0);
- if ($stats =~ /(\d+\.\d+)\s+real/) { $real = $1; }
- if ($stats =~ /(\d+)\s+maximum resident set size/) { $rss = sprintf("%.2f MB", $1 / 1024 / 1024); }
- if ($stats =~ /(\d+)\s+page reclaims/) { $min = $1; }
- if ($stats =~ /(\d+)\s+page faults/) { $maj = $1; }
+ my $raw_output = `/usr/bin/time -l sh -c "$cmd" 2>&1`;
+
+ if ($tool_name eq "URN") {
+ track_rebases();
+ }
- if ($tool_name eq "URN") { track_rebases(); }
+ my ($real, $rss, $maj, $min) = (0, 0, 0, 0);
+ $real = $1 if $raw_output =~ /(\d+\.\d+)\s+real/;
+ $rss = sprintf("%.2f MB", $1 / 1024 / 1024) if $raw_output =~ /(\d+)\s+maximum resident set size/;
+ $min = $1 if $raw_output =~ /(\d+)\s+page reclaims/;
+ $maj = $1 if $raw_output =~ /(\d+)\s+page faults/;
my $meta = ($tool_name eq "URN") ? ".urn" : ".git";
$results{$i}{$tool_name} = {
@@ -124,11 +146,10 @@ sub run_rebase_benchmark {
print " [Commit $i] $tool_name sampled.\n";
} else {
system("$cmd > /dev/null 2>&1");
- if ($tool_name eq "URN") { track_rebases(); }
+ track_rebases() if $tool_name eq "URN";
}
}
chdir($base_dir);
- remove_tree($bm_repo);
}
$rebase_count = 0;
@@ -141,7 +162,6 @@ open(my $res, '>', $out_file) or die $!;
print $res "=============================================================\n";
print $res " REBASE BENCHMARK: $files files ($total_commits commits)\n";
print $res " CONDITIONS: Depth=$depth, Files Mod=$file_perc%, Change=$change_perc%\n";
-print $res " INITIAL RAW DATA SIZE: $initial_repo_size\n";
print $res "=============================================================\n\n";
foreach my $i (sort { $a <=> $b } keys %results) {
@@ -158,10 +178,7 @@ foreach my $i (sort { $a <=> $b } keys %results) {
printf $res "%-15s | %20s | %20s\n", "Repo size", $u->{size}, $g->{size};
print $res "-------------------------------------------------------------\n\n";
}
-
print $res "TOTAL URN REBASES: $rebase_count\n";
close($res);
-my $pager = $ENV{PAGER} || 'less';
-exec $pager, $out_file;
-
+exec "less $out_file";
diff --git a/bm/seed.pl b/bm/seed.pl
index 7649b64..81c70dd 100644
--- a/bm/seed.pl
+++ b/bm/seed.pl
@@ -10,7 +10,7 @@ if (!defined $total_files || !defined $max_depth) {
die "Usage: perl seed.pl <total_files> <max_depth>\nExample: perl seed.pl 200 20\n";
}
-my $target_root = "bm_repo";
+my $target_root = "sandbox";
my $files_created = 0;
# Helper to generate random "code-like" text
diff --git a/urn b/urn
index 069b695..c8ddbce 100644
--- a/urn
+++ b/urn
@@ -178,19 +178,24 @@ sub run_add {
: $idx_entry->{path} cmp $wrk_entry->{path};
if ($cmp == 0) {
+ my $idx_mtime = (stat(INDEX))[9]; # Needed if a mtime change <1s
if ($idx_entry->{mtime} == $wrk_entry->{mtime} &&
- $idx_entry->{size} == $wrk_entry->{size}) {
+ $idx_entry->{size} == $wrk_entry->{size} &&
+ $idx_entry->{mtime} != $idx_mtime) {
# No change: Preserve all 3 hashes and metadata
printf $out "%-40s\t%-40s\t%-40s\t%-12d\t%-10d\t%s\n",
$idx_entry->{s_hash}, $idx_entry->{c_hash}, $idx_entry->{b_hash},
$idx_entry->{mtime}, $idx_entry->{size}, $idx_entry->{path};
} else {
my $p = $wrk_entry->{path};
+ my $last_hash = $idx_entry->{c_hash};
my $current_hash = hash_file_content($p);
- my $stg_path = File::Spec->catfile(TMP_DIR, $p);
- make_path(dirname($stg_path));
-
- (-l $p) ? symlink(readlink($p), $stg_path) : copy($p, $stg_path);
+
+ if ($last_hash ne $current_hash) {
+ my $stg_path = File::Spec->catfile(TMP_DIR, $p);
+ make_path(dirname($stg_path));
+ (-l $p) ? symlink(readlink($p), $stg_path) : copy($p, $stg_path);
+ }
# Update staged hash, preserve committed and base hashes
printf $out "%-40s\t%-40s\t%-40s\t%-12d\t%-10d\t%s\n",