From 72dbe9ecd1bb5ba0ac938bb49d817b2f89d55a3f Mon Sep 17 00:00:00 2001 From: Sadeep Madurange Date: Sat, 18 Apr 2026 16:00:09 +0800 Subject: Track last committed hash of files in index. --- vcx | 143 ++++++++++++++++++++++++-------------------------------------------- 1 file changed, 51 insertions(+), 92 deletions(-) diff --git a/vcx b/vcx index f30e1f1..f1772c9 100644 --- a/vcx +++ b/vcx @@ -149,8 +149,7 @@ sub run_add { my $it_idx = stream_index(); my $it_wrk = stream_tree(@paths); - open(my $out, ">:raw", $tmp_idx) - or die "Could not create $tmp_idx: $!"; + open(my $out, ">:raw", $tmp_idx) or die "Could not create $tmp_idx: $!"; my $idx_entry = $it_idx->(); my $wrk_entry = $it_wrk->(); @@ -160,63 +159,40 @@ sub run_add { : !defined $wrk_entry ? -1 : $idx_entry->{path} cmp $wrk_entry->{path}; - my $path = ($cmp <= 0) ? $idx_entry->{path} : $wrk_entry->{path}; + my ($s, $c, $b, $m, $z, $p); + if ($cmp == 0) { - if ($idx_entry->{mtime} == $wrk_entry->{mtime} && - $idx_entry->{size} == $wrk_entry->{size}) { - # No change - print $out join("\t", $idx_entry->{s_hash}, - $idx_entry->{b_hash}, $idx_entry->{mtime}, - $idx_entry->{size}, $idx_entry->{path}) . "\n"; + ($p, $m, $z) = ($wrk_entry->{path}, $wrk_entry->{mtime}, $wrk_entry->{size}); + $c = $idx_entry->{c_hash} // "-"; + $b = $idx_entry->{b_hash} // "-"; + + if ($idx_entry->{mtime} == $wrk_entry->{mtime} && $idx_entry->{size} == $wrk_entry->{size}) { + $s = $idx_entry->{s_hash}; } else { - my $current_hash = hash_file_content($wrk_entry->{path}); - # Snapshot to staging area - my $stg_path = File::Spec->catfile(TMP_DIR, $wrk_entry->{path}); + $s = hash_file_content($p); + my $stg_path = File::Spec->catfile(TMP_DIR, $p); make_path(dirname($stg_path)); - - if (!-l $wrk_entry->{path}) { - copy($wrk_entry->{path}, $stg_path) or die "Failed to stage file: $!"; - } else { - my $link_target = readlink($wrk_entry->{path}); - symlink($link_target, $stg_path) or die "Failed to stage symlink: $!"; - } - - if ($current_hash eq $idx_entry->{s_hash}) { - print $out join("\t", $idx_entry->{s_hash}, - $idx_entry->{b_hash}, $wrk_entry->{mtime}, - $wrk_entry->{size}, $wrk_entry->{path}) . "\n"; - } else { - print $out join("\t", $current_hash, $idx_entry->{b_hash}, - $wrk_entry->{mtime}, $wrk_entry->{size}, $wrk_entry->{path}) . "\n"; - } + (-l $p) ? symlink(readlink($p), $stg_path) : copy($p, $stg_path); } $idx_entry = $it_idx->(); $wrk_entry = $it_wrk->(); } elsif ($cmp > 0) { - # New File: hash and snapshot to staging - my $hash = hash_file_content($wrk_entry->{path}); - my $stg_path = File::Spec->catfile(TMP_DIR, $wrk_entry->{path}); + ($p, $m, $z) = ($wrk_entry->{path}, $wrk_entry->{mtime}, $wrk_entry->{size}); + $s = hash_file_content($p); + ($c, $b) = ("-", "-"); + + my $stg_path = File::Spec->catfile(TMP_DIR, $p); make_path(dirname($stg_path)); - - if (!-l $wrk_entry->{path}) { - copy($wrk_entry->{path}, $stg_path) or die "Failed to stage file: $!"; - } else { - my $link_target = readlink($wrk_entry->{path}); - symlink($link_target, $stg_path) or die "Failed to stage symlink: $!"; - } - - print $out join("\t", $hash, $hash, $wrk_entry->{mtime}, - $wrk_entry->{size}, $wrk_entry->{path}) . "\n"; + (-l $p) ? symlink(readlink($p), $stg_path) : copy($p, $stg_path); $wrk_entry = $it_wrk->(); } else { - # File in index but missing from disk - print $out join("\t", $idx_entry->{s_hash}, - $idx_entry->{b_hash}, $idx_entry->{mtime}, - $idx_entry->{size}, $idx_entry->{path}) . "\n"; + ($s, $c, $b, $m, $z, $p) = ($idx_entry->{s_hash}, $idx_entry->{c_hash}, $idx_entry->{b_hash}, $idx_entry->{mtime}, $idx_entry->{size}, $idx_entry->{path}); $idx_entry = $it_idx->(); } + + printf $out "%-40s\t%-40s\t%-40s\t%-12d\t%-10d\t%s\n", $s, $c, $b, $m, $z, $p; } close $out; @@ -264,30 +240,24 @@ sub run_commit { while ($idx || $old) { my $cmp = !defined $idx ? 1 : !defined $old ? -1 : $idx->{path} cmp $old->{path}; - my ($out_s, $out_b, $out_m, $out_z, $out_p); + my ($out_s, $out_c, $out_b, $out_m, $out_z, $out_p); if ($cmp < 0) { # New file - $out_p = $idx->{path}; - $out_s = $idx->{s_hash}; - $out_b = $idx->{s_hash}; - $out_m = $idx->{mtime}; - $out_z = $idx->{size}; + ($out_p, $out_s, $out_m, $out_z) = ($idx->{path}, $idx->{s_hash}, $idx->{mtime}, $idx->{size}); + $out_c = $out_s; + $out_b = $out_s; my $obj_path = get_obj_path($out_b); my $stg_file = File::Spec->catfile(TMP_DIR, $out_p); if (!-e $obj_path) { rename($stg_file, $obj_path) or copy($stg_file, $obj_path); } - $idx = $it_idx->(); } - elsif ($cmp == 0) { # Modified/unchanged - $out_p = $idx->{path}; - $out_s = $idx->{s_hash}; - $out_m = $idx->{mtime}; - $out_z = $idx->{size}; + elsif ($cmp == 0) { # Modified or Unchanged + ($out_p, $out_s, $out_m, $out_z) = ($idx->{path}, $idx->{s_hash}, $idx->{mtime}, $idx->{size}); - if ($idx->{s_hash} ne $old->{hash}) { + if ($idx->{s_hash} ne ($idx->{c_hash} // "-")) { my $base_obj = get_obj_path($old->{hash}); my $stg_file = File::Spec->catfile(TMP_DIR, $out_p); @@ -295,7 +265,6 @@ sub run_commit { ? qx(diff '$base_obj' '$stg_file') : make_bin_patch($stg_file, $base_obj); - # 1.0 Factor: Use patch if it is smaller than or equal to the file size if (defined $patch && length($patch) <= $out_z) { if (!$use_disk_patch && ($patch_mem_size + length($patch)) > MEM_LIMIT) { ($pt_fh, $pt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0); @@ -305,7 +274,6 @@ sub run_commit { %patches = (); $use_disk_patch = 1; } - if ($use_disk_patch) { my $tar = Archive::Tar->new($pt_path); $tar->add_data("$out_p.patch", $patch); @@ -323,19 +291,19 @@ sub run_commit { rename($stg_file, $obj_path) or copy($stg_file, $obj_path); } } + $out_c = $out_s; } else { - # 0.0 Factor: Identity check (no changes) $out_b = $old->{hash}; + $out_c = $idx->{c_hash}; } $idx = $it_idx->(); $old = $it_old->(); } - else { # Deleted + else { $old = $it_old->(); next; } - # Record tree my $t_line = "$out_b\t$out_p\n"; $sha_new_tree->add($t_line); $new_tree_size += length($t_line); @@ -348,11 +316,10 @@ sub run_commit { } $use_disk_nt ? print $nt_fh $t_line : push @new_tree_lines, $t_line; - # Record index - print $tmp_idx_fh join("\t", $out_s, $out_b, $out_m, $out_z, $out_p) . "\n"; + printf $tmp_idx_fh "%-40s\t%-40s\t%-40s\t%-12d\t%-10d\t%s\n", + $out_s, ($out_c // "-"), ($out_b // "-"), $out_m, $out_z, $out_p; } - # Finalize tree my $new_tree_hash = $sha_new_tree->hexdigest; my $tree_obj_path = get_obj_path($new_tree_hash); if ($use_disk_nt) { @@ -362,27 +329,17 @@ sub run_commit { write_file($tree_obj_path, join("", @new_tree_lines)); } - # Finalize patch bundle my $patch_bundle_hash = ""; if ($use_disk_patch || %patches) { my $bundle_tmp = File::Spec->catfile(TMP_DIR, "bundle.tar.gz"); my $tar = Archive::Tar->new; - - if ($use_disk_patch) { - $tar->read($pt_path); - unlink $pt_path; - } - + if ($use_disk_patch) { $tar->read($pt_path); unlink $pt_path; } $tar->add_data($_, $patches{$_}) for keys %patches; - - # Write directly to disk with Gzip to avoid loading huge tarballs into RAM $tar->write($bundle_tmp, COMPRESS_GZIP); - $patch_bundle_hash = hash_file_content($bundle_tmp); rename($bundle_tmp, get_obj_path($patch_bundle_hash)); } - # Revision file my $next_id = to_hex_id(from_hex_id($parent_id) + 1); $msg //= "Commit $next_id"; open my $rfh, '>:raw', File::Spec->catfile(REV_DIR, $next_id) or die $!; @@ -727,24 +684,26 @@ sub stream_tree { } sub stream_index { - my $idx = INDEX; - return sub { return; } unless -e $idx && -s $idx > 0; - open(my $fh, "<:raw", $idx) or die "Could not open index: $!"; + my $idx_path = INDEX; + return sub { return undef; } unless -e $idx_path; + open(my $fh, "<:raw", $idx_path) or die $!; return sub { my $line = <$fh>; - unless (defined $line) { close $fh; return; } - chomp $line; - - my ($s_hash, $b_hash, $mtime, $size, $path) = split(/\t/, $line, 5); - - return { - s_hash => $s_hash, - b_hash => $b_hash, - mtime => $mtime, - size => $size, - path => $path, - }; + if ($line) { + chomp $line; + my ($s_h, $c_h, $b_h, $m, $z, $p) = split(/\t/, $line, 6); + return { + s_hash => $s_h, + c_hash => $c_h, + b_hash => $b_h, + mtime => $m, + size => $z, + path => $p, + }; + } + close $fh; + return undef; }; } -- cgit v1.2.3