summaryrefslogtreecommitdiffstats
path: root/vcx
diff options
context:
space:
mode:
authorSadeep Madurange <sadeep@asciimx.com>2026-04-06 15:02:39 +0800
committerSadeep Madurange <sadeep@asciimx.com>2026-04-17 16:39:52 +0800
commitd59e107305060b844ef3098774be85566456306d (patch)
tree04028a34e8529c56652657522bbf200cc8877c53 /vcx
parenteb93a00ae4faafd7b6b3907546515d471c5cc30a (diff)
downloadcvn-d59e107305060b844ef3098774be85566456306d.tar.gz
Add, commit, log commands.
Diffstat (limited to 'vcx')
-rw-r--r--vcx870
1 files changed, 475 insertions, 395 deletions
diff --git a/vcx b/vcx
index d6139b5..b5997f3 100644
--- a/vcx
+++ b/vcx
@@ -2,6 +2,7 @@
use strict;
use warnings;
+use IO::Handle;
use File::Path qw(make_path);
use File::Copy qw(copy);
use File::Find;
@@ -9,22 +10,25 @@ use File::Compare;
use File::Basename;
use File::Glob qw(:bsd_glob);
use File::Spec;
+use File::Temp qw(tempfile);
use Getopt::Long;
use Archive::Tar;
use Compress::Zlib;
use POSIX qw(strftime);
use Digest::SHA qw(sha1_hex);
-use constant VCX_DIR => '.vcx';
-use constant HEAD => VCX_DIR . '/head'; # Current commit ID
-use constant OBJ_DIR => VCX_DIR . '/obj'; # Latest version of a file
-use constant REV_DIR => VCX_DIR . '/rev'; # Commits
+use constant REPO => '.vcx';
+use constant HEAD => REPO . '/head'; # Current revision ID
+use constant INDEX => REPO . '/index'; # Index
+use constant OBJ_DIR => REPO . '/obj'; # Object store
+use constant REV_DIR => REPO . '/rev'; # Revisions
# Staging area
-use constant TMP_DIR => VCX_DIR . '/index';
-use constant TMP_TREE => TMP_DIR . '/tree';
-use constant TMP_META => TMP_DIR . '/meta';
-use constant TMP_DIFF => TMP_DIR . '/delta.tar.gz';
+use constant TMP_DIR => REPO . '/stg';
+use constant TMP_DIFF => TMP_DIR . '/delta.tar.gz';
+
+use constant MEM_LIMIT => 64 * 1024 * 1024;
+use constant MAX_INDEX_SIZE => 16 * 1024 * 1024;
Getopt::Long::Configure("bundling");
@@ -51,261 +55,269 @@ if ($cmd eq 'init') {
}
sub run_init {
- make_path(OBJ_DIR, REV_DIR);
-
- my $initial_hex = to_hex_id(0);
- my $rev0_dir = File::Spec->catfile(REV_DIR, $initial_hex);
- make_path($rev0_dir);
-
- write_file(HEAD, "$initial_hex\n");
-
- # Baseline tree (empty)
- my $empty_tree_hash = sha1_hex("");
- my $empty_tree_file = File::Spec->catfile($rev0_dir, "tree-$empty_tree_hash");
- open my $fh, '>', $empty_tree_file or die $!; close $fh;
- make_path(File::Spec->catdir(OBJ_DIR, $empty_tree_hash));
-
- open my $mfh, '>', File::Spec->catfile($rev0_dir, "message"); close $mfh;
+ make_path(OBJ_DIR, REV_DIR, TMP_DIR);
+ touch_file(INDEX);
+ my $rev_id = to_hex_id(0);
+ write_file(HEAD, "$rev_id\n");
print "Initialized repository.\n";
}
sub run_status {
- open my $fh, '<', HEAD or die "Not a repository.\n";
- my $head = <$fh>; chomp $head; close $fh;
- print "On revision [$head]\n";
-
- my %staged_diffs;
- my $staged_diff_bundle = TMP_DIFF;
+}
- if (-e TMP_DIFF) {
- my @list = qx(tar -tf '$staged_diff_bundle');
- foreach (@list) {
- chomp;
- if (/(.+)\.patch$/) { $staged_diffs{$1} = 1; }
+sub run_add {
+ my @paths = @_;
+ @paths = (".") unless @paths;
+
+ my $idx_path = INDEX;
+ my $tmp_idx = "$idx_path.tmp";
+
+ my $it_idx = stream_index();
+ my $it_wrk = stream_tree(@paths);
+
+ open(my $out, ">:raw", $tmp_idx)
+ or die "Could not create $tmp_idx: $!";
+
+ my $idx_entry = $it_idx->();
+ my $wrk_entry = $it_wrk->();
+
+ while ($idx_entry || $wrk_entry) {
+ my $cmp = !defined $idx_entry ? 1
+ : !defined $wrk_entry ? -1
+ : $idx_entry->{path} cmp $wrk_entry->{path};
+
+ my $path = ($cmp <= 0) ? $idx_entry->{path} : $wrk_entry->{path};
+ if ($cmp == 0) {
+ if ($idx_entry->{mtime} == $wrk_entry->{mtime} &&
+ $idx_entry->{size} == $wrk_entry->{size}) {
+ # No change
+ print $out join("\t", $idx_entry->{s_hash},
+ $idx_entry->{b_hash}, $idx_entry->{mtime},
+ $idx_entry->{size}, $idx_entry->{path}) . "\n";
+ } else {
+ my $current_hash = hash_file_content($wrk_entry->{path});
+ # Snapshot to staging area
+ my $stg_path = File::Spec->catfile(TMP_DIR, $wrk_entry->{path});
+ make_path(dirname($stg_path));
+ copy($wrk_entry->{path}, $stg_path);
+
+ if ($current_hash eq $idx_entry->{s_hash}) {
+ print $out join("\t", $idx_entry->{s_hash},
+ $idx_entry->{b_hash}, $wrk_entry->{mtime},
+ $wrk_entry->{size}, $wrk_entry->{path}) . "\n";
+ } else {
+ print $out join("\t", $current_hash, $idx_entry->{b_hash},
+ $wrk_entry->{mtime}, $wrk_entry->{size}, $wrk_entry->{path}) . "\n";
+ }
+ }
+ $idx_entry = $it_idx->();
+ $wrk_entry = $it_wrk->();
+ }
+ elsif ($cmp > 0) {
+ # New File: hash and snapshot to staging
+ my $hash = hash_file_content($wrk_entry->{path});
+ my $stg_path = File::Spec->catfile(TMP_DIR, $wrk_entry->{path});
+ make_path(dirname($stg_path));
+ copy($wrk_entry->{path}, $stg_path);
+
+ print $out join("\t", $hash, $hash, $wrk_entry->{mtime},
+ $wrk_entry->{size}, $wrk_entry->{path}) . "\n";
+ $wrk_entry = $it_wrk->();
+ }
+ else {
+ # File in index but missing from disk
+ print $out join("\t", $idx_entry->{s_hash},
+ $idx_entry->{b_hash}, $idx_entry->{mtime},
+ $idx_entry->{size}, $idx_entry->{path}) . "\n";
+ $idx_entry = $it_idx->();
}
}
- my ($tree_ptr) = bsd_glob(File::Spec->catfile(REV_DIR, $head, "tree-*"));
- my ($tree_hash) = $tree_ptr =~ /tree-([a-f0-9]{40})$/;
- my $latest_tree = File::Spec->catdir(OBJ_DIR, $tree_hash);
-
- # Pass 1: Workspace -> History (Detects New and Modified)
- find({
- wanted => sub {
- if ($File::Find::name =~ /\/\Q${\VCX_DIR}\E$/ || $_ eq VCX_DIR) {
- $File::Find::prune = 1;
- return;
- }
- return if -d $File::Find::name;
+ close $out;
+ rename($tmp_idx, $idx_path) or die "Failed to update index: $!";
+}
- my $rel = File::Spec->abs2rel($File::Find::name, '.');
- $rel =~ s|^\./||;
-
- my $base_in_tree = File::Spec->catfile($latest_tree, $rel);
- my $path_hash = sha1_hex($rel);
-
- if (-e $base_in_tree || -l $base_in_tree) {
- my $obj_in_store = readlink($base_in_tree);
- if (compare($_, $obj_in_store) != 0) {
- my $staged = $staged_diffs{$path_hash} ? " (staged)" : "";
- print "[M] $rel$staged\n";
- }
- } else {
- my $tmp_link = File::Spec->catfile(TMP_TREE, $rel);
- my $staged = (-e $tmp_link || -l $tmp_link) ? " (staged)" : "";
- print "[N] $rel$staged\n";
+sub run_commit {
+ my ($msg) = @_;
+
+ my $parent_id = read_head() // to_hex_id(0);
+ my $parent_tree_hash = "";
+
+ if ($parent_id ne to_hex_id(0)) {
+ my $p_rev_file = File::Spec->catfile(REV_DIR, $parent_id);
+ if (open my $pfh, '<', $p_rev_file) {
+ while (<$pfh>) {
+ if (/^tree:(.*)$/) { $parent_tree_hash = $1; last; }
}
- },
- no_chdir => 1
- }, '.');
-
- # Pass 2: History -> Workspace (Detects Deletions)
- if (-d $latest_tree) {
- find({
- wanted => sub {
- return if -d $_;
- my $rel = File::Spec->abs2rel($_, $latest_tree);
-
- # If it's in the commit tree but GONE from the workspace
- if (!-e $rel && !-l $rel) {
- my $tmp_link = File::Spec->catfile(TMP_TREE, $rel);
- my $staged = (!-e $tmp_link && !-l $tmp_link) ? " (staged)" : "";
- print "[D] $rel$staged\n";
- }
- },
- no_chdir => 1
- }, $latest_tree);
+ close $pfh;
+ }
}
-}
-
-sub run_add {
- my @targets = @_;
- my %patches;
-
- make_path(TMP_TREE);
-
- open my $fh_h, '<', HEAD or die $!;
- my $head = <$fh_h>; chomp $head; close $fh_h;
-
- my ($latest_tree_ptr) = bsd_glob(File::Spec->catfile(REV_DIR, $head, "tree-*"));
- my ($latest_tree_hash) = $latest_tree_ptr =~ /tree-([a-f0-9]{40})$/;
- my $latest_tree_dir = File::Spec->catdir(OBJ_DIR, $latest_tree_hash);
- my $next_id_hex = to_hex_id(from_hex_id($head) + 1);
+ my $it_idx = stream_index();
+ my $it_old = stream_tree_file($parent_tree_hash);
- my %entries;
- open my $afh, '>>', TMP_META or die $!;
+ my $sha_new_tree = Digest::SHA->new(1);
+ my @new_tree_lines;
+ my $new_tree_size = 0;
+ my $use_disk_nt = 0;
+ my ($nt_fh, $nt_path);
- init_stage($latest_tree_dir, \%entries);
+ my %patches;
+ my $patch_mem_size = 0;
+ my $use_disk_patch = 0;
+ my ($pt_fh, $pt_path);
+
+ my ($tmp_idx_fh, $tmp_idx_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
+ binmode $tmp_idx_fh, ":raw";
+
+ my $idx = $it_idx->();
+ my $old = $it_old->();
+
+ while ($idx || $old) {
+ my $cmp = !defined $idx ? 1 : !defined $old ? -1 : $idx->{path} cmp $old->{path};
+ my ($out_s, $out_b, $out_m, $out_z, $out_p);
+
+ if ($cmp < 0) { # New file
+ $out_p = $idx->{path};
+ $out_s = $idx->{s_hash};
+ $out_b = $idx->{s_hash};
+ $out_m = $idx->{mtime};
+ $out_z = $idx->{size};
+
+ my $obj_path = get_obj_path($out_b);
+ my $stg_file = File::Spec->catfile(TMP_DIR, $out_p);
+ if (!-e $obj_path) {
+ rename($stg_file, $obj_path) or copy($stg_file, $obj_path);
+ }
+
+ $idx = $it_idx->();
+ }
+ elsif ($cmp == 0) { # Modified/unchanged
+ $out_p = $idx->{path};
+ $out_s = $idx->{s_hash};
+ $out_m = $idx->{mtime};
+ $out_z = $idx->{size};
+
+ if ($idx->{s_hash} ne $old->{hash}) {
+ my $base_obj = get_obj_path($old->{hash});
+ my $stg_file = File::Spec->catfile(TMP_DIR, $out_p);
+
+ my $patch = (-T $stg_file)
+ ? qx(diff -u '$base_obj' '$stg_file')
+ : make_bin_patch($stg_file, $base_obj);
+
+ # DEBUG CODE START
+ if (defined $patch) {
+ my $raw_l = length($patch);
+ my $gz_p = Compress::Zlib::compress($patch);
+ my $gz_l = length($gz_p);
+ my $ratio = $raw_l > 0 ? ($gz_l / $raw_l) * 100 : 0;
+ printf("DEBUG: %s | File: %d | Raw Patch: %d | GZ Patch: %d (%.2f%% ratio)\n",
+ $out_p, $out_z, $raw_l, $gz_l, $ratio);
+ }
+ # DEBUG CODE END
+
+ if (defined $patch && length($patch) < ($out_z * 0.5)) {
+ if (!$use_disk_patch && ($patch_mem_size + length($patch)) > MEM_LIMIT) {
+ ($pt_fh, $pt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
+ my $tar = Archive::Tar->new;
+ $tar->add_data($_, $patches{$_}) for keys %patches;
+ $tar->write($pt_fh);
+ %patches = ();
+ $use_disk_patch = 1;
+ }
- foreach my $input (@targets) {
- my @expanded = bsd_glob($input);
- foreach my $t (@expanded) {
- find({
- wanted => sub {
- if ($File::Find::name =~ /\/\Q${\VCX_DIR}\E$/ || $_ eq VCX_DIR) {
- $File::Find::prune = 1;
- return;
+ if ($use_disk_patch) {
+ my $tar = Archive::Tar->new($pt_path);
+ $tar->add_data("$out_p.patch", $patch);
+ $tar->write($pt_path);
+ } else {
+ $patches{"$out_p.patch"} = $patch;
+ $patch_mem_size += length($patch);
}
-
- return if -d $_;
-
- my $rel = $File::Find::name =~ s|^\./||r;
- $entries{$rel} = 1;
-
- my $staged_path = File::Spec->catfile(TMP_TREE, $rel);
- my $prev_link = File::Spec->catfile($latest_tree_dir, $rel);
-
- # CASE 1: Regular File
- if (-f $_ && !-l $_) {
- return if -e $staged_path; # Already staged
-
- if (-l $prev_link) {
- my $obj_in_head = readlink($prev_link);
- return if compare($_, $obj_in_head) == 0;
- }
-
- my $obj_name = sha1_hex($rel);
- my $obj_path = File::Spec->catfile(OBJ_DIR, $obj_name);
-
- # Prepare patches in memory and save to disk in one write.
- make_patch($File::Find::name, $obj_name, $obj_path, \%patches);
- stage_file($File::Find::name, $obj_path, $staged_path);
- print $afh "$rel\n"; # Record in meta file for the commit command
- }
-
- # CASE 2: Symlink
- elsif (-l _) {
- my $target = readlink($File::Find::name);
- if (-l $prev_link) {
- return if readlink($prev_link) eq $target;
- }
- if (-l $staged_path) {
- return if readlink($staged_path) eq $target;
- }
- stage_link($File::Find::name, $staged_path);
+ $out_b = $old->{hash};
+ unlink($stg_file); # Remove staged file since we only need the patch
+ } else {
+ $out_b = $out_s;
+ my $obj_path = get_obj_path($out_b);
+ if (!-e $obj_path) {
+ rename($stg_file, $obj_path) or copy($stg_file, $obj_path);
}
- },
- no_chdir => 1,
- }, $t);
- }
- }
-
- save_patches(\%patches);
-
- # Pass 2: History -> Workspace (Detects Deletions)
- foreach my $path (keys %entries) {
- if (!-e $path && !-l $path) {
- delete $entries{$path};
- my $staged_path = File::Spec->catfile(TMP_TREE, $path);
- if (-e $staged_path || -l $staged_path) {
- unlink($staged_path) or die "Could not unlink staged $path: $!";
- my $parent = dirname($staged_path);
- while ($parent ne TMP_TREE && -d $parent) {
- last if bsd_glob("$parent/*"); # Stop if not empty
- rmdir($parent);
- $parent = dirname($parent);
}
+ } else {
+ $out_b = $old->{hash};
}
- print "[D] $path (staged for deletion)\n";
+ $idx = $it_idx->();
+ $old = $it_old->();
+ }
+ else { # Deleted
+ $old = $it_old->();
+ next;
}
- }
-
- close $afh;
-
- my @sorted_paths = sort keys %entries;
- my $tree_ents = join("\n", @sorted_paths);
- my $tree_header = "tree " . scalar(@sorted_paths) . "\n";
- my $tree_data = $tree_header . $tree_ents;
-
- my $tree_hash = sha1_hex($tree_data);
- my $tree_file = File::Spec->catfile(TMP_DIR, "tree-$tree_hash");
- open my $fh, '>', $tree_file or die $!; close $fh;
-}
-
-sub run_commit {
- my ($msg) = @_;
- my ($staged_tree_ptr) = bsd_glob(File::Spec->catfile(TMP_DIR, "tree-*"));
- my ($tree_hash) = $staged_tree_ptr =~ /tree-([a-f0-9]{40})$/;
- my $tree_path = File::Spec->catdir(OBJ_DIR, $tree_hash);
- my $tree_exists = -d $tree_path;
+ # Record tree
+ my $t_line = "$out_b\t$out_p\n";
+ $sha_new_tree->add($t_line);
+ $new_tree_size += length($t_line);
- my $content_changed = (-e TMP_META || -e TMP_DIFF);
+ if (!$use_disk_nt && $new_tree_size > MEM_LIMIT) {
+ ($nt_fh, $nt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
+ print $nt_fh @new_tree_lines;
+ @new_tree_lines = ();
+ $use_disk_nt = 1;
+ }
+ $use_disk_nt ? print $nt_fh $t_line : push @new_tree_lines, $t_line;
- if ($tree_exists && !$content_changed) {
- print "Nothing to commit.";
- File::Path::remove_tree(TMP_DIR) if -d TMP_DIR;
- return;
+ # Record index
+ print $tmp_idx_fh join("\t", $out_s, $out_b, $out_m, $out_z, $out_p) . "\n";
}
- if (!$msg || $msg eq "") {
- $msg = launch_editor();
- die "Commit aborted: empty message.\n" unless $msg =~ /\S/;
+ # Finalize tree
+ my $new_tree_hash = $sha_new_tree->hexdigest;
+ my $tree_obj_path = get_obj_path($new_tree_hash);
+ if ($use_disk_nt) {
+ close $nt_fh;
+ rename($nt_path, $tree_obj_path);
+ } else {
+ write_file($tree_obj_path, join("", @new_tree_lines));
}
- # Prepare IDs
- open my $fh_h, '<', HEAD or die "Not a repository.\n";
- my $old_head = <$fh_h>; chomp $old_head; close $fh_h;
- my $next_id_hex = to_hex_id(from_hex_id($old_head) + 1);
- my $rev_dir = File::Spec->catfile(REV_DIR, $next_id_hex);
- make_path($rev_dir);
-
- # Save file to store
- if (-e TMP_META) {
- open my $mfh, '<', TMP_META or die $!;
- while (my $rel = <$mfh>) {
- chomp $rel;
- my $obj_name = sha1_hex($rel);
- my $obj_path = File::Spec->catfile(OBJ_DIR, $obj_name);
- copy($rel, $obj_path) or die "Failed to update store for $rel: $!";
+ # Finalize patch bundle
+ my $patch_bundle_hash = "";
+ if ($use_disk_patch || %patches) {
+ use IO::Compress::Gzip qw(gzip $GzipError);
+ my $tar_data;
+ if ($use_disk_patch) {
+ open my $pt_in, '<:raw', $pt_path;
+ $tar_data = do { local $/; <$pt_in> };
+ close $pt_in;
+ unlink $pt_path;
+ } else {
+ my $tar = Archive::Tar->new;
+ $tar->add_data($_, $patches{$_}) for keys %patches;
+ $tar_data = $tar->write();
}
- close $mfh;
+ my $gzipped_payload;
+ gzip \$tar_data => \$gzipped_payload or die "Gzip failed: $GzipError";
+ $patch_bundle_hash = sha1_hex($gzipped_payload);
+ write_file(get_obj_path($patch_bundle_hash), $gzipped_payload);
}
- # Save tree (snapshots the structure)
- if (!$tree_exists) {
- make_path($tree_path);
- rename(TMP_TREE, $tree_path) or die "Failed to save directories: $!";
- }
-
- rename($staged_tree_ptr, File::Spec->catfile($rev_dir, "tree-$tree_hash"))
- or die "Failed to save tree pointer to revision: $!";
-
- # Move deltas
- if (-e TMP_DIFF) {
- my $dest_diff = File::Spec->catfile($rev_dir, "delta.tar.gz");
- rename(TMP_DIFF, $dest_diff)
- or die "Failed to move delta to $dest_diff: $!";
- }
-
- write_file(File::Spec->catfile($rev_dir, "message"), "$msg\n");
- write_file(HEAD, "$next_id_hex\n"); # Update head
-
- File::Path::remove_tree(TMP_DIR) if -d TMP_DIR;
-
- my ($subject) = split(/\n/, $msg);
- print "Committed revision [$next_id_hex]: $subject\n";
+ # Revision file
+ my $next_id = to_hex_id(from_hex_id($parent_id) + 1);
+ $msg //= "Commit $next_id";
+ open my $rfh, '>:raw', File::Spec->catfile(REV_DIR, $next_id) or die $!;
+ print $rfh "tree:$new_tree_hash\nparent:$parent_id\n";
+ print $rfh "patch:$patch_bundle_hash\n" if $patch_bundle_hash;
+ print $rfh "date:" . time() . "\nmsg:$msg\n";
+ close $rfh;
+
+ close $tmp_idx_fh;
+ rename($tmp_idx_path, INDEX);
+ write_file(HEAD, "$next_id\n");
+ File::Path::remove_tree(TMP_DIR);
+ make_path(TMP_DIR);
+ print "Revision $next_id committed.\n";
}
sub run_log {
@@ -319,207 +331,275 @@ sub run_log {
print "Revision History (HEAD: $head)\n\n";
- my $rev_num = from_hex_id($head);
- while ($rev_num > 0) {
- my $hex_id = to_hex_id($rev_num);
- my $rev_dir = File::Spec->catfile(REV_DIR, $hex_id);
-
- # Stop if we hit a gap in history or the beginning
- last unless -d $rev_dir;
+ my $current_id = $head;
+ while ($current_id) {
+ my $rev_file = File::Spec->catfile(REV_DIR, $current_id);
+ last unless -f $rev_file;
- # Stat index 9 is the last modification time
- my $mtime = (stat($rev_dir))[9];
- my $date_str = strftime("%a %b %e %H:%M:%S %Y", localtime($mtime));
-
- my $msg_file = File::Spec->catfile($rev_dir, "message");
- my $message = "[No message]";
-
- if (-e $msg_file) {
- open my $mfh, '<', $msg_file;
- $message = do { local $/; <$mfh> } // "[Empty message]";
- $message =~ s/^\s+|\s+$//g; # Trim whitespace
- close $mfh;
+ open my $rfh, '<', $rev_file or last;
+ my ($tree, $parent, $date, $msg) = ("", "", "", "[No message]");
+
+ while (<$rfh>) {
+ chomp;
+ if (/^tree:(.*)$/) { $tree = $1; }
+ elsif (/^parent:(.*)$/) { $parent = $1; }
+ elsif (/^date:(.*)$/) { $date = $1; }
+ elsif (/^msg:(.*)$/) { $msg = $1; }
}
+ close $rfh;
+
+ my $date_str = $date ? strftime("%a %b %e %H:%M:%S %Y", localtime($date))
+ : "Unknown Date";
- print "commit $hex_id\n";
- print "Date: $date_str\n";
- print "\n $message\n\n";
+ print "commit $current_id\n";
+ print "Date: $date_str\n";
+ print "\n $msg\n\n";
- $rev_num--;
+ # Move to parent to walk backward
+ $current_id = $parent;
}
close $pipe;
select($old_fh);
}
-sub stage_file {
- my ($src, $obj, $tmp) = @_;
- make_path(dirname($tmp));
-
- # We want the link inside tmp/ to point to "obj/HASH"
- # relative to project root.
- my $rel_target = $obj;
-
- unlink($tmp) if -e $tmp || -l $tmp;
- symlink($rel_target, $tmp) or die "Failed to symlink $tmp: $!";
- print "[Staged File] $src\n";
+sub make_bin_patch {
+ my ($new_file, $old_file) = @_;
+
+ my $new_size = -s $new_file;
+ open my $f_new, '<:raw', $new_file or die $!;
+ open my $f_old, '<:raw', $old_file or die $!;
+
+ # Start patch string with new total size (8 bytes)
+ my $patch = pack("Q", $new_size);
+
+ my $offset = 0;
+ my $blk_size = 4096;
+
+ while (1) {
+ my $read_new = sysread($f_new, my $buf_new, $blk_size);
+ my $read_old = sysread($f_old, my $buf_old, $blk_size);
+ last if !$read_new && !$read_old;
+
+ # If blocks differ, record the change
+ if (($buf_new // '') ne ($buf_old // '')) {
+ # Format: Offset (Q), Length (L), then the raw data
+ $patch .= pack("QL", $offset, length($buf_new)) . $buf_new;
+ }
+ $offset += $blk_size;
+ }
+ close $f_new; close $f_old;
+
+ # If only the 8-byte header exists, files were identical
+ return length($patch) > 8 ? $patch : undef;
}
-sub stage_link {
- my ($src, $tmp) = @_;
- my $target = readlink($src);
-
- make_path(dirname($tmp));
- unlink($tmp) if -e $tmp || -l $tmp;
- # For workspace symlinks, we clone the target
- symlink($target, $tmp) or die "Failed to symlink $tmp: $!";
- print "[Staged link] $src -> $target\n";
-}
+# Convert decimal to a padded 7-char hex string
+sub to_hex_id { sprintf("%07x", $_[0]) }
+
+# Convert hex back to decimal
+sub from_hex_id { hex($_[0]) }
-sub make_patch {
- my ($src, $obj_name, $obj_path, $patches) = @_;
+sub write_file {
+ my ($path, $content) = @_;
+ my $dir = dirname($path);
+ make_path($dir) unless -d $dir;
+ open my $fh, '>', $path or die "Could not open '$path' for writing: $!";
+ print $fh $content;
+ close $fh or die "Could not close '$path' after writing: $!";
+}
- return unless -e $obj_path;
+sub touch_file {
+ my ($path) = @_;
+ my $dir = dirname($path);
+ make_path($dir) unless -d $dir;
+ open my $fh, '>', $path or die "Could not touch $path: $!";
+ close $fh;
+}
- my $patch;
+sub hash_file_content {
+ my ($filename) = @_;
+ my $sha = Digest::SHA->new(1);
- # TODO: implement a disk-based diff for large files, esp bin files
- if (-T $src) {
- if (compare($src, $obj_path) != 0) {
- $patch = qx(diff -u '$obj_path' '$src');
- }
+ if (-l $filename) {
+ my $target = readlink($filename);
+ die "Could not readlink '$filename': $!" unless defined $target;
+ $sha->add($target);
} else {
- $patch = make_bin_patch($src, $obj_path);
+ open(my $fh, '<:raw', $filename) or die "Could not open '$filename': $!";
+ $sha->addfile($fh);
+ close($fh);
}
- if ($patch) { $patches->{$obj_name} = $patch; }
+ return $sha->hexdigest;
}
-sub make_bin_patch {
- my ($new_file, $old_file) = @_;
-
- open my $f_new, '<:raw', $new_file or die "Cannot open file: $!";
- open my $f_old, '<:raw', $old_file or die "Cannot open object file: $!";
- my $f_out;
-
- my $patch = "";
- my $offset = 0;
- my $blk_size = 4096;
-
- while (1) {
- my $read_new = sysread($f_new, my $buf_new, $blk_size);
- my $read_old = sysread($f_old, my $buf_old, $blk_size);
-
- # Stop when we've processed the entire new file
- last if $read_new == 0 && $read_old == 0;
-
- # Handle file size differences (pad with nulls if one is shorter)
- $buf_new .= "\0" x ($blk_size - length($buf_new)) if length($buf_new) < $blk_size;
- $buf_old .= "\0" x ($blk_size - length($buf_old)) if length($buf_old) < $blk_size;
-
- # If they differ, we save the OLD buffer (reverse delta)
- if ($buf_new ne $buf_old) {
- # Header: [64-bit offset][32-bit length][data]
- # 'Q' is 64-bit unsigned (quad), 'L' is 32-bit unsigned (long)
- $patch .= pack("QL", $offset, length($buf_old)) . $buf_old;
+sub stream_tree {
+ my (@paths) = @_;
+ @paths = (".") unless @paths;
+
+ my $chunk_size = 1024 * 64;
+ my @buf;
+ my $buf_size = 0;
+ my $tot_size = 0;
+ my $use_disk = 0;
+ my ($tmp_fh, $tmp_path);
+
+ my $flush = sub {
+ if (!$use_disk) {
+ ($tmp_fh, $tmp_path) = tempfile(UNLINK => 1);
+ $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size);
+ binmode $tmp_fh, ":raw";
+ $use_disk = 1;
+ }
+ print $tmp_fh @buf;
+ @buf = ();
+ $buf_size = 0;
+ };
+
+ my @stack = @paths;
+ while (@stack) {
+ my $path = (pop @stack) =~ s|^\./||r;
+ next if $path eq REPO;
+
+ my @st = lstat($path);
+ if (-d _) {
+ if (opendir(my $dh, $path)) {
+ push @stack, map { File::Spec->catfile($path, $_) }
+ grep { $_ ne '.' && $_ ne '..' && $_ ne REPO }
+ readdir($dh);
+ closedir($dh);
+ }
+ } elsif (-f _ || -l _) {
+ my $line = "$path\t$st[9]\t$st[7]\n";
+ push @buf, $line;
+ $buf_size += length($line);
+ $tot_size += length($line);
+
+ if ((!$use_disk && $tot_size > MEM_LIMIT) ||
+ ($use_disk && $buf_size > $chunk_size)) {
+ $flush->();
+ }
}
-
- $offset += $blk_size;
}
- close $f_new; close $f_old;
- close $f_out if $f_out;
- return length($patch) > 0 ? $patch : undef;
-}
-
-sub save_patches {
- my ($patches) = @_;
-
- return unless keys %$patches;
-
- my $tar = Archive::Tar->new;
- while (my ($obj_name, $patch) = each %$patches) {
- $tar->add_data("$obj_name.patch", $patch);
+ if (!$use_disk) {
+ @buf = sort @buf;
+ return sub {
+ my $line = shift @buf;
+ return unless $line;
+ chomp $line;
+ my ($p, $m, $s) = split(/\t/, $line);
+ return { path => $p, mtime => $m, size => $s };
+ };
+ } else {
+ $flush->() if @buf;
+ close $tmp_fh;
+ open(my $sort_fh, "-|", "sort", "-t", "\t", "-k1,1", $tmp_path) or die $!;
+ return sub {
+ my $line = <$sort_fh>;
+ unless ($line) { close $sort_fh; return; }
+ chomp $line;
+ my ($p, $s, $m) = split(/\t/, $line);
+ return { path => $p, mtime => $m, size => $s };
+ };
}
-
- $tar->write(TMP_DIFF, 1); # gzip
}
-sub apply_bin_patch {
- my ($obj_file, $patch_file) = @_;
-
- open my $obj_fh, '+<:raw', $obj_file or die "Cannot open object: $!";
- open my $ptch_fh, '<:raw', $patch_file or die "Cannot open patch: $!";
+sub stream_index {
+ my $idx = INDEX;
+ return sub { return; } unless -e $idx && -s $idx > 0;
+ open(my $fh, "<:raw", $idx) or die "Could not open index: $!";
- # Header is 12 bytes (Q + L)
- while (sysread($ptch_fh, my $header, 12)) {
- my ($offset, $len) = unpack("QL", $header);
+ return sub {
+ my $line = <$fh>;
+ unless (defined $line) { close $fh; return; }
+ chomp $line;
- # sysread/syswrite to avoid read()'s internal buffers.
- sysread($ptch_fh, my $payload, $len);
- sysseek($obj_fh, $offset, 0);
- syswrite($obj_fh, $payload, $len);
- }
-
- close $obj_fh;
- close $ptch_fh;
+ my ($s_hash, $b_hash, $mtime, $size, $path) = split(/\t/, $line, 5);
+
+ return {
+ s_hash => $s_hash,
+ b_hash => $b_hash,
+ mtime => $mtime,
+ size => $size,
+ path => $path,
+ };
+ };
}
-# Convert decimal to a padded 7-char hex string
-sub to_hex_id { sprintf("%07x", $_[0]) }
-
-# Convert hex back to decimal
-sub from_hex_id { hex($_[0]) }
+# Logic for sharded object directory
+sub get_obj_path {
+ my ($hash) = @_;
+ my $dir = File::Spec->catfile(OBJ_DIR, substr($hash, 0, 2));
+ make_path($dir) unless -d $dir;
+ return File::Spec->catfile($dir, substr($hash, 2));
+}
-sub launch_editor {
- my $editor = $ENV{EDITOR} || $ENV{VISUAL} || 'vi';
- my $temp_msg_file = File::Spec->catfile(VCX_DIR, "COMMIT_EDITMSG");
-
- open my $fh, '>', $temp_msg_file or die "Cannot create temp message file: $!";
- print $fh "\n# Enter the commit message for your changes.\n";
- print $fh "# Lines starting with '#' will be ignored.\n";
- close $fh;
+sub snapshot_tree {
+ my $it = stream_index();
+ my @buf;
+ my $use_disk = 0;
+ my $total_size = 0;
+ my $chunk_size = 1024 * 64;
+ my $sha = Digest::SHA->new(1);
+ my ($tmp_fh, $tmp_path);
+
+ while (my $entry = $it->()) {
+ my $line = "$entry->{s_hash}\t$entry->{path}\n";
+ $sha->add($line);
+ $total_size += length($line);
+
+ if (!$use_disk && $total_size > MEM_LIMIT) {
+ ($tmp_fh, $tmp_path) = tempfile();
+ $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size);
+ binmode $tmp_fh, ":raw";
+ print $tmp_fh @buf;
+ @buf = ();
+ $use_disk = 1;
+ }
- system("$editor \"$temp_msg_file\"");
+ if ($use_disk) {
+ print $tmp_fh $line;
+ } else {
+ push @buf, $line;
+ }
+ }
- my $final_msg = "";
- if (-e $temp_msg_file) {
- open my $rfh, '<', $temp_msg_file or die $!;
- while (my $line = <$rfh>) {
- next if $line =~ /^#/; # Strip out the helper comments
- $final_msg .= $line;
+ my $tree_hash = $sha->hexdigest;
+ my $obj_path = get_obj_path($tree_hash);
+ if (!-e $obj_path) {
+ if ($use_disk) {
+ close $tmp_fh;
+ rename($tmp_path, $obj_path) or die "Rename failed: $!";
+ } else {
+ write_file($obj_path, join("", @buf));
}
- close $rfh;
- unlink($temp_msg_file); # Clean up
+ } else {
+ unlink($tmp_path) if $use_disk;
}
- $final_msg =~ s/^\s+|\s+$//g; # Trim whitespace
- return $final_msg;
+ return $tree_hash;
}
-sub write_file {
- my ($path, $content) = @_;
- open my $fh, '>', $path or die "Could not open '$path' for writing: $!";
- print $fh $content;
- close $fh or die "Could not close '$path' after writing: $!";
+sub stream_tree_file {
+ my ($hash) = @_;
+ return sub { return } unless $hash;
+ my $path = get_obj_path($hash);
+ open my $fh, '<:raw', $path or return sub { return };
+ return sub {
+ my $line = <$fh>;
+ return unless $line;
+ chomp $line;
+ my ($h, $p) = split(/\t/, $line, 2);
+ return { hash => $h, path => $p };
+ }
}
-sub init_stage {
- my ($latest_tree_dir, $entries_ref) = @_;
-
- find({
- wanted => sub {
- return if -d _;
-
- my $rel = File::Spec->abs2rel($_, $latest_tree_dir);
- my $staged_path = File::Spec->catfile(TMP_TREE, $rel);
- make_path(dirname($staged_path));
-
- my $target = readlink($_);
- symlink($target, $staged_path) or die "Failed to link $rel: $!";
-
- $entries_ref->{$rel} = 1;
- },
- no_chdir => 1
- }, $latest_tree_dir);
+sub read_head {
+ return unless -e HEAD;
+ open my $fh, '<', HEAD or return;
+ my $val = <$fh>;
+ chomp $val if $val;
+ return $val;
}