From 777ee74b76c17122608b6421b6824f31807ea52b Mon Sep 17 00:00:00 2001 From: Sadeep Madurange Date: Mon, 6 Apr 2026 15:02:39 +0800 Subject: wip: version registry, forward bin patch, CAS for objs. --- vcx | 201 +++++++++++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 140 insertions(+), 61 deletions(-) (limited to 'vcx') diff --git a/vcx b/vcx index d6139b5..1282ef1 100644 --- a/vcx +++ b/vcx @@ -16,15 +16,17 @@ use POSIX qw(strftime); use Digest::SHA qw(sha1_hex); use constant VCX_DIR => '.vcx'; -use constant HEAD => VCX_DIR . '/head'; # Current commit ID -use constant OBJ_DIR => VCX_DIR . '/obj'; # Latest version of a file +use constant HEAD => VCX_DIR . '/head'; # Current commit ID +use constant OBJ_DIR => VCX_DIR . '/obj'; # Data store: file snapshots, trees use constant REV_DIR => VCX_DIR . '/rev'; # Commits +use constant REG_FILE => VCX_DIR . '/reg'; # File version registry. # Staging area -use constant TMP_DIR => VCX_DIR . '/index'; -use constant TMP_TREE => TMP_DIR . '/tree'; -use constant TMP_META => TMP_DIR . '/meta'; -use constant TMP_DIFF => TMP_DIR . '/delta.tar.gz'; +use constant TMP_DIR => VCX_DIR . '/index'; +use constant TMP_TREE => TMP_DIR . '/tree'; +use constant TMP_META => TMP_DIR . '/meta'; +use constant TMP_DIFF => TMP_DIR . '/delta.tar.gz'; +use constant TMP_REG => REG_FILE . '.tmp'; Getopt::Long::Configure("bundling"); @@ -52,6 +54,7 @@ if ($cmd eq 'init') { sub run_init { make_path(OBJ_DIR, REV_DIR); + touch_file(REG_FILE); my $initial_hex = to_hex_id(0); my $rev0_dir = File::Spec->catfile(REV_DIR, $initial_hex); @@ -151,13 +154,14 @@ sub run_add { my ($latest_tree_hash) = $latest_tree_ptr =~ /tree-([a-f0-9]{40})$/; my $latest_tree_dir = File::Spec->catdir(OBJ_DIR, $latest_tree_hash); - my $next_id_hex = to_hex_id(from_hex_id($head) + 1); - my %entries; open my $afh, '>>', TMP_META or die $!; init_stage($latest_tree_dir, \%entries); + my $reg = load_registry(); + my $reg_updated = 0; + foreach my $input (@targets) { my @expanded = bsd_glob($input); foreach my $t (@expanded) { @@ -171,8 +175,8 @@ sub run_add { return if -d $_; my $rel = $File::Find::name =~ s|^\./||r; - $entries{$rel} = 1; + $entries{$rel} = 1; my $staged_path = File::Spec->catfile(TMP_TREE, $rel); my $prev_link = File::Spec->catfile($latest_tree_dir, $rel); @@ -185,13 +189,23 @@ sub run_add { return if compare($_, $obj_in_head) == 0; } - my $obj_name = sha1_hex($rel); + my $obj_name; + my $path_hash = sha1_hex($rel); + + if (exists $reg->{$path_hash}) { + $obj_name = $reg->{$path_hash}; + } else { + $obj_name = hash_file_content($_); + $reg->{$path_hash} = $obj_name; + $reg_updated = 1; + } + my $obj_path = File::Spec->catfile(OBJ_DIR, $obj_name); # Prepare patches in memory and save to disk in one write. - make_patch($File::Find::name, $obj_name, $obj_path, \%patches); + make_patch($File::Find::name, $obj_path, \%patches); stage_file($File::Find::name, $obj_path, $staged_path); - print $afh "$rel\n"; # Record in meta file for the commit command + print $afh "$rel\0$obj_path\n"; # Record in meta file for the commit command } # CASE 2: Symlink @@ -213,6 +227,10 @@ sub run_add { save_patches(\%patches); + if ($reg_updated == 1) { + write_registry($reg); + } + # Pass 2: History -> Workspace (Detects Deletions) foreach my $path (keys %entries) { if (!-e $path && !-l $path) { @@ -234,9 +252,9 @@ sub run_add { close $afh; my @sorted_paths = sort keys %entries; - my $tree_ents = join("\n", @sorted_paths); + my $tree_ents = join("\n", @sorted_paths); my $tree_header = "tree " . scalar(@sorted_paths) . "\n"; - my $tree_data = $tree_header . $tree_ents; + my $tree_data = $tree_header . $tree_ents; my $tree_hash = sha1_hex($tree_data); my $tree_file = File::Spec->catfile(TMP_DIR, "tree-$tree_hash"); @@ -274,15 +292,19 @@ sub run_commit { # Save file to store if (-e TMP_META) { open my $mfh, '<', TMP_META or die $!; - while (my $rel = <$mfh>) { - chomp $rel; - my $obj_name = sha1_hex($rel); - my $obj_path = File::Spec->catfile(OBJ_DIR, $obj_name); + while (my $line = <$mfh>) { + chomp $line; + my ($rel, $obj_path) = split("\0", $line); copy($rel, $obj_path) or die "Failed to update store for $rel: $!"; } close $mfh; } + # Update registry + if (-e TMP_REG) { + rename(TMP_REG, REG_FILE) or die "Failed to update registry: $!"; + } + # Save tree (snapshots the structure) if (!$tree_exists) { make_path($tree_path); @@ -377,13 +399,13 @@ sub stage_link { } sub make_patch { - my ($src, $obj_name, $obj_path, $patches) = @_; + my ($src, $obj_path, $patches) = @_; - return unless -e $obj_path; + return unless -f $obj_path; my $patch; - # TODO: implement a disk-based diff for large files, esp bin files + # TODO: implement a disk-based diff for large files if (-T $src) { if (compare($src, $obj_path) != 0) { $patch = qx(diff -u '$obj_path' '$src'); @@ -392,44 +414,69 @@ sub make_patch { $patch = make_bin_patch($src, $obj_path); } - if ($patch) { $patches->{$obj_name} = $patch; } + if ($patch) { + my $obj_name = basename($obj_path); + $patches->{$obj_name} = $patch; + } } sub make_bin_patch { my ($new_file, $old_file) = @_; - open my $f_new, '<:raw', $new_file or die "Cannot open file: $!"; - open my $f_old, '<:raw', $old_file or die "Cannot open object file: $!"; - my $f_out; + my $new_size = -s $new_file; + + open my $f_new, '<:raw', $new_file or die $!; + open my $f_old, '<:raw', $old_file or die $!; + + # Start the patch with the new total size + my $patch = pack("Q", $new_size); - my $patch = ""; - my $offset = 0; + my $offset = 0; my $blk_size = 4096; while (1) { my $read_new = sysread($f_new, my $buf_new, $blk_size); my $read_old = sysread($f_old, my $buf_old, $blk_size); + last if !$read_new && !$read_old; - # Stop when we've processed the entire new file - last if $read_new == 0 && $read_old == 0; - - # Handle file size differences (pad with nulls if one is shorter) - $buf_new .= "\0" x ($blk_size - length($buf_new)) if length($buf_new) < $blk_size; - $buf_old .= "\0" x ($blk_size - length($buf_old)) if length($buf_old) < $blk_size; - - # If they differ, we save the OLD buffer (reverse delta) if ($buf_new ne $buf_old) { - # Header: [64-bit offset][32-bit length][data] - # 'Q' is 64-bit unsigned (quad), 'L' is 32-bit unsigned (long) - $patch .= pack("QL", $offset, length($buf_old)) . $buf_old; + # Offset (Q) and Length (L) + $patch .= pack("QL", $offset, length($buf_new)) . $buf_new; } - $offset += $blk_size; } close $f_new; close $f_old; - close $f_out if $f_out; - return length($patch) > 0 ? $patch : undef; + # Only return if we have more than just the size header + return length($patch) > 8 ? $patch : undef; +} + +sub apply_bin_patch { + my ($base_file, $patch_data) = @_; + + open my $fh, '+<:raw', $base_file or die $!; + + # Read the total new size (first 8 bytes) + my $new_total_size = unpack("Q", substr($patch_data, 0, 8)); + my $pos = 8; + + # Apply block updates + while ($pos < length($patch_data)) { + my ($offset, $len) = unpack("QL", substr($patch_data, $pos, 12)); + $pos += 12; + + my $payload = substr($patch_data, $pos, $len); + $pos += $len; + + sysseek($fh, $offset, 0); + syswrite($fh, $payload, $len); + } + + # Clip file to the new size + # This removes any leftover bytes from the old version + truncate($fh, $new_total_size) or die "Could not truncate: $!"; + + close $fh; } sub save_patches { @@ -445,26 +492,6 @@ sub save_patches { $tar->write(TMP_DIFF, 1); # gzip } -sub apply_bin_patch { - my ($obj_file, $patch_file) = @_; - - open my $obj_fh, '+<:raw', $obj_file or die "Cannot open object: $!"; - open my $ptch_fh, '<:raw', $patch_file or die "Cannot open patch: $!"; - - # Header is 12 bytes (Q + L) - while (sysread($ptch_fh, my $header, 12)) { - my ($offset, $len) = unpack("QL", $header); - - # sysread/syswrite to avoid read()'s internal buffers. - sysread($ptch_fh, my $payload, $len); - sysseek($obj_fh, $offset, 0); - syswrite($obj_fh, $payload, $len); - } - - close $obj_fh; - close $ptch_fh; -} - # Convert decimal to a padded 7-char hex string sub to_hex_id { sprintf("%07x", $_[0]) } @@ -523,3 +550,55 @@ sub init_stage { no_chdir => 1 }, $latest_tree_dir); } + +sub touch_file { + my ($path) = @_; + my $dir = dirname($path); + make_path($dir) unless -d $dir; + open my $fh, '>', $path or die "Could not touch $path: $!"; + close $fh; +} + +sub load_registry { + my $reg_path = REG_FILE; + my %reg_data; + + return \%reg_data unless -e $reg_path; + + open my $fh, '<', $reg_path or die "Could not open registry: $!"; + while (my $line = <$fh>) { + chomp $line; + # Split the line: source:target + # We use a limit of 2 to ensure it only splits at the first dot + if ($line =~ /^([a-f0-9]{40}):([a-f0-9]{40})$/i) { + $reg_data{$1} = $2; + } + } + close $fh; + return \%reg_data; +} + +sub write_registry { + my ($reg) = @_; + my $tmp_path = TMP_REG; + + open(my $fh, '>', $tmp_path) or die "Could not open $tmp_path for writing: $!"; + + foreach my $src (sort keys %$reg) { + my $target = $reg->{$src}; + print $fh "$src:$target\n"; + } + + close($fh) or die "Could not close $tmp_path: $!"; +} + +sub hash_file_content { + my ($filename) = @_; + + open(my $fh, '<:raw', $filename) or die "Could not open '$filename': $!"; + + my $sha = Digest::SHA->new(1); + $sha->addfile($fh); + close($fh); + return $sha->hexdigest; +} -- cgit v1.2.3