From d59e107305060b844ef3098774be85566456306d Mon Sep 17 00:00:00 2001 From: Sadeep Madurange Date: Mon, 6 Apr 2026 15:02:39 +0800 Subject: Add, commit, log commands. --- README.txt | 11 + test_vcx.t | 167 ------------ vcx | 870 +++++++++++++++++++++++++++++++++--------------------------- vcx.t | 153 +++++++++++ vcx_patch.t | 104 ++++++++ 5 files changed, 743 insertions(+), 562 deletions(-) create mode 100644 README.txt delete mode 100644 test_vcx.t create mode 100644 vcx.t create mode 100644 vcx_patch.t diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..b5383d5 --- /dev/null +++ b/README.txt @@ -0,0 +1,11 @@ +FUNCTIONAL + +Common ops: status / add / commit / log / checkout / diff +File integrity + +CONSTRAINTS + +SSD: TBW / append-only +File systems: inode count, file descriptors +System: CPU / memory + diff --git a/test_vcx.t b/test_vcx.t deleted file mode 100644 index 2467819..0000000 --- a/test_vcx.t +++ /dev/null @@ -1,167 +0,0 @@ -use strict; -use warnings; -use Test::More; -use File::Path qw(remove_tree make_path); -use File::Spec; -use Cwd; -use File::Glob qw(:bsd_glob); - -use constant ROOT => '.vcx'; -use constant HEAD => ROOT . '/head'; # Current commit ID -use constant OBJ_DIR => ROOT . '/obj'; # Latest version of a file -use constant REV_DIR => ROOT . '/rev'; # Commits - -# Staging area -use constant TMP_DIR => ROOT . '/index'; -use constant TMP_TREE => TMP_DIR . '/tree'; - -# Setup Sandbox -my $sandbox = "sandbox"; -remove_tree($sandbox) if -d $sandbox; -make_path($sandbox); - -my $orig_dir = getcwd(); -chdir($sandbox) or die "Cant enter sandbox: $!"; - -my $cmd = File::Spec->catfile($orig_dir, "vcx"); - -# Test 'init' -ok(system("perl $cmd init > /dev/null") == 0, "Init command ran successfully"); -ok(-d ROOT, "ROOT directory created"); -ok(-e HEAD, "Head file created"); -ok(-d OBJ_DIR, "OBJ_DIR directory created"); -ok(-d REV_DIR, "REV_DIR directory created"); - -# Test 'add' -open my $fh, '>', "test.txt" or die $!; -print $fh "Hello, world!"; -close $fh; - -ok(system("perl $cmd add test.txt > /dev/null") == 0, - "Add command ran successfully"); -ok(-d TMP_TREE, "Staging tree created"); - -# Test 'commit -m' -ok(system("perl $cmd commit -m 'Initial commit' > /dev/null") == 0, - "Commit with -m ran successfully"); -my $rev1 = File::Spec->catdir(REV_DIR, "0000001"); -ok(-d $rev1, "Revision 0000001 created"); - -# Check the message content -my $msg_file = File::Spec->catfile($rev1, "message"); -ok(-f $msg_file, "Message file exists in revision 1"); - -open my $msg_fh, '<', $msg_file or die $!; -my $saved_msg = <$msg_fh>; chomp $saved_msg; close $msg_fh; -is($saved_msg, "Initial commit", "Commit message matches the input"); - -# Test 'commit -am' (Auto-staging) -open my $fh2, '>>', "test.txt" or die $!; -print $fh2 "\nMore content"; -close $fh2; - -ok(system("perl $cmd commit -am 'Second commit' > /dev/null") == 0, - "Commit -am (auto-staging) ran successfully"); - -# Check if the object store updated -opendir(my $dh, OBJ_DIR) or die $!; -my @objs = grep { /[a-f0-9]{40}/ } readdir($dh); -closedir($dh); - -ok(scalar @objs >= 1, "Objects exist in store"); - -# Shuffle subdirectories -make_path("dirA/dirB"); -write_file("dirA/dirB/shuffle.txt", "Same Content"); -system("perl $cmd add dirA/dirB/shuffle.txt > /dev/null"); -system("perl $cmd commit -m 'Shuffle part 1' > /dev/null"); - -# Move and delete -make_path("dirC"); -rename("dirA/dirB/shuffle.txt", "dirC/shuffle.txt"); -remove_tree("dirA"); - -# Commit the move -system("perl $cmd commit -am 'Shuffle part 2' > /dev/null"); - -# Verify Revision 4's tree structure -my $head = read_file(HEAD); -my ($tree_ptr) = bsd_glob(File::Spec->catfile(REV_DIR, $head, "tree-*")); -my ($tree_hash) = $tree_ptr =~ /tree-([a-f0-9]{40})$/; -my $actual_tree_path = File::Spec->catdir(OBJ_DIR, $tree_hash); - -# Check for new path -ok(-l File::Spec->catfile($actual_tree_path, "dirC/shuffle.txt"), - "New path dirC/shuffle.txt exists in object store"); - -# Check for deleted path (Should NOT exist) -ok(!-d File::Spec->catdir($actual_tree_path, "dirA"), - "Deleted directory removed from tree snapshot"); - -# Verify the symlink still points to the correct content blob -my $link_target = readlink(File::Spec->catfile($actual_tree_path, "dirC/shuffle.txt")); -like($link_target, qr/obj\/[a-f0-9]{40}/, "Symlink in tree points to a valid object blob"); - -## Symlink-to-symlink -write_file("target.txt", "Final Destination"); -symlink("target.txt", "link_a"); -symlink("link_a", "link_b"); - -system("perl $cmd add link_b > /dev/null"); -system("perl $cmd commit -m 'Double link' > /dev/null"); - -$head = read_file(HEAD); -($tree_ptr) = bsd_glob(File::Spec->catfile(REV_DIR, $head, "tree-*")); -my $staged_link = File::Spec->catfile(OBJ_DIR, $tree_ptr =~ s/.*tree-//r, "link_b"); - -is(readlink($staged_link), "link_a", "Symlink-to-symlink preserved literal target"); - -## Empty file -write_file("empty.txt", ""); -system("perl $cmd add empty.txt > /dev/null"); -system("perl $cmd commit -m 'Empty file' > /dev/null"); - -ok(-e File::Spec->catfile(OBJ_DIR, "da39a3ee5e6b4b0d3255bfef95601890afd80709"), - "Empty file object created (da39a3...)"); - -# The overwrite -write_file("repeat.txt", "Version A"); -system("perl $cmd add repeat.txt > /dev/null"); -system("perl $cmd commit -m 'State A' > /dev/null"); -my $tree_v1 = (bsd_glob(File::Spec->catfile(REV_DIR, read_file(HEAD), "tree-*")))[0]; - -write_file("repeat.txt", "Version B"); -system("perl $cmd add repeat.txt > /dev/null"); -system("perl $cmd commit -m 'State B' > /dev/null"); - -## Revert content -write_file("repeat.txt", "Version A"); -system("perl $cmd add repeat.txt > /dev/null"); -system("perl $cmd commit -m 'Back to State A' > /dev/null"); -my $tree_v3 = (bsd_glob(File::Spec->catfile(REV_DIR, read_file(HEAD), "tree-*")))[0]; - -is($tree_v1 =~ s/.*tree-//r, $tree_v3 =~ s/.*tree-//r, - "Tree hash reverted perfectly after content restoration"); - -# Cleanup -chdir($orig_dir); -# remove_tree($sandbox); # Uncomment this once you're sure it works - -done_testing(); - -sub write_file { - my ($path, $content) = @_; - open my $fh, '>', $path or die "Could not open '$path' for writing: $!"; - print $fh $content; - close $fh or die "Could not close '$path' after writing: $!"; -} - -sub read_file { - my $path = shift; - open my $fh, '<', $path or return ""; - my $content = <$fh>; - chomp $content if $content; - close $fh; - return $content; -} - diff --git a/vcx b/vcx index d6139b5..b5997f3 100644 --- a/vcx +++ b/vcx @@ -2,6 +2,7 @@ use strict; use warnings; +use IO::Handle; use File::Path qw(make_path); use File::Copy qw(copy); use File::Find; @@ -9,22 +10,25 @@ use File::Compare; use File::Basename; use File::Glob qw(:bsd_glob); use File::Spec; +use File::Temp qw(tempfile); use Getopt::Long; use Archive::Tar; use Compress::Zlib; use POSIX qw(strftime); use Digest::SHA qw(sha1_hex); -use constant VCX_DIR => '.vcx'; -use constant HEAD => VCX_DIR . '/head'; # Current commit ID -use constant OBJ_DIR => VCX_DIR . '/obj'; # Latest version of a file -use constant REV_DIR => VCX_DIR . '/rev'; # Commits +use constant REPO => '.vcx'; +use constant HEAD => REPO . '/head'; # Current revision ID +use constant INDEX => REPO . '/index'; # Index +use constant OBJ_DIR => REPO . '/obj'; # Object store +use constant REV_DIR => REPO . '/rev'; # Revisions # Staging area -use constant TMP_DIR => VCX_DIR . '/index'; -use constant TMP_TREE => TMP_DIR . '/tree'; -use constant TMP_META => TMP_DIR . '/meta'; -use constant TMP_DIFF => TMP_DIR . '/delta.tar.gz'; +use constant TMP_DIR => REPO . '/stg'; +use constant TMP_DIFF => TMP_DIR . '/delta.tar.gz'; + +use constant MEM_LIMIT => 64 * 1024 * 1024; +use constant MAX_INDEX_SIZE => 16 * 1024 * 1024; Getopt::Long::Configure("bundling"); @@ -51,261 +55,269 @@ if ($cmd eq 'init') { } sub run_init { - make_path(OBJ_DIR, REV_DIR); - - my $initial_hex = to_hex_id(0); - my $rev0_dir = File::Spec->catfile(REV_DIR, $initial_hex); - make_path($rev0_dir); - - write_file(HEAD, "$initial_hex\n"); - - # Baseline tree (empty) - my $empty_tree_hash = sha1_hex(""); - my $empty_tree_file = File::Spec->catfile($rev0_dir, "tree-$empty_tree_hash"); - open my $fh, '>', $empty_tree_file or die $!; close $fh; - make_path(File::Spec->catdir(OBJ_DIR, $empty_tree_hash)); - - open my $mfh, '>', File::Spec->catfile($rev0_dir, "message"); close $mfh; + make_path(OBJ_DIR, REV_DIR, TMP_DIR); + touch_file(INDEX); + my $rev_id = to_hex_id(0); + write_file(HEAD, "$rev_id\n"); print "Initialized repository.\n"; } sub run_status { - open my $fh, '<', HEAD or die "Not a repository.\n"; - my $head = <$fh>; chomp $head; close $fh; - print "On revision [$head]\n"; - - my %staged_diffs; - my $staged_diff_bundle = TMP_DIFF; +} - if (-e TMP_DIFF) { - my @list = qx(tar -tf '$staged_diff_bundle'); - foreach (@list) { - chomp; - if (/(.+)\.patch$/) { $staged_diffs{$1} = 1; } +sub run_add { + my @paths = @_; + @paths = (".") unless @paths; + + my $idx_path = INDEX; + my $tmp_idx = "$idx_path.tmp"; + + my $it_idx = stream_index(); + my $it_wrk = stream_tree(@paths); + + open(my $out, ">:raw", $tmp_idx) + or die "Could not create $tmp_idx: $!"; + + my $idx_entry = $it_idx->(); + my $wrk_entry = $it_wrk->(); + + while ($idx_entry || $wrk_entry) { + my $cmp = !defined $idx_entry ? 1 + : !defined $wrk_entry ? -1 + : $idx_entry->{path} cmp $wrk_entry->{path}; + + my $path = ($cmp <= 0) ? $idx_entry->{path} : $wrk_entry->{path}; + if ($cmp == 0) { + if ($idx_entry->{mtime} == $wrk_entry->{mtime} && + $idx_entry->{size} == $wrk_entry->{size}) { + # No change + print $out join("\t", $idx_entry->{s_hash}, + $idx_entry->{b_hash}, $idx_entry->{mtime}, + $idx_entry->{size}, $idx_entry->{path}) . "\n"; + } else { + my $current_hash = hash_file_content($wrk_entry->{path}); + # Snapshot to staging area + my $stg_path = File::Spec->catfile(TMP_DIR, $wrk_entry->{path}); + make_path(dirname($stg_path)); + copy($wrk_entry->{path}, $stg_path); + + if ($current_hash eq $idx_entry->{s_hash}) { + print $out join("\t", $idx_entry->{s_hash}, + $idx_entry->{b_hash}, $wrk_entry->{mtime}, + $wrk_entry->{size}, $wrk_entry->{path}) . "\n"; + } else { + print $out join("\t", $current_hash, $idx_entry->{b_hash}, + $wrk_entry->{mtime}, $wrk_entry->{size}, $wrk_entry->{path}) . "\n"; + } + } + $idx_entry = $it_idx->(); + $wrk_entry = $it_wrk->(); + } + elsif ($cmp > 0) { + # New File: hash and snapshot to staging + my $hash = hash_file_content($wrk_entry->{path}); + my $stg_path = File::Spec->catfile(TMP_DIR, $wrk_entry->{path}); + make_path(dirname($stg_path)); + copy($wrk_entry->{path}, $stg_path); + + print $out join("\t", $hash, $hash, $wrk_entry->{mtime}, + $wrk_entry->{size}, $wrk_entry->{path}) . "\n"; + $wrk_entry = $it_wrk->(); + } + else { + # File in index but missing from disk + print $out join("\t", $idx_entry->{s_hash}, + $idx_entry->{b_hash}, $idx_entry->{mtime}, + $idx_entry->{size}, $idx_entry->{path}) . "\n"; + $idx_entry = $it_idx->(); } } - my ($tree_ptr) = bsd_glob(File::Spec->catfile(REV_DIR, $head, "tree-*")); - my ($tree_hash) = $tree_ptr =~ /tree-([a-f0-9]{40})$/; - my $latest_tree = File::Spec->catdir(OBJ_DIR, $tree_hash); - - # Pass 1: Workspace -> History (Detects New and Modified) - find({ - wanted => sub { - if ($File::Find::name =~ /\/\Q${\VCX_DIR}\E$/ || $_ eq VCX_DIR) { - $File::Find::prune = 1; - return; - } - return if -d $File::Find::name; + close $out; + rename($tmp_idx, $idx_path) or die "Failed to update index: $!"; +} - my $rel = File::Spec->abs2rel($File::Find::name, '.'); - $rel =~ s|^\./||; - - my $base_in_tree = File::Spec->catfile($latest_tree, $rel); - my $path_hash = sha1_hex($rel); - - if (-e $base_in_tree || -l $base_in_tree) { - my $obj_in_store = readlink($base_in_tree); - if (compare($_, $obj_in_store) != 0) { - my $staged = $staged_diffs{$path_hash} ? " (staged)" : ""; - print "[M] $rel$staged\n"; - } - } else { - my $tmp_link = File::Spec->catfile(TMP_TREE, $rel); - my $staged = (-e $tmp_link || -l $tmp_link) ? " (staged)" : ""; - print "[N] $rel$staged\n"; +sub run_commit { + my ($msg) = @_; + + my $parent_id = read_head() // to_hex_id(0); + my $parent_tree_hash = ""; + + if ($parent_id ne to_hex_id(0)) { + my $p_rev_file = File::Spec->catfile(REV_DIR, $parent_id); + if (open my $pfh, '<', $p_rev_file) { + while (<$pfh>) { + if (/^tree:(.*)$/) { $parent_tree_hash = $1; last; } } - }, - no_chdir => 1 - }, '.'); - - # Pass 2: History -> Workspace (Detects Deletions) - if (-d $latest_tree) { - find({ - wanted => sub { - return if -d $_; - my $rel = File::Spec->abs2rel($_, $latest_tree); - - # If it's in the commit tree but GONE from the workspace - if (!-e $rel && !-l $rel) { - my $tmp_link = File::Spec->catfile(TMP_TREE, $rel); - my $staged = (!-e $tmp_link && !-l $tmp_link) ? " (staged)" : ""; - print "[D] $rel$staged\n"; - } - }, - no_chdir => 1 - }, $latest_tree); + close $pfh; + } } -} - -sub run_add { - my @targets = @_; - my %patches; - - make_path(TMP_TREE); - - open my $fh_h, '<', HEAD or die $!; - my $head = <$fh_h>; chomp $head; close $fh_h; - - my ($latest_tree_ptr) = bsd_glob(File::Spec->catfile(REV_DIR, $head, "tree-*")); - my ($latest_tree_hash) = $latest_tree_ptr =~ /tree-([a-f0-9]{40})$/; - my $latest_tree_dir = File::Spec->catdir(OBJ_DIR, $latest_tree_hash); - my $next_id_hex = to_hex_id(from_hex_id($head) + 1); + my $it_idx = stream_index(); + my $it_old = stream_tree_file($parent_tree_hash); - my %entries; - open my $afh, '>>', TMP_META or die $!; + my $sha_new_tree = Digest::SHA->new(1); + my @new_tree_lines; + my $new_tree_size = 0; + my $use_disk_nt = 0; + my ($nt_fh, $nt_path); - init_stage($latest_tree_dir, \%entries); + my %patches; + my $patch_mem_size = 0; + my $use_disk_patch = 0; + my ($pt_fh, $pt_path); + + my ($tmp_idx_fh, $tmp_idx_path) = tempfile(DIR => TMP_DIR, UNLINK => 0); + binmode $tmp_idx_fh, ":raw"; + + my $idx = $it_idx->(); + my $old = $it_old->(); + + while ($idx || $old) { + my $cmp = !defined $idx ? 1 : !defined $old ? -1 : $idx->{path} cmp $old->{path}; + my ($out_s, $out_b, $out_m, $out_z, $out_p); + + if ($cmp < 0) { # New file + $out_p = $idx->{path}; + $out_s = $idx->{s_hash}; + $out_b = $idx->{s_hash}; + $out_m = $idx->{mtime}; + $out_z = $idx->{size}; + + my $obj_path = get_obj_path($out_b); + my $stg_file = File::Spec->catfile(TMP_DIR, $out_p); + if (!-e $obj_path) { + rename($stg_file, $obj_path) or copy($stg_file, $obj_path); + } + + $idx = $it_idx->(); + } + elsif ($cmp == 0) { # Modified/unchanged + $out_p = $idx->{path}; + $out_s = $idx->{s_hash}; + $out_m = $idx->{mtime}; + $out_z = $idx->{size}; + + if ($idx->{s_hash} ne $old->{hash}) { + my $base_obj = get_obj_path($old->{hash}); + my $stg_file = File::Spec->catfile(TMP_DIR, $out_p); + + my $patch = (-T $stg_file) + ? qx(diff -u '$base_obj' '$stg_file') + : make_bin_patch($stg_file, $base_obj); + + # DEBUG CODE START + if (defined $patch) { + my $raw_l = length($patch); + my $gz_p = Compress::Zlib::compress($patch); + my $gz_l = length($gz_p); + my $ratio = $raw_l > 0 ? ($gz_l / $raw_l) * 100 : 0; + printf("DEBUG: %s | File: %d | Raw Patch: %d | GZ Patch: %d (%.2f%% ratio)\n", + $out_p, $out_z, $raw_l, $gz_l, $ratio); + } + # DEBUG CODE END + + if (defined $patch && length($patch) < ($out_z * 0.5)) { + if (!$use_disk_patch && ($patch_mem_size + length($patch)) > MEM_LIMIT) { + ($pt_fh, $pt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0); + my $tar = Archive::Tar->new; + $tar->add_data($_, $patches{$_}) for keys %patches; + $tar->write($pt_fh); + %patches = (); + $use_disk_patch = 1; + } - foreach my $input (@targets) { - my @expanded = bsd_glob($input); - foreach my $t (@expanded) { - find({ - wanted => sub { - if ($File::Find::name =~ /\/\Q${\VCX_DIR}\E$/ || $_ eq VCX_DIR) { - $File::Find::prune = 1; - return; + if ($use_disk_patch) { + my $tar = Archive::Tar->new($pt_path); + $tar->add_data("$out_p.patch", $patch); + $tar->write($pt_path); + } else { + $patches{"$out_p.patch"} = $patch; + $patch_mem_size += length($patch); } - - return if -d $_; - - my $rel = $File::Find::name =~ s|^\./||r; - $entries{$rel} = 1; - - my $staged_path = File::Spec->catfile(TMP_TREE, $rel); - my $prev_link = File::Spec->catfile($latest_tree_dir, $rel); - - # CASE 1: Regular File - if (-f $_ && !-l $_) { - return if -e $staged_path; # Already staged - - if (-l $prev_link) { - my $obj_in_head = readlink($prev_link); - return if compare($_, $obj_in_head) == 0; - } - - my $obj_name = sha1_hex($rel); - my $obj_path = File::Spec->catfile(OBJ_DIR, $obj_name); - - # Prepare patches in memory and save to disk in one write. - make_patch($File::Find::name, $obj_name, $obj_path, \%patches); - stage_file($File::Find::name, $obj_path, $staged_path); - print $afh "$rel\n"; # Record in meta file for the commit command - } - - # CASE 2: Symlink - elsif (-l _) { - my $target = readlink($File::Find::name); - if (-l $prev_link) { - return if readlink($prev_link) eq $target; - } - if (-l $staged_path) { - return if readlink($staged_path) eq $target; - } - stage_link($File::Find::name, $staged_path); + $out_b = $old->{hash}; + unlink($stg_file); # Remove staged file since we only need the patch + } else { + $out_b = $out_s; + my $obj_path = get_obj_path($out_b); + if (!-e $obj_path) { + rename($stg_file, $obj_path) or copy($stg_file, $obj_path); } - }, - no_chdir => 1, - }, $t); - } - } - - save_patches(\%patches); - - # Pass 2: History -> Workspace (Detects Deletions) - foreach my $path (keys %entries) { - if (!-e $path && !-l $path) { - delete $entries{$path}; - my $staged_path = File::Spec->catfile(TMP_TREE, $path); - if (-e $staged_path || -l $staged_path) { - unlink($staged_path) or die "Could not unlink staged $path: $!"; - my $parent = dirname($staged_path); - while ($parent ne TMP_TREE && -d $parent) { - last if bsd_glob("$parent/*"); # Stop if not empty - rmdir($parent); - $parent = dirname($parent); } + } else { + $out_b = $old->{hash}; } - print "[D] $path (staged for deletion)\n"; + $idx = $it_idx->(); + $old = $it_old->(); + } + else { # Deleted + $old = $it_old->(); + next; } - } - - close $afh; - - my @sorted_paths = sort keys %entries; - my $tree_ents = join("\n", @sorted_paths); - my $tree_header = "tree " . scalar(@sorted_paths) . "\n"; - my $tree_data = $tree_header . $tree_ents; - - my $tree_hash = sha1_hex($tree_data); - my $tree_file = File::Spec->catfile(TMP_DIR, "tree-$tree_hash"); - open my $fh, '>', $tree_file or die $!; close $fh; -} - -sub run_commit { - my ($msg) = @_; - my ($staged_tree_ptr) = bsd_glob(File::Spec->catfile(TMP_DIR, "tree-*")); - my ($tree_hash) = $staged_tree_ptr =~ /tree-([a-f0-9]{40})$/; - my $tree_path = File::Spec->catdir(OBJ_DIR, $tree_hash); - my $tree_exists = -d $tree_path; + # Record tree + my $t_line = "$out_b\t$out_p\n"; + $sha_new_tree->add($t_line); + $new_tree_size += length($t_line); - my $content_changed = (-e TMP_META || -e TMP_DIFF); + if (!$use_disk_nt && $new_tree_size > MEM_LIMIT) { + ($nt_fh, $nt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0); + print $nt_fh @new_tree_lines; + @new_tree_lines = (); + $use_disk_nt = 1; + } + $use_disk_nt ? print $nt_fh $t_line : push @new_tree_lines, $t_line; - if ($tree_exists && !$content_changed) { - print "Nothing to commit."; - File::Path::remove_tree(TMP_DIR) if -d TMP_DIR; - return; + # Record index + print $tmp_idx_fh join("\t", $out_s, $out_b, $out_m, $out_z, $out_p) . "\n"; } - if (!$msg || $msg eq "") { - $msg = launch_editor(); - die "Commit aborted: empty message.\n" unless $msg =~ /\S/; + # Finalize tree + my $new_tree_hash = $sha_new_tree->hexdigest; + my $tree_obj_path = get_obj_path($new_tree_hash); + if ($use_disk_nt) { + close $nt_fh; + rename($nt_path, $tree_obj_path); + } else { + write_file($tree_obj_path, join("", @new_tree_lines)); } - # Prepare IDs - open my $fh_h, '<', HEAD or die "Not a repository.\n"; - my $old_head = <$fh_h>; chomp $old_head; close $fh_h; - my $next_id_hex = to_hex_id(from_hex_id($old_head) + 1); - my $rev_dir = File::Spec->catfile(REV_DIR, $next_id_hex); - make_path($rev_dir); - - # Save file to store - if (-e TMP_META) { - open my $mfh, '<', TMP_META or die $!; - while (my $rel = <$mfh>) { - chomp $rel; - my $obj_name = sha1_hex($rel); - my $obj_path = File::Spec->catfile(OBJ_DIR, $obj_name); - copy($rel, $obj_path) or die "Failed to update store for $rel: $!"; + # Finalize patch bundle + my $patch_bundle_hash = ""; + if ($use_disk_patch || %patches) { + use IO::Compress::Gzip qw(gzip $GzipError); + my $tar_data; + if ($use_disk_patch) { + open my $pt_in, '<:raw', $pt_path; + $tar_data = do { local $/; <$pt_in> }; + close $pt_in; + unlink $pt_path; + } else { + my $tar = Archive::Tar->new; + $tar->add_data($_, $patches{$_}) for keys %patches; + $tar_data = $tar->write(); } - close $mfh; + my $gzipped_payload; + gzip \$tar_data => \$gzipped_payload or die "Gzip failed: $GzipError"; + $patch_bundle_hash = sha1_hex($gzipped_payload); + write_file(get_obj_path($patch_bundle_hash), $gzipped_payload); } - # Save tree (snapshots the structure) - if (!$tree_exists) { - make_path($tree_path); - rename(TMP_TREE, $tree_path) or die "Failed to save directories: $!"; - } - - rename($staged_tree_ptr, File::Spec->catfile($rev_dir, "tree-$tree_hash")) - or die "Failed to save tree pointer to revision: $!"; - - # Move deltas - if (-e TMP_DIFF) { - my $dest_diff = File::Spec->catfile($rev_dir, "delta.tar.gz"); - rename(TMP_DIFF, $dest_diff) - or die "Failed to move delta to $dest_diff: $!"; - } - - write_file(File::Spec->catfile($rev_dir, "message"), "$msg\n"); - write_file(HEAD, "$next_id_hex\n"); # Update head - - File::Path::remove_tree(TMP_DIR) if -d TMP_DIR; - - my ($subject) = split(/\n/, $msg); - print "Committed revision [$next_id_hex]: $subject\n"; + # Revision file + my $next_id = to_hex_id(from_hex_id($parent_id) + 1); + $msg //= "Commit $next_id"; + open my $rfh, '>:raw', File::Spec->catfile(REV_DIR, $next_id) or die $!; + print $rfh "tree:$new_tree_hash\nparent:$parent_id\n"; + print $rfh "patch:$patch_bundle_hash\n" if $patch_bundle_hash; + print $rfh "date:" . time() . "\nmsg:$msg\n"; + close $rfh; + + close $tmp_idx_fh; + rename($tmp_idx_path, INDEX); + write_file(HEAD, "$next_id\n"); + File::Path::remove_tree(TMP_DIR); + make_path(TMP_DIR); + print "Revision $next_id committed.\n"; } sub run_log { @@ -319,207 +331,275 @@ sub run_log { print "Revision History (HEAD: $head)\n\n"; - my $rev_num = from_hex_id($head); - while ($rev_num > 0) { - my $hex_id = to_hex_id($rev_num); - my $rev_dir = File::Spec->catfile(REV_DIR, $hex_id); - - # Stop if we hit a gap in history or the beginning - last unless -d $rev_dir; + my $current_id = $head; + while ($current_id) { + my $rev_file = File::Spec->catfile(REV_DIR, $current_id); + last unless -f $rev_file; - # Stat index 9 is the last modification time - my $mtime = (stat($rev_dir))[9]; - my $date_str = strftime("%a %b %e %H:%M:%S %Y", localtime($mtime)); - - my $msg_file = File::Spec->catfile($rev_dir, "message"); - my $message = "[No message]"; - - if (-e $msg_file) { - open my $mfh, '<', $msg_file; - $message = do { local $/; <$mfh> } // "[Empty message]"; - $message =~ s/^\s+|\s+$//g; # Trim whitespace - close $mfh; + open my $rfh, '<', $rev_file or last; + my ($tree, $parent, $date, $msg) = ("", "", "", "[No message]"); + + while (<$rfh>) { + chomp; + if (/^tree:(.*)$/) { $tree = $1; } + elsif (/^parent:(.*)$/) { $parent = $1; } + elsif (/^date:(.*)$/) { $date = $1; } + elsif (/^msg:(.*)$/) { $msg = $1; } } + close $rfh; + + my $date_str = $date ? strftime("%a %b %e %H:%M:%S %Y", localtime($date)) + : "Unknown Date"; - print "commit $hex_id\n"; - print "Date: $date_str\n"; - print "\n $message\n\n"; + print "commit $current_id\n"; + print "Date: $date_str\n"; + print "\n $msg\n\n"; - $rev_num--; + # Move to parent to walk backward + $current_id = $parent; } close $pipe; select($old_fh); } -sub stage_file { - my ($src, $obj, $tmp) = @_; - make_path(dirname($tmp)); - - # We want the link inside tmp/ to point to "obj/HASH" - # relative to project root. - my $rel_target = $obj; - - unlink($tmp) if -e $tmp || -l $tmp; - symlink($rel_target, $tmp) or die "Failed to symlink $tmp: $!"; - print "[Staged File] $src\n"; +sub make_bin_patch { + my ($new_file, $old_file) = @_; + + my $new_size = -s $new_file; + open my $f_new, '<:raw', $new_file or die $!; + open my $f_old, '<:raw', $old_file or die $!; + + # Start patch string with new total size (8 bytes) + my $patch = pack("Q", $new_size); + + my $offset = 0; + my $blk_size = 4096; + + while (1) { + my $read_new = sysread($f_new, my $buf_new, $blk_size); + my $read_old = sysread($f_old, my $buf_old, $blk_size); + last if !$read_new && !$read_old; + + # If blocks differ, record the change + if (($buf_new // '') ne ($buf_old // '')) { + # Format: Offset (Q), Length (L), then the raw data + $patch .= pack("QL", $offset, length($buf_new)) . $buf_new; + } + $offset += $blk_size; + } + close $f_new; close $f_old; + + # If only the 8-byte header exists, files were identical + return length($patch) > 8 ? $patch : undef; } -sub stage_link { - my ($src, $tmp) = @_; - my $target = readlink($src); - - make_path(dirname($tmp)); - unlink($tmp) if -e $tmp || -l $tmp; - # For workspace symlinks, we clone the target - symlink($target, $tmp) or die "Failed to symlink $tmp: $!"; - print "[Staged link] $src -> $target\n"; -} +# Convert decimal to a padded 7-char hex string +sub to_hex_id { sprintf("%07x", $_[0]) } + +# Convert hex back to decimal +sub from_hex_id { hex($_[0]) } -sub make_patch { - my ($src, $obj_name, $obj_path, $patches) = @_; +sub write_file { + my ($path, $content) = @_; + my $dir = dirname($path); + make_path($dir) unless -d $dir; + open my $fh, '>', $path or die "Could not open '$path' for writing: $!"; + print $fh $content; + close $fh or die "Could not close '$path' after writing: $!"; +} - return unless -e $obj_path; +sub touch_file { + my ($path) = @_; + my $dir = dirname($path); + make_path($dir) unless -d $dir; + open my $fh, '>', $path or die "Could not touch $path: $!"; + close $fh; +} - my $patch; +sub hash_file_content { + my ($filename) = @_; + my $sha = Digest::SHA->new(1); - # TODO: implement a disk-based diff for large files, esp bin files - if (-T $src) { - if (compare($src, $obj_path) != 0) { - $patch = qx(diff -u '$obj_path' '$src'); - } + if (-l $filename) { + my $target = readlink($filename); + die "Could not readlink '$filename': $!" unless defined $target; + $sha->add($target); } else { - $patch = make_bin_patch($src, $obj_path); + open(my $fh, '<:raw', $filename) or die "Could not open '$filename': $!"; + $sha->addfile($fh); + close($fh); } - if ($patch) { $patches->{$obj_name} = $patch; } + return $sha->hexdigest; } -sub make_bin_patch { - my ($new_file, $old_file) = @_; - - open my $f_new, '<:raw', $new_file or die "Cannot open file: $!"; - open my $f_old, '<:raw', $old_file or die "Cannot open object file: $!"; - my $f_out; - - my $patch = ""; - my $offset = 0; - my $blk_size = 4096; - - while (1) { - my $read_new = sysread($f_new, my $buf_new, $blk_size); - my $read_old = sysread($f_old, my $buf_old, $blk_size); - - # Stop when we've processed the entire new file - last if $read_new == 0 && $read_old == 0; - - # Handle file size differences (pad with nulls if one is shorter) - $buf_new .= "\0" x ($blk_size - length($buf_new)) if length($buf_new) < $blk_size; - $buf_old .= "\0" x ($blk_size - length($buf_old)) if length($buf_old) < $blk_size; - - # If they differ, we save the OLD buffer (reverse delta) - if ($buf_new ne $buf_old) { - # Header: [64-bit offset][32-bit length][data] - # 'Q' is 64-bit unsigned (quad), 'L' is 32-bit unsigned (long) - $patch .= pack("QL", $offset, length($buf_old)) . $buf_old; +sub stream_tree { + my (@paths) = @_; + @paths = (".") unless @paths; + + my $chunk_size = 1024 * 64; + my @buf; + my $buf_size = 0; + my $tot_size = 0; + my $use_disk = 0; + my ($tmp_fh, $tmp_path); + + my $flush = sub { + if (!$use_disk) { + ($tmp_fh, $tmp_path) = tempfile(UNLINK => 1); + $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size); + binmode $tmp_fh, ":raw"; + $use_disk = 1; + } + print $tmp_fh @buf; + @buf = (); + $buf_size = 0; + }; + + my @stack = @paths; + while (@stack) { + my $path = (pop @stack) =~ s|^\./||r; + next if $path eq REPO; + + my @st = lstat($path); + if (-d _) { + if (opendir(my $dh, $path)) { + push @stack, map { File::Spec->catfile($path, $_) } + grep { $_ ne '.' && $_ ne '..' && $_ ne REPO } + readdir($dh); + closedir($dh); + } + } elsif (-f _ || -l _) { + my $line = "$path\t$st[9]\t$st[7]\n"; + push @buf, $line; + $buf_size += length($line); + $tot_size += length($line); + + if ((!$use_disk && $tot_size > MEM_LIMIT) || + ($use_disk && $buf_size > $chunk_size)) { + $flush->(); + } } - - $offset += $blk_size; } - close $f_new; close $f_old; - close $f_out if $f_out; - return length($patch) > 0 ? $patch : undef; -} - -sub save_patches { - my ($patches) = @_; - - return unless keys %$patches; - - my $tar = Archive::Tar->new; - while (my ($obj_name, $patch) = each %$patches) { - $tar->add_data("$obj_name.patch", $patch); + if (!$use_disk) { + @buf = sort @buf; + return sub { + my $line = shift @buf; + return unless $line; + chomp $line; + my ($p, $m, $s) = split(/\t/, $line); + return { path => $p, mtime => $m, size => $s }; + }; + } else { + $flush->() if @buf; + close $tmp_fh; + open(my $sort_fh, "-|", "sort", "-t", "\t", "-k1,1", $tmp_path) or die $!; + return sub { + my $line = <$sort_fh>; + unless ($line) { close $sort_fh; return; } + chomp $line; + my ($p, $s, $m) = split(/\t/, $line); + return { path => $p, mtime => $m, size => $s }; + }; } - - $tar->write(TMP_DIFF, 1); # gzip } -sub apply_bin_patch { - my ($obj_file, $patch_file) = @_; - - open my $obj_fh, '+<:raw', $obj_file or die "Cannot open object: $!"; - open my $ptch_fh, '<:raw', $patch_file or die "Cannot open patch: $!"; +sub stream_index { + my $idx = INDEX; + return sub { return; } unless -e $idx && -s $idx > 0; + open(my $fh, "<:raw", $idx) or die "Could not open index: $!"; - # Header is 12 bytes (Q + L) - while (sysread($ptch_fh, my $header, 12)) { - my ($offset, $len) = unpack("QL", $header); + return sub { + my $line = <$fh>; + unless (defined $line) { close $fh; return; } + chomp $line; - # sysread/syswrite to avoid read()'s internal buffers. - sysread($ptch_fh, my $payload, $len); - sysseek($obj_fh, $offset, 0); - syswrite($obj_fh, $payload, $len); - } - - close $obj_fh; - close $ptch_fh; + my ($s_hash, $b_hash, $mtime, $size, $path) = split(/\t/, $line, 5); + + return { + s_hash => $s_hash, + b_hash => $b_hash, + mtime => $mtime, + size => $size, + path => $path, + }; + }; } -# Convert decimal to a padded 7-char hex string -sub to_hex_id { sprintf("%07x", $_[0]) } - -# Convert hex back to decimal -sub from_hex_id { hex($_[0]) } +# Logic for sharded object directory +sub get_obj_path { + my ($hash) = @_; + my $dir = File::Spec->catfile(OBJ_DIR, substr($hash, 0, 2)); + make_path($dir) unless -d $dir; + return File::Spec->catfile($dir, substr($hash, 2)); +} -sub launch_editor { - my $editor = $ENV{EDITOR} || $ENV{VISUAL} || 'vi'; - my $temp_msg_file = File::Spec->catfile(VCX_DIR, "COMMIT_EDITMSG"); - - open my $fh, '>', $temp_msg_file or die "Cannot create temp message file: $!"; - print $fh "\n# Enter the commit message for your changes.\n"; - print $fh "# Lines starting with '#' will be ignored.\n"; - close $fh; +sub snapshot_tree { + my $it = stream_index(); + my @buf; + my $use_disk = 0; + my $total_size = 0; + my $chunk_size = 1024 * 64; + my $sha = Digest::SHA->new(1); + my ($tmp_fh, $tmp_path); + + while (my $entry = $it->()) { + my $line = "$entry->{s_hash}\t$entry->{path}\n"; + $sha->add($line); + $total_size += length($line); + + if (!$use_disk && $total_size > MEM_LIMIT) { + ($tmp_fh, $tmp_path) = tempfile(); + $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size); + binmode $tmp_fh, ":raw"; + print $tmp_fh @buf; + @buf = (); + $use_disk = 1; + } - system("$editor \"$temp_msg_file\""); + if ($use_disk) { + print $tmp_fh $line; + } else { + push @buf, $line; + } + } - my $final_msg = ""; - if (-e $temp_msg_file) { - open my $rfh, '<', $temp_msg_file or die $!; - while (my $line = <$rfh>) { - next if $line =~ /^#/; # Strip out the helper comments - $final_msg .= $line; + my $tree_hash = $sha->hexdigest; + my $obj_path = get_obj_path($tree_hash); + if (!-e $obj_path) { + if ($use_disk) { + close $tmp_fh; + rename($tmp_path, $obj_path) or die "Rename failed: $!"; + } else { + write_file($obj_path, join("", @buf)); } - close $rfh; - unlink($temp_msg_file); # Clean up + } else { + unlink($tmp_path) if $use_disk; } - $final_msg =~ s/^\s+|\s+$//g; # Trim whitespace - return $final_msg; + return $tree_hash; } -sub write_file { - my ($path, $content) = @_; - open my $fh, '>', $path or die "Could not open '$path' for writing: $!"; - print $fh $content; - close $fh or die "Could not close '$path' after writing: $!"; +sub stream_tree_file { + my ($hash) = @_; + return sub { return } unless $hash; + my $path = get_obj_path($hash); + open my $fh, '<:raw', $path or return sub { return }; + return sub { + my $line = <$fh>; + return unless $line; + chomp $line; + my ($h, $p) = split(/\t/, $line, 2); + return { hash => $h, path => $p }; + } } -sub init_stage { - my ($latest_tree_dir, $entries_ref) = @_; - - find({ - wanted => sub { - return if -d _; - - my $rel = File::Spec->abs2rel($_, $latest_tree_dir); - my $staged_path = File::Spec->catfile(TMP_TREE, $rel); - make_path(dirname($staged_path)); - - my $target = readlink($_); - symlink($target, $staged_path) or die "Failed to link $rel: $!"; - - $entries_ref->{$rel} = 1; - }, - no_chdir => 1 - }, $latest_tree_dir); +sub read_head { + return unless -e HEAD; + open my $fh, '<', HEAD or return; + my $val = <$fh>; + chomp $val if $val; + return $val; } diff --git a/vcx.t b/vcx.t new file mode 100644 index 0000000..bec9aa3 --- /dev/null +++ b/vcx.t @@ -0,0 +1,153 @@ +use strict; +use warnings; +use Test::More; +use File::Path qw(remove_tree make_path); +use File::Spec; +use Cwd; +use File::Glob qw(:bsd_glob); + +use constant ROOT => '.vcx'; +use constant HEAD => ROOT . '/head'; +use constant OBJ_DIR => ROOT . '/obj'; +use constant REV_DIR => ROOT . '/rev'; +use constant TMP_DIR => ROOT . '/index'; +use constant TMP_TREE => TMP_DIR . '/tree'; + +# Setup sandbox +my $sandbox = "sandbox"; +remove_tree($sandbox) if -d $sandbox; +make_path($sandbox); + +my $orig_dir = getcwd(); +chdir($sandbox) or die "Cant enter sandbox: $!"; +my $cmd = File::Spec->catfile($orig_dir, "vcx"); + +# Helper functions +sub write_file { + my ($path, $content) = @_; + open my $fh, '>', $path or die $!; + print $fh $content; + close $fh; +} + +sub read_file { + my $path = shift; + open my $fh, '<', $path or return ""; + my $content = <$fh>; + chomp $content if $content; + close $fh; + return $content; +} + +# Tests + +subtest 'Repository Initialization' => sub { + ok(system("perl $cmd init > /dev/null") == 0, "Init exit code 0"); + ok(-d ROOT, "ROOT directory created"); + ok(-e HEAD, "Head file created"); + ok(-d OBJ_DIR, "OBJ_DIR directory created"); + ok(-d REV_DIR, "REV_DIR directory created"); +}; + +subtest 'Adding and Committing Files' => sub { + write_file("test.txt", "Hello, world!"); + + ok(system("perl $cmd add test.txt > /dev/null") == 0, "Add file successful"); + ok(-d TMP_TREE, "Staging tree created"); + + ok(system("perl $cmd commit -m 'Initial commit' > /dev/null") == 0, "Commit successful"); + + my $rev1 = File::Spec->catdir(REV_DIR, "0000001"); + ok(-d $rev1, "Revision 0000001 directory exists"); + + my $msg_file = File::Spec->catfile($rev1, "message"); + is(read_file($msg_file), "Initial commit", "Commit message stored correctly"); +}; + +subtest 'Auto-staging with commit -am' => sub { + # Append content + open my $fh, '>>', "test.txt" or die $!; + print $fh "\nMore content"; + close $fh; + + ok(system("perl $cmd commit -am 'Second commit' > /dev/null") == 0, "Commit -am successful"); + + opendir(my $dh, OBJ_DIR) or die $!; + my @objs = grep { /[a-f0-9]{40}/ } readdir($dh); + closedir($dh); + ok(scalar @objs >= 1, "Content blobs found in object store"); +}; + +subtest 'File Moves and Deletions (Tree Integrity)' => sub { + # Create nested structure + make_path("dirA/dirB"); + write_file("dirA/dirB/shuffle.txt", "Same Content"); + system("perl $cmd add dirA/dirB/shuffle.txt > /dev/null"); + system("perl $cmd commit -m 'Shuffle part 1' > /dev/null"); + + # Move file and delete old dir + make_path("dirC"); + rename("dirA/dirB/shuffle.txt", "dirC/shuffle.txt"); + remove_tree("dirA"); + + ok(system("perl $cmd commit -am 'Shuffle part 2' > /dev/null") == 0, "Commit move successful"); + + my $head = read_file(HEAD); + my ($tree_ptr) = bsd_glob(File::Spec->catfile(REV_DIR, $head, "tree-*")); + my $tree_hash = $tree_ptr =~ s/.*tree-//r; + my $actual_tree_path = File::Spec->catdir(OBJ_DIR, $tree_hash); + + ok(-l File::Spec->catfile($actual_tree_path, "dirC/shuffle.txt"), "New path exists in tree"); + ok(!-d File::Spec->catdir($actual_tree_path, "dirA"), "Deleted path removed from tree"); + + my $link_target = readlink(File::Spec->catfile($actual_tree_path, "dirC/shuffle.txt")); + like($link_target, qr/obj\/[a-f0-9]{40}/, "Tree link points to object store"); +}; + +subtest 'Symlink Handling' => sub { + write_file("target.txt", "Final Destination"); + symlink("target.txt", "link_a"); + symlink("link_a", "link_b"); + + system("perl $cmd add link_b > /dev/null"); + system("perl $cmd commit -m 'Double link' > /dev/null"); + + my $head = read_file(HEAD); + my ($tree_ptr) = bsd_glob(File::Spec->catfile(REV_DIR, $head, "tree-*")); + my $staged_link = File::Spec->catfile(OBJ_DIR, $tree_ptr =~ s/.*tree-//r, "link_b"); + + is(readlink($staged_link), "link_a", "Symlink-to-symlink targets preserved literally"); +}; + +subtest 'Edge Cases: Content Reversion' => sub { + # Clear the staging area to ensure a "Pure" tree + remove_tree(TMP_DIR); + make_path(TMP_TREE); + + make_path("revert_test"); + write_file("revert_test/data.txt", "Version A"); + + # Commit State A + system("perl $cmd add revert_test/data.txt > /dev/null"); + system("perl $cmd commit -m 'State A' > /dev/null"); + my $tree_v1 = (bsd_glob(File::Spec->catfile(REV_DIR, read_file(HEAD), "tree-*")))[0]; + + # Commit State B + write_file("revert_test/data.txt", "Version B"); + system("perl $cmd commit -am 'State B' > /dev/null"); + + # Revert to State A + write_file("revert_test/data.txt", "Version A"); + system("perl $cmd commit -am 'Back to State A' > /dev/null"); + my $tree_v3 = (bsd_glob(File::Spec->catfile(REV_DIR, read_file(HEAD), "tree-*")))[0]; + + # Extract hashes and compare + my $hash1 = $tree_v1 =~ s/.*tree-//r; + my $hash3 = $tree_v3 =~ s/.*tree-//r; + + is($hash1, $hash3, "Tree hashes match after index reset and content restoration"); +}; + +# Cleanup +chdir($orig_dir); +done_testing(); diff --git a/vcx_patch.t b/vcx_patch.t new file mode 100644 index 0000000..d2b813f --- /dev/null +++ b/vcx_patch.t @@ -0,0 +1,104 @@ +#!/usr/bin/perl +use strict; +use warnings; +use Test::More; +use File::Temp qw(tempfile); +use File::Spec; + +# Overrides exit and silences the 'Usage' print during the 'do' call. +BEGIN { + *CORE::GLOBAL::exit = sub { }; +} + +{ + local @ARGV = (); + open my $oldout, ">&STDOUT"; + open STDOUT, ">", File::Spec->devnull(); + eval { do './vcx' }; + open STDOUT, ">&", $oldout; +} + +subtest 'Binary Patching: Shrink & Truncate' => sub { + my ($fh_old, $old_path) = tempfile(); + my ($fh_new, $new_path) = tempfile(); + + # 8KB of 'A' shrinking to 6 bytes + print $fh_old "A" x 8192; close $fh_old; + print $fh_new "SHRINK"; close $fh_new; + + my $patch = main::make_bin_patch($new_path, $old_path); + ok(defined($patch), "Patch generated for shrinking file"); + + main::apply_bin_patch($old_path, $patch); + + is(-s $old_path, 6, "File size correctly truncated to 6 bytes"); + + open my $check, '<:raw', $old_path; + my $content = <$check>; + is($content, "SHRINK", "Content matches perfectly (no stale trailing data)"); + close $check; +}; + +subtest 'Binary Patching: Growth & Extension' => sub { + my ($fh_old, $old_path) = tempfile(); + my ($fh_new, $new_path) = tempfile(); + + print $fh_old "Tiny"; + close $fh_old; + + # Create a 5KB string to cross a block boundary (4096) + my $big_data = "EXTENDED" . ("." x 5000); + print $fh_new $big_data; + close $fh_new; + + my $patch = main::make_bin_patch($new_path, $old_path); + main::apply_bin_patch($old_path, $patch); + + is(-s $old_path, 5008, "File size correctly extended"); + + open my $check, '<:raw', $old_path; + my $result = do { local $/; <$check> }; + is($result, $big_data, "Extended content matches perfectly"); + close $check; +}; + +subtest 'Binary Patching: Sparse Block Edits' => sub { + my ($fh_old, $old_path) = tempfile(); + my ($fh_new, $new_path) = tempfile(); + + # Create three 4KB blocks + my $data = ("X" x 4096) . ("Y" x 4096) . ("Z" x 4096); + print $fh_old $data; close $fh_old; + + # Modify only the middle block ('Y' block) + substr($data, 5000, 10) = "MODIFIED!!"; + print $fh_new $data; close $fh_new; + + my $patch = main::make_bin_patch($new_path, $old_path); + + # Header (8) + Block Header (12) + Block (4096) = ~4116 bytes + ok(length($patch) < 4200, "Patch is efficient (only captured the changed block)"); + + main::apply_bin_patch($old_path, $patch); + + open my $check, '<:raw', $old_path; + my $final = do { local $/; <$check> }; + is(substr($final, 5000, 10), "MODIFIED!!", "Middle block update applied"); + is(substr($final, 0, 10), "XXXXXXXXXX", "First block preserved"); + is(substr($final, -10), "ZZZZZZZZZZ", "Last block preserved"); + close $check; +}; + +subtest 'Binary Patching: No Change Identity' => sub { + my ($fh_old, $old_path) = tempfile(); + my ($fh_new, $new_path) = tempfile(); + + my $data = "Same data" x 50; + print $fh_old $data; close $fh_old; + print $fh_new $data; close $fh_new; + + my $patch = main::make_bin_patch($new_path, $old_path); + is($patch, undef, "No patch generated for identical files"); +}; + +done_testing(); -- cgit v1.2.3