From 4a8322267b08cf8fb2738134c7cdc4e9e3c23ded Mon Sep 17 00:00:00 2001 From: Sadeep Madurange Date: Sun, 19 Apr 2026 09:42:01 +0800 Subject: Rename tool to urn: vessel of time from GoW. --- urn | 892 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ urn.t | 175 +++++++++++++ vcx | 892 ------------------------------------------------------------------ vcx.t | 175 ------------- 4 files changed, 1067 insertions(+), 1067 deletions(-) create mode 100644 urn create mode 100644 urn.t delete mode 100644 vcx delete mode 100644 vcx.t diff --git a/urn b/urn new file mode 100644 index 0000000..c9d8e14 --- /dev/null +++ b/urn @@ -0,0 +1,892 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use IO::Handle; +use File::Path qw(make_path); +use File::Copy qw(copy); +use File::Basename; +use File::Glob qw(:bsd_glob); +use File::Spec; +use File::Temp qw(tempfile); +use Getopt::Long; +use Archive::Tar; +use Compress::Zlib; +use POSIX qw(strftime); +use Digest::SHA qw(sha1_hex); + +use constant REPO => '.urn'; +use constant HEAD => REPO . '/head'; # Current revision ID +use constant INDEX => REPO . '/index'; # Index +use constant OBJ_DIR => REPO . '/obj'; # Object store +use constant REV_DIR => REPO . '/rev'; # Revisions +use constant TMP_DIR => REPO . '/stg'; # Staging area + +use constant CHUNK_LEN => 4096; +use constant MEM_LIMIT => 10 * 1024 * 1024; +use constant IO_LAYER => ":raw:perlio(layer=" . CHUNK_LEN . 
")"; + +Getopt::Long::Configure("bundling"); + +my $cmd = shift @ARGV // ''; +my @args = @ARGV; + +if ($cmd eq 'init') { + run_init(); +} elsif ($cmd eq 'status') { + run_status(); +} elsif ($cmd eq 'add') { + die "Usage: $0 add [path1] [path2] ...\n" unless @args; + run_add(@args); +} elsif ($cmd eq 'commit') { + my ($m, $a); + GetOptions('m=s' => \$m, 'a' => \$a); + if ($a) { run_add("."); } + run_commit($m); +} elsif ($cmd eq 'log') { + run_log(); +} elsif ($cmd eq 'show') { + # Usage: show HEAD main.c + my $rev = shift @args; + my $file = shift @args; + die "Usage: $0 show \n" unless defined $rev && defined $file; + run_show($rev, $file); +} elsif ($cmd eq 'diff') { + run_diff(@args); +} else { + print "Usage: $0 [init|status|add|commit|log]\n"; + exit 1; +} + +sub run_init { + make_path(OBJ_DIR, REV_DIR, TMP_DIR); + touch_file(INDEX); + my $rev_id = to_hex_id(0); + write_file(HEAD, "$rev_id\n"); + print "Initialized repository.\n"; +} + +sub run_status { + my $it_idx = stream_index(); + my $it_wrk = stream_tree("."); + + # Get the base tree from the last commit + my $head = read_head() // to_hex_id(0); + my $parent_tree_hash = ""; + if ($head ne to_hex_id(0)) { + my $rev_path = File::Spec->catfile(REV_DIR, $head); + if (open my $fh, '<', $rev_path) { + while (<$fh>) { if (/^tree:(.*)$/) { $parent_tree_hash = $1; last; } } + close $fh; + } + } + my $it_old = stream_tree_file($parent_tree_hash); + + print "On Revision: $head\n\n"; + + my $idx = $it_idx->(); + my $wrk = $it_wrk->(); + my $old = $it_old->(); + my $found_changes = 0; + + # Iterate while any of the three sources have entries + while ($idx || $wrk || $old) { + # Find the alphabetically "lowest" path among the three streams + my $path = (sort grep { defined } ($idx->{path}, $wrk->{path}, $old->{path}))[0]; + + my $flag = ""; + my $suffix = ""; + + # Logical check for Deletions (In Old Tree, but missing elsewhere) + my $in_old = (defined $old && $old->{path} eq $path); + my $in_idx = (defined $idx && 
$idx->{path} eq $path); + my $in_wrk = (defined $wrk && $wrk->{path} eq $path); + + if ($in_old && !$in_idx) { + # File existed in last commit but is gone from index -> Staged for Deletion + $flag = "[D]"; + $suffix = "(staged)"; + } + elsif ($in_idx && !$in_wrk) { + # In index but missing from disk -> Deleted but not staged + $flag = "[D]"; + } + elsif ($in_wrk && !$in_idx) { + # On disk but not in index -> New file (Unstaged) + $flag = "[N]"; + } + elsif ($in_idx && $in_wrk) { + # Path exists in both; check for modifications + my $stg_path = File::Spec->catfile(TMP_DIR, $path); + my $is_staged = -e $stg_path; + my $workspace_changed = ($wrk->{mtime} != $idx->{mtime} || $wrk->{size} != $idx->{size}); + + if ($is_staged) { + if ($workspace_changed) { + $flag = "[M]"; + $suffix = "(dirty)"; + } else { + # Compare against c_hash (Committed Hash) to see if it's new or modified + $flag = ($idx->{c_hash} eq "-") ? "[N]" : "[M]"; + $suffix = "(staged)"; + } + } elsif ($workspace_changed || ($in_old && $idx->{c_hash} ne $old->{hash})) { + # Not staged, but differs from last commit + $flag = "[M]"; + } + } + + if ($flag ne "") { + printf "%s %s %s\n", $flag, $path, $suffix; + $found_changes = 1; + } + + # Advance iterators if they matched the current path + $old = $it_old->() if $in_old; + $idx = $it_idx->() if $in_idx; + $wrk = $it_wrk->() if $in_wrk; + } + + print "No changes detected.\n" unless $found_changes; + print "\n"; +} + +sub run_add { + my @paths = @_; + @paths = (".") unless @paths; + + my $idx_path = INDEX; + my $tmp_idx = "$idx_path.tmp"; + + my $it_idx = stream_index(); + my $it_wrk = stream_tree(@paths); + + open(my $out, ">:raw", $tmp_idx) + or die "Could not create $tmp_idx: $!"; + + my $idx_entry = $it_idx->(); + my $wrk_entry = $it_wrk->(); + + while ($idx_entry || $wrk_entry) { + my $cmp = !defined $idx_entry ? 1 + : !defined $wrk_entry ? 
-1 + : $idx_entry->{path} cmp $wrk_entry->{path}; + + if ($cmp == 0) { + if ($idx_entry->{mtime} == $wrk_entry->{mtime} && + $idx_entry->{size} == $wrk_entry->{size}) { + # No change: Preserve all 3 hashes and metadata + printf $out "%-40s\t%-40s\t%-40s\t%-12d\t%-10d\t%s\n", + $idx_entry->{s_hash}, $idx_entry->{c_hash}, $idx_entry->{b_hash}, + $idx_entry->{mtime}, $idx_entry->{size}, $idx_entry->{path}; + } else { + my $p = $wrk_entry->{path}; + my $current_hash = hash_file_content($p); + my $stg_path = File::Spec->catfile(TMP_DIR, $p); + make_path(dirname($stg_path)); + + (-l $p) ? symlink(readlink($p), $stg_path) : copy($p, $stg_path); + + # Update staged hash, preserve committed and base hashes + printf $out "%-40s\t%-40s\t%-40s\t%-12d\t%-10d\t%s\n", + $current_hash, $idx_entry->{c_hash}, $idx_entry->{b_hash}, + $wrk_entry->{mtime}, $wrk_entry->{size}, $p; + } + $idx_entry = $it_idx->(); + $wrk_entry = $it_wrk->(); + } + elsif ($cmp > 0) { + # New File: hash and snapshot to staging + my $p = $wrk_entry->{path}; + my $hash = hash_file_content($p); + my $stg_path = File::Spec->catfile(TMP_DIR, $p); + make_path(dirname($stg_path)); + + (-l $p) ? 
symlink(readlink($p), $stg_path) : copy($p, $stg_path); + + # Staged is the new hash; Committed and Base are '-' + printf $out "%-40s\t%-40s\t%-40s\t%-12d\t%-10d\t%s\n", + $hash, "-", "-", $wrk_entry->{mtime}, $wrk_entry->{size}, $p; + $wrk_entry = $it_wrk->(); + } + else { # Deleted + $idx_entry = $it_idx->(); + } + } + + close $out; + rename($tmp_idx, $idx_path) or die "Failed to update index: $!"; +} + +sub run_commit { + my ($msg) = @_; + if (!defined $msg || $msg eq "") { + $msg = get_commit_message(); + } + + my $parent_id = read_head() // to_hex_id(0); + my $parent_tree_hash = ""; + + if ($parent_id ne to_hex_id(0)) { + my $p_rev_file = File::Spec->catfile(REV_DIR, $parent_id); + if (open my $pfh, '<', $p_rev_file) { + while (<$pfh>) { + if (/^tree:(.*)$/) { $parent_tree_hash = $1; last; } + } + close $pfh; + } + } + + my $it_idx = stream_index(); + my $it_old = stream_tree_file($parent_tree_hash); + + my $sha_new_tree = Digest::SHA->new(1); + my @new_tree_lines; + my $new_tree_size = 0; + my $use_disk_nt = 0; + my ($nt_fh, $nt_path); + + my %patches; + my $patch_mem_size = 0; + my $use_disk_patch = 0; + my ($pt_fh, $pt_path); + + my ($tmp_idx_fh, $tmp_idx_path) = tempfile(DIR => TMP_DIR, UNLINK => 0); + binmode $tmp_idx_fh, ":raw"; + + my $idx = $it_idx->(); + my $old = $it_old->(); + + while ($idx || $old) { + my $cmp = !defined $idx ? 1 : !defined $old ? 
-1 : $idx->{path} cmp $old->{path}; + my ($out_s, $out_c, $out_b, $out_m, $out_z, $out_p); + + if ($cmp < 0) { # New file + ($out_p, $out_s, $out_m, $out_z) = ($idx->{path}, $idx->{s_hash}, $idx->{mtime}, $idx->{size}); + $out_c = $out_s; + $out_b = $out_s; + + my $obj_path = get_obj_path($out_b); + my $stg_file = File::Spec->catfile(TMP_DIR, $out_p); + if (!-e $obj_path) { + rename($stg_file, $obj_path) or copy($stg_file, $obj_path); + } + $idx = $it_idx->(); + } + elsif ($cmp == 0) { # Modified or Unchanged + ($out_p, $out_s, $out_m, $out_z) = ($idx->{path}, $idx->{s_hash}, $idx->{mtime}, $idx->{size}); + + if ($idx->{s_hash} ne ($idx->{c_hash} // "-")) { + my $base_obj = get_obj_path($old->{hash}); + my $stg_file = File::Spec->catfile(TMP_DIR, $out_p); + + my $patch = (-T $stg_file) + ? qx(diff '$base_obj' '$stg_file') + : make_bin_patch($stg_file, $base_obj); + + if (defined $patch && length($patch) <= $out_z) { + if (!$use_disk_patch && ($patch_mem_size + length($patch)) > MEM_LIMIT) { + ($pt_fh, $pt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0); + my $tar = Archive::Tar->new; + $tar->add_data($_, $patches{$_}) for keys %patches; + $tar->write($pt_fh); + %patches = (); + $use_disk_patch = 1; + } + if ($use_disk_patch) { + my $tar = Archive::Tar->new($pt_path); + $tar->add_data("$out_p.patch", $patch); + $tar->write($pt_path); + } else { + $patches{"$out_p.patch"} = $patch; + $patch_mem_size += length($patch); + } + $out_b = $old->{hash}; + unlink($stg_file); + } else { + $out_b = $out_s; + my $obj_path = get_obj_path($out_b); + if (!-e $obj_path) { + rename($stg_file, $obj_path) or copy($stg_file, $obj_path); + } + } + $out_c = $out_s; + } else { + $out_b = $old->{hash}; + $out_c = $idx->{c_hash}; + } + $idx = $it_idx->(); + $old = $it_old->(); + } + else { + $old = $it_old->(); + next; + } + + my $t_line = "$out_b\t$out_p\n"; + $sha_new_tree->add($t_line); + $new_tree_size += length($t_line); + + if (!$use_disk_nt && $new_tree_size > MEM_LIMIT) { + ($nt_fh, 
$nt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0); + print $nt_fh @new_tree_lines; + @new_tree_lines = (); + $use_disk_nt = 1; + } + $use_disk_nt ? print $nt_fh $t_line : push @new_tree_lines, $t_line; + + printf $tmp_idx_fh "%-40s\t%-40s\t%-40s\t%-12d\t%-10d\t%s\n", + $out_s, ($out_c // "-"), ($out_b // "-"), $out_m, $out_z, $out_p; + } + + my $new_tree_hash = $sha_new_tree->hexdigest; + my $tree_obj_path = get_obj_path($new_tree_hash); + if ($use_disk_nt) { + close $nt_fh; + rename($nt_path, $tree_obj_path); + } else { + write_file($tree_obj_path, join("", @new_tree_lines)); + } + + my $patch_bundle_hash = ""; + if ($use_disk_patch || %patches) { + my $bundle_tmp = File::Spec->catfile(TMP_DIR, "bundle.tar.gz"); + my $tar = Archive::Tar->new; + if ($use_disk_patch) { $tar->read($pt_path); unlink $pt_path; } + $tar->add_data($_, $patches{$_}) for keys %patches; + $tar->write($bundle_tmp, COMPRESS_GZIP); + $patch_bundle_hash = hash_file_content($bundle_tmp); + rename($bundle_tmp, get_obj_path($patch_bundle_hash)); + } + + my $next_id = to_hex_id(from_hex_id($parent_id) + 1); + $msg //= "Commit $next_id"; + open my $rfh, '>:raw', File::Spec->catfile(REV_DIR, $next_id) or die $!; + print $rfh "tree:$new_tree_hash\nparent:$parent_id\n"; + print $rfh "patch:$patch_bundle_hash\n" if $patch_bundle_hash; + print $rfh "date:" . time() . 
"\nmsg:$msg\n"; + close $rfh; + + close $tmp_idx_fh; + rename($tmp_idx_path, INDEX); + write_file(HEAD, "$next_id\n"); + File::Path::remove_tree(TMP_DIR); + make_path(TMP_DIR); + print "Revision $next_id committed.\n"; +} + +sub run_log { + open my $fh_h, '<', HEAD or die "Not a repository.\n"; + my $head = <$fh_h>; chomp $head; close $fh_h; + + # Setup pager + my $pager = $ENV{PAGER} || 'less -R'; + open(my $pipe, "| $pager") or die "Can't pipe to $pager: $!"; + my $old_fh = select($pipe); + + print "Revision History (HEAD: $head)\n\n"; + + my $current_id = $head; + while ($current_id) { + my $rev_file = File::Spec->catfile(REV_DIR, $current_id); + last unless -f $rev_file; + + open my $rfh, '<', $rev_file or last; + my ($tree, $parent, $date, $msg) = ("", "", "", "[No message]"); + + while (<$rfh>) { + chomp; + if (/^tree:(.*)$/) { $tree = $1; } + elsif (/^parent:(.*)$/) { $parent = $1; } + elsif (/^date:(.*)$/) { $date = $1; } + elsif (/^msg:(.*)$/) { $msg = $1; } + } + close $rfh; + + my $date_str = $date ? strftime("%a %b %e %H:%M:%S %Y", localtime($date)) + : "Unknown Date"; + + print "commit $current_id\n"; + print "Date: $date_str\n"; + print "\n $msg\n\n"; + + # Move to parent to walk backward + $current_id = $parent; + } + + close $pipe; + select($old_fh); +} + +sub run_show { + my ($rev_id, $file_path) = @_; + + die "Usage: $0 show \n" unless defined $rev_id && defined $file_path; + + my $pager = $ENV{PAGER} || 'less -R'; + open(my $pipe, "| $pager") or die "Can't pipe to $pager: $!"; + my $old_fh = select($pipe); + + # 2. Use existing logic to get the content + my $v = get_file_version($rev_id, $file_path); + die "Error: Could not resolve '$file_path' at revision $rev_id.\n" unless defined $v; + + # 3. 
Output content + if (ref($v) eq 'SCALAR') { + binmode STDOUT, ":raw"; + print $$v; + } else { + # It's a file path (for large files) + open my $fh, '<', $v or die $!; + binmode $fh, IO_LAYER; + while (read($fh, my $buf, CHUNK_LEN)) { + print $buf; + } + close $fh; + } +} + +sub run_diff { + my @args = @_; + my ($src, $dst, $target_path); + + if (@args == 0) { ($src, $dst) = ('head', undef); } + elsif (@args == 1) { is_revision($args[0]) ? (($src, $dst) = ($args[0], undef)) : (($src, $dst, $target_path) = ('head', undef, $args[0])); } + elsif (@args == 2) { is_revision($args[0]) && is_revision($args[1]) ? (($src, $dst) = ($args[0], $args[1])) : (($src, $dst, $target_path) = ($args[0], undef, $args[1])); } + else { ($src, $dst, $target_path) = @args; } + + my ($pipe, $old_fh); + + # Helper to open pager only once when needed + my $out = sub { + my $msg = shift; + if (!defined $pipe && -t STDOUT) { + my $pager = $ENV{PAGER} || 'less -R'; + open($pipe, "| $pager") or die $!; + $old_fh = select($pipe); + } + print $msg; + }; + + if (defined $target_path) { + my $v1 = get_file_version($src, $target_path); + my $v2 = get_file_version($dst, $target_path); + if (defined $v1 && defined $v2) { + my $f1 = ref($v1) ? "<(echo -n " . escapeshellarg($$v1) . ")" : escapeshellarg($v1); + my $f2 = ref($v2) ? "<(echo -n " . escapeshellarg($$v2) . ")" : escapeshellarg($v2); + if (system("bash", "-c", "diff -q $f1 $f2 > /dev/null 2>&1") != 0) { + $out->("\033[1mdiff --urn a/$target_path b/$target_path\033[0m\n"); + # Stream the diff output line by line to the pipe + open my $dfh, '-|', "bash -c \"diff -u $f1 $f2 | tail -n +3\""; + while (<$dfh>) { $out->($_) } + close $dfh; + } + } + } else { + # Full Tree Walk + my $s_id = (lc($src // '') eq 'head') ? read_head() : ($src // ''); + my $th; if (open my $rf, '<', File::Spec->catfile(REV_DIR, $s_id)) { + while (<$rf>) { $th = $1 if /^tree:(.*)$/ } close $rf; + } + my $it_old = $th ? 
stream_tree_file($th) : sub { undef }; + my $it_new = defined $dst ? do { + my $d_id = (lc($dst // '') eq 'head') ? read_head() : $dst; + my $dth; if (open my $df, '<', File::Spec->catfile(REV_DIR, $d_id)) { + while (<$df>) { $dth = $1 if /^tree:(.*)$/ } close $df; + } + $dth ? stream_tree_file($dth) : sub { undef }; + } : stream_tree("."); + + my ($old, $new) = ($it_old->(), $it_new->()); + while ($old || $new) { + my $p_old = $old->{path} // ''; + my $p_new = $new->{path} // ''; + my $cmp = !defined $old ? 1 : !defined $new ? -1 : $p_old cmp $p_new; + if ($cmp == 0) { + if (($old->{hash} // '') ne ($new->{hash} // '')) { + # Recursively call or just handle diff here + my $v1 = get_file_version($src, $p_old); + my $v2 = get_file_version($dst, $p_old); + if (defined $v1 && defined $v2) { + my $f1 = ref($v1) ? "<(echo -n " . escapeshellarg($$v1) . ")" : escapeshellarg($v1); + my $f2 = ref($v2) ? "<(echo -n " . escapeshellarg($$v2) . ")" : escapeshellarg($v2); + if (system("bash", "-c", "diff -q $f1 $f2 > /dev/null 2>&1") != 0) { + $out->("\033[1mdiff --urn a/$p_old b/$p_old\033[0m\n"); + open my $dfh, '-|', "bash -c \"diff -u $f1 $f2 | tail -n +3\""; + while (<$dfh>) { $out->($_) } + close $dfh; + } + } + } + ($old, $new) = ($it_old->(), $it_new->()); + } elsif ($cmp < 0) { + $out->("\033[31m--- $p_old (deleted)\033[0m\n"); + $old = $it_old->(); + } else { + $out->("\033[32m+++ $p_new (new file)\033[0m\n"); + $new = $it_new->(); + } + } + } + + if (defined $pipe) { + close $pipe; + select($old_fh); + } +} + +sub make_bin_patch { + my ($new_file, $old_file) = @_; + + my $new_size = -s $new_file; + open my $f_new, '<:raw', $new_file or die $!; + open my $f_old, '<:raw', $old_file or die $!; + + # Start patch string with new total size (8 bytes) + my $patch = pack("Q", $new_size); + + my $offset = 0; + my $blk_size = 4096; + + while (1) { + my $read_new = sysread($f_new, my $buf_new, $blk_size); + my $read_old = sysread($f_old, my $buf_old, $blk_size); + last if 
!$read_new && !$read_old; + + # If blocks differ, record the change + if (($buf_new // '') ne ($buf_old // '')) { + # Format: Offset (Q), Length (L), then the raw data + $patch .= pack("QL", $offset, length($buf_new)) . $buf_new; + } + $offset += $blk_size; + } + close $f_new; close $f_old; + + # If only the 8-byte header exists, files were identical + return length($patch) > 8 ? $patch : undef; +} + +sub apply_bin_patch_file { + my ($target_path, $patch_ref) = @_; + my $patch = $$patch_ref; + return unless length($patch) > 8; + + my $new_size = unpack("Q", substr($patch, 0, 8)); + + # Open target file for update and resize + open my $fh, '+<:raw', $target_path or die $!; + truncate($fh, $new_size); + + my $pos = 8; + while ($pos < length($patch)) { + my ($offset, $len) = unpack("QL", substr($patch, $pos, 12)); + $pos += 12; + my $data = substr($patch, $pos, $len); + $pos += $len; + + seek($fh, $offset, 0); + print $fh $data; + } + close $fh; +} + +# Convert decimal to a padded 7-char hex string +sub to_hex_id { sprintf("%07x", $_[0]) } + +# Convert hex back to decimal +sub from_hex_id { hex($_[0]) } + +sub write_file { + my ($path, $content) = @_; + my $dir = dirname($path); + make_path($dir) unless -d $dir; + open my $fh, '>', $path or die "Could not open '$path' for writing: $!"; + print $fh $content; + close $fh or die "Could not close '$path' after writing: $!"; +} + +sub touch_file { + my ($path) = @_; + my $dir = dirname($path); + make_path($dir) unless -d $dir; + open my $fh, '>', $path or die "Could not touch $path: $!"; + close $fh; +} + +sub hash_file_content { + my ($filename) = @_; + my $sha = Digest::SHA->new(1); + + if (-l $filename) { + my $target = readlink($filename); + die "Could not readlink '$filename': $!" 
unless defined $target; + $sha->add($target); + } else { + open(my $fh, '<:raw', $filename) or die "Could not open '$filename': $!"; + $sha->addfile($fh); + close($fh); + } + + return $sha->hexdigest; +} + +sub stream_tree { + my (@paths) = @_; + @paths = (".") unless @paths; + + my $chunk_size = 1024 * 64; + my @buf; + my $buf_size = 0; + my $tot_size = 0; + my $use_disk = 0; + my ($tmp_fh, $tmp_path); + + my $flush = sub { + if (!$use_disk) { + ($tmp_fh, $tmp_path) = tempfile(UNLINK => 1); + $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size); + binmode $tmp_fh, ":raw"; + $use_disk = 1; + } + print $tmp_fh @buf; + @buf = (); + $buf_size = 0; + }; + + my @stack = @paths; + while (@stack) { + my $path = (pop @stack) =~ s|^\./||r; + next if $path eq REPO; + + my @st = lstat($path); + if (-d _) { + if (opendir(my $dh, $path)) { + push @stack, map { File::Spec->catfile($path, $_) } + grep { $_ ne '.' && $_ ne '..' && $_ ne REPO } + readdir($dh); + closedir($dh); + } + } elsif (-f _ || -l _) { + my $line = "$path\t$st[9]\t$st[7]\n"; + push @buf, $line; + $buf_size += length($line); + $tot_size += length($line); + + if ((!$use_disk && $tot_size > MEM_LIMIT) || + ($use_disk && $buf_size > $chunk_size)) { + $flush->(); + } + } + } + + if (!$use_disk) { + @buf = sort @buf; + return sub { + my $line = shift @buf; + return unless $line; + chomp $line; + my ($p, $m, $s) = split(/\t/, $line); + return { path => $p, mtime => $m, size => $s }; + }; + } else { + $flush->() if @buf; + close $tmp_fh; + open(my $sort_fh, "-|", "sort", "-t", "\t", "-k1,1", $tmp_path) or die $!; + return sub { + my $line = <$sort_fh>; + unless ($line) { close $sort_fh; return; } + chomp $line; + my ($p, $s, $m) = split(/\t/, $line); + return { path => $p, mtime => $m, size => $s }; + }; + } +} + +sub stream_index { + my $idx_path = INDEX; + return sub { return undef; } unless -e $idx_path; + + open(my $fh, "<:raw", $idx_path) or die $!; + return sub { + my $line = <$fh>; + if ($line) { + chomp 
$line; + my ($s_h, $c_h, $b_h, $m, $z, $p) = split(/\t/, $line, 6); + return { + s_hash => $s_h, + c_hash => $c_h, + b_hash => $b_h, + mtime => $m, + size => $z, + path => $p, + }; + } + close $fh; + return undef; + }; +} + +# Logic for sharded object directory +sub get_obj_path { + my ($hash) = @_; + my $dir = File::Spec->catfile(OBJ_DIR, substr($hash, 0, 2)); + make_path($dir) unless -d $dir; + return File::Spec->catfile($dir, substr($hash, 2)); +} + +sub stream_tree_file { + my ($hash) = @_; + return sub { return } unless $hash; + my $path = get_obj_path($hash); + open my $fh, '<:raw', $path or return sub { return }; + return sub { + my $line = <$fh>; + return unless $line; + chomp $line; + my ($h, $p) = split(/\t/, $line, 2); + return { hash => $h, path => $p }; + } +} + +sub read_head { + return unless -e HEAD; + open my $fh, '<', HEAD or return; + my $val = <$fh>; + chomp $val if $val; + return $val; +} + +sub read_file { + my ($path) = @_; + return "" unless -e $path; + open my $fh, '<:raw', $path or return ""; + my $content = do { local $/; <$fh> }; + close $fh; + return $content; +} + +sub get_commit_message { + my $editor = $ENV{VISUAL} || $ENV{EDITOR} || 'vi'; + my $tmp_file = File::Spec->catfile(TMP_DIR, "COMMIT_EDITMSG"); + my $template = <<"EOF"; + +# Please enter the commit message for your changes. Lines starting +# with '#' will be ignored, and an empty message aborts the commit. +# +# On revision: @{[read_head() // '0']} +EOF + + write_file($tmp_file, $template); + + system($editor, $tmp_file); + my $raw_content = read_file($tmp_file); + my @lines = split(/\n/, $raw_content); + + # Filter out lines starting with # + my $final_msg = join("\n", grep { $_ !~ /^\s*#/ } @lines); + $final_msg =~ s/^\s+|\s+$//g; # Trim whitespace + + unlink($tmp_file); + return ($final_msg ne "") ? 
$final_msg : undef; +} + +sub escapeshellarg { + my $str = shift; + $str =~ s/'/'\\''/g; + return "'$str'"; +} + +# Check if a string is a 7-character hex revision ID +sub is_revision { + my ($str) = @_; + return defined $str && $str =~ /^[0-9a-f]{7}$/i; +} + +sub get_file_version { + my ($source, $path) = @_; + + # Handle Workspace + if (!defined $source) { + return undef unless -e $path; + if ((-s $path // 0) > MEM_LIMIT) { + return $path; + } + open my $fh, '<', $path or return undef; + binmode($fh, IO_LAYER); + my $data = do { local $/; <$fh> }; + close $fh; + return \$data; + } + + # Resolve Revision + my $rev_id = (lc($source) eq 'head') ? read_head() : $source; + my $rev_file = File::Spec->catfile(REV_DIR, $rev_id); + return undef unless -f $rev_file; + + my ($tree_hash, $patch_bundle_hash) = ("", ""); + open my $rfh, '<', $rev_file or return undef; + while (<$rfh>) { + $tree_hash = $1 if /^tree:(.*)$/; + $patch_bundle_hash = $1 if /^patch:(.*)$/; + } + close $rfh; + + # Locate node in tree + my $it = stream_tree_file($tree_hash); + my $node; + while (my $n = $it->()) { + if (($n->{path} // '') eq $path) { + $node = $n; + last; + } + } + return undef unless $node; + + my $obj_path = get_obj_path($node->{hash} // ''); + return undef unless -f $obj_path; + + # Extract Base Object to Temp File + my ($tfh, $tpath) = tempfile(DIR => TMP_DIR, UNLINK => 1); + binmode $tfh, IO_LAYER; + + open my $ofh, '<', $obj_path or return undef; + binmode $ofh, IO_LAYER; + while (read($ofh, my $buf, CHUNK_LEN)) { print $tfh $buf; } + close $ofh; + close $tfh; + + # Apply Patch via Streaming Bundle + if ($patch_bundle_hash) { + my $bundle_path = get_obj_path($patch_bundle_hash); + if (-f $bundle_path && -s $bundle_path > 0) { + # Use iterator to find the patch without loading the whole tarball into RAM + my $next = Archive::Tar->iter($bundle_path, 1); + while (my $f = $next->()) { + if ($f->name eq "$path.patch") { + my $p_content = $f->get_content(); + my ($pfh, $ppath) = 
tempfile(DIR => TMP_DIR, UNLINK => 1); + binmode $pfh, ":raw"; + print $pfh $p_content; + close $pfh; + + # Determine if text patch or binary block replacement + if ($p_content =~ /^\d+(?:,\d+)?[adc]\d+/) { + system("patch -s -f $tpath < $ppath >/dev/null 2>&1"); + } else { + apply_bin_patch_file($tpath, \$p_content); + } + last; + } + } + } + } + + # Final Output decision based on result size + if ((-s $tpath // 0) > MEM_LIMIT) { + return $tpath; + } else { + my $content = read_file($tpath); + return \$content; + } +} + diff --git a/urn.t b/urn.t new file mode 100644 index 0000000..734bb06 --- /dev/null +++ b/urn.t @@ -0,0 +1,175 @@ +#!/usr/bin/perl +use strict; +use warnings; +use File::Spec; +use File::Path qw(make_path remove_tree); +use Digest::SHA qw(sha1_hex); + +# Setup testing environment +my $sandbox = File::Spec->rel2abs("sandbox_env"); +my $script_bin = File::Spec->catfile(File::Spec->rel2abs("."), "urn"); +my $repo_meta = ".urn"; + +remove_tree($sandbox); +make_path($sandbox); +chdir($sandbox) or die "Cannot chdir to $sandbox: $!"; + +sub run_cmd { + my $cmd = shift; + my $out = `perl $script_bin $cmd 2>&1`; + if ($? 
!= 0) { + die "\nCommand '$cmd' failed with exit code $?:\n$out\n"; + } + return $out; +} + +sub get_head_id { + my $head_path = File::Spec->catfile($repo_meta, "head"); + return undef unless -f $head_path; + open(my $fh, '<', $head_path) or return undef; + my $id = <$fh>; + close($fh); + chomp($id //= ""); + $id =~ s/^\s+|\s+$//g; + return $id; +} + +print "Starting tests...\n"; +run_cmd("init"); + +# --- Subtest 1: All-In Integration --- +print "Subtest 1: Initial commit with symlink...\t"; +{ + open(my $fh, '>', "file1.txt") or die $!; + print $fh "Original content\n" x 10; + close($fh); + + symlink("file1.txt", "link1.txt") or die "Symlink failed: $!"; + + run_cmd("commit -am 'initial'"); + + my $idx_path = File::Spec->catfile($repo_meta, "index"); + open(my $idx, '<', $idx_path) or die "Could not open $idx_path: $!"; + my %index_hashes; + while (<$idx>) { + chomp; + # Updated parser: split on tabs and trim padding from hashes + my ($shash, $chash, $bhash, $m, $s, $path) = split(/\t/, $_, 6); + $shash =~ s/\s+$//; # Remove fixed-width padding + $index_hashes{$path} = $shash; + } + close($idx); + + die "Missing file1" unless $index_hashes{"file1.txt"}; + # Symlink hash is the SHA of the target path string + die "Symlink hash mismatch" unless $index_hashes{"link1.txt"} eq sha1_hex("file1.txt"); +} +print "OK\n"; + +# --- Subtest 2: Large Source, Small Patch (Text Patching) --- +print "Subtest 2: Large source, small patch...\t"; +{ + my $large_p = "large.txt"; + open(my $lf, '>', $large_p) or die $!; + print $lf "Line $_\n" for 1..1000; + close($lf); + + run_cmd("commit -am 'large file base'"); + + open($lf, '>>', $large_p) or die $!; + print $lf "One small change\n"; + close($lf); + run_cmd("commit -am 'patch commit'"); + + my $head_id = get_head_id(); + my $rev_path = File::Spec->catfile($repo_meta, "rev", $head_id); + open(my $rf, '<', $rev_path) or die "Could not open $rev_path: $!"; + my $has_patch = 0; + while (<$rf>) { $has_patch = 1 if /^patch:[a-f0-9]/; } 
+ close($rf); + die "Expected patch in rev $head_id" unless $has_patch; +} +print "OK\n"; + +# --- Subtest 3: Small Source, Large Patch (Full Store) --- +print "Subtest 3: Small source, large patch...\t"; +{ + my $small_p = "small.txt"; + open(my $sf, '>', $small_p) or die $!; + print $sf "A"; + close($sf); + run_cmd("commit -am 'small file base'"); + + open($sf, '>', $small_p) or die $!; + print $sf "Completely different content" x 200; + close($sf); + run_cmd("commit -am 'full store commit'"); + + my $restored = run_cmd("show HEAD $small_p"); + my $actual = `cat $small_p`; + die "Reconstruction mismatch" unless $restored eq $actual; +} +print "OK\n"; + +# --- Subtest 4: Binary Integrity --- +print "Subtest 4: Binary patch integrity...\t\t"; +{ + my $bin_p = "data.bin"; + my $orig_data = pack("C*", map { int(rand(256)) } 1..2048); + open(my $bf, '>:raw', $bin_p) or die $!; + print $bf $orig_data; + close($bf); + run_cmd("commit -am 'initial bin'"); + + my $old_rev = get_head_id(); + + open($bf, '+<:raw', $bin_p) or die $!; + seek($bf, 1024, 0); + print $bf pack("C", 255); + close($bf); + run_cmd("commit -am 'mod bin'"); + + my $v_old = run_cmd("show $old_rev $bin_p"); + my $v_new = run_cmd("show HEAD $bin_p"); + + die "Old version corrupted" unless $v_old eq $orig_data; + die "Binary restore error" unless length($v_new) == 2048; +} +print "OK\n"; + +# --- Subtest 5: CRUD (Deletion) --- +print "Subtest 5: File deletion...\t\t\t"; +{ + unlink("file1.txt"); + run_cmd("commit -am 'delete file1'"); + + my $head_id = get_head_id(); + my $rev_path = File::Spec->catfile($repo_meta, "rev", $head_id); + my $rev_data = `cat $rev_path`; + my ($tree_h) = $rev_data =~ /^tree:([a-f0-9]+)/m; + + my $t_obj = File::Spec->catfile($repo_meta, "obj", substr($tree_h, 0, 2), substr($tree_h, 2)); + my $tree_content = `cat $t_obj`; + die "File1 still in tree after deletion" if $tree_content =~ /file1\.txt/; +} +print "OK\n"; + +# --- Subtest 6: Dirty Status --- +print "Subtest 6: Dirty 
state detection...\t\t"; +{ + open(my $fh, '>', "dirty.txt") or die $!; + print $fh "Clean state\n"; + close($fh); + run_cmd("add dirty.txt"); + + open($fh, '>>', "dirty.txt") or die $!; + print $fh "Dirty edit\n"; + close($fh); + + my $status = run_cmd("status"); + die "Failed to detect dirty state" unless $status =~ /\[M\].*dirty\.txt.*\(dirty\)/; +} +print "OK\n"; + +print "\nAll tests passed successfully.\n"; + diff --git a/vcx b/vcx deleted file mode 100644 index cf850ae..0000000 --- a/vcx +++ /dev/null @@ -1,892 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; -use IO::Handle; -use File::Path qw(make_path); -use File::Copy qw(copy); -use File::Basename; -use File::Glob qw(:bsd_glob); -use File::Spec; -use File::Temp qw(tempfile); -use Getopt::Long; -use Archive::Tar; -use Compress::Zlib; -use POSIX qw(strftime); -use Digest::SHA qw(sha1_hex); - -use constant REPO => '.vcx'; -use constant HEAD => REPO . '/head'; # Current revision ID -use constant INDEX => REPO . '/index'; # Index -use constant OBJ_DIR => REPO . '/obj'; # Object store -use constant REV_DIR => REPO . '/rev'; # Revisions -use constant TMP_DIR => REPO . '/stg'; # Staging area - -use constant CHUNK_LEN => 4096; -use constant MEM_LIMIT => 10 * 1024 * 1024; -use constant IO_LAYER => ":raw:perlio(layer=" . CHUNK_LEN . 
")"; - -Getopt::Long::Configure("bundling"); - -my $cmd = shift @ARGV // ''; -my @args = @ARGV; - -if ($cmd eq 'init') { - run_init(); -} elsif ($cmd eq 'status') { - run_status(); -} elsif ($cmd eq 'add') { - die "Usage: $0 add [path1] [path2] ...\n" unless @args; - run_add(@args); -} elsif ($cmd eq 'commit') { - my ($m, $a); - GetOptions('m=s' => \$m, 'a' => \$a); - if ($a) { run_add("."); } - run_commit($m); -} elsif ($cmd eq 'log') { - run_log(); -} elsif ($cmd eq 'show') { - # Usage: vcx show HEAD main.c - my $rev = shift @args; - my $file = shift @args; - die "Usage: $0 show \n" unless defined $rev && defined $file; - run_show($rev, $file); -} elsif ($cmd eq 'diff') { - run_diff(@args); -} else { - print "Usage: $0 [init|status|add|commit|log]\n"; - exit 1; -} - -sub run_init { - make_path(OBJ_DIR, REV_DIR, TMP_DIR); - touch_file(INDEX); - my $rev_id = to_hex_id(0); - write_file(HEAD, "$rev_id\n"); - print "Initialized repository.\n"; -} - -sub run_status { - my $it_idx = stream_index(); - my $it_wrk = stream_tree("."); - - # Get the base tree from the last commit - my $head = read_head() // to_hex_id(0); - my $parent_tree_hash = ""; - if ($head ne to_hex_id(0)) { - my $rev_path = File::Spec->catfile(REV_DIR, $head); - if (open my $fh, '<', $rev_path) { - while (<$fh>) { if (/^tree:(.*)$/) { $parent_tree_hash = $1; last; } } - close $fh; - } - } - my $it_old = stream_tree_file($parent_tree_hash); - - print "On Revision: $head\n\n"; - - my $idx = $it_idx->(); - my $wrk = $it_wrk->(); - my $old = $it_old->(); - my $found_changes = 0; - - # Iterate while any of the three sources have entries - while ($idx || $wrk || $old) { - # Find the alphabetically "lowest" path among the three streams - my $path = (sort grep { defined } ($idx->{path}, $wrk->{path}, $old->{path}))[0]; - - my $flag = ""; - my $suffix = ""; - - # Logical check for Deletions (In Old Tree, but missing elsewhere) - my $in_old = (defined $old && $old->{path} eq $path); - my $in_idx = (defined $idx 
&& $idx->{path} eq $path); - my $in_wrk = (defined $wrk && $wrk->{path} eq $path); - - if ($in_old && !$in_idx) { - # File existed in last commit but is gone from index -> Staged for Deletion - $flag = "[D]"; - $suffix = "(staged)"; - } - elsif ($in_idx && !$in_wrk) { - # In index but missing from disk -> Deleted but not staged - $flag = "[D]"; - } - elsif ($in_wrk && !$in_idx) { - # On disk but not in index -> New file (Unstaged) - $flag = "[N]"; - } - elsif ($in_idx && $in_wrk) { - # Path exists in both; check for modifications - my $stg_path = File::Spec->catfile(TMP_DIR, $path); - my $is_staged = -e $stg_path; - my $workspace_changed = ($wrk->{mtime} != $idx->{mtime} || $wrk->{size} != $idx->{size}); - - if ($is_staged) { - if ($workspace_changed) { - $flag = "[M]"; - $suffix = "(dirty)"; - } else { - # Compare against c_hash (Committed Hash) to see if it's new or modified - $flag = ($idx->{c_hash} eq "-") ? "[N]" : "[M]"; - $suffix = "(staged)"; - } - } elsif ($workspace_changed || ($in_old && $idx->{c_hash} ne $old->{hash})) { - # Not staged, but differs from last commit - $flag = "[M]"; - } - } - - if ($flag ne "") { - printf "%s %s %s\n", $flag, $path, $suffix; - $found_changes = 1; - } - - # Advance iterators if they matched the current path - $old = $it_old->() if $in_old; - $idx = $it_idx->() if $in_idx; - $wrk = $it_wrk->() if $in_wrk; - } - - print "No changes detected.\n" unless $found_changes; - print "\n"; -} - -sub run_add { - my @paths = @_; - @paths = (".") unless @paths; - - my $idx_path = INDEX; - my $tmp_idx = "$idx_path.tmp"; - - my $it_idx = stream_index(); - my $it_wrk = stream_tree(@paths); - - open(my $out, ">:raw", $tmp_idx) - or die "Could not create $tmp_idx: $!"; - - my $idx_entry = $it_idx->(); - my $wrk_entry = $it_wrk->(); - - while ($idx_entry || $wrk_entry) { - my $cmp = !defined $idx_entry ? 1 - : !defined $wrk_entry ? 
-1 - : $idx_entry->{path} cmp $wrk_entry->{path}; - - if ($cmp == 0) { - if ($idx_entry->{mtime} == $wrk_entry->{mtime} && - $idx_entry->{size} == $wrk_entry->{size}) { - # No change: Preserve all 3 hashes and metadata - printf $out "%-40s\t%-40s\t%-40s\t%-12d\t%-10d\t%s\n", - $idx_entry->{s_hash}, $idx_entry->{c_hash}, $idx_entry->{b_hash}, - $idx_entry->{mtime}, $idx_entry->{size}, $idx_entry->{path}; - } else { - my $p = $wrk_entry->{path}; - my $current_hash = hash_file_content($p); - my $stg_path = File::Spec->catfile(TMP_DIR, $p); - make_path(dirname($stg_path)); - - (-l $p) ? symlink(readlink($p), $stg_path) : copy($p, $stg_path); - - # Update staged hash, preserve committed and base hashes - printf $out "%-40s\t%-40s\t%-40s\t%-12d\t%-10d\t%s\n", - $current_hash, $idx_entry->{c_hash}, $idx_entry->{b_hash}, - $wrk_entry->{mtime}, $wrk_entry->{size}, $p; - } - $idx_entry = $it_idx->(); - $wrk_entry = $it_wrk->(); - } - elsif ($cmp > 0) { - # New File: hash and snapshot to staging - my $p = $wrk_entry->{path}; - my $hash = hash_file_content($p); - my $stg_path = File::Spec->catfile(TMP_DIR, $p); - make_path(dirname($stg_path)); - - (-l $p) ? 
symlink(readlink($p), $stg_path) : copy($p, $stg_path); - - # Staged is the new hash; Committed and Base are '-' - printf $out "%-40s\t%-40s\t%-40s\t%-12d\t%-10d\t%s\n", - $hash, "-", "-", $wrk_entry->{mtime}, $wrk_entry->{size}, $p; - $wrk_entry = $it_wrk->(); - } - else { # Deleted - $idx_entry = $it_idx->(); - } - } - - close $out; - rename($tmp_idx, $idx_path) or die "Failed to update index: $!"; -} - -sub run_commit { - my ($msg) = @_; - if (!defined $msg || $msg eq "") { - $msg = get_commit_message(); - } - - my $parent_id = read_head() // to_hex_id(0); - my $parent_tree_hash = ""; - - if ($parent_id ne to_hex_id(0)) { - my $p_rev_file = File::Spec->catfile(REV_DIR, $parent_id); - if (open my $pfh, '<', $p_rev_file) { - while (<$pfh>) { - if (/^tree:(.*)$/) { $parent_tree_hash = $1; last; } - } - close $pfh; - } - } - - my $it_idx = stream_index(); - my $it_old = stream_tree_file($parent_tree_hash); - - my $sha_new_tree = Digest::SHA->new(1); - my @new_tree_lines; - my $new_tree_size = 0; - my $use_disk_nt = 0; - my ($nt_fh, $nt_path); - - my %patches; - my $patch_mem_size = 0; - my $use_disk_patch = 0; - my ($pt_fh, $pt_path); - - my ($tmp_idx_fh, $tmp_idx_path) = tempfile(DIR => TMP_DIR, UNLINK => 0); - binmode $tmp_idx_fh, ":raw"; - - my $idx = $it_idx->(); - my $old = $it_old->(); - - while ($idx || $old) { - my $cmp = !defined $idx ? 1 : !defined $old ? 
# Create a new revision from the index. Changed files are stored either as
# full objects or, when smaller, as patches collected into a gzipped tar
# bundle. Both the new tree listing and the patch set spill to disk once
# they exceed MEM_LIMIT, so commits of large trees stay memory-bounded.
sub run_commit {
    my ($msg) = @_;
    if (!defined $msg || $msg eq "") {
        $msg = get_commit_message();
    }

    my $parent_id        = read_head() // to_hex_id(0);
    my $parent_tree_hash = "";

    # Fetch the parent revision's tree hash (absent for the first commit).
    if ($parent_id ne to_hex_id(0)) {
        my $p_rev_file = File::Spec->catfile(REV_DIR, $parent_id);
        if (open my $pfh, '<', $p_rev_file) {
            while (<$pfh>) {
                if (/^tree:(.*)$/) { $parent_tree_hash = $1; last; }
            }
            close $pfh;
        }
    }

    my $it_idx = stream_index();
    my $it_old = stream_tree_file($parent_tree_hash);

    # New tree listing: kept in memory until it exceeds MEM_LIMIT.
    my $sha_new_tree  = Digest::SHA->new(1);
    my @new_tree_lines;
    my $new_tree_size = 0;
    my $use_disk_nt   = 0;
    my ($nt_fh, $nt_path);

    # Patch bundle: kept in memory until it exceeds MEM_LIMIT.
    my %patches;
    my $patch_mem_size = 0;
    my $use_disk_patch = 0;
    my ($pt_fh, $pt_path);

    my ($tmp_idx_fh, $tmp_idx_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
    binmode $tmp_idx_fh, ":raw";

    my $idx = $it_idx->();
    my $old = $it_old->();

    while ($idx || $old) {
        my $cmp = !defined $idx ? 1
                : !defined $old ? -1
                : $idx->{path} cmp $old->{path};
        my ($out_s, $out_c, $out_b, $out_m, $out_z, $out_p);

        if ($cmp < 0) {
            # New file: promote the staged snapshot to a full object.
            ($out_p, $out_s, $out_m, $out_z) =
                ($idx->{path}, $idx->{s_hash}, $idx->{mtime}, $idx->{size});
            $out_c = $out_s;
            $out_b = $out_s;

            my $obj_path = get_obj_path($out_b);
            my $stg_file = File::Spec->catfile(TMP_DIR, $out_p);
            if (!-e $obj_path) {
                rename($stg_file, $obj_path) or copy($stg_file, $obj_path);
            }
            $idx = $it_idx->();
        }
        elsif ($cmp == 0) {
            # Present in both the index and the parent tree.
            ($out_p, $out_s, $out_m, $out_z) =
                ($idx->{path}, $idx->{s_hash}, $idx->{mtime}, $idx->{size});

            if ($idx->{s_hash} ne ($idx->{c_hash} // "-")) {
                # Modified since the last commit: prefer a patch against the
                # base object when it is no larger than the file itself.
                my $base_obj = get_obj_path($old->{hash});
                my $stg_file = File::Spec->catfile(TMP_DIR, $out_p);

                my $patch = (-T $stg_file)
                    ? qx(diff '$base_obj' '$stg_file')
                    : make_bin_patch($stg_file, $base_obj);

                if (defined $patch && length($patch) <= $out_z) {
                    # Spill the in-memory patch set to a tar on disk once
                    # it would cross MEM_LIMIT.
                    if (!$use_disk_patch
                        && ($patch_mem_size + length($patch)) > MEM_LIMIT) {
                        ($pt_fh, $pt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
                        my $tar = Archive::Tar->new;
                        $tar->add_data($_, $patches{$_}) for keys %patches;
                        $tar->write($pt_fh);
                        %patches        = ();
                        $use_disk_patch = 1;
                    }
                    if ($use_disk_patch) {
                        my $tar = Archive::Tar->new($pt_path);
                        $tar->add_data("$out_p.patch", $patch);
                        $tar->write($pt_path);
                    }
                    else {
                        $patches{"$out_p.patch"} = $patch;
                        $patch_mem_size += length($patch);
                    }
                    # Base stays the parent object; the snapshot is no
                    # longer needed.
                    $out_b = $old->{hash};
                    unlink($stg_file);
                }
                else {
                    # Patch missing or too large: store the full object.
                    $out_b = $out_s;
                    my $obj_path = get_obj_path($out_b);
                    if (!-e $obj_path) {
                        rename($stg_file, $obj_path) or copy($stg_file, $obj_path);
                    }
                }
                $out_c = $out_s;
            }
            else {
                # Unchanged: carry the parent's base hash forward.
                $out_b = $old->{hash};
                $out_c = $idx->{c_hash};
            }
            $idx = $it_idx->();
            $old = $it_old->();
        }
        else {
            # Deleted: skip the parent entry entirely.
            $old = $it_old->();
            next;
        }

        my $t_line = "$out_b\t$out_p\n";
        $sha_new_tree->add($t_line);
        $new_tree_size += length($t_line);

        if (!$use_disk_nt && $new_tree_size > MEM_LIMIT) {
            ($nt_fh, $nt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
            print {$nt_fh} @new_tree_lines;
            @new_tree_lines = ();
            $use_disk_nt    = 1;
        }
        if ($use_disk_nt) { print {$nt_fh} $t_line }
        else              { push @new_tree_lines, $t_line }

        printf $tmp_idx_fh "%-40s\t%-40s\t%-40s\t%-12d\t%-10d\t%s\n",
            $out_s, ($out_c // "-"), ($out_b // "-"), $out_m, $out_z, $out_p;
    }

    # Materialize the new tree object.
    my $new_tree_hash = $sha_new_tree->hexdigest;
    my $tree_obj_path = get_obj_path($new_tree_hash);
    if ($use_disk_nt) {
        close $nt_fh;
        rename($nt_path, $tree_obj_path);
    }
    else {
        write_file($tree_obj_path, join("", @new_tree_lines));
    }

    # Materialize the patch bundle, if any patches were collected.
    my $patch_bundle_hash = "";
    if ($use_disk_patch || %patches) {
        my $bundle_tmp = File::Spec->catfile(TMP_DIR, "bundle.tar.gz");
        my $tar = Archive::Tar->new;
        if ($use_disk_patch) { $tar->read($pt_path); unlink $pt_path; }
        $tar->add_data($_, $patches{$_}) for keys %patches;
        $tar->write($bundle_tmp, COMPRESS_GZIP);
        $patch_bundle_hash = hash_file_content($bundle_tmp);
        rename($bundle_tmp, get_obj_path($patch_bundle_hash));
    }

    # Write the revision record and advance HEAD.
    my $next_id = to_hex_id(from_hex_id($parent_id) + 1);
    $msg //= "Commit $next_id";
    open my $rfh, '>:raw', File::Spec->catfile(REV_DIR, $next_id) or die $!;
    print $rfh "tree:$new_tree_hash\nparent:$parent_id\n";
    print $rfh "patch:$patch_bundle_hash\n" if $patch_bundle_hash;
    print $rfh "date:" . time() . "\nmsg:$msg\n";
    close $rfh;

    close $tmp_idx_fh;
    rename($tmp_idx_path, INDEX);
    write_file(HEAD, "$next_id\n");
    File::Path::remove_tree(TMP_DIR);
    make_path(TMP_DIR);
    print "Revision $next_id committed.\n";
}

# Walk the revision chain backwards from HEAD, printing each revision's
# metadata through the user's pager.
sub run_log {
    open my $fh_h, '<', HEAD or die "Not a repository.\n";
    my $head = <$fh_h>; chomp $head; close $fh_h;

    my $pager = $ENV{PAGER} || 'less -R';
    open(my $pipe, "| $pager") or die "Can't pipe to $pager: $!";
    my $old_fh = select($pipe);

    print "Revision History (HEAD: $head)\n\n";

    my $current_id = $head;
    while ($current_id) {
        my $rev_file = File::Spec->catfile(REV_DIR, $current_id);
        last unless -f $rev_file;

        open my $rfh, '<', $rev_file or last;
        my ($tree, $parent, $date, $msg) = ("", "", "", "[No message]");

        while (<$rfh>) {
            chomp;
            if    (/^tree:(.*)$/)   { $tree   = $1; }
            elsif (/^parent:(.*)$/) { $parent = $1; }
            elsif (/^date:(.*)$/)   { $date   = $1; }
            elsif (/^msg:(.*)$/)    { $msg    = $1; }
        }
        close $rfh;

        my $date_str = $date
            ? strftime("%a %b %e %H:%M:%S %Y", localtime($date))
            : "Unknown Date";

        print "commit $current_id\n";
        print "Date: $date_str\n";
        print "\n $msg\n\n";

        # Follow the parent pointer backwards.
        $current_id = $parent;
    }

    close $pipe;
    select($old_fh);
}
# Print a single file's content at a given revision through the pager.
# Fixes vs. original: (1) the ":raw" layer was applied to STDOUT while
# output was select()ed to the pager pipe, so it had no effect on the
# handle actually written to; (2) the pipe was never closed and the
# selected handle never restored before returning.
sub run_show {
    my ($rev_id, $file_path) = @_;

    die "Usage: $0 show \n" unless defined $rev_id && defined $file_path;

    my $pager = $ENV{PAGER} || 'less -R';
    open(my $pipe, "| $pager") or die "Can't pipe to $pager: $!";
    my $old_fh = select($pipe);

    # Resolve the file content at the requested revision.
    my $v = get_file_version($rev_id, $file_path);
    die "Error: Could not resolve '$file_path' at revision $rev_id.\n"
        unless defined $v;

    if (ref($v) eq 'SCALAR') {
        # Small file: content is in memory. Put the raw layer on the pipe
        # (the selected output handle), not on STDOUT.
        binmode $pipe, ":raw";
        print $$v;
    }
    else {
        # Large file: $v is a path; stream it in fixed-size chunks.
        open my $fh, '<', $v or die $!;
        binmode $fh, IO_LAYER;
        while (read($fh, my $buf, CHUNK_LEN)) {
            print $buf;
        }
        close $fh;
    }

    # Restore STDOUT and close the pager so it sees EOF.
    select($old_fh);
    close $pipe;
}

# Diff two revisions, a revision against the workspace, or a single file.
# Fix vs. original: diff headers said "diff --vcx" although the tool was
# renamed to urn; labels now read "diff --urn".
sub run_diff {
    my @args = @_;
    my ($src, $dst, $target_path);

    # Disambiguate positional arguments: 7-char hex strings are revision
    # IDs, anything else is treated as a path.
    if (@args == 0) {
        ($src, $dst) = ('head', undef);
    }
    elsif (@args == 1) {
        is_revision($args[0])
            ? (($src, $dst) = ($args[0], undef))
            : (($src, $dst, $target_path) = ('head', undef, $args[0]));
    }
    elsif (@args == 2) {
        is_revision($args[0]) && is_revision($args[1])
            ? (($src, $dst) = ($args[0], $args[1]))
            : (($src, $dst, $target_path) = ($args[0], undef, $args[1]));
    }
    else {
        ($src, $dst, $target_path) = @args;
    }

    my ($pipe, $old_fh);

    # Open the pager lazily, on first output, and only when on a TTY.
    my $out = sub {
        my $msg = shift;
        if (!defined $pipe && -t STDOUT) {
            my $pager = $ENV{PAGER} || 'less -R';
            open($pipe, "| $pager") or die $!;
            $old_fh = select($pipe);
        }
        print $msg;
    };

    if (defined $target_path) {
        # Single-file diff.
        my $v1 = get_file_version($src, $target_path);
        my $v2 = get_file_version($dst, $target_path);
        if (defined $v1 && defined $v2) {
            # In-memory content goes through bash process substitution;
            # on-disk content is passed as a (shell-escaped) path.
            my $f1 = ref($v1) ? "<(echo -n " . escapeshellarg($$v1) . ")" : escapeshellarg($v1);
            my $f2 = ref($v2) ? "<(echo -n " . escapeshellarg($$v2) . ")" : escapeshellarg($v2);
            if (system("bash", "-c", "diff -q $f1 $f2 > /dev/null 2>&1") != 0) {
                $out->("\033[1mdiff --urn a/$target_path b/$target_path\033[0m\n");
                # Stream the unified diff, dropping the two header lines.
                open my $dfh, '-|', "bash -c \"diff -u $f1 $f2 | tail -n +3\"";
                while (<$dfh>) { $out->($_) }
                close $dfh;
            }
        }
    }
    else {
        # Full tree walk between two tree iterators.
        my $s_id = (lc($src // '') eq 'head') ? read_head() : ($src // '');
        my $th;
        if (open my $rf, '<', File::Spec->catfile(REV_DIR, $s_id)) {
            while (<$rf>) { $th = $1 if /^tree:(.*)$/ }
            close $rf;
        }
        my $it_old = $th ? stream_tree_file($th) : sub { undef };
        my $it_new = defined $dst
            ? do {
                my $d_id = (lc($dst // '') eq 'head') ? read_head() : $dst;
                my $dth;
                if (open my $df, '<', File::Spec->catfile(REV_DIR, $d_id)) {
                    while (<$df>) { $dth = $1 if /^tree:(.*)$/ }
                    close $df;
                }
                $dth ? stream_tree_file($dth) : sub { undef };
            }
            : stream_tree(".");

        my ($old, $new) = ($it_old->(), $it_new->());
        while ($old || $new) {
            my $p_old = $old->{path} // '';
            my $p_new = $new->{path} // '';
            my $cmp = !defined $old ? 1 : !defined $new ? -1 : $p_old cmp $p_new;
            if ($cmp == 0) {
                if (($old->{hash} // '') ne ($new->{hash} // '')) {
                    my $v1 = get_file_version($src, $p_old);
                    my $v2 = get_file_version($dst, $p_old);
                    if (defined $v1 && defined $v2) {
                        my $f1 = ref($v1) ? "<(echo -n " . escapeshellarg($$v1) . ")" : escapeshellarg($v1);
                        my $f2 = ref($v2) ? "<(echo -n " . escapeshellarg($$v2) . ")" : escapeshellarg($v2);
                        if (system("bash", "-c", "diff -q $f1 $f2 > /dev/null 2>&1") != 0) {
                            $out->("\033[1mdiff --urn a/$p_old b/$p_old\033[0m\n");
                            open my $dfh, '-|', "bash -c \"diff -u $f1 $f2 | tail -n +3\"";
                            while (<$dfh>) { $out->($_) }
                            close $dfh;
                        }
                    }
                }
                ($old, $new) = ($it_old->(), $it_new->());
            }
            elsif ($cmp < 0) {
                $out->("\033[31m--- $p_old (deleted)\033[0m\n");
                $old = $it_old->();
            }
            else {
                $out->("\033[32m+++ $p_new (new file)\033[0m\n");
                $new = $it_new->();
            }
        }
    }

    if (defined $pipe) {
        close $pipe;
        select($old_fh);
    }
}
!$read_new && !$read_old; - - # If blocks differ, record the change - if (($buf_new // '') ne ($buf_old // '')) { - # Format: Offset (Q), Length (L), then the raw data - $patch .= pack("QL", $offset, length($buf_new)) . $buf_new; - } - $offset += $blk_size; - } - close $f_new; close $f_old; - - # If only the 8-byte header exists, files were identical - return length($patch) > 8 ? $patch : undef; -} - -sub apply_bin_patch_file { - my ($target_path, $patch_ref) = @_; - my $patch = $$patch_ref; - return unless length($patch) > 8; - - my $new_size = unpack("Q", substr($patch, 0, 8)); - - # Open target file for update and resize - open my $fh, '+<:raw', $target_path or die $!; - truncate($fh, $new_size); - - my $pos = 8; - while ($pos < length($patch)) { - my ($offset, $len) = unpack("QL", substr($patch, $pos, 12)); - $pos += 12; - my $data = substr($patch, $pos, $len); - $pos += $len; - - seek($fh, $offset, 0); - print $fh $data; - } - close $fh; -} - -# Convert decimal to a padded 7-char hex string -sub to_hex_id { sprintf("%07x", $_[0]) } - -# Convert hex back to decimal -sub from_hex_id { hex($_[0]) } - -sub write_file { - my ($path, $content) = @_; - my $dir = dirname($path); - make_path($dir) unless -d $dir; - open my $fh, '>', $path or die "Could not open '$path' for writing: $!"; - print $fh $content; - close $fh or die "Could not close '$path' after writing: $!"; -} - -sub touch_file { - my ($path) = @_; - my $dir = dirname($path); - make_path($dir) unless -d $dir; - open my $fh, '>', $path or die "Could not touch $path: $!"; - close $fh; -} - -sub hash_file_content { - my ($filename) = @_; - my $sha = Digest::SHA->new(1); - - if (-l $filename) { - my $target = readlink($filename); - die "Could not readlink '$filename': $!" 
unless defined $target; - $sha->add($target); - } else { - open(my $fh, '<:raw', $filename) or die "Could not open '$filename': $!"; - $sha->addfile($fh); - close($fh); - } - - return $sha->hexdigest; -} - -sub stream_tree { - my (@paths) = @_; - @paths = (".") unless @paths; - - my $chunk_size = 1024 * 64; - my @buf; - my $buf_size = 0; - my $tot_size = 0; - my $use_disk = 0; - my ($tmp_fh, $tmp_path); - - my $flush = sub { - if (!$use_disk) { - ($tmp_fh, $tmp_path) = tempfile(UNLINK => 1); - $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size); - binmode $tmp_fh, ":raw"; - $use_disk = 1; - } - print $tmp_fh @buf; - @buf = (); - $buf_size = 0; - }; - - my @stack = @paths; - while (@stack) { - my $path = (pop @stack) =~ s|^\./||r; - next if $path eq REPO; - - my @st = lstat($path); - if (-d _) { - if (opendir(my $dh, $path)) { - push @stack, map { File::Spec->catfile($path, $_) } - grep { $_ ne '.' && $_ ne '..' && $_ ne REPO } - readdir($dh); - closedir($dh); - } - } elsif (-f _ || -l _) { - my $line = "$path\t$st[9]\t$st[7]\n"; - push @buf, $line; - $buf_size += length($line); - $tot_size += length($line); - - if ((!$use_disk && $tot_size > MEM_LIMIT) || - ($use_disk && $buf_size > $chunk_size)) { - $flush->(); - } - } - } - - if (!$use_disk) { - @buf = sort @buf; - return sub { - my $line = shift @buf; - return unless $line; - chomp $line; - my ($p, $m, $s) = split(/\t/, $line); - return { path => $p, mtime => $m, size => $s }; - }; - } else { - $flush->() if @buf; - close $tmp_fh; - open(my $sort_fh, "-|", "sort", "-t", "\t", "-k1,1", $tmp_path) or die $!; - return sub { - my $line = <$sort_fh>; - unless ($line) { close $sort_fh; return; } - chomp $line; - my ($p, $s, $m) = split(/\t/, $line); - return { path => $p, mtime => $m, size => $s }; - }; - } -} - -sub stream_index { - my $idx_path = INDEX; - return sub { return undef; } unless -e $idx_path; - - open(my $fh, "<:raw", $idx_path) or die $!; - return sub { - my $line = <$fh>; - if ($line) { - chomp 
$line; - my ($s_h, $c_h, $b_h, $m, $z, $p) = split(/\t/, $line, 6); - return { - s_hash => $s_h, - c_hash => $c_h, - b_hash => $b_h, - mtime => $m, - size => $z, - path => $p, - }; - } - close $fh; - return undef; - }; -} - -# Logic for sharded object directory -sub get_obj_path { - my ($hash) = @_; - my $dir = File::Spec->catfile(OBJ_DIR, substr($hash, 0, 2)); - make_path($dir) unless -d $dir; - return File::Spec->catfile($dir, substr($hash, 2)); -} - -sub stream_tree_file { - my ($hash) = @_; - return sub { return } unless $hash; - my $path = get_obj_path($hash); - open my $fh, '<:raw', $path or return sub { return }; - return sub { - my $line = <$fh>; - return unless $line; - chomp $line; - my ($h, $p) = split(/\t/, $line, 2); - return { hash => $h, path => $p }; - } -} - -sub read_head { - return unless -e HEAD; - open my $fh, '<', HEAD or return; - my $val = <$fh>; - chomp $val if $val; - return $val; -} - -sub read_file { - my ($path) = @_; - return "" unless -e $path; - open my $fh, '<:raw', $path or return ""; - my $content = do { local $/; <$fh> }; - close $fh; - return $content; -} - -sub get_commit_message { - my $editor = $ENV{VISUAL} || $ENV{EDITOR} || 'vi'; - my $tmp_file = File::Spec->catfile(TMP_DIR, "COMMIT_EDITMSG"); - my $template = <<"EOF"; - -# Please enter the commit message for your changes. Lines starting -# with '#' will be ignored, and an empty message aborts the commit. -# -# On revision: @{[read_head() // '0']} -EOF - - write_file($tmp_file, $template); - - system($editor, $tmp_file); - my $raw_content = read_file($tmp_file); - my @lines = split(/\n/, $raw_content); - - # Filter out lines starting with # - my $final_msg = join("\n", grep { $_ !~ /^\s*#/ } @lines); - $final_msg =~ s/^\s+|\s+$//g; # Trim whitespace - - unlink($tmp_file); - return ($final_msg ne "") ? 
# Collect a commit message by launching the user's editor on a template,
# then stripping comment lines. Returns undef for an empty message
# (which aborts the commit).
sub get_commit_message {
    my $editor   = $ENV{VISUAL} || $ENV{EDITOR} || 'vi';
    my $tmp_file = File::Spec->catfile(TMP_DIR, "COMMIT_EDITMSG");
    my $template = <<"EOF";

# Please enter the commit message for your changes. Lines starting
# with '#' will be ignored, and an empty message aborts the commit.
#
# On revision: @{[read_head() // '0']}
EOF

    write_file($tmp_file, $template);

    system($editor, $tmp_file);
    my @lines = split(/\n/, read_file($tmp_file));

    # Drop comment lines, then trim surrounding whitespace.
    my $final_msg = join("\n", grep { !/^\s*#/ } @lines);
    $final_msg =~ s/^\s+|\s+$//g;

    unlink($tmp_file);
    return ($final_msg ne "") ? $final_msg : undef;
}

# Quote a string for safe interpolation into a bourne-shell command line.
sub escapeshellarg {
    my ($str) = @_;
    $str =~ s/'/'\\''/g;
    return "'$str'";
}

# True when the string looks like a 7-character hex revision ID.
sub is_revision {
    my ($str) = @_;
    return defined $str && $str =~ /^[0-9a-f]{7}$/i;
}

# Resolve a file's content at a given source:
#   undef  -> working tree
#   'head' -> current revision
#   hex ID -> that revision
# Returns a scalar ref with the content for small files, a filesystem path
# for files larger than MEM_LIMIT, or undef when unresolvable.
sub get_file_version {
    my ($source, $path) = @_;

    # Working-tree version.
    if (!defined $source) {
        return undef unless -e $path;
        if ((-s $path // 0) > MEM_LIMIT) {
            return $path;       # too large to slurp: hand back the path
        }
        open my $fh, '<', $path or return undef;
        binmode($fh, IO_LAYER);
        my $data = do { local $/; <$fh> };
        close $fh;
        return \$data;
    }

    # Resolve the revision record.
    my $rev_id   = (lc($source) eq 'head') ? read_head() : $source;
    my $rev_file = File::Spec->catfile(REV_DIR, $rev_id);
    return undef unless -f $rev_file;

    my ($tree_hash, $patch_bundle_hash) = ("", "");
    open my $rfh, '<', $rev_file or return undef;
    while (<$rfh>) {
        $tree_hash         = $1 if /^tree:(.*)$/;
        $patch_bundle_hash = $1 if /^patch:(.*)$/;
    }
    close $rfh;

    # Find the file's entry in the revision's tree.
    my $it = stream_tree_file($tree_hash);
    my $node;
    while (my $n = $it->()) {
        if (($n->{path} // '') eq $path) {
            $node = $n;
            last;
        }
    }
    return undef unless $node;

    my $obj_path = get_obj_path($node->{hash} // '');
    return undef unless -f $obj_path;

    # Copy the base object into a temp file we can patch in place.
    my ($tfh, $tpath) = tempfile(DIR => TMP_DIR, UNLINK => 1);
    binmode $tfh, IO_LAYER;

    open my $ofh, '<', $obj_path or return undef;
    binmode $ofh, IO_LAYER;
    while (read($ofh, my $buf, CHUNK_LEN)) { print $tfh $buf; }
    close $ofh;
    close $tfh;

    # Apply the revision's patch, if one exists for this path. The bundle
    # is scanned with Archive::Tar->iter so it is never fully loaded.
    if ($patch_bundle_hash) {
        my $bundle_path = get_obj_path($patch_bundle_hash);
        if (-f $bundle_path && -s $bundle_path > 0) {
            my $next = Archive::Tar->iter($bundle_path, 1);
            while (my $f = $next->()) {
                if ($f->name eq "$path.patch") {
                    my $p_content = $f->get_content();
                    my ($pfh, $ppath) = tempfile(DIR => TMP_DIR, UNLINK => 1);
                    binmode $pfh, ":raw";
                    print $pfh $p_content;
                    close $pfh;

                    # A leading ed-style hunk header means a text patch;
                    # anything else is our binary block format.
                    if ($p_content =~ /^\d+(?:,\d+)?[adc]\d+/) {
                        system("patch -s -f $tpath < $ppath >/dev/null 2>&1");
                    }
                    else {
                        apply_bin_patch_file($tpath, \$p_content);
                    }
                    last;
                }
            }
        }
    }

    # Return in-memory content when small enough, else the temp path.
    if ((-s $tpath // 0) > MEM_LIMIT) {
        return $tpath;
    }
    my $content = read_file($tpath);
    return \$content;
}
#!/usr/bin/perl
use strict;
use warnings;
use File::Spec;
use File::Path qw(make_path remove_tree);
use Digest::SHA qw(sha1_hex);

# Integration tests for the urn script, run inside a throwaway sandbox.
# Fix vs. original: the script and metadata directory were still named
# "vcx"/".vcx" after the tool was renamed to urn (REPO => '.urn').
my $sandbox    = File::Spec->rel2abs("sandbox_env");
my $script_bin = File::Spec->catfile(File::Spec->rel2abs("."), "urn");
my $repo_meta  = ".urn";

remove_tree($sandbox);
make_path($sandbox);
chdir($sandbox) or die "Cannot chdir to $sandbox: $!";

# Run an urn subcommand; dies (with captured output) on non-zero exit.
sub run_cmd {
    my $cmd = shift;
    my $out = `perl $script_bin $cmd 2>&1`;
    if ($? != 0) {
        die "\nCommand '$cmd' failed with exit code $?:\n$out\n";
    }
    return $out;
}

# Read the current revision ID from the repository's head file.
sub get_head_id {
    my $head_path = File::Spec->catfile($repo_meta, "head");
    return undef unless -f $head_path;
    open(my $fh, '<', $head_path) or return undef;
    my $id = <$fh>;
    close($fh);
    chomp($id //= "");
    $id =~ s/^\s+|\s+$//g;
    return $id;
}

print "Starting tests...\n";
run_cmd("init");

# --- Subtest 1: All-In Integration ---
print "Subtest 1: Initial commit with symlink...\t";
{
    open(my $fh, '>', "file1.txt") or die $!;
    print $fh "Original content\n" x 10;
    close($fh);

    symlink("file1.txt", "link1.txt") or die "Symlink failed: $!";

    run_cmd("commit -am 'initial'");

    my $idx_path = File::Spec->catfile($repo_meta, "index");
    open(my $idx, '<', $idx_path) or die "Could not open $idx_path: $!";
    my %index_hashes;
    while (<$idx>) {
        chomp;
        # Split on tabs and trim the fixed-width padding from hashes.
        my ($shash, $chash, $bhash, $m, $s, $path) = split(/\t/, $_, 6);
        $shash =~ s/\s+$//;
        $index_hashes{$path} = $shash;
    }
    close($idx);

    die "Missing file1" unless $index_hashes{"file1.txt"};
    # A symlink's hash is the SHA-1 of its target path string.
    die "Symlink hash mismatch"
        unless $index_hashes{"link1.txt"} eq sha1_hex("file1.txt");
}
print "OK\n";

# --- Subtest 2: Large Source, Small Patch (Text Patching) ---
print "Subtest 2: Large source, small patch...\t";
{
    my $large_p = "large.txt";
    open(my $lf, '>', $large_p) or die $!;
    print $lf "Line $_\n" for 1..1000;
    close($lf);

    run_cmd("commit -am 'large file base'");

    open($lf, '>>', $large_p) or die $!;
    print $lf "One small change\n";
    close($lf);
    run_cmd("commit -am 'patch commit'");

    # A small change to a large file should be stored as a patch bundle.
    my $head_id  = get_head_id();
    my $rev_path = File::Spec->catfile($repo_meta, "rev", $head_id);
    open(my $rf, '<', $rev_path) or die "Could not open $rev_path: $!";
    my $has_patch = 0;
    while (<$rf>) { $has_patch = 1 if /^patch:[a-f0-9]/; }
    close($rf);
    die "Expected patch in rev $head_id" unless $has_patch;
}
print "OK\n";

# --- Subtest 3: Small Source, Large Patch (Full Store) ---
print "Subtest 3: Small source, large patch...\t";
{
    my $small_p = "small.txt";
    open(my $sf, '>', $small_p) or die $!;
    print $sf "A";
    close($sf);
    run_cmd("commit -am 'small file base'");

    # A patch larger than the file should force a full object store.
    open($sf, '>', $small_p) or die $!;
    print $sf "Completely different content" x 200;
    close($sf);
    run_cmd("commit -am 'full store commit'");

    my $restored = run_cmd("show HEAD $small_p");
    my $actual   = `cat $small_p`;
    die "Reconstruction mismatch" unless $restored eq $actual;
}
print "OK\n";

# --- Subtest 4: Binary Integrity ---
print "Subtest 4: Binary patch integrity...\t\t";
{
    my $bin_p = "data.bin";
    my $orig_data = pack("C*", map { int(rand(256)) } 1..2048);
    open(my $bf, '>:raw', $bin_p) or die $!;
    print $bf $orig_data;
    close($bf);
    run_cmd("commit -am 'initial bin'");

    my $old_rev = get_head_id();

    # Flip one byte mid-file and commit again.
    open($bf, '+<:raw', $bin_p) or die $!;
    seek($bf, 1024, 0);
    print $bf pack("C", 255);
    close($bf);
    run_cmd("commit -am 'mod bin'");

    my $v_old = run_cmd("show $old_rev $bin_p");
    my $v_new = run_cmd("show HEAD $bin_p");

    die "Old version corrupted" unless $v_old eq $orig_data;
    die "Binary restore error" unless length($v_new) == 2048;
}
print "OK\n";

# --- Subtest 5: CRUD (Deletion) ---
print "Subtest 5: File deletion...\t\t\t";
{
    unlink("file1.txt");
    run_cmd("commit -am 'delete file1'");

    # The new tree object must no longer reference the deleted path.
    my $head_id  = get_head_id();
    my $rev_path = File::Spec->catfile($repo_meta, "rev", $head_id);
    my $rev_data = `cat $rev_path`;
    my ($tree_h) = $rev_data =~ /^tree:([a-f0-9]+)/m;

    my $t_obj = File::Spec->catfile($repo_meta, "obj",
                                    substr($tree_h, 0, 2), substr($tree_h, 2));
    my $tree_content = `cat $t_obj`;
    die "File1 still in tree after deletion" if $tree_content =~ /file1\.txt/;
}
print "OK\n";

# --- Subtest 6: Dirty Status ---
print "Subtest 6: Dirty state detection...\t\t";
{
    open(my $fh, '>', "dirty.txt") or die $!;
    print $fh "Clean state\n";
    close($fh);
    run_cmd("add dirty.txt");

    # Edit after staging: status must flag the file as modified and dirty.
    open($fh, '>>', "dirty.txt") or die $!;
    print $fh "Dirty edit\n";
    close($fh);

    my $status = run_cmd("status");
    die "Failed to detect dirty state"
        unless $status =~ /\[M\].*dirty\.txt.*\(dirty\)/;
}
print "OK\n";

print "\nAll tests passed successfully.\n";