#!/usr/bin/perl use strict; use warnings; use IO::Handle; use File::Path qw(make_path); use File::Copy qw(copy); use File::Basename; use File::Glob qw(:bsd_glob); use File::Spec; use File::Temp qw(tempfile); use Getopt::Long; use Archive::Tar; use Compress::Zlib; use POSIX qw(strftime); use Digest::SHA qw(sha1_hex); use constant REPO => '.vcx'; use constant HEAD => REPO . '/head'; # Current revision ID use constant INDEX => REPO . '/index'; # Index use constant OBJ_DIR => REPO . '/obj'; # Object store use constant REV_DIR => REPO . '/rev'; # Revisions use constant TMP_DIR => REPO . '/stg'; # Staging area use constant CHUNK_LEN => 4096; use constant MEM_LIMIT => 10 * 1024 * 1024; use constant IO_LAYER => ":raw:perlio(layer=" . CHUNK_LEN . ")"; Getopt::Long::Configure("bundling"); my $cmd = shift @ARGV // ''; my @args = @ARGV; if ($cmd eq 'init') { run_init(); } elsif ($cmd eq 'status') { run_status(); } elsif ($cmd eq 'add') { die "Usage: $0 add [path1] [path2] ...\n" unless @args; run_add(@args); } elsif ($cmd eq 'commit') { my ($m, $a); GetOptions('m=s' => \$m, 'a' => \$a); if ($a) { run_add("."); } run_commit($m); } elsif ($cmd eq 'log') { run_log(); } elsif ($cmd eq 'show') { # Usage: vcx show HEAD main.c my $rev = shift @args; my $file = shift @args; die "Usage: $0 show \n" unless defined $rev && defined $file; run_show($rev, $file); } elsif ($cmd eq 'diff') { run_diff(@args); } else { print "Usage: $0 [init|status|add|commit|log]\n"; exit 1; } sub run_init { make_path(OBJ_DIR, REV_DIR, TMP_DIR); touch_file(INDEX); my $rev_id = to_hex_id(0); write_file(HEAD, "$rev_id\n"); print "Initialized repository.\n"; } sub run_status { my $it_idx = stream_index(); my $it_wrk = stream_tree("."); my $head = read_head() // '0'; print "On Revision: $head\n\n"; my $idx = $it_idx->(); my $wrk = $it_wrk->(); my $found_changes = 0; while ($idx || $wrk) { # Determine walk direction: -1 (Index only), 1 (Disk only), 0 (Both) my $cmp = !defined $idx ? 1 : !defined $wrk ? -1 : $idx->{path} cmp $wrk->{path}; my $flag = ""; my $suffix = ""; my $path = ""; if ($cmp < 0) { # File exists in Index but is missing from the Disk $path = $idx->{path}; $flag = "[D]"; $idx = $it_idx->(); } elsif ($cmp > 0) { # File is on Disk but not yet known to the Index $path = $wrk->{path}; $flag = "[N]"; $wrk = $it_wrk->(); } else { # Path exists in both; check for modifications $path = $idx->{path}; my $stg_path = File::Spec->catfile(TMP_DIR, $path); my $is_staged = -e $stg_path; # Compare workspace metadata against the metadata stored in the Index # (The Index metadata was updated during the last 'add' or 'commit') my $workspace_changed = ($wrk->{mtime} != $idx->{mtime} || $wrk->{size} != $idx->{size}); if ($is_staged) { if ($workspace_changed) { # It's staged, but the workspace has been touched since the 'add' $flag = "[M]"; $suffix = "(dirty)"; } else { # It's staged and matches the workspace exactly # Use hash comparison to see if it's a brand new file $flag = ($idx->{s_hash} eq $idx->{b_hash}) ? "[N]" : "[M]"; $suffix = "(staged)"; } } elsif ($workspace_changed) { # Not staged, but differs from the last commit $flag = "[M]"; } $idx = $it_idx->(); $wrk = $it_wrk->(); } if ($flag ne "") { printf "%s %s %s\n", $flag, $path, $suffix; $found_changes = 1; } } print "No changes detected.\n" unless $found_changes; print "\n"; } sub run_add { my @paths = @_; @paths = (".") unless @paths; my $idx_path = INDEX; my $tmp_idx = "$idx_path.tmp"; my $it_idx = stream_index(); my $it_wrk = stream_tree(@paths); open(my $out, ">:raw", $tmp_idx) or die "Could not create $tmp_idx: $!"; my $idx_entry = $it_idx->(); my $wrk_entry = $it_wrk->(); while ($idx_entry || $wrk_entry) { my $cmp = !defined $idx_entry ? 1 : !defined $wrk_entry ? -1 : $idx_entry->{path} cmp $wrk_entry->{path}; my $path = ($cmp <= 0) ? $idx_entry->{path} : $wrk_entry->{path}; if ($cmp == 0) { if ($idx_entry->{mtime} == $wrk_entry->{mtime} && $idx_entry->{size} == $wrk_entry->{size}) { # No change print $out join("\t", $idx_entry->{s_hash}, $idx_entry->{b_hash}, $idx_entry->{mtime}, $idx_entry->{size}, $idx_entry->{path}) . "\n"; } else { my $current_hash = hash_file_content($wrk_entry->{path}); # Snapshot to staging area my $stg_path = File::Spec->catfile(TMP_DIR, $wrk_entry->{path}); make_path(dirname($stg_path)); copy($wrk_entry->{path}, $stg_path); if ($current_hash eq $idx_entry->{s_hash}) { print $out join("\t", $idx_entry->{s_hash}, $idx_entry->{b_hash}, $wrk_entry->{mtime}, $wrk_entry->{size}, $wrk_entry->{path}) . "\n"; } else { print $out join("\t", $current_hash, $idx_entry->{b_hash}, $wrk_entry->{mtime}, $wrk_entry->{size}, $wrk_entry->{path}) . "\n"; } } $idx_entry = $it_idx->(); $wrk_entry = $it_wrk->(); } elsif ($cmp > 0) { # New File: hash and snapshot to staging my $hash = hash_file_content($wrk_entry->{path}); my $stg_path = File::Spec->catfile(TMP_DIR, $wrk_entry->{path}); make_path(dirname($stg_path)); copy($wrk_entry->{path}, $stg_path); print $out join("\t", $hash, $hash, $wrk_entry->{mtime}, $wrk_entry->{size}, $wrk_entry->{path}) . "\n"; $wrk_entry = $it_wrk->(); } else { # File in index but missing from disk print $out join("\t", $idx_entry->{s_hash}, $idx_entry->{b_hash}, $idx_entry->{mtime}, $idx_entry->{size}, $idx_entry->{path}) . "\n"; $idx_entry = $it_idx->(); } } close $out; rename($tmp_idx, $idx_path) or die "Failed to update index: $!"; } sub run_commit { my ($msg) = @_; if (!defined $msg || $msg eq "") { $msg = get_commit_message(); } my $parent_id = read_head() // to_hex_id(0); my $parent_tree_hash = ""; if ($parent_id ne to_hex_id(0)) { my $p_rev_file = File::Spec->catfile(REV_DIR, $parent_id); if (open my $pfh, '<', $p_rev_file) { while (<$pfh>) { if (/^tree:(.*)$/) { $parent_tree_hash = $1; last; } } close $pfh; } } my $it_idx = stream_index(); my $it_old = stream_tree_file($parent_tree_hash); my $sha_new_tree = Digest::SHA->new(1); my @new_tree_lines; my $new_tree_size = 0; my $use_disk_nt = 0; my ($nt_fh, $nt_path); my %patches; my $patch_mem_size = 0; my $use_disk_patch = 0; my ($pt_fh, $pt_path); my ($tmp_idx_fh, $tmp_idx_path) = tempfile(DIR => TMP_DIR, UNLINK => 0); binmode $tmp_idx_fh, ":raw"; my $idx = $it_idx->(); my $old = $it_old->(); while ($idx || $old) { my $cmp = !defined $idx ? 1 : !defined $old ? -1 : $idx->{path} cmp $old->{path}; my ($out_s, $out_b, $out_m, $out_z, $out_p); if ($cmp < 0) { # New file $out_p = $idx->{path}; $out_s = $idx->{s_hash}; $out_b = $idx->{s_hash}; $out_m = $idx->{mtime}; $out_z = $idx->{size}; my $obj_path = get_obj_path($out_b); my $stg_file = File::Spec->catfile(TMP_DIR, $out_p); if (!-e $obj_path) { rename($stg_file, $obj_path) or copy($stg_file, $obj_path); } $idx = $it_idx->(); } elsif ($cmp == 0) { # Modified/unchanged $out_p = $idx->{path}; $out_s = $idx->{s_hash}; $out_m = $idx->{mtime}; $out_z = $idx->{size}; if ($idx->{s_hash} ne $old->{hash}) { my $base_obj = get_obj_path($old->{hash}); my $stg_file = File::Spec->catfile(TMP_DIR, $out_p); my $patch = (-T $stg_file) ? qx(diff '$base_obj' '$stg_file') : make_bin_patch($stg_file, $base_obj); # 1.0 Factor: Use patch if it is smaller than or equal to the file size if (defined $patch && length($patch) <= $out_z) { if (!$use_disk_patch && ($patch_mem_size + length($patch)) > MEM_LIMIT) { ($pt_fh, $pt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0); my $tar = Archive::Tar->new; $tar->add_data($_, $patches{$_}) for keys %patches; $tar->write($pt_fh); %patches = (); $use_disk_patch = 1; } if ($use_disk_patch) { my $tar = Archive::Tar->new($pt_path); $tar->add_data("$out_p.patch", $patch); $tar->write($pt_path); } else { $patches{"$out_p.patch"} = $patch; $patch_mem_size += length($patch); } $out_b = $old->{hash}; unlink($stg_file); } else { $out_b = $out_s; my $obj_path = get_obj_path($out_b); if (!-e $obj_path) { rename($stg_file, $obj_path) or copy($stg_file, $obj_path); } } } else { # 0.0 Factor: Identity check (no changes) $out_b = $old->{hash}; } $idx = $it_idx->(); $old = $it_old->(); } else { # Deleted $old = $it_old->(); next; } # Record tree my $t_line = "$out_b\t$out_p\n"; $sha_new_tree->add($t_line); $new_tree_size += length($t_line); if (!$use_disk_nt && $new_tree_size > MEM_LIMIT) { ($nt_fh, $nt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0); print $nt_fh @new_tree_lines; @new_tree_lines = (); $use_disk_nt = 1; } $use_disk_nt ? print $nt_fh $t_line : push @new_tree_lines, $t_line; # Record index print $tmp_idx_fh join("\t", $out_s, $out_b, $out_m, $out_z, $out_p) . "\n"; } # Finalize tree my $new_tree_hash = $sha_new_tree->hexdigest; my $tree_obj_path = get_obj_path($new_tree_hash); if ($use_disk_nt) { close $nt_fh; rename($nt_path, $tree_obj_path); } else { write_file($tree_obj_path, join("", @new_tree_lines)); } # Finalize patch bundle my $patch_bundle_hash = ""; if ($use_disk_patch || %patches) { my $bundle_tmp = File::Spec->catfile(TMP_DIR, "bundle.tar.gz"); my $tar = Archive::Tar->new; if ($use_disk_patch) { $tar->read($pt_path); unlink $pt_path; } $tar->add_data($_, $patches{$_}) for keys %patches; # Write directly to disk with Gzip to avoid loading huge tarballs into RAM $tar->write($bundle_tmp, COMPRESS_GZIP); $patch_bundle_hash = hash_file_content($bundle_tmp); rename($bundle_tmp, get_obj_path($patch_bundle_hash)); } # Revision file my $next_id = to_hex_id(from_hex_id($parent_id) + 1); $msg //= "Commit $next_id"; open my $rfh, '>:raw', File::Spec->catfile(REV_DIR, $next_id) or die $!; print $rfh "tree:$new_tree_hash\nparent:$parent_id\n"; print $rfh "patch:$patch_bundle_hash\n" if $patch_bundle_hash; print $rfh "date:" . time() . "\nmsg:$msg\n"; close $rfh; close $tmp_idx_fh; rename($tmp_idx_path, INDEX); write_file(HEAD, "$next_id\n"); File::Path::remove_tree(TMP_DIR); make_path(TMP_DIR); print "Revision $next_id committed.\n"; } sub run_log { open my $fh_h, '<', HEAD or die "Not a repository.\n"; my $head = <$fh_h>; chomp $head; close $fh_h; # Setup pager my $pager = $ENV{PAGER} || 'less -R'; open(my $pipe, "| $pager") or die "Can't pipe to $pager: $!"; my $old_fh = select($pipe); print "Revision History (HEAD: $head)\n\n"; my $current_id = $head; while ($current_id) { my $rev_file = File::Spec->catfile(REV_DIR, $current_id); last unless -f $rev_file; open my $rfh, '<', $rev_file or last; my ($tree, $parent, $date, $msg) = ("", "", "", "[No message]"); while (<$rfh>) { chomp; if (/^tree:(.*)$/) { $tree = $1; } elsif (/^parent:(.*)$/) { $parent = $1; } elsif (/^date:(.*)$/) { $date = $1; } elsif (/^msg:(.*)$/) { $msg = $1; } } close $rfh; my $date_str = $date ? strftime("%a %b %e %H:%M:%S %Y", localtime($date)) : "Unknown Date"; print "commit $current_id\n"; print "Date: $date_str\n"; print "\n $msg\n\n"; # Move to parent to walk backward $current_id = $parent; } close $pipe; select($old_fh); } sub run_show { my ($rev_id, $file_path) = @_; die "Usage: $0 show \n" unless defined $rev_id && defined $file_path; my $pager = $ENV{PAGER} || 'less -R'; open(my $pipe, "| $pager") or die "Can't pipe to $pager: $!"; my $old_fh = select($pipe); # 2. Use existing logic to get the content my $v = get_file_version($rev_id, $file_path); die "Error: Could not resolve '$file_path' at revision $rev_id.\n" unless defined $v; # 3. Output content if (ref($v) eq 'SCALAR') { binmode STDOUT, ":raw"; print $$v; } else { # It's a file path (for large files) open my $fh, '<', $v or die $!; binmode $fh, IO_LAYER; while (read($fh, my $buf, CHUNK_LEN)) { print $buf; } close $fh; } } sub run_diff { my @args = @_; my ($src, $dst, $target_path); if (@args == 0) { ($src, $dst) = ('head', undef); } elsif (@args == 1) { is_revision($args[0]) ? (($src, $dst) = ($args[0], undef)) : (($src, $dst, $target_path) = ('head', undef, $args[0])); } elsif (@args == 2) { is_revision($args[0]) && is_revision($args[1]) ? (($src, $dst) = ($args[0], $args[1])) : (($src, $dst, $target_path) = ($args[0], undef, $args[1])); } else { ($src, $dst, $target_path) = @args; } my ($pipe, $old_fh); # Helper to open pager only once when needed my $out = sub { my $msg = shift; if (!defined $pipe && -t STDOUT) { my $pager = $ENV{PAGER} || 'less -R'; open($pipe, "| $pager") or die $!; $old_fh = select($pipe); } print $msg; }; if (defined $target_path) { my $v1 = get_file_version($src, $target_path); my $v2 = get_file_version($dst, $target_path); if (defined $v1 && defined $v2) { my $f1 = ref($v1) ? "<(echo -n " . escapeshellarg($$v1) . ")" : escapeshellarg($v1); my $f2 = ref($v2) ? "<(echo -n " . escapeshellarg($$v2) . ")" : escapeshellarg($v2); if (system("bash", "-c", "diff -q $f1 $f2 > /dev/null 2>&1") != 0) { $out->("\033[1mdiff --vcx a/$target_path b/$target_path\033[0m\n"); # Stream the diff output line by line to the pipe open my $dfh, '-|', "bash -c \"diff -u $f1 $f2 | tail -n +3\""; while (<$dfh>) { $out->($_) } close $dfh; } } } else { # Full Tree Walk my $s_id = (lc($src // '') eq 'head') ? read_head() : ($src // ''); my $th; if (open my $rf, '<', File::Spec->catfile(REV_DIR, $s_id)) { while (<$rf>) { $th = $1 if /^tree:(.*)$/ } close $rf; } my $it_old = $th ? stream_tree_file($th) : sub { undef }; my $it_new = defined $dst ? do { my $d_id = (lc($dst // '') eq 'head') ? read_head() : $dst; my $dth; if (open my $df, '<', File::Spec->catfile(REV_DIR, $d_id)) { while (<$df>) { $dth = $1 if /^tree:(.*)$/ } close $df; } $dth ? stream_tree_file($dth) : sub { undef }; } : stream_tree("."); my ($old, $new) = ($it_old->(), $it_new->()); while ($old || $new) { my $p_old = $old->{path} // ''; my $p_new = $new->{path} // ''; my $cmp = !defined $old ? 1 : !defined $new ? -1 : $p_old cmp $p_new; if ($cmp == 0) { if (($old->{hash} // '') ne ($new->{hash} // '')) { # Recursively call or just handle diff here my $v1 = get_file_version($src, $p_old); my $v2 = get_file_version($dst, $p_old); if (defined $v1 && defined $v2) { my $f1 = ref($v1) ? "<(echo -n " . escapeshellarg($$v1) . ")" : escapeshellarg($v1); my $f2 = ref($v2) ? "<(echo -n " . escapeshellarg($$v2) . ")" : escapeshellarg($v2); if (system("bash", "-c", "diff -q $f1 $f2 > /dev/null 2>&1") != 0) { $out->("\033[1mdiff --vcx a/$p_old b/$p_old\033[0m\n"); open my $dfh, '-|', "bash -c \"diff -u $f1 $f2 | tail -n +3\""; while (<$dfh>) { $out->($_) } close $dfh; } } } ($old, $new) = ($it_old->(), $it_new->()); } elsif ($cmp < 0) { $out->("\033[31m--- $p_old (deleted)\033[0m\n"); $old = $it_old->(); } else { $out->("\033[32m+++ $p_new (new file)\033[0m\n"); $new = $it_new->(); } } } if (defined $pipe) { close $pipe; select($old_fh); } } sub make_bin_patch { my ($new_file, $old_file) = @_; my $new_size = -s $new_file; open my $f_new, '<:raw', $new_file or die $!; open my $f_old, '<:raw', $old_file or die $!; # Start patch string with new total size (8 bytes) my $patch = pack("Q", $new_size); my $offset = 0; my $blk_size = 4096; while (1) { my $read_new = sysread($f_new, my $buf_new, $blk_size); my $read_old = sysread($f_old, my $buf_old, $blk_size); last if !$read_new && !$read_old; # If blocks differ, record the change if (($buf_new // '') ne ($buf_old // '')) { # Format: Offset (Q), Length (L), then the raw data $patch .= pack("QL", $offset, length($buf_new)) . $buf_new; } $offset += $blk_size; } close $f_new; close $f_old; # If only the 8-byte header exists, files were identical return length($patch) > 8 ? $patch : undef; } sub apply_bin_patch_file { my ($target_path, $patch_ref) = @_; my $patch = $$patch_ref; return unless length($patch) > 8; my $new_size = unpack("Q", substr($patch, 0, 8)); # Open target file for update and resize open my $fh, '+<:raw', $target_path or die $!; truncate($fh, $new_size); my $pos = 8; while ($pos < length($patch)) { my ($offset, $len) = unpack("QL", substr($patch, $pos, 12)); $pos += 12; my $data = substr($patch, $pos, $len); $pos += $len; seek($fh, $offset, 0); print $fh $data; } close $fh; } # Convert decimal to a padded 7-char hex string sub to_hex_id { sprintf("%07x", $_[0]) } # Convert hex back to decimal sub from_hex_id { hex($_[0]) } sub write_file { my ($path, $content) = @_; my $dir = dirname($path); make_path($dir) unless -d $dir; open my $fh, '>', $path or die "Could not open '$path' for writing: $!"; print $fh $content; close $fh or die "Could not close '$path' after writing: $!"; } sub touch_file { my ($path) = @_; my $dir = dirname($path); make_path($dir) unless -d $dir; open my $fh, '>', $path or die "Could not touch $path: $!"; close $fh; } sub hash_file_content { my ($filename) = @_; my $sha = Digest::SHA->new(1); if (-l $filename) { my $target = readlink($filename); die "Could not readlink '$filename': $!" unless defined $target; $sha->add($target); } else { open(my $fh, '<:raw', $filename) or die "Could not open '$filename': $!"; $sha->addfile($fh); close($fh); } return $sha->hexdigest; } sub stream_tree { my (@paths) = @_; @paths = (".") unless @paths; my $chunk_size = 1024 * 64; my @buf; my $buf_size = 0; my $tot_size = 0; my $use_disk = 0; my ($tmp_fh, $tmp_path); my $flush = sub { if (!$use_disk) { ($tmp_fh, $tmp_path) = tempfile(UNLINK => 1); $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size); binmode $tmp_fh, ":raw"; $use_disk = 1; } print $tmp_fh @buf; @buf = (); $buf_size = 0; }; my @stack = @paths; while (@stack) { my $path = (pop @stack) =~ s|^\./||r; next if $path eq REPO; my @st = lstat($path); if (-d _) { if (opendir(my $dh, $path)) { push @stack, map { File::Spec->catfile($path, $_) } grep { $_ ne '.' && $_ ne '..' && $_ ne REPO } readdir($dh); closedir($dh); } } elsif (-f _ || -l _) { my $line = "$path\t$st[9]\t$st[7]\n"; push @buf, $line; $buf_size += length($line); $tot_size += length($line); if ((!$use_disk && $tot_size > MEM_LIMIT) || ($use_disk && $buf_size > $chunk_size)) { $flush->(); } } } if (!$use_disk) { @buf = sort @buf; return sub { my $line = shift @buf; return unless $line; chomp $line; my ($p, $m, $s) = split(/\t/, $line); return { path => $p, mtime => $m, size => $s }; }; } else { $flush->() if @buf; close $tmp_fh; open(my $sort_fh, "-|", "sort", "-t", "\t", "-k1,1", $tmp_path) or die $!; return sub { my $line = <$sort_fh>; unless ($line) { close $sort_fh; return; } chomp $line; my ($p, $s, $m) = split(/\t/, $line); return { path => $p, mtime => $m, size => $s }; }; } } sub stream_index { my $idx = INDEX; return sub { return; } unless -e $idx && -s $idx > 0; open(my $fh, "<:raw", $idx) or die "Could not open index: $!"; return sub { my $line = <$fh>; unless (defined $line) { close $fh; return; } chomp $line; my ($s_hash, $b_hash, $mtime, $size, $path) = split(/\t/, $line, 5); return { s_hash => $s_hash, b_hash => $b_hash, mtime => $mtime, size => $size, path => $path, }; }; } # Logic for sharded object directory sub get_obj_path { my ($hash) = @_; my $dir = File::Spec->catfile(OBJ_DIR, substr($hash, 0, 2)); make_path($dir) unless -d $dir; return File::Spec->catfile($dir, substr($hash, 2)); } sub stream_tree_file { my ($hash) = @_; return sub { return } unless $hash; my $path = get_obj_path($hash); open my $fh, '<:raw', $path or return sub { return }; return sub { my $line = <$fh>; return unless $line; chomp $line; my ($h, $p) = split(/\t/, $line, 2); return { hash => $h, path => $p }; } } sub read_head { return unless -e HEAD; open my $fh, '<', HEAD or return; my $val = <$fh>; chomp $val if $val; return $val; } sub read_file { my ($path) = @_; return "" unless -e $path; open my $fh, '<:raw', $path or return ""; my $content = do { local $/; <$fh> }; close $fh; return $content; } sub get_commit_message { my $editor = $ENV{VISUAL} || $ENV{EDITOR} || 'vi'; my $tmp_file = File::Spec->catfile(TMP_DIR, "COMMIT_EDITMSG"); my $template = <<"EOF"; # Please enter the commit message for your changes. Lines starting # with '#' will be ignored, and an empty message aborts the commit. # # On revision: @{[read_head() // '0']} EOF write_file($tmp_file, $template); system($editor, $tmp_file); my $raw_content = read_file($tmp_file); my @lines = split(/\n/, $raw_content); # Filter out lines starting with # my $final_msg = join("\n", grep { $_ !~ /^\s*#/ } @lines); $final_msg =~ s/^\s+|\s+$//g; # Trim whitespace unlink($tmp_file); return ($final_msg ne "") ? $final_msg : undef; } sub escapeshellarg { my $str = shift; $str =~ s/'/'\\''/g; return "'$str'"; } # Check if a string is a 7-character hex revision ID sub is_revision { my ($str) = @_; return defined $str && $str =~ /^[0-9a-f]{7}$/i; } sub get_file_version { my ($source, $path) = @_; # Handle Workspace if (!defined $source) { return undef unless -e $path; if ((-s $path // 0) > MEM_LIMIT) { return $path; } open my $fh, '<', $path or return undef; binmode($fh, IO_LAYER); my $data = do { local $/; <$fh> }; close $fh; return \$data; } # Resolve Revision my $rev_id = (lc($source) eq 'head') ? read_head() : $source; my $rev_file = File::Spec->catfile(REV_DIR, $rev_id); return undef unless -f $rev_file; my ($tree_hash, $patch_bundle_hash) = ("", ""); open my $rfh, '<', $rev_file or return undef; while (<$rfh>) { $tree_hash = $1 if /^tree:(.*)$/; $patch_bundle_hash = $1 if /^patch:(.*)$/; } close $rfh; # Locate node in tree my $it = stream_tree_file($tree_hash); my $node; while (my $n = $it->()) { if (($n->{path} // '') eq $path) { $node = $n; last; } } return undef unless $node; my $obj_path = get_obj_path($node->{hash} // ''); return undef unless -f $obj_path; # Extract Base Object to Temp File my ($tfh, $tpath) = tempfile(DIR => TMP_DIR, UNLINK => 1); binmode $tfh, IO_LAYER; open my $ofh, '<', $obj_path or return undef; binmode $ofh, IO_LAYER; while (read($ofh, my $buf, CHUNK_LEN)) { print $tfh $buf; } close $ofh; close $tfh; # Apply Patch via Streaming Bundle if ($patch_bundle_hash) { my $bundle_path = get_obj_path($patch_bundle_hash); if (-f $bundle_path && -s $bundle_path > 0) { # Use iterator to find the patch without loading the whole tarball into RAM my $next = Archive::Tar->iter($bundle_path, 1); while (my $f = $next->()) { if ($f->name eq "$path.patch") { my $p_content = $f->get_content(); my ($pfh, $ppath) = tempfile(DIR => TMP_DIR, UNLINK => 1); binmode $pfh, ":raw"; print $pfh $p_content; close $pfh; # Determine if text patch or binary block replacement if ($p_content =~ /^\d+(?:,\d+)?[adc]\d+/) { system("patch -s -f $tpath < $ppath >/dev/null 2>&1"); } else { apply_bin_patch_file($tpath, \$p_content); } last; } } } } # Final Output decision based on result size if ((-s $tpath // 0) > MEM_LIMIT) { return $tpath; } else { my $content = read_file($tpath); return \$content; } }