#!/usr/bin/perl
# vcx - a minimal delta-compressed version control tool.
#
# On-disk layout (all under .vcx/):
#   head   - hex ID of the current revision
#   index  - TSV rows: staged_hash \t base_hash \t mtime \t size \t path
#   obj/   - content-addressed object store (keyframes + tree objects)
#   rev/   - one flat file per revision: header lines, blank line,
#            message (len bytes), then a tar payload of patches
#   stg/   - staging/temp area
use strict;
use warnings;
use IO::Handle;
use File::Path qw(make_path);
use File::Copy qw(copy);
use File::Find;
use File::Compare;
use File::Basename;
use File::Glob qw(:bsd_glob);
use File::Spec;
use File::Temp qw(tempfile);
use Getopt::Long;
use Archive::Tar;
use Compress::Zlib;
use POSIX qw(strftime);
use Digest::SHA qw(sha1_hex);

use constant REPO    => '.vcx';
use constant HEAD    => REPO . '/head';     # Current revision ID
use constant INDEX   => REPO . '/index';    # Index
use constant OBJ_DIR => REPO . '/obj';      # Object store
use constant REV_DIR => REPO . '/rev';      # Revisions

# Staging area
use constant TMP_DIR  => REPO . '/stg';
use constant TMP_META => TMP_DIR . '/meta';
use constant TMP_DIFF => TMP_DIR . '/delta.tar.gz';

use constant MEM_LIMIT      => 64 * 1024 * 1024;
use constant MAX_INDEX_SIZE => 16 * 1024 * 1024;

Getopt::Long::Configure("bundling");

# --- Command dispatch -------------------------------------------------------
my $cmd  = shift @ARGV // '';
my @args = @ARGV;

if ($cmd eq 'init') {
    run_init();
}
elsif ($cmd eq 'status') {
    run_status();
}
elsif ($cmd eq 'add') {
    die "Usage: $0 add [path1] [path2] ...\n" unless @args;
    run_add(@args);
}
elsif ($cmd eq 'commit') {
    my ($m, $a);
    GetOptions('m=s' => \$m, 'a' => \$a);
    if ($a) {
        run_add(".");    # -a: stage everything before committing
    }
    run_commit($m);
}
elsif ($cmd eq 'log') {
    run_log();
}
else {
    print "Usage: $0 [init|status|add|commit|log]\n";
    exit 1;
}

# Create the repository skeleton and point HEAD at revision 0.
# NOTE: re-running init truncates an existing index (touch_file opens '>').
sub run_init {
    make_path(OBJ_DIR, REV_DIR, TMP_DIR);
    touch_file(INDEX);
    my $rev_id = to_hex_id(0);
    write_file(HEAD, "$rev_id\n");
    print "Initialized repository.\n";
}

# TODO: not yet implemented.
sub run_status {
}

# Stage paths: merge-join the sorted index stream against the sorted
# working-tree stream and rewrite the index atomically.
sub run_add {
    my @paths = @_;
    @paths = (".") unless @paths;

    my $idx_path = INDEX;
    my $tmp_idx  = "$idx_path.tmp";
    my $it_idx   = stream_index();
    my $it_wrk   = stream_tree(@paths);

    open(my $out, ">:raw", $tmp_idx) or die "Could not create $tmp_idx: $!";

    my $idx_entry = $it_idx->();
    my $wrk_entry = $it_wrk->();

    # Classic sorted-merge: both iterators yield entries in path order.
    while ($idx_entry || $wrk_entry) {
        my $cmp = !defined $idx_entry ? 1
                : !defined $wrk_entry ? -1
                : $idx_entry->{path} cmp $wrk_entry->{path};
        my $path = ($cmp <= 0) ? $idx_entry->{path} : $wrk_entry->{path};
        my $matches_spec = matches_paths($path, \@paths);

        if ($cmp == 0) {
            # Present in both index and working tree.
            if ($matches_spec) {
                if (   $idx_entry->{mtime} == $wrk_entry->{mtime}
                    && $idx_entry->{size} == $wrk_entry->{size}) {
                    # No change: keep the row as-is.
                    print $out join("\t",
                        $idx_entry->{staged_hash}, $idx_entry->{base_hash},
                        $idx_entry->{mtime},       $idx_entry->{size},
                        $idx_entry->{path}) . "\n";
                }
                else {
                    my $current_hash = hash_file_content($wrk_entry->{path});
                    if ($current_hash eq $idx_entry->{staged_hash}) {
                        # mtime changed, but content is identical.
                        # Refresh mtime/size, keep staged/base hashes.
                        print $out join("\t",
                            $idx_entry->{staged_hash}, $idx_entry->{base_hash},
                            $wrk_entry->{mtime},       $wrk_entry->{size},
                            $wrk_entry->{path}) . "\n";
                    }
                    else {
                        # Modified: new staged_hash, base_hash stays as-is.
                        print $out join("\t",
                            $current_hash,       $idx_entry->{base_hash},
                            $wrk_entry->{mtime}, $wrk_entry->{size},
                            $wrk_entry->{path}) . "\n";
                    }
                }
            }
            else {
                # Outside pathspec: keep index row untouched.
                print $out join("\t",
                    $idx_entry->{staged_hash}, $idx_entry->{base_hash},
                    $idx_entry->{mtime},       $idx_entry->{size},
                    $idx_entry->{path}) . "\n";
            }
            $idx_entry = $it_idx->();
            $wrk_entry = $it_wrk->();
        }
        elsif ($cmp > 0) {
            # New file on disk (not in index).
            if ($matches_spec) {
                my $hash = hash_file_content($wrk_entry->{path});
                # Brand new file: staged and base start as the same hash.
                print $out join("\t",
                    $hash, $hash,
                    $wrk_entry->{mtime}, $wrk_entry->{size},
                    $wrk_entry->{path}) . "\n";
            }
            $wrk_entry = $it_wrk->();
        }
        else {
            # File in index but missing from disk (deletion).
            if (!$matches_spec) {
                # Not in our current 'add' scope; keep it in the index.
                print $out join("\t",
                    $idx_entry->{staged_hash}, $idx_entry->{base_hash},
                    $idx_entry->{mtime},       $idx_entry->{size},
                    $idx_entry->{path}) . "\n";
            }
            # If it matches spec but is gone, drop it (stages the deletion).
            $idx_entry = $it_idx->();
        }
    }
    close $out or die "Could not close $tmp_idx: $!";
    rename($tmp_idx, $idx_path) or die "Failed to update index: $!";
}

# Record a revision: store deltas or keyframes per changed file, write a
# tree object, a revision file, the refreshed index, and advance HEAD.
sub run_commit {
    my ($msg) = @_;

    # Get message and parent.
    $msg //= launch_editor();
    die "Commit aborted: empty message\n" unless $msg && $msg =~ /\S/;
    # NOTE(review): read_head(), calculate_delta(), compress_data() and
    # create_tarball() are not defined in this file chunk - confirm they
    # are provided elsewhere before shipping.
    my $parent_id = read_head();

    my $it = stream_index();
    my %patches;

    # New index (temporary).
    my ($idx_fh, $idx_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
    binmode $idx_fh, ":raw";

    # Tree object accumulates in memory, spilling to disk past MEM_LIMIT.
    my $sha_tree = Digest::SHA->new(1);
    my @tree_lines;
    my $tree_size     = 0;
    my $use_disk_tree = 0;
    my ($tree_fh, $tree_path);

    while (my $entry = $it->()) {
        my $final_hash_for_tree = $entry->{base_hash};

        if ($entry->{staged_hash} ne $entry->{base_hash}) {
            my $patch      = calculate_delta($entry);
            my $compressed = compress_data($patch);

            # THE PIVOT DECISION (50% rule): store the patch only if it
            # compresses to under half the file size; otherwise keyframe.
            if (length($compressed) < ($entry->{size} * 0.5)) {
                # Efficiency win: store patch.
                $patches{"$entry->{path}.patch"} = $compressed;
                # Tree keeps pointing at the OLD base (delta chain grows).
                $final_hash_for_tree = $entry->{base_hash};
            }
            else {
                # Pivot: store full object as a new keyframe.
                my $obj_path =
                    File::Spec->catfile(OBJ_DIR, $entry->{staged_hash});
                if (!-e $obj_path) {
                    copy($entry->{path}, $obj_path) or die "Pivot failed: $!";
                }
                # Tree points at the NEW hash (delta chain resets).
                $final_hash_for_tree = $entry->{staged_hash};
            }
        }

        # --- A. Update tree state ---
        my $t_line = "$final_hash_for_tree\t$entry->{path}\n";
        $sha_tree->add($t_line);
        $tree_size += length($t_line);
        if (!$use_disk_tree && $tree_size > MEM_LIMIT) {
            # Spill the in-memory tree lines to a temp file.
            ($tree_fh, $tree_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
            binmode $tree_fh, ":raw";
            print {$tree_fh} @tree_lines;
            @tree_lines    = ();
            $use_disk_tree = 1;
        }
        if ($use_disk_tree) {
            print {$tree_fh} $t_line;
        }
        else {
            push @tree_lines, $t_line;
        }

        # --- B. Update index state ---
        # The staged_hash becomes the next base_hash, and we refresh
        # metadata from the actual file on disk.
        my @stats = stat($entry->{path});
        print $idx_fh join("\t",
            $entry->{staged_hash},    # New staged
            $entry->{staged_hash},    # New base (index tracks "current")
            $stats[9],                # Fresh mtime
            $stats[7],                # Fresh size
            $entry->{path}) . "\n";
    }

    # Finalize tree object (content-addressed, so skip if it exists).
    my $tree_hash      = $sha_tree->hexdigest;
    my $final_tree_obj = File::Spec->catfile(OBJ_DIR, $tree_hash);
    if (!-e $final_tree_obj) {
        if ($use_disk_tree) {
            close $tree_fh;
            rename($tree_path, $final_tree_obj) or die $!;
        }
        else {
            write_file($final_tree_obj, join("", @tree_lines));
        }
    }
    else {
        unlink($tree_path) if $use_disk_tree;
    }

    # Assemble the unified revision file:
    #   tree:/parent:/date:/len: headers, blank line, message, tar payload.
    my $next_id     = to_hex_id(from_hex_id($parent_id) + 1);
    my $rev_file    = File::Spec->catfile(REV_DIR, $next_id);
    my $tar_payload = create_tarball(\%patches);
    my $ts          = time();
    my $msg_final   = "$msg\n";
    my $msg_len     = length($msg_final);

    open my $rfh, '>:raw', $rev_file or die $!;
    print $rfh "tree:$tree_hash\n";
    print $rfh "parent:$parent_id\n";
    print $rfh "date:$ts\n";
    print $rfh "len:$msg_len\n\n";
    print $rfh $msg_final;
    print $rfh $tar_payload;
    close $rfh or die "Could not close $rev_file: $!";

    # Final atomicity: commit index and HEAD.
    close $idx_fh;
    rename($idx_path, INDEX) or die "Index update failed: $!";
    write_file(HEAD, "$next_id\n");
    print "Revision $next_id committed.\n";
}

# Walk history backwards from HEAD, printing each revision through a pager.
# BUGFIX: revisions are flat files written by run_commit (tree:/parent:/
# date:/len: headers + message), not directories with a "message" file -
# the old code looked for the latter and never printed anything.
sub run_log {
    open my $fh_h, '<', HEAD or die "Not a repository.\n";
    my $head = <$fh_h>;
    chomp $head;
    close $fh_h;

    # Set up pager.
    my $pager = $ENV{PAGER} || 'less -R';
    open(my $pipe, "| $pager") or die "Can't pipe to $pager: $!";
    my $old_fh = select($pipe);

    print "Revision History (HEAD: $head)\n\n";
    my $rev_num = from_hex_id($head);
    while ($rev_num > 0) {
        my $hex_id   = to_hex_id($rev_num);
        my $rev_file = File::Spec->catfile(REV_DIR, $hex_id);

        # Stop if we hit a gap in history or the beginning.
        last unless -f $rev_file;

        open my $rfh, '<:raw', $rev_file or die "Cannot read $rev_file: $!";
        my ($ts, $msg_len) = (0, 0);
        while (my $line = <$rfh>) {
            chomp $line;
            last if $line eq '';    # blank line terminates the header
            my ($key, $val) = split /:/, $line, 2;
            $ts      = $val if $key eq 'date';
            $msg_len = $val if $key eq 'len';
        }

        my $message = "[No message]";
        if ($msg_len > 0) {
            read($rfh, my $buf, $msg_len);
            if (defined $buf && $buf =~ /\S/) {
                $message = $buf;
                $message =~ s/^\s+|\s+$//g;    # Trim whitespace
            }
            else {
                $message = "[Empty message]";
            }
        }
        close $rfh;

        my $date_str = strftime("%a %b %e %H:%M:%S %Y", localtime($ts));
        print "commit $hex_id\n";
        print "Date: $date_str\n";
        print "\n    $message\n\n";
        $rev_num--;
    }
    close $pipe;
    select($old_fh);
}

# Produce a patch for $src against the stored object and stash it in
# $patches keyed by object basename. Text files get a unified diff,
# binary files a block-level binary patch.
sub make_patch {
    my ($src, $obj_path, $patches) = @_;
    return unless -f $obj_path;

    my $patch;
    # TODO: implement a disk-based diff for large files.
    if (-T $src) {
        if (compare($src, $obj_path) != 0) {
            # BUGFIX: list-form pipe open instead of interpolating paths
            # into a shell command (broke on quotes; shell-injectable).
            open my $dfh, '-|', 'diff', '-u', $obj_path, $src
                or die "Could not run diff: $!";
            $patch = do { local $/; <$dfh> };
            close $dfh;
        }
    }
    else {
        $patch = make_bin_patch($src, $obj_path);
    }
    if ($patch) {
        my $obj_name = basename($obj_path);
        $patches->{$obj_name} = $patch;
    }
}

# Build a block-level binary patch: an 8-byte new-size header followed by
# (offset Q, length L, payload) records for each 4 KiB block that differs.
# Returns undef when the files are block-identical.
sub make_bin_patch {
    my ($new_file, $old_file) = @_;
    my $new_size = -s $new_file;

    open my $f_new, '<:raw', $new_file or die $!;
    open my $f_old, '<:raw', $old_file or die $!;

    # Start the patch with the new total size.
    my $patch    = pack("Q", $new_size);
    my $offset   = 0;
    my $blk_size = 4096;
    while (1) {
        my $read_new = sysread($f_new, my $buf_new, $blk_size);
        my $read_old = sysread($f_old, my $buf_old, $blk_size);
        last if !$read_new && !$read_old;
        if ($buf_new ne $buf_old) {
            # Offset (Q) and length (L), then the raw new-file block.
            $patch .= pack("QL", $offset, length($buf_new)) . $buf_new;
        }
        $offset += $blk_size;
    }
    close $f_new;
    close $f_old;

    # Only return if we have more than just the size header.
    return length($patch) > 8 ? $patch : undef;
}

# Apply a binary patch produced by make_bin_patch to $base_file in place.
sub apply_bin_patch {
    my ($base_file, $patch_data) = @_;
    open my $fh, '+<:raw', $base_file or die $!;

    # Read the total new size (first 8 bytes).
    my $new_total_size = unpack("Q", substr($patch_data, 0, 8));
    my $pos = 8;

    # Apply block updates.
    while ($pos < length($patch_data)) {
        my ($offset, $len) = unpack("QL", substr($patch_data, $pos, 12));
        $pos += 12;
        my $payload = substr($patch_data, $pos, $len);
        $pos += $len;
        sysseek($fh, $offset, 0);
        syswrite($fh, $payload, $len);
    }

    # Clip file to the new size; removes leftover bytes from the old version.
    truncate($fh, $new_total_size) or die "Could not truncate: $!";
    close $fh;
}

# Convert decimal to a padded 7-char hex string.
sub to_hex_id { sprintf("%07x", $_[0]) }

# Convert hex back to decimal.
sub from_hex_id { hex($_[0]) }

# Open $EDITOR on a scratch file and return the entered message with
# '#' comment lines stripped and surrounding whitespace trimmed.
sub launch_editor {
    my $editor        = $ENV{EDITOR} || $ENV{VISUAL} || 'vi';
    my $temp_msg_file = File::Spec->catfile(REPO, "COMMIT_EDITMSG");

    open my $fh, '>', $temp_msg_file
        or die "Cannot create temp message file: $!";
    print $fh "\n# Enter the commit message for your changes.\n";
    print $fh "# Lines starting with '#' will be ignored.\n";
    close $fh;

    system("$editor \"$temp_msg_file\"");

    my $final_msg = "";
    if (-e $temp_msg_file) {
        open my $rfh, '<', $temp_msg_file or die $!;
        while (my $line = <$rfh>) {
            next if $line =~ /^#/;    # Strip out the helper comments
            $final_msg .= $line;
        }
        close $rfh;
        unlink($temp_msg_file);       # Clean up
    }
    $final_msg =~ s/^\s+|\s+$//g;     # Trim whitespace
    return $final_msg;
}

# Write $content to $path, clobbering any existing file.
sub write_file {
    my ($path, $content) = @_;
    open my $fh, '>', $path or die "Could not open '$path' for writing: $!";
    print $fh $content;
    close $fh or die "Could not close '$path' after writing: $!";
}

# Create (or truncate) an empty file, making parent directories as needed.
sub touch_file {
    my ($path) = @_;
    my $dir = dirname($path);
    make_path($dir) unless -d $dir;
    open my $fh, '>', $path or die "Could not touch $path: $!";
    close $fh;
}

# SHA-1 of a file's content; for symlinks, hash the link target instead
# of following it.
sub hash_file_content {
    my ($filename) = @_;
    my $sha = Digest::SHA->new(1);
    if (-l $filename) {
        my $target = readlink($filename);
        die "Could not readlink '$filename': $!" unless defined $target;
        $sha->add($target);
    }
    else {
        open(my $fh, '<:raw', $filename) or die "Could not open '$filename': $!";
        $sha->addfile($fh);
        close($fh);
    }
    return $sha->hexdigest;
}

# Iteratively walk the working tree under @paths and return an iterator
# yielding { path, mtime, size } in sorted path order. Small trees are
# sorted in memory; past MEM_LIMIT the listing spills to a temp file and
# is sorted with the external sort(1).
sub stream_tree {
    my (@paths) = @_;
    @paths = (".") unless @paths;

    my $chunk_size = 1024 * 64;
    my @buf;
    my $buf_size = 0;
    my $tot_size = 0;
    my $use_disk = 0;
    my ($tmp_fh, $tmp_path);

    my $flush = sub {
        if (!$use_disk) {
            ($tmp_fh, $tmp_path) = tempfile(UNLINK => 1);
            $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size);
            binmode $tmp_fh, ":raw";    # Keep it raw for consistent offsets
            $use_disk = 1;
        }
        print {$tmp_fh} @buf;
        @buf      = ();
        $buf_size = 0;
    };

    my @stack = @paths;
    while (@stack) {
        my $path = (pop @stack) =~ s|^\./||r;
        next if $path eq REPO;          # Skip the repo itself

        # lstat + '_' filetests: symlinked dirs are NOT followed.
        my @st = lstat($path);
        if (-d _) {
            if (opendir(my $dh, $path)) {
                push @stack,
                    map  { File::Spec->catfile($path, $_) }
                    grep { $_ ne '.' && $_ ne '..' && $_ ne REPO }
                    readdir($dh);
                closedir($dh);
            }
        }
        elsif (-f _ || -l _) {
            # Format: path \t mtime \t size
            my $line = "$path\t$st[9]\t$st[7]\n";
            push @buf, $line;
            $buf_size += length($line);
            $tot_size += length($line);
            if (   (!$use_disk && $tot_size > MEM_LIMIT)
                || ($use_disk && $buf_size > $chunk_size)) {
                $flush->();
            }
        }
    }

    if (!$use_disk) {
        @buf = sort @buf;
        return sub {
            my $line = shift @buf;
            return unless $line;
            chomp $line;
            my ($p, $m, $s) = split(/\t/, $line);
            return { path => $p, mtime => $m, size => $s };
        };
    }
    else {
        $flush->() if @buf;
        close $tmp_fh;
        # NOTE(review): sort(1) collation may differ from Perl's `cmp`
        # under non-C locales - consider forcing LC_ALL=C.
        open(my $sort_fh, "-|", "sort", "-t", "\t", "-k1,1", $tmp_path)
            or die $!;
        return sub {
            my $line = <$sort_fh>;
            unless ($line) {
                close $sort_fh;
                return;
            }
            chomp $line;
            # BUGFIX: lines are written path\tmtime\tsize; the old code
            # unpacked ($p, $s, $m) and swapped mtime/size.
            my ($p, $m, $s) = split(/\t/, $line);
            return { path => $p, mtime => $m, size => $s };
        };
    }
}

# Return an iterator over index rows as hashes. Yields
# { staged_hash, base_hash, mtime, size, path }; returns empty-handed
# when the index is missing or empty.
# BUGFIX: keys were s_hash/b_hash while every consumer (run_add,
# run_commit) reads staged_hash/base_hash.
sub stream_index {
    my $idx = INDEX;
    return sub { return; } unless -e $idx && -s $idx > 0;

    open(my $fh, "<:raw", $idx) or die "Could not open index: $!";
    return sub {
        my $line = <$fh>;
        unless (defined $line) {
            close $fh;
            return;
        }
        chomp $line;
        # Format: staged, base, mtime, size, path
        my ($s_hash, $b_hash, $mtime, $size, $path) = split(/\t/, $line, 5);
        return {
            staged_hash => $s_hash,
            base_hash   => $b_hash,
            mtime       => $mtime,
            size        => $size,
            path        => $path,
        };
    };
}

# True if $path is selected by any of the given pathspecs: '.' matches
# everything, otherwise exact match or directory-prefix match.
sub matches_paths {
    my ($path, $specs_ref) = @_;
    return 1 if !@$specs_ref;
    foreach my $spec (@$specs_ref) {
        my $clean_spec = $spec =~ s|/+$||r;    # Remove trailing slashes
        return 1 if $clean_spec eq '.';
        return 1 if $path eq $clean_spec;
        return 1 if $path =~ /^\Q$clean_spec\E\//;    # Directory match
    }
    return 0;
}

# Write a tree object (staged_hash \t path per index row) into the object
# store, spilling to disk past MEM_LIMIT, and return its hash.
sub snapshot_tree {
    my $it = stream_index();
    my @buf;
    my $use_disk   = 0;
    my $total_size = 0;
    my $chunk_size = 1024 * 64;
    my $sha        = Digest::SHA->new(1);
    my ($tmp_fh, $tmp_path);

    while (my $entry = $it->()) {
        my $line = "$entry->{staged_hash}\t$entry->{path}\n";
        $sha->add($line);
        $total_size += length($line);
        if (!$use_disk && $total_size > MEM_LIMIT) {
            ($tmp_fh, $tmp_path) = tempfile();
            $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size);
            binmode $tmp_fh, ":raw";
            print {$tmp_fh} @buf;
            @buf      = ();
            $use_disk = 1;
        }
        if ($use_disk) {
            print {$tmp_fh} $line;
        }
        else {
            push @buf, $line;
        }
    }

    my $tree_hash = $sha->hexdigest;
    my $obj_path  = File::Spec->catfile(OBJ_DIR, $tree_hash);
    if (!-e $obj_path) {
        if ($use_disk) {
            close $tmp_fh;
            rename($tmp_path, $obj_path) or die "Rename failed: $!";
        }
        else {
            # BUGFIX: referenced undeclared @buffer (compile error under
            # strict); the accumulator is @buf.
            write_file($obj_path, join("", @buf));
        }
    }
    else {
        unlink($tmp_path) if $use_disk;
    }
    return $tree_hash;
}