#!/usr/bin/perl use strict; use warnings; use IO::Handle; use File::Path qw(make_path); use File::Copy qw(copy); use File::Find; use File::Compare; use File::Basename; use File::Glob qw(:bsd_glob); use File::Spec; use File::Temp qw(tempfile); use Getopt::Long; use Archive::Tar; use Compress::Zlib; use POSIX qw(strftime); use Digest::SHA qw(sha1_hex); use constant REPO => '.vcx'; use constant HEAD => REPO . '/head'; # Current revision ID use constant INDEX => REPO . '/index'; # Index use constant OBJ_DIR => REPO . '/obj'; # Object store use constant REV_DIR => REPO . '/rev'; # Revisions # Staging area use constant TMP_DIR => REPO . '/stg'; use constant TMP_META => TMP_DIR . '/meta'; use constant TMP_DIFF => TMP_DIR . '/delta.tar.gz'; Getopt::Long::Configure("bundling"); my $cmd = shift @ARGV // ''; my @args = @ARGV; if ($cmd eq 'init') { run_init(); } elsif ($cmd eq 'status') { run_status(); } elsif ($cmd eq 'add') { die "Usage: $0 add [path1] [path2] ...\n" unless @args; run_add(@args); } elsif ($cmd eq 'commit') { my ($m, $a); GetOptions('m=s' => \$m, 'a' => \$a); if ($a) { run_add("."); } run_commit($m); } elsif ($cmd eq 'log') { run_log(); } else { print "Usage: $0 [init|status|add|commit|log]\n"; exit 1; } sub run_init { make_path(OBJ_DIR, REV_DIR); touch_file(INDEX); my $rev_id = to_hex_id(0); my $rev_dir = File::Spec->catfile(REV_DIR, $rev_id); make_path($rev_dir); write_file(HEAD, "$rev_id\n"); # Baseline tree (empty) my $tree_hash = sha1_hex(""); my $tree_file = File::Spec->catfile($rev_dir, "tree-$tree_hash"); open my $fh, '>', $tree_file or die $!; close $fh; make_path(File::Spec->catdir(OBJ_DIR, $tree_hash)); open my $mfh, '>', File::Spec->catfile($rev_dir, "message"); close $mfh; print "Initialized repository.\n"; } sub run_status { } sub run_add { } sub run_commit { } sub run_log { open my $fh_h, '<', HEAD or die "Not a repository.\n"; my $head = <$fh_h>; chomp $head; close $fh_h; # Setup pager my $pager = $ENV{PAGER} || 'less -R'; open(my $pipe, "| $pager") or die "Can't pipe to $pager: $!"; my $old_fh = select($pipe); print "Revision History (HEAD: $head)\n\n"; my $rev_num = from_hex_id($head); while ($rev_num > 0) { my $hex_id = to_hex_id($rev_num); my $rev_dir = File::Spec->catfile(REV_DIR, $hex_id); # Stop if we hit a gap in history or the beginning last unless -d $rev_dir; # Stat index 9 is the last modification time my $mtime = (stat($rev_dir))[9]; my $date_str = strftime("%a %b %e %H:%M:%S %Y", localtime($mtime)); my $msg_file = File::Spec->catfile($rev_dir, "message"); my $message = "[No message]"; if (-e $msg_file) { open my $mfh, '<', $msg_file; $message = do { local $/; <$mfh> } // "[Empty message]"; $message =~ s/^\s+|\s+$//g; # Trim whitespace close $mfh; } print "commit $hex_id\n"; print "Date: $date_str\n"; print "\n $message\n\n"; $rev_num--; } close $pipe; select($old_fh); } sub stage_file { my ($src, $obj, $tmp) = @_; make_path(dirname($tmp)); # We want the link inside tmp/ to point to "obj/HASH" # relative to project root. my $rel_target = $obj; unlink($tmp) if -e $tmp || -l $tmp; symlink($rel_target, $tmp) or die "Failed to symlink $tmp: $!"; print "[Staged File] $src\n"; } sub stage_link { my ($src, $tmp) = @_; my $target = readlink($src); make_path(dirname($tmp)); unlink($tmp) if -e $tmp || -l $tmp; # For workspace symlinks, we clone the target symlink($target, $tmp) or die "Failed to symlink $tmp: $!"; print "[Staged link] $src -> $target\n"; } sub make_patch { my ($src, $obj_path, $patches) = @_; return unless -f $obj_path; my $patch; # TODO: implement a disk-based diff for large files if (-T $src) { if (compare($src, $obj_path) != 0) { $patch = qx(diff -u '$obj_path' '$src'); } } else { $patch = make_bin_patch($src, $obj_path); } if ($patch) { my $obj_name = basename($obj_path); $patches->{$obj_name} = $patch; } } sub make_bin_patch { my ($new_file, $old_file) = @_; my $new_size = -s $new_file; open my $f_new, '<:raw', $new_file or die $!; open my $f_old, '<:raw', $old_file or die $!; # Start the patch with the new total size my $patch = pack("Q", $new_size); my $offset = 0; my $blk_size = 4096; while (1) { my $read_new = sysread($f_new, my $buf_new, $blk_size); my $read_old = sysread($f_old, my $buf_old, $blk_size); last if !$read_new && !$read_old; if ($buf_new ne $buf_old) { # Offset (Q) and Length (L) $patch .= pack("QL", $offset, length($buf_new)) . $buf_new; } $offset += $blk_size; } close $f_new; close $f_old; # Only return if we have more than just the size header return length($patch) > 8 ? $patch : undef; } sub apply_bin_patch { my ($base_file, $patch_data) = @_; open my $fh, '+<:raw', $base_file or die $!; # Read the total new size (first 8 bytes) my $new_total_size = unpack("Q", substr($patch_data, 0, 8)); my $pos = 8; # Apply block updates while ($pos < length($patch_data)) { my ($offset, $len) = unpack("QL", substr($patch_data, $pos, 12)); $pos += 12; my $payload = substr($patch_data, $pos, $len); $pos += $len; sysseek($fh, $offset, 0); syswrite($fh, $payload, $len); } # Clip file to the new size # This removes any leftover bytes from the old version truncate($fh, $new_total_size) or die "Could not truncate: $!"; close $fh; } sub save_patches { my ($patches) = @_; return unless keys %$patches; my $tar = Archive::Tar->new; while (my ($obj_name, $patch) = each %$patches) { $tar->add_data("$obj_name.patch", $patch); } $tar->write(TMP_DIFF, 1); # gzip } # Convert decimal to a padded 7-char hex string sub to_hex_id { sprintf("%07x", $_[0]) } # Convert hex back to decimal sub from_hex_id { hex($_[0]) } sub launch_editor { my $editor = $ENV{EDITOR} || $ENV{VISUAL} || 'vi'; my $temp_msg_file = File::Spec->catfile(REPO, "COMMIT_EDITMSG"); open my $fh, '>', $temp_msg_file or die "Cannot create temp message file: $!"; print $fh "\n# Enter the commit message for your changes.\n"; print $fh "# Lines starting with '#' will be ignored.\n"; close $fh; system("$editor \"$temp_msg_file\""); my $final_msg = ""; if (-e $temp_msg_file) { open my $rfh, '<', $temp_msg_file or die $!; while (my $line = <$rfh>) { next if $line =~ /^#/; # Strip out the helper comments $final_msg .= $line; } close $rfh; unlink($temp_msg_file); # Clean up } $final_msg =~ s/^\s+|\s+$//g; # Trim whitespace return $final_msg; } sub write_file { my ($path, $content) = @_; open my $fh, '>', $path or die "Could not open '$path' for writing: $!"; print $fh $content; close $fh or die "Could not close '$path' after writing: $!"; } sub init_stage { my ($latest_tree_dir, $entries_ref) = @_; find({ wanted => sub { return if -d _; my $rel = File::Spec->abs2rel($_, $latest_tree_dir); my $staged_path = File::Spec->catfile("", $rel); make_path(dirname($staged_path)); my $target = readlink($_); symlink($target, $staged_path) or die "Failed to link $rel: $!"; $entries_ref->{$rel} = 1; }, no_chdir => 1 }, $latest_tree_dir); } sub touch_file { my ($path) = @_; my $dir = dirname($path); make_path($dir) unless -d $dir; open my $fh, '>', $path or die "Could not touch $path: $!"; close $fh; } sub hash_file_content { my ($filename) = @_; open(my $fh, '<:raw', $filename) or die "Could not open '$filename': $!"; my $sha = Digest::SHA->new(1); $sha->addfile($fh); close($fh); return $sha->hexdigest; } sub stream_tree { my (@paths) = @_; my $chunk_size = 1024 * 64; # 64 KB chunks for IO buffering my @buf; my $buf_size = 0; my $tot_size = 0; my $use_disk = 0; my ($tmp_fh, $tmp_path); my $flush = sub { if (!$use_disk) { ($tmp_fh, $tmp_path) = tempfile(UNLINK => 1); $tmp_fh->setvbuf(undef, _IOFBF, chunk_size); binmode $tmp_fh, ":utf8"; $use_disk = 1; } print $tmp_fh @buf; @buf = (); $buf_size = 0; }; my @stack = @paths; while (@stack) { my $path = (pop @stack) =~ s|^\./||r; my @st = lstat($path); if (-d _) { if (opendir(my $dh, $path)) { push @stack, map { File::Spec->catfile($path, $_) } grep { $_ ne '.' && $_ ne '..' && $_ ne 'REPO' } readdir($dh); closedir($dh); } else { warn "Could not open '$path': $!\n"; } } elsif (-f _ || -l _ || !-e $path) { # Use 0 as a default for size and mtime for deleted files. my $size = $st[7] // 0; my $mtime = $st[9] // 0; my $line = "$clean_path\t$st[7]\t$st[9]\n"; my $len = length($record); push @buf, $line; $buf_size += $len; $tot_size += $len; if ((!$use_disk && $tot_size > MEM_LIMIT) || ($use_disk && $buf_size > $chunk_size)) { $flush->(); } } } if (!$use_disk) { @buf = sort @buf; return sub { my $line = shift @buffer; return unless $line; chomp $line; my ($p, $s, $m) = split(/\t/, $line); return { path => $p, size => $s, mtime => $m }; }; } else { $flush->() if @buffer; # Clear remaining close $tmp_fh; open(my $sort_fh, "-|", "sort", "-t", "\t", "-k1,1", $tmp_path) or die "Could not open sort pipe: $!"; return sub { my $line = <$sort_fh>; unless ($line) { close $sort_fh; # Reap the sort process return; } chomp $line; my ($p, $s, $m) = split(/\t/, $line); return { path => $p, size => $s, mtime => $m }; }; } } sub stream_index { my $index = INDEX; my $offset_len = 8; return sub { return; } unless -e $index && -s $index > $offset_len; open(my $fh, "<:raw", $index) or die "Could not open index: $!"; my $file_size = -s $index_path; seek($fh, $file_size - $offset_len, 0); read($fh, my $buf, $offset_len); my $offset = unpack("Q", $buf); seek($fh, $offset, 0); return sub { my $pos = tell($fh); return if $pos >= ($file_size - $offset_len); my $line = <$fh>; unless (defined $line) { close $fh; return; } chomp $line; my ($path, $size, $mtime, $hash) = split(/\t/, $line); return { path => $path, size => $size, mtime => $mtime, hash => $hash, }; }; }