#!/usr/bin/perl use strict; use warnings; use File::Path qw(make_path); use File::Copy qw(copy); use File::Find; use File::Compare; use File::Basename; use File::Glob qw(:bsd_glob); use File::Spec; use Getopt::Long; use Archive::Tar; use Compress::Zlib; use POSIX qw(strftime); use Digest::SHA qw(sha1_hex); use constant VCX_DIR => '.vcx'; use constant HEAD => VCX_DIR . '/head'; # Current commit ID use constant OBJ_DIR => VCX_DIR . '/obj'; # Latest version of a file use constant REV_DIR => VCX_DIR . '/rev'; # Commits # Staging area use constant TMP_DIR => VCX_DIR . '/index'; use constant TMP_TREE => TMP_DIR . '/tree'; use constant TMP_META => TMP_DIR . '/meta'; use constant TMP_DIFF => TMP_DIR . '/delta.tar.gz'; Getopt::Long::Configure("bundling"); my $cmd = shift @ARGV // ''; my @args = @ARGV; if ($cmd eq 'init') { run_init(); } elsif ($cmd eq 'status') { run_status(); } elsif ($cmd eq 'add') { die "Usage: $0 add [path1] [path2] ...\n" unless @args; run_add(@args); } elsif ($cmd eq 'commit') { my ($m, $a); GetOptions('m=s' => \$m, 'a' => \$a); if ($a) { run_add("."); } run_commit($m); } elsif ($cmd eq 'log') { run_log(); } else { print "Usage: $0 [init|status|add|commit|log]\n"; exit 1; } sub run_init { make_path(OBJ_DIR, REV_DIR); my $initial_hex = to_hex_id(0); my $rev0_dir = File::Spec->catfile(REV_DIR, $initial_hex); make_path($rev0_dir); write_file(HEAD, "$initial_hex\n"); # Baseline tree (empty) my $empty_tree_hash = sha1_hex(""); my $empty_tree_file = File::Spec->catfile($rev0_dir, "tree-$empty_tree_hash"); open my $fh, '>', $empty_tree_file or die $!; close $fh; make_path(File::Spec->catdir(OBJ_DIR, $empty_tree_hash)); open my $mfh, '>', File::Spec->catfile($rev0_dir, "message"); close $mfh; print "Initialized repository.\n"; } sub run_status { open my $fh, '<', HEAD or die "VCX not initialized.\n"; my $head = <$fh>; chomp $head; close $fh; print "On revision [$head]\n"; my %staged_diffs; my $staged_diff_bundle = TMP_DIFF; if (-e TMP_DIFF) { my @list = qx(tar -tf '$staged_diff_bundle'); foreach (@list) { chomp; if (/(.+)\.patch$/) { $staged_diffs{$1} = 1; } } } my ($tree_ptr) = bsd_glob(File::Spec->catfile(REV_DIR, $head, "tree-*")); my ($tree_hash) = $tree_ptr =~ /tree-([a-f0-9]{40})$/; my $latest_tree_dir = File::Spec->catdir(OBJ_DIR, $tree_hash); # Pass 1: Workspace -> History (Detects New and Modified) find({ wanted => sub { return if $File::Find::name =~ /^\.\/\Q${\VCX_DIR}\E/; return if -d $File::Find::name; my $rel = File::Spec->abs2rel($File::Find::name, '.'); $rel =~ s|^\./||; my $base_in_tree = File::Spec->catfile($latest_tree_dir, $rel); my $path_hash = sha1_hex($rel); if (-e $base_in_tree || -l $base_in_tree) { my $obj_in_store = readlink($base_in_tree); if (compare($_, $obj_in_store) != 0) { my $staged = $staged_diffs{$path_hash} ? " (staged)" : ""; print "[M] $rel$staged\n"; } } else { my $tmp_link = File::Spec->catfile(TMP_TREE, $rel); my $staged = (-e $tmp_link || -l $tmp_link) ? " (staged)" : ""; print "[N] $rel$staged\n"; } }, no_chdir => 1 }, '.'); # Pass 2: History -> Workspace (Detects Deletions) if (-d $latest_tree_dir) { find({ wanted => sub { return if -d $_; my $rel = File::Spec->abs2rel($_, $latest_tree_dir); # If it's in the commit tree but GONE from the workspace if (!-e $rel && !-l $rel) { my $tmp_link = File::Spec->catfile(TMP_TREE, $rel); my $staged = (!-e $tmp_link && !-l $tmp_link) ? " (staged)" : ""; print "[D] $rel$staged\n"; } }, no_chdir => 1 }, $latest_tree_dir); } } sub run_add { my @targets = @_; my %patches; make_path(TMP_TREE); open my $fh_h, '<', HEAD or die $!; my $head = <$fh_h>; chomp $head; close $fh_h; my ($latest_tree_ptr) = bsd_glob(File::Spec->catfile(REV_DIR, $head, "tree-*")); my ($latest_tree_hash) = $latest_tree_ptr =~ /tree-([a-f0-9]{40})$/; my $latest_tree_dir = File::Spec->catdir(OBJ_DIR, $latest_tree_hash); my $next_id_hex = to_hex_id(from_hex_id($head) + 1); my %entries; open my $afh, '>>', TMP_META or die $!; init_stage($latest_tree_dir, \%entries); foreach my $input (@targets) { my @expanded = bsd_glob($input); foreach my $t (@expanded) { find({ wanted => sub { if ($File::Find::name =~ /\/\Q${\VCX_DIR}\E$/ || $_ eq VCX_DIR) { $File::Find::prune = 1; return; } return if -d $_; my $rel = $File::Find::name =~ s|^\./||r; $entries{$rel} = 1; my $staged_path = File::Spec->catfile(TMP_TREE, $rel); my $prev_link = File::Spec->catfile($latest_tree_dir, $rel); # CASE 1: Regular File if (-f $_ && !-l $_) { return if -e $staged_path; # Already staged if (-l $prev_link) { my $obj_in_head = readlink($prev_link); return if compare($_, $obj_in_head) == 0; } my $obj_name = sha1_hex($rel); my $obj_path = File::Spec->catfile(OBJ_DIR, $obj_name); # Prepare patches in memory and save to disk in one write. make_patch($File::Find::name, $obj_name, $obj_path, \%patches); stage_file($File::Find::name, $obj_path, $staged_path); print $afh "$rel\n"; # Record in meta file for the commit command } # CASE 2: Symlink elsif (-l _) { my $target = readlink($File::Find::name); if (-l $prev_link) { return if readlink($prev_link) eq $target; } if (-l $staged_path) { return if readlink($staged_path) eq $target; } stage_link($File::Find::name, $staged_path); } }, no_chdir => 1, }, $t); } } save_patches(\%patches); # Pass 2: History -> Workspace (Detects Deletions) foreach my $path (keys %entries) { if (!-e $path && !-l $path) { delete $entries{$path}; my $staged_path = File::Spec->catfile(TMP_TREE, $path); if (-e $staged_path || -l $staged_path) { unlink($staged_path) or die "Could not unlink staged $path: $!"; my $parent = dirname($staged_path); while ($parent ne TMP_TREE && -d $parent) { last if bsd_glob("$parent/*"); # Stop if not empty rmdir($parent); $parent = dirname($parent); } } print "[D] $path (staged for deletion)\n"; } } close $afh; my @sorted_paths = sort keys %entries; my $tree_ents = join("\n", @sorted_paths); my $tree_header = "tree " . scalar(@sorted_paths) . "\n"; my $tree_data = $tree_header . $tree_ents; my $tree_hash = sha1_hex($tree_data); my $tree_file = File::Spec->catfile(TMP_DIR, "tree-$tree_hash"); open my $fh, '>', $tree_file or die $!; close $fh; } sub run_commit { my ($msg) = @_; my ($staged_tree_ptr) = bsd_glob(File::Spec->catfile(TMP_DIR, "tree-*")); my ($tree_hash) = $staged_tree_ptr =~ /tree-([a-f0-9]{40})$/; my $tree_path = File::Spec->catdir(OBJ_DIR, $tree_hash); my $tree_exists = -d $tree_path; my $content_changed = (-e TMP_META || -e TMP_DIFF); if ($tree_exists && !$content_changed) { print "Nothing to commit."; File::Path::remove_tree(TMP_DIR) if -d TMP_DIR; return; } if (!$msg || $msg eq "") { $msg = launch_editor(); die "Commit aborted: empty message.\n" unless $msg =~ /\S/; } # Prepare IDs open my $fh_h, '<', HEAD or die "Not a repository.\n"; my $old_head = <$fh_h>; chomp $old_head; close $fh_h; my $next_id_hex = to_hex_id(from_hex_id($old_head) + 1); my $rev_dir = File::Spec->catfile(REV_DIR, $next_id_hex); make_path($rev_dir); # Save file to store if (-e TMP_META) { open my $mfh, '<', TMP_META or die $!; while (my $rel = <$mfh>) { chomp $rel; my $obj_name = sha1_hex($rel); my $obj_path = File::Spec->catfile(OBJ_DIR, $obj_name); copy($rel, $obj_path) or die "Failed to update store for $rel: $!"; } close $mfh; } # Save tree (snapshots the structure) if (!$tree_exists) { make_path($tree_path); rename(TMP_TREE, $tree_path) or die "Failed to save directories: $!"; } rename($staged_tree_ptr, File::Spec->catfile($rev_dir, "tree-$tree_hash")) or die "Failed to save tree pointer to revision: $!"; # Move deltas if (-e TMP_DIFF) { my $dest_diff = File::Spec->catfile($rev_dir, "delta.tar.gz"); rename(TMP_DIFF, $dest_diff) or die "Failed to move delta to $dest_diff: $!"; } write_file(File::Spec->catfile($rev_dir, "message"), "$msg\n"); write_file(HEAD, "$next_id_hex\n"); # Update head File::Path::remove_tree(TMP_DIR) if -d TMP_DIR; my ($subject) = split(/\n/, $msg); print "Committed revision [$next_id_hex]: $subject\n"; } sub run_log { open my $fh_h, '<', HEAD or die "Not a repository.\n"; my $head = <$fh_h>; chomp $head; close $fh_h; # Setup pager my $pager = $ENV{PAGER} || 'less -R'; open(my $pipe, "| $pager") or die "Can't pipe to $pager: $!"; my $old_fh = select($pipe); print "Revision History (HEAD: $head)\n\n"; my $rev_num = from_hex_id($head); while ($rev_num > 0) { my $hex_id = to_hex_id($rev_num); my $rev_dir = File::Spec->catfile(REV_DIR, $hex_id); # Stop if we hit a gap in history or the beginning last unless -d $rev_dir; # Stat index 9 is the last modification time my $mtime = (stat($rev_dir))[9]; my $date_str = strftime("%a %b %e %H:%M:%S %Y", localtime($mtime)); my $msg_file = File::Spec->catfile($rev_dir, "message"); my $message = "[No message]"; if (-e $msg_file) { open my $mfh, '<', $msg_file; $message = do { local $/; <$mfh> } // "[Empty message]"; $message =~ s/^\s+|\s+$//g; # Trim whitespace close $mfh; } print "commit $hex_id\n"; print "Date: $date_str\n"; print "\n $message\n\n"; $rev_num--; } close $pipe; select($old_fh); } sub stage_file { my ($src, $obj, $tmp) = @_; make_path(dirname($tmp)); # We want the link inside tmp/ to point to "obj/HASH" # relative to project root. my $rel_target = $obj; unlink($tmp) if -e $tmp || -l $tmp; symlink($rel_target, $tmp) or die "Failed to symlink $tmp: $!"; print "[Staged File] $src\n"; } sub stage_link { my ($src, $tmp) = @_; my $target = readlink($src); make_path(dirname($tmp)); unlink($tmp) if -e $tmp || -l $tmp; # For workspace symlinks, we clone the target symlink($target, $tmp) or die "Failed to symlink $tmp: $!"; print "[Staged link] $src -> $target\n"; } sub make_patch { my ($src, $obj_name, $obj_path, $patches) = @_; return unless -e $obj_path; my $patch; # TODO: implement a disk-based diff for large files, esp bin files if (-T $src) { if (compare($src, $obj_path) != 0) { $patch = qx(diff -u '$obj_path' '$src'); } } else { $patch = make_bin_patch($src, $obj_path); } if ($patch) { $patches->{$obj_name} = $patch; } } sub make_bin_patch { my ($new_file, $old_file) = @_; open my $f_new, '<:raw', $new_file or die "Cannot open file: $!"; open my $f_old, '<:raw', $old_file or die "Cannot open object file: $!"; my $f_out; my $patch = ""; my $offset = 0; my $blk_size = 4096; while (1) { my $read_new = sysread($f_new, my $buf_new, $blk_size); my $read_old = sysread($f_old, my $buf_old, $blk_size); # Stop when we've processed the entire new file last if $read_new == 0 && $read_old == 0; # Handle file size differences (pad with nulls if one is shorter) $buf_new .= "\0" x ($blk_size - length($buf_new)) if length($buf_new) < $blk_size; $buf_old .= "\0" x ($blk_size - length($buf_old)) if length($buf_old) < $blk_size; # If they differ, we save the OLD buffer (reverse delta) if ($buf_new ne $buf_old) { # Header: [64-bit offset][32-bit length][data] # 'Q' is 64-bit unsigned (quad), 'L' is 32-bit unsigned (long) $patch .= pack("QL", $offset, length($buf_old)) . $buf_old; } $offset += $blk_size; } close $f_new; close $f_old; close $f_out if $f_out; return length($patch) > 0 ? $patch : undef; } sub save_patches { my ($patches) = @_; return unless keys %$patches; my $tar = Archive::Tar->new; while (my ($obj_name, $patch) = each %$patches) { $tar->add_data("$obj_name.patch", $patch); } $tar->write(TMP_DIFF, 1); # gzip } sub apply_bin_patch { my ($obj_file, $patch_file) = @_; open my $obj_fh, '+<:raw', $obj_file or die "Cannot open object: $!"; open my $ptch_fh, '<:raw', $patch_file or die "Cannot open patch: $!"; # Header is 12 bytes (Q + L) while (sysread($ptch_fh, my $header, 12)) { my ($offset, $len) = unpack("QL", $header); # sysread/syswrite to avoid read()'s internal buffers. sysread($ptch_fh, my $payload, $len); sysseek($obj_fh, $offset, 0); syswrite($obj_fh, $payload, $len); } close $obj_fh; close $ptch_fh; } # Convert decimal to a padded 7-char hex string sub to_hex_id { sprintf("%07x", $_[0]) } # Convert hex back to decimal sub from_hex_id { hex($_[0]) } sub launch_editor { my $editor = $ENV{EDITOR} || $ENV{VISUAL} || 'vi'; my $temp_msg_file = File::Spec->catfile(VCX_DIR, "COMMIT_EDITMSG"); open my $fh, '>', $temp_msg_file or die "Cannot create temp message file: $!"; print $fh "\n# Enter the commit message for your changes.\n"; print $fh "# Lines starting with '#' will be ignored.\n"; close $fh; system("$editor \"$temp_msg_file\""); my $final_msg = ""; if (-e $temp_msg_file) { open my $rfh, '<', $temp_msg_file or die $!; while (my $line = <$rfh>) { next if $line =~ /^#/; # Strip out the helper comments $final_msg .= $line; } close $rfh; unlink($temp_msg_file); # Clean up } $final_msg =~ s/^\s+|\s+$//g; # Trim whitespace return $final_msg; } sub write_file { my ($path, $content) = @_; open my $fh, '>', $path or die "Could not open '$path' for writing: $!"; print $fh $content; close $fh or die "Could not close '$path' after writing: $!"; } sub init_stage { my ($latest_tree_dir, $entries_ref) = @_; find({ wanted => sub { return if -d _; my $rel = File::Spec->abs2rel($_, $latest_tree_dir); my $staged_path = File::Spec->catfile(TMP_TREE, $rel); make_path(dirname($staged_path)); my $target = readlink($_); symlink($target, $staged_path) or die "Failed to link $rel: $!"; $entries_ref->{$rel} = 1; }, no_chdir => 1 }, $latest_tree_dir); }