#!/usr/bin/perl
#
# vcx - a minimal single-file revision control tool.
#
# Repository layout (everything lives under .vcx/):
#   head   - id of the current revision (7-char zero-padded hex)
#   index  - staged state; TAB-separated lines:
#              s_hash  b_hash  mtime  size  path
#            s_hash = SHA-1 of the file content as staged
#            b_hash = hash of the object the content is stored against
#                     (equal to s_hash for full blobs, or the base blob's
#                     hash when only a delta patch is recorded at commit)
#   obj/   - content-addressed object store, sharded on the first two hex
#            digits of the hash
#   rev/   - one metadata file per revision (tree:/parent:/patch:/date:/msg:)
#   stg/   - staging area holding snapshots of changed files plus scratch
#            files used while building a commit

use strict;
use warnings;

use IO::Handle;
use IO::Compress::Gzip qw(gzip $GzipError);
use File::Path qw(make_path);
use File::Copy qw(copy);
use File::Find;
use File::Compare;
use File::Basename;
use File::Glob qw(:bsd_glob);
use File::Spec;
use File::Temp qw(tempfile);
use Getopt::Long;
use Archive::Tar;
use Compress::Zlib;
use POSIX qw(strftime);
use Digest::SHA qw(sha1_hex);

use constant REPO    => '.vcx';
use constant HEAD    => REPO . '/head';     # Current revision ID
use constant INDEX   => REPO . '/index';    # Index
use constant OBJ_DIR => REPO . '/obj';      # Object store
use constant REV_DIR => REPO . '/rev';      # Revisions

# Staging area
use constant TMP_DIR  => REPO . '/stg';
use constant TMP_DIFF => TMP_DIR . '/delta.tar.gz';

use constant MEM_LIMIT      => 64 * 1024 * 1024;
use constant MAX_INDEX_SIZE => 16 * 1024 * 1024;

Getopt::Long::Configure("bundling");

# ---- command dispatch -----------------------------------------------------

my $cmd  = shift @ARGV // '';
my @args = @ARGV;

if ($cmd eq 'init') {
    run_init();
}
elsif ($cmd eq 'status') {
    run_status();
}
elsif ($cmd eq 'add') {
    die "Usage: $0 add [path1] [path2] ...\n" unless @args;
    run_add(@args);
}
elsif ($cmd eq 'commit') {
    my ($m, $a);
    GetOptions('m=s' => \$m, 'a' => \$a);
    run_add(".") if $a;    # -a stages the whole working tree first
    run_commit($m);
}
elsif ($cmd eq 'log') {
    run_log();
}
else {
    print "Usage: $0 [init|status|add|commit|log]\n";
    exit 1;
}

# Create the repository skeleton and point HEAD at revision 0.
sub run_init {
    make_path(OBJ_DIR, REV_DIR, TMP_DIR);
    touch_file(INDEX);
    my $rev_id = to_hex_id(0);
    write_file(HEAD, "$rev_id\n");
    print "Initialized repository.\n";
}

# Intentionally a no-op for now.
sub run_status { }

# Merge the current index with the working tree and atomically rewrite the
# index. New or changed files are hashed and snapshotted into the staging
# area; entries for files missing from disk are kept verbatim (deletion is
# detected later, at commit time, by diffing against the parent tree).
sub run_add {
    my @paths = @_;
    @paths = (".") unless @paths;

    my $idx_path = INDEX;
    my $tmp_idx  = "$idx_path.tmp";

    my $it_idx = stream_index();
    my $it_wrk = stream_tree(@paths);

    open(my $out, ">:raw", $tmp_idx) or die "Could not create $tmp_idx: $!";

    # Sorted-merge of the two path-ordered streams.
    my $idx_entry = $it_idx->();
    my $wrk_entry = $it_wrk->();
    while ($idx_entry || $wrk_entry) {
        my $cmp = !defined $idx_entry ? 1
                : !defined $wrk_entry ? -1
                : $idx_entry->{path} cmp $wrk_entry->{path};

        if ($cmp == 0) {
            if (   $idx_entry->{mtime} == $wrk_entry->{mtime}
                && $idx_entry->{size} == $wrk_entry->{size}) {
                # No change
                print $out join("\t",
                    $idx_entry->{s_hash}, $idx_entry->{b_hash},
                    $idx_entry->{mtime},  $idx_entry->{size},
                    $idx_entry->{path}) . "\n";
            }
            else {
                my $current_hash = hash_file_content($wrk_entry->{path});

                # Snapshot to staging area
                my $stg_path = File::Spec->catfile(TMP_DIR, $wrk_entry->{path});
                make_path(dirname($stg_path));
                copy($wrk_entry->{path}, $stg_path);

                if ($current_hash eq $idx_entry->{s_hash}) {
                    # Touched but content identical: refresh stat info only.
                    print $out join("\t",
                        $idx_entry->{s_hash}, $idx_entry->{b_hash},
                        $wrk_entry->{mtime},  $wrk_entry->{size},
                        $wrk_entry->{path}) . "\n";
                }
                else {
                    print $out join("\t",
                        $current_hash,       $idx_entry->{b_hash},
                        $wrk_entry->{mtime}, $wrk_entry->{size},
                        $wrk_entry->{path}) . "\n";
                }
            }
            $idx_entry = $it_idx->();
            $wrk_entry = $it_wrk->();
        }
        elsif ($cmp > 0) {
            # New file: hash and snapshot to staging.
            my $hash     = hash_file_content($wrk_entry->{path});
            my $stg_path = File::Spec->catfile(TMP_DIR, $wrk_entry->{path});
            make_path(dirname($stg_path));
            copy($wrk_entry->{path}, $stg_path);
            print $out join("\t",
                $hash, $hash,
                $wrk_entry->{mtime}, $wrk_entry->{size},
                $wrk_entry->{path}) . "\n";
            $wrk_entry = $it_wrk->();
        }
        else {
            # File in index but missing from disk: keep the stale entry.
            print $out join("\t",
                $idx_entry->{s_hash}, $idx_entry->{b_hash},
                $idx_entry->{mtime},  $idx_entry->{size},
                $idx_entry->{path}) . "\n";
            $idx_entry = $it_idx->();
        }
    }
    close $out;
    rename($tmp_idx, $idx_path) or die "Failed to update index: $!";
}

# Build a revision from the index. For each changed file a delta patch is
# kept (text: unified diff; binary: make_bin_patch) when it is smaller than
# half the file, otherwise the full blob is moved into the object store.
# Writes the tree object, an optional gzipped patch-bundle object, the
# revision metadata file, the rewritten index, and advances HEAD.
sub run_commit {
    my ($msg) = @_;
    if (!defined $msg || $msg eq "") {
        $msg = get_commit_message();
    }

    my $parent_id        = read_head() // to_hex_id(0);
    my $parent_tree_hash = "";
    if ($parent_id ne to_hex_id(0)) {
        my $p_rev_file = File::Spec->catfile(REV_DIR, $parent_id);
        if (open my $pfh, '<', $p_rev_file) {
            while (<$pfh>) {
                if (/^tree:(.*)$/) { $parent_tree_hash = $1; last; }
            }
            close $pfh;
        }
    }

    my $it_idx = stream_index();
    my $it_old = stream_tree_file($parent_tree_hash);

    # New tree object: hashed incrementally, spilled to disk past MEM_LIMIT.
    my $sha_new_tree = Digest::SHA->new(1);
    my @new_tree_lines;
    my $new_tree_size = 0;
    my $use_disk_nt   = 0;
    my ($nt_fh, $nt_path);

    # Patch bundle: kept in memory until MEM_LIMIT, then spilled to a tar.
    my %patches;
    my $patch_mem_size = 0;
    my $use_disk_patch = 0;
    my ($pt_fh, $pt_path);

    my ($tmp_idx_fh, $tmp_idx_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
    binmode $tmp_idx_fh, ":raw";

    # Sorted-merge of the index against the parent tree.
    my $idx = $it_idx->();
    my $old = $it_old->();
    while ($idx || $old) {
        my $cmp = !defined $idx ? 1
                : !defined $old ? -1
                : $idx->{path} cmp $old->{path};
        my ($out_s, $out_b, $out_m, $out_z, $out_p);

        if ($cmp < 0) {
            # New file: move the staged snapshot into the object store.
            $out_p = $idx->{path};
            $out_s = $idx->{s_hash};
            $out_b = $idx->{s_hash};
            $out_m = $idx->{mtime};
            $out_z = $idx->{size};
            my $obj_path = get_obj_path($out_b);
            my $stg_file = File::Spec->catfile(TMP_DIR, $out_p);
            if (!-e $obj_path) {
                # rename() fails across filesystems; fall back to copy.
                rename($stg_file, $obj_path) or copy($stg_file, $obj_path);
            }
            $idx = $it_idx->();
        }
        elsif ($cmp == 0) {
            # Modified/unchanged
            $out_p = $idx->{path};
            $out_s = $idx->{s_hash};
            $out_m = $idx->{mtime};
            $out_z = $idx->{size};
            if ($idx->{s_hash} ne $old->{hash}) {
                my $base_obj = get_obj_path($old->{hash});
                my $stg_file = File::Spec->catfile(TMP_DIR, $out_p);

                # Compute a delta against the parent blob. List-form pipe
                # open avoids the shell, so metacharacters in file names
                # cannot be injected (the previous qx() was exploitable).
                my $patch;
                if (-T $stg_file) {
                    if (open my $diff_fh, '-|', 'diff', '-u', $base_obj, $stg_file) {
                        local $/;
                        $patch = <$diff_fh>;
                        close $diff_fh;
                    }
                }
                else {
                    $patch = make_bin_patch($stg_file, $base_obj);
                }

                # Keep the patch only if it is worthwhile (< 50% of the file).
                if (defined $patch && length($patch) < ($out_z * 0.5)) {
                    if (!$use_disk_patch
                        && ($patch_mem_size + length($patch)) > MEM_LIMIT) {
                        # Spill accumulated in-memory patches to a tar file.
                        ($pt_fh, $pt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
                        my $tar = Archive::Tar->new;
                        $tar->add_data($_, $patches{$_}) for keys %patches;
                        $tar->write($pt_fh);
                        %patches        = ();
                        $use_disk_patch = 1;
                    }
                    if ($use_disk_patch) {
                        # NOTE(review): re-reads and rewrites the whole tar
                        # for every patch (quadratic); fine for small spills.
                        my $tar = Archive::Tar->new($pt_path);
                        $tar->add_data("$out_p.patch", $patch);
                        $tar->write($pt_path);
                    }
                    else {
                        $patches{"$out_p.patch"} = $patch;
                        $patch_mem_size += length($patch);
                    }
                    $out_b = $old->{hash};
                    unlink($stg_file);    # Remove staged file since we only need the patch
                }
                else {
                    $out_b = $out_s;
                    my $obj_path = get_obj_path($out_b);
                    if (!-e $obj_path) {
                        rename($stg_file, $obj_path) or copy($stg_file, $obj_path);
                    }
                }
            }
            else {
                $out_b = $old->{hash};
            }
            $idx = $it_idx->();
            $old = $it_old->();
        }
        else {
            # Deleted: omit from the new tree entirely.
            $old = $it_old->();
            next;
        }

        # Record tree line (hash it now; buffer or spill the text).
        my $t_line = "$out_b\t$out_p\n";
        $sha_new_tree->add($t_line);
        $new_tree_size += length($t_line);
        if (!$use_disk_nt && $new_tree_size > MEM_LIMIT) {
            ($nt_fh, $nt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
            print $nt_fh @new_tree_lines;
            @new_tree_lines = ();
            $use_disk_nt    = 1;
        }
        $use_disk_nt ? print $nt_fh $t_line : push @new_tree_lines, $t_line;

        # Record index line
        print $tmp_idx_fh join("\t", $out_s, $out_b, $out_m, $out_z, $out_p) . "\n";
    }

    # Finalize tree object
    my $new_tree_hash = $sha_new_tree->hexdigest;
    my $tree_obj_path = get_obj_path($new_tree_hash);
    if ($use_disk_nt) {
        close $nt_fh;
        rename($nt_path, $tree_obj_path);
    }
    else {
        write_file($tree_obj_path, join("", @new_tree_lines));
    }

    # Finalize patch bundle (tar of *.patch members, gzipped, stored by hash)
    my $patch_bundle_hash = "";
    if ($use_disk_patch || %patches) {
        my $tar_data;
        if ($use_disk_patch) {
            open my $pt_in, '<:raw', $pt_path;
            $tar_data = do { local $/; <$pt_in> };
            close $pt_in;
            unlink $pt_path;
        }
        else {
            my $tar = Archive::Tar->new;
            $tar->add_data($_, $patches{$_}) for keys %patches;
            $tar_data = $tar->write();
        }
        my $gzipped_payload;
        gzip \$tar_data => \$gzipped_payload or die "Gzip failed: $GzipError";
        $patch_bundle_hash = sha1_hex($gzipped_payload);
        write_file(get_obj_path($patch_bundle_hash), $gzipped_payload);
    }

    # Revision metadata file
    my $next_id = to_hex_id(from_hex_id($parent_id) + 1);
    $msg //= "Commit $next_id";    # editor aborted / empty message
    open my $rfh, '>:raw', File::Spec->catfile(REV_DIR, $next_id) or die $!;
    print $rfh "tree:$new_tree_hash\nparent:$parent_id\n";
    print $rfh "patch:$patch_bundle_hash\n" if $patch_bundle_hash;
    print $rfh "date:" . time() . "\nmsg:$msg\n";
    close $rfh;

    # Publish the new index and HEAD, then reset the staging area.
    close $tmp_idx_fh;
    rename($tmp_idx_path, INDEX);
    write_file(HEAD, "$next_id\n");
    File::Path::remove_tree(TMP_DIR);
    make_path(TMP_DIR);
    print "Revision $next_id committed.\n";
}

# Walk the revision chain backwards from HEAD and print each revision's
# id, date and message through the user's pager.
sub run_log {
    open my $fh_h, '<', HEAD or die "Not a repository.\n";
    my $head = <$fh_h>;
    chomp $head;
    close $fh_h;

    # Setup pager. $PAGER may legitimately contain arguments ("less -R"),
    # so it is deliberately passed through the shell here.
    my $pager = $ENV{PAGER} || 'less -R';
    open(my $pipe, "| $pager") or die "Can't pipe to $pager: $!";
    my $old_fh = select($pipe);

    print "Revision History (HEAD: $head)\n\n";
    my $current_id = $head;
    while ($current_id) {
        my $rev_file = File::Spec->catfile(REV_DIR, $current_id);
        last unless -f $rev_file;
        open my $rfh, '<', $rev_file or last;
        my ($tree, $parent, $date, $msg) = ("", "", "", "[No message]");
        while (<$rfh>) {
            chomp;
            if    (/^tree:(.*)$/)   { $tree   = $1; }
            elsif (/^parent:(.*)$/) { $parent = $1; }
            elsif (/^date:(.*)$/)   { $date   = $1; }
            elsif (/^msg:(.*)$/)    { $msg    = $1; }
        }
        close $rfh;
        my $date_str = $date
            ? strftime("%a %b %e %H:%M:%S %Y", localtime($date))
            : "Unknown Date";
        print "commit $current_id\n";
        print "Date: $date_str\n";
        print "\n    $msg\n\n";
        # Move to parent to walk backward
        $current_id = $parent;
    }
    close $pipe;
    select($old_fh);
}

# Produce a block-level binary delta of $new_file against $old_file.
# Format: 8-byte total new size (pack "Q"), then for each 4 KiB block that
# differs: offset (Q), block length (L), raw block data. Returns undef when
# the files are block-for-block identical.
sub make_bin_patch {
    my ($new_file, $old_file) = @_;
    my $new_size = -s $new_file;
    open my $f_new, '<:raw', $new_file or die $!;
    open my $f_old, '<:raw', $old_file or die $!;

    # Start patch string with new total size (8 bytes)
    my $patch    = pack("Q", $new_size);
    my $offset   = 0;
    my $blk_size = 4096;
    while (1) {
        my $read_new = sysread($f_new, my $buf_new, $blk_size);
        my $read_old = sysread($f_old, my $buf_old, $blk_size);
        last if !$read_new && !$read_old;
        # If blocks differ, record the change
        if (($buf_new // '') ne ($buf_old // '')) {
            # Format: Offset (Q), Length (L), then the raw data
            $patch .= pack("QL", $offset, length($buf_new)) . $buf_new;
        }
        $offset += $blk_size;
    }
    close $f_new;
    close $f_old;

    # If only the 8-byte header exists, files were identical
    return length($patch) > 8 ? $patch : undef;
}

# Convert decimal to a padded 7-char hex string
sub to_hex_id { sprintf("%07x", $_[0]) }

# Convert hex back to decimal
sub from_hex_id { hex($_[0]) }

# Write $content to $path, creating parent directories as needed.
sub write_file {
    my ($path, $content) = @_;
    my $dir = dirname($path);
    make_path($dir) unless -d $dir;
    open my $fh, '>', $path or die "Could not open '$path' for writing: $!";
    print $fh $content;
    close $fh or die "Could not close '$path' after writing: $!";
}

# Create (or truncate) an empty file at $path.
sub touch_file {
    my ($path) = @_;
    my $dir = dirname($path);
    make_path($dir) unless -d $dir;
    open my $fh, '>', $path or die "Could not touch $path: $!";
    close $fh;
}

# SHA-1 of a file's content; for symlinks, hash the link target instead of
# following it.
sub hash_file_content {
    my ($filename) = @_;
    my $sha = Digest::SHA->new(1);
    if (-l $filename) {
        my $target = readlink($filename);
        die "Could not readlink '$filename': $!" unless defined $target;
        $sha->add($target);
    }
    else {
        open(my $fh, '<:raw', $filename) or die "Could not open '$filename': $!";
        $sha->addfile($fh);
        close($fh);
    }
    return $sha->hexdigest;
}

# Iterate the working tree under @paths (skipping the repository dir).
# Returns an iterator sub yielding { path, mtime, size } in path order.
# Listings are buffered in memory up to MEM_LIMIT, then spilled to a temp
# file and sorted externally with sort(1).
sub stream_tree {
    my (@paths) = @_;
    @paths = (".") unless @paths;

    my $chunk_size = 1024 * 64;
    my @buf;
    my $buf_size = 0;
    my $tot_size = 0;
    my $use_disk = 0;
    my ($tmp_fh, $tmp_path);

    my $flush = sub {
        if (!$use_disk) {
            ($tmp_fh, $tmp_path) = tempfile(UNLINK => 1);
            $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size);
            binmode $tmp_fh, ":raw";
            $use_disk = 1;
        }
        print $tmp_fh @buf;
        @buf      = ();
        $buf_size = 0;
    };

    my @stack = @paths;
    while (@stack) {
        my $path = (pop @stack) =~ s|^\./||r;
        next if $path eq REPO;
        my @st = lstat($path);
        if (-d _) {
            if (opendir(my $dh, $path)) {
                push @stack,
                    map  { File::Spec->catfile($path, $_) }
                    grep { $_ ne '.' && $_ ne '..' && $_ ne REPO } readdir($dh);
                closedir($dh);
            }
        }
        elsif (-f _ || -l _) {
            # Temp-file record format: path \t mtime \t size
            my $line = "$path\t$st[9]\t$st[7]\n";
            push @buf, $line;
            $buf_size += length($line);
            $tot_size += length($line);
            if (   (!$use_disk && $tot_size > MEM_LIMIT)
                || ($use_disk && $buf_size > $chunk_size)) {
                $flush->();
            }
        }
    }

    if (!$use_disk) {
        @buf = sort @buf;
        return sub {
            my $line = shift @buf;
            return unless $line;
            chomp $line;
            my ($p, $m, $s) = split(/\t/, $line);
            return { path => $p, mtime => $m, size => $s };
        };
    }
    else {
        $flush->() if @buf;
        close $tmp_fh;
        open(my $sort_fh, "-|", "sort", "-t", "\t", "-k1,1", $tmp_path)
            or die $!;
        return sub {
            my $line = <$sort_fh>;
            unless ($line) {
                close $sort_fh;
                return;
            }
            chomp $line;
            # BUGFIX: fields are written as path/mtime/size; the old code
            # unpacked them as ($p, $s, $m), transposing mtime and size.
            my ($p, $m, $s) = split(/\t/, $line);
            return { path => $p, mtime => $m, size => $s };
        };
    }
}

# Iterator over the index file; yields one entry hashref per line, or an
# always-empty iterator when the index is missing/empty.
sub stream_index {
    my $idx = INDEX;
    return sub { return; } unless -e $idx && -s $idx > 0;
    open(my $fh, "<:raw", $idx) or die "Could not open index: $!";
    return sub {
        my $line = <$fh>;
        unless (defined $line) {
            close $fh;
            return;
        }
        chomp $line;
        # Limit of 5 keeps tabs inside the path intact.
        my ($s_hash, $b_hash, $mtime, $size, $path) = split(/\t/, $line, 5);
        return {
            s_hash => $s_hash,
            b_hash => $b_hash,
            mtime  => $mtime,
            size   => $size,
            path   => $path,
        };
    };
}

# Logic for sharded object directory: .vcx/obj/<first 2 chars>/<rest>.
sub get_obj_path {
    my ($hash) = @_;
    my $dir = File::Spec->catfile(OBJ_DIR, substr($hash, 0, 2));
    make_path($dir) unless -d $dir;
    return File::Spec->catfile($dir, substr($hash, 2));
}

# Build a tree object ("s_hash\tpath" lines) directly from the index and
# store it by its own SHA-1. NOTE(review): not called anywhere in this
# file; kept for API completeness.
sub snapshot_tree {
    my $it = stream_index();
    my @buf;
    my $use_disk   = 0;
    my $total_size = 0;
    my $chunk_size = 1024 * 64;
    my $sha        = Digest::SHA->new(1);
    my ($tmp_fh, $tmp_path);

    while (my $entry = $it->()) {
        my $line = "$entry->{s_hash}\t$entry->{path}\n";
        $sha->add($line);
        $total_size += length($line);
        if (!$use_disk && $total_size > MEM_LIMIT) {
            ($tmp_fh, $tmp_path) = tempfile();
            $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size);
            binmode $tmp_fh, ":raw";
            print $tmp_fh @buf;
            @buf      = ();
            $use_disk = 1;
        }
        if ($use_disk) { print $tmp_fh $line; }
        else           { push @buf, $line; }
    }

    my $tree_hash = $sha->hexdigest;
    my $obj_path  = get_obj_path($tree_hash);
    if (!-e $obj_path) {
        if ($use_disk) {
            close $tmp_fh;
            rename($tmp_path, $obj_path) or die "Rename failed: $!";
        }
        else {
            write_file($obj_path, join("", @buf));
        }
    }
    else {
        unlink($tmp_path) if $use_disk;
    }
    return $tree_hash;
}

# Iterator over a stored tree object; yields { hash, path } per line.
# Missing hash or unreadable object yields an always-empty iterator.
sub stream_tree_file {
    my ($hash) = @_;
    return sub { return } unless $hash;
    my $path = get_obj_path($hash);
    open my $fh, '<:raw', $path or return sub { return };
    return sub {
        my $line = <$fh>;
        return unless $line;
        chomp $line;
        my ($h, $p) = split(/\t/, $line, 2);
        return { hash => $h, path => $p };
    }
}

# Current revision id from HEAD, or undef if absent/unreadable.
sub read_head {
    return unless -e HEAD;
    open my $fh, '<', HEAD or return;
    my $val = <$fh>;
    chomp $val if $val;
    return $val;
}

# Slurp a file's raw bytes; empty string on any failure.
sub read_file {
    my ($path) = @_;
    return "" unless -e $path;
    open my $fh, '<:raw', $path or return "";
    my $content = do { local $/; <$fh> };
    close $fh;
    return $content;
}

# Launch $VISUAL/$EDITOR on a commit-message template, strip '#' comment
# lines and surrounding whitespace, and return the message (undef aborts).
sub get_commit_message {
    my $editor   = $ENV{VISUAL} || $ENV{EDITOR} || 'vi';
    my $tmp_file = File::Spec->catfile(TMP_DIR, "COMMIT_EDITMSG");
    my $template = <<"EOF";
# Please enter the commit message for your changes. Lines starting
# with '#' will be ignored, and an empty message aborts the commit.
#
# On revision: @{[read_head() // '0']}
EOF
    write_file($tmp_file, $template);
    system($editor, $tmp_file);    # list form: no shell involved
    my $raw_content = read_file($tmp_file);
    my @lines       = split(/\n/, $raw_content);
    # Filter out lines starting with #
    my $final_msg = join("\n", grep { $_ !~ /^\s*#/ } @lines);
    $final_msg =~ s/^\s+|\s+$//g;    # Trim whitespace
    unlink($tmp_file);
    return ($final_msg ne "") ? $final_msg : undef;
}