diff options
| -rw-r--r-- | vcx | 216 |
1 files changed, 133 insertions, 83 deletions
@@ -28,6 +28,9 @@ use constant TMP_DIR => REPO . '/stg'; use constant TMP_META => TMP_DIR . '/meta'; use constant TMP_DIFF => TMP_DIR . '/delta.tar.gz'; +use constant MEM_LIMIT => 64 * 1024 * 1024; +use constant MAX_INDEX_SIZE => 16 * 1024 * 1024; + Getopt::Long::Configure("bundling"); my $cmd = shift @ARGV // ''; @@ -53,7 +56,7 @@ if ($cmd eq 'init') { } sub run_init { - make_path(OBJ_DIR, REV_DIR); + make_path(OBJ_DIR, REV_DIR, TMP_DIR); touch_file(INDEX); my $rev_id = to_hex_id(0); @@ -75,6 +78,80 @@ sub run_status { } sub run_add { + my @paths = @_; + @paths = (".") unless @paths; + + my $idx_path = INDEX; + my $tmp_idx = "$idx_path.tmp"; + + my $it_idx = stream_index(); + my $it_wrk = stream_tree(@paths); + + open(my $out, ">:raw", $tmp_idx) or die "Could not create $tmp_idx: $!"; + + my $idx_entry = $it_idx->(); + my $wrk_entry = $it_wrk->(); + + while ($idx_entry || $wrk_entry) { + my $cmp = !defined $idx_entry ? 1 + : !defined $wrk_entry ? -1 + : $idx_entry->{path} cmp $wrk_entry->{path}; + + my $path = ($cmp <= 0) ? $idx_entry->{path} : $wrk_entry->{path}; + my $matches_spec = matches_paths($path, \@paths); + + if ($cmp == 0) { + if ($matches_spec) { + if ($idx_entry->{mtime} == $wrk_entry->{mtime} && + $idx_entry->{size} == $wrk_entry->{size}) { + # No change + print $out join("\t", $idx_entry->{staged_hash}, $idx_entry->{base_hash}, + $idx_entry->{mtime}, $idx_entry->{size}, $idx_entry->{path}) . "\n"; + } else { + my $current_hash = hash_file_content($wrk_entry->{path}); + if ($current_hash eq $idx_entry->{staged_hash}) { + # mtime changed, but content is identical. + # Refresh mtime/size in index, but keep staged/base hashes. + print $out join("\t", $idx_entry->{staged_hash}, $idx_entry->{base_hash}, + $wrk_entry->{mtime}, $wrk_entry->{size}, $wrk_entry->{path}) . "\n"; + } else { + # Modified: update staged_hash to the new content, keep base_hash as-is. + print $out join("\t", $current_hash, $idx_entry->{base_hash}, + $wrk_entry->{mtime}, $wrk_entry->{size}, $wrk_entry->{path}) . "\n"; + } + } + } else { + # Outside pathspec: keep index row + print $out join("\t", $idx_entry->{staged_hash}, $idx_entry->{base_hash}, + $idx_entry->{mtime}, $idx_entry->{size}, $idx_entry->{path}) . "\n"; + } + $idx_entry = $it_idx->(); + $wrk_entry = $it_wrk->(); + } + elsif ($cmp > 0) { + # New File on disk + if ($matches_spec) { + my $hash = hash_file_content($wrk_entry->{path}); + # For a brand new file, staged and base start as the same hash. + print $out join("\t", $hash, $hash, $wrk_entry->{mtime}, $wrk_entry->{size}, + $wrk_entry->{path}) . "\n"; + } + $wrk_entry = $it_wrk->(); + } + else { + # File in index but missing from disk (deletion) + if (!$matches_spec) { + # Not in our current 'add' scope; keep it in the index. + print $out join("\t", $idx_entry->{staged_hash}, $idx_entry->{base_hash}, + $idx_entry->{mtime}, $idx_entry->{size}, $idx_entry->{path}) . "\n"; + } + # If it matches spec but is gone from disk, we drop it (staging the deletion). + $idx_entry = $it_idx->(); + } + } + + close $out; + rename($tmp_idx, $idx_path) or die "Failed to update index: $!"; } sub run_commit { @@ -281,26 +358,6 @@ sub write_file { close $fh or die "Could not close '$path' after writing: $!"; } -sub init_stage { - my ($latest_tree_dir, $entries_ref) = @_; - - find({ - wanted => sub { - return if -d _; - - my $rel = File::Spec->abs2rel($_, $latest_tree_dir); - my $staged_path = File::Spec->catfile("", $rel); - make_path(dirname($staged_path)); - - my $target = readlink($_); - symlink($target, $staged_path) or die "Failed to link $rel: $!"; - - $entries_ref->{$rel} = 1; - }, - no_chdir => 1 - }, $latest_tree_dir); -} - sub touch_file { my ($path) = @_; my $dir = dirname($path); @@ -311,35 +368,39 @@ sub touch_file { sub hash_file_content { my ($filename) = @_; + my $sha = Digest::SHA->new(1); - open(my $fh, '<:raw', $filename) or die "Could not open '$filename': $!"; + if (-l $filename) { + my $target = readlink($filename); + die "Could not readlink '$filename': $!" unless defined $target; + $sha->add($target); + } else { + open(my $fh, '<:raw', $filename) or die "Could not open '$filename': $!"; + $sha->addfile($fh); + close($fh); + } - my $sha = Digest::SHA->new(1); - $sha->addfile($fh); - close($fh); return $sha->hexdigest; } sub stream_tree { my (@paths) = @_; + @paths = (".") unless @paths; - my $chunk_size = 1024 * 64; # 64 KB chunks for IO buffering - + my $chunk_size = 1024 * 64; my @buf; my $buf_size = 0; my $tot_size = 0; my $use_disk = 0; - my ($tmp_fh, $tmp_path); my $flush = sub { if (!$use_disk) { ($tmp_fh, $tmp_path) = tempfile(UNLINK => 1); - $tmp_fh->setvbuf(undef, _IOFBF, chunk_size); - binmode $tmp_fh, ":utf8"; + $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size); + binmode $tmp_fh, ":raw"; # Keep it raw for consistent offsets $use_disk = 1; } - print $tmp_fh @buf; @buf = (); $buf_size = 0; @@ -348,29 +409,25 @@ sub stream_tree { my @stack = @paths; while (@stack) { my $path = (pop @stack) =~ s|^\./||r; + next if $path eq REPO; # Skip the repo itself + my @st = lstat($path); if (-d _) { if (opendir(my $dh, $path)) { push @stack, map { File::Spec->catfile($path, $_) } - grep { $_ ne '.' && $_ ne '..' && $_ ne 'REPO' } + grep { $_ ne '.' && $_ ne '..' && $_ ne REPO } readdir($dh); closedir($dh); - } else { - warn "Could not open '$path': $!\n"; } - } elsif (-f _ || -l _ || !-e $path) { - # Use 0 as a default for size and mtime for deleted files. - my $size = $st[7] // 0; - my $mtime = $st[9] // 0; - my $line = "$clean_path\t$st[7]\t$st[9]\n"; - my $len = length($record); - + } elsif (-f _ || -l _) { + # Format: path \t mtime \t size + my $line = "$path\t$st[9]\t$st[7]\n"; push @buf, $line; - $buf_size += $len; - $tot_size += $len; + $buf_size += length($line); + $tot_size += length($line); if ((!$use_disk && $tot_size > MEM_LIMIT) || - ($use_disk && $buf_size > $chunk_size)) { + ($use_disk && $buf_size > $chunk_size)) { $flush->(); } } @@ -379,66 +436,59 @@ sub stream_tree { if (!$use_disk) { @buf = sort @buf; return sub { - my $line = shift @buffer; + my $line = shift @buf; return unless $line; chomp $line; - my ($p, $s, $m) = split(/\t/, $line); - return { path => $p, size => $s, mtime => $m }; + my ($p, $m, $s) = split(/\t/, $line); + return { path => $p, mtime => $m, size => $s }; }; } else { - $flush->() if @buffer; # Clear remaining + $flush->() if @buf; close $tmp_fh; - - open(my $sort_fh, "-|", "sort", "-t", "\t", "-k1,1", $tmp_path) - or die "Could not open sort pipe: $!"; - + open(my $sort_fh, "-|", "sort", "-t", "\t", "-k1,1", $tmp_path) or die $!; return sub { my $line = <$sort_fh>; - unless ($line) { - close $sort_fh; # Reap the sort process - return; - } - + unless ($line) { close $sort_fh; return; } chomp $line; my ($p, $s, $m) = split(/\t/, $line); - return { path => $p, size => $s, mtime => $m }; + return { path => $p, mtime => $m, size => $s }; }; } } sub stream_index { - my $index = INDEX; - my $offset_len = 8; - - return sub { return; } unless -e $index && -s $index > $offset_len; - - open(my $fh, "<:raw", $index) or die "Could not open index: $!"; + my $idx = INDEX; + return sub { return; } unless -e $idx && -s $idx > 0; + open(my $fh, "<:raw", $idx) or die "Could not open index: $!"; - my $file_size = -s $index_path; - seek($fh, $file_size - $offset_len, 0); - read($fh, my $buf, $offset_len); - my $offset = unpack("Q", $buf); - seek($fh, $offset, 0); - return sub { - my $pos = tell($fh); - return if $pos >= ($file_size - $offset_len); - my $line = <$fh>; - unless (defined $line) { - close $fh; - return; - } - + unless (defined $line) { close $fh; return; } chomp $line; - my ($path, $size, $mtime, $hash) = split(/\t/, $line); + + # Format: staged, base, mtime, size, path + my ($s_hash, $b_hash, $mtime, $size, $path) = split(/\t/, $line, 5); return { - path => $path, - size => $size, - mtime => $mtime, - hash => $hash, + s_hash => $s_hash, + b_hash => $b_hash, + mtime => $mtime, + size => $size, + path => $path, }; }; } +sub matches_paths { + my ($path, $specs_ref) = @_; + return 1 if !@$specs_ref; + + foreach my $spec (@$specs_ref) { + my $clean_spec = $spec =~ s|/+$||r; # Remove trailing slashes + return 1 if $clean_spec eq '.'; + return 1 if $path eq $clean_spec; + return 1 if $path =~ /^\Q$clean_spec\E\//; # Directory match + } + return 0; +} + |
