summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSadeep Madurange <sadeep@asciimx.com>2026-04-10 21:41:43 +0800
committerSadeep Madurange <sadeep@asciimx.com>2026-04-10 21:41:43 +0800
commit35b9a686cb8c5473ad756f49ee293d8ed23e34e8 (patch)
tree146918918ff4e32b2651434f9a55c7dc21906511
parent8a848fa2b1c67829c69001bbe5bff2cb182c3588 (diff)
downloadcvn-35b9a686cb8c5473ad756f49ee293d8ed23e34e8.tar.gz
wip: workspace scanner.
-rw-r--r--README.txt11
-rw-r--r--vcx113
2 files changed, 86 insertions, 38 deletions
diff --git a/README.txt b/README.txt
new file mode 100644
index 0000000..b5383d5
--- /dev/null
+++ b/README.txt
@@ -0,0 +1,11 @@
+FUNCTIONAL
+
+Common ops: status / add / commit / log / checkout / diff
+File integrity
+
+CONSTRAINTS
+
+SSD: TBW / append-only
+File systems: inode count, file descriptors
+System: CPU / memory
+
diff --git a/vcx b/vcx
index 8682be8..b68d28d 100644
--- a/vcx
+++ b/vcx
@@ -2,6 +2,7 @@
use strict;
use warnings;
+use IO::Handle;
use File::Path qw(make_path);
use File::Copy qw(copy);
use File::Find;
@@ -9,6 +10,7 @@ use File::Compare;
use File::Basename;
use File::Glob qw(:bsd_glob);
use File::Spec;
+use File::Temp qw(tempfile);
use Getopt::Long;
use Archive::Tar;
use Compress::Zlib;
@@ -327,54 +329,89 @@ sub hash_file_content {
return $sha->hexdigest;
}
-sub scan_tree {
-    my $cb = pop @_;
+# Walk the given paths and return an iterator over the entries found.
+#
+# Arguments: a list of file and/or directory paths. Directories are
+# descended into; entries named '.', '..' or REPO are skipped.
+#
+# Returns: a code ref. Each call yields the next record as a hash ref
+# { path => ..., size => ..., mtime => ... } in bytewise sorted order, and
+# an empty return once the records are exhausted. Regular files, symlinks
+# and paths that do not exist are reported; missing paths get size and
+# mtime 0.
+#
+# Records stay in memory until their total size exceeds MEM_LIMIT. After
+# that they are spilled to a temporary file and ordered by sort(1).
+# Dies if the temporary file cannot be written or sort(1) cannot be run.
+#
+# NOTE(review): MEM_LIMIT is not defined in this hunk -- confirm it exists
+# at file level.
+# NOTE(review): records are tab/newline delimited, so a path containing
+# either character will not round-trip -- confirm this is acceptable.
+sub get_dir_scanner {
+    my (@paths) = @_;
-    my @stack;
+    my $chunk_size = 1024 * 64; # 64 KB chunks for IO buffering
-    my $collect = sub {
-        my ($path, $files) = @_;
-        my @stat = lstat($path) or (warn("lstat '$path': $!\n") and return);
-        if (-d _) {
-            push @stack, $path;
-        } elsif (-f _ || -l _) {
-            push @$files, {
-                path => $path =~ s|^\./||r,
-                size => $stat[7],
-                mtime => $stat[9],
-            };
+    my @buf;           # pending records, one "path\tsize\tmtime\n" per entry
+    my $buf_size = 0;  # bytes currently held in @buf
+    my $tot_size = 0;  # bytes produced so far, in memory or on disk
+    my $use_disk = 0;  # true once records have been spilled to $tmp_fh
+
+    my ($tmp_fh, $tmp_path);
+
+    # Append @buf to the temp file (created on first use) and empty it.
+    my $flush = sub {
+        if (!$use_disk) {
+            ($tmp_fh, $tmp_path) = tempfile(UNLINK => 1);
+            # No setvbuf here: it is not available on PerlIO builds, and
+            # @buf already batches the writes into $chunk_size pieces.
+            # Raw mode: readdir yields byte strings, and an encoding layer
+            # would re-encode non-ASCII names on the way out.
+            binmode $tmp_fh;
+            $use_disk = 1;
         }
-    };
-    my @input_files;
-    $collect->($_, \@input_files) for @_;
-    if (@input_files) {
-        @input_files = sort { $a->{path} cmp $b->{path} } @input_files;
-        $cb->('.', \@input_files);
+        print {$tmp_fh} @buf
+            or die "Could not write '$tmp_path': $!\n";
+        @buf = ();
+        $buf_size = 0;
-    }
+    };
+    my @stack = @paths;
     while (@stack) {
-        my $dir = pop @stack;
-        my $dh;
-        unless (opendir($dh, $dir)) {
-            warn "Can't open $dir\n";
-            next;
-        }
+        my $path = (pop @stack) =~ s|^\./||r;
+        my @st = lstat($path); # empty list when lstat fails
-        my @files;
-        my $subdir_idx = @stack; # Track where this level's subdirs start
-        while (my $ent = readdir($dh)) {
-            next if $ent eq '.' or $ent eq '..' or $ent eq REPO;
-            $collect->(File::Spec->catfile($dir, $ent), \@files);
+        if (-d _) {
+            if (opendir(my $dh, $path)) {
+                push @stack, map { File::Spec->catfile($path, $_) }
+                    grep { $_ ne '.' && $_ ne '..' && $_ ne REPO }
+                    readdir($dh);
+                closedir($dh);
+            } else {
+                warn "Could not open '$path': $!\n";
+            }
+        } elsif (-f _ || -l _ || !-e $path) {
+            # Use 0 as a default for size and mtime for deleted files.
+            my $size = $st[7] // 0;
+            my $mtime = $st[9] // 0;
+            my $line = "$path\t$size\t$mtime\n";
+            my $len = length($line);
+
+            push @buf, $line;
+            $buf_size += $len;
+            $tot_size += $len;
+
+            # Spill once the total passes MEM_LIMIT; from then on flush
+            # whenever the buffer grows past one chunk.
+            if ((!$use_disk && $tot_size > MEM_LIMIT) ||
+                ($use_disk && $buf_size > $chunk_size)) {
+                $flush->();
+            }
         }
+    }
-        closedir($dh);
-        @files = sort { $a->{path} cmp $b->{path} } @files;
-        $cb->($dir, \@files) if @files;
-
-        if (@stack > $subdir_idx) {
-            my @subdirs = splice(@stack, $subdir_idx);
-            push @stack, sort { $b cmp $a } @subdirs;
+    if (!$use_disk) {
+        # Everything fit in memory: sort the records and hand them out.
+        @buf = sort @buf;
+        return sub {
+            my $line = shift @buf;
+            return unless defined $line;
+            chomp $line;
+            my ($p, $s, $m) = split(/\t/, $line);
+            return { path => $p, size => $s, mtime => $m };
+        }
+    } else {
+        $flush->() if @buf; # Clear remaining
+        close $tmp_fh
+            or die "Could not close '$tmp_path': $!\n";
+
+        # Force bytewise collation so the order matches Perl's own sort;
+        # the child inherits the environment when the pipe is opened.
+        local $ENV{LC_ALL} = 'C';
+        open(my $sort_fh, "-|", "sort", "-t", "\t", "-k1,1", $tmp_path)
+            or die "Could not open sort pipe: $!";
+
+        return sub {
+            return unless defined $sort_fh; # already exhausted
+            my $line = <$sort_fh>;
+            unless (defined $line) {
+                close $sort_fh; # Reap the sort process
+                undef $sort_fh;
+                return;
+            }
+
+            chomp $line;
+            my ($p, $s, $m) = split(/\t/, $line);
+            return { path => $p, size => $s, mtime => $m };
         }
     }
 }