#!/usr/bin/perl
#
# vcx - a minimal single-file revision control tool.
#
# Repository layout (everything lives under .vcx/):
#   head   - id of the current revision (7-char zero-padded hex)
#   index  - staged state; TAB-separated lines:
#              s_hash  b_hash  mtime  size  path
#            s_hash = SHA-1 of the file content as staged
#            b_hash = hash of the object the content is stored against
#                     (equal to s_hash for full blobs, or the base blob's
#                     hash when only a delta patch is recorded at commit)
#   obj/   - content-addressed object store, sharded on the first two hex
#            digits of the hash
#   rev/   - one metadata file per revision (tree:/parent:/patch:/date:/msg:)
#   stg/   - staging area holding snapshots of changed files plus scratch
#            files used while building a commit

use strict;
use warnings;

use IO::Handle;
use IO::Compress::Gzip qw(gzip $GzipError);
use File::Path qw(make_path);
use File::Copy qw(copy);
use File::Find;
use File::Compare;
use File::Basename;
use File::Glob qw(:bsd_glob);
use File::Spec;
use File::Temp qw(tempfile);
use Getopt::Long;
use Archive::Tar;
use Compress::Zlib;
use POSIX qw(strftime);
use Digest::SHA qw(sha1_hex);

use constant REPO    => '.vcx';
use constant HEAD    => REPO . '/head';     # Current revision ID
use constant INDEX   => REPO . '/index';    # Index
use constant OBJ_DIR => REPO . '/obj';      # Object store
use constant REV_DIR => REPO . '/rev';      # Revisions

# Staging area
use constant TMP_DIR  => REPO . '/stg';
use constant TMP_DIFF => TMP_DIR . '/delta.tar.gz';

use constant MEM_LIMIT      => 64 * 1024 * 1024;
use constant MAX_INDEX_SIZE => 16 * 1024 * 1024;

Getopt::Long::Configure("bundling");

# ---- command dispatch -----------------------------------------------------

my $cmd  = shift @ARGV // '';
my @args = @ARGV;

if ($cmd eq 'init') {
    run_init();
}
elsif ($cmd eq 'status') {
    run_status();
}
elsif ($cmd eq 'add') {
    die "Usage: $0 add [path1] [path2] ...\n" unless @args;
    run_add(@args);
}
elsif ($cmd eq 'commit') {
    my ($m, $a);
    GetOptions('m=s' => \$m, 'a' => \$a);
    run_add(".") if $a;    # -a stages the whole working tree first
    run_commit($m);
}
elsif ($cmd eq 'log') {
    run_log();
}
else {
    print "Usage: $0 [init|status|add|commit|log]\n";
    exit 1;
}

# Create the repository skeleton and point HEAD at revision 0.
sub run_init {
    make_path(OBJ_DIR, REV_DIR, TMP_DIR);
    touch_file(INDEX);
    my $rev_id = to_hex_id(0);
    write_file(HEAD, "$rev_id\n");
    print "Initialized repository.\n";
}

# Intentionally a no-op for now.
sub run_status { }

# Merge the current index with the working tree and atomically rewrite the
# index. New or changed files are hashed and snapshotted into the staging
# area; entries for files missing from disk are kept verbatim (deletion is
# detected later, at commit time, by diffing against the parent tree).
sub run_add {
    my @paths = @_;
    @paths = (".") unless @paths;

    my $idx_path = INDEX;
    my $tmp_idx  = "$idx_path.tmp";

    my $it_idx = stream_index();
    my $it_wrk = stream_tree(@paths);

    open(my $out, ">:raw", $tmp_idx) or die "Could not create $tmp_idx: $!";

    # Sorted-merge of the two path-ordered streams.
    my $idx_entry = $it_idx->();
    my $wrk_entry = $it_wrk->();
    while ($idx_entry || $wrk_entry) {
        my $cmp = !defined $idx_entry ? 1
                : !defined $wrk_entry ? -1
                : $idx_entry->{path} cmp $wrk_entry->{path};

        if ($cmp == 0) {
            if (   $idx_entry->{mtime} == $wrk_entry->{mtime}
                && $idx_entry->{size} == $wrk_entry->{size}) {
                # No change
                print $out join("\t",
                    $idx_entry->{s_hash}, $idx_entry->{b_hash},
                    $idx_entry->{mtime},  $idx_entry->{size},
                    $idx_entry->{path}) . "\n";
            }
            else {
                my $current_hash = hash_file_content($wrk_entry->{path});

                # Snapshot to staging area
                my $stg_path = File::Spec->catfile(TMP_DIR, $wrk_entry->{path});
                make_path(dirname($stg_path));
                copy($wrk_entry->{path}, $stg_path);

                if ($current_hash eq $idx_entry->{s_hash}) {
                    # Touched but content identical: refresh stat info only.
                    print $out join("\t",
                        $idx_entry->{s_hash}, $idx_entry->{b_hash},
                        $wrk_entry->{mtime},  $wrk_entry->{size},
                        $wrk_entry->{path}) . "\n";
                }
                else {
                    print $out join("\t",
                        $current_hash,       $idx_entry->{b_hash},
                        $wrk_entry->{mtime}, $wrk_entry->{size},
                        $wrk_entry->{path}) . "\n";
                }
            }
            $idx_entry = $it_idx->();
            $wrk_entry = $it_wrk->();
        }
        elsif ($cmp > 0) {
            # New file: hash and snapshot to staging.
            my $hash     = hash_file_content($wrk_entry->{path});
            my $stg_path = File::Spec->catfile(TMP_DIR, $wrk_entry->{path});
            make_path(dirname($stg_path));
            copy($wrk_entry->{path}, $stg_path);
            print $out join("\t",
                $hash, $hash,
                $wrk_entry->{mtime}, $wrk_entry->{size},
                $wrk_entry->{path}) . "\n";
            $wrk_entry = $it_wrk->();
        }
        else {
            # File in index but missing from disk: keep the stale entry.
            print $out join("\t",
                $idx_entry->{s_hash}, $idx_entry->{b_hash},
                $idx_entry->{mtime},  $idx_entry->{size},
                $idx_entry->{path}) . "\n";
            $idx_entry = $it_idx->();
        }
    }
    close $out;
    rename($tmp_idx, $idx_path) or die "Failed to update index: $!";
}

# Build a revision from the index. For each changed file a delta patch is
# kept (text: unified diff; binary: make_bin_patch) when it is smaller than
# half the file, otherwise the full blob is moved into the object store.
# Writes the tree object, an optional gzipped patch-bundle object, the
# revision metadata file, the rewritten index, and advances HEAD.
sub run_commit {
    my ($msg) = @_;
    if (!defined $msg || $msg eq "") {
        $msg = get_commit_message();
    }

    my $parent_id        = read_head() // to_hex_id(0);
    my $parent_tree_hash = "";
    if ($parent_id ne to_hex_id(0)) {
        my $p_rev_file = File::Spec->catfile(REV_DIR, $parent_id);
        if (open my $pfh, '<', $p_rev_file) {
            while (<$pfh>) {
                if (/^tree:(.*)$/) { $parent_tree_hash = $1; last; }
            }
            close $pfh;
        }
    }

    my $it_idx = stream_index();
    my $it_old = stream_tree_file($parent_tree_hash);

    # New tree object: hashed incrementally, spilled to disk past MEM_LIMIT.
    my $sha_new_tree = Digest::SHA->new(1);
    my @new_tree_lines;
    my $new_tree_size = 0;
    my $use_disk_nt   = 0;
    my ($nt_fh, $nt_path);

    # Patch bundle: kept in memory until MEM_LIMIT, then spilled to a tar.
    my %patches;
    my $patch_mem_size = 0;
    my $use_disk_patch = 0;
    my ($pt_fh, $pt_path);

    my ($tmp_idx_fh, $tmp_idx_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
    binmode $tmp_idx_fh, ":raw";

    # Sorted-merge of the index against the parent tree.
    my $idx = $it_idx->();
    my $old = $it_old->();
    while ($idx || $old) {
        my $cmp = !defined $idx ? 1
                : !defined $old ? -1
                : $idx->{path} cmp $old->{path};
        my ($out_s, $out_b, $out_m, $out_z, $out_p);

        if ($cmp < 0) {
            # New file: move the staged snapshot into the object store.
            $out_p = $idx->{path};
            $out_s = $idx->{s_hash};
            $out_b = $idx->{s_hash};
            $out_m = $idx->{mtime};
            $out_z = $idx->{size};
            my $obj_path = get_obj_path($out_b);
            my $stg_file = File::Spec->catfile(TMP_DIR, $out_p);
            if (!-e $obj_path) {
                # rename() fails across filesystems; fall back to copy.
                rename($stg_file, $obj_path) or copy($stg_file, $obj_path);
            }
            $idx = $it_idx->();
        }
        elsif ($cmp == 0) {
            # Modified/unchanged
            $out_p = $idx->{path};
            $out_s = $idx->{s_hash};
            $out_m = $idx->{mtime};
            $out_z = $idx->{size};
            if ($idx->{s_hash} ne $old->{hash}) {
                my $base_obj = get_obj_path($old->{hash});
                my $stg_file = File::Spec->catfile(TMP_DIR, $out_p);

                # Compute a delta against the parent blob. List-form pipe
                # open avoids the shell, so metacharacters in file names
                # cannot be injected (the previous qx() was exploitable).
                my $patch;
                if (-T $stg_file) {
                    if (open my $diff_fh, '-|', 'diff', '-u', $base_obj, $stg_file) {
                        local $/;
                        $patch = <$diff_fh>;
                        close $diff_fh;
                    }
                }
                else {
                    $patch = make_bin_patch($stg_file, $base_obj);
                }

                # Keep the patch only if it is worthwhile (< 50% of the file).
                if (defined $patch && length($patch) < ($out_z * 0.5)) {
                    if (!$use_disk_patch
                        && ($patch_mem_size + length($patch)) > MEM_LIMIT) {
                        # Spill accumulated in-memory patches to a tar file.
                        ($pt_fh, $pt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
                        my $tar = Archive::Tar->new;
                        $tar->add_data($_, $patches{$_}) for keys %patches;
                        $tar->write($pt_fh);
                        %patches        = ();
                        $use_disk_patch = 1;
                    }
                    if ($use_disk_patch) {
                        # NOTE(review): re-reads and rewrites the whole tar
                        # for every patch (quadratic); fine for small spills.
                        my $tar = Archive::Tar->new($pt_path);
                        $tar->add_data("$out_p.patch", $patch);
                        $tar->write($pt_path);
                    }
                    else {
                        $patches{"$out_p.patch"} = $patch;
                        $patch_mem_size += length($patch);
                    }
                    $out_b = $old->{hash};
                    unlink($stg_file);    # Remove staged file since we only need the patch
                }
                else {
                    $out_b = $out_s;
                    my $obj_path = get_obj_path($out_b);
                    if (!-e $obj_path) {
                        rename($stg_file, $obj_path) or copy($stg_file, $obj_path);
                    }
                }
            }
            else {
                $out_b = $old->{hash};
            }
            $idx = $it_idx->();
            $old = $it_old->();
        }
        else {
            # Deleted: omit from the new tree entirely.
            $old = $it_old->();
            next;
        }

        # Record tree line (hash it now; buffer or spill the text).
        my $t_line = "$out_b\t$out_p\n";
        $sha_new_tree->add($t_line);
        $new_tree_size += length($t_line);
        if (!$use_disk_nt && $new_tree_size > MEM_LIMIT) {
            ($nt_fh, $nt_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
            print $nt_fh @new_tree_lines;
            @new_tree_lines = ();
            $use_disk_nt    = 1;
        }
        $use_disk_nt ? print $nt_fh $t_line : push @new_tree_lines, $t_line;

        # Record index line
        print $tmp_idx_fh join("\t", $out_s, $out_b, $out_m, $out_z, $out_p) . "\n";
    }

    # Finalize tree object
    my $new_tree_hash = $sha_new_tree->hexdigest;
    my $tree_obj_path = get_obj_path($new_tree_hash);
    if ($use_disk_nt) {
        close $nt_fh;
        rename($nt_path, $tree_obj_path);
    }
    else {
        write_file($tree_obj_path, join("", @new_tree_lines));
    }

    # Finalize patch bundle (tar of *.patch members, gzipped, stored by hash)
    my $patch_bundle_hash = "";
    if ($use_disk_patch || %patches) {
        my $tar_data;
        if ($use_disk_patch) {
            open my $pt_in, '<:raw', $pt_path;
            $tar_data = do { local $/; <$pt_in> };
            close $pt_in;
            unlink $pt_path;
        }
        else {
            my $tar = Archive::Tar->new;
            $tar->add_data($_, $patches{$_}) for keys %patches;
            $tar_data = $tar->write();
        }
        my $gzipped_payload;
        gzip \$tar_data => \$gzipped_payload or die "Gzip failed: $GzipError";
        $patch_bundle_hash = sha1_hex($gzipped_payload);
        write_file(get_obj_path($patch_bundle_hash), $gzipped_payload);
    }

    # Revision metadata file
    my $next_id = to_hex_id(from_hex_id($parent_id) + 1);
    $msg //= "Commit $next_id";    # editor aborted / empty message
    open my $rfh, '>:raw', File::Spec->catfile(REV_DIR, $next_id) or die $!;
    print $rfh "tree:$new_tree_hash\nparent:$parent_id\n";
    print $rfh "patch:$patch_bundle_hash\n" if $patch_bundle_hash;
    print $rfh "date:" . time() . "\nmsg:$msg\n";
    close $rfh;

    # Publish the new index and HEAD, then reset the staging area.
    close $tmp_idx_fh;
    rename($tmp_idx_path, INDEX);
    write_file(HEAD, "$next_id\n");
    File::Path::remove_tree(TMP_DIR);
    make_path(TMP_DIR);
    print "Revision $next_id committed.\n";
}

# Walk the revision chain backwards from HEAD and print each revision's
# id, date and message through the user's pager.
sub run_log {
    open my $fh_h, '<', HEAD or die "Not a repository.\n";
    my $head = <$fh_h>;
    chomp $head;
    close $fh_h;

    # Setup pager. $PAGER may legitimately contain arguments ("less -R"),
    # so it is deliberately passed through the shell here.
    my $pager = $ENV{PAGER} || 'less -R';
    open(my $pipe, "| $pager") or die "Can't pipe to $pager: $!";
    my $old_fh = select($pipe);

    print "Revision History (HEAD: $head)\n\n";
    my $current_id = $head;
    while ($current_id) {
        my $rev_file = File::Spec->catfile(REV_DIR, $current_id);
        last unless -f $rev_file;
        open my $rfh, '<', $rev_file or last;
        my ($tree, $parent, $date, $msg) = ("", "", "", "[No message]");
        while (<$rfh>) {
            chomp;
            if    (/^tree:(.*)$/)   { $tree   = $1; }
            elsif (/^parent:(.*)$/) { $parent = $1; }
            elsif (/^date:(.*)$/)   { $date   = $1; }
            elsif (/^msg:(.*)$/)    { $msg    = $1; }
        }
        close $rfh;
        my $date_str = $date
            ? strftime("%a %b %e %H:%M:%S %Y", localtime($date))
            : "Unknown Date";
        print "commit $current_id\n";
        print "Date: $date_str\n";
        print "\n    $msg\n\n";
        # Move to parent to walk backward
        $current_id = $parent;
    }
    close $pipe;
    select($old_fh);
}

# Produce a block-level binary delta of $new_file against $old_file.
# Format: 8-byte total new size (pack "Q"), then for each 4 KiB block that
# differs: offset (Q), block length (L), raw block data. Returns undef when
# the files are block-for-block identical.
sub make_bin_patch {
    my ($new_file, $old_file) = @_;
    my $new_size = -s $new_file;
    open my $f_new, '<:raw', $new_file or die $!;
    open my $f_old, '<:raw', $old_file or die $!;

    # Start patch string with new total size (8 bytes)
    my $patch    = pack("Q", $new_size);
    my $offset   = 0;
    my $blk_size = 4096;
    while (1) {
        my $read_new = sysread($f_new, my $buf_new, $blk_size);
        my $read_old = sysread($f_old, my $buf_old, $blk_size);
        last if !$read_new && !$read_old;
        # If blocks differ, record the change
        if (($buf_new // '') ne ($buf_old // '')) {
            # Format: Offset (Q), Length (L), then the raw data
            $patch .= pack("QL", $offset, length($buf_new)) . $buf_new;
        }
        $offset += $blk_size;
    }
    close $f_new;
    close $f_old;

    # If only the 8-byte header exists, files were identical
    return length($patch) > 8 ? $patch : undef;
}

# Convert decimal to a padded 7-char hex string
sub to_hex_id { sprintf("%07x", $_[0]) }

# Convert hex back to decimal
sub from_hex_id { hex($_[0]) }

# Write $content to $path, creating parent directories as needed.
sub write_file {
    my ($path, $content) = @_;
    my $dir = dirname($path);
    make_path($dir) unless -d $dir;
    open my $fh, '>', $path or die "Could not open '$path' for writing: $!";
    print $fh $content;
    close $fh or die "Could not close '$path' after writing: $!";
}

# Create (or truncate) an empty file at $path.
sub touch_file {
    my ($path) = @_;
    my $dir = dirname($path);
    make_path($dir) unless -d $dir;
    open my $fh, '>', $path or die "Could not touch $path: $!";
    close $fh;
}

# SHA-1 of a file's content; for symlinks, hash the link target instead of
# following it.
sub hash_file_content {
    my ($filename) = @_;
    my $sha = Digest::SHA->new(1);
    if (-l $filename) {
        my $target = readlink($filename);
        die "Could not readlink '$filename': $!" unless defined $target;
        $sha->add($target);
    }
    else {
        open(my $fh, '<:raw', $filename) or die "Could not open '$filename': $!";
        $sha->addfile($fh);
        close($fh);
    }
    return $sha->hexdigest;
}

# Iterate the working tree under @paths (skipping the repository dir).
# Returns an iterator sub yielding { path, mtime, size } in path order.
# Listings are buffered in memory up to MEM_LIMIT, then spilled to a temp
# file and sorted externally with sort(1).
sub stream_tree {
    my (@paths) = @_;
    @paths = (".") unless @paths;

    my $chunk_size = 1024 * 64;
    my @buf;
    my $buf_size = 0;
    my $tot_size = 0;
    my $use_disk = 0;
    my ($tmp_fh, $tmp_path);

    my $flush = sub {
        if (!$use_disk) {
            ($tmp_fh, $tmp_path) = tempfile(UNLINK => 1);
            $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size);
            binmode $tmp_fh, ":raw";
            $use_disk = 1;
        }
        print $tmp_fh @buf;
        @buf      = ();
        $buf_size = 0;
    };

    my @stack = @paths;
    while (@stack) {
        my $path = (pop @stack) =~ s|^\./||r;
        next if $path eq REPO;
        my @st = lstat($path);
        if (-d _) {
            if (opendir(my $dh, $path)) {
                push @stack,
                    map  { File::Spec->catfile($path, $_) }
                    grep { $_ ne '.' && $_ ne '..' && $_ ne REPO } readdir($dh);
                closedir($dh);
            }
        }
        elsif (-f _ || -l _) {
            # Temp-file record format: path \t mtime \t size
            my $line = "$path\t$st[9]\t$st[7]\n";
            push @buf, $line;
            $buf_size += length($line);
            $tot_size += length($line);
            if (   (!$use_disk && $tot_size > MEM_LIMIT)
                || ($use_disk && $buf_size > $chunk_size)) {
                $flush->();
            }
        }
    }

    if (!$use_disk) {
        @buf = sort @buf;
        return sub {
            my $line = shift @buf;
            return unless $line;
            chomp $line;
            my ($p, $m, $s) = split(/\t/, $line);
            return { path => $p, mtime => $m, size => $s };
        };
    }
    else {
        $flush->() if @buf;
        close $tmp_fh;
        open(my $sort_fh, "-|", "sort", "-t", "\t", "-k1,1", $tmp_path)
            or die $!;
        return sub {
            my $line = <$sort_fh>;
            unless ($line) {
                close $sort_fh;
                return;
            }
            chomp $line;
            # BUGFIX: fields are written as path/mtime/size; the old code
            # unpacked them as ($p, $s, $m), transposing mtime and size.
            my ($p, $m, $s) = split(/\t/, $line);
            return { path => $p, mtime => $m, size => $s };
        };
    }
}

# Iterator over the index file; yields one entry hashref per line, or an
# always-empty iterator when the index is missing/empty.
sub stream_index {
    my $idx = INDEX;
    return sub { return; } unless -e $idx && -s $idx > 0;
    open(my $fh, "<:raw", $idx) or die "Could not open index: $!";
    return sub {
        my $line = <$fh>;
        unless (defined $line) {
            close $fh;
            return;
        }
        chomp $line;
        # Limit of 5 keeps tabs inside the path intact.
        my ($s_hash, $b_hash, $mtime, $size, $path) = split(/\t/, $line, 5);
        return {
            s_hash => $s_hash,
            b_hash => $b_hash,
            mtime  => $mtime,
            size   => $size,
            path   => $path,
        };
    };
}

# Logic for sharded object directory: .vcx/obj/<first 2 chars>/<rest>.
sub get_obj_path {
    my ($hash) = @_;
    my $dir = File::Spec->catfile(OBJ_DIR, substr($hash, 0, 2));
    make_path($dir) unless -d $dir;
    return File::Spec->catfile($dir, substr($hash, 2));
}

# Build a tree object ("s_hash\tpath" lines) directly from the index and
# store it by its own SHA-1. NOTE(review): not called anywhere in this
# file; kept for API completeness.
sub snapshot_tree {
    my $it = stream_index();
    my @buf;
    my $use_disk   = 0;
    my $total_size = 0;
    my $chunk_size = 1024 * 64;
    my $sha        = Digest::SHA->new(1);
    my ($tmp_fh, $tmp_path);

    while (my $entry = $it->()) {
        my $line = "$entry->{s_hash}\t$entry->{path}\n";
        $sha->add($line);
        $total_size += length($line);
        if (!$use_disk && $total_size > MEM_LIMIT) {
            ($tmp_fh, $tmp_path) = tempfile();
            $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size);
            binmode $tmp_fh, ":raw";
            print $tmp_fh @buf;
            @buf      = ();
            $use_disk = 1;
        }
        if ($use_disk) { print $tmp_fh $line; }
        else           { push @buf, $line; }
    }

    my $tree_hash = $sha->hexdigest;
    my $obj_path  = get_obj_path($tree_hash);
    if (!-e $obj_path) {
        if ($use_disk) {
            close $tmp_fh;
            rename($tmp_path, $obj_path) or die "Rename failed: $!";
        }
        else {
            write_file($obj_path, join("", @buf));
        }
    }
    else {
        unlink($tmp_path) if $use_disk;
    }
    return $tree_hash;
}

# Iterator over a stored tree object; yields { hash, path } per line.
# Missing hash or unreadable object yields an always-empty iterator.
sub stream_tree_file {
    my ($hash) = @_;
    return sub { return } unless $hash;
    my $path = get_obj_path($hash);
    open my $fh, '<:raw', $path or return sub { return };
    return sub {
        my $line = <$fh>;
        return unless $line;
        chomp $line;
        my ($h, $p) = split(/\t/, $line, 2);
        return { hash => $h, path => $p };
    }
}

# Current revision id from HEAD, or undef if absent/unreadable.
sub read_head {
    return unless -e HEAD;
    open my $fh, '<', HEAD or return;
    my $val = <$fh>;
    chomp $val if $val;
    return $val;
}

# Slurp a file's raw bytes; empty string on any failure.
sub read_file {
    my ($path) = @_;
    return "" unless -e $path;
    open my $fh, '<:raw', $path or return "";
    my $content = do { local $/; <$fh> };
    close $fh;
    return $content;
}

# Launch $VISUAL/$EDITOR on a commit-message template, strip '#' comment
# lines and surrounding whitespace, and return the message (undef aborts).
sub get_commit_message {
    my $editor   = $ENV{VISUAL} || $ENV{EDITOR} || 'vi';
    my $tmp_file = File::Spec->catfile(TMP_DIR, "COMMIT_EDITMSG");
    my $template = <<"EOF";
# Please enter the commit message for your changes. Lines starting
# with '#' will be ignored, and an empty message aborts the commit.
#
# On revision: @{[read_head() // '0']}
EOF
    write_file($tmp_file, $template);
    system($editor, $tmp_file);    # list form: no shell involved
    my $raw_content = read_file($tmp_file);
    my @lines       = split(/\n/, $raw_content);
    # Filter out lines starting with #
    my $final_msg = join("\n", grep { $_ !~ /^\s*#/ } @lines);
    $final_msg =~ s/^\s+|\s+$//g;    # Trim whitespace
    unlink($tmp_file);
    return ($final_msg ne "") ? $final_msg : undef;
}