#!/usr/bin/perl
# vcx - a minimal delta-compressed version control tool.
#
# On-disk layout (all under .vcx/):
#   head   - hex ID of the current revision
#   index  - TSV rows: staged_hash \t base_hash \t mtime \t size \t path
#   obj/   - content-addressed object store (keyframes + tree objects)
#   rev/   - one flat file per revision: header lines, blank line,
#            message (len bytes), then a tar payload of patches
#   stg/   - staging/temp area
use strict;
use warnings;
use IO::Handle;
use File::Path qw(make_path);
use File::Copy qw(copy);
use File::Find;
use File::Compare;
use File::Basename;
use File::Glob qw(:bsd_glob);
use File::Spec;
use File::Temp qw(tempfile);
use Getopt::Long;
use Archive::Tar;
use Compress::Zlib;
use POSIX qw(strftime);
use Digest::SHA qw(sha1_hex);

use constant REPO    => '.vcx';
use constant HEAD    => REPO . '/head';     # Current revision ID
use constant INDEX   => REPO . '/index';    # Index
use constant OBJ_DIR => REPO . '/obj';      # Object store
use constant REV_DIR => REPO . '/rev';      # Revisions

# Staging area
use constant TMP_DIR  => REPO . '/stg';
use constant TMP_META => TMP_DIR . '/meta';
use constant TMP_DIFF => TMP_DIR . '/delta.tar.gz';

use constant MEM_LIMIT      => 64 * 1024 * 1024;
use constant MAX_INDEX_SIZE => 16 * 1024 * 1024;

Getopt::Long::Configure("bundling");

# --- Command dispatch -------------------------------------------------------
my $cmd  = shift @ARGV // '';
my @args = @ARGV;

if ($cmd eq 'init') {
    run_init();
}
elsif ($cmd eq 'status') {
    run_status();
}
elsif ($cmd eq 'add') {
    die "Usage: $0 add [path1] [path2] ...\n" unless @args;
    run_add(@args);
}
elsif ($cmd eq 'commit') {
    my ($m, $a);
    GetOptions('m=s' => \$m, 'a' => \$a);
    if ($a) {
        run_add(".");    # -a: stage everything before committing
    }
    run_commit($m);
}
elsif ($cmd eq 'log') {
    run_log();
}
else {
    print "Usage: $0 [init|status|add|commit|log]\n";
    exit 1;
}

# Create the repository skeleton and point HEAD at revision 0.
# NOTE: re-running init truncates an existing index (touch_file opens '>').
sub run_init {
    make_path(OBJ_DIR, REV_DIR, TMP_DIR);
    touch_file(INDEX);
    my $rev_id = to_hex_id(0);
    write_file(HEAD, "$rev_id\n");
    print "Initialized repository.\n";
}

# TODO: not yet implemented.
sub run_status {
}

# Stage paths: merge-join the sorted index stream against the sorted
# working-tree stream and rewrite the index atomically.
sub run_add {
    my @paths = @_;
    @paths = (".") unless @paths;

    my $idx_path = INDEX;
    my $tmp_idx  = "$idx_path.tmp";
    my $it_idx   = stream_index();
    my $it_wrk   = stream_tree(@paths);

    open(my $out, ">:raw", $tmp_idx) or die "Could not create $tmp_idx: $!";

    my $idx_entry = $it_idx->();
    my $wrk_entry = $it_wrk->();

    # Classic sorted-merge: both iterators yield entries in path order.
    while ($idx_entry || $wrk_entry) {
        my $cmp = !defined $idx_entry ? 1
                : !defined $wrk_entry ? -1
                : $idx_entry->{path} cmp $wrk_entry->{path};
        my $path = ($cmp <= 0) ? $idx_entry->{path} : $wrk_entry->{path};
        my $matches_spec = matches_paths($path, \@paths);

        if ($cmp == 0) {
            # Present in both index and working tree.
            if ($matches_spec) {
                if (   $idx_entry->{mtime} == $wrk_entry->{mtime}
                    && $idx_entry->{size} == $wrk_entry->{size}) {
                    # No change: keep the row as-is.
                    print $out join("\t",
                        $idx_entry->{staged_hash}, $idx_entry->{base_hash},
                        $idx_entry->{mtime},       $idx_entry->{size},
                        $idx_entry->{path}) . "\n";
                }
                else {
                    my $current_hash = hash_file_content($wrk_entry->{path});
                    if ($current_hash eq $idx_entry->{staged_hash}) {
                        # mtime changed, but content is identical.
                        # Refresh mtime/size, keep staged/base hashes.
                        print $out join("\t",
                            $idx_entry->{staged_hash}, $idx_entry->{base_hash},
                            $wrk_entry->{mtime},       $wrk_entry->{size},
                            $wrk_entry->{path}) . "\n";
                    }
                    else {
                        # Modified: new staged_hash, base_hash stays as-is.
                        print $out join("\t",
                            $current_hash,       $idx_entry->{base_hash},
                            $wrk_entry->{mtime}, $wrk_entry->{size},
                            $wrk_entry->{path}) . "\n";
                    }
                }
            }
            else {
                # Outside pathspec: keep index row untouched.
                print $out join("\t",
                    $idx_entry->{staged_hash}, $idx_entry->{base_hash},
                    $idx_entry->{mtime},       $idx_entry->{size},
                    $idx_entry->{path}) . "\n";
            }
            $idx_entry = $it_idx->();
            $wrk_entry = $it_wrk->();
        }
        elsif ($cmp > 0) {
            # New file on disk (not in index).
            if ($matches_spec) {
                my $hash = hash_file_content($wrk_entry->{path});
                # Brand new file: staged and base start as the same hash.
                print $out join("\t",
                    $hash, $hash,
                    $wrk_entry->{mtime}, $wrk_entry->{size},
                    $wrk_entry->{path}) . "\n";
            }
            $wrk_entry = $it_wrk->();
        }
        else {
            # File in index but missing from disk (deletion).
            if (!$matches_spec) {
                # Not in our current 'add' scope; keep it in the index.
                print $out join("\t",
                    $idx_entry->{staged_hash}, $idx_entry->{base_hash},
                    $idx_entry->{mtime},       $idx_entry->{size},
                    $idx_entry->{path}) . "\n";
            }
            # If it matches spec but is gone, drop it (stages the deletion).
            $idx_entry = $it_idx->();
        }
    }
    close $out or die "Could not close $tmp_idx: $!";
    rename($tmp_idx, $idx_path) or die "Failed to update index: $!";
}

# Record a revision: store deltas or keyframes per changed file, write a
# tree object, a revision file, the refreshed index, and advance HEAD.
sub run_commit {
    my ($msg) = @_;

    # Get message and parent.
    $msg //= launch_editor();
    die "Commit aborted: empty message\n" unless $msg && $msg =~ /\S/;
    # NOTE(review): read_head(), calculate_delta(), compress_data() and
    # create_tarball() are not defined in this file chunk - confirm they
    # are provided elsewhere before shipping.
    my $parent_id = read_head();

    my $it = stream_index();
    my %patches;

    # New index (temporary).
    my ($idx_fh, $idx_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
    binmode $idx_fh, ":raw";

    # Tree object accumulates in memory, spilling to disk past MEM_LIMIT.
    my $sha_tree = Digest::SHA->new(1);
    my @tree_lines;
    my $tree_size     = 0;
    my $use_disk_tree = 0;
    my ($tree_fh, $tree_path);

    while (my $entry = $it->()) {
        my $final_hash_for_tree = $entry->{base_hash};

        if ($entry->{staged_hash} ne $entry->{base_hash}) {
            my $patch      = calculate_delta($entry);
            my $compressed = compress_data($patch);

            # THE PIVOT DECISION (50% rule): store the patch only if it
            # compresses to under half the file size; otherwise keyframe.
            if (length($compressed) < ($entry->{size} * 0.5)) {
                # Efficiency win: store patch.
                $patches{"$entry->{path}.patch"} = $compressed;
                # Tree keeps pointing at the OLD base (delta chain grows).
                $final_hash_for_tree = $entry->{base_hash};
            }
            else {
                # Pivot: store full object as a new keyframe.
                my $obj_path =
                    File::Spec->catfile(OBJ_DIR, $entry->{staged_hash});
                if (!-e $obj_path) {
                    copy($entry->{path}, $obj_path) or die "Pivot failed: $!";
                }
                # Tree points at the NEW hash (delta chain resets).
                $final_hash_for_tree = $entry->{staged_hash};
            }
        }

        # --- A. Update tree state ---
        my $t_line = "$final_hash_for_tree\t$entry->{path}\n";
        $sha_tree->add($t_line);
        $tree_size += length($t_line);
        if (!$use_disk_tree && $tree_size > MEM_LIMIT) {
            # Spill the in-memory tree lines to a temp file.
            ($tree_fh, $tree_path) = tempfile(DIR => TMP_DIR, UNLINK => 0);
            binmode $tree_fh, ":raw";
            print {$tree_fh} @tree_lines;
            @tree_lines    = ();
            $use_disk_tree = 1;
        }
        if ($use_disk_tree) {
            print {$tree_fh} $t_line;
        }
        else {
            push @tree_lines, $t_line;
        }

        # --- B. Update index state ---
        # The staged_hash becomes the next base_hash, and we refresh
        # metadata from the actual file on disk.
        my @stats = stat($entry->{path});
        print $idx_fh join("\t",
            $entry->{staged_hash},    # New staged
            $entry->{staged_hash},    # New base (index tracks "current")
            $stats[9],                # Fresh mtime
            $stats[7],                # Fresh size
            $entry->{path}) . "\n";
    }

    # Finalize tree object (content-addressed, so skip if it exists).
    my $tree_hash      = $sha_tree->hexdigest;
    my $final_tree_obj = File::Spec->catfile(OBJ_DIR, $tree_hash);
    if (!-e $final_tree_obj) {
        if ($use_disk_tree) {
            close $tree_fh;
            rename($tree_path, $final_tree_obj) or die $!;
        }
        else {
            write_file($final_tree_obj, join("", @tree_lines));
        }
    }
    else {
        unlink($tree_path) if $use_disk_tree;
    }

    # Assemble the unified revision file:
    #   tree:/parent:/date:/len: headers, blank line, message, tar payload.
    my $next_id     = to_hex_id(from_hex_id($parent_id) + 1);
    my $rev_file    = File::Spec->catfile(REV_DIR, $next_id);
    my $tar_payload = create_tarball(\%patches);
    my $ts          = time();
    my $msg_final   = "$msg\n";
    my $msg_len     = length($msg_final);

    open my $rfh, '>:raw', $rev_file or die $!;
    print $rfh "tree:$tree_hash\n";
    print $rfh "parent:$parent_id\n";
    print $rfh "date:$ts\n";
    print $rfh "len:$msg_len\n\n";
    print $rfh $msg_final;
    print $rfh $tar_payload;
    close $rfh or die "Could not close $rev_file: $!";

    # Final atomicity: commit index and HEAD.
    close $idx_fh;
    rename($idx_path, INDEX) or die "Index update failed: $!";
    write_file(HEAD, "$next_id\n");
    print "Revision $next_id committed.\n";
}

# Walk history backwards from HEAD, printing each revision through a pager.
# BUGFIX: revisions are flat files written by run_commit (tree:/parent:/
# date:/len: headers + message), not directories with a "message" file -
# the old code looked for the latter and never printed anything.
sub run_log {
    open my $fh_h, '<', HEAD or die "Not a repository.\n";
    my $head = <$fh_h>;
    chomp $head;
    close $fh_h;

    # Set up pager.
    my $pager = $ENV{PAGER} || 'less -R';
    open(my $pipe, "| $pager") or die "Can't pipe to $pager: $!";
    my $old_fh = select($pipe);

    print "Revision History (HEAD: $head)\n\n";
    my $rev_num = from_hex_id($head);
    while ($rev_num > 0) {
        my $hex_id   = to_hex_id($rev_num);
        my $rev_file = File::Spec->catfile(REV_DIR, $hex_id);

        # Stop if we hit a gap in history or the beginning.
        last unless -f $rev_file;

        open my $rfh, '<:raw', $rev_file or die "Cannot read $rev_file: $!";
        my ($ts, $msg_len) = (0, 0);
        while (my $line = <$rfh>) {
            chomp $line;
            last if $line eq '';    # blank line terminates the header
            my ($key, $val) = split /:/, $line, 2;
            $ts      = $val if $key eq 'date';
            $msg_len = $val if $key eq 'len';
        }

        my $message = "[No message]";
        if ($msg_len > 0) {
            read($rfh, my $buf, $msg_len);
            if (defined $buf && $buf =~ /\S/) {
                $message = $buf;
                $message =~ s/^\s+|\s+$//g;    # Trim whitespace
            }
            else {
                $message = "[Empty message]";
            }
        }
        close $rfh;

        my $date_str = strftime("%a %b %e %H:%M:%S %Y", localtime($ts));
        print "commit $hex_id\n";
        print "Date: $date_str\n";
        print "\n    $message\n\n";
        $rev_num--;
    }
    close $pipe;
    select($old_fh);
}

# Produce a patch for $src against the stored object and stash it in
# $patches keyed by object basename. Text files get a unified diff,
# binary files a block-level binary patch.
sub make_patch {
    my ($src, $obj_path, $patches) = @_;
    return unless -f $obj_path;

    my $patch;
    # TODO: implement a disk-based diff for large files.
    if (-T $src) {
        if (compare($src, $obj_path) != 0) {
            # BUGFIX: list-form pipe open instead of interpolating paths
            # into a shell command (broke on quotes; shell-injectable).
            open my $dfh, '-|', 'diff', '-u', $obj_path, $src
                or die "Could not run diff: $!";
            $patch = do { local $/; <$dfh> };
            close $dfh;
        }
    }
    else {
        $patch = make_bin_patch($src, $obj_path);
    }
    if ($patch) {
        my $obj_name = basename($obj_path);
        $patches->{$obj_name} = $patch;
    }
}

# Build a block-level binary patch: an 8-byte new-size header followed by
# (offset Q, length L, payload) records for each 4 KiB block that differs.
# Returns undef when the files are block-identical.
sub make_bin_patch {
    my ($new_file, $old_file) = @_;
    my $new_size = -s $new_file;

    open my $f_new, '<:raw', $new_file or die $!;
    open my $f_old, '<:raw', $old_file or die $!;

    # Start the patch with the new total size.
    my $patch    = pack("Q", $new_size);
    my $offset   = 0;
    my $blk_size = 4096;
    while (1) {
        my $read_new = sysread($f_new, my $buf_new, $blk_size);
        my $read_old = sysread($f_old, my $buf_old, $blk_size);
        last if !$read_new && !$read_old;
        if ($buf_new ne $buf_old) {
            # Offset (Q) and length (L), then the raw new-file block.
            $patch .= pack("QL", $offset, length($buf_new)) . $buf_new;
        }
        $offset += $blk_size;
    }
    close $f_new;
    close $f_old;

    # Only return if we have more than just the size header.
    return length($patch) > 8 ? $patch : undef;
}

# Apply a binary patch produced by make_bin_patch to $base_file in place.
sub apply_bin_patch {
    my ($base_file, $patch_data) = @_;
    open my $fh, '+<:raw', $base_file or die $!;

    # Read the total new size (first 8 bytes).
    my $new_total_size = unpack("Q", substr($patch_data, 0, 8));
    my $pos = 8;

    # Apply block updates.
    while ($pos < length($patch_data)) {
        my ($offset, $len) = unpack("QL", substr($patch_data, $pos, 12));
        $pos += 12;
        my $payload = substr($patch_data, $pos, $len);
        $pos += $len;
        sysseek($fh, $offset, 0);
        syswrite($fh, $payload, $len);
    }

    # Clip file to the new size; removes leftover bytes from the old version.
    truncate($fh, $new_total_size) or die "Could not truncate: $!";
    close $fh;
}

# Convert decimal to a padded 7-char hex string.
sub to_hex_id { sprintf("%07x", $_[0]) }

# Convert hex back to decimal.
sub from_hex_id { hex($_[0]) }

# Open $EDITOR on a scratch file and return the entered message with
# '#' comment lines stripped and surrounding whitespace trimmed.
sub launch_editor {
    my $editor        = $ENV{EDITOR} || $ENV{VISUAL} || 'vi';
    my $temp_msg_file = File::Spec->catfile(REPO, "COMMIT_EDITMSG");

    open my $fh, '>', $temp_msg_file
        or die "Cannot create temp message file: $!";
    print $fh "\n# Enter the commit message for your changes.\n";
    print $fh "# Lines starting with '#' will be ignored.\n";
    close $fh;

    system("$editor \"$temp_msg_file\"");

    my $final_msg = "";
    if (-e $temp_msg_file) {
        open my $rfh, '<', $temp_msg_file or die $!;
        while (my $line = <$rfh>) {
            next if $line =~ /^#/;    # Strip out the helper comments
            $final_msg .= $line;
        }
        close $rfh;
        unlink($temp_msg_file);       # Clean up
    }
    $final_msg =~ s/^\s+|\s+$//g;     # Trim whitespace
    return $final_msg;
}

# Write $content to $path, clobbering any existing file.
sub write_file {
    my ($path, $content) = @_;
    open my $fh, '>', $path or die "Could not open '$path' for writing: $!";
    print $fh $content;
    close $fh or die "Could not close '$path' after writing: $!";
}

# Create (or truncate) an empty file, making parent directories as needed.
sub touch_file {
    my ($path) = @_;
    my $dir = dirname($path);
    make_path($dir) unless -d $dir;
    open my $fh, '>', $path or die "Could not touch $path: $!";
    close $fh;
}

# SHA-1 of a file's content; for symlinks, hash the link target instead
# of following it.
sub hash_file_content {
    my ($filename) = @_;
    my $sha = Digest::SHA->new(1);
    if (-l $filename) {
        my $target = readlink($filename);
        die "Could not readlink '$filename': $!" unless defined $target;
        $sha->add($target);
    }
    else {
        open(my $fh, '<:raw', $filename) or die "Could not open '$filename': $!";
        $sha->addfile($fh);
        close($fh);
    }
    return $sha->hexdigest;
}

# Iteratively walk the working tree under @paths and return an iterator
# yielding { path, mtime, size } in sorted path order. Small trees are
# sorted in memory; past MEM_LIMIT the listing spills to a temp file and
# is sorted with the external sort(1).
sub stream_tree {
    my (@paths) = @_;
    @paths = (".") unless @paths;

    my $chunk_size = 1024 * 64;
    my @buf;
    my $buf_size = 0;
    my $tot_size = 0;
    my $use_disk = 0;
    my ($tmp_fh, $tmp_path);

    my $flush = sub {
        if (!$use_disk) {
            ($tmp_fh, $tmp_path) = tempfile(UNLINK => 1);
            $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size);
            binmode $tmp_fh, ":raw";    # Keep it raw for consistent offsets
            $use_disk = 1;
        }
        print {$tmp_fh} @buf;
        @buf      = ();
        $buf_size = 0;
    };

    my @stack = @paths;
    while (@stack) {
        my $path = (pop @stack) =~ s|^\./||r;
        next if $path eq REPO;          # Skip the repo itself

        # lstat + '_' filetests: symlinked dirs are NOT followed.
        my @st = lstat($path);
        if (-d _) {
            if (opendir(my $dh, $path)) {
                push @stack,
                    map  { File::Spec->catfile($path, $_) }
                    grep { $_ ne '.' && $_ ne '..' && $_ ne REPO }
                    readdir($dh);
                closedir($dh);
            }
        }
        elsif (-f _ || -l _) {
            # Format: path \t mtime \t size
            my $line = "$path\t$st[9]\t$st[7]\n";
            push @buf, $line;
            $buf_size += length($line);
            $tot_size += length($line);
            if (   (!$use_disk && $tot_size > MEM_LIMIT)
                || ($use_disk && $buf_size > $chunk_size)) {
                $flush->();
            }
        }
    }

    if (!$use_disk) {
        @buf = sort @buf;
        return sub {
            my $line = shift @buf;
            return unless $line;
            chomp $line;
            my ($p, $m, $s) = split(/\t/, $line);
            return { path => $p, mtime => $m, size => $s };
        };
    }
    else {
        $flush->() if @buf;
        close $tmp_fh;
        # NOTE(review): sort(1) collation may differ from Perl's `cmp`
        # under non-C locales - consider forcing LC_ALL=C.
        open(my $sort_fh, "-|", "sort", "-t", "\t", "-k1,1", $tmp_path)
            or die $!;
        return sub {
            my $line = <$sort_fh>;
            unless ($line) {
                close $sort_fh;
                return;
            }
            chomp $line;
            # BUGFIX: lines are written path\tmtime\tsize; the old code
            # unpacked ($p, $s, $m) and swapped mtime/size.
            my ($p, $m, $s) = split(/\t/, $line);
            return { path => $p, mtime => $m, size => $s };
        };
    }
}

# Return an iterator over index rows as hashes. Yields
# { staged_hash, base_hash, mtime, size, path }; returns empty-handed
# when the index is missing or empty.
# BUGFIX: keys were s_hash/b_hash while every consumer (run_add,
# run_commit) reads staged_hash/base_hash.
sub stream_index {
    my $idx = INDEX;
    return sub { return; } unless -e $idx && -s $idx > 0;

    open(my $fh, "<:raw", $idx) or die "Could not open index: $!";
    return sub {
        my $line = <$fh>;
        unless (defined $line) {
            close $fh;
            return;
        }
        chomp $line;
        # Format: staged, base, mtime, size, path
        my ($s_hash, $b_hash, $mtime, $size, $path) = split(/\t/, $line, 5);
        return {
            staged_hash => $s_hash,
            base_hash   => $b_hash,
            mtime       => $mtime,
            size        => $size,
            path        => $path,
        };
    };
}

# True if $path is selected by any of the given pathspecs: '.' matches
# everything, otherwise exact match or directory-prefix match.
sub matches_paths {
    my ($path, $specs_ref) = @_;
    return 1 if !@$specs_ref;
    foreach my $spec (@$specs_ref) {
        my $clean_spec = $spec =~ s|/+$||r;    # Remove trailing slashes
        return 1 if $clean_spec eq '.';
        return 1 if $path eq $clean_spec;
        return 1 if $path =~ /^\Q$clean_spec\E\//;    # Directory match
    }
    return 0;
}

# Write a tree object (staged_hash \t path per index row) into the object
# store, spilling to disk past MEM_LIMIT, and return its hash.
sub snapshot_tree {
    my $it = stream_index();
    my @buf;
    my $use_disk   = 0;
    my $total_size = 0;
    my $chunk_size = 1024 * 64;
    my $sha        = Digest::SHA->new(1);
    my ($tmp_fh, $tmp_path);

    while (my $entry = $it->()) {
        my $line = "$entry->{staged_hash}\t$entry->{path}\n";
        $sha->add($line);
        $total_size += length($line);
        if (!$use_disk && $total_size > MEM_LIMIT) {
            ($tmp_fh, $tmp_path) = tempfile();
            $tmp_fh->setvbuf(undef, POSIX::_IOFBF(), $chunk_size);
            binmode $tmp_fh, ":raw";
            print {$tmp_fh} @buf;
            @buf      = ();
            $use_disk = 1;
        }
        if ($use_disk) {
            print {$tmp_fh} $line;
        }
        else {
            push @buf, $line;
        }
    }

    my $tree_hash = $sha->hexdigest;
    my $obj_path  = File::Spec->catfile(OBJ_DIR, $tree_hash);
    if (!-e $obj_path) {
        if ($use_disk) {
            close $tmp_fh;
            rename($tmp_path, $obj_path) or die "Rename failed: $!";
        }
        else {
            # BUGFIX: referenced undeclared @buffer (compile error under
            # strict); the accumulator is @buf.
            write_file($obj_path, join("", @buf));
        }
    }
    else {
        unlink($tmp_path) if $use_disk;
    }
    return $tree_hash;
}