#!/usr/bin/perl
# Usage: perl bm_commit.pl <file_count> <depth> <total_commits> <file_perc> <line_perc>
# Example: perl bm_commit.pl 100 5 50 10 2
use strict;
use warnings;
use Cwd qw(getcwd abs_path);
use File::Basename;
use File::Copy;
use File::Find;
use File::Path qw(remove_tree make_path);
use File::Spec;
use List::Util qw(shuffle);
use Time::HiRes qw(time);

# --- Command-line arguments --------------------------------------------------
my ($files, $depth, $total_commits, $file_perc, $line_perc) = @ARGV;
if (!defined $line_perc) {
	die "Usage: perl bm_commit.pl <file_count> <depth> <total_commits> <file_perc> <line_perc>\n";
}

# The arguments are used in arithmetic and interpolated into shell commands
# further down, so reject anything that is not a plain number up front.
for my $pair ([file_count => $files], [depth => $depth], [total_commits => $total_commits]) {
	my ($label, $value) = @$pair;
	die "<$label> must be a non-negative integer, got '$value'\n"
		unless $value =~ /\A\d+\z/;
}
for my $pair ([file_perc => $file_perc], [line_perc => $line_perc]) {
	my ($label, $value) = @$pair;
	die "<$label> must be a non-negative number, got '$value'\n"
		unless $value =~ /\A\d+(?:\.\d+)?\z/;
}

# --- Paths -------------------------------------------------------------------
my $base_dir = getcwd();
my $urn_bin  = abs_path(File::Spec->catfile("..", "urn"));
my $seed_bin = abs_path("seed.pl");
my $bm_repo  = "sandbox";

# Both scripts are run as "perl <path> ..."; fail early instead of letting
# every later command fail silently behind "> /dev/null 2>&1".
die "Cannot locate the urn script at ../urn (relative to $base_dir)\n"
	unless defined $urn_bin && -f $urn_bin;
die "Cannot locate seed.pl in $base_dir\n"
	unless defined $seed_bin && -f $seed_bin;

# --- Shared benchmark state --------------------------------------------------
# Roughly five detailed samples per run; never less than every commit.
my $sample_rate = int($total_commits / 5) || 1;
# $results{<commit number>}{<tool name>} = { real, rss, faults, inodes, size }
my %results;
# Metadata-directory statistics captured once each tool's run has finished.
my %final_stats = ( URN => { size => "N/A", inodes => 0 }, GIT => { size => "N/A", inodes => 0 } );
# "0 KB" doubles as the "not measured yet" sentinel.
my $initial_repo_size = "0 KB";
# Number of times a path's base hash changed between reads of .urn/index.
my $rebase_count = 0;
# path => base hash seen the last time .urn/index was read.
my %last_base_hashes;

# Report the disk usage of a directory as a "<kilobytes> KB" string.
#
# Parameters:
#   $dir - path of the directory to measure.
# Returns:
#   "<n> KB", where <n> is the leading number printed by `du -sk`, or "0 KB"
#   when $dir is not a directory or du produced no usable output.
sub get_size {
	my $dir = shift;
	return "0 KB" unless defined $dir && -d $dir;

	# Single-quote the path for the shell so spaces and metacharacters in the
	# directory name are passed to du literally.
	(my $quoted = $dir) =~ s/'/'\\''/g;
	my $output = `du -sk '$quoted' 2>/dev/null`;

	# Capture in list context: a failed match yields undef here, whereas
	# reading $1 after a failed match would return a stale value left over
	# from an earlier, unrelated match.
	my ($kb) = defined $output ? $output =~ /^(\d+)/ : ();
	return ($kb || 0) . " KB";
}

# Count the distinct inode numbers found at and beneath a directory.
#
# Parameters:
#   $dir - directory to walk.
# Returns:
#   The number of unique inode numbers reported by lstat for every entry
#   File::Find visits (entries sharing an inode are counted once), or 0
#   when $dir is not a directory.
sub count_inodes {
	my $dir = shift;
	my %seen;

	if (-d $dir) {
		find(
			sub {
				# lstat, not stat: symlinks are counted themselves, not their targets.
				my @info = lstat($_);
				return unless @info;
				$seen{ $info[1] } = 1;
			},
			$dir
		);
	}

	return scalar(keys %seen);
}

# Scan .urn/index (relative to the current directory) and count how many
# paths changed their base hash since the previous scan.
#
# Each line is split on tabs; lines with fewer than six columns are skipped.
# Column index 2 is read as the base hash and column index 5 as the path
# (names taken from this code; the index format itself is not defined here).
# Updates the file-level $rebase_count and %last_base_hashes.
# Does nothing when the index is missing or cannot be opened.
sub track_rebases {
	return unless -f ".urn/index";
	open(my $fh, '<', ".urn/index") or return;

	# Read into a lexical rather than the global $_, so callers that are
	# themselves iterating with $_ are not clobbered.
	while (my $line = <$fh>) {
		chomp $line;
		my @cols = split(/\t/, $line);
		next unless @cols >= 6;

		my ($b_hash, $path) = @cols[2, 5];
		$rebase_count++
			if exists $last_base_hashes{$path} && $last_base_hashes{$path} ne $b_hash;
		$last_base_hashes{$path} = $b_hash;
	}
	close($fh);
}

# Build a random "<type> <var> = <number>;" line that replaces $original_line
# without changing the file's byte size.
#
# Parameters:
#   $original_line - the line being replaced (normally newline-terminated).
# Returns:
#   A string of exactly length($original_line) characters:
#     * a freshly generated assignment using the original's leading
#       spaces/tabs, padded with spaces before the trailing "\n"; or
#     * when no generated assignment can fit, the original line with only
#       its numeric literal changed (same digit count); or
#     * the original line unchanged if it is not an assignment at all.
#   The result is never truncated mid-token, so it keeps matching the
#   /^\s*\w+ \w+ = \d+;/ shape and stays eligible for later rewrites.
sub generate_surgical_line {
	my ($original_line) = @_;
	my @types = qw(int char float double bool uint32_t);
	my @vars  = qw(offset limit buffer status count ptr index);

	my $target_len = length($original_line);

	# Keep the original indentation verbatim; this pattern always matches,
	# so an unindented line stays unindented.
	my ($indent) = $original_line =~ /^([ \t]*)/;

	# Characters available for the statement once indent and "\n" are reserved.
	my $budget = $target_len - length($indent) - 1;

	my $type = $types[rand @types];
	my $var  = $vars[rand @vars] . "_" . int(rand(100));
	my $val  = int(rand(1000));
	my $stmt = "$type $var = $val;";

	if (length($stmt) > $budget) {
		# The random pick is too long: retry with the compact form
		# "<type> <var> = <digit>;" restricted to combinations that fit.
		my @fitting;
		for my $t (@types) {
			for my $v (@vars) {
				push @fitting, "$t $v" if length("$t $v = 0;") <= $budget;
			}
		}

		if (@fitting) {
			$stmt = $fitting[rand @fitting] . " = " . int(rand(10)) . ";";
		} elsif ($original_line =~ /^(\s*\w+ \w+ = )(\d+)(;.*)\z/s) {
			# Nothing fits: change only the number, keeping its width.
			my ($head, $digits, $tail) = ($1, $2, $3);
			my $fresh = join('', map { int(rand(10)) } 1 .. length($digits));
			# Offset of 1..9 guarantees the last digit differs from the original.
			substr($fresh, -1, 1) = (substr($digits, -1, 1) + 1 + int(rand(9))) % 10;
			return "$head$fresh$tail";
		} else {
			return $original_line;
		}
	}

	# Byte matching: pad with spaces before the newline to keep file size identical
	return $indent . $stmt . (" " x ($budget - length($stmt))) . "\n";
}

# Rewrite a share of one file's simple-assignment lines in place.
#
# Parameters:
#   $target - path of the file to modify.
# Returns:
#   1 when the file was rewritten, 0 when it has no eligible lines.
# Dies when the file cannot be read or written.
sub _mutate_file {
	my ($target) = @_;

	open(my $fh, '<', $target) or die "Read fail: $target - $!";
	my @lines = <$fh>;
	close($fh);

	# Find lines that are simple variable assignments to maintain surgical diffs
	my @eligible = grep { $lines[$_] =~ /^\s*\w+ \w+ = \d+;/ } (0 .. $#lines);
	return 0 unless @eligible;

	# Calculate count based on percentage of eligible lines, always at least
	# one and never more than exist.
	my $to_mod = int(scalar(@eligible) * ($line_perc / 100));
	$to_mod = 1                 if $to_mod < 1;
	$to_mod = scalar(@eligible) if $to_mod > @eligible;

	# shuffle gives an unbiased random selection; sorting with a random
	# comparator is inconsistent and its result is not well defined.
	my @indices = (shuffle(@eligible))[0 .. $to_mod - 1];
	for my $idx (@indices) {
		$lines[$idx] = generate_surgical_line($lines[$idx]);
	}

	open(my $out, '>', $target) or die "Write fail: $target - $!";
	print $out join('', @lines);
	close($out) or die "Write fail: $target - $!";

	# Replacement lines keep the byte size identical, so bump the timestamps
	# explicitly to the current time.
	utime(undef, undef, $target);
	return 1;
}

# Run the full commit benchmark for one tool inside a freshly seeded sandbox.
#
# Parameters:
#   $tool_name - "URN" to drive "perl $urn_bin"; any other value drives git.
# Side effects:
#   * recreates $bm_repo through seed.pl and removes it again afterwards;
#   * fills $results{<commit>}{$tool_name} for sampled commits and
#     $final_stats{$tool_name} at the end;
#   * sets $initial_repo_size the first time it is measured;
#   * updates the rebase counters via track_rebases() for URN.
# Dies when the sandbox cannot be entered or left, when it contains no files,
# or when a target file cannot be read or written.
sub run_commit_benchmark {
	my ($tool_name) = @_;
	print ">>> Starting COMMIT BENCHMARK: $tool_name\n";

	my $is_urn   = $tool_name eq "URN";
	my $tool_cmd = $is_urn ? "perl $urn_bin" : "git";
	my $meta_dir = $is_urn ? ".urn" : ".git";

	remove_tree($bm_repo) if -d $bm_repo;
	system("perl $seed_bin $files $depth > /dev/null 2>&1") == 0
		or warn "seed.pl did not exit cleanly (status $?)\n";

	$initial_repo_size = get_size($bm_repo) if $initial_repo_size eq "0 KB";

	chdir($bm_repo) or die "Cannot enter $bm_repo: $!";

	system("$tool_cmd init > /dev/null 2>&1");
	system("$tool_cmd add . > /dev/null 2>&1");
	system("$tool_cmd commit -m 'initial' > /dev/null 2>&1");

	track_rebases() if $is_urn;

	my @file_list;
	find(sub {
		# Do not descend into the tools' metadata directories at all.
		if (-d $_ && /^\.(?:git|urn)$/) {
			$File::Find::prune = 1;
			return;
		}
		push @file_list, $File::Find::name if -f $_ && $File::Find::name !~ /\.(git|urn)/;
	}, ".");

	# Sort so the selection does not depend on directory traversal order.
	@file_list = sort @file_list;
	die "No files found in $bm_repo to modify\n" unless @file_list;

	# At least one file, at most all of them.
	my $num_to_change = int(scalar(@file_list) * ($file_perc / 100)) || 1;
	$num_to_change = 1                  if $num_to_change < 1;
	$num_to_change = scalar(@file_list) if $num_to_change > @file_list;
	my @target_files = @file_list[0 .. $num_to_change - 1];

	# Commit 1 is the initial commit above; the loop produces commits 2..N.
	for my $i (2 .. $total_commits) {
		for my $target (@target_files) {
			_mutate_file($target);
		}

		my $cmd = "$tool_cmd add . && $tool_cmd commit -m 'c$i'";

		if ($i % $sample_rate == 0 || $i == $total_commits) {
			# NOTE(review): the -l flag and the labels parsed below match
			# BSD/macOS time(1) output; confirm on the target platform.
			my $raw_output = `/usr/bin/time -l sh -c "$cmd" 2>&1`;
			track_rebases() if $is_urn;

			my ($real, $rss, $maj, $min) = (0, 0, 0, 0);
			$real = $1 if $raw_output =~ /(\d+\.\d+)\s+real/;
			# The conversion treats the reported value as bytes.
			$rss = sprintf("%.2f MB", $1 / 1024 / 1024) if $raw_output =~ /(\d+)\s+maximum resident set size/;
			$min = $1 if $raw_output =~ /(\d+)\s+page reclaims/;
			$maj = $1 if $raw_output =~ /(\d+)\s+page faults/;

			$results{$i}{$tool_name} = {
				real   => $real . "s",
				rss    => $rss || "0 MB",
				faults => "Maj:$maj / Min:$min",
				inodes => count_inodes($meta_dir),
				size   => get_size($meta_dir),
			};
			print "   [Commit $i] $tool_name sampled.\n";
		} else {
			system("$cmd > /dev/null 2>&1");
			track_rebases() if $is_urn;
		}
	}

	# The final statistics are taken after git has repacked its objects.
	system("git gc --prune=now --quiet") if $tool_name eq "GIT";

	$final_stats{$tool_name} = {
		size   => get_size($meta_dir) || "0 KB",
		inodes => count_inodes($meta_dir) || 0,
	};

	# Leave the sandbox before deleting it; $bm_repo is a relative path, so
	# removing it from the wrong directory must not be attempted.
	chdir($base_dir) or die "Cannot return to $base_dir: $!";
	remove_tree($bm_repo);
}

# Run both tools back to back, then write a side-by-side report.
run_commit_benchmark("URN");
run_commit_benchmark("GIT");

my $out_file = "BM_COMMIT_${files}_${total_commits}.txt";
open(my $res, '>', $out_file) or die "Cannot write $out_file: $!";
print $res "=============================================================\n";
print $res " COMMIT BENCHMARK: $files files ($total_commits commits)\n";
print $res " CONDITIONS: Depth=$depth, Files Mod=$file_perc%, Line Mod=$line_perc%\n";
print $res " INITIAL REPO SIZE: $initial_repo_size\n";
print $res "=============================================================\n\n";

# Row label => key in the per-sample hash stored by run_commit_benchmark.
my @metric_rows = (
	[ "Time",        "real"   ],
	[ "Max RSS",     "rss"    ],
	[ "Page faults", "faults" ],
	[ "Inodes",      "inodes" ],
	[ "Repo size",   "size"   ],
);

foreach my $i (sort { $a <=> $b } keys %results) {
	my $u = $results{$i}{"URN"};
	my $g = $results{$i}{"GIT"};
	# Only print snapshots that were sampled for both tools.
	next unless defined $u && defined $g;

	print $res "SNAPSHOT: Commit #$i\n";
	print $res "-------------------------------------------------------------\n";
	printf $res "%-15s | %-20s | %-20s\n", "METRIC", "URN", "GIT";
	print $res "----------------+----------------------+---------------------\n";
	for my $row (@metric_rows) {
		my ($label, $key) = @$row;
		printf $res "%-15s | %20s | %20s\n", $label, $u->{$key}, $g->{$key};
	}
	print $res "-------------------------------------------------------------\n\n";
}

print $res "AFTER GIT GC\n";
print $res "-------------------------------------------------------------\n";
printf $res "%-15s | %20s | %20s\n", "Final Size", $final_stats{URN}{size}, $final_stats{GIT}{size};
printf $res "%-15s | %20s | %20s\n", "Final Inodes", $final_stats{URN}{inodes}, $final_stats{GIT}{inodes};
print $res "-------------------------------------------------------------\n\n";

print $res "TOTAL URN REBASES: $rebase_count\n";
# Buffered write errors only surface at close, so check it.
close($res) or die "Cannot finish writing $out_file: $!";

# $PAGER may carry its own options, so it goes through the shell as-is; the
# file name is escaped because it embeds command-line arguments.
my $pager = $ENV{PAGER} || 'less';
system("$pager \Q$out_file\E");