summaryrefslogtreecommitdiffstats
path: root/bm/bm_history.pl
blob: 5ebf7e1ee9ce0874294fd270c0b67990f03f742d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/perl
use strict;
use warnings;
use File::Spec;
use File::Path qw(remove_tree);
use File::Find;
use Cwd qw(getcwd abs_path);

# Command-line arguments: how many files to seed, the depth value handed to
# seed.pl, and how many commits to replay after the initial one.
my ($files, $depth, $total_commits) = @ARGV;
if (!defined $files || !defined $depth || !defined $total_commits) {
	die "Usage: perl bm_history.pl <file_count> <depth> <total_commits>\n";
}

# Both counts are used in arithmetic and as range bounds below, so reject
# anything that is not a plain non-negative integer up front.
for my $count_arg ($files, $total_commits) {
	next if $count_arg =~ /\A\d+\z/;
	die "FATAL: <file_count> and <total_commits> must be non-negative integers.\n";
}

my $base_dir = getcwd();
my $urn_bin  = abs_path(File::Spec->catfile("..", "urn"));
my $seed_bin = abs_path("seed.pl");
my $bm_repo  = "bm_repo";

# Both helper scripts are resolved relative to the current directory and
# later interpolated into shell commands; fail early if either is missing.
die "FATAL: Cannot find ../urn relative to $base_dir.\n"
	unless defined $urn_bin && -e $urn_bin;
die "FATAL: Cannot find seed.pl in $base_dir.\n"
	unless defined $seed_bin && -e $seed_bin;

# Every $sample_rate-th commit is measured (about five samples per run);
# the "|| 1" keeps the modulus non-zero when fewer than five commits are requested.
my $sample_rate = int($total_commits / 5) || 1;

# Collected metrics, keyed as $results{<commit number>}{<tool name>}.
my %results;

# Reports the disk usage of a directory tree as "<kilobytes> KB".
#
# Parameters:
#   $dir - path of the directory to measure with `du -sk`.
#
# Returns "0 KB" when $dir is not a directory or when `du` produced no
# leading number (for example because it could not be run).
sub get_size {
	my $dir = shift;
	return "0 KB" unless -d $dir;

	# Single-quote the path for the shell so whitespace or metacharacters
	# in it cannot split or alter the command.
	(my $quoted = $dir) =~ s/'/'\\''/g;
	my $output = `du -sk '$quoted' 2>/dev/null`;

	# Capture into a lexical instead of reading $1: after a failed match $1
	# still holds whatever an earlier, unrelated match (possibly in the
	# caller) left behind, which would be reported as the size.
	my ($kilobytes) = ($output // '') =~ /^(\d+)/;
	return ($kilobytes || 0) . " KB";
}

# Counts the filesystem entries under a directory, the directory itself
# included, by counting the lines printed by `find`.
#
# Parameters:
#   $dir - path of the directory to scan.
#
# Returns 0 when $dir is not a directory or when the pipeline prints nothing.
sub count_inodes {
	my $dir = shift;
	return 0 unless -d $dir;

	# Single-quote the path for the shell; unquoted, a path containing
	# whitespace would be split into several find arguments.
	(my $quoted = $dir) =~ s/'/'\\''/g;
	my $count = `find '$quoted' 2>/dev/null | wc -l`;
	$count = '' unless defined $count;

	# Some wc implementations pad the number with spaces; strip all whitespace.
	$count =~ s/\s+//g;
	return $count || 0;
}

# Replays the benchmark history for one tool and records sampled metrics.
#
# Re-creates $bm_repo via seed.pl, initialises a repository in it with the
# chosen tool, makes an initial commit, then creates $total_commits further
# commits. Each of those appends one line to about 2% of the seeded files
# (at least one; files are picked at random, so repeats are possible).
# Every $sample_rate-th commit, and the final one, is run under
# `/usr/bin/time -l`; its metrics plus the metadata directory's entry count
# and size are stored in $results{<commit number>}{<tool name>}.
#
# Parameters:
#   $tool_name - "URN" runs the urn script; any other value runs git.
#
# Dies when seeding fails, when $bm_repo cannot be entered, when the init
# command fails, when no files were seeded, or when returning to $base_dir
# fails.
#
# NOTE(review): `time -l` and the phrases parsed from its report look like the
# BSD/macOS time(1) format; elsewhere every sampled metric would fall back to
# zero. Confirm the intended platform.
sub run_history_benchmark {
	my ($tool_name) = @_;
	print ">>> Starting History Benchmark: $tool_name\n";

	remove_tree($bm_repo) if -d $bm_repo;
	system("perl $seed_bin $files $depth > /dev/null 2>&1") == 0
		or die "FATAL: seed.pl failed.\n";

	chdir($bm_repo) or die "FATAL: Could not enter $bm_repo: $!\n";

	# Without a repository every later commit is meaningless, so treat an
	# init failure as fatal, just like a seeding failure.
	my $init_cmd = ($tool_name eq "URN") ? "perl $urn_bin init" : "git init";
	system("$init_cmd > /dev/null 2>&1") == 0
		or die "FATAL: '$init_cmd' failed.\n";

	# Collect the seeded files, skipping anything whose path mentions the
	# tools' own metadata (.git / .urn) created by the init step above.
	my @file_list;
	find(sub {
		push @file_list, $File::Find::name if -f $_ && $File::Find::name !~ m{\.(git|urn)};
	}, ".");

	my $count = scalar(@file_list);
	die "FATAL: No files found in $bm_repo after seeding!" if $count == 0;

	# Use explicit add + commit to ensure untracked files are caught
	my $initial_cmd = ($tool_name eq "URN") ? "perl $urn_bin add . && perl $urn_bin commit -m 'initial'"
	                                        : "git add . && git commit -m 'initial'";
	system("$initial_cmd > /dev/null 2>&1") == 0
		or warn "WARNING: initial $tool_name commit failed; later samples may be unreliable.\n";

	# Number of files touched per commit; constant for the whole run.
	my $to_modify = int($files * 0.02) || 1;

	for my $i (1 .. $total_commits) {
		for (1 .. $to_modify) {
			my $target = $file_list[rand @file_list];
			if (open(my $fh, '>>', $target)) {
				print $fh "Commit $i: Mod for $tool_name test.\n";
				close($fh);
			} else {
				# Best effort: keep going, but do not hide the failure.
				warn "WARNING: could not append to $target: $!\n";
			}
		}

		my $msg = "commit_$i";
		my $run_cmd = ($tool_name eq "URN") ? "perl $urn_bin add . && perl $urn_bin commit -m '$msg'"
		                                    : "git add . && git commit -m '$msg'";

		if ($i % $sample_rate == 0 || $i == $total_commits) {
			# Run add and commit together inside the time wrapper. The
			# command's own stdout is discarded; time's report arrives on
			# stderr and is captured through the outer 2>&1.
			my $raw = `(/usr/bin/time -l sh -c "$run_cmd" > /dev/null) 2>&1`;
			$raw = '' unless defined $raw;

			my ($real) = $raw =~ /(\d+\.\d+)\s+real/;
			my ($rss)  = $raw =~ /(\d+)\s+maximum resident set size/;
			my ($maj)  = $raw =~ /(\d+)\s+page faults caused by physical I\/O/;
			my ($min)  = $raw =~ /(\d+)\s+page reclaims by virtual memory/;

			my $meta = ($tool_name eq "URN") ? ".urn" : ".git";
			$results{$i}{$tool_name} = {
				real   => $real // "0.00",
				# NOTE(review): dividing by 1024 twice assumes the RSS figure
				# is reported in bytes; confirm for the target time(1).
				rss    => $rss ? sprintf("%.2f MB", $rss / 1024 / 1024) : "0.00 MB",
				faults => sprintf("Maj:%d/Min:%d", $maj // 0, $min // 0),
				inodes => count_inodes($meta),
				size   => get_size($meta),
			};
			print "   [Commit $i] $tool_name sampled.\n";
		} else {
			system("$run_cmd > /dev/null 2>&1");
		}
	}

	# The next run and the report writer both rely on being back in the
	# starting directory.
	chdir($base_dir) or die "FATAL: Could not return to $base_dir: $!\n";
}

# Run the same workload parameters through both tools: URN first, then GIT.
run_history_benchmark("URN");
run_history_benchmark("GIT");

# Write a side-by-side report with one table per sampled commit.
my $out_file = "BM_HISTORY_${files}_${total_commits}.txt";
open(my $res, '>', $out_file) or die "FATAL: Cannot write $out_file: $!\n";
print $res "=============================================================\n";
print $res " HISTORY BENCHMARK: $files files ($total_commits commits)\n";
print $res "=============================================================\n\n";

foreach my $i (sort { $a <=> $b } keys %results) {
	my $u = $results{$i}{"URN"};
	my $g = $results{$i}{"GIT"};
	print $res "SNAPSHOT: Commit #$i\n";
	print $res "-------------------------------------------------------------\n";
	printf $res "%-15s | %-20s | %-20s\n", "METRIC", "URN", "GIT";
	print $res "----------------+----------------------+---------------------\n";
	printf $res "%-15s | %20s | %20s\n", "Time", $u->{real}."s", $g->{real}."s";
	printf $res "%-15s | %20s | %20s\n", "Max RSS", $u->{rss}, $g->{rss};
	printf $res "%-15s | %20s | %20s\n", "Page Faults", $u->{faults}, $g->{faults};
	printf $res "%-15s | %20s | %20s\n", "Inodes", $u->{inodes}, $g->{inodes};
	printf $res "%-15s | %20s | %20s\n", "Repo Size", $u->{size}, $g->{size};
	print $res "-------------------------------------------------------------\n\n";
}
# Buffered write errors only surface at close, so check it.
close($res) or die "FATAL: Could not finish writing $out_file: $!\n";

print "\nRun complete! Opening results...\n";

# PAGER may carry arguments (e.g. "less -R"); split it into words so the
# list form of system() receives the program and its options separately.
# Quoted arguments inside PAGER are not supported by this simple split.
my @pager = split ' ', ($ENV{PAGER} || 'less');
@pager = ('less') unless @pager;
if (system(@pager, $out_file) == -1) {
	warn "WARNING: could not start pager '@pager': $!\nResults are in $out_file\n";
}