diff options
Diffstat (limited to 'bm/bm_commit.pl')
| -rw-r--r-- | bm/bm_commit.pl | 217 |
1 files changed, 217 insertions, 0 deletions
#!/usr/bin/perl
# bm/bm_commit.pl
#
# Commit benchmark: seeds a throw-away repository, then replays the same
# sequence of small "surgical" edits + commits once with `urn` and once with
# `git`, sampling time / memory / page faults / metadata size along the way.
# A text report is written to BM_COMMIT_<files>_<commits>.txt and opened in
# $PAGER.
#
# Usage: perl bm_commit.pl <file_count> <depth> <total_commits> <file_perc> <line_perc>
# Example: perl bm_commit.pl 100 5 50 10 2
use strict;
use warnings;
use File::Copy;
use File::Spec;
use File::Path qw(remove_tree make_path);
use File::Find;
use File::Basename;
use Cwd qw(getcwd abs_path);
use Time::HiRes qw(time);
use List::Util qw(shuffle);

my $usage = "Usage: perl bm_commit.pl <file_count> <depth> <total_commits> <file_perc> <line_perc>\n";

my ($files, $depth, $total_commits, $file_perc, $line_perc) = @ARGV;
if (!defined $line_perc) {
    die $usage;
}

# The arguments are used in arithmetic and interpolated into shell commands
# and the report file name, so reject anything that is not a plain number.
for my $count ($files, $depth, $total_commits) {
    die "Counts must be non-negative integers.\n$usage"
        unless $count =~ /\A\d+\z/;
}
for my $perc ($file_perc, $line_perc) {
    die "Percentages must be non-negative numbers.\n$usage"
        unless $perc =~ /\A\d+(?:\.\d+)?\z/;
}

my $base_dir = getcwd();
my $urn_bin  = abs_path(File::Spec->catfile("..", "urn"));
my $seed_bin = abs_path("seed.pl");
my $bm_repo  = "sandbox";

# Both helpers are run via `perl <path>`; fail early instead of benchmarking
# commands that cannot start.
die "Cannot find urn at ../urn (relative to $base_dir)\n"
    unless defined $urn_bin && -f $urn_bin;
die "Cannot find seed.pl in $base_dir\n"
    unless defined $seed_bin && -f $seed_bin;

# Sample roughly five commits per run (always at least every commit when
# total_commits < 5).
my $sample_rate = int($total_commits / 5) || 1;
my %results;
my %final_stats = (
    URN => { size => "N/A", inodes => 0 },
    GIT => { size => "N/A", inodes => 0 },
);
my $initial_repo_size = "0 KB";
my $rebase_count      = 0;
my %last_base_hashes;

# Return the disk usage of $dir as reported by `du -sk`, formatted "<n> KB".
# Returns "0 KB" when $dir is not a directory or du produced no number.
sub get_size {
    my ($dir) = @_;
    return "0 KB" unless -d $dir;

    my $out = `du -sk $dir 2>/dev/null`;

    # Only trust the capture when the match succeeded; $1 would otherwise
    # still hold the result of some earlier match.
    my $kb = 0;
    if (defined $out && $out =~ /^(\d+)/) {
        $kb = $1;
    }
    return $kb . " KB";
}

# Count the distinct inode numbers found under $dir (the directory itself
# included). Returns 0 when $dir is not a directory.
sub count_inodes {
    my ($dir) = @_;
    return 0 unless -d $dir;

    my %seen;
    find(
        sub {
            my @st = lstat($_);
            $seen{ $st[1] } = 1 if @st;    # $st[1] is the inode number
        },
        $dir
    );
    return scalar(keys %seen);
}

# Scan .urn/index (relative to the current directory) and bump the global
# $rebase_count every time the value in column 3 changed for a path
# (column 6) since the previous scan. Rows with fewer than six tab-separated
# columns are ignored; a missing or unreadable index is silently skipped.
sub track_rebases {
    return unless -f ".urn/index";
    open(my $fh, '<', ".urn/index") or return;

    while (my $row = <$fh>) {
        chomp $row;
        my @cols = split(/\t/, $row);
        next unless @cols >= 6;

        my ($b_hash, $path) = ($cols[2], $cols[5]);
        if (exists $last_base_hashes{$path} && $last_base_hashes{$path} ne $b_hash) {
            $rebase_count++;
        }
        $last_base_hashes{$path} = $b_hash;
    }
    close($fh);
    return;
}

# Build a random "<type> <name>_<n> = <value>;" line that has exactly the same
# byte length as $original_line, so the edited file keeps its size.
# The original line's leading whitespace is reused as indentation.
sub generate_surgical_line {
    my ($original_line) = @_;
    my @types = qw(int char float double bool uint32_t);
    my @vars  = qw(offset limit buffer status count ptr index);

    my ($indent) = $original_line =~ /^(\s*)/;
    $indent = " " unless $indent;    # unindented lines get a single space

    my $type = $types[rand @types];
    my $var  = $vars[rand @vars] . "_" . int(rand(100));
    my $val  = int(rand(1000));

    my $new_line = "$indent$type $var = $val;\n";

    # Byte matching: pad with spaces before the newline to keep file size identical
    my $target_len = length($original_line);
    if (length($new_line) < $target_len) {
        substr($new_line, -1, 0, " " x ($target_len - length($new_line)));
    }
    elsif (length($new_line) > $target_len) {
        # NOTE(review): hard truncation can cut through the name or value, so
        # the result may no longer match the "eligible line" pattern used by
        # run_commit_benchmark and drops out of later rounds.
        $new_line = substr($new_line, 0, $target_len - 2) . ";\n";
    }

    return $new_line;
}

# Run the full benchmark for one tool ("URN" or "GIT"):
#   1. re-seed the sandbox via seed.pl and make the initial commit,
#   2. for commits 2..$total_commits, rewrite a share of eligible lines in a
#      fixed set of target files and commit,
#   3. on sampled commits, run the commit under /usr/bin/time and record the
#      figures in %results; after the loop store final size/inodes in
#      %final_stats (for GIT after `git gc`).
# The sandbox is removed again before returning.
sub run_commit_benchmark {
    my ($tool_name) = @_;
    print ">>> Starting COMMIT BENCHMARK: $tool_name\n";

    my $is_urn = $tool_name eq "URN";
    my $tool   = $is_urn ? "perl $urn_bin" : "git";
    my $meta   = $is_urn ? ".urn"          : ".git";

    remove_tree($bm_repo) if -d $bm_repo;
    system("perl $seed_bin $files $depth > /dev/null 2>&1");

    # Measured once, on the first run only.
    $initial_repo_size = get_size($bm_repo) if $initial_repo_size eq "0 KB";

    chdir($bm_repo) or die "chdir $bm_repo: $!";

    system("$tool init > /dev/null 2>&1");
    system("$tool add . > /dev/null 2>&1");
    system("$tool commit -m 'initial' > /dev/null 2>&1");

    track_rebases() if $is_urn;

    # Collect working-tree files, skipping the tools' metadata. Only entries
    # named exactly .git/.urn are pruned, so files such as ".gitignore" or
    # "notes.urnx" are still picked up.
    my @file_list;
    find(
        sub {
            if ($_ eq '.git' || $_ eq '.urn') {
                $File::Find::prune = 1;
                return;
            }
            push @file_list, $File::Find::name if -f $_;
        },
        "."
    );
    die "No files found in $bm_repo to modify\n" unless @file_list;

    # Sort so that both tool runs pick the same targets regardless of the
    # order in which the filesystem returns directory entries.
    @file_list = sort @file_list;

    my $num_to_change = int(scalar(@file_list) * ($file_perc / 100)) || 1;
    $num_to_change = scalar(@file_list) if $num_to_change > @file_list;
    my @target_files = @file_list[0 .. $num_to_change - 1];

    for my $i (2 .. $total_commits) {
        for my $target (@target_files) {
            open(my $fh, '<', $target) or die "Read fail: $target - $!";
            my @lines = <$fh>;
            close($fh);

            # Find lines that are simple variable assignments to maintain surgical diffs
            my @eligible = grep { $lines[$_] =~ /^\s*\w+ \w+ = \d+;/ } (0 .. $#lines);
            next unless @eligible;

            # Calculate count based on percentage of eligible lines
            my $to_mod = int(scalar(@eligible) * ($line_perc / 100));
            $to_mod = 1                 if $to_mod == 0;
            $to_mod = scalar(@eligible) if $to_mod > @eligible;

            # Uniform random pick; sorting with a random comparator is biased.
            my @indices = (shuffle @eligible)[0 .. $to_mod - 1];

            for my $idx (@indices) {
                $lines[$idx] = generate_surgical_line($lines[$idx]);
            }

            open(my $out, '>', $target) or die "Write fail: $target - $!";
            print {$out} join('', @lines);
            close($out) or die "Write fail: $target - $!";

            # Bump atime/mtime to "now"; the file size is unchanged by design.
            utime(undef, undef, $target);
        }

        my $cmd = "$tool add . && $tool commit -m 'c$i'";

        if ($i % $sample_rate == 0 || $i == $total_commits) {
            # NOTE(review): `-l` and the patterns below match the BSD/macOS
            # `time` report; other platforms may print a different format, in
            # which case all figures stay 0. The byte->MB conversion assumes
            # RSS is reported in bytes -- confirm on the target system.
            my $raw_output = `/usr/bin/time -l sh -c "$cmd" 2>&1`;
            track_rebases() if $is_urn;

            my ($real, $rss, $maj, $min) = (0, 0, 0, 0);
            $real = $1 if $raw_output =~ /(\d+\.\d+)\s+real/;
            $rss  = sprintf("%.2f MB", $1 / 1024 / 1024)
                if $raw_output =~ /(\d+)\s+maximum resident set size/;
            $min = $1 if $raw_output =~ /(\d+)\s+page reclaims/;
            $maj = $1 if $raw_output =~ /(\d+)\s+page faults/;

            $results{$i}{$tool_name} = {
                real   => $real . "s",
                rss    => $rss || "0 MB",
                faults => "Maj:$maj / Min:$min",
                inodes => count_inodes($meta),
                size   => get_size($meta),
            };
            print " [Commit $i] $tool_name sampled.\n";
        }
        else {
            system("$cmd > /dev/null 2>&1");
            track_rebases() if $is_urn;
        }
    }

    system("git gc --prune=now --quiet") if $tool_name eq "GIT";

    $final_stats{$tool_name} = {
        size   => get_size($meta)     || "0 KB",
        inodes => count_inodes($meta) || 0,
    };

    # Must be back in the base directory before deleting the relative
    # sandbox path.
    chdir($base_dir) or die "chdir $base_dir: $!";
    remove_tree($bm_repo);
    return;
}

run_commit_benchmark("URN");
run_commit_benchmark("GIT");

my $out_file = "BM_COMMIT_${files}_${total_commits}.txt";
open(my $res, '>', $out_file) or die $!;
print $res "=============================================================\n";
print $res " COMMIT BENCHMARK: $files files ($total_commits commits)\n";
print $res " CONDITIONS: Depth=$depth, Files Mod=$file_perc%, Line Mod=$line_perc%\n";
print $res " INITIAL REPO SIZE: $initial_repo_size\n";
print $res "=============================================================\n\n";

# One table per sampled commit, only where both tools produced a sample.
foreach my $i (sort { $a <=> $b } keys %results) {
    my $u = $results{$i}{"URN"};
    my $g = $results{$i}{"GIT"};
    next unless defined $u && defined $g;

    print $res "SNAPSHOT: Commit #$i\n";
    print $res "-------------------------------------------------------------\n";
    printf $res "%-15s | %-20s | %-20s\n", "METRIC", "URN", "GIT";
    print $res "----------------+----------------------+---------------------\n";
    printf $res "%-15s | %20s | %20s\n", "Time",        $u->{real},   $g->{real};
    printf $res "%-15s | %20s | %20s\n", "Max RSS",     $u->{rss},    $g->{rss};
    printf $res "%-15s | %20s | %20s\n", "Page faults", $u->{faults}, $g->{faults};
    printf $res "%-15s | %20s | %20s\n", "Inodes",      $u->{inodes}, $g->{inodes};
    printf $res "%-15s | %20s | %20s\n", "Repo size",   $u->{size},   $g->{size};
    print $res "-------------------------------------------------------------\n\n";
}

print $res "AFTER GIT GC\n";
print $res "-------------------------------------------------------------\n";
printf $res "%-15s | %20s | %20s\n", "Final Size",   $final_stats{URN}{size},   $final_stats{GIT}{size};
printf $res "%-15s | %20s | %20s\n", "Final Inodes", $final_stats{URN}{inodes}, $final_stats{GIT}{inodes};
print $res "-------------------------------------------------------------\n\n";

print $res "TOTAL URN REBASES: $rebase_count\n";
close($res) or die "close $out_file: $!";

# $PAGER may carry its own options (e.g. "less -R"), so it goes through the shell.
my $pager = $ENV{PAGER} || 'less';
system("$pager $out_file");
