#!/usr/bin/perl
#
# bm_rebase.pl - compare URN and git on a series of commits that rewrite the
# leading bytes of a fixed subset of files.
#
# For each tool the script seeds a fresh repository via seed.pl, makes an
# initial commit, then produces commits 2..<total_commits>.  A handful of
# commits are sampled with /usr/bin/time and the collected metrics are written
# to BM_REBASE_<files>_<total_commits>.txt, which is finally opened in less.
#
# Arguments (all required):
#   files          passed through to seed.pl
#   depth          passed through to seed.pl
#   total_commits  number of commits to create (the initial commit is #1)
#   file_perc      percentage of the repository's files modified per commit
#   change_perc    percentage of each file's bytes overwritten per commit
use strict;
use warnings;

use Cwd qw(getcwd abs_path);
use File::Basename;
use File::Copy;
use File::Find;
use File::Path qw(remove_tree make_path);
use File::Spec;
use Time::HiRes qw(time);

my $USAGE = "Usage: perl bm_rebase.pl <files> <depth> <total_commits> "
          . "<file_perc> <change_perc>\n";

my ($files, $depth, $total_commits, $file_perc, $change_perc) = @ARGV;
if (grep { !defined $_ } $files, $depth, $total_commits, $file_perc, $change_perc) {
    die $USAGE;
}

# The arguments end up in arithmetic, shell commands and the output file name,
# so reject anything that is not a plain number up front.
for my $count ($files, $depth, $total_commits) {
    die "files, depth and total_commits must be non-negative integers\n$USAGE"
        unless $count =~ /\A\d+\z/;
}
for my $perc ($file_perc, $change_perc) {
    die "file_perc and change_perc must be non-negative numbers\n$USAGE"
        unless $perc =~ /\A\d+(?:\.\d+)?\z/;
}

my $base_dir = getcwd();
my $urn_bin  = abs_path(File::Spec->catfile("..", "urn"));
my $seed_bin = abs_path("seed.pl");
die "Cannot find the urn script at ../urn (relative to $base_dir)\n"
    unless defined $urn_bin && -e $urn_bin;
die "Cannot find seed.pl in $base_dir\n"
    unless defined $seed_bin && -e $seed_bin;

my $bm_repo = "sandbox";

# Sample roughly five commits per run; the final commit is always sampled.
my $sample_rate = int($total_commits / 5) || 1;

my %results;                          # $results{$commit}{$tool} = { metric => value }
my $initial_repo_size = "0 KB";       # recorded once after seeding; not part of the report
my $rebase_count = 0;                 # number of base-hash changes seen in .urn/index
my %last_base_hashes;                 # path => base hash seen at the previous check
my $global_tick = 0;                  # monotonically increasing counter for file headers

# Quote a single word for /bin/sh so that paths containing spaces or shell
# metacharacters are passed through untouched.
sub shell_quote {
    my ($word) = @_;
    (my $escaped = $word) =~ s/'/'\\''/g;
    return "'$escaped'";
}

# Build the shell command line that runs @args with the given tool
# ("URN" => perl <urn script>, anything else => git).
sub tool_command {
    my ($tool_name, @args) = @_;
    my @argv = ($tool_name eq "URN") ? ("perl", $urn_bin, @args) : ("git", @args);
    return join(' ', map { shell_quote($_) } @argv);
}

# Run a shell command with all of its output discarded and return system()'s
# status.  The subshell makes the redirection cover every command of an
# "a && b" chain, not just the last one.
sub run_silently {
    my ($cmd) = @_;
    return system("( $cmd ) > /dev/null 2>&1");
}

# Return the disk usage of $dir as reported by "du -sk", formatted as "<n> KB".
# Returns "0 KB" when $dir is not a directory or du produced no number.
sub get_size {
    my $dir = shift;
    return "0 KB" unless -d $dir;
    my $quoted = shell_quote($dir);
    my $output = `du -sk $quoted 2>/dev/null`;
    # Only trust the capture when the match succeeded; otherwise $1 would
    # still hold the result of some earlier, unrelated match.
    my $kb = (defined $output && $output =~ /\A(\d+)/) ? $1 : 0;
    return "$kb KB";
}

# Count every file system entry at or below $dir (including $dir itself, which
# is what "find DIR | wc -l" reports).  Returns 0 when $dir is not a directory.
sub count_inodes {
    my $dir = shift;
    return 0 unless -d $dir;
    my $count = 0;
    find({ wanted => sub { $count++ }, no_chdir => 1 }, $dir);
    return $count;
}

# Scan .urn/index in the current directory and bump $rebase_count for every
# path whose third tab-separated column differs from the value recorded on the
# previous call.  Lines with fewer than six columns are ignored.
# NOTE(review): column 3 is treated as the base hash and column 6 as the path;
# confirm against the URN index format.
sub track_rebases {
    return unless -f ".urn/index";
    open(my $fh, '<', ".urn/index") or return;
    while (my $line = <$fh>) {
        chomp $line;
        my @cols = split(/\t/, $line);
        next unless @cols >= 6;
        my ($b_hash, $path) = ($cols[2], $cols[5]);
        if (exists $last_base_hashes{$path} && $last_base_hashes{$path} ne $b_hash) {
            $rebase_count++;
        }
        $last_base_hashes{$path} = $b_hash;
    }
    close($fh);
    return;
}

# Overwrite the leading bytes of $target with a unique header padded with dots,
# keep the file length unchanged, and store a copy under $debug_dir.
sub mutate_and_backup {
    my ($target, $commit_no, $debug_dir) = @_;

    open(my $in, '<:raw', $target) or die "Read fail: $target - $!";
    my $content = do { local $/; <$in> };
    close($in);
    $content = '' unless defined $content;

    $global_tick++;
    my $ts          = time();
    my $header      = "C$commit_no-T$ts-N$global_tick ";
    my $total_bytes = length($content);
    my $min_len     = length($header);

    # Rewrite at least the header's worth of bytes, but never more than the
    # file contains, so the file size stays constant.
    my $to_change = int($total_bytes * ($change_perc / 100));
    $to_change = $min_len     if $to_change < $min_len;
    $to_change = $total_bytes if $to_change > $total_bytes;

    my $new_segment = substr($header . ("." x $to_change), 0, $to_change);
    substr($content, 0, $to_change, $new_segment);

    open(my $out, '>:raw', $target) or die "Write fail: $target - $!";
    print $out $content;
    close($out) or die "Write fail: $target - $!";

    # Keep the relative path in the backup so files sharing a basename in
    # different directories do not overwrite each other.
    my $backup     = File::Spec->catfile($debug_dir, $target);
    my $backup_dir = dirname($backup);
    if (eval { make_path($backup_dir) unless -d $backup_dir; 1 }) {
        copy($target, $backup) or warn "Backup failed: $!";
    }
    else {
        warn "Backup failed: $@";
    }

    utime(undef, undef, $target);
    return;
}

# Run the complete benchmark for one tool ("URN" or "GIT"): seed a fresh
# repository, create the initial commit, then create commits 2..$total_commits
# and record metrics for the sampled ones in %results.
sub run_rebase_benchmark {
    my ($tool_name) = @_;
    print ">>> Starting REBASE BENCHMARK: $tool_name\n";

    remove_tree($bm_repo) if -d $bm_repo;
    my $seed_cmd = join(' ', map { shell_quote($_) } ("perl", $seed_bin, $files, $depth));
    run_silently($seed_cmd) == 0
        or warn "seed.pl exited with status " . ($? >> 8) . "\n";

    if ($initial_repo_size eq "0 KB") {
        $initial_repo_size = get_size($bm_repo);
    }

    chdir($bm_repo) or die "Cannot chdir to $bm_repo: $!\n";

    for my $step (['init'], ['add', '.'], ['commit', '-m', 'initial']) {
        run_silently(tool_command($tool_name, @{$step})) == 0
            or warn "$tool_name $step->[0] exited with status " . ($? >> 8) . "\n";
    }
    track_rebases() if $tool_name eq "URN";

    # Collect the working-tree files.  Tool metadata (.git*, .urn*) is skipped
    # by basename and metadata directories are pruned instead of walked.
    my @file_list;
    find(sub {
        if (/\A\.(?:git|urn)/) {
            $File::Find::prune = 1 if -d $_;
            return;
        }
        push @file_list, $File::Find::name if -f $_;
    }, ".");
    die "No files found in $bm_repo to modify\n" unless @file_list;

    # Sort so that both tools modify exactly the same files regardless of the
    # order in which the directory entries happen to be returned.
    @file_list = sort @file_list;

    my $num_to_change = int(scalar(@file_list) * ($file_perc / 100)) || 1;
    $num_to_change = scalar(@file_list) if $num_to_change > @file_list;
    my @target_files = @file_list[0 .. $num_to_change - 1];

    my $meta = ($tool_name eq "URN") ? ".urn" : ".git";

    for my $i (2 .. $total_commits) {
        my $debug_dir = "/tmp/urn/commit_$i";
        make_path($debug_dir) unless -d $debug_dir;

        mutate_and_backup($_, $i, $debug_dir) for @target_files;

        my $cmd = tool_command($tool_name, 'add', '.')
                . ' && '
                . tool_command($tool_name, 'commit', '-m', "c$i");

        if ($i % $sample_rate == 0 || $i == $total_commits) {
            # The patterns below expect the labels printed by "time -l":
            # "real", "maximum resident set size", "page reclaims" and
            # "page faults".
            my $timed      = '/usr/bin/time -l sh -c ' . shell_quote($cmd) . ' 2>&1';
            my $raw_output = `$timed`;
            $raw_output = '' unless defined $raw_output;
            warn "$tool_name commit $i exited with status " . ($? >> 8) . "\n"
                if $? != 0;

            track_rebases() if $tool_name eq "URN";

            my ($real, $rss, $maj, $min) = (0, "0.00 MB", 0, 0);
            $real = $1 if $raw_output =~ /(\d+\.\d+)\s+real/;
            # NOTE(review): the conversion assumes the value is in bytes -
            # confirm for the platform's time(1).
            $rss = sprintf("%.2f MB", $1 / 1024 / 1024)
                if $raw_output =~ /(\d+)\s+maximum resident set size/;
            $min = $1 if $raw_output =~ /(\d+)\s+page reclaims/;
            $maj = $1 if $raw_output =~ /(\d+)\s+page faults/;

            $results{$i}{$tool_name} = {
                real   => $real . "s",
                rss    => $rss,
                faults => "Maj:$maj / Min:$min",
                inodes => count_inodes($meta),
                size   => get_size($meta),
            };
            print " [Commit $i] $tool_name sampled.\n";
        }
        else {
            run_silently($cmd) == 0
                or warn "$tool_name commit $i exited with status " . ($? >> 8) . "\n";
            track_rebases() if $tool_name eq "URN";
        }
    }

    chdir($base_dir) or die "Cannot chdir back to $base_dir: $!\n";
    return;
}

run_rebase_benchmark("URN");
run_rebase_benchmark("GIT");

# ---------------------------------------------------------------------------
# Report
# ---------------------------------------------------------------------------
my $out_file = "BM_REBASE_${files}_${total_commits}.txt";
open(my $res, '>', $out_file) or die "Cannot write $out_file: $!\n";

print $res "=============================================================\n";
print $res " REBASE BENCHMARK: $files files ($total_commits commits)\n";
print $res " CONDITIONS: Depth=$depth, Files Mod=$file_perc%, Change=$change_perc%\n";
print $res "=============================================================\n\n";

my @metric_rows = (
    [ "Time",        "real"   ],
    [ "Max RSS",     "rss"    ],
    [ "Page faults", "faults" ],
    [ "Inodes",      "inodes" ],
    [ "Repo size",   "size"   ],
);

foreach my $i (sort { $a <=> $b } keys %results) {
    # A tool may have no entry for this commit; print "n/a" instead of
    # triggering an uninitialized-value warning.
    my $u = $results{$i}{"URN"} || {};
    my $g = $results{$i}{"GIT"} || {};

    print $res "SNAPSHOT: Commit #$i\n";
    print $res "-------------------------------------------------------------\n";
    printf $res "%-15s | %-20s | %-20s\n", "METRIC", "URN", "GIT";
    print $res "----------------+----------------------+---------------------\n";
    for my $row (@metric_rows) {
        my ($label, $key) = @{$row};
        my $urn_val = defined $u->{$key} ? $u->{$key} : "n/a";
        my $git_val = defined $g->{$key} ? $g->{$key} : "n/a";
        printf $res "%-15s | %20s | %20s\n", $label, $urn_val, $git_val;
    }
    print $res "-------------------------------------------------------------\n\n";
}

print $res "TOTAL URN REBASES: $rebase_count\n";
close($res) or die "Cannot finish writing $out_file: $!\n";

# List form: the file name is handed to less directly, without a shell.
exec('less', $out_file) or die "Cannot exec less: $!\n";