summary | refs | log | tree | commit | diff | stats
path: root/benchmark.pl
diff options
context:
space:
mode:
Diffstat (limited to 'benchmark.pl')
-rwxr-xr-x benchmark.pl 107
1 files changed, 54 insertions, 53 deletions
diff --git a/benchmark.pl b/benchmark.pl
index 3d0d855..27277d7 100755
--- a/benchmark.pl
+++ b/benchmark.pl
@@ -3,16 +3,19 @@ use strict;
use warnings;
use Time::HiRes qw(gettimeofday tv_interval);
-# Enable autoflush for live status updates
$| = 1;
+# Args: size_kb [count1 count2 ...]
+my $size_kb = shift @ARGV;
+if (!$size_kb) {
+ die "Usage: $0 <size_kb> [count1 count2 ...]\n";
+}
+
my @test_counts = @ARGV ? @ARGV : (500, 1000, 10000);
my $report_file = "result.txt";
-# Open report file for writing
open(my $rfh, '>', $report_file) or die "Could not open $report_file: $!";
-# Configuration
my $seed_script = "./seed.sh";
my $indexer = "./indexer.pl";
my $sa_cgi = "./find_sa.cgi";
@@ -20,57 +23,55 @@ my $regex_cgi = "./find_regex.cgi";
my $header = "=============================================================\n"
. "SEARCH BENCHMARK: Suffix array vs. Linear regex\n"
- . "ARTICLE SIZE: 8 KB\n"
+ . "ARTICLE SIZE: $size_kb KB\n"
. "=============================================================\n\n";
print $rfh $header;
-print $header;
foreach my $count (@test_counts) {
- my $search_query = "keyword_-1"; # Likely not in corpus
-
- # Progress tracking to STDOUT
- print "--> Processing batch: $count files\n";
-
- print " [1/4] Reseeding _site/log... ";
- system("rm -rf _site/log/*");
- system("$seed_script $count > /dev/null 2>&1");
- print "Done.\n";
-
- print " [2/4] Indexing (Suffix array)... ";
- unlink('sa.bin', 'corpus.bin', 'file_map.dat');
- my $idx_start = [gettimeofday];
- system("perl $indexer > /dev/null 2>&1");
- my $idx_time = tv_interval($idx_start);
- print "Done.\n";
-
- my $idx_size = 0;
- if (-f 'sa.bin' && -f 'corpus.bin') {
- $idx_size = ((-s 'sa.bin') + (-s 'corpus.bin')) / 1024;
- }
-
- print " [3/4] Testing SA search... ";
- my $sa_out = `QUERY_STRING="q=$search_query" perl $sa_cgi`;
- my ($sa_time, $sa_ram) = parse_metrics($sa_out);
- print "Done.\n";
-
- print " [4/4] Testing regex search... ";
- my $reg_out = `QUERY_STRING="q=$search_query" perl $regex_cgi`;
- my ($reg_time, $reg_ram) = parse_metrics($reg_out);
- print "Done.\n\n";
-
- # Format the table for result.txt
- my $table = sprintf("%d files (Targeting: %s):\n", $count, $search_query);
- $table .= "----------------+----------------------+---------------------\n";
- $table .= sprintf("%-15s | %-20s | %-20s\n", "METRIC", "SA", "REGEX");
- $table .= "----------------+----------------------+---------------------\n";
- $table .= sprintf("%-15s | %-20s | %-20s\n", "Search time", sprintf("%.4fs", $sa_time), sprintf("%.4fs", $reg_time));
- $table .= sprintf("%-15s | %-20s | %-20s\n", "Peak RAM", sprintf("%d KB", $sa_ram), sprintf("%d KB", $reg_ram));
- $table .= sprintf("%-15s | %-20s | %-20s\n", "Indexing time", sprintf("%.4fs", $idx_time), "N/A");
- $table .= sprintf("%-15s | %-20s | %-20s\n", "Index size", sprintf("%.2f KB", $idx_size), "N/A");
- $table .= "----------------+----------------------+---------------------\n\n";
-
- print $rfh $table;
+ my $search_query = "keyword_-1";
+
+ print "--> Processing batch: $count files\n";
+
+ print " [1/4] Reseeding _site/log... ";
+ system("rm -rf _site/log/*");
+ # Passing size first, then count to match seed.sh logic
+ system("$seed_script $size_kb $count > /dev/null 2>&1");
+ print "Done.\n";
+
+ print " [2/4] Indexing (Suffix array)... ";
+ unlink('sa.bin', 'corpus.bin', 'file_map.dat');
+ my $idx_start = [gettimeofday];
+ system("perl $indexer > /dev/null 2>&1");
+ my $idx_time = tv_interval($idx_start);
+ print "Done.\n";
+
+ my $idx_size = 0;
+ if (-f 'sa.bin' && -f 'corpus.bin') {
+ $idx_size = ((-s 'sa.bin') + (-s 'corpus.bin')) / 1024;
+ }
+
+ print " [3/4] Testing SA search... ";
+ my $sa_out = `QUERY_STRING="q=$search_query" perl $sa_cgi`;
+ my ($sa_time, $sa_ram) = parse_metrics($sa_out);
+ print "Done.\n";
+
+ print " [4/4] Testing regex search... ";
+ my $reg_out = `QUERY_STRING="q=$search_query" perl $regex_cgi`;
+ my ($reg_time, $reg_ram) = parse_metrics($reg_out);
+ print "Done.\n\n";
+
+ my $table = sprintf("%d files (Targeting: %s):\n", $count, $search_query);
+ $table .= "----------------+----------------------+---------------------\n";
+ $table .= sprintf("%-15s | %-20s | %-20s\n", "METRIC", "SA", "REGEX");
+ $table .= "----------------+----------------------+---------------------\n";
+ $table .= sprintf("%-15s | %-20s | %-20s\n", "Search time", sprintf("%.4fs", $sa_time), sprintf("%.4fs", $reg_time));
+ $table .= sprintf("%-15s | %-20s | %-20s\n", "Peak RAM", sprintf("%d KB", $sa_ram), sprintf("%d KB", $reg_ram));
+ $table .= sprintf("%-15s | %-20s | %-20s\n", "Indexing time", sprintf("%.4fs", $idx_time), "N/A");
+ $table .= sprintf("%-15s | %-20s | %-20s\n", "Index size", sprintf("%.2f KB", $idx_size), "N/A");
+ $table .= "----------------+----------------------+---------------------\n\n";
+
+ print $rfh $table;
}
close $rfh;
@@ -80,8 +81,8 @@ my $pager = $ENV{PAGER} || 'more';
system("$pager $report_file");
sub parse_metrics {
- my $text = shift || "";
- my $time = ($text =~ /Total Time:\s+([\d.]+)/) ? $1 : 0;
- my $ram = ($text =~ /Peak RAM:\s+(\d+)/) ? $1 : 0;
- return ($time, $ram);
+ my $text = shift || "";
+ my $time = ($text =~ /Total Time:\s+([\d.]+)/) ? $1 : 0;
+ my $ram = ($text =~ /Peak RAM:\s+(\d+)/) ? $1 : 0;
+ return ($time, $ram);
}