summary | refs | log | tree | commit | diff | stats
path: root/benchmark.pl
diff options
context:
space:
mode:
Diffstat (limited to 'benchmark.pl')
-rwxr-xr-x benchmark.pl 86
1 files changed, 86 insertions, 0 deletions
diff --git a/benchmark.pl b/benchmark.pl
new file mode 100755
index 0000000..8c1b4ea
--- /dev/null
+++ b/benchmark.pl
@@ -0,0 +1,86 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Time::HiRes qw(gettimeofday tv_interval);
+
+# Benchmark driver: for every requested file count, seed the corpus, rebuild
+# the index, run the same query through both search CGIs and print a
+# side-by-side table of the metrics they report.
+
+# 1. Accept directory counts from @ARGV, or use defaults
+my @test_counts = @ARGV ? @ARGV : (500, 1000, 10000);
+
+# Each count is interpolated into a shell command line and into the CGI
+# query string below, so refuse anything that is not a plain non-negative
+# integer before any external command is run.
+foreach my $count (@test_counts) {
+    die "Usage: $0 [FILE_COUNT ...] -- invalid count '$count'\n"
+        unless $count =~ /\A[0-9]+\z/;
+}
+
+# Configuration - All scripts are now in the root
+my $seed_script = "./seed.sh";
+my $indexer = "./indexer.pl";
+my $sa_cgi = "./find_sa.cgi";
+my $regex_cgi = "./find_regex.cgi";
+
+print "=============================================================\n";
+print "SEARCH BENCHMARK: Suffix array vs. Linear regex\n";
+print "ARTICLE SIZE: 16 KB\n";
+print "=============================================================\n\n";
+
+foreach my $count (@test_counts) {
+    my $search_query = "keyword_$count";
+
+    print "$count files (Targeting: $search_query):\n";
+    print "-------------------------------------------------------------\n";
+    printf("%-15s | %-20s | %-20s\n", "METRIC", "SA", "REGEX");
+    print "----------------+----------------------+---------------------\n";
+
+    # 1. Seed
+    # A failure here is reported on STDERR but does not abort the run, so the
+    # remaining counts are still benchmarked.
+    system("$seed_script $count > /dev/null 2>&1") == 0
+        or warn "WARNING: '$seed_script $count' failed (wait status $?); "
+              . "results for $count files may be invalid\n";
+
+    # 2. Cleanup old index files
+    # Return value deliberately ignored: the files may simply not exist yet.
+    unlink('sa.bin', 'corpus.bin', 'file_map.dat');
+
+    # 3. Indexing
+    my $idx_start = [gettimeofday];
+    my $idx_status = system("perl $indexer > /dev/null 2>&1");
+    my $idx_time = tv_interval($idx_start);
+    warn "WARNING: '$indexer' failed (wait status $?); "
+       . "SA results for $count files may be invalid\n"
+        if $idx_status != 0;
+
+    my $idx_size = 0;
+    if (-f 'sa.bin' && -f 'corpus.bin') {
+        $idx_size = ((-s 'sa.bin') + (-s 'corpus.bin')) / 1024;
+    }
+
+    my ($sa_time, $sa_ram, $reg_time, $reg_ram);
+    {
+        # Hand the query to both CGIs through the environment instead of a
+        # shell-level VAR=value prefix; local restores %ENV after the block.
+        local $ENV{QUERY_STRING} = "q=$search_query";
+
+        # 4. SA Search
+        my $sa_out = `perl $sa_cgi`;
+        warn "WARNING: '$sa_cgi' exited abnormally (wait status $?)\n"
+            if $? != 0;
+        ($sa_time, $sa_ram) = parse_metrics($sa_out);
+
+        # 5. Regex Search
+        my $reg_out = `perl $regex_cgi`;
+        warn "WARNING: '$regex_cgi' exited abnormally (wait status $?)\n"
+            if $? != 0;
+        ($reg_time, $reg_ram) = parse_metrics($reg_out);
+    }
+
+    # 6. Final Output Table
+    printf("%-15s | %-20s | %-20s\n",
+        "Search time",
+        sprintf("%.4fs", $sa_time),
+        sprintf("%.4fs", $reg_time)
+    );
+
+    printf("%-15s | %-20s | %-20s\n",
+        "Peak RAM",
+        sprintf("%d KB", $sa_ram),
+        sprintf("%d KB", $reg_ram)
+    );
+
+    printf("%-15s | %-20s | %-20s\n",
+        "Indexing time",
+        sprintf("%.4fs", $idx_time),
+        "N/A"
+    );
+
+    printf("%-15s | %-20s | %-20s\n",
+        "Index size",
+        sprintf("%.2f KB", $idx_size),
+        "N/A"
+    );
+
+    print "----------------+----------------------+---------------------\n\n";
+}
+
+# Extract the timing and memory figures from a search CGI's captured output.
+#
+# Argument: the output text; undef (or any other false value) is treated as
+#           an empty string.
+# Returns:  a two-element list (time, ram) holding the digits-and-dots run
+#           that follows "Total Time:" and the digit run that follows
+#           "Peak RAM:". An element is 0 when its label is not present.
+sub parse_metrics {
+    my ($output) = @_;
+    $output ||= "";
+
+    my ($elapsed, $peak_ram) = (0, 0);
+
+    if ($output =~ /Total Time:\s+([\d.]+)/) {
+        $elapsed = $1;
+    }
+    if ($output =~ /Peak RAM:\s+(\d+)/) {
+        $peak_ram = $1;
+    }
+
+    return ($elapsed, $peak_ram);
+}
+