diff options
Diffstat (limited to 'benchmark.pl')
| -rwxr-xr-x | benchmark.pl | 86 |
1 files changed, 86 insertions, 0 deletions
#!/usr/bin/perl
# benchmark.pl - compare suffix-array search against linear regex search.
#
# For each file count given on the command line (default: 500, 1000, 10000)
# the script:
#   1. re-seeds the corpus via $seed_script,
#   2. removes the previous index files,
#   3. times a run of $indexer and measures the resulting index size,
#   4. runs both search CGIs with the query "keyword_<count>",
#   5. prints a table of search time, peak RAM, indexing time, index size.
#
# Usage: benchmark.pl [COUNT ...]
#   COUNT must be a positive integer; anything else is rejected because the
#   value is interpolated into shell command lines below.
use strict;
use warnings;
use Time::HiRes qw(gettimeofday tv_interval);

# Configuration - all scripts are expected in the current directory.
my $seed_script = "./seed.sh";
my $indexer     = "./indexer.pl";
my $sa_cgi      = "./find_sa.cgi";
my $regex_cgi   = "./find_regex.cgi";

# Shared table layout.
my $row_format = "%-15s | %-20s | %-20s\n";
my $table_rule = "----------------+----------------------+---------------------\n";

# Run the benchmark only when executed as a script, so the helper subs can
# be loaded (e.g. by a test) without spawning the external tools.
main(@ARGV) unless caller;

# Entry point. Takes the command-line arguments (file counts); falls back to
# the default counts when none are given. Dies with a usage message when
# arguments were given but none of them is a valid count.
sub main {
    my @args = @_;

    my @test_counts = @args ? valid_counts(@args) : (500, 1000, 10000);
    die "Usage: $0 [COUNT ...]  (COUNT must be a positive integer)\n"
        unless @test_counts;

    print "=============================================================\n";
    print "SEARCH BENCHMARK: Suffix array vs. Linear regex\n";
    print "ARTICLE SIZE: 16 KB\n";
    print "=============================================================\n\n";

    benchmark_count($_) for @test_counts;
    return;
}

# Returns the arguments that are positive decimal integers, in order.
# Every rejected argument is reported on STDERR.
sub valid_counts {
    my @accepted;
    for my $candidate (@_) {
        if (defined $candidate && $candidate =~ /\A[1-9][0-9]*\z/) {
            push @accepted, $candidate;
        }
        else {
            my $shown = defined $candidate ? $candidate : 'undef';
            warn "Ignoring invalid file count '$shown'\n";
        }
    }
    return @accepted;
}

# Runs one full benchmark round for $count files and prints its table.
# If seeding or indexing fails the round is skipped with a warning, since
# any numbers measured afterwards would describe stale or missing data.
sub benchmark_count {
    my ($count) = @_;
    my $search_query = "keyword_$count";

    print "$count files (Targeting: $search_query):\n";
    print "-------------------------------------------------------------\n";
    printf($row_format, "METRIC", "SA", "REGEX");
    print $table_rule;

    # 1. Seed. $count was validated as an integer, so the shell string is safe.
    if (system("$seed_script $count > /dev/null 2>&1") != 0) {
        warn "Skipping $count: $seed_script failed (" . describe_status() . ")\n";
        print "\n";
        return;
    }

    # 2. Cleanup old index files (missing files are not an error).
    unlink('sa.bin', 'corpus.bin', 'file_map.dat');

    # 3. Indexing
    my $idx_start  = [gettimeofday];
    my $idx_status = system("perl $indexer > /dev/null 2>&1");
    my $idx_time   = tv_interval($idx_start);
    if ($idx_status != 0) {
        warn "Skipping $count: $indexer failed (" . describe_status() . ")\n";
        print "\n";
        return;
    }

    my $idx_size = 0;
    if (-f 'sa.bin' && -f 'corpus.bin') {
        # -s yields a false, non-numeric value for empty files; force 0.
        $idx_size = (((-s 'sa.bin') || 0) + ((-s 'corpus.bin') || 0)) / 1024;
    }

    # 4. SA Search
    my ($sa_time, $sa_ram) = run_search($sa_cgi, $search_query);

    # 5. Regex Search
    my ($reg_time, $reg_ram) = run_search($regex_cgi, $search_query);

    # 6. Final Output Table
    printf($row_format,
        "Search time",
        sprintf("%.4fs", $sa_time),
        sprintf("%.4fs", $reg_time));

    printf($row_format,
        "Peak RAM",
        sprintf("%d KB", $sa_ram),
        sprintf("%d KB", $reg_ram));

    printf($row_format,
        "Indexing time",
        sprintf("%.4fs", $idx_time),
        "N/A");

    printf($row_format,
        "Index size",
        sprintf("%.2f KB", $idx_size),
        "N/A");

    print "$table_rule\n";
    return;
}

# Runs a search CGI with QUERY_STRING set to "q=$query" and returns the
# (time, ram) pair parsed from its standard output. A non-zero exit is
# reported on STDERR but whatever output was produced is still parsed.
sub run_search {
    my ($cgi, $query) = @_;

    # Pass the query through the environment rather than the command line.
    local $ENV{QUERY_STRING} = "q=$query";
    my $output = `perl $cgi`;
    warn "$cgi did not exit cleanly (" . describe_status() . ")\n" if $? != 0;

    return parse_metrics($output);
}

# Describes the most recent child status ($?) for diagnostics.
sub describe_status {
    return "could not execute: $!" if $? == -1;
    return "killed by signal " . ($? & 127) if $? & 127;
    return "exit status " . ($? >> 8);
}

# Extracts the metrics from a search CGI's output.
# Looks for "Total Time: <number>" and "Peak RAM: <integer>" and returns
# them as a two-element list (time, ram). A metric that is absent or not a
# well-formed number is returned as 0. Undefined input is treated as empty.
sub parse_metrics {
    my $text = shift // "";
    my $time = ($text =~ /Total Time:\s+(\d+(?:\.\d*)?|\.\d+)/) ? $1 : 0;
    my $ram  = ($text =~ /Peak RAM:\s+(\d+)/)                   ? $1 : 0;
    return ($time, $ram);
}

1;
