#!/usr/bin/perl
#
# Benchmark driver: suffix-array search vs. linear regex search.
#
# For every file count given on the command line (default: 500, 1000, 10000)
# the script
#   1. empties _site/log and reseeds it with "./seed.sh <count>",
#   2. removes the old index files and rebuilds them with ./indexer.pl,
#      timing the run and measuring sa.bin + corpus.bin,
#   3. runs ./find_sa.cgi and ./find_regex.cgi with QUERY_STRING set and reads
#      the "Total Time:" / "Peak RAM:" figures from their output,
#   4. appends a comparison table to result.txt.
# When all batches are done the report is shown with $PAGER (default: more).
#
# A step whose command fails is reported as FAILED on STDOUT; the batch still
# continues, so the figures of such a batch should not be trusted.

use strict;
use warnings;
use Time::HiRes qw(gettimeofday tv_interval);

# Enable autoflush for live status updates
$| = 1;

my @test_counts = @ARGV ? @ARGV : (500, 1000, 10000);

# The counts end up in a shell command line and in a %d format, so reject
# anything that is not a plain non-negative integer. This happens before the
# report file is opened so a typo does not truncate an existing report.
for my $count (@test_counts) {
    die "Usage: $0 [file_count ...]  (invalid file count '$count')\n"
        unless $count =~ /\A[0-9]+\z/;
}

my $report_file = "result.txt";

# Open report file for writing
open(my $rfh, '>', $report_file) or die "Could not open $report_file: $!";

# Configuration
my $seed_script  = "./seed.sh";
my $indexer      = "./indexer.pl";
my $sa_cgi       = "./find_sa.cgi";
my $regex_cgi    = "./find_regex.cgi";
my $search_query = "keyword_-1";    # Likely not in corpus

my $header = "=============================================================\n"
           . "SEARCH BENCHMARK: Suffix array vs. Linear regex\n"
           . "ARTICLE SIZE: 8 KB\n"
           . "=============================================================\n\n";

print {$rfh} $header;
print $header;

foreach my $count (@test_counts) {
    # Progress tracking to STDOUT
    print "--> Processing batch: $count files\n";

    print " [1/4] Reseeding _site/log... ";
    my $clear_failure = run_shell("rm -rf _site/log/*");
    my $seed_failure  = run_shell("$seed_script $count > /dev/null 2>&1");
    finish_step($clear_failure, $seed_failure);

    print " [2/4] Indexing (Suffix array)... ";
    unlink('sa.bin', 'corpus.bin', 'file_map.dat');
    my $idx_start     = [gettimeofday];
    my $index_failure = run_shell("perl $indexer > /dev/null 2>&1");
    my $idx_time      = tv_interval($idx_start);
    finish_step($index_failure);

    # Index size in KB; stays 0 when the indexer produced no files.
    my $idx_size = 0;
    if (-f 'sa.bin' && -f 'corpus.bin') {
        $idx_size = ((-s 'sa.bin') + (-s 'corpus.bin')) / 1024;
    }

    print " [3/4] Testing SA search... ";
    my ($sa_out, $sa_failure) = run_search($sa_cgi, $search_query);
    my ($sa_time, $sa_ram) = parse_metrics($sa_out);
    finish_step($sa_failure);

    print " [4/4] Testing regex search... ";
    my ($reg_out, $reg_failure) = run_search($regex_cgi, $search_query);
    my ($reg_time, $reg_ram) = parse_metrics($reg_out);
    finish_step($reg_failure);
    print "\n";

    # Format the table for result.txt
    my $rule    = "----------------+----------------------+---------------------\n";
    my $row_fmt = "%-15s | %-20s | %-20s\n";

    my $table = sprintf("%d files (Targeting: %s):\n", $count, $search_query);
    $table .= $rule;
    $table .= sprintf($row_fmt, "METRIC", "SA", "REGEX");
    $table .= $rule;
    $table .= sprintf($row_fmt, "Search time",
                      sprintf("%.4fs", $sa_time), sprintf("%.4fs", $reg_time));
    $table .= sprintf($row_fmt, "Peak RAM",
                      sprintf("%d KB", $sa_ram), sprintf("%d KB", $reg_ram));
    $table .= sprintf($row_fmt, "Indexing time",
                      sprintf("%.4fs", $idx_time), "N/A");
    $table .= sprintf($row_fmt, "Index size",
                      sprintf("%.2f KB", $idx_size), "N/A");
    $table .= $rule . "\n";

    print {$rfh} $table;
}

# Buffered write errors only surface at close, so check it.
close $rfh or die "Could not write $report_file: $!";

print "All tests finished. Results written to $report_file.\n\n";

# $PAGER may carry its own options (e.g. "less -R"), hence the shell form.
my $pager = $ENV{PAGER} || 'more';
system("$pager $report_file");

# Describe why the most recent system()/backtick command failed.
# Must be called immediately after the command, before $? or $! can change.
# Returns undef (scalar context) when the command exited with status 0,
# otherwise a short human-readable reason.
sub child_failure {
    my ($status, $os_error) = ($?, "$!");

    return if $status == 0;
    return "could not run: $os_error" if $status == -1;
    return sprintf('killed by signal %d', $status & 127) if $status & 127;
    return sprintf('exit status %d', $status >> 8);
}

# Run $command through the shell.
# Returns undef on success or a failure reason (see child_failure).
sub run_shell {
    my ($command) = @_;

    system($command);
    return child_failure();
}

# Run a search CGI script with QUERY_STRING set to "q=$query".
# Returns ($output, $failure): the captured STDOUT (undef if the command could
# not be started) and undef or a failure reason (see child_failure).
sub run_search {
    my ($script, $query) = @_;

    # Set the variable in our own environment (restored on return) instead of
    # splicing the query into the shell command line.
    local $ENV{QUERY_STRING} = "q=$query";
    my $output  = `perl $script`;
    my $failure = child_failure();

    return ($output, $failure);
}

# Close a progress line on STDOUT.
# Takes the failure reasons of the step's commands (undef entries mean
# success). Prints "Done." when none failed, otherwise "FAILED (...)".
# Returns true when the whole step succeeded.
sub finish_step {
    my @failures = grep { defined $_ } @_;

    if (@failures) {
        print "FAILED (", join('; ', @failures), ").\n";
        return 0;
    }

    print "Done.\n";
    return 1;
}

# Extract the metrics from a search script's output.
#   $text - captured output; undef or empty is allowed.
# Returns ($time, $ram): the decimal number following "Total Time:" and the
# integer following "Peak RAM:". A value whose label is missing is 0.
sub parse_metrics {
    my $text = shift;
    $text = '' unless defined $text;

    # Accept "12", "12.", "12.34" and ".34", but not a lone "." or "1.2.3".
    my ($time) = $text =~ /Total Time:\s+(\d+(?:\.\d*)?|\.\d+)/;
    my ($ram)  = $text =~ /Peak RAM:\s+(\d+)/;

    $time = 0 unless defined $time;
    $ram  = 0 unless defined $ram;

    return ($time, $ram);
}