4 files changed, 106 insertions, 80 deletions
diff --git a/README.txt b/README.txt
index 0bcf66a..bc5b4f6 100644
--- a/README.txt
+++ b/README.txt
@@ -1,11 +1,3 @@
-HOW TO USE
-
-  1. Copy _site/cgi-bin/see.sh to _site/log/
-  2. Set the directory counta (TOTAL)
-  3. Run script ./seed.sh
-  4. In _site/cgi_bin/ execute indexer script (e.g., perl sa_indexer.pl) 
-  5. Run search query: QUERY_STRING="q=ard" perl find_sa_mmap.cgi
-
 Directory structure:
 
 .
@@ -19,5 +11,4 @@ Directory structure:
 |   `-- log/              (Created by seed.sh)
 
 chmod +x seed.sh benchmark.pl _site/cgi-bin/*.cgi _site/cgi-bin/*.pl
-
-
+perl benchmark.pl 3 10 50
diff --git a/benchmark.pl b/benchmark.pl
index 8c1b4ea..3d0d855 100755
--- a/benchmark.pl
+++ b/benchmark.pl
@@ -3,84 +3,85 @@ use strict;
 use warnings;
 use Time::HiRes qw(gettimeofday tv_interval);
 
-# 1. Accept directory counts from @ARGV, or use defaults
+# Unbuffer STDOUT so progress text printed without a newline appears immediately.
+$| = 1;
+
 my @test_counts = @ARGV ? @ARGV : (500, 1000, 10000);
+# Counts are interpolated into a shell command later on: accept plain integers only.
+die "Usage: $0 [FILE_COUNT ...]\n" if grep { !/\A\d+\z/ } @test_counts;
+my $report_file = "result.txt";    # opened with '>', so overwritten on every run
+open(my $rfh, '>', $report_file) or die "Could not open $report_file: $!";
 
-# Configuration - All scripts are now in the root
+# Configuration
 my $seed_script  = "./seed.sh";
 my $indexer      = "./indexer.pl";
 my $sa_cgi       = "./find_sa.cgi";
 my $regex_cgi    = "./find_regex.cgi";
 
-print "=============================================================\n";
-print "SEARCH BENCHMARK: Suffix array vs. Linear regex\n";
-print "ARTICLE SIZE: 16 KB\n";
-print "=============================================================\n\n";
+# Banner: echoed to the terminal and stored at the top of the report.
+my $rule   = "=============================================================";
+my $header = join "\n", $rule, "SEARCH BENCHMARK: Suffix array vs. Linear regex",
+                        "ARTICLE SIZE: 8 KB", $rule, "", "";
+
+print {$rfh} $header;
+print STDOUT $header;
 
 foreach my $count (@test_counts) {
-	my $search_query = "keyword_$count"; 
-
-	print "$count files (Targeting: $search_query):\n";
-	print "-------------------------------------------------------------\n";
-	print sprintf("%-15s | %-20s | %-20s\n", "METRIC", "SA", "REGEX");
-	print "----------------+----------------------+---------------------\n";
-
-	# 1. Seed
-	system("$seed_script $count > /dev/null 2>&1");
-
-	# 2. Cleanup old index files
-	unlink('sa.bin', 'corpus.bin', 'file_map.dat');
-
-	# 3. Indexing
-	my $idx_start = [gettimeofday];
-	system("perl $indexer > /dev/null 2>&1");
-	my $idx_time = tv_interval($idx_start);
-	
-	my $idx_size = 0;
-	if (-f 'sa.bin' && -f 'corpus.bin') {
-		$idx_size = ((-s 'sa.bin') + (-s 'corpus.bin')) / 1024; 
-	}
-
-	# 4. SA Search
-	my $sa_out = `QUERY_STRING="q=$search_query" perl $sa_cgi`;
-	my ($sa_time, $sa_ram) = parse_metrics($sa_out);
-
-	# 5. Regex Search
-	my $reg_out = `QUERY_STRING="q=$search_query" perl $regex_cgi`;
-	my ($reg_time, $reg_ram) = parse_metrics($reg_out);
-
-	# 6. Final Output Table
-	print sprintf("%-15s | %-20s | %-20s\n", 
-		"Search time", 
-		sprintf("%.4fs", $sa_time), 
-		sprintf("%.4fs", $reg_time)
-	);
-
-	print sprintf("%-15s | %-20s | %-20s\n", 
-		"Peak RAM", 
-		sprintf("%d KB", $sa_ram), 
-		sprintf("%d KB", $reg_ram)
-	);
-
-	print sprintf("%-15s | %-20s | %-20s\n", 
-		"Indexing time", 
-		sprintf("%.4fs", $idx_time), 
-		"N/A"
-	);
-
-	print sprintf("%-15s | %-20s | %-20s\n", 
-		"Index size", 
-		sprintf("%.2f KB", $idx_size), 
-		"N/A"
-	);
-
-	print "----------------+----------------------+---------------------\n\n";
+    my $search_query = "keyword_-1"; # Same query for every batch; likely not in corpus
+
+    # Progress goes to STDOUT; only the result table is written to the report.
+    print "--> Processing batch: $count files\n";
+
+    print "    [1/4] Reseeding _site/log... ";
+    system("rm -rf _site/log/*") == 0 or warn "\nWARNING: could not clear _site/log (status $?)\n";
+    system("$seed_script $count > /dev/null 2>&1") == 0 or warn "\nWARNING: $seed_script $count failed (status $?)\n";
+    print "Done.\n";
+
+    print "    [2/4] Indexing (Suffix array)... ";
+    unlink('sa.bin', 'corpus.bin', 'file_map.dat');
+    my $idx_start = [gettimeofday];
+    my $idx_status = system("perl $indexer > /dev/null 2>&1");
+    my $idx_time = tv_interval($idx_start);
+    warn "\nWARNING: $indexer failed (status $idx_status); index metrics are unreliable\n" if $idx_status != 0;
+    print "Done.\n";
+    # Combined size of sa.bin and corpus.bin in KB; stays 0 if either file is missing.
+    my $idx_size = 0;
+    if (-f 'sa.bin' && -f 'corpus.bin') {
+        $idx_size = ((-s 'sa.bin') + (-s 'corpus.bin')) / 1024;
+    }
+
+    print "    [3/4] Testing SA search... ";
+    my $sa_out = `QUERY_STRING="q=$search_query" perl $sa_cgi`;
+    my ($sa_time, $sa_ram) = parse_metrics($sa_out);
+    print "Done.\n";
+
+    print "    [4/4] Testing regex search... ";
+    my $reg_out = `QUERY_STRING="q=$search_query" perl $regex_cgi`;
+    my ($reg_time, $reg_ram) = parse_metrics($reg_out);
+    print "Done.\n\n";
+
+    # Format the table for result.txt (one shared row layout and separator).
+    my $row  = "%-15s | %-20s | %-20s\n";
+    my $rule = "----------------+----------------------+---------------------\n";
+    my $table = sprintf("%d files (Targeting: %s):\n", $count, $search_query) . $rule
+        . sprintf($row, "METRIC", "SA", "REGEX") . $rule
+        . sprintf($row, "Search time", sprintf("%.4fs", $sa_time), sprintf("%.4fs", $reg_time))
+        . sprintf($row, "Peak RAM", sprintf("%d KB", $sa_ram), sprintf("%d KB", $reg_ram))
+        . sprintf($row, "Indexing time", sprintf("%.4fs", $idx_time), "N/A")
+        . sprintf($row, "Index size", sprintf("%.2f KB", $idx_size), "N/A")
+        . $rule . "\n";
+    print $rfh $table;
 }
 
+close($rfh) or die "Could not write $report_file: $!";  # buffered write errors surface here
+print "All tests finished. Results written to $report_file.\n\n";
+
+my $pager = $ENV{PAGER} || 'more';    # honour the user's pager, fall back to 'more'
+system("$pager $report_file");
+
 sub parse_metrics {
-	my $text = shift || "";
-	my $time = ($text =~ /Total Time:\s+([\d.]+)/) ? $1 : 0;
-	my $ram  = ($text =~ /Peak RAM:\s+(\d+)/) ? $1 : 0;
-	return ($time, $ram);
+    # Extract the "Total Time:" and "Peak RAM:" figures from CGI output; 0 when a field is absent.
+    my $output = $_[0] || "";
+    my ($elapsed, $peak_ram) = map { $output =~ $_ ? $1 : 0 } qr/Total Time:\s+([\d.]+)/, qr/Peak RAM:\s+(\d+)/;
+    return ($elapsed, $peak_ram);
 }
-
diff --git a/result.txt b/result.txt
new file mode 100644
index 0000000..b9407ce
--- /dev/null
+++ b/result.txt
@@ -0,0 +1,35 @@
+=============================================================
+SEARCH BENCHMARK: Suffix array vs. Linear regex
+ARTICLE SIZE: 8 KB
+=============================================================
+
+500 files (Targeting: keyword_-1):
+----------------+----------------------+---------------------
+METRIC          | SA                   | REGEX               
+----------------+----------------------+---------------------
+Search time     | 0.0014s              | 0.0451s             
+Peak RAM        | 8124 KB              | 9612 KB             
+Indexing time   | 18.1865s             | N/A                 
+Index size      | 19610.39 KB          | N/A                 
+----------------+----------------------+---------------------
+
+1000 files (Targeting: keyword_-1):
+----------------+----------------------+---------------------
+METRIC          | SA                   | REGEX               
+----------------+----------------------+---------------------
+Search time     | 0.0021s              | 0.0918s             
+Peak RAM        | 8280 KB              | 9960 KB             
+Indexing time   | 43.1748s             | N/A                 
+Index size      | 39225.06 KB          | N/A                 
+----------------+----------------------+---------------------
+
+10000 files (Targeting: keyword_-1):
+----------------+----------------------+---------------------
+METRIC          | SA                   | REGEX               
+----------------+----------------------+---------------------
+Search time     | 0.0173s              | 1.1275s             
+Peak RAM        | 11848 KB             | 13392 KB            
+Indexing time   | 663.3909s            | N/A                 
+Index size      | 392263.01 KB         | N/A                 
+----------------+----------------------+---------------------
+
diff --git a/seed.sh b/seed.sh
index f6f817d..bdfe71e 100755
--- a/seed.sh
+++ b/seed.sh
@@ -7,8 +7,7 @@ TOTAL=${1:-500}
 SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
 BASE_DIR="$SCRIPT_DIR/_site/log"
 
-# Target size: 16000 bytes is ~15.6 KB
-CONTENT_SIZE=16000 
+CONTENT_SIZE=8000 # Target size in bytes (~7.8 KB); benchmark.pl reports this as "8 KB"
 
 # Ensure the target directory exists
 mkdir -p "$BASE_DIR"