summaryrefslogtreecommitdiffstats
path: root/benchmark.pl
blob: 8c1b4eafc781e10ab94a52de23070606979c422d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/perl
use strict;
use warnings;
use Time::HiRes qw(gettimeofday tv_interval);

# Benchmark driver: for each requested corpus size it seeds the test files,
# rebuilds the suffix-array index, runs the same query through the suffix-array
# CGI and the regex CGI, and prints one comparison table per size.

# 1. Accept directory counts from @ARGV, or use defaults.
#    Every count ends up inside a shell command line further down, so anything
#    other than a plain non-negative integer is rejected before any work starts.
my @test_counts = @ARGV ? @ARGV : (500, 1000, 10000);
foreach my $candidate (@test_counts) {
	die "$0: invalid file count '$candidate' (expected a non-negative integer)\n"
		unless $candidate =~ /\A[0-9]+\z/;
}

# Configuration - All scripts are now in the root
my $seed_script  = "./seed.sh";
my $indexer      = "./indexer.pl";
my $sa_cgi       = "./find_sa.cgi";
my $regex_cgi    = "./find_regex.cgi";

# Shared layout for every table row, and the rule drawn under the header row
# and under the last row of each table.
my $row_format = "%-15s | %-20s | %-20s\n";
my $table_rule = "----------------+----------------------+---------------------\n";

print "=============================================================\n";
print "SEARCH BENCHMARK: Suffix array vs. Linear regex\n";
print "ARTICLE SIZE: 16 KB\n";
print "=============================================================\n\n";

foreach my $count (@test_counts) {
	my $search_query = "keyword_$count";

	print "$count files (Targeting: $search_query):\n";
	print "-------------------------------------------------------------\n";
	printf($row_format, "METRIC", "SA", "REGEX");
	print $table_rule;

	# 1. Seed
	#    Without a freshly seeded corpus every figure below would describe
	#    whatever files were left over from an earlier run, so skip this size.
	my $seed_status = system("$seed_script $count > /dev/null 2>&1");
	if ($seed_status != 0) {
		warn "$seed_script $count " . _describe_wait_status($seed_status)
			. "; skipping $count files\n";
		print $table_rule . "\n";
		next;
	}

	# 2. Cleanup old index files
	unlink('sa.bin', 'corpus.bin', 'file_map.dat');

	# 3. Indexing
	my $idx_start  = [gettimeofday];
	my $idx_status = system("perl $indexer > /dev/null 2>&1");
	my $idx_time   = tv_interval($idx_start);
	if ($idx_status != 0) {
		# Keep going so the regex column is still reported, but make it
		# obvious that the suffix-array figures cannot be trusted.
		warn "perl $indexer " . _describe_wait_status($idx_status)
			. "; SA results for $count files are unreliable\n";
	}

	my $idx_size = 0;
	if (-f 'sa.bin' && -f 'corpus.bin') {
		$idx_size = ((-s 'sa.bin') + (-s 'corpus.bin')) / 1024;
	}

	# The query reaches both CGI scripts through the environment; setting it
	# here (after indexing) avoids any shell quoting of the query string.
	local $ENV{QUERY_STRING} = "q=$search_query";

	# 4. SA Search
	my $sa_out = `perl $sa_cgi`;
	warn "perl $sa_cgi " . _describe_wait_status($?) . "\n" if $? != 0;
	my ($sa_time, $sa_ram) = parse_metrics($sa_out);

	# 5. Regex Search
	my $reg_out = `perl $regex_cgi`;
	warn "perl $regex_cgi " . _describe_wait_status($?) . "\n" if $? != 0;
	my ($reg_time, $reg_ram) = parse_metrics($reg_out);

	# 6. Final Output Table
	printf($row_format,
		"Search time",
		sprintf("%.4fs", $sa_time),
		sprintf("%.4fs", $reg_time)
	);

	printf($row_format,
		"Peak RAM",
		sprintf("%d KB", $sa_ram),
		sprintf("%d KB", $reg_ram)
	);

	printf($row_format,
		"Indexing time",
		sprintf("%.4fs", $idx_time),
		"N/A"
	);

	printf($row_format,
		"Index size",
		sprintf("%.2f KB", $idx_size),
		"N/A"
	);

	print $table_rule . "\n";
}

# Turn a wait status (the return value of system, or $? after backticks) into
# a short phrase suitable for the end of a warning message.
sub _describe_wait_status {
	my ($status) = @_;
	return "could not be started" if $status == -1;
	return sprintf("died from signal %d", $status & 127) if $status & 127;
	return sprintf("exited with status %d", $status >> 8);
}

# Extract the timing and memory figures from the captured output of a search
# CGI script.
#
# Argument: the output text (undef or an empty string is accepted).
# Returns:  a two-element list ($time, $ram) taken from the first
#           "Total Time: <number>" and "Peak RAM: <integer>" occurrences.
#           A figure that is absent or not a well-formed number comes back
#           as 0, so callers can always feed it to a numeric sprintf format.
sub parse_metrics {
	my $text = shift;
	$text = '' unless defined $text;

	# One complete decimal number with an optional exponent. A bare
	# character class such as [\d.]+ would also accept "..." or a trailing
	# sentence period ("1.5."), and would cut "5e-05" down to "5".
	my $number = qr/(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?/;

	my $time = ($text =~ /Total Time:\s+($number)/) ? $1 : 0;
	my $ram  = ($text =~ /Peak RAM:\s+(\d+)/) ? $1 : 0;
	return ($time, $ram);
}