1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
#!/usr/bin/perl
use strict;
use warnings;
use Time::HiRes qw(gettimeofday tv_interval);
# Benchmark driver. For every requested corpus size it:
#   1. seeds the corpus via $seed_script,
#   2. removes any previous index files,
#   3. times a run of $indexer and measures the resulting index size,
#   4. runs the same query through the suffix-array CGI and the regex CGI,
#   5. prints one comparison table built from what each CGI reports.
#
# Usage: pass the file counts as arguments; with no arguments the defaults
# (500, 1000, 10000) are used.
my @test_counts = @ARGV ? @ARGV : (500, 1000, 10000);

# Every count is interpolated into shell command lines below, so refuse
# anything that is not a plain run of digits before doing any work.
for my $candidate (@test_counts) {
    die "Invalid file count '$candidate': expected a non-negative integer\n"
        unless $candidate =~ /\A[0-9]+\z/;
}

# Configuration - All scripts are now in the root
my $seed_script = "./seed.sh";
my $indexer     = "./indexer.pl";
my $sa_cgi      = "./find_sa.cgi";
my $regex_cgi   = "./find_regex.cgi";

# Shared layout for every table row and for the rule under the header/footer.
my $row_format = "%-15s | %-20s | %-20s\n";
my $table_rule = "----------------+----------------------+---------------------\n";

print "=============================================================\n";
print "SEARCH BENCHMARK: Suffix array vs. Linear regex\n";
print "ARTICLE SIZE: 16 KB\n";
print "=============================================================\n\n";

foreach my $count (@test_counts) {
    my $search_query = "keyword_$count";

    print "$count files (Targeting: $search_query):\n";
    print "-------------------------------------------------------------\n";
    printf($row_format, "METRIC", "SA", "REGEX");
    print $table_rule;

    # 1. Seed the corpus. A failure is reported but not fatal, so the
    #    remaining counts still get benchmarked.
    system("$seed_script $count > /dev/null 2>&1") == 0
        or warn "Warning: '$seed_script $count' failed (wait status $?); "
              . "figures for $count files may be stale\n";

    # 2. Cleanup old index files
    unlink('sa.bin', 'corpus.bin', 'file_map.dat');

    # 3. Indexing: wall-clock time of the whole indexer process.
    my $idx_start  = [gettimeofday];
    my $idx_status = system("perl $indexer > /dev/null 2>&1");
    my $idx_time   = tv_interval($idx_start);
    warn "Warning: '$indexer' failed (wait status $idx_status); "
       . "index figures for $count files are unreliable\n"
        if $idx_status != 0;

    # Index size in KB; stays 0 when the indexer did not produce both files.
    my $idx_size = 0;
    if (-f 'sa.bin' && -f 'corpus.bin') {
        $idx_size = ((-s 'sa.bin') + (-s 'corpus.bin')) / 1024;
    }

    # The CGIs are handed the query through the environment, as a web
    # server would do; local() restores %ENV at the end of the iteration.
    local $ENV{QUERY_STRING} = "q=$search_query";

    # 4. SA Search
    my $sa_out = `perl $sa_cgi`;
    my ($sa_time, $sa_ram) = parse_metrics($sa_out);

    # 5. Regex Search
    my $reg_out = `perl $regex_cgi`;
    my ($reg_time, $reg_ram) = parse_metrics($reg_out);

    # 6. Final Output Table
    printf($row_format,
        "Search time",
        sprintf("%.4fs", $sa_time),
        sprintf("%.4fs", $reg_time),
    );
    printf($row_format,
        "Peak RAM",
        sprintf("%d KB", $sa_ram),
        sprintf("%d KB", $reg_ram),
    );
    printf($row_format,
        "Indexing time",
        sprintf("%.4fs", $idx_time),
        "N/A",
    );
    printf($row_format,
        "Index size",
        sprintf("%.2f KB", $idx_size),
        "N/A",
    );
    print $table_rule, "\n";
}
# Pull the timing and memory figures out of a search CGI's captured output.
#
# Parameters:
#   $text - the captured output to scan; undef is treated as an empty string.
#
# Returns a two-element list ($time, $ram):
#   $time - the decimal number that follows "Total Time:", or 0 if none
#   $ram  - the integer that follows "Peak RAM:", or 0 if none
sub parse_metrics {
    my ($text) = @_;
    $text = '' unless defined $text;

    # Capture only a well-formed decimal (digits with an optional fraction,
    # or a bare fraction). A looser character class would also swallow a
    # sentence-ending period or strings like "..." and "1.2.3", which are
    # not numbers and raise "isn't numeric" warnings when formatted.
    my $decimal = qr/\d+(?:\.\d+)?|\.\d+/;

    my ($time) = $text =~ /Total Time:\s+($decimal)/;
    my ($ram)  = $text =~ /Peak RAM:\s+(\d+)/;

    return (
        defined $time ? $time : 0,
        defined $ram  ? $ram  : 0,
    );
}
|