summary refs log tree commit diff stats
path: root/find_regex.cgi
diff options
context:
space:
mode:
author Sadeep Madurange <sadeep@asciimx.com> 2026-05-06 19:42:33 +0800
committer Sadeep Madurange <sadeep@asciimx.com> 2026-05-06 19:42:33 +0800
commit 819bf74c2841fabdcc481e12e13615d48a92cb7f (patch)
tree 00ba8c9105a96d88536f50f8ef96e838c04408e3 /find_regex.cgi
parent eddb76ad8c6e850c7e24f97ff27a185d48b104ee (diff)
download site-search-bm-819bf74c2841fabdcc481e12e13615d48a92cb7f.tar.gz
Change directory structure and add benchmark runner.
Diffstat (limited to 'find_regex.cgi')
-rwxr-xr-x find_regex.cgi 144
1 files changed, 144 insertions, 0 deletions
diff --git a/find_regex.cgi b/find_regex.cgi
new file mode 100755
index 0000000..d826c12
--- /dev/null
+++ b/find_regex.cgi
@@ -0,0 +1,144 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use File::Find;
+use Time::HiRes qw(gettimeofday tv_interval);
+use BSD::Resource;
+use Encode qw(decode_utf8);
+
+# 1. Start Benchmark Timer
+# Taken before query parsing and the crawl so that the "Total Time" figure
+# covers both; it is passed to tv_interval() once the search has finished.
+my $start_time = [gettimeofday];
+
+# Make a string safe to embed in HTML text or inside a quoted attribute by
+# replacing & < > " ' with their entities.
+# Takes one string; returns the escaped copy, or "" when the argument is undef.
+sub escape_html {
+    my ($text) = @_;
+    return "" unless defined $text;
+
+    my %entity_for = (
+        '&' => '&amp;',
+        '<' => '&lt;',
+        '>' => '&gt;',
+        '"' => '&quot;',
+        "'" => '&#39;',
+    );
+
+    # Single pass over the input: text produced by a replacement is never
+    # rescanned, so the '&' of an emitted entity is not escaped a second time.
+    $text =~ s/([&<>"'])/$entity_for{$1}/g;
+    return $text;
+}
+
+# Parse Query String (q=keyword)
+# Fills %params from $ENV{QUERY_STRING}. Keys and values are both form-decoded
+# ('+' becomes a space, %XX becomes the corresponding byte) and then decoded
+# as UTF-8.
+my %params;
+if (defined $ENV{QUERY_STRING} && length $ENV{QUERY_STRING}) {
+    foreach my $pair (split /&/, $ENV{QUERY_STRING}) {
+        # Limit the split to two fields so a literal '=' inside the value
+        # is kept instead of silently truncating the value.
+        my ($key, $value) = split /=/, $pair, 2;
+
+        # An empty pair ("a=1&&b=2") or a pair with no name ("=x") has no
+        # usable key; skip it rather than storing under an undef/empty key.
+        next unless defined $key && length $key;
+        $value //= '';
+
+        # The loop variable aliases $key and $value, so both are decoded in place.
+        for my $field ($key, $value) {
+            $field =~ tr/+/ /;
+            $field =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
+        }
+        $params{ decode_utf8($key) } = decode_utf8($value);
+    }
+}
+
+# Defined-or (not ||) so that a query of "0" is still treated as a query.
+my $search_text = $params{'q'} // '';
+$search_text = substr($search_text, 0, 64);
+# Keep only ASCII letters, digits and spaces.
+$search_text =~ s/[^a-zA-Z0-9 ]//g;
+# Whitespace-only input is "no query": normalise it to the empty string so the
+# crawl guard (/\S/) and the "Please enter a search term" branch agree.
+$search_text = '' unless $search_text =~ /\S/;
+
+# Configuration
+my $directory = '_site/log/';
+my @results;
+my $files_read = 0;
+
+# 2. The Linear Search (Crawl)
+# Walks $directory, slurps every index.html it finds, and records each page
+# whose content contains the search text (literal, case-insensitive match).
+# $files_read counts every file successfully opened, matching or not.
+if ($search_text =~ /\S/) {
+    # Compile the pattern once instead of re-interpolating it for every file.
+    my $needle = qr/\Q$search_text\E/i;
+
+    find({
+        wanted => sub {
+            # Only look at index.html files inside the subdirectories.
+            # The name comparison comes first so other entries are not stat'ed.
+            return unless $_ eq 'index.html' && -f $_;
+
+            my $fh;
+            unless (open $fh, '<', $_) {
+                # Report to STDERR instead of silently skipping the file.
+                warn "Cannot open $File::Find::name: $!\n";
+                return;
+            }
+            $files_read++;
+
+            # Slurp the entire file (approx 16KB per your seed script)
+            my $content = do { local $/; <$fh> };
+            close $fh;
+
+            return unless $content =~ $needle;
+
+            my ($title)     = $content =~ /<title>(.*?)<\/title>/is;
+            my ($p_content) = $content =~ /<p[^>]*>(.*?)<\/p>/is;
+
+            # Clean up snippet. Defined-or keeps a paragraph whose text is "0".
+            my $snippet = $p_content // "";
+            $snippet =~ s/<[^>]*>//g;    # Strip internal tags
+            $snippet =~ s/\s+/ /g;
+            $snippet = substr($snippet, 0, 100);
+
+            # Fall back to the path only when there is no title text at all;
+            # a title of "0" is a real title.
+            my $has_title = defined $title && length $title;
+
+            push @results, {
+                path    => $File::Find::name,
+                title   => $has_title ? $title : $File::Find::name,
+                snippet => $snippet . "...",
+            };
+
+            # NOTE: no cap is applied to the number of results collected.
+            # The benchmark usually looks for unique keywords where only
+            # 1 result exists.
+        },
+        # 0 is the File::Find default: find() chdir()s into each directory,
+        # so $_ is the bare file name and can be tested/opened directly.
+        no_chdir => 0,
+    }, $directory);
+}
+
+# 3. Calculate Performance Metrics
+# Wall-clock time elapsed since $start_time, in seconds.
+my $end_time = [gettimeofday];
+my $elapsed  = tv_interval($start_time, $end_time);
+
+# CPU times and peak resident set size as reported by getrusage().
+# NOTE(review): the page labels maxrss as KB; the unit of ru_maxrss is
+# platform-dependent, so confirm on the deployment OS.
+my $rusage = getrusage();
+my ($user_cpu, $system_cpu, $max_rss) =
+    ($rusage->utime, $rusage->stime, $rusage->maxrss);
+
+# 4. Generate Output
+print "Content-Type: text/html\n\n";
+
+# Build the body fragment: a prompt when there is no query, a "no results"
+# message, or the list of matches. Every value placed into markup goes through
+# escape_html(). That includes the path used in the href attribute, because
+# directory names come from the filesystem and may contain quotes or
+# ampersands.
+my $list_html = "";
+if ($search_text eq '') {
+    $list_html = "<p>Please enter a search term.</p>";
+} elsif (@results == 0) {
+    $list_html = "<p>No results found for \"<b>" . escape_html($search_text) . "</b>\".</p>";
+} else {
+    my @items = map {
+        sprintf('<li><a href="/%s">%s</a><br><small>%s</small></li>',
+            escape_html($_->{path}),
+            escape_html($_->{title}),
+            escape_html($_->{snippet}))
+    } @results;
+    $list_html = "<ul>" . join('', @items) . "</ul>";
+}
+
+# Escaped copy of the query for the value="" attribute of the search box.
+my $safe_q = escape_html($search_text);
+
+# Emit the page: search form (pre-filled with the escaped query), the result
+# list or message built above, and the benchmark figures. The heredoc is
+# double-quoted, so the $variables and the @{[ ... ]} expression below are
+# interpolated; everything placed here must already be HTML-safe.
+print <<"HTML";
+<!DOCTYPE html>
+<html>
+<head>
+ <meta charset="utf-8">
+ <title>Regex Search Results</title>
+ <style>
+ body { font-family: sans-serif; line-height: 1.5; padding: 20px; }
+ .stats { background: #f4f4f4; padding: 15px; border-radius: 5px;
+ font-family: monospace; font-size: 0.9em; border: 1px solid #ddd; margin-top: 20px; }
+ </style>
+</head>
+<body>
+ <h2>Regex Search (Linear Crawl)</h2>
+ <form method="GET">
+ <input type="text" name="q" value="$safe_q">
+ <input type="submit" value="Search">
+ </form>
+
+ $list_html
+
+ <div class="stats">
+ <strong>Performance Metrics:</strong><br>
+ Total Time: @{[ sprintf("%.4f", $elapsed) ]} seconds<br>
+ User CPU: $user_cpu s<br>
+ System CPU: $system_cpu s<br>
+ Peak RAM: $max_rss KB<br>
+ Files Read: $files_read (IO Activity)
+ </div>
+</body>
+</html>
+HTML