#!/usr/bin/perl
#
# Regex Search (Linear Crawl) -- CGI benchmark script.
#
# Reads the search keyword from the `q` query-string parameter, walks every
# `index.html` below $directory, slurps each file and matches the keyword
# case-insensitively. Prints an HTML page with the hits followed by timing,
# CPU, memory and I/O metrics for the request.
#
# NOTE(review): the HTML tags in the output template and the entity strings
# in escape_html() were missing from the copy under review (markup had been
# stripped); they are reconstructed here -- confirm against the deployed page.

use strict;
use warnings;
use File::Find;
use Time::HiRes qw(gettimeofday tv_interval);
use BSD::Resource;
use Encode qw(decode_utf8);

# 1. Start Benchmark Timer
my $start_time = [gettimeofday];

# Configuration
my $directory        = '_site/log/';
my $max_query_length = 64;     # characters kept from the raw query
my $max_results      = 20;     # hits collected for display
my $snippet_length   = 100;    # characters of the first <p> shown per hit

# Helper to keep HTML output safe.
# Takes one string; returns it with & < > " ' replaced by HTML entities.
# Returns the empty string for undef.
sub escape_html {
    my ($str) = @_;
    return "" unless defined $str;
    $str =~ s/&/&amp;/g;       # must run first so later entities survive
    $str =~ s/</&lt;/g;
    $str =~ s/>/&gt;/g;
    $str =~ s/"/&quot;/g;
    $str =~ s/'/&#39;/g;
    return $str;
}

# Decode one application/x-www-form-urlencoded token: '+' becomes a space,
# %XX becomes the byte it names, and the resulting bytes are read as UTF-8.
sub _url_decode {
    my ($raw) = @_;
    return '' unless defined $raw;
    $raw =~ tr/+/ /;
    $raw =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
    return decode_utf8($raw);
}

# Parse Query String (q=keyword)
my %params;
for my $pair (split /&/, $ENV{QUERY_STRING} // '') {
    # Limit of 2 keeps any '=' that appears inside the value.
    my ($key, $value) = split /=/, $pair, 2;
    next unless defined $key && length $key;
    $params{ _url_decode($key) } = _url_decode($value);
}

# Truncate first, then whitelist: only ASCII letters, digits and spaces
# ever reach the regex or the page.
my $search_text = $params{q} // '';
$search_text = substr($search_text, 0, $max_query_length);
$search_text =~ s/[^a-zA-Z0-9 ]//g;
my $has_query = $search_text =~ /\S/ ? 1 : 0;

my @results;
my $files_read = 0;

# 2. The Linear Search (Crawl)
if ($has_query && -d $directory) {
    my $needle = qr/\Q$search_text\E/i;    # compiled once for the whole crawl

    find(
        {
            wanted => sub {
                # Only look at index.html files inside the subdirectories.
                # The cheap name test runs before the stat done by -f.
                return unless $_ eq 'index.html' && -f $_;

                my $fh;
                unless (open $fh, '<', $_) {
                    warn "Cannot open $File::Find::name: $!\n";
                    return;
                }
                $files_read++;

                # Slurp the entire file.
                my $content = do { local $/; <$fh> };
                close $fh;

                # Regex match (Case Insensitive)
                return unless defined $content && $content =~ $needle;

                # Stop collecting after $max_results hits for display; the
                # crawl itself continues so the metrics still describe a
                # full linear scan.
                return if @results >= $max_results;

                my ($title)     = $content =~ m{<title[^>]*>(.*?)</title>}is;
                my ($p_content) = $content =~ m{<p[^>]*>(.*?)</p>}is;

                # Clean up snippet
                my $snippet = $p_content // "";
                $snippet =~ s/<[^>]*>//g;    # Strip internal tags
                $snippet =~ s/\s+/ /g;
                $snippet = substr($snippet, 0, $snippet_length);

                push @results, {
                    path    => $File::Find::name,
                    title   => (defined $title && length $title)
                                   ? $title
                                   : $File::Find::name,
                    snippet => $snippet . "...",
                };
                return;
            },
            # find() chdirs into each directory, so $_ is the bare file name.
            no_chdir => 0,
        },
        $directory
    );
}

# 3. Calculate Performance Metrics
my $end_time   = [gettimeofday];
my $elapsed    = tv_interval($start_time, $end_time);
my $rusage     = getrusage();
my $user_cpu   = $rusage->utime;
my $system_cpu = $rusage->stime;
my $max_rss    = $rusage->maxrss;

# 4. Generate Output
print "Content-Type: text/html\n\n";

my $safe_q = escape_html($search_text);

my $list_html;
if (!$has_query) {
    $list_html = "<p>Please enter a search term.</p>";
}
elsif (!@results) {
    $list_html = "<p>No results found for \"<b>" . $safe_q . "</b>\".</p>";
}
else {
    # Every interpolated value, including the link target, is escaped.
    $list_html = "<ul>"
        . join(
            '',
            map {
                sprintf(
                    '<li><a href="/%s">%s</a><br><small>%s</small></li>',
                    escape_html($_->{path}),
                    escape_html($_->{title}),
                    escape_html($_->{snippet})
                )
            } @results
        )
        . "</ul>";
}

print <<"HTML";
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Regex Search Results</title>
</head>
<body>
<h1>Regex Search (Linear Crawl)</h1>
<form method="get">
<input type="text" name="q" value="$safe_q" maxlength="$max_query_length">
<input type="submit" value="Search">
</form>
$list_html
<h2>Performance Metrics:</h2>
<ul>
<li>Total Time: @{[ sprintf("%.4f", $elapsed) ]} seconds</li>
<li>User CPU: $user_cpu s</li>
<li>System CPU: $system_cpu s</li>
<li>Peak RAM: $max_rss KB</li>
<li>Files Read: $files_read (IO Activity)</li>
</ul>
</body>
</html>
HTML