#!/usr/bin/perl
# Site-search CGI endpoint.
#
# Reads the `q` parameter from QUERY_STRING, scans a Storable index whose
# keys are URLs and whose values are hashes with `t` (used as the title) and
# `c` (the text that is searched), and prints an HTML page with the matches
# followed by a block of timing/resource metrics.
#
# Concurrency is throttled with one "<pid>.lock" file per running request in
# $lock_dir; lock files older than $lock_timeout seconds are treated as stale
# and removed.
use strict;
use warnings;

use Storable     qw(retrieve);
use Encode       qw(decode_utf8);
use HTML::Escape qw(escape_html);
use Scalar::Util qw(reftype);
use Time::HiRes  qw(gettimeofday tv_interval);
use BSD::Resource;

# Wall-clock start for the metrics block, plus a counter of file operations
# that is reported alongside it.
my $start_time = [gettimeofday];
my $files_read = 0;

# Configuration
my $max_parallel  = 100;
my $lock_timeout  = 30;
my $max_results   = 1000;
my $min_query_len = 3;
my $index_file    = 'search_index.dat';
my $lock_dir      = '/tmp/search_locks';

my $year = (localtime)[5] + 1900;

# Lock state lives at file scope so that both final_output() and the END
# block below can release it.
my $lock_file;
my $fh_lock;

# Close and remove this request's lock file. Safe to call more than once.
sub release_lock {
    if ($fh_lock) {
        close $fh_lock;
        undef $fh_lock;
    }
    if (defined $lock_file) {
        unlink $lock_file if -f $lock_file;
        undef $lock_file;
    }
    return;
}

# Runs on every exit path, including die(), so a request that fails halfway
# does not keep occupying a slot until $lock_timeout expires.
END { release_lock() }

# --- Concurrency control ----------------------------------------------------

# 0700: the directory sits under /tmp, so it must not be writable by other
# local users (they could plant lock files or symlinks in it).
unless (-d $lock_dir) {
    mkdir($lock_dir, 0700)
        or -d $lock_dir    # lost a creation race with another request
        or warn "search: cannot create $lock_dir: $!\n";
}

my $active_count = 0;
my $now          = time();

if (opendir(my $dh, $lock_dir)) {
    while (defined(my $entry = readdir $dh)) {
        next unless $entry =~ /\.lock$/;

        my $path  = "$lock_dir/$entry";
        my $mtime = (stat($path))[9] || 0;    # vanished file counts as stale

        if ($now - $mtime > $lock_timeout) {
            unlink $path;
        }
        else {
            $active_count++;
        }
    }
    closedir $dh;
}
else {
    # Best effort: without a readable lock directory the request proceeds
    # unthrottled rather than failing.
    warn "search: cannot read $lock_dir: $!\n";
}

# Too many search requests
if ($active_count >= $max_parallel) {
    print "Content-Type: text/html\n\n";
    render_html("\n\nServer busy. Please try again in a few seconds.\n\n", "", $year);
    exit;
}

$lock_file = "$lock_dir/$$.lock";
unless (open($fh_lock, '>', $lock_file)) {
    warn "search: cannot create $lock_file: $!\n";

    # open() autovivifies the handle even when it fails; clear both so the
    # cleanup code does not close or unlink something we never created.
    undef $fh_lock;
    undef $lock_file;
}
$files_read++;    # IO for lock creation

# --- Query parsing ----------------------------------------------------------

my $search_text = '';

# Accept q= anywhere in the query string, not only as the first parameter.
if (($ENV{QUERY_STRING} // '') =~ /(?:^|&)q=([^&]*)/) {
    # QUERY_STRING arrives URL-encoded; undo that before treating the bytes
    # as UTF-8, otherwise "+" and "%XX" are searched for literally.
    $search_text = decode_utf8(url_decode($1));
    $search_text =~ s/\P{Print}//g;
    $search_text = substr($search_text, 0, 64);
    $search_text =~ s/^\s+|\s+$//g;
}

# NOTE(review): $search_text is a decoded character string, while STDOUT has
# no encoding layer and the encoding of the index text is not visible here.
# Confirm both if non-ASCII queries matter.
my $safe_search_text = escape_html($search_text);

print "Content-Type: text/html\n\n";

if ($search_text eq '') {
    final_output("\n\nPlease enter a search term above.\n\n");
}

if (length($search_text) < $min_query_len) {
    final_output("\n\nSearch term is too short. Please enter at least $min_query_len characters.\n\n");
}

if (!-f $index_file) {
    final_output("\n\nSearch temporarily unavailable.\n\n");
}

# --- Index lookup -----------------------------------------------------------

# retrieve() croaks on a corrupt file and returns undef on I/O errors. The
# headers are already sent, so degrade to the "unavailable" page instead of
# dying mid-response.
# NOTE: Storable must only ever be pointed at trusted files; keep the index
# writable by the indexer alone.
my $index = eval { retrieve($index_file) };
$files_read++;    # IO for index retrieval

if ((reftype($index) // '') ne 'HASH') {
    warn "search: cannot load $index_file: " . ($@ || $! || 'not a hash') . "\n";
    final_output("\n\nSearch temporarily unavailable.\n\n");
}

my @results;

for my $url (sort keys %$index) {
    last if @results >= $max_results;

    # Skip malformed entries rather than dying under strict refs.
    my $data = $index->{$url};
    next unless (reftype($data) // '') eq 'HASH';

    my $text = $data->{c};
    next unless defined $text;
    next unless $text =~ /(.{0,40})(\Q$search_text\E)(.{0,40})/is;
    my ($before, $actual, $after) = ($1, $2, $3);

    # Trim long context back to a whitespace boundary so the snippet does
    # not start or end in the middle of a word.
    $after  =~ s/\s\S*$// if length($after) > 25;
    $before =~ s/^.*?\s// if length($before) > 25;

    # Capitalise whichever piece ends up first in the snippet.
    $before = ($before =~ /\S/) ? ucfirst($before) : "";
    $actual = ucfirst($actual) if $before eq "";

    my $snippet = escape_html($before) . escape_html($actual) . escape_html($after) . "...";

    # Fall back to the URL so every result has visible link text.
    my $title = $data->{t};
    $title = $url unless defined $title && length $title;

    push @results, {
        path    => $url,
        title   => escape_html($title),
        snippet => $snippet,
    };
}

my $list_html;
if (@results == 0) {
    $list_html = "\n\nNo results found for \"$safe_search_text\".\n\n";
}
else {
    # The matches used to be collected and then dropped (an empty string was
    # assigned here), so a successful search showed nothing.
    # NOTE(review): the markup below is minimal; adjust to the site template.
    my @items = map {
        sprintf qq{<li><a href="%s">%s</a><br>%s</li>\n},
            escape_html($_->{path}), $_->{title}, $_->{snippet}
    } @results;
    $list_html = "<ul>\n" . join('', @items) . "</ul>\n";
}

final_output($list_html);

# url_decode($raw)
# Decode one application/x-www-form-urlencoded value: "+" becomes a space
# and each %XX escape becomes the corresponding byte. Returns a byte string.
sub url_decode {
    my ($raw) = @_;
    return '' unless defined $raw;
    $raw =~ tr/+/ /;
    $raw =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;
    return $raw;
}

# final_output($content)
# Append the performance metrics to $content, render the page, release this
# request's lock and exit. Never returns.
sub final_output {
    my ($content) = @_;

    # Metrics are taken as late as possible so they cover the whole request.
    my $elapsed    = tv_interval($start_time, [gettimeofday]);
    my $rusage     = getrusage();
    my $user_cpu   = $rusage->utime;
    my $system_cpu = $rusage->stime;
    my $max_rss    = $rusage->maxrss;

    my $bench_html = <<"BENCH";
Performance Metrics:
Total Time: @{[ sprintf("%.4f", $elapsed) ]} seconds
User CPU: $user_cpu s
System CPU: $system_cpu s
Peak RAM: $max_rss KB
Files Read: $files_read (IO Activity)
BENCH

    render_html($content . $bench_html, $safe_search_text, $year);

    release_lock();
    exit;
}

# render_html($content, $q_val, $yr)
# Print the page around $content.
# NOTE(review): $q_val and $yr are accepted but the template as it stands
# interpolates only $content -- confirm whether markup that used them was
# lost.
sub render_html {
    my ($content, $q_val, $yr) = @_;

    print <<"HTML";
Search

Search

$content
HTML
}