#!/usr/bin/perl
use strict;
use warnings;

use Encode       qw(decode_utf8);
use HTML::Escape qw(escape_html);
use Scalar::Util qw(reftype);
use Storable     qw(retrieve);

# Search CGI: throttles concurrent requests with per-process lock files,
# reads the search term from the "q" query parameter, scans a Storable
# index for pages whose content contains the term and collects a short
# snippet per hit.
# render_html() and final_output() are called here but are not defined in
# this part of the file.

# Configuration
my $max_parallel  = 50;                   # max no. of parallel searches
my $lock_timeout  = 30;                   # drop stale locks after this many seconds
my $max_results   = 20;                   # max search results
my $min_query_len = 3;                    # min query length to avoid matching 'a', 'e'
my $index_file    = 'search_index.dat';   # index file
my $lock_dir      = '/tmp/search_locks';  # lock file directory

# Concurrency control: each request creates "<pid>.lock" in $lock_dir.
# Locks younger than $lock_timeout count as active searches; older ones
# are removed.
mkdir $lock_dir, 0777 unless -d $lock_dir;
my $active_count = 0;
my $now          = time();
my $dh;
if (opendir($dh, $lock_dir)) {
    while (defined(my $file = readdir($dh))) {
        next unless $file =~ /\.lock$/;
        my $path  = "$lock_dir/$file";
        my $mtime = (stat($path))[9] || 0;    # stat failed: mtime 0 makes it stale
        if ($now - $mtime > $lock_timeout) {
            unlink($path);
        }
        else {
            $active_count++;
        }
    }
    closedir($dh);
}
else {
    # Best effort: without a listing the limit cannot be enforced, so the
    # request proceeds with $active_count == 0.
    warn "Cannot read lock directory $lock_dir: $!";
}

my $year = (localtime)[5] + 1900;

# Too many search requests
if ($active_count >= $max_parallel) {
    print "Content-Type: text/html\n\n";
    render_html("
Server busy. Please try again in a few seconds.
", "", $year);
    exit;
}

# Register this request so concurrent requests count it.
# NOTE(review): no unlink of $lock_file is visible in this part of the
# file -- confirm the lock is released when the request ends.
my $lock_file = "$lock_dir/$$.lock";
open(my $fh_lock, '>', $lock_file)
    or warn "Cannot create lock file $lock_file: $!";

# Read the "q" parameter (in any position): undo URL encoding, decode as
# utf-8, toss non-printable chars, cap at 64 characters, trim.
my $search_text = '';
if (($ENV{QUERY_STRING} || '') =~ /(?:^|&)q=([^&]*)/) {
    my $raw = $1;
    $raw =~ tr/+/ /;                                # form encoding: '+' is a space
    $raw =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;    # %XX -> raw byte
    $search_text = decode_utf8($raw);
    $search_text =~ s/\P{Print}//g;
    $search_text = substr($search_text, 0, 64);
    $search_text =~ s/^\s+|\s+$//g;
}

# Pre-prepare common template variables
my $safe_search_text = escape_html($search_text);

print "Content-Type: text/html\n\n";

# Input validation
# NOTE(review): the checks below only make sense if final_output() ends
# the request -- confirm against its definition.
if ($search_text eq '') {
    final_output("Please enter a search term above.
");
}
if (length($search_text) < $min_query_len) {
    final_output("Search term is too short. Please enter at least $min_query_len characters.
");
}

# Load the index. A missing, unreadable or corrupt index file is reported
# as "unavailable" instead of dying after the header has been sent.
my $index = -f $index_file ? eval { retrieve($index_file) } : undef;
if ((reftype($index) // '') ne 'HASH') {
    final_output("Search temporarily unavailable.
");
}

# Case-insensitive literal match with up to 40 characters of context on
# each side; built once because it does not change between pages.
my $match_re = qr/(.{0,40})(\Q$search_text\E)(.{0,40})/is;

# Scan pages in sorted URL order and keep at most $max_results hits.
my @results;
my $found = 0;
foreach my $url (sort keys %$index) {
    last if $found >= $max_results;
    my $data = $index->{$url};

    # Skip pages without content or without a match.
    next unless defined $data->{c} && $data->{c} =~ $match_re;
    my ($before, $actual, $after) = ($1, $2, $3);
    $found++;

    # When a side has more than 25 characters of context, drop the partial
    # word at its outer edge so the snippet starts/ends on a word boundary.
    $after  =~ s/\s\S*$// if length($after) > 25;
    $before =~ s/^.*?\s// if length($before) > 25;

    # Capitalise whichever piece ends up first in the snippet.
    $before = ($before =~ /\S/) ? ucfirst($before) : "";
    $actual = ($before eq "") ? ucfirst($actual) : $actual;

    # NOTE(review): the empty strings around the match look like leftover
    # placeholders for highlight markup -- confirm.
    my $snippet = escape_html($before) . "" . escape_html($actual) . "" . escape_html($after) . "...";
    push @results, { path => $url, title => escape_html($data->{t}), snippet => $snippet };
}

# Format results list
my $list_html = "";
if (@results == 0) {
    $list_html = "No results found for \"$safe_search_text\".
"; } else { $list_html = "