diff options
| -rw-r--r-- | _config.yml | 1 | ||||
| -rw-r--r-- | cgi-bin/search.cgi | 239 | ||||
| -rw-r--r-- | index.md | 2 |
3 files changed, 241 insertions, 1 deletions
diff --git a/_config.yml b/_config.yml
index a327ffb..a700d6a 100644
--- a/_config.yml
+++ b/_config.yml
@@ -26,3 +26,4 @@ exclude:
   - Gemfile.lock
   - README.txt
   - cgi-bin/indexer.pl
+  - cgi-bin/find.cgi
diff --git a/cgi-bin/search.cgi b/cgi-bin/search.cgi
new file mode 100644
--- /dev/null
+++ b/cgi-bin/search.cgi
@@ -0,0 +1,239 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use File::Spec;
+use Encode qw(decode_utf8);
+use URI::Escape qw(uri_unescape);
+use HTML::Escape qw(escape_html);
+
+# Encode output as validated UTF-8 rather than only flagging it as such.
+binmode(STDOUT, ':encoding(UTF-8)');
+
+# --- Configuration ---
+my $max_parallel = 50;                    # concurrent searches allowed
+my $lock_timeout = 30;                    # seconds until a lock counts as stale
+my $max_results  = 20;                    # stop after this many hits
+my $lock_dir     = '/tmp/search_locks';   # one "<pid>.lock" per running search
+my $directory    = '../log/';             # root of the tree that is searched
+
+# --- Concurrency Control ---
+# The lock directory is private (0700) so that other local users cannot
+# plant symlinks under the predictable "<pid>.lock" names opened below.
+mkdir $lock_dir, 0700 unless -d $lock_dir;
+my $active_count = 0;
+my $now          = time();
+
+# Reap stale locks and count the live ones. If the directory cannot be read
+# we skip the scan; creating our own lock below will report the real error.
+if (opendir(my $dh_lock, $lock_dir)) {
+    while (my $file = readdir($dh_lock)) {
+        next unless $file =~ /\.lock$/;
+        my $path  = "$lock_dir/$file";
+        my $mtime = (stat($path))[9] || 0;
+        if ($now - $mtime > $lock_timeout) {
+            unlink($path);
+        }
+        else {
+            $active_count++;
+        }
+    }
+    closedir($dh_lock);
+}
+
+if ($active_count >= $max_parallel) {
+    render_html("<p>Server busy. Please try again in a few seconds.</p>", "");
+    exit;
+}
+
+my $lock_file = "$lock_dir/$$.lock";
+open(my $fh_lock, '>', $lock_file) or die "Cannot create lock: $!";
+
+# --- Query Decoding ---
+# Accept "q" anywhere in the query string, not only as the first parameter.
+my $search_text = '';
+if (($ENV{QUERY_STRING} || '') =~ /(?:^|&)q=([^&]*)/) {
+    my $raw_q = $1;
+    $raw_q =~ tr/+/ /;
+    $search_text = uri_unescape($raw_q);
+    $search_text = decode_utf8($search_text // "");
+    $search_text =~ s/\P{Print}//g;
+    $search_text = substr($search_text, 0, 64);
+    $search_text =~ s/^\s+|\s+$//g;
+}
+
+# Escape once, up front: final_output() reads this variable, so it must be
+# set before the empty-query branch below calls final_output().
+my $safe_search_text = escape_html($search_text);
+
+if ($search_text eq '') {
+    final_output("<p>Please enter a search term above.</p>");
+}
+
+# --- Search ---
+# Iterative depth-first walk of $directory; only index.html files are read.
+my @results;
+
+if ($search_text =~ /\S/) {
+    my @stack = ($directory);
+
+    while (@stack) {
+        # Exit search immediately if limit reached
+        last if scalar @results >= $max_results;
+
+        my $current_path = pop @stack;
+
+        if (-d $current_path) {
+            if (opendir(my $dh, $current_path)) {
+                while (my $entry = readdir($dh)) {
+                    next if $entry =~ /^\.\.?$/;
+                    push @stack, File::Spec->catfile($current_path, $entry);
+                }
+                closedir($dh);
+            }
+        }
+        elsif (-f $current_path && $current_path =~ /index\.html$/) {
+            if (open my $fh, '<:encoding(UTF-8)', $current_path) {
+                my $raw_content = do { local $/; <$fh> };
+                close $fh;
+
+                my ($article) = $raw_content =~ /<article[^>]*>(.*?)<\/article>/is;
+
+                # A page without <article> has nothing to search; skip it
+                # before the substitutions below touch an undefined value.
+                next unless defined $article;
+                $article =~ s/<pre.*?>.*?<\/pre>//isg;
+                $article =~ s/<code.*?>.*?<\/code>//isg;
+                next unless $article;
+
+                # Clean for accurate regex offsets
+                my $clean_text = $article;
+                $clean_text =~ s/<[^>]*>/ /g;    # Tags to space to prevent word mashing
+                $clean_text =~ s/\s+/ /g;
+                $clean_text =~ s/^\s+|\s+$//g;
+
+                if ($clean_text =~ /\Q$search_text\E/i) {
+                    my $match_pos = $-[0];
+                    my $match_end = $+[0];
+
+                    # Grab a context window (120 chars padding)
+                    my $grab_start = ($match_pos > 120) ? $match_pos - 120 : 0;
+                    my $grab_len   = ($match_end - $grab_start) + 120;
+                    my $raw_chunk  = substr($clean_text, $grab_start, $grab_len);
+
+                    my $is_start = ($grab_start == 0);
+                    my $is_end   = ($grab_start + $grab_len >= length($clean_text));
+
+                    my $snippet = trim_and_clean_snippet($raw_chunk, $is_start, $is_end);
+
+                    # The page's own <title> is used as markup; the path
+                    # fallback is plain text and therefore escaped.
+                    my ($title) = $raw_content =~ /<title>(.*?)<\/title>/is;
+                    push @results, {
+                        path    => $current_path,
+                        title   => $title || escape_html($current_path),
+                        snippet => $snippet
+                    };
+                }
+            }
+        }
+    }
+}
+
+my $list_html = "";
+if (@results == 0) {
+    $list_html = "<p>No results found for \"<b>$safe_search_text</b>\".</p>";
+} else {
+    $list_html = "<ul>" . join('', map {
+        my $href = escape_html($_->{path});    # paths may contain quotes or &
+        "<li><a href=\"/$href\">$_->{title}</a><br><small>$_->{snippet}</small></li>"
+    } @results) . "</ul>";
+}
+
+final_output($list_html);
+
+# Render the page around $content, release this request's lock and exit.
+# Does not return.
+sub final_output {
+    my ($content) = @_;
+    render_html($content, $safe_search_text);
+    if ($fh_lock) { close($fh_lock); unlink($lock_file); }
+    exit;
+}
+
+# Turn a raw context window into a display snippet.
+#   $raw_chunk - plain text cut from around the match
+#   $is_start  - true when the chunk begins at the start of the article text
+#   $is_end    - true when the chunk reaches the end of the article text
+# Returns HTML-escaped text cut to roughly 160 characters, with "..."
+# appended when the text was cut short and does not end a sentence.
+sub trim_and_clean_snippet {
+    my ($raw_chunk, $is_start, $is_end) = @_;
+
+    # Start check:
+    # If we aren't at the very start of the article, we likely have a leading fragment.
+    # We search for the first space to drop the partial word.
+    if (!$is_start) {
+        $raw_chunk =~ s/^[^\s]*\s//;
+    }
+
+    # Length and end Check:
+    my $show_ellipsis = !$is_end;
+
+    if (length($raw_chunk) > 160) {
+        $raw_chunk = substr($raw_chunk, 0, 160);
+
+        # Look for the last space within our 160 chars to avoid cutting a word in half.
+        # If we find a space and trim, we add the ellipsis.
+        if ($raw_chunk =~ s/\s+[^\s]*$//) {
+            $show_ellipsis = 1;
+        }
+    }
+
+    # Sometimes leading punctuation or weird fragments remain after the trim.
+    $raw_chunk =~ s/^[:;,.?!\s]+//;    # Remove leading punctuation/space
+
+    # Final polish
+    $raw_chunk = ucfirst($raw_chunk);
+
+    # Ensure it ends cleanly: if it doesn't end in terminal punctuation, add ellipsis
+    my $final_text = escape_html($raw_chunk);
+    if ($show_ellipsis && $raw_chunk !~ /[.!?]$/) {
+        $final_text .= "...";
+    }
+
+    return $final_text;
+}
+
+# Print the CGI header and the full search page.
+#   $content - HTML placed below the search form
+#   $q_val   - value for the search box; must already be HTML-escaped
+sub render_html {
+    my ($content, $q_val) = @_;
+    $q_val //= '';
+    print "Content-Type: text/html; charset=UTF-8\n\n";
+    print <<"HTML";
+<!DOCTYPE html>
+<html lang="en-us">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>Journal | Search</title>
+  <link rel="stylesheet" href="/assets/css/main.css">
+</head>
+<body>
+  <header>
+    <h1><a href="/">Journal</a></h1> /
+    <h1><a href="/cgi-bin/search.cgi">Search</a></h1>
+  </header>
+  <article>
+    <form id="search-bar" action="" method="GET">
+      <input id="search-box" type="text" name="q" value="$q_val">
+      <input id="search-btn" type="submit" value="Search">
+    </form>
+    $content
+  </article>
+</body>
+</html>
+HTML
+}
diff --git a/index.md b/index.md
--- a/index.md
+++ b/index.md
@@ -16,6 +16,6 @@ title: "Home"
 
 <footer>
   <p>Built with <a href="https://github.com/ronv/minimalist" class="external" target="_blank" rel="noopener noreferrer">Minimalist</a>.
-    <a href="/cgi-bin/find.cgi">Search</a>
+    <a href="/cgi-bin/search.cgi">Search</a>
   </p>
 </footer>
