diff options
| author | Sadeep Madurange <sadeep@asciimx.com> | 2025-12-31 21:52:15 +0800 |
|---|---|---|
| committer | Sadeep Madurange <sadeep@asciimx.com> | 2025-12-31 21:52:15 +0800 |
| commit | 646faee949b4e60f96eab442945f0bba53dcf995 (patch) | |
| tree | 9a3743f55be44837dd2084bd1824853e864c18c7 /cgi-bin/find.cgi | |
| parent | 99a21a1bf35c5af6188abdcb87e894b371bbffa5 (diff) | |
| download | www-646faee949b4e60f96eab442945f0bba53dcf995.tar.gz | |
CGI script for searching site.
Diffstat (limited to 'cgi-bin/find.cgi')
| -rw-r--r-- | cgi-bin/find.cgi | 96 |
1 file changed, 44 insertions, 52 deletions
```diff
diff --git a/cgi-bin/find.cgi b/cgi-bin/find.cgi
index f3f68f5..f0d2b3c 100644
--- a/cgi-bin/find.cgi
+++ b/cgi-bin/find.cgi
@@ -1,8 +1,6 @@
 #!/usr/bin/perl
 
 use Encode qw(decode_utf8);
-
-use File::Find;
 use HTML::Escape qw(escape_html);
 
 my $search_text = '';
@@ -11,60 +9,54 @@ if ($ENV{QUERY_STRING} =~ /^q=([^&]*)/) {
     $search_text = decode_utf8($1 // "");
     $search_text =~ s/\P{Print}//g; # toss any non-printable utf-8 characters
     $search_text = substr($search_text, 0, 64);
+    $search_text =~ s/^\s+|\s+$//g;
 }
 
-my $directory = '../_site/log/';
 my @results;
 
-my %excluded_files = (
-    'index.html' => 1, # /log/index.html
-);
-
-if ($search_text =~ /\S/) {
-    find({
-        wanted => sub {
-            # Ignore directories and only process index.html
-            return unless -f $_ && $_ eq 'index.html';
-
-            # Calculate the relative path for the URL (prevents leaking server file structure)
-            my $rel_path = $File::Find::name;
-            $rel_path =~ s|^\Q$directory\E/?||;
-            return if $excluded_files{$rel_path};
-
-            if (open my $fh, '<', $_) {
-                my $content = do { local $/; <$fh> };
-                close $fh;
-
-                if ($content =~ /\Q$search_text\E/i) {
-
-                    # Extract Title
-                    my ($title) = $content =~ /<title>(.*?)<\/title>/is;
-                    $title = $title ? escape_html($title) : $rel_path;
-
-                    # Extract the first <p> tag content
-                    my ($p_content) = $content =~ /<p[^>]*>(.*?)<\/p>/is;
-
-                    # Process the snippet
-                    my $snippet = $p_content || "";
-                    $snippet =~ s/<[^>]*>//g; # Remove internal tags
-                    $snippet =~ s/\s+/ /g; # Collapse whitespace
-
-                    # Escape HTML entities AFTER stripping tags
-                    # but BEFORE sending to the user to prevent XSS.
-                    $snippet = escape_html(substr($snippet, 0, 50));
-                    $snippet .= "..." if length($p_content || "") > 50;
-
-                    push @results, {
-                        path => $File::Find::name,
-                        title => $title,
-                        snippet => $snippet
-                    };
-                }
-            }
-        },
-        no_chdir => 0,
-        follow => 0,
-    }, $directory);
+# Search only index.html files inside the first level of subdirectories
+my $start_dir = '../log';
+my @files = glob("$start_dir/*/index.html");
+
+foreach my $path (@files) {
+    # Skip if the path is a symlink or not a file
+    next if -l $path || ! -f $path;
+
+    next unless open(my $fh, '<:utf8', $path);
+    my $html = do { local $/; <$fh> };
+    close($fh);
+
+    my ($text) = $html =~ m|<main>(.*?)</main>|is;
+    $text =~ s|<[^>]+>| |g;
+    $text =~ s|\s+| |g;
+
+    next unless $text =~ /(.{0,40})(\Q$search_text\E)(.{0,40})/is;
+    my ($before, $actual, $after) = ($1, $2, $3);
+
+    # Trim if we cut into the middle of a sentence
+    $after =~ s/\s\S*$// if length($after) > 25;
+    $before =~ s/^.*?\s// if length($before) > 25;
+
+    if ($before =~ /\S/) { # If before has non-whitespace characters
+        $before = ucfirst($before);
+    } else {
+        $before = ""; # Clear any stray spaces
+        $actual = ucfirst($actual);
+    }
+
+    my $safe_before = escape_html($before);
+    my $safe_actual = escape_html($actual);
+    my $safe_after = escape_html($after);
+    my $snippet = "${safe_before}<b>${safe_actual}</b>${safe_after}...";
+
+    my ($title) = $html =~ m|<title>(.*?)</title>|is;
+    my $safe_title = escape_html($title);
+
+    push @results, {
+        path => $path,
+        title => $safe_title,
+        snippet => $snippet
+    };
 }
 
 print "Content-Type: text/html\n\n";
```
