diff options
| author | Sadeep Madurange <sadeep@asciimx.com> | 2025-12-31 21:52:15 +0800 |
|---|---|---|
| committer | Sadeep Madurange <sadeep@asciimx.com> | 2025-12-31 21:52:15 +0800 |
| commit | 646faee949b4e60f96eab442945f0bba53dcf995 (patch) | |
| tree | 9a3743f55be44837dd2084bd1824853e864c18c7 /_site | |
| parent | 99a21a1bf35c5af6188abdcb87e894b371bbffa5 (diff) | |
| download | www-646faee949b4e60f96eab442945f0bba53dcf995.tar.gz | |
CGI script for searching site.
Diffstat (limited to '_site')
| -rw-r--r-- | _site/cgi-bin/find.cgi | 126 |
1 file changed, 53 insertions, 73 deletions
diff --git a/_site/cgi-bin/find.cgi b/_site/cgi-bin/find.cgi index bad12e7..f0d2b3c 100644 --- a/_site/cgi-bin/find.cgi +++ b/_site/cgi-bin/find.cgi @@ -1,83 +1,62 @@ #!/usr/bin/perl -use File::Find; - -sub escape_html { - my $str = shift; - $str =~ s/&/&/g; - $str =~ s/</</g; - $str =~ s/>/>/g; - $str =~ s/"/"/g; - $str =~ s/'/'/g; - return $str; +use Encode qw(decode_utf8); +use HTML::Escape qw(escape_html); + +my $search_text = ''; + +if ($ENV{QUERY_STRING} =~ /^q=([^&]*)/) { + $search_text = decode_utf8($1 // ""); + $search_text =~ s/\P{Print}//g; # toss any non-printable utf-8 characters + $search_text = substr($search_text, 0, 64); + $search_text =~ s/^\s+|\s+$//g; } -my %params; -if ($ENV{QUERY_STRING}) { - foreach my $pair (split /&/, $ENV{QUERY_STRING}) { - my ($key, $value) = split /=/, $pair; - $value =~ tr/+/ /; - $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; - $params{$key} = $value; +my @results; + +# Search only index.html files inside the first level of subdirectories +my $start_dir = '../log'; +my @files = glob("$start_dir/*/index.html"); + +foreach my $path (@files) { + # Skip if the path is a symlink or not a file + next if -l $path || ! 
-f $path; + + next unless open(my $fh, '<:utf8', $path); + my $html = do { local $/; <$fh> }; + close($fh); + + my ($text) = $html =~ m|<main>(.*?)</main>|is; + $text =~ s|<[^>]+>| |g; + $text =~ s|\s+| |g; + + next unless $text =~ /(.{0,40})(\Q$search_text\E)(.{0,40})/is; + my ($before, $actual, $after) = ($1, $2, $3); + + # Trim if we cut into the middle of a sentence + $after =~ s/\s\S*$// if length($after) > 25; + $before =~ s/^.*?\s// if length($before) > 25; + + if ($before =~ /\S/) { # If before has non-whitespace characters + $before = ucfirst($before); + } else { + $before = ""; # Clear any stray spaces + $actual = ucfirst($actual); } -} -my $search_text = $params{'q'} || ''; -$search_text = substr($search_text, 0, 64); -$search_text =~ s/[^a-zA-Z0-9 ]//g; + my $safe_before = escape_html($before); + my $safe_actual = escape_html($actual); + my $safe_after = escape_html($after); + my $snippet = "${safe_before}<b>${safe_actual}</b>${safe_after}..."; -my $directory = '../log/'; -my @results; + my ($title) = $html =~ m|<title>(.*?)</title>|is; + my $safe_title = escape_html($title); -my %excluded_files = ( - 'index.html' => 1, # /log/index.html -); - -if ($search_text =~ /\S/) { - find({ - wanted => sub { - # Ignore directories and only process index.html - return unless -f $_ && $_ eq 'index.html'; - - # Calculate the relative path for the URL (prevents leaking server file structure) - my $rel_path = $File::Find::name; - $rel_path =~ s|^\Q$directory\E/?||; - return if $excluded_files{$rel_path}; - - if (open my $fh, '<', $_) { - my $content = do { local $/; <$fh> }; - close $fh; - - if ($content =~ /\Q$search_text\E/i) { - - # Extract Title - my ($title) = $content =~ /<title>(.*?)<\/title>/is; - $title = $title ? 
escape_html($title) : $rel_path; - - # Extract the first <p> tag content - my ($p_content) = $content =~ /<p[^>]*>(.*?)<\/p>/is; - - # Process the snippet - my $snippet = $p_content || ""; - $snippet =~ s/<[^>]*>//g; # Remove internal tags - $snippet =~ s/\s+/ /g; # Collapse whitespace - - # Escape HTML entities AFTER stripping tags - # but BEFORE sending to the user to prevent XSS. - $snippet = escape_html(substr($snippet, 0, 50)); - $snippet .= "..." if length($p_content || "") > 50; - - push @results, { - path => $File::Find::name, - title => $title, - snippet => $snippet - }; - } - } - }, - no_chdir => 0, - follow => 0, - }, $directory); + push @results, { + path => $path, + title => $safe_title, + snippet => $snippet + }; } print "Content-Type: text/html\n\n"; @@ -97,6 +76,7 @@ if ($search_text eq '') { } my $safe_search_text = escape_html($search_text); +my $year = (localtime)[5] + 1900; print <<"HTML"; <!DOCTYPE html> @@ -132,7 +112,7 @@ print <<"HTML"; <div class="footer"> <div class="container"> <div class="twelve columns right container-2"> - <p id="footer-text">© ASCIIMX - 2025</p> + <p id="footer-text">© ASCIIMX - $year</p> </div> </div> </div> |
