From 9fec793abe0a73e5cd502a1d1e935e2413b85079 Mon Sep 17 00:00:00 2001 From: Sadeep Madurange Date: Sun, 28 Dec 2025 21:17:30 +0800 Subject: Search via CGI script. --- _site/404.html | 7 +- _site/about/index.html | 8 +- _site/assets/css/main.css | 15 +++ _site/cgi-bin/find.cgi | 141 ++++++++++++++++++++++++++++ _site/feed.xml | 2 +- _site/index.html | 33 ++++--- _site/log/arduino-due/index.html | 7 +- _site/log/arduino-uno/index.html | 7 +- _site/log/bumblebee/index.html | 7 +- _site/log/e-reader/index.html | 7 +- _site/log/etlas/index.html | 7 +- _site/log/fpm-door-lock-lp/index.html | 7 +- _site/log/fpm-door-lock-rf/index.html | 7 +- _site/log/index.html | 20 +++- _site/log/matrix-digital-rain/index.html | 10 +- _site/log/mosfet-switches/index.html | 7 +- _site/log/neo4j-a-star-search/index.html | 7 +- _site/log/search-with-cgi/index.html | 152 +++++++++++++++++++++++++++++++ _site/posts.xml | 2 +- _site/projects/index.html | 7 +- _site/sitemap.xml | 4 + 21 files changed, 432 insertions(+), 32 deletions(-) create mode 100644 _site/cgi-bin/find.cgi create mode 100644 _site/log/search-with-cgi/index.html (limited to '_site') diff --git a/_site/404.html b/_site/404.html index 00df26d..6edae67 100644 --- a/_site/404.html +++ b/_site/404.html @@ -28,7 +28,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • diff --git a/_site/about/index.html b/_site/about/index.html index 804e2d8..bd7e241 100644 --- a/_site/about/index.html +++ b/_site/about/index.html @@ -28,7 +28,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • @@ -53,6 +58,7 @@
  • log: Technical log.
  • poc: Projects.
  • abt: Site information.
  • +
  • sws: Search within site.
  • rss: RSS (Atom) feed.
  • diff --git a/_site/assets/css/main.css b/_site/assets/css/main.css index 910ddd6..091baf3 100644 --- a/_site/assets/css/main.css +++ b/_site/assets/css/main.css @@ -237,3 +237,18 @@ pre::-webkit-scrollbar { 100% { visibility: visible; } } +#search-box { + border-radius: 0; + border-color: var(--main-fg-color); + color: var(--main-fg-color) !important; + background-color: var(--main-bg-color); + text-shadow: 0 0 1px var(--main-fg-color), 0 0 6px var(--main-fg-color); +} + +#search-btn { + border-radius: 0; + border-color: var(--main-fg-color); + color: var(--main-fg-color); + background-color: var(--main-bg-color); + text-shadow: 0 0 1px var(--main-fg-color), 0 0 6px var(--main-fg-color); +} diff --git a/_site/cgi-bin/find.cgi b/_site/cgi-bin/find.cgi new file mode 100644 index 0000000..bad12e7 --- /dev/null +++ b/_site/cgi-bin/find.cgi @@ -0,0 +1,141 @@ +#!/usr/bin/perl + +use File::Find; + +sub escape_html { + my $str = shift; + $str =~ s/&/&/g; + $str =~ s//>/g; + $str =~ s/"/"/g; + $str =~ s/'/'/g; + return $str; +} + +my %params; +if ($ENV{QUERY_STRING}) { + foreach my $pair (split /&/, $ENV{QUERY_STRING}) { + my ($key, $value) = split /=/, $pair; + $value =~ tr/+/ /; + $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; + $params{$key} = $value; + } +} + +my $search_text = $params{'q'} || ''; +$search_text = substr($search_text, 0, 64); +$search_text =~ s/[^a-zA-Z0-9 ]//g; + +my $directory = '../log/'; +my @results; + +my %excluded_files = ( + 'index.html' => 1, # /log/index.html +); + +if ($search_text =~ /\S/) { + find({ + wanted => sub { + # Ignore directories and only process index.html + return unless -f $_ && $_ eq 'index.html'; + + # Calculate the relative path for the URL (prevents leaking server file structure) + my $rel_path = $File::Find::name; + $rel_path =~ s|^\Q$directory\E/?||; + return if $excluded_files{$rel_path}; + + if (open my $fh, '<', $_) { + my $content = do { local $/; <$fh> }; + close $fh; + + if ($content =~ 
/\Q$search_text\E/i) { + + # Extract Title + my ($title) = $content =~ /(.*?)<\/title>/is; + $title = $title ? escape_html($title) : $rel_path; + + # Extract the first <p> tag content + my ($p_content) = $content =~ /<p[^>]*>(.*?)<\/p>/is; + + # Process the snippet + my $snippet = $p_content || ""; + $snippet =~ s/<[^>]*>//g; # Remove internal tags + $snippet =~ s/\s+/ /g; # Collapse whitespace + + # Escape HTML entities AFTER stripping tags + # but BEFORE sending to the user to prevent XSS. + $snippet = escape_html(substr($snippet, 0, 50)); + $snippet .= "..." if length($p_content || "") > 50; + + push @results, { + path => $File::Find::name, + title => $title, + snippet => $snippet + }; + } + } + }, + no_chdir => 0, + follow => 0, + }, $directory); +} + +print "Content-Type: text/html\n\n"; + +my $list; +if ($search_text eq '') { + $list = "<p>Please enter a search term above.</p>"; +} elsif (@results == 0) { + $list = "<p>No results found for \"<b>$search_text</b>\".</p>"; +} else { + $list = "<ul>"; + foreach my $res (@results) { + my $url = $res->{path}; + $list .= "<li><a href=\"/$url\">$res->{title}</a><br><small>$res->{snippet}</small></li>"; + } + $list .= "</ul>"; +} + +my $safe_search_text = escape_html($search_text); + +print <<"HTML"; +<!DOCTYPE html> +<html lang="en-us"> +<head> + <meta charset="utf-8"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>Search + + + + + +
    +
    +

    Search

    +
    + + +
    + $list +
    +
    + + + +HTML diff --git a/_site/feed.xml b/_site/feed.xml index f4bdc86..bc8a117 100644 --- a/_site/feed.xml +++ b/_site/feed.xml @@ -1 +1 @@ -Jekyll2025-12-28T15:18:50+08:00/feed.xmlASCIIMX | LogW. D. Sadeep MadurangeMatrix Rain: 2025 refactor2025-12-21T00:00:00+08:002025-12-21T00:00:00+08:00/log/matrix-digital-rainW. D. Sadeep MadurangeFingerprint door lock (LP)2025-08-18T00:00:00+08:002025-08-18T00:00:00+08:00/log/fpm-door-lock-lpW. D. Sadeep MadurangeHigh-side MOSFET switching2025-06-22T00:00:00+08:002025-06-22T00:00:00+08:00/log/mosfet-switchesW. D. Sadeep MadurangeATmega328P at 3.3V and 5V2025-06-10T00:00:00+08:002025-06-10T00:00:00+08:00/log/arduino-unoW. D. Sadeep MadurangeFingerprint door lock (RF)2025-06-05T00:00:00+08:002025-06-05T00:00:00+08:00/log/fpm-door-lock-rfW. D. Sadeep MadurangeBumblebee: browser automation2025-04-02T00:00:00+08:002025-04-02T00:00:00+08:00/log/bumblebeeW. D. Sadeep MadurangeATSAM3X8E bare-metal programming2024-09-16T00:00:00+08:002024-09-16T00:00:00+08:00/log/arduino-dueW. D. Sadeep MadurangeEtlas: e-paper dashboard2024-09-05T00:00:00+08:002024-09-05T00:00:00+08:00/log/etlasW. D. Sadeep MadurangeESP32 e-reader prototype2023-10-24T00:00:00+08:002023-10-24T00:00:00+08:00/log/e-readerW. D. Sadeep MadurangeNeo4j shortest path optimization2018-03-06T00:00:00+08:002018-03-06T00:00:00+08:00/log/neo4j-a-star-searchW. D. Sadeep Madurange \ No newline at end of file +Jekyll2025-12-29T21:56:36+08:00/feed.xmlASCIIMX | LogW. D. Sadeep MadurangeSite search using Perl + CGI2025-12-29T00:00:00+08:002025-12-29T00:00:00+08:00/log/search-with-cgiW. D. Sadeep MadurangeMatrix Rain: 2025 refactor2025-12-21T00:00:00+08:002025-12-21T00:00:00+08:00/log/matrix-digital-rainW. D. Sadeep MadurangeFingerprint door lock (LP)2025-08-18T00:00:00+08:002025-08-18T00:00:00+08:00/log/fpm-door-lock-lpW. D. Sadeep MadurangeHigh-side MOSFET switching2025-06-22T00:00:00+08:002025-06-22T00:00:00+08:00/log/mosfet-switchesW. D. 
Sadeep MadurangeATmega328P at 3.3V and 5V2025-06-10T00:00:00+08:002025-06-10T00:00:00+08:00/log/arduino-unoW. D. Sadeep MadurangeFingerprint door lock (RF)2025-06-05T00:00:00+08:002025-06-05T00:00:00+08:00/log/fpm-door-lock-rfW. D. Sadeep MadurangeBumblebee: browser automation2025-04-02T00:00:00+08:002025-04-02T00:00:00+08:00/log/bumblebeeW. D. Sadeep MadurangeATSAM3X8E bare-metal programming2024-09-16T00:00:00+08:002024-09-16T00:00:00+08:00/log/arduino-dueW. D. Sadeep MadurangeEtlas: e-paper dashboard2024-09-05T00:00:00+08:002024-09-05T00:00:00+08:00/log/etlasW. D. Sadeep MadurangeESP32 e-reader prototype2023-10-24T00:00:00+08:002023-10-24T00:00:00+08:00/log/e-readerW. D. Sadeep Madurange \ No newline at end of file diff --git a/_site/index.html b/_site/index.html index f7b6c70..79b0549 100644 --- a/_site/index.html +++ b/_site/index.html @@ -28,7 +28,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • @@ -54,6 +59,19 @@ + + + Site search using Perl + CGI + + + + + + + + + + Matrix Rain: 2025 refactor @@ -171,19 +189,6 @@ - - - Neo4j shortest path optimization - - - - - - - - - - diff --git a/_site/log/arduino-due/index.html b/_site/log/arduino-due/index.html index 0916c6f..a2135cb 100644 --- a/_site/log/arduino-due/index.html +++ b/_site/log/arduino-due/index.html @@ -32,7 +32,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • diff --git a/_site/log/arduino-uno/index.html b/_site/log/arduino-uno/index.html index 95681a4..66fff37 100644 --- a/_site/log/arduino-uno/index.html +++ b/_site/log/arduino-uno/index.html @@ -32,7 +32,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • diff --git a/_site/log/bumblebee/index.html b/_site/log/bumblebee/index.html index 0962a27..b4c9c25 100644 --- a/_site/log/bumblebee/index.html +++ b/_site/log/bumblebee/index.html @@ -32,7 +32,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • diff --git a/_site/log/e-reader/index.html b/_site/log/e-reader/index.html index 3eef05d..44ae1e6 100644 --- a/_site/log/e-reader/index.html +++ b/_site/log/e-reader/index.html @@ -32,7 +32,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • diff --git a/_site/log/etlas/index.html b/_site/log/etlas/index.html index c1b5975..8149618 100644 --- a/_site/log/etlas/index.html +++ b/_site/log/etlas/index.html @@ -32,7 +32,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • diff --git a/_site/log/fpm-door-lock-lp/index.html b/_site/log/fpm-door-lock-lp/index.html index 76715d7..a5d237c 100644 --- a/_site/log/fpm-door-lock-lp/index.html +++ b/_site/log/fpm-door-lock-lp/index.html @@ -32,7 +32,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • diff --git a/_site/log/fpm-door-lock-rf/index.html b/_site/log/fpm-door-lock-rf/index.html index 0c5037b..04c84de 100644 --- a/_site/log/fpm-door-lock-rf/index.html +++ b/_site/log/fpm-door-lock-rf/index.html @@ -32,7 +32,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • diff --git a/_site/log/index.html b/_site/log/index.html index de9c470..defcfed 100644 --- a/_site/log/index.html +++ b/_site/log/index.html @@ -28,7 +28,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • @@ -44,6 +49,19 @@ + + + Site search using Perl + CGI + + + + + + + + + + Matrix Rain: 2025 refactor diff --git a/_site/log/matrix-digital-rain/index.html b/_site/log/matrix-digital-rain/index.html index 8383776..a570230 100644 --- a/_site/log/matrix-digital-rain/index.html +++ b/_site/log/matrix-digital-rain/index.html @@ -32,7 +32,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • @@ -91,8 +96,7 @@ static inline void insert_code(matrix *mat,

    Tossed license and automake cruft. Just cc -O3 main.c -o matrix now. Don’t need the ceremony.

    -

    Runs at 2-3% CPU on OpenBSD (T490). No cause to measure performance more -precisely. No regressions. Fans are quiet.

    +

    Runs at 2-3% CPU on OpenBSD (T490). No regressions. Fans are quiet.

    Commit: 69a888a

    diff --git a/_site/log/mosfet-switches/index.html b/_site/log/mosfet-switches/index.html index 97bba07..37d6707 100644 --- a/_site/log/mosfet-switches/index.html +++ b/_site/log/mosfet-switches/index.html @@ -32,7 +32,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • diff --git a/_site/log/neo4j-a-star-search/index.html b/_site/log/neo4j-a-star-search/index.html index a00dd6c..013cd95 100644 --- a/_site/log/neo4j-a-star-search/index.html +++ b/_site/log/neo4j-a-star-search/index.html @@ -32,7 +32,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • diff --git a/_site/log/search-with-cgi/index.html b/_site/log/search-with-cgi/index.html new file mode 100644 index 0000000..060a282 --- /dev/null +++ b/_site/log/search-with-cgi/index.html @@ -0,0 +1,152 @@ + + + + + Site search using Perl + CGI + + + + + Site search using Perl + CGI + + + + + + + + + + + + + +
    +
    +
    +

    SITE SEARCH USING PERL + CGI

    +
    29 DECEMBER 2025
    +
    +

    Need a way to search the site–the number of articles is growing.

    + +

    Searching site client-side using the RSS feed and JavaScript is not an option– +bloats the feed and breaks the site for Lynx and other text browsers.

    + +

    Perl’s great for text processing–especially regex work. A few lines of Perl +could do a regex search and send the result back via CGI. OpenBSD httpd speaks +CGI via slowcgi; Perl and slowcgi are in the base system. No dependencies. Works on every +conceivable browser.

    + +

    Perl: traverse the directory recursively with File::Find. If the search text is +found, grab the file name, title, and up to 50 chars of the first paragraph to +include in the search result.

    + +
    find(sub {  # walk $dir with File::Find; this anonymous sub is the per-entry callback
    +    if (open my $fh, '<', $_) {  # entries that cannot be opened are skipped silently; the next line slurps the whole file
    +        my $content = do { local $/; <$fh> };
    +        close $fh;
    +            
    +    if ($content =~ /\Q$search_text\E/i) {  # literal, case-insensitive match; still inside the open block despite the indent
    +        my ($title) = $content =~ /<title>(.*?)<\/title>/is;  # first title element; the s flag lets . span newlines
    +        $title ||= $File::Find::name;  # fall back to the path when the title is missing or empty
    +        my ($p_content) = $content =~ /<p[^>]*>(.*?)<\/p>/is;  # body of the first paragraph element
    +        my $snippet = $p_content || "";
    +        $snippet =~ s/<[^>]*>//g; # drop tags nested inside the paragraph
    +        $snippet =~ s/\s+/ /g;  # collapse each whitespace run to one space
    +        $snippet = substr($snippet, 0, 50);  # keep at most 50 characters
    +        $snippet .= "..." if length($p_content || "") > 50;  # NOTE(review): tests the raw paragraph length (tags included), not the stripped snippet
    +
    +        push @results, { # one hash ref per matching file
    +            path    => $File::Find::name, 
    +            title   => $title, 
    +            snippet => $snippet 
    +        };
    +    }  # end of the match test
    +  }  # end of the open test
    +}, $dir);
    +
    + +

    Don’t need the Perl CGI module, httpd sets QUERY_STRING for the slowcgi script:

    + +
    my %params;  # query parameters, keyed by name
    +if ($ENV{QUERY_STRING}) {  # nothing to parse when the query string is unset or empty
    +    foreach my $pair (split /&/, $ENV{QUERY_STRING}) {  # pairs are separated by &
    +        my ($key, $value) = split /=/, $pair;  # NOTE(review): $value is undef when the pair has no =, and text after a second = is dropped
    +        $value =~ tr/+/ /;  # a plus sign encodes a space
    +        $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;  # each %XX hex escape becomes the byte it names
    +        $params{$key} = $value;  # a repeated key overwrites the earlier value; the key itself is stored undecoded
    +    }
    +}
    +
    + +

    Run the script as www user. Permissions: 554 (read + execute).

    + +

    Running in OpenBSD chroot: Check Perl’s dynamic object dependencies:

    + +
    $ ldd $(which perl)
    +/usr/bin/perl:
    +        Start            End              Type  Open Ref GrpRef Name
    +        000008797e8e6000 000008797e8eb000 exe   1    0   0      /usr/bin/perl
    +        0000087c1ffe5000 0000087c20396000 rlib  0    1   0      /usr/lib/libperl.so.26.0
    +        0000087bf4508000 0000087bf4539000 rlib  0    2   0      /usr/lib/libm.so.10.1
    +        0000087b9e801000 0000087b9e907000 rlib  0    2   0      /usr/lib/libc.so.102.0
    +        0000087bba182000 0000087bba182000 ld.so 0    1   0      /usr/libexec/ld.so
    +
    + +

    Copy them over to the chroot. It should now contain /var/www/usr/bin/perl, +/var/www/usr/lib/libperl.so.26.0, and so on.

    + +

    Troubleshooting: look for issues in logs or try executing the script in chroot:

    + +
    $ cat /var/log/messages | grep slowcgi
    +# chroot /var/www/ htdocs/path/to/script/script.cgi
    +
    +

    The last command exposes any missing Perl modules in chroot and where to find +them. Copy them over as well.

    + +
    location "/cgi-bin/*" {  # applies to request paths matching /cgi-bin/*
    +    fastcgi socket "/run/slowcgi.sock"  # hand matching requests to the FastCGI socket at this path
    +}
    +
    + +

    The block above, placed in httpd.conf, routes queries to slowcgi.

    + +
    + +
    +
    +
    + + + + + + diff --git a/_site/posts.xml b/_site/posts.xml index 8f7399b..66974e6 100644 --- a/_site/posts.xml +++ b/_site/posts.xml @@ -1 +1 @@ -Jekyll2025-12-28T15:18:50+08:00/posts.xmlASCIIMXW. D. Sadeep Madurange \ No newline at end of file +Jekyll2025-12-29T21:56:36+08:00/posts.xmlASCIIMXW. D. Sadeep Madurange \ No newline at end of file diff --git a/_site/projects/index.html b/_site/projects/index.html index 71a41e6..2357793 100644 --- a/_site/projects/index.html +++ b/_site/projects/index.html @@ -28,7 +28,12 @@
  • abt
  • -
  • rss
  • +
  • + sws +
  • +
  • + rss +
  • diff --git a/_site/sitemap.xml b/_site/sitemap.xml index b799547..93b39c0 100644 --- a/_site/sitemap.xml +++ b/_site/sitemap.xml @@ -41,6 +41,10 @@ 2025-12-21T00:00:00+08:00 +/log/search-with-cgi/ +2025-12-29T00:00:00+08:00 + + /about/ -- cgit v1.2.3