From 5d84833e25ed23e40ac5703527469101448b9b66 Mon Sep 17 00:00:00 2001
From: Sadeep Madurange <sadeep@asciimx.com>
Date: Thu, 1 Jan 2026 17:18:27 +0800
Subject: Perl script for indexing site.

---
 cgi-bin/indexer.pl | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 cgi-bin/indexer.pl

(limited to 'cgi-bin')
diff --git a/cgi-bin/indexer.pl b/cgi-bin/indexer.pl
new file mode 100644
index 0000000..d0314a1
--- /dev/null
+++ b/cgi-bin/indexer.pl
@@ -0,0 +1,55 @@
+#!/usr/bin/perl
+
+# Build the search index: a Storable hash of URL => { t => title, c => text }.
+use strict;
+use warnings;
+use Storable qw(nstore);
+use HTML::Entities qw(decode_entities);
+
+my $built_site_dir = '../_site/log';       # resolved against the cwd
+my $output_file    = 'search_index.dat';   # written to the cwd
+my %index;
+
+print "Building search index from $built_site_dir...\n";
+
+# glob finds every index.html in subdirectories of /log/
+foreach my $path (glob("$built_site_dir/*/index.html")) {
+    # Warn about unreadable pages rather than dropping them silently
+    my $fh;
+    unless (open($fh, '<:encoding(UTF-8)', $path)) {
+        warn "Skipping $path: $!\n";
+        next;
+    }
+    my $html = do { local $/; <$fh> };
+    close($fh);
+
+    # Title is kept as raw markup (not entity-decoded); <main> may have attributes
+    my ($title) = $html =~ m|<title>(.*?)</title>|is;
+    my ($main)  = $html =~ m|<main\b[^>]*>(.*?)</main>|is;
+    $main //= '';
+
+    # Remove code and pre blocks (any letter case) to keep index prose-only
+    $main =~ s|<pre\b[^>]*>.*?</pre>| |gis;
+    $main =~ s|<code\b[^>]*>.*?</code>| |gis;
+
+    # Strip remaining tags, then decode entities (e.g., &amp; -> &)
+    $main =~ s|<[^>]+>| |g;
+    $main = decode_entities($main);
+
+    # Normalize whitespace (squash runs, trim both ends)
+    $main =~ s|\s+| |g;
+    $main =~ s/^\s+|\s+$//g;
+
+    # Map file path to the final web URL
+    # Example: ../_site/log/arduino/index.html -> /log/arduino/index.html
+    (my $url = $path) =~ s|^\.\./_site/|/|;
+
+    $index{$url} = { t => $title || "Untitled", c => $main };
+}
+
+# Save using network-order binary (nstore) for portability; die if the write fails
+nstore(\%index, $output_file) or die "Cannot write $output_file: $!\n";
+
+printf("Index complete: %d files (%.2f KB)\n",
+    scalar(keys %index), (-s $output_file) / 1024);
+
-- 
cgit v1.2.3