summary | refs | log | tree | commit | diff | stats
path: root/cgi-bin
diff options
context:
space:
mode:
author Sadeep Madurange <sadeep@asciimx.com> 2026-01-01 17:18:27 +0800
committer Sadeep Madurange <sadeep@asciimx.com> 2026-01-01 17:18:27 +0800
commit 5d84833e25ed23e40ac5703527469101448b9b66 (patch)
tree 09d8fd57597bdc1c86856ab55e31fe15505552e5 /cgi-bin
parent 2609fa6fe825c1c56e7c2fa11c3a28e93be8ad4a (diff)
download www-5d84833e25ed23e40ac5703527469101448b9b66.tar.gz
Perl script for indexing site.
Diffstat (limited to 'cgi-bin')
-rw-r--r-- cgi-bin/indexer.pl 55
1 files changed, 55 insertions, 0 deletions
diff --git a/cgi-bin/indexer.pl b/cgi-bin/indexer.pl
new file mode 100644
index 0000000..d0314a1
--- /dev/null
+++ b/cgi-bin/indexer.pl
@@ -0,0 +1,55 @@
+#!/usr/bin/perl
+# Builds search_index.dat: a Storable hash of URL => { t => title, c => prose }
+# for every log page. Paths are relative to the current working directory.
+
+use strict;
+use warnings;
+use Storable qw(nstore);
+use HTML::Entities qw(decode_entities);
+
+my $site_root = '../_site';
+my $built_site_dir = "$site_root/log";
+my $output_file = 'search_index.dat';
+my %index;
+
+print "Building search index from $built_site_dir...\n";
+
+# glob finds every index.html in subdirectories of /log/
+foreach my $path (glob("$built_site_dir/*/index.html")) {
+    # Decode strictly as UTF-8; warn about unreadable pages but keep going.
+    open(my $fh, '<:encoding(UTF-8)', $path)
+        or do { warn "Skipping $path: $!\n"; next };
+    my $html = do { local $/; <$fh> };
+    close($fh);
+
+    # Extract title and main content; either tag may carry attributes.
+    my ($title) = $html =~ m|<title\b[^>]*>(.*?)</title>|is;
+    my ($main) = $html =~ m|<main\b[^>]*>(.*?)</main>|is;
+    $main //= '';
+
+    # Drop comments and non-prose blocks (any letter case) to keep prose only.
+    $main =~ s|<!--.*?-->| |gs;
+    $main =~ s|<$_\b[^>]*>.*?</$_\s*>| |gis for qw(pre code script style);
+    # Strip all remaining tags, then decode entities (e.g., &amp; -> &).
+    $main =~ s|<[^>]+>| |g;
+    $main = decode_entities($main);
+
+    # Normalize whitespace (squash runs, then trim both ends).
+    $main =~ s|\s+| |g;
+    $main =~ s/^\s+|\s+$//g;
+
+    # Map file path to web URL: ../_site/log/x/index.html -> /log/x/index.html
+    (my $url = $path) =~ s|^\Q$site_root\E/|/|;
+
+    # Only a missing or empty title becomes "Untitled"; a literal "0" is kept.
+    my $has_title = defined $title && length $title;
+    $index{$url} = { t => $has_title ? $title : "Untitled", c => $main };
+}
+
+# Save using network-order binary (nstore) for portability
+nstore(\%index, $output_file) or die "Cannot write $output_file: $!\n";
+
+my $count = scalar(keys %index);
+my $size = (-s $output_file) // 0;
+printf("Index complete: %d files (%.2f KB)\n", $count, $size / 1024);
+