diff options
| author | Sadeep Madurange <sadeep@asciimx.com> | 2026-01-03 12:58:01 +0800 |
|---|---|---|
| committer | Sadeep Madurange <sadeep@asciimx.com> | 2026-01-03 12:58:01 +0800 |
| commit | 8a4da6809cf9368cd6a5dd7351181ea4256453f9 (patch) | |
| tree | 77b2e109ba979332d81799a957bbfa86d010b81b /_site/cgi-bin/indexer.pl | |
| download | site-search-bm-8a4da6809cf9368cd6a5dd7351181ea4256453f9.tar.gz | |
Diffstat (limited to '_site/cgi-bin/indexer.pl')
| -rw-r--r-- | _site/cgi-bin/indexer.pl | 34 |
1 files changed, 34 insertions, 0 deletions
#!/usr/bin/perl

# Build a Storable search index from the generated site.
#
# Every "$built_site_dir/*/index.html" file is read and recorded in a hash
# keyed by the file's path; each entry holds the page title (t) and the full
# HTML of the page (c). The hash is written to $output_file with nstore.

use strict;
use warnings;
use Storable qw(nstore);
use HTML::Entities qw(decode_entities);

# --- Configuration ---
my $built_site_dir = '../log';
my $output_file    = 'search_index.dat';
my %index;

# Return the text of the first <title> element in $html with surrounding
# whitespace trimmed and internal whitespace runs collapsed to one space.
# Returns "Unknown" when there is no <title> element (or $html is undef)
# and "Untitled" when the element is present but empty.
sub _extract_title {
    my ($html) = @_;

    return 'Unknown' unless defined $html;

    # The match is tested on its own and the capture read from $1.
    # Writing `my ($t) = $html =~ m/.../ || "Unknown"` would not work:
    # `||` evaluates the match in scalar context, which yields 1 rather
    # than the captured text.
    return 'Unknown' unless $html =~ m{<title\b[^>]*>(.*?)</title>}is;
    my $title = $1;

    $title =~ s/\A\s+//;
    $title =~ s/\s+\z//;
    $title =~ s/\s+/ /g;

    # Use length rather than truthiness so a title of "0" is kept.
    return length($title) ? $title : 'Untitled';
}

print "Building search index from $built_site_dir...\n";

foreach my $path (glob("$built_site_dir/*/index.html")) {
    # Indexing is best-effort: report an unreadable page and move on.
    my $fh;
    unless (open($fh, '<:encoding(UTF-8)', $path)) {
        warn "Skipping $path: $!\n";
        next;
    }
    my $html = do { local $/; <$fh> };
    close($fh);

    # An empty file slurps as undef; index it as an empty document.
    $html = '' unless defined $html;

    # The path is used unchanged as the index key.
    my $url = $path;

    $index{$url} = {
        t => _extract_title($html),
        c => $html,    # the whole document, not an extracted main section
    };
}

# nstore returns undef on I/O errors; do not report success in that case.
nstore(\%index, $output_file)
    or die "Cannot write $output_file: $!\n";

printf("Index complete: %d files (%.2f KB)\n",
    scalar(keys %index), ((-s $output_file) || 0) / 1024);
