diff options
Diffstat (limited to '_site/cgi-bin/indexer.pl')
| -rw-r--r-- | _site/cgi-bin/indexer.pl | 34 |
1 files changed, 34 insertions, 0 deletions
#!/usr/bin/perl

# indexer.pl - build the search index for the generated site.
#
# Reads every "<dir>/index.html" found directly under $built_site_dir,
# records each page's title and its raw HTML, and serialises the resulting
# hash to $output_file with Storable::nstore (network byte order).

use strict;
use warnings;
use Storable qw(nstore);
use HTML::Entities qw(decode_entities);    # imported by the original; currently unused

# --- Configuration ---
my $built_site_dir = '../log';
my $output_file    = 'search_index.dat';
my %index;

print "Building search index from $built_site_dir...\n";

foreach my $path (glob("$built_site_dir/*/index.html")) {
    # Best effort: an unreadable page is reported and skipped rather than
    # aborting the whole run.
    my $fh;
    unless (open($fh, '<:encoding(UTF-8)', $path)) {
        warn "Skipping $path: $!\n";
        next;
    }
    my $html = do { local $/; <$fh> };    # slurp the whole file
    close($fh);

    # The index is keyed by the path exactly as glob() returned it, and the
    # content entry is the complete, unmodified HTML of the page (no tag
    # stripping or entity decoding is performed here).
    # NOTE(review): presumably the search front end relies on these keys and
    # on raw HTML content -- confirm before normalising either.
    $index{$path} = {
        t => _extract_title($html),
        c => $html,
    };
}

# nstore returns undef on I/O problems; do not report success in that case.
nstore(\%index, $output_file)
    or die "Cannot write $output_file: $!\n";

printf("Index complete: %d files (%.2f KB)\n", scalar(keys %index), ((-s $output_file) || 0) / 1024);

# Return the text of the first <title> element in $html.
#
# Returns "Unknown" when the document has no <title> element and "Untitled"
# when the element is present but empty.
#
# The match is evaluated in list context so that the captured text is
# returned. Combining the match with `|| "Unknown"` in one expression forces
# scalar context on the match, which yields 1 instead of the capture.
sub _extract_title {
    my ($html) = @_;

    my ($title) = $html =~ m{<title>(.*?)</title>}is;

    return 'Unknown'  unless defined $title;
    return 'Untitled' unless length $title;
    return $title;
}
