#!/usr/bin/perl
use strict;
use warnings;

use Storable qw(nstore);
use HTML::Entities qw(decode_entities);

# Build the site-search index.
#
# For every built page matching "$built_site_dir/*/index.html" this script
# stores the page title and the plain text of its main content in a hash keyed
# by site-relative URL, then serialises that hash to $output_file with
# Storable's nstore (network byte order, so the file is portable between
# machines).
#
# Stored record shape:  $index{$url} = { t => $title, c => $plain_text }
#
# Both configured paths are relative, so they resolve against the current
# working directory at the time the script is run.

# --- Configuration ---
my $built_site_dir = '../_site/log';
my $output_file    = '../_site/cgi-bin/search_index.dat';

# Elements whose whole content (opening tag through closing tag) is removed
# from the main content before the remaining tags are stripped.
# NOTE(review): the element names in the two original strip patterns were lost
# (only the "[^>]*>.*?" tails survived); script/style is a reconstruction --
# confirm against the intended list (e.g. nav, footer) before relying on it.
my @strip_elements = qw(script style);

# Reduce an HTML fragment to a single line of plain text: drop every tag,
# decode character entities, collapse whitespace runs and trim both ends.
# Tags are removed *before* entities are decoded so that an encoded "&lt;"
# in the page text is never mistaken for markup.
sub _plain_text {
    my ($fragment) = @_;

    $fragment =~ s|<[^>]+>| |g;
    $fragment = decode_entities($fragment);
    $fragment =~ s|\s+| |g;
    $fragment =~ s/^\s+|\s+$//g;

    return $fragment;
}

# Pull the title and the plain-text main content out of one HTML document.
#
# Returns ($title, $text), or the empty list when the document has no
# <main> element.  $title is the raw inner text of <title> (entities are left
# encoded, exactly as captured) or "Untitled" when it is missing or empty.
#
# NOTE(review): the original capture patterns were lost; <title> and <main>
# are reconstructed from the "Extract Title and Main Content" comment and the
# variable names.  How the original treated a page without <main> is unknown;
# skipping it is an assumption -- confirm.
sub _extract_page {
    my ($html) = @_;

    my ($title) = $html =~ m|<title\b[^>]*>(.*?)</title\s*>|is;
    my ($main)  = $html =~ m|<main\b[^>]*>(.*?)</main\s*>|is;
    return unless defined $main;

    for my $tag (@strip_elements) {
        $main =~ s|<\Q$tag\E\b[^>]*>.*?</\Q$tag\E\s*>| |gis;
    }

    # A plain "||" would also replace a legitimate title of "0".
    $title = 'Untitled' unless defined $title && length $title;

    return ($title, _plain_text($main));
}

# Turn a filesystem path produced by the glob into the site-relative URL used
# as the index key, e.g. "../_site/log/foo/index.html" -> "log/foo/index.html".
sub _url_for {
    my ($path) = @_;

    my $url = $path;
    $url =~ s|^\.\./_site/||;    # Remove local build directory
    $url =~ s|^\.\./||;          # Remove one remaining leading "../", if any
    $url =~ s|^/+||;             # Remove leading slashes

    return $url;
}

# Scan the built site, build the index and write it to $output_file.
# Dies if the index cannot be written; unreadable or <main>-less pages are
# reported on STDERR and skipped.
sub main {
    my %index;

    print "Building search index from $built_site_dir...\n";

    foreach my $path (glob("$built_site_dir/*/index.html")) {
        my $fh;
        unless (open($fh, '<:encoding(UTF-8)', $path)) {
            # Best effort: one unreadable page must not abort the build,
            # but it should not vanish from the index unnoticed either.
            warn "Skipping $path: cannot open: $!\n";
            next;
        }
        my $html = do { local $/; <$fh> };
        close($fh);

        my ($title, $text) = _extract_page($html);
        unless (defined $text) {
            warn "Skipping $path: no <main> element found\n";
            next;
        }

        $index{ _url_for($path) } = {
            t => $title,
            c => $text,
        };
    }

    warn "No pages matched $built_site_dir/*/index.html; writing an empty index\n"
        unless %index;

    # nstore croaks when the file cannot be created and returns undef on
    # other I/O errors; treat both as fatal.
    nstore(\%index, $output_file)
        or die "Cannot write $output_file: $!\n";

    printf("Index complete: %d files (%.2f KB)\n",
        scalar(keys %index), ((-s $output_file) || 0) / 1024);

    return;
}

# Run only when executed as a script, so the helpers can be loaded on their
# own (e.g. by a test) without touching the filesystem.
main() unless caller;