From 7375a6b8c6ac05f79755e27afeb3062d027c37f2 Mon Sep 17 00:00:00 2001
From: Sadeep Madurange
Date: Thu, 1 Jan 2026 18:33:54 +0800
Subject: Optimize search and add guards.

---
 cgi-bin/indexer.pl | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

(limited to 'cgi-bin/indexer.pl')

diff --git a/cgi-bin/indexer.pl b/cgi-bin/indexer.pl
index d0314a1..38a918e 100644
--- a/cgi-bin/indexer.pl
+++ b/cgi-bin/indexer.pl
@@ -5,13 +5,13 @@ use warnings;
 use Storable qw(nstore);
 use HTML::Entities qw(decode_entities);
 
-my $built_site_dir = '../_site/log';
-my $output_file = 'search_index.dat';
+# --- Configuration ---
+my $built_site_dir  = '../_site/log';
+my $output_file     = '../_site/cgi-bin/search_index.dat';
 my %index;
 
 print "Building search index from $built_site_dir...\n";
 
-# glob finds every index.html in subdirectories of /log/
 foreach my $path (glob("$built_site_dir/*/index.html")) {
     next unless open(my $fh, '<:utf8', $path);
     my $html = do { local $/; <$fh> };
@@ -22,23 +22,19 @@ foreach my $path (glob("$built_site_dir/*/index.html")) {
     my ($main) = $html =~ m|<main>(.*?)</main>|is;
     $main //= '';
 
-    # Remove code and pre blocks to keep index prose-only
+    # Strip HTML and clean prose
     $main =~ s|<pre[^>]*>.*?</pre>| |gs;
     $main =~ s|<code[^>]*>.*?</code>| |gs;
-
-    # Strip all remaining HTML tags
     $main =~ s|<[^>]+>| |g;
-
-    # Decode entities (e.g., &amp; -> &) for accurate searching
     $main = decode_entities($main);
-
-    # Normalize whitespace (squash multiple spaces/newlines)
     $main =~ s|\s+| |g;
     $main =~ s/^\s+|\s+$//g;
 
-    # Map file path to the final web URL
-    # Example: ../_site/log/arduino/index.html -> /log/arduino/index.html
-    (my $url = $path) =~ s|^\.\./_site/|/|;
+    # Normalize path
+    my $url = $path;
+    $url =~ s|^\.\./_site/||;    # Remove local build directory
+    $url =~ s|^\.\./||;          # Remove any leading dots
+    $url =~ s|^/+||;             # Remove leading slashes
 
     $index{$url} = {
         t => $title || "Untitled",
@@ -46,10 +42,5 @@ foreach my $path (glob("$built_site_dir/*/index.html")) {
     };
 }
 
-# Save using network-order binary (nstore) for portability
 nstore(\%index, $output_file);
-
-my $count = scalar(keys %index);
-my $size = -s $output_file;
-printf("Index complete: %d files (%.2f KB)\n", $count, $size / 1024);
-
+printf("Index complete: %d files (%.2f KB)\n", scalar(keys %index), (-s $output_file) / 1024);
-- 
cgit v1.2.3