diff options
| author | Sadeep Madurange <sadeep@asciimx.com> | 2026-01-01 18:33:54 +0800 |
|---|---|---|
| committer | Sadeep Madurange <sadeep@asciimx.com> | 2026-01-01 18:33:54 +0800 |
| commit | 7375a6b8c6ac05f79755e27afeb3062d027c37f2 (patch) | |
| tree | 93d204976ecf0d9a7bf02064ee2116af272624b9 /cgi-bin/indexer.pl | |
| parent | 5d84833e25ed23e40ac5703527469101448b9b66 (diff) | |
| download | www-7375a6b8c6ac05f79755e27afeb3062d027c37f2.tar.gz | |
Optimize search and add guards.
Diffstat (limited to 'cgi-bin/indexer.pl')
| -rw-r--r-- | cgi-bin/indexer.pl | 29 |
1 files changed, 10 insertions, 19 deletions
diff --git a/cgi-bin/indexer.pl b/cgi-bin/indexer.pl index d0314a1..38a918e 100644 --- a/cgi-bin/indexer.pl +++ b/cgi-bin/indexer.pl @@ -5,13 +5,13 @@ use warnings; use Storable qw(nstore); use HTML::Entities qw(decode_entities); -my $built_site_dir = '../_site/log'; -my $output_file = 'search_index.dat'; +# --- Configuration --- +my $built_site_dir = '../_site/log'; +my $output_file = '../_site/cgi-bin/search_index.dat'; my %index; print "Building search index from $built_site_dir...\n"; -# glob finds every index.html in subdirectories of /log/ foreach my $path (glob("$built_site_dir/*/index.html")) { next unless open(my $fh, '<:utf8', $path); my $html = do { local $/; <$fh> }; @@ -22,23 +22,19 @@ foreach my $path (glob("$built_site_dir/*/index.html")) { my ($main) = $html =~ m|<main>(.*?)</main>|is; $main //= ''; - # Remove code and pre blocks to keep index prose-only + # Strip HTML and clean prose $main =~ s|<pre[^>]*>.*?</pre>| |gs; $main =~ s|<code[^>]*>.*?</code>| |gs; - - # Strip all remaining HTML tags $main =~ s|<[^>]+>| |g; - - # Decode entities (e.g., & -> &) for accurate searching $main = decode_entities($main); - - # Normalize whitespace (squash multiple spaces/newlines) $main =~ s|\s+| |g; $main =~ s/^\s+|\s+$//g; - # Map file path to the final web URL - # Example: ../_site/log/arduino/index.html -> /log/arduino/index.html - (my $url = $path) =~ s|^\.\./_site/|/|; + # Normalize path + my $url = $path; + $url =~ s|^\.\./_site/||; # Remove local build directory + $url =~ s|^\.\./||; # Remove any leading dots + $url =~ s|^/+||; # Remove leading slashes $index{$url} = { t => $title || "Untitled", @@ -46,10 +42,5 @@ foreach my $path (glob("$built_site_dir/*/index.html")) { }; } -# Save using network-order binary (nstore) for portability nstore(\%index, $output_file); - -my $count = scalar(keys %index); -my $size = -s $output_file; -printf("Index complete: %d files (%.2f KB)\n", $count, $size / 1024); - +printf("Index complete: %d files (%.2f KB)\n", scalar(keys %index), (-s $output_file) / 1024); |
