summary refs log tree commit diff stats
path: root/cgi-bin/indexer.pl
diff options
context:
space:
mode:
Diffstat (limited to 'cgi-bin/indexer.pl')
-rw-r--r-- cgi-bin/indexer.pl 29
1 files changed, 10 insertions, 19 deletions
diff --git a/cgi-bin/indexer.pl b/cgi-bin/indexer.pl
index d0314a1..38a918e 100644
--- a/cgi-bin/indexer.pl
+++ b/cgi-bin/indexer.pl
@@ -5,13 +5,13 @@ use warnings;
use Storable qw(nstore);
use HTML::Entities qw(decode_entities);
-my $built_site_dir = '../_site/log';
-my $output_file = 'search_index.dat';
+# --- Configuration ---
+my $built_site_dir = '../_site/log';
+my $output_file = '../_site/cgi-bin/search_index.dat';
my %index;
print "Building search index from $built_site_dir...\n";
-# glob finds every index.html in subdirectories of /log/
foreach my $path (glob("$built_site_dir/*/index.html")) {
next unless open(my $fh, '<:utf8', $path);
my $html = do { local $/; <$fh> };
@@ -22,23 +22,19 @@ foreach my $path (glob("$built_site_dir/*/index.html")) {
my ($main) = $html =~ m|<main>(.*?)</main>|is;
$main //= '';
- # Remove code and pre blocks to keep index prose-only
+ # Strip HTML and clean prose
$main =~ s|<pre[^>]*>.*?</pre>| |gs;
$main =~ s|<code[^>]*>.*?</code>| |gs;
-
- # Strip all remaining HTML tags
$main =~ s|<[^>]+>| |g;
-
- # Decode entities (e.g., &amp; -> &) for accurate searching
$main = decode_entities($main);
-
- # Normalize whitespace (squash multiple spaces/newlines)
$main =~ s|\s+| |g;
$main =~ s/^\s+|\s+$//g;
- # Map file path to the final web URL
- # Example: ../_site/log/arduino/index.html -> /log/arduino/index.html
- (my $url = $path) =~ s|^\.\./_site/|/|;
+ # Normalize path
+ my $url = $path;
+ $url =~ s|^\.\./_site/||; # Remove local build directory
+ $url =~ s|^\.\./||; # Remove any leading dots
+ $url =~ s|^/+||; # Remove leading slashes
$index{$url} = {
t => $title || "Untitled",
@@ -46,10 +42,5 @@ foreach my $path (glob("$built_site_dir/*/index.html")) {
};
}
-# Save using network-order binary (nstore) for portability
nstore(\%index, $output_file);
-
-my $count = scalar(keys %index);
-my $size = -s $output_file;
-printf("Index complete: %d files (%.2f KB)\n", $count, $size / 1024);
-
+printf("Index complete: %d files (%.2f KB)\n", scalar(keys %index), (-s $output_file) / 1024);