diff options
Diffstat (limited to '_site/cgi-bin/sa_indexer.pl')
| -rw-r--r-- | _site/cgi-bin/sa_indexer.pl | 86 |
1 files changed, 0 insertions, 86 deletions
#!/usr/bin/perl

# sa_indexer.pl - build a case-insensitive suffix-array search index.
#
# Walks $directory for files named index.html, reduces each page to plain
# lower-cased text (the <main> element when present, otherwise the whole
# file) and writes three files into the current working directory:
#
#   corpus.bin    page texts, each terminated by "\0", encoded as UTF-8
#   sa.bin        suffix array, native-endian unsigned 32-bit (pack "L*")
#   file_map.dat  Storable array of { start, end, path }, one per page
#
# All offsets (sa.bin entries, start/end in file_map.dat) count CHARACTERS
# of the decoded corpus, so a reader must decode corpus.bin as UTF-8 before
# applying them.
#
# The script runs main() only when executed directly; loading it with
# require/do merely defines the helper subs.

use strict;
use warnings;
use File::Find;
use Storable qw(store);
use Time::HiRes qw(gettimeofday tv_interval);

# Configuration (all paths are relative to the current working directory)
my $directory   = '../log';
my $corpus_file = 'corpus.bin';
my $sa_file     = 'sa.bin';
my $map_file    = 'file_map.dat';

# Reduce one HTML page to lower-cased, whitespace-normalised plain text.
#   $html   - decoded page content
#   returns - text of the first <main>...</main> section (whole page when
#             there is none), tags replaced by spaces, whitespace runs
#             collapsed to one space, lower-cased
sub extract_text {
    my ($html) = @_;

    my ($text) = $html =~ m|<main>(.*?)</main>|is;
    $text //= $html;
    $text =~ s|<[^>]+>| |g;
    $text =~ s|\s+| |g;

    return lc $text;
}

# Collect the text of every index.html below $root.
#   returns - (\$corpus, \@file_map); each map entry records the character
#             range [start, end) of one page (including its "\0"
#             terminator) and the path File::Find reported for it
sub build_corpus {
    my ($root) = @_;

    my $corpus = '';
    my @file_map;

    find({
        wanted => sub {
            # File::Find has chdir'ed into the directory, so $_ is the
            # bare file name; compare the name first to avoid needless stats.
            return unless $_ eq 'index.html' && -f $_;

            my $fh;
            unless (open $fh, '<:encoding(UTF-8)', $_) {
                warn "Skipping $File::Find::name: $!\n";
                return;
            }
            my $content = do { local $/; <$fh> };
            close $fh;
            $content //= '';    # a failed read still yields an (empty) entry

            my $start = length($corpus);
            $corpus .= extract_text($content) . "\0";
            push @file_map, {
                start => $start,
                end   => length($corpus),
                path  => $File::Find::name,
            };
        },
        no_chdir => 0,
    }, $root);

    return (\$corpus, \@file_map);
}

# Sort every suffix start offset of $$corpus_ref in code-point order.
# Comparing 64-character prefixes first settles most comparisons cheaply;
# only ties fall through to comparing the complete suffixes.
#   returns - reference to the sorted offset array (empty for empty input)
sub build_suffix_array {
    my ($corpus_ref) = @_;

    my @sa = sort {
        (substr($$corpus_ref, $a, 64) cmp substr($$corpus_ref, $b, 64))
            ||
        (substr($$corpus_ref, $a) cmp substr($$corpus_ref, $b))
    } 0 .. (length($$corpus_ref) - 1);

    return \@sa;
}

# Write the corpus to $path as UTF-8; dies on any I/O error.
# The explicit layer matters: without it Perl writes Latin-1 when every
# character is below U+0100 and UTF-8 (plus a "Wide character" warning)
# otherwise, so the file's encoding would depend on its content.
sub write_corpus {
    my ($path, $corpus_ref) = @_;

    open my $fh, '>:encoding(UTF-8)', $path
        or die "Cannot open $path for writing: $!\n";
    print {$fh} $$corpus_ref
        or die "Cannot write $path: $!\n";
    close $fh
        or die "Cannot close $path: $!\n";    # buffered errors surface here

    return;
}

# Write the suffix array to $path as packed native 32-bit unsigned
# integers; dies on any I/O error.
sub write_suffix_array {
    my ($path, $sa_ref) = @_;

    open my $fh, '>:raw', $path
        or die "Cannot open $path for writing: $!\n";
    print {$fh} pack('L*', @$sa_ref)
        or die "Cannot write $path: $!\n";
    close $fh
        or die "Cannot close $path: $!\n";

    return;
}

# Print the timing and on-disk size summary.
sub print_report {
    my ($elapsed, $file_count) = @_;

    my $c_size = (-s $corpus_file) || 0;
    my $s_size = (-s $sa_file)     || 0;
    my $m_size = (-s $map_file)    || 0;
    my $total  = $c_size + $s_size + $m_size;

    print "\n" . "="x35 . "\n";
    print " INDEX BUILDING COMPLETE\n";
    print "="x35 . "\n";
    printf "Total Time: %.4f seconds\n", $elapsed;
    print "Files Processed: " . $file_count . "\n";
    print "-"x35 . "\n";
    print "File Sizes (KB):\n";
    printf " %-14s %10.2f KB\n", $corpus_file, $c_size / 1024;
    printf " %-14s %10.2f KB\n", $sa_file, $s_size / 1024;
    printf " %-14s %10.2f KB\n", $map_file, $m_size / 1024;
    print "-"x35 . "\n";
    printf " TOTAL INDEX: %10.2f KB\n", $total / 1024;
    print "="x35 . "\n";

    return;
}

# Build the index end to end and report what was written.
sub main {
    my $t0 = [gettimeofday];

    # File::Find only warns about a missing root; stopping here keeps an
    # existing index from being replaced by an empty one when the script
    # is started from the wrong directory.
    -d $directory
        or die "Source directory '$directory' not found; index left untouched\n";

    print "1. Building Case-Insensitive Corpus...\n";
    my ($corpus_ref, $file_map) = build_corpus($directory);

    print "2. Sorting Suffixes (Two-Pass Cache-Optimized)...\n";
    my $sa = build_suffix_array($corpus_ref);

    print "3. Writing Index Files to Disk...\n";
    write_corpus($corpus_file, $corpus_ref);
    write_suffix_array($sa_file, $sa);
    store($file_map, $map_file)
        or die "Cannot write $map_file: $!\n";

    print_report(tv_interval($t0), scalar @$file_map);

    return;
}

main() unless caller;

1;
