summary | refs | log | tree | commit | diff | stats
path: root/_site/cgi-bin/sa_indexer.pl
diff options
context:
space:
mode:
Diffstat (limited to '_site/cgi-bin/sa_indexer.pl')
-rw-r--r-- _site/cgi-bin/sa_indexer.pl 86
1 files changed, 0 insertions, 86 deletions
diff --git a/_site/cgi-bin/sa_indexer.pl b/_site/cgi-bin/sa_indexer.pl
deleted file mode 100644
index 2395dac..0000000
--- a/_site/cgi-bin/sa_indexer.pl
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use warnings;
-use File::Find;
-use Storable qw(store);
-use Time::HiRes qw(gettimeofday tv_interval);
-
-# Configuration
-my $directory = '../log';
-my $corpus_file = 'corpus.bin';
-my $sa_file = 'sa.bin';
-my $map_file = 'file_map.dat';
-
-# Start timing
-my $t0 = [gettimeofday];
-
-my $corpus = "";
-my @file_map;
-
-print "1. Building Case-Insensitive Corpus...\n";
-find({
- wanted => sub {
- return unless -f $_ && $_ eq 'index.html';
- if (open my $fh, '<:encoding(UTF-8)', $_) {
- my $content = do { local $/; <$fh> };
- close $fh;
-
- my ($text) = $content =~ m|<main>(.*?)</main>|is;
- $text //= $content;
- $text =~ s|<[^>]+>| |g;
- $text =~ s|\s+| |g;
-
- my $start = length($corpus);
- $corpus .= lc($text) . "\0";
- push @file_map, { start => $start, end => length($corpus), path => $File::Find::name };
- }
- },
- no_chdir => 0,
-}, $directory);
-
-print "2. Sorting Suffixes (Two-Pass Cache-Optimized)...\n";
-my @sa = 0 .. (length($corpus) - 1);
-
-@sa = sort {
- (substr($corpus, $a, 64) cmp substr($corpus, $b, 64))
- ||
- (substr($corpus, $a) cmp substr($corpus, $b))
-} @sa;
-
-print "3. Writing Index Files to Disk...\n";
-open my $cfh, '>', $corpus_file or die $!;
-print $cfh $corpus;
-close $cfh;
-
-open my $sfh, '>', $sa_file or die $!;
-binmode($sfh);
-print $sfh pack("L*", @sa);
-close $sfh;
-
-store \@file_map, $map_file;
-
-# End timing
-my $elapsed = tv_interval($t0);
-
-# Calculate Sizes
-my $c_size = -s $corpus_file;
-my $s_size = -s $sa_file;
-my $m_size = -s $map_file;
-my $total = $c_size + $s_size + $m_size;
-
-# --- Final Report ---
-print "\n" . "="x35 . "\n";
-print " INDEX BUILDING COMPLETE\n";
-print "="x35 . "\n";
-printf "Total Time: %.4f seconds\n", $elapsed;
-print "Files Processed: " . scalar(@file_map) . "\n";
-print "-"x35 . "\n";
-print "File Sizes (KB):\n";
-printf " %-14s %10.2f KB\n", $corpus_file, $c_size / 1024;
-printf " %-14s %10.2f KB\n", $sa_file, $s_size / 1024;
-printf " %-14s %10.2f KB\n", $map_file, $m_size / 1024;
-print "-"x35 . "\n";
-printf " TOTAL INDEX: %10.2f KB\n", $total / 1024;
-print "="x35 . "\n";
-