diff options
| author | Sadeep Madurange <sadeep@asciimx.com> | 2026-01-03 13:06:08 +0800 |
|---|---|---|
| committer | Sadeep Madurange <sadeep@asciimx.com> | 2026-01-03 14:42:43 +0800 |
| commit | 15205d0cf770058b59be07e00f6dbc6523b9cede (patch) | |
| tree | 43ba2ab53add863286e0ef24f1e9aa0e94bb5334 /_site/cgi-bin | |
| parent | 6da102d6e0494a3eac3f05fa3b2cdcc25ba2754e (diff) | |
| download | www-15205d0cf770058b59be07e00f6dbc6523b9cede.tar.gz | |
CGI search post.
Diffstat (limited to '_site/cgi-bin')
| -rw-r--r-- | _site/cgi-bin/find.cgi | 34 |
1 file changed, 20 insertions, 14 deletions
diff --git a/_site/cgi-bin/find.cgi b/_site/cgi-bin/find.cgi index 5f95e3a..ab066dd 100644 --- a/_site/cgi-bin/find.cgi +++ b/_site/cgi-bin/find.cgi @@ -7,7 +7,7 @@ use Encode qw(decode_utf8 encode_utf8); use URI::Escape qw(uri_unescape); use HTML::Escape qw(escape_html); -# --- Configuration --- +# Configuration my $max_parallel = 50; # Max parallel search requests my $lock_timeout = 30; # Seconds before dropping stale locks my $max_results = 20; # Max search results to display @@ -16,7 +16,7 @@ my $cp_file = 'corpus.bin'; # Raw text corpus my $map_file = 'file_map.dat'; # File metadata my $lock_dir = '/tmp/search_locks'; # Semaphore directory -# --- Concurrency Control --- +# Concurrency control mkdir $lock_dir, 0777 unless -d $lock_dir; my $active_count = 0; my $now = time(); @@ -45,7 +45,7 @@ if ($active_count >= $max_parallel) { my $lock_file = "$lock_dir/$$.lock"; open(my $fh_lock, '>', $lock_file); -# --- Query Decoding --- +# Query decoding if (($ENV{QUERY_STRING} || '') =~ /^q=([^&]*)/) { my $raw_q = $1; $raw_q =~ tr/+/ /; @@ -64,7 +64,7 @@ if ($search_text eq '') { final_output("<p>Please enter a search term above.</p>"); } -# --- Binary Search Logic --- +# Binary search my @results; my $query = encode_utf8(lc($search_text)); my $query_len = length($query); @@ -130,38 +130,44 @@ if (-f $sa_file && -f $cp_file) { foreach my $m (@$file_map) { if ($offset >= $m->{start} && $offset < $m->{end}) { if (!$seen{$m->{path}}++) { - # 1. Capture slightly more than 50 chars for trimming + # Capture more than 50 chars for trimming my $snip_start = ($offset - 30 < $m->{start}) ? $m->{start} : $offset - 30; + my $max_len = $m->{end} - $snip_start; + my $read_len = ($max_len > 120) ? 120 : $max_len; seek($fh_cp, $snip_start, 0); - read($fh_cp, my $raw_snip, 120); + read($fh_cp, my $raw_snip, $read_len); my $snippet = decode_utf8($raw_snip, Encode::FB_QUIET) // $raw_snip; $snippet =~ s/\s+/ /g; # Normalize whitespace - # 2. Trim Start: Partial word removal + # Trim start: Partial word removal if ($snip_start > $m->{start}) { $snippet =~ s/^[^\s]*\s//; } - # 3. Trim End: Length limit and partial word removal + # Trim end: Length limit and partial word removal my $has_more = 0; if (length($snippet) > 50) { $snippet = substr($snippet, 0, 50); $has_more = 1 if $snippet =~ s/\s+[^\s]*$//; } + elsif ($snip_start + $read_len < $m->{end}) { + # This check handles snippets that are naturally short but + # there's still more text in the article we didn't read + $has_more = 1; + } - # 4. Cleanup & Capitalize + # Cleanup & capitalize $snippet = ucfirst($snippet); + $snippet = escape_html($snippet) . ($has_more ? "..." : ""); + my $clean_path = $m->{path}; $clean_path =~ s|^\.\./_site/||; - # 5. Build Final Snippet - my $display_snippet = escape_html($snippet) . ($has_more ? "..." : ""); - push @results, { path => $clean_path, - title => (split('/', $m->{path}))[-2], - snippet => $display_snippet + title => $m->{title}, + snippet => $snippet }; } last; |
