diff options
| author | Sadeep Madurange <sadeep@asciimx.com> | 2026-01-03 13:06:08 +0800 |
|---|---|---|
| committer | Sadeep Madurange <sadeep@asciimx.com> | 2026-01-03 14:42:43 +0800 |
| commit | 15205d0cf770058b59be07e00f6dbc6523b9cede (patch) | |
| tree | 43ba2ab53add863286e0ef24f1e9aa0e94bb5334 /_site/cgi-bin | |
| parent | 6da102d6e0494a3eac3f05fa3b2cdcc25ba2754e (diff) | |
| download | www-15205d0cf770058b59be07e00f6dbc6523b9cede.tar.gz | |
CGI search post.
Diffstat (limited to '_site/cgi-bin')
| -rw-r--r-- | _site/cgi-bin/find.cgi | 34 |
1 file changed, 20 insertions, 14 deletions
diff --git a/_site/cgi-bin/find.cgi b/_site/cgi-bin/find.cgi index 5f95e3a..ab066dd 100644 --- a/_site/cgi-bin/find.cgi +++ b/_site/cgi-bin/find.cgi @@ -7,7 +7,7 @@ use Encode qw(decode_utf8 encode_utf8); use URI::Escape qw(uri_unescape); use HTML::Escape qw(escape_html); -# --- Configuration --- +# Configuration my $max_parallel = 50; # Max parallel search requests my $lock_timeout = 30; # Seconds before dropping stale locks my $max_results = 20; # Max search results to display @@ -16,7 +16,7 @@ my $cp_file = 'corpus.bin'; # Raw text corpus my $map_file = 'file_map.dat'; # File metadata my $lock_dir = '/tmp/search_locks'; # Semaphore directory -# --- Concurrency Control --- +# Concurrency control mkdir $lock_dir, 0777 unless -d $lock_dir; my $active_count = 0; my $now = time(); @@ -45,7 +45,7 @@ if ($active_count >= $max_parallel) { my $lock_file = "$lock_dir/$$.lock"; open(my $fh_lock, '>', $lock_file); -# --- Query Decoding --- +# Query decoding if (($ENV{QUERY_STRING} || '') =~ /^q=([^&]*)/) { my $raw_q = $1; $raw_q =~ tr/+/ /; @@ -64,7 +64,7 @@ if ($search_text eq '') { final_output("<p>Please enter a search term above.</p>"); } -# --- Binary Search Logic --- +# Binary search my @results; my $query = encode_utf8(lc($search_text)); my $query_len = length($query); @@ -130,38 +130,44 @@ if (-f $sa_file && -f $cp_file) { foreach my $m (@$file_map) { if ($offset >= $m->{start} && $offset < $m->{end}) { if (!$seen{$m->{path}}++) { - # 1. Capture slightly more than 50 chars for trimming + # Capture more than 50 chars for trimming my $snip_start = ($offset - 30 < $m->{start}) ? $m->{start} : $offset - 30; + my $max_len = $m->{end} - $snip_start; + my $read_len = ($max_len > 120) ? 120 : $max_len; seek($fh_cp, $snip_start, 0); - read($fh_cp, my $raw_snip, 120); + read($fh_cp, my $raw_snip, $read_len); my $snippet = decode_utf8($raw_snip, Encode::FB_QUIET) // $raw_snip; $snippet =~ s/\s+/ /g; # Normalize whitespace - # 2. Trim Start: Partial word removal + # Trim start: Partial word removal if ($snip_start > $m->{start}) { $snippet =~ s/^[^\s]*\s//; } - # 3. Trim End: Length limit and partial word removal + # Trim end: Length limit and partial word removal my $has_more = 0; if (length($snippet) > 50) { $snippet = substr($snippet, 0, 50); $has_more = 1 if $snippet =~ s/\s+[^\s]*$//; } + elsif ($snip_start + $read_len < $m->{end}) { + # This check handles snippets that are naturally short but + # there's still more text in the article we didn't read + $has_more = 1; + } - # 4. Cleanup & Capitalize + # Cleanup & capitalize $snippet = ucfirst($snippet); + $snippet = escape_html($snippet) . ($has_more ? "..." : ""); + my $clean_path = $m->{path}; $clean_path =~ s|^\.\./_site/||; - # 5. Build Final Snippet - my $display_snippet = escape_html($snippet) . ($has_more ? "..." : ""); - push @results, { path => $clean_path, - title => (split('/', $m->{path}))[-2], - snippet => $display_snippet + title => $m->{title}, + snippet => $snippet }; } last; |
