summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Sadeep Madurange <sadeep@asciimx.com>, 2026-03-08 17:32:07 +0800
committer: Sadeep Madurange <sadeep@asciimx.com>, 2026-03-08 17:32:07 +0800
commit: 0ed1af827b5c8628053307bfa791d8c4e97ac315 (patch)
tree: 8d49671d2ba7292f5e5f38d31bed56557f653d90
parent: ff79aff37e49f154bdb216c05637e6a4ec767388 (diff)
download: www-0ed1af827b5c8628053307bfa791d8c4e97ac315.tar.gz
Replace seek/read with mmap.
-rw-r--r-- cgi-bin/find.cgi 31
1 file changed, 12 insertions, 19 deletions
diff --git a/cgi-bin/find.cgi b/cgi-bin/find.cgi
index 4091c7a..9a4bd34 100644
--- a/cgi-bin/find.cgi
+++ b/cgi-bin/find.cgi
@@ -2,6 +2,7 @@
use strict;
use warnings;
+use Sys::Mmap;
use Storable qw(retrieve);
use Encode qw(decode_utf8 encode_utf8);
use URI::Escape qw(uri_unescape);
@@ -75,6 +76,11 @@ if (-f $sa_file && -f $cp_file) {
binmode($fh_sa);
binmode($fh_cp);
+ # Memory map files
+ my ($sa_mapped, $cp_mapped);
+ mmap($sa_mapped, 0, PROT_READ, MAP_SHARED, $fh_sa) or die "Could not map SA: $!";
+ mmap($cp_mapped, 0, PROT_READ, MAP_SHARED, $fh_cp) or die "Could not map Corpus: $!";
+
my $file_map = retrieve($map_file);
my $total_suffixes = (-s $sa_file) / 4;
@@ -84,12 +90,8 @@ if (-f $sa_file && -f $cp_file) {
while ($low <= $high) {
my $mid = int(($low + $high) / 2);
- seek($fh_sa, $mid * 4, 0);
- read($fh_sa, my $bin_off, 4);
- my $off = unpack("L", $bin_off);
- seek($fh_cp, $off, 0);
- read($fh_cp, my $text, $query_len);
-
+ my $off = unpack("L", substr($sa_mapped, $mid * 4, 4));
+ my $text = substr($cp_mapped, $off, $query_len);
my $cmp = $text cmp $query;
if ($cmp >= 0) {
$first_hit = $mid if $cmp == 0;
@@ -107,12 +109,8 @@ if (-f $sa_file && -f $cp_file) {
# Find right boundary
while ($low <= $high) {
my $mid = int(($low + $high) / 2);
- seek($fh_sa, $mid * 4, 0);
- read($fh_sa, my $bin_off, 4);
- my $off = unpack("L", $bin_off);
- seek($fh_cp, $off, 0);
- read($fh_cp, my $text, $query_len);
-
+ my $off = unpack("L", substr($sa_mapped, $mid * 4, 4));
+ my $text = substr($cp_mapped, $off, $query_len);
if (($text cmp $query) <= 0) {
$last_hit = $mid if $text eq $query;
$low = $mid + 1;
@@ -123,10 +121,7 @@ if (-f $sa_file && -f $cp_file) {
my %seen;
for my $i ($first_hit .. $last_hit) {
- seek($fh_sa, $i * 4, 0);
- read($fh_sa, my $bin_off, 4);
- my $offset = unpack("L", $bin_off);
-
+ my $offset = unpack("L", substr($sa_mapped, $i * 4, 4));
foreach my $m (@$file_map) {
if ($offset >= $m->{start} && $offset < $m->{end}) {
if (!$seen{$m->{path}}++) {
@@ -134,9 +129,7 @@ if (-f $sa_file && -f $cp_file) {
my $snip_start = ($offset - 30 < $m->{start}) ? $m->{start} : $offset - 30;
my $max_len = $m->{end} - $snip_start;
my $read_len = ($max_len > 120) ? 120 : $max_len;
- seek($fh_cp, $snip_start, 0);
- read($fh_cp, my $raw_snip, $read_len);
-
+ my $raw_snip = substr($cp_mapped, $snip_start, $read_len);
my $snippet = decode_utf8($raw_snip, Encode::FB_QUIET) // $raw_snip;
$snippet =~ s/\s+/ /g; # Normalize whitespace