diff options
| author | Sadeep Madurange <sadeep@asciimx.com> | 2026-03-08 17:32:07 +0800 |
|---|---|---|
| committer | Sadeep Madurange <sadeep@asciimx.com> | 2026-03-08 17:32:07 +0800 |
| commit | 0ed1af827b5c8628053307bfa791d8c4e97ac315 (patch) | |
| tree | 8d49671d2ba7292f5e5f38d31bed56557f653d90 | |
| parent | ff79aff37e49f154bdb216c05637e6a4ec767388 (diff) | |
| download | www-0ed1af827b5c8628053307bfa791d8c4e97ac315.tar.gz | |
Replace seek/read with mmap.
| -rw-r--r-- | cgi-bin/find.cgi | 31 |
1 file changed, 12 insertions, 19 deletions
```diff
diff --git a/cgi-bin/find.cgi b/cgi-bin/find.cgi
index 4091c7a..9a4bd34 100644
--- a/cgi-bin/find.cgi
+++ b/cgi-bin/find.cgi
@@ -2,6 +2,7 @@
 use strict;
 use warnings;
 
+use Sys::Mmap;
 use Storable qw(retrieve);
 use Encode qw(decode_utf8 encode_utf8);
 use URI::Escape qw(uri_unescape);
@@ -75,6 +76,11 @@ if (-f $sa_file && -f $cp_file) {
     binmode($fh_sa);
     binmode($fh_cp);
 
+    # Memory map files
+    my ($sa_mapped, $cp_mapped);
+    mmap($sa_mapped, 0, PROT_READ, MAP_SHARED, $fh_sa) or die "Could not map SA: $!";
+    mmap($cp_mapped, 0, PROT_READ, MAP_SHARED, $fh_cp) or die "Could not map Corpus: $!";
+
     my $file_map = retrieve($map_file);
     my $total_suffixes = (-s $sa_file) / 4;
 
@@ -84,12 +90,8 @@ if (-f $sa_file && -f $cp_file) {
 
     while ($low <= $high) {
         my $mid = int(($low + $high) / 2);
-        seek($fh_sa, $mid * 4, 0);
-        read($fh_sa, my $bin_off, 4);
-        my $off = unpack("L", $bin_off);
-        seek($fh_cp, $off, 0);
-        read($fh_cp, my $text, $query_len);
-
+        my $off = unpack("L", substr($sa_mapped, $mid * 4, 4));
+        my $text = substr($cp_mapped, $off, $query_len);
         my $cmp = $text cmp $query;
         if ($cmp >= 0) {
             $first_hit = $mid if $cmp == 0;
@@ -107,12 +109,8 @@ if (-f $sa_file && -f $cp_file) {
     # Find right boundary
     while ($low <= $high) {
         my $mid = int(($low + $high) / 2);
-        seek($fh_sa, $mid * 4, 0);
-        read($fh_sa, my $bin_off, 4);
-        my $off = unpack("L", $bin_off);
-        seek($fh_cp, $off, 0);
-        read($fh_cp, my $text, $query_len);
-
+        my $off = unpack("L", substr($sa_mapped, $mid * 4, 4));
+        my $text = substr($cp_mapped, $off, $query_len);
         if (($text cmp $query) <= 0) {
             $last_hit = $mid if $text eq $query;
             $low = $mid + 1;
@@ -123,10 +121,7 @@ if (-f $sa_file && -f $cp_file) {
 
     my %seen;
     for my $i ($first_hit .. $last_hit) {
-        seek($fh_sa, $i * 4, 0);
-        read($fh_sa, my $bin_off, 4);
-        my $offset = unpack("L", $bin_off);
-
+        my $offset = unpack("L", substr($sa_mapped, $i * 4, 4));
         foreach my $m (@$file_map) {
             if ($offset >= $m->{start} && $offset < $m->{end}) {
                 if (!$seen{$m->{path}}++) {
@@ -134,9 +129,7 @@ if (-f $sa_file && -f $cp_file) {
                     my $snip_start = ($offset - 30 < $m->{start}) ? $m->{start} : $offset - 30;
                     my $max_len = $m->{end} - $snip_start;
                     my $read_len = ($max_len > 120) ? 120 : $max_len;
-                    seek($fh_cp, $snip_start, 0);
-                    read($fh_cp, my $raw_snip, $read_len);
-
+                    my $raw_snip = substr($cp_mapped, $snip_start, $read_len);
                     my $snippet = decode_utf8($raw_snip, Encode::FB_QUIET) // $raw_snip;
                     $snippet =~ s/\s+/ /g; # Normalize whitespace
 
```
