From 0ed1af827b5c8628053307bfa791d8c4e97ac315 Mon Sep 17 00:00:00 2001 From: Sadeep Madurange Date: Sun, 8 Mar 2026 17:32:07 +0800 Subject: Replace seek/read with mmap. --- cgi-bin/find.cgi | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'cgi-bin/find.cgi') diff --git a/cgi-bin/find.cgi b/cgi-bin/find.cgi index 4091c7a..9a4bd34 100644 --- a/cgi-bin/find.cgi +++ b/cgi-bin/find.cgi @@ -2,6 +2,7 @@ use strict; use warnings; +use Sys::Mmap; use Storable qw(retrieve); use Encode qw(decode_utf8 encode_utf8); use URI::Escape qw(uri_unescape); @@ -75,6 +76,11 @@ if (-f $sa_file && -f $cp_file) { binmode($fh_sa); binmode($fh_cp); + # Memory map files + my ($sa_mapped, $cp_mapped); + mmap($sa_mapped, 0, PROT_READ, MAP_SHARED, $fh_sa) or die "Could not map SA: $!"; + mmap($cp_mapped, 0, PROT_READ, MAP_SHARED, $fh_cp) or die "Could not map Corpus: $!"; + my $file_map = retrieve($map_file); my $total_suffixes = (-s $sa_file) / 4; @@ -84,12 +90,8 @@ if (-f $sa_file && -f $cp_file) { while ($low <= $high) { my $mid = int(($low + $high) / 2); - seek($fh_sa, $mid * 4, 0); - read($fh_sa, my $bin_off, 4); - my $off = unpack("L", $bin_off); - seek($fh_cp, $off, 0); - read($fh_cp, my $text, $query_len); - + my $off = unpack("L", substr($sa_mapped, $mid * 4, 4)); + my $text = substr($cp_mapped, $off, $query_len); my $cmp = $text cmp $query; if ($cmp >= 0) { $first_hit = $mid if $cmp == 0; @@ -107,12 +109,8 @@ if (-f $sa_file && -f $cp_file) { # Find right boundary while ($low <= $high) { my $mid = int(($low + $high) / 2); - seek($fh_sa, $mid * 4, 0); - read($fh_sa, my $bin_off, 4); - my $off = unpack("L", $bin_off); - seek($fh_cp, $off, 0); - read($fh_cp, my $text, $query_len); - + my $off = unpack("L", substr($sa_mapped, $mid * 4, 4)); + my $text = substr($cp_mapped, $off, $query_len); if (($text cmp $query) <= 0) { $last_hit = $mid if $text eq $query; $low = $mid + 1; @@ -123,10 +121,7 @@ if (-f $sa_file && -f $cp_file) { my %seen; for my $i ($first_hit .. $last_hit) { - seek($fh_sa, $i * 4, 0); - read($fh_sa, my $bin_off, 4); - my $offset = unpack("L", $bin_off); - + my $offset = unpack("L", substr($sa_mapped, $i * 4, 4)); foreach my $m (@$file_map) { if ($offset >= $m->{start} && $offset < $m->{end}) { if (!$seen{$m->{path}}++) { @@ -134,9 +129,7 @@ if (-f $sa_file && -f $cp_file) { my $snip_start = ($offset - 30 < $m->{start}) ? $m->{start} : $offset - 30; my $max_len = $m->{end} - $snip_start; my $read_len = ($max_len > 120) ? 120 : $max_len; - seek($fh_cp, $snip_start, 0); - read($fh_cp, my $raw_snip, $read_len); - + my $raw_snip = substr($cp_mapped, $snip_start, $read_len); my $snippet = decode_utf8($raw_snip, Encode::FB_QUIET) // $raw_snip; $snippet =~ s/\s+/ /g; # Normalize whitespace -- cgit v1.2.3