diff options
Diffstat (limited to 'cgi-bin/find.cgi')
| -rw-r--r-- | cgi-bin/find.cgi | 31 |
1 files changed, 19 insertions, 12 deletions
diff --git a/cgi-bin/find.cgi b/cgi-bin/find.cgi index 9a4bd34..4091c7a 100644 --- a/cgi-bin/find.cgi +++ b/cgi-bin/find.cgi @@ -2,7 +2,6 @@ use strict; use warnings; -use Sys::Mmap; use Storable qw(retrieve); use Encode qw(decode_utf8 encode_utf8); use URI::Escape qw(uri_unescape); @@ -76,11 +75,6 @@ if (-f $sa_file && -f $cp_file) { binmode($fh_sa); binmode($fh_cp); - # Memory map files - my ($sa_mapped, $cp_mapped); - mmap($sa_mapped, 0, PROT_READ, MAP_SHARED, $fh_sa) or die "Could not map SA: $!"; - mmap($cp_mapped, 0, PROT_READ, MAP_SHARED, $fh_cp) or die "Could not map Corpus: $!"; - my $file_map = retrieve($map_file); my $total_suffixes = (-s $sa_file) / 4; @@ -90,8 +84,12 @@ if (-f $sa_file && -f $cp_file) { while ($low <= $high) { my $mid = int(($low + $high) / 2); - my $off = unpack("L", substr($sa_mapped, $mid * 4, 4)); - my $text = substr($cp_mapped, $off, $query_len); + seek($fh_sa, $mid * 4, 0); + read($fh_sa, my $bin_off, 4); + my $off = unpack("L", $bin_off); + seek($fh_cp, $off, 0); + read($fh_cp, my $text, $query_len); + my $cmp = $text cmp $query; if ($cmp >= 0) { $first_hit = $mid if $cmp == 0; @@ -109,8 +107,12 @@ if (-f $sa_file && -f $cp_file) { # Find right boundary while ($low <= $high) { my $mid = int(($low + $high) / 2); - my $off = unpack("L", substr($sa_mapped, $mid * 4, 4)); - my $text = substr($cp_mapped, $off, $query_len); + seek($fh_sa, $mid * 4, 0); + read($fh_sa, my $bin_off, 4); + my $off = unpack("L", $bin_off); + seek($fh_cp, $off, 0); + read($fh_cp, my $text, $query_len); + if (($text cmp $query) <= 0) { $last_hit = $mid if $text eq $query; $low = $mid + 1; @@ -121,7 +123,10 @@ if (-f $sa_file && -f $cp_file) { my %seen; for my $i ($first_hit .. $last_hit) { - my $offset = unpack("L", substr($sa_mapped, $i * 4, 4)); + seek($fh_sa, $i * 4, 0); + read($fh_sa, my $bin_off, 4); + my $offset = unpack("L", $bin_off); + foreach my $m (@$file_map) { if ($offset >= $m->{start} && $offset < $m->{end}) { if (!$seen{$m->{path}}++) { @@ -129,7 +134,9 @@ if (-f $sa_file && -f $cp_file) { my $snip_start = ($offset - 30 < $m->{start}) ? $m->{start} : $offset - 30; my $max_len = $m->{end} - $snip_start; my $read_len = ($max_len > 120) ? 120 : $max_len; - my $raw_snip = substr($cp_mapped, $snip_start, $read_len); + seek($fh_cp, $snip_start, 0); + read($fh_cp, my $raw_snip, $read_len); + my $snippet = decode_utf8($raw_snip, Encode::FB_QUIET) // $raw_snip; $snippet =~ s/\s+/ /g; # Normalize whitespace |
