summaryrefslogtreecommitdiffstats
path: root/cgi-bin/rgx-search.cgi
blob: 67815c852e56679c326273807b775a9befe37975 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#!/usr/bin/perl

use strict;
use warnings;
use File::Spec;
use Encode qw(decode_utf8);
use URI::Escape qw(uri_unescape);
use HTML::Escape qw(escape_html);

# Everything printed below is character data; encode it as UTF-8 on the way out.
binmode(STDOUT, ":utf8");

# --- Configuration ---
my $max_parallel    = 50;                   # refuse new searches once this many locks are live
my $lock_timeout    = 30;                   # seconds after which a lock file counts as stale
my $max_results     = 20;                   # the search loop stops at this many hits
my $lock_dir        = '/tmp/search_locks';  # holds one "<pid>.lock" file per running request
my $directory       = '../log/';            # root of the tree that is walked for index.html files

# --- Concurrency Control ---
# Every running request owns "$lock_dir/<pid>.lock". Lock files younger than
# $lock_timeout are counted as searches in flight; older ones are removed.
# NOTE(review): mode 0777 (before umask) under /tmp lets other local users
# create or delete lock files -- confirm that this is intended.
mkdir $lock_dir, 0777 unless -d $lock_dir;
my $active_count = 0;
my $now = time();

if (opendir(my $dh_lock, $lock_dir)) {
    while (defined(my $file = readdir($dh_lock))) {
        next unless $file =~ /\.lock$/;
        my $path = "$lock_dir/$file";

        # A failed stat yields mtime 0, which makes the entry look stale.
        my $mtime = (stat($path))[9] || 0;

        if ($now - $mtime > $lock_timeout) {
            unlink($path);
        }
        else {
            $active_count++;
        }
    }
    closedir($dh_lock);
}
else {
    # Without a readable lock directory the request count is unknown; log it
    # instead of reading from an unopened handle. Creating our own lock
    # below still dies if the directory is really unusable.
    warn "Cannot read lock directory $lock_dir: $!";
}

if ($active_count >= $max_parallel) {
    render_html("<p>Server busy. Please try again in a few seconds.</p>", "");
    exit;
}

my $lock_file = "$lock_dir/$$.lock";
open(my $fh_lock, '>', $lock_file) or die "Cannot create lock: $!";

# Safety net: remove our lock if the script ends through die() or a plain
# exit rather than through final_output(). Unlinking a file that is already
# gone is harmless; on the "Server busy" path $lock_file is still undef.
END {
    unlink($lock_file) if defined $lock_file;
}

# --- Query Decoding ---
# Extract the "q" parameter from the raw query string and normalise it into
# $search_text: form-decode, UTF-8-decode, strip non-printable characters,
# cap at 64 characters and trim surrounding whitespace.
my $search_text = '';

# Accept q= anywhere in the query string, not only as the first parameter.
if (($ENV{QUERY_STRING} || '') =~ /(?:^|&)q=([^&]*)/) {
    my $raw_q = $1;
    $raw_q =~ tr/+/ /;                       # form encoding writes spaces as '+'
    $search_text = decode_utf8(uri_unescape($raw_q));
    $search_text =~ s/\P{Print}//g;          # drop everything that is not printable
    $search_text = substr($search_text, 0, 64);
    $search_text =~ s/^\s+|\s+$//g;
}

# final_output() prints the page and exits, so an empty query never reaches
# the search below.
if ($search_text eq '') {
    final_output("<p>Please enter a search term above.</p>");
}

# --- Search ---
# Iterative depth-first walk below $directory using an explicit stack.
# Each file whose name ends in "index.html" is reduced to the plain text of
# its <article> element (with <pre> and <code> sections removed) and searched
# case-insensitively for the literal $search_text. Each hit is stored in
# @results as { path, title, snippet }.
my @results;
my $files_read = 0;

if ($search_text =~ /\S/) {
    my @stack = ($directory);

    PATH: while (@stack) {
        # Exit search immediately if limit reached
        last PATH if scalar @results >= $max_results;

        my $current_path = pop @stack;

        if (-d $current_path) {
            # Directories that cannot be opened are skipped (best effort).
            opendir(my $dh, $current_path) or next PATH;
            while (defined(my $entry = readdir($dh))) {
                next if $entry eq '.' || $entry eq '..';
                push @stack, File::Spec->catfile($current_path, $entry);
            }
            closedir($dh);
            next PATH;
        }

        # Cheap name test first, filesystem test second.
        next PATH unless $current_path =~ /index\.html$/ && -f $current_path;

        # Files that cannot be opened are skipped as well.
        open(my $fh, '<:utf8', $current_path) or next PATH;
        $files_read++;
        my $raw_content = do { local $/; <$fh> };
        close $fh;
        next PATH unless defined $raw_content;

        # Pages without an <article> element have nothing to search. This
        # check has to come before the substitutions, which would otherwise
        # run on undef and emit warnings.
        my ($article) = $raw_content =~ /<article[^>]*>(.*?)<\/article>/is;
        next PATH unless defined $article;

        $article =~ s/<pre.*?>.*?<\/pre>//isg;
        $article =~ s/<code.*?>.*?<\/code>//isg;
        next PATH unless $article;

        # Clean for accurate regex offsets
        my $clean_text = $article;
        $clean_text =~ s/<[^>]*>/ /g;    # Tags to space to prevent word mashing
        $clean_text =~ s/\s+/ /g;
        $clean_text =~ s/^\s+|\s+$//g;

        next PATH unless $clean_text =~ /\Q$search_text\E/i;

        # Read the match offsets right away, before another regex resets them.
        my ($match_pos, $match_end) = ($-[0], $+[0]);

        # Context window: up to 120 characters on each side of the match.
        # trim_and_clean_snippet() keeps at most the first 160 characters of
        # the chunk, so the leading context is reduced for long matches to
        # keep the end of the match inside that cut.
        my $lead = 160 - ($match_end - $match_pos);
        $lead = 120 if $lead > 120;
        $lead = 0   if $lead < 0;

        my $grab_start = ($match_pos > $lead) ? $match_pos - $lead : 0;
        my $grab_len   = ($match_end - $grab_start) + 120;
        my $raw_chunk  = substr($clean_text, $grab_start, $grab_len);

        my $is_start = ($grab_start == 0);
        my $is_end   = ($grab_start + $grab_len >= length($clean_text));

        my $snippet = trim_and_clean_snippet($raw_chunk, $is_start, $is_end);

        # The title is the page's own <title> markup and is kept as-is; the
        # fallback is a filesystem path, so it is escaped before it is
        # emitted as HTML.
        my ($title) = $raw_content =~ /<title>(.*?)<\/title>/is;
        push @results, {
            path    => $current_path,
            title   => $title || escape_html($current_path),
            snippet => $snippet,
        };
    }
}

# --- Output ---
# HTML-escaped query, used both in the "no results" message and (through
# final_output) as the value of the search box.
my $safe_search_text = escape_html($search_text);

my $list_html = "";
if (@results == 0) {
    $list_html = "<p>No results found for \"<b>$safe_search_text</b>\".</p>";
}
else {
    my @items;
    for my $hit (@results) {
        # The path comes from the filesystem, so escape it before placing it
        # in an attribute. The title is the page's own <title> markup and the
        # snippet was escaped by trim_and_clean_snippet(); both go in as-is.
        my $href = escape_html("/$hit->{path}");
        push @items,
            "<li><a href=\"$href\">$hit->{title}</a><br><small>$hit->{snippet}</small></li>";
    }
    $list_html = "<ul>" . join('', @items) . "</ul>";
}

final_output($list_html);

# Print the page with $content as its body, release this request's lock file
# and terminate the script.
#
#   $content - HTML fragment placed below the search form.
#
# Does not return: it always ends in exit.
sub final_output {
    my ($content) = @_;

    # The empty-query path calls this before the top-level
    # "my $safe_search_text = ..." statement has executed, so the variable
    # can still be undef here; fall back to an empty search box.
    render_html($content, $safe_search_text // '');

    if ($fh_lock) {
        close($fh_lock);
        unlink($lock_file);
    }
    exit;
}

# Turn a raw context window into a short, HTML-escaped snippet.
#
#   $raw_chunk - plain text cut out around the match
#   $is_start  - true when the chunk begins at the start of the article text
#   $is_end    - true when the chunk reaches the end of the article text
#
# Returns the escaped snippet: at most 160 characters of text, capitalised,
# followed by "..." when text after it was left out and it does not already
# end in '.', '!' or '?'.
sub trim_and_clean_snippet {
    my ($raw_chunk, $is_start, $is_end) = @_;

    # Unless the chunk starts the article, its first word is probably a
    # fragment: drop everything up to and including the first whitespace.
    if (!$is_start) {
        $raw_chunk =~ s/^[^\s]*\s//;
    }

    # Text follows the snippet whenever the chunk stops short of the end.
    my $show_ellipsis = !$is_end;

    if (length($raw_chunk) > 160) {
        $raw_chunk = substr($raw_chunk, 0, 160);

        # Back up to the last whitespace so no word is cut in half.
        $raw_chunk =~ s/\s+[^\s]*$//;

        # Text was removed either way, even when the chunk had no whitespace
        # to back up to, so the ellipsis is always due here.
        $show_ellipsis = 1;
    }

    # Sometimes leading punctuation or stray whitespace remains after the trim.
    $raw_chunk =~ s/^[:;,.?!\s]+//;

    # Final polish
    $raw_chunk = ucfirst($raw_chunk);

    # Test the unescaped text for terminal punctuation, then append the
    # ellipsis to the escaped output.
    my $final_text = escape_html($raw_chunk);
    if ($show_ellipsis && $raw_chunk !~ /[.!?]$/) {
        $final_text .= "...";
    }

    return $final_text;
}

# Print the CGI header and the complete search page to the currently
# selected output handle.
#
#   $content - HTML fragment inserted below the search form, emitted as-is
#   $q_val   - text for the search box; it is inserted into the value
#              attribute as-is, so the caller must pass it HTML-escaped
#
# Undefined arguments are treated as empty strings.
sub render_html {
    my ($content, $q_val) = @_;

    # Interpolating undef into the page would only produce warnings.
    $content //= '';
    $q_val   //= '';

    print "Content-Type: text/html; charset=UTF-8\n\n";
    print <<"HTML";
<!DOCTYPE html>
<html lang="en-us">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Journal | Search</title>
    <link rel="stylesheet" href="/assets/css/main.css">
</head>
<body>
    <header>
        <h1><a href="/">Journal</a></h1> / 
        <h1><a href="/cgi-bin/search.cgi">Search</a></h1>
    </header>
    <article>
        <form id="search-bar" action="" method="GET">
            <input id="search-box" type="text" name="q" value="$q_val">
            <input id="search-btn" type="submit" value="Search">
        </form>
        $content
    </article>
</body>
</html>
HTML
}