summaryrefslogtreecommitdiffstats
path: root/cgi-bin/find.cgi
blob: f3f68f59fda911530d7607a06e66c468aed0c3ab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/perl

use strict;
use warnings;

use Encode qw(decode_utf8);
use File::Find;
use HTML::Escape qw(escape_html);

# ---------------------------------------------------------------------------
# find.cgi -- minimal full-text search over the generated /log/ pages.
#
# Flow: read the `q` parameter from QUERY_STRING, scan every index.html below
# $directory for a case-insensitive literal match, then print an HTML page
# listing each hit with its <title> and a short snippet of its first <p>.
#
# Text handling: the query and the page contents are decoded into Perl
# character strings and STDOUT gets a UTF-8 layer, so matching, truncation
# and output all operate on characters instead of raw bytes.
# ---------------------------------------------------------------------------

my $MAX_QUERY_CHARS   = 64;    # longest search term accepted
my $MAX_SNIPPET_CHARS = 50;    # longest snippet shown per hit

my $directory  = '../_site/log/';    # filesystem root that is crawled
my $url_prefix = '/log/';            # public URL that maps onto $directory

# Paths (relative to $directory) that must never show up as a result.
my %excluded_files = (
    'index.html' => 1,    # /log/index.html
);

# Extract and sanitise the search term from a raw CGI query string.
#
#   $query_string - value of QUERY_STRING; may be undef
#
# Returns the decoded term as a character string: URL-decoded, UTF-8 decoded,
# stripped of non-printable characters and capped at $MAX_QUERY_CHARS.
# Returns '' when there is no `q` parameter.
sub parse_search_text {
    my ($query_string) = @_;

    return '' unless defined $query_string;
    return '' unless $query_string =~ /(?:^|&)q=([^&]*)/;

    my $raw = $1;
    $raw =~ tr/+/ /;                                   # form encoding: '+' is a space
    $raw =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;       # percent-escapes -> bytes

    my $text = decode_utf8($raw);
    $text =~ s/\P{Print}//g;    # toss any non-printable characters
    return substr($text, 0, $MAX_QUERY_CHARS);
}

# Build the HTML-escaped preview for one page.
#
#   $html - decoded page content
#
# Returns the text of the first <p> element with inner tags removed and
# whitespace collapsed, cut to $MAX_SNIPPET_CHARS characters. "..." is
# appended only when the visible text really was truncated.
sub make_snippet {
    my ($html) = @_;

    my ($first_para) = $html =~ m{<p[^>]*>(.*?)</p>}is;
    my $text = defined $first_para ? $first_para : '';
    $text =~ s/<[^>]*>//g;    # remove internal tags
    $text =~ s/\s+/ /g;       # collapse whitespace

    # Escape AFTER stripping tags and truncating, so no entity is cut in half.
    my $snippet = escape_html(substr($text, 0, $MAX_SNIPPET_CHARS));
    $snippet .= '...' if length($text) > $MAX_SNIPPET_CHARS;
    return $snippet;
}

# Crawl $directory and collect every index.html containing $needle.
#
#   $needle - decoded search term, matched literally and case-insensitively
#
# Returns a list of hash refs with keys:
#   path    - site-absolute URL of the page (not yet HTML-escaped)
#   title   - HTML-escaped <title>, or the escaped relative path as fallback
#   snippet - HTML-escaped preview from make_snippet()
# Unreadable files are skipped silently (best effort).
sub search_posts {
    my ($needle) = @_;
    my @hits;

    find({
        wanted => sub {
            # Ignore directories and only process index.html
            return unless $_ eq 'index.html' && -f $_;

            # Path relative to the crawl root; also used to build the public
            # URL so the server's directory layout is never exposed.
            my $rel_path = $File::Find::name;
            $rel_path =~ s{^\Q$directory\E/?}{};
            return if $excluded_files{$rel_path};

            open(my $fh, '<:raw', $_) or return;
            my $bytes = do { local $/; <$fh> };
            close $fh;
            return unless defined $bytes;

            # Compare characters with characters: the needle is decoded, so
            # the page must be decoded too or non-ASCII terms never match.
            my $content = decode_utf8($bytes);
            return unless $content =~ /\Q$needle\E/i;

            my ($title) = $content =~ m{<title>(.*?)</title>}is;
            my $label = (defined $title && length $title) ? $title : $rel_path;

            push @hits, {
                path    => $url_prefix . $rel_path,
                title   => escape_html($label),
                snippet => make_snippet($content),
            };
        },
        no_chdir => 0,
        follow   => 0,
    }, $directory);

    return @hits;
}

# Render the result area shown below the search form.
#
#   $query - decoded search term
#   @hits  - result records as returned by search_posts()
#
# Returns an HTML fragment. Everything derived from user input or from file
# names is escaped here; title and snippet arrive already escaped.
sub render_results {
    my ($query, @hits) = @_;

    return "<p>Please enter a search term above.</p>" if $query !~ /\S/;

    if (!@hits) {
        my $safe_query = escape_html($query);    # user input: never emit raw
        return "<p>No results found for \"<b>$safe_query</b>\".</p>";
    }

    my $html = "<ul>";
    for my $hit (@hits) {
        my $href = escape_html($hit->{path});
        $html .= "<li><a href=\"$href\">$hit->{title}</a><br><small>$hit->{snippet}</small></li>";
    }
    $html .= "</ul>";
    return $html;
}

my $search_text      = parse_search_text($ENV{QUERY_STRING});
my @results          = $search_text =~ /\S/ ? search_posts($search_text) : ();
my $list             = render_results($search_text, @results);
my $safe_search_text = escape_html($search_text);
my $year             = (localtime)[5] + 1900;

# All strings above are character strings; encode exactly once, on output.
binmode STDOUT, ':encoding(UTF-8)';

print "Content-Type: text/html; charset=utf-8\n\n";

print <<"HTML";
<!DOCTYPE html>
<html lang="en-us">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Search</title>
    <link rel="stylesheet" href="/assets/css/main.css">
    <link rel="stylesheet" href="/assets/css/skeleton.css">
</head>
<body>
    <div id="nav-container" class="container">
        <ul id="navlist" class="left">
            <li><a href="/" class="link-decor-none">hme</a></li>
            <li><a href="/log/" class="link-decor-none">log</a></li>
            <li><a href="/projects/" class="link-decor-none">poc</a></li>
            <li><a href="/about/" class="link-decor-none">abt</a></li>
            <li class="active"><a href="/cgi-bin/find.cgi" class="link-decor-none">sws</a></li>
            <li><a href="/feed.xml" class="link-decor-none">rss</a></li>
        </ul>
    </div>
    <main class="container" id="main">
        <div class="container">
            <h2>Search</h2>
            <form action="" method="GET">
                <input id="search-box" type="text" name="q" value="$safe_search_text">
                <input id="search-btn" type="submit" value="Search">
            </form>
            $list
        </div>
    </main>
    <div class="footer">
        <div class="container">
            <div class="twelve columns right container-2">
                <p id="footer-text">&copy; ASCIIMX - $year</p>
            </div>
        </div>
    </div>
</body>
</html>
HTML