summaryrefslogtreecommitdiffstats
path: root/util/article_stats.sh
blob: c91afa1fc495d72c1b5536ab1875abfa2070bc84 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/ksh
#
# article_stats.sh - size statistics for index.html files.
#
# Scans the current directory tree for regular files named index.html and
# prints how many were found plus the average, median, mode and maximum of
# their sizes in KB (1 KB = 1024 bytes).  Prints a notice and exits 0 when
# no such file exists.
#
# Sizes are kept as exact byte counts until the final report: converting to
# KB before sorting would pass the values through awk default number
# formatting (6 significant digits, exponent form for large values), which
# "sort -n" cannot order and which merges distinct sizes in the mode count.
# Everything runs in one pipeline, so no temporary file is needed.

# list_sizes: write the size in bytes of every index.html below ".",
# one per line, in ascending numeric order.
list_sizes() {
	# ls -ln rather than stat or find -printf, for portability across
	# AIX, Solaris, and BSD; field 5 of the long listing is the byte size.
	# Lines whose fifth field is not a plain number are skipped.
	find . -type f -name "index.html" -exec ls -ln {} + |
		awk '$5 ~ /^[0-9]+$/ { print $5 }' |
		sort -n
}

# compute_stats: read ascending byte sizes (one per line) on stdin and print
# "count|average|median|mode|mode_frequency|maximum", sizes in KB with two
# decimals.  Prints just "0" when the input is empty.
compute_stats() {
	awk '
	{
		size[NR] = $1
		total += $1
		seen[$1]++
		if ($1 > largest) largest = $1
	}
	END {
		if (NR == 0) {
			print 0
			exit
		}

		# Median: middle element, or mean of the two middle elements
		# (input is already sorted).
		if (NR % 2 == 1) {
			median = size[(NR + 1) / 2]
		} else {
			median = (size[NR / 2] + size[NR / 2 + 1]) / 2
		}

		# Mode: most frequent size.  "for (k in array)" visits keys in
		# unspecified order, so ties go to the smallest size to keep
		# the result identical on every awk implementation.
		best = 0
		mode = 0
		for (k in seen) {
			if (seen[k] > best || (seen[k] == best && k + 0 < mode + 0)) {
				best = seen[k]
				mode = k
			}
		}

		printf "%d|%.2f|%.2f|%.2f|%d|%.2f\n", NR, total / NR / 1024, median / 1024, mode / 1024, best, largest / 1024
	}'
}

# main: gather the sizes, then print either the report or the "none found"
# notice.  Its return status becomes the exit status of the script.
main() {
	stats=$(list_sizes | compute_stats)

	# Split the "|"-separated awk result into shell variables.  The count
	# comes from awk NR, so it carries no padding the way "wc -l" output
	# does on some systems.
	IFS='|' read -r count average median mode freq maximum <<EOF
$stats
EOF

	if [ "${count:-0}" -eq 0 ]; then
		echo "No index.html files found."
		return 0
	fi

	# printf with \t emits the same bytes as literal tab characters would,
	# without depending on the editor preserving them.
	printf '%s\n' "--- Statistics for index.html (KB) ---"
	printf 'Files found:\t%s\n' "$count"
	printf 'Average:\t%s KB\n' "$average"
	printf 'Median: \t%s KB\n' "$median"
	printf 'Mode:   \t%s KB (appears %s times)\n' "$mode" "$freq"
	printf 'Maximum:\t%s KB\n' "$maximum"
}

main "$@"