diff options
Diffstat (limited to 'util/article_stats.sh')
| -rw-r--r-- | util/article_stats.sh | 61 |
1 files changed, 61 insertions, 0 deletions
#!/bin/ksh
#
# util/article_stats.sh
#
# Report size statistics, in KB, for every regular file named index.html
# found below the current directory: file count, average, median, mode
# (with its frequency) and maximum.
#
# Usage:  run from the directory to scan; no arguments are used.
# Output: the report on stdout, or "No index.html files found." when the
#         search matches nothing.
# Exit:   the status of the final awk stage (0 unless awk itself fails),
#         both when files are found and when none are.
#
# The whole computation is a single pipeline, so no temporary file is
# created and nothing is left behind if the script is interrupted.

# Pin the C locale so that ls, sort and awk lay out fields and format
# numbers the same way whatever locale the caller is running under.
LC_ALL=C
export LC_ALL

# list_sizes - print the size in bytes of each index.html below ".",
# one per line, in ascending numeric order.
#
# ls -ln is used for portability across AIX, Solaris, and BSD.  -n prints
# numeric owner and group IDs, so a name containing blanks cannot shift
# the size out of field 5.  Lines whose fifth field is not a plain integer
# (for example the tail of a path that contains a newline) are dropped.
#
# Sizes are deliberately kept in bytes here.  Converting to KB before the
# sort would push every value through awk's default 6-significant-digit
# output format, which merges distinct sizes once they exceed roughly
# 100 KB and emits exponent notation that sort -n cannot order.
list_sizes() {
  find . -type f -name "index.html" -exec ls -ln {} + |
    awk '$5 ~ /^[0-9]+$/ { print $5 }' |
    sort -n
}

# print_report - read ascending byte sizes on stdin (one per line) and
# write the statistics report, converted to KB, on stdout.  With empty
# input it prints "No index.html files found." instead.
print_report() {
  awk '
    {
      bytes = $1 + 0
      size[NR] = bytes
      total += bytes

      # Input is sorted, so equal sizes are adjacent: the mode is the
      # longest run.  Only a strictly longer run replaces the current
      # best, so on a tie the smallest size wins, deterministically.
      if (NR > 1 && bytes == prev) {
        run++
      } else {
        run = 1
      }
      if (run > best_run) {
        best_run = run
        mode = bytes
      }
      prev = bytes
    }
    END {
      if (NR == 0) {
        print "No index.html files found."
        exit 0
      }

      # Median: middle element, or the mean of the two middle elements.
      if (NR % 2 == 1) {
        median = size[int(NR / 2) + 1]
      } else {
        median = (size[NR / 2] + size[NR / 2 + 1]) / 2
      }

      print "--- Statistics for index.html (KB) ---"
      printf "Files found: %d\n", NR
      printf "Average: %.2f KB\n", total / NR / 1024
      printf "Median: %.2f KB\n", median / 1024
      printf "Mode: %.2f KB (appears %d times)\n", mode / 1024, best_run
      # The input is ascending, so the last element is the maximum.
      printf "Maximum: %.2f KB\n", size[NR] / 1024
    }'
}

# main - scan the current directory and print the report.
main() {
  list_sizes | print_report
}

main "$@"
