#!/usr/bin/awk -f
#
# Count how many times each word is used in the input.
#
# A "word" is a maximal run of ASCII letters (A-Z, a-z); words are folded to
# lower case before counting, so "The" and "the" are the same word.
# For every distinct word one line is printed containing:
#   - the number of occurrences,
#   - the word, left-justified and padded to the longest word seen,
#   - its share of all counted words, as a percentage.
# Lines are emitted in whatever order awk's `for (w in words)` yields.

BEGIN {
    # Any run of non-letters separates fields, so every non-empty field
    # is exactly one word.
    FS = "[^A-Za-z]+"
}

{
    for (i = 1; i <= NF; i++) {
        # A record that starts or ends with a separator (leading space,
        # trailing punctuation, ...) produces an empty field. Skip it so it
        # is not counted in nwords; otherwise the percentages are deflated.
        if (length($i) == 0)
            continue

        words[tolower($i)]++
        nwords++

        # Track the widest word to size the output column.
        if (length($i) > maxlen)
            maxlen = length($i)
    }
}

END {
    # nwords is non-zero whenever words has at least one entry, so the
    # division below is safe; with no words the loop body never runs.
    for (w in words) {
        printf "%4d %-" maxlen "s (%2.2f%%)\n", words[w], w, (100.0 * words[w] / nwords)
    }
}