#!/bin/bash
#
# Word-frequency report.
#
# Usage: SCRIPT INPUT_FILE [EXCLUDE_FILE]
#
# Splits INPUT_FILE into whitespace-separated words, deletes every character
# outside a-z, A-Z and 0-9 from each word, lower-cases the result and counts
# how often each word occurs. Words occurring fewer than MIN_COUNT times and
# words matching an entry of EXCLUDE_FILE are left out of the report.
#
# EXCLUDE_FILE defaults to "exclude.txt" in the current directory. It holds
# one entry per line; whitespace inside an entry is removed, and each entry is
# used as an extended regular expression that must match a whole word
# (case-insensitively). If the file cannot be read, nothing is excluded.
#
# Output (stdout), words in sort(1) order:
#   frequencies: {
#     "word": count,
#     ...
#   }
#
# Exit status: 0 on success, 1 if INPUT_FILE is unreadable, 2 on usage error.
#
# `set -e` / `pipefail` are deliberately not enabled: `grep -v` exits with
# status 1 when it filters out every line, which is a normal outcome here.

# Smallest occurrence count that is still reported.
readonly MIN_COUNT=6
# Exclude list used when no second argument is given.
readonly DEFAULT_EXCLUDE_FILE='exclude.txt'

#######################################
# Print the exclusion patterns, one per line, with all whitespace removed
# and blank entries dropped.
# Arguments: $1 - path of the exclude file
# Outputs:   patterns on stdout; a warning on stderr if the file is unreadable
# Returns:   0 (an unreadable file simply yields no patterns)
#######################################
exclude_patterns() {
  local exclude_file=$1
  if [[ ! -r "$exclude_file" ]]; then
    printf 'warning: cannot read %s; no words are excluded\n' \
      "$exclude_file" >&2
    return 0
  fi
  sed -e 's/[[:space:]]//g' -e '/^$/d' < "$exclude_file"
}

#######################################
# Print the normalized words of a file, one per line.
# Arguments: $1 - path of the input file
# Outputs:   lower-cased alphanumeric words on stdout
#######################################
normalized_words() {
  # Split on any whitespace (not only spaces) so that tab-separated words
  # are not glued together when the non-alphanumerics are stripped.
  # Tokens that end up empty (pure punctuation) are dropped right away.
  tr -s '[:space:]' '\n' < "$1" \
    | sed -e 's/[^a-zA-Z0-9]//g' -e '/^$/d' \
    | tr '[:upper:]' '[:lower:]'
}

#######################################
# Entry point: validate arguments and print the frequency report.
# Arguments: $1 - input file, $2 - optional exclude file
# Returns:   0 on success, 1 on unreadable input, 2 on usage error
#######################################
main() {
  if (( $# < 1 )); then
    printf 'Usage: %s INPUT_FILE [EXCLUDE_FILE]\n' "${0##*/}" >&2
    return 2
  fi

  local input=$1
  local exclude_file=${2:-$DEFAULT_EXCLUDE_FILE}

  if [[ ! -r "$input" || -d "$input" ]]; then
    printf '%s: cannot read input file: %s\n' "${0##*/}" "$input" >&2
    return 1
  fi

  printf '%s\n' 'frequencies: {'
  # The patterns are handed to grep as a pattern file (-f) instead of being
  # spliced into one big regex on the command line; -x anchors every pattern
  # to the whole word. An empty pattern list matches nothing, so with -v
  # every word passes through.
  normalized_words "$input" \
    | grep -E -i -v -x -f <(exclude_patterns "$exclude_file") \
    | sort \
    | uniq -c \
    | awk -v min="$MIN_COUNT" \
        '$1 + 0 >= min + 0 { printf "  \"%s\": %s,\n", $2, $1 }'
  printf '%s\n' '}'
}

main "$@"
