#!/bin/sh
#
# This script creates a new dictionary by expanding the original,
# Mozilla's, and the upstream dictionary to remove affix flags and
# then doing the wordlist equivalent of diff3 to create a new
# dictionary.
#
# The files 2-mozilla-add and 2-mozilla-rem contain words added and
# removed, respectively, in the Mozilla dictionary.  The final
# dictionary will be in hunspell-en_US-mozilla.zip.
#

# Abort on the first command that fails.
set -e

# Pin the C locale so the text tools used below (grep, sort, ...) work
# byte-wise and give the same results on every machine.
export LANG=C
export LC_ALL=C
export LC_CTYPE=C
export LC_COLLATE=C

# The script must be started from the directory that holds the scowl/
# and orig/ subdirectories; all other paths are derived from it.
WKDIR="$(pwd)"
export SCOWL="$WKDIR/scowl/"

ORIG="$WKDIR/orig/"
SPELLER="$SCOWL/speller"

# expand AFFIX_FILE
#
# Filter: reads a word list on stdin and writes the expanded, sorted,
# de-duplicated word list on stdout.
#   $1 - affix file handed to "munch-list expand" (required)
# Lines made up only of digits are dropped before expansion
# (presumably the word-count header of a .dic file -- confirm).
expand() {
  # '[0-9][0-9]*' is the portable BRE spelling of the GNU-only '[0-9]\+'.
  grep -v '^[0-9][0-9]*$' | "$SPELLER/munch-list" expand "$1" | sort -u
}

# Build the upstream dictionary (en_US-custom) inside the speller directory.
cd "$SPELLER"
MK_LIST="../mk-list -v1 --accents=both en_US 60"
# Record the exact command used to generate the word list.
cat <<EOF > params.txt
With Input Command: $MK_LIST
EOF
# note: output of make-hunspell-dict is utf-8
# $MK_LIST is intentionally left unquoted: it holds a command plus its
# arguments and must be word-split by the shell.
$MK_LIST | ./make-hunspell-dict -one en_US-custom params.txt > ./make-hunspell-dict.log
cd "$WKDIR"

# Note: Input and output of "expand" is always iso-8859-1.
#       All expanded word list files are thus in iso-8859-1.
# Expand every word list to plain words, then do a three-way merge of
# base (orig), Mozilla and upstream using comm(1).
# Relies on the expand() helper and on $SPELLER and $ORIG being set
# earlier in this script. comm requires sorted input; expand() ends
# with "sort -u", so every file produced through it qualifies.

expand "$SPELLER/en.aff" < "$SPELLER/en.dic.supp" > 0-special # input: ASCII

# input in utf-8, expand expects iso-8859-1 so use iconv
iconv -f utf-8 -t iso-8859-1 "$ORIG/en_US-custom.dic" \
  | expand "$SPELLER/en_US-custom.aff" > 1-base.txt

expand ../en-US.aff < ../en-US.dic > 2-mozilla.txt # input: iso-8859-1

# input in utf-8, expand expects iso-8859-1 so use iconv
iconv -f utf-8 -t iso-8859-1 "$SPELLER/en_US-custom.dic" \
  | expand "$SPELLER/en_US-custom.aff" > 3-upstream.txt

# Words only in base        -> removed by Mozilla.
# Words only in Mozilla     -> added by Mozilla.
comm -23 1-base.txt 2-mozilla.txt > 2-mozilla-rem
comm -13 1-base.txt 2-mozilla.txt > 2-mozilla-add
# Apply Mozilla's removals and additions on top of the upstream list.
comm -23 3-upstream.txt 2-mozilla-rem | cat - 2-mozilla-add | sort -u > 4-patched.txt

# note: output of make-hunspell-dict is utf-8
# Drop the words listed in 0-special before building the final dictionary.
comm -23 4-patched.txt 0-special \
  | "$SPELLER/make-hunspell-dict" -one en_US-mozilla /dev/null

# sanity check should yield identical results
#comm -23 1-base.txt 3-upstream.txt > 3-upstream-rem
#comm -13 1-base.txt 3-upstream.txt > 3-upstream-add
#comm -23 2-mozilla.txt 3-upstream-rem | cat - 3-upstream-add | sort -u > 4-patched-v2.txt

expand ../en-US.aff < mozilla-specific.txt > 5-mozilla-specific

# Mozilla removals that are still present upstream, and Mozilla
# additions that upstream does not have.
comm -12 3-upstream.txt 2-mozilla-rem > 5-mozilla-removed
comm -13 3-upstream.txt 2-mozilla-add > 5-mozilla-added