// Program to create mnemonic words for numbers.
// This uses the Major system: each digit 0-9 corresponds to a group of
// consonant phonemes, and a word "spells" the digit string obtained by
// mapping its phonemes (in order) to digits.  Phonemes that are not listed
// in letterMap are ignored.

// Position in this array = digit.  Each element is a space-separated list of
// the phoneme symbols that stand for that digit.
//             0         1         2      3     4      5         6          7      8      9
letterMap = ["Z S", "D T DH TH", "N NG", "M", "R ER", "L", "CH JH SH ZH", "G K", "F V", "B P"]

// phoneme symbol -> digit
phonemeToNumberMap = invertMap[letterMap]

// digit string -> array of words that encode it
words = new dict
// NOTE(review): the purpose of this entry is not evident from this file;
// it is kept exactly as it was.
words@10000=1

// lowercased word -> [condensed pronunciation, digit string]
pronDict = new dict

// Read in pronunciations of words.  Just save the phonemes that are defined
// in the major system.
// The wordlist files are part of the Moby wordlist project, available at:
// http://icon.shef.ac.uk/Moby/
for line = lines["file:///home/eliasen/prog/mobydict/mpron/cmupronunciation.txt", "ISO8859_1"]
{
   if line =~ %r/^\s*;/       // Skip comments
      next

   // First whitespace-delimited token is the word, the rest is the
   // pronunciation.
   [word, pron] = line =~ %r/(.*?)\s+(.*)/

   // Skip words with multiple pronunciation.  These appear as "word(n)".
   if [baseword] = word =~ %r/(.*)\(\d+\)/
   {
      // Entries are stored under lc[word] (see below), so the base word has
      // to be lowercased as well; otherwise the earlier definition is never
      // cleared when the file's words are not already lowercase.
      baseword = lc[baseword]
      pronDict@baseword = undef      // Undefine previous definitions.
      next
   }

   word = lc[word]
   [condensedPron, codenum] = stripUnusedPhonemes[pron, phonemeToNumberMap]

   // Filter by length:  only keep words that encode 3 or 4 digits, which is
   // what the tables printed below cover.
   if length[codenum] >= 3 and length[codenum] <= 4
      pronDict@word = [condensedPron, codenum]
}

println[""]

// Read in nouns and verbs from the "parts of speech" data file.
// Several println string literals from here on contain an embedded line
// break, so the literal continues on the following source line.
for line = lines["file:///home/eliasen/prog/mobydict/mpos/partsofspeechUTF-8.txt", "UTF-8"]
   if [word] = line =~ %r/^([a-z\-\s]*)\|.*[NVitph]/
   {
      word = lc[word]
      [pron, num] = pronDict@word

      // Only words that survived the pronunciation pass are listed.
      if (pron != undef)
      {
         println["
$word$num"]

         // Append to the existing list for this digit string, or start one.
         if words@num
            words@num.push[word]
         else
            words@num=[word]
      }
   }

println["
"]

println[""]

// Table of all 3-digit codes.
for i = 0 to 999
{
   pad = right["00" + i, 3]    // Pad the number with zeroes.
   if words@pad
      println["
$pad" + join[", ", words@pad]]
   else
      println["
$pad"]
}

// Table of all 4-digit codes.
for i = 0 to 9999
{
   pad = right["000" + i, 4]    // Pad the number with zeroes.
   if words@pad
      println["
$pad" + join[", ", words@pad]]
   else
      println["
$pad"]
}

println["
"]


// Turns an array with each element separated by spaces into a dictionary
// where the key is the symbol and the value is the number.
//
//   orig:    array of strings; each string holds whitespace-separated symbols.
//   returns: dict mapping every symbol to the zero-based index of the array
//            element it appeared in.
invertMap[orig] :=
{
   result = new dict
   idx = 0
   for codes = orig
   {
      // Every symbol in the same element shares the same index.
      for token = split[%r/\s+/, codes]
         result@token = idx

      idx = idx + 1
   }

   return result
}


// Take a pronunciation string and remove any unused phonemes or
// stress indicators.
//
//   pron:    whitespace-separated phoneme symbols, possibly carrying digits
//            as stress indicators.
//   mapDict: dict from phoneme symbol to number (as built by invertMap).
//   returns: [stripped, numStr] where stripped is the space-joined list of
//            phonemes found in mapDict and numStr is the concatenation of
//            their mapped numbers, in the same order.
stripUnusedPhonemes[pron, mapDict] :=
{
   stripped = ""
   numStr = ""

   // Remove stress indicators. (0, 1, 2)
   pron =~ %s/\d//g

   for phoneme = split[%r/\s+/, pron]
   {
      num = mapDict@phoneme

      // Phonemes without a mapping are skipped.
      if num != undef
      {
         if (stripped == "")
            stripped = phoneme
         else
            stripped = stripped + " $phoneme"

         numStr = numStr + num
      }
   }

   return [stripped, numStr]
}