2 * Portions Copyright 2001 Sun Microsystems, Inc.
3 * Portions Copyright 1999-2001 Language Technologies Institute,
4 * Carnegie Mellon University.
5 * All Rights Reserved. Use is subject to license terms.
7 * See the file "license.terms" for information on usage and
8 * redistribution of this file, and for a DISCLAIMER OF ALL
11 package com.sun.speech.freetts.lexicon;
13 import java.io.BufferedOutputStream;
14 import java.io.BufferedReader;
15 import java.io.DataInputStream;
16 import java.io.DataOutputStream;
17 import java.io.FileOutputStream;
18 import java.io.IOException;
19 import java.io.InputStream;
20 import java.io.InputStreamReader;
22 import java.util.ArrayList;
23 import java.util.HashMap;
24 import java.util.HashSet;
25 import java.util.Iterator;
26 import java.util.List;
28 import java.util.StringTokenizer;
30 import com.sun.speech.freetts.util.BulkTimer;
31 import com.sun.speech.freetts.util.Utilities;
34 * Provides the phone list for words using the CMU6 letter-to-sound
35 * (LTS) rules, which are based on the Black, Lenzo, and Pagel paper,
36 * "Issues in Building General Letter-to-Sound Rules." Proceedings
37 * of ECSA Workshop on Speech Synthesis, pages 77-80, Australia, 1998.
39 * <p>The LTS rules are a simple state machine, with one entry point
40 * for each letter of the alphabet (lower case letters are always
41 * assumed, and the rules keep an array with one entry per letter that
42 * point into the state machine).
44 * <p>The state machine consists of a huge array, with most entries
45 * containing a decision and the indices of two other entries. The
46 * first of these two indices represents where to go if the decision
47 * is true, and the second represents where to go if the decision is
48 * false. All entries that do not contain a decision are final
49 * entries, and these contain a phone.
51 * <p>The decision in this case is a simple character comparison,
52 * but it is done in the context of a window around the character in
53 * the word. The decision consists of a index into the context window
54 * and a character value. If the character in the context window
55 * matches the character value, then the decision is true.
57 * <p>The machine traversal for each letter starts at that letter's
58 * entry in the state machine and ends only when it reaches a final
59 * state. If there is no phone that can be mapped, the phone in the
60 * final state is set to 'epsilon.'
62 * <p>The context window for a character is generated in the following
66 * <li>Pad the original word on either side with '#' and '0'
67 * characters the size of the window for the LTS rules (in this case,
68 * the window size is 4). The "#" is used to indicate the beginning
69 * and end of the word. So, the word "monkey" would turn into
71 * <li>For each character in the word, the context window consists of
72 * the characters in the padded form the preceed and follow the word.
73 * The number of characters on each side is dependent upon the window
74 * size. So, for this implementation, the context window for the 'k'
75 * in monkey is "#money#0".
78 * <p>Here's how the phone for 'k' in 'monkey' might be determined:
81 * <li>Create the context window "#money#0".
82 * <li>Start at the state machine entry for 'k' in the state machine.
83 * <li>Grab the 'index' from the current state. This represents an
84 * index into the context window.
85 * <li>Compare the value of the character at the index in the context
86 * window to the character from the current state. If there is a
87 * match, the next state is the qtrue value. If there isn't a match,
88 * the next state is the qfalse state.
89 * <li>Keep on working through the machine until you read a final
91 * <li>When you get to the final state, the phone is the character in
95 * <p>This implementation will either read from a straight ASCII file
96 * or a binary file. When reading from an ASCII file, you can specify
97 * when the input line is tokenized: load, lookup, or never. If you
98 * specify 'load', the entire file will be parsed when it is loaded.
99 * If you specify 'lookup', the file will be loaded, but the parsing
100 * for each line will be delayed until it is referenced and the parsed
101 * form will be saved away. If you specify 'never', the lines will
102 * parsed each time they are referenced. The default is 'load'. To
103 * specify the load type, set the system property as follows:
106 * -Dcom.sun.speech.freetts.lexicon.LTSTokenize=load
109 * <p>[[[TODO: This implementation uses ASCII 'a'-'z', which is not
110 * internationalized.]]]
112 public class LetterToSoundImpl implements LetterToSound {
114 * Entry in file represents the total number of states in the
115 * file. This should be at the top of the file. The format
116 * should be "TOTAL n" where n is an integer value.
118 final static String TOTAL = "TOTAL";
121 * Entry in file represents the beginning of a new letter index.
122 * This should appear before the list of a new set of states for
123 * a particular letter. The format should be "INDEX n c" where
124 * n is the index into the state machine array and c is the
127 final static String INDEX = "INDEX";
130 * Entry in file represents a state. The format should be
131 * "STATE i c t f" where 'i' represents an index to look at in the
132 * decision string, c is the character that should match, t is the
133 * index of the state to go to if there is a match, and f is the
134 * of the state to go to if there isn't a match.
136 final static String STATE = "STATE";
139 * Entry in file represents a final state. The format should be
140 * "PHONE p" where p represents a phone string that comes from the
143 final static String PHONE = "PHONE";
146 * If true, the state string is tokenized when it is first read.
147 * The side effects of this are quicker lookups, but more memory
148 * usage and a longer startup time.
150 protected boolean tokenizeOnLoad = false;
153 * If true, the state string is tokenized the first time it is
154 * referenced. The side effects of this are quicker lookups, but
157 protected boolean tokenizeOnLookup = false;
160 * Magic number for binary LTS files.
162 private final static int MAGIC = 0xdeadbeef;
165 * Current binary file version.
167 private final static int VERSION = 1;
170 * The LTS state machine. Entries can be String or State. An
171 * ArrayList could be used here -- I chose not to because I
172 * thought it might be quicker to avoid dealing with the dynamic
175 private Object[] stateMachine = null;
178 * The number of states in the state machine.
180 private int numStates = 0;
183 * The 'window size' of the LTS rules.
185 private final static int WINDOW_SIZE = 4;
188 * An array of characters to hold a string for checking against a
189 * rule. This will be reused over and over again, so the goal
190 * was just to have a single area instead of new'ing up a new one
191 * for every word. The name choice is to match that in Flite's
192 * <code>cst_lts.c</code>.
194 private char[] fval_buff = new char[WINDOW_SIZE * 2];
197 * The indexes of the starting points for letters in the state machine.
199 protected HashMap letterIndex;
202 * The list of phones that can be returned by the LTS rules.
204 static private List phonemeTable;
209 * @param ltsRules a URL pointing to the text
210 * containing the letter to sound rules
211 * @param binary if true, the URL is a binary source
213 * @throws NullPointerException if the ltsRules are null
214 * @throws IOException if errors are encountered while reading the
215 * compiled form or the addenda
217 public LetterToSoundImpl(URL ltsRules, boolean binary) throws IOException {
218 BulkTimer.LOAD.start("LTS");
219 InputStream is = ltsRules.openStream();
226 BulkTimer.LOAD.stop("LTS");
230 * Loads the LTS rules from the given text input stream. The
231 * stream is not closed after the rules are read.
233 * @param is the input stream
235 * @throws IOException if an error occurs on input.
237 private void loadText(InputStream is) throws IOException {
238 BufferedReader reader;
241 // Find out when to convert the phone string into an array.
244 Utilities.getProperty("com.sun.speech.freetts.lexicon.LTSTokenize",
246 tokenizeOnLoad = tokenize.equals("load");
247 tokenizeOnLookup = tokenize.equals("lookup");
249 letterIndex = new HashMap();
251 reader = new BufferedReader(new InputStreamReader(is));
252 line = reader.readLine();
253 while (line != null) {
254 if (!line.startsWith("***")) {
257 line = reader.readLine();
262 * Loads the LTS rules from the given binary input stream. The
263 * input stream is not closed after the rules are read.
265 * @param is the input stream
267 * @throws IOException if an error occurs on input.
269 private void loadBinary(InputStream is) throws IOException {
270 DataInputStream dis = new DataInputStream(is);
272 if (dis.readInt() != MAGIC) {
273 throw new Error("Bad LTS binary file format");
276 if (dis.readInt() != VERSION) {
277 throw new Error("Bad LTS binary file version");
280 // read the phoneme table
282 int phonemeTableSize = dis.readInt();
283 phonemeTable = new ArrayList(phonemeTableSize);
285 for (int i = 0; i < phonemeTableSize; i++) {
286 String phoneme = dis.readUTF();
287 phonemeTable.add(phoneme);
292 int letterIndexSize = dis.readInt();
293 letterIndex = new HashMap();
294 for (int i = 0; i < letterIndexSize; i++) {
295 char c = dis.readChar();
296 int index = dis.readInt();
297 letterIndex.put(Character.toString(c), new Integer(index));
300 // statemachine states
302 int stateMachineSize = dis.readInt();
303 stateMachine = new Object[stateMachineSize];
304 for (int i = 0; i < stateMachineSize; i++) {
305 int type = dis.readInt();
307 if (type == FinalState.TYPE) {
308 stateMachine[i] = FinalState.loadBinary(dis);
309 } else if (type == DecisionState.TYPE) {
310 stateMachine[i] = DecisionState.loadBinary(dis);
312 throw new Error("Unknown state type in LTS load");
319 * Creates a word from the given input line and add it to the state
320 * machine. It expects the TOTAL line to come before any of the
323 * @param line the line of text from the input file
325 protected void parseAndAdd(String line) {
326 StringTokenizer tokenizer = new StringTokenizer(line," ");
327 String type = tokenizer.nextToken();
329 if (type.equals(STATE) || type.equals(PHONE)) {
330 if (tokenizeOnLoad) {
331 stateMachine[numStates] = getState(type, tokenizer);
333 stateMachine[numStates] = line;
336 } else if (type.equals(INDEX)) {
337 Integer index = new Integer(tokenizer.nextToken());
338 if (index.intValue() != numStates) {
339 throw new Error("Bad INDEX in file.");
341 String c = tokenizer.nextToken();
342 letterIndex.put(c,index);
344 } else if (type.equals(TOTAL)) {
345 stateMachine = new Object[Integer.parseInt(tokenizer.nextToken())];
350 * Dumps a binary form of the letter to sound rules.
351 * This method is not thread-safe.
353 * <p>Binary format is:
361 * @param path the path to dump the file to
363 * @throws IOException if a problem occurs during the dump
365 public void dumpBinary(String path) throws IOException {
366 FileOutputStream fos = new FileOutputStream(path);
367 DataOutputStream dos = new DataOutputStream(new
368 BufferedOutputStream(fos));
371 dos.writeInt(VERSION);
375 phonemeTable = findPhonemes();
376 dos.writeInt(phonemeTable.size());
377 for (Iterator i = phonemeTable.iterator(); i.hasNext(); ) {
378 String phoneme = (String) i.next();
379 dos.writeUTF(phoneme);
384 dos.writeInt(letterIndex.size());
385 for (Iterator i = letterIndex.keySet().iterator(); i.hasNext(); ) {
386 String letter = (String) i.next();
387 int index = ((Integer) letterIndex.get(letter)).intValue();
388 dos.writeChar(letter.charAt(0));
392 // statemachine states
394 dos.writeInt(stateMachine.length);
396 for (int i = 0; i < stateMachine.length; i++) {
397 getState(i).writeBinary(dos);
403 * Returns a list of all the phonemes used by the LTS rules.
405 * @return a list of all the phonemes
407 private List findPhonemes() {
408 Set set = new HashSet();
409 for (int i = 0; i < stateMachine.length; i++) {
410 if (stateMachine[i] instanceof FinalState) {
411 FinalState fstate = (FinalState) stateMachine[i];
412 if (fstate.phoneList != null) {
413 for (int j = 0; j < fstate.phoneList.length; j++) {
414 set.add(fstate.phoneList[j]);
419 return new ArrayList(set);
424 * Gets the <code>State</code> at the given index. This may
425 * replace a <code>String</code> at
426 * the current spot with an actual <code>State</code> instance.
428 * @param i the index into the state machine
430 * @return the <code>State</code> at the given index.
432 protected State getState(int i) {
434 if (stateMachine[i] instanceof String) {
435 state = getState((String) stateMachine[i]);
436 if (tokenizeOnLookup) {
437 stateMachine[i] = state;
440 state = (State) stateMachine[i];
446 * Gets the <code>State</code> based upon the <code>String</code>.
448 * @param s the string to parse
450 * @return the parsed <code>State</code>
452 protected State getState(String s) {
453 StringTokenizer tokenizer = new StringTokenizer(s, " ");
454 return getState(tokenizer.nextToken(), tokenizer);
458 * Gets the <code>State</code> based upon the <code>type</code>
459 * and <code>tokenizer<code>.
461 * @param type one of <code>STATE</code> or <code>PHONE</code>
462 * @param tokenizer a <code>StringTokenizer</code> containing the
465 * @return the parsed <code>State</code>
467 protected State getState(String type, StringTokenizer tokenizer) {
468 if (type.equals(STATE)) {
469 int index = Integer.parseInt(tokenizer.nextToken());
470 String c = tokenizer.nextToken();
471 int qtrue = Integer.parseInt(tokenizer.nextToken());
472 int qfalse = Integer.parseInt(tokenizer.nextToken());
473 return new DecisionState(index, c.charAt(0), qtrue, qfalse);
474 } else if (type.equals(PHONE)) {
475 return new FinalState(tokenizer.nextToken());
481 * Makes a character array that looks like "000#word#000".
483 * @param word the original word
485 * @return the padded word
487 protected char[] getFullBuff(String word) {
488 char[] full_buff = new char[word.length() + (2 * WINDOW_SIZE)];
490 // Make full_buff look like "000#word#000"
492 for (int i = 0; i < (WINDOW_SIZE - 1); i++)
496 full_buff[WINDOW_SIZE - 1] = '#';
497 word.getChars(0,word.length(),full_buff,WINDOW_SIZE);
498 for (int i = 0; i < (WINDOW_SIZE - 1); i++)
500 full_buff[full_buff.length - i - 1] = '0';
502 full_buff[full_buff.length - WINDOW_SIZE] = '#';
507 * Calculates the phone list for a given word. If a phone list cannot
508 * be determined, <code>null</code> is returned. This particular
509 * implementation ignores the part of speech.
511 * @param word the word to find
512 * @param partOfSpeech the part of speech.
514 * @return the list of phones for word or <code>null</code>
516 public String[] getPhones(String word, String partOfSpeech) {
517 ArrayList phoneList = new ArrayList();
523 // Create "000#word#000"
525 char[] full_buff = getFullBuff(word);
527 // For each character in the word, create a WINDOW_SIZE
528 // context on each size of the character, and then ask the
529 // state machine what's next. It's magic. BTW, this goes
530 // through the word from beginning to end. Flite goes through
531 // it from end to beginning. There doesn't seem to be a
532 // difference in the result.
534 for (int pos = 0; pos < word.length(); pos++) {
535 for (int i = 0; i < WINDOW_SIZE; i++) {
536 fval_buff[i] = full_buff[pos + i];
537 fval_buff[i + WINDOW_SIZE] =
538 full_buff[i + pos + 1 + WINDOW_SIZE];
540 c = word.charAt(pos);
541 startIndex = (Integer) letterIndex.get(Character.toString(c));
542 if (startIndex == null) {
545 stateIndex = startIndex.intValue();
546 currentState = getState(stateIndex);
547 while (!(currentState instanceof FinalState)) {
550 currentState).getNextState(fval_buff);
551 currentState = getState(stateIndex);
553 ((FinalState) currentState).append(phoneList);
555 return (String[]) phoneList.toArray(new String[0]);
559 * Compares this LTS to another for debugging purposes.
561 * @param other the other LTS to compare to
563 * @return <code>true</code> if these are equivalent
565 public boolean compare(LetterToSoundImpl other) {
567 // compare letter index table
569 for (Iterator i = letterIndex.keySet().iterator(); i.hasNext(); ) {
570 String key = (String) i.next();
571 Integer thisIndex = (Integer) letterIndex.get(key);
572 Integer otherIndex = (Integer) other.letterIndex.get(key);
573 if (!thisIndex.equals(otherIndex)) {
574 System.out.println("Bad Index for " + key);
581 for (int i = 0; i < stateMachine.length; i++) {
582 State state = getState(i);
583 State otherState = other.getState(i);
584 if (!state.compare(otherState)) {
585 System.out.println("Bad state " + i);
594 * A marker interface for the states in the LTS state machine.
599 static interface State {
600 public void writeBinary(DataOutputStream dos) throws IOException;
601 public boolean compare(State other);
606 * A <code>State</code> that represents a decision to be made.
610 static class DecisionState implements State {
611 final static int TYPE = 1;
620 * @param index the index into a string for comparison to c
621 * @param c the character to match in a string at index
622 * @param qtrue the state to go to in the state machine on a match
623 * @param qfalse the state to go to in the state machine on no match
625 public DecisionState(int index, char c, int qtrue, int qfalse) {
629 this.qfalse = qfalse;
633 * Gets the next state to go to based upon the given character
636 * @param chars the characters for comparison
638 * @ret an index into the state machine.
640 public int getNextState(char[] chars) {
641 return (chars[index] == c) ? qtrue : qfalse;
645 * Outputs this <code>State</code> as though it came from the
648 * @return a <code>String</code> describing this <code>State</code>.
650 public String toString() {
651 return STATE + " " + Integer.toString(index)
652 + " " + Character.toString(c)
653 + " " + Integer.toString(qtrue)
654 + " " + Integer.toString(qfalse);
658 * Writes this <code>State</code> to the given output stream.
660 * @param dos the data output stream
662 * @throws IOException if an error occurs
664 public void writeBinary(DataOutputStream dos) throws IOException {
669 dos.writeInt(qfalse);
673 * Loads a <code>DecisionState</code> object from the given
676 * @param dis the data input stream
677 * @return a newly constructed decision state
679 * @throws IOException if an error occurs
681 public static State loadBinary(DataInputStream dis)
683 int index = dis.readInt();
684 char c = dis.readChar();
685 int qtrue = dis.readInt();
686 int qfalse = dis.readInt();
687 return new DecisionState(index, c, qtrue, qfalse);
691 * Compares this state to another state for debugging purposes.
693 * @param other the other state to compare against
695 * @return true if the states are equivalent
697 public boolean compare(State other) {
698 if (other instanceof DecisionState) {
699 DecisionState otherState = (DecisionState) other;
700 return index == otherState.index &&
702 qtrue == otherState.qtrue &&
703 qfalse == otherState.qfalse;
711 * A <code>State</code> that represents a final state in the
712 * state machine. It contains one or more phones from the
717 static class FinalState implements State {
718 final static int TYPE = 2;
722 * Class constructor. The string "epsilon" is used to indicate
725 * @param phones the phones for this state
727 public FinalState(String phones) {
728 if (phones.equals("epsilon")) {
731 int i = phones.indexOf('-');
733 phoneList = new String[2];
734 phoneList[0] = phones.substring(0, i);
735 phoneList[1] = phones.substring(i + 1);
737 phoneList = new String[1];
738 phoneList[0] = phones;
746 * @param phones an array of phones for this state
748 public FinalState(String[] phones) {
753 * Appends the phone list for this state to the given
754 * <code>ArrayList</code>.
756 * @param array the array to append to
758 public void append(ArrayList array) {
759 if (phoneList == null) {
762 for (int i = 0; i < phoneList.length; i++) {
763 array.add(phoneList[i]);
769 * Outputs this <code>State</code> as though it came from the
770 * text input file. The string "epsilon" is used to indicate
773 * @return a <code>String</code> describing this <code>State</code>
775 public String toString() {
776 if (phoneList == null) {
777 return PHONE + " epsilon";
778 } else if (phoneList.length == 1) {
779 return PHONE + " " + phoneList[0];
781 return PHONE + " " + phoneList[0] + "-" + phoneList[1];
786 * Compares this state to another state for debugging
789 * @param other the other state to compare against
791 * @return <code>true</code> if the states are equivalent
793 public boolean compare(State other) {
794 if (other instanceof FinalState) {
795 FinalState otherState = (FinalState) other;
796 if (phoneList == null) {
797 return otherState.phoneList == null;
799 for (int i = 0; i < phoneList.length; i++) {
800 if (!phoneList[i].equals(otherState.phoneList[i])) {
812 * Writes this state to the given output stream.
814 * @param dos the data output stream
816 * @throws IOException if an error occurs
818 public void writeBinary(DataOutputStream dos) throws IOException {
820 if (phoneList == null) {
823 dos.writeInt(phoneList.length);
824 for (int i = 0; i < phoneList.length; i++) {
825 dos.writeInt(phonemeTable.indexOf(phoneList[i]));
831 * Loads a FinalState object from the given input stream
833 * @param dis the data input stream
835 * @return a newly constructed final state
837 * @throws IOException if an error occurs
839 public static State loadBinary(DataInputStream dis)
842 int phoneListLength = dis.readInt();
844 if (phoneListLength == 0) {
847 phoneList = new String[phoneListLength];
849 for (int i = 0; i < phoneListLength; i++) {
850 int index = dis.readInt();
851 phoneList[i] = (String) phonemeTable.get(index);
853 return new FinalState(phoneList);
859 * Translates between text and binary forms of the CMU6 LTS rules.
861 public static void main(String[] args) {
862 LexiconImpl lex, lex2;
863 boolean showTimes = false;
864 String srcPath = ".";
865 String destPath = ".";
866 String name = "cmulex_lts";
869 if (args.length > 0) {
870 BulkTimer timer = new BulkTimer();
872 for (int i = 0 ; i < args.length; i++) {
873 if (args[i].equals("-src")) {
875 } else if (args[i].equals("-dest")) {
876 destPath = args[++i];
877 } else if (args[i].equals("-name")
878 && i < args.length -1) {
880 } else if (args[i].equals("-generate_binary")) {
882 System.out.println("Loading " + name);
883 timer.start("load_text");
884 LetterToSoundImpl text = new LetterToSoundImpl(
885 new URL("file:" + srcPath + "/"
888 timer.stop("load_text");
890 System.out.println("Dumping " + name);
891 timer.start("dump_binary");
892 text.dumpBinary(destPath + "/" + name + ".bin");
893 timer.stop("dump_binary");
895 } else if (args[i].equals("-compare")) {
897 timer.start("load_text");
898 LetterToSoundImpl text = new LetterToSoundImpl(
899 new URL("file:./" + name + ".txt"), false);
900 timer.stop("load_text");
902 timer.start("load_binary");
903 LetterToSoundImpl binary = new LetterToSoundImpl(
904 new URL("file:./" + name + ".bin"), true);
905 timer.stop("load_binary");
907 timer.start("compare");
908 if (!text.compare(binary)) {
909 System.out.println("NOT EQUIVALENT");
911 System.out.println("ok");
913 timer.stop("compare");
914 } else if (args[i].equals("-showtimes")) {
917 System.out.println("Unknown option " + args[i]);
922 timer.show("LTS loading and dumping");
925 System.out.println("Options: ");
926 System.out.println(" -src path");
927 System.out.println(" -dest path");
928 System.out.println(" -compare");
929 System.out.println(" -generate_binary");
930 System.out.println(" -showTimes");
932 } catch (IOException ioe) {
933 System.err.println(ioe);