2 * Portions Copyright 2001 Sun Microsystems, Inc.
3 * Portions Copyright 1999-2001 Language Technologies Institute,
4 * Carnegie Mellon University.
5 * All Rights Reserved. Use is subject to license terms.
7 * See the file "license.terms" for information on usage and
8 * redistribution of this file, and for a DISCLAIMER OF ALL
11 package com.sun.speech.freetts.en.us;
13 import java.io.IOException;
15 import java.util.List;
17 import com.sun.speech.freetts.VoiceManager;
18 import com.sun.speech.freetts.lexicon.LexiconImpl;
19 import com.sun.speech.freetts.util.BulkTimer;
22 * Provides a CMU lexicon-specific implementation of a Lexicon that is
23 * stored in a text file.
25 public class CMULexicon extends LexiconImpl {
// First letters that mark a phone as a vowel; isVowel tests the first
// character of a phone against this set.
30 static final private String VOWELS = "aeiou";
// First letters that getSonority classifies as glides or liquids.
35 static final private String GLIDES_LIQUIDS = "wylr";
// First letters that getSonority classifies as nasals.
40 static final private String NASALS = "nm";
// First letters that getSonority classifies as voiced obstruents.
// NOTE(review): this string repeats the letter n and also lists letters
// that already appear in GLIDES_LIQUIDS and NASALS (l, m, n, r, w, y).
// getSonority consults those two sets earlier in its else-if chain, so
// the overlapping letters can never be matched through this constant.
// Looks harmless, but confirm before trimming the string.
45 static final private String VOICED_OBSTRUENTS = "bdgjlmnnnrvwyz";
48 * Creates a CMULexicon based upon the given compiled and addenda
49 * DBs and the given letter to sound rules
51 * @param compiledURL the compiled database is loaded from here
52 * @param addendaURL the database addenda is loaded from here
53 * @param letterToSoundURL the letter to sound rules are loaded
55 * @param binary if <code>true</code> the input data are loaded as
56 * binary ; otherwise if <code>false</code> the input
57 * data are loaded as text.
// Builds a lexicon from explicitly supplied resource locations.
60 public CMULexicon(URL compiledURL,
// Forwards the three URLs and the binary flag unchanged to
// setLexiconParameters, which is not defined in this class
// (presumably inherited from LexiconImpl; confirm).
64 setLexiconParameters(compiledURL, addendaURL, letterToSoundURL, binary);
68 * Creates the default CMU Lexicon which is a binary lexicon
75 * Creates the CMU Lexicon which is a binary lexicon
77 * @param basename the basename for the lexicon.
// Convenience constructor that takes only the lexicon basename; the
// accompanying Javadoc describes the resulting lexicon as binary.
79 public CMULexicon(String basename) {
// Locates the three lexicon resources for the given basename and hands
// them to setLexiconParameters. The resource names are
// <basename>_lts.<type>, <basename>_compiled.<type> and
// <basename>_addenda.<type>, where <type> is bin when useBinaryIO is
// true and txt otherwise.
83 public CMULexicon(String basename, boolean useBinaryIO) {
// First attempt: resolve through the class loader returned by
// VoiceManager.getVoiceClassLoader(), using the full package path
// com/sun/speech/freetts/en/us/ in front of each resource name.
84 java.net.URLClassLoader classLoader =
85 VoiceManager.getVoiceClassLoader();
86 String type = (useBinaryIO ? "bin" : "txt");
88 URL letterToSoundURL = classLoader.getResource(
89 "com/sun/speech/freetts/en/us/" + basename + "_lts." + type);
90 URL compiledURL = classLoader.getResource(
91 "com/sun/speech/freetts/en/us/" + basename
92 + "_compiled." + type);
93 URL addendaURL = classLoader.getResource(
94 "com/sun/speech/freetts/en/us/" + basename
95 + "_addenda." + type);
97 /* Just another try with possibly a different class loader
98 * if the above didn't work.
// Fallback: only the letter-to-sound URL decides whether the first
// attempt failed. When it is null, all three URLs are looked up again
// through CMULexicon.class with bare resource names (no package prefix).
// NOTE(review): compiledURL and addendaURL are never null-checked on
// either path, so they can reach setLexiconParameters as null.
100 if (letterToSoundURL == null) {
// NOTE(review): raw Class type.
101 Class cls = CMULexicon.class;
102 letterToSoundURL = cls.getResource(basename + "_lts." + type);
103 compiledURL = cls.getResource(basename + "_compiled." + type);
104 addendaURL = cls.getResource(basename + "_addenda." + type);
// Second failure: a diagnostic message is produced. NOTE(review):
// confirm whether construction is meant to continue with null URLs
// after this point, since setLexiconParameters is still called below.
105 if (letterToSoundURL == null) {
107 "CMULexicon: Oh no! Couldn't find lexicon data!");
// Register whatever URLs were found together with the IO mode.
111 setLexiconParameters(compiledURL, addendaURL,
112 letterToSoundURL, useBinaryIO);
/**
 * Gets the CMULexicon for the default basename <code>cmulex</code>.
 *
 * @param useBinaryIO if true use binary IO to load DB
 *
 * @throws IOException if problems occurred while reading the data
 */
// Convenience overload: delegates to getInstance(String, boolean) with
// the fixed basename cmulex and the caller-supplied IO mode.
122 static public CMULexicon getInstance( boolean useBinaryIO)
124 return getInstance("cmulex", useBinaryIO);
/**
 * Gets the CMULexicon for the given basename.
 *
 * @param basename the basename of the lexicon resources to use
 * @param useBinaryIO if true use binary IO to load DB
 *
 * @throws IOException if problems occurred while reading the data
 */
// Factory method: builds a CMULexicon for the given basename and IO mode
// through the (String, boolean) constructor.
134 static public CMULexicon getInstance( String basename, boolean useBinaryIO)
136 CMULexicon lexicon = new CMULexicon(basename, useBinaryIO);
143 * Determines if the currentPhone represents a new syllable
146 * @param syllablePhones the phones in the current syllable so far
147 * @param wordPhones the phones for the whole word
148 * @param currentWordPhone the word phone in question
150 * @return <code>true</code> if the word phone in question is on a
151 * syllable boundary; otherwise <code>false</code>.
// Decides whether the phone at wordPhones[currentWordPhone] lies on a
// syllable boundary. The conditions below form a single else-if chain,
// so the first one that holds determines the outcome.
// NOTE(review): syllablePhones is a raw List whose elements are cast to
// String further down.
153 public boolean isSyllableBoundary(List syllablePhones,
155 int currentWordPhone) {
// Case 1: the index is at or past the end of the word.
156 if (currentWordPhone >= wordPhones.length) {
// Case 2: the phone in question is the silence phone.
158 } else if (isSilence(wordPhones[currentWordPhone])) {
// Case 3: no vowel remains from this index to the end of the word.
160 } else if (!hasVowel(wordPhones, currentWordPhone)) { // rest of word
// Case 4: the syllable collected so far does not contain a vowel yet.
162 } else if (!hasVowel(syllablePhones)) { // current syllable
// Case 5: the phone in question is itself a vowel.
164 } else if (isVowel(wordPhones[currentWordPhone])) {
// Case 6: the phone in question is the last phone of the word. Because
// this case and case 1 are tested first, the index currentWordPhone + 1
// used below stays inside the array when the chain falls through.
166 } else if (currentWordPhone == (wordPhones.length - 1)) {
// Fall-through case: compare sonorities. The expression below reads the
// most recently added phone of the current syllable (presumably feeding
// p; confirm), n is the sonority of the phone in question and nn the
// sonority of the phone after it.
171 (String) syllablePhones.get(syllablePhones.size() - 1));
172 n = getSonority(wordPhones[currentWordPhone]);
173 nn = getSonority(wordPhones[currentWordPhone + 1]);
// Holds when sonority does not decrease across the three phones.
174 if ((p <= n) && (n <= nn)) {
183 * Determines if the given phone represents a silent phone.
185 * @param phone the phone to test
187 * @return <code>true</code> if the phone represents a silent
188 * phone; otherwise <code>false</code>.
190 static protected boolean isSilence(String phone) {
191 return phone.equals("pau");
195 * Determines if there is a vowel in the remainder of the array,
196 * starting at the given index.
198 * @param phones the set of phones to check
199 * @param index start checking at this index
201 * @return <code>true</code> if a vowel is found;
202 * otherwise <code>false</code>.
// Scans phones from the given index up to the end of the array, testing
// each entry with isVowel. When index is at or beyond phones.length the
// loop body never runs.
// NOTE(review): a negative index is not guarded against.
204 static protected boolean hasVowel(String[] phones, int index) {
205 for (int i = index; i < phones.length; i++) {
206 if (isVowel(phones[i])) {
214 * Determines if there is a vowel in given list of phones.
216 * @param phones the list of phones
218 * @return <code>true</code> if a vowel is found;
219 * otherwise <code>false</code>.
// Scans every element of the list in order, testing each with isVowel.
// NOTE(review): phones is a raw List; each element is cast to String, so
// a non-String element would raise a ClassCastException.
221 static protected boolean hasVowel(List phones) {
222 for (int i = 0; i < phones.size(); i++) {
223 if (isVowel((String) phones.get(i))) {
231 * Determines if the given phone is a vowel
233 * @param phone the phone to test
235 * @return <code>true</code> if phone is a vowel
236 * otherwise <code>false</code>.
238 static protected boolean isVowel(String phone) {
239 return VOWELS.indexOf(phone.substring(0,1)) != -1;
243 * Determines the sonority for the given phone.
245 * @param phone the phone of interest
247 * @return an integer that classifies phone transitions
// Classifies a phone into a sonority class. The tests form one else-if
// chain, so the first matching category wins; apart from the silence
// test, every test looks only at the first character of the phone.
// NOTE(review): phone.substring(0,1) throws for an empty phone.
249 static protected int getSonority(String phone) {
// Category 1: vowels and the silence phone share a class.
250 if (isVowel(phone) || isSilence(phone)) {
// Category 2: glides and liquids.
252 } else if (GLIDES_LIQUIDS.indexOf(phone.substring(0,1)) != -1) {
// Category 3: nasals.
254 } else if (NASALS.indexOf(phone.substring(0,1)) != -1) {
// Category 4: voiced obstruents. Letters that VOICED_OBSTRUENTS shares
// with the two sets above were already consumed by the earlier branches.
256 } else if (VOICED_OBSTRUENTS.indexOf(phone.substring(0,1)) != -1) {
264 * Provides test code for the CMULexicon.
265 * <br><b>Usage:</b><br>
267 * com.sun.speech.freetts.en.us.CMULexicon [options]
269 * Where options is any combination of:
273 * -generate_binary [base_name]
// Command-line driver for the lexicon. Arguments are processed strictly
// left to right and action options run as soon as they are reached, so
// -src, -dest and -name only affect actions that appear after them.
279 public static void main(String[] args) {
280 LexiconImpl lex, lex2;
281 boolean showTimes = false;
// Defaults used when the corresponding option is not given.
282 String srcPath = ".";
283 String destPath = ".";
284 String baseName = "cmulex";
287 if (args.length > 0) {
// Overall timer around the whole option loop.
288 BulkTimer.LOAD.start();
289 for (int i = 0 ; i < args.length; i++) {
290 if (args[i].equals("-src")) {
// NOTE(review): -dest consumes the next argument without checking that
// one exists, unlike -name below; a trailing -dest would raise an
// ArrayIndexOutOfBoundsException.
292 } else if (args[i].equals("-dest")) {
293 destPath = args[++i];
// -name is only recognised when another argument follows it; a trailing
// -name falls through to the later branches.
294 } else if (args[i].equals("-name")
295 && i < args.length - 1) {
296 baseName = args[++i];
// -generate_binary: build a lexicon from the three text files found at
// file:<srcPath>/<baseName>_{compiled,addenda,lts}.txt and dump it in
// binary form to <destPath>/<baseName>.
297 } else if (args[i].equals("-generate_binary")) {
299 System.out.println("Loading " + baseName);
300 String path = "file:" + srcPath + "/" + baseName;
301 lex = new CMULexicon(
302 new URL(path + "_compiled.txt"),
303 new URL(path + "_addenda.txt"),
304 new URL(path + "_lts.txt"),
306 BulkTimer.LOAD.start("load_text");
308 BulkTimer.LOAD.stop("load_text");
310 System.out.println("Dumping " + baseName);
// NOTE(review): the timer label says dump_text although the timed call
// is dumpBinary.
311 BulkTimer.LOAD.start("dump_text");
312 lex.dumpBinary(destPath + "/" + baseName);
313 BulkTimer.LOAD.stop("dump_text");
// -compare: obtain the text and the binary variant of the lexicon via
// getInstance, timing each step under its own label.
315 } else if (args[i].equals("-compare")) {
317 BulkTimer.LOAD.start("load_text");
318 lex = CMULexicon.getInstance(baseName, false);
319 BulkTimer.LOAD.stop("load_text");
321 BulkTimer.LOAD.start("load_binary");
322 lex2 = CMULexicon.getInstance(baseName, true);
323 BulkTimer.LOAD.stop("load_binary");
325 BulkTimer.LOAD.start("compare");
327 BulkTimer.LOAD.stop("compare");
328 } else if (args[i].equals("-showtimes")) {
// Anything unrecognised is reported.
331 System.out.println("Unknown option " + args[i]);
334 BulkTimer.LOAD.stop();
336 BulkTimer.LOAD.show("CMULexicon loading and dumping");
// Usage text.
// NOTE(review): the -name option handled above is not listed here.
339 System.out.println("Options: ");
340 System.out.println(" -src path");
341 System.out.println(" -dest path");
342 System.out.println(" -compare");
343 System.out.println(" -generate_binary");
344 System.out.println(" -showtimes");
// IO failures are reported on standard error.
346 } catch (IOException ioe) {
347 System.err.println(ioe);