2 * Portions Copyright 2004 DFKI GmbH.
3 * Portions Copyright 2001 Sun Microsystems, Inc.
4 * Portions Copyright 1999-2001 Language Technologies Institute,
5 * Carnegie Mellon University.
6 * All Rights Reserved. Use is subject to license terms.
8 * See the file "license.terms" for information on usage and
9 * redistribution of this file, and for a DISCLAIMER OF ALL
12 package de.dfki.lt.freetts;
14 import java.io.IOException;
16 import java.util.Locale;
18 import com.sun.speech.freetts.Age;
19 import com.sun.speech.freetts.Gender;
20 import com.sun.speech.freetts.PartOfSpeech;
21 import com.sun.speech.freetts.PartOfSpeechImpl;
22 import com.sun.speech.freetts.PhoneSet;
23 import com.sun.speech.freetts.PhoneSetImpl;
24 import com.sun.speech.freetts.Tokenizer;
25 import com.sun.speech.freetts.UtteranceProcessor;
26 import com.sun.speech.freetts.Voice;
27 import com.sun.speech.freetts.diphone.DiphonePitchmarkGenerator;
28 import com.sun.speech.freetts.diphone.DiphoneUnitSelector;
29 import com.sun.speech.freetts.en.us.CMULexicon;
30 import com.sun.speech.freetts.en.us.FeatureProcessors;
31 import com.sun.speech.freetts.lexicon.Lexicon;
32 import com.sun.speech.freetts.relp.AudioOutput;
33 import com.sun.speech.freetts.relp.SampleInfo;
34 import com.sun.speech.freetts.relp.UnitConcatenator;
38 * A simple dummy voice as a starting point for non-US-English
39 * diphone voices. All NLP stuff would need to be implemented
40 * in order for this to become a full TTS voice.
42 public class DiphoneVoice extends Voice implements ConcatenativeVoice {
43 private PhoneSet phoneSet;
44 protected URL database;
45 protected URL phonesetURL;
46 protected URL partOfSpeechURL;
47 protected DiphoneUnitSelector unitSelector;
/**
 * Creates a diphone voice that has neither a phone set nor a
 * part-of-speech table.
 * <p>
 * All arguments are handed on unchanged to the eleven-argument
 * constructor; <code>null</code> is supplied for both
 * <code>phonesetURL</code> and <code>partOfSpeechURL</code>.
 *
 * @param name         forwarded to the full constructor
 * @param gender       forwarded to the full constructor
 * @param age          forwarded to the full constructor
 * @param description  forwarded to the full constructor
 * @param locale       forwarded to the full constructor
 * @param domain       forwarded to the full constructor
 * @param organization forwarded to the full constructor
 * @param lexicon      forwarded to the full constructor
 * @param database     the database of the voice
 */
public DiphoneVoice(
        String name,
        Gender gender,
        Age age,
        String description,
        Locale locale,
        String domain,
        String organization,
        Lexicon lexicon,
        URL database) {
    this(name, gender, age, description, locale, domain, organization,
            lexicon, database,
            null,   // phonesetURL
            null);  // partOfSpeechURL
}
57 * Creates a DiphoneVoice.
59 * @param database the database of the voice
60 * @param lexicon the lexicon for this voice (if null, a small
61 * dummy lexicon, 'cmutimelex', is used instead)
62 * @param phonesetURL leads to the phoneset, which will be used
63 * for the FeatureProcessors (can be null)
64 * @param partOfSpeechURL leads to the pos-textfile which will be used
65 * for the FeatureProcessors (can be null)
67 public DiphoneVoice(String name, Gender gender, Age age,
68 String description, Locale locale, String domain,
69 String organization, Lexicon lexicon, URL database,
70 URL phonesetURL, URL partOfSpeechURL) {
72 //TODO: do something useful with the lexicon
73 super(name, gender, age, description, locale,
74 domain, organization);
75 // Set default prosody values:
79 if (lexicon != null) {
82 // Use a small dummy lexicon
83 setLexicon(new CMULexicon("cmutimelex"));
85 this.database = database;
86 this.phonesetURL = phonesetURL;
87 this.partOfSpeechURL = partOfSpeechURL;
89 this.unitSelector = new DiphoneUnitSelector(getDatabase());
90 } catch (IOException ioe) {
91 ioe.printStackTrace();
95 public Tokenizer getTokenizer() {
100 protected void loader() throws IOException {
101 setupFeatureProcessors();
105 * Get the sample info for the underlying database.
106 * @return the sample info object
108 public SampleInfo getSampleInfo() {
109 return unitSelector.getSampleInfo();
112 protected UtteranceProcessor getAudioOutput() throws IOException {
113 return new AudioOutput();
117 * Gets the URL of the database that defines the unit data for this voice.
120 * @return a url to the database
122 public URL getDatabase() {
127 * Returns the unit selector to be used by this voice.
128 * Derived voices typically override this to customize behaviors.
129 * This voice uses a diphone selector as the unit selector.
131 * @return the unit selector
133 * @throws IOException if an I/O error occurs while getting the unit selector
136 public UtteranceProcessor getUnitSelector() throws IOException {
141 * Returns the pitch mark generator to be used by this voice.
142 * Derived voices typically override this to customize behaviors.
143 * There is no default unit selector
145 * @return the post lexical processor
147 * @throws IOException if an IO error occurs while getting
150 public UtteranceProcessor getPitchmarkGenerator() throws IOException {
151 return new DiphonePitchmarkGenerator();
155 * Returns the unit concatenator to be used by this voice.
156 * Derived voices typically override this to customize behaviors.
157 * There is no default unit selector
159 * @return the post lexical processor
161 * @throws IOException if an IO error occurs while getting
164 public UtteranceProcessor getUnitConcatenator() throws IOException {
165 return new UnitConcatenator();
168 protected void setupFeatureProcessors() throws IOException {
169 if (phonesetURL != null) {
170 phoneSet = new PhoneSetImpl(phonesetURL);
172 if (partOfSpeechURL != null) {
173 PartOfSpeech pos = new PartOfSpeechImpl(partOfSpeechURL, "content");
174 addFeatureProcessor("gpos", new FeatureProcessors.Gpos(pos));
176 addFeatureProcessor("word_break", new FeatureProcessors.WordBreak());
177 addFeatureProcessor("word_punc", new FeatureProcessors.WordPunc());
178 addFeatureProcessor("word_numsyls",new FeatureProcessors.WordNumSyls());
179 addFeatureProcessor("ssyl_in", new FeatureProcessors.StressedSylIn());
180 addFeatureProcessor("syl_in", new FeatureProcessors.SylIn());
181 addFeatureProcessor("syl_out", new FeatureProcessors.SylOut());
182 addFeatureProcessor("ssyl_out", new
183 FeatureProcessors.StressedSylOut());
184 addFeatureProcessor("syl_break", new FeatureProcessors.SylBreak());
185 addFeatureProcessor("old_syl_break", new FeatureProcessors.SylBreak());
186 addFeatureProcessor("num_digits", new FeatureProcessors.NumDigits());
187 addFeatureProcessor("month_range", new FeatureProcessors.MonthRange());
188 addFeatureProcessor("token_pos_guess",
189 new FeatureProcessors.TokenPosGuess());
190 addFeatureProcessor("segment_duration",
191 new FeatureProcessors.SegmentDuration());
192 addFeatureProcessor("sub_phrases", new FeatureProcessors.SubPhrases());
193 addFeatureProcessor("asyl_in", new FeatureProcessors.AccentedSylIn());
194 addFeatureProcessor("last_accent", new FeatureProcessors.LastAccent());
195 addFeatureProcessor("pos_in_syl", new FeatureProcessors.PosInSyl());
196 addFeatureProcessor("position_type", new
197 FeatureProcessors.PositionType());
199 addFeatureProcessor("ph_cplace", new FeatureProcessors.PH_CPlace());
200 addFeatureProcessor("ph_ctype", new FeatureProcessors.PH_CType());
201 addFeatureProcessor("ph_cvox", new FeatureProcessors.PH_CVox());
202 addFeatureProcessor("ph_vc", new FeatureProcessors.PH_VC());
203 addFeatureProcessor("ph_vfront", new FeatureProcessors.PH_VFront());
204 addFeatureProcessor("ph_vheight", new FeatureProcessors.PH_VHeight());
205 addFeatureProcessor("ph_vlng", new FeatureProcessors.PH_VLength());
206 addFeatureProcessor("ph_vrnd", new FeatureProcessors.PH_VRnd());
208 addFeatureProcessor("seg_coda_fric", new
209 FeatureProcessors.SegCodaFric());
210 addFeatureProcessor("seg_onset_fric", new
211 FeatureProcessors.SegOnsetFric());
213 addFeatureProcessor("seg_coda_stop", new
214 FeatureProcessors.SegCodaStop());
215 addFeatureProcessor("seg_onset_stop", new
216 FeatureProcessors.SegOnsetStop());
218 addFeatureProcessor("seg_coda_nasal", new
219 FeatureProcessors.SegCodaNasal());
220 addFeatureProcessor("seg_onset_nasal", new
221 FeatureProcessors.SegOnsetNasal());
223 addFeatureProcessor("seg_coda_glide", new
224 FeatureProcessors.SegCodaGlide());
225 addFeatureProcessor("seg_onset_glide", new
226 FeatureProcessors.SegOnsetGlide());
228 addFeatureProcessor("seg_onsetcoda", new
229 FeatureProcessors.SegOnsetCoda());
230 addFeatureProcessor("syl_codasize", new
231 FeatureProcessors.SylCodaSize());
232 addFeatureProcessor("syl_onsetsize", new
233 FeatureProcessors.SylOnsetSize());
234 addFeatureProcessor("accented", new FeatureProcessors.Accented());
238 * Given a phoneme and a feature name, return the feature
240 * @param phone the phoneme of interest
241 * @param featureName the name of the feature of interest
243 * @return the feature with the given name
245 public String getPhoneFeature(String phone, String featureName) {
246 if (phoneSet != null)
247 return phoneSet.getPhoneFeature(phone, featureName);