2 * Portions Copyright 2004 DFKI GmbH.
3 * Portions Copyright 2001 Sun Microsystems, Inc.
4 * Portions Copyright 1999-2001 Language Technologies Institute,
5 * Carnegie Mellon University.
6 * All Rights Reserved. Use is subject to license terms.
8 * See the file "license.terms" for information on usage and
9 * redistribution of this file, and for a DISCLAIMER OF ALL
12 package de.dfki.lt.freetts;
14 import java.io.IOException;
16 import java.util.Locale;
18 import com.sun.speech.freetts.Age;
19 import com.sun.speech.freetts.Gender;
20 import com.sun.speech.freetts.PartOfSpeech;
21 import com.sun.speech.freetts.PartOfSpeechImpl;
22 import com.sun.speech.freetts.PhoneSet;
23 import com.sun.speech.freetts.PhoneSetImpl;
24 import com.sun.speech.freetts.Tokenizer;
25 import com.sun.speech.freetts.UtteranceProcessor;
26 import com.sun.speech.freetts.Voice;
27 import com.sun.speech.freetts.clunits.ClusterUnitPitchmarkGenerator;
28 import com.sun.speech.freetts.clunits.ClusterUnitSelector;
29 import com.sun.speech.freetts.en.us.CMULexicon;
30 import com.sun.speech.freetts.en.us.FeatureProcessors;
31 import com.sun.speech.freetts.lexicon.Lexicon;
32 import com.sun.speech.freetts.relp.AudioOutput;
33 import com.sun.speech.freetts.relp.SampleInfo;
34 import com.sun.speech.freetts.relp.UnitConcatenator;
37 * A simple dummy voice as a starting point for non-US-English
38 * cluster unit voices. All NLP stuff would need to be implemented
39 * in order for this to become a full TTS voice.
41 public class ClusterUnitVoice extends Voice implements ConcatenativeVoice {
// Phone set consulted by getPhoneFeature(); only assigned in
// setupFeatureProcessors(), and only when phonesetURL is non-null.
42 private PhoneSet phoneSet;
// Location of the unit database, as handed to the constructor.
43 protected URL database;
// Optional phone set location; read by setupFeatureProcessors().
44 protected URL phonesetURL;
// Optional part-of-speech table location; read by setupFeatureProcessors().
45 protected URL partOfSpeechURL;
// Unit selector created in the constructor from getDatabase() and unitNamer;
// getSampleInfo() delegates to it.
46 protected ClusterUnitSelector unitSelector;
// Naming strategy passed to the ClusterUnitSelector constructor (may be null).
47 private ClusterUnitNamer unitNamer;
// Convenience constructor: forwards all arguments to the 12-argument
// constructor and passes null for unitNamer, phonesetURL and partOfSpeechURL.
48 public ClusterUnitVoice(String name, Gender gender, Age age,
49 String description, Locale locale, String domain,
50 String organization, Lexicon lexicon, URL database) {
51 this(name, gender, age, description, locale, domain,
52 organization, lexicon, database, null, null, null);
56 * Creates a ClusterUnitVoice
58 * @param database the database of the voice
59 * @param unitNamer specifies the name of the Units (if null, an
60 * ldom naming scheme will be used: 'ae_afternoon')
61 * @param phonesetURL leads to the phoneset, which will be used
62 * for the FeatureProcessors (can be null)
63 * @param partOfSpeechURL leads to the pos-textfile which will be used
64 * for the FeatureProcessors (can be null)
// Full constructor: initialises the Voice superclass, installs a lexicon,
// stores the resource locations and builds the cluster unit selector.
66 public ClusterUnitVoice(String name, Gender gender, Age age,
67 String description, Locale locale, String domain,
68 String organization, Lexicon lexicon, URL database,
69 ClusterUnitNamer unitNamer, URL phonesetURL, URL partOfSpeechURL) {
71 //TODO: do something useful with the lexicon
72 super(name, gender, age, description, locale,
73 domain, organization);
// NOTE(review): the statements between this condition and the dummy-lexicon
// fallback below are not visible in this extract; presumably the supplied
// lexicon is installed here and the fallback is the else branch -- confirm.
77 if (lexicon != null) {
80 // Use a small dummy lexicon
81 setLexicon(new CMULexicon("cmutimelex"));
// Keep the constructor arguments; phonesetURL and partOfSpeechURL are read
// later by setupFeatureProcessors().
83 this.database = database;
84 this.unitNamer = unitNamer;
85 this.phonesetURL = phonesetURL;
86 this.partOfSpeechURL = partOfSpeechURL;
// getDatabase() is a public, non-final method, so a subclass override is what
// gets invoked here, before the subclass constructor body has run.
88 unitSelector = new ClusterUnitSelector(getDatabase(), unitNamer);
89 } catch (IOException ioe) {
// NOTE(review): the failure is only printed. unitSelector then keeps its
// default value (null) and getSampleInfo() would fail with a
// NullPointerException on first use.
90 ioe.printStackTrace();
95 * Get the sample info for the underlying database.
96 * @return the sample info object
// Delegates to the unit selector that was created in the constructor.
98 public SampleInfo getSampleInfo() {
99 return unitSelector.getSampleInfo();
// NOTE(review): the body of getTokenizer() is not visible in this extract,
// so the tokenizer it returns cannot be documented from here -- confirm.
102 public Tokenizer getTokenizer() {
// Loader hook for this voice; the visible body only registers the feature
// processors via setupFeatureProcessors() and propagates its IOException.
107 protected void loader() throws IOException {
108 setupFeatureProcessors();
// Returns a freshly constructed AudioOutput processor on every call.
112 protected UtteranceProcessor getAudioOutput() throws IOException {
113 return new AudioOutput();
117 * Gets the URL of the database that defines the unit data for this voice.
120 * @return a URL to the database
// Called from the constructor to locate the unit database.
// NOTE(review): the body is not visible in this extract; presumably it
// returns the database field -- confirm.
122 public URL getDatabase() {
127 * Returns the unit selector to be used by this voice.
128 * Derived voices typically override this to customize behaviors.
129 * This voice uses a cluster unit selector as the unit selector.
131 * @return the unit selector
133 * @throws IOException if an IO error occurs while getting
// NOTE(review): the body is not visible in this extract; presumably it
// returns the unitSelector field built in the constructor -- confirm.
136 public UtteranceProcessor getUnitSelector() throws IOException {
141 * Returns the pitch mark generator to be used by this voice.
142 * Derived voices typically override this to customize behaviors.
143 * This voice uses a cluster unit pitchmark generator.
145 * @return the pitch mark generator
147 * @throws IOException if an IO error occurs while getting
// Returns a freshly constructed ClusterUnitPitchmarkGenerator on every call.
150 public UtteranceProcessor getPitchmarkGenerator() throws IOException {
151 return new ClusterUnitPitchmarkGenerator();
155 * Returns the unit concatenator to be used by this voice.
156 * Derived voices typically override this to customize behaviors.
157 * This voice uses a UnitConcatenator as the unit concatenator.
159 * @return the unit concatenator
161 * @throws IOException if an IO error occurs while getting
// Returns a freshly constructed UnitConcatenator on every call.
164 public UtteranceProcessor getUnitConcatenator() throws IOException {
165 return new UnitConcatenator();
// Registers the named feature processors of this voice through
// addFeatureProcessor(name, processor). Every processor comes from the
// en.us FeatureProcessors class imported at the top of the file.
// NOTE(review): closing braces and some statements of this method are not
// visible in this extract, so the exact extent of the two if-blocks below
// cannot be confirmed from here.
168 protected void setupFeatureProcessors() throws IOException {
// Load the phone set only when a URL was supplied; getPhoneFeature()
// checks phoneSet for null before using it.
169 if(phonesetURL != null){
170 phoneSet = new PhoneSetImpl(phonesetURL);
// The "gpos" processor is built from the part-of-speech table loaded from
// partOfSpeechURL. NOTE(review): the remaining PartOfSpeechImpl constructor
// argument(s) are not visible here -- confirm against the full file.
172 if(partOfSpeechURL != null){
173 PartOfSpeech pos = new PartOfSpeechImpl(partOfSpeechURL,
175 addFeatureProcessor("gpos", new FeatureProcessors.Gpos(pos));
// Processors registered under word_*, syl_* and related names.
181 addFeatureProcessor("word_break", new FeatureProcessors.WordBreak());
182 addFeatureProcessor("word_punc", new FeatureProcessors.WordPunc());
183 addFeatureProcessor("word_numsyls",new FeatureProcessors.WordNumSyls());
184 addFeatureProcessor("ssyl_in", new FeatureProcessors.StressedSylIn());
185 addFeatureProcessor("syl_in", new FeatureProcessors.SylIn());
186 addFeatureProcessor("syl_out", new FeatureProcessors.SylOut());
187 addFeatureProcessor("ssyl_out", new
188 FeatureProcessors.StressedSylOut());
189 addFeatureProcessor("syl_break", new FeatureProcessors.SylBreak());
// "old_syl_break" is registered with the same SylBreak processor class as
// "syl_break" above.
190 addFeatureProcessor("old_syl_break", new FeatureProcessors.SylBreak());
191 addFeatureProcessor("num_digits", new FeatureProcessors.NumDigits());
192 addFeatureProcessor("month_range", new FeatureProcessors.MonthRange());
193 addFeatureProcessor("token_pos_guess",
194 new FeatureProcessors.TokenPosGuess());
195 addFeatureProcessor("segment_duration",
196 new FeatureProcessors.SegmentDuration());
197 addFeatureProcessor("sub_phrases", new FeatureProcessors.SubPhrases());
198 addFeatureProcessor("asyl_in", new FeatureProcessors.AccentedSylIn());
199 addFeatureProcessor("last_accent", new FeatureProcessors.LastAccent());
200 addFeatureProcessor("pos_in_syl", new FeatureProcessors.PosInSyl());
201 addFeatureProcessor("position_type", new
202 FeatureProcessors.PositionType());
// Processors registered under ph_* names (PH_* classes).
204 addFeatureProcessor("ph_cplace", new FeatureProcessors.PH_CPlace());
205 addFeatureProcessor("ph_ctype", new FeatureProcessors.PH_CType());
206 addFeatureProcessor("ph_cvox", new FeatureProcessors.PH_CVox());
207 addFeatureProcessor("ph_vc", new FeatureProcessors.PH_VC());
208 addFeatureProcessor("ph_vfront", new FeatureProcessors.PH_VFront());
209 addFeatureProcessor("ph_vheight", new FeatureProcessors.PH_VHeight());
210 addFeatureProcessor("ph_vlng", new FeatureProcessors.PH_VLength());
211 addFeatureProcessor("ph_vrnd", new FeatureProcessors.PH_VRnd());
// Processors registered under seg_coda_* / seg_onset_* names, in pairs.
213 addFeatureProcessor("seg_coda_fric", new
214 FeatureProcessors.SegCodaFric());
215 addFeatureProcessor("seg_onset_fric", new
216 FeatureProcessors.SegOnsetFric());
218 addFeatureProcessor("seg_coda_stop", new
219 FeatureProcessors.SegCodaStop());
220 addFeatureProcessor("seg_onset_stop", new
221 FeatureProcessors.SegOnsetStop());
223 addFeatureProcessor("seg_coda_nasal", new
224 FeatureProcessors.SegCodaNasal());
225 addFeatureProcessor("seg_onset_nasal", new
226 FeatureProcessors.SegOnsetNasal());
228 addFeatureProcessor("seg_coda_glide", new
229 FeatureProcessors.SegCodaGlide());
230 addFeatureProcessor("seg_onset_glide", new
231 FeatureProcessors.SegOnsetGlide());
// Remaining onset/coda size processors and the "accented" processor.
233 addFeatureProcessor("seg_onsetcoda", new
234 FeatureProcessors.SegOnsetCoda());
235 addFeatureProcessor("syl_codasize", new
236 FeatureProcessors.SylCodaSize());
237 addFeatureProcessor("syl_onsetsize", new
238 FeatureProcessors.SylOnsetSize());
239 addFeatureProcessor("accented", new FeatureProcessors.Accented());
243 * Given a phoneme and a feature name, return the feature
245 * @param phone the phoneme of interest
246 * @param featureName the name of the feature of interest
248 * @return the feature with the given name
// Looks up featureName for the given phone in the loaded phone set.
// NOTE(review): what is returned when phoneSet is null (no phonesetURL was
// supplied) is not visible in this extract -- confirm against the full file.
250 public String getPhoneFeature(String phone, String featureName) {
251 if (phoneSet != null)
252 return phoneSet.getPhoneFeature(phone, featureName);