upstream version 1.2.2
[debian/freetts] / de / dfki / lt / freetts / ClusterUnitVoice.java
1 /**
2  * Portions Copyright 2004 DFKI GmbH.
3  * Portions Copyright 2001 Sun Microsystems, Inc.
4  * Portions Copyright 1999-2001 Language Technologies Institute, 
5  * Carnegie Mellon University.
6  * All Rights Reserved.  Use is subject to license terms.
7  * 
8  * See the file "license.terms" for information on usage and
9  * redistribution of this file, and for a DISCLAIMER OF ALL 
10  * WARRANTIES.
11  */
12 package de.dfki.lt.freetts;
13
14 import java.io.IOException;
15 import java.net.URL;
16 import java.util.Locale;
17
18 import com.sun.speech.freetts.Age;
19 import com.sun.speech.freetts.Gender;
20 import com.sun.speech.freetts.PartOfSpeech;
21 import com.sun.speech.freetts.PartOfSpeechImpl;
22 import com.sun.speech.freetts.PhoneSet;
23 import com.sun.speech.freetts.PhoneSetImpl;
24 import com.sun.speech.freetts.Tokenizer;
25 import com.sun.speech.freetts.UtteranceProcessor;
26 import com.sun.speech.freetts.Voice;
27 import com.sun.speech.freetts.clunits.ClusterUnitPitchmarkGenerator;
28 import com.sun.speech.freetts.clunits.ClusterUnitSelector;
29 import com.sun.speech.freetts.en.us.CMULexicon;
30 import com.sun.speech.freetts.en.us.FeatureProcessors;
31 import com.sun.speech.freetts.lexicon.Lexicon;
32 import com.sun.speech.freetts.relp.AudioOutput;
33 import com.sun.speech.freetts.relp.SampleInfo;
34 import com.sun.speech.freetts.relp.UnitConcatenator;
35
36 /**
37  * A simple dummy voice as a starting point for non-US-English
38  * cluster unit voices. All NLP stuff would need to be implemented
39  * in order for this to become a full TTS voice.
40  */
41 public class ClusterUnitVoice extends Voice implements ConcatenativeVoice {
42     private PhoneSet phoneSet;
43         protected URL database;
44         protected URL phonesetURL;
45         protected URL partOfSpeechURL;
46     protected ClusterUnitSelector unitSelector;
47         private ClusterUnitNamer unitNamer;
48         public ClusterUnitVoice(String name, Gender gender, Age age,
49                         String description, Locale locale, String domain,
50                         String organization, Lexicon lexicon, URL database) {
51                 this(name, gender, age, description, locale, domain,
52                                 organization, lexicon, database, null, null, null);
53         }
54         
55          /**
56      * Creates a ClusterUnitVoice
57      * 
58      * @param database the database of the voice
59      * @param unitNamer specifies the name of the Units (if null, an 
60      * ldom naming scheme will be used: 'ae_afternoon')
61      * @param phonesetURL leads to the phoneset, which will be used 
62      * for the FeatureProcessors (can be null)
63      * @param partOfSpeechURL leads to the pos-textfile which will be used
64      * for the FeatureProcessors (can be null)
65      */
66         public ClusterUnitVoice(String name, Gender gender, Age age,
67                         String description, Locale locale, String domain,
68                         String organization, Lexicon lexicon, URL database, 
69                         ClusterUnitNamer unitNamer, URL phonesetURL, URL partOfSpeechURL) {
70                 
71                 //TODO: do something useful with the lexicon
72                 super(name, gender, age, description, locale,
73                                 domain, organization);
74                 setRate(150f);
75                 setPitch(100F);
76                 setPitchRange(12F);
77         if (lexicon != null) {
78             setLexicon(lexicon);
79         } else {
80             // Use a small dummy lexicon
81             setLexicon(new CMULexicon("cmutimelex"));
82         }
83                 this.database = database;
84                 this.unitNamer = unitNamer;
85                 this.phonesetURL = phonesetURL;
86                 this.partOfSpeechURL = partOfSpeechURL;
87         try {
88             unitSelector = new ClusterUnitSelector(getDatabase(), unitNamer);
89         } catch (IOException ioe) {
90             ioe.printStackTrace();
91         }
92         }
93
94     /**
95      * Get the sample info for the underlying database.
96      * @return the sample info object
97      */
98     public SampleInfo getSampleInfo() {
99         return unitSelector.getSampleInfo();
100     }
101
102         public Tokenizer getTokenizer() {
103                 return null;
104         }
105         
106         
107         protected void loader() throws IOException {
108                 setupFeatureProcessors();
109         }
110         
111         
112         protected UtteranceProcessor getAudioOutput() throws IOException {
113                 return new AudioOutput();
114         }
115         
116         /**
117          * Gets the url to the database that defines the unit data for this
118          * voice.
119          *
120          * @return a url to the database
121          */
122         public URL getDatabase() {
123                 return database;
124         }
125         
126         /**
127          * Returns the unit selector to be used by this voice.
128          * Derived voices typically override this to customize behaviors.
129          * This voice uses  a cluster unit selector as the unit selector.
130          * 
131          * @return the post lexical processor
132          * 
133          * @throws IOException if an IO error occurs while getting
134          *     processor
135          */
136         public UtteranceProcessor getUnitSelector() throws IOException {
137         return unitSelector;
138     }
139     
140     /**
141      * Returns the pitch mark generator to be used by this voice.
142      * Derived voices typically override this to customize behaviors.
143      * There is no default unit selector
144      * 
145      * @return the post lexical processor
146      * 
147      * @throws IOException if an IO error occurs while getting
148      *     processor
149      */
150     public UtteranceProcessor getPitchmarkGenerator() throws IOException {
151         return new ClusterUnitPitchmarkGenerator();
152     }
153     
154     /**
155      * Returns the unit concatenator to be used by this voice.
156      * Derived voices typically override this to customize behaviors.
157      * There is no default unit selector
158      * 
159      * @return the post lexical processor
160      * 
161      * @throws IOException if an IO error occurs while getting
162      *     processor
163      */
164     public UtteranceProcessor getUnitConcatenator() throws IOException {
165         return new UnitConcatenator();
166     }
167
168     protected void setupFeatureProcessors() throws IOException {
169         if(phonesetURL != null){
170                        phoneSet  = new PhoneSetImpl(phonesetURL);
171                         }
172                 if(partOfSpeechURL != null){
173                         PartOfSpeech pos = new PartOfSpeechImpl(partOfSpeechURL,
174                         "content");
175                         addFeatureProcessor("gpos", new FeatureProcessors.Gpos(pos));
176                 }
177                 
178
179  
180
181         addFeatureProcessor("word_break", new FeatureProcessors.WordBreak());
182         addFeatureProcessor("word_punc", new FeatureProcessors.WordPunc());
183         addFeatureProcessor("word_numsyls",new FeatureProcessors.WordNumSyls());
184         addFeatureProcessor("ssyl_in", new FeatureProcessors.StressedSylIn());
185         addFeatureProcessor("syl_in", new FeatureProcessors.SylIn());
186         addFeatureProcessor("syl_out", new FeatureProcessors.SylOut());
187         addFeatureProcessor("ssyl_out", new
188                 FeatureProcessors.StressedSylOut());
189         addFeatureProcessor("syl_break", new FeatureProcessors.SylBreak());
190         addFeatureProcessor("old_syl_break", new FeatureProcessors.SylBreak());
191         addFeatureProcessor("num_digits", new FeatureProcessors.NumDigits());
192         addFeatureProcessor("month_range", new FeatureProcessors.MonthRange());
193         addFeatureProcessor("token_pos_guess", 
194                 new FeatureProcessors.TokenPosGuess());
195         addFeatureProcessor("segment_duration", 
196                 new FeatureProcessors.SegmentDuration());
197         addFeatureProcessor("sub_phrases", new FeatureProcessors.SubPhrases());
198         addFeatureProcessor("asyl_in", new FeatureProcessors.AccentedSylIn());
199         addFeatureProcessor("last_accent", new FeatureProcessors.LastAccent());
200         addFeatureProcessor("pos_in_syl", new FeatureProcessors.PosInSyl());
201         addFeatureProcessor("position_type", new
202                 FeatureProcessors.PositionType());
203
204         addFeatureProcessor("ph_cplace", new FeatureProcessors.PH_CPlace());
205         addFeatureProcessor("ph_ctype", new FeatureProcessors.PH_CType());
206         addFeatureProcessor("ph_cvox", new FeatureProcessors.PH_CVox());
207         addFeatureProcessor("ph_vc", new FeatureProcessors.PH_VC());
208         addFeatureProcessor("ph_vfront", new FeatureProcessors.PH_VFront());
209         addFeatureProcessor("ph_vheight", new FeatureProcessors.PH_VHeight());
210         addFeatureProcessor("ph_vlng", new FeatureProcessors.PH_VLength());
211         addFeatureProcessor("ph_vrnd", new FeatureProcessors.PH_VRnd());
212
213         addFeatureProcessor("seg_coda_fric", new
214                 FeatureProcessors.SegCodaFric());
215         addFeatureProcessor("seg_onset_fric", new
216                 FeatureProcessors.SegOnsetFric());
217
218         addFeatureProcessor("seg_coda_stop", new
219                 FeatureProcessors.SegCodaStop());
220         addFeatureProcessor("seg_onset_stop", new
221                 FeatureProcessors.SegOnsetStop());
222
223         addFeatureProcessor("seg_coda_nasal", new
224                 FeatureProcessors.SegCodaNasal());
225         addFeatureProcessor("seg_onset_nasal", new
226                 FeatureProcessors.SegOnsetNasal());
227
228         addFeatureProcessor("seg_coda_glide", new
229                 FeatureProcessors.SegCodaGlide());
230         addFeatureProcessor("seg_onset_glide", new
231                 FeatureProcessors.SegOnsetGlide());
232
233         addFeatureProcessor("seg_onsetcoda", new
234                 FeatureProcessors.SegOnsetCoda());
235         addFeatureProcessor("syl_codasize", new
236                 FeatureProcessors.SylCodaSize());
237         addFeatureProcessor("syl_onsetsize", new
238                 FeatureProcessors.SylOnsetSize());
239         addFeatureProcessor("accented", new FeatureProcessors.Accented());
240          }
241
242     /**
243      * Given a phoneme and a feature name, return the feature
244      *
245      * @param phone the phoneme of interest
246      * @param featureName the name of the feature of interest
247      *
248      * @return the feature with the given name
249      */
250     public String getPhoneFeature(String phone, String featureName) {
251         if (phoneSet != null)
252             return phoneSet.getPhoneFeature(phone, featureName);
253         else
254             return null;
255     }
256
257 }