upstream version 1.2.2
[debian/freetts] / de / dfki / lt / freetts / DiphoneVoice.java
1 /**
2  * Portions Copyright 2004 DFKI GmbH.
3  * Portions Copyright 2001 Sun Microsystems, Inc.
4  * Portions Copyright 1999-2001 Language Technologies Institute, 
5  * Carnegie Mellon University.
6  * All Rights Reserved.  Use is subject to license terms.
7  * 
8  * See the file "license.terms" for information on usage and
9  * redistribution of this file, and for a DISCLAIMER OF ALL 
10  * WARRANTIES.
11  */
12 package de.dfki.lt.freetts;
13
14 import java.io.IOException;
15 import java.net.URL;
16 import java.util.Locale;
17
18 import com.sun.speech.freetts.Age;
19 import com.sun.speech.freetts.Gender;
20 import com.sun.speech.freetts.PartOfSpeech;
21 import com.sun.speech.freetts.PartOfSpeechImpl;
22 import com.sun.speech.freetts.PhoneSet;
23 import com.sun.speech.freetts.PhoneSetImpl;
24 import com.sun.speech.freetts.Tokenizer;
25 import com.sun.speech.freetts.UtteranceProcessor;
26 import com.sun.speech.freetts.Voice;
27 import com.sun.speech.freetts.diphone.DiphonePitchmarkGenerator;
28 import com.sun.speech.freetts.diphone.DiphoneUnitSelector;
29 import com.sun.speech.freetts.en.us.CMULexicon;
30 import com.sun.speech.freetts.en.us.FeatureProcessors;
31 import com.sun.speech.freetts.lexicon.Lexicon;
32 import com.sun.speech.freetts.relp.AudioOutput;
33 import com.sun.speech.freetts.relp.SampleInfo;
34 import com.sun.speech.freetts.relp.UnitConcatenator;
35
36
37 /**
38  * A simple dummy voice as a starting point for non-US-English
39  * cluster unit voices. All NLP stuff would need to be implemented
40  * in order for this to become a full TTS voice.
41  */
42 public class DiphoneVoice extends Voice implements ConcatenativeVoice {
43     private PhoneSet phoneSet;
44         protected URL database;
45         protected URL phonesetURL;
46         protected URL partOfSpeechURL;
47     protected DiphoneUnitSelector unitSelector;
48     
49         public DiphoneVoice(String name, Gender gender, Age age,
50                         String description, Locale locale, String domain,
51                         String organization, Lexicon lexicon, URL database) {
52                 this(name, gender, age, description, locale, domain,
53                                 organization, lexicon, database, null, null);
54         }
55         
56          /**
57      * Creates a ClusterUnitVoice
58      * 
59      * @param database the database of the voice
60      * @param unitNamer specifies the name of the Units (if null, an 
61      * ldom naming scheme will be used: 'ae_afternoon')
62      * @param phonesetURL leads to the phoneset, which will be used 
63      * for the FeatureProcessors (can be null)
64      * @param partOfSpeechURL leads to the pos-textfile which will be used
65      * for the FeatureProcessors (can be null)
66      */
67         public DiphoneVoice(String name, Gender gender, Age age,
68                         String description, Locale locale, String domain,
69                         String organization, Lexicon lexicon, URL database,
70             URL phonesetURL, URL partOfSpeechURL) {
71                 
72                 //TODO: do something useful with the lexicon
73                 super(name, gender, age, description, locale,
74                                 domain, organization);
75                 // Set default prosody values:
76         setRate(150f);
77                 setPitch(100F);
78                 setPitchRange(12F);
79         if (lexicon != null) {
80             setLexicon(lexicon);
81         } else {
82             // Use a small dummy lexicon
83             setLexicon(new CMULexicon("cmutimelex"));
84         }
85                 this.database = database;
86                 this.phonesetURL = phonesetURL;
87                 this.partOfSpeechURL = partOfSpeechURL;
88         try {
89             this.unitSelector = new DiphoneUnitSelector(getDatabase());
90         } catch (IOException ioe) {
91             ioe.printStackTrace();
92         }
93         }
94         
95         public Tokenizer getTokenizer() {
96                 return null;
97         }
98         
99         
100         protected void loader() throws IOException {
101                 setupFeatureProcessors();
102         }
103         
104     /**
105      * Get the sample info for the underlying database.
106      * @return the sample info object
107      */
108     public SampleInfo getSampleInfo() {
109         return unitSelector.getSampleInfo();
110     }
111         
112         protected UtteranceProcessor getAudioOutput() throws IOException {
113                 return new AudioOutput();
114         }
115         
116         /**
117          * Gets the url to the database that defines the unit data for this
118          * voice.
119          *
120          * @return a url to the database
121          */
122         public URL getDatabase() {
123                 return database;
124         }
125         
126         /**
127          * Returns the unit selector to be used by this voice.
128          * Derived voices typically override this to customize behaviors.
129          * This voice uses  a diphone selector as the unit selector.
130          * 
131          * @return the post lexical processor
132          * 
133          * @throws IOException if an IO error occurs while getting
134          *     processor
135          */
136         public UtteranceProcessor getUnitSelector() throws IOException {
137         return unitSelector;
138     }
139     
140     /**
141      * Returns the pitch mark generator to be used by this voice.
142      * Derived voices typically override this to customize behaviors.
143      * There is no default unit selector
144      * 
145      * @return the post lexical processor
146      * 
147      * @throws IOException if an IO error occurs while getting
148      *     processor
149      */
150     public UtteranceProcessor getPitchmarkGenerator() throws IOException {
151         return new DiphonePitchmarkGenerator();
152     }
153     
154     /**
155      * Returns the unit concatenator to be used by this voice.
156      * Derived voices typically override this to customize behaviors.
157      * There is no default unit selector
158      * 
159      * @return the post lexical processor
160      * 
161      * @throws IOException if an IO error occurs while getting
162      *     processor
163      */
164     public UtteranceProcessor getUnitConcatenator() throws IOException {
165         return new UnitConcatenator();
166     }
167
168     protected void setupFeatureProcessors() throws IOException {
169         if (phonesetURL != null) {
170             phoneSet = new PhoneSetImpl(phonesetURL);
171         }
172         if (partOfSpeechURL != null) {
173             PartOfSpeech pos = new PartOfSpeechImpl(partOfSpeechURL, "content");
174             addFeatureProcessor("gpos", new FeatureProcessors.Gpos(pos));
175         }
176         addFeatureProcessor("word_break", new FeatureProcessors.WordBreak());
177         addFeatureProcessor("word_punc", new FeatureProcessors.WordPunc());
178         addFeatureProcessor("word_numsyls",new FeatureProcessors.WordNumSyls());
179         addFeatureProcessor("ssyl_in", new FeatureProcessors.StressedSylIn());
180         addFeatureProcessor("syl_in", new FeatureProcessors.SylIn());
181         addFeatureProcessor("syl_out", new FeatureProcessors.SylOut());
182         addFeatureProcessor("ssyl_out", new
183                 FeatureProcessors.StressedSylOut());
184         addFeatureProcessor("syl_break", new FeatureProcessors.SylBreak());
185         addFeatureProcessor("old_syl_break", new FeatureProcessors.SylBreak());
186         addFeatureProcessor("num_digits", new FeatureProcessors.NumDigits());
187         addFeatureProcessor("month_range", new FeatureProcessors.MonthRange());
188         addFeatureProcessor("token_pos_guess", 
189                 new FeatureProcessors.TokenPosGuess());
190         addFeatureProcessor("segment_duration", 
191                 new FeatureProcessors.SegmentDuration());
192         addFeatureProcessor("sub_phrases", new FeatureProcessors.SubPhrases());
193         addFeatureProcessor("asyl_in", new FeatureProcessors.AccentedSylIn());
194         addFeatureProcessor("last_accent", new FeatureProcessors.LastAccent());
195         addFeatureProcessor("pos_in_syl", new FeatureProcessors.PosInSyl());
196         addFeatureProcessor("position_type", new
197                 FeatureProcessors.PositionType());
198
199         addFeatureProcessor("ph_cplace", new FeatureProcessors.PH_CPlace());
200         addFeatureProcessor("ph_ctype", new FeatureProcessors.PH_CType());
201         addFeatureProcessor("ph_cvox", new FeatureProcessors.PH_CVox());
202         addFeatureProcessor("ph_vc", new FeatureProcessors.PH_VC());
203         addFeatureProcessor("ph_vfront", new FeatureProcessors.PH_VFront());
204         addFeatureProcessor("ph_vheight", new FeatureProcessors.PH_VHeight());
205         addFeatureProcessor("ph_vlng", new FeatureProcessors.PH_VLength());
206         addFeatureProcessor("ph_vrnd", new FeatureProcessors.PH_VRnd());
207
208         addFeatureProcessor("seg_coda_fric", new
209                 FeatureProcessors.SegCodaFric());
210         addFeatureProcessor("seg_onset_fric", new
211                 FeatureProcessors.SegOnsetFric());
212
213         addFeatureProcessor("seg_coda_stop", new
214                 FeatureProcessors.SegCodaStop());
215         addFeatureProcessor("seg_onset_stop", new
216                 FeatureProcessors.SegOnsetStop());
217
218         addFeatureProcessor("seg_coda_nasal", new
219                 FeatureProcessors.SegCodaNasal());
220         addFeatureProcessor("seg_onset_nasal", new
221                 FeatureProcessors.SegOnsetNasal());
222
223         addFeatureProcessor("seg_coda_glide", new
224                 FeatureProcessors.SegCodaGlide());
225         addFeatureProcessor("seg_onset_glide", new
226                 FeatureProcessors.SegOnsetGlide());
227
228         addFeatureProcessor("seg_onsetcoda", new
229                 FeatureProcessors.SegOnsetCoda());
230         addFeatureProcessor("syl_codasize", new
231                 FeatureProcessors.SylCodaSize());
232         addFeatureProcessor("syl_onsetsize", new
233                 FeatureProcessors.SylOnsetSize());
234         addFeatureProcessor("accented", new FeatureProcessors.Accented());
235     }
236
237     /**
238      * Given a phoneme and a feature name, return the feature
239      *
240      * @param phone the phoneme of interest
241      * @param featureName the name of the feature of interest
242      *
243      * @return the feature with the given name
244      */
245     public String getPhoneFeature(String phone, String featureName) {
246         if (phoneSet != null)
247             return phoneSet.getPhoneFeature(phone, featureName);
248         else
249             return null;
250     }
251
252 }