git.gag.com Git - debian/freetts/blob - de/dfki/lt/freetts/DiphoneVoice.java

   1 /**
   2  * Portions Copyright 2004 DFKI GmbH.
   3  * Portions Copyright 2001 Sun Microsystems, Inc.
   4  * Portions Copyright 1999-2001 Language Technologies Institute,
   5  * Carnegie Mellon University.
   6  * All Rights Reserved.  Use is subject to license terms.
   7  *
   8  * See the file "license.terms" for information on usage and
   9  * redistribution of this file, and for a DISCLAIMER OF ALL
  10  * WARRANTIES.
  11  */
  12 package de.dfki.lt.freetts;
  13
  14 import java.io.IOException;
  15 import java.net.URL;
  16 import java.util.Locale;
  17
  18 import com.sun.speech.freetts.Age;
  19 import com.sun.speech.freetts.Gender;
  20 import com.sun.speech.freetts.PartOfSpeech;
  21 import com.sun.speech.freetts.PartOfSpeechImpl;
  22 import com.sun.speech.freetts.PhoneSet;
  23 import com.sun.speech.freetts.PhoneSetImpl;
  24 import com.sun.speech.freetts.Tokenizer;
  25 import com.sun.speech.freetts.UtteranceProcessor;
  26 import com.sun.speech.freetts.Voice;
  27 import com.sun.speech.freetts.diphone.DiphonePitchmarkGenerator;
  28 import com.sun.speech.freetts.diphone.DiphoneUnitSelector;
  29 import com.sun.speech.freetts.en.us.CMULexicon;
  30 import com.sun.speech.freetts.en.us.FeatureProcessors;
  31 import com.sun.speech.freetts.lexicon.Lexicon;
  32 import com.sun.speech.freetts.relp.AudioOutput;
  33 import com.sun.speech.freetts.relp.SampleInfo;
  34 import com.sun.speech.freetts.relp.UnitConcatenator;
  35
  36
  37 /**
  38  * A simple dummy voice as a starting point for non-US-English
  39  * cluster unit voices. All NLP stuff would need to be implemented
  40  * in order for this to become a full TTS voice.
  41  */
  42 public class DiphoneVoice extends Voice implements ConcatenativeVoice {
  43     private PhoneSet phoneSet;
  44         protected URL database;
  45         protected URL phonesetURL;
  46         protected URL partOfSpeechURL;
  47     protected DiphoneUnitSelector unitSelector;
  48
  49         public DiphoneVoice(String name, Gender gender, Age age,
  50                         String description, Locale locale, String domain,
  51                         String organization, Lexicon lexicon, URL database) {
  52                 this(name, gender, age, description, locale, domain,
  53                                 organization, lexicon, database, null, null);
  54         }
  55
  56          /**
  57      * Creates a ClusterUnitVoice
  58      *
  59      * @param database the database of the voice
  60      * @param unitNamer specifies the name of the Units (if null, an
  61      * ldom naming scheme will be used: 'ae_afternoon')
  62      * @param phonesetURL leads to the phoneset, which will be used
  63      * for the FeatureProcessors (can be null)
  64      * @param partOfSpeechURL leads to the pos-textfile which will be used
  65      * for the FeatureProcessors (can be null)
  66      */
  67         public DiphoneVoice(String name, Gender gender, Age age,
  68                         String description, Locale locale, String domain,
  69                         String organization, Lexicon lexicon, URL database,
  70             URL phonesetURL, URL partOfSpeechURL) {
  71
  72                 //TODO: do something useful with the lexicon
  73                 super(name, gender, age, description, locale,
  74                                 domain, organization);
  75                 // Set default prosody values:
  76         setRate(150f);
  77                 setPitch(100F);
  78                 setPitchRange(12F);
  79         if (lexicon != null) {
  80             setLexicon(lexicon);
  81         } else {
  82             // Use a small dummy lexicon
  83             setLexicon(new CMULexicon("cmutimelex"));
  84         }
  85                 this.database = database;
  86                 this.phonesetURL = phonesetURL;
  87                 this.partOfSpeechURL = partOfSpeechURL;
  88         try {
  89             this.unitSelector = new DiphoneUnitSelector(getDatabase());
  90         } catch (IOException ioe) {
  91             ioe.printStackTrace();
  92         }
  93         }
  94
  95         public Tokenizer getTokenizer() {
  96                 return null;
  97         }
  98
  99
 100         protected void loader() throws IOException {
 101                 setupFeatureProcessors();
 102         }
 103
 104     /**
 105      * Get the sample info for the underlying database.
 106      * @return the sample info object
 107      */
 108     public SampleInfo getSampleInfo() {
 109         return unitSelector.getSampleInfo();
 110     }
 111
 112         protected UtteranceProcessor getAudioOutput() throws IOException {
 113                 return new AudioOutput();
 114         }
 115
 116         /**
 117          * Gets the url to the database that defines the unit data for this
 118          * voice.
 119          *
 120          * @return a url to the database
 121          */
 122         public URL getDatabase() {
 123                 return database;
 124         }
 125
 126         /**
 127          * Returns the unit selector to be used by this voice.
 128          * Derived voices typically override this to customize behaviors.
 129          * This voice uses  a diphone selector as the unit selector.
 130          *
 131          * @return the post lexical processor
 132          *
 133          * @throws IOException if an IO error occurs while getting
 134          *     processor
 135          */
 136         public UtteranceProcessor getUnitSelector() throws IOException {
 137         return unitSelector;
 138     }
 139
 140     /**
 141      * Returns the pitch mark generator to be used by this voice.
 142      * Derived voices typically override this to customize behaviors.
 143      * There is no default unit selector
 144      *
 145      * @return the post lexical processor
 146      *
 147      * @throws IOException if an IO error occurs while getting
 148      *     processor
 149      */
 150     public UtteranceProcessor getPitchmarkGenerator() throws IOException {
 151         return new DiphonePitchmarkGenerator();
 152     }
 153
 154     /**
 155      * Returns the unit concatenator to be used by this voice.
 156      * Derived voices typically override this to customize behaviors.
 157      * There is no default unit selector
 158      *
 159      * @return the post lexical processor
 160      *
 161      * @throws IOException if an IO error occurs while getting
 162      *     processor
 163      */
 164     public UtteranceProcessor getUnitConcatenator() throws IOException {
 165         return new UnitConcatenator();
 166     }
 167
 168     protected void setupFeatureProcessors() throws IOException {
 169         if (phonesetURL != null) {
 170             phoneSet = new PhoneSetImpl(phonesetURL);
 171         }
 172         if (partOfSpeechURL != null) {
 173             PartOfSpeech pos = new PartOfSpeechImpl(partOfSpeechURL, "content");
 174             addFeatureProcessor("gpos", new FeatureProcessors.Gpos(pos));
 175         }
 176         addFeatureProcessor("word_break", new FeatureProcessors.WordBreak());
 177         addFeatureProcessor("word_punc", new FeatureProcessors.WordPunc());
 178         addFeatureProcessor("word_numsyls",new FeatureProcessors.WordNumSyls());
 179         addFeatureProcessor("ssyl_in", new FeatureProcessors.StressedSylIn());
 180         addFeatureProcessor("syl_in", new FeatureProcessors.SylIn());
 181         addFeatureProcessor("syl_out", new FeatureProcessors.SylOut());
 182         addFeatureProcessor("ssyl_out", new
 183                 FeatureProcessors.StressedSylOut());
 184         addFeatureProcessor("syl_break", new FeatureProcessors.SylBreak());
 185         addFeatureProcessor("old_syl_break", new FeatureProcessors.SylBreak());
 186         addFeatureProcessor("num_digits", new FeatureProcessors.NumDigits());
 187         addFeatureProcessor("month_range", new FeatureProcessors.MonthRange());
 188         addFeatureProcessor("token_pos_guess",
 189                 new FeatureProcessors.TokenPosGuess());
 190         addFeatureProcessor("segment_duration",
 191                 new FeatureProcessors.SegmentDuration());
 192         addFeatureProcessor("sub_phrases", new FeatureProcessors.SubPhrases());
 193         addFeatureProcessor("asyl_in", new FeatureProcessors.AccentedSylIn());
 194         addFeatureProcessor("last_accent", new FeatureProcessors.LastAccent());
 195         addFeatureProcessor("pos_in_syl", new FeatureProcessors.PosInSyl());
 196         addFeatureProcessor("position_type", new
 197                 FeatureProcessors.PositionType());
 198
 199         addFeatureProcessor("ph_cplace", new FeatureProcessors.PH_CPlace());
 200         addFeatureProcessor("ph_ctype", new FeatureProcessors.PH_CType());
 201         addFeatureProcessor("ph_cvox", new FeatureProcessors.PH_CVox());
 202         addFeatureProcessor("ph_vc", new FeatureProcessors.PH_VC());
 203         addFeatureProcessor("ph_vfront", new FeatureProcessors.PH_VFront());
 204         addFeatureProcessor("ph_vheight", new FeatureProcessors.PH_VHeight());
 205         addFeatureProcessor("ph_vlng", new FeatureProcessors.PH_VLength());
 206         addFeatureProcessor("ph_vrnd", new FeatureProcessors.PH_VRnd());
 207
 208         addFeatureProcessor("seg_coda_fric", new
 209                 FeatureProcessors.SegCodaFric());
 210         addFeatureProcessor("seg_onset_fric", new
 211                 FeatureProcessors.SegOnsetFric());
 212
 213         addFeatureProcessor("seg_coda_stop", new
 214                 FeatureProcessors.SegCodaStop());
 215         addFeatureProcessor("seg_onset_stop", new
 216                 FeatureProcessors.SegOnsetStop());
 217
 218         addFeatureProcessor("seg_coda_nasal", new
 219                 FeatureProcessors.SegCodaNasal());
 220         addFeatureProcessor("seg_onset_nasal", new
 221                 FeatureProcessors.SegOnsetNasal());
 222
 223         addFeatureProcessor("seg_coda_glide", new
 224                 FeatureProcessors.SegCodaGlide());
 225         addFeatureProcessor("seg_onset_glide", new
 226                 FeatureProcessors.SegOnsetGlide());
 227
 228         addFeatureProcessor("seg_onsetcoda", new
 229                 FeatureProcessors.SegOnsetCoda());
 230         addFeatureProcessor("syl_codasize", new
 231                 FeatureProcessors.SylCodaSize());
 232         addFeatureProcessor("syl_onsetsize", new
 233                 FeatureProcessors.SylOnsetSize());
 234         addFeatureProcessor("accented", new FeatureProcessors.Accented());
 235     }
 236
 237     /**
 238      * Given a phoneme and a feature name, return the feature
 239      *
 240      * @param phone the phoneme of interest
 241      * @param featureName the name of the feature of interest
 242      *
 243      * @return the feature with the given name
 244      */
 245     public String getPhoneFeature(String phone, String featureName) {
 246         if (phoneSet != null)
 247             return phoneSet.getPhoneFeature(phone, featureName);
 248         else
 249             return null;
 250     }
 251
 252 }