git.gag.com Git - debian/freetts/blob - com/sun/speech/freetts/Voice.java

   1 /**
   2  * Portions Copyright 2001 Sun Microsystems, Inc.
   3  * Portions Copyright 1999-2001 Language Technologies Institute,
   4  * Carnegie Mellon University.
   5  * All Rights Reserved.  Use is subject to license terms.
   6  *
   7  * See the file "license.terms" for information on usage and
   8  * redistribution of this file, and for a DISCLAIMER OF ALL
   9  * WARRANTIES.
  10  */
  11 package com.sun.speech.freetts;
  12
  13 import java.io.BufferedReader;
  14 import java.io.IOException;
  15 import java.io.InputStream;
  16 import java.io.InputStreamReader;
  17 import java.io.PrintWriter;
  18 import java.io.Reader;
  19 import java.net.URL;
  20 import java.util.ArrayList;
  21 import java.util.Collections;
  22 import java.util.HashMap;
  23 import java.util.Iterator;
  24 import java.util.List;
  25 import java.util.Locale;
  26 import java.util.Map;
  27 import java.util.logging.Level;
  28 import java.util.logging.Logger;
  29
  30 import org.w3c.dom.Document;
  31 import org.w3c.dom.Node;
  32 import org.w3c.dom.Text;
  33
  34 import com.sun.speech.freetts.audio.AudioPlayer;
  35 import com.sun.speech.freetts.lexicon.Lexicon;
  36 import com.sun.speech.freetts.relp.LPCResult;
  37 import com.sun.speech.freetts.util.BulkTimer;
  38 import com.sun.speech.freetts.util.Utilities;
  39
  40
  41 /**
  42  * Performs text-to-speech using a series of
  43  * <code>UtteranceProcessors</code>. It is the main conduit to the FreeTTS
  44  * speech synthesizer. It can perform TTS on ASCII text,
  45  * a JSML document, an <code>InputStream</code>, or a
  46  * <code>FreeTTSSpeakable</code>, by invoking the method <code>speak</code>.
  47  *
  48  * <p>Before a Voice can perform TTS, it must have a
  49  * <code>Lexicon</code>, from which it gets the vocabulary, and
  50  * an <code>AudioPlayer</code>, to which it sends the synthesized output.
  51  *
  52  * <p><b>Example</b> (using the <code>CMUDiphoneVoice</code>,
  53  * <code>CMULexicon</code> and <code>JavaClipAudioPlayer</code>):
  54  *
  55  * <pre>
  56  * Voice voice = new CMUDiphoneVoice();
  57  *
  58  * // sets the Lexicon
  59  * voice.setLexicon(new CMULexicon());
  60  *
  61  * // sets the AudioPlayer
  62  * voice.setAudioPlayer(new JavaClipAudioPlayer());
  63  *
  64  * // loads the Voice
  65  * voice.allocate();
  66  *
  67  * // start talking
  68  * voice.speak("I can talk forever without getting tired!");
  69  * </pre>
  70  *
  71  *
  72  * <p>A user can override the AudioPlayer to use by defining the
  73  * "com.sun.speech.freetts.voice.defaultAudioPlayer" system property.
  74  * The value of this property must be the name of a class that
  75  * implements the AudioPlayer interface, and which also has a no-arg
  76  * constructor.
  77  *
  78  * @see VoiceManager
  79  * @see VoiceDirectory
  80  */
  81 public abstract class Voice implements UtteranceProcessor, Dumpable {
  82     /** Logger instance. */
  83     private static final Logger LOGGER =
  84         Logger.getLogger(Voice.class.getName());
  85
  86     /**
  87      * Constant that describes the name of the unit database used by
  88      * this voice.
  89      */
  90     public final static String DATABASE_NAME = "databaseName";
  91
  92     private List utteranceProcessors;
  93     private Map featureProcessors;
  94     private FeatureSetImpl features;
  95     private boolean metrics = false;
  96     private boolean detailedMetrics = false;
  97     private boolean dumpUtterance = false;
  98     private boolean dumpRelations = false;
  99     private String runTitle = "unnamed run";
 100     private Lexicon lexicon = null;
 101     private AudioPlayer defaultAudioPlayer = null;
 102     private AudioPlayer audioPlayer = null;
 103     private UtteranceProcessor audioOutput;
 104     private OutputQueue outputQueue = null;
 105     private String waveDumpFile = null;
 106     private BulkTimer runTimer = new BulkTimer();
 107     private BulkTimer threadTimer = new BulkTimer();
 108     private boolean externalOutputQueue = false;
 109     private boolean externalAudioPlayer = false;
 110
 111
 112     private float nominalRate = 150;    // nominal speaking rate for this voice
 113     private float pitch = 100;          // pitch baseline (hertz)
 114     private float range = 10;           // pitch range (hertz)
 115     private float pitchShift = 1;       // F0 Shift
 116     private float volume = 0.8f;        // the volume (range 0 to 1)
 117     private float durationStretch = 1f; // the duration stretch
 118
 119     private boolean loaded = false;
 120
 121     private String name = "default_name";
 122     private Age age = Age.DONT_CARE;
 123     private Gender gender = Gender.DONT_CARE;
 124     private String description = "default description";
 125     private Locale locale = Locale.getDefault();
 126     private String domain = "general";
 127     private String style = "standard";
 128     private String organization = "unknown";
 129
 130     /**
 131      * Prefix for System property names.
 132      */
 133     public final static String PROP_PREFIX = "com.sun.speech.freetts.voice.";
 134
 135     /**
 136      * Feature name for the silence phone string.
 137      */
 138     public final static String FEATURE_SILENCE = "silence";
 139
 140     /**
 141      * Feature name for the join type string.
 142      */
 143     public final static String FEATURE_JOIN_TYPE = "join_type";
 144
 145     /**
 146      * Feature name for the default AudioPlayer class to use.
 147      */
 148     public final static String DEFAULT_AUDIO_PLAYER =
 149             PROP_PREFIX + "defaultAudioPlayer";
 150
 151
 152     /**
 153      * The default class to use for the DEFAULT_AUDIO_PLAYER.
 154      */
 155     public final static String DEFAULT_AUDIO_PLAYER_DEFAULT =
 156             "com.sun.speech.freetts.audio.JavaStreamingAudioPlayer";
 157
 158
 159     /**
 160      * Creates a new Voice. Utterances are sent to an
 161      * output queue to be rendered as audio.  Utterances are placed
 162      * on the queue by an output thread. This
 163      * queue is usually created via a call to 'createOutputThread,'
 164      * which creates a thread that waits on the queue and sends the
 165      * output to the audio player associated with this voice. If
 166      * the queue is null, the output is rendered in the calling
 167      * thread.
 168      *
 169      * @see #createOutputThread
 170      */
 171     public Voice() {
 172         /* Make the utteranceProcessors a synchronized list to avoid
 173          * some threading issues.
 174          */
 175         utteranceProcessors = Collections.synchronizedList(new ArrayList());
 176         features = new FeatureSetImpl();
 177         featureProcessors = new HashMap();
 178
 179         try {
 180             nominalRate = Float.parseFloat(
 181                     Utilities.getProperty(PROP_PREFIX + "speakingRate","150"));
 182             pitch = Float.parseFloat(
 183                     Utilities.getProperty(PROP_PREFIX + "pitch","100"));
 184             range = Float.parseFloat(
 185                     Utilities.getProperty(PROP_PREFIX + "range","10"));
 186             volume = Float.parseFloat(
 187                     Utilities.getProperty(PROP_PREFIX + "volume","1.0"));
 188         } catch (SecurityException se) {
 189              // can't get properties, just use defaults
 190         }
 191         outputQueue = null;
 192         audioPlayer = null;
 193         defaultAudioPlayer = null;
 194     }
 195
 196     /**
 197      * Creates a new Voice like above, except that it also
 198      * stores the properties of the voice.
 199      * @param name the name of the voice
 200      * @param gender the gender of the voice
 201      * @param age the age of the voice
 202      * @param description a human-readable string providing a
 203      * description that can be displayed for the users.
 204      * @param locale the locale of the voice
 205      * @param domain the domain of this voice.  For example,
 206      * @param organization the organization which created the voice
 207      * &quot;general&quot;, &quot;time&quot;, or
 208      * &quot;weather&quot;.
 209      *
 210      * @see #Voice()
 211      */
 212     public Voice(String name, Gender gender, Age age,
 213             String description, Locale locale, String domain,
 214             String organization) {
 215         this();
 216         setName(name);
 217         setGender(gender);
 218         setAge(age);
 219         setDescription(description);
 220         setLocale(locale);
 221         setDomain(domain);
 222         setOrganization(organization);
 223     }
 224
 225
 226     /**
 227      * Speaks the given text.
 228      *
 229      * @param text the text to speak
 230      *
 231      * @return <code>true</code> if the given text is spoken properly;
 232      *   otherwise <code>false</code>
 233      */
 234     public boolean speak(String text) {
 235         return speak(new FreeTTSSpeakableImpl(text));
 236     }
 237
 238
 239     /**
 240      * Speaks the given document.
 241      *
 242      * @param doc the JSML document to speak
 243      *
 244      * @return <code>true</code> if the given document is spoken properly;
 245      *   otherwise <code>false</code>
 246      */
 247     public boolean speak(Document doc) {
 248         return speak(new FreeTTSSpeakableImpl(doc));
 249     }
 250
 251
 252     /**
 253      * Speaks the input stream.
 254      *
 255      * @param inputStream the inputStream to speak
 256      *
 257      * @return <code>true</code> if the given input stream is spoken properly;
 258      *   otherwise <code>false</code>
 259      */
 260     public boolean speak(InputStream inputStream) {
 261         return speak(new FreeTTSSpeakableImpl(inputStream));
 262     }
 263
 264
 265     /**
 266      * Speak the given queue item. This is a synchronous method that
 267      * does not return until the speakable is completely
 268      * spoken or has been cancelled.
 269      *
 270      * @param speakable the item to speak
 271      *
 272      * @return <code>true</code> if the utterance was spoken properly,
 273      *         <code>false</code> otherwise
 274      */
 275     public boolean speak(FreeTTSSpeakable speakable) {
 276         if (LOGGER.isLoggable(Level.FINE)) {
 277             LOGGER.fine("speak(FreeTTSSpeakable) called");
 278         }
 279         boolean ok = true;
 280         boolean posted = false;
 281
 282         getAudioPlayer().startFirstSampleTimer();
 283
 284         for (Iterator i = tokenize(speakable);
 285              !speakable.isCompleted() && i.hasNext() ; ) {
 286             try {
 287                 Utterance utterance = (Utterance) i.next();
 288                 if (utterance != null) {
 289                     processUtterance(utterance);
 290                     posted = true;
 291                 }
 292             } catch (ProcessException pe) {
 293                 ok = false;
 294             }
 295         }
 296         if (ok && posted) {
 297             runTimer.start("WaitAudio");
 298             ok = speakable.waitCompleted();
 299             runTimer.stop("WaitAudio");
 300         }
 301         if (LOGGER.isLoggable(Level.FINE)) {
 302             LOGGER.fine("speak(FreeTTSSpeakable) completed");
 303         }
 304         return ok;
 305     }
 306
 307
 308     /**
 309      * @deprecated  As of FreeTTS 1.2, replaced by {@link #allocate}.
 310      */
 311     public void load() {
 312         allocate();
 313     }
 314
 315     /**
 316      * Allocate this Voice. It loads the lexicon and the
 317      * audio output handler, and creates an audio output thread by
 318      * invoking <code>createOutputThread()</code>, if
 319      * one is not already created. It then calls the <code>loader()</code>
 320      * method to load Voice-specific data, which include utterance processors.
 321      */
 322     public void allocate() {
 323         if (isLoaded()) {
 324             return;
 325         }
 326         BulkTimer.LOAD.start();
 327
 328
 329         if (!lexicon.isLoaded()) {
 330             try {
 331                 lexicon.load();
 332             } catch (IOException ioe) {
 333                 LOGGER.severe("Can't load voice " + ioe);
 334                 throw new Error(ioe);
 335             }
 336         }
 337
 338         try {
 339             audioOutput = getAudioOutput();
 340         } catch (IOException ioe) {
 341             LOGGER.severe("Can't load audio output handler for voice " + ioe);
 342             throw new Error(ioe);
 343         }
 344         if (outputQueue == null) {
 345             outputQueue = createOutputThread();
 346         }
 347         try {
 348             loader();
 349         } catch (IOException ioe) {
 350             LOGGER.severe("Can't load voice " + ioe);
 351             throw new Error(ioe);
 352         }
 353         BulkTimer.LOAD.stop();
 354         if (isMetrics()) {
 355             BulkTimer.LOAD.show("loading " + toString() + " for " +
 356                     getRunTitle());
 357         }
 358         setLoaded(true);
 359     }
 360
 361
 362     /**
 363      * Returns true if this voice is loaded.
 364      *
 365      * @return <code>true</code> if the voice is loaded;
 366      *   otherwise <code>false</code>
 367      */
 368     public boolean isLoaded() {
 369         return loaded;
 370     }
 371
 372     /**
 373      * Sets the loaded state
 374      *
 375      * @param loaded the new loaded state
 376      *   otherwise <code>false</code>
 377      */
 378     protected void setLoaded(boolean loaded) {
 379         this.loaded = loaded;
 380     }
 381
 382     /**
 383      * Processes the given Utterance by passing it to each
 384      * UtteranceProcessor managed by this Voice.  The
 385      * UtteranceProcessors are called in the order they were added to
 386      * the Voice.
 387      *
 388      * @param u the Utterance to process
 389      *
 390      * @throws ProcessException if an exception occurred while performing
 391      *   operations on the Utterance
 392      */
 393     public void processUtterance(Utterance u) throws ProcessException {
 394         UtteranceProcessor[] processors;
 395
 396         if (utteranceProcessors == null) {
 397             return;
 398         }
 399         if (u == null) {
 400             throw new ProcessException("Utterance is null.");
 401         }
 402
 403         runTimer.start("processing");
 404         processors = new UtteranceProcessor[utteranceProcessors.size()];
 405         processors = (UtteranceProcessor[])
 406             utteranceProcessors.toArray(processors);
 407
 408         if (LOGGER.isLoggable(Level.FINE)) {
 409             LOGGER.fine("Processing Utterance: " + u.getString("input_text"));
 410         }
 411         try {
 412             for (int i = 0; i < processors.length &&
 413                      !u.getSpeakable().isCompleted(); i++) {
 414                 runProcessor(processors[i], u, runTimer);
 415             }
 416             if (!u.getSpeakable().isCompleted()) {
 417                 if (outputQueue == null) {
 418                     if (LOGGER.isLoggable(Level.FINE)) {
 419                         LOGGER.fine("To AudioOutput");
 420                     }
 421                     outputUtterance(u, runTimer);
 422                 } else {
 423                     runTimer.start("..post");
 424                     outputQueue.post(u);
 425                     runTimer.stop("..post");
 426                 }
 427             }
 428         }  catch (ProcessException pe) {
 429             System.err.println("Processing Utterance: " + pe);
 430         }  catch (Exception e) {
 431             System.err.println("Trouble while processing utterance " + e);
 432             e.printStackTrace();
 433             u.getSpeakable().cancelled();
 434         }
 435
 436         if (LOGGER.isLoggable(Level.FINE)) {
 437             LOGGER.fine("Done Processing Utterance: "
 438                     + u.getString("input_text"));
 439         }
 440         runTimer.stop("processing");
 441
 442         if (dumpUtterance) {
 443             u.dump("Utterance");
 444         }
 445         if (dumpRelations) {
 446             u.dumpRelations("Utterance");
 447         }
 448
 449         dumpASCII(u);
 450     }
 451
 452
 453     /**
 454      * Dumps the wave for the given utterance.
 455      *
 456      * @param utterance the utterance of interest
 457      */
 458     private void dumpASCII(Utterance utterance) {
 459         if (waveDumpFile != null) {
 460             LPCResult lpcResult =
 461                 (LPCResult) utterance.getObject("target_lpcres");
 462             try {
 463                 if (waveDumpFile.equals("-")) {
 464                     lpcResult.dumpASCII();
 465                 } else {
 466                     lpcResult.dumpASCII(waveDumpFile);
 467                 }
 468             } catch (IOException ioe) {
 469                 LOGGER.severe("Can't dump file to " + waveDumpFile + " " + ioe);
 470                 throw new Error(ioe);
 471             }
 472         }
 473     }
 474
 475
 476     /**
 477      * Creates an output thread that will asynchronously
 478      * output utterances that are generated by this voice (and other
 479      * voices).
 480      *
 481      * @return the queue where utterances should be placed.
 482      */
 483     public static OutputQueue createOutputThread() {
 484         final OutputQueue queue =  new OutputQueue();
 485         Thread t = new Thread() {
 486             public void run() {
 487                 Utterance utterance = null;
 488                 do {
 489                     utterance = queue.pend();
 490                     if (utterance != null) {
 491                         Voice voice = utterance.getVoice();
 492                         if (LOGGER.isLoggable(Level.FINE)) {
 493                             LOGGER.fine("OUT: "
 494                                     + utterance.getString("input_text"));
 495                         }
 496                         voice.outputUtterance(utterance, voice.threadTimer);
 497                     }
 498                 } while (utterance != null);
 499             }
 500         };
 501         t.setDaemon(true);
 502         t.start();
 503         return queue;
 504     }
 505
 506
 507     /**
 508      * Sends the given utterance to the audio output processor
 509      * associated with this voice. If the queue item associated with
 510      * this utterance is completed, then this set of utterances has
 511      * been cancelled or otherwise aborted and the utterance should
 512      * not be output.
 513      *
 514      * @param utterance the utterance to be output
 515      * @param timer the timer for gathering performance metrics
 516      *
 517      * @return  true if the utterance was output properly; otherwise
 518      *    false
 519      */
 520     private boolean outputUtterance(Utterance utterance, BulkTimer timer) {
 521         boolean ok = true;
 522         FreeTTSSpeakable speakable = utterance.getSpeakable();
 523
 524         if (!speakable.isCompleted())  {
 525             if (utterance.isFirst()) {
 526                 getAudioPlayer().reset();
 527                 speakable.started();
 528                 if (LOGGER.isLoggable(Level.FINE)) {
 529                     LOGGER.fine(" --- started ---");
 530                 }
 531             }
 532
 533             // log("   utt: " + utterance.getString("input_text"));
 534             try {
 535                 if (!speakable.isCompleted()) {
 536                     runProcessor(audioOutput, utterance, timer);
 537                 } else {
 538                     ok = false;
 539                 }
 540             }  catch (ProcessException pe) {
 541                 ok = false;
 542             }
 543             if (ok && utterance.isLast()) {
 544                 getAudioPlayer().drain();
 545                 speakable.completed();
 546                 if (LOGGER.isLoggable(Level.FINE)) {
 547                     LOGGER.fine(" --- completed ---");
 548                 }
 549             } else if (!ok) {
 550                 // getAudioPlayer().drain();
 551                 speakable.cancelled();
 552                 if (LOGGER.isLoggable(Level.FINE)) {
 553                     LOGGER.fine(" --- cancelled ---");
 554                 }
 555             } else {
 556                 if (LOGGER.isLoggable(Level.FINE)) {
 557                     LOGGER.fine(" --- not last: " + speakable.getText()
 558                             + " --- ");
 559                 }
 560             }
 561                 if (LOGGER.isLoggable(Level.FINE)) {
 562                     LOGGER.fine("Calling speakable.completed() on "
 563                             + speakable.getText());
 564                 }
 565         } else {
 566             ok = false;
 567                 if (LOGGER.isLoggable(Level.FINE)) {
 568                     LOGGER.fine("STRANGE: speakable already completed: "
 569                             + speakable.getText());
 570                 }
 571         }
 572         return ok;
 573     }
 574
 575
 576     /**
 577      * Runs the given utterance processor.
 578      *
 579      * @param processor the processor to run.   If the processor
 580      *    is null, it is ignored
 581      * @param utterance the utterance to process
 582      *
 583      * @throws ProcessException if an exceptin occurs while processing
 584      *     the utterance
 585      */
 586     private void runProcessor(UtteranceProcessor processor,
 587                               Utterance utterance, BulkTimer timer)
 588         throws ProcessException {
 589         if (processor != null) {
 590             String processorName = ".." + processor.toString();
 591                 if (LOGGER.isLoggable(Level.FINE)) {
 592                     LOGGER.fine("   Running " + processorName);
 593                 }
 594             timer.start(processorName);
 595             processor.processUtterance(utterance);
 596             timer.stop(processorName);
 597         }
 598     }
 599
 600
 601     /**
 602      * Returns the tokenizer associated with this voice.
 603      *
 604      * @return the tokenizer
 605      */
 606     public abstract Tokenizer getTokenizer();
 607
 608
 609     /**
 610      * Return the list of UtteranceProcessor instances.  Applications
 611      * should use this to obtain and modify the contents of the
 612      * UtteranceProcessor list.
 613      *
 614      * @return a List containing UtteranceProcessor instances
 615      */
 616     public List getUtteranceProcessors() {
 617         return utteranceProcessors;
 618     }
 619
 620
 621     /**
 622      * Returns the feature set associated with this voice.
 623      *
 624      * @return the feature set.
 625      */
 626     public FeatureSet getFeatures() {
 627         return features;
 628     }
 629
 630
 631     /**
 632      * Starts a batch of utterances. Utterances are sometimes
 633      * batched in groups for timing purposes.
 634      *
 635      * @see #endBatch
 636      */
 637     public void startBatch() {
 638         runTimer.setVerbose(detailedMetrics);
 639         runTimer.start();
 640     }
 641
 642
 643     /**
 644      * Ends a batch of utterances.
 645      *
 646      * @see #startBatch
 647      */
 648     public void endBatch() {
 649         runTimer.stop();
 650
 651         if (metrics) {
 652             runTimer.show(getRunTitle() + " run");
 653             threadTimer.show(getRunTitle() + " thread");
 654             getAudioPlayer().showMetrics();
 655             long totalMemory = Runtime.getRuntime().totalMemory();
 656             LOGGER.info
 657                 ("Memory Use    : "
 658                  + (totalMemory - Runtime.getRuntime().freeMemory()) / 1024
 659                  + "k  of " + totalMemory / 1024 + "k");
 660         }
 661     }
 662
 663     /**
 664      * Sets the output queue for this voice. If no output queue is set
 665      * for the voice when the voice is loaded, a queue and thread will
 666      * be created when the voice is loaded.  If the outputQueue is set
 667      * by an external entity by calling setOutputQueue, the caller is
 668      * responsible for shutting down the output thread. That is, if
 669      * you call 'setOutputQueue' then you are responsible for shutting
 670      * down the output thread on your own. This is necessary since the
 671      * output queue may be shared by a number of voices.
 672      *
 673      * <p>Utterances are placed on the
 674      *    queue to be output by an output thread. This queue is
 675      *    usually created via a call to 'createOutputThread' which
 676      *    creates a thread that waits on the queue and sends the
 677      *    output to the audio player associated with this voice. If
 678      *    the queue is null, the output is rendered in the calling
 679      *    thread.
 680      *
 681      * @param queue the output queue
 682      */
 683     public void setOutputQueue(OutputQueue queue) {
 684         externalOutputQueue = true;
 685         outputQueue = queue;
 686     }
 687
 688     /**
 689      * Returns the output queue associated with this voice.
 690      *
 691      * @return the output queue associated with this voice
 692      */
 693     public OutputQueue getOutputQueue() {
 694         return outputQueue;
 695     }
 696
 697     /**
 698      * Loads voice specific data. Subclasses of voice should
 699      * implement this to perform class specific loading.
 700      */
 701     protected abstract void loader() throws IOException;
 702
 703     /**
 704      * tokenizes the given the queue item.
 705      *
 706      * @return an iterator that will yield a series of utterances
 707      */
 708     private Iterator tokenize(FreeTTSSpeakable speakable) {
 709         return new FreeTTSSpeakableTokenizer(speakable).iterator();
 710     }
 711
 712     /**
 713      * Converts the document to a string (a placeholder for more
 714      * sophisticated logic to be done).
 715      *
 716      * @param dom the jsml document
 717      *
 718      * @return the document as a string.
 719      */
 720     private String documentToString(Document dom) {
 721         StringBuffer buf = new StringBuffer();
 722         linearize(dom, buf);
 723         return buf.toString();
 724     }
 725
 726     /**
 727      * Appends the text for this node to the given StringBuffer.
 728      *
 729      * @param n the node to traverse in depth-first order
 730      * @param buf the buffer to append text to
 731      */
 732     private  void linearize(Node n, StringBuffer buf) {
 733         StringBuffer endText = processNode(n, buf);
 734         for (Node child = n.getFirstChild();
 735              child != null;
 736              child = child.getNextSibling()) {
 737             linearize(child, buf);
 738         }
 739
 740         if (endText != null) {
 741             buf.append(endText);
 742         }
 743     }
 744
 745     /**
 746      * Adds text for just this node and returns any text that might
 747      * be needed to undo the effects of this node after it is
 748      * processed.
 749      *
 750      * @param n the node to traverse in depth-first order
 751      * @param buf the buffer to append text to
 752      *
 753      * @return a <code>String</code> containing text to undo the
 754      *   effects of the node
 755      */
 756     protected StringBuffer processNode(Node n, StringBuffer buf) {
 757         StringBuffer endText = null;
 758
 759         int type = n.getNodeType();
 760         switch (type) {
 761             case Node.ATTRIBUTE_NODE:
 762                  break;
 763
 764             case Node.DOCUMENT_NODE:
 765                 break;
 766
 767             case Node.ELEMENT_NODE:
 768                 // endText = processElement((Element) n, buf);
 769                 break;
 770
 771             case Node.TEXT_NODE:
 772                 buf.append(((Text) n).getData());
 773                 break;
 774
 775             // Pass processing instructions (e.g., <?blah?>
 776             // right on to the synthesizer.  These types of things
 777             // probably should not be used.  Instead the 'engine'
 778             // element is probably the best thing to do.
 779             //
 780             case Node.PROCESSING_INSTRUCTION_NODE:
 781                 break;
 782
 783             // The document type had better be JSML.
 784             //
 785             case Node.DOCUMENT_TYPE_NODE:
 786                 break;
 787
 788             // I think NOTATION nodes are only DTD's.
 789             //
 790             case Node.NOTATION_NODE:
 791                 break;
 792
 793             // Should not get COMMENTS because the JSMLParser
 794             // ignores them.
 795             //
 796             case Node.COMMENT_NODE:
 797                 break;
 798
 799             // Should not get CDATA because the JSMLParser is
 800             // coalescing.
 801             //
 802             case Node.CDATA_SECTION_NODE:
 803                 break;
 804
 805             // Should not get ENTITY related notes because
 806             // entities are expanded by the JSMLParser
 807             //
 808             case Node.ENTITY_NODE:
 809             case Node.ENTITY_REFERENCE_NODE:
 810                 break;
 811
 812             // Should not get DOCUMENT_FRAGMENT nodes because I
 813             // [[[WDW]]] think they are only created via the API's
 814             // and cannot be defined via content.
 815             //
 816             case Node.DOCUMENT_FRAGMENT_NODE:
 817                 break;
 818
 819             default:
 820                 break;
 821         }
 822
 823         return endText;
 824     }
 825
 826     /**
 827      * Dumps the voice in textual form.
 828      *
 829      * @param output where to send the formatted output
 830      * @param pad the initial padding
 831      * @param title the title to print when dumping out
 832      */
 833     public void dump(PrintWriter output, int pad, String title) {
 834         Utilities.dump(output, pad, title);
 835         features.dump(output, pad + 4, title + " Features");
 836         dumpProcessors(output, pad + 4, title + " Processors");
 837     }
 838
 839
 840     /**
 841      * Dumps the voice processors.
 842      *
 843      * @param output where to send the formatted output
 844      * @param pad the initial padding
 845      * @param title the title to print when dumping out
 846      */
 847     public void dumpProcessors(PrintWriter output, int pad, String title) {
 848         UtteranceProcessor[] processors;
 849         if (utteranceProcessors == null) {
 850             return;
 851         }
 852
 853         processors = new UtteranceProcessor[utteranceProcessors.size()];
 854         processors = (UtteranceProcessor[])
 855             utteranceProcessors.toArray(processors);
 856
 857         Utilities.dump(output, pad, title);
 858         for (int i = 0; i < processors.length; i++) {
 859             Utilities.dump(output, pad + 4, processors[i].toString());
 860         }
 861     }
 862
 863
 864     /**
 865      * Returns a language/voice specific Feature Processor.
 866      *
 867      * @param name the name of the processor
 868      *
 869      * @return the processor associated with the name or null if none
 870      *   could be found
 871      */
 872     public FeatureProcessor getFeatureProcessor(String name) {
 873         return (FeatureProcessor) featureProcessors.get(name);
 874     }
 875
 876     /**
 877      * Adds a language/voice specific Feature Processor to the set of
 878      * FeatureProcessors supported by this voice.
 879      *
 880      * @param name the name of the processor
 881      * @param fp  the processor
 882      */
 883     public void addFeatureProcessor(String name, FeatureProcessor fp) {
 884         featureProcessors.put(name, fp);
 885     }
 886
 887     /**
 888      * Gets the state of the metrics mode.
 889      *
 890      * @return true if metrics mode is on
 891      */
 892     public boolean isMetrics() {
 893         return metrics;
 894     }
 895
 896     /**
 897      * Sets the metrics mode.
 898      *
 899      * @param metrics true if metrics mode should be on
 900      */
 901     public void setMetrics(boolean metrics) {
 902         this.metrics = metrics;
 903         if (LOGGER.isLoggable(Level.FINE)) {
 904             LOGGER.fine("Metrics mode is " + metrics);
 905         }
 906     }
 907
 908     /**
 909      * Gets the state of the detailedMetrics mode.
 910      *
 911      * @return true if detailedMetrics mode is on
 912      */
 913     public boolean isDetailedMetrics() {
 914         return detailedMetrics;
 915     }
 916
 917     /**
 918      * Sets the state of the detailedMetrics mode.
 919      *
 920      * @param detailedMetrics true if detailedMetrics mode should be on
 921      */
 922     public void setDetailedMetrics(boolean detailedMetrics) {
 923         this.detailedMetrics = detailedMetrics;
 924         if (LOGGER.isLoggable(Level.FINE)) {
 925             LOGGER.fine("DetailedMetrics mode is " + detailedMetrics);
 926         }
 927     }
 928
 929     /**
 930      * Gets the state of the dumpUtterance mode.
 931      *
 932      * @return true if dumpUtterance mode is on
 933      */
 934     public boolean isDumpUtterance() {
 935         return dumpUtterance;
 936     }
 937
 938     /**
 939      * Sets the state of the dumpUtterance mode.
 940      *
 941      * @param dumpUtterance true if dumpUtterance mode should be on
 942      */
 943     public void setDumpUtterance(boolean dumpUtterance) {
 944         this.dumpUtterance = dumpUtterance;
 945         if (LOGGER.isLoggable(Level.FINE)) {
 946             LOGGER.fine("DumpUtterance mode is " + dumpUtterance);
 947         }
 948     }
 949
 950     /**
 951      * Gets the state of the dumpRelations mode.
 952      *
 953      * @return true if dumpRelations mode is on
 954      */
 955     public boolean isDumpRelations() {
 956         return dumpRelations;
 957     }
 958
 959     /**
 960      * Sets the state of the dumpRelations mode.
 961      *
 962      * @param dumpRelations true if dumpRelations mode should be on
 963      */
 964     public void setDumpRelations(boolean dumpRelations) {
 965         this.dumpRelations = dumpRelations;
 966         if (LOGGER.isLoggable(Level.FINE)) {
 967             LOGGER.fine("DumpRelations mode is " + dumpRelations);
 968         }
 969     }
 970
 971     /**
 972      * Sets the title for this run.
 973      *
 974      * @param runTitle the title for the run
 975      */
 976     public void setRunTitle(String runTitle) {
 977         this.runTitle = runTitle;
 978     }
 979
 980     /**
 981      * Gets the title for this run.
 982      *
 983      * @return the title for the run
 984      */
 985     public String getRunTitle() {
 986         return runTitle;
 987     }
 988
 989     /**
 990      * Given a phoneme and a feature name, returns the feature.
 991      *
 992      * @param phone the phoneme of interest
 993      * @param featureName the name of the feature of interest
 994      *
 995      * @return the feature with the given name
 996      */
 997     public String getPhoneFeature(String phone, String featureName) {
 998         return null;
 999     }
1000
1001     /**
1002      * Shuts down the voice processing.
1003      */
1004     public void deallocate() {
1005         setLoaded(false);
1006
1007         if (!externalAudioPlayer) {
1008             if (audioPlayer != null) {
1009                 audioPlayer.close();
1010                 audioPlayer = null;
1011             }
1012         }
1013
1014         if (!externalOutputQueue) {
1015             outputQueue.close();
1016         }
1017     }
1018
1019     /**
1020      * Sets the baseline pitch.
1021      *
1022      * @param hertz the baseline pitch in hertz
1023      */
1024     public void setPitch(float hertz) {
1025         this.pitch = hertz;
1026     }
1027
1028     /**
1029      * Retreives the baseline pitch.
1030      *
1031      * @return  the baseline pitch in hertz
1032      */
1033     public float getPitch() {
1034         return pitch;
1035     }
1036
1037     /**
1038      * Sets the pitch range.
1039      *
1040      * @param range the range in hertz
1041      */
1042     public void setPitchRange(float range) {
1043         this.range = range;
1044     }
1045
1046     /**
1047      * Gets the pitch range.
1048      *
1049      * @return the range in hertz
1050      */
1051     public float getPitchRange() {
1052         return range;
1053     }
1054
1055     /**
1056      * Sets the pitch shift
1057      *
1058      * @param shift the pitch shift (1.0 is no shift)
1059      */
1060     public void setPitchShift(float shift) {
1061         this.pitchShift = shift;
1062     }
1063
1064     /**
1065      * Gets the pitch shift.
1066      *
1067      * @return the pitch shift
1068      */
1069     public float getPitchShift() {
1070         return pitchShift;
1071     }
1072
1073     /**
1074      * Sets the duration  stretch
1075      *
1076      * @param stretch the duration stretch (1.0 is no stretch)
1077      */
1078     public void setDurationStretch(float stretch) {
1079         this.durationStretch = stretch;
1080     }
1081
1082     /**
1083      * Gets the duration Stretch
1084      *
1085      * @return the duration stretch
1086      */
1087     public float getDurationStretch() {
1088         return durationStretch;
1089     }
1090
1091     /**
1092      * Sets the rate of speech.
1093      *
1094      * @param wpm words per minute
1095      */
1096     public void setRate(float wpm) {
1097         if (wpm > 0 && wpm < 1000) {
1098             setDurationStretch(nominalRate / wpm);
1099         }
1100     }
1101
1102     /**
1103      * Gets the rate of speech.
1104      *
1105      * @return words per minute
1106      */
1107     public float getRate() {
1108         return durationStretch * nominalRate;
1109     }
1110
1111
1112     /**
1113      * Sets the volume.
1114      *
1115      * @param vol the volume (0 to 1.0)
1116      */
1117     public void setVolume(float vol) {
1118         volume = vol;
1119     }
1120
1121     /**
1122      * Gets the volume.
1123      *
1124      * @return the volume (0 to 1.0)
1125      */
1126     public float getVolume() {
1127         return volume;
1128     }
1129
1130     /**
1131      * Gets the lexicon for this voice.
1132      *
1133      * @return the lexicon (or null if there is no lexicon)
1134      */
1135     public Lexicon getLexicon() {
1136         return lexicon;
1137     }
1138
1139     /**
1140      * Sets the lexicon to be used by this voice.
1141      *
1142      * @param lexicon the lexicon to use
1143      */
1144     public void setLexicon(Lexicon lexicon) {
1145         this.lexicon = lexicon;
1146
1147     }
1148
1149     /**
1150      * Sets the dumpfile for this voice.
1151      *
1152      * @param waveDumpFile the dumpfile
1153      */
1154     public void setWaveDumpFile(String waveDumpFile) {
1155         this.waveDumpFile = waveDumpFile;
1156     }
1157
1158     /**
1159      * Gets the dumpfile for this voice.
1160      *
1161      * @return the dumpfile
1162      */
1163     public String  getWaveDumpFile() {
1164         return waveDumpFile;
1165     }
1166
1167     /**
1168      * Sets the audio player associated with this voice. The caller is
1169      * responsible for closing this player.
1170      *
1171      * @param player the audio player
1172      */
1173     public void setAudioPlayer(AudioPlayer player) {
1174         audioPlayer = player;
1175         externalAudioPlayer = true;
1176     }
1177
1178     /**
1179      * Gets the default audio player for this voice.  The return
1180      * value will be non-null only if the DEFAULT_AUDIO_PLAYER
1181      * system property has been set to the name of an AudioPlayer
1182      * class, and that class is able to be instantiated via a
1183      * no arg constructor.  getAudioPlayer will automatically set
1184      * the audio player for this voice to the default audio player
1185      * if the audio player has not yet been set.
1186      *
1187      * @see #DEFAULT_AUDIO_PLAYER
1188      * @see #getAudioPlayer
1189      * @return the default AudioPlayer
1190      */
1191     public AudioPlayer getDefaultAudioPlayer() throws InstantiationException {
1192         if (defaultAudioPlayer != null) {
1193             return defaultAudioPlayer;
1194         }
1195
1196         String className = Utilities.getProperty(
1197             DEFAULT_AUDIO_PLAYER, DEFAULT_AUDIO_PLAYER_DEFAULT);
1198
1199         try {
1200             Class cls = Class.forName(className);
1201             defaultAudioPlayer = (AudioPlayer) cls.newInstance();
1202             return defaultAudioPlayer;
1203         } catch (ClassNotFoundException e) {
1204             throw new InstantiationException("Can't find class " + className);
1205         } catch (IllegalAccessException e) {
1206             throw new InstantiationException("Can't find class " + className);
1207         } catch (ClassCastException e) {
1208             throw new InstantiationException(className + " cannot be cast "
1209                                              + "to AudioPlayer");
1210         }
1211     }
1212
1213     /**
1214      * Gets the audio player associated with this voice.  If the
1215      * audio player has not yet been set, the value will default
1216      * to the return value of getDefaultAudioPlayer.
1217      *
1218      * @see #getDefaultAudioPlayer
1219      * @return the audio player
1220      */
1221     public AudioPlayer getAudioPlayer() {
1222         if (audioPlayer == null) {
1223             try {
1224                 audioPlayer = getDefaultAudioPlayer();
1225             } catch (InstantiationException e) {
1226                 e.printStackTrace();
1227             }
1228         }
1229         return audioPlayer;
1230     }
1231
1232     /**
1233      * Get a resource for this voice.
1234      * By default, the voice is searched for in the package
1235      * to which the voice class belongs. Subclasses are free to
1236      * override this behaviour.
1237      */
1238     protected URL getResource(String resource) {
1239         return this.getClass().getResource(resource);
1240     }
1241
1242     /**
1243      * Set the name of this voice.
1244      * [[[TODO: any standard format to the name?]]]
1245      *
1246      * @param name the name to assign this voice
1247      */
1248     protected void setName(String name) {
1249         this.name = name;
1250     }
1251
1252
1253     /**
1254      * Get the name of this voice.
1255      *
1256      * @return the name
1257      */
1258     public String getName() {
1259         return name;
1260     }
1261
1262     /**
1263      * Returns the name of this Voice.
1264      *
1265      * @return the name of this Voice
1266      */
1267     public String toString() {
1268         return getName();
1269     }
1270
1271     /**
1272      * Set the gender of this voice.
1273      *
1274      * @param gender the gender to assign
1275      */
1276     protected void setGender(Gender gender) {
1277         this.gender = gender;
1278     }
1279
1280     /**
1281      * Get the gender of this voice.
1282      *
1283      * @return the gender of this voice
1284      */
1285     public Gender getGender() {
1286         return gender;
1287     }
1288
1289     /**
1290      * Set the age of this voice.
1291      *
1292      * @param age the age to assign
1293      */
1294     protected void setAge(Age age) {
1295         this.age = age;
1296     }
1297
1298     /**
1299      * Get the age of this voice.
1300      *
1301      * @return the age of this voice
1302      */
1303     public Age getAge() {
1304         return age;
1305     }
1306
1307     /**
1308      * Set the description of this voice.
1309      *
1310      * @param description the human readable description to assign
1311      */
1312     protected void setDescription(String description) {
1313         this.description = description;
1314     }
1315
1316     /**
1317      * Get the description of this voice.
1318      *
1319      * @return the human readable description of this voice
1320      */
1321     public String getDescription() {
1322         return description;
1323     }
1324
1325     /**
1326      * Set the locale of this voice.
1327      *
1328      * @param locale the locale of this voice.
1329      */
1330     protected void setLocale(Locale locale) {
1331         this.locale = locale;
1332     }
1333
1334     /**
1335      * Get the locale of this voice.
1336      *
1337      * @return the locale of this voice.
1338      */
1339     public Locale getLocale() {
1340         return locale;
1341     }
1342
1343     /**
1344      * Set the domain of this voice.
1345      *
1346      * @param domain the domain of this voice.  For example,
1347      * &quot;general&quot;, &quot;time&quot;, or
1348      * &quot;weather&quot;.
1349      */
1350     protected void setDomain(String domain) {
1351         this.domain = domain;
1352     }
1353
1354     /**
1355      * Get the domain of this voice.
1356      *
1357      * @return the domain of this voice.  For example,
1358      * &quot;general&quot;, &quot;time&quot;, or
1359      * &quot;weather&quot;.
1360      */
1361     public String getDomain() {
1362         return domain;
1363     }
1364
1365     /**
1366      * Sets the voice style. This parameter is designed for human
1367      * interpretation. Values might include "business", "casual",
1368      * "robotic", "breathy"
1369      *
1370      * @param style the stile of this voice.
1371      */
1372     public void setStyle(String style) {
1373         this.style = style;
1374     }
1375
1376     /**
1377      * Gets the voice style. This parameter is designed for human
1378      * interpretation. Values might include "business", "casual",
1379      * "robotic", "breathy".
1380      */
1381     public String getStyle() {
1382         return style;
1383     }
1384
1385     /**
1386      * Sets the organization which created this voice.  For example
1387      * "cmu", "sun", ...
1388      *
1389      * @param organization the name of the organization
1390      */
1391     protected void setOrganization(String organization) {
1392         this.organization = organization;
1393     }
1394
1395     /**
1396      * Gets the organization which created this voice.  For example
1397      * "cmu", "sun", ...
1398      *
1399      * @return the name of the organization
1400      */
1401     public String getOrganization() {
1402         return organization;
1403     }
1404
    /**
     * Returns the AudioOutput processor to be used by this voice.
     * The method is abstract: every concrete voice must supply its own
     * audio output processor.
     *
     * @return the audio output processor
     *
     * @throws IOException if an IO error occurs while getting
     *     processor
     */
    protected abstract UtteranceProcessor getAudioOutput() throws IOException ;
1415
    /**
     * Tokenizes a FreeTTSSpeakable and groups the resulting tokens
     * into <code>Utterance</code> objects, which are handed out one at
     * a time through the iterator returned by <code>iterator</code>.
     */
    private class FreeTTSSpeakableTokenizer  {
        // The item being tokenized; attached to every utterance produced.
        FreeTTSSpeakable speakable;
        // Tokenizer obtained from the enclosing voice when this object
        // is created.
        Tokenizer tok = getTokenizer();

        /**
         * Constructor.  Feeds the tokenizer from the speakable: plain
         * text is passed as is, a stream is wrapped in a buffered
         * reader, and a document is first converted to a string with
         * <code>documentToString</code>.  If the speakable is none of
         * these, the tokenizer is given no input.
         *
         * @param speakable the queue item to be pretokenized
         */
        public FreeTTSSpeakableTokenizer(FreeTTSSpeakable speakable) {
            this.speakable = speakable;
            if (speakable.isPlainText()) {
                tok.setInputText(speakable.getText());
            } else if (speakable.isStream()) {
                // No charset is given, so the stream is decoded with
                // the platform default charset.
                Reader reader = new BufferedReader(
                    new InputStreamReader(speakable.getInputStream()));
                tok.setInputReader(reader);
            } else if (speakable.isDocument()) {
                tok.setInputText(documentToString(speakable.getDocument()));
            }
        }

        /**
         * Returns an iterator for this text item.  Each call to
         * <code>next</code> on the iterator consumes tokens from the
         * shared tokenizer and returns them as an
         * <code>Utterance</code>.
         *
         * @return an iterator over the utterances of this item;
         *   <code>remove</code> is not supported
         */
        public Iterator iterator() {
            return new Iterator() {
                // True until the first utterance has been produced.
                boolean first = true;
                // Token that ended the previous utterance; it is held
                // back so that it starts the next one.
                Token savedToken = null;

                /**
                 * Determines if there are more utterances
                 *
                 * @return true if a token is being held back from the
                 *    previous utterance or the tokenizer has more
                 *    tokens
                 */
                public boolean hasNext() {
                    return savedToken != null || tok.hasMoreTokens();
                }

                /**
                 * Returns the next utterance.  An utterance is always
                 * created, even when no tokens are left; in that case
                 * it is built from an empty token list.  This method
                 * never returns null.
                 *
                 * @return the next utterance (as an object)
                 */
                public Object next() {
                    ArrayList tokenList = new ArrayList();
                    Utterance utterance = null;

                    // Start with the token held back by the previous call.
                    if (savedToken != null) {
                        tokenList.add(savedToken);
                        savedToken = null;
                    }

                    // Collect tokens until the tokenizer runs dry or an
                    // utterance boundary is hit: a token with an empty
                    // word, a list that already holds more than 500
                    // tokens, or a break reported by the tokenizer.
                    // The boundary token is not added to this utterance;
                    // it is saved for the next call.
                    while (tok.hasMoreTokens()) {
                        Token token = tok.getNextToken();
                        if ((token.getWord().length() == 0) ||
                            (tokenList.size() > 500) ||
                            tok.isBreak()) {
                            savedToken = token;
                            break;
                        }
                        tokenList.add(token);
                    }
                    utterance =  new Utterance(Voice.this, tokenList);
                    utterance.setSpeakable(speakable);
                    utterance.setFirst(first);
                    first = false;
                    // Last utterance: the tokenizer is exhausted and
                    // nothing but (at most) an empty-word token is held
                    // back.
                    // NOTE(review): in that empty-word case hasNext()
                    // still returns true because savedToken is non-null
                    // -- confirm that callers stop on the "last" flag.
                    boolean isLast =
                        (!tok.hasMoreTokens() &&
                         (savedToken == null ||
                          savedToken.getWord().length() == 0));
                    utterance.setLast(isLast);
                    return utterance;
                }

                /**
                 * Not supported.
                 *
                 * @throws UnsupportedOperationException always
                 */
                public void remove() {
                    throw new UnsupportedOperationException("remove");
                }
            };
        }
    }
1501 }
1502
1503
1504
1505
1506