2 * Portions Copyright 2003 Sun Microsystems, Inc.
3 * Portions Copyright 1999-2001 Language Technologies Institute,
4 * Carnegie Mellon University.
5 * All Rights Reserved. Use is subject to license terms.
7 * See the file "license.terms" for information on usage and
8 * redistribution of this file, and for a DISCLAIMER OF ALL
11 package com.sun.speech.freetts.diphone;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.ref.Reference;
import java.lang.ref.WeakReference;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.sun.speech.freetts.relp.Sample;
import com.sun.speech.freetts.relp.SampleInfo;
import com.sun.speech.freetts.util.BulkTimer;
import com.sun.speech.freetts.util.Utilities;
43 * Represents and manages the unit data for all diphones. The diphone
44 * data set is stored in a set of data files. These data are loaded by this
45 * class into internal data structures before diphone synthesis can
48 *The diphone data set is one of the largest sets of data that
49 * needs to be loaded by the synthesizer and therefore can add to the
50 * overall startup time for any system using this database. For
51 * certain applications, the startup time is a critical spec that
52 * needs to be optimized, while for other applications, startup time
53 * is inconsequential. This class provides settings (via system
54 * properties) that control how the database is to be loaded so that
55 * applications can tune for quick startup or optimal run time.
57 * This class serves also as a testbed for comparing performance of
58 * the traditional java binary I/O and the new io ( <code>java.nio </code>)
61 * <p> A diphone database can be loaded from a text data file, or a
62 * binary datafile. The binary version loads significantly faster
63 * than the text version. Additionally, a binary index can be
64 * generated and used to reduce overall memory footprint.
67 * A DiphoneUnitDatabase contains an array of frames, and an array of
68 * residuals. The frames are the samples of the wave, and the
69 * residuals are for linear predictive coding use. This is called
70 * "cst_sts" (a struct) in flite.
72 * Note that if 'com.sun.speech.freetts.useNewIO' is set to true and
73 * the input type is binary, then the JDK1.4+ new IO api is used to
78 * com.sun.speech.freetts.diphone.UnitDatabase.cacheType
81 * can be set to one of:
84 * <li> preload: database is loaded at startup
85 * <li> demand: database is loaded on demand
86 * <li> hard: database is loaded on demand but cached
87 * <li> soft: database is loaded on demand but cached with soft references
90 * This <code> cacheType </code> setting controls how the database is
91 * loaded. The default is to 'preload' the database. This setting
92 * gives best runtime performance but with longer initial startup
95 public class DiphoneUnitDatabase {
96 /** Logger instance. */
97 private static final Logger LOGGER =
98 Logger.getLogger(DiphoneUnitDatabase.class.getName());
101 private int sampleRate;
102 private int numChannels;
103 private int residualFold = 1;
104 private float lpcMin;
105 private float lpcRange;
106 private int lineCount = 0;
107 private Diphone defaultDiphone;
108 private Map diphoneMap = null;
109 private Map diphoneIndex;
110 private SampleInfo sampleInfo;
112 private boolean useNewIO =
113 Utilities.getProperty("com.sun.speech.freetts.useNewIO",
114 "true").equals("true");
115 // cache can be 'preload' 'none', 'soft' or 'hard'
116 private String cacheType =
117 Utilities.getProperty(
118 "com.sun.speech.freetts.diphone.UnitDatabase.cacheType",
120 private boolean useIndexing = !cacheType.equals("preload");
121 private boolean useCache = !cacheType.equals("demand");
122 private boolean useSoftCache = cacheType.equals("soft");
124 private final static int MAGIC = 0xFEEDFACE;
125 private final static int INDEX_MAGIC = 0xFACADE;
126 private final static int VERSION = 1;
127 private final static int MAX_DB_SIZE = 4 * 1024 * 1024;
129 private String indexName = null;
130 private MappedByteBuffer mbb = null;
131 private int defaultIndex = -1;
134 * Creates the DiphoneUnitDatabase from the given input stream.
136 * @param url the location of the database
137 * @param isBinary if <code>true</code> the database is in
138 * binary format; otherwise it is in text format
140 * @throws IOException if there is trouble opening the DB
142 public DiphoneUnitDatabase(URL url, boolean isBinary) throws IOException {
143 // MS, 22.04.2005: Commented out the "if" clause:
144 // indexing is applied only when useNewIO is turned on and
145 // data is read from a FileInputStream. This is not true when useing
146 // the default settings but setting
147 // com.sun.speech.freetts.diphone.UnitDatabase.cacheType=demand
148 //if (!useIndexing || useCache) {
149 diphoneMap = new LinkedHashMap();
151 InputStream is = Utilities.getInputStream(url);
153 indexName = getIndexName(url.toString());
161 sampleInfo = new SampleInfo(sampleRate, numChannels,
162 residualFold, lpcMin, lpcRange, 0.0f);
166 * Return the information about the sample data
169 * @return the sample info
172 SampleInfo getSampleInfo() {
178 * Returns the index name from the databaseName.
180 * @param databaseName the database name
182 * @return the index name or null if the database is not
185 * [[[ TODO the index should probably be incorporated into the
186 * binary database ]]]
188 private String getIndexName(String databaseName) {
189 String indexName = null;
190 if (databaseName.lastIndexOf(".") != -1) {
191 indexName = databaseName.substring(0,
192 databaseName.lastIndexOf(".")) + ".idx";
198 * Loads the database from the given input stream.
200 * @param is the input stream
202 private void loadText(InputStream is) {
203 BufferedReader reader;
207 throw new Error("Can't load diphone db file.");
210 reader = new BufferedReader(new InputStreamReader(is));
212 line = reader.readLine();
214 while (line != null) {
215 if (!line.startsWith("***")) {
216 parseAndAdd(line, reader);
218 line = reader.readLine();
221 } catch (IOException e) {
222 throw new Error(e.getMessage() + " at line " + lineCount);
228 * Parses and process the given line. Used to process the text
229 * form of the database.
231 * @param line the line to process
232 * @param reader the source for the lines
234 private void parseAndAdd(String line, BufferedReader reader) {
236 StringTokenizer tokenizer = new StringTokenizer(line," ");
237 String tag = tokenizer.nextToken();
238 if (tag.equals("NAME")) {
239 name = tokenizer.nextToken();
240 } else if (tag.equals("SAMPLE_RATE")) {
241 sampleRate = Integer.parseInt(tokenizer.nextToken());
242 } else if (tag.equals("NUM_CHANNELS")) {
243 numChannels = Integer.parseInt(tokenizer.nextToken());
244 } else if (tag.equals("LPC_MIN")) {
245 lpcMin = Float.parseFloat(tokenizer.nextToken());
246 } else if (tag.equals("COEFF_MIN")) {
247 lpcMin = Float.parseFloat(tokenizer.nextToken());
248 } else if (tag.equals("COEFF_RANGE")) {
249 lpcRange = Float.parseFloat(tokenizer.nextToken());
250 } else if (tag.equals("LPC_RANGE")) {
251 lpcRange = Float.parseFloat(tokenizer.nextToken());
252 } else if (tag.equals("ALIAS")) {
253 String name = tokenizer.nextToken();
254 String origName = tokenizer.nextToken();
255 AliasDiphone diphone = new AliasDiphone(name, origName);
257 } else if (tag.equals("DIPHONE")) {
258 String name = tokenizer.nextToken();
259 int start = Integer.parseInt(tokenizer.nextToken());
260 int mid = Integer.parseInt(tokenizer.nextToken());
261 int end = Integer.parseInt(tokenizer.nextToken());
262 int numSamples = (end - start);
263 int midPoint = mid - start;
265 if (numChannels <= 0) {
266 throw new Error("For diphone '"+name+"': Bad number of channels " + numChannels);
269 if (numSamples <= 0) {
270 throw new Error("For diphone '"+name+"': Bad number of samples " + numSamples);
273 Sample[] samples = new Sample[numSamples];
275 for (int i = 0; i < samples.length; i++) {
276 samples[i] = new Sample(reader, numChannels);
278 Diphone diphone = new Diphone(name, samples, midPoint);
281 throw new Error("Unsupported tag " + tag);
283 } catch (NoSuchElementException nse) {
284 throw new Error("Error parsing db " + nse.getMessage());
285 } catch (NumberFormatException nfe) {
286 throw new Error("Error parsing numbers in db " + nfe.getMessage());
292 * Adds the given diphone to the DB. Diphones are kept in a map so
293 * they can be accessed by name.
295 * @param diphone the diphone to add.
297 private void add(Diphone diphone) {
298 if (diphone instanceof AliasDiphone) {
299 AliasDiphone adiph = (AliasDiphone) diphone;
300 Diphone original = (Diphone)
301 diphoneMap.get(adiph.getOriginalName());
302 if (original != null) {
303 adiph.setOriginalDiphone(original);
305 // No original was found for this alias
306 // -- complain, and ignore
307 if (LOGGER.isLoggable(Level.FINER)) {
308 LOGGER.finer("For diphone alias "
309 +adiph.getName()+", could not find original "
310 +adiph.getOriginalName());
315 diphoneMap.put(diphone.getName(), diphone);
316 if (defaultDiphone == null) {
317 defaultDiphone = diphone;
322 * Looks up the diphone with the given name.
324 * @param unitName the name of the diphone to look for
326 * @return the diphone or the defaultDiphone if not found.
328 public Diphone getUnit(String unitName) {
329 Diphone diphone = null;
332 diphone = getFromCache(unitName);
333 if (diphone == null) {
334 int index = getIndex(unitName);
338 diphone = Diphone.loadBinary(mbb);
339 if (diphone != null) {
340 // If diphone is an alias, must also get the original
341 if (diphone instanceof AliasDiphone) {
342 AliasDiphone adiph = (AliasDiphone) diphone;
343 Diphone original = getUnit(adiph.getOriginalName());
344 if (original != null) {
345 adiph.setOriginalDiphone(original);
346 putIntoCache(unitName, adiph);
348 // No original was found for this alias
349 // -- complain, and ignore
350 if (LOGGER.isLoggable(Level.FINER)) {
351 LOGGER.finer("For diphone alias "
352 +adiph.getName()+", could not find original "
353 +adiph.getOriginalName());
357 } else { // a normal diphone
358 putIntoCache(unitName, diphone);
361 } catch (IOException ioe) {
362 System.err.println("Can't load diphone " +
369 diphone = (Diphone) diphoneMap.get(unitName);
372 if (diphone == null) {
373 System.err.println("Can't find diphone " + unitName);
374 diphone = defaultDiphone;
381 * Gets the named diphone from the cache. If we are using soft
382 * caching, the reference may be a soft/weak reference so check to
383 * see if the reference is still valid, if so return it; otherwise
384 * invalidate it. Note that we have not had good success with weak
385 * caches so far. The goal is to reduce the minimum required
386 * memory footprint as far as possible while not compromising
387 * performance. In small memory systems, the weak cache would
388 * likely be reclaimed, giving us lower performance but with the
389 * ability to still be able to run. In reality, the soft caches
390 * did not help much. They just did not work correctly.
391 * [[[ TODO: test weak/soft cache behavior with new versions of
392 * the runtime to see if their behavior has improved ]]]
394 * @param name the name of the diphone
396 * @return the diphone or <code> null </code> if not in the cache
398 private Diphone getFromCache(String name) {
399 if (diphoneMap == null) {
402 Diphone diphone = null;
405 Reference ref = (Reference) diphoneMap.get(name);
407 diphone = (Diphone) ref.get();
408 if (diphone == null) {
409 diphoneMap.remove(name);
414 diphone = (Diphone) diphoneMap.get(name);
420 * Puts the diphone in the cache.
422 * @param diphoneName the name of the diphone
423 * @param diphone the diphone to put in the cache
425 private void putIntoCache(String diphoneName, Diphone diphone) {
426 if (diphoneMap == null) {
430 diphoneMap.put(diphoneName, new WeakReference(diphone));
432 diphoneMap.put(diphoneName, diphone);
437 * Dumps the soft ref cache.
439 private void dumpCacheSize() {
442 System.out.println("Entries: " + diphoneMap.size());
443 for (Iterator i = diphoneMap.values().iterator(); i.hasNext(); ) {
444 Reference ref = (Reference) i.next();
445 if (ref.get() == null) {
451 System.out.println(" empty: " + empty);
452 System.out.println(" full: " + full);
457 * Returns the name of this DiphoneUnitDatabase.
459 public String getName() {
464 * Dumps the diphone database.
467 System.out.println("Name " + name);
468 System.out.println("SampleRate " + sampleRate);
469 System.out.println("NumChannels " + numChannels);
470 System.out.println("lpcMin " + lpcMin);
471 System.out.println("lpcRange " + lpcRange);
473 for (Iterator i = diphoneMap.values().iterator(); i.hasNext(); ) {
474 Diphone diphone = (Diphone) i.next();
480 * Dumps a binary form of the database.
482 * @param path the path to dump the file to
484 public void dumpBinary(String path) {
486 FileOutputStream fos = new FileOutputStream(path);
487 DataOutputStream os = new DataOutputStream(fos);
491 os.writeInt(VERSION);
492 os.writeInt(sampleRate);
493 os.writeInt(numChannels);
494 os.writeFloat(lpcMin);
495 os.writeFloat(lpcRange);
496 os.writeInt(diphoneMap.size());
498 for (Iterator i = diphoneMap.values().iterator(); i.hasNext();) {
499 Diphone diphone = (Diphone) i.next();
500 diphone.dumpBinary(os);
505 } catch (FileNotFoundException fe) {
506 throw new Error("Can't dump binary database " +
508 } catch (IOException ioe) {
509 throw new Error("Can't write binary database " +
515 * Dumps a binary index. The database index is used if our
516 * cacheType is not set to 'preload' and we are loading a binary
517 * database. The index is a simple mapping of diphone names (the
518 * key) to the file position in the database. In situations where
519 * the entire database is not preloaded, this index can be loaded
520 * and used to provide quicker startup (since only the index need
521 * be loaded at startup) and quick access to the diphone data.
523 * @param path the path to dump the file to
525 void dumpBinaryIndex(String path) {
527 FileOutputStream fos = new FileOutputStream(path);
528 DataOutputStream dos = new DataOutputStream(fos);
530 dos.writeInt(INDEX_MAGIC);
531 dos.writeInt(diphoneIndex.keySet().size());
533 for (Iterator i = diphoneIndex.keySet().iterator(); i.hasNext();) {
534 String key = (String) i.next();
535 int pos = ((Integer) diphoneIndex.get(key)).intValue();
541 } catch (FileNotFoundException fe) {
542 throw new Error("Can't dump binary index " +
544 } catch (IOException ioe) {
545 throw new Error("Can't write binary index " +
551 * Loads a binary index.
553 * @param url the location of the binary index file
555 private void loadBinaryIndex(URL url) {
557 diphoneIndex = new HashMap();
560 InputStream is = Utilities.getInputStream(url);
561 DataInputStream dis = new DataInputStream(is);
563 if (dis.readInt() != INDEX_MAGIC) {
564 throw new Error("Bad index file format");
567 int size = dis.readInt();
569 for (int i = 0; i < size; i++) {
570 String diphoneName = dis.readUTF();
571 int pos = dis.readInt();
572 diphoneIndex.put(diphoneName, new Integer(pos));
576 } catch (FileNotFoundException fe) {
577 throw new Error("Can't load binary index " +
579 } catch (IOException ioe) {
580 throw new Error("Can't read binary index " +
586 * Gets the index for the given diphone.
588 * @param diphone the name of the diphone
590 * @return the index into the database for the diphone
592 private int getIndex(String diphone) {
593 Integer index = (Integer) diphoneIndex.get(diphone);
595 int idx = index.intValue();
596 if (defaultIndex == -1) {
601 System.out.println("Can't find index entry for " + diphone);
609 * Loads a binary file from the input stream.
611 * Note that we currently have four! methods of loading up the
612 * database. We were interested in the performance characteristics
613 * of the various methods of loading the database so we coded it
616 * @param is the input stream to read the database
619 * @throws IOException if there is trouble opening the DB
622 private void loadBinary(InputStream is) throws IOException {
623 // we get better performance if we can map the file in
624 // 1.0 seconds vs. 1.75 seconds, but we can't
625 // always guarantee that we can do that.
626 if (useNewIO && is instanceof FileInputStream) {
627 FileInputStream fis = (FileInputStream) is;
629 loadBinaryIndex(new URL(indexName));
632 loadMappedBinary(fis);
635 useIndexing = false; // just to make this clear
636 DataInputStream dis = new DataInputStream(
637 new BufferedInputStream(is));
644 * Loads the binary data from the given input stream.
646 * @param dis the data input stream.
648 private void loadBinary(DataInputStream dis) throws IOException {
650 if (dis.readInt() != MAGIC) {
651 throw new Error("Bad magic in db");
653 if (dis.readInt() != VERSION) {
654 throw new Error("Bad VERSION in db");
657 sampleRate = dis.readInt();
658 numChannels = dis.readInt();
659 lpcMin = dis.readFloat();
660 lpcRange = dis.readFloat();
661 size = dis.readInt();
663 for (int i = 0; i < size; i++) {
664 Diphone diphone = Diphone.loadBinary(dis);
671 * Loads the database from the given FileInputStream.
673 * @param is the InputStream to load the database from
675 * @throws IOException if there is trouble opening the DB
677 private void loadMappedBinary(FileInputStream is) throws IOException {
678 FileChannel fc = is.getChannel();
680 MappedByteBuffer bb =
681 fc.map(FileChannel.MapMode.READ_ONLY, 0, (int) fc.size());
688 * Maps the database from the given FileInputStream.
690 * @param is the InputStream to load the database from
692 * @throws IOException if there is trouble opening the DB
694 private void mapDatabase(FileInputStream is) throws IOException {
695 FileChannel fc = is.getChannel();
696 mbb = fc.map(FileChannel.MapMode.READ_ONLY, 0, (int) fc.size());
698 loadDatabaseHeader(mbb);
702 * Loads the database header from the given byte buffer.
704 * @param bb the byte buffer to load the db from
706 * @throws IOException if there is trouble opening the DB
708 private void loadDatabaseHeader(ByteBuffer bb) throws IOException {
709 if (bb.getInt() != MAGIC) {
710 throw new Error("Bad magic in db");
712 if (bb.getInt() != VERSION) {
713 throw new Error("Bad VERSION in db");
716 sampleRate = bb.getInt();
717 numChannels = bb.getInt();
718 lpcMin = bb.getFloat();
719 lpcRange = bb.getFloat();
723 * Loads the database from the given byte buffer.
725 * @param bb the byte buffer to load the db from
727 * @throws IOException if there is trouble opening the DB
729 private void loadDatabase(ByteBuffer bb) throws IOException {
731 loadDatabaseHeader(bb);
734 diphoneIndex = new HashMap();
735 for (int i = 0; i < size; i++) {
736 int pos = bb.position();
737 Diphone diphone = Diphone.loadBinary(bb);
739 diphoneIndex.put(diphone.getName(), new Integer(pos));
744 * Compares this database to another. This is used for testing.
745 * With this method we can load up two databases (one perhaps from
746 * a text source and one from a binary source) and compare to
747 * verify that the dbs are identical
749 * @param other the other database
751 * @return <code>true</code> if the DBs are identical;
752 * otherwise <code>false</code>
754 public boolean compare(DiphoneUnitDatabase other) {
755 if (sampleRate != other.sampleRate) {
759 if (numChannels != other.numChannels) {
763 if (lpcMin != other.lpcMin) {
767 if (lpcRange != other.lpcRange) {
771 for (Iterator i = diphoneMap.values().iterator(); i.hasNext(); ) {
772 Diphone diphone = (Diphone) i.next();
773 Diphone otherDiphone = (Diphone) other.getUnit(diphone.getName());
774 if (!diphone.compare(otherDiphone)) {
775 System.out.println("Diphones differ:");
776 System.out.println("THis:");
778 System.out.println("Other:");
788 * Manipulates a DiphoneUnitDatabase. This program is typically
789 * used to generate the binary form (with index) of the
790 * DiphoneUnitDatabase from the text form. Additionally, this program
791 * can be used to compare two databases to see if they are
792 * identical (used for testing).
797 * <code> java com.sun.speech.freetts.diphone.DiphoneUnitDatabase
803 * <li> <code> -src path </code> provides a directory
804 * path to the source text for the database
805 * <li> <code> -dest path </code> provides a directory
806 * for where to place the resulting binaries
807 * <li> <code> -generate_binary [filename] </code>
809 * version of the database and generates the binary
810 * version of the database.
811 * <li> <code> -compare </code> Loads the text and
812 * binary versions of the database and compares them to
813 * see if they are equivalent.
814 * <li> <code> -showTimes </code> shows timings for any
815 * loading, comparing or dumping operation
819 public static void main(String[] args) {
820 boolean showTimes = false;
821 String srcPath = ".";
822 String destPath = ".";
825 if (args.length > 0) {
826 BulkTimer timer = BulkTimer.LOAD;
828 for (int i = 0 ; i < args.length; i++) {
829 if (args[i].equals("-src")) {
831 } else if (args[i].equals("-dest")) {
832 destPath = args[++i];
833 } else if (args[i].equals("-generate_binary")) {
834 String name = "diphone_units.txt";
835 if (i + 1 < args.length) {
836 String nameArg = args[++i];
837 if (!nameArg.startsWith("-")) {
842 int suffixPos = name.lastIndexOf(".txt");
844 String binaryName = "diphone_units.bin";
845 if (suffixPos != -1) {
846 binaryName = name.substring(0, suffixPos) + ".bin";
849 String indexName = "diphone_units.idx";
851 if (suffixPos != -1) {
852 indexName = name.substring(0, suffixPos) + ".idx";
855 System.out.println("Loading " + name);
856 timer.start("load_text");
857 DiphoneUnitDatabase udb = new DiphoneUnitDatabase(
859 + srcPath + "/" + name), false);
860 timer.stop("load_text");
862 System.out.println("Dumping " + binaryName);
863 timer.start("dump_binary");
864 udb.dumpBinary(destPath + "/" + binaryName);
865 timer.stop("dump_binary");
867 timer.start("load_binary");
868 DiphoneUnitDatabase budb =
869 new DiphoneUnitDatabase(
871 + destPath + "/" + binaryName),
873 timer.stop("load_binary");
875 System.out.println("Dumping " + indexName);
876 timer.start("dump index");
877 budb.dumpBinaryIndex(destPath + "/" + indexName);
878 timer.stop("dump index");
879 } else if (args[i].equals("-compare")) {
881 timer.start("load_text");
882 DiphoneUnitDatabase udb = new DiphoneUnitDatabase(
883 new URL("file:./diphone_units.txt"), false);
884 timer.stop("load_text");
886 timer.start("load_binary");
887 DiphoneUnitDatabase budb =
888 new DiphoneUnitDatabase(
889 new URL("file:./diphone_units.bin"), true);
890 timer.stop("load_binary");
892 timer.start("compare");
893 if (udb.compare(budb)) {
894 System.out.println("other compare ok");
896 System.out.println("other compare different");
898 timer.stop("compare");
899 } else if (args[i].equals("-showtimes")) {
902 System.out.println("Unknown option " + args[i]);
907 timer.show("DiphoneUnitDatabase");
910 System.out.println("Options: ");
911 System.out.println(" -src path");
912 System.out.println(" -dest path");
913 System.out.println(" -compare");
914 System.out.println(" -generate_binary");
915 System.out.println(" -showTimes");
917 } catch (IOException ioe) {
918 System.err.println(ioe);