2 * Portions Copyright 2003 Sun Microsystems, Inc.
3 * Portions Copyright 1999-2001 Language Technologies Institute,
4 * Carnegie Mellon University.
5 * All Rights Reserved. Use is subject to license terms.
7 * See the file "license.terms" for information on usage and
8 * redistribution of this file, and for a DISCLAIMER OF ALL
11 package com.sun.speech.freetts.clunits;
13 import java.io.BufferedOutputStream;
14 import java.io.BufferedReader;
15 import java.io.DataInputStream;
16 import java.io.DataOutputStream;
17 import java.io.FileInputStream;
18 import java.io.FileNotFoundException;
19 import java.io.FileOutputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.io.InputStreamReader;
24 import java.nio.ByteBuffer;
25 import java.nio.MappedByteBuffer;
26 import java.nio.channels.FileChannel;
27 import java.util.ArrayList;
28 import java.util.HashMap;
29 import java.util.Iterator;
30 import java.util.List;
32 import java.util.NoSuchElementException;
33 import java.util.StringTokenizer;
35 import com.sun.speech.freetts.cart.CART;
36 import com.sun.speech.freetts.cart.CARTImpl;
37 import com.sun.speech.freetts.relp.SampleInfo;
38 import com.sun.speech.freetts.relp.SampleSet;
39 import com.sun.speech.freetts.util.BulkTimer;
40 import com.sun.speech.freetts.util.Utilities;
44 * Provides support for the cluster unit database. The use of the
45 * cluster unit database is confined to this clunits package. This
46 * class provides a main program that can be used to convert from a
47 * text version of the database to a binary version of the database.
49 * The ClusterUnitDatabase can be loaded from a text or a binary
50 * source. The binary form of the database loads much faster and
51 * therefore is generally used in a deployed system.
54 public class ClusterUnitDatabase {
56 final static int CLUNIT_NONE = 65535;
58 private DatabaseClusterUnit[] units;
59 private UnitType[] unitTypes;
60 private SampleSet sts;
61 private SampleSet mcep;
63 private UnitOriginInfo[] unitOrigins; // for debugging
65 private int continuityWeight;
66 private int optimalCoupling;
67 private int extendSelections;
68 private int joinMethod;
69 private int[] joinWeights;
70 private int joinWeightShift;
72 private Map cartMap = new HashMap();
73 private CART defaultCart = null;
75 private transient List unitList;
76 private transient int lineCount;
77 private transient List unitTypesList;
79 private final static int MAGIC = 0xf0cacc1a;
80 private final static int VERSION = 0x1000;
84 * Creates the UnitDatabase from the given URL.
86 * @param url the URL to read the database from
87 * @param isBinary <code>true</code> if the data at the URL is in binary form
89 * @throws IOException if there is trouble opening the DB
// Package-private constructor. Brackets its work with BulkTimer.LOAD
// start/stop calls, opens the URL through Utilities.getInputStream, and then
// tries to load unit-origin debug info from a sibling ".debug" resource.
// NOTE(review): the lines that consume `is` (and use isBinary) are not
// visible in this view.
91 ClusterUnitDatabase(URL url, boolean isBinary) throws IOException {
92 BulkTimer.LOAD.start("ClusterUnitDatabase");
93 InputStream is = Utilities.getInputStream(url);
100 // Attempt to load debug info from a .debug resource.
101 // This will silently fail if no debug info is available.
102 String urlString = url.toExternalForm();
// The debug URL is the database URL with everything from its last "." onward
// replaced by ".debug".
// NOTE(review): lastIndexOf(".") returns -1 when the URL has no dot, and
// substring(0, -1) then throws StringIndexOutOfBoundsException, which the
// IOException handler below does not catch — confirm callers always pass a
// name containing a dot.
103 URL debugURL = new URL(urlString.substring(0, urlString.lastIndexOf(".")) + ".debug");
105 InputStream debugInfoStream = Utilities.getInputStream(debugURL);
106 loadUnitOrigins(debugInfoStream);
107 } catch (IOException ioe) {
108 // Silently ignore if you cannot load the debug info
110 BulkTimer.LOAD.stop("ClusterUnitDatabase");
115 * Retrieves the begininning sample index for the
118 * @param unitEntry the entry of interest
120 * @return the begininning sample index
122 int getStart(int unitEntry) {
123 return units[unitEntry].start;
127 * Retrieves the ending sample index for the
130 * @param unitEntry the entry of interest
132 * @return the ending sample index
134 int getEnd(int unitEntry) {
135 return units[unitEntry].end;
139 * Retrieves the phone for the given entry
141 * @param unitEntry the entry of interest
143 * @return the phone for the entry
145 int getPhone(int unitEntry) {
146 return units[unitEntry].phone;
150 * Returns the cart of the given unit type.
152 * @param unitType the type of cart
// Looks up the CART registered in cartMap under the given unit type name.
// A message naming the missing tree is printed to System.err and defaultCart
// is returned on the failure path.
// NOTE(review): the condition guarding that path and the normal return are
// not visible in this view; presumably the guard is a null check on the
// lookup result — confirm.
156 CART getTree(String unitType) {
157 CART cart = (CART) cartMap.get(unitType);
160 System.err.println("ClusterUnitDatabase: can't find tree for "
162 return defaultCart; // "graceful" failure
168 * Retrieves the unit type index for the given name.
170 * @param name the name
172 * @return the index for the name
174 // [[[TODO: perhaps replace this with java.util.Arrays.binarySearch]]]
// Searches the unitTypes array for the entry with the given name. While
// start < end it probes the midpoint entry and compares that entry's name
// with the requested one using String.compareTo.
// NOTE(review): the code that narrows the range and produces the result is
// not visible here; a midpoint search like this presumably requires
// unitTypes to be sorted by name — confirm against the database generator.
175 int getUnitTypeIndex(String name) {
176 int start, end, mid, c;
179 end = unitTypes.length;
181 while (start < end) {
// NOTE(review): (start + end) / 2 overflows int only when the sum exceeds
// Integer.MAX_VALUE; (start + end) >>> 1 avoids that if it ever matters.
182 mid = (start + end) / 2;
183 c = unitTypes[mid].getName().compareTo(name);
196 * Retrieves the unit index given a unit type and val.
198 * @param unitType the type of the unit
199 * @param instance the value associated with the unit
// Resolves a unit type name plus an instance number to a unit index: the
// result is the type's start offset plus the instance number.
// NOTE(review): the guards around the two error() calls and whatever
// fallback values they assign are only partly visible in this view.
203 int getUnitIndex(String unitType, int instance) {
204 int i = getUnitTypeIndex(unitType);
206 error("getUnitIndex: can't find unit type " + unitType);
// An instance number at or beyond the type's count is reported via error().
// NOTE(review): no lower-bound check on instance is visible here.
209 if (instance >= unitTypes[i].getCount()) {
210 error("getUnitIndex: can't find instance "
211 + instance + " of " + unitType);
214 return unitTypes[i].getStart() + instance;
219 * Retrieves the unit index for the given unit name.
221 * @param name the name
223 * @return the index for the name
// Splits a unit name at its last underscore: the text before it is the unit
// type and the text after it is parsed as the instance number; the pair is
// then passed to getUnitIndex.
225 int getUnitIndexName(String name) {
226 int lastIndex = name.lastIndexOf('_');
// A name without any underscore is reported via error().
// NOTE(review): what is returned in that case is not visible in this view.
227 if (lastIndex == -1) {
228 error("getUnitIndexName: bad unit name " + name);
// NOTE(review): Integer.parseInt throws NumberFormatException when the text
// after the last '_' is not a decimal integer; no handler is visible here.
231 int index = Integer.parseInt(name.substring(lastIndex + 1));
232 String type = name.substring(0, lastIndex);
233 return getUnitIndex(type, index);
237 * Retrieves the extend selections setting.
239 * @return the extend selections setting
241 int getExtendSelections() {
242 return extendSelections;
246 * Gets the next unit.
248 * @return the next unit
250 int getNextUnit(int which) {
251 return units[which].next;
255 * Gets the previous units.
257 * @param which which unit is of interest
259 * @return the previous unit
261 int getPrevUnit(int which) {
262 return units[which].prev;
267 * Determines if the unit types are equal.
269 * @param unitA the index of unit a
270 * @param unitB the index of unit B
272 * @return <code>true</code> if the types of units a and b are
273 * equal; otherwise return <code>false</code>
275 boolean isUnitTypeEqual(int unitA, int unitB) {
276 return units[unitA].type == units[unitB].type;
277 // String nameA = units[unitA].getName();
278 // String nameB = units[unitB].getName();
279 // int lastUnderscore = nameA.lastIndexOf('_');
280 // return nameA.regionMatches(0, nameB, 0, lastUnderscore + 1);
284 * Retrieves the optimal coupling setting.
286 * @return the optimal coupling setting
288 int getOptimalCoupling() {
289 return optimalCoupling;
293 * Retrieves the continuity weight setting.
296 * @return the continuity weight setting
298 int getContinuityWeight() {
299 return continuityWeight;
303 * Retrieves the join weights.
305 * @return the join weights
307 int[] getJoinWeights() {
313 * Looks up the unit with the given name.
315 * @param unitName the name of the unit to look for
317 * @return the unit or the defaultUnit if not found.
319 DatabaseClusterUnit getUnit(String unitName) {
324 * Looks up the unit with the given index.
326 * @param which the index of the unit to look for
330 DatabaseClusterUnit getUnit(int which) {
335 * Looks up the origin info for the unit with the given index.
337 * @param which the index of the unit to look for
339 * @return the origin info for the unit, or null if none is available
// Returns the origin record stored for the given unit index when the
// unitOrigins array has been loaded.
// NOTE(review): the fallback taken when unitOrigins is null is not visible
// in this view.
341 UnitOriginInfo getUnitOriginInfo(int which) {
342 if (unitOrigins != null)
343 return unitOrigins[which];
350 * Returns the name of this UnitDatabase.
352 * @return the name of the database
355 return "ClusterUnitDatabase";
359 * Returns the sample info for this set of data.
361 * @return the sample info
363 SampleInfo getSampleInfo() {
364 return sts.getSampleInfo();
369 * Gets the sample list.
371 * @return the sample list
378 * Gets the Mel Cepstra list.
380 * @return the Mel Cepstra list
382 SampleSet getMcep() {
387 * Determines if the application of the given join weights could
388 * be applied as a simple right-shift. If so return the shift
389 * otherwise return 0.
391 * @return the amount to right shift (or zero if not possible)
393 int getJoinWeightShift() {
394 return joinWeightShift;
399 * Calculates the join weight shift.
401 * @param joinWeights the weights to check
403 * @return the amount to right shift (or zero if not possible)
// Derives a shift amount from the join weights: every weight is compared
// against the first one, and 65536 is then divided by that first weight to
// obtain a divisor that is matched against fixed values.
// NOTE(review): the bodies of the branches (what is returned for each case)
// are not visible in this view.
405 private int calcJoinWeightShift(int[] joinWeights) {
// NOTE(review): index 0 is read unconditionally, so an empty array throws
// ArrayIndexOutOfBoundsException.
406 int first = joinWeights[0];
407 for (int i = 1; i < joinWeights.length; i++) {
408 if (joinWeights[i] != first) {
// NOTE(review): integer division; a first weight of 0 throws
// ArithmeticException here.
413 int divisor = 65536 / first;
416 } else if (divisor == 4) {
423 * Loads the database from the given input stream.
425 * @param is the input stream
// Reads the text form of the database line by line. Lines starting with
// "***" are skipped; every other line is handed to parseAndAdd together with
// the reader. Afterwards the collected lists are converted into the units
// and unitTypes arrays.
// NOTE(review): the enclosing try, the guard in front of the first Error
// and the line counting are not visible in this view.
427 private void loadText(InputStream is) {
428 BufferedReader reader;
432 unitList = new ArrayList();
433 unitTypesList = new ArrayList();
436 throw new Error("Can't load cluster db file.");
// No charset is passed, so InputStreamReader decodes with the platform
// default charset.
439 reader = new BufferedReader(new InputStreamReader(is));
441 line = reader.readLine();
443 while (line != null) {
444 if (!line.startsWith("***")) {
445 parseAndAdd(line, reader);
447 line = reader.readLine();
451 units = new DatabaseClusterUnit[unitList.size()];
452 units = (DatabaseClusterUnit[]) unitList.toArray(units);
455 unitTypes = new UnitType[unitTypesList.size()];
456 unitTypes = (UnitType[]) unitTypesList.toArray(unitTypes);
457 unitTypesList = null;
459 } catch (IOException e) {
// NOTE(review): only the message text is carried over; the original
// IOException is not attached as the cause.
460 throw new Error(e.getMessage() + " at line " + lineCount);
466 * Parses and process the given line.
468 * @param line the line to process
469 * @param reader the source for the lines
471 * @throws IOException if an error occurs while reading
// Handles one line of the text database. The line is split on spaces; the
// first token is a tag that selects which field or collection the remaining
// tokens populate. Constructors for SampleSet and CARTImpl receive the
// reader, so they can consume further lines themselves.
// NOTE(review): several statements of this method (the throws clause, the
// try, the tail of the UNITS branch, the final else) are not visible here.
473 private void parseAndAdd(String line, BufferedReader reader)
476 StringTokenizer tokenizer = new StringTokenizer(line," ");
477 String tag = tokenizer.nextToken();
// Scalar settings: one integer token each.
478 if (tag.equals("CONTINUITY_WEIGHT")) {
479 continuityWeight = Integer.parseInt(tokenizer.nextToken());
480 } else if (tag.equals("OPTIMAL_COUPLING")) {
481 optimalCoupling = Integer.parseInt(tokenizer.nextToken());
482 } else if (tag.equals("EXTEND_SELECTIONS")) {
483 extendSelections = Integer.parseInt(tokenizer.nextToken());
484 } else if (tag.equals("JOIN_METHOD")) {
485 joinMethod = Integer.parseInt(tokenizer.nextToken());
// JOIN_WEIGHTS: a count followed by that many integer weights; the shift is
// computed from the weights once they are read.
486 } else if (tag.equals("JOIN_WEIGHTS")) {
487 int numWeights = Integer.parseInt(tokenizer.nextToken());
488 joinWeights = new int[numWeights];
489 for (int i = 0; i < numWeights; i++) {
490 joinWeights[i] = Integer.parseInt(tokenizer.nextToken());
493 joinWeightShift = calcJoinWeightShift(joinWeights);
// STS: the token after the tag names the sample set; "STS" fills sts.
// NOTE(review): the mcep assignment presumably sits in the else branch,
// which is not visible here — confirm.
495 } else if (tag.equals("STS")) {
496 String name = tokenizer.nextToken();
497 if (name.equals("STS")) {
498 sts = new SampleSet(tokenizer, reader);
500 mcep = new SampleSet(tokenizer, reader);
// UNITS: six integers in the order type, phone, start, end, prev, next.
502 } else if (tag.equals("UNITS")) {
503 int type = Integer.parseInt(tokenizer.nextToken());
504 int phone = Integer.parseInt(tokenizer.nextToken());
505 int start = Integer.parseInt(tokenizer.nextToken());
506 int end = Integer.parseInt(tokenizer.nextToken());
507 int prev = Integer.parseInt(tokenizer.nextToken());
508 int next = Integer.parseInt(tokenizer.nextToken());
509 DatabaseClusterUnit unit
510 = new DatabaseClusterUnit(type, phone, start,
// CART: a name and a node count; the tree itself is read from the reader and
// stored in cartMap under the name.
513 } else if (tag.equals("CART")) {
514 String name = tokenizer.nextToken();
515 int nodes = Integer.parseInt(tokenizer.nextToken());
516 CART cart = new CARTImpl(reader, nodes);
517 cartMap.put(name, cart);
// NOTE(review): the body of this check is not visible; presumably the first
// cart read becomes defaultCart — confirm.
519 if (defaultCart == null) {
// UNIT_TYPE: a name, a start index and a count.
522 } else if (tag.equals("UNIT_TYPE")) {
523 String name = tokenizer.nextToken();
524 int start = Integer.parseInt(tokenizer.nextToken());
525 int count = Integer.parseInt(tokenizer.nextToken());
526 UnitType unitType = new UnitType(name, start, count);
527 unitTypesList.add(unitType);
529 throw new Error("Unsupported tag " + tag + " in db line `" + line + "'");
// Missing tokens and malformed numbers are both converted to Error.
531 } catch (NoSuchElementException nse) {
532 throw new Error("Error parsing db " + nse.getMessage());
533 } catch (NumberFormatException nfe) {
534 throw new Error("Error parsing numbers in db line `" + line + "':" + nfe.getMessage());
539 * Loads a binary file from the input stream.
541 * @param is the input stream to read the database from
543 * @throws IOException if there is trouble opening the DB
// Entry point for binary loading. A FileInputStream is mapped read-only
// through its FileChannel; the visible fallback wraps the stream in a
// DataInputStream and delegates to the DataInputStream overload.
// NOTE(review): the statements that use the mapped buffer and the else that
// presumably guards the fallback are not visible in this view.
546 private void loadBinary(InputStream is) throws IOException {
547 // we get better performance if we can map the file in
548 // 1.0 seconds vs. 1.75 seconds, but we can't
549 // always guarantee that we can do that.
550 if (is instanceof FileInputStream) {
551 FileInputStream fis = (FileInputStream) is;
552 FileChannel fc = fis.getChannel();
// NOTE(review): fc.size() returns a long; the (int) cast silently truncates
// for files larger than Integer.MAX_VALUE bytes.
554 MappedByteBuffer bb =
555 fc.map(FileChannel.MapMode.READ_ONLY, 0, (int) fc.size());
560 loadBinary(new DataInputStream(is));
565 * Loads the database from the given byte buffer.
567 * @param bb the byte buffer to load the db from
569 * @throws IOException if there is trouble opening the DB
// Reads the binary database from a ByteBuffer in a fixed order: magic,
// version, the five integer settings, the join weights, the units, the unit
// types, the two sample sets (sts then mcep) and finally the named carts.
571 private void loadBinary(ByteBuffer bb) throws IOException {
// The first two ints must equal MAGIC and VERSION; otherwise an Error is
// thrown.
573 if (bb.getInt() != MAGIC) {
574 throw new Error("Bad magic in db");
576 if (bb.getInt() != VERSION) {
577 throw new Error("Bad VERSION in db");
580 continuityWeight = bb.getInt();
581 optimalCoupling = bb.getInt();
582 extendSelections = bb.getInt();
583 joinMethod = bb.getInt();
584 joinWeightShift = bb.getInt();
// Each array below is preceded by its length.
// NOTE(review): lengths come straight from the input; a negative value
// would throw NegativeArraySizeException.
586 int weightLength = bb.getInt();
587 joinWeights = new int[weightLength];
588 for (int i = 0; i < joinWeights.length; i++) {
589 joinWeights[i] = bb.getInt();
592 int unitsLength = bb.getInt();
593 units = new DatabaseClusterUnit[unitsLength];
594 for (int i = 0; i < units.length; i++) {
595 units[i] = new DatabaseClusterUnit(bb);
598 int unitTypesLength = bb.getInt();
599 unitTypes = new UnitType[unitTypesLength];
600 for (int i = 0; i < unitTypes.length; i++) {
601 unitTypes[i] = new UnitType(bb);
603 sts = new SampleSet(bb);
604 mcep = new SampleSet(bb);
// Carts are stored as (name, tree) pairs and replace any existing cartMap.
606 int numCarts = bb.getInt();
607 cartMap = new HashMap();
608 for (int i = 0; i < numCarts; i++) {
609 String name = Utilities.getString(bb);
610 CART cart = CARTImpl.loadBinary(bb);
611 cartMap.put(name, cart);
// NOTE(review): the body of this check is not visible; presumably the first
// cart read becomes defaultCart — confirm.
613 if (defaultCart == null) {
620 * Loads the database from the given input stream.
622 * @param is the input stream to load the db from
624 * @throws IOException if there is trouble opening the DB
// Reads the binary database from a DataInputStream in the same order as the
// ByteBuffer overload: magic, version, the five integer settings, the join
// weights, the units, the unit types, the two sample sets (sts then mcep)
// and finally the named carts.
626 private void loadBinary(DataInputStream is) throws IOException {
// The first two ints must equal MAGIC and VERSION; otherwise an Error is
// thrown.
628 if (is.readInt() != MAGIC) {
629 throw new Error("Bad magic in db");
631 if (is.readInt() != VERSION) {
632 throw new Error("Bad VERSION in db");
635 continuityWeight = is.readInt();
636 optimalCoupling = is.readInt();
637 extendSelections = is.readInt();
638 joinMethod = is.readInt();
639 joinWeightShift = is.readInt();
// Each array below is preceded by its length.
// NOTE(review): lengths come straight from the input; a negative value
// would throw NegativeArraySizeException.
641 int weightLength = is.readInt();
642 joinWeights = new int[weightLength];
643 for (int i = 0; i < joinWeights.length; i++) {
644 joinWeights[i] = is.readInt();
647 int unitsLength = is.readInt();
648 units = new DatabaseClusterUnit[unitsLength];
649 for (int i = 0; i < units.length; i++) {
650 units[i] = new DatabaseClusterUnit(is);
653 int unitTypesLength = is.readInt();
654 unitTypes = new UnitType[unitTypesLength];
655 for (int i = 0; i < unitTypes.length; i++) {
656 unitTypes[i] = new UnitType(is);
658 sts = new SampleSet(is);
659 mcep = new SampleSet(is);
// Carts are stored as (name, tree) pairs and replace any existing cartMap.
661 int numCarts = is.readInt();
662 cartMap = new HashMap();
663 for (int i = 0; i < numCarts; i++) {
664 String name = Utilities.getString(is);
665 CART cart = CARTImpl.loadBinary(is);
666 cartMap.put(name, cart);
// NOTE(review): the body of this check is not visible; presumably the first
// cart read becomes defaultCart — confirm.
668 if (defaultCart == null) {
675 * Load debug info about the origin of units from the given input stream.
676 * The file format is identical to that of the Festvox .catalogue files.
677 * This is useful when creating and debugging new voices: For a selected
678 * unit, you can find out which unit from which original sound file
680 * @param is the input stream from which to read the debug info.
681 * @throws IOException if a read problem occurs.
// Fills unitOrigins (one slot per unit) from a text stream. Lines are
// skipped up to and including the one starting with "EST_Header_End"; each
// following line is split on single spaces, its first token is resolved to a
// unit index via getUnitIndexName, and tokens 1, 2 and 4 become the origin
// file, start and end.
// NOTE(review): units must already be loaded, since units.length sizes the
// array. The try that pairs with the catch below is not visible here.
683 private void loadUnitOrigins(InputStream is) throws IOException
685 unitOrigins = new UnitOriginInfo[units.length];
// No charset is passed, so the platform default charset is used.
686 BufferedReader in = new BufferedReader(new InputStreamReader(is));
688 String currentLine = null;
690 while ((currentLine = in.readLine()) != null) {
691 if (currentLine.startsWith("EST_Header_End")) break;
693 while ((currentLine = in.readLine()) != null) {
694 String[] tokens = currentLine.split(" ");
695 String name = tokens[0];
696 int index = getUnitIndexName(name);
// NOTE(review): a line with fewer than five tokens would throw
// ArrayIndexOutOfBoundsException at tokens[4]; token 3 is not read.
698 unitOrigins[index] = new UnitOriginInfo();
699 unitOrigins[index].originFile = tokens[1];
700 unitOrigins[index].originStart = Float.valueOf(tokens[2]).floatValue();
701 unitOrigins[index].originEnd = Float.valueOf(tokens[4]).floatValue();
// Unparseable numbers are ignored on purpose; the entry keeps whatever was
// assigned before the failure.
702 } catch (NumberFormatException nfe) {}
709 * Dumps a binary form of the database.
711 * @param path the path to dump the file to
// Writes the database to the given path through a buffered DataOutputStream.
// The visible writes follow the order that the loadBinary methods read:
// version, the five integer settings, the join weights, the units, the unit
// types, and then the carts as (name, tree) pairs.
// NOTE(review): the try, the write of the magic number, the sample-set
// dumps, the cart dump and the close are not visible in this view.
713 void dumpBinary(String path) {
715 FileOutputStream fos = new FileOutputStream(path);
716 DataOutputStream os = new DataOutputStream(new
717 BufferedOutputStream(fos));
720 os.writeInt(VERSION);
721 os.writeInt(continuityWeight);
722 os.writeInt(optimalCoupling);
723 os.writeInt(extendSelections);
724 os.writeInt(joinMethod);
725 os.writeInt(joinWeightShift);
// Each array is preceded by its length.
726 os.writeInt(joinWeights.length);
727 for (int i = 0; i < joinWeights.length; i++) {
728 os.writeInt(joinWeights[i]);
731 os.writeInt(units.length);
732 for (int i = 0; i < units.length; i++) {
733 units[i].dumpBinary(os);
736 os.writeInt(unitTypes.length);
737 for (int i = 0; i < unitTypes.length; i++) {
738 unitTypes[i].dumpBinary(os);
// NOTE(review): cartMap is a HashMap, whose key iteration order is
// unspecified, so the order in which carts are written is not guaranteed.
743 os.writeInt(cartMap.size());
744 for (Iterator i = cartMap.keySet().iterator(); i.hasNext();) {
745 String name = (String) i.next();
746 CART cart = (CART) cartMap.get(name);
748 Utilities.outString(os, name);
753 // note that we are not currently saving the state
754 // of the default cart
// Both failure modes are converted to Error.
756 } catch (FileNotFoundException fe) {
757 throw new Error("Can't dump binary database " +
759 } catch (IOException ioe) {
760 throw new Error("Can't write binary database " +
767 * Determines if two databases are identical.
769 * @param other the database to compare this one to
771 * @return true if the databases are identical
// Placeholder: prints a warning to System.out saying the comparison is not
// implemented; the argument is not used by the visible code.
// NOTE(review): the return statement is not visible in this view.
773 public boolean compare(ClusterUnitDatabase other) {
774 System.out.println("Warning: Compare not implemented yet");
779 * Manipulates a ClusterUnitDatabase.
784 * <code> java com.sun.speech.freetts.clunits.ClusterUnitDatabase
790 * <li> <code> -src path </code> provides a directory
791 * path to the source text for the database
792 * <li> <code> -dest path </code> provides a directory
793 * for where to place the resulting binaries
794 * <li> <code> -generate_binary [filename]</code> reads
795 * in the text version of the database and generates
796 * the binary version of the database.
797 * <li> <code> -compare </code> Loads the text and
798 * binary versions of the database and compares them to
799 * see if they are equivalent.
800 * <li> <code> -showTimes </code> shows timings for any
801 * loading, comparing or dumping operation
// Command-line entry point. Arguments are handled left to right; with no
// arguments the option list is printed. IOExceptions are printed to
// System.err.
// NOTE(review): many statements of this method are not visible in this view
// (the try, several branch bodies and the else clauses).
805 public static void main(String[] args) {
806 boolean showTimes = false;
807 String srcPath = ".";
808 String destPath = ".";
811 if (args.length > 0) {
812 BulkTimer timer = new BulkTimer();
814 for (int i = 0 ; i < args.length; i++) {
815 if (args[i].equals("-src")) {
817 } else if (args[i].equals("-dest")) {
// NOTE(review): args[++i] throws ArrayIndexOutOfBoundsException when the
// option is the last argument.
818 destPath = args[++i];
// -generate_binary: loads the text database (default name "clunits.txt",
// optionally overridden by the next argument) from srcPath and dumps it to
// destPath, with a trailing ".txt" replaced by ".bin".
819 } else if (args[i].equals("-generate_binary")) {
820 String name = "clunits.txt";
821 if (i + 1 < args.length) {
822 String nameArg = args[++i];
823 if (!nameArg.startsWith("-")) {
828 int suffixPos = name.lastIndexOf(".txt");
830 String binaryName = "clunits.bin";
831 if (suffixPos != -1) {
832 binaryName = name.substring(0, suffixPos) + ".bin";
835 System.out.println("Loading " + name);
836 timer.start("load_text");
837 ClusterUnitDatabase udb = new
839 new URL("file:" + srcPath + "/" + name),
841 timer.stop("load_text");
843 System.out.println("Dumping " + binaryName);
844 timer.start("dump_binary");
845 udb.dumpBinary(destPath + "/" + binaryName);
846 timer.stop("dump_binary");
// -compare: loads the hard-coded files ./cmu_time_awb.txt and
// ./cmu_time_awb.bin (srcPath is not used here) and compares them.
848 } else if (args[i].equals("-compare")) {
850 timer.start("load_text");
851 ClusterUnitDatabase udb = new
853 new URL("file:./cmu_time_awb.txt"), false);
854 timer.stop("load_text");
856 timer.start("load_binary");
857 ClusterUnitDatabase budb =
858 new ClusterUnitDatabase(
859 new URL("file:./cmu_time_awb.bin"), true);
860 timer.stop("load_binary");
862 timer.start("compare");
863 if (udb.compare(budb)) {
864 System.out.println("other compare ok");
866 System.out.println("other compare different");
868 timer.stop("compare");
// NOTE(review): this test is case-sensitive and matches "-showtimes", while
// the usage text below advertises "-showTimes"; the advertised spelling
// would not match this branch.
869 } else if (args[i].equals("-showtimes")) {
872 System.out.println("Unknown option " + args[i]);
877 timer.show("ClusterUnitDatabase");
880 System.out.println("Options: ");
881 System.out.println(" -src path");
882 System.out.println(" -dest path");
883 System.out.println(" -compare");
884 System.out.println(" -generate_binary");
885 System.out.println(" -showTimes");
887 } catch (IOException ioe) {
888 System.err.println(ioe);
894 * Represents a unit for the cluster database.
896 class DatabaseClusterUnit {
908 * @param type the index of the unit's type
909 * @param phone the phone of the unit
910 * @param start the starting frame
911 * @param end the ending frame
912 * @param prev the previous index
913 * @param next the next index
915 DatabaseClusterUnit(int type, int phone, int start,
916 int end, int prev, int next) {
/**
 * Creates a unit by reading it from the given byte buffer.
 * Six ints are consumed in the order type, phone, start, end,
 * prev, next.
 *
 * @param bb source of the DatabaseClusterUnit data
 *
 * @throws IOException if an IO error occurs
 */
DatabaseClusterUnit(ByteBuffer bb) throws IOException {
    type = bb.getInt();
    phone = bb.getInt();
    start = bb.getInt();
    end = bb.getInt();
    prev = bb.getInt();
    next = bb.getInt();
}
/**
 * Creates a unit by reading it from the given input stream.
 * Six ints are consumed in the order type, phone, start, end,
 * prev, next.
 *
 * @param is source of the DatabaseClusterUnit data
 *
 * @throws IOException if an IO error occurs
 */
DatabaseClusterUnit(DataInputStream is) throws IOException {
    type = is.readInt();
    phone = is.readInt();
    start = is.readInt();
    end = is.readInt();
    prev = is.readInt();
    next = is.readInt();
}
958 * Returns the name of the unit.
963 return unitTypes[type].getName();
967 * Dumps this unit to the given output stream.
969 * @param os the output stream
971 * @throws IOException if an error occurs.
973 void dumpBinary(DataOutputStream os) throws IOException {
984 * Represents debug information about the origin of a unit.
986 class UnitOriginInfo {
993 * Displays an error message
995 * @param s the error message
997 private void error(String s) {
998 System.out.println("ClusterUnitDatabase Error: " + s);
1003 * Represents a unit type in the system
1006 private String name;
1011 * Constructs a UnitType from the given parameters
1013 * @param name the name of the type
1014 * @param start the starting index for this type
1015 * @param count the number of elements for this type
1017 UnitType(String name, int start, int count) {
/**
 * Creates a unit type by reading it from the given input stream.
 * The name is read first, followed by the start index and the
 * count.
 *
 * @param is source of the UnitType data
 *
 * @throws IOException if an IO error occurs
 */
UnitType(DataInputStream is) throws IOException {
    name = Utilities.getString(is);
    start = is.readInt();
    count = is.readInt();
}
/**
 * Creates a unit type by reading it from the given byte buffer.
 * The name is read first, followed by the start index and the
 * count.
 *
 * @param bb source of the UnitType data
 *
 * @throws IOException if an IO error occurs
 */
UnitType(ByteBuffer bb) throws IOException {
    name = Utilities.getString(bb);
    start = bb.getInt();
    count = bb.getInt();
}
1050 * Gets the name for this unit type
1052 * @return the name for the type
1059 * Gets the start index for this type
1061 * @return the start index
1068 * Gets the count for this type
1070 * @return the count for this type
1077 * Dumps this unit to the given output stream.
1079 * @param os the output stream
1081 * @throws IOException if an error occurs.
1083 void dumpBinary(DataOutputStream os) throws IOException {
1084 Utilities.outString(os, name);