2 * Portions Copyright 2001-2003 Sun Microsystems, Inc.
\r
3 * Portions Copyright 1999-2001 Language Technologies Institute,
\r
4 * Carnegie Mellon University.
\r
5 * All Rights Reserved. Use is subject to license terms.
\r
7 * See the file "license.terms" for information on usage and
\r
8 * redistribution of this file, and for a DISCLAIMER OF ALL
\r
11 package com.sun.speech.freetts.clunits;
\r
12 import java.io.IOException;
\r
13 import java.net.URL;
\r
14 import java.util.logging.Level;
\r
15 import java.util.logging.Logger;
\r
17 import com.sun.speech.freetts.FeatureSet;
\r
18 import com.sun.speech.freetts.FeatureSetImpl;
\r
19 import com.sun.speech.freetts.Item;
\r
20 import com.sun.speech.freetts.PathExtractor;
\r
21 import com.sun.speech.freetts.PathExtractorImpl;
\r
22 import com.sun.speech.freetts.ProcessException;
\r
23 import com.sun.speech.freetts.Relation;
\r
24 import com.sun.speech.freetts.Utterance;
\r
25 import com.sun.speech.freetts.UtteranceProcessor;
\r
26 import com.sun.speech.freetts.Voice;
\r
27 import com.sun.speech.freetts.cart.CART;
\r
28 import com.sun.speech.freetts.clunits.ClusterUnitDatabase.UnitOriginInfo;
\r
29 import com.sun.speech.freetts.relp.Sample;
\r
30 import com.sun.speech.freetts.relp.SampleInfo;
\r
31 import com.sun.speech.freetts.relp.SampleSet;
\r
33 import de.dfki.lt.freetts.ClusterUnitNamer;
\r
37 * Generates the Unit Relation of an Utterance from the
\r
41 public class ClusterUnitSelector implements UtteranceProcessor {
\r
42 /** Logger instance. */
\r
43 private static final Logger LOGGER =
\r
44 Logger.getLogger(ClusterUnitSelector.class.getName());
\r
46 private final static PathExtractor DNAME = new PathExtractorImpl(
\r
47 "R:SylStructure.parent.parent.name", true);
\r
48 private ClusterUnitDatabase clunitDB;
\r
49 private ClusterUnitNamer unitNamer;
\r
52 * Constructs a ClusterUnitSelector.
\r
54 * @param url the URL for the unit database. If the URL path ends
\r
55 * with a '.bin' it is assumed that the DB is a binary database,
\r
56 * otherwise, its assumed that its a text database1
\r
58 * @throws IOException if an error occurs while loading the
\r
62 public ClusterUnitSelector(URL url) throws IOException {
\r
67 * Constructs a ClusterUnitSelector.
\r
69 * @param url the URL for the unit database. If the URL path ends
\r
70 * with a '.bin' it is assumed that the DB is a binary database,
\r
71 * otherwise, its assumed that its a text database1
\r
72 * @param unitNamer an optional unit namer, specifying how the cluster
\r
73 * units are called in the voice database referenced by url. If this is null,
\r
74 * an ldom unit naming scheme will be used (e.g., 'ae_afternoon' for the
\r
75 * phoneme 'ae' in the word 'afternoon'.
\r
77 * @throws IOException if an error occurs while loading the
\r
81 public ClusterUnitSelector(URL url, ClusterUnitNamer unitNamer) throws IOException {
\r
83 throw new IOException("Can't load cluster unit database");
\r
85 boolean binary = url.getPath().endsWith(".bin");
\r
86 clunitDB = new ClusterUnitDatabase(url, binary);
\r
87 this.unitNamer = unitNamer;
\r
93 * Get the sample info for the underlying database.
\r
94 * @return the sample info object
\r
96 public SampleInfo getSampleInfo() {
\r
97 return clunitDB.getSampleInfo();
\r
101 * Generates the Unit Relation from the Segment Relation.
\r
102 * <br><b>Implementation note:</b><br>
\r
103 * Populates the segment relation with segment names of the form:
\r
104 * XX_YY where XX is the segment name (typically a phoneme)
\r
105 * and YY is the word that the segment is in (stripped and
\r
108 * The first step in cluster unit selection is to determine the unit
\r
109 * type for each unit in the utterance. The unit type for
\r
110 * selection in the simple talking clock example (cmu_time_awb) is
\r
111 * done per phone. The unit type consists of the phone
\r
112 * name followed by the word the phone comes from (e.g., n_now for
\r
113 * the phone 'n' in the word 'now').
\r
115 * Invoke the Viterbi algorithm (via a viterbi class) that
\r
116 * selects the proper units for the segment and adds that to
\r
117 * each segment item.
\r
119 * For each segment, create a unit and attach features based
\r
120 * upon the selected units.
\r
122 * @param utterance the utterance to generate the Unit Relation
\r
124 * @throws ProcessException if an IOException is thrown during the
\r
125 * processing of the utterance
\r
128 public void processUtterance(Utterance utterance) throws ProcessException {
\r
130 Relation segs = utterance.getRelation(Relation.SEGMENT);
\r
132 utterance.setObject(SampleInfo.UTT_NAME,
\r
133 clunitDB.getSampleInfo());
\r
134 utterance.setObject("sts_list", clunitDB.getSts());
\r
136 vd = new Viterbi(segs, clunitDB);
\r
138 for (Item s = segs.getHead(); s != null; s = s.getNext()) {
\r
142 // Carry out the CART lookup for the target costs, and the viterbi
\r
143 // search for finding the best path (join costs) through the candidates.
\r
146 // Now associate the candidate units in the best path
\r
147 // with the items in the segment relation.
\r
148 if (!vd.result("selected_unit")) {
\r
149 LOGGER.severe("clunits: can't find path");
\r
153 // If optimal coupling was used, the join points must now be copied
\r
154 // from the path elements to the actual items in the segment relation.
\r
155 vd.copyFeature("unit_prev_move");
\r
156 vd.copyFeature("unit_this_move");
\r
158 // Based on this data, create a Unit relation giving the details of the
\r
159 // units to concatenate.
\r
160 Relation unitRelation = utterance.createRelation(Relation.UNIT);
\r
162 for (Item s = segs.getHead(); s != null; s = s.getNext()) {
\r
163 Item unit = unitRelation.appendItem();
\r
164 FeatureSet unitFeatureSet = unit.getFeatures();
\r
165 int unitEntry = s.getFeatures().getInt("selected_unit");
\r
167 // The item name is the segment name
\r
168 unitFeatureSet.setString("name", s.getFeatures().getString("name"));
\r
172 String clunitName = s.getFeatures().getString("clunit_name");
\r
174 if (s.getFeatures().isPresent("unit_this_move")) {
\r
175 unitStart = s.getFeatures().getInt("unit_this_move");
\r
177 unitStart = clunitDB.getStart(unitEntry);
\r
180 if (s.getNext() != null &&
\r
181 s.getNext().getFeatures().isPresent("unit_prev_move")) {
\r
182 unitEnd = s.getNext().getFeatures().getInt("unit_prev_move");
\r
184 unitEnd = clunitDB.getEnd(unitEntry);
\r
187 unitFeatureSet.setInt("unit_entry", unitEntry);
\r
188 ClusterUnit clunit = new ClusterUnit(clunitDB,
\r
189 clunitName, unitStart, unitEnd);
\r
190 unitFeatureSet.setObject("unit", clunit);
\r
192 unitFeatureSet.setInt("unit_start", clunit.getStart());
\r
193 unitFeatureSet.setInt("unit_end", clunit.getEnd());
\r
194 unitFeatureSet.setInt("instance", unitEntry - clunitDB.getUnitIndex(clunitName, 0));
\r
195 } // add the rest of these things for debugging.
\r
197 if (LOGGER.isLoggable(Level.FINE)) {
\r
198 LOGGER.fine(" sr " + clunitDB.getSampleInfo().getSampleRate() + " " +
\r
199 s.getFeatures().getFloat("end") + " " +
\r
200 (int) (s.getFeatures().getFloat("end") *
\r
201 clunitDB.getSampleInfo().getSampleRate()));
\r
203 unitFeatureSet.setInt("target_end",
\r
204 (int) (s.getFeatures().getFloat("end")
\r
205 * clunitDB.getSampleInfo().getSampleRate()));
\r
207 // Associate debug info about unit origin if available:
\r
208 UnitOriginInfo unitOrigin = clunitDB.getUnitOriginInfo(unitEntry);
\r
209 if (unitOrigin != null) {
\r
210 unitFeatureSet.setString("origin", unitOrigin.originFile);
\r
211 unitFeatureSet.setFloat("origin_start", unitOrigin.originStart);
\r
212 unitFeatureSet.setFloat("origin_end", unitOrigin.originEnd);
\r
220 * Sets the cluster unit name given the segment.
\r
222 * @param seg the segment item that gets the name
\r
224 protected void setUnitName(Item seg) {
\r
225 if (unitNamer != null) {
\r
226 unitNamer.setUnitName(seg);
\r
229 // default to LDOM naming scheme 'ae_afternoon':
\r
230 String cname = null;
\r
232 String segName = seg.getFeatures().getString("name");
\r
234 Voice voice = seg.getUtterance().getVoice();
\r
235 String silenceSymbol = voice.getPhoneFeature("silence", "symbol");
\r
236 if (silenceSymbol == null)
\r
237 silenceSymbol = "pau";
\r
238 if (segName.equals(silenceSymbol)) {
\r
239 cname = silenceSymbol + "_" + seg.findFeature("p.name");
\r
241 // remove single quotes from name
\r
242 String dname = ((String) DNAME.findFeature(seg)).toLowerCase();
\r
243 cname = segName + "_" + stripQuotes(dname);
\r
245 seg.getFeatures().setString("clunit_name", cname);
\r
250 * Strips quotes from the given string.
\r
252 * @param s the string to strip quotes from
\r
254 * @return a string with all single quotes removed
\r
256 private String stripQuotes(String s) {
\r
257 StringBuffer sb = new StringBuffer(s.length());
\r
258 for (int i = 0; i < s.length(); i++) {
\r
259 char c = s.charAt(i);
\r
264 return sb.toString();
\r
269 * Retrieves the string representation of this object.
\r
271 * @return the string representation of this object
\r
273 public String toString() {
\r
274 return "ClusterUnitSelector";
\r
278 * Provides support for the Viterbi Algorithm.
\r
280 * Implementation Notes
\r
282 * For each candidate for the current unit, calculate the cost
\r
283 * between it and the first candidate in the next unit. Save
\r
284 * only the path that has the least cost. By default, if two
\r
285 * candidates come from units that are adjacent in the
\r
286 * database, the cost is 0 (i.e., they were spoken together,
\r
287 * so they are a perfect match).
\r
290 * Repeat the previous process for each candidate in the next
\r
291 * unit, creating a list of least cost paths between the
\r
292 * candidates between the current unit and the unit following
\r
296 * Toss out all candidates in the current unit that are not
\r
297 * included in a path.
\r
300 * Move to the next unit and repeat the process.
\r
302 static class Viterbi {
\r
303 private int numStates = -1;
\r
304 private boolean bigIsGood = false;
\r
305 private ViterbiPoint timeline = null;
\r
306 private ViterbiPoint lastPoint = null;
\r
307 private FeatureSet f = null;
\r
308 private ClusterUnitDatabase clunitDB;
\r
311 * Creates a Viterbi class to process the given utterance.
\r
312 * A queue of ViterbiPoints corresponding to the Items in the Relation segs
\r
316 public Viterbi(Relation segs, ClusterUnitDatabase db) {
\r
317 ViterbiPoint last = null;
\r
319 f = new FeatureSetImpl();
\r
320 for (Item s = segs.getHead(); true; s = s.getNext()) {
\r
321 ViterbiPoint n = new ViterbiPoint(s);
\r
322 // The number of ViterbiPaths associated with each ViterbiPoint
\r
323 // is determined using the variable numStates.
\r
324 // TODO: Where can numStates be set?
\r
325 if (numStates > 0) {
\r
326 n.initPathArray(numStates);
\r
328 if (last != null) { // continue to build up the queue
\r
330 } else { // timeline is the start of the queue
\r
335 if (s == null) { // no further segments, leave loop
\r
341 if (LOGGER.isLoggable(Level.FINE)) {
\r
342 LOGGER.fine("num states " + numStates);
\r
345 if (numStates == 0) { // its a general beam search
\r
346 timeline.paths = new ViterbiPath();
\r
349 if (numStates == -1) { // dynamic number of states (# cands)
\r
350 timeline.initPathArray(1);
\r
355 * Sets the given feature to the given value.
\r
357 * @param name the name of the feature
\r
358 * @param obj the new value.
\r
360 public void setFeature(String name, Object obj) {
\r
361 f.setObject(name, obj);
\r
365 * Gets the value for the given feature.
\r
367 * @param name the name of the feature
\r
369 * @return the value of the feature
\r
371 public Object getFeature(String name) {
\r
372 return f.getObject(name);
\r
376 * Carry out a Viterbi search in for a prepared queue of ViterbiPoints.
\r
377 * In a nutshell, each Point represents a target item (a target segment);
\r
378 * for each target Point, a number of Candidate units in the voice database
\r
379 * are determined; a Path structure is built up, based on local best transitions.
\r
380 * Concretely, a Path consists of a (possibly empty) previous Path, a current Candidate,
\r
381 * and a Score. This Score is a quality measure of the Path; it is calculated as the
\r
382 * sum of the previous Path's score, the Candidate's score, and the Cost of joining
\r
383 * the Candidate to the previous Path's Candidate. At each step, only one Path
\r
384 * leading to each Candidate is retained, viz. the Path with the best Score.
\r
385 * All that is left to do is to call result() to get the best-rated
\r
386 * path from among the paths associated with the last Point, and to associate
\r
387 * the resulting Candidates with the segment items they will realise.
\r
390 for (ViterbiPoint p = timeline; p.next != null; p = p.next) {
\r
391 // The candidates for the current item:
\r
392 p.cands = getCandidate(p.item);
\r
393 if (LOGGER.isLoggable(Level.FINE)) {
\r
394 LOGGER.fine("decode " + p.cands);
\r
396 if (numStates != 0) {
\r
397 if (numStates == -1) {
\r
398 // put as many (empty) path elements into p.next as there are candidates in p
\r
399 p.next.initDynamicPathArray(p.cands);
\r
402 // Now go through all existing paths and all candidates for the current item;
\r
403 // tentatively extend each existing path to each of the candidates,
\r
404 // but only retain the
\r
405 // Attention: p.numStates is not numStates!
\r
406 // numStates = a general flag indicating which type of viterbi search
\r
407 // to use (only -1 seems to be implemented);
\r
408 // p.numStates = the number of paths in p.statePaths, i.e. p.numStates==p.statePaths.length
\r
409 for (int i = 0; i < p.numStates; i++) {
\r
410 if ((p == timeline && i == 0) ||
\r
411 (p.statePaths[i] != null)) {
\r
412 // We are at the very beginning of the search, or have a usable path to extend
\r
413 // debug(" dc p " + p);
\r
414 for (ViterbiCandidate c = p.cands;
\r
415 c != null; c = c.next) {
\r
416 // For the candidate c, create a path extending the previous path
\r
417 // p.statePaths[i] to that candidate:
\r
418 ViterbiPath np = getPath(p.statePaths[i], c);
\r
419 // Compare this path to the existing best path (if any) leading to
\r
420 // candidate c; only retain the one with the better score.
\r
421 // TODO: why should the paths leading to the candidates realising p
\r
422 // be stored in p.next?
\r
423 addPaths(p.next, np);
\r
428 System.err.println(
\r
429 "Viterbi.decode: general beam search not implemented");
\r
436 * Try to add paths to the given point.
\r
438 * @param point the point to add the paths to
\r
439 * @param paths the path
\r
441 void addPaths(ViterbiPoint point, ViterbiPath path) {
\r
442 ViterbiPath nextPath;
\r
443 for (ViterbiPath p = path; p != null; p = nextPath) {
\r
450 * Add the new path to the state path if it is
\r
451 * better than the current path. In this, state means
\r
452 * the position of the candidate associated with this
\r
453 * path in the candidate queue
\r
454 * for the corresponding segment item. In other words,
\r
455 * this method uses newPath as the one path leading to
\r
456 * the candidate newPath.candidate, if it has a better
\r
457 * score than the previously best path leading to that candidate.
\r
459 * @param point where the path is added
\r
460 * @param newPath the path to add if its score is best
\r
462 void addPath(ViterbiPoint point, ViterbiPath newPath) {
\r
463 if (point.statePaths[newPath.state] == null) {
\r
464 // we don't have one yet, so this is best
\r
465 point.statePaths[newPath.state] = newPath;
\r
466 } else if (isBetterThan(newPath.score,
\r
467 point.statePaths[newPath.state].score)) {
\r
468 // its better than what we already have
\r
469 point.statePaths[newPath.state] = newPath;
\r
471 // its not better that what we already have
\r
472 // so we just forget about it.
\r
477 * See if a is better than b. Goodness is defined
\r
480 * @param a value to check
\r
481 * @param b value to check.
\r
483 * return true if a is better than b.
\r
485 private boolean isBetterThan(int a, int b) {
\r
494 * Find the best path through the decoder, adding the feature
\r
495 * name to the candidate.
\r
497 * @param feature the feature to add
\r
498 * @return true if a best path was found
\r
500 boolean result(String feature) {
\r
503 if (timeline == null || timeline.next == null) {
\r
504 return true; // null case succeeds
\r
506 path = findBestPath();
\r
508 if (path == null) {
\r
512 for (; path != null; path = path.from) {
\r
513 if (path.candidate != null) {
\r
514 path.candidate.item.getFeatures().setObject(feature,
\r
515 path.candidate.value);
\r
522 * Given a feature, copy the value associated with feature
\r
523 * name from the path to each item in the path.
\r
525 * @param feature the name of the feature.
\r
527 void copyFeature(String feature) {
\r
528 ViterbiPath path = findBestPath();
\r
529 if (path == null) {
\r
530 return; // nothing to copy, empty stream or no solution
\r
533 for (; path != null; path = path.from) {
\r
534 if (path.candidate != null && path.isPresent(feature)) {
\r
535 path.candidate.item.getFeatures().setObject(feature,
\r
536 path.getFeature(feature));
\r
542 * Finds the best (queue of) candidate(s) for a given (segment) item.
\r
543 * This traverses a CART tree for target cluster selection as described in
\r
544 * the paper introducing the clunits algorithm. This corresponds to the
\r
545 * "target costs" described for general unit selection.
\r
546 * @return the first candidate in the queue of candidate units for this item.
\r
548 private ViterbiCandidate getCandidate(Item item) {
\r
549 // TODO: This should better be called getCandidates() (plural form),
\r
550 // because what it does is find all the candidates for the item
\r
551 // and return the head of the queue.
\r
552 String unitType = item.getFeatures().getString("clunit_name");
\r
553 CART cart = clunitDB.getTree(unitType);
\r
554 // Here, the unit candidates are selected.
\r
555 int[] clist = (int[]) cart.interpret(item);
\r
556 // Now, clist is an array of instance numbers for the units of type
\r
557 // unitType that belong to the best cluster according to the CART.
\r
559 ViterbiCandidate p;
\r
560 ViterbiCandidate all;
\r
561 ViterbiCandidate gt;
\r
564 for (int i = 0; i < clist.length; i++) {
\r
565 p = new ViterbiCandidate();
\r
566 p.next = all; // link them reversely: the first in clist will be at the end of the queue
\r
567 p.item = item; // The item is the same for all these candidates in the queue.
\r
569 // remember the absolute unit index:
\r
570 p.setInt(clunitDB.getUnitIndex(unitType, clist[i]));
\r
573 if (LOGGER.isLoggable(Level.FINE)) {
\r
574 LOGGER.fine(" gc adding " + clist[i]);
\r
578 // Take into account candidates for previous item?
\r
579 // Depending on the setting of EXTEND_SELECTIONS in the database,
\r
580 // look the first candidates for the preceding item,
\r
581 // and add the units following these (which are not yet candidates)
\r
582 // as candidates. EXTEND_SELECTIONS indicates how many of these
\r
583 // are added. A high setting will add candidates which don't fit the
\r
584 // target well, but which can be smoothly concatenated with the context.
\r
585 // In a sense, this means trading target costs against join costs.
\r
586 if (clunitDB.getExtendSelections() > 0 &&
\r
587 item.getPrevious() != null) {
\r
588 // Get the candidates for the preceding (segment) item
\r
589 ViterbiCandidate lc = (ViterbiCandidate) (item.
\r
590 getPrevious().getFeatures().getObject("clunit_cands"));
\r
591 if (LOGGER.isLoggable(Level.FINE)) {
\r
592 LOGGER.fine(" lc " + lc);
\r
594 for (int e = 0; lc != null &&
\r
595 (e < clunitDB.getExtendSelections());
\r
597 int nu = clunitDB.getNextUnit(lc.ival);
\r
598 if (LOGGER.isLoggable(Level.FINE)) {
\r
599 LOGGER.fine(" e: " + e + " nu: " + nu);
\r
601 if (nu == ClusterUnitDatabase.CLUNIT_NONE) {
\r
605 // Look through the list of candidates for the current item:
\r
606 for (gt = all; gt != null; gt = gt.next) {
\r
607 if (LOGGER.isLoggable(Level.FINE)) {
\r
608 LOGGER.fine(" gt " + gt.ival + " nu " + nu);
\r
610 if (nu == gt.ival) {
\r
611 // The unit following one of the candidates for the preceding
\r
612 // item already is a candidate for the current item.
\r
617 if (LOGGER.isLoggable(Level.FINE)) {
\r
618 LOGGER.fine("nu " + clunitDB.getUnit(nu).getName() + " all " +
\r
619 clunitDB.getUnit(all.ival).getName() +
\r
622 if ((gt == null)&&clunitDB.isUnitTypeEqual(nu, all.ival)) {
\r
623 // nu is of the right unit type and is not yet one of the candidates.
\r
624 // add it to the queue of candidates for the current item:
\r
625 p = new ViterbiCandidate();
\r
635 item.getFeatures().setObject("clunit_cands", all);
\r
640 * Construct a new path element linking a previous path to the given candidate.
\r
641 * The (penalty) score associated with the new path is calculated as the sum of
\r
642 * the score of the old path plus the score of the candidate itself plus the
\r
643 * join cost of appending the candidate to the nearest candidate in the given path.
\r
644 * This join cost takes into account optimal coupling if the database has
\r
645 * OPTIMAL_COUPLING set to 1. The join position is saved in the new path, as
\r
646 * the features "unit_prev_move" and "unit_this_move".
\r
648 * @param path the previous path, or null if this candidate starts a new path
\r
649 * @param candiate the candidate to add to the path
\r
651 * @return a new path, consisting of this candidate appended to the previous path, and
\r
652 * with the cumulative (penalty) score calculated.
\r
654 private ViterbiPath getPath(ViterbiPath path,
\r
655 ViterbiCandidate candidate) {
\r
657 ViterbiPath newPath = new ViterbiPath();
\r
659 newPath.candidate = candidate;
\r
660 newPath.from = path;
\r
662 // Flite 1.1 has some logic here to test to see
\r
663 // if the unit database is fully populated or not and if not
\r
664 // load fixed residuals and calculate distance with a
\r
665 // different distance algorithm that is designed for fixed
\r
666 // point. FreeTTS doesn't really need to do that.
\r
669 if (path == null || path.candidate == null) {
\r
672 int u0 = path.candidate.ival;
\r
673 int u1 = candidate.ival;
\r
674 if (clunitDB.getOptimalCoupling() == 1) {
\r
675 Cost oCost = getOptimalCouple(u0, u1);
\r
676 if (oCost.u0Move != -1) {
\r
677 newPath.setFeature("unit_prev_move", new
\r
678 Integer(oCost.u0Move));
\r
680 if (oCost.u1Move != -1) {
\r
681 newPath.setFeature("unit_this_move", new
\r
682 Integer(oCost.u1Move));
\r
685 } else if (clunitDB.getOptimalCoupling() == 2) {
\r
686 cost = getOptimalCoupleFrame(u0, u1);
\r
692 // cost *= clunitDB.getContinuityWeight();
\r
693 cost *= 5; // magic number ("continuity weight") from flite
\r
694 // TODO: remove the state attribute from ViterbiPath, as it is simply path.candidate.pos!
\r
695 newPath.state = candidate.pos;
\r
696 if (path == null) {
\r
697 newPath.score = cost + candidate.score;
\r
699 newPath.score = cost + candidate.score + path.score;
\r
706 * Find the best path. This requires decode() to have been run.
\r
708 * @return the best path.
\r
710 private ViterbiPath findBestPath() {
\r
714 ViterbiPath bestPath = null;
\r
717 worst = Integer.MIN_VALUE;
\r
719 worst = Integer.MAX_VALUE;
\r
724 // TODO: do not need t, can use lastPoint throughout
\r
727 if (numStates != 0) {
\r
728 if (LOGGER.isLoggable(Level.FINE)) {
\r
729 LOGGER.fine("fbp ns " + numStates + " t "
\r
730 + t.numStates + " best " + best);
\r
732 // All paths end in lastPoint, and take into account
\r
733 // previous path segment's scores. Therefore, it is
\r
734 // sufficient to find the best path from among the
\r
735 // paths for lastPoint.
\r
736 for (int i = 0; i < t.numStates; i++) {
\r
737 if (t.statePaths[i] != null &&
\r
738 (isBetterThan(t.statePaths[i].score, best))) {
\r
739 best = t.statePaths[i].score;
\r
740 bestPath = t.statePaths[i];
\r
748 * Find the optimal coupling frame for a pair of units.
\r
750 * @param u0 first unit to try
\r
751 * @param u1 second unit to try
\r
753 * @return the cost for this coupling, including the best coupling frame
\r
755 Cost getOptimalCouple(int u0, int u1) {
\r
759 int u0_st, u1_p_st, u0_end, u1_p_end;
\r
760 int best_u0, best_u1_p;
\r
761 int dist, best_val;
\r
762 Cost cost = new Cost();
\r
764 u1_p = clunitDB.getPrevUnit(u1);
\r
766 // If u0 precedes u1, the cost is 0, and we're finished.
\r
772 // If u1 does not have a previous unit, or that previous
\r
773 // unit does not belong to the same phone, the optimal
\r
774 // couple frame must be found between u0 and u1.
\r
775 if (u1_p == ClusterUnitDatabase.CLUNIT_NONE ||
\r
776 clunitDB.getPhone(u0) !=
\r
777 clunitDB.getPhone(u1_p)) {
\r
778 cost.cost = 10 * getOptimalCoupleFrame(u0, u1);
\r
782 // If u1 has a valid previous unit, try to find the optimal
\r
783 // couple point between u0 and that previous unit, u1_p.
\r
785 // Find out which of u1_p and u0 is shorter.
\r
786 // In both units, we plan to start from one third of the unit length,
\r
787 // and to compare frame coupling frame by frame until the end of the
\r
788 // shorter unit is reached.
\r
789 u0_end = clunitDB.getEnd(u0) - clunitDB.getStart(u0);
\r
790 u1_p_end = clunitDB.getEnd(u1_p) - clunitDB.getStart(u1_p);
\r
791 u0_st = u0_end / 3;
\r
792 u1_p_st = u1_p_end / 3;
\r
794 if ((u0_end - u0_st) < (u1_p_end - u1_p_st)) {
\r
795 fcount = u0_end - u0_st;
\r
796 // We could now shift the starting point for coupling in the longer unit
\r
797 // so that the distance from the end is the same in both units:
\r
798 /* u1_p_st = u1_p_end - fcount; */
\r
800 fcount = u1_p_end - u1_p_st;
\r
801 // We could now shift the starting point for coupling in the longer unit
\r
802 // so that the distance from the end is the same in both units:
\r
803 /* u0_st = u0_end - fcount; */
\r
806 // Now go through the two units, and search for the frame pair where
\r
807 // the acoustic distance is smallest.
\r
809 best_u1_p = u1_p_end;
\r
810 best_val = Integer.MAX_VALUE;
\r
812 for (i = 0; i < fcount; ++i) {
\r
813 a = clunitDB.getStart(u0)+ u0_st + i;
\r
814 b = clunitDB.getStart(u1_p) + u1_p_st + i;
\r
815 dist = getFrameDistance(a, b,
\r
816 clunitDB.getJoinWeights(),
\r
817 clunitDB.getMcep().getSampleInfo().getNumberOfChannels())
\r
818 + Math.abs( clunitDB.getSts().getFrameSize(a) -
\r
819 clunitDB.getSts().getFrameSize(b)) *
\r
820 clunitDB.getContinuityWeight();
\r
822 if (dist < best_val) {
\r
824 best_u0 = u0_st + i;
\r
825 best_u1_p = u1_p_st + i;
\r
829 // u0Move is the new end for u0
\r
830 // u1Move is the new start for u1
\r
831 cost.u0Move = clunitDB.getStart(u0) + best_u0;
\r
832 cost.u1Move = clunitDB.getStart(u1_p) + best_u1_p;
\r
833 cost.cost = 30000 + best_val;
\r
838 * Returns the distance between the successive potential
\r
841 * @param u0 the first unit to try
\r
842 * @param u1 the second unit to try
\r
844 * @return the distance between the two units
\r
846 int getOptimalCoupleFrame(int u0, int u1) {
\r
849 if (clunitDB.getPrevUnit(u1) == u0) {
\r
850 return 0; // consecutive units win
\r
853 if (clunitDB.getNextUnit(u0) != ClusterUnitDatabase.CLUNIT_NONE) {
\r
854 a = clunitDB.getEnd(u0);
\r
855 } else { // don't want to do this but it's all that is left to do
\r
856 a = clunitDB.getEnd(u0) - 1; // if num frames < 1 this is bad
\r
858 b = clunitDB.getStart(u1);
\r
860 return getFrameDistance(a, b,
\r
861 clunitDB.getJoinWeights(),
\r
862 clunitDB.getMcep().getSampleInfo().getNumberOfChannels())
\r
863 + Math.abs( clunitDB.getSts().getFrameSize(a) -
\r
864 clunitDB.getSts().getFrameSize(b)) *
\r
865 clunitDB.getContinuityWeight();
\r
869 * Get the 'distance' between the frames a and b.
\r
871 * @param a first frame
\r
872 * @param b second frame
\r
873 * @param joinWeights the weights used in comparison
\r
874 * @param order number of compares
\r
876 * @return the distance between the frames
\r
878 public int getFrameDistance(int a, int b, int[] joinWeights,int order) {
\r
880 if (LOGGER.isLoggable(Level.FINE)) {
\r
881 LOGGER.fine(" gfd a " + a + " b " + b + " or " + order);
\r
884 short[] bv = clunitDB.getMcep().getSample(b).getFrameData();
\r
885 short[] av = clunitDB.getMcep().getSample(a).getFrameData();
\r
887 for (r = 0, i = 0; i < order; i++) {
\r
888 int diff = av[i] - bv[i];
\r
889 r += Math.abs(diff) * joinWeights[i] / 65536;
\r
898 * Represents a point in the Viterbi path. A point corresponds to an item,
\r
900 * Each ViterbiPoint knows
\r
901 * about its next ViterbiPoint, i.e. they can form a queue.
\r
903 static class ViterbiPoint {
\r
905 // TODO: remove the numStates attribute from ViterbiPoint, as this is only statePaths.length
\r
908 ViterbiCandidate cands = null;
\r
909 ViterbiPath paths = null;
\r
910 ViterbiPath[] statePaths = null;
\r
911 ViterbiPoint next = null;
\r
914 * Creates a ViterbiPoint for the given item. A typical item of choice is a Segment item.
\r
916 * @param item the item of interest
\r
918 public ViterbiPoint(Item item) {
\r
923 * Initialize the path array to the given size.
\r
925 * @param size the size of the path array
\r
927 public void initPathArray(int size) {
\r
928 if (LOGGER.isLoggable(Level.FINE)) {
\r
929 LOGGER.fine("init_path_array: " + size);
\r
932 statePaths = new ViterbiPath[size];
\r
936 * Initializes the dynamic path array. The path array will have
\r
937 * as many ViterbiPath members as there are candidates in the
\r
938 * queue starting with candidate.
\r
939 * Side effect on parameter: This will set the pos member of the
\r
940 * candidates in the queue starting with candidate to the position
\r
943 * @param candidate the first candidate of interest
\r
945 public void initDynamicPathArray(ViterbiCandidate candidate) {
\r
947 for (ViterbiCandidate cc = candidate; cc != null;
\r
948 i++, cc = cc.next) {
\r
951 if (LOGGER.isLoggable(Level.FINE)) {
\r
952 LOGGER.fine("init_dynamic_ path_array: " + i);
\r
957 public String toString() {
\r
958 return " pnt: " + numStates + " paths " + numPaths;
\r
963 * Represents a candidate for the Viterbi algorthm.
\r
964 * Each candidate knows about its next candidate, i.e. they can form
\r
967 static class ViterbiCandidate {
\r
969 Object value = null;
\r
973 ViterbiCandidate next = null;
\r
976 * Sets the object for this candidate.
\r
978 * @param obj the object
\r
980 void set(Object obj) {
\r
985 * Sets the integer value for this candidate. This can be used for saving
\r
986 * the unit index of the candidate unit represented by this ViterbiCandidate.
\r
988 * @param ival the integer value
\r
990 void setInt(int ival) {
\r
992 set(new Integer(ival));
\r
996 * Converts this object to a string.
\r
998 * @return the string form of this object
\r
1000 public String toString() {
\r
1001 return "VC: Score " + score + " ival " + ival + " Pos " + pos;
\r
1006 * Describes a Viterbi path.
\r
1008 static class ViterbiPath {
\r
1011 ViterbiCandidate candidate = null;
\r
1012 private FeatureSet f = null;
\r
1013 ViterbiPath from = null;
\r
1014 ViterbiPath next = null;
\r
1017 * Sets a feature with the given name to the given value.
\r
1019 * @param name the name of the feature
\r
1020 * @param value the new value for the feature
\r
1022 void setFeature(String name, Object value) {
\r
1024 f = new FeatureSetImpl();
\r
1026 f.setObject(name, value);
\r
1030 * Retrieves a feature.
\r
1032 * @param name the name of the feature
\r
1034 * @return the feature
\r
1036 Object getFeature(String name) {
\r
1037 Object value = null;
\r
1039 value = f.getObject(name);
\r
1045 * Determines if the feature with the given name
\r
1048 * @param name the feature to look for
\r
1050 * @return <code>true</code> if the feature is present;
\r
1051 * otherwise <code>false</code>.
\r
1053 boolean isPresent(String name) {
\r
1057 return getFeature(name) != null;
\r
1062 * Converts this object to a string.
\r
1064 * @return the string form of this object
\r
1066 public String toString() {
\r
1067 return "ViterbiPath score " + score + " state " + state;
\r
1074 * Information returned from getOptimalCoupling.
\r
1086 class ClusterUnit implements com.sun.speech.freetts.Unit {
\r
1088 private ClusterUnitDatabase db;
\r
1089 private String name;
\r
1090 private int start;
\r
1094 * Contructs a cluster unit given.
\r
1096 * @param db the database
\r
1097 * @param name the unitName
\r
1098 * @param start the start
\r
1099 * @param end the end
\r
1101 public ClusterUnit(ClusterUnitDatabase db, String name, int start,int end) {
\r
1103 this.start = start;
\r
1110 * Returns the start.
\r
1112 * @return the start
\r
1114 public int getStart() {
\r
1119 * Returns the end.
\r
1123 public int getEnd() {
\r
1128 * Returns the name of this Unit.
\r
1130 * @return the name of this unit
\r
1132 public String getName() {
\r
1137 * returns the size of the unit.
\r
1139 * @return the size of the unit
\r
1141 public int getSize() {
\r
1142 return db.getSts().getUnitSize(start, end);
\r
1146 * Retrieves the nearest sample.
\r
1148 * @param index the ideal index
\r
1150 * @return the nearest Sample
\r
1152 public Sample getNearestSample(float index) {
\r
1153 int i, iSize = 0, nSize;
\r
1154 SampleSet sts = db.getSts();
\r
1156 // loop through all the Samples in this unit
\r
1157 for (i = start; i < end; i++) {
\r
1158 Sample sample = sts.getSample(i);
\r
1159 nSize = iSize + sample.getResidualSize();
\r
1161 if (Math.abs(index - (float) iSize) <
\r
1162 Math.abs(index - (float) nSize)) {
\r
1167 return sts.getSample(end - 1);
\r
1171 * gets the string name for the unit.
\r
1173 * @return string rep of this object.
\r
1175 public String toString() {
\r
1181 * Dumps this unit.
\r
1183 public void dump() {
\r