git.gag.com Git - debian/freetts/blob - com/sun/speech/freetts/clunits/ClusterUnitSelector.java

   1 /**\r
   2  * Portions Copyright 2001-2003 Sun Microsystems, Inc.\r
   3  * Portions Copyright 1999-2001 Language Technologies Institute, \r
   4  * Carnegie Mellon University.\r
   5  * All Rights Reserved.  Use is subject to license terms.\r
   6  * \r
   7  * See the file "license.terms" for information on usage and\r
   8  * redistribution of this file, and for a DISCLAIMER OF ALL \r
   9  * WARRANTIES.\r
  10  */\r
  11 package com.sun.speech.freetts.clunits;\r
  12 import java.io.IOException;\r
  13 import java.net.URL;\r
  14 import java.util.logging.Level;\r
  15 import java.util.logging.Logger;\r
  16 \r
  17 import com.sun.speech.freetts.FeatureSet;\r
  18 import com.sun.speech.freetts.FeatureSetImpl;\r
  19 import com.sun.speech.freetts.Item;\r
  20 import com.sun.speech.freetts.PathExtractor;\r
  21 import com.sun.speech.freetts.PathExtractorImpl;\r
  22 import com.sun.speech.freetts.ProcessException;\r
  23 import com.sun.speech.freetts.Relation;\r
  24 import com.sun.speech.freetts.Utterance;\r
  25 import com.sun.speech.freetts.UtteranceProcessor;\r
  26 import com.sun.speech.freetts.Voice;\r
  27 import com.sun.speech.freetts.cart.CART;\r
  28 import com.sun.speech.freetts.clunits.ClusterUnitDatabase.UnitOriginInfo;\r
  29 import com.sun.speech.freetts.relp.Sample;\r
  30 import com.sun.speech.freetts.relp.SampleInfo;\r
  31 import com.sun.speech.freetts.relp.SampleSet;\r
  32 \r
  33 import de.dfki.lt.freetts.ClusterUnitNamer;\r
  34 \r
  35 \r
  36 /**\r
  37  * Generates the Unit Relation of an Utterance from the\r
  38  * Segment Relation.\r
  39  *\r
  40  */\r
  41 public class ClusterUnitSelector implements UtteranceProcessor {\r
  42     /** Logger instance. */\r
  43     private static final Logger LOGGER =\r
  44         Logger.getLogger(ClusterUnitSelector.class.getName());\r
  45 \r
  46     private final static PathExtractor DNAME = new PathExtractorImpl(\r
  47             "R:SylStructure.parent.parent.name", true);\r
  48     private ClusterUnitDatabase clunitDB;\r
  49     private ClusterUnitNamer unitNamer;\r
  50     \r
  51     /**\r
  52      * Constructs a ClusterUnitSelector.\r
  53      *\r
  54      * @param url the URL for the unit database. If the URL path ends\r
  55      *     with a '.bin' it is assumed that the DB is a binary database,\r
  56      *     otherwise, its assumed that its a text database1\r
  57      *\r
  58      * @throws IOException if an error occurs while loading the\r
  59      *     database\r
  60      *\r
  61      */\r
  62     public ClusterUnitSelector(URL url) throws IOException {\r
  63         this(url, null);\r
  64     }\r
  65     \r
  66     /**\r
  67      * Constructs a ClusterUnitSelector.\r
  68      *\r
  69      * @param url the URL for the unit database. If the URL path ends\r
  70      *     with a '.bin' it is assumed that the DB is a binary database,\r
  71      *     otherwise, its assumed that its a text database1\r
  72      * @param unitNamer an optional unit namer, specifying how the cluster\r
  73      * units are called in the voice database referenced by url. If this is null,\r
  74      * an ldom unit naming scheme will be used (e.g., 'ae_afternoon' for the\r
  75      * phoneme 'ae' in the word 'afternoon'.\r
  76      *\r
  77      * @throws IOException if an error occurs while loading the\r
  78      *     database\r
  79      *\r
  80      */\r
  81     public ClusterUnitSelector(URL url, ClusterUnitNamer unitNamer) throws IOException {\r
  82         if (url == null) {\r
  83             throw new IOException("Can't load cluster unit database");\r
  84         }\r
  85         boolean binary = url.getPath().endsWith(".bin");\r
  86         clunitDB = new ClusterUnitDatabase(url, binary);\r
  87         this.unitNamer = unitNamer; \r
  88 \r
  89     }\r
  90     \r
  91 \r
  92     /**\r
  93      * Get the sample info for the underlying database.\r
  94      * @return the sample info object\r
  95      */\r
  96     public SampleInfo getSampleInfo() {\r
  97         return clunitDB.getSampleInfo();\r
  98     }\r
  99     \r
 100     /**\r
 101      * Generates the Unit Relation from the Segment Relation.\r
 102      * <br><b>Implementation note:</b><br>\r
 103      *    Populates the segment relation with segment names of the form:\r
 104      *    XX_YY where XX is the segment name (typically a phoneme)\r
 105      *    and YY is the word that the segment is in (stripped and\r
 106      *    lower case).\r
 107      *\r
 108      *    The first step in cluster unit selection is to determine the unit\r
 109      *    type for each unit in the utterance. The unit type for\r
 110      *    selection in the simple talking clock example (cmu_time_awb) is\r
 111      *    done per phone. The unit type consists of the phone\r
 112      *    name followed by the word the phone comes from (e.g., n_now for\r
 113      *    the phone 'n' in the word 'now'). \r
 114      *\r
 115      *   Invoke the Viterbi algorithm (via a viterbi class) that \r
 116      *   selects the proper units for the segment and adds that to\r
 117      *   each segment item.\r
 118      *\r
 119      *   For each segment, create a unit and attach features based\r
 120      *   upon the selected units.\r
 121      *\r
 122      * @param  utterance  the utterance to generate the Unit Relation\r
 123      *\r
 124      * @throws ProcessException if an IOException is thrown during the\r
 125      *         processing of the utterance\r
 126      * \r
 127      */\r
 128     public void processUtterance(Utterance utterance) throws ProcessException {\r
 129         Viterbi vd;\r
 130         Relation segs = utterance.getRelation(Relation.SEGMENT);\r
 131 \r
 132         utterance.setObject(SampleInfo.UTT_NAME,\r
 133                 clunitDB.getSampleInfo());\r
 134         utterance.setObject("sts_list", clunitDB.getSts());\r
 135 \r
 136         vd = new Viterbi(segs, clunitDB);\r
 137         \r
 138         for (Item s = segs.getHead(); s != null; s = s.getNext()) {\r
 139             setUnitName(s);\r
 140         }\r
 141 \r
 142     // Carry out the CART lookup for the target costs, and the viterbi\r
 143     // search for finding the best path (join costs) through the candidates.\r
 144         vd.decode();\r
 145 \r
 146     // Now associate the candidate units in the best path \r
 147     // with the items in the segment relation.\r
 148         if (!vd.result("selected_unit")) {\r
 149             LOGGER.severe("clunits: can't find path");\r
 150             throw new Error();\r
 151         }\r
 152 \r
 153     // If optimal coupling was used, the join points must now be copied\r
 154     // from the path elements to the actual items in the segment relation.\r
 155         vd.copyFeature("unit_prev_move");\r
 156         vd.copyFeature("unit_this_move");\r
 157 \r
 158     // Based on this data, create a Unit relation giving the details of the\r
 159     // units to concatenate.\r
 160         Relation unitRelation = utterance.createRelation(Relation.UNIT);\r
 161 \r
 162         for (Item s = segs.getHead(); s != null; s = s.getNext()) {\r
 163             Item unit = unitRelation.appendItem();\r
 164             FeatureSet unitFeatureSet = unit.getFeatures();\r
 165             int unitEntry = s.getFeatures().getInt("selected_unit");\r
 166 \r
 167         // The item name is the segment name\r
 168             unitFeatureSet.setString("name", s.getFeatures().getString("name"));\r
 169 \r
 170             int unitStart;\r
 171             int unitEnd;\r
 172             String clunitName = s.getFeatures().getString("clunit_name");\r
 173 \r
 174             if (s.getFeatures().isPresent("unit_this_move")) {\r
 175                 unitStart = s.getFeatures().getInt("unit_this_move");\r
 176             } else {\r
 177                 unitStart = clunitDB.getStart(unitEntry);\r
 178             }\r
 179 \r
 180             if (s.getNext() != null &&\r
 181                     s.getNext().getFeatures().isPresent("unit_prev_move")) {\r
 182                 unitEnd = s.getNext().getFeatures().getInt("unit_prev_move");\r
 183             } else {\r
 184                 unitEnd = clunitDB.getEnd(unitEntry);\r
 185             }\r
 186 \r
 187             unitFeatureSet.setInt("unit_entry", unitEntry);\r
 188             ClusterUnit clunit = new ClusterUnit(clunitDB, \r
 189                     clunitName, unitStart, unitEnd);\r
 190             unitFeatureSet.setObject("unit", clunit);\r
 191             if (true) { \r
 192                 unitFeatureSet.setInt("unit_start", clunit.getStart());\r
 193                 unitFeatureSet.setInt("unit_end", clunit.getEnd());\r
 194         unitFeatureSet.setInt("instance", unitEntry - clunitDB.getUnitIndex(clunitName, 0));\r
 195             } // add the rest of these things for debugging.\r
 196 \r
 197             if (LOGGER.isLoggable(Level.FINE)) {\r
 198                 LOGGER.fine(" sr " + clunitDB.getSampleInfo().getSampleRate() + " " +\r
 199                     s.getFeatures().getFloat("end") + " " +\r
 200                     (int) (s.getFeatures().getFloat("end") * \r
 201                            clunitDB.getSampleInfo().getSampleRate()));\r
 202             }\r
 203             unitFeatureSet.setInt("target_end", \r
 204                 (int) (s.getFeatures().getFloat("end") \r
 205                        * clunitDB.getSampleInfo().getSampleRate()));\r
 206         \r
 207             // Associate debug info about unit origin if available:\r
 208         UnitOriginInfo unitOrigin = clunitDB.getUnitOriginInfo(unitEntry);\r
 209             if (unitOrigin != null) {\r
 210                 unitFeatureSet.setString("origin", unitOrigin.originFile);\r
 211                 unitFeatureSet.setFloat("origin_start", unitOrigin.originStart);\r
 212             unitFeatureSet.setFloat("origin_end", unitOrigin.originEnd);\r
 213         }\r
 214         \r
 215         }\r
 216     }\r
 217  \r
 218     \r
 219     /**\r
 220      * Sets the cluster unit name given the segment.\r
 221      *\r
 222      * @param seg the segment item that gets the name\r
 223      */\r
 224     protected void setUnitName(Item seg) {\r
 225         if (unitNamer != null) {\r
 226             unitNamer.setUnitName(seg);\r
 227             return;\r
 228         }\r
 229         // default to LDOM naming scheme 'ae_afternoon':\r
 230         String cname = null;\r
 231 \r
 232         String segName = seg.getFeatures().getString("name");\r
 233 \r
 234     Voice voice = seg.getUtterance().getVoice();\r
 235     String silenceSymbol = voice.getPhoneFeature("silence", "symbol");\r
 236     if (silenceSymbol == null)\r
 237         silenceSymbol = "pau";\r
 238         if (segName.equals(silenceSymbol)) {\r
 239             cname = silenceSymbol + "_" + seg.findFeature("p.name");\r
 240         } else {\r
 241             // remove single quotes from name\r
 242             String dname = ((String) DNAME.findFeature(seg)).toLowerCase();\r
 243             cname = segName + "_" + stripQuotes(dname);\r
 244         }\r
 245         seg.getFeatures().setString("clunit_name", cname);\r
 246     }\r
 247 \r
 248 \r
 249     /**\r
 250      * Strips quotes from the given string.\r
 251      *\r
 252      * @param s the string to strip quotes from\r
 253      *\r
 254      * @return a string with all single quotes removed\r
 255      */\r
 256     private String stripQuotes(String s) {\r
 257         StringBuffer sb = new StringBuffer(s.length());\r
 258         for (int i = 0; i < s.length(); i++) {\r
 259             char c = s.charAt(i);\r
 260             if (c != '\'') {\r
 261                 sb.append(c);\r
 262             }\r
 263         }\r
 264         return sb.toString();\r
 265     }\r
 266 \r
 267 \r
 268     /**\r
 269      * Retrieves the string representation of this object.\r
 270      * \r
 271      * @return the string representation of this object\r
 272      */\r
 273     public String toString() {\r
 274         return "ClusterUnitSelector";\r
 275     }\r
 276 \r
 277     /**\r
 278      * Provides support for the Viterbi Algorithm.\r
 279      *\r
 280      * Implementation Notes\r
 281      * <p>\r
 282      * For each candidate for the current unit, calculate the cost\r
 283      * between it and the first candidate in the next unit.  Save\r
 284      * only the path that has the least cost. By default, if two\r
 285      * candidates come from units that are adjacent in the\r
 286      * database, the cost is 0 (i.e., they were spoken together,\r
 287      * so they are a perfect match).\r
 288      * <p>\r
 289      * \r
 290      * Repeat the previous process for each candidate in the next\r
 291      * unit, creating a list of least cost paths between the\r
 292      * candidates between the current unit and the unit following\r
 293      * it.\r
 294      * <p>\r
 295      * \r
 296      * Toss out all candidates in the current unit that are not\r
 297      * included in a path.\r
 298      * <p>\r
 299      * \r
 300      * Move to the next unit and repeat the process.\r
 301     */\r
 302     static class Viterbi {\r
 303         private int numStates = -1;\r
 304         private boolean bigIsGood = false;\r
 305         private ViterbiPoint timeline = null;\r
 306         private ViterbiPoint lastPoint = null;\r
 307         private FeatureSet f = null;\r
 308         private ClusterUnitDatabase clunitDB;\r
 309 \r
 310         /**\r
 311          * Creates a Viterbi class to process the given utterance.\r
 312      * A queue of ViterbiPoints corresponding to the Items in the Relation segs \r
 313      * is built up.\r
 314      * \r
 315          */\r
 316         public Viterbi(Relation segs, ClusterUnitDatabase db) {\r
 317             ViterbiPoint last = null;\r
 318             clunitDB = db;\r
 319             f = new FeatureSetImpl();\r
 320             for (Item s = segs.getHead(); true; s = s.getNext()) {\r
 321                 ViterbiPoint n = new ViterbiPoint(s);\r
 322         // The number of ViterbiPaths associated with each ViterbiPoint\r
 323         // is determined using the variable numStates.\r
 324         // TODO: Where can numStates be set?\r
 325                 if (numStates > 0) {\r
 326                     n.initPathArray(numStates);\r
 327                 }\r
 328                 if (last != null) { // continue to build up the queue\r
 329                     last.next = n;\r
 330                 } else { // timeline is the start of the queue\r
 331                     timeline = n;\r
 332                 }\r
 333                 last = n;\r
 334 \r
 335                 if (s == null) { // no further segments, leave loop\r
 336                     lastPoint = n;\r
 337                     break;\r
 338                 }\r
 339             }\r
 340 \r
 341             if (LOGGER.isLoggable(Level.FINE)) {\r
 342                 LOGGER.fine("num states " + numStates);\r
 343             }\r
 344 \r
 345             if (numStates == 0) {       // its a  general beam search\r
 346                 timeline.paths = new ViterbiPath();\r
 347             }\r
 348 \r
 349             if (numStates == -1) {      // dynamic number of states (# cands)\r
 350                 timeline.initPathArray(1);\r
 351             }\r
 352         }\r
 353 \r
 354         /**\r
 355          * Sets the given feature to the given value.\r
 356          *\r
 357          * @param name the name of the feature\r
 358          * @param obj the new value.\r
 359          */\r
 360         public void setFeature(String name, Object obj) {\r
 361             f.setObject(name, obj);\r
 362         }\r
 363 \r
 364         /**\r
 365          * Gets the value for the given feature.\r
 366          *\r
 367          * @param name the name of the feature\r
 368          *\r
 369          * @return the value of the feature\r
 370          */\r
 371         public Object getFeature(String name) {\r
 372             return f.getObject(name);\r
 373         }\r
 374 \r
 375         /**\r
 376      * Carry out a Viterbi search in for a prepared queue of ViterbiPoints.\r
 377      * In a nutshell, each Point represents a target item (a target segment);\r
 378      * for each target Point, a number of Candidate units in the voice database \r
 379      * are determined; a Path structure is built up, based on local best transitions.\r
 380      * Concretely, a Path consists of a (possibly empty) previous Path, a current Candidate,\r
 381      * and a Score. This Score is a quality measure of the Path; it is calculated as the\r
 382      * sum of the previous Path's score, the Candidate's score, and the Cost of joining\r
 383      * the Candidate to the previous Path's Candidate. At each step, only one Path \r
 384      * leading to each Candidate is retained, viz. the Path with the best Score.\r
 385      * All that is left to do is to call result() to get the best-rated\r
 386      * path from among the paths associated with the last Point, and to associate\r
 387      * the resulting Candidates with the segment items they will realise. \r
 388          */\r
 389         void decode() {\r
 390             for (ViterbiPoint p = timeline; p.next != null; p = p.next) {\r
 391             // The candidates for the current item:\r
 392                 p.cands = getCandidate(p.item);\r
 393                 if (LOGGER.isLoggable(Level.FINE)) {\r
 394                     LOGGER.fine("decode " + p.cands);\r
 395                 }\r
 396                 if (numStates != 0) {\r
 397                     if (numStates == -1) {\r
 398                 // put as many (empty) path elements into p.next as there are candidates in p\r
 399                         p.next.initDynamicPathArray(p.cands);\r
 400                     }\r
 401 \r
 402             // Now go through all existing paths and all candidates for the current item;\r
 403             // tentatively extend each existing path to each of the candidates,\r
 404             // but only retain the \r
 405             // Attention: p.numStates is not numStates!\r
 406             // numStates = a general flag indicating which type of viterbi search\r
 407             //             to use (only -1 seems to be implemented);\r
 408             // p.numStates = the number of paths in p.statePaths, i.e. p.numStates==p.statePaths.length\r
 409             for (int i = 0; i < p.numStates; i++) {\r
 410                         if ((p == timeline && i == 0) || \r
 411                                 (p.statePaths[i] != null)) {\r
 412                 // We are at the very beginning of the search, or have a usable path to extend\r
 413                             // debug("   dc p " + p);\r
 414                             for (ViterbiCandidate c = p.cands; \r
 415                                         c != null; c = c.next) {\r
 416                     // For the candidate c, create a path extending the previous path\r
 417                     // p.statePaths[i] to that candidate: \r
 418                                 ViterbiPath np = getPath(p.statePaths[i], c);\r
 419                 // Compare this path to the existing best path (if any) leading to\r
 420                 // candidate c; only retain the one with the better score.\r
 421                 // TODO: why should the paths leading to the candidates realising p\r
 422                 // be stored in p.next?\r
 423                                 addPaths(p.next, np);\r
 424                             }\r
 425                         }\r
 426                     }\r
 427                 } else {\r
 428                     System.err.println(\r
 429                         "Viterbi.decode: general beam search not implemented");\r
 430                 }\r
 431             }\r
 432         }\r
 433 \r
 434 \r
 435         /**\r
 436          * Try to add paths to the given point.\r
 437          *\r
 438          * @param point the point to add the paths to\r
 439          * @param paths the path\r
 440          */\r
 441         void addPaths(ViterbiPoint point, ViterbiPath path) {\r
 442             ViterbiPath nextPath;\r
 443             for (ViterbiPath p = path; p != null; p = nextPath) {\r
 444                 nextPath = p.next;\r
 445                 addPath(point, p);\r
 446             }\r
 447         }\r
 448 \r
 449         /**\r
 450          * Add the new path to the state path if it is\r
 451          * better than the current path. In this, state means\r
 452      * the position of the candidate associated with this\r
 453      * path in the candidate queue\r
 454      * for the corresponding segment item. In other words,\r
 455      * this method uses newPath as the one path leading to\r
 456      * the candidate newPath.candidate, if it has a better\r
 457      * score than the previously best path leading to that candidate.\r
 458          *\r
 459          * @param point where the path is added\r
 460          * @param newPath the path to add if its score is best\r
 461          */\r
 462         void addPath(ViterbiPoint point, ViterbiPath newPath) {\r
 463             if (point.statePaths[newPath.state] == null) {\r
 464                 // we don't have one yet, so this is best\r
 465                 point.statePaths[newPath.state] = newPath;\r
 466             } else if (isBetterThan(newPath.score,\r
 467                         point.statePaths[newPath.state].score)) {\r
 468                 // its better than what we already have \r
 469                 point.statePaths[newPath.state] = newPath;\r
 470             } else {\r
 471                 // its not better that what we already have\r
 472                 // so we just forget about it.\r
 473             }\r
 474         }\r
 475 \r
 476         /**\r
 477          * See if a is better than b. Goodness is defined\r
 478          * by 'bigIsGood'.\r
 479          *\r
 480          * @param a value to check\r
 481          * @param b value to check.\r
 482          *\r
 483          * return true if a is better than b.\r
 484          */\r
 485         private boolean isBetterThan(int a, int b) {\r
 486             if (bigIsGood) {\r
 487                 return a > b;\r
 488             } else {\r
 489                 return a < b;\r
 490             }\r
 491         }\r
 492 \r
 493         /**\r
 494          * Find the best path through the decoder, adding the feature\r
 495          * name to the candidate.\r
 496          *\r
 497          * @param feature the feature to add\r
 498          * @return true if a best path was found\r
 499          */\r
 500         boolean  result(String feature) {\r
 501             ViterbiPath path;\r
 502 \r
 503             if (timeline == null || timeline.next == null) {\r
 504                 return true; // null case succeeds\r
 505             }\r
 506             path = findBestPath();\r
 507 \r
 508             if (path == null) {\r
 509                 return false;\r
 510             }\r
 511 \r
 512             for (; path != null; path = path.from) {\r
 513                 if (path.candidate != null) {\r
 514                     path.candidate.item.getFeatures().setObject(feature,\r
 515                                 path.candidate.value);\r
 516                 }\r
 517             }\r
 518             return true;\r
 519         }\r
 520 \r
 521         /**\r
 522          * Given a feature, copy the value associated with feature\r
 523          * name from the path to each item in the path.\r
 524          *\r
 525          * @param feature the name of the feature.\r
 526          */\r
 527         void copyFeature(String feature) {\r
 528             ViterbiPath path = findBestPath();\r
 529             if (path == null) { \r
 530                 return;  // nothing to copy, empty stream or no solution\r
 531             }\r
 532 \r
 533             for (; path != null; path = path.from) {\r
 534                 if (path.candidate != null && path.isPresent(feature)) {\r
 535                     path.candidate.item.getFeatures().setObject(feature,\r
 536                             path.getFeature(feature));\r
 537                 }\r
 538             }\r
 539         }\r
 540 \r
 541         /**\r
 542          * Finds the best (queue of) candidate(s) for a given (segment) item.\r
 543      * This traverses a CART tree for target cluster selection as described in \r
 544      * the paper introducing the clunits algorithm. This corresponds to the\r
 545      * "target costs" described for general unit selection.\r
 546      * @return the first candidate in the queue of candidate units for this item.\r
 547          */\r
 548         private ViterbiCandidate getCandidate(Item item) {\r
 549         // TODO: This should better be called getCandidates() (plural form),\r
 550         // because what it does is find all the candidates for the item\r
 551         // and return the head of the queue.\r
 552             String unitType = item.getFeatures().getString("clunit_name");\r
 553             CART cart = clunitDB.getTree(unitType);\r
 554         // Here, the unit candidates are selected.\r
 555             int[] clist = (int[]) cart.interpret(item);\r
 556         // Now, clist is an array of instance numbers for the units of type\r
 557         // unitType that belong to the best cluster according to the CART.\r
 558         \r
 559             ViterbiCandidate p;\r
 560             ViterbiCandidate all;\r
 561             ViterbiCandidate gt;\r
 562 \r
 563             all = null;\r
 564             for (int i = 0; i < clist.length; i++) {\r
 565                 p = new ViterbiCandidate();\r
 566                 p.next = all; // link them reversely: the first in clist will be at the end of the queue\r
 567                 p.item = item; // The item is the same for all these candidates in the queue.\r
 568                 p.score = 0;\r
 569         // remember the absolute unit index:\r
 570                 p.setInt(clunitDB.getUnitIndex(unitType,  clist[i]));\r
 571                 all = p;\r
 572                 // this is OK\r
 573                 if (LOGGER.isLoggable(Level.FINE)) {\r
 574                     LOGGER.fine("    gc adding " + clist[i]);\r
 575                 }\r
 576             }\r
 577 \r
 578         // Take into account candidates for previous item?\r
 579         // Depending on the setting of EXTEND_SELECTIONS in the database,\r
 580         // look the first candidates for the preceding item,\r
 581         // and add the units following these (which are not yet candidates)\r
 582         // as candidates. EXTEND_SELECTIONS indicates how many of these\r
 583         // are added. A high setting will add candidates which don't fit the\r
 584         // target well, but which can be smoothly concatenated with the context.\r
 585         // In a sense, this means trading target costs against join costs.\r
 586             if (clunitDB.getExtendSelections() > 0 &&\r
 587                     item.getPrevious() != null) {\r
 588             // Get the candidates for the preceding (segment) item\r
 589                 ViterbiCandidate lc = (ViterbiCandidate) (item.\r
 590                     getPrevious().getFeatures().getObject("clunit_cands"));\r
 591                 if (LOGGER.isLoggable(Level.FINE)) {\r
 592                     LOGGER.fine("      lc " + lc);\r
 593                 }\r
 594                 for (int e = 0; lc != null && \r
 595                         (e < clunitDB.getExtendSelections());\r
 596                         lc = lc.next) {\r
 597                     int nu = clunitDB.getNextUnit(lc.ival);\r
 598                     if (LOGGER.isLoggable(Level.FINE)) {\r
 599                         LOGGER.fine("      e: " + e + " nu: " + nu);\r
 600                     }\r
 601                     if (nu == ClusterUnitDatabase.CLUNIT_NONE) {\r
 602                         continue;\r
 603                     }\r
 604                     \r
 605             // Look through the list of candidates for the current item:\r
 606                     for (gt = all; gt != null; gt = gt.next) {\r
 607                         if (LOGGER.isLoggable(Level.FINE)) {\r
 608                             LOGGER.fine("       gt " + gt.ival + " nu " + nu);\r
 609                         }\r
 610                         if (nu == gt.ival) {\r
 611                 // The unit following one of the candidates for the preceding\r
 612                 // item already is a candidate for the current item.\r
 613                             break;\r
 614                         }\r
 615                     }\r
 616 \r
 617                     if (LOGGER.isLoggable(Level.FINE)) {\r
 618                         LOGGER.fine("nu " + clunitDB.getUnit(nu).getName() + " all " +\r
 619                               clunitDB.getUnit(all.ival).getName() +\r
 620                               " " + all.ival);\r
 621                     }\r
 622                     if ((gt == null)&&clunitDB.isUnitTypeEqual(nu, all.ival)) {\r
 623                 // nu is of the right unit type and is not yet one of the candidates.\r
 624                 // add it to the queue of candidates for the current item:\r
 625                         p = new ViterbiCandidate();\r
 626                         p.next = all;\r
 627                         p.item = item;\r
 628                         p.score = 0;\r
 629                         p.setInt(nu);\r
 630                         all = p;\r
 631                         e++;\r
 632                     }\r
 633                 }\r
 634             }\r
 635             item.getFeatures().setObject("clunit_cands", all);\r
 636             return all;\r
 637         }\r
 638 \r
 639         /**\r
 640          * Construct a new path element linking a previous path to the given candidate.\r
 641      * The (penalty) score associated with the new path is calculated as the sum of\r
 642      * the score of the old path plus the score of the candidate itself plus the\r
 643      * join cost of appending the candidate to the nearest candidate in the given path.\r
 644      * This join cost takes into account optimal coupling if the database has\r
 645      * OPTIMAL_COUPLING set to 1. The join position is saved in the new path, as\r
 646      * the features "unit_prev_move" and "unit_this_move".\r
 647          *\r
 648          * @param path the previous path, or null if this candidate starts a new path\r
 649          * @param candiate the candidate to add to the path\r
 650          *\r
 651          * @return a new path, consisting of this candidate appended to the previous path, and\r
 652      * with the cumulative (penalty) score calculated. \r
 653          */\r
 654         private ViterbiPath getPath(ViterbiPath path, \r
 655                 ViterbiCandidate candidate) {\r
 656             int cost;\r
 657             ViterbiPath newPath = new ViterbiPath();\r
 658 \r
 659             newPath.candidate = candidate;\r
 660             newPath.from = path;\r
 661         //\r
 662         // Flite 1.1 has some logic here to test to see\r
 663         // if  the unit database is fully populated or not and if not\r
 664         // load fixed residuals and calculate distance with a\r
 665         // different distance algorithm that is designed for fixed\r
 666         // point. FreeTTS doesn't really need to do that.\r
 667         //\r
 668 \r
 669             if (path == null || path.candidate == null) {\r
 670                 cost = 0;\r
 671             } else {\r
 672                 int u0 = path.candidate.ival;\r
 673                 int u1 = candidate.ival;\r
 674                 if (clunitDB.getOptimalCoupling() == 1) {\r
 675                     Cost oCost = getOptimalCouple(u0, u1);\r
 676                     if (oCost.u0Move != -1) {\r
 677                         newPath.setFeature("unit_prev_move", new\r
 678                                 Integer(oCost.u0Move));\r
 679                     }\r
 680                     if (oCost.u1Move != -1) { \r
 681                         newPath.setFeature("unit_this_move", new\r
 682                                 Integer(oCost.u1Move));\r
 683                     }\r
 684                     cost = oCost.cost;\r
 685                 } else if (clunitDB.getOptimalCoupling() == 2) {\r
 686                     cost = getOptimalCoupleFrame(u0, u1);\r
 687                 } else {\r
 688                     cost = 0;\r
 689                 }\r
 690             }\r
 691 \r
 692             // cost *= clunitDB.getContinuityWeight();\r
 693             cost *= 5;  // magic number ("continuity weight") from flite\r
 694             // TODO: remove the state attribute from ViterbiPath, as it is simply path.candidate.pos!\r
 695         newPath.state = candidate.pos;\r
 696             if (path == null) {\r
 697                 newPath.score = cost + candidate.score;\r
 698             } else {\r
 699                 newPath.score = cost + candidate.score + path.score;\r
 700             }\r
 701 \r
 702             return newPath;\r
 703         }\r
 704 \r
 705         /**\r
 706          * Find the best path. This requires decode() to have been run.\r
 707          *\r
 708          * @return the best path.\r
 709          */\r
 710         private ViterbiPath findBestPath() {\r
 711             ViterbiPoint t;\r
 712             int best;\r
 713             int worst;\r
 714             ViterbiPath bestPath = null;\r
 715 \r
 716             if (bigIsGood) {\r
 717                 worst = Integer.MIN_VALUE;\r
 718             } else {\r
 719                 worst = Integer.MAX_VALUE;\r
 720             }\r
 721 \r
 722             best = worst;\r
 723 \r
 724         // TODO: do not need t, can use lastPoint throughout\r
 725             t = lastPoint;\r
 726 \r
 727             if (numStates != 0) {\r
 728                 if (LOGGER.isLoggable(Level.FINE)) {\r
 729                     LOGGER.fine("fbp ns " + numStates + " t " \r
 730                             + t.numStates + " best " + best);\r
 731                 }\r
 732         // All paths end in lastPoint, and take into account\r
 733         // previous path segment's scores. Therefore, it is\r
 734         // sufficient to find the best path from among the\r
 735         // paths for lastPoint.\r
 736                 for (int i = 0; i < t.numStates; i++) {\r
 737                     if (t.statePaths[i] != null && \r
 738                         (isBetterThan(t.statePaths[i].score, best))) {\r
 739                         best = t.statePaths[i].score;\r
 740                         bestPath = t.statePaths[i];\r
 741                     }\r
 742                 }\r
 743             }\r
 744             return bestPath;\r
 745         }\r
 746 \r
 747         /**\r
 748      * Find the optimal coupling frame for a pair of units.\r
 749          *\r
 750          * @param u0  first unit to try\r
 751          * @param u1  second unit to try\r
 752          *\r
 753          * @return the cost for this coupling, including the best coupling frame\r
 754          */\r
 755         Cost getOptimalCouple(int u0, int u1) {\r
 756             int a,b;\r
 757             int u1_p;\r
 758             int i, fcount;\r
 759             int u0_st, u1_p_st, u0_end, u1_p_end;\r
 760             int best_u0, best_u1_p;\r
 761             int dist, best_val; \r
 762             Cost cost = new Cost();\r
 763 \r
 764             u1_p = clunitDB.getPrevUnit(u1);\r
 765 \r
 766         // If u0 precedes u1, the cost is 0, and we're finished.\r
 767             if (u1_p == u0) {\r
 768                 return cost;\r
 769             }\r
 770 \r
 771 \r
 772         // If u1 does not have a previous unit, or that previous\r
 773         // unit does not belong to the same phone, the optimal\r
 774         // couple frame must be found between u0 and u1.\r
 775             if (u1_p == ClusterUnitDatabase.CLUNIT_NONE ||\r
 776                     clunitDB.getPhone(u0) !=\r
 777                     clunitDB.getPhone(u1_p)) {\r
 778                 cost.cost = 10 * getOptimalCoupleFrame(u0, u1);\r
 779                 return cost;\r
 780             }\r
 781 \r
 782             // If u1 has a valid previous unit, try to find the optimal\r
 783         // couple point between u0 and that previous unit, u1_p.\r
 784         \r
 785         // Find out which of u1_p and u0 is shorter.\r
 786             // In both units, we plan to start from one third of the unit length,\r
 787         // and to compare frame coupling frame by frame until the end of the\r
 788         // shorter unit is reached.\r
 789             u0_end = clunitDB.getEnd(u0) - clunitDB.getStart(u0);\r
 790             u1_p_end = clunitDB.getEnd(u1_p) - clunitDB.getStart(u1_p);\r
 791             u0_st = u0_end / 3;\r
 792             u1_p_st = u1_p_end / 3;\r
 793 \r
 794             if ((u0_end - u0_st) < (u1_p_end - u1_p_st)) {\r
 795                 fcount = u0_end - u0_st;\r
 796             // We could now shift the starting point for coupling in the longer unit\r
 797             // so that the distance from the end is the same in both units:\r
 798             /* u1_p_st = u1_p_end - fcount; */\r
 799             } else {\r
 800                 fcount = u1_p_end - u1_p_st;\r
 801             // We could now shift the starting point for coupling in the longer unit\r
 802             // so that the distance from the end is the same in both units:\r
 803             /* u0_st = u0_end - fcount; */\r
 804             }\r
 805 \r
 806             // Now go through the two units, and search for the frame pair where\r
 807         // the acoustic distance is smallest.\r
 808             best_u0 = u0_end;\r
 809             best_u1_p = u1_p_end;\r
 810             best_val = Integer.MAX_VALUE;\r
 811 \r
 812             for (i = 0; i < fcount; ++i) {\r
 813                 a = clunitDB.getStart(u0)+ u0_st + i;\r
 814                 b = clunitDB.getStart(u1_p) + u1_p_st + i;\r
 815                 dist = getFrameDistance(a, b,\r
 816                      clunitDB.getJoinWeights(),\r
 817                      clunitDB.getMcep().getSampleInfo().getNumberOfChannels())\r
 818                       + Math.abs( clunitDB.getSts().getFrameSize(a) - \r
 819                             clunitDB.getSts().getFrameSize(b)) * \r
 820                             clunitDB.getContinuityWeight();\r
 821 \r
 822                 if (dist < best_val) {\r
 823                     best_val = dist;\r
 824                     best_u0 = u0_st + i;\r
 825                     best_u1_p = u1_p_st + i;\r
 826                 }\r
 827             }\r
 828         \r
 829         // u0Move is the new end for u0\r
 830         // u1Move is the new start for u1\r
 831             cost.u0Move = clunitDB.getStart(u0) + best_u0;\r
 832             cost.u1Move = clunitDB.getStart(u1_p) + best_u1_p;\r
 833             cost.cost = 30000 + best_val;\r
 834             return cost;\r
 835         }\r
 836 \r
 837         /** \r
 838          * Returns the distance between the successive potential\r
 839          * frames.\r
 840          *\r
 841          * @param u0 the first unit to try\r
 842          * @param u1 the second unit to try\r
 843          *\r
 844          * @return the distance between the two units\r
 845          */\r
 846         int getOptimalCoupleFrame(int u0, int u1) {\r
 847             int a, b;\r
 848 \r
 849             if (clunitDB.getPrevUnit(u1) == u0) {\r
 850                 return 0; // consecutive units win\r
 851             }\r
 852 \r
 853             if (clunitDB.getNextUnit(u0) != ClusterUnitDatabase.CLUNIT_NONE) {\r
 854                 a = clunitDB.getEnd(u0);\r
 855             } else {  // don't want to do this but it's all that is left to do\r
 856                 a = clunitDB.getEnd(u0) - 1; // if num frames < 1 this is bad\r
 857             }\r
 858             b = clunitDB.getStart(u1);\r
 859 \r
 860             return getFrameDistance(a, b, \r
 861                     clunitDB.getJoinWeights(),\r
 862                     clunitDB.getMcep().getSampleInfo().getNumberOfChannels())\r
 863                 + Math.abs( clunitDB.getSts().getFrameSize(a) - \r
 864                             clunitDB.getSts().getFrameSize(b)) * \r
 865                             clunitDB.getContinuityWeight();\r
 866         }\r
 867 \r
 868         /**\r
 869          * Get the 'distance' between the frames a and b.\r
 870          *\r
 871          * @param a first frame\r
 872          * @param b second frame\r
 873          * @param joinWeights the weights used in comparison\r
 874          * @param order number of compares\r
 875          *\r
 876          * @return the distance between the frames\r
 877          */\r
 878         public int getFrameDistance(int a, int b, int[] joinWeights,int order) {\r
 879 \r
 880             if (LOGGER.isLoggable(Level.FINE)) {\r
 881                 LOGGER.fine(" gfd  a " + a   + " b " + b + " or " + order);\r
 882             }\r
 883             int r, i;\r
 884             short[] bv = clunitDB.getMcep().getSample(b).getFrameData();\r
 885             short[] av = clunitDB.getMcep().getSample(a).getFrameData();\r
 886 \r
 887             for (r = 0, i = 0; i < order; i++) {\r
 888                 int diff = av[i] - bv[i];\r
 889                 r += Math.abs(diff) * joinWeights[i] / 65536;\r
 890             }\r
 891             return r;\r
 892         }\r
 893 \r
 894     }\r
 895 \r
 896 \r
 897     /**\r
 898      * Represents a point in the Viterbi path. A point corresponds to an item,\r
 899      * e.g. a Segment.\r
 900      * Each ViterbiPoint knows\r
 901      * about its next ViterbiPoint, i.e. they can form a queue.\r
 902      */\r
 903     static class ViterbiPoint {\r
 904         Item item = null;\r
 905     // TODO: remove the numStates attribute from ViterbiPoint, as this is only statePaths.length\r
 906         int numStates = 0;\r
 907         int numPaths = 0;\r
 908         ViterbiCandidate cands = null;\r
 909         ViterbiPath paths = null;\r
 910         ViterbiPath[] statePaths = null;\r
 911         ViterbiPoint next = null;\r
 912 \r
 913         /**\r
 914          * Creates a ViterbiPoint for the given item. A typical item of choice is a Segment item.\r
 915          *\r
 916          * @param item the item of interest\r
 917          */\r
 918         public ViterbiPoint(Item item) {\r
 919             this.item = item;\r
 920         }\r
 921 \r
 922         /**\r
 923          * Initialize the path array to the given size.\r
 924          *\r
 925          * @param size the size of the path array\r
 926          */\r
 927         public void initPathArray(int size) {\r
 928             if (LOGGER.isLoggable(Level.FINE)) {\r
 929                 LOGGER.fine("init_path_array: " + size);\r
 930             }\r
 931             numStates = size;\r
 932             statePaths = new ViterbiPath[size];\r
 933         }\r
 934 \r
 935         /**\r
 936          * Initializes the dynamic path array. The path array will have\r
 937      * as many ViterbiPath members as there are candidates in the\r
 938      * queue starting with candidate.\r
 939      * Side effect on parameter: This will set the pos member of the\r
 940      * candidates in the queue starting with candidate to the position\r
 941      * in the queue.  \r
 942          *\r
 943          * @param candidate the first candidate of interest\r
 944          */\r
 945         public void initDynamicPathArray(ViterbiCandidate candidate) {\r
 946             int i = 0;\r
 947             for (ViterbiCandidate cc = candidate; cc != null; \r
 948                                                 i++, cc = cc.next) {\r
 949                 cc.pos = i;\r
 950             }\r
 951             if (LOGGER.isLoggable(Level.FINE)) {\r
 952                 LOGGER.fine("init_dynamic_ path_array: " + i);\r
 953             }\r
 954             initPathArray(i);\r
 955         }\r
 956 \r
 957         public String toString() {\r
 958             return " pnt: " + numStates + " paths " + numPaths;\r
 959         }\r
 960     }\r
 961 \r
 962     /**\r
 963      * Represents a candidate for the Viterbi algorthm.\r
 964      * Each candidate knows about its next candidate, i.e. they can form\r
 965      * a queue.\r
 966      */\r
 967     static class ViterbiCandidate {\r
 968         int score = 0;\r
 969         Object value = null;\r
 970         int ival = 0;\r
 971         int pos = 0;\r
 972         Item item = null;\r
 973         ViterbiCandidate next = null;\r
 974 \r
 975         /**\r
 976          * Sets the object for this candidate.\r
 977          * \r
 978          * @param obj the object\r
 979          */\r
 980         void set(Object obj) {\r
 981             value = obj;\r
 982         }\r
 983 \r
 984         /**\r
 985          * Sets the integer value  for this candidate. This can be used for saving\r
 986      * the unit index of the candidate unit represented by this ViterbiCandidate.\r
 987          * \r
 988          * @param ival the integer value\r
 989          */\r
 990         void setInt(int ival) {\r
 991             this.ival = ival;\r
 992             set(new Integer(ival));\r
 993         }\r
 994 \r
 995         /**\r
 996          * Converts this object to a string.\r
 997          *\r
 998          * @return the string form of this object\r
 999          */\r
1000         public String toString() {\r
1001             return "VC: Score " + score + " ival " + ival + " Pos " + pos;\r
1002         }\r
1003     }\r
1004 \r
1005     /**\r
1006      * Describes a Viterbi path.\r
1007      */\r
1008     static class ViterbiPath {\r
1009         int score = 0;\r
1010         int state = 0;\r
1011         ViterbiCandidate candidate = null;\r
1012         private FeatureSet f = null;\r
1013         ViterbiPath from = null;\r
1014         ViterbiPath next = null;\r
1015 \r
1016         /**\r
1017          * Sets a feature with the given name to the given value.\r
1018          *\r
1019          * @param name the name of the feature\r
1020          * @param value the new value for the feature\r
1021          */\r
1022         void setFeature(String name, Object value) {\r
1023             if (f == null) {\r
1024                 f = new FeatureSetImpl();\r
1025             }\r
1026             f.setObject(name, value);\r
1027         }\r
1028 \r
1029         /**\r
1030          * Retrieves a feature.\r
1031          *\r
1032          * @param name the name of the feature\r
1033          * \r
1034          * @return the feature\r
1035          */\r
1036         Object getFeature(String name) {\r
1037             Object value = null;\r
1038             if (f != null) {\r
1039                 value = f.getObject(name);\r
1040             }\r
1041             return value;\r
1042         }\r
1043 \r
1044         /**\r
1045          * Determines if the feature with the given name\r
1046          * exsists.\r
1047          *\r
1048          * @param name the feature to look for\r
1049          *\r
1050          * @return <code>true</code> if the feature is present;\r
1051          *      otherwise <code>false</code>.\r
1052          */\r
1053         boolean isPresent(String name) {\r
1054             if (f == null) {\r
1055                 return false;\r
1056             } else {\r
1057                 return getFeature(name) != null;\r
1058             }\r
1059         }\r
1060 \r
1061         /**\r
1062          * Converts this object to a string.\r
1063          *\r
1064          * @return the string form of this object\r
1065          */\r
1066         public String toString() {\r
1067             return "ViterbiPath score " + score + " state " + state;\r
1068         }\r
1069     }\r
1070 }\r
1071 \r
1072 \r
1073 /**\r
1074  * Information returned from getOptimalCoupling.\r
1075  */\r
1076 class Cost {\r
1077     int cost = 0;\r
1078     int u0Move = -1;\r
1079     int u1Move = -1;\r
1080 }\r
1081 \r
1082 \r
1083 /**\r
1084  * A Cluster Unit.\r
1085  */\r
1086 class ClusterUnit implements com.sun.speech.freetts.Unit {\r
1087 \r
1088     private ClusterUnitDatabase db;\r
1089     private String name;\r
1090     private int start;\r
1091     private int end;\r
1092 \r
1093     /**\r
1094      * Contructs a cluster unit given.\r
1095      *\r
1096      * @param db the database\r
1097      * @param name the unitName\r
1098      * @param start the start\r
1099      * @param end the end\r
1100      */\r
1101     public ClusterUnit(ClusterUnitDatabase db, String name, int start,int end) {\r
1102         this.db = db;\r
1103         this.start = start;\r
1104         this.end = end;\r
1105         this.name = name;\r
1106     }\r
1107 \r
1108 \r
1109     /**\r
1110      * Returns the start.\r
1111      *\r
1112      * @return the start\r
1113      */\r
1114     public int getStart() {\r
1115         return start;\r
1116     }\r
1117 \r
1118     /**\r
1119      * Returns the end.\r
1120      *\r
1121      * @return the end\r
1122      */\r
1123     public int getEnd() {\r
1124         return end;\r
1125     }\r
1126 \r
1127     /**\r
1128      * Returns the name of this Unit.\r
1129      *\r
1130      * @return the name of this unit\r
1131      */\r
1132     public String getName() {\r
1133         return name;\r
1134     }\r
1135 \r
1136     /**\r
1137      * returns the size of the unit.\r
1138      *\r
1139      * @return the size of the unit\r
1140      */\r
1141     public int getSize() {\r
1142         return db.getSts().getUnitSize(start, end);\r
1143     }\r
1144 \r
1145     /**\r
1146      * Retrieves the nearest sample.\r
1147      *\r
1148      * @param index the ideal index\r
1149      *\r
1150      * @return the nearest Sample\r
1151      */\r
1152     public Sample getNearestSample(float index) {\r
1153         int i, iSize = 0, nSize;\r
1154         SampleSet sts = db.getSts();\r
1155 \r
1156         // loop through all the Samples in this unit\r
1157         for (i = start; i < end; i++) {\r
1158             Sample sample = sts.getSample(i);\r
1159             nSize = iSize + sample.getResidualSize();\r
1160 \r
1161             if (Math.abs(index - (float) iSize) <\r
1162                 Math.abs(index - (float) nSize)) {\r
1163                 return sample;\r
1164             }\r
1165             iSize = nSize;\r
1166         }\r
1167         return sts.getSample(end - 1);\r
1168     }\r
1169 \r
1170     /**\r
1171      * gets the string name for the unit.\r
1172      *\r
1173      * @return string rep of this object.\r
1174      */\r
1175     public String toString() {\r
1176         return getName();\r
1177     }\r
1178 \r
1179 \r
1180     /** \r
1181      * Dumps this unit.\r
1182      */\r
1183     public void dump()  {\r
1184     }\r
1185 }\r
1186 \r