2 * Portions Copyright 2001 Sun Microsystems, Inc.
3 * Portions Copyright 1999-2001 Language Technologies Institute,
4 * Carnegie Mellon University.
5 * All Rights Reserved. Use is subject to license terms.
7 * See the file "license.terms" for information on usage and
8 * redistribution of this file, and for a DISCLAIMER OF ALL
11 package com.sun.speech.freetts.en;
13 import com.sun.speech.freetts.UtteranceProcessor;
14 import com.sun.speech.freetts.Voice;
15 import com.sun.speech.freetts.Relation;
16 import com.sun.speech.freetts.Utterance;
17 import com.sun.speech.freetts.ProcessException;
18 import com.sun.speech.freetts.PathExtractorImpl;
19 import com.sun.speech.freetts.PathExtractor;
20 import com.sun.speech.freetts.Item;
21 import java.util.StringTokenizer;
22 import java.util.NoSuchElementException;
23 import java.util.List;
24 import java.util.ArrayList;
25 import java.io.BufferedReader;
26 import java.io.InputStreamReader;
27 import java.io.IOException;
32 * Calculates the F0 curve for an utterance based on the Black and
33 * Hunt article "Generating F0 Contours from ToBI Labels Using Linear
34 * Regression," ICSLP96, vol. 3, pp 1385-1388, Philadelphia,
37 public class ContourGenerator implements UtteranceProcessor {
// Feature-path extractors shared by the methods of this class, followed by
// the model parameters.
// NOTE(review): the trailing constructor argument of the first five
// extractors is not visible in this excerpt.
// Looked up on a syllable in processUtterance and cast to Float; the result
// is used as the time of the syllable's first target point.
38 private final static PathExtractor endPath =
39 new PathExtractorImpl("R:SylStructure.daughter.R:Segment.p.end",
// Looked up on a pre-break syllable in processUtterance and cast to Float;
// the result is used as the time of an additional trailing target point.
41 private final static PathExtractor lastDaughterEndPath =
42 new PathExtractorImpl("R:SylStructure.daughtern.end",
// Its result is compared against "pau" by isPostBreak.
44 private final static PathExtractor postBreakPath =
45 new PathExtractorImpl("R:SylStructure.daughter.R:Segment.p.name",
// Its result is compared against "pau" by isPreBreak.
47 private final static PathExtractor preBreakPath =
48 new PathExtractorImpl("R:SylStructure.daughtern.R:Segment.n.name",
// Looked up on a segment in vowelMid and averaged with that segment's own
// "end" feature.
50 private final static PathExtractor vowelMidPath =
51 new PathExtractorImpl("R:Segment.p.end",
// Looked up per syllable in processUtterance and parsed as a float
// (localMean there).
53 private final static PathExtractor localF0Shift =
54 new PathExtractorImpl(
55 "R:SylStructure.parent.R:Token.parent.local_f0_shift", true);
// Looked up per syllable in processUtterance and parsed as a float
// (localStddev there).
56 private final static PathExtractor localF0Range =
57 new PathExtractorImpl(
58 "R:SylStructure.parent.R:Token.parent.local_f0_range", true);
// Mean and standard deviation that mapF0 uses to normalise a model value;
// both are assigned once, in the constructor.
60 private final float modelMean;
61 private final float modelStddev;
// Regression terms. Starts as a one-element array holding null and is passed
// to List.toArray in the constructor; applyLrModel reads terms[0] as the base
// values and the remaining entries as weighted terms.
62 private F0ModelTerm[] terms = { null };
65 * Creates a ContourGenerator utterance processor.
67 * @param url source of the data
68 * @param modelMean the average frequency
69 * @param modelStddev the std deviation of the frequency
71 * @throws IOException if an error occurs while loading data
// Builds the generator: stores the two model parameters and loads the
// regression terms line by line from the stream opened on url.
// NOTE(review): lines are elided in this excerpt (for example the
// declaration of the local variable line is not visible).
73 public ContourGenerator(URL url,
74 float modelMean, float modelStddev)
76 this.modelMean = modelMean;
77 this.modelStddev = modelStddev;
// Raw list that is handed to parseAndAdd for every parsed line.
79 List termsList = new ArrayList();
// NOTE(review): the InputStreamReader is created without an explicit charset,
// so the platform default applies; no close() of reader is visible in this
// excerpt -- confirm the stream is released.
82 BufferedReader reader = new BufferedReader(
83 new InputStreamReader(url.openStream()));
84 line = reader.readLine();
85 while (line != null) {
// Lines starting with "***" are skipped; every other line is parsed.
// NOTE(review): parseAndAdd is protected, so a subclass override would run
// here before the subclass is fully constructed.
86 if (!line.startsWith("***")) {
87 parseAndAdd(termsList, line);
89 line = reader.readLine();
// Copies the list into an F0ModelTerm[]; the current terms array is passed
// as the array argument of toArray.
91 terms = (F0ModelTerm[]) termsList.toArray(terms);
96 * Generates the F0 contour for the utterance.
98 * @param utterance the utterance to process
100 * @throws ProcessException if an <code>IOException</code> is
101 * thrown during the processing of the utterance
// Builds the TARGET relation of the utterance. For each syllable that has
// daughters in the syllable structure, target points (pos, f0) are appended;
// afterwards the relation is padded at the front and at the back.
// NOTE(review): several original lines (local declarations, the loop header,
// some conditions and closing braces) are not visible in this excerpt.
103 public void processUtterance(Utterance utterance) throws ProcessException {
// Utterance-wide defaults taken from the voice: pitch multiplied by the pitch
// shift, and the pitch range.
111 mean = utterance.getVoice().getPitch();
112 mean *= utterance.getVoice().getPitchShift();
113 stddev = utterance.getVoice().getPitchRange();
115 Relation target = utterance.createRelation(Relation.TARGET);
// Iteration starts at the head of the SYLLABLE relation and advances with
// getNext().
117 utterance.getRelation(Relation.SYLLABLE).getHead();
119 syllable = syllable.getNext()) {
121 if (syllable.getItemAs(Relation.SYLLABLE_STRUCTURE).hasDaughters()) {
// Per-syllable shift, parsed from the string form of the feature; the value
// 0.0 is special-cased (the handling itself is elided here).
123 tval = localF0Shift.findFeature(syllable);
124 localMean = Float.parseFloat(tval.toString());
126 if (localMean == 0.0) {
// Per-syllable range; 0.0 falls back to the voice-wide stddev.
132 tval = localF0Range.findFeature(syllable);
133 localStddev = Float.parseFloat(tval.toString());
135 if (localStddev == 0.0) {
136 localStddev = stddev;
139 Interceptor interceptor = applyLrModel(syllable);
// After a break, lend is re-seeded from this syllable's own start value.
140 if (isPostBreak(syllable)) {
141 lend = mapF0(interceptor.start, localMean, localStddev);
144 Float val = (Float) endPath.findFeature(syllable);
145 // assert val != null;
146 // don't mind null ptr exception
// First point: at the time returned by endPath, with the mapped average of
// interceptor.start and lend.
// NOTE(review): lend already holds a mapF0 result, yet it is averaged with
// the unmapped interceptor.start and then mapped again -- confirm intended.
147 addTargetPoint(target, val.floatValue(),
148 mapF0((interceptor.start + lend) / 2.0f,
149 localMean, localStddev));
// Second point: at the time returned by vowelMid, with the mapped mid value.
150 addTargetPoint(target, vowelMid(syllable),
151 mapF0(interceptor.mid, localMean, localStddev));
// Keeps this syllable's mapped end value for use with the next syllable.
152 lend = mapF0(interceptor.end, localMean, localStddev);
// Before a break, one more point is added at the time returned by
// lastDaughterEndPath, with the mapped end value.
153 if (isPreBreak(syllable)) {
154 Float eval = (Float) lastDaughterEndPath.findFeature(
156 addTargetPoint(target, eval.floatValue(),
157 mapF0(interceptor.end, localMean, localStddev));
// Padding is only attempted when the SEGMENT relation has a head item.
162 if (utterance.getRelation(Relation.SEGMENT).getHead() != null) {
163 Item first = target.getHead();
// Taken when the (elided) condition on first holds: a point at time 0
// carrying the voice-wide mean.
165 addTargetPoint(target, 0, mean);
// Otherwise, if the first target lies after time 0, an item with the same f0
// is prepended at pos 0.
166 } else if (first.getFeatures().getFloat("pos") > 0) {
167 Item newItem = first.prependItem(null);
168 newItem.getFeatures().setFloat("pos", 0.0f);
169 newItem.getFeatures().setFloat(
170 "f0", first.getFeatures().getFloat("f0"));
172 Item last = (Item) target.getTail();
174 = utterance.getRelation(Relation.SEGMENT).getTail();
177 if (lastSegment != null) {
178 segEnd = lastSegment.getFeatures().getFloat("end");
// When the last target lies before segEnd, a point is appended at segEnd;
// the f0 argument continues on lines elided from this excerpt.
181 if (last.getFeatures().getFloat("pos") < segEnd) {
182 addTargetPoint(target, segEnd, last.getFeatures().
189 * Applies the linear regression model.
191 * @param syllable the syllable to process
193 * @return the 3 points for the syllable as an <code>Interceptor</code>
// Evaluates the regression terms for one syllable. terms[0] supplies the base
// start/mid/end values; every further term adds fv times its own
// start/mid/end coefficients.
// NOTE(review): the declaration of fv, its assignments in the typed branch
// and the return statement are elided in this excerpt.
// NOTE(review): terms is initialised to a one-element array holding null, so
// terms[0] would be null here if no term was loaded -- confirm callers
// always load at least one.
195 private Interceptor applyLrModel(Item syllable) {
197 Interceptor interceptor = new Interceptor();
198 interceptor.start = terms[0].start;
199 interceptor.mid = terms[0].mid;
200 interceptor.end = terms[0].end;
// Starts at 1 because term 0 was consumed above.
202 for (int i = 1; i < terms.length; i++) {
203 Object value = terms[i].findFeature(syllable);
// A term with a non-null type is matched by comparing the feature's string
// form with that type.
204 if (terms[i].type != null) {
205 if (value.toString().equals(terms[i].type)) {
// Here the feature's string form is parsed as the float weight.
211 fv = Float.parseFloat(value.toString());
214 interceptor.start += fv * terms[i].start;
215 interceptor.mid += fv * terms[i].mid;
216 interceptor.end += fv * terms[i].end;
223 * Returns the time point midway through the vowel of this syllable.
225 * @param syllable the syllable of interest
227 * @return the time point midway through the vowel of this syllable
// Computes a time value for the syllable from its segments: the segments are
// scanned starting at the first daughter in the syllable structure, looking
// for one whose "vc" phone feature is "+".
// NOTE(review): local declarations, return statements and closing braces are
// elided in this excerpt.
229 private final float vowelMid(Item syllable) {
230 Voice voice = syllable.getUtterance().getVoice();
231 Item firstSeg = syllable.getItemAs(
232 Relation.SYLLABLE_STRUCTURE).getDaughter();
236 for (segment = firstSeg; segment != null; segment =segment.getNext()) {
237 // TODO refactor phone feature stuff like this so that
238 // it can be understood.
// For a matching segment: the average of its own "end" feature and the value
// vowelMidPath returns for it.
239 if ("+".equals(voice.getPhoneFeature(segment.toString(), "vc"))) {
240 val = (segment.getFeatures().getFloat("end") +
241 ((Float) vowelMidPath.findFeature(segment)).floatValue()) / 2.0f;
// Handling of a missing first segment is elided here; the same averaging is
// then applied to firstSeg itself.
246 if (firstSeg == null) {
249 val = (firstSeg.getFeatures().getFloat("end") +
250 ((Float) vowelMidPath.findFeature(firstSeg)).floatValue())
258 * Adds a target point with the given time and frequency
259 * to the given relation.
261 * @param target the target of interest
262 * @param pos the time
263 * @param f0 the frequency
// Appends a new item to target and stores pos and an f0 value on it.
// The stored f0 is 500.0 in the first branch, 50.0 when f0 is below 50.0,
// and f0 itself otherwise.
// NOTE(review): the condition guarding the first branch is elided in this
// excerpt -- presumably an upper bound check, confirm.
265 private void addTargetPoint(Relation target, float pos, float f0) {
266 Item item = target.appendItem();
267 item.getFeatures().setFloat("pos", pos);
269 item.getFeatures().setFloat("f0", 500.0f);
270 } else if (f0 < 50.0) {
271 item.getFeatures().setFloat("f0", 50.0f);
273 item.getFeatures().setFloat("f0", f0);
278 * Determines if this syllable is following a break.
280 * @param syllable the syllable to check
282 * @return <code>true</code> if this syllable is following a
283 * break; otherwise <code>false</code>.
// True when the syllable has no previous item, or when postBreakPath yields
// "pau" for it. The literal is the receiver of equals, so a null feature
// value simply gives false.
285 private final boolean isPostBreak(Item syllable) {
286 return ((syllable.getPrevious() == null) ||
287 "pau".equals(postBreakPath.findFeature(syllable)));
291 * Determines if this syllable is before a break.
293 * @param syllable the syllable to check
295 * @return <code>true</code> if this syllable is before a
296 * break; otherwise <code>false</code>.
// True when the syllable has no next item, or when preBreakPath yields "pau"
// for it. The literal is the receiver of equals, so a null feature value
// simply gives false.
298 private final boolean isPreBreak(Item syllable) {
299 return ((syllable.getNext() == null) ||
300 "pau".equals(preBreakPath.findFeature(syllable)));
304 * Maps the given value to the curve.
306 * @param val the value to map
308 * @return the mapped value
// Normalises val with the model mean and model standard deviation, then
// rescales the result by stddev and offsets it by mean.
// NOTE(review): no guard against modelStddev being 0 is visible here.
310 private final float mapF0(float val, float mean, float stddev) {
311 return ((((val - modelMean)/ modelStddev) * stddev) + mean);
315 * Parses the line into an F0ModelTerm.
317 * @param list resulting F0ModelTerm is added to this list
318 * @param line the string to parse
// Parses one space-separated line with five fields (feature, start, mid, end,
// type) and appends the resulting F0ModelTerm to list.
// NOTE(review): the opening of the try block and the body of the "null" check
// are elided in this excerpt; applyLrModel tests type against null, so the
// literal "null" is presumably converted to a null reference -- confirm.
320 protected void parseAndAdd(List list, String line) {
322 StringTokenizer tokenizer = new StringTokenizer(line," ");
323 String feature = tokenizer.nextToken();
324 float start = Float.parseFloat(tokenizer.nextToken());
325 float mid = Float.parseFloat(tokenizer.nextToken());
326 float end = Float.parseFloat(tokenizer.nextToken());
327 String type = tokenizer.nextToken();
329 if (type.equals("null")) {
333 list.add(new F0ModelTerm(feature, start, mid, end, type));
// Too few tokens on the line.
// NOTE(review): only the message is forwarded; the original exception is not
// attached as the cause of the Error.
334 } catch (NoSuchElementException nsee) {
335 throw new Error("ContourGenerator: Error while parsing F0ModelTerm "
336 + nsee.getMessage());
// One of the three numeric fields is not a valid float.
337 } catch (NumberFormatException nfe) {
338 throw new Error("ContourGenerator: Bad float format "
344 * Returns the string representation of the object.
346 * @return the string representation of the object
// Always returns the fixed name "ContourGenerator"; no instance state is
// included.
348 public String toString() {
349 return "ContourGenerator";
354 * Represents a single term for the F0 model
364 * Constructs an F0ModelTerm.
366 * @param feature the feature of the term
367 * @param start the starting point of the term
368 * @param mid the mid-point of the term
369 * @param end the end point of the term
370 * @param type the type of the term
// Creates a term whose feature is resolved through a PathExtractorImpl built
// from the given feature string.
// NOTE(review): the assignments of start, mid, end and type are elided in
// this excerpt.
372 F0ModelTerm(String feature, float start, float mid,
373 float end, String type) {
374 path = new PathExtractorImpl(feature, true);
382 * Finds the feature associated with the given item.
384 * @param item the item of interest
386 * @return the object representing the feature.
// Delegates to this term's path extractor and returns its result unchanged.
388 public Object findFeature(Item item) {
389 return path.findFeature(item);
393 * Returns the string representation of the object
395 * @return the string representation of the object
// The string form of a term is the string form of its path extractor.
397 public String toString() {
398 return path.toString();
403 * Represents an interceptor: the start, mid and end points computed
403 * for a syllable.
411 * Constructs the default interceptor
420 * Returns the string representation of the object.
422 * @return the string representation of the object
424 public String toString() {
425 return Float.toString(start) + " " +
426 Float.toString(mid) + " " +