2 * Portions Copyright 2001 Sun Microsystems, Inc.
3 * Portions Copyright 1999-2001 Language Technologies Institute,
4 * Carnegie Mellon University.
5 * All Rights Reserved. Use is subject to license terms.
7 * See the file "license.terms" for information on usage and
8 * redistribution of this file, and for a DISCLAIMER OF ALL
11 package com.sun.speech.freetts.relp;
13 import java.io.BufferedWriter;
14 import java.io.OutputStreamWriter;
15 import java.io.PrintWriter;
16 import java.io.Writer;
17 import java.io.FileWriter;
18 import java.io.IOException;
19 import java.text.DecimalFormat;
20 import javax.sound.sampled.AudioFormat;
21 import com.sun.speech.freetts.Utterance;
22 import com.sun.speech.freetts.FreeTTSSpeakable;
23 import com.sun.speech.freetts.audio.AudioPlayer;
24 import com.sun.speech.freetts.util.WaveUtils;
25 import com.sun.speech.freetts.util.Utilities;
29 * Contains the result of linear predictive coding processing.
32 public class LPCResult {
// Post-emphasis factor multiplied with the previous output sample during
// synthesis; at 0.0 that term contributes nothing.
34 private static final double POST_EMPHASIS = 0.0;
// Frame size; assigned by setFrameSize(), defaults to 10.
36 private int frameSize = 10;
// Number of frames; assigned by resizeFrames() and setNumberOfFrames().
37 private int numberOfFrames = 0;
// Quantized LPC coefficients, indexed [frame][channel].
39 private short[][] frames = null;
// One time value per frame; getFrameShift() subtracts consecutive entries.
40 private int[] times = null;
// One residual size per frame; filled by setResidualSizes().
41 private int[] sizes = null;
44 * this is a normalized version of the residuals; to normalize it,
// Residual samples, one byte each; the synthesis loops look each one up in
// residualToFloatMap at index (value + 128).
47 private byte[] residuals = null;
// Number of LPC coefficients unpacked from each frame during synthesis.
49 private int numberOfChannels;
50 private int sampleRate;
// NOTE(review): only read by dump(Writer) in the visible code; no visible
// assignment -- confirm against the full file.
51 private int residualFold;
// lpcMinimum is added back when quantized frame values are unpacked to floats.
53 private float lpcMinimum;
54 private float lpcRange;
56 private final static int MAX_SAMPLE_SIZE =
57 Utilities.getInteger("com.sun.speech.freetts.LpcResult.maxSamples",
61 * Given a residual, maps it using WaveUtils.ulawToShort() to a float.
63 private final static float[] residualToFloatMap = new float[256];
66 for (short i = 0; i < residualToFloatMap.length; i++) {
67 residualToFloatMap[i] = (float) WaveUtils.ulawToShort(i);
69 residualToFloatMap[128] = (float) WaveUtils.ulawToShort((short) 255);
78 * Resets the number of frames in this LPCResult.
80 * @param numberOfFrames the number of frames in this LPC result
82 public void resizeFrames(int numberOfFrames) {
83 times = new int[numberOfFrames];
84 frames = new short[numberOfFrames][];
85 sizes = new int[numberOfFrames];
86 this.numberOfFrames = numberOfFrames;
90 * Resets the number of residuals. The new array is zero-filled; a zero
91 * residual byte is decoded as mu-law 255 (which is 0 for mulaw).
93 * @param numberOfSamples the number of samples in this LPC result
95 public void resizeResiduals(int numberOfSamples) {
96 residuals = new byte[numberOfSamples];
100 * A convenience method for setting the LPC values.
102 * @param numberOfChannels the number of channels
103 * @param sampleRate the sample rate
104 * @param lpcMin the LPC minimum
105 * @param lpcRange the LPC range
// Stores the channel count, the sample rate and the LPC minimum/range in
// one call.
107 public void setValues(int numberOfChannels,
// NOTE(review): the parameter lines between these two signature lines are
// not visible in this view; sampleRate is read below, so it is presumably
// declared there -- confirm against the full file.
110 float lpcMin, float lpcRange) {
111 this.numberOfChannels = numberOfChannels;
112 this.sampleRate = sampleRate;
113 this.lpcMinimum = lpcMin;
114 this.lpcRange = lpcRange;
118 * Returns the time difference of the frame at the given position
119 * with the frame prior to that. If the frame at the given position is
120 * the first frame (position 0), the time of that frame is returned.
122 * @param frameIndex the position of the frame
124 * @return the time difference of the frame at the given position
125 * with the frame prior to that
127 public int getFrameShift(int frameIndex) {
// Only indices that fall inside the times array reach the branches below.
128 if (0 <= frameIndex && frameIndex < times.length) {
129 if (frameIndex > 0) {
// Shift is this frame's time minus the previous frame's time.
130 return times[frameIndex] - times[frameIndex - 1];
// First frame has no predecessor, so its own time is the shift.
132 return times[frameIndex];
// NOTE(review): the result for an out-of-range index is not visible in
// this view.
140 * Returns the sizes of frames in this LPC.
142 * @return the sizes of frames
144 public int getFrameSize() {
149 * Returns the frame at the given index.
151 * @param index the index of interest
153 * @return the frame at the given index
155 public short[] getFrame(int index) {
156 return frames[index];
160 * Returns the array of times.
162 * @return the array of times
164 public int[] getTimes() {
169 * Returns the number of frames in this LPCResult.
171 * @return the number of frames
173 public int getNumberOfFrames() {
174 return numberOfFrames;
178 * Returns the number of channels in this LPCResult.
180 * @return the number of channels
182 public int getNumberOfChannels() {
183 return numberOfChannels;
187 * Returns the LPC minimum.
189 * @return the LPC minimum
191 public float getLPCMin() {
196 * Returns the LPC range.
198 * @return the LPC range
200 public float getLPCRange() {
205 * Returns the number of samples in this LPC result
207 * @return the number of samples
209 public int getNumberOfSamples() {
// Guards against the residual array not having been allocated yet.
// NOTE(review): the value returned in that case is not visible in this view.
210 if (residuals == null) {
// One sample per residual byte.
213 return residuals.length;
218 * Returns the sample rate.
220 * @return the sample rate
222 public int getSampleRate() {
227 * Returns the array of residuals sizes.
229 * @return the array of residuals sizes
231 public int[] getResidualSizes() {
236 * Returns the array of residuals.
238 * @return the array of residuals
240 public byte[] getResiduals() {
245 * Sets the sizes of frames in this LPC to the given size.
247 * @param frameSize the new frame size
249 public void setFrameSize(int frameSize) {
250 this.frameSize = frameSize;
254 * Sets the number of frames in this LPC Result.
256 * @param numberFrames the number of frames in this result
258 public void setNumberOfFrames(int numberFrames) {
259 this.numberOfFrames = numberFrames;
263 * Sets the frame at the given index.
265 * @param index the position of the frame to set
266 * @param newFrames new frame data
268 public void setFrame(int index, short[] newFrames) {
269 frames[index] = newFrames;
273 * Sets the array of times.
275 * @param times the times data
277 public void setTimes(int[] times) {
282 * Sets the number of channels.
284 * @param numberOfChannels the number of channels
286 public void setNumberOfChannels(int numberOfChannels) {
287 this.numberOfChannels = numberOfChannels;
291 * Sets the LPC minimum.
293 * @param min the LPC minimum
295 public void setLPCMin(float min) {
296 this.lpcMinimum = min;
300 * Sets the LPC range.
302 * @param range the LPC range
304 public void setLPCRange(float range) {
305 this.lpcRange = range;
309 * Sets the sample rate.
311 * @param rate the sample rate
313 public void setSampleRate(int rate) {
314 this.sampleRate = rate;
318 * Sets the array of residual sizes.
320 * @param sizes the new residual sizes
322 public void setResidualSizes(int[] sizes) {
323 for (int i = 0; i < this.sizes.length && i < sizes.length; i++) {
324 this.sizes[i] = sizes[i];
329 * Copies the information in the given unit to the array of residuals,
330 * starting at the given index, up until targetSize chars.
332 * @param source the unit that holds the information source
333 * @param targetPosition start position in the array of residuals
334 * @param targetSize the maximum number of characters to copy
// NOTE(review): the rest of the parameter list and the length argument of
// both arraycopy calls are not visible in this view.
336 public void copyResiduals(byte[] source,
339 int unitSize = source.length;
340 if (unitSize < targetSize) {
// Source is shorter than the target slot: shift the destination start by
// half of the unused space.
341 int targetStart = (targetSize - unitSize)/2;
342 System.arraycopy(source, 0,
343 residuals, targetPosition + targetStart,
// Source is at least as long as the target slot: start reading half of the
// excess into the source and write from the start of the slot.
346 int sourcePosition = (unitSize - targetSize)/2;
347 System.arraycopy(source, sourcePosition,
348 residuals, targetPosition,
354 * Copies the residual pulse in the given unit to the array of residuals,
355 * starting at the given index, up until targetSize bytes.
357 * @param source the unit that holds the information source
358 * @param targetPosition start position in the array of residuals
359 * @param targetSize the maximum number of characters to copy
361 public void copyResidualsPulse(byte[] source,
362 int targetPosition, int targetSize) {
363 int unitSize = source.length;
364 short sample = (short) (source[0] + 128);
365 if (unitSize < targetSize) {
366 residuals[(targetSize-unitSize)/2] = WaveUtils.shortToUlaw(sample);
368 residuals[(unitSize-targetSize)/2] = WaveUtils.shortToUlaw(sample);
373 * Given a 16 bit value (represented as an int), extract
374 * the high eight bits and return them
376 * @param val the 16 bit value
378 * @return the high eight bits
380 private final static byte hibyte(int val) {
381 return (byte) (val >>> 8);
385 * Given a 16 bit value (represented as an int), extract
386 * the low eight bits and return them
388 * @param val the 16 bit value
390 * @return the low eight bits
392 private final static byte lobyte(int val) {
393 return (byte) (val & 0x000000FF);
398 * Synthesize a Wave from this LPCResult
402 public boolean playWave(AudioPlayer player, Utterance utterance) {
403 return playWaveSamples(player, utterance.getSpeakable(),
404 getNumberOfSamples() * 2);
408 public byte[] getWaveSamples()
410 return getWaveSamples(2*getNumberOfSamples(), null);
414 * get the samples for this utterance
416 * @param numberSamples the number of samples desired
417 * @param utterance the utterance
419 * [[[ TODO: well there is a bunch of duplicated code here ..
420 * these should be combined into one routine.
// Synthesizes samples from the LPC frames and residuals into a newly
// allocated byte array of numberSamples bytes, two bytes per sample.
// NOTE(review): several short lines of this method (local declarations,
// the do-loop opener, closing braces, the return) are not visible in this
// view; the utterance parameter is not used on any visible line.
423 private byte[] getWaveSamples(int numberSamples,
424 Utterance utterance) {
425 int numberChannels = getNumberOfChannels();
429 byte[] samples = new byte[numberSamples];
430 byte[] residuals = getResiduals();
431 int[] residualSizes = getResidualSizes();
// outBuffer holds previous output values; it has one node more than there
// are coefficients.
433 FloatList outBuffer = FloatList.createList(numberChannels + 1);
434 FloatList lpcCoefficients = FloatList.createList(numberChannels);
// scale factor from the 16-bit quantized coefficient range to the LPC range
436 double multiplier = (double) getLPCRange() / 65535.0;
439 // for each frame in the LPC result
// r indexes the residual array and keeps advancing across frames
440 for (int r = 0, i = 0; i < numberOfFrames; i++) {
442 // unpack the LPC coefficients
443 short[] frame = getFrame(i);
445 FloatList lpcCoeffs = lpcCoefficients;
446 for (int k = 0; k < numberChannels; k++) {
// shift the signed short to 0..65535, scale it, then add the LPC minimum
447 lpcCoeffs.value = (float) ( (frame[k] + 32768.0)
448 * multiplier) + lpcMinimum;
449 lpcCoeffs = lpcCoeffs.next;
// number of residuals that belong to this frame
452 pmSizeSamples = residualSizes[i];
454 // resynthesize the signal, pmSizeSamples ~= 90
455 // what's in the loop is done for each residual
456 for (int j = 0; j < pmSizeSamples; j++, r++) {
458 FloatList backBuffer = outBuffer.prev;
// start from the decoded residual; residual bytes are offset by 128 to
// index the lookup table
459 float ob = residualToFloatMap[residuals[r] + 128];
461 lpcCoeffs = lpcCoefficients;
// accumulate coefficient * earlier output, walking backwards through the
// output history until the coefficient list wraps around
463 ob += lpcCoeffs.value * backBuffer.value;
464 backBuffer = backBuffer.prev;
465 lpcCoeffs = lpcCoeffs.next;
466 } while (lpcCoeffs != lpcCoefficients);
468 int sample = (int) (ob + (pp * POST_EMPHASIS));
// high byte first, then low byte
469 samples[s++] = (byte) hibyte(sample);
470 samples[s++] = (byte) lobyte(sample);
// remember this output both in the history and as the previous sample
473 outBuffer.value = pp = ob;
474 outBuffer = outBuffer.next;
481 * Play the sample data on the given player
483 * @param player where to send the audio
484 * @param numberSamples the number of samples
// Synthesizes samples from the LPC frames and residuals and writes them to
// the player in chunks of MAX_SAMPLE_SIZE bytes, checking
// speakable.isCompleted() along the way.
// NOTE(review): several short lines of this method (the last parameter,
// local declarations such as ok, s and pp, the do-loop opener, closing
// braces, the statements after the final check, the return) are not
// visible in this view.
486 private boolean playWaveSamples(AudioPlayer player,
487 FreeTTSSpeakable speakable,
490 int numberChannels = getNumberOfChannels();
// reusable output chunk
494 byte[] samples = new byte[MAX_SAMPLE_SIZE];
495 byte[] residuals = getResiduals();
496 int[] residualSizes = getResidualSizes();
// outBuffer holds previous output values; it has one node more than there
// are coefficients.
498 FloatList outBuffer = FloatList.createList(numberChannels + 1);
499 FloatList lpcCoefficients = FloatList.createList(numberChannels);
// scale factor from the 16-bit quantized coefficient range to the LPC range
501 double multiplier = (double) getLPCRange() / 65535.0;
504 // for each frame in the LPC result
505 player.begin(numberSamples);
// the loop also ends as soon as ok becomes false, which happens once the
// speakable reports completion
506 for (int r = 0, i = 0;
507 (ok &= !speakable.isCompleted()) &&
508 i < numberOfFrames; i++) {
510 // unpack the LPC coefficients
511 short[] frame = getFrame(i);
513 FloatList lpcCoeffs = lpcCoefficients;
514 for (int k = 0; k < numberChannels; k++) {
// shift the signed short to 0..65535, scale it, then add the LPC minimum
515 lpcCoeffs.value = (float) ( (frame[k] + 32768.0)
516 * multiplier) + lpcMinimum;
517 lpcCoeffs = lpcCoeffs.next;
// number of residuals that belong to this frame
520 pmSizeSamples = residualSizes[i];
522 // resynthesize the signal, pmSizeSamples ~= 90
523 // what's in the loop is done for each residual
524 for (int j = 0; j < pmSizeSamples; j++, r++) {
526 FloatList backBuffer = outBuffer.prev;
// start from the decoded residual; residual bytes are offset by 128 to
// index the lookup table
527 float ob = residualToFloatMap[residuals[r] + 128];
529 lpcCoeffs = lpcCoefficients;
// accumulate coefficient * earlier output, walking backwards through the
// output history until the coefficient list wraps around
531 ob += lpcCoeffs.value * backBuffer.value;
532 backBuffer = backBuffer.prev;
533 lpcCoeffs = lpcCoeffs.next;
534 } while (lpcCoeffs != lpcCoefficients);
536 int sample = (int) (ob + (pp * POST_EMPHASIS));
// high byte first, then low byte
537 samples[s++] = hibyte(sample);
538 samples[s++] = lobyte(sample);
// chunk is full: hand it to the player unless the speakable has completed
540 if (s >= MAX_SAMPLE_SIZE) {
541 if ((ok &= !speakable.isCompleted()) &&
542 !player.write(samples)) {
// remember this output both in the history and as the previous sample
548 outBuffer.value = pp = ob;
549 outBuffer = outBuffer.next;
553 // write out the very last samples
// only the first s bytes of the chunk are valid here
554 if ((ok &= !speakable.isCompleted()) && s > 0) {
555 ok = player.write(samples, 0, s);
559 // tell the AudioPlayer it is the end of Utterance
560 if (ok &= !speakable.isCompleted()) {
568 * Dumps this LPCResult to standard out
571 dump(new OutputStreamWriter(System.out));
575 * Dumps this LPCResult to the given stream.
577 * @param writer the output stream
// Writes a textual description of this LPCResult (header values, times,
// frames, residual sizes and residuals) to the given writer.
// NOTE(review): short lines of this method (else branches, the declaration
// of i, closing braces and anything after the last visible line) are not
// visible in this view.
579 public void dump(Writer writer) {
// all floating point values are printed with exactly six fraction digits
580 DecimalFormat numberFormat = new DecimalFormat();
581 numberFormat.setMaximumFractionDigits(6);
582 numberFormat.setMinimumFractionDigits(6);
583 PrintWriter pw = new PrintWriter(new BufferedWriter(writer));
// an empty result gets a short, '#'-prefixed header
585 if (getNumberOfFrames() == 0) {
586 pw.println("# ========== LPCResult ==========");
587 pw.println("# Num_of_Frames: " + getNumberOfFrames());
591 pw.println("========== LPCResult ==========");
592 pw.println("Num_of_Frames: " + getNumberOfFrames());
593 pw.println("Num_of_Channels: " + getNumberOfChannels());
594 pw.println("Num_of_Samples: " + getNumberOfSamples());
595 pw.println("Sample_Rate: " + sampleRate);
596 pw.println("LPC_Minimum: " + numberFormat.format(lpcMinimum));
597 pw.println("LPC_Range: " + numberFormat.format(lpcRange));
598 pw.println("Residual_Fold: " + residualFold);
599 pw.println("Post_Emphasis: " + numberFormat.format(POST_EMPHASIS));
602 pw.print("Times:\n");
603 for (i = 0; i < getNumberOfFrames(); i++) {
604 pw.print(times[i] + " ");
606 pw.print("\nFrames: ");
607 for (i = 0; i < getNumberOfFrames(); i++) {
608 // for each frame, print all elements
609 short[] frame = getFrame(i);
610 for (int j = 0; j < frame.length; j++) {
// printed shifted by 32768, i.e. as a non-negative value, one per line
611 pw.print(( ((int) frame[j]) + 32768) + "\n");
614 pw.print("\nSizes: ");
615 for (i = 0; i < getNumberOfFrames(); i++) {
616 pw.print(sizes[i] + " ");
618 pw.print("\nResiduals: ");
619 for (i = 0; i < getNumberOfSamples(); i++) {
// zero residuals are special-cased; what is printed for them is not
// visible in this view
620 if (residuals[i] == 0) {
// all other residuals are printed shifted by 128
623 pw.print(( ((int) residuals[i]) + 128));
633 * Dumps the wave data associated with this result
635 public void dumpASCII() {
636 dumpASCII(new OutputStreamWriter(System.out));
640 * Dumps the wave data associated with this result
642 * @param path the path where the wave data is appended to
644 * @throws IOException if an IO error occurs
646 public void dumpASCII(String path) throws IOException {
647 Writer writer = new FileWriter(path, true);
648 getWave().dump(writer);
652 * Synthesize a Wave from this LPCResult
// Builds a Wave from freshly synthesized samples (two bytes per residual
// sample) together with the audio format that describes them.
656 private Wave getWave() {
657 // construct a new wave object
658 AudioFormat audioFormat = new AudioFormat
// NOTE(review): the first constructor argument line is not visible in this
// view. The remaining arguments are the 16-bit sample size, one channel,
// signed, and a literal true that is presumably the big-endian flag -- it
// matches the synthesis loop writing the high byte before the low byte, but
// differs from Wave.DEFAULT_BIG_ENDIAN.
660 Wave.DEFAULT_SAMPLE_SIZE_IN_BITS, 1,
661 Wave.DEFAULT_SIGNED, true);
662 return new Wave(audioFormat,
663 getWaveSamples( getNumberOfSamples() * 2, null));
667 * Dumps the wave out to the given stream
669 * @param writer the output stream
671 public void dumpASCII(Writer writer) {
// Synthesizes the whole waveform first.
// NOTE(review): the statement that writes the wave to the writer is not
// visible in this view.
672 Wave wave = getWave();
677 * A Wave is an immutable class that contains the AudioFormat and
678 * the actual wave samples, which are currently held as a byte
679 * array.
681 private static class Wave {
683 * The default sample size of the Wave, which is 16.
685 public static final int DEFAULT_SAMPLE_SIZE_IN_BITS = 16;
688 * A boolean indicating that the Wave is signed, i.e.,
689 * this value is true.
691 public static final boolean DEFAULT_SIGNED = true;
694 * A boolean indicating that the Wave samples are represented as
695 * little endian, i.e., this value is false.
697 public static final boolean DEFAULT_BIG_ENDIAN = false;
700 private byte[] samples = null;
701 private AudioFormat audioFormat = null;
704 * Constructs a Wave with the given audio format and wave samples.
706 * @param audioFormat the audio format of the wave
707 * @param samples the wave samples
709 Wave(AudioFormat audioFormat, byte[] samples) {
710 this.audioFormat = audioFormat;
711 this.samples = samples;
716 * Dumps the wave out to the given stream
717 * @param writer the output stream
719 public void dump(Writer writer) {
720 PrintWriter pw = new PrintWriter(new BufferedWriter(writer));
721 pw.println("#========== Wave ==========");
722 pw.println("#Type: NULL");
723 pw.println("#Sample_Rate: " + (int)audioFormat.getSampleRate());
724 pw.println("#Num_of_Samples: " + samples.length / 2);
725 pw.println("#Num_of_Channels: " + audioFormat.getChannels());
726 if (samples != null) {
727 for (int i = 0; i < samples.length; i+=2) {
729 WaveUtils.bytesToShort(samples[i], samples[i+1]));
740 * FloatList is used to maintain a circular buffer of float values.
741 * It is essentially an index-free array of floats that can easily be
742 * iterated through forwards or backwards. Keeping values in an index
743 * free list like this eliminates index bounds checking which can
761 * Creates a circular list of nodes of the given size
763 * @param size the number of nodes in the list
765 * @return an entry in the list.
// Allocates size FloatList nodes, one per loop iteration.
// NOTE(review): the statements that link the nodes together and the return
// are not visible in this view.
767 static FloatList createList(int size) {
768 FloatList prev = null;
769 FloatList first = null;
771 for (int i = 0; i < size; i++) {
772 FloatList cur = new FloatList();
788 * prints out the contents of this list
790 * @param title the title of the dump
791 * @param list the list to dump
// Prints the title followed by one "Item: " line per node to standard out.
// NOTE(review): the loop opener and the statement that advances cur are
// not visible in this view.
793 static void dump(String title, FloatList list) {
794 System.out.println(title);
796 FloatList cur = list;
798 System.out.println("Item: " + cur.value);
// stop once the traversal is back at the starting node
800 } while (cur != list);