2 * Portions Copyright 2003 Sun Microsystems, Inc.
3 * Portions Copyright 1999-2003 Language Technologies Institute,
4 * Carnegie Mellon University.
5 * All Rights Reserved. Use is subject to license terms.
7 * See the file "license.terms" for information on usage and
8 * redistribution of this file, and for a DISCLAIMER OF ALL
13 import java.io.FileInputStream;
14 import java.io.DataInputStream;
15 import java.io.OutputStreamWriter;
16 import java.io.FileOutputStream;
17 import java.io.IOException;
18 import java.io.FileNotFoundException;
20 import javax.sound.sampled.spi.AudioFileReader;
21 import javax.sound.sampled.AudioInputStream;
25 * Performs the generation of STS files in FestVox to FreeTTS
29 * This program is a port from flite/tools/find_sts_main.c
34 * The a/b diff result is slightly different than the C version due to
35 * Intel floating-point math.
38 public strictfp class FindSTS {
40 * Generate an sts file
46 * <code> java FindSTS lpc_min lpc_range lpcFile waveFile stsFile
50 * <code> stsFile </code> the output file.
53 public static void main(String[] args) {
55 if (args.length != 5) {
56 System.err.println("Usage: java FindSTS lpc_min lpc_range "
57 + "lpcFile waveFile stsFile");
59 float lpc_min = Float.parseFloat(args[0]);
60 float lpc_range = Float.parseFloat(args[1]);
61 FileInputStream lpcFile = new FileInputStream(args[2]);
62 FileInputStream waveFile = new FileInputStream(args[3]);
63 FileOutputStream stsFile = new FileOutputStream(args[4]);
66 LPC lpc = new LPC(new DataInputStream(lpcFile));
67 Wave wave = new Wave(new DataInputStream(waveFile));
72 STS[] stsData = findSTS(wave, lpc, lpc_min, lpc_range);
74 // Verify STS data for sanity
75 Wave reconstructedWave = new
76 Wave(wave.getSampleRate(), stsData, lpc,
78 wave.compare(reconstructedWave);
81 OutputStreamWriter stsWriter = new OutputStreamWriter(stsFile);
82 saveSTS(stsData, lpc, wave, stsWriter, lpc_min, lpc_range);
87 } catch (FileNotFoundException ioe) {
88 throw new Error("Error while running FindSTS" + ioe.getMessage());
89 } catch (IOException ioe) {
90 throw new Error("IO error while finding sts" + ioe.getMessage());
97 * @param wave the data from the wave file
98 * @param lpc the data from the lpc file
99 * @param lpc_min the minimum lpc value
100 * @param lpc_range the range of the lpc values
102 * @return an <code>STS</code> array containing the data
104 private static STS[] findSTS(Wave wave, LPC lpc, float lpc_min,
110 STS[] stsData = new STS[lpc.getNumFrames()];
112 // read wave data into a special array.
114 new short[wave.getNumSamples() + lpc.getNumChannels()];
115 System.arraycopy(wave.getSamples(), 0, waveData,
116 lpc.getNumChannels(), wave.getNumSamples());
118 for (int i = 0; i < lpc.getNumFrames(); i++) {
123 end = (int) ((float) wave.getSampleRate() * lpc.getTime(i));
126 System.out.println("frame size at "
127 + Float.toString(lpc.getTime(i)) + " is "
128 + Integer.toString(size) + ".");
131 residual = generateResiduals(waveData,
132 start + lpc.getNumChannels(), lpc.getFrame(i),
133 lpc.getNumChannels(), size);
135 frame = new int[lpc.getNumChannels() - 1];
136 for (int j = 1; j < lpc.getNumChannels(); j++) {
138 ((((lpc.getFrameEntry(i, j) - lpc_min) / lpc_range))
142 stsData[i] = new STS(frame, size, residual);
150 * Generate the residuals for this sts
152 * @param wave specially formatted wave data
153 * @param start offset into the wave data
154 * @param frame frame data from the lpc
155 * @param order typically the number of lpc channels
156 * @param size size of the residual
158 * @return sts residuals
160 private static short[] generateResiduals(short[] wave, int start,
161 float[] frame, int order, int size) {
163 short[] residual = new short[size];
164 for (int i = 0; i < order; i++) {
166 for (int j = 1; j < order; j++) {
167 r -= frame[j] * ((double) wave[start + (i - j)]);
169 residual[i] = Utility.shortToUlaw((short) r);
171 for (int i = order; i < size; i++) {
173 for (int j = 1; j < order; j++) {
174 r -= frame[j] * ((double) wave[start + (i - j)]);
176 residual[i] = Utility.shortToUlaw((short) r);
184 * @param stsData generated sts data
185 * @param lpc data loaded from the lpc file
186 * @param wave data loaded from the wave file
187 * @param osw the OutputStreamWriter to write the sts data to
188 * @param lpc_min minimum lpc value
189 * @param lpc_range range of lpc values
192 private static void saveSTS(STS[] stsData, LPC lpc, Wave wave,
193 OutputStreamWriter osw, float lpc_min, float lpc_range) {
195 osw.write("( " + Integer.toString(lpc.getNumFrames())
196 + " " + Integer.toString(lpc.getNumChannels() - 1)
197 + " " + Integer.toString(wave.getSampleRate())
198 + " " + Float.toString(lpc_min)
199 + " " + Float.toString(lpc_range)
201 for (int m=0, i=0; i < lpc.getNumFrames(); i++) {
202 osw.write("( " + Float.toString(lpc.getTime(i)) + " (");
204 // Use the following line instead to compare against
205 // the flite find_sts output
206 //osw.write("( " + Utility.hex(lpc.getTime(i)) + " (");
208 for (int j = 1; j < lpc.getNumChannels(); j++) {
210 Integer.toString(stsData[i].getFrameEntry(j - 1)));
213 + Integer.toString(stsData[i].getNumSamples())
215 for (int j = 0; j < stsData[i].getNumSamples(); j++) {
217 Integer.toString(stsData[i].getResidual(j)));
221 } catch (IOException ioe) {
222 throw new Error("IO error while writing sts." + ioe.getMessage());
233 private int numFrames;
234 private int numChannels;
238 /** Create lpc data from an input stream
240 * @param dis DataInputStream to read the lpc in from
243 public LPC(DataInputStream dis) {
245 if (!Utility.readWord(dis).equals("EST_File") ||
246 !Utility.readWord(dis).equals("Track")) {
247 throw new Error("Lpc file not EST Track file");
250 boolean isBinary = false;
251 boolean isBigEndian = false;
254 String token = Utility.readWord(dis);
255 while (!token.equals("EST_Header_End")) {
256 if (token.equals("DataType")) {
257 if (Utility.readWord(dis).equals("binary")) {
262 } else if (token.equals("ByteOrder")) {
263 if (Utility.readWord(dis).equals("10")) {
268 } else if (token.equals("NumFrames")) {
269 numFrames = Integer.parseInt(Utility.readWord(dis));
270 } else if (token.equals("NumChannels")) {
271 numChannels = Integer.parseInt(Utility.readWord(dis));
273 // Ignore all other content in header
275 token = Utility.readWord(dis);
278 times = new float[numFrames];
279 frames = new float[numFrames][numChannels];
283 loadBinaryData(dis, isBigEndian);
288 catch (IOException ioe) {
289 throw new Error("IO error while parsing lpc" + ioe.getMessage());
294 * load the data section of the lpc file as ascii text
296 * @param dis DataInputStream to read from
298 * @throws IOException on ill-formatted input
300 private void loadTextData(DataInputStream dis) throws IOException {
301 for (int f=0; f < numFrames; f++) {
302 times[f] = Float.parseFloat(Utility.readWord(dis));
303 Utility.readWord(dis); // can be only 1
304 for (int c=0; c < numChannels; c++) {
305 frames[f][c] = Float.parseFloat(Utility.readWord(dis));
311 * load the data section of the lpc file as ascii text
313 * @param dis DataInputStream to read from
314 * @param isBigEndian whether or not the data in the file is in
315 * big endian byte order
317 * @throws IOException on ill-formatted input
319 private void loadBinaryData(DataInputStream dis, boolean isBigEndian)
321 for (int f=0; f < numFrames; f++) {
322 times[f] = Utility.readFloat(dis, isBigEndian);
324 // Ignore the 'breaks' field
325 Utility.readFloat(dis, isBigEndian);
327 for (int c=0; c < numChannels; c++) {
328 frames[f][c] = Utility.readFloat(dis, isBigEndian);
334 * Get the number of frames in this lpc
336 * @return number of frames in this lpc
338 public int getNumFrames() {
343 * Get the number of channels in this lpc
345 * @return number of channels in this lpc
347 public int getNumChannels() {
352 * Get the times associated with this lpc
354 * @return an array of times associated with this lpc
356 public float[] getTimes() {
361 * Get an individual time associated with this lpc
363 * @param index index of time to get
365 * @return time value at given index
367 public float getTime(int index) {
372 * Get an individual frame
374 * @param i index of frame
378 public float[] getFrame(int i) {
383 * Get an individual frame entry
385 * @param i index of frame
386 * @param j index into frame
388 * @return the frame entry in frame <code>i</code> at index
391 public float getFrameEntry(int i, int j) {
398 * The wave (riff) data
401 private int numSamples;
402 private int sampleRate;
403 private short[] samples;
405 // Only really used in loading of data.
406 private int headerSize;
407 private int numBytes;
408 private int numChannels = 1; // Only support mono
410 static final short RIFF_FORMAT_PCM = 0x0001;
413 * Read in a wave from a riff format
415 * @param dis DataInputStream to read data from
417 public Wave (DataInputStream dis) {
420 if (dis.skipBytes(headerSize - 16) != (headerSize - 16)) {
421 throw new Error("Unexpected error parsing wave file.");
424 // Bunch of potential random headers
426 String s = new String(Utility.readChars(dis, 4));
428 if (s.equals("data")) {
429 numSamples = Utility.readInt(dis, false) / 2;
431 } else if (s.equals("fact")) {
432 int i = Utility.readInt(dis, false);
433 if (dis.skipBytes(i) != i) {
434 throw new Error("Unexpected error parsing wave file.");
437 throw new Error("Unsupported wave header chunk type " + s);
441 int dataLength = numSamples * numChannels;
442 samples = new short[numSamples];
444 for (int i = 0; i < dataLength; i++) {
445 samples[i] = Utility.readShort(dis, false);
448 } catch (IOException ioe) {
449 throw new Error("IO error while parsing wave" + ioe.getMessage());
456 * @param dis DataInputStream to read from
458 * @throws IOException on ill-formatted input
460 private void loadHeader(DataInputStream dis) throws IOException {
461 if (!checkChars(dis, "RIFF")) {
462 throw new Error("Invalid wave file format.");
464 numBytes = Utility.readInt(dis,false);
465 if (!checkChars(dis, "WAVEfmt ")) {
466 throw new Error("Invalid wave file format.");
469 headerSize = Utility.readInt(dis, false);
471 if (Utility.readShort(dis, false) != RIFF_FORMAT_PCM) {
472 throw new Error("Invalid wave file format.");
475 if (Utility.readShort(dis, false) != 1) {
476 throw new Error("Only mono wave files supported.");
479 sampleRate = Utility.readInt(dis, false);
480 Utility.readInt(dis, false);
481 Utility.readShort(dis, false);
482 Utility.readShort(dis, false);
486 * Reconstruct a wave from a wave, sts, and lpc
488 * @param sampleRate the sample rate to use
490 * @param lpc_min minimum lpc value
491 * @param lpc_range range of lpc values
493 public Wave(int sampleRate, STS[] stsData, LPC lpc, float lpc_min,
495 // set number of samples and sample rate
497 for (int i = 0; i < lpc.getNumFrames(); i++) {
498 numSamples += stsData[i].getNumSamples();
500 samples = new short[numSamples];
501 this.sampleRate = sampleRate;
505 int[] lpcResTimes = new int[lpc.getNumFrames()];
506 int[] lpcResSizes = new int[lpc.getNumFrames()];
507 short[] lpcResResidual = new short[numSamples];
508 int[][] lpcResFrames = new int[lpc.getNumFrames()][];
509 int lpcResNumChannels = lpc.getNumChannels() - 1;
512 for (int i = 0; i < lpc.getNumFrames(); i++) {
513 lpcResTimes[i] = (int) (lpc.getTime(i) * sampleRate);
514 lpcResFrames[i] = stsData[i].getFrame();
515 end = start + stsData[i].getNumSamples();
516 lpcResSizes[i] = stsData[i].getNumSamples();
520 for (int r = 0, i = 0; i < lpc.getNumFrames(); i++) {
521 for (int j = 0; j < stsData[i].getNumSamples(); j++, r++) {
522 lpcResResidual[r] = stsData[i].getResidual(j);
526 float[] lpcCoefs = new float[lpcResNumChannels];
527 float[] outbuf = new float[lpcResNumChannels + 1];
529 //float pp = 0; // the C code uses this unnecessarily (for now)
531 for (int r = 0, o = lpcResNumChannels, i = 0; i <
532 lpc.getNumFrames(); i++) {
533 // residual_fold is hard-coded to 1.
534 int pm_size_samps = lpcResSizes[i];// * residual_fold;
536 // Unpack the LPC coefficients
537 for (int k = 0; k < lpcResNumChannels; k++) {
538 lpcCoefs[k] = (float)
539 ((((double) lpcResFrames[i][k])/65535.0) * lpc_range)
544 // resynthesize the signal
545 for (int j = 0; j < pm_size_samps; j++, r++) {
547 Utility.ulawToShort(lpcResResidual[r/* /residual_fold */]);
549 cr = (o == 0 ? lpcResNumChannels : o-1);
550 for (ci = 0; ci < lpcResNumChannels; ci++) {
551 outbuf[o] += lpcCoefs[ci] * outbuf[cr];
552 cr = (cr == 0 ? lpcResNumChannels : cr - 1);
554 samples[r] = (short) (outbuf[o]
555 /* + pp * lpcres->post_emphasis)*/); // post_emphasis = 0
557 o = (o == lpcResNumChannels ? 0 : o+1);
563 * Compare two waves and output how close the two are.
564 * Useful for checking the general accuracy of find sts.
567 * Output may not exactly match that of flite find_sts
568 * on Intel platforms due to discrepancies in the way that
569 * Intel Pentiums perform floating point computations.
572 * @param wave2 the wave to compare this wave against
575 public void compare(Wave wave2) {
576 if (numSamples > wave2.numSamples) {
581 for (i = 0; i < this.numSamples; i++) {
582 r += (double)((float)this.samples[i] - (float)wave2.samples[i])
583 *(double)((float)this.samples[i] - (float)wave2.samples[i]);
585 r /= this.numSamples;
586 System.out.println("a/b diff " + Double.toString(StrictMath.sqrt(r)));
591 * Make sure that a string of characters appear next in the file
593 * @param dis DataInputStream to read in
594 * @param chars a String containing the ascii characters you
595 * want the <code>dis</code> to contain.
597 * @return <code>true</code> if <code>chars</code> appears next
598 * in <code>dis</code>, else <code>false</code>
599 * @throws on ill-formatted input (end of file, for example)
601 private boolean checkChars(DataInputStream dis, String chars)
603 char[] carray = chars.toCharArray();
604 for (int i = 0; i < carray.length; i++) {
605 if ((char) dis.readByte() != carray[i]) {
613 * Get the sample rate for this wave
615 * @return sample rate
617 public int getSampleRate() {
622 * Get the number of samples for this wave
624 * @return number of samples
626 public int getNumSamples() {
630 /** Get the sample data of this wave
634 public short[] getSamples() {
644 private int numSamples;
645 private short[] residual;
648 * Create an empty STS
/**
 * Create an sts with the given data.
 *
 * <p>Both arrays are copied, so later changes to the caller's arrays
 * do not affect this sts.</p>
 *
 * @param frame frame for this sts
 * @param numSamples number of samples this sts will contain
 * @param residual the residual for this sts
 */
public STS(int[] frame, int numSamples, short[] residual) {
    // clone() yields an independent array of the same length and contents
    this.frame = (int[]) frame.clone();
    this.numSamples = numSamples;
    this.residual = (short[]) residual.clone();
}
670 * Get the number of samples associated with this sts
672 * @return the number of samples for this sts
674 public int getNumSamples() {
679 * Get the residual associated with this sts
681 * @return residual associated with this sts
683 public short getResidual(int i) {
688 * Get the frame associated with this sts
690 * @return a copy of the frame associated with this sts
692 public int[] getFrame() {
693 int[] f = new int[frame.length];
694 System.arraycopy(frame, 0, f, 0, frame.length);
699 * Get an entry out of the frame
701 * @param index the index into the frame
703 * @return the entry in the frame at offset <code>index</code>
705 public int getFrameEntry(int index) {
712 * This class is for general purpose functions such as reading and
713 * writing from files, or converting formats of numbers.
718 * Reads the next word (text separated by whitespace) from the
721 * @param dis the input stream
723 * @return the next word
725 * @throws IOException on error
727 public static String readWord(DataInputStream dis) throws IOException {
728 StringBuffer sb = new StringBuffer();
731 // skip leading whitespace
734 } while(Character.isWhitespace(c));
740 } while (!Character.isWhitespace(c));
741 return sb.toString();
745 * Reads a single char from the stream
747 * @param dis the stream to read
748 * @return the next character on the stream
750 * @throws IOException if an error occurs
752 public static char readChar(DataInputStream dis) throws IOException {
753 return (char) dis.readByte();
757 * Reads a given number of chars from the stream
759 * @param dis the stream to read
760 * @param num the number of chars to read
761 * @return a character array containing the next <code>num<code>
764 * @throws IOException if an error occurs
766 public static char[] readChars(DataInputStream dis, int num)
768 char[] carray = new char[num];
769 for (int i = 0; i < num; i++) {
770 carray[i] = readChar(dis);
776 * Read a float from the input stream, byte-swapping as
779 * @param dis the inputstream
780 * @param isBigEndian whether or not the data being read in is in
783 * @return a floating pint value
785 * @throws IOException on error
787 public static float readFloat(DataInputStream dis, boolean isBigEndian)
791 val = readLittleEndianFloat(dis);
793 val = dis.readFloat();
799 * Reads the next float from the given DataInputStream,
800 * where the data is in little endian.
802 * @param dataStream the DataInputStream to read from
806 public static float readLittleEndianFloat(DataInputStream dataStream)
808 return Float.intBitsToFloat(readLittleEndianInt(dataStream));
812 * Read an integer from the input stream, byte-swapping as
815 * @param dis the inputstream
816 * @param isBigEndian whether or not the data being read in is in
819 * @return an integer value
821 * @throws IOException on error
823 public static int readInt(DataInputStream dis, boolean isBigEndian)
826 return readLittleEndianInt(dis);
828 return dis.readInt();
833 * Reads the next little-endian integer from the given DataInputStream.
835 * @param dataStream the DataInputStream to read from
839 public static int readLittleEndianInt(DataInputStream dataStream)
841 int bits = 0x00000000;
842 for (int shift = 0; shift < 32; shift += 8) {
843 int byteRead = (0x000000ff & dataStream.readByte());
844 bits |= (byteRead << shift);
850 * Read a short from the input stream, byte-swapping as
853 * @param dis the inputstream
854 * @param isBigEndian whether or not the data being read in is in
857 * @return an integer value
859 * @throws IOException on error
861 public static short readShort(DataInputStream dis, boolean isBigEndian)
864 return readLittleEndianShort(dis);
866 return dis.readShort();
871 * Reads the next little-endian short from the given DataInputStream.
873 * @param dataStream the DataInputStream to read from
877 public static short readLittleEndianShort(DataInputStream dis)
879 short bits = (short)(0x0000ff & dis.readByte());
880 bits |= (((short)(0x0000ff & dis.readByte())) << 8);
885 * Convert a short to ulaw format
887 * @param sample the short to convert
889 * @return a short containing an unsigned 8-bit quantity
890 * representing the ulaw
892 public static short shortToUlaw(short sample) {
893 final int[] exp_lut = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,
894 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
895 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
896 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
897 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
898 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
899 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
900 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
901 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
902 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
903 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
904 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
905 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
906 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
907 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
908 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7};
910 int sign, exponent, mantissa;
913 final short CLIP = 32635;
914 final short BIAS = 0x0084;
916 /* Get the sample into sign-magnitude. */
917 sign = (sample >> 8) & 0x80; /* set aside the sign */
919 sample = (short) -sample; /* get magnitude */
921 if ( sample > CLIP ) sample = CLIP; /* clip the magnitude */
923 /* Convert from 16 bit linear to ulaw. */
924 sample = (short) (sample + BIAS);
925 exponent = exp_lut[( sample >> 7 ) & 0xFF];
926 mantissa = ( sample >> ( exponent + 3 ) ) & 0x0F;
928 ((~ ( sign | ( exponent << 4 ) | mantissa)) & 0x00FF);
929 if ( ulawbyte == 0 ) ulawbyte = 0x02; /* optional CCITT trap */
934 * Convert a ulaw format to short
936 * @param ulawbyte a short containing an unsigned 8-but quantity
937 * representing a ulaw
939 * @return the short equivalent of the ulaw
941 public static short ulawToShort(short ulawbyte) {
942 final int[] exp_lut = { 0, 132, 396, 924, 1980, 4092, 8316, 16764 };
943 int sign, exponent, mantissa;
946 ulawbyte = (short) (ulawbyte & 0x00FF);
947 ulawbyte = (short) (~ulawbyte);
948 sign = ( ulawbyte & ((short) 0x80) );
949 exponent = (int) ( (ulawbyte & (short) 0x00FF) >> 4 ) & 0x07;
950 mantissa = ulawbyte & (short) 0x0F;
951 sample = (short) (exp_lut[exponent] + (mantissa << (exponent + 3)));
952 if ( sign != 0 ) sample = (short) (-sample);
959 * Print a float type's internal bit representation in hex
961 * @param f the float to print
963 * @return a string containing the hex value of <code>f</code>
965 public static String hex(float f) {
966 return Integer.toHexString(Float.floatToIntBits(f));