2 * Portions Copyright 2003-2004 Sun Microsystems, Inc.
3 * Portions Copyright 1999-2003 Language Technologies Institute,
4 * Carnegie Mellon University.
5 * All Rights Reserved. Use is subject to license terms.
7 * See the file "license.terms" for information on usage and
8 * redistribution of this file, and for a DISCLAIMER OF ALL
11 import java.io.BufferedReader;
12 import java.io.DataInputStream;
13 import java.io.FileInputStream;
14 import java.io.FileOutputStream;
15 import java.io.FileNotFoundException;
16 import java.io.InputStreamReader;
17 import java.io.IOException;
18 import java.io.OutputStreamWriter;
20 import javax.sound.sampled.spi.AudioFileReader;
21 import javax.sound.sampled.AudioInputStream;
25 * Performs the generation of STS files in FestVox to FreeTTS
29 * This program is a port from flite/tools/find_sts_main.c
34 * The a/b diff result is slightly different than the C version due to
35 * Intel floating-point math.
38 public strictfp class FindSTS {
41 static float lpc_range;
44 * Gets the lpc parameters from lpc/lpc.params
46 static private void getLpcParams() throws IOException {
47 BufferedReader reader =
49 new InputStreamReader(
50 new FileInputStream("lpc/lpc.params")));
52 String line = reader.readLine();
53 while (line != null) {
54 if (line.startsWith("LPC_MIN=")) {
55 lpc_min = Float.parseFloat(line.substring(8));
56 } else if (line.startsWith("LPC_MAX=")) {
57 lpc_max = Float.parseFloat(line.substring(8));
58 } else if (line.startsWith("LPC_RANGE=")) {
59 lpc_range = Float.parseFloat(line.substring(10));
61 line = reader.readLine();
64 System.out.println("LPC_MIN=" + lpc_min);
65 System.out.println("LPC_MAX=" + lpc_max);
66 System.out.println("LPC_RANGE=" + lpc_range);
70 * Generate an sts file from lpc and wav files.
72 * args[0..n] = filenames without paths or extensions
73 * (e.g., "arctic_a0001")
75 public static void main(String[] args) {
78 for (int i = 0; i < args.length; i++) {
79 System.out.println(args[i] + " STS");
80 FileInputStream lpcFile = new FileInputStream(
81 "lpc/" + args[i] + ".lpc");
82 FileInputStream waveFile = new FileInputStream(
83 "wav/" + args[i] + ".wav");
84 FileOutputStream stsFile = new FileOutputStream(
85 "sts/" + args[i] + ".sts");
88 LPC lpc = new LPC(new DataInputStream(lpcFile));
89 Wave wave = new Wave(new DataInputStream(waveFile));
94 STS[] stsData = findSTS(wave, lpc, lpc_min, lpc_range);
96 // Verify STS data for sanity
98 Wave reconstructedWave = new
99 Wave(wave.getSampleRate(), stsData, lpc,
101 wave.compare(reconstructedWave);
105 OutputStreamWriter stsWriter = new OutputStreamWriter(stsFile);
106 saveSTS(stsData, lpc, wave, stsWriter, lpc_min, lpc_range);
111 } catch (FileNotFoundException ioe) {
112 throw new Error("Error while running FindSTS" + ioe.getMessage());
113 } catch (IOException ioe) {
114 throw new Error("IO error while finding sts" + ioe.getMessage());
121 * @param wave the data from the wave file
122 * @param lpc the data from the lpc file
123 * @param lpc_min the minimum lpc value
124 * @param lpc_range the range of the lpc values
126 * @return an <code>STS</code> array containing the data
128 private static STS[] findSTS(Wave wave, LPC lpc, float lpc_min,
134 STS[] stsData = new STS[lpc.getNumFrames()];
136 // read wave data into a special array.
138 new short[wave.getNumSamples() + lpc.getNumChannels()];
139 System.arraycopy(wave.getSamples(), 0, waveData,
140 lpc.getNumChannels(), wave.getNumSamples());
142 for (int i = 0; i < lpc.getNumFrames(); i++) {
147 end = (int) ((float) wave.getSampleRate() * lpc.getTime(i));
150 System.out.println("frame size at "
151 + Float.toString(lpc.getTime(i)) + " is "
152 + Integer.toString(size) + ".");
155 residual = generateResiduals(waveData,
156 start + lpc.getNumChannels(), lpc.getFrame(i),
157 lpc.getNumChannels(), size);
159 frame = new int[lpc.getNumChannels() - 1];
160 for (int j = 1; j < lpc.getNumChannels(); j++) {
162 ((((lpc.getFrameEntry(i, j) - lpc_min) / lpc_range))
166 stsData[i] = new STS(frame, size, residual);
174 * Generate the residuals for this sts
176 * @param wave specially formatted wave data
177 * @param start offset into the wave data
178 * @param frame frame data from the lpc
179 * @param order typically the number of lpc channels
180 * @param size size of the residual
182 * @return sts residuals
184 private static short[] generateResiduals(short[] wave, int start,
185 float[] frame, int order, int size) {
187 short[] residual = new short[size];
188 for (int i = 0; i < order; i++) {
190 for (int j = 1; j < order; j++) {
191 r -= frame[j] * ((double) wave[start + (i - j)]);
193 residual[i] = Utility.shortToUlaw((short) r);
195 for (int i = order; i < size; i++) {
197 for (int j = 1; j < order; j++) {
198 r -= frame[j] * ((double) wave[start + (i - j)]);
200 residual[i] = Utility.shortToUlaw((short) r);
208 * @param stsData generated sts data
209 * @param lpc data loaded from the lpc file
210 * @param wave data loaded from the wave file
211 * @param osw the OutputStreamWriter to write the sts data to
212 * @param lpc_min minimum lpc value
213 * @param lpc_range range of lpc values
216 private static void saveSTS(STS[] stsData, LPC lpc, Wave wave,
217 OutputStreamWriter osw, float lpc_min, float lpc_range) {
219 osw.write(Integer.toString(lpc.getNumFrames())
220 + " " + Integer.toString(lpc.getNumChannels() - 1)
221 + " " + Integer.toString(wave.getSampleRate())
222 + " " + Float.toString(lpc_min)
223 + " " + Float.toString(lpc_range) + "\n");
224 for (int m=0, i=0; i < lpc.getNumFrames(); i++) {
225 /* time lpc lpc lpc lpc ...
227 osw.write(Float.toString(lpc.getTime(i)) + "\n");
228 for (int j = 1; j < lpc.getNumChannels(); j++) {
230 Integer.toString(stsData[i].getFrameEntry(j - 1))
235 /* numResid resid resid resid
237 osw.write(Integer.toString(stsData[i].getNumSamples()));
238 for (int j = 0; j < stsData[i].getNumSamples(); j++) {
241 + Integer.toString(stsData[i].getResidual(j)));
245 } catch (IOException ioe) {
246 throw new Error("IO error while writing sts." + ioe.getMessage());
257 private int numFrames;
258 private int numChannels;
262 /** Create lpc data from an input stream
264 * @param dis DataInputStream to read the lpc in from
267 public LPC(DataInputStream dis) {
269 if (!Utility.readWord(dis).equals("EST_File") ||
270 !Utility.readWord(dis).equals("Track")) {
271 throw new Error("Lpc file not EST Track file");
274 boolean isBinary = false;
275 boolean isBigEndian = false;
278 String token = Utility.readWord(dis);
279 while (!token.equals("EST_Header_End")) {
280 if (token.equals("DataType")) {
281 if (Utility.readWord(dis).equals("binary")) {
286 } else if (token.equals("ByteOrder")) {
287 if (Utility.readWord(dis).equals("10")) {
292 } else if (token.equals("NumFrames")) {
293 numFrames = Integer.parseInt(Utility.readWord(dis));
294 } else if (token.equals("NumChannels")) {
295 numChannels = Integer.parseInt(Utility.readWord(dis));
297 // Ignore all other content in header
299 token = Utility.readWord(dis);
302 times = new float[numFrames];
303 frames = new float[numFrames][numChannels];
307 loadBinaryData(dis, isBigEndian);
312 catch (IOException ioe) {
313 throw new Error("IO error while parsing lpc" + ioe.getMessage());
318 * load the data section of the lpc file as ascii text
320 * @param dis DataInputStream to read from
322 * @throws IOException on ill-formatted input
324 private void loadTextData(DataInputStream dis) throws IOException {
325 for (int f=0; f < numFrames; f++) {
326 times[f] = Float.parseFloat(Utility.readWord(dis));
327 Utility.readWord(dis); // can be only 1
328 for (int c=0; c < numChannels; c++) {
329 frames[f][c] = Float.parseFloat(Utility.readWord(dis));
335 * load the data section of the lpc file as binary data
337 * @param dis DataInputStream to read from
338 * @param isBigEndian whether or not the data in the file is in
339 * big endian byte order
341 * @throws IOException on ill-formatted input
343 private void loadBinaryData(DataInputStream dis, boolean isBigEndian)
345 for (int f=0; f < numFrames; f++) {
346 times[f] = Utility.readFloat(dis, isBigEndian);
348 // Ignore the 'breaks' field
349 Utility.readFloat(dis, isBigEndian);
351 for (int c=0; c < numChannels; c++) {
352 frames[f][c] = Utility.readFloat(dis, isBigEndian);
358 * Get the number of frames in this lpc
360 * @return number of frames in this lpc
362 public int getNumFrames() {
367 * Get the number of channels in this lpc
369 * @return number of channels in this lpc
371 public int getNumChannels() {
376 * Get the times associated with this lpc
378 * @return an array of times associated with this lpc
380 public float[] getTimes() {
385 * Get an individual time associated with this lpc
387 * @param index index of time to get
389 * @return time value at given index
391 public float getTime(int index) {
396 * Get an individual frame
398 * @param i index of frame
402 public float[] getFrame(int i) {
407 * Get an individual frame entry
409 * @param i index of frame
410 * @param j index into frame
412 * @return the frame entry in frame <code>i</code> at index
415 public float getFrameEntry(int i, int j) {
422 * The wave (riff) data
425 private int numSamples;
426 private int sampleRate;
427 private short[] samples;
429 // Only really used in loading of data.
430 private int headerSize;
431 private int numBytes;
432 private int numChannels = 1; // Only support mono
434 static final short RIFF_FORMAT_PCM = 0x0001;
437 * Read in a wave from a riff format
439 * @param dis DataInputStream to read data from
441 public Wave (DataInputStream dis) {
444 if (dis.skipBytes(headerSize - 16) != (headerSize - 16)) {
445 throw new Error("Unexpected error parsing wave file.");
448 // Bunch of potential random headers
450 String s = new String(Utility.readChars(dis, 4));
452 if (s.equals("data")) {
453 numSamples = Utility.readInt(dis, false) / 2;
455 } else if (s.equals("fact")) {
456 int i = Utility.readInt(dis, false);
457 if (dis.skipBytes(i) != i) {
458 throw new Error("Unexpected error parsing wave file.");
461 throw new Error("Unsupported wave header chunk type " + s);
465 int dataLength = numSamples * numChannels;
466 samples = new short[numSamples];
468 for (int i = 0; i < dataLength; i++) {
469 samples[i] = Utility.readShort(dis, false);
472 } catch (IOException ioe) {
473 throw new Error("IO error while parsing wave" + ioe.getMessage());
480 * @param dis DataInputStream to read from
482 * @throws IOException on ill-formatted input
484 private void loadHeader(DataInputStream dis) throws IOException {
485 if (!checkChars(dis, "RIFF")) {
486 throw new Error("Invalid wave file format.");
488 numBytes = Utility.readInt(dis,false);
489 if (!checkChars(dis, "WAVEfmt ")) {
490 throw new Error("Invalid wave file format.");
493 headerSize = Utility.readInt(dis, false);
495 if (Utility.readShort(dis, false) != RIFF_FORMAT_PCM) {
496 throw new Error("Invalid wave file format.");
499 if (Utility.readShort(dis, false) != 1) {
500 throw new Error("Only mono wave files supported.");
503 sampleRate = Utility.readInt(dis, false);
504 Utility.readInt(dis, false);
505 Utility.readShort(dis, false);
506 Utility.readShort(dis, false);
510 * Reconstruct a wave from a wave, sts, and lpc
512 * @param sampleRate the sample rate to use
514 * @param lpc_min minimum lpc value
515 * @param lpc_range range of lpc values
517 public Wave(int sampleRate, STS[] stsData, LPC lpc, float lpc_min,
519 // set number of samples and sample rate
521 for (int i = 0; i < lpc.getNumFrames(); i++) {
522 numSamples += stsData[i].getNumSamples();
524 samples = new short[numSamples];
525 this.sampleRate = sampleRate;
529 int[] lpcResTimes = new int[lpc.getNumFrames()];
530 int[] lpcResSizes = new int[lpc.getNumFrames()];
531 short[] lpcResResidual = new short[numSamples];
532 int[][] lpcResFrames = new int[lpc.getNumFrames()][];
533 int lpcResNumChannels = lpc.getNumChannels() - 1;
536 for (int i = 0; i < lpc.getNumFrames(); i++) {
537 lpcResTimes[i] = (int) (lpc.getTime(i) * sampleRate);
538 lpcResFrames[i] = stsData[i].getFrame();
539 end = start + stsData[i].getNumSamples();
540 lpcResSizes[i] = stsData[i].getNumSamples();
544 for (int r = 0, i = 0; i < lpc.getNumFrames(); i++) {
545 for (int j = 0; j < stsData[i].getNumSamples(); j++, r++) {
546 lpcResResidual[r] = stsData[i].getResidual(j);
550 float[] lpcCoefs = new float[lpcResNumChannels];
551 float[] outbuf = new float[lpcResNumChannels + 1];
553 //float pp = 0; // the C code uses this unnecessarily (for now)
555 for (int r = 0, o = lpcResNumChannels, i = 0; i <
556 lpc.getNumFrames(); i++) {
557 // residual_fold is hard-coded to 1.
558 int pm_size_samps = lpcResSizes[i];// * residual_fold;
560 // Unpack the LPC coefficients
561 for (int k = 0; k < lpcResNumChannels; k++) {
562 lpcCoefs[k] = (float)
563 ((((double) lpcResFrames[i][k])/65535.0) * lpc_range)
568 // resynthesize the signal
569 for (int j = 0; j < pm_size_samps; j++, r++) {
571 Utility.ulawToShort(lpcResResidual[r/* /residual_fold */]);
573 cr = (o == 0 ? lpcResNumChannels : o-1);
574 for (ci = 0; ci < lpcResNumChannels; ci++) {
575 outbuf[o] += lpcCoefs[ci] * outbuf[cr];
576 cr = (cr == 0 ? lpcResNumChannels : cr - 1);
578 samples[r] = (short) (outbuf[o]
579 /* + pp * lpcres->post_emphasis)*/); // post_emphasis = 0
581 o = (o == lpcResNumChannels ? 0 : o+1);
587 * Compare two waves and output how close the two are.
588 * Useful for checking the general accuracy of find sts.
591 * Output may not exactly match that of flite find_sts
592 * on Intel platforms due to discrepancies in the way that
593 * Intel Pentiums perform floating point computations.
596 * @param wave2 the wave to compare this wave against
599 public void compare(Wave wave2) {
600 if (numSamples > wave2.numSamples) {
605 for (i = 0; i < this.numSamples; i++) {
606 r += (double)((float)this.samples[i] - (float)wave2.samples[i])
607 *(double)((float)this.samples[i] - (float)wave2.samples[i]);
609 r /= this.numSamples;
610 System.out.println("a/b diff " + Double.toString(StrictMath.sqrt(r)));
615 * Make sure that a string of characters appear next in the file
617 * @param dis DataInputStream to read in
618 * @param chars a String containing the ascii characters you
619 * want the <code>dis</code> to contain.
621 * @return <code>true</code> if <code>chars</code> appears next
622 * in <code>dis</code>, else <code>false</code>
623 * @throws IOException on ill-formatted input (end of file, for example)
625 private boolean checkChars(DataInputStream dis, String chars)
627 char[] carray = chars.toCharArray();
628 for (int i = 0; i < carray.length; i++) {
629 if ((char) dis.readByte() != carray[i]) {
637 * Get the sample rate for this wave
639 * @return sample rate
641 public int getSampleRate() {
646 * Get the number of samples for this wave
648 * @return number of samples
650 public int getNumSamples() {
654 /** Get the sample data of this wave
658 public short[] getSamples() {
668 private int numSamples;
669 private short[] residual;
672 * Create an empty STS
678 * Create an sts with the given data
680 * @param frame frame for this sts
681 * @param numSamples number of samples this sts will contain
682 * @param residual the residual for this sts
685 public STS(int[] frame, int numSamples, short[] residual) {
686 this.frame = new int[frame.length];
687 System.arraycopy(frame, 0, this.frame, 0, frame.length);
688 this.numSamples = numSamples;
689 this.residual = new short[residual.length];
690 System.arraycopy(residual, 0, this.residual, 0, residual.length);
694 * Get the number of samples associated with this sts
696 * @return the number of samples for this sts
698 public int getNumSamples() {
703 * Get the residual associated with this sts
705 * @return residual associated with this sts
707 public short getResidual(int i) {
712 * Get the frame associated with this sts
714 * @return a copy of the frame associated with this sts
716 public int[] getFrame() {
717 int[] f = new int[frame.length];
718 System.arraycopy(frame, 0, f, 0, frame.length);
723 * Get an entry out of the frame
725 * @param index the index into the frame
727 * @return the entry in the frame at offset <code>index</code>
729 public int getFrameEntry(int index) {
736 * This class is for general purpose functions such as reading and
737 * writing from files, or converting formats of numbers.
742 * Reads the next word (text separated by whitespace) from the
745 * @param dis the input stream
747 * @return the next word
749 * @throws IOException on error
751 public static String readWord(DataInputStream dis) throws IOException {
752 StringBuffer sb = new StringBuffer();
755 // skip leading whitespace
758 } while(Character.isWhitespace(c));
764 } while (!Character.isWhitespace(c));
765 return sb.toString();
769 * Reads a single char from the stream
771 * @param dis the stream to read
772 * @return the next character on the stream
774 * @throws IOException if an error occurs
776 public static char readChar(DataInputStream dis) throws IOException {
777 return (char) dis.readByte();
781 * Reads a given number of chars from the stream
783 * @param dis the stream to read
784 * @param num the number of chars to read
785 * @return a character array containing the next <code>num</code>
788 * @throws IOException if an error occurs
790 public static char[] readChars(DataInputStream dis, int num)
792 char[] carray = new char[num];
793 for (int i = 0; i < num; i++) {
794 carray[i] = readChar(dis);
800 * Read a float from the input stream, byte-swapping as
803 * @param dis the inputstream
804 * @param isBigEndian whether or not the data being read in is in
807 * @return a floating point value
809 * @throws IOException on error
811 public static float readFloat(DataInputStream dis, boolean isBigEndian)
815 val = readLittleEndianFloat(dis);
817 val = dis.readFloat();
823 * Reads the next float from the given DataInputStream,
824 * where the data is in little endian.
826 * @param dataStream the DataInputStream to read from
830 public static float readLittleEndianFloat(DataInputStream dataStream)
832 return Float.intBitsToFloat(readLittleEndianInt(dataStream));
836 * Read an integer from the input stream, byte-swapping as
839 * @param dis the inputstream
840 * @param isBigEndian whether or not the data being read in is in
843 * @return an integer value
845 * @throws IOException on error
847 public static int readInt(DataInputStream dis, boolean isBigEndian)
850 return readLittleEndianInt(dis);
852 return dis.readInt();
857 * Reads the next little-endian integer from the given DataInputStream.
859 * @param dataStream the DataInputStream to read from
863 public static int readLittleEndianInt(DataInputStream dataStream)
865 int bits = 0x00000000;
866 for (int shift = 0; shift < 32; shift += 8) {
867 int byteRead = (0x000000ff & dataStream.readByte());
868 bits |= (byteRead << shift);
874 * Read a short from the input stream, byte-swapping as
877 * @param dis the inputstream
878 * @param isBigEndian whether or not the data being read in is in
881 * @return a short value
883 * @throws IOException on error
885 public static short readShort(DataInputStream dis, boolean isBigEndian)
888 return readLittleEndianShort(dis);
890 return dis.readShort();
895 * Reads the next little-endian short from the given DataInputStream.
897 * @param dis the DataInputStream to read from
901 public static short readLittleEndianShort(DataInputStream dis)
903 short bits = (short)(0x0000ff & dis.readByte());
904 bits |= (((short)(0x0000ff & dis.readByte())) << 8);
909 * Convert a short to ulaw format
911 * @param sample the short to convert
913 * @return a short containing an unsigned 8-bit quantity
914 * representing the ulaw
916 public static short shortToUlaw(short sample) {
917 final int[] exp_lut = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,
918 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
919 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
920 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
921 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
922 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
923 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
924 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
925 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
926 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
927 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
928 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
929 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
930 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
931 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
932 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7};
934 int sign, exponent, mantissa;
937 final short CLIP = 32635;
938 final short BIAS = 0x0084;
940 /* Get the sample into sign-magnitude. */
941 sign = (sample >> 8) & 0x80; /* set aside the sign */
943 sample = (short) -sample; /* get magnitude */
945 if ( sample > CLIP ) sample = CLIP; /* clip the magnitude */
947 /* Convert from 16 bit linear to ulaw. */
948 sample = (short) (sample + BIAS);
949 exponent = exp_lut[( sample >> 7 ) & 0xFF];
950 mantissa = ( sample >> ( exponent + 3 ) ) & 0x0F;
952 ((~ ( sign | ( exponent << 4 ) | mantissa)) & 0x00FF);
953 if ( ulawbyte == 0 ) ulawbyte = 0x02; /* optional CCITT trap */
958 * Convert a ulaw format to short
960 * @param ulawbyte a short containing an unsigned 8-bit quantity
961 * representing a ulaw
963 * @return the short equivalent of the ulaw
965 public static short ulawToShort(short ulawbyte) {
966 final int[] exp_lut = { 0, 132, 396, 924, 1980, 4092, 8316, 16764 };
967 int sign, exponent, mantissa;
970 ulawbyte = (short) (ulawbyte & 0x00FF);
971 ulawbyte = (short) (~ulawbyte);
972 sign = ( ulawbyte & ((short) 0x80) );
973 exponent = (int) ( (ulawbyte & (short) 0x00FF) >> 4 ) & 0x07;
974 mantissa = ulawbyte & (short) 0x0F;
975 sample = (short) (exp_lut[exponent] + (mantissa << (exponent + 3)));
976 if ( sign != 0 ) sample = (short) (-sample);
983 * Print a float type's internal bit representation in hex
985 * @param f the float to print
987 * @return a string containing the hex value of <code>f</code>
989 public static String hex(float f) {
990 return Integer.toHexString(Float.floatToIntBits(f));