2 * Portions Copyright 2001 Sun Microsystems, Inc.
3 * Portions Copyright 1999-2001 Language Technologies Institute,
4 * Carnegie Mellon University.
5 * All Rights Reserved. Use is subject to license terms.
7 * See the file "license.terms" for information on usage and
8 * redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
11 package com.sun.speech.freetts;
13 import java.io.Reader;
16 * Chops a string or text file into Token instances.
18 public interface Tokenizer {
20 * Sets the text to be tokenized by this tokenizer.
22 * @param textToTokenize the text to tokenize
24 void setInputText(String textToTokenize);
27 * Sets the input reader.
29 * @param reader the input source
31 void setInputReader(Reader reader);
35 * Returns the next token.
37 * @return the next token if it exists; otherwise null
43 * Returns true if there are more tokens, false otherwise.
45 * @return true if there are more tokens; otherwise false
47 boolean hasMoreTokens();
50 * Returns true if there were errors while reading tokens.
52 * @return true if there were errors; otherwise false
57 * If hasErrors returns true, returns a description of the error
58 * encountered. Otherwise returns null.
60 * @return a description of the last error that occurred, or null if hasErrors returns false
62 String getErrorDescription();
65 * Sets the whitespace symbols of this Tokenizer to the given symbols.
68 * @param symbols the whitespace symbols
70 void setWhitespaceSymbols(String symbols);
73 * Sets the single character symbols of this Tokenizer to the given symbols.
76 * @param symbols the single character symbols
78 void setSingleCharSymbols(String symbols);
81 * Sets the prepunctuation symbols of this Tokenizer to the given symbols.
84 * @param symbols the prepunctuation symbols
86 void setPrepunctuationSymbols(String symbols);
89 * Sets the postpunctuation symbols of this Tokenizer to the given symbols.
92 * @param symbols the postpunctuation symbols
94 void setPostpunctuationSymbols(String symbols);
97 * Determines if the current token should start a new sentence.
99 * @return true if a new sentence should be started