2 * Portions Copyright 2001 Sun Microsystems, Inc.
3 * Portions Copyright 1999-2001 Language Technologies Institute,
4 * Carnegie Mellon University.
5 * All Rights Reserved. Use is subject to license terms.
7 * See the file "license.terms" for information on usage and
8 * redistribution of this file, and for a DISCLAIMER OF ALL
11 package com.sun.speech.freetts;
13 import java.io.BufferedReader;
14 import java.io.IOException;
15 import java.io.InputStreamReader;
17 import java.util.HashMap;
19 import java.util.NoSuchElementException;
20 import java.util.StringTokenizer;
/**
 * Implementation of a <code>PartOfSpeech</code> that reads the info
 * from a file. The format of the file is as follows:
 *
 * <pre>
 * word pos
 * </pre>
 *
 * Where <code>word</code> is the word and <code>pos</code> is the
 * part of speech for the word. The part of speech is implementation
 * dependent. Lines that start with <code>***</code> are skipped.
 */
37 public class PartOfSpeechImpl implements PartOfSpeech {
/**
 * Used for informational purposes if there's a bad line in the
 * file.
 */
42 private int lineCount = 0;
45 * A map from words to their part of speech.
47 private Map partOfSpeechMap;
50 * Default part of speech.
52 private String defaultPartOfSpeech;
55 * Creates a new PartOfSpeechImpl by reading from the given URL.
57 * @param url the input source
58 * @param defaultPartOfSpeech the default part of speech
60 * @throws IOException if an error occurs
62 public PartOfSpeechImpl(URL url, String defaultPartOfSpeech)
65 BufferedReader reader;
68 partOfSpeechMap = new HashMap();
69 this.defaultPartOfSpeech = defaultPartOfSpeech;
70 reader = new BufferedReader(new
71 InputStreamReader(url.openStream()));
72 line = reader.readLine();
74 while (line != null) {
75 if (!line.startsWith("***")) {
78 line = reader.readLine();
84 * Creates a word from the given input line and adds it to the map.
86 * @param line the input line
88 private void parseAndAdd(String line) {
89 StringTokenizer tokenizer = new StringTokenizer(line," ");
91 String word = tokenizer.nextToken();
92 String pos = tokenizer.nextToken();
93 partOfSpeechMap.put(word, pos);
94 } catch (NoSuchElementException nse) {
95 System.err.println("part of speech data in bad format at line "
101 * Returns a description of the part of speech given a word.
102 * If the given word cannot be found, the part of speech will be the
103 * <code>defaultPartOfSpeech</code> parameter passed to the constructor.
105 * @param word the word to classify
107 * @return an implementation dependent part of speech for the word
109 public String getPartOfSpeech(String word) {
110 String pos = (String) partOfSpeechMap.get(word);
112 pos = defaultPartOfSpeech;