1 package net.sf.jhunlang.jmorph.sword.parser;
2
3 import java.util.StringTokenizer;
4 import java.util.Collection;
5 import java.util.LinkedList;
6
7 import java.io.LineNumberReader;
8
9 import net.sf.jhunlang.jmorph.DictEntry;
10 import net.sf.jhunlang.jmorph.WordList;
11 import net.sf.jhunlang.jmorph.parser.DictionaryReader;
12 import net.sf.jhunlang.jmorph.parser.ParseException;
13 import net.sf.jhunlang.jmorph.sword.SwordDict;
14 import net.sf.jhunlang.jmorph.sword.SwordEntry;
15 import net.sf.jhunlang.jmorph.sword.SwordEntryExtension;
16
17 /***
18 * SwordReader stands for reading the szoszablya dictionary file and building a
19 * SwordDict instance storing words.
20 * Ignore COMPOUNDMIN flag.
21 */
22 public class SwordReader extends DictionaryReader
23 {
24 /***
25 * The collection of string s marking derivatives; an affix rule with
26 * morphological description of x_Y_z is a derivative if Y is in this
27 * collection.
28 */
29 public static Collection derivatives = new LinkedList();
30
31 /***
32 * Return if <code>morph</code> is a derivative i.e. if {@link #derivatives}
33 * contains it. The present implementation returns true if <code>morph</code>
34 * is 'PREF', the szoszablya convention for preverbs.
35 * @param morph
36 * @return if <code>morph</code> marks a derivative
37 */
38 public static boolean derivative(String morph)
39 {
40 return morph.equals("PREF") || derivatives.contains(morph);
41 }
42
43 /***
44 * Create and return a {@link SwordDict} instance for the dictionary words.
45 * @return a new {@link SwordDict} instance
46 */
47 protected WordList createWordList()
48 {
49 return new SwordDict();
50 }
51
52 /***
53 * Return {@link SwordEntry} built from <code>line</code>. The line specifies
54 * a dictionary word by one of the following:
55 * <ul>
56 * <li>word/flags [pos]</li>
57 * <li>word/flags stem[pos]</li>
58 * <li>word/flags [pos]{+[affixtype]...}</li>
59 * <li>word/flags stem[pos]{+[affixtype]...}</li>
60 * <li>word stem[pos]{+[affixtype]...}</li>
61 * </ul>
62 * @param wl the WordList
63 * @param lr the reader
64 * @param line the line to parse
65 */
66 protected DictEntry parseLine(WordList wl, LineNumberReader lr, String line)
67 throws ParseException
68 {
69 SwordDict dict = (SwordDict)wl;
70
71
72
73 StringTokenizer st = new StringTokenizer(line);
74 String wordPart = st.nextToken();
75
76 char[] flagCharacters;
77 String word;
78
79 int index = wordPart.indexOf(SEPARATOR);
80
81
82 if (index == -1)
83 {
84 flagCharacters = NO_FLAGS;
85 word = wordPart;
86 }
87 else
88 {
89 flagCharacters = wordPart.substring(index + 1).toCharArray();
90 word = wordPart.substring(0, index);
91 }
92
93 if (st.hasMoreTokens())
94 {
95 String descriptionPart = st.nextToken();
96
97 SwordEntryExtension ext = new SwordEntryExtension();
98 SwordEntryExtensionParser extParser = new SwordEntryExtensionParser(ext);
99
100 parser.setTokenizer(new StringTokenizer(descriptionPart));
101
102 String root = extParser.parseDescription(parser, derivatives);
103
104 SwordEntry entry = new SwordEntry(word, flagCharacters, ext);
105
106 if (root != null)
107 {
108 dict.setRoot(entry, root);
109 }
110 return entry;
111 }
112 else
113 {
114 return new SwordEntry(word, flagCharacters);
115 }
116 }
117
118 protected void done(WordList wl)
119 {
120 super.done(wl);
121 }
122 }