View Javadoc

1   package net.sf.jhunlang.jmorph.sword.parser;
2   
3   import java.util.StringTokenizer;
4   import java.util.Collection;
5   import java.util.LinkedList;
6   
7   import java.io.LineNumberReader;
8   
9   import net.sf.jhunlang.jmorph.DictEntry;
10  import net.sf.jhunlang.jmorph.WordList;
11  import net.sf.jhunlang.jmorph.parser.DictionaryReader;
12  import net.sf.jhunlang.jmorph.parser.ParseException;
13  import net.sf.jhunlang.jmorph.sword.SwordDict;
14  import net.sf.jhunlang.jmorph.sword.SwordEntry;
15  import net.sf.jhunlang.jmorph.sword.SwordEntryExtension;
16  
17  /***
18   * SwordReader stands for reading the szoszablya dictionary file and building a
19   * SwordDict instance storing words.
20   * Ignore COMPOUNDMIN flag.
21   */
22  public class SwordReader extends DictionaryReader
23  {
24    /***
25     * The collection of string s marking derivatives; an affix rule with
26     * morphological description of x_Y_z is a derivative if Y is in this
27     * collection.
28     */
29    public static Collection derivatives = new LinkedList();
30  
31    /***
32     * Return if <code>morph</code> is a derivative i.e. if {@link #derivatives}
33     * contains it. The present implementation returns true if <code>morph</code>
34     * is 'PREF', the szoszablya convention for preverbs.
35     * @param morph
36     * @return if <code>morph</code> marks a derivative
37     */
38    public static boolean derivative(String morph)
39    {
40      return morph.equals("PREF") || derivatives.contains(morph);
41    }
42  
43    /***
44     * Create and return a {@link SwordDict} instance for the dictionary words.
45     * @return a new {@link SwordDict} instance
46     */
47    protected WordList createWordList()
48    {
49      return new SwordDict();
50    }
51  
52    /***
53     * Return {@link SwordEntry} built from <code>line</code>. The line specifies
54     * a dictionary word by one of the following:
55     * <ul>
56     * <li>word/flags [pos]</li>
57     * <li>word/flags stem[pos]</li>
58     * <li>word/flags [pos]{+[affixtype]...}</li>
59     * <li>word/flags stem[pos]{+[affixtype]...}</li>
60     * <li>word stem[pos]{+[affixtype]...}</li>
61     * </ul>
62     * @param wl the WordList
63     * @param lr the reader
64     * @param line the line to parse
65     */
66    protected DictEntry parseLine(WordList wl, LineNumberReader lr, String line)
67      throws ParseException
68    {
69      SwordDict dict = (SwordDict)wl;
70  
71      // get word part until first space. Word part is word or word/flags
72      // the remainder starts with either stem or [, stem lasting to [
73      StringTokenizer st = new StringTokenizer(line);
74      String wordPart = st.nextToken();
75  
76      char[] flagCharacters;
77      String word;
78  
79      int index = wordPart.indexOf(SEPARATOR);
80  
81      // no flags if no SEPARATOR
82      if (index == -1)
83      {
84        flagCharacters = NO_FLAGS;
85        word = wordPart;
86      }
87      else
88      {
89        flagCharacters = wordPart.substring(index + 1).toCharArray();
90        word = wordPart.substring(0, index);
91      }
92  
93      if (st.hasMoreTokens())
94      {
95        String descriptionPart = st.nextToken();
96  
97        SwordEntryExtension ext = new SwordEntryExtension();
98        SwordEntryExtensionParser extParser = new SwordEntryExtensionParser(ext);
99        
100       parser.setTokenizer(new StringTokenizer(descriptionPart));
101 
102       String root = extParser.parseDescription(parser, derivatives);
103 
104       SwordEntry entry = new SwordEntry(word, flagCharacters, ext);
105 
106       if (root != null)
107       {
108         dict.setRoot(entry, root);
109       }
110       return entry;
111     }
112     else
113     {
114       return new SwordEntry(word, flagCharacters);
115     }
116   }
117 
118   protected void done(WordList wl)
119   {
120     super.done(wl);
121   }
122 }