View Javadoc

1   package net.sf.jhunlang.jmorph;
2   
3   import java.util.Iterator;
4   import java.util.Collection;
5   import java.util.Map;
6   import java.util.HashMap;
7   
8   import java.io.Serializable;
9   
10  public class WordList implements Serializable
11  {
12    /***
13     * The {@link Dictionaries} instances this WordList belongs to.
14     */
15    protected Dictionaries dicts;
16    /***
17     * Stores {@link DictEntry} instances associated to root words.
18     */
19    protected Map words = new HashMap();
20    /***
21     * Map of <root entry, collection of entries with entry as root> pairs.
22     */
23    protected Map reverseRoots = new HashMap();
24  
25    /***
26     * Create a new WordList instance.
27     */
28    public WordList()
29    {}
30  
31    public void setDictionaries(Dictionaries dicts)
32    {
33      this.dicts = dicts;
34    }
35  
36    public Dictionaries getDictionaries()
37    {
38      return dicts;
39    }
40  
41    /***
42     * Add the given {@link DictEntry} instance.
43     * @param entry the new DictEntry
44     */
45    public void add(DictEntry entry)
46    {
47      addEntry(words, entry);
48    }
49  
50    /***
51     * Remove and return the {@link DictEntry} instance belonging to
52     * <code>word</code> if any.
53     * @param word the word to remove
54     */
55    public DictEntry remove(String word)
56    {
57      String loword = word.toLowerCase();
58      Object value = words.get(loword);
59      if (value != null)
60      {
61        if (value instanceof DictEntry)
62        {
63          DictEntry entry = (DictEntry)value;
64          if (entry.admitCapitalization(word))
65          {
66            words.remove(loword);
67            return entry;
68          }
69        }
70        else
71        {
72          DictEntry entry = (DictEntry)((Map)value).get(word);
73  
74          if (entry != null && entry.admitCapitalization(word))
75          {
76            ((Map)value).remove(word);
77            return entry;
78          }
79        }
80      }
81      return null;
82    }
83  
84    /***
85     * Return the number of root words.
86     * @return the number of root words stored so far
87     */
88    public int size()
89    {
90      return words.size();
91    }
92  
93    /***
94     * Return the {@link DictEntry} associated with <code>word</code> or
95     * <code>null</code>.
96     * @param word the root word
97     * @return the {@link DictEntry} associated with <code>word</code> or
98     * <code>null</code>.
99     */
100   public DictEntry get(String word)
101   {
102     return get(word, word.toLowerCase());
103   }
104 
105   public DictEntry get(String word, String lowerCaseWord)
106   {
107     Object value = words.get(lowerCaseWord);
108     if (value != null)
109     {
110       if (value instanceof DictEntry)
111       {
112         DictEntry entry = (DictEntry)value;
113         if (entry.admitCapitalization(word))
114         {
115           return entry;
116         }
117         return null;
118       }
119       else
120       {
121         Iterator it = ((Map)value).values().iterator();
122 
123         DictEntry nearest = null;
124         byte nearestCap = 0;
125         while (it.hasNext())
126         {
127           DictEntry entry = (DictEntry)it.next();
128           if (entry.admitCapitalization(word))
129           {
130             byte cap = entry.getCapitalization();
131             if (nearest == null || nearestCap < cap)
132             {
133               nearest = entry;
134               nearestCap = cap;
135             }
136           }
137         }
138         return nearest;
139       }
140     }
141     else
142     {
143       return null;
144     }
145   }
146 
147   /***
148    * Return the object associated with <code>lowerCaseWord</code>
149    * @param lowerCaseWord the word in all lowercase
150    * @return the object associated with <code>lowercaseWord</code>
151    */
152   public Object rawGet(String lowerCaseWord)
153   {
154     return words.get(lowerCaseWord);
155   }
156 
157   /***
158    * Return the map of words stored in this instance. 
159    * @return the map of words 
160    */  
161   public Map getWords()
162   {
163     return words;
164   }
165 
166   /***
167    * Add the given {@link DictEntry} instance to the given map.
168    * @param map the map entry is to be put into
169    * @param entry the new DictEntry
170    */
171   protected void addEntry(Map map, DictEntry entry)
172   {
173     String key = entry.getWord().toLowerCase();
174     Object old = map.get(key);
175 
176     DictEntry oldEntry;
177     // if there is an entry with same lowercase word
178     if (old != null)
179     {
180       if (old instanceof DictEntry)
181       {
182         oldEntry = (DictEntry)old;
183         if (oldEntry.getWord().equals(entry.getWord()))
184         {
185           oldEntry.addHomonym(entry);
186         }
187         else
188         {
189           Map subMap = new HashMap();
190           map.put(key, subMap);
191           subMap.put(oldEntry.getWord(), oldEntry);
192           subMap.put(entry.getWord(), entry);
193         }
194       }
195       else // subMap already
196       {
197         Map subMap = (Map)old;        
198         if ((oldEntry = (DictEntry)subMap.get(entry.getWord())) != null)
199         {
200           oldEntry.addHomonym(entry);
201         }
202         else
203         {
204           subMap.put(entry.getWord(), entry);
205         }
206       }
207     }
208     else
209     {
210       map.put(key, entry);
211     }
212   }
213 
214   public Collection getReverseRoots(DictEntry entry)
215   {
216     return (Collection)reverseRoots.get(entry);
217   }
218 
219   /***
220    * This method does nothing. It is called by readers after all entries
221    * has been added to this list. 
222    */
223   public void sync()
224   {}
225 
226   public static String shorten(String s)
227   {
228     return s.substring(s.lastIndexOf('.') + 1);
229   }
230 
231   public static String shorten(Class clz)
232   {
233     return shorten(clz.getName());
234   }
235 
236   public String contentString()
237   {
238     return words.size() + " words ";
239   }
240 
241   public String toString()
242   {
243     return shorten(getClass()) + "[" + contentString() + "]";
244   }
245 }