View Javadoc

1   package net.sf.jhunlang.jmorph.analysis;
2   
/**
 * CompoundControl stands for controlling the way of
 * decomposing words to compound words in stemming.
 * <p>
 * An instance carries four settings: whether decomposition is enabled,
 * the minimum length of a component word, the maximum number of component
 * words and a decomposition hint. The class-wide defaults of these settings
 * are read once, at class initialization, from the system properties named
 * by {@link #DISABLED}, {@link #MIN}, {@link #MAX} and {@link #HINT}.
 */
public class CompoundControl
{
  /**
   * System property for the default disabled.
   */
  public final static String DISABLED = "compound.disabled";
  /**
   * System property for the default minimum length of component words.
   */
  public final static String MIN = "compound.min";
  /**
   * System property for the default maximum number of component words.
   */
  public final static String MAX = "compound.max";
  /**
   * System property for the default hint of compound stemming.
   */
  public final static String HINT = "compound.hint";

  /**
   * Convenience constant for the hint
   * 'decompose via the shortest component words first'
   */
  public final static int SHORTEST = 0;
  /**
   * Convenience constant for the hint
   * 'decompose via the longest component words first'
   */
  public final static int LONGEST = 1;
  /**
   * Convenience constant for the hint
   * 'decompose via both the longest and shortest component words'
   */
  public final static int BOTH = 2;
  /**
   * String representations for the legal hint values, indexed by the
   * hint constants SHORTEST, LONGEST and BOTH.
   */
  public final static String[] HINTS = { "shortest", "longest", "both" };
  /**
   * Maximum value for 'minimum length of component words'
   */
  public final static int MAX_MIN = 6;
  /**
   * Maximum value for 'maximum number of component words in a compound word'
   */
  public final static int MAX_MAX = 32;
  /**
   * Maximum value for 'hint'
   */
  public final static int MAX_HINT = BOTH;
  /**
   * The default disabled value: the boolean value of the system property
   * DISABLED, false if the property is not set or cannot be read.
   */
  public final static boolean DEFAULT_DISABLED;
  /**
   * The default hint value: the value of the system property HINT,
   * SHORTEST if the property is not set or is not a legal hint.
   */
  public final static int DEFAULT_HINT;
  /**
   * The default 'minimum length of component words': the value of the
   * system property MIN, 2 if the property is not set or is not legal.
   */
  public static int DEFAULT_MIN = 2;
  /**
   * The default 'maximum number of component words': the value of the
   * system property MAX, 3 if the property is not set or is not legal.
   */
  public static int DEFAULT_MAX = 3;

  /**
   * Initialize static DEFAULT_DISABLED, DEFAULT_MIN, DEFAULT_MAX and
   * DEFAULT_HINT from the system properties DISABLED, MIN, MAX and HINT
   * respectively. A property that is missing, unreadable, not an integer or
   * out of range leaves the corresponding built-in default in effect.
   */
  static
  {
    boolean disabledByProperty = false;
    try
    {
      disabledByProperty = Boolean.getBoolean(DISABLED);
    }
    catch (SecurityException ignored)
    {
      // property access denied: keep compound decomposition enabled
    }
    DEFAULT_DISABLED = disabledByProperty;

    DEFAULT_MIN = readIntProperty(MIN, DEFAULT_MIN, MAX_MIN);
    DEFAULT_MAX = readIntProperty(MAX, DEFAULT_MAX, MAX_MAX);
    DEFAULT_HINT = readIntProperty(HINT, SHORTEST, MAX_HINT);
  }

  /**
   * Read an integer system property restricted to the range 0..upperBound.
   * @param key the name of the system property
   * @param fallback the value to return if the property is not usable
   * @param upperBound the largest legal value
   * @return the parsed property value or fallback if the property is not
   * set, cannot be read, is not an integer or is out of range
   */
  private static int readIntProperty(String key, int fallback, int upperBound)
  {
    try
    {
      String raw = System.getProperty(key);
      if (raw == null)
      {
        return fallback;
      }
      int value = Integer.parseInt(raw);
      return (value < 0 || value > upperBound) ? fallback : value;
    }
    catch (NumberFormatException ignored)
    {
      // malformed number: the default stays in effect
      return fallback;
    }
    catch (SecurityException ignored)
    {
      // property access denied: the default stays in effect
      return fallback;
    }
  }

  /**
   * Check that the given value is in the range 0..upperBound.
   * @param value the value to check
   * @param upperBound the largest legal value
   * @exception IllegalArgumentException if value is negative or greater
   * than upperBound; the message is the decimal form of value
   */
  private static void requireInRange(int value, int upperBound)
  {
    if (value < 0 || value > upperBound)
    {
      throw new IllegalArgumentException("" + value);
    }
  }

  /**
   * Tells if compound decomposition is disabled.
   */
  protected boolean disabled = DEFAULT_DISABLED;
  /**
   * The minimum length of a component word in compound words
   */
  protected int min;
  /**
   * The maximum number of component words in compound words
   */
  protected int max;
  /**
   * If decomposition recurse via the shortest/longest/both splits.
   * Not implemented yet.
   */
  protected int hint;

  /**
   * Create a CompoundControl with default controlling parameters.
   */
  public CompoundControl()
  {
    this(DEFAULT_HINT);
  }

  /**
   * Create a CompoundControl with the given hint. All other parameters are
   * the defaults.
   * @param hint the hint
   * @exception IllegalArgumentException if hint is not one of
   * SHORTEST, LONGEST and BOTH
   */
  public CompoundControl(int hint)
  {
    this(DEFAULT_MIN, DEFAULT_MAX, hint);
  }

  /**
   * Create a CompoundControl with the given minimum component length and
   * maximum component number and with the default hint.
   * @param min the minimum length of component words
   * @param max the maximum number of component words
   * @exception IllegalArgumentException if either any of min and max is
   * negative or min is greater than MAX_MIN or max is greater than MAX_MAX
   */
  public CompoundControl(int min, int max)
  {
    this(min, max, DEFAULT_HINT);
  }

  /**
   * Create a CompoundControl with the given minimum component length, the
   * given maximum component number and with the given hint.
   * @param min the minimum length of component words
   * @param max the maximum number of component words
   * @param hint the hint
   * @exception IllegalArgumentException if either hint is not SHORTEST,
   * LONGEST or BOTH or any of min and max is negative or min is greater
   * than MAX_MIN or max is greater than MAX_MAX
   */
  public CompoundControl(int min, int max, int hint)
  {
    // hint is checked first, then min, then max: the first offending
    // argument determines the exception message
    requireInRange(hint, MAX_HINT);
    requireInRange(min, MAX_MIN);
    requireInRange(max, MAX_MAX);

    this.min = min;
    this.max = max;
    this.hint = hint;
  }

  /**
   * Enable or disable compound decomposition
   * @param b if compound decomposition enabled
   */
  public void setEnabled(boolean b)
  {
    disabled = !b;
  }

  /**
   * Set minimum length of component words. The same limits apply as in
   * the constructors; the current value is kept if the new one is rejected.
   * @param min the minimum length of component words
   * @exception IllegalArgumentException if min is negative or greater
   * than MAX_MIN
   */
  public void setMin(int min)
  {
    requireInRange(min, MAX_MIN);
    this.min = min;
  }

  /**
   * Tells if compound decomposition is enabled at all
   * @return true if compound decomposition is enabled
   */
  public boolean enabled()
  {
    return !disabled;
  }

  /**
   * Return hint
   * @return one of SHORTEST, LONGEST and BOTH
   */
  public int getHint()
  {
    return hint;
  }

  /**
   * Return minimum length of component words
   * @return the minimum length of component words
   */
  public int getMin()
  {
    return min;
  }

  /**
   * Return maximum number of component words
   * @return the maximum number of component words
   */
  public int getMax()
  {
    return max;
  }

  /**
   * Return a string of the form
   * <code>CompoundControl[enabled, max of min, hint name]</code>.
   * @return the string representation of this control
   */
  public String toString()
  {
    return "CompoundControl[" +
      enabled() + ", " + max + " of " + min + ", " + HINTS[hint] + "]";
  }
}
260