1 package net.sf.jhunlang.jmorph.analysis;
2
/***
 * CompoundControl holds the parameters that control how words are
 * decomposed into compound words during stemming: whether decomposition is
 * enabled, the minimum length of a component word, the maximum number of
 * component words and a hint for the order of decomposition.
 * <p>
 * The default values are read once, when the class is initialized, from the
 * system properties {@link #DISABLED}, {@link #MIN}, {@link #MAX} and
 * {@link #HINT}.
 */
public class CompoundControl
{
  /***
   * System property for the default disabled.
   */
  public static final String DISABLED = "compound.disabled";
  /***
   * System property for the default minimum length of component words.
   */
  public static final String MIN = "compound.min";
  /***
   * System property for the default maximum number of component words.
   */
  public static final String MAX = "compound.max";
  /***
   * System property for the default hint of compound stemming.
   * The value is parsed as an integer (SHORTEST, LONGEST or BOTH).
   */
  public static final String HINT = "compound.hint";

  /***
   * Convenience constant for the hint
   * 'decompose via the shortest component words first'
   */
  public static final int SHORTEST = 0;
  /***
   * Convenience constant for the hint
   * 'decompose via the longest component words first'
   */
  public static final int LONGEST = 1;
  /***
   * Convenience constant for the hint
   * 'decompose via both the longest and shortest component words'
   */
  public static final int BOTH = 2;
  /***
   * String representations for the legal hint values, indexed by
   * SHORTEST, LONGEST and BOTH.
   */
  public static final String[] HINTS = { "shortest", "longest", "both" };
  /***
   * Maximum value for 'minimum length of component words'
   */
  public static final int MAX_MIN = 6;
  /***
   * Maximum value for 'maximum number of component words in a compound word'
   */
  public static final int MAX_MAX = 32;
  /***
   * Maximum value for 'hint'
   */
  public static final int MAX_HINT = BOTH;
  /***
   * The default disabled value; false unless the system property
   * DISABLED is set to "true".
   */
  public static final boolean DEFAULT_DISABLED;
  /***
   * The default hint value; SHORTEST unless the system property HINT
   * holds a legal hint value.
   */
  public static final int DEFAULT_HINT;
  /***
   * The default 'minimum length of component words'; 2 unless the system
   * property MIN holds an integer between 0 and MAX_MIN.
   */
  public static int DEFAULT_MIN = 2;
  /***
   * The default 'maximum number of component words'; 3 unless the system
   * property MAX holds an integer between 0 and MAX_MAX.
   */
  public static int DEFAULT_MAX = 3;

  /***
   * Initialize static DEFAULT_DISABLED, DEFAULT_MIN, DEFAULT_MAX and
   * DEFAULT_HINT from the system properties DISABLED, MIN, MAX and HINT
   * respectively. A missing, unparsable or out of range property value
   * leaves the built-in default in effect.
   */
  static
  {
    DEFAULT_DISABLED = Boolean.getBoolean(DISABLED);
    DEFAULT_MIN = intProperty(MIN, DEFAULT_MIN, MAX_MIN);
    DEFAULT_MAX = intProperty(MAX, DEFAULT_MAX, MAX_MAX);
    DEFAULT_HINT = intProperty(HINT, SHORTEST, MAX_HINT);
  }

  /***
   * Tells if compound decomposition is disabled.
   */
  protected boolean disabled = DEFAULT_DISABLED;
  /***
   * The minimum length of a component word in compound words
   */
  protected int min;
  /***
   * The maximum number of component words in compound words
   */
  protected int max;
  /***
   * If decomposition recurse via the shortest/longest/both splits.
   * Not implemented yet.
   */
  protected int hint;

  /***
   * Create a CompoundControl with default controlling parameters.
   */
  public CompoundControl()
  {
    this(DEFAULT_HINT);
  }

  /***
   * Create a CompoundControl with the given hint. All other parameters are
   * the defaults.
   * @param hint the hint
   * @exception IllegalArgumentException if hint is not one of
   * SHORTEST, LONGEST and BOTH
   */
  public CompoundControl(int hint)
  {
    this(DEFAULT_MIN, DEFAULT_MAX, hint);
  }

  /***
   * Create a CompoundControl with the given minimum component length and
   * maximum component number and with the default hint.
   * @param min the minimum length of component words
   * @param max the maximum number of component words
   * @exception IllegalArgumentException if either any of min and max is
   * negative or min is greater than MAX_MIN or max is greater than MAX_MAX
   */
  public CompoundControl(int min, int max)
  {
    this(min, max, DEFAULT_HINT);
  }

  /***
   * Create a CompoundControl with the given minimum component length, the
   * given maximum component number and with the given hint.
   * @param min the minimum length of component words
   * @param max the maximum number of component words
   * @param hint the hint
   * @exception IllegalArgumentException if either hint is not SHORTEST,
   * LONGEST or BOTH or any of min and max is negative or min is greater
   * than MAX_MIN or max is greater than MAX_MAX
   */
  public CompoundControl(int min, int max, int hint)
  {
    // The order of the checks decides which value is reported when
    // several arguments are illegal: hint first, then min, then max.
    checkRange(hint, MAX_HINT);
    checkRange(min, MAX_MIN);
    checkRange(max, MAX_MAX);

    this.min = min;
    this.max = max;
    this.hint = hint;
  }

  /***
   * Enable or disable compound decomposition
   * @param b if compound decomposition enabled
   */
  public void setEnabled(boolean b)
  {
    disabled = !b;
  }

  /***
   * Set minimum length of component words. The value is checked against
   * the same limits as in the constructors; an illegal value leaves the
   * current minimum unchanged.
   * @param min the minimum length of component words
   * @exception IllegalArgumentException if min is negative or greater
   * than MAX_MIN
   */
  public void setMin(int min)
  {
    checkRange(min, MAX_MIN);
    this.min = min;
  }

  /***
   * Tells if compound decomposition is enabled at all
   */
  public boolean enabled()
  {
    return !disabled;
  }

  /***
   * Return hint
   */
  public int getHint()
  {
    return hint;
  }

  /***
   * Return minimum length of component words
   */
  public int getMin()
  {
    return min;
  }

  /***
   * Return maximum number of component words
   */
  public int getMax()
  {
    return max;
  }

  /***
   * Return a string of the form
   * <code>CompoundControl[enabled, max of min, hint name]</code>.
   */
  public String toString()
  {
    return "CompoundControl[" +
      enabled() + ", " + max + " of " + min + ", " + HINTS[hint] + "]";
  }

  /***
   * Read the integer system property key. Return fallback if the property
   * is not set, cannot be read, is not an integer or is not between
   * 0 and limit (inclusive).
   * @param key the name of the system property
   * @param fallback the value to return if the property is unusable
   * @param limit the greatest legal value
   * @return the property value or fallback
   */
  private static int intProperty(String key, int fallback, int limit)
  {
    String raw;
    try
    {
      raw = System.getProperty(key);
    }
    catch (SecurityException e)
    {
      // not allowed to read the property: stay with the default
      return fallback;
    }
    if (raw == null)
    {
      return fallback;
    }
    try
    {
      int value = Integer.parseInt(raw);
      return (value < 0 || value > limit) ? fallback : value;
    }
    catch (NumberFormatException e)
    {
      // not an integer: stay with the default
      return fallback;
    }
  }

  /***
   * Check that value is between 0 and limit (inclusive).
   * @param value the value to check
   * @param limit the greatest legal value
   * @exception IllegalArgumentException if value is out of range; the
   * message is the decimal representation of value
   */
  private static void checkRange(int value, int limit)
  {
    if (value < 0 || value > limit)
    {
      throw new IllegalArgumentException("" + value);
    }
  }
}
260