1 package net.sf.jhunlang.jmorph.analysis; 2 3 /*** 4 * CompoundControl stands for controlling the way of 5 * decomposing words to compound words in stemming. 6 */ 7 public class CompoundControl 8 { 9 /*** 10 * System property for the default disabled. 11 */ 12 public final static String DISABLED = "compound.disabled"; 13 /*** 14 * System property for the default minimum length of component words. 15 */ 16 public final static String MIN = "compound.min"; 17 /*** 18 * System property for the default maximum number of component words. 19 */ 20 public final static String MAX = "compound.max"; 21 /*** 22 * System property for the default hint of compound stemming. 23 */ 24 public final static String HINT = "compound.hint"; 25 26 /*** 27 * Convenience constant for the hint 28 * 'decompose via the shortest component words first' 29 */ 30 public final static int SHORTEST = 0; 31 /*** 32 * Convenience constant for the hint 33 * 'decompose via the longest component words first' 34 */ 35 public final static int LONGEST = 1; 36 /*** 37 * Convenience constant for the hint 38 * 'decompose via both the longest and shortest component words' 39 */ 40 public final static int BOTH = 2; 41 /*** 42 * String representations for the legal hint values 43 */ 44 public final static String[] HINTS = { "shortest", "longest", "both" }; 45 /*** 46 * Maximum value for 'minimum length of component words' 47 */ 48 public final static int MAX_MIN = 6; 49 /*** 50 * Maximum value for 'maximum number of component words in a compound word' 51 */ 52 public final static int MAX_MAX = 32; 53 /*** 54 * Maximum value for 'hint' 55 */ 56 public final static int MAX_HINT = BOTH; 57 /*** 58 * The default disabled value is false 59 */ 60 public final static boolean DEFAULT_DISABLED; 61 /*** 62 * The default hint value is SHORTEST 63 */ 64 public final static int DEFAULT_HINT; 65 /*** 66 * The default 'minimum length of component words' is 3 67 */ 68 public static int DEFAULT_MIN = 2; 69 /*** 70 * The default 'maximum number of component words' is 3 71 */ 72 public static int DEFAULT_MAX = 3; 73 74 /*** 75 * Initialize static DAFULT_DISABLED, DEFAULT_MIN, DEFAULT_MAX and 76 * DEFAULT_HINT from the system properties DISABLED, MIN, MAX annd HINT 77 * respectively. 78 */ 79 static 80 { 81 DEFAULT_DISABLED = Boolean.getBoolean(DISABLED); 82 int min = DEFAULT_MIN; 83 try 84 { 85 min = Integer.parseInt(System.getProperty(MIN, DEFAULT_MIN + "")); 86 if (min < 0 || min > MAX_MIN) 87 { 88 min = DEFAULT_MIN; 89 } 90 } 91 catch (Throwable t) 92 {} 93 DEFAULT_MIN = min; 94 95 int max = DEFAULT_MAX; 96 try 97 { 98 max = Integer.parseInt(System.getProperty(MAX, DEFAULT_MAX + "")); 99 if (max < 0 || max > MAX_MAX) 100 { 101 max = DEFAULT_MAX; 102 } 103 } 104 catch (Throwable t) 105 {} 106 DEFAULT_MAX = max; 107 108 int hint = SHORTEST; 109 try 110 { 111 hint = Integer.parseInt(System.getProperty(HINT, "" + SHORTEST)); 112 if (hint < 0 || hint > MAX_HINT) 113 { 114 hint = SHORTEST; 115 } 116 } 117 catch (Throwable t) 118 {} 119 DEFAULT_HINT = hint; 120 } 121 122 /*** 123 * Tells if compound decomposition is enabled at all. 124 */ 125 protected boolean disabled = DEFAULT_DISABLED; 126 /*** 127 * The minimum length of a component word in compound words 128 */ 129 protected int min; 130 /*** 131 * The maximum number of component words in compound words 132 */ 133 protected int max; 134 /*** 135 * If decomposition recurse via the shortest/longest/both splits. 136 * Not implemented yet. 137 */ 138 protected int hint; 139 140 /*** 141 * Create a CompoundControl with default controlling parameters. 142 */ 143 public CompoundControl() 144 { 145 this(DEFAULT_HINT); 146 } 147 148 /*** 149 * Create a CompoundControl with the given hint. All other parameters are 150 * the defaults. 151 * @param hint the hint 152 * @exception IllegalArgumentException if hint is not one of 153 * SHORTEST, LONGEST and BOTH 154 */ 155 public CompoundControl(int hint) 156 { 157 this(DEFAULT_MIN, DEFAULT_MAX, hint); 158 } 159 160 /*** 161 * Create a CompoundControl with the given minimum component length and 162 * maximum component number and with the default hint. 163 * @param min the minimum length of component words 164 * @param max the maximum number of component words 165 * @exception IllegalArgumentException if either any of min and max is 166 * negative or min is greater than MIN_MAX or max is greater than MAX_MAX 167 * SHORTEST, LONGEST or BOTH 168 */ 169 public CompoundControl(int min, int max) 170 { 171 this(min, max, DEFAULT_HINT); 172 } 173 174 /*** 175 * Create a CompoundControl with the given minimum component length, the 176 * given maximum component number and with the given hint. 177 * @param min the minimum length of component words 178 * @param max the maximum number of component words 179 * @param hint the hint 180 * @exception IllegalArgumentException if either hint is not SHORTEST, 181 * LONGEST or BOTH or any of min and max is negative or min is greater 182 * than MIN_MAX or max is greater than MAX_MAX 183 */ 184 public CompoundControl(int min, int max, int hint) 185 { 186 if (hint < 0 || hint > BOTH) 187 { 188 throw new IllegalArgumentException("" + hint); 189 } 190 if (min < 0 || min > MAX_MIN) 191 { 192 throw new IllegalArgumentException("" + min); 193 } 194 if (max < 0 || max > MAX_MAX) 195 { 196 throw new IllegalArgumentException("" + max); 197 } 198 199 this.min = min; 200 this.max = max; 201 this.hint = hint; 202 } 203 204 /*** 205 * Enable or disable compound decomposition 206 * @param b if compound decomposition enabled 207 */ 208 public void setEnabled(boolean b) 209 { 210 disabled = !b; 211 } 212 213 /*** 214 * Set minimum length of component words 215 * @param min the minimum length of compoenent words 216 */ 217 public void setMin(int min) 218 { 219 this.min = min; 220 } 221 222 /*** 223 * Tells if compound decomposition is enabled at all 224 */ 225 public boolean enabled() 226 { 227 return !disabled; 228 } 229 230 /*** 231 * Return hint 232 */ 233 public int getHint() 234 { 235 return hint; 236 } 237 238 /*** 239 * Return minimum length of component words 240 */ 241 public int getMin() 242 { 243 return min; 244 } 245 246 /*** 247 * Return maximum number of component words 248 */ 249 public int getMax() 250 { 251 return max; 252 } 253 254 public String toString() 255 { 256 return "CompoundControl[" + 257 enabled() + ", " + max + " of " + min + ", " + HINTS[hint] + "]"; 258 } 259 } 260