1 10 11 package org.mmbase.util.magicfile; 12 13 import java.io.BufferedReader ; 14 import java.io.File ; 15 import java.io.FileReader ; 16 import java.io.FileWriter ; 17 import java.io.IOException ; 18 import java.util.Iterator ; 19 import java.util.List ; 20 import java.util.Vector ; 21 22 import org.mmbase.util.logging.Logger; 23 import org.mmbase.util.logging.Logging; 24 25 33 34 public class MagicParser implements DetectorProvider { 35 39 public final static String DEFAULT_MAGIC_FILE = "/etc/mime-magic"; 40 41 private static final Logger log = Logging.getLoggerInstance(MagicParser.class); 42 private List detectors; 43 44 private boolean parsingFailure = false; 47 private int offset; 48 private String type; 49 private String typeAND; 50 private String test; 51 private String message; 52 private char testComparator; 53 54 public MagicParser() { 55 this(DEFAULT_MAGIC_FILE); 56 } 57 58 62 public MagicParser(String fileName) { 63 log.info("creating a new MagicParser with configuration" + DEFAULT_MAGIC_FILE); 64 try { 65 BufferedReader br = new BufferedReader (new FileReader (new File (fileName))); 66 String line; 67 detectors = new Vector (); 68 69 while ((line = br.readLine()) != null) { 70 Detector d = createDetector(line); 71 if (d != null) { 72 detectors.add(d); 74 } 75 } 76 } catch (Exception e) { 77 log.error("" + e.getMessage() + "}", e); 78 }; 79 } 80 81 public List getDetectors() { 82 return detectors; 83 } 84 87 protected int nextWhiteSpace(String s) { 88 return nextWhiteSpace(s, 0); 89 } 90 91 protected int nextWhiteSpace(String s, int startIndex) { 92 for (int j = startIndex; j < s.length(); j++) { 93 if (s.charAt(j) == ' ' || s.charAt(j) == '\t' || s.charAt(j) == '\n') { 94 return j; 95 } 96 } 97 return s.length(); 98 } 99 100 protected int nextNonWhiteSpace(String s, int startIndex) { 101 for (int j = startIndex; j < s.length(); j++) { 102 if (s.charAt(j) != ' ' && s.charAt(j) != '\t') { 103 return j; 104 } 105 } 106 return -1; 107 } 108 109 113 private int parseOffsetString(String s, int startIndex) throws Exception { 114 try { 115 int m = nextWhiteSpace(s, startIndex); 116 117 char c = s.charAt(startIndex); 119 if (c == '&') { 122 parsingFailure = true; 123 throw new UnsupportedOperationException ("parseOffsetString: >& offset feature not implemented\n(Tt is used only for HP Printer Job Language type)"); 124 } else if (c == '(') { 125 parsingFailure = true; 126 throw new UnsupportedOperationException ("parseOffsetString: indirect offsets not implemented"); 127 } 128 offset = Integer.decode(s.substring(startIndex, m)).intValue(); 129 return nextNonWhiteSpace(s, m + 1); 130 } catch (NumberFormatException e) { 131 throw new Exception ("parseOffetString: string->integer conversion failure for '" + s + "'"); 133 } 134 } 135 136 141 private int parseTypeString(String s, int startIndex) throws Exception { 142 int m = nextWhiteSpace(s, startIndex); 143 if (m <= startIndex) { 144 throw new Exception ("parseTypeString: failed to delimit type string"); 145 } 146 int n = s.indexOf('&', startIndex); 147 if (n > -1 && n < m - 2) { 148 type = s.substring(startIndex, n); 149 typeAND = s.substring(n + 1, m); 150 } else { 151 type = s.substring(startIndex, m); 152 typeAND = "0"; 153 } 154 return nextNonWhiteSpace(s, m + 1); 155 } 156 157 161 private int parseTestString(String s, int startIndex) throws Exception { 162 int start = 0; 163 boolean backslashmode = false; 166 boolean octalmode = false; 167 boolean hexmode = false; 168 char c; 170 StringBuffer numbuf = new StringBuffer (); 171 172 test = ""; 173 174 c = s.charAt(startIndex); 175 switch (c) { 176 case '>' : 177 case '<' : 178 case '&' : 179 case '^' : 180 case '=' : 181 testComparator = c; 182 start = 1; 183 break; 184 default : 185 testComparator = '='; 186 break; 187 } 188 if (s.charAt(startIndex + start) == '~' || s.charAt(startIndex + start) == '!') { 189 start++; 191 } 192 int i = startIndex + start; 193 194 if (!type.equals("string")) { 195 int m = nextWhiteSpace(s, i); 196 String t = s.substring(i, m); 197 if (t.equals("x")) { 198 test = "x"; 199 } else if (type.equals("beshort") || type.equals("leshort")) { 200 try { 201 test = "0x" + Integer.toHexString(Integer.decode(s.substring(i, m)).intValue()); 202 } catch (NumberFormatException e) { 204 throw new Exception ("decode(" + s.substring(i, m) + ")"); 205 } 206 } else if (type.equals("belong") || type.equals("lelong")) { 207 int endIndex = m; 209 try { 210 if (s.charAt(m - 1) == 'L' || s.charAt(m - 1) == 'l') { 212 endIndex = m - 1; 213 } 214 test = "0x" + Long.toHexString(Long.decode(s.substring(i, endIndex)).longValue()); 215 } catch (NumberFormatException e) { 216 log.error(e.getMessage()); 217 log.error(Logging.stackTrace(e)); 218 throw new Exception ("parseLong(" + s.substring(i, endIndex) + ") "); 219 } 220 } else if (type.equals("byte")) { 221 try { 222 test = "0x" + Integer.toHexString(Integer.decode(s.substring(i, m)).intValue()); 223 } catch (NumberFormatException e) { 225 throw new Exception ("decode(" + s.substring(i, m) + ")"); 226 } 227 } 228 i = m; 229 } else { 230 StringBuffer buf = new StringBuffer (); 231 232 int m = s.length(); 233 while (i < m) { 234 c = s.charAt(i); 235 if (backslashmode) { 236 switch (c) { 237 case 'n' : 238 backslashmode = false; 239 buf.append('\n'); 240 break; 241 case 'r' : 242 backslashmode = false; 243 buf.append('\r'); 244 break; 245 case 't' : 246 backslashmode = false; 247 buf.append('\t'); 248 break; 249 case '\\' : 250 if (hexmode) { 251 try { 252 test = test + (char)Integer.decode("0x" + numbuf.toString()).intValue(); 254 } catch (NumberFormatException e) { 255 throw new Exception ("decode(0x" + numbuf.toString() + ") faalde"); 256 } 257 hexmode = false; 258 } else if (octalmode) { 259 try { 260 test = test + (char)Integer.decode("0" + numbuf.toString()).intValue(); 262 } catch (NumberFormatException e) { 263 throw new Exception ("decode(0" + numbuf.toString() + ") faalde"); 264 } 265 octalmode = false; 266 } else { 267 backslashmode = false; 268 buf.append('\\'); 269 } 270 break; 271 case 'x' : 272 if (octalmode && numbuf.length() == 3) { 273 try { 274 test = test + (char)Integer.decode("0" + numbuf.toString()).intValue(); 276 } catch (NumberFormatException e) { 277 throw new Exception ("decode(0" + numbuf.toString() + ") faalde"); 278 } 279 octalmode = false; 280 backslashmode = false; 281 buf = new StringBuffer (); 282 buf.append('x'); 283 } else { 284 hexmode = true; 285 numbuf = new StringBuffer (); 286 if (buf.length() > 0) { 287 test = test + buf.toString(); 288 buf = new StringBuffer (); 289 } 290 } 291 break; 292 case '0' : 293 case '1' : 294 case '2' : 295 case '3' : 296 case '4' : 297 case '5' : 298 case '6' : 299 case '7' : 300 case '8' : 301 case '9' : 302 if (!octalmode && !hexmode) { 304 if (buf.length() > 0) { 305 test = test + buf.toString(); 307 buf = new StringBuffer (); 308 } 309 octalmode = true; 310 numbuf = new StringBuffer (); 311 } 312 numbuf.append(c); 313 break; 314 case ' ' : 315 if (octalmode) { 316 try { 317 test = test + (char)Integer.decode("0" + numbuf.toString()).intValue(); 319 } catch (NumberFormatException e) { 320 throw new Exception ("decode(0" + numbuf.toString() + ") faalde"); 321 } 322 octalmode = false; 323 } else if (hexmode) { 324 try { 325 test = test + (char)Integer.decode("0x" + numbuf.toString()).intValue(); 327 } catch (NumberFormatException e) { 328 throw new Exception ("decode(0x" + numbuf.toString() + ") faalde"); 329 } 330 hexmode = false; 331 } else { 332 buf.append(' '); 333 } 334 backslashmode = false; 335 break; 336 default : 337 if (hexmode) { 338 if (c == 'a' 339 || c == 'A' 340 || c == 'b' 341 || c == 'B' 342 || c == 'c' 343 || c == 'C' 344 || c == 'd' 345 || c == 'D' 346 || c == 'e' 347 || c == 'E' 348 || c == 'f' 349 || c == 'F') { 350 numbuf.append(c); 351 } else { 352 try { 353 test = test + (char)Integer.decode("0x" + numbuf.toString()).intValue(); 355 } catch (NumberFormatException e) { 356 throw new Exception ("decode(0x" + numbuf.toString() + ") faalde"); 357 } 358 hexmode = false; 359 backslashmode = false; 360 } 361 } else if (octalmode) { 362 try { 363 test = test + (char)Integer.decode("0" + numbuf.toString()).intValue(); 365 } catch (NumberFormatException e) { 366 throw new Exception ("decode(0" + numbuf.toString() + ") faalde"); 367 } 368 octalmode = false; 369 backslashmode = false; 370 } else { 371 backslashmode = false; 372 buf.append(c); 374 } 375 } 376 } else if (c == '\\') { 377 if (buf.length() > 0) { 378 test = test + buf.toString(); 380 buf = new StringBuffer (); 381 } 382 backslashmode = true; 383 } else if (c == ' ' || c == '\t' || c == '\n' || i == m - 1) { if (buf.length() > 0) { 385 test = test + buf.toString(); 387 buf = new StringBuffer (); 388 } 389 if (numbuf.length() > 0) { 390 if (octalmode) { 391 try { 392 test = test + (char)Integer.decode("0" + numbuf.toString()).intValue(); 394 } catch (NumberFormatException e) { 395 throw new Exception ("decode(0" + numbuf.toString() + ") faalde"); 396 } 397 octalmode = false; 398 backslashmode = false; 399 } else if (hexmode) { 400 try { 401 test = test + (char)Integer.decode("0x" + numbuf.toString()).intValue(); 403 } catch (NumberFormatException e) { 404 throw new Exception ("decode(0x" + numbuf.toString() + ") faalde"); 405 } 406 hexmode = false; 407 backslashmode = false; 408 } 409 } 410 break; 411 } else { 412 buf.append(c); 413 } 414 i++; 415 } 416 } 417 return nextNonWhiteSpace(s, i + 1); 420 } 421 422 427 private int parseMessageString(String s, int startIndex) throws Exception { 428 if (false) 429 throw new Exception ("dummy exception to stop jikes from complaining"); 430 message = s.substring(startIndex); 431 return s.length() - 1; 432 433 } 434 435 private Detector createDetector(String line) { 436 Detector detector = new Detector(); 437 439 444 log.debug("parse: " + line); 446 int n; 447 String level = "start"; 448 try { 449 level = "parseOffsetString"; 450 n = parseOffsetString(line, 0); 451 level = "parseTypeString"; 452 n = parseTypeString(line, n); 453 level = "parseTestString"; 454 n = parseTestString(line, n); 455 if (n > 0) { 457 level = "parseMessageString"; 458 parseMessageString(line, n); 459 } else { 460 message = ""; 461 } 462 level = "end"; 463 } catch (UnsupportedOperationException e) { 464 log.warn(e.getMessage()); 465 } catch (Exception e) { 466 log.error("parse failure at " + level + ": " + e.getMessage() + " for [" + line + "]"); 467 parsingFailure = true; 468 } 469 detector.setType(type); 470 detector.setOffset("" + offset); 471 detector.setTest(test); 472 detector.setComparator(testComparator); 473 detector.setMimeType(message); 474 detector.setDesignation(message); 475 return detector; 476 } 477 478 public boolean toXML(String path) throws IOException { 479 File f = new File (path); 480 return toXML(f); 481 } 482 483 486 public boolean toXML(File f) throws IOException { 487 FileWriter writer = new FileWriter (f); 488 489 writer.write( 490 "<!DOCTYPE magic PUBLIC \"-//MMBase//DTD magic config 1.0//EN\" \"http://www.mmbase.org/dtd/magic_1_0.dtd\">\n<magic>\n<info>\n<version>0.1</version>\n<author>cjr@dds.nl</author>\n<description>Conversion of the UNIX 'magic' file with added mime types and extensions.</description>\n</info>\n<detectorlist>\n"); 491 Iterator i = getDetectors().iterator(); 492 while (i.hasNext()) { 493 ((Detector)i.next()).toXML(writer); 494 } 495 writer.write("</detectorlist>\n</magic>\n"); 496 writer.close(); 497 return true; 498 } 499 500 public static void main(String [] argv) throws IOException { 501 if (argv.length != 2) { 502 System.err.println(MagicParser.class.getName() + " can be used to convert from mime files to mmbase magic.xml file format"); 503 System.err.println("Usage:" + MagicParser.class.getName() + " inpurtFileName outputfile.xml"); 504 System.err.println("Example:" + MagicParser.class.getName() + " /etc/mime-magic outputfile.xml"); 505 System.exit(1); 506 } 507 System.out.println("reading the mime file"); 508 MagicParser parser = new MagicParser(argv[0]); 509 System.out.println("writing the xml file"); 510 parser.toXML(new File (argv[1])); 511 System.out.println("finished"); 512 } 513 } 514 | Popular Tags |