KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > mmbase > util > magicfile > MagicParser


1 /*
2
3 This software is OSI Certified Open Source Software. OSI Certified is
4 a certification mark of the Open Source Initiative.
5
6 The license (Mozilla version 1.0) can be read at the MMBase site.
7 See http://www.MMBase.org/license
8
9  */

10
11 package org.mmbase.util.magicfile;
12
13 import java.io.BufferedReader JavaDoc;
14 import java.io.File JavaDoc;
15 import java.io.FileReader JavaDoc;
16 import java.io.FileWriter JavaDoc;
17 import java.io.IOException JavaDoc;
18 import java.util.Iterator JavaDoc;
19 import java.util.List JavaDoc;
20 import java.util.Vector JavaDoc;
21
22 import org.mmbase.util.logging.Logger;
23 import org.mmbase.util.logging.Logging;
24
/**
 * This Parser translates the configuration file of UNIX's file(1) command to a
 * list of Detectors (and to a magic.xml). Perhaps it's easier to
 * rewrite this stuff in Perl or something like that.
 *
 * @version $Id: MagicParser.java,v 1.10 2005/10/07 18:41:29 michiel Exp $
 * @todo NOT TESTED YET
 */

33
34 public class MagicParser implements DetectorProvider {
35     /**
36      * the default files used to create the Detectors
37      * DEFAULT_MAGIC_FILE = "/etc/mime-magic"
38      */

39     public final static String JavaDoc DEFAULT_MAGIC_FILE = "/etc/mime-magic";
40     
41     private static final Logger log = Logging.getLoggerInstance(MagicParser.class);
42     private List JavaDoc detectors;
43
44     // what a mess:
45
// I think all of these members must be removed:
46
private boolean parsingFailure = false;
47     private int offset;
48     private String JavaDoc type;
49     private String JavaDoc typeAND;
50     private String JavaDoc test;
51     private String JavaDoc message;
52     private char testComparator;
53
/**
 * Constructs a new MagicParser that reads {@link #DEFAULT_MAGIC_FILE}.
 */
public MagicParser() {
    this(DEFAULT_MAGIC_FILE);
}
57
58     /**
59      * Construct a new MagicParser with configuration file
60      * @since MMBase-1.7
61      */

62     public MagicParser(String JavaDoc fileName) {
63         log.info("creating a new MagicParser with configuration" + DEFAULT_MAGIC_FILE);
64         try {
65             BufferedReader JavaDoc br = new BufferedReader JavaDoc(new FileReader JavaDoc(new File JavaDoc(fileName)));
66             String JavaDoc line;
67             detectors = new Vector JavaDoc();
68
69             while ((line = br.readLine()) != null) {
70                 Detector d = createDetector(line);
71                 if (d != null) {
72                     //System.out.println(d.toString());
73
detectors.add(d);
74                 }
75             }
76         } catch (Exception JavaDoc e) {
77             log.error("" + e.getMessage() + "}", e);
78         };
79     }
80
81     public List JavaDoc getDetectors() {
82         return detectors;
83     }
84     // --------------------------------------------------------------------------------
85
// some utility functions
86

87     protected int nextWhiteSpace(String JavaDoc s) {
88         return nextWhiteSpace(s, 0);
89     }
90
91     protected int nextWhiteSpace(String JavaDoc s, int startIndex) {
92         for (int j = startIndex; j < s.length(); j++) {
93             if (s.charAt(j) == ' ' || s.charAt(j) == '\t' || s.charAt(j) == '\n') {
94                 return j;
95             }
96         }
97         return s.length();
98     }
99
100     protected int nextNonWhiteSpace(String JavaDoc s, int startIndex) {
101         for (int j = startIndex; j < s.length(); j++) {
102             if (s.charAt(j) != ' ' && s.charAt(j) != '\t') {
103                 return j;
104             }
105         }
106         return -1;
107     }
108
109     /**
110      * Separate command from offset
111      * @exception Throws an exception when parsing failed
112      */

113     private int parseOffsetString(String JavaDoc s, int startIndex) throws Exception JavaDoc {
114         try {
115             int m = nextWhiteSpace(s, startIndex);
116
117             // Bail out when encountering an indirect offset
118
char c = s.charAt(startIndex);
119             // '&': In sublevel we can start relatively to where the previous match ended
120
// '(': Read value at first address, and add that at second to it
121
if (c == '&') {
122                 parsingFailure = true;
123                 throw new UnsupportedOperationException JavaDoc("parseOffsetString: >& offset feature not implemented\n(Tt is used only for HP Printer Job Language type)");
124             } else if (c == '(') {
125                 parsingFailure = true;
126                 throw new UnsupportedOperationException JavaDoc("parseOffsetString: indirect offsets not implemented");
127             }
128             offset = Integer.decode(s.substring(startIndex, m)).intValue();
129             return nextNonWhiteSpace(s, m + 1);
130         } catch (NumberFormatException JavaDoc e) {
131             // log.error("string->integer conversion failure for '"+s+"'");
132
throw new Exception JavaDoc("parseOffetString: string->integer conversion failure for '" + s + "'");
133         }
134     }
135
136     /**
137      * Parse the type string from the magic file
138      *
139      * -- nothing to be done: the found string is already atomic :-)
140      */

141     private int parseTypeString(String JavaDoc s, int startIndex) throws Exception JavaDoc {
142         int m = nextWhiteSpace(s, startIndex);
143         if (m <= startIndex) {
144             throw new Exception JavaDoc("parseTypeString: failed to delimit type string");
145         }
146         int n = s.indexOf('&', startIndex);
147         if (n > -1 && n < m - 2) {
148             type = s.substring(startIndex, n);
149             typeAND = s.substring(n + 1, m);
150         } else {
151             type = s.substring(startIndex, m);
152             typeAND = "0";
153         }
154         return nextNonWhiteSpace(s, m + 1);
155     }
156
157     /**
158      * Parse the test string from the magic file
159      * -- determine: a.) the test comparator, and b.) the test value
160      */

161     private int parseTestString(String JavaDoc s, int startIndex) throws Exception JavaDoc {
162         int start = 0;
163         //int m = nextWhiteSpace(s,startIndex); // XXX need a better algorithm to account for '\' syntax
164
// Can't use nextWhiteSpace here, we need harder parsing...
165
boolean backslashmode = false;
166         boolean octalmode = false;
167         boolean hexmode = false;
168         //int l = s.length();
169
char c;
170         StringBuffer JavaDoc numbuf = new StringBuffer JavaDoc();
171
172         test = "";
173
174         c = s.charAt(startIndex);
175         switch (c) {
176             case '>' :
177             case '<' :
178             case '&' :
179             case '^' :
180             case '=' :
181                 testComparator = c;
182                 start = 1;
183                 break;
184             default :
185                 testComparator = '=';
186                 break;
187         }
188         if (s.charAt(startIndex + start) == '~' || s.charAt(startIndex + start) == '!') {
189             // XXX do nothing with these, but remove them to get rid of decode errors
190
start++;
191         }
192         int i = startIndex + start;
193
194         if (!type.equals("string")) {
195             int m = nextWhiteSpace(s, i);
196             String JavaDoc t = s.substring(i, m);
197             if (t.equals("x")) {
198                 test = "x";
199             } else if (type.equals("beshort") || type.equals("leshort")) {
200                 try {
201                     test = "0x" + Integer.toHexString(Integer.decode(s.substring(i, m)).intValue());
202                     //test.addElement(Integer.decode(s.substring(i,m)));
203
} catch (NumberFormatException JavaDoc e) {
204                     throw new Exception JavaDoc("decode(" + s.substring(i, m) + ")");
205                 }
206             } else if (type.equals("belong") || type.equals("lelong")) {
207                 // Values possibly too long for Integer, while Long type won't parse :-(
208
int endIndex = m;
209                 try {
210                     //test.addElement(Long.decode(s.substring(i,m)));
211
if (s.charAt(m - 1) == 'L' || s.charAt(m - 1) == 'l') {
212                         endIndex = m - 1;
213                     }
214                     test = "0x" + Long.toHexString(Long.decode(s.substring(i, endIndex)).longValue());
215                 } catch (NumberFormatException JavaDoc e) {
216                     log.error(e.getMessage());
217                     log.error(Logging.stackTrace(e));
218                     throw new Exception JavaDoc("parseLong(" + s.substring(i, endIndex) + ") ");
219                 }
220             } else if (type.equals("byte")) {
221                 try {
222                     test = "0x" + Integer.toHexString(Integer.decode(s.substring(i, m)).intValue());
223                     //test.addElement(Integer.decode(s.substring(i,m)));
224
} catch (NumberFormatException JavaDoc e) {
225                     throw new Exception JavaDoc("decode(" + s.substring(i, m) + ")");
226                 }
227             }
228             i = m;
229         } else {
230             StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
231
232             int m = s.length();
233             while (i < m) {
234                 c = s.charAt(i);
235                 if (backslashmode) {
236                     switch (c) {
237                         case 'n' :
238                             backslashmode = false;
239                             buf.append('\n');
240                             break;
241                         case 'r' :
242                             backslashmode = false;
243                             buf.append('\r');
244                             break;
245                         case 't' :
246                             backslashmode = false;
247                             buf.append('\t');
248                             break;
249                         case '\\' :
250                             if (hexmode) {
251                                 try {
252                                     //test.addElement(Integer.decode("0x"+numbuf.toString()));
253
test = test + (char)Integer.decode("0x" + numbuf.toString()).intValue();
254                                 } catch (NumberFormatException JavaDoc e) {
255                                     throw new Exception JavaDoc("decode(0x" + numbuf.toString() + ") faalde");
256                                 }
257                                 hexmode = false;
258                             } else if (octalmode) {
259                                 try {
260                                     //test.addElement(Integer.decode("0"+numbuf.toString()));
261
test = test + (char)Integer.decode("0" + numbuf.toString()).intValue();
262                                 } catch (NumberFormatException JavaDoc e) {
263                                     throw new Exception JavaDoc("decode(0" + numbuf.toString() + ") faalde");
264                                 }
265                                 octalmode = false;
266                             } else {
267                                 backslashmode = false;
268                                 buf.append('\\');
269                             }
270                             break;
271                         case 'x' :
272                             if (octalmode && numbuf.length() == 3) {
273                                 try {
274                                     //test.addElement(Integer.decode("0"+numbuf.toString()));
275
test = test + (char)Integer.decode("0" + numbuf.toString()).intValue();
276                                 } catch (NumberFormatException JavaDoc e) {
277                                     throw new Exception JavaDoc("decode(0" + numbuf.toString() + ") faalde");
278                                 }
279                                 octalmode = false;
280                                 backslashmode = false;
281                                 buf = new StringBuffer JavaDoc();
282                                 buf.append('x');
283                             } else {
284                                 hexmode = true;
285                                 numbuf = new StringBuffer JavaDoc();
286                                 if (buf.length() > 0) {
287                                     test = test + buf.toString();
288                                     buf = new StringBuffer JavaDoc();
289                                 }
290                             }
291                             break;
292                         case '0' :
293                         case '1' :
294                         case '2' :
295                         case '3' :
296                         case '4' :
297                         case '5' :
298                         case '6' :
299                         case '7' :
300                         case '8' :
301                         case '9' :
302                             // We should be in octalmode or hexmode here!!
303
if (!octalmode && !hexmode) {
304                                 if (buf.length() > 0) {
305                                     //test.addElement(buf.toString());
306
test = test + buf.toString();
307                                     buf = new StringBuffer JavaDoc();
308                                 }
309                                 octalmode = true;
310                                 numbuf = new StringBuffer JavaDoc();
311                             }
312                             numbuf.append(c);
313                             break;
314                         case ' ' :
315                             if (octalmode) {
316                                 try {
317                                     //test.addElement(Integer.decode("0"+numbuf.toString()));
318
test = test + (char)Integer.decode("0" + numbuf.toString()).intValue();
319                                 } catch (NumberFormatException JavaDoc e) {
320                                     throw new Exception JavaDoc("decode(0" + numbuf.toString() + ") faalde");
321                                 }
322                                 octalmode = false;
323                             } else if (hexmode) {
324                                 try {
325                                     //test.addElement(Integer.decode("0x"+numbuf.toString()));
326
test = test + (char)Integer.decode("0x" + numbuf.toString()).intValue();
327                                 } catch (NumberFormatException JavaDoc e) {
328                                     throw new Exception JavaDoc("decode(0x" + numbuf.toString() + ") faalde");
329                                 }
330                                 hexmode = false;
331                             } else {
332                                 buf.append(' ');
333                             }
334                             backslashmode = false;
335                             break;
336                         default :
337                             if (hexmode) {
338                                 if (c == 'a'
339                                     || c == 'A'
340                                     || c == 'b'
341                                     || c == 'B'
342                                     || c == 'c'
343                                     || c == 'C'
344                                     || c == 'd'
345                                     || c == 'D'
346                                     || c == 'e'
347                                     || c == 'E'
348                                     || c == 'f'
349                                     || c == 'F') {
350                                     numbuf.append(c);
351                                 } else {
352                                     try {
353                                         //test.addElement(Integer.decode("0x"+numbuf.toString()));
354
test = test + (char)Integer.decode("0x" + numbuf.toString()).intValue();
355                                     } catch (NumberFormatException JavaDoc e) {
356                                         throw new Exception JavaDoc("decode(0x" + numbuf.toString() + ") faalde");
357                                     }
358                                     hexmode = false;
359                                     backslashmode = false;
360                                 }
361                             } else if (octalmode) {
362                                 try {
363                                     //test.addElement(Integer.decode("0"+numbuf.toString()));
364
test = test + (char)Integer.decode("0" + numbuf.toString()).intValue();
365                                 } catch (NumberFormatException JavaDoc e) {
366                                     throw new Exception JavaDoc("decode(0" + numbuf.toString() + ") faalde");
367                                 }
368                                 octalmode = false;
369                                 backslashmode = false;
370                             } else {
371                                 backslashmode = false;
372                                 //tmp[testIndex++] = charToByte(c);
373
buf.append(c);
374                             }
375                     }
376                 } else if (c == '\\') {
377                     if (buf.length() > 0) {
378                         //test.addElement(buf.toString());
379
test = test + buf.toString();
380                         buf = new StringBuffer JavaDoc();
381                     }
382                     backslashmode = true;
383                 } else if (c == ' ' || c == '\t' || c == '\n' || i == m - 1) { // Don't forget to set values on end of string
384
if (buf.length() > 0) {
385                         //test.addElement(buf.toString());
386
test = test + buf.toString();
387                         buf = new StringBuffer JavaDoc();
388                     }
389                     if (numbuf.length() > 0) {
390                         if (octalmode) {
391                             try {
392                                 //test.addElement(Integer.decode("0"+numbuf.toString()));
393
test = test + (char)Integer.decode("0" + numbuf.toString()).intValue();
394                             } catch (NumberFormatException JavaDoc e) {
395                                 throw new Exception JavaDoc("decode(0" + numbuf.toString() + ") faalde");
396                             }
397                             octalmode = false;
398                             backslashmode = false;
399                         } else if (hexmode) {
400                             try {
401                                 //test.addElement(Integer.decode("0x"+numbuf.toString()));
402
test = test + (char)Integer.decode("0x" + numbuf.toString()).intValue();
403                             } catch (NumberFormatException JavaDoc e) {
404                                 throw new Exception JavaDoc("decode(0x" + numbuf.toString() + ") faalde");
405                             }
406                             hexmode = false;
407                             backslashmode = false;
408                         }
409                     }
410                     break;
411                 } else {
412                     buf.append(c);
413                 }
414                 i++;
415             }
416         }
417         //log.debug("test size = "+test.size());
418
//log.debug("test = "+vectorToString(test));
419
return nextNonWhiteSpace(s, i + 1);
420     }
421
422     /**
423      * Parse the message string from the magic file
424      *
425      * -- nothing to be done: the found string is already atomic :-)
426      */

427     private int parseMessageString(String JavaDoc s, int startIndex) throws Exception JavaDoc {
428         if (false)
429             throw new Exception JavaDoc("dummy exception to stop jikes from complaining");
430         message = s.substring(startIndex);
431         return s.length() - 1;
432
433     }
434
435     private Detector createDetector(String JavaDoc line) {
436         Detector detector = new Detector();
437         // rawinput = line;
438

439         // hasX = false;
440
//xInt = -99;
441
//xString = "default";
442
//xChar = 'x';
443

444         // parse line
445
log.debug("parse: " + line);
446         int n;
447         String JavaDoc level = "start";
448         try {
449             level = "parseOffsetString";
450             n = parseOffsetString(line, 0);
451             level = "parseTypeString";
452             n = parseTypeString(line, n);
453             level = "parseTestString";
454             n = parseTestString(line, n);
455             // If there are multiple test level, an upper one doesn't have to have a message string
456
if (n > 0) {
457                 level = "parseMessageString";
458                 parseMessageString(line, n);
459             } else {
460                 message = "";
461             }
462             level = "end";
463         } catch (UnsupportedOperationException JavaDoc e) {
464             log.warn(e.getMessage());
465         } catch (Exception JavaDoc e) {
466             log.error("parse failure at " + level + ": " + e.getMessage() + " for [" + line + "]");
467             parsingFailure = true;
468         }
469         detector.setType(type);
470         detector.setOffset("" + offset);
471         detector.setTest(test);
472         detector.setComparator(testComparator);
473         detector.setMimeType(message);
474         detector.setDesignation(message);
475         return detector;
476     }
477
478     public boolean toXML(String JavaDoc path) throws IOException JavaDoc {
479         File JavaDoc f = new File JavaDoc(path);
480         return toXML(f);
481     }
482
483     /**
484      * Write the current datastructure to an XML file
485      */

486     public boolean toXML(File JavaDoc f) throws IOException JavaDoc {
487         FileWriter JavaDoc writer = new FileWriter JavaDoc(f);
488
489         writer.write(
490             "<!DOCTYPE magic PUBLIC \"-//MMBase//DTD magic config 1.0//EN\" \"http://www.mmbase.org/dtd/magic_1_0.dtd\">\n<magic>\n<info>\n<version>0.1</version>\n<author>cjr@dds.nl</author>\n<description>Conversion of the UNIX 'magic' file with added mime types and extensions.</description>\n</info>\n<detectorlist>\n");
491         Iterator JavaDoc i = getDetectors().iterator();
492         while (i.hasNext()) {
493             ((Detector)i.next()).toXML(writer);
494         }
495         writer.write("</detectorlist>\n</magic>\n");
496         writer.close();
497         return true;
498     }
499
500     public static void main(String JavaDoc[] argv) throws IOException JavaDoc {
501         if (argv.length != 2) {
502             System.err.println(MagicParser.class.getName() + " can be used to convert from mime files to mmbase magic.xml file format");
503             System.err.println("Usage:" + MagicParser.class.getName() + " inpurtFileName outputfile.xml");
504             System.err.println("Example:" + MagicParser.class.getName() + " /etc/mime-magic outputfile.xml");
505             System.exit(1);
506         }
507         System.out.println("reading the mime file");
508         MagicParser parser = new MagicParser(argv[0]);
509         System.out.println("writing the xml file");
510         parser.toXML(new File JavaDoc(argv[1]));
511         System.out.println("finished");
512     }
513 }
514
Popular Tags