KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > mmbase > util > magicfile > Detector


1 /*
2
3 This software is OSI Certified Open Source Software.
4 OSI Certified is a certification mark of the Open Source Initiative.
5
6 The license (Mozilla version 1.0) can be read at the MMBase site.
7 See http://www.MMBase.org/license
8
9 */

10
11 package org.mmbase.util.magicfile;
12 import java.util.*;
13 import java.io.*;
14 import org.mmbase.util.logging.*;
15
16 /**
17  * A Detector stores one entry from the magic.xml file, and contains
18  * the functionality to determines if a certain byte[] satisfies it.
19  *
20  * Implementation made on the basis of actual magic file and its manual.<br />
21  *
22  * TODO:<br />
23  * - link the info with mimetypes<br />
24  * - add test modifiers<br />
25  * - add commandline switches for warning, error and debugging messages<br />
26  *<br />
27  * Ignored features of magic:<br />
28  * - date types<br />
29  * - indirect offsets (prefix of '&' in sublevel match or (address+bytes) where offset = value of address plus bytes<br />
30  * - AND'ing of type<br />
31  *<br />
32  * BUGS:<br />
33  * - test string isn't read when end of line is reached in absence of a message string<br />
34  * <br />
35  *
36  * Tested:<br />
37  * - .doc<br />
38  * - .rtf<br />
39  * - .pdf<br />
40  * - .sh<br />
41  * - .gz<br />
42  * - .bz2<br />
43  * - .html<br />
44  * - .rpm<br />
45  * - .wav<br />
46  *<br />
47  * Not supported by magic file:<br />
48  * - StarOffice<br />
49  * @version $Id: Detector.java,v 1.11 2006/01/25 19:09:43 michiel Exp $
50  */

51
52 public class Detector {
53     private static final Logger log = Logging.getLoggerInstance(Detector.class);
54     
55     // No configuration below
56
private static final int BIG_ENDIAN = 0;
57     private static final int LITTLE_ENDIAN = 1;
58     private static final String JavaDoc[] label = new String JavaDoc[] { "big endian", "little endian" };
59     
60     private String JavaDoc rawinput; // Original input line
61
private int offset = -1;
62     private String JavaDoc type;
63     // types: byte, short, long, string, date, beshort, belong, bedate, leshort, lelong, ledate
64
private String JavaDoc typeAND;
65     // Some types are defined as e.g. "belong&0x0000ff70", then typeAND=0x0000ff70 (NOT IMPLEMENTED!)
66
private String JavaDoc test; // Test value
67
private char testComparator; // What the test is like,
68
private String JavaDoc message; // Designation for this type in 'magic' file
69
private List extensions; // Possible file extensions for this type
70
private String JavaDoc mimetype; // MimeType for this type
71

72     // What are these?
73
private String JavaDoc xString;
74     private int xInt;
75     private char xChar;
76
77     private List childList;
78
79     private boolean valid; // Set this if parsing of magic file fails
80
private boolean hasX; // Is set when an 'x' value is matched
81

82     /**
83      * Add an embedded detector object that searches for more details after an initial match.
84      */

85     public void addChild(Detector detector, int level) {
86         if (level == 1) {
87             childList.add(detector);
88         } else if (level > 1) {
89             if (childList.size() == 0) {
90                 log.debug("Hm. level = " + level + ", but childList is empty");
91             } else {
92                 ((Detector) childList.get(childList.size() - 1)).addChild(detector, level - 1);
93             }
94         }
95     }
96     /**
97      * Detectors are instanciated by MagicXMLReader, and by Parser.
98      */

99     Detector() {
100         childList = new ArrayList();
101         extensions = new ArrayList();
102         mimetype = "application/octet-stream";
103         message = "Unknown";
104         valid = true;
105     }
106
107     /**
108      * Adds a possible extension. The last added one is the default (returned by 'getExtension').
109      */

110     public void setExtension(String JavaDoc extension) {
111         extensions.add(0, extension);
112     }
113     public String JavaDoc getExtension() {
114         if (extensions.size() == 0) {
115             return "";
116         }
117         return (String JavaDoc) extensions.get(0);
118     }
119     public List getExtensions() {
120         return extensions;
121     }
122
123     public void setMimeType(String JavaDoc mimetype) {
124         this.mimetype = mimetype;
125     }
126     public String JavaDoc getMimeType() {
127         if (mimetype.equals("???")) {
128             return "application/octet-stream";
129         } else {
130             return mimetype;
131         }
132     }
133     public void setDesignation(String JavaDoc designation) {
134         this.message = designation;
135     }
136     public void setOffset(String JavaDoc offset) {
137         this.offset = Integer.parseInt(offset);
138     }
139     public int getOffset() {
140         return offset;
141     }
142     public void setType(String JavaDoc type) {
143         this.type = type;
144     }
145     public String JavaDoc getType() {
146         return type;
147     }
148     public void setTest(String JavaDoc test) {
149         this.test = test;
150     }
151     public String JavaDoc getTest() {
152         return test;
153     }
154     public void setComparator(char comparator) {
155         this.testComparator = comparator;
156     }
157     public char getComparator() {
158         return testComparator;
159     }
160
161     /**
162      * @return Whether detector matches the prefix/lithmus of the file
163      */

164     public boolean test(byte[] lithmus) {
165         if (lithmus == null || lithmus.length == 0 || offset == -1) {
166             return false;
167         }
168         boolean hit;
169         //log.debug("TESTING "+rawinput);
170
if (type.equals("string")) {
171             hit = testString(lithmus);
172         } else if (type.equals("beshort")) {
173             hit = testShort(lithmus, BIG_ENDIAN);
174         } else if (type.equals("belong")) {
175             hit = testLong(lithmus, BIG_ENDIAN);
176         } else if (type.equals("leshort")) {
177             hit = testShort(lithmus, LITTLE_ENDIAN);
178         } else if (type.equals("lelong")) {
179             hit = testLong(lithmus, LITTLE_ENDIAN);
180         } else if (type.equals("byte")) {
181             hit = testByte(lithmus);
182         } else {
183             // Date types are not supported
184
hit = false;
185         }
186         if (hit) {
187             log.debug("Detector " + this + " hit");
188             for (int i = 0; i < childList.size(); i++) {
189                 Detector child = (Detector) childList.get(i);
190                 if (child.test(lithmus)) {
191                     String JavaDoc s = child.getDesignation();
192                     if (s.startsWith("\\b")) {
193                         s = s.substring(2);
194                     }
195                     this.message = this.message + " " + s;
196                 }
197             }
198         }
199         return hit;
200     }
201
202     /**
203      * todo: I noticed there is also a %5.5s variation in magic...
204      */

205     public String JavaDoc getDesignation() {
206         if (hasX) {
207             int n = message.indexOf("%d");
208             if (n >= 0) {
209                 return message.substring(0, n) + xInt + message.substring(n + 2);
210             }
211
212             n = message.indexOf("%s");
213             if (n >= 0) {
214                 return message.substring(0, n) + xString + message.substring(n + 2);
215             }
216
217             n = message.indexOf("%c");
218             if (n >= 0) {
219                 return message.substring(0, n) + xChar + message.substring(n + 2);
220             }
221         }
222         return message;
223     }
224
225     public void setInvalid() {
226         valid = false;
227     }
228
229     /**
230      * @return Whether parsing of magic line for this detector succeeded
231      */

232     public boolean valid() {
233         return valid;
234     }
235
236     /**
237      * @return Conversion of 2 byte array to integer
238      */

239     private int byteArrayToInt(byte[] ar) {
240         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
241         for (int i = 0; i < ar.length; i++) {
242             buf.append(Integer.toHexString((int) ar[i] & 0x000000ff));
243         }
244         return Integer.decode("0x" + buf.toString()).intValue();
245     }
246
247     /**
248      * @return Conversion of 4 byte array to long
249      */

250     private long byteArrayToLong(byte[] ar) {
251         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
252         for (int i = 0; i < ar.length; i++) {
253             buf.append(Integer.toHexString((int) ar[i] & 0x000000ff));
254         }
255         return Long.decode("0x" + buf.toString()).longValue();
256     }
257     
258     /**
259      * Test whether a string matches
260      */

261     protected boolean testString(byte[] lithmus) {
262
263         if (test.length() == 0) {
264             log.warn("TEST STRING LENGTH ZERO FOR [" + rawinput + "]");
265             return false;
266         }
267
268         int maxNeeded = offset + test.length();
269
270         if (maxNeeded > lithmus.length) {
271             return false;
272         }
273
274         try {
275             xString = new String JavaDoc(lithmus, offset, test.length(), "US-ASCII");
276             // US-ASCII: fixate the charset, do not depend on platform default:
277
// US-ASCCII: one byte = one char, so length can be predicted
278
} catch (java.io.UnsupportedEncodingException JavaDoc usee) { // could not happen: US-ASCII is supported
279
}
280
281         log.debug("test string = '" + test + "' (" + message + ") comparing with '" + xString + "'");
282         int n = xString.compareTo(test);
283         switch (testComparator) {
284         case '=' :
285             return n == 0;
286         case '>' :
287             hasX = true;
288             return n > 0;
289         case '<' :
290             hasX = true;
291             return n < 0;
292         default:
293             return false;
294         }
295     }
296
297     /**
298      * Test whether a short matches
299      */

300     protected boolean testShort(byte[] lithmus, int endian) {
301         log.debug("testing " + label[endian] + " short for " + rawinput);
302         int found = 0;
303         if (endian == BIG_ENDIAN) {
304             found = byteArrayToInt(new byte[] { lithmus[offset], lithmus[offset + 1] });
305         } else if (endian == LITTLE_ENDIAN) {
306             found = byteArrayToInt(new byte[] { lithmus[offset + 1], lithmus[offset] });
307         }
308         xInt = found;
309
310         if (test.equals("x")) {
311             hasX = true;
312             return true;
313         } else if (test.equals("")) {
314             return false;
315         } else {
316             int v = Integer.decode(test).intValue();
317             // Hm. How did that binary arithmatic go?
318
log.debug(
319                       "dumb string conversion: 0x"
320                       + Integer.toHexString((int) lithmus[offset] & 0x000000ff)
321                       + Integer.toHexString((int) lithmus[offset + 1] & 0x000000ff));
322             
323             switch (testComparator) {
324             case '=' :
325                 log.debug(
326                           Integer.toHexString(v)
327                           + " = "
328                           + Integer.toHexString(found));
329                 return v == found;
330             case '>' :
331                 hasX = true;
332                 return found > v;
333             case '<' :
334                 hasX = true;
335                 return found < v;
336             default:
337                 return false;
338             }
339         }
340     }
341
342     /**
343      * Test whether a long matches
344      */

345     protected boolean testLong(byte[] lithmus, int endian) {
346         log.debug("testing " + label[endian] + " long for " + rawinput);
347         long found = 0;
348         try {
349             if (endian == BIG_ENDIAN) {
350                 found = byteArrayToLong(
351                                         new byte[] {
352                                             lithmus[offset],
353                                             lithmus[offset + 1],
354                                             lithmus[offset + 2],
355                                             lithmus[offset + 3] });
356             } else if (endian == LITTLE_ENDIAN) {
357                 found =
358                     byteArrayToLong(
359                                     new byte[] {
360                                         lithmus[offset + 3],
361                                         lithmus[offset + 2],
362                                         lithmus[offset + 1],
363                                         lithmus[offset] });
364             }
365         } catch (ArrayIndexOutOfBoundsException JavaDoc e) {
366             if (!message.equals("")) {
367                 log.error("Failed to test " + label[endian] + " long for " + message);
368             } else {
369                 log.error("Failed to test " + label[endian] + " long:");
370             }
371             log.error("Offset out of bounds: " + offset + " while max is " /*+BUFSIZE*/ );
372             return false;
373         }
374         xInt = (int) found;
375         // If it really is a long, we wouldn't want to know about it
376

377         if (test.equals("x")) {
378             hasX = true;
379             return true;
380         } else if (test.equals("")) {
381             return false;
382         } else {
383             long v = Long.decode(test).longValue();
384             
385             // Hm. How did that binary arithmatic go?
386

387             switch (testComparator) {
388             case '=' :
389                 log.debug("checking " + label[endian] + " long: " + Long.toHexString(v)
390                           + " = " + Long.toHexString(found));
391                 return v == found;
392             case '>' :
393                 hasX = true;
394                 return found > v;
395             case '<' :
396                 hasX = true;
397                 return found < v;
398             default:
399                 return false;
400             }
401         }
402     }
403     
404     /**
405      * Test whether a byte matches
406      */

407     protected boolean testByte(byte[] lithmus) {
408         log.debug("testing byte for " + rawinput);
409         if (test.equals("x")) {
410             hasX = true;
411             xInt = (int) lithmus[offset];
412             xChar = (char) lithmus[offset];
413             xString = "" + xChar;
414             return true;
415         } else if (test.equals("")) {
416             return false;
417         } else {
418             byte b = (byte) Integer.decode(test).intValue();
419             switch (testComparator) {
420                 // DOES THIS MAKE ANY SENSE AT ALL!!
421
case '=' :
422                 return b == lithmus[offset];
423             case '&' :
424                 // All bits in the test byte should be set in the found byte
425
//log.debug("byte test as string = '"+test+"'");
426
byte filter = (byte) (lithmus[offset] & b);
427                 //log.debug("lithmus = "+lithmus[offset]+"; test = "+b+"; filter = "+filter);
428
return filter == b;
429             default :
430                 return false;
431             }
432         }
433     }
434
435     /**
436      * @return Original unprocessed input line
437      * @since MMBase-1.7
438      */

439     public String JavaDoc getRawInput() {
440         return rawinput;
441     }
442
443     protected String JavaDoc xmlEntities(String JavaDoc s) {
444         StringBuffer JavaDoc res = new StringBuffer JavaDoc();
445         for (int i = 0; i < s.length(); i++) {
446             char c = s.charAt(i);
447             switch (c) {
448             case '>' :
449                 res.append("&gt;");
450                 break;
451             case '<' :
452                 res.append("&lt;");
453                 break;
454             case '&' :
455                 res.append("&amp;");
456                 break;
457             default :
458                 // Convert all characters not in the allowed XML character set
459
int n = (int) c;
460                 /* -- below is actual xml standard definition of allowed characters
461                    if (n == 0x9 || n == 0xA || n == 0xD || (n >= 0x20 && n <= 0xD7FF) || (n >= 0xE000 && n <= 0xFFFD) ||
462                    (n >= 0x10000 && n <= 0x10FFFF)) {
463                 */

464                 if (n == 0x9
465                     || n == 0xA
466                     || n == 0xD
467                     || (n >= 0x20 && n < 128)) {
468                     res.append(c);
469                 } else {
470                     // octal representation of number; pad with zeros
471
String JavaDoc oct = Integer.toOctalString(n);
472                     res.append("\\");
473                     for (int j = 3; j > oct.length(); j--) {
474                         res.append("0");
475                     }
476                     res.append(oct);
477                 }
478             }
479         }
480         return res.toString();
481     }
482
483     /**
484      * XML notatie:
485      * <detector>
486      * <mimetype>foo/bar</mimetype>
487      * <extension>bar</extension>
488      * <designation>blablabla</designation>
489      * <test offset="bla" type="bla" comparator="=">test string</test>
490      * <childlist>
491      * <detector>etc</detector>
492      * </childlist>
493      * </detector>
494      *
495      */

496     public void toXML(FileWriter f) throws IOException {
497         toXML(f, 0);
498     }
499
500     /**
501      * @param level Indicates depth of (child) element
502      */

503     public void toXML(FileWriter f, int level) throws IOException {
504         StringBuffer JavaDoc s = new StringBuffer JavaDoc();
505         String JavaDoc comparatorEntity;
506
507         char[] pad;
508         if (level > 0) {
509             pad = new char[level * 4];
510             for (int i = 0; i < level * 4; i++) {
511                 pad[i] = ' ';
512             }
513         } else {
514             pad = new char[] { };
515         }
516         String JavaDoc padStr = new String JavaDoc(pad);
517
518         if (testComparator == '>') {
519             comparatorEntity = "&gt;";
520         } else
521             if (testComparator == '<') {
522                 comparatorEntity = "&lt;";
523             } else if (testComparator == '&') {
524                 comparatorEntity = "&amp;";
525             } else {
526                 comparatorEntity = "" + testComparator;
527             }
528         s.append(
529                  padStr
530                  + "<detector>\n"
531                  + padStr
532                  + " <mimetype>" + getMimeType() + "</mimetype>\n"
533                  + padStr
534                  + " <extension>" + getExtension() + "</extension>\n"
535                  + padStr
536                  + " <designation>"
537                  + xmlEntities(message)
538                  + "</designation>\n"
539                  + padStr
540                  + " <test offset=\""
541                  + offset
542                  + "\" type=\""
543                  + type
544                  + "\" comparator=\""
545                  + comparatorEntity
546                  + "\">"
547                  + xmlEntities(test)
548                  + "</test>\n");
549         f.write(s.toString());
550         if (childList.size() > 0) {
551             f.write(padStr + " <childlist>\n");
552             Iterator i = childList.iterator();
553             while (i.hasNext()) {
554                 ((Detector) i.next()).toXML(f, level + 1);
555             }
556             f.write(padStr + " </childlist>\n");
557         }
558         f.write(padStr + "</detector>\n");
559
560     }
561
562     /**
563      * @return String representation of Detector object.
564      */

565     public String JavaDoc toString() {
566         if (!valid) {
567             return "parse error";
568         } else {
569             StringBuffer JavaDoc res = new StringBuffer JavaDoc("[" + offset + "] {" + type);
570             if (typeAND != null) {
571                 res.append("[" + typeAND + "]");
572             }
573             res.append("} " + testComparator + "(" + test + ") " + message);
574             if (childList.size() > 0) {
575                 res.append("\n");
576                 for (int i = 0; i < childList.size(); i++) {
577                     res.append("> ").append(
578                                             ((Detector) childList.get(i)).toString());
579                 }
580             }
581             return res.toString();
582         }
583     }
584 }
585
Popular Tags