KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > dappit > Dapper > parser > MozillaParser


1 package com.dappit.Dapper.parser;
2
3 import java.util.Vector JavaDoc;
4
5 import org.dom4j.DocumentException;
6 import org.w3c.dom.Document JavaDoc;
7
8 /**
9  * @author Ohad Serfaty
10  *
11  * A Mozilla native Html Parser
12  *
13  */

14 public class MozillaParser
15 {
16
17     boolean isParsing = false;
18     static boolean isInitialized = false;
19     DomDocumentBuilder domBuilder = new DomDocumentBuilder();
20     private static Object JavaDoc SynchronizationObject = new Object JavaDoc();
21     private static String JavaDoc MozillaInitializedJvmProperty = "MozillaParser.Initialized";
22
23     /**
24      * initialize the mozilla XPCOM embedded components with the proper
25      * components base directory
26      *
27      * @param componentBase
28      * mozilla's components directory (e.g
29      * /home/ohad/mozilla/dist/bin )
30      */

31     private synchronized static native void initXPCOM(String JavaDoc componentBase) throws ParserInitializationException;
32
33     /**
34      * Native function. parse an html function using mozilla's html parser and
35      * make callbacks to the java local sink ( DomDocumentBuilder for that
36      * matter)
37      *
38      * @param html
39      * HTML to parse.
40      * @throws ParserInitializationException
41      */

42     public native void parseHtml(String JavaDoc html) throws ParserInitializationException;
43
44     /**
45      *
46      * A callback is being made from native code to this function.
47      *
48      * @param domOperation
49      * @param domArgument
50      */

51     public synchronized void callback(String JavaDoc domOperation, String JavaDoc domArgument)
52     {
53         // System.out.println("called back with :"+domOperation +" " + domArgument );
54
domBuilder.addInstruction(domOperation, domArgument);
55     }
56
57     public Document JavaDoc parse(String JavaDoc html) throws DocumentException,ParserInitializationException
58     {
59         html = html.replaceAll("<\\s*style\\s*>", "<style harmless=''> ");
60         html = html.replaceAll("<\\s*script\\s*>", "<script harmless=''> ");
61         this.domBuilder.reset();
62         this.parseHtml(html);
63         return this.domBuilder.buildDocument();
64     }
65
66     public void dump() {
67         this.domBuilder.dump();
68     }
69
70     /**
71      * Initialize the mozilla html parser with a DLL to load and a mozilla
72      * component base
73      *
74      * @param dllToLoad
75      * @param componentsBase
76      * @throws ParserInitializationException
77      */

78     public static void init(String JavaDoc parserLibrary, String JavaDoc componentsBase) throws Exception JavaDoc
79     {
80         String JavaDoc initialized = System.getProperty(MozillaInitializedJvmProperty);
81         if (initialized == null)
82         {
83             System.setProperty(MozillaInitializedJvmProperty, "true");
84         }
85         else
86         {
87             System.err.println("Warning : MozillaParser detected an additional attempt to initialize XPCOM. operation ignored.");
88             return;
89         }
90         try
91         {
92             System.load(parserLibrary);
93         }
94         catch (Throwable JavaDoc e)
95         {
96             System.err.println("Warning:Could not load library "+parserLibrary +" Possible reason : " +
97                     "You have to include both mozilla.dist.bin." + EnviromentController.getOperatingSystemName()
98                     +" And mozilla.dist.bin."+ EnviromentController.getOperatingSystemName() + " " +
99                             "In the right environment variable (windows:PATH , Linux: LD_LIBRARY_PATH , macosx: DYLD_LIBRARY_PATH )") ;
100             throw new ParserInitializationException(e);
101         }
102         initXPCOM(componentsBase);
103     }
104
105     /**
106      * @return
107      */

108     public Vector JavaDoc<String JavaDoc> getDomBuilderArguments() {
109         return this.domBuilder.getInstructions();
110
111     }
112
113 }
114
Popular Tags