package com.dappit.Dapper.parser;

import java.util.Vector;
import java.util.regex.Pattern;

import org.dom4j.DocumentException;
import org.w3c.dom.Document;

/**
 * Java front end for a native HTML parser.
 *
 * <p>Usage: call {@link #init(String, String)} once to load the native library
 * and initialize XPCOM, then call {@link #parse(String)} on an instance. While
 * parsing, DOM construction instructions are delivered to
 * {@link #callback(String, String)} and accumulated in a
 * {@code DomDocumentBuilder}, which then builds the resulting document.
 *
 * <p>NOTE(review): {@link #parse(String)} resets and reuses the single
 * per-instance builder without holding a lock, so sharing one instance between
 * threads looks unsafe - confirm before doing so.
 */
public class MozillaParser
{

    // NOTE(review): isParsing and isInitialized are never read or written in
    // this class. They are kept because same-package or native code may rely
    // on them - confirm before removing.
    boolean isParsing = false;
    static boolean isInitialized = false;

    /** Collects the DOM instructions received through {@link #callback(String, String)}. */
    DomDocumentBuilder domBuilder = new DomDocumentBuilder();

    /** Lock that serializes {@link #init(String, String)}. */
    private static final Object SynchronizationObject = new Object();

    /** JVM system property that records that initialization has already been performed. */
    private static final String MozillaInitializedJvmProperty = "MozillaParser.Initialized";

    /**
     * Matches an attribute-less opening {@code style} or {@code script} tag in
     * any letter case, allowing whitespace around the tag name. Group 1 is the
     * tag name exactly as it was written.
     */
    private static final Pattern BARE_STYLE_OR_SCRIPT_TAG =
            Pattern.compile("<\\s*(style|script)\\s*>", Pattern.CASE_INSENSITIVE);

    /**
     * Native XPCOM initialization.
     *
     * @param componentBase value passed through from the {@code componentsBase}
     *                      argument of {@link #init(String, String)}
     * @throws ParserInitializationException declared by the native method
     */
    private synchronized static native void initXPCOM(String componentBase) throws ParserInitializationException;

    /**
     * Native parsing entry point. Prefer {@link #parse(String)}, which also
     * resets the builder and returns the resulting document.
     *
     * @param html the HTML text to parse
     * @throws ParserInitializationException declared by the native method
     */
    public native void parseHtml(String html) throws ParserInitializationException;

    /**
     * Records one DOM construction instruction in the document builder.
     *
     * <p>NOTE(review): presumably invoked by the native parser while
     * {@link #parseHtml(String)} runs - confirm against the native sources.
     *
     * @param domOperation the operation to record
     * @param domArgument  the argument of that operation
     */
    public synchronized void callback(String domOperation, String domArgument)
    {
        domBuilder.addInstruction(domOperation, domArgument);
    }

    /**
     * Parses an HTML string into a W3C DOM document.
     *
     * <p>Before parsing, every attribute-less opening {@code style} or
     * {@code script} tag is rewritten to carry a dummy {@code harmless=''}
     * attribute followed by a space. The tag name keeps its original letter
     * case, so {@code <SCRIPT>} is handled the same way as {@code <script>}.
     *
     * @param html the HTML text to parse; must not be {@code null}
     * @return the document built from the instructions collected while parsing
     * @throws DocumentException             declared for failures while building the document
     * @throws ParserInitializationException propagated from {@link #parseHtml(String)}
     */
    public Document parse(String html) throws DocumentException, ParserInitializationException
    {
        // A single pass is equivalent to rewriting style and then script: the
        // replacement text never matches the pattern again.
        String prepared = BARE_STYLE_OR_SCRIPT_TAG.matcher(html).replaceAll("<$1 harmless=''> ");
        this.domBuilder.reset();
        this.parseHtml(prepared);
        return this.domBuilder.buildDocument();
    }

    /** Asks the document builder to dump its current state. */
    public void dump()
    {
        this.domBuilder.dump();
    }

    /**
     * Loads the native parser library and initializes XPCOM, at most once per JVM.
     *
     * <p>A JVM system property records that initialization has happened. When
     * it is already set, a warning is printed to {@code System.err} and the
     * call returns without doing anything. The property is set only after the
     * native library has been loaded successfully, so a call that failed to
     * load the library can be retried, for example with a corrected path.
     *
     * @param parserLibrary  absolute path of the native parser library, as
     *                       required by {@link System#load(String)}
     * @param componentsBase value handed to the native XPCOM initialization
     * @throws ParserInitializationException if the native library cannot be
     *                                       loaded, or if the native
     *                                       initialization reports a failure
     * @throws Exception                     kept in the signature for backward compatibility
     */
    public static void init(String parserLibrary, String componentsBase) throws Exception
    {
        synchronized (SynchronizationObject)
        {
            if (System.getProperty(MozillaInitializedJvmProperty) != null)
            {
                System.err.println("Warning : MozillaParser detected an additional attempt to initialize XPCOM. operation ignored.");
                return;
            }

            try
            {
                System.load(parserLibrary);
            }
            catch (Throwable e)
            {
                // The initialized flag has not been set yet, so a later call may retry.
                // NOTE(review): the message names the same directory twice; one of
                // the two was probably meant to be a different directory.
                System.err.println("Warning:Could not load library "+parserLibrary +" Possible reason : " +
                        "You have to include both mozilla.dist.bin." + EnviromentController.getOperatingSystemName()
                        +" And mozilla.dist.bin."+ EnviromentController.getOperatingSystemName() + " " +
                        "In the right environment variable (windows:PATH , Linux: LD_LIBRARY_PATH , macosx: DYLD_LIBRARY_PATH )") ;
                throw new ParserInitializationException(e);
            }

            // Set the flag before XPCOM initialization so that it is never attempted
            // twice, even if it fails part-way.
            // NOTE(review): whether a failed XPCOM initialization can safely be
            // retried is not visible from this class.
            System.setProperty(MozillaInitializedJvmProperty, "true");
            initXPCOM(componentsBase);
        }
    }

    /**
     * Returns the instructions currently held by the document builder.
     *
     * @return the builder's instruction list
     */
    public Vector<String> getDomBuilderArguments()
    {
        return this.domBuilder.getInstructions();
    }

}