KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > dappit > Dapper > parser > DomDocumentBuilder


1 /**
2  *
3  */

4 package com.dappit.Dapper.parser;
5
import java.util.Iterator;
import java.util.Locale;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.dom4j.dom.DOMDocument;
import org.dom4j.dom.DOMEntityReference;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Text;
18
19 /**
20  * @author Ohad Serfaty
21  *
22  * A class for building DOM documents from mozilla's content sink instructions
23  *
24  * supported operations are :
25  * OpenNode <tag name>
26  * CloseNode <tag name>
27  * AddText <content>
28  * AddLeaf <tag name>
29  * WriteAttributeKey <key> - in pair with the next op :
30  * WriteAttributeValue <value>
31  * CloseLeaf
32  * AddComment
33  * AddEntity
34  *
35  * Unsupported (for the time being):
36  * AddInstruction
37  * AddTitle
38  *
39  *
40  * Note that this class is reusable: call reset() to clear the recorded instructions before building another document.
41  *
42  */

43 public class DomDocumentBuilder
44 {
45
46     private static final String JavaDoc String32 = new String JavaDoc(new byte[]{ 32 });
47     private static final String JavaDoc String0 = new String JavaDoc(new byte[]{ 0 });
48     private static final String JavaDoc String1 = new String JavaDoc(new byte[]{ 0x1 });
49     private static final String JavaDoc String14 = new String JavaDoc(new byte[]{ 0x14 });
50     private static final String JavaDoc String1d = new String JavaDoc(new byte[]{ 0x1d });
51     private static final String JavaDoc String0xf = new String JavaDoc(new byte[]{ 0xf });
52     private static final String JavaDoc String0x1A = new String JavaDoc(new byte[]{ 0x1A });
53     private static final String JavaDoc String0x12 = new String JavaDoc(new byte[]{ 0x12 });
54     private static final String JavaDoc String0x8 = new String JavaDoc(new byte[]{ 0x8 });
55     private static final String JavaDoc String0x1f = new String JavaDoc(new byte[]{ 0x1f });
56     private static final String JavaDoc String0x2 = new String JavaDoc(new byte[]{ 0x2 });
57     private static final String JavaDoc String0x7 = new String JavaDoc(new byte[]{ 0x7 });
58     private static final String JavaDoc String0x18 = new String JavaDoc(new byte[]{ 0x18 });
59     private static final String JavaDoc String0x19 = new String JavaDoc(new byte[]{ 0x19 });
60     private static final String JavaDoc String0x1B = new String JavaDoc(new byte[]{ 0x1B });
61     private static final String JavaDoc String0x1C = new String JavaDoc(new byte[]{ 0x1C });
62     private static final String JavaDoc String0x11 = new String JavaDoc(new byte[]{ 0x11 });
63     private static final String JavaDoc String0x10 = new String JavaDoc(new byte[]{ 0x10 });
64     private static final String JavaDoc String0x13 = new String JavaDoc(new byte[]{ 0x13 });
65     
66     Vector JavaDoc<String JavaDoc> operations = new Vector JavaDoc<String JavaDoc>();
67     Vector JavaDoc<String JavaDoc> arguments = new Vector JavaDoc<String JavaDoc>();
68     
69     /**
70      * reset the builder. can be reused after creating a document.
71      */

72     public void reset()
73     {
74         operations.clear();
75         arguments.clear();
76     }
77     
78     /**
79      * Add a content sink instruction with an argument
80      *
81      * @param domOperation
82      * @param domArgument
83      */

84     public void addInstruction(String JavaDoc domOperation , String JavaDoc domArgument){
85         this.operations.add(domOperation);
86         this.arguments.add(domArgument);
87 // System.out.println(domOperation+" " + domArgument);
88
}
89     
90     public static String JavaDoc getCDATASection(String JavaDoc domArgument)
91     {
92         if (!domArgument.contains("CDATA"))
93             return null;
94         Pattern JavaDoc pat = Pattern.compile("(.*)\\<\\!(\\s*)\\[CDATA(.*)\\]\\]\\>(.*)",Pattern.DOTALL + Pattern.MULTILINE);
95         Matcher JavaDoc mat = pat.matcher(domArgument);
96         if (mat.find())
97         {
98             String JavaDoc group3 = mat.group(3);
99             if (group3.startsWith("["))
100                 group3 = group3.replaceFirst("\\[", "");
101             String JavaDoc result = mat.group(1) + group3 +mat.group(4) ;
102             return result;
103         }
104         return null;
105     }
106     
107     /**
108      * Finalize and build the dom document.
109      *
110      * @return
111      */

112     public Document buildDocument()
113     {
114 // System.out.println("building document...");
115
DOMDocument resultDocument = new DOMDocument();
116         Iterator JavaDoc<String JavaDoc> i = this.operations.iterator();
117         Iterator JavaDoc<String JavaDoc> j = this.arguments.iterator();
118         Element JavaDoc currentElement = null;
119         boolean isInLeaf = false;
120         boolean closeHtml = true;
121         
122         while (i.hasNext())
123         {
124             String JavaDoc domOperation = i.next();
125             String JavaDoc domArgument = j.next();
126 // System.out.println("Operation :" + domOperation+" Arg:" + domArgument);
127
if (domOperation.equalsIgnoreCase("OpenNode"))
128             {
129                 closeHtml=true;
130                 Element JavaDoc childNode = resultDocument.createElement(domArgument.toLowerCase());
131                 if (currentElement == null)
132                 {
133                     resultDocument.setRootElement((org.dom4j.Element) childNode);
134                     currentElement = childNode;
135                 }
136                 else
137                 {
138                     if (!domArgument.equalsIgnoreCase("html"))
139                     {
140                             currentElement.appendChild(childNode);
141                             currentElement = childNode;
142                     }
143                     else
144                         closeHtml = false;
145                 }
146                 
147             }
148             else
149             if (domOperation.equalsIgnoreCase("CloseNode")){
150                 if (currentElement== null)
151                 {
152                     System.err.println("Error : Close Node where no OpenNode was called. trying to fix..." );
153 // this.dump();
154
}
155                 else
156                     if (closeHtml)
157                         currentElement = (Element JavaDoc)currentElement.getParentNode();
158                 
159             }
160             else
161             // check : may be problematic for cases of script or style
162
if (domOperation.equalsIgnoreCase("AddText") || domOperation.equalsIgnoreCase("AddContent"))
163             {
164 // System.out.println(currentElement.getNodeName() +" : Adding text :" + domArgument);
165
// check : try and resolve this with a <newline> from mozilla instead :
166
boolean script = false;
167                 boolean style = false;
168                 
169                 if (currentElement.getNodeName().equalsIgnoreCase("script") /*|| currentElement.getNodeName().equalsIgnoreCase("style")*/ )
170                     script = true;
171                 else
172                     if (currentElement.getNodeName().equalsIgnoreCase("style"))
173                     style=true;
174                 else
175                     domArgument = DomDocumentBuilder.fixText(domArgument);
176                 
177 // System.out.println("Body content :" + domArgument);
178

179 // System.out.println("AddText "+domArgument);
180
if (domArgument.length() >=1)
181                 {
182                     if (!script && !style)
183                     {
184                         Text JavaDoc textNode = resultDocument.createTextNode(domArgument);
185                         currentElement.appendChild(textNode);
186                     }
187                     else
188                     {
189                         domArgument = domArgument.trim();
190                         String JavaDoc cdata = getCDATASection(domArgument);
191                         
192                         if (cdata!=null)
193                         {
194                             if(script)
195                                 cdata = fixText(cdata);
196                             else
197                                 cdata = fixText(domArgument);
198                             CDATASection JavaDoc cdataSection = resultDocument.createCDATASection(cdata);
199                             currentElement.appendChild(cdataSection);
200                         }
201                         else
202                         {
203                             domArgument = DomDocumentBuilder.fixText(domArgument);
204                             Text JavaDoc textNode = resultDocument.createTextNode(domArgument);
205                             currentElement.appendChild(textNode);
206                         }
207                     }
208                 }
209             }
210             else
211             if (domOperation.equalsIgnoreCase("AddLeaf"))
212             {
213                 Element JavaDoc leafElement = resultDocument.createElement(domArgument);
214                 currentElement.appendChild(leafElement);
215                 currentElement=leafElement;
216                 isInLeaf=true;
217             }
218             else
219             if (domOperation.equalsIgnoreCase("WriteAttributeKey"))
220             {
221                 // add an attribute with the next lookahead operation :
222
domOperation = i.next(); // Fetch the next operation , must be WriteAttributeValue
223
String JavaDoc value = j.next(); // Feth the attributes value.
224
if (!domArgument.toLowerCase().trim().equalsIgnoreCase("_moz-userdefined"))
225                     currentElement.setAttribute(domArgument.toLowerCase(), value);
226             }
227             else
228             if (domOperation.equalsIgnoreCase("CloseLeaf"))
229             {
230                 if (isInLeaf)
231                 {
232                     currentElement = (Element JavaDoc)currentElement.getParentNode();
233                     isInLeaf=false;
234                 }
235                 
236             }
237             else
238             if (domOperation.equalsIgnoreCase("AddEntity"))
239             {
240                 DOMEntityReference entity = (DOMEntityReference) resultDocument.createEntityReference(domArgument);
241 // a bugfix for a c++ problem in the mozilla parser:
242
if (!Character.isDigit(domArgument.charAt(0)))
243                     entity.setText("&"+domArgument+";");
244                 else
245                     entity.setText("");
246                 currentElement.appendChild(entity);
247             }
248             else
249             if (domOperation.equalsIgnoreCase("AddComment"))
250             {
251                 Comment JavaDoc comment = resultDocument.createComment(domArgument);
252                 currentElement.appendChild(comment);
253             }
254             else
255                 if (domOperation.equalsIgnoreCase("SetTitle"))
256                 {
257                     //System.out.println("---------> Add Title :" + domArgument);
258
}
259             
260         }
261         return resultDocument;
262     }
263     
264     public void dump(){
265         Iterator JavaDoc<String JavaDoc> i2 = this.operations.iterator();
266         Iterator JavaDoc<String JavaDoc> j2 = this.arguments.iterator();
267         while (i2.hasNext())
268             System.err.println(i2.next() +" : " + j2.next());
269     }
270     
271     public static String JavaDoc fixText(String JavaDoc text)
272     {
273         String JavaDoc fixedText = new String JavaDoc(text);
274         fixedText = fixedText.replaceAll("&#10;", "");
275         fixedText = fixedText.replaceAll("&#9;", "");
276         fixedText = fixedText.replaceAll("&#160;", " ");
277         fixedText = fixedText.replaceAll("&#194;", "\"");
278         fixedText = fixedText.replaceAll("&quot;", "\"");
279         fixedText = fixedText.replaceAll("&lt;", "<");
280         fixedText = fixedText.replaceAll("&gt;", ">");
281         fixedText = fixedText.replaceAll("&amp;", "&");
282         fixedText = fixedText.replaceAll(String32, " ");
283         fixedText = fixedText.replaceAll("["+String0+String1+String14+String1d+String0xf + String0xf+
284                 String0x1A + String0x12 + String0x8 + String0x1f+ String0x2+String0x7+String0x18+String0x19+String0x1B+String0x1C+
285                 String0x11+String0x10+String0x13+"]" , "");
286         return fixedText;
287     }
288
289     /**
290      * @return
291      */

292     public Vector JavaDoc<String JavaDoc> getInstructions() {
293         return operations;
294     }
295     
296     
297 }
298
Popular Tags