KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > nu > xom > XOMHandler


1 /* Copyright 2002-2004 Elliotte Rusty Harold
2    
3    This library is free software; you can redistribute it and/or modify
4    it under the terms of version 2.1 of the GNU Lesser General Public
5    License as published by the Free Software Foundation.
6    
7    This library is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10    GNU Lesser General Public License for more details.
11    
12    You should have received a copy of the GNU Lesser General Public
13    License along with this library; if not, write to the
14    Free Software Foundation, Inc., 59 Temple Place, Suite 330,
15    Boston, MA 02111-1307 USA
16    
17    You can contact Elliotte Rusty Harold by sending e-mail to
18    elharo@metalab.unc.edu. Please include the word "XOM" in the
19    subject line. The XOM home page is located at http://www.xom.nu/
20 */

21
22
23 package nu.xom;
24
25 import java.util.Stack JavaDoc;
26
27 import org.xml.sax.ContentHandler JavaDoc;
28 import org.xml.sax.DTDHandler JavaDoc;
29 import org.xml.sax.Locator JavaDoc;
30 import org.xml.sax.ext.DeclHandler JavaDoc;
31 import org.xml.sax.ext.LexicalHandler JavaDoc;
32
33 /**
34  * @author Elliotte Rusty Harold
35  * @version 1.0
36  *
37  */

38 class XOMHandler
39   implements ContentHandler JavaDoc, LexicalHandler JavaDoc, DeclHandler JavaDoc, DTDHandler JavaDoc {
40
41     protected Document document;
42     protected String JavaDoc documentBaseURI;
43     
44     // parent is never null. It is the node we're adding children
45
// to. current corresponds to the most recent startElement()
46
// method and may be null if we've skipped it (makeElement
47
// returned null.) If we didn't skip it, then parent and
48
// current should be the same node.
49
protected ParentNode parent;
50     protected ParentNode current;
51     protected Stack JavaDoc parents;
52     protected boolean inProlog;
53     protected boolean inDTD;
54     protected int position; // current number of items in prolog
55
protected Locator JavaDoc locator;
56     protected DocType doctype;
57     protected StringBuffer JavaDoc internalDTDSubset;
58     protected NodeFactory factory;
59     
60     
/**
 * Creates a handler that builds its nodes through the given factory.
 *
 * @param factory the node factory stored for use by the SAX callbacks
 */
XOMHandler(NodeFactory factory) {
    this.factory = factory;
}
64     
65     
66     public void setDocumentLocator(Locator JavaDoc locator) {
67         this.locator = locator;
68     }
69
70     
71     Document getDocument() {
72         return document;
73     }
74
75     
76     public void startDocument() {
77         
78         inDTD = false;
79         document = factory.startMakingDocument();
80         parent = document;
81         current = document;
82         parents = new Stack JavaDoc();
83         parents.push(document);
84         inProlog = true;
85         position = 0;
86         buffer = new StringBuffer JavaDoc();
87         doctype = null;
88         if (locator != null) {
89             documentBaseURI = locator.getSystemId();
90             // According to the XML spec,
91
// "It is an error for a fragment identifier
92
// (beginning with a # character) to be part of a system identifier"
93
// but some parsers including Xerces seem to get this wrong, so we'll
94
document.setBaseURI(documentBaseURI);
95         }
96         
97     }
98   
99     
100     public void endDocument() {
101         factory.finishMakingDocument(document);
102         parents.pop();
103     }
104   
105     
106     public void startElement(String JavaDoc namespaceURI, String JavaDoc localName,
107       String JavaDoc qualifiedName, org.xml.sax.Attributes JavaDoc attributes) {
108         
109         flushText();
110         Element element;
111         if (parent != document) {
112             element = factory.startMakingElement(qualifiedName, namespaceURI);
113         }
114         else { // root
115
element = factory.makeRootElement(qualifiedName, namespaceURI);
116             if (element == null) { // null root; that's a no-no
117
throw new NullPointerException JavaDoc(
118                     "Factory failed to create root element."
119                 );
120             }
121             document.setRootElement(element);
122             inProlog = false;
123         }
124         
125         current = element;
126         // Need to push this, even if it's null
127
parents.push(element);
128         
129         if (element != null) { // wasn't filtered out
130
if (parent != document) {
131                 // a.k.a. parent not instanceof Document
132
parent.appendChild(element);
133             }
134             // This is optimized for the very common case where
135
// everything in the document has the same actual base URI.
136
// It may add redundant base URIs in cases like XInclude
137
// where different parts of the document have different
138
// base URIs.
139
if (locator != null) {
140                  String JavaDoc baseURI = locator.getSystemId();
141                  if (baseURI != null && !baseURI.equals(documentBaseURI)) {
142                      element.setActualBaseURI(baseURI);
143                  }
144             }
145             
146             // Attach the attributes; this must be done before the
147
// namespaces are attached.
148
// XXX pull out length
149

150             // XXX we've got a pretty good guess at how many attributes there
151
// will be here; we should ensureCapacity up to that length
152
for (int i = 0; i < attributes.getLength(); i++) {
153                 String JavaDoc qName = attributes.getQName(i);
154                 if (qName.startsWith("xmlns:") || qName.equals("xmlns")) {
155                     continue;
156                 }
157                 else {
158                     String JavaDoc namespace = attributes.getURI(i);
159                     String JavaDoc value = attributes.getValue(i);
160                     Nodes nodes = factory.makeAttribute(
161                       qName,
162                       namespace,
163                       value,
164                       convertStringToType(attributes.getType(i))
165                     );
166                     int numberChildren = 0;
167                     for (int j=0; j < nodes.size(); j++) {
168                         Node node = nodes.get(j);
169                         if (node.isAttribute()) {
170                             factory.addAttribute(element, (Attribute) node);
171                         }
172                         else {
173                             factory.insertChild(element, node, numberChildren++);
174                         }
175                     }
176                 }
177             }
178
179             // Attach the namespaces
180
for (int i = 0; i < attributes.getLength(); i++) {
181                 String JavaDoc qName = attributes.getQName(i);
182                 if (qName.startsWith("xmlns:")) {
183                     String JavaDoc namespaceName = attributes.getValue(i);
184                     String JavaDoc namespacePrefix = qName.substring(6);
185                     String JavaDoc currentValue
186                        = element.getNamespaceURI(namespacePrefix);
187                     if (!namespaceName.equals(currentValue)) {
188                         element.addNamespaceDeclaration(
189                           namespacePrefix, namespaceName);
190                     }
191                 }
192                 else if (qName.equals("xmlns")) {
193                     String JavaDoc namespaceName = attributes.getValue(i);
194                     String JavaDoc namespacePrefix = "";
195                     String JavaDoc currentValue
196                       = element.getNamespaceURI(namespacePrefix);
197                     if (!namespaceName.equals(currentValue)) {
198                         element.addNamespaceDeclaration(namespacePrefix,
199                          namespaceName);
200                     }
201                 }
202             }
203             
204             // this is the new parent
205
parent = element;
206         }
207         
208     }
209
210     
211     public void endElement(
212       String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qualifiedName) {
213         
214         // If we're immediately inside a skipped element
215
// we need to reset current to null, not to the parent
216
current = (ParentNode) parents.pop();
217         flushText();
218         
219         if (current != null) {
220             parent = current.getParent();
221             Nodes result = factory.finishMakingElement((Element) current);
222             
223             // Optimization for default case where result only contains current
224
if (result.size() != 1 || result.get(0) != current) {
225                 if (!parent.isDocument()) {
226                     parent.removeChild(parent.getChildCount() - 1);
227                     for (int i=0; i < result.size(); i++) {
228                         Node node = result.get(i);
229                          if (node.isAttribute()) {
230                              ((Element) parent).addAttribute((Attribute) node);
231                          }
232                          else {
233                              parent.appendChild(node);
234                          }
235                     }
236                 }
237                 else { // root element
238
Document doc = (Document) parent;
239                     Element currentRoot = doc.getRootElement();
240                     boolean beforeRoot = true;
241                     for (int i=0; i < result.size(); i++) {
242                         Node node = result.get(i);
243                         if (node.isElement()) {
244                             if (node != currentRoot) {
245                                 if (!beforeRoot) {
246                                     // already set root, oops
247
throw new IllegalAddException("Factory returned multiple roots");
248                                 }
249                                 doc.setRootElement((Element) node);
250                             }
251                             beforeRoot = false;
252                         }
253                         else if (beforeRoot) {
254                             doc.insertChild(node, doc.indexOf(doc.getRootElement()));
255                         }
256                         else {
257                             doc.appendChild(node);
258                         }
259                     }
260                     if (beforeRoot) {
261                         // somebody tried to replace the root element with
262
// no element at all. That's a no-no
263
throw new WellformednessException(
264                           "Factory attempted to remove the root element");
265                     }
266                 }
267             }
268         }
269         
270     }
271     
272     
273     static Attribute.Type convertStringToType(String JavaDoc saxType) {
274     
275         if (saxType.equals("CDATA")) return Attribute.Type.CDATA;
276         if (saxType.equals("ID")) return Attribute.Type.ID;
277         if (saxType.equals("IDREF")) return Attribute.Type.IDREF;
278         if (saxType.equals("IDREFS")) return Attribute.Type.IDREFS;
279         if (saxType.equals("NMTOKEN")) return Attribute.Type.NMTOKEN;
280         if (saxType.equals("NMTOKENS")) return Attribute.Type.NMTOKENS;
281         if (saxType.equals("ENTITY")) return Attribute.Type.ENTITY;
282         if (saxType.equals("ENTITIES")) return Attribute.Type.ENTITIES;
283         if (saxType.equals("NOTATION")) return Attribute.Type.NOTATION;
284         
285         // non-standard but some parsers use this
286
if (saxType.equals("ENUMERATION")) {
287             return Attribute.Type.ENUMERATION;
288         }
289         if (saxType.startsWith("(")) return Attribute.Type.ENUMERATION;
290     
291         return Attribute.Type.UNDECLARED;
292         
293     }
294   
295     
296     protected StringBuffer JavaDoc buffer;
297   
298     public void characters(char[] text, int start, int length) {
299         buffer.append(text, start, length);
300         if (finishedCDATA && length > 0) inCDATA = false;
301     }
302  
303     
304     // accumulate all text that's in the buffer into a text node
305
protected void flushText() {
306         
307         if (buffer.length() > 0) {
308             Nodes result;
309             if (!inCDATA) {
310                 result = factory.makeText(buffer.toString());
311             }
312             else {
313                 result = factory.makeCDATASection(buffer.toString());
314             }
315             for (int i=0; i < result.size(); i++) {
316                 Node node = result.get(i);
317                 if (node.isAttribute()) {
318                     ((Element) parent).addAttribute((Attribute) node);
319                 }
320                 else {
321                     parent.appendChild(node);
322                 }
323             }
324             buffer = new StringBuffer JavaDoc();
325         }
326         inCDATA = false;
327         finishedCDATA = false;
328         
329     }
330   
331     
332     public void ignorableWhitespace(
333       char[] text, int start, int length) {
334         characters(text, start, length);
335     }
336   
337     
338     public void processingInstruction(String JavaDoc target, String JavaDoc data) {
339         
340         if (!inDTD) flushText();
341         if (inExternalSubset) return;
342         Nodes result = factory.makeProcessingInstruction(target, data);
343         
344         for (int i = 0; i < result.size(); i++) {
345             Node node = result.get(i);
346             if (!inDTD) {
347                 if (inProlog) {
348                     parent.insertChild(node, position);
349                     position++;
350                 }
351                 else {
352                     if (node.isAttribute()) {
353                         ((Element) parent).addAttribute((Attribute) node);
354                     }
355                     else parent.appendChild(node);
356                 }
357             }
358             else {
359                 if (node.isProcessingInstruction() || node.isComment()) {
360                     internalDTDSubset.append(" ");
361                     internalDTDSubset.append(node.toXML());
362                     internalDTDSubset.append("\n");
363                 }
364                 else {
365                     throw new XMLException("Factory tried to put a "
366                       + node.getClass().getName()
367                       + " in the internal DTD subset");
368                 }
369             }
370         }
371
372     }
373
374
375     // XOM handles this with attribute values; not prefix mappings
376
public void startPrefixMapping(String JavaDoc prefix, String JavaDoc uri) {}
377     public void endPrefixMapping(String JavaDoc prefix) {}
378
379     public void skippedEntity(String JavaDoc name) {
380         flushText();
381         throw new XMLException("Could not resolve entity " + name);
382     }
383     
384     
385     // LexicalHandler events
386
public void startDTD(String JavaDoc rootName, String JavaDoc publicID,
387       String JavaDoc systemID) {
388         
389         inDTD = true;
390         Nodes result = factory.makeDocType(rootName, publicID, systemID);
391         for (int i = 0; i < result.size(); i++) {
392             Node node = result.get(i);
393             document.insertChild(node, position);
394             position++;
395             if (node.isDocType()) {
396                 DocType doctype = (DocType) node;
397                 internalDTDSubset = new StringBuffer JavaDoc();
398                 this.doctype = doctype;
399             }
400         }
401         
402     }
403      
404     
405     public void endDTD() {
406         
407         inDTD = false;
408         if (doctype != null) {
409             doctype.setInternalDTDSubset(internalDTDSubset.toString());
410         }
411         
412     }
413
414     
415     protected boolean inExternalSubset = false;
416
417     // We have a problem here. Xerces gets this right,
418
// but Crimson and possibly other parsers don't properly
419
// report these entities, or perhaps just not tag them
420
// with [dtd] like they're supposed to.
421
public void startEntity(String JavaDoc name) {
422       if (name.equals("[dtd]")) inExternalSubset = true;
423     }
424     
425     
426     public void endEntity(String JavaDoc name) {
427       if (name.equals("[dtd]")) inExternalSubset = false;
428     }
429     
430     
431     protected boolean inCDATA = false;
432     protected boolean finishedCDATA = false;
433     
434     public void startCDATA() {
435         if (buffer.length() == 0) inCDATA = true;
436         finishedCDATA = false;
437     }
438     
439     
440     public void endCDATA() {
441         finishedCDATA = true;
442     }
443
444     
445     public void comment(char[] text, int start, int length) {
446         
447         if (!inDTD) flushText();
448         if (inExternalSubset) return;
449
450         Nodes result = factory.makeComment(new String JavaDoc(text, start, length));
451         
452         for (int i = 0; i < result.size(); i++) {
453             Node node = result.get(i);
454             if (!inDTD) {
455                 if (inProlog) {
456                     parent.insertChild(node, position);
457                     position++;
458                 }
459                 else {
460                     if (node instanceof Attribute) {
461                         ((Element) parent).addAttribute((Attribute) node);
462                     }
463                     else parent.appendChild(node);
464                 }
465             }
466             else {
467                 if (node.isComment() || node.isProcessingInstruction()) {
468                     internalDTDSubset.append(" ");
469                     internalDTDSubset.append(node.toXML());
470                     internalDTDSubset.append("\n");
471                 }
472                 else {
473                     throw new XMLException("Factory tried to put a "
474                       + node.getClass().getName()
475                       + " in the internal DTD subset");
476                 }
477             }
478         }
479
480     }
481     
482     
483     public void elementDecl(String JavaDoc name, String JavaDoc model) {
484         
485         if (!inExternalSubset && doctype != null) {
486             internalDTDSubset.append(" <!ELEMENT ");
487             internalDTDSubset.append(name);
488             internalDTDSubset.append(' ');
489             internalDTDSubset.append(model);
490             internalDTDSubset.append(">\n");
491         }
492         
493     }
494   
495     
496     public void attributeDecl(String JavaDoc elementName,
497       String JavaDoc attributeName, String JavaDoc type, String JavaDoc mode,
498       String JavaDoc defaultValue) {
499     
500         if (!inExternalSubset && doctype != null) {
501             internalDTDSubset.append(" <!ATTLIST ");
502             internalDTDSubset.append(elementName);
503             internalDTDSubset.append(' ');
504             internalDTDSubset.append(attributeName);
505             internalDTDSubset.append(' ');
506             internalDTDSubset.append(type);
507             if (mode != null) {
508             internalDTDSubset.append(' ');
509                 internalDTDSubset.append(mode);
510             }
511             if (defaultValue != null) {
512                 internalDTDSubset.append(' ');
513                 internalDTDSubset.append('"');
514                 internalDTDSubset.append(escapeReservedCharacters(defaultValue));
515                 internalDTDSubset.append("\"");
516             }
517             internalDTDSubset.append(">\n");
518         }
519         
520     }
521   
522     
523     public void internalEntityDecl(String JavaDoc name,
524        String JavaDoc value) {
525         
526         if (!inExternalSubset && doctype != null) {
527             if (!name.startsWith("%")) { // ignore parameter entities
528
internalDTDSubset.append(" <!ENTITY ");
529                 internalDTDSubset.append(name);
530                 internalDTDSubset.append(" \"");
531                 internalDTDSubset.append(escapeCarriageReturns(value));
532                 internalDTDSubset.append("\">\n");
533             }
534         }
535         
536     }
537   
538     
539     public void externalEntityDecl(String JavaDoc name,
540        String JavaDoc publicID, String JavaDoc systemID) {
541      
542         if (!inExternalSubset && doctype != null) {
543             if (!name.startsWith("%")) { // ignore parameter entities
544
internalDTDSubset.append(" <!ENTITY ");
545                 if (publicID != null) {
546                     internalDTDSubset.append(name);
547                     internalDTDSubset.append(" PUBLIC \"");
548                     internalDTDSubset.append(publicID);
549                     internalDTDSubset.append("\" \"");
550                     internalDTDSubset.append(systemID);
551                 }
552                 else {
553                     internalDTDSubset.append(name);
554                     internalDTDSubset.append(" SYSTEM \"");
555                     internalDTDSubset.append(systemID);
556                 }
557                 internalDTDSubset.append("\">\n");
558             }
559         }
560         
561     }
562     
563     
564     public void notationDecl(String JavaDoc name, String JavaDoc publicID,
565       String JavaDoc systemID) {
566         
567         if (!inExternalSubset && doctype != null) {
568             internalDTDSubset.append(" <!NOTATION ");
569             internalDTDSubset.append(name);
570             if (publicID != null) {
571                 internalDTDSubset.append(" PUBLIC \"");
572                 internalDTDSubset.append(publicID);
573                 internalDTDSubset.append('"');
574                 if (systemID != null) {
575                     internalDTDSubset.append(" \"");
576                     internalDTDSubset.append(systemID);
577                     internalDTDSubset.append('"');
578                 }
579             }
580             else {
581                 internalDTDSubset.append(" SYSTEM \"");
582                 internalDTDSubset.append(systemID);
583                 internalDTDSubset.append('"');
584             }
585             internalDTDSubset.append(">\n");
586         }
587         
588     }
589    
590     
591     public void unparsedEntityDecl(String JavaDoc name, String JavaDoc publicID,
592      String JavaDoc systemID, String JavaDoc notationName) {
593         
594         if (!inExternalSubset && doctype != null) {
595             internalDTDSubset.append(" <!ENTITY ");
596             if (publicID != null) {
597                 internalDTDSubset.append(name);
598                 internalDTDSubset.append(" PUBLIC \"");
599                 internalDTDSubset.append(publicID);
600                 internalDTDSubset.append("\" \"");
601                 internalDTDSubset.append(systemID);
602                 internalDTDSubset.append("\" NDATA ");
603                 internalDTDSubset.append(notationName);
604             }
605             else {
606                 internalDTDSubset.append(name);
607                 internalDTDSubset.append(" SYSTEM \"");
608                 internalDTDSubset.append(systemID);
609                 internalDTDSubset.append("\" NDATA ");
610                 internalDTDSubset.append(notationName);
611             }
612             internalDTDSubset.append(">\n");
613         }
614         
615     }
616     
617     
618     /* It's really weird that SAX needs two different escape methods
619        here, but it does. We need to escape the carriage returns (and
620        only the carriage returns for entity replacement text,
621        because those do not resolve general entities. However,
622        general entities are resolved in attribute default values.
623      */

624     private static String JavaDoc escapeCarriageReturns(String JavaDoc s) {
625         
626         int length = s.length();
627         StringBuffer JavaDoc result = new StringBuffer JavaDoc(length);
628         for (int i = 0; i < length; i++) {
629             char c = s.charAt(i);
630             if (c == '\r') result.append("&#x0D;");
631             else result.append(c);
632         }
633         
634         return result.toString();
635         
636     }
637
638     
639     private static String JavaDoc escapeReservedCharacters(String JavaDoc s) {
640         
641         int length = s.length();
642         StringBuffer JavaDoc result = new StringBuffer JavaDoc(length);
643         for (int i = 0; i < length; i++) {
644             char c = s.charAt(i);
645             // XXX table lookup
646
switch (c) {
647                 case '\r':
648                     result.append("&#x0D;");
649                     break;
650                 case '&':
651                     result.append("&amp;");
652                     break;
653                 case '"':
654                     result.append("&quot;");
655                     break;
656                 case '<':
657                     result.append("&lt;");
658                     break;
659                 default:
660                     result.append(c);
661             }
662         }
663         
664         return result.toString();
665         
666     }
667
668     
669 }
Popular Tags