KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jdom > input > SAXBuilder


1 /*--
2
3  $Id: SAXBuilder.java,v 1.91 2004/12/11 02:17:58 jhunter Exp $
4
5  Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
6  All rights reserved.
7
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions
10  are met:
11
12  1. Redistributions of source code must retain the above copyright
13     notice, this list of conditions, and the following disclaimer.
14
15  2. Redistributions in binary form must reproduce the above copyright
16     notice, this list of conditions, and the disclaimer that follows
17     these conditions in the documentation and/or other materials
18     provided with the distribution.
19
20  3. The name "JDOM" must not be used to endorse or promote products
21     derived from this software without prior written permission. For
22     written permission, please contact <request_AT_jdom_DOT_org>.
23
24  4. Products derived from this software may not be called "JDOM", nor
25     may "JDOM" appear in their name, without prior written permission
26     from the JDOM Project Management <request_AT_jdom_DOT_org>.
27
28  In addition, we request (but do not require) that you include in the
29  end-user documentation provided with the redistribution and/or in the
30  software itself an acknowledgement equivalent to the following:
31      "This product includes software developed by the
32       JDOM Project (http://www.jdom.org/)."
33  Alternatively, the acknowledgment may be graphical using the logos
34  available at http://www.jdom.org/images/logos.
35
36  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
40  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  SUCH DAMAGE.
48
49  This software consists of voluntary contributions made by many
50  individuals on behalf of the JDOM Project and was originally
51  created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
52  Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
53  on the JDOM Project, please see <http://www.jdom.org/>.
54
55  */

56
57 package org.jdom.input;
58
59 import java.io.*;
60 import java.lang.reflect.*;
61 import java.net.*;
62 import java.util.*;
63
64 import org.jdom.*;
65
66 import org.xml.sax.*;
67 import org.xml.sax.helpers.XMLReaderFactory JavaDoc;
68
69 /**
70  * Builds a JDOM document from files, streams, readers, URLs, or a SAX {@link
71  * org.xml.sax.InputSource} instance using a SAX parser. The builder uses a
72  * third-party SAX parser (chosen by JAXP by default, or you can choose
73  * manually) to handle the parsing duties and simply listens to the SAX events
74  * to construct a document. Details which SAX does not provide, such as
75  * whitespace outside the root element, are not represented in the JDOM
76  * document. Information about SAX can be found at <a
77  * HREF="http://www.saxproject.org">http://www.saxproject.org</a>.
78  * <p>
79  * Known issues: Relative paths for a {@link DocType} or {@link EntityRef} may
80  * be converted by the SAX parser into absolute paths.
81  *
82  * @version $Revision: 1.91 $, $Date: 2004/12/11 02:17:58 $
83  * @author Jason Hunter
84  * @author Brett McLaughlin
85  * @author Dan Schaffer
86  * @author Philip Nelson
87  * @author Alex Rosen
88  */

89 public class SAXBuilder {
90
91     private static final String JavaDoc CVS_ID =
92       "@(#) $RCSfile: SAXBuilder.java,v $ $Revision: 1.91 $ $Date: 2004/12/11 02:17:58 $ $Name: $";
93
94     /**
95      * Default parser class to use. This is used when no other parser
96      * is given and JAXP isn't available.
97      */

98     private static final String JavaDoc DEFAULT_SAX_DRIVER =
99         "org.apache.xerces.parsers.SAXParser";
100
101     /** Whether validation should occur */
102     private boolean validate;
103
104     /** Whether expansion of entities should occur */
105     private boolean expand = true;
106
107     /** Adapter class to use */
108     private String JavaDoc saxDriverClass;
109
110     /** ErrorHandler class to use */
111     private ErrorHandler saxErrorHandler = null;
112
113     /** EntityResolver class to use */
114     private EntityResolver saxEntityResolver = null;
115
116     /** DTDHandler class to use */
117     private DTDHandler saxDTDHandler = null;
118
119     /** XMLFilter instance to use */
120     private XMLFilter saxXMLFilter = null;
121
122     /** The factory for creating new JDOM objects */
123     private JDOMFactory factory = new DefaultJDOMFactory();
124
125     /** Whether to ignore ignorable whitespace */
126     private boolean ignoringWhite = false;
127
128     /** Whether to ignore all whitespace content */
129     private boolean ignoringBoundaryWhite = false;
130
131     /** User-specified features to be set on the SAX parser */
132     private HashMap features = new HashMap(5);
133
134     /** User-specified properties to be set on the SAX parser */
135     private HashMap properties = new HashMap(5);
136
137     /**
138      * Whether parser reuse is allowed.
139      * <p>Default: <code>true</code></p>
140      */

141     private boolean reuseParser = true;
142
143     /** The current SAX parser, if parser reuse has been activated. */
144     private XMLReader saxParser = null;
145
/**
 * Creates a non-validating SAXBuilder. No SAX driver class is named, so
 * the parser is looked up through JAXP first and a built-in default
 * SAX driver is tried if that fails.
 */
public SAXBuilder() {
    // Delegate to the validating/non-validating constructor.
    this(false);
}
154
/**
 * Creates a SAXBuilder that locates its parser through JAXP first and
 * falls back to a built-in default SAX driver. Whether the parser
 * validates is decided by the argument.
 *
 * @param validate <code>true</code> to request a validating parser,
 * <code>false</code> otherwise.
 */
public SAXBuilder(boolean validate) {
    super();
    this.validate = validate;
}
167
/**
 * Creates a non-validating SAXBuilder that parses with the named SAX
 * driver.
 *
 * @param saxDriverClass class name of the SAX driver to parse with.
 */
public SAXBuilder(String saxDriverClass) {
    // Same as the two-argument form with validation switched off.
    this(saxDriverClass, false);
}
178
/**
 * Creates a SAXBuilder that parses with the named SAX driver and
 * validates or not as requested.
 *
 * @param saxDriverClass class name of the SAX driver to parse with.
 * @param validate <code>true</code> to request a validating parser,
 * <code>false</code> otherwise.
 */
public SAXBuilder(String saxDriverClass, boolean validate) {
    super();
    this.validate = validate;
    this.saxDriverClass = saxDriverClass;
}
193
194     /**
195      * Returns the driver class assigned in the constructor, or null if none.
196      *
197      * @return the driver class assigned in the constructor
198      */

199     public String JavaDoc getDriverClass() {
200         return saxDriverClass;
201     }
202
203     /**
204      * Returns the current {@link org.jdom.JDOMFactory} in use.
205      * @return the factory in use
206      */

207     public JDOMFactory getFactory() {
208         return factory;
209     }
210
211     /**
212      * This sets a custom JDOMFactory for the builder. Use this to build
213      * the tree with your own subclasses of the JDOM classes.
214      *
215      * @param factory <code>JDOMFactory</code> to use
216      */

217     public void setFactory(JDOMFactory factory) {
218         this.factory = factory;
219     }
220
221     /**
222      * Returns whether validation is to be performed during the build.
223      *
224      * @return whether validation is to be performed during the build
225      */

226     public boolean getValidation() {
227         return validate;
228     }
229
230     /**
231      * This sets validation for the builder.
232      *
233      * @param validate <code>boolean</code> indicating whether validation
234      * should occur.
235      */

236     public void setValidation(boolean validate) {
237         this.validate = validate;
238     }
239
240     /**
241      * Returns the {@link ErrorHandler} assigned, or null if none.
242      * @return the ErrorHandler assigned, or null if none
243      */

244     public ErrorHandler getErrorHandler() {
245         return saxErrorHandler;
246     }
247
248     /**
249      * This sets custom ErrorHandler for the <code>Builder</code>.
250      *
251      * @param errorHandler <code>ErrorHandler</code>
252      */

253     public void setErrorHandler(ErrorHandler errorHandler) {
254         saxErrorHandler = errorHandler;
255     }
256
257     /**
258      * Returns the {@link EntityResolver} assigned, or null if none.
259      *
260      * @return the EntityResolver assigned
261      */

262     public EntityResolver getEntityResolver() {
263         return saxEntityResolver;
264     }
265
266     /**
267      * This sets custom EntityResolver for the <code>Builder</code>.
268      *
269      * @param entityResolver <code>EntityResolver</code>
270      */

271     public void setEntityResolver(EntityResolver entityResolver) {
272         saxEntityResolver = entityResolver;
273     }
274
275     /**
276      * Returns the {@link DTDHandler} assigned, or null if none.
277      *
278      * @return the DTDHandler assigned
279      */

280     public DTDHandler getDTDHandler() {
281         return saxDTDHandler;
282     }
283
284     /**
285      * This sets custom DTDHandler for the <code>Builder</code>.
286      *
287      * @param dtdHandler <code>DTDHandler</code>
288      */

289     public void setDTDHandler(DTDHandler dtdHandler) {
290         saxDTDHandler = dtdHandler;
291     }
292
293     /**
294      * Returns the {@link XMLFilter} used during parsing, or null if none.
295      *
296      * @return the XMLFilter used during parsing
297      */

298     public XMLFilter getXMLFilter() {
299         return saxXMLFilter;
300     }
301
302     /**
303      * This sets a custom {@link org.xml.sax.XMLFilter} for the builder.
304      *
305      * @param xmlFilter the filter to use
306      */

307     public void setXMLFilter(XMLFilter xmlFilter) {
308         saxXMLFilter = xmlFilter;
309     }
310
311     /**
312      * Returns whether element content whitespace is to be ignored during the
313      * build.
314      *
315      * @return whether element content whitespace is to be ignored during the
316      * build
317      */

318     public boolean getIgnoringElementContentWhitespace() {
319         return ignoringWhite;
320     }
321
322     /**
323      * Specifies whether or not the parser should elminate whitespace in
324      * element content (sometimes known as "ignorable whitespace") when
325      * building the document. Only whitespace which is contained within
326      * element content that has an element only content model will be
327      * eliminated (see XML Rec 3.2.1). For this setting to take effect
328      * requires that validation be turned on. The default value of this
329      * setting is <code>false</code>.
330      *
331      * @param ignoringWhite Whether to ignore ignorable whitespace
332      */

333     public void setIgnoringElementContentWhitespace(boolean ignoringWhite) {
334         this.ignoringWhite = ignoringWhite;
335     }
336
337     /**
338      * Returns whether or not the parser will elminate element content
339      * containing only whitespace.
340      *
341      * @return <code>boolean</code> - whether only whitespace content will
342      * be ignored during build.
343      *
344      * @see #setIgnoringBoundaryWhitespace
345      */

346     public boolean getIgnoringBoundaryWhitespace() {
347         return ignoringBoundaryWhite;
348     }
349
350     /**
351      * Specifies whether or not the parser should elminate boundary whitespace,
352      * a term that indicates whitespace-only text between element tags. This
353      * feature is a lot like {@link #setIgnoringElementContentWhitespace(boolean)}
354      * but this feature is more aggressive and doesn't require validation be
355      * turned on. The {@link #setIgnoringElementContentWhitespace(boolean)}
356      * call impacts the SAX parse process while this method impacts the JDOM
357      * build process, so it can be beneficial to turn both on for efficiency.
358      * For implementation efficiency, this method actually removes all
359      * whitespace-only text() nodes. That can, in some cases (like beteween an
360      * element tag and a comment), include whitespace that isn't just boundary
361      * whitespace. The default is <code>false</code>.
362      *
363      * @param ignoringBoundaryWhite Whether to ignore whitespace-only text
364      * noes
365      */

366     public void setIgnoringBoundaryWhitespace(boolean ignoringBoundaryWhite) {
367         this.ignoringBoundaryWhite = ignoringBoundaryWhite;
368     }
369
370     /**
371      * Returns whether the contained SAX parser instance is reused across
372      * multiple parses. The default is true.
373      *
374      * @return whether the contained SAX parser instance is reused across
375      * multiple parses
376      */

377     public boolean getReuseParser() {
378         return reuseParser;
379     }
380
381     /**
382      * Specifies whether this builder shall reuse the same SAX parser
383      * when performing subsequent parses or allocate a new parser for
384      * each parse. The default value of this setting is
385      * <code>true</code> (parser reuse).
386      * <p>
387      * <strong>Note</strong>: As SAX parser instances are not thread safe,
388      * the parser reuse feature should not be used with SAXBuilder instances
389      * shared among threads.</p>
390      *
391      * @param reuseParser Whether to reuse the SAX parser.
392      */

393     public void setReuseParser(boolean reuseParser) {
394         this.reuseParser = reuseParser;
395         this.saxParser = null;
396     }
397
398     /**
399      * This sets a feature on the SAX parser. See the SAX documentation for
400      * </p>
401      * <p>
402      * NOTE: SAXBuilder requires that some particular features of the SAX parser be
403      * set up in certain ways for it to work properly. The list of such features
404      * may change in the future. Therefore, the use of this method may cause
405      * parsing to break, and even if it doesn't break anything today it might
406      * break parsing in a future JDOM version, because what JDOM parsers require
407      * may change over time. Use with caution.
408      * </p>
409      *
410      * @param name The feature name, which is a fully-qualified URI.
411      * @param value The requested state of the feature (true or false).
412      */

413     public void setFeature(String JavaDoc name, boolean value) {
414         // Save the specified feature for later.
415
features.put(name, new Boolean JavaDoc(value));
416     }
417
418     /**
419      * This sets a property on the SAX parser. See the SAX documentation for
420      * more information.
421      * <p>
422      * NOTE: SAXBuilder requires that some particular properties of the SAX parser be
423      * set up in certain ways for it to work properly. The list of such properties
424      * may change in the future. Therefore, the use of this method may cause
425      * parsing to break, and even if it doesn't break anything today it might
426      * break parsing in a future JDOM version, because what JDOM parsers require
427      * may change over time. Use with caution.
428      * </p>
429      *
430      * @param name The property name, which is a fully-qualified URI.
431      * @param value The requested value for the property.
432      */

433     public void setProperty(String JavaDoc name, Object JavaDoc value) {
434         // Save the specified property for later.
435
properties.put(name, value);
436     }
437
438     /**
439      * This builds a document from the supplied
440      * input source.
441      *
442      * @param in <code>InputSource</code> to read from
443      * @return <code>Document</code> resultant Document object
444      * @throws JDOMException when errors occur in parsing
445      * @throws IOException when an I/O error prevents a document
446      * from being fully parsed
447      */

448     public Document build(InputSource in)
449      throws JDOMException, IOException {
450         SAXHandler contentHandler = null;
451
452         try {
453             // Create and configure the content handler.
454
contentHandler = createContentHandler();
455             configureContentHandler(contentHandler);
456
457             XMLReader parser = this.saxParser;
458             if (parser == null) {
459                 // Create and configure the parser.
460
parser = createParser();
461
462                 // Install optional filter
463
if (saxXMLFilter != null) {
464                     // Connect filter chain to parser
465
XMLFilter root = saxXMLFilter;
466                     while (root.getParent() instanceof XMLFilter) {
467                         root = (XMLFilter)root.getParent();
468                     }
469                     root.setParent(parser);
470
471                     // Read from filter
472
parser = saxXMLFilter;
473                 }
474
475                 // Configure parser
476
configureParser(parser, contentHandler);
477
478                 if (reuseParser == true) {
479                     this.saxParser = parser;
480                 }
481             }
482             else {
483                 // Reset content handler as SAXHandler instances cannot
484
// be reused
485
configureParser(parser, contentHandler);
486             }
487
488             // Parse the document.
489
parser.parse(in);
490
491             return contentHandler.getDocument();
492         }
493         catch (SAXParseException e) {
494             Document doc = contentHandler.getDocument();
495             if (doc.hasRootElement() == false) {
496                 doc = null;
497             }
498
499             String JavaDoc systemId = e.getSystemId();
500             if (systemId != null) {
501                 throw new JDOMParseException("Error on line " +
502                     e.getLineNumber() + " of document " + systemId, e, doc);
503             } else {
504                 throw new JDOMParseException("Error on line " +
505                     e.getLineNumber(), e, doc);
506             }
507         }
508         catch (SAXException e) {
509             throw new JDOMParseException("Error in building: " +
510                 e.getMessage(), e, contentHandler.getDocument());
511         }
512         finally {
513             // Explicitly nullify the handler to encourage GC
514
// It's a stack var so this shouldn't be necessary, but it
515
// seems to help on some JVMs
516
contentHandler = null;
517         }
518     }
519
520     /**
521      * This creates the SAXHandler that will be used to build the Document.
522      *
523      * @return <code>SAXHandler</code> - resultant SAXHandler object.
524      */

525     protected SAXHandler createContentHandler() {
526         SAXHandler contentHandler = new SAXHandler(factory);
527         return contentHandler;
528     }
529
530     /**
531      * This configures the SAXHandler that will be used to build the Document.
532      * <p>
533      * The default implementation simply passes through some configuration
534      * settings that were set on the SAXBuilder: setExpandEntities() and
535      * setIgnoringElementContentWhitespace().
536      * </p>
537      */

538     protected void configureContentHandler(SAXHandler contentHandler) {
539         // Setup pass through behavior
540
contentHandler.setExpandEntities(expand);
541         contentHandler.setIgnoringElementContentWhitespace(ignoringWhite);
542         contentHandler.setIgnoringBoundaryWhitespace(ignoringBoundaryWhite);
543     }
544
545     /**
546      * This creates the XMLReader to be used for reading the XML document.
547      * <p>
548      * The default behavior is to (1) use the saxDriverClass, if it has been
549      * set, (2) try to obtain a parser from JAXP, if it is available, and
550      * (3) if all else fails, use a hard-coded default parser (currently
551      * the Xerces parser). Subclasses may override this method to determine
552      * the parser to use in a different way.
553      * </p>
554      *
555      * @return <code>XMLReader</code> - resultant XMLReader object.
556      */

557     protected XMLReader createParser() throws JDOMException {
558         XMLReader parser = null;
559         if (saxDriverClass != null) {
560             // The user knows that they want to use a particular class
561
try {
562                 parser = XMLReaderFactory.createXMLReader(saxDriverClass);
563
564                 // Configure parser
565
setFeaturesAndProperties(parser, true);
566             }
567             catch (SAXException e) {
568               throw new JDOMException("Could not load " + saxDriverClass, e);
569             }
570         } else {
571             // Try using JAXP...
572
// Note we need JAXP 1.1, and if JAXP 1.0 is all that's
573
// available then the getXMLReader call fails and we skip
574
// to the hard coded default parser
575
try {
576                 // Get factory class and method.
577
Class JavaDoc factoryClass =
578                     Class.forName("org.jdom.input.JAXPParserFactory");
579
580                 Method createParser =
581                     factoryClass.getMethod("createParser",
582                         new Class JavaDoc[] { boolean.class, Map.class, Map.class });
583
584                 // Create SAX parser.
585
parser = (XMLReader)createParser.invoke(null,
586                                 new Object JavaDoc[] { new Boolean JavaDoc(validate),
587                                                features, properties });
588
589                 // Configure parser
590
setFeaturesAndProperties(parser, false);
591             }
592             catch (JDOMException e) {
593                 throw e;
594             }
595             catch (NoClassDefFoundError JavaDoc e) {
596                 // The class loader failed to resolve the dependencies
597
// of org.jdom.input.JAXPParserFactory. This probably means
598
// that no JAXP parser is present in its class path.
599
// => Ignore and try allocating default SAX parser instance.
600
}
601             catch (Exception JavaDoc e) {
602                 // Ignore and try allocating default SAX parser instance.
603
}
604         }
605
606         // Check to see if we got a parser yet, if not, try to use a
607
// hard coded default
608
if (parser == null) {
609             try {
610                 parser = XMLReaderFactory.createXMLReader(DEFAULT_SAX_DRIVER);
611                 // System.out.println("using default " + DEFAULT_SAX_DRIVER);
612
saxDriverClass = parser.getClass().getName();
613
614                 // Configure parser
615
setFeaturesAndProperties(parser, true);
616             }
617             catch (SAXException e) {
618                 throw new JDOMException("Could not load default SAX parser: "
619                   + DEFAULT_SAX_DRIVER, e);
620             }
621         }
622
623         return parser;
624     }
625
626     /**
627      * This configures the XMLReader to be used for reading the XML document.
628      * <p>
629      * The default implementation sets various options on the given XMLReader,
630      * such as validation, DTD resolution, entity handlers, etc., according
631      * to the options that were set (e.g. via <code>setEntityResolver</code>)
632      * and set various SAX properties and features that are required for JDOM
633      * internals. These features may change in future releases, so change this
634      * behavior at your own risk.
635      * </p>
636      */

637     protected void configureParser(XMLReader parser, SAXHandler contentHandler)
638         throws JDOMException {
639
640         // Setup SAX handlers.
641

642         parser.setContentHandler(contentHandler);
643
644         if (saxEntityResolver != null) {
645             parser.setEntityResolver(saxEntityResolver);
646         }
647
648         if (saxDTDHandler != null) {
649             parser.setDTDHandler(saxDTDHandler);
650         } else {
651             parser.setDTDHandler(contentHandler);
652         }
653
654         if (saxErrorHandler != null) {
655              parser.setErrorHandler(saxErrorHandler);
656         } else {
657              parser.setErrorHandler(new BuilderErrorHandler());
658         }
659
660         // Setup lexical reporting.
661
boolean lexicalReporting = false;
662         try {
663             parser.setProperty("http://xml.org/sax/handlers/LexicalHandler",
664                                contentHandler);
665             lexicalReporting = true;
666         } catch (SAXNotSupportedException e) {
667             // No lexical reporting available
668
} catch (SAXNotRecognizedException e) {
669             // No lexical reporting available
670
}
671
672         // Some parsers use alternate property for lexical handling (grr...)
673
if (!lexicalReporting) {
674             try {
675                 parser.setProperty(
676                     "http://xml.org/sax/properties/lexical-handler",
677                     contentHandler);
678                 lexicalReporting = true;
679             } catch (SAXNotSupportedException e) {
680                 // No lexical reporting available
681
} catch (SAXNotRecognizedException e) {
682                 // No lexical reporting available
683
}
684         }
685
686         // Try setting the DeclHandler if entity expansion is off
687
if (!expand) {
688             try {
689                 parser.setProperty(
690                     "http://xml.org/sax/properties/declaration-handler",
691                     contentHandler);
692             } catch (SAXNotSupportedException e) {
693                 // No lexical reporting available
694
} catch (SAXNotRecognizedException e) {
695                 // No lexical reporting available
696
}
697         }
698     }
699
700     private void setFeaturesAndProperties(XMLReader parser,
701                                           boolean coreFeatures)
702                                                         throws JDOMException {
703         // Set any user-specified features on the parser.
704
Iterator iter = features.keySet().iterator();
705         while (iter.hasNext()) {
706             String JavaDoc name = (String JavaDoc)iter.next();
707             Boolean JavaDoc value = (Boolean JavaDoc)features.get(name);
708             internalSetFeature(parser, name, value.booleanValue(), name);
709         }
710
711         // Set any user-specified properties on the parser.
712
iter = properties.keySet().iterator();
713         while (iter.hasNext()) {
714             String JavaDoc name = (String JavaDoc)iter.next();
715             internalSetProperty(parser, name, properties.get(name), name);
716         }
717
718         if (coreFeatures) {
719             // Set validation.
720
try {
721                 internalSetFeature(parser,
722                         "http://xml.org/sax/features/validation",
723                         validate, "Validation");
724             } catch (JDOMException e) {
725                 // If validation is not supported, and the user is requesting
726
// that we don't validate, that's fine - don't throw an
727
// exception.
728
if (validate)
729                     throw e;
730             }
731
732             // Setup some namespace features.
733
internalSetFeature(parser,
734                         "http://xml.org/sax/features/namespaces",
735                         true, "Namespaces");
736             internalSetFeature(parser,
737                         "http://xml.org/sax/features/namespace-prefixes",
738                         true, "Namespace prefixes");
739         }
740
741         // Set entity expansion
742
// Note SAXHandler can work regardless of how this is set, but when
743
// entity expansion it's worth it to try to tell the parser not to
744
// even bother with external general entities.
745
// Apparently no parsers yet support this feature.
746
// XXX It might make sense to setEntityResolver() with a resolver
747
// that simply ignores external general entities
748
try {
749             if (parser.getFeature("http://xml.org/sax/features/external-general-entities") != expand) {
750                 parser.setFeature("http://xml.org/sax/features/external-general-entities", expand);
751             }
752         }
753         catch (SAXNotRecognizedException e) { /* Ignore... */ }
754         catch (SAXNotSupportedException e) { /* Ignore... */ }
755     }
756
757     /**
758      * Tries to set a feature on the parser. If the feature cannot be set,
759      * throws a JDOMException describing the problem.
760      */

761     private void internalSetFeature(XMLReader parser, String JavaDoc feature,
762                     boolean value, String JavaDoc displayName) throws JDOMException {
763         try {
764             parser.setFeature(feature, value);
765         } catch (SAXNotSupportedException e) {
766             throw new JDOMException(
767                 displayName + " feature not supported for SAX driver " + parser.getClass().getName());
768         } catch (SAXNotRecognizedException e) {
769             throw new JDOMException(
770                 displayName + " feature not recognized for SAX driver " + parser.getClass().getName());
771         }
772     }
773
774     /**
775      * <p>
776      * Tries to set a property on the parser. If the property cannot be set,
777      * throws a JDOMException describing the problem.
778      * </p>
779      */

780     private void internalSetProperty(XMLReader parser, String JavaDoc property,
781                     Object JavaDoc value, String JavaDoc displayName) throws JDOMException {
782         try {
783             parser.setProperty(property, value);
784         } catch (SAXNotSupportedException e) {
785             throw new JDOMException(
786                 displayName + " property not supported for SAX driver " + parser.getClass().getName());
787         } catch (SAXNotRecognizedException e) {
788             throw new JDOMException(
789                 displayName + " property not recognized for SAX driver " + parser.getClass().getName());
790         }
791     }
792
793     /**
794      * <p>
795      * This builds a document from the supplied
796      * input stream.
797      * </p>
798      *
799      * @param in <code>InputStream</code> to read from
800      * @return <code>Document</code> resultant Document object
801      * @throws JDOMException when errors occur in parsing
802      * @throws IOException when an I/O error prevents a document
803      * from being fully parsed.
804      */

805     public Document build(InputStream in)
806      throws JDOMException, IOException {
807         return build(new InputSource(in));
808     }
809
810     /**
811      * <p>
812      * This builds a document from the supplied
813      * filename.
814      * </p>
815      *
816      * @param file <code>File</code> to read from
817      * @return <code>Document</code> resultant Document object
818      * @throws JDOMException when errors occur in parsing
819      * @throws IOException when an I/O error prevents a document
820      * from being fully parsed
821      */

822     public Document build(File file)
823         throws JDOMException, IOException {
824         try {
825             URL url = fileToURL(file);
826             return build(url);
827         } catch (MalformedURLException e) {
828             throw new JDOMException("Error in building", e);
829         }
830     }
831
832     /**
833      * <p>
834      * This builds a document from the supplied
835      * URL.
836      * </p>
837      *
838      * @param url <code>URL</code> to read from.
839      * @return <code>Document</code> - resultant Document object.
840      * @throws JDOMException when errors occur in parsing
841      * @throws IOException when an I/O error prevents a document
842      * from being fully parsed.
843      */

844     public Document build(URL url)
845         throws JDOMException, IOException {
846         String JavaDoc systemID = url.toExternalForm();
847         return build(new InputSource(systemID));
848     }
849
850     /**
851      * <p>
852      * This builds a document from the supplied
853      * input stream.
854      * </p>
855      *
856      * @param in <code>InputStream</code> to read from.
857      * @param systemId base for resolving relative URIs
858      * @return <code>Document</code> resultant Document object
859      * @throws JDOMException when errors occur in parsing
860      * @throws IOException when an I/O error prevents a document
861      * from being fully parsed
862      */

863     public Document build(InputStream in, String JavaDoc systemId)
864         throws JDOMException, IOException {
865
866         InputSource src = new InputSource(in);
867         src.setSystemId(systemId);
868         return build(src);
869     }
870
871     /**
872      * <p>
873      * This builds a document from the supplied
874      * Reader. It's the programmer's responsibility to make sure
875      * the reader matches the encoding of the file. It's often easier
876      * and safer to use an InputStream rather than a Reader, and to let the
877      * parser auto-detect the encoding from the XML declaration.
878      * </p>
879      *
880      * @param characterStream <code>Reader</code> to read from
881      * @return <code>Document</code> resultant Document object
882      * @throws JDOMException when errors occur in parsing
883      * @throws IOException when an I/O error prevents a document
884      * from being fully parsed
885      */

886     public Document build(Reader characterStream)
887         throws JDOMException, IOException {
888         return build(new InputSource(characterStream));
889     }
890
891     /**
892      * <p>
893      * This builds a document from the supplied
894      * Reader. It's the programmer's responsibility to make sure
895      * the reader matches the encoding of the file. It's often easier
896      * and safer to use an InputStream rather than a Reader, and to let the
897      * parser auto-detect the encoding from the XML declaration.
898      * </p>
899      *
900      * @param characterStream <code>Reader</code> to read from.
901      * @param systemId base for resolving relative URIs
902      * @return <code>Document</code> resultant Document object
903      * @throws JDOMException when errors occur in parsing
904      * @throws IOException when an I/O error prevents a document
905      * from being fully parsed
906      */

907     public Document build(Reader characterStream, String JavaDoc systemId)
908         throws JDOMException, IOException {
909
910         InputSource src = new InputSource(characterStream);
911         src.setSystemId(systemId);
912         return build(src);
913     }
914
915     /**
916      * <p>
917      * This builds a document from the supplied
918      * URI.
919      * </p>
920      * @param systemId URI for the input
921      * @return <code>Document</code> resultant Document object
922      * @throws JDOMException when errors occur in parsing
923      * @throws IOException when an I/O error prevents a document
924      * from being fully parsed
925      */

926     public Document build(String JavaDoc systemId)
927         throws JDOMException, IOException {
928         return build(new InputSource(systemId));
929     }
930
//    /**
//     * Imitation of File.toURL(), a JDK 1.2 method, reimplemented
//     * here to work with JDK 1.1.
//     *
//     * @see java.io.File
//     *
//     * @param f the file to convert
//     * @return the file path converted to a file: URL
//     */
//    protected URL fileToURL(File f) throws MalformedURLException {
//        String path = f.getAbsolutePath();
//        if (File.separatorChar != '/') {
//            path = path.replace(File.separatorChar, '/');
//        }
//        if (!path.startsWith("/")) {
//            path = "/" + path;
//        }
//        if (!path.endsWith("/") && f.isDirectory()) {
//            path = path + "/";
//        }
//        return new URL("file", "", path);
//    }
953

954     /** Custom File.toUrl() implementation to handle special chars in file names
955      *
956      * @param file file object whose path will be converted
957      * @return URL form of the file, with special characters handled
958      * @throws MalformedURLException if there's a problem constructing a URL
959      */

960     private static URL fileToURL(File file) throws MalformedURLException {
961         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
962         String JavaDoc path = file.getAbsolutePath();
963
964         // Convert non-URL style file separators
965
if (File.separatorChar != '/') {
966             path = path.replace(File.separatorChar, '/');
967         }
968
969         // Make sure it starts at root
970
if (!path.startsWith("/")) {
971             buffer.append('/');
972         }
973
974         // Copy, converting URL special characters as we go
975
int len = path.length();
976         for (int i = 0; i < len; i++) {
977             char c = path.charAt(i);
978             if (c == ' ')
979                 buffer.append("%20");
980             else if (c == '#')
981                 buffer.append("%23");
982             else if (c == '%')
983                 buffer.append("%25");
984             else if (c == '&')
985                 buffer.append("%26");
986             else if (c == ';')
987                 buffer.append("%3B");
988             else if (c == '<')
989                 buffer.append("%3C");
990             else if (c == '=')
991                 buffer.append("%3D");
992             else if (c == '>')
993                 buffer.append("%3E");
994             else if (c == '?')
995                 buffer.append("%3F");
996             else if (c == '~')
997                 buffer.append("%7E");
998             else
999                 buffer.append(c);
1000        }
1001
1002        // Make sure directories end with slash
1003
if (!path.endsWith("/") && file.isDirectory()) {
1004            buffer.append('/');
1005        }
1006
1007        // Return URL
1008
return new URL("file", "", buffer.toString());
1009    }
1010
1011    /**
1012     * Returns whether or not entities are being expanded into normal text
1013     * content.
1014     *
1015     * @return whether entities are being expanded
1016     */

1017    public boolean getExpandEntities() {
1018        return expand;
1019    }
1020
1021    /**
1022     * <p>
1023     * This sets whether or not to expand entities for the builder.
1024     * A true means to expand entities as normal content. A false means to
1025     * leave entities unexpanded as <code>EntityRef</code> objects. The
1026     * default is true.
1027     * </p>
1028     * <p>
1029     * When this setting is false, the internal DTD subset is retained; when
1030     * this setting is true, the internal DTD subset is not retained.
1031     * </p>
1032     * <p>
1033     * Note that Xerces (at least up to 1.4.4) has a bug where entities
1034     * in attribute values will be misreported if this flag is turned off,
1035     * resulting in entities to appear within element content. When turning
1036     * entity expansion off either avoid entities in attribute values, or
1037     * use another parser like Crimson.
1038     * http://nagoya.apache.org/bugzilla/show_bug.cgi?id=6111
1039     * </p>
1040     *
1041     * @param expand <code>boolean</code> indicating whether entity expansion
1042     * should occur.
1043     */

1044    public void setExpandEntities(boolean expand) {
1045        this.expand = expand;
1046    }
1047}
1048
Popular Tags