KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > nu > xom > canonical > Canonicalizer


/* Copyright 2002-2005 Elliotte Rusty Harold
   
   This library is free software; you can redistribute it and/or modify
   it under the terms of version 2.1 of the GNU Lesser General Public 
   License as published by the Free Software Foundation.
   
   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
   GNU Lesser General Public License for more details.
   
   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the 
   Free Software Foundation, Inc., 59 Temple Place, Suite 330, 
   Boston, MA 02111-1307  USA
   
   You can contact Elliotte Rusty Harold by sending e-mail to
   elharo@metalab.unc.edu. Please include the word "XOM" in the
   subject line. The XOM home page is located at http://www.xom.nu/
*/

21
22 package nu.xom.canonical;
23
24 import java.io.IOException JavaDoc;
25 import java.io.OutputStream JavaDoc;
26 import java.util.Arrays JavaDoc;
27 import java.util.Comparator JavaDoc;
28
29 import nu.xom.Attribute;
30 import nu.xom.Comment;
31 import nu.xom.DocType;
32 import nu.xom.Document;
33 import nu.xom.Element;
34 import nu.xom.Node;
35 import nu.xom.ParentNode;
36 import nu.xom.ProcessingInstruction;
37 import nu.xom.Serializer;
38 import nu.xom.Text;
39
40 /**
41  * <p>
42  * Writes XML in the format specified by <a target="_top"
43  * HREF="http://www.w3.org/TR/2001/REC-xml-c14n-20010315">Canonical
44  * XML Version 1.0</a>.
45  * </p>
46  *
47  * <p>
48  * Only complete documents can be canonicalized.
49  * Document subset canonicalization is not yet supported.
50  * </p>
51  *
52  * @author Elliotte Rusty Harold
53  * @version 1.0
54  *
55  */

56 public class Canonicalizer {
57
58     private boolean withComments;
59     private Serializer serializer;
60     
61     private static Comparator JavaDoc comparator = new AttributeComparator();
62     
63     private static class AttributeComparator implements Comparator JavaDoc {
64         
65         public int compare(Object JavaDoc o1, Object JavaDoc o2) {
66             Attribute a1 = (Attribute) o1;
67             Attribute a2 = (Attribute) o2;
68             
69             String JavaDoc namespace1 = a1.getNamespaceURI();
70             String JavaDoc namespace2 = a2.getNamespaceURI();
71             if (namespace1.equals(namespace2)) {
72                 return a1.getLocalName().compareTo(a2.getLocalName());
73             }
74             else if (namespace1.equals("")) {
75                  return -1;
76             }
77             else if (namespace2.equals("")) {
78                  return 1;
79             }
80             else { // compare namespace URIs
81
return namespace1.compareTo(namespace2);
82             }
83             
84         }
85
86     }
87     
88     
89     /**
90      * <p>
91      * Creates a <code>Canonicalizer</code> that outputs a
92      * canonical XML document with comments.
93      * </p>
94      *
95      * @param out the output stream the document
96      * is written onto
97      */

98     public Canonicalizer(OutputStream JavaDoc out) {
99         this(out, true);
100     }
101
102     
103     /**
104      * <p>
105      * Creates a <code>Canonicalizer</code> that outputs a
106      * canonical XML document with or without comments.
107      * </p>
108      *
109      * @param out the output stream the document
110      * is written onto
111      * @param withComments true if comments should be included
112      * in the output, false otherwise
113      */

114     public Canonicalizer(
115       OutputStream JavaDoc out, boolean withComments) {
116         this.serializer = new CanonicalXMLSerializer(out);
117         serializer.setLineSeparator("\n");
118         this.withComments = withComments;
119     }
120
121
122     private class CanonicalXMLSerializer extends Serializer {
123
124         /**
125          * <p>
126          * Creates a <code>Serializer</code> that outputs a
127          * canonical XML document with or without comments.
128          * </p>
129          *
130          * @param out the <code>OutputStream</code> the document
131          * is written onto
132          * @param withComments true if comments should be included
133          * in the output, false otherwise
134          */

135         CanonicalXMLSerializer(OutputStream JavaDoc out) {
136             super(out);
137             setLineSeparator("\n");
138         }
139
140         
141         /**
142          * <p>
143          * Serializes a document onto the output
144          * stream using the canonical XML algorithm.
145          * </p>
146          *
147          * @param doc the <code>Document</code> to serialize
148          *
149          * @throws IOException if the underlying <code>OutputStream</code>
150          * encounters an I/O error
151          */

152          public final void write(Document doc) throws IOException JavaDoc {
153             
154             int position = 0;
155             while (true) {
156                 Node child = doc.getChild(position);
157                 writeChild(child);
158                 position++;
159                 if (child instanceof ProcessingInstruction) breakLine();
160                 else if (child instanceof Comment && withComments) {
161                     breakLine();
162                 }
163                 else if (child instanceof Element) break;
164             }
165             
166             for (int i = position; i < doc.getChildCount(); i++) {
167                 Node child = doc.getChild(i);
168                 if (child instanceof ProcessingInstruction) breakLine();
169                 else if (child instanceof Comment && withComments) {
170                     breakLine();
171                 }
172                 writeChild(child);
173             }
174             
175             flush();
176         }
177      
178          
179         /**
180          * <p>
181          * Serializes an element onto the output stream using the canonical
182          * XML algorithm. The result is guaranteed to be well-formed.
183          * If <code>element</code> does not have a parent element, it will
184          * also be namespace well-formed.
185          * </p>
186          *
187          * @param element the <code>Element</code> to serialize
188          *
189          * @throws IOException if the underlying <code>OutputStream</code>
190          * encounters an I/O error
191          */

192         protected final void write(Element element)
193           throws IOException JavaDoc {
194
195             // treat empty elements differently to avoid an
196
// instance of test
197
if (element.getChildCount() == 0) {
198                 writeStartTag(element, false);
199                 writeEndTag(element);
200             }
201             else {
202                 Node current = element;
203                 boolean end = false;
204                 int index = -1;
205                 int[] indexes = new int[10];
206                 int top = 0;
207                 indexes[0] = -1;
208                 while (true) {
209                     if (!end && current.getChildCount() > 0) {
210                        writeStartTag((Element) current, false);
211                        current = current.getChild(0);
212                        index = 0;
213                        top++;
214                        indexes = grow(indexes, top);
215                        indexes[top] = 0;
216                     }
217                     else {
218                         if (end) {
219                             writeEndTag((Element) current);
220                             if (current == element) break;
221                         }
222                         else {
223                             writeChild(current);
224                         }
225                         end = false;
226                         ParentNode parent = current.getParent();
227                         if (parent.getChildCount() - 1 == index) {
228                             current = parent;
229                             top--;
230                             if (current != element) {
231                                 parent = current.getParent();
232                                 index = indexes[top];
233                             }
234                             end = true;
235                         }
236                         else {
237                             index++;
238                             indexes[top] = index;
239                             current = parent.getChild(index);
240                         }
241                     }
242                 }
243             }
244             
245         }
246     
247         
248         private int[] grow(int[] indexes, int top) {
249             
250             if (top < indexes.length) return indexes;
251             int[] result = new int[indexes.length*2];
252             System.arraycopy(indexes, 0, result, 0, indexes.length);
253             return result;
254             
255         }
256
257
258         protected void writeStartTag(Element element, boolean isEmpty)
259           throws IOException JavaDoc {
260             writeRaw("<");
261             writeRaw(element.getQualifiedName());
262             
263             ParentNode parent = element.getParent();
264             
265             Element parentElement = null;
266             if (parent instanceof Element) {
267                 parentElement = (Element) parent;
268             }
269             
270             for (int i = 0;
271                  i < element.getNamespaceDeclarationCount();
272                  i++) {
273                 String JavaDoc prefix = element.getNamespacePrefix(i);
274                 String JavaDoc uri = element.getNamespaceURI(prefix);
275                 if (parentElement != null) {
276                    if (uri.equals(
277                      parentElement.getNamespaceURI(prefix))) {
278                        continue;
279                    }
280                 }
281                 else if (uri.equals("")) {
282                     continue; // no need to say xmlns=""
283
}
284                 
285                 writeRaw(" ");
286                 writeNamespaceDeclaration(prefix, uri);
287             }
288             
289             Attribute[] sorted = sortAttributes(element);
290             for (int i = 0; i < sorted.length; i++) {
291                 writeRaw(" ");
292                 write(sorted[i]);
293             }
294             
295             writeRaw(">");
296         }
297     
298         
299         protected void write(Attribute attribute) throws IOException JavaDoc {
300             writeRaw(attribute.getQualifiedName());
301             writeRaw("=\"");
302             writeRaw(prepareAttributeValue(attribute));
303             writeRaw("\"");
304         }
305         
306         
307         protected void writeEndTag(Element element) throws IOException JavaDoc {
308             writeRaw("</");
309             writeRaw(element.getQualifiedName());
310             writeRaw(">");
311         }
312         
313         
314         private Attribute[] sortAttributes(Element element) {
315     
316             Attribute[] result
317               = new Attribute[element.getAttributeCount()];
318             for (int i = 0; i < element.getAttributeCount(); i++) {
319                 result[i] = element.getAttribute(i);
320             }
321             Arrays.sort(result, comparator);
322             
323             return result;
324             
325         }
326     
327         
328         private String JavaDoc prepareAttributeValue(Attribute attribute) {
329     
330             String JavaDoc value = attribute.getValue();
331             StringBuffer JavaDoc result = new StringBuffer JavaDoc(value.length());
332     
333             if (attribute.getType().equals(Attribute.Type.CDATA)
334               || attribute.getType().equals(Attribute.Type.UNDECLARED)) {
335                 char[] data = value.toCharArray();
336                 for (int i = 0; i < data.length; i++) {
337                     char c = data[i];
338                     if (c == '\t') {
339                         result.append("&#x9;");
340                     }
341                     else if (c == '\n') {
342                         result.append("&#xA;");
343                     }
344                     else if (c == '\r') {
345                         result.append("&#xD;");
346                     }
347                     else if (c == '\"') {
348                         result.append("&quot;");
349                     }
350                     else if (c == '&') {
351                         result.append("&amp;");
352                     }
353                     else if (c == '<') {
354                         result.append("&lt;");
355                     }
356                     else {
357                         result.append(c);
358                     }
359                 }
360             }
361             else {
362                 // According to the spec, "Whitespace character references
363
// other than &#x20; are not affected by attribute value
364
// normalization. For parsed documents, the parser will
365
// still replace these with the actual character. I am
366
// going to assume that if one is found here, that the
367
// user meant to put it there; and so we will escape it
368
// with a character reference
369
char[] data = value.toCharArray();
370                 boolean seenFirstNonSpace = false;
371                 for (int i = 0; i < data.length; i++) {
372                     if (data[i] == ' ') {
373                         if (i != data.length-1 && data[i+1] != ' ' && seenFirstNonSpace) {
374                              result.append(data[i]);
375                         }
376                         continue;
377                     }
378                     seenFirstNonSpace = true;
379                     if (data[i] == '\t') {
380                         result.append("&#x9;");
381                     }
382                     else if (data[i] == '\n') {
383                         result.append("&#xA;");
384                     }
385                     else if (data[i] == '\r') {
386                         result.append("&#xD;");
387                     }
388                     else if (data[i] == '\"') {
389                         result.append("&quot;");
390                     }
391                     else if (data[i] == '&') {
392                         result.append("&amp;");
393                     }
394                     else if (data[i] == '<') {
395                         result.append("&lt;");
396                     }
397                     else {
398                         result.append(data[i]);
399                     }
400                 }
401             }
402     
403             return result.toString();
404             
405         }
406         
407         
408         /**
409          * <p>
410          * Serializes a <code>Text</code> object
411          * onto the output stream using the UTF-8 encoding.
412          * The reserved characters &lt;, &gt;, and &amp;
413          * are escaped using the standard entity references such as
414          * <code>&amp;lt;</code>, <code>&amp;gt;</code>,
415          * and <code>&amp;amp;</code>.
416          * </p>
417          *
418          * @param text the <code>Text</code> to serialize
419          *
420          * @throws IOException if the underlying <code>OutputStream</code>
421          * encounters an I/O error
422          */

423         protected final void write(Text text) throws IOException JavaDoc {
424             String JavaDoc input = text.getValue();
425             StringBuffer JavaDoc result = new StringBuffer JavaDoc(input.length());
426             for (int i = 0; i < input.length(); i++) {
427                 char c = input.charAt(i);
428                 if (c == '\r') {
429                     result.append("&#xD;");
430                 }
431                 else if (c == '&') {
432                     result.append("&amp;");
433                 }
434                 else if (c == '<') {
435                     result.append("&lt;");
436                 }
437                 else if (c == '>') {
438                     result.append("&gt;");
439                 }
440                 else {
441                     result.append(c);
442                 }
443             }
444             writeRaw(result.toString());
445         }
446     
447         
448         /**
449          * <p>
450          * Serializes a <code>Comment</code> object
451          * onto the output stream if and only if this
452          * serializer is configured to produce canonical XML
453          * with comments.
454          * </p>
455          *
456          * @param comment the <code>Comment</code> to serialize
457          *
458          * @throws IOException if the underlying <code>OutputStream</code>
459          * encounters an I/O error
460          */

461         protected final void write(Comment comment) throws IOException JavaDoc {
462             if (withComments) super.write(comment);
463         }
464         
465         
466         /**
467          * <p>
468          * Does nothing because canonical XML does not include
469          * document type declarations.
470          * </p>
471          *
472          * @param doctype the document type declaration to serialize
473          */

474         protected final void write(DocType doctype) {
475             // DocType is not serialized in canonical XML
476
}
477        
478         
479     }
480
481     
482     /**
483      * <p>
484      * Serializes a document onto the output
485      * stream using the canonical XML algorithm.
486      * </p>
487      *
488      * @param doc the document to serialize
489      *
490      * @throws IOException if the underlying <code>OutputStream</code>
491      * encounters an I/O error
492      */

493     public final void write(Document doc) throws IOException JavaDoc {
494         serializer.write(doc);
495         serializer.flush();
496     }
497  
498     
499 }
Popular Tags