KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > nu > xom > benchmarks > DocumentModifier


1 /* Copyright 2002-2004 Elliotte Rusty Harold
2    
3    This library is free software; you can redistribute it and/or modify
4    it under the terms of version 2.1 of the GNU Lesser General Public
5    License as published by the Free Software Foundation.
6    
7    This library is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10    GNU Lesser General Public License for more details.
11    
12    You should have received a copy of the GNU Lesser General Public
13    License along with this library; if not, write to the
14    Free Software Foundation, Inc., 59 Temple Place, Suite 330,
15    Boston, MA 02111-1307 USA
16    
17    You can contact Elliotte Rusty Harold by sending e-mail to
18    elharo@metalab.unc.edu. Please include the word "XOM" in the
19    subject line. The XOM home page is located at http://www.xom.nu/
20 */

21
22 package nu.xom.benchmarks;
23
24 import java.io.BufferedInputStream JavaDoc;
25 import java.io.ByteArrayInputStream JavaDoc;
26 import java.io.ByteArrayOutputStream JavaDoc;
27 import java.io.IOException JavaDoc;
28 import java.io.InputStream JavaDoc;
29
30 import nu.xom.Attribute;
31 import nu.xom.Builder;
32 import nu.xom.DocType;
33 import nu.xom.Document;
34 import nu.xom.Element;
35 import nu.xom.Node;
36 import nu.xom.ParentNode;
37 import nu.xom.Serializer;
38 import nu.xom.Text;
39 import nu.xom.ParsingException;
40
41
42 /**
43  *
44  * <p>
45  * Based on Dennis Sosnoski's benchmarks:
46  * </p>
47  *
48  * <blockquote>
49  * This test looks at the time required to systematically
50  * modify the constructed document representation. It walks
51  * the representation, deleting all isolated whitespace content
52  * and wrapping each non-whitespace content string with a new,
53  * added, element. It also adds an attribute to each element of
54  * the original document that contained non-whitespace content.
55  * This test is intended to represent the performance of the
56  * document models across a range of modifications to the
57  * documents. As with the walk times, the modify times are
58  * considerably faster than the parse times. As a result,
59  * the parse times are going to be more important for applications
60  * that make only a single pass through each parsed document.
61  * </blockquote>
62  *
63  * @author Elliotte Rusty Harold
64  * @version 1.0
65  *
66  */

67 class DocumentModifier {
68
69     public static void main(String JavaDoc[] args) {
70      
71         if (args.length <= 0) {
72           System.out.println(
73             "Usage: java nu.xom.benchmarks.DocumentModifier URL"
74           );
75           return;
76         }
77          
78         DocumentModifier iterator = new DocumentModifier();
79         Builder parser = new Builder();
80         try {
81             // Separate out the basic I/O by parsing document,
82
// and then serializing into a byte array.
83
// This caches the and removes any dependence on the DTD.
84
Document doc = parser.build(args[0]);
85             DocType type = doc.getDocType();
86             if (type != null) {
87                 doc.removeChild(type);
88             }
89             ByteArrayOutputStream JavaDoc out = new ByteArrayOutputStream JavaDoc();
90             Serializer serializer = new Serializer(out);
91             serializer.write(doc);
92             serializer.flush();
93             out.close();
94             byte[] data = out.toByteArray();
95              
96             warmup(parser, iterator, data, 5);
97             InputStream JavaDoc raw = new BufferedInputStream JavaDoc(
98               new ByteArrayInputStream JavaDoc(data)
99             );
100             
101             // Try to avoid garbage collection pauses
102
System.gc(); System.gc(); System.gc();
103             
104             long prebuild = System.currentTimeMillis();
105           
106             // Read the entire document into memory
107
Document document = parser.build(raw);
108             long postbuild = System.currentTimeMillis();
109             
110             System.out.println((postbuild - prebuild)
111               + "ms to build the document");
112
113             long prewalk = System.currentTimeMillis();
114             performTask(iterator, document);
115             long postwalk = System.currentTimeMillis();
116             
117             System.out.println((postwalk - prewalk)
118               + "ms to modify the document");
119             
120         }
121         catch (IOException JavaDoc ex) {
122             System.out.println(ex);
123         }
124         catch (ParsingException ex) {
125             System.out.println(ex);
126         }
127   
128     } // end main
129

130     private static void warmup(Builder builder,
131       DocumentModifier iterator, byte[] data, int numPasses)
132       throws IOException JavaDoc, ParsingException {
133           
134         InputStream JavaDoc in = new BufferedInputStream JavaDoc(
135           new ByteArrayInputStream JavaDoc(data));
136         Document doc = builder.build(in);
137         for (int i = 0; i < numPasses; i++) {
138             performTask(iterator, doc);
139         }
140     }
141
142     private static void performTask(DocumentModifier iterator, Document document)
143       throws IOException JavaDoc {
144         iterator.followNode(document);
145     }
146
147     // note use of recursion
148
public void followNode(Node node) throws IOException JavaDoc {
149     
150         // Chances are most of the time is spent in the instanceof test
151
if (node instanceof Text) {
152             if (node.getValue().trim().length() == 0) {
153                 node.detach();
154             }
155             else {
156                 Element dummy = new Element("dummy");
157                 ParentNode parent = node.getParent();
158                 parent.insertChild(dummy, parent.indexOf(node));
159                 node.detach();
160                 dummy.appendChild(node);
161             }
162             return;
163         }
164         else if (node instanceof Element){
165             Element element = (Element) node;
166             element.addAttribute(new Attribute("class", "original"));
167             for (int i = 0; i < node.getChildCount(); i++) {
168                 followNode(node.getChild(i));
169             }
170         }
171         else {
172             for (int i = 0; i < node.getChildCount(); i++) {
173                 followNode(node.getChild(i));
174             }
175         }
176     
177     }
178
179 }
Popular Tags