DocumentComparator


1   /*
2   Copyright (c) 2003-2004, Dennis M. Sosnoski
3   All rights reserved.
4   
5   Redistribution and use in source and binary forms, with or without modification,
6   are permitted provided that the following conditions are met:
7   
8    * Redistributions of source code must retain the above copyright notice, this
9      list of conditions and the following disclaimer.
10   * Redistributions in binary form must reproduce the above copyright notice,
11     this list of conditions and the following disclaimer in the documentation
12     and/or other materials provided with the distribution.
13   * Neither the name of JiBX nor the names of its contributors may be used
14     to endorse or promote products derived from this software without specific
15     prior written permission.
16  
17  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
21  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
24  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28  
29  package org.jibx.extras;
30  
31  import java.io.IOException  ;
32  import java.io.PrintStream  ;
33  import java.io.Reader  ;
34  
35  import org.xmlpull.v1.XmlPullParser;
36  import org.xmlpull.v1.XmlPullParserException;
37  import org.xmlpull.v1.XmlPullParserFactory;
38  
39  /**
40   * XML document comparator. This uses XMLPull parsers to read a pair of
41   * documents in parallel, comparing the streams of components seen from the two
42   * documents. The comparison ignores differences in whitespace separating
43   * elements, but treats whitespace as significant within elements with only
44   * character data content. 
45   * 
46   * @author Dennis M. Sosnoski
47   * @version 1.0
48   */
49  
50  public class DocumentComparator
51  {
52      /** Parser for first document. */
53      protected XmlPullParser m_parserA;
54  
55      /** Parser for second document. */
56      protected XmlPullParser m_parserB;
57      
58      /** Print stream for reporting differences. */
59      protected PrintStream   m_differencePrint;
60  
61      /**
62       * Constructor. Builds the actual parser.
63       *
64       * @param print print stream for reporting differences
65       * @throws XmlPullParserException on error creating parsers
66       */
67  
68      public DocumentComparator(PrintStream   print) throws XmlPullParserException {
69          XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
70          factory.setNamespaceAware(true);
71          m_parserA = factory.newPullParser();
72          m_parserB = factory.newPullParser();
73          m_differencePrint = print;
74      }
75  
76      /**
77       * Build parse input position description.
78       *
79       * @param parser for which to build description
80       * @return text description of current parse position
81       */
82  
83      protected String   buildPositionString(XmlPullParser parser) {
84          return " line " + parser.getLineNumber() + ", col " +
85              parser.getColumnNumber();
86      }
87  
88      /**
89       * Prints error description text. The generated text include position
90       * information from both documents. 
91       *
92       * @param msg error message text
93       */
94      
95      protected void printError(String   msg) {
96          if (m_differencePrint != null) {
97              m_differencePrint.println(msg + " - from " +
98                  buildPositionString(m_parserA) + " to " +
99                  buildPositionString(m_parserB));
100         }
101     }
102 
103     /**
104      * Verifies that the attributes on the current start tags match.
105      *
106      * @return <code>true</code> if the attributes match, <code>false</code> if
107      * not
108      */
109     
110     protected boolean matchAttributes() {
111         int count = m_parserA.getAttributeCount();
112         if (m_parserB.getAttributeCount() != count) {
113             return false;
114         }
115         for (int i = 0; i < count; i++) {
116             String   name = m_parserA.getAttributeName(i);
117             String   ns = m_parserA.getAttributeNamespace(i);
118             String   value = m_parserA.getAttributeValue(i);
119             if (!value.equals(m_parserB.getAttributeValue(ns, name))) {
120                 return false;
121             }
122         }
123         return true;
124     }
125 
126     /**
127      * Verifies that the current start or end tag names match.
128      *
129      * @return <code>true</code> if the names match, <code>false</code> if not
130      */
131     
132     protected boolean matchNames() {
133         return m_parserA.getName().equals(m_parserB.getName()) &&
134             m_parserA.getNamespace().equals(m_parserB.getNamespace());
135     }
136 
137     /**
138      * Compares a pair of documents by reading them in parallel from a pair of
139      * parsers. The comparison ignores differences in whitespace separating
140      * elements, but treats whitespace as significant within elements with only
141      * character data content. 
142      *
143      * @param rdra reader for first document to be compared
144      * @param rdrb reader for second document to be compared
145      * @return <code>true</code> if the documents are the same,
146      * <code>false</code> if they're different
147      */
148     
149     public boolean compare(Reader   rdra, Reader   rdrb) {
150         try {
151         
152             // set the documents and initialize
153             m_parserA.setInput(rdra);
154             m_parserB.setInput(rdrb);
155             boolean content = false;
156             String   texta = "";
157             String   textb = "";
158             while (true) {
159                 
160                 // start by collecting and moving past text content
161                 if (m_parserA.getEventType() == XmlPullParser.TEXT) {
162                     texta = m_parserA.getText();
163                     m_parserA.next();
164                 }
165                 if (m_parserB.getEventType() == XmlPullParser.TEXT) {
166                     textb = m_parserB.getText();
167                     m_parserB.next();
168                 }
169                 
170                 // now check actual tag state
171                 int typea = m_parserA.getEventType();
172                 int typeb = m_parserB.getEventType();
173                 if (typea != typeb) {
174                     printError("Different document structure");
175                     return false;
176                 } else if (typea == XmlPullParser.START_TAG) {
177                     
178                     // compare start tags, attributes, and prior text
179                     content = true;
180                     if (!matchNames()) {
181                         printError("Different start tags");
182                         return false;
183                     } else if (!matchAttributes()) {
184                         printError("Different attributes");
185                         return false;
186                     } else if (!texta.trim().equals(textb.trim())) {
187                         printError("Different text content between elements");
188                         return false;
189                     }
190                     texta = textb = "";
191                     
192                 } else if (typea == XmlPullParser.END_TAG) {
193                     
194                     // compare end tags and prior text
195                     if (!matchNames()) {
196                         printError("Different end tags");
197                         return false;
198                     }
199                     if (content) {
200                         if (!texta.equals(textb)) {
201                             printError("Different text content");
202                             if (m_differencePrint != null) {
203                                 m_differencePrint.println(" \"" + texta +
204                                 "\" (length " + texta.length() + " vs. \"" +
205                                 textb + "\" (length " + textb.length() + ')');
206 
207                             }
208                             return false;
209                         }
210                         content = false;
211                     } else if (!texta.trim().equals(textb.trim())) {
212                         printError("Different text content between elements");
213                         return false;
214                     }
215                     texta = textb = "";
216                     
217                 } else if (typea == XmlPullParser.END_DOCUMENT) {
218                     return true;
219                 }
220                 
221                 // advance both parsers to next component
222                 m_parserA.next();
223                 m_parserB.next();
224                 
225             }
226         } catch (IOException   ex) {
227             if (m_differencePrint != null) {
228                 ex.printStackTrace(m_differencePrint);
229             }
230             return false;
231         } catch (XmlPullParserException ex) {
232             if (m_differencePrint != null) {
233                 ex.printStackTrace(m_differencePrint);
234             }
235             return false;
236         }
237     }
238 }
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags