KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > snipsnap > util > XMLSnipRepair


1 /*
2  * This file is part of "SnipSnap Wiki/Weblog".
3  *
4  * Copyright (c) 2002 Stephan J. Schmidt, Matthias L. Jugel
5  * All Rights Reserved.
6  *
7  * Please visit http://snipsnap.org/ for updates and contact.
8  *
9  * --LICENSE NOTICE--
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License
12  * as published by the Free Software Foundation; either version 2
13  * of the License, or (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23  * --LICENSE NOTICE--
24  */

25 package org.snipsnap.util;
26
27 import org.dom4j.Document;
28 import org.dom4j.DocumentException;
29 import org.dom4j.DocumentHelper;
30 import org.dom4j.Element;
31 import org.dom4j.io.OutputFormat;
32 import org.dom4j.io.SAXReader;
33 import org.dom4j.io.XMLWriter;
34
35 import java.io.BufferedInputStream JavaDoc;
36 import java.io.ByteArrayOutputStream JavaDoc;
37 import java.io.File JavaDoc;
38 import java.io.FileInputStream JavaDoc;
39 import java.io.FileOutputStream JavaDoc;
40 import java.io.FilenameFilter JavaDoc;
41 import java.io.IOException JavaDoc;
42 import java.io.InputStreamReader JavaDoc;
43 import java.io.OutputStream JavaDoc;
44 import java.io.StringReader JavaDoc;
45 import java.text.NumberFormat JavaDoc;
46 import java.util.HashSet JavaDoc;
47 import java.util.Iterator JavaDoc;
48 import java.util.Map JavaDoc;
49 import java.util.Set JavaDoc;
50 import java.util.TreeMap JavaDoc;
51
52 /**
53  * Repair XML File
54  */

55 public class XMLSnipRepair {
56   public static void main(String JavaDoc args[]) {
57     if (args.length < 2) {
58       System.err.println("usage: XMLSnipRepair <input file> <output file> [<webapp directory>]");
59       System.exit(0);
60     }
61
62   }
63
64   public static void repair(File JavaDoc input, File JavaDoc output, File JavaDoc webAppDir) {
65     System.err.println("STEP 1: parsing input file ...");
66     Document document = null;
67     try {
68       document = load(input);
69     } catch (Exception JavaDoc e) {
70       System.err.println("Unable to read input document: " + e);
71       System.err.println("This is usually the case for illegal XML characters, please manually edit the file and remove them.");
72       System.exit(0);
73     }
74
75     System.err.println("STEP 2: checking SnipSpace consistency ...");
76     Document repaired = repair(document, webAppDir);
77
78     System.err.println("STEP 3: writing output file ...");
79     OutputFormat outputFormat = new OutputFormat();
80     outputFormat.setEncoding("UTF-8");
81     outputFormat.setNewlines(true);
82     try {
83       XMLWriter xmlWriter = new XMLWriter(null == output ? System.out : (OutputStream JavaDoc) new FileOutputStream JavaDoc(output));
84       xmlWriter.write(repaired);
85       xmlWriter.flush();
86       xmlWriter.close();
87     } catch (Exception JavaDoc e) {
88       System.err.println("Error: unable to write data: " + e);
89     }
90     System.err.println("Finished.");
91   }
92
93   static int errCount = 0;
94   static int curr = 0;
95
96   /**
97    * Load snips and users into the SnipSpace from an xml document out of a stream.
98    *
99    * @param file the file to load from
100    */

101   private static Document load(File JavaDoc file) throws Exception JavaDoc {
102     final long fileLength = file.length();
103     SAXReader saxReader = new SAXReader();
104     System.err.print("0%");
105     InputStreamReader JavaDoc reader = new InputStreamReader JavaDoc(new FileInputStream JavaDoc(file), "UTF-8") {
106       public int read(char[] chars) throws IOException JavaDoc {
107         int n = super.read(chars);
108         for (int i = 0; i < n; i++) {
109           chars[i] = replaceIfIllegal(chars[i]);
110         }
111         return n;
112       }
113
114       public int read(char[] chars, int start, int length) throws IOException JavaDoc {
115         int n = super.read(chars, start, length);
116         for (int i = 0; i < n; i++) {
117           chars[i] = replaceIfIllegal(chars[i]);
118         }
119         readProgress(fileLength, curr += n, length);
120         return n;
121       }
122
123       private char replaceIfIllegal(char c) {
124         if (c < 0x20 && !(c == 0x09 || c == 0x0a || c == 0x0d)) {
125 // System.err.println("Replacing illegal character '0x" + Integer.toHexString(c) + "' by space.");
126
errCount++;
127           return (char) 0x20;
128         }
129         return c;
130       }
131
132       private void readProgress(long length, long current, int blockSize) {
133         long percentage = current * 100 / length;
134         if (percentage % 5 != 0 && ((current - blockSize) * 100 / length) % 5 == 0) {
135           System.err.print(".");
136         } else if (percentage % 20 == 0 && ((current - blockSize) * 100 / length) % 20 != 0) {
137           System.err.print(NumberFormat.getIntegerInstance().format(percentage) + "%");
138         }
139       }
140     };
141
142
143     Document document = saxReader.read(reader);
144     System.err.println();
145
146     if (errCount > 0) {
147       System.err.println("Replaced " + errCount + " illegal characters in input document by a space.");
148       System.err.println("Characters not considered valid in an XML document are considered illegal.");
149       System.err.println("This includes all characters with a code below 32 unless its TAB, CR or LF.");
150     }
151
152     return document;
153   }
154
155   private static Document repair(Document document, File JavaDoc webAppRoot) {
156     Map JavaDoc userData = new TreeMap JavaDoc();
157     Map JavaDoc snipData = new TreeMap JavaDoc();
158     Map JavaDoc unknown = new TreeMap JavaDoc();
159
160     Element rootEl = document.getRootElement();
161     Iterator JavaDoc elementIt = rootEl.elementIterator();
162
163     System.err.println("STEP 2.1: checking for duplicates ...");
164     long identDup = 0;
165     long oldDup = 0;
166     long newDup = 0;
167     while (elementIt.hasNext()) {
168       Element element = (Element) elementIt.next();
169       Element idElement = null;
170       Map JavaDoc data = null;
171       if ("user".equals(element.getName())) {
172         idElement = element.element("login");
173         data = userData;
174       } else if ("snip".equals(element.getName())) {
175         idElement = element.element("name");
176         data = snipData;
177       }
178
179       if (null != data && null != idElement) {
180         String JavaDoc id = element.getName() + "[" + idElement.getText() + "]";
181         long mtime = Long.parseLong(element.element("mTime").getTextTrim());
182
183         Element existingElement = (Element) data.get(id);
184         if (existingElement != null) {
185           long lastmtime = Long.parseLong(existingElement.element("mTime").getTextTrim());
186           if (mtime > lastmtime) {
187             newDup++;
188             System.err.println("Replacing duplicate by newer element: " + id + " (" + (mtime - lastmtime) + "ms)");
189             data.put(id, element);
190           } else if (mtime == lastmtime) {
191             identDup++;
192             System.err.println("Identical duplicate found: " + id);
193           } else {
194             oldDup++;
195             System.err.println("Older duplicate found: " + id);
196           }
197           if (snipData == data) {
198             String JavaDoc name = idElement.getText();
199             if (name.startsWith("comment-") && name.lastIndexOf("-") != -1) {
200               String JavaDoc commentSnip = name.substring("comment-".length(), name.lastIndexOf("-"));
201               Element commentEl = element.element("commentSnip");
202               if (commentEl == null) {
203                 commentEl = element.addElement("commentSnip");
204               }
205 // System.out.println("commentSnip='" + commentSnip.toUpperCase() + "' commentEl='" + commentEl.getText().toUpperCase() + "'");
206
if (!commentSnip.toUpperCase().equals(commentEl.getText().toUpperCase())) {
207                 commentEl.addText(commentSnip);
208                 System.err.println("Fixing commented snip for '" + name + "' (" + commentSnip + ")");
209               }
210             } else if (name.matches("\\d\\d\\d\\d-\\d\\d-\\d\\d")) {
211               Element parentEl = element.element("parentSnip");
212               if (null == parentEl) {
213                 parentEl = element.addElement("parentSnip");
214               }
215               if (!"start".equals(parentEl.getText())) {
216                 parentEl.addText("start");
217                 System.err.println("Fixing parent snip for '" + name + "'");
218               }
219             }
220           }
221         } else {
222           data.put(id, element);
223         }
224       } else {
225         System.err.println("Unknown element '" + element.getName() + "', ignoring ...");
226         unknown.put(element, element);
227       }
228     }
229
230     System.err.println("Found " + identDup + " identical duplicates, replaced " + newDup + ", ignored " + oldDup + ".");
231     if (unknown.size() > 0) {
232       System.err.println("Found " + unknown.size() + " unknown xml elements.");
233     }
234
235     Document outputDocument = DocumentHelper.createDocument();
236     outputDocument.addElement(rootEl.getName());
237     rootEl = outputDocument.getRootElement();
238
239     System.err.println("STEP 2.2: finishing user data (" + userData.size() + ")...");
240     Iterator JavaDoc userIt = userData.values().iterator();
241     while (userIt.hasNext()) {
242       Element userEl = (Element) userIt.next();
243       rootEl.add(userEl.detach());
244     }
245
246     int attCount = 0;
247     System.err.print("STEP 2.3: fixing snip data (" + snipData.size() + ")");
248     if (webAppRoot != null) {
249       System.out.println(" and attachments ...");
250     } else {
251       System.out.println();
252     }
253     Iterator JavaDoc snipIt = snipData.values().iterator();
254     while (snipIt.hasNext()) {
255       Element snipEl = (Element) snipIt.next();
256       if (webAppRoot != null) {
257         attCount += storeAttachments(snipEl, new File JavaDoc(webAppRoot, "/WEB-INF/files"));
258         attCount += storeOldImages(snipEl, new File JavaDoc(webAppRoot, "/images"));
259       }
260       rootEl.add(snipEl.detach());
261     }
262     System.err.println("Added " + attCount + " attachments.");
263     return outputDocument;
264   }
265
266   private static int storeOldImages(Element snipEl, File JavaDoc imageRoot) {
267     int attCount = 0;
268     final String JavaDoc snipName = snipEl.element("name").getText();
269     File JavaDoc[] files = imageRoot.listFiles(new FilenameFilter JavaDoc() {
270       public boolean accept(File JavaDoc file, String JavaDoc s) {
271         return s.startsWith("image-" + snipName);
272       }
273     });
274
275     Element attachmentsEl = snipEl.element("attachments");
276     if (null == attachmentsEl) {
277       attachmentsEl = DocumentHelper.createElement("attachments");
278       snipEl.add(attachmentsEl);
279     }
280
281     Set JavaDoc attList = new HashSet JavaDoc();
282     Iterator JavaDoc attIt = attachmentsEl.elementIterator("attachment");
283     while (attIt.hasNext()) {
284       Element attEl = (Element) attIt.next();
285       if(attEl != null && attEl.element("name") != null) {
286         attList.add(attEl.element("name").getText());
287       }
288     }
289
290     for (int n = 0; n < files.length; n++) {
291       File JavaDoc file = files[n];
292       String JavaDoc fileName = file.getName().substring(("image-" + snipName + "-").length());
293       if (!attList.contains(fileName)) {
294         Element attEl = attachmentsEl.addElement("attachment");
295         attEl.addElement("name").addText(fileName);
296         attEl.addElement("content-type").addText("image/" + fileName.substring(fileName.lastIndexOf(".") + 1));
297         attEl.addElement("size").addText("" + file.length());
298         attEl.addElement("date").addText("" + file.lastModified());
299         attEl.addElement("location").addText(snipName + "/" + fileName);
300         try {
301           addAttachmentFile(attEl, file);
302           attCount++;
303         } catch (IOException JavaDoc e) {
304           System.err.println("Error adding attachment data: " + e.getMessage());
305           attEl.detach();
306         }
307         System.err.println("Added old image attachment '" + fileName + "' to '" + snipName + "'");
308       }
309     }
310     return attCount;
311   }
312
313   private static int storeAttachments(Element snipEl, File JavaDoc attRoot) {
314     Element attachmentsEl = snipEl.element("attachments");
315     attachmentsEl.detach();
316     String JavaDoc textContent = attachmentsEl.getText();
317     if (textContent != null && textContent.length() > 0 && attachmentsEl.elements("attachment").size() == 0) {
318       SAXReader saxReader = new SAXReader();
319       try {
320         attachmentsEl = saxReader.read(new StringReader JavaDoc("<attachments>" + textContent + "</attachments>")).getRootElement();
321       } catch (DocumentException e) {
322         System.err.println("Error parsing the attachments ...: " + e.getMessage());
323       }
324     }
325
326     int attCount = 0;
327     Iterator JavaDoc attIt = attachmentsEl.elements("attachment").iterator();
328     while (attIt.hasNext()) {
329       Element att = (Element) attIt.next();
330       File JavaDoc file = new File JavaDoc(attRoot, att.elementText("location"));
331       String JavaDoc snipName = snipEl.element("name").getText();
332       if (att.element("data") == null) {
333         if (file.exists()) {
334           try {
335             addAttachmentFile(att, file);
336             attCount++;
337             // System.err.println("Added '" + file.getPath() + "' to " + snipName);
338
} catch (Exception JavaDoc e) {
339             System.err.println("Error adding '" + file.getPath() + "' to '" + snipName + "'");
340             e.printStackTrace();
341             att.detach();
342           }
343         } else {
344           System.err.println("Missing file '" + file.getPath() + "' attached to '" + snipName + "'");
345           att.detach();
346         }
347       }
348     }
349     snipEl.add(attachmentsEl);
350     return attCount;
351   }
352
353   public static void addAttachmentFile(Element att, File JavaDoc attFile) throws IOException JavaDoc {
354     ByteArrayOutputStream JavaDoc data = new ByteArrayOutputStream JavaDoc();
355     BufferedInputStream JavaDoc fileIs = new BufferedInputStream JavaDoc(new FileInputStream JavaDoc(attFile));
356     int count = 0;
357     byte[] buffer = new byte[8192];
358     while ((count = fileIs.read(buffer)) != -1) {
359       data.write(buffer, 0, count);
360     }
361     data.close();
362     att.addElement("data").addText(new String JavaDoc(org.apache.commons.codec.binary.Base64.encodeBase64(data.toByteArray()), "UTF-8"));
363   }
364
365 }
366
Popular Tags