1 25 package org.snipsnap.util; 26 27 import org.dom4j.Document; 28 import org.dom4j.DocumentException; 29 import org.dom4j.DocumentHelper; 30 import org.dom4j.Element; 31 import org.dom4j.io.OutputFormat; 32 import org.dom4j.io.SAXReader; 33 import org.dom4j.io.XMLWriter; 34 35 import java.io.BufferedInputStream ; 36 import java.io.ByteArrayOutputStream ; 37 import java.io.File ; 38 import java.io.FileInputStream ; 39 import java.io.FileOutputStream ; 40 import java.io.FilenameFilter ; 41 import java.io.IOException ; 42 import java.io.InputStreamReader ; 43 import java.io.OutputStream ; 44 import java.io.StringReader ; 45 import java.text.NumberFormat ; 46 import java.util.HashSet ; 47 import java.util.Iterator ; 48 import java.util.Map ; 49 import java.util.Set ; 50 import java.util.TreeMap ; 51 52 55 public class XMLSnipRepair { 56 public static void main(String args[]) { 57 if (args.length < 2) { 58 System.err.println("usage: XMLSnipRepair <input file> <output file> [<webapp directory>]"); 59 System.exit(0); 60 } 61 62 } 63 64 public static void repair(File input, File output, File webAppDir) { 65 System.err.println("STEP 1: parsing input file ..."); 66 Document document = null; 67 try { 68 document = load(input); 69 } catch (Exception e) { 70 System.err.println("Unable to read input document: " + e); 71 System.err.println("This is usually the case for illegal XML characters, please manually edit the file and remove them."); 72 System.exit(0); 73 } 74 75 System.err.println("STEP 2: checking SnipSpace consistency ..."); 76 Document repaired = repair(document, webAppDir); 77 78 System.err.println("STEP 3: writing output file ..."); 79 OutputFormat outputFormat = new OutputFormat(); 80 outputFormat.setEncoding("UTF-8"); 81 outputFormat.setNewlines(true); 82 try { 83 XMLWriter xmlWriter = new XMLWriter(null == output ? System.out : (OutputStream ) new FileOutputStream (output)); 84 xmlWriter.write(repaired); 85 xmlWriter.flush(); 86 xmlWriter.close(); 87 } catch (Exception e) { 88 System.err.println("Error: unable to write data: " + e); 89 } 90 System.err.println("Finished."); 91 } 92 93 static int errCount = 0; 94 static int curr = 0; 95 96 101 private static Document load(File file) throws Exception { 102 final long fileLength = file.length(); 103 SAXReader saxReader = new SAXReader(); 104 System.err.print("0%"); 105 InputStreamReader reader = new InputStreamReader (new FileInputStream (file), "UTF-8") { 106 public int read(char[] chars) throws IOException { 107 int n = super.read(chars); 108 for (int i = 0; i < n; i++) { 109 chars[i] = replaceIfIllegal(chars[i]); 110 } 111 return n; 112 } 113 114 public int read(char[] chars, int start, int length) throws IOException { 115 int n = super.read(chars, start, length); 116 for (int i = 0; i < n; i++) { 117 chars[i] = replaceIfIllegal(chars[i]); 118 } 119 readProgress(fileLength, curr += n, length); 120 return n; 121 } 122 123 private char replaceIfIllegal(char c) { 124 if (c < 0x20 && !(c == 0x09 || c == 0x0a || c == 0x0d)) { 125 errCount++; 127 return (char) 0x20; 128 } 129 return c; 130 } 131 132 private void readProgress(long length, long current, int blockSize) { 133 long percentage = current * 100 / length; 134 if (percentage % 5 != 0 && ((current - blockSize) * 100 / length) % 5 == 0) { 135 System.err.print("."); 136 } else if (percentage % 20 == 0 && ((current - blockSize) * 100 / length) % 20 != 0) { 137 System.err.print(NumberFormat.getIntegerInstance().format(percentage) + "%"); 138 } 139 } 140 }; 141 142 143 Document document = saxReader.read(reader); 144 System.err.println(); 145 146 if (errCount > 0) { 147 System.err.println("Replaced " + errCount + " illegal characters in input document by a space."); 148 System.err.println("Characters not considered valid in an XML document are considered illegal."); 149 System.err.println("This includes all characters with a code below 32 unless its TAB, CR or LF."); 150 } 151 152 return document; 153 } 154 155 private static Document repair(Document document, File webAppRoot) { 156 Map userData = new TreeMap (); 157 Map snipData = new TreeMap (); 158 Map unknown = new TreeMap (); 159 160 Element rootEl = document.getRootElement(); 161 Iterator elementIt = rootEl.elementIterator(); 162 163 System.err.println("STEP 2.1: checking for duplicates ..."); 164 long identDup = 0; 165 long oldDup = 0; 166 long newDup = 0; 167 while (elementIt.hasNext()) { 168 Element element = (Element) elementIt.next(); 169 Element idElement = null; 170 Map data = null; 171 if ("user".equals(element.getName())) { 172 idElement = element.element("login"); 173 data = userData; 174 } else if ("snip".equals(element.getName())) { 175 idElement = element.element("name"); 176 data = snipData; 177 } 178 179 if (null != data && null != idElement) { 180 String id = element.getName() + "[" + idElement.getText() + "]"; 181 long mtime = Long.parseLong(element.element("mTime").getTextTrim()); 182 183 Element existingElement = (Element) data.get(id); 184 if (existingElement != null) { 185 long lastmtime = Long.parseLong(existingElement.element("mTime").getTextTrim()); 186 if (mtime > lastmtime) { 187 newDup++; 188 System.err.println("Replacing duplicate by newer element: " + id + " (" + (mtime - lastmtime) + "ms)"); 189 data.put(id, element); 190 } else if (mtime == lastmtime) { 191 identDup++; 192 System.err.println("Identical duplicate found: " + id); 193 } else { 194 oldDup++; 195 System.err.println("Older duplicate found: " + id); 196 } 197 if (snipData == data) { 198 String name = idElement.getText(); 199 if (name.startsWith("comment-") && name.lastIndexOf("-") != -1) { 200 String commentSnip = name.substring("comment-".length(), name.lastIndexOf("-")); 201 Element commentEl = element.element("commentSnip"); 202 if (commentEl == null) { 203 commentEl = element.addElement("commentSnip"); 204 } 205 if (!commentSnip.toUpperCase().equals(commentEl.getText().toUpperCase())) { 207 commentEl.addText(commentSnip); 208 System.err.println("Fixing commented snip for '" + name + "' (" + commentSnip + ")"); 209 } 210 } else if (name.matches("\\d\\d\\d\\d-\\d\\d-\\d\\d")) { 211 Element parentEl = element.element("parentSnip"); 212 if (null == parentEl) { 213 parentEl = element.addElement("parentSnip"); 214 } 215 if (!"start".equals(parentEl.getText())) { 216 parentEl.addText("start"); 217 System.err.println("Fixing parent snip for '" + name + "'"); 218 } 219 } 220 } 221 } else { 222 data.put(id, element); 223 } 224 } else { 225 System.err.println("Unknown element '" + element.getName() + "', ignoring ..."); 226 unknown.put(element, element); 227 } 228 } 229 230 System.err.println("Found " + identDup + " identical duplicates, replaced " + newDup + ", ignored " + oldDup + "."); 231 if (unknown.size() > 0) { 232 System.err.println("Found " + unknown.size() + " unknown xml elements."); 233 } 234 235 Document outputDocument = DocumentHelper.createDocument(); 236 outputDocument.addElement(rootEl.getName()); 237 rootEl = outputDocument.getRootElement(); 238 239 System.err.println("STEP 2.2: finishing user data (" + userData.size() + ")..."); 240 Iterator userIt = userData.values().iterator(); 241 while (userIt.hasNext()) { 242 Element userEl = (Element) userIt.next(); 243 rootEl.add(userEl.detach()); 244 } 245 246 int attCount = 0; 247 System.err.print("STEP 2.3: fixing snip data (" + snipData.size() + ")"); 248 if (webAppRoot != null) { 249 System.out.println(" and attachments ..."); 250 } else { 251 System.out.println(); 252 } 253 Iterator snipIt = snipData.values().iterator(); 254 while (snipIt.hasNext()) { 255 Element snipEl = (Element) snipIt.next(); 256 if (webAppRoot != null) { 257 attCount += storeAttachments(snipEl, new File (webAppRoot, "/WEB-INF/files")); 258 attCount += storeOldImages(snipEl, new File (webAppRoot, "/images")); 259 } 260 rootEl.add(snipEl.detach()); 261 } 262 System.err.println("Added " + attCount + " attachments."); 263 return outputDocument; 264 } 265 266 private static int storeOldImages(Element snipEl, File imageRoot) { 267 int attCount = 0; 268 final String snipName = snipEl.element("name").getText(); 269 File [] files = imageRoot.listFiles(new FilenameFilter () { 270 public boolean accept(File file, String s) { 271 return s.startsWith("image-" + snipName); 272 } 273 }); 274 275 Element attachmentsEl = snipEl.element("attachments"); 276 if (null == attachmentsEl) { 277 attachmentsEl = DocumentHelper.createElement("attachments"); 278 snipEl.add(attachmentsEl); 279 } 280 281 Set attList = new HashSet (); 282 Iterator attIt = attachmentsEl.elementIterator("attachment"); 283 while (attIt.hasNext()) { 284 Element attEl = (Element) attIt.next(); 285 if(attEl != null && attEl.element("name") != null) { 286 attList.add(attEl.element("name").getText()); 287 } 288 } 289 290 for (int n = 0; n < files.length; n++) { 291 File file = files[n]; 292 String fileName = file.getName().substring(("image-" + snipName + "-").length()); 293 if (!attList.contains(fileName)) { 294 Element attEl = attachmentsEl.addElement("attachment"); 295 attEl.addElement("name").addText(fileName); 296 attEl.addElement("content-type").addText("image/" + fileName.substring(fileName.lastIndexOf(".") + 1)); 297 attEl.addElement("size").addText("" + file.length()); 298 attEl.addElement("date").addText("" + file.lastModified()); 299 attEl.addElement("location").addText(snipName + "/" + fileName); 300 try { 301 addAttachmentFile(attEl, file); 302 attCount++; 303 } catch (IOException e) { 304 System.err.println("Error adding attachment data: " + e.getMessage()); 305 attEl.detach(); 306 } 307 System.err.println("Added old image attachment '" + fileName + "' to '" + snipName + "'"); 308 } 309 } 310 return attCount; 311 } 312 313 private static int storeAttachments(Element snipEl, File attRoot) { 314 Element attachmentsEl = snipEl.element("attachments"); 315 attachmentsEl.detach(); 316 String textContent = attachmentsEl.getText(); 317 if (textContent != null && textContent.length() > 0 && attachmentsEl.elements("attachment").size() == 0) { 318 SAXReader saxReader = new SAXReader(); 319 try { 320 attachmentsEl = saxReader.read(new StringReader ("<attachments>" + textContent + "</attachments>")).getRootElement(); 321 } catch (DocumentException e) { 322 System.err.println("Error parsing the attachments ...: " + e.getMessage()); 323 } 324 } 325 326 int attCount = 0; 327 Iterator attIt = attachmentsEl.elements("attachment").iterator(); 328 while (attIt.hasNext()) { 329 Element att = (Element) attIt.next(); 330 File file = new File (attRoot, att.elementText("location")); 331 String snipName = snipEl.element("name").getText(); 332 if (att.element("data") == null) { 333 if (file.exists()) { 334 try { 335 addAttachmentFile(att, file); 336 attCount++; 337 } catch (Exception e) { 339 System.err.println("Error adding '" + file.getPath() + "' to '" + snipName + "'"); 340 e.printStackTrace(); 341 att.detach(); 342 } 343 } else { 344 System.err.println("Missing file '" + file.getPath() + "' attached to '" + snipName + "'"); 345 att.detach(); 346 } 347 } 348 } 349 snipEl.add(attachmentsEl); 350 return attCount; 351 } 352 353 public static void addAttachmentFile(Element att, File attFile) throws IOException { 354 ByteArrayOutputStream data = new ByteArrayOutputStream (); 355 BufferedInputStream fileIs = new BufferedInputStream (new FileInputStream (attFile)); 356 int count = 0; 357 byte[] buffer = new byte[8192]; 358 while ((count = fileIs.read(buffer)) != -1) { 359 data.write(buffer, 0, count); 360 } 361 data.close(); 362 att.addElement("data").addText(new String (org.apache.commons.codec.binary.Base64.encodeBase64(data.toByteArray()), "UTF-8")); 363 } 364 365 } 366 | Popular Tags |