package org.archive.util.ms;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;

import org.apache.poi.hdf.extractor.WordDocument;

import junit.framework.TestCase;


/**
 * Compares the text produced by {@code Doc.getText(File)} for every
 * {@code .doc} file in the test directory against reference text generated
 * by POI's {@link WordDocument}.
 *
 * <p>The reference text for {@code foo.doc} is cached next to it as
 * {@code foo.txt} (UTF-16BE). It is generated on first use and reused on
 * later runs, so deleting the {@code .txt} file forces regeneration.
 */
public class DocTest extends TestCase {


    /** Directory holding the {@code .doc} inputs and cached {@code .txt} reference output. */
    private static final File TEST_DIR = new File("testdata/ms");


    /**
     * Runs the comparison for every {@code .doc} file in {@link #TEST_DIR}
     * and prints the time spent on each directory entry.
     *
     * @throws IOException if the test directory cannot be listed, a file
     *         cannot be read, or at least one mismatch was found (details
     *         are printed to stdout)
     */
    public void testAgainstPOI() throws IOException {
        // listFiles() returns null (not an empty array) when the path is not
        // a readable directory; fail with a message instead of an NPE.
        File[] files = TEST_DIR.listFiles();
        if (files == null) {
            throw new IOException("Cannot list test directory: "
                    + TEST_DIR.getAbsolutePath());
        }

        int errors = 0;
        for (File f : files) {
            long start = System.currentTimeMillis();
            try {
                if (f.getName().endsWith(".doc")) {
                    errors += runDoc(f);
                }
            } finally {
                // Printed for every directory entry, even when runDoc throws.
                long duration = System.currentTimeMillis() - start;
                System.out.println("Duration in milliseconds: " + duration);
            }
        }
        if (errors > 0) {
            throw new IOException(errors + " errors, see stdout.");
        }
    }


    /**
     * Compares one Word document against its reference text, generating
     * the reference file first if it does not exist yet.
     *
     * @param doc the {@code .doc} file to check
     * @return the number of errors found
     * @throws IOException if either file cannot be read or written
     */
    private int runDoc(File doc) throws IOException {
        String name = doc.getName();
        System.out.println("===== Now processing " + name);
        int p = name.lastIndexOf('.');
        String expectedName = name.substring(0, p) + ".txt";
        File expectedFile = new File(TEST_DIR, expectedName);
        if (!expectedFile.exists()) {
            createExpectedOutput(doc, expectedFile);
        }
        return runFiles(doc, expectedFile);
    }


    /**
     * Writes the text POI extracts from {@code doc} to {@code output},
     * encoded as UTF-16BE.
     *
     * @param doc    the Word document to read
     * @param output the reference file to create
     * @throws IOException if reading or writing fails
     */
    private void createExpectedOutput(File doc, File output)
    throws IOException {
        FileInputStream finp = new FileInputStream(doc);
        try {
            // Opened inside the outer try so finp is released even if the
            // output file cannot be created.
            FileOutputStream fout = new FileOutputStream(output);
            try {
                WordDocument wd = new WordDocument(finp);
                Writer writer = new OutputStreamWriter(fout, "UTF-16BE");
                wd.writeAllText(writer);
                // OutputStreamWriter buffers encoded bytes; without this
                // flush, closing fout directly can drop the tail of the text
                // and leave a truncated reference file.
                writer.flush();
            } finally {
                close(fout);
            }
        } finally {
            close(finp);
        }
    }


    /**
     * Closes a resource on a best-effort basis. A failure is printed and
     * otherwise ignored so that it cannot mask an exception already in flight.
     *
     * @param c the resource to close; {@code null} is ignored
     */
    private static void close(Closeable c) {
        if (c == null) {
            return;
        }
        try {
            c.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }


    /**
     * Opens the document text and the UTF-16BE reference text and compares them.
     *
     * @param doc      the Word document under test
     * @param expected the reference text file
     * @return the number of errors found
     * @throws IOException if either file cannot be opened or read
     */
    private int runFiles(File doc, File expected)
    throws IOException {
        FileInputStream expectedIn = new FileInputStream(expected);
        try {
            Reader expectedReader = new InputStreamReader(expectedIn, "UTF-16BE");
            // Opened inside the outer try so expectedIn is released even if
            // Doc.getText fails.
            Reader docReader = Doc.getText(doc);
            try {
                return runReaders(docReader, expectedReader);
            } finally {
                close(docReader);
            }
        } finally {
            close(expectedIn);
        }
    }


    /**
     * Compares two character streams position by position, printing every
     * difference to stdout. Characters from {@code expected} are passed
     * through {@link #correctPOI(int)} before comparison.
     *
     * <p>Comparison stops as soon as either stream ends. If only one of them
     * has ended, that is reported both as "File lengths differ." and as a
     * character mismatch against EOF, so it adds two to the error count.
     *
     * @param doc      text extracted by the code under test
     * @param expected reference text
     * @return the number of errors found
     * @throws IOException if reading either stream fails
     */
    private int runReaders(Reader doc, Reader expected)
    throws IOException {
        int count = 0;
        int errors = 0;
        boolean go = true;
        while (go) {
            int ch = doc.read();
            int expectedCh = correctPOI(expected.read());
            if ((ch < 0) || (expectedCh < 0)) {
                go = false;
                if ((ch >= 0) || (expectedCh >= 0)) {
                    errors++;
                    System.out.println("File lengths differ.");
                }
            }
            if (ch != expectedCh) {
                errors += 1;
                report(count, expectedCh, ch);
            }
            count++;
        }
        return errors;
    }


    /**
     * Prints one mismatch: the zero-based character position, then the
     * expected and actual values as numbers and as characters.
     *
     * @param count    zero-based position of the mismatch
     * @param expected expected character value, or negative for EOF
     * @param actual   actual character value, or negative for EOF
     */
    private void report(int count, int expected, int actual) {
        StringBuilder msg = new StringBuilder("#").append(count);
        msg.append(": Expected ");
        msg.append(expected).append(" (").append(toChar(expected));
        msg.append(") but got ").append(actual).append(" (");
        msg.append(toChar(actual)).append(").");
        System.out.println(msg);
    }


    /**
     * Renders a value returned by {@link Reader#read()} for display.
     *
     * @param ch a character value, or negative for end of stream
     * @return {@code "EOF"} for a negative value, otherwise the character
     *         as a one-character string
     */
    private static String toChar(int ch) {
        if (ch < 0) {
            return "EOF";
        } else {
            return Character.toString((char) ch);
        }
    }


    /**
     * Replaces certain characters found in the POI-generated reference text
     * with the character the comparison should expect instead.
     *
     * <p>NOTE(review): each pair matches one byte read as MacRoman (input)
     * versus windows-1252 (output): 0xB0, 0x85, 0x92, 0x93, 0x94. Presumably
     * the reference text was decoded with the wrong charset; confirm. A
     * genuine occurrence of one of the input characters in the reference
     * text is remapped as well.
     *
     * @param ch a character value from the reference text, or negative for EOF
     * @return the corrected value, or {@code ch} unchanged if no mapping applies
     */
    private static int correctPOI(int ch) {
        switch (ch) {
            case 8734:       // U+221E INFINITY
                return 176;  // U+00B0 DEGREE SIGN
            case 214:        // U+00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
                return 8230; // U+2026 HORIZONTAL ELLIPSIS
            case 237:        // U+00ED LATIN SMALL LETTER I WITH ACUTE
                return 8217; // U+2019 RIGHT SINGLE QUOTATION MARK
            case 236:        // U+00EC LATIN SMALL LETTER I WITH GRAVE
                return 8220; // U+201C LEFT DOUBLE QUOTATION MARK
            case 238:        // U+00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
                return 8221; // U+201D RIGHT DOUBLE QUOTATION MARK
            default:
                return ch;
        }
    }


}