package org.enhydra.snapper.parsers;

import org.enhydra.snapper.SnapperManager;
import org.enhydra.snapper.utils.*;

import java.io.*;
import java.util.Vector;

import javax.swing.text.DefaultStyledDocument;
import javax.swing.text.Document;
import javax.swing.text.rtf.RTFEditorKit;

/**
 * Picks a format-specific parser for a file (based on the {@code Utils.isXxx}
 * checks on its path) and packs the outcome into a {@link Vector}.
 *
 * <p>For every format except ZIP the returned vector has six elements:
 * <ol start="0">
 *   <li>{@code Long} - {@code File.lastModified()} of the file</li>
 *   <li>{@code String} - the file path</li>
 *   <li>the parsed text</li>
 *   <li>{@code String} - a type tag ({@code "oo"}, {@code "doc"}, {@code "ppt"},
 *       {@code "pps"}, {@code "txt"}, {@code "eml"}, {@code "xls"}, {@code "pdf"},
 *       {@code "html"}, {@code "msg"}, {@code "rtf"} or {@code "other"})</li>
 *   <li>{@code String} - the title</li>
 *   <li>the parser's properties for doc/ppt/pps, otherwise {@code ""}</li>
 * </ol>
 * When parsing fails, every element is {@code null} except element 1, which
 * still carries the path.
 */
public class Parser {
    // NOTE(review): none of these three fields is assigned anywhere in this class.
    private String fileName;
    private String parsedText;
    private File temp;

    /**
     * Does nothing in this class.
     */
    public void parse() {
    }

    /**
     * Parses {@code originalFile} with the parser matching its type.
     *
     * <p>The type checks run in a fixed order (OpenOffice, Word, PPT, PPS, text,
     * EML, Excel, PDF, HTML, MSG, RTF, ZIP); a file matching none of them is
     * reported with the tag {@code "other"} and empty text.
     *
     * @param originalFile the file to parse; {@code null} yields an all-{@code null}
     *                     failure record instead of an exception
     * @return the six-element record described on the class, or, for ZIP files,
     *         whatever {@code ZIPParser.getFiles()} returns. Any exception
     *         raised while parsing is logged and converted into the failure
     *         record; this method does not propagate it.
     */
    public Vector parse(File originalFile) {
        Vector retVal = new Vector();

        try {
            String path = originalFile.getPath();

            if (Utils.isOO(path)) {
                return parseOpenOffice(retVal, originalFile);
            }

            if (Utils.isDoc(path)) {
                WordParser wd = new WordParser();
                wd.setFileName(path);
                wd.parse();
                String title = chooseTitle(wd.getTitle(), originalFile);
                return fillResult(retVal, originalFile, wd.getParsedText(), "doc", title,
                        wd.getProperties());
            }

            if (Utils.isPPT(path)) {
                return parsePowerPoint(retVal, originalFile, "ppt");
            }

            if (Utils.isPPS(path)) {
                return parsePowerPoint(retVal, originalFile, "pps");
            }

            if (Utils.isText(path)) {
                return parsePlainText(retVal, originalFile, "txt");
            }

            if (Utils.isEML(path)) {
                return parsePlainText(retVal, originalFile, "eml");
            }

            if (Utils.isExcel(path)) {
                ExcelParser ep = new ExcelParser();
                ep.setFileName(path);
                ep.parse();
                String title = chooseTitle(ep.getTitle(), originalFile);
                return fillResult(retVal, originalFile, ep.getParsedText(), "xls", title, "");
            }

            if (Utils.isPDF(path)) {
                SnapperPDFParser pdf = new SnapperPDFParser();
                pdf.setFileName(path);
                pdf.parse();
                String title = chooseTitle(pdf.getTitle(), originalFile);
                return fillResult(retVal, originalFile, pdf.getParsedText(), "pdf", title, "");
            }

            if (Utils.isHTML(path)) {
                HTMLParser html = new HTMLParser();
                html.setFileName(path);
                html.parse();
                // NOTE(review): unlike the other formats, HTML has always reported the
                // file name as the title; the previous code computed html.getTitle()
                // and then ignored it. Kept as-is -- confirm whether that is intended.
                return fillResult(retVal, originalFile, html.getParsedText(), "html",
                        originalFile.getName(), "");
            }

            if (Utils.isMSG(path)) {
                MsgParser msgp = new MsgParser();
                msgp.setFileName(path);
                msgp.parse();
                String title = chooseTitle(msgp.getTitle(), originalFile);
                return fillResult(retVal, originalFile, msgp.getParsedText(), "msg", title, "");
            }

            if (Utils.isRTF(path)) {
                return parseRtf(retVal, originalFile);
            }

            if (Utils.isZIP(path)) {
                ZIPParser zipp = new ZIPParser(
                        SnapperManager.getInstance().getTempDir() + File.separator);
                zipp.setFileName(path);
                zipp.parse();
                return zipp.getFiles();
            }

            // Unrecognised type: report the file with empty text.
            return fillResult(retVal, originalFile, "", "other", originalFile.getName(), "");
        } catch (Exception ex) {
            System.out.println("Exception while parsing...: " + ex);
            ParserManager.logger.debug("Exception while parsing...: " + ex);
            // fillFailure is null-safe, so a null originalFile cannot trigger a
            // second exception from inside this handler.
            return fillFailure(retVal, originalFile);
        }
    }

    /**
     * Returns {@code temp}. The field is never assigned in this class, so as
     * written this returns {@code null}.
     */
    public File getFile() {
        return temp;
    }

    /** Runs OoToText against a scratch file and always removes that scratch file afterwards. */
    private Vector parseOpenOffice(Vector result, File file) throws Exception {
        File metatmp = new File(
                System.getProperty("user.dir") + File.separator + "metasearch.txt");
        try {
            metatmp.createNewFile();
            OoToText oc = new OoToText(file.getPath(), metatmp.getPath());
            oc.parse();
            return fillResult(result, file, oc.getParsedText(), "oo", file.getName(), "");
        } finally {
            // Clean up even when the converter throws, so the scratch file does not linger.
            if (metatmp.exists()) {
                metatmp.delete();
            }
        }
    }

    /** Parses a PowerPoint file; {@code typeTag} distinguishes "ppt" from "pps". */
    private Vector parsePowerPoint(Vector result, File file, String typeTag) throws Exception {
        PowerParser pp = new PowerParser();
        pp.setFileName(file.getPath());
        pp.parse();
        String title = chooseTitle(pp.getTitle(), file);
        return fillResult(result, file, pp.getParsedText(), typeTag, title, pp.getProperties());
    }

    /** Reads the file via ReadWriteTextFile and reports it under {@code typeTag}. */
    private Vector parsePlainText(Vector result, File file, String typeTag) throws Exception {
        String contents = ReadWriteTextFile.getContents(file);
        return fillResult(result, file, contents, typeTag, file.getName(), "");
    }

    /**
     * Extracts the text of an RTF file with {@link RTFEditorKit}. A failure here is
     * logged and turned into the failure record rather than rethrown.
     */
    private Vector parseRtf(Vector result, File file) {
        FileInputStream in = null;
        try {
            in = new FileInputStream(file);
            RTFEditorKit kit = new RTFEditorKit();
            Document doc = kit.createDefaultDocument();
            kit.read(in, doc, 0);
            String contents = doc.getText(0, doc.getLength());
            return fillResult(result, file, contents, "rtf", file.getName(), "");
        } catch (Exception ex) {
            ParserManager.logger.debug(
                    "File could not be parsed: " + file.getName() + " (" + ex + ")");
            return fillFailure(result, file);
        } finally {
            closeQuietly(in);
        }
    }

    /** Returns {@code parsedTitle}, or the file name when the title is null or empty. */
    private static String chooseTitle(String parsedTitle, File file) {
        if (parsedTitle == null || parsedTitle.equals("")) {
            return file.getName();
        }
        return parsedTitle;
    }

    /** Appends the six-element success record for {@code file} to {@code result} and returns it. */
    private static Vector fillResult(Vector result, File file, Object text, String typeTag,
            String title, Object properties) {
        result.add(new Long(file.lastModified()));
        result.add(file.getPath());
        result.add(text);
        result.add(typeTag);
        result.add(title);
        result.add(properties);
        return result;
    }

    /**
     * Resets {@code result} to the failure record: all {@code null} except the path
     * at index 1 (which is also {@code null} when {@code file} itself is null).
     */
    private static Vector fillFailure(Vector result, File file) {
        result.removeAllElements();
        result.add(null);
        result.add(file == null ? null : file.getPath());
        result.add(null);
        result.add(null);
        result.add(null);
        result.add(null);
        return result;
    }

    /** Closes {@code in} if it was opened; a failure to close is deliberately ignored. */
    private static void closeQuietly(InputStream in) {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing a read-only stream fails.
        }
    }
}