1 package org.enhydra.snapper.parsers; 2 3 import java.io.File ; 4 import java.io.FileInputStream ; 5 import java.io.InputStream ; 6 import java.util.Collection ; 7 import java.util.Iterator ; 8 import java.util.Map ; 9 import java.util.Set ; 10 11 12 import org.enhydra.snapper.api.*; 13 import org.textmining.text.extraction.WordExtractor; 14 15 import org.apache.poi.poifs.eventfilesystem.POIFSReader; 16 import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent; 17 import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener; 18 import org.apache.poi.hpsf.DocumentSummaryInformation; 20 import org.apache.poi.hpsf.PropertySetFactory; 21 import org.apache.poi.hpsf.SummaryInformation; 22 23 24 public class WordParser implements org.enhydra.snapper.api.Parser { 25 private String fileName; 26 private String parsedText; 27 public String title; 28 private String properties = ""; 29 public Map customproperties; 30 31 public String parse(InputStream is) throws java.io.IOException { return "";} 32 33 public void parse () { 34 FileInputStream fis1 = null, fis = null; 35 try { 36 37 FileInputStream in = new FileInputStream (new File (fileName)); 38 parsedText = new WordExtractor().extractText(in); 39 46 47 in.close(); 48 POIFSReader r = new POIFSReader(); 50 MyPOIFSReaderListener mypoi = new MyPOIFSReaderListener(); 51 r.registerListener(mypoi, 52 "\005SummaryInformation"); 53 fis = new FileInputStream (fileName); 54 r.read(fis); 55 title = mypoi.getTitle(); 56 fis.close(); 57 fis = null; 58 59 60 POIFSReader r1 = new POIFSReader(); 61 DocumentPOIFSReaderListener docpoi = new DocumentPOIFSReaderListener(); 62 r1.registerListener(docpoi, 63 "\005DocumentSummaryInformation"); 64 65 fis1 = new FileInputStream (fileName); 66 68 r1.read(fis1); 69 if (docpoi.getCustomProperties() == null){ 70 properties = ""; 71 return; 72 } 73 customproperties = docpoi.getCustomProperties(); 74 createString(customproperties); 75 fis1.close(); 76 fis1 = null; 77 78 79 80 81 } catch (Throwable e) { 82 83 84 try{ 85 86 ParserManager.logger.debug("***** File could not be parsed: " + fileName); 87 } catch (Exception ex) { 88 System.out.println("***** File could not be parsed: " + fileName); 89 } 90 91 } 92 93 finally { 94 if (fis != null){ 95 try{ 96 fis.close(); 97 } catch (Exception ex){}; 98 fis = null; 99 } 100 if (fis1 != null){ 101 try{ 102 fis1.close(); 103 } catch (Exception ex){}; 104 fis1 = null; 105 } 106 } 107 108 109 } 110 111 112 public void setFileName(String fileName) { 113 this.fileName = fileName; 114 } 115 116 public String getParsedText() { 117 return parsedText; 118 } 119 120 public String getTitle() { 121 return title; 122 } 123 124 public String getProperties(){ 125 return properties; 126 } 127 128 private void createString( Map customproperties){ 129 Collection values = customproperties.values(); 130 Iterator valuesIterator = values.iterator(); 131 properties = ""; 132 Set keys = customproperties.keySet(); 133 Iterator keysIterator = keys.iterator(); 134 135 for (Iterator it= customproperties.entrySet().iterator(); it.hasNext(); ) { 136 Map.Entry entry = (Map.Entry )it.next(); 137 properties += entry.getKey().toString() + " = " + entry.getValue() + " , "; 138 } 139 140 141 } 142 } 143 144 class MyPOIFSReaderListener implements POIFSReaderListener 145 { 146 String title; 147 public void processPOIFSReaderEvent(POIFSReaderEvent event) 148 { 149 SummaryInformation si = null; 150 try 151 { 152 si = (SummaryInformation) 153 PropertySetFactory.create(event.getStream()); 154 } 155 catch (Exception ex) 156 { 157 throw new RuntimeException 158 ("Property set stream \"" + event.getPath() + 159 event.getName() + "\": " + ex); 160 } 161 title = si.getTitle(); 162 } 163 164 public String getTitle(){ 165 return title; 166 } 167 } 168 169 class DocumentPOIFSReaderListener implements POIFSReaderListener 170 { Map customproperties; 171 public void processPOIFSReaderEvent(POIFSReaderEvent event) 172 { 173 DocumentSummaryInformation dsi = null; 174 try 175 { 176 dsi = (DocumentSummaryInformation) 177 PropertySetFactory.create(event.getStream()); 178 } 179 catch (Exception ex) 180 { 181 throw new RuntimeException 182 ("Property set stream \"" + event.getPath() + 183 event.getName() + "\": " + ex); 184 } 185 customproperties = dsi.getCustomProperties(); 186 } 187 188 public Map getCustomProperties(){ 189 return customproperties; 190 } 191 192 193 194 } 195 | Popular Tags |