KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > snapper > parsers > Parser


1 package org.enhydra.snapper.parsers;
2
3 import org.enhydra.snapper.SnapperManager;
4 import org.enhydra.snapper.utils.*;
5
6 import java.io.*;
7 import java.util.Vector JavaDoc;
8
9 import javax.swing.text.DefaultStyledDocument JavaDoc;
10 import javax.swing.text.Document JavaDoc;
11 import javax.swing.text.rtf.RTFEditorKit JavaDoc;
12
13
14 public class Parser {
15       private String JavaDoc fileName;
16       private String JavaDoc parsedText;
17       private File temp;
18       
19       public void parse() {};
20       public Vector JavaDoc parse(File originalFile) {
21         Vector JavaDoc retVal = new Vector JavaDoc();
22
23         try {
24             if (Utils.isOO(originalFile.getPath())) {
25                 //++documentNo;
26
File metatmp = new File(System.getProperty("user.dir") + File.separator + "metasearch.txt");
27                 metatmp.createNewFile();
28                 OoToText oc = new OoToText(originalFile.getPath(), metatmp.getPath());
29                 oc.parse();
30                 String JavaDoc title = originalFile.getName();
31                 retVal.add(new Long JavaDoc(originalFile.lastModified()));
32                 retVal.add(originalFile.getPath());
33                 retVal.add(oc.getParsedText());
34                 retVal.add("oo");
35                 retVal.add(title);
36                 retVal.add("");
37                 if (metatmp.exists())
38                     metatmp.delete();
39                 return retVal;
40             }
41          else if (Utils.isDoc(originalFile.getPath())) {
42
43             
44             WordParser wd = new WordParser();
45             wd.setFileName(originalFile.getPath());
46             wd.parse();
47             String JavaDoc title;
48             if ( wd.getTitle() == null || wd.getTitle().equals("") )
49                 title = originalFile.getName();
50             else
51                 title = wd.getTitle();
52             retVal.add(new Long JavaDoc(originalFile.lastModified()));
53             retVal.add(originalFile.getPath());
54             retVal.add(wd.getParsedText());
55             retVal.add("doc");
56             retVal.add(title);
57             retVal.add(wd.getProperties());
58             return retVal;
59            
60         }
61         
62          else if (Utils.isPPT(originalFile.getPath())) {
63             PowerParser pp = new PowerParser();
64             pp.setFileName(originalFile.getPath());
65             pp.parse();
66             String JavaDoc title;
67             if ( pp.getTitle() == null || pp.getTitle().equals("") )
68                 title = originalFile.getName();
69             else
70                 title = pp.getTitle();
71             retVal.add(new Long JavaDoc(originalFile.lastModified()));
72             retVal.add(originalFile.getPath());
73             retVal.add(pp.getParsedText());
74             retVal.add("ppt");
75             retVal.add(title);
76             retVal.add(pp.getProperties());
77             return retVal;
78
79               
80         }
81         else if (Utils.isPPS(originalFile.getPath())) {
82             PowerParser pp = new PowerParser();
83             pp.setFileName(originalFile.getPath());
84             pp.parse();
85             String JavaDoc title;
86             if ( pp.getTitle() == null || pp.getTitle().equals("") )
87                 title = originalFile.getName();
88             else
89                 title = pp.getTitle();
90             retVal.add(new Long JavaDoc(originalFile.lastModified()));
91             retVal.add(originalFile.getPath());
92             retVal.add(pp.getParsedText());
93             retVal.add("pps");
94             retVal.add(title);
95             retVal.add(pp.getProperties());
96             return retVal;
97               
98         }
99         
100          else if (Utils.isText(originalFile.getPath())) {
101
102                 String JavaDoc contents = ReadWriteTextFile.getContents(originalFile);
103                 retVal.add(new Long JavaDoc(originalFile.lastModified()));
104                 retVal.add(originalFile.getPath());
105                 retVal.add(contents);
106                 retVal.add("txt");
107                 retVal.add(originalFile.getName());
108                 retVal.add("");
109                 return retVal;
110
111             
112         }
113         else if (Utils.isEML(originalFile.getPath())) {
114
115                 String JavaDoc contents = ReadWriteTextFile.getContents(originalFile);
116                 retVal.add(new Long JavaDoc(originalFile.lastModified()));
117                 retVal.add(originalFile.getPath());
118                 retVal.add(contents);
119                 retVal.add("eml");
120                 retVal.add(originalFile.getName());
121                 retVal.add("");
122                 return retVal;
123
124             
125         }
126         else if (Utils.isExcel(originalFile.getPath())) {
127
128             ExcelParser ep = new ExcelParser();
129             ep.setFileName(originalFile.getPath());
130             ep.parse();
131             String JavaDoc title;
132             if (ep.getTitle() == null || ep.getTitle().equals(""))
133                 title = originalFile.getName();
134             else
135                 title = ep.getTitle();
136             retVal.add(new Long JavaDoc(originalFile.lastModified()));
137             retVal.add(originalFile.getPath());
138             retVal.add(ep.getParsedText());
139             retVal.add("xls");
140             retVal.add(title);
141             retVal.add("");
142             //originalFile.delete();
143
originalFile = null;
144             return retVal;
145             
146     
147         }
148         else if (Utils.isPDF(originalFile.getPath())) {
149
150             SnapperPDFParser pdf = new SnapperPDFParser();
151             pdf.setFileName(originalFile.getPath());
152             pdf.parse();
153             String JavaDoc title;
154             if ( pdf.getTitle() == null || pdf.getTitle().equals("") )
155                 title = originalFile.getName();
156             else
157                 title = pdf.getTitle();
158             retVal.add(new Long JavaDoc(originalFile.lastModified()));
159             retVal.add(originalFile.getPath());
160             retVal.add(pdf.getParsedText());
161             retVal.add("pdf");
162             retVal.add(title);
163             retVal.add("");
164             return retVal;
165
166         }
167         else if (Utils.isHTML(originalFile.getPath())) {
168
169             HTMLParser html = new HTMLParser();
170             html.setFileName(originalFile.getPath());
171             html.parse();
172             String JavaDoc title;
173             if ( html.getTitle() == null || html.getTitle().equals("") )
174                 title = originalFile.getName();
175             else
176                 title = html.getTitle();
177             retVal.add(new Long JavaDoc(originalFile.lastModified()));
178             retVal.add(originalFile.getPath());
179             retVal.add(html.getParsedText());
180             retVal.add("html");
181             retVal.add(originalFile.getName());
182             retVal.add("");
183             return retVal;
184         
185         }
186         else if (Utils.isMSG(originalFile.getPath())) {
187
188             MsgParser msgp = new MsgParser();
189             msgp.setFileName(originalFile.getPath());
190             msgp.parse();
191             String JavaDoc title;
192             if ( msgp.getTitle() == null || msgp.getTitle().equals("") )
193                 title = originalFile.getName();
194             else
195                 title = msgp.getTitle();
196             retVal.add(new Long JavaDoc(originalFile.lastModified()));
197             retVal.add(originalFile.getPath());
198             retVal.add(msgp.getParsedText());
199             retVal.add("msg");
200             retVal.add(title);
201             retVal.add("");
202             //originalFile.delete();
203
originalFile = null;
204             return retVal;
205
206         
207         }
208         else if (Utils.isRTF(originalFile.getPath())) {
209             DefaultStyledDocument JavaDoc styledDocument = new DefaultStyledDocument JavaDoc();
210             String JavaDoc contents = "";
211             
212             FileInputStream in = null;
213             try{
214                  in = new FileInputStream(originalFile);
215                 //new RTFEditorKit().read(in, styledDocument,0);
216
//
217
RTFEditorKit JavaDoc kit = new RTFEditorKit JavaDoc();
218                 Document JavaDoc doc = kit.createDefaultDocument();
219                 kit.read(in, doc, 0);
220
221                 contents = doc.getText(0, doc.getLength());
222                 //
223
//contents = styledDocument.getText(0,styledDocument.getLength());
224

225             String JavaDoc title = originalFile.getName();
226             retVal.add(new Long JavaDoc(originalFile.lastModified()));
227             retVal.add(originalFile.getPath());
228             retVal.add(contents);
229             retVal.add("rtf");
230             retVal.add(originalFile.getName());
231             retVal.add("");
232             in.close();
233             return retVal;
234             }
235             catch (Exception JavaDoc ex) {
236                 ParserManager.logger.debug("File could not be parsed: " + originalFile.getName());
237                 //return null;
238
try{
239                     in.close();
240                 }catch (Exception JavaDoc e) {}
241                 retVal.removeAllElements();
242                 retVal.add(null);
243                 retVal.add(originalFile.getPath());
244                 retVal.add(null);
245                 retVal.add(null);
246                 retVal.add(null);
247                 retVal.add(null);
248                 return retVal;
249             }
250         
251         }
252         else if (Utils.isZIP(originalFile.getPath())) {
253             ZIPParser zipp = new ZIPParser(SnapperManager.getInstance().getTempDir() + File.separator);
254             zipp.setFileName(originalFile.getPath());
255             zipp.parse();
256             //System.out.println("UNZIPPED FILES NO: " + zipp.getFiles().size());
257
return zipp.getFiles();
258                         
259 // retVal.add(new Long(originalFile.lastModified()));
260
// retVal.add(originalFile.getPath());
261
// retVal.add("");
262
// retVal.add("zip");
263
// retVal.add(originalFile.getName());
264
// retVal.add("");
265
}
266         else
267             //System.out.println("Other file type");
268
retVal.add(new Long JavaDoc(originalFile.lastModified()));
269             retVal.add(originalFile.getPath());
270             retVal.add("");
271             //String type = originalFile.getPath().substring(originalFile.getPath().lastIndexOf(".")+1, originalFile.getPath().lastIndexOf(".")+4);
272
retVal.add("other");
273             retVal.add(originalFile.getName());
274             retVal.add("");
275             return retVal;
276             
277         }
278         // at least on windows, some temporary files raise this exception with an "access denied" message
279
// checking if the file can be read doesn't help
280
catch (Exception JavaDoc ex) {
281             System.out.println("Exception while parsing...: " + ex);
282             ParserManager.logger.debug("Exception while parsing...: " + ex);
283             retVal.removeAllElements();
284             retVal.add(null);
285             retVal.add(originalFile.getPath());
286             retVal.add(null);
287             retVal.add(null);
288             retVal.add(null);
289             retVal.add(null);
290             return retVal;
291         }
292        // return retVal;
293

294       }
295       
296       public File getFile()
297       {
298         return temp;
299       }
300 }
301
302
Popular Tags