1 32 33 package websphinx.workbench; 34 35 import websphinx.*; 36 import java.io.File ; 37 import java.io.IOException ; 38 39 public class ExtractAction implements Action, CrawlListener { 40 Pattern pattern; 41 String filename; 42 boolean useBrowser; 43 boolean textOnly; 44 45 transient File file; 46 transient RecordTransformer records; 47 transient boolean noFields; 48 49 public ExtractAction (Pattern pattern, boolean useBrowser, String filename, boolean textOnly) { 50 this.pattern = pattern; 51 this.filename = filename; 52 this.useBrowser = useBrowser; 53 this.textOnly = textOnly; 54 } 55 56 public boolean equals (Object object) { 57 if (! (object instanceof ExtractAction)) 58 return false; 59 ExtractAction a = (ExtractAction)object; 60 return same (a.filename, filename) 61 && a.useBrowser == useBrowser 62 && a.pattern.equals (pattern) 63 && a.textOnly == textOnly; 64 } 65 66 private boolean same (String s1, String s2) { 67 if (s1 == null || s2 == null) 68 return s1 == s2; 69 else 70 return s1.equals (s2); 71 } 72 73 public Pattern getPattern () { 74 return pattern; 75 } 76 public boolean getUseBrowser () { 77 return useBrowser; 78 } 79 public String getFilename () { 80 return filename; 81 } 82 public boolean getTextOnly () { 83 return textOnly; 84 } 85 86 public void connected (Crawler crawler) { 87 crawler.addCrawlListener (this); 88 } 89 90 public void disconnected (Crawler crawler) { 91 crawler.removeCrawlListener (this); 92 } 93 94 private void showit () { 95 Browser browser = Context.getBrowser(); 96 if (browser != null) 97 browser.show (file); 98 } 99 100 public synchronized void visit (Page page) { 101 try { 102 int n = 0; 103 104 PatternMatcher m = pattern.match (page); 105 for (Region r = m.nextMatch(); r != null; r = m.nextMatch()) { 106 Object [] fields; 107 if (noFields) { 108 fields = new Object [1]; 109 fields[0] = r; 110 } 111 else 112 fields = (Object [])r.getFields (Pattern.groups); 113 114 records.writeRecord (fields, textOnly); 115 ++n; 116 } 117 118 if (n > 0) 119 records.flush (); 120 } catch (IOException e) { 121 throw new RuntimeException (e.toString()); 122 } 123 } 124 125 128 public synchronized void started (CrawlEvent event){ 129 if (records == null) { 130 try { 131 file = (filename != null) 132 ? new File (filename) 133 : Access.getAccess ().makeTemporaryFile ("extract", ".html"); 134 135 records = new RecordTransformer (file.toString()); 136 137 String [] fieldNames = pattern.getFieldNames (); 138 noFields = (fieldNames.length == 0); 139 records.setProlog (records.getProlog () 140 + makeTableHeader (fieldNames)); 141 } catch (IOException e) { 142 System.err.println (e); } 144 } 145 } 146 147 private String makeTableHeader (String [] fieldNames) { 148 String result = "<TR>\n<TH>\n"; 149 if (fieldNames.length == 0) 150 result += "<TH>\n"; 151 else 152 for (int i=0; i<fieldNames.length; ++i) 153 result += "<TH>" + fieldNames[i] + "\n"; 154 return result; 155 } 156 157 160 public synchronized void stopped (CrawlEvent event){ 161 try { 162 if (records != null) { 163 records.close (); 164 records = null; 165 if (useBrowser) 166 showit (); 167 } 168 } catch (IOException e) { 169 System.err.println (e); } 171 } 172 173 176 public synchronized void cleared (CrawlEvent event){ 177 try { 178 if (records != null) { 179 records.close (); 180 records = null; 181 if (useBrowser) 182 showit (); 183 } 184 } catch (IOException e) { 185 System.err.println (e); } 187 } 188 189 192 public synchronized void timedOut (CrawlEvent event){ 193 try { 194 records.close (); 195 records = null; 196 if (useBrowser) 197 showit (); 198 } catch (IOException e) { 199 System.err.println (e); } 201 } 202 203 206 public synchronized void paused (CrawlEvent event){ 207 try { 208 records.flush (); 209 if (useBrowser) 210 showit (); 211 } catch (IOException e) { 212 System.err.println (e); } 214 } 215 } 216 | Popular Tags |