KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > webharvest > runtime > processors > BaseProcessor


/* Copyright (c) 2006-2007, Vladimir Nikic
    All rights reserved.

    Redistribution and use of this software in source and binary forms,
    with or without modification, are permitted provided that the following
    conditions are met:

    * Redistributions of source code must retain the above
      copyright notice, this list of conditions and the
      following disclaimer.

    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the
      following disclaimer in the documentation and/or other
      materials provided with the distribution.

    * The name of Web-Harvest may not be used to endorse or promote
      products derived from this software without specific prior
      written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    POSSIBILITY OF SUCH DAMAGE.

    You can contact Vladimir Nikic by sending e-mail to
    nikic_vladimir@yahoo.com. Please include the word "Web-Harvest" in the
    subject line.
*/

37 package org.webharvest.runtime.processors;
38
39 import org.apache.log4j.Logger;
40 import org.webharvest.definition.BaseElementDef;
41 import org.webharvest.definition.IElementDef;
42 import org.webharvest.runtime.Scraper;
43 import org.webharvest.runtime.ScraperContext;
44 import org.webharvest.runtime.templaters.BaseTemplater;
45 import org.webharvest.runtime.variables.*;
46 import org.webharvest.utils.CommonUtil;
47
48 import java.io.*;
49
50 /**
51  * Base processor that contains common processor logic.
52  * All other processors extend this class.
53  */

54 abstract public class BaseProcessor {
55
56     protected static Logger log = Logger.getLogger(BaseProcessor.class);
57
58     abstract public IVariable execute(Scraper scraper, ScraperContext context);
59
60     protected BaseElementDef elementDef;
61
62     protected BaseProcessor() {
63     }
64
65     /**
66      * Base constructor - assigns element definition to the processor.
67      * @param elementDef
68      */

69     protected BaseProcessor(BaseElementDef elementDef) {
70         this.elementDef = elementDef;
71     }
72
73     /**
74      * Wrapper for the execute method. Adds controling and logging logic.
75      */

76     public IVariable run(Scraper scraper, ScraperContext context) {
77         long startTime = System.currentTimeMillis();
78
79         int runningLevel = scraper.getRunningLevel();
80
81         String JavaDoc id = (this.elementDef != null) ? BaseTemplater.execute( this.elementDef.getId(), scraper.getScriptEngine() ) : null;
82         String JavaDoc idDesc = id != null ? "[ID=" + id + "] " : "";
83         String JavaDoc indent = CommonUtil.replicate(" ", runningLevel-1);
84
85         log.info(indent + CommonUtil.getClassName(this) + " starts processing..." + idDesc);
86
87         scraper.increaseRunningLevel();
88         IVariable result = execute(scraper, context);
89         scraper.decreaseRunningLevel();
90
91         // if debug mode is true and processor ID is not null then write debugging file
92
if (scraper.isDebugMode() && id != null) {
93             writeDebugFile(result, id, scraper);
94         }
95
96         log.info(indent + CommonUtil.getClassName(this) +
97                   " processor executed in " + (System.currentTimeMillis() - startTime) + "ms." + idDesc);
98
99         return result;
100     }
101
102     protected IVariable[] executeBody(BaseElementDef elementDef, Scraper scraper, ScraperContext context) {
103         IElementDef[] defs = elementDef.getOperationDefs();
104         IVariable[] result = new IVariable[ Math.max(defs.length, 1) ]; // at least one element
105

106         if (defs.length > 0) {
107             for (int i = 0; i < defs.length; i++) {
108                 BaseProcessor processor = ProcessorResolver.createProcessor( defs[i] );
109                 result[i] = processor.run(scraper, context);
110             }
111         } else {
112             result[0] = new NodeVariable( elementDef.getBodyText() );
113         }
114
115         return result;
116     }
117
118     protected void debug(BaseElementDef elementDef, Scraper scraper, IVariable variable) {
119         String JavaDoc id = (elementDef != null) ? BaseTemplater.execute( elementDef.getId(), scraper.getScriptEngine() ) : null;
120
121         if (scraper.isDebugMode() && id != null) {
122             if (variable != null) {
123                 writeDebugFile(variable, id, scraper);
124             }
125         }
126     }
127
128     protected IVariable getBodyTextContent(BaseElementDef elementDef, Scraper scraper, ScraperContext context) {
129         if (elementDef == null) {
130             return null;
131         } else if (elementDef.hasOperations()) {
132             IVariable[] vars = executeBody(elementDef, scraper, context);
133             return Appender.appendText(vars);
134         } else {
135             return new NodeVariable(elementDef.getBodyText());
136         }
137     }
138
139     protected IVariable getBodyBinaryContent(BaseElementDef elementDef, Scraper scraper, ScraperContext context) {
140         if (elementDef == null) {
141             return null;
142         } else if (elementDef.hasOperations()) {
143             IVariable[] vars = executeBody(elementDef, scraper, context);
144             return Appender.appendBinary(vars);
145         } else {
146             return new NodeVariable(elementDef.getBodyText().getBytes());
147         }
148     }
149
150     protected IVariable getBodyListContent(BaseElementDef elementDef, Scraper scraper, ScraperContext context) {
151         IVariable[] vars = executeBody(elementDef, scraper, context);
152
153         ListVariable listVariable = new ListVariable();
154         for (int i = 0; i < vars.length; i++) {
155             if (!vars[i].isEmpty()) {
156                 listVariable.addVariable(vars[i]);
157             }
158         }
159
160         return listVariable;
161     }
162
163     public BaseElementDef getElementDef() {
164         return elementDef;
165     }
166
167     private void writeDebugFile(IVariable var, String JavaDoc processorId, Scraper scraper) {
168         byte[] data = var == null ? new byte[] {} : var.toString().getBytes();
169
170         String JavaDoc workingDir = scraper.getWorkingDir();
171         String JavaDoc dir = CommonUtil.getAbsoluteFilename(workingDir, "_debug");
172         
173         int index = 1;
174         String JavaDoc fullPath = dir + "/" + processorId + "_" + index + ".debug";
175         while ( new File(fullPath).exists() ) {
176             index++;
177             fullPath = dir + "/" + processorId + "_" + index + ".debug";
178         }
179
180         FileOutputStream out;
181         try {
182             new File(dir).mkdirs();
183             out = new FileOutputStream(fullPath, false);
184             out.write(data);
185             out.flush();
186             out.close();
187         } catch (FileNotFoundException e) {
188             e.printStackTrace();
189         } catch (IOException e) {
190             e.printStackTrace();
191         }
192     }
193
194 }
Popular Tags