| 1 37 package org.webharvest.runtime.processors; 38 39 import org.apache.log4j.Logger; 40 import org.webharvest.definition.BaseElementDef; 41 import org.webharvest.definition.IElementDef; 42 import org.webharvest.runtime.Scraper; 43 import org.webharvest.runtime.ScraperContext; 44 import org.webharvest.runtime.templaters.BaseTemplater; 45 import org.webharvest.runtime.variables.*; 46 import org.webharvest.utils.CommonUtil; 47 48 import java.io.*; 49 50 54 abstract public class BaseProcessor { 55 56 protected static Logger log = Logger.getLogger(BaseProcessor.class); 57 58 abstract public IVariable execute(Scraper scraper, ScraperContext context); 59 60 protected BaseElementDef elementDef; 61 62 protected BaseProcessor() { 63 } 64 65 69 protected BaseProcessor(BaseElementDef elementDef) { 70 this.elementDef = elementDef; 71 } 72 73 76 public IVariable run(Scraper scraper, ScraperContext context) { 77 long startTime = System.currentTimeMillis(); 78 79 int runningLevel = scraper.getRunningLevel(); 80 81 String id = (this.elementDef != null) ? BaseTemplater.execute( this.elementDef.getId(), scraper.getScriptEngine() ) : null; 82 String idDesc = id != null ? "[ID=" + id + "] " : ""; 83 String indent = CommonUtil.replicate(" ", runningLevel-1); 84 85 log.info(indent + CommonUtil.getClassName(this) + " starts processing..." + idDesc); 86 87 scraper.increaseRunningLevel(); 88 IVariable result = execute(scraper, context); 89 scraper.decreaseRunningLevel(); 90 91 if (scraper.isDebugMode() && id != null) { 93 writeDebugFile(result, id, scraper); 94 } 95 96 log.info(indent + CommonUtil.getClassName(this) + 97 " processor executed in " + (System.currentTimeMillis() - startTime) + "ms." + idDesc); 98 99 return result; 100 } 101 102 protected IVariable[] executeBody(BaseElementDef elementDef, Scraper scraper, ScraperContext context) { 103 IElementDef[] defs = elementDef.getOperationDefs(); 104 IVariable[] result = new IVariable[ Math.max(defs.length, 1) ]; 106 if (defs.length > 0) { 107 for (int i = 0; i < defs.length; i++) { 108 BaseProcessor processor = ProcessorResolver.createProcessor( defs[i] ); 109 result[i] = processor.run(scraper, context); 110 } 111 } else { 112 result[0] = new NodeVariable( elementDef.getBodyText() ); 113 } 114 115 return result; 116 } 117 118 protected void debug(BaseElementDef elementDef, Scraper scraper, IVariable variable) { 119 String id = (elementDef != null) ? BaseTemplater.execute( elementDef.getId(), scraper.getScriptEngine() ) : null; 120 121 if (scraper.isDebugMode() && id != null) { 122 if (variable != null) { 123 writeDebugFile(variable, id, scraper); 124 } 125 } 126 } 127 128 protected IVariable getBodyTextContent(BaseElementDef elementDef, Scraper scraper, ScraperContext context) { 129 if (elementDef == null) { 130 return null; 131 } else if (elementDef.hasOperations()) { 132 IVariable[] vars = executeBody(elementDef, scraper, context); 133 return Appender.appendText(vars); 134 } else { 135 return new NodeVariable(elementDef.getBodyText()); 136 } 137 } 138 139 protected IVariable getBodyBinaryContent(BaseElementDef elementDef, Scraper scraper, ScraperContext context) { 140 if (elementDef == null) { 141 return null; 142 } else if (elementDef.hasOperations()) { 143 IVariable[] vars = executeBody(elementDef, scraper, context); 144 return Appender.appendBinary(vars); 145 } else { 146 return new NodeVariable(elementDef.getBodyText().getBytes()); 147 } 148 } 149 150 protected IVariable getBodyListContent(BaseElementDef elementDef, Scraper scraper, ScraperContext context) { 151 IVariable[] vars = executeBody(elementDef, scraper, context); 152 153 ListVariable listVariable = new ListVariable(); 154 for (int i = 0; i < vars.length; i++) { 155 if (!vars[i].isEmpty()) { 156 listVariable.addVariable(vars[i]); 157 } 158 } 159 160 return listVariable; 161 } 162 163 public BaseElementDef getElementDef() { 164 return elementDef; 165 } 166 167 private void writeDebugFile(IVariable var, String processorId, Scraper scraper) { 168 byte[] data = var == null ? new byte[] {} : var.toString().getBytes(); 169 170 String workingDir = scraper.getWorkingDir(); 171 String dir = CommonUtil.getAbsoluteFilename(workingDir, "_debug"); 172 173 int index = 1; 174 String fullPath = dir + "/" + processorId + "_" + index + ".debug"; 175 while ( new File(fullPath).exists() ) { 176 index++; 177 fullPath = dir + "/" + processorId + "_" + index + ".debug"; 178 } 179 180 FileOutputStream out; 181 try { 182 new File(dir).mkdirs(); 183 out = new FileOutputStream(fullPath, false); 184 out.write(data); 185 out.flush(); 186 out.close(); 187 } catch (FileNotFoundException e) { 188 e.printStackTrace(); 189 } catch (IOException e) { 190 e.printStackTrace(); 191 } 192 } 193 194 } | Popular Tags |