|                                                                                                              1
 37  package org.webharvest.runtime.processors;
 38
 39  import java.util.*;
 40  import java.util.regex.Matcher
  ; 41  import java.util.regex.Pattern
  ; 42
 43  import org.webharvest.definition.RegexpDef;
 44  import org.webharvest.definition.BaseElementDef;
 45  import org.webharvest.runtime.Scraper;
 46  import org.webharvest.runtime.ScraperContext;
 47  import org.webharvest.runtime.scripting.ScriptEngine;
 48  import org.webharvest.runtime.templaters.BaseTemplater;
 49  import org.webharvest.runtime.variables.*;
 50  import org.webharvest.utils.CommonUtil;
 51  import org.webharvest.utils.Constants;
 52
 53
 56  public class RegexpProcessor extends BaseProcessor {
 57
 58      private RegexpDef regexpDef;
 59
 60      public RegexpProcessor(RegexpDef regexpDef) {
 61          super(regexpDef);
 62          this.regexpDef = regexpDef;
 63      }
 64
 65      public IVariable execute(Scraper scraper, ScraperContext context) {
 66          ScriptEngine scriptEngine = scraper.getScriptEngine();
 67
 68          BaseElementDef patternDef = regexpDef.getRegexpPatternDef();
 69          IVariable patternVar = getBodyTextContent(patternDef, scraper, context);
 70          debug(patternDef, scraper, patternVar);
 71
 72          BaseElementDef sourceDef = regexpDef.getRegexpSourceDef();
 73          IVariable source = getBodyListContent(sourceDef, scraper, context);
 74          debug(sourceDef, scraper, source);
 75
 76          String
  replace = BaseTemplater.execute( regexpDef.getReplace(), scriptEngine); 77          boolean isReplace = CommonUtil.isBooleanTrue(replace);
 78
 79          String
  maxLoopsString = BaseTemplater.execute( regexpDef.getMax(), scriptEngine); 80          double maxLoops = Constants.DEFAULT_MAX_LOOPS;
 81          if (maxLoopsString != null && !"".equals(maxLoopsString.trim())) {
 82              maxLoops = Double.parseDouble(maxLoopsString);
 83          }
 84
 85          Pattern
  pattern = Pattern.compile(patternVar.toString(), Pattern.DOTALL|Pattern.UNICODE_CASE); 86
 87          List resultList = new ArrayList();
 88
 89          List bodyList = source.toList();
 90          Iterator it = bodyList.iterator();
 91          while (it.hasNext()) {
 92              IVariable currVar = (IVariable) it.next();
 93              String
  text = currVar.toString(); 94
 95              Matcher
  matcher = pattern.matcher(text); 96              int groupCount = matcher.groupCount();
 97
 98              StringBuffer
  buffer = new StringBuffer  (); 99
 100             int index = 0;
 101             while ( matcher.find() ) {
 102                 index++;
 103
 104                                 if (maxLoops < index) {
 106                     break;
 107                 }
 108
 109                 for (int i = 0; i <= groupCount; i++) {
 110                     context.put("_"+i, new NodeVariable(matcher.group(i)));
 111                 }
 112
 113                 BaseElementDef resultDef = regexpDef.getRegexpResultDef();
 114                 IVariable result = getBodyTextContent(resultDef, scraper, context);
 115                 debug(resultDef, scraper, result);
 116
 117                 String
  currResult = (result == null) ? matcher.group(0) : result.toString(); 118                 if (isReplace) {
 119                     matcher.appendReplacement(buffer, currResult);
 120                 } else {
 121                     resultList.add(new NodeVariable(currResult));
 122                 }
 123
 124                 for (int i = 0; i <= groupCount; i++) {
 125                     context.remove("_"+i);
 126                 }
 127             }
 128
 129             if (isReplace) {
 130                 matcher.appendTail(buffer);
 131                 resultList.add(new NodeVariable(buffer.toString()));
 132             }
 133         }
 134
 135
 136         return new ListVariable(resultList);
 137     }
 138
 139 }
                                                                                                                                                                                                             |                                                                       
 
 
 
 
 
                                                                                   Popular Tags                                                                                                                                                                                              |