1 25 package org.archive.util.iterator; 26 27 import java.util.Iterator ; 28 import java.util.logging.Logger ; 29 import java.util.regex.Matcher ; 30 import java.util.regex.Pattern ; 31 32 45 public class RegexpLineIterator 46 extends TransformingIteratorWrapper<String ,String > { 47 private static final Logger logger = 48 Logger.getLogger(RegexpLineIterator.class.getName()); 49 50 public static final String COMMENT_LINE = "\\s*(#.*)?"; 51 public static final String NONWHITESPACE_ENTRY_TRAILING_COMMENT = 52 "^\\s*(\\S+)\\s*(#.*)?$"; 53 public static final String TRIMMED_ENTRY_TRAILING_COMMENT = 54 "^\\s*([^#]+?)\\s*(#.*)?$"; 55 56 public static final String ENTRY = "$1"; 57 58 protected Matcher ignoreLine = null; 59 protected Matcher extractLine = null; 60 protected String outputTemplate = null; 61 62 63 public RegexpLineIterator(Iterator <String > inner, String ignore, 64 String extract, String replace) { 65 this.inner = inner; 66 ignoreLine = Pattern.compile(ignore).matcher(""); 67 extractLine = Pattern.compile(extract).matcher(""); 68 outputTemplate = replace; 69 } 70 71 79 protected String transform(String line) { 80 ignoreLine.reset(line); 81 if(ignoreLine.matches()) { 82 return null; 83 } 84 extractLine.reset(line); 85 if(extractLine.matches()) { 86 StringBuffer output = new StringBuffer (); 87 extractLine.appendReplacement(output,outputTemplate); 90 return output.toString(); 91 } 92 logger.info("nonsense line: "+line); 94 return null; 95 } 96 } 97 | Popular Tags |