package net.sf.saxon.functions;

import net.sf.saxon.expr.Expression;
import net.sf.saxon.expr.StaticContext;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.om.Item;
import net.sf.saxon.om.SequenceIterator;
import net.sf.saxon.om.EmptyIterator;
import net.sf.saxon.trans.DynamicError;
import net.sf.saxon.trans.StaticError;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.type.RegexTranslator;
import net.sf.saxon.value.AtomicValue;
import net.sf.saxon.value.StringValue;
import net.sf.saxon.value.Value;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * Tokenize function: splits the string value of the first argument into
 * tokens, using matches of a regular expression (second argument) as the
 * separators. An optional third argument supplies regular-expression flags.
 */
public class Tokenize extends SystemFunction {

    /** Message used when the separator pattern matches the empty string. */
    private static final String ZERO_LENGTH_MATCH_MESSAGE =
            "The regular expression must not be one that matches a zero-length string";

    /** Error code reported when the separator pattern matches the empty string. */
    private static final String ZERO_LENGTH_MATCH_CODE = "FORX0003";

    /**
     * The separator pattern when it could be compiled during
     * {@link #simplify}; null means it must be compiled on each evaluation.
     */
    private Pattern regexp;

    /**
     * Simplifies the function call and, where possible, compiles the
     * regular expression ahead of evaluation.
     *
     * @param env the static context
     * @return the simplified expression, as returned by simplifyArguments
     * @throws XPathException if the pre-compiled regular expression matches
     *     a zero-length string, or if simplification of the arguments fails
     */
    public Expression simplify(StaticContext env) throws XPathException {
        Expression e = simplifyArguments(env);

        if (!(e instanceof Value)) {
            // Presumably tryToCompile yields null when the pattern or flags are
            // not known yet; iterate() then compiles the pattern per evaluation.
            regexp = Matches.tryToCompile(argument, 1, 2);
            if (regexp != null) {
                rejectZeroLengthMatch(regexp);
            }
        }

        return e;
    }

    /**
     * Evaluates the function, returning an iterator over the tokens.
     *
     * @param c the dynamic evaluation context
     * @return an empty iterator if the input evaluates to null or to a
     *     zero-length string, otherwise an iterator over the tokens
     * @throws XPathException if the regular expression cannot be translated
     *     or compiled, or if it matches a zero-length string
     */
    public SequenceIterator iterate(XPathContext c) throws XPathException {
        AtomicValue sv = (AtomicValue) argument[0].evaluateItem(c);
        if (sv == null) {
            return EmptyIterator.getInstance();
        }
        CharSequence input = sv.getStringValueCS();
        if (input.length() == 0) {
            return EmptyIterator.getInstance();
        }

        Pattern re = regexp;
        if (re == null) {
            // No pre-compiled pattern: build one from the run-time arguments.
            sv = (AtomicValue) argument[1].evaluateItem(c);
            CharSequence pattern = sv.getStringValueCS();

            CharSequence flags;
            if (argument.length == 2) {
                flags = "";
            } else {
                sv = (AtomicValue) argument[2].evaluateItem(c);
                flags = sv.getStringValueCS();
            }

            try {
                String javaRegex = RegexTranslator.translate(pattern, true);
                re = Pattern.compile(javaRegex, Matches.setFlags(flags));
            } catch (RegexTranslator.RegexSyntaxException err) {
                throw new DynamicError(err);
            } catch (PatternSyntaxException err) {
                throw new DynamicError(err);
            }

            // Same check, and now the same error code, as the simplify() path.
            rejectZeroLengthMatch(re);
        }
        return new TokenIterator(input, re);
    }

    /**
     * Rejects a separator pattern that matches the empty string, reporting
     * error code FORX0003.
     *
     * NOTE(review): iterate() raises this at evaluation time yet the type is
     * StaticError, exactly as the original code did; the type is kept so that
     * existing callers continue to see the same exception class.
     *
     * @param re the compiled separator pattern
     * @throws XPathException if the pattern matches a zero-length string
     */
    private static void rejectZeroLengthMatch(Pattern re) throws XPathException {
        if (re.matcher("").matches()) {
            StaticError err = new StaticError(ZERO_LENGTH_MATCH_MESSAGE);
            err.setErrorCode(ZERO_LENGTH_MATCH_CODE);
            throw err;
        }
    }

    /**
     * Iterator over the tokens of an input string, where tokens are the
     * substrings lying between successive matches of a pattern.
     */
    public static class TokenIterator implements SequenceIterator {

        private final CharSequence input;
        private final Pattern pattern;
        private final Matcher matcher;
        private CharSequence current;
        private int position = 0;
        // End offset of the previous separator match; -1 once the final token
        // has been delivered.
        private int prevEnd = 0;

        /**
         * Creates a token iterator.
         *
         * @param input the string to be tokenized
         * @param pattern the compiled pattern whose matches separate the tokens
         */
        public TokenIterator(CharSequence input, Pattern pattern) {
            this.input = input;
            this.pattern = pattern;
            this.matcher = pattern.matcher(input);
        }

        /**
         * Returns the next token, or null when all tokens have been returned.
         * The text after the last separator is always delivered as a final
         * token, even when it is empty.
         */
        public Item next() {
            if (prevEnd < 0) {
                // Final token already delivered on an earlier call.
                current = null;
                position = -1;
                return null;
            }

            if (matcher.find()) {
                current = input.subSequence(prevEnd, matcher.start());
                prevEnd = matcher.end();
            } else {
                // No further separator: the remainder is the last token.
                current = input.subSequence(prevEnd, input.length());
                prevEnd = -1;
            }
            position++;
            return StringValue.makeStringValue(current);
        }

        /**
         * Returns the token most recently returned by next(), or null if
         * there is none.
         */
        public Item current() {
            return (current == null ? null : StringValue.makeStringValue(current));
        }

        /**
         * Returns the number of tokens returned so far, or -1 once next()
         * has reported the end of the sequence.
         */
        public int position() {
            return position;
        }

        /**
         * Returns a new iterator over the same input and pattern, positioned
         * at the start.
         */
        public SequenceIterator getAnother() {
            return new TokenIterator(input, pattern);
        }

        /**
         * Returns the properties of this iterator; always 0 (presumably
         * meaning that no special properties are advertised).
         */
        public int getProperties() {
            return 0;
        }

    }

    /**
     * Simple command-line test harness: splits args[0] using args[1] as a
     * java.util.regex pattern and prints each resulting piece in brackets.
     *
     * @param args the input string followed by the regular expression
     * @throws Exception if the regular expression cannot be compiled
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            // Previously this failed with ArrayIndexOutOfBoundsException.
            System.err.println("Usage: Tokenize <input> <regex>");
            return;
        }
        String in = args[0];
        String[] out = Pattern.compile(args[1]).split(in, 0);
        System.out.println("results");
        for (int i = 0; i < out.length; i++) {
            System.out.println('[' + out[i] + ']');
        }
        System.out.println("end results");
    }

}