|                                                                                                              1   package net.sf.saxon.functions;
 2   import net.sf.saxon.expr.Expression;
 3   import net.sf.saxon.expr.StaticContext;
 4   import net.sf.saxon.expr.XPathContext;
 5   import net.sf.saxon.om.Item;
 6   import net.sf.saxon.om.SequenceIterator;
 7   import net.sf.saxon.om.EmptyIterator;
 8   import net.sf.saxon.trans.DynamicError;
 9   import net.sf.saxon.trans.StaticError;
 10  import net.sf.saxon.trans.XPathException;
 11  import net.sf.saxon.type.RegexTranslator;
 12  import net.sf.saxon.value.AtomicValue;
 13  import net.sf.saxon.value.StringValue;
 14  import net.sf.saxon.value.Value;
 15
 16  import java.util.regex.Matcher
  ; 17  import java.util.regex.Pattern
  ; 18  import java.util.regex.PatternSyntaxException
  ; 19
 20
 21
 26
 27  public class Tokenize extends SystemFunction  {
 28
 29      private Pattern
  regexp; 30
 31
 35
 36       public Expression simplify(StaticContext env) throws XPathException {
 37          Expression e = simplifyArguments(env);
 38
 39                  if (!(e instanceof Value)) {
 41              regexp = Matches.tryToCompile(argument, 1, 2);
 42                          if (regexp != null && regexp.matcher("").matches()) {
 44                  StaticError err = new StaticError(
 45                          "The regular expression must not be one that matches a zero-length string");
 46                  err.setErrorCode("FORX0003");
 47                  throw err;
 48              }
 49          }
 50
 51          return e;
 52      }
 53
 54
 57
 58      public SequenceIterator iterate(XPathContext c) throws XPathException {
 59          AtomicValue sv = (AtomicValue)argument[0].evaluateItem(c);
 60          if (sv==null) {
 61              return EmptyIterator.getInstance();
 62          };
 63          CharSequence
  input = sv.getStringValueCS(); 64          if (input.length() == 0) {
 65              return EmptyIterator.getInstance();
 66          }
 67
 68          Pattern
  re = regexp; 69          if (re == null) {
 70
 71              sv = (AtomicValue)argument[1].evaluateItem(c);
 72              CharSequence
  pattern = sv.getStringValueCS(); 73
 74              CharSequence
  flags; 75              if (argument.length==2) {
 76                  flags = "";
 77              } else {
 78                  sv = (AtomicValue)argument[2].evaluateItem(c);
 79                  flags = sv.getStringValueCS();
 80              }
 81
 82              try {
 83                  String
  javaRegex = RegexTranslator.translate(pattern, true); 84                  re = Pattern.compile(javaRegex, Matches.setFlags(flags));
 85              } catch (RegexTranslator.RegexSyntaxException err) {
 86                  throw new DynamicError(err);
 87              } catch (PatternSyntaxException
  err) { 88                  throw new DynamicError(err);
 89              }
 90
 91                          if (re.matcher("").matches()) {
 93                  throw new StaticError(
 94                          "The regular expression must not be one that matches a zero-length string");
 95              }
 96
 97          }
 98          return new TokenIterator(input, re);
 99      }
 100
 101
 102
 105
 106     public static class TokenIterator implements SequenceIterator {
 107
 108         private CharSequence
  input; 109         private Pattern
  pattern; 110         private Matcher
  matcher; 111         private CharSequence
  current; 112         private int position = 0;
 113         private int prevEnd = 0;
 114
 115
 116
 119
 120         public TokenIterator (CharSequence
  input, Pattern  pattern) { 121             this.input = input;
 122             this.pattern = pattern;
 123             matcher = pattern.matcher(input);
 124             prevEnd = 0;
 125         }
 126
 127         public Item next() {
 128             if (prevEnd < 0) {
 129                 current = null;
 130                 position = -1;
 131                 return null;
 132             }
 133
 134             if (matcher.find()) {
 135                 current = input.subSequence(prevEnd, matcher.start());
 136                 prevEnd = matcher.end();
 137             } else {
 138                 current = input.subSequence(prevEnd, input.length());
 139                 prevEnd = -1;
 140             }
 141             position++;
 142             return StringValue.makeStringValue(current);
 143         }
 144
 145         public Item current() {
 146             return (current==null ? null : StringValue.makeStringValue(current));
 147         }
 148
 149         public int position() {
 150             return position;
 151         }
 152
 153         public SequenceIterator getAnother() {
 154             return new TokenIterator(input, pattern);
 155         }
 156
 157
 166
 167         public int getProperties() {
 168             return 0;
 169         }
 170
 171     }
 172
 173
 178
 179     public static void main(String
  [] args) throws Exception  { 180         String
  in = args[0]; 181         String
  [] out = Pattern.compile(args[1]).split(in, 0); 182         System.out.println("results");
 183         for (int i=0; i<out.length; i++) {
 184             System.out.println('[' + out[i] + ']');
 185         }
 186         System.out.println("end results");
 187     }
 188
 189
 190
 191 }
 192
 193
 194
 195
                                                                                                                                                                                                             |                                                                       
 
 
 
 
 
                                                                                   Popular Tags                                                                                                                                                                                              |