KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xpath > compiler > Lexer


/*
 * Copyright 1999-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id: Lexer.java,v 1.14 2004/02/17 04:32:49 minchau Exp $
 */

19 package org.apache.xpath.compiler;
20
21 import java.util.Vector JavaDoc;
22
23 import org.apache.xml.utils.PrefixResolver;
24 import org.apache.xpath.res.XPATHErrorResources;
25
26 /**
27  * This class is in charge of lexical processing of the XPath
28  * expression into tokens.
29  */

30 class Lexer
31 {
32
33   /**
34    * The target XPath.
35    */

36   private Compiler JavaDoc m_compiler;
37
38   /**
39    * The prefix resolver to map prefixes to namespaces in the XPath.
40    */

41   PrefixResolver m_namespaceContext;
42
43   /**
44    * The XPath processor object.
45    */

46   XPathParser m_processor;
47
48   /**
49    * This value is added to each element name in the TARGETEXTRA
50    * that is a 'target' (right-most top-level element name).
51    */

52   static final int TARGETEXTRA = 10000;
53
54   /**
55    * Ignore this, it is going away.
56    * This holds a map to the m_tokenQueue that tells where the top-level elements are.
57    * It is used for pattern matching so the m_tokenQueue can be walked backwards.
58    * Each element that is a 'target', (right-most top level element name) has
59    * TARGETEXTRA added to it.
60    *
61    */

62   private int m_patternMap[] = new int[100];
63
64   /**
65    * Ignore this, it is going away.
66    * The number of elements that m_patternMap maps;
67    */

68   private int m_patternMapSize;
69
70   /**
71    * Create a Lexer object.
72    *
73    * @param compiler The owning compiler for this lexer.
74    * @param resolver The prefix resolver for mapping qualified name prefixes
75    * to namespace URIs.
76    * @param xpathProcessor The parser that is processing strings to opcodes.
77    */

78   Lexer(Compiler JavaDoc compiler, PrefixResolver resolver,
79         XPathParser xpathProcessor)
80   {
81
82     m_compiler = compiler;
83     m_namespaceContext = resolver;
84     m_processor = xpathProcessor;
85   }
86
87   /**
88    * Walk through the expression and build a token queue, and a map of the top-level
89    * elements.
90    * @param pat XSLT Expression.
91    *
92    * @throws javax.xml.transform.TransformerException
93    */

94   void tokenize(String JavaDoc pat) throws javax.xml.transform.TransformerException JavaDoc
95   {
96     tokenize(pat, null);
97   }
98
99   /**
100    * Walk through the expression and build a token queue, and a map of the top-level
101    * elements.
102    * @param pat XSLT Expression.
103    * @param targetStrings Vector to hold Strings, may be null.
104    *
105    * @throws javax.xml.transform.TransformerException
106    */

107   void tokenize(String JavaDoc pat, Vector JavaDoc targetStrings)
108           throws javax.xml.transform.TransformerException JavaDoc
109   {
110
111     m_compiler.m_currentPattern = pat;
112     m_patternMapSize = 0;
113
114     // This needs to grow too.
115
m_compiler.m_opMap = new OpMapVector(OpMap.MAXTOKENQUEUESIZE * 5, OpMap.BLOCKTOKENQUEUESIZE * 5, OpMap.MAPINDEX_LENGTH);
116
117     int nChars = pat.length();
118     int startSubstring = -1;
119     int posOfNSSep = -1;
120     boolean isStartOfPat = true;
121     boolean isAttrName = false;
122     boolean isNum = false;
123
124     // Nesting of '[' so we can know if the given element should be
125
// counted inside the m_patternMap.
126
int nesting = 0;
127
128     // char[] chars = pat.toCharArray();
129
for (int i = 0; i < nChars; i++)
130     {
131       char c = pat.charAt(i);
132
133       switch (c)
134       {
135       case '\"' :
136       {
137         if (startSubstring != -1)
138         {
139           isNum = false;
140           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
141           isAttrName = false;
142
143           if (-1 != posOfNSSep)
144           {
145             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
146           }
147           else
148           {
149             addToTokenQueue(pat.substring(startSubstring, i));
150           }
151         }
152
153         startSubstring = i;
154
155         for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++);
156
157         if (c == '\"' && i < nChars)
158         {
159           addToTokenQueue(pat.substring(startSubstring, i + 1));
160
161           startSubstring = -1;
162         }
163         else
164         {
165           m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE,
166                             null); //"misquoted literal... expected double quote!");
167
}
168       }
169       break;
170       case '\'' :
171         if (startSubstring != -1)
172         {
173           isNum = false;
174           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
175           isAttrName = false;
176
177           if (-1 != posOfNSSep)
178           {
179             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
180           }
181           else
182           {
183             addToTokenQueue(pat.substring(startSubstring, i));
184           }
185         }
186
187         startSubstring = i;
188
189         for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++);
190
191         if (c == '\'' && i < nChars)
192         {
193           addToTokenQueue(pat.substring(startSubstring, i + 1));
194
195           startSubstring = -1;
196         }
197         else
198         {
199           m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE,
200                             null); //"misquoted literal... expected single quote!");
201
}
202         break;
203       case 0x0A :
204       case 0x0D :
205       case ' ' :
206       case '\t' :
207         if (startSubstring != -1)
208         {
209           isNum = false;
210           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
211           isAttrName = false;
212
213           if (-1 != posOfNSSep)
214           {
215             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
216           }
217           else
218           {
219             addToTokenQueue(pat.substring(startSubstring, i));
220           }
221
222           startSubstring = -1;
223         }
224         break;
225       case '@' :
226         isAttrName = true;
227
228       // fall-through on purpose
229
case '-' :
230         if ('-' == c)
231         {
232           if (!(isNum || (startSubstring == -1)))
233           {
234             break;
235           }
236
237           isNum = false;
238         }
239
240       // fall-through on purpose
241
case '(' :
242       case '[' :
243       case ')' :
244       case ']' :
245       case '|' :
246       case '/' :
247       case '*' :
248       case '+' :
249       case '=' :
250       case ',' :
251       case '\\' : // Unused at the moment
252
case '^' : // Unused at the moment
253
case '!' : // Unused at the moment
254
case '$' :
255       case '<' :
256       case '>' :
257         if (startSubstring != -1)
258         {
259           isNum = false;
260           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
261           isAttrName = false;
262
263           if (-1 != posOfNSSep)
264           {
265             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
266           }
267           else
268           {
269             addToTokenQueue(pat.substring(startSubstring, i));
270           }
271
272           startSubstring = -1;
273         }
274         else if (('/' == c) && isStartOfPat)
275         {
276           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
277         }
278         else if ('*' == c)
279         {
280           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
281           isAttrName = false;
282         }
283
284         if (0 == nesting)
285         {
286           if ('|' == c)
287           {
288             if (null != targetStrings)
289             {
290               recordTokenString(targetStrings);
291             }
292
293             isStartOfPat = true;
294           }
295         }
296
297         if ((')' == c) || (']' == c))
298         {
299           nesting--;
300         }
301         else if (('(' == c) || ('[' == c))
302         {
303           nesting++;
304         }
305
306         addToTokenQueue(pat.substring(i, i + 1));
307         break;
308       case ':' :
309         if (i>0)
310         {
311           if (posOfNSSep == (i - 1))
312           {
313             if (startSubstring != -1)
314             {
315               if (startSubstring < (i - 1))
316                 addToTokenQueue(pat.substring(startSubstring, i - 1));
317             }
318
319             isNum = false;
320             isAttrName = false;
321             startSubstring = -1;
322             posOfNSSep = -1;
323
324             addToTokenQueue(pat.substring(i - 1, i + 1));
325
326             break;
327           }
328           else
329           {
330             posOfNSSep = i;
331           }
332         }
333
334       // fall through on purpose
335
default :
336         if (-1 == startSubstring)
337         {
338           startSubstring = i;
339           isNum = Character.isDigit(c);
340         }
341         else if (isNum)
342         {
343           isNum = Character.isDigit(c);
344         }
345       }
346     }
347
348     if (startSubstring != -1)
349     {
350       isNum = false;
351       isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
352
353       if ((-1 != posOfNSSep) ||
354          ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes())))
355       {
356         posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars);
357       }
358       else
359       {
360         addToTokenQueue(pat.substring(startSubstring, nChars));
361       }
362     }
363
364     if (0 == m_compiler.getTokenQueueSize())
365     {
366       m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null); //"Empty expression!");
367
}
368     else if (null != targetStrings)
369     {
370       recordTokenString(targetStrings);
371     }
372
373     m_processor.m_queueMark = 0;
374   }
375
376   /**
377    * Record the current position on the token queue as long as
378    * this is a top-level element. Must be called before the
379    * next token is added to the m_tokenQueue.
380    *
381    * @param nesting The nesting count for the pattern element.
382    * @param isStart true if this is the start of a pattern.
383    * @param isAttrName true if we have determined that this is an attribute name.
384    *
385    * @return true if this is the start of a pattern.
386    */

387   private boolean mapPatternElemPos(int nesting, boolean isStart,
388                                     boolean isAttrName)
389   {
390
391     if (0 == nesting)
392     {
393       if(m_patternMapSize >= m_patternMap.length)
394       {
395         int patternMap[] = m_patternMap;
396         int len = m_patternMap.length;
397         m_patternMap = new int[m_patternMapSize + 100];
398         System.arraycopy(patternMap, 0, m_patternMap, 0, len);
399       }
400       if (!isStart)
401       {
402         m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA;
403       }
404       m_patternMap[m_patternMapSize] =
405         (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA;
406
407       m_patternMapSize++;
408
409       isStart = false;
410     }
411
412     return isStart;
413   }
414
415   /**
416    * Given a map pos, return the corresponding token queue pos.
417    *
418    * @param i The index in the m_patternMap.
419    *
420    * @return the token queue position.
421    */

422   private int getTokenQueuePosFromMap(int i)
423   {
424
425     int pos = m_patternMap[i];
426
427     return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos;
428   }
429
430   /**
431    * Reset token queue mark and m_token to a
432    * given position.
433    * @param mark The new position.
434    */

435   private final void resetTokenMark(int mark)
436   {
437
438     int qsz = m_compiler.getTokenQueueSize();
439
440     m_processor.m_queueMark = (mark > 0)
441                               ? ((mark <= qsz) ? mark - 1 : mark) : 0;
442
443     if (m_processor.m_queueMark < qsz)
444     {
445       m_processor.m_token =
446         (String JavaDoc) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++);
447       m_processor.m_tokenChar = m_processor.m_token.charAt(0);
448     }
449     else
450     {
451       m_processor.m_token = null;
452       m_processor.m_tokenChar = 0;
453     }
454   }
455
456   /**
457    * Given a string, return the corresponding keyword token.
458    *
459    * @param key The keyword.
460    *
461    * @return An opcode value.
462    */

463   final int getKeywordToken(String JavaDoc key)
464   {
465
466     int tok;
467
468     try
469     {
470       Integer JavaDoc itok = (Integer JavaDoc) Keywords.m_keywords.get(key);
471
472       tok = (null != itok) ? itok.intValue() : 0;
473     }
474     catch (NullPointerException JavaDoc npe)
475     {
476       tok = 0;
477     }
478     catch (ClassCastException JavaDoc cce)
479     {
480       tok = 0;
481     }
482
483     return tok;
484   }
485
486   /**
487    * Record the current token in the passed vector.
488    *
489    * @param targetStrings Vector of string.
490    */

491   private void recordTokenString(Vector JavaDoc targetStrings)
492   {
493
494     int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1);
495
496     resetTokenMark(tokPos + 1);
497
498     if (m_processor.lookahead('(', 1))
499     {
500       int tok = getKeywordToken(m_processor.m_token);
501
502       switch (tok)
503       {
504       case OpCodes.NODETYPE_COMMENT :
505         targetStrings.addElement(PsuedoNames.PSEUDONAME_COMMENT);
506         break;
507       case OpCodes.NODETYPE_TEXT :
508         targetStrings.addElement(PsuedoNames.PSEUDONAME_TEXT);
509         break;
510       case OpCodes.NODETYPE_NODE :
511         targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
512         break;
513       case OpCodes.NODETYPE_ROOT :
514         targetStrings.addElement(PsuedoNames.PSEUDONAME_ROOT);
515         break;
516       case OpCodes.NODETYPE_ANYELEMENT :
517         targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
518         break;
519       case OpCodes.NODETYPE_PI :
520         targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
521         break;
522       default :
523         targetStrings.addElement(PsuedoNames.PSEUDONAME_ANY);
524       }
525     }
526     else
527     {
528       if (m_processor.tokenIs('@'))
529       {
530         tokPos++;
531
532         resetTokenMark(tokPos + 1);
533       }
534
535       if (m_processor.lookahead(':', 1))
536       {
537         tokPos += 2;
538       }
539
540       targetStrings.addElement(m_compiler.getTokenQueue().elementAt(tokPos));
541     }
542   }
543
544   /**
545    * Add a token to the token queue.
546    *
547    *
548    * @param s The token.
549    */

550   private final void addToTokenQueue(String JavaDoc s)
551   {
552     m_compiler.getTokenQueue().addElement(s);
553   }
554
555   /**
556    * When a seperator token is found, see if there's a element name or
557    * the like to map.
558    *
559    * @param pat The XPath name string.
560    * @param startSubstring The start of the name string.
561    * @param posOfNSSep The position of the namespace seperator (':').
562    * @param posOfScan The end of the name index.
563    *
564    * @throws javax.xml.transform.TransformerException
565    *
566    * @return -1 always.
567    */

568   private int mapNSTokens(String JavaDoc pat, int startSubstring, int posOfNSSep,
569                           int posOfScan)
570            throws javax.xml.transform.TransformerException JavaDoc
571  {
572
573     String JavaDoc prefix = "";
574     
575     if ((startSubstring >= 0) && (posOfNSSep >= 0))
576     {
577        prefix = pat.substring(startSubstring, posOfNSSep);
578     }
579     String JavaDoc uName;
580
581     if ((null != m_namespaceContext) &&!prefix.equals("*")
582             &&!prefix.equals("xmlns"))
583     {
584       try
585       {
586         if (prefix.length() > 0)
587           uName = ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
588             prefix);
589         else
590         {
591
592           // Assume last was wildcard. This is not legal according
593
// to the draft. Set the below to true to make namespace
594
// wildcards work.
595
if (false)
596           {
597             addToTokenQueue(":");
598
599             String JavaDoc s = pat.substring(posOfNSSep + 1, posOfScan);
600
601             if (s.length() > 0)
602               addToTokenQueue(s);
603
604             return -1;
605           }
606           else
607           {
608             uName =
609               ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
610                 prefix);
611           }
612         }
613       }
614       catch (ClassCastException JavaDoc cce)
615       {
616         uName = m_namespaceContext.getNamespaceForPrefix(prefix);
617       }
618     }
619     else
620     {
621       uName = prefix;
622     }
623
624     if ((null != uName) && (uName.length() > 0))
625     {
626       addToTokenQueue(uName);
627       addToTokenQueue(":");
628
629       String JavaDoc s = pat.substring(posOfNSSep + 1, posOfScan);
630
631       if (s.length() > 0)
632         addToTokenQueue(s);
633     }
634     else
635     {
636
637       // error("Could not locate namespace for prefix: "+prefix);
638
m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
639                  new String JavaDoc[] {prefix}); //"Prefix must resolve to a namespace: {0}";
640

641
642       /*** Old code commented out 10-Jan-2001
643       addToTokenQueue(prefix);
644       addToTokenQueue(":");
645
646       String s = pat.substring(posOfNSSep + 1, posOfScan);
647
648       if (s.length() > 0)
649         addToTokenQueue(s);
650       ***/

651     }
652
653     return -1;
654   }
655 }
656
Popular Tags