KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jaxen > saxpath > base > XPathLexer


1 /*
2  * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/saxpath/base/XPathLexer.java,v 1.10 2005/06/15 17:41:15 elharo Exp $
3  * $Revision: 1.10 $
4  * $Date: 2005/06/15 17:41:15 $
5  *
6  * ====================================================================
7  *
8  * Copyright (C) 2000-2002 bob mcwhirter & James Strachan.
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  *
15  * 1. Redistributions of source code must retain the above copyright
16  * notice, this list of conditions, and the following disclaimer.
17  *
18  * 2. Redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions, and the disclaimer that follows
20  * these conditions in the documentation and/or other materials
21  * provided with the distribution.
22  *
23  * 3. The name "Jaxen" must not be used to endorse or promote products
24  * derived from this software without prior written permission. For
25  * written permission, please contact license@jaxen.org.
26  *
27  * 4. Products derived from this software may not be called "Jaxen", nor
28  * may "Jaxen" appear in their name, without prior written permission
29  * from the Jaxen Project Management (pm@jaxen.org).
30  *
31  * In addition, we request (but do not require) that you include in the
32  * end-user documentation provided with the redistribution and/or in the
33  * software itself an acknowledgement equivalent to the following:
34  * "This product includes software developed by the
35  * Jaxen Project (http://www.jaxen.org/)."
36  * Alternatively, the acknowledgment may be graphical using the logos
37  * available at http://www.jaxen.org/
38  *
39  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
40  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
41  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
42  * DISCLAIMED. IN NO EVENT SHALL THE Jaxen AUTHORS OR THE PROJECT
43  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
45  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
46  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
47  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
48  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
49  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  * ====================================================================
53  * This software consists of voluntary contributions made by many
54  * individuals on behalf of the Jaxen Project and was originally
55  * created by bob mcwhirter <bob@werken.com> and
56  * James Strachan <jstrachan@apache.org>. For more information on the
57  * Jaxen Project, please see <http://www.jaxen.org/>.
58  *
59  * $Id: XPathLexer.java,v 1.10 2005/06/15 17:41:15 elharo Exp $
60  */

61
62
63
64
65 package org.jaxen.saxpath.base;
66
67 class XPathLexer
68 {
69     private String JavaDoc xpath;
70     private int currentPosition;
71     private int endPosition;
72
73     private Token previousToken;
74
75     XPathLexer(String JavaDoc xpath)
76     {
77         setXPath( xpath );
78     }
79
80     private void setXPath(String JavaDoc xpath)
81     {
82         this.xpath = xpath;
83         this.currentPosition = 0;
84         this.endPosition = xpath.length();
85     }
86
87     String JavaDoc getXPath()
88     {
89         return this.xpath;
90     }
91
92     Token nextToken()
93     {
94         Token token = null;
95
96         do
97         {
98             token = null;
99
100             switch ( LA(1) )
101             {
102                 case '$':
103                 {
104                     token = dollar();
105                     break;
106                 }
107                     
108                 case '"':
109                 case '\'':
110                 {
111                     token = literal();
112                     break;
113                 }
114                     
115                 case '/':
116                 {
117                     token = slashes();
118                     break;
119                 }
120
121                 case ',':
122                 {
123                     token = comma();
124                     break;
125                 }
126                     
127                 case '(':
128                 {
129                     token = leftParen();
130                     break;
131                 }
132                     
133                 case ')':
134                 {
135                     token = rightParen();
136                     break;
137                 }
138                     
139                 case '[':
140                 {
141                     token = leftBracket();
142                     break;
143                 }
144                     
145                 case ']':
146                 {
147                     token = rightBracket();
148                     break;
149                 }
150                     
151                 case '+':
152                 {
153                     token = plus();
154                     break;
155                 }
156                     
157                 case '-':
158                 {
159                     token = minus();
160                     break;
161                 }
162                     
163                 case '<':
164                 case '>':
165                 {
166                     token = relationalOperator();
167                     break;
168                 }
169
170                 case '=':
171                 {
172                     token = equals();
173                     break;
174                 }
175                     
176                 case '!':
177                 {
178                     if ( LA(2) == '=' )
179                     {
180                         token = notEquals();
181                     }
182                     else
183                     {
184                         token = not();
185                     }
186                     break;
187                 }
188                     
189                 case '|':
190                 {
191                     token = pipe();
192                     break;
193                 }
194                     
195                 case '@':
196                 {
197                     token = at();
198                     break;
199                 }
200                     
201                 case ':':
202                 {
203                     if ( LA(2) == ':' )
204                     {
205                         token = doubleColon();
206                     }
207                     else
208                     {
209                         token = colon();
210                     }
211                     break;
212                 }
213                     
214                 case '*':
215                 {
216                     token = star();
217                     break;
218                 }
219                     
220                 case '.':
221                 {
222                     switch ( LA(2) )
223                     {
224                         case '0':
225                         case '1':
226                         case '2':
227                         case '3':
228                         case '4':
229                         case '5':
230                         case '6':
231                         case '7':
232                         case '8':
233                         case '9':
234                         {
235                             token = number();
236                             break;
237                         }
238                         default:
239                         {
240                             token = dots();
241                             break;
242                         }
243                     }
244                     break;
245                 }
246
247                 case '0':
248                 case '1':
249                 case '2':
250                 case '3':
251                 case '4':
252                 case '5':
253                 case '6':
254                 case '7':
255                 case '8':
256                 case '9':
257                 {
258                     token = number();
259                     break;
260                 }
261
262                 case ' ':
263                 case '\t':
264                 case '\n':
265                 case '\r':
266                 {
267                     token = whitespace();
268                     break;
269                 }
270                     
271                 default:
272                 {
273                     if ( isIdentifierStartChar( LA(1) ) )
274                     {
275                         token = identifierOrOperatorName();
276                     }
277                 }
278             }
279
280             if ( token == null )
281             {
282                 if (!hasMoreChars())
283                 {
284                     token = new Token( TokenTypes.EOF,
285                                    getXPath(),
286                                    currentPosition(),
287                                    endPosition() );
288             }
289                 else
290                 {
291                     token = new Token( TokenTypes.ERROR,
292                                    getXPath(),
293                                    currentPosition(),
294                                    endPosition() );
295                 }
296             }
297
298         }
299         while ( token.getTokenType() == TokenTypes.SKIP );
300
301         setPreviousToken( token );
302         
303         return token;
304     }
305
306     private Token identifierOrOperatorName()
307     {
308         Token token = null;
309     
310         if ( previousToken != null )
311         {
312             // For some reason, section 3.7, Lexical structure,
313
// doesn't seem to feel like it needs to mention the
314
// SLASH, DOUBLE_SLASH, and COLON tokens for the test
315
// if an NCName is an operator or not.
316
//
317
// According to section 3.7, "/foo" should be considered
318
// as a SLASH following by an OperatorName being 'foo'.
319
// Which is just simply, clearly, wrong, in my mind.
320
//
321
// -bob
322

323             switch ( previousToken.getTokenType() )
324             {
325                 case TokenTypes.AT:
326                 case TokenTypes.DOUBLE_COLON:
327                 case TokenTypes.LEFT_PAREN:
328                 case TokenTypes.LEFT_BRACKET:
329                 case TokenTypes.AND:
330                 case TokenTypes.OR:
331                 case TokenTypes.MOD:
332                 case TokenTypes.DIV:
333                 case TokenTypes.COLON:
334                 case TokenTypes.SLASH:
335                 case TokenTypes.DOUBLE_SLASH:
336                 case TokenTypes.PIPE:
337                 case TokenTypes.DOLLAR:
338                 case TokenTypes.PLUS:
339                 case TokenTypes.MINUS:
340                 case TokenTypes.STAR:
341                 case TokenTypes.COMMA:
342                 case TokenTypes.LESS_THAN_SIGN:
343                 case TokenTypes.GREATER_THAN_SIGN:
344                 case TokenTypes.LESS_THAN_OR_EQUALS_SIGN:
345                 case TokenTypes.GREATER_THAN_OR_EQUALS_SIGN:
346                 case TokenTypes.EQUALS:
347                 case TokenTypes.NOT_EQUALS:
348                 {
349                     token = identifier();
350                     break;
351                 }
352                 default:
353                 {
354                     token = operatorName();
355                     break;
356                 }
357             }
358         }
359         else
360         {
361             token = identifier();
362         }
363     
364         return token;
365     }
366     
367     private Token identifier()
368     {
369         Token token = null;
370     
371         int start = currentPosition();
372     
373         while ( hasMoreChars() )
374         {
375             if ( isIdentifierChar( LA(1) ) )
376             {
377                 consume();
378             }
379             else
380             {
381                 break;
382             }
383         }
384     
385         token = new Token( TokenTypes.IDENTIFIER,
386                            getXPath(),
387                            start,
388                            currentPosition() );
389     
390         return token;
391     }
392     
393     private Token operatorName()
394     {
395         Token token = null;
396     
397         switch ( LA(1) )
398         {
399             case 'a':
400             {
401                 token = and();
402                 break;
403             }
404     
405             case 'o':
406             {
407                 token = or();
408                 break;
409             }
410     
411             case 'm':
412             {
413                 token = mod();
414                 break;
415             }
416     
417             case 'd':
418             {
419                 token = div();
420                 break;
421             }
422         }
423     
424         return token;
425     }
426     
427     private Token mod()
428     {
429         Token token = null;
430     
431         if ( ( LA(1) == 'm' )
432              &&
433              ( LA(2) == 'o' )
434              &&
435              ( LA(3) == 'd' )
436              &&
437              ( ! isIdentifierChar( LA(4) ) ) )
438         {
439             token = new Token( TokenTypes.MOD,
440                                getXPath(),
441                                currentPosition(),
442                                currentPosition()+3 );
443     
444             consume();
445             consume();
446             consume();
447         }
448     
449         return token;
450     }
451     
452     private Token div()
453     {
454         Token token = null;
455     
456         if ( ( LA(1) == 'd' )
457              &&
458              ( LA(2) == 'i' )
459              &&
460              ( LA(3) == 'v' )
461              &&
462              ( ! isIdentifierChar( LA(4) ) ) )
463         {
464             token = new Token( TokenTypes.DIV,
465                                getXPath(),
466                                currentPosition(),
467                                currentPosition()+3 );
468     
469             consume();
470             consume();
471             consume();
472         }
473     
474         return token;
475     }
476     
477     private Token and()
478     {
479         Token token = null;
480     
481         if ( ( LA(1) == 'a' )
482              &&
483              ( LA(2) == 'n' )
484              &&
485              ( LA(3) == 'd' )
486              &&
487              ( ! isIdentifierChar( LA(4) ) ) )
488         {
489             token = new Token( TokenTypes.AND,
490                                getXPath(),
491                                currentPosition(),
492                                currentPosition()+3 );
493     
494             consume();
495             consume();
496             consume();
497         }
498     
499         return token;
500     }
501     
502     private Token or()
503     {
504         Token token = null;
505     
506         if ( ( LA(1) == 'o' )
507              &&
508              ( LA(2) == 'r' )
509              &&
510              ( ! isIdentifierChar( LA(3) ) ) )
511         {
512             token = new Token( TokenTypes.OR,
513                                getXPath(),
514                                currentPosition(),
515                                currentPosition()+2 );
516     
517             consume();
518             consume();
519         }
520     
521         return token;
522     }
523     
524     private Token number()
525     {
526         int start = currentPosition();
527         boolean periodAllowed = true;
528     
529       loop:
530         while( true )
531         {
532             switch ( LA(1) )
533             {
534                 case '.':
535                 {
536                     if ( periodAllowed )
537                     {
538                         periodAllowed = false;
539                         consume();
540                     }
541                     else
542                     {
543                         break loop;
544                     }
545                     break;
546                 }
547                 
548                 case '0':
549                 case '1':
550                 case '2':
551                 case '3':
552                 case '4':
553                 case '5':
554                 case '6':
555                 case '7':
556                 case '8':
557                 case '9':
558                 {
559                     consume();
560                     break;
561                 }
562                 default:
563                 {
564                     break loop;
565                 }
566             }
567         }
568     
569         Token token = null;
570     
571         if ( periodAllowed )
572         {
573             token = new Token( TokenTypes.INTEGER,
574                                getXPath(),
575                                start,
576                                currentPosition() );
577         }
578         else
579         {
580             token = new Token( TokenTypes.DOUBLE,
581                                getXPath(),
582                                start,
583                                currentPosition() );
584         }
585     
586         return token;
587     }
588     
589     private Token whitespace()
590     {
591         consume();
592             
593       loop:
594         while( hasMoreChars() )
595         {
596             switch ( LA(1) )
597             {
598                 case ' ':
599                 case '\t':
600                 case '\n':
601                 case '\r':
602                 {
603                     consume();
604                     break;
605                 }
606                     
607                 default:
608                 {
609                     break loop;
610                 }
611             }
612         }
613     
614         return new Token( TokenTypes.SKIP,
615                           getXPath(),
616                           0,
617                           0 );
618     }
619     
620     private Token comma()
621     {
622         Token token = new Token( TokenTypes.COMMA,
623                                  getXPath(),
624                                  currentPosition(),
625                                  currentPosition()+1 );
626     
627         consume();
628     
629         return token;
630     }
631     
632     private Token equals()
633     {
634         Token token = new Token( TokenTypes.EQUALS,
635                                  getXPath(),
636                                  currentPosition(),
637                                  currentPosition()+1 );
638     
639         consume();
640     
641         return token;
642     }
643     
644     private Token minus()
645     {
646         Token token = new Token( TokenTypes.MINUS,
647                                  getXPath(),
648                                  currentPosition(),
649                                  currentPosition()+1 );
650         consume();
651             
652         return token;
653     }
654     
655     private Token plus()
656     {
657         Token token = new Token( TokenTypes.PLUS,
658                                  getXPath(),
659                                  currentPosition(),
660                                  currentPosition()+1 );
661         consume();
662     
663         return token;
664     }
665     
666     private Token dollar()
667     {
668         Token token = new Token( TokenTypes.DOLLAR,
669                                  getXPath(),
670                                  currentPosition(),
671                                  currentPosition()+1 );
672         consume();
673     
674         return token;
675     }
676     
677     private Token pipe()
678     {
679         Token token = new Token( TokenTypes.PIPE,
680                                  getXPath(),
681                                  currentPosition(),
682                                  currentPosition()+1 );
683     
684         consume();
685     
686         return token;
687     }
688     
689     private Token at()
690     {
691         Token token = new Token( TokenTypes.AT,
692                                  getXPath(),
693                                  currentPosition(),
694                                  currentPosition()+1 );
695     
696         consume();
697     
698         return token;
699     }
700     
701     private Token colon()
702     {
703         Token token = new Token( TokenTypes.COLON,
704                                  getXPath(),
705                                  currentPosition(),
706                                  currentPosition()+1 );
707         consume();
708     
709         return token;
710     }
711     
712     private Token doubleColon()
713     {
714         Token token = new Token( TokenTypes.DOUBLE_COLON,
715                                  getXPath(),
716                                  currentPosition(),
717                                  currentPosition()+2 );
718     
719         consume();
720         consume();
721     
722         return token;
723     }
724     
725     private Token not()
726     {
727         Token token = new Token( TokenTypes.NOT,
728                                  getXPath(),
729                                  currentPosition(),
730                                  currentPosition() + 1 );
731     
732         consume();
733     
734         return token;
735     }
736     
737     private Token notEquals()
738     {
739         Token token = new Token( TokenTypes.NOT_EQUALS,
740                                  getXPath(),
741                                  currentPosition(),
742                                  currentPosition() + 2 );
743     
744         consume();
745         consume();
746     
747         return token;
748     }
749     
750     private Token relationalOperator()
751     {
752         Token token = null;
753     
754         switch ( LA(1) )
755         {
756             case '<':
757             {
758                 if ( LA(2) == '=' )
759                 {
760                     token = new Token( TokenTypes.LESS_THAN_OR_EQUALS_SIGN,
761                                        getXPath(),
762                                        currentPosition(),
763                                        currentPosition() + 2 );
764                     consume();
765                 }
766                 else
767                 {
768                     token = new Token( TokenTypes.LESS_THAN_SIGN,
769                                        getXPath(),
770                                        currentPosition(),
771                                        currentPosition() + 1);
772                 }
773     
774                 consume();
775                 break;
776             }
777             case '>':
778             {
779                 if ( LA(2) == '=' )
780                 {
781                     token = new Token( TokenTypes.GREATER_THAN_OR_EQUALS_SIGN,
782                                        getXPath(),
783                                        currentPosition(),
784                                        currentPosition() + 2 );
785                     consume();
786                 }
787                 else
788                 {
789                     token = new Token( TokenTypes.GREATER_THAN_SIGN,
790                                        getXPath(),
791                                        currentPosition(),
792                                        currentPosition() + 1 );
793                 }
794     
795                 consume();
796                 break;
797             }
798         }
799     
800         return token;
801                 
802     }
803     
804     private Token star()
805     {
806         Token token = new Token( TokenTypes.STAR,
807                                  getXPath(),
808                                  currentPosition(),
809                                  currentPosition()+1 );
810     
811         consume();
812             
813         return token;
814     }
815     
816     private Token literal()
817     {
818         Token token = null;
819     
820         char match = LA(1);
821     
822         consume();
823     
824         int start = currentPosition();
825             
826         while ( ( token == null )
827                 &&
828                 hasMoreChars() )
829         {
830             if ( LA(1) == match )
831             {
832                 token = new Token( TokenTypes.LITERAL,
833                                    getXPath(),
834                                    start,
835                                    currentPosition() );
836             }
837             consume();
838         }
839     
840         return token;
841     }
842     
843     private Token dots()
844     {
845         Token token = null;
846     
847         switch ( LA(2) )
848         {
849             case '.':
850             {
851                 token = new Token( TokenTypes.DOT_DOT,
852                                    getXPath(),
853                                    currentPosition(),
854                                    currentPosition()+2 ) ;
855                 consume();
856                 consume();
857                 break;
858             }
859             default:
860             {
861                 token = new Token( TokenTypes.DOT,
862                                    getXPath(),
863                                    currentPosition(),
864                                    currentPosition()+1 );
865                 consume();
866                 break;
867             }
868         }
869     
870         return token;
871     }
872     
873     private Token leftBracket()
874     {
875         Token token = new Token( TokenTypes.LEFT_BRACKET,
876                                  getXPath(),
877                                  currentPosition(),
878                                  currentPosition()+1 );
879     
880         consume();
881     
882         return token;
883     }
884     
885     private Token rightBracket()
886     {
887         Token token = new Token( TokenTypes.RIGHT_BRACKET,
888                                  getXPath(),
889                                  currentPosition(),
890                                  currentPosition()+1 );
891     
892         consume();
893     
894         return token;
895     }
896     
897     private Token leftParen()
898     {
899         Token token = new Token( TokenTypes.LEFT_PAREN,
900                                  getXPath(),
901                                  currentPosition(),
902                                  currentPosition()+1 );
903     
904         consume();
905     
906         return token;
907     }
908     
909     private Token rightParen()
910     {
911         Token token = new Token( TokenTypes.RIGHT_PAREN,
912                                  getXPath(),
913                                  currentPosition(),
914                                  currentPosition()+1 );
915     
916         consume();
917     
918         return token;
919     }
920     
921     private Token slashes()
922     {
923         Token token = null;
924     
925         switch ( LA(2) )
926         {
927             case '/':
928             {
929                 token = new Token( TokenTypes.DOUBLE_SLASH,
930                                    getXPath(),
931                                    currentPosition(),
932                                    currentPosition()+2 );
933                 consume();
934                 consume();
935                 break;
936             }
937             default:
938             {
939                 token = new Token( TokenTypes.SLASH,
940                                    getXPath(),
941                                    currentPosition(),
942                                    currentPosition()+1 );
943                 consume();
944             }
945         }
946     
947         return token;
948     }
949     
950     private char LA(int i)
951     {
952         if ( currentPosition + ( i - 1 ) >= endPosition() )
953         {
954             return (char) -1;
955         }
956     
957         return getXPath().charAt( currentPosition() + (i - 1) );
958     }
959     
960     private void consume()
961     {
962         ++this.currentPosition;
963     }
964     
965     private int currentPosition()
966     {
967         return this.currentPosition;
968     }
969     
970     private int endPosition()
971     {
972         return this.endPosition;
973     }
974     
975     private void setPreviousToken(Token previousToken)
976     {
977         this.previousToken = previousToken;
978     }
979     
980     private boolean hasMoreChars()
981     {
982         return currentPosition() < endPosition();
983     }
984     
985     private boolean isIdentifierChar(char c)
986     {
987         return Verifier.isXMLNCNameCharacter( c );
988     }
989     
990     private boolean isIdentifierStartChar(char c)
991     {
992         return Verifier.isXMLNCNameStartCharacter( c );
993     }
994
995 }
996
Popular Tags