KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > nu > xom > Verifier


1 /* Copyright 2002-2004 Elliotte Rusty Harold
2    
3    This library is free software; you can redistribute it and/or modify
4    it under the terms of version 2.1 of the GNU Lesser General Public
5    License as published by the Free Software Foundation.
6    
7    This library is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10    GNU Lesser General Public License for more details.
11    
12    You should have received a copy of the GNU Lesser General Public
13    License along with this library; if not, write to the
14    Free Software Foundation, Inc., 59 Temple Place, Suite 330,
15    Boston, MA 02111-1307 USA
16    
17    You can contact Elliotte Rusty Harold by sending e-mail to
18    elharo@metalab.unc.edu. Please include the word "XOM" in the
19    subject line. The XOM home page is located at http://www.xom.nu/
20 */

21
22 package nu.xom;
23
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.StringTokenizer;
27
28 /**
29  * <p>
30  * <code>Verifier</code> checks names and data for
31  * compliance with XML 1.0 and Namespaces in XML rules.
32  * </p>
33  *
34  * @author Elliotte Rusty Harold
35  * @version 1.0
36  *
37  */

38 final class Verifier {
39     
40     private Verifier() {}
41         
42     // constants for the bit flags in the characters lookup table
43
private final static byte XML_CHARACTER = 1;
44     private final static byte NAME_CHARACTER = 2;
45     private final static byte NAME_START_CHARACTER = 4;
46     private final static byte NCNAME_CHARACTER = 8;
47     
48     private static byte[] flags = null;
49
// Loads the character lookup table when the class is initialized.
static {

    // Start with the loader that loaded this class. getClassLoader()
    // may return null for the bootstrap loader.
    ClassLoader loader = Verifier.class.getClassLoader();
    if (loader != null) loadFlags(loader);

    // If that didn't work, try a different ClassLoader. The context
    // class loader may be null too, so it needs the same guard.
    if (flags == null) {
        loader = Thread.currentThread().getContextClassLoader();
        if (loader != null) loadFlags(loader);
    }
    if (flags == null) {
        throw new RuntimeException(
            "Verifier couldn't load the lookup table"
        );
    }

}
66     
67     
68     private static void loadFlags(ClassLoader JavaDoc loader) {
69         
70         DataInputStream JavaDoc in = new DataInputStream JavaDoc(
71           loader.getResourceAsStream("nu/xom/characters.dat"));
72         if (in == null) return;
73         
74         try {
75             flags = new byte[65536];
76             in.readFully(flags);
77         }
78         catch (IOException JavaDoc ex) {
79             throw new RuntimeException JavaDoc("Broken XOM installation: "
80               + "could not load nu/xom/characters.dat");
81         }
82         finally {
83             try {
84                 in.close();
85             }
86             catch (IOException JavaDoc ex) {
87                 // no big deal
88
}
89         }
90         
91     }
92
93
94     /**
95      * <p>
96      * Check whether <code>name</code> is
97      * a non-colonized name as defined in
98      * <cite>Namespaces in XML</cite>.
99      * </p>
100      *
101      * @param name <code>String</code> name to check
102      *
103      * @throws IllegalNameException if <code>name</code> is not a
104      * non-colonized name
105      */

106     static void checkNCName(String JavaDoc name) {
107
108         if (name == null) {
109             throwIllegalNameException(name, "NCNames cannot be null");
110         }
111         
112         int length = name.length();
113         if (length == 0) {
114             throwIllegalNameException(name, "NCNames cannot be empty");
115         }
116         
117         char first = name.charAt(0);
118         if ((flags[first] & NAME_START_CHARACTER) == 0) {
119             throwIllegalNameException(name, "NCNames cannot start " +
120               "with the character " + Integer.toHexString(first));
121         }
122         
123         for (int i = 1; i < length; i++) {
124             char c = name.charAt(i);
125             if ((flags[c] & NCNAME_CHARACTER) == 0) {
126                 if (c == ':') {
127                     throwIllegalNameException(name, "NCNames cannot contain colons");
128                 }
129                 else {
130                     throwIllegalNameException(name, "0x"
131                       + Integer.toHexString(c) + " is not a legal NCName character");
132                 }
133             }
134         }
135
136     }
137
138     
139     private static void throwIllegalNameException(String JavaDoc name, String JavaDoc message) {
140         IllegalNameException ex = new IllegalNameException(message);
141         ex.setData(name);
142         throw ex;
143     }
144
145
146     private static void throwIllegalCharacterDataException(String JavaDoc data, String JavaDoc message) {
147         IllegalDataException ex = new IllegalCharacterDataException(message);
148         ex.setData(data);
149         throw ex;
150     }
151
152
153     private static void throwMalformedURIException(String JavaDoc uri, String JavaDoc message) {
154         MalformedURIException ex = new MalformedURIException(message);
155         ex.setData(uri);
156         throw ex;
157     }
158
159
160     /**
161      * <p>
162      * This methods checks whether a string contains only
163      * characters allowed by the XML 1.0 specification.
164      * </p>
165      *
166      * @param text <code>String</code> value to verify
167      *
168      * @throws IllegalCharacterDataException if <code>text</code> is
169      * not legal PCDATA
170      */

171     static void checkPCDATA(String JavaDoc text) {
172         
173         if (text == null) throwIllegalCharacterDataException(text, "Null text");
174
175         char[] data = text.toCharArray();
176         for (int i = 0, len = data.length; i < len; i++) {
177             int result = data[i];
178             if (result >= 0xD800 && result <= 0xDBFF) {
179                 try {
180                     int low = data[i+1];
181                     if (low < 0xDC00 || low > 0xDFFF) {
182                         IllegalCharacterDataException ex
183                           = new IllegalCharacterDataException("Bad surrogate pair");
184                         ex.setData(text);
185                         throw ex;
186                     }
187                     i++; // increment past low surrogate
188
}
189                 catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
190                     IllegalCharacterDataException ide
191                       = new IllegalCharacterDataException("Bad Surrogate Pair", ex);
192                     ide.setData(text);
193                     throw ide;
194                 }
195                 // all properly matched surrogate pairs are legal in PCDATA
196
} // end if
197
else if ((flags[result] & XML_CHARACTER) == 0) {
198                 throwIllegalCharacterDataException(text, "0x"
199                   + Integer.toHexString(result)
200                   + " is not allowed in XML content");
201             }
202
203         }
204
205     }
206
207     
208     /**
209      * <p>
210      * Checks a string to see if it is a syntactically correct
211      * RFC 2396/RFC 2732 URI reference. Both absolute and relative
212      * URIs are supported, as are URIs with fragment identifiers.
213      * </p>
214      *
215      * @param uri <code>String</code> containing the potential URI
216      *
217      * @throws MalformedURIException if this is not a
218      * legal URI reference
219      */

220     static void checkURIReference(String JavaDoc uri) {
221         
222         if ((uri == null) || uri.length() == 0) return;
223
224         URIUtil.ParsedURI parsed = new URIUtil.ParsedURI(uri);
225         try {
226             if (parsed.scheme != null) checkScheme(parsed.scheme);
227             if (parsed.authority != null) checkAuthority(parsed.authority);
228             checkPath(parsed.path);
229             if (parsed.fragment != null) checkFragment(parsed.fragment);
230             if (parsed.query != null) checkQuery(parsed.query);
231         }
232         catch (MalformedURIException ex) {
233             ex.setData(uri);
234             throw ex;
235         }
236         
237     }
238     
239
240     private static void checkQuery(final String JavaDoc query) {
241         
242         int length = query.length();
243         for (int i = 0; i < length; i++) {
244             char c = query.charAt(i);
245             if (c == '%') {
246                try {
247                    if (!isHexDigit(query.charAt(i+1)) || !isHexDigit(query.charAt(i+2))) {
248                        throwMalformedURIException(query,
249                          "Bad percent escape sequence");
250                    }
251                }
252                catch (StringIndexOutOfBoundsException JavaDoc ex) {
253                    throwMalformedURIException(query,
254                      "Bad percent escape sequence");
255                }
256                i += 2;
257             }
258             else if (!isQueryCharacter(c)) {
259                 throw new MalformedURIException(
260                   "Illegal query character " + c
261                 );
262             }
263         }
264         
265     }
266
267     
268     // same for fragment ID
269
private static boolean isQueryCharacter(char c) {
270         
271         switch(c) {
272             case '!': return true;
273             case '"': return false;
274             case '#': return false;
275             case '$': return true;
276             case '%': return false; // tested in checkQuery
277
case '&': return true;
278             case '\'': return true;
279             case '(': return true;
280             case ')': return true;
281             case '*': return true;
282             case '+': return true;
283             case ',': return true;
284             case '-': return true;
285             case '.': return true;
286             case '/': return true;
287             case '0': return true;
288             case '1': return true;
289             case '2': return true;
290             case '3': return true;
291             case '4': return true;
292             case '5': return true;
293             case '6': return true;
294             case '7': return true;
295             case '8': return true;
296             case '9': return true;
297             case ':': return true;
298             case ';': return true;
299             case '<': return false;
300             case '=': return true;
301             case '>': return false;
302             case '?': return true;
303             case '@': return true;
304             case 'A': return true;
305             case 'B': return true;
306             case 'C': return true;
307             case 'D': return true;
308             case 'E': return true;
309             case 'F': return true;
310             case 'G': return true;
311             case 'H': return true;
312             case 'I': return true;
313             case 'J': return true;
314             case 'K': return true;
315             case 'L': return true;
316             case 'M': return true;
317             case 'N': return true;
318             case 'O': return true;
319             case 'P': return true;
320             case 'Q': return true;
321             case 'R': return true;
322             case 'S': return true;
323             case 'T': return true;
324             case 'U': return true;
325             case 'V': return true;
326             case 'W': return true;
327             case 'X': return true;
328             case 'Y': return true;
329             case 'Z': return true;
330             case '[': return false;
331             case '\\': return false;
332             case ']': return false;
333             case '^': return false;
334             case '_': return true;
335             case '`': return false;
336             case 'a': return true;
337             case 'b': return true;
338             case 'c': return true;
339             case 'd': return true;
340             case 'e': return true;
341             case 'f': return true;
342             case 'g': return true;
343             case 'h': return true;
344             case 'i': return true;
345             case 'j': return true;
346             case 'k': return true;
347             case 'l': return true;
348             case 'm': return true;
349             case 'n': return true;
350             case 'o': return true;
351             case 'p': return true;
352             case 'q': return true;
353             case 'r': return true;
354             case 's': return true;
355             case 't': return true;
356             case 'u': return true;
357             case 'v': return true;
358             case 'w': return true;
359             case 'x': return true;
360             case 'y': return true;
361             case 'z': return true;
362             case '{': return false;
363             case '|': return false;
364             case '}': return false;
365             case '~': return true;
366         }
367         return false;
368         
369     }
370
371
372     private static void checkFragment(String JavaDoc fragment) {
373         // The BNF for fragments is the same as for query strings
374
checkQuery(fragment);
375     }
376
377     
378     // Besides the legal characters issues, a path must
379
// not contain two consecutive forward slashes
380
private static void checkPath(final String JavaDoc path) {
381         
382         int length = path.length();
383         char[] text = path.toCharArray();
384         for (int i = 0; i < length; i++) {
385             char c = text[i];
386             if (c == '/') {
387                 if (i < length-1) {
388                     if (text[i+1] == '/') {
389                         throwMalformedURIException(path,
390                           "Double slash (//) in path");
391                     }
392                 }
393             }
394             else if (c == '%') {
395                try {
396                    if (!isHexDigit(text[i+1])
397                      || !isHexDigit(text[i+2])) {
398                        throwMalformedURIException(path,
399                          "Bad percent escape sequence");
400                    }
401                }
402                catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
403                    throwMalformedURIException(path,
404                      "Bad percent escape sequence");
405                }
406                i += 2;
407             }
408             else if (!isPathCharacter(c)) {
409                 throwMalformedURIException(path,
410                   "Illegal path character " + c
411                 );
412             }
413         }
414         
415     }
416
417
418     private static void checkAuthority(String JavaDoc authority) {
419         
420         String JavaDoc userInfo = null;
421         String JavaDoc host = null;
422         String JavaDoc port = null;
423         
424         int atSign = authority.indexOf('@');
425         if (atSign != -1) {
426             userInfo = authority.substring(0, atSign);
427             authority = authority.substring(atSign+1);
428         }
429         
430         int colon = -1;
431         if (authority.startsWith("[")) {
432             colon = authority.indexOf("]:");
433             if (colon != -1) colon = colon+1;
434         }
435         else colon = authority.indexOf(':');
436         
437         if (colon != -1) {
438             host = authority.substring(0, colon);
439             port = authority.substring(colon+1);
440         }
441         else {
442             host = authority;
443         }
444         
445         if (userInfo != null) checkUserInfo(userInfo);
446         if (port != null) checkPort(port);
447         checkHost(host);
448         
449     }
450
451
452     private static void checkHost(final String JavaDoc host) {
453     
454         int length = host.length();
455         if (length == 0) return; // file URI
456

457         char[] text = host.toCharArray();
458         if (text[0] == '[') {
459             if (text[length-1] != ']') {
460                 throw new MalformedURIException("Missing closing ]");
461             }
462                             // trim [ and ] from ends of host
463
checkIP6Address(host.substring(1, length-1));
464         }
465         else {
466             if (length > 255) {
467                 throw new MalformedURIException("Host name too long: " + host);
468             }
469             
470             for (int i = 0; i < length; i++) {
471                 char c = text[i];
472                 if (c == '%') {
473                    try {
474                        if (!isHexDigit(text[i+1]) || !isHexDigit(text[i+2])) {
475                            throwMalformedURIException(host,
476                              "Bad percent escape sequence");
477                        }
478                    }
479                    catch (ArrayIndexOutOfBoundsException JavaDoc ex) {
480                        throwMalformedURIException(host,
481                          "Bad percent escape sequence");
482                    }
483                    i += 2;
484                 }
485                 else if (!isRegNameCharacter(c)) {
486                     throwMalformedURIException(host,
487                       "Illegal host character " + c
488                     );
489                 }
490             }
491         }
492     }
493
494
495     private static boolean isRegNameCharacter(char c) {
496
497         switch(c) {
498             case '!': return true;
499             case '"': return false;
500             case '#': return false;
501             case '$': return true;
502             case '%': return false; // checked separately
503
case '&': return true;
504             case '\'': return true;
505             case '(': return true;
506             case ')': return true;
507             case '*': return true;
508             case '+': return true;
509             case ',': return true;
510             case '-': return true;
511             case '.': return true;
512             case '/': return false;
513             case '0': return true;
514             case '1': return true;
515             case '2': return true;
516             case '3': return true;
517             case '4': return true;
518             case '5': return true;
519             case '6': return true;
520             case '7': return true;
521             case '8': return true;
522             case '9': return true;
523             case ':': return false;
524             case ';': return true;
525             case '<': return false;
526             case '=': return true;
527             case '>': return false;
528             case '?': return false;
529             case '@': return false;
530             case 'A': return true;
531             case 'B': return true;
532             case 'C': return true;
533             case 'D': return true;
534             case 'E': return true;
535             case 'F': return true;
536             case 'G': return true;
537             case 'H': return true;
538             case 'I': return true;
539             case 'J': return true;
540             case 'K': return true;
541             case 'L': return true;
542             case 'M': return true;
543             case 'N': return true;
544             case 'O': return true;
545             case 'P': return true;
546             case 'Q': return true;
547             case 'R': return true;
548             case 'S': return true;
549             case 'T': return true;
550             case 'U': return true;
551             case 'V': return true;
552             case 'W': return true;
553             case 'X': return true;
554             case 'Y': return true;
555             case 'Z': return true;
556             case '[': return false;
557             case '\\': return false;
558             case ']': return false;
559             case '^': return false;
560             case '_': return true;
561             case '`': return false;
562             case 'a': return true;
563             case 'b': return true;
564             case 'c': return true;
565             case 'd': return true;
566             case 'e': return true;
567             case 'f': return true;
568             case 'g': return true;
569             case 'h': return true;
570             case 'i': return true;
571             case 'j': return true;
572             case 'k': return true;
573             case 'l': return true;
574             case 'm': return true;
575             case 'n': return true;
576             case 'o': return true;
577             case 'p': return true;
578             case 'q': return true;
579             case 'r': return true;
580             case 's': return true;
581             case 't': return true;
582             case 'u': return true;
583             case 'v': return true;
584             case 'w': return true;
585             case 'x': return true;
586             case 'y': return true;
587             case 'z': return true;
588             case '{': return false;
589             case '|': return false;
590             case '}': return false;
591             case '~': return true;
592         }
593         return false;
594         
595     }
596
597
598     private static void checkPort(String JavaDoc port) {
599         
600         for (int i = port.length()-1; i >= 0; i--) {
601             char c = port.charAt(i);
602             if (c < '0' || c > '9') {
603                 throw new MalformedURIException("Bad port: " + port);
604             }
605         }
606
607     }
608
609
610     private static void checkUserInfo(String JavaDoc userInfo) {
611
612         int length = userInfo.length();
613         for (int i = 0; i < length; i++) {
614             char c = userInfo.charAt(i);
615             if (c == '%') {
616                try {
617                    if (!isHexDigit(userInfo.charAt(i+1))
618                      || !isHexDigit(userInfo.charAt(i+2))) {
619                        throwMalformedURIException(userInfo,
620                          "Bad percent escape sequence");
621                    }
622                }
623                catch (StringIndexOutOfBoundsException JavaDoc ex) {
624                    throwMalformedURIException(userInfo,
625                      "Bad percent escape sequence");
626                }
627                i += 2;
628             }
629             else if (!isUserInfoCharacter(c)) {
630                 throw new MalformedURIException("Bad user info: " + userInfo);
631             }
632         }
633         
634     }
635
636
637     private static void checkScheme(String JavaDoc scheme) {
638
639         // http is probably 99% of cases so check it first
640
if ("http".equals(scheme)) return;
641         char c = scheme.charAt(0);
642         if (!isAlpha(c)) {
643             throw new MalformedURIException(
644               "Illegal initial scheme character " + c);
645         }
646         
647         for (int i = scheme.length()-1; i >= 1; i--) {
648             c = scheme.charAt(i);
649             if (!isSchemeCharacter(c)) {
650                 throw new MalformedURIException(
651                   "Illegal scheme character " + c
652                 );
653             }
654         }
655         
656     }
657
658
659     private static void checkIP6Address(String JavaDoc ip6Address) {
660
661         StringTokenizer JavaDoc st = new StringTokenizer JavaDoc(ip6Address, ":", true);
662         int numTokens = st.countTokens();
663         if (numTokens > 15 || numTokens < 2) {
664             throw new MalformedURIException(
665               "Illegal IP6 host address: " + ip6Address
666             );
667         }
668         for (int i = 0; i < numTokens; i++) {
669             String JavaDoc hexPart = st.nextToken();
670             if (":".equals(hexPart)) continue;
671             try {
672                 int part = Integer.parseInt(hexPart, 16);
673                 if (part < 0) {
674                       throw new MalformedURIException(
675                       "Illegal IP6 host address: " + ip6Address
676                     );
677                 }
678             }
679             catch (NumberFormatException JavaDoc ex) {
680                 if (i == numTokens-1) {
681                     checkIP4Address(hexPart, ip6Address);
682                 }
683                 else {
684                     throwMalformedURIException(ip6Address,
685                       "Illegal IP6 host address: " + ip6Address
686                     );
687                 }
688             }
689         }
690         
691         if (ip6Address.indexOf("::") != ip6Address.lastIndexOf("::")) {
692             throw new MalformedURIException(
693               "Illegal IP6 host address: " + ip6Address
694             );
695         }
696         
697     }
698
699     
700     private static void checkIP4Address(String JavaDoc address, String JavaDoc ip6Address) {
701
702         StringTokenizer JavaDoc st = new StringTokenizer JavaDoc(address, ".");
703         int numTokens = st.countTokens();
704         if (numTokens != 4) {
705             throw new MalformedURIException(
706               "Illegal IP6 host address: " + ip6Address
707             );
708         }
709         for (int i = 0; i < 4; i++) {
710             String JavaDoc decPart = st.nextToken();
711             try {
712                 int dec = Integer.parseInt(decPart);
713                 if (dec > 255 || dec < 0) {
714                     throw new MalformedURIException(
715                       "Illegal IP6 host address: " + ip6Address
716                     );
717                 }
718             }
719             catch (NumberFormatException JavaDoc ex) {
720                 throw new MalformedURIException(
721                   "Illegal IP6 host address: " + ip6Address
722                 );
723             }
724         }
725         
726     }
727
728     
729     static void checkXMLName(String JavaDoc name) {
730         
731         if (name == null) {
732             throwIllegalNameException(name, "XML names cannot be null");
733         }
734         
735         int length = name.length();
736         if (length == 0) {
737             throwIllegalNameException(name, "XML names cannot be empty");
738         }
739         
740         char first = name.charAt(0);
741         if ((flags[first] & NAME_START_CHARACTER) == 0) {
742             throwIllegalNameException(name, "XML names cannot start " +
743               "with the character " + Integer.toHexString(first));
744         }
745         
746         for (int i = 1; i < length; i++) {
747             char c = name.charAt(i);
748             if ((flags[c] & NAME_CHARACTER) == 0) {
749                 throwIllegalNameException(name, "0x"
750                   + Integer.toHexString(c)
751                   + " is not a legal name character");
752             }
753         }
754
755     }
756
757
758     private static boolean[] C0Table = new boolean[0x21];
759     static {
760         C0Table['\n'] = true;
761         C0Table['\r'] = true;
762         C0Table['\t'] = true;
763         C0Table[' '] = true;
764     }
765
766     
767     static boolean isXMLSpaceCharacter(char c) {
768         if (c > ' ') return false;
769         return C0Table[c];
770     }
771     
772
773     private static boolean isHexDigit(char c) {
774
775         switch(c) {
776             case '0': return true;
777             case '1': return true;
778             case '2': return true;
779             case '3': return true;
780             case '4': return true;
781             case '5': return true;
782             case '6': return true;
783             case '7': return true;
784             case '8': return true;
785             case '9': return true;
786             case ':': return false;
787             case ';': return false;
788             case '<': return false;
789             case '=': return false;
790             case '>': return false;
791             case '?': return false;
792             case '@': return false;
793             case 'A': return true;
794             case 'B': return true;
795             case 'C': return true;
796             case 'D': return true;
797             case 'E': return true;
798             case 'F': return true;
799             case 'G': return false;
800             case 'H': return false;
801             case 'I': return false;
802             case 'J': return false;
803             case 'K': return false;
804             case 'L': return false;
805             case 'M': return false;
806             case 'N': return false;
807             case 'O': return false;
808             case 'P': return false;
809             case 'Q': return false;
810             case 'R': return false;
811             case 'S': return false;
812             case 'T': return false;
813             case 'U': return false;
814             case 'V': return false;
815             case 'W': return false;
816             case 'X': return false;
817             case 'Y': return false;
818             case 'Z': return false;
819             case '[': return false;
820             case '\\': return false;
821             case ']': return false;
822             case '^': return false;
823             case '_': return false;
824             case '`': return false;
825             case 'a': return true;
826             case 'b': return true;
827             case 'c': return true;
828             case 'd': return true;
829             case 'e': return true;
830             case 'f': return true;
831         }
832         return false;
833     }
834     
835     
836     // Since namespace URIs are commonly repeated, we can save a lot
837
// of redundant code by storing the ones we've seen before.
838
private static URICache cache = new URICache();
839
840     private final static class URICache {
841      
842         private final static int LOAD = 6;
843         private String JavaDoc[] cache = new String JavaDoc[LOAD];
844         private int position = 0;
845         
846         synchronized boolean contains(String JavaDoc s) {
847             
848             for (int i = 0; i < LOAD; i++) {
849                 // Here I'm assuming the namespace URIs are interned.
850
// This is commonly but not always true. This won't
851
// break if they haven't been. Using equals() instead
852
// of == is faster when the namespace URIs haven't been
853
// interned but slower if they have.
854
if (s == cache[i]) {
855                     return true;
856                 }
857             }
858             return false;
859             
860         }
861
862         synchronized void put(String JavaDoc s) {
863             cache[position] = s;
864             position++;
865             if (position == LOAD) position = 0;
866         }
867         
868     }
869     
870     
871     /**
872      * <p>
873      * Checks a string to see if it is an RFC 2396/RFC 2732 absolute
874      * URI reference. URI references can contain fragment identifiers.
875      * Absolute URI references must have a scheme.
876      * </p>
877      *
878      * @param uri <code>String</code> to check
879      *
880      * @throws MalformedURIException if this is not a legal
881      * URI reference
882      */

883     static void checkAbsoluteURIReference(String JavaDoc uri) {
884         
885         if (cache.contains(uri)) {
886             return;
887         }
888         URIUtil.ParsedURI parsed = new URIUtil.ParsedURI(uri);
889         try {
890             if (parsed.scheme == null) {
891                 throwMalformedURIException(
892                   uri, "Missing scheme in absolute URI reference");
893             }
894             checkScheme(parsed.scheme);
895             if (parsed.authority != null) checkAuthority(parsed.authority);
896             checkPath(parsed.path);
897             if (parsed.fragment != null) checkFragment(parsed.fragment);
898             if (parsed.query != null) checkQuery(parsed.query);
899             cache.put(uri);
900         }
901         catch (MalformedURIException ex) {
902             ex.setData(uri);
903             throw ex;
904         }
905    
906     }
907
908     
909     static boolean isAlpha(char c) {
910         
911         switch(c) {
912             case 'A': return true;
913             case 'B': return true;
914             case 'C': return true;
915             case 'D': return true;
916             case 'E': return true;
917             case 'F': return true;
918             case 'G': return true;
919             case 'H': return true;
920             case 'I': return true;
921             case 'J': return true;
922             case 'K': return true;
923             case 'L': return true;
924             case 'M': return true;
925             case 'N': return true;
926             case 'O': return true;
927             case 'P': return true;
928             case 'Q': return true;
929             case 'R': return true;
930             case 'S': return true;
931             case 'T': return true;
932             case 'U': return true;
933             case 'V': return true;
934             case 'W': return true;
935             case 'X': return true;
936             case 'Y': return true;
937             case 'Z': return true;
938             case '[': return false;
939             case '\\': return false;
940             case ']': return false;
941             case '^': return false;
942             case '_': return false;
943             case '`': return false;
944             case 'a': return true;
945             case 'b': return true;
946             case 'c': return true;
947             case 'd': return true;
948             case 'e': return true;
949             case 'f': return true;
950             case 'g': return true;
951             case 'h': return true;
952             case 'i': return true;
953             case 'j': return true;
954             case 'k': return true;
955             case 'l': return true;
956             case 'm': return true;
957             case 'n': return true;
958             case 'o': return true;
959             case 'p': return true;
960             case 'q': return true;
961             case 'r': return true;
962             case 's': return true;
963             case 't': return true;
964             case 'u': return true;
965             case 'v': return true;
966             case 'w': return true;
967             case 'x': return true;
968             case 'y': return true;
969             case 'z': return true;
970         }
971         
972         return false;
973         
974     }
975     
976     
977     static boolean isSchemeCharacter(char c) {
978         
979         /* The : and the ? cannot be reached here because they'll
980          * have been parsed out separately before this method is
981          * called. They're included here strictly for alignment
982          * so the compiler will generate a table lookup.
983          */

984         switch(c) {
985             case '+': return true;
986             case ',': return false;
987             case '-': return true;
988             case '.': return true;
989             case '/': return false;
990             case '0': return true;
991             case '1': return true;
992             case '2': return true;
993             case '3': return true;
994             case '4': return true;
995             case '5': return true;
996             case '6': return true;
997             case '7': return true;
998             case '8': return true;
999             case '9': return true;
1000            case ':': return false; // unreachable
1001
case ';': return false;
1002            case '<': return false;
1003            case '=': return false;
1004            case '>': return false;
1005            case '?': return false; // unreachable
1006
case '@': return false;
1007            case 'A': return true;
1008            case 'B': return true;
1009            case 'C': return true;
1010            case 'D': return true;
1011            case 'E': return true;
1012            case 'F': return true;
1013            case 'G': return true;
1014            case 'H': return true;
1015            case 'I': return true;
1016            case 'J': return true;
1017            case 'K': return true;
1018            case 'L': return true;
1019            case 'M': return true;
1020            case 'N': return true;
1021            case 'O': return true;
1022            case 'P': return true;
1023            case 'Q': return true;
1024            case 'R': return true;
1025            case 'S': return true;
1026            case 'T': return true;
1027            case 'U': return true;
1028            case 'V': return true;
1029            case 'W': return true;
1030            case 'X': return true;
1031            case 'Y': return true;
1032            case 'Z': return true;
1033            case '[': return false;
1034            case '\\': return false;
1035            case ']': return false;
1036            case '^': return false;
1037            case '_': return false;
1038            case '`': return false;
1039            case 'a': return true;
1040            case 'b': return true;
1041            case 'c': return true;
1042            case 'd': return true;
1043            case 'e': return true;
1044            case 'f': return true;
1045            case 'g': return true;
1046            case 'h': return true;
1047            case 'i': return true;
1048            case 'j': return true;
1049            case 'k': return true;
1050            case 'l': return true;
1051            case 'm': return true;
1052            case 'n': return true;
1053            case 'o': return true;
1054            case 'p': return true;
1055            case 'q': return true;
1056            case 'r': return true;
1057            case 's': return true;
1058            case 't': return true;
1059            case 'u': return true;
1060            case 'v': return true;
1061            case 'w': return true;
1062            case 'x': return true;
1063            case 'y': return true;
1064            case 'z': return true;
1065        }
1066        
1067        return false;
1068        
1069    }
1070
1071
1072    private static boolean isPathCharacter(char c) {
1073
1074        switch(c) {
1075            case '!': return true;
1076            case '"': return false;
1077            case '#': return false;
1078            case '$': return true;
1079            case '%': return false; // checked separately
1080
case '&': return true;
1081            case '\'': return true;
1082            case '(': return true;
1083            case ')': return true;
1084            case '*': return true;
1085            case '+': return true;
1086            case ',': return true;
1087            case '-': return true;
1088            case '.': return true;
1089            case '/': return false; // handled separately
1090
case '0': return true;
1091            case '1': return true;
1092            case '2': return true;
1093            case '3': return true;
1094            case '4': return true;
1095            case '5': return true;
1096            case '6': return true;
1097            case '7': return true;
1098            case '8': return true;
1099            case '9': return true;
1100            case ':': return true;
1101            case ';': return true;
1102            case '<': return false;
1103            case '=': return true;
1104            case '>': return false;
1105            case '?': return false;
1106            case '@': return true;
1107            case 'A': return true;
1108            case 'B': return true;
1109            case 'C': return true;
1110            case 'D': return true;
1111            case 'E': return true;
1112            case 'F': return true;
1113            case 'G': return true;
1114            case 'H': return true;
1115            case 'I': return true;
1116            case 'J': return true;
1117            case 'K': return true;
1118            case 'L': return true;
1119            case 'M': return true;
1120            case 'N': return true;
1121            case 'O': return true;
1122            case 'P': return true;
1123            case 'Q': return true;
1124            case 'R': return true;
1125            case 'S': return true;
1126            case 'T': return true;
1127            case 'U': return true;
1128            case 'V': return true;
1129            case 'W': return true;
1130            case 'X': return true;
1131            case 'Y': return true;
1132            case 'Z': return true;
1133            case '[': return false;
1134            case '\\': return false;
1135            case ']': return false;
1136            case '^': return false;
1137            case '_': return true;
1138            case '`': return false;
1139            case 'a': return true;
1140            case 'b': return true;
1141            case 'c': return true;
1142            case 'd': return true;
1143            case 'e': return true;
1144            case 'f': return true;
1145            case 'g': return true;
1146            case 'h': return true;
1147            case 'i': return true;
1148            case 'j': return true;
1149            case 'k': return true;
1150            case 'l': return true;
1151            case 'm': return true;
1152            case 'n': return true;
1153            case 'o': return true;
1154            case 'p': return true;
1155            case 'q': return true;
1156            case 'r': return true;
1157            case 's': return true;
1158            case 't': return true;
1159            case 'u': return true;
1160            case 'v': return true;
1161            case 'w': return true;
1162            case 'x': return true;
1163            case 'y': return true;
1164            case 'z': return true;
1165            case '{': return false;
1166            case '|': return false;
1167            case '}': return false;
1168            case '~': return true;
1169        }
1170        
1171        return false;
1172        
1173    }
1174    
1175
1176    private static boolean isUserInfoCharacter(char c) {
1177
1178        switch(c) {
1179            case '!': return true;
1180            case '"': return false;
1181            case '#': return false;
1182            case '$': return true;
1183            case '%': return false; // checked separately
1184
case '&': return true;
1185            case '\'': return true;
1186            case '(': return true;
1187            case ')': return true;
1188            case '*': return true;
1189            case '+': return true;
1190            case ',': return true;
1191            case '-': return true;
1192            case '.': return true;
1193            case '/': return true;
1194            case '0': return true;
1195            case '1': return true;
1196            case '2': return true;
1197            case '3': return true;
1198            case '4': return true;
1199            case '5': return true;
1200            case '6': return true;
1201            case '7': return true;
1202            case '8': return true;
1203            case '9': return true;
1204            case ':': return true;
1205            case ';': return true;
1206            case '<': return false;
1207            case '=': return true;
1208            case '>': return false;
1209            case '?': return false;
1210            case '@': return false;
1211            case 'A': return true;
1212            case 'B': return true;
1213            case 'C': return true;
1214            case 'D': return true;
1215            case 'E': return true;
1216            case 'F': return true;
1217            case 'G': return true;
1218            case 'H': return true;
1219            case 'I': return true;
1220            case 'J': return true;
1221            case 'K': return true;
1222            case 'L': return true;
1223            case 'M': return true;
1224            case 'N': return true;
1225            case 'O': return true;
1226            case 'P': return true;
1227            case 'Q': return true;
1228            case 'R': return true;
1229            case 'S': return true;
1230            case 'T': return true;
1231            case 'U': return true;
1232            case 'V': return true;
1233            case 'W': return true;
1234            case 'X': return true;
1235            case 'Y': return true;
1236            case 'Z': return true;
1237            case '[': return false;
1238            case '\\': return false;
1239            case ']': return false;
1240            case '^': return false;
1241            case '_': return true;
1242            case '`': return false;
1243            case 'a': return true;
1244            case 'b': return true;
1245            case 'c': return true;
1246            case 'd': return true;
1247            case 'e': return true;
1248            case 'f': return true;
1249            case 'g': return true;
1250            case 'h': return true;
1251            case 'i': return true;
1252            case 'j': return true;
1253            case 'k': return true;
1254            case 'l': return true;
1255            case 'm': return true;
1256            case 'n': return true;
1257            case 'o': return true;
1258            case 'p': return true;
1259            case 'q': return true;
1260            case 'r': return true;
1261            case 's': return true;
1262            case 't': return true;
1263            case 'u': return true;
1264            case 'v': return true;
1265            case 'w': return true;
1266            case 'x': return true;
1267            case 'y': return true;
1268            case 'z': return true;
1269            case '{': return false;
1270            case '|': return false;
1271            case '}': return false;
1272            case '~': return true;
1273        }
1274        
1275        return false;
1276        
1277    }
1278    
1279        
1280    /**
1281     * Check to see that this string is an absolute URI,
1282     * neither a relative URI nor a URI reference.
1283     *
1284     */

1285    static void checkAbsoluteURI(String JavaDoc uri) {
1286        
1287        URIUtil.ParsedURI parsed = new URIUtil.ParsedURI(uri);
1288        try {
1289            if (parsed.scheme == null) {
1290                throwMalformedURIException(uri, "Missing scheme in absolute URI");
1291            }
1292            checkScheme(parsed.scheme);
1293            if (parsed.authority != null) checkAuthority(parsed.authority);
1294            checkPath(parsed.path);
1295            if (parsed.fragment != null) {
1296                throwMalformedURIException(uri, "URIs cannot have fragment identifiers");
1297            }
1298            if (parsed.query != null) checkQuery(parsed.query);
1299        }
1300        catch (MalformedURIException ex) {
1301            ex.setData(uri);
1302            throw ex;
1303        }
1304
1305    }
1306
1307    
1308}
Popular Tags