ParseUtils


1   /*
2    * Copyright 2004 The Apache Software Foundation.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   * $Header:$
17   */
18  package org.apache.beehive.netui.pageflow.scoping.internal;
19  
20  import java.util.Map  ;
21  import java.util.StringTokenizer  ;
22  import java.io.UnsupportedEncodingException  ;
23  
24  
/**
 * Package-private helpers for parsing a URI query string into name/value pairs.
 *
 * All members are static; the class holds no state.
 */
class ParseUtils
{
    /** Encoding used when the caller supplies none, and as the fallback for unsupported encodings. */
    private static final String WLS_DEFAULT_ENCODING = "ISO-8859-1";

    //-------------------------------------------------------------------------------------------------
    // helpers to parse the query string.

    /**
     * Parses an RFC1630 query string into an existing Map.
     *
     * Anything from the first '#' onwards is treated as a fragment and ignored.  The remaining text
     * is split on '&amp;'; each parameter is split on its <em>first</em> '=' into a name and a value,
     * both of which are URI-unescaped and stored as Strings.  A parameter without '=' (or with nothing
     * after it) is stored with an empty-string value.  When a name occurs more than once, the last
     * occurrence wins because the pair is stored with {@link Map#put}.
     *
     * @param str      Query string; <code>null</code> is treated as empty.
     * @param res      Map into which insert the values.
     * @param encoding Encoding to be used for stored Strings; <code>null</code> or empty selects
     *                 ISO-8859-1.
     */
    public static void parseQueryString( String str, Map res, String encoding )
    {
        if ( str == null )
        {
            return;
        }

        // Strip the fragment.  The test is ">= 0" so that a string which *starts* with '#'
        // is recognised as "fragment only" rather than being parsed as a parameter.
        int fragmentStart = str.indexOf( '#' );
        if ( fragmentStart >= 0 )
        {
            str = str.substring( 0, fragmentStart );
        }

        // "Within the query string, the plus sign is reserved as
        // shorthand notation for a space. Therefore, real plus signs must
        // be encoded. This method was used to make query URIs easier to
        // pass in systems which did not allow spaces." -- RFC 1630
        //
        // The replacement happens before percent-decoding, so an encoded plus (%2B) still
        // decodes to a literal '+'.
        StringTokenizer st = new StringTokenizer( str.replace( '+', ' ' ), "&" );

        while ( st.hasMoreTokens() )
        {
            String qp = st.nextToken();

            // Split on the first '=' only.  String.split( "=" ) discards trailing empty strings
            // (so "a=" and "a" yield a one-element array) and would cut "x=a=b" into three parts;
            // indexOf handles all of these without an out-of-bounds access or a truncated value.
            int eq = qp.indexOf( '=' );
            String name = eq < 0 ? qp : qp.substring( 0, eq );
            String value = eq < 0 ? "" : qp.substring( eq + 1 );

            res.put( unescape( name, encoding ), unescape( value, encoding ) );
        }
    }

    /**
     * URI-unescapes the specified string, except for +/&lt;space&gt;
     * encoding.
     *
     * Each "%XX" sequence with two hexadecimal digits is replaced by the byte it denotes; a '%'
     * that is not followed by two hex digits is kept literally.  The resulting bytes are then
     * decoded with <code>encoding</code>.
     *
     * @param str      String to be unescaped
     * @param encoding The name of a character encoding; <code>null</code> or empty selects
     *                 ISO-8859-1.
     * @return Unescaped string
     */
    private static String unescape( String str, String encoding )
    {
        //We cannot unescape '+' to space because '+' is allowed in the file name
        //(the caller has already performed that replacement for query strings).

        //if the str does not contain "%", we don't need to do anything
        if ( str.indexOf( '%' ) < 0 )
        {
            return str;
        }

        if ( encoding == null || encoding.length() == 0 )
        {
            encoding = WLS_DEFAULT_ENCODING;
        }

        // Do not assume String only contains ascii.  str.length() <= str.getBytes().length
        // getBytes() with no argument encodes using the platform default charset.
        byte[] strbytes = str.getBytes();
        int len = strbytes.length;

        // Decoding is done in place: 'out' never runs ahead of 'in', so writing to
        // strbytes[out] cannot clobber a byte that has not been read yet.
        int out = 0;
        boolean foundNonAscii = false;

        for ( int in = 0; in < len; in++, out++ )
        {
            if ( strbytes[in] == '%' && in + 2 < len
                 && Hex.isHexChar( strbytes[in + 1] ) && Hex.isHexChar( strbytes[in + 2] ) )
            {
                strbytes[out] = ( byte ) ( ( Hex.hexValueOf( strbytes[in + 1] ) << 4 )
                                           + Hex.hexValueOf( strbytes[in + 2] ) );
                in += 2;    // skip the two hex digits; the loop header skips the '%'
                continue;
            }

            // IE takes non-ASCII URLs. We use the default encoding
            // if non-ASCII characters are contained in URLs.
            // Java bytes are signed, so "<= 0x1f" matches every byte with the high bit set
            // (0x80-0xFF, which are negative) as well as the ASCII control characters.
            if ( !foundNonAscii && ( strbytes[in] <= 0x1f || strbytes[in] == 0x7f ) )
            {
                encoding = System.getProperty( "file.encoding" );
                foundNonAscii = true;
            }
            strbytes[out] = strbytes[in];
        }

        return newString( strbytes, 0, out, encoding );  // was:  BytesToString.newString(...)
    }

    /**
     * Decodes a range of bytes into a String using the named encoding.
     *
     * Falls back to ISO-8859-1 when <code>enc</code> is <code>null</code>, is one of the names
     * accepted by {@link #is8BitUnicodeSubset}, or is not supported by the JVM.
     *
     * @param b      bytes to decode
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     * @param enc    name of the character encoding, may be <code>null</code>
     * @return the decoded String
     */
    private static String newString( byte b[], int offset, int length, String enc )
    {
        if ( is8BitUnicodeSubset( enc ) )
        {
            return getString( b, offset, length );
        }
        try
        {
            return new String( b, offset, length, enc );
        }
        catch ( UnsupportedEncodingException uee )
        {
            // Unknown encoding name: decode as ISO-8859-1 rather than failing the whole parse.
            return getString( b, offset, length );
        }
    }

    /**
     * Tells whether the named encoding is handled by decoding as ISO-8859-1.
     *
     * @param enc encoding name, may be <code>null</code>
     * @return <code>true</code> for <code>null</code>, "ISO-8859-1", "ISO8859_1" or "ASCII"
     *         (compared case-insensitively)
     */
    private static boolean is8BitUnicodeSubset( String enc )
    {
        return enc == null || "ISO-8859-1".equalsIgnoreCase( enc ) ||
               "ISO8859_1".equalsIgnoreCase( enc ) || "ASCII".equalsIgnoreCase( enc );
    }

    /**
     * Decodes a range of bytes as ISO-8859-1.
     *
     * @param b      bytes to decode
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     * @return the decoded String
     */
    private static String getString( byte b[], int offset, int length )
    {
        try
        {
            return new String( b, offset, length, WLS_DEFAULT_ENCODING );
        }
        catch ( UnsupportedEncodingException uee )
        {
            // every JVM is supposed to support ISO-8859-1
            throw new AssertionError( uee );
        }
    }

    /**
     * Hexadecimal digit helpers used while decoding "%XX" escapes.
     */
    static class Hex
    {

        // this class exists only for its static methods
        private Hex()
        {
        }

        /**
         * Returns the numeric value of a hexadecimal digit.
         *
         * @param c char (int actually) to convert.
         * @return 0-15 for '0'-'9', 'a'-'f' and 'A'-'F'; 0 for any other input.
         */
        public static int hexValueOf( int c )
        {
            if ( c >= '0' && c <= '9' )
            {
                return c - '0';
            }
            if ( c >= 'a' && c <= 'f' )
            {
                return c - 'a' + 10;
            }
            if ( c >= 'A' && c <= 'F' )
            {
                return c - 'A' + 10;
            }
            return 0;
        }


        /**
         * Test a character to see whether it is a possible hex char.
         *
         * @param c char (int actually) to test.
         * @return <code>true</code> if c is one of '0'-'9', 'a'-'f' or 'A'-'F'.
         */
        public static final boolean isHexChar( int c )
        {
            return ( c >= '0' && c <= '9' )
                   || ( c >= 'a' && c <= 'f' )
                   || ( c >= 'A' && c <= 'F' );
        }

    }
}
222
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags