KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > tomcat > util > buf > UEncoder


1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements. See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */

17
18 package org.apache.tomcat.util.buf;
19
20 import java.io.CharArrayWriter JavaDoc;
21 import java.io.IOException JavaDoc;
22 import java.io.Writer JavaDoc;
23 import java.util.BitSet JavaDoc;
24
25 /** Efficient implementation for encoders.
26  * This class is not thread safe - you need one encoder per thread.
27  * The encoder will save and recycle the internal objects, avoiding
28  * garbage.
29  *
30  * You can add extra characters that you want preserved, for example
31  * while encoding a URL you can add "/".
32  *
33  * @author Costin Manolache
34  */

35 public final class UEncoder {
36
37     private static org.apache.commons.logging.Log log=
38         org.apache.commons.logging.LogFactory.getLog(UEncoder.class );
39     
40     // Not static - the set may differ ( it's better than adding
41
// an extra check for "/", "+", etc
42
private BitSet JavaDoc safeChars=null;
43     private C2BConverter c2b=null;
44     private ByteChunk bb=null;
45
46     private String JavaDoc encoding="UTF8";
47     private static final int debug=0;
48     
49     public UEncoder() {
50     initSafeChars();
51     }
52
53     public void setEncoding( String JavaDoc s ) {
54     encoding=s;
55     }
56
57     public void addSafeCharacter( char c ) {
58     safeChars.set( c );
59     }
60
61
62     /** URL Encode string, using a specified encoding.
63      *
64      * @param buf The writer
65      * @param s string to be encoded
66      * @throws IOException If an I/O error occurs
67      */

68     public void urlEncode( Writer JavaDoc buf, String JavaDoc s )
69     throws IOException JavaDoc
70     {
71     if( c2b==null ) {
72         bb=new ByteChunk(16); // small enough.
73
c2b=new C2BConverter( bb, encoding );
74     }
75
76     for (int i = 0; i < s.length(); i++) {
77         int c = (int) s.charAt(i);
78         if( safeChars.get( c ) ) {
79         if( debug > 0 ) log("Safe: " + (char)c);
80         buf.write((char)c);
81         } else {
82         if( debug > 0 ) log("Unsafe: " + (char)c);
83         c2b.convert( (char)c );
84         
85         // "surrogate" - UTF is _not_ 16 bit, but 21 !!!!
86
// ( while UCS is 31 ). Amazing...
87
if (c >= 0xD800 && c <= 0xDBFF) {
88             if ( (i+1) < s.length()) {
89             int d = (int) s.charAt(i+1);
90             if (d >= 0xDC00 && d <= 0xDFFF) {
91                 if( debug > 0 ) log("Unsafe: " + c);
92                 c2b.convert( (char)d);
93                 i++;
94             }
95             }
96         }
97
98         c2b.flushBuffer();
99         
100         urlEncode( buf, bb.getBuffer(), bb.getOffset(),
101                bb.getLength() );
102         bb.recycle();
103         }
104     }
105     }
106
107     /**
108      */

109     public void urlEncode( Writer JavaDoc buf, byte bytes[], int off, int len)
110     throws IOException JavaDoc
111     {
112     for( int j=off; j< len; j++ ) {
113         buf.write( '%' );
114         char ch = Character.forDigit((bytes[j] >> 4) & 0xF, 16);
115         if( debug > 0 ) log("Encode: " + ch);
116         buf.write(ch);
117         ch = Character.forDigit(bytes[j] & 0xF, 16);
118         if( debug > 0 ) log("Encode: " + ch);
119         buf.write(ch);
120     }
121     }
122     
123     /**
124      * Utility funtion to re-encode the URL.
125      * Still has problems with charset, since UEncoder mostly
126      * ignores it.
127      */

128     public String JavaDoc encodeURL(String JavaDoc uri) {
129     String JavaDoc outUri=null;
130     try {
131         // XXX optimize - recycle, etc
132
CharArrayWriter JavaDoc out = new CharArrayWriter JavaDoc();
133         urlEncode(out, uri);
134         outUri=out.toString();
135     } catch (IOException JavaDoc iex) {
136     }
137     return outUri;
138     }
139     
140
141     // -------------------- Internal implementation --------------------
142

143     //
144
private void init() {
145     
146     }
147     
148     private void initSafeChars() {
149     safeChars=new BitSet JavaDoc(128);
150     int i;
151     for (i = 'a'; i <= 'z'; i++) {
152         safeChars.set(i);
153     }
154     for (i = 'A'; i <= 'Z'; i++) {
155         safeChars.set(i);
156     }
157     for (i = '0'; i <= '9'; i++) {
158         safeChars.set(i);
159     }
160     //safe
161
safeChars.set('$');
162     safeChars.set('-');
163     safeChars.set('_');
164     safeChars.set('.');
165
166     // Dangerous: someone may treat this as " "
167
// RFC1738 does allow it, it's not reserved
168
// safeChars.set('+');
169
//extra
170
safeChars.set('!');
171     safeChars.set('*');
172     safeChars.set('\'');
173     safeChars.set('(');
174     safeChars.set(')');
175     safeChars.set(',');
176     }
177
178     private static void log( String JavaDoc s ) {
179         if (log.isDebugEnabled())
180             log.debug("Encoder: " + s );
181     }
182 }
183
Popular Tags