KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jivesoftware > stringprep > IDNA


/**
 * Copyright (C) 2004 Free Software Foundation, Inc.
 *
 * Author: Oliver Hitz
 *
 * This file is part of GNU Libidn.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */

23
24 package org.jivesoftware.stringprep;
25
26 public class IDNA {
27     public final static String JavaDoc ACE_PREFIX = "xn--";
28
29     /**
30      * Converts a Unicode string to ASCII using the procedure in RFC3490
31      * section 4.1. Unassigned characters are not allowed and STD3 ASCII
32      * rules are enforced. The input string may be a domain name
33      * containing dots.
34      *
35      * @param input Unicode string.
36      * @return Encoded string.
37      */

38     public static String JavaDoc toASCII(String JavaDoc input)
39             throws IDNAException {
40         StringBuilder JavaDoc o = new StringBuilder JavaDoc();
41         StringBuilder JavaDoc h = new StringBuilder JavaDoc();
42
43         for (int i = 0; i < input.length(); i++) {
44             char c = input.charAt(i);
45             if (c == '.' || c == '\u3002' || c == '\uff0e' || c == '\uff61') {
46                 o.append(toASCII(h.toString(), false, true));
47                 o.append(c);
48                 h = new StringBuilder JavaDoc();
49             } else {
50                 h.append(c);
51             }
52         }
53         o.append(toASCII(h.toString(), false, true));
54         return o.toString();
55     }
56
57     /**
58      * Converts a Unicode string to ASCII using the procedure in RFC3490
59      * section 4.1. Unassigned characters are not allowed and STD3 ASCII
60      * rules are enforced.
61      *
62      * @param input Unicode string.
63      * @param allowUnassigned Unassigned characters, allowed or not?
64      * @param useSTD3ASCIIRules STD3 ASCII rules, enforced or not?
65      * @return Encoded string.
66      */

67     public static String JavaDoc toASCII(String JavaDoc input, boolean allowUnassigned, boolean useSTD3ASCIIRules)
68             throws IDNAException {
69         // Step 1: Check if the string contains code points outside
70
// the ASCII range 0..0x7c.
71

72         boolean nonASCII = false;
73
74         for (int i = 0; i < input.length(); i++) {
75             int c = input.charAt(i);
76             if (c > 0x7f) {
77                 nonASCII = true;
78                 break;
79             }
80         }
81
82         // Step 2: Perform the nameprep operation.
83

84         if (nonASCII) {
85             try {
86                 input = Stringprep.nameprep(input, allowUnassigned);
87             } catch (StringprepException e) {
88                 throw new IDNAException(e);
89             }
90         }
91
92         // Step 3: - Verify the absence of non-LDH ASCII code points
93
// 0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60,
94
// 0x7b..0x7f
95
// - Verify the absence of leading and trailing
96
// hyphen-minus
97

98         if (useSTD3ASCIIRules) {
99             for (int i = 0; i < input.length(); i++) {
100                 int c = input.charAt(i);
101                 if ((c <= 0x2c) ||
102                         (c >= 0x2e && c <= 0x2f) ||
103                         (c >= 0x3a && c <= 0x40) ||
104                         (c >= 0x5b && c <= 0x60) ||
105                         (c >= 0x7b && c <= 0x7f)) {
106                     throw new IDNAException(IDNAException.CONTAINS_NON_LDH);
107                 }
108             }
109
110             if (input.startsWith("-") || input.endsWith("-")) {
111                 throw new IDNAException(IDNAException.CONTAINS_HYPHEN);
112             }
113         }
114
115         // Step 4: If all code points are inside 0..0x7f, skip to step 8
116

117         nonASCII = false;
118
119         for (int i = 0; i < input.length(); i++) {
120             int c = input.charAt(i);
121             if (c > 0x7f) {
122                 nonASCII = true;
123                 break;
124             }
125         }
126
127         String JavaDoc output = input;
128
129         if (nonASCII) {
130
131             // Step 5: Verify that the sequence does not begin with the ACE prefix.
132

133             if (input.startsWith(ACE_PREFIX)) {
134                 throw new IDNAException(IDNAException.CONTAINS_ACE_PREFIX);
135             }
136
137             // Step 6: Punycode
138

139             try {
140                 output = Punycode.encode(input);
141             } catch (PunycodeException e) {
142                 throw new IDNAException(e);
143             }
144
145             // Step 7: Prepend the ACE prefix.
146

147             output = ACE_PREFIX + output;
148         }
149
150         // Step 8: Check that the length is inside 1..63.
151

152         if (output.length() < 1 || output.length() > 63) {
153             throw new IDNAException(IDNAException.TOO_LONG);
154         }
155
156         return output;
157     }
158
159     /**
160      * Converts an ASCII-encoded string to Unicode. Unassigned
161      * characters are not allowed and STD3 hostnames are enforced. Input
162      * may be domain name containing dots.
163      *
164      * @param input ASCII input string.
165      * @return Unicode string.
166      */

167     public static String JavaDoc toUnicode(String JavaDoc input) {
168         StringBuilder JavaDoc o = new StringBuilder JavaDoc();
169         StringBuilder JavaDoc h = new StringBuilder JavaDoc();
170
171         for (int i = 0; i < input.length(); i++) {
172             char c = input.charAt(i);
173             if (c == '.' || c == '\u3002' || c == '\uff0e' || c == '\uff61') {
174                 o.append(toUnicode(h.toString(), false, true));
175                 o.append(c);
176                 h = new StringBuilder JavaDoc();
177             } else {
178                 h.append(c);
179             }
180         }
181         o.append(toUnicode(h.toString(), false, true));
182         return o.toString();
183     }
184
185     /**
186      * Converts an ASCII-encoded string to Unicode.
187      *
188      * @param input ASCII input string.
189      * @param allowUnassigned Allow unassigned Unicode characters.
190      * @param useSTD3ASCIIRules Check that the output conforms to STD3.
191      * @return Unicode string.
192      */

193     public static String JavaDoc toUnicode(String JavaDoc input, boolean allowUnassigned, boolean useSTD3ASCIIRules) {
194         String JavaDoc original = input;
195         boolean nonASCII = false;
196
197         // Step 1: If all code points are inside 0..0x7f, skip to step 3.
198

199         for (int i = 0; i < input.length(); i++) {
200             int c = input.charAt(i);
201             if (c > 0x7f) {
202                 nonASCII = true;
203                 break;
204             }
205         }
206
207         // Step 2: Perform the Nameprep operation.
208

209         if (nonASCII) {
210             try {
211                 input = Stringprep.nameprep(input, allowUnassigned);
212             } catch (StringprepException e) {
213                 // ToUnicode never fails!
214
return original;
215             }
216         }
217
218         // Step 3: Verify the sequence starts with the ACE prefix.
219

220         if (!input.startsWith(ACE_PREFIX)) {
221             // ToUnicode never fails!
222
return original;
223         }
224
225         String JavaDoc stored = input;
226
227         // Step 4: Remove the ACE prefix.
228

229         input = input.substring(ACE_PREFIX.length());
230
231         // Step 5: Decode using punycode
232

233         String JavaDoc output;
234
235         try {
236             output = Punycode.decode(input);
237         } catch (PunycodeException e) {
238             // ToUnicode never fails!
239
return original;
240         }
241
242         // Step 6: Apply toASCII
243

244         String JavaDoc ascii;
245
246         try {
247             ascii = toASCII(output, allowUnassigned, useSTD3ASCIIRules);
248         } catch (IDNAException e) {
249             // ToUnicode never fails!
250
return original;
251         }
252
253         // Step 7: Compare case-insensitively.
254

255         if (!ascii.equalsIgnoreCase(stored)) {
256             // ToUnicode never fails!
257
return original;
258         }
259
260         // Step 8: Return the result.
261

262         return output;
263     }
264 }
Popular Tags