KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > barracuda > plankton > xml > XMLUtil


1 /*
2  * Copyright (C) 2003 Christian Cryder [christianc@granitepeaks.com]
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17  *
18  * $Id: XMLUtil.java,v 1.2 2004/02/01 05:16:31 christianc Exp $
19  */

20 package org.enhydra.barracuda.plankton.xml;
21
22
23 /**
24  * Utility functions for XML related text conversion. The functions
25  * we have in place at this time are used to convert xml based unicode
26  * text into Java based unicode and vice versa.
27  */

public class XMLUtil {

    /** Platform line separator substituted for every CR, LF, CR/LF or LF/CR sequence. */
    private static final String LINE_SEPARATOR = System.getProperty("line.separator");

    /**
     * Convert a String containing XML numeric character references into
     * plain Java text. Every well formed reference, either hexadecimal
     * (&amp;#xhhhh;) or decimal (&amp;#nnnn;), is replaced by the character
     * it denotes; code points above U+FFFF are emitted as a surrogate pair.
     * Anything that merely looks like a reference but is not one (no digits,
     * illegal digits, value above U+10FFFF) is copied through unchanged.
     *
     * @param s the String to be converted
     * @return the converted string
     * @throws NullPointerException if s is null
     */
    public static String fromXMLUnicodeString(String s) {
        int end = s.length();
        StringBuilder sb = new StringBuilder(end);

        int cpos = 0;
        while (cpos < end) {
            int spos = s.indexOf("&#", cpos);
            int epos = (spos > -1) ? s.indexOf(';', spos) : -1;

            // no further candidate reference: copy the tail and stop
            if (spos < 0 || epos < 0) {
                sb.append(s, cpos, end);
                break;
            }

            sb.append(s, cpos, spos);
            int codePoint = parseCharacterReference(s.substring(spos + 2, epos));
            if (codePoint >= 0) {
                sb.appendCodePoint(codePoint);
                cpos = epos + 1;
            } else {
                // not a reference after all: keep the "&#" literally and
                // resume scanning right behind it
                sb.append("&#");
                cpos = spos + 2;
            }
        }
        return sb.toString();
    }

    /**
     * Parse the text found between "&amp;#" and ";". A leading x (or X,
     * accepted for backwards compatibility) selects hexadecimal, otherwise
     * the digits are read as decimal. Only ASCII digits are accepted.
     *
     * @param body the reference body, without the delimiters
     * @return the code point, or -1 if body is not a valid reference
     */
    private static int parseCharacterReference(String body) {
        int radix = 10;
        int start = 0;
        if (body.length() > 0 && (body.charAt(0) == 'x' || body.charAt(0) == 'X')) {
            radix = 16;
            start = 1;
        }
        if (start == body.length()) {
            return -1;
        }

        int value = 0;
        for (int i = start; i < body.length(); i++) {
            char ch = body.charAt(i);
            int digit = (ch < 128) ? Character.digit(ch, radix) : -1;
            if (digit < 0) {
                return -1;
            }
            value = value * radix + digit;
            // bail out early; this also keeps value far away from int overflow
            if (value > Character.MAX_CODE_POINT) {
                return -1;
            }
        }
        return value;
    }

    /**
     * Convert a String to legal XML unicode string. Printable ASCII
     * (space through ~) other than the special chars (&amp;,&lt;,&gt;,',")
     * is copied as is, and so is the tab character. Every CR, LF, CR/LF or
     * LF/CR sequence is replaced by the platform line separator. Everything
     * else is written as a hexadecimal character reference (&amp;#xhhhh;).
     * A valid surrogate pair is written as one reference to the code point
     * it encodes; an unpaired surrogate is written as its own char value.
     *
     * @param s the String to be converted
     * @return the converted string
     * @throws NullPointerException if s is null
     */
    public static String toXMLUnicodeString(String s) {
        int max = s.length();
        StringBuilder sb = new StringBuilder(max);

        for (int i = 0; i < max; i++) {
            char ch = s.charAt(i);

            // normalize line breaks; a two character break (CR/LF or LF/CR)
            // counts as a single one
            if (ch == '\n' || ch == '\r') {
                sb.append(LINE_SEPARATOR);
                if (i + 1 < max) {
                    char next = s.charAt(i + 1);
                    if (next != ch && (next == '\n' || next == '\r')) {
                        i++;
                    }
                }
                continue;
            }

            if (isPassThrough(ch)) {
                sb.append(ch);
                continue;
            }

            int codePoint = ch;
            if (Character.isHighSurrogate(ch) && i + 1 < max
                    && Character.isLowSurrogate(s.charAt(i + 1))) {
                codePoint = Character.toCodePoint(ch, s.charAt(i + 1));
                i++;
            }

            // the hex form is understood by IE and by XML browsers
            sb.append("&#x").append(Integer.toHexString(codePoint)).append(';');
        }
        return sb.toString();
    }

    /**
     * Tell whether a character is copied to the XML output unescaped:
     * the tab character and anything between space and ~, except for
     * &amp;,&lt;,&gt;,',".
     *
     * @param ch the character to test
     * @return true if ch needs no character reference
     */
    private static boolean isPassThrough(char ch) {
        if (ch == '\t') {
            return true;
        }
        if (ch < ' ' || ch > '~') {
            return false;
        }
        return ch != '&' && ch != '<' && ch != '>' && ch != '\'' && ch != '"';
    }

    /**
     * Main method. Run this to perform a simple little test of the
     * class conversion methods.
     */
    public static void main(String[] args) {
        //FROM...
        System.out.println("");
        System.out.println("From XML to Java...");

        //shouldn't be any changes
        check("blah blah blah", fromXMLUnicodeString("blah blah blah"));

        //shouldn't be any changes
        check("blah < blah > blah", fromXMLUnicodeString("blah < blah > blah"));

        //should be 3 changes
        check("Test \u00a91 and Test\u00a92 and \u00a9",
                fromXMLUnicodeString("Test &#xa9;1 and Test&#xa9;2 and &#xa9;"));

        //try leading pos
        check("\u00a91 and Test\u00a92 and \u00a9 sdf",
                fromXMLUnicodeString("&#xa9;1 and Test&#xa9;2 and &#xa9; sdf"));

        //try a lone reference
        check("\u00a9", fromXMLUnicodeString("&#xa9;"));

        //TO...
        System.out.println("");
        System.out.println("From Java to XML...");

        //shouldn't be any changes
        check("blah blah blah", toXMLUnicodeString("blah blah blah"));

        //should be two changes
        check("blah &#x3c; blah &#x3e; blah", toXMLUnicodeString("blah < blah > blah"));

        //should be 3 changes
        check("Test &#xa9;1 and Test&#xa9;2 and &#xa9;",
                toXMLUnicodeString("Test \u00a91 and Test\u00a92 and \u00a9"));

        //try leading pos
        check("&#xa9;1 and Test&#xa9;2 and &#xa9; sdf",
                toXMLUnicodeString("\u00a91 and Test\u00a92 and \u00a9 sdf"));

        //try a lone character
        check("&#xa9;", toXMLUnicodeString("\u00a9"));
    }

    /**
     * Print one line of the self test: the expected text, the actual text
     * and whether the two are equal.
     *
     * @param expected what the conversion should have produced
     * @param actual what the conversion did produce
     */
    private static void check(String expected, String actual) {
        String verdict = expected.equals(actual) ? "ok" : "failed";
        System.out.println("S/B:[" + expected + "] Result:[" + actual + "]..." + verdict);
    }

}
217
Popular Tags