KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > syndication > io > impl > XmlFixerReader


1 /*
2  * Copyright 2005 Sun Microsystems, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */

17 package com.sun.syndication.io.impl;
18
19 import java.io.IOException JavaDoc;
20 import java.io.Reader JavaDoc;
21 import java.io.InputStreamReader JavaDoc;
22 import java.io.BufferedReader JavaDoc;
23 import java.util.HashMap JavaDoc;
24 import java.util.Map JavaDoc;
25 import java.util.regex.Pattern JavaDoc;
26 import java.util.regex.Matcher JavaDoc;
27 import java.net.URL JavaDoc;
28
/**
 * A {@link Reader} decorator that cleans up sloppy XML before it reaches a parser.
 * <p>
 * On the first read it discards leading whitespace and leading XML comments
 * (<code>&lt;!-- ... --&gt;</code>) so the stream starts at the first real content.
 * While reading, it replaces the named HTML Latin-1 entities
 * (<code>&amp;nbsp;</code> ... <code>&amp;yuml;</code>) with their numeric
 * character references (<code>&amp;#160;</code> ... <code>&amp;#255;</code>);
 * any other entity, and any stray <code>&amp;</code>, is passed through unchanged.
 * <p>
 * Instances keep unsynchronized mutable state and do not support mark/reset.
 *
 * @author Alejandro Abdelnur
 */
public class XmlFixerReader extends Reader {

    /**
     * Command line helper: prints the fixed content of the URL given as the only argument.
     * The stream is decoded with the platform default charset, as it always was.
     *
     * @param args a single element, the URL to read
     * @throws Exception if the URL is malformed or the stream cannot be read
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            System.err.println("Usage: XmlFixerReader <url>");
            return;
        }
        Reader r = new InputStreamReader(new URL(args[0]).openStream());
        BufferedReader br = new BufferedReader(new XmlFixerReader(r));
        try {
            String line = br.readLine();
            while (line != null) {
                System.out.println(line);
                line = br.readLine();
            }
        }
        finally {
            // closing the outermost reader closes the whole chain, including the URL stream
            br.close();
        }
    }

    // States of the read() state machine, stored in _state.
    private static final int READ_STREAM = 0;   // passing characters straight through
    private static final int READ_ENTITY = 1;   // accumulating "&name" in _buffer
    private static final int DRAIN_BUFFER = 3;  // handing out the content of _buffer

    // States of the trimStream() state machine (local to that method).
    private static final int TRIM_WHITESPACE = 0;    // skipping whitespace, looking for '<'
    private static final int TRIM_LT = 1;            // seen "<"
    private static final int TRIM_LT_BANG = 2;       // seen "<!"
    private static final int TRIM_LT_BANG_DASH = 3;  // seen "<!-"
    private static final int TRIM_IN_COMMENT = 4;    // inside the comment body
    private static final int TRIM_DASH = 5;          // inside the comment, seen "-"
    private static final int TRIM_DASH_DASH = 6;     // inside the comment, seen "--"

    /** The wrapped reader. */
    protected Reader in;

    /**
     * Creates a fixer on top of the given reader. Nothing is read until the first read call.
     *
     * @param in the reader to wrap; it is also used as the synchronization lock of this reader
     */
    public XmlFixerReader(Reader in) {
        super(in);
        this.in = in;
        _buffer = new StringBuffer();
        _state = READ_STREAM;
    }

    private boolean trimmed;              // true once the leading trim has been attempted
    private final StringBuffer _buffer;   // look-ahead text waiting to be handed out
    private int _bufferPos;               // next index of _buffer to hand out
    private int _state = READ_STREAM;
    private boolean _hasPending;          // true when _pending holds a char read ahead of _buffer
    private int _pending;                 // the char that ended a malformed entity, not yet processed

    /**
     * Consumes leading whitespace and leading XML comments from the wrapped reader.
     * <p>
     * Characters read while testing for a comment opener that turn out not to be part of
     * one are kept in the buffer, and the reader is switched to draining that buffer so
     * nothing is lost.
     *
     * @return false if the stream ended having contained only whitespace and comments,
     *         true otherwise
     * @throws IOException if the wrapped reader fails
     */
    private boolean trimStream() throws IOException {
        int state = TRIM_WHITESPACE;
        while (true) {
            int c = in.read();
            if (c == -1) {
                if (state == TRIM_WHITESPACE) {
                    return false;
                }
                // stream ended in the middle of a (possible) comment: emit what was buffered
                _state = DRAIN_BUFFER;
                return true;
            }
            switch (state) {
                case TRIM_WHITESPACE:
                    if (c == ' ' || c == '\n' || c == '\r' || c == '\t') {
                        break;
                    }
                    _buffer.setLength(0);
                    _bufferPos = 0;
                    _buffer.append((char) c);
                    if (c != '<') {
                        _state = DRAIN_BUFFER;
                        return true;
                    }
                    state = TRIM_LT;
                    break;
                case TRIM_LT:
                    _buffer.append((char) c);
                    if (c != '!') {
                        _state = DRAIN_BUFFER;
                        return true;
                    }
                    state = TRIM_LT_BANG;
                    break;
                case TRIM_LT_BANG:
                    _buffer.append((char) c);
                    if (c != '-') { // e.g. <!DOCTYPE
                        _state = DRAIN_BUFFER;
                        return true;
                    }
                    state = TRIM_LT_BANG_DASH;
                    break;
                case TRIM_LT_BANG_DASH:
                    _buffer.append((char) c);
                    if (c != '-') {
                        _state = DRAIN_BUFFER;
                        return true;
                    }
                    state = TRIM_IN_COMMENT;
                    break;
                case TRIM_IN_COMMENT:
                    _buffer.append((char) c);
                    if (c == '-') {
                        state = TRIM_DASH;
                    }
                    break;
                case TRIM_DASH:
                    _buffer.append((char) c);
                    state = (c == '-') ? TRIM_DASH_DASH : TRIM_IN_COMMENT;
                    break;
                case TRIM_DASH_DASH:
                    if (c == '>') {
                        // comment complete: drop it and look for more whitespace/comments
                        _buffer.setLength(0);
                        state = TRIM_WHITESPACE;
                    }
                    else {
                        _buffer.append((char) c);
                        // a further '-' still leaves "--" pending, so "--->" closes the comment
                        state = (c == '-') ? TRIM_DASH_DASH : TRIM_IN_COMMENT;
                    }
                    break;
                default:
                    throw new IOException("It shouldn't happen");
            }
        }
    }

    /**
     * Reads the next character of the fixed stream.
     *
     * @return the character, or -1 at the end of the stream
     * @throws IOException if the wrapped reader fails
     */
    public int read() throws IOException {
        if (!trimmed) { // trims XML stream
            trimmed = true;
            if (!trimStream()) {
                return -1;
            }
        }
        while (true) { // converts literal entities to coded entities
            int c;
            switch (_state) {
                case READ_STREAM:
                    if (_hasPending) {
                        c = _pending;
                        _hasPending = false;
                    }
                    else {
                        c = in.read();
                    }
                    if (c != '&') {
                        return c; // ordinary character, or -1 at end of stream
                    }
                    _buffer.setLength(0);
                    _bufferPos = 0;
                    _buffer.append('&');
                    _state = READ_ENTITY;
                    break;
                case READ_ENTITY:
                    c = in.read();
                    if (c == ';') {
                        _buffer.append(';');
                        String codedEntity = (String) CODED_ENTITIES.get(_buffer.toString());
                        if (codedEntity != null) {
                            _buffer.setLength(0);
                            _buffer.append(codedEntity);
                        } // else we leave what was in the stream
                        _state = DRAIN_BUFFER;
                    }
                    else
                    if (isEntityChar(c)) {
                        _buffer.append((char) c);
                    }
                    else {
                        // Not an entity after all. The terminating character must not be
                        // lost: keep it aside so it is processed (it may be another '&')
                        // once the buffered text has been handed out.
                        if (c > -1) {
                            _pending = c;
                            _hasPending = true;
                        }
                        _state = DRAIN_BUFFER;
                    }
                    break;
                case DRAIN_BUFFER:
                    if (_bufferPos < _buffer.length()) {
                        return _buffer.charAt(_bufferPos++);
                    }
                    _state = READ_STREAM;
                    break;
                default:
                    throw new IOException("It shouldn't happen");
            }
        }
    }

    /** Tells whether c may appear between the '&' and the ';' of an entity. */
    private static boolean isEntityChar(int c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '#') || (c >= '0' && c <= '9');
    }

    /**
     * Reads up to len characters of the fixed stream into buffer, starting at offset.
     * It keeps reading until len characters are stored or the stream ends.
     *
     * @param buffer destination array
     * @param offset index of the first character to store
     * @param len maximum number of characters to read
     * @return the number of characters stored, 0 if len is 0, or -1 if the stream
     *         was already at its end
     * @throws IndexOutOfBoundsException if offset/len do not fit in buffer
     * @throws IOException if the wrapped reader fails
     */
    public int read(char[] buffer,int offset,int len) throws IOException {
        if (offset < 0 || len < 0 || len > buffer.length - offset) {
            throw new IndexOutOfBoundsException();
        }
        if (len == 0) {
            return 0; // must not consume anything
        }
        int c = read();
        if (c == -1) {
            return -1;
        }
        int charsRead = 0;
        buffer[offset + (charsRead++)] = (char) c;
        while (charsRead < len && (c = read()) > -1) {
            buffer[offset + (charsRead++)] = (char) c;
        }
        return charsRead;
    }

    /**
     * Skips up to n characters of the fixed stream.
     *
     * @param n number of characters to skip
     * @return the number of characters actually skipped, which is less than n
     *         if the stream ended first
     * @throws IllegalArgumentException if n is negative
     * @throws IOException if the wrapped reader fails
     */
    public long skip(long n) throws IOException {
        if (n < 0) {
            throw new IllegalArgumentException("'n' cannot be negative");
        }
        long skipped = 0;
        // only count characters that were really read, not the final -1
        while (skipped < n && read() > -1) {
            skipped++;
        }
        return skipped;
    }

    /**
     * Tells whether a read can proceed without blocking: either buffered
     * characters are waiting or the wrapped reader is ready.
     */
    public boolean ready() throws IOException {
        return (_state != READ_STREAM) || _hasPending || in.ready();
    }

    /** Mark is not supported. */
    public boolean markSupported() {
        return false;
    }

    /** Always fails: mark is not supported. */
    public void mark(int readAheadLimit) throws IOException {
        throw new IOException("Stream does not support mark");
    }

    /** Always fails: mark is not supported. */
    public void reset() throws IOException {
        throw new IOException("Stream does not support mark");
    }

    /** Closes the wrapped reader. */
    public void close() throws IOException {
        in.close();
    }

    /**
     * Names of the HTML Latin-1 entities in code point order: the entity at
     * index i stands for character 160 + i.
     */
    private static final String[] ENTITY_NAMES = {
        // 160 - 175
        "nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect",
        "uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr",
        // 176 - 191
        "deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot",
        "cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest",
        // 192 - 207
        "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
        "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
        // 208 - 223
        "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
        "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
        // 224 - 239
        "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
        "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
        // 240 - 255
        "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
        "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"
    };

    /** Maps a literal entity ("&nbsp;") to its numeric form ("&#160;"). Never modified after class init. */
    private static final Map CODED_ENTITIES = new HashMap();

    static {
        for (int i = 0; i < ENTITY_NAMES.length; i++) {
            CODED_ENTITIES.put("&" + ENTITY_NAMES[i] + ";", "&#" + (160 + i) + ";");
        }
    }

    //
    // It shouldn't be here but well, just reusing the CODED_ENTITIES Map :)
    //

    // Digits are required for names such as frac12 and sup2; '#' lets numeric
    // references match too (they are not in the map, so they are left untouched).
    private static final Pattern ENTITIES_PATTERN = Pattern.compile( "&[A-Za-z0-9#]+;" );

    /**
     * Replaces the named HTML Latin-1 entities found in a string with their
     * numeric character references. Entities that are not known are left as they are.
     *
     * @param s the text to process; may be null
     * @return the processed text; s itself when it contains no '&amp;', null when s is null
     */
    public String processHtmlEntities(String s) {
        if (s == null || s.indexOf('&') == -1) {
            return s;
        }
        StringBuffer sb = new StringBuffer(s.length());
        Matcher m = ENTITIES_PATTERN.matcher(s);
        int pos = 0;
        while (m.find()) {
            sb.append(s.substring(pos, m.start())); // text before the entity
            String literalEntity = m.group();
            String codedEntity = (String) CODED_ENTITIES.get(literalEntity);
            sb.append(codedEntity != null ? codedEntity : literalEntity);
            pos = m.end();
        }
        sb.append(s.substring(pos)); // text after the last entity
        return sb.toString();
    }

}
490
Popular Tags