KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > cocoon > components > sax > XMLByteStreamInterpreter


1 /*
2  * Copyright 1999-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.apache.cocoon.components.sax;
17
18 import java.util.ArrayList JavaDoc;
19
20 import org.apache.avalon.excalibur.pool.Recyclable;
21 import org.apache.cocoon.xml.AbstractXMLProducer;
22 import org.xml.sax.SAXException JavaDoc;
23 import org.xml.sax.helpers.AttributesImpl JavaDoc;
24
25 /**
26  * This a simple xml compiler which takes a byte array as input.
27  *
28  * @author <a HREF="mailto:stefano@apache.org">Stefano Mazzocchi</a>
29  * @author <a HREF="mailto:cziegeler@apache.org">Carsten Ziegeler</a>
30  * @author <a HREF="mailto:tcurdt@apache.org">Torsten Curdt</a>
31  * @version CVS $Id: XMLByteStreamInterpreter.java 30932 2004-07-29 17:35:38Z vgritsenko $
32  */

33
34 public final class XMLByteStreamInterpreter extends AbstractXMLProducer implements XMLDeserializer, Recyclable {
35
36     private static final int START_DOCUMENT = 0;
37     private static final int END_DOCUMENT = 1;
38     private static final int START_PREFIX_MAPPING = 2;
39     private static final int END_PREFIX_MAPPING = 3;
40     private static final int START_ELEMENT = 4;
41     private static final int END_ELEMENT = 5;
42     private static final int CHARACTERS = 6;
43     private static final int IGNORABLE_WHITESPACE = 7;
44     private static final int PROCESSING_INSTRUCTION = 8;
45     private static final int COMMENT = 9;
46     private static final int LOCATOR = 10;
47     private static final int START_DTD = 11;
48     private static final int END_DTD = 12;
49     private static final int START_CDATA = 13;
50     private static final int END_CDATA = 14;
51     private static final int SKIPPED_ENTITY = 15;
52     private static final int START_ENTITY = 16;
53     private static final int END_ENTITY = 17;
54
55     private ArrayList JavaDoc list = new ArrayList JavaDoc();
56     private byte[] input;
57     private int currentPos;
58
59     public void recycle() {
60         super.recycle();
61         this.list.clear();
62         this.input = null;
63     }
64
65     public void deserialize(Object JavaDoc saxFragment) throws SAXException JavaDoc {
66         if (!(saxFragment instanceof byte[])) {
67             throw new SAXException JavaDoc("XMLDeserializer needs byte array for deserialization.");
68         }
69         this.list.clear();
70         this.input = (byte[])saxFragment;
71         this.currentPos = 0;
72         this.checkProlog();
73         this.parse();
74     }
75
76     private void parse() throws SAXException JavaDoc {
77         while ( currentPos < input.length) {
78             switch (this.readEvent()) {
79                 case START_DOCUMENT:
80                     contentHandler.startDocument();
81                     break;
82                 case END_DOCUMENT:
83                     contentHandler.endDocument();
84                     break;
85                 case START_PREFIX_MAPPING:
86                     contentHandler.startPrefixMapping(this.readString(), this.readString());
87                     break;
88                 case END_PREFIX_MAPPING:
89                     contentHandler.endPrefixMapping(this.readString());
90                     break;
91                 case START_ELEMENT:
92                     int attributes = this.readAttributes();
93                     AttributesImpl JavaDoc atts = new AttributesImpl JavaDoc();
94                     for (int i = 0; i < attributes; i++) {
95                         atts.addAttribute(this.readString(), this.readString(), this.readString(), this.readString(), this.readString());
96                     }
97                     contentHandler.startElement(this.readString(), this.readString(), this.readString(), atts);
98                     break;
99                 case END_ELEMENT:
100                     contentHandler.endElement(this.readString(), this.readString(), this.readString());
101                     break;
102                 case CHARACTERS:
103                     char[] chars = this.readChars();
104                     int len = chars.length;
105                     while (len > 0 && chars[len-1]==0) len--;
106                     if (len > 0) contentHandler.characters(chars, 0, len);
107                     break;
108                 case IGNORABLE_WHITESPACE:
109                     char[] spaces = this.readChars();
110                     len = spaces.length;
111                     while (len > 0 && spaces[len-1]==0) len--;
112                     if (len > 0) contentHandler.characters(spaces, 0, len);
113                     break;
114                 case PROCESSING_INSTRUCTION:
115                     contentHandler.processingInstruction(this.readString(), this.readString());
116                     break;
117                 case COMMENT:
118                     chars = this.readChars();
119                     if (this.lexicalHandler != null) {
120                         len = chars.length;
121                         while (len > 0 && chars[len-1]==0) len--;
122                         if (len > 0) lexicalHandler.comment(chars, 0, len);
123                     }
124                     break;
125                 case LOCATOR:
126                     {
127                     String JavaDoc publicId = this.readString();
128                     String JavaDoc systemId = this.readString();
129                     int lineNumber = this.read();
130                     int columnNumber = this.read();
131                     org.xml.sax.helpers.LocatorImpl JavaDoc locator = new org.xml.sax.helpers.LocatorImpl JavaDoc();
132                     locator.setPublicId(publicId);
133                     locator.setSystemId(systemId);
134                     locator.setLineNumber(lineNumber);
135                     locator.setColumnNumber(columnNumber);
136                     contentHandler.setDocumentLocator(locator);
137                     }
138                     break;
139                 case START_DTD:
140                     lexicalHandler.startDTD(this.readString(),
141                                             this.readString(),
142                                             this.readString());
143                     break;
144                 case END_DTD:
145                     lexicalHandler.endDTD();
146                     break;
147                 case START_CDATA:
148                     lexicalHandler.startCDATA();
149                     break;
150                 case END_CDATA:
151                     lexicalHandler.endCDATA();
152                     break;
153                 case SKIPPED_ENTITY:
154                     contentHandler.skippedEntity( this.readString() );
155                     break;
156                 case START_ENTITY:
157                     lexicalHandler.startEntity( this.readString() );
158                     break;
159                 case END_ENTITY:
160                     lexicalHandler.endEntity( this.readString() );
161                     break;
162                 default:
163                     throw new SAXException JavaDoc ("parsing error: event not supported.");
164             }
165         }
166     }
167
168     private void checkProlog() throws SAXException JavaDoc {
169         int valid = 0;
170         if (this.read() == 'C') valid++;
171         if (this.read() == 'X') valid++;
172         if (this.read() == 'M') valid++;
173         if (this.read() == 'L') valid++;
174         if (this.read() == 1) valid++;
175         if (this.read() == 0) valid++;
176         if (valid != 6) throw new SAXException JavaDoc("Unrecognized file format.");
177     }
178
179     protected int readEvent() throws SAXException JavaDoc {
180         return this.read();
181     }
182
183     private int readAttributes() throws SAXException JavaDoc {
184         int ch1 = this.read();
185         int ch2 = this.read();
186         return ((ch1 << 8) + (ch2 << 0));
187     }
188
189     private String JavaDoc readString() throws SAXException JavaDoc {
190         int length = this.readWord();
191         int index = length & 0x00007FFF;
192         if (length >= 0x00008000) {
193             return (String JavaDoc) list.get(index);
194         }
195         else {
196             if (length == 0x00007FFF) {
197                 length = this.readLong();
198             }
199             char[] chars = this.readChars(length);
200             int len = chars.length;
201             if (len > 0) {
202                 while (chars[len-1]==0) len--;
203             }
204             String JavaDoc str;
205             if (len == 0) {
206                 str = "";
207             } else {
208                 str = new String JavaDoc(chars, 0, len);
209             }
210             list.add(str);
211             return str;
212         }
213     }
214
215     /**
216      * The returned char array might contain any number of zero bytes
217      * at the end
218      */

219     private char[] readChars() throws SAXException JavaDoc {
220         int length = this.readWord();
221         if (length == 0x00007FFF) {
222             length = this.readLong();
223         }
224         return this.readChars(length);
225     }
226
227     private int read() throws SAXException JavaDoc {
228         if (currentPos >= input.length)
229             throw new SAXException JavaDoc("Reached end of input.");
230         return input[currentPos++] & 0xff;
231     }
232
233     /**
234      * The returned char array might contain any number of zero bytes
235      * at the end
236      */

237     private char[] readChars(int len) throws SAXException JavaDoc {
238         char[] str = new char[len];
239         byte[] bytearr = new byte[len];
240         int c, char2, char3;
241         int count = 0;
242         int i = 0;
243
244         this.readBytes(bytearr);
245
246         while (count < len) {
247             c = bytearr[count] & 0xff;
248             switch (c >> 4) {
249                 case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
250                     // 0xxxxxxx
251
count++;
252                     str[i++] = (char) c;
253                     break;
254                 case 12: case 13:
255                     // 110x xxxx 10xx xxxx
256
count += 2;
257                     char2 = bytearr[count-1];
258                     str[i++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F));
259                     break;
260                 case 14:
261                     // 1110 xxxx 10xx xxxx 10xx xxxx
262
count += 3;
263                     char2 = bytearr[count-2];
264                     char3 = bytearr[count-1];
265                     str[i++] = ((char)(((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | ((char3 & 0x3F) << 0)));
266                     break;
267                 default:
268                     // 10xx xxxx, 1111 xxxx
269
throw new SAXException JavaDoc("UTFDataFormatException");
270             }
271         }
272
273         return str;
274     }
275
276     private void readBytes(byte[] b) throws SAXException JavaDoc {
277         if (this.currentPos + b.length > this.input.length) {
278             // TC:
279
// >= prevents getting the last byte
280
// 0 1 2 3 4 input.length = 5
281
// |_ currentPos = 2
282
// b.length = 3
283
// 2 + 3 > 5 ok
284
// 2 + 3 >= 5 wrong
285
// why has this worked before?
286
throw new SAXException JavaDoc("End of input reached.");
287         }
288         System.arraycopy(this.input, this.currentPos, b, 0, b.length);
289         this.currentPos += b.length;
290     }
291
292     private int readWord() throws SAXException JavaDoc {
293         int ch1 = this.read();
294         int ch2 = this.read();
295         return ((ch1 << 8) + (ch2 << 0));
296     }
297
298     private int readLong() throws SAXException JavaDoc {
299         int ch1 = this.read();
300         int ch2 = this.read();
301         int ch3 = this.read();
302         int ch4 = this.read();
303         return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0));
304     }
305 }
306
Popular Tags