KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > modules > web > jspparser > FastOpenInfoParser


/*
 * The contents of this file are subject to the terms of the Common Development
 * and Distribution License (the License). You may not use this file except in
 * compliance with the License.
 *
 * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
 * or http://www.netbeans.org/cddl.txt.
 *
 * When distributing Covered Code, include this CDDL Header Notice in each file
 * and include the License file at http://www.netbeans.org/cddl.txt.
 * If applicable, add the following below the CDDL Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 *
 * The Original Software is NetBeans. The Initial Developer of the Original
 * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
 * Microsystems, Inc. All Rights Reserved.
 */

19
20 package org.netbeans.modules.web.jspparser;
21
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.netbeans.modules.web.jsps.parserapi.JspParserAPI;
import org.netbeans.modules.web.jsps.parserapi.JspParserAPI.WebModule;
import org.openide.filesystems.FileObject;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import org.netbeans.modules.xml.api.EncodingUtil;
43
44 /**
45  * JSP 'open info' parser allowing to fastly determine encoding for JSPs in standart syntax
46  * with DD NOT specifying JSPs encodinf or syntax (at least 95% af all JSPs)
47  *
48  * How the encoding is currently detected:
49  * 1) find deplyment descriptor from given webmodule
50  * 2) if found, parse it and find following elements
51  * <jsp-property-group>
52  * <page-encoding>
53  * ||
54  * <is-xml>
55  * </jsp-property-group>
56  * 3) if any of the nested elements found, give it up and return null (and let jasper parser to determine the encoding)
57  * 4) if the DD is not found or it doesn't contain the elements from #2 test if the file is JSP document (according to the extension)
58  * 5) if the file is a XML document give it up (so far - we can easily implement a simple enc. parser for XMLs as well)
59  * 6) the page is standard syntax - parse first 8kB of text and...
60  * 7) if <%@page encoding="xxx"%> is found return the encoding value
61  * 8) if <%@page encoding="xxx"%> is NOT found find <%@page contentType="mimetype; char-set=xxx"%>
62  * 9) if CT found return encoding from it
63  *
64  * @author Marek Fukala
65  */

66 public class FastOpenInfoParser {
67     
68     static final boolean debug = Boolean.getBoolean("netbeans.debug.fastopeninfo"); // NOI18N
69

70     static FastOpenInfoParser get(WebModule wm) {
71         return new FastOpenInfoParser(wm);
72     }
73     
74     private WebModule wm;
75     
76     /** Creates a new instance of FastOpenInfoParser */
77     private FastOpenInfoParser(WebModule wm) {
78         this.wm = wm;
79     }
80     
81     public JspParserAPI.JspOpenInfo getJspOpenInfo(FileObject fo, boolean useEditor) {
82         long a = System.currentTimeMillis();
83         try {
84             if(wm != null && wm.getDocumentBase() != null && useEditor) return null; //better let the parser do it
85

86             //if there is not wemodule detect the encoding from the file only
87
if(wm != null) {
88                 //find deployment descriptor
89
FileObject documentBase = wm.getDocumentBase();
90                 if(documentBase != null) {
91                     FileObject dd = documentBase.getFileObject("WEB-INF/web.xml"); //NOI18N
92

93                     //test whether the DD exists, if not parse the JSP file
94
if(dd != null) {
95                         //parse the DD and try to find <jsp-property-group> element with <page-encoding> and <is-xml> elements
96
DDParseInfo ddParseInfo = parse(new InputSource JavaDoc(dd.getInputStream())); //parse with default encoding
97

98                         //if the DD defines encoding or marks jsps as xml documents return null
99
if(ddParseInfo.definesEncoding || ddParseInfo.marksXMLDocuments) return null;
100                     }
101                 }
102             }
103             
104             String JavaDoc enc = null;
105             
106             //get encoding from the disk file if webmodule is null and useEditor is true (during file save)
107
//XXX may be fixed better - to get the editor document instance from the fileobject (but I need to add some deps)
108

109             //#64418 - create a ByteArrayInputStream - we need a an inputstream with marks supported
110
byte[] buffer = new byte[8192*4];
111             InputStream JavaDoc _is = fo.getInputStream();
112             int readed = _is.read(buffer);
113             InputStream JavaDoc is = new ByteArrayInputStream JavaDoc(buffer,0,readed);
114             _is.close();
115             
116             if(isXMLSyntax(fo)) {
117                 //XML document - detect encoding acc. to fisrt 4 bytes or xml prolog
118
enc = EncodingUtil.detectEncoding(is);
119             } else {
120                 //JSP in standart syntax
121
//find <%@page encoding or contentType attributes
122
enc = parseEncodingFromFile(is);
123             }
124             
125             if(debug) System.out.println("[fast open parser] detected " + enc + " encoding.");
126             return enc == null ? null : new JspParserAPI.JspOpenInfo(isXMLSyntax(fo), enc);
127             
128         } catch(IOException JavaDoc e) {
129             //do not handle
130
} catch(SAXException JavaDoc se) {
131             //do not handle
132
} catch(ParserConfigurationException JavaDoc pce) {
133             //do not handle
134
} finally {
135             if(debug) System.out.println("[fast open parser] taken " + (System.currentTimeMillis() - a) + "ms.");
136         }
137         return null;
138     }
139     
140     private static String JavaDoc parseEncodingFromFile(InputStream JavaDoc is) throws IOException JavaDoc {
141         InputStreamReader JavaDoc isr = new InputStreamReader JavaDoc(is); //read with default encoding
142
//read only first 8kB of text
143
char[] buffer = new char[8192];
144         int readed = isr.read(buffer);
145         isr.close();
146         
147         return parseJspText(buffer, readed);
148     }
149     
150     private static boolean isXMLSyntax(FileObject fo) {
151         String JavaDoc ext = fo.getExt();
152         if(ext != null && ("jspx".equalsIgnoreCase(ext) || "tagx".equalsIgnoreCase(ext))) return true;
153         else return false;
154     }
155     
156     //JSP encoding parser
157
private static final String JavaDoc PAGE = "page";
158     private static final String JavaDoc ENCODING = "pageEncoding";
159     private static final String JavaDoc CONTENTYPE = "contentType";
160     private static final String JavaDoc CHARSET = "charset=";
161     
162     private static final int P_INIT = 0;
163     private static final int P_LT = 1; //after <
164
private static final int P_LT_PER = 2; //after <%
165
private static final int P_LT_PER_ATS = 3; //after <%@
166
private static final int P_PD = 4; //in page directive
167
private static final int P_APER = 5; //after closing %
168

169     private static final int P_ENC = 7; //after 'encoding' attribute
170
private static final int P_ENC_EQ = 8; //after encoding=
171
private static final int P_ENC_EQ_VAL = 9; //after encoding="
172

173     private static final int P_CT = 11; //after 'contentType' attribute
174
private static final int P_CT_EQ = 12; //after contentType=
175
private static final int P_CT_EQ_VAL = 13; //after contentType="
176
private static final int P_CT_VAL_CHS = 14; //after contentType="TYPE; char-set=
177

178     private static String JavaDoc parseJspText(char[] buffer, int len) {
179         String JavaDoc contentType = null;
180         
181         int state = P_INIT;
182         int i = 0;
183         int pos = -1;
184         while(i < len) {
185             char c = buffer[i];
186             
187             switch(state) {
188                 case P_INIT:
189                     if(c == '<') state = P_LT;
190                     i++;
191                     break;
192                 case P_LT:
193                     switch(c) {
194                         case '%' :
195                             state = P_LT_PER;
196                             break;
197                         default: state = P_INIT;
198                     }
199                     i++;
200                     break;
201                     
202                 case P_LT_PER:
203                     switch(c) {
204                         case '@':
205                             state = P_LT_PER_ATS;
206                             break;
207                         default: state = P_INIT;
208                     }
209                     i++;
210                     break;
211                 case P_LT_PER_ATS:
212                     if(c == ' ' || c == '\t') {
213                         i++;
214                         break;
215                     } else if(prescanFor(buffer, i, PAGE)) {
216                         state = P_PD;
217                         i = i + PAGE.length();
218                         break;
219                     }
220                     state = P_INIT;
221                     i++;
222                     break;
223                 case P_PD:
224                     if(prescanFor(buffer, i, ENCODING)) {
225                         state = P_ENC;
226                         i = i + ENCODING.length();
227                         break;
228                     } else if(prescanFor(buffer, i, CONTENTYPE)) {
229                         state = P_CT;
230                         i = i + CONTENTYPE.length();
231                         break;
232                     } else if(c == '%') state = P_APER;
233                     i++;
234                     break;
235                 case P_APER:
236                     if(c == '>') state = P_INIT;
237                     else state = P_PD;
238                     i++;
239                     break;
240                 case P_ENC:
241                     switch(c) {
242                         case ' ':
243                         case '\t':
244                             ;
245                             break;
246                         case '=':
247                             state = P_ENC_EQ;
248                             break;
249                         case '%':
250                             state = P_APER;
251                             break;
252                         default:
253                             state = P_PD;
254                     }
255                     i++;
256                     break;
257                 case P_ENC_EQ:
258                     switch(c) {
259                         case ' ':
260                         case '\t':
261                             break;
262                         case '"':
263                             state = P_ENC_EQ_VAL;
264                             pos = i + 1;
265                             break;
266                         case '%':
267                             state = P_APER;
268                             break;
269                         default:
270                             state = P_PD;
271                     }
272                     i++;
273                     break;
274                 case P_ENC_EQ_VAL:
275                     switch(c) {
276                         case '"': return new String JavaDoc(buffer, pos, i - pos); //return the encoding attr value
277
default:
278                     }
279                     i++;
280                     break;
281                     
282                 case P_CT:
283                     switch(c) {
284                         case ' ':
285                         case '\t':
286                             break;
287                         case '=':
288                             state = P_CT_EQ;
289                             break;
290                         case '%':
291                             state = P_APER;
292                             break;
293                         default:
294                             state = P_PD;
295                     }
296                     i++;
297                     break;
298                 case P_CT_EQ:
299                     switch(c) {
300                         case ' ':
301                         case '\t':
302                             break;
303                         case '"':
304                             state = P_CT_EQ_VAL;
305                             break;
306                         case '%':
307                             state = P_APER;
308                             break;
309                         default:
310                             state = P_PD;
311                     }
312                     i++;
313                     break;
314                 case P_CT_EQ_VAL:
315                     if(prescanFor(buffer, i, CHARSET)) {
316                         state = P_CT_VAL_CHS;
317                         i = i + CHARSET.length();
318                         pos = i;
319                         break;
320                     } else if(c == '"') {
321                         state = P_PD;
322                         break;
323                     }
324                     i++;
325                     break;
326                 case P_CT_VAL_CHS:
327                     switch(c) {
328                         case '"':
329                             contentType = new String JavaDoc(buffer, pos, i - pos); //return the encoding attr value
330
state = P_PD;
331                             break;
332                         default:
333                     }
334                     i++;
335                     break;
336                     
337             } //eof state switch
338
}
339         
340         //returns either contentType value or null; encoding is returned directly from the parser (has priority over CT)
341
return contentType;
342     }
343     
344     
345     private static boolean prescanFor(char[] buffer, int position, String JavaDoc text) {
346         if((buffer.length - position) < text.length()) return false; //too short buffer - the text cannot be there
347
for(int i = 0; i < text.length(); i++) {
348             if(buffer[position+i] != text.charAt(i)) return false;
349         }
350         return true;
351     }
352     
353
354     static final String JavaDoc JSP_PROPERTY_GROUP = "jsp-property-group";
355     static final String JavaDoc PAGE_ENCODING = "page-encoding";
356     static final String JavaDoc IS_XML = "is-xml";
357     
358     /** returns an array of booleans - the first states whether the dd contains a <jsp-property-group> element
359      * with defined encoding resp. marks a set of JSPs to be xml documents. */

360     private static DDParseInfo parse(InputSource JavaDoc src) throws IOException JavaDoc, SAXException JavaDoc, ParserConfigurationException JavaDoc {
361         SAXParserFactory JavaDoc factory = SAXParserFactory.newInstance();
362         factory.setValidating(false);
363         SAXParser JavaDoc parser = factory.newSAXParser();
364         final DDParseInfo ddParseInfo = new DDParseInfo();
365         
366         class Handler extends DefaultHandler JavaDoc {
367             private boolean inJspPropertyGroup = false;
368             public void startElement(String JavaDoc uri, String JavaDoc localname, String JavaDoc qname, Attributes JavaDoc attr) throws SAXException JavaDoc {
369                 String JavaDoc tagName = qname.toLowerCase();
370                 if(JSP_PROPERTY_GROUP.equals(tagName)) inJspPropertyGroup = true;
371                 if(inJspPropertyGroup) {
372                     if(PAGE_ENCODING.equals(tagName)) ddParseInfo.definesEncoding = true;
373                     if(IS_XML.equals(tagName)) ddParseInfo.marksXMLDocuments = true;
374                 }
375             }
376             public void endElement(String JavaDoc uri, String JavaDoc localname, String JavaDoc qname) throws SAXException JavaDoc {
377                 String JavaDoc tagName = qname.toLowerCase();
378                 if(JSP_PROPERTY_GROUP.equals(tagName)) inJspPropertyGroup = false;
379             }
380             public InputSource JavaDoc resolveEntity (String JavaDoc publicId, String JavaDoc systemId) {
381                 return new InputSource JavaDoc(new StringReader JavaDoc("")); //prevent the parser to use catalog entity resolver
382
}
383         }
384         parser.parse(src, new Handler JavaDoc());
385         return ddParseInfo;
386     }
387     
388     private static final class DDParseInfo {
389         public boolean definesEncoding, marksXMLDocuments;
390         public DDParseInfo() {}
391     }
392     
393 }
394
Popular Tags