KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > scanners > ScriptScanner


1 // HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
2
// http://sourceforge.org/projects/htmlparser
3
// Copyright (C) 2003 Somik Raha
4
//
5
// Revision Control Information
6
//
7
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ScriptScanner.java,v $
8
// $Author: derrickoswald $
9
// $Date: 2005/03/12 17:53:10 $
10
// $Revision: 1.63 $
11
//
12
// This library is free software; you can redistribute it and/or
13
// modify it under the terms of the GNU Lesser General Public
14
// License as published by the Free Software Foundation; either
15
// version 2.1 of the License, or (at your option) any later version.
16
//
17
// This library is distributed in the hope that it will be useful,
18
// but WITHOUT ANY WARRANTY; without even the implied warranty of
19
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
// Lesser General Public License for more details.
21
//
22
// You should have received a copy of the GNU Lesser General Public
23
// License along with this library; if not, write to the Free Software
24
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
//
26

27 package org.htmlparser.scanners;
28
29 import java.util.Vector JavaDoc;
30
31 import org.htmlparser.Attribute;
32 import org.htmlparser.Node;
33 import org.htmlparser.NodeFactory;
34 import org.htmlparser.PrototypicalNodeFactory;
35 import org.htmlparser.Remark;
36 import org.htmlparser.Tag;
37 import org.htmlparser.Text;
38 import org.htmlparser.lexer.Cursor;
39 import org.htmlparser.lexer.Lexer;
40 import org.htmlparser.lexer.Page;
41 import org.htmlparser.scanners.ScriptDecoder;
42 import org.htmlparser.tags.ScriptTag;
43 import org.htmlparser.util.NodeList;
44 import org.htmlparser.util.ParserException;
45
46 /**
47  * The ScriptScanner handles script CDATA.
48  */

49 public class ScriptScanner
50     extends
51         CompositeTagScanner
52 {
53     /**
54      * Strict parsing of CDATA flag.
55      * If this flag is set true, the parsing of script is performed without
56      * regard to quotes. This means that erroneous script such as:
57      * <pre>
58      * document.write("&lt;/script&gt");
59      * </pre>
60      * will be parsed in strict accordance with appendix
61      * <a HREF="http://www.w3.org/TR/html4/appendix/notes.html#notes-specifying-data">
62      * B.3.2 Specifying non-HTML data</a> of the
63      * <a HREF="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a> and
64      * hence will be split into two or more nodes. Correct javascript would
65      * escape the ETAGO:
66      * <pre>
67      * document.write("&lt;\/script&gt");
68      * </pre>
69      * If true, CDATA parsing will stop at the first ETAGO ("&lt;/") no matter
70      * whether it is quoted or not. If false, balanced quotes (either single or
71      * double) will shield an ETAGO. Beacuse of the possibility of quotes within
72      * single or multiline comments, these are also parsed. In most cases,
73      * users prefer non-strict handling since there is so much broken script
74      * out in the wild.
75      */

76     public static boolean STRICT = false;
77
78     /**
79      * Create a script scanner.
80      */

81     public ScriptScanner()
82     {
83     }
84
85     /**
86      * Scan for script.
87      * Accumulates text from the page, until &lt;/[a-zA-Z] is encountered.
88      * @param tag The tag this scanner is responsible for.
89      * @param lexer The source of CDATA.
90      * @param stack The parse stack, <em>not used</em>.
91      */

92     public Tag scan (Tag tag, Lexer lexer, NodeList stack)
93         throws ParserException
94     {
95         String JavaDoc language;
96         String JavaDoc code;
97         Node content;
98         int position;
99         Node node;
100         Attribute attribute;
101         Vector JavaDoc vector;
102
103         if (tag instanceof ScriptTag)
104         {
105             language = ((ScriptTag)tag).getLanguage ();
106             if ((null != language) &&
107                 (language.equalsIgnoreCase ("JScript.Encode") ||
108                  language.equalsIgnoreCase ("VBScript.Encode")))
109             {
110                 code = ScriptDecoder.Decode (lexer.getPage (), lexer.getCursor ());
111                 ((ScriptTag)tag).setScriptCode (code);
112             }
113         }
114         content = lexer.parseCDATA (!STRICT);
115         position = lexer.getPosition ();
116         node = lexer.nextNode (false);
117         if (null != node)
118             if (!(node instanceof Tag) || !( ((Tag)node).isEndTag ()
119                 && ((Tag)node).getTagName ().equals (tag.getIds ()[0])))
120             {
121                 lexer.setPosition (position);
122                 node = null;
123             }
124
125         // build new end tag if required
126
if (null == node)
127         {
128             attribute = new Attribute ("/script", null);
129             vector = new Vector JavaDoc ();
130             vector.addElement (attribute);
131             node = lexer.getNodeFactory ().createTagNode (
132                 lexer.getPage (), position, position, vector);
133         }
134         tag.setEndTag ((Tag)node);
135         if (null != content)
136         {
137             tag.setChildren (new NodeList (content));
138             content.setParent (tag);
139         }
140         node.setParent (tag);
141         tag.doSemanticAction ();
142
143         return (tag);
144     }
145 }
146
Popular Tags