KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > jahiatemplates > org > jahia > portlets_api > HTMLParser


//
// ____.
// __/\ ______| |__/\. _______
// __ .____| | \ | +----+ \
// _______| /--| | | - \ _ | : - \_________
// \\______: :---| : : | : | \________>
// |__\---\_____________:______: :____|____:_____\
// /_____|
//
// . . . i n   j a h i a   w e   t r u s t . . .
//
//--------------------------
// HTMLParser
//--------------------------
// Jerome Bedat 14.12.2000
//--------------------------

18 package jahiatemplates.org.jahia.portlets_api;
19
20 import org.apache.regexp.RE;
21 import org.apache.regexp.RESyntaxException;
22 import org.jahia.utils.JahiaConsole;
23
24
25 public class HTMLParser
26 {
27
28     private static HTMLParser theObject = null;
29     private String JavaDoc newHTML;
30     private String JavaDoc newString;
31     private String JavaDoc theScripts = "";
32     private boolean matched;
33     private String JavaDoc theResult;
34     
35     
36     /**
37      * HTMLParser
38      *
39      * @author Jerome Bedat
40      *
41      */

42     private HTMLParser()
43     {
44         JahiaConsole.println( "HTML Parser", "Parsing..." );
45     } // end Constructor
46

47
48     /**
49      * getInstance
50      *
51      * @author Jerome Bedat
52      *
53      */

54     public static synchronized HTMLParser getInstance()
55     {
56         if (theObject == null)
57         {
58             theObject = new HTMLParser();
59         }
60         return theObject;
61     } // end getInstance
62

63
64     /**
65      * getDHTML (replace ('") with \' or \", delete new lines and set the content of script tags to a variable)
66      *
67      * @author Jerome Bedat
68      *
69      */

70     public String JavaDoc getDHTML(String JavaDoc oldHTML)
71     {
72         try
73         {
74             newHTML = oldHTML;
75             
76             RE r = new RE("\\n");
77             newHTML = r.subst(newHTML,"");
78             
79             r = new RE("\\r");
80             newHTML = r.subst(newHTML,"");
81  
82             r = new RE("<script(.*?)</script>", RE.MATCH_CASEINDEPENDENT);
83             matched = r.match(newHTML);
84             theResult = r.getParen(0);
85             if (theResult != null) { setScripts(theResult); }
86             newHTML = r.subst(newHTML,"");
87             
88             r = new RE("<style(.*?)</style>", RE.MATCH_CASEINDEPENDENT);
89             matched = r.match(newHTML);
90             theResult = r.getParen(0);
91             if (theResult != null) { setScripts(theResult); }
92             newHTML = r.subst(newHTML,"");
93
94             r = new RE("<link(.*?)>", RE.MATCH_CASEINDEPENDENT);
95             matched = r.match(newHTML);
96             theResult = r.getParen(0);
97             if (theResult != null) { setScripts(theResult); }
98             newHTML = r.subst(newHTML,"");
99
100             r = new RE("<!(.*?)>");
101             newHTML = r.subst(newHTML,"");
102             
103             r = new RE("\'");
104             newHTML = r.subst(newHTML,"\\'");
105             
106             // r = new RE("\"");
107
// newHTML = r.subst(newHTML,"\\\"");
108

109         }
110         catch (RESyntaxException e)
111         {
112             JahiaConsole.println("HTML Parser",e.toString());
113         }
114         return newHTML;
115     } // end getDHTML
116

117
118     /**
119      * getQuoteString (replace ('") with \' or \"
120      *
121      * @author Jerome Bedat
122      *
123      */

124     public String JavaDoc getQuoteString(String JavaDoc oldHTML)
125     {
126         try
127         {
128             newHTML = oldHTML;
129             
130             RE r = new RE("\'");
131             newHTML = r.subst(newHTML,"\\'");
132             
133             // r = new RE("\"");
134
// newHTML = r.subst(newHTML,"\\\"");
135

136         }
137         catch (RESyntaxException e)
138         {
139             JahiaConsole.println("HTML Parser",e.toString());
140         }
141         return newHTML;
142     } // end getQuoteString
143

144  
145     /**
146      * getHTMLString (replace (" ") with "%nbsp;")
147      *
148      * @author Jerome Bedat
149      *
150      */

151     public String JavaDoc getHTMLString(String JavaDoc oldString)
152     {
153         try
154         {
155             newString = oldString;
156             
157             RE r = new RE(" ");
158             newString = r.subst(newString,"&nbsp;");
159         
160         }
161         catch (RESyntaxException e)
162         {
163             JahiaConsole.println("HTML Parser",e.toString());
164         }
165         return newString;
166     } // end getHTMLString
167

168     
169     /**
170      * setScripts (Set content of script tags to a single variable)
171      *
172      * @author Jerome Bedat
173      *
174      */

175     private void setScripts(String JavaDoc theScript) {
176         theScripts += theScript;
177     } // end setScripts
178

179
180     /**
181      * getScripts (Get content of script tags)
182      *
183      * @author Jerome Bedat
184      *
185      */

186     public String JavaDoc getScripts() {
187         theResult = theScripts;
188         theScripts = "";
189         return theResult;
190     } // end getScripts
191

192
193 }
Popular Tags