KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > services > htmlparser > JahiaTextContentTidy


1 package org.jahia.services.htmlparser;
2
3 import org.jahia.utils.JahiaTools;
4
5 /**
6  *
7  * <p>Title: Jahia text content specific issue</p>
8  * <p>Description: </p>
9  * <p>Copyright: Copyright (c) 2002</p>
10  * <p>Company: </p>
11  * @author Khue Nguyen
12  * @version 1.0
13  */

14 public class JahiaTextContentTidy {
15
16     public static String JavaDoc JAHIA_HTML_TAG_NAME = "jahiahtml";
17     public static String JavaDoc JAHIA_HTML_OPEN_TAG = "<jahiahtml>";
18     public static String JavaDoc JAHIA_RESOURCE_MARKER = "jahia$resource$marker";
19     public static String JavaDoc JAHIA_EXPRESSION_MARKER = "jahia$expression$marker";
20     public static String JavaDoc JAHIA_HTML_CLOSE_TAG = "</jahiahtml>";
21     public static String JavaDoc NEW_LINE = "$$$nl$$$";
22     public static String JavaDoc WHITE_SPACE = "$$$ws$$$";
23
24     private static org.apache.log4j.Logger logger =
25             org.apache.log4j.Logger.getLogger (JahiaTextContentTidy.class);
26
27
28     /**
29      * Formats Jahia Text content before tidy processing
30      * @param input
31      * @return
32      */

33     static public String JavaDoc tidyPreProcessing(String JavaDoc input){
34         if ( input == null ){
35             return null;
36         }
37         String JavaDoc result = input.trim();
38         result = JahiaTools.replacePatternIgnoreCase(result,"<html>",JAHIA_HTML_OPEN_TAG);
39         result = JahiaTools.replacePatternIgnoreCase(result,"</html>",JAHIA_HTML_CLOSE_TAG);
40         result = JahiaTools.replacePatternIgnoreCase(result,"<jahia-resource",JAHIA_RESOURCE_MARKER);
41         result = JahiaTools.replacePatternIgnoreCase(result,"<jahia-expression",JAHIA_EXPRESSION_MARKER);
42         result = JahiaTools.replacePattern(result,"\r\n","\n");
43         //result = encodeLineBreak(result,false);
44
//result = encodeTextAreaWhiteSpace(result,true);
45
return result;
46     }
47
48     /**
49      * Formats Jahia Text content after tidy processing.
50      * Should be called only if input string has been processed with preTidyProcessing
51      *
52      * @param input
53      * @return
54      */

55     static public String JavaDoc tidyPostProcessing(String JavaDoc input){
56         if ( input == null ){
57             return null;
58         }
59         int pos = input.toLowerCase().indexOf("<body>");
60         if ( pos != -1 ){
61             input = input.substring(pos+6,input.toLowerCase().indexOf("</body>"));
62         }
63         String JavaDoc result = JahiaTools.replacePattern(input,NEW_LINE,"\n");
64         result = JahiaTools.replacePattern(result,"&#39;","'");
65         result = JahiaTools.replacePatternIgnoreCase(result,JAHIA_HTML_OPEN_TAG,"<html>");
66         result = JahiaTools.replacePatternIgnoreCase(result,JAHIA_HTML_CLOSE_TAG,"</html>");
67         result = JahiaTools.replacePatternIgnoreCase(result,JAHIA_RESOURCE_MARKER,"<jahia-resource");
68         result = JahiaTools.replacePatternIgnoreCase(result,JAHIA_EXPRESSION_MARKER,"<jahia-expression");
69         //result = encodeTextAreaWhiteSpace(result,false);
70
return result.trim();
71     }
72
73     /**
74      * Jahia Specific, we take care to keep text indentation.
75      * As Jahia automatically replace the "\n" by <br> in the Big Text content
76      * entered through Jahia's Simple Text Editor.
77      * We must keep track of those line break when generating processing it
78      * with Tidy.
79      *
80      * @param str
81      * @return
82      */

83     static private String JavaDoc encodeLineBreak (String JavaDoc str, boolean invert) {
84         if (str == null ) {
85             return null;
86         }
87         StringBuffer JavaDoc result = new StringBuffer JavaDoc(str.length() + 500);
88         String JavaDoc strLower = str.toLowerCase();
89         int startOfIndex = 0;
90         int i = strLower.indexOf(JAHIA_HTML_OPEN_TAG);
91         while (i != -1) {
92             if ( !invert ){
93                 result.append(JahiaTools.replacePattern(str.substring(startOfIndex,
94                 i), "\n", NEW_LINE));
95             } else {
96                 result.append(JahiaTools.replacePattern(str.substring(startOfIndex,
97                 i), NEW_LINE, "\n" ));
98             }
99             result.append(JAHIA_HTML_OPEN_TAG);
100             startOfIndex = i + JAHIA_HTML_OPEN_TAG.length();
101             i = strLower.indexOf(JAHIA_HTML_CLOSE_TAG, startOfIndex);
102             if (i != -1) {
103                 result.append(str.substring(startOfIndex, i));
104                 result.append(JAHIA_HTML_CLOSE_TAG);
105                 startOfIndex = i + JAHIA_HTML_CLOSE_TAG.length();
106             }
107             i = strLower.indexOf(JAHIA_HTML_OPEN_TAG, startOfIndex);
108         }
109         if ( !invert ){
110             str = result.append(JahiaTools.replacePattern(str.substring(
111                     startOfIndex, str.length()), "\n", NEW_LINE)).toString();
112         } else {
113             str = result.append(JahiaTools.replacePattern(str.substring(
114                     startOfIndex, str.length()), NEW_LINE, "\n")).toString();
115         }
116         return str;
117     }
118
119     /**
120      * Handle white space in textarea and tidy .
121      * @param str
122      * @return
123      */

124     static private String JavaDoc encodeTextAreaWhiteSpace (String JavaDoc str, boolean encode) {
125         if (str == null ) {
126             return null;
127         }
128         try {
129             StringBuffer JavaDoc result = new StringBuffer JavaDoc(str.length() + 500);
130             String JavaDoc strLower = str.toLowerCase();
131             String JavaDoc test = "";
132             int startOfIndex = 0;
133             int closeTagIndex = 0;
134             int index = 0;
135             int i = strLower.indexOf("<textarea");
136             while (i != -1) {
137                 i = strLower.substring(0, i).length() +
138                     strLower.substring(i).indexOf(">");
139                 result.append(str.substring(startOfIndex, i));
140                 test = result.toString();
141                 closeTagIndex = strLower.substring(0, i).length() +
142                                 strLower.substring(i).indexOf("</textarea");
143                 if ( encode ){
144                     result.append(JahiaTools.replacePattern(str.substring(i,
145                         closeTagIndex), " ", WHITE_SPACE));
146                 } else {
147                     String JavaDoc val = JahiaTools.replacePattern(str.substring(i,
148                         closeTagIndex), " ", "");
149                     result.append(JahiaTools.replacePattern(val, WHITE_SPACE, " "));
150                 }
151                 test = result.toString();
152                 startOfIndex = closeTagIndex;
153                 index = strLower.substring(i).indexOf("<textarea");
154                 if ( index != -1 ){
155                     i = strLower.substring(0, i).length() + index;
156                 } else {
157                     break;
158                 }
159             }
160             str = result.append(str.substring(startOfIndex)).toString();
161         } catch ( Throwable JavaDoc t ){
162             logger.debug("Error parsing text",t);
163         }
164         return str;
165     }
166
167 }
168
Popular Tags