1 package org.jahia.services.htmlparser; 2 3 import org.jahia.utils.JahiaTools; 4 5 14 public class JahiaTextContentTidy { 15 16 public static String JAHIA_HTML_TAG_NAME = "jahiahtml"; 17 public static String JAHIA_HTML_OPEN_TAG = "<jahiahtml>"; 18 public static String JAHIA_RESOURCE_MARKER = "jahia$resource$marker"; 19 public static String JAHIA_EXPRESSION_MARKER = "jahia$expression$marker"; 20 public static String JAHIA_HTML_CLOSE_TAG = "</jahiahtml>"; 21 public static String NEW_LINE = "$$$nl$$$"; 22 public static String WHITE_SPACE = "$$$ws$$$"; 23 24 private static org.apache.log4j.Logger logger = 25 org.apache.log4j.Logger.getLogger (JahiaTextContentTidy.class); 26 27 28 33 static public String tidyPreProcessing(String input){ 34 if ( input == null ){ 35 return null; 36 } 37 String result = input.trim(); 38 result = JahiaTools.replacePatternIgnoreCase(result,"<html>",JAHIA_HTML_OPEN_TAG); 39 result = JahiaTools.replacePatternIgnoreCase(result,"</html>",JAHIA_HTML_CLOSE_TAG); 40 result = JahiaTools.replacePatternIgnoreCase(result,"<jahia-resource",JAHIA_RESOURCE_MARKER); 41 result = JahiaTools.replacePatternIgnoreCase(result,"<jahia-expression",JAHIA_EXPRESSION_MARKER); 42 result = JahiaTools.replacePattern(result,"\r\n","\n"); 43 return result; 46 } 47 48 55 static public String tidyPostProcessing(String input){ 56 if ( input == null ){ 57 return null; 58 } 59 int pos = input.toLowerCase().indexOf("<body>"); 60 if ( pos != -1 ){ 61 input = input.substring(pos+6,input.toLowerCase().indexOf("</body>")); 62 } 63 String result = JahiaTools.replacePattern(input,NEW_LINE,"\n"); 64 result = JahiaTools.replacePattern(result,"'","'"); 65 result = JahiaTools.replacePatternIgnoreCase(result,JAHIA_HTML_OPEN_TAG,"<html>"); 66 result = JahiaTools.replacePatternIgnoreCase(result,JAHIA_HTML_CLOSE_TAG,"</html>"); 67 result = JahiaTools.replacePatternIgnoreCase(result,JAHIA_RESOURCE_MARKER,"<jahia-resource"); 68 result = JahiaTools.replacePatternIgnoreCase(result,JAHIA_EXPRESSION_MARKER,"<jahia-expression"); 69 return result.trim(); 71 } 72 73 83 static private String encodeLineBreak (String str, boolean invert) { 84 if (str == null ) { 85 return null; 86 } 87 StringBuffer result = new StringBuffer (str.length() + 500); 88 String strLower = str.toLowerCase(); 89 int startOfIndex = 0; 90 int i = strLower.indexOf(JAHIA_HTML_OPEN_TAG); 91 while (i != -1) { 92 if ( !invert ){ 93 result.append(JahiaTools.replacePattern(str.substring(startOfIndex, 94 i), "\n", NEW_LINE)); 95 } else { 96 result.append(JahiaTools.replacePattern(str.substring(startOfIndex, 97 i), NEW_LINE, "\n" )); 98 } 99 result.append(JAHIA_HTML_OPEN_TAG); 100 startOfIndex = i + JAHIA_HTML_OPEN_TAG.length(); 101 i = strLower.indexOf(JAHIA_HTML_CLOSE_TAG, startOfIndex); 102 if (i != -1) { 103 result.append(str.substring(startOfIndex, i)); 104 result.append(JAHIA_HTML_CLOSE_TAG); 105 startOfIndex = i + JAHIA_HTML_CLOSE_TAG.length(); 106 } 107 i = strLower.indexOf(JAHIA_HTML_OPEN_TAG, startOfIndex); 108 } 109 if ( !invert ){ 110 str = result.append(JahiaTools.replacePattern(str.substring( 111 startOfIndex, str.length()), "\n", NEW_LINE)).toString(); 112 } else { 113 str = result.append(JahiaTools.replacePattern(str.substring( 114 startOfIndex, str.length()), NEW_LINE, "\n")).toString(); 115 } 116 return str; 117 } 118 119 124 static private String encodeTextAreaWhiteSpace (String str, boolean encode) { 125 if (str == null ) { 126 return null; 127 } 128 try { 129 StringBuffer result = new StringBuffer (str.length() + 500); 130 String strLower = str.toLowerCase(); 131 String test = ""; 132 int startOfIndex = 0; 133 int closeTagIndex = 0; 134 int index = 0; 135 int i = strLower.indexOf("<textarea"); 136 while (i != -1) { 137 i = strLower.substring(0, i).length() + 138 strLower.substring(i).indexOf(">"); 139 result.append(str.substring(startOfIndex, i)); 140 test = result.toString(); 141 closeTagIndex = strLower.substring(0, i).length() + 142 strLower.substring(i).indexOf("</textarea"); 143 if ( encode ){ 144 result.append(JahiaTools.replacePattern(str.substring(i, 145 closeTagIndex), " ", WHITE_SPACE)); 146 } else { 147 String val = JahiaTools.replacePattern(str.substring(i, 148 closeTagIndex), " ", ""); 149 result.append(JahiaTools.replacePattern(val, WHITE_SPACE, " ")); 150 } 151 test = result.toString(); 152 startOfIndex = closeTagIndex; 153 index = strLower.substring(i).indexOf("<textarea"); 154 if ( index != -1 ){ 155 i = strLower.substring(0, i).length() + index; 156 } else { 157 break; 158 } 159 } 160 str = result.append(str.substring(startOfIndex)).toString(); 161 } catch ( Throwable t ){ 162 logger.debug("Error parsing text",t); 163 } 164 return str; 165 } 166 167 } 168 | Popular Tags |