KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > openedit > modules > html > Tidy


/*
 * Created on Jan 7, 2004
 *
 */

5 package com.openedit.modules.html;
6
7
/**
 * HTML clean-up helpers: escapes characters that should not be emitted verbatim
 * and strips markup from HTML fragments while keeping anchor (link) tags.
 *
 * @author Matthew Avery
 */
public class Tidy
{
    //protected org.w3c.tidy.Tidy fieldJtidy;

    /** Code point of the non-breaking space, emitted as the {@code &nbsp;} entity. */
    private static final char NBSP = 160;

    /**
     * Matches every tag except anchor tags ({@code <a>}, {@code <a ...>}, {@code <a/>}
     * and {@code </a>}). The lookahead requires whitespace, {@code >} or {@code /}
     * right after the tag name so that tags merely starting with "a" (abbr, address,
     * area, ...) are still stripped. Matching is case-sensitive.
     */
    private static final java.util.regex.Pattern NON_ANCHOR_TAG =
        java.util.regex.Pattern.compile( "<(?!/?a[\\s>/])[^>]*>" );

    /**
     * Replaces characters that are invalid in the output with character references
     * (e.g., the character with code 160 becomes "&amp;nbsp;" and a non-whitespace
     * ISO control character with code N becomes "&amp;#N;"). All other characters,
     * including whitespace control characters such as tab and newline, are copied
     * unchanged.
     *
     * @param inRawSource the text to escape; may be null
     * @return the escaped text with leading and trailing whitespace trimmed, or
     *         null if {@code inRawSource} is null
     */
    protected String escapeSpecialCharacters( String inRawSource )
    {
        if ( inRawSource == null )
        {
            return null;
        }

        StringBuilder escapedSource = new StringBuilder( inRawSource.length() );
        for ( int n = 0; n < inRawSource.length(); n++ )
        {
            char c = inRawSource.charAt( n );
            if ( c == NBSP )
            {
                escapedSource.append( "&nbsp;" );
            }
            else if ( !Character.isISOControl( c ) || Character.isWhitespace( c ) )
            {
                escapedSource.append( c );
            }
            else
            {
                // Non-whitespace control character: emit a numeric character reference.
                escapedSource.append( "&#" ).append( (int) c ).append( ';' );
            }
        }
        return escapedSource.toString().trim();
    }

    /**
     * Strips all HTML tags except anchor tags from the given fragment and then
     * converts the entities {@code &nbsp;}, {@code &quot;} and {@code &trade;}
     * to a space, a double quote and "TM" respectively.
     *
     * @param inHtml the HTML fragment to clean; may be null
     * @return the fragment without non-anchor tags, or null if {@code inHtml} is null
     */
    public String removeHtml( String inHtml )
    {
        if ( inHtml == null )
        {
            return null;
        }

        // Strip tags in a single pass; anchors are excluded by the pattern itself, so
        // no placeholder text is needed and look-alike tags such as <abbr> are removed.
        String val = NON_ANCHOR_TAG.matcher( inHtml ).replaceAll( "" );

        // Literal replacements: these are plain strings, not regular expressions.
        val = val.replace( "&nbsp;", " " );
        val = val.replace( "&quot;", "\"" );
        val = val.replace( "&trade;", "TM" );
        return val;
    }

    // Disabled JTidy integration, kept for reference. It relies on the fieldJtidy
    // field that is commented out at the top of this class.
/*
    public String tidySource( String inRawSource, boolean inPreserveHeader )
    {
        ByteArrayInputStream inputStream = new ByteArrayInputStream( inRawSource.getBytes() );
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        getJtidy().parse( inputStream, outputStream );
        String html = outputStream.toString();;

        if ( inPreserveHeader )
        {
            return html;
        }

        if ( inRawSource.startsWith("<style"))
        {
            return inRawSource;
        }
        if ( inRawSource.startsWith("<script"))
        {
            return inRawSource;
        }

        String body = "<body>";
        int startIndex = html.indexOf( body );
        if ( startIndex < 0 )
        {
            return inRawSource; //We cannot handle it
        }
        else
        {
            startIndex += body.length();
        }
        String bodyclose = "</body>";
        int endIndex = html.indexOf( bodyclose );
        if ( endIndex < 0 )
        {
            endIndex = Math.max( html.length() - 1, startIndex );
        }

        String substring = html.substring( startIndex, endIndex );

        //TODO: we need to get back any Javascript or style stuff that got moved to the head tag


        if ( substring == null || substring.length() == 0)
        {
            return inRawSource; //so it does not delete it all
        }

        return escapeSpecialCharacters( substring );
    }
    protected org.w3c.tidy.Tidy getJtidy()
    {
        if ( fieldJtidy == null )
        {
            fieldJtidy = new org.w3c.tidy.Tidy();
            fieldJtidy.setWraplen(200); //we should not need to wrap stuff
            fieldJtidy.setSpaces(4);
            //fieldJtidy.setPrintBodyOnly(true);
            //fieldJtidy.setMakeClean(false);
            fieldJtidy.setXHTML(true);
            fieldJtidy.setTabsize(3);
            fieldJtidy.setShowWarnings(false);
            fieldJtidy.setQuiet( true );
        }
        return fieldJtidy;
    }
*/

}
130
Popular Tags