KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > au > id > jericho > lib > html > Indent


// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 2.2
// Copyright (C) 2006 Martin Jericho
// http://sourceforge.net/projects/jerichohtml/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// http://www.gnu.org/copyleft/lesser.html
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

21 package au.id.jericho.lib.html;
22
23 import java.util.*;
24 import java.io.*;
25 import java.net.*;
26
27 /**
28  * This is an internal class for encapsulating the HTML indenting functionality.
29  */

30 final class Indent implements CharStreamSource {
31     private final Segment segment;
32     private final CharSequence JavaDoc sourceText;
33     private final String JavaDoc indentText;
34     private final boolean tidyTags;
35     private final boolean collapseWhiteSpace;
36     private final boolean indentAllElements;
37     private final boolean indentScriptElements;
38     private Writer writer;
39     
40     private Tag nextTag;
41     private int index;
42
43     public Indent(final Segment segment, final String JavaDoc indentText, final boolean tidyTags, final boolean collapseWhiteSpace, final boolean indentAllElements) {
44         this.segment=segment;
45         sourceText=segment.source.toString();
46         this.indentText=indentText;
47         this.tidyTags=tidyTags;
48         this.collapseWhiteSpace=collapseWhiteSpace;
49         this.indentAllElements=indentAllElements;
50         this.indentScriptElements=indentAllElements; // SCRIPT elements need to be inline to keep functional equivalency of output
51
}
52
53     public void writeTo(final Writer writer) throws IOException {
54         this.writer=writer;
55         nextTag=segment.source.findNextTag(segment.begin);
56         index=segment.begin;
57         writeContent(segment.end,segment.getChildElements(),0);
58         writer.flush();
59     }
60
61     public long getEstimatedMaximumOutputLength() {
62         return sourceText.length()*2;
63     }
64
65     private void writeContent(final int end, final List childElements, final int depth) throws IOException {
66         // sets index to end
67
for (final Iterator i=childElements.iterator(); i.hasNext();) {
68             final Element element=(Element)i.next();
69             final int elementBegin=element.begin;
70             if (elementBegin>=end) break;
71             if (indentAllElements) {
72                 writeText(elementBegin,depth,false,false,false,collapseWhiteSpace);
73                 writeElement(element,depth,end,false,false);
74             } else {
75                 final String JavaDoc elementName=element.getName();
76                 if (!indent(element)) continue;
77                 writeText(elementBegin,depth,false,false,false,collapseWhiteSpace);
78                 if (elementName==HTMLElementName.PRE || elementName==HTMLElementName.TEXTAREA) {
79                     writeElement(element,depth,end,true,true);
80                 } else if (elementName==HTMLElementName.SCRIPT) {
81                     writeElement(element,depth,end,true,false);
82                 } else {
83                     writeElement(element,depth,end,false,!containsNonInlineLevelChildElements(element));
84                 }
85             }
86         }
87         writeText(end,depth,false,false,false,collapseWhiteSpace);
88     }
89
90     private boolean indent(final Element element) {
91         final StartTagType startTagType=element.getStartTag().getStartTagType();
92         if (startTagType==StartTagType.DOCTYPE_DECLARATION) return true;
93         if (startTagType!=StartTagType.NORMAL) return false;
94         final String JavaDoc elementName=element.getName();
95         if (elementName==HTMLElementName.SCRIPT) return indentScriptElements;
96         if (!HTMLElements.getInlineLevelElementNames().contains(elementName)) return true;
97         return containsNonInlineLevelChildElements(element);
98     }
99
100     private void writeText(final int end, int depth, final boolean beginInline, final boolean endInline, final boolean increaseIndentAfterFirstLineBreak, final boolean collapseWhiteSpace) throws IOException {
101         // sets index to end
102
if (index==end) return;
103         while (Segment.isWhiteSpace(sourceText.charAt(index))) if (++index==end) return; // trim whitespace.
104
if (!beginInline) writeIndent(depth);
105         writeTextInline(end,depth,increaseIndentAfterFirstLineBreak,collapseWhiteSpace);
106         if (!endInline) writer.write('\n');
107     }
108
109     private void writeElement(final Element element, final int depth, final int end, final boolean preformatted, boolean renderContentInline) throws IOException {
110         // sets index to minimum of element.end or end
111
final StartTag startTag=element.getStartTag();
112         final EndTag endTag=element.getEndTag();
113         writeIndent(depth);
114         writeTag(startTag,depth,end);
115         if (index==end) {
116             writer.write('\n');
117             return;
118         }
119         if (!renderContentInline) writer.write('\n');
120         int contentEnd=element.getContentEnd();
121         if (end<contentEnd) contentEnd=end;
122         if (preformatted) {
123             if (renderContentInline) {
124                 // Preformatted element such as PRE, TEXTAREA
125
writeContentPreformatted(contentEnd,depth);
126             } else {
127                 // SCRIPT element
128
writeIndentedScriptContent(contentEnd,depth+1);
129             }
130         } else {
131             if (renderContentInline) {
132                 // Inline-level element
133
if (collapseWhiteSpace) {
134                     writeTextCollapseWhiteSpace(contentEnd,depth);
135                 } else {
136                     if (!writeTextInline(contentEnd,depth,true,false)) {
137                         writer.write('\n');
138                         renderContentInline=false;
139                     }
140                 }
141             } else {
142                 // Block-level element
143
writeContent(contentEnd,element.getChildElements(),depth+1);
144             }
145         }
146         if (endTag!=null && end>endTag.begin) {
147             if (!renderContentInline) writeIndent(depth);
148             // assert index=endTag.begin
149
writeTag(endTag,depth,end);
150             writer.write('\n');
151         } else if (renderContentInline) {
152             writer.write('\n');
153         }
154     }
155
156     private void updateNextTag() {
157         // ensures that nextTag is up to date
158
while (nextTag!=null) {
159             if (nextTag.begin>=index) return;
160             nextTag=nextTag.findNextTag();
161         }
162     }
163
164     private void writeIndentedScriptContent(final int end, final int depth) throws IOException {
165         // sets index to end
166
if (index==end) return;
167         int startOfLinePos=getStartOfLinePos(end,false);
168         if (index==end) return;
169         if (startOfLinePos==-1) {
170             // Script started on same line as start tag. Use the start of the next line to determine the original indent.
171
writeIndent(depth);
172             writeLineKeepWhiteSpace(end,depth);
173             writer.write('\n');
174             if (index==end) return;
175             startOfLinePos=getStartOfLinePos(end,true);
176             if (index==end) return;
177         }
178         writeTextPreserveIndenting(end,depth,index-startOfLinePos);
179         writer.write('\n');
180     }
181
182     private boolean writeTextPreserveIndenting(final int end, final int depth) throws IOException {
183         // sets index to end
184
// returns true if all text was on one line, otherwise false
185
// assert index==tag.begin;
186
// end is normally tag.end, but in rare cases may be < tag.end
187
// Use the start of the next line to determine the original indent.
188
writeLineKeepWhiteSpace(end,depth);
189         if (index==end) return true;
190         int startOfLinePos=getStartOfLinePos(end,true);
191         if (index==end) return true;
192         writer.write('\n');
193         writeTextPreserveIndenting(end,depth+1,index-startOfLinePos);
194         return false;
195     }
196
197     private void writeTextPreserveIndenting(final int end, final int depth, final int originalIndentLength) throws IOException {
198         // sets index to end
199
writeIndent(depth);
200         writeLineKeepWhiteSpace(end,depth);
201         while (index!=end) {
202             // Skip over the original indent:
203
for (int x=0; x<originalIndentLength; x++) {
204                 final char ch=sourceText.charAt(index);
205                 if (!(ch==' ' || ch=='\t')) break;
206                 if (++index==end) return;
207             }
208             writer.write('\n');
209             // Insert our indent:
210
writeIndent(depth);
211             // Write the rest of the line including any indent greater than the first line's indent:
212
writeLineKeepWhiteSpace(end,depth);
213         }
214     }
215
216     private int getStartOfLinePos(final int end, final boolean atStartOfLine) {
217         // returns the starting position of the next complete line containing text, or -1 if texts starts on the current line (hence not a complete line).
218
// sets index to the start of the text following the returned position, or end, whichever comes first.
219
int startOfLinePos=atStartOfLine ? index : -1;
220         while (true) {
221             final char ch=sourceText.charAt(index);
222             if (ch=='\n' || ch=='\r') {
223                 startOfLinePos=index+1;
224             } else if (!(ch==' ' || ch=='\t')) break;
225             if (++index==end) break;
226         }
227         return startOfLinePos;
228     }
229
230     private void writeSpecifiedTextInline(final CharSequence JavaDoc text, int depth) throws IOException {
231         final int textLength=text.length();
232         int i=writeSpecifiedLine(text,0);
233         if (i<textLength) {
234             final int subsequentLineDepth=depth+1;
235             do {
236                 while (Segment.isWhiteSpace(text.charAt(i))) if (++i>=textLength) return; // trim whitespace.
237
writer.write('\n');
238                 writeIndent(subsequentLineDepth);
239                 i=writeSpecifiedLine(text,i);
240             } while (i<textLength);
241         }
242     }
243
244     private int writeSpecifiedLine(final CharSequence JavaDoc text, int i) throws IOException {
245         // Writes the first line from the specified text starting from the specified position.
246
// The line break characters are not written.
247
// Returns the position following the first line break character(s), or text.length() if the text contains no line breaks.
248
final int textLength=text.length();
249         while (true) {
250             final char ch=text.charAt(i);
251             if (ch=='\r') {
252                 final int nexti=i+1;
253                 if (nexti<textLength && text.charAt(nexti)=='\n') return i+2;
254             }
255             if (ch=='\n') return i+1;
256             writer.write(ch);
257             if (++i>=textLength) return i;
258         }
259     }
260
261     private boolean writeTextInline(final int end, int depth, final boolean increaseIndentAfterFirstLineBreak, final boolean collapseWhiteSpace) throws IOException {
262         // returns true if all text was on one line, otherwise false
263
// sets index to end
264
if (index==end) return true;
265         writeLine(end,depth,collapseWhiteSpace);
266         if (index==end) return true;
267         final int subsequentLineDepth=increaseIndentAfterFirstLineBreak ? depth+1 : depth;
268         do {
269             while (Segment.isWhiteSpace(sourceText.charAt(index))) if (++index==end) return false; // trim whitespace.
270
writer.write('\n');
271             writeIndent(subsequentLineDepth);
272             writeLine(end,subsequentLineDepth,collapseWhiteSpace);
273         } while (index<end);
274         return false;
275     }
276
277     private void writeLine(final int end, final int depth, final boolean collapseWhiteSpace) throws IOException {
278         // sets index to the position following the first line break character(s), or to end if collapseWhiteSpace or the text contains no line breaks.
279
if (collapseWhiteSpace) {
280             writeTextCollapseWhiteSpace(end,depth);
281         } else {
282             writeLineKeepWhiteSpace(end,depth);
283         }
284     }
285
286     private void writeLineKeepWhiteSpace(final int end, final int depth) throws IOException {
287         // Writes the first line from the source text starting from the specified position, ending at the specified end position.
288
// The line break characters are not written.
289
// Sets index to the position following the first line break character(s), or end if the text contains no line breaks. index is guaranteed < end.
290
// Any tags encountered are written using the writeTag method, whose output may include line breaks.
291
updateNextTag();
292         while (true) {
293             while (nextTag!=null && index==nextTag.begin) {
294                 writeTag(nextTag,depth,end);
295                 if (index==end) return;
296             }
297             final char ch=sourceText.charAt(index);
298             if (ch=='\r') {
299                 final int nextindex=index+1;
300                 if (nextindex<end && sourceText.charAt(nextindex)=='\n') {
301                     index+=2;
302                     return;
303                 }
304             }
305             if (ch=='\n') {
306                 index++;
307                 return;
308             }
309             writer.write(ch);
310             if (++index==end) return;
311         }
312     }
313
314     private void writeTextCollapseWhiteSpace(final int end, final int depth) throws IOException {
315         // sets index to end
316
boolean lastWasWhiteSpace=false;
317         updateNextTag();
318         while (index<end) {
319             while (nextTag!=null && index==nextTag.begin) {
320                 if (lastWasWhiteSpace) {
321                     writer.write(' ');
322                     lastWasWhiteSpace=false;
323                 }
324                 writeTag(nextTag,depth,end);
325                 if (index==end) return;
326             }
327             final char ch=sourceText.charAt(index++);
328             if (Segment.isWhiteSpace(ch)) {
329                 lastWasWhiteSpace=true;
330             } else {
331                 if (lastWasWhiteSpace) {
332                     writer.write(' ');
333                     lastWasWhiteSpace=false;
334                 }
335                 writer.write(ch);
336             }
337         }
338         if (lastWasWhiteSpace) writer.write(' ');
339     }
340
341     private void writeContentPreformatted(final int end, final int depth) throws IOException {
342         // sets index to end
343
updateNextTag();
344         do {
345             while (nextTag!=null && index==nextTag.begin) {
346                 writeTag(nextTag,depth,end);
347                 if (index==end) return;
348             }
349             writer.write(sourceText.charAt(index));
350         } while (++index<end);
351     }
352
353     private void writeTag(final Tag tag, final int depth, final int end) throws IOException {
354         // sets index to last position written, guaranteed < end
355
// assert index==tag.begin
356
nextTag=tag.findNextTag();
357         final int tagEnd=(end>tag.end) ? tag.end : end;
358         if (tag.getTagType()==StartTagType.COMMENT || tag.getTagType()==StartTagType.CDATA_SECTION) {
359             writeTextPreserveIndenting(tagEnd,depth);
360         } else if (tidyTags) {
361             final String JavaDoc tidyTag=tag.tidy();
362             if ((tag instanceof StartTag) && ((StartTag)tag).getAttributes()!=null)
363                 writer.write(tidyTag);
364             else
365                 writeSpecifiedTextInline(tidyTag,depth);
366             index=tagEnd;
367         } else {
368             writeTextInline(tagEnd,depth,true,false);
369         }
370         if (end<=tag.end || !(tag instanceof StartTag)) return;
371         if ((tag.name==HTMLElementName.SCRIPT && !indentScriptElements) || tag.getTagType().isServerTag()) {
372             // this is a server start tag, we may need to write the whole server element:
373
final Element element=tag.getElement();
374             final EndTag endTag=element.getEndTag();
375             if (endTag==null) return;
376             final int contentEnd=(end<endTag.begin) ? end : endTag.begin;
377             final boolean singleLineContent=writeTextPreserveIndenting(contentEnd,depth);
378             //final boolean singleLineContent=writeTextInline(contentEnd,depth+1,false,false); // use this line instead of previous if indenting shouldn't be preserved in server elements.
379
if (endTag.begin>=end) return;
380             if (!singleLineContent) {
381                 writer.write('\n');
382                 writeIndent(depth);
383             }
384             // assert index==endTag.begin
385
writeTag(endTag,depth,end);
386         }
387     }
388     
389   private void writeIndent(final int depth) throws IOException {
390         for (int x=0; x<depth; x++) writer.write(indentText);
391   }
392
393     private boolean containsNonInlineLevelChildElements(final Element element) {
394         // returns true if the element contains any non-inline-level elements or SCRIPT elements.
395
final Collection childElements=element.getChildElements();
396         if (childElements==Collections.EMPTY_LIST) return false;
397         for (final Iterator i=childElements.iterator(); i.hasNext();) {
398             final Element childElement=(Element)i.next();
399             final String JavaDoc elementName=childElement.getName();
400             if (elementName==HTMLElementName.SCRIPT || !HTMLElements.getInlineLevelElementNames().contains(elementName)) return true;
401             if (containsNonInlineLevelChildElements(childElement)) return true;
402         }
403         return false;
404     }
405
406     public String JavaDoc toString() {
407         return CharStreamSourceUtil.toString(this);
408     }
409 }
410
Popular Tags