KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > fop > fo > XMLWhiteSpaceHandler


/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


/* $Id: XMLWhiteSpaceHandler.java 462816 2006-10-11 14:40:34Z jeremias $ */

package org.apache.fop.fo;

import java.util.List;
import java.util.Stack;
import org.apache.fop.fo.flow.Block;
import org.apache.fop.fo.flow.Character;
import org.apache.fop.util.CharUtilities;

28 /**
29  * Class encapsulating the functionality for white-space-handling
30  * during refinement stage.
31  * The <code>handleWhiteSpace()</code> methods are called during
32  * FOTree-building and marker-cloning:
33  * <br>
34  * <ul>
35  * <li> from <code>FObjMixed.addChildNode()</code></li>
36  * <li> from <code>FObjMixed.endOfNode()</code></li>
37  * <li> from <code>FObjMixed.handleWhiteSpaceFor()</code></li>
38  * </ul>
39  * <br>
40  * Each time one of the variants is called, white-space is handled
41  * for all <code>FOText</code> or <code>Character</code> nodes that
42  * were added:
43  * <br>
44  * <ul>
45  * <li> either prior to <code>newChild</code> (and after the previous
46  * non-text child node)</li>
47  * <li> or, if <code>newChild</code> is <code>null</code>,
48  * after the previous non-text child</li>
49  * </ul>
50  * <br>
51  * The iteration always starts at <code>firstTextNode</code>,
52  * goes on until the last text-node is reached, and deals only
53  * with FOText nodes (characters are immediately removed) or
54  * Character nodes (characters are kept track of and removed
55  * from the list of child nodes later, when the iterator goes
56  * out of scope)
57  *
58  * Note: if the method is called from an inline's endOfNode(),
59  * there is too little context to decide whether trailing
60  * white-space may be removed, so the pending inline is stored
61  * in a List, together with an iterator for which the next()
62  * method returns the first in the trailing sequence of white-
63  * space characters. This List is processed again at the end
64  * of the ancestor block.
65  */

66 public class XMLWhiteSpaceHandler {
67     
68     /** True if we are in a run of white space */
69     private boolean inWhiteSpace = false;
70     /** True if the last char was a linefeed */
71     private boolean afterLinefeed = true;
72     /** Counter, increased every time a non-white-space is encountered */
73     private int nonWhiteSpaceCount;
74     
75     private Block currentBlock;
76     private FObj currentFO;
77     private int linefeedTreatment;
78     private int whiteSpaceTreatment;
79     private int whiteSpaceCollapse;
80     private FONode nextChild;
81     private boolean endOfBlock;
82     private boolean nextChildIsBlockLevel;
83     private RecursiveCharIterator charIter;
84     
85     private List JavaDoc discardableFOCharacters;
86     private List JavaDoc pendingInlines;
87     private Stack JavaDoc nestedBlockStack = new java.util.Stack JavaDoc();
88     private CharIterator firstWhiteSpaceInSeq;
89     
90     /**
91      * Marks a Character object as discardable, so that it is effectively
92      * removed from the FOTree at the end of handleWhitespace()
93      * @param foChar the Character object to be removed from the list of
94      * childNodes
95      */

96     public void addDiscardableFOChar(Character JavaDoc foChar) {
97         if (discardableFOCharacters == null) {
98             discardableFOCharacters = new java.util.ArrayList JavaDoc();
99         }
100         discardableFOCharacters.add(foChar);
101     }
102     
103     /**
104      * Handle white-space for the fo that is passed in, starting at
105      * firstTextNode
106      * @param fo the FO for which to handle white-space
107      * @param firstTextNode the node at which to start
108      */

109     public void handleWhiteSpace(FObjMixed fo, FONode firstTextNode) {
110         
111         int foId = fo.getNameId();
112         
113         if (foId == Constants.FO_BLOCK) {
114             if (nextChild != null && currentBlock != null) {
115                 /* if already in a block, push the current block
116                  * onto the stack of nested blocks
117                  */

118                 nestedBlockStack.push(currentBlock);
119             }
120             currentBlock = (Block) fo;
121         } else if (foId == Constants.FO_RETRIEVE_MARKER) {
122             /* look for the nearest block ancestor, if any */
123             FONode ancestor = fo;
124             do {
125                 ancestor = ancestor.getParent();
126             } while (ancestor.getNameId() != Constants.FO_BLOCK
127                     && ancestor.getNameId() != Constants.FO_STATIC_CONTENT);
128             
129             if (ancestor.getNameId() == Constants.FO_BLOCK) {
130                 currentBlock = (Block) ancestor;
131             }
132         }
133         
134         if (currentBlock != null) {
135             linefeedTreatment = currentBlock.getLinefeedTreatment();
136             whiteSpaceCollapse = currentBlock.getWhitespaceCollapse();
137             whiteSpaceTreatment = currentBlock.getWhitespaceTreatment();
138         } else {
139             linefeedTreatment = Constants.EN_TREAT_AS_SPACE;
140             whiteSpaceCollapse = Constants.EN_TRUE;
141             whiteSpaceTreatment = Constants.EN_IGNORE_IF_SURROUNDING_LINEFEED;
142         }
143         
144         currentFO = fo;
145
146         if (firstTextNode == null) {
147             //nothing to do but initialize related properties
148
return;
149         }
150         
151         charIter = new RecursiveCharIterator(fo, firstTextNode);
152         inWhiteSpace = false;
153         
154         if (currentFO == currentBlock
155                 || currentBlock == null
156                 || (foId == Constants.FO_RETRIEVE_MARKER
157                         && currentFO.getParent() == currentBlock)) {
158             int textNodeIndex = fo.childNodes.indexOf(firstTextNode);
159             afterLinefeed = (
160                     (textNodeIndex == 0)
161                         || (textNodeIndex > 0
162                             && ((FONode) fo.childNodes.get(textNodeIndex - 1))
163                                     .getNameId() == Constants.FO_BLOCK));
164         }
165         
166         endOfBlock = (nextChild == null && currentFO == currentBlock);
167         
168         if (nextChild != null) {
169             int nextChildId = this.nextChild.getNameId();
170             nextChildIsBlockLevel = (
171                     nextChildId == Constants.FO_BLOCK
172                     || nextChildId == Constants.FO_TABLE_AND_CAPTION
173                     || nextChildId == Constants.FO_TABLE
174                     || nextChildId == Constants.FO_LIST_BLOCK
175                     || nextChildId == Constants.FO_BLOCK_CONTAINER);
176         } else {
177             nextChildIsBlockLevel = false;
178         }
179         
180         handleWhiteSpace();
181         
182         if (currentFO == currentBlock
183                 && pendingInlines != null
184                 && !pendingInlines.isEmpty()) {
185             /* current FO is a block, and has pending inlines */
186             if (endOfBlock || nextChildIsBlockLevel) {
187                 if (nonWhiteSpaceCount == 0) {
188                     /* handle white-space for all pending inlines*/
189                     PendingInline p;
190                     for (int i = pendingInlines.size(); --i >= 0;) {
191                         p = (PendingInline)pendingInlines.get(i);
192                         charIter = (RecursiveCharIterator)p.firstTrailingWhiteSpace;
193                         handleWhiteSpace();
194                         pendingInlines.remove(p);
195                     }
196                 } else {
197                     /* there is non-white-space text between the pending
198                      * inline(s) and the end of the block;
199                      * clear list of pending inlines */

200                     pendingInlines.clear();
201                 }
202             }
203         }
204         
205         if (nextChild == null) {
206             if (currentFO != currentBlock) {
207                 /* current FO is not a block, and is about to end */
208                 if (nonWhiteSpaceCount > 0 && pendingInlines != null) {
209                     /* there is non-white-space text between the pending
210                      * inline(s) and the end of the non-block node;
211                      * clear list of pending inlines */

212                     pendingInlines.clear();
213                 }
214                 if (inWhiteSpace) {
215                     /* means there is at least one trailing space in the
216                        inline FO that is about to end */

217                     addPendingInline(fo);
218                 }
219             } else {
220                 /* end of block: clear the references and pop the
221                  * nested block stack */

222                 if (!nestedBlockStack.empty()) {
223                     currentBlock = (Block) nestedBlockStack.pop();
224                 } else {
225                     currentBlock = null;
226                 }
227                 currentFO = null;
228                 charIter = null;
229             }
230         }
231     }
232     
233     /**
234      * Handle white-space for the fo that is passed in, starting at
235      * firstTextNode (when a nested FO is encountered)
236      * @param fo the FO for which to handle white-space
237      * @param firstTextNode the node at which to start
238      * @param nextChild the child-node that will be added to the list after
239      * the last text-node
240      */

241     public void handleWhiteSpace(FObjMixed fo, FONode firstTextNode, FONode nextChild) {
242         this.nextChild = nextChild;
243         handleWhiteSpace(fo, firstTextNode);
244         this.nextChild = null;
245     }
246     
247     private void handleWhiteSpace() {
248         
249         EOLchecker lfCheck = new EOLchecker(charIter);
250         
251         nonWhiteSpaceCount = 0;
252         
253         while (charIter.hasNext()) {
254             if (!inWhiteSpace) {
255                 firstWhiteSpaceInSeq = charIter.mark();
256             }
257             char currentChar = charIter.nextChar();
258             int currentCharClass = CharUtilities.classOf(currentChar);
259             if (currentCharClass == CharUtilities.LINEFEED
260                 && linefeedTreatment == Constants.EN_TREAT_AS_SPACE) {
261                 // if we have a linefeed and it is supposed to be treated
262
// like a space, that's what we do and continue
263
currentChar = '\u0020';
264                 charIter.replaceChar('\u0020');
265                 currentCharClass = CharUtilities.classOf(currentChar);
266             }
267             switch (CharUtilities.classOf(currentChar)) {
268                 case CharUtilities.XMLWHITESPACE:
269                     // Some kind of whitespace character, except linefeed.
270
if (inWhiteSpace
271                             && whiteSpaceCollapse == Constants.EN_TRUE) {
272                         // We are in a run of whitespace and should collapse
273
// Just delete the char
274
charIter.remove();
275                     } else {
276                         // Do the white space treatment here
277
boolean bIgnore = false;
278
279                         switch (whiteSpaceTreatment) {
280                             case Constants.EN_IGNORE:
281                                 bIgnore = true;
282                                 break;
283                             case Constants.EN_IGNORE_IF_BEFORE_LINEFEED:
284                                 bIgnore = lfCheck.beforeLinefeed();
285                                 break;
286                             case Constants.EN_IGNORE_IF_SURROUNDING_LINEFEED:
287                                 bIgnore = afterLinefeed
288                                            || lfCheck.beforeLinefeed();
289                                 break;
290                             case Constants.EN_IGNORE_IF_AFTER_LINEFEED:
291                                 bIgnore = afterLinefeed;
292                                 break;
293                             case Constants.EN_PRESERVE:
294                                 //nothing to do now, replacement takes place later
295
break;
296                             default:
297                                 //nop
298
}
299                         // Handle ignore and replacement
300
if (bIgnore) {
301                             charIter.remove();
302                         } else {
303                             // this is to retain a single space between words
304
inWhiteSpace = true;
305                             if (currentChar != '\u0020') {
306                                 charIter.replaceChar('\u0020');
307                             }
308                         }
309                     }
310                     break;
311
312                 case CharUtilities.LINEFEED:
313                     // A linefeed
314
switch (linefeedTreatment) {
315                         case Constants.EN_IGNORE:
316                             charIter.remove();
317                             break;
318                         case Constants.EN_TREAT_AS_ZERO_WIDTH_SPACE:
319                             charIter.replaceChar(CharUtilities.ZERO_WIDTH_SPACE);
320                             inWhiteSpace = false;
321                             break;
322                         case Constants.EN_PRESERVE:
323                             lfCheck.reset();
324                             inWhiteSpace = false;
325                             afterLinefeed = true; // for following whitespace
326
break;
327                         default:
328                             //nop
329
}
330                     break;
331
332                 case CharUtilities.EOT:
333                     // A "boundary" objects such as non-character inline
334
// or nested block object was encountered. (? can't happen)
335
// If any whitespace run in progress, finish it.
336
// FALL THROUGH
337

338                 default:
339                     // Any other character
340
inWhiteSpace = false;
341                     afterLinefeed = false;
342                     nonWhiteSpaceCount++;
343                     lfCheck.reset();
344                     break;
345             }
346         }
347         if (discardableFOCharacters != null
348                 && !discardableFOCharacters.isEmpty()) {
349             currentFO.childNodes.removeAll(discardableFOCharacters);
350             discardableFOCharacters.clear();
351         }
352     }
353     
354     private void addPendingInline(FObjMixed fo) {
355         if (pendingInlines == null) {
356             pendingInlines = new java.util.ArrayList JavaDoc(5);
357         }
358         pendingInlines.add(new PendingInline(fo, firstWhiteSpaceInSeq));
359     }
360     
361     /**
362      * Helper class, used during white-space handling to look ahead, and
363      * see if the next character is a linefeed (or if there will be
364      * an equivalent effect during layout, i.e. end-of-block or
365      * the following child is a block-level FO)
366      */

367     private class EOLchecker {
368         private boolean nextIsEOL = false;
369         private RecursiveCharIterator charIter;
370
371         EOLchecker(CharIterator charIter) {
372             this.charIter = (RecursiveCharIterator) charIter;
373         }
374
375         boolean beforeLinefeed() {
376             if (!nextIsEOL) {
377                 CharIterator lfIter = charIter.mark();
378                 while (lfIter.hasNext()) {
379                     int charClass = CharUtilities.classOf(lfIter.nextChar());
380                     if (charClass == CharUtilities.LINEFEED) {
381                         if (linefeedTreatment == Constants.EN_PRESERVE) {
382                             nextIsEOL = true;
383                             return nextIsEOL;
384                         }
385                     } else if (charClass != CharUtilities.XMLWHITESPACE) {
386                         return nextIsEOL;
387                     }
388                 }
389                 // No more characters == end of text run
390
// means EOL if there either is a nested block to be added,
391
// or if this is the last text node in the current block
392
nextIsEOL = nextChildIsBlockLevel || endOfBlock;
393             }
394             return nextIsEOL;
395         }
396
397         void reset() {
398             nextIsEOL = false;
399         }
400     }
401     
402     /**
403      * Helper class to store unfinished inline nodes together
404      * with an iterator that starts at the first white-space
405      * character in the sequence of trailing white-space
406      */

407     private class PendingInline {
408         protected FObjMixed fo;
409         protected CharIterator firstTrailingWhiteSpace;
410         
411         PendingInline(FObjMixed fo, CharIterator firstTrailingWhiteSpace) {
412             this.fo = fo;
413             this.firstTrailingWhiteSpace = firstTrailingWhiteSpace;
414         }
415     }
416 }
417
Popular Tags