KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > nu > xom > TextWriter


1 /* Copyright 2002-2004 Elliotte Rusty Harold
2    
3    This library is free software; you can redistribute it and/or modify
4    it under the terms of version 2.1 of the GNU Lesser General Public
5    License as published by the Free Software Foundation.
6    
7    This library is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10    GNU Lesser General Public License for more details.
11    
12    You should have received a copy of the GNU Lesser General Public
13    License along with this library; if not, write to the
14    Free Software Foundation, Inc., 59 Temple Place, Suite 330,
15    Boston, MA 02111-1307 USA
16    
17    You can contact Elliotte Rusty Harold by sending e-mail to
18    elharo@metalab.unc.edu. Please include the word "XOM" in the
19    subject line. The XOM home page is located at http://www.xom.nu/
20 */

21
22 package nu.xom;
23
24 import java.io.IOException JavaDoc;
25 import java.io.Writer JavaDoc;
26
27 import com.ibm.icu.text.Normalizer;
28
29 /**
30  * <p>
31  * This class is responsible for writing strings with the
32  * necessary escaping for their context.
33  * </p>
34  *
35  * @author Elliotte Rusty Harold
36  * @version 1.0
37  *
38  */

39 abstract class TextWriter {
40
41     protected Writer JavaDoc out;
42     protected String JavaDoc encoding;
43     
44     private String JavaDoc lineSeparator = "\r\n";
45     // true if the user has requested a specific
46
// line separator
47
private boolean lineSeparatorSet = false;
48     private boolean inDocType = false;
49     private int maxLength = 0;
50     private int indent = 0;
51     private String JavaDoc indentString = "";
52     private int column = 0;
53     // Is an xml:space="preserve" attribute in scope?
54
private boolean preserveSpace = false;
55     private boolean normalize = false;
56     
/**
 * Creates a text writer that sends all of its output to the given writer.
 *
 * @param out the writer that receives the serialized characters
 * @param encoding the encoding identifier; stored as-is and returned
 *     by getEncoding()
 */
protected TextWriter(Writer out, String encoding) {
    this.encoding = encoding;
    this.out = out;
}
61     
62     
63     void reset() {
64         column = 0;
65         fakeIndents = 0;
66         lastCharacterWasSpace = false;
67         skipFollowingLinefeed = false;
68     }
69
70     
71     private boolean lastCharacterWasSpace = false;
72     
73     /**
74      * Indicates whether a linefeed is just half of a \r\n pair
75      * used for a line break.
76      */

77     private boolean skipFollowingLinefeed = false;
78     
79     private int highSurrogate;
80     
81     
82     private boolean isHighSurrogate(int c) {
83         return c >= 0xD800 && c <= 0xDBFF;
84     }
85     
86     
87     private boolean isLowSurrogate(int c) {
88         return c >= 0xDC00 && c <= 0xDFFF;
89     }
90     
91     
92     // XXX lookup table?
93
final void writePCDATA(char c) throws IOException JavaDoc {
94         
95         if (needsEscaping(c)) {
96             writeEscapedChar(c);
97         }
98         else if (c == '&') {
99             out.write("&amp;");
100             column += 5;
101             lastCharacterWasSpace = false;
102             skipFollowingLinefeed = false;
103             justBroke = false;
104         }
105         else if (c == '<') {
106             out.write("&lt;");
107             column += 4;
108             lastCharacterWasSpace = false;
109             skipFollowingLinefeed = false;
110             justBroke = false;
111         }
112         else if (c == '>') {
113             out.write("&gt;");
114             column += 4;
115             lastCharacterWasSpace = false;
116             skipFollowingLinefeed = false;
117             justBroke = false;
118         }
119         else if (c == '\r') {
120             if (!adjustingWhiteSpace() && !lineSeparatorSet) {
121                 out.write("&#x0D;");
122                 column += 6;
123                 justBroke=false;
124             }
125             else if (!adjustingWhiteSpace() && lineSeparatorSet) {
126                 escapeBreakLine();
127             }
128             else {
129                 breakLine();
130                 lastCharacterWasSpace = true;
131             }
132             skipFollowingLinefeed = true;
133         }
134         else {
135             write(c);
136         }
137         
138     }
139     
140     
141     private void writeEscapedChar(char c) throws IOException JavaDoc {
142
143         if (isHighSurrogate(c)) {
144             //store and wait for low half
145
highSurrogate = c;
146         }
147         else if (isLowSurrogate(c)) {
148             // decode and write entity reference
149
// I am assuming here that nothing allows the
150
// text to be created with a malformed surrogate
151
// pair such as a low surrogate that is not immediately
152
// preceded by a high surrogate
153
int high = highSurrogate & 0x7FF;
154             int low = c - 0xDC00;
155             int highShifted = high << 10;
156             int combined = highShifted | low;
157             int uchar = combined + 0x10000;
158             String JavaDoc s = "&#x" + Integer.toHexString(uchar).toUpperCase() + ';';
159             out.write(s);
160             column += s.length();
161             lastCharacterWasSpace = false;
162             skipFollowingLinefeed = false;
163             justBroke = false;
164         }
165         else {
166             String JavaDoc s = "&#x" + Integer.toHexString(c).toUpperCase() + ';';
167             out.write(s);
168             column += s.length();
169             lastCharacterWasSpace = false;
170             skipFollowingLinefeed = false;
171             justBroke=false;
172         }
173         
174     }
175
176
177     private boolean adjustingWhiteSpace() {
178         return maxLength > 0 || indent > 0;
179     }
180
181     
182     // This is the same as writePCDATA except that it
183
// also needs to escape " as &quot; and tab as "&#x09;".
184
// I'm not escaping the single quote because Serializer
185
// always uses double quotes to contain
186
// values.
187
final void writeAttributeValue(char c)
188       throws IOException JavaDoc {
189         
190         if (needsEscaping(c)) {
191             writeEscapedChar(c);
192         }
193         // Handle white space that the parser might normalize
194
// on roundtrip. We only escape them if the serializer
195
// is not adjusting white space; that is indent is 0
196
// and maxLength is 0.
197
else if (c == '\t' && !adjustingWhiteSpace()) {
198             out.write("&#x09;");
199             column += 6;
200             lastCharacterWasSpace = true;
201             skipFollowingLinefeed = false;
202             justBroke=false;
203         }
204         else if (c == '\n') {
205             if (skipFollowingLinefeed) {
206                 skipFollowingLinefeed = false;
207                 return;
208             }
209             else if (adjustingWhiteSpace()) {
210                 out.write(" ");
211                 lastCharacterWasSpace = true;
212                 justBroke=false;
213             }
214             else {
215                 if (lineSeparatorSet) {
216                     escapeBreakLine();
217                 }
218                 else {
219                     out.write("&#x0A;");
220                     column += 6;
221                     justBroke=false;
222                 }
223                 lastCharacterWasSpace = true;
224             }
225         }
226         else if (c == '"') {
227             out.write("&quot;");
228             column += 6;
229             lastCharacterWasSpace = false;
230             skipFollowingLinefeed = false;
231             justBroke=false;
232         }
233         else if (c == '\r') {
234             if (adjustingWhiteSpace()) {
235                 out.write(" ");
236                 lastCharacterWasSpace = true;
237                 skipFollowingLinefeed = true;
238                 justBroke=false;
239             }
240             else {
241                 if (lineSeparatorSet) {
242                     escapeBreakLine();
243                 }
244                 else {
245                     out.write("&#x0D;");
246                     column += 6;
247                     justBroke=false;
248                 }
249                 skipFollowingLinefeed = true;
250             }
251         }
252         // Handle characters that are illegal in attribute values
253
else if (c == '&') {
254             out.write("&amp;");
255             column += 5;
256             lastCharacterWasSpace = false;
257             skipFollowingLinefeed = false;
258             justBroke=false;
259         }
260         else if (c == '<') {
261             out.write("&lt;");
262             column += 4;
263             lastCharacterWasSpace = false;
264             skipFollowingLinefeed = false;
265             justBroke=false;
266         }
267         else if (c == '>') {
268             out.write("&gt;");
269             column += 4;
270             lastCharacterWasSpace = false;
271             skipFollowingLinefeed = false;
272             justBroke=false;
273         }
274         else {
275             write(c);
276         }
277     }
278
279     
280     private void write(char c) throws IOException JavaDoc {
281         
282       // Carriage returns are completely handled by
283
// writePCDATA and writeAttributeValue. They never
284
// enter this method.
285
if ((c == ' ' || c == '\n' || c == '\t')) {
286             if (needsBreak()) {
287                 breakLine();
288                 skipFollowingLinefeed = false;
289             }
290             else if (preserveSpace || (indent <= 0 && maxLength <= 0)) {
291                 // We're neither indenting nor wrapping
292
// so we need to preserve white space
293
if (c == ' ' || c == '\t') {
294                     out.write(c);
295                     skipFollowingLinefeed = false;
296                     column++;
297                     justBroke=false;
298                 }
299                 else { // (c == '\n')
300
if (!lineSeparatorSet ||
301                         !skipFollowingLinefeed) {
302                         writeLineSeparator(c);
303                     }
304                     skipFollowingLinefeed = false;
305                     column = 0;
306                 }
307             }
308             else if (!lastCharacterWasSpace) {
309                 out.write(' ');
310                 column++;
311                 skipFollowingLinefeed = false;
312                 justBroke=false;
313             }
314             lastCharacterWasSpace = true;
315         }
316         else {
317             out.write(c);
318             // don't increment column for high surrogate, only low surrogate
319
if (c < 0xd800 || c > 0xDBFF) column++;
320             lastCharacterWasSpace = false;
321             skipFollowingLinefeed = false;
322             justBroke=false;
323         }
324       
325     }
326
327     
328     private void writeLineSeparator(char c)
329       throws IOException JavaDoc {
330         
331         if (!inDocType && (!lineSeparatorSet || preserveSpace)) out.write(c);
332         else if (lineSeparator.equals("\r\n")) {
333             out.write("\r\n");
334         }
335         else if (lineSeparator.equals("\n")) {
336             out.write('\n');
337         }
338         else { // lineSeparator.equals("\r"))
339
out.write('\r');
340         }
341         // Remember, there are only three possible line separators
342

343     }
344
345
346     private boolean needsBreak() {
347         
348         if (maxLength <= 0 || preserveSpace) return false;
349         // Better algorithm needed: Should look ahead in the
350
// stream, see if there's a white space character
351
// between here and the maxLength, Then again, simple is good.
352
// Here we just assume there's probably space somewhere
353
// within the next ten characters
354

355         return column >= maxLength - 10;
356         
357     }
358     
359     
360     private boolean justBroke = false;
361     
362     boolean justBroke() {
363         return justBroke;
364     }
365     
366     
367     final void breakLine() throws IOException JavaDoc {
368         
369         out.write(lineSeparator);
370         out.write(indentString);
371         column = indentString.length();
372         lastCharacterWasSpace = true;
373         justBroke = true;
374         
375     }
376     
377     
378     final void escapeBreakLine() throws IOException JavaDoc {
379         
380         if ("\n".equals(lineSeparator)) {
381             out.write("&#x0A;");
382             column += 6;
383         }
384         else if ("\r\n".equals(lineSeparator)) {
385             out.write("&#x0D;&#x0A;");
386             column += 12;
387         }
388         else {
389             out.write("&#x0D;");
390             column += 6;
391         }
392         lastCharacterWasSpace = true;
393         
394     }
395     
396     
397     // Note that when this method is called directly, then
398
// normalization is not performed on c. Currently this is
399
// only called for ASCII characters like <, >, and the space,
400
// which should be OK
401
protected final void writeMarkup(char c) throws IOException JavaDoc {
402         
403         if (needsEscaping(c)) {
404             throw new UnavailableCharacterException(c, encoding);
405         }
406         write(c);
407
408     }
409     
410     // XXX should we have a special package protected
411
// method to be used only for ASCII characters we know don't need escaping or
412
// normalization such as <, /, A-Z, etc.?
413

414     
415     final void writePCDATA(String JavaDoc s) throws IOException JavaDoc {
416         
417         if (normalize) {
418             s = Normalizer.normalize(s, Normalizer.NFC);
419         }
420         int length = s.length();
421         for (int i=0; i < length; i++) {
422             writePCDATA(s.charAt(i));
423         }
424         
425     }
426
427     
428     final void writeAttributeValue(String JavaDoc s)
429       throws IOException JavaDoc {
430         
431         if (normalize) {
432             s = Normalizer.normalize(s, Normalizer.NFC);
433         }
434         int length = s.length();
435         for (int i=0; i < length; i++) {
436             writeAttributeValue(s.charAt(i));
437         }
438         
439     }
440
441     
442     final void writeMarkup(String JavaDoc s) throws IOException JavaDoc {
443         
444         if (normalize) {
445             s = Normalizer.normalize(s, Normalizer.NFC);
446         }
447         int length = s.length();
448         for (int i=0; i < length; i++) {
449             writeMarkup(s.charAt(i));
450         }
451         
452     }
453     
454     
455     boolean isIndenting() {
456         return indentString.length() > 0;
457     }
458
459
460     private int fakeIndents = 0;
461     
462     void incrementIndent() {
463         
464         StringBuffer JavaDoc newIndent = new StringBuffer JavaDoc(indentString);
465         for (int i = 0; i < indent; i++) {
466             newIndent.append(' ');
467         }
468         
469         // limit maximum indent to half of maximum line length
470
if (maxLength > 0 && newIndent.length() > maxLength / 2) {
471             fakeIndents++;
472         }
473         else this.indentString = newIndent.toString();
474         
475     }
476     
477     
478     void decrementIndent() {
479         if (fakeIndents > 0) fakeIndents--;
480         else {
481             indentString = indentString.substring(
482               0, indentString.length()-indent
483             );
484         }
485     }
486
487
488     String JavaDoc getEncoding() {
489         return this.encoding;
490     }
491
492     
493     /**
494      * <p>
495      * Returns the String used as a line separator.
496      * This is always "\n", "\r", or "\r\n".
497      * </p>
498      *
499      * @return the line separator
500      */

501     String JavaDoc getLineSeparator() {
502         return lineSeparator;
503     }
504
505     
506     /**
507      * <p>
508      * Sets the lineSeparator. This
509      * can only be one of the three
510      * strings "\n", "\r", or "\r\n".
511      * All other values are forbidden.
512      * </p>
513      *
514      * @param lineSeparator the lineSeparator to set
515      *
516      * @throws IllegalArgumentException if you attempt to use
517      * any line separator other than "\n", "\r", or "\r\n".
518      *
519      */

520     void setLineSeparator(String JavaDoc lineSeparator) {
521         
522         if (lineSeparator.equals("\n")
523           || lineSeparator.equals("\r")
524           || lineSeparator.equals("\r\n")) {
525             this.lineSeparator = lineSeparator;
526             this.lineSeparatorSet = true;
527         }
528         else {
529             throw new IllegalArgumentException JavaDoc(
530               "Illegal Line Separator");
531         }
532         
533     }
534
535     
536     void setInDocType(boolean inDocType) {
537         this.inDocType = inDocType;
538     }
539
540     
541     /**
542      * <p>
543      * Returns the number of spaces this serializer indents.
544      * </p>
545      *
546      * @return the number of spaces this serializer indents
547      */

548     int getIndent() {
549         return indent;
550     }
551
552
553     /**
554      * <p>
555      * Returns the maximum line length.
556      * </p>
557      *
558      * @return the maximum line length.
559      */

560     int getMaxLength() {
561         return maxLength;
562     }
563
564     /**
565      * <p>
566      * Sets the suggested maximum line length for this serializer.
567      * In some circumstances this may not be respected.
568      * </p>
569      *
570      * @param maxLength the maxLength to set
571      */

572     void setMaxLength(int maxLength) {
573         if (maxLength < 0) maxLength = 0;
574         this.maxLength = maxLength;
575     }
576
577     
578    /**
579      * <p>
580      * Sets the number of spaces to indent each successive level in the
581      * hierarchy. Use 0 for no extra indenting.
582      * </p>
583      *
584      * @param indent the indent to set
585      */

586     void setIndent(int indent) {
587         this.indent = indent;
588     }
589
590
591     void flush() throws IOException JavaDoc {
592         out.flush();
593     }
594
595     
596     abstract boolean needsEscaping(char c);
597
598     
599     /**
600      * <p>
601      * Used to track the current status of xml:space.
602      * This is false by default, unless an xml:space="preserve"
603      * attribute is in-scope. When such an attribute is in-scope,
604      * white space is not adjusted even if indenting and/or
605      * a maximum line length has been requested.
606      * </p>
607      *
608      *
609      * @return true if an <code>xml:space="true"</code> attribute
610      * is in-scope
611      */

612     boolean isPreserveSpace() {
613         return preserveSpace;
614     }
615
616     
617     /**
618      * @param preserveSpace whether to preserve all white space
619      */

620     void setPreserveSpace(boolean preserveSpace) {
621         this.preserveSpace = preserveSpace;
622     }
623
624     
625     /**
626      * @return the current column number
627      */

628     int getColumnNumber() {
629         return this.column;
630     }
631
632     
633     /**
634      * <p>
635      * If true, this property indicates serialization will
636      * perform Unicode normalization on all data using normalization
637      * form C (NFC). Performing Unicode normalization
638      * does change the document's infoset.
639      * The default is false; do not normalize.
640      * </p>
641      *
642      * <p>
643      * This feature has not yet been benchmarked or optimized.
644      * It may result in substantially slower code.
645      * </p>
646      *
647      * @param normalize true if normalization is performed;
648      * false if it isn't.
649      */

650     void setNFC(boolean normalize) {
651         this.normalize = normalize;
652     }
653
654     
655     /**
656      * <p>
657      * If true, this property indicates serialization will
658      * perform Unicode normalization on all data using normalization
659      * form C (NFC). The default is false; do not normalize.
660      * </p>
661      *
662      * @return true if this serialization performs Unicode
663      * normalization; false if it doesn't.
664      */

665     boolean getNFC() {
666         return this.normalize;
667     }
668
669     
670 }
Popular Tags