KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > armedbear > j > XmlFormatter


/*
 * XmlFormatter.java
 *
 * Copyright (C) 1998-2003 Peter Graves
 * $Id: XmlFormatter.java,v 1.2 2003/06/29 17:34:01 piso Exp $
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

package org.armedbear.j;

24 public final class XmlFormatter extends Formatter
25 {
26     private static final byte XML_FORMAT_TEXT = 0;
27     private static final byte XML_FORMAT_COMMENT = 1;
28     private static final byte XML_FORMAT_DELIMITER = 2;
29     private static final byte XML_FORMAT_NAMESPACE = 3;
30     private static final byte XML_FORMAT_TAG = 4;
31     private static final byte XML_FORMAT_ATTRIBUTE = 5;
32     private static final byte XML_FORMAT_EQUALS = 6;
33     private static final byte XML_FORMAT_QUOTE = 7;
34
35     private static final byte STATE_NAMESPACE = STATE_LAST + 1;
36     private static final byte STATE_TAG_STARTING = STATE_LAST + 2;
37     private static final byte STATE_TAG_ENDING = STATE_LAST + 3;
38     private static final byte STATE_ATTRIBUTE = STATE_LAST + 4;
39     private static final byte STATE_EQUALS = STATE_LAST + 5;
40
41     private FastStringBuffer sb = new FastStringBuffer();
42
43     public XmlFormatter(Buffer buffer)
44     {
45         this.buffer = buffer;
46     }
47
48     private void endToken(int state)
49     {
50         if (sb.length() > 0) {
51             byte format;
52             switch (state) {
53                 case STATE_COMMENT:
54                     format = XML_FORMAT_COMMENT;
55                     break;
56                 case STATE_TAG_STARTING:
57                 case STATE_TAG_ENDING:
58                     format = XML_FORMAT_DELIMITER;
59                     break;
60                 case STATE_EQUALS:
61                     format = XML_FORMAT_EQUALS;
62                     break;
63                 case STATE_NAMESPACE:
64                     format = XML_FORMAT_NAMESPACE;
65                     break;
66                 case STATE_TAG:
67                     format = XML_FORMAT_TAG;
68                     break;
69                 case STATE_ATTRIBUTE:
70                     format = XML_FORMAT_ATTRIBUTE;
71                     break;
72                 case STATE_QUOTE:
73                 case STATE_SINGLEQUOTE:
74                     format = XML_FORMAT_QUOTE;
75                     break;
76                 case STATE_NEUTRAL:
77                 default:
78                     format = XML_FORMAT_TEXT;
79                     break;
80             }
81             addSegment(sb.toString(), format);
82             sb.setLength(0);
83         }
84     }
85
86     public LineSegmentList formatLine(Line line)
87     {
88         clearSegmentList();
89         if (line != null)
90             parseLine(line);
91         else
92             addSegment("", XML_FORMAT_TEXT);
93         return segmentList;
94     }
95
96     private void parseLine(Line line)
97     {
98         final String JavaDoc text = getDetabbedText(line);
99         int state = line.flags();
100         sb.setLength(0);
101         int i = 0;
102         final int limit = text.length();
103         while (i < limit) {
104             char c = text.charAt(i);
105             if (state == STATE_COMMENT) {
106                 if (i < limit-2 && text.substring(i, i+3).equals("-->")) {
107                     sb.append("-->");
108                     endToken(state);
109                     state = STATE_NEUTRAL;
110                     i += 3;
111                 } else {
112                     sb.append(c);
113                     ++i;
114                 }
115                 continue;
116             }
117             if (state == STATE_CDATA) {
118                 if (c == ']') {
119                     if (text.regionMatches(i, "]]>", 0, 3)) {
120                         endToken(state);
121                         sb.append("]]");
122                         endToken(STATE_TAG);
123                         sb.append('>');
124                         endToken(STATE_TAG_ENDING);
125                         state = STATE_NEUTRAL;
126                         i += 3;
127                         continue;
128                     }
129                 }
130                 sb.append(c);
131                 ++i;
132                 continue;
133             }
134             if (state == STATE_TAG_STARTING) {
135                 if (c == '/' || c == '?') {
136                     sb.append(c);
137                     endToken(state);
138                     state = STATE_NAMESPACE;
139                     ++i;
140                     continue;
141                 }
142                 if (c == '!') {
143                     if (text.regionMatches(i, "![CDATA[", 0, 8)) {
144                         sb.append(c);
145                         endToken(state);
146                         sb.append("[CDATA[");
147                         endToken(STATE_TAG);
148                         state = STATE_CDATA;
149                         i += 8;
150                         continue;
151                     }
152                     if (text.regionMatches(i, "!DOCTYPE", 0, 8)) {
153                         sb.append(c);
154                         endToken(state);
155                         sb.append("DOCTYPE");
156                         endToken(STATE_TAG);
157                         state = STATE_NEUTRAL;
158                         i += 8;
159                         continue;
160                     }
161                     sb.append(c);
162                     endToken(state);
163                     state = STATE_TAG;
164                     ++i;
165                     continue;
166                 }
167                 endToken(state);
168                 state = STATE_NAMESPACE;
169                 sb.append(c);
170                 ++i;
171                 continue;
172             }
173             if (state == STATE_NAMESPACE) {
174                 if (c == '/' && text.regionMatches(i, "/>", 0, 2)) {
175                     // It wasn't really a namespace.
176
endToken(STATE_TAG);
177                     state = STATE_TAG_ENDING;
178                     sb.append("/>");
179                     endToken(state);
180                     state = STATE_NEUTRAL;
181                     i += 2;
182                     continue;
183                 }
184                 if (c == '?' && text.regionMatches(i, "?>", 0, 2)) {
185                     // Processing instruction.
186
endToken(STATE_TAG);
187                     state = STATE_TAG_ENDING;
188                     sb.append("?>");
189                     endToken(state);
190                     state = STATE_NEUTRAL;
191                     i += 2;
192                     continue;
193                 }
194                 if (c == ':') {
195                     sb.append(c);
196                     endToken(state);
197                     state = STATE_TAG;
198                 } else if (isWhitespace(c)) {
199                     // It wasn't really a namespace.
200
endToken(STATE_TAG);
201                     state = STATE_ATTRIBUTE;
202                     sb.append(c);
203                 } else if (c == '>') {
204                     // It wasn't really a namespace.
205
endToken(STATE_TAG);
206                     state = STATE_TAG_ENDING;
207                     sb.append(c);
208                     endToken(state);
209                     state = STATE_NEUTRAL;
210                 } else
211                     sb.append(c);
212                 ++i;
213                 continue;
214             }
215             if (state == STATE_TAG) {
216                 if (c == '/' && text.regionMatches(i, "/>", 0, 2)) {
217                     endToken(state);
218                     state = STATE_TAG_ENDING;
219                     sb.append("/>");
220                     endToken(state);
221                     state = STATE_NEUTRAL;
222                     i += 2;
223                     continue;
224                 }
225                 if (c == '?' && text.regionMatches(i, "?>", 0, 2)) {
226                     // Processing instruction.
227
endToken(STATE_TAG);
228                     state = STATE_TAG_ENDING;
229                     sb.append("?>");
230                     endToken(state);
231                     state = STATE_NEUTRAL;
232                     i += 2;
233                     continue;
234                 }
235                 if (isWhitespace(c)) {
236                     endToken(state);
237                     state = STATE_ATTRIBUTE;
238                     sb.append(c);
239                 } else if (c == '>') {
240                     endToken(state);
241                     state = STATE_TAG_ENDING;
242                     sb.append(c);
243                     endToken(state);
244                     state = STATE_NEUTRAL;
245                 } else
246                     sb.append(c);
247                 ++i;
248                 continue;
249             }
250             if (state == STATE_ATTRIBUTE) {
251                 if (c == '/' && text.regionMatches(i, "/>", 0, 2)) {
252                     endToken(state);
253                     state = STATE_TAG_ENDING;
254                     sb.append("/>");
255                     endToken(state);
256                     state = STATE_NEUTRAL;
257                     i += 2;
258                     continue;
259                 }
260                 if (c == '?' && text.regionMatches(i, "?>", 0, 2)) {
261                     // Processing instruction.
262
endToken(state);
263                     state = STATE_TAG_ENDING;
264                     sb.append("?>");
265                     endToken(state);
266                     state = STATE_NEUTRAL;
267                     i += 2;
268                     continue;
269                 }
270                 if (c == '>') {
271                     endToken(state);
272                     state = STATE_TAG_ENDING;
273                     sb.append(c);
274                     endToken(state);
275                     state = STATE_NEUTRAL;
276                     ++i;
277                     continue;
278                 }
279                 if (c == '=') {
280                     endToken(state);
281                     state = STATE_EQUALS;
282                     sb.append(c);
283                     endToken(state);
284                     state = STATE_ATTRIBUTE;
285                     ++i;
286                     continue;
287                 }
288                 if (c == '"') {
289                     endToken(state);
290                     state = STATE_QUOTE;
291                     sb.append(c);
292                     ++i;
293                     continue;
294                 }
295                 if (c == '\'') {
296                     endToken(state);
297                     state = STATE_SINGLEQUOTE;
298                     sb.append(c);
299                     ++i;
300                     continue;
301                 }
302                 sb.append(c);
303                 ++i;
304                 continue;
305             }
306             if (state == STATE_QUOTE) {
307                 sb.append(c);
308                 if (c == '"') {
309                     endToken(state);
310                     state = STATE_ATTRIBUTE;
311                 }
312                 ++i;
313                 continue;
314             }
315             if (state == STATE_SINGLEQUOTE) {
316                 sb.append(c);
317                 if (c == '\'') {
318                     endToken(state);
319                     state = STATE_ATTRIBUTE;
320                 }
321                 ++i;
322                 continue;
323             }
324             // Not in comment or tag.
325
if (c == '<') {
326                 endToken(state);
327                 if (text.regionMatches(i, "<!--", 0, 4)) {
328                     state = STATE_COMMENT;
329                     sb.append("<!--");
330                     i += 4;
331                     continue;
332                 }
333                 state = STATE_TAG_STARTING;
334                 sb.append(c);
335             } else
336                 sb.append(c);
337             ++i;
338         }
339         // Reached end of line.
340
if (state == STATE_NAMESPACE)
341             // It wasn't really a namespace.
342
endToken(STATE_TAG);
343         else
344             endToken(state);
345     }
346
347     public boolean parseBuffer()
348     {
349         int state = STATE_NEUTRAL;
350         Line line = buffer.getFirstLine();
351         Position pos = new Position(line, 0);
352         boolean changed = false;
353         while (line != null) {
354             int oldflags = line.flags();
355             if (state != oldflags) {
356                 line.setFlags(state);
357                 changed = true;
358             }
359             final int limit = line.length();
360             for (int i = 0; i < limit; i++) {
361                 char c = line.charAt(i);
362                 if (state == STATE_COMMENT) {
363                     if (c == '-') {
364                         pos.moveTo(line, i);
365                         if (pos.lookingAt("-->")) {
366                             state = STATE_NEUTRAL;
367                             i += 2;
368                             continue;
369                         }
370                     }
371                     continue;
372                 }
373                 if (state == STATE_CDATA) {
374                     if (c == ']') {
375                         pos.moveTo(line, i);
376                         if (pos.lookingAt("]]>")) {
377                             state = STATE_NEUTRAL;
378                             i += 2;
379                             continue;
380                         }
381                     }
382                     continue;
383                 }
384                 if (state == STATE_TAG) {
385                     if (!isWhitespace(c)) {
386                         // OK, we shouldn't really be in STATE_ATTRIBUTE just
387
// because we've seen one non-whitespace character
388
// after the opening '<'. But if the line ends before
389
// the '>', we don't want the next line to start in
390
// STATE_TAG.
391
state = STATE_ATTRIBUTE;
392                         continue;
393                     }
394                 }
395                 if (state == STATE_ATTRIBUTE) {
396                     if (c == '>')
397                         state = STATE_NEUTRAL;
398                     else if (c == '"')
399                         state = STATE_QUOTE;
400                     else if (c == '\'')
401                         state = STATE_SINGLEQUOTE;
402                     continue;
403                 }
404                 if (state == STATE_QUOTE) {
405                     if (c == '"')
406                         state = STATE_ATTRIBUTE;
407                     continue;
408                 }
409                 if (state == STATE_SINGLEQUOTE) {
410                     if (c == '\'')
411                         state = STATE_ATTRIBUTE;
412                     continue;
413                 }
414                 // Neutral state.
415
if (c == '<') {
416                     pos.moveTo(line, i);
417                     if (pos.lookingAt("<!--")) {
418                         state = STATE_COMMENT;
419                         i += 3;
420                         continue;
421                     }
422                     if (pos.lookingAt("<![CDATA[")) {
423                         state = STATE_CDATA;
424                         i += 8;
425                         continue;
426                     }
427                     if (pos.lookingAt("<!DOCTYPE")) {
428                         // There is no STATE_DOCTYPE...
429
state = STATE_NEUTRAL;
430                         i += 8;
431                         continue;
432                     }
433                     state = STATE_TAG;
434                     continue;
435                 }
436             }
437             line = line.next();
438         }
439         buffer.setNeedsParsing(false);
440         return changed;
441     }
442
443     private static final boolean isWhitespace(char c)
444     {
445         return c <= ' ';
446     }
447
448     public FormatTable getFormatTable()
449     {
450         if (formatTable == null) {
451             formatTable = new FormatTable("XmlMode");
452             formatTable.addEntryFromPrefs(XML_FORMAT_TEXT, "text");
453             formatTable.addEntryFromPrefs(XML_FORMAT_COMMENT, "comment");
454             formatTable.addEntryFromPrefs(XML_FORMAT_DELIMITER, "delimiter");
455             formatTable.addEntryFromPrefs(XML_FORMAT_NAMESPACE, "namespace");
456             formatTable.addEntryFromPrefs(XML_FORMAT_TAG, "tag");
457             formatTable.addEntryFromPrefs(XML_FORMAT_ATTRIBUTE, "attribute");
458             formatTable.addEntryFromPrefs(XML_FORMAT_EQUALS, "equals", "delimiter");
459             formatTable.addEntryFromPrefs(XML_FORMAT_QUOTE, "string");
460         }
461         return formatTable;
462     }
463 }
464
Popular Tags