1 21 22 package org.armedbear.j; 23 24 public final class XmlFormatter extends Formatter 25 { 26 private static final byte XML_FORMAT_TEXT = 0; 27 private static final byte XML_FORMAT_COMMENT = 1; 28 private static final byte XML_FORMAT_DELIMITER = 2; 29 private static final byte XML_FORMAT_NAMESPACE = 3; 30 private static final byte XML_FORMAT_TAG = 4; 31 private static final byte XML_FORMAT_ATTRIBUTE = 5; 32 private static final byte XML_FORMAT_EQUALS = 6; 33 private static final byte XML_FORMAT_QUOTE = 7; 34 35 private static final byte STATE_NAMESPACE = STATE_LAST + 1; 36 private static final byte STATE_TAG_STARTING = STATE_LAST + 2; 37 private static final byte STATE_TAG_ENDING = STATE_LAST + 3; 38 private static final byte STATE_ATTRIBUTE = STATE_LAST + 4; 39 private static final byte STATE_EQUALS = STATE_LAST + 5; 40 41 private FastStringBuffer sb = new FastStringBuffer(); 42 43 public XmlFormatter(Buffer buffer) 44 { 45 this.buffer = buffer; 46 } 47 48 private void endToken(int state) 49 { 50 if (sb.length() > 0) { 51 byte format; 52 switch (state) { 53 case STATE_COMMENT: 54 format = XML_FORMAT_COMMENT; 55 break; 56 case STATE_TAG_STARTING: 57 case STATE_TAG_ENDING: 58 format = XML_FORMAT_DELIMITER; 59 break; 60 case STATE_EQUALS: 61 format = XML_FORMAT_EQUALS; 62 break; 63 case STATE_NAMESPACE: 64 format = XML_FORMAT_NAMESPACE; 65 break; 66 case STATE_TAG: 67 format = XML_FORMAT_TAG; 68 break; 69 case STATE_ATTRIBUTE: 70 format = XML_FORMAT_ATTRIBUTE; 71 break; 72 case STATE_QUOTE: 73 case STATE_SINGLEQUOTE: 74 format = XML_FORMAT_QUOTE; 75 break; 76 case STATE_NEUTRAL: 77 default: 78 format = XML_FORMAT_TEXT; 79 break; 80 } 81 addSegment(sb.toString(), format); 82 sb.setLength(0); 83 } 84 } 85 86 public LineSegmentList formatLine(Line line) 87 { 88 clearSegmentList(); 89 if (line != null) 90 parseLine(line); 91 else 92 addSegment("", XML_FORMAT_TEXT); 93 return segmentList; 94 } 95 96 private void parseLine(Line line) 97 { 98 final String text = getDetabbedText(line); 99 int state = line.flags(); 100 sb.setLength(0); 101 int i = 0; 102 final int limit = text.length(); 103 while (i < limit) { 104 char c = text.charAt(i); 105 if (state == STATE_COMMENT) { 106 if (i < limit-2 && text.substring(i, i+3).equals("-->")) { 107 sb.append("-->"); 108 endToken(state); 109 state = STATE_NEUTRAL; 110 i += 3; 111 } else { 112 sb.append(c); 113 ++i; 114 } 115 continue; 116 } 117 if (state == STATE_CDATA) { 118 if (c == ']') { 119 if (text.regionMatches(i, "]]>", 0, 3)) { 120 endToken(state); 121 sb.append("]]"); 122 endToken(STATE_TAG); 123 sb.append('>'); 124 endToken(STATE_TAG_ENDING); 125 state = STATE_NEUTRAL; 126 i += 3; 127 continue; 128 } 129 } 130 sb.append(c); 131 ++i; 132 continue; 133 } 134 if (state == STATE_TAG_STARTING) { 135 if (c == '/' || c == '?') { 136 sb.append(c); 137 endToken(state); 138 state = STATE_NAMESPACE; 139 ++i; 140 continue; 141 } 142 if (c == '!') { 143 if (text.regionMatches(i, "![CDATA[", 0, 8)) { 144 sb.append(c); 145 endToken(state); 146 sb.append("[CDATA["); 147 endToken(STATE_TAG); 148 state = STATE_CDATA; 149 i += 8; 150 continue; 151 } 152 if (text.regionMatches(i, "!DOCTYPE", 0, 8)) { 153 sb.append(c); 154 endToken(state); 155 sb.append("DOCTYPE"); 156 endToken(STATE_TAG); 157 state = STATE_NEUTRAL; 158 i += 8; 159 continue; 160 } 161 sb.append(c); 162 endToken(state); 163 state = STATE_TAG; 164 ++i; 165 continue; 166 } 167 endToken(state); 168 state = STATE_NAMESPACE; 169 sb.append(c); 170 ++i; 171 continue; 172 } 173 if (state == STATE_NAMESPACE) { 174 if (c == '/' && text.regionMatches(i, "/>", 0, 2)) { 175 endToken(STATE_TAG); 177 state = STATE_TAG_ENDING; 178 sb.append("/>"); 179 endToken(state); 180 state = STATE_NEUTRAL; 181 i += 2; 182 continue; 183 } 184 if (c == '?' && text.regionMatches(i, "?>", 0, 2)) { 185 endToken(STATE_TAG); 187 state = STATE_TAG_ENDING; 188 sb.append("?>"); 189 endToken(state); 190 state = STATE_NEUTRAL; 191 i += 2; 192 continue; 193 } 194 if (c == ':') { 195 sb.append(c); 196 endToken(state); 197 state = STATE_TAG; 198 } else if (isWhitespace(c)) { 199 endToken(STATE_TAG); 201 state = STATE_ATTRIBUTE; 202 sb.append(c); 203 } else if (c == '>') { 204 endToken(STATE_TAG); 206 state = STATE_TAG_ENDING; 207 sb.append(c); 208 endToken(state); 209 state = STATE_NEUTRAL; 210 } else 211 sb.append(c); 212 ++i; 213 continue; 214 } 215 if (state == STATE_TAG) { 216 if (c == '/' && text.regionMatches(i, "/>", 0, 2)) { 217 endToken(state); 218 state = STATE_TAG_ENDING; 219 sb.append("/>"); 220 endToken(state); 221 state = STATE_NEUTRAL; 222 i += 2; 223 continue; 224 } 225 if (c == '?' && text.regionMatches(i, "?>", 0, 2)) { 226 endToken(STATE_TAG); 228 state = STATE_TAG_ENDING; 229 sb.append("?>"); 230 endToken(state); 231 state = STATE_NEUTRAL; 232 i += 2; 233 continue; 234 } 235 if (isWhitespace(c)) { 236 endToken(state); 237 state = STATE_ATTRIBUTE; 238 sb.append(c); 239 } else if (c == '>') { 240 endToken(state); 241 state = STATE_TAG_ENDING; 242 sb.append(c); 243 endToken(state); 244 state = STATE_NEUTRAL; 245 } else 246 sb.append(c); 247 ++i; 248 continue; 249 } 250 if (state == STATE_ATTRIBUTE) { 251 if (c == '/' && text.regionMatches(i, "/>", 0, 2)) { 252 endToken(state); 253 state = STATE_TAG_ENDING; 254 sb.append("/>"); 255 endToken(state); 256 state = STATE_NEUTRAL; 257 i += 2; 258 continue; 259 } 260 if (c == '?' && text.regionMatches(i, "?>", 0, 2)) { 261 endToken(state); 263 state = STATE_TAG_ENDING; 264 sb.append("?>"); 265 endToken(state); 266 state = STATE_NEUTRAL; 267 i += 2; 268 continue; 269 } 270 if (c == '>') { 271 endToken(state); 272 state = STATE_TAG_ENDING; 273 sb.append(c); 274 endToken(state); 275 state = STATE_NEUTRAL; 276 ++i; 277 continue; 278 } 279 if (c == '=') { 280 endToken(state); 281 state = STATE_EQUALS; 282 sb.append(c); 283 endToken(state); 284 state = STATE_ATTRIBUTE; 285 ++i; 286 continue; 287 } 288 if (c == '"') { 289 endToken(state); 290 state = STATE_QUOTE; 291 sb.append(c); 292 ++i; 293 continue; 294 } 295 if (c == '\'') { 296 endToken(state); 297 state = STATE_SINGLEQUOTE; 298 sb.append(c); 299 ++i; 300 continue; 301 } 302 sb.append(c); 303 ++i; 304 continue; 305 } 306 if (state == STATE_QUOTE) { 307 sb.append(c); 308 if (c == '"') { 309 endToken(state); 310 state = STATE_ATTRIBUTE; 311 } 312 ++i; 313 continue; 314 } 315 if (state == STATE_SINGLEQUOTE) { 316 sb.append(c); 317 if (c == '\'') { 318 endToken(state); 319 state = STATE_ATTRIBUTE; 320 } 321 ++i; 322 continue; 323 } 324 if (c == '<') { 326 endToken(state); 327 if (text.regionMatches(i, "<!--", 0, 4)) { 328 state = STATE_COMMENT; 329 sb.append("<!--"); 330 i += 4; 331 continue; 332 } 333 state = STATE_TAG_STARTING; 334 sb.append(c); 335 } else 336 sb.append(c); 337 ++i; 338 } 339 if (state == STATE_NAMESPACE) 341 endToken(STATE_TAG); 343 else 344 endToken(state); 345 } 346 347 public boolean parseBuffer() 348 { 349 int state = STATE_NEUTRAL; 350 Line line = buffer.getFirstLine(); 351 Position pos = new Position(line, 0); 352 boolean changed = false; 353 while (line != null) { 354 int oldflags = line.flags(); 355 if (state != oldflags) { 356 line.setFlags(state); 357 changed = true; 358 } 359 final int limit = line.length(); 360 for (int i = 0; i < limit; i++) { 361 char c = line.charAt(i); 362 if (state == STATE_COMMENT) { 363 if (c == '-') { 364 pos.moveTo(line, i); 365 if (pos.lookingAt("-->")) { 366 state = STATE_NEUTRAL; 367 i += 2; 368 continue; 369 } 370 } 371 continue; 372 } 373 if (state == STATE_CDATA) { 374 if (c == ']') { 375 pos.moveTo(line, i); 376 if (pos.lookingAt("]]>")) { 377 state = STATE_NEUTRAL; 378 i += 2; 379 continue; 380 } 381 } 382 continue; 383 } 384 if (state == STATE_TAG) { 385 if (!isWhitespace(c)) { 386 state = STATE_ATTRIBUTE; 392 continue; 393 } 394 } 395 if (state == STATE_ATTRIBUTE) { 396 if (c == '>') 397 state = STATE_NEUTRAL; 398 else if (c == '"') 399 state = STATE_QUOTE; 400 else if (c == '\'') 401 state = STATE_SINGLEQUOTE; 402 continue; 403 } 404 if (state == STATE_QUOTE) { 405 if (c == '"') 406 state = STATE_ATTRIBUTE; 407 continue; 408 } 409 if (state == STATE_SINGLEQUOTE) { 410 if (c == '\'') 411 state = STATE_ATTRIBUTE; 412 continue; 413 } 414 if (c == '<') { 416 pos.moveTo(line, i); 417 if (pos.lookingAt("<!--")) { 418 state = STATE_COMMENT; 419 i += 3; 420 continue; 421 } 422 if (pos.lookingAt("<![CDATA[")) { 423 state = STATE_CDATA; 424 i += 8; 425 continue; 426 } 427 if (pos.lookingAt("<!DOCTYPE")) { 428 state = STATE_NEUTRAL; 430 i += 8; 431 continue; 432 } 433 state = STATE_TAG; 434 continue; 435 } 436 } 437 line = line.next(); 438 } 439 buffer.setNeedsParsing(false); 440 return changed; 441 } 442 443 private static final boolean isWhitespace(char c) 444 { 445 return c <= ' '; 446 } 447 448 public FormatTable getFormatTable() 449 { 450 if (formatTable == null) { 451 formatTable = new FormatTable("XmlMode"); 452 formatTable.addEntryFromPrefs(XML_FORMAT_TEXT, "text"); 453 formatTable.addEntryFromPrefs(XML_FORMAT_COMMENT, "comment"); 454 formatTable.addEntryFromPrefs(XML_FORMAT_DELIMITER, "delimiter"); 455 formatTable.addEntryFromPrefs(XML_FORMAT_NAMESPACE, "namespace"); 456 formatTable.addEntryFromPrefs(XML_FORMAT_TAG, "tag"); 457 formatTable.addEntryFromPrefs(XML_FORMAT_ATTRIBUTE, "attribute"); 458 formatTable.addEntryFromPrefs(XML_FORMAT_EQUALS, "equals", "delimiter"); 459 formatTable.addEntryFromPrefs(XML_FORMAT_QUOTE, "string"); 460 } 461 return formatTable; 462 } 463 } 464 | Popular Tags |