KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > javax > swing > text > html > CSSParser


1 /*
2  * @(#)CSSParser.java 1.8 03/12/19
3  *
4  * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
5  * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
6  */

7 package javax.swing.text.html;
8
9 import java.io.*;
10
/**
 * A CSS parser. This works by way of a delegate that implements the
 * CSSParserCallback interface. The delegate is notified of the following
 * events:
 * <ul>
 * <li>Import statement: <code>handleImport</code>
 * <li>Selectors <code>handleSelector</code>. This is invoked for each
 * string. For example if the Reader contained p, bar , a {}, the delegate
 * would be notified 4 times, for 'p,' 'bar' ',' and 'a'.
 * <li>When a rule starts, <code>startRule</code>
 * <li>Properties in the rule via the <code>handleProperty</code>. This
 * is invoked once per property/value pair, eg font size: foo;, would
 * cause the delegate to be notified once with a value of 'font size'.
 * <li>Values in the rule via the <code>handleValue</code>, this is notified
 * for the total value.
 * <li>When a rule ends, <code>endRule</code>
 * </ul>
 * This will parse much more than CSS 1, and loosely implements the
 * recommendation for <i>Forward-compatible parsing</i> in section
 * 7.1 of the CSS spec found at:
 * <a href="http://www.w3.org/TR/REC-CSS1">http://www.w3.org/TR/REC-CSS1</a>.
 * If an error results in parsing, a RuntimeException will be thrown.
 * <p>
 * This will preserve case. If the callback wishes to treat certain portions
 * case insensitively (such as selectors), it should use toLowerCase, or
 * something similar.
 *
 * @author Scott Violet
 * @version 1.8 12/19/03
 */
class CSSParser {
    // Parsing something like the following:
    //  (@rule | ruleset | block)*
    //
    //  @rule       (block | identifier)*; (block with {} ends @rule)
    //  block       matching [] () {} (that is, [()] is a block, [(){}{[]}]
    //              is a block, ()[] is two blocks)
    //  identifier  "*" | '*' | anything but a [](){} and whitespace
    //
    //  ruleset     selector decblock
    //  selector    (identifier | (block, except block '{}') )*
    //  declblock   declaration* block*
    //  declaration (identifier* stopping when identifier ends with :)
    //              (identifier* stopping when identifier ends with ;)
    //
    //  comments /* */ can appear any where, and are stripped.

    // identifier - letters, digits, dashes and escaped characters
    // block starts with { ends with matching }, () [] and {} always occur
    //   in matching pairs, '' and "" also occur in pairs.

    // Indicates the type of token being parsed. The values 2..7 double as
    // indices into charMapping.
    private static final int IDENTIFIER = 1;
    private static final int BRACKET_OPEN = 2;
    private static final int BRACKET_CLOSE = 3;
    private static final int BRACE_OPEN = 4;
    private static final int BRACE_CLOSE = 5;
    private static final int PAREN_OPEN = 6;
    private static final int PAREN_CLOSE = 7;
    private static final int END = -1;

    /** Maps a block token (BRACKET_OPEN .. PAREN_CLOSE) to its character. */
    private static final char[] charMapping = { 0, 0, '[', ']', '{', '}', '(',
                                                ')', 0};

    /** Set to true if one character has been read ahead. */
    private boolean didPushChar;
    /** The read ahead character. */
    private int pushedChar;
    /** Temporary place to hold identifiers. */
    private StringBuffer unitBuffer;
    /** Used to indicate blocks. */
    private int[] unitStack;
    /** Number of valid blocks. */
    private int stackCount;
    /** Holds the incoming CSS rules. */
    private Reader reader;
    /** Set to true when the first non @ rule is encountered. */
    private boolean encounteredRuleSet;
    /** Notified of state. */
    private CSSParserCallback callback;
    /** nextToken() inserts the string here. */
    private char[] tokenBuffer;
    /** Current number of valid chars in tokenBuffer. */
    private int tokenBufferLength;
    /** Set to true if any whitespace is read. */
    private boolean readWS;

    /** The delegate interface. */
    static interface CSSParserCallback {
        /** Called when an @import is encountered. */
        void handleImport(String importString);

        // There is currently no way to distinguish between '"foo,"' and
        // 'foo,'. But this generally isn't valid CSS. If it becomes
        // a problem, handleSelector will have to be told if the string is
        // quoted.
        /** Called once for each selector string that is read. */
        void handleSelector(String selector);

        /** Called when the declaration block of a rule set is entered. */
        void startRule();

        // Property names are mapped to lower case before being passed to
        // the delegate.
        /** Called with the (lower cased) name of each property. */
        void handleProperty(String property);

        /** Called with the complete value of the preceding property. */
        void handleValue(String value);

        /** Called when the declaration block of a rule set has ended. */
        void endRule();
    }

    CSSParser() {
        unitStack = new int[2];
        tokenBuffer = new char[80];
        unitBuffer = new StringBuffer();
    }

    /**
     * Parses the contents of <code>reader</code>, reporting what is found
     * to <code>callback</code>.
     *
     * @param reader   source of the CSS text
     * @param callback delegate notified of imports, selectors, properties
     *                 and values
     * @param inRule   if true the input is treated as the inside of a
     *                 declaration block, otherwise as a sequence of
     *                 statements
     * @throws IOException      if reading from <code>reader</code> fails
     * @throws RuntimeException if the input is malformed (unbalanced
     *                          blocks, unclosed strings or comments)
     */
    void parse(Reader reader, CSSParserCallback callback,
               boolean inRule) throws IOException {
        this.callback = callback;
        this.reader = reader;
        stackCount = tokenBufferLength = 0;
        // A previous parse that was aborted by an exception may have left
        // a look ahead character behind, it must not leak into this parse.
        didPushChar = false;
        encounteredRuleSet = false;
        try {
            if (inRule) {
                parseDeclarationBlock();
            }
            else {
                while (getNextStatement()) {
                    // Each iteration consumes one statement.
                }
            }
        } finally {
            // Clear the fields (not the identically named parameters) so
            // that the parser does not retain the reader and callback.
            this.callback = null;
            this.reader = null;
        }
    }

    /**
     * Gets the next statement, returning false if the end is reached. A
     * statement is either an @rule, or a ruleset.
     */
    private boolean getNextStatement() throws IOException {
        unitBuffer.setLength(0);

        int token = nextToken((char)0);

        switch (token) {
        case IDENTIFIER:
            if (tokenBufferLength > 0) {
                if (tokenBuffer[0] == '@') {
                    parseAtRule();
                }
                else {
                    encounteredRuleSet = true;
                    parseRuleSet();
                }
            }
            return true;

        case BRACKET_OPEN:
        case BRACE_OPEN:
        case PAREN_OPEN:
            parseTillClosed(token);
            return true;

        case BRACKET_CLOSE:
        case BRACE_CLOSE:
        case PAREN_CLOSE:
            // Shouldn't happen...
            throw new RuntimeException("Unexpected top level block close");

        case END:
            return false;

        default:
            return true;
        }
    }

    /**
     * Parses an @ rule, stopping at a matching brace pair, or ;. The
     * delegate is only told about @import rules, and only if no rule set
     * has been encountered yet.
     */
    private void parseAtRule() throws IOException {
        boolean done = false;
        // Must be evaluated before the token buffer is overwritten below.
        boolean isImport = tokenEquals("@import");

        unitBuffer.setLength(0);
        while (!done) {
            int nextToken = nextToken(';');

            switch (nextToken) {
            case IDENTIFIER:
                if (tokenBufferLength > 0 &&
                    tokenBuffer[tokenBufferLength - 1] == ';') {
                    // Strip the terminating ';'.
                    --tokenBufferLength;
                    done = true;
                }
                if (tokenBufferLength > 0) {
                    appendPendingSpace();
                    unitBuffer.append(tokenBuffer, 0, tokenBufferLength);
                }
                break;

            case BRACE_OPEN: {
                appendPendingSpace();
                unitBuffer.append(charMapping[nextToken]);
                parseTillClosed(nextToken);
                done = true;
                // Skip a trailing ';', not really to spec.
                int nextChar = readWS();
                if (nextChar != -1 && nextChar != ';') {
                    pushChar(nextChar);
                }
                break;
            }

            case BRACKET_OPEN:
            case PAREN_OPEN:
                unitBuffer.append(charMapping[nextToken]);
                parseTillClosed(nextToken);
                break;

            case BRACKET_CLOSE:
            case BRACE_CLOSE:
            case PAREN_CLOSE:
                throw new RuntimeException("Unexpected close in @ rule");

            case END:
                done = true;
                break;
            }
        }
        if (isImport && !encounteredRuleSet) {
            callback.handleImport(unitBuffer.toString());
        }
    }

    /**
     * Parses the next rule set, which is a selector followed by a
     * declaration block.
     */
    private void parseRuleSet() throws IOException {
        if (parseSelectors()) {
            callback.startRule();
            parseDeclarationBlock();
            callback.endRule();
        }
    }

    /**
     * Parses a set of selectors, returning false if the end of the stream
     * is reached. The token currently in the token buffer is reported as
     * the first selector.
     */
    private boolean parseSelectors() throws IOException {
        int nextToken;

        if (tokenBufferLength > 0) {
            callback.handleSelector(new String(tokenBuffer, 0,
                                               tokenBufferLength));
        }

        unitBuffer.setLength(0);
        for (;;) {
            while ((nextToken = nextToken((char)0)) == IDENTIFIER) {
                if (tokenBufferLength > 0) {
                    callback.handleSelector(new String(tokenBuffer, 0,
                                                       tokenBufferLength));
                }
            }
            switch (nextToken) {
            case BRACE_OPEN:
                return true;

            case BRACKET_OPEN:
            case PAREN_OPEN:
                parseTillClosed(nextToken);
                // Not too sure about this, how we handle this isn't very
                // well spec'd.
                unitBuffer.setLength(0);
                break;

            case BRACKET_CLOSE:
            case BRACE_CLOSE:
            case PAREN_CLOSE:
                throw new RuntimeException("Unexpected block close in selector");

            case END:
                // Prematurely hit end.
                return false;
            }
        }
    }

    /**
     * Parses a declaration block. Which a number of declarations followed
     * by a })].
     */
    private void parseDeclarationBlock() throws IOException {
        for (;;) {
            int token = parseDeclaration();
            switch (token) {
            case END:
            case BRACE_CLOSE:
                return;

            case BRACKET_CLOSE:
            case PAREN_CLOSE:
                // Bail
                throw new RuntimeException("Unexpected close in declaration block");

            case IDENTIFIER:
                break;
            }
        }
    }

    /**
     * Parses a single declaration, which is an identifier a : and another
     * identifier. This returns the last token seen.
     */
    // identifier+: identifier* ;|}
    private int parseDeclaration() throws IOException {
        int token;

        if ((token = parseIdentifiers(':', false)) != IDENTIFIER) {
            return token;
        }
        // Make the property name to lowercase
        for (int counter = unitBuffer.length() - 1; counter >= 0; counter--) {
            unitBuffer.setCharAt(counter, Character.toLowerCase
                                 (unitBuffer.charAt(counter)));
        }
        callback.handleProperty(unitBuffer.toString());

        token = parseIdentifiers(';', true);
        callback.handleValue(unitBuffer.toString());
        return token;
    }

    /**
     * Parses identifiers until <code>extraChar</code> is encountered,
     * returning the ending token, which will be IDENTIFIER if extraChar
     * is found. The text that was read is left in the unit buffer.
     *
     * @param extraChar   character terminating the run of identifiers
     * @param wantsBlocks if true nested blocks are kept in the unit
     *                    buffer, otherwise they are discarded
     */
    private int parseIdentifiers(char extraChar,
                                 boolean wantsBlocks) throws IOException {
        int nextToken;
        int ubl;

        unitBuffer.setLength(0);
        for (;;) {
            nextToken = nextToken(extraChar);

            switch (nextToken) {
            case IDENTIFIER:
                if (tokenBufferLength > 0) {
                    if (tokenBuffer[tokenBufferLength - 1] == extraChar) {
                        // Drop the terminator, keep whatever preceded it.
                        if (--tokenBufferLength > 0) {
                            appendPendingSpace();
                            unitBuffer.append(tokenBuffer, 0,
                                              tokenBufferLength);
                        }
                        return IDENTIFIER;
                    }
                    appendPendingSpace();
                    unitBuffer.append(tokenBuffer, 0, tokenBufferLength);
                }
                break;

            case BRACKET_OPEN:
            case BRACE_OPEN:
            case PAREN_OPEN:
                ubl = unitBuffer.length();
                if (wantsBlocks) {
                    unitBuffer.append(charMapping[nextToken]);
                }
                parseTillClosed(nextToken);
                if (!wantsBlocks) {
                    // Throw away what parseTillClosed accumulated.
                    unitBuffer.setLength(ubl);
                }
                break;

            case BRACE_CLOSE:
                // No need to throw for these two, we return token and
                // caller can do whatever.
            case BRACKET_CLOSE:
            case PAREN_CLOSE:
            case END:
                // Hit the end
                return nextToken;
            }
        }
    }

    /**
     * Parses till a matching block close is encountered. This is only
     * appropriate to be called at the top level (no nesting). Everything
     * read, including the closing character, is appended to the unit
     * buffer.
     */
    private void parseTillClosed(int openToken) throws IOException {
        int nextToken;
        boolean done = false;

        startBlock(openToken);
        while (!done) {
            nextToken = nextToken((char)0);
            switch (nextToken) {
            case IDENTIFIER:
                appendPendingSpace();
                if (tokenBufferLength > 0) {
                    unitBuffer.append(tokenBuffer, 0, tokenBufferLength);
                }
                break;

            case BRACKET_OPEN:
            case BRACE_OPEN:
            case PAREN_OPEN:
                appendPendingSpace();
                unitBuffer.append(charMapping[nextToken]);
                startBlock(nextToken);
                break;

            case BRACKET_CLOSE:
            case BRACE_CLOSE:
            case PAREN_CLOSE:
                appendPendingSpace();
                unitBuffer.append(charMapping[nextToken]);
                endBlock(nextToken);
                if (!inBlock()) {
                    done = true;
                }
                break;

            case END:
                // Prematurely hit end.
                throw new RuntimeException("Unclosed block");
            }
        }
    }

    /**
     * Fetches the next token. For IDENTIFIER the text is left in the token
     * buffer; quoted strings are returned as identifiers without their
     * quotes.
     *
     * @param idChar character that additionally terminates an unquoted
     *               identifier (it is kept as the last character)
     */
    private int nextToken(char idChar) throws IOException {
        readWS = false;

        int nextChar = readWS();

        switch (nextChar) {
        case '\'':
        case '"':
            readTill((char)nextChar);
            // readTill leaves the closing quote in the buffer, drop it.
            if (tokenBufferLength > 0) {
                tokenBufferLength--;
            }
            return IDENTIFIER;
        case '[':
            return BRACKET_OPEN;
        case ']':
            return BRACKET_CLOSE;
        case '{':
            return BRACE_OPEN;
        case '}':
            return BRACE_CLOSE;
        case '(':
            return PAREN_OPEN;
        case ')':
            return PAREN_CLOSE;
        case -1:
            return END;
        default:
            pushChar(nextChar);
            getIdentifier(idChar);
            return IDENTIFIER;
        }
    }

    /**
     * Gets an identifier, returning true if the length of the string is
     * greater than 0, stopping when <code>stopChar</code>, whitespace, a
     * quote, a comment or one of {}()[] is hit. <code>stopChar</code> is
     * kept as the last character of the identifier, the other terminators
     * are pushed back.
     */
    // NOTE: this could be combined with readTill, as they contain somewhat
    // similar functionality.
    private boolean getIdentifier(char stopChar) throws IOException {
        boolean lastWasEscape = false;
        boolean done = false;
        int escapeCount = 0;
        int escapeChar = 0;

        tokenBufferLength = 0;
        while (!done) {
            int nextChar = readChar();
            int hex = hexValue(nextChar);

            if (nextChar == -1) {
                // Reached the end
                done = true;
            }
            if (lastWasEscape) {
                if (hex >= 0) {
                    // Continue with escape, at most 4 hex digits.
                    escapeChar = escapeChar * 16 + hex;
                    if (++escapeCount == 4) {
                        lastWasEscape = false;
                        append((char)escapeChar);
                    }
                }
                else {
                    // no longer escaped
                    lastWasEscape = false;
                    if (escapeCount > 0) {
                        append((char)escapeChar);
                        // Make this simpler, reprocess the character.
                        pushChar(nextChar);
                    }
                    else if (!done) {
                        // '\' followed by a non hex character, take the
                        // character literally.
                        append((char)nextChar);
                    }
                }
            }
            else if (!done) {
                if (nextChar == '\\') {
                    lastWasEscape = true;
                    escapeChar = escapeCount = 0;
                }
                else if (isIdentifierTerminator(nextChar)) {
                    done = true;
                    pushChar(nextChar);
                }
                else if (nextChar == '/') {
                    // Potential comment
                    nextChar = readChar();
                    if (nextChar == '*') {
                        done = true;
                        readComment();
                        // A comment separates tokens like whitespace does.
                        readWS = true;
                    }
                    else {
                        append('/');
                        if (nextChar == -1) {
                            done = true;
                        }
                        else {
                            pushChar(nextChar);
                        }
                    }
                }
                else {
                    append((char)nextChar);
                    if (nextChar == stopChar) {
                        done = true;
                    }
                }
            }
        }
        return (tokenBufferLength > 0);
    }

    /**
     * Reads till a <code>stopChar</code> is encountered, escaping characters
     * as necessary. The stop character is included as the last character
     * of the token buffer.
     *
     * @throws RuntimeException if the end of the input is reached first
     */
    private void readTill(char stopChar) throws IOException {
        boolean lastWasEscape = false;
        boolean done = false;
        int escapeCount = 0;
        int escapeChar = 0;

        tokenBufferLength = 0;
        while (!done) {
            int nextChar = readChar();

            if (nextChar == -1) {
                // Prematurely reached the end!
                throw new RuntimeException("Unclosed " + stopChar);
            }

            int hex = hexValue(nextChar);
            boolean isBackslash = (nextChar == '\\');

            if (lastWasEscape) {
                if (hex >= 0) {
                    // Continue with escape, at most 4 hex digits.
                    escapeChar = escapeChar * 16 + hex;
                    if (++escapeCount == 4) {
                        lastWasEscape = false;
                        append((char)escapeChar);
                    }
                }
                else if (escapeCount > 0) {
                    // no longer escaped
                    append((char)escapeChar);
                    if (isBackslash) {
                        // Another escape starts right away.
                        lastWasEscape = true;
                        escapeChar = escapeCount = 0;
                    }
                    else {
                        if (nextChar == stopChar) {
                            done = true;
                        }
                        append((char)nextChar);
                        lastWasEscape = false;
                    }
                }
                else {
                    // '\' followed by a non hex character, take the
                    // character literally (this is how \" is handled).
                    append((char)nextChar);
                    lastWasEscape = false;
                }
            }
            else if (isBackslash) {
                lastWasEscape = true;
                escapeChar = escapeCount = 0;
            }
            else {
                if (nextChar == stopChar) {
                    done = true;
                }
                append((char)nextChar);
            }
        }
    }

    /**
     * Returns the value of the hex digit <code>c</code> ([0-9a-fA-F]), or
     * -1 if <code>c</code> is not a hex digit.
     */
    private static int hexValue(int c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Returns true if <code>c</code> ends an unquoted identifier: a quote,
     * one of {}()[], or one of the blanks recognized by getIdentifier.
     */
    private static boolean isIdentifierTerminator(int c) {
        switch (c) {
        case '\'': case '"': case '[': case ']': case '{': case '}':
        case '(': case ')':
        case ' ': case '\n': case '\t': case '\r':
            return true;
        default:
            return false;
        }
    }

    /**
     * Returns true if the token buffer holds exactly the characters of
     * <code>expected</code>.
     */
    private boolean tokenEquals(String expected) {
        if (tokenBufferLength != expected.length()) {
            return false;
        }
        for (int counter = 0; counter < tokenBufferLength; counter++) {
            if (tokenBuffer[counter] != expected.charAt(counter)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Appends a single blank to the unit buffer if it is not empty and
     * whitespace was read in front of the current token.
     */
    private void appendPendingSpace() {
        if (unitBuffer.length() > 0 && readWS) {
            unitBuffer.append(' ');
        }
    }

    /**
     * Appends <code>character</code> to the token buffer, doubling the
     * size of the buffer when it is full.
     */
    private void append(char character) {
        if (tokenBufferLength == tokenBuffer.length) {
            char[] newBuffer = new char[tokenBuffer.length * 2];
            System.arraycopy(tokenBuffer, 0, newBuffer, 0, tokenBuffer.length);
            tokenBuffer = newBuffer;
        }
        tokenBuffer[tokenBufferLength++] = character;
    }

    /**
     * Parses a comment block. The opening of the comment must already
     * have been read, everything through the closing star-slash is
     * discarded.
     *
     * @throws RuntimeException if the comment is not closed
     */
    private void readComment() throws IOException {
        int nextChar;

        for (;;) {
            nextChar = readChar();
            switch (nextChar) {
            case -1:
                throw new RuntimeException("Unclosed comment");
            case '*':
                nextChar = readChar();
                if (nextChar == '/') {
                    return;
                }
                else if (nextChar == -1) {
                    throw new RuntimeException("Unclosed comment");
                }
                else {
                    // Might be another '*', look at it again.
                    pushChar(nextChar);
                }
                break;
            default:
                break;
            }
        }
    }

    /**
     * Called when a block start is encountered ({[.
     */
    private void startBlock(int startToken) {
        if (stackCount == unitStack.length) {
            int[] newUS = new int[stackCount * 2];

            System.arraycopy(unitStack, 0, newUS, 0, stackCount);
            unitStack = newUS;
        }
        unitStack[stackCount++] = startToken;
    }

    /**
     * Called when an end block is encountered )]}
     *
     * @throws RuntimeException if <code>endToken</code> does not close the
     *                          innermost open block
     */
    private void endBlock(int endToken) {
        int startToken;

        switch (endToken) {
        case BRACKET_CLOSE:
            startToken = BRACKET_OPEN;
            break;
        case BRACE_CLOSE:
            startToken = BRACE_OPEN;
            break;
        case PAREN_CLOSE:
            startToken = PAREN_OPEN;
            break;
        default:
            // Will never happen.
            startToken = -1;
            break;
        }
        if (stackCount > 0 && unitStack[stackCount - 1] == startToken) {
            stackCount--;
        }
        else {
            // Invalid state, should do something.
            throw new RuntimeException("Unmatched block");
        }
    }

    /**
     * @return true if currently in a block.
     */
    private boolean inBlock() {
        return (stackCount > 0);
    }

    /**
     * Skips any white space, returning the character after the white space.
     * Sets <code>readWS</code> if at least one white space character was
     * skipped.
     */
    private int readWS() throws IOException {
        int nextChar;
        while ((nextChar = readChar()) != -1 &&
               Character.isWhitespace((char)nextChar)) {
            readWS = true;
        }
        return nextChar;
    }

    /**
     * Reads a character from the stream, the pushed back character is
     * returned first if there is one. Case is preserved.
     */
    private int readChar() throws IOException {
        if (didPushChar) {
            didPushChar = false;
            return pushedChar;
        }
        return reader.read();
    }

    /**
     * Supports one character look ahead, this will throw if called twice
     * in a row.
     */
    private void pushChar(int tempChar) {
        if (didPushChar) {
            // Should never happen.
            throw new RuntimeException("Can not handle look ahead of more than one character");
        }
        didPushChar = true;
        pushedChar = tempChar;
    }
}
838
Popular Tags