KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > editor > ext > html > parser > SyntaxParser


1 /*
2  * The contents of this file are subject to the terms of the Common Development
3  * and Distribution License (the License). You may not use this file except in
4  * compliance with the License.
5  *
6  * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7  * or http://www.netbeans.org/cddl.txt.
8  *
9  * When distributing Covered Code, include this CDDL Header Notice in each file
10  * and include the License file at http://www.netbeans.org/cddl.txt.
11  * If applicable, add the following below the CDDL Header, with the fields
12  * enclosed by brackets [] replaced by your own identifying information:
13  * "Portions Copyrighted [year] [name of copyright owner]"
14  *
15  * The Original Software is NetBeans. The Initial Developer of the Original
16  * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
17  * Microsystems, Inc. All Rights Reserved.
18  */

19
20 package org.netbeans.editor.ext.html.parser;
21
22 import java.util.ArrayList JavaDoc;
23 import java.util.Collections JavaDoc;
24 import java.util.List JavaDoc;
25 import javax.swing.text.BadLocationException JavaDoc;
26 import javax.swing.text.Document JavaDoc;
27 import org.netbeans.api.html.lexer.HTMLTokenId;
28 import org.netbeans.api.lexer.Token;
29 import org.netbeans.api.lexer.TokenHierarchy;
30 import org.netbeans.api.lexer.TokenHierarchyEvent;
31 import org.netbeans.api.lexer.TokenHierarchyEventType;
32 import org.netbeans.api.lexer.TokenHierarchyListener;
33 import org.netbeans.api.lexer.TokenSequence;
34 import org.netbeans.editor.BaseDocument;
35 import org.openide.util.RequestProcessor;
36
37 /**
38  * Simple HTML syntax parser.
39  *
40  * @author Marek.Fukala@Sun.com
41  */

42 public final class SyntaxParser {
43     
44     private static final int PARSER_DELAY = 1000; //1 second
45

46     private final Document JavaDoc doc;
47     private final TokenHierarchy hi;
48     private final RequestProcessor.Task parserTask;
49     private final ArrayList JavaDoc<SyntaxParserListener> listeners = new ArrayList JavaDoc<SyntaxParserListener>();
50     private final Object JavaDoc parsingState = new Object JavaDoc();
51     
52     private final TokenHierarchyListener tokenHierarchyListener = new TokenHierarchyListener() {
53         public void tokenHierarchyChanged(TokenHierarchyEvent evt) {
54             if(evt.type() == TokenHierarchyEventType.MODIFICATION) {
55                 synchronized (parsingState) {
56                     restartParser();
57                 }
58             }
59         }
60     };
61     
62     private ArrayList JavaDoc<SyntaxElement> parsedElements;
63     
64     private boolean isParsing = false;
65     private boolean isScheduled = false;
66     
67     /** Returns an instance of SyntaxParser for given document.
68      * The client is supposed to add a SyntaxParserListener to the obtained instance
69      * to get notification whenever the document changes and is reparsed.
70      */

71     public static synchronized SyntaxParser get(Document JavaDoc doc) {
72         SyntaxParser parser = (SyntaxParser)doc.getProperty(SyntaxParser.class);
73         if(parser == null) {
74             parser = new SyntaxParser(doc);
75             doc.putProperty(SyntaxParser.class, parser);
76         }
77         return parser;
78     }
79     
80     private SyntaxParser(Document JavaDoc doc) {
81         this.doc = doc;
82         this.hi = TokenHierarchy.get(doc);
83         
84         parserTask = RequestProcessor.getDefault().create(new Runnable JavaDoc() {
85             public void run() {
86                 parse();
87             }
88         });
89         
90         //add itself as token hierarchy listener
91
hi.addTokenHierarchyListener(tokenHierarchyListener);
92         
93         parsedElements = null; //null states the data are not available yet
94
}
95     
96     //---------------------------- public methods -------------------------------
97

98     /** Adds a new SyntaxParserListener and starts parsing if fresh data not available, otherwise synchronously
99      * notifies the added SyntaxParserListener that parsed data are available.*/

100     public void addSyntaxParserListener(SyntaxParserListener spl) {
101         listeners.add(spl);
102         
103         synchronized (parsingState) {
104             if(isParsing || isScheduled) return ; //we are either parsing or waiting for parser to start - will parse and fire event then
105

106             if(parsedElements == null) {
107                 //we need to run the parser
108
restartParser();
109             } else {
110                 //data actual no need to reparse - just synchronously return parsed data
111
spl.parsingFinished(createParseResult());
112             }
113         }
114     }
115     
116     /** Removes the SyntaxParserListener from the listeners list.*/
117     public void removeSyntaxParserListener(SyntaxParserListener spl) {
118         listeners.remove(spl);
119     }
120     
121     //---------------------------- private methods -------------------------------
122

123     private void restartParser() {
124         if(!parserTask.isFinished()) {
125             parserTask.cancel();
126         }
127         parserTask.schedule(PARSER_DELAY);
128         isScheduled = true;
129     }
130     
131     private void parse() {
132         synchronized (parsingState) {
133             isParsing = true;
134             isScheduled = false;
135         }
136         
137         reallyParse();
138         
139         synchronized (parsingState) {
140             isParsing = false;
141         }
142         
143         notifyParsingFinished();
144     }
145     
146     private void reallyParse() {
147         parsedElements = new ArrayList JavaDoc<SyntaxElement>();
148         try {
149             SyntaxElement sel = getElementChain(0);
150             while (sel != null) {
151                 parsedElements.add(sel);
152                 sel = sel.getNext();
153             }
154             
155         }catch(BadLocationException JavaDoc ble) {
156             ble.printStackTrace();;
157         }
158     }
159     
160     private void notifyParsingFinished() {
161         if(!parsedElements.isEmpty()) {
162             List JavaDoc<SyntaxElement> results = createParseResult();
163             for(SyntaxParserListener spl : listeners) {
164                 spl.parsingFinished(results);
165             }
166         }
167     }
168     
169     private List JavaDoc<SyntaxElement> createParseResult() {
170         //return Collections.
171
return Collections.unmodifiableList(parsedElements);
172     }
173     
174     Document JavaDoc getDocument() {
175         return doc;
176     }
177     
178     /** Returns SyntaxElement instance for block of tokens, which is either
179      * surrounding given offset, or is just after the offset.
180      *
181      * @param offset offset in document where to search for SyntaxElement
182      * @return SyntaxElement surrounding or laying after the offset
183      * or <CODE>null</CODE> if there is no element there (end of document)
184      */

185     public SyntaxElement getElementChain( int offset ) throws BadLocationException JavaDoc {
186         ((BaseDocument)doc).readLock();
187         try {
188             TokenSequence ts = tokenSequence(hi, offset);
189             if(ts == null) {
190                 return null;
191             }
192             
193             ts.move(offset);
194             if(!ts.moveNext() && !ts.movePrevious()) return null; //no token found
195

196             Token item = ts.token();
197             
198             int beginning = ts.offset();
199             
200             if( item.id() == HTMLTokenId.CHARACTER ) {
201                 do {
202                     item = ts.token();
203                     beginning = ts.offset();
204                 } while(item.id() == HTMLTokenId.CHARACTER && ts.movePrevious());
205                 
206                 // now item is either HTMLSyntax.VALUE or we're in text, or at BOF
207
if( item.id() != HTMLTokenId.VALUE && item.id() != HTMLTokenId.TEXT ) {
208                     return getNextElement( beginning );
209                 } // else ( for VALUE or TEXT ) fall through
210
}
211             
212             if( item.id() == HTMLTokenId.WS || item.id() == HTMLTokenId.ARGUMENT || // these are possible only in Tags
213
item.id() == HTMLTokenId.OPERATOR || item.id() == HTMLTokenId.VALUE ) { // so find boundary
214
while(ts.movePrevious() && !isTag(item = ts.token()));
215                 return getNextElement( item.offset(hi) ); // TAGC
216
}
217             
218             if( item.id() == HTMLTokenId.TEXT ) {
219                 do {
220                     beginning = ts.offset();
221                 } while ( ts.movePrevious() && (ts.token().id() == HTMLTokenId.TEXT || ts.token().id() == HTMLTokenId.CHARACTER));
222                 
223                 return getNextElement( beginning ); // from start of Commment
224
}
225             
226             if( item.id() == HTMLTokenId.SCRIPT) {
227                 //we have just one big token for script
228
return getNextElement( ts.token().offset(hi));
229             }
230             
231             
232             if( isTag(item)) {
233                 if( item.id() == HTMLTokenId.TAG_OPEN ||
234                         item.id() == HTMLTokenId.TAG_OPEN_SYMBOL) return getNextElement( item.offset(hi) ); // TAGO/ETAGO // NOI18N
235
else {
236                     do {
237                         if(!ts.movePrevious()) {
238                             return getNextElement( item.offset(hi));
239                         }
240                         item = ts.token();
241                     } while( item.id() != HTMLTokenId.TAG_OPEN_SYMBOL);
242                     
243                     return getNextElement( item.offset(hi) ); // TAGC
244
}
245             }
246             
247             if( item.id() == HTMLTokenId.ERROR )
248                 return new SyntaxElement( this, item.offset(hi), getTokenEnd( hi, item ), SyntaxElement.TYPE_ERROR );
249             
250             if( item.id() == HTMLTokenId.BLOCK_COMMENT ) {
251                 while( item.id() == HTMLTokenId.BLOCK_COMMENT && !item.text().toString().startsWith( "<!--" ) && ts.movePrevious()) { // NOI18N
252
item = ts.token();
253                 }
254                 return getNextElement( item.offset(hi)); // from start of Commment
255
}
256             
257             if( item.id() == HTMLTokenId.DECLARATION || item.id() == HTMLTokenId.SGML_COMMENT ) {
258                 while( item.id() != HTMLTokenId.DECLARATION || !item.text().toString().startsWith( "<!" ) && ts.movePrevious()) { // NOI18N
259
item = ts.token();
260                 }
261                 return getNextElement( item.offset(hi) ); // from start of Commment
262
}
263         } finally {
264             ((BaseDocument)doc).readUnlock();
265         }
266         return null;
267     }
268     
269     
270     SyntaxElement getPreviousElement(int offset) throws javax.swing.text.BadLocationException JavaDoc {
271         return offset == 0 ? null
272                 : getElementChain(offset - 1);
273     }
274     
275     SyntaxElement getNextElement(int offset) throws javax.swing.text.BadLocationException JavaDoc {
276         ((BaseDocument)doc).readLock();
277         try {
278             TokenSequence ts = tokenSequence(hi, offset);
279             if(ts == null) {
280                 return null;
281             }
282             
283             ts.move(offset);
284             if (!ts.moveNext())
285                 return null;
286             org.netbeans.api.lexer.Token item = ts.token();
287             int lastOffset = getTokenEnd(hi, item);
288             
289             if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.BLOCK_COMMENT) {
290                 do {
291                     lastOffset = getTokenEnd(hi, ts.token());
292                 } while (ts.token().id() ==
293                         org.netbeans.api.html.lexer.HTMLTokenId.BLOCK_COMMENT &&
294                         ts.moveNext());
295                 return new SyntaxElement(this, offset, lastOffset,
296                         SyntaxElement.TYPE_COMMENT);
297             }
298             if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.DECLARATION) {
299                 java.lang.StringBuffer JavaDoc sb = new java.lang.StringBuffer JavaDoc(item.text());
300                 
301                 while (item.id() ==
302                         org.netbeans.api.html.lexer.HTMLTokenId.DECLARATION ||
303                         item.id() ==
304                         org.netbeans.api.html.lexer.HTMLTokenId.SGML_COMMENT) {
305                     lastOffset = getTokenEnd(hi, item);
306                     if (!ts.moveNext()) {
307                         break;
308                     }
309                     item = ts.token();
310                     if (item.id() ==
311                             org.netbeans.api.html.lexer.HTMLTokenId.DECLARATION)
312                         sb.append(item.text().toString());
313                 }
314                 java.lang.String JavaDoc image = sb.toString();
315                 
316                 if (!image.startsWith("<!DOCTYPE"))
317                     return new org.netbeans.editor.ext.html.parser.SyntaxElement.Declaration(this,
318                             offset,
319                             lastOffset,
320                             null,
321                             null,
322                             null);
323                 image = image.substring(9).trim();
324                 int index = image.indexOf(' ');
325                 
326                 if (index < 0)
327                     return new org.netbeans.editor.ext.html.parser.SyntaxElement.Declaration(this,
328                             offset,
329                             lastOffset,
330                             null,
331                             null,
332                             null);
333                 java.lang.String JavaDoc rootElem = image.substring(0, index);
334                 
335                 image = image.substring(index).trim();
336                 if (image.startsWith("PUBLIC")) {
337                     image = image.substring(6).trim();
338                     sb = new java.lang.StringBuffer JavaDoc(image);
339                     java.lang.String JavaDoc pi = getQuotedString(sb);
340                     
341                     if (pi != null) {
342                         java.lang.String JavaDoc si = getQuotedString(sb);
343                         
344                         return new org.netbeans.editor.ext.html.parser.SyntaxElement.Declaration(this,
345                                 offset,
346                                 lastOffset,
347                                 rootElem,
348                                 pi,
349                                 si);
350                     }
351                 } else if (image.startsWith("SYSTEM")) {
352                     image = image.substring(6).trim();
353                     sb = new java.lang.StringBuffer JavaDoc(image);
354                     java.lang.String JavaDoc si = getQuotedString(sb);
355                     
356                     if (si != null) {
357                         return new org.netbeans.editor.ext.html.parser.SyntaxElement.Declaration(this,
358                                 offset,
359                                 lastOffset,
360                                 rootElem,
361                                 null,
362                                 si);
363                     }
364                 }
365                 return new org.netbeans.editor.ext.html.parser.SyntaxElement.Declaration(this,
366                         offset,
367                         lastOffset,
368                         null,
369                         null,
370                         null);
371             }
372             if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.ERROR)
373                 return new SyntaxElement(this, item.offset(hi), lastOffset,
374                         SyntaxElement.TYPE_ERROR);
375             if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.TEXT ||
376                     item.id() == org.netbeans.api.html.lexer.HTMLTokenId.CHARACTER) {
377                 do {
378                     lastOffset = getTokenEnd(hi, item);
379                     item = ts.token();
380                 } while (ts.moveNext() &&
381                         (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.TEXT ||
382                         item.id() ==
383                         org.netbeans.api.html.lexer.HTMLTokenId.CHARACTER));
384                 return new SyntaxElement(this, offset, lastOffset,
385                         SyntaxElement.TYPE_TEXT);
386             }
387             if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.SCRIPT) {
388                 return new SyntaxElement(this, offset, getTokenEnd(hi, item),
389                         SyntaxElement.TYPE_SCRIPT);
390             }
391             if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.TAG_CLOSE || (item.id() ==
392                     org.netbeans.api.html.lexer.HTMLTokenId.TAG_OPEN_SYMBOL &&
393                     item.text().toString().equals("</"))) {
394                 java.lang.String JavaDoc name = item.text().toString();
395                 
396                 if (item.id() ==
397                         org.netbeans.api.html.lexer.HTMLTokenId.TAG_OPEN_SYMBOL) {
398                     ts.moveNext();
399                     name = ts.token().text().toString();
400                 }
401                 ts.moveNext();
402                 item = ts.token();
403                 do {
404                     item = ts.token();
405                     lastOffset = getTokenEnd(hi, item);
406                 } while (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.WS &&
407                         ts.moveNext());
408                 if (item.id() ==
409                         org.netbeans.api.html.lexer.HTMLTokenId.TAG_CLOSE_SYMBOL) {
410                     return new org.netbeans.editor.ext.html.parser.SyntaxElement.Named(this,
411                             offset,
412                             getTokenEnd(hi,
413                             item),
414                             SyntaxElement.TYPE_ENDTAG,
415                             name);
416                 } else {
417                     return new org.netbeans.editor.ext.html.parser.SyntaxElement.Named(this,
418                             offset,
419                             lastOffset,
420                             SyntaxElement.TYPE_ENDTAG,
421                             name);
422                 }
423             }
424             if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.TAG_OPEN ||
425                     (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.TAG_OPEN_SYMBOL &&
426                     !item.text().toString().equals("</"))) {
427                 java.lang.String JavaDoc name = item.text().toString();
428                 ArrayList JavaDoc<SyntaxElement.TagAttribute> attrs = new ArrayList JavaDoc<SyntaxElement.TagAttribute>();
429                 
430                 if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.TAG_OPEN_SYMBOL) {
431                     ts.moveNext();
432                     name = ts.token().text().toString();
433                 }
434                 ts.moveNext();
435                 item = ts.token();
436                 
437                 //find tag attributes
438
Token attrNameToken = null;
439                 do {
440                     item = ts.token();
441                     if (item.id() == HTMLTokenId.ARGUMENT) {
442                         //attribute name
443
attrNameToken = item;
444                     } else if (item.id() == HTMLTokenId.VALUE && attrNameToken != null) {
445                         //found attribute value after attribute name
446
SyntaxElement.TagAttribute tagAttr =
447                                 new SyntaxElement.TagAttribute(attrNameToken.text().toString(),
448                                 item.text().toString(),
449                                 attrNameToken.offset(hi),
450                                 item.offset(hi));
451                         attrs.add(tagAttr);
452                         attrNameToken = null;
453                     }
454                     lastOffset = getTokenEnd(hi, item);
455                 } while ((item.id() == org.netbeans.api.html.lexer.HTMLTokenId.WS ||
456                         item.id() == org.netbeans.api.html.lexer.HTMLTokenId.ARGUMENT ||
457                         item.id() == org.netbeans.api.html.lexer.HTMLTokenId.OPERATOR ||
458                         item.id() == org.netbeans.api.html.lexer.HTMLTokenId.VALUE ||
459                         item.id() == org.netbeans.api.html.lexer.HTMLTokenId.CHARACTER) &&
460                         ts.moveNext());
461                 
462                 if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.TAG_CLOSE_SYMBOL) {
463                     return new org.netbeans.editor.ext.html.parser.SyntaxElement.Tag(this,
464                             offset,
465                             getTokenEnd(hi,
466                             item),
467                             name,
468                             attrs,
469                             item.text().toString().equals("/>"));
470                 } else {
471                     return new org.netbeans.editor.ext.html.parser.SyntaxElement.Tag(this,
472                             offset,
473                             lastOffset,
474                             name,
475                             attrs);
476                 }
477             }
478             
479         } finally {
480             ((BaseDocument)doc).readUnlock();
481         }
482         return null;
483     }
484     
485     
486     public static boolean isTag(Token t) {
487         return (( t.id() == HTMLTokenId.TAG_OPEN ) ||
488                 ( t.id() == HTMLTokenId.TAG_CLOSE ) ||
489                 ( t.id() == HTMLTokenId.TAG_OPEN_SYMBOL) ||
490                 ( t.id() == HTMLTokenId.TAG_CLOSE_SYMBOL));
491     }
492     
493     public static boolean isTagButNotSymbol(Token t) {
494         return (( t.id() == HTMLTokenId.TAG_OPEN) ||
495                 ( t.id() == HTMLTokenId.TAG_CLOSE));
496     }
497     
498     
499     private static int getTokenEnd( TokenHierarchy thi, Token item ) {
500         return item.offset(thi) + item.text().length();
501     }
502     
503     /**
504      * Beware, changes data
505      */

506     private static String JavaDoc getQuotedString( StringBuffer JavaDoc data ) {
507         int startIndex = 0;
508         if (data == null || data.length() == 0) return null;
509         while( data.charAt( startIndex ) == ' ' ) startIndex++;
510         
511         char stopMark = data.charAt( startIndex++ );
512         if( stopMark == '"' || stopMark == '\'' ) {
513             for( int index = startIndex; index < data.length(); index++ )
514                 if( data.charAt( index ) == stopMark ) {
515                     String JavaDoc quoted = data.substring( startIndex, index );
516                     data.delete( 0, index + 1 );
517                     return quoted;
518                 }
519         }
520         
521         return null;
522     }
523     
524     private static TokenSequence tokenSequence(TokenHierarchy hi, int offset) {
525         TokenSequence ts = hi.tokenSequence(HTMLTokenId.language());
526         if(ts == null) {
527             //HTML language is not top level one
528
ts = hi.tokenSequence();
529             ts.move(offset);
530             if(!ts.moveNext() && !ts.movePrevious()) {
531                 return null; //no token found
532
} else {
533                 ts = ts.embedded(HTMLTokenId.language());
534             }
535         }
536         return ts;
537     }
538     
539 }
540
Popular Tags