SyntaxParser


/*
 * The contents of this file are subject to the terms of the Common Development
 * and Distribution License (the License). You may not use this file except in
 * compliance with the License.
 *
 * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
 * or http://www.netbeans.org/cddl.txt.
 *
 * When distributing Covered Code, include this CDDL Header Notice in each file
 * and include the License file at http://www.netbeans.org/cddl.txt.
 * If applicable, add the following below the CDDL Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 *
 * The Original Software is NetBeans. The Initial Developer of the Original
 * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
 * Microsystems, Inc. All Rights Reserved.
 */
19  
20  package org.netbeans.editor.ext.html.parser;
21  
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.swing.text.BadLocationException;
import javax.swing.text.Document;
import org.netbeans.api.html.lexer.HTMLTokenId;
import org.netbeans.api.lexer.Token;
import org.netbeans.api.lexer.TokenHierarchy;
import org.netbeans.api.lexer.TokenHierarchyEvent;
import org.netbeans.api.lexer.TokenHierarchyEventType;
import org.netbeans.api.lexer.TokenHierarchyListener;
import org.netbeans.api.lexer.TokenSequence;
import org.netbeans.editor.BaseDocument;
import org.openide.util.RequestProcessor;
36  
37  /**
38   * Simple HTML syntax parser.
39   *
40   * @author Marek.Fukala@Sun.com
41   */
42  public final class SyntaxParser {
43      
44      private static final int PARSER_DELAY = 1000; //1 second
45      
46      private final Document   doc;
47      private final TokenHierarchy hi;
48      private final RequestProcessor.Task parserTask;
49      private final ArrayList  <SyntaxParserListener> listeners = new ArrayList  <SyntaxParserListener>();
50      private final Object   parsingState = new Object  ();
51      
52      private final TokenHierarchyListener tokenHierarchyListener = new TokenHierarchyListener() {
53          public void tokenHierarchyChanged(TokenHierarchyEvent evt) {
54              if(evt.type() == TokenHierarchyEventType.MODIFICATION) {
55                  synchronized (parsingState) {
56                      restartParser();
57                  }
58              }
59          }
60      };
61      
62      private ArrayList  <SyntaxElement> parsedElements;
63      
64      private boolean isParsing = false;
65      private boolean isScheduled = false;
66      
67      /** Returns an instance of SyntaxParser for given document.
68       *  The client is supposed to add a SyntaxParserListener to the obtained instance
69       *  to get notification whenever the document changes and is reparsed.
70       */
71      public static synchronized SyntaxParser get(Document   doc) {
72          SyntaxParser parser = (SyntaxParser)doc.getProperty(SyntaxParser.class);
73          if(parser == null) {
74              parser = new SyntaxParser(doc);
75              doc.putProperty(SyntaxParser.class, parser);
76          }
77          return parser;
78      }
79      
80      private SyntaxParser(Document   doc) {
81          this.doc = doc;
82          this.hi = TokenHierarchy.get(doc);
83          
84          parserTask = RequestProcessor.getDefault().create(new Runnable  () {
85              public void run() {
86                  parse();
87              }
88          });
89          
90          //add itself as token hierarchy listener
91          hi.addTokenHierarchyListener(tokenHierarchyListener);
92          
93          parsedElements = null; //null states the data are not available yet
94      }
95      
96      //---------------------------- public methods -------------------------------
97      
98      /** Adds a new SyntaxParserListener and starts parsing if fresh data not available, otherwise synchronously
99       * notifies the added SyntaxParserListener that parsed data are available.*/
100     public void addSyntaxParserListener(SyntaxParserListener spl) {
101         listeners.add(spl);
102         
103         synchronized (parsingState) {
104             if(isParsing || isScheduled) return ; //we are either parsing or waiting for parser to start - will parse and fire event then
105             
106             if(parsedElements == null) {
107                 //we need to run the parser
108                 restartParser();
109             } else {
110                 //data actual no need to reparse - just synchronously return parsed data
111                 spl.parsingFinished(createParseResult());
112             }
113         }
114     }
115     
116     /** Removes the SyntaxParserListener from the listeners list.*/
117     public void removeSyntaxParserListener(SyntaxParserListener spl) {
118         listeners.remove(spl);
119     }
120     
121     //---------------------------- private methods -------------------------------
122     
123     private void restartParser() {
124         if(!parserTask.isFinished()) {
125             parserTask.cancel();
126         }
127         parserTask.schedule(PARSER_DELAY);
128         isScheduled = true;
129     }
130     
131     private void parse() {
132         synchronized (parsingState) {
133             isParsing = true;
134             isScheduled = false;
135         }
136         
137         reallyParse();
138         
139         synchronized (parsingState) {
140             isParsing = false;
141         }
142         
143         notifyParsingFinished();
144     }
145     
146     private void reallyParse() {
147         parsedElements = new ArrayList  <SyntaxElement>();
148         try {
149             SyntaxElement sel = getElementChain(0);
150             while (sel != null) {
151                 parsedElements.add(sel);
152                 sel = sel.getNext();
153             }
154             
155         }catch(BadLocationException   ble) {
156             ble.printStackTrace();;
157         }
158     }
159     
160     private void notifyParsingFinished() {
161         if(!parsedElements.isEmpty()) {
162             List  <SyntaxElement> results = createParseResult();
163             for(SyntaxParserListener spl : listeners) {
164                 spl.parsingFinished(results);
165             }
166         }
167     }
168     
169     private List  <SyntaxElement> createParseResult() {
170         //return Collections.
171         return Collections.unmodifiableList(parsedElements);
172     }
173     
174     Document   getDocument() {
175         return doc;
176     }
177     
178     /** Returns SyntaxElement instance for block of tokens, which is either
179      * surrounding given offset, or is just after the offset.
180      *
181      * @param offset offset in document where to search for SyntaxElement
182      * @return SyntaxElement surrounding or laying after the offset
183      * or <CODE>null</CODE> if there is no element there (end of document)
184      */
185     public SyntaxElement getElementChain( int offset ) throws BadLocationException   {
186         ((BaseDocument)doc).readLock();
187         try {
188             TokenSequence ts = tokenSequence(hi, offset);
189             if(ts == null) {
190                 return null;
191             }
192             
193             ts.move(offset);
194             if(!ts.moveNext() && !ts.movePrevious()) return null; //no token found
195             
196             Token item = ts.token();
197             
198             int beginning = ts.offset();
199             
200             if( item.id() == HTMLTokenId.CHARACTER ) {
201                 do {
202                     item = ts.token();
203                     beginning = ts.offset();
204                 } while(item.id() == HTMLTokenId.CHARACTER && ts.movePrevious());
205                 
206                 // now item is either HTMLSyntax.VALUE or we're in text, or at BOF
207                 if( item.id() != HTMLTokenId.VALUE && item.id() != HTMLTokenId.TEXT ) {
208                     return getNextElement(  beginning );
209                 } // else ( for VALUE or TEXT ) fall through
210             }
211             
212             if( item.id() == HTMLTokenId.WS || item.id() == HTMLTokenId.ARGUMENT ||     // these are possible only in Tags
213                     item.id() == HTMLTokenId.OPERATOR || item.id() == HTMLTokenId.VALUE ) { // so find boundary
214                 while(ts.movePrevious() && !isTag(item = ts.token()));
215                 return getNextElement(  item.offset(hi) );       // TAGC
216             }
217             
218             if( item.id() == HTMLTokenId.TEXT ) {
219                 do {
220                     beginning = ts.offset();
221                 } while ( ts.movePrevious() && (ts.token().id() == HTMLTokenId.TEXT || ts.token().id() == HTMLTokenId.CHARACTER));
222                 
223                 return getNextElement(  beginning ); // from start of Commment
224             }
225             
226             if( item.id() == HTMLTokenId.SCRIPT) {
227                 //we have just one big token for script
228                 return getNextElement(  ts.token().offset(hi));
229             }
230             
231             
232             if( isTag(item)) {
233                 if( item.id() == HTMLTokenId.TAG_OPEN ||
234                         item.id() == HTMLTokenId.TAG_OPEN_SYMBOL)  return getNextElement(  item.offset(hi) );  // TAGO/ETAGO // NOI18N
235                 else {
236                     do {
237                         if(!ts.movePrevious()) {
238                             return getNextElement( item.offset(hi));
239                         }
240                         item = ts.token();
241                     } while( item.id() != HTMLTokenId.TAG_OPEN_SYMBOL);
242                     
243                     return getNextElement(  item.offset(hi) );       // TAGC
244                 }
245             }
246             
247             if( item.id() == HTMLTokenId.ERROR )
248                 return new SyntaxElement( this, item.offset(hi), getTokenEnd( hi, item ), SyntaxElement.TYPE_ERROR );
249             
250             if( item.id() == HTMLTokenId.BLOCK_COMMENT ) {
251                 while( item.id() == HTMLTokenId.BLOCK_COMMENT && !item.text().toString().startsWith( "<!--" ) && ts.movePrevious()) { // NOI18N
252                     item = ts.token();
253                 }
254                 return getNextElement(  item.offset(hi)); // from start of Commment
255             }
256             
257             if( item.id() == HTMLTokenId.DECLARATION || item.id() == HTMLTokenId.SGML_COMMENT ) {
258                 while( item.id() != HTMLTokenId.DECLARATION || !item.text().toString().startsWith( "<!" ) && ts.movePrevious()) { // NOI18N
259                     item = ts.token();
260                 }
261                 return getNextElement(  item.offset(hi) ); // from start of Commment
262             }
263         } finally {
264             ((BaseDocument)doc).readUnlock();
265         }
266         return null;
267     }
268     
269     
270     SyntaxElement getPreviousElement(int offset) throws javax.swing.text.BadLocationException   {
271         return offset == 0 ? null
272                 : getElementChain(offset - 1);
273     }
274     
275     SyntaxElement getNextElement(int offset) throws javax.swing.text.BadLocationException   {
276         ((BaseDocument)doc).readLock();
277         try {
278             TokenSequence ts = tokenSequence(hi, offset);
279             if(ts == null) {
280                 return null;
281             }
282             
283             ts.move(offset);
284             if (!ts.moveNext())
285                 return null;
286             org.netbeans.api.lexer.Token item = ts.token();
287             int lastOffset = getTokenEnd(hi, item);
288             
289             if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.BLOCK_COMMENT) {
290                 do {
291                     lastOffset = getTokenEnd(hi, ts.token());
292                 } while (ts.token().id() ==
293                         org.netbeans.api.html.lexer.HTMLTokenId.BLOCK_COMMENT &&
294                         ts.moveNext());
295                 return new SyntaxElement(this, offset, lastOffset,
296                         SyntaxElement.TYPE_COMMENT);
297             }
298             if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.DECLARATION) {
299                 java.lang.StringBuffer   sb = new java.lang.StringBuffer  (item.text());
300                 
301                 while (item.id() ==
302                         org.netbeans.api.html.lexer.HTMLTokenId.DECLARATION ||
303                         item.id() ==
304                         org.netbeans.api.html.lexer.HTMLTokenId.SGML_COMMENT) {
305                     lastOffset = getTokenEnd(hi, item);
306                     if (!ts.moveNext()) {
307                         break;
308                     }
309                     item = ts.token();
310                     if (item.id() ==
311                             org.netbeans.api.html.lexer.HTMLTokenId.DECLARATION)
312                         sb.append(item.text().toString());
313                 }
314                 java.lang.String   image = sb.toString();
315                 
316                 if (!image.startsWith("<!DOCTYPE"))
317                     return new org.netbeans.editor.ext.html.parser.SyntaxElement.Declaration(this,
318                             offset,
319                             lastOffset,
320                             null,
321                             null,
322                             null);
323                 image = image.substring(9).trim();
324                 int index = image.indexOf(' ');
325                 
326                 if (index < 0)
327                     return new org.netbeans.editor.ext.html.parser.SyntaxElement.Declaration(this,
328                             offset,
329                             lastOffset,
330                             null,
331                             null,
332                             null);
333                 java.lang.String   rootElem = image.substring(0, index);
334                 
335                 image = image.substring(index).trim();
336                 if (image.startsWith("PUBLIC")) {
337                     image = image.substring(6).trim();
338                     sb = new java.lang.StringBuffer  (image);
339                     java.lang.String   pi = getQuotedString(sb);
340                     
341                     if (pi != null) {
342                         java.lang.String   si = getQuotedString(sb);
343                         
344                         return new org.netbeans.editor.ext.html.parser.SyntaxElement.Declaration(this,
345                                 offset,
346                                 lastOffset,
347                                 rootElem,
348                                 pi,
349                                 si);
350                     }
351                 } else if (image.startsWith("SYSTEM")) {
352                     image = image.substring(6).trim();
353                     sb = new java.lang.StringBuffer  (image);
354                     java.lang.String   si = getQuotedString(sb);
355                     
356                     if (si != null) {
357                         return new org.netbeans.editor.ext.html.parser.SyntaxElement.Declaration(this,
358                                 offset,
359                                 lastOffset,
360                                 rootElem,
361                                 null,
362                                 si);
363                     }
364                 }
365                 return new org.netbeans.editor.ext.html.parser.SyntaxElement.Declaration(this,
366                         offset,
367                         lastOffset,
368                         null,
369                         null,
370                         null);
371             }
372             if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.ERROR)
373                 return new SyntaxElement(this, item.offset(hi), lastOffset,
374                         SyntaxElement.TYPE_ERROR);
375             if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.TEXT ||
376                     item.id() == org.netbeans.api.html.lexer.HTMLTokenId.CHARACTER) {
377                 do {
378                     lastOffset = getTokenEnd(hi, item);
379                     item = ts.token();
380                 } while (ts.moveNext() &&
381                         (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.TEXT ||
382                         item.id() ==
383                         org.netbeans.api.html.lexer.HTMLTokenId.CHARACTER));
384                 return new SyntaxElement(this, offset, lastOffset,
385                         SyntaxElement.TYPE_TEXT);
386             }
387             if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.SCRIPT) {
388                 return new SyntaxElement(this, offset, getTokenEnd(hi, item),
389                         SyntaxElement.TYPE_SCRIPT);
390             }
391             if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.TAG_CLOSE || (item.id() ==
392                     org.netbeans.api.html.lexer.HTMLTokenId.TAG_OPEN_SYMBOL &&
393                     item.text().toString().equals("</"))) {
394                 java.lang.String   name = item.text().toString();
395                 
396                 if (item.id() ==
397                         org.netbeans.api.html.lexer.HTMLTokenId.TAG_OPEN_SYMBOL) {
398                     ts.moveNext();
399                     name = ts.token().text().toString();
400                 }
401                 ts.moveNext();
402                 item = ts.token();
403                 do {
404                     item = ts.token();
405                     lastOffset = getTokenEnd(hi, item);
406                 } while (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.WS &&
407                         ts.moveNext());
408                 if (item.id() ==
409                         org.netbeans.api.html.lexer.HTMLTokenId.TAG_CLOSE_SYMBOL) {
410                     return new org.netbeans.editor.ext.html.parser.SyntaxElement.Named(this,
411                             offset,
412                             getTokenEnd(hi,
413                             item),
414                             SyntaxElement.TYPE_ENDTAG,
415                             name);
416                 } else {
417                     return new org.netbeans.editor.ext.html.parser.SyntaxElement.Named(this,
418                             offset,
419                             lastOffset,
420                             SyntaxElement.TYPE_ENDTAG,
421                             name);
422                 }
423             }
424             if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.TAG_OPEN ||
425                     (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.TAG_OPEN_SYMBOL &&
426                     !item.text().toString().equals("</"))) {
427                 java.lang.String   name = item.text().toString();
428                 ArrayList  <SyntaxElement.TagAttribute> attrs = new ArrayList  <SyntaxElement.TagAttribute>();
429                 
430                 if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.TAG_OPEN_SYMBOL) {
431                     ts.moveNext();
432                     name = ts.token().text().toString();
433                 }
434                 ts.moveNext();
435                 item = ts.token();
436                 
437                 //find tag attributes
438                 Token attrNameToken = null;
439                 do {
440                     item = ts.token();
441                     if (item.id() == HTMLTokenId.ARGUMENT) {
442                         //attribute name
443                         attrNameToken = item;
444                     } else if (item.id() == HTMLTokenId.VALUE && attrNameToken != null) {
445                         //found attribute value after attribute name
446                         SyntaxElement.TagAttribute tagAttr = 
447                                 new SyntaxElement.TagAttribute(attrNameToken.text().toString(),
448                                 item.text().toString(),
449                                 attrNameToken.offset(hi),
450                                 item.offset(hi));
451                         attrs.add(tagAttr);
452                         attrNameToken = null;
453                     }
454                     lastOffset = getTokenEnd(hi, item);
455                 } while ((item.id() == org.netbeans.api.html.lexer.HTMLTokenId.WS ||
456                         item.id() == org.netbeans.api.html.lexer.HTMLTokenId.ARGUMENT ||
457                         item.id() == org.netbeans.api.html.lexer.HTMLTokenId.OPERATOR ||
458                         item.id() == org.netbeans.api.html.lexer.HTMLTokenId.VALUE ||
459                         item.id() == org.netbeans.api.html.lexer.HTMLTokenId.CHARACTER) &&
460                         ts.moveNext());
461                 
462                 if (item.id() == org.netbeans.api.html.lexer.HTMLTokenId.TAG_CLOSE_SYMBOL) {
463                     return new org.netbeans.editor.ext.html.parser.SyntaxElement.Tag(this,
464                             offset,
465                             getTokenEnd(hi,
466                             item),
467                             name,
468                             attrs,
469                             item.text().toString().equals("/>"));
470                 } else {
471                     return new org.netbeans.editor.ext.html.parser.SyntaxElement.Tag(this,
472                             offset,
473                             lastOffset,
474                             name,
475                             attrs);
476                 }
477             }
478             
479         } finally {
480             ((BaseDocument)doc).readUnlock();
481         }
482         return null;
483     }
484     
485     
486     public static boolean isTag(Token t) {
487         return (( t.id() == HTMLTokenId.TAG_OPEN ) ||
488                 ( t.id() == HTMLTokenId.TAG_CLOSE ) ||
489                 ( t.id() == HTMLTokenId.TAG_OPEN_SYMBOL) ||
490                 ( t.id() == HTMLTokenId.TAG_CLOSE_SYMBOL));
491     }
492     
493     public static boolean isTagButNotSymbol(Token t) {
494         return (( t.id() == HTMLTokenId.TAG_OPEN) ||
495                 ( t.id() == HTMLTokenId.TAG_CLOSE));
496     }
497     
498     
499     private static int getTokenEnd( TokenHierarchy thi, Token item ) {
500         return item.offset(thi) + item.text().length();
501     }
502     
503     /**
504      * Beware, changes data
505      */
506     private static String   getQuotedString( StringBuffer   data ) {
507         int startIndex = 0;
508         if (data == null || data.length() == 0) return null;
509         while( data.charAt( startIndex ) == ' ' ) startIndex++;
510         
511         char stopMark = data.charAt( startIndex++ );
512         if( stopMark == '"' || stopMark == '\'' ) {
513             for( int index = startIndex; index < data.length(); index++ )
514                 if( data.charAt( index ) == stopMark ) {
515                     String   quoted = data.substring( startIndex, index );
516                     data.delete( 0, index + 1 );
517                     return quoted;
518                 }
519         }
520         
521         return null;
522     }
523     
524     private static TokenSequence tokenSequence(TokenHierarchy hi, int offset) {
525         TokenSequence ts = hi.tokenSequence(HTMLTokenId.language());
526         if(ts == null) {
527             //HTML language is not top level one
528             ts = hi.tokenSequence();
529             ts.move(offset);
530             if(!ts.moveNext() && !ts.movePrevious()) {
531                 return null; //no token found
532             } else {
533                 ts = ts.embedded(HTMLTokenId.language());
534             }
535         }
536         return ts;
537     }
538     
539 }
540
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags