KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > opensymphony > module > sitemesh > parser > FastPageParser


1 /*
2  * Title: FastPageParser
3  * Description:
4  *
5  * This software is published under the terms of the OpenSymphony Software
6  * License version 1.1, of which a copy has been included with this
7  * distribution in the LICENSE.txt file.
8  */

9
10 package com.opensymphony.module.sitemesh.parser;
11
12 import com.opensymphony.module.sitemesh.Page;
13 import com.opensymphony.module.sitemesh.PageParser;
14 import com.opensymphony.module.sitemesh.html.util.CharArray;
15 import com.opensymphony.module.sitemesh.util.CharArrayReader;
16
17 import java.io.IOException JavaDoc;
18 import java.io.Reader JavaDoc;
19 import java.util.Collections JavaDoc;
20 import java.util.HashMap JavaDoc;
21 import java.util.Map JavaDoc;
22
23 /**
24  * Very fast PageParser implementation for parsing HTML.
25  *
26  * <p>Produces FastPage.</p>
27  *
28  * @author <a HREF="mailto:salaman@qoretech.com">Victor Salaman</a>
29  * @version $Revision: 1.12 $
30  */

31 public final class FastPageParser implements PageParser
32 {
33    private static final int TOKEN_NONE = -0;
34    private static final int TOKEN_EOF = -1;
35    private static final int TOKEN_TEXT = -2;
36    private static final int TOKEN_TAG = -3;
37    private static final int TOKEN_COMMENT = -4;
38    private static final int TOKEN_CDATA = -5;
39    private static final int TOKEN_SCRIPT = -6;
40    private static final int TOKEN_DOCTYPE = -7;
41    private static final int TOKEN_EMPTYTAG = -8;
42
43    private static final int STATE_EOF = -1;
44    private static final int STATE_TEXT = -2;
45    private static final int STATE_TAG = -3;
46    private static final int STATE_COMMENT = -4;
47    private static final int STATE_TAG_QUOTE = -5;
48    private static final int STATE_CDATA = -6;
49    private static final int STATE_SCRIPT = -7;
50    private static final int STATE_DOCTYPE = -8;
51
52    private static final int TAG_STATE_NONE = 0;
53    private static final int TAG_STATE_HTML = -1;
54    private static final int TAG_STATE_HEAD = -2;
55    private static final int TAG_STATE_TITLE = -3;
56    private static final int TAG_STATE_BODY = -4;
57    private static final int TAG_STATE_XML = -6;
58    private static final int TAG_STATE_XMP = -7;
59
60    // These hashcodes are hardcoded because swtich statements can only
61
// switch on compile-time constants.
62
// In theory it is possible for there to be a hashcode collision with
63
// other HTML tags, however in practice it is *very* unlikely because
64
// tags are generally only a few characters long and hence are likely
65
// to produce unique values.
66

67    private static final int SLASH_XML_HASH = 1518984; // "/xml".hashCode();
68
private static final int XML_HASH = 118807; // "xml".hashCode();
69
private static final int SLASH_XMP_HASH = 1518988; // "/xmp".hashCode();
70
private static final int XMP_HASH = 118811; // "xmp".hashCode();
71
private static final int HTML_HASH = 3213227; // "html".hashCode();
72
private static final int SLASH_HTML_HASH = 46618714; // "/html".hashCode();
73
private static final int HEAD_HASH = 3198432; // "head".hashCode();
74
private static final int TITLE_HASH = 110371416; // "title".hashCode();
75
private static final int SLASH_TITLE_HASH = 1455941513; // "/title".hashCode();
76
private static final int PARAMETER_HASH = 1954460585; // "parameter".hashCode();
77
private static final int META_HASH = 3347973; // "meta".hashCode();
78
private static final int SLASH_HEAD_HASH = 46603919; // "/head".hashCode();
79
private static final int FRAMESET_HASH = -1644953643; // "frameset".hashCode();
80
private static final int FRAME_HASH = 97692013; // "frame".hashCode();
81
private static final int BODY_HASH = 3029410; // "body".hashCode();
82
private static final int SLASH_BODY_HASH = 46434897; // "/body".hashCode();
83
private static final int CONTENT_HASH = 951530617; // "content".hashCode();
84

85    public Page parse(char[] data) throws IOException JavaDoc
86    {
87       FastPage page = internalParse(new CharArrayReader(data));
88       page.setVerbatimPage(data);
89       return page;
90    }
91
92    public Page parse(Reader reader)
93    {
94       return internalParse(reader);
95    }
96
97    private FastPage internalParse(Reader reader)
98    {
99       CharArray _buffer = new CharArray(4096);
100       CharArray _body = new CharArray(4096);
101       CharArray _head = new CharArray(512);
102       CharArray _title = new CharArray(128);
103       Map JavaDoc _htmlProperties = null;
104       Map JavaDoc _metaProperties = new HashMap JavaDoc(6);
105       Map JavaDoc _sitemeshProperties = new HashMap JavaDoc(6);
106       Map JavaDoc _bodyProperties = null;
107
108       CharArray _currentTaggedContent = new CharArray(1024);
109       String JavaDoc _contentTagId = null;
110       boolean tagged = false;
111
112       boolean _frameSet = false;
113
114       int _state = STATE_TEXT;
115       int _tokenType = TOKEN_NONE;
116       int _pushBack = 0;
117       int _comment = 0;
118       int _quote = 0;
119       boolean hide = false;
120
121       int state = TAG_STATE_NONE;
122       int laststate = TAG_STATE_NONE;
123       boolean doneTitle = false;
124
125       // This tag object gets reused each iteration.
126
Tag tagObject = new Tag();
127
128       while (_tokenType != TOKEN_EOF)
129       {
130          if(tagged)
131          {
132             if(_tokenType == TOKEN_TAG || _tokenType == TOKEN_EMPTYTAG)
133             {
134                if(_buffer==null || _buffer.length()==0)
135                {
136                   _tokenType=TOKEN_NONE;
137                   continue;
138                }
139
140                if (parseTag(tagObject, _buffer) == null) continue;
141
142                if (_buffer.compareLowerSubstr("/content")) // Note that the '/' survives the | 32 operation
143
{
144                   tagged = false;
145                   if(_contentTagId != null)
146                   {
147                      state = TAG_STATE_NONE;
148                      _sitemeshProperties.put(_contentTagId, _currentTaggedContent.toString());
149                      _currentTaggedContent.setLength(0);
150                      _contentTagId = null;
151                   }
152                }
153                else
154                {
155                   _currentTaggedContent.append('<').append(_buffer).append('>');
156                }
157             }
158             else
159             {
160                if(_buffer.length() > 0) _currentTaggedContent.append(_buffer);
161             }
162          }
163          else
164          {
165             if(_tokenType == TOKEN_TAG || _tokenType == TOKEN_EMPTYTAG)
166             {
167                if(_buffer==null || _buffer.length()==0)
168                {
169                   _tokenType=TOKEN_NONE;
170                   continue;
171                }
172
173                if(parseTag(tagObject, _buffer) == null) {
174                   _tokenType=TOKEN_TEXT;
175                   continue;
176                }
177
178                int tagHash = _buffer.substrHashCode();
179
180                if(state == TAG_STATE_XML || state == TAG_STATE_XMP)
181                {
182                   writeTag(state, laststate, hide, _head, _buffer, _body);
183                   if( (state == TAG_STATE_XML && tagHash == SLASH_XML_HASH)
184                     ||(state == TAG_STATE_XMP && tagHash == SLASH_XMP_HASH) )
185                   {
186                      state = laststate;
187                   }
188                }
189                else
190                {
191                   boolean doDefault = false;
192                   switch (tagHash) {
193                      case HTML_HASH:
194                         if (!_buffer.compareLowerSubstr("html")) { // skip any accidental hash collisions
195
doDefault = true;
196                            break;
197                         }
198                         state = TAG_STATE_HTML;
199                         _htmlProperties = parseProperties(tagObject, _buffer).properties;
200                         break;
201                      case HEAD_HASH:
202                         if (!_buffer.compareLowerSubstr("head")) { // skip any accidental hash collisions
203
doDefault = true;
204                               break;
205                         }
206                         state = TAG_STATE_HEAD;
207                         break;
208                      case XML_HASH:
209                         if (!_buffer.compareLowerSubstr("xml")) { // skip any accidental hash collisions
210
doDefault = true;
211                            break;
212                         }
213                         laststate = state;
214                         writeTag(state, laststate, hide, _head, _buffer, _body);
215                         state = TAG_STATE_XML;
216                         break;
217                      case XMP_HASH:
218                         if (!_buffer.compareLowerSubstr("xmp")) { // skip any accidental hash collisions
219
doDefault = true;
220                            break;
221                         }
222                         laststate = state;
223                         writeTag(state, laststate, hide, _head, _buffer, _body);
224                         state = TAG_STATE_XMP;
225                         break;
226                      case TITLE_HASH:
227                         if (!_buffer.compareLowerSubstr("title")) { // skip any accidental hash collisions
228
doDefault = true;
229                            break;
230                         }
231                         if (doneTitle)
232                         {
233                            hide = true;
234                         }
235                         else
236                         {
237                            laststate = state;
238                            state = TAG_STATE_TITLE;
239                         }
240                         break;
241                      case SLASH_TITLE_HASH:
242                         if (!_buffer.compareLowerSubstr("/title")) { // skip any accidental hash collisions
243
doDefault = true;
244                            break;
245                         }
246                         if (doneTitle)
247                         {
248                            hide = false;
249                         }
250                         else
251                         {
252                            doneTitle = true;
253                            state = laststate;
254                         }
255                         break;
256                      case PARAMETER_HASH:
257                         if (!_buffer.compareLowerSubstr("parameter")) { // skip any accidental hash collisions
258
doDefault = true;
259                            break;
260                         }
261                         parseProperties(tagObject, _buffer);
262                         String JavaDoc name = (String JavaDoc) tagObject.properties.get("name");
263                         String JavaDoc value = (String JavaDoc) tagObject.properties.get("value");
264
265                         if (name != null && value != null)
266                         {
267                            _sitemeshProperties.put(name, value);
268                         }
269                         break;
270                      case META_HASH:
271                         if (!_buffer.compareLowerSubstr("meta")) { // skip any accidental hash collisions
272
doDefault = true;
273                            break;
274                         }
275                         CharArray metaDestination = state == TAG_STATE_HEAD ? _head : _body;
276                         metaDestination.append('<');
277                         metaDestination.append(_buffer);
278                         metaDestination.append('>');
279                         parseProperties(tagObject, _buffer);
280                         name = (String JavaDoc) tagObject.properties.get("name");
281                         value = (String JavaDoc) tagObject.properties.get("content");
282
283                         if (name == null)
284                         {
285                            String JavaDoc httpEquiv = (String JavaDoc) tagObject.properties.get("http-equiv");
286
287                            if (httpEquiv != null)
288                            {
289                               name = "http-equiv." + httpEquiv;
290                            }
291                         }
292
293                         if (name != null && value != null)
294                         {
295                            _metaProperties.put(name, value);
296                         }
297                         break;
298                      case SLASH_HEAD_HASH:
299                         if (!_buffer.compareLowerSubstr("/head")) { // skip any accidental hash collisions
300
doDefault = true;
301                            break;
302                         }
303                         state = TAG_STATE_HTML;
304                         break;
305                      case FRAME_HASH:
306                         if (!_buffer.compareLowerSubstr("frame")) { // skip any accidental hash collisions
307
doDefault = true;
308                            break;
309                         }
310                         _frameSet = true;
311                         break;
312                      case FRAMESET_HASH:
313                         if (!_buffer.compareLowerSubstr("frameset")) { // skip any accidental hash collisions
314
doDefault = true;
315                            break;
316                         }
317                         _frameSet = true;
318                         break;
319                      case BODY_HASH:
320                         if (!_buffer.compareLowerSubstr("body")) { // skip any accidental hash collisions
321
doDefault = true;
322                            break;
323                         }
324                         if (_tokenType == TOKEN_EMPTYTAG)
325                         {
326                            state = TAG_STATE_BODY;
327                         }
328                         _bodyProperties = parseProperties(tagObject, _buffer).properties;
329                         break;
330                      case CONTENT_HASH:
331                         if (!_buffer.compareLowerSubstr("content")) { // skip any accidental hash collisions
332
doDefault = true;
333                            break;
334                         }
335                         state = TAG_STATE_NONE;
336                         Map JavaDoc props = parseProperties(tagObject, _buffer).properties;
337                         if (props != null)
338                         {
339                            tagged = true;
340                            _contentTagId = (String JavaDoc) props.get("tag");
341                         }
342                         break;
343                      case SLASH_XMP_HASH:
344                         if (!_buffer.compareLowerSubstr("/xmp")) { // skip any accidental hash collisions
345
doDefault = true;
346                            break;
347                         }
348                         hide = false;
349                         break;
350                      case SLASH_BODY_HASH:
351                         if (!_buffer.compareLowerSubstr("/body")) { // skip any accidental hash collisions
352
doDefault = true;
353                            break;
354                         }
355                         state = TAG_STATE_NONE;
356                         hide = true;
357                         break;
358                      case SLASH_HTML_HASH:
359                         if (!_buffer.compareLowerSubstr("/html")) { // skip any accidental hash collisions
360
doDefault = true;
361                            break;
362                         }
363                         state = TAG_STATE_NONE;
364                         hide = true;
365                         break;
366                      default:
367                         doDefault = true;
368                   }
369                   if (doDefault)
370                      writeTag(state, laststate, hide, _head, _buffer, _body);
371                }
372             }
373             else if (!hide)
374             {
375                if (_tokenType == TOKEN_TEXT)
376                {
377                   if (state == TAG_STATE_TITLE)
378                   {
379                      _title.append(_buffer);
380                   }
381                   else if (shouldWriteToHead(state, laststate))
382                   {
383                      _head.append(_buffer);
384                   }
385                   else
386                   {
387                      _body.append(_buffer);
388                   }
389                }
390                else if (_tokenType == TOKEN_COMMENT)
391                {
392                   final CharArray commentDestination = shouldWriteToHead(state, laststate) ? _head : _body;
393                   commentDestination.append("<!--");
394                   commentDestination.append(_buffer);
395                   commentDestination.append("-->");
396                }
397                else if (_tokenType == TOKEN_CDATA)
398                {
399                   final CharArray commentDestination = state == TAG_STATE_HEAD ? _head : _body;
400                   commentDestination.append("<![CDATA[");
401                   commentDestination.append(_buffer);
402                   commentDestination.append("]]>");
403                }
404                else if (_tokenType == TOKEN_SCRIPT)
405                {
406                   final CharArray commentDestination = state == TAG_STATE_HEAD ? _head : _body;
407                   commentDestination.append('<');
408                   commentDestination.append(_buffer);
409                }
410             }
411          }
412          _buffer.setLength(0);
413
414          start:
415          while (true)
416          {
417             int c;
418
419             if(_pushBack != 0)
420             {
421                c = _pushBack;
422                _pushBack = 0;
423             }
424             else
425             {
426                try
427                {
428                   c = reader.read();
429                }
430                catch(IOException JavaDoc e)
431                {
432                   _tokenType = TOKEN_EOF;
433                   break start;
434                }
435             }
436
437             if(c < 0)
438             {
439                int tmpstate = _state;
440                _state = STATE_EOF;
441
442                if(_buffer.length() > 0 && tmpstate == STATE_TEXT)
443                {
444                   _tokenType = TOKEN_TEXT;
445                   break start;
446                }
447                else
448                {
449                   _tokenType = TOKEN_EOF;
450                   break start;
451                }
452             }
453
454             switch(_state)
455             {
456                case STATE_TAG:
457                {
458                   int buflen = _buffer.length();
459
460                   if(c == '>')
461                   {
462                      if (_buffer.length() > 1 && _buffer.charAt(_buffer.length() - 1) == '/')
463                      {
464                         _tokenType = TOKEN_EMPTYTAG;
465                      }
466                      else
467                      {
468                         _tokenType = TOKEN_TAG;
469                      }
470                      _state = STATE_TEXT;
471                      break start;
472                   }
473                   else if(c == '/')
474                   {
475                      _buffer.append('/');
476                   }
477                   else if(c == '<' && buflen == 0)
478                   {
479                      _buffer.append("<<");
480                      _state = STATE_TEXT;
481                   }
482                   else if(c == '-' && buflen == 2 && _buffer.charAt(1) == '-' && _buffer.charAt(0) == '!')
483                   {
484                      _buffer.setLength(0);
485                      _state = STATE_COMMENT;
486                   }
487                   else if(c == '[' && buflen == 7 && _buffer.charAt(0) == '!' && _buffer.charAt(1) == '[' && _buffer.compareLower("cdata", 2))
488                   {
489                      _buffer.setLength(0);
490                      _state = STATE_CDATA;
491                   }
492                   else if((c == 'e' || c == 'E') && buflen == 7 && _buffer.charAt(0) == '!' && _buffer.compareLower("doctyp", 1))
493                   {
494                      _buffer.append((char)c);
495                      _state = STATE_DOCTYPE;
496                   }
497                   else if((c == 'T' || c == 't') && buflen == 5 && _buffer.compareLower("scrip", 0))
498                   {
499                      _buffer.append((char)c);
500                      _state = STATE_SCRIPT;
501                   }
502
503                   else if(c == '"' || c == '\'')
504                   {
505                      _quote = c;
506                      _buffer.append(( char ) c);
507                      _state = STATE_TAG_QUOTE;
508                   }
509                   else
510                   {
511                      _buffer.append(( char ) c);
512                   }
513                }
514                break;
515
516                case STATE_TEXT:
517                {
518                   if(c == '<')
519                   {
520                      _state = STATE_TAG;
521                      if(_buffer.length() > 0)
522                      {
523                         _tokenType = TOKEN_TEXT;
524                         break start;
525                      }
526                   }
527                   else
528                   {
529                      _buffer.append(( char ) c);
530                   }
531                }
532                break;
533
534                case STATE_TAG_QUOTE:
535                {
536                   if(c == '>')
537                   {
538                      _pushBack = c;
539                      _state = STATE_TAG;
540                   }
541                   else
542                   {
543                      _buffer.append(( char ) c);
544                      if(c == _quote)
545                      {
546                         _state = STATE_TAG;
547                      }
548                   }
549                }
550                break;
551
552                case STATE_COMMENT:
553                {
554                   if(c == '>' && _comment >= 2)
555                   {
556                      _buffer.setLength(_buffer.length() - 2);
557                      _comment = 0;
558                      _state = STATE_TEXT;
559                      _tokenType = TOKEN_COMMENT;
560                      break start;
561                   }
562                   else if(c == '-')
563                   {
564                      _comment++;
565                   }
566                   else
567                   {
568                      _comment = 0;
569                   }
570
571                   _buffer.append(( char ) c);
572                }
573                break;
574
575                case STATE_CDATA:
576                {
577                   if(c == '>' && _comment >= 2)
578                   {
579                      _buffer.setLength(_buffer.length() - 2);
580                      _comment = 0;
581                      _state = STATE_TEXT;
582                      _tokenType = TOKEN_CDATA;
583                      break start;
584                   }
585                   else if(c == ']')
586                   {
587                      _comment++;
588                   }
589                   else
590                   {
591                      _comment = 0;
592                   }
593
594                   _buffer.append(( char ) c);
595                }
596                break;
597
598                case STATE_SCRIPT:
599                {
600                   _buffer.append((char) c);
601                   if (c == '<')
602                   {
603                      _comment = 0;
604                   }
605                   else if ((c == '/' && _comment == 0)
606                      ||((c == 's' || c == 'S' ) && _comment == 1)
607                      ||((c == 'c' || c == 'C' ) && _comment == 2)
608                      ||((c == 'r' || c == 'R' ) && _comment == 3)
609                      ||((c == 'i' || c == 'I' ) && _comment == 4)
610                      ||((c == 'p' || c == 'P' ) && _comment == 5)
611                      ||((c == 't' || c == 'T' ) && _comment == 6)
612                   )
613                   {
614                      _comment++;
615                   }
616                   else if(c == '>' && _comment >= 7)
617                   {
618                      _comment = 0;
619                      _state = STATE_TEXT;
620                      _tokenType = TOKEN_SCRIPT;
621                      break start;
622                   }
623                }
624                break;
625
626                case STATE_DOCTYPE:
627                {
628                   _buffer.append((char) c);
629                   if (c == '>')
630                   {
631                      _state = STATE_TEXT;
632                      _tokenType = TOKEN_DOCTYPE;
633                      break start;
634                   }
635                   else {
636                     _comment = 0;
637                   }
638                }
639                break;
640             }
641          }
642       }
643
644       // Help the GC
645
_currentTaggedContent = null;
646       _buffer = null;
647
648       return new FastPage(_sitemeshProperties,
649                           _htmlProperties,
650                           _metaProperties,
651                           _bodyProperties,
652                           _title.toString().trim(),
653                           _head.toString().trim(),
654                           _body.toString().trim(),
655                           _frameSet);
656    }
657
658    private static void writeTag(int state, int laststate, boolean hide, CharArray _head, CharArray _buffer, CharArray _body) {
659       if (!hide)
660       {
661          if (shouldWriteToHead(state, laststate))
662          {
663             _head.append('<').append(_buffer).append('>');
664          }
665          else
666          {
667             _body.append('<').append(_buffer).append('>');
668          }
669       }
670    }
671
672    private static boolean shouldWriteToHead(int state, int laststate)
673    {
674       return state == TAG_STATE_HEAD
675              ||(laststate == TAG_STATE_HEAD && (state == TAG_STATE_XML || state == TAG_STATE_XMP));
676    }
677
678    /**
679     * Populates a {@link Tag} object using data from the supplied {@link CharArray}.
680     *
681     * The supplied tag parameter is reset and reused - this avoids excess object
682     * creation which hwlps performance.
683     *
684     * @return the same tag instance that was passed in, except it will be populated
685     * with a new <tt>name</tt> value (and the corresponding <tt>nameEndIdx</tt> value).
686     * However if the tag contained nathing but whitespace, this method will return
687     * <tt>null</tt>.
688     */

689    private Tag parseTag(Tag tag, CharArray buf)
690    {
691       int len = buf.length();
692       int idx = 0;
693       int begin;
694
695       // Skip over any leading whitespace in the tag
696
while (idx < len && Character.isWhitespace(buf.charAt(idx))) idx++;
697
698       if(idx == len) return null;
699
700       // Find out where the non-whitespace characters end. This will give us the tag name.
701
begin = idx;
702       while (idx < len && !Character.isWhitespace(buf.charAt(idx))) idx++;
703
704       // Mark the tag name as a substring within the buffer. This allows us to perform
705
// a substring comparison against it at a later date
706
buf.setSubstr(begin, buf.charAt(idx - 1) == '/' ? idx - 1 : idx);
707
708       // Remember where the name finishes so we can pull out the properties later if need be
709
tag.nameEndIdx = idx;
710
711       return tag;
712    }
713
714    /**
715     * This is called when we need to extract the properties for the tag from the tag's HTML.
716     * We only call this when necessary since it has quite a lot of overhead.
717     *
718     * @param tag the tag that is currently being processed. This should be the
719     * tag that was returned as a result of a call to {@link #parseTag(FastPageParser.Tag, CharArray)}
720     * (ie, it has the <tt>name</tt> and <tt>nameEndIdx</tt> fields set correctly for the
721     * tag in question. The <tt>properties</tt> field can be in an undefined state - it
722     * will get replaced regardless).
723     * @param buffer a <tt>CharArray</tt> containing the entire tag that is being parsed.
724     * @return the same tag instance that was passed in, only it will now be populated
725     * with any properties that were specified in the tag's HTML.
726     */

727    private static Tag parseProperties(Tag tag, CharArray buffer)
728    {
729       int len = buffer.length();
730       int idx = tag.nameEndIdx;
731
732       // Start with an empty hashmap. A new HashMap is lazy-created if we happen to find any properties
733
tag.properties = Collections.EMPTY_MAP;
734       int begin;
735       while (idx < len)
736       {
737          // Skip forward to the next non-whitespace character
738
while (idx < len && Character.isWhitespace(buffer.charAt(idx))) idx++;
739
740          if(idx == len) continue;
741
742          begin = idx;
743          if(buffer.charAt(idx) == '"')
744          {
745             idx++;
746             while (idx < len && buffer.charAt(idx) != '"') idx++;
747             if(idx == len) continue;
748             idx++;
749          }
750          else if(buffer.charAt(idx) == '\'')
751          {
752             idx++;
753             while (idx < len && buffer.charAt(idx) != '\'') idx++;
754             if(idx == len) continue;
755             idx++;
756          }
757          else
758          {
759             while (idx < len && !Character.isWhitespace(buffer.charAt(idx)) && buffer.charAt(idx) != '=') idx++;
760          }
761
762          // Mark the substring. This is the attribute name
763
buffer.setSubstr(begin, idx);
764
765          if(idx < len && Character.isWhitespace(buffer.charAt(idx)))
766          {
767             while (idx < len && Character.isWhitespace(buffer.charAt(idx))) idx++;
768          }
769
770          if(idx == len || buffer.charAt(idx) != '=') continue;
771
772          idx++;
773
774          if(idx == len) continue;
775
776          while(idx < len && (buffer.charAt(idx) == '\n' || buffer.charAt(idx) == '\r')) idx++;
777
778          if(buffer.charAt(idx) == ' ')
779          {
780             while (idx < len && Character.isWhitespace(buffer.charAt(idx))) idx++;
781             if(idx == len || (buffer.charAt(idx) != '"' && buffer.charAt(idx) != '"')) continue;
782          }
783
784          begin = idx;
785          int end;
786          if(buffer.charAt(idx) == '"')
787          {
788             idx++;
789             begin = idx;
790             while (idx < len && buffer.charAt(idx) != '"') idx++;
791             if(idx == len) continue;
792             end = idx;
793             idx++;
794          }
795          else if(buffer.charAt(idx) == '\'')
796          {
797             idx++;
798             begin = idx;
799             while (idx < len && buffer.charAt(idx) != '\'') idx++;
800             if(idx == len) continue;
801             end = idx;
802             idx++;
803          }
804          else
805          {
806             while (idx < len && !Character.isWhitespace(buffer.charAt(idx))) idx++;
807             end = idx;
808          }
809          // Extract the name and value as String objects and add them to the property map
810
String JavaDoc name = buffer.getLowerSubstr();
811          String JavaDoc value = buffer.substring(begin, end);
812
813          tag.addProperty(name, value);
814       }
815       return tag;
816    }
817
818    private class Tag
819    {
820       // The index where the name string ends. This is used as the starting
821
// offet if we need to continue processing to find the tag's properties
822
public int nameEndIdx = 0;
823
824       // This holds a map of the various properties for a particular tag.
825
// This map is only populated when required - normally it will remain empty
826
public Map JavaDoc properties = Collections.EMPTY_MAP;
827
828       /**
829        * Adds a name/value property pair to this tag. Each property that is
830        * added represents a property that was parsed from the tag's HTML.
831        */

832       public void addProperty(String JavaDoc name, String JavaDoc value)
833       {
834         if(properties==Collections.EMPTY_MAP)
835         {
836           properties = new HashMap JavaDoc(8);
837         }
838         properties.put(name, value);
839       }
840    }
841 }
842
Popular Tags