KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > au > id > jericho > lib > html > TagTypeRegister


// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 2.2
// Copyright (C) 2006 Martin Jericho
// http://sourceforge.net/projects/jerichohtml/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// http://www.gnu.org/copyleft/lesser.html
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

21 package au.id.jericho.lib.html;
22
23 import java.util.*;
24
25 final class TagTypeRegister {
26     private TagTypeRegister parent=null;
27     private char ch=NULL_CHAR;
28     private TagTypeRegister[] children=null; // always in alphabetical order
29
private TagType[] tagTypes=null; // in descending order of priority
30

31     private static final char NULL_CHAR='\u0000';
32
33     private static final TagType[] DEFAULT_TAG_TYPES={
34         StartTagType.UNREGISTERED,
35         StartTagType.NORMAL,
36         StartTagType.COMMENT,
37         StartTagType.MARKUP_DECLARATION,
38         StartTagType.DOCTYPE_DECLARATION,
39         StartTagType.CDATA_SECTION,
40         StartTagType.XML_PROCESSING_INSTRUCTION,
41         StartTagType.XML_DECLARATION,
42         StartTagType.SERVER_COMMON,
43         EndTagType.UNREGISTERED,
44         EndTagType.NORMAL
45     };
46
47     private static TagTypeRegister root=new TagTypeRegister();
48
49     static {
50         add(DEFAULT_TAG_TYPES);
51     }
52
53     private TagTypeRegister() {}
54
55     private static synchronized void add(final TagType[] tagTypes) {
56         for (int i=0; i<tagTypes.length; i++) add(tagTypes[i]);
57     }
58
59     public static synchronized void add(final TagType tagType) {
60         TagTypeRegister cursor=root;
61         final String JavaDoc startDelimiter=tagType.getStartDelimiter();
62         for (int i=0; i<startDelimiter.length(); i++) {
63             final char ch=startDelimiter.charAt(i);
64             TagTypeRegister child=cursor.getChild(ch);
65             if (child==null) {
66                 child=new TagTypeRegister();
67                 child.parent=cursor;
68                 child.ch=ch;
69                 cursor.addChild(child);
70             }
71             cursor=child;
72         }
73         cursor.addTagType(tagType);
74     }
75
76     public static synchronized void remove(final TagType tagType) {
77         TagTypeRegister cursor=root;
78         final String JavaDoc startDelimiter=tagType.getStartDelimiter();
79         for (int i=0; i<startDelimiter.length(); i++) {
80             final char ch=startDelimiter.charAt(i);
81             final TagTypeRegister child=cursor.getChild(ch);
82             if (child==null) return;
83             cursor=child;
84         }
85         cursor.removeTagType(tagType);
86         // clean up any unrequired children:
87
while (cursor!=root && cursor.tagTypes==null && cursor.children==null) {
88             cursor.parent.removeChild(cursor);
89             cursor=cursor.parent;
90         }
91     }
92
93     // list is in order of lowest to highest precedence
94
public static List getList() {
95         final ArrayList list=new ArrayList();
96         root.addTagTypesToList(list);
97         return list;
98     }
99     
100     private void addTagTypesToList(final List list) {
101         if (tagTypes!=null)
102             for (int i=tagTypes.length-1; i>=0; i--) list.add(tagTypes[i]);
103         if (children!=null)
104             for (int i=0; i<children.length; i++) children[i].addTagTypesToList(list);
105     }
106
107     public static final String JavaDoc getDebugInfo() {
108         return root.appendDebugInfo(new StringBuffer JavaDoc(),0).toString();
109     }
110
111     static final class ProspectiveTagTypeIterator implements Iterator {
112         private TagTypeRegister cursor;
113         private int tagTypeIndex=0;
114         
115         public ProspectiveTagTypeIterator(final Source source, final int pos) {
116             // returns empty iterator if pos out of range
117
final ParseText parseText=source.getParseText();
118             cursor=root;
119             int posIndex=0;
120             try {
121                 // find deepest node that matches the text at pos:
122
while (true) {
123                     final TagTypeRegister child=cursor.getChild(parseText.charAt(pos+(posIndex++)));
124                     if (child==null) break;
125                     cursor=child;
126                 }
127             } catch (IndexOutOfBoundsException JavaDoc ex) {}
128             // go back up until we reach a node that contains a list of tag types:
129
while (cursor.tagTypes==null) if ((cursor=cursor.parent)==null) break;
130         }
131
132         public boolean hasNext() {
133             return cursor!=null;
134         }
135
136         public TagType getNextTagType() {
137             final TagType[] tagTypes=cursor.tagTypes;
138             final TagType nextTagType=tagTypes[tagTypeIndex];
139             if ((++tagTypeIndex)==tagTypes.length) {
140                 tagTypeIndex=0;
141                 do {cursor=cursor.parent;} while (cursor!=null && cursor.tagTypes==null);
142             }
143             return nextTagType;
144         }
145
146         // use getNextTagType() instead to avoid the downcasting
147
public Object JavaDoc next() {
148             return getNextTagType();
149         }
150
151         public void remove() {
152             throw new UnsupportedOperationException JavaDoc();
153         }
154     }
155
156     public String JavaDoc toString() {
157         return appendDebugInfo(new StringBuffer JavaDoc(),0).toString();
158     }
159
160     private StringBuffer JavaDoc appendDebugInfo(final StringBuffer JavaDoc sb, final int level) {
161         for (int i=0; i<level; i++) sb.append(" ");
162         if (ch!=NULL_CHAR) sb.append(ch).append(' ');
163         if (tagTypes!=null) {
164             sb.append('(');
165             for (int i=0; i<tagTypes.length; i++) {
166                 sb.append(tagTypes[i].getDescription()).append(", ");
167             }
168             sb.setLength(sb.length()-2);
169             sb.append(')');
170         }
171         sb.append('\n');
172         if (children!=null) {
173             final int childLevel=level+1;
174             for (int i=0; i<children.length; i++) {
175                 children[i].appendDebugInfo(sb,childLevel);
176             }
177         }
178         return sb;
179     }
180
181     private TagTypeRegister getChild(final char ch) {
182         if (children==null) return null;
183         if (children.length==1) return children[0].ch==ch ? children[0] : null;
184         // perform binary search:
185
int low=0;
186         int high=children.length-1;
187         while (low<=high) {
188             int mid=(low+high) >> 1;
189             final char midChar=children[mid].ch;
190             if (midChar<ch)
191                 low=mid+1;
192             else if (midChar>ch)
193                 high=mid-1;
194             else
195                 return children[mid];
196         }
197         return null;
198     }
199     
200     private void addChild(final TagTypeRegister child) {
201         // assumes the character associated with the child register does not already exist in this register's children.
202
if (children==null) {
203             children=new TagTypeRegister[] {child};
204         } else {
205             final TagTypeRegister[] newChildren=new TagTypeRegister[children.length+1];
206             int i=0;
207             while (i<children.length && children[i].ch<=child.ch) {
208                 newChildren[i]=children[i];
209                 i++;
210             }
211             newChildren[i++]=child;
212             while (i<newChildren.length) {
213                 newChildren[i]=children[i-1];
214                 i++;
215             }
216             children=newChildren;
217         }
218     }
219
220     private void removeChild(final TagTypeRegister child) {
221         // this method assumes that the specified child exists in the children array
222
if (children.length==1) {
223             children=null;
224             return;
225         }
226         final TagTypeRegister[] newChildren=new TagTypeRegister[children.length-1];
227         int offset=0;
228         for (int i=0; i<children.length; i++) {
229             if (children[i]==child)
230                 offset=-1;
231             else
232                 newChildren[i+offset]=children[i];
233         }
234         children=newChildren;
235     }
236     
237     private int indexOfTagType(final TagType tagType) {
238         if (tagTypes==null) return -1;
239         for (int i=0; i<tagTypes.length; i++)
240             if (tagTypes[i]==tagType) return i;
241         return -1;
242     }
243     
244     private void addTagType(final TagType tagType) {
245         final int indexOfTagType=indexOfTagType(tagType);
246         if (indexOfTagType==-1) {
247             if (tagTypes==null) {
248                 tagTypes=new TagType[] {tagType};
249             } else {
250                 final TagType[] newTagTypes=new TagType[tagTypes.length+1];
251                 newTagTypes[0]=tagType;
252                 for (int i=0; i<tagTypes.length; i++) newTagTypes[i+1]=tagTypes[i];
253                 tagTypes=newTagTypes;
254             }
255         } else {
256             // tagType already exists in the list, just move it to the front
257
for (int i=indexOfTagType; i>0; i--) tagTypes[i]=tagTypes[i-1];
258             tagTypes[0]=tagType;
259         }
260     }
261
262     private void removeTagType(final TagType tagType) {
263         final int indexOfTagType=indexOfTagType(tagType);
264         if (indexOfTagType==-1) return;
265         if (tagTypes.length==1) {
266             tagTypes=null;
267             return;
268         }
269         final TagType[] newTagTypes=new TagType[tagTypes.length-1];
270         for (int i=0; i<indexOfTagType; i++) newTagTypes[i]=tagTypes[i];
271         for (int i=indexOfTagType; i<newTagTypes.length; i++) newTagTypes[i]=tagTypes[i+1];
272         tagTypes=newTagTypes;
273     }
274 }
275
276
Popular Tags