KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > au > id > jericho > lib > html > SubCache


1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2
// Version 2.2
3
// Copyright (C) 2006 Martin Jericho
4
// http://sourceforge.net/projects/jerichohtml/
5
//
6
// This library is free software; you can redistribute it and/or
7
// modify it under the terms of the GNU Lesser General Public
8
// License as published by the Free Software Foundation; either
9
// version 2.1 of the License, or (at your option) any later version.
10
// http://www.gnu.org/copyleft/lesser.html
11
//
12
// This library is distributed in the hope that it will be useful,
13
// but WITHOUT ANY WARRANTY; without even the implied warranty of
14
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
// Lesser General Public License for more details.
16
//
17
// You should have received a copy of the GNU Lesser General Public
18
// License along with this library; if not, write to the Free Software
19
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20

21 package au.id.jericho.lib.html;
22
23 import java.util.*;
24
25 /**
26  * Represents a cached map of character positions to tags for a particular tag type,
27  * or for all tag types if the tagType field is null.
28  */

29 final class SubCache {
30     private final Cache cache;
31     public final TagType tagType; // does not support unregistered tag types at present
32
private final CacheEntry bof; // beginning of file marker
33
private final CacheEntry eof; // end of file marker
34
private CacheEntry[] array=new CacheEntry[INITIAL_CAPACITY];
35
36     private static final int INITIAL_CAPACITY=64;
37
38     public SubCache(final Cache cache, final TagType tagType) {
39         this.cache=cache;
40         this.tagType=tagType;
41         array[0]=bof=new CacheEntry(0,-1,null,false,false);
42         array[1]=eof=new CacheEntry(1,cache.getSourceLength(),null,false,false);
43     }
44
45     public int size() {
46         return eof.index+1;
47     }
48
49     public void clear() {
50         bof.nextCached=false;
51         eof.index=1;
52         eof.previousCached=false;
53         array[1]=eof;
54     }
55
56     public void bulkLoad_Init(final int tagCount) {
57         array=new CacheEntry[tagCount+2];
58         array[0]=bof;
59         bof.nextCached=true;
60         array[eof.index=tagCount+1]=eof;
61         eof.previousCached=true;
62     }
63
64     public void bulkLoad_Set(final int tagsIndex, final Tag tag) {
65         final int index=tagsIndex+1;
66         array[index]=new CacheEntry(index,tag.begin,tag,true,true);
67     }
68
69     public void bulkLoad_AddToTypeSpecificCache(final Tag tag) {
70         final int index=eof.index;
71         if (array.length==eof.index+1) doubleCapacity();
72         array[index]=new CacheEntry(index,tag.begin,tag,true,true);
73         eof.index++;
74     }
75
76     public void bulkLoad_FinaliseTypeSpecificCache() {
77         bof.nextCached=true;
78         eof.previousCached=true;
79         array[eof.index]=eof;
80     }
81
82     public Tag getTagAt(final int pos) {
83         // This must only be called on allTagTypesSubCache (ie tagType==null)
84
if (cache.getSourceLength()==0) return null;
85         if (pos<0 || pos>=cache.getSourceLength()) return null;
86         final int index=getIndexOfPos(pos);
87         final CacheEntry cacheEntry=array[index];
88         if (cacheEntry.pos==pos) return cacheEntry.tag;
89         if (cacheEntry.previousCached) return null;
90         return cache.addTagAt(pos);
91     }
92
93     public void addTagAt(final int pos, final Tag tag) {
94         final int index=getIndexOfPos(pos);
95         final CacheEntry nextCacheEntry=array[index];
96         final CacheEntry previousCacheEntry=getPrevious(nextCacheEntry);
97         add(previousCacheEntry,new CacheEntry(index,pos,tag,pos==previousCacheEntry.pos+1,pos==nextCacheEntry.pos-1),nextCacheEntry);
98     }
99
100     public Tag findPreviousOrNextTag(final int pos, final boolean previous) {
101         // Note that this method never returns tags for which tag.includInSearch() is false, so separate caching of unregistered tags won't work.
102
if (cache.getSourceLength()==0) return null;
103         if (pos<0 || pos>=cache.getSourceLength()) return null;
104         int index=getIndexOfPos(pos);
105         final CacheEntry cacheEntry=array[index];
106         final Tag tag;
107         if (previous) {
108             if (cacheEntry.pos==pos && cacheEntry.tag!=null && cacheEntry.tag.includeInSearch()) return cacheEntry.tag;
109             tag=findPreviousTag(getPrevious(cacheEntry),pos,cacheEntry);
110             addPreviousTag(pos,tag);
111         } else {
112             if (cacheEntry.pos==pos) {
113                 if (cacheEntry.tag!=null && cacheEntry.tag.includeInSearch()) return cacheEntry.tag;
114                 tag=findNextTag(cacheEntry,pos,getNext(cacheEntry));
115             } else {
116                 tag=findNextTag(getPrevious(cacheEntry),pos,cacheEntry);
117             }
118             addNextTag(pos,tag);
119         }
120         return tag;
121     }
122
123     public Iterator getTagIterator() {
124         return new TagIterator();
125     }
126
127     public String JavaDoc toString() {
128         return appendTo(new StringBuffer JavaDoc()).toString();
129     }
130
131     protected StringBuffer JavaDoc appendTo(final StringBuffer JavaDoc sb) {
132         sb.append("Cache for TagType : ").append(tagType).append('\n');
133         for (int i=0; i<=lastIndex(); i++) sb.append(array[i]).append('\n');
134         return sb;
135     }
136
137     private Tag findPreviousTag(CacheEntry previousCacheEntry, int pos, CacheEntry nextCacheEntry) {
138         // previousCacheEntry.pos < pos <= nextCacheEntry.pos
139
while (true) {
140             if (!nextCacheEntry.previousCached) {
141                 final Tag tag=Tag.findPreviousOrNextTagUncached(cache.source,pos,tagType,true,previousCacheEntry.pos); // if useAllTypesCache is true, automatically adds tag to all caches if one is found, and maybe some unregistered tags along the way.
142
if (tag!=null) {
143                     if (!cache.source.useAllTypesCache) addTagAt(tag.begin,tag); // have to add tag manually if useAllTypesCache is false
144
return tag;
145                 }
146             }
147             if (previousCacheEntry==bof) return null;
148             if (previousCacheEntry.tag!=null && previousCacheEntry.tag.includeInSearch()) return previousCacheEntry.tag;
149             pos=previousCacheEntry.pos-1;
150             previousCacheEntry=getPrevious(nextCacheEntry=previousCacheEntry);
151         }
152     }
153
154     private Tag findNextTag(CacheEntry previousCacheEntry, int pos, CacheEntry nextCacheEntry) {
155         // previousCacheEntry.pos <= pos < nextCacheEntry.pos
156
while (true) {
157             if (!previousCacheEntry.nextCached) {
158                 final Tag tag=Tag.findPreviousOrNextTagUncached(cache.source,pos,tagType,false,nextCacheEntry.pos); // if useAllTypesCache is true, automatically adds tag to caches if one is found, and maybe some unregistered tags along the way.
159
if (tag!=null) {
160                     if (!cache.source.useAllTypesCache) addTagAt(tag.begin,tag); // have to add tag manually if useAllTypesCache is false
161
return tag;
162                 }
163             }
164             if (nextCacheEntry==eof) return null;
165             if (nextCacheEntry.tag!=null && nextCacheEntry.tag.includeInSearch()) return nextCacheEntry.tag;
166             pos=nextCacheEntry.pos+1;
167             nextCacheEntry=getNext(previousCacheEntry=nextCacheEntry);
168         }
169     }
170
171     private void addPreviousTag(final int pos, final Tag tag) {
172         final int tagPos=(tag==null) ? bof.pos : tag.begin;
173         if (tagPos==pos) return; // the tag was found exactly on pos, so cache has already been fully updated
174
// tagPos < pos
175
int index=getIndexOfPos(pos);
176         CacheEntry stepCacheEntry=array[index];
177         // stepCacheEntry.pos is either == or > than tagPos.
178
// stepCacheEntry.pos is either == or > pos.
179
int compactStartIndex=Integer.MAX_VALUE;
180         if (stepCacheEntry.pos==pos) {
181             // a cache entry was aleady at pos (containing null or wrong tagType)
182
stepCacheEntry.previousCached=true;
183             if (stepCacheEntry.isRedundant()) {stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index);}
184         } else if (!stepCacheEntry.previousCached) {
185             // we have to add a new cacheEntry at pos:
186
if (tagType==null)
187                 cache.addTagAt(pos); // this pos has never been checked before, so add it to all relevant SubCaches (a null or unregistered tag entry is always added to this SubCache)
188
else
189                 addTagAt(pos,null); // all we know is that the pos doesn't contain a tag of this SubCache's type, so add a null entry to this SubCache only.
190
// now we have to reload the index and stepCacheEntry as they may have changed:
191
stepCacheEntry=array[index=getIndexOfPos(pos)];
192             // stepCacheEntry.pos is either == or > than tagPos.
193
// stepCacheEntry.pos is either == or > pos. (the latter if the added entry was redundant)
194
if (stepCacheEntry.pos==pos) {
195                 // perform same steps as in the (stepCacheEntry.pos==pos) if condition above:
196
stepCacheEntry.previousCached=true;
197                 if (stepCacheEntry.isRedundant()) {stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index);}
198             }
199         }
200         while (true) {
201             stepCacheEntry=array[--index];
202             if (stepCacheEntry.pos<=tagPos) break;
203             if (stepCacheEntry.tag!=null) {
204                 if (stepCacheEntry.tag.includeInSearch()) throw new SourceCacheEntryMissingInternalException(tagType,tag,this);
205                 stepCacheEntry.previousCached=true;
206                 stepCacheEntry.nextCached=true;
207             } else {
208                 stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index);
209             }
210         }
211         if (stepCacheEntry.pos!=tagPos) throw new FoundCacheEntryMissingInternalException(tagType,tag,this);
212         stepCacheEntry.nextCached=true;
213         compact(compactStartIndex);
214     }
215
216     private void addNextTag(final int pos, final Tag tag) {
217         final int tagPos=(tag==null) ? eof.pos : tag.begin;
218         if (tagPos==pos) return; // the tag was found exactly on pos, so cache has already been fully updated
219
// tagPos > pos
220
int index=getIndexOfPos(pos);
221         CacheEntry stepCacheEntry=array[index];
222         // stepCacheEntry.pos may be <, == or > than tagPos.
223
// stepCacheEntry.pos is either == or > pos.
224
int compactStartIndex=Integer.MAX_VALUE;
225         if (stepCacheEntry.pos==pos) {
226             // a cache entry was aleady at pos (containing null or wrong tagType)
227
stepCacheEntry.nextCached=true;
228             if (stepCacheEntry.isRedundant()) {stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index);}
229         } else if (!getPrevious(stepCacheEntry).nextCached) {
230             // we have to add a new cacheEntry at pos:
231
if (tagType==null)
232                 cache.addTagAt(pos); // this pos has never been checked before, so add it to all relevant SubCaches (a null or unregistered tag entry is always added to this SubCache)
233
else
234                 addTagAt(pos,null); // all we know is that the pos doesn't contain a tag of this SubCache's type, so add a null entry to this SubCache only.
235
// now we have to reload the index and stepCacheEntry as they may have changed:
236
stepCacheEntry=array[index=getIndexOfPos(pos)];
237             // stepCacheEntry.pos may be <, == or > than tagPos.
238
// stepCacheEntry.pos is either == or > pos. (the latter if the added entry was redundant)
239
if (stepCacheEntry.pos==pos) {
240                 // perform same steps as in the (stepCacheEntry.pos==pos) if condition above:
241
stepCacheEntry.nextCached=true;
242                 if (stepCacheEntry.isRedundant()) {stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index);}
243             }
244         }
245         if (stepCacheEntry.pos<tagPos) {
246             while (true) {
247                 stepCacheEntry=array[++index];
248                 if (stepCacheEntry.pos>=tagPos) break;
249                 if (stepCacheEntry.tag!=null) {
250                     if (stepCacheEntry.tag.includeInSearch()) throw new SourceCacheEntryMissingInternalException(tagType,tag,this);
251                     stepCacheEntry.previousCached=true;
252                     stepCacheEntry.nextCached=true;
253                 } else {
254                     stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index);
255                 }
256             }
257             if (stepCacheEntry.pos!=tagPos) throw new FoundCacheEntryMissingInternalException(tagType,tag,this);
258         }
259         stepCacheEntry.previousCached=true;
260         compact(compactStartIndex);
261     }
262
263     private void compact(int i) {
264         final int lastIndex=lastIndex();
265         int removedCount=1;
266         while (i<lastIndex) {
267             final CacheEntry cacheEntry=array[++i];
268             if (cacheEntry.removed)
269                 removedCount++;
270             else
271                 array[cacheEntry.index=i-removedCount]=cacheEntry;
272         }
273     }
274
275     private void add(final CacheEntry previousCacheEntry, final CacheEntry newCacheEntry, final CacheEntry nextCacheEntry) {
276         if (!newCacheEntry.isRedundant()) insert(newCacheEntry);
277         if (newCacheEntry.previousCached) {
278             previousCacheEntry.nextCached=true;
279             if (previousCacheEntry.isRedundant()) remove(previousCacheEntry);
280         }
281         if (newCacheEntry.nextCached) {
282             nextCacheEntry.previousCached=true;
283             if (nextCacheEntry.isRedundant()) remove(nextCacheEntry);
284         }
285     }
286
287     private int getIndexOfPos(final int pos) {
288         // return the index of the cacheEntry at pos, or the index where it would be inserted if it does not exist.
289
int minIndex=0;
290         int maxIndex=lastIndex();
291         // using the following complex calculation instead of a binary search is likely to result in less iterations but is slower overall:
292
// int index=(pos*maxIndex)/cache.getSourceLength(); // approximate first guess at index, assuming even distribution of cache entries
293
int index=maxIndex>>1;
294         while (true) {
295             final CacheEntry cacheEntry=array[index];
296             if (pos>cacheEntry.pos) {
297                 final CacheEntry nextCacheEntry=getNext(cacheEntry);
298                 if (pos<=nextCacheEntry.pos) return nextCacheEntry.index;
299                 minIndex=nextCacheEntry.index;
300             } else if (pos<cacheEntry.pos) {
301                 final CacheEntry previousCacheEntry=getPrevious(cacheEntry);
302                 if (pos==previousCacheEntry.pos) return previousCacheEntry.index;
303                 if (pos>previousCacheEntry.pos) return index;
304                 maxIndex=previousCacheEntry.index;
305             } else {
306                 return index;
307             }
308             index=(minIndex+maxIndex)>>1;
309             // using the following complex calculation instead of a binary search is likely to result in less iterations but is slower overall:
310
// final int minIndexPos=array[minIndex].pos;
311
// index=((maxIndex-minIndex-1)*(pos-minIndexPos))/(array[maxIndex].pos-minIndexPos)+minIndex+1; // approximate next guess at index, assuming even distribution of cache entries between min and max entries
312
}
313     }
314
315     private CacheEntry getNext(final CacheEntry cacheEntry) {
316         return array[cacheEntry.index+1];
317     }
318
319     private CacheEntry getPrevious(final CacheEntry cacheEntry) {
320         return array[cacheEntry.index-1];
321     }
322     
323     private int lastIndex() {
324         return eof.index;
325     }
326
327     private void insert(final CacheEntry cacheEntry) {
328         final int index=cacheEntry.index;
329         if (array.length==size()) doubleCapacity();
330         for (int i=lastIndex(); i>=index; i--) {
331             final CacheEntry movedCacheEntry=array[i];
332             array[movedCacheEntry.index=(i+1)]=movedCacheEntry;
333         }
334         array[index]=cacheEntry;
335     }
336
337     private void remove(final CacheEntry cacheEntry) {
338         final int lastIndex=lastIndex();
339         for (int i=cacheEntry.index; i<lastIndex; i++) {
340             final CacheEntry movedCacheEntry=array[i+1];
341             array[movedCacheEntry.index=i]=movedCacheEntry;
342         }
343     }
344
345     private void doubleCapacity() {
346         // assumes size==array.length
347
final CacheEntry[] newArray=new CacheEntry[array.length << 1];
348         for (int i=lastIndex(); i>=0; i--) newArray[i]=array[i];
349         array=newArray;
350     }
351
352     private static class CacheEntryMissingInternalException extends RuntimeException JavaDoc {
353         public CacheEntryMissingInternalException(final TagType tagType, final Tag tag, final SubCache subCache, final String JavaDoc message) {
354             super("INTERNAL ERROR: Inconsistent Cache State for TagType \""+tagType+"\" - "+message+' '+tag.getDebugInfo()+'\n'+subCache);
355         }
356     }
357
358     private static class SourceCacheEntryMissingInternalException extends CacheEntryMissingInternalException {
359         public SourceCacheEntryMissingInternalException(final TagType tagType, final Tag tag, final SubCache subCache) {
360             super(tagType,tag,subCache,"cache entry no longer found in source:");
361         }
362     }
363
364     private static class FoundCacheEntryMissingInternalException extends CacheEntryMissingInternalException {
365         public FoundCacheEntryMissingInternalException(final TagType tagType, final Tag tag, final SubCache subCache) {
366             super(tagType,tag,subCache,"missing cache entry for found tag");
367         }
368     }
369
370     private final class TagIterator implements Iterator {
371         private int i=0;
372         private Tag nextTag;
373         public TagIterator() {
374             loadNextTag();
375         }
376         public boolean hasNext() {
377             return nextTag!=null;
378         }
379         public Object JavaDoc next() {
380             final Tag result=nextTag;
381             loadNextTag();
382             return result;
383         }
384         public void remove() {
385             throw new UnsupportedOperationException JavaDoc();
386         }
387         private void loadNextTag() {
388             while (++i<=lastIndex() && (nextTag=array[i].tag)==null) {}
389         }
390     }
391
392     private static final class CacheEntry {
393         public int index;
394         public final int pos;
395         public final Tag tag;
396         public boolean previousCached;
397         public boolean nextCached;
398         public boolean removed=false;
399         
400         public CacheEntry(final int index, final int pos, final Tag tag, final boolean previousCached, final boolean nextCached) {
401             this.index=index;
402             this.pos=pos;
403             this.tag=tag;
404             this.previousCached=previousCached;
405             this.nextCached=nextCached;
406         }
407         
408         public boolean isRedundant() {
409             return tag==null && previousCached && nextCached;
410         }
411         
412         public String JavaDoc toString() {
413             return pad(index,4)+" "+pad(pos,5)+" "+(previousCached?'|':'-')+' '+(nextCached?'|':'-')+' '+(tag==null ? "null" : tag.getDebugInfo());
414         }
415         
416         private String JavaDoc pad(final int n, final int places) {
417             final String JavaDoc nstring=String.valueOf(n);
418             final StringBuffer JavaDoc sb=new StringBuffer JavaDoc(places);
419             for (int i=places-nstring.length(); i>0; i--) sb.append(' ');
420             sb.append(nstring);
421             return sb.toString();
422         }
423     }
424 }
425
Popular Tags