KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > util > ParserUtils


1 // HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
2
// http://sourceforge.org/projects/htmlparser
3
// Copyright (C) 2004 Somik Raha
4
//
5
// Revision Control Information
6
//
7
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ParserUtils.java,v $
8
// $Author: anul $
9
// $Date: 2004/08/27 09:54:27 $
10
// $Revision: 1.46 $
11
//
12
// This library is free software; you can redistribute it and/or
13
// modify it under the terms of the GNU Lesser General Public
14
// License as published by the Free Software Foundation; either
15
// version 2.1 of the License, or (at your option) any later version.
16
//
17
// This library is distributed in the hope that it will be useful,
18
// but WITHOUT ANY WARRANTY; without even the implied warranty of
19
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
// Lesser General Public License for more details.
21
//
22
// You should have received a copy of the GNU Lesser General Public
23
// License along with this library; if not, write to the Free Software
24
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
//
26

27 package org.htmlparser.util;
28
29 import java.io.UnsupportedEncodingException JavaDoc;
30 import java.util.ArrayList JavaDoc;
31
32 import org.htmlparser.Node;
33 import org.htmlparser.NodeFilter;
34 import org.htmlparser.Parser;
35 import org.htmlparser.Tag;
36 import org.htmlparser.filters.NodeClassFilter;
37 import org.htmlparser.filters.TagNameFilter;
38 import org.htmlparser.lexer.Lexer;
39 import org.htmlparser.lexer.Page;
40 import org.htmlparser.tags.CompositeTag;
41 import org.htmlparser.util.NodeList;
42 import org.htmlparser.util.ParserException;
43
44
45 public class ParserUtils
46 {
47     public static String JavaDoc removeChars(String JavaDoc s, char occur) {
48         StringBuffer JavaDoc newString = new StringBuffer JavaDoc();
49         char ch;
50         for (int i = 0; i < s.length(); i++) {
51             ch = s.charAt(i);
52             if (ch != occur)
53                 newString.append(ch);
54         }
55         return newString.toString();
56     }
57
58     public static String JavaDoc removeEscapeCharacters(String JavaDoc inputString) {
59         inputString = ParserUtils.removeChars(inputString, '\r');
60         inputString = ParserUtils.removeChars(inputString, '\n');
61         inputString = ParserUtils.removeChars(inputString, '\t');
62         return inputString;
63     }
64
65     public static String JavaDoc removeTrailingBlanks(String JavaDoc text) {
66         char ch = ' ';
67         while (ch == ' ') {
68             ch = text.charAt(text.length() - 1);
69             if (ch == ' ')
70                 text = text.substring(0, text.length() - 1);
71         }
72         return text;
73     }
74
75     /**
76      * Search given node and pick up any objects of given type.
77      * @param node The node to search.
78      * @param type The class to search for.
79      * @return A node array with the matching nodes.
80      */

81     public static Node[] findTypeInNode(Node node, Class JavaDoc type)
82     {
83         NodeFilter filter;
84         NodeList ret;
85         
86         ret = new NodeList ();
87         filter = new NodeClassFilter (type);
88         node.collectInto (ret, filter);
89
90         return (ret.toNodeArray ());
91     }
92
93     /**
94      * Split the input string considering as string separator
95      * all the not numerical characters
96      * with the only exception of the characters specified in charsDoNotBeRemoved param.
97      * <BR>For example if you call splitButDigits(&quot;&lt;DIV&gt; +12.5, +3.4 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
98      * <BR>you obtain an array of strings {&quot;+12.5&quot;, &quot;+3.4&quot;} as output (1,2,3,4 and 5 are digits and +,. are chars that do not be removed).
99      * @param input The string in input.
100      * @param charsDoNotBeRemoved The chars that do not be removed.
101      * @return The array of strings as output.
102     */

103     public static String JavaDoc[] splitButDigits (String JavaDoc input, String JavaDoc charsDoNotBeRemoved)
104     {
105     
106         ArrayList JavaDoc output = new ArrayList JavaDoc();
107         int minCapacity = 0;
108         StringBuffer JavaDoc str = new StringBuffer JavaDoc();
109
110         boolean charFound = false;
111         boolean toBeAdd = false;
112         for (int index=0; index<input.length(); index++)
113         {
114             charFound=false;
115             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
116                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
117                     charFound=true;
118             if ((Character.isDigit(input.charAt(index))) || (charFound))
119             {
120                 str.append(input.charAt(index));
121                 toBeAdd=false;
122             }
123             else
124                 if (!toBeAdd)
125                     toBeAdd=true;
126             // finished to parse one string
127
if (toBeAdd && (str.length()!=0)) {
128                 minCapacity++;
129                 output.ensureCapacity(minCapacity);
130                 if (output.add(str.toString()))
131                     str = new StringBuffer JavaDoc();
132                 else
133                     minCapacity--;
134             }
135         }
136         // add the last string
137
if (str.length()!=0) {
138             minCapacity++;
139             output.ensureCapacity(minCapacity);
140             if (output.add(str.toString()))
141                 str = new StringBuffer JavaDoc();
142             else
143                 minCapacity--;
144         }
145
146         output.trimToSize();
147         Object JavaDoc[] outputObj = output.toArray();
148         String JavaDoc[] outputStr = new String JavaDoc[output.size()];
149         for (int i=0; i<output.size(); i++)
150             outputStr[i] = new String JavaDoc((String JavaDoc) outputObj[i]);
151         return outputStr;
152         
153     }
154     
155     /**
156      * Remove from the input string all the not numerical characters
157      * with the only exception of the characters specified in charsDoNotBeRemoved param.
158      * <BR>For example if you call trimButDigits(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
159      * <BR>you obtain a string &quot;+12.5&quot; as output (1,2 and 5 are digits and +,. are chars that do not be removed).
160      * <BR>For example if you call trimButDigits(&quot;&lt;DIV&gt; +1 2 . 5 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
161      * <BR>you obtain a string &quot;+12.5&quot; as output (the spaces between 1 and 2, 2 and ., . and 5 are removed).
162      * @param input The string in input.
163      * @param charsDoNotBeRemoved The chars that do not be removed.
164      * @return The string as output.
165     */

166     public static String JavaDoc trimButDigits (String JavaDoc input, String JavaDoc charsDoNotBeRemoved)
167     {
168     
169         StringBuffer JavaDoc output = new StringBuffer JavaDoc();
170
171         boolean charFound=false;
172         for (int index=0; index<input.length(); index++)
173         {
174             charFound=false;
175             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
176                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
177                     charFound=true;
178             if ((Character.isDigit(input.charAt(index))) || (charFound))
179                 output.append(input.charAt(index));
180         }
181
182         return output.toString();
183         
184     }
185     
186     /**
187      * Remove from the beginning and the end of the input string all the not numerical characters
188      * with the only exception of the characters specified in charsDoNotBeRemoved param.
189      * <BR>The removal process removes only chars at the beginning and at the end of the string.
190      * <BR>For example if you call trimButDigitsBeginEnd(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
191      * <BR>you obtain a string &quot;+12.5&quot; as output (1,2 and 5 are digits and +,. are chars that do not be removed).
192      * <BR>For example if you call trimButDigitsBeginEnd(&quot;&lt;DIV&gt; +1 2 . 5 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
193      * <BR>you obtain a string &quot;+1 2 . 5&quot; as output (the spacess inside the string are not removed).
194      * @param input - The string in input.
195      * @param charsDoNotBeRemoved - The chars that do not be removed.
196      * @return The string as output.
197     */

198     public static String JavaDoc trimButDigitsBeginEnd (String JavaDoc input, String JavaDoc charsDoNotBeRemoved)
199     {
200     
201         String JavaDoc output = new String JavaDoc();
202
203         int begin=0;
204         int end=input.length()-1;
205         boolean charFound=false;
206         boolean ok=true;
207         for (int index=begin; (index<input.length()) && ok; index++)
208         {
209             charFound=false;
210             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
211                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
212                     charFound=true;
213             if ( (Character.isDigit(input.charAt(index))) || (charFound) )
214             {
215                 begin=index;
216                 ok=false;
217             }
218         }
219         ok=true;
220         for (int index=end; (index>=0) && ok; index--)
221         {
222             charFound=false;
223             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
224                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
225                     charFound=true;
226             if ( (Character.isDigit(input.charAt(index))) || (charFound) )
227             {
228                 end=index;
229                 ok=false;
230             }
231         }
232         output=input.substring(begin,end+1);
233
234         return output;
235         
236     }
237     
238     /**
239      * Split the input string considering as string separator
240      * all the spaces and tabs like chars and
241      * the chars specified in the input variable charsToBeRemoved.
242      * <BR>For example if you call splitSpaces(&quot;&lt;DIV&gt; +12.5, +3.4 &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/,&quot;),
243      * &lt;BR>you obtain an array of strings {&quot;+12.5&quot;, &quot;+3.4&quot;} as output (space chars and &lt;,&gt;,D,I,V,/ and the comma are chars that must be removed).
244      * @param input The string in input.
245      * @param charsToBeRemoved The chars to be removed.
246      * @return The array of strings as output.
247     */

248     public static String JavaDoc[] splitSpaces (String JavaDoc input, String JavaDoc charsToBeRemoved)
249     {
250     
251         ArrayList JavaDoc output = new ArrayList JavaDoc();
252         int minCapacity = 0;
253         StringBuffer JavaDoc str = new StringBuffer JavaDoc();
254
255         boolean charFound = false;
256         boolean toBeAdd = false;
257         for (int index=0; index<input.length(); index++)
258         {
259             charFound=false;
260             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
261                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
262                     charFound=true;
263             if (!((Character.isWhitespace(input.charAt(index))) || (Character.isSpaceChar(input.charAt(index))) || (charFound)))
264             {
265                 str.append(input.charAt(index));
266                 toBeAdd=false;
267             }
268             else
269                 if (!toBeAdd)
270                     toBeAdd=true;
271             // finished to parse one string
272
if (toBeAdd && (str.length()!=0)) {
273                 minCapacity++;
274                 output.ensureCapacity(minCapacity);
275                 if (output.add(str.toString()))
276                     str = new StringBuffer JavaDoc();
277                 else
278                     minCapacity--;
279             }
280         }
281         // add the last string
282
if (str.length()!=0) {
283             minCapacity++;
284             output.ensureCapacity(minCapacity);
285             if (output.add(str.toString()))
286                 str = new StringBuffer JavaDoc();
287             else
288                 minCapacity--;
289         }
290
291         output.trimToSize();
292         Object JavaDoc[] outputObj = output.toArray();
293         String JavaDoc[] outputStr = new String JavaDoc[output.size()];
294         for (int i=0; i<output.size(); i++)
295             outputStr[i] = new String JavaDoc((String JavaDoc) outputObj[i]);
296         return outputStr;
297         
298     }
299
300     /**
301      * Remove from the input string all the spaces and tabs like chars.
302      * Remove also the chars specified in the input variable charsToBeRemoved.
303      * <BR>For example if you call trimSpaces(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/&quot;),
304      * <BR>you obtain a string &quot;+12.5&quot; as output (space chars and &lt;,&gt;,D,I,V,/ are chars that must be removed).
305      * <BR>For example if you call trimSpaces(&quot;&lt;DIV&gt; Trim All Spaces Also The Ones Inside The String &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/&quot;),
306      * <BR>you obtain a string &quot;TrimAllSpacesAlsoTheOnesInsideTheString&quot; as output (all the spaces inside the string are removed).
307      * @param input The string in input.
308      * @param charsToBeRemoved The chars to be removed.
309      * @return The string as output.
310     */

311     public static String JavaDoc trimSpaces (String JavaDoc input, String JavaDoc charsToBeRemoved)
312     {
313     
314         StringBuffer JavaDoc output = new StringBuffer JavaDoc();
315
316         boolean charFound=false;
317         for (int index=0; index<input.length(); index++)
318         {
319             charFound=false;
320             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
321                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
322                     charFound=true;
323             if (!((Character.isWhitespace(input.charAt(index))) || (Character.isSpaceChar(input.charAt(index))) || (charFound)))
324                 output.append(input.charAt(index));
325         }
326
327         return output.toString();
328
329     }
330
331     /**
332      * Remove from the beginning and the end of the input string all the spaces and tabs like chars.
333      * Remove also the chars specified in the input variable charsToBeRemoved.
334      * <BR>The removal process removes only chars at the beginning and at the end of the string.
335      * <BR>For example if you call trimSpacesBeginEnd(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/&quot;),
336      * <BR>you obtain a string &quot;+12.5&quot; as output (space chars and &lt;,&gt;,D,I,V,/ are chars that must be removed).
337      * <BR>For example if you call trimSpacesBeginEnd(&quot;&lt;DIV&gt; Trim all spaces but not the ones inside the string &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/&quot;),
338      * <BR>you obtain a string &quot;Trim all spaces but not the ones inside the string&quot; as output (all the spaces inside the string are preserved).
339      * @param input The string in input.
340      * @param charsToBeRemoved The chars to be removed.
341      * @return The string as output.
342     */

343     public static String JavaDoc trimSpacesBeginEnd (String JavaDoc input, String JavaDoc charsToBeRemoved)
344     {
345     
346         String JavaDoc output = new String JavaDoc();
347
348         int begin=0;
349         int end=input.length()-1;
350         boolean charFound=false;
351         boolean ok=true;
352         for (int index=begin; (index<input.length()) && ok; index++)
353         {
354             charFound=false;
355             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
356                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
357                     charFound=true;
358             if (!( (Character.isWhitespace(input.charAt(index))) || (Character.isSpaceChar(input.charAt(index))) || (charFound) ))
359             {
360                 begin=index;
361                 ok=false;
362             }
363         }
364         ok=true;
365         for (int index=end; (index>=0) && ok; index--)
366         {
367             charFound=false;
368             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
369                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
370                     charFound=true;
371             if (!( (Character.isWhitespace(input.charAt(index))) || (Character.isSpaceChar(input.charAt(index))) || (charFound) ))
372             {
373                 end=index;
374                 ok=false;
375             }
376         }
377         output=input.substring(begin,end+1);
378
379         return output;
380         
381     }
382     
383     /**
384      * Split the input string considering as string separator
385      * all the characters
386      * with the only exception of the characters specified in charsDoNotBeRemoved param.
387      * <BR>For example if you call splitButChars(&quot;&lt;DIV&gt; +12.5, +3.4 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
388      * <BR>you obtain an array of strings {&quot;+12.5&quot;, &quot;+3.4&quot;} as output (+,.,1,2,3,4,5,6,7,8,9,0 are chars that do not be removed).
389      * @param input The string in input.
390      * @param charsDoNotBeRemoved The chars that do not be removed.
391      * @return The array of strings as output.
392     */

393     public static String JavaDoc[] splitButChars (String JavaDoc input, String JavaDoc charsDoNotBeRemoved)
394     {
395     
396         ArrayList JavaDoc output = new ArrayList JavaDoc();
397         int minCapacity = 0;
398         StringBuffer JavaDoc str = new StringBuffer JavaDoc();
399
400         boolean charFound = false;
401         boolean toBeAdd = false;
402         for (int index=0; index<input.length(); index++)
403         {
404             charFound=false;
405             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
406                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
407                     charFound=true;
408             if (charFound)
409             {
410                 str.append(input.charAt(index));
411                 toBeAdd=false;
412             }
413             else
414                 if (!toBeAdd)
415                     toBeAdd=true;
416             // finished to parse one string
417
if (toBeAdd && (str.length()!=0)) {
418                 minCapacity++;
419                 output.ensureCapacity(minCapacity);
420                 if (output.add(str.toString()))
421                     str = new StringBuffer JavaDoc();
422                 else
423                     minCapacity--;
424             }
425         }
426         // add the last string
427
if (str.length()!=0) {
428             minCapacity++;
429             output.ensureCapacity(minCapacity);
430             if (output.add(str.toString()))
431                 str = new StringBuffer JavaDoc();
432             else
433                 minCapacity--;
434         }
435
436         output.trimToSize();
437         Object JavaDoc[] outputObj = output.toArray();
438         String JavaDoc[] outputStr = new String JavaDoc[output.size()];
439         for (int i=0; i<output.size(); i++)
440             outputStr[i] = new String JavaDoc((String JavaDoc) outputObj[i]);
441         return outputStr;
442         
443     }
444     
445     /**
446      * Remove from the input string all the characters
447      * with the only exception of the characters specified in charsDoNotBeRemoved param.
448      * <BR>For example if you call trimButChars(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
449      * <BR>you obtain a string &quot;+12.5&quot; as output (+,.,1,2,3,4,5,6,7,8,9,0 are chars that do not be removed).
450      * <BR>For example if you call trimButChars(&quot;&lt;DIV&gt; +1 2 . 5 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
451      * <BR>you obtain a string &quot;+12.5&quot; as output (the spaces between 1 and 2, 2 and ., . and 5 are removed).
452      * @param input The string in input.
453      * @param charsDoNotBeRemoved The chars that do not be removed.
454      * @return The string as output.
455     */

456     public static String JavaDoc trimButChars (String JavaDoc input, String JavaDoc charsDoNotBeRemoved)
457     {
458     
459         StringBuffer JavaDoc output = new StringBuffer JavaDoc();
460
461         boolean charFound=false;
462         for (int index=0; index<input.length(); index++)
463         {
464             charFound=false;
465             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
466                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
467                     charFound=true;
468             if (charFound)
469                 output.append(input.charAt(index));
470         }
471         
472         return output.toString();
473         
474     }
475     
476     /**
477      * Remove from the beginning and the end of the input string all the characters
478      * with the only exception of the characters specified in charsDoNotBeRemoved param.
479      * <BR>The removal process removes only chars at the beginning and at the end of the string.
480      * <BR>For example if you call trimButCharsBeginEnd(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
481      * <BR>you obtain a string &quot;+12.5&quot; as output (+,.,1,2,3,4,5,6,7,8,9,0 are chars that do not be removed).
482      * <BR>For example if you call trimButCharsBeginEnd(&quot;&lt;DIV&gt; +1 2 . 5 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
483      * <BR>you obtain a string &quot;+1 2 . 5&quot; as output (the spaces inside the string are not removed).
484      * @param input The string in input.
485      * @param charsDoNotBeRemoved The chars that do not be removed.
486      * @return The string as output.
487     */

488     public static String JavaDoc trimButCharsBeginEnd (String JavaDoc input, String JavaDoc charsDoNotBeRemoved)
489     {
490     
491         String JavaDoc output = new String JavaDoc();
492
493         int begin=0;
494         int end=input.length()-1;
495         boolean charFound=false;
496         boolean ok=true;
497         for (int index=begin; (index<input.length()) && ok; index++)
498         {
499             charFound=false;
500             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
501                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
502                     charFound=true;
503             if (charFound)
504             {
505                 begin=index;
506                 ok=false;
507             }
508         }
509         ok=true;
510         for (int index=end; (index>=0) && ok; index--)
511         {
512             charFound=false;
513             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
514                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
515                     charFound=true;
516             if (charFound)
517             {
518                 end=index;
519                 ok=false;
520             }
521         }
522         output=input.substring(begin,end+1);
523
524         return output;
525         
526     }
527
528     /**
529      * Split the input string considering as string separator
530      * the chars specified in the input variable charsToBeRemoved.
531      * <BR>For example if you call splitChars(&quot;&lt;DIV&gt; +12.5, +3.4 &lt;/DIV&gt;&quot;, &quot; <>DIV/,&quot;),
532      * <BR>you obtain an array of strings {&quot;+12.5&quot;, &quot;+3.4&quot;} as output (space chars and &lt;,&gt;,D,I,V,/ and the comma are chars that must be removed).
533      * @param input The string in input.
534      * @param charsToBeRemoved The chars to be removed.
535      * @return The array of strings as output.
536     */

537     public static String JavaDoc[] splitChars (String JavaDoc input, String JavaDoc charsToBeRemoved)
538     {
539     
540         ArrayList JavaDoc output = new ArrayList JavaDoc();
541         int minCapacity = 0;
542         StringBuffer JavaDoc str = new StringBuffer JavaDoc();
543
544         boolean charFound = false;
545         boolean toBeAdd = false;
546         for (int index=0; index<input.length(); index++)
547         {
548             charFound=false;
549             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
550                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
551                     charFound=true;
552             if (!(charFound))
553             {
554                 str.append(input.charAt(index));
555                 toBeAdd=false;
556             }
557             else
558                 if (!toBeAdd)
559                     toBeAdd=true;
560             // finished to parse one string
561
if (toBeAdd && (str.length()!=0)) {
562                 minCapacity++;
563                 output.ensureCapacity(minCapacity);
564                 if (output.add(str.toString()))
565                     str = new StringBuffer JavaDoc();
566                 else
567                     minCapacity--;
568             }
569         }
570         // add the last string
571
if (str.length()!=0) {
572             minCapacity++;
573             output.ensureCapacity(minCapacity);
574             if (output.add(str.toString()))
575                 str = new StringBuffer JavaDoc();
576             else
577                 minCapacity--;
578         }
579
580         output.trimToSize();
581         Object JavaDoc[] outputObj = output.toArray();
582         String JavaDoc[] outputStr = new String JavaDoc[output.size()];
583         for (int i=0; i<output.size(); i++)
584             outputStr[i] = new String JavaDoc((String JavaDoc) outputObj[i]);
585         return outputStr;
586         
587     }
588
589     /**
590      * Remove from the input string all the chars specified in the input variable charsToBeRemoved.
591      * <BR>For example if you call trimChars(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;<>DIV/ &quot;),
592      * <BR>you obtain a string &quot;+12.5&quot; as output (&lt;,&gt;,D,I,V,/ and space char are chars that must be removed).
593      * <BR>For example if you call trimChars(&quot;&lt;DIV&gt; Trim All Chars Also The Ones Inside The String &lt;/DIV&gt;&quot;, &quot;<>DIV/ &quot;),
594      * <BR>you obtain a string &quot;TrimAllCharsAlsoTheOnesInsideTheString&quot; as output (all the spaces inside the string are removed).
595      * @param input The string in input.
596      * @param charsToBeRemoved The chars to be removed.
597      * @return The string as output.
598     */

599     public static String JavaDoc trimChars (String JavaDoc input, String JavaDoc charsToBeRemoved)
600     {
601     
602         StringBuffer JavaDoc output = new StringBuffer JavaDoc();
603
604         boolean charFound=false;
605         for (int index=0; index<input.length(); index++)
606         {
607             charFound=false;
608             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
609                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
610                     charFound=true;
611             if (!(charFound))
612                 output.append(input.charAt(index));
613         }
614
615         return output.toString();
616
617     }
618
619     /**
620      * Remove from the beginning and the end of the input string all the chars specified in the input variable charsToBeRemoved.
621      * <BR>The removal process removes only chars at the beginning and at the end of the string.
622      * <BR>For example if you call trimCharsBeginEnd(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;<>DIV/ &quot;),
623      * <BR>you obtain a string &quot;+12.5&quot; as output (' ' is a space char and &lt;,&gt;,D,I,V,/ are chars that must be removed).
624      * <BR>For example if you call trimCharsBeginEnd(&quot;&lt;DIV&gt; Trim all spaces but not the ones inside the string &lt;/DIV&gt;&quot;, &quot;<>DIV/ &quot;),
625      * <BR>you obtain a string &quot;Trim all spaces but not the ones inside the string&quot; as output (all the spaces inside the string are preserved).
626      * @param input The string in input.
627      * @param charsToBeRemoved The chars to be removed.
628      * @return The string as output.
629     */

630     public static String JavaDoc trimCharsBeginEnd (String JavaDoc input, String JavaDoc charsToBeRemoved)
631     {
632     
633         String JavaDoc output = new String JavaDoc();
634
635         int begin=0;
636         int end=input.length()-1;
637         boolean charFound=false;
638         boolean ok=true;
639         for (int index=begin; (index<input.length()) && ok; index++)
640         {
641             charFound=false;
642             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
643                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
644                     charFound=true;
645             if (!(charFound))
646             {
647                 begin=index;
648                 ok=false;
649             }
650         }
651         ok=true;
652         for (int index=end; (index>=0) && ok; index--)
653         {
654             charFound=false;
655             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
656                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
657                     charFound=true;
658             if (!(charFound))
659             {
660                 end=index;
661                 ok=false;
662             }
663         }
664         output=input.substring(begin,end+1);
665
666         return output;
667         
668     }
669
670     /**
671      * Split the input string in a string array,
672      * considering the tags as delimiter for splitting.
673      * @see ParserUtils#splitTags (String input, String[] tags, boolean recursive, boolean insideTag).
674      */

675     public static String JavaDoc[] splitTags (String JavaDoc input, String JavaDoc[] tags)
676         throws ParserException, UnsupportedEncodingException JavaDoc
677     {
678         return splitTags (input, tags, true, true);
679     }
680     
681     /**
682      * Split the input string in a string array,
683      * considering the tags as delimiter for splitting.
684      * <BR>For example if you call splitTags(&quot;Begin &lt;DIV&gt;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}),
685      * <BR>you obtain a string array {&quot;Begin &quot;, &quot; ALL OK&quot;} as output (splitted &lt;DIV&gt; tags and their content recursively).
686      * <BR>For example if you call splitTags(&quot;Begin &lt;DIV&gt;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, false, false),
687      * <BR>you obtain a string array {&quot;Begin &quot;, &quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot; ALL OK&quot;} as output (splitted &lt;DIV&gt; tags and not their content and no recursively).
688      * <BR>For example if you call splitTags(&quot;Begin &lt;DIV&gt;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, true, false),
689      * <BR>you obtain a string array {&quot;Begin &quot;, &quot; +12.5 &quot;, &quot; ALL OK&quot;} as output (splitted &lt;DIV&gt; tags and not their content recursively).
690      * <BR>For example if you call splitTags(&quot;Begin &lt;DIV&gt;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, false, true),
691      * <BR>you obtain a string array {&quot;Begin &quot;, &quot; ALL OK&quot;} as output (splitted &lt;DIV&gt; tags and their content).
692      * @param input The string in input.
693      * @param tags The tags to be used as splitting delimiter.
694      * @param recursive Optional parameter (true if not present), if true delete all the tags recursively.
695      * @param insideTag Optional parameter (true if not present), if true delete also the content of the tags.
696      * @return The string array containing the strings delimited by tags.
697      */

698     public static String JavaDoc[] splitTags (String JavaDoc input, String JavaDoc[] tags, boolean recursive, boolean insideTag)
699         throws ParserException, UnsupportedEncodingException JavaDoc
700     {
701     
702         ArrayList JavaDoc outputArrayList = new ArrayList JavaDoc();
703         int minCapacity = 0;
704         String JavaDoc output = new String JavaDoc();
705         String JavaDoc inputModified = new String JavaDoc(input);
706         String JavaDoc[] outputStr = new String JavaDoc[] {};
707         
708         String JavaDoc dummyString = createDummyString (' ', input.length());
709         
710         // loop inside the different tags to be trimmed
711
for (int i=0; i<tags.length; i++)
712         {
713             
714             // loop inside the tags of the same type
715
NodeList links = getLinks (inputModified, tags[i], recursive);
716             for (int j=0; j<links.size(); j++)
717             {
718                 CompositeTag beginTag = (CompositeTag)links.elementAt(j);
719                 Tag endTag = beginTag.getEndTag();
720
721                 // positions of begin and end tags
722
int beginTagBegin = beginTag.getStartPosition ();
723                 int endTagBegin = beginTag.getEndPosition ();
724                 int beginTagEnd = endTag.getStartPosition ();
725                 int endTagEnd = endTag.getEndPosition ();
726
727                 if (insideTag)
728                 {
729                     dummyString = modifyDummyString (new String JavaDoc(dummyString), beginTagBegin, endTagEnd);
730                 }
731                 else
732                 {
733                     dummyString = modifyDummyString (new String JavaDoc(dummyString), beginTagBegin, endTagBegin);
734                     dummyString = modifyDummyString (new String JavaDoc(dummyString), beginTagEnd, endTagEnd);
735                 }
736             }
737             for (int k=dummyString.indexOf(' '); (k<dummyString.length()) && (k!=-1);)
738             {
739                 int kNew = dummyString.indexOf('*',k);
740                 if (kNew!=-1)
741                 {
742                     output = inputModified.substring(k,kNew);
743                     k = dummyString.indexOf(' ',kNew);
744                     
745                     minCapacity++;
746                     outputArrayList.ensureCapacity(minCapacity);
747                     if (outputArrayList.add(output))
748                         output = new String JavaDoc();
749                     else
750                         minCapacity--;
751                 }
752                 else
753                 {
754                     output = inputModified.substring(k,dummyString.length());
755                     k = kNew;
756                     
757                     minCapacity++;
758                     outputArrayList.ensureCapacity(minCapacity);
759                     if (outputArrayList.add(output))
760                         output = new String JavaDoc();
761                     else
762                         minCapacity--;
763                 }
764             }
765             StringBuffer JavaDoc outputStringBuffer = new StringBuffer JavaDoc();
766             outputArrayList.trimToSize();
767             Object JavaDoc[] outputObj = outputArrayList.toArray();
768             outputStr = new String JavaDoc[outputArrayList.size()];
769             for (int j=0; j<outputArrayList.size(); j++)
770             {
771                 outputStr[j] = new String JavaDoc((String JavaDoc) outputObj[j]);
772                 outputStringBuffer.append(outputStr[j]);
773             }
774             outputArrayList = new ArrayList JavaDoc();
775             inputModified = new String JavaDoc(outputStringBuffer.toString());
776             dummyString = createDummyString (' ', inputModified.length());
777         }
778         
779         return outputStr;
780         
781     }
782     
783     /**
784      * Split the input string in a string array,
785      * considering the tags as delimiter for splitting.
786      * <BR>Use Class class as input parameter
787      * instead of tags[] string array.
788      * @see ParserUtils#splitTags (String input, String[] tags, boolean recursive, boolean insideTag).
789      */

790     public static String JavaDoc[] splitTags (String JavaDoc input, Class JavaDoc nodeType)
791         throws ParserException, UnsupportedEncodingException JavaDoc
792     {
793         return splitTags (input, new NodeClassFilter (nodeType), true, true);
794     }
795     
796     /**
797      * Split the input string in a string array,
798      * considering the tags as delimiter for splitting.
799      * <BR>Use Class class as input parameter
800      * instead of tags[] string array.
801      * @see ParserUtils#splitTags (String input, String[] tags, boolean recursive, boolean insideTag).
802      */

803     public static String JavaDoc[] splitTags (String JavaDoc input, Class JavaDoc nodeType, boolean recursive, boolean insideTag)
804         throws ParserException, UnsupportedEncodingException JavaDoc
805     {
806         return splitTags (input, new NodeClassFilter (nodeType), recursive, insideTag);
807     }
808     
809     /**
810      * Split the input string in a string array,
811      * considering the tags as delimiter for splitting.
812      * <BR>Use NodeFilter class as input parameter
813      * instead of tags[] string array.
814      * @see ParserUtils#splitTags (String input, String[] tags, boolean recursive, boolean insideTag).
815      */

816     public static String JavaDoc[] splitTags (String JavaDoc input, NodeFilter filter)
817         throws ParserException, UnsupportedEncodingException JavaDoc
818     {
819         return splitTags (input, filter, true, true);
820     }
821     
822     /**
823      * Split the input string in a string array,
824      * considering the tags as delimiter for splitting.
825      * <BR>Use NodeFilter class as input parameter
826      * instead of tags[] string array.
827      * @see ParserUtils#splitTags (String input, String[] tags, boolean recursive, boolean insideTag).
828      */

829     public static String JavaDoc[] splitTags (String JavaDoc input, NodeFilter filter, boolean recursive, boolean insideTag)
830         throws ParserException, UnsupportedEncodingException JavaDoc
831     {
832     
833         ArrayList JavaDoc outputArrayList = new ArrayList JavaDoc();
834         int minCapacity = 0;
835         String JavaDoc output = new String JavaDoc();
836         
837         String JavaDoc dummyString = createDummyString (' ', input.length());
838
839         // loop inside the tags of the same type
840
NodeList links = getLinks (input, filter, recursive);
841         for (int j=0; j<links.size(); j++)
842         {
843             CompositeTag beginTag = (CompositeTag)links.elementAt(j);
844             Tag endTag = beginTag.getEndTag();
845
846             // positions of begin and end tags
847
int beginTagBegin = beginTag.getStartPosition ();
848             int endTagBegin = beginTag.getEndPosition ();
849             int beginTagEnd = endTag.getStartPosition ();
850             int endTagEnd = endTag.getEndPosition ();
851
852             if (insideTag)
853             {
854                 dummyString = modifyDummyString (new String JavaDoc(dummyString), beginTagBegin, endTagEnd);
855             }
856             else
857             {
858                 dummyString = modifyDummyString (new String JavaDoc(dummyString), beginTagBegin, endTagBegin);
859                 dummyString = modifyDummyString (new String JavaDoc(dummyString), beginTagEnd, endTagEnd);
860             }
861         }
862         for (int k=dummyString.indexOf(' '); (k<dummyString.length()) && (k!=-1);)
863         {
864             int kNew = dummyString.indexOf('*',k);
865             if (kNew!=-1)
866             {
867                 output = input.substring(k,kNew);
868                 k = dummyString.indexOf(' ',kNew);
869                     
870                 minCapacity++;
871                 outputArrayList.ensureCapacity(minCapacity);
872                 if (outputArrayList.add(output))
873                     output = new String JavaDoc();
874                 else
875                     minCapacity--;
876             }
877             else
878             {
879                 output = input.substring(k,dummyString.length());
880                 k = kNew;
881                     
882                 minCapacity++;
883                 outputArrayList.ensureCapacity(minCapacity);
884                 if (outputArrayList.add(output))
885                     output = new String JavaDoc();
886                 else
887                     minCapacity--;
888             }
889             
890         }
891         
892         outputArrayList.trimToSize();
893         Object JavaDoc[] outputObj = outputArrayList.toArray();
894         String JavaDoc[] outputStr = new String JavaDoc[outputArrayList.size()];
895         for (int i=0; i<outputArrayList.size(); i++)
896             outputStr[i] = new String JavaDoc((String JavaDoc) outputObj[i]);
897         return outputStr;
898         
899     }
900
901     /**
902      * Trim the input string, removing all the tags in the input string.
903      * <BR>The method trims all the substrings included in the input string of the following type:
904      * &quot;&lt;XXX&gt;&quot;, where XXX could be a string of any type.
905      * <BR>If you set to true the inside parameter, the method deletes also the YYY string in the following input string:
906      * &quot;&lt;XXX&gt;YYY&lt;ZZZ&gt;&quot;, note that ZZZ is not necessary the closing tag of XXX.
907      * @param input The string in input.
908      * @param inside If true, it forces the method to delete also what is inside the tags.
909      * @return The string without tags.
910      */

911     public static String JavaDoc trimAllTags (String JavaDoc input, boolean inside)
912     {
913     
914         StringBuffer JavaDoc output = new StringBuffer JavaDoc();
915
916         if (inside) {
917             if ((input.indexOf('<')==-1) || (input.lastIndexOf('>')==-1) || (input.lastIndexOf('>')<input.indexOf('<'))) {
918                 output.append(input);
919             } else {
920                 output.append(input.substring(0, input.indexOf('<')));
921                 output.append(input.substring(input.lastIndexOf('>')+1, input.length()));
922             }
923         } else {
924             boolean write = true;
925             for (int index=0; index<input.length(); index++)
926             {
927                 if (input.charAt(index)=='<' && write)
928                     write = false;
929                 if (write)
930                     output.append(input.charAt(index));
931                 if (input.charAt(index)=='>' && (!write))
932                     write = true;
933             }
934         }
935
936         return output.toString();
937     }
938     
939
940     /**
941      * Trim all tags in the input string and
942      * return a string like the input one
943      * without the tags and their content.
944      * @see ParserUtils#trimTags (String input, String[] tags, boolean recursive, boolean insideTag).
945      */

946     public static String JavaDoc trimTags (String JavaDoc input, String JavaDoc[] tags)
947         throws ParserException, UnsupportedEncodingException JavaDoc
948     {
949         return trimTags (input, tags, true, true);
950     }
951     
952     /**
953      * Trim all tags in the input string and
954      * return a string like the input one
955      * without the tags and their content (optional).
956      * <BR>For example if you call trimTags(&quot;&lt;DIV&gt;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}),
957      * <BR>you obtain a string &quot; ALL OK&quot; as output (trimmed &lt;DIV&gt; tags and their content recursively).
958      * <BR>For example if you call trimTags(&quot;&lt;DIV&gt;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, false, false),
959      * <BR>you obtain a string &quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt; ALL OK&quot; as output (trimmed &lt;DIV&gt; tags and not their content and no recursively).
960      * <BR>For example if you call trimTags(&quot;&lt;DIV&gt;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, true, false),
961      * <BR>you obtain a string &quot; +12.5 ALL OK&quot; as output (trimmed &lt;DIV&gt; tags and not their content recursively).
962      * <BR>For example if you call trimTags(&quot;&lt;DIV&gt;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, false, true),
963      * <BR>you obtain a string &quot; ALL OK&quot; as output (trimmed &lt;DIV&gt; tags and their content).
964      * @param input The string in input.
965      * @param tags The tags to be removed.
966      * @param recursive Optional parameter (true if not present), if true delete all the tags recursively.
967      * @param insideTag Optional parameter (true if not present), if true delete also the content of the tags.
968      * @return The string without tags.
969      */

970     public static String JavaDoc trimTags (String JavaDoc input, String JavaDoc[] tags, boolean recursive, boolean insideTag)
971         throws ParserException, UnsupportedEncodingException JavaDoc
972     {
973     
974         StringBuffer JavaDoc output = new StringBuffer JavaDoc();
975         String JavaDoc inputModified = new String JavaDoc(input);
976         String JavaDoc dummyString = createDummyString (' ', input.length());
977             
978         // loop inside the different tags to be trimmed
979
for (int i=0; i<tags.length; i++)
980         {
981             output = new StringBuffer JavaDoc();
982             
983             // loop inside the tags of the same type
984
NodeList links = getLinks (inputModified, tags[i], recursive);
985             for (int j=0; j<links.size(); j++)
986             {
987                 CompositeTag beginTag = (CompositeTag)links.elementAt(j);
988                 Tag endTag = beginTag.getEndTag();
989
990                 // positions of begin and end tags
991
int beginTagBegin = beginTag.getStartPosition ();
992                 int endTagBegin = beginTag.getEndPosition ();
993                 int beginTagEnd = endTag.getStartPosition ();
994                 int endTagEnd = endTag.getEndPosition ();
995
996
997                 if (insideTag)
998                 {
999                     dummyString = modifyDummyString (new String JavaDoc(dummyString), beginTagBegin, endTagEnd);
1000                }
1001                else
1002                {
1003                    dummyString = modifyDummyString (new String JavaDoc(dummyString), beginTagBegin, endTagBegin);
1004                    dummyString = modifyDummyString (new String JavaDoc(dummyString), beginTagEnd, endTagEnd);
1005                }
1006            }
1007            for (int k=dummyString.indexOf(' '); (k<dummyString.length()) && (k!=-1);)
1008            {
1009                int kNew = dummyString.indexOf('*',k);
1010                if (kNew!=-1)
1011                {
1012                    output = output.append(inputModified.substring(k,kNew));
1013                    k = dummyString.indexOf(' ',kNew);
1014                }
1015                else
1016                {
1017                    output = output.append(inputModified.substring(k,dummyString.length()));
1018                    k = kNew;
1019                }
1020            }
1021            inputModified = new String JavaDoc(output);
1022            dummyString = createDummyString (' ', inputModified.length());
1023        }
1024        
1025        return output.toString();
1026        
1027    }
1028    
1029    /**
1030     * Trim all tags in the input string and
1031     * return a string like the input one
1032     * without the tags and their content.
1033     * <BR>Use Class class as input parameter
1034     * instead of tags[] string array.
1035     * @see ParserUtils#trimTags (String input, String[] tags, boolean recursive, boolean insideTag).
1036     */

1037    public static String JavaDoc trimTags (String JavaDoc input, Class JavaDoc nodeType)
1038        throws ParserException, UnsupportedEncodingException JavaDoc
1039    {
1040        return trimTags (input, new NodeClassFilter (nodeType), true, true);
1041    }
1042
1043    /**
1044     * Trim all tags in the input string and
1045     * return a string like the input one
1046     * without the tags and their content (optional).
1047     * <BR>Use Class class as input parameter
1048     * instead of tags[] string array.
1049     * @see ParserUtils#trimTags (String input, String[] tags, boolean recursive, boolean insideTag).
1050     */

1051    public static String JavaDoc trimTags (String JavaDoc input, Class JavaDoc nodeType, boolean recursive, boolean insideTag)
1052        throws ParserException, UnsupportedEncodingException JavaDoc
1053    {
1054        return trimTags (input, new NodeClassFilter (nodeType), recursive, insideTag);
1055    }
1056
1057    /**
1058     * Trim all tags in the input string and
1059     * return a string like the input one
1060     * without the tags and their content.
1061     * <BR>Use NodeFilter class as input parameter
1062     * instead of tags[] string array.
1063     * @see ParserUtils#trimTags (String input, String[] tags, boolean recursive, boolean insideTag).
1064     */

1065    public static String JavaDoc trimTags (String JavaDoc input, NodeFilter filter)
1066        throws ParserException, UnsupportedEncodingException JavaDoc
1067    {
1068        return trimTags (input, filter, true, true);
1069    }
1070    
1071    /**
1072     * Trim all tags in the input string and
1073     * return a string like the input one
1074     * without the tags and their content (optional).
1075     * <BR>Use NodeFilter class as input parameter
1076     * instead of tags[] string array.
1077     * @see ParserUtils#trimTags (String input, String[] tags, boolean recursive, boolean insideTag).
1078     */

1079    public static String JavaDoc trimTags (String JavaDoc input, NodeFilter filter, boolean recursive, boolean insideTag)
1080        throws ParserException, UnsupportedEncodingException JavaDoc
1081    {
1082    
1083        StringBuffer JavaDoc output = new StringBuffer JavaDoc();
1084        
1085        String JavaDoc dummyString = createDummyString (' ', input.length());
1086
1087        // loop inside the tags of the same type
1088
NodeList links = getLinks (input, filter, recursive);
1089        for (int j=0; j<links.size(); j++)
1090        {
1091            CompositeTag beginTag = (CompositeTag)links.elementAt(j);
1092            Tag endTag = beginTag.getEndTag();
1093
1094            // positions of begin and end tags
1095
int beginTagBegin = beginTag.getStartPosition ();
1096            int endTagBegin = beginTag.getEndPosition ();
1097            int beginTagEnd = endTag.getStartPosition ();
1098            int endTagEnd = endTag.getEndPosition ();
1099
1100            if (insideTag)
1101            {
1102                dummyString = modifyDummyString (new String JavaDoc(dummyString), beginTagBegin, endTagEnd);
1103            }
1104            else
1105            {
1106                dummyString = modifyDummyString (new String JavaDoc(dummyString), beginTagBegin, endTagBegin);
1107                dummyString = modifyDummyString (new String JavaDoc(dummyString), beginTagEnd, endTagEnd);
1108            }
1109        }
1110        for (int k=dummyString.indexOf(' '); (k<dummyString.length()) && (k!=-1);)
1111        {
1112            int kNew = dummyString.indexOf('*',k);
1113            if (kNew!=-1)
1114            {
1115                output = output.append(input.substring(k,kNew));
1116                k = dummyString.indexOf(' ',kNew);
1117            }
1118            else
1119            {
1120                output = output.append(input.substring(k,dummyString.length()));
1121                k = kNew;
1122            }
1123            
1124        }
1125        
1126        return output.toString();
1127        
1128    }
1129    
1130    /**
1131     * Create a Parser Object having a String Object as input (instead of a url or a string representing the url location).
1132     * <BR>The string will be parsed as it would be a file.
1133     * @param input The string in input.
1134     * @return The Parser Object with the string as input stream.
1135     */

1136    public static Parser createParserParsingAnInputString (String JavaDoc input)
1137        throws ParserException, UnsupportedEncodingException JavaDoc
1138    {
1139    
1140        Parser parser = new Parser();
1141        Lexer lexer = new Lexer();
1142        Page page = new Page(input);
1143        lexer.setPage(page);
1144        parser.setLexer(lexer);
1145        
1146        return parser;
1147        
1148    }
1149
1150    private static NodeList getLinks (String JavaDoc output, String JavaDoc tag, boolean recursive)
1151        throws ParserException, UnsupportedEncodingException JavaDoc
1152    {
1153        
1154        Parser parser = new Parser();
1155        NodeFilter filterLink = new TagNameFilter (tag);
1156        NodeList links = new NodeList ();
1157        parser = createParserParsingAnInputString(output);
1158        links = parser.extractAllNodesThatMatch(filterLink);
1159
1160        // loop to remove tags added recursively
1161
// so if you have selected 'not recursive option'
1162
// you have only the tag container and not the contained tags.
1163
if (!recursive)
1164        {
1165            for (int j=0; j<links.size(); j++)
1166            {
1167                CompositeTag jStartTag = (CompositeTag)links.elementAt(j);
1168                Tag jEndTag = jStartTag.getEndTag();
1169                int jStartTagBegin = jStartTag.getStartPosition ();
1170                int jEndTagEnd = jEndTag.getEndPosition ();
1171                for (int k=0; k<links.size(); k++)
1172                {
1173                    CompositeTag kStartTag = (CompositeTag)links.elementAt(k);
1174                    Tag kEndTag = kStartTag.getEndTag();
1175                    int kStartTagBegin = kStartTag.getStartPosition ();
1176                    int kEndTagEnd = kEndTag.getEndPosition ();
1177                    if ((k!=j) && (kStartTagBegin>jStartTagBegin) && (kEndTagEnd<jEndTagEnd))
1178                    {
1179                        links.remove(k);
1180                        k--;
1181                        j--;
1182                    }
1183                }
1184            }
1185        }
1186        
1187        return links;
1188        
1189    }
1190    
1191    private static NodeList getLinks (String JavaDoc output, NodeFilter filter, boolean recursive)
1192        throws ParserException, UnsupportedEncodingException JavaDoc
1193    {
1194        
1195        Parser parser = new Parser();
1196        NodeList links = new NodeList ();
1197        parser = createParserParsingAnInputString(output);
1198        links = parser.extractAllNodesThatMatch(filter);
1199
1200        // loop to remove tags added recursively
1201
// so if you have selected 'not recursive option'
1202
// you have only the tag container and not the contained tags.
1203
if (!recursive)
1204        {
1205            for (int j=0; j<links.size(); j++)
1206            {
1207                CompositeTag jStartTag = (CompositeTag)links.elementAt(j);
1208                Tag jEndTag = jStartTag.getEndTag();
1209                int jStartTagBegin = jStartTag.getStartPosition ();
1210                int jEndTagEnd = jEndTag.getEndPosition ();
1211                for (int k=0; k<links.size(); k++)
1212                {
1213                    CompositeTag kStartTag = (CompositeTag)links.elementAt(k);
1214                    Tag kEndTag = kStartTag.getEndTag();
1215                    int kStartTagBegin = kStartTag.getStartPosition ();
1216                    int kEndTagEnd = kEndTag.getEndPosition ();
1217                    if ((k!=j) && (kStartTagBegin>jStartTagBegin) && (kEndTagEnd<jEndTagEnd))
1218                    {
1219                        links.remove(k);
1220                        k--;
1221                        j--;
1222                    }
1223                }
1224            }
1225        }
1226        
1227        return links;
1228        
1229    }
1230    
1231    private static String JavaDoc createDummyString (char fillingChar, int length)
1232    {
1233        StringBuffer JavaDoc dummyStringBuffer = new StringBuffer JavaDoc();
1234        for (int j=0; j<length; j++)
1235            dummyStringBuffer = dummyStringBuffer.append(fillingChar);
1236        return new String JavaDoc(dummyStringBuffer);
1237    }
1238    
1239    private static String JavaDoc modifyDummyString (String JavaDoc dummyString, int beginTag, int endTag)
1240    {
1241        String JavaDoc dummyStringInterval = createDummyString ('*', endTag-beginTag);
1242        return new String JavaDoc(dummyString.substring(0, beginTag) + dummyStringInterval + dummyString.substring(endTag, dummyString.length()));
1243    }
1244    
1245}
Popular Tags