KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > util > ParserUtils


1 // HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
2
// http://sourceforge.org/projects/htmlparser
3
// Copyright (C) 2004 Somik Raha
4
//
5
// Revision Control Information
6
//
7
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ParserUtils.java,v $
8
// $Author: anul $
9
// $Date: 2004/08/27 09:54:27 $
10
// $Revision: 1.46 $
11
//
12
// This library is free software; you can redistribute it and/or
13
// modify it under the terms of the GNU Lesser General Public
14
// License as published by the Free Software Foundation; either
15
// version 2.1 of the License, or (at your option) any later version.
16
//
17
// This library is distributed in the hope that it will be useful,
18
// but WITHOUT ANY WARRANTY; without even the implied warranty of
19
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
// Lesser General Public License for more details.
21
//
22
// You should have received a copy of the GNU Lesser General Public
23
// License along with this library; if not, write to the Free Software
24
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
//
26

27 package org.htmlparser.util;
28
29 import java.io.UnsupportedEncodingException JavaDoc;
30 import java.util.ArrayList JavaDoc;
31
32 import org.htmlparser.Node;
33 import org.htmlparser.NodeFilter;
34 import org.htmlparser.Parser;
35 import org.htmlparser.Tag;
36 import org.htmlparser.filters.NodeClassFilter;
37 import org.htmlparser.filters.TagNameFilter;
38 import org.htmlparser.lexer.Lexer;
39 import org.htmlparser.lexer.Page;
40 import org.htmlparser.tags.CompositeTag;
41 import org.htmlparser.util.NodeList;
42 import org.htmlparser.util.ParserException;
43
44
45 public class ParserUtils
46 {
47     public static String JavaDoc removeChars(String JavaDoc s, char occur) {
48         StringBuffer JavaDoc newString = new StringBuffer JavaDoc();
49         char ch;
50         for (int i = 0; i < s.length(); i++) {
51             ch = s.charAt(i);
52             if (ch != occur)
53                 newString.append(ch);
54         }
55         return newString.toString();
56     }
57
58     public static String JavaDoc removeEscapeCharacters(String JavaDoc inputString) {
59         inputString = ParserUtils.removeChars(inputString, '\r');
60         inputString = ParserUtils.removeChars(inputString, '\n');
61         inputString = ParserUtils.removeChars(inputString, '\t');
62         return inputString;
63     }
64
65     public static String JavaDoc removeTrailingBlanks(String JavaDoc text) {
66         char ch = ' ';
67         while (ch == ' ') {
68             ch = text.charAt(text.length() - 1);
69             if (ch == ' ')
70                 text = text.substring(0, text.length() - 1);
71         }
72         return text;
73     }
74
75     /**
76      * Search given node and pick up any objects of given type.
77      * @param node The node to search.
78      * @param type The class to search for.
79      * @return A node array with the matching nodes.
80      */

81     public static Node[] findTypeInNode(Node node, Class JavaDoc type)
82     {
83         NodeFilter filter;
84         NodeList ret;
85         
86         ret = new NodeList ();
87         filter = new NodeClassFilter (type);
88         node.collectInto (ret, filter);
89
90         return (ret.toNodeArray ());
91     }
92
93     /**
94      * Split the input string considering as string separator
95      * all the not numerical characters
96      * with the only exception of the characters specified in charsDoNotBeRemoved param.
97      * <BR>For example if you call splitButDigits(&quot;&lt;DIV&gt; +12.5, +3.4 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
98      * <BR>you obtain an array of strings {&quot;+12.5&quot;, &quot;+3.4&quot;} as output (1,2,3,4 and 5 are digits and +,. are chars that do not be removed).
99      * @param input The string in input.
100      * @param charsDoNotBeRemoved The chars that do not be removed.
101      * @return The array of strings as output.
102     */

103     public static String JavaDoc[] splitButDigits (String JavaDoc input, String JavaDoc charsDoNotBeRemoved)
104     {
105     
106         ArrayList JavaDoc output = new ArrayList JavaDoc();
107         int minCapacity = 0;
108         StringBuffer JavaDoc str = new StringBuffer JavaDoc();
109
110         boolean charFound = false;
111         boolean toBeAdd = false;
112         for (int index=0; index<input.length(); index++)
113         {
114             charFound=false;
115             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
116                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
117                     charFound=true;
118             if ((Character.isDigit(input.charAt(index))) || (charFound))
119             {
120                 str.append(input.charAt(index));
121                 toBeAdd=false;
122             }
123             else
124                 if (!toBeAdd)
125                     toBeAdd=true;
126             // finished to parse one string
127
if (toBeAdd && (str.length()!=0)) {
128                 minCapacity++;
129                 output.ensureCapacity(minCapacity);
130                 if (output.add(str.toString()))
131                     str = new StringBuffer JavaDoc();
132                 else
133                     minCapacity--;
134             }
135         }
136         // add the last string
137
if (str.length()!=0) {
138             minCapacity++;
139             output.ensureCapacity(minCapacity);
140             if (output.add(str.toString()))
141                 str = new StringBuffer JavaDoc();
142             else
143                 minCapacity--;
144         }
145
146         output.trimToSize();
147         Object JavaDoc[] outputObj = output.toArray();
148         String JavaDoc[] outputStr = new String JavaDoc[output.size()];
149         for (int i=0; i<output.size(); i++)
150             outputStr[i] = new String JavaDoc((String JavaDoc) outputObj[i]);
151         return outputStr;
152         
153     }
154     
155     /**
156      * Remove from the input string all the not numerical characters
157      * with the only exception of the characters specified in charsDoNotBeRemoved param.
158      * <BR>For example if you call trimButDigits(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
159      * <BR>you obtain a string &quot;+12.5&quot; as output (1,2 and 5 are digits and +,. are chars that do not be removed).
160      * <BR>For example if you call trimButDigits(&quot;&lt;DIV&gt; +1 2 . 5 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
161      * <BR>you obtain a string &quot;+12.5&quot; as output (the spaces between 1 and 2, 2 and ., . and 5 are removed).
162      * @param input The string in input.
163      * @param charsDoNotBeRemoved The chars that do not be removed.
164      * @return The string as output.
165     */

166     public static String JavaDoc trimButDigits (String JavaDoc input, String JavaDoc charsDoNotBeRemoved)
167     {
168     
169         StringBuffer JavaDoc output = new StringBuffer JavaDoc();
170
171         boolean charFound=false;
172         for (int index=0; index<input.length(); index++)
173         {
174             charFound=false;
175             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
176                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
177                     charFound=true;
178             if ((Character.isDigit(input.charAt(index))) || (charFound))
179                 output.append(input.charAt(index));
180         }
181
182         return output.toString();
183         
184     }
185     
186     /**
187      * Remove from the beginning and the end of the input string all the not numerical characters
188      * with the only exception of the characters specified in charsDoNotBeRemoved param.
189      * <BR>The removal process removes only chars at the beginning and at the end of the string.
190      * <BR>For example if you call trimButDigitsBeginEnd(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
191      * <BR>you obtain a string &quot;+12.5&quot; as output (1,2 and 5 are digits and +,. are chars that do not be removed).
192      * <BR>For example if you call trimButDigitsBeginEnd(&quot;&lt;DIV&gt; +1 2 . 5 &lt;/DIV&gt;&quot;, &quot;+.&quot;),
193      * <BR>you obtain a string &quot;+1 2 . 5&quot; as output (the spacess inside the string are not removed).
194      * @param input - The string in input.
195      * @param charsDoNotBeRemoved - The chars that do not be removed.
196      * @return The string as output.
197     */

198     public static String JavaDoc trimButDigitsBeginEnd (String JavaDoc input, String JavaDoc charsDoNotBeRemoved)
199     {
200     
201         String JavaDoc output = new String JavaDoc();
202
203         int begin=0;
204         int end=input.length()-1;
205         boolean charFound=false;
206         boolean ok=true;
207         for (int index=begin; (index<input.length()) && ok; index++)
208         {
209             charFound=false;
210             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
211                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
212                     charFound=true;
213             if ( (Character.isDigit(input.charAt(index))) || (charFound) )
214             {
215                 begin=index;
216                 ok=false;
217             }
218         }
219         ok=true;
220         for (int index=end; (index>=0) && ok; index--)
221         {
222             charFound=false;
223             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
224                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
225                     charFound=true;
226             if ( (Character.isDigit(input.charAt(index))) || (charFound) )
227             {
228                 end=index;
229                 ok=false;
230             }
231         }
232         output=input.substring(begin,end+1);
233
234         return output;
235         
236     }
237     
238     /**
239      * Split the input string considering as string separator
240      * all the spaces and tabs like chars and
241      * the chars specified in the input variable charsToBeRemoved.
242      * <BR>For example if you call splitSpaces(&quot;&lt;DIV&gt; +12.5, +3.4 &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/,&quot;),
243      * &lt;BR>you obtain an array of strings {&quot;+12.5&quot;, &quot;+3.4&quot;} as output (space chars and &lt;,&gt;,D,I,V,/ and the comma are chars that must be removed).
244      * @param input The string in input.
245      * @param charsToBeRemoved The chars to be removed.
246      * @return The array of strings as output.
247     */

248     public static String JavaDoc[] splitSpaces (String JavaDoc input, String JavaDoc charsToBeRemoved)
249     {
250     
251         ArrayList JavaDoc output = new ArrayList JavaDoc();
252         int minCapacity = 0;
253         StringBuffer JavaDoc str = new StringBuffer JavaDoc();
254
255         boolean charFound = false;
256         boolean toBeAdd = false;
257         for (int index=0; index<input.length(); index++)
258         {
259             charFound=false;
260             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
261                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
262                     charFound=true;
263             if (!((Character.isWhitespace(input.charAt(index))) || (Character.isSpaceChar(input.charAt(index))) || (charFound)))
264             {
265                 str.append(input.charAt(index));
266                 toBeAdd=false;
267             }
268             else
269                 if (!toBeAdd)
270                     toBeAdd=true;
271             // finished to parse one string
272
if (toBeAdd && (str.length()!=0)) {
273                 minCapacity++;
274                 output.ensureCapacity(minCapacity);
275                 if (output.add(str.toString()))
276                     str = new StringBuffer JavaDoc();
277                 else
278                     minCapacity--;
279             }
280         }
281         // add the last string
282
if (str.length()!=0) {
283             minCapacity++;
284             output.ensureCapacity(minCapacity);
285             if (output.add(str.toString()))
286                 str = new StringBuffer JavaDoc();
287             else
288                 minCapacity--;
289         }
290
291         output.trimToSize();
292         Object JavaDoc[] outputObj = output.toArray();
293         String JavaDoc[] outputStr = new String JavaDoc[output.size()];
294         for (int i=0; i<output.size(); i++)
295             outputStr[i] = new String JavaDoc((String JavaDoc) outputObj[i]);
296         return outputStr;
297         
298     }
299
300     /**
301      * Remove from the input string all the spaces and tabs like chars.
302      * Remove also the chars specified in the input variable charsToBeRemoved.
303      * <BR>For example if you call trimSpaces(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/&quot;),
304      * <BR>you obtain a string &quot;+12.5&quot; as output (space chars and &lt;,&gt;,D,I,V,/ are chars that must be removed).
305      * <BR>For example if you call trimSpaces(&quot;&lt;DIV&gt; Trim All Spaces Also The Ones Inside The String &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/&quot;),
306      * <BR>you obtain a string &quot;TrimAllSpacesAlsoTheOnesInsideTheString&quot; as output (all the spaces inside the string are removed).
307      * @param input The string in input.
308      * @param charsToBeRemoved The chars to be removed.
309      * @return The string as output.
310     */

311     public static String JavaDoc trimSpaces (String JavaDoc input, String JavaDoc charsToBeRemoved)
312     {
313     
314         StringBuffer JavaDoc output = new StringBuffer JavaDoc();
315
316         boolean charFound=false;
317         for (int index=0; index<input.length(); index++)
318         {
319             charFound=false;
320             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
321                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
322                     charFound=true;
323             if (!((Character.isWhitespace(input.charAt(index))) || (Character.isSpaceChar(input.charAt(index))) || (charFound)))
324                 output.append(input.charAt(index));
325         }
326
327         return output.toString();
328
329     }
330
331     /**
332      * Remove from the beginning and the end of the input string all the spaces and tabs like chars.
333      * Remove also the chars specified in the input variable charsToBeRemoved.
334      * <BR>The removal process removes only chars at the beginning and at the end of the string.
335      * <BR>For example if you call trimSpacesBeginEnd(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/&quot;),
336      * <BR>you obtain a string &quot;+12.5&quot; as output (space chars and &lt;,&gt;,D,I,V,/ are chars that must be removed).
337      * <BR>For example if you call trimSpacesBeginEnd(&quot;&lt;DIV&gt; Trim all spaces but not the ones inside the string &lt;/DIV&gt;&quot;, &quot;&lt;>DIV/&quot;),
338      * <BR>you obtain a string &quot;Trim all spaces but not the ones inside the string&quot; as output (all the spaces inside the string are preserved).
339      * @param input The string in input.
340      * @param charsToBeRemoved The chars to be removed.
341      * @return The string as output.
342     */

343     public static String JavaDoc trimSpacesBeginEnd (String JavaDoc input, String JavaDoc charsToBeRemoved)
344     {
345     
346         String JavaDoc output = new String JavaDoc();
347
348         int begin=0;
349         int end=input.length()-1;
350         boolean charFound=false;
351         boolean ok=true;
352         for (int index=begin; (index<input.length()) && ok; index++)
353         {
354             charFound=false;
355             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
356                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
357                     charFound=true;
358             if (!( (Character.isWhitespace(input.charAt(index))) || (Character.isSpaceChar(input.charAt(index))) || (charFound) ))
359             {
360                 begin=index;
361                 ok=false;
362             }
363         }
364         ok=true;
365         for (int index=end; (index>=0) && ok; index--)
366         {
367             charFound=false;
368             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
369                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
370                     charFound=true;
371             if (!( (Character.isWhitespace(input.charAt(index))) || (Character.isSpaceChar(input.charAt(index))) || (charFound) ))
372             {
373                 end=index;
374                 ok=false;
375             }
376         }
377         output=input.substring(begin,end+1);
378
379         return output;
380         
381     }
382     
383     /**
384      * Split the input string considering as string separator
385      * all the characters
386      * with the only exception of the characters specified in charsDoNotBeRemoved param.
387      * <BR>For example if you call splitButChars(&quot;&lt;DIV&gt; +12.5, +3.4 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
388      * <BR>you obtain an array of strings {&quot;+12.5&quot;, &quot;+3.4&quot;} as output (+,.,1,2,3,4,5,6,7,8,9,0 are chars that do not be removed).
389      * @param input The string in input.
390      * @param charsDoNotBeRemoved The chars that do not be removed.
391      * @return The array of strings as output.
392     */

393     public static String JavaDoc[] splitButChars (String JavaDoc input, String JavaDoc charsDoNotBeRemoved)
394     {
395     
396         ArrayList JavaDoc output = new ArrayList JavaDoc();
397         int minCapacity = 0;
398         StringBuffer JavaDoc str = new StringBuffer JavaDoc();
399
400         boolean charFound = false;
401         boolean toBeAdd = false;
402         for (int index=0; index<input.length(); index++)
403         {
404             charFound=false;
405             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
406                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
407                     charFound=true;
408             if (charFound)
409             {
410                 str.append(input.charAt(index));
411                 toBeAdd=false;
412             }
413             else
414                 if (!toBeAdd)
415                     toBeAdd=true;
416             // finished to parse one string
417
if (toBeAdd && (str.length()!=0)) {
418                 minCapacity++;
419                 output.ensureCapacity(minCapacity);
420                 if (output.add(str.toString()))
421                     str = new StringBuffer JavaDoc();
422                 else
423                     minCapacity--;
424             }
425         }
426         // add the last string
427
if (str.length()!=0) {
428             minCapacity++;
429             output.ensureCapacity(minCapacity);
430             if (output.add(str.toString()))
431                 str = new StringBuffer JavaDoc();
432             else
433                 minCapacity--;
434         }
435
436         output.trimToSize();
437         Object JavaDoc[] outputObj = output.toArray();
438         String JavaDoc[] outputStr = new String JavaDoc[output.size()];
439         for (int i=0; i<output.size(); i++)
440             outputStr[i] = new String JavaDoc((String JavaDoc) outputObj[i]);
441         return outputStr;
442         
443     }
444     
445     /**
446      * Remove from the input string all the characters
447      * with the only exception of the characters specified in charsDoNotBeRemoved param.
448      * <BR>For example if you call trimButChars(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
449      * <BR>you obtain a string &quot;+12.5&quot; as output (+,.,1,2,3,4,5,6,7,8,9,0 are chars that do not be removed).
450      * <BR>For example if you call trimButChars(&quot;&lt;DIV&gt; +1 2 . 5 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
451      * <BR>you obtain a string &quot;+12.5&quot; as output (the spaces between 1 and 2, 2 and ., . and 5 are removed).
452      * @param input The string in input.
453      * @param charsDoNotBeRemoved The chars that do not be removed.
454      * @return The string as output.
455     */

456     public static String JavaDoc trimButChars (String JavaDoc input, String JavaDoc charsDoNotBeRemoved)
457     {
458     
459         StringBuffer JavaDoc output = new StringBuffer JavaDoc();
460
461         boolean charFound=false;
462         for (int index=0; index<input.length(); index++)
463         {
464             charFound=false;
465             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
466                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
467                     charFound=true;
468             if (charFound)
469                 output.append(input.charAt(index));
470         }
471         
472         return output.toString();
473         
474     }
475     
476     /**
477      * Remove from the beginning and the end of the input string all the characters
478      * with the only exception of the characters specified in charsDoNotBeRemoved param.
479      * <BR>The removal process removes only chars at the beginning and at the end of the string.
480      * <BR>For example if you call trimButCharsBeginEnd(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
481      * <BR>you obtain a string &quot;+12.5&quot; as output (+,.,1,2,3,4,5,6,7,8,9,0 are chars that do not be removed).
482      * <BR>For example if you call trimButCharsBeginEnd(&quot;&lt;DIV&gt; +1 2 . 5 &lt;/DIV&gt;&quot;, &quot;+.1234567890&quot;),
483      * <BR>you obtain a string &quot;+1 2 . 5&quot; as output (the spaces inside the string are not removed).
484      * @param input The string in input.
485      * @param charsDoNotBeRemoved The chars that do not be removed.
486      * @return The string as output.
487     */

488     public static String JavaDoc trimButCharsBeginEnd (String JavaDoc input, String JavaDoc charsDoNotBeRemoved)
489     {
490     
491         String JavaDoc output = new String JavaDoc();
492
493         int begin=0;
494         int end=input.length()-1;
495         boolean charFound=false;
496         boolean ok=true;
497         for (int index=begin; (index<input.length()) && ok; index++)
498         {
499             charFound=false;
500             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
501                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
502                     charFound=true;
503             if (charFound)
504             {
505                 begin=index;
506                 ok=false;
507             }
508         }
509         ok=true;
510         for (int index=end; (index>=0) && ok; index--)
511         {
512             charFound=false;
513             for (int charsCount=0; charsCount<charsDoNotBeRemoved.length(); charsCount++)
514                 if (charsDoNotBeRemoved.charAt(charsCount)==input.charAt(index))
515                     charFound=true;
516             if (charFound)
517             {
518                 end=index;
519                 ok=false;
520             }
521         }
522         output=input.substring(begin,end+1);
523
524         return output;
525         
526     }
527
528     /**
529      * Split the input string considering as string separator
530      * the chars specified in the input variable charsToBeRemoved.
531      * <BR>For example if you call splitChars(&quot;&lt;DIV&gt; +12.5, +3.4 &lt;/DIV&gt;&quot;, &quot; <>DIV/,&quot;),
532      * <BR>you obtain an array of strings {&quot;+12.5&quot;, &quot;+3.4&quot;} as output (space chars and &lt;,&gt;,D,I,V,/ and the comma are chars that must be removed).
533      * @param input The string in input.
534      * @param charsToBeRemoved The chars to be removed.
535      * @return The array of strings as output.
536     */

537     public static String JavaDoc[] splitChars (String JavaDoc input, String JavaDoc charsToBeRemoved)
538     {
539     
540         ArrayList JavaDoc output = new ArrayList JavaDoc();
541         int minCapacity = 0;
542         StringBuffer JavaDoc str = new StringBuffer JavaDoc();
543
544         boolean charFound = false;
545         boolean toBeAdd = false;
546         for (int index=0; index<input.length(); index++)
547         {
548             charFound=false;
549             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
550                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
551                     charFound=true;
552             if (!(charFound))
553             {
554                 str.append(input.charAt(index));
555                 toBeAdd=false;
556             }
557             else
558                 if (!toBeAdd)
559                     toBeAdd=true;
560             // finished to parse one string
561
if (toBeAdd && (str.length()!=0)) {
562                 minCapacity++;
563                 output.ensureCapacity(minCapacity);
564                 if (output.add(str.toString()))
565                     str = new StringBuffer JavaDoc();
566                 else
567                     minCapacity--;
568             }
569         }
570         // add the last string
571
if (str.length()!=0) {
572             minCapacity++;
573             output.ensureCapacity(minCapacity);
574             if (output.add(str.toString()))
575                 str = new StringBuffer JavaDoc();
576             else
577                 minCapacity--;
578         }
579
580         output.trimToSize();
581         Object JavaDoc[] outputObj = output.toArray();
582         String JavaDoc[] outputStr = new String JavaDoc[output.size()];
583         for (int i=0; i<output.size(); i++)
584             outputStr[i] = new String JavaDoc((String JavaDoc) outputObj[i]);
585         return outputStr;
586         
587     }
588
589     /**
590      * Remove from the input string all the chars specified in the input variable charsToBeRemoved.
591      * <BR>For example if you call trimChars(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;<>DIV/ &quot;),
592      * <BR>you obtain a string &quot;+12.5&quot; as output (&lt;,&gt;,D,I,V,/ and space char are chars that must be removed).
593      * <BR>For example if you call trimChars(&quot;&lt;DIV&gt; Trim All Chars Also The Ones Inside The String &lt;/DIV&gt;&quot;, &quot;<>DIV/ &quot;),
594      * <BR>you obtain a string &quot;TrimAllCharsAlsoTheOnesInsideTheString&quot; as output (all the spaces inside the string are removed).
595      * @param input The string in input.
596      * @param charsToBeRemoved The chars to be removed.
597      * @return The string as output.
598     */

599     public static String JavaDoc trimChars (String JavaDoc input, String JavaDoc charsToBeRemoved)
600     {
601     
602         StringBuffer JavaDoc output = new StringBuffer JavaDoc();
603
604         boolean charFound=false;
605         for (int index=0; index<input.length(); index++)
606         {
607             charFound=false;
608             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
609                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
610                     charFound=true;
611             if (!(charFound))
612                 output.append(input.charAt(index));
613         }
614
615         return output.toString();
616
617     }
618
619     /**
620      * Remove from the beginning and the end of the input string all the chars specified in the input variable charsToBeRemoved.
621      * <BR>The removal process removes only chars at the beginning and at the end of the string.
622      * <BR>For example if you call trimCharsBeginEnd(&quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot;<>DIV/ &quot;),
623      * <BR>you obtain a string &quot;+12.5&quot; as output (' ' is a space char and &lt;,&gt;,D,I,V,/ are chars that must be removed).
624      * <BR>For example if you call trimCharsBeginEnd(&quot;&lt;DIV&gt; Trim all spaces but not the ones inside the string &lt;/DIV&gt;&quot;, &quot;<>DIV/ &quot;),
625      * <BR>you obtain a string &quot;Trim all spaces but not the ones inside the string&quot; as output (all the spaces inside the string are preserved).
626      * @param input The string in input.
627      * @param charsToBeRemoved The chars to be removed.
628      * @return The string as output.
629     */

630     public static String JavaDoc trimCharsBeginEnd (String JavaDoc input, String JavaDoc charsToBeRemoved)
631     {
632     
633         String JavaDoc output = new String JavaDoc();
634
635         int begin=0;
636         int end=input.length()-1;
637         boolean charFound=false;
638         boolean ok=true;
639         for (int index=begin; (index<input.length()) && ok; index++)
640         {
641             charFound=false;
642             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
643                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
644                     charFound=true;
645             if (!(charFound))
646             {
647                 begin=index;
648                 ok=false;
649             }
650         }
651         ok=true;
652         for (int index=end; (index>=0) && ok; index--)
653         {
654             charFound=false;
655             for (int charsCount=0; charsCount<charsToBeRemoved.length(); charsCount++)
656                 if (charsToBeRemoved.charAt(charsCount)==input.charAt(index))
657                     charFound=true;
658             if (!(charFound))
659             {
660                 end=index;
661                 ok=false;
662             }
663         }
664         output=input.substring(begin,end+1);
665
666         return output;
667         
668     }
669
670     /**
671      * Split the input string in a string array,
672      * considering the tags as delimiter for splitting.
673      * @see ParserUtils#splitTags (String input, String[] tags, boolean recursive, boolean insideTag).
674      */

675     public static String JavaDoc[] splitTags (String JavaDoc input, String JavaDoc[] tags)
676         throws ParserException, UnsupportedEncodingException JavaDoc
677     {
678         return splitTags (input, tags, true, true);
679     }
680     
681     /**
682      * Split the input string in a string array,
683      * considering the tags as delimiter for splitting.
684      * <BR>For example if you call splitTags(&quot;Begin &lt;DIV&gt;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}),
685      * <BR>you obtain a string array {&quot;Begin &quot;, &quot; ALL OK&quot;} as output (splitted &lt;DIV&gt; tags and their content recursively).
686      * <BR>For example if you call splitTags(&quot;Begin &lt;DIV&gt;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, false, false),
687      * <BR>you obtain a string array {&quot;Begin &quot;, &quot;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&quot;, &quot; ALL OK&quot;} as output (splitted &lt;DIV&gt; tags and not their content and no recursively).
688      * <BR>For example if you call splitTags(&quot;Begin &lt;DIV&gt;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, true, false),
689      * <BR>you obtain a string array {&quot;Begin &quot;, &quot; +12.5 &quot;, &quot; ALL OK&quot;} as output (splitted &lt;DIV&gt; tags and not their content recursively).
690      * <BR>For example if you call splitTags(&quot;Begin &lt;DIV&gt;&lt;DIV&gt; +12.5 &lt;/DIV&gt;&lt;/DIV&gt; ALL OK&quot;, new String[] {&quot;DIV&quot;}, false, true),
691      * <BR>you obtain a string array {&quot;Begin &quot;, &quot; ALL OK&quot;} as output (splitted &lt;DIV&gt; tags and their content).
692      * @param input The string in input.
693      * @param tags The tags to be used as splitting delimiter.
694      * @param recursive Optional parameter (true if not present), if true delete all the tags recursively.
695      * @param insideTag Optional parameter (true if not present), if true delete also the content of the tags.
696      * @return The string array containing the strings delimited by tags.
697      */

698     public static String JavaDoc[] splitTags (String JavaDoc input, String JavaDoc[] tags, boolean recursive, boolean insideTag)
699         throws ParserException, UnsupportedEncodingException JavaDoc
700     {
701     
702         ArrayList JavaDoc outputArrayList = new ArrayList JavaDoc();
703         int minCapacity = 0;
704         String JavaDoc output = new String JavaDoc();
705         String JavaDoc inputModified = new String JavaDoc(input);
706         String JavaDoc[] outputStr = new String JavaDoc[] {};
707         
708         String JavaDoc dummyString = createDummyString (' ', input.length());
709         
710         // loop inside the different tags to be trimmed
711
for (int i=0; i<tags.length; i++)
712         {
713             
714             // loop inside the tags of the same type
715
NodeList links = getLinks (inputModified, tags[i], recursive);
716             for (int j=0; j<links.size(); j++)
717             {
718                 CompositeTag beginTag = (CompositeTag)links.elementAt(j);
719                 Tag endTag = beginTag.getEndTag();
720
721                 // positions of begin and end tags
722
int beginTagBegin = beginTag.getStartPosition ();
723                 int endTagBegin = beginTag.getEndPosition ();
724                 int beginTagEnd = endTag.getStartPosition ();
725                 int endTagEnd = endTag.getEndPosition ();
726
727                 if (insideTag)
728                 {
729                     dummyString = modifyDummyString (new