KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > SnowMailClient > html > HTMLTag


1 package SnowMailClient.html;
2
3 import snow.utils.storage.*;
4
5 import java.io.*;
6 import java.text.*;
7 import java.util.*;
8
9
/**
 * Position and raw attribute text of a single HTML tag found in a string.
 *
 * <p>A tag is described by the index of its opening {@code <} and the index of
 * the {@code >} that ends the opening tag. For the tag names listed in
 * {@link #tagsWithBody} the matching end tag ({@code </NAME>}) is also looked
 * up and its position recorded. All positions are indices into the parsed
 * text and all "end" positions are inclusive (they point at the {@code >}).
 *
 * <p>The parser is deliberately simple: anything starting with {@code <} is
 * reported as a tag, so end tags and comments show up as tags whose name
 * starts with {@code /} or {@code !}.
 */
public final class HTMLTag
{
  /** Names of the tags whose matching end tag is searched for by {@link #parseTag}. */
  static final String[] tagsWithBody = new String[] {
     "H1", "H2", "H3", "H4", "H5", "TITLE", "BODY", "HTML", "B", "I", "CENTER"};

  /** Raw attribute text of the opening tag, stored unparsed under the key "args". */
  public final Properties parameters = new Properties();
  /** Tag name exactly as written in the text (case is preserved). */
  public String name;
  /** False when the opening tag was not terminated by a > inside the parsed range. */
  public boolean valid = true;
  /** Index of the < that starts the opening tag. */
  public int tagOpeningStartPos;
  /** Index of the > that ends the opening tag (or the last scanned index if invalid). */
  public int tagOpeningEndPos;
  /** True when the name is one of {@link #tagsWithBody} (compared ignoring case). */
  public boolean hasBody = false;
  /** Index of the < of the matching end tag; only meaningful when hasBody is true. */
  public int tagClosingStartPos;
  /** Index of the > of the matching end tag; only meaningful when hasBody is true. */
  public int tagClosingEndPos;


  public HTMLTag()
  {

  } // Constructor


  /**
   * Parses the tag whose opening {@code <} is at {@code startParse}.
   * Parsing never looks at characters at or beyond {@code endParse}
   * (which is clamped to the text length).
   *
   * @param text       the text containing the tag
   * @param startParse index of the opening {@code <}
   * @param endParse   exclusive upper bound of the region to parse
   * @return the parsed tag; {@link #valid} is false when no closing
   *         {@code >} was found inside the region
   */
  public static HTMLTag parseTag(String text, int startParse, int endParse)
  {
    final int limit = Math.min(endParse, text.length());

    HTMLTag tag = new HTMLTag();
    tag.tagOpeningStartPos = startParse;

    // 1) Scan for the opening tag name and optional params
    //
    StringBuilder name = new StringBuilder();
    boolean resolved = false; // true once the scan reached a decision (valid or not)
    for(int i=startParse+1; i<limit; i++)
    {
      char ci = text.charAt(i);
      if(ci=='>')
      {
        // end reached, there are no parameters
        tag.tagOpeningEndPos = i;
        resolved = true;
        break;
      }
      else if(isTagSpace(ci))
      {
        // there are params, read them up to the next > that lies inside the region
        int posEnd = text.indexOf('>', i);
        if(posEnd==-1 || posEnd>=limit)
        {
           System.out.println("Tag "+name.toString()+" end > not reached");
           tag.valid = false;
           tag.tagOpeningEndPos = i;
        }
        else
        {
           tag.parameters.put("args", text.substring(i+1,posEnd));
           tag.tagOpeningEndPos = posEnd;
        }
        resolved = true;
        break;
      }
      else
      {
        // continue scan
        name.append(ci);
      }
    }
    tag.name = name.toString();

    if( !resolved )
    {
       // the region ended (or was empty) before any > or whitespace was seen
       System.out.println("Tag "+name.toString()+" end not reached");
       tag.tagOpeningEndPos = Math.max(startParse, limit-1);
       tag.valid = false;
    }

    // 2) body
    //
    if(isBodyTag(tag.name))
    {
      tag.hasBody = true;
      // search after the opening tag so that attribute text is never scanned
      int pos = indexOfEndTag(text, tag.name, tag.tagOpeningEndPos+1, limit);
      if(pos!=-1)
      {
        tag.tagClosingStartPos = pos;
        // the end tag may contain whitespace before its >, so locate the > explicitly
        int gt = text.indexOf('>', pos+2+tag.name.length());
        tag.tagClosingEndPos = (gt!=-1 && gt<limit) ? gt : limit-1;
      }
      else
      {
        // no end tag: the body is considered to run to the end of the region
        tag.tagClosingStartPos = limit-1;
        tag.tagClosingEndPos = limit-1;
      }
    }

    return tag;
  }


  /**
   * Finds the end tag {@code </tag} followed by {@code >} or whitespace,
   * comparing the name ignoring case.
   *
   * @param text      the text to search
   * @param tag       the tag name without brackets
   * @param startFrom index at which the search starts
   * @param maxPos    exclusive upper bound; the whole end-tag name and the
   *                  character following it must lie below this index
   * @return the position of the {@code <} of the end tag, -1 if not found
   */
  public static int indexOfEndTag(String text, String tag, int startFrom, int maxPos)
  {
    final int limit = Math.min(maxPos, text.length());
    final int nameLen = tag.length();
    int from = startFrom;
    while(true)
    {
      int pos = text.indexOf("</", from);
      if(pos==-1 || pos>=limit) return -1;

      // index of the character that must terminate the name
      int nameEnd = pos+2+nameLen;
      // later candidates start even further right, so none of them can fit either
      if(nameEnd>=limit) return -1;

      if(text.regionMatches(true, pos+2, tag, 0, nameLen)
         && (text.charAt(nameEnd)=='>' || isTagSpace(text.charAt(nameEnd))))
      {
        return pos;
      }

      // continue behind this candidate
      from = pos+1;
    }
  }

  /**
   * Parses every tag that starts between {@code startParse} (inclusive) and
   * {@code endParse} (exclusive, clamped to the text length). Each tag is
   * also printed to standard output.
   *
   * @return the tags in the order of their opening {@code <}
   */
  public static Vector<HTMLTag> parseAllTags(String text, int startParse, int endParse)
  {
    Vector<HTMLTag> tags = new Vector<HTMLTag>();
    final int limit = Math.min(endParse, text.length());

    for(int pos=startParse; pos<limit; pos++)
    {
      if(text.charAt(pos)=='<')
      {
        HTMLTag tag = parseTag(text, pos, limit);
        System.out.println(""+tag);
        tags.addElement(tag);

        // resume right after the opening tag; never step backwards
        pos = Math.max(pos, tag.tagOpeningEndPos);
      }
    }
    return tags;
  }

  /** Multi-line summary: name, validity marker, raw args and (for body tags) the body end. */
  @Override
  public String toString()
  {
    StringBuilder s = new StringBuilder();
    s.append("\nTag "+name);
    if(!valid)
    {
      s.append(" INVALID !");
    }
    String args = parameters.getProperty("args", null);
    if(args!=null)
    {
      s.append("\n Args = "+args);
    }
    if(this.hasBody)
    {
      s.append("\n Body end ="+this.tagClosingEndPos);
    }
    return s.toString();
  }


  /** Returns true when {@code a} holds an element equal (case-sensitively) to {@code b}. */
  public static boolean contains(String[] a, String b)
  {
    for(String element : a)
    {
      if(element.equals(b)) return true;
    }
    return false;
  }

  /** True when the name matches an entry of tagsWithBody, ignoring case like indexOfEndTag does. */
  private static boolean isBodyTag(String tagName)
  {
    for(String candidate : tagsWithBody)
    {
      if(candidate.equalsIgnoreCase(tagName)) return true;
    }
    return false;
  }

  /** The characters treated as separators between a tag name and what follows it. */
  private static boolean isTagSpace(char c)
  {
    return c==' ' || c=='\t' || c=='\r' || c=='\n';
  }

  /** Reads the whole file into memory, closing the stream even on failure. */
  private static byte[] readFile(File file) throws IOException
  {
    InputStream in = new FileInputStream(file);
    try
    {
      ByteArrayOutputStream out = new ByteArrayOutputStream();
      byte[] buffer = new byte[8192];
      int read;
      while((read = in.read(buffer)) != -1)
      {
        out.write(buffer, 0, read);
      }
      return out.toByteArray();
    }
    finally
    {
      in.close();
    }
  }

  /**
   * Manual smoke test: parses all tags of an HTML file and prints their count.
   *
   * @param a optional; a[0] is the file to parse, defaulting to c:/data/test2.htm
   */
  public static void main(String[] a)
  {
    try
    {
      String path = (a!=null && a.length>0) ? a[0] : "c:/data/test2.htm";
      // bytes are decoded with the platform default charset
      String cont = new String(readFile(new File(path)));
      Vector<HTMLTag> tags = HTMLTag.parseAllTags(cont, 0, cont.length());
      System.out.println("\n\n#tags="+tags.size());
    }
    catch(Exception e)
    {
      e.printStackTrace();
    }

  }

} // HTMLTag
Popular Tags