KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > de > susebox > jtopas > spi > StandardSequenceHandler


1 /*
2  * StandardSequenceHandler.java: simple implementation of SequenceHandler
3  *
4  * Copyright (C) 2002 Heiko Blau
5  *
6  * This file belongs to the JTopas Library.
7  * JTopas is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU Lesser General Public License as published by the
9  * Free Software Foundation; either version 2.1 of the License, or (at your
10  * option) any later version.
11  *
12  * This software is distributed in the hope that it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.
15  * See the GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License along
18  * with JTopas. If not, write to the
19  *
20  * Free Software Foundation, Inc.
21  * 59 Temple Place, Suite 330,
22  * Boston, MA 02111-1307
23  * USA
24  *
25  * or check the Internet: http://www.fsf.org
26  *
27  * Contact:
28  * email: heiko@susebox.de
29  */

30
31 package de.susebox.jtopas.spi;
32
33 //-----------------------------------------------------------------------------
34
// Imports
35
//
36
import java.util.Iterator JavaDoc;
37
38 import de.susebox.jtopas.TokenizerProperty;
39 import de.susebox.jtopas.TokenizerProperties;
40 import de.susebox.jtopas.TokenizerException;
41
42
43 //-----------------------------------------------------------------------------
44
// Class StandardSequenceHandler
45
//
46

47 /**<p>
48  * Simple implementation of the {@link SequenceHandler} interface. This class
49  * works only with the {@link de.susebox.jtopas.TokenizerProperties} interface
50  * methods and is aware of changes in these properties. It does not cache any
51  * information and is therefore a more or less slow way to handle special sequences.
52  *</p><p>
53  * This class is a bridge between arbitrary {@link de.susebox.jtopas.Tokenizer}
54  * implementations using the SPI interface {@link SequenceHandler} and any
55  * {@link de.susebox.jtopas.TokenizerProperties} implementation that does not
56  * implement the <code>SequenceHandler</code> interface itself.
57  *</p>
58  *
59  * @see SequenceHandler
60  * @see de.susebox.jtopas.Tokenizer
61  * @see de.susebox.jtopas.TokenizerProperties
62  * @author Heiko Blau
63  */

64 public class StandardSequenceHandler implements SequenceHandler {
65   
66   /**
67    * The constructor takes the {@link de.susebox.jtopas.TokenizerProperties}
68    * that provide the special sequences.
69    *
70    * @param props the {@link de.susebox.jtopas.TokenizerProperties} to take the
71    * sequences from
72    */

73   public StandardSequenceHandler(TokenizerProperties props) {
74     _properties = props;
75   }
76   
77   /**
78    * This method can be used by a {@link de.susebox.jtopas.Tokenizer} implementation
79    * for a fast detection if special sequence checking must be performed at all.
80    * If the method returns <code>false</code> time-consuming preparations can be
81    * skipped.
82    *
83    * @return <code>true</code> if there actually are pattern that can be tested
84    * for a match, <code>false</code> otherwise.
85    */

86   public boolean hasSequenceCommentOrString() {
87     if (_properties != null) {
88       return _properties.getSpecialSequences().hasNext();
89     } else {
90       return false;
91     }
92   }
93   
94   /**
95    * This method checks if a given range of data starts with a special sequence,
96    * a comment or a string. These three types of token are tested together since
97    * both comment and string prefixes are ordinary special sequences. Only the
98    * actions preformed <strong>after</strong> a string or comment has been detected,
99    * are different.
100    *<br>
101    * The method returns <code>null</code> if no special sequence, comment or string
102    * could matches the the leading part of the data range given through the
103    * {@link DataProvider}.
104    *
105    * @param dataProvider the source to get the data range from
106    * @return a {@link de.susebox.jtopas.TokenizerProperty} if a special sequence,
107    * comment or string could be detected, <code>null</code> otherwise
108    * @throws NullPointerException if no {@link DataProvider} is given
109    */

110   public TokenizerProperty startsWithSequenceCommentOrString(DataProvider dataProvider)
111     throws NullPointerException JavaDoc
112   {
113     TokenizerProperty prop = null;
114     
115     if (_properties != null) {
116       String JavaDoc data = dataProvider.toString();
117
118       prop = getLongestMatch(data, _properties.getSpecialSequences(), prop);
119       prop = getLongestMatch(data, _properties.getLineComments(), prop);
120       prop = getLongestMatch(data, _properties.getBlockComments(), prop);
121       prop = getLongestMatch(data, _properties.getStrings(), prop);
122     }
123     return prop;
124   }
125
126   /**
127    * This method returns the length of the longest special sequence, comment or
128    * string prefix that is known to this <code>SequenceHandler</code>. When
129    * calling {@link #startsWithSequenceCommentOrString}, the passed {@link DataProvider}
130    * parameter will supply at least this number of characters (see {@link DataProvider#getLength}).
131    * If less characters are provided, EOF is reached.
132    *<br>
133    * The method is an easy approach to the problem of how to provide more data
134    * in case a test runs out of characters. The invoking {@link de.susebox.jtopas.Tokenizer}
135    * (represented by the given {@link DataProvider}) can supply enough data for
136    * the {@link #startsWithSequenceCommentOrString} method.
137    *
138    * @return the number of characters needed in the worst case to identify a
139    * special sequence
140    */

141   public int getSequenceMaxLength() {
142     int maxLength = 0;
143     
144     if (_properties != null) {
145       maxLength = getSequenceMaxLength(_properties.getSpecialSequences(), maxLength);
146       maxLength = getSequenceMaxLength(_properties.getLineComments(), maxLength);
147       maxLength = getSequenceMaxLength(_properties.getBlockComments(), maxLength);
148       maxLength = getSequenceMaxLength(_properties.getStrings(), maxLength);
149     }
150     return maxLength;
151   }
152     
153   /**
154    * Retrieving the maximum length of a {@link TokenizerProperty} from an
155    * {@link java.util.Iterator}.
156    *
157    * @param iter a initialized {@link java.util.Iterator} to walk through
158    * @param currentMax the currently known maximum length
159    * @return the maximum length of the {@link TokenizerProperty} images in the
160    * iterator
161    */

162   private int getSequenceMaxLength(Iterator JavaDoc iter, int currentMax) {
163     while (iter.hasNext()) {
164       TokenizerProperty prop = (TokenizerProperty)iter.next();
165       int len = prop.getImages()[0].length();
166
167       if (len > currentMax) {
168         currentMax = len;
169       }
170     }
171     return currentMax;
172   }
173
174   
175   /**
176    * Retrieving the longest {@link TokenizerProperty} that matches the start of
177    * the given string.
178    *
179    * @param data check the start of this string
180    * @param iter a initialized {@link java.util.Iterator} to walk through
181    * @param currentMatch the currently known longest match
182    * @return the longest matching {@link TokenizerProperty} or <code>null</code>
183    */

184   private TokenizerProperty getLongestMatch(
185     String JavaDoc data,
186     Iterator JavaDoc iter,
187     TokenizerProperty currentMatch
188   )
189   {
190     int currentMax = (currentMatch != null) ? currentMatch.getImages()[0].length() : 0;
191     TokenizerProperty retProp = currentMatch;
192     
193     while (iter.hasNext()) {
194       TokenizerProperty prop = (TokenizerProperty)iter.next();
195       int len = prop.getImages()[0].length();
196
197       if (len > currentMax) {
198         currentMax = len;
199         retProp = prop;
200       }
201     }
202     return retProp;
203   }
204
205   
206   //---------------------------------------------------------------------------
207
// Members
208
//
209

210   /**
211    * The {@link TokenizerProperties} that provide the sequences and the
212    * control flags.
213    */

214   private TokenizerProperties _properties = null;
215 }
216
Popular Tags