KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > mmbase > util > transformers > ChunkedTransformer


1 /*
2
3 This software is OSI Certified Open Source Software.
4 OSI Certified is a certification mark of the Open Source Initiative.
5
6 The license (Mozilla version 1.0) can be read at the MMBase site.
7 See http://www.MMBase.org/license
8
9 */

10 package org.mmbase.util.transformers;
11
12 import java.util.*;
13 import java.io.*;
14 import java.util.regex.*;
15 import org.mmbase.util.ResourceWatcher;
16 import org.mmbase.util.xml.UtilReader;
17 import org.mmbase.util.Entry;
18
19 import org.mmbase.util.logging.*;
20
21
22 /**
23  * A chunked transformer is a transformer that transforms on a 'chunk by chunk' base. A chunck is
24  * typically a word or a line or so. The type of the 'chunks' is controled by the 'mode' parameter.
25  *
26  * It can ignored existing XML markup (the 'XMLTEXT' modes), and also avoids trailing dots and
27  * comments and surrounding quotes and parentheses.
28  *
29  * @author Michiel Meeuwissen
30  * @since MMBase-1.8
31  */

32
33 public abstract class ChunkedTransformer extends ConfigurableReaderTransformer implements CharTransformer {
34     private static final Logger log = Logging.getLoggerInstance(ChunkedTransformer.class);
35
36     /**
37      * Match word by word, but only in PCDATA of xml elements.
38      */

39     public final static int XMLTEXT_WORDS = 1;
40
41     /**
42      * Match in PCDATA of xml elements.
43      */

44     public final static int XMLTEXT = 2;
45
46     /**
47      * Match word by word.
48      */

49     public final static int WORDS = 3;
50
51     /**
52      * Match line by line.
53      */

54     public final static int LINES = 4;
55
56     /**
57      * Match the entire stream (so, one String must be created).
58      */

59     public final static int ENTIRE = 5;
60
61
62     /**
63      * If this is added to the config-int, then only the first match should be used.
64      */

65     public final static int REPLACE_FIRST = 100;
66     /**
67      * If this is added to the config-int, then only the first match should be used.
68      */

69     public final static int REPLACE_FIRST_ALL = 200;
70
71
72     protected boolean replaceFirst = false;
73     protected boolean replaceFirstAll = false;
74
75     public void configure(int i) {
76         if (i >= 200) {
77             replaceFirstAll = true;
78             i -= 200;
79         }
80         if (i >= 100) {
81             replaceFirst = true;
82             i -= 100;
83         }
84         super.configure(i);
85     }
86
87     protected ChunkedTransformer(int i) {
88         super(i);
89     }
90
91     public ChunkedTransformer() {
92         this(WORDS);
93     }
94
95     protected class Status {
96         int replaced = 0;
97         Set used = null;
98         {
99             if (replaceFirstAll) used = new HashSet();
100         }
101     }
102     protected Status newStatus() {
103         return new Status();
104         
105     }
106     /**
107      * Implement this. Return true if a replacement done.
108      */

109     protected abstract boolean replace(String JavaDoc string, Writer w, Status status) throws IOException;
110
111     protected boolean replaceWord(StringBuffer JavaDoc word, Writer writer, Status status) throws IOException {
112         int l = word.length();
113         StringBuffer JavaDoc postFix = null;
114         String JavaDoc w;
115         if (l > 0) {
116
117             postFix = new StringBuffer JavaDoc();
118
119             // surrounding quotes might look like " because of earlier escaping, so we take those out of consideration.
120
w = word.toString();
121             while (w.endsWith(""")) {
122                 postFix.insert(0, """);
123                 l -= 6;
124                 word.setLength(l);
125                 w = word.toString();
126             }
127             if (l > 0) {
128
129                 // to allow for . , and like in the end, we tear those of.
130
char d = word.charAt(l - 1);
131                 while (! Character.isLetterOrDigit(d)) {
132                     postFix.insert(0, d);
133                     word.setLength(--l);
134                     if (l == 0) break;
135                     d = word.charAt(l - 1);
136                 }
137             }
138         }
139
140         w = word.toString();
141
142         // stuff in the beginning:
143
while(w.startsWith(""")) {
144             writer.write(""");
145             word.delete(0, 6);
146             l -= 6;
147             w = word.toString();
148         }
149
150         // ready to make the replacements now.
151
boolean result = replace(w, writer, status);
152
153         if (postFix != null) {
154             writer.write(postFix.toString());
155         }
156         return result;
157     }
158
159     /**
160      * Whether still to do replacing, given status.
161      */

162     protected boolean replace(Status status) {
163         return !replaceFirst || status.replaced == 0;
164     }
165
166     public Writer transformXmlTextWords(Reader r, Writer w) {
167         Status status = newStatus();
168         StringBuffer JavaDoc word = new StringBuffer JavaDoc(); // current word
169
boolean translating = true;
170         try {
171             log.trace("Starting replacing");
172             while (true) {
173                 int c = r.read();
174                 if (c == -1) break;
175                 if (!replace(status)) {
176                     w.write(c);
177                 } else
178                 if (c == '<') { // don't do it in existing tags and attributes
179
translating = false;
180                     replaceWord(word, w, status);
181                     w.write(c);
182                 } else if (c == '>') {
183                     translating = true;
184                     word.setLength(0);
185                     w.write(c);
186                 } else if (! translating) {
187                     w.write(c);
188                 } else {
189                     if (Character.isWhitespace((char) c) || c == '\'' || c == '\"' || c == '(' || c == ')' ) {
190                         replaceWord(word, w, status);
191                         word.setLength(0);
192                         w.write(c);
193                     } else {
194                         word.append((char) c);
195                     }
196                 }
197             }
198             // write last word
199
if (replace(status)) {
200                 if (translating) replaceWord(word, w, status);
201             } else {
202                 w.write(word.toString());
203             }
204             if (log.isDebugEnabled()) {
205                 log.debug("Finished replacing. Replaced " + status.replaced + " words");
206             }
207         } catch (java.io.IOException JavaDoc e) {
208             log.error(e.toString());
209         }
210         return w;
211     }
212
213     public Writer transformXmlText(Reader r, Writer w) {
214         Status status = newStatus();
215         StringBuffer JavaDoc xmltext = new StringBuffer JavaDoc(); // current word
216
boolean translating = true;
217         try {
218             log.trace("Starting replacing");
219             while (true) {
220                 int c = r.read();
221                 if (c == -1) break;
222                 if (!replace(status)) {
223                     w.write(c);
224                 } else
225                 // perhaps better use SAX to decently detect XML, but then it probably won't work
226
// very well on sloppy XML (like HTML).
227
if (c == '<') { // don't do it in existing tags and attributes
228
translating = false;
229                     replace(xmltext.toString(), w, status);
230                     w.write(c);
231                 } else if (c == '>') {
232                     translating = true;
233                     xmltext.setLength(0);
234                     w.write(c);
235                 } else if (! translating) {
236                     w.write(c);
237                 } else {
238                     xmltext.append((char) c);
239                 }
240             }
241             // write last word
242
if (replace(status)) {
243                 if (translating) replace(xmltext.toString(), w, status);
244             } else {
245                 w.write(xmltext.toString());
246             }
247             log.debug("Finished replacing. Replaced " + status.replaced + " words");
248         } catch (java.io.IOException JavaDoc e) {
249             log.error(e.toString());
250         }
251         return w;
252     }
253     public Writer transformWords(Reader r, Writer w) {
254         Status status = newStatus();
255         StringBuffer JavaDoc word = new StringBuffer JavaDoc(); // current word
256
try {
257             if (log.isDebugEnabled()) {
258                 log.trace("Starting replacing words." + Logging.stackTrace());
259             }
260             while (true) {
261                 int c = r.read();
262                 if (c == -1) break;
263                 if (replace(status) && (Character.isWhitespace((char) c) || c == '\'' || c == '\"' || c == '(' || c == ')' || c == '<' || c == '>' )) {
264                     replaceWord(word, w, status);
265                     word.setLength(0);
266                     w.write(c);
267                 } else {
268                     word.append((char) c);
269                 }
270             }
271             // write last word
272
if (replace(status)) {
273                 replaceWord(word, w, status);
274             } else {
275                 w.write(word.toString());
276             }
277             log.debug("Finished replacing. Replaced " + status.replaced + " words");
278         } catch (java.io.IOException JavaDoc e) {
279             log.error(e.toString());
280         }
281         return w;
282     }
283
284
285
286     public Writer transformLines(Reader r, Writer w) {
287         BufferedReader reader = new BufferedReader(r);
288         Status status = newStatus();
289         try {
290             String JavaDoc line = reader.readLine();
291             while (line != null) {
292                 if (replace(status)) {
293                     replace(line, w, status);
294                 } else {
295                     w.write(line);
296                 }
297                 line = reader.readLine();
298             }
299         } catch (java.io.IOException JavaDoc e) {
300             log.error(e.toString());
301         }
302         return w;
303     }
304
305     public Writer transformEntire(Reader r, Writer w) {
306         StringWriter sw = new StringWriter();
307         Status status = newStatus();
308         try {
309             while (true) {
310                 int c = r.read();
311                 if (c == -1) break;
312                 sw.write(c);
313             }
314             replace(sw.toString(), w, status);
315         } catch (java.io.IOException JavaDoc e) {
316             log.error(e.toString());
317         }
318
319         return w;
320     }
321
322
323     public Writer transform(Reader r, Writer w) {
324         switch(to) {
325         case XMLTEXT_WORDS: return transformXmlTextWords(r, w);
326         case XMLTEXT: return transformXmlText(r, w);
327         case WORDS: return transformWords(r, w);
328         case LINES: return transformLines(r, w);
329         case ENTIRE: return transformEntire(r, w);
330         default: throw new UnknownCodingException(getClass(), to);
331         }
332     }
333
334     abstract protected String JavaDoc base();
335
336     public String JavaDoc getEncoding() {
337         switch (to) {
338         case XMLTEXT_WORDS:
339             return base() + "_XMLTEXT_WORDS";
340         case XMLTEXT:
341             return base() + "_XMLTEXT";
342         case WORDS:
343             return base() + "_WORDS";
344         case LINES:
345             return base() + "_LINES";
346         case ENTIRE:
347             return base() + "_ENTIRE";
348         default :
349             throw new UnknownCodingException(getClass(), to);
350         }
351     }
352
353     public Map transformers() {
354         Map h = new HashMap();
355         h.put(base() + "_XMLTEXT_WORDS", new Config(RegexpReplacer.class, XMLTEXT_WORDS, "Search and replaces regexps word-by-word, only in XML text() blocks."));
356         h.put(base() + "_XMLTEXT", new Config(RegexpReplacer.class, XMLTEXT, "Search and replaces regexps, only in XML text() blocks."));
357         h.put(base() + "_WORDS", new Config(RegexpReplacer.class, WORDS, "Search and replaces regexps word-by-word"));
358         h.put(base() + "_LINES", new Config(RegexpReplacer.class, LINES, "Search and replaces regexps, line-by-line"));
359         h.put(base() + "_ENTIRE", new Config(RegexpReplacer.class, ENTIRE, "Search and replaces regexps"));
360
361         return Collections.unmodifiableMap(h);
362     }
363
364
365
366 }
367
Popular Tags