KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > cocoon > transformation > PatternTransformer


1 /*
2  * Copyright 1999-2005 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.apache.cocoon.transformation;
17
18 import net.sourceforge.chaperon.build.LexicalAutomatonBuilder;
19 import net.sourceforge.chaperon.common.Decoder;
20 import net.sourceforge.chaperon.model.lexicon.Lexicon;
21 import net.sourceforge.chaperon.model.lexicon.LexiconFactory;
22 import net.sourceforge.chaperon.process.LexicalAutomaton;
23 import net.sourceforge.chaperon.process.PatternProcessor;
24
25 import org.apache.avalon.excalibur.pool.Recyclable;
26 import org.apache.avalon.framework.activity.Disposable;
27 import org.apache.avalon.framework.logger.LogEnabled;
28 import org.apache.avalon.framework.logger.Logger;
29 import org.apache.avalon.framework.parameters.ParameterException;
30 import org.apache.avalon.framework.parameters.Parameterizable;
31 import org.apache.avalon.framework.parameters.Parameters;
32 import org.apache.avalon.framework.service.ServiceException;
33 import org.apache.avalon.framework.service.ServiceManager;
34 import org.apache.avalon.framework.service.Serviceable;
35
36 import org.apache.cocoon.ProcessingException;
37 import org.apache.cocoon.xml.XMLUtils;
38 import org.apache.cocoon.caching.CacheableProcessingComponent;
39 import org.apache.cocoon.components.source.SourceUtil;
40 import org.apache.cocoon.environment.SourceResolver;
41
42 //import org.apache.commons.logging.impl.AvalonLogger;
43

44 import org.apache.excalibur.source.Source;
45 import org.apache.excalibur.source.SourceException;
46 import org.apache.excalibur.source.SourceValidity;
47 import org.apache.excalibur.store.Store;
48
49 import org.xml.sax.Attributes JavaDoc;
50 import org.xml.sax.SAXException JavaDoc;
51 import org.xml.sax.helpers.AttributesImpl JavaDoc;
52
53 import java.io.IOException JavaDoc;
54 import java.io.Serializable JavaDoc;
55
56 import java.util.Map JavaDoc;
57
58 /**
59  * This transformer converts text patterns found in an XML file into lexemes by using a lexicon file.
60  *
61  * <p>
62  * Input:
63  * </p>
64  * <pre>
65  * &lt;section&gt;
66  * Text 123 bla
67  * &lt;/section&gt;
68  * </pre>
69  *
70  * <p>
71  * can be transformed into the following output:
72  * </p>
73  * <pre>
74  * &lt;section&gt;
75  * Text
76  * &lt;lexeme symbol="number" text="123"/&gt;
77  * bla
78  * &lt;/section&gt;
79  * </pre>
80  *
81  * @author <a HREF="mailto:stephan@apache.org">Stephan Michels</a>
82  * @version $Id: PatternTransformer.java 164808 2005-04-26 16:07:03Z vgritsenko $
83  */

84 public class PatternTransformer extends AbstractTransformer
85                                 implements LogEnabled, Serviceable, Recyclable,
86                                            Disposable, Parameterizable, CacheableProcessingComponent {
87
88   /** Namespace for the SAX events. */
89   public static final String JavaDoc NS = "http://chaperon.sourceforge.net/schema/lexemes/2.0";
90   private String JavaDoc lexicon = null;
91   private Source lexiconSource = null;
92   private Logger logger = null;
93   private ServiceManager manager = null;
94   private SourceResolver resolver = null;
95   private LexicalAutomaton automaton = null;
96   private PatternProcessor processor = new PatternProcessor();
97   private boolean groups = false;
98   private StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
99   private StringBuffer JavaDoc output = new StringBuffer JavaDoc();
100
101   /**
102    * Provide component with a logger.
103    *
104    * @param logger the logger
105    */

106   public void enableLogging(Logger logger)
107   {
108     this.logger = logger;
109   }
110
111   /**
112    * Pass the ServiceManager to the Serviceable. The Serviceable implementation should use the
113    * specified ServiceManager to acquire the services it needs for execution.
114    *
115    * @param manager The ServiceManager which this Serviceable uses.
116    */

117   public void service(ServiceManager manager)
118   {
119     this.manager = manager;
120   }
121
122   /**
123    * Provide component with parameters.
124    *
125    * @param parameters the parameters
126    *
127    * @throws ParameterException if parameters are invalid
128    */

129   public void parameterize(Parameters parameters) throws ParameterException
130   {
131     groups = parameters.getParameterAsBoolean("groups", false);
132   }
133
134   /**
135    * Set the SourceResolver, objectModel Map, the source and sitemap Parameters used to process the
136    * request.
137    *
138    * @param resolver Source resolver
139    * @param objectmodel Object model
140    * @param src Source
141    * @param parameters Parameters
142    *
143    * @throws IOException
144    * @throws ProcessingException
145    * @throws SAXException
146    */

147   public void setup(SourceResolver resolver, Map JavaDoc objectmodel, String JavaDoc src, Parameters parameters)
148     throws ProcessingException, SAXException JavaDoc, IOException JavaDoc
149   {
150     this.resolver = resolver;
151
152     Store store = null;
153
154     try
155     {
156       this.lexicon = src;
157
158       this.lexiconSource = resolver.resolveURI(this.lexicon);
159
160       // Retrieve the lexical automaton from the transient store
161
store = (Store)this.manager.lookup(Store.TRANSIENT_STORE);
162
163       LexicalAutomatonEntry entry = (LexicalAutomatonEntry)store.get(this.lexiconSource.getURI());
164
165       // If the lexicon has changed, rebuild the lexical automaton
166
if ((entry==null) || (entry.getValidity()==null) ||
167           (entry.getValidity().isValid(this.lexiconSource.getValidity())<=0))
168       {
169         this.logger.info("(Re)building the automaton from '"+this.lexiconSource.getURI()+"'");
170
171         if (this.lexiconSource.getInputStream()==null)
172           throw new ProcessingException("Source '"+this.lexiconSource.getURI()+"' not found");
173
174         LexiconFactory factory = new LexiconFactory();
175         SourceUtil.toSAX(this.manager, this.lexiconSource, null, factory);
176
177         Lexicon lexicon = factory.getLexicon();
178
179         LexicalAutomatonBuilder builder =
180           new LexicalAutomatonBuilder(lexicon/*, new AvalonLogger(this.logger)*/);
181
182         this.automaton = builder.getLexicalAutomaton();
183
184         this.logger.info("Store automaton into store for '"+this.lexiconSource.getURI()+"'");
185         store.store(this.lexiconSource.getURI(),
186                     new LexicalAutomatonEntry(this.automaton, this.lexiconSource.getValidity()));
187       }
188       else
189       {
190         this.logger.info("Getting automaton from store for '"+this.lexiconSource.getURI()+"'");
191         this.automaton = entry.getLexicalAutomaton();
192       }
193     }
194     catch (SourceException se)
195     {
196       throw new ProcessingException("Error during resolving of '"+src+"'.", se);
197     }
198     catch (ServiceException se)
199     {
200       throw new ProcessingException("Could not lookup for component", se);
201     }
202     finally
203     {
204       if (store!=null)
205         this.manager.release(store);
206     }
207   }
208
209   /**
210    * Generate the unique key. This key must be unique inside the space of this component.
211    *
212    * @return The generated key hashes the src
213    */

214   public Serializable JavaDoc getKey()
215   {
216     return this.lexiconSource.getURI();
217   }
218
219   /**
220    * Generate the validity object.
221    *
222    * @return The generated validity object or <code>null</code> if the component is currently not
223    * cacheable.
224    */

225   public SourceValidity getValidity()
226   {
227     return this.lexiconSource.getValidity();
228   }
229
230   /**
231    * Recycle this component. All instance variables are set to <code>null</code>.
232    */

233   public void recycle()
234   {
235     if ((this.resolver!=null) && (this.lexiconSource!=null))
236     {
237       this.resolver.release(this.lexiconSource);
238       this.lexiconSource = null;
239     }
240
241     this.automaton = null;
242     super.recycle();
243   }
244
245   /**
246    * The dispose operation is called at the end of a components lifecycle.
247    */

248   public void dispose()
249   {
250     if ((this.resolver!=null) && (this.lexiconSource!=null))
251     {
252       this.resolver.release(this.lexiconSource);
253       this.lexiconSource = null;
254     }
255
256     this.manager = null;
257   }
258
259   /**
260    * Receive notification of the beginning of an element.
261    *
262    * @param uri The Namespace URI, or the empty string if the element has no Namespace URI or if
263    * Namespace processing is not being performed.
264    * @param loc The local name (without prefix), or the empty string if Namespace processing is not
265    * being performed.
266    * @param raw The raw XML 1.0 name (with prefix), or the empty string if raw names are not
267    * available.
268    * @param a The attributes attached to the element. If there are no attributes, it shall be an
269    * empty Attributes object.
270    *
271    * @throws SAXException
272    */

273   public void startElement(String JavaDoc uri, String JavaDoc loc, String JavaDoc raw, Attributes JavaDoc a)
274     throws SAXException JavaDoc
275   {
276     search();
277
278     if (contentHandler!=null)
279       contentHandler.startElement(uri, loc, raw, a);
280   }
281
282   /**
283    * Receive notification of the end of an element.
284    *
285    * @param uri The Namespace URI, or the empty string if the element has no Namespace URI or if
286    * Namespace processing is not being performed.
287    * @param loc The local name (without prefix), or the empty string if Namespace processing is not
288    * being performed.
289    * @param raw The raw XML 1.0 name (with prefix), or the empty string if raw names are not
290    * available.
291    *
292    * @throws SAXException
293    */

294   public void endElement(String JavaDoc uri, String JavaDoc loc, String JavaDoc raw)
295     throws SAXException JavaDoc
296   {
297     search();
298
299     if (contentHandler!=null)
300       contentHandler.endElement(uri, loc, raw);
301   }
302
303   /**
304    * Receive notification of character data.
305    *
306    * @param c The characters from the XML document.
307    * @param start The start position in the array.
308    * @param len The number of characters to read from the array.
309    *
310    * @throws SAXException
311    */

312   public void characters(char[] c, int start, int len)
313     throws SAXException JavaDoc
314   {
315     buffer.append(c, start, len);
316   }
317
318   /**
319    * Receive notification of ignorable whitespace in element content.
320    *
321    * @param c The characters from the XML document.
322    * @param start The start position in the array.
323    * @param len The number of characters to read from the array.
324    *
325    * @throws SAXException
326    */

327   public void ignorableWhitespace(char[] c, int start, int len)
328     throws SAXException JavaDoc
329   {
330     buffer.append(c, start, len);
331   }
332
333   /**
334    * Receive notification of a processing instruction.
335    *
336    * @param target The processing instruction target.
337    * @param data The processing instruction data, or null if none was supplied.
338    *
339    * @throws SAXException
340    */

341   public void processingInstruction(String JavaDoc target, String JavaDoc data)
342     throws SAXException JavaDoc
343   {
344     search();
345
346     if (contentHandler!=null)
347       contentHandler.processingInstruction(target, data);
348   }
349
350   /**
351    * Report an XML comment anywhere in the document.
352    *
353    * @param ch An array holding the characters in the comment.
354    * @param start The starting position in the array.
355    * @param len The number of characters to use from the array.
356    *
357    * @throws SAXException
358    */

359   public void comment(char[] ch, int start, int len) throws SAXException JavaDoc
360   {
361     search();
362
363     if (lexicalHandler!=null)
364       lexicalHandler.comment(ch, start, len);
365   }
366
367   /**
368    * @throws SAXException
369    */

370   private void search() throws SAXException JavaDoc
371   {
372     if (buffer.length()<=0)
373       return;
374
375     char[] text = buffer.toString().toCharArray();
376
377     String JavaDoc lexemesymbol;
378     String JavaDoc lexemetext;
379     String JavaDoc[] groups = null;
380     int lexemeindex = 0;
381     int position = 0;
382
383     output.setLength(0);
384     do
385     {
386       lexemesymbol = null;
387       lexemetext = null;
388
389       for (lexemeindex = automaton.getLexemeCount()-1; lexemeindex>=0; lexemeindex--)
390       {
391         processor.setPatternAutomaton(automaton.getLexemeDefinition(lexemeindex));
392
393         if ((processor.match(text, position)) &&
394             ((lexemetext==null) || (processor.getGroup().length()>=lexemetext.length())))
395         {
396           lexemesymbol = automaton.getLexemeSymbol(lexemeindex);
397           lexemetext = processor.getGroup();
398           if (this.groups)
399           {
400             groups = new String JavaDoc[processor.getGroupCount()];
401             for (int group = 0; group<processor.getGroupCount(); group++)
402               groups[group] = processor.getGroup(group);
403           }
404         }
405       }
406
407       if ((lexemetext!=null) && (lexemetext.length()>0))
408       {
409         if (lexemesymbol!=null)
410         {
411           if (logger!=null)
412             logger.debug("Recognize token "+lexemesymbol+" with "+Decoder.toString(lexemetext));
413
414           if (output.length()>0)
415             contentHandler.characters(output.toString().toCharArray(), 0, output.length());
416
417           output.setLength(0);
418
419           contentHandler.startPrefixMapping("", NS);
420
421           AttributesImpl JavaDoc atts = new AttributesImpl JavaDoc();
422
423           atts.addAttribute("", "symbol", "symbol", "CDATA", lexemesymbol);
424           atts.addAttribute("", "text", "text", "CDATA", lexemetext);
425           contentHandler.startElement(NS, "lexeme", "lexeme", atts);
426
427             if (this.groups) {
428                 for (int group = 0; group<groups.length; group++) {
429                     contentHandler.startElement(NS, "group", "group", XMLUtils.EMPTY_ATTRIBUTES);
430                     contentHandler.characters(groups[group].toCharArray(), 0, groups[group].length());
431                     contentHandler.endElement(NS, "group", "group");
432                 }
433             }
434
435           contentHandler.endElement(NS, "lexeme", "lexeme");
436           contentHandler.endPrefixMapping("");
437         }
438         else if (logger!=null)
439           logger.debug("Ignore lexeme with "+Decoder.toString(lexemetext));
440
441         position += lexemetext.length();
442       }
443       else
444       {
445         output.append(text[position]);
446         position++;
447       }
448     }
449     while (position<text.length);
450
451     if (output.length()>0)
452       contentHandler.characters(output.toString().toCharArray(), 0, output.length());
453
454     buffer.setLength(0);
455   }
456
457   /**
458    * This class represent a entry in a store to cache the lexical automaton.
459    */

460   public static class LexicalAutomatonEntry implements Serializable JavaDoc
461   {
462     private SourceValidity validity = null;
463     private LexicalAutomaton automaton = null;
464
465     /**
466      * Create a new entry.
467      *
468      * @param automaton Lexical automaton.
469      * @param validity Validity of the lexicon file.
470      */

471     public LexicalAutomatonEntry(LexicalAutomaton automaton, SourceValidity validity)
472     {
473       this.automaton = automaton;
474       this.validity = validity;
475     }
476
477     /**
478      * Return the validity of the lexicon file.
479      *
480      * @return Validity of the lexicon file.
481      */

482     public SourceValidity getValidity()
483     {
484       return this.validity;
485     }
486
487     /**
488      * Return the lexical automaton.
489      *
490      * @return Lexical automaton.
491      */

492     public LexicalAutomaton getLexicalAutomaton()
493     {
494       return this.automaton;
495     }
496
497     private void writeObject(java.io.ObjectOutputStream JavaDoc out)
498       throws IOException JavaDoc
499     {
500       out.writeObject(validity);
501       out.writeObject(automaton);
502     }
503
504     private void readObject(java.io.ObjectInputStream JavaDoc in)
505       throws IOException JavaDoc, ClassNotFoundException JavaDoc
506     {
507       validity = (SourceValidity)in.readObject();
508       automaton = (LexicalAutomaton)in.readObject();
509     }
510   }
511 }
512
Popular Tags