1 16 package org.apache.cocoon.transformation; 17 18 import net.sourceforge.chaperon.build.LexicalAutomatonBuilder; 19 import net.sourceforge.chaperon.common.Decoder; 20 import net.sourceforge.chaperon.model.lexicon.Lexicon; 21 import net.sourceforge.chaperon.model.lexicon.LexiconFactory; 22 import net.sourceforge.chaperon.process.LexicalAutomaton; 23 import net.sourceforge.chaperon.process.PatternProcessor; 24 25 import org.apache.avalon.excalibur.pool.Recyclable; 26 import org.apache.avalon.framework.activity.Disposable; 27 import org.apache.avalon.framework.logger.LogEnabled; 28 import org.apache.avalon.framework.logger.Logger; 29 import org.apache.avalon.framework.parameters.ParameterException; 30 import org.apache.avalon.framework.parameters.Parameterizable; 31 import org.apache.avalon.framework.parameters.Parameters; 32 import org.apache.avalon.framework.service.ServiceException; 33 import org.apache.avalon.framework.service.ServiceManager; 34 import org.apache.avalon.framework.service.Serviceable; 35 36 import org.apache.cocoon.ProcessingException; 37 import org.apache.cocoon.xml.XMLUtils; 38 import org.apache.cocoon.caching.CacheableProcessingComponent; 39 import org.apache.cocoon.components.source.SourceUtil; 40 import org.apache.cocoon.environment.SourceResolver; 41 42 44 import org.apache.excalibur.source.Source; 45 import org.apache.excalibur.source.SourceException; 46 import org.apache.excalibur.source.SourceValidity; 47 import org.apache.excalibur.store.Store; 48 49 import org.xml.sax.Attributes ; 50 import org.xml.sax.SAXException ; 51 import org.xml.sax.helpers.AttributesImpl ; 52 53 import java.io.IOException ; 54 import java.io.Serializable ; 55 56 import java.util.Map ; 57 58 84 public class PatternTransformer extends AbstractTransformer 85 implements LogEnabled, Serviceable, Recyclable, 86 Disposable, Parameterizable, CacheableProcessingComponent { 87 88 89 public static final String NS = "http://chaperon.sourceforge.net/schema/lexemes/2.0"; 90 private String lexicon = null; 91 private Source lexiconSource = null; 92 private Logger logger = null; 93 private ServiceManager manager = null; 94 private SourceResolver resolver = null; 95 private LexicalAutomaton automaton = null; 96 private PatternProcessor processor = new PatternProcessor(); 97 private boolean groups = false; 98 private StringBuffer buffer = new StringBuffer (); 99 private StringBuffer output = new StringBuffer (); 100 101 106 public void enableLogging(Logger logger) 107 { 108 this.logger = logger; 109 } 110 111 117 public void service(ServiceManager manager) 118 { 119 this.manager = manager; 120 } 121 122 129 public void parameterize(Parameters parameters) throws ParameterException 130 { 131 groups = parameters.getParameterAsBoolean("groups", false); 132 } 133 134 147 public void setup(SourceResolver resolver, Map objectmodel, String src, Parameters parameters) 148 throws ProcessingException, SAXException , IOException 149 { 150 this.resolver = resolver; 151 152 Store store = null; 153 154 try 155 { 156 this.lexicon = src; 157 158 this.lexiconSource = resolver.resolveURI(this.lexicon); 159 160 store = (Store)this.manager.lookup(Store.TRANSIENT_STORE); 162 163 LexicalAutomatonEntry entry = (LexicalAutomatonEntry)store.get(this.lexiconSource.getURI()); 164 165 if ((entry==null) || (entry.getValidity()==null) || 167 (entry.getValidity().isValid(this.lexiconSource.getValidity())<=0)) 168 { 169 this.logger.info("(Re)building the automaton from '"+this.lexiconSource.getURI()+"'"); 170 171 if (this.lexiconSource.getInputStream()==null) 172 throw new ProcessingException("Source '"+this.lexiconSource.getURI()+"' not found"); 173 174 LexiconFactory factory = new LexiconFactory(); 175 SourceUtil.toSAX(this.manager, this.lexiconSource, null, factory); 176 177 Lexicon lexicon = factory.getLexicon(); 178 179 LexicalAutomatonBuilder builder = 180 new LexicalAutomatonBuilder(lexicon); 181 182 this.automaton = builder.getLexicalAutomaton(); 183 184 this.logger.info("Store automaton into store for '"+this.lexiconSource.getURI()+"'"); 185 store.store(this.lexiconSource.getURI(), 186 new LexicalAutomatonEntry(this.automaton, this.lexiconSource.getValidity())); 187 } 188 else 189 { 190 this.logger.info("Getting automaton from store for '"+this.lexiconSource.getURI()+"'"); 191 this.automaton = entry.getLexicalAutomaton(); 192 } 193 } 194 catch (SourceException se) 195 { 196 throw new ProcessingException("Error during resolving of '"+src+"'.", se); 197 } 198 catch (ServiceException se) 199 { 200 throw new ProcessingException("Could not lookup for component", se); 201 } 202 finally 203 { 204 if (store!=null) 205 this.manager.release(store); 206 } 207 } 208 209 214 public Serializable getKey() 215 { 216 return this.lexiconSource.getURI(); 217 } 218 219 225 public SourceValidity getValidity() 226 { 227 return this.lexiconSource.getValidity(); 228 } 229 230 233 public void recycle() 234 { 235 if ((this.resolver!=null) && (this.lexiconSource!=null)) 236 { 237 this.resolver.release(this.lexiconSource); 238 this.lexiconSource = null; 239 } 240 241 this.automaton = null; 242 super.recycle(); 243 } 244 245 248 public void dispose() 249 { 250 if ((this.resolver!=null) && (this.lexiconSource!=null)) 251 { 252 this.resolver.release(this.lexiconSource); 253 this.lexiconSource = null; 254 } 255 256 this.manager = null; 257 } 258 259 273 public void startElement(String uri, String loc, String raw, Attributes a) 274 throws SAXException 275 { 276 search(); 277 278 if (contentHandler!=null) 279 contentHandler.startElement(uri, loc, raw, a); 280 } 281 282 294 public void endElement(String uri, String loc, String raw) 295 throws SAXException 296 { 297 search(); 298 299 if (contentHandler!=null) 300 contentHandler.endElement(uri, loc, raw); 301 } 302 303 312 public void characters(char[] c, int start, int len) 313 throws SAXException 314 { 315 buffer.append(c, start, len); 316 } 317 318 327 public void ignorableWhitespace(char[] c, int start, int len) 328 throws SAXException 329 { 330 buffer.append(c, start, len); 331 } 332 333 341 public void processingInstruction(String target, String data) 342 throws SAXException 343 { 344 search(); 345 346 if (contentHandler!=null) 347 contentHandler.processingInstruction(target, data); 348 } 349 350 359 public void comment(char[] ch, int start, int len) throws SAXException 360 { 361 search(); 362 363 if (lexicalHandler!=null) 364 lexicalHandler.comment(ch, start, len); 365 } 366 367 370 private void search() throws SAXException 371 { 372 if (buffer.length()<=0) 373 return; 374 375 char[] text = buffer.toString().toCharArray(); 376 377 String lexemesymbol; 378 String lexemetext; 379 String [] groups = null; 380 int lexemeindex = 0; 381 int position = 0; 382 383 output.setLength(0); 384 do 385 { 386 lexemesymbol = null; 387 lexemetext = null; 388 389 for (lexemeindex = automaton.getLexemeCount()-1; lexemeindex>=0; lexemeindex--) 390 { 391 processor.setPatternAutomaton(automaton.getLexemeDefinition(lexemeindex)); 392 393 if ((processor.match(text, position)) && 394 ((lexemetext==null) || (processor.getGroup().length()>=lexemetext.length()))) 395 { 396 lexemesymbol = automaton.getLexemeSymbol(lexemeindex); 397 lexemetext = processor.getGroup(); 398 if (this.groups) 399 { 400 groups = new String [processor.getGroupCount()]; 401 for (int group = 0; group<processor.getGroupCount(); group++) 402 groups[group] = processor.getGroup(group); 403 } 404 } 405 } 406 407 if ((lexemetext!=null) && (lexemetext.length()>0)) 408 { 409 if (lexemesymbol!=null) 410 { 411 if (logger!=null) 412 logger.debug("Recognize token "+lexemesymbol+" with "+Decoder.toString(lexemetext)); 413 414 if (output.length()>0) 415 contentHandler.characters(output.toString().toCharArray(), 0, output.length()); 416 417 output.setLength(0); 418 419 contentHandler.startPrefixMapping("", NS); 420 421 AttributesImpl atts = new AttributesImpl (); 422 423 atts.addAttribute("", "symbol", "symbol", "CDATA", lexemesymbol); 424 atts.addAttribute("", "text", "text", "CDATA", lexemetext); 425 contentHandler.startElement(NS, "lexeme", "lexeme", atts); 426 427 if (this.groups) { 428 for (int group = 0; group<groups.length; group++) { 429 contentHandler.startElement(NS, "group", "group", XMLUtils.EMPTY_ATTRIBUTES); 430 contentHandler.characters(groups[group].toCharArray(), 0, groups[group].length()); 431 contentHandler.endElement(NS, "group", "group"); 432 } 433 } 434 435 contentHandler.endElement(NS, "lexeme", "lexeme"); 436 contentHandler.endPrefixMapping(""); 437 } 438 else if (logger!=null) 439 logger.debug("Ignore lexeme with "+Decoder.toString(lexemetext)); 440 441 position += lexemetext.length(); 442 } 443 else 444 { 445 output.append(text[position]); 446 position++; 447 } 448 } 449 while (position<text.length); 450 451 if (output.length()>0) 452 contentHandler.characters(output.toString().toCharArray(), 0, output.length()); 453 454 buffer.setLength(0); 455 } 456 457 460 public static class LexicalAutomatonEntry implements Serializable 461 { 462 private SourceValidity validity = null; 463 private LexicalAutomaton automaton = null; 464 465 471 public LexicalAutomatonEntry(LexicalAutomaton automaton, SourceValidity validity) 472 { 473 this.automaton = automaton; 474 this.validity = validity; 475 } 476 477 482 public SourceValidity getValidity() 483 { 484 return this.validity; 485 } 486 487 492 public LexicalAutomaton getLexicalAutomaton() 493 { 494 return this.automaton; 495 } 496 497 private void writeObject(java.io.ObjectOutputStream out) 498 throws IOException 499 { 500 out.writeObject(validity); 501 out.writeObject(automaton); 502 } 503 504 private void readObject(java.io.ObjectInputStream in) 505 throws IOException , ClassNotFoundException 506 { 507 validity = (SourceValidity)in.readObject(); 508 automaton = (LexicalAutomaton)in.readObject(); 509 } 510 } 511 } 512 | Popular Tags |