KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sourceforge > chaperon > process > LexicalProcessor


1 /*
2  * Copyright (C) Chaperon. All rights reserved.
3  * -------------------------------------------------------------------------
4  * This software is published under the terms of the Apache Software License
5  * version 1.1, a copy of which has been included with this distribution in
6  * the LICENSE file.
7  */

8
9 package net.sourceforge.chaperon.process;
10
11 import net.sourceforge.chaperon.common.Decoder;
12
13 import org.apache.commons.logging.Log;
14
15 import org.xml.sax.*;
16 import org.xml.sax.ext.LexicalHandler JavaDoc;
17 import org.xml.sax.helpers.AttributesImpl JavaDoc;
18 import org.xml.sax.helpers.LocatorImpl JavaDoc;
19
20 /**
21  * The processor convert a stream text into lexical tokens, like a tokenizer.
22  *
23  * @author <a HREF="mailto:stephan@apache.org">Stephan Michels </a>
24  * @version CVS $Id: LexicalProcessor.java,v 1.22 2004/01/04 16:54:34 benedikta Exp $
25  */

26 public class LexicalProcessor implements ContentHandler, LexicalHandler JavaDoc
27 {
28   public static final String JavaDoc NS = "http://chaperon.sourceforge.net/schema/text/1.0";
29   public static final String JavaDoc TEXT = "text";
30   public static final String JavaDoc NS_OUTPUT = "http://chaperon.sourceforge.net/schema/lexer/2.0";
31   public static final String JavaDoc OUTPUT = "output";
32   public static final String JavaDoc LEXEME = "lexeme";
33   public static final String JavaDoc GROUP = "group";
34   public static final String JavaDoc ERROR = "error";
35   private ContentHandler contentHandler = null;
36   private LexicalHandler JavaDoc lexicalHandler = null;
37   private static final int STATE_OUTSIDE = 0;
38   private static final int STATE_TEXT = 1;
39   private int state = STATE_OUTSIDE;
40   private Locator locator = null;
41   private LocatorImpl JavaDoc locatorImpl = null;
42   private LexicalAutomaton automaton = null;
43   private Log log = null;
44   private boolean grouping = false;
45   private boolean localizable = false;
46   private String JavaDoc source;
47   private int lineNumber;
48   private int columnNumber;
49   private StringBuffer JavaDoc buffer = null;
50   private char[] text = null;
51
52   /**
53    * Create a new lexical processor.
54    */

55   public LexicalProcessor() {}
56
57   /**
58    * Create a new lexical processor.
59    *
60    * @param automaton Lexical automaton, which should be used.
61    * @param handler Handler, which should receives the events.
62    */

63   public LexicalProcessor(LexicalAutomaton automaton)
64   {
65     this.automaton = automaton;
66   }
67
68   /**
69    * Set the lexical automaton, which the processor should use.
70    *
71    * @param automaton Lexical automaton, which should be used.
72    */

73   public void setLexicalAutomaton(LexicalAutomaton automaton)
74   {
75     this.automaton = automaton;
76   }
77
78   /**
79    * Set the <code>ContentHandler</code> that will receive XML data.
80    */

81   public void setContentHandler(ContentHandler handler)
82   {
83     this.contentHandler = handler;
84   }
85
86   /**
87    * Set the <code>LexicalHandler</code> that will receive XML data.
88    */

89   public void setLexicalHandler(LexicalHandler JavaDoc handler)
90   {
91     this.lexicalHandler = handler;
92   }
93
94   /**
95    * Set the log, which should be used.
96    *
97    * @param log Log.
98    */

99   public void setLog(Log log)
100   {
101     this.log = log;
102   }
103
104   public void setGrouping(boolean grouping)
105   {
106     this.grouping = grouping;
107   }
108
109   public void setLocalizable(boolean localizable)
110   {
111     this.localizable = localizable;
112   }
113
114   /**
115    * Receive an object for locating the origin of SAX document events.
116    */

117   public void setDocumentLocator(Locator locator)
118   {
119     this.locator = locator;
120     this.locatorImpl = null;
121     if (locator!=null)
122     {
123       this.locatorImpl = new LocatorImpl JavaDoc(locator);
124       contentHandler.setDocumentLocator(locatorImpl);
125     }
126   }
127
128   /**
129    * Receive notification of the beginning of a document.
130    */

131   public void startDocument() throws SAXException
132   {
133     if (locatorImpl!=null)
134     {
135       locatorImpl.setLineNumber(locator.getLineNumber());
136       locatorImpl.setColumnNumber(locator.getColumnNumber());
137     }
138
139     contentHandler.startDocument();
140     state = STATE_OUTSIDE;
141
142     buffer = new StringBuffer JavaDoc();
143   }
144
145   /**
146    * Receive notification of the beginning of an element.
147    */

148   public void startElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName, Attributes atts)
149     throws SAXException
150   {
151     if (state==STATE_OUTSIDE)
152     {
153       if ((namespaceURI!=null) && (namespaceURI.equals(NS)) && (localName.equals(TEXT)))
154       {
155         state = STATE_TEXT;
156         buffer = new StringBuffer JavaDoc();
157
158         if (atts.getValue("source")!=null)
159           source = atts.getValue("source");
160         else if (locator!=null)
161           source = locator.getSystemId();
162         else
163           source = "unknown";
164
165         if (atts.getValue("column")!=null)
166           columnNumber = Integer.parseInt(atts.getValue("column"));
167         else if (locator!=null)
168           columnNumber = locator.getColumnNumber();
169         else
170           columnNumber = 1;
171
172         if (atts.getValue("line")!=null)
173           lineNumber = Integer.parseInt(atts.getValue("line"));
174         else if (locator!=null)
175           lineNumber = locator.getLineNumber();
176         else
177           lineNumber = 1;
178       }
179       else
180         contentHandler.startElement(namespaceURI, localName, qName, atts);
181     }
182     else if (state==STATE_TEXT)
183       throw new SAXException("Unexpected start element '"+qName+"'.");
184   }
185
186   /**
187    * Receive notification of character data.
188    */

189   public void characters(char[] ch, int start, int length)
190     throws SAXException
191   {
192     if (state==STATE_OUTSIDE)
193       contentHandler.characters(ch, start, length);
194     else if (state==STATE_TEXT)
195       buffer.append(ch, start, length);
196   }
197
198   /**
199    * Receive notification of ignorable whitespace in element content.
200    */

201   public void ignorableWhitespace(char[] ch, int start, int length)
202     throws SAXException
203   {
204     if (state==STATE_OUTSIDE)
205       contentHandler.characters(ch, start, length);
206     else if (state==STATE_TEXT)
207       buffer.append(ch, start, length);
208   }
209
210   /**
211    * Receive notification of the end of an element.
212    */

213   public void endElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName)
214     throws SAXException
215   {
216     if (state==STATE_OUTSIDE)
217       contentHandler.endElement(namespaceURI, localName, qName);
218     else if (state==STATE_TEXT)
219     {
220       if ((namespaceURI!=null) && (namespaceURI.equals(NS)) && (localName.equals(TEXT)))
221       {
222         state = STATE_OUTSIDE;
223
224         handleEndDocument();
225       }
226       else
227         throw new SAXException("Unexpected end element '"+qName+"'.");
228     }
229   }
230
231   /**
232    * Begin the scope of a prefix-URI Namespace mapping.
233    */

234   public void startPrefixMapping(String JavaDoc prefix, String JavaDoc uri)
235     throws SAXException
236   {
237     if (locatorImpl!=null)
238     {
239       locatorImpl.setLineNumber(locator.getLineNumber());
240       locatorImpl.setColumnNumber(locator.getColumnNumber());
241     }
242
243     contentHandler.startPrefixMapping(prefix, uri);
244   }
245
246   /**
247    * End the scope of a prefix-URI mapping.
248    */

249   public void endPrefixMapping(String JavaDoc prefix) throws SAXException
250   {
251     if (locatorImpl!=null)
252     {
253       locatorImpl.setLineNumber(locator.getLineNumber());
254       locatorImpl.setColumnNumber(locator.getColumnNumber());
255     }
256
257     contentHandler.endPrefixMapping(prefix);
258   }
259
260   /**
261    * Receive notification of a processing instruction.
262    */

263   public void processingInstruction(String JavaDoc target, String JavaDoc data)
264     throws SAXException
265   {
266     if (locatorImpl!=null)
267     {
268       locatorImpl.setLineNumber(locator.getLineNumber());
269       locatorImpl.setColumnNumber(locator.getColumnNumber());
270     }
271
272     if (state==STATE_OUTSIDE)
273       contentHandler.processingInstruction(target, data);
274   }
275
276   /**
277    * Receive notification of a skipped entity.
278    */

279   public void skippedEntity(String JavaDoc name) throws SAXException
280   {
281     if (locatorImpl!=null)
282     {
283       locatorImpl.setLineNumber(locator.getLineNumber());
284       locatorImpl.setColumnNumber(locator.getColumnNumber());
285     }
286
287     if (state==STATE_OUTSIDE)
288       contentHandler.skippedEntity(name);
289   }
290
291   /**
292    * Receive notification of the end of a document.
293    */

294   public void endDocument() throws SAXException
295   {
296     if (locatorImpl!=null)
297     {
298       locatorImpl.setLineNumber(locator.getLineNumber());
299       locatorImpl.setColumnNumber(locator.getColumnNumber());
300     }
301
302     if (state==STATE_OUTSIDE)
303       contentHandler.endDocument();
304   }
305
306   /**
307    * Report the start of DTD declarations, if any.
308    */

309   public void startDTD(String JavaDoc name, String JavaDoc publicId, String JavaDoc systemId)
310     throws SAXException
311   {
312     if (lexicalHandler!=null)
313       lexicalHandler.startDTD(name, publicId, systemId);
314   }
315
316   /**
317    * Report the end of DTD declarations.
318    */

319   public void endDTD() throws SAXException
320   {
321     if (lexicalHandler!=null)
322       lexicalHandler.endDTD();
323   }
324
325   /**
326    * Report the beginning of an entity.
327    */

328   public void startEntity(String JavaDoc name) throws SAXException
329   {
330     if (lexicalHandler!=null)
331       lexicalHandler.startEntity(name);
332   }
333
334   /**
335    * Report the end of an entity.
336    */

337   public void endEntity(String JavaDoc name) throws SAXException
338   {
339     if (lexicalHandler!=null)
340       lexicalHandler.endEntity(name);
341   }
342
343   /**
344    * Report the start of a CDATA section.
345    */

346   public void startCDATA() throws SAXException
347   {
348     if (lexicalHandler!=null)
349       lexicalHandler.startCDATA();
350   }
351
352   /**
353    * Report the end of a CDATA section.
354    */

355   public void endCDATA() throws SAXException
356   {
357     if (lexicalHandler!=null)
358       lexicalHandler.endCDATA();
359   }
360
361   /**
362    * Report an XML comment anywhere in the document.
363    */

364   public void comment(char[] ch, int start, int len) throws SAXException
365   {
366     if (lexicalHandler!=null)
367       lexicalHandler.comment(ch, start, len);
368   }
369
370   /**
371    * Receives the notification, that the text stream ended.
372    */

373   public void handleEndDocument() throws SAXException
374   {
375     PatternProcessor processor = new PatternProcessor();
376     text = buffer.toString().toCharArray();
377
378     int position = 0;
379
380     if (locatorImpl!=null)
381     {
382       locatorImpl.setSystemId(source);
383       locatorImpl.setLineNumber(lineNumber);
384       locatorImpl.setColumnNumber(columnNumber);
385     }
386
387     contentHandler.startPrefixMapping("", NS_OUTPUT);
388
389     AttributesImpl JavaDoc atts = new AttributesImpl JavaDoc();
390     if (localizable)
391       atts.addAttribute("", "source", "source", "CDATA", source);
392
393     contentHandler.startElement(NS_OUTPUT, OUTPUT, OUTPUT, new AttributesImpl JavaDoc());
394
395     StringBuffer JavaDoc unrecognized = new StringBuffer JavaDoc();
396     while (position<text.length)
397     {
398       String JavaDoc tokensymbol = null;
399       String JavaDoc tokentext = null;
400
401       for (int lexemeindex = automaton.getLexemeCount()-1; lexemeindex>=0; lexemeindex--)
402       {
403         processor.setPatternAutomaton(automaton.getLexemeDefinition(lexemeindex));
404
405         if ((processor.match(text, position)) &&
406             ((tokentext==null) || (processor.getGroup().length()>=tokentext.length())))
407         {
408           tokensymbol = automaton.getLexemeSymbol(lexemeindex);
409           tokentext = processor.getGroup();
410         }
411       }
412
413       if ((tokentext!=null) && (tokentext.length()==0))
414         log.warn("Lexical processor recognized empty lexeme '"+tokensymbol+"'");
415
416       if ((tokentext!=null) && (tokentext.length()>0))
417       {
418         if (unrecognized.length()>0)
419         {
420           if (log!=null)
421             log.debug("Text was not recognized "+Decoder.toString(unrecognized.toString()));
422
423           atts = new AttributesImpl JavaDoc();
424           atts.addAttribute("", "text", "text", "CDATA", unrecognized.toString());
425           if (localizable)
426           {
427             atts.addAttribute("", "line", "line", "CDATA", String.valueOf(lineNumber));
428             atts.addAttribute("", "column", "column", "CDATA", String.valueOf(columnNumber));
429           }
430
431           contentHandler.startElement(NS_OUTPUT, ERROR, ERROR, atts);
432           contentHandler.endElement(NS_OUTPUT, ERROR, ERROR);
433
434           increasePosition(position-unrecognized.length(), unrecognized.length());
435
436           unrecognized = new StringBuffer JavaDoc();
437         }
438
439         if (tokensymbol!=null)
440         {
441           if (log!=null)
442             log.debug("Recognize token "+tokensymbol+" with "+Decoder.toString(tokentext));
443
444           if (locatorImpl!=null)
445           {
446             locatorImpl.setLineNumber(locator.getLineNumber());
447             locatorImpl.setColumnNumber(locator.getColumnNumber());
448           }
449
450           atts = new AttributesImpl JavaDoc();
451
452           atts.addAttribute("", "symbol", "symbol", "CDATA", tokensymbol);
453           atts.addAttribute("", "text", "text", "CDATA", tokentext);
454           if (localizable)
455           {
456             atts.addAttribute("", "line", "line", "CDATA", String.valueOf(lineNumber));
457             atts.addAttribute("", "column", "column", "CDATA", String.valueOf(columnNumber));
458           }
459
460           contentHandler.startElement(NS_OUTPUT, LEXEME, LEXEME, atts);
461
462           if (grouping)
463             for (int i = 1; i<processor.getGroupCount(); i++)
464             {
465               AttributesImpl JavaDoc groupatts = new AttributesImpl JavaDoc();
466               groupatts.addAttribute("", "text", "text", "CDATA", processor.getGroup(i));
467               contentHandler.startElement(NS_OUTPUT, GROUP, GROUP, groupatts);
468               contentHandler.endElement(NS_OUTPUT, GROUP, GROUP);
469             }
470
471           contentHandler.endElement(NS_OUTPUT, LEXEME, LEXEME);
472         }
473         else if (log!=null)
474           log.debug("Ignore lexeme with "+Decoder.toString(tokentext));
475
476         if (locatorImpl!=null)
477         {
478           locatorImpl.setColumnNumber(columnNumber);
479           locatorImpl.setLineNumber(lineNumber);
480         }
481
482         position += tokentext.length();
483
484         increasePosition(position-tokentext.length(), tokentext.length());
485       }
486       else
487       {
488         if (locatorImpl!=null)
489         {
490           locatorImpl.setColumnNumber(columnNumber);
491           locatorImpl.setLineNumber(lineNumber);
492         }
493
494         unrecognized.append(text[position]);
495         position++;
496       }
497     }
498
499     if (unrecognized.length()>0)
500     {
501       if (log!=null)
502         log.debug("Text was not recognized "+Decoder.toString(unrecognized.toString()));
503
504       atts = new AttributesImpl JavaDoc();
505       atts.addAttribute("", "text", "text", "CDATA", unrecognized.toString());
506       if (localizable)
507       {
508         atts.addAttribute("", "line", "line", "CDATA", String.valueOf(lineNumber));
509         atts.addAttribute("", "column", "column", "CDATA", String.valueOf(columnNumber));
510       }
511
512       contentHandler.startElement(NS_OUTPUT, ERROR, ERROR, atts);
513       contentHandler.endElement(NS_OUTPUT, ERROR, ERROR);
514
515       System.out.println("push \""+unrecognized.toString()+"\"");
516       increasePosition(position-unrecognized.length(), unrecognized.length());
517     }
518
519     if (locatorImpl!=null)
520     {
521       locatorImpl.setLineNumber(locator.getLineNumber());
522       locatorImpl.setColumnNumber(locator.getColumnNumber());
523     }
524
525     contentHandler.endElement(NS_OUTPUT, OUTPUT, OUTPUT);
526     contentHandler.endPrefixMapping("");
527   }
528
529   private void increasePosition(int position, int length)
530   {
531     for (int i = position; i<(position+length); i++)
532     {
533       if (text[i]=='\n')
534       {
535         columnNumber = 1;
536         lineNumber++;
537       }
538       else if ((text[i]=='\r') && ((i==(text.length-1)) || (text[i+1]!='\n')))
539       {
540         columnNumber = 1;
541         lineNumber++;
542       }
543       else
544         columnNumber++;
545     }
546   }
547 }
548
Popular Tags