package fri.patterns.interpreter.parsergenerator.lexer;

import java.util.*;
import java.io.IOException;
import fri.patterns.interpreter.parsergenerator.Lexer;
import fri.patterns.interpreter.parsergenerator.Token;
import fri.patterns.interpreter.parsergenerator.syntax.*;
import fri.patterns.interpreter.parsergenerator.syntax.builder.SyntaxSeparation;

/**
 * Builds character consumers from the rules of a lexer syntax and hands out
 * Lexer instances that work on those consumers.
 * <p>
 * Building happens once, in the constructor. Rules that could be translated are
 * removed from the passed syntax; when rules are left over at the end, a
 * LexerException is thrown.
 */
public class LexerBuilder
{
    /** Maps a nonterminal name to its Consumer (or ConsumerAlternatives when several rules share the name). */
    protected Map charConsumers;
    /** Symbols to ignore, passed through unchanged to every created lexer. */
    protected List ignoredSymbols;
    /** When true, the lexer rules are printed to System.err before being processed. */
    public static boolean DEBUG;

    /**
     * Creates a builder and immediately translates all rules of the passed syntax.
     * @param lexerSyntax the lexer rules; processed rules are removed from this syntax.
     * @param ignoredSymbols symbols handed to every lexer created by this builder.
     * @throws LexerException when a rule is malformed or could not be processed.
     * @throws SyntaxException declared for the syntax operations used while building.
     */
    public LexerBuilder(Syntax lexerSyntax, List ignoredSymbols)
        throws LexerException, SyntaxException
    {
        this.ignoredSymbols = ignoredSymbols;
        build(lexerSyntax);
    }

    /** Returns a new lexer working on the consumers built by this builder. No input is set yet. */
    public Lexer getLexer() {
        return new LexerImpl(ignoredSymbols, charConsumers);
    }

    /**
     * Returns a new lexer that was already given the passed input.
     * @param input the input object, passed on to Lexer.setInput().
     * @throws IOException when setting the input fails.
     */
    public Lexer getLexer(Object input)
        throws IOException
    {
        Lexer lexer = getLexer();
        lexer.setInput(input);
        return lexer;
    }

    /**
     * Translates all rules into character consumers. Works in three passes:
     * (1) translate every rule that is neither empty nor left-recursive,
     * (2) mark the consumers of the remaining empty / left-recursive rules as nullable / repeatable,
     * (3) resolve references between consumers.
     */
    private void build(Syntax lexerSyntax)
        throws LexerException, SyntaxException
    {
        SyntaxSeparation.IntArray deleteIndexes = new SyntaxSeparation.IntArray(lexerSyntax.size());
        if (DEBUG) {
            System.err.println("Processing lexer rules: \n"+lexerSyntax);
        }

        this.charConsumers = new Hashtable(lexerSyntax.size());

        // pass 1: plain rules become consumers and are scheduled for removal
        for (int i = 0; i < lexerSyntax.size(); i++) {
            translateLexerRule(lexerSyntax.getRule(i), i, deleteIndexes);
        }
        deleteIndexes.removeIndexesFrom(lexerSyntax);

        // pass 2: what is left must be an empty (nullable) or left-recursive (repeatable) rule
        for (int i = 0; i < lexerSyntax.size(); i++) {
            Rule rule = lexerSyntax.getRule(i);
            String nonterm = rule.getNonterminal();
            if (checkNullableRule(nonterm, rule, i, deleteIndexes) == false
                    && checkRepeatableRule(nonterm, rule, i, deleteIndexes) == false) {
                throw new LexerException("Found no character consumer for nullable or repeatable rule "+rule);
            }
        }
        deleteIndexes.removeIndexesFrom(lexerSyntax);

        // every rule must have been consumed by one of the passes above
        if (lexerSyntax.size() > 0) {
            throw new LexerException("Could not process rules in lexer syntax: "+lexerSyntax);
        }

        // pass 3: let every consumer resolve its references to other consumers
        Map done = new Hashtable();
        for (Iterator it = charConsumers.entrySet().iterator(); it.hasNext(); ) {
            Consumer cc = (Consumer) ((Map.Entry) it.next()).getValue();
            cc.resolveConsumerReferences(charConsumers, done);
        }
    }

    /**
     * Translates one rule into a Consumer and registers it under the rule's nonterminal.
     * Empty rules and rules starting with their own nonterminal are skipped here; they are
     * handled later by checkNullableRule() and checkRepeatableRule().
     * <p>
     * The right side is scanned with a small state machine: Token.BUTNOT switches to
     * subtraction, Token.UPTO switches to set building, every other symbol is applied
     * according to the current state and then resets the state to concatenation.
     *
     * @param rule the rule to translate.
     * @param index index of the rule within the syntax, recorded in deleteIndexes on success.
     * @param deleteIndexes collects the indexes of rules that were translated.
     * @throws LexerException when operators or operands appear at an invalid position.
     */
    private void translateLexerRule(Rule rule, int index, SyntaxSeparation.IntArray deleteIndexes)
        throws LexerException
    {
        String nonterm = rule.getNonterminal();
        if (rule.rightSize() <= 0 || rule.getRightSymbol(0).equals(nonterm)) {
            return;    // nullable or left-recursive rule, not handled here
        }

        final int CONCATENATION = 0, SET = 1, SUBTRACTION = 2;
        int state = CONCATENATION;
        boolean intersectionHappened = false;
        Consumer consumer = new Consumer(rule);    // the consumer registered for this rule
        Consumer currentConsumer = new Consumer();    // receives appended and subtracted symbols
        Consumer setConsumer = currentConsumer;    // receives the upper limit of a set
        consumer.append(currentConsumer);

        for (int i = 0; i < rule.rightSize(); i++) {
            String sym = rule.getRightSymbol(i);

            if (sym.equals(Token.BUTNOT)) {
                if (i == 0 || state != CONCATENATION) {
                    throw new LexerException("Missing symbol to subtract from: "+rule);
                }
                state = SUBTRACTION;
            }
            else if (sym.equals(Token.UPTO)) {
                if (i == 0 || state != CONCATENATION) {
                    throw new LexerException("Missing lower limit of set: "+rule);
                }
                state = SET;
            }
            else {
                String convertedSym = convertSymbol(sym);
                // convertSymbol() returns its argument unchanged when it is no literal
                boolean isNonterm = convertedSym.equals(sym);
                if (isNonterm && state == SET) {
                    throw new LexerException("Can not append nonterminal to set: "+rule);
                }

                // look ahead: does this symbol open a set ("sym .. other")?
                boolean setWillHappen = rule.rightSize() > i + 1 && rule.getRightSymbol(i + 1).equals(Token.UPTO);

                if (state == SET) {
                    setConsumer.appendSet(convertedSym);
                    setConsumer = currentConsumer;    // set is complete, back to default target
                }
                else if (state == SUBTRACTION) {
                    intersectionHappened = true;
                    if (isNonterm) {
                        if (setWillHappen) {
                            throw new LexerException("Nonterminal can not open set after subtraction: "+rule);
                        }
                        currentConsumer.subtract(new Consumer.Reference(sym));
                    }
                    else if (setWillHappen) {
                        // the subtracted consumer will receive the upper limit of the set
                        currentConsumer.subtract(setConsumer = new Consumer(convertedSym));
                    }
                    else {
                        currentConsumer.subtract(new Consumer(convertedSym));
                    }
                }
                else if (state == CONCATENATION) {
                    if (intersectionHappened) {
                        // a subtraction ended the previous consumer, start a new one
                        intersectionHappened = false;
                        currentConsumer = new Consumer();
                        consumer.append(currentConsumer);
                        // keep the set target in sync, else a following ".." would
                        // extend the previous consumer instead of the new one
                        setConsumer = currentConsumer;
                    }

                    if (isNonterm) {
                        if (setWillHappen) {
                            throw new LexerException("Nonterminal can not open set in concatenation: "+rule);
                        }
                        currentConsumer.append(new Consumer.Reference(sym));
                    }
                    else {
                        currentConsumer.append(convertedSym);
                    }
                }

                state = CONCATENATION;
            }
        }

        putCharConsumer(nonterm, consumer.optimize());
        deleteIndexes.add(index);
    }

    /**
     * Registers a consumer under the passed key. When the key is already taken, the existing
     * consumer is wrapped into a ConsumerAlternatives (if it is none yet) and the new consumer
     * is added as a further alternative.
     */
    private void putCharConsumer(String key, Consumer consumer) {
        Object o = charConsumers.get(key);
        if (o == null) {
            charConsumers.put(key, consumer);
            return;
        }

        ConsumerAlternatives ca;
        if (o instanceof ConsumerAlternatives) {
            ca = (ConsumerAlternatives) o;
        }
        else {
            ca = new ConsumerAlternatives((Consumer) o);
            charConsumers.put(key, ca);    // replace the single consumer by the alternatives
        }
        ca.addAlternate(consumer);
    }

    /**
     * Handles a rule with an empty right side by marking the consumer of its nonterminal as nullable.
     * @return true when the rule was empty and a consumer was marked; false when the rule is
     *     not empty, or when no consumer is registered for the nonterminal (the caller then
     *     reports the rule with a LexerException).
     */
    private boolean checkNullableRule(String nonterm, Rule rule, int index, SyntaxSeparation.IntArray deleteIndexes) {
        if (rule.rightSize() > 0) {
            return false;
        }
        Consumer cc = (Consumer) charConsumers.get(nonterm);
        if (cc == null) {
            return false;    // nothing to mark; avoids a NullPointerException
        }
        cc.setNullable();
        deleteIndexes.add(index);
        return true;
    }

    /**
     * Handles a left-recursive rule (at least two right symbols, the first being the rule's own
     * nonterminal) by marking the consumer of its nonterminal as repeatable, provided the
     * consumer confirms via matchesRepeatableRule() that the rule fits.
     * @return true when the consumer was marked repeatable; false otherwise, including the
     *     case that no consumer is registered for the nonterminal.
     */
    private boolean checkRepeatableRule(String nonterm, Rule rule, int index, SyntaxSeparation.IntArray deleteIndexes) {
        if (rule.rightSize() < 2 || rule.getRightSymbol(0).equals(nonterm) == false) {
            return false;
        }
        Consumer cc = (Consumer) charConsumers.get(nonterm);
        if (cc == null || cc.matchesRepeatableRule(rule) == false) {
            return false;    // null check avoids a NullPointerException
        }
        cc.setRepeatable();
        deleteIndexes.add(index);
        return true;
    }

    /**
     * Converts a literal symbol into the text it stands for.
     * <ul>
     *  <li>Symbols starting with a single or double quote: first and last character are stripped
     *      and the escapes \n \r \t \f \b \' \" \\ are resolved; any other backslash is kept
     *      literally.</li>
     *  <li>Symbols starting with "0x" or "0X": hexadecimal character code.</li>
     *  <li>Other symbols starting with "0": octal character code; a lone "0" is character code 0.</li>
     *  <li>Symbols starting with another digit: decimal character code.</li>
     *  <li>Everything else is returned unchanged (same String instance content), which callers
     *      use to detect a nonterminal.</li>
     * </ul>
     * @param sym the symbol as found in the rule, must not be empty.
     * @return the converted text, or the passed symbol when it is no literal.
     * @throws IllegalArgumentException when a quoted symbol has no content; a
     *     NumberFormatException (a subclass) when a character code can not be parsed.
     */
    private String convertSymbol(String sym) {
        char first = sym.charAt(0);

        if (first == '\'' || first == '"') {
            // a lone quote would make substring() fail, report it like an empty definition
            if (sym.length() < 2) {
                throw new IllegalArgumentException("Empty character or string definition: "+sym);
            }
            String s = sym.substring(1, sym.length() - 1);
            if (s.length() <= 0) {
                throw new IllegalArgumentException("Empty character or string definition: "+sym);
            }

            StringBuffer sb = new StringBuffer(s.length());
            for (int i = 0; i < s.length(); i++) {
                char c = s.charAt(i);
                if (c != '\\') {
                    sb.append(c);
                    continue;
                }
                // look at the escaped character; 0 when the backslash is the last character
                char c1 = s.length() > i + 1 ? s.charAt(i + 1) : 0;
                switch (c1) {
                    case 'n': sb.append('\n'); i++; break;
                    case 'r': sb.append('\r'); i++; break;
                    case 't': sb.append('\t'); i++; break;
                    case 'f': sb.append('\f'); i++; break;
                    case 'b': sb.append('\b'); i++; break;
                    case '\'': sb.append('\''); i++; break;
                    case '"': sb.append('"'); i++; break;
                    case '\\': sb.append('\\'); i++; break;
                    default: sb.append(c); break;    // unknown escape: keep the backslash
                }
            }
            return sb.toString();
        }

        char c;
        if (sym.startsWith("0x") || sym.startsWith("0X")) {
            c = (char) Integer.parseInt(sym.substring(2), 16);
        }
        else if (sym.startsWith("0")) {
            // a lone "0" has no octal digits after the prefix, it simply means code 0
            c = sym.length() == 1 ? (char) 0 : (char) Integer.parseInt(sym.substring(1), 8);
        }
        else if (Character.isDigit(first)) {
            c = (char) Integer.parseInt(sym);
        }
        else {
            return sym;    // no literal, must be a nonterminal
        }
        return String.valueOf(c);
    }

}