package fri.patterns.interpreter.parsergenerator.syntax.builder;

import java.util.*;
import fri.patterns.interpreter.parsergenerator.Token;
import fri.patterns.interpreter.parsergenerator.syntax.*;
import fri.patterns.interpreter.parsergenerator.lexer.StandardLexerRules;

/**
 * Splits a mixed syntax into a lexer syntax and a parser syntax.
 * <p>
 * Rules whose nonterminal is <code>Token.TOKEN</code> or <code>Token.IGNORED</code>
 * declare which symbols are scanned by the lexer. Symbols enclosed in
 * <code>Token.COMMAND_QUOTE</code> on the right side of any rule are treated as
 * token symbols as well. All rules reachable from a token or ignored symbol are
 * moved into the lexer syntax; whatever remains is the parser syntax.
 * <p>
 * Note that the <code>Syntax</code> passed to the constructor is modified in
 * place: rules are removed from it, right-side symbols are rewritten, and the
 * very same object is afterwards returned by {@link #getParserSyntax()}.
 */
public class SyntaxSeparation
{
    private List tokenSymbols;
    private List ignoredSymbols;
    private Syntax parserSyntax;
    private Syntax lexerSyntax;

    /** When true, informational messages about the separation are printed to <code>System.err</code>. */
    public static boolean DEBUG = true;

    /**
     * Separates the passed syntax into lexer and parser parts.
     * @param syntax the mixed syntax to separate; it is modified by this call.
     * @throws SyntaxException when the token/ignored declarations are inconsistent,
     *         or a referenced nonterminal has no rule.
     */
    public SyntaxSeparation(Syntax syntax)
        throws SyntaxException
    {
        separate(syntax, new IntArray(syntax.size()));
    }

    /** Returns the rules that were moved to the lexer. */
    public Syntax getLexerSyntax() {
        return lexerSyntax;
    }

    /** Returns the remaining parser rules (the syntax instance passed to the constructor). */
    public Syntax getParserSyntax() {
        return parserSyntax;
    }

    /**
     * Returns the token symbols, each a String enclosed in <code>Token.COMMAND_QUOTE</code>.
     */
    public List getTokenSymbols() {
        return tokenSymbols;
    }

    /**
     * Returns the symbols declared as ignored, as Strings without enclosing
     * <code>Token.COMMAND_QUOTE</code>.
     */
    public List getIgnoredSymbols() {
        return ignoredSymbols;
    }


    /**
     * Performs the separation and fills all member fields.
     * @param syntax the mixed syntax, modified in place.
     * @param deleteIndexes work buffer for indexes of rules to remove from <code>syntax</code>.
     */
    private void separate(Syntax syntax, IntArray deleteIndexes)
        throws SyntaxException
    {
        Hashtable tokenSymbols = new Hashtable();
        Hashtable ignoredSymbols = new Hashtable();
        List commandsDefinedAsTOKEN = new ArrayList();

        // Pass 1: collect and remove the "token" and "ignored" declaration rules.
        for (int i = 0; i < syntax.size(); i++) {
            Rule rule = syntax.getRule(i);
            String nonterm = rule.getNonterminal();

            boolean token = nonterm.equals(Token.TOKEN);
            boolean ignored = !token && nonterm.equals(Token.IGNORED);

            if (token || ignored) {
                if (rule.rightSize() != 1)
                    throw new SyntaxException("\"token\" and \"ignored\" are predefined lexer keywords and must contain exactly one nonterminal symbol after resolve: "+rule);

                deleteIndexes.add(i);

                String sym = rule.getRightSymbol(0);
                if (sym.charAt(0) == Token.COMMAND_QUOTE) {
                    // strip the enclosing quotes, remember explicitly declared quoted tokens
                    sym = sym.substring(1, sym.length() - 1);
                    if (token)
                        commandsDefinedAsTOKEN.add(sym);
                }

                if (token)
                    tokenSymbols.put(sym, sym);
                else
                    ignoredSymbols.put(sym, sym);
            }
        }
        deleteIndexes.removeIndexesFrom(syntax);

        // A symbol must not be both token and ignored. Checking one direction is
        // sufficient: any symbol contained in both tables is found by this loop.
        for (Iterator it = tokenSymbols.keySet().iterator(); it.hasNext(); ) {
            Object o = it.next();
            if (ignoredSymbols.get(o) != null)
                throw new SyntaxException("Can not define token as ignored: "+o);
        }

        boolean tokensWereDeclared = tokenSymbols.size() > 0;
        List commandTokens = new ArrayList();

        // Pass 2: every quoted right-side symbol becomes a token symbol (and is unquoted
        // within its rule). When no tokens were declared explicitly, every rule that
        // contains BUTNOT or UPTO is regarded as a token rule.
        for (int i = 0; i < syntax.size(); i++) {
            Rule rule = syntax.getRule(i);

            for (int j = 0; j < rule.rightSize(); j++) {
                String sym = rule.getRightSymbol(j);

                if (sym.charAt(0) == Token.COMMAND_QUOTE) {
                    sym = sym.substring(1, sym.length() - 1);
                    tokenSymbols.put(sym, sym);
                    commandTokens.add(sym);
                    rule.setRightSymbol(sym, j);
                }
                else if (!tokensWereDeclared && (sym.equals(Token.BUTNOT) || sym.equals(Token.UPTO))) {
                    String s = rule.getNonterminal();
                    tokenSymbols.put(s, s);
                }
            }
        }

        // Pass 3: move all rules reachable from token and ignored symbols into the lexer syntax.
        this.lexerSyntax = new Syntax(tokenSymbols.size() + ignoredSymbols.size());
        Hashtable [] symbolTables = new Hashtable [] { tokenSymbols, ignoredSymbols };

        for (int j = 0; j < symbolTables.length; j++) {
            Hashtable symbols = symbolTables[j];

            for (Enumeration e = symbols.keys(); e.hasMoreElements(); ) {
                String nonterm = (String) e.nextElement();

                getRulesUnderSymbol(nonterm, syntax, lexerSyntax, deleteIndexes);

                // Nothing was found in the syntax and the lexer does not know the symbol
                // yet: it must then be one of the predefined standard lexer rules.
                if (deleteIndexes.isEmpty() && !lexerSyntax.hasRule(nonterm)) {
                    String [][] predefinedRules = StandardLexerRules.rulesForIdentifier(nonterm);
                    if (predefinedRules == null || predefinedRules.length <= 0)
                        throw new SyntaxException("Found nonterminal that has no rule and is no predefined lexer nonterminal: >"+nonterm+"<");

                    lexerSyntax.appendRules(SyntaxUtil.ruleArrayToList(predefinedRules));
                }

                deleteIndexes.removeIndexesFrom(syntax);
            }
        }

        this.ignoredSymbols = new ArrayList(ignoredSymbols.size());
        for (Enumeration e = ignoredSymbols.keys(); e.hasMoreElements(); )
            this.ignoredSymbols.add(e.nextElement());

        this.parserSyntax = provideParserSyntax(syntax, lexerSyntax, tokensWereDeclared, tokenSymbols, commandTokens, commandsDefinedAsTOKEN);
    }


    /**
     * Finishes the parser syntax and fills the <code>tokenSymbols</code> member list.
     * <ul>
     *  <li>When parser rules remain, every right-side symbol that is a token symbol, or
     *      that has a rule only in the lexer syntax, is quoted within its rule and added
     *      to the token symbol list.</li>
     *  <li>When no parser rules remain and no tokens were declared, the start rules of
     *      the lexer syntax become the token symbols, if there are any.</li>
     *  <li>In all other cases all collected token symbols are used, except quoted
     *      symbols that were not explicitly declared as "token".</li>
     * </ul>
     * @return the passed <code>parserSyntax</code> instance.
     * @throws SyntaxException when a lexer operator (UPTO, BUTNOT) remains in a parser rule,
     *         or a parser nonterminal has no rule in either syntax.
     */
    private Syntax provideParserSyntax(Syntax parserSyntax, Syntax lexerSyntax, boolean tokensWereDeclared, Map tokenSymbols, List commandTokens, List commandsDefinedAsTOKEN)
        throws SyntaxException
    {
        boolean lexerOnlyHandling = false;

        if (parserSyntax.size() > 0) {
            if (DEBUG) System.err.println("INFO: Mixed parser and lexer specification, "+lexerSyntax.size()+" lexer rules, "+parserSyntax.size()+" parser rules.");

            this.tokenSymbols = new ArrayList(tokenSymbols.size());

            for (int i = 0; i < parserSyntax.size(); i++) {
                Rule rule = parserSyntax.getRule(i);

                for (int j = 0; j < rule.rightSize(); j++) {
                    String sym = rule.getRightSymbol(j);

                    if (tokenSymbols.get(sym) != null) {
                        String parserSymbol = quote(sym);
                        if (sym.charAt(0) != Token.COMMAND_QUOTE)
                            rule.setRightSymbol(parserSymbol, j);

                        addTokenSymbolIfAbsent(parserSymbol);
                    }
                    else if (sym.equals(Token.UPTO) || sym.equals(Token.BUTNOT)) {
                        throw new SyntaxException("Found lexer rule in parser syntax: "+rule+". Please define \"token\" and \"ignored\" better!");
                    }
                    else if (!Token.isTerminal(sym) && !parserSyntax.hasRule(sym)) {
                        // nonterminal without parser rule: it must be resolvable by the lexer
                        if (!lexerSyntax.hasRule(sym))
                            throw new SyntaxException("Parser nonterminal without rule: "+sym);

                        String parserSymbol = quote(sym);
                        rule.setRightSymbol(parserSymbol, j);
                        addTokenSymbolIfAbsent(parserSymbol);
                    }
                }
            }
        }
        else if (!tokensWereDeclared) {
            if (DEBUG) System.err.println("INFO: No tokens were defined, lexer specification without parser rules, "+lexerSyntax.size()+" lexer rules.");

            List startRules = lexerSyntax.findStartRules();

            if (startRules.size() > 0) {
                this.tokenSymbols = new ArrayList(startRules.size());

                for (int i = 0; i < startRules.size(); i++)
                    addTokenSymbolIfAbsent(quote(((Rule) startRules.get(i)).getNonterminal()));
            }
            else {
                lexerOnlyHandling = true;
            }
        }
        else {
            if (DEBUG) System.err.println("INFO: tokens were defined, lexer specification without parser rules, "+lexerSyntax.size()+" lexer rules.");
            lexerOnlyHandling = true;
        }

        if (lexerOnlyHandling) {
            // drop quoted symbols that were merely used within rules but not declared as "token"
            for (int i = 0; i < commandTokens.size(); i++) {
                String sym = (String) commandTokens.get(i);
                if (commandsDefinedAsTOKEN.indexOf(sym) < 0)
                    tokenSymbols.remove(sym);
            }

            this.tokenSymbols = new ArrayList(tokenSymbols.size());

            for (Iterator it = tokenSymbols.keySet().iterator(); it.hasNext(); )
                this.tokenSymbols.add(quote(it.next().toString()));
        }

        return parserSyntax;
    }

    /** Encloses the passed symbol in <code>Token.COMMAND_QUOTE</code>. */
    private static String quote(String symbol) {
        return Token.COMMAND_QUOTE + symbol + Token.COMMAND_QUOTE;
    }

    /** Appends the symbol to the <code>tokenSymbols</code> member list when it is not yet contained. */
    private void addTokenSymbolIfAbsent(String parserSymbol) {
        if (this.tokenSymbols.indexOf(parserSymbol) < 0)
            this.tokenSymbols.add(parserSymbol);
    }


    /**
     * Adds all rules deriving the passed symbol to <code>resultSyntax</code>, and recursively
     * all rules deriving the nonterminals on their right sides. The index of every collected
     * rule is appended to <code>deleteIndexes</code>; rules already contained there are skipped,
     * which also terminates recursive grammars.
     * @param symbol the nonterminal whose rules are to be collected.
     * @param syntax the syntax to search; not modified here.
     * @param resultSyntax receives the found rules.
     * @param deleteIndexes receives the indexes (within <code>syntax</code>) of the found rules.
     */
    private void getRulesUnderSymbol(String symbol, Syntax syntax, Syntax resultSyntax, IntArray deleteIndexes) {
        for (int i = 0; i < syntax.size(); i++) {
            Rule rule = syntax.getRule(i);
            String nonterm = rule.getNonterminal();

            if (!deleteIndexes.contains(i) && nonterm.equals(symbol)) {
                resultSyntax.addRule(rule);
                deleteIndexes.add(i);

                for (int j = 0; j < rule.rightSize(); j++) {
                    String sym = rule.getRightSymbol(j);
                    if (!Token.isTerminal(sym) && !sym.equals(Token.BUTNOT) && !sym.equals(Token.UPTO))
                        getRulesUnderSymbol(sym, syntax, resultSyntax, deleteIndexes);
                }
            }
        }
    }


    /**
     * A growable buffer of int values, used to collect the indexes of rules
     * that are to be removed from a Syntax.
     */
    public static class IntArray
    {
        private int [] array;
        private int pos;

        /** Creates an empty buffer with the passed initial capacity. */
        public IntArray(int size) {
            array = new int [size];
        }

        /** Appends the passed value, enlarging the buffer when it is full. */
        public void add(int i) {
            if (pos >= array.length) {
                // Doubling a zero capacity would yield zero again, so grow to at least one slot.
                int [] newArray = new int [Math.max(1, array.length * 2)];
                System.arraycopy(array, 0, newArray, 0, array.length);
                array = newArray;
            }
            array[pos] = i;
            pos++;
        }

        /** Returns true when no value is contained. */
        public boolean isEmpty() {
            return pos == 0;
        }

        /** Returns true when the passed value is contained. */
        public boolean contains(int j) {
            for (int i = 0; i < pos; i++)
                if (array[i] == j)
                    return true;
            return false;
        }

        /**
         * Removes the rules at all contained indexes from the passed syntax and then
         * clears this buffer. Indexes are processed in descending order, so that a
         * removal does not shift the positions of rules still to be removed.
         */
        public void removeIndexesFrom(Syntax syntax) {
            Arrays.sort(array, 0, pos);
            for (int i = pos - 1; i >= 0; i--)
                syntax.removeRule(array[i]);
            pos = 0;
        }
    }

}