import org.xml.sax.*;
import java.util.*;
import java.io.File;
import javax.xml.parsers.SAXParserFactory;

/**
 * Reads an XML document with a SAX parser and prints to standard output a DTD
 * that the document conforms to.
 *
 * <p>While parsing, the handler records for every element name which child
 * elements and attributes were seen, whether non-whitespace character data
 * occurred, and whether the children always appeared in one consistent order.
 * {@link #main} then prints one {@code <!ELEMENT>} declaration per element
 * name, followed by one {@code <!ATTLIST>} declaration per attribute.</p>
 */
public class DTDGenerator extends org.xml.sax.helpers.DefaultHandler {

    /** An attribute must appear at least this many times before it may be declared as an enumeration. */
    protected static int MIN_ENUMERATION_INSTANCES = 10;

    /** An attribute with more distinct values than this is never declared as an enumeration. */
    protected static int MAX_ENUMERATION_VALUES = 20;

    /**
     * An attribute is declared as an enumeration only if its number of distinct values
     * is at most its number of occurrences divided by this ratio.
     */
    protected static int MIN_ENUMERATION_RATIO = 3;

    /** A single-valued required attribute must appear at least this many times to be declared #FIXED. */
    protected static int MIN_FIXED = 5;

    /** An attribute must appear at least this many times before it may be declared as an ID. */
    protected static int MIN_ID_VALUES = 10;

    /**
     * Appears intended as a cap on how many values are retained while looking for an ID.
     * Every distinct value is currently retained regardless, so this limit is not enforced;
     * the field is kept for compatibility.
     */
    protected static int MAX_ID_VALUES = 100000;

    /** Maps element name (String) to its {@link ElementDetails}; sorted so output order is alphabetical. */
    TreeMap elementList;

    /** Stack of {@link StackEntry}, one per currently open element. */
    Stack elementStack;

    /**
     * Command-line entry point: parses the named file and prints the generated DTD.
     *
     * @param args exactly one argument, the path of the XML input file
     * @throws java.lang.Exception declared for compatibility; parse failures are reported by the parsing step itself
     */
    public static void main(String args[]) throws java.lang.Exception {
        if (args.length != 1) {
            System.err.println("Usage: java DTDGenerator input-file >output-file");
            System.exit(1);
        }

        DTDGenerator app = new DTDGenerator();
        app.run(args[0]);
        app.printDTD();
    }

    /** Creates a generator with no recorded elements. */
    public DTDGenerator() {
        elementList = new TreeMap();
        elementStack = new Stack();
    }

    /**
     * Parses the given file, feeding SAX events to this object.
     * A missing file is reported on standard error; any other failure is
     * reported with a stack trace and terminates the JVM with exit code 2.
     *
     * @param filename path of the XML file to analyse
     */
    private void run(String filename) {
        try {
            // toURI() escapes characters that are illegal in URLs, unlike the deprecated toURL().
            InputSource is = new InputSource(new File(filename).toURI().toString());
            // NOTE(review): the parser is created with factory defaults; external entity
            // resolution is not restricted here. Confirm that is acceptable for the inputs used.
            XMLReader parser = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
            parser.setContentHandler(this);
            parser.parse(is);
        } catch (java.io.FileNotFoundException nf) {
            System.err.println("File " + filename + " not found");
        } catch (Exception err) {
            System.err.println("Failed while parsing source file");
            System.err.println(err.getMessage());
            err.printStackTrace();
            System.exit(2);
        }
    }

    /**
     * Tests whether a string is acceptable as a name: it must pass
     * {@link #isValidNMTOKEN} and must not start with a digit, '.' or '-'.
     *
     * @param s the string to test
     * @return true if the string qualifies as a name
     */
    private boolean isValidName(String s) {
        if (!isValidNMTOKEN(s)) {
            return false;
        }
        int first = s.charAt(0);
        boolean startsWithDigit = first >= 0x30 && first <= 0x39;
        return !(startsWithDigit || first == '.' || first == '-');
    }

    /**
     * Tests whether a string is acceptable as an NMTOKEN. This is an approximation:
     * ASCII letters and digits, '.', '_', '-', ':' and every character above 128 are accepted.
     *
     * @param s the string to test
     * @return true if the string is non-empty and consists only of accepted characters
     */
    private boolean isValidNMTOKEN(String s) {
        if (s.length() == 0) {
            return false;
        }
        for (int i = 0; i < s.length(); i++) {
            int c = s.charAt(i);
            boolean accepted = (c >= 0x41 && c <= 0x5a)
                    || (c >= 0x61 && c <= 0x7a)
                    || (c >= 0x30 && c <= 0x39)
                    || c == '.'
                    || c == '_'
                    || c == '-'
                    || c == ':'
                    || c > 128;
            if (!accepted) {
                return false;
            }
        }
        return true;
    }

    /**
     * Prints the DTD for everything recorded so far to standard output:
     * for each element name in sorted order, its element declaration,
     * its attribute declarations, and a blank line.
     */
    private void printDTD() {
        Iterator e = elementList.keySet().iterator();
        while (e.hasNext()) {
            String elementname = (String) e.next();
            ElementDetails ed = (ElementDetails) elementList.get(elementname);
            printElementDeclaration(elementname, ed);
            printAttributeDeclarations(elementname, ed);
            System.out.print("\n");
        }
    }

    /** Prints the {@code <!ELEMENT>} declaration for one element. */
    private void printElementDeclaration(String elementname, ElementDetails ed) {
        Set childKeys = ed.children.keySet();
        boolean hasChildren = childKeys.size() > 0;

        if (!hasChildren) {
            // No child elements: either EMPTY or text-only.
            if (ed.hasCharacterContent) {
                System.out.print("<!ELEMENT " + elementname + " ( #PCDATA ) >\n");
            } else {
                System.out.print("<!ELEMENT " + elementname + " EMPTY >\n");
            }
            return;
        }

        if (ed.hasCharacterContent) {
            // Mixed content: the only form a DTD allows is ( #PCDATA | a | b )*.
            System.out.print("<!ELEMENT " + elementname + " ( #PCDATA");
            Iterator c2 = childKeys.iterator();
            while (c2.hasNext()) {
                System.out.print(" | " + (String) c2.next());
            }
            System.out.print(" )* >\n");
            return;
        }

        System.out.print("<!ELEMENT " + elementname + " ( ");
        if (ed.sequenced) {
            // Children always appeared in the same order: emit a sequence
            // with an occurrence indicator on each child.
            Enumeration c = ed.childseq.elements();
            while (c.hasMoreElements()) {
                ChildDetails ch = (ChildDetails) c.nextElement();
                System.out.print(ch.name);
                if (ch.repeatable) {
                    System.out.print(ch.optional ? "*" : "+");
                } else if (ch.optional) {
                    System.out.print("?");
                }
                if (c.hasMoreElements()) {
                    System.out.print(", ");
                }
            }
            System.out.print(" ) >\n");
        } else {
            // No consistent order was observed: fall back to a repeated choice.
            Iterator c1 = childKeys.iterator();
            while (c1.hasNext()) {
                System.out.print((String) c1.next());
                if (c1.hasNext()) {
                    System.out.print(" | ");
                }
            }
            System.out.print(" )* >\n");
        }
    }

    /** Prints one {@code <!ATTLIST>} declaration per attribute recorded for the element. */
    private void printAttributeDeclarations(String elementname, ElementDetails ed) {
        TreeMap attlist = ed.attributes;
        boolean doneID = false; // at most one attribute per element is declared as ID

        Iterator a = attlist.keySet().iterator();
        while (a.hasNext()) {
            String attname = (String) a.next();
            AttributeDetails ad = (AttributeDetails) attlist.get(attname);

            // Required if it was present on every occurrence of the element.
            boolean required = (ad.occurrences == ed.occurrences);

            // ID if all values are names, no value was repeated, and there are enough samples.
            boolean isid = ad.allNames && (!doneID) && ad.unique
                    && (ad.occurrences >= MIN_ID_VALUES);

            // FIXED if always present with one single value, seen often enough.
            boolean isfixed = required && ad.values.size() == 1 && ad.occurrences >= MIN_FIXED;

            // Enumeration if all values are NMTOKENs and there are few distinct
            // values relative to the number of occurrences.
            boolean isenum = ad.allNMTOKENs
                    && (ad.occurrences >= MIN_ENUMERATION_INSTANCES)
                    && (ad.values.size() <= ad.occurrences / MIN_ENUMERATION_RATIO)
                    && (ad.values.size() <= MAX_ENUMERATION_VALUES);

            System.out.print("<!ATTLIST " + elementname + " " + attname + " ");
            String tokentype = (ad.allNMTOKENs ? "NMTOKEN" : "CDATA");

            if (isid) {
                System.out.print("ID");
                doneID = true;
            } else if (isfixed) {
                String val = (String) ad.values.first();
                System.out.print(tokentype + " #FIXED \"" + escape(val) + "\" >\n");
            } else if (isenum) {
                System.out.print("( ");
                Iterator v = ad.values.iterator();
                while (v.hasNext()) {
                    System.out.print((String) v.next());
                    if (v.hasNext()) {
                        System.out.print(" | ");
                    }
                }
                System.out.print(" )");
            } else {
                System.out.print(tokentype);
            }

            // A #FIXED declaration is already complete; every other form still needs its default clause.
            if (!isfixed) {
                if (required) {
                    System.out.print(" #REQUIRED >\n");
                } else {
                    System.out.print(" #IMPLIED >\n");
                }
            }
        }
    }

    /**
     * Escapes markup-significant and non-ASCII characters so the text can be
     * written inside a quoted literal.
     *
     * <p>{@code <}, {@code >} and {@code &} become {@code &lt;}, {@code &gt;} and {@code &amp;};
     * double and single quotes become {@code &#34;} and {@code &#39;}; any character above
     * 0x7f becomes a decimal character reference; everything else is copied unchanged.</p>
     *
     * @param ch     source characters
     * @param start  index of the first character to escape
     * @param length number of characters to escape
     * @param out    destination array; the caller must make it large enough
     *               (the longest replacement for a single char is 8 characters)
     * @return the number of characters written to {@code out}
     */
    private static int escape(char ch[], int start, int length, char[] out) {
        int o = 0;
        for (int i = start; i < start + length; i++) {
            char c = ch[i];
            String replacement;
            switch (c) {
                case '<':
                    replacement = "&lt;";
                    break;
                case '>':
                    replacement = "&gt;";
                    break;
                case '&':
                    replacement = "&amp;";
                    break;
                case '\"':
                    replacement = "&#34;";
                    break;
                case '\'':
                    replacement = "&#39;";
                    break;
                default:
                    replacement = (c <= 0x7f) ? null : "&#" + Integer.toString((int) c) + ';';
                    break;
            }
            if (replacement == null) {
                out[o++] = c;
            } else {
                // Copy length comes from the string itself so it can never overrun the literal.
                replacement.getChars(0, replacement.length(), out, o);
                o += replacement.length();
            }
        }
        return o;
    }

    /**
     * Escapes a whole string; see {@link #escape(char[], int, int, char[])}.
     *
     * @param in the string to escape
     * @return the escaped string
     */
    private static String escape(String in) {
        // 8 output chars per input char covers the longest replacement ("&#65535;").
        char[] dest = new char[in.length() * 8];
        int newlen = escape(in.toCharArray(), 0, in.length(), dest);
        return new String(dest, 0, newlen);
    }

    /**
     * SAX callback for the start of an element: records the element, its
     * attributes, and its position among the children of its parent.
     *
     * @param uri        namespace URI (not used)
     * @param localName  local name (not used)
     * @param name       qualified element name, used as the key for all records
     * @param attributes attributes attached to the element
     * @throws SAXException never thrown by this implementation
     */
    public void startElement(String uri, String localName, String name, Attributes attributes)
            throws SAXException {
        ElementDetails ed = (ElementDetails) elementList.get(name);
        if (ed == null) {
            ed = new ElementDetails(name);
            elementList.put(name, ed);
        }

        StackEntry se = new StackEntry();
        se.elementDetails = ed;
        se.sequenceNumber = -1; // no child group seen yet

        // Must be counted before the parent is examined: for an element nested
        // directly inside an element of the same name, both share one record.
        ed.occurrences++;

        recordAttributes(ed, attributes);

        if (!elementStack.isEmpty()) {
            recordChild((StackEntry) elementStack.peek(), name);
        }
        elementStack.push(se);
    }

    /** Updates the per-attribute statistics of an element from one occurrence's attributes. */
    private void recordAttributes(ElementDetails ed, Attributes attributes) {
        for (int a = 0; a < attributes.getLength(); a++) {
            String attName = attributes.getQName(a);
            String val = attributes.getValue(a);

            AttributeDetails ad = (AttributeDetails) ed.attributes.get(attName);
            if (ad == null) {
                ad = new AttributeDetails(attName);
                ed.attributes.put(attName, ad);
            }

            // add() returns true only for a value not seen before.
            if (ad.values.add(val)) {
                if (ad.allNames && !isValidName(val)) {
                    ad.allNames = false;
                }
                if (ad.allNMTOKENs && !isValidNMTOKEN(val)) {
                    ad.allNMTOKENs = false;
                }
            } else {
                // A repeated value rules the attribute out as an ID.
                ad.unique = false;
            }
            ad.occurrences++;
        }
    }

    /**
     * Records that an element called {@code name} has just started as a child of
     * the element described by {@code parent}, updating the parent's child list
     * and its "sequenced" flag.
     */
    private void recordChild(StackEntry parent, String name) {
        ElementDetails parentDetails = parent.elementDetails;

        // Consecutive children with the same name form one group.
        boolean isFirstInGroup = parent.latestChild == null || !parent.latestChild.equals(name);
        if (isFirstInGroup) {
            parent.sequenceNumber++;
        }
        int seq = parent.sequenceNumber;
        parent.latestChild = name;

        ChildDetails c = (ChildDetails) parentDetails.children.get(name);
        if (c == null) {
            // First time this child name is seen under this parent name.
            c = new ChildDetails();
            c.name = name;
            c.position = seq;
            c.repeatable = false;
            c.optional = false;
            parentDetails.children.put(name, c);
            parentDetails.childseq.addElement(c);

            // It was absent from earlier occurrences of the parent, so it is optional.
            if (parentDetails.occurrences != 1) {
                c.optional = true;
            }
        } else {
            // Within the first parent occurrence, a new group of an already-seen
            // child means the children are interleaved, not a simple sequence.
            if (parentDetails.occurrences == 1 && isFirstInGroup) {
                parentDetails.sequenced = false;
            }

            // The child must sit at the position the recorded sequence expects.
            if (parentDetails.childseq.size() <= seq
                    || !((ChildDetails) parentDetails.childseq.elementAt(seq)).name.equals(name)) {
                parentDetails.sequenced = false;
            }
        }

        if (!isFirstInGroup) {
            c.repeatable = true;
        }
    }

    /**
     * SAX callback for the end of an element. If the element's children are
     * still considered sequenced, every recorded child positioned after the last
     * group seen in this occurrence is marked optional.
     *
     * @param uri       namespace URI (not used)
     * @param localName local name (not used)
     * @param name      qualified element name
     * @throws SAXException never thrown by this implementation
     */
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        ElementDetails ed = (ElementDetails) elementList.get(name);
        if (ed.sequenced) {
            StackEntry se = (StackEntry) elementStack.peek();
            for (int i = se.sequenceNumber + 1; i < ed.childseq.size(); i++) {
                ((ChildDetails) ed.childseq.elementAt(i)).optional = true;
            }
        }
        elementStack.pop();
    }

    /**
     * SAX callback for character data: flags the current element as having
     * character content as soon as any character above 0x20 is seen.
     *
     * @param ch     character buffer
     * @param start  index of the first character
     * @param length number of characters
     * @throws SAXException never thrown by this implementation
     */
    public void characters(char ch[], int start, int length)
            throws SAXException {
        ElementDetails ed = ((StackEntry) elementStack.peek()).elementDetails;
        if (ed.hasCharacterContent) {
            return;
        }
        for (int i = start; i < start + length; i++) {
            if ((int) ch[i] > 0x20) {
                ed.hasCharacterContent = true;
                break;
            }
        }
    }

    /** Everything recorded about one element name. */
    private static class ElementDetails {
        String name;
        /** Number of times the element was started. */
        int occurrences;
        /** True once non-whitespace character data was seen inside the element. */
        boolean hasCharacterContent;
        /** True while the children have always appeared in one consistent order. */
        boolean sequenced;
        /** Maps child element name (String) to {@link ChildDetails}. */
        TreeMap children;
        /** The same {@link ChildDetails} objects in order of first appearance. */
        Vector childseq;
        /** Maps attribute name (String) to {@link AttributeDetails}. */
        TreeMap attributes;

        public ElementDetails(String name) {
            this.name = name;
            this.occurrences = 0;
            this.hasCharacterContent = false;
            this.sequenced = true;
            this.children = new TreeMap();
            this.childseq = new Vector();
            this.attributes = new TreeMap();
        }
    }

    /** What is known about one child element name within one parent element name. */
    private static class ChildDetails {
        String name;
        /** Group index at which the child was first seen within its parent. */
        int position;
        /** True if the child occurred more than once in a row. */
        boolean repeatable;
        /** True if some occurrence of the parent lacked this child. */
        boolean optional;
    }

    /** Everything recorded about one attribute of one element name. */
    private static class AttributeDetails {
        String name;
        /** Number of times the attribute appeared. */
        int occurrences;
        /** True while no value has been seen twice. */
        boolean unique;
        /** The distinct values seen (Strings). */
        TreeSet values;
        /** True while every value passes {@code isValidName}. */
        boolean allNames;
        /** True while every value passes {@code isValidNMTOKEN}. */
        boolean allNMTOKENs;

        public AttributeDetails(String name) {
            this.name = name;
            this.occurrences = 0;
            this.unique = true;
            this.values = new TreeSet();
            this.allNames = true;
            this.allNMTOKENs = true;
        }
    }

    /** Parse-time state for one currently open element. */
    private static class StackEntry {
        ElementDetails elementDetails;
        /** Index of the most recent child group, or -1 before any child. */
        int sequenceNumber;
        /** Name of the most recently started child, or null before any child. */
        String latestChild;
    }
}