package edu.umd.cs.findbugs.ml;

import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeSet;

import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

import edu.umd.cs.findbugs.BugCollection;
import edu.umd.cs.findbugs.BugInstance;
import edu.umd.cs.findbugs.config.CommandLine;

/**
 * Converts XML documents of bug instances into an ARFF file.
 *
 * <p>The conversion is a two-pass process. First every input document is
 * {@link #scan(Document, String) scanned} so that each configured
 * {@link Attribute} can collect whatever it needs to describe its range
 * (e.g. the set of nominal values). Then the header is written with
 * {@link #generateHeader(String, Writer)} and one data row per selected
 * element is written with {@link #generateInstances(Document, String, Writer)}.
 *
 * <p>Elements are selected with an XPath expression, by default
 * {@code /BugCollection/BugInstance}.
 */
public class ConvertToARFF {

    /**
     * A parsed input document paired with the application name that is
     * attached to every tuple generated from it.
     */
    private static class DataFile {
        private final Document document;
        private final String appName;

        public DataFile(Document document, String appName) {
            this.document = document;
            this.appName = appName;
        }

        public Document getDocument() {
            return document;
        }

        public String getAppName() {
            return appName;
        }
    }

    /**
     * Thrown when a value needed for an attribute cannot be obtained from
     * an element. During instance generation this is translated into the
     * missing-value marker {@code ?}.
     */
    private static class MissingNodeException extends Exception {
        private static final long serialVersionUID = -5042140832791541208L;

        public MissingNodeException(String msg) {
            super(msg);
        }
    }

    /**
     * One column of the generated relation.
     */
    public interface Attribute {
        /**
         * @return the attribute name written on the {@code @attribute} line
         */
        public String getName();

        /**
         * Inspects an element during the scanning pass, before any output
         * is produced.
         *
         * @param element the selected element
         * @param appName application name associated with the element's document
         * @throws MissingNodeException if a required value is absent
         */
        public void scan(Element element, String appName)
                throws MissingNodeException;

        /**
         * @return the type/range text written after the name on the
         *         {@code @attribute} line
         */
        public String getRange();

        /**
         * Produces the value of this attribute for one data row.
         *
         * @param element the selected element
         * @param appName application name associated with the element's document
         * @return the text to write for this column
         * @throws MissingNodeException if the value cannot be determined
         */
        public String getInstanceValue(Element element, String appName)
                throws MissingNodeException;
    }

    /**
     * Base class for attributes whose value is obtained by evaluating an
     * XPath expression relative to the selected element.
     */
    private abstract static class XPathAttribute implements Attribute {
        private final String name;
        private final String xpath;

        public XPathAttribute(String name, String xpath) {
            this.name = name;
            this.xpath = xpath;
        }

        public String getName() {
            return name;
        }

        /**
         * Evaluates the XPath expression against the element and converts
         * the result to text. If the result is a list, its first item is
         * used. Numbers whose string form ends in {@code ".0"} have that
         * suffix removed.
         *
         * @throws MissingNodeException if the query yields {@code null} or an empty list
         * @throws IllegalStateException if the result is neither a node,
         *         a string, nor a number
         */
        public String getInstanceValue(Element element, String appName) throws MissingNodeException {
            Object value = element.selectObject(xpath);
            if (value == null) {
                throw new MissingNodeException("Could not get value from element (path=" +
                        xpath + ")");
            }
            if (value instanceof List) {
                List<?> list = (List<?>) value;
                if (list.isEmpty()) {
                    throw new MissingNodeException("Could not get value from element (path=" +
                            xpath + ")");
                }
                value = list.get(0);
            }

            if (value instanceof Node) {
                Node node = (Node) value;
                return node.getText();
            } else if (value instanceof String) {
                return (String) value;
            } else if (value instanceof Number) {
                String s = value.toString();
                if (s.endsWith(".0")) {
                    s = s.substring(0, s.length() - 2);
                }
                return s;
            } else {
                throw new IllegalStateException("Unexpected object returned from xpath query: " + value);
            }
        }
    }

    /**
     * XPath attribute whose range is the sorted set of all (double-quoted)
     * values seen during scanning.
     */
    public static class NominalAttribute extends XPathAttribute {
        private final Set<String> possibleValueSet;

        public NominalAttribute(String name, String xpath) {
            super(name, xpath);
            this.possibleValueSet = new TreeSet<String>();
        }

        public void scan(Element element, String appName) {
            try {
                possibleValueSet.add(getInstanceValue(element, appName));
            } catch (MissingNodeException ignore) {
                // Deliberately ignored: an element without a value simply
                // contributes nothing to the range.
            }
        }

        public String getRange() {
            return collectionToRange(possibleValueSet);
        }

        /**
         * @return the XPath value wrapped in double quotes
         */
        @Override
        public String getInstanceValue(Element element, String appName) throws MissingNodeException {
            return "\"" + super.getInstanceValue(element, appName) + "\"";
        }
    }

    /**
     * XPath attribute with the fixed range {@code {true, false}}.
     */
    public static class BooleanAttribute extends XPathAttribute {
        public BooleanAttribute(String name, String xpath) {
            super(name, xpath);
        }

        public void scan(Element element, String appName) throws MissingNodeException {
            // Range is fixed; nothing to collect.
        }

        public String getRange() {
            return "{true, false}";
        }

        /**
         * @return the double-quoted result of {@link Boolean#valueOf(String)}
         *         applied to the XPath value; a missing value is reported
         *         as {@code "false"} rather than as an error
         */
        @Override
        public String getInstanceValue(Element element, String appName) throws MissingNodeException {
            try {
                String value = super.getInstanceValue(element, appName);
                return "\"" + Boolean.valueOf(value).toString() + "\"";
            } catch (MissingNodeException e) {
                return "\"false\"";
            }
        }
    }

    // Classification states. BUG, NOT_BUG and HARMLESS are bit flags that
    // getBugClassification() ORs together while reading annotation tokens.
    private static final int UNCLASSIFIED = 0;
    private static final int BUG = 1;
    private static final int NOT_BUG = 2;
    private static final int HARMLESS = 4;
    private static final int HARMLESS_BUG = HARMLESS | BUG;

    /**
     * Base class for the "classification" attribute, whose value is derived
     * from the text of the element's {@code UserAnnotation} child.
     * Subclasses decide how a classification state is rendered and what the
     * range is.
     */
    public static abstract class AbstractClassificationAttribute implements Attribute {

        public String getName() {
            return "classification";
        }

        public void scan(Element element, String appName) throws MissingNodeException {
            // Range is fixed by the subclass; nothing to collect.
        }

        public String getInstanceValue(Element element, String appName) throws MissingNodeException {
            String annotationText = element.valueOf("./UserAnnotation[text()]");

            int state = getBugClassification(annotationText);
            return bugToString(state);
        }

        /**
         * Renders a classification state as an attribute value.
         *
         * @param bugType one of the values returned by {@code getBugClassification}
         * @return the attribute value for that state
         * @throws MissingNodeException if the state has no representation
         */
        protected abstract String bugToString(int bugType) throws MissingNodeException;

    }

    /**
     * Three-way classification: {@code bug}, {@code not_bug} or
     * {@code harmless_bug}.
     */
    public static class ClassificationAttribute extends AbstractClassificationAttribute {
        public String getRange() {
            return "{bug,not_bug,harmless_bug}";
        }

        @Override
        protected String bugToString(int state) throws MissingNodeException {
            if (state == NOT_BUG) {
                return "not_bug";
            } else if (state == BUG) {
                return "bug";
            } else if (state == HARMLESS_BUG) {
                return "harmless_bug";
            } else {
                throw new MissingNodeException("Unclassified warning");
            }
        }
    }

    /**
     * Two-way classification: harmless bugs are reported as {@code not_bug}.
     */
    public static class BinaryClassificationAttribute extends AbstractClassificationAttribute {
        public String getRange() {
            return "{bug, not_bug}";
        }

        @Override
        protected String bugToString(int state) throws MissingNodeException {
            if (state == BUG) {
                return "bug";
            } else if (state == NOT_BUG || state == HARMLESS_BUG) {
                return "not_bug";
            } else {
                throw new MissingNodeException("unclassified warning");
            }
        }
    }

    /**
     * XPath attribute declared as {@code numeric}; the value is written
     * unquoted.
     */
    public static class NumericAttribute extends XPathAttribute {
        public NumericAttribute(String name, String xpath) {
            super(name, xpath);
        }

        public void scan(Element element, String appName) throws MissingNodeException {
            // Nothing to collect for a numeric attribute.
        }

        public String getRange() {
            return "numeric";
        }
    }

    /**
     * Maps the element's integer {@code priority} XML attribute to
     * {@code high} (1), {@code medium} (2) or {@code low} (3).
     */
    public static class PriorityAttribute implements Attribute {
        public String getName() {
            return "priority";
        }

        public void scan(Element element, String appName) throws MissingNodeException {
            // Range is fixed; nothing to collect.
        }

        public String getRange() {
            return "{low,medium,high}";
        }

        /**
         * @return {@code high}, {@code medium} or {@code low}; {@code ?}
         *         for any other integer
         * @throws MissingNodeException if the {@code priority} attribute is
         *         absent or is not an integer
         */
        public String getInstanceValue(Element element, String appName) throws MissingNodeException {
            org.dom4j.Attribute attribute = element.attribute("priority");
            if (attribute == null) {
                throw new MissingNodeException("Missing priority attribute");
            }
            String value = attribute.getValue();
            try {
                int prio = Integer.parseInt(value);
                switch (prio) {
                case 1: return "high";
                case 2: return "medium";
                case 3: return "low";
                default: return "?";
                }
            } catch (NumberFormatException e) {
                throw new MissingNodeException("Invalid priority value: " + value);
            }
        }
    }

    /**
     * Nominal id attribute. The id is {@code "<appName>-<uid>"}, where
     * {@code uid} is the element's {@code uid} XML attribute or, when that
     * is absent, a running counter.
     *
     * <p>The counter is advanced during scanning and reset to zero on the
     * first call to {@link #getInstanceValue(Element, String)}, so that the
     * counter-based ids produced while writing instances start over from
     * the same point as those recorded in the range.
     * NOTE(review): this presumably relies on elements being visited in the
     * same order in both passes - confirm against callers.
     */
    public static class IdAttribute implements Attribute {
        private final TreeSet<String> possibleValueSet = new TreeSet<String>();

        private boolean scanning = true;
        private int count = 0;

        public String getName() {
            return "id";
        }

        public void scan(Element element, String appName) throws MissingNodeException {
            possibleValueSet.add(instanceValue(element, appName));
        }

        public String getRange() {
            return collectionToRange(possibleValueSet);
        }

        public String getInstanceValue(Element element, String appName) throws MissingNodeException {
            if (scanning) {
                // First instance after the scanning pass: restart the counter.
                count = 0;
                scanning = false;
            }
            return instanceValue(element, appName);
        }

        private String instanceValue(Element element, String appName) {
            String nextId;

            org.dom4j.Attribute uidAttr = element.attribute("uid");
            if (uidAttr != null) {
                nextId = uidAttr.getValue();
            } else {
                nextId = String.valueOf(count++);
            }

            return "\"" + appName + "-" + nextId + "\"";
        }
    }

    /**
     * Id attribute declared as {@code string}. The value is
     * {@code "<appName>-<uid>"}, falling back to a running counter when the
     * element has no {@code uid} XML attribute.
     */
    public static class IdStringAttribute implements Attribute {

        public String getName() {
            return "ids";
        }

        public void scan(Element element, String appName) throws MissingNodeException {
            // String attributes have no enumerated range; nothing to collect.
        }

        public String getRange() {
            return "string";
        }

        int count = 0;

        public String getInstanceValue(Element element, String appName) throws MissingNodeException {
            String value;
            org.dom4j.Attribute uidAttr = element.attribute("uid");
            if (uidAttr == null) {
                value = String.valueOf(count++);
            } else {
                value = uidAttr.getStringValue();
            }

            return "\"" + appName + "-" + value + "\"";
        }

    }

    /** Alphabet from which random ids are drawn. */
    private static final String RANDOM_CHARS =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

    /**
     * Nominal id attribute whose value is a random 20-letter string
     * assigned to each element during scanning. Elements are keyed by
     * identity, so the same {@link Element} objects must be used for
     * scanning and for instance generation.
     */
    public static class RandomIdAttribute implements Attribute {

        private final Random rng = new Random();
        private final IdentityHashMap<Element, String> idMap = new IdentityHashMap<Element, String>();

        public String getName() {
            return "idr";
        }

        public void scan(Element element, String appName) throws MissingNodeException {
            idMap.put(element, generateId());
        }

        private String generateId() {
            StringBuilder buf = new StringBuilder();

            for (int i = 0; i < 20; ++i) {
                char c = RANDOM_CHARS.charAt(rng.nextInt(RANDOM_CHARS.length()));
                buf.append(c);
            }

            return buf.toString();
        }

        /**
         * @throws IllegalStateException if two elements were assigned the same id
         */
        public String getRange() {
            TreeSet<String> range = new TreeSet<String>();
            range.addAll(idMap.values());
            if (range.size() != idMap.size()) {
                throw new IllegalStateException("id collision!");
            }
            return collectionToRange(range);
        }

        /**
         * @throws IllegalStateException if the element was never passed to
         *         {@link #scan(Element, String)}
         */
        public String getInstanceValue(Element element, String appName) throws MissingNodeException {
            String id = idMap.get(element);
            if (id == null) {
                throw new IllegalStateException("Element not scanned?");
            }
            return "\"" + id + "\"";
        }

    }

    /**
     * Nominal attribute holding the application name; its range is the
     * sorted set of application names seen during scanning.
     */
    public static class AppNameAttribute implements Attribute {
        private final Set<String> appNameSet = new TreeSet<String>();

        public String getName() {
            return "appname";
        }

        public void scan(Element element, String appName)
                throws MissingNodeException {
            appNameSet.add(appName);
        }

        public String getRange() {
            return collectionToRange(appNameSet);
        }

        public String getInstanceValue(Element element, String appName)
                throws MissingNodeException {
            return "\"" + appName + "\"";
        }
    }

    /**
     * Formats a collection as a nominal range: the elements, in iteration
     * order, separated by commas and enclosed in braces. Elements are
     * written verbatim (no quoting or escaping is applied here).
     *
     * @param collection the values to list
     * @return text of the form <code>{a,b,c}</code>
     */
    public static String collectionToRange(Collection<String> collection) {
        StringBuilder buf = new StringBuilder();
        buf.append("{");
        for (String aCollection : collection) {
            // A separator is needed once anything follows the opening brace.
            if (buf.length() > 1) {
                buf.append(',');
            }
            buf.append(aCollection);
        }
        buf.append("}");

        return buf.toString();
    }

    /**
     * Callback applied to each configured attribute, in order, by
     * {@link #scanAttributeList(AttributeCallback)}.
     */
    public interface AttributeCallback {
        public void apply(Attribute attribute) throws MissingNodeException, IOException;
    }

    /** XPath used to select elements unless overridden. */
    private static final String DEFAULT_NODE_SELECTION_XPATH = "/BugCollection/BugInstance";

    private List<Attribute> attributeList;
    private String nodeSelectionXpath;
    private boolean dropUnclassifiedWarnings;
    private String appName;

    /**
     * Creates a converter with no attributes, the default node selection
     * XPath, and unclassified warnings retained.
     */
    public ConvertToARFF() {
        this.attributeList = new LinkedList<Attribute>();
        this.nodeSelectionXpath = DEFAULT_NODE_SELECTION_XPATH;
        this.dropUnclassifiedWarnings = false;
    }

    /**
     * Sets an application name that {@link #toAppName(String)} returns for
     * every file instead of deriving one from the file name.
     *
     * @param appName the application name
     */
    public void setAppName(String appName) {
        this.appName = appName;
    }

    /**
     * Sets the XPath expression used to select the elements that become
     * data rows.
     *
     * @param nodeSelectionXpath the XPath expression
     */
    public void setNodeSelectionXpath(String nodeSelectionXpath) {
        this.nodeSelectionXpath = nodeSelectionXpath;
    }

    /**
     * @return the number of attributes added so far
     */
    public int getNumAttributes() {
        return attributeList.size();
    }

    /**
     * Requests that elements whose annotation yields no classification be
     * excluded from scanning and from the generated instances.
     */
    public void dropUnclassifiedWarnings() {
        this.dropUnclassifiedWarnings = true;
    }

    /**
     * Appends an attribute; attributes are emitted in the order added.
     *
     * @param attribute the attribute to add
     */
    public void addAttribute(Attribute attribute) {
        attributeList.add(attribute);
    }

    public void addNominalAttribute(String name, String xpath) {
        addAttribute(new NominalAttribute(name, xpath));
    }

    public void addBooleanAttribute(String name, String xpath) {
        addAttribute(new BooleanAttribute(name, xpath));
    }

    public void addClassificationAttribute() {
        addAttribute(new ClassificationAttribute());
    }

    public void addNumericAttribute(String name, String xpath) {
        addAttribute(new NumericAttribute(name, xpath));
    }

    public void addPriorityAttribute() {
        addAttribute(new PriorityAttribute());
    }

    public void addIdAttribute() {
        addAttribute(new IdAttribute());
    }

    public void addAppNameAttribute() {
        addAttribute(new AppNameAttribute());
    }

    /**
     * Converts a single document: scans it, writes the header, then writes
     * its instances.
     *
     * @param relationName name written on the {@code @relation} line
     * @param document     the document to convert
     * @param appName      application name for the document's tuples
     * @param out          destination for the output
     * @throws IOException          if writing fails
     * @throws MissingNodeException if an attribute fails while scanning
     */
    public void convert(String relationName, Document document, String appName, final Writer out)
            throws IOException, MissingNodeException {
        scan(document, appName);
        generateHeader(relationName, out);
        generateInstances(document, appName, out);
    }

    /**
     * Scanning pass: lets every attribute inspect every selected element
     * of the document.
     *
     * @param document the document to scan
     * @param appName  application name for the document's tuples
     * @throws MissingNodeException if an attribute's {@code scan} fails
     * @throws IOException          declared by the callback mechanism
     */
    public void scan(Document document, final String appName) throws MissingNodeException, IOException {
        List<Element> bugInstanceList = getBugInstanceList(document);

        for (final Element element : bugInstanceList) {
            scanAttributeList(new AttributeCallback() {
                public void apply(Attribute attribute) throws MissingNodeException {
                    attribute.scan(element, appName);
                }
            });
        }
    }

    /**
     * Writes the {@code @relation} line, one {@code @attribute} line per
     * configured attribute, and the {@code @data} marker.
     *
     * @param relationName name written on the {@code @relation} line
     * @param out          destination for the output
     * @throws MissingNodeException declared by the callback mechanism
     * @throws IOException          if writing fails
     */
    public void generateHeader(String relationName, final Writer out)
            throws MissingNodeException, IOException {
        out.write("@relation ");
        out.write(relationName);
        out.write("\n\n");

        scanAttributeList(new AttributeCallback() {
            public void apply(Attribute attribute) throws IOException {
                out.write("@attribute ");
                out.write(attribute.getName());
                out.write(" ");
                out.write(attribute.getRange());
                out.write("\n");
            }
        });
        out.write("\n");

        out.write("@data\n");
    }

    /**
     * Writes one comma-separated row per selected element of the document.
     * An attribute whose value is missing is written as {@code ?}.
     *
     * @param document the document whose elements are written
     * @param appName  application name for the document's tuples
     * @param out      destination for the output
     * @throws MissingNodeException declared by the callback mechanism
     * @throws IOException          if writing fails
     */
    public void generateInstances(Document document, final String appName, final Writer out)
            throws MissingNodeException, IOException {
        List<Element> bugInstanceList = getBugInstanceList(document);

        for (final Element element : bugInstanceList) {
            scanAttributeList(new AttributeCallback() {
                // Tracks whether a separator is needed before the next value.
                boolean first = true;

                public void apply(Attribute attribute) throws IOException {
                    if (!first) {
                        out.write(",");
                    }
                    first = false;
                    String value;
                    try {
                        value = attribute.getInstanceValue(element, appName);
                    } catch (MissingNodeException e) {
                        value = "?";
                    }
                    out.write(value);
                }
            });
            out.write("\n");
        }
    }

    /**
     * Applies the callback to every configured attribute, in the order the
     * attributes were added.
     *
     * @param callback the callback to apply
     * @throws MissingNodeException if the callback throws it
     * @throws IOException          if the callback throws it
     */
    public void scanAttributeList(AttributeCallback callback)
            throws MissingNodeException, IOException {
        for (Attribute attribute : attributeList) {
            callback.apply(attribute);
        }
    }

    /**
     * Derives a classification from annotation text. The text is split on
     * whitespace and the characters {@code . , : ; -}; the tokens
     * {@code BUG}, {@code NOT_BUG} and {@code HARMLESS} (case-sensitive)
     * are recognized. {@code NOT_BUG} takes precedence over everything
     * else; {@code HARMLESS} only has an effect together with {@code BUG}.
     *
     * @param annotationText the annotation text
     * @return {@code NOT_BUG}, {@code BUG}, {@code HARMLESS_BUG} or
     *         {@code UNCLASSIFIED}
     */
    private static int getBugClassification(String annotationText) {
        StringTokenizer tok = new StringTokenizer(annotationText, " \t\r\n\f.,:;-");

        int state = UNCLASSIFIED;

        while (tok.hasMoreTokens()) {
            String s = tok.nextToken();
            if (s.equals("BUG")) {
                state |= BUG;
            } else if (s.equals("NOT_BUG")) {
                state |= NOT_BUG;
            } else if (s.equals("HARMLESS")) {
                state |= HARMLESS;
            }
        }

        if ((state & NOT_BUG) != 0) {
            return NOT_BUG;
        } else if ((state & BUG) != 0) {
            return ((state & HARMLESS) != 0) ? HARMLESS_BUG : BUG;
        } else {
            return UNCLASSIFIED;
        }
    }

    /**
     * Selects the elements of the document matching the node selection
     * XPath, removing unclassified ones when
     * {@link #dropUnclassifiedWarnings()} has been called.
     */
    private List<Element> getBugInstanceList(Document document) {
        // NOTE(review): assumes the selection XPath only matches Element
        // nodes; the conversion below is unchecked.
        @SuppressWarnings("unchecked")
        List<Element> bugInstanceList = document.selectNodes(nodeSelectionXpath);
        if (dropUnclassifiedWarnings) {
            for (Iterator<Element> i = bugInstanceList.iterator(); i.hasNext(); ) {
                Element element = i.next();
                String annotationText = element.valueOf("./UserAnnotation[text()]");
                int classification = getBugClassification(annotationText);
                if (classification == UNCLASSIFIED) {
                    i.remove();
                }
            }
        }
        return bugInstanceList;
    }

    /**
     * Command line handling for {@link ConvertToARFF#main(String[])}:
     * each recognized option configures the wrapped converter.
     */
    private static class C2ACommandLine extends CommandLine {
        private final ConvertToARFF converter = new ConvertToARFF();

        public C2ACommandLine() {
            addOption("-select", "xpath expression", "select BugInstance elements");
            addSwitch("-train", "drop unclassified warnings");
            addSwitch("-id", "add unique id attribute (as nominal)");
            addSwitch("-ids", "add unique id attribute (as string)");
            addSwitch("-idr", "add random unique id attribute (as nominal)");
            addSwitch("-app", "add application name attribute");
            addOption("-nominal", "attrName,xpath", "add a nominal attribute");
            addOption("-boolean", "attrName,xpath", "add a boolean attribute");
            addOption("-numeric", "attrName,xpath", "add a numeric attribute");
            addSwitch("-classification", "add bug classification attribute");
            addSwitch("-binclass", "add binary (bug/not_bug) classification attribute");
            addSwitch("-priority", "add priority attribute");
            addOption("-appname", "app name", "set application name of all tuples");
        }

        public ConvertToARFF getConverter() {
            return converter;
        }

        @Override
        protected void handleOption(String option, String optionExtraPart)
                throws IOException {
            if (option.equals("-train")) {
                converter.dropUnclassifiedWarnings();
            } else if (option.equals("-id")) {
                converter.addIdAttribute();
            } else if (option.equals("-ids")) {
                converter.addAttribute(new IdStringAttribute());
            } else if (option.equals("-idr")) {
                converter.addAttribute(new RandomIdAttribute());
            } else if (option.equals("-app")) {
                converter.addAppNameAttribute();
            } else if (option.equals("-classification")) {
                converter.addClassificationAttribute();
            } else if (option.equals("-binclass")) {
                converter.addAttribute(new BinaryClassificationAttribute());
            } else if (option.equals("-priority")) {
                converter.addPriorityAttribute();
            }
        }

        /** Factory for the XPath-based attribute kinds. */
        private interface XPathAttributeCreator {
            public Attribute create(String name, String xpath);
        }

        @Override
        protected void handleOptionWithArgument(String option, String argument)
                throws IOException {

            if (option.equals("-select")) {
                converter.setNodeSelectionXpath(argument);
            } else if (option.equals("-nominal")) {
                addXPathAttribute(option, argument, new XPathAttributeCreator() {
                    public Attribute create(String name, String xpath) {
                        return new NominalAttribute(name, xpath);
                    }
                });
            } else if (option.equals("-boolean")) {
                addXPathAttribute(option, argument, new XPathAttributeCreator() {
                    public Attribute create(String name, String xpath) {
                        return new BooleanAttribute(name, xpath);
                    }
                });
            } else if (option.equals("-numeric")) {
                addXPathAttribute(option, argument, new XPathAttributeCreator() {
                    public Attribute create(String name, String xpath) {
                        return new NumericAttribute(name, xpath);
                    }
                });
            } else if (option.equals("-appname")) {
                converter.setAppName(argument);
            }
        }

        /**
         * Splits an {@code attrName,xpath} argument at its first comma and
         * adds the attribute built by the creator to the converter.
         *
         * @throws IllegalArgumentException if the argument contains no comma
         */
        protected void addXPathAttribute(String option, String argument, XPathAttributeCreator creator) {
            int comma = argument.indexOf(',');
            if (comma < 0) {
                throw new IllegalArgumentException("Missing comma separating attribute name and xpath in " +
                        option + " option: " + argument);
            }
            String attrName = argument.substring(0, comma);
            String xpath = argument.substring(comma + 1);
            converter.addAttribute(creator.create(attrName, xpath));
        }

        /**
         * Prints the usage line followed by the option summary produced by
         * the superclass.
         */
        public void printUsage(PrintStream out) {
            out.println("Usage: " + ConvertToARFF.class.getName() +
                    " [options] <relation name> <output file> <findbugs results> [<findbugs results>...]");
            super.printUsage(out);
        }
    }

    /**
     * Determines the application name for an input file: the name set with
     * {@link #setAppName(String)} if any, otherwise the file name with
     * everything from its last {@code '.'} onward removed. Directory
     * components are not stripped.
     *
     * @param fileName the input file name
     * @return the application name to use for tuples from that file
     */
    public String toAppName(String fileName) {
        if (appName != null) {
            return appName;
        }

        int lastDot = fileName.lastIndexOf('.');
        if (lastDot >= 0) {
            fileName = fileName.substring(0, lastDot);
        }
        return fileName;
    }

    /**
     * Command line entry point. Expects, after the options, a relation
     * name, an output file name and at least one input file. All input
     * files are scanned first, then a single header is written, followed by
     * the instances of each file in turn.
     *
     * @param argv command line arguments
     * @throws Exception if parsing, conversion or I/O fails
     */
    public static void main(String[] argv) throws Exception {
        // Expand any option files on the command line
        argv = CommandLine.expandOptionFiles(argv, true, true);

        // Parse command line arguments
        C2ACommandLine commandLine = new C2ACommandLine();
        int argCount = commandLine.parse(argv);
        if (argCount > argv.length - 3) {
            commandLine.printUsage(System.err);
            System.exit(1);
        }
        String relationName = argv[argCount++];
        String outputFileName = argv[argCount++];

        // Create the converter
        ConvertToARFF converter = commandLine.getConverter();
        if (converter.getNumAttributes() == 0) {
            throw new IllegalArgumentException("No attributes specified!");
        }

        // Open output file.
        // NOTE(review): the writer uses the platform default charset.
        Writer out = new OutputStreamWriter(new BufferedOutputStream(
                new FileOutputStream(outputFileName)));
        try {
            // Read documents, scanning each to let the attributes build
            // their ranges
            List<DataFile> dataFileList = new ArrayList<DataFile>();
            while (argCount < argv.length) {
                String fileName = argv[argCount++];

                // Read input file as dom4j tree
                SAXReader reader = new SAXReader();
                Document document = reader.read(fileName);

                DataFile dataFile = new DataFile(document, converter.toAppName(fileName));
                dataFileList.add(dataFile);

                converter.scan(dataFile.getDocument(), dataFile.getAppName());
            }

            // Generate header
            converter.generateHeader(relationName, out);

            // Generate instances from each document
            for (DataFile dataFile : dataFileList) {
                converter.generateInstances(dataFile.getDocument(), dataFile.getAppName(), out);
            }
        } finally {
            // Always release the file handle, even if parsing or
            // generation fails part-way through.
            out.close();
        }
    }

}