KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > edu > umd > cs > findbugs > ml > ConvertToARFF


/*
 * Machine Learning support for FindBugs
 * Copyright (C) 2004,2005 University of Maryland
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

19
package edu.umd.cs.findbugs.ml;

import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeSet;

import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

import edu.umd.cs.findbugs.BugCollection;
import edu.umd.cs.findbugs.BugInstance;
import edu.umd.cs.findbugs.config.CommandLine;
47
/**
 * Convert a BugCollection into ARFF format.
 * See Witten and Frank, <em>Data Mining</em>, ISBN 1-55860-552-5.
 *
 * @see BugCollection
 * @see BugInstance
 * @author David Hovemeyer
 */

public class ConvertToARFF {
// ------------------------------------------------------------
// Helper classes
// ------------------------------------------------------------

61     private static class DataFile {
62         private Document document;
63         private String JavaDoc appName;
64
65         public DataFile(Document document, String JavaDoc appName) {
66             this.document = document;
67             this.appName = appName;
68         }
69
70         public Document getDocument() { return document; }
71
72         public String JavaDoc getAppName() { return appName; }
73     }
74
75     private static class MissingNodeException extends Exception JavaDoc {
76         private static final long serialVersionUID = -5042140832791541208L;
77
78         public MissingNodeException(String JavaDoc msg) {
79             super(msg);
80         }
81     }
82
83     public interface Attribute {
84         public String JavaDoc getName();
85         public void scan(Element element, String JavaDoc appName)
86             throws MissingNodeException;
87         public String JavaDoc getRange();
88         public String JavaDoc getInstanceValue(Element element, String JavaDoc appName)
89             throws MissingNodeException;
90     }
91
92     private abstract static class XPathAttribute implements Attribute {
93         private String JavaDoc name;
94         private String JavaDoc xpath;
95
96         public XPathAttribute(String JavaDoc name, String JavaDoc xpath) {
97             this.name = name;
98             this.xpath = xpath;
99         }
100
101         public String JavaDoc getName() {
102             return name;
103         }
104
105         public String JavaDoc getInstanceValue(Element element, String JavaDoc appName) throws MissingNodeException {
106             Object JavaDoc value = element.selectObject(xpath);
107             if (value == null)
108                 throw new MissingNodeException("Could not get value from element (path=" +
109                     xpath + ")");
110             if (value instanceof List JavaDoc) {
111                 List JavaDoc list = (List JavaDoc) value;
112                 if (list.size() == 0)
113                     throw new MissingNodeException("Could not get value from element (path=" +
114                             xpath + ")");
115                 value = list.get(0);
116             }
117
118             if (value instanceof Node) {
119                 Node node = (Node) value;
120                 return node.getText();
121             } else if (value instanceof String JavaDoc) {
122                 return (String JavaDoc) value;
123             } else if (value instanceof Number JavaDoc) {
124                 String JavaDoc s = value.toString();
125                 if (s.endsWith(".0"))
126                     s = s.substring(0, s.length() - 2);
127                 return s;
128             } else
129                 throw new IllegalStateException JavaDoc("Unexpected object returned from xpath query: " + value);
130         }
131     }
132
133     public static class NominalAttribute extends XPathAttribute {
134         private Set JavaDoc<String JavaDoc> possibleValueSet;
135
136         public NominalAttribute(String JavaDoc name, String JavaDoc xpath) {
137             super(name, xpath);
138             this.possibleValueSet = new TreeSet JavaDoc<String JavaDoc>();
139         }
140
141         public void scan(Element element, String JavaDoc appName) {
142             try {
143                 possibleValueSet.add(getInstanceValue(element, appName));
144             } catch (MissingNodeException ignore) {
145                 // Ignore: we'll just use an n/a value for this instance
146
}
147         }
148
149         public String JavaDoc getRange() {
150             return collectionToRange(possibleValueSet);
151         }
152
153         @Override JavaDoc
154         public String JavaDoc getInstanceValue(Element element, String JavaDoc appName) throws MissingNodeException {
155             return "\"" + super.getInstanceValue(element, appName) + "\"";
156         }
157     }
158     
159     public static class BooleanAttribute extends XPathAttribute {
160         public BooleanAttribute(String JavaDoc name, String JavaDoc xpath) {
161             super(name, xpath);
162         }
163
164         public void scan(Element element, String JavaDoc appName) throws MissingNodeException {
165             // Nothing to do.
166
}
167
168         public String JavaDoc getRange() {
169             return "{true, false}";
170         }
171         
172         //@Override
173
@Override JavaDoc
174         public String JavaDoc getInstanceValue(Element element, String JavaDoc appName) throws MissingNodeException {
175             try {
176                 String JavaDoc value = super.getInstanceValue(element, appName);
177                 return "\"" + Boolean.valueOf(value).toString() + "\"";
178             } catch (MissingNodeException e) {
179                 return "\"false\"";
180             }
181         }
182     }
183
184     private static final int UNCLASSIFIED = 0;
185     private static final int BUG = 1;
186     private static final int NOT_BUG = 2;
187     private static final int HARMLESS = 4;
188     private static final int HARMLESS_BUG = HARMLESS | BUG;
189     
190     public static abstract class AbstractClassificationAttribute implements Attribute {
191
192         /* (non-Javadoc)
193          * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getName()
194          */

195         public String JavaDoc getName() {
196             return "classification";
197         }
198
199         /* (non-Javadoc)
200          * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#scan(org.dom4j.Element, java.lang.String)
201          */

202         public void scan(Element element, String JavaDoc appName) throws MissingNodeException {
203         }
204
205         /* (non-Javadoc)
206          * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getInstanceValue(org.dom4j.Element, java.lang.String)
207          */

208         public String JavaDoc getInstanceValue(Element element, String JavaDoc appName) throws MissingNodeException {
209             String JavaDoc annotationText = element.valueOf("./UserAnnotation[text()]");
210             //System.out.println("annotationText=" + annotationText);
211

212             int state = getBugClassification(annotationText);
213             return bugToString(state);
214         }
215         
216         protected abstract String JavaDoc bugToString(int bugType) throws MissingNodeException;
217         
218     }
219
220     public static class ClassificationAttribute extends AbstractClassificationAttribute {
221         public String JavaDoc getRange() {
222             return "{bug,not_bug,harmless_bug}";
223         }
224         
225         @Override JavaDoc
226         protected String JavaDoc bugToString(int state) throws MissingNodeException {
227             if (state == NOT_BUG)
228                 return "not_bug";
229             else if (state == BUG)
230                 return "bug";
231             else if (state == HARMLESS_BUG)
232                 return "harmless_bug";
233             else
234                 throw new MissingNodeException("Unclassified warning");
235
236         }
237     }
238     
239     public static class BinaryClassificationAttribute extends AbstractClassificationAttribute {
240         /* (non-Javadoc)
241          * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getRange()
242          */

243         public String JavaDoc getRange() {
244             return "{bug, not_bug}";
245         }
246         
247         /* (non-Javadoc)
248          * @see edu.umd.cs.findbugs.ml.ConvertToARFF.AbstractClassificationAttribute#bugToString(int)
249          */

250         @Override JavaDoc
251         protected String JavaDoc bugToString(int state) throws MissingNodeException {
252             if (state == BUG)
253                 return "bug";
254             else if (state == NOT_BUG || state == HARMLESS_BUG)
255                 return "not_bug";
256             else
257                 throw new MissingNodeException("unclassified warning");
258         }
259     }
260
261     public static class NumericAttribute extends XPathAttribute {
262         public NumericAttribute(String JavaDoc name, String JavaDoc xpath) {
263             super(name, xpath);
264         }
265
266         public void scan(Element element, String JavaDoc appName) throws MissingNodeException {
267         }
268
269         public String JavaDoc getRange() {
270             return "numeric";
271         }
272     }
273
274     public static class PriorityAttribute implements Attribute {
275         public String JavaDoc getName() {
276             return "priority";
277         }
278
279         public void scan(Element element, String JavaDoc appName) throws MissingNodeException {
280         }
281
282         public String JavaDoc getRange() {
283             return "{low,medium,high}";
284         }
285
286         public String JavaDoc getInstanceValue(Element element, String JavaDoc appName) throws MissingNodeException {
287             org.dom4j.Attribute attribute = element.attribute("priority");
288             if (attribute == null)
289                 throw new MissingNodeException("Missing priority attribute");
290             String JavaDoc value = attribute.getValue();
291             try {
292                 int prio = Integer.parseInt(value);
293                 switch (prio) {
294                 case 1: return "high";
295                 case 2: return "medium";
296                 case 3: return "low";
297                 default: return "?";
298                 }
299             } catch (NumberFormatException JavaDoc e) {
300                 throw new MissingNodeException("Invalid priority value: " + value);
301             }
302         }
303     }
304
305     /**
306      * An attribute that just gives each instance a unique id.
307      * The application name is prepended, so each unique id
308      * really unique, even across applications.
309      * Obviously, this attribute shouldn't be used as input
310      * to a learning algorithm.
311      *
312      * <p>Uses the Element's uid attribute if it has one.</p>
313      */

314     public static class IdAttribute implements Attribute {
315         private TreeSet JavaDoc<String JavaDoc> possibleValueSet = new TreeSet JavaDoc<String JavaDoc>();
316         
317         private boolean scanning = true;
318         private int count = 0;
319
320         public String JavaDoc getName() { return "id"; }
321         
322         public void scan(Element element, String JavaDoc appName) throws MissingNodeException {
323             possibleValueSet.add(instanceValue(element, appName));
324         }
325         
326         public String JavaDoc getRange() { return collectionToRange(possibleValueSet); }
327         
328         public String JavaDoc getInstanceValue(Element element, String JavaDoc appName) throws MissingNodeException {
329             if (scanning) {
330                 count = 0;
331                 scanning = false;
332             }
333             return instanceValue(element, appName);
334         }
335         
336         private String JavaDoc instanceValue(Element element, String JavaDoc appName) {
337             String JavaDoc nextId;
338
339             org.dom4j.Attribute uidAttr= element.attribute("uid");
340             if (uidAttr != null) {
341                 nextId = uidAttr.getValue();
342             } else {
343                 nextId = String.valueOf(count++);
344             }
345
346             return "\"" + appName + "-" + nextId + "\"";
347         }
348     }
349     
350     public static class IdStringAttribute implements Attribute {
351
352         /* (non-Javadoc)
353          * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getName()
354          */

355         public String JavaDoc getName() {
356             return "ids";
357         }
358
359         /* (non-Javadoc)
360          * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#scan(org.dom4j.Element, java.lang.String)
361          */

362         public void scan(Element element, String JavaDoc appName) throws MissingNodeException {
363         }
364
365         /* (non-Javadoc)
366          * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getRange()
367          */

368         public String JavaDoc getRange() {
369             return "string";
370         }
371         
372         int count = 0;
373
374         /* (non-Javadoc)
375          * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getInstanceValue(org.dom4j.Element, java.lang.String)
376          */

377         public String JavaDoc getInstanceValue(Element element, String JavaDoc appName) throws MissingNodeException {
378             String JavaDoc value;
379             org.dom4j.Attribute uidAttr = element.attribute("uid");
380             if (uidAttr == null) {
381                 value = String.valueOf(count++);
382             } else {
383                 value = uidAttr.getStringValue();
384             }
385             
386             return "\"" + appName + "-" + value + "\"";
387         }
388         
389     }
390
391     private static final String JavaDoc RANDOM_CHARS =
392         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
393     
394     public static class RandomIdAttribute implements Attribute {
395         
396         private Random JavaDoc rng = new Random JavaDoc();
397         private IdentityHashMap JavaDoc<Element, String JavaDoc> idMap = new IdentityHashMap JavaDoc<Element, String JavaDoc>();
398
399         /* (non-Javadoc)
400          * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getName()
401          */

402         public String JavaDoc getName() {
403             return "idr";
404         }
405
406         /* (non-Javadoc)
407          * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#scan(org.dom4j.Element, java.lang.String)
408          */

409         public void scan(Element element, String JavaDoc appName) throws MissingNodeException {
410             idMap.put(element, generateId());
411         }
412
413         private String JavaDoc generateId() {
414             StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
415             
416             for (int i = 0; i < 20; ++i) {
417                 char c = RANDOM_CHARS.charAt(rng.nextInt(RANDOM_CHARS.length()));
418                 buf.append(c);
419             }
420             
421             return buf.toString();
422         }
423
424         /* (non-Javadoc)
425          * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getRange()
426          */

427         public String JavaDoc getRange() {
428             TreeSet JavaDoc<String JavaDoc> range = new TreeSet JavaDoc<String JavaDoc>();
429             range.addAll(idMap.values());
430             if (range.size() != idMap.size())
431                 throw new IllegalStateException JavaDoc("id collision!");
432             return collectionToRange(range);
433         }
434
435         /* (non-Javadoc)
436          * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getInstanceValue(org.dom4j.Element, java.lang.String)
437          */

438         public String JavaDoc getInstanceValue(Element element, String JavaDoc appName) throws MissingNodeException {
439             String JavaDoc id = idMap.get(element);
440             if (id == null)
441                 throw new IllegalStateException JavaDoc("Element not scanned?");
442             return "\"" + id + "\"";
443         }
444         
445     }
446     
447     public static class AppNameAttribute implements Attribute {
448         private Set JavaDoc<String JavaDoc> appNameSet = new TreeSet JavaDoc<String JavaDoc>();
449
450         public String JavaDoc getName() {
451             return "appname";
452         }
453
454         public void scan(Element element, String JavaDoc appName)
455             throws MissingNodeException {
456             appNameSet.add(appName);
457         }
458
459         public String JavaDoc getRange() {
460             return collectionToRange(appNameSet);
461         }
462
463         public String JavaDoc getInstanceValue(Element element, String JavaDoc appName)
464             throws MissingNodeException {
465             return "\"" + appName + "\"";
466         }
467     }
468
469     public static String JavaDoc collectionToRange(Collection JavaDoc<String JavaDoc> collection) {
470         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
471         buf.append("{");
472         for (String JavaDoc aCollection : collection) {
473             if (buf.length() > 1)
474                 buf.append(',');
475             buf.append(aCollection);
476         }
477         buf.append("}");
478
479         return buf.toString();
480     }
481
482     public interface AttributeCallback {
483         public void apply(Attribute attribute) throws MissingNodeException, IOException JavaDoc;
484     }
485
// ------------------------------------------------------------
// Constants
// ------------------------------------------------------------

490     private static final String JavaDoc DEFAULT_NODE_SELECTION_XPATH = "/BugCollection/BugInstance";
491
// ------------------------------------------------------------
// Fields
// ------------------------------------------------------------

496     private List JavaDoc<Attribute> attributeList;
497     private String JavaDoc nodeSelectionXpath;
498     private boolean dropUnclassifiedWarnings;
499     private String JavaDoc appName;
500
// ------------------------------------------------------------
// Public methods
// ------------------------------------------------------------

/**
 * Create a converter with no attributes, the default node selection
 * XPath, and unclassified warnings retained.
 */
public ConvertToARFF() {
    // The list is only appended to and iterated, so an array-backed
    // list is sufficient.
    this.attributeList = new ArrayList<Attribute>();
    this.nodeSelectionXpath = DEFAULT_NODE_SELECTION_XPATH;
    this.dropUnclassifiedWarnings = false;
}
510     
511     public void setAppName(String JavaDoc appName) {
512         this.appName = appName;
513     }
514     
515     /**
516      * Set the xpath expression used to select BugInstance nodes.
517      *
518      * @param nodeSelectionXpath the node selection xpath expression
519      */

520     public void setNodeSelectionXpath(String JavaDoc nodeSelectionXpath) {
521         this.nodeSelectionXpath = nodeSelectionXpath;
522     }
523     
524     public int getNumAttributes() {
525         return attributeList.size();
526     }
527
528     public void dropUnclassifiedWarnings() {
529         this.dropUnclassifiedWarnings = true;
530     }
531
532     public void addAttribute(Attribute attribute) {
533         attributeList.add(attribute);
534     }
535
536     public void addNominalAttribute(String JavaDoc name, String JavaDoc xpath) {
537         addAttribute(new NominalAttribute(name, xpath));
538     }
539     
540     public void addBooleanAttribute(String JavaDoc name, String JavaDoc xpath) {
541         addAttribute(new BooleanAttribute(name, xpath));
542     }
543
544     public void addClassificationAttribute() {
545         addAttribute(new ClassificationAttribute());
546     }
547
548     public void addNumericAttribute(String JavaDoc name, String JavaDoc xpath) {
549         addAttribute(new NumericAttribute(name, xpath));
550     }
551
552     public void addPriorityAttribute() {
553         addAttribute(new PriorityAttribute());
554     }
555
556     public void addIdAttribute() {
557         addAttribute(new IdAttribute());
558     }
559
560     public void addAppNameAttribute() {
561         addAttribute(new AppNameAttribute());
562     }
563
564     /**
565      * Convert a single Document to ARFF format.
566      *
567      * @param relationName the relation name
568      * @param document the Document
569      * @param appName the application name
570      * @param out Writer to write the ARFF output to
571      */

572     public void convert(String JavaDoc relationName, Document document, String JavaDoc appName, final Writer JavaDoc out)
573             throws IOException JavaDoc, MissingNodeException {
574         scan(document, appName);
575         generateHeader(relationName, out);
576         generateInstances(document, appName, out);
577     }
578
579     /**
580      * Scan a Document to find out the ranges of attributes.
581      * All Documents must be scanned before generating the ARFF
582      * header and instances.
583      *
584      * @param document the Document
585      * @param appName the application name
586      */

587     public void scan(Document document, final String JavaDoc appName) throws MissingNodeException, IOException JavaDoc {
588         List JavaDoc<Element> bugInstanceList = getBugInstanceList(document);
589
590         for (final Element element : bugInstanceList) {
591             scanAttributeList(new AttributeCallback() {
592                 public void apply(Attribute attribute) throws MissingNodeException {
593                     attribute.scan(element, appName);
594                 }
595             });
596         }
597     }
598
599     /**
600      * Generate ARFF header.
601      * Documents must have already been scanned.
602      *
603      * @param relationName the relation name
604      * @param out Writer to write the ARFF output to
605      */

606     public void generateHeader(String JavaDoc relationName, final Writer JavaDoc out)
607             throws MissingNodeException, IOException JavaDoc {
608         out.write("@relation ");
609         out.write(relationName);
610         out.write("\n\n");
611
612         scanAttributeList(new AttributeCallback() {
613             public void apply(Attribute attribute) throws IOException JavaDoc {
614                 out.write("@attribute ");
615                 out.write(attribute.getName());
616                 out.write(" ");
617                 out.write(attribute.getRange());
618                 out.write("\n");
619             }
620         });
621         out.write("\n");
622
623         out.write("@data\n");
624     }
625
626     /**
627      * Generate instances from given Document.
628      * Document should already have been scanned, and the ARFF header generated.
629      *
630      * @param document the Document
631      * @param appName the application name
632      * @param out Writer to write the ARFF output to
633      */

634     public void generateInstances(Document document, final String JavaDoc appName, final Writer JavaDoc out)
635             throws MissingNodeException, IOException JavaDoc {
636         List JavaDoc<Element> bugInstanceList = getBugInstanceList(document);
637
638         for (final Element element : bugInstanceList) {
639             scanAttributeList(new AttributeCallback() {
640                 boolean first = true;
641
642                 public void apply(Attribute attribute) throws IOException JavaDoc {
643                     if (!first)
644                         out.write(",");
645                     first = false;
646                     String JavaDoc value;
647                     try {
648                         value = attribute.getInstanceValue(element, appName);
649                     } catch (MissingNodeException e) {
650                         value = "?";
651                     }
652                     out.write(value);
653                 }
654             });
655             out.write("\n");
656         }
657     }
658
659     /**
660      * Apply a callback to all Attributes.
661      *
662      * @param callback the callback
663      */

664     public void scanAttributeList(AttributeCallback callback)
665             throws MissingNodeException, IOException JavaDoc {
666         for (Attribute attribute : attributeList) {
667             callback.apply(attribute);
668         }
669     }
670
// ------------------------------------------------------------
// Implementation
// ------------------------------------------------------------

675     private static int getBugClassification(String JavaDoc annotationText) {
676         StringTokenizer JavaDoc tok = new StringTokenizer JavaDoc(annotationText, " \t\r\n\f.,:;-");
677
678         int state = UNCLASSIFIED;
679
680         while (tok.hasMoreTokens()) {
681             String JavaDoc s = tok.nextToken();
682             if (s.equals("BUG"))
683                 state |= BUG;
684             else if (s.equals("NOT_BUG"))
685                 state |= NOT_BUG;
686             else if (s.equals("HARMLESS"))
687                 state |= HARMLESS;
688         }
689
690         if ((state & NOT_BUG) != 0)
691             return NOT_BUG;
692         else if ((state & BUG) != 0)
693             return ((state & HARMLESS) != 0) ? HARMLESS_BUG : BUG;
694         else
695             return UNCLASSIFIED;
696     }
697
698     private List JavaDoc<Element> getBugInstanceList(Document document) {
699         List JavaDoc <Element>bugInstanceList = document.selectNodes(nodeSelectionXpath);
700         if (dropUnclassifiedWarnings) {
701             for (Iterator JavaDoc<Element> i = bugInstanceList.iterator(); i.hasNext(); ) {
702                 Element element = i.next();
703                 String JavaDoc annotationText = element.valueOf("./UserAnnotation[text()]");
704                 int classification = getBugClassification(annotationText);
705                 if (classification == UNCLASSIFIED)
706                     i.remove();
707             }
708         }
709         return bugInstanceList;
710     }
711
712     private static class C2ACommandLine extends CommandLine {
713         private ConvertToARFF converter = new ConvertToARFF();
714
715         public C2ACommandLine() {
716             addOption("-select","xpath expression","select BugInstance elements");
717             addSwitch("-train", "drop unclassified warnings");
718             addSwitch("-id", "add unique id attribute (as nominal)");
719             addSwitch("-ids", "add unique id attribute (as string)");
720             addSwitch("-idr", "add random unique id attribtue (as nominal)");
721             addSwitch("-app", "add application name attribute");
722             addOption("-nominal", "attrName,xpath", "add a nominal attribute");
723             addOption("-boolean", "attrName,xpath", "add a boolean attribute");
724             addOption("-numeric", "attrName,xpath", "add a numeric attribute");
725             addSwitch("-classification", "add bug classification attribute");
726             addSwitch("-binclass", "add binary (bug/not_bug) classification attribute");
727             addSwitch("-priority", "add priority attribute");
728             addOption("-appname", "app name", "set application name of all tuples");
729         }
730
731         public ConvertToARFF getConverter() {
732             return converter;
733         }
734
735         @Override JavaDoc
736         protected void handleOption(String JavaDoc option, String JavaDoc optionExtraPart)
737                 throws IOException JavaDoc {
738             if (option.equals("-train")) {
739                 converter.dropUnclassifiedWarnings();
740             } else if (option.equals("-id")) {
741                 converter.addIdAttribute();
742             } else if (option.equals("-ids")) {
743                 converter.addAttribute(new IdStringAttribute());
744             } else if (option.equals("-idr")) {
745                 converter.addAttribute(new RandomIdAttribute());
746             } else if (option.equals("-app")) {
747                 converter.addAppNameAttribute();
748             } else if (option.equals("-classification")) {
749                 converter.addClassificationAttribute();
750             } else if (option.equals("-binclass")) {
751                 converter.addAttribute(new BinaryClassificationAttribute());
752             } else if (option.equals("-priority")) {
753                 converter.addPriorityAttribute();
754             }
755         }
756         
757         private interface XPathAttributeCreator {
758             public Attribute create(String JavaDoc name, String JavaDoc xpath);
759         }
760
761         @Override JavaDoc
762         protected void handleOptionWithArgument(String JavaDoc option, String JavaDoc argument)
763                 throws IOException JavaDoc {
764             
765             if (option.equals("-select")) {
766                 converter.setNodeSelectionXpath(argument);
767             } else if (option.equals("-nominal")) {
768                 addXPathAttribute(option, argument, new XPathAttributeCreator() {
769                     public Attribute create(String JavaDoc name,String JavaDoc xpath) {
770                         return new NominalAttribute(name, xpath);
771                     }
772                 });
773             } else if (option.equals("-boolean")) {
774                 addXPathAttribute(option, argument, new XPathAttributeCreator() {
775                     public Attribute create(String JavaDoc name,String JavaDoc xpath) {
776                         return new BooleanAttribute(name, xpath);
777                     }
778                 });
779             } else if (option.equals("-numeric")) {
780                 addXPathAttribute(option, argument, new XPathAttributeCreator(){
781                     public Attribute create(String JavaDoc name,String JavaDoc xpath) {
782                         return new NumericAttribute(name, xpath);
783                     }
784                 });
785             } else if (option.equals("-appname")) {
786                 converter.setAppName(argument);
787             }
788         }
789         
790         protected void addXPathAttribute(String JavaDoc option, String JavaDoc argument, XPathAttributeCreator creator) {
791             int comma = argument.indexOf(',');
792             if (comma < 0) {
793                 throw new IllegalArgumentException JavaDoc("Missing comma separating attribute name and xpath in " +
794                     option + " option: " + argument);
795             }
796             String JavaDoc attrName = argument.substring(0, comma);
797             String JavaDoc xpath = argument.substring(comma + 1);
798             converter.addAttribute(creator.create(attrName, xpath));
799         }
800
801         public void printUsage(PrintStream JavaDoc out) {
802             out.println("Usage: " + ConvertToARFF.class.getName() +
803                 " [options] <relation name> <output file> <findbugs results> [<findbugs results>...]");
804             super.printUsage(out);
805         }
806     }
807
808     public String JavaDoc toAppName(String JavaDoc fileName) {
809         if (appName != null)
810             return appName;
811         
812         // Remove file extension, if any
813
int lastDot = fileName.lastIndexOf('.');
814         if (lastDot >= 0)
815             fileName = fileName.substring(0, lastDot);
816         return fileName;
817     }
818
819     public static void main(String JavaDoc[] argv) throws Exception JavaDoc {
820         // Expand any option files
821
argv = CommandLine.expandOptionFiles(argv, true, true);
822
823         // Parse command line arguments
824
C2ACommandLine commandLine = new C2ACommandLine();
825         int argCount = commandLine.parse(argv);
826         if (argCount > argv.length - 3) {
827             commandLine.printUsage(System.err);
828             System.exit(1);
829         }
830         String JavaDoc relationName = argv[argCount++];
831         String JavaDoc outputFileName = argv[argCount++];
832
833         // Create the converter
834
ConvertToARFF converter = commandLine.getConverter();
835         if (converter.getNumAttributes() == 0) {
836             throw new IllegalArgumentException JavaDoc("No attributes specified!");
837         }
838
839         // Open output file
840
Writer JavaDoc out = new OutputStreamWriter JavaDoc(new BufferedOutputStream JavaDoc(
841             new FileOutputStream JavaDoc(outputFileName)));
842
843         // Read documents,
844
// scan documents to find ranges of attributes
845
List JavaDoc<DataFile> dataFileList = new ArrayList JavaDoc<DataFile>();
846         while (argCount < argv.length) {
847             String JavaDoc fileName = argv[argCount++];
848
849             // Read input file as dom4j tree
850
SAXReader reader = new SAXReader();
851             Document document = reader.read(fileName);
852
853             DataFile dataFile = new DataFile(document, converter.toAppName(fileName));
854             dataFileList.add(dataFile);
855
856             converter.scan(dataFile.getDocument(), dataFile.getAppName());
857         }
858
859         // Generate ARFF header
860
converter.generateHeader(relationName, out);
861
862         // Generate instances from each document
863
for (DataFile dataFile : dataFileList) {
864             converter.generateInstances(dataFile.getDocument(), dataFile.getAppName(), out);
865         }
866
867         out.close();
868     }
869
}
871
// vim:ts=4
873
Popular Tags