KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > parse > ParserChecker


1 /* Copyright (c) 2004 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.parse;
5
6 import net.nutch.util.LogFormatter;
7
8 import net.nutch.protocol.ProtocolFactory;
9 import net.nutch.protocol.Protocol;
10 import net.nutch.protocol.Content;
11
12 import java.util.logging.Logger JavaDoc;
13
14 /**
15  * Parser checker, useful for testing parser.
16  *
17  * @author John Xing
18  */

19
20 public class ParserChecker {
21
22   public static final Logger JavaDoc LOG =
23     LogFormatter.getLogger("net.nutch.parse.ParserChecker");
24
25   public ParserChecker() {}
26
27   public static void main(String JavaDoc[] args) throws Exception JavaDoc {
28     boolean dumpText = false;
29     boolean force = false;
30     String JavaDoc contentType = null;
31     String JavaDoc url = null;
32
33     String JavaDoc usage = "Usage: ParserChecker [-dumpText] [-forceAs mimeType] url";
34
35     if (args.length == 0) {
36       System.err.println(usage);
37       System.exit(-1);
38     }
39
40     for (int i = 0; i < args.length; i++) {
41       if (args[i].equals("-forceAs")) {
42         force = true;
43         contentType = args[++i];
44       } else if (args[i].equals("-dumpText")) {
45         dumpText = true;
46       } else if (i != args.length-1) {
47         System.err.println(usage);
48         System.exit(-1);
49       } else {
50         url = args[i];
51       }
52     }
53
54     LOG.info("fetching: "+url);
55
56     Protocol protocol = ProtocolFactory.getProtocol(url);
57     Content content = protocol.getContent(url);
58
59     if (force) {
60       content.setContentType(contentType);
61     } else {
62       contentType = content.getContentType();
63     }
64
65     if (contentType == null) {
66       System.err.println("");
67       System.exit(-1);
68     }
69
70     LOG.info("parsing: "+url);
71     LOG.info("contentType: "+contentType);
72
73     Parser parser = ParserFactory.getParser(contentType, url);
74     Parse parse = parser.getParse(content);
75
76     System.out.print("---------\nParseData\n---------\n");
77     System.out.print(parse.getData().toString());
78     if (dumpText) {
79       System.out.print("---------\nParseText\n---------\n");
80       System.out.print(parse.getText());
81     }
82
83     System.exit(0);
84   }
85 }
86
Popular Tags