KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > io > UriProcessingFormatter


1 /* UriProcessingFormatter.java
2  *
3  * $Id: UriProcessingFormatter.java,v 1.20 2006/01/21 03:49:44 gojomo Exp $
4  *
5  * Created on Jun 10, 2003
6  *
7  * Copyright (C) 2003 Internet Archive.
8  *
9  * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  */

25 package org.archive.crawler.io;
26
27 import it.unimi.dsi.mg4j.util.MutableString;
28
29 import java.util.logging.Formatter JavaDoc;
30 import java.util.logging.LogRecord JavaDoc;
31
32 import org.archive.crawler.datamodel.CoreAttributeConstants;
33 import org.archive.crawler.datamodel.CrawlURI;
34 import org.archive.util.ArchiveUtils;
35 import org.archive.util.Base32;
36 import org.archive.util.MimetypeUtils;
37
38 /**
39  * Formatter for 'crawl.log'. Expects completed CrawlURI as parameter.
40  *
41  * @author gojomo
42  */

43 public class UriProcessingFormatter
44 extends Formatter JavaDoc implements CoreAttributeConstants {
45     private final static String JavaDoc NA = "-";
46     /**
47      * Guess at line length (URIs are assumed avg. of 128 bytes).
48      * Used to preallocated the buffer we accumulate the log line
49      * in. Hopefully we get it right most of the time and no need
50      * to enlarge except in the rare case.
51      */

52     private final static int GUESS_AT_LOG_LENGTH =
53         17 + 1 + 3 + 1 + 10 + 128 + + 1 + 10 + 1 + 128 + 1 + 10 + 1 + 3 +
54         14 + 1 + 32 + 4 + 128 + 1;
55     
56     /**
57      * Reuseable assembly buffer.
58      */

59     private final MutableString buffer =
60         new MutableString(GUESS_AT_LOG_LENGTH);
61     
62     public String JavaDoc format(LogRecord JavaDoc lr) {
63         CrawlURI curi = (CrawlURI)lr.getParameters()[0];
64         String JavaDoc length = NA;
65         String JavaDoc mime = null;
66         if (curi.isHttpTransaction()) {
67             if(curi.getContentLength() >= 0) {
68                 length = Long.toString(curi.getContentLength());
69             } else if (curi.getContentSize() > 0) {
70                 length = Long.toString(curi.getContentSize());
71             }
72             mime = curi.getContentType();
73         } else {
74             if (curi.getContentSize() > 0) {
75                 length = Long.toString(curi.getContentSize());
76             }
77             mime = curi.getContentType();
78         }
79         mime = MimetypeUtils.truncate(mime);
80
81         long time = System.currentTimeMillis();
82         String JavaDoc arcTimeAndDuration;
83         if(curi.containsKey(A_FETCH_COMPLETED_TIME)) {
84             long completedTime = curi.getLong(A_FETCH_COMPLETED_TIME);
85             long beganTime = curi.getLong(A_FETCH_BEGAN_TIME);
86             arcTimeAndDuration = ArchiveUtils.get17DigitDate(beganTime) + "+"
87                     + Long.toString(completedTime - beganTime);
88         } else {
89             arcTimeAndDuration = NA;
90         }
91
92         String JavaDoc via = curi.flattenVia();
93         
94         Object JavaDoc digest = curi.getContentDigest();
95         if (digest != null) {
96             digest = Base32.encode((byte [])digest);
97         }
98
99         String JavaDoc sourceTag = curi.containsKey(A_SOURCE_TAG)
100                 ? curi.getString(A_SOURCE_TAG)
101                 : null;
102                 
103         this.buffer.length(0);
104         return this.buffer.append(ArchiveUtils.getLog17Date(time))
105             .append(" ")
106             .append(ArchiveUtils.padTo(curi.getFetchStatus(), 5))
107             .append(" ")
108             .append(ArchiveUtils.padTo(length, 10))
109             .append(" ")
110             .append(curi.getUURI().toString())
111             .append(" ")
112             .append(checkForNull(curi.getPathFromSeed()))
113             .append(" ")
114             .append(checkForNull(via))
115             .append(" ")
116             .append(mime)
117             .append(" ")
118             .append("#")
119             // Pad threads to be 3 digits. For Igor.
120
.append(ArchiveUtils.padTo(
121                 Integer.toString(curi.getThreadNumber()), 3, '0'))
122             .append(" ")
123             .append(arcTimeAndDuration)
124             .append(" ")
125             .append(checkForNull((String JavaDoc)digest))
126             .append(" ")
127             .append(checkForNull(sourceTag))
128             .append(" ")
129             .append(checkForNull(curi.getAnnotations()))
130             .append("\n").toString();
131     }
132     
133     /**
134      * @param str String to check.
135      * @return Return passed string or <code>NA</code> if null.
136      */

137     protected String JavaDoc checkForNull(String JavaDoc str) {
138         return (str == null || str.length() <= 0)? NA: str;
139     }
140 }
141
142
143
Popular Tags