KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > util > MimetypeUtils


/* MimetypeUtils
 *
 * $Id: MimetypeUtils.java,v 1.2 2005/02/17 20:39:21 stack-sf Exp $
 *
 * Created on Sep 22, 2004
 *
 * Copyright (C) 2004 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.archive.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Class of mimetype utilities.
 *
 * @author stack
 */
public class MimetypeUtils {
    /**
     * The 'no-type' content-type.
     *
     * Defined in the ARC file spec at
     * http://www.archive.org/web/researcher/ArcFileFormat.php.
     */
    public static final String NO_TYPE_MIMETYPE = "no-type";

    /**
     * Truncation regex.
     *
     * Group 1 captures the leading run of characters that are neither
     * whitespace nor one of the delimiters ';' and ','. The pattern is
     * anchored at the start of the input, so a value that begins with
     * whitespace or a delimiter does not match at all.
     */
    final static Pattern TRUNCATION_REGEX = Pattern.compile("^([^\\s;,]+).*");

    /**
     * Truncate passed mimetype.
     *
     * Ensure no spaces. Strip encoding. Truncation required by
     * ARC files.
     *
     * <p>Truncate at delimiters [;, ] and at any other whitespace.
     * Truncate multi-part content type header at ';'.
     * Apache httpclient collapses values of multiple instances of the
     * header into one comma-separated value, therefore truncated at ','.
     * Current ia_tools that work with arc files expect 5-column
     * space-separated meta-lines, therefore truncate at ' '.
     *
     * <p>A <code>null</code> or empty value, or one that starts with
     * whitespace or a delimiter, yields {@link #NO_TYPE_MIMETYPE}.
     *
     * @param contentType Raw content-type. May be <code>null</code>.
     *
     * @return Computed content-type made from passed content-type after
     * running it through a set of rules. Never <code>null</code>.
     */
    public static String truncate(String contentType) {
        if (contentType == null) {
            return NO_TYPE_MIMETYPE;
        }
        Matcher matcher = TRUNCATION_REGEX.matcher(contentType);
        // lookingAt() instead of matches(): the trailing ".*" cannot cross a
        // line terminator, so matches() rejected any value containing CR/LF
        // after the type (e.g. a folded header) and reported it as no-type.
        // Only the leading token is wanted, so a prefix match is sufficient.
        if (matcher.lookingAt()) {
            return matcher.group(1);
        }
        return NO_TYPE_MIMETYPE;
    }
}
82
Popular Tags