KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > au > id > jericho > lib > html > Util


// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 2.2
// Copyright (C) 2006 Martin Jericho
// http://sourceforge.net/projects/jerichohtml/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// http://www.gnu.org/copyleft/lesser.html
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

21 package au.id.jericho.lib.html;
22
23 import java.util.*;
24 import java.io.*;
25
26 /**
27  * Contains miscellaneous utility methods not directly associated with the HTML Parser library.
28  */

29 public final class Util {
30     private static final int BUFFER_SIZE=2048;
31     private static final String JavaDoc CSVNewLine=System.getProperty("line.separator");
32
33     private Util() {}
34
35     /**
36      * Returns the text loaded from the specified <code>Reader</code> as a string.
37      * <p>
38      * If a <code>null</code> argument is supplied to this method, an empty string is returned.
39      * Note that in version 2.0 a <code>null</code> argument resulted in a <code>null</code> return value.
40      * <p>
41      * To load text from an <code>InputStream</code>, use <code>getString(new InputStreamReader(inputStream))</code>.
42      *
43      * @param reader the <code>java.io.Reader</code> from which to load the text.
44      * @return the text loaded from the specified <code>java.io.Reader</code> as a string.
45      * @throws java.io.IOException if an I/O error occurs.
46      */

47     public static String JavaDoc getString(final Reader reader) throws IOException {
48         if (reader==null) return "";
49         try {
50             final BufferedReader in=new BufferedReader(reader,BUFFER_SIZE);
51             int charsRead;
52             final char[] copyBuffer=new char[BUFFER_SIZE];
53             final StringBuffer JavaDoc sb=new StringBuffer JavaDoc();
54             while ((charsRead=in.read(copyBuffer,0,BUFFER_SIZE))!=-1)
55                 sb.append(copyBuffer,0,charsRead);
56             in.close();
57             return sb.toString();
58         } finally {
59             reader.close();
60         }
61     }
62
63     /**
64      * Outputs the specified array of strings to the specified <code>Writer</code> in the format of a line for a CSV file.
65      * <p>
66      * "CSV" stands for <i>Comma Separated Values</i>.
67      * There is no formal specification for a CSV file, so there is significant variation in
68      * the way different applications handle issues like the encoding of different data types and special characters.
69      * <p>
70      * Generally, a CSV file contains a list of records separated by line breaks, with each record consisting of a list of
71      * field values separated by commas.
72      * Each record in the file should contain the same number of field values, with the values at each position representing the same
73      * type of data in all the records. In this way the file can also be divided into columns, often with the first line of the
74      * file containing the column labels.
75      * <p>
76      * Columns can have different data types such as text, numeric, date / time and boolean.
77      * A text value is often delimited with single (<code>'</code>) or double-quotes (<code>"</code>),
78      * especially if the value contains a comma, line feed, or other special character that is significant to the syntax.
79      * Encoding techniques for including quote characters themselves in text values vary widely.
80      * Values of other types are generally unquoted to distinguish them from text values.
81      * <p>
82      * This method produces output that is readable by MS-Excel, conforming to the following rules:
83      * <p>
84      * <ul>
85      * <li>All values are considered to be of type text, except for the constants {@link Config#ColumnValueTrue}
86      * and {@link Config#ColumnValueFalse}, representing the boolean values <code>true</code> and <code>false</code> respectively.
87      * <li>All text values are enclosed in double-quotes.
88      * <li>Double-quote characters contained in text values are encoded using two consecutive double-quotes (<code>""</code>).
89      * <li><code>null</code> values are represented as empty fields.
90      * <li>The end of each record is represented by a carriage-return / line-feed (CR/LF) pair.
91      * <li>Line breaks inside text values are represented by a single line feed (LF) character.
92      * </ul>
93      *
94      * @param writer the destination <code>java.io.Writer</code> for the output.
95      * @throws java.io.IOException if an I/O error occurs.
96      * @see FormFields#getColumnLabels()
97      * @see FormFields#getColumnValues(Map)
98      */

99   public static void outputCSVLine(final Writer writer, final String JavaDoc[] values) throws IOException {
100     for (int i=0; i<values.length;) {
101             final String JavaDoc value=values[i];
102         if (value!=null) {
103                 if (value==Config.ColumnValueTrue || value==Config.ColumnValueFalse) {
104                     writer.write(value); // assumes neither ColumnTrue or ColumnFalse contain double quotes.
105
} else {
106                     writer.write('"');
107                     outputValueEscapeQuotes(writer,value);
108                     writer.write('"');
109                 }
110             }
111             if (++i!=values.length) writer.write(',');
112     }
113         writer.write(CSVNewLine);
114   }
115
116   private static void outputValueEscapeQuotes(final Writer writer, final String JavaDoc text) throws IOException {
117         for (int i=0; i<text.length(); i++) {
118             final char ch=text.charAt(i);
119             writer.write(ch);
120             if (ch=='"') writer.write(ch);
121         }
122   }
123
124     // use this method until we can replace with java 1.5 StringBuffer.append(CharSequence s)
125
static StringBuffer JavaDoc appendTo(final StringBuffer JavaDoc sb, final CharSequence JavaDoc s) {
126         return appendTo(sb,s,0,s.length());
127     }
128     // use this method until we can replace with java 1.5 StringBuffer.append(CharSequence s, int begin, int end)
129
static StringBuffer JavaDoc appendTo(final StringBuffer JavaDoc sb, final CharSequence JavaDoc s, int start, final int end) {
130         while (start<end) {
131             sb.append(s.charAt(start));
132             start++;
133         }
134         return sb;
135     }
136     // use this method until we can replace with java 1.5 Writer.append(CharSequence s)
137
static Writer appendTo(final Writer writer, final CharSequence JavaDoc s) throws IOException {
138         return appendTo(writer,s,0,s.length());
139     }
140     // use this method until we can replace with java 1.5 Writer.append(CharSequence s, int begin, int end)
141
static Writer appendTo(final Writer writer, final CharSequence JavaDoc s, int start, final int end) throws IOException {
142         while (start<end) {
143             writer.write(s.charAt(start));
144             start++;
145         }
146         return writer;
147     }
148 }
149
Popular Tags