KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > cofax > util > WebToXML


1 /*
2  * WebToXML is part of the Cofax content management system library.
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17  *
18  * Please see http://www.cofax.org for contact information and other
19  * related information.
20  *
21  * $Header: /cvsroot/cofax/cofax/src/org/cofax/util/WebToXML.java,v 1.2.2.1 2006/12/11 16:30:01 fxrobin Exp $
22  */

23
24 package org.cofax.util;
25
26 import java.net.*;
27 import java.io.*;
28 import java.util.*;
29 import java.text.*;
30 import org.apache.oro.text.perl.Perl5Util;
31 import org.cofax.WysiwygTemplate;
32 import org.cofax.XMLConfig;
33
34 class WebToXML {
35
36     /**
37      * September, 2000 performs an HTTP get, then stuffs the document into a
38      * template takes an XML configuration file as its one argument modified
39      * 12/12/2000 to take hard-coded date from template or insert current date
40      * modified 4/2001 to deal with tilde chars
41      *
42      * @author Philip Ravenscroft philip@infosculpture.com
43      * @version Version 0.3
44      */

45
46     static String JavaDoc urlToGet;
47
48     static String JavaDoc fileToWrite;
49
50     static String JavaDoc templateFilename;
51
52     static String JavaDoc articleFilename;
53
54     static String JavaDoc section;
55
56     static String JavaDoc pubName;
57
58     static String JavaDoc noVersioning;
59
60     static String JavaDoc disableIndex;
61
62     public static void main(String JavaDoc[] args) {
63
64         /**
65          * Main method-- control structure
66          *
67          * @param args
68          * String array of command-line arguments
69          * @return void
70          *
71          */

72
73         if (args.length != 1) {
74             System.err.println("Usage: WebToXML configFilename");
75         } else {
76             String JavaDoc xmlConfigFilename = args[0];
77             readConfigFile(xmlConfigFilename);
78             String JavaDoc results = "";
79             try {
80                 results = getURL(urlToGet);
81                 String JavaDoc xmlToWrite = encodeAsXML(results, templateFilename);
82                 writeToFile(xmlToWrite, fileToWrite);
83                 System.out.println("ok, wrote to: " + fileToWrite);
84             } catch (HTTPNotOKException ex) {
85                 System.err.println("Foreign server didn't respond correctly");
86                 System.err.println("exiting...");
87             }
88
89         }
90     }
91
92     public static String JavaDoc getURL(String JavaDoc URLToGet) throws HTTPNotOKException {
93         /**
94          * Gets a given URL using HTTP and returns the document's contents
95          *
96          * @param String
97          * URLToGet the URL you want to get
98          * @return String document contents
99          * @exception HTTPNotOKException
100          * The server did not return 200 status code
101          */

102         String JavaDoc pageContents = "";
103         URL url = null;
104         HttpURLConnection connection = null;
105         int responseCode = 0;
106         try {
107             url = new URL(URLToGet);
108             connection = (HttpURLConnection) url.openConnection();
109             responseCode = connection.getResponseCode();
110         } catch (MalformedURLException ex) {
111             System.err.println("Error: Malformed URL");
112         } catch (IOException ex) {
113             System.err.println("Error: IO Exception");
114         }
115
116         // check to see if responded with 200:
117
if (responseCode != 200) {
118             throw new HTTPNotOKException(responseCode);
119         } else {
120             // read into a buffer
121
try {
122                 BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
123                 String JavaDoc inputLine;
124                 while ((inputLine = in.readLine()) != null) {
125                     pageContents += inputLine;
126                 }
127             } catch (IOException ex) {
128                 System.err.println("Error: IO Exception");
129             }
130         }
131         return pageContents;
132
133     }
134
135     public static String JavaDoc encodeAsXML(String JavaDoc stringToEncode, String JavaDoc xmlTemplateFilename) {
136
137         /**
138          * takes a string, coverts special characters using convertSpecialChars
139          * and puts it into the `body` of a template, which it reads from disk.
140          * Calls cofax's WysiwigTemplate to accomplish this
141          *
142          * @param string
143          * ToEncode, xmlTemplateFilename
144          * @return String encoded XML
145          */

146
147         String JavaDoc escapedStringToEncode = convertSpecialChars(stringToEncode);
148
149         File inputFile = new File(xmlTemplateFilename);
150         BufferedReader in = null;
151         try {
152             in = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile)));
153         } catch (FileNotFoundException ex) {
154             System.out.println("Error: Template file not found");
155         }
156
157         String JavaDoc inputLine;
158         String JavaDoc fileContents = ""; // can we make this a StringBuffer?
159
try {
160             while ((inputLine = in.readLine()) != null) {
161                 fileContents += inputLine + "\n";
162             }
163         } catch (IOException ex) {
164             System.err.println("Error: IOException found trying to read template file");
165         }
166         StringBuffer JavaDoc fileContentsStringBuffer = new StringBuffer JavaDoc(fileContents);
167
168         HashMap glossary = new HashMap();
169         glossary.put("date", returnDate());
170         glossary.put("pubName", pubName);
171         glossary.put("section", section);
172         glossary.put("filename", articleFilename);
173         glossary.put("body", escapedStringToEncode);
174         glossary.put("noVersioning", noVersioning);
175         glossary.put("disableIndex", disableIndex);
176
177         WysiwygTemplate template = new WysiwygTemplate();
178         String JavaDoc completeXML = template.applyTemplate(fileContentsStringBuffer, glossary);
179         return completeXML;
180     }
181
182     public static void writeToFile(String JavaDoc stringToWrite, String JavaDoc fileName) {
183
184         /**
185          * Writes a string to a file
186          *
187          * @param string
188          * to write, filename to write to
189          * @return nothing
190          */

191
192         FileOutputStream out; // declare a file output object
193
PrintStream p; // declare a print stream object
194
try {
195             // Create a new file output stream
196
// connected to "myfile.txt"
197
out = new FileOutputStream(fileName);
198             // Connect print stream to the output stream
199
p = new PrintStream(out);
200             p.println(stringToWrite);
201             p.close();
202         } catch (Exception JavaDoc e) {
203             System.err.println("Error writing to file");
204         }
205     }
206
207     public static String JavaDoc convertSpecialChars(String JavaDoc linesToParse) {
208
209         /**
210          * mangled Java version of Toan Dang's Perl subroutine
211          *
212          * @param String
213          * to parse
214          * @return parsed string
215          */

216
217         Perl5Util util = new Perl5Util();
218
219         linesToParse = util.substitute("s/\n+/\n/g", linesToParse);
220         linesToParse = util.substitute("s/\n/<br><br>\n/g", linesToParse);
221         linesToParse = util.substitute("s/\021//g", linesToParse);
222         linesToParse = util.substitute("s/\252 *//g", linesToParse);
223         linesToParse = util.substitute("s/\317/-/g", linesToParse);
224         linesToParse = util.substitute("s/\376//g", linesToParse);
225         linesToParse = util.substitute("s/\004/<li>/g", linesToParse);
226
227         // convert html tags to entities:
228
linesToParse = util.substitute("s/</&lt;/g", linesToParse);
229         linesToParse = util.substitute("s/>/&gt;/g", linesToParse);
230
231         // convert control chars
232
linesToParse = util.substitute("s/\\^C\\^D/ /g", linesToParse);
233         linesToParse = util.substitute("s/\\^G//g", linesToParse);
234         linesToParse = util.substitute("s/\\\\//g", linesToParse);
235
236         // these don't work b/c the "e" modifier doesn't work:
237
// linesToParse = util.substitute("s/([\200-\400])/encodeForXml($1)/eg",
238
// linesToParse);
239
// linesToParse =
240
// util.substitute("s/([\000-\007\013-\014\016-\037])/encodeForXML($1)/g",
241
// linesToParse);
242
// these don't work b/c of double quotes:
243

244         linesToParse = util.substitute("s/\\'\\'/\"/g", linesToParse);
245         linesToParse = util.substitute("s/\\`/\\'/g", linesToParse);
246         linesToParse = util.substitute("s/\"/&quot;/g", linesToParse);
247
248         // change ampersands to "&amp;"
249
linesToParse = util.substitute("s/&/&amp;/g", linesToParse);
250         // change text entities back:
251
linesToParse = util.substitute("s/&amp;([a-zA-Z0-9]*;)/&$1/g", linesToParse);
252         // except for "&nbsp;", which XML doesn't like:
253
linesToParse = util.substitute("s/&nbsp;/&amp;nbsp;/g", linesToParse);
254         // and "&ntilde; to &#241;"
255
linesToParse = util.substitute("s/&ntilde;/&#241;/g", linesToParse);
256
257         return linesToParse;
258
259     }
260
261     public static String JavaDoc returnDate() {
262
263         /**
264          * simple method that returns the date in the format we like it
265          *
266          * @param none
267          * @return String date in yyyy-MM-dd format suitable for insertion into
268          * db
269          */

270
271         Date now = new Date();
272         SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
273         String JavaDoc formattedDate = dateFormat.format(now);
274         return formattedDate;
275     }
276
277     public static void readConfigFile(String JavaDoc configFilename) {
278
279         /*
280          * reads an xml configuration file and sets the following global
281          * variables: urlToGet, fileToWrite templateFileName, filename, section,
282          * pubName @param Configuration file name @return nothing
283          */

284
285         boolean didload = false;
286         try {
287             XMLConfig configFile = new XMLConfig();
288             configFile.setXMLFileName(configFilename);
289
290             didload = configFile.load();
291             // debug code to tell if page loaded
292
if (!didload) {
293                 System.err.println("Configuration file didn't load");
294                 System.err.println(configFile.getLastError());
295             }
296             // end debug code
297

298             // SECURITY: TO DO:
299
// Ensure that a valid template path is specified.
300
// Consider giving a warning if the template path is
301
// "/", "c:", "d:/", etc.
302

303             urlToGet = configFile.getString("urlToGet");
304             fileToWrite = configFile.getString("fileToWrite");
305             templateFilename = configFile.getString("templateFilename");
306             articleFilename = configFile.getString("articleFilename");
307             section = configFile.getString("section");
308             pubName = configFile.getString("pubName");
309             noVersioning = configFile.getString("noVersioning");
310             disableIndex = configFile.getString("disableIndex");
311
312         } catch (Exception JavaDoc e) {
313
314             System.err.println("Error reading configuration:");
315             e.printStackTrace(System.err);
316
317         }
318     }
319 }
320
/**
 * Signals that an HTTP request did not come back with status code 200.
 * The status code that was observed is part of the message and is also
 * available through getResponseCode().
 */
class HTTPNotOKException extends Exception {

    private static final long serialVersionUID = 1L;

    /** The status code that was received instead of 200. */
    private final int responseCode;

    /**
     * @param responseCode
     *            the status code that was received instead of 200
     */
    HTTPNotOKException(int responseCode) {
        super("HTTP 200 not returned: server responded with " + responseCode);
        this.responseCode = responseCode;
    }

    /**
     * @return the status code passed to the constructor
     */
    int getResponseCode() {
        return responseCode;
    }
}
326
Popular Tags