1 2 17 18 package org.apache.poi.hwpf; 19 20 import org.apache.poi.hwpf.HWPFDocument; 21 import org.apache.poi.hwpf.usermodel.*; 22 import org.apache.poi.hwpf.model.*; 23 24 import java.io.*; 25 26 public class Word2Forrest 27 { 28 Writer _out; 29 HWPFDocument _doc; 30 31 public Word2Forrest(HWPFDocument doc, OutputStream stream) 32 throws IOException, UnsupportedEncodingException 33 { 34 OutputStreamWriter out = new OutputStreamWriter (stream, "UTF-8"); 35 _out = out; 36 _doc = doc; 37 38 init (); 39 openDocument (); 40 openBody (); 41 42 Range r = doc.getRange (); 43 StyleSheet styleSheet = doc.getStyleSheet (); 44 45 int sectionLevel = 0; 46 int lenParagraph = r.numParagraphs (); 47 boolean inCode = false; 48 for (int x = 0; x < lenParagraph; x++) 49 { 50 Paragraph p = r.getParagraph (x); 51 String text = p.text (); 52 if (text.trim ().length () == 0) 53 { 54 continue; 55 } 56 StyleDescription paragraphStyle = styleSheet.getStyleDescription (p. 57 getStyleIndex ()); 58 String styleName = paragraphStyle.getName(); 59 if (styleName.startsWith ("Heading")) 60 { 61 if (inCode) 62 { 63 closeSource(); 64 inCode = false; 65 } 66 67 int headerLevel = Integer.parseInt (styleName.substring (8)); 68 if (headerLevel > sectionLevel) 69 { 70 openSection (); 71 } 72 else 73 { 74 for (int y = 0; y < (sectionLevel - headerLevel) + 1; y++) 75 { 76 closeSection (); 77 } 78 openSection (); 79 } 80 sectionLevel = headerLevel; 81 openTitle (); 82 writePlainText (text); 83 closeTitle (); 84 } 85 else 86 { 87 int cruns = p.numCharacterRuns (); 88 CharacterRun run = p.getCharacterRun (0); 89 String fontName = run.getFontName(); 90 if (fontName.startsWith ("Courier")) 91 { 92 if (!inCode) 93 { 94 openSource (); 95 inCode = true; 96 } 97 writePlainText (p.text()); 98 } 99 else 100 { 101 if (inCode) 102 { 103 inCode = false; 104 closeSource(); 105 } 106 openParagraph(); 107 writePlainText(p.text()); 108 closeParagraph(); 109 } 110 } 111 } 112 for (int x = 0; x < sectionLevel; x++) 113 { 114 closeSection(); 115 } 116 closeBody(); 117 closeDocument(); 118 _out.flush(); 119 120 } 121 122 public void init () 123 throws IOException 124 { 125 _out.write ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n"); 126 _out.write ("<!DOCTYPE document PUBLIC \"-//APACHE//DTD Documentation V1.1//EN\" \"./dtd/document-v11.dtd\">\r\n"); 127 } 128 129 public void openDocument () 130 throws IOException 131 { 132 _out.write ("<document>\r\n"); 133 } 134 public void closeDocument () 135 throws IOException 136 { 137 _out.write ("</document>\r\n"); 138 } 139 140 141 public void openBody () 142 throws IOException 143 { 144 _out.write ("<body>\r\n"); 145 } 146 147 public void closeBody () 148 throws IOException 149 { 150 _out.write ("</body>\r\n"); 151 } 152 153 154 public void openSection () 155 throws IOException 156 { 157 _out.write ("<section>"); 158 159 } 160 161 public void closeSection () 162 throws IOException 163 { 164 _out.write ("</section>"); 165 166 } 167 168 public void openTitle () 169 throws IOException 170 { 171 _out.write ("<title>"); 172 } 173 174 public void closeTitle () 175 throws IOException 176 { 177 _out.write ("</title>"); 178 } 179 180 public void writePlainText (String text) 181 throws IOException 182 { 183 _out.write (text); 184 } 185 186 public void openParagraph () 187 throws IOException 188 { 189 _out.write ("<p>"); 190 } 191 192 public void closeParagraph () 193 throws IOException 194 { 195 _out.write ("</p>"); 196 } 197 198 public void openSource () 199 throws IOException 200 { 201 _out.write ("<source><![CDATA["); 202 } 203 public void closeSource () 204 throws IOException 205 { 206 _out.write ("]]></source>"); 207 } 208 209 210 public static void main(String [] args) 211 { 212 try 213 { 214 OutputStream out = new FileOutputStream("c:\\test.xml"); 215 216 new Word2Forrest(new HWPFDocument(new FileInputStream(args[0])), out); 217 out.close(); 218 } 219 catch (Throwable t) 220 { 221 t.printStackTrace(); 222 } 223 224 } 225 } 226 | Popular Tags |