package dom;

import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;

import org.enhydra.apache.xerces.readers.MIME2Java;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import util.Arguments;

/**
 * Command-line sample that parses each URI given on the command line into a
 * DOM tree and writes that tree back out as XML on standard output.
 *
 * <p>Parser features and the output encoding are held in static fields that
 * {@link #main(String[])} updates from the command-line options, so they
 * apply to every document printed afterwards.
 */
public class DOMWriter {

    /** Parser wrapper class instantiated when no {@code -p} option is given. */
    private static final String
        DEFAULT_PARSER_NAME = "dom.wrappers.DOMParser";

    /** Value for the {@code http://xml.org/sax/features/validation} feature. */
    private static boolean setValidation = false;

    /** Value for the {@code http://xml.org/sax/features/namespaces} feature. */
    private static boolean setNameSpaces = true;

    /** Value for the {@code http://apache.org/xml/features/validation/schema} feature. */
    private static boolean setSchemaSupport = true;

    /** Value for the {@code .../validation/schema-full-checking} feature. */
    private static boolean setSchemaFullSupport = false;

    /** Value for the {@code .../dom/defer-node-expansion} feature. */
    private static boolean setDeferredDOM = true;

    /** Java encoding name used by writers created through {@link #DOMWriter(boolean)}. */
    private static String
        PRINTWRITER_ENCODING = "UTF8";

    /** Encoding names accepted by the {@code -e} command-line option. */
    private static final String[] MIME2JAVA_ENCODINGS =
    { "Default", "UTF-8", "US-ASCII", "ISO-8859-1", "ISO-8859-2", "ISO-8859-3", "ISO-8859-4",
      "ISO-8859-5", "ISO-8859-6", "ISO-8859-7", "ISO-8859-8", "ISO-8859-9", "ISO-2022-JP",
      "SHIFT_JIS", "EUC-JP","GB2312", "BIG5", "EUC-KR", "ISO-2022-KR", "KOI8-R", "EBCDIC-CP-US",
      "EBCDIC-CP-CA", "EBCDIC-CP-NL", "EBCDIC-CP-DK", "EBCDIC-CP-NO", "EBCDIC-CP-FI", "EBCDIC-CP-SE",
      "EBCDIC-CP-IT", "EBCDIC-CP-ES", "EBCDIC-CP-GB", "EBCDIC-CP-FR", "EBCDIC-CP-AR1",
      "EBCDIC-CP-HE", "EBCDIC-CP-CH", "EBCDIC-CP-ROECE","EBCDIC-CP-YU",
      "EBCDIC-CP-IS", "EBCDIC-CP-AR2", "UTF-16"
    };

    /** Destination of the serialized document (wraps {@code System.out}). */
    protected PrintWriter out;

    /** Whether output is written in canonical form. */
    protected boolean canonical;

    /**
     * Creates a writer that prints to {@code System.out} using the given
     * Java encoding name.
     *
     * @param encoding  Java encoding name passed to {@link OutputStreamWriter}
     * @param canonical {@code true} for canonical output
     * @throws UnsupportedEncodingException if {@code encoding} is not supported
     */
    public DOMWriter(String encoding, boolean canonical)
    throws UnsupportedEncodingException {
        out = new PrintWriter(new OutputStreamWriter(System.out, encoding));
        this.canonical = canonical;
    }

    /**
     * Creates a writer that uses the encoding returned by
     * {@link #getWriterEncoding()}.
     *
     * @param canonical {@code true} for canonical output
     * @throws UnsupportedEncodingException if that encoding is not supported
     */
    public DOMWriter(boolean canonical) throws UnsupportedEncodingException {
        this(getWriterEncoding(), canonical);
    }

    /**
     * Returns the Java encoding name currently configured for new writers.
     *
     * @return the configured Java encoding name
     */
    public static String getWriterEncoding() {
        return PRINTWRITER_ENCODING;
    }

    /**
     * Sets the encoding used by writers created afterwards.
     *
     * <p>{@code "DEFAULT"} selects {@code "UTF8"} and {@code "UTF-16"} selects
     * {@code "Unicode"} (both compared case-insensitively); any other name is
     * translated with {@code MIME2Java.convert}.
     *
     * @param encoding encoding name; must not be {@code null}
     */
    public static void setWriterEncoding(String encoding) {
        if (encoding.equalsIgnoreCase("DEFAULT")) {
            PRINTWRITER_ENCODING = "UTF8";
        } else if (encoding.equalsIgnoreCase("UTF-16")) {
            PRINTWRITER_ENCODING = "Unicode";
        } else {
            PRINTWRITER_ENCODING = MIME2Java.convert(encoding);
        }
    }

    /**
     * Tells whether {@code encoding} is one of the names accepted by the
     * {@code -e} option. The comparison is case-sensitive.
     *
     * @param encoding name to look up; {@code null} is treated as not valid
     * @return {@code true} if the name appears in the accepted list
     */
    public static boolean isValidJavaEncoding(String encoding) {
        if (encoding == null) {
            return false;
        }
        for (int i = 0; i < MIME2JAVA_ENCODINGS.length; i++) {
            if (encoding.equals(MIME2JAVA_ENCODINGS[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Parses {@code uri} with the named parser wrapper and prints the
     * resulting document to standard output.
     *
     * <p>Failures are reported on {@code System.err}; they are not rethrown,
     * so a caller looping over several URIs continues with the next one.
     *
     * @param parserWrapperName fully qualified class name of a
     *                          {@code DOMParserWrapper} implementation
     * @param uri               document to parse
     * @param canonical         {@code true} for canonical output
     */
    public static void print(String parserWrapperName, String uri,
                             boolean canonical) {
        try {
            DOMParserWrapper parser =
                (DOMParserWrapper) Class.forName(parserWrapperName).newInstance();

            parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion",
                              setDeferredDOM);
            parser.setFeature("http://xml.org/sax/features/validation",
                              setValidation);
            parser.setFeature("http://xml.org/sax/features/namespaces",
                              setNameSpaces);
            parser.setFeature("http://apache.org/xml/features/validation/schema",
                              setSchemaSupport);
            parser.setFeature("http://apache.org/xml/features/validation/schema-full-checking",
                              setSchemaFullSupport);

            Document document = parser.parse(uri);
            DOMWriter writer = new DOMWriter(canonical);
            writer.print(document);
        } catch (Exception e) {
            // Report the failure rather than discarding it: an unknown parser
            // class or an unreadable URI would otherwise produce no output
            // and no explanation.
            System.err.println("error: unable to print " + uri + ": " + e);
        }
    }

    /**
     * Writes {@code node} and its descendants to {@link #out}, then flushes.
     *
     * <p>Document, element, entity-reference, CDATA, text and
     * processing-instruction nodes are handled; other node types produce no
     * output.
     *
     * @param node node to serialize; {@code null} is ignored
     */
    public void print(Node node) {
        if (node == null) {
            return;
        }

        switch (node.getNodeType()) {
            case Node.DOCUMENT_NODE: {
                if (!canonical) {
                    // Translate the Java encoding name back into the name
                    // written in the XML declaration.
                    String declEncoding = getWriterEncoding();
                    if (declEncoding.equalsIgnoreCase("DEFAULT")) {
                        declEncoding = "UTF-8";
                    } else if (declEncoding.equalsIgnoreCase("Unicode")) {
                        declEncoding = "UTF-16";
                    } else {
                        declEncoding = MIME2Java.reverse(declEncoding);
                    }
                    out.println("<?xml version=\"1.0\" encoding=\""
                                + declEncoding + "\"?>");
                }
                printChildren(node);
                out.flush();
                break;
            }

            case Node.ELEMENT_NODE: {
                out.print('<');
                out.print(node.getNodeName());
                Attr[] attrs = sortAttributes(node.getAttributes());
                for (int i = 0; i < attrs.length; i++) {
                    Attr attr = attrs[i];
                    out.print(' ');
                    out.print(attr.getNodeName());
                    out.print("=\"");
                    out.print(normalize(attr.getNodeValue()));
                    out.print('"');
                }
                out.print('>');
                printChildren(node);
                out.print("</");
                out.print(node.getNodeName());
                out.print('>');
                break;
            }

            case Node.ENTITY_REFERENCE_NODE: {
                if (canonical) {
                    // Canonical form writes the reference's content in place
                    // of the reference itself.
                    printChildren(node);
                } else {
                    out.print('&');
                    out.print(node.getNodeName());
                    out.print(';');
                }
                break;
            }

            case Node.CDATA_SECTION_NODE: {
                if (canonical) {
                    out.print(normalize(node.getNodeValue()));
                } else {
                    out.print("<![CDATA[");
                    out.print(node.getNodeValue());
                    out.print("]]>");
                }
                break;
            }

            case Node.TEXT_NODE: {
                out.print(normalize(node.getNodeValue()));
                break;
            }

            case Node.PROCESSING_INSTRUCTION_NODE: {
                out.print("<?");
                out.print(node.getNodeName());
                String data = node.getNodeValue();
                if (data != null && data.length() > 0) {
                    out.print(' ');
                    out.print(data);
                }
                out.println("?>");
                break;
            }

            default:
                // Other node types are not serialized.
                break;
        }

        out.flush();
    }

    /** Prints every child of {@code parent} in document order. */
    private void printChildren(Node parent) {
        NodeList children = parent.getChildNodes();
        if (children == null) {
            return;
        }
        int count = children.getLength();
        for (int i = 0; i < count; i++) {
            print(children.item(i));
        }
    }

    /**
     * Copies the attributes into an array ordered by node name
     * ({@link String#compareTo(String)} order).
     *
     * @param attrs attribute map; {@code null} yields an empty array
     * @return a new array holding the attributes in ascending name order
     */
    protected Attr[] sortAttributes(NamedNodeMap attrs) {
        int len = (attrs != null) ? attrs.getLength() : 0;
        Attr[] array = new Attr[len];
        for (int i = 0; i < len; i++) {
            array[i] = (Attr) attrs.item(i);
        }

        // Selection sort: move the smallest remaining name into slot i.
        for (int i = 0; i < len - 1; i++) {
            String name = array[i].getNodeName();
            int index = i;
            for (int j = i + 1; j < len; j++) {
                String curName = array[j].getNodeName();
                if (curName.compareTo(name) < 0) {
                    name = curName;
                    index = j;
                }
            }
            if (index != i) {
                Attr temp = array[i];
                array[i] = array[index];
                array[index] = temp;
            }
        }

        return array;
    }

    /**
     * Command-line entry point: reads the options, then prints every URI
     * listed on the command line. Exits with status 1 when no arguments are
     * given, when help is requested, or when the {@code -e} encoding is not
     * accepted.
     *
     * @param argv command-line arguments
     */
    public static void main(String argv[]) {
        Arguments argopt = new Arguments();
        argopt.setUsage(new String[] {
            "usage: java dom.DOMWriter (options) uri ...", "",
            "options:",
            " -n | -N Turn on/off namespace [default=on]",
            " -v | -V Turn on/off validation [default=off]",
            " -s | -S Turn on/off Schema support [default=on]",
            " -f | -F Turn on/off Schema full consraint checking [default=off]",
            " -d | -D Turn on/off deferred DOM [default=on]",
            " -c Canonical XML output.",
            " -h This help screen.",
            " -e Output Java Encoding.",
            " Default encoding: UTF-8"});

        if (argv.length == 0) {
            argopt.printUsage();
            System.exit(1);
        }

        String parserName = DEFAULT_PARSER_NAME;
        boolean canonical = false;
        String encoding;

        // 'p' and 'e' are the options that carry a string parameter.
        argopt.parseArgumentTokens(argv, new char[] { 'p', 'e'});

        String arg;
        while ((arg = argopt.getlistFiles()) != null) {
            int c;
            while ((c = argopt.getArguments()) != -1) {
                switch (c) {
                    case 'c':
                        canonical = true;
                        break;
                    case 'e':
                        encoding = argopt.getStringParameter();
                        if (encoding != null && isValidJavaEncoding(encoding)) {
                            setWriterEncoding(encoding);
                        } else {
                            printValidJavaEncoding();
                            System.exit(1);
                        }
                        break;
                    case 'v':
                        setValidation = true;
                        break;
                    case 'V':
                        setValidation = false;
                        break;
                    case 'N':
                        setNameSpaces = false;
                        break;
                    case 'n':
                        setNameSpaces = true;
                        break;
                    case 'p':
                        parserName = argopt.getStringParameter();
                        break;
                    case 'd':
                        setDeferredDOM = true;
                        break;
                    case 'D':
                        setDeferredDOM = false;
                        break;
                    case 's':
                        setSchemaSupport = true;
                        break;
                    case 'S':
                        setSchemaSupport = false;
                        break;
                    case 'f':
                        setSchemaFullSupport = true;
                        break;
                    case 'F':
                        setSchemaFullSupport = false;
                        break;
                    case '?':
                    case 'h':
                    case '-':
                        argopt.printUsage();
                        System.exit(1);
                        break;
                    default:
                        break;
                }
            }

            // The file name goes to stderr so it does not mix with the XML
            // written on stdout.
            System.err.println(arg + ':');
            print(parserName, arg, canonical);
            System.err.println();
        }
    }

    /**
     * Escapes XML markup characters in {@code s}.
     *
     * <p>{@code < > & " '} are replaced by their predefined entity
     * references. In canonical mode carriage return and line feed are
     * written as numeric character references; otherwise they are copied
     * unchanged.
     *
     * @param s text to escape; {@code null} yields an empty string
     * @return the escaped text
     */
    protected String normalize(String s) {
        StringBuffer str = new StringBuffer();

        int len = (s != null) ? s.length() : 0;
        for (int i = 0; i < len; i++) {
            char ch = s.charAt(i);
            switch (ch) {
                case '<': {
                    str.append("&lt;");
                    break;
                }
                case '>': {
                    str.append("&gt;");
                    break;
                }
                case '&': {
                    str.append("&amp;");
                    break;
                }
                case '"': {
                    str.append("&quot;");
                    break;
                }
                case '\'': {
                    str.append("&apos;");
                    break;
                }
                case '\r':
                case '\n': {
                    if (canonical) {
                        str.append("&#");
                        str.append(Integer.toString(ch));
                        str.append(';');
                        break;
                    }
                    // Not canonical: fall through and copy the character.
                }
                default: {
                    str.append(ch);
                }
            }
        }

        return str.toString();
    }

    /** Prints the accepted encoding names to {@code System.err}, seven per line. */
    private static void printValidJavaEncoding() {
        System.err.println(" ENCODINGS:");
        System.err.print(" ");
        int count = MIME2JAVA_ENCODINGS.length;
        for (int i = 0; i < count; i++) {
            System.err.print(MIME2JAVA_ENCODINGS[i] + " ");
            // Wrap after every seventh entry, but not after the last one.
            if ((i + 1) % 7 == 0 && i + 1 < count) {
                System.err.println();
                System.err.print(" ");
            }
        }
        System.err.println();
    }
}