package org.htmlcleaner;

import java.io.File;
import java.io.IOException;
import java.net.URL;

/**
 * Command-line front end for {@link HtmlCleaner}.
 *
 * <p>Every setting is passed as a single {@code name=value} program argument
 * (for example {@code src=page.html outputtype=pretty}). Argument names are
 * matched case-insensitively. The only mandatory argument is {@code src};
 * when it is missing a usage summary is printed to standard error and the
 * JVM exits with status 1.</p>
 */
public class CommandLine {

    /**
     * Case-insensitive, locale-independent variant of
     * {@link String#startsWith(String)}.
     *
     * @param text   the string to inspect
     * @param prefix the prefix to look for
     * @return {@code true} if {@code text} begins with {@code prefix},
     *         ignoring case
     */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        // regionMatches(ignoreCase = true) compares char by char and does not
        // depend on the default locale, unlike toLowerCase().startsWith(...).
        return text.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /**
     * Looks up the value of a {@code name=value} style argument.
     *
     * <p>Only arguments that themselves contain an {@code '='} are
     * considered. The text before the first {@code '='} is trimmed and
     * matches when it <em>starts with</em> {@code name} (ignoring case), so a
     * longer spelling of an option name is accepted as well. The first
     * matching argument wins.</p>
     *
     * @param args the raw program arguments
     * @param name the option name to search for
     * @return the trimmed text after the first {@code '='} of the first
     *         matching argument, or an empty string if no argument matches
     */
    private static String getArgValue(String[] args, String name) {
        for (String curr : args) {
            int eqIndex = curr.indexOf('=');
            if (eqIndex < 0) {
                continue; // not a name=value argument
            }

            String argName = curr.substring(0, eqIndex).trim();
            if (startsWithIgnoreCase(argName, name)) {
                return curr.substring(eqIndex + 1).trim();
            }
        }

        return "";
    }

    /**
     * Interprets a textual switch value.
     *
     * @param s the text to interpret, may be {@code null}
     * @return {@code true} if {@code s} equals "on", "true" or "yes"
     *         (ignoring case); {@code false} otherwise, including for
     *         {@code null}
     */
    private static boolean toBoolean(String s) {
        return s != null && ( "on".equalsIgnoreCase(s) || "true".equalsIgnoreCase(s) || "yes".equalsIgnoreCase(s) );
    }

    /** Prints the usage summary to standard error. */
    private static void printUsage() {
        System.err.println("Usage: java -jar htmlcleanerXX.jar src = <url | file> [incharset = <charset>] [dest = <file>] [outcharset = <charset>] [options...]");
        System.err.println("");
        System.err.println("where options include:");
        System.err.println(" outputtype = simple | compact | pretty");
        System.err.println(" advancedxmlescape = true | false");
        System.err.println(" usecdata = true | false");
        System.err.println(" specialentities = true | false");
        System.err.println(" unicodechars = true | false");
        System.err.println(" omitunknowntags = true | false");
        System.err.println(" omitdeprtags = true | false");
        System.err.println(" omitcomments = true | false");
        System.err.println(" omitxmldecl = true | false");
        System.err.println(" omitdoctypedecl = true | false");
        System.err.println(" omitxmlnsatt = true | false");
        System.err.println(" hyphenreplacement = <string value>");
    }

    /**
     * Applies every optional setting that was given on the command line to
     * the cleaner. Options that are absent (or have an empty value) are left
     * untouched so the cleaner keeps its own defaults for them.
     *
     * @param cleaner the cleaner to configure
     * @param args    the raw program arguments
     */
    private static void applyOptions(HtmlCleaner cleaner, String[] args) {
        String omitUnknownTags = getArgValue(args, "omitunknowntags");
        if ( !"".equals(omitUnknownTags) ) {
            cleaner.setOmitUnknownTags( toBoolean(omitUnknownTags) );
        }

        String omitDeprecatedTags = getArgValue(args, "omitdeprtags");
        if ( !"".equals(omitDeprecatedTags) ) {
            cleaner.setOmitDeprecatedTags( toBoolean(omitDeprecatedTags) );
        }

        String advancedXmlEscape = getArgValue(args, "advancedxmlescape");
        if ( !"".equals(advancedXmlEscape) ) {
            cleaner.setAdvancedXmlEscape( toBoolean(advancedXmlEscape) );
        }

        String useCData = getArgValue(args, "usecdata");
        if ( !"".equals(useCData) ) {
            cleaner.setUseCdataForScriptAndStyle( toBoolean(useCData) );
        }

        String translateSpecialEntities = getArgValue(args, "specialentities");
        if ( !"".equals(translateSpecialEntities) ) {
            cleaner.setTranslateSpecialEntities( toBoolean(translateSpecialEntities) );
        }

        String unicodeChars = getArgValue(args, "unicodechars");
        if ( !"".equals(unicodeChars) ) {
            cleaner.setRecognizeUnicodeChars( toBoolean(unicodeChars) );
        }

        String omitComments = getArgValue(args, "omitcomments");
        if ( !"".equals(omitComments) ) {
            cleaner.setOmitComments( toBoolean(omitComments) );
        }

        String omitXmlDeclaration = getArgValue(args, "omitxmldecl");
        if ( !"".equals(omitXmlDeclaration) ) {
            cleaner.setOmitXmlDeclaration( toBoolean(omitXmlDeclaration) );
        }

        String omitDoctypeDeclaration = getArgValue(args, "omitdoctypedecl");
        if ( !"".equals(omitDoctypeDeclaration) ) {
            cleaner.setOmitDoctypeDeclaration( toBoolean(omitDoctypeDeclaration) );
        }

        String omitXmlnsAttributes = getArgValue(args, "omitxmlnsatt");
        if ( !"".equals(omitXmlnsAttributes) ) {
            cleaner.setOmitXmlnsAttributes( toBoolean(omitXmlnsAttributes) );
        }

        String commentHyphen = getArgValue(args, "hyphenreplacement");
        if ( !"".equals(commentHyphen) ) {
            cleaner.setHyphenReplacementInComment(commentHyphen);
        }
    }

    /**
     * Writes the cleaned document either to standard output or to a file.
     *
     * @param cleaner     the cleaner whose {@code clean()} has already run
     * @param destination target file name; an empty string selects
     *                    {@code System.out}
     * @param outputType  "compact" or "pretty" (ignoring case); any other
     *                    value selects the plain XML writer
     * @param outCharset  character set name handed to the writer
     * @throws IOException if the underlying writer reports an I/O failure
     */
    private static void writeResult(HtmlCleaner cleaner, String destination,
                                    String outputType, String outCharset) throws IOException {
        boolean compact = "compact".equalsIgnoreCase(outputType);
        boolean pretty = "pretty".equalsIgnoreCase(outputType);

        if ( "".equals(destination) ) {
            if (compact) {
                cleaner.writeCompactXmlToStream(System.out, outCharset);
            } else if (pretty) {
                cleaner.writePrettyXmlToStream(System.out, outCharset);
            } else {
                cleaner.writeXmlToStream(System.out, outCharset);
            }
        } else {
            if (compact) {
                cleaner.writeCompactXmlToFile(destination, outCharset);
            } else if (pretty) {
                cleaner.writePrettyXmlToFile(destination, outCharset);
            } else {
                cleaner.writeXmlToFile(destination, outCharset);
            }
        }
    }

    /**
     * Program entry point: reads the source, cleans it and writes the result.
     *
     * @param args {@code name=value} arguments; see the usage text printed
     *             when {@code src} is missing
     * @throws IOException if the source URL is malformed or reading/writing
     *                     fails
     */
    public static void main(String[] args) throws IOException {
        String source = getArgValue(args, "src");
        if ( "".equals(source) ) {
            printUsage();
            System.exit(1);
        }

        String inCharset = getArgValue(args, "incharset");
        if ("".equals(inCharset)) {
            inCharset = HtmlCleaner.DEFAULT_CHARSET;
        }

        String outCharset = getArgValue(args, "outcharset");
        if ("".equals(outCharset)) {
            outCharset = HtmlCleaner.DEFAULT_CHARSET;
        }

        String destination = getArgValue(args, "dest");
        String outputType = getArgValue(args, "outputtype");

        // Only the scheme test is case-insensitive. The source itself must be
        // passed on unchanged: file paths and URL paths can be case-sensitive,
        // so lower-casing them may point at a different (or missing) resource.
        HtmlCleaner cleaner;
        if ( startsWithIgnoreCase(source, "http://") || startsWithIgnoreCase(source, "https://") ) {
            cleaner = new HtmlCleaner(new URL(source), inCharset);
        } else {
            cleaner = new HtmlCleaner(new File(source), inCharset);
        }

        applyOptions(cleaner, args);

        cleaner.clean();

        writeResult(cleaner, destination, outputType, outCharset);
    }

}