1 21 package au.id.jericho.lib.html; 22 23 import au.id.jericho.lib.html.nodoc.*; 24 import java.util.*; 25 import java.io.*; 26 27 53 public final class Attributes extends SequentialListSegment { 54 private final LinkedList attributeList; 56 private static final int AFTER_TAG_NAME=0; 58 private static final int BETWEEN_ATTRIBUTES=1; 59 private static final int IN_NAME=2; 60 private static final int AFTER_NAME=3; private static final int START_VALUE=4; 62 private static final int IN_VALUE=5; 63 private static final int AFTER_VALUE_FINAL_QUOTE=6; 64 65 private static int defaultMaxErrorCount=2; 67 private Attributes(final Source source, final int begin, final int end, final LinkedList attributeList) { 68 super(source,begin,end); 69 this.attributeList=attributeList; 70 } 71 72 73 static Attributes construct(final Source source, final int startTagBegin, final StartTagType startTagType, final String tagName) { 74 return construct(source,"StartTag",AFTER_TAG_NAME,startTagBegin,-1,-1,startTagType,tagName,defaultMaxErrorCount); 75 } 76 77 78 static Attributes construct(final Source source, final int startTagBegin, final int attributesBegin, final int maxEnd, final StartTagType startTagType, final String tagName, final int maxErrorCount) { 79 return construct(source,"Attributes for StartTag",BETWEEN_ATTRIBUTES,startTagBegin,attributesBegin,maxEnd,startTagType,tagName,maxErrorCount); 80 } 81 82 83 static Attributes construct(final Source source, final int begin, final int maxEnd, final int maxErrorCount) { 84 return construct(source,"Attributes",BETWEEN_ATTRIBUTES,begin,-1,maxEnd,StartTagType.NORMAL,null,maxErrorCount); 85 } 86 87 100 private static Attributes construct(final Source source, final String logType, int state, final int logBegin, int attributesBegin, final int maxEnd, final StartTagType startTagType, final String tagName, final int maxErrorCount) { 101 boolean isClosingSlashIgnored=false; 102 if (tagName!=null) { 103 if (attributesBegin==-1) attributesBegin=logBegin+1+tagName.length(); 105 if (startTagType==StartTagType.NORMAL && HTMLElements.isClosingSlashIgnored(tagName)) isClosingSlashIgnored=true; 106 } else { 107 attributesBegin=logBegin; 108 } 109 int attributesEnd=attributesBegin; 110 final LinkedList attributeList=new LinkedList(); 111 final ParseText parseText=source.getParseText(); 112 int i=attributesBegin; 113 char quote=' '; 114 Segment nameSegment=null; 115 String key=null; 116 int currentBegin=-1; 117 boolean isTerminatingCharacter=false; 118 int errorCount=0; 119 try { 120 while (!isTerminatingCharacter) { 121 if (i==maxEnd || startTagType.atEndOfAttributes(source,i,isClosingSlashIgnored)) isTerminatingCharacter=true; 122 final char ch=parseText.charAt(i); 123 switch (state) { 124 case IN_VALUE: 125 if (isTerminatingCharacter || ch==quote || (quote==' ' && isWhiteSpace(ch))) { 126 Segment valueSegment; 127 Segment valueSegmentIncludingQuotes; 128 if (quote==' ') { 129 valueSegment=valueSegmentIncludingQuotes=new Segment(source,currentBegin,i); 130 } else { 131 if (isTerminatingCharacter) { 132 if (i==maxEnd) { 133 if (source.isLoggingEnabled()) log(source,logType,tagName,logBegin,"terminated in the middle of a quoted attribute value",i); 134 if (reachedMaxErrorCount(++errorCount,source,logType,tagName,logBegin,maxErrorCount)) return null; 135 valueSegment=new Segment(source,currentBegin,i); 136 valueSegmentIncludingQuotes=new Segment(source,currentBegin-1,i); } else { 138 isTerminatingCharacter=false; 140 break; 141 } 142 } else { 143 valueSegment=new Segment(source,currentBegin,i); 144 valueSegmentIncludingQuotes=new Segment(source,currentBegin-1,i+1); 145 } 146 } 147 attributeList.add(new Attribute(source, key, nameSegment, valueSegment, valueSegmentIncludingQuotes)); 148 attributesEnd=valueSegmentIncludingQuotes.getEnd(); 149 state=BETWEEN_ATTRIBUTES; 150 } else if (ch=='<' && quote==' ') { 151 if (source.isLoggingEnabled()) log(source,logType,tagName,logBegin,"rejected because of '<' character in unquoted attribute value",i); 152 return null; 153 } 154 break; 155 case IN_NAME: 156 if (isTerminatingCharacter || ch=='=' || isWhiteSpace(ch)) { 157 nameSegment=new Segment(source,currentBegin,i); 158 key=nameSegment.toString().toLowerCase(); 159 if (isTerminatingCharacter) { 160 attributeList.add(new Attribute(source,key,nameSegment)); attributesEnd=i; 162 } else { 163 state=(ch=='=' ? START_VALUE : AFTER_NAME); 164 } 165 } else if (!Tag.isXMLNameChar(ch)) { 166 if (ch=='<') { 169 if (source.isLoggingEnabled()) log(source,logType,tagName,logBegin,"rejected because of '<' character in attribute name",i); 170 return null; 171 } 172 if (isInvalidEmptyElementTag(startTagType,source,i,logType,tagName,logBegin)) break; 173 if (source.isLoggingEnabled()) log(source,logType,tagName,logBegin,"contains attribute name with invalid character",i); 174 if (reachedMaxErrorCount(++errorCount,source,logType,tagName,logBegin,maxErrorCount)) return null; 175 } 176 break; 177 case AFTER_NAME: 178 if (isTerminatingCharacter || !(ch=='=' || isWhiteSpace(ch))) { 180 attributeList.add(new Attribute(source,key,nameSegment)); attributesEnd=nameSegment.getEnd(); 182 if (isTerminatingCharacter) break; 183 state=BETWEEN_ATTRIBUTES; 185 i--; } else if (ch=='=') { 187 state=START_VALUE; 188 } 189 break; 190 case BETWEEN_ATTRIBUTES: 191 if (!isTerminatingCharacter) { 192 if (isWhiteSpace(ch)) { 194 quote=' '; 195 } else { 196 if (quote!=' ') { 197 if (source.isLoggingEnabled()) log(source,logType,tagName,logBegin,"has missing whitespace after quoted attribute value",i); 198 } 200 if (!Tag.isXMLNameStartChar(ch)) { 201 if (ch=='<') { 204 if (source.isLoggingEnabled()) log(source,logType,tagName,logBegin,"rejected because of '<' character",i); 205 return null; 206 } 207 if (isInvalidEmptyElementTag(startTagType,source,i,logType,tagName,logBegin)) break; 208 if (startTagType==StartTagType.NORMAL && startTagType.atEndOfAttributes(source,i,false)) { 209 if (source.isLoggingEnabled()) log(source,logType,tagName,logBegin,"contains a '/' character before the closing '>', which is ignored because tags of this name cannot be empty-element tags"); 211 break; 212 } 213 if (source.isLoggingEnabled()) log(source,logType,tagName,logBegin,"contains attribute name with invalid first character",i); 214 if (reachedMaxErrorCount(++errorCount,source,logType,tagName,logBegin,maxErrorCount)) return null; 215 } 216 state=IN_NAME; 217 currentBegin=i; 218 } 219 } 220 break; 221 case START_VALUE: 222 currentBegin=i; 223 if (isTerminatingCharacter) { 224 if (source.isLoggingEnabled()) log(source,logType,tagName,logBegin,"has missing attribute value after '=' sign",i); 225 final Segment valueSegment=new Segment(source,i,i); 227 attributeList.add(new Attribute(source,key,nameSegment,valueSegment,valueSegment)); 228 attributesEnd=i; 229 state=BETWEEN_ATTRIBUTES; 230 break; 231 } 232 if (isWhiteSpace(ch)) break; if (ch=='<') { 234 if (source.isLoggingEnabled()) log(source,logType,tagName,logBegin,"rejected because of '<' character at start of attribuite value",i); 235 return null; 236 } else if (ch=='\'' || ch=='"') { 237 quote=ch; 238 currentBegin++; 239 } else { 240 quote=' '; 241 } 242 state=IN_VALUE; 243 break; 244 case AFTER_TAG_NAME: 245 if (!isTerminatingCharacter) { 246 if (!isWhiteSpace(ch)) { 247 if (isInvalidEmptyElementTag(startTagType,source,i,logType,tagName,logBegin)) break; 248 if (source.isLoggingEnabled()) log(source,logType,tagName,logBegin,"rejected because name contains invalid character",i); 249 return null; 250 } 251 state=BETWEEN_ATTRIBUTES; 252 } 253 break; 254 } 255 i++; 256 } 257 return new Attributes(source,attributesBegin,attributesEnd,attributeList); } catch (IndexOutOfBoundsException ex) { 259 if (source.isLoggingEnabled()) log(source,logType,tagName,logBegin,"rejected because it has no closing '>' character"); 260 return null; 261 } 262 } 263 264 private static boolean reachedMaxErrorCount(final int errorCount, final Source source, final String logType, final String tagName, final int logBegin, final int maxErrorCount) { 265 if (errorCount<=maxErrorCount) return false; 266 if (source.isLoggingEnabled()) log(source,logType,tagName,logBegin,"rejected because it contains too many errors"); 267 return true; 268 } 269 270 private static boolean isInvalidEmptyElementTag(final StartTagType startTagType, final Source source, final int i, final String logType, final String tagName, final int logBegin) { 271 if (startTagType!=StartTagType.NORMAL || !startTagType.atEndOfAttributes(source,i,false)) return false; 273 if (source.isLoggingEnabled()) log(source,logType,tagName,logBegin,"contains a '/' character before the closing '>', which is ignored because tags of this name cannot be empty-element tags"); 274 return true; 275 } 276 277 287 public Attribute get(final String name) { 288 if (size()==0) return null; 289 for (int i=0; i<size(); i++) { 290 final Attribute attribute=(Attribute)get(i); 291 if (attribute.getKey().equalsIgnoreCase(name)) return attribute; 292 } 293 return null; 294 } 295 296 310 public String getValue(final String name) { 311 final Attribute attribute=get(name); 312 return attribute==null ? null : attribute.getValue(); 313 } 314 315 322 String getRawValue(final String name) { 323 final Attribute attribute=get(name); 324 return attribute==null || !attribute.hasValue() ? null : attribute.getValueSegment().toString(); 325 } 326 327 334 public int getCount() { 335 return attributeList.size(); 336 } 337 338 342 public Iterator iterator() { 343 return listIterator(); 344 } 345 346 361 public ListIterator listIterator(final int index) { 362 return attributeList.listIterator(index); 363 } 364 365 382 public Map populateMap(final Map attributesMap, final boolean convertNamesToLowerCase) { 383 for (final Iterator i=listIterator(0); i.hasNext();) { 384 final Attribute attribute=(Attribute)i.next(); 385 attributesMap.put(convertNamesToLowerCase ? attribute.getKey() : attribute.getName(),attribute.getValue()); 386 } 387 return attributesMap; 388 } 389 390 394 public String getDebugInfo() { 395 final StringBuffer sb=new StringBuffer (); 396 sb.append("Attributes ").append(super.getDebugInfo()).append(": "); 397 if (isEmpty()) { 398 sb.append("EMPTY"); 399 } else { 400 sb.append('\n'); 401 for (final Iterator i=listIterator(0); i.hasNext();) { 402 Attribute attribute=(Attribute)i.next(); 403 sb.append(" ").append(attribute.getDebugInfo()); 404 } 405 } 406 return sb.toString(); 407 } 408 409 448 public static int getDefaultMaxErrorCount() { 449 return defaultMaxErrorCount; 450 } 451 452 459 public static void setDefaultMaxErrorCount(final int value) { 460 defaultMaxErrorCount=value; 461 } 462 463 477 public static String generateHTML(final Map attributesMap) { 478 final StringWriter stringWriter=new StringWriter(); 479 try {appendHTML(stringWriter,attributesMap);} catch (IOException ex) {} return stringWriter.toString(); 481 } 482 483 492 public List getList() { 493 return this; 494 } 495 496 507 static void appendHTML(final Writer writer, final Map attributesMap) throws IOException { 508 for (final Iterator i=attributesMap.entrySet().iterator(); i.hasNext();) { 509 final Map.Entry entry=(Map.Entry)i.next(); 510 Attribute.appendHTML(writer,(String )entry.getKey(),(CharSequence )entry.getValue()); 511 } 512 } 513 514 StringBuffer appendTidy(final StringBuffer sb, Tag nextTag) { 515 for (final Iterator i=listIterator(0); i.hasNext();) 516 nextTag=((Attribute)i.next()).appendTidy(sb,nextTag); 517 return sb; 518 } 519 520 Map getMap(final boolean convertNamesToLowerCase) { 521 return populateMap(new LinkedHashMap(getCount()*2,1.0F),convertNamesToLowerCase); 522 } 523 524 private static void log(final Source source, final String part1, final CharSequence part2, final int begin, final String part3, final int pos) { 525 source.log(source.getRowColumnVector(pos).appendTo(source.getRowColumnVector(begin).appendTo(new StringBuffer (200).append(part1).append(' ').append(part2).append(" at ")).append(' ').append(part3).append(" at position ")).toString()); 526 } 527 528 private static void log(final Source source, final String part1, final CharSequence part2, final int begin, final String part3) { 529 source.log(source.getRowColumnVector(begin).appendTo(new StringBuffer (200).append(part1).append(' ').append(part2).append(" at ")).append(' ').append(part3).toString()); 530 } 531 } 532 | Popular Tags |