1 25 package org.archive.util.anvl; 26 27 import java.io.ByteArrayOutputStream ; 28 import java.io.IOException ; 29 import java.io.InputStream ; 30 import java.io.UnsupportedEncodingException ; 31 import java.util.ArrayList ; 32 import java.util.Collection ; 33 import java.util.HashMap ; 34 import java.util.Iterator ; 35 import java.util.List ; 36 import java.util.Map ; 37 38 import org.archive.io.UTF8Bytes; 39 40 49 public class ANVLRecord extends ArrayList <Element> implements UTF8Bytes { 50 private static final long serialVersionUID = -4610638888453052958L; 51 52 public static final String MIMETYPE = "text/anvl"; 53 54 public static final ANVLRecord EMPTY_ANVL_RECORD = new ANVLRecord(); 55 56 60 public static final long MAXIMUM_SIZE = 1024 * 10; 61 62 66 static final String CRLF = "\r\n"; 67 68 static final String FOLD_PREFIX = CRLF + ' '; 69 70 public ANVLRecord() { 71 super(); 72 } 73 74 public ANVLRecord(Collection <? extends Element> c) { 75 super(c); 76 } 77 78 public ANVLRecord(int initialCapacity) { 79 super(initialCapacity); 80 } 81 82 public boolean addLabel(final String l) { 83 return super.add(new Element(new Label(l))); 84 } 85 86 public boolean addLabelValue(final String l, final String v) { 87 return super.add(new Element(new Label(l), new Value(v))); 88 } 89 90 @Override 91 public String toString() { 92 StringBuilder sb = new StringBuilder (); 94 for (final Iterator <Element> i = iterator(); i.hasNext();) { 95 sb.append(i.next()); 96 sb.append(CRLF); 97 } 98 sb.append(CRLF); 100 return sb.toString(); 101 } 102 103 public Map <String , String > asMap() { 104 Map <String , String > m = new HashMap <String , String >(size()); 105 for (final Iterator <Element> i = iterator(); i.hasNext();) { 106 Element e = i.next(); 107 m.put(e.getLabel().toString(), 108 e.isValue()? e.getValue().toString(): (String )null); 109 } 110 return m; 111 } 112 113 @Override 114 public ANVLRecord clone() { 115 return new ANVLRecord(this); 116 } 117 118 121 public byte [] getUTF8Bytes() 122 throws UnsupportedEncodingException { 123 return toString().getBytes(UTF8); 124 } 125 126 139 public static ANVLRecord load(final InputStream is) 140 throws IOException { 141 boolean isCRLF = false; 147 boolean recordStart = false; 148 ByteArrayOutputStream baos = new ByteArrayOutputStream (1024); 149 boolean done = false; 150 int read = 0; 151 for (int c = -1, previousCharacter; !done;) { 152 if (read++ >= MAXIMUM_SIZE) { 153 throw new IOException ("Read " + MAXIMUM_SIZE + 154 " bytes without finding \\r\\n\\r\\n " + 155 "End-Of-ANVLRecord"); 156 } 157 previousCharacter = c; 158 c = is.read(); 159 if (c == -1) { 160 throw new IOException ("End-Of-Stream before \\r\\n\\r\\n " + 161 "End-Of-ANVLRecord:\n" + 162 new String (baos.toByteArray(), UTF8)); 163 } 164 if (isLF((char)c) && isCR((char)previousCharacter)) { 165 if (isCRLF) { 166 done = true; 169 } else { 170 isCRLF = true; 171 } 172 } else if (!recordStart && Character.isWhitespace(c)) { 173 continue; 175 } else { 176 if (isCRLF && !isCR((char)c)) { 178 isCRLF = false; 179 } 180 if (!recordStart) { 182 recordStart = true; 183 } 184 } 185 baos.write(c); 186 } 187 return load(new String (baos.toByteArray(), UTF8)); 188 } 189 190 204 public static ANVLRecord load(final String s) 205 throws IOException { 206 ANVLRecord record = new ANVLRecord(); 207 boolean inValue = false, inLabel = false, inComment = false, 208 inNewLine = false; 209 String label = null; 210 StringBuilder sb = new StringBuilder (s.length()); 211 for (int i = 0; i < s.length(); i++) { 212 char c = s.charAt(i); 213 214 if ((i + 1) > s.length()) { 216 throw new IOException ("Premature End-of-ANVLRecord:\n" + 217 s.substring(i)); 218 } 219 220 if (inNewLine && isLF(c)) { 222 continue; 223 } 224 225 if (inNewLine && isCR(c) && isLF(s.charAt(i + 1))) { 227 break; 228 } 229 230 if (inNewLine && inValue && Character.isWhitespace(c)) { 233 continue; 234 } 235 236 inNewLine = isCR(c) && isLF(s.charAt(i + 1)); 238 239 if (inNewLine) { 240 if (inComment) { 241 inComment = false; 242 } else if (label != null && !inValue) { 243 record.addLabel(label); 245 label = null; 246 sb.setLength(0); 247 } else if (inValue) { 248 if ((i + 3) > s.length()) { 250 throw new IOException ("Premature End-of-ANVLRecord " 251 + "(2):\n" + s.substring(i)); 252 } 253 if (!isCR(s.charAt(i + 2)) && !isLF(s.charAt(i + 3)) 254 && Character.isWhitespace(s.charAt(i + 2))) { 255 sb.append(CRLF); 259 sb.append(' '); 260 } else { 261 record.addLabelValue(label, sb.toString()); 264 sb.setLength(0); 265 label = null; 266 inValue = false; 267 } 268 } else { 269 } 272 continue; 274 } 275 276 if (inComment) { 277 continue; 278 } else if (inLabel) { 279 if (c == Label.COLON) { 280 label = sb.toString(); 281 sb.setLength(0); 282 inLabel = false; 283 continue; 284 } 285 } else { 286 if (!inLabel && !inValue && !inComment) { 287 if (Character.isWhitespace(c)) { 289 continue; 291 } else if (label == null && c == '#') { 292 inComment = true; 293 continue; 295 } else if (label == null) { 296 inLabel = true; 297 } else { 298 inValue = true; 299 } 300 } 301 } 302 sb.append(c); 303 } 304 return record; 305 } 306 307 312 public synchronized int getLength() { 313 int length = -1; 314 try { 315 length = getUTF8Bytes().length; 316 } catch (UnsupportedEncodingException e) { 317 throw new RuntimeException (e); 318 } 319 return length; 320 } 321 322 public static boolean isCROrLF(final char c) { 323 return isCR(c) || isLF(c); 324 } 325 326 public static boolean isCR(final char c) { 327 return c == ANVLRecord.CRLF.charAt(0); 328 } 329 330 public static boolean isLF(final char c) { 331 return c == ANVLRecord.CRLF.charAt(1); 332 } 333 } | Popular Tags |