1 19 20 33 34 package org.htmlparser; 35 36 import java.io.BufferedReader ; 40 import java.io.IOException ; 41 import java.io.PrintWriter ; 42 import java.io.Reader ; 43 import java.io.StringWriter ; 44 45 import org.htmlparser.parserHelper.StringParser; 46 import org.htmlparser.scanners.TagScanner; 47 import org.htmlparser.tags.EndTag; 48 import org.htmlparser.tags.Tag; 49 import org.htmlparser.util.NodeList; 50 import org.htmlparser.util.ParserException; 51 52 56 public class NodeReader extends BufferedReader 57 { 58 public static final String DECIPHER_ERROR = 59 "NodeReader.readElement() : Error occurred while trying to decipher the tag using scanners"; 60 protected int posInLine = -1; 61 protected String line; 62 protected Node node = null; 63 protected TagScanner previousOpenScanner = null; 64 protected String url; 65 private Parser parser; 66 private int lineCount; 67 private String previousLine; 68 private StringParser stringParser = new StringParser(); 69 private RemarkNodeParser remarkNodeParser = new RemarkNodeParser(); 70 private NodeList nextParsedNode = new NodeList(); 71 private boolean dontReadNextLine = false; 72 75 public NodeReader(Reader in, int len, String url) 76 { 77 super(in, len); 78 this.url = url; 79 this.parser = null; 80 this.lineCount = 1; 81 } 82 88 89 public NodeReader(Reader in, int len) 90 { 91 this(in, len, ""); 92 } 93 98 public NodeReader(Reader in, String url) 99 { 100 this(in, 8192, url); 101 } 102 103 107 public String getURL() 108 { 109 return (url); 110 } 111 112 122 public void changeLine(String line) 123 { 124 this.line = line; 125 } 126 public String getCurrentLine() 127 { 128 return line; 129 } 130 134 public int getLastLineNumber() 135 { 136 return lineCount - 1; 137 } 138 139 144 public int getLastReadPosition() 145 { 146 if (node != null) 147 return node.elementEnd(); 148 else 149 return 0; 150 } 151 152 156 public String getNextLine() 157 { 158 try 159 { 160 previousLine = line; 161 line = readLine(); 162 if (line != null) 163 lineCount++; 164 posInLine = 0; 165 return line; 166 } 167 catch (IOException e) 168 { 169 System.err.println("I/O Exception occurred while reading!"); 170 } 171 return null; 172 } 173 177 public Parser getParser() 178 { 179 return parser; 180 } 181 185 public TagScanner getPreviousOpenScanner() 186 { 187 return previousOpenScanner; 188 } 189 190 197 private boolean beginTag(String line, int pos) 198 { 199 char ch; 200 boolean ret; 201 202 ret = false; 203 204 if (pos + 2 <= line.length()) 205 if ('<' == line.charAt(pos)) 206 { 207 ch = line.charAt(pos + 1); 208 if ('/' == ch 210 || '%' == ch 211 || Character.isLetter(ch) 212 || '!' == ch) 213 ret = true; 214 } 215 216 return (ret); 217 } 218 219 223 public Node readElement() throws ParserException 224 { 225 return (readElement(false)); 226 } 227 228 235 public Node readElement(boolean balance_quotes) throws ParserException 236 { 237 try 238 { 239 if (nextParsedNode.size() > 0) 240 { 241 node = nextParsedNode.elementAt(0); 242 nextParsedNode.remove(0); 243 return node; 244 } 245 if (readNextLine()) 246 { 247 do 248 { 249 line = getNextLine(); 250 } 251 while (line != null && line.length() == 0); 252 253 } 254 else if (dontReadNextLine) 255 { 256 dontReadNextLine = false; 257 } 258 else 259 posInLine = getLastReadPosition() + 1; 260 if (line == null) 261 return null; 262 263 if (beginTag(line, posInLine)) 264 { 265 node = remarkNodeParser.find(this, line, posInLine); 266 if (node != null) 267 return node; 268 node = Tag.find(this, line, posInLine); 269 if (node != null) 270 { 271 Tag tag = (Tag) node; 272 try 273 { 274 node = tag.scan(parser.getScanners(), url, this); 275 return node; 276 } 277 catch (Exception e) 278 { 279 StringBuffer msgBuffer = new StringBuffer (); 280 msgBuffer.append( 281 DECIPHER_ERROR 282 + "\n" 283 + " Tag being processed : " 284 + tag.getTagName() 285 + "\n" 286 + " Current Tag Line : " 287 + tag.getTagLine()); 288 appendLineDetails(msgBuffer); 289 ParserException ex = 290 new ParserException(msgBuffer.toString(), e); 291 292 parser.getFeedback().error(msgBuffer.toString(), ex); 293 throw ex; 294 } 295 } 296 297 node = EndTag.find(line, posInLine); 298 if (node != null) 299 return node; 300 } 301 else 302 { 303 node = stringParser.find(this, line, posInLine, balance_quotes); 304 if (node != null) 305 return node; 306 } 307 308 return null; 309 } 310 catch (ParserException pe) 311 { 312 throw pe; 313 } 314 catch (Exception e) 315 { 316 StringBuffer msgBuffer = 317 new StringBuffer ("NodeReader.readElement() : Error occurred while trying to read the next element,"); 318 StringWriter sw = new StringWriter (); 319 e.printStackTrace(new PrintWriter (sw)); 320 appendLineDetails(msgBuffer); 321 msgBuffer.append("\n Caused by:\n").append( 322 sw.getBuffer().toString()); 323 ParserException ex = new ParserException(msgBuffer.toString(), e); 324 parser.getFeedback().error(msgBuffer.toString(), ex); 325 throw ex; 326 } 327 } 328 public void appendLineDetails(StringBuffer msgBuffer) 329 { 330 msgBuffer.append("\nat Line "); 331 msgBuffer.append(getLineCount()); 332 msgBuffer.append(" : "); 333 msgBuffer.append(getLine()); 334 msgBuffer.append("\nPrevious Line ").append(getLineCount() - 1); 335 msgBuffer.append(" : ").append(getPreviousLine()); 336 } 337 341 protected boolean readNextLine() 342 { 343 if (dontReadNextLine) 344 { 345 return false; 346 } 347 if (posInLine == -1 348 || (line != null && node.elementEnd() + 1 >= line.length())) 349 return true; 350 else 351 return false; 352 } 353 357 public void setParser(Parser newParser) 358 { 359 parser = newParser; 360 } 361 365 public void setPreviousOpenScanner(TagScanner previousOpenScanner) 366 { 367 this.previousOpenScanner = previousOpenScanner; 368 } 369 370 373 public static void setLineSeparator(String lineSeparator) 374 { 375 Node.setLineSeparator(lineSeparator); 376 } 377 378 382 public static String getLineSeparator() 383 { 384 return (Node.getLineSeparator()); 385 } 386 390 public int getLineCount() 391 { 392 return lineCount; 393 } 394 395 399 public String getPreviousLine() 400 { 401 return previousLine; 402 } 403 404 408 public String getLine() 409 { 410 return line; 411 } 412 413 417 public void setLineCount(int lineCount) 418 { 419 this.lineCount = lineCount; 420 } 421 422 426 public void setPosInLine(int posInLine) 427 { 428 this.posInLine = posInLine; 429 } 430 431 public void reset() throws IOException 432 { 433 super.reset(); 434 lineCount = 1; 435 posInLine = -1; 436 } 437 438 public StringParser getStringParser() 439 { 440 return stringParser; 441 } 442 443 449 public void addNextParsedNode(Node nextParsedNode) 450 { 451 this.nextParsedNode.prepend(nextParsedNode); 452 } 453 454 public boolean isDontReadNextLine() 455 { 456 return dontReadNextLine; 457 } 458 459 public void setDontReadNextLine(boolean dontReadNextLine) 460 { 461 this.dontReadNextLine = dontReadNextLine; 462 } 463 464 } 465 | Popular Tags |