1 19 20 33 package org.htmlparser.parserHelper; 34 35 import org.htmlparser.Node; 36 import org.htmlparser.NodeReader; 37 import org.htmlparser.StringNode; 38 39 public class StringParser 40 { 41 private final static int BEFORE_PARSE_BEGINS_STATE = 0; 42 private final static int PARSE_HAS_BEGUN_STATE = 1; 43 private final static int PARSE_COMPLETED_STATE = 2; 44 private final static int PARSE_IGNORE_STATE = 3; 45 46 53 private boolean beginTag(String line, int pos) 54 { 55 char ch; 56 boolean ret; 57 58 ret = false; 59 60 if (pos + 2 <= line.length()) 61 if ('<' == line.charAt(pos)) 62 { 63 ch = line.charAt(pos + 1); 64 if ('/' == ch 66 || '%' == ch 67 || Character.isLetter(ch) 68 || '!' == ch) 69 ret = true; 70 } 71 72 return (ret); 73 } 74 75 83 public Node find( 84 NodeReader reader, 85 String input, 86 int position, 87 boolean balance_quotes) 88 { 89 StringBuffer textBuffer = new StringBuffer (); 90 int state = BEFORE_PARSE_BEGINS_STATE; 91 int textBegin = position; 92 int textEnd = position; 93 int inputLen = input.length(); 94 char ch; 95 char ignore_ender = '\"'; 96 for (int i = position; 97 (i < inputLen && state != PARSE_COMPLETED_STATE); 98 i++) 99 { 100 ch = input.charAt(i); 101 if (ch == '<' && state != PARSE_IGNORE_STATE) 102 { 103 if (beginTag(input, i)) 104 { 105 state = PARSE_COMPLETED_STATE; 106 textEnd = i - 1; 107 } 108 } 109 if (balance_quotes && (ch == '\'' || ch == '"')) 110 { 111 if (state == PARSE_IGNORE_STATE) 112 { 113 if (ch == ignore_ender) 114 state = PARSE_HAS_BEGUN_STATE; 115 } 116 else 117 { 118 ignore_ender = ch; 119 state = PARSE_IGNORE_STATE; 120 } 121 } 122 if (state == BEFORE_PARSE_BEGINS_STATE) 123 { 124 state = PARSE_HAS_BEGUN_STATE; 125 } 126 if (state == PARSE_HAS_BEGUN_STATE || state == PARSE_IGNORE_STATE) 127 { 128 textBuffer.append(input.charAt(i)); 129 } 130 if (state == BEFORE_PARSE_BEGINS_STATE && i == inputLen - 1) 132 state = PARSE_HAS_BEGUN_STATE; 133 if (state == PARSE_HAS_BEGUN_STATE && i == inputLen - 1) 134 { 135 do 136 { 137 input = reader.getNextLine(); 138 if (input != null && input.length() == 0) 139 textBuffer.append(Node.getLineSeparator()); 140 } 141 while (input != null && input.length() == 0); 142 143 if (input == null) 144 { 145 textEnd = i; 146 state = PARSE_COMPLETED_STATE; 147 148 } 149 else 150 { 151 textBuffer.append(Node.getLineSeparator()); 152 inputLen = input.length(); 153 i = -1; 154 } 155 156 } 157 } 158 return new StringNode(textBuffer, textBegin, textEnd); 159 } 160 } 161 | Popular Tags |