1 19 20 33 package org.htmlparser; 34 public class RemarkNodeParser 35 { 36 public final static int REMARK_NODE_BEFORE_PARSING_STATE = 0; 37 public final static int REMARK_NODE_OPENING_ANGLE_BRACKET_STATE = 1; 38 public final static int REMARK_NODE_EXCLAMATION_RECEIVED_STATE = 2; 39 public final static int REMARK_NODE_FIRST_DASH_RECEIVED_STATE = 3; 40 public final static int REMARK_NODE_ACCEPTING_STATE = 4; 41 public final static int REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE = 5; 42 public final static int REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE = 6; 43 public final static int REMARK_NODE_ACCEPTED_STATE = 7; 44 public final static int REMARK_NODE_ILLEGAL_STATE = 8; 45 public final static int REMARK_NODE_FINISHED_PARSING_STATE = 2; 46 47 53 public RemarkNode find(NodeReader reader, String input, int position) 54 { 55 int state = REMARK_NODE_BEFORE_PARSING_STATE; 56 StringBuffer tagContents = new StringBuffer (); 57 int tagBegin = 0; 58 int tagEnd = 0; 59 int i = position; 60 int inputLen = input.length(); 61 char ch, prevChar = ' '; 62 while (i < inputLen && state < REMARK_NODE_ACCEPTED_STATE) 63 { 64 ch = input.charAt(i); 65 if (state == REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE) 66 { 67 if (ch == '>') 68 { 69 state = REMARK_NODE_ACCEPTED_STATE; 70 tagEnd = i; 71 } 72 else if (ch == '-') 73 { 74 tagContents.append(prevChar); 75 } 76 else 77 { 78 state = REMARK_NODE_ACCEPTING_STATE; 80 tagContents.append(prevChar); 81 tagContents.append(prevChar); 82 } 83 84 } 85 86 if (state == REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE) 87 { 88 if (ch == '-') 89 { 90 state = REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE; 91 } 92 else 93 { 94 state = REMARK_NODE_ACCEPTING_STATE; 96 tagContents.append(prevChar); 97 } 98 } 99 if (state == REMARK_NODE_ACCEPTING_STATE) 100 { 101 if (ch == '-') 102 { 103 state = REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE; 104 } 109 } 110 if (state == REMARK_NODE_ACCEPTING_STATE) 111 { 112 tagContents.append(ch); 114 } 115 116 if (state == REMARK_NODE_FIRST_DASH_RECEIVED_STATE) 117 { 118 if (ch == '-') 119 { 120 state = REMARK_NODE_ACCEPTING_STATE; 121 if (input.length() > i + 1 && input.charAt(i + 1) == '>') 123 { 124 state = REMARK_NODE_ACCEPTED_STATE; 125 tagEnd = i + 1; 126 } 127 } 128 else 129 state = REMARK_NODE_ILLEGAL_STATE; 130 } 131 if (state == REMARK_NODE_EXCLAMATION_RECEIVED_STATE) 132 { 133 if (ch == '-') 134 state = REMARK_NODE_FIRST_DASH_RECEIVED_STATE; 135 else if (ch == '>') 136 { 137 state = REMARK_NODE_ACCEPTED_STATE; 138 tagEnd = i; 139 } 140 else 141 state = REMARK_NODE_ILLEGAL_STATE; 142 } 143 if (state == REMARK_NODE_OPENING_ANGLE_BRACKET_STATE) 144 { 145 if (ch == '!') 146 state = REMARK_NODE_EXCLAMATION_RECEIVED_STATE; 147 else 148 state = REMARK_NODE_ILLEGAL_STATE; 149 } 151 if (state == REMARK_NODE_BEFORE_PARSING_STATE) 152 { 153 if (ch == '<') 154 { 155 tagBegin = i; 157 state = REMARK_NODE_OPENING_ANGLE_BRACKET_STATE; 158 } 159 else if (ch != ' ') 160 { 161 state = REMARK_NODE_ILLEGAL_STATE; 163 } 164 } 165 if (state >= REMARK_NODE_ACCEPTING_STATE 167 && state < REMARK_NODE_ACCEPTED_STATE 168 && i == input.length() - 1) 169 { 170 tagContents.append(Node.getLineSeparator()); 173 do 174 { 175 input = reader.getNextLine(); 176 } 177 while (input != null && input.length() == 0); 178 if (input != null) 179 inputLen = input.length(); 180 else 181 inputLen = -1; 182 i = -1; 183 } 184 if (state == REMARK_NODE_ILLEGAL_STATE) 185 { 186 return null; 187 } 188 i++; 189 prevChar = ch; 190 } 191 if (state == REMARK_NODE_ACCEPTED_STATE) 192 return new RemarkNode(tagBegin, tagEnd, tagContents.toString()); 193 else 194 return null; 195 } 196 } 197 | Popular Tags |