KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > javacc > parser > RCharacterList


1 /*
2  * Copyright © 2002 Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
3  * California 95054, U.S.A. All rights reserved. Sun Microsystems, Inc. has
4  * intellectual property rights relating to technology embodied in the product
5  * that is described in this document. In particular, and without limitation,
6  * these intellectual property rights may include one or more of the U.S.
7  * patents listed at http://www.sun.com/patents and one or more additional
8  * patents or pending patent applications in the U.S. and in other countries.
9  * U.S. Government Rights - Commercial software. Government users are subject
10  * to the Sun Microsystems, Inc. standard license agreement and applicable
11  * provisions of the FAR and its supplements. Use is subject to license terms.
12  * Sun, Sun Microsystems, the Sun logo and Java are trademarks or registered
13  * trademarks of Sun Microsystems, Inc. in the U.S. and other countries. This
14  * product is covered and controlled by U.S. Export Control laws and may be
15  * subject to the export or import laws in other countries. Nuclear, missile,
16  * chemical biological weapons or nuclear maritime end uses or end users,
17  * whether direct or indirect, are strictly prohibited. Export or reexport
18  * to countries subject to U.S. embargo or to entities identified on U.S.
19  * export exclusion lists, including, but not limited to, the denied persons
20  * and specially designated nationals lists is strictly prohibited.
21  */

22
23 package org.javacc.parser;
24
25 import java.util.*;
26
27 /**
28  * Describes character lists.
29  */

30
31 public class RCharacterList extends RegularExpression {
32
33   /**
34    * This is true if a tilde (~) appears before the character list.
35    * Otherwise, this is false.
36    */

37   public boolean negated_list = false;
38
39   /**
40    * This is the list of descriptors of the character list. Each vector
41    * entry will narrow to either SingleCharacter or to CharacterRange.
42    */

43   public java.util.Vector JavaDoc descriptors = new java.util.Vector JavaDoc();
44
/**
 * Table of character ranges used by ToCaseNeutral() when it adds the
 * lower-case counterparts of a CharacterRange. The entries are read two at
 * a time: element 2k is the first character of a range and element 2k+1 is
 * its last character, both inclusive. The ranges are listed in ascending
 * order.
 *
 * NOTE(review): the final entries (65339..0xfffe and 0xffff..0xffff) look
 * like filler/terminator ranges that keep the scans in ToCaseNeutral()
 * inside the array -- confirm before editing the tail of the table.
 */
45 static final char[] diffLowerCaseRanges = {
46 65, 90, 192, 214, 216, 222, 256, 256, 258, 258, 260, 260, 262, 262, 264, 264,
47 266, 266, 268, 268, 270, 270, 272, 272, 274, 274, 276, 276, 278, 278, 280, 280,
48 282, 282, 284, 284, 286, 286, 288, 288, 290, 290, 292, 292, 294, 294, 296, 296,
49 298, 298, 300, 300, 302, 302, /* new for fixing 1.0.2 */ 304, 304, /* End new */
50 306, 306, 308, 308, 310, 310, 313, 313, 315, 315,
51 317, 317, 319, 319, 321, 321, 323, 323, 325, 325, 327, 327, 330, 330, 332, 332,
52 334, 334, 336, 336, 338, 338, 340, 340, 342, 342, 344, 344, 346, 346, 348, 348,
53 350, 350, 352, 352, 354, 354, 356, 356, 358, 358, 360, 360, 362, 362, 364, 364,
54 366, 366, 368, 368, 370, 370, 372, 372, 374, 374, 376, 376, 377, 377, 379, 379,
55 381, 381, 385, 385, 386, 386, 388, 388, 390, 390, 391, 391,
56 /* new for fixing 1.0.2 */ 393, 393, /* End new */ 394, 394, 395, 395,
57 /*398, Sreeni fixed for 1.2*/ 399, 399, 400, 400, 401, 401, 403, 403, 404, 404, 406, 406, 407, 407, 408, 408,
58 412, 412, 413, 413, 416, 416, 418, 418, 420, 420, 423, 423, 425, 425, 428, 428,
59 430, 430, 431, 431, 433, 434, 435, 435, 437, 437, 439, 439, 440, 440, 444, 444,
60 452, 452, 453, 453, 455, 455, 456, 456, 458, 458, 459, 459, 461, 461, 463, 463,
61 465, 465, 467, 467, 469, 469, 471, 471, 473, 473, 475, 475, 478, 478, 480, 480,
62 482, 482, 484, 484, 486, 486, 488, 488, 490, 490, 492, 492, 494, 494, 497, 497,
63 498, 498, 500, 500, 506, 506, 508, 508, 510, 510, 512, 512, 514, 514, 516, 516,
64 518, 518, 520, 520, 522, 522, 524, 524, 526, 526, 528, 528, 530, 530, 532, 532,
65 534, 534, 902, 902, 904, 906, 908, 908, 910, 911, 913, 929, 931, 939, 994, 994,
66 996, 996, 998, 998, 1000, 1000, 1002, 1002, 1004, 1004, 1006, 1006, 1025, 1036,
67 1038, 1039, 1040, 1040, 1041, 1041, 1042, 1071, 1120, 1120, 1122, 1122,
68 1124, 1124, 1126, 1126, 1128, 1128, 1130, 1130, 1132, 1132, 1134, 1134,
69 1136, 1136, 1138, 1138, 1140, 1140, 1142, 1142, 1144, 1144, 1146, 1146,
70 1148, 1148, 1150, 1150, 1152, 1152, 1168, 1168, 1170, 1170, 1172, 1172,
71 1174, 1174, 1176, 1176, 1178, 1178, 1180, 1180, 1182, 1182, 1184, 1184,
72 1186, 1186, 1188, 1188, 1190, 1190, 1192, 1192, 1194, 1194, 1196, 1196,
73 1198, 1198, 1200, 1200, 1202, 1202, 1204, 1204, 1206, 1206, 1208, 1208,
74 1210, 1210, 1212, 1212, 1214, 1214, 1217, 1217, 1219, 1219, 1223, 1223,
75 1227, 1227, 1232, 1232, 1234, 1234, 1236, 1236, 1238, 1238, 1240, 1240,
76 1242, 1242, 1244, 1244, 1246, 1246, 1248, 1248, 1250, 1250, 1252, 1252,
77 1254, 1254, 1256, 1256, 1258, 1258, 1262, 1262, 1264, 1264, 1266, 1266,
78 1268, 1268, 1272, 1272, 1329, 1366, 4256, 4293, 7680, 7680, 7682, 7682,
79 7684, 7684, 7686, 7686, 7688, 7688, 7690, 7690, 7692, 7692, 7694, 7694,
80 7696, 7696, 7698, 7698, 7700, 7700, 7702, 7702, 7704, 7704, 7706, 7706,
81 7708, 7708, 7710, 7710, 7712, 7712, 7714, 7714, 7716, 7716, 7718, 7718,
82 7720, 7720, 7722, 7722, 7724, 7724, 7726, 7726, 7728, 7728, 7730, 7730,
83 7732, 7732, 7734, 7734, 7736, 7736, 7738, 7738, 7740, 7740, 7742, 7742,
84 7744, 7744, 7746, 7746, 7748, 7748, 7750, 7750, 7752, 7752, 7754, 7754,
85 7756, 7756, 7758, 7758, 7760, 7760, 7762, 7762, 7764, 7764, 7766, 7766,
86 7768, 7768, 7770, 7770, 7772, 7772, 7774, 7774, 7776, 7776, 7778, 7778,
87 7780, 7780, 7782, 7782, 7784, 7784, 7786, 7786, 7788, 7788, 7790, 7790,
88 7792, 7792, 7794, 7794, 7796, 7796, 7798, 7798, 7800, 7800, 7802, 7802,
89 7804, 7804, 7806, 7806, 7808, 7808, 7810, 7810, 7812, 7812, 7814, 7814,
90 7816, 7816, 7818, 7818, 7820, 7820, 7822, 7822, 7824, 7824, 7826, 7826,
91 7828, 7828, 7840, 7840, 7842, 7842, 7844, 7844, 7846, 7846, 7848, 7848,
92 7850, 7850, 7852, 7852, 7854, 7854, 7856, 7856, 7858, 7858, 7860, 7860,
93 7862, 7862, 7864, 7864, 7866, 7866, 7868, 7868, 7870, 7870, 7872, 7872,
94 7874, 7874, 7876, 7876, 7878, 7878, 7880, 7880, 7882, 7882, 7884, 7884,
95 7886, 7886, 7888, 7888, 7890, 7890, 7892, 7892, 7894, 7894, 7896, 7896,
96 7898, 7898, 7900, 7900, 7902, 7902, 7904, 7904, 7906, 7906, 7908, 7908,
97 7910, 7910, 7912, 7912, 7914, 7914, 7916, 7916, 7918, 7918, 7920, 7920,
98 7922, 7922, 7924, 7924, 7926, 7926, 7928, 7928, 7944, 7951, 7960, 7965,
99 7976, 7983, 7992, 7999, 8008, 8013, 8025, 8025, 8027, 8027, 8029, 8029,
100 8031, 8031, 8040, 8047, 8072, 8079, 8088, 8095, 8104, 8111, 8120, 8121,
101 8122, 8123, 8124, 8124, 8136, 8139, 8140, 8140, 8152, 8153, 8154, 8155,
102 8168, 8169, 8170, 8171, 8172, 8172, 8184, 8185, 8186, 8187, 8188, 8188,
103 8544, 8559, 9398, 9423, 65313, 65338, 65339, 0xfffe, 0xffff, 0xffff
104 };
105
/**
 * Table of character ranges used by ToCaseNeutral() when it adds the
 * upper-case counterparts of a CharacterRange. The entries are read two at
 * a time: element 2k is the first character of a range and element 2k+1 is
 * its last character, both inclusive. The ranges are listed in ascending
 * order.
 *
 * NOTE(review): the final entries (65371..0xfffe and 0xffff..0xffff) look
 * like filler/terminator ranges that keep the scans in ToCaseNeutral()
 * inside the array -- confirm before editing the tail of the table.
 */
106 static final char[] diffUpperCaseRanges = {
107 97, 122, 224, 246, 248, 254, 255, 255, 257, 257, 259, 259, 261, 261, 263, 263,
108 265, 265, 267, 267, 269, 269, 271, 271, 273, 273, 275, 275, 277, 277, 279, 279,
109 281, 281, 283, 283, 285, 285, 287, 287, 289, 289, 291, 291, 293, 293, 295, 295,
110 297, 297, 299, 299, 301, 301, 303, 303, 305, 305, 307, 307, 309, 309, 311, 311,
111 314, 314, 316, 316, 318, 318, 320, 320, 322, 322, 324, 324, 326, 326, 328, 328,
112 331, 331, 333, 333, 335, 335, 337, 337, 339, 339, 341, 341, 343, 343, 345, 345,
113 347, 347, 349, 349, 351, 351, 353, 353, 355, 355, 357, 357, 359, 359, 361, 361,
114 363, 363, 365, 365, 367, 367, 369, 369, 371, 371, 373, 373, 375, 375, 378, 378,
115 380, 380, 382, 382, 383, 383, 387, 387, 389, 389, 392, 392, 396, 396, 402, 402,
116 409, 409, 417, 417, 419, 419, 421, 421, 424, 424, 429, 429, 432, 432, 436, 436,
117 438, 438, 441, 441, 445, 445, 453, 453, 454, 454, 456, 456, 457, 457, 459, 459,
118 460, 460, 462, 462, 464, 464, 466, 466, 468, 468, 470, 470, 472, 472, 474, 474,
119 476, 476, 479, 479, 481, 481, 483, 483, 485, 485, 487, 487, 489, 489, 491, 491,
120 493, 493, 495, 495, 498, 498, 499, 499, 501, 501, 507, 507, 509, 509, 511, 511,
121 513, 513, 515, 515, 517, 517, 519, 519, 521, 521, 523, 523, 525, 525, 527, 527,
122 529, 529, 531, 531, 533, 533, 535, 535, 595, 595, 596, 596, 598,
123 /* new for fixing 1.0.2 */ 598, 599, /* End new */ 599, /*600, Sreeni fixed for 1.2 */
124 601, 601,
125 603, 603, 608, 608, 611, 611, 616, 616, 617, 617, 623, 623, 626, 626, 643, 643,
126 648, 648, 650, 651, 658, 658, 940, 940, 941, 943, 945, 961,
127 /* new for fixing 1.0.2 */ 962, 962, /* End new */ 963, 971, 972, 972,
128 973, 974, 976, 976, 977, 977, 981, 981, 982, 982, 995, 995, 997, 997, 999, 999,
129 1001, 1001, 1003, 1003, 1005, 1005, 1007, 1007, 1008, 1008, 1009, 1009,
130 1072, 1103, 1105, 1116, 1118, 1119, 1121, 1121, 1123, 1123, 1125, 1125,
131 1127, 1127, 1129, 1129, 1131, 1131, 1133, 1133, 1135, 1135, 1137, 1137,
132 1139, 1139, 1141, 1141, 1143, 1143, 1145, 1145, 1147, 1147, 1149, 1149,
133 1151, 1151, 1153, 1153, 1169, 1169, 1171, 1171, 1173, 1173, 1175, 1175,
134 1177, 1177, 1179, 1179, 1181, 1181, 1183, 1183, 1185, 1185, 1187, 1187,
135 1189, 1189, 1191, 1191, 1193, 1193, 1195, 1195, 1197, 1197, 1199, 1199,
136 1201, 1201, 1203, 1203, 1205, 1205, 1207, 1207, 1209, 1209, 1211, 1211,
137 1213, 1213, 1215, 1215, 1218, 1218, 1220, 1220, 1224, 1224, 1228, 1228,
138 1233, 1233, 1235, 1235, 1237, 1237, 1239, 1239, 1241, 1241, 1243, 1243,
139 1245, 1245, 1247, 1247, 1249, 1249, 1251, 1251, 1253, 1253, 1255, 1255,
140 1257, 1257, 1259, 1259, 1263, 1263, 1265, 1265, 1267, 1267, 1269, 1269,
141 1273, 1273, 1377, 1414, 7681, 7681, 7683, 7683, 7685, 7685, 7687, 7687,
142 7689, 7689, 7691, 7691, 7693, 7693, 7695, 7695, 7697, 7697, 7699, 7699,
143 7701, 7701, 7703, 7703, 7705, 7705, 7707, 7707, 7709, 7709, 7711, 7711,
144 7713, 7713, 7715, 7715, 7717, 7717, 7719, 7719, 7721, 7721, 7723, 7723,
145 7725, 7725, 7727, 7727, 7729, 7729, 7731, 7731, 7733, 7733, 7735, 7735,
146 7737, 7737, 7739, 7739, 7741, 7741, 7743, 7743, 7745, 7745, 7747, 7747,
147 7749, 7749, 7751, 7751, 7753, 7753, 7755, 7755, 7757, 7757, 7759, 7759,
148 7761, 7761, 7763, 7763, 7765, 7765, 7767, 7767, 7769, 7769, 7771, 7771,
149 7773, 7773, 7775, 7775, 7777, 7777, 7779, 7779, 7781, 7781, 7783, 7783,
150 7785, 7785, 7787, 7787, 7789, 7789, 7791, 7791, 7793, 7793, 7795, 7795,
151 7797, 7797, 7799, 7799, 7801, 7801, 7803, 7803, 7805, 7805, 7807, 7807,
152 7809, 7809, 7811, 7811, 7813, 7813, 7815, 7815, 7817, 7817, 7819, 7819,
153 7821, 7821, 7823, 7823, 7825, 7825, 7827, 7827, 7829, 7829, 7841, 7841,
154 7843, 7843, 7845, 7845, 7847, 7847, 7849, 7849, 7851, 7851, 7853, 7853,
155 7855, 7855, 7857, 7857, 7859, 7859, 7861, 7861, 7863, 7863, 7865, 7865,
156 7867, 7867, 7869, 7869, 7871, 7871, 7873, 7873, 7875, 7875, 7877, 7877,
157 7879, 7879, 7881, 7881, 7883, 7883, 7885, 7885, 7887, 7887, 7889, 7889,
158 7891, 7891, 7893, 7893, 7895, 7895, 7897, 7897, 7899, 7899, 7901, 7901,
159 7903, 7903, 7905, 7905, 7907, 7907, 7909, 7909, 7911, 7911, 7913, 7913,
160 7915, 7915, 7917, 7917, 7919, 7919, 7921, 7921, 7923, 7923, 7925, 7925,
161 7927, 7927, 7929, 7929, 7936, 7943, 7952, 7957, 7968, 7975, 7984, 7991,
162 8000, 8005, 8017, 8017, 8019, 8019, 8021, 8021, 8023, 8023, 8032, 8039,
163 8048, 8049, 8050, 8053, 8054, 8055, 8056, 8057, 8058, 8059, 8060, 8061,
164 8064, 8071, 8080, 8087, 8096, 8103, 8112, 8113, 8115, 8115, 8131, 8131,
165 8144, 8145, 8160, 8161, 8165, 8165, 8179, 8179, 8560, 8575, 9424, 9449,
166 65345, 65370, 65371, 0xfffe, 0xffff, 0xffff
167 };
168
169   void ToCaseNeutral()
170   {
171      int cnt = descriptors.size();
172
173      OuterLoop:
174      for (int i = 0; i < cnt; i++)
175      {
176         if (descriptors.elementAt(i) instanceof SingleCharacter)
177         {
178            char ch = ((SingleCharacter)descriptors.elementAt(i)).ch;
179
180            if (ch != Character.toLowerCase(ch))
181               descriptors.addElement(new
182                          SingleCharacter(Character.toLowerCase(ch)));
183            if (ch != Character.toUpperCase(ch))
184               descriptors.addElement(new
185                          SingleCharacter(Character.toUpperCase(ch)));
186         }
187         else
188         {
189            char l = ((CharacterRange)descriptors.elementAt(i)).left;
190            char r = ((CharacterRange)descriptors.elementAt(i)).right;
191            int j = 0;
192
193            /* Add ranges for which lower case is different. */
194            for (;;)
195            {
196               while (l > diffLowerCaseRanges[j])
197                  j += 2;
198
199               if (l < diffLowerCaseRanges[j])
200               {
201                  if (r < diffLowerCaseRanges[j])
202                     break;
203
204                  if (r <= diffLowerCaseRanges[j + 1])
205                  {
206                     descriptors.addElement(new CharacterRange(Character.toLowerCase(diffLowerCaseRanges[j]),
207                                     (char)(Character.toLowerCase(diffLowerCaseRanges[j]) + r - diffLowerCaseRanges[j])));
208                     break;
209                  }
210
211                  descriptors.addElement(new CharacterRange(Character.toLowerCase(diffLowerCaseRanges[j]),
212                                                          Character.toLowerCase(diffLowerCaseRanges[j + 1])));
213               }
214               else
215               {
216                  if (r <= diffLowerCaseRanges[j + 1])
217                  {
218                     descriptors.addElement(new CharacterRange(
219                                     (char)(Character.toLowerCase(diffLowerCaseRanges[j]) + l - diffLowerCaseRanges[j]),
220                                     (char)(Character.toLowerCase(diffLowerCaseRanges[j]) + r - diffLowerCaseRanges[j])));
221                     break;
222                  }
223
224                  descriptors.addElement(new CharacterRange(
225                                       (char)(Character.toLowerCase(diffLowerCaseRanges[j]) + l - diffLowerCaseRanges[j]),
226                                                         Character.toLowerCase(diffLowerCaseRanges[j + 1])));
227               }
228
229               j += 2;
230               while (r > diffLowerCaseRanges[j])
231               {
232                  if (r <= diffLowerCaseRanges[j + 1])
233                  {
234                     descriptors.addElement(new CharacterRange(Character.toLowerCase(diffLowerCaseRanges[j]),
235                                      (char)(Character.toLowerCase(diffLowerCaseRanges[j]) + r - diffLowerCaseRanges[j])));
236                     break;
237                  }
238
239                  descriptors.addElement(new CharacterRange(Character.toLowerCase(diffLowerCaseRanges[j]),
240                                                          Character.toLowerCase(diffLowerCaseRanges[j + 1])));
241                  j += 2;
242               }
243               break;
244            }
245
246            /* Add ranges for which upper case is different. */
247            j = 0;
248            while (l > diffUpperCaseRanges[j])
249               j += 2;
250
251            if (l < diffUpperCaseRanges[j])
252            {
253               if (r < diffUpperCaseRanges[j])
254                  continue;
255
256               if (r <= diffUpperCaseRanges[j + 1])
257               {
258                  descriptors.addElement(new CharacterRange(Character.toUpperCase(diffUpperCaseRanges[j]),
259                                  (char)(Character.toUpperCase(diffUpperCaseRanges[j]) + r - diffUpperCaseRanges[j])));
260                  continue;
261               }
262
263               descriptors.addElement(new CharacterRange(Character.toUpperCase(diffUpperCaseRanges[j]),
264                                                       Character.toUpperCase(diffUpperCaseRanges[j + 1])));
265            }
266            else
267            {
268               if (r <= diffUpperCaseRanges[j + 1])
269               {
270                  descriptors.addElement(new CharacterRange(
271                                  (char)(Character.toUpperCase(diffUpperCaseRanges[j]) + l - diffUpperCaseRanges[j]),
272                                  (char)(Character.toUpperCase(diffUpperCaseRanges[j]) + r - diffUpperCaseRanges[j])));
273                  continue;
274               }
275
276               descriptors.addElement(new CharacterRange(
277                                    (char)(Character.toUpperCase(diffUpperCaseRanges[j]) + l - diffUpperCaseRanges[j]),
278                                                      Character.toUpperCase(diffUpperCaseRanges[j + 1])));
279            }
280
281            j += 2;
282            while (r > diffUpperCaseRanges[j])
283            {
284               if (r <= diffUpperCaseRanges[j + 1])
285               {
286                  descriptors.addElement(new CharacterRange(Character.toUpperCase(diffUpperCaseRanges[j]),
287                                   (char)(Character.toUpperCase(diffUpperCaseRanges[j]) + r - diffUpperCaseRanges[j])));
288                  break;
289               }
290
291               descriptors.addElement(new CharacterRange(Character.toUpperCase(diffUpperCaseRanges[j]),
292                                                       Character.toUpperCase(diffUpperCaseRanges[j + 1])));
293               j += 2;
294            }
295         }
296      }
297   }
298
299   boolean transformed = false;
300   public Nfa GenerateNfa(boolean ignoreCase)
301   {
302      if (!transformed)
303      {
304         if (Options.getIgnoreCase() || ignoreCase)
305         {
306 /*
307            int i;
308            System.out.println("Before:");
309            for (i = 0; i < descriptors.size(); i++)
310            {
311               if (descriptors.elementAt(i) instanceof SingleCharacter)
312               {
313                  char c = ((SingleCharacter)descriptors.elementAt(i)).ch;
314                  System.out.print((int)c + " ");
315               }
316               else
317               {
318                  char l = ((CharacterRange)descriptors.elementAt(i)).left;
319                  char r = ((CharacterRange)descriptors.elementAt(i)).right;
320       
321                  System.out.print((int)l + "-" + (int)r + " ");
322               }
323               if ((i + 1) % 6 == 0)
324                  System.out.println("");
325            }
326            System.out.println("");
327 */

328
329            ToCaseNeutral();
330            SortDescriptors();
331
332 /*
333            System.out.println("After:");
334            for (i = 0; i < descriptors.size(); i++)
335            {
336               if (descriptors.elementAt(i) instanceof SingleCharacter)
337               {
338                  char c = ((SingleCharacter)descriptors.elementAt(i)).ch;
339                  System.out.print((int)c + " ");
340               }
341               else
342               {
343                  char l = ((CharacterRange)descriptors.elementAt(i)).left;
344                  char r = ((CharacterRange)descriptors.elementAt(i)).right;
345       
346                  System.out.print((int)l + "-" + (int)r + " ");
347               }
348               if ((i + 1) % 6 == 0)
349                  System.out.println("");
350            }
351            System.out.println("");
352 */

353         }
354
355         if (negated_list)
356            RemoveNegation(); // This also sorts the list
357
else
358            SortDescriptors();
359      }
360
361      transformed = true;
362      Nfa retVal = new Nfa();
363      NfaState startState = retVal.start;
364      NfaState finalState = retVal.end;
365      int i;
366
367      for (i = 0; i < descriptors.size(); i++)
368      {
369         if (descriptors.elementAt(i) instanceof SingleCharacter)
370            startState.AddChar(((SingleCharacter)descriptors.elementAt(i)).ch);
371         else // if (descriptors.elementAt(i) instanceof CharacterRange)
372
{
373            CharacterRange cr = (CharacterRange)descriptors.elementAt(i);
374
375            if (cr.left == cr.right)
376               startState.AddChar(cr.left);
377            else
378               startState.AddRange(cr.left, cr.right);
379         }
380      }
381
382      startState.next = finalState;
383
384      return retVal;
385   }
386
387   static boolean Overlaps(CharacterRange r1, CharacterRange r2)
388   {
389      return (r1.left <= r2.right && r1.right > r2.right);
390   }
391
392   static boolean SubRange(CharacterRange r1, CharacterRange r2)
393   {
394      return (r1.left >= r2.left && r1.right <= r2.right);
395   }
396
397   static boolean InRange(char c, CharacterRange range)
398   {
399      return (c >= range.left && c <= range.right);
400   }
401
402   void SortDescriptors()
403   {
404      int j;
405
406      Vector newDesc = new Vector(descriptors.size());
407      int cnt = 0;
408
409      Outer:
410      for (int i = 0; i < descriptors.size(); i++)
411      {
412         SingleCharacter s;
413         CharacterRange range;
414
415         if (descriptors.elementAt(i) instanceof SingleCharacter)
416         {
417            s = (SingleCharacter)descriptors.elementAt(i);
418
419            for (j = 0; j < cnt; j++)
420            {
421               if (newDesc.elementAt(j) instanceof SingleCharacter)
422               {
423                  if (((SingleCharacter)newDesc.elementAt(j)).ch > s.ch)
424                     break;
425                  else if (((SingleCharacter)newDesc.elementAt(j)).ch == s.ch)
426                     continue Outer;
427               }
428               else
429               {
430                  char l = ((CharacterRange)newDesc.elementAt(j)).left;
431
432                  if (InRange(s.ch, (CharacterRange)newDesc.elementAt(j)))
433                     continue Outer;
434                  else if (l > s.ch)
435                     break;
436               }
437            }
438
439            newDesc.insertElementAt(s, j);
440            cnt++;
441         }
442         else
443         {
444            range = (CharacterRange)descriptors.elementAt(i);
445
446            for (j = 0; j < cnt; j++)
447            {
448               if (newDesc.elementAt(j) instanceof SingleCharacter)
449               {
450                  if (InRange(((SingleCharacter)newDesc.elementAt(j)).ch, range))
451                  {
452                     newDesc.removeElementAt(j--);
453                     cnt--;
454                  }
455                  else if (((SingleCharacter)newDesc.elementAt(j)).ch > range.right)
456                     break;
457               }
458               else
459               {
460                  if (SubRange(range, (CharacterRange)newDesc.elementAt(j)))
461                  {
462                     continue Outer;
463                  }
464                  else if (SubRange((CharacterRange)newDesc.elementAt(j), range))
465                  {
466                     newDesc.setElementAt(range, j);
467                     continue Outer;
468                  }
469                  else if (Overlaps(range, (CharacterRange)newDesc.elementAt(j)))
470                  {
471                     range.left = (char)(((CharacterRange)newDesc.elementAt(j)).right + 1);
472                  }
473                  else if (Overlaps((CharacterRange)newDesc.elementAt(j), range))
474                  {
475                     CharacterRange tmp = range;
476                     ((CharacterRange)newDesc.elementAt(j)).right = (char)(range.left + 1);
477                     range = (CharacterRange)newDesc.elementAt(j);
478                     newDesc.setElementAt(tmp, j);
479                  }
480                  else if (((CharacterRange)newDesc.elementAt(j)).left > range.right)
481                     break;
482               }
483            }
484
485            newDesc.insertElementAt(range, j);
486            cnt++;
487         }
488      }
489
490      newDesc.trimToSize();
491      descriptors = newDesc;
492   }
493
494   void RemoveNegation()
495   {
496      int i;
497
498      SortDescriptors();
499
500 /*
501      System.out.println("REM. NEG Before:");
502      for (i = 0; i < descriptors.size(); i++)
503      {
504         if (descriptors.elementAt(i) instanceof SingleCharacter)
505         {
506            char c = ((SingleCharacter)descriptors.elementAt(i)).ch;
507            System.out.print((int)c + " ");
508         }
509         else
510         {
511            char l = ((CharacterRange)descriptors.elementAt(i)).left;
512            char r = ((CharacterRange)descriptors.elementAt(i)).right;
513
514            System.out.print((int)l + "-" + (int)r + " ");
515         }
516      }
517      System.out.println("");
518 */

519
520      Vector newDescriptors = new Vector();
521      int lastRemoved = -1; // One less than the first valid character.
522

523    OuterLoop:
524      for (i = 0; i < descriptors.size(); i++)
525      {
526         if (descriptors.elementAt(i) instanceof SingleCharacter)
527         {
528            char c = ((SingleCharacter)descriptors.elementAt(i)).ch;
529
530            if (c >= 0 && c <= lastRemoved + 1)
531            {
532               lastRemoved = c;
533               continue;
534            }
535
536            //System.out.println("lastRemoved : " + (int)lastRemoved + "; char : " + (int)c);
537
newDescriptors.addElement(new CharacterRange((char)(lastRemoved + 1),
538                                          (char)((lastRemoved = c) - 1)));
539         }
540         else
541         {
542            char l = ((CharacterRange)descriptors.elementAt(i)).left;
543            char r = ((CharacterRange)descriptors.elementAt(i)).right;
544
545            if (l >= 0 && l <= lastRemoved + 1)
546            {
547               lastRemoved = r;
548               continue;
549            }
550
551            //System.out.println("lastRemoved : " + (int)lastRemoved + "; left : " + l + "; right : " + (int)r);
552
newDescriptors.addElement(new CharacterRange((char)(lastRemoved + 1),
553                                             (char)(l - 1)));
554            lastRemoved = r;
555         }
556      }
557
558      //System.out.println("lastRem : " + (int)lastRemoved);
559
if (NfaState.unicodeWarningGiven || Options.getJavaUnicodeEscape())
560      {
561         if (lastRemoved < (char)0xffff)
562            newDescriptors.addElement(new CharacterRange((char)(lastRemoved + 1),
563                                      (char)0xffff));
564      }
565      else
566      {
567         if (lastRemoved < (char)0xff)
568            newDescriptors.addElement(new CharacterRange((char)(lastRemoved + 1),
569                                                                    (char)0xff));
570      }
571
572      descriptors = newDescriptors;
573      negated_list = false;
574
575 /*
576      System.out.println("REM NEG After:");
577      for (i = 0; i < descriptors.size(); i++)
578      {
579         if (descriptors.elementAt(i) instanceof SingleCharacter)
580         {
581            char c = ((SingleCharacter)descriptors.elementAt(i)).ch;
582            System.out.print((int)c + " ");
583         }
584         else
585         {
586            char l = ((CharacterRange)descriptors.elementAt(i)).left;
587            char r = ((CharacterRange)descriptors.elementAt(i)).right;
588
589            System.out.print((int)l + "-" + (int)r + " ");
590         }
591      }
592      System.out.println("");
593 */

594   }
595
596   RCharacterList()
597   {
598   }
599
600   RCharacterList(char c)
601   {
602      descriptors = new Vector();
603      descriptors.addElement(new SingleCharacter(c));
604      negated_list = false;
605      ordinal = Integer.MAX_VALUE;
606   }
607
608   public boolean CanMatchAnyChar()
609   {
610      // Return true only if it is ~[]
611
return negated_list && (descriptors == null || descriptors.size() == 0);
612   }
613 }
614
Popular Tags