KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > analysis > FastCharStream


1 /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.analysis;
5
6 import java.io.*;
7
8 /** An efficient implementation of JavaCC's CharStream interface. <p>Note that
9  * this does not do line-number counting, but instead keeps track of the
10  * character position of the token in the input, as required by Lucene's {@link
11  * org.apache.lucene.analysis.Token} API. */

12 final class FastCharStream implements CharStream {
13   char[] buffer = null;
14
15   int bufferLength = 0; // end of valid chars
16
int bufferPosition = 0; // next char to read
17

18   int tokenStart = 0; // offset in buffer
19
int bufferStart = 0; // position in file of buffer
20

21   Reader input; // source of chars
22

23   /** Constructs from a Reader. */
24   public FastCharStream(Reader r) {
25     input = r;
26   }
27
28   public final char readChar() throws IOException {
29     if (bufferPosition >= bufferLength)
30       refill();
31     return buffer[bufferPosition++];
32   }
33
34   private final void refill() throws IOException {
35     int newPosition = bufferLength - tokenStart;
36
37     if (tokenStart == 0) { // token won't fit in buffer
38
if (buffer == null) { // first time: alloc buffer
39
buffer = new char[2048];
40       } else if (bufferLength == buffer.length) { // grow buffer
41
char[] newBuffer = new char[buffer.length*2];
42     System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
43     buffer = newBuffer;
44       }
45     } else { // shift token to front
46
System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
47     }
48
49     bufferLength = newPosition; // update state
50
bufferPosition = newPosition;
51     bufferStart += tokenStart;
52     tokenStart = 0;
53
54     int charsRead = // fill space in buffer
55
input.read(buffer, newPosition, buffer.length-newPosition);
56     if (charsRead == -1)
57       throw new IOException("read past eof");
58     else
59       bufferLength += charsRead;
60   }
61
62   public final char BeginToken() throws IOException {
63     tokenStart = bufferPosition;
64     return readChar();
65   }
66
67   public final void backup(int amount) {
68     bufferPosition -= amount;
69   }
70
71   public final String JavaDoc GetImage() {
72     return new String JavaDoc(buffer, tokenStart, bufferPosition - tokenStart);
73   }
74
75   public final char[] GetSuffix(int len) {
76     char[] value = new char[len];
77     System.arraycopy(buffer, bufferPosition - len, value, 0, len);
78     return value;
79   }
80
81   public final void Done() {
82     try {
83       input.close();
84     } catch (IOException e) {
85       System.err.println("Caught: " + e + "; ignoring.");
86     }
87   }
88
89   public final int getColumn() {
90     return bufferStart + bufferPosition;
91   }
92   public final int getLine() {
93     return 1;
94   }
95   public final int getEndColumn() {
96     return bufferStart + bufferPosition;
97   }
98   public final int getEndLine() {
99     return 1;
100   }
101   public final int getBeginColumn() {
102     return bufferStart + tokenStart;
103   }
104   public final int getBeginLine() {
105     return 1;
106   }
107 }
108
Popular Tags