KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > derbyTesting > functionTests > util > streams > ByteAlphabet


1 /*
2
3    Derby - Class org.apache.derbyTesting.functionTests.util.streams.ByteAlphabet
4
5    Licensed to the Apache Software Foundation (ASF) under one or more
6    contributor license agreements. See the NOTICE file distributed with
7    this work for additional information regarding copyright ownership.
8    The ASF licenses this file to you under the Apache License, Version 2.0
9    (the "License"); you may not use this file except in compliance with
10    the License. You may obtain a copy of the License at
11
12       http://www.apache.org/licenses/LICENSE-2.0
13
14    Unless required by applicable law or agreed to in writing, software
15    distributed under the License is distributed on an "AS IS" BASIS,
16    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17    See the License for the specific language governing permissions and
18    limitations under the License.
19
20  */

21
22 package org.apache.derbyTesting.functionTests.util.streams;
23
24 import java.io.UnsupportedEncodingException JavaDoc;
25
26 /**
27  * A looping alphabet, returning bytes in a specified encoding.
28  *
29  * The alphabet loops over a list of bytes representing characters. The
30  * alphabet-object is used by looping stream, which in turn is used for testing
31  * methods requiring streaming inputs.
32  *
33  * The following alphabets have been defined:
34  * <ul><li><em>Modern latin, lowercase</em> ; letters a - z (26)
35  * <li><em>Norwegian/Danish, lowercase</em> ; letters a - z, plus three
36  * additional letters (29)
37  * <li><em>Tamil</em> ; 46 Tamil letters from UNICODE U0B80
38  * <li><em>CJK subset</em> ; 12 letters from UNICODE CJK U4E00
39  * </ul>
40  */

41 public class ByteAlphabet {
42
43     /** The name of the alphabet. */
44     private final String JavaDoc name;
45     /** The encoding used to represent characters as bytes. */
46     private final String JavaDoc encoding;
47     /** The bytes representing the characters in the alphabet. */
48     private final byte[] bytes;
49     /** The number of characters in the alphabet. */
50     private final int charCount;
51     /** The number of byes in the alphabet. */
52     private final int byteCount;
53     /** Offset into the byte array. */
54     private int boff = 0;
55
56     /**
57      * Create an alphabet returning bytes representing the lowercase letters
58      * a-z in the "US-ASCII" encoding.
59      */

60     public static ByteAlphabet modernLatinLowercase() {
61         return new ByteAlphabet("Modern latin lowercase, US-ASCII",
62                             CharAlphabet.MODERNLATINLOWER,
63                             "US-ASCII");
64     }
65
66     /**
67      * Create an alphabet returning bytes representing the 29 lowercase
68      * letters in the Norwegian/Danish alphabet in the "ISO-8859-1" encoding.
69      */

70     public static ByteAlphabet norwegianLowercase() {
71         return new ByteAlphabet("Norwegian/Danish lowercase, ISO-8859-1",
72                         CharAlphabet.NO_DK_LOWER,
73                         "ISO-8859-1");
74     }
75
76     /**
77      * Create an alphabet returning bytes representing a subset of the Tamil
78      * alphabet in the UTF-8 encoding.
79      */

80     public static ByteAlphabet tamilUTF8() {
81         return new ByteAlphabet("Tamil, UTF-8",
82                         CharAlphabet.TAMIL,
83                         "UTF8");
84     }
85
86     /**
87      * Create an alphabet returning bytes representing a subset of the Tamil
88      * alphabet in the UTF-16BE encoding.
89      */

90     public static ByteAlphabet tamilUTF16BE() {
91         return new ByteAlphabet("Tamil, UTF-16BE",
92                         CharAlphabet.TAMIL,
93                         "UTF-16BE");
94     }
95
96     /**
97      * Create an alphabet returning bytes representing a subset of the CJK
98      * alphabet in the UTF-8 encoding.
99      */

100     public static ByteAlphabet cjkSubsetUTF8() {
101         return new ByteAlphabet("CJK subset, UTF-8",
102                         CharAlphabet.CJKSUBSET,
103                         "UTF8");
104     }
105
106     /**
107      * Create an alphabet returning bytes representing a subset of the CJK
108      * alphabet in the UTF-16BE encoding.
109      */

110     public static ByteAlphabet cjkSubsetUTF16BE() {
111         return new ByteAlphabet("CJK subset, UTF-16BE",
112                         CharAlphabet.CJKSUBSET,
113                         "UTF-16BE");
114     }
115
116     /**
117      * Create an alphabet with the given name, the given characters and using
118      * the specified encoding to represent the characters as bytes.
119      *
120      * @param name the name of the alphabet
121      * @param chars the characters in the alphabet
122      * @param encoding the encoding to use to represent characters as bytes
123      */

124     private ByteAlphabet(String JavaDoc name, char[] chars, String JavaDoc encoding) {
125         this.name = name;
126         this.encoding = encoding;
127         this.charCount = chars.length;
128         String JavaDoc tmpStr = new String JavaDoc(chars);
129         byte[] tmpBytes;
130         int tmpByteCount;
131         try {
132             tmpBytes = tmpStr.getBytes(encoding);
133             tmpByteCount = tmpBytes.length;
134         } catch (UnsupportedEncodingException JavaDoc uee) {
135             // We are nasty and ignore this...
136
tmpBytes = new byte[] {0};
137             tmpByteCount = 1;
138         }
139         this.bytes = tmpBytes;
140         this.byteCount = tmpByteCount;
141     }
142
143     /**
144      * Return the name of the alphabet.
145      */

146     public String JavaDoc getName() {
147         return this.name;
148     }
149
150     /**
151      * Return the encoding used to represent characters as bytes.
152      */

153     public String JavaDoc getEncoding() {
154         return this.encoding;
155     }
156
157     /**
158      * Return the number of characters in the alphabet.
159      */

160     public int charCount() {
161         return charCount;
162     }
163
164     /**
165      * Return the number of bytes in the alphabet.
166      *
167      * The number of bytes in the alphabet is noramlly different from the
168      * number of characters in the alphabet, but it depends on the
169      * characters in the alphabet and encoding used to represent them as
170      * bytes.
171      */

172     public int byteCount() {
173         return byteCount;
174     }
175
176     /**
177      * Return the next byte in the alphabet.
178      */

179     public byte nextByte() {
180         if (boff >= byteCount) {
181             boff = 0;
182         }
183         return bytes[boff++];
184     }
185     
186     /**
187      * Reset the alphabet, the next byte returned is the first byte in the
188      * alphabet, which might not be a complete character.
189      */

190     public void reset() {
191         boff = 0;
192     }
193
194     /**
195      * Compute the next byte to read after reading the specified number
196      * of bytes.
197      *
198      * Besides from returning the index, the internal state of
199      * the alphabet is updated.
200      *
201      * @param bytesRead the number of bytes read
202      * @return the index of the next byte
203      */

204     public int nextByteToRead(int bytesRead) {
205         boff = (boff + (bytesRead % byteCount)) % byteCount;
206         return boff;
207     }
208 } // End class ByteAlphabet
209
Popular Tags