KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > pdfbox > cmapparser > CMapParser


1 /**
2  * Copyright (c) 2003-2004, www.pdfbox.org
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright notice,
11  * this list of conditions and the following disclaimer in the documentation
12  * and/or other materials provided with the distribution.
13  * 3. Neither the name of pdfbox; nor the names of its
14  * contributors may be used to endorse or promote products derived from this
15  * software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
24  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * http://www.pdfbox.org
29  *
30  */

31 package org.pdfbox.cmapparser;
32
33 import java.io.FileInputStream JavaDoc;
34 import java.io.InputStream JavaDoc;
35 import java.io.IOException JavaDoc;
36 import java.io.RandomAccessFile JavaDoc;
37
38 import java.util.List JavaDoc;
39
40 import org.pdfbox.cmaptypes.CMap;
41 import org.pdfbox.cmaptypes.CodespaceRange;
42
43 import org.pdfbox.cos.COSArray;
44 import org.pdfbox.cos.COSName;
45 import org.pdfbox.cos.COSNumber;
46 import org.pdfbox.cos.COSString;
47
48 import org.pdfbox.pdfparser.PDFStreamParser;
49
50 import org.pdfbox.util.PDFOperator;
51
52 /**
53  * This will parser a CMap stream.
54  *
55  * @author Ben Litchfield (ben@csh.rit.edu)
56  * @version $Revision: 1.10 $
57  */

58 public class CMapParser
59 {
60     private static final String JavaDoc BEGIN_CODESPACE_RANGE = "begincodespacerange";
61     private static final String JavaDoc BEGIN_BASE_FONT_CHAR = "beginbfchar";
62     private static final String JavaDoc BEGIN_BASE_FONT_RANGE = "beginbfrange";
63
64     private InputStream JavaDoc input;
65     private CMap result;
66     private RandomAccessFile JavaDoc file;
67
68     /**
69      * Creates a new instance of CMapParser.
70      *
71      * @param in The input stream to read data from.
72      * @param raf The random access file from the document
73      */

74     public CMapParser( InputStream JavaDoc in, RandomAccessFile JavaDoc raf )
75     {
76         input = in;
77         file = raf;
78     }
79
80     /**
81      * This will get the results of the parsing. parse() must be called first.
82      *
83      * @return The parsed CMap file.
84      */

85     public CMap getResult()
86     {
87         return result;
88     }
89
90     /**
91      * This will parse the stream and create a cmap object.
92      *
93      * @throws IOException If there is an error parsing the stream.
94      */

95     public void parse() throws IOException JavaDoc
96     {
97         result = new CMap();
98         PDFStreamParser parser = new PDFStreamParser( input, file );
99         parser.parse();
100         List JavaDoc tokens = parser.getTokens();
101         for( int i=0; i<tokens.size(); i++ )
102         {
103             Object JavaDoc token = tokens.get( i );
104             if( token instanceof PDFOperator )
105             {
106                 PDFOperator op = (PDFOperator)token;
107                 if( op.getOperation().equals( BEGIN_CODESPACE_RANGE ) )
108                 {
109                     COSNumber cosCount = (COSNumber)tokens.get( i-1 );
110                     for( int j=0; j<cosCount.intValue(); j++ )
111                     {
112                         i++;
113                         COSString startRange = (COSString)tokens.get( i );
114                         i++;
115                         COSString endRange = (COSString)tokens.get( i );
116                         CodespaceRange range = new CodespaceRange();
117                         range.setStart( startRange.getBytes() );
118                         range.setEnd( endRange.getBytes() );
119                         result.addCodespaceRange( range );
120                     }
121                 }
122                 else if( op.getOperation().equals( BEGIN_BASE_FONT_CHAR ) )
123                 {
124                     COSNumber cosCount = (COSNumber)tokens.get( i-1 );
125                     for( int j=0; j<cosCount.intValue(); j++ )
126                     {
127                         i++;
128                         COSString inputCode = (COSString)tokens.get( i );
129                         i++;
130                         Object JavaDoc nextToken = tokens.get( i );
131                         if( nextToken instanceof COSString )
132                         {
133                             byte[] bytes = ((COSString)nextToken).getBytes();
134                             String JavaDoc value = createStringFromBytes( bytes );
135                             result.addMapping( inputCode.getBytes(), value );
136                         }
137                         else if( nextToken instanceof COSName )
138                         {
139                             result.addMapping( inputCode.getBytes(), ((COSName)nextToken).getName() );
140                         }
141                         else
142                         {
143                             throw new IOException JavaDoc( "Error parsing CMap beginbfchar, expected{COSString " +
144                                                    "or COSName} and not " + nextToken );
145                         }
146                     }
147                 }
148                else if( op.getOperation().equals( BEGIN_BASE_FONT_RANGE ) )
149                 {
150                     COSNumber cosCount = (COSNumber)tokens.get( i-1 );
151                     
152                     for( int j=0; j<cosCount.intValue(); j++ )
153                     {
154                         i++;
155                         COSString startCode = (COSString)tokens.get( i );
156                         i++;
157                         COSString endCode = (COSString)tokens.get( i );
158                         i++;
159                         Object JavaDoc nextToken = tokens.get( i );
160                         COSArray array = null;
161                         if( nextToken instanceof COSArray )
162                         {
163                             array = (COSArray)nextToken;
164                         }
165
166                         byte[] startBytes = startCode.getBytes();
167                         byte[] endBytes = endCode.getBytes();
168                         byte[] tokenBytes = null;
169                         if( array == null )
170                         {
171                             tokenBytes = ((COSString)nextToken).getBytes();
172                         }
173                         else
174                         {
175                             tokenBytes = ((COSString)array.getObject( 0 )).getBytes();
176                         }
177
178                         String JavaDoc value = null;
179                         
180                         int arrayIndex = 0;
181                         boolean done = false;
182                         while( !done )
183                         {
184                             if( compare( startBytes, endBytes ) >= 0 )
185                             {
186                                 done = true;
187                             }
188                             value = createStringFromBytes( tokenBytes );
189                             result.addMapping( startBytes, value );
190                             increment( startBytes );
191                             
192                             if( array == null )
193                             {
194                                 increment( tokenBytes );
195                             }
196                             else
197                             {
198                                 if( arrayIndex < array.size() )
199                                 {
200                                     tokenBytes = ((COSString)array.getObject( arrayIndex++ )).getBytes();
201                                 }
202                             }
203                         }
204                     }
205                 }
206             }
207         }
208     }
209
210     private void increment( byte[] data )
211     {
212         increment( data, data.length-1 );
213     }
214
215     private void increment( byte[] data, int position )
216     {
217         if( position > 0 && (data[position]+256)%256 == 255 )
218         {
219             data[position]=0;
220             increment( data, position-1);
221         }
222         else
223         {
224             data[position] = (byte)(data[position]+1);
225         }
226     }
227     
228     private String JavaDoc createStringFromBytes( byte[] bytes ) throws IOException JavaDoc
229     {
230         String JavaDoc retval = null;
231         if( bytes.length == 1 )
232         {
233             retval = new String JavaDoc( bytes );
234         }
235         else
236         {
237             retval = new String JavaDoc( bytes, "UTF-16BE" );
238         }
239         return retval;
240     }
241
242     private int compare( byte[] first, byte[] second )
243     {
244         int retval = 1;
245         boolean done = false;
246         for( int i=0; i<first.length && !done; i++ )
247         {
248             if( first[i] == second[i] )
249             {
250                 //move to next position
251
}
252             else if( ((first[i]+256)%256) < ((second[i]+256)%256) )
253             {
254                 done = true;
255                 retval = -1;
256             }
257             else
258             {
259                 done = true;
260                 retval = 1;
261             }
262         }
263         return retval;
264     }
265     
266     /**
267      * A simple class to test parsing of cmap files.
268      *
269      * @param args Some command line arguments.
270      *
271      * @throws Exception If there is an error parsing the file.
272      */

273     public static void main( String JavaDoc[] args ) throws Exception JavaDoc
274     {
275         if( args.length != 1 )
276         {
277             System.err.println( "usage: java org.pdfbox.cmapparser.CMapParser <CMAP File>" );
278             System.exit( -1 );
279         }
280         CMapParser parser = new CMapParser( new FileInputStream JavaDoc( args[0] ), null );
281         parser.parse();
282         CMap result = parser.getResult();
283         System.out.println( "Result:" + result );
284     }
285 }
Popular Tags