KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > test > pdfbox > util > TestTextStripperPerformance


/**
 * Copyright (c) 2003-2004, www.pdfbox.org
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 * 3. Neither the name of pdfbox; nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * http://www.pdfbox.org
 */

package test.pdfbox.util;

import java.io.File;
import java.io.FilenameFilter;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

import org.pdfbox.pdmodel.PDDocument;

import org.pdfbox.util.PDFTextStripper;
46
47 /**
48  * Test the performance of the PDF text stripper utility.
49  *
50  * @author <a HREF="mailto:ben@benlitchfield.com">Ben Litchfield</a>
51  * @version $Revision: 1.4 $
52  */

53 public class TestTextStripperPerformance extends TestCase
54 {
55     /**
56      * Test class constructor.
57      *
58      * @param name The name of the test class.
59      */

60     public TestTextStripperPerformance( String JavaDoc name )
61     {
62         super( name );
63     }
64
65     /**
66      * Test suite setup.
67      */

68     public void setUp()
69     {
70     }
71
72
73     /**
74      * Validate text extraction on a single file.
75      *
76      * @param file The file to validate
77      * @param bLogResult Whether to log the extracted text
78      * @throws Exception when there is an exception
79      */

80     public void doTestFile(File JavaDoc file, boolean bLogResult)
81         throws Exception JavaDoc
82     {
83
84         PDFTextStripper stripper = new PDFTextStripper();
85         OutputStream JavaDoc os = null;
86         Writer JavaDoc writer = null;
87         PDDocument document = null;
88         try
89         {
90             document = PDDocument.load(file);
91
92             File JavaDoc outFile = new File JavaDoc(file.getParentFile().getParentFile(), "output/" + file.getName() + ".txt");
93             os = new FileOutputStream JavaDoc(outFile);
94             writer = new OutputStreamWriter JavaDoc(os);
95
96             stripper.writeText(document, writer);
97         }
98         finally
99         {
100             if( writer != null )
101             {
102                 writer.close();
103             }
104             if( os != null )
105             {
106                 os.close();
107             }
108             if( document != null )
109             {
110                 document.close();
111             }
112         }
113     }
114
115     /**
116      * Test to validate text extraction of file set.
117      *
118      * @throws Exception when there is an exception
119      */

120     public void testExtract()
121         throws Exception JavaDoc
122     {
123         String JavaDoc filename = System.getProperty("test.pdfbox.util.TextStripper.file");
124         File JavaDoc testDir = new File JavaDoc("test/input");
125
126         if ((filename == null) || (filename.length() == 0))
127         {
128             File JavaDoc[] testFiles = testDir.listFiles(new FilenameFilter JavaDoc()
129             {
130                 public boolean accept(File JavaDoc dir, String JavaDoc name)
131                 {
132                     return (name.endsWith(".pdf"));
133                 }
134             });
135
136             for (int n = 0; n < testFiles.length; n++)
137             {
138                 doTestFile(testFiles[n], false);
139             }
140         }
141         else
142         {
143             //doTestFile(new File(testDir, filename), true);
144
}
145     }
146
147     /**
148      * Set the tests in the suite for this test class.
149      *
150      * @return the Suite.
151      */

152     public static Test suite()
153     {
154         return new TestSuite( TestTextStripperPerformance.class );
155     }
156     
157     /**
158      * Command line execution.
159      *
160      * @param args Command line arguments.
161      */

162     public static void main( String JavaDoc[] args )
163     {
164         String JavaDoc[] arg = {TestTextStripperPerformance.class.getName() };
165         junit.textui.TestRunner.main( arg );
166     }
167 }
Popular Tags