KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > opencms > search > TestCmsSearchInDocuments


1 /*
2  * File : $Source: /usr/local/cvs/opencms/test/org/opencms/search/TestCmsSearchInDocuments.java,v $
3  * Date : $Date: 2006/03/27 14:52:51 $
4  * Version: $Revision: 1.11 $
5  *
6  * This library is part of OpenCms -
7  * the Open Source Content Management System
8  *
9  * Copyright (c) 2005 Alkacon Software GmbH (http://www.alkacon.com)
10  *
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * For further information about Alkacon Software GmbH, please see the
22  * company website: http://www.alkacon.com
23  *
24  * For further information about OpenCms, please see the
25  * project website: http://www.opencms.org
26  *
27  * You should have received a copy of the GNU Lesser General Public
28  * License along with this library; if not, write to the Free Software
29  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30  */

31
32 package org.opencms.search;
33
34 import org.opencms.file.CmsObject;
35 import org.opencms.file.CmsProperty;
36 import org.opencms.file.CmsPropertyDefinition;
37 import org.opencms.file.CmsResource;
38 import org.opencms.file.types.CmsResourceTypeBinary;
39 import org.opencms.file.types.CmsResourceTypeFolder;
40 import org.opencms.file.types.CmsResourceTypePlain;
41 import org.opencms.i18n.CmsEncoder;
42 import org.opencms.main.OpenCms;
43 import org.opencms.report.CmsShellReport;
44 import org.opencms.search.documents.I_CmsDocumentFactory;
45 import org.opencms.test.OpenCmsTestCase;
46 import org.opencms.test.OpenCmsTestProperties;
47
48 import java.util.ArrayList JavaDoc;
49 import java.util.Collections JavaDoc;
50 import java.util.Iterator JavaDoc;
51 import java.util.List JavaDoc;
52
53 import junit.extensions.TestSetup;
54 import junit.framework.Test;
55 import junit.framework.TestSuite;
56
57 /**
58  * Unit test for searching in extracted document text.<p>
59  *
60  * @author Alexander Kandzior
61  * @version $Revision: 1.11 $
62  */

63 public class TestCmsSearchInDocuments extends OpenCmsTestCase {
64
65     /** Name of the index used for testing. */
66     public static final String JavaDoc INDEX_OFFLINE = "Offline project (VFS)";
67
68     /** The index used for testing. */
69     public static final String JavaDoc INDEX_ONLINE = "Online project (VFS)";
70
71     /**
72      * Default JUnit constructor.<p>
73      *
74      * @param arg0 JUnit parameters
75      */

76     public TestCmsSearchInDocuments(String JavaDoc arg0) {
77
78         super(arg0);
79     }
80
81     /**
82      * Test suite for this test class.<p>
83      *
84      * @return the test suite
85      */

86     public static Test suite() {
87
88         OpenCmsTestProperties.initialize(org.opencms.test.AllTests.TEST_PROPERTIES_PATH);
89
90         TestSuite suite = new TestSuite();
91         suite.setName(TestCmsSearchInDocuments.class.getName());
92
93         suite.addTest(new TestCmsSearchInDocuments("testSearchIndexGeneration"));
94         suite.addTest(new TestCmsSearchInDocuments("testSearchInDocuments"));
95         suite.addTest(new TestCmsSearchInDocuments("testSearchBoost"));
96         suite.addTest(new TestCmsSearchInDocuments("testSearchBoostInMeta"));
97
98         TestSetup wrapper = new TestSetup(suite) {
99
100             protected void setUp() {
101
102                 setupOpenCms("simpletest", "/sites/default/");
103             }
104
105             protected void tearDown() {
106
107                 removeOpenCms();
108             }
109         };
110
111         return wrapper;
112     }
113
114     /**
115      * Tests search boosting.<p>
116      *
117      * @throws Exception if the test fails
118      */

119     public void testSearchBoost() throws Exception JavaDoc {
120
121         CmsObject cms = getCmsObject();
122         echo("Testing search boosting");
123
124         // perform a search on the newly generated index
125
CmsSearch searchBean = new CmsSearch();
126         List JavaDoc searchResult;
127
128         // count depend on the number of documents indexed
129
int expected = 6;
130
131         searchBean.init(cms);
132         searchBean.setIndex(INDEX_OFFLINE);
133         searchBean.setSearchRoot("/search/");
134
135         searchBean.setQuery("Alkacon Software");
136         searchResult = searchBean.getSearchResult();
137
138         Iterator JavaDoc i = searchResult.iterator();
139         while (i.hasNext()) {
140             CmsSearchResult res = (CmsSearchResult)i.next();
141             System.out.print(res.getPath() + " ");
142             System.out.println(res.getScore());
143         }
144         assertEquals(expected, searchResult.size());
145
146         CmsSearchResult res1 = (CmsSearchResult)searchResult.get(searchResult.size() - 1);
147         CmsSearchResult res2 = (CmsSearchResult)searchResult.get(searchResult.size() - 2);
148         CmsSearchResult res3 = (CmsSearchResult)searchResult.get(0);
149
150         String JavaDoc path1 = cms.getRequestContext().removeSiteRoot(res1.getPath());
151         String JavaDoc path2 = cms.getRequestContext().removeSiteRoot(res2.getPath());
152         String JavaDoc path3 = cms.getRequestContext().removeSiteRoot(res3.getPath());
153
154         CmsProperty maxBoost = new CmsProperty(
155             CmsPropertyDefinition.PROPERTY_SEARCH_PRIORITY,
156             I_CmsDocumentFactory.SEARCH_PRIORITY_MAX_VALUE,
157             null,
158             true);
159         CmsProperty highBoost = new CmsProperty(
160             CmsPropertyDefinition.PROPERTY_SEARCH_PRIORITY,
161             I_CmsDocumentFactory.SEARCH_PRIORITY_HIGH_VALUE,
162             null,
163             true);
164         CmsProperty lowBoost = new CmsProperty(
165             CmsPropertyDefinition.PROPERTY_SEARCH_PRIORITY,
166             I_CmsDocumentFactory.SEARCH_PRIORITY_LOW_VALUE,
167             null,
168             true);
169
170         cms.lockResource(path1);
171         cms.writePropertyObject(path1, maxBoost);
172         cms.unlockResource(path1);
173         cms.lockResource(path2);
174         cms.writePropertyObject(path2, highBoost);
175         cms.unlockResource(path2);
176         cms.lockResource(path3);
177         cms.writePropertyObject(path3, lowBoost);
178         cms.unlockResource(path3);
179
180         // update the search indexes
181
OpenCms.getSearchManager().rebuildAllIndexes(new CmsShellReport(cms.getRequestContext().getLocale()));
182
183         // perform the same search again in the online index - must be same result as before
184
searchBean.setIndex(INDEX_ONLINE);
185         searchBean.setQuery("Alkacon Software");
186         searchResult = searchBean.getSearchResult();
187
188         i = searchResult.iterator();
189         while (i.hasNext()) {
190             CmsSearchResult res = (CmsSearchResult)i.next();
191             System.out.print(res.getPath() + " ");
192             System.out.println(res.getScore() + " ");
193             System.out.println(res.getExcerpt());
194         }
195         assertEquals(expected, searchResult.size());
196
197         assertEquals(res1.getPath(), ((CmsSearchResult)searchResult.get(searchResult.size() - 1)).getPath());
198         assertEquals(res2.getPath(), ((CmsSearchResult)searchResult.get(searchResult.size() - 2)).getPath());
199         assertEquals(res3.getPath(), ((CmsSearchResult)searchResult.get(0)).getPath());
200
201         // now the search in the offline index - the boosted docs should now be on top
202
searchBean.setIndex(INDEX_OFFLINE);
203         searchBean.setQuery("Alkacon Software");
204         searchResult = searchBean.getSearchResult();
205
206         i = searchResult.iterator();
207         while (i.hasNext()) {
208             CmsSearchResult res = (CmsSearchResult)i.next();
209             System.out.print(res.getPath() + " ");
210             System.out.println(res.getScore() + " ");
211             System.out.println(res.getExcerpt());
212         }
213         assertEquals(expected, searchResult.size());
214
215         // ensure boosted results are on top
216
assertEquals(res1.getPath(), ((CmsSearchResult)searchResult.get(0)).getPath());
217         assertEquals(res2.getPath(), ((CmsSearchResult)searchResult.get(1)).getPath());
218         // low boosted document should be on last position
219
assertEquals(res3.getPath(), ((CmsSearchResult)searchResult.get(searchResult.size() - 1)).getPath());
220     }
221
222     /**
223      * Tests search boosting when seachrching in meta information only.<p>
224      *
225      * @throws Exception if the test fails
226      */

227     public void testSearchBoostInMeta() throws Exception JavaDoc {
228
229         CmsObject cms = getCmsObject();
230         echo("Testing search boosting in meta information");
231
232         // perform a search on the newly generated index
233
CmsSearch searchBean = new CmsSearch();
234         List JavaDoc searchResult;
235
236         // count depend on the number of documents indexed
237
int expected = 6;
238
239         String JavaDoc path = "/search/";
240         String JavaDoc query = "OpenCms by Alkacon";
241
242         searchBean.init(cms);
243         searchBean.setIndex(INDEX_OFFLINE);
244         searchBean.setSearchRoot(path);
245
246         searchBean.setQuery(query);
247         // ensure only meta information is searched
248
searchBean.setField(new String JavaDoc[] {I_CmsDocumentFactory.DOC_META});
249         searchResult = searchBean.getSearchResult();
250         // since no resource has any description, no results should be found
251
Iterator JavaDoc i = searchResult.iterator();
252         while (i.hasNext()) {
253             CmsSearchResult res = (CmsSearchResult)i.next();
254             System.out.print(res.getPath() + " ");
255             System.out.println(res.getScore());
256         }
257         assertEquals(0, searchResult.size());
258
259         CmsProperty descripion = new CmsProperty(CmsPropertyDefinition.PROPERTY_DESCRIPTION, query, null, true);
260         CmsProperty delete = new CmsProperty(
261             CmsPropertyDefinition.PROPERTY_SEARCH_PRIORITY,
262             CmsProperty.DELETE_VALUE,
263             CmsProperty.DELETE_VALUE);
264
265         List JavaDoc resources = cms.getFilesInFolder(path);
266
267         i = resources.iterator();
268         while (i.hasNext()) {
269             CmsResource res = (CmsResource)i.next();
270             String JavaDoc sitePath = cms.getSitePath(res);
271             System.out.println(sitePath);
272             cms.lockResource(sitePath);
273             cms.writePropertyObject(sitePath, descripion);
274             // delete potential "search.priority" setting from earlier tests
275
cms.writePropertyObject(sitePath, delete);
276             cms.unlockResource(sitePath);
277         }
278         assertEquals(expected, resources.size());
279
280         // update the search indexes
281
OpenCms.getSearchManager().rebuildAllIndexes(new CmsShellReport(cms.getRequestContext().getLocale()));
282
283         // perform the same search again in the online index - must be same result as before
284
searchBean.setIndex(INDEX_ONLINE);
285         searchBean.setQuery(query);
286         searchResult = searchBean.getSearchResult();
287         assertEquals(0, searchResult.size());
288
289         // now the search in the offline index - documents should now be found
290
searchBean.setIndex(INDEX_OFFLINE);
291         searchBean.setQuery(query);
292         List JavaDoc firstSearchResult = searchBean.getSearchResult();
293
294         i = firstSearchResult.iterator();
295         while (i.hasNext()) {
296             CmsSearchResult res = (CmsSearchResult)i.next();
297             System.out.print(res.getPath() + " ");
298             System.out.println(res.getScore());
299         }
300         assertEquals(expected, firstSearchResult.size());
301
302         CmsSearchResult res1 = (CmsSearchResult)firstSearchResult.get(firstSearchResult.size() - 1);
303         CmsSearchResult res2 = (CmsSearchResult)firstSearchResult.get(firstSearchResult.size() - 2);
304         CmsSearchResult res3 = (CmsSearchResult)firstSearchResult.get(0);
305
306         String JavaDoc path1 = cms.getRequestContext().removeSiteRoot(res1.getPath());
307         String JavaDoc path2 = cms.getRequestContext().removeSiteRoot(res2.getPath());
308         String JavaDoc path3 = cms.getRequestContext().removeSiteRoot(res3.getPath());
309
310         CmsProperty maxBoost = new CmsProperty(
311             CmsPropertyDefinition.PROPERTY_SEARCH_PRIORITY,
312             I_CmsDocumentFactory.SEARCH_PRIORITY_MAX_VALUE,
313             null,
314             true);
315         CmsProperty highBoost = new CmsProperty(
316             CmsPropertyDefinition.PROPERTY_SEARCH_PRIORITY,
317             I_CmsDocumentFactory.SEARCH_PRIORITY_HIGH_VALUE,
318             null,
319             true);
320         CmsProperty lowBoost = new CmsProperty(
321             CmsPropertyDefinition.PROPERTY_SEARCH_PRIORITY,
322             I_CmsDocumentFactory.SEARCH_PRIORITY_LOW_VALUE,
323             null,
324             true);
325
326         cms.lockResource(path1);
327         cms.writePropertyObject(path1, maxBoost);
328         cms.unlockResource(path1);
329         cms.lockResource(path2);
330         cms.writePropertyObject(path2, highBoost);
331         cms.unlockResource(path2);
332         cms.lockResource(path3);
333         cms.writePropertyObject(path3, lowBoost);
334         cms.unlockResource(path3);
335
336         // update the search indexes
337
OpenCms.getSearchManager().rebuildAllIndexes(new CmsShellReport(cms.getRequestContext().getLocale()));
338
339         // perform the same search again in the online index - must be same result as before
340
searchBean.setIndex(INDEX_ONLINE);
341         searchBean.setQuery(query);
342         searchResult = searchBean.getSearchResult();
343         assertEquals(0, searchResult.size());
344
345         // just output the first seach result again, just for convenient comparison on the console
346
i = firstSearchResult.iterator();
347         while (i.hasNext()) {
348             CmsSearchResult res = (CmsSearchResult)i.next();
349             System.out.print(res.getPath() + " ");
350             System.out.println(res.getScore() + " ");
351             System.out.println(res.getExcerpt());
352         }
353
354         // now the search in the offline index - the boosted docs should now be on top
355
searchBean.setIndex(INDEX_OFFLINE);
356         searchBean.setQuery(query);
357         searchResult = searchBean.getSearchResult();
358
359         i = searchResult.iterator();
360         while (i.hasNext()) {
361             CmsSearchResult res = (CmsSearchResult)i.next();
362             System.out.print(res.getPath() + " ");
363             System.out.println(res.getScore() + " ");
364             System.out.println(res.getExcerpt());
365         }
366         assertEquals(expected, searchResult.size());
367
368         // ensure boosted results are on top
369
assertEquals(res1.getPath(), ((CmsSearchResult)searchResult.get(0)).getPath());
370         assertEquals(res2.getPath(), ((CmsSearchResult)searchResult.get(1)).getPath());
371         // low boosted document should be on last position
372
assertEquals(res3.getPath(), ((CmsSearchResult)searchResult.get(searchResult.size() - 1)).getPath());
373     }
374
375     /**
376      * Imports the documents for the test cases in the VFS an generates the index.<p>
377      *
378      * Please note: This method need to be called first in this test suite, the
379      * other methods depend on the index generated here.<p>
380      *
381      * @throws Exception if the test fails
382      */

383     public void testSearchIndexGeneration() throws Exception JavaDoc {
384
385         CmsObject cms = getCmsObject();
386         echo("Testing search index generation with different resource types");
387
388         // create test folder
389
cms.createResource("/search/", CmsResourceTypeFolder.RESOURCE_TYPE_ID, null, null);
390         cms.unlockResource("/search/");
391
392         // import the sample documents to the VFS
393
importTestResource(
394             cms,
395             "org/opencms/search/extractors/test1.pdf",
396             "/search/test1.pdf",
397             CmsResourceTypeBinary.getStaticTypeId(),
398             Collections.EMPTY_LIST);
399         importTestResource(
400             cms,
401             "org/opencms/search/extractors/test1.doc",
402             "/search/test1.doc",
403             CmsResourceTypeBinary.getStaticTypeId(),
404             Collections.EMPTY_LIST);
405         importTestResource(
406             cms,
407             "org/opencms/search/extractors/test1.rtf",
408             "/search/test1.rtf",
409             CmsResourceTypeBinary.getStaticTypeId(),
410             Collections.EMPTY_LIST);
411         importTestResource(
412             cms,
413             "org/opencms/search/extractors/test1.xls",
414             "/search/test1.xls",
415             CmsResourceTypeBinary.getStaticTypeId(),
416             Collections.EMPTY_LIST);
417         importTestResource(
418             cms,
419             "org/opencms/search/extractors/test1.ppt",
420             "/search/test1.ppt",
421             CmsResourceTypeBinary.getStaticTypeId(),
422             Collections.EMPTY_LIST);
423
424         // HTML page is encoded using UTF-8
425
List JavaDoc properties = new ArrayList JavaDoc();
426         properties.add(new CmsProperty(
427             CmsPropertyDefinition.PROPERTY_CONTENT_ENCODING,
428             CmsEncoder.ENCODING_UTF_8,
429             null,
430             true));
431         importTestResource(
432             cms,
433             "org/opencms/search/extractors/test1.html",
434             "/search/test1.html",
435             CmsResourceTypePlain.getStaticTypeId(),
436             properties);
437
438         assertTrue(cms.existsResource("/search/test1.pdf"));
439         assertTrue(cms.existsResource("/search/test1.html"));
440         assertTrue(cms.existsResource("/search/test1.doc"));
441         assertTrue(cms.existsResource("/search/test1.rtf"));
442         assertTrue(cms.existsResource("/search/test1.xls"));
443         assertTrue(cms.existsResource("/search/test1.ppt"));
444
445         // publish the project
446
cms.publishProject(new CmsShellReport(cms.getRequestContext().getLocale()));
447
448         // update the search indexes
449
OpenCms.getSearchManager().rebuildAllIndexes(new CmsShellReport(cms.getRequestContext().getLocale()));
450
451         // check the online project
452
cms.getRequestContext().setCurrentProject(cms.readProject("Online"));
453
454         assertTrue(cms.existsResource("/search/test1.pdf"));
455         assertTrue(cms.existsResource("/search/test1.html"));
456         assertTrue(cms.existsResource("/search/test1.doc"));
457         assertTrue(cms.existsResource("/search/test1.rtf"));
458         assertTrue(cms.existsResource("/search/test1.xls"));
459         assertTrue(cms.existsResource("/search/test1.ppt"));
460     }
461
462     /**
463      * Tests searching in the VFS for specific Strings that are placed in
464      * various document formats.<p>
465      *
466      * @throws Exception if the test fails
467      */

468     public void testSearchInDocuments() throws Exception JavaDoc {
469
470         CmsObject cms = getCmsObject();
471         echo("Testing searching in different (complex) document types");
472
473         // perform a search on the newly generated index
474
CmsSearch searchBean = new CmsSearch();
475         List JavaDoc searchResult;
476
477         // count depend on the number of documents indexed
478
int expected = 6;
479
480         searchBean.init(cms);
481         searchBean.setIndex(INDEX_ONLINE);
482         searchBean.setSearchRoot("/search/");
483
484         searchBean.setQuery("Alkacon Software");
485         searchResult = searchBean.getSearchResult();
486         assertEquals(expected, searchResult.size());
487
488         searchBean.setQuery("The OpenCms experts");
489         searchResult = searchBean.getSearchResult();
490         assertEquals(expected, searchResult.size());
491
492         searchBean.setQuery("Some content here.");
493         searchResult = searchBean.getSearchResult();
494         assertEquals(expected, searchResult.size());
495
496         searchBean.setQuery("Some content there.");
497         searchResult = searchBean.getSearchResult();
498         assertEquals(expected, searchResult.size());
499
500         searchBean.setQuery("Some content on a second sheet.");
501         searchResult = searchBean.getSearchResult();
502         assertEquals(expected, searchResult.size());
503
504         searchBean.setQuery("Some content on the third sheet.");
505         searchResult = searchBean.getSearchResult();
506         assertEquals(expected, searchResult.size());
507
508         searchBean.setQuery("äöüÄÖÜß");
509         searchResult = searchBean.getSearchResult();
510         assertEquals(expected, searchResult.size());
511     }
512 }
Popular Tags