KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > opencms > search > documents > CmsDocumentXmlContent


/*
 * File : $Source: /usr/local/cvs/opencms/src/org/opencms/search/documents/CmsDocumentXmlContent.java,v $
 * Date : $Date: 2005/06/27 23:22:25 $
 * Version: $Revision: 1.8 $
 *
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) 2005 Alkacon Software GmbH (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

31
package org.opencms.search.documents;

import org.opencms.file.CmsFile;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsResource;
import org.opencms.file.types.CmsResourceTypeXmlContent;
import org.opencms.file.types.I_CmsResourceType;
import org.opencms.i18n.CmsLocaleManager;
import org.opencms.main.CmsException;
import org.opencms.main.OpenCms;
import org.opencms.search.A_CmsIndexResource;
import org.opencms.search.CmsIndexException;
import org.opencms.search.extractors.CmsExtractionResult;
import org.opencms.search.extractors.I_CmsExtractionResult;
import org.opencms.xml.A_CmsXmlDocument;
import org.opencms.xml.content.CmsXmlContentFactory;
import org.opencms.xml.types.I_CmsXmlContentValue;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
54
55 /**
56  * Lucene document factory class to extract index data from a cms resource
57  * of type <code>CmsResourceTypeXmlContent</code>.<p>
58  *
59  * @author Carsten Weinholz
60  *
61  * @version $Revision: 1.8 $
62  *
63  * @since 6.0.0
64  */

65 public class CmsDocumentXmlContent extends A_CmsVfsDocument {
66
67     /**
68      * Creates a new instance of this lucene document factory.<p>
69      *
70      * @param name name of the documenttype
71      */

72     public CmsDocumentXmlContent(String JavaDoc name) {
73
74         super(name);
75     }
76
77     /**
78      * Returns the raw text content of a given vfs resource of type <code>CmsResourceTypeXmlContent</code>.<p>
79      *
80      * @see org.opencms.search.documents.A_CmsVfsDocument#extractContent(org.opencms.file.CmsObject, org.opencms.search.A_CmsIndexResource, java.lang.String)
81      */

82     public I_CmsExtractionResult extractContent(CmsObject cms, A_CmsIndexResource indexResource, String JavaDoc language)
83     throws CmsException {
84
85         CmsResource resource = (CmsResource)indexResource.getData();
86         String JavaDoc result = null;
87
88         try {
89             CmsFile file = CmsFile.upgrade(resource, cms);
90             String JavaDoc absolutePath = cms.getSitePath(file);
91             A_CmsXmlDocument xmlContent = CmsXmlContentFactory.unmarshal(cms, file);
92
93             List JavaDoc locales = xmlContent.getLocales();
94             if (locales.size() == 0) {
95                 locales = OpenCms.getLocaleManager().getDefaultLocales(cms, absolutePath);
96             }
97             Locale JavaDoc locale = OpenCms.getLocaleManager().getBestMatchingLocale(
98                 CmsLocaleManager.getLocale(language),
99                 OpenCms.getLocaleManager().getDefaultLocales(cms, absolutePath),
100                 locales);
101
102             List JavaDoc elements = xmlContent.getNames(locale);
103             StringBuffer JavaDoc content = new StringBuffer JavaDoc();
104             for (Iterator JavaDoc i = elements.iterator(); i.hasNext();) {
105                 I_CmsXmlContentValue value = xmlContent.getValue((String JavaDoc)i.next(), locale);
106                 String JavaDoc plainText = value.getPlainText(cms);
107                 if (plainText != null) {
108                     content.append(plainText);
109                     content.append('\n');
110                 }
111             }
112
113             result = content.toString();
114             // CmsHtmlExtractor extractor = new CmsHtmlExtractor();
115
//rawContent = extractor.extractText(content.toString(), page.getEncoding());
116

117         } catch (Exception JavaDoc e) {
118             throw new CmsIndexException(
119                 Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
120                 e);
121         }
122
123         return new CmsExtractionResult(result);
124     }
125
126     /**
127      * @see org.opencms.search.documents.I_CmsDocumentFactory#getDocumentKeys(java.util.List, java.util.List)
128      */

129     public List JavaDoc getDocumentKeys(List JavaDoc resourceTypes, List JavaDoc mimeTypes) throws CmsException {
130
131         if (resourceTypes.contains("*")) {
132             ArrayList JavaDoc allTypes = new ArrayList JavaDoc();
133             for (Iterator JavaDoc i = OpenCms.getResourceManager().getResourceTypes().iterator(); i.hasNext();) {
134                 I_CmsResourceType resourceType = (I_CmsResourceType)i.next();
135                 if (resourceType instanceof CmsResourceTypeXmlContent
136                     && ((CmsResourceTypeXmlContent)resourceType).getConfiguration().containsKey(
137                         CmsResourceTypeXmlContent.CONFIGURATION_SCHEMA)) {
138                     allTypes.add(resourceType.getTypeName());
139                 }
140             }
141             resourceTypes = allTypes;
142         }
143
144         return super.getDocumentKeys(resourceTypes, mimeTypes);
145     }
146 }
Popular Tags