KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > opencms > legacy > CmsCosDocument


1 /*
2  * File : $Source: /usr/local/cvs/opencms/src-modules/com/opencms/legacy/CmsCosDocument.java,v $
3  * Date : $Date: 2005/07/29 12:13:00 $
4  * Version: $Revision: 1.10 $
5  *
6  * This library is part of OpenCms -
7  * the Open Source Content Management System
8  *
9  * Copyright (C) 2002 Alkacon Software (http://www.alkacon.com)
10  *
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * For further information about Alkacon Software, please see the
22  * company website: http://www.alkacon.com
23  *
24  * For further information about OpenCms, please see the
25  * project website: http://www.opencms.org
26  *
27  * You should have received a copy of the GNU Lesser General Public
28  * License along with this library; if not, write to the Free Software
29  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30  */

31
32 package com.opencms.legacy;
33
34 import org.opencms.file.CmsObject;
35 import org.opencms.main.CmsException;
36 import org.opencms.main.OpenCms;
37 import org.opencms.search.A_CmsIndexResource;
38 import org.opencms.search.documents.I_CmsDocumentFactory;
39 import org.opencms.search.extractors.CmsExtractionResult;
40 import org.opencms.search.extractors.I_CmsExtractionResult;
41 import org.opencms.util.CmsHtmlExtractor;
42 import org.opencms.util.CmsStringUtil;
43
44 import com.opencms.defaults.master.*;
45
46 import java.util.ArrayList JavaDoc;
47 import java.util.Date JavaDoc;
48 import java.util.Iterator JavaDoc;
49 import java.util.List JavaDoc;
50 import java.util.regex.Pattern JavaDoc;
51
52 import org.apache.lucene.document.Document;
53 import org.apache.lucene.document.Field;
54
55 /**
56  * Lucene document factory class to extract index data from a cos resource
57  * of any type derived from <code>CmsMasterDataSet</code>.<p>
58  *
59  * @version $Revision: 1.10 $ $Date: 2005/07/29 12:13:00 $
60  * @author Carsten Weinholz (c.weinholz@alkacon.com)
61  * @author Thomas Weckert (t.weckert@alkacon.com)
62  *
63  * @deprecated Will not be supported past the OpenCms 6 release.
64  */

65 public class CmsCosDocument implements I_CmsDocumentFactory {
66
67     /** The cos prefix for document keys. */
68     public static final String JavaDoc C_DOCUMENT_KEY_PREFIX = "COS";
69
70     /* Matches anything that is not a number, hex-number, uuid or whitespace */
71     private static final Pattern JavaDoc C_NON_NUM_UUID_WS = Pattern.compile("[^a-fA-F0-9\\-_\\s]");
72
73     /** The cms object. */
74     protected CmsObject m_cms;
75
76     /** Name of the document type. */
77     protected String JavaDoc m_name;
78
79     /** Channel of cos document. */
80     public static final String JavaDoc DOC_CHANNEL = "channel";
81
82     /** Content id of cos document. */
83     public static final String JavaDoc DOC_CONTENT_ID = "contentid";
84
85     /** Content definition of cos document. */
86     public static final String JavaDoc DOC_CONTENT_DEFINITION = "contentdefinition";
87
88     /**
89      * Creates a new instance of this lucene document factory.<p>
90      *
91      * @param cms the cms object
92      * @param name name of the documenttype
93      */

94     public CmsCosDocument(CmsObject cms, String JavaDoc name) {
95
96         m_cms = cms;
97         m_name = name;
98     }
99
100     /**
101      * Returns the raw text content of a given cos resource.<p>
102      * The contents of a cos object are accessed using the class <code>CmsMasterDataSet</code>.
103      * For indexing purposes, the contents of the arrays <code>m_dataSmall</code>, <code>m_dataMedium</code>
104      * and <code>m_dataBig</code> are collected in a string.
105      *
106      * @param cms the cms object
107      * @param indexResource the resource
108      * @param language the language requested
109      * @return the raw text content
110      * @throws CmsException if something goes wrong
111      */

112     public I_CmsExtractionResult extractContent(CmsObject cms, A_CmsIndexResource indexResource, String JavaDoc language)
113     throws CmsException {
114
115         CmsMasterDataSet resource = (CmsMasterDataSet)indexResource.getData();
116         String JavaDoc result = null;
117
118         try {
119
120             StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
121
122             for (int i = 0; i < resource.m_dataMedium.length; i++) {
123                 if (resource.m_dataMedium[i] != null && !"".equals(resource.m_dataMedium[i])) {
124                     buf.append((i > 0) ? " " : "");
125                     buf.append(resource.m_dataMedium[i]);
126                 }
127             }
128
129             for (int i = 0; i < resource.m_dataBig.length; i++) {
130                 if (resource.m_dataBig[i] != null && !"".equals(resource.m_dataBig[i])) {
131                     buf.append((i > 0) ? " " : "");
132                     buf.append(resource.m_dataBig[i]);
133                 }
134             }
135
136             for (int i = 0; i < resource.m_dataSmall.length; i++) {
137                 if (resource.m_dataSmall[i] != null && !"".equals(resource.m_dataSmall[i])) {
138                     if (C_NON_NUM_UUID_WS.matcher(resource.m_dataSmall[i]).find()) {
139                         buf.append((i > 0) ? " " : "");
140                         buf.append(resource.m_dataSmall[i]);
141                     }
142                 }
143             }
144
145             result = CmsHtmlExtractor.extractText(buf.toString(), OpenCms.getSystemInfo().getDefaultEncoding());
146
147         } catch (Exception JavaDoc exc) {
148             throw new CmsLegacyException("Reading resource " + indexResource.getRootPath() + " failed", exc);
149         }
150
151         return new CmsExtractionResult(result);
152     }
153
154     /**
155      * @see org.opencms.search.documents.I_CmsDocumentFactory#getDocumentKey(java.lang.String)
156      */

157     public String JavaDoc getDocumentKey(String JavaDoc resourceType) throws CmsException {
158
159         try {
160             return C_DOCUMENT_KEY_PREFIX + ((CmsMasterContent)Class.forName(resourceType).newInstance()).getSubId();
161         } catch (Exception JavaDoc exc) {
162             throw new CmsLegacyException("Instanciation of resource type class " + resourceType + " failed.", exc);
163         }
164     }
165
166     /**
167      * @see org.opencms.search.documents.I_CmsDocumentFactory#getDocumentKeys(java.util.List, java.util.List)
168      */

169     public List JavaDoc getDocumentKeys(List JavaDoc resourceTypes, List JavaDoc mimeTypes) throws CmsException {
170
171         ArrayList JavaDoc keys = new ArrayList JavaDoc();
172
173         try {
174             for (Iterator JavaDoc i = resourceTypes.iterator(); i.hasNext();) {
175
176                 int id = ((CmsMasterContent)Class.forName((String JavaDoc)i.next()).newInstance()).getSubId();
177                 for (Iterator JavaDoc j = resourceTypes.iterator(); j.hasNext();) {
178                     keys.add(C_DOCUMENT_KEY_PREFIX + id + ":" + (String JavaDoc)j.next());
179                 }
180
181                 keys.add(C_DOCUMENT_KEY_PREFIX + id);
182             }
183         } catch (Exception JavaDoc exc) {
184             throw new CmsLegacyException("Creation of document keys failed.", exc);
185         }
186
187         return keys;
188     }
189
190     /**
191      * @see org.opencms.search.documents.I_CmsDocumentFactory#getName()
192      */

193     public String JavaDoc getName() {
194
195         return m_name;
196     }
197
198     /**
199      * Generates a new lucene document instance from contents of the given resource.<p>
200      *
201      * @see org.opencms.search.documents.I_CmsDocumentFactory#newInstance(org.opencms.file.CmsObject, org.opencms.search.A_CmsIndexResource, java.lang.String)
202      */

203     public Document newInstance(CmsObject cms, A_CmsIndexResource resource, String JavaDoc language) throws CmsException {
204
205         Document document = new Document();
206         CmsMasterDataSet content = (CmsMasterDataSet)resource.getData();
207         String JavaDoc value = content.m_title;
208
209         if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(value)) {
210             document.add(Field.Keyword(I_CmsDocumentFactory.DOC_TITLE_KEY, value));
211             document.add(Field.UnStored(I_CmsDocumentFactory.DOC_TITLE_INDEXED, value));
212             document.add(Field.UnStored(I_CmsDocumentFactory.DOC_META, value));
213         }
214
215         document.add(Field.Keyword(I_CmsDocumentFactory.DOC_DATE_CREATED, new Date JavaDoc(content.m_dateCreated)));
216         document.add(Field.Keyword(I_CmsDocumentFactory.DOC_DATE_LASTMODIFIED, new Date JavaDoc(content.m_dateLastModified)));
217
218         document.add(Field.Keyword(CmsCosDocument.DOC_CHANNEL, ((CmsCosIndexResource)resource).getChannel()));
219         document.add(Field.Keyword(CmsCosDocument.DOC_CONTENT_DEFINITION, ((CmsCosIndexResource)resource)
220             .getContentDefinition()));
221
222         String JavaDoc path = m_cms.getRequestContext().removeSiteRoot(resource.getRootPath());
223         document.add(Field.UnIndexed(I_CmsDocumentFactory.DOC_PATH, path));
224         document.add(Field.UnIndexed(CmsCosDocument.DOC_CONTENT_ID, resource.getId().toString()));
225
226         I_CmsExtractionResult extract = extractContent(cms, resource, language);
227         document.add(Field.Text(I_CmsDocumentFactory.DOC_CONTENT, extract.getContent()));
228         extract.release();
229             
230         return document;
231     }
232 }
Popular Tags