KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > opencms > search > CmsVfsIndexer


1 /*
2  * File : $Source: /usr/local/cvs/opencms/src/org/opencms/search/CmsVfsIndexer.java,v $
3  * Date : $Date: 2006/03/27 14:52:54 $
4  * Version: $Revision: 1.34 $
5  *
6  * This library is part of OpenCms -
7  * the Open Source Content Management System
8  *
9  * Copyright (c) 2005 Alkacon Software GmbH (http://www.alkacon.com)
10  *
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * For further information about Alkacon Software GmbH, please see the
22  * company website: http://www.alkacon.com
23  *
24  * For further information about OpenCms, please see the
25  * project website: http://www.opencms.org
26  *
27  * You should have received a copy of the GNU Lesser General Public
28  * License along with this library; if not, write to the Free Software
29  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30  */

31
32 package org.opencms.search;
33
import org.opencms.db.CmsPublishedResource;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsProject;
import org.opencms.file.CmsResource;
import org.opencms.file.CmsResourceFilter;
import org.opencms.main.CmsException;
import org.opencms.main.CmsLog;
import org.opencms.report.I_CmsReport;
import org.opencms.search.documents.I_CmsDocumentFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
55
56 /**
57  * Implementation for an indexer indexing VFS Cms resources.<p>
58  *
59  * @author Carsten Weinholz
60  * @author Thomas Weckert
61  *
62  * @version $Revision: 1.34 $
63  *
64  * @since 6.0.0
65  */

66 public class CmsVfsIndexer implements I_CmsIndexer {
67
68     /** The log object for this class. */
69     private static final Log LOG = CmsLog.getLog(CmsVfsIndexer.class);
70
71     /** The OpenCms user context to use when reading resources from the VFS during indexing. */
72     private CmsObject m_cms;
73
74     /** The index. */
75     private CmsSearchIndex m_index;
76
77     /** The report. */
78     private I_CmsReport m_report;
79
80     /**
81      * @see org.opencms.search.I_CmsIndexer#deleteResources(org.apache.lucene.index.IndexReader, java.util.List)
82      */

83     public void deleteResources(IndexReader reader, List JavaDoc resourcesToDelete) {
84
85         if ((resourcesToDelete == null) || resourcesToDelete.isEmpty()) {
86             // nothing to délete
87
return;
88         }
89
90         // contains all resources already deleted to avoid multiple deleting in case of siblings
91
List JavaDoc resourcesAlreadyDeleted = new ArrayList JavaDoc(resourcesToDelete.size());
92
93         Iterator JavaDoc i = resourcesToDelete.iterator();
94         while (i.hasNext()) {
95             // iterate all resources in the given list of resources to delete
96
CmsPublishedResource res = (CmsPublishedResource)i.next();
97             String JavaDoc rootPath = res.getRootPath();
98             if (!resourcesAlreadyDeleted.contains(rootPath)) {
99                 // ensure siblings are only deleted once per update
100
resourcesAlreadyDeleted.add(rootPath);
101                 // search for an exact match on the document root path
102
Term term = new Term(I_CmsDocumentFactory.DOC_PATH, rootPath);
103                 try {
104                     // delete all documents with this term from the index
105
reader.deleteDocuments(term);
106                 } catch (IOException JavaDoc e) {
107                     if (LOG.isWarnEnabled()) {
108                         LOG.warn(Messages.get().getBundle().key(
109                             Messages.LOG_IO_INDEX_DOCUMENT_DELETE_2,
110                             rootPath,
111                             m_index.getName()), e);
112                     }
113                 }
114             }
115         }
116     }
117
118     /**
119      * @see org.opencms.search.I_CmsIndexer#getIndexResource(org.opencms.file.CmsObject, org.apache.lucene.document.Document)
120      */

121     public A_CmsIndexResource getIndexResource(CmsObject cms, Document doc) throws CmsException {
122
123         A_CmsIndexResource result = null;
124
125         Field f = doc.getField(I_CmsDocumentFactory.DOC_PATH);
126         if (f != null) {
127
128             String JavaDoc path = cms.getRequestContext().removeSiteRoot(f.stringValue());
129             CmsResource resource = cms.readResource(path);
130             // an exception would have been thrown if the user has no read persmissions
131
result = new CmsVfsIndexResource(resource);
132         }
133
134         return result;
135     }
136
137     /**
138      * @see org.opencms.search.I_CmsIndexer#getUpdateData(org.opencms.search.CmsSearchIndexSource, java.util.List)
139      */

140     public CmsSearchIndexUpdateData getUpdateData(CmsSearchIndexSource source, List JavaDoc publishedResources) {
141
142         // create a new update collection from this indexer and the given index source
143
CmsSearchIndexUpdateData result = new CmsSearchIndexUpdateData(source, this);
144
145         Iterator JavaDoc i = publishedResources.iterator();
146         while (i.hasNext()) {
147             // check all published resources if they match this indexer / source
148
CmsPublishedResource resource = (CmsPublishedResource)i.next();
149             // VFS resources will always have a structure id
150
if (!resource.getStructureId().isNullUUID()) {
151                 // use utility method from CmsProject to check if published resource is "inside" this index source
152
if (CmsProject.isInsideProject(source.getResourcesNames(), resource.getRootPath())) {
153                     // the resource is "inside" this index source
154
if (resource.isNew()) {
155                         // new resource just needs to be updated
156
if (isResourceInTimeWindow(resource)) {
157                             // update only if resource is in time window
158
result.addResourceToUpdate(resource);
159                         }
160                     } else if (resource.isDeleted()) {
161                         // deleted resource just needs to be removed
162
result.addResourceToDelete(resource);
163                     } else if (resource.isChanged() || resource.isUnChanged()) {
164                         // changed (or unchaged) resource must be removed first, and then updated
165
// note: unchanged resources can be siblings that have been added from the online project,
166
// these must be treated as if the resource had changed
167
result.addResourceToDelete(resource);
168                         if (isResourceInTimeWindow(resource)) {
169                             // update only if resource is in time window
170
result.addResourceToUpdate(resource);
171                         }
172                     }
173                 }
174             }
175         }
176         return result;
177     }
178
179     /**
180      * @see org.opencms.search.I_CmsIndexer#newInstance(org.opencms.file.CmsObject, org.opencms.report.I_CmsReport, org.opencms.search.CmsSearchIndex)
181      */

182     public I_CmsIndexer newInstance(CmsObject cms, I_CmsReport report, CmsSearchIndex index) {
183
184         CmsVfsIndexer indexer = new CmsVfsIndexer();
185
186         indexer.m_cms = cms;
187         indexer.m_report = report;
188         indexer.m_index = index;
189
190         return indexer;
191     }
192
193     /**
194      * @see org.opencms.search.I_CmsIndexer#rebuildIndex(org.apache.lucene.index.IndexWriter, org.opencms.search.CmsIndexingThreadManager, org.opencms.search.CmsSearchIndexSource)
195      */

196     public void rebuildIndex(IndexWriter writer, CmsIndexingThreadManager threadManager, CmsSearchIndexSource source)
197     throws CmsIndexException {
198
199         List JavaDoc resourceNames = source.getResourcesNames();
200         Iterator JavaDoc i = resourceNames.iterator();
201         while (i.hasNext()) {
202             // read the resources from all configured source folders
203
String JavaDoc resourceName = (String JavaDoc)i.next();
204             List JavaDoc resources = null;
205             try {
206                 // read all resources (only files) below the given path
207
resources = m_cms.readResources(resourceName, CmsResourceFilter.DEFAULT.addRequireFile());
208             } catch (CmsException e) {
209                 if (m_report != null) {
210                     m_report.println(Messages.get().container(
211                         Messages.RPT_UNABLE_TO_READ_SOURCE_2,
212                         resourceName,
213                         e.getLocalizedMessage()), I_CmsReport.FORMAT_WARNING);
214                 }
215                 if (LOG.isWarnEnabled()) {
216                     LOG.warn(Messages.get().getBundle().key(
217                         Messages.LOG_UNABLE_TO_READ_SOURCE_2,
218                         resourceName,
219                         m_index.getName()), e);
220                 }
221             }
222             if (resources != null) {
223                 // iterate all resources found in the folder
224
Iterator JavaDoc j = resources.iterator();
225                 while (j.hasNext()) {
226                     // now update all the resources individually
227
CmsResource resource = (CmsResource)j.next();
228                     updateResource(writer, threadManager, resource);
229                 }
230             }
231         }
232     }
233
234     /**
235      * @see org.opencms.search.I_CmsIndexer#updateResources(org.apache.lucene.index.IndexWriter, org.opencms.search.CmsIndexingThreadManager, java.util.List)
236      */

237     public void updateResources(IndexWriter writer, CmsIndexingThreadManager threadManager, List JavaDoc resourcesToUpdate)
238     throws CmsIndexException {
239
240         if ((resourcesToUpdate == null) || resourcesToUpdate.isEmpty()) {
241             // nothing to update
242
return;
243         }
244
245         // contains all resources already updated to avoid multiple updates in case of siblings
246
List JavaDoc resourcesAlreadyUpdated = new ArrayList JavaDoc(resourcesToUpdate.size());
247
248         // index all resources that in the given list
249
Iterator JavaDoc i = resourcesToUpdate.iterator();
250         while (i.hasNext()) {
251             CmsPublishedResource res = (CmsPublishedResource)i.next();
252             CmsResource resource = null;
253             try {
254                 resource = m_cms.readResource(res.getRootPath());
255             } catch (CmsException e) {
256                 if (LOG.isWarnEnabled()) {
257                     LOG.warn(Messages.get().getBundle().key(
258                         Messages.LOG_UNABLE_TO_READ_RESOURCE_2,
259                         resource.getRootPath(),
260                         m_index.getName()), e);
261                 }
262             }
263             if (resource != null) {
264                 if (!resourcesAlreadyUpdated.contains(resource.getRootPath())) {
265                     // ensure resources are only indexed once per update
266
resourcesAlreadyUpdated.add(resource.getRootPath());
267                     updateResource(writer, threadManager, resource);
268                 }
269             }
270         }
271     }
272
273     /**
274      * Checks if the published resource is inside the time window set with release and expiration date.<p>
275      *
276      * @param resource the published resource to check
277      * @return true if the published resource is inside the time window, otherwise false
278      */

279     protected boolean isResourceInTimeWindow(CmsPublishedResource resource) {
280
281         return m_cms.existsResource(
282             m_cms.getRequestContext().removeSiteRoot(resource.getRootPath()),
283             CmsResourceFilter.DEFAULT);
284     }
285
286     /**
287      * Updates (writes) a single resource in the index.<p>
288      *
289      * @param writer the index writer to use
290      * @param threadManager the thread manager to use when extracting the document text
291      * @param resource the resource to update
292      *
293      * @throws CmsIndexException if something goes wrong
294      */

295     protected void updateResource(IndexWriter writer, CmsIndexingThreadManager threadManager, CmsResource resource)
296     throws CmsIndexException {
297
298         if (resource.isInternal()) {
299             // don't index internal resources
300
return;
301         }
302         // no check for folder resources, this must be taken care of before calling this method
303

304         try {
305
306             if (m_report != null) {
307                 m_report.print(org.opencms.report.Messages.get().container(
308                     org.opencms.report.Messages.RPT_SUCCESSION_1,
309                     String.valueOf(threadManager.getCounter() + 1)), I_CmsReport.FORMAT_NOTE);
310                 m_report.print(
311                     Messages.get().container(Messages.RPT_SEARCH_INDEXING_FILE_BEGIN_0),
312                     I_CmsReport.FORMAT_NOTE);
313                 m_report.print(org.opencms.report.Messages.get().container(
314                     org.opencms.report.Messages.RPT_ARGUMENT_1,
315                     m_report.removeSiteRoot(resource.getRootPath())));
316                 m_report.print(
317                     org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_DOTS_0),
318                     I_CmsReport.FORMAT_DEFAULT);
319             }
320
321             A_CmsIndexResource indexResource = new CmsVfsIndexResource(resource);
322             threadManager.createIndexingThread(m_cms, writer, indexResource, m_index);
323
324         } catch (Exception JavaDoc e) {
325
326             if (m_report != null) {
327                 m_report.println(
328                     Messages.get().container(Messages.RPT_SEARCH_INDEXING_FAILED_0),
329                     I_CmsReport.FORMAT_WARNING);
330             }
331             if (LOG.isWarnEnabled()) {
332                 LOG.warn(Messages.get().getBundle().key(
333                     Messages.ERR_INDEX_RESOURCE_FAILED_2,
334                     resource.getRootPath(),
335                     m_index.getName()), e);
336             }
337             throw new CmsIndexException(Messages.get().container(
338                 Messages.ERR_INDEX_RESOURCE_FAILED_2,
339                 resource.getRootPath(),
340                 m_index.getName()));
341         }
342     }
343 }
Popular Tags