1 31 32 package org.opencms.search.documents; 33 34 import org.opencms.file.CmsFile; 35 import org.opencms.file.CmsObject; 36 import org.opencms.file.CmsPropertyDefinition; 37 import org.opencms.file.CmsResource; 38 import org.opencms.file.types.I_CmsResourceType; 39 import org.opencms.main.CmsException; 40 import org.opencms.main.CmsLog; 41 import org.opencms.main.OpenCms; 42 import org.opencms.search.A_CmsIndexResource; 43 import org.opencms.search.CmsIndexException; 44 import org.opencms.search.CmsSearchCategoryCollector; 45 import org.opencms.search.CmsSearchIndex; 46 import org.opencms.search.extractors.I_CmsExtractionResult; 47 import org.opencms.util.CmsStringUtil; 48 49 import java.util.ArrayList ; 50 import java.util.Date ; 51 import java.util.Iterator ; 52 import java.util.List ; 53 import java.util.Map ; 54 55 import org.apache.commons.logging.Log; 56 import org.apache.lucene.document.DateTools; 57 import org.apache.lucene.document.Document; 58 import org.apache.lucene.document.Field; 59 60 73 public abstract class A_CmsVfsDocument implements I_CmsDocumentFactory { 74 75 76 public static final String VFS_DOCUMENT_KEY_PREFIX = "VFS"; 77 78 79 private static final Log LOG = CmsLog.getLog(A_CmsVfsDocument.class); 80 81 84 protected String m_name; 85 86 91 public A_CmsVfsDocument(String name) { 92 93 m_name = name; 94 } 95 96 99 public String getDocumentKey(String resourceType) throws CmsIndexException { 100 101 try { 102 return VFS_DOCUMENT_KEY_PREFIX + ((I_CmsResourceType)Class.forName(resourceType).newInstance()).getTypeId(); 103 } catch (Exception exc) { 104 throw new CmsIndexException(Messages.get().container( 105 Messages.ERR_RESOURCE_TYPE_INSTANTIATION_1, 106 resourceType), exc); 107 } 108 } 109 110 113 public List getDocumentKeys(List resourceTypes, List mimeTypes) throws CmsException { 114 115 ArrayList keys = new ArrayList (); 116 117 if (resourceTypes.contains("*")) { 118 ArrayList allTypes = new ArrayList (); 119 for (Iterator i = OpenCms.getResourceManager().getResourceTypes().iterator(); i.hasNext();) { 120 I_CmsResourceType resourceType = (I_CmsResourceType)i.next(); 121 allTypes.add(resourceType.getTypeName()); 122 } 123 resourceTypes = allTypes; 124 } 125 126 try { 127 for (Iterator i = resourceTypes.iterator(); i.hasNext();) { 128 129 int id = OpenCms.getResourceManager().getResourceType((String )i.next()).getTypeId(); 130 for (Iterator j = mimeTypes.iterator(); j.hasNext();) { 131 keys.add(VFS_DOCUMENT_KEY_PREFIX + id + ":" + (String )j.next()); 132 } 133 if (mimeTypes.isEmpty()) { 134 keys.add(VFS_DOCUMENT_KEY_PREFIX + id); 135 } 136 } 137 } catch (Exception exc) { 138 throw new CmsException(Messages.get().container(Messages.ERR_CREATE_DOC_KEY_0), exc); 139 } 140 141 return keys; 142 } 143 144 147 public String getName() { 148 149 return m_name; 150 } 151 152 157 public Document newInstance(CmsObject cms, A_CmsIndexResource resource, String language) throws CmsException { 158 159 Document document = new Document(); 160 CmsResource res = (CmsResource)resource.getData(); 161 String path = cms.getRequestContext().removeSiteRoot(resource.getRootPath()); 162 163 String text = null; 165 try { 166 I_CmsExtractionResult content = extractContent(cms, resource, language); 167 text = mergeMetaInfo(content); 168 content.release(); 169 } catch (Exception e) { 170 LOG.error(Messages.get().getBundle().key(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()), e); 172 } 173 if (text != null) { 174 document.add(new Field(I_CmsDocumentFactory.DOC_CONTENT, text, Field.Store.YES, Field.Index.TOKENIZED)); 175 } 176 177 StringBuffer meta = new StringBuffer (512); 178 String value; 179 Field field; 180 181 value = cms.readPropertyObject(path, CmsPropertyDefinition.PROPERTY_TITLE, false).getValue(); 183 if (CmsStringUtil.isNotEmpty(value)) { 184 value = value.trim(); 185 if (value.length() > 0) { 186 field = new Field(I_CmsDocumentFactory.DOC_TITLE_KEY, value, Field.Store.YES, Field.Index.UN_TOKENIZED); 188 field.setBoost(0); 190 document.add(field); 191 document.add(new Field( 193 I_CmsDocumentFactory.DOC_TITLE_INDEXED, 194 value, 195 Field.Store.NO, 196 Field.Index.TOKENIZED)); 197 meta.append(value); 198 meta.append(" "); 199 } 200 } 201 value = cms.readPropertyObject(path, CmsPropertyDefinition.PROPERTY_KEYWORDS, false).getValue(); 203 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(value)) { 204 document.add(new Field(I_CmsDocumentFactory.DOC_KEYWORDS, value, Field.Store.YES, Field.Index.TOKENIZED)); 205 meta.append(value); 206 meta.append(" "); 207 } 208 value = cms.readPropertyObject(path, CmsPropertyDefinition.PROPERTY_DESCRIPTION, false).getValue(); 210 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(value)) { 211 document.add(new Field(I_CmsDocumentFactory.DOC_DESCRIPTION, value, Field.Store.YES, Field.Index.TOKENIZED)); 212 meta.append(value); 213 meta.append(" "); 214 } 215 String metaInf = meta.toString(); 217 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(metaInf)) { 218 document.add(new Field(I_CmsDocumentFactory.DOC_META, metaInf, Field.Store.NO, Field.Index.TOKENIZED)); 219 } 220 221 value = cms.readPropertyObject(path, CmsPropertyDefinition.PROPERTY_SEARCH_CATEGORY, true).getValue(); 223 if (CmsStringUtil.isNotEmpty(value)) { 224 value = value.trim().toLowerCase(); 226 if (value.length() > 0) { 227 field = new Field(I_CmsDocumentFactory.DOC_CATEGORY, value, Field.Store.YES, Field.Index.UN_TOKENIZED); 228 field.setBoost(0); 229 document.add(field); 230 } 231 } else { 232 field = new Field( 234 I_CmsDocumentFactory.DOC_CATEGORY, 235 CmsSearchCategoryCollector.UNKNOWN_CATEGORY, 236 Field.Store.YES, 237 Field.Index.UN_TOKENIZED); 238 document.add(field); 239 } 240 241 String rootPath = CmsSearchIndex.rootPathRewrite(resource.getRootPath()); 243 field = new Field(I_CmsDocumentFactory.DOC_ROOT, rootPath, Field.Store.YES, Field.Index.TOKENIZED); 244 field.setBoost(0); 246 document.add(field); 247 document.add(new Field( 250 I_CmsDocumentFactory.DOC_PATH, 251 resource.getRootPath(), 252 Field.Store.YES, 253 Field.Index.UN_TOKENIZED)); 254 255 field = new Field(I_CmsDocumentFactory.DOC_DATE_CREATED, DateTools.dateToString( 257 new Date (res.getDateCreated()), 258 DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.UN_TOKENIZED); 259 field.setBoost(0); 260 document.add(field); 261 field = new Field(I_CmsDocumentFactory.DOC_DATE_LASTMODIFIED, DateTools.dateToString(new Date ( 262 res.getDateLastModified()), DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.UN_TOKENIZED); 263 field.setBoost(0); 264 document.add(field); 265 266 document.add(new Field(I_CmsDocumentFactory.DOC_TYPE, VFS_DOCUMENT_KEY_PREFIX, Field.Store.YES, Field.Index.NO)); 268 269 float boost = 1.0f; 270 value = cms.readPropertyObject(path, CmsPropertyDefinition.PROPERTY_SEARCH_PRIORITY, true).getValue(); 272 if (value != null) { 273 value = value.trim().toLowerCase(); 274 if (value.equals(I_CmsDocumentFactory.SEARCH_PRIORITY_MAX_VALUE)) { 275 boost = 2.0f; 276 } else if (value.equals(I_CmsDocumentFactory.SEARCH_PRIORITY_HIGH_VALUE)) { 277 boost = 1.5f; 278 } else if (value.equals(I_CmsDocumentFactory.SEARCH_PRIORITY_LOW_VALUE)) { 279 boost = 0.5f; 280 } 281 } 282 document.setBoost(boost); 284 285 return document; 286 } 287 288 299 protected String mergeMetaInfo(I_CmsExtractionResult extractedContent) { 300 301 Map metaInfo = extractedContent.getMetaInfo(); 302 String content = extractedContent.getContent(); 303 304 if (((metaInfo == null) || (metaInfo.size() == 0)) && (CmsStringUtil.isEmpty(content))) { 305 return null; 306 } 307 308 StringBuffer result = new StringBuffer (4096); 309 if (metaInfo != null) { 310 String meta; 311 meta = (String )metaInfo.get(I_CmsExtractionResult.META_TITLE); 312 if (CmsStringUtil.isNotEmpty(meta)) { 313 result.append(meta); 314 result.append('\n'); 315 } 316 meta = (String )metaInfo.get(I_CmsExtractionResult.META_SUBJECT); 317 if (CmsStringUtil.isNotEmpty(meta)) { 318 result.append(meta); 319 result.append('\n'); 320 } 321 meta = (String )metaInfo.get(I_CmsExtractionResult.META_KEYWORDS); 322 if (CmsStringUtil.isNotEmpty(meta)) { 323 result.append(meta); 324 result.append('\n'); 325 } 326 meta = (String )metaInfo.get(I_CmsExtractionResult.META_COMMENTS); 327 if (CmsStringUtil.isNotEmpty(meta)) { 328 result.append(meta); 329 result.append('\n'); 330 } 331 } 332 333 if (content != null) { 334 result.append(content); 335 } 336 337 return result.toString(); 338 } 339 340 351 protected CmsFile readFile(CmsObject cms, CmsResource resource) throws CmsException, CmsIndexException { 352 353 CmsFile file = CmsFile.upgrade(resource, cms); 354 if (file.getLength() <= 0) { 355 throw new CmsIndexException(Messages.get().container(Messages.ERR_NO_CONTENT_1, resource.getRootPath())); 356 } 357 return file; 358 } 359 } | Popular Tags |