1 31 32 package com.opencms.legacy; 33 34 import org.opencms.file.CmsObject; 35 import org.opencms.main.CmsException; 36 import org.opencms.main.OpenCms; 37 import org.opencms.search.A_CmsIndexResource; 38 import org.opencms.search.documents.I_CmsDocumentFactory; 39 import org.opencms.search.extractors.CmsExtractionResult; 40 import org.opencms.search.extractors.I_CmsExtractionResult; 41 import org.opencms.util.CmsHtmlExtractor; 42 import org.opencms.util.CmsStringUtil; 43 44 import com.opencms.defaults.master.*; 45 46 import java.util.ArrayList ; 47 import java.util.Date ; 48 import java.util.Iterator ; 49 import java.util.List ; 50 import java.util.regex.Pattern ; 51 52 import org.apache.lucene.document.Document; 53 import org.apache.lucene.document.Field; 54 55 65 public class CmsCosDocument implements I_CmsDocumentFactory { 66 67 68 public static final String C_DOCUMENT_KEY_PREFIX = "COS"; 69 70 71 private static final Pattern C_NON_NUM_UUID_WS = Pattern.compile("[^a-fA-F0-9\\-_\\s]"); 72 73 74 protected CmsObject m_cms; 75 76 77 protected String m_name; 78 79 80 public static final String DOC_CHANNEL = "channel"; 81 82 83 public static final String DOC_CONTENT_ID = "contentid"; 84 85 86 public static final String DOC_CONTENT_DEFINITION = "contentdefinition"; 87 88 94 public CmsCosDocument(CmsObject cms, String name) { 95 96 m_cms = cms; 97 m_name = name; 98 } 99 100 112 public I_CmsExtractionResult extractContent(CmsObject cms, A_CmsIndexResource indexResource, String language) 113 throws CmsException { 114 115 CmsMasterDataSet resource = (CmsMasterDataSet)indexResource.getData(); 116 String result = null; 117 118 try { 119 120 StringBuffer buf = new StringBuffer (); 121 122 for (int i = 0; i < resource.m_dataMedium.length; i++) { 123 if (resource.m_dataMedium[i] != null && !"".equals(resource.m_dataMedium[i])) { 124 buf.append((i > 0) ? " " : ""); 125 buf.append(resource.m_dataMedium[i]); 126 } 127 } 128 129 for (int i = 0; i < resource.m_dataBig.length; i++) { 130 if (resource.m_dataBig[i] != null && !"".equals(resource.m_dataBig[i])) { 131 buf.append((i > 0) ? " " : ""); 132 buf.append(resource.m_dataBig[i]); 133 } 134 } 135 136 for (int i = 0; i < resource.m_dataSmall.length; i++) { 137 if (resource.m_dataSmall[i] != null && !"".equals(resource.m_dataSmall[i])) { 138 if (C_NON_NUM_UUID_WS.matcher(resource.m_dataSmall[i]).find()) { 139 buf.append((i > 0) ? " " : ""); 140 buf.append(resource.m_dataSmall[i]); 141 } 142 } 143 } 144 145 result = CmsHtmlExtractor.extractText(buf.toString(), OpenCms.getSystemInfo().getDefaultEncoding()); 146 147 } catch (Exception exc) { 148 throw new CmsLegacyException("Reading resource " + indexResource.getRootPath() + " failed", exc); 149 } 150 151 return new CmsExtractionResult(result); 152 } 153 154 157 public String getDocumentKey(String resourceType) throws CmsException { 158 159 try { 160 return C_DOCUMENT_KEY_PREFIX + ((CmsMasterContent)Class.forName(resourceType).newInstance()).getSubId(); 161 } catch (Exception exc) { 162 throw new CmsLegacyException("Instanciation of resource type class " + resourceType + " failed.", exc); 163 } 164 } 165 166 169 public List getDocumentKeys(List resourceTypes, List mimeTypes) throws CmsException { 170 171 ArrayList keys = new ArrayList (); 172 173 try { 174 for (Iterator i = resourceTypes.iterator(); i.hasNext();) { 175 176 int id = ((CmsMasterContent)Class.forName((String )i.next()).newInstance()).getSubId(); 177 for (Iterator j = resourceTypes.iterator(); j.hasNext();) { 178 keys.add(C_DOCUMENT_KEY_PREFIX + id + ":" + (String )j.next()); 179 } 180 181 keys.add(C_DOCUMENT_KEY_PREFIX + id); 182 } 183 } catch (Exception exc) { 184 throw new CmsLegacyException("Creation of document keys failed.", exc); 185 } 186 187 return keys; 188 } 189 190 193 public String getName() { 194 195 return m_name; 196 } 197 198 203 public Document newInstance(CmsObject cms, A_CmsIndexResource resource, String language) throws CmsException { 204 205 Document document = new Document(); 206 CmsMasterDataSet content = (CmsMasterDataSet)resource.getData(); 207 String value = content.m_title; 208 209 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(value)) { 210 document.add(Field.Keyword(I_CmsDocumentFactory.DOC_TITLE_KEY, value)); 211 document.add(Field.UnStored(I_CmsDocumentFactory.DOC_TITLE_INDEXED, value)); 212 document.add(Field.UnStored(I_CmsDocumentFactory.DOC_META, value)); 213 } 214 215 document.add(Field.Keyword(I_CmsDocumentFactory.DOC_DATE_CREATED, new Date (content.m_dateCreated))); 216 document.add(Field.Keyword(I_CmsDocumentFactory.DOC_DATE_LASTMODIFIED, new Date (content.m_dateLastModified))); 217 218 document.add(Field.Keyword(CmsCosDocument.DOC_CHANNEL, ((CmsCosIndexResource)resource).getChannel())); 219 document.add(Field.Keyword(CmsCosDocument.DOC_CONTENT_DEFINITION, ((CmsCosIndexResource)resource) 220 .getContentDefinition())); 221 222 String path = m_cms.getRequestContext().removeSiteRoot(resource.getRootPath()); 223 document.add(Field.UnIndexed(I_CmsDocumentFactory.DOC_PATH, path)); 224 document.add(Field.UnIndexed(CmsCosDocument.DOC_CONTENT_ID, resource.getId().toString())); 225 226 I_CmsExtractionResult extract = extractContent(cms, resource, language); 227 document.add(Field.Text(I_CmsDocumentFactory.DOC_CONTENT, extract.getContent())); 228 extract.release(); 229 230 return document; 231 } 232 } | Popular Tags |