package org.opencms.search.documents;

import org.opencms.file.CmsFile;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsProperty;
import org.opencms.file.CmsPropertyDefinition;
import org.opencms.file.CmsResource;
import org.opencms.i18n.CmsLocaleManager;
import org.opencms.main.CmsException;
import org.opencms.main.OpenCms;
import org.opencms.search.A_CmsIndexResource;
import org.opencms.search.CmsIndexException;
import org.opencms.search.extractors.CmsExtractionResult;
import org.opencms.search.extractors.I_CmsExtractionResult;
import org.opencms.util.CmsHtmlExtractor;
import org.opencms.xml.page.CmsXmlPage;
import org.opencms.xml.page.CmsXmlPageFactory;

import java.util.Iterator;
import java.util.List;
import java.util.Locale;

/**
 * Search document factory for XML pages.<p>
 *
 * Unmarshals the indexed resource as a {@link CmsXmlPage}, concatenates the string
 * values of all page elements for the best matching locale and reduces the result
 * to plain text with {@link CmsHtmlExtractor}.<p>
 */
public class CmsDocumentXmlPage extends A_CmsVfsDocument {

    /**
     * Creates a new instance of this document factory.<p>
     *
     * @param name the name handed on to the superclass constructor
     */
    public CmsDocumentXmlPage(String name) {

        super(name);
    }

    /**
     * Returns the plain text content of the XML page behind the given index resource.<p>
     *
     * The locale used is the best match between the requested language, the default
     * locales configured for the resource and the locales available in the page. If the
     * page contains no locales at all, the default locales are used in their place.<p>
     *
     * If the property {@link CmsPropertyDefinition#PROPERTY_SEARCH_EXTRACTIONCLASS} is set
     * on the resource (searched upwards), the named class is instantiated and, provided it
     * implements {@link I_CmsSearchExtractor}, its extracted content is appended to the
     * page text, separated by a line break.<p>
     *
     * @param cms the cms context used to read the resource and its properties
     * @param indexResource the index resource whose data is the {@link CmsResource} to extract
     * @param language the language code used to select the locale of the page content
     *
     * @return the extraction result holding the plain text content
     *
     * @throws CmsException if the content cannot be extracted, or if the configured
     *         extraction class does not implement {@link I_CmsSearchExtractor}
     */
    public I_CmsExtractionResult extractContent(CmsObject cms, A_CmsIndexResource indexResource, String language)
    throws CmsException {

        CmsResource resource = (CmsResource)indexResource.getData();

        try {
            String path = cms.getRequestContext().removeSiteRoot(resource.getRootPath());

            CmsFile file = CmsFile.upgrade(resource, cms);
            String absolutePath = cms.getSitePath(file);
            CmsXmlPage page = CmsXmlPageFactory.unmarshal(cms, file);

            // look the default locales up once; they serve both as the fallback for a page
            // without locales and as the preference list for the locale matching below
            List pageLocales = page.getLocales();
            List defaultLocales = OpenCms.getLocaleManager().getDefaultLocales(cms, absolutePath);
            if (pageLocales.isEmpty()) {
                pageLocales = defaultLocales;
            }
            Locale locale = OpenCms.getLocaleManager().getBestMatchingLocale(
                CmsLocaleManager.getLocale(language),
                defaultLocales,
                pageLocales);

            // collect the raw (HTML) value of every element available in the selected locale
            StringBuffer content = new StringBuffer();
            for (Iterator i = page.getNames(locale).iterator(); i.hasNext();) {
                String value = page.getStringValue(cms, (String)i.next(), locale);
                if (value != null) {
                    content.append(value);
                }
            }

            String result = CmsHtmlExtractor.extractText(content.toString(), page.getEncoding());

            CmsProperty extractionClass = cms.readPropertyObject(
                path,
                CmsPropertyDefinition.PROPERTY_SEARCH_EXTRACTIONCLASS,
                true);
            // identity comparison against the null property object, as in the original code;
            // NOTE(review): presumably readPropertyObject returns exactly that object when the
            // property is not set - confirm against the CmsObject contract
            if (extractionClass != CmsProperty.getNullProperty()) {
                Object ext = Class.forName(extractionClass.getValue()).newInstance();

                if (!(ext instanceof I_CmsSearchExtractor)) {
                    throw new CmsIndexException(Messages.get().container(
                        Messages.ERR_EXTRACTION_CLASS_2,
                        resource.getRootPath(),
                        ext.getClass().getName()));
                }

                I_CmsExtractionResult extract = ((I_CmsSearchExtractor)ext).extractContent(
                    cms,
                    indexResource,
                    language);
                try {
                    String additional = extract.getContent();
                    // skip a missing content instead of indexing the literal text "null"
                    if (additional != null) {
                        result = result + "\n" + additional;
                    }
                } finally {
                    // always free the extraction result, even if reading its content failed
                    extract.release();
                }
            }

            return new CmsExtractionResult(result);

        } catch (CmsIndexException e) {
            // already carries a specific message (e.g. invalid extraction class), so it is
            // passed on unchanged rather than being hidden behind the generic wrapper below
            throw e;
        } catch (Exception e) {
            throw new CmsIndexException(
                Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
                e);
        }
    }
}