1 31 32 package org.opencms.search.documents; 33 34 import org.opencms.file.CmsFile; 35 import org.opencms.file.CmsObject; 36 import org.opencms.file.CmsProperty; 37 import org.opencms.file.CmsPropertyDefinition; 38 import org.opencms.file.CmsResource; 39 import org.opencms.main.CmsException; 40 import org.opencms.main.OpenCms; 41 import org.opencms.search.A_CmsIndexResource; 42 import org.opencms.search.CmsIndexException; 43 import org.opencms.search.extractors.CmsExtractorHtml; 44 import org.opencms.search.extractors.I_CmsExtractionResult; 45 46 56 public class CmsDocumentHtml extends A_CmsVfsDocument { 57 58 63 public CmsDocumentHtml(String name) { 64 65 super(name); 66 } 67 68 73 public I_CmsExtractionResult extractContent(CmsObject cms, A_CmsIndexResource indexResource, String language) 74 throws CmsIndexException, CmsException { 75 76 CmsResource resource = (CmsResource)indexResource.getData(); 77 CmsFile file = readFile(cms, resource); 78 79 try { 80 String path = cms.getRequestContext().removeSiteRoot(resource.getRootPath()); 81 CmsProperty encProp = cms.readPropertyObject(path, CmsPropertyDefinition.PROPERTY_CONTENT_ENCODING, true); 82 String encoding = encProp.getValue(OpenCms.getSystemInfo().getDefaultEncoding()); 83 84 return CmsExtractorHtml.getExtractor().extractText(file.getContents(), encoding); 85 } catch (Exception e) { 86 throw new CmsIndexException( 87 Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()), 88 e); 89 } 90 } 91 } | Popular Tags |