package org.outerj.daisy.books.publisher.impl.publicationprocess;

import org.xml.sax.*;
import org.xml.sax.helpers.NamespaceSupport;
import org.apache.cocoon.xml.AttributesImpl;
import org.apache.cocoon.xml.SaxBuffer;
import org.outerj.daisy.books.publisher.impl.BookInstanceLayout;
import org.outerj.daisy.books.publisher.impl.util.AbstractContentHandler;
import org.outerj.daisy.xmlutil.XmlSerializer;
import org.outerj.daisy.xmlutil.LocalSAXParserFactory;

import javax.xml.parsers.SAXParser;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
import java.util.Enumeration;
import java.io.InputStream;
import java.io.OutputStream;

/**
 * Publication process task that splits one XML document into chunks.
 *
 * <p>The input is expected to have a root element with a no-namespace
 * <code>body</code> child. Elements named <code>hN</code> that are direct
 * children of that body are treated as headers and may start a new chunk.
 * The output is a single document of the form
 * <code>&lt;chunks&gt;&lt;chunk name="..."&gt;&lt;html&gt;&lt;body&gt;...</code>,
 * in which fragment-only links (<code>href="#id"</code>) that point into a
 * different chunk are rewritten to
 * <code>targetChunk + publishExtension + "#id"</code>.
 */
public class SplitInChunksTask implements PublicationProcessTask {
    private final String input;
    private final String output;
    private final String chunkNamePrefix;
    private final String publishExtension;
    private final String firstChunkName;
    private static final Pattern headerPattern = Pattern.compile("h([0-9]+)");

    /**
     * @param input            path of the document to split, appended to the publication output path
     * @param output           path of the resulting chunks document, appended to the publication output path
     * @param chunkNamePrefix  prefix used for generated chunk names (a counter is appended)
     * @param firstChunkName   name to use for the first chunk, or null to derive it like any other chunk
     * @param publishExtension text inserted between the target chunk name and the fragment in rewritten links
     */
    public SplitInChunksTask(String input, String output, String chunkNamePrefix,
                             String firstChunkName, String publishExtension) {
        this.input = input;
        this.output = output;
        this.chunkNamePrefix = chunkNamePrefix;
        this.firstChunkName = firstChunkName;
        this.publishExtension = publishExtension;
    }

    /**
     * Runs the task in two passes: first the input is parsed and chunked into an
     * in-memory SAX buffer (collecting an id-to-chunk map on the way), then the
     * buffer is replayed through the link fixer into the output resource.
     *
     * <p>The chunk level is read from the <code>chunker.chunklevel</code> property
     * and defaults to 1 when the property is absent or not an integer.
     */
    public void run(PublicationContext context) throws Exception {
        context.getPublicationLog().info("Running split in chunks task.");

        String publicationOutputPath = BookInstanceLayout.getPublicationOutputPath(context.getPublicationOutputName());
        String inputPath = publicationOutputPath + input;
        String outputPath = publicationOutputPath + output;

        int chunkLevel = 1;
        String chunkLevelProp = (String)context.getProperties().get("chunker.chunklevel");
        if (chunkLevelProp != null) {
            try {
                chunkLevel = Integer.parseInt(chunkLevelProp);
            } catch (NumberFormatException e) {
                context.getPublicationLog().error("Invalid value in chunker.chunklevel property: \"" + chunkLevelProp + "\", defaulting to " + chunkLevel);
            }
        }

        // Pass 1: chunk the input into a buffer. The output cannot be written
        // directly because link fixing needs the complete id-to-chunk map.
        SaxBuffer buffer = new SaxBuffer();
        ChunkerHandler chunkerHandler;
        InputStream is = null;
        try {
            SAXParser parser = LocalSAXParserFactory.getSAXParserFactory().newSAXParser();
            is = context.getBookInstance().getResource(inputPath);
            chunkerHandler = new ChunkerHandler(buffer, chunkLevel);
            // Hold on to one reader reference so the handler is guaranteed to be
            // installed on the same object that does the parsing.
            XMLReader xmlReader = parser.getXMLReader();
            xmlReader.setContentHandler(chunkerHandler);
            xmlReader.parse(new InputSource(is));
        } finally {
            if (is != null)
                is.close();
        }

        // Pass 2: replay the buffered chunks through the link fixer and serialize.
        OutputStream os = null;
        try {
            os = context.getBookInstance().getResourceOutputStream(outputPath);
            XmlSerializer serializer = new XmlSerializer(os);
            ChunkLinkFixerHandler linkFixer = new ChunkLinkFixerHandler(serializer, chunkerHandler.getIdToChunkMap());
            buffer.toSAX(linkFixer);
        } finally {
            if (os != null)
                os.close();
        }
    }

    /**
     * SAX handler that wraps the content following each chunk-starting header in
     * <code>chunk/html/body</code> elements and records which chunk every
     * <code>id</code> attribute ends up in. Content that appears before the first
     * chunk-starting header is not forwarded.
     */
    class ChunkerHandler implements ContentHandler {
        private boolean inBody = false;
        private int nesting = 0;
        private boolean inChunk = false;
        private ContentHandler consumer;
        private HashSet createdChunks = new HashSet();
        private int chunkNameCounter = 0;
        private Map idToChunkMap = new HashMap();
        private String currentChunkName;
        private boolean firstChunk = true;
        private final int chunkLevel;
        private int prevHeaderLevel = -1;
        private NamespaceSupport namespaceSupport = new NamespaceSupport();
        /**
         * True when the namespace context for the upcoming element has already been
         * pushed by a startPrefixMapping event (those arrive before startElement).
         */
        private boolean contextPushed = false;
        /** Prefixes announced on the current chunk's wrapper, so exactly those get closed again. */
        private List chunkPrefixes = new ArrayList();

        public ChunkerHandler(ContentHandler consumer, int chunkLevel) {
            this.consumer = consumer;
            this.chunkLevel = chunkLevel;
        }

        /**
         * Returns the map from element id (String) to the name of the chunk (String)
         * containing that element. Only complete after parsing has finished.
         */
        public Map getIdToChunkMap() {
            return idToChunkMap;
        }

        public void startDocument() throws SAXException {
            consumer.startDocument();
            consumer.startElement("", "chunks", "chunks", new AttributesImpl());
        }

        public void endDocument() throws SAXException {
            if (inChunk)
                endChunk();
            consumer.endElement("", "chunks", "chunks");
            consumer.endDocument();
        }

        public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
            // One namespace context per element: it was already pushed if this
            // element carried prefix declarations, otherwise push it now.
            if (!contextPushed)
                namespaceSupport.pushContext();
            contextPushed = false;
            nesting++;

            if (nesting == 2 && namespaceURI.equals("") && localName.equals("body")) {
                inBody = true;
            } else if (inBody && nesting == 3) {
                // Only direct children of body can be chunk-starting headers.
                Matcher headerMatcher = headerPattern.matcher(localName);
                if (headerMatcher.matches()) {
                    int headerLevel = Integer.parseInt(headerMatcher.group(1));
                    // A header exactly one level below the previous header stays
                    // in the chunk of that previous header.
                    if (headerLevel <= chunkLevel && headerLevel != prevHeaderLevel + 1) {
                        if (inChunk)
                            endChunk();
                        startChunk(atts);
                    }
                    prevHeaderLevel = headerLevel;
                }
            }

            if (inChunk) {
                consumer.startElement(namespaceURI, localName, qName, atts);

                String id = atts.getValue("id");
                if (id != null && id.trim().length() > 0) {
                    idToChunkMap.put(id, currentChunkName);
                }
            }
        }

        /**
         * Picks a unique name for a new chunk and emits its opening
         * <code>chunk/html/body</code> wrapper elements.
         *
         * @param headerAtts attributes of the header element that starts the chunk;
         *                   its non-blank <code>id</code> is used as chunk name when available
         */
        private void startChunk(Attributes headerAtts) throws SAXException {
            String chunkName = null;
            if (firstChunk && firstChunkName != null)
                chunkName = firstChunkName;
            firstChunk = false;
            if (chunkName == null) {
                String headerId = headerAtts.getValue("id");
                if (headerId != null && headerId.trim().length() > 0) {
                    chunkName = headerId;
                } else {
                    chunkName = generateChunkName();
                }
            }
            // Fall back to generated names until the name is unique.
            while (createdChunks.contains(chunkName)) {
                chunkName = generateChunkName();
            }
            createdChunks.add(chunkName);
            currentChunkName = chunkName;

            AttributesImpl chunkAttrs = new AttributesImpl();
            chunkAttrs.addCDATAAttribute("name", chunkName);
            consumer.startElement("", "chunk", "chunk", chunkAttrs);
            consumer.startElement("", "html", "html", new AttributesImpl());
            declarePrefixes();
            consumer.startElement("", "body", "body", new AttributesImpl());
            inChunk = true;
        }

        private String generateChunkName() {
            return chunkNamePrefix + ++chunkNameCounter;
        }

        public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
            namespaceSupport.popContext();
            if (inBody && nesting == 2 && namespaceURI.equals("") && localName.equals("body")) {
                inBody = false;
                if (inChunk) {
                    endChunk();
                    inChunk = false;
                }
            } else if (inChunk) {
                consumer.endElement(namespaceURI, localName, qName);
            }
            nesting--;
        }

        /** Emits the closing counterpart of the wrapper elements opened by startChunk. */
        private void endChunk() throws SAXException {
            consumer.endElement("", "body", "body");
            undeclarePrefixes();
            consumer.endElement("", "html", "html");
            consumer.endElement("", "chunk", "chunk");
        }

        /**
         * Announces every prefix currently in scope (except <code>xml</code>) to the
         * consumer and remembers them for undeclarePrefixes.
         */
        private void declarePrefixes() throws SAXException {
            chunkPrefixes.clear();
            Enumeration prefixEnum = namespaceSupport.getPrefixes();
            while (prefixEnum.hasMoreElements()) {
                String prefix = (String)prefixEnum.nextElement();
                if (!prefix.equals("xml")) {
                    consumer.startPrefixMapping(prefix, namespaceSupport.getURI(prefix));
                    chunkPrefixes.add(prefix);
                }
            }
        }

        /**
         * Ends exactly the prefix mappings that declarePrefixes started for the
         * current chunk. The set of prefixes in scope when a chunk closes can differ
         * from the set at its start, so re-enumerating them here would produce
         * unbalanced start/endPrefixMapping events.
         */
        private void undeclarePrefixes() throws SAXException {
            for (int i = chunkPrefixes.size() - 1; i >= 0; i--) {
                consumer.endPrefixMapping((String)chunkPrefixes.get(i));
            }
            chunkPrefixes.clear();
        }

        public void characters(char ch[], int start, int length) throws SAXException {
            if (inChunk)
                consumer.characters(ch, start, length);
        }

        public void ignorableWhitespace(char ch[], int start, int length) throws SAXException {
            if (inChunk)
                consumer.ignorableWhitespace(ch, start, length);
        }

        public void skippedEntity(String name) throws SAXException {
            if (inChunk)
                consumer.skippedEntity(name);
        }

        public void processingInstruction(String target, String data) throws SAXException {
            // processing instructions are not forwarded
        }

        public void startPrefixMapping(String prefix, String uri) throws SAXException {
            // Prefix mappings are reported before the startElement they belong to,
            // so the element's namespace context has to be opened here; otherwise
            // the declaration would be stored in (and outlive) the parent's context.
            if (!contextPushed) {
                namespaceSupport.pushContext();
                contextPushed = true;
            }
            namespaceSupport.declarePrefix(prefix, uri);
            consumer.startPrefixMapping(prefix, uri);
        }

        public void endPrefixMapping(String prefix) throws SAXException {
            consumer.endPrefixMapping(prefix);
        }

        public void setDocumentLocator(Locator locator) {
            // locator is not needed
        }
    }

    /**
     * SAX filter over the chunks document that rewrites fragment-only links
     * (<code>href="#id"</code>) on no-namespace <code>a</code> elements when the
     * target id lives in another chunk than the link itself.
     */
    class ChunkLinkFixerHandler extends AbstractContentHandler {
        private Map idToChunkMap;
        private int nesting = 0;
        private String currentChunkName = null;

        /**
         * @param consumer     handler receiving the (possibly modified) events
         * @param idToChunkMap map from element id to the name of the chunk containing it
         */
        public ChunkLinkFixerHandler(ContentHandler consumer, Map idToChunkMap) {
            super(consumer);
            this.idToChunkMap = idToChunkMap;
        }

        public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
            nesting++;
            if (nesting == 2 && namespaceURI.equals("") && localName.equals("chunk")) {
                currentChunkName = atts.getValue("name");
            } else if (currentChunkName != null && namespaceURI.equals("") && localName.equals("a")) {
                String href = atts.getValue("href");
                // Only links consisting of '#' followed by at least one character qualify.
                if (href != null && href.length() > 1 && href.charAt(0) == '#') {
                    String targetChunk = (String)idToChunkMap.get(href.substring(1));
                    if (targetChunk != null && !targetChunk.equals(currentChunkName)) {
                        String newHref = targetChunk + publishExtension + href;
                        AttributesImpl newAttrs = new AttributesImpl(atts);
                        newAttrs.setValue(newAttrs.getIndex("href"), newHref);
                        atts = newAttrs;
                    }
                }
            }
            super.startElement(namespaceURI, localName, qName, atts);
        }

        public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
            if (nesting == 2 && namespaceURI.equals("") && localName.equals("chunk")) {
                currentChunkName = null;
            }
            nesting--;
            super.endElement(namespaceURI, localName, qName);
        }
    }
}