1 16 package org.outerj.daisy.books.publisher.impl.publicationprocess; 17 18 import org.outerj.daisy.books.publisher.impl.BookInstanceLayout; 19 import org.outerj.daisy.books.publisher.impl.util.AbstractContentHandler; 20 import org.outerj.daisy.xmlutil.XmlSerializer; 21 import org.outerj.daisy.xmlutil.LocalSAXParserFactory; 22 import org.apache.cocoon.xml.SaxBuffer; 23 import org.apache.cocoon.xml.AttributesImpl; 24 import org.apache.cocoon.xml.dom.DOMStreamer; 25 import org.xml.sax.Attributes ; 26 import org.xml.sax.SAXException ; 27 import org.xml.sax.InputSource ; 28 import org.xml.sax.ContentHandler ; 29 import org.w3c.dom.Document ; 30 import org.w3c.dom.Element ; 31 import org.w3c.dom.NodeList ; 32 import org.w3c.dom.Node ; 33 34 import javax.xml.parsers.DocumentBuilderFactory ; 35 import javax.xml.parsers.DocumentBuilder ; 36 import javax.xml.parsers.SAXParser ; 37 import java.io.InputStream ; 38 import java.io.OutputStream ; 39 import java.util.regex.Pattern ; 40 import java.util.regex.Matcher ; 41 import java.util.List ; 42 import java.util.ArrayList ; 43 import java.util.Iterator ; 44 import java.util.StringTokenizer ; 45 46 public class AddTocAndListsTask implements PublicationProcessTask { 47 private final String input; 48 private final String output; 49 50 public AddTocAndListsTask(String input, String output) { 51 this.input = input; 52 this.output = output; 53 } 54 55 public void run(PublicationContext context) throws Exception { 56 context.getPublicationLog().info("Running add toc and lists task."); 57 int tocDepth = Integer.MAX_VALUE; 58 String tocDepthParam = (String )context.getProperties().get("toc.depth"); 59 if (tocDepthParam != null) { 60 try { 61 tocDepth = Integer.parseInt(tocDepthParam); 62 } catch (NumberFormatException e) { 63 throw new Exception ("Invalid value in toc.depth property: " + tocDepthParam); 64 } 65 } 66 67 String listOfFiguresTypes = (String )context.getProperties().get("list-of-figures.include-types"); 69 String [] figureTypes = listOfFiguresTypes != null ? parseCSV(listOfFiguresTypes) : new String [0]; 70 String listOfTablesTypes = (String )context.getProperties().get("list-of-tables.include-types"); 71 String [] tableTypes = listOfTablesTypes != null ? parseCSV(listOfTablesTypes) : new String [0]; 72 73 String publicationOutputPath = BookInstanceLayout.getPublicationOutputPath(context.getPublicationOutputName()); 74 String inputXmlPath = publicationOutputPath + input; 75 String outputXmlPath = publicationOutputPath + output; 76 77 DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); 79 documentBuilderFactory.setNamespaceAware(true); 80 DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder(); 81 Document inputDocument; 82 InputStream is = null; 83 try { 84 is = context.getBookInstance().getResource(inputXmlPath); 85 inputDocument = documentBuilder.parse(is); 86 } finally { 87 if (is != null) 88 is.close(); 89 } 90 91 Document tocDocument = documentBuilder.newDocument(); 92 TocBuilder tocBuilder = new TocBuilder(); 93 tocBuilder.buildToc(inputDocument, tocDocument, tocDepth); 94 DOMStreamer domStreamer = new DOMStreamer(); 95 SaxBuffer tocBuffer = new SaxBuffer(); 96 domStreamer.setContentHandler(tocBuffer); 97 domStreamer.stream(tocDocument.getDocumentElement()); 98 99 ArtifactListBuilder listBuilder = new ArtifactListBuilder(figureTypes, tableTypes); 100 listBuilder.build(inputDocument); 101 102 OutputStream os = null; 103 is = null; 104 try { 105 is = context.getBookInstance().getResource(inputXmlPath); 106 os = context.getBookInstance().getResourceOutputStream(outputXmlPath); 107 XmlSerializer serializer = new XmlSerializer(os); 108 MergeTocAndListsHandler mergeTocAndListsHandler = new MergeTocAndListsHandler(serializer, tocBuffer, listBuilder.getFigureListBuffers(), listBuilder.getTableListBuffers()); 109 110 SAXParser parser = LocalSAXParserFactory.getSAXParserFactory().newSAXParser(); 111 parser.getXMLReader().setContentHandler(mergeTocAndListsHandler); 112 parser.getXMLReader().parse(new InputSource (is)); 113 } finally { 114 if (is != null) 115 is.close(); 116 if (os != null) 117 os.close(); 118 } 119 } 120 121 private static String [] parseCSV(String data) { 122 ArrayList values = new ArrayList (); 123 StringTokenizer tokenizer = new StringTokenizer (data, ","); 124 while (tokenizer.hasMoreTokens()) { 125 String token = tokenizer.nextToken().trim(); 126 if (token.length() > 0) { 127 values.add(token); 128 } 129 } 130 return (String [])values.toArray(new String [values.size()]); 131 } 132 133 static class TocBuilder { 136 private int currentTocLevel; 137 private Document tocDocument; 138 private Element currentTocElement; 139 private int tocDepth; 140 private static final Pattern headerPattern = Pattern.compile("h([0-9]+)"); 141 142 void buildToc(Document inputDocument, Document tocDocument, int tocDepth) throws Exception { 143 Element tocElement = tocDocument.createElementNS(null, "toc"); 144 tocDocument.appendChild(tocElement); 145 146 currentTocLevel = 0; 147 currentTocElement = tocElement; 148 this.tocDocument = tocDocument; 149 this.tocDepth = tocDepth; 150 151 buildTocRecursive(inputDocument.getDocumentElement()); 152 } 153 154 155 private void buildTocRecursive(Element element) throws Exception { 156 NodeList nodeList = element.getChildNodes(); 157 for (int i = 0; i < nodeList.getLength(); i++) { 158 Node node = nodeList.item(i); 159 if (node.getNodeType() == Node.ELEMENT_NODE) { 160 Element childElement = (Element )node; 161 if (childElement.getNamespaceURI() == null) { 162 Matcher matcher = headerPattern.matcher(childElement.getLocalName()); 163 if (matcher.matches()) { 164 int headerLevel = Integer.parseInt(matcher.group(1)); 165 if (headerLevel <= currentTocLevel + 1 && headerLevel <= tocDepth) { 166 for (int z = headerLevel; z <= currentTocLevel; z++) { 167 currentTocElement = (Element )currentTocElement.getParentNode(); 168 } 169 Element tocEntryEl = tocDocument.createElementNS(null, "tocEntry"); 170 Element captionEl = tocDocument.createElementNS(null, "caption"); 171 172 tocEntryEl.appendChild(tocDocument.createTextNode("\n")); 173 tocEntryEl.appendChild(captionEl); 174 tocEntryEl.appendChild(tocDocument.createTextNode("\n")); 175 176 copyCaptionChildren(childElement, captionEl, tocDocument); 177 178 String targetId = childElement.getAttribute("id"); 179 if (targetId.length() == 0) 180 throw new Exception ("Error during TOC generation: encountered a header without id attribute."); 181 182 tocEntryEl.setAttribute("targetId", targetId); 183 String sectionNumber = childElement.getAttribute("daisyNumber"); 184 String sectionPartialNumber = childElement.getAttribute("daisyPartialNumber"); 185 String sectionRawNumber = childElement.getAttribute("daisyRawNumber"); 186 187 if (sectionNumber.length() > 0) 188 tocEntryEl.setAttribute("daisyNumber", sectionNumber); 189 if (sectionPartialNumber.length() > 0) 190 tocEntryEl.setAttribute("daisyPartialNumber", sectionPartialNumber); 191 if (sectionRawNumber.length() > 0) 192 tocEntryEl.setAttribute("daisyRawNumber", sectionRawNumber); 193 194 195 currentTocElement.appendChild(tocEntryEl); 196 currentTocElement.appendChild(tocDocument.createTextNode("\n")); 197 currentTocLevel = headerLevel; 198 currentTocElement = tocEntryEl; 199 } 200 } else { 201 buildTocRecursive(childElement); 202 } 203 } 204 } 205 } 206 } 207 208 private void copyCaptionChildren(Element fromEl, Element toEl, Document toDocument) { 209 NodeList children = fromEl.getChildNodes(); 210 for (int i = 0; i < children.getLength(); i++) { 211 Node child = children.item(i); 212 toEl.appendChild(toDocument.importNode(child, true)); 213 } 214 215 List elementsToBeRemoved = new ArrayList (); 217 collectUnwantedCaptionElement(toEl, elementsToBeRemoved); 218 Iterator elementsToBeRemovedIt = elementsToBeRemoved.iterator(); 219 while (elementsToBeRemovedIt.hasNext()) { 220 Element element = (Element )elementsToBeRemovedIt.next(); 221 element.getParentNode().removeChild(element); 222 } 223 } 224 225 private void collectUnwantedCaptionElement(Element element, List elementsToBeRemoved) { 226 NodeList children = element.getChildNodes(); 227 for (int i = 0; i < children.getLength(); i++) { 228 Node child = children.item(i); 229 230 if (child.getNodeType() == Node.ELEMENT_NODE) { 232 boolean remove = false; 233 Element childEl = (Element )child; 234 if (childEl.getNamespaceURI() == null && childEl.getLocalName().equals("span") && (childEl.getAttribute("class").equals("footnote") || childEl.getAttribute("class").equals("indexentry"))) { 235 remove = true; 236 elementsToBeRemoved.add(childEl); 237 } 238 239 if (!remove) 240 collectUnwantedCaptionElement(childEl, elementsToBeRemoved); 241 } 242 243 } 244 } 245 246 } 247 248 249 static class ArtifactListBuilder { 250 private SaxBuffer[] figureListBuffers; 251 private SaxBuffer[] tableListBuffers; 252 private String [] figureTypes; 253 private String [] tableTypes; 254 private static final String FIGURES_NAME = "figures"; 255 private static final String TABLES_NAME = "tables"; 256 257 public ArtifactListBuilder(String [] figureTypes, String [] tableTypes) { 258 this.figureTypes = figureTypes; 259 this.tableTypes = tableTypes; 260 figureListBuffers = new SaxBuffer[figureTypes.length]; 261 tableListBuffers = new SaxBuffer[tableTypes.length]; 262 } 263 264 public SaxBuffer[] getFigureListBuffers() { 265 return figureListBuffers; 266 } 267 268 public SaxBuffer[] getTableListBuffers() { 269 return tableListBuffers; 270 } 271 272 public void build(Document inputDocument) throws Exception { 273 buildRecursive(inputDocument.getDocumentElement()); 274 closeLists(FIGURES_NAME, figureListBuffers); 275 closeLists(TABLES_NAME, tableListBuffers); 276 } 277 278 public void buildRecursive(Element element) throws Exception { 279 NodeList childNodes = element.getChildNodes(); 280 for (int i = 0; i < childNodes.getLength(); i++) { 281 Node childNode = childNodes.item(i); 282 if (childNode.getNodeType() == Node.ELEMENT_NODE) { 283 Element childElement = (Element )childNode; 284 if (childNode.getNamespaceURI() == null && childElement.getLocalName().equals("img")) { 285 String caption = childElement.getAttribute("daisy-caption"); 286 if (caption.length() > 0) { 287 String id = childElement.getAttribute("id"); 288 if (id.length() == 0) 289 throw new Exception ("Missing id attribute on image during list-of-figures building."); 290 addFigure(childElement.getAttribute("daisy-image-type"), id, caption, childElement.getAttribute("daisyNumber"), childElement.getAttribute("daisyPartialNumber"), childElement.getAttribute("daisyRawNumber")); 291 } 292 } else if (childNode.getNamespaceURI() == null && childElement.getLocalName().equals("table")) { 293 String caption = childElement.getAttribute("daisy-caption"); 294 if (caption.length() > 0) { 295 String id = childElement.getAttribute("id"); 296 if (id.length() == 0) 297 throw new Exception ("Missing id attribute on table during list-of-tables building."); 298 addTable(childElement.getAttribute("daisy-table-type"), id, caption, childElement.getAttribute("daisyNumber"), childElement.getAttribute("daisyPartialNumber"), childElement.getAttribute("daisyRawNumber")); 299 } 300 301 } 302 buildRecursive(childElement); 303 } 304 } 305 } 306 307 private void addFigure(String type, String id, String caption, String daisyNumber, String daisyPartialNumber, String daisyRawNumber) throws SAXException { 308 addItem(FIGURES_NAME, figureListBuffers, figureTypes, type, id, caption, daisyNumber, daisyPartialNumber, daisyRawNumber); 309 } 310 311 private void addTable(String type, String id, String caption, String daisyNumber, String daisyPartialNumber, String daisyRawNumber) throws SAXException { 312 addItem(TABLES_NAME, tableListBuffers, tableTypes, type, id, caption, daisyNumber, daisyPartialNumber, daisyRawNumber); 313 } 314 315 private void addItem(String artifactName, SaxBuffer[] buffers, String [] types, String type, String id, String caption, String daisyNumber, String daisyPartialNumber, String daisyRawNumber) throws SAXException { 316 int index = -1; 317 for (int i = 0; i < types.length; i++) { 318 if (types[i].equals(type)) 319 index = i; 320 } 321 if (index == -1) 322 return; 323 324 if (buffers[index] == null) { 325 buffers[index] = new SaxBuffer(); 326 AttributesImpl listAttrs = new AttributesImpl(); 327 listAttrs.addCDATAAttribute("type", type); 328 String elementName = "list-of-" + artifactName; 329 buffers[index].characters(new char[] {'\n'}, 0, 1); 330 buffers[index].characters(new char[] {'\n'}, 0, 1); 331 buffers[index].startElement("", elementName, elementName, listAttrs); 332 buffers[index].characters(new char[] {'\n'}, 0, 1); 333 } 334 335 336 AttributesImpl attrs = new AttributesImpl(); 337 attrs.addCDATAAttribute("targetId", id); 338 if (daisyNumber.length() > 0) 339 attrs.addCDATAAttribute("daisyNumber", daisyNumber); 340 if (daisyPartialNumber.length() > 0) 341 attrs.addCDATAAttribute("daisyPartialNumber", daisyPartialNumber); 342 if (daisyRawNumber.length() > 0) 343 attrs.addCDATAAttribute("daisyRawNumber", daisyRawNumber); 344 buffers[index].startElement("", "list-item", "list-item", attrs); 345 buffers[index].characters(caption.toCharArray(), 0, caption.length()); 346 buffers[index].endElement("", "list-item", "list-item"); 347 buffers[index].characters(new char[] {'\n'}, 0, 1); 348 } 349 350 private void closeLists(String artifactName, SaxBuffer[] buffers) throws SAXException { 351 for (int i = 0; i < buffers.length; i++) { 352 if (buffers[i] != null) { 353 String elementName = "list-of-" + artifactName; 354 buffers[i].endElement("", elementName, elementName); 355 buffers[i].characters(new char[] {'\n'}, 0, 1); 356 } 357 } 358 } 359 } 360 361 static class MergeTocAndListsHandler extends AbstractContentHandler { 362 private int level = 0; 363 private final SaxBuffer toc; 364 private final SaxBuffer[] figureBuffers; 365 private final SaxBuffer[] tableBuffers; 366 367 public MergeTocAndListsHandler(ContentHandler consumer, SaxBuffer toc, SaxBuffer[] figureBuffers, SaxBuffer[] tableBuffers) { 368 super(consumer); 369 this.toc = toc; 370 this.figureBuffers = figureBuffers; 371 this.tableBuffers = tableBuffers; 372 } 373 374 public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { 375 level++; 376 super.startElement(namespaceURI, localName, qName, atts); 377 if (level == 2 && namespaceURI.equals("") && localName.equals("body")) { 378 toc.toSAX(consumer); 379 for (int i = 0; i < figureBuffers.length; i++) { 380 if (figureBuffers[i] != null) 381 figureBuffers[i].toSAX(consumer); 382 } 383 for (int i = 0; i < tableBuffers.length; i++) { 384 if (tableBuffers[i] != null) 385 tableBuffers[i].toSAX(consumer); 386 } 387 } 388 } 389 390 public void endElement(String namespaceURI, String localName, String qName) throws SAXException { 391 level--; 392 super.endElement(namespaceURI, localName, qName); 393 } 394 } 395 } 396 | Popular Tags |