KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > outerj > daisy > books > publisher > impl > publicationprocess > AddTocAndListsTask


1 /*
2  * Copyright 2004 Outerthought bvba and Schaubroeck nv
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.outerj.daisy.books.publisher.impl.publicationprocess;
17
18 import org.outerj.daisy.books.publisher.impl.BookInstanceLayout;
19 import org.outerj.daisy.books.publisher.impl.util.AbstractContentHandler;
20 import org.outerj.daisy.xmlutil.XmlSerializer;
21 import org.outerj.daisy.xmlutil.LocalSAXParserFactory;
22 import org.apache.cocoon.xml.SaxBuffer;
23 import org.apache.cocoon.xml.AttributesImpl;
24 import org.apache.cocoon.xml.dom.DOMStreamer;
25 import org.xml.sax.Attributes JavaDoc;
26 import org.xml.sax.SAXException JavaDoc;
27 import org.xml.sax.InputSource JavaDoc;
28 import org.xml.sax.ContentHandler JavaDoc;
29 import org.w3c.dom.Document JavaDoc;
30 import org.w3c.dom.Element JavaDoc;
31 import org.w3c.dom.NodeList JavaDoc;
32 import org.w3c.dom.Node JavaDoc;
33
34 import javax.xml.parsers.DocumentBuilderFactory JavaDoc;
35 import javax.xml.parsers.DocumentBuilder JavaDoc;
36 import javax.xml.parsers.SAXParser JavaDoc;
37 import java.io.InputStream JavaDoc;
38 import java.io.OutputStream JavaDoc;
39 import java.util.regex.Pattern JavaDoc;
40 import java.util.regex.Matcher JavaDoc;
41 import java.util.List JavaDoc;
42 import java.util.ArrayList JavaDoc;
43 import java.util.Iterator JavaDoc;
44 import java.util.StringTokenizer JavaDoc;
45
46 public class AddTocAndListsTask implements PublicationProcessTask {
47     private final String JavaDoc input;
48     private final String JavaDoc output;
49
50     public AddTocAndListsTask(String JavaDoc input, String JavaDoc output) {
51         this.input = input;
52         this.output = output;
53     }
54
55     public void run(PublicationContext context) throws Exception JavaDoc {
56         context.getPublicationLog().info("Running add toc and lists task.");
57         int tocDepth = Integer.MAX_VALUE;
58         String JavaDoc tocDepthParam = (String JavaDoc)context.getProperties().get("toc.depth");
59         if (tocDepthParam != null) {
60             try {
61                 tocDepth = Integer.parseInt(tocDepthParam);
62             } catch (NumberFormatException JavaDoc e) {
63                 throw new Exception JavaDoc("Invalid value in toc.depth property: " + tocDepthParam);
64             }
65         }
66
67         // determine types of figures and tables for which to build a lists
68
String JavaDoc listOfFiguresTypes = (String JavaDoc)context.getProperties().get("list-of-figures.include-types");
69         String JavaDoc[] figureTypes = listOfFiguresTypes != null ? parseCSV(listOfFiguresTypes) : new String JavaDoc[0];
70         String JavaDoc listOfTablesTypes = (String JavaDoc)context.getProperties().get("list-of-tables.include-types");
71         String JavaDoc[] tableTypes = listOfTablesTypes != null ? parseCSV(listOfTablesTypes) : new String JavaDoc[0];
72
73         String JavaDoc publicationOutputPath = BookInstanceLayout.getPublicationOutputPath(context.getPublicationOutputName());
74         String JavaDoc inputXmlPath = publicationOutputPath + input;
75         String JavaDoc outputXmlPath = publicationOutputPath + output;
76
77         // Read input document in a DOM tree
78
DocumentBuilderFactory JavaDoc documentBuilderFactory = DocumentBuilderFactory.newInstance();
79         documentBuilderFactory.setNamespaceAware(true);
80         DocumentBuilder JavaDoc documentBuilder = documentBuilderFactory.newDocumentBuilder();
81         Document JavaDoc inputDocument;
82         InputStream JavaDoc is = null;
83         try {
84             is = context.getBookInstance().getResource(inputXmlPath);
85             inputDocument = documentBuilder.parse(is);
86         } finally {
87             if (is != null)
88                 is.close();
89         }
90
91         Document JavaDoc tocDocument = documentBuilder.newDocument();
92         TocBuilder tocBuilder = new TocBuilder();
93         tocBuilder.buildToc(inputDocument, tocDocument, tocDepth);
94         DOMStreamer domStreamer = new DOMStreamer();
95         SaxBuffer tocBuffer = new SaxBuffer();
96         domStreamer.setContentHandler(tocBuffer);
97         domStreamer.stream(tocDocument.getDocumentElement());
98
99         ArtifactListBuilder listBuilder = new ArtifactListBuilder(figureTypes, tableTypes);
100         listBuilder.build(inputDocument);
101
102         OutputStream JavaDoc os = null;
103         is = null;
104         try {
105             is = context.getBookInstance().getResource(inputXmlPath);
106             os = context.getBookInstance().getResourceOutputStream(outputXmlPath);
107             XmlSerializer serializer = new XmlSerializer(os);
108             MergeTocAndListsHandler mergeTocAndListsHandler = new MergeTocAndListsHandler(serializer, tocBuffer, listBuilder.getFigureListBuffers(), listBuilder.getTableListBuffers());
109
110             SAXParser JavaDoc parser = LocalSAXParserFactory.getSAXParserFactory().newSAXParser();
111             parser.getXMLReader().setContentHandler(mergeTocAndListsHandler);
112             parser.getXMLReader().parse(new InputSource JavaDoc(is));
113         } finally {
114             if (is != null)
115                 is.close();
116             if (os != null)
117                 os.close();
118         }
119     }
120
121     private static String JavaDoc[] parseCSV(String JavaDoc data) {
122         ArrayList JavaDoc values = new ArrayList JavaDoc();
123         StringTokenizer JavaDoc tokenizer = new StringTokenizer JavaDoc(data, ",");
124         while (tokenizer.hasMoreTokens()) {
125             String JavaDoc token = tokenizer.nextToken().trim();
126             if (token.length() > 0) {
127                 values.add(token);
128             }
129         }
130         return (String JavaDoc[])values.toArray(new String JavaDoc[values.size()]);
131     }
132
133     // Note: TOC building is done DOM-based since that makes it easier to copy the content of
134
// header elements in a correct way (the DOMStreamer will cleanup missing namespace declarations etc.)
135
static class TocBuilder {
136         private int currentTocLevel;
137         private Document JavaDoc tocDocument;
138         private Element JavaDoc currentTocElement;
139         private int tocDepth;
140         private static final Pattern JavaDoc headerPattern = Pattern.compile("h([0-9]+)");
141
142         void buildToc(Document JavaDoc inputDocument, Document JavaDoc tocDocument, int tocDepth) throws Exception JavaDoc {
143             Element JavaDoc tocElement = tocDocument.createElementNS(null, "toc");
144             tocDocument.appendChild(tocElement);
145
146             currentTocLevel = 0;
147             currentTocElement = tocElement;
148             this.tocDocument = tocDocument;
149             this.tocDepth = tocDepth;
150
151             buildTocRecursive(inputDocument.getDocumentElement());
152         }
153
154
155         private void buildTocRecursive(Element JavaDoc element) throws Exception JavaDoc {
156             NodeList JavaDoc nodeList = element.getChildNodes();
157             for (int i = 0; i < nodeList.getLength(); i++) {
158                 Node JavaDoc node = nodeList.item(i);
159                 if (node.getNodeType() == Node.ELEMENT_NODE) {
160                     Element JavaDoc childElement = (Element JavaDoc)node;
161                     if (childElement.getNamespaceURI() == null) {
162                         Matcher JavaDoc matcher = headerPattern.matcher(childElement.getLocalName());
163                         if (matcher.matches()) {
164                             int headerLevel = Integer.parseInt(matcher.group(1));
165                             if (headerLevel <= currentTocLevel + 1 && headerLevel <= tocDepth) {
166                                 for (int z = headerLevel; z <= currentTocLevel; z++) {
167                                     currentTocElement = (Element JavaDoc)currentTocElement.getParentNode();
168                                 }
169                                 Element JavaDoc tocEntryEl = tocDocument.createElementNS(null, "tocEntry");
170                                 Element JavaDoc captionEl = tocDocument.createElementNS(null, "caption");
171
172                                 tocEntryEl.appendChild(tocDocument.createTextNode("\n"));
173                                 tocEntryEl.appendChild(captionEl);
174                                 tocEntryEl.appendChild(tocDocument.createTextNode("\n"));
175
176                                 copyCaptionChildren(childElement, captionEl, tocDocument);
177
178                                 String JavaDoc targetId = childElement.getAttribute("id");
179                                 if (targetId.length() == 0)
180                                     throw new Exception JavaDoc("Error during TOC generation: encountered a header without id attribute.");
181
182                                 tocEntryEl.setAttribute("targetId", targetId);
183                                 String JavaDoc sectionNumber = childElement.getAttribute("daisyNumber");
184                                 String JavaDoc sectionPartialNumber = childElement.getAttribute("daisyPartialNumber");
185                                 String JavaDoc sectionRawNumber = childElement.getAttribute("daisyRawNumber");
186
187                                 if (sectionNumber.length() > 0)
188                                     tocEntryEl.setAttribute("daisyNumber", sectionNumber);
189                                 if (sectionPartialNumber.length() > 0)
190                                     tocEntryEl.setAttribute("daisyPartialNumber", sectionPartialNumber);
191                                 if (sectionRawNumber.length() > 0)
192                                     tocEntryEl.setAttribute("daisyRawNumber", sectionRawNumber);
193
194
195                                 currentTocElement.appendChild(tocEntryEl);
196                                 currentTocElement.appendChild(tocDocument.createTextNode("\n"));
197                                 currentTocLevel = headerLevel;
198                                 currentTocElement = tocEntryEl;
199                             }
200                         } else {
201                             buildTocRecursive(childElement);
202                         }
203                     }
204                 }
205             }
206         }
207
208         private void copyCaptionChildren(Element JavaDoc fromEl, Element JavaDoc toEl, Document JavaDoc toDocument) {
209             NodeList JavaDoc children = fromEl.getChildNodes();
210             for (int i = 0; i < children.getLength(); i++) {
211                 Node JavaDoc child = children.item(i);
212                 toEl.appendChild(toDocument.importNode(child, true));
213             }
214
215             // remove any footnotes or indexentries that might occur in the caption
216
List JavaDoc elementsToBeRemoved = new ArrayList JavaDoc();
217             collectUnwantedCaptionElement(toEl, elementsToBeRemoved);
218             Iterator JavaDoc elementsToBeRemovedIt = elementsToBeRemoved.iterator();
219             while (elementsToBeRemovedIt.hasNext()) {
220                 Element JavaDoc element = (Element JavaDoc)elementsToBeRemovedIt.next();
221                 element.getParentNode().removeChild(element);
222             }
223         }
224
225         private void collectUnwantedCaptionElement(Element JavaDoc element, List JavaDoc elementsToBeRemoved) {
226             NodeList JavaDoc children = element.getChildNodes();
227             for (int i = 0; i < children.getLength(); i++) {
228                 Node JavaDoc child = children.item(i);
229
230                 // don't include footnotes and indexentries that occur inside headers in the table of contents
231
if (child.getNodeType() == Node.ELEMENT_NODE) {
232                     boolean remove = false;
233                     Element JavaDoc childEl = (Element JavaDoc)child;
234                     if (childEl.getNamespaceURI() == null && childEl.getLocalName().equals("span") && (childEl.getAttribute("class").equals("footnote") || childEl.getAttribute("class").equals("indexentry"))) {
235                         remove = true;
236                         elementsToBeRemoved.add(childEl);
237                     }
238
239                     if (!remove)
240                         collectUnwantedCaptionElement(childEl, elementsToBeRemoved);
241                 }
242
243             }
244         }
245
246     }
247
248
249     static class ArtifactListBuilder {
250         private SaxBuffer[] figureListBuffers;
251         private SaxBuffer[] tableListBuffers;
252         private String JavaDoc[] figureTypes;
253         private String JavaDoc[] tableTypes;
254         private static final String JavaDoc FIGURES_NAME = "figures";
255         private static final String JavaDoc TABLES_NAME = "tables";
256
257         public ArtifactListBuilder(String JavaDoc[] figureTypes, String JavaDoc[] tableTypes) {
258             this.figureTypes = figureTypes;
259             this.tableTypes = tableTypes;
260             figureListBuffers = new SaxBuffer[figureTypes.length];
261             tableListBuffers = new SaxBuffer[tableTypes.length];
262         }
263
264         public SaxBuffer[] getFigureListBuffers() {
265             return figureListBuffers;
266         }
267
268         public SaxBuffer[] getTableListBuffers() {
269             return tableListBuffers;
270         }
271
272         public void build(Document JavaDoc inputDocument) throws Exception JavaDoc {
273             buildRecursive(inputDocument.getDocumentElement());
274             closeLists(FIGURES_NAME, figureListBuffers);
275             closeLists(TABLES_NAME, tableListBuffers);
276         }
277
278         public void buildRecursive(Element JavaDoc element) throws Exception JavaDoc {
279             NodeList JavaDoc childNodes = element.getChildNodes();
280             for (int i = 0; i < childNodes.getLength(); i++) {
281                 Node JavaDoc childNode = childNodes.item(i);
282                 if (childNode.getNodeType() == Node.ELEMENT_NODE) {
283                     Element JavaDoc childElement = (Element JavaDoc)childNode;
284                     if (childNode.getNamespaceURI() == null && childElement.getLocalName().equals("img")) {
285                         String JavaDoc caption = childElement.getAttribute("daisy-caption");
286                         if (caption.length() > 0) {
287                             String JavaDoc id = childElement.getAttribute("id");
288                             if (id.length() == 0)
289                                 throw new Exception JavaDoc("Missing id attribute on image during list-of-figures building.");
290                             addFigure(childElement.getAttribute("daisy-image-type"), id, caption, childElement.getAttribute("daisyNumber"), childElement.getAttribute("daisyPartialNumber"), childElement.getAttribute("daisyRawNumber"));
291                         }
292                     } else if (childNode.getNamespaceURI() == null && childElement.getLocalName().equals("table")) {
293                         String JavaDoc caption = childElement.getAttribute("daisy-caption");
294                         if (caption.length() > 0) {
295                             String JavaDoc id = childElement.getAttribute("id");
296                             if (id.length() == 0)
297                                 throw new Exception JavaDoc("Missing id attribute on table during list-of-tables building.");
298                             addTable(childElement.getAttribute("daisy-table-type"), id, caption, childElement.getAttribute("daisyNumber"), childElement.getAttribute("daisyPartialNumber"), childElement.getAttribute("daisyRawNumber"));
299                         }
300
301                     }
302                     buildRecursive(childElement);
303                 }
304             }
305         }
306
307         private void addFigure(String JavaDoc type, String JavaDoc id, String JavaDoc caption, String JavaDoc daisyNumber, String JavaDoc daisyPartialNumber, String JavaDoc daisyRawNumber) throws SAXException JavaDoc {
308             addItem(FIGURES_NAME, figureListBuffers, figureTypes, type, id, caption, daisyNumber, daisyPartialNumber, daisyRawNumber);
309         }
310
311         private void addTable(String JavaDoc type, String JavaDoc id, String JavaDoc caption, String JavaDoc daisyNumber, String JavaDoc daisyPartialNumber, String JavaDoc daisyRawNumber) throws SAXException JavaDoc {
312             addItem(TABLES_NAME, tableListBuffers, tableTypes, type, id, caption, daisyNumber, daisyPartialNumber, daisyRawNumber);
313         }
314
315         private void addItem(String JavaDoc artifactName, SaxBuffer[] buffers, String JavaDoc[] types, String JavaDoc type, String JavaDoc id, String JavaDoc caption, String JavaDoc daisyNumber, String JavaDoc daisyPartialNumber, String JavaDoc daisyRawNumber) throws SAXException JavaDoc {
316             int index = -1;
317             for (int i = 0; i < types.length; i++) {
318                 if (types[i].equals(type))
319                     index = i;
320             }
321             if (index == -1)
322                 return;
323
324             if (buffers[index] == null) {
325                 buffers[index] = new SaxBuffer();
326                 AttributesImpl listAttrs = new AttributesImpl();
327                 listAttrs.addCDATAAttribute("type", type);
328                 String JavaDoc elementName = "list-of-" + artifactName;
329                 buffers[index].characters(new char[] {'\n'}, 0, 1);
330                 buffers[index].characters(new char[] {'\n'}, 0, 1);
331                 buffers[index].startElement("", elementName, elementName, listAttrs);
332                 buffers[index].characters(new char[] {'\n'}, 0, 1);
333             }
334
335
336             AttributesImpl attrs = new AttributesImpl();
337             attrs.addCDATAAttribute("targetId", id);
338             if (daisyNumber.length() > 0)
339                 attrs.addCDATAAttribute("daisyNumber", daisyNumber);
340             if (daisyPartialNumber.length() > 0)
341                 attrs.addCDATAAttribute("daisyPartialNumber", daisyPartialNumber);
342             if (daisyRawNumber.length() > 0)
343                 attrs.addCDATAAttribute("daisyRawNumber", daisyRawNumber);
344             buffers[index].startElement("", "list-item", "list-item", attrs);
345             buffers[index].characters(caption.toCharArray(), 0, caption.length());
346             buffers[index].endElement("", "list-item", "list-item");
347             buffers[index].characters(new char[] {'\n'}, 0, 1);
348         }
349
350         private void closeLists(String JavaDoc artifactName, SaxBuffer[] buffers) throws SAXException JavaDoc {
351             for (int i = 0; i < buffers.length; i++) {
352                 if (buffers[i] != null) {
353                     String JavaDoc elementName = "list-of-" + artifactName;
354                     buffers[i].endElement("", elementName, elementName);
355                     buffers[i].characters(new char[] {'\n'}, 0, 1);
356                 }
357             }
358         }
359     }
360
361     static class MergeTocAndListsHandler extends AbstractContentHandler {
362         private int level = 0;
363         private final SaxBuffer toc;
364         private final SaxBuffer[] figureBuffers;
365         private final SaxBuffer[] tableBuffers;
366
367         public MergeTocAndListsHandler(ContentHandler consumer, SaxBuffer toc, SaxBuffer[] figureBuffers, SaxBuffer[] tableBuffers) {
368             super(consumer);
369             this.toc = toc;
370             this.figureBuffers = figureBuffers;
371             this.tableBuffers = tableBuffers;
372         }
373
374         public void startElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName, Attributes JavaDoc atts) throws SAXException JavaDoc {
375             level++;
376             super.startElement(namespaceURI, localName, qName, atts);
377             if (level == 2 && namespaceURI.equals("") && localName.equals("body")) {
378                 toc.toSAX(consumer);
379                 for (int i = 0; i < figureBuffers.length; i++) {
380                     if (figureBuffers[i] != null)
381                         figureBuffers[i].toSAX(consumer);
382                 }
383                 for (int i = 0; i < tableBuffers.length; i++) {
384                     if (tableBuffers[i] != null)
385                         tableBuffers[i].toSAX(consumer);
386                 }
387             }
388         }
389
390         public void endElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName) throws SAXException JavaDoc {
391             level--;
392             super.endElement(namespaceURI, localName, qName);
393         }
394     }
395 }
396
Popular Tags