KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > outerj > daisy > books > publisher > impl > publicationprocess > SplitInChunksTask


1 /*
2  * Copyright 2004 Outerthought bvba and Schaubroeck nv
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.outerj.daisy.books.publisher.impl.publicationprocess;
17
18 import org.xml.sax.*;
19 import org.xml.sax.helpers.NamespaceSupport JavaDoc;
20 import org.apache.cocoon.xml.AttributesImpl;
21 import org.apache.cocoon.xml.SaxBuffer;
22 import org.outerj.daisy.books.publisher.impl.BookInstanceLayout;
23 import org.outerj.daisy.books.publisher.impl.util.AbstractContentHandler;
24 import org.outerj.daisy.xmlutil.XmlSerializer;
25 import org.outerj.daisy.xmlutil.LocalSAXParserFactory;
26
27 import javax.xml.parsers.SAXParser JavaDoc;
28 import java.util.regex.Pattern JavaDoc;
29 import java.util.regex.Matcher JavaDoc;
30 import java.util.HashSet JavaDoc;
31 import java.util.Map JavaDoc;
32 import java.util.HashMap JavaDoc;
33 import java.util.Enumeration JavaDoc;
34 import java.io.InputStream JavaDoc;
35 import java.io.OutputStream JavaDoc;
36
37 public class SplitInChunksTask implements PublicationProcessTask {
38     private final String JavaDoc input;
39     private final String JavaDoc output;
40     private final String JavaDoc chunkNamePrefix;
41     private final String JavaDoc publishExtension;
42     private final String JavaDoc firstChunkName;
43     private static final Pattern JavaDoc headerPattern = Pattern.compile("h([0-9]+)");
44
45     public SplitInChunksTask(String JavaDoc input, String JavaDoc output, String JavaDoc chunkNamePrefix,
46                              String JavaDoc firstChunkName, String JavaDoc publishExtension) {
47         this.input = input;
48         this.output = output;
49         this.chunkNamePrefix = chunkNamePrefix;
50         this.firstChunkName = firstChunkName;
51         this.publishExtension = publishExtension;
52     }
53
54     public void run(PublicationContext context) throws Exception JavaDoc {
55         context.getPublicationLog().info("Running split in chunks task.");
56         
57         String JavaDoc publicationOutputPath = BookInstanceLayout.getPublicationOutputPath(context.getPublicationOutputName());
58         String JavaDoc inputPath = publicationOutputPath + input;
59         String JavaDoc outputPath = publicationOutputPath + output;
60
61         int chunkLevel = 1;
62         String JavaDoc chunkLevelProp = (String JavaDoc)context.getProperties().get("chunker.chunklevel");
63         if (chunkLevelProp != null) {
64             try {
65                 chunkLevel = Integer.parseInt(chunkLevelProp);
66             } catch (NumberFormatException JavaDoc e) {
67                 context.getPublicationLog().error("Invalid value in chunker.chunklevel property: \"" + chunkLevelProp + "\", defaulting to " + chunkLevel);
68             }
69         }
70
71         //
72
// Step 1: make chunks and capture result in a sax buffer
73
//
74
SaxBuffer buffer = new SaxBuffer();
75         ChunkerHandler chunkerHandler;
76         InputStream JavaDoc is = null;
77         try {
78             SAXParser JavaDoc parser = LocalSAXParserFactory.getSAXParserFactory().newSAXParser();
79             is = context.getBookInstance().getResource(inputPath);
80             chunkerHandler = new ChunkerHandler(buffer, chunkLevel);
81             parser.getXMLReader().setContentHandler(chunkerHandler);
82             parser.getXMLReader().parse(new InputSource(is));
83         } finally {
84             if (is != null)
85                 is.close();
86         }
87
88         //
89
// Step: adjust links
90
//
91
OutputStream JavaDoc os = null;
92         try {
93             os = context.getBookInstance().getResourceOutputStream(outputPath);
94             XmlSerializer serializer = new XmlSerializer(os);
95             ChunkLinkFixerHandler linkFixer = new ChunkLinkFixerHandler(serializer, chunkerHandler.getIdToChunkMap());
96             buffer.toSAX(linkFixer);
97         } finally {
98             if (os != null)
99                 os.close();
100         }
101     }
102
103     class ChunkerHandler implements ContentHandler {
104         private boolean inBody = false;
105         private int nesting = 0;
106         private boolean inChunk = false;
107         private ContentHandler consumer;
108         private HashSet JavaDoc createdChunks = new HashSet JavaDoc();
109         private int chunkNameCounter = 0;
110         private Map JavaDoc idToChunkMap = new HashMap JavaDoc();
111         private String JavaDoc currentChunkName;
112         private boolean firstChunk = true;
113         private final int chunkLevel;
114         private int prevHeaderLevel = -1;
115         private NamespaceSupport JavaDoc namespaceSupport = new NamespaceSupport JavaDoc();
116
117         public ChunkerHandler(ContentHandler consumer, int chunkLevel) {
118             this.consumer = consumer;
119             this.chunkLevel = chunkLevel;
120         }
121
122         public Map JavaDoc getIdToChunkMap() {
123             return idToChunkMap;
124         }
125
126         public void startDocument() throws SAXException {
127             consumer.startDocument();
128             consumer.startElement("", "chunks", "chunks", new AttributesImpl());
129         }
130
131         public void endDocument() throws SAXException {
132             if (inChunk)
133                 endChunk();
134             consumer.endElement("", "chunks", "chunks");
135             consumer.endDocument();
136         }
137
138         public void startElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName, Attributes atts) throws SAXException {
139             namespaceSupport.pushContext();
140             nesting++;
141             Matcher JavaDoc headerMatcher = headerPattern.matcher(localName);
142
143             if (nesting == 2 && namespaceURI.equals("") && localName.equals("body")) {
144                 inBody = true;
145             } else if (inBody && nesting == 3 && headerMatcher.matches()) {
146                 int headerLevel = Integer.parseInt(headerMatcher.group(1));
147                 if (headerLevel <= chunkLevel && headerLevel != prevHeaderLevel + 1) {
148                     if (inChunk)
149                         endChunk();
150
151                     // determine chunk name
152
String JavaDoc chunkName = null;
153                     if (firstChunk && firstChunkName != null)
154                         chunkName = firstChunkName;
155                     firstChunk = false;
156                     if (chunkName == null) {
157                         String JavaDoc headerId = atts.getValue("id");
158                         if (headerId != null && headerId.trim().length() > 0) {
159                             chunkName = headerId;
160                         } else {
161                             chunkName = generateChunkName();
162                         }
163                     }
164                     // check if there is not already a chunk named this way and generate new name until we have a unique one
165
while (createdChunks.contains(chunkName)) {
166                         chunkName = generateChunkName();
167                     }
168                     createdChunks.add(chunkName);
169                     currentChunkName = chunkName;
170
171                     AttributesImpl chunkAttrs = new AttributesImpl();
172                     chunkAttrs.addCDATAAttribute("name", chunkName);
173                     consumer.startElement("", "chunk", "chunk", chunkAttrs);
174                     consumer.startElement("", "html", "html", new AttributesImpl());
175                     declarePrefixes();
176                     consumer.startElement("", "body", "body", new AttributesImpl());
177                     inChunk = true;
178                 }
179                 prevHeaderLevel = headerLevel;
180             }
181
182             if (inChunk) {
183                 consumer.startElement(namespaceURI, localName, qName, atts);
184
185                 String JavaDoc id = atts.getValue("id");
186                 if (id != null && id.trim().length() > 0) {
187                     idToChunkMap.put(id, currentChunkName);
188                 }
189             }
190         }
191
192         private String JavaDoc generateChunkName() {
193             return chunkNamePrefix + ++chunkNameCounter;
194         }
195
196         public void endElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName) throws SAXException {
197             namespaceSupport.popContext();
198             if (inBody && nesting == 2 && namespaceURI.equals("") && localName.equals("body")) {
199                 inBody = false;
200                 if (inChunk) {
201                     endChunk();
202                     inChunk = false;
203                 }
204             } else if (inChunk) {
205                 consumer.endElement(namespaceURI, localName, qName);
206             }
207             nesting--;
208         }
209
210         private void endChunk() throws SAXException {
211             consumer.endElement("", "body", "body");
212             undeclarePrefixes();
213             consumer.endElement("", "html", "html");
214             consumer.endElement("", "chunk", "chunk");
215         }
216
217         private void declarePrefixes() throws SAXException {
218             Enumeration JavaDoc prefixEnum = namespaceSupport.getPrefixes();
219             while (prefixEnum.hasMoreElements()) {
220                 String JavaDoc prefix = (String JavaDoc)prefixEnum.nextElement();
221                 if (!prefix.equals("xml"))
222                     consumer.startPrefixMapping(prefix, namespaceSupport.getURI(prefix));
223             }
224         }
225
226         private void undeclarePrefixes() throws SAXException {
227             Enumeration JavaDoc prefixEnum = namespaceSupport.getPrefixes();
228             while (prefixEnum.hasMoreElements()) {
229                 String JavaDoc prefix = (String JavaDoc)prefixEnum.nextElement();
230                 if (!prefix.equals("xml"))
231                     consumer.endPrefixMapping(prefix);
232             }
233         }
234
235         public void characters(char ch[], int start, int length) throws SAXException {
236             if (inChunk)
237                 consumer.characters(ch, start, length);
238         }
239
240         public void ignorableWhitespace(char ch[], int start, int length) throws SAXException {
241             if (inChunk)
242                 consumer.ignorableWhitespace(ch, start, length);
243         }
244
245         public void skippedEntity(String JavaDoc name) throws SAXException {
246             if (inChunk)
247                 consumer.skippedEntity(name);
248         }
249
250         public void processingInstruction(String JavaDoc target, String JavaDoc data) throws SAXException {
251         }
252
253         public void startPrefixMapping(String JavaDoc prefix, String JavaDoc uri) throws SAXException {
254             namespaceSupport.declarePrefix(prefix, uri);
255             consumer.startPrefixMapping(prefix, uri);
256         }
257
258         public void endPrefixMapping(String JavaDoc prefix) throws SAXException {
259             consumer.endPrefixMapping(prefix);
260         }
261
262         public void setDocumentLocator(Locator locator) {
263             // ignore
264
}
265     }
266
267     class ChunkLinkFixerHandler extends AbstractContentHandler {
268         private Map JavaDoc idToChunkMap;
269         private int nesting = 0;
270         private String JavaDoc currentChunkName = null;
271
272         public ChunkLinkFixerHandler(ContentHandler consumer, Map JavaDoc idToChunkMap) {
273             super(consumer);
274             this.idToChunkMap = idToChunkMap;
275         }
276
277         public void startElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName, Attributes atts) throws SAXException {
278             nesting++;
279             if (nesting == 2 && namespaceURI.equals("") && localName.equals("chunk")) {
280                 currentChunkName = atts.getValue("name");
281             } else if (currentChunkName != null && namespaceURI.equals("") && localName.equals("a")) {
282                 String JavaDoc href = atts.getValue("href");
283                 if (href != null && href.length() > 1 && href.charAt(0) == '#') {
284                     String JavaDoc targetChunk = (String JavaDoc)idToChunkMap.get(href.substring(1));
285                     if (targetChunk != null && !targetChunk.equals(currentChunkName)) {
286                         String JavaDoc newHref = targetChunk + publishExtension + href;
287                         AttributesImpl newAttrs = new AttributesImpl(atts);
288                         newAttrs.setValue(newAttrs.getIndex("href"), newHref);
289                         atts = newAttrs;
290                     }
291                 }
292             }
293             super.startElement(namespaceURI, localName, qName, atts);
294         }
295
296         public void endElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName) throws SAXException {
297             if (nesting == 2 && namespaceURI.equals("") && localName.equals("chunk")) {
298                 currentChunkName = null;
299             }
300             nesting--;
301             super.endElement(namespaceURI, localName, qName);
302         }
303     }
304 }
305
Popular Tags