KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > outerj > daisy > books > publisher > impl > publicationprocess > VerifyIdsAndLinksTask


1 /*
2  * Copyright 2004 Outerthought bvba and Schaubroeck nv
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.outerj.daisy.books.publisher.impl.publicationprocess;
17
18 import org.outerj.daisy.books.publisher.impl.BookInstanceLayout;
19 import org.outerj.daisy.books.publisher.impl.util.AbstractContentHandler;
20 import org.outerj.daisy.books.store.BookInstance;
21 import org.outerj.daisy.xmlutil.XmlSerializer;
22 import org.xml.sax.InputSource JavaDoc;
23 import org.xml.sax.Attributes JavaDoc;
24 import org.xml.sax.SAXException JavaDoc;
25 import org.xml.sax.ContentHandler JavaDoc;
26 import org.xml.sax.helpers.DefaultHandler JavaDoc;
27 import org.xml.sax.helpers.AttributesImpl JavaDoc;
28
29 import javax.xml.parsers.SAXParserFactory JavaDoc;
30 import javax.xml.parsers.SAXParser JavaDoc;
31 import java.io.InputStream JavaDoc;
32 import java.io.OutputStream JavaDoc;
33 import java.io.PrintWriter JavaDoc;
34 import java.util.HashSet JavaDoc;
35 import java.util.regex.Pattern JavaDoc;
36 import java.util.regex.Matcher JavaDoc;
37
38 public class VerifyIdsAndLinksTask implements PublicationProcessTask {
39     private final String JavaDoc input;
40     private final String JavaDoc output;
41     private static final Pattern JavaDoc headerPattern = Pattern.compile("h([0-9]+)");
42
43     public VerifyIdsAndLinksTask(String JavaDoc input, String JavaDoc output) {
44         this.input = input;
45         this.output = output;
46     }
47
48     public void run(PublicationContext context) throws Exception JavaDoc {
49         context.getPublicationLog().info("Running verify IDs and links task.");
50         LinkLog linkLog = new LinkLog(context.getBookInstance());
51
52         try {
53             verifyIdsAndLinks(context, linkLog);
54         } finally {
55             linkLog.dispose();
56         }
57     }
58
59     private void verifyIdsAndLinks(PublicationContext context, LinkLog linkLog) throws Exception JavaDoc {
60         String JavaDoc publicationOutputPath = BookInstanceLayout.getPublicationOutputPath(context.getPublicationOutputName());
61         String JavaDoc inputXmlPath = publicationOutputPath + input;
62         String JavaDoc outputXmlPath = publicationOutputPath + output;
63         BookInstance bookInstance = context.getBookInstance();
64
65         SAXParserFactory JavaDoc parserFactory = SAXParserFactory.newInstance();
66         parserFactory.setNamespaceAware(true);
67         SAXParser JavaDoc parser = parserFactory.newSAXParser();
68
69         IdGatherer idGatherer = new IdGatherer(linkLog);
70         parser.getXMLReader().setContentHandler(idGatherer);
71
72         InputStream JavaDoc is = bookInstance.getResource(inputXmlPath);
73         try {
74             InputSource JavaDoc inputSource = new InputSource JavaDoc(is);
75             parser.getXMLReader().parse(inputSource);
76         } finally {
77             is.close();
78         }
79
80         OutputStream JavaDoc os = null;
81         is = null;
82         try {
83             os = bookInstance.getResourceOutputStream(outputXmlPath);
84             XmlSerializer xmlSerializer = new XmlSerializer(os);
85             IdAssigner idAssigner = new IdAssigner(xmlSerializer, idGatherer.getIds());
86             LinkCheckerHandler linkCheckerHandler = new LinkCheckerHandler(idAssigner, idGatherer.getIds(), linkLog);
87             parser.getXMLReader().setContentHandler(linkCheckerHandler);
88             is = bookInstance.getResource(inputXmlPath);
89             InputSource JavaDoc inputSource = new InputSource JavaDoc(is);
90             parser.getXMLReader().parse(inputSource);
91         } finally {
92             if (os != null)
93                 os.close();
94             if (is != null)
95                 is.close();
96         }
97     }
98
99     private static class IdGatherer extends DefaultHandler JavaDoc {
100         private HashSet JavaDoc ids = new HashSet JavaDoc();
101         private LinkLog linkLog;
102
103         public IdGatherer(LinkLog linkLog) {
104             this.linkLog = linkLog;
105         }
106
107         public HashSet JavaDoc getIds() {
108             return ids;
109         }
110
111         public void startElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName, Attributes JavaDoc atts) throws SAXException JavaDoc {
112             if (namespaceURI.equals("")) {
113                 String JavaDoc id = atts.getValue("id");
114                 if (id != null) {
115                     if (ids.contains(id)) {
116                         linkLog.error("Duplicate ID encountered: " + id);
117                     }
118                     ids.add(id);
119                 }
120             }
121         }
122     }
123
124     private static class LinkCheckerHandler extends AbstractContentHandler {
125         private HashSet JavaDoc ids;
126         private LinkLog linkLog;
127         private String JavaDoc currentDocument;
128         private String JavaDoc currentBranch;
129         private String JavaDoc currentLanguage;
130
131         public LinkCheckerHandler(ContentHandler consumer, HashSet JavaDoc ids, LinkLog linkLog) {
132             super(consumer);
133             this.ids = ids;
134             this.linkLog = linkLog;
135         }
136
137         public void startElement(String JavaDoc uri, String JavaDoc localName, String JavaDoc qName, Attributes JavaDoc attributes) throws SAXException JavaDoc {
138             if (uri.length() == 0) {
139                 if (headerPattern.matcher(localName).matches()) {
140                     String JavaDoc document = attributes.getValue("daisyDocument");
141                     if (document != null) {
142                         this.currentDocument = document;
143                         this.currentBranch = attributes.getValue("daisyBranch");
144                         this.currentLanguage = attributes.getValue("daisyLanguage");
145                     }
146                 } else if (localName.equals("a")) {
147                     String JavaDoc href = attributes.getValue("href");
148                     if (href != null && href.startsWith("#")) {
149                         if (!ids.contains(href.substring(1))) {
150                             linkLog.error("Link pointing to non-defined ID: " + href + getContext());
151                         }
152                     }
153                 } else if (localName.equals("span") && "crossreference".equals(attributes.getValue("class"))) {
154                     String JavaDoc crossRefBookTarget = attributes.getValue("crossRefBookTarget");
155                     if (crossRefBookTarget != null && crossRefBookTarget.length() > 1) {
156                         if (!ids.contains(crossRefBookTarget.substring(1))) {
157                             linkLog.error("Cross reference pointing to non-defined ID: " + crossRefBookTarget + getContext());
158                         }
159                     }
160                 } else if (localName.equals("img")) {
161                     // Sometimes images might be inserted by accident with a file: URL instead
162
// of uploading them into the Daisy repository.
163
String JavaDoc src = attributes.getValue("src");
164                     if (src != null && src.startsWith("file:")) {
165                         linkLog.error("Image refering to a file: " + src + getContext());
166                     }
167                 }
168             }
169             super.startElement(uri, localName, qName, attributes);
170         }
171
172         private String JavaDoc getContext() {
173             return " (source document: " + currentDocument + ", branch: " + currentBranch + ", language: " + currentLanguage + ")";
174         }
175     }
176
177     /**
178      * A handler that assigns IDs to certain elements that have no ID but need one.
179      */

180     private static class IdAssigner extends AbstractContentHandler {
181         private HashSet JavaDoc existingIds;
182         private int sectionCounter = 0;
183         private int figureCounter = 0;
184         private int tableCounter = 0;
185
186         public IdAssigner(ContentHandler consumer, HashSet JavaDoc existingIds) {
187             super(consumer);
188             this.existingIds = existingIds;
189         }
190
191         public void startElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName, Attributes JavaDoc atts) throws SAXException JavaDoc {
192             if (namespaceURI.equals("")) {
193                 Matcher JavaDoc matcher = headerPattern.matcher(localName);
194                 String JavaDoc newId = null;
195                 if (atts.getValue("id") == null) {
196                     if (matcher.matches()) {
197                         newId = generateSectionId();
198                     } else if (localName.equals("img") && atts.getValue("daisy-caption") != null) {
199                         newId = generateFigureId();
200                     } else if (localName.equals("table") && atts.getValue("daisy-caption") != null) {
201                         newId = generateTableId();
202                     }
203                 }
204
205                 if (newId != null) {
206                     AttributesImpl JavaDoc newAttrs = new AttributesImpl JavaDoc(atts);
207                     newAttrs.addAttribute("", "id", "id", "CDATA", newId);
208                     atts = newAttrs;
209                 }
210             }
211             super.startElement(namespaceURI, localName, qName, atts);
212         }
213
214         private String JavaDoc generateSectionId() {
215             String JavaDoc result = "s" + ++sectionCounter;
216             while (existingIds.contains(result)) {
217                 result = "s" + ++sectionCounter;
218             }
219             return result;
220         }
221
222         private String JavaDoc generateFigureId() {
223             String JavaDoc result = "dsy_fig_" + ++figureCounter;
224             while (existingIds.contains(result)) {
225                 result = "dsy_fig_" + ++figureCounter;
226             }
227             return result;
228         }
229
230         private String JavaDoc generateTableId() {
231             String JavaDoc result = "dsy_tbl_" + ++tableCounter;
232             while (existingIds.contains(result)) {
233                 result = "dsy_tbl_" + ++tableCounter;
234             }
235             return result;
236         }
237     }
238
239     static class LinkLog {
240         private PrintWriter JavaDoc pw;
241
242         public LinkLog(BookInstance bookInstance) throws Exception JavaDoc {
243             OutputStream JavaDoc os = bookInstance.getResourceOutputStream(BookInstanceLayout.getLinkLogPath());
244             pw = new PrintWriter JavaDoc(os);
245             pw.println("If this file is empty, then no link errors were detected.");
246         }
247
248         public void dispose() {
249             if (pw != null)
250                 pw.close();
251         }
252
253         public void error(String JavaDoc message) {
254             pw.println(message);
255             pw.flush();
256         }
257     }
258 }
259
Popular Tags