1 16 package org.outerj.daisy.books.publisher.impl.publicationprocess; 17 18 import org.outerj.daisy.books.publisher.impl.BookInstanceLayout; 19 import org.outerj.daisy.books.publisher.impl.util.AbstractContentHandler; 20 import org.outerj.daisy.books.store.BookInstance; 21 import org.outerj.daisy.xmlutil.XmlSerializer; 22 import org.xml.sax.InputSource ; 23 import org.xml.sax.Attributes ; 24 import org.xml.sax.SAXException ; 25 import org.xml.sax.ContentHandler ; 26 import org.xml.sax.helpers.DefaultHandler ; 27 import org.xml.sax.helpers.AttributesImpl ; 28 29 import javax.xml.parsers.SAXParserFactory ; 30 import javax.xml.parsers.SAXParser ; 31 import java.io.InputStream ; 32 import java.io.OutputStream ; 33 import java.io.PrintWriter ; 34 import java.util.HashSet ; 35 import java.util.regex.Pattern ; 36 import java.util.regex.Matcher ; 37 38 public class VerifyIdsAndLinksTask implements PublicationProcessTask { 39 private final String input; 40 private final String output; 41 private static final Pattern headerPattern = Pattern.compile("h([0-9]+)"); 42 43 public VerifyIdsAndLinksTask(String input, String output) { 44 this.input = input; 45 this.output = output; 46 } 47 48 public void run(PublicationContext context) throws Exception { 49 context.getPublicationLog().info("Running verify IDs and links task."); 50 LinkLog linkLog = new LinkLog(context.getBookInstance()); 51 52 try { 53 verifyIdsAndLinks(context, linkLog); 54 } finally { 55 linkLog.dispose(); 56 } 57 } 58 59 private void verifyIdsAndLinks(PublicationContext context, LinkLog linkLog) throws Exception { 60 String publicationOutputPath = BookInstanceLayout.getPublicationOutputPath(context.getPublicationOutputName()); 61 String inputXmlPath = publicationOutputPath + input; 62 String outputXmlPath = publicationOutputPath + output; 63 BookInstance bookInstance = context.getBookInstance(); 64 65 SAXParserFactory parserFactory = SAXParserFactory.newInstance(); 66 parserFactory.setNamespaceAware(true); 67 SAXParser parser = parserFactory.newSAXParser(); 68 69 IdGatherer idGatherer = new IdGatherer(linkLog); 70 parser.getXMLReader().setContentHandler(idGatherer); 71 72 InputStream is = bookInstance.getResource(inputXmlPath); 73 try { 74 InputSource inputSource = new InputSource (is); 75 parser.getXMLReader().parse(inputSource); 76 } finally { 77 is.close(); 78 } 79 80 OutputStream os = null; 81 is = null; 82 try { 83 os = bookInstance.getResourceOutputStream(outputXmlPath); 84 XmlSerializer xmlSerializer = new XmlSerializer(os); 85 IdAssigner idAssigner = new IdAssigner(xmlSerializer, idGatherer.getIds()); 86 LinkCheckerHandler linkCheckerHandler = new LinkCheckerHandler(idAssigner, idGatherer.getIds(), linkLog); 87 parser.getXMLReader().setContentHandler(linkCheckerHandler); 88 is = bookInstance.getResource(inputXmlPath); 89 InputSource inputSource = new InputSource (is); 90 parser.getXMLReader().parse(inputSource); 91 } finally { 92 if (os != null) 93 os.close(); 94 if (is != null) 95 is.close(); 96 } 97 } 98 99 private static class IdGatherer extends DefaultHandler { 100 private HashSet ids = new HashSet (); 101 private LinkLog linkLog; 102 103 public IdGatherer(LinkLog linkLog) { 104 this.linkLog = linkLog; 105 } 106 107 public HashSet getIds() { 108 return ids; 109 } 110 111 public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { 112 if (namespaceURI.equals("")) { 113 String id = atts.getValue("id"); 114 if (id != null) { 115 if (ids.contains(id)) { 116 linkLog.error("Duplicate ID encountered: " + id); 117 } 118 ids.add(id); 119 } 120 } 121 } 122 } 123 124 private static class LinkCheckerHandler extends AbstractContentHandler { 125 private HashSet ids; 126 private LinkLog linkLog; 127 private String currentDocument; 128 private String currentBranch; 129 private String currentLanguage; 130 131 public LinkCheckerHandler(ContentHandler consumer, HashSet ids, LinkLog linkLog) { 132 super(consumer); 133 this.ids = ids; 134 this.linkLog = linkLog; 135 } 136 137 public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { 138 if (uri.length() == 0) { 139 if (headerPattern.matcher(localName).matches()) { 140 String document = attributes.getValue("daisyDocument"); 141 if (document != null) { 142 this.currentDocument = document; 143 this.currentBranch = attributes.getValue("daisyBranch"); 144 this.currentLanguage = attributes.getValue("daisyLanguage"); 145 } 146 } else if (localName.equals("a")) { 147 String href = attributes.getValue("href"); 148 if (href != null && href.startsWith("#")) { 149 if (!ids.contains(href.substring(1))) { 150 linkLog.error("Link pointing to non-defined ID: " + href + getContext()); 151 } 152 } 153 } else if (localName.equals("span") && "crossreference".equals(attributes.getValue("class"))) { 154 String crossRefBookTarget = attributes.getValue("crossRefBookTarget"); 155 if (crossRefBookTarget != null && crossRefBookTarget.length() > 1) { 156 if (!ids.contains(crossRefBookTarget.substring(1))) { 157 linkLog.error("Cross reference pointing to non-defined ID: " + crossRefBookTarget + getContext()); 158 } 159 } 160 } else if (localName.equals("img")) { 161 String src = attributes.getValue("src"); 164 if (src != null && src.startsWith("file:")) { 165 linkLog.error("Image refering to a file: " + src + getContext()); 166 } 167 } 168 } 169 super.startElement(uri, localName, qName, attributes); 170 } 171 172 private String getContext() { 173 return " (source document: " + currentDocument + ", branch: " + currentBranch + ", language: " + currentLanguage + ")"; 174 } 175 } 176 177 180 private static class IdAssigner extends AbstractContentHandler { 181 private HashSet existingIds; 182 private int sectionCounter = 0; 183 private int figureCounter = 0; 184 private int tableCounter = 0; 185 186 public IdAssigner(ContentHandler consumer, HashSet existingIds) { 187 super(consumer); 188 this.existingIds = existingIds; 189 } 190 191 public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { 192 if (namespaceURI.equals("")) { 193 Matcher matcher = headerPattern.matcher(localName); 194 String newId = null; 195 if (atts.getValue("id") == null) { 196 if (matcher.matches()) { 197 newId = generateSectionId(); 198 } else if (localName.equals("img") && atts.getValue("daisy-caption") != null) { 199 newId = generateFigureId(); 200 } else if (localName.equals("table") && atts.getValue("daisy-caption") != null) { 201 newId = generateTableId(); 202 } 203 } 204 205 if (newId != null) { 206 AttributesImpl newAttrs = new AttributesImpl (atts); 207 newAttrs.addAttribute("", "id", "id", "CDATA", newId); 208 atts = newAttrs; 209 } 210 } 211 super.startElement(namespaceURI, localName, qName, atts); 212 } 213 214 private String generateSectionId() { 215 String result = "s" + ++sectionCounter; 216 while (existingIds.contains(result)) { 217 result = "s" + ++sectionCounter; 218 } 219 return result; 220 } 221 222 private String generateFigureId() { 223 String result = "dsy_fig_" + ++figureCounter; 224 while (existingIds.contains(result)) { 225 result = "dsy_fig_" + ++figureCounter; 226 } 227 return result; 228 } 229 230 private String generateTableId() { 231 String result = "dsy_tbl_" + ++tableCounter; 232 while (existingIds.contains(result)) { 233 result = "dsy_tbl_" + ++tableCounter; 234 } 235 return result; 236 } 237 } 238 239 static class LinkLog { 240 private PrintWriter pw; 241 242 public LinkLog(BookInstance bookInstance) throws Exception { 243 OutputStream os = bookInstance.getResourceOutputStream(BookInstanceLayout.getLinkLogPath()); 244 pw = new PrintWriter (os); 245 pw.println("If this file is empty, then no link errors were detected."); 246 } 247 248 public void dispose() { 249 if (pw != null) 250 pw.close(); 251 } 252 253 public void error(String message) { 254 pw.println(message); 255 pw.flush(); 256 } 257 } 258 } 259 | Popular Tags |