MmxfSetString


1   /*
2   
3   This software is OSI Certified Open Source Software.
4   OSI Certified is a certification mark of the Open Source Initiative.
5   
6   The license (Mozilla version 1.0) can be read at the MMBase site.
7   See http://www.MMBase.org/license
8   
9   */
10  package org.mmbase.richtext.processors.xml;
11  import org.mmbase.datatypes.processors.*;
12  import org.mmbase.datatypes.processors.xml.Modes;
13  import org.mmbase.bridge.*;
14  import org.mmbase.bridge.Node;
15  import org.mmbase.bridge.NodeList;
16  import org.mmbase.richtext.Mmxf;
17  import org.mmbase.bridge.util.Queries;
18  import org.mmbase.servlet.BridgeServlet;
19  import org.mmbase.storage.search.*;
20  import org.mmbase.util.*;
21  import org.mmbase.util.xml.XMLWriter;
22  import org.mmbase.richtext.transformers.XmlField;
23  import java.util.*;
24  import java.util.regex.*;
25  import java.net.URL  ;
26  import javax.servlet.http.HttpServletRequest  ;
27  import org.w3c.dom.*;
28  import javax.xml.parsers.*;
29  import javax.xml.transform.dom.*;
30  import org.mmbase.util.logging.*;
31  
32  
33  /**
34   * Set-processing for an `mmxf' field. This is the counterpart and inverse of {@link MmxfGetString}, for more
35   * information see the javadoc of that class.
36   * @author Michiel Meeuwissen
37   * @version $Id: MmxfSetString.java,v 1.9 2006/08/24 16:15:33 michiel Exp $
38   * @since MMBase-1.8
39   */
40  
41  public class MmxfSetString implements  Processor {
    /** Logger of this class. */
    private static final Logger log = Logging.getLoggerInstance(MmxfSetString.class);
    private static final long serialVersionUID = 1L;


    // XmlField instance constructed with the XmlField.WIKI constant.
    // NOTE(review): not referenced in the part of the file shown here; presumably used by another
    // input mode further down -- confirm before touching.
    private static XmlField xmlField = new XmlField(XmlField.WIKI);

    /**
     * Used for generating unique id's: getHref gives elements without an id the value "_" followed by
     * the current counter value and then increments the counter. The counter is seeded with the
     * number of seconds since the epoch at the moment this class is initialized. The increment
     * itself is a plain, unsynchronized ++ on this static field.
     */
    private static long indexCounter = System.currentTimeMillis() / 1000;
52  
53      /**
54       * Just parses String to Document
55       */
56      protected static Document parse(Object   value)  throws javax.xml.parsers.ParserConfigurationException  , org.xml.sax.SAXException  ,  java.io.IOException   {
57          if (value instanceof Document) return (Document) value;
58          try {
59              return parse(new java.io.ByteArrayInputStream  (("" + value).getBytes("UTF-8")));
60          } catch (java.io.UnsupportedEncodingException   uee) {
61              // cannot happen, UTF-8 is supported..
62              return null;
63          }
64  
65      }
66      /**
67       * Just parses InputStream  to Document (without validation).
68       */
69      protected static  Document parse(java.io.InputStream   value)  throws javax.xml.parsers.ParserConfigurationException  , org.xml.sax.SAXException  ,  java.io.IOException   {
70          DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
71          dfactory.setValidating(false);
72          dfactory.setNamespaceAware(true);
73          DocumentBuilder documentBuilder = dfactory.newDocumentBuilder();
74          // dont log errors, and try to process as much as possible...
75          XMLErrorHandler errorHandler = new XMLErrorHandler(false, org.mmbase.util.XMLErrorHandler.NEVER);
76          documentBuilder.setErrorHandler(errorHandler);
77  
78          documentBuilder.setEntityResolver(new XMLEntityResolver(false));
79          Document doc = documentBuilder.parse(value);
80          if (! errorHandler.foundNothing()) {
81              throw new IllegalArgumentException  ("xml invalid:\n" + errorHandler.getMessageBuffer() + "for xml:\n" + value);
82          }
83          return doc;
84      }
85  
86  
87  
    /**
     * Parse mode for a level where &lt;h&gt; tags may follow and subsections may be started.
     * In this mode parseKupu wraps loose text and links in a paragraph, and a ParseState
     * created with it gets a list to collect sub sections in.
     */
    private final int MODE_SECTION = 0;
    /**
     * Parse mode for all other levels (content inside an element). parseKupu treats an
     * &lt;h&gt; tag found in this mode as a paragraph.
     */
    private final int MODE_INLINE  = 1;
96  
97      /**
98       * Also used for parsing kupu-output
99       */
100     private class ParseState {
101         int level = 0;
102         int offset = 0;
103         int mode;
104         List subSections;
105         List sparedTags;
106         ParseState(int sl, int m) {
107             this(sl, m, 0);
108         }
109         ParseState(int sl, int m, int of) {
110             level = sl;
111             mode = m;
112             offset = of;
113             if (m == MODE_SECTION)  subSections = new ArrayList();
114         }
115 
116         public String   level() {
117             StringBuffer   buf = new StringBuffer  ();
118             for (int i = 0 ; i < level ; i++) buf.append("  ");
119             return buf.toString();
120 
121         }
122     }
123 
124 
    /**
     * Patterns used in parsing of kupu-output. All of them are matched against the complete node
     * name (or attribute name) with Matcher#matches.
     */

    // Elements which parseKupu recreates under the same name (allowed attributes copied, children parsed).
    private static Pattern copyElement   = Pattern.compile("table|tr|td|th|em|strong|ul|ol|li|p|sub|sup");
    // Elements which are dropped themselves, while their children are parsed into the current destination.
    private static Pattern ignoreElement = Pattern.compile("tbody|thead|font|span|acronym|address|abbr|base|blockquote|cite|code|pre|colgroup|col|dd|dfn|dl|dt|kbd|meta|samp|script|style|var|center");
    // Nodes which are skipped entirely, children included.
    private static Pattern ignore        = Pattern.compile("link|#comment");
    // Headers; group 1 is the digit, which parseKupu uses as the section level.
    private static Pattern hElement      = Pattern.compile("h([1-9])");
    // Elements which parseKupu converts to an 'a' element and collects in the 'links' list.
    private static Pattern crossElement  = Pattern.compile("a|img|div");


    // Names of the only attributes which copyAttributes takes over.
    private static Pattern allowedAttributes = Pattern.compile("id|href|src|class|type");
137 
138     private void copyAttributes(Element source, Element destination) {
139         NamedNodeMap attributes = source.getAttributes();
140         for (int i = 0; i < attributes.getLength(); i++) {
141             org.w3c.dom.Node   n = attributes.item(i);
142             if (allowedAttributes.matcher(n.getNodeName()).matches()) {
143                 destination.setAttribute(n.getNodeName(), n.getNodeValue());
144             }
145         }
146     }
147 
148     private void copyChilds(Element source, Element destination) {
149         org.w3c.dom.Node   child = source.getFirstChild();
150         while (child != null) {
151             org.w3c.dom.Node   copy = destination.getOwnerDocument().importNode(child, true);
152             destination.appendChild(copy);
153             child = child.getNextSibling();
154         }
155     }
156 
    /**
     * First stage of parsing kupu-output. Does nothing with relations, only cleans up to 'mmxf' XML.
     *
     * The children of 'source' are walked starting at state.offset. Depending on the node name a child
     * is skipped, copied, renamed (i to em, b to strong), converted to an 'a' element (a, img, div) or,
     * for h-tags in section mode, used to open or close a 'section'. Sections are parsed by calling
     * this method again on the <em>same</em> source with a new state; the offset at which that call
     * stopped is taken over afterwards, so the sibling list is walked only once.
     *
     * @param source       XML as received from kupu
     * @param destination  pseudo MMXF which is going to receive it.
     * @param links        This list collects elements representing some kind of link (cross-links, images, attachments, urls). Afterwards these can be compared with actual MMBase objects.
     * @param state        The function is called recursively, and this object remembers the state then (where it was while parsing e.g.). Its offset is updated while parsing.
     */

    private void parseKupu(Element source, Element destination, List links, ParseState state) {
        org.w3c.dom.NodeList   nl = source.getChildNodes();
        if (log.isDebugEnabled()) {
            log.trace(state.level() + state.level + " Appending to " + destination.getNodeName() + " at " + state.offset + " of " + nl.getLength());
        }
        for (; state.offset < nl.getLength(); state.offset++) {
            org.w3c.dom.Node   node = nl.item(state.offset);
            if (node == null) break;
            String   name= node.getNodeName();
            // nodes matching 'ignore' are dropped, including everything in them
            Matcher matcher = ignore.matcher(name);
            if (matcher.matches()) {
                continue;
            }
            if (name.equals("#text")) {
                // text which is not only white space is kept; in section mode it is wrapped in a paragraph
                if (node.getNodeValue() != null && ! "".equals(node.getNodeValue().trim())) {
                    if (state.mode == MODE_SECTION) {
                        Element imp = destination.getOwnerDocument().createElement("p");
                        log.debug("Appending to " + destination.getNodeName());
                        destination.appendChild(imp);
                        Text text = destination.getOwnerDocument().createTextNode(node.getNodeValue());
                        imp.appendChild(text);
                    } else {
                        Text text = destination.getOwnerDocument().createTextNode(node.getNodeValue());
                        destination.appendChild(text);
                    }
                } else {
                    log.debug("Ignored empty #text");
                }
                continue;
            }

            // everything below casts the node to Element
            if (! (node instanceof Element)) {
                log.warn(" found node " + node.getNodeName() + " which is not an element!");
                continue;
            }

            // the element itself is dropped, its children are parsed (in inline mode) into the current destination
            matcher = ignoreElement.matcher(name);
            if (matcher.matches()) {
                parseKupu((Element) node, destination, links, new ParseState(state.level, MODE_INLINE));
                continue;
            }

            // a, img and div all become an 'a' element, which is also registered in 'links'
            matcher = crossElement.matcher(name);
            if (matcher.matches()) {
                Element imp = destination.getOwnerDocument().createElement("a");
                copyAttributes((Element) node, imp);
                if (name.equals("div")) {
                    // mark it as originating from a div by prefixing the class
                    imp.setAttribute("class", "div " + imp.getAttribute("class"));
                }
                if (state.mode == MODE_SECTION) {
                    // in section mode the link is wrapped in a paragraph
                    Element p = destination.getOwnerDocument().createElement("p");
                    log.debug("Appending to " + destination.getNodeName());
                    destination.appendChild(p);
                    p.appendChild(imp);
                } else {
                    destination.appendChild(imp);
                }

                links.add(imp);
                if (name.equals("div")) {
                    // don't treat body, will be done later, when handling 'links'.
                    // so simply copy everything for now
                    copyChilds((Element) node, imp);

                } else {
                    if ("generated".equals(imp.getAttribute("class"))) {
                        // body was generated by kupu, ignore that, it's only presentation.
                        log.debug("Found generated body, ignoring that");
                    } else {
                        // could only do something for 'a' and 'div', but well, never mind
                        parseKupu((Element) node, imp, links, new ParseState(state.level, MODE_INLINE));
                    }
                }
                continue;
            }
            if (name.equals("i")) { // produced by FF
                if (node.getFirstChild() != null) { // ignore if empty
                    Element imp = destination.getOwnerDocument().createElement("em");
                    destination.appendChild(imp);
                    parseKupu((Element) node, imp, links, new ParseState(state.level, MODE_INLINE));
                }
                continue;
            }
            if (name.equals("b")) { // produced by FF
                if (node.getFirstChild() != null) { // ignore if empty
                    Element imp = destination.getOwnerDocument().createElement("strong");
                    destination.appendChild(imp);
                    parseKupu((Element) node, imp, links, new ParseState(state.level, MODE_INLINE));
                }
                continue;
            }
            // a br is only taken over in inline mode; in section mode it falls through to the
            // 'Unrecognised element' handling at the end of the loop
            if (name.equals("br")  && state.mode == MODE_INLINE) { // sigh
                Element imp = destination.getOwnerDocument().createElement("br");
                destination.appendChild(imp);
                continue;
            }

            // elements which are recreated under the same name, with allowed attributes and parsed children
            matcher = copyElement.matcher(name);
            if (matcher.matches()) {
                org.w3c.dom.Node   firstChild = node.getFirstChild();
                if (firstChild != null && !(firstChild.getNodeType() == org.w3c.dom.Node.TEXT_NODE && firstChild.getNodeValue().equals(""))) { // ignore if empty
                    Element imp = destination.getOwnerDocument().createElement(matcher.group(0));
                    destination.appendChild(imp);
                    copyAttributes((Element) node, imp);
                    parseKupu((Element) node, imp, links, new ParseState(state.level, MODE_INLINE));
                }
                continue;
            }
            matcher = hElement.matcher(name);
            if (matcher.matches()) {
                if (state.mode != MODE_SECTION) {
                    log.warn("Found a section where it cannot be! (h-tags need to be on root level");
                    // treat as paragraph
                    Element imp = destination.getOwnerDocument().createElement("p");
                    destination.appendChild(imp);
                    copyAttributes((Element) node, imp);
                    parseKupu((Element) node, imp, links,  new ParseState(state.level, MODE_INLINE));
                    continue;
                }

                // the digit of the h-tag
                int foundLevel = Integer.parseInt(matcher.group(1));

                log.debug(state.level() + " Found section " + foundLevel + " on " + state.level);
                if (foundLevel > state.level) {
                    // need to create a new state.
                    Element section = destination.getOwnerDocument().createElement("section");
                    Element h       = destination.getOwnerDocument().createElement("h");
                    section.appendChild(h);
                    if (foundLevel == state.level + 1) {
                        // exactly one level deeper: the header content becomes the title of the new
                        // section, and the siblings following the header are parsed into that section
                        parseKupu((Element) node, h, links,  new ParseState(state.level, MODE_INLINE));
                        state.subSections.add(section);
                        ParseState newState = new ParseState(foundLevel, MODE_SECTION, state.offset + 1);
                        parseKupu(source, section, links, newState);
                        state.offset = newState.offset;
                    } else {
                        // more than one level deeper: the new section keeps an empty title, and the
                        // same header is considered again (same offset) one level deeper
                        state.subSections.add(section);
                        ParseState newState = new ParseState(state.level + 1, MODE_SECTION, state.offset);
                        parseKupu(source, section, links, newState);
                        state.offset = newState.offset;
                    }
                    continue;

                } else {
                    // drop state;
                    log.debug("foundlevel " + foundLevel + " level " + state.level + " --> dropping");
                    while(! state.subSections.isEmpty()) {
                        log.debug("Appending to " + destination.getNodeName());
                        destination.appendChild((org.w3c.dom.Node  ) state.subSections.remove(0));
                    }
                    // step back one node: the caller takes over this offset and increments it in its
                    // own loop, so that it ends up at this header again
                    state.offset--;
                    return;
                }
            }
            // anything else: the element is dropped, but its children are still parsed into the destination
            log.warn("Unrecognised element " + name + " ignoring");
            parseKupu((Element) node, destination, links, new ParseState(state.level, MODE_INLINE));
        }
        if (state.mode == MODE_SECTION) {
            // drop state;
            // no more siblings: the collected sub sections are appended after the other content
            while(! state.subSections.isEmpty()) {
                destination.appendChild((org.w3c.dom.Node  ) state.subSections.remove(0));
            }
        }
    }
329 
330     /**
331      * Just searches the nodes in a NodeList for which a certain field has a certain value.
332      */
333     private NodeList get(Cloud cloud, NodeList list, String   field, String   value) {
334         NodeList result = cloud.createNodeList();
335         NodeIterator i = list.nodeIterator();
336         while(i.hasNext()) {
337             Node n = i.nextNode();
338             String   pref = "" + list.getProperty(NodeList.NODESTEP_PROPERTY);
339             String   fieldName = field;
340             if (fieldName.indexOf(".") == -1 && pref != null) {
341                 fieldName = pref + "." + field;
342             }
343             if (n.getStringValue(fieldName).equals(value)) {
344                 result.add(n);
345             }
346         }
347         return result;
348     }
349 
350 
351 
    /**
     * Matches absolute http(s) URLs. Group 1 is the part up to (not including) the first '/' after
     * the host, group 2 is everything after that.
     */
    final Pattern ABSOLUTE_URL = Pattern.compile("(http[s]?://[^/]+)(.*)");
353 
354     /**
355      * Normalizes URL to absolute on server
356      */
357     protected String   normalizeURL(final HttpServletRequest   request, final String   url) {
358 
359         if (url.startsWith("/")) {
360             return url;
361         }
362         String   u = url;
363         if (url.startsWith(".")) {
364             if (request == null) {
365                 log.warn("Did not receive a request, don't know how to normalize '" + url + "'");
366                 return url;
367             }
368 
369 
370             try {
371                 // based on the request as viewed by the client.
372                 if (log.isDebugEnabled()) {
373                     log.debug("Request of " + request.getAttribute("time") + " " + Collections.list(request.getAttributeNames()));
374                 }
375                 String   requestURL = (String  ) request.getAttribute("javax.servlet.include.servlet_path");
376                 if (request.getScheme() == null) {
377                     log.warn("How odd, we got a request with no scheme!!");
378                 }
379                 if (requestURL == null) {
380                     requestURL = request.getRequestURL().toString();
381                 }
382                 u = new URL  (new URL  (requestURL), url).toString();
383             } catch (java.net.MalformedURLException   mfe) {
384                 log.warn("" + mfe, mfe); // should not happen
385                 return url;
386             } catch (NullPointerException   npe) {
387                 log.warn("NPE ", npe);
388             }
389         } else {
390             u = url;
391         }
392         if (log.isDebugEnabled()) {
393             log.debug("url " + url + " ->" + u);
394         }
395         Matcher matcher = ABSOLUTE_URL.matcher(u);
396         if (matcher.matches()) {
397             if (request == null) {
398                 log.warn("Did not receive request, can't check if this URL is local: '" + url + "'");
399                 return url;
400             }
401             try {
402                 URL   hostPart = new URL  (matcher.group(1));
403                 String   scheme = request.getScheme();
404                 if (scheme == null) {
405                     log.warn("Request " + request + " " + request.getRequestURI() + " gave 'null'  scheme" + request.getServerName() + ":" + request.getServerPort() + " " + request.getContextPath());
406                 }
407                 String   host   = request.getServerName();
408                 int port      = request.getServerPort();
409                 URL   foundHost = scheme != null ? new URL  (scheme, host, port, "") : null;
410                 if (scheme != null && hostPart.sameFile(foundHost)) {
411                     String   result = matcher.group(2);
412                     if (log.isDebugEnabled()) {
413                         log.trace("Converted " + url + " -> " + result);
414                     }
415                     return result;
416                 } else {
417                     if (log.isDebugEnabled()) {
418                         log.trace("Not converting url, it is on a different server " + hostPart + " != " + foundHost);
419                     }
420                     return url;
421                 }
422             } catch (java.net.MalformedURLException   mfe) {
423                 log.warn("" + mfe); // client could have typed this.
424                 return url; // don't know anything better then this.
425             }
426         } else {
427             log.debug("Could not normalize url " + url);
428             return url;
429         }
430 
431     }
432 
    /**
     * Matches strings starting with one or more lower case letters followed by a colon, which
     * normalizeURL(String) takes to mean that a protocol is already present.
     */
    final Pattern OK_URL = Pattern.compile("[a-z]+:.*");
434 
435     /**
436      * Adds missing protocol
437      */
438     protected String   normalizeURL(String   url) {
439         if (OK_URL.matcher(url).matches() || (url.length() > 0 && url.charAt(0) == '/')) {
440             return url;
441         } else {
442             return "http://" + url;
443         }
444     }
445 
446     protected Node getUrlNode(Cloud cloud, String   href, Element a) {
447         NodeManager urls = cloud.getNodeManager("urls");
448         NodeQuery q = urls.createQuery();
449         StepField urlStepField = q.getStepField(urls.getField("url"));
450         Constraint c = q.createConstraint(urlStepField, href);
451         q.setConstraint(c);
452         NodeList ul = urls.getList(q);
453         Node url;
454         if (ul.size() > 0) {
455             url = ul.getNode(0);
456             log.service("linking to exsting URL from cloud " + url);
457         } else {
458             // not found, create it!
459             url = cloud.getNodeManager("urls").createNode();
460             url.setStringValue("url", href);
461             if (urls.hasField("title")) {
462                 url.setStringValue("title", a.getAttribute("alt"));
463             } else if (urls.hasField("name")) {
464                 url.setStringValue("name", a.getAttribute("alt"));
465             }
466             url.commit();
467         }
468         return url;
469     }
470 
    /**
     * Matches the id of a div representing a block: "block_", then group 1 (reluctant, so up to the
     * next '_'), a '_', and then group 2. getHref uses group 1 as the block to link to ("createddiv"
     * meaning a new one) and group 2 as the id.
     */
    final Pattern DIV_ID = Pattern.compile("block_(.*?)_(.*)");
472 
473 
474     // list related withouth inheritance
475     private NodeList getRelatedNodes(Node editedNode, NodeManager dest) {
476         NodeQuery q = Queries.createRelatedNodesQuery(editedNode, dest, "idrel", "destination");
477         StepField stepField = q.createStepField(q.getNodeStep(), "otype");
478         Constraint newConstraint = q.createConstraint(stepField, new Integer  (dest.getNumber()));
479         Queries.addConstraint(q, newConstraint);
480         Queries.addRelationFields(q, "idrel", "id", null);
481         return q.getCloud().getList(q);
482     }
483 
484 
485     private String   getHref(Element a, Cloud cloud) {
486         String   href = a.getAttribute("href");
487         if ("".equals(href)) {
488             // must be an image then.
489             // Images are _always_ on the same server.
490             String   src = a.getAttribute("src");
491             try {
492                 href  = (new java.net.URI  (src)).getPath();
493             } catch (java.net.URISyntaxException   se) {
494                 log.warn(se);
495                 href = src;
496             }
497 
498         }
499         String   hrefBefore = href;
500         if (! "".equals(href)) {
501             if (! href.startsWith("mmbase:")) {
502                 href = normalizeURL((HttpServletRequest  ) cloud.getProperty("request"), href);
503             }
504         }
505 
506         // IE Tends to make URL's absolute (http://localhost:8070/mm18/mmbase/images/1234)
507         // FF Tends to make URL's relative (../../../../mmbase/images/1234)
508         // What we want is absolute on server (/mm18/mmbase/images/1234), because that is how URL was probably given in the first place.
509 
510         String   klass = a.getAttribute("class");
511         String   id = a.getAttribute("id");
512 
513         if (klass.startsWith("div ") && href.equals("")) {
514             klass = klass.substring(4);
515             Matcher divId = DIV_ID.matcher(id);
516             if (divId.matches()) {
517                 href = "BLOCK/" + divId.group(1);
518                 if (divId.group(1).equals("createddiv")) {
519                     id = ""; // generate one
520                 }  else {
521                     id   = divId.group(2);
522                 }
523             } else {
524                 // odd
525                 href = "BLOCK/createddiv";
526                 id   = ""; // generated one
527             }
528             a.setAttribute("id", id);
529 
530         }
531         if (id.equals("")) {
532             id = "_" + indexCounter++;
533             a.setAttribute("id", id);
534         }
535         if (log.isDebugEnabled()) {
536             log.debug("Considering " + href + " (from " + hrefBefore + ")");
537         }
538         return href;
539     }
540 
541 
542     private boolean handleImage(String   href, Element a, NodeList usedImages, NodeList relatedImages, Node editedNode) {
543         Cloud cloud = editedNode.getCloud();
544         NodeManager images = cloud.getNodeManager("images");
545         String    imageServlet      = images.getFunctionValue("servletpath", null).toString();
546         if (! href.startsWith(imageServlet)) return false;
547         String   q = "/images/" + href.substring(imageServlet.length());
548         log.debug(href + ":This is an image!!-> " + q);
549         BridgeServlet.QueryParts qp = BridgeServlet.readServletPath(q);
550         if (qp == null) {
551             log.error("Could not parse " + q + ", ignoring...");
552             return true;
553         }
554         NodeManager icaches     = cloud.getNodeManager("icaches");
555         String   nodeNumber = qp.getNodeNumber();
556         Node image = cloud.getNode(nodeNumber);
557         if (image.getNodeManager().equals(icaches)) {
558             image = image.getNodeValue("id");
559             log.debug("This is an icache for " + image.getNumber());
560         }
561         usedImages.add(image);
562         String   klass = a.getAttribute("class");
563         String   id = a.getAttribute("id");
564         NodeList linkedImage = get(cloud, relatedImages, "idrel.id", a.getAttribute("id"));
565         if (! linkedImage.isEmpty()) {
566             // ok, already related!
567             log.service("" + image + " image already correctly related, nothing needs to be done");
568             Node idrel = linkedImage.getNode(0).getNodeValue("idrel");
569             if (!idrel.getStringValue("class").equals(klass)) {
570                 idrel.setStringValue("class", klass);
571                 idrel.commit();
572             }
573 
574         } else {
575             log.service(" to" + image + ", creating new relation");
576             RelationManager rm = cloud.getRelationManager(editedNode.getNodeManager(), images, "idrel");
577             Relation newIdRel = rm.createRelation(editedNode, image);
578             newIdRel.setStringValue("id", id);
579             newIdRel.setStringValue("class", klass);
580             newIdRel.commit();
581                         }
582         a.removeAttribute("src");
583         a.removeAttribute("height");
584         a.removeAttribute("width");
585         a.removeAttribute("class");
586         a.removeAttribute("alt");
587         return true;
588     }
589 
590     private boolean handleAttachment(Matcher matcher, Element a, NodeList usedAttachments, NodeList relatedAttachments, Node editedNode) {
591         if (! matcher.matches()) return false;
592         if (! matcher.group(1).equals("attachments")) return false;
593         String   nodeNumber = matcher.group(2);
594         Cloud cloud = editedNode.getCloud();
595         if (! cloud.hasNode(nodeNumber)) {
596             log.error("No such node '" + nodeNumber + "' (deduced from " + matcher.group() + ")");
597             return false;
598         }
599         NodeManager attachments = cloud.getNodeManager("attachments");
600         Node attachment = cloud.getNode(nodeNumber);
601         usedAttachments.add(attachment);
602         String   klass = a.getAttribute("class");
603         String   id = a.getAttribute("id");
604         NodeList linkedAttachment = get(cloud, relatedAttachments, "idrel.id", id);
605         if (! linkedAttachment.isEmpty()) {
606             // ok, already related!
607             log.service("" + attachment + " attachment (class='" + klass + "') already correctly related, nothing needs to be done");
608             Node idrel = linkedAttachment.getNode(0).getNodeValue("idrel");
609             if (!idrel.getStringValue("class").equals(klass)) {
610                 idrel.setStringValue("class", klass);
611                 idrel.commit();
612             }
613 
614         } else {
615             log.service(" to " + attachment + "(class='" + klass+ "'), creating new relation");
616             RelationManager rm = cloud.getRelationManager(editedNode.getNodeManager(), attachments, "idrel");
617             Relation newIdRel = rm.createRelation(editedNode, attachment);
618             newIdRel.setStringValue("id", id);
619             newIdRel.setStringValue("class", klass);
620             newIdRel.commit();
621         }
622         a.removeAttribute("href");
623         a.removeAttribute("class");
624         a.removeAttribute("title");
625         a.removeAttribute("target");
626         return true;
627     }
628 
629 
630     private boolean handleText(Matcher matcher, Element a, NodeList usedTexts, NodeList relatedTexts, Node editedNode) {
631         if (! matcher.matches()) return false;
632         String   nodeNumber = matcher.group(2);
633         Cloud cloud = editedNode.getCloud();
634         if (! cloud.hasNode(nodeNumber)) {
635             log.error("No such node '" + nodeNumber + "' (deduced from " + matcher.group() + ")");
636             return false;
637         }
638         Node text = cloud.getNode(nodeNumber);
639         usedTexts.add(text);
640         String   klass = a.getAttribute("class");
641         String   id = a.getAttribute("id");
642         NodeList linkedText = get(cloud, relatedTexts, "idrel.id", id);
643         if (! linkedText.isEmpty()) {
644             // ok, already related!
645             log.debug("" + text + " text already correctly related, nothing needs to be done");
646             Node idrel = linkedText.getNode(0).getNodeValue("idrel");
647             if (!idrel.getStringValue("class").equals(klass)) {
648                 idrel.setStringValue("class", klass);
649                 idrel.commit();
650             }
651 
652         } else {
653             log.service("Found new cross link " + text.getNumber() + ", creating new relation now");
654             RelationManager rm = cloud.getRelationManager(editedNode.getNodeManager(), text.getNodeManager(), "idrel");
655             Relation newIdRel = rm.createRelation(editedNode, text);
656             newIdRel.setStringValue("id", id);
657             newIdRel.setStringValue("class", klass);
658             newIdRel.commit();
659         }
660 
661         a.removeAttribute("href");
662         a.removeAttribute("alt");
663         return true;
664     }
665     private boolean handleBlock(String   href, Element a, NodeList relatedBlocks, Node editedNode) {
666         if (! href.startsWith("BLOCK/")) return false;
667 
668         String   nodeNumber = href.substring(6);
669         Cloud cloud = editedNode.getCloud();
670         NodeManager blocks = cloud.getNodeManager("blocks");
671         Node block;
672         if (nodeNumber.equals("createddiv")) {
673             block = blocks.createNode();
674             block.setStringValue("title", "Block created for node " + editedNode.getNumber());
675             block.commit();
676         } else {
677             block = cloud.getNode(nodeNumber);
678         }
679 
680         DocumentBuilder documentBuilder = org.mmbase.util.xml.DocumentReader.getDocumentBuilder();
681         DOMImplementation impl = documentBuilder.getDOMImplementation();
682         Document blockDocument = impl.createDocument("http://www.w3.org/1999/xhtml", "body", null);
683         Element blockBody = blockDocument.getDocumentElement();
684         copyChilds(a, blockBody);
685 
686 
687         org.w3c.dom.Node   child = a.getFirstChild();
688         while (child != null) {
689             a.removeChild(child);
690             child = a.getFirstChild();
691         }
692 
693         if (log.isDebugEnabled()) {
694             log.debug("Setting body to " + XMLWriter.write(blockDocument, false));
695         }
696         // fill _its_ body, still in kupu-mode
697         block.setStringValue("body", XMLWriter.write(blockDocument, false));
698         block.commit();
699         String   klass = a.getAttribute("class");
700         String   id = a.getAttribute("id");
701         NodeList linkedBlock = get(cloud, relatedBlocks, "idrel.id", id);
702         if (! linkedBlock.isEmpty()) {
703             // ok, already related!
704             log.service("" + block + " block already correctly related, nothing needs to be done");
705             Node idrel = linkedBlock.getNode(0).getNodeValue("idrel");
706             if (!idrel.getStringValue("class").equals(klass)) {
707                 idrel.setStringValue("class", klass);
708                 idrel.commit();
709             }
710 
711         } else {
712             log.service(" to " + block + ", creating new relation");
713             RelationManager rm = cloud.getRelationManager(editedNode.getNodeManager(), blocks, "idrel");
714             Relation newIdRel = rm.createRelation(editedNode, block);
715             newIdRel.setStringValue("id", id);
716             newIdRel.setStringValue("class", klass);
717             newIdRel.commit();
718         }
719         a.removeAttribute("class");
720         return true;
721     }
722     /**
723      * Parses kupu-output for a certain node. First it will translate the XHTML like kupu-output to
724      * something very similar to MMXF, while collecting the 'links'. Then in a second stage these
725      * links are compared with related nodes. So the side-effect may be removed, updated, and new
726      * related nodes.
727      *
728      * @param editedNode MMBase node containing the MMXF field.
729      * @param document   XML received from Kupu
730      * @return An MMXF document.
731      */
732     private Document parseKupu(Node editedNode, Document document) {
733         if (log.isDebugEnabled()) {
734             log.debug("Handeling kupu-input" + XMLWriter.write(document, false));
735         }
736         Document xml = Mmxf.createMmxfDocument();
737         // first find Body.
738         org.w3c.dom.NodeList   bodies = document.getElementsByTagName("body");
739         if (bodies.getLength() > 1) {
740             log.warn("Found not one body but " + bodies.getLength());
741         } else if (bodies.getLength() == 0) {
742             log.warn("No body found ");
743             return xml;
744         }
745         Element body = (Element) bodies.item(0);
746         body.normalize();
747         Element mmxf = xml.getDocumentElement();
748         List links = new ArrayList();
749 
750         // first stage.
751         parseKupu(body, mmxf, links, new ParseState(0, MODE_SECTION));
752 
753 
754         // second stage, handle kupu-links.
755         if (editedNode == null) {
756             log.warn("Node node given, cannot handle cross-links!!");
757         } else {
758             Cloud cloud = editedNode.getCloud();
759             NodeManager images      = cloud.getNodeManager("images");
760             NodeManager attachments = cloud.getNodeManager("attachments");
761             NodeManager urls        = cloud.getNodeManager("urls");
762             NodeManager blocks      = cloud.getNodeManager("blocks");
763 
764             NodeManager texts       = cloud.getNodeManager("object");
765             Pattern mmbaseUrl         = Pattern.compile("mmbase://(.*?)/(\\d+)");
766 
767 
768             NodeList relatedImages        = getRelatedNodes(editedNode, images);
769             NodeList usedImages           = cloud.createNodeList();
770 
771             NodeList relatedAttachments   = getRelatedNodes(editedNode, attachments);
772             NodeList usedAttachments      = cloud.createNodeList();
773 
774             NodeList relatedBlocks        = getRelatedNodes(editedNode, blocks);
775 
776             NodeList relatedUrls          = getRelatedNodes(editedNode, urls);
777             NodeList usedUrls             = cloud.createNodeList();
778 
779             NodeList relatedTexts;
780             NodeList usedTexts;
781             {
782                 NodeQuery q = Queries.createRelatedNodesQuery(editedNode, texts, "idrel", "destination");
783                 StepField stepField = q.createStepField(q.getNodeStep(), "otype");
784                 SortedSet nonTexts = new TreeSet();
785                 nonTexts.add(new Integer  (images.getNumber()));
786                 nonTexts.add(new Integer  (attachments.getNumber()));
787                 nonTexts.add(new Integer  (blocks.getNumber()));
788                 nonTexts.add(new Integer  (urls.getNumber()));
789                 FieldValueInConstraint newConstraint = q.createConstraint(stepField, nonTexts);
790                 q.setInverse(newConstraint, true);
791                 Queries.addConstraint(q, newConstraint);
792                 Queries.addRelationFields(q, "idrel", "id", null);
793                 relatedTexts = q.getCloud().getList(q);
794                 if (log.isDebugEnabled()) {
795                     log.debug("Found related texts " + relatedTexts);
796                 }
797                 usedTexts = cloud.createNodeList();
798             }
799 
800 
801             Iterator linkIterator = links.iterator();
802             //String imageServletPath = images.getFunctionValue("servletpath", null).toString();
803             while (linkIterator.hasNext()) {
804                 Element a = (Element) linkIterator.next();
805                 try {
806                     String   href = getHref(a, cloud);
807                     Matcher mmbaseMatcher =  mmbaseUrl.matcher(href);
808                     if (handleImage(href, a, usedImages, relatedImages, editedNode)) { // found an image!
809                         continue;
810                     } else if (handleAttachment(mmbaseMatcher, a, usedAttachments, relatedAttachments, editedNode)) {
811                         continue;
812                     } else if (handleText(mmbaseMatcher, a, usedTexts, relatedTexts, editedNode)) {
813                         continue;
814                     } else if (handleBlock(href, a, relatedBlocks, editedNode)) {
815                         continue;
816                     } else { // must have been really an URL
817                         String   klass = a.getAttribute("class");
818                         String   id = a.getAttribute("id");
819 
820                         NodeList idLinkedUrls = get(cloud, relatedUrls, "idrel.id", id);
821                         if (!idLinkedUrls.isEmpty()) {
822                             // already related.
823                             Node url   = idLinkedUrls.getNode(0).getNodeValue("urls");
824                             Node idrel = idLinkedUrls.getNode(0).getNodeValue("idrel");
825                             if (url.getStringValue("url").equals(href)) {
826                                 log.service("" + url + " url already correctly related, nothing needs to be done");
827                                 usedUrls.add(url);
828                                 if (!idrel.getStringValue("class").equals(klass)) {
829                                     idrel.setStringValue("class", klass);
830                                     idrel.commit();
831                                 }
832                                 continue;
833                             } else {
834                                 // href changed, fall through, to create a new link.
835                             }
836                         } 
837 
838                         // create a link to an URL object.
839                         String   u = normalizeURL(href);
840                         NodeList nodeLinkedUrls = get(cloud, relatedUrls, "url", u);
841                         Node url;
842                         if (nodeLinkedUrls.isEmpty()) {
843                             url = getUrlNode(cloud, u, a);
844                         } else {
845                             url = nodeLinkedUrls.getNode(0).getNodeValue("urls");
846                         }
847                         usedUrls.add(url);
848                         RelationManager rm = cloud.getRelationManager(editedNode.getNodeManager(), url.getNodeManager(), "idrel");
849                         Relation newIdRel = rm.createRelation(editedNode, url);
850                         newIdRel.setStringValue("id", id);
851                         newIdRel.setStringValue("class", klass);
852                         newIdRel.commit();
853 
854 
855                     }
856                 } catch (Exception   e) {
857                     log.error(e.getMessage(), e);
858                 } finally {
859                     a.removeAttribute("href");
860                     a.removeAttribute("title");
861                     a.removeAttribute("target");
862                     a.removeAttribute("class");
863                 }
864 
865             }
866             // ready handling links. Now clean up unused links.
867             log.debug("Cleaning dangling idrels");
868             cleanDanglingIdRels(relatedImages,   usedImages,   "images");
869             cleanDanglingIdRels(relatedUrls,     usedUrls,     "urls");
870             cleanDanglingIdRels(relatedAttachments, usedAttachments, "attachments");
871             cleanDanglingIdRels(relatedTexts, usedTexts, texts.getName());
872         }
873 
874 
875         return xml;
876 
877     }
878     /**
879      * At the end of stage 2 of parseKupu all relations are removed which are not used any more, using this function.
880      */
881     protected void cleanDanglingIdRels(NodeList clusterNodes, NodeList usedNodes, String   type) {
882        NodeIterator i = clusterNodes.nodeIterator();
883        while(i.hasNext()) {
884            Node clusterNode = i.nextNode();
885            Node node = clusterNode.getNodeValue(type);
886            if (! usedNodes.contains(node)) {
887                Node idrel = clusterNode.getNodeValue("idrel");
888                if (log.isDebugEnabled()) {
889                    log.debug(" " + node + " was not used! id:" + idrel.getStringValue("id"));
890                }
891                if (idrel == null) {
892                    log.debug("Idrel returned null from " + clusterNode + " propbably deleted already in previous cleandDanglingIdRels");
893                } else {
894                    if (idrel.mayDelete()) {
895                        log.service("Removing unused idrel " + idrel);
896                        idrel.delete(true);
897                    }
898                }
899            }
900        }
901     }
902 
903 
904 
905 
906     /**
907      * Receives Docbook XML, and saves as MMXF. Docbook is more powerfull as MMXF so this
908      * transformation will not be perfect. It is mainly meant for MMBase documentation.
909      */
910     protected Document parseDocBook(Node editedNode, Document source) {
911         Cloud cloud = editedNode.getCloud();
912         java.net.URL   u = ResourceLoader.getConfigurationRoot().getResource("xslt/docbook2pseudommxf.xslt");
913         DOMResult result = new DOMResult();
914         Map params = new HashMap();
915         params.put("cloud", cloud);
916         try {
917             XSLTransformer.transform(new DOMSource(source), u, result, params);
918         } catch (javax.xml.transform.TransformerException   te) {
919             log.error(te);
920         }
921         Document pseudoMmxf = result.getNode().getOwnerDocument();
922         // should follow some code to remove and create cross-links, images etc.
923 
924         NodeManager urls = cloud.getNodeManager("urls");
925         NodeList relatedUrls          = Queries.getRelatedNodes(editedNode, urls ,  "idrel", "destination", "id", null);
926         NodeList usedUrls             = cloud.createNodeList();
927 
928         // now find all <a href tags in the pseudo-mmxf, and fix them.
929         org.w3c.dom.NodeList   nl = pseudoMmxf.getElementsByTagName("a");
930         for (int j = 0 ; j < nl.getLength(); j++) {
931             Element a = (Element) nl.item(j);
932             String   href = a.getAttribute("href");
933             Node url = getUrlNode(cloud, href, a);
934             String   id = "_" + indexCounter++;
935             a.setAttribute("id", id);
936             RelationManager rm = cloud.getRelationManager(editedNode.getNodeManager(), url.getNodeManager(), "idrel");
937             Relation newIdRel = rm.createRelation(editedNode, url);
938             newIdRel.setStringValue("id", id);
939             newIdRel.commit();
940             a.removeAttribute("href");
941 
942         }
943 
944         return pseudoMmxf;
945     }
946 
947     private String   toString(Object   value) {
948         if (value instanceof Document) {
949             return XMLWriter.write((Document) value, false, true);
950         } else {
951             return "" + value;
952         }
953     }
954 
955     // javadoc inherited
956     public Object   process(Node node, Field field, Object   value) {
957         if (log.isDebugEnabled()) {
958             log.debug("Found for setstring " + value.getClass() + " " + toString(value));
959         }
960         try {
961             switch(Modes.getMode(node.getCloud().getProperty(Cloud.PROP_XMLMODE))) {
962             case Modes.KUPU: {
963                 log.debug("Handeling kupu-input: " + toString(value));
964                 return parseKupu(node, parse(value));
965             }
966             case Modes.WIKI: {
967                 log.debug("Handling wiki-input: " + value);
968                 return  parse(xmlField.transformBack(toString(value)));
969             }
970             case Modes.DOCBOOK: {
971                 log.debug("Handling docbook-input: " + value);
972                 return  parseDocBook(node, parse(value));
973             }
974             case Modes.FLAT: {
975                 log.debug("Handling flat-input " + value.getClass() + " " + toString(value));
976                 return parse(xmlField.transformBack(toString(value)));
977             }
978             default: {
979                 // 'raw' xml
980                 try {
981                     return parse(value);
982                 } catch (Exception   e) {
983                     log.warn("Setting field " + field + " in node " + node.getNumber() + ", but " + e.getMessage());
984                     // simply Istore it, as provided, then.
985                     // fall trough
986                 }
987                 return value;
988             }
989 
990             }
991         } catch (Exception   e) {
992             log.error(e.getMessage() + " for " + value, e);
993             return value;
994         }
995     }
996 
997     /**
998      * Invocation of the class from the commandline for testing. Uses RMMCI (on the default
999      * configuration), gets the 'xmltest' node, and get and set processes it.
1000     */
1001    public static void main(String  [] argv) {
1002        if (System.getProperty("mmbase.config") == null) {
1003            System.err.println("Please start up with -Dmmbase.defaultcloudcontext=rmi://127.0.0.1:1111/remotecontext -Dmmbase.config=<mmbase configuration directory> (needed to find the XSL's)");
1004            return;
1005        }
1006        try {
1007            if (argv.length == 0) {
1008                CloudContext cc = ContextProvider.getDefaultCloudContext();
1009                Cloud cloud = cc.getCloud("mmbase", "class", null);
1010
1011                Node node = cloud.getNode("xmltest");
1012
1013                cloud.setProperty(Cloud.PROP_XMLMODE, "wiki");
1014
1015                Processor getProcessor = new MmxfGetString();
1016                String   wiki = (String  ) getProcessor.process(node, node.getNodeManager().getField("body"), null);
1017
1018                System.out.println("in:\n" + wiki);
1019
1020                Processor setProcessor = new MmxfSetString();
1021
1022                System.out.println("\n-------------\nout:\n");
1023                Document document = (Document) setProcessor.process(node, node.getNodeManager().getField("body"), wiki);
1024                System.out.println(XMLWriter.write(document, false));
1025            } else {
1026                MmxfSetString setProcessor = new MmxfSetString();
1027                ResourceLoader rl = ResourceLoader.getSystemRoot();
1028                Document doc = setProcessor.parse(rl.getResourceAsStream(argv[0]));
1029                Node node = null;
1030                if (argv.length > 1) {
1031                    CloudContext cc = ContextProvider.getDefaultCloudContext();
1032                    Cloud cloud = cc.getCloud("mmbase", "class", null);
1033                    if (argv.length > 2) {
1034                        cloud.setProperty(Cloud.PROP_XMLMODE, argv[2]);
1035                    }
1036                    node = cloud.getNode(argv[1]);
1037                }
1038                Document mmxf = setProcessor.parseKupu(node, doc);
1039                if (node != null) {
1040                    if (log.isDebugEnabled()) {
1041                        log.debug("Setting body of " + node.getNumber() + " to " + XMLWriter.write(mmxf, false));
1042                    }
1043                    node.setXMLValue("body", mmxf);
1044                    node.commit();
1045                } else {
1046                    System.out.println(XMLWriter.write(mmxf, false));
1047                }
1048
1049            }
1050        } catch (Exception   e) {
1051            Throwable   cause = e;
1052            while (cause != null) {
1053                System.err.println("CAUSE " + cause.getMessage() + Logging.stackTrace(cause));
1054                cause = cause.getCause();
1055            }
1056        }
1057        /*
1058
1059
1060        try{
1061            XMLSerializer serializer = new XMLSerializer();
1062            serializer.setNamespaces(true);
1063            serializer.setOutputByteStream(System.out);
1064            serializer.serialize(document);
1065        } catch (java.io.IOException ioe) {
1066            System.err.println(ioe.getMessage());
1067        }
1068        */
1069
1070
1071    }
1072
1073    public String   toString() {
1074        return "set_MMXF";
1075    }
1076
1077}
1078
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags