DOM2DTM


1   /*
2    * Copyright 1999-2004 The Apache Software Foundation.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  /*
17   * $Id: DOM2DTM.java,v 1.34 2004/02/16 23:06:53 minchau Exp $
18   */
19  package com.sun.org.apache.xml.internal.dtm.ref.dom2dtm;
20  
21  import java.util.Vector  ;
22  
23  import javax.xml.transform.SourceLocator  ;
24  import javax.xml.transform.dom.DOMSource  ;
25  
26  import com.sun.org.apache.xml.internal.dtm.DTM;
27  import com.sun.org.apache.xml.internal.dtm.DTMManager;
28  import com.sun.org.apache.xml.internal.dtm.DTMWSFilter;
29  import com.sun.org.apache.xml.internal.dtm.ref.DTMDefaultBaseIterators;
30  import com.sun.org.apache.xml.internal.dtm.ref.DTMManagerDefault;
31  import com.sun.org.apache.xml.internal.dtm.ref.ExpandedNameTable;
32  import com.sun.org.apache.xml.internal.dtm.ref.IncrementalSAXSource;
33  import com.sun.org.apache.xml.internal.res.XMLErrorResources;
34  import com.sun.org.apache.xml.internal.res.XMLMessages;
35  import com.sun.org.apache.xml.internal.utils.FastStringBuffer;
36  import com.sun.org.apache.xml.internal.utils.QName;
37  import com.sun.org.apache.xml.internal.utils.StringBufferPool;
38  import com.sun.org.apache.xml.internal.utils.TreeWalker;
39  import com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer;
40  import com.sun.org.apache.xml.internal.utils.XMLString;
41  import com.sun.org.apache.xml.internal.utils.XMLStringFactory;
42  import org.w3c.dom.Attr  ;
43  import org.w3c.dom.Document  ;
44  import org.w3c.dom.DocumentType  ;
45  import org.w3c.dom.Element  ;
46  import org.w3c.dom.Entity  ;
47  import org.w3c.dom.NamedNodeMap  ;
48  import org.w3c.dom.Node  ;
49  import org.xml.sax.ContentHandler  ;
50  
51  /** The <code>DOM2DTM</code> class serves up a DOM's contents via the
52   * DTM API.
53   *
54   * Note that it doesn't necessarily represent a full Document
55   * tree. You can wrap a DOM2DTM around a specific node and its subtree
56   * and the right things should happen. (I don't _think_ we currently
57   * support DocumentFragment nodes as roots, though that might be worth
58   * considering.)
59   *
60   * Note too that we do not currently attempt to track document
61   * mutation. If you alter the DOM after wrapping DOM2DTM around it,
62   * all bets are off.
63   * */
64  public class DOM2DTM extends DTMDefaultBaseIterators
65  {
66    static final boolean JJK_DEBUG=false;
67    static final boolean JJK_NEWCODE=true;
68    
69    /** Manefest constant
70     */
71    static final String   NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace";
72    
73    /** The current position in the DOM tree. Last node examined for
74     * possible copying to DTM. */
75    transient private Node   m_pos;
76    /** The current position in the DTM tree. Who children get appended to. */
77    private int m_last_parent=0;
78    /** The current position in the DTM tree. Who children reference as their 
79     * previous sib. */
80    private int m_last_kid=NULL;
81  
82    /** The top of the subtree.
83     * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'
84     * */
85    transient private Node   m_root;
86  
87    /** True iff the first element has been processed. This is used to control
88        synthesis of the implied xml: namespace declaration node. */
89    boolean m_processedFirstElement=false;
90          
91    /** true if ALL the nodes in the m_root subtree have been processed;
92     * false if our incremental build has not yet finished scanning the
93     * DOM tree.  */
94    transient private boolean m_nodesAreProcessed;
95  
96    /** The node objects.  The instance part of the handle indexes
97     * directly into this vector.  Each DTM node may actually be
98     * composed of several DOM nodes (for example, if logically-adjacent
99     * Text/CDATASection nodes in the DOM have been coalesced into a
100    * single DTM Text node); this table points only to the first in
101    * that sequence. */
102   protected Vector   m_nodes = new Vector  ();
103 
104   /**
105    * Construct a DOM2DTM object from a DOM node.
106    *
107    * @param mgr The DTMManager who owns this DTM.
108    * @param domSource the DOM source that this DTM will wrap.
109    * @param dtmIdentity The DTM identity ID for this DTM.
110    * @param whiteSpaceFilter The white space filter for this DTM, which may 
111    *                         be null.
112    * @param xstringfactory XMLString factory for creating character content.
113    * @param doIndexing true if the caller considers it worth it to use 
114    *                   indexing schemes.
115    */
116   public DOM2DTM(DTMManager mgr, DOMSource   domSource, 
117                  int dtmIdentity, DTMWSFilter whiteSpaceFilter,
118                  XMLStringFactory xstringfactory,
119                  boolean doIndexing)
120   {
121     super(mgr, domSource, dtmIdentity, whiteSpaceFilter, 
122           xstringfactory, doIndexing);
123 
124     // Initialize DOM navigation
125     m_pos=m_root = domSource.getNode();
126     // Initialize DTM navigation
127     m_last_parent=m_last_kid=NULL;
128     m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL);
129 
130     // Apparently the domSource root may not actually be the
131     // Document node. If it's an Element node, we need to immediately
132     // add its attributes. Adapted from nextNode().
133     // %REVIEW% Move this logic into addNode and recurse? Cleaner!
134     //
135     // (If it's an EntityReference node, we're probably scrod. For now
136     // I'm just hoping nobody is ever quite that foolish... %REVIEW%)
137         //
138         // %ISSUE% What about inherited namespaces in this case?
139         // Do we need to special-case initialize them into the DTM model?
140     if(ELEMENT_NODE == m_root.getNodeType())
141     {
142       NamedNodeMap   attrs=m_root.getAttributes();
143       int attrsize=(attrs==null) ? 0 : attrs.getLength();
144       if(attrsize>0)
145       {
146         int attrIndex=NULL; // start with no previous sib
147         for(int i=0;i<attrsize;++i)
148         {
149           // No need to force nodetype in this case;
150           // addNode() will take care of switching it from
151           // Attr to Namespace if necessary.
152           attrIndex=addNode(attrs.item(i),0,attrIndex,NULL);
153           m_firstch.setElementAt(DTM.NULL,attrIndex);
154         }
155         // Terminate list of attrs, and make sure they aren't
156         // considered children of the element
157         m_nextsib.setElementAt(DTM.NULL,attrIndex);
158 
159         // IMPORTANT: This does NOT change m_last_parent or m_last_kid!
160       } // if attrs exist
161     } //if(ELEMENT_NODE)
162 
163     // Initialize DTM-completed status 
164     m_nodesAreProcessed = false;
165   }
166 
167   /**
168    * Construct the node map from the node.
169    *
170    * @param node The node that is to be added to the DTM.
171    * @param parentIndex The current parent index.
172    * @param previousSibling The previous sibling index.
173    * @param forceNodeType If not DTM.NULL, overrides the DOM node type.
174    *    Used to force nodes to Text rather than CDATASection when their
175    *    coalesced value includes ordinary Text nodes (current DTM behavior).
176    *
177    * @return The index identity of the node that was added.
178    */
179   protected int addNode(Node   node, int parentIndex,
180                         int previousSibling, int forceNodeType)
181   {
182     int nodeIndex = m_nodes.size();
183 
184     // Have we overflowed a DTM Identity's addressing range?
185     if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS))
186     {
187       try
188       {
189         if(m_mgr==null)
190           throw new ClassCastException  ();
191                                 
192                                 // Handle as Extended Addressing
193         DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr;
194         int id=mgrD.getFirstFreeDTMID();
195         mgrD.addDTM(this,id,nodeIndex);
196         m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS);
197       }
198       catch(ClassCastException   e)
199       {
200         // %REVIEW% Wrong error message, but I've been told we're trying
201         // not to add messages right not for I18N reasons.
202         // %REVIEW% Should this be a Fatal Error?
203         error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available";
204       }
205     }
206 
207     m_size++;
208     // ensureSize(nodeIndex);
209     
210     int type;
211     if(NULL==forceNodeType)
212         type = node.getNodeType();
213     else
214         type=forceNodeType;
215         
216     // %REVIEW% The Namespace Spec currently says that Namespaces are
217     // processed in a non-namespace-aware manner, by matching the
218     // QName, even though there is in fact a namespace assigned to
219     // these nodes in the DOM. If and when that changes, we will have
220     // to consider whether we check the namespace-for-namespaces
221     // rather than the node name.
222     //
223     // %TBD% Note that the DOM does not necessarily explicitly declare
224     // all the namespaces it uses. DOM Level 3 will introduce a
225     // namespace-normalization operation which reconciles that, and we
226     // can request that users invoke it or otherwise ensure that the
227     // tree is namespace-well-formed before passing the DOM to Xalan.
228     // But if they don't, what should we do about it? We probably
229     // don't want to alter the source DOM (and may not be able to do
230     // so if it's read-only). The best available answer might be to
231     // synthesize additional DTM Namespace Nodes that don't correspond
232     // to DOM Attr Nodes.
233     if (Node.ATTRIBUTE_NODE == type)
234     {
235       String   name = node.getNodeName();
236 
237       if (name.startsWith("xmlns:") || name.equals("xmlns"))
238       {
239         type = DTM.NAMESPACE_NODE;
240       }
241     }
242     
243     m_nodes.addElement(node);
244     
245     m_firstch.setElementAt(NOTPROCESSED,nodeIndex);
246     m_nextsib.setElementAt(NOTPROCESSED,nodeIndex);
247     m_prevsib.setElementAt(previousSibling,nodeIndex);
248     m_parent.setElementAt(parentIndex,nodeIndex);
249     
250     if(DTM.NULL != parentIndex && 
251        type != DTM.ATTRIBUTE_NODE && 
252        type != DTM.NAMESPACE_NODE)
253     {
254       // If the DTM parent had no children, this becomes its first child.
255       if(NOTPROCESSED == m_firstch.elementAt(parentIndex))
256         m_firstch.setElementAt(nodeIndex,parentIndex);
257     }
258     
259     String   nsURI = node.getNamespaceURI();
260 
261     // Deal with the difference between Namespace spec and XSLT
262     // definitions of local name. (The former says PIs don't have
263     // localnames; the latter says they do.)
264     String   localName =  (type == Node.PROCESSING_INSTRUCTION_NODE) ? 
265                          node.getNodeName() :
266                          node.getLocalName();
267                          
268     // Hack to make DOM1 sort of work...
269     if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE)) 
270         && null == localName)
271       localName = node.getNodeName(); // -sb
272       
273     ExpandedNameTable exnt = m_expandedNameTable;
274 
275     // %TBD% Nodes created with the old non-namespace-aware DOM
276     // calls createElement() and createAttribute() will never have a
277     // localname. That will cause their expandedNameID to be just the
278     // nodeType... which will keep them from being matched
279     // successfully by name. Since the DOM makes no promise that
280     // those will participate in namespace processing, this is
281     // officially accepted as Not Our Fault. But it might be nice to
282     // issue a diagnostic message!
283     if(node.getLocalName()==null &&
284        (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE))
285       {
286         // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM.");
287       }
288     
289     int expandedNameID = (null != localName) 
290        ? exnt.getExpandedTypeID(nsURI, localName, type) :
291          exnt.getExpandedTypeID(type);
292 
293     m_exptype.setElementAt(expandedNameID,nodeIndex);
294     
295     indexNode(expandedNameID, nodeIndex);
296 
297     if (DTM.NULL != previousSibling)
298       m_nextsib.setElementAt(nodeIndex,previousSibling);
299 
300     // This should be done after m_exptype has been set, and probably should
301     // always be the last thing we do
302     if (type == DTM.NAMESPACE_NODE)
303         declareNamespaceInContext(parentIndex,nodeIndex);
304 
305     return nodeIndex;
306   }
307   
308   /**
309    * Get the number of nodes that have been added.
310    */
311   public int getNumberOfNodes()
312   {
313     return m_nodes.size();
314   }
315   
316  /**
317    * This method iterates to the next node that will be added to the table.
318    * Each call to this method adds a new node to the table, unless the end
319    * is reached, in which case it returns null.
320    *
321    * @return The true if a next node is found or false if 
322    *         there are no more nodes.
323    */
324   protected boolean nextNode()
325   {
326     // Non-recursive one-fetch-at-a-time depth-first traversal with 
327     // attribute/namespace nodes and white-space stripping.
328     // Navigating the DOM is simple, navigating the DTM is simple;
329     // keeping track of both at once is a trifle baroque but at least
330     // we've avoided most of the special cases.
331     if (m_nodesAreProcessed)
332       return false;
333         
334     // %REVIEW% Is this local copy Really Useful from a performance
335     // point of view?  Or is this a false microoptimization?
336     Node   pos=m_pos; 
337     Node   next=null;
338     int nexttype=NULL;
339 
340     // Navigate DOM tree
341     do
342       {
343         // Look down to first child.
344         if (pos.hasChildNodes()) 
345           {
346             next = pos.getFirstChild();
347 
348             // %REVIEW% There's probably a more elegant way to skip
349             // the doctype. (Just let it go and Suppress it?
350             if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
351               next=next.getNextSibling();
352 
353             // Push DTM context -- except for children of Entity References, 
354             // which have no DTM equivalent and cause no DTM navigation.
355             if(ENTITY_REFERENCE_NODE!=pos.getNodeType())
356               {
357                 m_last_parent=m_last_kid;
358                 m_last_kid=NULL;
359                 // Whitespace-handler context stacking
360                 if(null != m_wsfilter)
361                 {
362                   short wsv =
363                     m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this);
364                   boolean shouldStrip = (DTMWSFilter.INHERIT == wsv) 
365                     ? getShouldStripWhitespace() 
366                     : (DTMWSFilter.STRIP == wsv);
367                   pushShouldStripWhitespace(shouldStrip);
368                 } // if(m_wsfilter)
369               }
370           }
371 
372         // If that fails, look up and right (but not past root!)
373         else 
374           {
375             if(m_last_kid!=NULL)
376               {
377                 // Last node posted at this level had no more children
378                 // If it has _no_ children, we need to record that.
379                 if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED)
380                   m_firstch.setElementAt(NULL,m_last_kid);
381               }
382                         
383             while(m_last_parent != NULL)
384               {
385                 // %REVIEW% There's probably a more elegant way to
386                 // skip the doctype. (Just let it go and Suppress it?
387                 next = pos.getNextSibling();
388                 if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
389                   next=next.getNextSibling();
390 
391                 if(next!=null)
392                   break; // Found it!
393                 
394                 // No next-sibling found. Pop the DOM.
395                 pos=pos.getParentNode();
396                 if(pos==null)
397                   {
398                     // %TBD% Should never arise, but I want to be sure of that...
399                     if(JJK_DEBUG)
400                       {
401                         System.out.println("***** DOM2DTM Pop Control Flow problem");
402                         for(;;); // Freeze right here!
403                       }
404                   }
405                 
406                 // The only parents in the DTM are Elements.  However,
407                 // the DOM could contain EntityReferences.  If we
408                 // encounter one, pop it _without_ popping DTM.
409                 if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType())
410                   {
411                     // Nothing needs doing
412                     if(JJK_DEBUG)
413                       System.out.println("***** DOM2DTM popping EntRef");
414                   }
415                 else
416                   {
417                     popShouldStripWhitespace();
418                     // Fix and pop DTM
419                     if(m_last_kid==NULL)
420                       m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element
421                     else
422                       m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else
423                     m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent);
424                   }
425               }
426             if(m_last_parent==NULL)
427               next=null;
428           }
429                 
430         if(next!=null)
431           nexttype=next.getNodeType();
432                 
433         // If it's an entity ref, advance past it.
434         //
435         // %REVIEW% Should we let this out the door and just suppress it?
436         // More work, but simpler code, more likely to be correct, and
437         // it doesn't happen very often. We'd get rid of the loop too.
438         if (ENTITY_REFERENCE_NODE == nexttype)
439           pos=next;
440       }
441     while (ENTITY_REFERENCE_NODE == nexttype); 
442         
443     // Did we run out of the tree?
444     if(next==null)
445       {
446         m_nextsib.setElementAt(NULL,0);
447         m_nodesAreProcessed = true;
448         m_pos=null;
449                 
450         if(JJK_DEBUG)
451           {
452             System.out.println("***** DOM2DTM Crosscheck:");
453             for(int i=0;i<m_nodes.size();++i)
454               System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i));
455           }
456                 
457         return false;
458       }
459 
460     // Text needs some special handling:
461     //
462     // DTM may skip whitespace. This is handled by the suppressNode flag, which
463     // when true will keep the DTM node from being created.
464     //
465     // DTM only directly records the first DOM node of any logically-contiguous
466     // sequence. The lastTextNode value will be set to the last node in the 
467     // contiguous sequence, and -- AFTER the DTM addNode -- can be used to 
468     // advance next over this whole block. Should be simpler than special-casing
469     // the above loop for "Was the logically-preceeding sibling a text node".
470     // 
471     // Finally, a DTM node should be considered a CDATASection only if all the
472     // contiguous text it covers is CDATASections. The first Text should
473     // force DTM to Text.
474         
475     boolean suppressNode=false;
476     Node   lastTextNode=null;
477 
478     nexttype=next.getNodeType();
479         
480     // nexttype=pos.getNodeType();
481     if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
482       {
483         // If filtering, initially assume we're going to suppress the node
484         suppressNode=((null != m_wsfilter) && getShouldStripWhitespace());
485 
486         // Scan logically contiguous text (siblings, plus "flattening"
487         // of entity reference boundaries).
488         Node   n=next;
489         while(n!=null)
490           {
491             lastTextNode=n;
492             // Any Text node means DTM considers it all Text
493             if(TEXT_NODE == n.getNodeType())
494               nexttype=TEXT_NODE;
495             // Any non-whitespace in this sequence blocks whitespace
496             // suppression
497             suppressNode &=
498               XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue());
499                         
500             n=logicalNextDOMTextNode(n);
501           }
502       }
503         
504     // Special handling for PIs: Some DOMs represent the XML
505     // Declaration as a PI. This is officially incorrect, per the DOM
506     // spec, but is considered a "wrong but tolerable" temporary
507     // workaround pending proper handling of these fields in DOM Level
508     // 3. We want to recognize and reject that case.
509     else if(PROCESSING_INSTRUCTION_NODE==nexttype)
510       {
511         suppressNode = (pos.getNodeName().toLowerCase().equals("xml"));
512       }
513         
514         
515     if(!suppressNode)
516       {
517         // Inserting next. NOTE that we force the node type; for
518         // coalesced Text, this records CDATASections adjacent to
519         // ordinary Text as Text.
520         int nextindex=addNode(next,m_last_parent,m_last_kid,
521                   nexttype);
522     
523         m_last_kid=nextindex;
524 
525         if(ELEMENT_NODE == nexttype)
526           {
527             int attrIndex=NULL; // start with no previous sib
528             // Process attributes _now_, rather than waiting.
529             // Simpler control flow, makes NS cache available immediately.
530             NamedNodeMap   attrs=next.getAttributes();
531             int attrsize=(attrs==null) ? 0 : attrs.getLength();
532             if(attrsize>0)
533               {
534                 for(int i=0;i<attrsize;++i)
535                   {
536                     // No need to force nodetype in this case;
537                     // addNode() will take care of switching it from
538                     // Attr to Namespace if necessary.
539                     attrIndex=addNode(attrs.item(i),
540                                       nextindex,attrIndex,NULL);
541                     m_firstch.setElementAt(DTM.NULL,attrIndex);
542 
543                     // If the xml: prefix is explicitly declared
544                     // we don't need to synthesize one.
545             //
546             // NOTE that XML Namespaces were not originally
547             // defined as being namespace-aware (grrr), and
548             // while the W3C is planning to fix this it's
549             // safer for now to test the QName and trust the
550             // parsers to prevent anyone from redefining the
551             // reserved xmlns: prefix
552                     if(!m_processedFirstElement
553                        && "xmlns:xml".equals(attrs.item(i).getNodeName()))
554                       m_processedFirstElement=true; 
555                   }
556                 // Terminate list of attrs, and make sure they aren't
557                 // considered children of the element
558               } // if attrs exist
559             if(!m_processedFirstElement)
560             {
561               // The DOM might not have an explicit declaration for the
562               // implicit "xml:" prefix, but the XPath data model
563               // requires that this appear as a Namespace Node so we
564               // have to synthesize one. You can think of this as
565               // being a default attribute defined by the XML
566               // Namespaces spec rather than by the DTD.
567               attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode(
568                                                                     (Element  )next,"xml",NAMESPACE_DECL_NS,
569                                                                     makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1)
570                                                                     ),
571                                 nextindex,attrIndex,NULL);      
572               m_firstch.setElementAt(DTM.NULL,attrIndex);
573               m_processedFirstElement=true;
574             }
575             if(attrIndex!=NULL)
576               m_nextsib.setElementAt(DTM.NULL,attrIndex);
577           } //if(ELEMENT_NODE)
578       } // (if !suppressNode)
579 
580     // Text postprocessing: Act on values stored above
581     if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
582       {
583         // %TBD% If nexttype was forced to TEXT, patch the DTM node
584                 
585         next=lastTextNode;      // Advance the DOM cursor over contiguous text
586       }
587         
588     // Remember where we left off.
589     m_pos=next;
590     return true;
591   }  
592 
593 
594   /**
595    * Return an DOM node for the given node.
596    *
597    * @param nodeHandle The node ID.
598    *
599    * @return A node representation of the DTM node.
600    */
601   public Node   getNode(int nodeHandle)
602   {
603 
604     int identity = makeNodeIdentity(nodeHandle);
605 
606     return (Node  ) m_nodes.elementAt(identity);
607   }
608 
609   /**
610    * Get a Node from an identity index.
611    *
612    * NEEDSDOC @param nodeIdentity
613    *
614    * NEEDSDOC ($objectName$) @return
615    */
616   protected Node   lookupNode(int nodeIdentity)
617   {
618     return (Node  ) m_nodes.elementAt(nodeIdentity);
619   }
620 
621   /**
622    * Get the next node identity value in the list, and call the iterator
623    * if it hasn't been added yet.
624    *
625    * @param identity The node identity (index).
626    * @return identity+1, or DTM.NULL.
627    */
628   protected int getNextNodeIdentity(int identity)
629   {
630 
631     identity += 1;
632 
633     if (identity >= m_nodes.size())
634     {
635       if (!nextNode())
636         identity = DTM.NULL;
637     }
638 
639     return identity;
640   }
641 
642   /**
643    * Get the handle from a Node.
644    * <p>%OPT% This will be pretty slow.</p>
645    *
646    * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path;
647    * walk down DTM reconstructing path) might be considerably faster
648    * on later nodes in large documents. That might also imply improving
649    * this call to handle nodes which would be in this DTM but
650    * have not yet been built, which might or might not be a Good Thing.</p>
651    * 
652    * %REVIEW% This relies on being able to test node-identity via
653    * object-identity. DTM2DOM proxying is a great example of a case where
654    * that doesn't work. DOM Level 3 will provide the isSameNode() method
655    * to fix that, but until then this is going to be flaky.
656    *
657    * @param node A node, which may be null.
658    *
659    * @return The node handle or <code>DTM.NULL</code>.
660    */
661   private int getHandleFromNode(Node   node)
662   {
663     if (null != node)
664     {
665       int len = m_nodes.size();        
666       boolean isMore;
667       int i = 0;
668       do
669       {          
670         for (; i < len; i++)
671         {
672           if (m_nodes.elementAt(i) == node)
673             return makeNodeHandle(i);
674         }
675 
676         isMore = nextNode();
677   
678         len = m_nodes.size();
679             
680       } 
681       while(isMore || i < len);
682     }
683     
684     return DTM.NULL;
685   }
686 
687   /** Get the handle from a Node. This is a more robust version of
688    * getHandleFromNode, intended to be usable by the public.
689    *
690    * <p>%OPT% This will be pretty slow.</p>
691    * 
692    * %REVIEW% This relies on being able to test node-identity via
693    * object-identity. DTM2DOM proxying is a great example of a case where
694    * that doesn't work. DOM Level 3 will provide the isSameNode() method
695    * to fix that, but until then this is going to be flaky.
696    *
697    * @param node A node, which may be null.
698    *
699    * @return The node handle or <code>DTM.NULL</code>.  */
700   public int getHandleOfNode(Node   node)
701   {
702     if (null != node)
703     {
704       // Is Node actually within the same document? If not, don't search!
705       // This would be easier if m_root was always the Document node, but
706       // we decided to allow wrapping a DTM around a subtree.
707       if((m_root==node) ||
708          (m_root.getNodeType()==DOCUMENT_NODE &&
709           m_root==node.getOwnerDocument()) ||
710          (m_root.getNodeType()!=DOCUMENT_NODE &&
711           m_root.getOwnerDocument()==node.getOwnerDocument())
712          )
713         {
714           // If node _is_ in m_root's tree, find its handle
715           //
716           // %OPT% This check may be improved significantly when DOM
717           // Level 3 nodeKey and relative-order tests become
718           // available!
719           for(Node   cursor=node;
720               cursor!=null;
721               cursor=
722                 (cursor.getNodeType()!=ATTRIBUTE_NODE)
723                 ? cursor.getParentNode()
724                 : ((org.w3c.dom.Attr  )cursor).getOwnerElement())
725             {
726               if(cursor==m_root)
727                 // We know this node; find its handle.
728                 return getHandleFromNode(node); 
729             } // for ancestors of node
730         } // if node and m_root in same Document
731     } // if node!=null
732 
733     return DTM.NULL;
734   }
735 
736   /**
737    * Retrieves an attribute node by by qualified name and namespace URI.
738    *
739    * @param nodeHandle int Handle of the node upon which to look up this attribute..
740    * @param namespaceURI The namespace URI of the attribute to
741    *   retrieve, or null.
742    * @param name The local name of the attribute to
743    *   retrieve.
744    * @return The attribute node handle with the specified name (
745    *   <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such
746    *   attribute.
747    */
748   public int getAttributeNode(int nodeHandle, String   namespaceURI,
749                               String   name)
750   {
751 
752     // %OPT% This is probably slower than it needs to be.
753     if (null == namespaceURI)
754       namespaceURI = "";
755 
756     int type = getNodeType(nodeHandle);
757 
758     if (DTM.ELEMENT_NODE == type)
759     {
760 
761       // Assume that attributes immediately follow the element.
762       int identity = makeNodeIdentity(nodeHandle);
763 
764       while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
765       {
766         // Assume this can not be null.
767         type = _type(identity);
768 
769                 // %REVIEW%
770                 // Should namespace nodes be retrievable DOM-style as attrs?
771                 // If not we need a separate function... which may be desirable
772                 // architecturally, but which is ugly from a code point of view.
773                 // (If we REALLY insist on it, this code should become a subroutine
774                 // of both -- retrieve the node, then test if the type matches
775                 // what you're looking for.)
776         if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE)
777         {
778           Node   node = lookupNode(identity);
779           String   nodeuri = node.getNamespaceURI();
780 
781           if (null == nodeuri)
782             nodeuri = "";
783 
784           String   nodelocalname = node.getLocalName();
785 
786           if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname))
787             return makeNodeHandle(identity);
788         }
789                 
790         else // if (DTM.NAMESPACE_NODE != type)
791         {
792           break;
793         }
794       }
795     }
796 
797     return DTM.NULL;
798   }
799 
800   /**
801    * Get the string-value of a node as a String object
802    * (see http://www.w3.org/TR/xpath#data-model
803    * for the definition of a node's string-value).
804    *
805    * @param nodeHandle The node ID.
806    *
807    * @return A string object that represents the string-value of the given node.
808    */
809   public XMLString getStringValue(int nodeHandle)
810   {
811 
812     int type = getNodeType(nodeHandle);
813     Node   node = getNode(nodeHandle);
814     // %TBD% If an element only has one text node, we should just use it 
815     // directly.
816     if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type 
817     || DTM.DOCUMENT_FRAGMENT_NODE == type)
818     {
819       FastStringBuffer buf = StringBufferPool.get();
820       String   s;
821   
822       try
823       {
824         getNodeData(node, buf);
825   
826         s = (buf.length() > 0) ? buf.toString() : "";
827       }
828       finally
829       {
830         StringBufferPool.free(buf);
831       }
832   
833       return m_xstrf.newstr( s );
834     }
835     else if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
836     {
837       // If this is a DTM text node, it may be made of multiple DOM text
838       // nodes -- including navigating into Entity References. DOM2DTM
839       // records the first node in the sequence and requires that we
840       // pick up the others when we retrieve the DTM node's value.
841       //
842       // %REVIEW% DOM Level 3 is expected to add a "whole text"
843       // retrieval method which performs this function for us.
844       FastStringBuffer buf = StringBufferPool.get();
845       while(node!=null)
846       {
847         buf.append(node.getNodeValue());
848         node=logicalNextDOMTextNode(node);
849       }
850       String   s=(buf.length() > 0) ? buf.toString() : "";
851       StringBufferPool.free(buf);
852       return m_xstrf.newstr( s );
853     }
854     else
855       return m_xstrf.newstr( node.getNodeValue() );
856   }
857   
858   /**
859    * Determine if the string-value of a node is whitespace
860    *
861    * @param nodeHandle The node Handle.
862    *
863    * @return Return true if the given node is whitespace.
864    */
865   public boolean isWhitespace(int nodeHandle)
866   {
867     int type = getNodeType(nodeHandle);
868     Node   node = getNode(nodeHandle);
869     if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
870     {
871       // If this is a DTM text node, it may be made of multiple DOM text
872       // nodes -- including navigating into Entity References. DOM2DTM
873       // records the first node in the sequence and requires that we
874       // pick up the others when we retrieve the DTM node's value.
875       //
876       // %REVIEW% DOM Level 3 is expected to add a "whole text"
877       // retrieval method which performs this function for us.
878       FastStringBuffer buf = StringBufferPool.get();
879       while(node!=null)
880       {
881         buf.append(node.getNodeValue());
882         node=logicalNextDOMTextNode(node);
883       }
884      boolean b = buf.isWhitespace(0, buf.length());
885       StringBufferPool.free(buf);
886      return b;
887     }
888     return false;
889   }
890   
891   /**
892    * Retrieve the text content of a DOM subtree, appending it into a
893    * user-supplied FastStringBuffer object. Note that attributes are
894    * not considered part of the content of an element.
895    * <p>
896    * There are open questions regarding whitespace stripping. 
897    * Currently we make no special effort in that regard, since the standard
898    * DOM doesn't yet provide DTD-based information to distinguish
899    * whitespace-in-element-context from genuine #PCDATA. Note that we
900    * should probably also consider xml:space if/when we address this.
901    * DOM Level 3 may solve the problem for us.
902    * <p>
903    * %REVIEW% Actually, since this method operates on the DOM side of the
904    * fence rather than the DTM side, it SHOULDN'T do
905    * any special handling. The DOM does what the DOM does; if you want
906    * DTM-level abstractions, use DTM-level methods.
907    *
908    * @param node Node whose subtree is to be walked, gathering the
909    * contents of all Text or CDATASection nodes.
910    * @param buf FastStringBuffer into which the contents of the text
911    * nodes are to be concatenated.
912    */
913   protected static void getNodeData(Node   node, FastStringBuffer buf)
914   {
915 
916     switch (node.getNodeType())
917     {
918     case Node.DOCUMENT_FRAGMENT_NODE :
919     case Node.DOCUMENT_NODE :
920     case Node.ELEMENT_NODE :
921     {
922       for (Node   child = node.getFirstChild(); null != child;
923               child = child.getNextSibling())
924       {
925         getNodeData(child, buf);
926       }
927     }
928     break;
929     case Node.TEXT_NODE :
930     case Node.CDATA_SECTION_NODE :
931     case Node.ATTRIBUTE_NODE :  // Never a child but might be our starting node
932       buf.append(node.getNodeValue());
933       break;
934     case Node.PROCESSING_INSTRUCTION_NODE :
935       // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);        
936       break;
937     default :
938       // ignore
939       break;
940     }
941   }
942 
943   /**
944    * Given a node handle, return its DOM-style node name. This will
945    * include names such as #text or #document.
946    *
947    * @param nodeHandle the id of the node.
948    * @return String Name of this node, which may be an empty string.
949    * %REVIEW% Document when empty string is possible...
950    * %REVIEW-COMMENT% It should never be empty, should it?
951    */
952   public String   getNodeName(int nodeHandle)
953   {
954 
955     Node   node = getNode(nodeHandle);
956 
957     // Assume non-null.
958     return node.getNodeName();
959   }
960 
961   /**
962    * Given a node handle, return the XPath node name.  This should be
963    * the name as described by the XPath data model, NOT the DOM-style
964    * name.
965    *
966    * @param nodeHandle the id of the node.
967    * @return String Name of this node, which may be an empty string.
968    */
969   public String   getNodeNameX(int nodeHandle)
970   {
971 
972     String   name;
973     short type = getNodeType(nodeHandle);
974 
975     switch (type)
976     {
977     case DTM.NAMESPACE_NODE :
978     {
979       Node   node = getNode(nodeHandle);
980 
981       // assume not null.
982       name = node.getNodeName();
983       if(name.startsWith("xmlns:"))
984       {
985         name = QName.getLocalPart(name);
986       }
987       else if(name.equals("xmlns"))
988       {
989         name = "";
990       }
991     }
992     break;
993     case DTM.ATTRIBUTE_NODE :
994     case DTM.ELEMENT_NODE :
995     case DTM.ENTITY_REFERENCE_NODE :
996     case DTM.PROCESSING_INSTRUCTION_NODE :
997     {
998       Node   node = getNode(nodeHandle);
999 
1000      // assume not null.
1001      name = node.getNodeName();
1002    }
1003    break;
1004    default :
1005      name = "";
1006    }
1007
1008    return name;
1009  }
1010
1011  /**
1012   * Given a node handle, return its XPath-style localname.
1013   * (As defined in Namespaces, this is the portion of the name after any
1014   * colon character).
1015   *
1016   * @param nodeHandle the id of the node.
1017   * @return String Local name of this node.
1018   */
1019  public String   getLocalName(int nodeHandle)
1020  {
1021    if(JJK_NEWCODE)
1022    {
1023      int id=makeNodeIdentity(nodeHandle);
1024      if(NULL==id) return null;
1025      Node   newnode=(Node  )m_nodes.elementAt(id);
1026      String   newname=newnode.getLocalName();
1027      if (null == newname)
1028      {
1029    // XSLT treats PIs, and possibly other things, as having QNames.
1030    String   qname = newnode.getNodeName();
1031    if('#'==qname.charAt(0))
1032    {
1033      //  Match old default for this function
1034      // This conversion may or may not be necessary
1035      newname="";
1036    }
1037    else
1038    {
1039      int index = qname.indexOf(':');
1040      newname = (index < 0) ? qname : qname.substring(index + 1);
1041    }
1042      }
1043      return newname;
1044    }
1045    else
1046    {
1047      String   name;
1048      short type = getNodeType(nodeHandle);
1049      switch (type)
1050      {
1051      case DTM.ATTRIBUTE_NODE :
1052      case DTM.ELEMENT_NODE :
1053      case DTM.ENTITY_REFERENCE_NODE :
1054      case DTM.NAMESPACE_NODE :
1055      case DTM.PROCESSING_INSTRUCTION_NODE :
1056    {
1057      Node   node = getNode(nodeHandle);
1058      
1059      // assume not null.
1060      name = node.getLocalName();
1061      
1062      if (null == name)
1063      {
1064        String   qname = node.getNodeName();
1065        int index = qname.indexOf(':');
1066        
1067        name = (index < 0) ? qname : qname.substring(index + 1);
1068      }
1069    }
1070    break;
1071      default :
1072    name = "";
1073      }
1074      return name;
1075    }
1076  }
1077
1078  /**
1079   * Given a namespace handle, return the prefix that the namespace decl is
1080   * mapping.
1081   * Given a node handle, return the prefix used to map to the namespace.
1082   *
1083   * <p> %REVIEW% Are you sure you want "" for no prefix?  </p>
1084   * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb  </p>
1085   *
1086   * @param nodeHandle the id of the node.
1087   * @return String prefix of this node's name, or "" if no explicit
1088   * namespace prefix was given.
1089   */
1090  public String   getPrefix(int nodeHandle)
1091  {
1092
1093    String   prefix;
1094    short type = getNodeType(nodeHandle);
1095
1096    switch (type)
1097    {
1098    case DTM.NAMESPACE_NODE :
1099    {
1100      Node   node = getNode(nodeHandle);
1101
1102      // assume not null.
1103      String   qname = node.getNodeName();
1104      int index = qname.indexOf(':');
1105
1106      prefix = (index < 0) ? "" : qname.substring(index + 1);
1107    }
1108    break;
1109    case DTM.ATTRIBUTE_NODE :
1110    case DTM.ELEMENT_NODE :
1111    {
1112      Node   node = getNode(nodeHandle);
1113
1114      // assume not null.
1115      String   qname = node.getNodeName();
1116      int index = qname.indexOf(':');
1117
1118      prefix = (index < 0) ? "" : qname.substring(0, index);
1119    }
1120    break;
1121    default :
1122      prefix = "";
1123    }
1124
1125    return prefix;
1126  }
1127
1128  /**
1129   * Given a node handle, return its DOM-style namespace URI
1130   * (As defined in Namespaces, this is the declared URI which this node's
1131   * prefix -- or default in lieu thereof -- was mapped to.)
1132   *
1133   * <p>%REVIEW% Null or ""? -sb</p>
1134   *
1135   * @param nodeHandle the id of the node.
1136   * @return String URI value of this node's namespace, or null if no
1137   * namespace was resolved.
1138   */
1139  public String   getNamespaceURI(int nodeHandle)
1140  {
1141    if(JJK_NEWCODE)
1142    {
1143      int id=makeNodeIdentity(nodeHandle);
1144      if(id==NULL) return null;
1145      Node   node=(Node  )m_nodes.elementAt(id);
1146      return node.getNamespaceURI();
1147    }
1148    else
1149    {
1150      String   nsuri;
1151      short type = getNodeType(nodeHandle);
1152      
1153      switch (type)
1154      {
1155      case DTM.ATTRIBUTE_NODE :
1156      case DTM.ELEMENT_NODE :
1157      case DTM.ENTITY_REFERENCE_NODE :
1158      case DTM.NAMESPACE_NODE :
1159      case DTM.PROCESSING_INSTRUCTION_NODE :
1160    {
1161      Node   node = getNode(nodeHandle);
1162      
1163      // assume not null.
1164      nsuri = node.getNamespaceURI();
1165      
1166      // %TBD% Handle DOM1?
1167    }
1168    break;
1169      default :
1170    nsuri = null;
1171      }
1172
1173      return nsuri;
1174    }
1175    
1176  }
1177  
1178  /** Utility function: Given a DOM Text node, determine whether it is
1179   * logically followed by another Text or CDATASection node. This may
1180   * involve traversing into Entity References.
1181   * 
1182   * %REVIEW% DOM Level 3 is expected to add functionality which may 
1183   * allow us to retire this.
1184   */
1185  private Node   logicalNextDOMTextNode(Node   n)
1186  {
1187        Node   p=n.getNextSibling();
1188        if(p==null)
1189        {
1190                // Walk out of any EntityReferenceNodes that ended with text
1191                for(n=n.getParentNode();
1192                        n!=null && ENTITY_REFERENCE_NODE == n.getNodeType();
1193                        n=n.getParentNode())
1194                {
1195                        p=n.getNextSibling();
1196                        if(p!=null)
1197                                break;
1198                }
1199        }
1200        n=p;
1201        while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType())
1202        {
1203                // Walk into any EntityReferenceNodes that start with text
1204                if(n.hasChildNodes())
1205                        n=n.getFirstChild();
1206                else
1207                        n=n.getNextSibling();
1208        }
1209        if(n!=null)
1210        {
1211                // Found a logical next sibling. Is it text?
1212                int ntype=n.getNodeType();
1213                if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype)
1214                        n=null;
1215        }
1216        return n;
1217  }
1218
1219  /**
1220   * Given a node handle, return its node value. This is mostly
1221   * as defined by the DOM, but may ignore some conveniences.
1222   * <p>
1223   *
1224   * @param nodeHandle The node id.
1225   * @return String Value of this node, or null if not
1226   * meaningful for this node type.
1227   */
1228  public String   getNodeValue(int nodeHandle)
1229  {
1230    // The _type(nodeHandle) call was taking the lion's share of our
1231    // time, and was wrong anyway since it wasn't coverting handle to
1232    // identity. Inlined it.
1233    int type = _exptype(makeNodeIdentity(nodeHandle));
1234    type=(NULL != type) ? getNodeType(nodeHandle) : NULL;
1235    
1236    if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type)
1237      return getNode(nodeHandle).getNodeValue();
1238    
1239    // If this is a DTM text node, it may be made of multiple DOM text
1240    // nodes -- including navigating into Entity References. DOM2DTM
1241    // records the first node in the sequence and requires that we
1242    // pick up the others when we retrieve the DTM node's value.
1243    //
1244    // %REVIEW% DOM Level 3 is expected to add a "whole text"
1245    // retrieval method which performs this function for us.
1246    Node   node = getNode(nodeHandle);
1247    Node   n=logicalNextDOMTextNode(node);
1248    if(n==null)
1249      return node.getNodeValue();
1250    
1251    FastStringBuffer buf = StringBufferPool.get();
1252        buf.append(node.getNodeValue());
1253    while(n!=null)
1254    {
1255      buf.append(n.getNodeValue());
1256      n=logicalNextDOMTextNode(n);
1257    }
1258    String   s = (buf.length() > 0) ? buf.toString() : "";
1259    StringBufferPool.free(buf);
1260    return s;
1261  }
1262
1263  /**
1264   *   A document type declaration information item has the following properties:
1265   *
1266   *     1. [system identifier] The system identifier of the external subset, if
1267   *        it exists. Otherwise this property has no value.
1268   *
1269   * @return the system identifier String object, or null if there is none.
1270   */
1271  public String   getDocumentTypeDeclarationSystemIdentifier()
1272  {
1273
1274    Document   doc;
1275
1276    if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1277      doc = (Document  ) m_root;
1278    else
1279      doc = m_root.getOwnerDocument();
1280
1281    if (null != doc)
1282    {
1283      DocumentType   dtd = doc.getDoctype();
1284
1285      if (null != dtd)
1286      {
1287        return dtd.getSystemId();
1288      }
1289    }
1290
1291    return null;
1292  }
1293
1294  /**
1295   * Return the public identifier of the external subset,
1296   * normalized as described in 4.2.2 External Entities [XML]. If there is
1297   * no external subset or if it has no public identifier, this property
1298   * has no value.
1299   *
1300   * (This method takes no parameters; the doctype is located via the DTM's root node.)
1301   *
1302   * @return the public identifier String object, or null if there is none.
1303   */
1304  public String   getDocumentTypeDeclarationPublicIdentifier()
1305  {
1306
1307    Document   doc;
1308
1309    if (m_root.getNodeType() == Node.DOCUMENT_NODE)
1310      doc = (Document  ) m_root;
1311    else
1312      doc = m_root.getOwnerDocument();
1313
1314    if (null != doc)
1315    {
1316      DocumentType   dtd = doc.getDoctype();
1317
1318      if (null != dtd)
1319      {
1320        return dtd.getPublicId();
1321      }
1322    }
1323
1324    return null;
1325  }
1326
1327  /**
1328   * Returns the <code>Element</code> whose <code>ID</code> is given by
1329   * <code>elementId</code>. If no such element exists, returns
1330   * <code>DTM.NULL</code>. Behavior is not defined if more than one element
1331   * has this <code>ID</code>. Attributes (including those
1332   * with the name "ID") are not of type ID unless so defined by DTD/Schema
1333   * information available to the DTM implementation.
1334   * Implementations that do not know whether attributes are of type ID or
1335   * not are expected to return <code>DTM.NULL</code>.
1336   *
1337   * <p>%REVIEW% Presumably IDs are still scoped to a single document,
1338   * and this operation searches only within a single document, right?
1339   * Wouldn't want collisions between DTMs in the same process.</p>
1340   *
1341   * @param elementId The unique <code>id</code> value for an element.
1342   * @return The handle of the matching element.
1343   */
1344  public int getElementById(String   elementId)
1345  {
1346
1347    Document   doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) 
1348        ? (Document  ) m_root : m_root.getOwnerDocument();
1349        
1350    if(null != doc)
1351    {
1352      Node   elem = doc.getElementById(elementId);
1353      if(null != elem)
1354      {
1355        int elemHandle = getHandleFromNode(elem);
1356        
1357        if(DTM.NULL == elemHandle)
1358        {
1359          int identity = m_nodes.size()-1;
1360          while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
1361          {
1362            Node   node = getNode(identity);
1363            if(node == elem)
1364            {
1365              elemHandle = getHandleFromNode(elem);
1366              break;
1367            }
1368           }
1369        }
1370        
1371        return elemHandle;
1372      }
1373    
1374    }
1375    return DTM.NULL;
1376  }
1377
1378  /**
1379   * The getUnparsedEntityURI function returns the URI of the unparsed
1380   * entity with the specified name in the same document as the context
1381   * node (see [3.3 Unparsed Entities]). It returns the empty string if
1382   * there is no such entity.
1383   * <p>
1384   * XML processors may choose to use the System Identifier (if one
1385   * is provided) to resolve the entity, rather than the URI in the
1386   * Public Identifier. The details are dependent on the processor, and
1387   * we would have to support some form of plug-in resolver to handle
1388   * this properly. Currently, we simply return the System Identifier if
1389   * present, and hope that it is a usable URI or that our caller can
1390   * map it to one.
1391   * TODO: Resolve Public Identifiers... or consider changing function name.
1392   * <p>
1393   * If we find a relative URI
1394   * reference, XML expects it to be resolved in terms of the base URI
1395   * of the document. The DOM doesn't do that for us, and it isn't
1396   * entirely clear whether that should be done here; currently that's
1397   * pushed up to a higher level of our application. (Note that DOM Level
1398   * 1 didn't store the document's base URI.)
1399   * TODO: Consider resolving Relative URIs.
1400   * <p>
1401   * (The DOM's statement that "An XML processor may choose to
1402   * completely expand entities before the structure model is passed
1403   * to the DOM" refers only to parsed entities, not unparsed, and hence
1404   * doesn't affect this function.)
1405   *
1406   * @param name A string containing the Entity Name of the unparsed
1407   * entity.
1408   *
1409   * @return String containing the URI of the Unparsed Entity, or an
1410   * empty string if no such entity exists.
1411   */
1412  public String   getUnparsedEntityURI(String   name)
1413  {
1414
1415    String   url = "";
1416    Document   doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) 
1417        ? (Document  ) m_root : m_root.getOwnerDocument();
1418
1419    if (null != doc)
1420    {
1421      DocumentType   doctype = doc.getDoctype();
1422  
1423      if (null != doctype)
1424      {
1425        NamedNodeMap   entities = doctype.getEntities();
1426        if(null == entities)
1427          return url;
1428        Entity   entity = (Entity  ) entities.getNamedItem(name);
1429        if(null == entity)
1430          return url;
1431        
1432        String   notationName = entity.getNotationName();
1433  
1434        if (null != notationName)  // then it's unparsed
1435        {
1436          // The draft says: "The XSLT processor may use the public 
1437          // identifier to generate a URI for the entity instead of the URI 
1438          // specified in the system identifier. If the XSLT processor does 
1439          // not use the public identifier to generate the URI, it must use 
1440          // the system identifier; if the system identifier is a relative 
1441          // URI, it must be resolved into an absolute URI using the URI of 
1442          // the resource containing the entity declaration as the base 
1443          // URI [RFC2396]."
1444          // So I'm falling a bit short here.
1445          url = entity.getSystemId();
1446  
1447          if (null == url)
1448          {
1449            url = entity.getPublicId();
1450          }
1451          else
1452          {
1453            // This should be resolved to an absolute URL, but that's hard 
1454            // to do from here.
1455          }        
1456        }
1457      }
1458    }
1459
1460    return url;
1461  }
1462
1463  /**
1464   *     5. [specified] A flag indicating whether this attribute was actually
1465   *        specified in the start-tag of its element, or was defaulted from the
1466   *        DTD.
1467   *
1468   * @param attributeHandle the attribute handle; a handle that does not
1469   *        refer to an attribute node yields <code>false</code>.
1470   *
1471   * @return <code>true</code> if the attribute was specified;
1472   *         <code>false</code> if it was defaulted.
1473   */
1474  public boolean isAttributeSpecified(int attributeHandle)
1475  {
1476    int type = getNodeType(attributeHandle);
1477
1478    if (DTM.ATTRIBUTE_NODE == type)
1479    {
1480      Attr   attr = (Attr  )getNode(attributeHandle);
1481      return attr.getSpecified();
1482    }
1483    return false;
1484  }
1485
1486  /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since
1487   * we're wrapped around an existing DOM.
1488   *
1489   * @param source The IncrementalSAXSource that we want to receive events from
1490   * on demand.
1491   */
1492  public void setIncrementalSAXSource(IncrementalSAXSource source)
1493  {
1494  }
1495  
1496  /** getContentHandler returns "our SAX builder" -- the thing that
1497   * someone else should send SAX events to in order to extend this
1498   * DTM model.
1499   *
1500   * @return null if this model doesn't respond to SAX events,
1501   * "this" if the DTM object has a built-in SAX ContentHandler,
1502   * the IncrementalSAXSource if we're bound to one and should receive
1503   * the SAX stream via it for incremental build purposes...
1504   * */
1505  public org.xml.sax.ContentHandler   getContentHandler()
1506  {
1507      return null;
1508  }
1509  
1510  /**
1511   * Return this DTM's lexical handler.
1512   *
1513   * %REVIEW% Should this return null if construction already done/begun?
1514   *
1515   * @return null if this model doesn't respond to lexical SAX events,
1516   * "this" if the DTM object has a built-in SAX ContentHandler,
1517   * the IncrementalSAXSource if we're bound to one and should receive
1518   * the SAX stream via it for incremental build purposes...
1519   */
1520  public org.xml.sax.ext.LexicalHandler   getLexicalHandler()
1521  {
1522
1523    return null;
1524  }
1525
1526  
1527  /**
1528   * Return this DTM's EntityResolver.
1529   *
1530   * @return null if this model doesn't respond to SAX entity ref events.
1531   */
1532  public org.xml.sax.EntityResolver   getEntityResolver()
1533  {
1534
1535    return null;
1536  }
1537  
1538  /**
1539   * Return this DTM's DTDHandler.
1540   *
1541   * @return null if this model doesn't respond to SAX dtd events.
1542   */
1543  public org.xml.sax.DTDHandler   getDTDHandler()
1544  {
1545
1546    return null;
1547  }
1548
1549  /**
1550   * Return this DTM's ErrorHandler.
1551   *
1552   * @return null if this model doesn't respond to SAX error events.
1553   */
1554  public org.xml.sax.ErrorHandler   getErrorHandler()
1555  {
1556
1557    return null;
1558  }
1559  
1560  /**
1561   * Return this DTM's DeclHandler.
1562   *
1563   * @return null if this model doesn't respond to SAX Decl events.
1564   */
1565  public org.xml.sax.ext.DeclHandler   getDeclHandler()
1566  {
1567
1568    return null;
1569  }  
1570
1571  /** @return true iff we're building this model incrementally (eg
1572   * we're partnered with a IncrementalSAXSource) and thus require that the
1573   * transformation and the parse run simultaneously. Guidance to the
1574   * DTMManager.
1575   * */
1576  public boolean needsTwoThreads()
1577  {
1578    return false;
1579  }
1580
1581  // ========== Direct SAX Dispatch, for optimization purposes ========
1582  
1583  /**
1584   * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition
1585   * of whitespace.  Refer to <A HREF="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
1586   * the definition of <CODE>S</CODE></A> for details.
1587   * @param   ch      Character to check as XML whitespace.
1588   * @return          =true if <var>ch</var> is XML whitespace; otherwise =false.
1589   */
1590  private static boolean isSpace(char ch)
1591  {
1592    return XMLCharacterRecognizer.isWhiteSpace(ch);  // Take the easy way out for now.
1593  }
1594
1595  /**
1596   * Directly call the
1597   * characters method on the passed ContentHandler for the
1598   * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
1599   * for the definition of a node's string-value). Multiple calls to the
1600   * ContentHandler's characters methods may well occur for a single call to
1601   * this method.
1602   *
1603   * @param nodeHandle The node ID.
1604   * @param ch A non-null reference to a ContentHandler.
1605   *
1606   * @throws org.xml.sax.SAXException
1607   */
1608  public void dispatchCharactersEvents(
1609          int nodeHandle, org.xml.sax.ContentHandler   ch, 
1610          boolean normalize)
1611            throws org.xml.sax.SAXException  
1612  {
1613    if(normalize)
1614    {
1615      XMLString str = getStringValue(nodeHandle);
1616      str = str.fixWhiteSpace(true, true, false);
1617      str.dispatchCharactersEvents(ch);
1618    }
1619    else
1620    {
1621      int type = getNodeType(nodeHandle);
1622      Node   node = getNode(nodeHandle);
1623      dispatchNodeData(node, ch, 0);
1624          // Text coalition -- a DTM text node may represent multiple
1625          // DOM nodes.
1626          if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
1627          {
1628                  while( null != (node=logicalNextDOMTextNode(node)) )
1629                  {
1630                      dispatchNodeData(node, ch, 0);
1631                  }
1632          }
1633    }
1634  }
1635  
1636  /**
1637   * Retrieve the text content of a DOM subtree, sending it to a
1638   * user-supplied ContentHandler as characters events. Note that attributes
1639   * are not considered part of the content of an element.
1640   * <p>
1641   * There are open questions regarding whitespace stripping. 
1642   * Currently we make no special effort in that regard, since the standard
1643   * DOM doesn't yet provide DTD-based information to distinguish
1644   * whitespace-in-element-context from genuine #PCDATA. Note that we
1645   * should probably also consider xml:space if/when we address this.
1646   * DOM Level 3 may solve the problem for us.
1647   * <p>
1648   * %REVIEW% Note that as a DOM-level operation, it can be argued that this
1649   * routine _shouldn't_ perform any processing beyond what the DOM already
1650   * does, and that whitespace stripping and so on belong at the DTM level.
1651   * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM.
1652   *
1653   * @param node Node whose subtree is to be walked, gathering the
1654   * contents of all Text or CDATASection nodes.
1655   * @param ch ContentHandler that receives the character data.
1656   * @param depth Nesting level relative to the starting node (0 for the starting node itself).
1657   */
1658  protected static void dispatchNodeData(Node   node, 
1659                                         org.xml.sax.ContentHandler   ch, 
1660                                         int depth)
1661            throws org.xml.sax.SAXException  
1662  {
1663
1664    switch (node.getNodeType())
1665    {
1666    case Node.DOCUMENT_FRAGMENT_NODE :
1667    case Node.DOCUMENT_NODE :
1668    case Node.ELEMENT_NODE :
1669    {
1670      for (Node   child = node.getFirstChild(); null != child;
1671              child = child.getNextSibling())
1672      {
1673        dispatchNodeData(child, ch, depth+1);
1674      }
1675    }
1676    break;
1677    case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW%
1678    case Node.COMMENT_NODE :
1679      if(0 != depth)
1680        break;
1681        // NOTE: Because this operation works in the DOM space, it does _not_ attempt
1682        // to perform Text Coalition. That should only be done in DTM space. 
1683    case Node.TEXT_NODE :
1684    case Node.CDATA_SECTION_NODE :
1685    case Node.ATTRIBUTE_NODE :
1686      String   str = node.getNodeValue();
1687      if(ch instanceof CharacterNodeHandler)
1688      {
1689        ((CharacterNodeHandler)ch).characters(node);
1690      }
1691      else
1692      {
1693        ch.characters(str.toCharArray(), 0, str.length());
1694      }
1695      break;
1696//    /* case Node.PROCESSING_INSTRUCTION_NODE :
1697//      // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);        
1698//      break; */
1699    default :
1700      // ignore
1701      break;
1702    }
1703  }
1704  
1705  TreeWalker m_walker = new TreeWalker(null);
1706  
1707  /**
1708   * Directly create SAX parser events from a subtree.
1709   *
1710   * @param nodeHandle The node ID.
1711   * @param ch A non-null reference to a ContentHandler.
1712   *
1713   * @throws org.xml.sax.SAXException
1714   */
1715  public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler   ch)
1716          throws org.xml.sax.SAXException  
1717  {
1718    TreeWalker treeWalker = m_walker;
1719    ContentHandler   prevCH = treeWalker.getContentHandler();
1720    
1721    if(null != prevCH)
1722    {
1723      treeWalker = new TreeWalker(null);
1724    }
1725    treeWalker.setContentHandler(ch);
1726    
1727    try
1728    {
1729      Node   node = getNode(nodeHandle);
1730      treeWalker.traverse(node);
1731    }
1732    finally
1733    {
1734      treeWalker.setContentHandler(null);
1735    }
1736  }
1737  
/**
 * Optional extension for content handlers that accept character data as a
 * DOM node. When the handler passed to dispatchNodeData implements this
 * interface, the node itself is handed over instead of a character array.
 */
public interface CharacterNodeHandler
{
  /**
   * Receive the DOM node whose value is the character data.
   *
   * @param node the DOM node carrying the character data.
   *
   * @throws org.xml.sax.SAXException
   */
  void characters(Node node) throws org.xml.sax.SAXException;
}
1743
1744  /**
1745   * For the moment all the run time properties are ignored by this
1746   * class.
1747   *
1748   * @param property a <code>String</code> value
1749   * @param value an <code>Object</code> value
1750   */
1751  public void setProperty(String   property, Object   value)
1752  {
1753  }
1754  
1755  /**
1756   * No source information is available for DOM2DTM, so return
1757   * <code>null</code> here.
1758   *
1759   * @param node an <code>int</code> value
1760   * @return null
1761   */
1762  public SourceLocator   getSourceLocatorFor(int node)
1763  {
1764    return null;
1765  }
1766
1767}
1768
1769
1770
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags