Exemple #1
0
/* :nodoc: */
static void relink_namespace(xmlNodePtr reparented)
{
  xmlNodePtr child;

  /* Avoid segv when relinking against unlinked nodes. */
  if(!reparented->parent) return;

  /* Make sure that our reparented node has the correct namespaces */
  if(!reparented->ns && reparented->doc != (xmlDocPtr)reparented->parent)
    xmlSetNs(reparented, reparented->parent->ns);

  /* Search our parents for an existing definition */
  if(reparented->nsDef) {
    xmlNsPtr curr = reparented->nsDef;
    xmlNsPtr prev = NULL;

    while(curr) {
      xmlNsPtr ns = xmlSearchNsByHref(
          reparented->doc,
          reparented->parent,
          curr->href
      );
      /* If we find the namespace is already declared, remove it from this
       * definition list. */
      if(ns && ns != curr) {
        if (prev) {
          prev->next = curr->next;
        } else {
          reparented->nsDef = curr->next;
        }
        nokogiri_root_nsdef(curr, reparented->doc);
      } else {
        prev = curr;
      }
      curr = curr->next;
    }
  }

  /* Only walk all children if there actually is a namespace we need to */
  /* reparent. */
  if(NULL == reparented->ns) return;

  /* When a node gets reparented, walk it's children to make sure that */
  /* their namespaces are reparented as well. */
  child = reparented->children;
  while(NULL != child) {
    relink_namespace(child);
    child = child->next;
  }
}
Exemple #2
0
/* :nodoc: */
static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
{
  VALUE reparented_obj ;
  xmlNodePtr reparentee, pivot, reparented, next_text, new_next_text ;

  if(!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode))
    rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
  if(rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument))
    rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");

  Data_Get_Struct(reparentee_obj, xmlNode, reparentee);
  Data_Get_Struct(pivot_obj, xmlNode, pivot);

  if(XML_DOCUMENT_NODE == reparentee->type || XML_HTML_DOCUMENT_NODE == reparentee->type)
    rb_raise(rb_eArgError, "cannot reparent a document node");

  xmlUnlinkNode(reparentee);

  if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) {
    /*
     *  if the reparentee is a text node, there's a very good chance it will be
     *  merged with an adjacent text node after being reparented, and in that case
     *  libxml will free the underlying C struct.
     *
     *  since we clearly have a ruby object which references the underlying
     *  memory, we can't let the C struct get freed. let's pickle the original
     *  reparentee by rooting it; and then we'll reparent a duplicate of the
     *  node that we don't care about preserving.
     *
     *  alternatively, if the reparentee is from a different document than the
     *  pivot node, libxml2 is going to get confused about which document's
     *  "dictionary" the node's strings belong to (this is an otherwise
     *  uninteresting libxml2 implementation detail). as a result, we cannot
     *  reparent the actual reparentee, so we reparent a duplicate.
     */
    nokogiri_root_node(reparentee);
    if (!(reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1))) {
      rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
    }
  }

  if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling
      && reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
    /*
     *  libxml merges text nodes in a right-to-left fashion, meaning that if
     *  there are two text nodes who would be adjacent, the right (or following,
     *  or next) node will be merged into the left (or preceding, or previous)
     *  node.
     *
     *  and by "merged" I mean the string contents will be concatenated onto the
     *  left node's contents, and then the node will be freed.
     *
     *  which means that if we have a ruby object wrapped around the right node,
     *  its memory would be freed out from under it.
     *
     *  so, we detect this edge case and unlink-and-root the text node before it gets
     *  merged. then we dup the node and insert that duplicate back into the
     *  document where the real node was.
     *
     *  yes, this is totally lame.
     */
    next_text     = pivot->next ;
    new_next_text = xmlDocCopyNode(next_text, pivot->doc, 1) ;

    xmlUnlinkNode(next_text);
    nokogiri_root_node(next_text);

    xmlAddNextSibling(pivot, new_next_text);
  }

  if(!(reparented = (*prf)(pivot, reparentee))) {
    rb_raise(rb_eRuntimeError, "Could not reparent node");
  }

  /*
   *  make sure the ruby object is pointed at the just-reparented node, which
   *  might be a duplicate (see above) or might be the result of merging
   *  adjacent text nodes.
   */
  DATA_PTR(reparentee_obj) = reparented ;

  relink_namespace(reparented);

  reparented_obj = Nokogiri_wrap_xml_node(Qnil, reparented);

  rb_funcall(reparented_obj, decorate_bang, 0);

  return reparented_obj ;
}
Exemple #3
0
/* :nodoc: */
static void relink_namespace(xmlNodePtr reparented)
{
  xmlChar *name, *prefix;
  xmlNodePtr child;
  xmlNsPtr ns;

  if (reparented->type != XML_ATTRIBUTE_NODE &&
      reparented->type != XML_ELEMENT_NODE) return;

  if (reparented->ns == NULL || reparented->ns->prefix == NULL) {
    name = xmlSplitQName2(reparented->name, &prefix);

    if(reparented->type == XML_ATTRIBUTE_NODE) {
      if (prefix == NULL || strcmp((char*)prefix, XMLNS_PREFIX) == 0) return;
    }

    ns = xmlSearchNs(reparented->doc, reparented, prefix);

    if (ns == NULL && reparented->parent) {
      ns = xmlSearchNs(reparented->doc, reparented->parent, prefix);
    }

    if (ns != NULL) {
      xmlNodeSetName(reparented, name);
      xmlSetNs(reparented, ns);
    }
  }

  /* Avoid segv when relinking against unlinked nodes. */
  if (reparented->type != XML_ELEMENT_NODE || !reparented->parent) return;

  /* Make sure that our reparented node has the correct namespaces */
  if(!reparented->ns && reparented->doc != (xmlDocPtr)reparented->parent)
    xmlSetNs(reparented, reparented->parent->ns);

  /* Search our parents for an existing definition */
  if(reparented->nsDef) {
    xmlNsPtr curr = reparented->nsDef;
    xmlNsPtr prev = NULL;

    while(curr) {
      xmlNsPtr ns = xmlSearchNsByHref(
          reparented->doc,
          reparented->parent,
          curr->href
      );
      /* If we find the namespace is already declared, remove it from this
       * definition list. */
      if(ns && ns != curr) {
        if (prev) {
          prev->next = curr->next;
        } else {
          reparented->nsDef = curr->next;
        }
        nokogiri_root_nsdef(curr, reparented->doc);
      } else {
        prev = curr;
      }
      curr = curr->next;
    }
  }

  /* Only walk all children if there actually is a namespace we need to */
  /* reparent. */
  if(NULL == reparented->ns) return;

  /* When a node gets reparented, walk it's children to make sure that */
  /* their namespaces are reparented as well. */
  child = reparented->children;
  while(NULL != child) {
    relink_namespace(child);
    child = child->next;
  }

  if (reparented->type == XML_ELEMENT_NODE) {
    child = (xmlNodePtr)((xmlElementPtr)reparented)->attributes;
    while(NULL != child) {
      relink_namespace(child);
      child = child->next;
    }
  }
}
Exemple #4
0
/* :nodoc: */
static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
{
  VALUE reparented_obj ;
  xmlNodePtr reparentee, pivot, reparented, next_text, new_next_text, parent ;

  if(!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode))
    rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
  if(rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument))
    rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");

  Data_Get_Struct(reparentee_obj, xmlNode, reparentee);
  Data_Get_Struct(pivot_obj, xmlNode, pivot);

  /*
   * Check if nodes given are appropriate to have a parent-child
   * relationship, based on the DOM specification.
   *
   * cf. http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/core.html#ID-1590626202
   */
  if (prf == xmlAddChild) {
    parent = pivot;
  } else {
    parent = pivot->parent;
  }

  if (parent) {
    switch (parent->type) {
    case XML_DOCUMENT_NODE:
    case XML_HTML_DOCUMENT_NODE:
      switch (reparentee->type) {
        case XML_ELEMENT_NODE:
        case XML_PI_NODE:
        case XML_COMMENT_NODE:
        case XML_DOCUMENT_TYPE_NODE:
      /*
       * The DOM specification says no to adding text-like nodes
       * directly to a document, but we allow it for compatibility.
       */
        case XML_TEXT_NODE:
        case XML_CDATA_SECTION_NODE:
        case XML_ENTITY_REF_NODE:
          goto ok;
      }
      break;
    case XML_DOCUMENT_FRAG_NODE:
    case XML_ENTITY_REF_NODE:
    case XML_ELEMENT_NODE:
      switch (reparentee->type) {
      case XML_ELEMENT_NODE:
      case XML_PI_NODE:
      case XML_COMMENT_NODE:
      case XML_TEXT_NODE:
      case XML_CDATA_SECTION_NODE:
      case XML_ENTITY_REF_NODE:
        goto ok;
      }
      break;
    case XML_ATTRIBUTE_NODE:
      switch (reparentee->type) {
      case XML_TEXT_NODE:
      case XML_ENTITY_REF_NODE:
        goto ok;
      }
      break;
    case XML_TEXT_NODE:
      /*
       * xmlAddChild() breaks the DOM specification in that it allows
       * adding a text node to another, in which case text nodes are
       * coalesced, but since our JRuby version does not support such
       * operation, we should inhibit it.
       */
      break;
    }

    rb_raise(rb_eArgError, "cannot reparent %s there", rb_obj_classname(reparentee_obj));
  }

ok:
  xmlUnlinkNode(reparentee);

  if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) {
    /*
     *  if the reparentee is a text node, there's a very good chance it will be
     *  merged with an adjacent text node after being reparented, and in that case
     *  libxml will free the underlying C struct.
     *
     *  since we clearly have a ruby object which references the underlying
     *  memory, we can't let the C struct get freed. let's pickle the original
     *  reparentee by rooting it; and then we'll reparent a duplicate of the
     *  node that we don't care about preserving.
     *
     *  alternatively, if the reparentee is from a different document than the
     *  pivot node, libxml2 is going to get confused about which document's
     *  "dictionary" the node's strings belong to (this is an otherwise
     *  uninteresting libxml2 implementation detail). as a result, we cannot
     *  reparent the actual reparentee, so we reparent a duplicate.
     */
    nokogiri_root_node(reparentee);
    if (!(reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1))) {
      rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
    }
  }

  if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling
      && reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
    /*
     *  libxml merges text nodes in a right-to-left fashion, meaning that if
     *  there are two text nodes who would be adjacent, the right (or following,
     *  or next) node will be merged into the left (or preceding, or previous)
     *  node.
     *
     *  and by "merged" I mean the string contents will be concatenated onto the
     *  left node's contents, and then the node will be freed.
     *
     *  which means that if we have a ruby object wrapped around the right node,
     *  its memory would be freed out from under it.
     *
     *  so, we detect this edge case and unlink-and-root the text node before it gets
     *  merged. then we dup the node and insert that duplicate back into the
     *  document where the real node was.
     *
     *  yes, this is totally lame.
     */
    next_text     = pivot->next ;
    new_next_text = xmlDocCopyNode(next_text, pivot->doc, 1) ;

    xmlUnlinkNode(next_text);
    nokogiri_root_node(next_text);

    xmlAddNextSibling(pivot, new_next_text);
  }

  if(!(reparented = (*prf)(pivot, reparentee))) {
    rb_raise(rb_eRuntimeError, "Could not reparent node");
  }

  /*
   *  make sure the ruby object is pointed at the just-reparented node, which
   *  might be a duplicate (see above) or might be the result of merging
   *  adjacent text nodes.
   */
  DATA_PTR(reparentee_obj) = reparented ;

  relink_namespace(reparented);

  reparented_obj = Nokogiri_wrap_xml_node(Qnil, reparented);

  rb_funcall(reparented_obj, decorate_bang, 0);

  return reparented_obj ;
}
Exemple #5
0
/* :nodoc: */
static void relink_namespace(xmlNodePtr reparented)
{
  xmlNodePtr child;

  if (reparented->type != XML_ATTRIBUTE_NODE &&
      reparented->type != XML_ELEMENT_NODE) { return; }

  if (reparented->ns == NULL || reparented->ns->prefix == NULL) {
    xmlNsPtr ns = NULL;
    xmlChar *name = NULL, *prefix = NULL;

    name = xmlSplitQName2(reparented->name, &prefix);

    if (reparented->type == XML_ATTRIBUTE_NODE) {
      if (prefix == NULL || strcmp((char*)prefix, XMLNS_PREFIX) == 0) {
        xmlFree(name);
        xmlFree(prefix);
        return;
      }
    }

    ns = xmlSearchNs(reparented->doc, reparented, prefix);

    if (ns == NULL && reparented->parent) {
      ns = xmlSearchNs(reparented->doc, reparented->parent, prefix);
    }

    if (ns != NULL) {
      xmlNodeSetName(reparented, name);
      xmlSetNs(reparented, ns);
    }

    xmlFree(name);
    xmlFree(prefix);
  }

  /* Avoid segv when relinking against unlinked nodes. */
  if (reparented->type != XML_ELEMENT_NODE || !reparented->parent) { return; }

  /* Make sure that our reparented node has the correct namespaces */
  if (!reparented->ns && reparented->doc != (xmlDocPtr)reparented->parent) {
    xmlSetNs(reparented, reparented->parent->ns);
  }

  /* Search our parents for an existing definition */
  if (reparented->nsDef) {
    xmlNsPtr curr = reparented->nsDef;
    xmlNsPtr prev = NULL;

    while (curr) {
      xmlNsPtr ns = xmlSearchNsByHref(
                      reparented->doc,
                      reparented->parent,
                      curr->href
                    );
      /* If we find the namespace is already declared, remove it from this
       * definition list. */
      if (ns && ns != curr && xmlStrEqual(ns->prefix, curr->prefix)) {
        if (prev) {
          prev->next = curr->next;
        } else {
          reparented->nsDef = curr->next;
        }
        nokogiri_root_nsdef(curr, reparented->doc);
      } else {
        prev = curr;
      }
      curr = curr->next;
    }
  }

  /*
   *  Search our parents for an existing definition of current namespace,
   *  because the definition it's pointing to may have just been removed nsDef.
   *
   *  And although that would technically probably be OK, I'd feel better if we
   *  referred to a namespace that's still present in a node's nsDef somewhere
   *  in the doc.
   */
  if (reparented->ns) {
    xmlNsPtr ns = xmlSearchNs(reparented->doc, reparented, reparented->ns->prefix);
    if (ns
        && ns != reparented->ns
        && xmlStrEqual(ns->prefix, reparented->ns->prefix)
        && xmlStrEqual(ns->href, reparented->ns->href)
       ) {
      xmlSetNs(reparented, ns);
    }
  }

  /* Only walk all children if there actually is a namespace we need to */
  /* reparent. */
  if (NULL == reparented->ns) { return; }

  /* When a node gets reparented, walk it's children to make sure that */
  /* their namespaces are reparented as well. */
  child = reparented->children;
  while (NULL != child) {
    relink_namespace(child);
    child = child->next;
  }

  if (reparented->type == XML_ELEMENT_NODE) {
    child = (xmlNodePtr)((xmlElementPtr)reparented)->attributes;
    while(NULL != child) {
      relink_namespace(child);
      child = child->next;
    }
  }
}
Exemple #6
0
/* :nodoc: */
static VALUE reparent_node_with(VALUE node_obj, VALUE other_obj, node_other_func func)
{
  VALUE reparented_obj ;
  xmlNodePtr node, other, reparented ;

  if(!rb_obj_is_kind_of(node_obj, cNokogiriXmlNode))
    rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");

  Data_Get_Struct(node_obj, xmlNode, node);
  Data_Get_Struct(other_obj, xmlNode, other);

  if(XML_DOCUMENT_NODE == node->type || XML_HTML_DOCUMENT_NODE == node->type)
    rb_raise(rb_eArgError, "cannot reparent a document node");

  if(node->type == XML_TEXT_NODE) {
    NOKOGIRI_ROOT_NODE(node);
    node = xmlDocCopyNode(node, other->doc, 1);
  }

  if (node->doc == other->doc) {
    xmlUnlinkNode(node) ;

    // TODO: I really want to remove this.  We shouldn't support 2.6.16 anymore
    if ( node->type == XML_TEXT_NODE
         && other->type == XML_TEXT_NODE
         && is_2_6_16() ) {

      // we'd rather leak than segfault.
      other->content = xmlStrdup(other->content);

    }

    if(!(reparented = (*func)(other, node))) {
      rb_raise(rb_eRuntimeError, "Could not reparent node (%s:%d)", __FILE__, __LINE__);
    }
  } else {
    xmlNodePtr duped_node ;
    // recursively copy to the new document
    if (!(duped_node = xmlDocCopyNode(node, other->doc, 1))) {
      rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
    }
    if(!(reparented = (*func)(other, duped_node))) {
      rb_raise(rb_eRuntimeError, "Could not reparent node (%s:%d)", __FILE__, __LINE__);
    }
    xmlUnlinkNode(node);
    NOKOGIRI_ROOT_NODE(node);
  }

  // the child was a text node that was coalesced. we need to have the object
  // point at SOMETHING, or we'll totally bomb out.
  if (reparented != node) {
    DATA_PTR(node_obj) = reparented ;
  }

  // Appropriately link in namespaces
  relink_namespace(reparented);

  reparented_obj = Nokogiri_wrap_xml_node(Qnil, reparented);

  rb_funcall(reparented_obj, decorate_bang, 0);

  return reparented_obj ;
}