static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
{
  xmlNsPtr ns;
  static char buffer[XMLNS_BUFFER_LEN] ;
  char *key ;
  size_t keylen ;

  if (node->type != XML_ELEMENT_NODE) return ;

  ns = node->nsDef;
  while (ns != NULL) {

    keylen = XMLNS_PREFIX_LEN + (ns->prefix ? (strlen((const char*)ns->prefix) + 1) : 0) ;
    if (keylen > XMLNS_BUFFER_LEN) {
      key = (char*)malloc(keylen) ;
    } else {
      key = buffer ;
    }

    if (ns->prefix) {
      sprintf(key, "%s:%s", XMLNS_PREFIX, ns->prefix);
    } else {
      sprintf(key, "%s", XMLNS_PREFIX);
    }

    rb_hash_aset(attr_hash,
        NOKOGIRI_STR_NEW2(key),
        (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
    );
    if (key != buffer) {
      free(key);
    }
    ns = ns->next ;
  }
}
static void start_document(void * ctx)
{
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
  VALUE doc = rb_iv_get(self, "@document");

  xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx);

  if(NULL != ctxt && ctxt->html != 1) {
    if(ctxt->standalone != -1) {  /* -1 means there was no declaration */
      VALUE encoding = ctxt->encoding ?
        NOKOGIRI_STR_NEW2(ctxt->encoding) :
        Qnil;

      VALUE version = ctxt->version ?
        NOKOGIRI_STR_NEW2(ctxt->version) :
        Qnil;

      VALUE standalone = Qnil;

      switch(ctxt->standalone)
      {
        case 0:
          standalone = NOKOGIRI_STR_NEW2("no");
          break;
        case 1:
          standalone = NOKOGIRI_STR_NEW2("yes");
          break;
      }

      rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
    }
  }

  rb_funcall(doc, id_start_document, 0);
}
static void processing_instruction(void * ctx, const xmlChar * name, const xmlChar * content)
{
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
  VALUE doc = rb_iv_get(self, "@document");

  rb_funcall( doc,
              id_processing_instruction,
              2,
              NOKOGIRI_STR_NEW2(name),
              NOKOGIRI_STR_NEW2(content)
  );
}
Beispiel #4
0
void Init_nokogiri()
{
  xmlMemSetup(
      (xmlFreeFunc)ruby_xfree,
      (xmlMallocFunc)ruby_xmalloc,
      (xmlReallocFunc)ruby_xrealloc,
      strdup
  );

  mNokogiri         = rb_define_module("Nokogiri");
  mNokogiriXml      = rb_define_module_under(mNokogiri, "XML");
  mNokogiriHtml     = rb_define_module_under(mNokogiri, "HTML");
  mNokogiriXslt     = rb_define_module_under(mNokogiri, "XSLT");
  mNokogiriXmlSax   = rb_define_module_under(mNokogiriXml, "SAX");
  mNokogiriHtmlSax  = rb_define_module_under(mNokogiriHtml, "SAX");

  rb_const_set( mNokogiri,
                rb_intern("LIBXML_VERSION"),
                NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION, "UTF-8")
              );
  rb_const_set( mNokogiri,
                rb_intern("LIBXML_PARSER_VERSION"),
                NOKOGIRI_STR_NEW2(xmlParserVersion, "UTF-8")
              );

  init_xml_document();
  init_html_document();
  init_xml_node();
  init_xml_document_fragment();
  init_xml_text();
  init_xml_cdata();
  init_xml_processing_instruction();
  init_xml_attr();
  init_xml_entity_reference();
  init_xml_comment();
  init_xml_node_set();
  init_xml_xpath_context();
  init_xml_xpath();
  init_xml_sax_parser();
  init_xml_sax_push_parser();
  init_xml_reader();
  init_xml_dtd();
  init_xml_namespace();
  init_html_sax_parser();
  init_xslt_stylesheet();
  init_xml_syntax_error();
  init_html_entity_lookup();
  init_html_element_description();
  init_xml_schema();
  init_xml_relax_ng();
}
/*
 * call-seq:
 *  description
 *
 * The description for this element
 */
static VALUE description(VALUE self)
{
    htmlElemDesc * description;
    Data_Get_Struct(self, htmlElemDesc, description);

    return NOKOGIRI_STR_NEW2(description->desc);
}
VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error)
{
  VALUE msg, e, klass;

  klass = cNokogiriXmlSyntaxError;

  if (error->domain == XML_FROM_XPATH) {
    VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath"));
    klass = rb_const_get(xpath, rb_intern("SyntaxError"));
  }

  msg = (error && error->message) ? NOKOGIRI_STR_NEW2(error->message) : Qnil;

  e = rb_class_new_instance(
      1,
      &msg,
      klass
  );

  if (error)
  {
    rb_iv_set(e, "@domain", INT2NUM(error->domain));
    rb_iv_set(e, "@code", INT2NUM(error->code));
    rb_iv_set(e, "@level", INT2NUM((short)error->level));
    rb_iv_set(e, "@file", RBSTR_OR_QNIL(error->file));
    rb_iv_set(e, "@line", INT2NUM(error->line));
    rb_iv_set(e, "@str1", RBSTR_OR_QNIL(error->str1));
    rb_iv_set(e, "@str2", RBSTR_OR_QNIL(error->str2));
    rb_iv_set(e, "@str3", RBSTR_OR_QNIL(error->str3));
    rb_iv_set(e, "@int1", INT2NUM(error->int1));
    rb_iv_set(e, "@column", INT2NUM(error->int2));
  }

  return e;
}
/*
 * call-seq:
 *  default_sub_element
 *
 * The default sub element for this element
 */
static VALUE default_sub_element(VALUE self)
{
  htmlElemDesc * description;
  Data_Get_Struct(self, htmlElemDesc, description);

  return NOKOGIRI_STR_NEW2(description->defaultsubelt);
}
Beispiel #8
0
static void notation_copier(void *payload, void *data, xmlChar *name)
{
  VALUE hash = (VALUE)data;
  VALUE klass = rb_const_get(mNokogiriXml, rb_intern("Notation"));

  xmlNotationPtr c_notation = (xmlNotationPtr)payload;
  VALUE notation;
  VALUE argv[3];
  argv[0] = (c_notation->name ? NOKOGIRI_STR_NEW2(c_notation->name) : Qnil);
  argv[1] = (c_notation->PublicID ? NOKOGIRI_STR_NEW2(c_notation->PublicID) : Qnil);
  argv[2] = (c_notation->SystemID ? NOKOGIRI_STR_NEW2(c_notation->SystemID) : Qnil);

  notation = rb_class_new_instance(3, argv, klass);

  rb_hash_aset(hash, NOKOGIRI_STR_NEW2(name),notation);
}
Beispiel #9
0
/*
 * call-seq:
 *   attribute(name)
 *
 * Get the value of attribute named +name+
 */
static VALUE reader_attribute(VALUE self, VALUE name)
{
  xmlTextReaderPtr reader;
  xmlChar *value ;
  Data_Get_Struct(self, xmlTextReader, reader);

  if(name == Qnil) return Qnil;
  name = StringValue(name) ;

  value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValuePtr(name));
  if(value == NULL) {
    /* this section is an attempt to workaround older versions of libxml that
       don't handle namespaces properly in all attribute-and-friends functions */
    xmlChar *prefix = NULL ;
    xmlChar *localname = xmlSplitQName2((xmlChar*)StringValuePtr(name), &prefix);
    if (localname != NULL) {
      value = xmlTextReaderLookupNamespace(reader, localname);
      xmlFree(localname) ;
    } else {
      value = xmlTextReaderLookupNamespace(reader, prefix);
    }
    xmlFree(prefix);
  }
  if(value == NULL) return Qnil;

  VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
  VALUE rb_value = NOKOGIRI_STR_NEW2(value,
      RTEST(enc) ? StringValuePtr(enc) : NULL);
  xmlFree(value);
  return rb_value;
}
static void comment_func(void * ctx, const xmlChar * value)
{
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
  VALUE doc = rb_iv_get(self, "@document");
  VALUE str = NOKOGIRI_STR_NEW2(value);
  rb_funcall(doc, id_comment, 1, str);
}
/*
 * call-seq:
 *  name
 *
 * Get the tag name for this ElemementDescription
 */
static VALUE name(VALUE self)
{
    htmlElemDesc * description;
    Data_Get_Struct(self, htmlElemDesc, description);

    if(NULL == description->name) return Qnil;
    return NOKOGIRI_STR_NEW2(description->name);
}
Beispiel #12
0
/*
 * call-seq:
 *  str1
 *
 * Extra string information
 */
static VALUE str1(VALUE self)
{
  xmlErrorPtr error;
  Data_Get_Struct(self, xmlError, error);
  if(error->str1)
    return NOKOGIRI_STR_NEW2(error->str1, "UTF-8");
  return Qnil;
}
Beispiel #13
0
/*
 * call-seq:
 *  encoding
 *
 * Get the encoding for this Document
 */
static VALUE encoding(VALUE self)
{
  xmlDocPtr doc;
  Data_Get_Struct(self, xmlDoc, doc);

  if(!doc->encoding) return Qnil;
  return NOKOGIRI_STR_NEW2(doc->encoding);
}
Beispiel #14
0
/*
 * call-seq:
 *  version
 *
 * Get the XML version for this Document
 */
static VALUE version(VALUE self)
{
  xmlDocPtr doc;
  Data_Get_Struct(self, xmlDoc, doc);

  if(!doc->version) return Qnil;
  return NOKOGIRI_STR_NEW2(doc->version);
}
Beispiel #15
0
static void element_copier(void *_payload, void *data, xmlChar *name)
{
  VALUE hash = (VALUE)data;
  xmlNodePtr payload = (xmlNodePtr)_payload;

  VALUE element = Nokogiri_wrap_xml_node(Qnil, payload);

  rb_hash_aset(hash, NOKOGIRI_STR_NEW2(name), element);
}
Beispiel #16
0
/*
 * call-seq:
 *   external_id
 *
 * Get the External ID for this DTD
 */
static VALUE external_id(VALUE self)
{
  xmlDtdPtr dtd;
  Data_Get_Struct(self, xmlDtd, dtd);

  if(!dtd->ExternalID) return Qnil;

  return NOKOGIRI_STR_NEW2(dtd->ExternalID);
}
Beispiel #17
0
/*
 * call-seq:
 *  url
 *
 * Get the url name for this document.
 */
static VALUE url(VALUE self)
{
  xmlDocPtr doc;
  Data_Get_Struct(self, xmlDoc, doc);

  if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL);

  return Qnil;
}
Beispiel #18
0
/*
 * call-seq:
 *   xml_version
 *
 * Get the XML version of the document being read
 */
static VALUE xml_version(VALUE self)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);
  const char * version = (const char *)xmlTextReaderConstXmlVersion(reader);
  if(version == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(version, "UTF-8");
}
Beispiel #19
0
/*
 * call-seq:
 *   system_id
 *
 * Get the System ID for this DTD
 */
static VALUE system_id(VALUE self)
{
  xmlDtdPtr dtd;
  Data_Get_Struct(self, xmlDtd, dtd);

  if(!dtd->SystemID) return Qnil;

  return NOKOGIRI_STR_NEW2(dtd->SystemID);
}
Beispiel #20
0
/*
 * call-seq:
 *   namespace_uri
 *
 * Get the URI defining the namespace associated with the node
 */
static VALUE namespace_uri(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *uri;

  Data_Get_Struct(self, xmlTextReader, reader);
  uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
  if(uri == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(uri);
}
Beispiel #21
0
/*
 * call-seq:
 * base_uri
 *
 * Get the xml:base of the node
 */
static VALUE base_uri(VALUE self)
{
  xmlTextReaderPtr reader;
  const char * base_uri;

  Data_Get_Struct(self, xmlTextReader, reader);
  base_uri = (const char *)xmlTextReaderBaseUri(reader);
  if (base_uri == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(base_uri);
}
Beispiel #22
0
/*
 * call-seq:
 *   name
 *
 * Get the name of the node. Returns a utf-8 encoded string.
 */
static VALUE name(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *name;

  Data_Get_Struct(self, xmlTextReader, reader);
  name = (const char *)xmlTextReaderConstName(reader);
  if(name == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(name);
}
Beispiel #23
0
/*
 * call-seq:
 *   value
 *
 * Get the text value of the node if present. Returns a utf-8 encoded string.
 */
static VALUE value(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *value;

  Data_Get_Struct(self, xmlTextReader, reader);
  value = (const char *)xmlTextReaderConstValue(reader);
  if(value == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(value);
}
Beispiel #24
0
/*
 * call-seq:
 *   name
 *
 * Get the name of the node
 */
static VALUE name(VALUE self)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);
  const char * name = (const char *)xmlTextReaderConstName(reader);
  if(name == NULL) return Qnil;

  VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
  return NOKOGIRI_STR_NEW2(name,
      RTEST(enc) ? StringValuePtr(enc) : NULL);
}
Beispiel #25
0
/*
 * call-seq:
 *   prefix
 *
 * Get the shorthand reference to the namespace associated with the node.
 */
static VALUE prefix(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *prefix;

  Data_Get_Struct(self, xmlTextReader, reader);
  prefix = (const char *)xmlTextReaderConstPrefix(reader);
  if(prefix == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(prefix);
}
Beispiel #26
0
/*
 * call-seq:
 *   lang
 *
 * Get the xml:lang scope within which the node resides.
 */
static VALUE lang(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *lang;

  Data_Get_Struct(self, xmlTextReader, reader);
  lang = (const char *)xmlTextReaderConstXmlLang(reader);
  if(lang == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(lang);
}
Beispiel #27
0
/*
 * call-seq:
 *  href
 *
 * Get the href for this namespace
 */
static VALUE href(VALUE self)
{
  xmlNsPtr ns;
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlNs, ns);
  if(!ns->href) return Qnil;

  Data_Get_Struct(rb_iv_get(self, "@document"), xmlDoc, doc);

  return NOKOGIRI_STR_NEW2(ns->href);
}
Beispiel #28
0
static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
{
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
  VALUE doc = rb_iv_get(self, "@document");
  VALUE attributes = rb_ary_new();
  const xmlChar * attr;
  int i = 0;
  if(atts) {
    while((attr = atts[i]) != NULL) {
      rb_ary_push(attributes, NOKOGIRI_STR_NEW2(attr));
      i++;
    }
  }

  rb_funcall( doc,
              id_start_element,
              2,
              NOKOGIRI_STR_NEW2(name),
              attributes
  );
}
Beispiel #29
0
/*
 * call-seq:
 *  encode_special_chars(string)
 *
 * Encode any special characters in +string+
 */
static VALUE encode_special_chars(VALUE self, VALUE string)
{
  xmlNodePtr node;
  Data_Get_Struct(self, xmlNode, node);
  xmlChar * encoded = xmlEncodeSpecialChars(
      node->doc,
      (const xmlChar *)StringValuePtr(string)
  );

  VALUE encoded_str = NOKOGIRI_STR_NEW2(encoded);
  xmlFree(encoded);

  return encoded_str;
}
Beispiel #30
0
static void error_func(void * ctx, const char *msg, ...)
{
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
  VALUE doc = rb_iv_get(self, "@document");
  char * message;

  va_list args;
  va_start(args, msg);
  vasprintf(&message, msg, args);
  va_end(args);

  rb_funcall(doc, id_error, 1, NOKOGIRI_STR_NEW2(message));
  free(message);
}