/*
 * Copy the namespace declarations found directly on +node+ into +attr_hash+.
 *
 * Every entry of node->nsDef becomes one hash pair.  The key is XMLNS_PREFIX
 * alone when the declaration has no prefix, or XMLNS_PREFIX ":" prefix when
 * it has one; the value is the href as a Ruby string, or nil when href is
 * NULL.  Nodes whose type is not XML_ELEMENT_NODE are ignored.
 *
 * Keys that fit are formatted in a static scratch buffer; longer keys use a
 * temporary heap allocation that is released right after the hash insert.
 * A failed allocation raises NoMemoryError instead of writing through NULL.
 */
static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
{
  static char buffer[XMLNS_BUFFER_LEN];
  xmlNsPtr ns;

  if (node->type != XML_ELEMENT_NODE) {
    return;
  }

  for (ns = node->nsDef; ns != NULL; ns = ns->next) {
    char *key = buffer;
    size_t capacity = sizeof buffer;
    size_t keylen = XMLNS_PREFIX_LEN
                    + (ns->prefix ? (strlen((const char *)ns->prefix) + 1) : 0);

    if (keylen > XMLNS_BUFFER_LEN) {
      key = malloc(keylen);
      if (key == NULL) {
        rb_memerror(); /* does not return */
      }
      capacity = keylen;
    }

    /* Bounded formatting: never write past the storage chosen above. */
    if (ns->prefix) {
      snprintf(key, capacity, "%s:%s", XMLNS_PREFIX, ns->prefix);
    } else {
      snprintf(key, capacity, "%s", XMLNS_PREFIX);
    }

    rb_hash_aset(attr_hash,
                 NOKOGIRI_STR_NEW2(key),
                 (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil));

    if (key != buffer) {
      free(key);
    }
  }
}
static void start_document(void * ctx) { VALUE self = NOKOGIRI_SAX_SELF(ctx); VALUE doc = rb_iv_get(self, "@document"); xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx); if(NULL != ctxt && ctxt->html != 1) { if(ctxt->standalone != -1) { /* -1 means there was no declaration */ VALUE encoding = ctxt->encoding ? NOKOGIRI_STR_NEW2(ctxt->encoding) : Qnil; VALUE version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil; VALUE standalone = Qnil; switch(ctxt->standalone) { case 0: standalone = NOKOGIRI_STR_NEW2("no"); break; case 1: standalone = NOKOGIRI_STR_NEW2("yes"); break; } rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone); } } rb_funcall(doc, id_start_document, 0); }
static void processing_instruction(void * ctx, const xmlChar * name, const xmlChar * content) { VALUE self = NOKOGIRI_SAX_SELF(ctx); VALUE doc = rb_iv_get(self, "@document"); rb_funcall( doc, id_processing_instruction, 2, NOKOGIRI_STR_NEW2(name), NOKOGIRI_STR_NEW2(content) ); }
void Init_nokogiri() { xmlMemSetup( (xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, strdup ); mNokogiri = rb_define_module("Nokogiri"); mNokogiriXml = rb_define_module_under(mNokogiri, "XML"); mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML"); mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT"); mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX"); mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX"); rb_const_set( mNokogiri, rb_intern("LIBXML_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION, "UTF-8") ); rb_const_set( mNokogiri, rb_intern("LIBXML_PARSER_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion, "UTF-8") ); init_xml_document(); init_html_document(); init_xml_node(); init_xml_document_fragment(); init_xml_text(); init_xml_cdata(); init_xml_processing_instruction(); init_xml_attr(); init_xml_entity_reference(); init_xml_comment(); init_xml_node_set(); init_xml_xpath_context(); init_xml_xpath(); init_xml_sax_parser(); init_xml_sax_push_parser(); init_xml_reader(); init_xml_dtd(); init_xml_namespace(); init_html_sax_parser(); init_xslt_stylesheet(); init_xml_syntax_error(); init_html_entity_lookup(); init_html_element_description(); init_xml_schema(); init_xml_relax_ng(); }
/*
 * call-seq:
 *  description
 *
 * The human readable description stored for this element
 */
static VALUE description(VALUE self)
{
  htmlElemDesc *elem;

  Data_Get_Struct(self, htmlElemDesc, elem);

  return NOKOGIRI_STR_NEW2(elem->desc);
}
VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error) { VALUE msg, e, klass; klass = cNokogiriXmlSyntaxError; if (error->domain == XML_FROM_XPATH) { VALUE xpath = rb_const_get(mNokogiriXml, rb_intern("XPath")); klass = rb_const_get(xpath, rb_intern("SyntaxError")); } msg = (error && error->message) ? NOKOGIRI_STR_NEW2(error->message) : Qnil; e = rb_class_new_instance( 1, &msg, klass ); if (error) { rb_iv_set(e, "@domain", INT2NUM(error->domain)); rb_iv_set(e, "@code", INT2NUM(error->code)); rb_iv_set(e, "@level", INT2NUM((short)error->level)); rb_iv_set(e, "@file", RBSTR_OR_QNIL(error->file)); rb_iv_set(e, "@line", INT2NUM(error->line)); rb_iv_set(e, "@str1", RBSTR_OR_QNIL(error->str1)); rb_iv_set(e, "@str2", RBSTR_OR_QNIL(error->str2)); rb_iv_set(e, "@str3", RBSTR_OR_QNIL(error->str3)); rb_iv_set(e, "@int1", INT2NUM(error->int1)); rb_iv_set(e, "@column", INT2NUM(error->int2)); } return e; }
/*
 * call-seq:
 *  default_sub_element
 *
 * The default sub element for this element, or nil when the element
 * description does not define one.
 */
static VALUE default_sub_element(VALUE self)
{
  htmlElemDesc * description;

  Data_Get_Struct(self, htmlElemDesc, description);

  /* defaultsubelt is optional in the element table; do not convert NULL */
  if (description->defaultsubelt == NULL) {
    return Qnil;
  }

  return NOKOGIRI_STR_NEW2(description->defaultsubelt);
}
/*
 * Hash-scanner callback: wraps one xmlNotation (+payload+) in a new
 * XML::Notation instance built from (name, PublicID, SystemID) and stores it
 * in the Ruby hash passed through +data+, keyed by +name+.  Fields that are
 * NULL on the notation are passed to the constructor as nil.
 */
static void notation_copier(void *payload, void *data, xmlChar *name)
{
  VALUE result_hash = (VALUE)data;
  VALUE notation_class = rb_const_get(mNokogiriXml, rb_intern("Notation"));
  xmlNotationPtr src = (xmlNotationPtr)payload;
  VALUE ctor_args[3];
  VALUE wrapped;

  ctor_args[0] = Qnil;
  if (src->name) {
    ctor_args[0] = NOKOGIRI_STR_NEW2(src->name);
  }

  ctor_args[1] = Qnil;
  if (src->PublicID) {
    ctor_args[1] = NOKOGIRI_STR_NEW2(src->PublicID);
  }

  ctor_args[2] = Qnil;
  if (src->SystemID) {
    ctor_args[2] = NOKOGIRI_STR_NEW2(src->SystemID);
  }

  wrapped = rb_class_new_instance(3, ctor_args, notation_class);

  rb_hash_aset(result_hash, NOKOGIRI_STR_NEW2(name), wrapped);
}
/*
 * call-seq:
 *   attribute(name)
 *
 * Get the value of attribute named +name+.  Returns nil when +name+ is nil
 * or when no value can be found.
 */
static VALUE reader_attribute(VALUE self, VALUE name)
{
  xmlTextReaderPtr reader;
  xmlChar *attr_value;

  Data_Get_Struct(self, xmlTextReader, reader);

  if (Qnil == name) {
    return Qnil;
  }
  name = StringValue(name);

  attr_value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValuePtr(name));

  if (attr_value == NULL) {
    /*
     * Workaround for older versions of libxml that don't handle namespaces
     * properly in all attribute-and-friends functions: split the name as a
     * QName and look it up as a namespace instead, using the local part when
     * the split succeeded and the (still NULL) prefix otherwise.
     */
    xmlChar *qname_prefix = NULL;
    xmlChar *qname_local = xmlSplitQName2((xmlChar*)StringValuePtr(name), &qname_prefix);

    attr_value = xmlTextReaderLookupNamespace(
                   reader,
                   qname_local != NULL ? qname_local : qname_prefix);

    if (qname_local != NULL) {
      xmlFree(qname_local);
    }
    xmlFree(qname_prefix);
  }

  if (attr_value == NULL) {
    return Qnil;
  }

  /* Copy into a Ruby string before releasing the libxml-owned buffer. */
  VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
  VALUE result = NOKOGIRI_STR_NEW2(attr_value,
                                   RTEST(enc) ? StringValuePtr(enc) : NULL);
  xmlFree(attr_value);

  return result;
}
/*
 * SAX comment callback: converts the comment text to a Ruby string and
 * sends it to the handler document via id_comment.
 */
static void comment_func(void * ctx, const xmlChar * value)
{
  VALUE handler = rb_iv_get(NOKOGIRI_SAX_SELF(ctx), "@document");
  VALUE text = NOKOGIRI_STR_NEW2(value);

  rb_funcall(handler, id_comment, 1, text);
}
/*
 * call-seq:
 *  name
 *
 * Get the tag name for this ElementDescription, or nil when the description
 * carries no name.
 */
static VALUE name(VALUE self)
{
  htmlElemDesc *elem;

  Data_Get_Struct(self, htmlElemDesc, elem);

  return (elem->name != NULL) ? NOKOGIRI_STR_NEW2(elem->name) : Qnil;
}
/*
 * call-seq:
 *  str1
 *
 * Extra string information attached to the error, or nil when absent
 */
static VALUE str1(VALUE self)
{
  xmlErrorPtr err;

  Data_Get_Struct(self, xmlError, err);

  if (!err->str1) {
    return Qnil;
  }

  return NOKOGIRI_STR_NEW2(err->str1, "UTF-8");
}
/*
 * call-seq:
 *  encoding
 *
 * Get the encoding for this Document, or nil when none is recorded
 */
static VALUE encoding(VALUE self)
{
  xmlDocPtr xml_doc;

  Data_Get_Struct(self, xmlDoc, xml_doc);

  return xml_doc->encoding ? NOKOGIRI_STR_NEW2(xml_doc->encoding) : Qnil;
}
/*
 * call-seq:
 *  version
 *
 * Get the XML version for this Document, or nil when none is recorded
 */
static VALUE version(VALUE self)
{
  xmlDocPtr xml_doc;

  Data_Get_Struct(self, xmlDoc, xml_doc);

  return xml_doc->version ? NOKOGIRI_STR_NEW2(xml_doc->version) : Qnil;
}
/*
 * Hash-scanner callback: wraps the node handed in as +_payload+ with
 * Nokogiri_wrap_xml_node and stores it in the Ruby hash passed through
 * +data+, keyed by +name+.
 */
static void element_copier(void *_payload, void *data, xmlChar *name)
{
  VALUE wrapped = Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)_payload);

  rb_hash_aset((VALUE)data, NOKOGIRI_STR_NEW2(name), wrapped);
}
/*
 * call-seq:
 *   external_id
 *
 * Get the External ID for this DTD, or nil when it has none
 */
static VALUE external_id(VALUE self)
{
  xmlDtdPtr dtd_ptr;

  Data_Get_Struct(self, xmlDtd, dtd_ptr);

  return dtd_ptr->ExternalID ? NOKOGIRI_STR_NEW2(dtd_ptr->ExternalID) : Qnil;
}
/*
 * call-seq:
 *  url
 *
 * Get the url name for this document, or nil when no URL is set.
 */
static VALUE url(VALUE self)
{
  xmlDocPtr xml_doc;

  Data_Get_Struct(self, xmlDoc, xml_doc);

  if (!xml_doc->URL) {
    return Qnil;
  }

  return NOKOGIRI_STR_NEW2(xml_doc->URL);
}
/*
 * call-seq:
 *   xml_version
 *
 * Get the XML version of the document being read, or nil when the reader
 * does not report one
 */
static VALUE xml_version(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *reported;

  Data_Get_Struct(self, xmlTextReader, reader);

  reported = (const char *)xmlTextReaderConstXmlVersion(reader);
  if (NULL == reported) {
    return Qnil;
  }

  return NOKOGIRI_STR_NEW2(reported, "UTF-8");
}
/*
 * call-seq:
 *   system_id
 *
 * Get the System ID for this DTD, or nil when it has none
 */
static VALUE system_id(VALUE self)
{
  xmlDtdPtr dtd_ptr;

  Data_Get_Struct(self, xmlDtd, dtd_ptr);

  return dtd_ptr->SystemID ? NOKOGIRI_STR_NEW2(dtd_ptr->SystemID) : Qnil;
}
/*
 * call-seq:
 *   namespace_uri
 *
 * Get the URI defining the namespace associated with the node, or nil when
 * the reader reports none
 */
static VALUE namespace_uri(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *ns_uri;

  Data_Get_Struct(self, xmlTextReader, reader);
  ns_uri = (const char *)xmlTextReaderConstNamespaceUri(reader);

  return (ns_uri != NULL) ? NOKOGIRI_STR_NEW2(ns_uri) : Qnil;
}
/*
 * call-seq:
 *   base_uri
 *
 * Get the xml:base of the node, or nil when no base URI is available
 */
static VALUE base_uri(VALUE self)
{
  xmlTextReaderPtr reader;
  char *uri;
  VALUE rb_uri;

  Data_Get_Struct(self, xmlTextReader, reader);

  /*
   * Unlike the xmlTextReaderConst* accessors, xmlTextReaderBaseUri hands
   * back a string the caller owns, so it is copied into a Ruby string and
   * then released.
   */
  uri = (char *)xmlTextReaderBaseUri(reader);
  if (uri == NULL) {
    return Qnil;
  }

  rb_uri = NOKOGIRI_STR_NEW2(uri);
  xmlFree(uri);

  return rb_uri;
}
/*
 * call-seq:
 *   name
 *
 * Get the name of the node.  Returns a utf-8 encoded string, or nil when
 * the reader reports no name.
 */
static VALUE name(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *node_name;

  Data_Get_Struct(self, xmlTextReader, reader);
  node_name = (const char *)xmlTextReaderConstName(reader);

  return (node_name != NULL) ? NOKOGIRI_STR_NEW2(node_name) : Qnil;
}
/*
 * call-seq:
 *   value
 *
 * Get the text value of the node if present.  Returns a utf-8 encoded
 * string, or nil when the reader reports no value.
 */
static VALUE value(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *text;

  Data_Get_Struct(self, xmlTextReader, reader);
  text = (const char *)xmlTextReaderConstValue(reader);

  return (text != NULL) ? NOKOGIRI_STR_NEW2(text) : Qnil;
}
/*
 * call-seq:
 *   name
 *
 * Get the name of the node, or nil when the reader reports none.  The
 * reader's @encoding, when set, is passed along to the string constructor.
 */
static VALUE name(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *node_name;

  Data_Get_Struct(self, xmlTextReader, reader);

  node_name = (const char *)xmlTextReaderConstName(reader);
  if (NULL == node_name) {
    return Qnil;
  }

  VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");

  return NOKOGIRI_STR_NEW2(node_name,
                           RTEST(enc) ? StringValuePtr(enc) : NULL);
}
/*
 * call-seq:
 *   prefix
 *
 * Get the shorthand reference to the namespace associated with the node,
 * or nil when the reader reports none.
 */
static VALUE prefix(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *ns_prefix;

  Data_Get_Struct(self, xmlTextReader, reader);
  ns_prefix = (const char *)xmlTextReaderConstPrefix(reader);

  return (ns_prefix != NULL) ? NOKOGIRI_STR_NEW2(ns_prefix) : Qnil;
}
/*
 * call-seq:
 *   lang
 *
 * Get the xml:lang scope within which the node resides, or nil when the
 * reader reports none.
 */
static VALUE lang(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *xml_lang;

  Data_Get_Struct(self, xmlTextReader, reader);
  xml_lang = (const char *)xmlTextReaderConstXmlLang(reader);

  return (xml_lang != NULL) ? NOKOGIRI_STR_NEW2(xml_lang) : Qnil;
}
/*
 * call-seq:
 *  href
 *
 * Get the href for this namespace, or nil when it has none
 */
static VALUE href(VALUE self)
{
  xmlNsPtr ns_ptr;
  xmlDocPtr owner_doc;

  Data_Get_Struct(self, xmlNs, ns_ptr);

  if (ns_ptr->href == NULL) {
    return Qnil;
  }

  /* The owning document is still unwrapped from @document, although the
   * resulting pointer is not used further in this function. */
  Data_Get_Struct(rb_iv_get(self, "@document"), xmlDoc, owner_doc);

  return NOKOGIRI_STR_NEW2(ns_ptr->href);
}
static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts) { VALUE self = NOKOGIRI_SAX_SELF(ctx); VALUE doc = rb_iv_get(self, "@document"); VALUE attributes = rb_ary_new(); const xmlChar * attr; int i = 0; if(atts) { while((attr = atts[i]) != NULL) { rb_ary_push(attributes, NOKOGIRI_STR_NEW2(attr)); i++; } } rb_funcall( doc, id_start_element, 2, NOKOGIRI_STR_NEW2(name), attributes ); }
/*
 * call-seq:
 *  encode_special_chars(string)
 *
 * Encode any special characters in +string+
 */
static VALUE encode_special_chars(VALUE self, VALUE string)
{
  xmlNodePtr node;
  xmlChar *escaped;
  VALUE result;

  Data_Get_Struct(self, xmlNode, node);

  escaped = xmlEncodeSpecialChars(node->doc,
                                  (const xmlChar *)StringValuePtr(string));

  /* Copy into a Ruby string, then release the libxml buffer. */
  result = NOKOGIRI_STR_NEW2(escaped);
  xmlFree(escaped);

  return result;
}
/*
 * SAX error callback (printf-style).
 *
 * Formats +msg+ with its variadic arguments and sends the resulting text to
 * the handler document via id_error.
 *
 * If formatting fails, the unformatted +msg+ is reported instead, because
 * the output pointer of vasprintf is not usable after a failure.  The
 * formatted buffer is copied into a Ruby string and freed before the Ruby
 * handler runs, so an exception raised by the handler cannot leak it.
 */
static void error_func(void * ctx, const char *msg, ...)
{
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
  VALUE doc = rb_iv_get(self, "@document");
  VALUE rb_message;
  char * message = NULL;
  int written;
  va_list args;

  va_start(args, msg);
  written = vasprintf(&message, msg, args);
  va_end(args);

  if (written < 0) {
    /* Best effort: report the raw format string as plain text. */
    rb_message = NOKOGIRI_STR_NEW2(msg);
  } else {
    rb_message = NOKOGIRI_STR_NEW2(message);
    free(message);
  }

  rb_funcall(doc, id_error, 1, rb_message);
}