Example #1
0
/*
 * call-seq:
 *  read_io(io, url, encoding, options)
 *
 * Create a new document from an IO object.
 *
 * +url+ and +encoding+ may be nil, in which case NULL is handed to libxml2.
 * +options+ is converted to a C int and passed on as the parser options.
 *
 * While the document is being read, libxml2 structured errors are routed to
 * Nokogiri_error_array_pusher with a fresh array as its context; that array
 * is assigned to the returned document through +errors=+.
 *
 * Raises the wrapped last libxml2 error when no document could be built, or
 * a RuntimeError when libxml2 recorded no error.
 */
static VALUE read_io( VALUE klass,
                      VALUE io,
                      VALUE url,
                      VALUE encoding,
                      VALUE options )
{
  const char * c_url    = (url == Qnil) ? NULL : StringValuePtr(url);
  const char * c_enc    = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
  /* Convert the options up front.  NUM2INT raises for a non-numeric or
   * out-of-range value, and raising while the structured error handler is
   * installed would leave libxml2 holding a pointer to error_list after this
   * frame has been unwound. */
  int c_options         = NUM2INT(options);
  VALUE error_list      = rb_ary_new();
  VALUE document;
  xmlDocPtr doc;

  xmlInitParser();
  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = xmlReadIO(
      (xmlInputReadCallback)io_read_callback,
      (xmlInputCloseCallback)io_close_callback,
      (void *)io,
      c_url,
      c_enc,
      c_options
  );
  /* Always uninstall the handler before anything below can raise. */
  xmlSetStructuredErrorFunc(NULL, NULL);

  if(doc == NULL) {
    /* There is no document to free here; just report the failure. */
    xmlErrorPtr error = xmlGetLastError();
    if(error)
      rb_funcall(rb_mKernel, rb_intern("raise"), 1,
          Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
      );
    else
      rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil;
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_funcall(document, rb_intern("errors="), 1, error_list);
  return document;
}
/*
 *  call-seq:
 *    transform(document, params = [])
 *
 *  Apply an XSLT stylesheet to an XML::Document.
 *  +params+ is an array of strings used as XSLT parameters, given as
 *  alternating names and values.  A Hash is also accepted; it is converted
 *  with +to_a+ followed by +flatten+.
 *  returns Nokogiri::XML::Document
 *
 *  Raises ArgumentError when +document+ is not a Nokogiri::XML::Document,
 *  TypeError when +params+ is not an Array (or Hash) of strings, and
 *  RuntimeError when the parameter table cannot be allocated or the
 *  transformation fails.
 *
 *  Example:
 *
 *    doc   = Nokogiri::XML(File.read(ARGV[0]))
 *    xslt  = Nokogiri::XSLT(File.read(ARGV[1]))
 *    puts xslt.transform(doc, ['key', 'value'])
 *
 */
static VALUE transform(int argc, VALUE* argv, VALUE self)
{
    VALUE xmldoc, paramobj ;
    xmlDocPtr xml ;
    xmlDocPtr result ;
    nokogiriXsltStylesheetTuple *wrapper;
    const char** params ;
    long param_len, j ;

    rb_scan_args(argc, argv, "11", &xmldoc, &paramobj);
    if (NIL_P(paramobj)) { paramobj = rb_ary_new2(0L) ; }
    if (!rb_obj_is_kind_of(xmldoc, cNokogiriXmlDocument))
      rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::Document");

    /* handle hashes as arguments. */
    if(T_HASH == TYPE(paramobj)) {
      paramobj = rb_funcall(paramobj, rb_intern("to_a"), 0);
      paramobj = rb_funcall(paramobj, rb_intern("flatten"), 0);
    }

    Check_Type(paramobj, T_ARRAY);

    Data_Get_Struct(xmldoc, xmlDoc, xml);
    Data_Get_Struct(self, nokogiriXsltStylesheetTuple, wrapper);

    param_len = RARRAY_LEN(paramobj);

    /* Validate every entry before anything is allocated: StringValuePtr
     * raises TypeError for a non-string, and raising after calloc() would
     * leak the parameter table. */
    for (j = 0 ; j < param_len ; j++) {
      VALUE entry = rb_ary_entry(paramobj, j);
      (void)StringValuePtr(entry);
    }

    /* libxslt documents params as a NULL terminated array of name/value
     * tuples.  Reserving two terminating slots keeps an odd-length list from
     * making it read one element past the end of the table. */
    params = calloc((size_t)param_len + 2, sizeof(*params));
    if (params == NULL)
      rb_raise(rb_eRuntimeError, "could not allocate memory for xslt parameters");

    for (j = 0 ; j < param_len ; j++) {
      VALUE entry = rb_ary_entry(paramobj, j);
      params[j] = StringValuePtr(entry);
    }
    params[param_len]     = NULL ;
    params[param_len + 1] = NULL ;

    result = xsltApplyStylesheet(wrapper->ss, xml, params);
    free(params);

    if (!result) rb_raise(rb_eRuntimeError, "could not perform xslt transform on document");

    return Nokogiri_wrap_xml_document((VALUE)0, result) ;
}
Example #3
0
/*
 * call-seq:
 *  read_memory(string, url, encoding, options)
 *
 * Create a new document from a String.
 *
 * +url+ and +encoding+ may be nil, in which case NULL is handed to libxml2.
 * +options+ is converted to a C int and passed on as the parser options.
 *
 * Raises RangeError when the string's byte length does not fit in a C int,
 * and RuntimeError when libxml2 does not return a document.
 */
static VALUE read_memory( VALUE klass,
                          VALUE string,
                          VALUE url,
                          VALUE encoding,
                          VALUE options )
{
    const char * c_buffer = StringValuePtr(string);
    const char * c_url    = (url == Qnil) ? NULL : StringValuePtr(url);
    const char * c_enc    = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
    /* xmlReadMemory takes the size of the buffer in bytes.  String#length
     * counts characters on encoding-aware Rubies, which would cut multibyte
     * documents short, so read the byte length directly. */
    long byte_len         = RSTRING_LEN(string);
    int len               = (int)byte_len;
    xmlDocPtr doc;

    /* Refuse buffers whose size cannot be represented in the int that
     * xmlReadMemory accepts, instead of parsing a truncated document. */
    if((long)len != byte_len)
        rb_raise(rb_eRangeError, "document is too large to parse");

    xmlInitParser();
    doc = xmlReadMemory(c_buffer, len, c_url, c_enc, NUM2INT(options));

    /* rb_raise does not return, and there is no document to free. */
    if(doc == NULL)
        rb_raise(rb_eRuntimeError, "Couldn't create a document");

    return Nokogiri_wrap_xml_document(klass, doc);
}
Example #4
0
/*
 * call-seq:
 *  read_memory(string, url, encoding, options)
 *
 * Create a new document from a String.
 *
 * +url+ and +encoding+ may be nil, in which case NULL is handed to libxml2.
 * +options+ is converted to a C int and passed on as the parser options.
 *
 * An array is handed to Nokogiri_install_error_catcher for the duration of
 * the parse (presumably to collect libxml2 errors -- confirm against that
 * helper) and is stored on the returned document as +@errors+.
 *
 * Raises RangeError when the string's byte length does not fit in a C int,
 * the wrapped last libxml2 error when no document could be built, or a
 * RuntimeError when libxml2 recorded no error.
 */
static VALUE read_memory( VALUE klass,
                          VALUE string,
                          VALUE url,
                          VALUE encoding,
                          VALUE options )
{
  const char * c_buffer = StringValuePtr(string);
  const char * c_url    = NIL_P(url)      ? NULL : StringValuePtr(url);
  const char * c_enc    = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  long byte_len         = RSTRING_LEN(string);
  int len               = (int)byte_len;
  /* Convert the options before the error catcher is installed: NUM2INT
   * raises for a non-numeric or out-of-range value, and raising in between
   * install and remove would leave the catcher installed with a reference
   * to error_list from an unwound frame. */
  int c_options         = (int)NUM2INT(options);
  VALUE error_list      = rb_ary_new();
  VALUE document;
  xmlDocPtr doc;

  /* Refuse buffers whose size cannot be represented in the int that
   * xmlReadMemory accepts, instead of parsing a truncated document. */
  if((long)len != byte_len)
    rb_raise(rb_eRangeError, "document is too large to parse");

  xmlResetLastError();
  Nokogiri_install_error_catcher(error_list);
  doc = xmlReadMemory(c_buffer, len, c_url, c_enc, c_options);
  Nokogiri_remove_error_catcher();

  if(doc == NULL) {
    /* There is no document to free here; just report the failure. */
    xmlErrorPtr error = xmlGetLastError();

    if(error)
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
    else
      rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil;
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}
/*
 * call-seq:
 *  dup
 *
 * Copy this Document.  An optional depth may be passed in, but it defaults
 * to a deep copy.  0 is a shallow copy, 1 is a deep copy.
 *
 * The copy is wrapped in the same Ruby class as the receiver and keeps the
 * receiver's libxml2 document type.  The array registered as the structured
 * error context during the copy is stored on the result as +@errors+.
 * Returns nil when libxml2 could not produce a copy.
 */
static VALUE duplicate_document(int argc, VALUE *argv, VALUE self)
{
  xmlDocPtr doc, dup;
  VALUE copy;
  VALUE level;
  VALUE error_list      = rb_ary_new();
  int c_level;

  if(rb_scan_args(argc, argv, "01", &level) == 0)
    level = INT2NUM((long)1);

  /* Convert the depth before the error handler is installed: NUM2INT raises
   * for a non-numeric or out-of-range value, and raising while the handler is
   * installed would leave libxml2 holding a pointer to error_list after this
   * frame has been unwound. */
  c_level = (int)NUM2INT(level);

  Data_Get_Struct(self, xmlDoc, doc);

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
  dup = xmlCopyDoc(doc, c_level);
  xmlSetStructuredErrorFunc(NULL, NULL);

  if(dup == NULL) return Qnil;

  /* Carry the document type of the original over to the copy. */
  dup->type = doc->type;
  copy = Nokogiri_wrap_xml_document(rb_obj_class(self), dup);
  rb_iv_set(copy, "@errors", error_list);
  return copy ;
}
Example #6
0
 *
 * Create a new document
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE args, first, second, wrapped;
  const xmlChar *c_uri         = NULL;
  const xmlChar *c_external_id = NULL;
  htmlDocPtr html;

  /* Gather every argument into one array; the first two entries, when
   * present and truthy, are used as the URI and the external ID. */
  rb_scan_args(argc, argv, "0*", &args);
  first  = rb_ary_entry(args, (long)0);
  second = rb_ary_entry(args, (long)1);

  if (RTEST(first))
    c_uri = (const xmlChar *)StringValuePtr(first);
  if (RTEST(second))
    c_external_id = (const xmlChar *)StringValuePtr(second);

  html    = htmlNewDoc(c_uri, c_external_id);
  wrapped = Nokogiri_wrap_xml_document(klass, html);

  /* Forward the original, untouched arguments to the Ruby-level initialize. */
  rb_obj_call_init(wrapped, argc, argv);
  return wrapped;
}

/*
 * call-seq:
 *  read_io(io, url, encoding, options)
 *
 * Read the HTML document from +io+ with given +url+, +encoding+,
 * and +options+.  See Nokogiri::HTML.parse
 */
static VALUE read_io( VALUE klass,
                      VALUE io,
                      VALUE url,
                      VALUE encoding,