Esempio n. 1
0
/*
 * call-seq:
 *  new(uri = nil, external_id = nil)
 *
 * Create a new, empty HTML document.
 *
 * The first two positional arguments, when truthy, are handed to libxml2's
 * htmlNewDoc() as the URI and external ID; an absent or nil/false argument
 * is passed as NULL.  Every argument is then forwarded unchanged to the new
 * object's +initialize+.
 *
 * Raises ArgumentError if +uri+ or +external_id+ contains a NUL byte, and
 * NoMemoryError if libxml2 cannot allocate the document.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE uri, external_id, rest, rb_doc;
  htmlDocPtr doc;

  rb_scan_args(argc, argv, "0*", &rest);
  uri         = rb_ary_entry(rest, (long)0);
  external_id = rb_ary_entry(rest, (long)1);

  /* StringValueCStr rejects strings with embedded NUL bytes, so libxml2
   * never sees a silently truncated URI or external ID. */
  doc = htmlNewDoc(
      RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL,
      RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL
  );

  /* htmlNewDoc() returns NULL when allocation fails; do not hand a NULL
   * document to the wrapper. */
  if (doc == NULL) {
    rb_raise(rb_eNoMemError, "failed to allocate a new HTML document");
  }

  rb_doc = Nokogiri_wrap_xml_document(klass, doc);
  rb_obj_call_init(rb_doc, argc, argv);
  return rb_doc;
}

/*
 * call-seq:
 *  read_io(io, url, encoding, options)
 *
 * Read the HTML document from +io+ with given +url+, +encoding+,
 * and +options+.  See Nokogiri::HTML.parse
 */
static VALUE read_io( VALUE klass,
                      VALUE io,
Esempio n. 2
0
/**
 * Save the tree to the given file as an HTML page.
 *
 * \param tree		the tree whose root is written into the page body
 * \param filename	the file to save to
 * \param page_title	title of the page
 * \return true if the page was written; false on any failure, in which
 *	   case the user has already been warned
 */
bool tree_urlfile_save(struct tree *tree, const char *filename,
		       const char *page_title)
{
	/* Message key reported on failure; every step but the final save
	 * reports "NoMemory".
	 */
	const char *problem = "NoMemory";
	bool saved = false;
	xmlDoc *page;
	xmlNode *root, *header, *content;

	/* The Browse Hotlist format is not valid HTML, so the strict
	 * DOCTYPE declared here is not truthful.
	 */
	page = htmlNewDoc(
		(const xmlChar *) "http://www.w3.org/TR/html4/strict.dtd",
		(const xmlChar *) "-//W3C//DTD HTML 4.01//EN");
	if (page == NULL) {
		goto finish;
	}

	root = xmlNewNode(NULL, (const xmlChar *) "html");
	if (root == NULL) {
		goto finish;
	}
	xmlDocSetRootElement(page, root);

	header = xmlNewChild(root, NULL, (const xmlChar *) "head", NULL);
	if (header == NULL) {
		goto finish;
	}

	if (xmlNewTextChild(header, NULL, (const xmlChar *) "title",
			    (const xmlChar *) page_title) == NULL) {
		goto finish;
	}

	content = xmlNewChild(root, NULL, (const xmlChar *) "body", NULL);
	if (content == NULL) {
		goto finish;
	}

	if (!tree_url_save_directory(tree_get_root(tree), content)) {
		goto finish;
	}

	/* The document is marked as UTF-8 internally and written out as
	 * iso-8859-1.
	 */
	page->charset = XML_CHAR_ENCODING_UTF8;
	if (htmlSaveFileEnc(filename, page, "iso-8859-1") == -1) {
		problem = "HotlistSaveError";
		goto finish;
	}

	saved = true;

finish:
	/* Warn first, then release the document (if one was created). */
	if (!saved) {
		warn_user(problem, 0);
	}
	if (page != NULL) {
		xmlFreeDoc(page);
	}
	return saved;
}
Esempio n. 3
0
/**
 * Demo: build a small document in memory, write it to "CreatedXml.xml" and
 * to stdout, then try to parse a file named "ap" as HTML and dump that to
 * stdout as well.
 *
 * Every document created or parsed here is released with xmlFreeDoc()
 * before the function returns.  If the initial document or its root node
 * cannot be allocated the function returns without doing anything else.
 */
void test_html(void)
{
    /* htmlNewDoc() takes xmlChar strings, hence the BAD_CAST. */
    htmlDocPtr doc = htmlNewDoc(BAD_CAST "", BAD_CAST "");
    if (doc == NULL)
    {
        return;
    }

    xmlNodePtr root_node = xmlNewNode(NULL, BAD_CAST "ap");
    if (root_node == NULL)
    {
        xmlFreeDoc(doc);
        return;
    }

    /* Install the root node; from here on the document owns it. */
    xmlDocSetRootElement(doc, root_node);

    /* Create text-carrying children directly under the root. */
    xmlNewTextChild(root_node, NULL, BAD_CAST "newNode1", BAD_CAST "newNode1 content");
    xmlNewTextChild(root_node, NULL, BAD_CAST "newNode2", BAD_CAST "newNode2 content");
    xmlNewTextChild(root_node, NULL, BAD_CAST "newNode3", BAD_CAST "newNode3 content");

    /* Create a child that is attached to the root as it is created. */
    xmlNewChild(root_node, NULL, BAD_CAST "node1", BAD_CAST "content of node1");

    /*
     * Related libxml2 calls, kept as a reference:
     *  - xmlDocGetRootElement(doc) fetches the document's root node.
     *  - xmlNodeListGetString(), xmlNodeGetContent() and xmlGetProp() return
     *    strings that the caller must release with xmlFree().
     *  - xmlNodeSetContent() / xmlSetProp() change a node's text / attribute.
     *  - xmlUnlinkNode() detaches a node from its document; the detached
     *    node must then be released with xmlFreeNode().
     */

    /* Create a node, give it content and an attribute, and add it to the root. */
    xmlNodePtr node = xmlNewNode(NULL, BAD_CAST "node2");
    xmlNodePtr content = xmlNewText(BAD_CAST "NODE CONTENT");
    xmlAddChild(root_node, node);
    xmlAddChild(node, content);
    xmlNewProp(node, BAD_CAST "attribute", BAD_CAST "yes");

    /* Add an attribute to a node with xmlNewProp(). */
    node = xmlNewChild(root_node, NULL, BAD_CAST "node3", BAD_CAST "node has attributes");
    xmlNewProp(node, BAD_CAST "attribute", BAD_CAST "no");

    /* Create a child ("son") and a grandchild ("grandson") node. */
    node = xmlNewNode(NULL, BAD_CAST "son");
    xmlAddChild(root_node, node);
    xmlNodePtr grandson = xmlNewNode(NULL, BAD_CAST "grandson");
    xmlAddChild(node, grandson);
    xmlAddChild(grandson, xmlNewText(BAD_CAST "This is a grandson node"));

    /* Store the document on disk. */
    int save_rc = xmlSaveFile("CreatedXml.xml", doc);
    if (save_rc != -1)
    {
        /* NOTE(review): ownership of the string returned by d_ConvertCharset
         * is not visible from here -- confirm whether it must be freed. */
        printf("%s\n", d_ConvertCharset("GBK", "utf-8", "一个xml文档被创建\n"));
    }

    /*
     * Dump the document to stdout ("-").  The last argument (1) asks for
     * indented output, as with xmlSaveFormatFile(docname, doc, 1).
     */
    xmlSaveFormatFileEnc("-", doc, "UTF-8", 1);

    /* Release the in-memory document before the pointer is reused. */
    xmlFreeDoc(doc);

    /* NOTE(review): options == -1 sets every parser option bit; confirm
     * that this is intended rather than 0 or an explicit flag set. */
    doc = htmlReadFile("ap", NULL, -1);
    if (doc != NULL)
    {
        xmlSaveFormatFileEnc("-", doc, "UTF-8", 1);
        xmlFreeDoc(doc);
    }
}