/*
 * call-seq:
 *  new
 *
 * Create a new document
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE uri, external_id, rest, rb_doc;
  htmlDocPtr doc;

  /* Collect every argument into +rest+; the first two entries, when
   * present, are used as the document URI and external ID. */
  rb_scan_args(argc, argv, "0*", &rest);
  uri         = rb_ary_entry(rest, (long)0);
  external_id = rb_ary_entry(rest, (long)1);

  /* htmlNewDoc() consumes C strings, so use StringValueCStr rather than
   * StringValuePtr: it guarantees NUL termination and rejects Ruby strings
   * that contain embedded NUL bytes instead of silently truncating them. */
  doc = htmlNewDoc(
          RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL,
          RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL
        );

  /* Wrap the native document in a Ruby object of +klass+, then forward the
   * original arguments to its #initialize. */
  rb_doc = Nokogiri_wrap_xml_document(klass, doc);
  rb_obj_call_init(rb_doc, argc, argv);

  return rb_doc;
}

/*
 * call-seq:
 *  read_io(io, url, encoding, options)
 *
 * Read the HTML document from +io+ with given +url+, +encoding+,
 * and +options+.  See Nokogiri::HTML.parse
 */
static VALUE read_io( VALUE klass, VALUE io,
/** * Perform a save to a specified file in the form of a html page * * \param filename the file to save to * \param page_title title of the page */ bool tree_urlfile_save(struct tree *tree, const char *filename, const char *page_title) { int res; xmlDoc *doc; xmlNode *html, *head, *title, *body; /* Unfortunately the Browse Hotlist format is invalid HTML, * so this is a lie. */ doc = htmlNewDoc( (const xmlChar *) "http://www.w3.org/TR/html4/strict.dtd", (const xmlChar *) "-//W3C//DTD HTML 4.01//EN"); if (doc == NULL) { warn_user("NoMemory", 0); return false; } html = xmlNewNode(NULL, (const xmlChar *) "html"); if (html == NULL) { warn_user("NoMemory", 0); xmlFreeDoc(doc); return false; } xmlDocSetRootElement(doc, html); head = xmlNewChild(html, NULL, (const xmlChar *) "head", NULL); if (head == NULL) { warn_user("NoMemory", 0); xmlFreeDoc(doc); return false; } title = xmlNewTextChild(head, NULL, (const xmlChar *) "title", (const xmlChar *) page_title); if (title == NULL) { warn_user("NoMemory", 0); xmlFreeDoc(doc); return false; } body = xmlNewChild(html, NULL, (const xmlChar *) "body", NULL); if (body == NULL) { warn_user("NoMemory", 0); xmlFreeDoc(doc); return false; } if (!tree_url_save_directory(tree_get_root(tree), body)) { warn_user("NoMemory", 0); xmlFreeDoc(doc); return false; } doc->charset = XML_CHAR_ENCODING_UTF8; res = htmlSaveFileEnc(filename, doc, "iso-8859-1"); if (res == -1) { warn_user("HotlistSaveError", 0); xmlFreeDoc(doc); return false; } xmlFreeDoc(doc); return true; }
void test_html() { htmlDocPtr doc; doc = htmlNewDoc( "", "" ); xmlNodePtr root_node = xmlNewNode(NULL,BAD_CAST"ap"); //设置根节点 xmlDocSetRootElement(doc,root_node); // cur = xmlDocGetRootElement(doc); //获取文档根结点 //在根节点中直接创建节点 xmlNewTextChild(root_node, NULL, BAD_CAST "newNode1", BAD_CAST "newNode1 content"); xmlNewTextChild(root_node, NULL, BAD_CAST "newNode2", BAD_CAST "newNode2 content"); xmlNewTextChild(root_node, NULL, BAD_CAST "newNode3", BAD_CAST "newNode3 content"); /* xmlChar *key; key = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1); //获取文本结点的文本,需用其子结点 xmlFree(key); */ //创建一个绑定在根节点的子节点 xmlNewChild(root_node, NULL, BAD_CAST "node1",BAD_CAST "content of node1"); /* xmlNodeSetContent(curNode, (xmlChar *) "content changed");//设置结点的文本内容 //得到一个节点的内容: //xmlChar *value = xmlNodeGetContent(node); //返回值value应该使用xmlFree(value)释放内存 xmlUnlinkNode(curNode); //将当前结点从文档中断链(unlink),这样本文档就不会再包含这个子结点 xmlFreeNode(curNode); //手动删除断链结点的内存, 若没有xmlDelNode或者xmlRemoveNode,使用此函数 xmlChar *uri; uri = xmlGetProp(cur, "uri"); //获取属性值 xmlFree(uri); //释放内存 xmlSetProp(curNode,BAD_(xmlChar *)"attribute", (xmlChar *) "no"); //设置当前结点的attribute属性的属性值为no */ //创建一个节点,设置其内容和属性,然后加入根结点 xmlNodePtr node = xmlNewNode(NULL,BAD_CAST"node2"); xmlNodePtr content = xmlNewText(BAD_CAST"NODE CONTENT"); xmlAddChild(root_node,node); xmlAddChild(node,content); xmlNewProp(node,BAD_CAST"attribute",BAD_CAST "yes"); //通过xmlNewProp()增加一个节点的属性 node=xmlNewChild(root_node, NULL, BAD_CAST "node3", BAD_CAST"node has attributes"); xmlNewProp(node, BAD_CAST "attribute", BAD_CAST "no"); //创建一个儿子和孙子节点 node = xmlNewNode(NULL, BAD_CAST "son"); xmlAddChild(root_node,node); xmlNodePtr grandson = xmlNewNode(NULL, BAD_CAST "grandson"); xmlAddChild(node,grandson); xmlAddChild(grandson, xmlNewText(BAD_CAST "This is a grandson node")); //存储xml文档 int nRel = xmlSaveFile("CreatedXml.xml",doc); if (nRel != -1) { printf("%s\n",d_ConvertCharset("GBK", "utf-8", "一个xml文档被创建\n")); } //保存文件 /* * xmlSaveFormatFile (docname, doc, 1); 
保存文件到磁盘,第一个参数是写入文件的名,第二个参数是一个xmlDoc结构指针,第三个参数设定为1,保证在输出上写入缩进。 */ xmlSaveFormatFileEnc( "-", doc, "UTF-8", 1); doc=htmlReadFile("ap", NULL, -1); xmlSaveFormatFileEnc( "-", doc, "UTF-8", 1); }