/**
 * Step from one feed node to the next one in the parsed document.
 *
 * The next sibling of @p prev is returned when there is one. Otherwise the
 * search moves up to the parent of @p prev and inspects the parent's next
 * sibling:
 *   - no sibling: iteration is over, NULL is returned;
 *   - an H4: its time string is stored in the parser state as the current
 *     date, and the first child of the node following the H4 is returned
 *     (NULL when nothing follows the H4);
 *   - anything else is asserted to be a UL and its first child is returned.
 *
 * @param state_data  Opaque pointer, used as a ParserStateObject*.
 * @param doc         Tidy document the nodes belong to.
 * @param prev        Node returned by the previous iteration step.
 * @return The next feed node, or NULL when no further node is found.
 */
TidyNode UCHome_Main_SiteConst::next_feed(void *state_data, TidyDoc doc, TidyNode prev)
{
    ParserStateObject *parser_state = static_cast<ParserStateObject*>(state_data);

    // Common case: another item in the same list.
    TidyNode sibling = tidyGetNext(prev);
    if (sibling != NULL) {
        return sibling;
    }

    // End of this list: look at whatever follows the enclosing node.
    TidyNode parent = tidyGetParent(prev);
    assert(parent != NULL);

    TidyNode following = tidyGetNext(parent);
    if (following == NULL) {
        return NULL;
    }

    if (tidyNodeGetId(following) != TidyTag_H4) {
        assert(tidyNodeGetId(following) == TidyTag_UL);
        return tidyGetChild(following);
    }

    // An H4 heading: remember its time string, then continue with the node after it.
    parser_state->curr_date = this->get_time_string(doc, following);

    TidyNode after_heading = tidyGetNext(following);
    if (after_heading == NULL) {
        return NULL;
    }
    return tidyGetChild(after_heading);
}
/**
 * Depth-first search of the tidy tree rooted at tnod for a
 * <meta http-equiv="content-type" content="...charset=XXX"> element.
 *
 * Returns the text following "charset=" in the content attribute of the
 * first matching META node, or nglString::Null when no such node exists in
 * the subtree.
 *
 * A META node only matches when its http-equiv attribute compares equal to
 * "content-type" (compared with the case-insensitive flag the original code
 * already used) and its content attribute actually contains "charset=".
 * Other META nodes (e.g. http-equiv="refresh") are skipped, so they no
 * longer produce a bogus encoding name.
 */
static nglString GetEncodingString(TidyNode tnod)
{
  if (tidyNodeGetId(tnod) == TidyTag_META)
  {
    TidyAttr attr_content = tidyAttrGetById(tnod, TidyAttr_CONTENT);
    TidyAttr attr_httpequiv = tidyAttrGetById(tnod, TidyAttr_HTTP_EQUIV);

    if (attr_content && attr_httpequiv)
    {
      // The kind of meta is carried by http-equiv, not by content.
      nglString httpequiv(tidyAttrValue(attr_httpequiv));
      if (httpequiv.Compare(_T("content-type"), false) == 0)
      {
        nglString content(tidyAttrValue(attr_content));
        //NGL_OUT(_T("content found in the tree: %s"), content.GetChars());

        // NOTE(review): relies on Find() returning a negative index when the
        // pattern is absent -- confirm against the nglString documentation.
        const int32 col = content.Find(_T("charset="));
        if (col >= 0)
        {
          // 8 == length of "charset="
          return content.Extract(col + 8);
        }
      }
    }
  }

  // Not found on this node: try every child subtree in document order.
  for (TidyNode child = tidyGetChild(tnod); child; child = tidyGetNext(child))
  {
    nglString str(GetEncodingString(child));
    if (!str.IsNull())
      return str;
  }

  return nglString::Null;
}
/**
 * Locate the user's photo URL in the parsed page.
 *
 * Looks up a DIV through searchNode() using the key "quick_update s_clear",
 * takes its first child (asserted to be an IMG) and reads that node's src
 * attribute. Every occurrence of "small" in the URL is replaced by "big".
 *
 * @param doc  Parsed tidy document.
 * @return The rewritten URL, or a default-constructed QString when the DIV
 *         or the src attribute is missing.
 */
QString UCHome_Main_SiteConst::find_photo_url(TidyDoc doc)
{
    QString result;

    // quick_update s_clear
    TidyNode holder = this->searchNode(doc, NULL, "quick_update s_clear", TidyTag_DIV);
    if (holder == NULL) {
        q_debug()<<"Warning: no photo url found";
        return result;
    }

    TidyNode image = tidyGetChild(holder);
    Q_ASSERT(tidyNodeGetId(image) == TidyTag_IMG);

    TidyAttr src = tidyAttrGetById(image, TidyAttr_SRC);
    if (src != NULL) {
        result = QString(tidyAttrValue(src));
        result.replace("small", "big");
        q_debug()<<"Photo url: "<<result;
    }

    return result;
}
/**
 * Start a feed iteration over the parsed document.
 *
 * Before looking for feeds this refreshes this->signtext and this->photourl
 * from the document. It then looks up the "enter-content" DIV through
 * searchNode() and inspects its first child:
 *   - H4: its time string becomes the current date in the parser state and
 *     the first child of the node following the H4 is returned (NULL when
 *     nothing follows);
 *   - UL: its first child is returned, provided that child is an LI;
 *   - anything else is logged as an unknown node type.
 *
 * @param state_data  Opaque pointer, used as a ParserStateObject*.
 * @param doc         Parsed tidy document.
 * @return The first feed node, or NULL when none is found.
 */
TidyNode UCHome_Main_SiteConst::first_feed(void *state_data, TidyDoc doc)
{
    ParserStateObject *parser_state = static_cast<ParserStateObject*>(state_data);

    // Page-level data gathered once per document.
    this->signtext = this->find_sign_text(doc);
    this->photourl = this->find_photo_url(doc);
    //this->sysnotes = this->find_sys_notice(doc);

    TidyNode container = this->searchNode(doc, NULL, "enter-content", TidyTag_DIV);
    if (container == NULL) {
        q_debug()<<"No feed node found";
        return NULL;
    }

    TidyNode head = tidyGetChild(container);
    assert(head != NULL);

    const int head_id = tidyNodeGetId(head);
    switch (head_id) {
    case TidyTag_H4: {
        parser_state->curr_date = this->get_time_string(doc, head);
        TidyNode after_heading = tidyGetNext(head);
        if (after_heading != NULL) {
            return tidyGetChild(after_heading);
        }
        break;
    }
    case TidyTag_UL: {
        TidyNode item = tidyGetChild(head);
        if (tidyNodeGetId(item) == TidyTag_LI) {
            return item;
        }
        break;
    }
    default:
        q_debug()<<"Unknown node type:"<<tidyNodeGetName(head);
        break;
    }

    return NULL;
}
/**
 * Build a FeedRecord from one feed node.
 *
 * The node text (collected by get_node_text() and decoded with u8codec)
 * becomes the record content. The node's id attribute, e.g.
 *   <li class="type_1006780" id="feed_685697_li">
 * is unique per feed and is used as the identity of the record: its MD5
 * checksum is written to rec->md5sum. If the id contains the stray token
 * " 1000 ", that token is removed before hashing and a warning is logged.
 *
 * @param state_data  Unused by this function.
 * @param doc         Tidy document that owns @p node.
 * @param node        Feed node to convert.
 * @return A record allocated with new (the caller receives the raw pointer
 *         and is responsible for deleting it), or NULL when the node has no
 *         id attribute value.
 */
FeedRecord* UCHome_Main_SiteConst::parse_feed(void *state_data, TidyDoc doc, TidyNode node)
{
    // Extract and decode the visible text of the feed.
    TidyBuffer tbuf = {0};
    tidyBufInit(&tbuf);
    this->get_node_text(doc, node, &tbuf);
    QByteArray feed_bytes((char*)tbuf.bp);
    const QString feed_text = this->u8codec->toUnicode(feed_bytes);
    tidyBufFree(&tbuf);

    // Validate the id BEFORE allocating the record, so the failure path
    // cannot leak a FeedRecord.
    TidyAttr attr = tidyAttrGetById(node, TidyAttr_ID);
    ctmbstr fid = tidyAttrValue(attr);
    //q_debug()<<attr<<fid;
    if (fid == NULL) {
        q_debug()<<"Warning: invalid fid"<<fid<<feed_text;
        return NULL;
    }

    FeedRecord *rec = new FeedRecord();
    rec->content = feed_text;

    if (strstr(fid, " 1000 ") != NULL) {
        // Strip the stray token; feed_bytes keeps the cleaned id alive while
        // fid points into it.
        feed_bytes = fid;
        feed_bytes = feed_bytes.replace(" 1000 ", "");
        fid = feed_bytes.data();
        q_debug()<<"Warning: invalid fid"<<tidyAttrValue(attr);
    }

    md5CheckSum(fid, strlen(fid), rec->md5sum);
    return rec;
}
void nuiHTMLNode::SetFromNode(const void* _tdoc, const void* _tnod, nglTextEncoding encoding, bool ComputeStyle) { Clear(); TidyDoc tdoc = (TidyDoc)_tdoc; TidyNode tnod = (TidyNode)_tnod; mName = nglString(tidyNodeGetName(tnod), encoding); mType = (NodeType)tidyNodeGetType(tnod); mTagType = (TagType)tidyNodeGetId(tnod); nglString text; TidyBuffer buf; tidyBufInit(&buf); if (tidyNodeGetValue(tdoc, tnod, &buf)) { mText.Import((const char*)buf.bp, (int32)buf.size, encoding); //NGL_OUT(_T("<%s> %s\n"), mName.GetChars(), mText.GetChars()); } tidyBufFree(&buf); // Fill the attributes: TidyAttr tattr; for (tattr = tidyAttrFirst(tnod); tattr; tattr = tidyAttrNext(tattr)) { nuiHTMLAttrib* pAttrib = new nuiHTMLAttrib(tattr, encoding); mAttributes.push_back(pAttrib); } if (ComputeStyle) { mpStyle = new nuiCSSStyle(this); nuiHTMLAttrib* pStyle = GetAttribute(nuiHTMLAttrib::eAttrib_STYLE); if (pStyle) { AddStyleSheet(GetSourceURL(), pStyle->GetValue(), true); } } }
/**
 * Extract the user's sign text from the parsed page.
 *
 * Looks up a DIV through searchNode() using the key "state", takes its first
 * child (asserted to be an A element) and reads the text of that element's
 * first child. The bytes are decoded with u8codec and trimmed.
 *
 * @param doc  Parsed tidy document.
 * @return The trimmed text, or a default-constructed QString when the DIV is
 *         missing or tidy yields no text.
 */
QString UCHome_Main_SiteConst::find_sign_text(TidyDoc doc)
{
    QString result;

    TidyNode state_div = this->searchNode(doc, NULL, "state", TidyTag_DIV);
    if (state_div == NULL) {
        q_debug()<<"Warning: no state sign text found";
        return result;
    }

    TidyNode anchor = tidyGetChild(state_div);
    Q_ASSERT(tidyNodeGetId(anchor) == TidyTag_A);

    TidyBuffer text_buf;
    tidyBufInit(&text_buf);
    if (tidyNodeGetText(doc, tidyGetChild(anchor), &text_buf)) {
        const QByteArray raw((char*)text_buf.bp);
        result = this->u8codec->toUnicode(raw);
        result = result.trimmed();
        q_debug()<<"Sign text:"<<result;
    }
    tidyBufFree(&text_buf);

    return result;
}
/*
 * Populate the property table of a tidy object.
 *
 * For is_node objects the following properties are written, in this order:
 * value, name, type, line, column, proprietary, id (omitted for root,
 * doctype, text and comment nodes), attribute (array of name => value, or
 * NULL when the node has no attributes) and child (array of node objects, or
 * NULL when the node has no children). Each child object is created with
 * tidy_instanciate(), shares the parent's document (whose ref_count is
 * incremented once per child) and is populated recursively.
 *
 * For is_doc objects only errorBuffer and value are written, both as NULL.
 */
static void tidy_add_default_properties(PHPTidyObj *obj, tidy_obj_type type)
{
	TidyBuffer text;
	TidyAttr attr;
	TidyNode kid;
	zval attribute, children, kid_zv;
	PHPTidyObj *kid_obj;

	switch (type) {
		case is_node:
			if (!obj->std.properties) {
				rebuild_object_properties(&obj->std);
			}

			/* value: node text; the last byte of the buffer is not included */
			tidyBufInit(&text);
			tidyNodeGetText(obj->ptdoc->doc, obj->node, &text);
			ADD_PROPERTY_STRINGL(obj->std.properties, value, text.bp, text.size ? text.size-1 : 0);
			tidyBufFree(&text);

			ADD_PROPERTY_STRING(obj->std.properties, name, tidyNodeGetName(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, type, tidyNodeGetType(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, line, tidyNodeLine(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, column, tidyNodeColumn(obj->node));
			ADD_PROPERTY_BOOL(obj->std.properties, proprietary, tidyNodeIsProp(obj->ptdoc->doc, obj->node));

			/* id is only written for node types other than the four below */
			switch (tidyNodeGetType(obj->node)) {
				case TidyNode_Root:
				case TidyNode_DocType:
				case TidyNode_Text:
				case TidyNode_Comment:
					break;

				default:
					ADD_PROPERTY_LONG(obj->std.properties, id, tidyNodeGetId(obj->node));
			}

			/* attribute: name => value map, skipping entries lacking either part */
			attr = tidyAttrFirst(obj->node);
			if (attr) {
				array_init(&attribute);

				for (; attr; attr = tidyAttrNext(attr)) {
					char *attr_name = (char *)tidyAttrName(attr);
					char *attr_val = (char *)tidyAttrValue(attr);

					if (attr_name && attr_val) {
						add_assoc_string(&attribute, attr_name, attr_val);
					}
				}
			} else {
				ZVAL_NULL(&attribute);
			}
			zend_hash_str_update(obj->std.properties, "attribute", sizeof("attribute") - 1, &attribute);

			/* child: one fully populated node object per direct child */
			kid = tidyGetChild(obj->node);
			if (kid) {
				array_init(&children);

				for (; kid; kid = tidyGetNext(kid)) {
					tidy_instanciate(tidy_ce_node, &kid_zv);
					kid_obj = Z_TIDY_P(&kid_zv);
					kid_obj->node = kid;
					kid_obj->type = is_node;
					kid_obj->ptdoc = obj->ptdoc;
					kid_obj->ptdoc->ref_count++;

					tidy_add_default_properties(kid_obj, is_node);
					add_next_index_zval(&children, &kid_zv);
				}
			} else {
				ZVAL_NULL(&children);
			}
			zend_hash_str_update(obj->std.properties, "child", sizeof("child") - 1, &children);

			break;

		case is_doc:
			if (!obj->std.properties) {
				rebuild_object_properties(&obj->std);
			}
			ADD_PROPERTY_NULL(obj->std.properties, errorBuffer);
			ADD_PROPERTY_NULL(obj->std.properties, value);
			break;
	}
}
/*
 * Walk the direct children of tnod, collecting URLs and optionally
 * re-serialising the markup to outfile.
 *
 * For element nodes the tag decides which attribute may carry a URL:
 *   - A / AREA / MAP   -> href, while elem->level is below the depth option;
 *   - FRAME / IFRAME   -> src, allowed one level deeper;
 *   - LINK             -> href, IMG / SCRIPT -> src, allowed one level deeper
 *                         unless option_values.no_html_dependencies is set.
 * A depth option of 0 disables the level checks. A non-empty URL is handed to
 * add_new_url_and_check(); when outfile is set that call also receives
 * &relative_url, and a non-NULL relative_url replaces the attribute value in
 * the written markup.
 *
 * When outfile is NULL nothing is written; element children are still
 * visited recursively so nested URLs are collected.
 *
 * NOTE(review): "%*.*s" passes the indent as both field width and precision,
 * so at low indent values the "<" / "</" literals are truncated (e.g. nothing
 * of "<" is printed at indent 0). Kept as in the original.
 * NOTE(review): relative_url is reused after string_free(); this presumes
 * string_free() resets its argument to NULL -- confirm.
 */
static void parse_html(TidyDoc tdoc, TidyNode tnod, const url_list_t *elem, int indent, FILE *outfile)
{
	TidyNode child;
	TidyAttr attr;
	TidyAttrId link_attr_id = TidyAttr_UNKNOWN;
	TidyNodeType kind;
	TidyTagId tag;
	ctmbstr name;
	ctmbstr attr_text;
	char *url, *relative_url = NULL;
	int has_link = 0;
	const int no_depth_limit = !option_values.depth;
	const int follow_anchors = (no_depth_limit || elem->level < option_values.depth);
	const int follow_frames = (no_depth_limit || elem->level < option_values.depth+1);
	const int follow_depends = (follow_frames && !option_values.no_html_dependencies);

	for (child = tidyGetChild(tnod); child; child = tidyGetNext(child)) {
		kind = tidyNodeGetType(child);

		switch (kind) {
			case TidyNode_Start:
			case TidyNode_StartEnd:
				tag = tidyNodeGetId(child);

				/* Select the URL-carrying attribute for this tag, if any. */
				has_link = 0;
				link_attr_id = TidyAttr_UNKNOWN;

				if (follow_anchors && (tag == TidyTag_A || tag == TidyTag_AREA || tag == TidyTag_MAP)) {
					has_link = 1;
					link_attr_id = TidyAttr_HREF;
				}
				else if (follow_frames && (tag == TidyTag_FRAME || tag == TidyTag_IFRAME)) {
					has_link = 1;
					link_attr_id = TidyAttr_SRC;
				}
				else if (follow_depends) {
					if (tag == TidyTag_LINK) {
						has_link = 1;
						link_attr_id = TidyAttr_HREF;
					}
					else if (tag == TidyTag_IMG || tag == TidyTag_SCRIPT) {
						has_link = 1;
						link_attr_id = TidyAttr_SRC;
					}
				}

				if (has_link && (attr = tidyAttrGetById(child, link_attr_id)) != NULL) {
					url = (char *) tidyAttrValue(attr);

					string_free(relative_url);
					if (url && *url)
						add_new_url_and_check(elem, url, outfile ? &relative_url : NULL);
				}

				if (outfile && (name = tidyNodeGetName(child)) != NULL) {
					fprintf(outfile, "%*.*s%s", indent, indent, "<", name);

					for (attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr)) {
						fprintf(outfile, " %s", tidyAttrName(attr));

						/* The link attribute gets the rewritten URL; a missing value prints as "". */
						if (relative_url && (tidyAttrGetId(attr) == link_attr_id))
							attr_text = relative_url;
						else
							attr_text = tidyAttrValue(attr);

						fprintf(outfile, "=\"%s\"", attr_text ? attr_text : "");
					}

					string_free(relative_url);

					if (kind == TidyNode_StartEnd) {
						fprintf(outfile, "/>\n");
					}
					else {
						fprintf(outfile, ">\n");
						parse_html(tdoc, child, elem, indent + 1, outfile);
						fprintf(outfile, "%*.*s%s>\n", indent + 1, indent + 1, "</", name);
					}
				}
				else {
					/* No output (or unnamed node): still descend to collect URLs. */
					string_free(relative_url);
					parse_html(tdoc, child, elem, indent + 1, outfile);
				}
				break;

			case TidyNode_End:
				if (outfile) {
					name = tidyNodeGetName(child);
					if (name != NULL)
						fprintf(outfile, "%*.*s/%s>\n", indent, indent, "<", name);
				}
				break;

			case TidyNode_Text:
				if (outfile) {
					TidyBuffer buf;
					TidyTagId parent_tag = tidyNodeGetId(tnod);

					tidyBufInit(&buf);

					/* Inside SCRIPT / STYLE the node value is used instead of the node text. */
					if (parent_tag == TidyTag_SCRIPT || parent_tag == TidyTag_STYLE)
						tidyNodeGetValue(tdoc, child, &buf);
					else
						tidyNodeGetText(tdoc, child, &buf);

					if (buf.bp)
						fprintf(outfile, "%s", (char *)buf.bp);

					tidyBufFree(&buf);
				}
				break;

			case TidyNode_Comment:
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "<!--%s-->\n", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;

			case TidyNode_CDATA:
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "<![CDATA[%s]]>\n", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;

			case TidyNode_DocType:
				if (outfile) {
					int seen_public = 0;

					fprintf(outfile, "<!DOCTYPE %s", tidyNodeGetName(child));

					for (attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr)) {
						/* Attribute names are printed up to and including PUBLIC; values always. */
						if (!seen_public) {
							fprintf(outfile, " %s", tidyAttrName(attr));
							if (!string_casecmp(tidyAttrName(attr), "PUBLIC"))
								seen_public = 1;
						}

						if (tidyAttrValue(attr))
							fprintf(outfile, " \"%s\"", tidyAttrValue(attr));
					}

					fprintf(outfile, ">\n");
				}
				break;

			default:
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "%s", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
		}
	}
}