/**
 * Collects the serialized text of the built-in system-notice blocks.
 *
 * Each key of the internal NULL-terminated table is looked up with
 * searchNode() (restricted to TidyTag_DIV). For every node found, its text as
 * produced by tidyNodeGetText() is decoded through u8codec and appended to
 * the result; keys without a matching node only produce a debug warning.
 *
 * @param doc  Parsed tidy document to search.
 * @return Decoded notice texts, in table order, for the keys that were found.
 */
QVector<QString> UCHome_Main_SiteConst::find_sys_notice(TidyDoc doc)
{
    // Keys handed to searchNode(); the trailing NULL ends the walk below.
    char *notice_keys[] = { "mtag_invite", "f_request", NULL };

    QVector<QString> collected;
    for (char **key = notice_keys; *key != NULL; ++key) {
        TidyNode div = this->searchNode(doc, NULL, *key, TidyTag_DIV);
        if (div == NULL) {
            q_debug()<<"Warning: "<<*key<<" not found";
            continue;
        }

        TidyBuffer text;
        tidyBufInit(&text);
        tidyNodeGetText(doc, div, &text);
        QString notice = this->u8codec->toUnicode(QByteArray((char*)text.bp));
        collected.append(notice);
        tidyBufFree(&text);
        q_debug()<<notice;
    }
    return collected;
}
/* Traverse the document tree */
/*
 * Prints every descendant of tnod to stdout, depth-first.
 *
 * Named nodes are printed as "<name attr="value" ... >"; nodes without a name
 * are printed through tidyNodeGetText(). Each recursion level adds 4 to the
 * indent value used as printf field width/precision.
 *
 * doc    - document that owns the nodes
 * tnod   - node whose children are dumped
 * indent - field width/precision for the current level
 */
void dumpNode(TidyDoc doc, TidyNode tnod, int indent)
{
  TidyNode child;
  for(child = tidyGetChild(tnod); child; child = tidyGetNext(child)) {
    ctmbstr name = tidyNodeGetName(child);
    if(name) {
      /* if it has a name, then it's an HTML tag ... */
      TidyAttr attr;
      printf("%*.*s%s ", indent, indent, "<", name);
      /* walk the attribute list */
      for(attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr)) {
        ctmbstr attr_name = tidyAttrName(attr);
        ctmbstr attr_value = tidyAttrValue(attr);
        /* The attribute name comes from the parsed document, so it must be
           passed as data and never as the format string itself. */
        printf("%s", attr_name ? attr_name : "");
        if(attr_value)
          printf("=\"%s\" ", attr_value);
        else
          printf(" ");
      }
      printf(">\n");
    }
    else {
      /* if it doesn't have a name, then it's probably text, cdata, etc... */
      TidyBuffer buf;
      tidyBufInit(&buf);
      tidyNodeGetText(doc, child, &buf);
      printf("%*.*s\n", indent, indent, buf.bp ? (char *)buf.bp : "");
      tidyBufFree(&buf);
    }
    dumpNode(doc, child, indent + 4); /* recursive */
  }
}
/**
 * Returns the serialized text of the notice block selected by type_class.
 *
 * @param doc         Parsed tidy document to search.
 * @param type_class  Key passed to searchNode() (restricted to TidyTag_DIV).
 * @return The node text decoded through u8codec, or an empty string when
 *         type_class is NULL or no matching node is found.
 */
QString UCHome_Main_SiteConst::find_sys_notice_by_type(TidyDoc doc, char *type_class)
{
    QString notice;

    if (type_class == NULL) {
        q_debug()<<"";
        return notice;
    }

    TidyNode div = this->searchNode(doc, NULL, type_class, TidyTag_DIV);
    if (div == NULL) {
        q_debug()<<"Warning: "<<type_class<<" not found";
        return notice;
    }

    TidyBuffer text;
    tidyBufInit(&text);
    tidyNodeGetText(doc, div, &text);
    notice = this->u8codec->toUnicode(QByteArray((char*)text.bp));
    tidyBufFree(&text);

    q_debug()<<notice;
    return notice;
}
/**
 * Extracts the ticket price and the remaining-seat count from a table cell.
 *
 * The node is serialized with tidyNodeGetText() and scanned as text:
 *  - price: the text between the first "</font>" and the first "</strong>",
 *    with CR/LF removed, converted with atoi();
 *  - remaining seats: the text between the first ":" and the first "</td>",
 *    converted with atoi().
 *
 * @param pPrice         Receives the price (set to 0 first).
 * @param pRemainTicket  Receives the remaining-seat count (set to 0 first).
 * @param tdoc           Document that owns tdNode.
 * @param tdNode         Node to serialize and scan.
 */
void CCaHtmlParse::__GetPriceAndRamainTicket(UINT *pPrice, UINT *pRemainTicket, const TidyDoc & tdoc, const TidyNode & tdNode)
{
    *pPrice = 0;
    *pRemainTicket = 0;

    TidyBuffer nodeText = {0};
    tidyBufInit(&nodeText);
    TidyNodeType nodeType = tidyNodeGetType(tdNode); // queried but not used further
    tidyNodeGetText(tdoc, tdNode, &nodeText);

    CStringA straHtml;
    straHtml.Format("%s",nodeText.bp);
    straHtml.TrimLeft();

    // Price
    const CStringA straPriceKey("</font>");
    const int iPriceBegin = straHtml.Find(straPriceKey) + straPriceKey.GetLength();
    const int iPriceEnd = straHtml.Find("</strong>");
    CStringA straPrice = straHtml.Mid(iPriceBegin, iPriceEnd - iPriceBegin);
    straPrice.Remove(0x0d); // drop carriage returns
    straPrice.Remove(0x0a); // drop line feeds
    *pPrice = atoi((LPCSTR)straPrice);

    // Remaining seats
    const CStringA straSeatKey(":");
    const int iSeatBegin = straHtml.Find(straSeatKey) + straSeatKey.GetLength();
    const int iSeatEnd = straHtml.Find("</td>");
    const CStringA straRemainSeat = straHtml.Mid(iSeatBegin, iSeatEnd - iSeatBegin);
    *pRemainTicket = atoi((LPCSTR)straRemainSeat);

    tidyBufFree(&nodeText);
}
/**
 * Derives the departure and arrival airport codes from a table cell.
 *
 * The node is serialized with tidyNodeGetText(). The part from offset 4 up to
 * the first "<br />" is searched for a known departure airport name; the part
 * after that "<br />" (CR/LF removed) up to the first "</td>" is searched for
 * a known arrival airport name. The first matching name in table order wins.
 *
 * @param straDCode  Receives the departure code, or "" when no name matches.
 * @param straACode  Receives the arrival code, or "" when no name matches.
 * @param tdoc       Document that owns tdNode.
 * @param tdNode     Node to serialize and scan.
 */
void CCaHtmlParse::__GetAirPortCode(CStringA & straDCode, CStringA & straACode, const TidyDoc & tdoc, const TidyNode & tdNode)
{
    // Airport names in the order they are tried, and the code for each.
    static const char * const kAirportNames[] = { "上海虹桥", "上海浦东", "北京首都", "北京南苑" };
    static const char * const kAirportCodes[] = { "SHA", "PVG", "PEK", "NAY" };
    const int kAirportCount = 4;
    int i;

    straDCode = "";
    straACode = "";

    TidyBuffer nodeText = {0};
    tidyBufInit(&nodeText);
    TidyNodeType nodeType = tidyNodeGetType(tdNode); // queried but not used further
    tidyNodeGetText(tdoc, tdNode, &nodeText);

    CStringA straHtml;
    straHtml.Format("%s",nodeText.bp);
    straHtml.TrimLeft();

    // Departure airport: text between offset 4 and the line break tag.
    // NOTE(review): the 4 skipped characters are presumably the opening
    // "<td>" - confirm against the page markup.
    const CStringA straBreak("<br />"); // a CR/LF follows the <br />
    const int iBreak = straHtml.Find(straBreak);
    const CStringA straDCity = straHtml.Mid(4, iBreak - 4);
    for (i = 0; i < kAirportCount; ++i)
    {
        if (-1 != straDCity.Find(kAirportNames[i]))
        {
            straDCode = kAirportCodes[i];
            break;
        }
    }

    // Arrival airport: everything after the line break tag, up to "</td>".
    CStringA straRest = straHtml.Mid(iBreak + straBreak.GetLength());
    straRest.Remove(0x0d); // drop carriage returns
    straRest.Remove(0x0a); // drop line feeds
    const int iCellEnd = straRest.Find("</td>");
    const CStringA straACity = straRest.Left(iCellEnd);
    for (i = 0; i < kAirportCount; ++i)
    {
        if (-1 != straACity.Find(kAirportNames[i]))
        {
            straACode = kAirportCodes[i];
            break;
        }
    }

    tidyBufFree(&nodeText);
}
/**
 * Builds a FeedRecord from a single node.
 *
 * The record content is the node text (tidyNodeGetText(), decoded through
 * u8codec) after rewrite_relative_link(); md5sum is computed by md5CheckSum()
 * over the raw buffer bytes.
 *
 * @param state_data  Not used by this function.
 * @param doc         Document that owns node.
 * @param node        Node to convert.
 * @return Newly allocated FeedRecord; the caller receives the raw pointer.
 */
FeedRecord* UCHome_Main_SiteConst::parse_note(void *state_data, TidyDoc doc, TidyNode node)
{
    TidyBuffer raw;
    tidyBufInit(&raw);
    tidyNodeGetText(doc, node, &raw);

    QString decoded = this->u8codec->toUnicode(QByteArray((char*)raw.bp));

    FeedRecord *record = new FeedRecord();
    record->content = this->rewrite_relative_link(decoded);
    md5CheckSum((char*)raw.bp, raw.size, record->md5sum);

    tidyBufFree(&raw);
    return record;
}
/**
 * Extracts the flight start time from a node.
 *
 * The node is serialized with tidyNodeGetText(); the result is the text
 * between the first occurrence of the two characters `">` and the first
 * "</strong>".
 *
 * @param strFlightStartTime  Receives the extracted text (cleared first).
 * @param tdoc                Document that owns tdNode.
 * @param tdNode              Node to serialize and scan.
 */
void CCaHtmlParse::__GetFlightStartTime(CStringA & strFlightStartTime, const TidyDoc & tdoc, const TidyNode & tdNode)
{
    strFlightStartTime = "";

    TidyBuffer nodeText = {0};
    tidyBufInit(&nodeText);
    TidyNodeType nodeType = tidyNodeGetType(tdNode); // queried but not used further
    tidyNodeGetText(tdoc, tdNode, &nodeText);

    CStringA straHtml;
    straHtml.Format("%s",nodeText.bp);

    const CStringA straOpenEnd("\">");
    const int iBegin = straHtml.Find(straOpenEnd) + straOpenEnd.GetLength();
    const int iEnd = straHtml.Find("</strong>");
    strFlightStartTime = straHtml.Mid(iBegin, iEnd - iBegin);

    tidyBufFree(&nodeText);
}
/**
 * Builds a FeedRecord describing one friend entry.
 *
 * The record content is the node text (tidyNodeGetText(), decoded through
 * u8codec), passed through rewrite_relative_link() and wrapped in
 * "<table>...</table>". The friend uid is read from the VALUE attribute of
 * node's first grandchild, and the friend name from the ALT attribute of the
 * node reached via second child -> first child -> first child.
 *
 * md5sum is first computed over the raw node text. When a uid is found it is
 * replaced by the md5 of the uid. When the subtree does not have the expected
 * shape, the uid and/or name are left unset and the text-based md5 is kept,
 * instead of dereferencing a NULL node or attribute.
 *
 * @param state_data  Not used by this function.
 * @param doc         Document that owns node.
 * @param node        Node describing the friend entry.
 * @return Newly allocated FeedRecord; the caller receives the raw pointer.
 */
FeedRecord* UCHome_Main_SiteConst::parse_friend(void *state_data, TidyDoc doc, TidyNode node)
{
    TidyBuffer tbuf;
    tidyBufInit(&tbuf);
    tidyNodeGetText(doc, node, &tbuf);
    QString note = this->u8codec->toUnicode(QByteArray((char*)tbuf.bp));

    FeedRecord *rec = new FeedRecord();
    rec->content = this->rewrite_relative_link(note);
    rec->content = "<table>" + rec->content + "</table>";
    // Fallback checksum over the raw text; replaced below when a uid exists.
    md5CheckSum((char*)tbuf.bp, tbuf.size, rec->md5sum);
    tidyBufFree(&tbuf);
    //q_debug()<<"Orig:"<<note;

    // Every step is guarded: the tidy accessors are not guaranteed to accept
    // a NULL node/attribute, and the page layout may differ from what is
    // expected here.
    TidyNode first_child = (node != NULL) ? tidyGetChild(node) : NULL;
    TidyNode uid_node = (first_child != NULL) ? tidyGetChild(first_child) : NULL;
    TidyAttr attr = (uid_node != NULL) ? tidyAttrGetById(uid_node, TidyAttr_VALUE) : NULL;
    ctmbstr fuid = (attr != NULL) ? tidyAttrValue(attr) : NULL;

    TidyNode second_child = (first_child != NULL) ? tidyGetNext(first_child) : NULL;
    TidyNode anchor = (second_child != NULL) ? tidyGetChild(second_child) : NULL; // A
    TidyNode image = (anchor != NULL) ? tidyGetChild(anchor) : NULL;              // IMG
    attr = (image != NULL) ? tidyAttrGetById(image, TidyAttr_ALT) : NULL;
    ctmbstr fusername = (attr != NULL) ? tidyAttrValue(attr) : NULL;

    if (fuid != NULL) {
        rec->fuid = fuid;
    }
    rec->fusername = this->u8codec->toUnicode(QByteArray(fusername));
    q_debug()<<"USER:"<<(fuid != NULL ? fuid : "")<<rec->fusername;

    if (fuid != NULL) {
        // The md5 of the uid cannot collide between different friends.
        md5CheckSum(fuid, strlen(fuid), rec->md5sum);
    } else {
        q_debug()<<"Warning: friend uid not found, keeping text md5";
    }

    return rec;
}
/**
 * Extracts the sale end date and end time from a node.
 *
 * The node is serialized with tidyNodeGetText(). The text from offset 4 up to
 * the first "<br" is taken as the timestamp: its first 10 characters become
 * the date, and everything from offset 11 on (trimmed) becomes the time.
 *
 * @param straEndDate  Receives the date part (cleared first).
 * @param straEndTime  Receives the time part (cleared first).
 * @param tdoc         Document that owns tdNode.
 * @param tdNode       Node to serialize and scan.
 */
void CCaHtmlParse::__GetSaleEndDate(CStringA & straEndDate, CStringA & straEndTime, const TidyDoc & tdoc, const TidyNode & tdNode)
{
    straEndDate = "";
    straEndTime = "";

    TidyBuffer nodeText = {0};
    tidyBufInit(&nodeText);
    TidyNodeType nodeType = tidyNodeGetType(tdNode); // queried but not used further
    tidyNodeGetText(tdoc, tdNode, &nodeText);

    CStringA straHtml;
    straHtml.Format("%s",nodeText.bp);
    straHtml.TrimLeft();

    const int iBreak = straHtml.Find("<br");
    const CStringA straStamp = straHtml.Mid(4, iBreak - 4);

    straEndDate = straStamp.Left(10);
    straEndTime = straStamp.Mid(11);
    straEndTime.Trim();

    tidyBufFree(&nodeText);
}
/**
 * Converts the text of a time node into a date string.
 *
 * The node text (tidyNodeGetText()) is trimmed and parsed as "yyyy-MM-dd".
 * When it is a valid date of any year, that date's QDate::toString() is
 * returned. Previously only text containing "2008-" was treated as a date,
 * so dates from every other year were reported as yesterday. Any other text
 * is taken to mean "yesterday" and yields yesterday's date string.
 *
 * Text that contains "2008-" but does not parse keeps its historical result
 * (the toString() of an invalid QDate).
 *
 * @param doc    Document that owns tnode.
 * @param tnode  Node holding the time text.
 * @return Date string as produced by QDate::toString().
 */
QString UCHome_Main_SiteConst::get_time_string(TidyDoc doc, TidyNode tnode)
{
    QString time_string;

    TidyBuffer tbuf = {0};
    tidyBufInit(&tbuf);
    tidyNodeGetText(doc, tnode, &tbuf);

    // bp stays NULL when nothing was written into the buffer; strstr() and
    // QString must not be handed that NULL.
    const char *raw = (tbuf.bp != NULL) ? (const char*)tbuf.bp : "";

    const QDate onlyDate = QDate::fromString(QString(raw).trimmed(), "yyyy-MM-dd");
    if (onlyDate.isValid() || strstr(raw, "2008-") != NULL) {
        time_string = onlyDate.toString();
    } else {
        // anything that is not a date must be "yesterday"
        time_string = QDateTime::currentDateTime().addDays(-1).date().toString();
    }

    tidyBufFree(&tbuf);
    return time_string;
}
/**
 * Extracts the flight number and the flight start date from a node.
 *
 * The node is serialized with tidyNodeGetText() and left-trimmed. The start
 * date is the 10 characters beginning at offset 4. The flight number is the
 * text from the first 'C' (or, if there is none, the first 'c') up to the
 * first "</", upper-cased, with its first two characters removed.
 *
 * @param strFlightNo         Receives the flight number (cleared first).
 * @param strFlightStartDate  Receives the start date (cleared first).
 * @param tdoc                Document that owns tdNode.
 * @param tdNode              Node to serialize and scan.
 */
void CCaHtmlParse::__GetFlightNoAndFlightStartDate(CStringA & strFlightNo, CStringA & strFlightStartDate, const TidyDoc & tdoc, const TidyNode & tdNode)
{
    strFlightNo = "";
    strFlightStartDate = "";

    TidyBuffer nodeText = {0};
    tidyBufInit(&nodeText);
    TidyNodeType nodeType = tidyNodeGetType(tdNode); // queried but not used further
    tidyNodeGetText(tdoc, tdNode, &nodeText);

    CStringA straHtml;
    straHtml.Format("%s",nodeText.bp);
    straHtml.TrimLeft();

    strFlightStartDate = straHtml.Mid(4, 10);

    // Match "CA" in any letter case; a CR/LF is inserted after <br />.
    int iCodeBegin = straHtml.Find('C');
    if (iCodeBegin == -1)
    {
        // no upper-case C found, try lower-case c
        iCodeBegin = straHtml.Find('c');
    }
    const int iCodeEnd = straHtml.Find("</");

    // Air China flight numbers have either 3 or 4 digits, so the length is
    // taken from the closing tag rather than fixed.
    CStringA straCode = straHtml.Mid(iCodeBegin, iCodeEnd - iCodeBegin);
    straCode.MakeUpper();
    strFlightNo = straCode.Mid(2); // strip the "CA" prefix

    tidyBufFree(&nodeText);
}
/*
 * Cast handler for tidy node objects.
 *
 * IS_LONG   -> 0
 * IS_DOUBLE -> 0
 * _IS_BOOL  -> true
 * IS_STRING -> the node text from tidyNodeGetText() without its last byte,
 *              or an empty string when the object has no document or the
 *              buffer came back empty
 *
 * Returns SUCCESS for the types above and FAILURE for any other type.
 */
static int tidy_node_cast_handler(zval *in, zval *out, int type)
{
	TidyBuffer buf;
	PHPTidyObj *obj;

	switch (type) {
		case IS_LONG:
			ZVAL_LONG(out, 0);
			break;

		case IS_DOUBLE:
			ZVAL_DOUBLE(out, 0);
			break;

		case _IS_BOOL:
			ZVAL_TRUE(out);
			break;

		case IS_STRING:
			obj = Z_TIDY_P(in);
			tidyBufInit(&buf);
			if (obj->ptdoc) {
				tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
			}
			/* buf.size is unsigned: with an empty buffer "size-1" would wrap
			 * around to a huge length and bp would be NULL, so that case is
			 * mapped to an empty string instead. */
			if (obj->ptdoc && buf.bp && buf.size) {
				ZVAL_STRINGL(out, (char *) buf.bp, buf.size-1);
			} else {
				ZVAL_EMPTY_STRING(out);
			}
			tidyBufFree(&buf);
			break;

		default:
			return FAILURE;
	}

	return SUCCESS;
}
/**
 * Returns the trimmed text found under the "state" block.
 *
 * searchNode() is asked for the "state" key (restricted to TidyTag_DIV). The
 * first child of that node is asserted (debug builds) to be an <a> element,
 * and the text of that element's first child is decoded through u8codec and
 * trimmed.
 *
 * @param doc  Parsed tidy document to search.
 * @return The sign text, or an empty string when the block is missing or
 *         tidyNodeGetText() reports failure.
 */
QString UCHome_Main_SiteConst::find_sign_text(TidyDoc doc)
{
    QString sign_text;

    TidyNode state_div = this->searchNode(doc, NULL, "state", TidyTag_DIV);
    if (state_div == NULL) {
        q_debug()<<"Warning: no state sign text found";
        return sign_text;
    }

    TidyNode anchor = tidyGetChild(state_div);
    Q_ASSERT(tidyNodeGetId(anchor) == TidyTag_A);

    TidyBuffer text;
    tidyBufInit(&text);
    if (tidyNodeGetText(doc, tidyGetChild(anchor), &text)) {
        sign_text = this->u8codec->toUnicode(QByteArray((char*)text.bp)).trimmed();
        q_debug()<<"Sign text:"<<sign_text;
    }
    tidyBufFree(&text);

    return sign_text;
}
/* Traverse the document tree */
/*
 * Walks the children of tnod depth-first and copies weather readings out of
 * the unnamed (text-like) nodes that sit at fixed positions in the walk.
 *
 * `element` is a running counter: it is incremented once when a child is
 * visited, once more before recursing into that child, and then replaced by
 * the value the recursive call returns. The case labels below are values of
 * this counter, so they depend on the exact node layout of the page.
 *
 * doc     - document that owns the nodes
 * tnod    - node whose children are visited
 * element - counter value on entry
 * data    - receives the parsed readings
 *
 * Returns the counter value after the whole subtree has been visited.
 */
int dumpNode(TidyDoc doc, TidyNode tnod, int element, WeatherData *data)
{
  TidyNode child;

  for ( child = tidyGetChild(tnod); child; child = tidyGetNext(child) )
  {
    ctmbstr name;

    element++;
    name = tidyNodeGetName( child );
    if ( !name )
    {
      /* if it doesn't have a name, then it's probably text, cdata, etc... */
      TidyBuffer buf;
      const char *text;

      tidyBufInit(&buf);
      tidyNodeGetText(doc, child, &buf);
      /* bp is NULL when nothing was written; parse an empty string then
         instead of handing NULL to sscanf(). */
      text = buf.bp ? (const char *)buf.bp : "";

      switch (element)
      {
        case 133:
          sscanf( text, "%lf", &(data->outsideTemp) );
          break;
        case 159:
          sscanf( text, "%d", &(data->outsideHumidity));
          break;
        case 301:
          sscanf( text, "%lf", &(data->dewPoint));
          break;
        case 333:
          sscanf( text, "%lf", &(data->barometer));
          break;
        case 391: /* wind speed */
          if ( sscanf( text, "%lf", &(data->instantWindSpeed)) == 0 )
          {
            data->instantWindSpeed = 0;
          }
          break;
        case 417: /* wind direction */
          {
            /* Keep only the digits of the text. The copy is bounded so that
               an unexpectedly long text cannot overrun the local buffer. */
            char digits[100];
            size_t used = 0;
            const char *p;

            for ( p = text; *p != '\0' && used < sizeof(digits) - 1; p++ )
            {
              if ( isdigit( (unsigned char)*p ) )
              {
                digits[used] = *p;
                used++;
              }
            }
            digits[used] = '\0';
            sscanf( digits, "%d", &(data->instantWindDirection));
          }
          break;
        case 503:
          if ( sscanf( text, "%lf", &(data->avgWindSpeed_2min)) == 0 )
          {
            data->avgWindSpeed_2min = 0;
          }
          break;
        case 533:
          if ( sscanf( text, "%lf", &(data->windGust_10min)) == 0 )
          {
            data->windGust_10min = 0;
          }
          break;
        case 599:
          sscanf( text, "%lf", &(data->rainRate));
          break;
        case 603:
          sscanf( text, "%lf", &(data->dailyRain));
          break;
        case 625:
          sscanf( text, "%lf", &(data->lastHourRain));
          break;
      }
      tidyBufFree(&buf);
    }
    /* Named nodes (HTML tags) carry no readings; they only advance the
       counter. */

    element++;
    element = dumpNode( doc, child, element, data ); /* recursive */
  }
  return element;
}
/*
 * Populates the property table of a tidy object.
 *
 * is_node: adds value (node text without its last byte), name, type, line,
 *          column and proprietary; adds id for every node type except Root,
 *          DocType, Text and Comment; adds "attribute" (name => value array,
 *          or NULL when the node has no attributes) and "child" (array of
 *          newly instantiated node objects, each populated recursively, or
 *          NULL when the node has no children). Every child object shares
 *          obj->ptdoc and bumps its ref_count.
 * is_doc:  adds errorBuffer and value, both NULL.
 *
 * The property table is rebuilt first when it does not exist yet.
 */
static void tidy_add_default_properties(PHPTidyObj *obj, tidy_obj_type type)
{
	TidyBuffer buf;
	TidyAttr cur_attr;
	TidyNode cur_child;
	zval attribute, children, temp;
	PHPTidyObj *child_obj;

	switch (type) {
		case is_node:
			if (!obj->std.properties) {
				rebuild_object_properties(&obj->std);
			}

			/* Serialized node text. */
			tidyBufInit(&buf);
			tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
			ADD_PROPERTY_STRINGL(obj->std.properties, value, buf.bp, buf.size ? buf.size-1 : 0);
			tidyBufFree(&buf);

			/* Scalar node properties. */
			ADD_PROPERTY_STRING(obj->std.properties, name, tidyNodeGetName(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, type, tidyNodeGetType(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, line, tidyNodeLine(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, column, tidyNodeColumn(obj->node));
			ADD_PROPERTY_BOOL(obj->std.properties, proprietary, tidyNodeIsProp(obj->ptdoc->doc, obj->node));

			switch (tidyNodeGetType(obj->node)) {
				case TidyNode_Root:
				case TidyNode_DocType:
				case TidyNode_Text:
				case TidyNode_Comment:
					/* no id property for these node types */
					break;

				default:
					ADD_PROPERTY_LONG(obj->std.properties, id, tidyNodeGetId(obj->node));
			}

			/* Attributes: only pairs with both a name and a value are kept. */
			cur_attr = tidyAttrFirst(obj->node);
			if (cur_attr) {
				array_init(&attribute);
				for (; cur_attr; cur_attr = tidyAttrNext(cur_attr)) {
					char *attr_name = (char *)tidyAttrName(cur_attr);
					char *attr_val = (char *)tidyAttrValue(cur_attr);

					if (attr_name && attr_val) {
						add_assoc_string(&attribute, attr_name, attr_val);
					}
				}
			} else {
				ZVAL_NULL(&attribute);
			}
			zend_hash_str_update(obj->std.properties, "attribute", sizeof("attribute") - 1, &attribute);

			/* Children: one node object per child, populated recursively. */
			cur_child = tidyGetChild(obj->node);
			if (cur_child) {
				array_init(&children);
				for (; cur_child; cur_child = tidyGetNext(cur_child)) {
					tidy_instanciate(tidy_ce_node, &temp);
					child_obj = Z_TIDY_P(&temp);
					child_obj->node = cur_child;
					child_obj->type = is_node;
					child_obj->ptdoc = obj->ptdoc;
					child_obj->ptdoc->ref_count++;

					tidy_add_default_properties(child_obj, is_node);
					add_next_index_zval(&children, &temp);
				}
			} else {
				ZVAL_NULL(&children);
			}
			zend_hash_str_update(obj->std.properties, "child", sizeof("child") - 1, &children);
			break;

		case is_doc:
			if (!obj->std.properties) {
				rebuild_object_properties(&obj->std);
			}
			ADD_PROPERTY_NULL(obj->std.properties, errorBuffer);
			ADD_PROPERTY_NULL(obj->std.properties, value);
			break;
	}
}
/*
 * Creates an htmlTag for one tidy node.
 *
 * Only Text, Start, End and StartEnd nodes are handled; text nodes are given
 * the tag name "Text". When opentag is false the new tag is just marked as a
 * closing tag (slash). Otherwise the tag additionally receives:
 *   - js_ln, the node's line number, for script tags;
 *   - textval, a copy of the node value, for text tags with a non-empty value;
 *   - attributes / atvals, parallel 0-terminated arrays of cloned attribute
 *     names and values;
 *   - innerHTML, for tags whose info bits include TAG_INNERHTML.
 *
 * The level parameter is not used by this function. The document is taken
 * from the file-level variable tdoc.
 */
static void convertNode(TidyNode node, int level, bool opentag)
{
	ctmbstr tag_name;
	TidyAttr cursor;
	struct htmlTag *t;
	int attr_count;		/* number of attributes */
	int slot;

	switch (tidyNodeGetType(node)) {
	case TidyNode_Text:
		tag_name = "Text";
		break;
	case TidyNode_Start:
	case TidyNode_End:
	case TidyNode_StartEnd:
		tag_name = tidyNodeGetName(node);
		break;
	default:
		return;
	}

	t = newTag((char *)tag_name);
	if (!t)
		return;

	if (!opentag) {
		t->slash = true;
		return;
	}

	/* if a js script, remember the line number for error messages */
	if (t->action == TAGACT_SCRIPT)
		t->js_ln = tidyNodeLine(node);

	/* this is the open tag, set the attributes */

	/* special case for text tag */
	if (t->action == TAGACT_TEXT) {
		TidyBuffer value = { 0 };	/* text-node value */

		tidyBufClear(&value);
		tidyNodeGetValue(tdoc, node, &value);
		/* the buffer is only released when something was written */
		if (value.size) {
			t->textval = cloneString(value.bp);
			tidyBufFree(&value);
		}
	}

	/* first pass: count the attributes so the arrays can be sized */
	attr_count = 0;
	for (cursor = tidyAttrFirst(node); cursor != NULL;
	     cursor = tidyAttrNext(cursor))
		++attr_count;

	t->attributes = allocMem(sizeof(char *) * (attr_count + 1));
	t->atvals = allocMem(sizeof(char *) * (attr_count + 1));

	/* second pass: clone names and values, then 0-terminate both arrays */
	slot = 0;
	for (cursor = tidyAttrFirst(node); cursor != NULL;
	     cursor = tidyAttrNext(cursor)) {
		t->attributes[slot] = cloneString(tidyAttrName(cursor));
		t->atvals[slot] = cloneString(tidyAttrValue(cursor));
		++slot;
	}
	t->attributes[slot] = 0;
	t->atvals[slot] = 0;

	/* innerHTML, only for certain tags */
	if (t->info->bits & TAG_INNERHTML) {
		TidyBuffer markup = { 0 };	/* serialized node */

		tidyBufClear(&markup);
		t->innerHTML = emptyString;
		tidyNodeGetText(tdoc, node, &markup);
		if (markup.size) {
			/* But it's not the original html, it has been sanitized.
			 * Put a cap on size, else memory consumed could, theoretically,
			 * grow as the size of the document squared. */
			if (markup.size <= 4096)
				t->innerHTML = cloneString(markup.bp);
			tagStrip(t->innerHTML);
			tidyBufFree(&markup);
		}
	}
}				/* convertNode */
/*
 * Walks the children of tnod, registers the URLs they reference and, when
 * outfile is not NULL, writes the subtree back out as markup.
 *
 * Which elements contribute URLs depends on option_values.depth,
 * option_values.no_html_dependencies and elem->level:
 *   - A / AREA / MAP   (HREF) when depth is 0 or level < depth;
 *   - FRAME / IFRAME   (SRC)  when depth is 0 or level < depth + 1;
 *   - LINK (HREF), IMG / SCRIPT (SRC) under the same level condition as
 *     frames, unless no_html_dependencies is set.
 * Every non-empty URL found is passed to add_new_url_and_check(). When
 * writing output, that call may hand back a replacement URL (relative_url),
 * which is then printed in place of the attribute's original value.
 *
 * tdoc    - document that owns the nodes
 * tnod    - node whose children are processed
 * elem    - URL list entry of the page being parsed
 * indent  - field width/precision used when printing tags at this level
 * outfile - output stream, or NULL to only collect URLs
 */
static void parse_html(TidyDoc tdoc, TidyNode tnod, const url_list_t *elem, int indent, FILE *outfile)
{
  TidyNode child;
  TidyAttr attr;
  TidyAttrId attr_id = TidyAttr_UNKNOWN;
  TidyNodeType node_type;
  TidyTagId node_id;
  ctmbstr name;
  char *url, *relative_url = NULL;
  int found = 0;

  /* Link classes enabled for this page. */
  int get_html_link = (!option_values.depth || elem->level < option_values.depth);
  int get_int_html_link = (!option_values.depth || elem->level < option_values.depth+1);
  int get_ext_depends = ((!option_values.depth || elem->level < option_values.depth+1) && !option_values.no_html_dependencies);

  for (child = tidyGetChild(tnod); child; child = tidyGetNext(child))
  {
    node_type = tidyNodeGetType(child);

    switch (node_type)
    {
      case TidyNode_Start:
      case TidyNode_StartEnd:
        node_id = tidyNodeGetId(child);

        /* Pick the attribute that carries the URL for this element, if the
           element is one we follow. Default: nothing to follow. */
        found = 0;
        attr_id = TidyAttr_UNKNOWN;
        if (get_html_link && (node_id == TidyTag_A || node_id == TidyTag_AREA || node_id == TidyTag_MAP))
        {
          found = 1;
          attr_id = TidyAttr_HREF;
        }
        else if (get_int_html_link && (node_id == TidyTag_FRAME || node_id == TidyTag_IFRAME))
        {
          found = 1;
          attr_id = TidyAttr_SRC;
        }
        else if (get_ext_depends)
        {
          if (node_id == TidyTag_LINK)
          {
            found = 1;
            attr_id = TidyAttr_HREF;
          }
          else if (node_id == TidyTag_IMG || node_id == TidyTag_SCRIPT)
          {
            found = 1;
            attr_id = TidyAttr_SRC;
          }
        }

        if (found && (attr = tidyAttrGetById(child, attr_id)) != NULL)
        {
          url = (char *) tidyAttrValue(attr);
          string_free(relative_url);
          if (url && *url)
            add_new_url_and_check(elem, url, outfile ? &relative_url : NULL);
        }

        if (outfile && (name = tidyNodeGetName(child)) != NULL)
        {
          /* Re-emit the opening tag with all of its attributes. */
          fprintf(outfile, "%*.*s%s", indent, indent, "<", name);
          for (attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr))
          {
            ctmbstr value = tidyAttrValue(attr);

            fprintf(outfile, " %s", tidyAttrName(attr));
            if (relative_url && (tidyAttrGetId(attr) == attr_id))
              fprintf(outfile, "=\"%s\"", relative_url);   /* rewritten URL */
            else if (value)
              fprintf(outfile, "=\"%s\"", value);
            else
              fprintf(outfile, "=\"\"");
          }
          string_free(relative_url);

          if (node_type == TidyNode_StartEnd)
          {
            fprintf(outfile, "/>\n");
          }
          else
          {
            fprintf(outfile, ">\n");
            parse_html(tdoc, child, elem, indent + 1, outfile);
            fprintf(outfile, "%*.*s%s>\n", indent + 1, indent + 1, "</", name);
          }
        }
        else
        {
          /* Nothing to print for this element; still descend into it. */
          string_free(relative_url);
          parse_html(tdoc, child, elem, indent + 1, outfile);
        }
        break;

      case TidyNode_End:
        if (outfile)
        {
          name = tidyNodeGetName(child);
          if (name != NULL)
            fprintf(outfile, "%*.*s/%s>\n", indent, indent, "<", name);
        }
        break;

      case TidyNode_Text:
        if (outfile)
        {
          TidyBuffer buf;
          TidyTagId parent_node_id = tidyNodeGetId(tnod);

          tidyBufInit(&buf);
          /* Text inside SCRIPT/STYLE is read with tidyNodeGetValue(), all
             other text with tidyNodeGetText(). */
          if (parent_node_id == TidyTag_SCRIPT || parent_node_id == TidyTag_STYLE)
            tidyNodeGetValue(tdoc, child, &buf);
          else
            tidyNodeGetText(tdoc, child, &buf);
          if (buf.bp)
            fprintf(outfile, "%s", (char *)buf.bp);
          tidyBufFree(&buf);
        }
        break;

      case TidyNode_Comment:
        if (outfile)
        {
          TidyBuffer buf;

          tidyBufInit(&buf);
          tidyNodeGetValue(tdoc, child, &buf);
          if (buf.bp)
            fprintf(outfile, "<!--%s-->\n", (char *)buf.bp);
          tidyBufFree(&buf);
        }
        break;

      case TidyNode_CDATA:
        if (outfile)
        {
          TidyBuffer buf;

          tidyBufInit(&buf);
          tidyNodeGetValue(tdoc, child, &buf);
          if (buf.bp)
            fprintf(outfile, "<![CDATA[%s]]>\n", (char *)buf.bp);
          tidyBufFree(&buf);
        }
        break;

      case TidyNode_DocType:
        if (outfile)
        {
          int pub = 0;

          fprintf(outfile, "<!DOCTYPE %s", tidyNodeGetName(child));
          for (attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr))
          {
            /* Attribute names are printed up to and including PUBLIC;
               after that only the values are printed. */
            if (!pub)
            {
              fprintf(outfile, " %s", tidyAttrName(attr));
              if (!string_casecmp(tidyAttrName(attr), "PUBLIC"))
                pub = 1;
            }
            if (tidyAttrValue(attr))
              fprintf(outfile, " \"%s\"", tidyAttrValue(attr));
          }
          fprintf(outfile, ">\n");
        }
        break;

      default:
        if (outfile)
        {
          TidyBuffer buf;

          tidyBufInit(&buf);
          tidyNodeGetValue(tdoc, child, &buf);
          if (buf.bp)
            fprintf(outfile, "%s", (char *)buf.bp);
          tidyBufFree(&buf);
        }
        break;
    }
  }
}