示例#1
0
// Advances the feed iteration from `prev` to the following feed node.
//
// When `prev` has a next sibling, that sibling is returned. Otherwise the
// search moves to the node that follows the parent of `prev`:
//   - nothing follows the parent: returns NULL (iteration finished);
//   - an <h4> follows: its time string is stored in the state object's
//     curr_date and the first child of the node after the <h4> is returned
//     (NULL when nothing comes after the <h4>);
//   - anything else is asserted to be a <ul>; its first child is returned.
//
// state_data is used as a ParserStateObject*.
TidyNode UCHome_Main_SiteConst::next_feed(void *state_data, TidyDoc doc, TidyNode prev)
{
    ParserStateObject *state = (ParserStateObject*)state_data;

    // Common case: more entries remain in the current list.
    TidyNode sibling = tidyGetNext(prev);
    if(sibling != NULL) {
        return sibling;
    }

    // Current list exhausted; look at whatever follows the list itself.
    TidyNode list = tidyGetParent(prev);
    assert(list != NULL);

    TidyNode after_list = tidyGetNext(list);
    if(after_list == NULL) {
        return NULL;
    }

    if(tidyNodeGetId(after_list) == TidyTag_H4) {
        // The heading carries the date for the entries that come after it.
        state->curr_date = this->get_time_string(doc, after_list);

        TidyNode next_list = tidyGetNext(after_list);
        if(next_list != NULL) {
            return tidyGetChild(next_list);
        }
        return NULL;
    }

    int tag_id = tidyNodeGetId(after_list);
    assert(tag_id == TidyTag_UL);
    return tidyGetChild(after_list);
}
示例#2
0
// Searches the subtree rooted at tnod, depth first, for a
// <meta http-equiv="content-type" content="...charset=XXX"> element and
// returns everything that follows "charset=" in its content attribute.
//
// A META element only counts when its http-equiv value is "content-type" and
// its content value actually contains "charset="; any other META is skipped
// so the search can continue with the remaining nodes.
//
// Returns nglString::Null when no such declaration exists in the subtree.
static nglString GetEncodingString(TidyNode tnod)
{
  if (tidyNodeGetId(tnod) == TidyTag_META)
  {
    TidyAttr attr_content = tidyAttrGetById(tnod, TidyAttr_CONTENT);
    TidyAttr attr_httpequiv = tidyAttrGetById(tnod, TidyAttr_HTTP_EQUIV);
    if (attr_content && attr_httpequiv)
    {
      // Attributes written without a value yield a NULL string; skip those.
      ctmbstr httpequiv_value = tidyAttrValue(attr_httpequiv);
      ctmbstr content_value = tidyAttrValue(attr_content);
      if (httpequiv_value && content_value)
      {
        // Second argument false mirrors the original call; assumed to select
        // a case-insensitive, strcmp-like comparison (0 == equal) -- TODO confirm.
        nglString httpequiv(httpequiv_value);
        if (httpequiv.Compare(_T("content-type"), false) == 0)
        {
          nglString encoding(content_value);
          //NGL_OUT(_T("content found in the tree: %s"), encoding.GetChars());
          int32 col = encoding.Find(_T("charset="));
          if (col >= 0)
          {
            // 8 == length of "charset=": keep only what follows the key.
            encoding = encoding.Extract(col + 8);
            //NGL_OUT(_T("encoding found in the tree: %s"), encoding.GetChars());
            return encoding;
          }
        }
      }
    }
  }

  // Not declared on this node: try each child in document order and stop at
  // the first subtree that yields a non-null result.
  for (TidyNode child = tidyGetChild(tnod); child; child = tidyGetNext(child))
  {
    nglString str(GetEncodingString(child));
    if (!str.IsNull())
      return str;
  }

  return nglString::Null;
}
示例#3
0
// Looks up the user's photo URL in the parsed page.
//
// Finds the <div> matched by searchNode() for "quick_update s_clear", takes
// its first child (asserted to be an <img>) and reads that element's src
// attribute, replacing every "small" in it with "big".
//
// Returns an empty QString when the <div> is missing or the child has no src
// attribute.
QString UCHome_Main_SiteConst::find_photo_url(TidyDoc doc)
{
    QString result;

    TidyNode holder = this->searchNode(doc, NULL, "quick_update s_clear", TidyTag_DIV);
    if(holder == NULL) {
        q_debug()<<"Warning: no photo url found";
        return result;
    }

    TidyNode image = tidyGetChild(holder);
    Q_ASSERT(tidyNodeGetId(image) == TidyTag_IMG);

    TidyAttr src = tidyAttrGetById(image, TidyAttr_SRC);
    if(src != NULL) {
        ctmbstr raw_url = tidyAttrValue(src);
        result = QString(raw_url);
        result.replace("small", "big");
        q_debug()<<"Photo url: "<<result;
    }

    return result;
}
示例#4
0
// Starts a feed iteration over the parsed page and returns the first feed
// node, or NULL when none can be located.
//
// As a side effect, refreshes this->signtext and this->photourl from the
// document before looking for feeds.
//
// The first child of the "enter-content" <div> decides what happens:
//   - <h4>: its time string is stored in the state object's curr_date and the
//     first child of the node after the <h4> is returned (NULL if nothing follows);
//   - <ul>: its first child is returned, but only if that child is an <li>;
//   - anything else is logged as unknown and NULL is returned.
//
// state_data is used as a ParserStateObject*.
TidyNode UCHome_Main_SiteConst::first_feed(void *state_data, TidyDoc doc)
{
    ParserStateObject *state = (ParserStateObject*)state_data;

    // Page-level information gathered once per document.
    this->signtext = this->find_sign_text(doc);
    this->photourl = this->find_photo_url(doc);
    //this->sysnotes = this->find_sys_notice(doc);

    TidyNode container = this->searchNode(doc, NULL, "enter-content", TidyTag_DIV);
    if(container == NULL) {
        q_debug()<<"No feed node found";
        return NULL;
    }

    TidyNode head = tidyGetChild(container);
    assert(head != NULL);

    int head_tag = tidyNodeGetId(head);

    if(head_tag == TidyTag_H4) {
        // Date heading first; the feed list is expected right after it.
        state->curr_date = this->get_time_string(doc, head);
        TidyNode list = tidyGetNext(head);
        if(list != NULL) {
            return tidyGetChild(list);
        }
        return NULL;
    }

    if(head_tag == TidyTag_UL) {
        TidyNode item = tidyGetChild(head);
        if(tidyNodeGetId(item) == TidyTag_LI) {
            return item;
        }
        return NULL;
    }

    q_debug()<<"Unknown node type:"<<tidyNodeGetName(head);
    return NULL;
}
示例#5
0
// Builds a FeedRecord from one feed node.
//
// The record's content is the node text (as produced by get_node_text())
// decoded through this->u8codec. Its md5sum is computed from the node's id
// attribute, e.g. <li class="type_1006780" id="feed_685697_li">; that id is
// unique per feed and serves as the record's identity. An id containing
// " 1000 " is logged and hashed with that fragment removed.
//
// Returns a newly allocated record, or NULL when the node has no id value.
// The record is only allocated after the id has been validated, so the
// failure path no longer leaks it.
//
// state_data is not used by this function.
FeedRecord* UCHome_Main_SiteConst::parse_feed(void *state_data, TidyDoc doc, TidyNode node)
{
    // Pull the node text out through a temporary tidy buffer.
    TidyBuffer tbuf = {0};
    tidyBufInit(&tbuf);
    this->get_node_text(doc, node, &tbuf);
    QByteArray feed_bytes = QByteArray((char*)tbuf.bp);
    QString feed_text = this->u8codec->toUnicode(feed_bytes);
    tidyBufFree(&tbuf);

    TidyAttr attr = tidyAttrGetById(node, TidyAttr_ID);
    ctmbstr fid = tidyAttrValue(attr);
    //q_debug()<<attr<<fid;
    if(fid == NULL) {
        q_debug()<<"Warning: invalid fid"<<fid<<feed_text;
        return NULL;
    }

    // Owns the cleaned-up id for as long as fid may point into it.
    QByteArray stripped_id;
    if(strstr(fid, " 1000 ") != NULL) {
        q_debug()<<"Warning: invalid fid"<<tidyAttrValue(attr);
        stripped_id = fid;
        stripped_id = stripped_id.replace(" 1000 ", "");
        fid = stripped_id.data();
    }

    FeedRecord *rec = new FeedRecord();
    rec->content = feed_text;
    md5CheckSum(fid, strlen(fid), rec->md5sum);
    return rec;
}
示例#6
0
void nuiHTMLNode::SetFromNode(const void* _tdoc, const void* _tnod, nglTextEncoding encoding, bool ComputeStyle)
{
  Clear();
  
  TidyDoc tdoc = (TidyDoc)_tdoc;
  TidyNode tnod = (TidyNode)_tnod;

  mName = nglString(tidyNodeGetName(tnod), encoding);
  mType = (NodeType)tidyNodeGetType(tnod);
  mTagType = (TagType)tidyNodeGetId(tnod);
  
  nglString text;
  TidyBuffer buf;
  tidyBufInit(&buf);
  if (tidyNodeGetValue(tdoc, tnod, &buf))
  {
    mText.Import((const char*)buf.bp, (int32)buf.size, encoding);
    //NGL_OUT(_T("<%s> %s\n"), mName.GetChars(), mText.GetChars());
  }
  tidyBufFree(&buf);
  
  // Fill the attributes:
  TidyAttr tattr;
  for (tattr = tidyAttrFirst(tnod); tattr; tattr = tidyAttrNext(tattr))
  {
    nuiHTMLAttrib* pAttrib = new nuiHTMLAttrib(tattr, encoding);
    mAttributes.push_back(pAttrib);
  }
  
  if (ComputeStyle)
  {
    mpStyle = new nuiCSSStyle(this);
    nuiHTMLAttrib* pStyle = GetAttribute(nuiHTMLAttrib::eAttrib_STYLE);
    if (pStyle)
    {
      AddStyleSheet(GetSourceURL(), pStyle->GetValue(), true);
    }
  }
}
示例#7
0
// Looks up the user's sign text in the parsed page.
//
// Finds the <div> matched by searchNode() for "state", takes its first child
// (asserted to be an <a>) and reads the text of that element's first child,
// decoded through this->u8codec and trimmed.
//
// Returns an empty QString when the <div> is missing or no text could be read.
QString UCHome_Main_SiteConst::find_sign_text(TidyDoc doc)
{
    QString result;

    TidyNode state_div = this->searchNode(doc, NULL, "state", TidyTag_DIV);
    if(state_div == NULL) {
        q_debug()<<"Warning: no state sign text found";
        return result;
    }

    TidyNode anchor = tidyGetChild(state_div);
    Q_ASSERT(tidyNodeGetId(anchor) == TidyTag_A);

    TidyBuffer text_buf;
    tidyBufInit(&text_buf);
    if(tidyNodeGetText(doc, tidyGetChild(anchor), &text_buf)) {
        QByteArray raw((char*)text_buf.bp);
        result = this->u8codec->toUnicode(raw);
        result = result.trimmed();
        q_debug()<<"Sign text:"<<result;
    }
    tidyBufFree(&text_buf);

    return result;
}
示例#8
0
文件: tidy.c 项目: CooCoooo/php-src
/*
 * Fills obj->std.properties with the default property set for the given
 * object kind.
 *
 * is_node: adds value, name, type, line, column and proprietary entries taken
 *          from obj->node, an id entry for node types other than
 *          root/doctype/text/comment, an "attribute" entry (array of
 *          name => value strings, or NULL when the node has no attributes)
 *          and a "child" entry (array of newly instantiated node objects
 *          populated recursively, or NULL when the node has no children).
 * is_doc:  adds NULL errorBuffer and value entries.
 *
 * Any other type value matches no case and leaves obj untouched.
 */
static void tidy_add_default_properties(PHPTidyObj *obj, tidy_obj_type type)
{

	TidyBuffer buf;
	TidyAttr	tempattr;
	TidyNode	tempnode;
	zval attribute, children, temp;
	PHPTidyObj *newobj;

	switch(type) {

		case is_node:
			/* Make sure the property table exists before writing into it. */
			if (!obj->std.properties) {
				rebuild_object_properties(&obj->std);
			}
			tidyBufInit(&buf);
			tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
			/* The last byte of a non-empty buffer is left out of the value;
			 * presumably a trailing terminator written by tidyNodeGetText --
			 * TODO confirm. */
			ADD_PROPERTY_STRINGL(obj->std.properties, value, buf.bp, buf.size ? buf.size-1 : 0);
			tidyBufFree(&buf);

			ADD_PROPERTY_STRING(obj->std.properties, name, tidyNodeGetName(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, type, tidyNodeGetType(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, line, tidyNodeLine(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, column, tidyNodeColumn(obj->node));
			ADD_PROPERTY_BOOL(obj->std.properties, proprietary, tidyNodeIsProp(obj->ptdoc->doc, obj->node));

			/* Root, doctype, text and comment nodes get no id property. */
			switch(tidyNodeGetType(obj->node)) {
				case TidyNode_Root:
				case TidyNode_DocType:
				case TidyNode_Text:
				case TidyNode_Comment:
					break;

				default:
					ADD_PROPERTY_LONG(obj->std.properties, id, tidyNodeGetId(obj->node));
			}

			tempattr = tidyAttrFirst(obj->node);

			if (tempattr) {
				char *name, *val;
				array_init(&attribute);

				/* Attributes lacking either a name or a value are skipped. */
				do {
					name = (char *)tidyAttrName(tempattr);
					val = (char *)tidyAttrValue(tempattr);
					if (name && val) {
						add_assoc_string(&attribute, name, val);
					}
				} while((tempattr = tidyAttrNext(tempattr)));
			} else {
				ZVAL_NULL(&attribute);
			}
			zend_hash_str_update(obj->std.properties, "attribute", sizeof("attribute") - 1, &attribute);

			tempnode = tidyGetChild(obj->node);

			if (tempnode) {
				array_init(&children);
				do {
					/* One new node object per child, sharing the parent's
					 * document handle; the handle's ref_count is incremented
					 * for every object created here. */
					tidy_instanciate(tidy_ce_node, &temp);
					newobj = Z_TIDY_P(&temp);
					newobj->node = tempnode;
					newobj->type = is_node;
					newobj->ptdoc = obj->ptdoc;
					newobj->ptdoc->ref_count++;

					/* Recurse so the whole subtree is populated up front. */
					tidy_add_default_properties(newobj, is_node);
					add_next_index_zval(&children, &temp);

				} while((tempnode = tidyGetNext(tempnode)));

			} else {
				ZVAL_NULL(&children);
			}

			zend_hash_str_update(obj->std.properties, "child", sizeof("child") - 1, &children);

			break;

		case is_doc:
			if (!obj->std.properties) {
				rebuild_object_properties(&obj->std);
			}
			ADD_PROPERTY_NULL(obj->std.properties, errorBuffer);
			ADD_PROPERTY_NULL(obj->std.properties, value);
			break;
	}
}
示例#9
0
文件: parse.c 项目: ASpade/mulk
/*
 * Recursively walks the children of tnod. For elements that carry a link
 * (href on a/area/map/link, src on frame/iframe/img/script, subject to the
 * depth options below) the attribute value is handed to
 * add_new_url_and_check(). When outfile is non-NULL the markup is also
 * written to it, one node at a time, with the link attribute replaced by
 * relative_url whenever add_new_url_and_check() produced one.
 *
 * tdoc    - tidy document that owns the nodes (needed to read node text).
 * tnod    - node whose children are processed.
 * elem    - URL list entry of the page being parsed; its level is compared
 *           against option_values.depth.
 * indent  - used as both field width and precision for the tag-opening
 *           characters; incremented by one per recursion level.
 *           NOTE(review): with indent == 0 a precision of 0 prints none of
 *           "<", so callers presumably start at indent >= 1 -- confirm.
 * outfile - destination for the rewritten markup, or NULL to only collect
 *           URLs.
 */
static void parse_html(TidyDoc tdoc, TidyNode tnod, const url_list_t *elem, int indent, FILE *outfile)
{
	TidyNode child;
	TidyAttr attr;
	TidyAttrId attr_id = TidyAttr_UNKNOWN;
	TidyNodeType node_type;
	TidyTagId node_id;
	ctmbstr name;
	char *url, *relative_url = NULL;
	int found = 0;
	/* A depth option of 0 disables the level limit for all three switches. */
	int get_html_link = (!option_values.depth || elem->level < option_values.depth);
	int get_int_html_link = (!option_values.depth || elem->level < option_values.depth+1);
	int get_ext_depends = ((!option_values.depth || elem->level < option_values.depth+1)
		&& !option_values.no_html_dependencies);

	for (child = tidyGetChild(tnod); child; child = tidyGetNext(child)) {
		node_type = tidyNodeGetType(child);

		switch (node_type) {
			case TidyNode_Start:
			case TidyNode_StartEnd:
				/* Decide which attribute, if any, holds a URL for this tag.
				 * Every path below assigns both found and attr_id, so no
				 * state leaks in from the previous sibling. */
				node_id = tidyNodeGetId(child);
				if (get_html_link && (node_id == TidyTag_A || node_id == TidyTag_AREA || node_id == TidyTag_MAP)) {
					found = 1;
					attr_id = TidyAttr_HREF;
				}
				else if (get_int_html_link && (node_id == TidyTag_FRAME || node_id == TidyTag_IFRAME)) {
					found = 1;
					attr_id = TidyAttr_SRC; 
				}
				else if (get_ext_depends) {
					if (node_id == TidyTag_LINK) {
						found = 1;
						attr_id = TidyAttr_HREF;
					}
					else if (node_id == TidyTag_IMG || node_id == TidyTag_SCRIPT) {
						found = 1;
						attr_id = TidyAttr_SRC; 
					}
					else {
						found = 0;
						attr_id = TidyAttr_UNKNOWN;
					}
				}
				else {
					found = 0;
					attr_id = TidyAttr_UNKNOWN;
				}

				if (found && (attr = tidyAttrGetById(child, attr_id)) != NULL) {
					url = (char *) tidyAttrValue(attr);

					/* relative_url is tested again further down, so
					 * string_free presumably also resets it to NULL --
					 * confirm against its definition. */
					string_free(relative_url);
					/* The rewritten URL is only requested when output is
					 * being produced. */
					if (url && *url)
						add_new_url_and_check(elem, url, outfile ? &relative_url : NULL);
				}

				if (outfile && (name = tidyNodeGetName(child)) != NULL) {
					/* Opening tag: "<" padded to the indent width, then the name. */
					fprintf(outfile, "%*.*s%s", indent, indent, "<", name);
					for (attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr)) {
						fprintf(outfile, " %s", tidyAttrName(attr));
						/* Swap in the rewritten URL for the link attribute;
						 * all other attributes keep their original value. */
						if (relative_url && (tidyAttrGetId(attr) == attr_id))
							fprintf(outfile, "=\"%s\"", relative_url);
						else if (tidyAttrValue(attr))
							fprintf(outfile, "=\"%s\"", tidyAttrValue(attr) ? tidyAttrValue(attr) : "");
						else
							fprintf(outfile, "=\"\"");
					}
					string_free(relative_url);

					if (node_type == TidyNode_StartEnd)
						fprintf(outfile, "/>\n");
					else {
						fprintf(outfile, ">\n");
						parse_html(tdoc, child, elem, indent + 1, outfile);
						/* Closing tag: "</" needs one more column than "<". */
						fprintf(outfile, "%*.*s%s>\n", indent + 1, indent + 1, "</", name);
					}
				}
				else {
					/* Nothing to print for this element, but its subtree may
					 * still contain links. */
					string_free(relative_url);
					parse_html(tdoc, child, elem, indent + 1, outfile);
				}
				break;
			case TidyNode_End:
				if (outfile) {
					if ((name = tidyNodeGetName(child)) != NULL)
						fprintf(outfile, "%*.*s/%s>\n", indent, indent, "<", name);
				}
				break;
			case TidyNode_Text:
				if (outfile) {
					TidyBuffer buf;
					TidyTagId parent_node_id = tidyNodeGetId(tnod);

					tidyBufInit(&buf);
					/* Text inside <script>/<style> is read with
					 * tidyNodeGetValue, all other text with tidyNodeGetText. */
					if (parent_node_id == TidyTag_SCRIPT || parent_node_id == TidyTag_STYLE)
						tidyNodeGetValue(tdoc, child, &buf);
					else
						tidyNodeGetText(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "%s", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
			case TidyNode_Comment:
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "<!--%s-->\n", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
			case TidyNode_CDATA:
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "<![CDATA[%s]]>\n", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
			case TidyNode_DocType:
				if (outfile) {
					int pub = 0;

					fprintf(outfile, "<!DOCTYPE %s", tidyNodeGetName(child));
					for (attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr)) {
						/* Attribute names are printed up to and including
						 * the first one named PUBLIC; after that only the
						 * values are emitted. */
						if (!pub) {
							fprintf(outfile, " %s", tidyAttrName(attr));
							if (!string_casecmp(tidyAttrName(attr), "PUBLIC"))
								pub = 1;
						}
						if (tidyAttrValue(attr))
							fprintf(outfile, " \"%s\"", tidyAttrValue(attr));
					}
					fprintf(outfile, ">\n");
				}
				break;
			default:
				/* Any remaining node type is copied through verbatim. */
				if (outfile) {
					TidyBuffer buf;

					tidyBufInit(&buf);
					tidyNodeGetValue(tdoc, child, &buf);
					if (buf.bp)
						fprintf(outfile, "%s", (char *)buf.bp);
					tidyBufFree(&buf);
				}
				break;
		}
	}
}