コード例 #1
0
ファイル: gumbo.c プロジェクト: colin3dmax/lua-gumbo
/*
 * Set the "tag" field of the table at the top of the Lua stack to the
 * element's tag name.
 *
 * SVG elements first try gumbo_normalize_svg_tagname() on the original tag
 * text; when it yields a name, that name is stored and nothing else is done.
 * Otherwise, unknown tags are stored as the original tag text with ASCII
 * upper-case letters folded to lower-case, and known tags use
 * gumbo_normalized_tagname().
 */
static void add_tag(lua_State *L, const GumboElement *element) {
    if (element->tag_namespace == GUMBO_NAMESPACE_SVG) {
        GumboStringPiece svg_piece = element->original_tag;
        const char *svg_name;

        gumbo_tag_from_original_text(&svg_piece);
        svg_name = gumbo_normalize_svg_tagname(&svg_piece);
        if (svg_name) {
            add_string(L, "tag", svg_name);
            return;
        }
        /* No SVG replacement found: fall through to the generic handling. */
    }

    if (element->tag != GUMBO_TAG_UNKNOWN) {
        lua_pushstring(L, gumbo_normalized_tagname(element->tag));
    } else {
        GumboStringPiece piece = element->original_tag;
        luaL_Buffer buf;
        size_t pos;

        gumbo_tag_from_original_text(&piece);
        luaL_buffinit(L, &buf);
        for (pos = 0; pos < piece.length; pos++) {
            const char ch = piece.data[pos];
            /* 'A'..'Z' -> 'a'..'z'; every other byte is copied unchanged. */
            if (ch >= 'A' && ch <= 'Z') {
                luaL_addchar(&buf, ch + 32);
            } else {
                luaL_addchar(&buf, ch);
            }
        }
        luaL_pushresult(&buf);
    }
    lua_setfield(L, -2, "tag");
}
コード例 #2
0
ファイル: Node.cpp プロジェクト: iwangxl/gumbo-query
// Returns the normalized tag name of the wrapped node, or an empty string
// when the node is not an element.
std::string CNode::tag()
{
	const bool isElement = (mpNode->type == GUMBO_NODE_ELEMENT);
	if (isElement)
	{
		return std::string(gumbo_normalized_tagname(mpNode->v.element.tag));
	}

	return std::string();
}
コード例 #3
0
ファイル: error.c プロジェクト: CedarLogic/Sigil
/*
 * Appends "  Currently open tags: a, b, c." to `output`, listing the
 * normalized name of every entry in error->tag_stack in stack order.
 *
 * error:  parser error whose tag_stack is printed.
 * output: string buffer that receives the text.
 */
static void print_tag_stack(const GumboParserError* error, GumboStringBuffer* output) {
  print_message(output, "  Currently open tags: ");
  /* The index is unsigned to match the vector length and avoid a
   * signed/unsigned comparison in the loop condition. */
  for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
    if (i) {
      /* Separator goes before every entry except the first. */
      print_message(output, ", ");
    }
    /* Each stack slot holds a tag value; convert it back to GumboTag. */
    GumboTag tag = (GumboTag) error->tag_stack.data[i];
    print_message(output, gumbo_normalized_tagname(tag));
  }
  gumbo_string_buffer_append_codepoint('.', output);
}
コード例 #4
0
ファイル: error.c プロジェクト: hml1006/gumbo-parser
/*
 * Appends "  Currently open tags: a, b, c." to `output`, listing the
 * normalized name of every entry in error->tag_stack in stack order.
 *
 * parser: passed through to the message/buffer helpers.
 * error:  parser error whose tag_stack is printed.
 * output: string buffer that receives the text.
 */
static void print_tag_stack(GumboParser* parser, const GumboParserError* error,
    GumboStringBuffer* output) {
  unsigned int idx;

  print_message(parser, output, "  Currently open tags: ");
  for (idx = 0; idx < error->tag_stack.length; ++idx) {
    unsigned int raw_tag;

    /* Separator goes before every entry except the first. */
    if (idx != 0) {
      print_message(parser, output, ", ");
    }
    /* The slot is narrowed to unsigned int first and only then converted to
     * the enum. NOTE(review): this truncates if a slot ever holds more than
     * 32 significant bits -- confirm that only tag values are stored here. */
    raw_tag = (unsigned int) (error->tag_stack.data[idx]);
    print_message(parser, output, gumbo_normalized_tagname((GumboTag) raw_tag));
  }
  gumbo_string_buffer_append_codepoint(parser, '.', output);
}
コード例 #5
0
	/**
	 * Collects the text of one forum post and prints it to std::cout.
	 *
	 * @param node  Element expected to be
	 *              <div class="forum-post-text" id="message_text_NNN">;
	 *              these expectations are checked with assert() only.
	 *
	 * The message id is whatever follows the "message_text_" prefix in the
	 * "id" attribute. Direct text children are converted with utf8ToCp1251()
	 * and concatenated; element children are only reported, not parsed yet.
	 */
	void parseForumPostText(GumboNode* node)
	{
		assert(node->type == GUMBO_NODE_ELEMENT);
		assert(node->v.element.tag == GUMBO_TAG_DIV);

		GumboAttribute* classAttr = gumbo_get_attribute(&node->v.element.attributes, "class");
		assert(classAttr != NULL);
		assert(strCiCmp(classAttr->value, "forum-post-text"));

		GumboAttribute* idAttr = gumbo_get_attribute(&node->v.element.attributes, "id");
		assert(idAttr != NULL);
		const std::string idStr(idAttr->value);
		const std::string idPrefix("message_text_");
		const std::size_t messageTextStrIndex = idStr.find(idPrefix);
		assert(messageTextStrIndex != std::string::npos);
		// With asserts compiled out a missing prefix must not reach substr():
		// there is no message id to extract, so stop here.
		if (messageTextStrIndex == std::string::npos) return;

		// Take the id from right after the prefix that was actually found,
		// instead of assuming the prefix sits at offset 0 and is 13 chars long.
		std::string messageIdStr = idStr.substr(messageTextStrIndex + idPrefix.size());
		std::cout << "Forum post found, id: " << messageIdStr << std::endl;

		// FIXME: remove after debug completion
		// Debug filter: bail out unless strCiCmp() accepts the id against
		// "4453758" (presumably true means a case-insensitive match -- confirm).
		if (!strCiCmp(messageIdStr, "4453758"))return;

		std::string forumPostText = "";
		GumboVector* nodeChildren = &node->v.element.children;
		for (std::size_t i = 0; i < nodeChildren->length; i++)
		{
			GumboNode* childNode = static_cast<GumboNode*>(nodeChildren->data[i]);
			assert(childNode != NULL);
			if (childNode->type == GUMBO_NODE_ELEMENT)
			{
				// FIXME: parse message quotes (<table> tags)

				std::cout << "Tag found: " << gumbo_normalized_tagname(childNode->v.element.tag);
				std::cout << ", children: " << childNode->v.element.children.length << std::endl;
			}
			else if (childNode->type == GUMBO_NODE_TEXT)
			{
				// Text arrives as UTF-8; convert before appending to the post body.
				std::string elementTextUtf8(childNode->v.text.text);
				std::string elementTextCp1251 = "";
				utf8ToCp1251(elementTextCp1251, elementTextUtf8);
				forumPostText += elementTextCp1251;
//				std::cout << "Forum message text: " << elementTextCp1251 << std::endl;
			}
			else std::cout << "Ignoring \"" << gumboElementTypeToString(childNode->type) << "\" node..." << std::endl;

//			classAttr = gumbo_get_attribute(&childNode->v.element.attributes, "class");
		}

		std::cout << "Forum post: " << std::endl << forumPostText << std::endl;
	}
コード例 #6
0
ファイル: gmimex.c プロジェクト: swerter/gmimex
/*
 * Returns a newly allocated GString holding the name of `node`.
 *
 * Document nodes get the fixed name "document". For every other node the
 * normalized tag name is used; when that name is empty, the result of
 * handle_unknown_tag() on the element's original tag text is returned instead.
 */
static GString *get_tag_name(GumboNode *node) {
  GString *name;

  /* The document node has no proper name of its own, so use a fixed label. */
  if (node->type == GUMBO_NODE_DOCUMENT) {
    return g_string_new("document");
  }

  name = g_string_new(gumbo_normalized_tagname(node->v.element.tag));
  if (name->len != 0) {
    return name;
  }

  /* Empty normalized name: drop it and derive the name from the source text. */
  g_string_free(name, TRUE);
  return handle_unknown_tag(&node->v.element.original_tag);
}
コード例 #7
0
ファイル: es_gumbo.c プロジェクト: Cy-4AH/showtime
/*
 * Pushes the name of the native gumbo node at stack index 0 onto the
 * Duktape stack and returns 1.
 *
 * Document nodes push the document name, element nodes push the normalized
 * tag name, and every other node type pushes v.text.text.
 */
static int
es_gumbo_node_name(duk_context *ctx)
{
  es_gumbo_node_t *egn = es_get_native_obj(ctx, 0, &es_native_gumbo_node);
  const GumboNode *node = egn->node;
  const char *name;

  if(node->type == GUMBO_NODE_DOCUMENT)
    name = node->v.document.name;
  else if(node->type == GUMBO_NODE_ELEMENT)
    name = gumbo_normalized_tagname(node->v.element.tag);
  else
    name = node->v.text.text;

  duk_push_string(ctx, name);
  return 1;
}
コード例 #8
0
static const void findTag(const GumboNode* root, int layer){ //layer代表层数
	//用于递归结束条件
	if(root->type != GUMBO_NODE_ELEMENT){
		return;
	}
	//while的功能是缩进格式
	int indent = layer;
	while(indent--){
		printf("  ");
	}
	//获取标签名
	const char* tag_name = gumbo_normalized_tagname(root->v.element.tag);
	printf("%d--------%s\n", layer, tag_name);
	//
	const GumboVector* rchildren = &root->v.element.children;
	for(unsigned int i = 0; i < rchildren->length; ++i){
		GumboNode* child =  rchildren->data[i];
		findTag(child, layer+1);	
	}
}
コード例 #9
0
ファイル: parse.c プロジェクト: craigbarnes/lua-gumbo
/*
 * Fill in the "namespace" and "localName" fields of the table at the top of
 * the Lua stack from `element`.
 *
 * SVG elements get namespace "svg" and, when gumbo_normalize_svg_tagname()
 * yields a name for the original tag text, that name as localName. MathML
 * elements get namespace "math". In all remaining cases localName is the
 * lower-cased original tag text for unknown tags, or the normalized tag name
 * for known tags.
 */
static void set_tag(lua_State *L, const GumboElement *element) {
    switch (element->tag_namespace) {
    case GUMBO_NAMESPACE_SVG: {
        GumboStringPiece svg_piece = element->original_tag;
        const char *svg_name;

        set_literal(L, "namespace", "svg");
        gumbo_tag_from_original_text(&svg_piece);
        svg_name = gumbo_normalize_svg_tagname(&svg_piece);
        if (svg_name) {
            set_string(L, "localName", svg_name);
            return;
        }
        /* No SVG replacement found: use the generic handling below. */
        break;
    }
    case GUMBO_NAMESPACE_MATHML:
        set_literal(L, "namespace", "math");
        break;
    default:
        break;
    }

    if (element->tag != GUMBO_TAG_UNKNOWN) {
        lua_pushstring(L, gumbo_normalized_tagname(element->tag));
    } else {
        GumboStringPiece piece = element->original_tag;
        gumbo_tag_from_original_text(&piece);
        pushstring_lower(L, piece.data, piece.length);
    }
    lua_setfield(L, -2, "localName");
}
コード例 #10
0
ファイル: GumboInterface.cpp プロジェクト: CedarLogic/Sigil
// Returns a display name for `node`.
//
// Document, text/whitespace and CDATA nodes map to the fixed names
// "#document", "#text" and "#cdata". For everything else the normalized tag
// name is used, except that:
//   - SVG elements prefer the replacement from gumbo_normalize_svg_tagname()
//     when one exists, and
//   - an empty normalized name falls back to the tag text as written in the
//     source.
std::string GumboInterface::get_tag_name(GumboNode *node)
{
  switch (node->type) {
    case GUMBO_NODE_DOCUMENT:
      return std::string("#document");
    case GUMBO_NODE_TEXT:
    case GUMBO_NODE_WHITESPACE:
      return std::string("#text");
    case GUMBO_NODE_CDATA:
      return std::string("#cdata");
    default:
      break;
  }

  std::string tagname = gumbo_normalized_tagname(node->v.element.tag);
  const bool is_svg = (node->v.element.tag_namespace == GUMBO_NAMESPACE_SVG);

  // Common case: a known, non-SVG tag needs no look at the original text.
  if (!is_svg && !tagname.empty()) {
    return tagname;
  }

  // Narrow the original tag text down to just the tag name.
  GumboStringPiece piece = node->v.element.original_tag;
  gumbo_tag_from_original_text(&piece);

  if (is_svg) {
    // The replacement only changes letter case, so it has the same length as
    // the original piece; it may not be null-terminated, hence the explicit
    // length. A null result means "no replacement", not "use the original".
    const char * replacement = gumbo_normalize_svg_tagname(&piece);
    if (replacement != NULL) {
      return std::string(replacement, piece.length);
    }
  }

  return tagname.empty() ? std::string(piece.data, piece.length) : tagname;
}
コード例 #11
0
// TODO: case-insensitive comparison
// Walks the element tree rooted at `node` depth-first. Every
// <div class="forum-post-entry"> that is found is handed to
// ForumPageParser::parseForumPostEntry(), followed by a separator line on
// std::cout. Non-element nodes are ignored.
static void searchForDivBlocks(GumboNode* node)
{
	if (node->type != GUMBO_NODE_ELEMENT) return;

	// <div class="forum-post-entry">
	// The "class" attribute is only looked up on <div> elements.
	GumboAttribute* classAttr = NULL;
	if (node->v.element.tag == GUMBO_TAG_DIV)
	{
		classAttr = gumbo_get_attribute(&node->v.element.attributes, "class");
	}

	if (classAttr != NULL)
	{
		const bool isForumPostEntry = (std::string(classAttr->value) == "forum-post-entry");
		if (isForumPostEntry)
		{
			// <div class="forum-post-text" id="message_text_4453758">
			GumboNode* fpeNode = node;

			ForumPageParser fpp;
			fpp.parseForumPostEntry(fpeNode);

			// The loop body below is compiled out; it is an older inline
			// version of the post parsing kept for reference.
			for (unsigned int i = 0; i < fpeNode->v.element.children.length; ++i)
			{
#if 0
				GumboNode* fpeChildNode = static_cast< GumboNode* >(fpeNode->v.element.children.data[i]);
				if (fpeChildNode->type != GUMBO_NODE_ELEMENT) continue;
				assert(fpeChildNode->v.element.tag == GUMBO_TAG_DIV);
				if (fpeChildNode->v.element.tag != GUMBO_TAG_DIV) continue;

				std::cout << "    FPE children tags:" << gumbo_normalized_tagname(fpeChildNode->v.element.tag) << std::endl;
				for (int iAttr = 0; iAttr < fpeChildNode->v.element.attributes.length;++iAttr)
				{
					GumboAttribute* attr = static_cast< GumboAttribute* >( fpeChildNode->v.element.attributes.data[iAttr] );
					assert(attr != NULL);
					std::cout << "ATTR: " << attr->name << " = " << attr->value << std::endl;
				}

				if (fpeChildNode->v.element.tag == GUMBO_TAG_DIV)
				{
					GumboAttribute* fpeChildNodeClassAttr = gumbo_get_attribute(&fpeChildNode->v.element.attributes, "class");
					assert(fpeChildNodeClassAttr != NULL);
					std::string fpeChildNodeClassAttrStr(fpeChildNodeClassAttr->value);
					if (fpeChildNodeClassAttrStr == "forum-post-text")
					{
						GumboNode* fptNode = fpeChildNode;
						assert(fptNode->type == GUMBO_NODE_ELEMENT);
//                       qDebug() << "  forum-post-text children count: " << fptNode->v.element.children.length;

						GumboAttribute* fptNodeIdAttr = gumbo_get_attribute(&fptNode->v.element.attributes, "id");
						assert(fptNodeIdAttr != NULL);

						//std::cout << "    Forum post found: " << fptNodeIdAttr->value << std::endl;

						// FIXME: remove after debugging
						if (std::strcmp(fptNodeIdAttr->value, "message_text_4453758") != 0) continue;

						GumboVector* fptNodeChildren = &fptNode->v.element.children;
						for (unsigned int j = 0; j < fptNodeChildren->length; ++j)
						{
							GumboNode* fptNodeChild = static_cast< GumboNode* >(fptNodeChildren->data[j]);
							std::cout << "Element type: " << gumboElementTypeToString( fptNodeChild->type ) << std::endl;

							if (fptNodeChild->type == GUMBO_NODE_TEXT)
							{
								/*std::locale loc(std::locale(), new std::codecvt_utf8<char>);
								std::cout.imbue(loc);*/
								std::string TEST(fptNodeChild->v.text.text);

								WCHAR wBuf[1024] = { 0 };
								int res2 = MultiByteToWideChar(CP_UTF8, 0, fptNodeChild->v.text.text, -1, wBuf, 1024);
								char mbcsBuf[1024] = { 0 };
								int res3 = WideCharToMultiByte(1251, 0, wBuf, 1024, mbcsBuf, 1024, NULL, NULL);
								std::string TEST_2(mbcsBuf);
								std::cout << "Element text: " << TEST_2 << std::endl;

								break;
							}
							else if (fptNodeChild->type == GUMBO_NODE_ELEMENT)
							{
								std::cout << "    " << gumbo_normalized_tagname(fptNodeChild->v.element.tag) << std::endl;
								//qDebug() << "     " << gumbo_tag_from_original_text( &fptNodeChild->v.element.original_tag );
							}
						}
					}
				}
#endif
			}

			std::cout << "----------------------------" << std::endl;
		}
	}

	// Recurse into every child; non-element children return immediately.
	GumboVector& childNodes = node->v.element.children;
	unsigned int childIdx = 0;
	while (childIdx < childNodes.length)
	{
		searchForDivBlocks(static_cast< GumboNode* >(childNodes.data[childIdx]));
		++childIdx;
	}
}
コード例 #12
0
ファイル: Node.cpp プロジェクト: zacharygrafton/gumbopp
// Returns the normalized tag name of this node.
// Throws NotAnElementException when the node is not an element.
string_view Node::GetElement() const {
  if(impl->data->type != GUMBO_NODE_ELEMENT)
    throw NotAnElementException();

  const char* tag_name = gumbo_normalized_tagname(impl->data->v.element.tag);
  return string_view { tag_name };
}