/**
 * Parses the single tag whose raw text occupies [b, c) and reports it
 * through foundTag().
 *
 * The first character of the range is skipped (presumably the opening '<' —
 * confirm against the caller), an optional '/' marks a closing tag, and the
 * following run of alphanumeric characters is taken as the tag name.
 *
 * For opening tags whose name matches an entry of literal_mode_elem[],
 * mpLiteral is set to that entry's name.
 *
 * mCurrentOffset is advanced by the length of the tag text.
 *
 * @param b iterator to the first character of the tag text
 * @param c iterator one past the last character of the tag text
 */
void htmlcxx::HTML::ParserSax::parseHtmlTag(_Iterator b, _Iterator c)
{
	// The tag name starts one character after b. The increment is guarded so
	// that an empty range never moves the iterator past c.
	_Iterator name_begin(b);
	if (name_begin != c) ++name_begin;

	// A '/' in that position marks a closing tag; skip it to reach the name.
	// name_begin is only dereferenced while it is still inside [b, c).
	const bool is_end_tag = (name_begin != c && *name_begin == '/');
	if (is_end_tag) ++name_begin;

	// Advance name_end over the alphanumeric run that forms the tag name,
	// stopping at the end of the range or at the first non-alphanumeric
	// character.
	_Iterator name_end(name_begin);
	// The cast matters: passing a negative char (any byte >= 0x80 where char
	// is signed) to isalnum() is undefined behavior.
	while (name_end != c && isalnum(static_cast<unsigned char>(*name_end)))
	{
		++name_end;
	}

	// The text between name_begin and name_end is the tag name.
	std::string name(name_begin, name_end);
	//DEBUGP("Found %s tag %s\n", is_end_tag ? "closing" : "opening", name.c_str());

	// Check whether this opening tag is one of the special elements listed in
	// literal_mode_elem[]; the table is terminated by an entry whose len is 0.
	if (!is_end_tag)
	{
		std::string::size_type tag_len = name.length();
		for (int i = 0; literal_mode_elem[i].len; ++i)
		{
			// Compare lengths first so the string comparison only runs on
			// plausible candidates.
			if (tag_len == literal_mode_elem[i].len)
			{
				// NOTE(review): the comparison is case-insensitive on WIN32
				// (non-MinGW) builds but case-sensitive elsewhere since the
				// 2013 change recorded below — confirm this is intended.
				#if defined(WIN32) && !defined(__MINGW32__)
				if (!_stricmp(name.c_str(), literal_mode_elem[i].str))
				#else
				if (!strcmp(name.c_str(), literal_mode_elem[i].str)) //--modified by wujun 2013.5.7 --orig: [ if (!strcasecmp(name.c_str(), literal_mode_elem[i].str)) ]
				#endif
				{
					mpLiteral = literal_mode_elem[i].str;
					break;
				}
			}
		}
	}

	// Build the Node describing this tag.
	htmlcxx::HTML::Node tag_node;
	// By now, length is just the size of the tag text itself.
	std::string text(b, c);
	tag_node.length(static_cast<unsigned int>(text.length()));
	tag_node.tagName(name);
	tag_node.text(text);
	tag_node.offset(mCurrentOffset);
	tag_node.isTag(true);
	tag_node.isComment(false);

	// Move the current parse position past this tag.
	mCurrentOffset += tag_node.length();

	// Hand the node to the tag callback, flagging whether it is a closing tag.
	this->foundTag(tag_node, is_end_tag);
}
// Example no. 2
// 0
/*
 * Scan the next space-delimited term from the text [*s_inout, e) and
 * describe it in *expr_out.
 *
 * Two shapes are recognized:
 *
 *   modifier:   name "=" value
 *   directive:  [prefix] name [ ":" value | "/" digits { "/" digits } ]
 *
 * A directive without an explicit prefix character gets the prefix '+'.
 * For a modifier only smx_type, smx_name_* and smx_value_* are written;
 * the prefix and cidr fields of *expr_out are left untouched.
 *
 * name_end() is defined elsewhere; its result is used here as the end of
 * the name starting at its first argument, with NULL meaning failure.
 *
 * Returns 0 on success, with *s_inout moved past the term and any spaces
 * that follow it.  Returns -1 on a malformed term, with *s_inout set to e.
 */
int
sm_marid_scan_expression(
	char const		**s_inout,
	char const		*e,
	sm_marid_expression	*expr_out)
{
	char const		*cur;
	char const		*term_end;
	char const		*name_stop;

	/* Step over the blanks in front of the term. */
	for (cur = *s_inout; cur < e && *cur == ' '; cur++)
		;

	/* Find where the term stops: the next blank, or the end of input. */
	term_end = cur;
	while (term_end < e && *term_end != ' ')
		term_end++;

	if (cur < e && SM_MARID_IS_DIRECTIVE_PREFIX(*cur))
	{
		/* Explicit prefix character: record it and move on to the name. */
		expr_out->smx_prefix = *cur;
		cur++;
	}
	else
	{
		/* No prefix.  A name directly followed by '=' is a modifier. */
		name_stop = name_end(cur, e);
		if (name_stop != NULL && name_stop < e && *name_stop == '=')
		{
			expr_out->smx_type	= SM_MARID_MODIFIER;
			expr_out->smx_name_s	= cur;
			expr_out->smx_name_e	= name_stop;
			expr_out->smx_value_s	= name_stop + 1;
			expr_out->smx_value_e	= term_end;
			goto finish;
		}

		/* Otherwise it is a directive with the default prefix. */
		expr_out->smx_prefix = '+';
	}

	/* Directive: fill in the name and clear the optional parts. */
	expr_out->smx_type	= SM_MARID_DIRECTIVE;
	expr_out->smx_name_s	= cur;
	cur = name_end(cur, e);
	expr_out->smx_name_e	= cur;
	expr_out->smx_value_s	= NULL;
	expr_out->smx_value_e	= NULL;
	expr_out->smx_cidr_s	= NULL;
	expr_out->smx_cidr_e	= NULL;

	if (cur == NULL)
		goto fail;

	if (cur < e && *cur == ':')
	{
		/* ":" introduces a value that runs to the end of the term. */
		expr_out->smx_value_s = cur + 1;
		expr_out->smx_value_e = term_end;
	}
	else if (cur < term_end)
	{
		/* Anything else left in the term must be "/digits" groups. */
		if (*cur != '/')
			goto fail;

		expr_out->smx_cidr_s = cur;
		do
		{
			/* Skip the '/'; at least one digit has to follow it. */
			cur++;
			if (cur >= term_end || !SM_MARID_IS_DIGIT(*cur))
				goto fail;

			while (cur < term_end && SM_MARID_IS_DIGIT(*cur))
				cur++;

			/* After the digits: end of term, or another '/'. */
			if (cur < term_end && *cur != '/')
				goto fail;
		} while (cur < term_end);
		expr_out->smx_cidr_e = cur;
	}

finish:
	/* Success: leave the caller positioned at the start of the next term. */
	while (term_end < e && *term_end == ' ')
		term_end++;
	*s_inout = term_end;
	return 0;

fail:
	/* Malformed term: consume the rest of the input and report failure. */
	*s_inout = e;
	return -1;
}