/**
 * Process one tag whose raw text occupies the range [b, c).
 *
 * Extracts the tag name, decides whether it is an opening or a closing tag,
 * records a literal-mode element when an opening tag's name matches an entry
 * of literal_mode_elem[], builds a Node describing the tag and hands it to
 * foundTag().
 *
 * @param b  Iterator to the first character of the tag text (presumably the
 *           '<' - the caller is not visible here; the range is assumed to be
 *           non-empty).
 * @param c  Iterator one past the last character of the tag text.
 *
 * Side effects: may set mpLiteral, advances mCurrentOffset by the length of
 * the tag text, and calls foundTag().
 */
void htmlcxx::HTML::ParserSax::parseHtmlTag(_Iterator b, _Iterator c)
{
	// Start one position after b, i.e. skip the tag's first character.
	_Iterator name_begin(b);
	++name_begin;

	// A '/' in that position marks a closing tag. The range check keeps us
	// from dereferencing the end iterator when the tag text is one char long.
	bool is_end_tag = (name_begin != c && *name_begin == '/');
	if (is_end_tag) ++name_begin; // step over the '/'

	// The tag name is the run of alphanumeric characters that follows; it ends
	// at the first non-alphanumeric character or at the end of the range.
	// isalnum() has undefined behaviour for negative arguments other than EOF,
	// so the (possibly signed) char is converted to unsigned char first.
	_Iterator name_end(name_begin);
	while (name_end != c && isalnum(static_cast<unsigned char>(*name_end)))
	{
		++name_end;
	}

	std::string name(name_begin, name_end);
	//DEBUGP("Found %s tag %s\n", is_end_tag ? "closing" : "opening", name.c_str());

	// Only an opening tag is checked against literal_mode_elem[]. The table is
	// scanned until an entry with len == 0 is reached; on a match mpLiteral is
	// pointed at that entry's name.
	// NOTE(review): presumably this puts the parser into "literal" mode until
	// the matching closing tag - confirm against the code that reads mpLiteral.
	if (!is_end_tag)
	{
		std::string::size_type tag_len = name.length();
		for (int i = 0; literal_mode_elem[i].len; ++i)
		{
			// Cheap length check before the string comparison.
			if (tag_len == literal_mode_elem[i].len)
			{
#if defined(WIN32) && !defined(__MINGW32__)
				// Case-insensitive comparison on this build.
				if (!_stricmp(name.c_str(), literal_mode_elem[i].str))
#else
				// Case-sensitive comparison on this build.
				// Modified by wujun 2013.5.7; originally:
				//   if (!strcasecmp(name.c_str(), literal_mode_elem[i].str))
				if (!strcmp(name.c_str(), literal_mode_elem[i].str))
#endif
				{
					mpLiteral = literal_mode_elem[i].str;
					break;
				}
			}
		}
	}

	// Describe the tag as a Node.
	htmlcxx::HTML::Node tag_node;
	// By now, length is just the size of the tag text itself.
	std::string text(b, c);
	tag_node.length(static_cast<unsigned int>(text.length()));
	tag_node.tagName(name);
	tag_node.text(text);
	tag_node.offset(mCurrentOffset);
	tag_node.isTag(true);
	tag_node.isComment(false);

	// Advance the running parse offset past this tag.
	mCurrentOffset += tag_node.length();

	// Report the tag (the original author's note says this adds it to the
	// HTML tree being built).
	this->foundTag(tag_node, is_end_tag);
}
/*
**  SM_MARID_SCAN_EXPRESSION -- split off the next blank-delimited term
**
**	Skips leading blanks in [*s_inout, e), takes everything up to the
**	next blank (or e) as one term and classifies it:
**
**	  - If the term does not start with a directive prefix character and
**	    name_end() stops on an '=', it is a modifier: smx_type,
**	    smx_name_s/e and smx_value_s/e are filled in.  No other field of
**	    expr_out is written on this path.
**	  - Otherwise it is a directive: smx_prefix is the explicit prefix
**	    character or '+' by default, smx_name_s/e delimit the name, and
**	    either smx_value_s/e (text after a ':' up to the end of the term)
**	    or smx_cidr_s/e (one or more "/digits" groups reaching the end of
**	    the term) is set; the fields that do not apply are NULL.
**
**	Parameters:
**		s_inout -- in: start of the text to scan;
**			   out: start of the following term (after any
**			   trailing blanks) on success, e on failure.
**		e -- end of the text (one past the last character).
**		expr_out -- receives the pieces of the scanned term.
**
**	Returns:
**		0 on success, -1 on a syntax error.
**
**	Notes:
**		name_end() is defined elsewhere; from its use here it yields
**		the position just after a name, or NULL -- confirm there.
*/

int
sm_marid_scan_expression(
	char const **s_inout,
	char const *e,
	sm_marid_expression *expr_out)
{
	char const *cur;	/* current scan position */
	char const *term_end;	/* first blank after the term, or e */
	char const *next;	/* where the following term starts */

	/* Step over any blanks in front of the term. */
	cur = *s_inout;
	while (cur < e && *cur == ' ')
		cur++;

	/* Find the end of this term. */
	term_end = cur;
	while (term_end < e && *term_end != ' ')
		term_end++;

	/* Work out up front where scanning resumes after a successful parse. */
	next = term_end;
	while (next < e && *next == ' ')
		next++;

	if (cur < e && SM_MARID_IS_DIRECTIVE_PREFIX(*cur))
	{
		/* Explicit prefix: record it and move on to the name. */
		expr_out->smx_prefix = *cur;
		cur++;
	}
	else
	{
		/* Without a prefix this may be "name=value", i.e. a modifier. */
		char const *eq = name_end(cur, e);

		if (eq != NULL && eq < e && *eq == '=')
		{
			expr_out->smx_type = SM_MARID_MODIFIER;
			expr_out->smx_name_s = cur;
			expr_out->smx_name_e = eq;
			expr_out->smx_value_s = eq + 1;
			expr_out->smx_value_e = term_end;

			*s_inout = next;
			return 0;
		}

		/* A directive without an explicit prefix defaults to '+'. */
		expr_out->smx_prefix = '+';
	}

	/* From here on the term is treated as a directive. */
	expr_out->smx_type = SM_MARID_DIRECTIVE;
	expr_out->smx_name_s = cur;
	cur = name_end(cur, e);
	expr_out->smx_name_e = cur;
	expr_out->smx_value_s = NULL;
	expr_out->smx_value_e = NULL;
	expr_out->smx_cidr_s = NULL;
	expr_out->smx_cidr_e = NULL;

	if (cur == NULL)
		goto syntax_error;

	if (cur < e && *cur == ':')
	{
		/* ":value" -- everything up to the end of the term. */
		expr_out->smx_value_s = cur + 1;
		expr_out->smx_value_e = term_end;
	}
	else if (cur < term_end)
	{
		/* Anything else after the name must be "/digits" groups. */
		if (*cur != '/')
			goto syntax_error;
		expr_out->smx_cidr_s = cur;

		do
		{
			/* Skip the '/'; at least one digit has to follow. */
			cur++;
			if (cur >= term_end || !SM_MARID_IS_DIGIT(*cur))
				goto syntax_error;

			while (cur < term_end && SM_MARID_IS_DIGIT(*cur))
				cur++;

			/* After the digits: end of term or another '/'. */
			if (cur != term_end && *cur != '/')
				goto syntax_error;
		} while (cur != term_end);

		expr_out->smx_cidr_e = cur;
	}

	*s_inout = next;
	return 0;

syntax_error:
	/* On a malformed term the caller's position is moved to the end. */
	*s_inout = e;
	return -1;
}