/* Find the module name abbreviated by the |l| characters starting at |name|.
   The tree rooted at |root| is searched using |mod_name_cmp| against the
   first |key_length| characters of each node's stored name.

   Returns the matching node; returns |NULL| after reporting an error when
   the abbreviation is incompatible with a stored name or is ambiguous.
   When nothing matches, a new node holding |name| as an incomplete name is
   created and linked into the tree. */
local mod_pointer prefix_lookup (char* name,int l)
{
  mod_pointer cur=root;      /* node currently being examined */
  mod_pointer* link=&root;   /* the link through which |cur| was reached */
  mod_pointer found=NULL;    /* first node of which |name| is a prefix */
  mod_pointer pending=NULL;  /* other subtree that may still hold matches */
  char* fresh;               /* storage for a newly created incomplete name */

  while (cur!=NULL)
  {
    int klen=cur->key_length;
    char* key=name_begin(cur);

    switch (mod_name_cmp(name,l,key,klen))
    {
    case less:
      link=&cur->llink;
      cur=*link;
      break;
    case greater:
      link=&cur->rlink;
      cur=*link;
      break;
    case equal:
      return cur; /* a match, and no other matches are possible */
    case extension:
      {
        /* compare what follows the first |klen| characters on both sides */
        enum mod_comparison rest=
          mod_name_cmp(name+klen,l-klen,key+klen,(int)strlen(key+klen));

        switch (rest)
        {
        case less:
        case greater:
          err_print("! Incompatible module name");
          print("\nName inconsistently extends <%.*s...>.\n",klen,key);
          return NULL;
        case prefix:
        case equal:
          return cur;
        case extension:
          if (complete_name(cur))
          {
            err_print("! Incompatible module name");
            print("\nPrefix exists: <%s>.\n",key);
            return NULL;
          }
          /* the stored name is incomplete: replace it by the longer |name| */
          free(key-1); /* the allocation starts one byte before the name */
          key=(char*)malloc(l+2);
          if (key==NULL) fatal("Out of dynamic memory!");
          *key++='\1'; /* ensure that |complete_name(cur)| is false afterwards */
          strncpy(key,name,l);
          key[l]='\0'; /* store the incomplete name */
          name_begin(cur)=key; /* install new name in node |cur| */
          return cur;
        }
      }
      /* every listed result of the inner comparison returns above;
         control continues here only for a result not listed there */
    case prefix:
      if (found!=NULL)
      {
        err_print("! Ambiguous prefix");
        return NULL;
      }
      found=cur;
      pending=cur->rlink;
      cur=cur->llink; /* |link| is irrelevant from now on */
    }

    if (cur==NULL && found!=NULL)
    { /* this subtree is exhausted: search the other one, once */
      cur=pending;
      pending=NULL;
    }
  }

  if (found!=NULL)
  {
    found->key_length=l; /* |name| is a shorter prefix than used before */
    return found;
  }

  /* no match at all: enter |name| as a new incomplete module name */
  fresh=(char*)malloc(l+2);
  if (fresh==NULL) fatal("Out of dynamic memory!");
  *fresh++='\1'; /* ensure that |complete_name| is false for the new node */
  strncpy(fresh,name,l);
  fresh[l]='\0'; /* store the incomplete name */
  cur=make_mod_node(fresh); /* prepare new node */
  cur->key_length=l;
  *link=cur; /* install new node into tree */
  return cur;
}
/* Take the next free entry of the module table, make it a leaf (both
   links |NULL|) whose name is |name|, run |init_module_name| on it and
   return it.  |overflow| is called when the entry taken lies at or beyond
   |mod_table_end|. */
local mod_pointer make_mod_node (char* name)
{
  mod_pointer fresh=mod_ptr++; /* claim the slot and advance the allocator */

  if (fresh>=mod_table_end) overflow ("module name");

  fresh->rlink=NULL;
  fresh->llink=NULL;
  name_begin(fresh)=name;
  init_module_name(fresh); /* initialise new node */
  return fresh;
}
/* Find the node for the complete module name given by the |l| characters
   at |name|, creating and linking a new node when the tree has no
   comparable entry.

   Returns the node; returns |NULL| after reporting an error when |name|
   conflicts with a stored name (inconsistent extension, extension of a
   complete name, or |name| being only a prefix of a stored name). */
local mod_pointer mod_name_lookup (char* name, int l)
{
  mod_pointer cur;          /* current node of the search tree */
  mod_pointer* link=&root;  /* |cur| is always read from this location */

  for (cur=*link; cur!=NULL; cur=*link)
  {
    int klen=cur->key_length;
    char* key=name_begin(cur);

    switch (mod_name_cmp(name,l,key,klen))
    {
    case less:
      link=&cur->llink;
      break;
    case greater:
      link=&cur->rlink;
      break;
    case equal:
    case extension:
      {
        /* compare what follows the first |klen| characters on both sides */
        enum mod_comparison rest=
          mod_name_cmp(name+klen,l-klen,key+klen,(int)strlen(key+klen));

        switch (rest)
        {
        case less:
        case greater:
          err_print("! Incompatible module name");
          print("\nName inconsistently extends <%.*s...>.\n",klen,key);
          return NULL;
        case extension:
        case equal:
          if (complete_name(cur))
          {
            if (rest!=equal)
            {
              err_print("! Incompatible module name");
              print("\nPrefix exists: <%s>.\n",key);
              return NULL;
            }
            return cur; /* exactly the complete name already stored */
          }
          /* stored name was incomplete: install |name| in place of |key| */
          name_begin(cur)=store_string(name,l);
          free(key-1); /* the old allocation starts one byte before the name */
          return cur;
        }
      }
      /* deliberate fall-through: any tail comparison result not handled
         above (such as |prefix|) is reported by the case below */
    case prefix:
      err_print("! Incompatible module name");
      print("\nName is a prefix of <%s%s>.\n"
           ,key, complete_name(cur) ? "" : "...");
      return NULL; /* dummy module name */
    }
  }

  /* nothing comparable in the tree: |link| is where the new node belongs */
  cur=make_mod_node(store_string(name,l)); /* prepare new node */
  cur->key_length=l;
  *link=cur; /* install new node into tree */
  return cur;
}
void htmlcxx::HTML::ParserSax::parseHtmlTag(_Iterator b, _Iterator c)
{
	// Parse one tag whose text is the range [b, c): extract the tag name,
	// decide whether it is a closing tag, switch to literal mode when the
	// name is listed in literal_mode_elem[], then build a Node describing
	// the tag and hand it to foundTag().

	// Step over the first character of the range (the tag opener), but
	// never move past the end of an empty range.
	_Iterator name_begin(b);
	if (name_begin != c) ++name_begin;

	// A '/' right after the opener marks a closing tag; skip it so that
	// name_begin points at the first character of the name. The range
	// check avoids dereferencing the end iterator on a truncated tag.
	bool is_end_tag = (name_begin != c && *name_begin == '/');
	if (is_end_tag) ++name_begin;

	// Advance name_end over the run of alphanumeric characters; it stops
	// at the end of the range or at the first non-alphanumeric character.
	// The cast is required: passing a negative char to isalnum() is
	// undefined behaviour, and bytes >= 0x80 are negative when char is signed.
	_Iterator name_end(name_begin);
	while (name_end != c && isalnum(static_cast<unsigned char>(*name_end)))
	{
		++name_end;
	}

	// The text between name_begin and name_end is taken as the tag name.
	std::string name(name_begin, name_end);
	//DEBUGP("Found %s tag %s\n", is_end_tag ? "closing" : "opening", name.c_str());

	// For an opening tag, check whether the name is one of the entries of
	// literal_mode_elem[] (the table ends at the entry whose len is 0);
	// if so, remember the matching entry in mpLiteral.
	if (!is_end_tag)
	{
		std::string::size_type tag_len = name.length();
		for (int i = 0; literal_mode_elem[i].len; ++i)
		{
			if (tag_len == literal_mode_elem[i].len)
			{
				// NOTE(review): the Windows branch compares case-insensitively
				// while the other branch is case-sensitive since the change
				// recorded below; confirm that this difference is intended.
				#if defined(WIN32) && !defined(__MINGW32__)
				if (!_stricmp(name.c_str(), literal_mode_elem[i].str))
				#else
				if (!strcmp(name.c_str(), literal_mode_elem[i].str)) //--modified by wujun 2013.5.7 --orgi: [ if (!strcasecmp(name.c_str(), literal_mode_elem[i].str)) ]
				#endif
				{
					mpLiteral = literal_mode_elem[i].str;
					break;
				}
			}
		}
	}

	// Build the node that describes this tag.
	htmlcxx::HTML::Node tag_node;
	//by now, length is just the size of the tag
	std::string text(b, c);
	tag_node.length(static_cast<unsigned int>(text.length()));
	tag_node.tagName(name);
	tag_node.text(text);
	tag_node.offset(mCurrentOffset);
	tag_node.isTag(true);
	tag_node.isComment(false);

	// Advance the current parse offset past this tag.
	mCurrentOffset += tag_node.length();

	// Report the tag, together with whether it is a closing tag.
	this->foundTag(tag_node, is_end_tag);
}
id_pointer id_lookup (char* first,char* last,int ilk) /* look up an identifier */ { int l,h; /* length and hash code of the given identifier */ if (last==NULL) last=first+(l=(int)strlen(first)); /* null-terminated string */ else l=(int)(last-first); /* compute the length */ { char* p=first; h=*p; while (++p<last) h=((h<<1)+*p)%hash_size; } { id_pointer p=hash[h]; /* the head of the hash list */ while (p!=NULL && !names_match(p,first,l,ilk)) p=p->hash_link; if (p==NULL) /* we haven't seen this identifier before */ { p=id_ptr; /* this is where the new name entry will be created */ if (id_ptr++>=id_table_end) overflow ("identifier"); name_begin(p)=store_string(first,l); if (program==cweave) init_id_name(p,ilk); p->hash_link=hash[h]; hash[h]=p; /* insert |p| at beginning of hash list */ } return p; } }