//sets the data, param, and type for a node static int dtree_set_payload(dtree_dt_index *h,dtree_dt_node *n,void *data,flag_f type,void *param) { packed_ptr pp; dtree_dt_node *base=n; //dup if((type & n->flags)==type && data==n->payload && param==n->cparam) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: EOT: '%c' level: %d p(%p) pp(%p) param(%p)\n", n->data,dtree_node_depth(h,n),n,n->curr,param); if(n->flags && (h->sflags & DTREE_S_FLAG_DUPS)) { dtree_printd(DTREE_PRINT_INITDTREE,"ADD: DUPLICATE detected, adding\n"); pp=n->dup; //iterate thru the dups while(pp) { n=DTREE_DT_GETPP(h,pp); //dup if((type & n->flags)==type && data==n->payload && param==n->cparam) return 0; pp=n->dup; } pp=dtree_alloc_node(h); if(!pp) { fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count); return -1; } n->dup=pp; n=DTREE_DT_GETPP(h,pp); n->curr=base->curr; n->prev=base->prev; n->data=base->data; return dtree_set_payload(h,n,data,type,param); } else if(n->flags && !(h->sflags & DTREE_S_FLAG_DUPS)) dtree_printd(DTREE_PRINT_INITDTREE,"ADD: DUPLICATE detected, overwriting original\n"); n->flags=DTREE_DT_FLAG_TOKEN; n->flags|=type; n->payload=data; n->cparam=param; return 1; }
//adds an entry to the tree int dtree_add_entry(dtree_dt_index *h,const char *key,dtree_dt_add_entry *entry) { char namebuf[DTREE_DATA_BUFLEN+1]; packed_ptr pp; if(!key) return 0; *namebuf='\0'; namebuf[sizeof(namebuf)-1]='\0'; strncpy(namebuf+1,key,sizeof(namebuf)-1); if(namebuf[sizeof(namebuf)-1]) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD ENTRY: name '%s':%d\n",namebuf+1,entry->flags); if(!h->head) { pp=dtree_alloc_node(h); if(!pp) { fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count); return -1; } h->head=DTREE_DT_GETPP(h,pp); h->head->curr=pp; h->head->data=0; } if(!entry->flags) entry->flags=DTREE_DT_FLAG_STRONG; return dtree_add_node(h,h->head,namebuf+1,entry); }
//adds a node to the tree
//
//Recursively inserts the pattern text starting at t below node n and attaches
//entry to the node(s) where the pattern ends. When DTREE_S_FLAG_REGEX is set in
//h->sflags the escape, group, optional, set and any-character markers are
//interpreted; each of them is ignored when the preceding character is
//DTREE_PATTERN_ESCAPE.
//
//t must be writable and must have at least one readable character in front of
//it: the code looks back at *(t-1), temporarily overwrites the set terminator,
//and lower-cases 'A'-'Z' in place.
//
//Returns -1 on allocation failure. Returns 0 when n is NULL, when an optional
//marker is reached on a node without a parent, when a set is not terminated, or
//when a character cannot be hashed. A fully expanded set returns 1. Otherwise
//the result of dtree_set_payload() is returned.
static int dtree_add_node(dtree_dt_index *h,dtree_dt_node *n,char *t,const dtree_dt_add_entry *entry)
{
    int hash;
    char *s;
    packed_ptr pp;
    dtree_dt_node *next;

    if(!n)
        return 0;

    dtree_printd(DTREE_PRINT_INITDTREE,"ADD: tree: '%c' level: %d t: '%s' p(%p) pp(%p)\n",
                 n->data?n->data:'^',dtree_node_depth(h,n),t,n,n->curr);

    //escape: step over an unescaped escape character so that the character
    //after it is handled with *(t-1)==DTREE_PATTERN_ESCAPE
    if(h->sflags & DTREE_S_FLAG_REGEX && (*t==DTREE_PATTERN_ESCAPE) && *(t-1)!=DTREE_PATTERN_ESCAPE)
        t++;

    //(abc): group delimiters are not stored in the tree, they are stepped over
    while(h->sflags & DTREE_S_FLAG_REGEX && (*t==DTREE_PATTERN_GROUP_S || *t==DTREE_PATTERN_GROUP_E) && *(t-1)!=DTREE_PATTERN_ESCAPE)
    {
        //hash is reused as a nesting depth: 1 for a group start, 0 for a group end
        hash=(*t==DTREE_PATTERN_GROUP_S);
        s=t+1;

        //advance s until the depth drops back to 0 (one past the matching
        //group end) or the pattern ends
        while(*s && hash)
        {
            if(*s==DTREE_PATTERN_GROUP_S && *(s-1)!=DTREE_PATTERN_ESCAPE)
                hash++;
            else if(*s==DTREE_PATTERN_GROUP_E && *(s-1)!=DTREE_PATTERN_ESCAPE)
                hash--;
            s++;
        }

        if(*t==DTREE_PATTERN_GROUP_S)
        {
            dtree_printd(DTREE_PRINT_INITDTREE,"ADD: group detected: '%c' - '%c'\n",*(t+1)?*(t+1):'#',*s?*s:'#');

            //optional group: also add the pattern with the whole group left
            //out, i.e. continue from n with the text after the optional marker
            if(*s==DTREE_PATTERN_OPTIONAL && *(s-1)!=DTREE_PATTERN_ESCAPE)
            {
                if(dtree_add_node(h,n,s+1,entry)<0)
                    return -1;
            }
        }

        t++;
    }

    //?
    if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_OPTIONAL && *(t-1)!=DTREE_PATTERN_ESCAPE)
    {
        //no group: the marker applies to the single preceding character, so
        //also add the rest of the pattern from the parent of n (n left out).
        //*(t-2) is only read when *(t-1) is a group end.
        if(*(t-1)!=DTREE_PATTERN_GROUP_E || *(t-2)==DTREE_PATTERN_ESCAPE)
        {
            pp=n->prev;
            next=DTREE_DT_GETPP(h,pp);

            if(!pp)
                return 0;

            dtree_printd(DTREE_PRINT_INITDTREE,"ADD: optional: '%c' t: '%s'\n",next->data?next->data:'^',(t+1));

            if(dtree_add_node(h,next,t+1,entry)<0)
                return -1;
        }

        t++;

        //and the variant that keeps n
        return dtree_add_node(h,n,t,entry);
    }

    //EOT: pattern finished on n; a node without a parent never receives a payload here
    if(!*t && n->prev)
        return dtree_set_payload(h,n,entry);

    //[abc]
    if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_SET_S && *(t-1)!=DTREE_PATTERN_ESCAPE)
    {
        //find the unescaped set terminator
        for(s=t;*s;s++)
        {
            if(*s==DTREE_PATTERN_SET_E && *(s-1)!=DTREE_PATTERN_ESCAPE)
                break;
        }

        //unterminated set
        if(!*s)
            return 0;

        t++;

        //for every member, overwrite the terminator with that member and add
        //the pattern starting there, so each member is followed by the text
        //after the set
        while(t<s)
        {
            *s=*t;

            dtree_printd(DTREE_PRINT_INITDTREE,"ADD: set: '%c' *t: '%c' t: '%s'\n",n->data?n->data:'^',*t,s);

            //NOTE(review): on failure the terminator is left overwritten
            if(dtree_add_node(h,n,s,entry)<0)
                return -1;

            t++;
        }

        //restore the terminator
        *s=DTREE_PATTERN_SET_E;

        return 1;
    }

    hash=dtree_hash_char(*t);

    //a hash of 0 is only accepted for the character '0'
    if(!hash && *t!='0')
        return 0;

    //store letters in lower case (modifies the caller's buffer)
    if(*t>='A' && *t<='Z')
        *t|=0x20;

    //.
    if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_ANY && *(t-1)!=DTREE_PATTERN_ESCAPE)
        hash=DTREE_HASH_ANY;

    pp=(packed_ptr)n->nodes[hash];
    next=DTREE_DT_GETPP(h,pp);

    //no child for this character yet: create and link it
    if(!pp)
    {
        pp=dtree_alloc_node(h);
        next=DTREE_DT_GETPP(h,pp);

        if(!pp)
        {
            fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count);
            return -1;
        }

        if(hash==DTREE_HASH_ANY)
            next->data=DTREE_PATTERN_ANY;
        else
            next->data=*t;

        next->curr=pp;
        next->prev=n->curr;

        n->nodes[hash]=pp;

        dtree_printd(DTREE_PRINT_INITDTREE,"ADD: new node '%c' created level: %d hash: %d p(%p) pp(%p)\n",
                     *t,dtree_node_depth(h,n),hash,next,pp);
    }

    //EOT
    if(!*(t+1))
        return dtree_set_payload(h,next,entry);

    return dtree_add_node(h,next,t+1,entry);
}
//searches for a token with regex static const dtree_dt_node *dtree_search_node(const dtree_dt_index *h,const dtree_dt_node *n,const char *t,dtree_pos_f pos) { unsigned int hash; dtree_flag_f rf,nf; packed_ptr pp; const dtree_dt_node *rflag; if(!n || !n->curr) return NULL; //bad match if(n->data!=*t && n->data!=(*t|0x20) && n->data!=DTREE_PATTERN_ANY) return NULL; t++; hash=dtree_hash_char(*t); //EOS if(!hash && *t!='0') { //only quit if strong with position if(dtree_get_flag(h,n,DTREE_DT_FLAG_STRONG,pos)) return n; else if(!n->flags) return NULL; } pp=n->nodes[hash]; rflag=DTREE_DT_GETPP(h,pp); if(pp) rflag=dtree_search_node(h,rflag,t,pos); else rflag=NULL; //wildcard if(!rflag) { hash=DTREE_HASH_ANY; pp=n->nodes[hash]; rflag=DTREE_DT_GETPP(h,pp); if(pp) rflag=dtree_search_node(h,rflag,t,pos); else rflag=NULL; hash=dtree_hash_char(*t); } //n is lazy EOT or stronger match if((n->flags & DTREE_DT_FLAG_TOKEN) && ((!hash && *t!='0') || hash>=DTREE_HASH_SEP)) { nf=dtree_get_flags(h,n,pos); rf=dtree_get_flags(h,rflag,pos); if(!rflag) rflag=n; else if((nf & DTREE_DT_FLAG_STRONG) && !(rf & DTREE_DT_FLAG_STRONG)) rflag=n; else if(rf & DTREE_DT_FLAG_STRONG); else if((nf & DTREE_DT_FLAG_CHAIN) && !(nf & DTREE_DT_FLAG_BCHAIN) && !(rf & DTREE_DT_FLAG_CHAIN) && !(rf & DTREE_DT_FLAG_BCHAIN)) rflag=n; else if(rf & DTREE_DT_FLAG_CHAIN); else if((nf & DTREE_DT_FLAG_WEAK) && !(rf & DTREE_DT_FLAG_WEAK)) rflag=n; } return rflag; }
//gets the flag for a token const dtree_dt_node *dtree_get_node(const dtree_dt_index *h,const char *t,dtree_flag_f sflags,dtree_pos_f pos) { unsigned int hash; char n; const char *p; packed_ptr pp; const dtree_dt_node *base; const dtree_dt_node *lflag=NULL; if(!sflags) sflags=h->sflags; if(sflags & DTREE_S_FLAG_REGEX) { pp=h->head->nodes[dtree_hash_char(*t)]; base=DTREE_DT_GETPP(h,pp); lflag=dtree_search_node(h,base,t,pos); //leading wildcard if(!lflag) { pp=h->head->nodes[DTREE_HASH_ANY]; base=DTREE_DT_GETPP(h,pp); if(pp) lflag=dtree_search_node(h,base,t,pos); } return lflag; } pp=h->head->nodes[dtree_hash_char(*t)]; base=DTREE_DT_GETPP(h,pp); //iterative search for(p=t;*p && pp;p++) { if(base->data!=*p && base->data!=(*p|0x20) && base->data!=DTREE_PATTERN_ANY) break; n=*(p+1); hash=dtree_hash_char(n); //token match if((!hash && n!='0') || hash>=DTREE_HASH_SEP) { //match is valid with position if(base->flags && (sflags & DTREE_S_FLAG_PARTIAL || !n) && (!base->pos || !pos || base->pos==pos)) return base; else if(!n) break; } pp=base->nodes[hash]; base=DTREE_DT_GETPP(h,pp); } return lflag; }
//sets the data, param, and type for a node static int dtree_set_payload(dtree_dt_index *h,dtree_dt_node *n,const dtree_dt_add_entry *entry) { packed_ptr pp; dtree_dt_node *base=n; if((entry->flags & n->flags)==entry->flags && entry->data==n->payload && entry->param==n->cparam && entry->pos==n->pos && entry->dir==n->dir) { dtree_printd(DTREE_PRINT_INITDTREE,"ADD: PURE DUPLICATE detected, skipping\n"); return 0; } dtree_printd(DTREE_PRINT_INITDTREE,"ADD: EOT: '%c' level: %d p(%p) pp(%p) param(%p) flags: %d\n", n->data?n->data:'^',dtree_node_depth(h,n),n,n->curr,entry->param,entry->flags); if(n->flags && (h->sflags & DTREE_S_FLAG_DUPS)) { dtree_printd(DTREE_PRINT_INITDTREE,"ADD: DUPLICATE detected, adding\n"); pp=n->dup; //iterate thru the dups while(pp) { n=DTREE_DT_GETPP(h,pp); //dup if((entry->flags & n->flags)==entry->flags && entry->data==n->payload && entry->param==n->cparam && entry->pos==n->pos && entry->dir==n->dir) return 0; pp=n->dup; } pp=dtree_alloc_node(h); if(!pp) { fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count); return -1; } n->dup=pp; n=DTREE_DT_GETPP(h,pp); n->curr=base->curr; n->prev=base->prev; n->data=base->data; return dtree_set_payload(h,n,entry); } else if(n->flags && !(h->sflags & DTREE_S_FLAG_DUPS)) dtree_printd(DTREE_PRINT_INITDTREE,"ADD: DUPLICATE detected, overwriting original\n"); n->flags=DTREE_DT_FLAG_TOKEN; n->flags|=entry->flags; n->payload=entry->data; n->cparam=entry->param; n->pos=entry->pos; n->rank=entry->rank; n->dir=entry->dir; return 1; }
//classifies a string const dclass_keyvalue *dclass_classify(const dclass_index *di,const char *str) { int on=0; int valid; int bcvalid; int i; char buf[DTREE_DATA_BUFLEN]; const char *p; const char *token=""; packed_ptr pp; const dtree_dt_node *wnode=NULL; const dtree_dt_node *nnode=NULL; const dtree_dt_node *fbnode; const dtree_dt_node *fnode; const dtree_dt_index *h=&di->dti; const void *cnodes[DTREE_S_MAX_CHAIN]={NULL}; if(!str || !h->head) return dclass_get_kverror(di); dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() UA: '%s'\n",str); for(p=str;*p;p++) { valid=0; if((*p>='a' && *p<='z') || (*p>='A' && *p<='Z') || (*p>='0' && *p<='9')) { //new token found if(!on) { token=p; on=1; } valid=1; } if((!valid || (!*(p+1))) && on) { //EOT found fbnode=dtree_get_node(h,token,0); dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() token:'%s' = '%s':%d\n", token,fbnode?dtree_node_path(h,fbnode,buf):"",fbnode?(int)fbnode->flags:0); if(fbnode && dtree_get_flag(h,fbnode,DTREE_DT_FLAG_TOKEN)) { if((fnode=dtree_get_flag(h,fbnode,DTREE_DT_FLAG_STRONG))) return fnode->payload; else if((fnode=dtree_get_flag(h,fbnode,DTREE_DT_FLAG_WEAK))) { i=DTREE_DC_DISTANCE(h,(char*)fnode->payload); if(!wnode || (i>=0 && i<DTREE_DC_DISTANCE(h,(char*)wnode->payload))) wnode=fnode; } else if((fnode=dtree_get_flag(h,fbnode,DTREE_DT_FLAG_NONE))) { i=DTREE_DC_DISTANCE(h,(char*)fnode->payload); if(!nnode || (i>=0 && i<DTREE_DC_DISTANCE(h,(char*)nnode->payload))) nnode=fnode; } if((fnode=dtree_get_flag(h,fbnode,DTREE_DT_FLAG_CHAIN))) { dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() pchain detected\n"); pp=fnode->curr; while(pp) { bcvalid=0; if(fnode->flags & DTREE_DT_FLAG_CHAIN && fnode->cparam) { dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() looking for pchain %p\n",fnode->cparam); for(i=0;i<DTREE_S_MAX_CHAIN && cnodes[i];i++) { //chain hit if(cnodes[i]==fnode->cparam) { if(fnode->flags & DTREE_DT_FLAG_BCHAIN) bcvalid=1; else return fnode->payload; } } } if(fnode->flags & 
DTREE_DT_FLAG_BCHAIN && (bcvalid || !fnode->cparam)) { for(i=0;i<DTREE_S_MAX_CHAIN;i++) { if(!cnodes[i]) { cnodes[i]=fnode->payload; dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() pchain added: %p('%d')\n",fnode->payload,i); break; } } } pp=fnode->dup; fnode=DTREE_DT_GETPP(h,pp); } } } on=0; } } if(wnode) return wnode->payload; else if(nnode) return nnode->payload; return dclass_get_kverror(di); }
//adds a node to the tree static int dtree_add_node(dtree_dt_index *h,dtree_dt_node *n,char *t,void *data,flag_f flags,void *param) { int hash; char *p; packed_ptr pp; dtree_dt_node *next; if(!n) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: tree: '%c' level: %d *t: '%c' p(%p) pp(%p)\n", n->data,dtree_node_depth(h,n),*t?*t:'#',n,n->curr); //EOT trailing wildcard if(!*t && n->prev) return dtree_set_payload(h,n,data,flags,param); //? if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_OPTIONAL) { pp=n->prev; next=DTREE_DT_GETPP(h,pp); if(!pp) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: optional: '%c' *t: '%c'\n",next->data,*(t+1)); if(dtree_add_node(h,next,t+1,data,flags,param)<0) return -1; t++; //trailing wildcard if(!*t) return dtree_set_payload(h,n,data,flags,param); } //[abc] if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_SET_S) { for(p=t;*p;p++) { if(*p==DTREE_PATTERN_SET_E) break; } if(!*p) return 0; t++; while(t<p) { *p=*t; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: set: '%c' *t: '%c' t: '%s'\n",n->data,*t,p); if(dtree_add_node(h,n,p,data,flags,param)<0) return -1; t++; } *p=DTREE_PATTERN_SET_E; return 1; } hash=dtree_hash_char(*t); if(!hash && *t!='0') return 0; if(*t>='A' && *t<='Z') *t|=0x20; //. if(hash==DTREE_HASH_ANY) *t=DTREE_PATTERN_ANY; pp=(packed_ptr)n->nodes[hash]; next=DTREE_DT_GETPP(h,pp); if(!pp) { pp=dtree_alloc_node(h); next=DTREE_DT_GETPP(h,pp); if(!pp) { fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count); return -1; } next->data=*t; next->curr=pp; next->prev=n->curr; n->nodes[hash]=pp; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: new node '%c' created level: %d hash: %d p(%p) pp(%p)\n", *t,dtree_node_depth(h,n),hash,next,pp); } //EOT if(!*(t+1)) return dtree_set_payload(h,next,data,flags,param); return dtree_add_node(h,next,t+1,data,flags,param); }
//searches for a token with regex static const dtree_dt_node *dtree_search_node(const dtree_dt_index *h,const dtree_dt_node *n,const char *t) { int hash; flag_f rf,nf; packed_ptr pp; const dtree_dt_node *rflag; if(!n) return NULL; //bad match if(n->data!=*t && n->data!=(*t|0x20) && n->data!=DTREE_PATTERN_ANY) return NULL; t++; hash=dtree_hash_char(*t); //lazy EOT if((!hash && *t!='0') || hash>=DTREE_HASH_SEP) { //only quit if strong if(dtree_get_flag(h,n,DTREE_DT_FLAG_STRONG)) return n; else if(!t && !n->flags) return NULL; } pp=n->nodes[hash]; rflag=DTREE_DT_GETPP(h,pp); if(rflag && *t) rflag=dtree_search_node(h,rflag,t); //wildcard if(!rflag) { hash=dtree_hash_char(DTREE_PATTERN_ANY); pp=n->nodes[hash]; rflag=DTREE_DT_GETPP(h,pp); if(rflag) rflag=dtree_search_node(h,rflag,t); hash=dtree_hash_char(*t); } //partial if(!rflag && (n->flags & DTREE_DT_FLAG_TOKEN) && (h->sflags & DTREE_S_FLAG_PARTIAL)) { if(dtree_node_depth(h,n)>=DTREE_S_PART_TLEN) rflag=n; } //n is lazy EOT or stronger match if((n->flags & DTREE_DT_FLAG_TOKEN) && ((hash==0 && *t!='0') || hash>=DTREE_HASH_SEP)) { nf=dtree_get_flags(h,n); rf=dtree_get_flags(h,rflag); if(!rflag) rflag=n; else if((nf & DTREE_DT_FLAG_STRONG) && !(rf & DTREE_DT_FLAG_STRONG)) rflag=n; else if((rf & DTREE_DT_FLAG_STRONG)); else if((nf & DTREE_DT_FLAG_CHAIN) && !(nf & DTREE_DT_FLAG_BCHAIN) && !(rf & DTREE_DT_FLAG_CHAIN) && !(rf & DTREE_DT_FLAG_BCHAIN)) rflag=n; else if((rf & DTREE_DT_FLAG_CHAIN)); else if((nf & DTREE_DT_FLAG_WEAK)) rflag=n; } return rflag; }
//gets the flag for a token const dtree_dt_node *dtree_get_node(const dtree_dt_index *h,const char *t,flag_f sflags) { int hash; char n; const char *p; packed_ptr pp; const dtree_dt_node *base; const dtree_dt_node *lflag=NULL; if(!sflags) sflags=h->sflags; if(sflags & DTREE_S_FLAG_REGEX) { pp=h->head->nodes[dtree_hash_char(*t)]; base=DTREE_DT_GETPP(h,pp); lflag=dtree_search_node(h,base,t); //leading wildcard if(!lflag) { pp=h->head->nodes[dtree_hash_char(DTREE_PATTERN_ANY)]; base=DTREE_DT_GETPP(h,pp); if(pp) lflag=dtree_search_node(h,base,t); } return lflag; } pp=h->head->nodes[dtree_hash_char(*t)]; base=DTREE_DT_GETPP(h,pp); //iterative search for(p=t;*p && pp;p++) { if(base->data!=*p && base->data!=(*p|0x20) && base->data!=DTREE_PATTERN_ANY) break; n=*(p+1); hash=dtree_hash_char(n); //token match if((!hash && n!='0') || hash>=DTREE_HASH_SEP) { //not full string if(base->flags && (sflags & DTREE_S_FLAG_PARTIAL) && n) return base; //full token else if(base->flags && !n) return base; else if(!n) break; } //partial token match if((base->flags & DTREE_DT_FLAG_TOKEN) && (sflags & DTREE_S_FLAG_PARTIAL) && (p+1-t)>=DTREE_S_PART_TLEN) lflag=base; pp=base->nodes[hash]; base=DTREE_DT_GETPP(h,pp); } return lflag; }