//sets the data, param, and type for a node static int dtree_set_payload(dtree_dt_index *h,dtree_dt_node *n,void *data,flag_f type,void *param) { packed_ptr pp; dtree_dt_node *base=n; //dup if((type & n->flags)==type && data==n->payload && param==n->cparam) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: EOT: '%c' level: %d p(%p) pp(%p) param(%p)\n", n->data,dtree_node_depth(h,n),n,n->curr,param); if(n->flags && (h->sflags & DTREE_S_FLAG_DUPS)) { dtree_printd(DTREE_PRINT_INITDTREE,"ADD: DUPLICATE detected, adding\n"); pp=n->dup; //iterate thru the dups while(pp) { n=DTREE_DT_GETPP(h,pp); //dup if((type & n->flags)==type && data==n->payload && param==n->cparam) return 0; pp=n->dup; } pp=dtree_alloc_node(h); if(!pp) { fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count); return -1; } n->dup=pp; n=DTREE_DT_GETPP(h,pp); n->curr=base->curr; n->prev=base->prev; n->data=base->data; return dtree_set_payload(h,n,data,type,param); } else if(n->flags && !(h->sflags & DTREE_S_FLAG_DUPS)) dtree_printd(DTREE_PRINT_INITDTREE,"ADD: DUPLICATE detected, overwriting original\n"); n->flags=DTREE_DT_FLAG_TOKEN; n->flags|=type; n->payload=data; n->cparam=param; return 1; }
//adds a node to the tree static int dtree_add_node(dtree_dt_index *h,dtree_dt_node *n,char *t,const dtree_dt_add_entry *entry) { int hash; char *s; packed_ptr pp; dtree_dt_node *next; if(!n) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: tree: '%c' level: %d t: '%s' p(%p) pp(%p)\n", n->data?n->data:'^',dtree_node_depth(h,n),t,n,n->curr); //escape if(h->sflags & DTREE_S_FLAG_REGEX && (*t==DTREE_PATTERN_ESCAPE) && *(t-1)!=DTREE_PATTERN_ESCAPE) t++; //(abc) while(h->sflags & DTREE_S_FLAG_REGEX && (*t==DTREE_PATTERN_GROUP_S || *t==DTREE_PATTERN_GROUP_E) && *(t-1)!=DTREE_PATTERN_ESCAPE) { hash=(*t==DTREE_PATTERN_GROUP_S); s=t+1; while(*s && hash) { if(*s==DTREE_PATTERN_GROUP_S && *(s-1)!=DTREE_PATTERN_ESCAPE) hash++; else if(*s==DTREE_PATTERN_GROUP_E && *(s-1)!=DTREE_PATTERN_ESCAPE) hash--; s++; } if(*t==DTREE_PATTERN_GROUP_S) { dtree_printd(DTREE_PRINT_INITDTREE,"ADD: group detected: '%c' - '%c'\n",*(t+1)?*(t+1):'#',*s?*s:'#'); //optional group if(*s==DTREE_PATTERN_OPTIONAL && *(s-1)!=DTREE_PATTERN_ESCAPE) { if(dtree_add_node(h,n,s+1,entry)<0) return -1; } } t++; } //? if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_OPTIONAL && *(t-1)!=DTREE_PATTERN_ESCAPE) { //no group if(*(t-1)!=DTREE_PATTERN_GROUP_E || *(t-2)==DTREE_PATTERN_ESCAPE) { pp=n->prev; next=DTREE_DT_GETPP(h,pp); if(!pp) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: optional: '%c' t: '%s'\n",next->data?next->data:'^',(t+1)); if(dtree_add_node(h,next,t+1,entry)<0) return -1; } t++; return dtree_add_node(h,n,t,entry); } //EOT if(!*t && n->prev) return dtree_set_payload(h,n,entry); //[abc] if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_SET_S && *(t-1)!=DTREE_PATTERN_ESCAPE) { for(s=t;*s;s++) { if(*s==DTREE_PATTERN_SET_E && *(s-1)!=DTREE_PATTERN_ESCAPE) break; } if(!*s) return 0; t++; while(t<s) { *s=*t; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: set: '%c' *t: '%c' t: '%s'\n",n->data?n->data:'^',*t,s); if(dtree_add_node(h,n,s,entry)<0) return -1; t++; } *s=DTREE_PATTERN_SET_E; return 1; } hash=dtree_hash_char(*t); if(!hash && *t!='0') return 0; if(*t>='A' && *t<='Z') *t|=0x20; //. if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_ANY && *(t-1)!=DTREE_PATTERN_ESCAPE) hash=DTREE_HASH_ANY; pp=(packed_ptr)n->nodes[hash]; next=DTREE_DT_GETPP(h,pp); if(!pp) { pp=dtree_alloc_node(h); next=DTREE_DT_GETPP(h,pp); if(!pp) { fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count); return -1; } if(hash==DTREE_HASH_ANY) next->data=DTREE_PATTERN_ANY; else next->data=*t; next->curr=pp; next->prev=n->curr; n->nodes[hash]=pp; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: new node '%c' created level: %d hash: %d p(%p) pp(%p)\n", *t,dtree_node_depth(h,n),hash,next,pp); } //EOT if(!*(t+1)) return dtree_set_payload(h,next,entry); return dtree_add_node(h,next,t+1,entry); }
//sets the data, param, and type for a node static int dtree_set_payload(dtree_dt_index *h,dtree_dt_node *n,const dtree_dt_add_entry *entry) { packed_ptr pp; dtree_dt_node *base=n; if((entry->flags & n->flags)==entry->flags && entry->data==n->payload && entry->param==n->cparam && entry->pos==n->pos && entry->dir==n->dir) { dtree_printd(DTREE_PRINT_INITDTREE,"ADD: PURE DUPLICATE detected, skipping\n"); return 0; } dtree_printd(DTREE_PRINT_INITDTREE,"ADD: EOT: '%c' level: %d p(%p) pp(%p) param(%p) flags: %d\n", n->data?n->data:'^',dtree_node_depth(h,n),n,n->curr,entry->param,entry->flags); if(n->flags && (h->sflags & DTREE_S_FLAG_DUPS)) { dtree_printd(DTREE_PRINT_INITDTREE,"ADD: DUPLICATE detected, adding\n"); pp=n->dup; //iterate thru the dups while(pp) { n=DTREE_DT_GETPP(h,pp); //dup if((entry->flags & n->flags)==entry->flags && entry->data==n->payload && entry->param==n->cparam && entry->pos==n->pos && entry->dir==n->dir) return 0; pp=n->dup; } pp=dtree_alloc_node(h); if(!pp) { fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count); return -1; } n->dup=pp; n=DTREE_DT_GETPP(h,pp); n->curr=base->curr; n->prev=base->prev; n->data=base->data; return dtree_set_payload(h,n,entry); } else if(n->flags && !(h->sflags & DTREE_S_FLAG_DUPS)) dtree_printd(DTREE_PRINT_INITDTREE,"ADD: DUPLICATE detected, overwriting original\n"); n->flags=DTREE_DT_FLAG_TOKEN; n->flags|=entry->flags; n->payload=entry->data; n->cparam=entry->param; n->pos=entry->pos; n->rank=entry->rank; n->dir=entry->dir; return 1; }
//adds a node to the tree static int dtree_add_node(dtree_dt_index *h,dtree_dt_node *n,char *t,void *data,flag_f flags,void *param) { int hash; char *p; packed_ptr pp; dtree_dt_node *next; if(!n) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: tree: '%c' level: %d *t: '%c' p(%p) pp(%p)\n", n->data,dtree_node_depth(h,n),*t?*t:'#',n,n->curr); //EOT trailing wildcard if(!*t && n->prev) return dtree_set_payload(h,n,data,flags,param); //? if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_OPTIONAL) { pp=n->prev; next=DTREE_DT_GETPP(h,pp); if(!pp) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: optional: '%c' *t: '%c'\n",next->data,*(t+1)); if(dtree_add_node(h,next,t+1,data,flags,param)<0) return -1; t++; //trailing wildcard if(!*t) return dtree_set_payload(h,n,data,flags,param); } //[abc] if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_SET_S) { for(p=t;*p;p++) { if(*p==DTREE_PATTERN_SET_E) break; } if(!*p) return 0; t++; while(t<p) { *p=*t; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: set: '%c' *t: '%c' t: '%s'\n",n->data,*t,p); if(dtree_add_node(h,n,p,data,flags,param)<0) return -1; t++; } *p=DTREE_PATTERN_SET_E; return 1; } hash=dtree_hash_char(*t); if(!hash && *t!='0') return 0; if(*t>='A' && *t<='Z') *t|=0x20; //. if(hash==DTREE_HASH_ANY) *t=DTREE_PATTERN_ANY; pp=(packed_ptr)n->nodes[hash]; next=DTREE_DT_GETPP(h,pp); if(!pp) { pp=dtree_alloc_node(h); next=DTREE_DT_GETPP(h,pp); if(!pp) { fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count); return -1; } next->data=*t; next->curr=pp; next->prev=n->curr; n->nodes[hash]=pp; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: new node '%c' created level: %d hash: %d p(%p) pp(%p)\n", *t,dtree_node_depth(h,n),hash,next,pp); } //EOT if(!*(t+1)) return dtree_set_payload(h,next,data,flags,param); return dtree_add_node(h,next,t+1,data,flags,param); }
//searches for a token with regex static const dtree_dt_node *dtree_search_node(const dtree_dt_index *h,const dtree_dt_node *n,const char *t) { int hash; flag_f rf,nf; packed_ptr pp; const dtree_dt_node *rflag; if(!n) return NULL; //bad match if(n->data!=*t && n->data!=(*t|0x20) && n->data!=DTREE_PATTERN_ANY) return NULL; t++; hash=dtree_hash_char(*t); //lazy EOT if((!hash && *t!='0') || hash>=DTREE_HASH_SEP) { //only quit if strong if(dtree_get_flag(h,n,DTREE_DT_FLAG_STRONG)) return n; else if(!t && !n->flags) return NULL; } pp=n->nodes[hash]; rflag=DTREE_DT_GETPP(h,pp); if(rflag && *t) rflag=dtree_search_node(h,rflag,t); //wildcard if(!rflag) { hash=dtree_hash_char(DTREE_PATTERN_ANY); pp=n->nodes[hash]; rflag=DTREE_DT_GETPP(h,pp); if(rflag) rflag=dtree_search_node(h,rflag,t); hash=dtree_hash_char(*t); } //partial if(!rflag && (n->flags & DTREE_DT_FLAG_TOKEN) && (h->sflags & DTREE_S_FLAG_PARTIAL)) { if(dtree_node_depth(h,n)>=DTREE_S_PART_TLEN) rflag=n; } //n is lazy EOT or stronger match if((n->flags & DTREE_DT_FLAG_TOKEN) && ((hash==0 && *t!='0') || hash>=DTREE_HASH_SEP)) { nf=dtree_get_flags(h,n); rf=dtree_get_flags(h,rflag); if(!rflag) rflag=n; else if((nf & DTREE_DT_FLAG_STRONG) && !(rf & DTREE_DT_FLAG_STRONG)) rflag=n; else if((rf & DTREE_DT_FLAG_STRONG)); else if((nf & DTREE_DT_FLAG_CHAIN) && !(nf & DTREE_DT_FLAG_BCHAIN) && !(rf & DTREE_DT_FLAG_CHAIN) && !(rf & DTREE_DT_FLAG_BCHAIN)) rflag=n; else if((rf & DTREE_DT_FLAG_CHAIN)); else if((nf & DTREE_DT_FLAG_WEAK)) rflag=n; } return rflag; }