//sets the data, param, and type for a node static int dtree_set_payload(dtree_dt_index *h,dtree_dt_node *n,void *data,flag_f type,void *param) { packed_ptr pp; dtree_dt_node *base=n; //dup if((type & n->flags)==type && data==n->payload && param==n->cparam) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: EOT: '%c' level: %d p(%p) pp(%p) param(%p)\n", n->data,dtree_node_depth(h,n),n,n->curr,param); if(n->flags && (h->sflags & DTREE_S_FLAG_DUPS)) { dtree_printd(DTREE_PRINT_INITDTREE,"ADD: DUPLICATE detected, adding\n"); pp=n->dup; //iterate thru the dups while(pp) { n=DTREE_DT_GETPP(h,pp); //dup if((type & n->flags)==type && data==n->payload && param==n->cparam) return 0; pp=n->dup; } pp=dtree_alloc_node(h); if(!pp) { fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count); return -1; } n->dup=pp; n=DTREE_DT_GETPP(h,pp); n->curr=base->curr; n->prev=base->prev; n->data=base->data; return dtree_set_payload(h,n,data,type,param); } else if(n->flags && !(h->sflags & DTREE_S_FLAG_DUPS)) dtree_printd(DTREE_PRINT_INITDTREE,"ADD: DUPLICATE detected, overwriting original\n"); n->flags=DTREE_DT_FLAG_TOKEN; n->flags|=type; n->payload=data; n->cparam=param; return 1; }
//allocates a new dtree node packed_ptr dtree_alloc_node(dtree_dt_index *h) { packed_ptr pp; dtree_dt_node *p; if(h->node_count>=(h->slab_count*DTREE_DT_SLAB_SIZE)) { if(h->slab_count>=DTREE_DT_MAX_SLABS) { dtree_printd(DTREE_PRINT_INITDTREE,"NALLOC: ERROR: all slabs allocated\n"); return (packed_ptr)0; } dtree_printd(DTREE_PRINT_INITDTREE,"NALLOC: allocating new SLAB\n"); p=calloc(DTREE_DT_SLAB_SIZE,sizeof(dtree_dt_node)); if(!p) { dtree_printd(DTREE_PRINT_INITDTREE,"NMALLOC: ERROR: calloc memory allocation failure: %d\n",DTREE_DT_SLAB_SIZE); return (packed_ptr)0; } h->size+=DTREE_DT_SLAB_SIZE*sizeof(dtree_dt_node); h->slabs[h->slab_count]=p; h->slab_count++; h->slab_pos=0; } //return next open node on slab p=h->slabs[h->slab_count-1]; if(!p) return (packed_ptr)0; p=&p[h->slab_pos]; pp=DTREE_DT_GENPP(p,h->slab_count-1,h->slab_pos); dtree_printd(DTREE_PRINT_INITDTREE,"NALLOC: node %zu off of slab %zu p(%p) pp(%p)\n", h->slab_pos,h->slab_count,p,pp); h->slab_pos++; h->node_count++; //bypass null packed_ptr if(!pp) return dtree_alloc_node(h); return pp; }
//allocates a string, reuses if in cache char *dtree_alloc_string(dtree_dt_index *h,const char *s,int len) { char *ret; int i; if(!s) return NULL; for(i=0;h->dc_cache[i] && i<DTREE_M_LOOKUP_CACHE;i++) { if(!strncmp(s,h->dc_cache[i],len) && !h->dc_cache[i][len]) { dtree_printd(DTREE_PRINT_INITDTREE,"SALLOC: cache hit %d: '%s'\n",i,h->dc_cache[i]); return h->dc_cache[i]; } } ret=dtree_alloc_mem_align(h,(len+1)*sizeof(char),sizeof(char)); //legacy if(!ret) return NULL; strncpy(ret,s,len); if(!h->dc_cache[DTREE_M_LOOKUP_CACHE-1]) { for(i=0;i<DTREE_M_LOOKUP_CACHE;i++) { if(!h->dc_cache[i]) { dtree_printd(DTREE_PRINT_INITDTREE,"SALLOC: cache store %d: '%s'\n",i,ret); h->dc_cache[i]=ret; break; } } } dtree_printd(DTREE_PRINT_INITDTREE,"SALLOC: str allocated: '%s'(%p)\n",ret,ret); return ret; }
//adds an entry to the tree int dtree_add_entry(dtree_dt_index *h,const char *key,dtree_dt_add_entry *entry) { char namebuf[DTREE_DATA_BUFLEN+1]; packed_ptr pp; if(!key) return 0; *namebuf='\0'; namebuf[sizeof(namebuf)-1]='\0'; strncpy(namebuf+1,key,sizeof(namebuf)-1); if(namebuf[sizeof(namebuf)-1]) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD ENTRY: name '%s':%d\n",namebuf+1,entry->flags); if(!h->head) { pp=dtree_alloc_node(h); if(!pp) { fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count); return -1; } h->head=DTREE_DT_GETPP(h,pp); h->head->curr=pp; h->head->data=0; } if(!entry->flags) entry->flags=DTREE_DT_FLAG_STRONG; return dtree_add_node(h,h->head,namebuf+1,entry); }
//aligned memory allocation static void *dtree_alloc_mem_align(dtree_dt_index *h,size_t len,size_t align) { char *ret; size_t olen=0; size_t nlen; if(h->dc_slab_pos%align && !(align&(align-1))) { olen=((h->dc_slab_pos+align-1) & (~(align-1))) - h->dc_slab_pos; dtree_printd(DTREE_PRINT_INITDTREE,"DMALLOC: unaligned(%zu) offset detected %zu => %zu\n", align,h->dc_slab_pos,h->dc_slab_pos+olen); } nlen=len+olen; if(h->dc_slab_count>=DTREE_M_MAX_SLABS) { dtree_printd(DTREE_PRINT_INITDTREE,"DMALLOC: ERROR: out of slabs: %zu\n",h->dc_slab_count); return NULL; } if(!h->dc_slabs[h->dc_slab_count]) { //alloc new slab h->dc_slabs[h->dc_slab_count]=calloc(DTREE_M_SLAB_SIZE,sizeof(char)); if(!h->dc_slabs[h->dc_slab_count]) { dtree_printd(DTREE_PRINT_INITDTREE,"DMALLOC: ERROR: calloc memory allocation failure: %d\n",DTREE_M_SLAB_SIZE); return NULL; } h->dc_slab_pos=0; h->size+=DTREE_M_SLAB_SIZE*sizeof(char); } if(nlen>DTREE_M_SLAB_SIZE || !len) { dtree_printd(DTREE_PRINT_INITDTREE,"DMALLOC: ERROR: size too large: %zu\n",len); return NULL; } if(h->dc_slab_pos+nlen>DTREE_M_SLAB_SIZE) { //move to next slab h->dc_slab_count++; h->dc_slab_pos=0; return dtree_alloc_mem_align(h,len,align); } //alloc memory ret=h->dc_slabs[h->dc_slab_count]+h->dc_slab_pos+olen; if(((size_t)ret%align)) { dtree_printd(DTREE_PRINT_INITDTREE,"DMALLOC: ERROR: misalignment\n"); return NULL; } h->dc_count++; dtree_printd(DTREE_PRINT_INITDTREE,"DMALLOC: memory: %zu,%zu+%zu,%zu(%p) align: %zu\n", h->dc_slab_count,h->dc_slab_pos,olen,len,ret,align); h->dc_slab_pos=(ret-h->dc_slabs[h->dc_slab_count])+len; return (void*)ret; }
//adds a node to the tree static int dtree_add_node(dtree_dt_index *h,dtree_dt_node *n,char *t,const dtree_dt_add_entry *entry) { int hash; char *s; packed_ptr pp; dtree_dt_node *next; if(!n) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: tree: '%c' level: %d t: '%s' p(%p) pp(%p)\n", n->data?n->data:'^',dtree_node_depth(h,n),t,n,n->curr); //escape if(h->sflags & DTREE_S_FLAG_REGEX && (*t==DTREE_PATTERN_ESCAPE) && *(t-1)!=DTREE_PATTERN_ESCAPE) t++; //(abc) while(h->sflags & DTREE_S_FLAG_REGEX && (*t==DTREE_PATTERN_GROUP_S || *t==DTREE_PATTERN_GROUP_E) && *(t-1)!=DTREE_PATTERN_ESCAPE) { hash=(*t==DTREE_PATTERN_GROUP_S); s=t+1; while(*s && hash) { if(*s==DTREE_PATTERN_GROUP_S && *(s-1)!=DTREE_PATTERN_ESCAPE) hash++; else if(*s==DTREE_PATTERN_GROUP_E && *(s-1)!=DTREE_PATTERN_ESCAPE) hash--; s++; } if(*t==DTREE_PATTERN_GROUP_S) { dtree_printd(DTREE_PRINT_INITDTREE,"ADD: group detected: '%c' - '%c'\n",*(t+1)?*(t+1):'#',*s?*s:'#'); //optional group if(*s==DTREE_PATTERN_OPTIONAL && *(s-1)!=DTREE_PATTERN_ESCAPE) { if(dtree_add_node(h,n,s+1,entry)<0) return -1; } } t++; } //? if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_OPTIONAL && *(t-1)!=DTREE_PATTERN_ESCAPE) { //no group if(*(t-1)!=DTREE_PATTERN_GROUP_E || *(t-2)==DTREE_PATTERN_ESCAPE) { pp=n->prev; next=DTREE_DT_GETPP(h,pp); if(!pp) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: optional: '%c' t: '%s'\n",next->data?next->data:'^',(t+1)); if(dtree_add_node(h,next,t+1,entry)<0) return -1; } t++; return dtree_add_node(h,n,t,entry); } //EOT if(!*t && n->prev) return dtree_set_payload(h,n,entry); //[abc] if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_SET_S && *(t-1)!=DTREE_PATTERN_ESCAPE) { for(s=t;*s;s++) { if(*s==DTREE_PATTERN_SET_E && *(s-1)!=DTREE_PATTERN_ESCAPE) break; } if(!*s) return 0; t++; while(t<s) { *s=*t; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: set: '%c' *t: '%c' t: '%s'\n",n->data?n->data:'^',*t,s); if(dtree_add_node(h,n,s,entry)<0) return -1; t++; } *s=DTREE_PATTERN_SET_E; return 1; } hash=dtree_hash_char(*t); if(!hash && *t!='0') return 0; if(*t>='A' && *t<='Z') *t|=0x20; //. if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_ANY && *(t-1)!=DTREE_PATTERN_ESCAPE) hash=DTREE_HASH_ANY; pp=(packed_ptr)n->nodes[hash]; next=DTREE_DT_GETPP(h,pp); if(!pp) { pp=dtree_alloc_node(h); next=DTREE_DT_GETPP(h,pp); if(!pp) { fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count); return -1; } if(hash==DTREE_HASH_ANY) next->data=DTREE_PATTERN_ANY; else next->data=*t; next->curr=pp; next->prev=n->curr; n->nodes[hash]=pp; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: new node '%c' created level: %d hash: %d p(%p) pp(%p)\n", *t,dtree_node_depth(h,n),hash,next,pp); } //EOT if(!*(t+1)) return dtree_set_payload(h,next,entry); return dtree_add_node(h,next,t+1,entry); }
//sets the data, param, and type for a node static int dtree_set_payload(dtree_dt_index *h,dtree_dt_node *n,const dtree_dt_add_entry *entry) { packed_ptr pp; dtree_dt_node *base=n; if((entry->flags & n->flags)==entry->flags && entry->data==n->payload && entry->param==n->cparam && entry->pos==n->pos && entry->dir==n->dir) { dtree_printd(DTREE_PRINT_INITDTREE,"ADD: PURE DUPLICATE detected, skipping\n"); return 0; } dtree_printd(DTREE_PRINT_INITDTREE,"ADD: EOT: '%c' level: %d p(%p) pp(%p) param(%p) flags: %d\n", n->data?n->data:'^',dtree_node_depth(h,n),n,n->curr,entry->param,entry->flags); if(n->flags && (h->sflags & DTREE_S_FLAG_DUPS)) { dtree_printd(DTREE_PRINT_INITDTREE,"ADD: DUPLICATE detected, adding\n"); pp=n->dup; //iterate thru the dups while(pp) { n=DTREE_DT_GETPP(h,pp); //dup if((entry->flags & n->flags)==entry->flags && entry->data==n->payload && entry->param==n->cparam && entry->pos==n->pos && entry->dir==n->dir) return 0; pp=n->dup; } pp=dtree_alloc_node(h); if(!pp) { fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count); return -1; } n->dup=pp; n=DTREE_DT_GETPP(h,pp); n->curr=base->curr; n->prev=base->prev; n->data=base->data; return dtree_set_payload(h,n,entry); } else if(n->flags && !(h->sflags & DTREE_S_FLAG_DUPS)) dtree_printd(DTREE_PRINT_INITDTREE,"ADD: DUPLICATE detected, overwriting original\n"); n->flags=DTREE_DT_FLAG_TOKEN; n->flags|=entry->flags; n->payload=entry->data; n->cparam=entry->param; n->pos=entry->pos; n->rank=entry->rank; n->dir=entry->dir; return 1; }
//classifies a string const dclass_keyvalue *dclass_classify(const dclass_index *di,const char *str) { int on=0; int valid; int bcvalid; int i; char buf[DTREE_DATA_BUFLEN]; const char *p; const char *token=""; packed_ptr pp; const dtree_dt_node *wnode=NULL; const dtree_dt_node *nnode=NULL; const dtree_dt_node *fbnode; const dtree_dt_node *fnode; const dtree_dt_index *h=&di->dti; const void *cnodes[DTREE_S_MAX_CHAIN]={NULL}; if(!str || !h->head) return dclass_get_kverror(di); dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() UA: '%s'\n",str); for(p=str;*p;p++) { valid=0; if((*p>='a' && *p<='z') || (*p>='A' && *p<='Z') || (*p>='0' && *p<='9')) { //new token found if(!on) { token=p; on=1; } valid=1; } if((!valid || (!*(p+1))) && on) { //EOT found fbnode=dtree_get_node(h,token,0); dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() token:'%s' = '%s':%d\n", token,fbnode?dtree_node_path(h,fbnode,buf):"",fbnode?(int)fbnode->flags:0); if(fbnode && dtree_get_flag(h,fbnode,DTREE_DT_FLAG_TOKEN)) { if((fnode=dtree_get_flag(h,fbnode,DTREE_DT_FLAG_STRONG))) return fnode->payload; else if((fnode=dtree_get_flag(h,fbnode,DTREE_DT_FLAG_WEAK))) { i=DTREE_DC_DISTANCE(h,(char*)fnode->payload); if(!wnode || (i>=0 && i<DTREE_DC_DISTANCE(h,(char*)wnode->payload))) wnode=fnode; } else if((fnode=dtree_get_flag(h,fbnode,DTREE_DT_FLAG_NONE))) { i=DTREE_DC_DISTANCE(h,(char*)fnode->payload); if(!nnode || (i>=0 && i<DTREE_DC_DISTANCE(h,(char*)nnode->payload))) nnode=fnode; } if((fnode=dtree_get_flag(h,fbnode,DTREE_DT_FLAG_CHAIN))) { dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() pchain detected\n"); pp=fnode->curr; while(pp) { bcvalid=0; if(fnode->flags & DTREE_DT_FLAG_CHAIN && fnode->cparam) { dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() looking for pchain %p\n",fnode->cparam); for(i=0;i<DTREE_S_MAX_CHAIN && cnodes[i];i++) { //chain hit if(cnodes[i]==fnode->cparam) { if(fnode->flags & DTREE_DT_FLAG_BCHAIN) bcvalid=1; else return fnode->payload; } } } if(fnode->flags & DTREE_DT_FLAG_BCHAIN && (bcvalid || !fnode->cparam)) { for(i=0;i<DTREE_S_MAX_CHAIN;i++) { if(!cnodes[i]) { cnodes[i]=fnode->payload; dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() pchain added: %p('%d')\n",fnode->payload,i); break; } } } pp=fnode->dup; fnode=DTREE_DT_GETPP(h,pp); } } } on=0; } } if(wnode) return wnode->payload; else if(nnode) return nnode->payload; return dclass_get_kverror(di); }
//adds a node to the tree static int dtree_add_node(dtree_dt_index *h,dtree_dt_node *n,char *t,void *data,flag_f flags,void *param) { int hash; char *p; packed_ptr pp; dtree_dt_node *next; if(!n) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: tree: '%c' level: %d *t: '%c' p(%p) pp(%p)\n", n->data,dtree_node_depth(h,n),*t?*t:'#',n,n->curr); //EOT trailing wildcard if(!*t && n->prev) return dtree_set_payload(h,n,data,flags,param); //? if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_OPTIONAL) { pp=n->prev; next=DTREE_DT_GETPP(h,pp); if(!pp) return 0; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: optional: '%c' *t: '%c'\n",next->data,*(t+1)); if(dtree_add_node(h,next,t+1,data,flags,param)<0) return -1; t++; //trailing wildcard if(!*t) return dtree_set_payload(h,n,data,flags,param); } //[abc] if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_SET_S) { for(p=t;*p;p++) { if(*p==DTREE_PATTERN_SET_E) break; } if(!*p) return 0; t++; while(t<p) { *p=*t; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: set: '%c' *t: '%c' t: '%s'\n",n->data,*t,p); if(dtree_add_node(h,n,p,data,flags,param)<0) return -1; t++; } *p=DTREE_PATTERN_SET_E; return 1; } hash=dtree_hash_char(*t); if(!hash && *t!='0') return 0; if(*t>='A' && *t<='Z') *t|=0x20; //. if(hash==DTREE_HASH_ANY) *t=DTREE_PATTERN_ANY; pp=(packed_ptr)n->nodes[hash]; next=DTREE_DT_GETPP(h,pp); if(!pp) { pp=dtree_alloc_node(h); next=DTREE_DT_GETPP(h,pp); if(!pp) { fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count); return -1; } next->data=*t; next->curr=pp; next->prev=n->curr; n->nodes[hash]=pp; dtree_printd(DTREE_PRINT_INITDTREE,"ADD: new node '%c' created level: %d hash: %d p(%p) pp(%p)\n", *t,dtree_node_depth(h,n),hash,next,pp); } //EOT if(!*(t+1)) return dtree_set_payload(h,next,data,flags,param); return dtree_add_node(h,next,t+1,data,flags,param); }