Exemplo n.º 1
0
//searches for a token with regex
//
//Recursive matcher: n is the node that must match the character at *t.
//  h    index the nodes belong to
//  n    candidate node for *t (NULL or a node without curr yields NULL)
//  t    remaining token text, NUL terminated
//  pos  position constraint, passed through to the flag helpers
//Returns the best matching node reachable through n, or NULL if none.
//
//The literal child for the next character is tried first, then the
//wildcard child. When n itself is a token and the next character ends the
//token, n competes with the deeper result:
//STRONG beats everything, then plain CHAIN (no BCHAIN), then WEAK.
static const dtree_dt_node *dtree_search_node(const dtree_dt_index *h,const dtree_dt_node *n,const char *t,dtree_pos_f pos)
{
    unsigned int hash;
    dtree_flag_f rf,nf;
    packed_ptr pp;
    const dtree_dt_node *rflag;
    
    if(!n || !n->curr)
        return NULL;
    
    //the terminator can never be consumed by a node; without this guard a
    //wildcard node would match it and the t++ below would read past the string
    if(!*t)
        return NULL;
    
    //bad match
    if(n->data!=*t && n->data!=(*t|0x20) && n->data!=DTREE_PATTERN_ANY)
        return NULL;
    
    t++;
    hash=dtree_hash_char(*t);
    
    //EOS (hash 0 is shared by '0' and by unhashable characters)
    if(!hash && *t!='0')
    {
        //only quit if strong with position
        if(dtree_get_flag(h,n,DTREE_DT_FLAG_STRONG,pos))
            return n;
        else if(!n->flags)
            return NULL;
    }
    
    //literal child for the next character
    pp=n->nodes[hash];
    rflag=DTREE_DT_GETPP(h,pp);
    
    if(pp)
        rflag=dtree_search_node(h,rflag,t,pos);
    else
        rflag=NULL;
    
    //wildcard
    if(!rflag)
    {
        hash=DTREE_HASH_ANY;
        
        pp=n->nodes[hash];
        rflag=DTREE_DT_GETPP(h,pp);
        
        if(pp)
            rflag=dtree_search_node(h,rflag,t,pos);
        else
            rflag=NULL;
        
        //restore the hash of the real next character for the check below
        hash=dtree_hash_char(*t);
    }
    
    //n is lazy EOT or stronger match
    if((n->flags & DTREE_DT_FLAG_TOKEN) && ((!hash && *t!='0') || hash>=DTREE_HASH_SEP))
    {
        if(!rflag)
            rflag=n;
        else
        {
            //flags are only fetched when there is a deeper result to compare
            //against, so the helper is never handed a NULL node
            nf=dtree_get_flags(h,n,pos);
            rf=dtree_get_flags(h,rflag,pos);
            
            if((nf & DTREE_DT_FLAG_STRONG) && !(rf & DTREE_DT_FLAG_STRONG))
                rflag=n;
            else if(rf & DTREE_DT_FLAG_STRONG)
                ; //deeper strong match wins
            else if((nf & DTREE_DT_FLAG_CHAIN) && !(nf & DTREE_DT_FLAG_BCHAIN) &&
                    !(rf & DTREE_DT_FLAG_CHAIN) && !(rf & DTREE_DT_FLAG_BCHAIN))
                rflag=n;
            else if(rf & DTREE_DT_FLAG_CHAIN)
                ; //deeper chain match wins
            else if((nf & DTREE_DT_FLAG_WEAK) && !(rf & DTREE_DT_FLAG_WEAK))
                rflag=n;
        }
    }
    
    return rflag;
}
Exemplo n.º 2
0
//adds a node to the tree
//
//Recursively inserts the pattern text t below node n and attaches entry to
//the node(s) where the pattern ends.
//  h      index being built; h->sflags decides whether regex syntax is honoured
//  n      node to insert below (NULL is a no-op returning 0)
//  t      remaining pattern text; modified in place (upper case letters are
//         lowered, set handling temporarily overwrites the closing bracket)
//  entry  payload description handed to dtree_set_payload()
//Returns -1 on allocation failure (also propagated from recursive calls),
//0 when nothing could be added at this level, 1 after a [set] was expanded,
//otherwise whatever dtree_set_payload() or the recursive call returns.
//
//NOTE(review): the escape checks read t[-1] (and t[-2]); this assumes the
//caller's buffer has readable bytes before the pattern start - confirm.
static int dtree_add_node(dtree_dt_index *h,dtree_dt_node *n,char *t,const dtree_dt_add_entry *entry)
{
    int hash;
    char *s;
    packed_ptr pp;
    dtree_dt_node *next;
    
    if(!n)
        return 0;
    
    dtree_printd(DTREE_PRINT_INITDTREE,"ADD: tree: '%c' level: %d t: '%s' p(%p) pp(%p)\n",
             n->data?n->data:'^',dtree_node_depth(h,n),t,n,n->curr);
    
    //escape: skip an unescaped escape character so the next one is taken literally
    if(h->sflags & DTREE_S_FLAG_REGEX && (*t==DTREE_PATTERN_ESCAPE) && *(t-1)!=DTREE_PATTERN_ESCAPE)
        t++;

    //(abc): consume unescaped group delimiters
    while(h->sflags & DTREE_S_FLAG_REGEX && (*t==DTREE_PATTERN_GROUP_S || *t==DTREE_PATTERN_GROUP_E) &&
            *(t-1)!=DTREE_PATTERN_ESCAPE)
    {
        //hash is reused here as the group nesting depth: 1 for an opening
        //delimiter, 0 for a closing one (which skips the scan below)
        hash=(*t==DTREE_PATTERN_GROUP_S);
        s=t+1;

        //advance s to just past the matching group end (or to the end of the string)
        while(*s && hash)
        {
            if(*s==DTREE_PATTERN_GROUP_S && *(s-1)!=DTREE_PATTERN_ESCAPE)
                hash++;
            else if(*s==DTREE_PATTERN_GROUP_E && *(s-1)!=DTREE_PATTERN_ESCAPE)
                hash--;

            s++;
        }

        if(*t==DTREE_PATTERN_GROUP_S)
        {
            dtree_printd(DTREE_PRINT_INITDTREE,"ADD: group detected: '%c' - '%c'\n",*(t+1)?*(t+1):'#',*s?*s:'#');

            //optional group: also add the pattern with the whole group left out
            if(*s==DTREE_PATTERN_OPTIONAL && *(s-1)!=DTREE_PATTERN_ESCAPE)
            {
                if(dtree_add_node(h,n,s+1,entry)<0)
                    return -1;
            }
        }

        //step over the delimiter itself
        t++;
    }

    //?
    if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_OPTIONAL && *(t-1)!=DTREE_PATTERN_ESCAPE)
    {
        //no group: the optional applies to the single character held by n,
        //so the rest of the pattern is also added below n's parent
        if(*(t-1)!=DTREE_PATTERN_GROUP_E || *(t-2)==DTREE_PATTERN_ESCAPE)
        {
            pp=n->prev;
            next=DTREE_DT_GETPP(h,pp);
        
            if(!pp)
                return 0;
        
            dtree_printd(DTREE_PRINT_INITDTREE,"ADD: optional: '%c' t: '%s'\n",next->data?next->data:'^',(t+1));
        
            if(dtree_add_node(h,next,t+1,entry)<0)
                return -1;
        }

        t++;

        //continue with the text after the '?' below n itself
        return dtree_add_node(h,n,t,entry);
    }

    //EOT: pattern ends on n (only for nodes that have a parent)
    if(!*t && n->prev)
        return dtree_set_payload(h,n,entry);
    
    //[abc]
    if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_SET_S && *(t-1)!=DTREE_PATTERN_ESCAPE)
    {
        //find the unescaped closing bracket
        for(s=t;*s;s++)
        {
            if(*s==DTREE_PATTERN_SET_E && *(s-1)!=DTREE_PATTERN_ESCAPE)
                break;
        }
        
        //unterminated set: nothing is added
        if(!*s)
            return 0;
        
        t++;
        
        //each member is copied over the closing bracket and the pattern is
        //added from there, so the member is followed by the text after the set
        while(t<s)
        {
            *s=*t;
            
            dtree_printd(DTREE_PRINT_INITDTREE,"ADD: set: '%c' *t: '%c' t: '%s'\n",n->data?n->data:'^',*t,s);
            
            //note: on failure the closing bracket is not restored
            if(dtree_add_node(h,n,s,entry)<0)
                return -1;
            
            t++;
        }
        
        //restore the closing bracket
        *s=DTREE_PATTERN_SET_E;
        
        return 1;
    }
    
    hash=dtree_hash_char(*t);
        
    //hash 0 is only valid for the character '0'; anything else cannot be stored
    if(!hash && *t!='0')
        return 0;
        
    //patterns are stored lower case
    if(*t>='A' && *t<='Z')
        *t|=0x20;
        
    //.
    if(h->sflags & DTREE_S_FLAG_REGEX && *t==DTREE_PATTERN_ANY && *(t-1)!=DTREE_PATTERN_ESCAPE)
        hash=DTREE_HASH_ANY;
        
    pp=(packed_ptr)n->nodes[hash];
    next=DTREE_DT_GETPP(h,pp);
        
    //no child in this slot yet: allocate and link one
    if(!pp)
    {
        pp=dtree_alloc_node(h);
        next=DTREE_DT_GETPP(h,pp);
        
        if(!pp)
        {
            fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count);
            return -1;
        }
        
        if(hash==DTREE_HASH_ANY)
            next->data=DTREE_PATTERN_ANY;
        else
            next->data=*t;

        next->curr=pp;
        next->prev=n->curr;
        
        n->nodes[hash]=pp;
        
        dtree_printd(DTREE_PRINT_INITDTREE,"ADD: new node '%c' created level: %d hash: %d p(%p) pp(%p)\n",
                 *t,dtree_node_depth(h,n),hash,next,pp);
    }
    
    //EOT: this was the last character, attach the payload to its node
    if(!*(t+1))
        return dtree_set_payload(h,next,entry);

    return dtree_add_node(h,next,t+1,entry);
}
Exemplo n.º 3
0
//looks up the node that matches token t
//
//  h       index to search
//  t       token text; only the leading run up to a separator/terminator is consumed
//  sflags  search flags; 0 means "use the flags stored in the index"
//  pos     position constraint (0 matches any position)
//Returns the matching node or NULL.
const dtree_dt_node *dtree_get_node(const dtree_dt_index *h,const char *t,dtree_flag_f sflags,dtree_pos_f pos)
{
    unsigned int slot;
    char ahead;
    const char *cur;
    packed_ptr ref;
    const dtree_dt_node *node;
    const dtree_dt_node *found;

    if(!sflags)
        sflags=h->sflags;

    //both search modes start at the root child for the first character
    ref=h->head->nodes[dtree_hash_char(*t)];
    node=DTREE_DT_GETPP(h,ref);

    if(sflags & DTREE_S_FLAG_REGEX)
    {
        found=dtree_search_node(h,node,t,pos);

        if(found)
            return found;

        //leading wildcard
        ref=h->head->nodes[DTREE_HASH_ANY];
        node=DTREE_DT_GETPP(h,ref);

        if(!ref)
            return NULL;

        return dtree_search_node(h,node,t,pos);
    }

    //plain mode: walk the tree one character at a time
    cur=t;

    while(*cur && ref)
    {
        //current character does not match this node
        if(node->data!=*cur && node->data!=(*cur|0x20) && node->data!=DTREE_PATTERN_ANY)
            return NULL;

        ahead=cur[1];
        slot=dtree_hash_char(ahead);

        //the token ends after the current character
        if((!slot && ahead!='0') || slot>=DTREE_HASH_SEP)
        {
            //accept when the node carries flags, the end is acceptable
            //(partial mode or real end of string) and the position fits
            if(node->flags && ((sflags & DTREE_S_FLAG_PARTIAL) || !ahead) &&
                    (!node->pos || !pos || node->pos==pos))
                return node;

            if(!ahead)
                return NULL;
        }

        ref=node->nodes[slot];
        node=DTREE_DT_GETPP(h,ref);
        cur++;
    }

    return NULL;
}
Exemplo n.º 4
0
//inserts pattern text t below node n and attaches the payload where it ends
//
//  h                 index being built; regex syntax is honoured when h->sflags has DTREE_S_FLAG_REGEX
//  n                 node to insert below (NULL is a no-op returning 0)
//  t                 remaining pattern text, modified in place
//  data/flags/param  payload arguments forwarded to dtree_set_payload()
//Returns -1 on allocation failure, 0 when nothing could be added here,
//1 after a [set] was expanded, otherwise the result of
//dtree_set_payload() or of the recursive call.
static int dtree_add_node(dtree_dt_index *h,dtree_dt_node *n,char *t,void *data,flag_f flags,void *param)
{
    int slot;
    char *end;
    packed_ptr ref;
    dtree_dt_node *child;

    if(!n)
        return 0;

    dtree_printd(DTREE_PRINT_INITDTREE,"ADD: tree: '%c' level: %d *t: '%c' p(%p) pp(%p)\n",
             n->data,dtree_node_depth(h,n),*t?*t:'#',n,n->curr);

    //pattern already exhausted (trailing wildcard case): payload goes on n
    if(!*t && n->prev)
        return dtree_set_payload(h,n,data,flags,param);

    //'?': the character held by n is optional, so the remainder is also
    //added below n's parent
    if((h->sflags & DTREE_S_FLAG_REGEX) && *t==DTREE_PATTERN_OPTIONAL)
    {
        ref=n->prev;
        child=DTREE_DT_GETPP(h,ref);

        if(!ref)
            return 0;

        dtree_printd(DTREE_PRINT_INITDTREE,"ADD: optional: '%c' *t: '%c'\n",child->data,*(t+1));

        t++;

        if(dtree_add_node(h,child,t,data,flags,param)<0)
            return -1;

        //nothing after the '?': n itself ends the pattern
        if(!*t)
            return dtree_set_payload(h,n,data,flags,param);
    }

    //'[abc]': expand every member of the set
    if((h->sflags & DTREE_S_FLAG_REGEX) && *t==DTREE_PATTERN_SET_S)
    {
        //locate the closing bracket
        end=t;
        while(*end && *end!=DTREE_PATTERN_SET_E)
            end++;

        //unterminated set
        if(!*end)
            return 0;

        //each member is written over the closing bracket so that it is
        //followed by the text after the set, then added from there
        for(t++;t<end;t++)
        {
            *end=*t;

            dtree_printd(DTREE_PRINT_INITDTREE,"ADD: set: '%c' *t: '%c' t: '%s'\n",n->data,*t,end);

            if(dtree_add_node(h,n,end,data,flags,param)<0)
                return -1;
        }

        *end=DTREE_PATTERN_SET_E;

        return 1;
    }

    slot=dtree_hash_char(*t);

    //slot 0 is only valid for the character '0'
    if(!slot && *t!='0')
        return 0;

    //store lower case
    if(*t>='A' && *t<='Z')
        *t|=0x20;

    //'.'
    if(slot==DTREE_HASH_ANY)
        *t=DTREE_PATTERN_ANY;

    ref=(packed_ptr)n->nodes[slot];
    child=DTREE_DT_GETPP(h,ref);

    //create the child on first use
    if(!ref)
    {
        ref=dtree_alloc_node(h);
        child=DTREE_DT_GETPP(h,ref);

        if(!ref)
        {
            fprintf(stderr,"DTREE: allocation error detected, aborting: %zu\n",h->node_count);
            return -1;
        }

        child->data=*t;
        child->curr=ref;
        child->prev=n->curr;

        n->nodes[slot]=ref;

        dtree_printd(DTREE_PRINT_INITDTREE,"ADD: new node '%c' created level: %d hash: %d p(%p) pp(%p)\n",
                 *t,dtree_node_depth(h,n),slot,child,ref);
    }

    t++;

    //last character: attach the payload, otherwise descend
    if(!*t)
        return dtree_set_payload(h,child,data,flags,param);

    return dtree_add_node(h,child,t,data,flags,param);
}
Exemplo n.º 5
0
//searches for a token with regex
//
//Recursive matcher: n is the node that must match the character at *t.
//  h  index the nodes belong to
//  n  candidate node for *t (NULL yields NULL)
//  t  remaining token text, NUL terminated
//Returns the best matching node reachable through n, or NULL if none.
//
//The literal child for the next character is tried first, then the
//wildcard child, then (in partial mode) n itself if it is deep enough.
//When n is a token and the next character ends the token, n competes with
//the deeper result: STRONG first, then plain CHAIN (no BCHAIN), then WEAK.
static const dtree_dt_node *dtree_search_node(const dtree_dt_index *h,const dtree_dt_node *n,const char *t)
{
    int hash;
    flag_f rf,nf;
    packed_ptr pp;
    const dtree_dt_node *rflag;
    
    if(!n)
        return NULL;
    
    //the terminator can never be consumed by a node; without this guard a
    //wildcard node would match it and the t++ below would read past the string
    if(!*t)
        return NULL;
    
    //bad match
    if(n->data!=*t && n->data!=(*t|0x20) && n->data!=DTREE_PATTERN_ANY)
        return NULL;
    
    t++;
    hash=dtree_hash_char(*t);
    
    //lazy EOT
    if((!hash && *t!='0') || hash>=DTREE_HASH_SEP)
    {
        //only quit if strong
        if(dtree_get_flag(h,n,DTREE_DT_FLAG_STRONG))
            return n;
        //end of string on a node without flags: nothing can match
        //(the original tested the pointer t, which is never NULL here)
        else if(!*t && !n->flags)
            return NULL;
    }
    
    //literal child for the next character
    pp=n->nodes[hash];
    rflag=DTREE_DT_GETPP(h,pp);
    
    if(rflag && *t)
        rflag=dtree_search_node(h,rflag,t);
    else
        rflag=NULL; //at the terminator the slot 0 child was never matched; do not report it
    
    //wildcard
    if(!rflag)
    {
        hash=dtree_hash_char(DTREE_PATTERN_ANY);
        
        pp=n->nodes[hash];
        rflag=DTREE_DT_GETPP(h,pp);
        
        if(rflag)
            rflag=dtree_search_node(h,rflag,t);
        
        //restore the hash of the real next character for the check below
        hash=dtree_hash_char(*t);
    }
    
    //partial
    if(!rflag && (n->flags & DTREE_DT_FLAG_TOKEN) && (h->sflags & DTREE_S_FLAG_PARTIAL))
    {
        if(dtree_node_depth(h,n)>=DTREE_S_PART_TLEN)
            rflag=n;
    }
    
    //n is lazy EOT or stronger match
    if((n->flags & DTREE_DT_FLAG_TOKEN) && ((hash==0 && *t!='0') || hash>=DTREE_HASH_SEP))
    {
        if(!rflag)
            rflag=n;
        else
        {
            //flags are only fetched when there is a deeper result to compare
            //against, so the helper is never handed a NULL node
            nf=dtree_get_flags(h,n);
            rf=dtree_get_flags(h,rflag);
            
            if((nf & DTREE_DT_FLAG_STRONG) && !(rf & DTREE_DT_FLAG_STRONG))
                rflag=n;
            else if((rf & DTREE_DT_FLAG_STRONG))
                ; //deeper strong match wins
            else if((nf & DTREE_DT_FLAG_CHAIN) && !(nf & DTREE_DT_FLAG_BCHAIN) &&
                    !(rf & DTREE_DT_FLAG_CHAIN) && !(rf & DTREE_DT_FLAG_BCHAIN))
                rflag=n;
            else if((rf & DTREE_DT_FLAG_CHAIN))
                ; //deeper chain match wins
            else if((nf & DTREE_DT_FLAG_WEAK))
                rflag=n;
        }
    }
    
    return rflag;
}
Exemplo n.º 6
0
//looks up the node that matches token t
//
//  h       index to search
//  t       token text; only the leading run up to a separator/terminator is consumed
//  sflags  search flags; 0 means "use the flags stored in the index"
//Returns the matching node, the deepest partial candidate seen (partial
//mode only), or NULL.
const dtree_dt_node *dtree_get_node(const dtree_dt_index *h,const char *t,flag_f sflags)
{
    int slot;
    char ahead;
    const char *cur;
    packed_ptr ref;
    const dtree_dt_node *node;
    const dtree_dt_node *candidate=NULL;

    if(!sflags)
        sflags=h->sflags;

    //both search modes start at the root child for the first character
    ref=h->head->nodes[dtree_hash_char(*t)];
    node=DTREE_DT_GETPP(h,ref);

    if(sflags & DTREE_S_FLAG_REGEX)
    {
        candidate=dtree_search_node(h,node,t);

        if(candidate)
            return candidate;

        //leading wildcard
        ref=h->head->nodes[dtree_hash_char(DTREE_PATTERN_ANY)];
        node=DTREE_DT_GETPP(h,ref);

        if(!ref)
            return NULL;

        return dtree_search_node(h,node,t);
    }

    //plain mode: walk the tree one character at a time
    cur=t;

    while(*cur && ref)
    {
        //current character does not match this node
        if(node->data!=*cur && node->data!=(*cur|0x20) && node->data!=DTREE_PATTERN_ANY)
            return candidate;

        ahead=cur[1];
        slot=dtree_hash_char(ahead);

        //the token ends after the current character
        if((!slot && ahead!='0') || slot>=DTREE_HASH_SEP)
        {
            //a flagged node is accepted at the real end of the string, or at
            //any token end when partial matching is enabled
            if(node->flags && ((sflags & DTREE_S_FLAG_PARTIAL) || !ahead))
                return node;

            if(!ahead)
                return candidate;
        }

        //remember token nodes that are long enough to count as a partial match
        if((node->flags & DTREE_DT_FLAG_TOKEN) && (sflags & DTREE_S_FLAG_PARTIAL) && (cur+1-t)>=DTREE_S_PART_TLEN)
            candidate=node;

        ref=node->nodes[slot];
        node=DTREE_DT_GETPP(h,ref);
        cur++;
    }

    return candidate;
}
Exemplo n.º 7
0
//classifies a string
const dclass_keyvalue *dclass_classify(const dclass_index *di,const char *str)
{
    int on=0;
    int valid;
    int bcvalid;
    int i;
    char buf[DTREE_DATA_BUFLEN];
    const char *p;
    const char *token="";
    packed_ptr pp;
    const dtree_dt_node *wnode=NULL;
    const dtree_dt_node *nnode=NULL;
    const dtree_dt_node *fbnode;
    const dtree_dt_node *fnode;
    const dtree_dt_index *h=&di->dti;
    const void *cnodes[DTREE_S_MAX_CHAIN]={NULL};
    
    if(!str || !h->head)
        return dclass_get_kverror(di);
    
    dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() UA: '%s'\n",str);
    
    for(p=str;*p;p++)
    {
        valid=0;
        
        if(dtree_hash_char(*p)<DTREE_HASH_SEP)
        {
            //new token found
            if(!on)
            {
                token=p;
                on=1;
            }
            
            valid=1;
        }
        
        if((!valid || (!*(p+1))) && on)
        {
            //EOT found
            fbnode=dtree_get_node(h,token,0);
            
            dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() token:'%s' = '%s':%d\n",
                    token,fbnode?dtree_node_path(h,fbnode,buf):"",fbnode?(int)fbnode->flags:0);
            
            if(fbnode && dtree_get_flag(h,fbnode,DTREE_DT_FLAG_TOKEN))
            {
                if((fnode=dtree_get_flag(h,fbnode,DTREE_DT_FLAG_STRONG)))
                    return fnode->payload;
                else if((fnode=dtree_get_flag(h,fbnode,DTREE_DT_FLAG_WEAK)))
                {
                    i=DTREE_DC_DISTANCE(h,(char*)fnode->payload);
                    if(!wnode || (i>=0 && i<DTREE_DC_DISTANCE(h,(char*)wnode->payload)))
                        wnode=fnode;
                }
                else if((fnode=dtree_get_flag(h,fbnode,DTREE_DT_FLAG_NONE)))
                {
                    i=DTREE_DC_DISTANCE(h,(char*)fnode->payload);
                    if(!nnode || (i>=0 && i<DTREE_DC_DISTANCE(h,(char*)nnode->payload)))
                        nnode=fnode;
                }
                
                if((fnode=dtree_get_flag(h,fbnode,DTREE_DT_FLAG_CHAIN)))
                {
                    dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() pchain detected\n");
                    
                    pp=fnode->curr;

                    while(pp)
                    {
                        bcvalid=0;
                        
                        if(fnode->flags & DTREE_DT_FLAG_CHAIN && fnode->cparam)
                        {   
                            dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() looking for pchain %p\n",fnode->cparam);
                            for(i=0;i<DTREE_S_MAX_CHAIN && cnodes[i];i++)
                            {
                                //chain hit
                                if(cnodes[i]==fnode->cparam)
                                {
                                    if(fnode->flags & DTREE_DT_FLAG_BCHAIN)
                                        bcvalid=1;
                                    else
                                        return fnode->payload;
                                }
                            }
                        }
                        
                        if(fnode->flags & DTREE_DT_FLAG_BCHAIN && (bcvalid || !fnode->cparam))
                        {
                            for(i=0;i<DTREE_S_MAX_CHAIN;i++)
                            {
                                if(!cnodes[i])
                                {
                                    cnodes[i]=fnode->payload;
                                    dtree_printd(DTREE_PRINT_CLASSIFY,"dtree_classify() pchain added: %p('%d')\n",fnode->payload,i);
                                    break;
                                }
                            }
                        }
                        
                        pp=fnode->dup;
                        fnode=DTREE_DT_GETPP(h,pp);
                    }
                }
            }
            on=0;
        }
    }
    
    if(wnode)
        return wnode->payload;
    else if(nnode)
        return nnode->payload;
    
    return dclass_get_kverror(di);
}