/*----------------------------------------------------------------------
  _child -- create the child node that extends the item set stored at
  position `index` of `node`.

  ist    item set tree; ist->buf is used as scratch space for the sets
         to test, ist->map collects the accepted candidate items
  node   node whose counter `index` denotes the set to extend
  index  position in node->cnts (0 <= index < node->size)
  nbgen  running total; increased by the number of counters of the
         child just before the child is allocated (hence also when the
         allocation itself fails)

  returns NULL if no child is needed (support of the set below
  ist->supp, or no candidate survives the subset checks),
  (ISNODE*)-1 if malloc fails, otherwise the new node with all
  counters set to zero.  The node is not linked into any list here
  (succ is NULL); only its parent pointer is set.
----------------------------------------------------------------------*/
static ISNODE* _child (ISTREE *ist, ISNODE *node, int index, int & nbgen)
{
  ISNODE *walk;                 /* node on the way up to the root */
  ISNODE *child;                /* the node to be created */
  int    *probe;                /* start of the set under test in ist->buf */
  int    *slot;                 /* counter vector of the child */
  int     pos;                  /* position of a candidate in node->cnts */
  int     cand;                 /* item identifier of a candidate */
  int     base;                 /* item identifier of the extended set */
  int     kept = 0;             /* number of candidates noted in ist->map */
  int     len;                  /* number of items in the set under test */
  int     width;                /* number of counters of the child */
  int     cnt;                  /* support of the set under test */

  assert(ist && node            /* check the function arguments */
      && (index >= 0) && (index < node->size));

  /* --- the set to extend must itself be frequent --- */
  cnt = node->cnts[index];
  if (cnt < ist->supp)
    return NULL;
  base = node->offset + index;
  ist->buf[ist->lvlvsz - 2] = base;     /* fixed element of every test set */

  /* --- examine the candidates that follow `index` --- */
  for (pos = index + 1; pos < node->size; pos++) {
    cnt = node->cnts[pos];
    if (cnt < ist->supp)        /* infrequent sibling: not a candidate */
      continue;
    len      = 2;               /* test set starts as <base, cand> */
    probe    = ist->buf + ist->lvlvsz - len;
    cand     = node->offset + pos;
    probe[1] = cand;
    walk     = node;
    while (walk->parent) {      /* check the subsets along the path */
      cnt = _getsupp(walk->parent, probe, len);
      if (cnt < ist->supp)      /* a subset is infrequent: give up */
        break;
      --probe;                  /* prepend the id of the current node */
      *probe = walk->id;
      ++len;
      walk = walk->parent;
    }
    if (!walk->parent)          /* root reached: every check succeeded */
      ist->map[kept++] = cand;
  }
  if (kept <= 0)                /* no candidate left, no child needed */
    return NULL;

  /* --- create the child --- */
  width  = ist->map[kept - 1] - ist->map[0] + 1;
  nbgen += width;
  child  = (ISNODE*)malloc(sizeof(ISNODE) + (width - 1) * sizeof(int));
  if (!child)
    return (ISNODE*)-1;
  child->parent = node;
  child->succ   = NULL;
  child->id     = base;
  child->chcnt  = 0;
  child->size   = width;
  child->offset = ist->map[0];  /* first accepted item is the offset */
  slot = child->cnts;
  for (pos = 0; pos < width; pos++)
    slot[pos] = 0;              /* clear all counters of the node */
  return child;
}  /* _child() */
int ist_rule (ISTREE *ist, int *rule, double* occhyp, double* occcon, double *supp, double *conf, double *aval, double *phi, double *impli, double* normal_simi,double *entro_simi, int maxlen, int simple_impli, int Binomial_law) { /* --- extract next rule */ int i; /* loop variable */ int item; /* buffer for an item identifier */ ISNODE *isnode; /* current item set node */ ISNODE *parent; /* parent of the item set node */ unsigned s_rule; /* minimal support of a rule */ unsigned s_min; /* minimal support of a set */ float s_set; /* support of set (body & head) */ float s_sub; /* support of subset (body) */ double occ_a; double occ_b; double occ_n; double occsqa,occsqb; double pi; double pi2; double occ_abb; double occ_ab; double tmp_b,tmp_c; double alpha, beta, t, h1, h2, ii, unmb; double p_body, p_head; /* prior confidences/probabilities */ double c, v; /* confidence and measure value */ int app; /* appearance flag of head item */ assert(ist && rule && supp && conf); /* check arguments */ /* --- initialize --- */ if (ist->rulelen > ist->height) /* if the tree is not high enough */ return -1; /* for the rule length, abort */ s_rule = (unsigned)ceil(ist->setcnt *ist->supp); if (s_rule < 1) s_rule = 1; /* compute the minimal rule support */ s_min = (ist->rsdef == IST_BOTH) ? 
s_rule : (unsigned)ceil(ist->setcnt *ist->supp *ist->conf); if (ist->isnode) /* if this is not the first rule, */ isnode = ist->isnode; /* get the buffered item set node */ else { /* if this is the first rule */ isnode = ist->isnode = ist->levels[ist->rulelen-1]; ist->index = ist->hditem = -1; /* initialize the */ } /* rule extraction variables */ /* --- find rule --- */ while (1) { /* search for a rule */ if (ist->hditem >= 0) { /* --- select next item subset */ ist->path[ist->pathlen++] = ist->hditem; ist->hditem = ID(ist->hdnode); /* add previous head to the path */ ist->hdnode = ist->hdnode->parent;/* and get the next head item */ if (!ist->hdnode) /* if all subsets have been processed */ ist->hditem = -1; /* clear the head item to trigger the */ } /* selection of a new item set */ if (ist->hditem < 0) { /* --- select next item set */ if (++ist->index >= isnode->size) { /* if all subsets have been */ isnode = isnode->succ; /* processed, go to the successor */ if (!isnode) { /* if at the end of a level, go down */ if (++ist->rulelen > ist->height) return -1; /* if beyond the leaf level, abort */ isnode = ist->levels[ist->rulelen-1]; } /* get the 1st node of the new level */ ist->isnode = isnode; /* note the new item set node and */ ist->index = 0; /* start with the first item set */ } /* of the new item set node */ i = isnode->offs +ist->index; if ((ist->apps[i] == IST_IGNORE) || (HDONLY(isnode) && (ist->apps[i] == IST_HEAD))) continue; /* skip sets with two head only items */ ist->hditem = i; /* set the head item identifier */ ist->hdonly = HDONLY(isnode) || (ist->apps[i] == IST_HEAD); ist->hdnode = isnode; /* get the new head only flag, */ ist->pathlen = 0; /* set the new head item node, */ } /* and clear the path */ app = ist->apps[ist->hditem]; /* get head item appearance */ if (!(app & IST_HEAD) || (ist->hdonly && (app != IST_HEAD))) continue; /* if rule is not allowed, skip it */ s_set = isnode->cnts[ist->index]; /* get the item set support */ if (s_set < 
s_min) { /* if the set support is too low, */ ist->hditem = -1; continue; } /* skip this item set */ if (ist->pathlen <= 0) { /* if there is no path, */ parent = isnode->parent; /* get subset support from parent */ if (parent) { s_sub = parent->cnts[ID(isnode) -parent->offs]; occsqa = parent->occ_square[ID(isnode) -parent->offs]; } else { s_sub = (float)ist->setcnt; } } else { /* if there is a path (not 1st subset)*/ s_sub = _getsupp(ist->hdnode, ist->path, ist->pathlen, &occsqa); } /* get subset support using the path */ if (s_sub < s_rule) /* if the subset support is too low, */ continue; /* get the next subset/next set */ c = (double)s_set/s_sub; /* compute the rule confidence */ occ_a=s_sub; occ_b=ist->levels[0]->cnts[ist->hditem]; occsqb=ist->levels[0]->occ_square[ist->hditem]; occ_n=ist->setcnt; pi=occ_a*(occ_n-occ_b); /*pi=p(a)p(b barre)*/ pi2=occsqa*(occ_n-2*occ_b+occsqb); occ_ab=s_set; /*a et b*/ occ_abb=occ_a-occ_ab; /*a et b barre*/ tmp_b=occ_abb-pi/occ_n; *occhyp=occ_a; *occcon=occ_b; int binary_data=(occ_a==occsqa && occ_b==occsqb); if(Binomial_law) { if(binary_data && pi/occ_n*(1-pi/(double)(occ_n*occ_n))<50.) { *phi=1.-Binomiale(pi/(occ_n*occ_n),(long)occ_n,(long)occ_abb); } else { if(pi2==0) tmp_c=0; else tmp_c=tmp_b/sqrt(pi2/occ_n*(1.-pi2/(occ_n*occ_n))); *phi=1.-Normal(tmp_c); } } else { if(binary_data && (pi/occ_n<=5. 
||occ_abb<48.)) { *phi=1.-Poisson(occ_a/occ_n*(occ_n-occ_b),(int)occ_abb); } else { if(pi2==0) tmp_c=0; else tmp_c=tmp_b/sqrt(pi2/occ_n); *phi=1.-Normal(tmp_c); } } alpha=(double)occ_a/occ_n; beta=(double)occ_b/occ_n; t=(double)occ_abb/occ_n; /* if (t <= alpha/2.0) h1 =-xl2xb((alpha-t)/alpha) - xl2xb(t/alpha); else h1 =1; unmb = 1.0 - beta; if (t <= unmb/2.0) h2 = -xl2xb((unmb-t)/unmb) - xl2xb(t/unmb); else h2 = 1; ii = pow((1-h1*h1)*(1-h2*h2),0.25);; *impli=sqrt(*phi*ii); */ // entropic version /* if (t <= alpha/2.0) h1 =0.5*(1+xl2xb(0.5-t/alpha) + xl2xb(0.5+t/alpha)); else if( t<=alpha) h1 =0.5*(1-xl2xb(t/alpha-0.5) - xl2xb(1.5-t/alpha)); else h1=1.; unmb = 1.0 - beta; if (t <= unmb/2.0) h2 = 0.5*(1+xl2xb(0.5-t/unmb) + xl2xb(0.5+t/unmb)); else if(t<=unmb) h2 = 0.5*(1-xl2xb(t/unmb-0.5) - xl2xb(1.5-t/unmb)); else h2=1.; ii = sqrt((1.-h1)*(1.-h2)); *impli=(1.-1./(2.*sqrt(occ_n)))*ii; */ //implifiance double occ_nonanonb=occ_n-(occ_b+occ_abb); double C1=occ_ab/occ_a; double C2=occ_nonanonb/(occ_n-occ_b); *impli=*phi*pow(C1*C2,0.25); //normal similarity double c=(occ_a*occ_b)/occ_n; *normal_simi=Normal((occ_ab-c)/sqrt(c)); if(simple_impli) { if ((ist->rulelen==maxlen && *phi < ist->conf -EPSILON) || (ist->rulelen<maxlen && ist->conf==0)) /* if the confidence is too low, */ continue; /* get the next item subset/item set */ } else { if ((ist->rulelen==maxlen && *impli < ist->conf -EPSILON) || (ist->rulelen<maxlen && ist->conf==0)) /* if the confidence is too low, */ continue; /* get the next item subset/item set */ } if (ist->arem == EM_NONE) { /* if no add. eval. 
measure given, */ v = 0; break; } /* abort the loop (select the rule) */ if (ist->rulelen < 2) { /* if rule has an empty antecedent, */ v = 0; break; } /* abort the loop (select the rule) */ p_body = (double)s_sub /ist->setcnt; p_head = (double)ist->levels[0]->cnts[ist->hditem] / ist->setcnt; /* compute prior probabilities */ v = _eval(ist->arem, p_head, p_body, c); if (v >= ist->minval) /* if rule value exceeds the minimal */ break; /* of the add. rule eval. measure, */ } /* while (1) */ /* abort the loop (select rule) */ /* --- build rule --- */ i = ist->rulelen; /* get rule length */ item = ist->index +isnode->offs; /* if the current item is */ if (item != ist->hditem) /* not the head of the rule, */ rule[--i] = item; /* add it to the body */ while (isnode->parent) { /* traverse path to root and */ if (ID(isnode) != ist->hditem) /* add all items on this path */ rule[--i] = ID(isnode); /* (except the head of the rule) */ isnode = isnode->parent; /* to the rule body */ } rule[0] = ist->hditem; /* set the rule head */ *supp = ((ist->rsdef == IST_BODY) ? s_sub : s_set) / (double)ist->setcnt; /* set the rule support */ *conf = c; /* and the rule confidence */ if (aval) *aval = v; /* set the value of the add. measure */ return ist->rulelen; /* return the rule length */ } /* ist_rule() */
int ist_hedge (ISTREE *ist, int *hedge, double *supp, double *conf) { /* --- extract next hyperedge */ int i; /* loop variable */ ISNODE *isnode; /* current item set node */ ISNODE *hdnode; /* node containing the rule head */ int *path, len; /* path buffer and path length */ unsigned s_min; /* minimal support of a hyperedge */ double s_set; /* support of set (body & head) */ double s_sub; /* support of subset (body) */ double dummy; assert(ist && hedge && supp && conf); /* check arguments */ /* --- initialize --- */ if (ist->rulelen > ist->height) /* if the tree is not high enough */ return -1; /* for the hyperedge size, abort */ s_min = (unsigned)ceil(ist->setcnt *ist->supp); if (s_min < 1) s_min = 1; /* compute the minimal support */ if (!ist->isnode) /* on first hyperedge, initialize */ ist->isnode = ist->levels[ist->rulelen-1]; /* current node */ isnode = ist->isnode; /* get the current item set node */ path = ist->path; /* and the path buffer */ /* --- find hyperedge --- */ while (1) { /* search for a hyperedge */ if (++ist->index >= isnode->size) { /* if all subsets have been */ isnode = isnode->succ; /* processed, go to the successor */ if (!isnode) { /* if at the end of a level, go down */ if (++ist->rulelen > ist->height) return -1; /* if beyond the leaf level, abort */ isnode = ist->levels[ist->rulelen-1]; } /* get the 1st node of the new level */ ist->isnode = isnode; /* note the new item set node and */ ist->index = 0; /* start with the first item set */ } /* of the new item set node */ s_set = isnode->cnts[ist->index]; if (s_set < s_min) /* if the set support is too low, */ continue; /* skip this item set */ hdnode = isnode->parent; /* get subset support from parent */ if (hdnode) s_sub = hdnode->cnts[ID(isnode) -hdnode->offs]; else s_sub = ist->setcnt; *conf = (double)s_set/s_sub;/* compute confidence of first rule */ path[0] = ist->index +isnode->offs; len = 1; /* initialize path and */ while (hdnode) { /* traverse the path up to root */ s_sub = 
_getsupp(hdnode, path, len,&dummy); /* get the set support */ *conf += (double)s_set/s_sub; /* and sum the rule confidences */ path[len++] = ID(hdnode); /* store head item in the path */ hdnode = hdnode->parent; /* and go to the parent node */ } /* (get the next rule head) */ *conf /= ist->rulelen; /* average rule confidences */ if (*conf >= ist->minval) break; } /* while(1) */ /* if confidence suffices, abort loop */ *supp = (double)s_set/ist->setcnt; /* set hyperedge support */ /* --- build hyperedge --- */ i = ist->rulelen; /* get current hyperedge size and */ hedge[--i] = ist->index +isnode->offs; /* store the first item */ while (isnode->parent) { /* while not at the root node */ hedge[--i] = ID(isnode); /* add item to the hyperedge */ isnode = isnode->parent; /* and go to the parent node */ } return ist->rulelen; /* return hyperedge size */ } /* ist_hedge() */
/*----------------------------------------------------------------------
  _child -- create the child node that extends the item set ending in
  `item` at `node`.

  ist    item set tree; ist->path is used as scratch space and
         ist->apps supplies the appearance flags of the items
  node   node that contains the counter of the set to extend
  item   identifier of the item, node->offs <= item < offs + size
  s_min  minimal support a set needs to be extended
  s_sub  minimal support a set needs to serve as a rule body

  returns NULL if no child is needed, (ISNODE*)(void*)-1 if malloc
  fails, otherwise the new node.  The node is not linked into any
  list here (succ is NULL).  Its counter vectors cnts[] and
  occ_square[] live in the same allocation directly behind the node
  structure and are returned zeroed.
----------------------------------------------------------------------*/
static ISNODE* _child (ISTREE *ist, ISNODE *node, int item,
                       double s_min, double s_sub)
{                               /* --- create child node (extend set) */
  ISNODE *curr;                 /* to traverse the path to the root */
  int    i, index;              /* loop variable, data vector index */
  int    len;                   /* length of path to check */
  int    frst, last;            /* id. of first/last candidate */
  int    width;                 /* number of counters of the child */
  int    body = 0;              /* enough support for a rule body */
  int    hdonly;                /* head only item in path */
  int    app;                   /* appearance flags of item */
  double s_set;                 /* support of some set */
  double dummy;                 /* extra output of _getsupp, ignored */

  assert(ist && node);          /* check the function arguments */
  assert((item >= node->offs) && (item < node->offs +node->size));
  app = ist->apps[item];        /* get the item appearance */
  if ((app == IST_IGNORE)       /* do not extend an item to ignore */
  ||  ((HDONLY(node) && (app == IST_HEAD))))
    return NULL;                /* nor a set with two head only items */
  hdonly = HDONLY(node) || (app == IST_HEAD);

  /* --- initialize --- */
  index = item -node->offs;     /* compute index in data vector */
  s_set = node->cnts[index];    /* get support of item set to extend */
  if (s_set < s_min)            /* if the set has not enough support */
    return NULL;                /* no child is needed, so abort */
  if (s_set >= s_sub)           /* if set support is large enough */
    body = 1;                   /* for a rule body, set body flag */
  ist->path[1] = item;          /* set fixed path element */
  frst = node->size; last = -1; /* initialize index limits */

  /* --- check candidates --- */
  /* The set S represented by the index-th vector element of the   */
  /* current node is extended only by combining it with the sets   */
  /* represented by the fields that follow it in the node vector,  */
  /* i.e. by the sets represented by vec[index+1] to vec[size-1].  */
  /* The sets that can be formed by combining the set S and the    */
  /* sets represented by vec[0] to vec[index-1] are processed in   */
  /* the branches for these sets.                                  */
  /*   In the below loop for each set represented by vec[index+1]  */
  /* to vec[size-1] it is checked, whether this set and all the    */
  /* other subsets of the same size, that can be formed from the   */
  /* union of this set and the set S, have enough support, so that */
  /* a child node is necessary.                                    */
  /*   Note, that i +offs is the identifier of the item that has   */
  /* to be added to set S to form the union of the set S and the   */
  /* set T represented by vec[i], since S and T have the same path */
  /* with the exception of the index in the current node. Hence we */
  /* can speak of candidate items that are added to S.             */
  /*   Checking the support of the other subsets of the union of S */
  /* and T that have the same size as S and T is done with the aid */
  /* of a path variable. The items in this variable combined with  */
  /* the items on the path to the current node always represent    */
  /* the subset currently tested. That is, the path variable holds */
  /* the path to be followed from the current node to arrive at    */
  /* the support counter for the subset. The path variable is      */
  /* initialized to [0]: <i+offs>, [1]: <item>, since the support  */
  /* counters for S and T can be inspected directly. Then this     */
  /* path is followed from the parent node of the current node,    */
  /* which is equivalent to checking the subset that can be        */
  /* obtained by removing from the union of S and T the item that  */
  /* corresponds to the parent node (in the path to S or T, resp.).*/
  /*   Iteratively making the parent node the current node, adding */
  /* its corresponding item to the path and checking the support   */
  /* counter at the end of the path variable when starting from    */
  /* its (the new current node's) parent node tests all other      */
  /* subsets.                                                      */
  /*   Another criterion is that the extended set must not contain */
  /* two items which may appear only in the head of a rule. If two */
  /* such items are contained in a set, neither can a rule be      */
  /* formed from its items nor can it be the antecedent of a rule. */
  /* Whether a set contains two head only items is determined from */
  /* the node's `hdonly' flag and the appearance flags of the      */
  /* items.                                                        */
  for (i = index +1; i < node->size; i++) {
    app = ist->apps[node->offs +i];   /* get appearance flags of item */
    if ((app == IST_IGNORE) || (hdonly && (app == IST_HEAD)))
      continue;                 /* skip sets with 2 head only items */
    s_set = node->cnts[i];      /* traverse candidate items */
    if (s_set < s_min)          /* if set support is too low, */
      continue;                 /* ignore this candidate */
    if (s_set >= s_sub)         /* if set support is large enough */
      body = 1;                 /* for a rule body, set body flag */
    ist->path[0] = node->offs+i;      /* add candidate to path and */
    len  = 2;                   /* set initial path length */
    curr = node;                /* start at current node */
    while (curr->parent) {      /* while not at root node */
      s_set = _getsupp(curr->parent, ist->path, len, &dummy);
      if (s_set < s_min)        /* get set support and */
        break;                  /* if it is too low, abort loop */
      if (s_set >= s_sub)       /* if some subset has enough support */
        body = 1;               /* for a rule body, set body flag */
      ist->path[len++] = ID(curr);
      curr = curr->parent;      /* add id of current node to path */
    }                           /* and go to parent node */
    if (s_set < s_min)          /* if some set's support is too low, */
      continue;                 /* ignore the corresponding candidate */
    if (i < frst) frst = i;     /* update index of first and */
    last = i;                   /* last successful candidate */
  }
  if (!body || (frst > last))   /* if no extension can have */
    return NULL;                /* enough support, abort function */

  /* --- create child --- */
  width = last -frst +1;        /* counters from first to last candidate */
  curr  = (ISNODE*)malloc(sizeof(ISNODE) +width *2*sizeof(float));
  if (!curr) return (ISNODE*)(void*)-1;   /* create child node */
  curr->parent = node;          /* set pointer to parent */
  curr->succ   = NULL;          /* clear successor pointer */
  curr->chcnt  = 0;             /* there are no children yet */
  curr->id     = item;          /* initialize item id */
  curr->cnts       = (float*)((char*)curr+sizeof(ISNODE));
  curr->occ_square = (float*)curr->cnts+width;
  /* FIX: malloc returns indeterminate memory and the clearing loop   */
  /* had been commented out, so both vectors were handed out          */
  /* uninitialized. Clear them explicitly.                            */
  for (i = 0; i < width; i++) {
    curr->cnts[i]       = 0;
    curr->occ_square[i] = 0;
  }
  if (hdonly) curr->id |= F_HDONLY;       /* set head only flag */
  curr->offs = node->offs +frst;          /* initialize offset and */
  curr->size = width;                     /* size of counter vector */
  return curr;                  /* return pointer to created child */
}  /* _child() */