Exemple #1
0
/*
 * _child: try to create the child node that extends the item set stored
 * at position `index` of `node`.
 *
 * Every later counter of `node` is a candidate extension. A candidate is
 * kept only if its own counter and the counters of all subsets reached
 * by walking up towards the root (looked up with _getsupp()) are not
 * below ist->supp. The kept item identifiers are collected in ist->map;
 * ist->buf is used as scratch space for the item sets that are checked.
 *
 * `nbgen` is increased by the number of counters of the new node.
 * NOTE(review): this happens before the allocation is checked, so the
 * increment is also applied when malloc() fails - confirm intended.
 *
 * Returns NULL if no child is needed, (ISNODE*)-1 if the allocation
 * fails, and the newly allocated node (all counters zero) otherwise.
 */
static ISNODE* _child (ISTREE *ist, ISNODE *node, int index, int & nbgen)
{
  ISNODE *anc;                  /* ancestor reached while walking up */
  ISNODE *child;                /* the node that gets created */
  int    *path;                 /* start of the item set under test */
  int    base;                  /* item of the set that is extended */
  int    cand;                  /* identifier of the candidate item */
  int    pos;                   /* position in a counter vector */
  int    len;                   /* number of items in `path` */
  int    kept;                  /* number of accepted candidates */
  int    width;                 /* size of the child's counter vector */
  int    support;               /* support of the set under test */

  assert(ist && node
     && (index >= 0) && (index < node->size));

  /* --- the set to extend must itself be frequent --- */
  support = node->cnts[index];
  if (support < ist->supp)
    return NULL;
  base = node->offset + index;
  ist->buf[ist->lvlvsz - 2] = base;   /* fixed next-to-last item */

  /* --- examine every counter that follows `index` --- */
  kept = 0;
  for (pos = index + 1; pos < node->size; pos++) {
    support = node->cnts[pos];
    if (support < ist->supp)
      continue;                 /* candidate itself is too rare */

    len     = 2;                /* start with { base, cand } */
    path    = ist->buf + ist->lvlvsz - len;
    cand    = node->offset + pos;
    path[1] = cand;

    /* climb towards the root; each step checks one more subset and */
    /* then prepends the item of the node that was just left behind */
    anc = node;
    while (anc->parent) {
      support = _getsupp(anc->parent, path, len);
      if (support < ist->supp)
        break;                  /* a subset is too rare: give up */
      path--;
      *path = anc->id;
      len++;
      anc = anc->parent;
    }
    if (!anc->parent)           /* root reached: all subsets passed */
      ist->map[kept++] = cand;
  }
  if (kept <= 0)
    return NULL;                /* nothing to extend with */

  /* --- build the child; counters span first..last kept item --- */
  width  = ist->map[kept - 1] - ist->map[0] + 1;
  nbgen += width;

  child = (ISNODE*)malloc(sizeof(ISNODE) + (width - 1) * sizeof(int));
  if (!child)
    return (ISNODE*)-1;         /* allocation failure marker */
  child->parent = node;
  child->succ   = NULL;
  child->id     = base;
  child->chcnt  = 0;
  child->size   = width;
  child->offset = ist->map[0];
  for (pos = 0; pos < width; pos++)
    child->cnts[pos] = 0;       /* start with cleared counters */
  return child;
}  /* _child() */
Exemple #2
0
/*
 * ist_rule: extract the next rule from the item set tree.
 *
 * The search state (current node, index, head item, path, rule length)
 * is kept in `ist`, so repeated calls enumerate the rules one by one.
 *
 * Parameters
 *   ist          item set tree holding counters and search state
 *   rule         receives the rule: rule[0] is the head item, the
 *                following entries are the body items
 *   occhyp       receives the body (hypothesis) occurrence count
 *   occcon       receives the head (conclusion) occurrence count
 *   supp         receives the rule support relative to ist->setcnt
 *   conf         receives the rule confidence
 *   aval         if not NULL, receives the additional measure value
 *   phi          receives the implication intensity
 *   impli        receives the "implifiance" value
 *   normal_simi  receives the normal similarity value
 *   entro_simi   currently not written by this function
 *   maxlen       rule length at which the threshold test is applied
 *   simple_impli non-zero: threshold on *phi, zero: threshold on *impli
 *   Binomial_law non-zero: binomial model, zero: Poisson model
 *
 * The measure outputs (occhyp, occcon, phi, impli, normal_simi) are
 * written for every candidate that is evaluated, so after a successful
 * return they describe the selected rule.
 *
 * Returns the rule length, or -1 if no further rule exists.
 */
int ist_rule (ISTREE *ist, int *rule,
              double* occhyp, double* occcon, double *supp, double *conf, double *aval, double *phi,
              double *impli, double* normal_simi,double *entro_simi, int maxlen, int simple_impli, int Binomial_law)
{                               /* --- extract next rule */
	int      i;                   /* loop variable */
	int      item;                /* buffer for an item identifier */
	ISNODE   *isnode;             /* current item set node */
	ISNODE   *parent;             /* parent of the item set node */
	unsigned s_rule;              /* minimal support of a rule */
	unsigned s_min;               /* minimal support of a set */
	float    s_set;               /* support of set    (body & head) */
	float    s_sub;               /* support of subset (body) */
	double   occ_a, occ_b;        /* occurrences of body and of head */
	double   occ_n;               /* number of transactions */
	double   occsqa, occsqb;      /* squared occurrences of body/head */
	double   pi, pi2;             /* products used by the models */
	double   occ_ab;              /* occurrences of a and b */
	double   occ_abb;             /* occurrences of a and not b */
	double   tmp_b, tmp_c;        /* deviation and normalized deviation */
	double   occ_nonanonb;        /* occurrences of not a and not b */
	double   C1, C2;              /* the two factors of the implifiance */
	double   expected;            /* expected co-occurrence of a and b */
	double   measure;             /* value compared with the threshold */
	int      binary_data;         /* whether both counters look binary */
	double   p_body, p_head;      /* prior confidences/probabilities */
	double   c, v;                /* confidence and measure value */
	int      app;                 /* appearance flag of head item */

	/* all of these are dereferenced unconditionally below */
	assert(ist && rule && supp && conf
	    && occhyp && occcon && phi && impli && normal_simi);
	(void)entro_simi;             /* accepted for the interface only */

	/* --- initialize --- */
	if (ist->rulelen > ist->height)  /* if the tree is not high enough */
		return -1;                     /* for the rule length, abort */
	s_rule = (unsigned)ceil(ist->setcnt *ist->supp);
	if (s_rule < 1) s_rule = 1;   /* compute the minimal rule support */
	s_min = (ist->rsdef == IST_BOTH) ? s_rule
		: (unsigned)ceil(ist->setcnt *ist->supp *ist->conf);
	if (ist->isnode)              /* if this is not the first rule, */
		isnode = ist->isnode;       /* get the buffered item set node */
	else {                        /* if this is the first rule */
		isnode = ist->isnode = ist->levels[ist->rulelen-1];
		ist->index = ist->hditem = -1;         /* initialize the */
	}                             /* rule extraction variables */

	/* --- find rule --- */
	while (1) {                   /* search for a rule */
		if (ist->hditem >= 0) {     /* --- select next item subset */
			ist->path[ist->pathlen++] = ist->hditem;
			ist->hditem = ID(ist->hdnode); /* add previous head to the path */
			ist->hdnode = ist->hdnode->parent;/* and get the next head item */
			if (!ist->hdnode)         /* if all subsets have been processed */
				ist->hditem = -1;       /* clear the head item to trigger the */
		}                           /* selection of a new item set */
		if (ist->hditem < 0) {      /* --- select next item set */
			if (++ist->index >= isnode->size) { /* if all subsets have been */
				isnode = isnode->succ;  /* processed, go to the successor */
				if (!isnode) {          /* if at the end of a level, go down */
					if (++ist->rulelen > ist->height)
						return -1;          /* if beyond the leaf level, abort */
					isnode = ist->levels[ist->rulelen-1];
				}                       /* get the 1st node of the new level */
				ist->isnode = isnode;   /* note the new item set node and */
				ist->index  = 0;        /* start with the first item set */
			}                         /* of the new item set node */
			i = isnode->offs +ist->index;
			if ((ist->apps[i] == IST_IGNORE)
					||  (HDONLY(isnode) && (ist->apps[i] == IST_HEAD)))
				continue;               /* skip sets with two head only items */
			ist->hditem  = i;         /* set the head item identifier */
			ist->hdonly  = HDONLY(isnode) || (ist->apps[i] == IST_HEAD);
			ist->hdnode  = isnode;    /* get the new head only flag, */
			ist->pathlen = 0;         /* set the new head item node, */
		}                           /* and clear the path */
		app = ist->apps[ist->hditem];  /* get head item appearance */
		if (!(app & IST_HEAD) || (ist->hdonly && (app != IST_HEAD)))
			continue;                 /* if rule is not allowed, skip it */
		s_set = isnode->cnts[ist->index];  /* get the item set support */

		if (s_set < s_min) {        /* if the set support is too low, */
			ist->hditem = -1; continue; }    /* skip this item set */
		if (ist->pathlen <= 0) {    /* if there is no path, */
			parent = isnode->parent;  /* get subset support from parent */
			if (parent) {
				s_sub = parent->cnts[ID(isnode) -parent->offs];
				occsqa = parent->occ_square[ID(isnode) -parent->offs];
			}
			else {                    /* empty body: it occurs everywhere */
				s_sub = (float)ist->setcnt;
				/* Previously left unset here, so the code below read an    */
				/* indeterminate (or stale) value. An empty body is treated */
				/* as occurring once per transaction, so its squared count  */
				/* equals the transaction count.                            */
				/* NOTE(review): confirm against how occ_square is filled.  */
				occsqa = (double)ist->setcnt;
			}
		}
		else {                      /* if there is a path (not 1st subset)*/
			s_sub = _getsupp(ist->hdnode, ist->path, ist->pathlen, &occsqa);
		}                           /* get subset support using the path */
		if (s_sub < s_rule)         /* if the subset support is too low, */
			continue;                 /* get the next subset/next set */
		c = (double)s_set/s_sub;    /* compute the rule confidence */

		/* --- occurrence counts for the implication measures --- */
		occ_a   = s_sub;
		occ_b   = ist->levels[0]->cnts[ist->hditem];
		occsqb  = ist->levels[0]->occ_square[ist->hditem];
		occ_n   = ist->setcnt;
		pi      = occ_a*(occ_n-occ_b);   /* n^2 * p(a) * p(not b) */
		pi2     = occsqa*(occ_n-2*occ_b+occsqb);
		occ_ab  = s_set;                 /* a and b */
		occ_abb = occ_a-occ_ab;          /* a and not b */
		tmp_b   = occ_abb-pi/occ_n;      /* observed minus expected */

		*occhyp = occ_a;
		*occcon = occ_b;
		/* counts equal to their squared counterparts are treated as */
		/* binary data, for which the discrete models are used       */
		binary_data = (occ_a==occsqa && occ_b==occsqb);

		/* --- implication intensity --- */
		if (Binomial_law) {
			if (binary_data && pi/occ_n*(1-pi/(double)(occ_n*occ_n))<50.) {
				*phi = 1.-Binomiale(pi/(occ_n*occ_n),(long)occ_n,(long)occ_abb);
			}
			else {                    /* normal approximation */
				if (pi2==0) tmp_c = 0;
				else tmp_c = tmp_b/sqrt(pi2/occ_n*(1.-pi2/(occ_n*occ_n)));
				*phi = 1.-Normal(tmp_c);
			}
		}
		else {
			if (binary_data && (pi/occ_n<=5. ||occ_abb<48.)) {
				*phi = 1.-Poisson(occ_a/occ_n*(occ_n-occ_b),(int)occ_abb);
			}
			else {                    /* normal approximation */
				if (pi2==0) tmp_c = 0;
				else tmp_c = tmp_b/sqrt(pi2/occ_n);
				*phi = 1.-Normal(tmp_c);
			}
		}

		/* --- implifiance --- */
		occ_nonanonb = occ_n-(occ_b+occ_abb);
		C1 = occ_ab/occ_a;
		C2 = occ_nonanonb/(occ_n-occ_b);
		*impli = *phi*pow(C1*C2,0.25);

		/* --- normal similarity --- */
		/* The expected count must not be called `c`: a block-local `c` */
		/* hid the confidence and was then handed to _eval() below.     */
		expected = (occ_a*occ_b)/occ_n;
		*normal_simi = Normal((occ_ab-expected)/sqrt(expected));

		/* --- threshold test on the selected implication measure --- */
		measure = simple_impli ? *phi : *impli;
		if ((ist->rulelen==maxlen && measure < ist->conf -EPSILON)
		||  (ist->rulelen<maxlen && ist->conf==0))
			continue;                 /* get the next item subset/item set */

		if (ist->arem == EM_NONE) { /* if no add. eval. measure given, */
			v = 0; break; }           /* abort the loop (select the rule) */
		if (ist->rulelen < 2) {     /* if rule has an empty antecedent, */
			v = 0; break; }           /* abort the loop (select the rule) */
		p_body = (double)s_sub /ist->setcnt;
		p_head = (double)ist->levels[0]->cnts[ist->hditem]
			/ ist->setcnt;       /* compute prior probabilities */
		v = _eval(ist->arem, p_head, p_body, c);
		if (v >= ist->minval)       /* if rule value exceeds the minimal */
			break;                    /* of the add. rule eval. measure, */
	}  /* while (1) */            /* abort the loop (select rule) */

	/* --- build rule --- */
	i    = ist->rulelen;          /* get rule length */
	item = ist->index +isnode->offs; /* if the current item is */
	if (item != ist->hditem)         /* not the head of the rule, */
		rule[--i] = item;              /* add it to the body */
	while (isnode->parent) {         /* traverse path to root and */
		if (ID(isnode) != ist->hditem) /* add all items on this path */
			rule[--i] = ID(isnode);   /* (except the head of the rule) */
		isnode = isnode->parent;    /* to the rule body */
	}
	rule[0] = ist->hditem;        /* set the rule head */
	*supp = ((ist->rsdef == IST_BODY) ? s_sub : s_set)
		/ (double)ist->setcnt;  /* set the rule support */
	*conf = c;                    /* and the rule confidence */
	if (aval) *aval = v;          /* set the value of the add. measure */
	return ist->rulelen;          /* return the rule length */
}  /* ist_rule() */
Exemple #3
0
/*
 * ist_hedge: extract the next hyperedge from the item set tree.
 *
 * The scan position is kept in `ist` (isnode, index, rulelen), so each
 * call continues after the item set returned by the previous one. An
 * item set qualifies if its counter reaches the minimal support and if
 * the average confidence of the rules obtained by using each of its
 * items as the head reaches ist->minval.
 *
 * Parameters
 *   ist    item set tree holding counters and scan state
 *   hedge  receives the items of the hyperedge
 *   supp   receives the support relative to ist->setcnt
 *   conf   receives the averaged confidence; it is also overwritten
 *          for candidates that end up being rejected
 *
 * Returns the hyperedge size, or -1 if there is no further hyperedge.
 */
int ist_hedge (ISTREE *ist, int *hedge, double *supp, double *conf)
{                               /* --- extract next hyperedge */
	ISNODE   *node;               /* item set node being scanned */
	ISNODE   *anc;                /* ancestor supplying the next head */
	int      *path;               /* item path handed to _getsupp() */
	int      plen;                /* number of items in the path */
	int      pos;                 /* write position in `hedge` */
	unsigned minsupp;             /* minimal support of a hyperedge */
	double   set_supp;            /* support of the whole item set */
	double   sub_supp;            /* support of the set without a head */
	double   unused;              /* extra output of _getsupp(), ignored */

	assert(ist && hedge && supp && conf);

	/* --- set up the scan --- */
	if (ist->rulelen > ist->height)
		return -1;                  /* tree too shallow for this size */
	minsupp = (unsigned)ceil(ist->setcnt *ist->supp);
	if (minsupp < 1)
		minsupp = 1;
	if (!ist->isnode)             /* first call: start of current level */
		ist->isnode = ist->levels[ist->rulelen-1];
	node = ist->isnode;
	path = ist->path;

	/* --- scan item sets until one qualifies --- */
	for (;;) {
		ist->index++;
		if (ist->index >= node->size) {   /* node exhausted */
			node = node->succ;
			if (!node) {              /* level exhausted: go one level down */
				ist->rulelen++;
				if (ist->rulelen > ist->height)
					return -1;            /* no level left */
				node = ist->levels[ist->rulelen-1];
			}
			ist->isnode = node;
			ist->index  = 0;
		}

		set_supp = node->cnts[ist->index];
		if (set_supp < minsupp)
			continue;                 /* item set is too rare */

		/* first rule: head is the indexed item, body is the node's set */
		anc = node->parent;
		if (anc)
			sub_supp = anc->cnts[ID(node) -anc->offs];
		else
			sub_supp = ist->setcnt;
		*conf = set_supp/sub_supp;

		/* remaining rules: move up, using each ancestor item as head */
		path[0] = ist->index +node->offs;
		for (plen = 1; anc; anc = anc->parent) {
			sub_supp = _getsupp(anc, path, plen, &unused);
			*conf += set_supp/sub_supp;
			path[plen++] = ID(anc);   /* former head joins the path */
		}

		*conf /= ist->rulelen;      /* average over all heads */
		if (*conf >= ist->minval)
			break;                    /* good enough: report this set */
	}
	*supp = set_supp/ist->setcnt;

	/* --- collect the items, last item first --- */
	pos = ist->rulelen -1;
	hedge[pos] = ist->index +node->offs;
	for ( ; node->parent; node = node->parent)
		hedge[--pos] = ID(node);
	return ist->rulelen;
}  /* ist_hedge() */
Exemple #4
0
/*
 * _child: try to create the child node that extends the item set which
 * `item` denotes inside `node`.
 *
 * The set S belonging to `item` is only combined with the counters that
 * follow it in the node's vector; combinations with earlier counters
 * are handled in the branches of those earlier items. A following
 * counter (candidate) is accepted when
 *   - its item is not marked IST_IGNORE and the extended set would not
 *     contain two head-only items,
 *   - its own support is at least s_min, and
 *   - every other subset of the same size is supported at least s_min.
 * Those other subsets are looked up with _getsupp(): ist->path starts
 * as [0] = candidate, [1] = item and is followed from the parent of the
 * current node; then the walk moves one node up, appends the item of
 * the node just left to the path and repeats until the root is reached.
 *
 * Independently, some support seen on the way (the set itself, a
 * candidate or a checked subset) must reach s_sub, i.e. be large enough
 * for a rule body; otherwise no child is created.
 *
 * Returns NULL if no child is needed, (ISNODE*)(void*)-1 if the
 * allocation fails, and the new node otherwise. The node's two value
 * vectors (cnts, occ_square) are placed behind the node structure in
 * the same allocation and are NOT initialized here - presumably the
 * caller clears them; verify.
 */
static ISNODE* _child (ISTREE *ist, ISNODE *node, int item,
                       double s_min, double s_sub)
{                               /* --- create child node (extend set) */
	ISNODE   *anc;                /* node reached while walking up */
	ISNODE   *child;              /* the node that gets created */
	int      pos;                 /* vector index of `item` in node */
	int      cand;                /* vector index of the candidate */
	int      plen;                /* number of items in ist->path */
	int      lo, hi;              /* first/last accepted candidate */
	int      width;               /* number of counters of the child */
	int      body;                /* enough support for a rule body */
	int      hdonly;              /* head only item on the path */
	int      app;                 /* appearance flags of an item */
	double   s_set;               /* support of the set under test */
	double   unused;              /* extra output of _getsupp(), ignored */

	assert(ist && node);
	assert((item >= node->offs) && (item < node->offs +node->size));

	/* --- appearance checks for the item to extend --- */
	app = ist->apps[item];
	if (app == IST_IGNORE)
		return NULL;                /* ignored items are never extended */
	if (HDONLY(node) && (app == IST_HEAD))
		return NULL;                /* would hold two head only items */
	hdonly = HDONLY(node) || (app == IST_HEAD);

	/* --- support check for the set to extend --- */
	pos   = item -node->offs;
	s_set = node->cnts[pos];
	if (s_set < s_min)
		return NULL;
	body  = (s_set >= s_sub);
	ist->path[1] = item;          /* fixed second path element */
	lo = node->size;              /* empty range: lo > hi */
	hi = -1;

	/* --- check the candidates that follow the item --- */
	for (cand = pos +1; cand < node->size; cand++) {
		app = ist->apps[node->offs +cand];
		if (app == IST_IGNORE)
			continue;
		if (hdonly && (app == IST_HEAD))
			continue;                 /* second head only item */

		s_set = node->cnts[cand];
		if (s_set < s_min)
			continue;                 /* candidate itself is too rare */
		if (s_set >= s_sub)
			body = 1;

		ist->path[0] = node->offs +cand;
		plen = 2;
		for (anc = node; anc->parent; anc = anc->parent) {
			s_set = _getsupp(anc->parent, ist->path, plen, &unused);
			if (s_set < s_min)
				break;                  /* a subset is too rare */
			if (s_set >= s_sub)
				body = 1;
			ist->path[plen++] = ID(anc);
		}
		if (s_set < s_min)
			continue;                 /* left the walk early: reject */

		if (cand < lo)
			lo = cand;
		hi = cand;
	}
	if (!body || (lo > hi))
		return NULL;                /* no useful extension exists */

	/* --- create child: node header followed by both vectors --- */
	width = hi -lo +1;
	child = (ISNODE*)malloc(sizeof(ISNODE) +width *2*sizeof(float));
	if (!child)
		return (ISNODE*)(void*)-1;  /* allocation failure marker */
	child->parent     = node;
	child->succ       = NULL;
	child->chcnt      = 0;
	child->id         = item;
	if (hdonly)
		child->id |= F_HDONLY;      /* remember the head only item */
	child->cnts       = (float*)((char*)child +sizeof(ISNODE));
	child->occ_square = (float*)child->cnts +width;
	child->offs       = node->offs +lo;
	child->size       = width;
	return child;
}  /* _child() */