示例#1
0
文件: main.cpp 项目: mintyc/unordered
/**
 * Exercises the insert / contains / remove operations of an IFilter.
 *
 * Five criteria are inserted (the BY_MSISDN entry twice), then lookups are
 * checked for values that were never inserted, for values inserted under a
 * different key type, and for the values that were inserted.  Finally the
 * duplicated BY_MSISDN entry is removed twice.
 *
 * @param test      filter under test; it is modified by this function.
 * @param is_multi  expected result of the contains() and second remove()
 *                  calls made after the first removal of the duplicated
 *                  entry (presumably true when the filter keeps duplicates
 *                  -- confirm against the IFilter implementations).
 *
 * The remove() calls are executed outside assert() so that they still run
 * when NDEBUG is defined; assert() discards its argument in that case.
 */
void run_test(IFilter &test, bool is_multi = true)
{
    test.insert(Criterion(BY_USERNAME, "4478901234568"));
    test.insert(Criterion(BY_IMEI, "1234567890123456"));
    test.insert(Criterion(BY_IMSI, "12345678901234"));
    test.insert(Criterion(BY_MSISDN, "4478901234567"));
    test.insert(Criterion(BY_MSISDN, "4478901234567"));   // deliberate duplicate

    (void) is_multi;    // release builds keep compiler happy

    // A value that was never inserted, and an inserted value looked up under
    // the wrong key type, must not match.
    assert( false == test.contains(Criterion(BY_MSISDN, "NOT ME")));
    assert( false == test.contains(Criterion(BY_IMEI, "4478901234567")));
    assert( false == test.contains(Criterion(BY_IMSI, "4478901234567")));
    assert( false == test.contains(Criterion(BY_USERNAME, "4478901234567")));

    // IMEI and IMSI values match only under their own key type.
    assert( true == test.contains(Criterion(BY_IMEI, "1234567890123456")));
    assert( true == test.contains(Criterion(BY_IMSI, "12345678901234")));
    assert( false == test.contains(Criterion(BY_IMEI, "12345678901234")));
    assert( false == test.contains(Criterion(BY_IMSI, "1234567890123456")));

    // The username value differs from the MSISDN value by one digit.
    assert( true == test.contains(Criterion(BY_USERNAME, "4478901234568")));
    assert( false == test.contains(Criterion(BY_MSISDN, "4478901234568")));

    // Remove the duplicated entry twice.  The calls are hoisted out of the
    // assertions because they change the filter.
    assert( true == test.contains(Criterion(BY_MSISDN, "4478901234567")));
    const bool first_removed = test.remove(Criterion(BY_MSISDN, "4478901234567"));
    (void) first_removed;
    assert( true == first_removed);
    assert( is_multi == test.contains(Criterion(BY_MSISDN, "4478901234567")));
    const bool second_removed = test.remove(Criterion(BY_MSISDN, "4478901234567"));
    (void) second_removed;
    assert( is_multi == second_removed);
    assert( false == test.contains(Criterion(BY_MSISDN, "4478901234567")));
}
示例#2
0
/*
 * FloatSearch - floating search for a feature subset (SFFS when r>0, SFBS when r<0).
 *
 * Parameters:
 *   n           total number of features.
 *   d           requested subset size; 1 <= d <= n is required.
 *   delta       stopping rule for the subset size "sumr":
 *                 0   run until size n (SFFS) or size 1 (SFBS),
 *                 >0  boundary is d+delta (SFFS, capped at n) or d-delta (SFBS, at least 1),
 *                 <0  boundary is recomputed every pass from the observed backtracking
 *                     depth (running average for -1, running maximum otherwise) using
 *                     DELTAMUL and DELTAADD.
 *   r           generalization level (features changed per step); a negative value
 *               selects SFBS and its absolute value is used as the level.
 *   bset        output; subsetsize, featureset (allocated here with rs_malloc),
 *               critvalue and dobavypoctu (through TimeString) are written.
 *   detail      bit flags; STANDARD enables the progress printouts.
 *   n_classes, sel, file_names, n_splits, classifier, cparam
 *               forwarded unchanged to Criterion(); sel is also given to shift_indexes().
 *   float_level upper limit compared against the counter of repeated successful
 *               backward steps in SFFS mode; -1 is replaced by n.
 *
 * Returns:
 *   0   success,
 *   24  d outside <1,n>,
 *   25  r incompatible with n and d,
 *   30  n < 2,
 *   3   an allocation failed or shift_indexes() reported failure,
 *   any non-zero value returned by Criterion().
 *
 * On success the reported subset is the one with the highest criterion value among
 * sizes 1..d; on ties the smallest size wins (strict comparison, scanning upwards).
 */
int FloatSearch(int n, int d, int delta, int r, TSubset *bset, int detail, int n_classes, fx_select_t *sel, char **file_names, int n_splits, cType classifier, char *cparam, int float_level) {
  /* A non-redundant coding of subset configurations is used.
     For easier understanding imagine our goal is to find a subset by exhaustive search,
     when d=5 and n=12
    - initial configuration is 111110000000   (actually stored in "bin" field)
    - in every step: a) find the leftmost block of 1's
                     b) shift its rightmost 1 to the right
                     c) shift the rest of this block to the left boundary (if it is not there)
    - this algorithm generates increasingly all binary representations of subsets
    - in context of floating search this algorithm is used for computation of generalized steps
      (in case of simple SFFS and simple SFBS only single-feature configurations are tested)
    - for purposes of floating search more identifiers than 0 and 1 are used to
      identify temporarily frozen features etc. (2,-1)
    - because of possibility to exchange meanings of 0 and 1 it was suitable to
      incorporate both forward and backward versions of floating search into one procedure
  */

  int *bin;   /* of size n+1, stores 0/1 information on currently selected features */
  int *index; /* of size n, it is computed from bin, stores indexes of selected features */
  int *bestset; double bestvalue; /* best subset of the current step */
  int *globalbestset; double *globalbestvalue; /* so-far best subsets in all dimensions */
  int wasthebest; /* indicates, if a better subset has been found during last step */
  double value=0;
  int sumrint;  /* byte length of a subset of size sumr, used for memcpy */
  int i;
  int sumr;     /* current subset size */
  int zmenasumr; /* how to change sumr (when changing direction of search): -2,-1,+1,+2 steps */
  int rr;       /* generalization level used for the current step */
  int pom;      /* scratch variable */
  int beg,piv;  /* beg - beginning of block of identifiers, piv - "pivot" - last identifier in a block */
  int stopex;    /* identifies end of internal exhaustive search */
  int stopfloat; /* identifies end of it all */
  int stopfloatmez=0;  /* identifies the dimension, for which the algorithm should end */
  int id0=0,id2=2;  /* these identifiers (0 and 2) are exchanged in case of backward search */
  int sbs=0;    /* 0=sfs, 1=sbs current search direction */
  int vykyv=0; /* stores current backtracking depth (+forward,-backward) */
  float vykyvmez=0.0; /* predicted delta estimate */
  int vykyvcount=0; /* number of direction changes (needed for delta averaging) */
  int sfbs=0;   /* 0=sffs, 1=sfbs main search direction */
  int error=0;
  double temp=-1; /* best criterion value seen while picking the final subset */
  int best=0;     /* zero-based size index of the final subset */
  int n_floats=0; /* counter compared against float_level in SFFS mode */

  time_t tbegin,telp;   /* for measuring computational time only */

  tbegin=time(NULL);

  if (float_level ==-1)
    float_level=n;

  if(r<0) { /* indicates SFBS, first stage will be SBS */
    sfbs=1; sbs=1; id0=2; id2=0; r=-r;
    if(delta==0)
      stopfloatmez=1;
    else
      if(delta>0) {
        stopfloatmez=d-delta;
        if(stopfloatmez<1)
          stopfloatmez=1;
      }
    /* for delta -1 and -2, stopfloatmez will be set later */
  }
  else { /* indicates SFFS, first stage will be SFS */
    if(delta==0)
      stopfloatmez=n;
    else
      if(delta>0) {
        stopfloatmez=d+delta;
        if(stopfloatmez>n)
          stopfloatmez=n;
      }
    /* for delta -1 and -2, stopfloatmez will be set later */
  }
  vykyv=0;
  vykyvmez=0.0;
  vykyvcount=0;

  if((d<1)||(d>n)){
    return(24);
  } /* nothing to search for */

  if((r<1)||(r>=n)||((!sfbs)&&(r>=d))||((sfbs)&&(r>=n-d))){
    return(25);
  } /* no sense */
  if(n<2){
    return(30);
  }

  /* Work buffers; each failure path releases what was allocated before it. */
  if((bin=(int *) rs_malloc((n+1)*sizeof(int),"bin, FloatSearch"))==NULL) {
    return(3);
  }
  if((index=(int *) rs_malloc(n*sizeof(int),"index, FloatSearch"))==NULL) {
    rs_free(bin);
    return(3);
  }
  if((bestset=(int *) rs_malloc(n*sizeof(int),"bestset, FloatSearch"))==NULL) {
    rs_free(index);
    rs_free(bin);
    return(3);
  }
  if((globalbestset=(int *) rs_malloc(((n*(n+1L))/2L)*sizeof(int),"globalbestset, FloatSearch"))==NULL) {
    rs_free(bestset);
    rs_free(index);
    rs_free(bin);
    return(3);
  }
  if((globalbestvalue=(double *) rs_malloc(n*sizeof(double),"globalbestvalue, FloatSearch"))==NULL) {
    rs_free(globalbestset);
    rs_free(bestset);
    rs_free(index);
    rs_free(bin);
    return(3);
  }
  /* k-th set of size k is stored at [(k*(k-1))/2] */
  for(i=0;i<(n*(n+1))/2;i++)
    globalbestset[i]=0;
  for(i=0;i<n;i++)
    globalbestvalue[i]=-SAFEUP;

  for(i=0;i<=n;i++)
    bin[i]=id0; /* bin[n]=id0 for testing the end */

  rr=r; /* initial generalization level */
  if(sfbs) {
    sumr=n-rr;
    if(detail&STANDARD) {
      if(r>1)
        printf("Generalized ");
      printf("Sequential Floating Backward Search");
      if(r>1) {
        printf(" (r=%d)",r);
      }
      printf(":\n");
    }
  }
  else {
    sumr=rr;
    if(detail&STANDARD){
      if(r>1)
        printf("Generalized ");
      printf("Sequential Floating Forward Search");
      if(r>1) {
        printf(" (r=%d)",r);
      }
      printf(":\n");
    }
  }

  if(detail&STANDARD) {
    printf("started on ");
    /* ctime() output is data, never a format string */
    printf("%s", ctime(&tbegin));
  }
  stopfloat=0;

  do {

    for(i=0;i<n;i++)
      bestset[i]=0;
    bestvalue=-SAFEUP;
    pom=rr;
    for(i=0;((pom>0)&&(i<n));i++)
      if(bin[i]==id0) {
        bin[i]=id2;
        pom--;
      } /* initialize bin for exhaustive step */
    sumrint=sumr*sizeof(int);

    stopex=0;
    do {
      pom=0; /* convert "bin" to "index" */
      for(i=0;i<sumr;i++) {
        while(bin[pom]<=0)
          pom++;
        index[i]=pom;
        pom++;
      }

      /* (an optional progress-reporting / cancellation hook used to live here) */

      /* result of criterion function should be stored to "value". When calling the criterion function, "index" field
         contains indexes (beginning by 0) of features in the subset being currently tested, having dimension "sumr" */
      if((error=Criterion(&value,index,sumr, n_classes, sel, file_names,n_splits,classifier, cparam))!=0) {
        rs_free(globalbestvalue);
        rs_free(globalbestset);
        rs_free(bestset);
        rs_free(index);
        rs_free(bin);
        return(error);
      }
      if(value>bestvalue) {
        memcpy(bestset,index,sumrint);
        bestvalue=value;
      }

      /* finding the new configuration during internal exhaustive step */
      for(beg=0;bin[beg]!=id2;beg++)
        ;
      for(piv=beg;(piv<n)&&(bin[piv]!=id0);piv++)
        ;
      if(piv==n)
        stopex=1;
      else {
        pom=piv; /* remember the position of first 0 on the right */
        do
          piv--;
        while(bin[piv]!=id2); /* find a real pivot */
        bin[piv]=id0;
        bin[pom]=id2; /* shift pivot to the right */
        pom=0;
        /* run "pom" from left, "piv" from right. the 0,2 pairs found are changed to 2,0 */
        do
          piv--;
        while((piv>0)&&(bin[piv]!=id2));
        while((pom<piv)&&(bin[pom]!=id0))
          pom++;
        while(piv-pom>0)
        {
          bin[piv]=id0; bin[pom]=id2;
          do
            piv--;
          while((piv>0)&&(bin[piv]!=id2));
          while((pom<piv)&&(bin[pom]!=id0))
            pom++;
        }
      }
    }while(!stopex);

    if(bestvalue>globalbestvalue[sumr-1]) { /* sumr is from interval <1,n> */
      memcpy(&globalbestset[(sumr*(sumr-1))/2],bestset,sumrint);
      globalbestvalue[sumr-1]=bestvalue;
      wasthebest=1;
    }
    else
      wasthebest=0;

    if (detail&STANDARD) {
      fprintf(stderr,"current best set of size %d and goodness %g:\n",sumr,bestvalue);
      printFset(bestset,sumr);
    }

    if(sfbs) {
      if(sbs) /* last step was sbs */ {
        if(sumr<n-r) /* if adding is possible, prepare sfs */ {
          for(i=0;i<n;i++)
            bin[i]=0; /* conversion to sfs format */
          for(i=0;i<sumr;i++)
            bin[bestset[i]]=1;
          sbs=0;
          id0=0;
          id2=2;
          zmenasumr=1;
        }
        else {
          zmenasumr=-1; /* otherwise stay by sbs */
          for(i=0;i<n;i++)
            bin[i]=-1;
          for(i=0;i<sumr;i++)
            bin[bestset[i]]=2; /* freeze the change */
        }
      }
      else /* last step was sfs */ {
        if(wasthebest) /* better solution was found */ {
          if(sumr<n-r) {
            zmenasumr=1; /* repeat sfs */
            for(i=0;i<n;i++)
              bin[i]=0;
            for(i=0;i<sumr;i++)
              bin[bestset[i]]=1; /* freeze the change */
          }
          else { /* nothing may be added, switch to sbs */
            for(i=0;i<n;i++)
              bin[i]=-1;
            for(i=0;i<sumr;i++)
              bin[bestset[i]]=2;
            sbs=1;
            id0=2;
            id2=0;
            zmenasumr=-1;
          }
        }
        else /* no improvement during last step (sfs) */ {
          /* change "bin" for sbs but after the change of "sumr" */
          sbs=1;
          id0=2;
          id2=0;
          zmenasumr=-2; /* forget last step and perform one new sbs step */
        }
      }
      /* actualize sumr and rr */
      if(zmenasumr==1) {
        if((sumr==d)&&((n-d)%r!=0)) {
          sumr=d+(n-d)%r;
          rr=(n-d)%r;
        }
        else {
          sumr+=r;
          rr=r;
        }
        if(vykyv>0)
          vykyv+=rr; /* continue */
        else
          vykyv=rr; /* begin */
      }
      else /* zmenasumr== -1 or -2 */ {
        if((sumr>d)&&(sumr-r<d)) {
          sumr=d;
          rr=(n-d)%r;
        }
        else {
          sumr-=r;
          rr=r;
        }
        if(vykyv>0) { /* end of going up */
          if(delta==-1){ /* averaging */
            vykyvmez=(vykyvcount*vykyvmez+vykyv)/(vykyvcount+1);
            vykyvcount++;
          }
          else{ /* maximization */
            if(vykyv>vykyvmez)
              vykyvmez=vykyv;
          }
        }
        vykyv=0;
      }
      if(zmenasumr==-2) /* once more */ {
        for(i=0;i<n;i++)
          bin[i]=-1; /* change to sbs with changed "sumr" */
        pom=(sumr*(sumr-1))/2;
        for(i=0;i<sumr;i++)
          bin[globalbestset[pom+i]]=2;
        if((sumr>d)&&(sumr-r<d)) {
          sumr=d;
          rr=(n-d)%r;
        }
        else {
          sumr-=r;
          rr=r;
        }
        /* no change in direction */
      }

      if(delta<0){
        stopfloatmez=d-DELTAMUL*vykyvmez-DELTAADD;
        if(stopfloatmez<1)
          stopfloatmez=1;
      }
      if(sumr<stopfloatmez)
        stopfloat=1; /* end if delta reached */
    }
    else /* sffs */ {
      if(sbs) /* last step was sbs */ {
        if(wasthebest && n_floats < float_level) /* a better subset was found */ {
          if(sumr>r && n_floats < float_level) {
            zmenasumr=-1; /* so repeat sbs */
            for(i=0;i<n;i++)
              bin[i]=-1;
            for(i=0;i<sumr;i++)
              bin[bestset[i]]=2; /* freeze changes */
            n_floats++;
          }
          /* NOTE(review): this branch cannot be taken -- the enclosing condition
             already guarantees n_floats < float_level. Kept unchanged; confirm
             the intended float_level behaviour with the author. */
          else if (wasthebest && n_floats >= float_level) {
            for(i=0;i<n;i++)
              bin[i]=0;
            for(i=0;i<sumr;i++)
              bin[bestset[i]]=2; /* freeze changes */
            sbs=0;
            id0=0;
            id2=2;
            zmenasumr=1;
            n_floats=0;
          }
          else { /* nothing to remove, change to sfs */
            for(i=0;i<n;i++)
              bin[i]=0;
            for(i=0;i<sumr;i++)
              bin[bestset[i]]=1;
            sbs=0;
            id0=0;
            id2=2;
            zmenasumr=1;
            n_floats=0;
          }
        }
        else /* no improvement during last step (sbs), or float_level reached */ {
          /* change to "bin" for sfs later after "sumr" gets its original value */
          sbs=0;
          id0=0;
          id2=2;
          zmenasumr=2; /* forget last step and perform one new sfs step */
        }
      }
      else /* last step was sfs */ {
        if(sumr>r) /* if removing is possible, prepare sbs */ {
          for(i=0;i<n;i++)
            bin[i]=-1;
          for(i=0;i<sumr;i++)
            bin[bestset[i]]=2;
          sbs=1;
          id0=2;
          id2=0;
          zmenasumr=-1;
        }
        else {
          zmenasumr=1; /* otherwise stay by sfs */
          for(i=0;i<n;i++)
            bin[i]=0;
          for(i=0;i<sumr;i++)
            bin[bestset[i]]=1; /* freeze changes */
        }
      }
      /* renew sumr and rr */
      if(zmenasumr==-1) {
        if((sumr==d)&&(d%r!=0)) {
          sumr=d-d%r;
          rr=d%r;
        }
        else {
          sumr-=r;
          rr=r;
        }
        if(vykyv<0)
          vykyv-=rr; /* continue */
        else
          vykyv=-rr; /* begin */
      }
      else /* zmenasumr== 1 or 2 */ {
        if((sumr<d)&&(sumr+r>d)) {
          sumr=d;
          rr=d%r;
        }
        else {
          sumr+=r;
          rr=r;
        }
        if(vykyv<0) { /* end of going down */
          if(delta==-1){ /* averaging */
            vykyvmez=(vykyvcount*vykyvmez+(-vykyv))/(vykyvcount+1);
            vykyvcount++;
          }
          else{ /* maximizing */
            if(-vykyv>vykyvmez)
              vykyvmez=-vykyv;
          }
        }
        vykyv=0;
      }
      if(zmenasumr==2) /* once more and renew "bin" */ {
        for(i=0;i<n;i++)
          bin[i]=0; /* change to sfs format now with actualized "sumr" */
        pom=(sumr*(sumr-1))/2;
        for(i=0;i<sumr;i++)
          bin[globalbestset[pom+i]]=1;
        if((sumr<d)&&(sumr+r>d)) {
          sumr=d;
          rr=d%r;
        }
        else {
          sumr+=r;
          rr=r;
        }
        /* no direction change */
      }

      if(delta<0) {
        stopfloatmez=d+DELTAMUL*vykyvmez+DELTAADD;
        if(stopfloatmez>n)
          stopfloatmez=n;
      }
      if(sumr>stopfloatmez)
        stopfloat=1; /* end if delta reached */
    }

  } while(!stopfloat);

  telp=time(NULL);
  TimeString(bset->dobavypoctu,difftime(telp,tbegin));

  /*
   * modified by T. Vogt, 17.04.2007:
   * return the feature set with the highest evaluation and fewest features
   */
  for (i=0;i<d;i++) {
    if (globalbestvalue[i]>temp) {
      best=i;
      temp=globalbestvalue[i];
    }
  }
  d=best+1;

  pom=(d*(d-1))/2;
  bset->subsetsize=d;
  if((bset->featureset=(int *) rs_malloc(d*sizeof(int),"bset->featureset, FloatSearch"))==NULL) {
    rs_free(globalbestvalue);
    rs_free(globalbestset);
    rs_free(bestset);
    rs_free(index);
    rs_free(bin);
    return(3);
  }

  if (shift_indexes(sel,globalbestset+pom,&(bset->featureset),d)) {
    /* Release the work buffers on this path as on every other error path.
       NOTE(review): bset->featureset is not released here because its address
       was handed to shift_indexes(); confirm who owns it after a failure. */
    rs_free(globalbestvalue);
    rs_free(globalbestset);
    rs_free(bestset);
    rs_free(index);
    rs_free(bin);
    return (3);
  }

  bset->critvalue=globalbestvalue[d-1];

  rs_free(globalbestset);
  rs_free(globalbestvalue);
  rs_free(bestset);
  rs_free(index);
  rs_free(bin);
  return(0);
}