Exemple #1
0
/* As the name says, Quick Sort is a very quick sorting algorithm, although its cost is
 * not as easy to demonstrate as it is for Merge Sort. Quick Sort, in fact, requires
 * O(n*log n) on average, but it can require more (i.e. O(n²)) in unlucky cases, for
 * example when the array is already ordered. This is because the sorting is done around
 * an element (the pivot) selected among those present in the array.
 * Let's try with an array:
 *
 * [2, 7, 6, 3, 8, 0]
 *
 * First we have to pick an element (the pivot, from now on). Suppose we pick 3. We swap
 * 3 with the last element, obtaining the array [2, 7, 6, 0, 8, 3].
 * We'll use two indexes: the first one points to the first element, the second one to the
 * second-last. The first index scans the array from left to right until it finds an
 * element that is bigger than the pivot: 2 is not, 7 is, so the first index stops on 7.
 * The second index scans the array from right to left until it finds an element that is
 * smaller than the pivot: 8 is not, 0 is, so the second index stops on 0. Now we swap
 * these two elements, obtaining the array [2, 0, 6, 7, 8, 3].
 * Now it's again the turn of the first index: 0 is lower than the pivot, so the index
 * advances, but 6 is not, so it stops there. The second index moves left until it finds 0.
 * But now the first index is bigger than the second, so there will be no swap between
 * them. The value at the first index is swapped with the pivot and we have the array
 * [2, 0, 3, 7, 8, 6].
 * It's easy to see that 3 is already in its final position, that the previous values are
 * all lower than it and that the following ones are all bigger. Now it's time to call the
 * function recursively on these two subarrays.
 * Like Merge Sort, we need two different functions: one does the recursion, while the
 * other one distributes the values. Note that if you want to obtain a different order,
 * you only have to change the distribution function.
 *
 * distributePivot is the distribution function:
 *   a     - array being sorted (modified in place)
 *   left  - first index of the range to distribute
 *   pivot - index of the element chosen as pivot
 *   right - last index of the range to distribute
 * It returns the index at which the two scans met; for left <= right that is where the
 * pivot value ends up.
 */
int distributePivot (int *a, int left, int pivot, int right) {
	int lo = left;
	int hi = right - 1;

	/* Park the pivot in the last slot so that both scans can ignore it. */
	if (pivot < right) {
		swapInt(&a[pivot], &a[right]);
	}

	while (lo <= hi) {
		/* Skip, from the left, everything that is not bigger than the pivot. */
		for (; lo <= hi && a[lo] <= a[right]; lo++) {
		}
		/* Skip, from the right, everything that is not smaller than the pivot. */
		for (; hi >= lo && a[hi] >= a[right]; hi--) {
		}
		/* Both scans stopped on a misplaced element and have not crossed yet. */
		if (lo < hi) {
			swapInt(&a[lo], &a[hi]);
		}
	}

	/* Bring the pivot back between the two halves. */
	if (lo < right) {
		swapInt(&a[lo], &a[right]);
	}
	return lo;
}
Exemple #2
0
/* Manual smoke tests for the swap helpers. Each section is compiled in only when
 * the corresponding TEST_SWAP_* macro is defined; the values are printed after the
 * swap so the result can be checked by eye. */
int main(void)
{
#ifdef TEST_SWAP_INT
	{
		int first = 10, second = -20;

		swapInt (&first, &second);
		printf("SwapInt: %d %d\n", first, second);
	}
#endif

#ifdef TEST_SWAP_PTR
	{
		int first = 10, second = -20;
		int *pFirst = &first, *pSecond = &second;

		/* Swaps the two pointers themselves; the ints are then read through them. */
		swapPtr ((void **)&pFirst, (void **)&pSecond);
		printf("SwapPtr: %d %d\n", *pFirst, *pSecond);
	}
#endif

	return 0;
}
/* bubbleSort
 * Sorts an int array in place with the bubble sort algorithm.
 * Parameters:
 *   data          - the array to sort
 *   array_size    - number of elements in data
 *   sortOrderBool - pointer to the function that decides the ordering; it is called
 *                   as sortOrderBool(data[k], data[k + 1]) and a non-zero result
 *                   makes the two neighbours swap places
 * Return value: none */
void bubbleSort(int data[],
                  const unsigned array_size,
                  int (*sortOrderBool)(int a,
                                        int b))
    {
        /* pass counter: index of the last element still to be settled */
        unsigned times;
        /* index of the left element of the pair being compared */
        unsigned compare;

        /* Nothing to order with fewer than two elements. This also keeps
           array_size - 1 from wrapping around when array_size is 0. */
        if(array_size < 2){
            return;
        }

        /* from "the last element is guaranteed to be in place" down to
           "the second element is guaranteed to be in place" */
        for(times = array_size - 1; times >= 1; times--){
            /* compare each element before times with its right neighbour */
            for(compare = 0; compare < times; compare++){
                /* swap when the ordering function asks for it */
                if((*sortOrderBool)(data[compare], data[compare + 1])){
                    swapInt(&data[compare], &data[compare + 1]);
                }
            }
        }
        return;
    }
//-----------------------------------------------------------------------------
void PDCFileWriter::write1intscalar(ofstream &outfile, const int ix)
{
	// Writes a 1-column data scalar/number to the file: use this for data type 0.
	// The value is copied first so that swapInt can rework the copy in place
	// (presumably reversing its byte order - confirm against swapInt) before the
	// raw bytes are written.
	int value = ix;
	char *rawBytes = (char*) &value;

	swapInt(rawBytes);
	outfile.write(rawBytes, sizeof(int));
}
/* Rearranges nums (numsSize elements) in place.
 *
 * Scanning positions from right to left, the first position that has a strictly
 * greater value somewhere to its right is exchanged with the smallest such value,
 * and everything after that position is then sorted with cmpInt. If no position
 * qualifies, the whole array is sorted with cmpInt instead.
 * NOTE(review): this yields the lexicographically next permutation only if cmpInt
 * orders ascending - cmpInt is not visible here, confirm. */
void nextPermutation(int* nums, int numsSize) {
    int pivot;
    int scan;
    int successor;

    /* Zero or one element: there is nothing to rearrange. */
    if(numsSize < 2) {
        return;
    }

    for(pivot = numsSize - 1; pivot >= 0; pivot--) {
        /* successor == pivot means "no greater value found yet". */
        successor = pivot;
        for(scan = pivot + 1; scan < numsSize; scan++) {
            if(nums[scan] <= nums[pivot]) {
                continue;
            }
            if(successor == pivot || nums[scan] < nums[successor]) {
                successor = scan;
            }
        }

        if(successor == pivot) {
            continue;
        }

        swapInt(nums + pivot, nums + successor);
        qsort(nums + pivot + 1, numsSize - pivot - 1, sizeof(int), cmpInt);
        return;
    }

    /* No position had a greater value to its right. */
    qsort(nums, numsSize, sizeof(int), cmpInt);
}
Exemple #6
0
/* Converts a 32-bit value stored as little-endian into the host representation.
 * The value is passed through swapInt only when getEndianness() reports
 * ENDIANNESS_BIG; otherwise it is returned untouched. */
int32_t
littleToNativeInt32(int32_t i)
{
	if(getEndianness() != ENDIANNESS_BIG)
		return i;

	return swapInt(i);
}
Exemple #7
0
/* Converts a 32-bit value stored as big-endian into the host representation.
 * The value is returned untouched when getEndianness() reports ENDIANNESS_BIG;
 * otherwise it is passed through swapInt. */
int32_t
bigToNativeInt32(int32_t i)
{
	if(getEndianness() == ENDIANNESS_BIG)
		return i;

	return swapInt(i);
}
Exemple #8
0
/* This algorithm orders an array of integers in O(n²) time. But if the array
 * is already ordered, or only a few elements are misplaced, it needs O(n). So, it's
 * Ω(n) and O(n²).
 * It's very simple. It takes an element and checks whether it's ordered among the
 * previous elements. So, let's say we have the following array:
 *
 * [6, 7, 2, 3]
 *
 * It starts and takes only the first value: 6 is alone, so it's ordered with itself.
 * It takes the second value and compares it with the first: [6, 7] is still ordered.
 * It takes the third value: finally we have a misplaced element. We swap it with the
 * previous one and check again whether it's smaller than the one that now precedes it;
 * if it is we swap it again, otherwise we go on to the next iteration.
 * Then it takes the next element and does the same. And so on.
 * It means that at every iteration we know that the first i elements are ordered, but
 * we don't know whether they are in their final place.
 * Note that the array is passed as a pointer, so the change is visible to the caller.
 */
void IntArrayInsSort (int *a, int dim) {
	int next;	/* element being inserted into the ordered prefix */
	int pos;	/* current position of that element while it sinks left */

	for (next = 0; next < dim; next++) {
		/* Keep swapping with the left neighbour while that neighbour is bigger. */
		for (pos = next; pos > 0 && a[pos-1] > a[pos]; pos--) {
			swapInt(&a[pos-1], &a[pos]);
		}
	}
}
Exemple #9
0
/* HEAP SORT ON INTEGERS
 *
 * a   - address of the pointer to the array; on return it is overwritten with the
 *       heap's own array pointer
 * dim - number of elements
 *
 * A heap is built from *a with arrayToIntMinHeap. Then, from the last slot down to
 * slot 1, the root is swapped into that slot, the heap size is reduced by one and
 * reorganizeIntMinHeap is run from the root.
 * NOTE(review): whether arrayToIntMinHeap copies *a or reuses it is not visible
 * here - confirm who owns the previous buffer once *a is overwritten.
 * NOTE(review): with a min-heap the smallest values are moved to the end first, so
 * the result is presumably in descending order - confirm. */
void intArrayHeapSort (int **a, int dim) {
	IntMinHeap heap = arrayToIntMinHeap(*a, dim);
	int last = dim - 1;

	while (last > 0) {
		/* Move the current root out of the live part of the heap. */
		swapInt(&heap.array[last], &heap.array[0]);
		heap.size--;
		reorganizeIntMinHeap(&heap, 0);
		last--;
	}

	*a = heap.array;
}
Exemple #10
0
/* Unlike the Insertion Sort, which has Ω(n) time, this algorithm always needs O(n²).
 * So, it's Θ(n²).
 * At every iteration, the algorithm selects which value has to occupy position i.
 * Let's say we have the following array:
 *
 * [6, 7, 2, 3]
 *
 * It keeps one index fixed on position 0 and with another one it scans the whole
 * array looking for values smaller than the one in position 0. So, it swaps 6 with 2
 * and then it does nothing more, because 2 is the lowest value.
 * Then it advances the i index to 1 and with the j index it scans the rest of the
 * array looking for values smaller than the one in position 1 (7). So, it swaps 7
 * with 6 and then 6 with 3. And so on.
 * After iteration i, the first i elements of the array are ordered and are in their
 * final position.
 * We can easily see that even if the array is already ordered, it checks every value,
 * because it can't know whether there are smaller values further on.
 * Note that the array is passed as a pointer, so the change is visible to the caller.
 */
void IntArraySelSort (int *a, int dim) {
	int slot = 0;	/* position being filled with its final value */

	while (slot < dim-1) {
		int probe;	/* scans everything to the right of slot */

		for (probe = slot+1; probe < dim; probe++) {
			if (a[probe] >= a[slot]) {
				continue;
			}
			/* A smaller value was found: it takes over the slot immediately. */
			swapInt(&a[probe], &a[slot]);
		}
		slot++;
	}
}
//-----------------------------------------------------------------------------
void PDCFileWriter::writeAttribute(ofstream &outfile, const int attrib_length, const char* attrib_name, const int attrib_type)
{
	// Writes the descriptor of a new attribute to the data file, in this order:
	//   1. the name length, as an int passed through swapInt
	//   2. the first 'attrib_length' characters of 'attrib_name', one byte each
	//   3. the attribute type, as an int passed through swapInt
	// 'attrib_length' is the number of characters in the attribute name 'attrib_name'.
	// 'attrib_type' is the integer data type, following the convention:
	// 0=int, 1=intArray, 2=intArray, 3=double, 4= doublearray, 5=vector(3doubles), 6=vectorArray(array of 3doubles)
	// NOTE(review): the list above names intArray for both 1 and 2, which looks like
	// a typo - confirm the real type codes against the PDC format description.

	int swappedLength = attrib_length;
	swapInt((char*) &swappedLength);
	outfile.write((char*) &swappedLength, sizeof(int));

	int pos = 0;
	while (pos < attrib_length)
	{
		outfile.write(attrib_name + pos, sizeof(char));
		++pos;
	}

	int swappedType = attrib_type;
	swapInt((char*) &swappedType);
	outfile.write((char*) &swappedType, sizeof(int));
}
Exemple #12
0
/* Writes a 24-bit uncompressed BMP image to the file named by ofile.
 *
 *   width, height - image dimensions in pixels
 *   color_array   - pixel data; width*height*3 bytes are written from it as they are
 *   ofile         - path of the output file (opened with "wb")
 *
 * The file consists of the two signature bytes 'B' 'M', the BmpHeader structure and
 * then the pixel bytes. If the file cannot be opened, the reason is reported with
 * perror() and nothing is written.
 *
 * NOTE(review): bfOffBits is 54, so this presumably relies on BmpHeader being laid
 * out such that 2 + sizeof(BmpHeader) == 54 - confirm the struct packing.
 * NOTE(review): rows are written without padding; the BMP format expects each row to
 * be a multiple of 4 bytes, so widths where width*3 is not a multiple of 4 may need
 * padded input - confirm with the callers. */
static void outputBmp(const int width, const int height,
		      unsigned char *color_array, const char *ofile){

  BmpHeader bmp;
  FILE *outstream;
  /* number of pixel bytes: 3 bytes (24 bits) per pixel */
  const size_t pixel_bytes = (size_t)(width*height*3);

  char bfType[2];
  bfType[0]          = 'B';
  bfType[1]          = 'M';
  bmp.bfSize         = width*height*3 + 54;
  bmp.bfReserved1    = 0;
  bmp.bfReserved2    = 0;
  bmp.bfOffBits      = 54;
  bmp.biSize         = 40;
  bmp.biWidth        = width;
  bmp.biHeight       = height;
  bmp.biPlanes       = 1;
  bmp.biBitCount     = 24;
  bmp.biCompression  = 0;
  bmp.biSizeImage    = 0;
  bmp.biXPixPerMeter = 0;
  bmp.biYPixPerMeter = 0;
  bmp.biClrUsed      = 0;
  bmp.biClrImporant  = 0;

#ifdef REVERSE_ENDIAN_OUTPUT
  /* Only the non-zero fields need swapping; the others are zero either way. */
  bmp.bfSize         = swapInt(bmp.bfSize);
  bmp.bfOffBits      = swapInt(bmp.bfOffBits);
  bmp.biSize         = swapInt(bmp.biSize);
  bmp.biWidth        = swapInt(bmp.biWidth);
  bmp.biHeight       = swapInt(bmp.biHeight);
  bmp.biPlanes       = swapShort(bmp.biPlanes);
  bmp.biBitCount     = swapShort(bmp.biBitCount);
#endif

  outstream = fopen( ofile, "wb");
  if (outstream == NULL) {
    /* Without this check the fwrite/fclose calls below would receive NULL. */
    perror(ofile);
    return;
  }

  fwrite( bfType, sizeof(char), 2, outstream);
  fwrite( &bmp, sizeof(BmpHeader), 1, outstream);
  fwrite( color_array, sizeof(unsigned char), pixel_bytes, outstream);
  fclose( outstream);

}
/* Reverses, in place, the ints lying between first and last (both included).
 * The two pointers may be given in either order: the walk starts from both ends and
 * moves towards the middle, swapping as it goes, until the pointers meet or cross.
 * Both pointers are expected to refer to the same array. */
void reverseIntMemory(int *first, int *last)
{
    /* +1 when first has to move up in memory to approach last, -1 otherwise. */
    int step = -1;
    if (first < last)
    {
        step = 1;
    }

    for (;;)
    {
        /* Met on the middle element: odd number of ints. */
        if (first == last)
        {
            break;
        }
        /* Just crossed: even number of ints. */
        if (first == last + step)
        {
            break;
        }

        swapInt(first, last);
        first += step;
        last -= step;
    }

}
//-----------------------------------------------------------------------------
void PDCFileWriter::writePDCHeader(ofstream &outfile)
{
	//create temporary versions of the variables to byte-swap

	int temp_formatVersion = getFormatVersion();
	int temp_byteOrder = getByteOrder();
	int temp_extra1 = getExtra1();
	int temp_extra2 = getExtra2();
	int temp_numParticles = getParticleCount();
	int temp_numAttributes = getAttributeCount();

	//do swap of byte order

	swapInt((char*) &temp_formatVersion);
	swapInt((char*) &temp_byteOrder);
	swapInt((char*) &temp_extra1);
	swapInt((char*) &temp_extra2);
	swapInt((char*) &temp_numParticles);
	swapInt((char*) &temp_numAttributes);

	//write out to file

	for(int i=0;i<4;i++) 
	{
		outfile.put( m_format[i]);
	}

	outfile.write((char*) &temp_formatVersion, sizeof(int));
	outfile.write((char*) &temp_byteOrder, sizeof(int));
	outfile.write((char*) &temp_extra1, sizeof(int));
	outfile.write((char*) &temp_extra2, sizeof(int));
	outfile.write((char*) &temp_numParticles, sizeof(int));
	outfile.write((char*) &temp_numAttributes, sizeof(int));
}
Exemple #15
0
/* Inserts val into the heap.
 *
 * The value is stored in the first free slot (index heap->sz), the size is
 * incremented, and the value is then moved up: as long as it is not at the root and
 * its parent holds a greater value, the two are swapped. The smallest value therefore
 * ends up towards the root.
 *
 * NOTE(review): no capacity check is visible here; the caller presumably guarantees
 * that hPtr has room for one more element - confirm. */
void heap_Insert(Heap *heap, int val)
{
    int index = heap->sz;

    heap->hPtr[index] = val;
    (heap->sz)++;

    /* Test the position before touching the parent: at the root there is no
       parent, and getParentIndex(0) must not be used to index the array. */
    while (index > 0)
    {
        int parent = getParentIndex(index);

        if (heap->hPtr[parent] <= heap->hPtr[index])
        {
            break;  /* parent is not greater: the value has found its place */
        }

        swapInt(heap->hPtr+parent, heap->hPtr+index);
        index = parent;
    }
}
Exemple #16
0
// Selection sort.
// On every pass the smallest element of the not-yet-sorted part pData[pass..len-1]
// is located and exchanged with pData[pass]; the array is printed after each pass.
void SelectSort(int* pData, int len){
	int pass;
	int scan;
	int smallest;	// index of the smallest element in the unsorted part

	for(pass = 0; pass < len; pass++){
		smallest = pass;
		for(scan = pass + 1; scan < len; scan++){
			if(pData[scan] < pData[smallest]){
				smallest = scan;
			}
		}

		// Exchange the smallest value with the element at position pass.
		// NOTE(review): swapInt is called even when smallest == pass, i.e. with the
		// same address twice - confirm swapInt is safe for that case.
		swapInt(&pData[pass], &pData[smallest]);

		// Print the state of the array after this pass.
		printf("process %d: ",pass);
		printIntArray(pData, len);
	}
}
/* maxHeapify
 * Restores the max-heap property for the subtree rooted at current_index: if one of
 * the node's children holds a greater value, the node is swapped with the greatest
 * child and the procedure continues from that child's position.
 *
 *   data          - array holding the heap
 *   array_size    - children are considered to exist only at indexes below this value
 *   current_index - index of the node to sift down
 *
 * NOTE(review): the children are taken at 2*i and 2*i + 1, which matches a heap whose
 * root sits at index 1; for a root at index 0 they would be 2*i + 1 and 2*i + 2 -
 * confirm the layout used by the callers.
 * Version: 0.00(0) */
void maxHeapify(int data[], unsigned array_size, unsigned current_index)
    {
    /* index of the greatest value seen so far among the node and its children */
    unsigned largest_index = current_index;

    /* indexes the children of the current node would have */
    unsigned left_child_index = current_index * 2,
            right_child_index = current_index * 2 + 1;

    /* if the left child exists and is greater than the current node
       (written as "< array_size" so that array_size == 0 cannot wrap around) */
    if(left_child_index < array_size &&
       data[left_child_index] > data[largest_index]){
       largest_index = left_child_index;
    }

    /* if the right child exists and is greater than the greatest found so far;
       comparing against the current node instead would let a smaller right child
       replace a greater left child */
    if(right_child_index < array_size &&
       data[right_child_index] > data[largest_index]){
       largest_index = right_child_index;
    }

    /* if the greatest node isn't the current node, swap with it and then
       max-heapify the child position that received the old value */
    if(largest_index != current_index){
      swapInt(&data[current_index], &data[largest_index]);
      maxHeapify(data, array_size, largest_index);
    }

    return ;
    }
Exemple #18
0
void classRF(double *x, int *dimx, int *cl, int *ncl, int *cat, int *maxcat,
        int *sampsize, int *strata, int *Options, int *ntree, int *nvar,
        int *ipi, double *classwt, double *cut, int *nodesize,
        int *outcl, int *counttr, double *prox,
        double *imprt, double *impsd, double *impmat, int *nrnodes,
        int *ndbigtree, int *nodestatus, int *bestvar, int *treemap,
        int *nodeclass, double *xbestsplit, double *errtr,
        int *testdat, double *xts, int *clts, int *nts, double *countts,
        int *outclts, int labelts, double *proxts, double *errts,
        int *inbag) {
    /******************************************************************
     *  C wrapper for random forests:  get input from R and drive
     *  the Fortran routines.
     *
     *  Input:
     *
     *  x:        matrix of predictors (transposed!)
     *  dimx:     two integers: number of variables and number of cases
     *  cl:       class labels of the data
     *  ncl:      number of classes in the responsema
     *  cat:      integer vector of number of classes in the predictor;
     *            1=continuous
     * maxcat:    maximum of cat
     * Options:   7 integers: (0=no, 1=yes)
     *     add a second class (for unsupervised RF)?
     *         1: sampling from product of marginals
     *         2: sampling from product of uniforms
     *     assess variable importance?
     *     calculate proximity?
     *     calculate proximity based on OOB predictions?
     *     calculate outlying measure?
     *     how often to print output?
     *     keep the forest for future prediction?
     *  ntree:    number of trees
     *  nvar:     number of predictors to use for each split
     *  ipi:      0=use class proportion as prob.; 1=use supplied priors
     *  pi:       double vector of class priors
     *  nodesize: minimum node size: no node with fewer than ndsize
     *            cases will be split
     *
     *  Output:
     *
     *  outcl:    class predicted by RF
     *  counttr:  matrix of votes (transposed!)
     *  imprt:    matrix of variable importance measures
     *  impmat:   matrix of local variable importance measures
     *  prox:     matrix of proximity (if iprox=1)
     ******************************************************************/
    
    int nsample0, mdim, nclass, addClass, mtry, ntest, nsample, ndsize,
            mimp, nimp, near, nuse, noutall, nrightall, nrightimpall,
            keepInbag, nstrata;
    int jb, j, n, m, k, idxByNnode, idxByNsample, imp, localImp, iprox,
            oobprox, keepf, replace, stratify, trace, *nright,
            *nrightimp, *nout, *nclts, Ntree;
    
    int *out, *bestsplitnext, *bestsplit, *nodepop, *jin, *nodex,
            *nodexts, *nodestart, *ta, *ncase, *jerr, *varUsed,
            *jtr, *classFreq, *idmove, *jvr,
            *at, *a, *b, *mind, *nind, *jts, *oobpair;
    int **strata_idx, *strata_size, last, ktmp, anyEmpty, ntry;
    
    double av=0.0;
    
    double *tgini, *tx, *wl, *classpop, *tclasscat, *tclasspop, *win,
            *tp, *wr;
    
    //Do initialization for COKUS's Random generator
    seedMT(2*rand()+1);  //works well with odd number so why don't use that
    
    addClass = Options[0];
    imp      = Options[1];
    localImp = Options[2];
    iprox    = Options[3];
    oobprox  = Options[4];
    trace    = Options[5];
    keepf    = Options[6];
    replace  = Options[7];
    stratify = Options[8];
    keepInbag = Options[9];
    mdim     = dimx[0];
    nsample0 = dimx[1];
    nclass   = (*ncl==1) ? 2 : *ncl;
    ndsize   = *nodesize;
    Ntree    = *ntree;
    mtry     = *nvar;
    ntest    = *nts;
    nsample = addClass ? (nsample0 + nsample0) : nsample0;
    mimp = imp ? mdim : 1;
    nimp = imp ? nsample : 1;
    near = iprox ? nsample0 : 1;
    if (trace == 0) trace = Ntree + 1;
    
    /*printf("\nmdim %d, nclass %d, nrnodes %d, nsample %d, ntest %d\n", mdim, nclass, *nrnodes, nsample, ntest);
    printf("\noobprox %d, mdim %d, nsample0 %d, Ntree %d, mtry %d, mimp %d", oobprox, mdim, nsample0, Ntree, mtry, mimp);
    printf("\nstratify %d, replace %d",stratify,replace);
    printf("\n");*/
    tgini =      (double *) S_alloc_alt(mdim, sizeof(double));
    wl =         (double *) S_alloc_alt(nclass, sizeof(double));
    wr =         (double *) S_alloc_alt(nclass, sizeof(double));
    classpop =   (double *) S_alloc_alt(nclass* *nrnodes, sizeof(double));
    tclasscat =  (double *) S_alloc_alt(nclass*32, sizeof(double));
    tclasspop =  (double *) S_alloc_alt(nclass, sizeof(double));
    tx =         (double *) S_alloc_alt(nsample, sizeof(double));
    win =        (double *) S_alloc_alt(nsample, sizeof(double));
    tp =         (double *) S_alloc_alt(nsample, sizeof(double));
    
    out =           (int *) S_alloc_alt(nsample, sizeof(int));
    bestsplitnext = (int *) S_alloc_alt(*nrnodes, sizeof(int));
    bestsplit =     (int *) S_alloc_alt(*nrnodes, sizeof(int));
    nodepop =       (int *) S_alloc_alt(*nrnodes, sizeof(int));
    nodestart =     (int *) S_alloc_alt(*nrnodes, sizeof(int));
    jin =           (int *) S_alloc_alt(nsample, sizeof(int));
    nodex =         (int *) S_alloc_alt(nsample, sizeof(int));
    nodexts =       (int *) S_alloc_alt(ntest, sizeof(int));
    ta =            (int *) S_alloc_alt(nsample, sizeof(int));
    ncase =         (int *) S_alloc_alt(nsample, sizeof(int));
    jerr =          (int *) S_alloc_alt(nsample, sizeof(int));
    varUsed =       (int *) S_alloc_alt(mdim, sizeof(int));
    jtr =           (int *) S_alloc_alt(nsample, sizeof(int));
    jvr =           (int *) S_alloc_alt(nsample, sizeof(int));
    classFreq =     (int *) S_alloc_alt(nclass, sizeof(int));
    jts =           (int *) S_alloc_alt(ntest, sizeof(int));
    idmove =        (int *) S_alloc_alt(nsample, sizeof(int));
    at =            (int *) S_alloc_alt(mdim*nsample, sizeof(int));
    a =             (int *) S_alloc_alt(mdim*nsample, sizeof(int));
    b =             (int *) S_alloc_alt(mdim*nsample, sizeof(int));
    mind =          (int *) S_alloc_alt(mdim, sizeof(int));
    nright =        (int *) S_alloc_alt(nclass, sizeof(int));
    nrightimp =     (int *) S_alloc_alt(nclass, sizeof(int));
    nout =          (int *) S_alloc_alt(nclass, sizeof(int));
    if (oobprox) {
        oobpair = (int *) S_alloc_alt(near*near, sizeof(int));
    }
    //printf("nsample=%d\n", nsample);
    /* Count number of cases in each class. */
    zeroInt(classFreq, nclass);
    for (n = 0; n < nsample; ++n) classFreq[cl[n] - 1] ++;
    /* Normalize class weights. */
    //Rprintf("ipi %d ",*ipi);
    //for(n=0;n<nclass;n++) Rprintf("%d: %d, %f,",n,classFreq[n],classwt[n]);
    normClassWt(cl, nsample, nclass, *ipi, classwt, classFreq);
    //for(n=0;n<nclass;n++) Rprintf("%d: %d, %f,",n,classFreq[n],classwt[n]);
   
    if (stratify) {
        /* Count number of strata and frequency of each stratum. */
        nstrata = 0;
        for (n = 0; n < nsample0; ++n)
            if (strata[n] > nstrata) nstrata = strata[n];
        /* Create the array of pointers, each pointing to a vector
         * of indices of where data of each stratum is. */
        strata_size = (int  *) S_alloc_alt(nstrata, sizeof(int));
        for (n = 0; n < nsample0; ++n) {
            strata_size[strata[n] - 1] ++;
        }
        strata_idx =  (int **) S_alloc_alt(nstrata, sizeof(int *));
        for (n = 0; n < nstrata; ++n) {
            strata_idx[n] = (int *) S_alloc_alt(strata_size[n], sizeof(int));
        }
        zeroInt(strata_size, nstrata);
        for (n = 0; n < nsample0; ++n) {
            strata_size[strata[n] - 1] ++;
            strata_idx[strata[n] - 1][strata_size[strata[n] - 1] - 1] = n;
        }
    } else {
        nind = replace ? NULL : (int *) S_alloc_alt(nsample, sizeof(int));
    }
    
    /*    INITIALIZE FOR RUN */
    if (*testdat) zeroDouble(countts, ntest * nclass);
    zeroInt(counttr, nclass * nsample);
    zeroInt(out, nsample);
    zeroDouble(tgini, mdim);
    zeroDouble(errtr, (nclass + 1) * Ntree);
    
    if (labelts) {
        nclts  = (int *) S_alloc_alt(nclass, sizeof(int));
        for (n = 0; n < ntest; ++n) nclts[clts[n]-1]++;
        zeroDouble(errts, (nclass + 1) * Ntree);
    }
    //printf("labelts %d\n",labelts);fflush(stdout);
    if (imp) {
        zeroDouble(imprt, (nclass+2) * mdim);
        zeroDouble(impsd, (nclass+1) * mdim);
        if (localImp) zeroDouble(impmat, nsample * mdim);
    }
    if (iprox) {
        zeroDouble(prox, nsample0 * nsample0);
        if (*testdat) zeroDouble(proxts, ntest * (ntest + nsample0));
    }
    makeA(x, mdim, nsample, cat, at, b);
    
    //R_CheckUserInterrupt();
    
    
    /* Starting the main loop over number of trees. */
    GetRNGstate();
    if (trace <= Ntree) {
        /* Print header for running output. */
        Rprintf("ntree      OOB");
        for (n = 1; n <= nclass; ++n) Rprintf("%7i", n);
        if (labelts) {
            Rprintf("|    Test");
            for (n = 1; n <= nclass; ++n) Rprintf("%7i", n);
        }
        Rprintf("\n");
    }
    idxByNnode = 0;
    idxByNsample = 0;
    
    //Rprintf("addclass %d, ntree %d, cl[300]=%d", addClass,Ntree,cl[299]);
    for(jb = 0; jb < Ntree; jb++) {
		//Rprintf("addclass %d, ntree %d, cl[300]=%d", addClass,Ntree,cl[299]);
        //printf("jb=%d,\n",jb);
        /* Do we need to simulate data for the second class? */
        if (addClass) createClass(x, nsample0, nsample, mdim);
        do {
            zeroInt(nodestatus + idxByNnode, *nrnodes);
            zeroInt(treemap + 2*idxByNnode, 2 * *nrnodes);
            zeroDouble(xbestsplit + idxByNnode, *nrnodes);
            zeroInt(nodeclass + idxByNnode, *nrnodes);
            zeroInt(varUsed, mdim);
            /* TODO: Put all sampling code into a function. */
            /* drawSample(sampsize, nsample, ); */
            if (stratify) {  /* stratified sampling */
                zeroInt(jin, nsample);
                zeroDouble(tclasspop, nclass);
                zeroDouble(win, nsample);
                if (replace) {  /* with replacement */
                    for (n = 0; n < nstrata; ++n) {
                        for (j = 0; j < sampsize[n]; ++j) {
                            ktmp = (int) (unif_rand() * strata_size[n]);
                            k = strata_idx[n][ktmp];
                            tclasspop[cl[k] - 1] += classwt[cl[k] - 1];
                            win[k] += classwt[cl[k] - 1];
                            jin[k] = 1;
                        }
                    }
                } else { /* stratified sampling w/o replacement */
                    /* re-initialize the index array */
                    zeroInt(strata_size, nstrata);
                    for (j = 0; j < nsample; ++j) {
                        strata_size[strata[j] - 1] ++;
                        strata_idx[strata[j] - 1][strata_size[strata[j] - 1] - 1] = j;
                    }
                    /* sampling without replacement */
                    for (n = 0; n < nstrata; ++n) {
                        last = strata_size[n] - 1;
                        for (j = 0; j < sampsize[n]; ++j) {
                            ktmp = (int) (unif_rand() * (last+1));
                            k = strata_idx[n][ktmp];
                            swapInt(strata_idx[n][last], strata_idx[n][ktmp]);
                            last--;
                            tclasspop[cl[k] - 1] += classwt[cl[k]-1];
                            win[k] += classwt[cl[k]-1];
                            jin[k] = 1;
                        }
                    }
                }
            } else {  /* unstratified sampling */
                anyEmpty = 0;
                ntry = 0;
                do {
                    zeroInt(jin, nsample);
                    zeroDouble(tclasspop, nclass);
                    zeroDouble(win, nsample);
                    if (replace) {
                        for (n = 0; n < *sampsize; ++n) {
                            k = unif_rand() * nsample;
                            tclasspop[cl[k] - 1] += classwt[cl[k]-1];
                            win[k] += classwt[cl[k]-1];
                            jin[k] = 1;
                        }
                    } else {
                        for (n = 0; n < nsample; ++n) nind[n] = n;
                        last = nsample - 1;
                        for (n = 0; n < *sampsize; ++n) {
                            ktmp = (int) (unif_rand() * (last+1));
                            k = nind[ktmp];
                            swapInt(nind[ktmp], nind[last]);
                            last--;
                            tclasspop[cl[k] - 1] += classwt[cl[k]-1];
                            win[k] += classwt[cl[k]-1];
                            jin[k] = 1;
                        }
                    }
                    /* check if any class is missing in the sample */
                    for (n = 0; n < nclass; ++n) {
                        if (tclasspop[n] == 0) anyEmpty = 1;
                    }
                    ntry++;
                } while (anyEmpty && ntry <= 10);
            }
            
            /* If need to keep indices of inbag data, do that here. */
            if (keepInbag) {
                for (n = 0; n < nsample0; ++n) {
                    inbag[n + idxByNsample] = jin[n];
                }
            }
            
            /* Copy the original a matrix back. */
            memcpy(a, at, sizeof(int) * mdim * nsample);
            modA(a, &nuse, nsample, mdim, cat, *maxcat, ncase, jin);
            
            #ifdef WIN64
            F77_CALL(_buildtree)
            #endif
                    
            #ifndef WIN64
            F77_CALL(buildtree)
            #endif        
            (a, b, cl, cat, maxcat, &mdim, &nsample,
                    &nclass,
                    treemap + 2*idxByNnode, bestvar + idxByNnode,
                    bestsplit, bestsplitnext, tgini,
                    nodestatus + idxByNnode, nodepop,
                    nodestart, classpop, tclasspop, tclasscat,
                    ta, nrnodes, idmove, &ndsize, ncase,
                    &mtry, varUsed, nodeclass + idxByNnode,
                    ndbigtree + jb, win, wr, wl, &mdim,
                    &nuse, mind);
            /* if the "tree" has only the root node, start over */
        } while (ndbigtree[jb] == 1);
        
        Xtranslate(x, mdim, *nrnodes, nsample, bestvar + idxByNnode,
                bestsplit, bestsplitnext, xbestsplit + idxByNnode,
                nodestatus + idxByNnode, cat, ndbigtree[jb]);
        
        /*  Get test set error */
        if (*testdat) {
            predictClassTree(xts, ntest, mdim, treemap + 2*idxByNnode,
                    nodestatus + idxByNnode, xbestsplit + idxByNnode,
                    bestvar + idxByNnode,
                    nodeclass + idxByNnode, ndbigtree[jb],
                    cat, nclass, jts, nodexts, *maxcat);
            TestSetError(countts, jts, clts, outclts, ntest, nclass, jb+1,
                    errts + jb*(nclass+1), labelts, nclts, cut);
        }
        
        /*  Get out-of-bag predictions and errors. */
        predictClassTree(x, nsample, mdim, treemap + 2*idxByNnode,
                nodestatus + idxByNnode, xbestsplit + idxByNnode,
                bestvar + idxByNnode,
                nodeclass + idxByNnode, ndbigtree[jb],
                cat, nclass, jtr, nodex, *maxcat);
        
        zeroInt(nout, nclass);
        noutall = 0;
        for (n = 0; n < nsample; ++n) {
            if (jin[n] == 0) {
                /* increment the OOB votes */
                counttr[n*nclass + jtr[n] - 1] ++;
                /* count number of times a case is OOB */
                out[n]++;
                /* count number of OOB cases in the current iteration.
                 * nout[n] is the number of OOB cases for the n-th class.
                 * noutall is the number of OOB cases overall. */
                nout[cl[n] - 1]++;
                noutall++;
            }
        }
        
        /* Compute out-of-bag error rate. */
        oob(nsample, nclass, jin, cl, jtr, jerr, counttr, out,
                errtr + jb*(nclass+1), outcl, cut);
        
        if ((jb+1) % trace == 0) {
            Rprintf("%5i: %6.2f%%", jb+1, 100.0*errtr[jb * (nclass+1)]);
            for (n = 1; n <= nclass; ++n) {
                Rprintf("%6.2f%%", 100.0 * errtr[n + jb * (nclass+1)]);
            }
            if (labelts) {
                Rprintf("| ");
                for (n = 0; n <= nclass; ++n) {
                    Rprintf("%6.2f%%", 100.0 * errts[n + jb * (nclass+1)]);
                }
            }
            Rprintf("\n");
            
            //R_CheckUserInterrupt();
        }
        
        /*  DO VARIABLE IMPORTANCE  */
        if (imp) {
            nrightall = 0;
            /* Count the number of correct prediction by the current tree
             * among the OOB samples, by class. */
            zeroInt(nright, nclass);
            for (n = 0; n < nsample; ++n) {
                /* out-of-bag and predicted correctly: */
                if (jin[n] == 0 && jtr[n] == cl[n]) {
                    nright[cl[n] - 1]++;
                    nrightall++;
                }
            }
            for (m = 0; m < mdim; ++m) {
                if (varUsed[m]) {
                    nrightimpall = 0;
                    zeroInt(nrightimp, nclass);
                    for (n = 0; n < nsample; ++n) tx[n] = x[m + n*mdim];
                    /* Permute the m-th variable. */
                    permuteOOB(m, x, jin, nsample, mdim);
                    /* Predict the modified data using the current tree. */
                    predictClassTree(x, nsample, mdim, treemap + 2*idxByNnode,
                            nodestatus + idxByNnode,
                            xbestsplit + idxByNnode,
                            bestvar + idxByNnode,
                            nodeclass + idxByNnode, ndbigtree[jb],
                            cat, nclass, jvr, nodex, *maxcat);
                    /* Count how often correct predictions are made with
                     * the modified data. */
                    for (n = 0; n < nsample; n++) {
                        if (jin[n] == 0) {
                            if (jvr[n] == cl[n]) {
                                nrightimp[cl[n] - 1]++;
                                nrightimpall++;
                            }
                            if (localImp && jvr[n] != jtr[n]) {
                                if (cl[n] == jvr[n]) {
                                    impmat[m + n*mdim] -= 1.0;
                                } else {
                                    impmat[m + n*mdim] += 1.0;
                                }
                            }
                        }
                        /* Restore the original data for that variable. */
                        x[m + n*mdim] = tx[n];
                    }
                    /* Accumulate decrease in proportions of correct
                     * predictions. */
                    for (n = 0; n < nclass; ++n) {
                        if (nout[n] > 0) {
                            imprt[m + n*mdim] +=
                                    ((double) (nright[n] - nrightimp[n])) /
                                    nout[n];
                            impsd[m + n*mdim] +=
                                    ((double) (nright[n] - nrightimp[n]) *
                                    (nright[n] - nrightimp[n])) / nout[n];
                        }
                    }
                    if (noutall > 0) {
                        imprt[m + nclass*mdim] +=
                                ((double)(nrightall - nrightimpall)) / noutall;
                        impsd[m + nclass*mdim] +=
                                ((double) (nrightall - nrightimpall) *
                                (nrightall - nrightimpall)) / noutall;
                    }
                }
            }
        }
        
        /*  DO PROXIMITIES */
        if (iprox) {
            computeProximity(prox, oobprox, nodex, jin, oobpair, near);
            /* proximity for test data */
            if (*testdat) {
                computeProximity(proxts, 0, nodexts, jin, oobpair, ntest);
                /* Compute proximity between testset and training set. */
                for (n = 0; n < ntest; ++n) {
                    for (k = 0; k < near; ++k) {
                        if (nodexts[n] == nodex[k])
                            proxts[n + ntest * (k+ntest)] += 1.0;
                    }
                }
            }
        }
        
        if (keepf) idxByNnode += *nrnodes;
        if (keepInbag) idxByNsample += nsample0;
    }
    PutRNGstate();
   
    
    /*  Final processing of variable importance. */
    for (m = 0; m < mdim; m++) tgini[m] /= Ntree;
      
    if (imp) {
        for (m = 0; m < mdim; ++m) {
            if (localImp) { /* casewise measures */
                for (n = 0; n < nsample; ++n) impmat[m + n*mdim] /= out[n];
            }
            /* class-specific measures */
            for (k = 0; k < nclass; ++k) {
                av = imprt[m + k*mdim] / Ntree;
                impsd[m + k*mdim] =
                        sqrt(((impsd[m + k*mdim] / Ntree) - av*av) / Ntree);
                imprt[m + k*mdim] = av;
                /* imprt[m + k*mdim] = (se <= 0.0) ? -1000.0 - av : av / se; */
            }
            /* overall measures */
            av = imprt[m + nclass*mdim] / Ntree;
            impsd[m + nclass*mdim] =
                    sqrt(((impsd[m + nclass*mdim] / Ntree) - av*av) / Ntree);
            imprt[m + nclass*mdim] = av;
            imprt[m + (nclass+1)*mdim] = tgini[m];
        }
    } else {
        for (m = 0; m < mdim; ++m) imprt[m] = tgini[m];
    }
   
    /*  PROXIMITY DATA ++++++++++++++++++++++++++++++++*/
    if (iprox) {
        for (n = 0; n < near; ++n) {
            for (k = n + 1; k < near; ++k) {
                prox[near*k + n] /= oobprox ?
                    (oobpair[near*k + n] > 0 ? oobpair[near*k + n] : 1) :
                        Ntree;
                        prox[near*n + k] = prox[near*k + n];
            }
            prox[near*n + n] = 1.0;
        }
        if (*testdat) {
            for (n = 0; n < ntest; ++n)
                for (k = 0; k < ntest + nsample; ++k)
                    proxts[ntest*k + n] /= Ntree;
        }
    }
    if (trace <= Ntree){
        printf("\nmdim %d, nclass %d, nrnodes %d, nsample %d, ntest %d\n", mdim, nclass, *nrnodes, nsample, ntest);
        printf("\noobprox %d, mdim %d, nsample0 %d, Ntree %d, mtry %d, mimp %d", oobprox, mdim, nsample0, Ntree, mtry, mimp);
        printf("\nstratify %d, replace %d",stratify,replace);
        printf("\n");
    }
    
    //frees up the memory
    free(tgini);free(wl);free(wr);free(classpop);free(tclasscat);
    free(tclasspop);free(tx);free(win);free(tp);free(out);
    free(bestsplitnext);free(bestsplit);free(nodepop);free(nodestart);free(jin);
    free(nodex);free(nodexts);free(ta);free(ncase);free(jerr);
    free(varUsed);free(jtr);free(jvr);free(classFreq);free(jts);
    free(idmove);free(at);free(a);free(b);free(mind);
    free(nright);free(nrightimp);free(nout);
    
    if (oobprox) {
        free(oobpair);
    }
    
    if (stratify) {
        free(strata_size);
        for (n = 0; n < nstrata; ++n) {
            free(strata_idx[n]);
        }
        free(strata_idx);        
    } else {
        if (replace)
            free(nind);
    }
    //printf("labelts %d\n",labelts);fflush(stdout);
    
    if (labelts) {
        free(nclts);        
    }
    //printf("stratify %d",stratify);fflush(stdout);
}
Exemple #19
0
int main (int argc, const char *argv[])
{
	/* file buffers */
	int fp;
	char globalHeaderContent[sizeof(pcap_hdr_t)];
	char recordHeaderContent[sizeof(pcaprec_hdr_t)];
	char buffer[READ_BUFFER_SIZE];

	/* headers, headers, headers... */
	pcap_hdr_t* globalHeader = (pcap_hdr_t*) globalHeaderContent;
	pcaprec_hdr_t* recordHeader = (pcaprec_hdr_t*) recordHeaderContent;
	ether_header_t* etherHeader;
	ipv4_header_t* ipv4Header;
	udp_header_t* udpHeader;

	/* record count */
	unsigned int recordCount = 0;





	/* checking file input */
	if (argc<1) 
	{
		fprintf(stderr, "Error: No file input.\n");
		return 1;
	}

	/* file opening */
	fp = open(argv[1], O_RDONLY);
	if (fp == -1)
	{
		fprintf(stderr, "Error: Cannot open file.\n");
		return 2;
	}





	/* applying global header data structure */
	if (!fetchGlobalHeader(fp, globalHeader))
	{
		fprintf(stderr, "Error: File format not correct\n");
		return 4;
	}

	/* printing global header information */
	printf ("ver=%d.%d snaplen=%d network=%d\n",
		swapShort(globalHeader->version_major),
		swapShort(globalHeader->version_minor),
		swapInt(globalHeader->snaplen),
		swapInt(globalHeader->network));





	/* per-packet information */
	while(fetchRecordHeader(fp, recordHeader))
	{
		recordCount += 1;

		/* read file */
		read (fp, buffer, swapInt(recordHeader->incl_len));


		/* applying data structures */
		etherHeader = (ether_header_t *) (buffer);
		ipv4Header = (ipv4_header_t *) (buffer
				+sizeof(ether_header_t));



		/* printing
		 * 1234567890.098765 100/100 123.45.67.89 -> 98.76.54.32 (17) sport=12345 dport=9876 */
		printf ("%u.%06u %u/%u", 
				swapInt (recordHeader->ts_sec),
				swapInt (recordHeader->ts_usec),
				swapInt (recordHeader->incl_len),
				swapInt (recordHeader->orig_len)
			);
		printf (" %s", inet_ntoa (ipv4Header->ip_src));
		printf (" -> %s", inet_ntoa (ipv4Header->ip_dst));
		printf (" (%u)", ipv4Header->ip_p);



		/* UDP-specific information */
		if (ipv4Header->ip_p == 17)
		{

			/* applying UDP header data structure */
			udpHeader = (udp_header_t *) (buffer
					+ sizeof (ether_header_t)
					+ sizeof (ipv4_header_t));

			/* printing */
			printf(" sport=%u dport=%u",
					ntohs (udpHeader->port_src),
					ntohs (udpHeader->port_dst)
			      );

		}


		/* new line */
		printf("\n");
	}

	printf ("total %d packets read\n", recordCount);







	close(fp);

	return 0;
}
Exemple #20
0
/*
 * regRF: grow *nTree regression trees on random samples of the training data
 * and accumulate out-of-bag (OOB) predictions, error, optional test-set
 * predictions, proximities and permutation variable importance.
 *
 * Every scalar is passed by pointer.  What the body below shows about the
 * arguments:
 *   x          predictors, read as x[m + n*mdim] (variable m of case n);
 *              permuted in place during importance and restored afterwards
 *   y          response, one value per case
 *   xdim       xdim[0] = nsample, xdim[1] = mdim
 *   sampsize   number of cases drawn for each tree
 *   nthsize, mtry   forwarded unchanged to regTree
 *   nrnodes    forwarded to regTree; also the per-tree stride into the node
 *              arrays when keepf[0] is non-zero
 *   imp        imp[0] = varImp, imp[1] = localImp, imp[2] = nPerm
 *   cat, maxcat     forwarded to regTree / predictRegTree
 *   jprint     print a progress line every *jprint trees; a value of 0 is
 *              overwritten with *nTree + 1, which disables printing
 *   doProx, oobprox    enable proximity computation / OOB-only normalisation
 *   biasCorr   when non-zero, simpleLinReg is called after each tree and
 *              coef[0] + coef[1]*prediction is applied to the predictions
 *   yptr       out: running mean of the OOB predictions per case
 *   errimp     out: zeroed here; holds mdim importance values followed by
 *              mdim tgini values when varImp is set, otherwise mdim tgini
 *              values only
 *   impmat     out (localImp only): indexed impmat[m + n*mdim]
 *   impSD      accumulated with += and finalised at the end
 *              NOTE(review): never zeroed in this function -- presumably the
 *              caller passes it zero-filled; confirm.
 *   prox, proxts    out: proximity matrices (zeroed here when *doProx)
 *   treeSize, nodestatus, lDaughter, rDaughter, avnode, mbest, upper
 *              per-tree node storage handed to regTree / predictRegTree
 *   mse, msets out: mse[j] = OOB error after tree j; msets[j] likewise for
 *              the test set when *labelts is non-zero
 *   keepf      keepf[0] = keepF, keepf[1] = keepInbag
 *   replace    non-zero: sample with replacement
 *   testdat, xts, nts, yts, labelts, yTestPred   test-set inputs / outputs
 *   coef       two values used as intercept (coef[0]) and slope (coef[1])
 *   nout       out: number of trees for which each case was OOB
 *   inbag      out (keepInbag only): inbag[n + j*nsample] = in-bag flag
 *
 * All work arrays come from S_alloc and are not released explicitly here.
 */
void regRF(double *x, double *y, int *xdim, int *sampsize,
	   int *nthsize, int *nrnodes, int *nTree, int *mtry, int *imp,
	   int *cat, int *maxcat, int *jprint, int *doProx, int *oobprox,
           int *biasCorr, double *yptr, double *errimp, double *impmat,
           double *impSD, double *prox, int *treeSize, int *nodestatus,
           int *lDaughter, int *rDaughter, double *avnode, int *mbest,
           double *upper, double *mse, int *keepf, int *replace,
           int *testdat, double *xts, int *nts, double *yts, int *labelts,
           double *yTestPred, double *proxts, double *msets, double *coef,
           int *nout, int *inbag) {
    /*************************************************************************
   Input:
   mdim=number of variables in data set
   nsample=number of cases

   nthsize=number of cases in a node below which the tree will not split,
   setting nthsize=5 generally gives good results.

   nTree=number of trees in run.  200-500 gives pretty good results

   mtry=number of variables to pick to split on at each node.  mdim/3
   seems to give generally good performance, but it can be
   altered up or down

   imp=1 turns on variable importance.  This is computed for the
   mth variable as the percent rise in the test set mean sum-of-
   squared errors when the mth variable is randomly permuted.

  *************************************************************************/

    double errts = 0.0, averrb, meanY, meanYts, varY, varYts, r, xrand,
	errb = 0.0, resid=0.0, ooberr, ooberrperm, delta, *resOOB;

    double *yb, *xtmp, *xb, *ytr, *ytree, *tgini, *coeffs;

    int k, m, mr, n, nOOB, j, jout, idx, ntest, last, ktmp, nPerm,
        nsample, mdim, keepF, keepInbag;
    int *oobpair, varImp, localImp, *varUsed;

    int *in, *nind, *nodex, *nodexts, *probs;

    /* Unpack the dimension and option vectors into scalars. */
    nsample = xdim[0];
    mdim = xdim[1];
    ntest = *nts;
    varImp = imp[0];
    localImp = imp[1];
    nPerm = imp[2];
    keepF = keepf[0];
    keepInbag = keepf[1];

    /* jprint == 0 means "no running output": make the print interval larger
       than the number of trees so neither the header nor any progress line
       is ever printed.  Note that this writes through the caller's pointer. */
    if (*jprint == 0) *jprint = *nTree + 1;

    /* Work arrays: yb/xb hold the bootstrap sample, ytr the per-tree
       predictions, xtmp a saved copy of one variable, resOOB the per-tree
       OOB residuals. */
    yb         = (double *) S_alloc(*sampsize, sizeof(double));
    xb         = (double *) S_alloc(mdim * *sampsize, sizeof(double));
    ytr        = (double *) S_alloc(nsample, sizeof(double));
    xtmp       = (double *) S_alloc(nsample, sizeof(double));
    resOOB     = (double *) S_alloc(nsample, sizeof(double));
    coeffs     = (double *) S_alloc(*sampsize, sizeof(double));
  
    probs      = (int *) S_alloc(*sampsize, sizeof(int));
    in         = (int *) S_alloc(nsample, sizeof(int));
    nodex      = (int *) S_alloc(nsample, sizeof(int));
    varUsed    = (int *) S_alloc(mdim, sizeof(int));
    /* nind is only needed (and only allocated) for sampling w/o replacement */
    nind = *replace ? NULL : (int *) S_alloc(nsample, sizeof(int));

    /* ytree and nodexts stay uninitialised when there is no test data; every
       later use is guarded by *testdat. */
    if (*testdat) {
	ytree      = (double *) S_alloc(ntest, sizeof(double));
	nodexts    = (int *) S_alloc(ntest, sizeof(int));
    }
    oobpair = (*doProx && *oobprox) ?
	(int *) S_alloc(nsample * nsample, sizeof(int)) : NULL;

    /* If variable importance is requested, tgini points to the second
       "column" of errimp, otherwise it's just the same as errimp. */
    tgini = varImp ? errimp + mdim : errimp;

    /* averrb is assigned here but never read afterwards. */
    averrb = 0.0;
    meanY = 0.0;
    varY = 0.0;

    zeroDouble(yptr, nsample);
    zeroInt(nout, nsample);
    /* Single-pass running update of the mean and of the sum of squared
       deviations of y; varY becomes the variance after the division below. */
    for (n = 0; n < nsample; ++n) {
	varY += n * (y[n] - meanY)*(y[n] - meanY) / (n + 1);
	meanY = (n * meanY + y[n]) / (n + 1);
    }
    varY /= nsample;

    /* Same running mean/variance for the test-set response. */
    varYts = 0.0;
    meanYts = 0.0;
    if (*testdat) {
	for (n = 0; n < ntest; ++n) {
	    varYts += n * (yts[n] - meanYts)*(yts[n] - meanYts) / (n + 1);
	    meanYts = (n * meanYts + yts[n]) / (n + 1);
	}
	varYts /= ntest;
    }

    if (*doProx) {
        zeroDouble(prox, nsample * nsample);
	if (*testdat) zeroDouble(proxts, ntest * (nsample + ntest));
    }

    if (varImp) {
        zeroDouble(errimp, mdim * 2);
	if (localImp) zeroDouble(impmat, nsample * mdim);
    } else {
        zeroDouble(errimp, mdim);
    }
    /* NOTE(review): yTestPred is zeroed only when *labelts is set, yet it is
       averaged below whenever *testdat is set.  For the first tree the old
       value is multiplied by j == 0, so a non-finite initial value would
       propagate -- confirm callers pass a finite/zeroed buffer. */
    if (*labelts) zeroDouble(yTestPred, ntest);

    /* print header for running output */
    if (*jprint <= *nTree) {
	Rprintf("     |      Out-of-bag   ");
	if (*testdat) Rprintf("|       Test set    ");
	Rprintf("|\n");
	Rprintf("Tree |      MSE  %%Var(y) ");
	if (*testdat) Rprintf("|      MSE  %%Var(y) ");
	Rprintf("|\n");
    }
    GetRNGstate();
    /*************************************
     * Start the loop over trees.
     *************************************/
    for (j = 0; j < *nTree; ++j) {

    /* multinomial */
    /*unsigned int coeffs[*sampsize];*/
    /* for loop implementation */
    /*double probs[*sampsize];*/
    /* NOTE(review): probs is an int array and 1/(*sampsize) is an integer
       division, so every entry is 0 for any *sampsize > 1.  The commented-out
       declarations above suggest probs was meant to be double and coeffs
       unsigned int; verify against the prototype of ran_multinomial before
       relying on the weights it writes into coeffs. */
    for (k = 0; k < *sampsize; ++k) {
        probs[k] = 1/(*sampsize);
    }

    /* coeffs is filled here and handed to regTree below. */
    ran_multinomial(*sampsize,100,probs,coeffs);

		/* idx is the offset of this tree in the per-node arrays: each
		   tree gets its own slot when the forest is kept, otherwise
		   slot 0 is reused. */
		idx = keepF ? j * *nrnodes : 0;
		zeroInt(in, nsample);
        zeroInt(varUsed, mdim);
        /* Draw a random sample for growing a tree. */
		if (*replace) { /* sampling with replacement */
			for (n = 0; n < *sampsize; ++n) {
				xrand = unif_rand();
				/* truncated to an index by the assignment to int */
				k = xrand * nsample;
				in[k] = 1;
				yb[n] = y[k];
				for(m = 0; m < mdim; ++m) {
					xb[m + n * mdim] = x[m + k * mdim];
				}
			}
		} else { /* sampling w/o replacement */
			/* nind[0..last] holds the cases not drawn yet; each draw
			   picks one of them and shrinks the range by one. */
			for (n = 0; n < nsample; ++n) nind[n] = n;
			last = nsample - 1;
			for (n = 0; n < *sampsize; ++n) {
				ktmp = (int) (unif_rand() * (last+1));
                k = nind[ktmp];
                /* presumably a macro exchanging the two entries, moving
                   the drawn case out of the live range -- defined
                   elsewhere */
                swapInt(nind[ktmp], nind[last]);
				last--;
				in[k] = 1;
				yb[n] = y[k];
				for(m = 0; m < mdim; ++m) {
					xb[m + n * mdim] = x[m + k * mdim];
				}
			}
		}
		if (keepInbag) {
			for (n = 0; n < nsample; ++n) inbag[n + j * nsample] = in[n];
		}
        /* grow the regression tree */
		regTree(xb, yb, mdim, *sampsize, lDaughter + idx, rDaughter + idx,
                upper + idx, avnode + idx, nodestatus + idx, *nrnodes,
                treeSize + j, *nthsize, *mtry, mbest + idx, cat, tgini,
                varUsed, coeffs);
        /* predict the OOB data with the current tree */
		/* ytr is the prediction on OOB data by the current tree */
		predictRegTree(x, nsample, mdim, lDaughter + idx,
                       rDaughter + idx, nodestatus + idx, ytr, upper + idx,
                       avnode + idx, mbest + idx, treeSize[j], cat, *maxcat,
                       nodex);
		/* yptr is the aggregated prediction by all trees grown so far */
		errb = 0.0;
		ooberr = 0.0;
		jout = 0; /* jout is the number of cases that has been OOB so far */
		nOOB = 0; /* nOOB is the number of OOB samples for this tree */
		for (n = 0; n < nsample; ++n) {
			if (in[n] == 0) {
				/* case n is OOB for this tree: fold its prediction
				   into the running mean and keep its residual for
				   the importance computation */
				nout[n]++;
                nOOB++;
				yptr[n] = ((nout[n]-1) * yptr[n] + ytr[n]) / nout[n];
				resOOB[n] = ytr[n] - y[n];
                ooberr += resOOB[n] * resOOB[n];
			}
            if (nout[n]) {
				jout++;
				errb += (y[n] - yptr[n]) * (y[n] - yptr[n]);
			}
		}
		/* NOTE(review): jout is 0 when no case has been OOB yet, which
		   makes this a 0/0 division. */
		errb /= jout;
		/* Do simple linear regression of y on yhat for bias correction. */
		if (*biasCorr) simpleLinReg(nsample, yptr, y, coef, &errb, nout);

		/* predict testset data with the current tree */
		if (*testdat) {
			predictRegTree(xts, ntest, mdim, lDaughter + idx,
						   rDaughter + idx, nodestatus + idx, ytree,
                           upper + idx, avnode + idx,
						   mbest + idx, treeSize[j], cat, *maxcat, nodexts);
			/* ytree is the prediction for test data by the current tree */
			/* yTestPred is the average prediction by all trees grown so far */
			errts = 0.0;
			for (n = 0; n < ntest; ++n) {
				yTestPred[n] = (j * yTestPred[n] + ytree[n]) / (j + 1);
			}
            /* compute testset MSE */
			if (*labelts) {
				for (n = 0; n < ntest; ++n) {
					resid = *biasCorr ?
                        yts[n] - (coef[0] + coef[1]*yTestPred[n]) :
                        yts[n] - yTestPred[n];
					errts += resid * resid;
				}
				errts /= ntest;
			}
		}
        /* Print running output. */
		if ((j + 1) % *jprint == 0) {
			Rprintf("%4d |", j + 1);
			Rprintf(" %8.4g %8.2f ", errb, 100 * errb / varY);
			if(*labelts == 1) Rprintf("| %8.4g %8.2f ",
									  errts, 100.0 * errts / varYts);
			Rprintf("|\n");
		}
		mse[j] = errb;
		if (*labelts) msets[j] = errts;

		/*  DO PROXIMITIES */
		if (*doProx) {
			computeProximity(prox, *oobprox, nodex, in, oobpair, nsample);
			/* proximity for test data */
			if (*testdat) {
                /* In the next call, in and oobpair are not used. */
                computeProximity(proxts, 0, nodexts, in, oobpair, ntest);
				/* test-vs-training block: stored after the first ntest
				   columns of proxts, incremented when both cases land
				   in the same terminal node */
				for (n = 0; n < ntest; ++n) {
					for (k = 0; k < nsample; ++k) {
						if (nodexts[n] == nodex[k]) {
							proxts[n + ntest * (k+ntest)] += 1.0;
						}
					}
				}
			}
		}

		/* Variable importance */
		if (varImp) {
			for (mr = 0; mr < mdim; ++mr) {
                if (varUsed[mr]) { /* Go ahead if the variable is used */
                    /* make a copy of the m-th variable into xtmp */
                    for (n = 0; n < nsample; ++n)
                        xtmp[n] = x[mr + n * mdim];
                    ooberrperm = 0.0;
                    /* permute the variable nPerm times, re-predict, and
                       sum the squared OOB errors over all permutations */
                    for (k = 0; k < nPerm; ++k) {
                        permuteOOB(mr, x, in, nsample, mdim);
                        predictRegTree(x, nsample, mdim, lDaughter + idx,
                                       rDaughter + idx, nodestatus + idx, ytr,
                                       upper + idx, avnode + idx, mbest + idx,
                                       treeSize[j], cat, *maxcat, nodex);
                        for (n = 0; n < nsample; ++n) {
                            if (in[n] == 0) {
                                r = ytr[n] - y[n];
                                ooberrperm += r * r;
                                if (localImp) {
                                    impmat[mr + n * mdim] +=
                                        (r*r - resOOB[n]*resOOB[n]) / nPerm;
                                }
                            }
                        }
                    }
                    /* mean increase in squared OOB error for this tree;
                       NOTE(review): nOOB is 0 if every case was in-bag */
                    delta = (ooberrperm / nPerm - ooberr) / nOOB;
                    errimp[mr] += delta;
                    impSD[mr] += delta * delta;
                    /* copy original data back */
                    for (n = 0; n < nsample; ++n)
                        x[mr + n * mdim] = xtmp[n];
                }
            }
        }
    }
    PutRNGstate();
    /* end of tree iterations=======================================*/

    if (*biasCorr) {  /* bias correction for predicted values */
		for (n = 0; n < nsample; ++n) {
			if (nout[n]) yptr[n] = coef[0] + coef[1] * yptr[n];
		}
		if (*testdat) {
			for (n = 0; n < ntest; ++n) {
				yTestPred[n] = coef[0] + coef[1] * yTestPred[n];
			}
		}
    }

    /* Normalise the proximities: by the OOB pair count (at least 1) when
       *oobprox is set, otherwise by the number of trees; then mirror the
       matrix and set the diagonal to 1. */
    if (*doProx) {
		for (n = 0; n < nsample; ++n) {
			for (k = n + 1; k < nsample; ++k) {
                prox[nsample*k + n] /= *oobprox ?
                    (oobpair[nsample*k + n] > 0 ? oobpair[nsample*k + n] : 1) :
                    *nTree;
                prox[nsample * n + k] = prox[nsample * k + n];
            }
			prox[nsample * n + n] = 1.0;
        }
		if (*testdat) {
			for (n = 0; n < ntest; ++n)
				for (k = 0; k < ntest + nsample; ++k)
					proxts[ntest*k + n] /= *nTree;
		}
    }

    /* Turn the accumulated sums into a per-tree mean (errimp) and the
       standard error of that mean (impSD). */
    if (varImp) {
		for (m = 0; m < mdim; ++m) {
			errimp[m] = errimp[m] / *nTree;
			impSD[m] = sqrt( ((impSD[m] / *nTree) -
							  (errimp[m] * errimp[m])) / *nTree );
			if (localImp) {
                /* NOTE(review): nout[n] is 0 for a case that was never
                   OOB, so this divides by zero for such cases. */
                for (n = 0; n < nsample; ++n) {
                    impmat[m + n * mdim] /= nout[n];
                }
			}
        }
    }
    /* tgini was accumulated by regTree over all trees; report the average. */
    for (m = 0; m < mdim; ++m) tgini[m] /= *nTree;
}
Exemple #21
0
/*
 * LPAFreadImageAnswer - read the stored answer for entry `current` of the
 * file list from the parameter list in its image header and store it in
 * lpaf->coords[current] (centre xc/yc plus NPOINTS x/y point pairs).
 *
 * The file name and frame number are unpacked from lpaf->filelist[current];
 * the frame number selects which entry of the header's parameter list is
 * used.  If the first value is outside [0, cols) the values are taken to be
 * in the opposite byte order and every value is byte-swapped in place.
 *
 * Returns:
 *    1  a valid answer was read into lpaf->coords[current]
 *    0  no usable answer (too few parameters, no parameter entry for the
 *       frame, value still INIT_VAL, or centre outside the image)
 *   -1  (through ErrorReturn) the file could not be opened
 *   -2  (through ErrorReturn) the header could not be read
 *
 * NOTE(review): the parameter entry is assumed to hold at least
 * 2 + 2*NPOINTS values; no count is checked here -- confirm against the
 * writer of these headers.
 */
int
LPAFreadImageAnswer(LPAF *lpaf, int current)
{
  char   *fullname, fname[100] ;
  FILE   *fp ;
  IMAGE  Iheader ;
  int    i, ecode, frame, current_frame ;
  LP_BOX *lpb ;
  struct extpar *xp ;
#ifdef _MSDOS
  long   *parms ;
#else
  int    *parms ;
#endif

  fullname = lpaf->filelist[current] ;

  ImageUnpackFileName(fullname, &current_frame, &i, fname) ;

  fp = fopen(fname, "rb") ;
  if (!fp)
    ErrorReturn(-1,(ERROR_NO_FILE,"LPAFreadImageAnswer(%d): could not open %s",
                    current, fname)) ;

  /* only the header is needed, so the file is closed right after reading it */
  ecode = fread_header(fp, &Iheader, fname) ;
  fclose(fp) ;
  if (ecode)
    ErrorReturn(-2, (ERROR_BADFILE,
                     "LPAFreadImageAnswer(%s): could not read header",fname));

  if (Iheader.numparam < Iheader.num_frame)
    return(0) ;

  /* read answer from header */
#if 0
  fprintf(stderr, "reading lp values from %dth entry in image file\n",
          current_frame);
#endif
  lpb = &lpaf->coords[current] ;

  /* walk the parameter list to the entry belonging to this frame */
  for (frame = 0, xp = Iheader.params ; xp ; xp = xp->nextp)
    if (frame++ == current_frame)
      break ;

  /* the list ended before reaching the requested frame: nothing to read */
  if (!xp)
    return(0) ;

  /*
   if hips file created on Sun, then the parameters are actually longs.
  */
#ifndef _MSDOS
  parms = xp->val.v_pi ;
#else
  parms = (long *)xp->val.v_pi ;
#endif

  /*
   an out-of-range first value is taken as a sign that the values were
   written with the opposite byte order: swap each value in place.
  */
#ifndef _MSDOS
  if (parms[0] < 0 || parms[0] >= Iheader.cols)
  {
    parms[0] = swapInt(parms[0]) ;
    parms[1] = swapInt(parms[1]) ;
    for (i = 0 ; i < NPOINTS ; i++)
    {
      parms[2+2*i] = swapInt(parms[2+2*i]) ;
      parms[2+2*i+1] = swapInt(parms[2+2*i+1]) ;
    }
  }
#else
  if (parms[0] < 0 || parms[0] >= (long)Iheader.cols)
  {
    parms[0] = swapLong(parms[0]) ;
    parms[1] = swapLong(parms[1]) ;
    for (i = 0 ; i < NPOINTS ; i++)
    {
      parms[2+2*i] = swapLong(parms[2+2*i]) ;
      parms[2+2*i+1] = swapLong(parms[2+2*i+1]) ;
    }
  }
#endif

  if ((int)parms[0] == INIT_VAL)  /* not yet written with real value */
    return(0) ;

  lpb->xc = (int)parms[0] ;
  lpb->yc  = (int)parms[1] ;
  for (i = 0 ; i < NPOINTS ; i++)
  {
    lpb->xp[i] = (int)parms[2+2*i] ;
    lpb->yp[i] = (int)parms[2+2*i+1] ;
  }

  /* reject a centre that lies outside the image */
  if (lpb->xc < 0 || lpb->xc >= Iheader.cols ||
      lpb->yc < 0 || lpb->yc >= Iheader.rows )
    return(0) ;

  return(1) ;
}
/*
returnType functionName(arguments)
*/
void strangeFunc(int a[], int size)
{
    /* Declaration & definition: one cursor on the first element of the
       array, one on the last. */
    int *front = &a[0];
    int *back = &a[size - 1];

    /* Keep going while the front cursor is still before the back cursor. */
    while (front < back)
    {
        const int frontNegative = (*front < 0);
        const int backNegative = (*back < 0);

        if (frontNegative == backNegative)
        {
            /* Both negative: swap the pair first.  Either way, when the two
               ends agree in sign both cursors step inward. */
            if (frontNegative)
                swapInt(front, back);
            front++;
            back--;
        }
        else if (frontNegative)
        {
            /* Only the front element is negative: step the back cursor. */
            back--;
        }
        else
        {
            /* Only the back element is negative: step the front cursor. */
            front++;
        }
    }

    /* Nothing to return. */
}
Exemple #23
0
void regRF(double *x, double *y, int *xdim, int *sampsize,
        int *nthsize, int *nrnodes, int *nTree, int *mtry, int *imp,
        int *cat, int maxcat, int *jprint, int doProx, int oobprox,
        int biasCorr, double *yptr, double *errimp, double *impmat,
        double *impSD, double *prox, int *treeSize, SMALL_INT *nodestatus,
        int *lDaughter, int *rDaughter, double *avnode, int *mbest,
        double *upper, double *mse, const int *keepf, int *replace,
        int testdat, double *xts, int *nts, double *yts, int labelts,
        double *yTestPred, double *proxts, double *msets, double *coef,
        int *nout, int *inbag) {
    /*************************************************************************
     * Input:
     * mdim=number of variables in data set
     * nsample=number of cases
     *
     * nthsize=number of cases in a node below which the tree will not split,
     * setting nthsize=5 generally gives good results.
     *
     * nTree=number of trees in run.  200-500 gives pretty good results
     *
     * mtry=number of variables to pick to split on at each node.  mdim/3
     * seems to give genrally good performance, but it can be
     * altered up or down
     *
     * imp=1 turns on variable importance.  This is computed for the
     * mth variable as the percent rise in the test set mean sum-of-
     * squared errors when the mth variable is randomly permuted.
     *
     *************************************************************************/
    
    //PRINTF( "*jprint: %d\n", *jprint );
    //mexEvalString( "pause(0.0001)" );
    
    double errts = 0.0, averrb, meanY, meanYts, varY, varYts, r, xrand,
            errb = 0.0, resid=0.0, ooberr, ooberrperm, delta, *resOOB;
    
    double *yb, *xtmp, *xb, *ytr, *ytree = NULL, *tgini;
    
    int k, m, mr, n, nOOB, j, jout, idx, ntest, last, ktmp, nPerm,
            nsample, mdim, keepF, keepInbag;
    int *oobpair, varImp, localImp, *varUsed;
    
    int *in, *nind, *nodex, *nodexts = NULL;
    
    //Abhi:temp variable
    double tmp_d = 0;
    int tmp_i;
    SMALL_INT tmp_c;
    
    //Do initialization for COKUS's Random generator
    seedMT(2*rand()+1);  //works well with odd number so why don't use that
    
    nsample = xdim[0];
    mdim = xdim[1];
    ntest = *nts;
    varImp = imp[0];
    localImp = imp[1];
    nPerm = imp[2]; //PRINTF("nPerm %d\n",nPerm);
    keepF = keepf[0];
    keepInbag = keepf[1];
    
    if (*jprint == 0) *jprint = *nTree + 1;
    
    yb         = (double *) calloc(*sampsize, sizeof(double));
    xb         = (double *) calloc(mdim * *sampsize, sizeof(double));
    ytr        = (double *) calloc(nsample, sizeof(double));
    xtmp       = (double *) calloc(nsample, sizeof(double));
    resOOB     = (double *) calloc(nsample, sizeof(double));
    
    in        = (int *) calloc(nsample, sizeof(int));
    nodex      = (int *) calloc(nsample, sizeof(int));
    varUsed    = (int *) calloc(mdim, sizeof(int));
    nind = *replace ? NULL : (int *) calloc(nsample, sizeof(int));
    
    if (testdat) {
        ytree      = (double *) calloc(ntest, sizeof(double));
        nodexts    = (int *) calloc(ntest, sizeof(int));
    }
    oobpair = (doProx && oobprox) ?
        (int *) calloc(nsample * nsample, sizeof(int)) : NULL;
        
        /* If variable importance is requested, tgini points to the second
       "column" of errimp, otherwise it's just the same as errimp. */
        tgini = varImp ? errimp + mdim : errimp;
        
        averrb = 0.0;
        meanY = 0.0;
        varY = 0.0;
        
        zeroDouble(yptr, nsample);
        zeroInt(nout, nsample);
        for (n = 0; n < nsample; ++n) {
            varY += n * (y[n] - meanY)*(y[n] - meanY) / (n + 1);
            meanY = (n * meanY + y[n]) / (n + 1);
        }
        varY /= nsample;
        
        varYts = 0.0;
        meanYts = 0.0;
        if (testdat) {
            for (n = 0; n < ntest; ++n) {
                varYts += n * (yts[n] - meanYts)*(yts[n] - meanYts) / (n + 1);
                meanYts = (n * meanYts + yts[n]) / (n + 1);
            }
            varYts /= ntest;
        }
        
        if (doProx) {
            zeroDouble(prox, nsample * nsample);
            if (testdat) zeroDouble(proxts, ntest * (nsample + ntest));
        }
        
        if (varImp) {
            zeroDouble(errimp, mdim * 2);
            if (localImp) zeroDouble(impmat, nsample * mdim);
        } else {
            zeroDouble(errimp, mdim);
        }
        if (labelts) zeroDouble(yTestPred, ntest);
        
        /* print header for running output */
        if (*jprint <= *nTree) {
            PRINTF("     |      Out-of-bag   ");
            if (testdat) PRINTF("|       Test set    ");
            PRINTF("|\n");
            PRINTF("Tree |      MSE  %%Var(y) ");
            if (testdat) PRINTF("|      MSE  %%Var(y) ");
            PRINTF("|\n");
            // mexEvalString( "pause(0.001)" );
        }
        GetRNGstate();
        /*************************************
         * Start the loop over trees.
         *************************************/
        for (j = 0; j < *nTree; ++j) {
            //PRINTF("tree num %d\n",j);fflush(stdout);
            //PRINTF("1. maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d\n", *maxcat, *jprint, doProx, oobprox, biasCorr);
            
            idx = keepF ? j * *nrnodes : 0;
            zeroInt(in, nsample);
            zeroInt(varUsed, mdim);
            /* Draw a random sample for growing a tree. */
//		PRINTF("1.8. maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat);
            
            if (*replace) { /* sampling with replacement */
                for (n = 0; n < *sampsize; ++n) {
                    xrand = unif_rand();
                    k = (int)(xrand * nsample);
                    in[k] = 1;
                    yb[n] = y[k];
                    for(m = 0; m < mdim; ++m) {
                        xb[m + n * mdim] = x[m + k * mdim];
                    }
                }
            } else { /* sampling w/o replacement */
                for (n = 0; n < nsample; ++n) nind[n] = n;
                last = nsample - 1;
                for (n = 0; n < *sampsize; ++n) {
                    ktmp = (int) (unif_rand() * (last+1));
                    k = nind[ktmp];
                    swapInt(nind[ktmp], nind[last]);
                    last--;
                    in[k] = 1;
                    yb[n] = y[k];
                    for(m = 0; m < mdim; ++m) {
                        xb[m + n * mdim] = x[m + k * mdim];
                    }
                }
            }
            if (keepInbag) {
                for (n = 0; n < nsample; ++n) inbag[n + j * nsample] = in[n];
            }
//		PRINTF("1.9. maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat);
            
            /* grow the regression tree */
            regTree(xb, yb, mdim, *sampsize, lDaughter + idx, rDaughter + idx,
                    upper + idx, avnode + idx, nodestatus + idx, *nrnodes,
                    treeSize + j, *nthsize, *mtry, mbest + idx, cat, tgini,
                    varUsed);
            /* predict the OOB data with the current tree */
            /* ytr is the prediction on OOB data by the current tree */
            
//		PRINTF("2. maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat);
            
            predictRegTree(x, nsample, mdim, lDaughter + idx,
                    rDaughter + idx, nodestatus + idx, ytr, upper + idx,
                    avnode + idx, mbest + idx, treeSize[j], cat, maxcat,
                    nodex);
            /* yptr is the aggregated prediction by all trees grown so far */
            errb = 0.0;
            ooberr = 0.0;
            jout = 0; /* jout is the number of cases that has been OOB so far */
            nOOB = 0; /* nOOB is the number of OOB samples for this tree */
            for (n = 0; n < nsample; ++n) {
                if (in[n] == 0) {
                    nout[n]++;
                    nOOB++;
                    yptr[n] = ((nout[n]-1) * yptr[n] + ytr[n]) / nout[n];
                    resOOB[n] = ytr[n] - y[n];
                    ooberr += resOOB[n] * resOOB[n];
                }
                if (nout[n]) {
                    jout++;
                    errb += (y[n] - yptr[n]) * (y[n] - yptr[n]);
                }
            }
            errb /= jout;
            /* Do simple linear regression of y on yhat for bias correction. */
            if (biasCorr) simpleLinReg(nsample, yptr, y, coef, &errb, nout);
//PRINTF("2.5.maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d\n", maxcat, *jprint, doProx, oobprox, biasCorr);
            
            /* predict testset data with the current tree */
            if (testdat) {
                predictRegTree(xts, ntest, mdim, lDaughter + idx,
                        rDaughter + idx, nodestatus + idx, ytree,
                        upper + idx, avnode + idx,
                        mbest + idx, treeSize[j], cat, maxcat, nodexts);
                /* ytree is the prediction for test data by the current tree */
                /* yTestPred is the average prediction by all trees grown so far */
                errts = 0.0;
                for (n = 0; n < ntest; ++n) {
                    yTestPred[n] = (j * yTestPred[n] + ytree[n]) / (j + 1);
                }
                /* compute testset MSE */
                if (labelts) {
                    for (n = 0; n < ntest; ++n) {
                        resid = biasCorr ?
                            yts[n] - (coef[0] + coef[1]*yTestPred[n]) :
                            yts[n] - yTestPred[n];
                            errts += resid * resid;
                    }
                    errts /= ntest;
                }
            }
//PRINTF("2.6.maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d, testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat);
            
            /* Print running output. */
            if ((j + 1) % *jprint == 0) {
                PRINTF("%4d |", j + 1);
                PRINTF(" %8.4g %8.2f ", errb, 100 * errb / varY);
                if(labelts == 1) PRINTF("| %8.4g %8.2f ",
                        errts, 100.0 * errts / varYts);
                PRINTF("|\n");
                fflush(stdout);
                // mexEvalString("pause(.001);"); // to dump string.
            }
            
//PRINTF("2.7.maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d, testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat);
            
            mse[j] = errb;
            if (labelts) msets[j] = errts;
//PRINTF("2.701  j %d, nTree %d, errts %f errb %f \n", j, *nTree, errts,errb);
//PRINTF("2.71.maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d, testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat);
            
            /*  DO PROXIMITIES */
            if (doProx) {
                computeProximity(prox, oobprox, nodex, in, oobpair, nsample);
                /* proximity for test data */
                if (testdat) {
                    /* In the next call, in and oobpair are not used. */
                    computeProximity(proxts, 0, nodexts, in, oobpair, ntest);
                    for (n = 0; n < ntest; ++n) {
                        for (k = 0; k < nsample; ++k) {
                            if (nodexts[n] == nodex[k]) {
                                proxts[n + ntest * (k+ntest)] += 1.0;
                            }
                        }
                    }
                }
            }
//PRINTF("2.8.maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d, testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat);
            
            /* Variable importance */
            if (varImp) {
                for (mr = 0; mr < mdim; ++mr) {
                    if (varUsed[mr]) { /* Go ahead if the variable is used */
                        /* make a copy of the m-th variable into xtmp */
                        for (n = 0; n < nsample; ++n)
                            xtmp[n] = x[mr + n * mdim];
                        ooberrperm = 0.0;
                        for (k = 0; k < nPerm; ++k) {
                            permuteOOB(mr, x, in, nsample, mdim);
                            predictRegTree(x, nsample, mdim, lDaughter + idx,
                                    rDaughter + idx, nodestatus + idx, ytr,
                                    upper + idx, avnode + idx, mbest + idx,
                                    treeSize[j], cat, maxcat, nodex);
                            for (n = 0; n < nsample; ++n) {
                                if (in[n] == 0) {
                                    r = ytr[n] - y[n];
                                    ooberrperm += r * r;
                                    if (localImp) {
                                        impmat[mr + n * mdim] +=
                                                (r*r - resOOB[n]*resOOB[n]) / nPerm;
                                    }
                                }
                            }
                        }
                        delta = (ooberrperm / nPerm - ooberr) / nOOB;
                        errimp[mr] += delta;
                        impSD[mr] += delta * delta;
                        /* copy original data back */
                        for (n = 0; n < nsample; ++n)
                            x[mr + n * mdim] = xtmp[n];
                    }
                    
                }
                
            }
//	PRINTF("3. maxcat %d, jprint %d, doProx %d, oobProx %d, biasCorr %d testdat %d\n", maxcat, *jprint, doProx, oobprox, biasCorr,testdat);
            
        }
        PutRNGstate();
        /* end of tree iterations=======================================*/
        
        if (biasCorr) {  /* bias correction for predicted values */
            for (n = 0; n < nsample; ++n) {
                if (nout[n]) yptr[n] = coef[0] + coef[1] * yptr[n];
            }
            if (testdat) {
                for (n = 0; n < ntest; ++n) {
                    yTestPred[n] = coef[0] + coef[1] * yTestPred[n];
                }
            }
        }
        
        if (doProx) {
            for (n = 0; n < nsample; ++n) {
                for (k = n + 1; k < nsample; ++k) {
                    prox[nsample*k + n] /= oobprox ?
                        (oobpair[nsample*k + n] > 0 ? oobpair[nsample*k + n] : 1) :
                            *nTree;
                            prox[nsample * n + k] = prox[nsample * k + n];
                }
                prox[nsample * n + n] = 1.0;
            }
            if (testdat) {
                for (n = 0; n < ntest; ++n)
                    for (k = 0; k < ntest + nsample; ++k)
                        proxts[ntest*k + n] /= *nTree;
            }
        }
        
        if (varImp) {
            for (m = 0; m < mdim; ++m) {
                errimp[m] = errimp[m] / *nTree;
                impSD[m] = sqrt( ((impSD[m] / *nTree) -
                        (errimp[m] * errimp[m])) / *nTree );
                if (localImp) {
                    for (n = 0; n < nsample; ++n) {
                        impmat[m + n * mdim] /= nout[n];
                    }
                }
            }
        }
        for (m = 0; m < mdim; ++m) tgini[m] /= *nTree;
        
        
        //addition by abhi
        //in order to release the space stored by the variable in findBestSplit
        // call by setting
        in_findBestSplit=-99;
        findBestSplit(&tmp_d, &tmp_i, &tmp_d, tmp_i, tmp_i,
                tmp_i, tmp_i, &tmp_i, &tmp_d,
                &tmp_d, &tmp_i, &tmp_i, tmp_i,
                tmp_d, tmp_i, &tmp_i);
        
        //do the same freeing of space by calling with -99
        in_regTree=-99;
        regTree(&tmp_d, &tmp_d, tmp_i, tmp_i, &tmp_i,
                &tmp_i,
                &tmp_d, &tmp_d, &tmp_c, tmp_i,
                &tmp_i, tmp_i, tmp_i, &tmp_i, &tmp_i,
                &tmp_d, &tmp_i);
	
	
	free(yb);
        free(xb);
	free(ytr);
	free(xtmp);
	free(resOOB);
        free(in);
	free(nodex);
	free(varUsed);
    if (!(*replace)  )
        free(nind);
    
    if (testdat) {
		free(ytree);
		free(nodexts);
	}
	
	if (doProx && oobprox)
		free(oobpair) ;
}
Exemple #24
0
/*
 * findBestSplit -- search for the best split of one regression-tree node.
 *
 * Up to mtry candidate variables are drawn at random without replacement.
 * For each candidate, every gap between distinct sorted values is scored with
 *     suml^2/npopl + sumr^2/npopr - sumnode^2/nodecnt
 * and the variable/threshold with the largest score wins.
 *
 * Parameters
 *   x        predictor data, read as x[var + (case-1) * mdim]
 *   jdex     1-based case numbers; entries ndstart..ndend (inclusive) are the
 *            cases of this node.  On success they are reordered so that the
 *            left-child cases come first.
 *   y        response, read as y[case-1]
 *   mdim     number of predictor variables
 *   nsample  length of the per-case work buffers
 *   ndstart, ndend   inclusive range of this node inside jdex
 *   msplit   out: 1-based index of the chosen variable, or -1 if none
 *   decsplit out: score of the chosen split (0.0 if none)
 *   ubest    out: split point (midpoint between two adjacent sorted values);
 *            when the chosen variable has cat[] > 1 it is replaced by
 *            pack(lc, icat), where icat[j] is 1 for categories whose mean
 *            response is below the split point
 *   ndendl   out: last jdex index belonging to the left child
 *   jstat    out: set to 1 when no split could be found
 *   mtry     number of candidate variables to try
 *   sumnode  used as the sum of y over the node -- presumably computed by the
 *            caller; confirm there
 *   nodecnt  number of cases in the node
 *   cat      cat[var] == 1 marks a numeric variable; any other value is used
 *            as the number of categories (local tables hold at most 32)
 *
 * Work buffers are function-static and controlled by the global
 * in_findBestSplit: 0 means "allocate on this call" (the flag is then set to
 * 1), and -99 means "release the buffers and return without doing any work".
 * The buffers are sized from the nsample/mdim of the allocating call.
 * NOTE(review): later calls are assumed not to pass larger values -- confirm
 * against the caller.
 */
void findBestSplit(double *x, int *jdex, double *y, int mdim, int nsample,
        int ndstart, int ndend, int *msplit, double *decsplit,
        double *ubest, int *ndendl, int *jstat, int mtry,
        double sumnode, int nodecnt, int *cat) {
    int last, ncat[32], icat[32], lc, nl, nr, npopl, npopr;
    int i, j, kv, l;
    static int *mind, *ncase;
    static double *xt, *ut, *v, *yl;
    double sumcat[32], avcat[32], tavcat[32], ubestt;
    double crit, critmax, critvar, suml, sumr, d, critParent;

    if (in_findBestSplit == -99) {
        /* Release the static work buffers.  The pointers are reset so that a
         * repeated cleanup call is a harmless free(NULL) instead of a double
         * free, and so no dangling pointer survives until the next run. */
        free(ncase); ncase = NULL;
        free(mind);  mind  = NULL;
        free(v);     v     = NULL;
        free(yl);    yl    = NULL;
        free(xt);    xt    = NULL;
        free(ut);    ut    = NULL;
        return;
    }

    if (in_findBestSplit == 0) {
        in_findBestSplit = 1;
        /* NOTE(review): allocation results are not checked; a failed calloc
         * will fault in the zeroing calls below. */
        ut = (double *) calloc(nsample, sizeof(double));
        xt = (double *) calloc(nsample, sizeof(double));
        v  = (double *) calloc(nsample, sizeof(double));
        yl = (double *) calloc(nsample, sizeof(double));
        /* One spare slot is kept from the original port as a safety margin. */
        mind  = (int *) calloc(mdim + 1, sizeof(int));
        ncase = (int *) calloc(nsample, sizeof(int));
    }
    zeroDouble(ut, nsample);
    zeroDouble(xt, nsample);
    zeroDouble(v, nsample);
    zeroDouble(yl, nsample);
    zeroInt(mind, mdim);
    zeroInt(ncase, nsample);

    zeroDouble(avcat, 32);
    zeroDouble(tavcat, 32);

    /* START BIG LOOP */
    *msplit = -1;
    *decsplit = 0.0;
    critmax = 0.0;
    ubestt = 0.0;
    for (i = 0; i < mdim; ++i) mind[i] = i;

    /* mind[0..last] is the pool of variables not yet tried. */
    last = mdim - 1;
    for (i = 0; i < mtry; ++i) {
        /* Pool exhausted (mtry > mdim): stop instead of indexing mind[-1]. */
        if (last < 0) break;

        critvar = 0.0;
        j = (int) (unif_rand() * (last + 1));
        /* If the generator ever returns exactly 1.0 the draw lands on
         * last + 1, one past the pool; clamp it back into range. */
        if (j > last) j = last;
        kv = mind[j];
        /* Move the drawn variable out of the pool. */
        swapInt(mind[j], mind[last]);
        last--;

        lc = cat[kv];
        if (lc == 1) {
            /* numeric variable */
            for (j = ndstart; j <= ndend; ++j) {
                xt[j] = x[kv + (jdex[j] - 1) * mdim];
                yl[j] = y[jdex[j] - 1];
            }
        } else {
            /* categorical variable */
            zeroInt(ncat, 32);
            zeroDouble(sumcat, 32);
            for (j = ndstart; j <= ndend; ++j) {
                l = (int) x[kv + (jdex[j] - 1) * mdim];
                sumcat[l - 1] += y[jdex[j] - 1];
                ncat[l - 1] ++;
            }
            /* Compute means of Y by category. */
            for (j = 0; j < lc; ++j) {
                avcat[j] = ncat[j] ? sumcat[j] / ncat[j] : 0.0;
            }
            /* Make the category mean the `pseudo' X data.  Only positions
             * ndstart..ndend are read below, so only those are filled. */
            for (j = ndstart; j <= ndend; ++j) {
                xt[j] = avcat[(int) x[kv + (jdex[j] - 1) * mdim] - 1];
                yl[j] = y[jdex[j] - 1];
            }
        }
        /* copy the x data in this node. */
        for (j = ndstart; j <= ndend; ++j) v[j] = xt[j];
        /* ncase starts as the 1-based identity and is permuted alongside v,
         * so afterwards ncase[j] - 1 is the pre-sort position of v[j]. */
        for (j = 1; j <= nsample; ++j) ncase[j - 1] = j;
        R_qsort_I(v, ncase, ndstart + 1, ndend + 1);
        /* All values equal: this variable cannot split the node. */
        if (v[ndstart] >= v[ndend]) continue;
        /* ncase(n)=case number of v nth from bottom */
        /* Start with everything on the right and move cases to the left. */
        critParent = sumnode * sumnode / nodecnt;
        suml = 0.0;
        sumr = sumnode;
        npopl = 0;
        npopr = nodecnt;
        crit = 0.0;
        /* Search through the "gaps" in the x-variable. */
        for (j = ndstart; j <= ndend - 1; ++j) {
            d = yl[ncase[j] - 1];
            suml += d;
            sumr -= d;
            npopl++;
            npopr--;
            /* Only a strict increase in v is a usable split point. */
            if (v[j] < v[j+1]) {
                crit = (suml * suml / npopl) + (sumr * sumr / npopr) -
                        critParent;
                if (crit > critvar) {
                    ubestt = (v[j] + v[j+1]) / 2.0;
                    critvar = crit;
                }
            }
        }
        if (critvar > critmax) {
            /* Best variable so far: remember its split and its (pseudo) x. */
            *ubest = ubestt;
            *msplit = kv + 1;
            critmax = critvar;
            for (j = ndstart; j <= ndend; ++j) {
                ut[j] = xt[j];
            }
            if (cat[kv] > 1) {
                for (j = 0; j < cat[kv]; ++j) tavcat[j] = avcat[j];
            }
        }
    }
    *decsplit = critmax;

    /* If best split can not be found, set to terminal node and return. */
    if (*msplit != -1) {
        /* Partition the node's cases: left child (ut <= ubest) first ... */
        nl = ndstart;
        for (j = ndstart; j <= ndend; ++j) {
            if (ut[j] <= *ubest) {
                nl++;
                ncase[nl-1] = jdex[j];
            }
        }
        *ndendl = imax2(nl - 1, ndstart);
        /* ... then right child (ut > ubest). */
        nr = *ndendl + 1;
        for (j = ndstart; j <= ndend; ++j) {
            if (ut[j] > *ubest) {
                if (nr >= nsample) break;   /* never write past ncase */
                nr++;
                ncase[nr - 1] = jdex[j];
            }
        }
        /* Keep at least one case for the right child. */
        if (*ndendl >= ndend) *ndendl = ndend - 1;
        for (j = ndstart; j <= ndend; ++j) jdex[j] = ncase[j];

        lc = cat[*msplit - 1];
        if (lc > 1) {
            /* Categorical split: flag the categories whose mean response is
             * below the split point and encode the flags with pack(). */
            for (j = 0; j < lc; ++j) {
                icat[j] = (tavcat[j] < *ubest) ? 1 : 0;
            }
            *ubest = pack(lc, icat);
        }
    } else *jstat = 1;

}