/*
 * Quick Sort -- partition ("distribution") step.
 *
 * Quick Sort needs O(n*log n) time on average, but it can degrade to O(n^2)
 * when the chosen pivots split the array badly. Like Merge Sort it is made of
 * two functions: one drives the recursion, the other one (this one)
 * distributes the values around a pivot. To obtain a different ordering only
 * the distribution function has to change.
 *
 * Worked example on [2, 7, 6, 3, 8, 0] with the pivot value 3 (index 3):
 *
 *   1. The pivot is swapped with the last element: [2, 7, 6, 0, 8, 3].
 *   2. Two indexes are used: the first starts on the first element, the
 *      second on the second-last one. The first index moves right until it
 *      meets a value bigger than the pivot (2 is not, 7 is: it stops on 7).
 *      The second index moves left until it meets a value smaller than the
 *      pivot (8 is not, 0 is: it stops on 0). The two values are swapped:
 *      [2, 0, 6, 7, 8, 3].
 *   3. The first index moves again: 0 is not bigger than the pivot, 6 is.
 *      The second index moves left past 7 and 6 and ends up before the first
 *      index, so no further swap between them happens.
 *   4. The value under the first index is swapped with the pivot:
 *      [2, 0, 3, 7, 8, 6].
 *
 * Now 3 sits in its final position, every value before it is not bigger and
 * every value after it is not smaller. The caller can recurse on the two
 * sub-arrays.
 *
 * Parameters:
 *   a     - array being sorted (modified in place)
 *   left  - index of the first element of the range
 *   pivot - index of the element chosen as pivot
 *   right - index of the last element of the range
 *
 * Returns the index where the pivot value ends up.
 */
int distributePivot (int *a, int left, int pivot, int right)
{
    int lo = left;
    int hi = right - 1;

    /* Park the pivot value in the last slot of the range. */
    if (pivot < right)
        swapInt(&a[pivot], &a[right]);

    while (lo <= hi) {
        if (a[lo] <= a[right]) {
            /* Already on the correct (left) side of the pivot. */
            lo++;
            continue;
        }
        if (a[hi] >= a[right]) {
            /* Already on the correct (right) side of the pivot. */
            hi--;
            continue;
        }
        /* a[lo] is too big and a[hi] is too small: exchange them. */
        swapInt(&a[lo], &a[hi]);
    }

    /* Move the pivot value between the two partitions. */
    if (lo < right)
        swapInt(&a[lo], &a[right]);

    return lo;
}
/*
 * Self-test driver for the swap helpers.
 *
 * Each check is compiled in only when its macro is defined:
 *   TEST_SWAP_INT - calls swapInt() on two ints and prints them afterwards.
 *   TEST_SWAP_PTR - calls swapPtr() on two int pointers and prints the values
 *                   they point to afterwards.
 *
 * Always returns 0.
 */
int main(void)
{
#ifdef TEST_SWAP_INT
    {
        int first = 10;
        int second = -20;

        swapInt (&first, &second);
        printf("SwapInt: %d %d\n", first, second);
    }
#endif

#ifdef TEST_SWAP_PTR
    {
        int first = 10;
        int second = -20;
        int *firstPtr = &first;
        int *secondPtr = &second;

        swapPtr ((void **)&firstPtr, (void **)&secondPtr);
        printf("SwapPtr: %d %d\n", *firstPtr, *secondPtr);
    }
#endif

    return 0;
}
/* bubbleSort
 *
 * Sorts data[0 .. array_size-1] in place with bubble sort.
 *
 * Parameters:
 *   data          - the int array to sort
 *   array_size    - number of elements in data
 *   sortOrderBool - pointer to the function that decides the ordering: it is
 *                   called with two adjacent elements (left, right) and the
 *                   pair is swapped when it returns non-zero
 * Returns: nothing
 */
void bubbleSort(int data[], const unsigned array_size, int (*sortOrderBool)(int a, int b))
{
    /* pass counter: index of the last element not yet known to be in place */
    unsigned times;
    unsigned compare;

    /* Zero or one element is already sorted. The guard also keeps the
     * unsigned expression array_size - 1 from wrapping around to UINT_MAX
     * when array_size is 0, which would index far outside the array. */
    if (array_size < 2) {
        return;
    }

    /* from "the last element is guaranteed to be in place" down to
     * "the second element is guaranteed to be in place" */
    for (times = array_size - 1; times >= 1; times--) {
        /* compare each element before index `times` with its right neighbour */
        for (compare = 0; compare < times; compare++) {
            /* swap the pair when the ordering function asks for it */
            if ((*sortOrderBool)(data[compare], data[compare + 1])) {
                swapInt(&data[compare], &data[compare + 1]);
            }
        }
    }
    return;
}
//----------------------------------------------------------------------------- void PDCFileWriter::write1intscalar(ofstream &outfile, const int ix) { //writes a 1-column data scalar/number to the file: use this for data type 0 int x = ix; swapInt((char*) &x); outfile.write((char*) &x,sizeof(int)); }
/*
 * Rearranges nums[0 .. numsSize-1] in place into its next permutation.
 *
 * Scanning positions from right to left, the first position that has a
 * larger value somewhere to its right is exchanged with the smallest such
 * larger value, and everything after that position is re-sorted with
 * qsort()/cmpInt. When no position qualifies the whole array is sorted
 * instead. cmpInt is defined elsewhere (presumably an ascending int
 * comparator -- confirm).
 *
 * Arrays with fewer than two elements are left untouched.
 */
void nextPermutation(int* nums, int numsSize)
{
    int pivot;
    int scan;
    int candidate;

    if (numsSize < 2) {
        return;
    }

    for (pivot = numsSize - 1; pivot >= 0; pivot--) {
        /* candidate == pivot means "no larger value found yet" */
        candidate = pivot;

        for (scan = pivot + 1; scan < numsSize; scan++) {
            if (nums[scan] <= nums[pivot]) {
                continue;
            }
            /* keep the smallest value that is still larger than nums[pivot] */
            if (candidate == pivot || nums[scan] < nums[candidate]) {
                candidate = scan;
            }
        }

        if (candidate == pivot) {
            continue;
        }

        swapInt(nums + pivot, nums + candidate);
        qsort(nums + pivot + 1, numsSize - pivot - 1, sizeof(int), cmpInt);
        return;
    }

    /* No position could be advanced: sort the whole array. */
    qsort(nums, numsSize, sizeof(int), cmpInt);
    return;
}
/*
 * Converts a little-endian 32-bit value to the host representation.
 * The value is returned unchanged unless getEndianness() reports
 * ENDIANNESS_BIG, in which case it is passed through swapInt().
 */
int32_t littleToNativeInt32(int32_t i)
{
    if (getEndianness() != ENDIANNESS_BIG) {
        return i;
    }
    return swapInt(i);
}
/*
 * Converts a big-endian 32-bit value to the host representation.
 * The value is returned unchanged when getEndianness() reports
 * ENDIANNESS_BIG; otherwise it is passed through swapInt().
 */
int32_t bigToNativeInt32(int32_t i)
{
    if (getEndianness() == ENDIANNESS_BIG) {
        return i;
    }
    return swapInt(i);
}
/*
 * Insertion Sort on integers.
 *
 * Orders an array of integers in O(n^2) time; when the array is already
 * ordered, or only a few elements are misplaced, it needs only O(n). So it
 * is Omega(n) and O(n^2).
 *
 * The idea is simple: take one element at a time and move it backwards until
 * it is in order with respect to the elements before it. Take the array
 *
 *     [6, 7, 2, 3]
 *
 * The first value, 6, is alone and therefore ordered. The second value is
 * compared with the first: [6, 7] is still ordered. The third value, 2, is
 * misplaced: it is swapped with the previous element and checked again
 * against the new previous element, swapping again as long as that one is
 * greater. Then the next element is taken and treated the same way, and so on.
 *
 * After iteration i the first i elements are ordered among themselves, but
 * they are not necessarily in their final place yet.
 *
 * The array is passed by pointer, so the caller sees the changes.
 */
void IntArrayInsSort (int *a, int dim)
{
    int next;   /* index of the element being inserted */
    int pos;    /* current position of that element while it moves back */

    for (next = 0; next < dim; next++) {
        for (pos = next; pos > 0 && a[pos-1] > a[pos]; pos--) {
            swapInt(&a[pos-1], &a[pos]);
        }
    }
}
/*
 * HEAP SORT ON INTEGERS
 *
 * Builds an IntMinHeap from *a with arrayToIntMinHeap(), then repeatedly
 * swaps the heap root with the last element still inside the heap, shrinks
 * the heap by one and calls reorganizeIntMinHeap() on the root. When done,
 * *a is set to the heap's array.
 * (IntMinHeap and its helpers are defined elsewhere; whether the heap reuses
 * the caller's storage or a copy is not visible here.)
 */
void intArrayHeapSort (int **a, int dim)
{
    IntMinHeap heap = arrayToIntMinHeap(*a, dim);
    int last = dim - 1;

    while (last > 0) {
        swapInt(&heap.array[last], &heap.array[0]);
        heap.size--;
        reorganizeIntMinHeap(&heap, 0);
        last--;
    }

    *a = heap.array;
}
/*
 * Selection Sort on integers.
 *
 * Unlike Insertion Sort, which is Omega(n), this algorithm always needs
 * O(n^2) steps, so it is Theta(n^2).
 *
 * At every iteration the algorithm decides which value has to occupy
 * position i. Take the array
 *
 *     [6, 7, 2, 3]
 *
 * One index is fixed on position 0 while a second one walks the rest of the
 * array looking for values smaller than the one in position 0. So 6 is
 * swapped with 2, and nothing else happens because 2 is the lowest value.
 * Then i advances to 1 and j walks the rest of the array looking for values
 * smaller than the one in position 1 (7): 7 is swapped with 6, then 6 with 3.
 * And so on.
 *
 * After iteration i the first i elements are ordered and in their final
 * position. Even an already ordered array is scanned completely, because
 * the algorithm cannot know in advance that no smaller value follows.
 *
 * The array is passed by pointer, so the caller sees the changes.
 */
void IntArraySelSort (int *a, int dim)
{
    int slot = 0;   /* position currently being filled */

    while (slot < dim - 1) {
        int probe;

        for (probe = slot + 1; probe < dim; probe++) {
            if (a[probe] >= a[slot]) {
                continue;
            }
            swapInt(&a[probe], &a[slot]);
        }
        slot++;
    }
}
//----------------------------------------------------------------------------- void PDCFileWriter::writeAttribute(ofstream &outfile, const int attrib_length, const char* attrib_name, const int attrib_type) { // writes the text for each new attribute to the data file // 'attrib_length' is the number of characters in the attribute name 'attrib_name' // attrib type is the integer data type, following the convention: // 0=int, 1=intArray, 2=intArray, 3=double, 4= doublearray, 5=vector(3doubles), 6=vectorArray(array of 3doubles) int attribLen = attrib_length; swapInt((char*) &attribLen); outfile.write((char*) &attribLen, sizeof(int)); for(int i=0;i<attrib_length;i++) { outfile.write((char*) &attrib_name[i], sizeof(char)); } int attribType=attrib_type; swapInt((char*) &attribType); outfile.write((char*) &attribType, sizeof(int)); }
static void outputBmp(const int width, const int height, unsigned char *color_array, const char *ofile){ BmpHeader bmp; char bfType[2]; bfType[0] = 'B'; bfType[1] = 'M'; bmp.bfSize = width*height*3 + 54; bmp.bfReserved1 = 0; bmp.bfReserved2 = 0; bmp.bfOffBits = 54; bmp.biSize = 40; bmp.biWidth = width; bmp.biHeight = height; bmp.biPlanes = 1; bmp.biBitCount = 24; bmp.biCompression = 0; bmp.biSizeImage = 0; bmp.biXPixPerMeter = 0; bmp.biYPixPerMeter = 0; bmp.biClrUsed = 0; bmp.biClrImporant = 0; #ifdef REVERSE_ENDIAN_OUTPUT bmp.bfSize = swapInt(bmp.bfSize); bmp.bfOffBits = swapInt(bmp.bfOffBits); bmp.biSize = swapInt(bmp.biSize); bmp.biWidth = swapInt(bmp.biWidth); bmp.biHeight = swapInt(bmp.biHeight); bmp.biPlanes = swapShort(bmp.biPlanes); bmp.biBitCount = swapShort(bmp.biBitCount); #endif FILE *outstream = fopen( ofile, "wb"); fwrite( &bfType, sizeof(char), 2, outstream); fwrite( &bmp, sizeof(BmpHeader), 1, outstream); fwrite( color_array, sizeof(unsigned char), width*height*3, outstream); fclose( outstream); }
/*
 * Reverses the ints stored between the two pointers (both ends included) by
 * swapping the outermost pair and walking both pointers towards each other.
 * The pointers may be given in either order. The walk stops as soon as the
 * two pointers meet or have just crossed.
 */
void reverseIntMemory(int *first, int *last)
{
    int step;

    /* +1 when `first` is the lower address, -1 otherwise */
    if (first < last) {
        step = 1;
    } else {
        step = -1;
    }

    for (; first != last && first != last + step; first += step, last -= step) {
        swapInt(first, last);
    }
}
//----------------------------------------------------------------------------- void PDCFileWriter::writePDCHeader(ofstream &outfile) { //create temporary versions of the variables to byte-swap int temp_formatVersion = getFormatVersion(); int temp_byteOrder = getByteOrder(); int temp_extra1 = getExtra1(); int temp_extra2 = getExtra2(); int temp_numParticles = getParticleCount(); int temp_numAttributes = getAttributeCount(); //do swap of byte order swapInt((char*) &temp_formatVersion); swapInt((char*) &temp_byteOrder); swapInt((char*) &temp_extra1); swapInt((char*) &temp_extra2); swapInt((char*) &temp_numParticles); swapInt((char*) &temp_numAttributes); //write out to file for(int i=0;i<4;i++) { outfile.put( m_format[i]); } outfile.write((char*) &temp_formatVersion, sizeof(int)); outfile.write((char*) &temp_byteOrder, sizeof(int)); outfile.write((char*) &temp_extra1, sizeof(int)); outfile.write((char*) &temp_extra2, sizeof(int)); outfile.write((char*) &temp_numParticles, sizeof(int)); outfile.write((char*) &temp_numAttributes, sizeof(int)); }
/*
 * Inserts val into the heap.
 *
 * The value is appended at index heap->sz, the size is incremented, and the
 * new element is then moved up: while it is smaller than its parent the two
 * are swapped, so the smallest value ends up at index 0.
 *
 * NOTE(review): no capacity check is made before writing hPtr[sz]; the
 * capacity of hPtr is not visible here, so the caller must guarantee room.
 */
void heap_Insert(Heap *heap, int val)
{
    int index = heap->sz;
    int parent;

    heap->hPtr[index] = val;
    (heap->sz)++;

    /* Test index > 0 BEFORE touching the parent slot: the root has no parent,
     * and what getParentIndex(0) returns is not visible here, so it must not
     * be used as an array index. */
    while (index > 0) {
        parent = getParentIndex(index);
        if (heap->hPtr[parent] <= heap->hPtr[index]) {
            break;      /* heap order restored */
        }
        swapInt(heap->hPtr + parent, heap->hPtr + index);
        index = parent;
    }
}
// Selection sort.
// Sorts pData[0 .. len-1] in place. On every pass the smallest element of the
// still unsorted tail is located and exchanged with the first element of that
// tail; the array is printed after each pass.
void SelectSort(int* pData, int len){
    int pass;

    for (pass = 0; pass < len; pass++) {
        int smallest = pass;    // index of the smallest element in the unsorted part
        int probe;

        for (probe = pass + 1; probe < len; probe++) {
            if (pData[smallest] > pData[probe]) {
                smallest = probe;
            }
        }

        // exchange the smallest value with the element at position `pass`
        swapInt(&pData[pass], &pData[smallest]);

        // print the sorting progress
        printf("process %d: ",pass);
        printIntArray(pData, len);
    }
}
/* maxHeapify
 *
 * Restores the max-heap property for the subtree rooted at current_index,
 * assuming both child subtrees already satisfy it: the largest of the node
 * and its two children is moved to the node, and the procedure recurses into
 * the child that received the old value.
 *
 * Parameters:
 *   data          - array holding the heap
 *   array_size    - number of slots in data; child indexes >= array_size are
 *                   treated as "no child"
 *   current_index - index of the node to sift down
 * Returns: nothing
 *
 * NOTE(review): children are taken at 2*i and 2*i+1, which is the layout of a
 * heap whose root is at index 1. For index 0 the "left child" is the node
 * itself. Confirm that callers use a 1-based heap.
 */
void maxHeapify(int data[], unsigned array_size, unsigned current_index)
{
    /* index of the largest node found so far */
    unsigned largest_index = current_index;
    /* where the children of the current node would be */
    unsigned left_child_index = current_index * 2,
             right_child_index = current_index * 2 + 1;

    /* `index < array_size` instead of `index <= array_size - 1`: the latter
     * wraps around for array_size == 0 and accepts every index. */

    /* left child exists and is greater than the largest so far */
    if (left_child_index < array_size && data[left_child_index] > data[largest_index]) {
        largest_index = left_child_index;
    }

    /* right child exists and is greater than the largest so far. It must be
     * compared with the current maximum, not with the parent: otherwise a
     * right child that is bigger than the parent but smaller than the left
     * child would be promoted above the left child. */
    if (right_child_index < array_size && data[right_child_index] > data[largest_index]) {
        largest_index = right_child_index;
    }

    /* if the current node is not the largest, swap it with the largest child
     * and continue sifting down from there */
    if (largest_index != current_index) {
        swapInt(&data[current_index], &data[largest_index]);
        maxHeapify(data, array_size, largest_index);
    }

    return ;
}
void classRF(double *x, int *dimx, int *cl, int *ncl, int *cat, int *maxcat, int *sampsize, int *strata, int *Options, int *ntree, int *nvar, int *ipi, double *classwt, double *cut, int *nodesize, int *outcl, int *counttr, double *prox, double *imprt, double *impsd, double *impmat, int *nrnodes, int *ndbigtree, int *nodestatus, int *bestvar, int *treemap, int *nodeclass, double *xbestsplit, double *errtr, int *testdat, double *xts, int *clts, int *nts, double *countts, int *outclts, int labelts, double *proxts, double *errts, int *inbag) { /****************************************************************** * C wrapper for random forests: get input from R and drive * the Fortran routines. * * Input: * * x: matrix of predictors (transposed!) * dimx: two integers: number of variables and number of cases * cl: class labels of the data * ncl: number of classes in the responsema * cat: integer vector of number of classes in the predictor; * 1=continuous * maxcat: maximum of cat * Options: 7 integers: (0=no, 1=yes) * add a second class (for unsupervised RF)? * 1: sampling from product of marginals * 2: sampling from product of uniforms * assess variable importance? * calculate proximity? * calculate proximity based on OOB predictions? * calculate outlying measure? * how often to print output? * keep the forest for future prediction? * ntree: number of trees * nvar: number of predictors to use for each split * ipi: 0=use class proportion as prob.; 1=use supplied priors * pi: double vector of class priors * nodesize: minimum node size: no node with fewer than ndsize * cases will be split * * Output: * * outcl: class predicted by RF * counttr: matrix of votes (transposed!) 
* imprt: matrix of variable importance measures * impmat: matrix of local variable importance measures * prox: matrix of proximity (if iprox=1) ******************************************************************/ int nsample0, mdim, nclass, addClass, mtry, ntest, nsample, ndsize, mimp, nimp, near, nuse, noutall, nrightall, nrightimpall, keepInbag, nstrata; int jb, j, n, m, k, idxByNnode, idxByNsample, imp, localImp, iprox, oobprox, keepf, replace, stratify, trace, *nright, *nrightimp, *nout, *nclts, Ntree; int *out, *bestsplitnext, *bestsplit, *nodepop, *jin, *nodex, *nodexts, *nodestart, *ta, *ncase, *jerr, *varUsed, *jtr, *classFreq, *idmove, *jvr, *at, *a, *b, *mind, *nind, *jts, *oobpair; int **strata_idx, *strata_size, last, ktmp, anyEmpty, ntry; double av=0.0; double *tgini, *tx, *wl, *classpop, *tclasscat, *tclasspop, *win, *tp, *wr; //Do initialization for COKUS's Random generator seedMT(2*rand()+1); //works well with odd number so why don't use that addClass = Options[0]; imp = Options[1]; localImp = Options[2]; iprox = Options[3]; oobprox = Options[4]; trace = Options[5]; keepf = Options[6]; replace = Options[7]; stratify = Options[8]; keepInbag = Options[9]; mdim = dimx[0]; nsample0 = dimx[1]; nclass = (*ncl==1) ? 2 : *ncl; ndsize = *nodesize; Ntree = *ntree; mtry = *nvar; ntest = *nts; nsample = addClass ? (nsample0 + nsample0) : nsample0; mimp = imp ? mdim : 1; nimp = imp ? nsample : 1; near = iprox ? 
nsample0 : 1; if (trace == 0) trace = Ntree + 1; /*printf("\nmdim %d, nclass %d, nrnodes %d, nsample %d, ntest %d\n", mdim, nclass, *nrnodes, nsample, ntest); printf("\noobprox %d, mdim %d, nsample0 %d, Ntree %d, mtry %d, mimp %d", oobprox, mdim, nsample0, Ntree, mtry, mimp); printf("\nstratify %d, replace %d",stratify,replace); printf("\n");*/ tgini = (double *) S_alloc_alt(mdim, sizeof(double)); wl = (double *) S_alloc_alt(nclass, sizeof(double)); wr = (double *) S_alloc_alt(nclass, sizeof(double)); classpop = (double *) S_alloc_alt(nclass* *nrnodes, sizeof(double)); tclasscat = (double *) S_alloc_alt(nclass*32, sizeof(double)); tclasspop = (double *) S_alloc_alt(nclass, sizeof(double)); tx = (double *) S_alloc_alt(nsample, sizeof(double)); win = (double *) S_alloc_alt(nsample, sizeof(double)); tp = (double *) S_alloc_alt(nsample, sizeof(double)); out = (int *) S_alloc_alt(nsample, sizeof(int)); bestsplitnext = (int *) S_alloc_alt(*nrnodes, sizeof(int)); bestsplit = (int *) S_alloc_alt(*nrnodes, sizeof(int)); nodepop = (int *) S_alloc_alt(*nrnodes, sizeof(int)); nodestart = (int *) S_alloc_alt(*nrnodes, sizeof(int)); jin = (int *) S_alloc_alt(nsample, sizeof(int)); nodex = (int *) S_alloc_alt(nsample, sizeof(int)); nodexts = (int *) S_alloc_alt(ntest, sizeof(int)); ta = (int *) S_alloc_alt(nsample, sizeof(int)); ncase = (int *) S_alloc_alt(nsample, sizeof(int)); jerr = (int *) S_alloc_alt(nsample, sizeof(int)); varUsed = (int *) S_alloc_alt(mdim, sizeof(int)); jtr = (int *) S_alloc_alt(nsample, sizeof(int)); jvr = (int *) S_alloc_alt(nsample, sizeof(int)); classFreq = (int *) S_alloc_alt(nclass, sizeof(int)); jts = (int *) S_alloc_alt(ntest, sizeof(int)); idmove = (int *) S_alloc_alt(nsample, sizeof(int)); at = (int *) S_alloc_alt(mdim*nsample, sizeof(int)); a = (int *) S_alloc_alt(mdim*nsample, sizeof(int)); b = (int *) S_alloc_alt(mdim*nsample, sizeof(int)); mind = (int *) S_alloc_alt(mdim, sizeof(int)); nright = (int *) S_alloc_alt(nclass, sizeof(int)); 
nrightimp = (int *) S_alloc_alt(nclass, sizeof(int)); nout = (int *) S_alloc_alt(nclass, sizeof(int)); if (oobprox) { oobpair = (int *) S_alloc_alt(near*near, sizeof(int)); } //printf("nsample=%d\n", nsample); /* Count number of cases in each class. */ zeroInt(classFreq, nclass); for (n = 0; n < nsample; ++n) classFreq[cl[n] - 1] ++; /* Normalize class weights. */ //Rprintf("ipi %d ",*ipi); //for(n=0;n<nclass;n++) Rprintf("%d: %d, %f,",n,classFreq[n],classwt[n]); normClassWt(cl, nsample, nclass, *ipi, classwt, classFreq); //for(n=0;n<nclass;n++) Rprintf("%d: %d, %f,",n,classFreq[n],classwt[n]); if (stratify) { /* Count number of strata and frequency of each stratum. */ nstrata = 0; for (n = 0; n < nsample0; ++n) if (strata[n] > nstrata) nstrata = strata[n]; /* Create the array of pointers, each pointing to a vector * of indices of where data of each stratum is. */ strata_size = (int *) S_alloc_alt(nstrata, sizeof(int)); for (n = 0; n < nsample0; ++n) { strata_size[strata[n] - 1] ++; } strata_idx = (int **) S_alloc_alt(nstrata, sizeof(int *)); for (n = 0; n < nstrata; ++n) { strata_idx[n] = (int *) S_alloc_alt(strata_size[n], sizeof(int)); } zeroInt(strata_size, nstrata); for (n = 0; n < nsample0; ++n) { strata_size[strata[n] - 1] ++; strata_idx[strata[n] - 1][strata_size[strata[n] - 1] - 1] = n; } } else { nind = replace ? 
NULL : (int *) S_alloc_alt(nsample, sizeof(int)); } /* INITIALIZE FOR RUN */ if (*testdat) zeroDouble(countts, ntest * nclass); zeroInt(counttr, nclass * nsample); zeroInt(out, nsample); zeroDouble(tgini, mdim); zeroDouble(errtr, (nclass + 1) * Ntree); if (labelts) { nclts = (int *) S_alloc_alt(nclass, sizeof(int)); for (n = 0; n < ntest; ++n) nclts[clts[n]-1]++; zeroDouble(errts, (nclass + 1) * Ntree); } //printf("labelts %d\n",labelts);fflush(stdout); if (imp) { zeroDouble(imprt, (nclass+2) * mdim); zeroDouble(impsd, (nclass+1) * mdim); if (localImp) zeroDouble(impmat, nsample * mdim); } if (iprox) { zeroDouble(prox, nsample0 * nsample0); if (*testdat) zeroDouble(proxts, ntest * (ntest + nsample0)); } makeA(x, mdim, nsample, cat, at, b); //R_CheckUserInterrupt(); /* Starting the main loop over number of trees. */ GetRNGstate(); if (trace <= Ntree) { /* Print header for running output. */ Rprintf("ntree OOB"); for (n = 1; n <= nclass; ++n) Rprintf("%7i", n); if (labelts) { Rprintf("| Test"); for (n = 1; n <= nclass; ++n) Rprintf("%7i", n); } Rprintf("\n"); } idxByNnode = 0; idxByNsample = 0; //Rprintf("addclass %d, ntree %d, cl[300]=%d", addClass,Ntree,cl[299]); for(jb = 0; jb < Ntree; jb++) { //Rprintf("addclass %d, ntree %d, cl[300]=%d", addClass,Ntree,cl[299]); //printf("jb=%d,\n",jb); /* Do we need to simulate data for the second class? */ if (addClass) createClass(x, nsample0, nsample, mdim); do { zeroInt(nodestatus + idxByNnode, *nrnodes); zeroInt(treemap + 2*idxByNnode, 2 * *nrnodes); zeroDouble(xbestsplit + idxByNnode, *nrnodes); zeroInt(nodeclass + idxByNnode, *nrnodes); zeroInt(varUsed, mdim); /* TODO: Put all sampling code into a function. 
*/ /* drawSample(sampsize, nsample, ); */ if (stratify) { /* stratified sampling */ zeroInt(jin, nsample); zeroDouble(tclasspop, nclass); zeroDouble(win, nsample); if (replace) { /* with replacement */ for (n = 0; n < nstrata; ++n) { for (j = 0; j < sampsize[n]; ++j) { ktmp = (int) (unif_rand() * strata_size[n]); k = strata_idx[n][ktmp]; tclasspop[cl[k] - 1] += classwt[cl[k] - 1]; win[k] += classwt[cl[k] - 1]; jin[k] = 1; } } } else { /* stratified sampling w/o replacement */ /* re-initialize the index array */ zeroInt(strata_size, nstrata); for (j = 0; j < nsample; ++j) { strata_size[strata[j] - 1] ++; strata_idx[strata[j] - 1][strata_size[strata[j] - 1] - 1] = j; } /* sampling without replacement */ for (n = 0; n < nstrata; ++n) { last = strata_size[n] - 1; for (j = 0; j < sampsize[n]; ++j) { ktmp = (int) (unif_rand() * (last+1)); k = strata_idx[n][ktmp]; swapInt(strata_idx[n][last], strata_idx[n][ktmp]); last--; tclasspop[cl[k] - 1] += classwt[cl[k]-1]; win[k] += classwt[cl[k]-1]; jin[k] = 1; } } } } else { /* unstratified sampling */ anyEmpty = 0; ntry = 0; do { zeroInt(jin, nsample); zeroDouble(tclasspop, nclass); zeroDouble(win, nsample); if (replace) { for (n = 0; n < *sampsize; ++n) { k = unif_rand() * nsample; tclasspop[cl[k] - 1] += classwt[cl[k]-1]; win[k] += classwt[cl[k]-1]; jin[k] = 1; } } else { for (n = 0; n < nsample; ++n) nind[n] = n; last = nsample - 1; for (n = 0; n < *sampsize; ++n) { ktmp = (int) (unif_rand() * (last+1)); k = nind[ktmp]; swapInt(nind[ktmp], nind[last]); last--; tclasspop[cl[k] - 1] += classwt[cl[k]-1]; win[k] += classwt[cl[k]-1]; jin[k] = 1; } } /* check if any class is missing in the sample */ for (n = 0; n < nclass; ++n) { if (tclasspop[n] == 0) anyEmpty = 1; } ntry++; } while (anyEmpty && ntry <= 10); } /* If need to keep indices of inbag data, do that here. */ if (keepInbag) { for (n = 0; n < nsample0; ++n) { inbag[n + idxByNsample] = jin[n]; } } /* Copy the original a matrix back. 
*/ memcpy(a, at, sizeof(int) * mdim * nsample); modA(a, &nuse, nsample, mdim, cat, *maxcat, ncase, jin); #ifdef WIN64 F77_CALL(_buildtree) #endif #ifndef WIN64 F77_CALL(buildtree) #endif (a, b, cl, cat, maxcat, &mdim, &nsample, &nclass, treemap + 2*idxByNnode, bestvar + idxByNnode, bestsplit, bestsplitnext, tgini, nodestatus + idxByNnode, nodepop, nodestart, classpop, tclasspop, tclasscat, ta, nrnodes, idmove, &ndsize, ncase, &mtry, varUsed, nodeclass + idxByNnode, ndbigtree + jb, win, wr, wl, &mdim, &nuse, mind); /* if the "tree" has only the root node, start over */ } while (ndbigtree[jb] == 1); Xtranslate(x, mdim, *nrnodes, nsample, bestvar + idxByNnode, bestsplit, bestsplitnext, xbestsplit + idxByNnode, nodestatus + idxByNnode, cat, ndbigtree[jb]); /* Get test set error */ if (*testdat) { predictClassTree(xts, ntest, mdim, treemap + 2*idxByNnode, nodestatus + idxByNnode, xbestsplit + idxByNnode, bestvar + idxByNnode, nodeclass + idxByNnode, ndbigtree[jb], cat, nclass, jts, nodexts, *maxcat); TestSetError(countts, jts, clts, outclts, ntest, nclass, jb+1, errts + jb*(nclass+1), labelts, nclts, cut); } /* Get out-of-bag predictions and errors. */ predictClassTree(x, nsample, mdim, treemap + 2*idxByNnode, nodestatus + idxByNnode, xbestsplit + idxByNnode, bestvar + idxByNnode, nodeclass + idxByNnode, ndbigtree[jb], cat, nclass, jtr, nodex, *maxcat); zeroInt(nout, nclass); noutall = 0; for (n = 0; n < nsample; ++n) { if (jin[n] == 0) { /* increment the OOB votes */ counttr[n*nclass + jtr[n] - 1] ++; /* count number of times a case is OOB */ out[n]++; /* count number of OOB cases in the current iteration. * nout[n] is the number of OOB cases for the n-th class. * noutall is the number of OOB cases overall. */ nout[cl[n] - 1]++; noutall++; } } /* Compute out-of-bag error rate. 
*/ oob(nsample, nclass, jin, cl, jtr, jerr, counttr, out, errtr + jb*(nclass+1), outcl, cut); if ((jb+1) % trace == 0) { Rprintf("%5i: %6.2f%%", jb+1, 100.0*errtr[jb * (nclass+1)]); for (n = 1; n <= nclass; ++n) { Rprintf("%6.2f%%", 100.0 * errtr[n + jb * (nclass+1)]); } if (labelts) { Rprintf("| "); for (n = 0; n <= nclass; ++n) { Rprintf("%6.2f%%", 100.0 * errts[n + jb * (nclass+1)]); } } Rprintf("\n"); //R_CheckUserInterrupt(); } /* DO VARIABLE IMPORTANCE */ if (imp) { nrightall = 0; /* Count the number of correct prediction by the current tree * among the OOB samples, by class. */ zeroInt(nright, nclass); for (n = 0; n < nsample; ++n) { /* out-of-bag and predicted correctly: */ if (jin[n] == 0 && jtr[n] == cl[n]) { nright[cl[n] - 1]++; nrightall++; } } for (m = 0; m < mdim; ++m) { if (varUsed[m]) { nrightimpall = 0; zeroInt(nrightimp, nclass); for (n = 0; n < nsample; ++n) tx[n] = x[m + n*mdim]; /* Permute the m-th variable. */ permuteOOB(m, x, jin, nsample, mdim); /* Predict the modified data using the current tree. */ predictClassTree(x, nsample, mdim, treemap + 2*idxByNnode, nodestatus + idxByNnode, xbestsplit + idxByNnode, bestvar + idxByNnode, nodeclass + idxByNnode, ndbigtree[jb], cat, nclass, jvr, nodex, *maxcat); /* Count how often correct predictions are made with * the modified data. */ for (n = 0; n < nsample; n++) { if (jin[n] == 0) { if (jvr[n] == cl[n]) { nrightimp[cl[n] - 1]++; nrightimpall++; } if (localImp && jvr[n] != jtr[n]) { if (cl[n] == jvr[n]) { impmat[m + n*mdim] -= 1.0; } else { impmat[m + n*mdim] += 1.0; } } } /* Restore the original data for that variable. */ x[m + n*mdim] = tx[n]; } /* Accumulate decrease in proportions of correct * predictions. 
*/ for (n = 0; n < nclass; ++n) { if (nout[n] > 0) { imprt[m + n*mdim] += ((double) (nright[n] - nrightimp[n])) / nout[n]; impsd[m + n*mdim] += ((double) (nright[n] - nrightimp[n]) * (nright[n] - nrightimp[n])) / nout[n]; } } if (noutall > 0) { imprt[m + nclass*mdim] += ((double)(nrightall - nrightimpall)) / noutall; impsd[m + nclass*mdim] += ((double) (nrightall - nrightimpall) * (nrightall - nrightimpall)) / noutall; } } } } /* DO PROXIMITIES */ if (iprox) { computeProximity(prox, oobprox, nodex, jin, oobpair, near); /* proximity for test data */ if (*testdat) { computeProximity(proxts, 0, nodexts, jin, oobpair, ntest); /* Compute proximity between testset and training set. */ for (n = 0; n < ntest; ++n) { for (k = 0; k < near; ++k) { if (nodexts[n] == nodex[k]) proxts[n + ntest * (k+ntest)] += 1.0; } } } } if (keepf) idxByNnode += *nrnodes; if (keepInbag) idxByNsample += nsample0; } PutRNGstate(); /* Final processing of variable importance. */ for (m = 0; m < mdim; m++) tgini[m] /= Ntree; if (imp) { for (m = 0; m < mdim; ++m) { if (localImp) { /* casewise measures */ for (n = 0; n < nsample; ++n) impmat[m + n*mdim] /= out[n]; } /* class-specific measures */ for (k = 0; k < nclass; ++k) { av = imprt[m + k*mdim] / Ntree; impsd[m + k*mdim] = sqrt(((impsd[m + k*mdim] / Ntree) - av*av) / Ntree); imprt[m + k*mdim] = av; /* imprt[m + k*mdim] = (se <= 0.0) ? -1000.0 - av : av / se; */ } /* overall measures */ av = imprt[m + nclass*mdim] / Ntree; impsd[m + nclass*mdim] = sqrt(((impsd[m + nclass*mdim] / Ntree) - av*av) / Ntree); imprt[m + nclass*mdim] = av; imprt[m + (nclass+1)*mdim] = tgini[m]; } } else { for (m = 0; m < mdim; ++m) imprt[m] = tgini[m]; } /* PROXIMITY DATA ++++++++++++++++++++++++++++++++*/ if (iprox) { for (n = 0; n < near; ++n) { for (k = n + 1; k < near; ++k) { prox[near*k + n] /= oobprox ? (oobpair[near*k + n] > 0 ? 
oobpair[near*k + n] : 1) : Ntree; prox[near*n + k] = prox[near*k + n]; } prox[near*n + n] = 1.0; } if (*testdat) { for (n = 0; n < ntest; ++n) for (k = 0; k < ntest + nsample; ++k) proxts[ntest*k + n] /= Ntree; } } if (trace <= Ntree){ printf("\nmdim %d, nclass %d, nrnodes %d, nsample %d, ntest %d\n", mdim, nclass, *nrnodes, nsample, ntest); printf("\noobprox %d, mdim %d, nsample0 %d, Ntree %d, mtry %d, mimp %d", oobprox, mdim, nsample0, Ntree, mtry, mimp); printf("\nstratify %d, replace %d",stratify,replace); printf("\n"); } //frees up the memory free(tgini);free(wl);free(wr);free(classpop);free(tclasscat); free(tclasspop);free(tx);free(win);free(tp);free(out); free(bestsplitnext);free(bestsplit);free(nodepop);free(nodestart);free(jin); free(nodex);free(nodexts);free(ta);free(ncase);free(jerr); free(varUsed);free(jtr);free(jvr);free(classFreq);free(jts); free(idmove);free(at);free(a);free(b);free(mind); free(nright);free(nrightimp);free(nout); if (oobprox) { free(oobpair); } if (stratify) { free(strata_size); for (n = 0; n < nstrata; ++n) { free(strata_idx[n]); } free(strata_idx); } else { if (replace) free(nind); } //printf("labelts %d\n",labelts);fflush(stdout); if (labelts) { free(nclts); } //printf("stratify %d",stratify);fflush(stdout); }
int main (int argc, const char *argv[]) { /* file buffers */ int fp; char globalHeaderContent[sizeof(pcap_hdr_t)]; char recordHeaderContent[sizeof(pcaprec_hdr_t)]; char buffer[READ_BUFFER_SIZE]; /* headers, headers, headers... */ pcap_hdr_t* globalHeader = (pcap_hdr_t*) globalHeaderContent; pcaprec_hdr_t* recordHeader = (pcaprec_hdr_t*) recordHeaderContent; ether_header_t* etherHeader; ipv4_header_t* ipv4Header; udp_header_t* udpHeader; /* record count */ unsigned int recordCount = 0; /* checking file input */ if (argc<1) { fprintf(stderr, "Error: No file input.\n"); return 1; } /* file opening */ fp = open(argv[1], O_RDONLY); if (fp == -1) { fprintf(stderr, "Error: Cannot open file.\n"); return 2; } /* applying global header data structure */ if (!fetchGlobalHeader(fp, globalHeader)) { fprintf(stderr, "Error: File format not correct\n"); return 4; } /* printing global header information */ printf ("ver=%d.%d snaplen=%d network=%d\n", swapShort(globalHeader->version_major), swapShort(globalHeader->version_minor), swapInt(globalHeader->snaplen), swapInt(globalHeader->network)); /* per-packet information */ while(fetchRecordHeader(fp, recordHeader)) { recordCount += 1; /* read file */ read (fp, buffer, swapInt(recordHeader->incl_len)); /* applying data structures */ etherHeader = (ether_header_t *) (buffer); ipv4Header = (ipv4_header_t *) (buffer +sizeof(ether_header_t)); /* printing * 1234567890.098765 100/100 123.45.67.89 -> 98.76.54.32 (17) sport=12345 dport=9876 */ printf ("%u.%06u %u/%u", swapInt (recordHeader->ts_sec), swapInt (recordHeader->ts_usec), swapInt (recordHeader->incl_len), swapInt (recordHeader->orig_len) ); printf (" %s", inet_ntoa (ipv4Header->ip_src)); printf (" -> %s", inet_ntoa (ipv4Header->ip_dst)); printf (" (%u)", ipv4Header->ip_p); /* UDP-specific information */ if (ipv4Header->ip_p == 17) { /* applying UDP header data structure */ udpHeader = (udp_header_t *) (buffer + sizeof (ether_header_t) + sizeof (ipv4_header_t)); /* printing */ 
printf(" sport=%u dport=%u", ntohs (udpHeader->port_src), ntohs (udpHeader->port_dst) ); } /* new line */ printf("\n"); } printf ("total %d packets read\n", recordCount); close(fp); return 0; }
/*
 * Grow a regression forest and accumulate its out-of-bag (OOB) statistics.
 *
 * All arguments are pointers (scalars are passed as one-element arrays).
 * What the code below establishes about them:
 *
 *   x, y        training predictors and response; x is indexed as
 *               x[m + n * mdim] (variable m of case n).
 *   xdim        xdim[0] = nsample (number of cases), xdim[1] = mdim
 *               (number of variables).
 *   sampsize    number of cases drawn for each tree.
 *   nthsize, mtry, nrnodes
 *               forwarded unchanged to regTree().
 *   nTree       number of trees to grow.
 *   imp         imp[0] = varImp flag, imp[1] = localImp flag,
 *               imp[2] = nPerm (permutations per variable).
 *   cat, maxcat forwarded to regTree() / predictRegTree().
 *   jprint      print a progress line every *jprint trees; 0 is rewritten
 *               to *nTree + 1, which disables printing.
 *   doProx, oobprox
 *               request proximities / restrict them via oobpair.
 *   biasCorr    run simpleLinReg() of y on the OOB prediction after each
 *               tree and apply coef[0] + coef[1] * prediction at the end.
 *   keepf       keepf[0] = keepF (store every tree at offset j * *nrnodes
 *               instead of overwriting offset 0), keepf[1] = keepInbag.
 *   replace     non-zero: sample with replacement.
 *   testdat, xts, nts, yts, labelts
 *               optional test set; *labelts enables test-set MSE.
 *
 * Outputs written here: yptr (running OOB prediction), nout (times each case
 * was OOB), mse / msets (per-tree error), errimp / impSD / impmat
 * (importance), prox / proxts, yTestPred, coef, inbag, plus the tree arrays
 * filled by regTree() (treeSize, nodestatus, lDaughter, rDaughter, avnode,
 * mbest, upper).
 */
void regRF(double *x, double *y, int *xdim, int *sampsize,
           int *nthsize, int *nrnodes, int *nTree, int *mtry, int *imp,
           int *cat, int *maxcat, int *jprint, int *doProx, int *oobprox,
           int *biasCorr, double *yptr, double *errimp, double *impmat,
           double *impSD, double *prox, int *treeSize, int *nodestatus,
           int *lDaughter, int *rDaughter, double *avnode, int *mbest,
           double *upper, double *mse, int *keepf, int *replace,
           int *testdat, double *xts, int *nts, double *yts, int *labelts,
           double *yTestPred, double *proxts, double *msets, double *coef,
           int *nout, int *inbag) {
    /*************************************************************************
     Input:
     mdim=number of variables in data set
     nsample=number of cases

     nthsize=number of cases in a node below which the tree will not split,
     setting nthsize=5 generally gives good results.

     nTree=number of trees in run.  200-500 gives pretty good results

     mtry=number of variables to pick to split on at each node.  mdim/3
     seems to give generally good performance, but it can be
     altered up or down

     imp=1 turns on variable importance.  This is computed for the
     mth variable as the percent rise in the test set mean sum-of-
     squared errors when the mth variable is randomly permuted.

    *************************************************************************/

    double errts = 0.0, averrb, meanY, meanYts, varY, varYts, r, xrand,
        errb = 0.0, resid=0.0, ooberr, ooberrperm, delta, *resOOB;

    double *yb, *xtmp, *xb, *ytr, *ytree, *tgini, *coeffs;

    int k, m, mr, n, nOOB, j, jout, idx, ntest, last, ktmp, nPerm,
        nsample, mdim, keepF, keepInbag;
    int *oobpair, varImp, localImp, *varUsed;

    int *in, *nind, *nodex, *nodexts, *probs;

    /* unpack the packed scalar arguments */
    nsample = xdim[0];
    mdim = xdim[1];
    ntest = *nts;
    varImp = imp[0];
    localImp = imp[1];
    nPerm = imp[2];
    keepF = keepf[0];
    keepInbag = keepf[1];

    /* *jprint == 0 means "never print": (j + 1) % (*nTree + 1) is never 0 */
    if (*jprint == 0) *jprint = *nTree + 1;

    /* Work arrays. Nothing in this function frees them explicitly;
       presumably S_alloc memory is reclaimed by R -- TODO confirm. */
    yb = (double *) S_alloc(*sampsize, sizeof(double));
    xb = (double *) S_alloc(mdim * *sampsize, sizeof(double));
    ytr = (double *) S_alloc(nsample, sizeof(double));
    xtmp = (double *) S_alloc(nsample, sizeof(double));
    resOOB = (double *) S_alloc(nsample, sizeof(double));
    coeffs = (double *) S_alloc(*sampsize, sizeof(double));
    probs = (int *) S_alloc(*sampsize, sizeof(int));

    in = (int *) S_alloc(nsample, sizeof(int));
    nodex = (int *) S_alloc(nsample, sizeof(int));
    varUsed = (int *) S_alloc(mdim, sizeof(int));
    /* nind is only needed (and only touched) when sampling w/o replacement */
    nind = *replace ? NULL : (int *) S_alloc(nsample, sizeof(int));

    /* ytree / nodexts stay uninitialised without test data; every use below
       is guarded by *testdat */
    if (*testdat) {
        ytree = (double *) S_alloc(ntest, sizeof(double));
        nodexts = (int *) S_alloc(ntest, sizeof(int));
    }
    oobpair = (*doProx && *oobprox) ?
        (int *) S_alloc(nsample * nsample, sizeof(int)) : NULL;

    /* If variable importance is requested, tgini points to the second
       "column" of errimp, otherwise it's just the same as errimp. */
    tgini = varImp ? errimp + mdim : errimp;

    averrb = 0.0;
    meanY = 0.0;
    varY = 0.0;

    zeroDouble(yptr, nsample);
    zeroInt(nout, nsample);
    /* one-pass running mean and sum of squared deviations of y */
    for (n = 0; n < nsample; ++n) {
        varY += n * (y[n] - meanY)*(y[n] - meanY) / (n + 1);
        meanY = (n * meanY + y[n]) / (n + 1);
    }
    varY /= nsample;

    varYts = 0.0;
    meanYts = 0.0;
    if (*testdat) {
        /* same running mean / variance for the test response */
        for (n = 0; n < ntest; ++n) {
            varYts += n * (yts[n] - meanYts)*(yts[n] - meanYts) / (n + 1);
            meanYts = (n * meanYts + yts[n]) / (n + 1);
        }
        varYts /= ntest;
    }

    if (*doProx) {
        zeroDouble(prox, nsample * nsample);
        if (*testdat) zeroDouble(proxts, ntest * (nsample + ntest));
    }

    if (varImp) {
        /* errimp holds two columns of length mdim when importance is on */
        zeroDouble(errimp, mdim * 2);
        if (localImp) zeroDouble(impmat, nsample * mdim);
    } else {
        zeroDouble(errimp, mdim);
    }
    if (*labelts) zeroDouble(yTestPred, ntest);

    /* print header for running output */
    if (*jprint <= *nTree) {
        Rprintf(" | Out-of-bag ");
        if (*testdat) Rprintf("| Test set ");
        Rprintf("|\n");
        Rprintf("Tree | MSE %%Var(y) ");
        if (*testdat) Rprintf("| MSE %%Var(y) ");
        Rprintf("|\n");
    }
    GetRNGstate();
    /*************************************
     * Start the loop over trees.
     *************************************/
    for (j = 0; j < *nTree; ++j) {

        /* multinomial */
        /*unsigned int coeffs[*sampsize];*/

        /* for loop implementation */
        /*double probs[*sampsize];*/
        /* NOTE(review): probs is an int array and 1/(*sampsize) is integer
           division, so every entry is 0 whenever *sampsize > 1 (and 1 when
           *sampsize == 1). If uniform probabilities were intended this
           looks wrong -- confirm against ran_multinomial's contract. */
        for (k = 0; k < *sampsize; ++k) {
            probs[k] = 1/(*sampsize);
        }

        /* fills coeffs, which is handed to regTree() below; the exact
           meaning of the arguments is not visible here -- TODO confirm */
        ran_multinomial(*sampsize,100,probs,coeffs);

        /* offset of this tree in the per-node output arrays */
        idx = keepF ? j * *nrnodes : 0;
        zeroInt(in, nsample);
        zeroInt(varUsed, mdim);
        /* Draw a random sample for growing a tree. */
        if (*replace) { /* sampling with replacement */
            for (n = 0; n < *sampsize; ++n) {
                xrand = unif_rand();
                k = xrand * nsample;
                in[k] = 1;  /* case k is in-bag for this tree */
                yb[n] = y[k];
                for(m = 0; m < mdim; ++m) {
                    xb[m + n * mdim] = x[m + k * mdim];
                }
            }
        } else { /* sampling w/o replacement */
            /* nind[0..last] holds the cases not drawn yet */
            for (n = 0; n < nsample; ++n) nind[n] = n;
            last = nsample - 1;
            for (n = 0; n < *sampsize; ++n) {
                ktmp = (int) (unif_rand() * (last+1));
                k = nind[ktmp];
                /* exchange with the tail slot, then shrink the pool */
                swapInt(nind[ktmp], nind[last]);
                last--;
                in[k] = 1;
                yb[n] = y[k];
                for(m = 0; m < mdim; ++m) {
                    xb[m + n * mdim] = x[m + k * mdim];
                }
            }
        }
        if (keepInbag) {
            for (n = 0; n < nsample; ++n) inbag[n + j * nsample] = in[n];
        }
        /* grow the regression tree */
        regTree(xb, yb, mdim, *sampsize, lDaughter + idx, rDaughter + idx,
                upper + idx, avnode + idx, nodestatus + idx, *nrnodes,
                treeSize + j, *nthsize, *mtry, mbest + idx, cat, tgini,
                varUsed, coeffs);

        /* predict the OOB data with the current tree */
        /* ytr is the prediction on OOB data by the current tree */
        predictRegTree(x, nsample, mdim, lDaughter + idx,
                       rDaughter + idx, nodestatus + idx, ytr, upper + idx,
                       avnode + idx, mbest + idx, treeSize[j], cat, *maxcat,
                       nodex);
        /* yptr is the aggregated prediction by all trees grown so far */
        errb = 0.0;
        ooberr = 0.0;
        jout = 0; /* jout is the number of cases that has been OOB so far */
        nOOB = 0; /* nOOB is the number of OOB samples for this tree */
        for (n = 0; n < nsample; ++n) {
            if (in[n] == 0) {
                nout[n]++;
                nOOB++;
                /* running mean of the OOB predictions for case n */
                yptr[n] = ((nout[n]-1) * yptr[n] + ytr[n]) / nout[n];
                resOOB[n] = ytr[n] - y[n];
                ooberr += resOOB[n] * resOOB[n];
            }
            if (nout[n]) {
                jout++;
                errb += (y[n] - yptr[n]) * (y[n] - yptr[n]);
            }
        }
        /* jout is not checked against 0 before this division */
        errb /= jout;
        /* Do simple linear regression of y on yhat for bias correction. */
        if (*biasCorr) simpleLinReg(nsample, yptr, y, coef, &errb, nout);

        /* predict testset data with the current tree */
        if (*testdat) {
            predictRegTree(xts, ntest, mdim, lDaughter + idx,
                           rDaughter + idx, nodestatus + idx, ytree,
                           upper + idx, avnode + idx,
                           mbest + idx, treeSize[j], cat, *maxcat, nodexts);
            /* ytree is the prediction for test data by the current tree */
            /* yTestPred is the average prediction by all trees grown so far */
            errts = 0.0;
            for (n = 0; n < ntest; ++n) {
                yTestPred[n] = (j * yTestPred[n] + ytree[n]) / (j + 1);
            }
            /* compute testset MSE */
            if (*labelts) {
                for (n = 0; n < ntest; ++n) {
                    resid = *biasCorr ?
                        yts[n] - (coef[0] + coef[1]*yTestPred[n]) :
                        yts[n] - yTestPred[n];
                    errts += resid * resid;
                }
                errts /= ntest;
            }
        }
        /* Print running output. */
        if ((j + 1) % *jprint == 0) {
            Rprintf("%4d |", j + 1);
            Rprintf(" %8.4g %8.2f ", errb, 100 * errb / varY);
            if(*labelts == 1) Rprintf("| %8.4g %8.2f ",
                                      errts, 100.0 * errts / varYts);
            Rprintf("|\n");
        }
        mse[j] = errb;
        if (*labelts) msets[j] = errts;

        /* DO PROXIMITIES */
        if (*doProx) {
            computeProximity(prox, *oobprox, nodex, in, oobpair, nsample);
            /* proximity for test data */
            if (*testdat) {
                /* In the next call, in and oobpair are not used. */
                computeProximity(proxts, 0, nodexts, in, oobpair, ntest);
                /* test-vs-training part: count cases that land in the same
                   terminal node, stored after the ntest test columns */
                for (n = 0; n < ntest; ++n) {
                    for (k = 0; k < nsample; ++k) {
                        if (nodexts[n] == nodex[k]) {
                            proxts[n + ntest * (k+ntest)] += 1.0;
                        }
                    }
                }
            }
        }

        /* Variable importance */
        if (varImp) {
            for (mr = 0; mr < mdim; ++mr) {
                if (varUsed[mr]) { /* Go ahead if the variable is used */
                    /* make a copy of the m-th variable into xtmp */
                    for (n = 0; n < nsample; ++n)
                        xtmp[n] = x[mr + n * mdim];
                    ooberrperm = 0.0;
                    for (k = 0; k < nPerm; ++k) {
                        /* x is modified in place here and restored from
                           xtmp after the permutations */
                        permuteOOB(mr, x, in, nsample, mdim);
                        predictRegTree(x, nsample, mdim, lDaughter + idx,
                                       rDaughter + idx, nodestatus + idx, ytr,
                                       upper + idx, avnode + idx, mbest + idx,
                                       treeSize[j], cat, *maxcat, nodex);
                        for (n = 0; n < nsample; ++n) {
                            if (in[n] == 0) {
                                r = ytr[n] - y[n];
                                ooberrperm += r * r;
                                if (localImp) {
                                    impmat[mr + n * mdim] +=
                                        (r*r - resOOB[n]*resOOB[n]) / nPerm;
                                }
                            }
                        }
                    }
                    /* mean increase in OOB squared error per OOB case;
                       nOOB is not checked against 0 */
                    delta = (ooberrperm / nPerm - ooberr) / nOOB;
                    errimp[mr] += delta;
                    impSD[mr] += delta * delta;
                    /* copy original data back */
                    for (n = 0; n < nsample; ++n)
                        x[mr + n * mdim] = xtmp[n];
                }
            }
        }
    }
    PutRNGstate();
    /* end of tree iterations=======================================*/

    if (*biasCorr) {  /* bias correction for predicted values */
        for (n = 0; n < nsample; ++n) {
            if (nout[n]) yptr[n] = coef[0] + coef[1] * yptr[n];
        }
        if (*testdat) {
            for (n = 0; n < ntest; ++n) {
                yTestPred[n] = coef[0] + coef[1] * yTestPred[n];
            }
        }
    }

    if (*doProx) {
        /* normalise the (k, n) entries with k > n, mirror them, and set
           the diagonal to 1 */
        for (n = 0; n < nsample; ++n) {
            for (k = n + 1; k < nsample; ++k) {
                prox[nsample*k + n] /= *oobprox ?
                    (oobpair[nsample*k + n] > 0 ?
                     oobpair[nsample*k + n] : 1) : *nTree;
                prox[nsample * n + k] = prox[nsample * k + n];
            }
            prox[nsample * n + n] = 1.0;
        }
        if (*testdat) {
            for (n = 0; n < ntest; ++n)
                for (k = 0; k < ntest + nsample; ++k)
                    proxts[ntest*k + n] /= *nTree;
        }
    }

    if (varImp) {
        for (m = 0; m < mdim; ++m) {
            errimp[m] = errimp[m] / *nTree;
            impSD[m] = sqrt( ((impSD[m] / *nTree) -
                              (errimp[m] * errimp[m])) / *nTree );
            if (localImp) {
                /* nout[n] is not checked against 0 before this division */
                for (n = 0; n < nsample; ++n) {
                    impmat[m + n * mdim] /= nout[n];
                }
            }
        }
    }
    for (m = 0; m < mdim; ++m) tgini[m] /= *nTree;
}
/*
 * Load the stored answer (centre plus NPOINTS outline points) for entry
 * `current` of lpaf->filelist from the extended parameters of the image
 * header, and store it in lpaf->coords[current].
 *
 * The file-list entry is split by ImageUnpackFileName() into a file name and
 * a frame number; the frame-th extended parameter of the header is taken to
 * hold 2 + 2*NPOINTS integers: xc, yc, then (x, y) pairs.
 *
 * If the first value is outside [0, cols) the parameters are assumed to have
 * been written with the opposite byte order and are byte-swapped in place.
 *
 * Returns 1 when a plausible answer was loaded, 0 when the header holds no
 * usable answer (too few parameters, frame not found, still INIT_VAL, or
 * centre outside the image). Failure to open the file or to read its header
 * is reported through ErrorReturn() with -1 / -2 respectively.
 */
int
LPAFreadImageAnswer(LPAF *lpaf, int current)
{
  char   *fullname, fname[100] ;
  FILE   *fp ;
  IMAGE  Iheader ;
  int    i, ecode, frame, current_frame ;
  LP_BOX *lpb ;
  struct extpar *xp ;
#ifdef _MSDOS
  long   *parms ;
#else
  int    *parms ;
#endif

  fullname = lpaf->filelist[current] ;

  ImageUnpackFileName(fullname, &current_frame, &i, fname) ;

  fp = fopen(fname, "rb") ;
  if (!fp)
    ErrorReturn(-1,(ERROR_NO_FILE,"LPAFreadImageAnswer(%d): could not open %s",
                    current, fname)) ;

  /* only the header is needed, so the file is closed right away */
  ecode = fread_header(fp, &Iheader, fname) ;
  fclose(fp) ;
  if (ecode)
    ErrorReturn(-2, (ERROR_BADFILE,
                     "LPAFreadImageAnswer(%s): could not read header",fname));

  /* fewer parameters than frames: no answer stored for every frame */
  if (Iheader.numparam < Iheader.num_frame)
    return(0) ;

  /* read answer from header */
#if 0
  fprintf(stderr, "reading lp values from %dth entry in image file\n",
          current_frame);
#endif
  lpb = &lpaf->coords[current] ;

  /* walk the parameter list up to the entry for this frame */
  for (frame = 0, xp = Iheader.params ; xp ; xp = xp->nextp)
    if (frame++ == current_frame)
      break ;

  /* list ended before reaching current_frame: nothing to read */
  if (!xp)
    return(0) ;

  /*
    if hips file created on Sun, then the parameters are actually longs.
  */
#ifndef _MSDOS
  parms = xp->val.v_pi ;
#else
  parms = (long *)xp->val.v_pi ;
#endif

  /* an out-of-range x centre is taken as a sign of foreign byte order */
#ifndef _MSDOS
  if (parms[0] < 0 || parms[0] >= Iheader.cols)
  {
    parms[0] = swapInt(parms[0]) ;
    parms[1] = swapInt(parms[1]) ;
    /* swap each point in place (source and destination index must match) */
    for (i = 0 ; i < NPOINTS ; i++)
    {
      parms[2+2*i] = swapInt(parms[2+2*i]) ;
      parms[2+2*i+1] = swapInt(parms[2+2*i+1]) ;
    }
  }
#else
  if (parms[0] < 0 || parms[0] >= (long)Iheader.cols)
  {
    parms[0] = swapLong(parms[0]) ;
    parms[1] = swapLong(parms[1]) ;
    /* swap each point in place (source and destination index must match) */
    for (i = 0 ; i < NPOINTS ; i++)
    {
      parms[2+2*i] = swapLong(parms[2+2*i]) ;
      parms[2+2*i+1] = swapLong(parms[2+2*i+1]) ;
    }
  }
#endif

  if ((int)parms[0] == INIT_VAL)  /* not yet written with real value */
    return(0) ;

  lpb->xc = (int)parms[0] ;
  lpb->yc = (int)parms[1] ;
  for (i = 0 ; i < NPOINTS ; i++)
  {
    lpb->xp[i] = (int)parms[2+2*i] ;
    lpb->yp[i] = (int)parms[2+2*i+1] ;
  }

  /* reject a centre outside the image: x against cols, y against rows */
  if (lpb->xc < 0 || lpb->xc >= Iheader.cols ||
      lpb->yc < 0 || lpb->yc >= Iheader.rows )
    return(0) ;

  return(1) ;
}
/* returnType functionName(arguments) */
/*
 * Walks the array from both ends at once. Whenever both cursors rest on
 * negative values the two elements are exchanged with swapInt(); a cursor
 * that rests on a non-negative value is simply stepped past it. The net
 * effect is that the negative entries end up in reverse order while the
 * non-negative entries keep their positions.
 */
void strangeFunc(int a[], int size)
{
    /* Declaration & definition: cursors on the first and last element. */
    int *front = a;
    int *back = a + (size - 1);

    /* Keep going while the front cursor is still before the back cursor. */
    while (front < back)
    {
        const int frontNegative = (*front < 0);
        const int backNegative = (*back < 0);

        /* Both negative: exchange them. */
        if (frontNegative && backNegative)
            swapInt(front, back);

        /* The front cursor waits only when it alone sits on a negative. */
        if (!frontNegative || backNegative)
            ++front;

        /* The back cursor waits only when it alone sits on a negative. */
        if (frontNegative || !backNegative)
            --back;
    }
}
/*
 * Grow a regression forest and accumulate its out-of-bag (OOB) statistics
 * (calloc/free build of the routine).
 *
 * What the code below establishes about the arguments:
 *
 *   x, y        training predictors and response; x is indexed as
 *               x[m + n * mdim] (variable m of case n).
 *   xdim        xdim[0] = nsample (number of cases), xdim[1] = mdim
 *               (number of variables).
 *   sampsize    number of cases drawn for each tree.
 *   nthsize, mtry, nrnodes
 *               forwarded unchanged to regTree().
 *   nTree       number of trees to grow.
 *   imp         imp[0] = varImp flag, imp[1] = localImp flag,
 *               imp[2] = nPerm (permutations per variable).
 *   cat, maxcat forwarded to regTree() / predictRegTree().
 *   jprint      print a progress line every *jprint trees; 0 is rewritten
 *               to *nTree + 1, which disables printing.
 *   doProx, oobprox
 *               request proximities / restrict them via oobpair.
 *   biasCorr    run simpleLinReg() of y on the OOB prediction after each
 *               tree and apply coef[0] + coef[1] * prediction at the end.
 *   keepf       keepf[0] = keepF (store every tree at offset j * *nrnodes
 *               instead of overwriting offset 0), keepf[1] = keepInbag.
 *   replace     non-zero: sample with replacement.
 *   testdat, xts, nts, yts, labelts
 *               optional test set; labelts enables test-set MSE.
 *
 * Outputs written here: yptr (running OOB prediction), nout (times each case
 * was OOB), mse / msets (per-tree error), errimp / impSD / impmat
 * (importance), prox / proxts, yTestPred, coef, inbag, plus the tree arrays
 * filled by regTree().
 *
 * Side effects: reseeds the generator through seedMT(), and on exit asks
 * findBestSplit() and regTree() to release their static work buffers by
 * setting in_findBestSplit / in_regTree to -99.
 *
 * NOTE(review): the calloc() results are not checked for NULL, and
 * 2*rand()+1 can overflow int when rand() exceeds INT_MAX/2.
 */
void regRF(double *x, double *y, int *xdim, int *sampsize,
           int *nthsize, int *nrnodes, int *nTree, int *mtry, int *imp,
           int *cat, int maxcat, int *jprint, int doProx, int oobprox,
           int biasCorr, double *yptr, double *errimp, double *impmat,
           double *impSD, double *prox, int *treeSize, SMALL_INT *nodestatus,
           int *lDaughter, int *rDaughter, double *avnode, int *mbest,
           double *upper, double *mse, const int *keepf, int *replace,
           int testdat, double *xts, int *nts, double *yts, int labelts,
           double *yTestPred, double *proxts, double *msets, double *coef,
           int *nout, int *inbag) {
    /*************************************************************************
     * Input:
     * mdim=number of variables in data set
     * nsample=number of cases
     *
     * nthsize=number of cases in a node below which the tree will not split,
     * setting nthsize=5 generally gives good results.
     *
     * nTree=number of trees in run.  200-500 gives pretty good results
     *
     * mtry=number of variables to pick to split on at each node.  mdim/3
     * seems to give generally good performance, but it can be
     * altered up or down
     *
     * imp=1 turns on variable importance.  This is computed for the
     * mth variable as the percent rise in the test set mean sum-of-
     * squared errors when the mth variable is randomly permuted.
     *
     *************************************************************************/

    double errts = 0.0, averrb, meanY, meanYts, varY, varYts, r, xrand,
        errb = 0.0, resid=0.0, ooberr, ooberrperm, delta, *resOOB;

    double *yb, *xtmp, *xb, *ytr, *ytree = NULL, *tgini;

    int k, m, mr, n, nOOB, j, jout, idx, ntest, last, ktmp, nPerm,
        nsample, mdim, keepF, keepInbag;
    int *oobpair, varImp, localImp, *varUsed;

    int *in, *nind, *nodex, *nodexts = NULL;

    /* Dummy arguments for the buffer-release calls at the end. They are
       passed by value, so they must hold a determinate value. */
    double tmp_d = 0;
    int tmp_i = 0;
    SMALL_INT tmp_c = 0;

    /* Do initialization for COKUS's Random generator;
       an odd seed is used on purpose */
    seedMT(2*rand()+1);

    /* unpack the packed scalar arguments */
    nsample = xdim[0];
    mdim = xdim[1];
    ntest = *nts;
    varImp = imp[0];
    localImp = imp[1];
    nPerm = imp[2];
    keepF = keepf[0];
    keepInbag = keepf[1];

    /* *jprint == 0 means "never print": (j + 1) % (*nTree + 1) is never 0 */
    if (*jprint == 0) *jprint = *nTree + 1;

    /* work arrays, released at the end of the function */
    yb = (double *) calloc(*sampsize, sizeof(double));
    xb = (double *) calloc(mdim * *sampsize, sizeof(double));
    ytr = (double *) calloc(nsample, sizeof(double));
    xtmp = (double *) calloc(nsample, sizeof(double));
    resOOB = (double *) calloc(nsample, sizeof(double));

    in = (int *) calloc(nsample, sizeof(int));
    nodex = (int *) calloc(nsample, sizeof(int));
    varUsed = (int *) calloc(mdim, sizeof(int));
    /* nind is only needed (and only touched) when sampling w/o replacement */
    nind = *replace ? NULL : (int *) calloc(nsample, sizeof(int));

    if (testdat) {
        ytree = (double *) calloc(ntest, sizeof(double));
        nodexts = (int *) calloc(ntest, sizeof(int));
    }
    oobpair = (doProx && oobprox) ?
        (int *) calloc(nsample * nsample, sizeof(int)) : NULL;

    /* If variable importance is requested, tgini points to the second
       "column" of errimp, otherwise it's just the same as errimp. */
    tgini = varImp ? errimp + mdim : errimp;

    averrb = 0.0;
    meanY = 0.0;
    varY = 0.0;

    zeroDouble(yptr, nsample);
    zeroInt(nout, nsample);
    /* one-pass running mean and sum of squared deviations of y */
    for (n = 0; n < nsample; ++n) {
        varY += n * (y[n] - meanY)*(y[n] - meanY) / (n + 1);
        meanY = (n * meanY + y[n]) / (n + 1);
    }
    varY /= nsample;

    varYts = 0.0;
    meanYts = 0.0;
    if (testdat) {
        /* same running mean / variance for the test response */
        for (n = 0; n < ntest; ++n) {
            varYts += n * (yts[n] - meanYts)*(yts[n] - meanYts) / (n + 1);
            meanYts = (n * meanYts + yts[n]) / (n + 1);
        }
        varYts /= ntest;
    }

    if (doProx) {
        zeroDouble(prox, nsample * nsample);
        if (testdat) zeroDouble(proxts, ntest * (nsample + ntest));
    }

    if (varImp) {
        /* errimp holds two columns of length mdim when importance is on */
        zeroDouble(errimp, mdim * 2);
        if (localImp) zeroDouble(impmat, nsample * mdim);
    } else {
        zeroDouble(errimp, mdim);
    }
    if (labelts) zeroDouble(yTestPred, ntest);

    /* print header for running output */
    if (*jprint <= *nTree) {
        PRINTF(" | Out-of-bag ");
        if (testdat) PRINTF("| Test set ");
        PRINTF("|\n");
        PRINTF("Tree | MSE %%Var(y) ");
        if (testdat) PRINTF("| MSE %%Var(y) ");
        PRINTF("|\n");
    }
    GetRNGstate();
    /*************************************
     * Start the loop over trees.
     *************************************/
    for (j = 0; j < *nTree; ++j) {
        /* offset of this tree in the per-node output arrays */
        idx = keepF ? j * *nrnodes : 0;
        zeroInt(in, nsample);
        zeroInt(varUsed, mdim);
        /* Draw a random sample for growing a tree. */
        if (*replace) { /* sampling with replacement */
            for (n = 0; n < *sampsize; ++n) {
                xrand = unif_rand();
                k = (int)(xrand * nsample);
                in[k] = 1;  /* case k is in-bag for this tree */
                yb[n] = y[k];
                for(m = 0; m < mdim; ++m) {
                    xb[m + n * mdim] = x[m + k * mdim];
                }
            }
        } else { /* sampling w/o replacement */
            /* nind[0..last] holds the cases not drawn yet */
            for (n = 0; n < nsample; ++n) nind[n] = n;
            last = nsample - 1;
            for (n = 0; n < *sampsize; ++n) {
                ktmp = (int) (unif_rand() * (last+1));
                k = nind[ktmp];
                /* exchange with the tail slot, then shrink the pool */
                swapInt(nind[ktmp], nind[last]);
                last--;
                in[k] = 1;
                yb[n] = y[k];
                for(m = 0; m < mdim; ++m) {
                    xb[m + n * mdim] = x[m + k * mdim];
                }
            }
        }

        if (keepInbag) {
            for (n = 0; n < nsample; ++n) inbag[n + j * nsample] = in[n];
        }

        /* grow the regression tree */
        regTree(xb, yb, mdim, *sampsize, lDaughter + idx, rDaughter + idx,
                upper + idx, avnode + idx, nodestatus + idx, *nrnodes,
                treeSize + j, *nthsize, *mtry, mbest + idx, cat, tgini,
                varUsed);

        /* predict the OOB data with the current tree */
        /* ytr is the prediction on OOB data by the current tree */
        predictRegTree(x, nsample, mdim, lDaughter + idx,
                       rDaughter + idx, nodestatus + idx, ytr, upper + idx,
                       avnode + idx, mbest + idx, treeSize[j], cat, maxcat,
                       nodex);

        /* yptr is the aggregated prediction by all trees grown so far */
        errb = 0.0;
        ooberr = 0.0;
        jout = 0; /* jout is the number of cases that has been OOB so far */
        nOOB = 0; /* nOOB is the number of OOB samples for this tree */
        for (n = 0; n < nsample; ++n) {
            if (in[n] == 0) {
                nout[n]++;
                nOOB++;
                /* running mean of the OOB predictions for case n */
                yptr[n] = ((nout[n]-1) * yptr[n] + ytr[n]) / nout[n];
                resOOB[n] = ytr[n] - y[n];
                ooberr += resOOB[n] * resOOB[n];
            }
            if (nout[n]) {
                jout++;
                errb += (y[n] - yptr[n]) * (y[n] - yptr[n]);
            }
        }
        /* jout is not checked against 0 before this division */
        errb /= jout;
        /* Do simple linear regression of y on yhat for bias correction. */
        if (biasCorr) simpleLinReg(nsample, yptr, y, coef, &errb, nout);

        /* predict testset data with the current tree */
        if (testdat) {
            predictRegTree(xts, ntest, mdim, lDaughter + idx,
                           rDaughter + idx, nodestatus + idx, ytree,
                           upper + idx, avnode + idx,
                           mbest + idx, treeSize[j], cat, maxcat, nodexts);
            /* ytree is the prediction for test data by the current tree */
            /* yTestPred is the average prediction by all trees grown so far */
            errts = 0.0;
            for (n = 0; n < ntest; ++n) {
                yTestPred[n] = (j * yTestPred[n] + ytree[n]) / (j + 1);
            }
            /* compute testset MSE */
            if (labelts) {
                for (n = 0; n < ntest; ++n) {
                    resid = biasCorr ?
                        yts[n] - (coef[0] + coef[1]*yTestPred[n]) :
                        yts[n] - yTestPred[n];
                    errts += resid * resid;
                }
                errts /= ntest;
            }
        }

        /* Print running output. */
        if ((j + 1) % *jprint == 0) {
            PRINTF("%4d |", j + 1);
            PRINTF(" %8.4g %8.2f ", errb, 100 * errb / varY);
            if(labelts == 1) PRINTF("| %8.4g %8.2f ",
                                    errts, 100.0 * errts / varYts);
            PRINTF("|\n");
            fflush(stdout);
        }

        mse[j] = errb;
        if (labelts) msets[j] = errts;

        /* DO PROXIMITIES */
        if (doProx) {
            computeProximity(prox, oobprox, nodex, in, oobpair, nsample);
            /* proximity for test data */
            if (testdat) {
                /* In the next call, in and oobpair are not used. */
                computeProximity(proxts, 0, nodexts, in, oobpair, ntest);
                /* test-vs-training part: count cases that land in the same
                   terminal node, stored after the ntest test columns */
                for (n = 0; n < ntest; ++n) {
                    for (k = 0; k < nsample; ++k) {
                        if (nodexts[n] == nodex[k]) {
                            proxts[n + ntest * (k+ntest)] += 1.0;
                        }
                    }
                }
            }
        }

        /* Variable importance */
        if (varImp) {
            for (mr = 0; mr < mdim; ++mr) {
                if (varUsed[mr]) { /* Go ahead if the variable is used */
                    /* make a copy of the m-th variable into xtmp */
                    for (n = 0; n < nsample; ++n)
                        xtmp[n] = x[mr + n * mdim];
                    ooberrperm = 0.0;
                    for (k = 0; k < nPerm; ++k) {
                        /* x is modified in place here and restored from
                           xtmp after the permutations */
                        permuteOOB(mr, x, in, nsample, mdim);
                        predictRegTree(x, nsample, mdim, lDaughter + idx,
                                       rDaughter + idx, nodestatus + idx, ytr,
                                       upper + idx, avnode + idx, mbest + idx,
                                       treeSize[j], cat, maxcat, nodex);
                        for (n = 0; n < nsample; ++n) {
                            if (in[n] == 0) {
                                r = ytr[n] - y[n];
                                ooberrperm += r * r;
                                if (localImp) {
                                    impmat[mr + n * mdim] +=
                                        (r*r - resOOB[n]*resOOB[n]) / nPerm;
                                }
                            }
                        }
                    }
                    /* mean increase in OOB squared error per OOB case;
                       nOOB is not checked against 0 */
                    delta = (ooberrperm / nPerm - ooberr) / nOOB;
                    errimp[mr] += delta;
                    impSD[mr] += delta * delta;
                    /* copy original data back */
                    for (n = 0; n < nsample; ++n)
                        x[mr + n * mdim] = xtmp[n];
                }
            }
        }
    }
    PutRNGstate();
    /* end of tree iterations=======================================*/

    if (biasCorr) {  /* bias correction for predicted values */
        for (n = 0; n < nsample; ++n) {
            if (nout[n]) yptr[n] = coef[0] + coef[1] * yptr[n];
        }
        if (testdat) {
            for (n = 0; n < ntest; ++n) {
                yTestPred[n] = coef[0] + coef[1] * yTestPred[n];
            }
        }
    }

    if (doProx) {
        /* normalise the (k, n) entries with k > n, mirror them, and set
           the diagonal to 1 */
        for (n = 0; n < nsample; ++n) {
            for (k = n + 1; k < nsample; ++k) {
                prox[nsample*k + n] /= oobprox ?
                    (oobpair[nsample*k + n] > 0 ?
                     oobpair[nsample*k + n] : 1) : *nTree;
                prox[nsample * n + k] = prox[nsample * k + n];
            }
            prox[nsample * n + n] = 1.0;
        }
        if (testdat) {
            for (n = 0; n < ntest; ++n)
                for (k = 0; k < ntest + nsample; ++k)
                    proxts[ntest*k + n] /= *nTree;
        }
    }

    if (varImp) {
        for (m = 0; m < mdim; ++m) {
            errimp[m] = errimp[m] / *nTree;
            impSD[m] = sqrt( ((impSD[m] / *nTree) -
                              (errimp[m] * errimp[m])) / *nTree );
            if (localImp) {
                /* nout[n] is not checked against 0 before this division */
                for (n = 0; n < nsample; ++n) {
                    impmat[m + n * mdim] /= nout[n];
                }
            }
        }
    }
    for (m = 0; m < mdim; ++m) tgini[m] /= *nTree;

    /* Release the static work buffers held by findBestSplit(): with
       in_findBestSplit == -99 the call frees them and returns at once,
       so the dummy arguments are never used. */
    in_findBestSplit=-99;
    findBestSplit(&tmp_d, &tmp_i, &tmp_d, tmp_i, tmp_i,
                  tmp_i, tmp_i, &tmp_i, &tmp_d,
                  &tmp_d, &tmp_i, &tmp_i, tmp_i,
                  tmp_d, tmp_i, &tmp_i);

    /* do the same freeing of space for regTree() by calling with -99 */
    in_regTree=-99;
    regTree(&tmp_d, &tmp_d, tmp_i, tmp_i, &tmp_i,
            &tmp_i,
            &tmp_d, &tmp_d, &tmp_c, tmp_i,
            &tmp_i, tmp_i, tmp_i, &tmp_i, &tmp_i,
            &tmp_d, &tmp_i);

    /* release the local work arrays, mirroring the allocations above */
    free(yb);
    free(xb);
    free(ytr);
    free(xtmp);
    free(resOOB);
    free(in);
    free(nodex);
    free(varUsed);
    if (!(*replace) )
        free(nind);

    if (testdat) {
        free(ytree);
        free(nodexts);
    }

    if (doProx && oobprox)
        free(oobpair) ;
}
/*
 * Find the best split of the node that owns cases jdex[ndstart..ndend].
 *
 * Up to mtry variables are drawn without repetition from the mdim available
 * ones. For each, the node's cases are sorted on that variable and every gap
 * between distinct values is scored with
 *     suml^2/npopl + sumr^2/npopr - sumnode^2/nodecnt.
 * Categorical variables (cat[kv] != 1) are first replaced by the mean of y
 * within each category, so they can be scanned like a numeric variable.
 *
 * Inputs:
 *   x, y       predictors (x[kv + case * mdim]) and response
 *   jdex       1-based case numbers; reordered in place on success so that
 *              the left child's cases come first
 *   mdim, nsample, ndstart, ndend, mtry
 *   sumnode    sum of y over the node, nodecnt its number of cases
 *   cat        per-variable category count (1 = numeric); at most 32
 *              categories fit the local tables (not checked here)
 * Outputs:
 *   *msplit    1-based index of the chosen variable, or -1 when none found
 *   *decsplit  criterion value of the chosen split (0.0 when none)
 *   *ubest     split point, or pack(lc, icat) for a categorical variable
 *   *ndendl    last position of the left child inside jdex
 *   *jstat     set to 1 when no split was found (untouched otherwise)
 *
 * Work buffers are static and driven by the global in_findBestSplit:
 *   0   -> allocate them (sized from this call's nsample / mdim) and set
 *          the flag to 1
 *   -99 -> free them and return without doing anything else
 * The buffers are not resized by later calls, and the calloc() results are
 * not checked.
 */
void findBestSplit(double *x, int *jdex, double *y, int mdim, int nsample,
                   int ndstart, int ndend, int *msplit, double *decsplit,
                   double *ubest, int *ndendl, int *jstat, int mtry,
                   double sumnode, int nodecnt, int *cat) {
    int last, ncat[32], icat[32], lc, nl, nr, npopl, npopr;
    int i, j, kv, l;
    static int *mind, *ncase;
    static double *xt, *ut, *v, *yl;
    double sumcat[32], avcat[32], tavcat[32], ubestt;
    double crit, critmax, critvar, suml, sumr, d, critParent;

    if (in_findBestSplit==-99){
        /* Release request. The pointers are reset so that a repeated
           release (flag still -99) is a harmless free(NULL) and not a
           double free. */
        free(ncase); ncase = NULL;
        free(mind);  mind = NULL;
        free(v);     v = NULL;
        free(yl);    yl = NULL;
        free(xt);    xt = NULL;
        free(ut);    ut = NULL;
        return;
    }

    if (in_findBestSplit==0){
        in_findBestSplit=1;
        ut = (double *) calloc(nsample, sizeof(double));
        xt = (double *) calloc(nsample, sizeof(double));
        v = (double *) calloc(nsample, sizeof(double));
        yl = (double *) calloc(nsample, sizeof(double));
        /* one spare slot, kept from the original as a crash workaround */
        mind = (int *) calloc(mdim+1, sizeof(int));
        ncase = (int *) calloc(nsample, sizeof(int));
    }
    zeroDouble(ut, nsample);
    zeroDouble(xt, nsample);
    zeroDouble(v, nsample);
    zeroDouble(yl, nsample);
    zeroInt(mind, mdim);
    zeroInt(ncase, nsample);

    zeroDouble(avcat, 32);
    zeroDouble(tavcat, 32);

    /* START BIG LOOP */
    *msplit = -1;
    *decsplit = 0.0;
    critmax = 0.0;
    ubestt = 0.0;
    /* mind[0..last] is the pool of variables not tried yet */
    for (i=0; i < mdim; ++i) mind[i] = i;

    last = mdim - 1;
    /* Stop once the pool is empty: with mtry > mdim the next draw would
       index mind[-1]. */
    for (i = 0; i < mtry && last >= 0; ++i) {
        critvar = 0.0;
        j = (int) (unif_rand() * (last+1));
        kv = mind[j];
        /* move the drawn variable out of the pool */
        swapInt(mind[j], mind[last]);
        last--;

        lc = cat[kv];
        if (lc == 1) {
            /* numeric variable */
            for (j = ndstart; j <= ndend; ++j) {
                xt[j] = x[kv + (jdex[j] - 1) * mdim];
                yl[j] = y[jdex[j] - 1];
            }
        } else {
            /* categorical variable */
            zeroInt(ncat, 32);
            zeroDouble(sumcat, 32);
            for (j = ndstart; j <= ndend; ++j) {
                l = (int) x[kv + (jdex[j] - 1) * mdim];
                sumcat[l - 1] += y[jdex[j] - 1];
                ncat[l - 1] ++;
            }
            /* Compute means of Y by category. */
            for (j = 0; j < lc; ++j) {
                avcat[j] = ncat[j] ? sumcat[j] / ncat[j] : 0.0;
            }
            /* Make the category mean the `pseudo' X data.
               (this loop covers all nsample positions, not just the node) */
            for (j = 0; j < nsample; ++j) {
                xt[j] = avcat[(int) x[kv + (jdex[j] - 1) * mdim] - 1];
                yl[j] = y[jdex[j] - 1];
            }
        }
        /* copy the x data in this node. */
        for (j = ndstart; j <= ndend; ++j) v[j] = xt[j];
        for (j = 1; j <= nsample; ++j) ncase[j - 1] = j;
        /* sort v over the node, carrying ncase along (1-based bounds) */
        R_qsort_I(v, ncase, ndstart + 1, ndend + 1);
        /* all values equal: this variable cannot split the node */
        if (v[ndstart] >= v[ndend]) continue;
        /* ncase(n)=case number of v nth from bottom */
        /* Start from the right and search to the left. */
        critParent = sumnode * sumnode / nodecnt;
        suml = 0.0;
        sumr = sumnode;
        npopl = 0;
        npopr = nodecnt;
        crit = 0.0;
        /* Search through the "gaps" in the x-variable. */
        for (j = ndstart; j <= ndend - 1; ++j) {
            d = yl[ncase[j] - 1];
            suml += d;
            sumr -= d;
            npopl++;
            npopr--;
            if (v[j] < v[j+1]) {
                crit = (suml * suml / npopl) + (sumr * sumr / npopr) -
                    critParent;
                if (crit > critvar) {
                    /* split halfway between the two neighbouring values */
                    ubestt = (v[j] + v[j+1]) / 2.0;
                    critvar = crit;
                }
            }
        }
        if (critvar > critmax) {
            /* best variable so far: remember its split and its x values */
            *ubest = ubestt;
            *msplit = kv + 1;
            critmax = critvar;
            for (j = ndstart; j <= ndend; ++j) {
                ut[j] = xt[j];
            }
            if (cat[kv] > 1) {
                for (j = 0; j < cat[kv]; ++j) tavcat[j] = avcat[j];
            }
        }
    }
    *decsplit = critmax;

    /* If best split can not be found, set to terminal node and return. */
    if (*msplit != -1) {
        /* left child first: cases with value <= split point */
        nl = ndstart;
        for (j = ndstart; j <= ndend; ++j) {
            if (ut[j] <= *ubest) {
                nl++;
                ncase[nl-1] = jdex[j];
            }
        }
        *ndendl = imax2(nl - 1, ndstart);
        /* then the right child, never writing past ncase[nsample - 1] */
        nr = *ndendl + 1;
        for (j = ndstart; j <= ndend; ++j) {
            if (ut[j] > *ubest) {
                if (nr >= nsample) break;
                nr++;
                ncase[nr - 1] = jdex[j];
            }
        }
        /* keep at least one case on the right */
        if (*ndendl >= ndend) *ndendl = ndend - 1;
        for (j = ndstart; j <= ndend; ++j) jdex[j] = ncase[j];

        lc = cat[*msplit - 1];
        if (lc > 1) {
            /* categorical split: flag the categories whose mean is below
               the split point and encode the flags with pack() */
            for (j = 0; j < lc; ++j) {
                icat[j] = (tavcat[j] < *ubest) ? 1 : 0;
            }
            *ubest = pack(lc, icat);
        }
    } else *jstat = 1;
}