/*
 * Recursively sort the inclusive index range a[s..t] in place.
 *
 * psort() is called on each range of two or more elements; its return value
 * is used as the split index: everything left of it and everything right of
 * it is sorted separately (presumably it is the pivot's final position --
 * confirm against psort's definition).
 */
void sort(int a[],int s,int t)
{
    /* A range with fewer than two elements needs no work. */
    while (s < t) {
        int split = psort(a, s, t);

        /* Left part by recursion, right part by continuing the loop. */
        sort(a, s, split - 1);
        s = split + 1;
    }
}
/*
 * Kernel-weighted density estimate at position `cen`, built from the
 * SPH_DENS_NGB entries of NgbR2 selected by psort().
 *
 * cen       position at which the density is evaluated
 * p2hguess  in: initial search radius; out: an updated radius, deliberately
 *           enlarged by 10% so it can serve as the next call's guess
 * PIndex    passed through unchanged to treesearch_sphere()
 * PPos      passed through unchanged to treesearch_sphere()
 *
 * Returns the sum of kernel weights over the first SPH_DENS_NGB entries of
 * NgbR2 (every neighbour contributes with unit weight).
 *
 * The global buffers NgbR2 / NgbID are allocated here with NgbNMax0 entries
 * and released before returning; NgbNMax is reset to NgbNMax0 on entry.
 */
double sph_density(HBTReal cen[3],HBTReal *p2hguess,HBTInt *PIndex,HBTReal PPos[][3])
{
	HBTReal radius;
	HBTInt k;
	HBTInt found;
	double h, hinv3, wk, u, r, rho;

	NgbNMax=NgbNMax0;
	NgbR2=mymalloc(sizeof(HBTReal)*NgbNMax);
	NgbID=mymalloc(sizeof(HBTInt)*NgbNMax);

	radius=*p2hguess;
	found=treesearch_sphere(cen, radius, PIndex,PPos);

	/* Grow the search radius until enough neighbours are inside it. */
	while(found<SPH_DENS_NGB)
	{
		if(found==0)
			radius *= 2.;	/* nothing found at all: just double the radius */
		else			/* scale by the count ratio, 10% on the generous side */
			radius *= pow((HBTReal)SPH_DENS_NGB/(HBTReal)found,1.0/3.0)*1.1;
		found=treesearch_sphere(cen, radius, PIndex,PPos);
	}

	/* Hand back a slightly enlarged best guess for the caller to reuse. */
	*p2hguess=radius*powf((HBTReal)SPH_DENS_NGB/(HBTReal)found,1.0/3.0)*1.1;

	/*
	 * psort() leaves NgbR2 partly sorted with respect to its return value.
	 * That value is treated as a squared length: its square root is the
	 * smoothing length h (presumably the SPH_DENS_NGB-th smallest squared
	 * distance -- confirm against psort).
	 */
	h=psort(SPH_DENS_NGB,found,NgbR2);
	h=sqrtf(h);
	hinv3 = 1.0 / (h * h * h);

	/* Piecewise-cubic kernel in u = r/h; coefficients are 8/pi, 48/pi, 16/pi. */
	rho = 0;
	for(k = 0; k < SPH_DENS_NGB; k++)
	{
		r = sqrtf(NgbR2[k]);
		u = r / h;
		wk = (u < 0.5)
			? hinv3 * (2.546479089470 + 15.278874536822 * (u - 1) * u * u)
			: hinv3 * 5.092958178941 * (1.0 - u) * (1.0 - u) * (1.0 - u);
		rho += wk;
	}

	free(NgbR2);
	free(NgbID);
	return rho;
}
void quicksort(int *array, size_t n) { // created threads should be joinable pthread_attr_init(&thread_attr); pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); // start sorting psort(array, 0, n-1); // quit thread pool, wait for all threads to finish. size_t i; for(i = 0; i < spawned_threads; ++i) { pthread_join(thread_pool[i].thread, NULL); //pthread_mutex_destroy(&thread_pool[i].wait_lock); //pthread_cond_destroy(&thread_pool[i].start_signal); } // remove allocated attr pthread_attr_destroy(&thread_attr); }
void addsort(int *v, int first, int last) { // thread pool stuff pthread_mutex_lock(&order_lock); // find a thread that can take a beating (more work) struct qsthread *thread = NULL; // if one's free, that's our thread if(num_free_threads != 0) { thread = free_thread_pool[--num_free_threads]; } // else, if we're able to spawn a new one, do it else if(spawned_threads < MAX_THREADS) { thread_pool[spawned_threads] = thread_default; thread = &thread_pool[spawned_threads]; pthread_create(&thread_pool[spawned_threads].thread, &thread_attr, run_thread, (void *)thread); thread_barrier(); ++spawned_threads; } pthread_mutex_unlock(&order_lock); // if there's one to give work to, assign the order to it if(thread != NULL) { thread->v = v; thread->first = first; thread->last = last; pthread_mutex_lock(&thread->wait_lock); pthread_cond_signal(&thread->start_signal); pthread_mutex_unlock(&thread->wait_lock); } else // no more threads available, work yourself. psort(v, first, last); }
// thread starting point void *run_thread(void *thread_ptr) { struct qsthread *thread = (struct qsthread *)thread_ptr; pthread_mutex_lock(&thread->wait_lock); // let spawning thread know that we've started ok. thread_barrier(); while(1) { pthread_cond_wait(&thread->start_signal, &thread->wait_lock); // NULL list means main thread wants us to quit if(thread->v == NULL) pthread_exit(NULL); // otherwise, sort thread psort(thread->v, thread->first, thread->last); pthread_mutex_lock(&order_lock); // if completing thread's current work means all started threads // are free, then work is done signal back to main thread if(num_free_threads == spawned_threads-1) { size_t i = 0; for(i = 0; i < num_free_threads; ++i) { free_thread_pool[i]->v = NULL; pthread_cond_signal(&free_thread_pool[i]->start_signal); } pthread_mutex_unlock(&order_lock); pthread_mutex_unlock(&thread->wait_lock); break; } // otherwise add thread back to pool and be available for more work free_thread_pool[num_free_threads++] = thread; pthread_mutex_unlock(&order_lock); } return NULL; }
/*
 * Command-line driver: parses argv[1] as a comma-separated list of
 * non-negative decimal integers, sorts them with psort() and prints the
 * result as "[a,b,c]" followed by a newline.
 *
 * Characters other than digits and commas are ignored while parsing; an
 * empty field parses as 0. Returns 0 on success, 1 when no list was given.
 */
int main(int argc, char* argv[])
{
	/* Without this check argv[1] would be dereferenced as NULL. */
	if(argc < 2)
	{
		fprintf(stderr, "usage: %s n1,n2,...\n", argc > 0 ? argv[0] : "psort");
		return 1;
	}

	const char* input = argv[1];

	/* One element per comma, plus one. */
	int count = 1;
	for(const char* c = input; *c != '\0'; c++)
	{
		if(*c == ',')
			count++;
	}

	int ary[count];
	int* p = ary;
	int value = 0;
	for(const char* c = input; *c != '\0'; c++)
	{
		if(*c >= '0' && *c <= '9')
		{
			value = value * 10 + (*c - '0');
		}
		else if(*c == ',')
		{
			*p = value;
			p++;
			value = 0;
		}
	}
	*p = value;	/* last field has no trailing comma */

	const size_t n = sizeof(ary)/sizeof(int);
	psort(ary, n);

	/*
	 * Print straight to stdout instead of assembling the text in fixed-size
	 * buffers: the old 11-byte scratch buffer and 10-bytes-per-element
	 * output buffer overflowed for 10-digit values.
	 * n is always >= 1, so the closing bracket is always emitted.
	 */
	printf("[");
	for(size_t i = 0; i < n; i++)
	{
		printf("%d%c", ary[i], (i + 1 < n) ? ',' : ']');
	}
	printf("\n");
	return 0;
}
/*
 * Run every exemplar of `model` over `img` and return the detections that
 * survive per-exemplar top-k selection and non-maximum suppression.
 *
 * Steps, as implemented below:
 *   1. build the HOG pyramid of the image (computeHogScale)
 *   2. convolve it with the exemplars' HOG templates (convolvePyramids)
 *   3. per pyramid level and exemplar: subtract the exemplar bias, keep the
 *      responses above the threshold, convert them to boxes in image
 *      coordinates and keep at most maxWindowsPerExemplar best boxes
 *   4. run nms() on each exemplar's boxes and concatenate the results
 *
 * params  detection settings; userTasks must be >= 4 and
 *         maxTotalBoxesPerExemplar > maxWindowsPerExemplar (both asserted)
 * img     input image, handed to computeHogScale
 * model   exemplar templates (model->hogpyr) and biases (model->b)
 *
 * Returns a newly allocated esvmOutput. output->boxes holds all boxes;
 * output->hogpyr is the image pyramid when params->saveHogPyr is set,
 * otherwise a placeholder with num == 0 (the pyramid itself is freed).
 * With ESVM_PERFORMANCE_COUNTERS defined, output->perf receives the three
 * stage timings (seconds * 1000).
 */
esvmOutput *esvmSIME(esvmParameters *params, cv::Mat img, esvmModel *model)
{
	//userTasks needs to be at least 4.
	//This is for performance: fewer than 4 threads doesn't make sense.
	//The original author also suspected that binning histograms (binHists)
	//may rely on this assumption and not work properly otherwise.
	assert(params->userTasks >= 4);

	//computing hog pyramid and reading whogs can be made parallel
	//Timers are stored as (start - end) and negated when reported.
#ifdef ESVM_PERFORMANCE_COUNTERS
	double hogTime = CycleTimer::currentSeconds();
#endif

	esvmHogPyr *hogpyr = computeHogScale(img,params->cellWidth,params->maxHogLevels,params->minHogDim,
			params->levelsPerOctave,params->minImageScale,params->hogEnablePadding,
			params->hogPadding,params->userTasks,params->useMexResize);

#ifdef ESVM_PERFORMANCE_COUNTERS
	hogTime -= CycleTimer::currentSeconds();
#endif

	const esvmHogPyr *whogpyr = model->hogpyr;
	const esvmHog **whogs = (const esvmHog **) whogpyr->hogs;
	const int numWeights = whogpyr->num;
	const float *bWeight = model->b;

#ifdef ESVM_PERFORMANCE_COUNTERS
	double convTime = CycleTimer::currentSeconds();
#endif

	//indexed below as convResults[level*numWeights + exemplar]
	esvmArr2_f *convResults = convolvePyramids(hogpyr,whogpyr,params->convEnablePadding,
			params->userTasks);

#ifdef ESVM_PERFORMANCE_COUNTERS
	convTime -= CycleTimer::currentSeconds();
#endif

	//allocate memory for bounding boxes per exemplar.
	//Only one entry per exemplar is ever used, so size the array by
	//numWeights; sizing it by numWeights*hogpyr->num gave a zero-length
	//array (written to below) whenever the pyramid had no levels.
	esvmBoxes *boxesArr = (esvmBoxes *)esvmCalloc(numWeights,sizeof(esvmBoxes));
	assert(params->maxTotalBoxesPerExemplar > params->maxWindowsPerExemplar);
	for(int w=0;w<numWeights;w++) {
		boxesArr[w].arr = (float *)esvmMalloc(params->maxTotalBoxesPerExemplar*ESVM_BOX_DIM*sizeof(float));
		boxesArr[w].num = 0;
		std::fill(boxesArr[w].arr,boxesArr[w].arr+params->maxTotalBoxesPerExemplar*ESVM_BOX_DIM,
				ESVM_FLOAT_MIN);
	}

	//maxers[w]: score of the weakest box currently kept for exemplar w once
	//its list is full; used to raise the detection threshold on later levels
	float *maxers = (float *)esvmMalloc(numWeights*sizeof(float));
	std::fill(maxers,maxers+numWeights,ESVM_FLOAT_MIN);

	//scratch buffers for the top-k selection, shared by all exemplars
	float *negScores = (float *)esvmMalloc(params->maxTotalBoxesPerExemplar*sizeof(float));
	float *topScores = (float *)esvmMalloc(params->maxTotalBoxesPerExemplar*sizeof(float));
	int *topInds = (int *)esvmMalloc(params->maxTotalBoxesPerExemplar*sizeof(int));
	float *topBoxes = (float *)esvmMalloc(params->maxTotalBoxesPerExemplar*ESVM_BOX_DIM*sizeof(float));

#ifdef ESVM_PERFORMANCE_COUNTERS
	double nmsTime = CycleTimer::currentSeconds();
#endif

	//serial loop because maxers are maintained from higher levels!
	for(int i=hogpyr->num-1;i>=0;i--) {
		//serial loop
		for(int w=0;w<numWeights;w++) {
			esvmArr2_f *convOut = &(convResults[i*numWeights+w]);

			subtractScalar(convOut->arr,convOut->rows,convOut->cols,bWeight[w]);

			int nkeep;
			//hogPadding is subtracted from the indices.
			float detectionThreshold = max(maxers[w],params->detectionThreshold);
			int *indices = sort2DIndex(convOut->arr,convOut->rows,convOut->cols,
					ESVM_DESCEND_SORT,ESVM_THRESHOLD,detectionThreshold,&nkeep,
					params->hogPadding);

			if(nkeep==0) {
				//Nothing above threshold at this level. Release the same
				//buffers the normal path frees at the end of the iteration;
				//skipping straight to the next exemplar leaked them.
				free(indices);
				free(convOut->arr);
				continue;
			}

			//arrays for top-k sorting
			const int numPrev = boxesArr[w].num;
			const int topK = min(boxesArr[w].num+nkeep,params->maxWindowsPerExemplar);

			//concatenate current boxes to the boxes already detected by exemplar
			float *bboxes = &(boxesArr[w].arr[boxesArr[w].num*ESVM_BOX_DIM]);
			int dim1 = convOut->rows*convOut->cols;	//row indices at [j], column indices at [j+dim1]
			float resizing = params->cellWidth/hogpyr->scale[i];

			assert(boxesArr[w].num+nkeep <= params->maxTotalBoxesPerExemplar);

			//negScores is shared by all exemplars and was last filled for
			//whichever exemplar was processed before this one. Reload the
			//scores of the boxes already kept for THIS exemplar so that
			//the top-k ranking below compares the right values.
			for(int j=0;j<numPrev;j++) {
				float *bboxPrev = boxesArr[w].arr+j*ESVM_BOX_DIM;
				negScores[j] = -ARR_SCORE_P(bboxPrev);
			}

			//get the bounding boxes in the original image
			//need to rescale
			for(int j=0;j<nkeep;j++) {
				float *bboxL = bboxes+j*ESVM_BOX_DIM;
				ARR_RMIN_P(bboxL) = ((indices[j])*resizing);
				ARR_CMIN_P(bboxL) = ((indices[j+dim1])*resizing);
				ARR_RMAX_P(bboxL) = ((indices[j]+whogs[w]->rows)*resizing)-1;
				ARR_CMAX_P(bboxL) = ((indices[j+dim1]+whogs[w]->cols)*resizing)-1;

				//Put negative of score inside.
				//This is useful for finding top-k elements
				negScores[boxesArr[w].num+j] = -convOut->arr[j];
				ARR_SCORE_P(bboxL) = convOut->arr[j];
				ARR_SCALE_P(bboxL) = hogpyr->scale[i];
				ARR_CLASS_P(bboxL) = (int)whogs[w]->classId;
				ARR_EXID_P(bboxL) = w;
			}

			//find top-k boxes
			psort(negScores, boxesArr[w].num+nkeep, topK, topScores, topInds);

			//gather the selected boxes into the scratch buffer
			bboxes = boxesArr[w].arr;
			for(int j=0;j<topK;j++) {
				float *bboxL = topBoxes+j*ESVM_BOX_DIM;
				float *bboxR = bboxes+topInds[j]*ESVM_BOX_DIM;
				ARR_COPY_P(bboxR,bboxL);
			}

			//now copy back the current boxes into the list of boxes for this exemplar
			memcpy(boxesArr[w].arr,topBoxes,topK*ESVM_BOX_DIM*sizeof(float));
			boxesArr[w].num = topK;

			if(topK >= params->maxWindowsPerExemplar) {
				//list is full: later levels must beat the weakest kept score
				maxers[w] = -topScores[topK-1];
			}

			free(indices);
			free(convOut->arr);
		}
	}

	//more cleanup
	free(convResults);
	free(maxers);
	free(negScores);
	free(topScores);
	free(topInds);
	free(topBoxes);

	//perform nms on each exemplar's boxes (one entry per exemplar)
	esvmBoxes *nmsBoxesArr = (esvmBoxes *)esvmCalloc(numWeights,sizeof(esvmBoxes));
	int totalBoxes = 0;
	for(int w=0;w<numWeights;w++) {
		nms(boxesArr[w].arr,boxesArr[w].num, params->nmsOverlapThreshold,
				&(nmsBoxesArr[w].num),&(nmsBoxesArr[w].arr));
		totalBoxes += (nmsBoxesArr[w].num);
		free(boxesArr[w].arr);
	}
	free(boxesArr);

#ifdef ESVM_PERFORMANCE_COUNTERS
	nmsTime -= CycleTimer::currentSeconds();
#endif

	//assign output
	esvmOutput *output = (esvmOutput *)esvmMalloc(sizeof(esvmOutput));

	//collect all the boxes together
	output->boxes = (esvmBoxes *)esvmCalloc(1,sizeof(esvmBoxes));
	output->boxes->num = totalBoxes;
	if(totalBoxes>0)
		output->boxes->arr = (float *)esvmMalloc(totalBoxes*ESVM_BOX_DIM*sizeof(float));

	int count = 0;
	for(int w=0;w<numWeights;w++) {
		for(int j=0;j<nmsBoxesArr[w].num;j++) {
			ARR_COPY(nmsBoxesArr[w].arr,j,output->boxes->arr,count);
			count++;
		}
		free(nmsBoxesArr[w].arr);
	}
	free(nmsBoxesArr);

	if(params->saveHogPyr==false) {
		//caller does not want the pyramid: free it and return an empty placeholder
		freeHogPyramid(hogpyr);
		output->hogpyr = (esvmHogPyr *)esvmMalloc(sizeof(esvmHogPyr));
		output->hogpyr->num = 0;
	}
	else {
		output->hogpyr = hogpyr;
	}

#ifdef ESVM_PERFORMANCE_COUNTERS
	output->perf.hogTime = -hogTime*1000;
	output->perf.convTime = -convTime*1000;
	output->perf.nmsTime = -nmsTime*1000;
#endif

	return output;
}
void run(int col) { compute(col); printf("\nBUFFER = %d\n", BUFF); printf("COLUMNS = %d\n", COLS); printf("ROWS = %d\n", ROWS); printf("FULL = %d\n", FULL); printf("END = %d\n", END); printf("SIZE = %d\n\n", SIZE); int *array = allocate(FULL); printf("\n"); init_data(array); if(toPr) { printf("\nINITIAL LIST WITH BUFFER\n"); shiftUp(); print(array); shiftDown(); } printf("\n\nSTEP 1: Sort: "); //STEP 1 if(nThreads > 1) psort(array, nThreads); //Sort using parallel insertion sort else insort(array); //Sort using serial insertion sort if(toPr) print(array); printf("\nSTEP 2: Transpose Up: "); //STEP 2 array = transposeUp(array); if(toPr) print(array); printf("\nSTEP 3: Sort: "); //STEP 3 if(nThreads > 1) psort(array, nThreads); //Sort using parallel insertion sort else insort(array); //Sort using serial insertion sort if(toPr) print(array); printf("\nSTEP 4: Transpose Down: "); //STEP 4 array = transposeDown(array); if(toPr) print(array); printf("\nSTEP 5: Sort: "); //STEP 5 if(nThreads > 1) psort(array, nThreads); //Sort using parallel insertion sort else insort(array); //Sort using serial insertion sort if(toPr) print(array); printf("\nSTEP 6: Shift Up: "); //STEP 6 shiftUp(); if(toPr) print(array); printf("\nSTEP 7: Sort: "); //STEP 7 if(nThreads > 1) psort(array, nThreads); //Sort using parallel insertion sort else insort(array); //Sort using serial insertion sort if(toPr) print(array); printf("\nSTEP 8: Shift Down: "); //STEP 8 shiftDown(); if(toPr) print(array); printf("\n"); if(isSorted(array)){ printf("Array is sorted!\n\n"); } else { printf("Array not sorted!\n\n"); } free(array); }
// Pointer convenience overload: dereferences `array` and forwards to the
// psort overload that accepts the vector itself.
// `array` must not be null -- it is dereferenced unconditionally.
void ParallelSort::psort(vector<T>* array)
{
	vector<T>& target = *array;
	psort(target);
}