// Performance test for estimateAffine2D().
// Builds a random 2x3 transform, maps random source points through it,
// corrupts the tail of the mapped points so they act as outliers, and then
// times the estimator. Accuracy is not checked here.
PERF_TEST_P( EstimateAffine, EstimateAffine2D, ESTIMATE_PARAMS )
{
    AffineParams testParams = GetParam();
    const int pointCount = get<0>(testParams);
    const double confidence = get<1>(testParams);
    const int method = get<2>(testParams);
    const size_t refining = get<3>(testParams);

    // Reference transform the estimator is expected to recover.
    Mat groundTruth(2, 3, CV_64F);
    cv::randu(groundTruth, -2., 2.);

    // LMEDS can't handle more than 50% outliers (by design), so it gets a
    // larger share of clean points than the other methods.
    const int inlierCount = (method == LMEDS) ? 3*pointCount/5 : 2*pointCount/5;
    const float outlierShift = 15.f;
    const float noiseLevel = 20.f;

    Mat srcPoints(1, pointCount, CV_32FC2);
    Mat dstPoints(1, pointCount, CV_32FC2);
    randu(srcPoints, 0., 100.);
    transform(srcPoints, dstPoints, groundTruth);

    // Columns [inlierCount, pointCount) become outliers: a constant shift
    // plus random noise is added on top of the exact mapping.
    Mat outliers = dstPoints.colRange(inlierCount, pointCount);
    outliers.reshape(1) += outlierShift;
    Mat noise(outliers.size(), outliers.type());
    randu(noise, 0., noiseLevel);
    outliers += noise;

    Mat estimated;
    vector<uchar> inlierMask(pointCount);

    warmup(inlierMask, WARMUP_WRITE);
    warmup(srcPoints, WARMUP_READ);
    warmup(dstPoints, WARMUP_READ);

    TEST_CYCLE()
    {
        estimated = estimateAffine2D(srcPoints, dstPoints, inlierMask, method,
                                     3, 2000, confidence, refining);
    }

    // we already have accuracy tests
    SANITY_CHECK_NOTHING();
}
double CPUBenchmark::getSystemCallOverhead(bool isCached) { uint64_t sum = 0; uint64_t start, end; warmup(); for(int i = 0; i < TIMES; i++) { // in order to reduce overhead, leave the time count inside the if block if(isCached) { start = rdtscStart(); // this one has cache getpid(); end = rdtscEnd(); } else { start = rdtscStart(); // this one don't have cache getppid(); end = rdtscEnd(); } sum += (end - start); } return (double) sum / (double) TIMES; }
/**
 * Benchmark driver: calls sample() repeatedly until the elapsed time reported
 * by secondtime() exceeds the budget, then prints a score
 * (iterations / elapsed * 1000).
 *
 * The budget is read from the first command-line argument (argv[1]). The
 * previous code parsed argv[0], which on a normal launch is the program name
 * and therefore always yielded 0. When no argument is given we still fall
 * back to argv[0], so launchers that relied on the old behaviour keep working.
 *
 * NOTE(review): the unit of secondtime() is not visible here; the budget is
 * compared against it directly.
 */
int main(int argc, char** argv) {
    const char* budget_text = NULL;
    if (argc > 1) {
        budget_text = argv[1];
    } else if (argc > 0) {
        budget_text = argv[0];
    }
    const int budget = (budget_text != NULL) ? atoi(budget_text) : 0;

    warmup();

    int iterations = 0;
    const double start = secondtime();
    double elapsed;
    while (1) {
        if (!sample()) {
            abort();
        }
        iterations++;
        elapsed = secondtime() - start;
        // Strict comparison: with a zero budget the loop still only exits
        // once elapsed is positive, so the division below is safe.
        if (elapsed > budget) {
            break;
        }
    }

    const double score = iterations / elapsed * 1000.0;
    printf("%f\n", score);
    return 0;
}
uint64_t CPUBenchmark::getLoopOverhead(int times) { uint64_t start, end; warmup(); start = rdtscStart(); for(int i = 0; i < times; i++) { // end loop to avoid new overhead } end = rdtscEnd(); return end - start; }
/**
 * Measures the cost of taking a timestamp pair with nothing in between.
 *
 * @return the mean of (rdtscEnd() - rdtscStart()) over TIMES back-to-back
 *         reads.
 */
double CPUBenchmark::getReadOverhead() {
    uint64_t totalTicks = 0;

    warmup();

    for (int iter = 0; iter < TIMES; ++iter) {
        const uint64_t beginTicks = rdtscStart();
        const uint64_t endTicks = rdtscEnd();
        totalTicks += (endTicks - beginTicks);
    }

    return static_cast<double>(totalTicks) / static_cast<double>(TIMES);
}
void test_BS (cl_uint sizeMin, cl_uint sizeMax, int warmup_num_iters, int run_num_iters, size_t blockMin, size_t blockMax, size_t gridMin, size_t gridMax, char *filename, cl_float(*gen)(cl_float low, cl_float high)) { BS_test_t t; t.num = sizeMin; t.blockSize = blockMin; t.gridSize = gridMin; t.Riskfree = R; t.Volatility = V; bool result = false; FILE * csv = fopen(filename, "a"); if (csv == NULL) fprintf(stderr, "can't open output file\n"); if (csv != NULL) fprintf(csv, "size, block size, grid size, total time\n"); setup_cl(&t); setup_buffers(&t, gen); if (t.maxBlockSize < blockMin) blockMin = t.maxBlockSize; if (t.maxBlockSize < blockMax) blockMax = t.maxBlockSize; printf("blockMIN: %d blockMAX: %d t.maxBlockSize: %d\n", blockMin, blockMax, t.maxBlockSize); warmup(&t, warmup_num_iters); for (t.num = sizeMin; t.num <= sizeMax; t.num <<= 1) { for (t.gridSize = gridMin; t.gridSize <= gridMax; t.gridSize += 16*1024) { for (t.blockSize = blockMin; t.blockSize <= blockMax; t.blockSize += 64) { if (t.gridSize % t.blockSize != 0) continue; setup_buffers(&t, gen); result = run(&t, run_num_iters); post(&t, run_num_iters, result); if (result && csv != NULL) output(&t, csv, run_num_iters); cleanup(&t); } } } if (csv != NULL) fclose(csv); }
uint64_t CPUBenchmark::calculateThreadSwitchTime(){ uint64_t start; uint64_t end; pthread_t thread; warmup(); // pipe(fd); pthread_create(&thread, NULL, foo, &end); start = rdtscStart(); pthread_join(thread, NULL); // read(fd[0], (void*)&end, sizeof(uint64_t)); return end - start; }
double CPUBenchmark::getKernelThreadCreationTime() { double sum = 0; uint64_t start, end; pthread_t thread; warmup(); for(int i = 0; i < TASK_OP_TIMES; i++) { start = rdtscStart(); pthread_create(&thread, NULL, &threadStartRountine, NULL); // make the main process to wait new thread pthread_join(thread, NULL); end = rdtscEnd(); sum += end - start; } return (double) sum / (double) TASK_OP_TIMES; }
/**
 * Estimates the time to switch from this process to a forked child.
 *
 * The child takes a timestamp as soon as it runs and sends it back through
 * the pipe; the parent takes its own timestamp right after fork() returns.
 *
 * @param fd pipe descriptors: fd[0] is read by the parent, fd[1] is written
 *           by the child.
 * @return child timestamp minus parent timestamp, or 0 when the sample is
 *         unusable (fork failed, the pipe read was short or failed, or the
 *         child's timestamp is not later than the parent's).
 */
uint64_t CPUBenchmark::calculateProcessSwitchTime(int *fd){
    uint64_t start = 0;
    uint64_t end = 0;

    warmup();

    const pid_t cpid = fork();
    if (cpid < 0) {
        // No child exists: waiting or reading would fail or block.
        return 0;
    }

    if (cpid == 0) {
        end = rdtscEnd();
        write(fd[1], (void*)&end, sizeof(uint64_t));
        // _exit() instead of exit(): the child must not flush stdio buffers
        // inherited from the parent or run the parent's atexit handlers.
        _exit(1);
    }

    start = rdtscStart();
    waitpid(cpid, NULL, 0);
    if (read(fd[0], (void*)&end, sizeof(uint64_t)) != (ssize_t)sizeof(uint64_t)) {
        return 0;
    }

    // Guard against unsigned wrap-around when the child ran first.
    if (end > start) {
        return end - start;
    }
    return 0;
}
int begin(void) { const char *scheme = use_secure?"https":"http"; char *space; static const char blanks[] = " "; static const char stars[] = "**********************************"; i_session = ne_session_create(scheme, i_hostname, i_port); CALL(init_session(i_session)); ne_hook_pre_send(i_session, i_pre_send, "X-Prestan"); space = ne_concat(i_path, "davtest/", NULL); ne_delete(i_session, space); if (ne_mkcol(i_session, space)) { t_context("Could not create new collection `%s' for tests: %s\n" "Server must allow `MKCOL %s' for tests to proceed", space, ne_get_error(i_session), space); return FAILHARD; } free(i_path); i_path = space; warmup(); printf("\nStart Testing %s:\n\n",pget_option.URL) ; printf("\n%s%s\n", blanks, stars); printf("\n%s* Number of Requests\t\t%d\n", blanks, pget_option.requests); printf("\n%s* Number of Dead Properties\t%d\n", blanks, pget_option.numprops); printf("\n%s* Depth of Collection\t\t%d\n", blanks, pget_option.depth); printf("\n%s* Width of Collection\t\t%d\n", blanks, pget_option.width); printf("\n%s* Type of Methods\t\t%s\n", blanks, pget_option.methods); printf("\n%s%s\n", blanks, stars); printf("\n\n"); return OK; }
int main(int argc, char *argv[]){ if(argc<=2){ usage(argv[0]); exit(-1); } int pagesize = getpagesize(); CmdParam cmd; CacheStat stat; memset(&stat, 0, sizeof(stat)); memset(&cmd, 0, sizeof(cmd)); stat.isPrint = true; //default true parseArgs(argc, argv, &cmd); if(cmd.isDaemon) daemonMe(); if(cmd.cmd_type == CLEAR){ clear(cmd.path, cmd.file_type); }else if(cmd.cmd_type == STAT){ normalStat(cmd.path, cmd.file_type, &stat); if(cmd.file_type == DIRECTORY){ fprintf(stdout, "\nTotal Cache of Directory:%s size:%s cached:%s\n", cmd.path, sizeFit(stat.pageCount*pagesize, buf1), sizeFit(stat.inCache*pagesize, buf2)); } }else if(cmd.cmd_type == RSTAT){ realStat(cmd.path, cmd.interval, cmd.file_type, cmd.isSuppress); }else if(cmd.cmd_type == LOCK){ lock(cmd.path, cmd.file_type); select(0, NULL, NULL, NULL, NULL); }else if(cmd.cmd_type == WARM){ warmup(cmd.path, cmd.file_type); } return 0; }
double CPUBenchmark::getProcessCreationTime() { uint64_t sum = 0; uint64_t start, end; pid_t pid; warmup(); for(int i = 0; i < TASK_OP_TIMES; i++) { start = rdtscStart(); pid = fork(); if(pid == 0) { // child process, just exit exit(1); } else { // parent process, wait child exit wait(NULL); end = rdtscEnd(); sum += (end - start); } } return (double) sum / (double) TASK_OP_TIMES; }
/*
 * Driver for the tiled Cholesky (LLT) factorization benchmark.
 *
 * Usage: <prog> N NB memsize
 *   N        matrix order
 *   NB       block size
 *   memsize  memory budget in MiB (converted to bytes below)
 *
 * Steps: set up a 1x1 BLACS grid, initialise the accelerator backend selected
 * at compile time (USE_MIC / USE_CUBLASV2 / legacy cuBLAS), generate the test
 * matrix with pdmatgen, run Cholesky() under a QUARK scheduler, and print the
 * time and a Gflops figure. When VERIFY is defined, the factor is checked by
 * solving against a regenerated copy of the matrix and comparing the result
 * with the identity.
 */
int main(int argc, char **argv)
{
    /* Column-major element accessors; #undef'd at the end of main. */
    #define test_A(i,j) test_A[(size_t)(j)*N+(i)]
    #define test_A2(i,j) test_A2[(size_t)(j)*N+(i)]
    int N,NB,w,LDA,BB;
    size_t memsize; //bytes
    int iam, nprocs, mydevice;
    int ICTXT, nprow, npcol, myprow, mypcol;
    int i_one = 1, i_zero = 0, i_negone = -1;
    double d_one = 1.0, d_zero = 0.0, d_negone = -1.0;
    int IASEED = 100;
    /* printf("N=?\n"); scanf("%ld",&N); printf("NB=?\n"); scanf("%d", &NB); printf("width of Y panel=?\n"); scanf("%ld",&w); */
    if(argc < 4){
        printf("invalid arguments N NB memsize(M)\n");
        exit(1);
    }
    N = atoi(argv[1]);
    NB = atoi(argv[2]);
    memsize = (size_t)atoi(argv[3])*1024*1024;
    BB = (N + NB - 1) / NB; /* number of block rows/columns, rounded up */
    /* w is only checked by the assert below; it is not used afterwards. */
    w = memsize/sizeof(double)/BB/NB/NB - 1;
    assert(w > 0);
    LDA = N + 0; //padding
    /* Matrices are printed only when N does not exceed NSIZE. */
    int do_io = (N <= NSIZE);
    double llttime;
    double gflops;
    /* Single-process 1x1 grid. */
    nprow = npcol = 1;
    blacs_pinfo_(&iam, &nprocs);
    blacs_get_(&i_negone, &i_zero, &ICTXT);
    blacs_gridinit_(&ICTXT, "R", &nprow, &npcol);
    blacs_gridinfo_(&ICTXT, &nprow, &npcol, &myprow, &mypcol);
#ifdef USE_MIC
    #ifdef __INTEL_OFFLOAD
    printf("offload compilation enabled\ninitialize each MIC\n");
    offload_init(&iam, &mydevice);
    #pragma offload target(mic:0)
    {
        mkl_peak_mem_usage(MKL_PEAK_MEM_ENABLE);
    }
    #else
    /* NOTE(review): exit(0) is unconditional here; only the printf is
     * guarded by isroot. */
    if(isroot) printf("offload compilation not enabled\n");
    exit(0);
    #endif
#else
    #ifdef USE_CUBLASV2
    {
        /* One cuBLAS handle per worker thread. */
        cublasStatus_t cuStatus;
        for(int r = 0; r < OOC_NTHREADS; r++){
            cuStatus = cublasCreate(&worker_handle[r]);
            assert(cuStatus == CUBLAS_STATUS_SUCCESS);
        }
    }
    #else
    cublasInit();
    #endif
#endif
    double *test_A = (double*)memalign(64,(size_t)LDA*N*sizeof(double)); // for chol
#ifdef VERIFY
    double *test_A2 = (double*)memalign(64,(size_t)LDA*N*sizeof(double)); // for verify
#endif
    /*Initialize A */
    int i,j;
    printf("Initialize A ... ");
    fflush(stdout);
    llttime = MPI_Wtime();
    pdmatgen(&ICTXT, "Symm", "Diag", &N, &N, &NB, &NB, test_A, &LDA, &i_zero, &i_zero, &IASEED, &i_zero, &N, &i_zero, &N, &myprow, &mypcol, &nprow, &npcol);
    llttime = MPI_Wtime() - llttime;
    printf("time %lf\n", llttime);
    /*print test_A*/
    if(do_io){
        printf("Original A=\n\n");
        matprint(test_A, N, LDA, 'A');
    }
    /*Use directed unblocked Cholesky factorization*/
    /* t1 = clock(); Test_dpotrf(test_A2,N); t2 = clock(); printf ("time for unblocked Cholesky factorization on host %f \n", ((float) (t2 - t1)) / CLOCKS_PER_SEC); */
    /*print test_A*/
    /* if(do_io){ printf("Unblocked result:\n\n"); matprint(test_A2,N,'L'); } */
    /*Use tile algorithm*/
    Quark *quark = QUARK_New(OOC_NTHREADS);
    /* DAG output is switched off here and set from the environment below,
     * after the optional warm-up. */
    QUARK_DOT_DAG_Enable(quark, 0);
#ifdef USE_MIC
    // mklmem(NB);
    printf("QUARK MIC affinity binding\n");
    QUARK_bind(quark);
    printf("offload warm up\n");
    warmup(quark);
#endif
    QUARK_DOT_DAG_Enable(quark, quark_getenv_int("QUARK_DOT_DAG_ENABLE", 0));
    printf("LLT start %lf\n", MPI_Wtime());
    /* Cholesky() returns the value used as the factorization time below. */
    llttime = Cholesky(quark,test_A,N,NB,LDA,memsize);
    printf("LLT end %lf\n", MPI_Wtime());
    QUARK_Delete(quark);
#ifdef USE_MIC
    offload_destroy();
#else
    #ifdef USE_CUBLASV2
    {
        cublasStatus_t cuStatus;
        for(int r = 0; r < OOC_NTHREADS; r++){
            cuStatus = cublasDestroy(worker_handle[r]);
            assert(cuStatus == CUBLAS_STATUS_SUCCESS);
        }
    }
    #else
    cublasShutdown();
    #endif
#endif
    /* Operation count (N/3 + 1/2) * N^2, divided by time and by 1024^3. */
    gflops = (double) N;
    gflops = gflops/3.0 + 0.5;
    gflops = gflops*(double)(N)*(double)(N);
    gflops = gflops/llttime/1024.0/1024.0/1024.0;
    printf ("N NB memsize(MB) quark_pthreads time Gflops\n%d %d %lf %d %lf %lf\n", N, NB, (double)memsize/1024/1024, OOC_NTHREADS, llttime, gflops);
#ifdef USE_MIC
    /* memsize is reused here to hold the value returned by
     * mkl_peak_mem_usage. */
    #pragma offload target(mic:0)
    {
        memsize = mkl_peak_mem_usage(MKL_PEAK_MEM_RESET);
    }
    printf("mkl_peak_mem_usage %lf MB\n", (double)memsize/1024.0/1024.0);
#endif
    /*Update and print L*/
    if(do_io){
        printf("L:\n\n");
        matprint(test_A,N,LDA,'L');
    }
#ifdef VERIFY
    printf("Verify... ");
    llttime = MPI_Wtime();
    /*
     * ------------------------
     * check difference between
     * test_A and test_A2
     * ------------------------
     */
    /* { double maxerr = 0; double maxerr2 = 0; for (j = 0; j < N; j++) { for (i = j; i < N; i++) { double err = (test_A (i, j) - test_A2 (i, j)); err = ABS (err); maxerr = MAX (err, maxerr); maxerr2 = maxerr2 + err * err; }; }; maxerr2 = sqrt (ABS (maxerr2)); printf ("max difference between test_A and test_A2 %lf \n", maxerr); printf ("L2 difference between test_A and test_A2 %lf \n", maxerr2); }; */
    /*
     * ------------------
     * over-write test_A2
     * ------------------
     */
    /* Same seed (IASEED) as for test_A, so test_A2 is generated with
     * identical arguments. */
    pdmatgen(&ICTXT, "Symm", "Diag", &N, &N, &NB, &NB, test_A2, &LDA, &i_zero, &i_zero, &IASEED, &i_zero, &N, &i_zero, &N, &myprow, &mypcol, &nprow, &npcol);
    /*
     * ---------------------------------------
     * after solve, test_A2 should be identity
     * ---------------------------------------
     */
    // test_A = chol(B) = L;
    // test_A2 = B
    // solve L*L'*X = B
    // if L is correct, X is identity
    {
        int uplo = 'L';
        const char *uplo_char = ((uplo == (int) 'U') || (uplo == (int) 'u')) ? "U" : "L";
        int info = 0;
        int nrhs = N;
        int LDA = N; /* shadows the outer LDA inside this scope */
        int ldb = N;
        dpotrs(uplo_char, &N, &nrhs, test_A, &LDA, test_A2, &ldb, &info);
        assert (info == 0);
    }
    {
        /* Compare the solve result with the identity: maxerr is the largest
         * absolute deviation, maxerr2 the square root of the sum of squared
         * deviations. */
        double maxerr = 0;
        double maxerr2 = 0;
        for (j = 0; j < N; j++) {
            for (i = 0; i < N; i++) {
                double eyeij = (i == j) ? 1.0 : 0.0;
                double err = (test_A2 (i, j) - eyeij);
                err = ABS (err);
                maxerr = MAX (maxerr, err);
                maxerr2 = maxerr2 + err * err;
            };
        };
        maxerr2 = sqrt (ABS (maxerr2));
        printf("time %lf\n", MPI_Wtime() - llttime);
        printf ("max error %lf \n", maxerr);
        printf ("max L2 error %lf \n", maxerr2);
    }
#endif
    free(test_A);test_A=NULL;
#ifdef VERIFY
    free(test_A2);test_A2=NULL;
#endif
    blacs_gridexit_(&ICTXT);
    blacs_exit_(&i_zero);
    return 0;
    #undef test_A
    #undef test_A2
}
//#ifdef _MT
/*
 * Runs the multi-threaded allocator stress test once for every thread count
 * from min_threads to max_threads.
 *
 *   sleep_cnt    seconds the workers are left running per round
 *   min_threads  first thread count to test
 *   max_threads  last thread count to test; also sizes the per-thread array
 *   chperthread  number of block slots handed to each thread
 *   num_rounds   multiplied by chperthread to give each thread's NumBlocks
 *
 * For each thread count: warm up the slots added since the previous round,
 * start the workers on exercise_heap, sleep, raise stopflag, wait until every
 * worker reports `finished`, then print the allocation throughput.
 *
 * num_threads, blkp, blksize, min_size, max_size, rgen and stopflag are not
 * declared in this function — presumably file-level globals.
 */
void runthreads(long sleep_cnt, int min_threads, int max_threads, int chperthread, int num_rounds)
{
  thread_data *de_area = new thread_data[max_threads] ;
  thread_data *pdea;
  int nperthread ;
  int sum_threads ;
  unsigned long sum_allocs ;
  unsigned long sum_frees ;
  double duration ;
#ifdef __WIN32__
  _LARGE_INTEGER ticks_per_sec, start_cnt, end_cnt;
#else
  long ticks_per_sec ;
  long start_cnt, end_cnt ;
#endif
  _int64 ticks ;
  double rate_1=0, rate_n ;
  double reqd_space ;
  ULONG used_space ;
  int prevthreads ;
  int i ;

  QueryPerformanceFrequency( &ticks_per_sec ) ;

  pdea = &de_area[0] ;
  // NOTE(review): only the first element is zeroed here; the fields assigned
  // in the setup loop below are set for every thread before it starts.
  memset(&de_area[0], 0, sizeof(thread_data)) ;

  prevthreads = 0 ;
  for(num_threads=min_threads; num_threads <= max_threads; num_threads++ )
    {
      // Warm up only the slots that were not covered by the previous round.
      warmup(&blkp[prevthreads*chperthread], (num_threads-prevthreads)*chperthread );

      nperthread = chperthread ;
      stopflag = FALSE ;

      for(i=0; i< num_threads; i++){
        de_area[i].threadno = i+1 ;
        de_area[i].NumBlocks = num_rounds*nperthread;
        // Each thread works on its own slice of blkp / blksize.
        de_area[i].array = &blkp[i*nperthread] ;
        de_area[i].blksize = &blksize[i*nperthread] ;
        de_area[i].asize = nperthread ;
        de_area[i].min_size = min_size ;
        de_area[i].max_size = max_size ;
        de_area[i].seed = lran2(&rgen) ; ;
        de_area[i].finished = 0 ;
        de_area[i].cAllocs = 0 ;
        de_area[i].cFrees = 0 ;
        de_area[i].cThreads = 0 ;
        de_area[i].finished = FALSE ;
        lran2_init(&de_area[i].rgen, de_area[i].seed) ;

#ifdef __WIN32__
        _beginthread((void (__cdecl*)(void *)) exercise_heap, 0, &de_area[i]) ;
#else
        _beginthread(exercise_heap, 0, &de_area[i]) ;
#endif
      }

      QueryPerformanceCounter( &start_cnt) ;

      // printf ("Sleeping for %ld seconds.\n", sleep_cnt);
      Sleep(sleep_cnt * 1000L) ;

      // Ask the workers to stop, then poll until each one has finished.
      stopflag = TRUE ;

      for(i=0; i<num_threads; i++){
        while( !de_area[i].finished ){
#ifdef __WIN32__
          Sleep(1);
#elif defined(__SVR4)
          thr_yield();
#else
          sched_yield();
#endif
        }
      }

      QueryPerformanceCounter( &end_cnt) ;

      // Collect the per-thread counters and reset them for the next round.
      sum_frees = sum_allocs =0 ;
      sum_threads = 0 ;
      for(i=0;i< num_threads; i++){
        sum_allocs += de_area[i].cAllocs ;
        sum_frees += de_area[i].cFrees ;
        sum_threads += de_area[i].cThreads ;
        de_area[i].cAllocs = de_area[i].cFrees = 0;
      }

#ifdef __WIN32__
      ticks = end_cnt.QuadPart - start_cnt.QuadPart ;
      duration = (double)ticks/ticks_per_sec.QuadPart ;
#else
      ticks = end_cnt - start_cnt ;
      duration = (double)ticks/ticks_per_sec ;
#endif

      for( i=0; i<num_threads; i++){
        if( !de_area[i].finished )
          printf("Thread at %d not finished\n", i) ;
      }

      rate_n = sum_allocs/duration ;
      // rate_1 keeps the rate of the first round; it is only read by the
      // disabled (#if 0) report below.
      if( rate_1 == 0){
        rate_1 = rate_n ;
      }

      reqd_space = (0.5*(min_size+max_size)*num_threads*chperthread) ;
      // used_space = CountReservedSpace() - init_space;
      used_space = 0;

      printf ("Throughput = %8.0f operations per second.\n", sum_allocs / duration);

#if 0
      printf("%2d ", num_threads ) ;
      printf("%6.3f", duration ) ;
      printf("%6.3f", rate_n/rate_1 ) ;
      printf("%8.0f", sum_allocs/duration ) ;
      printf(" %6.3f %.3f", (double)used_space/(1024*1024), used_space/reqd_space) ;
      printf("\n") ;
#endif

      Sleep(5000L) ; // wait 5 sec for old threads to die

      prevthreads = num_threads ;

      printf ("Done sleeping...\n");
    }
  delete [] de_area;
}
/*
 * Asks the kernel to load a file, or every non-hidden entry of a directory
 * tree, into the page cache.
 *
 *   path       file or directory to warm up
 *   file_type  REGFILE: issue posix_fadvise(POSIX_FADV_WILLNEED) over the
 *              whole file; DIRECTORY: recurse into every entry whose name
 *              does not start with '.'
 *
 * Returns true on success. Returns false, after printing the reason to
 * stderr, when stat/open/opendir/posix_fadvise fails on `path` itself.
 * Failures inside a directory are reported by the recursive call and do not
 * change the directory's own result.
 */
bool warmup(const char *path, enum FileType file_type){
    int fd = -1;              /* -1 = not open; 0 is a valid descriptor */
    DIR *dir = NULL;
    int time_used = 0;
    int rc = 0;
    int written = 0;
    char file_name[4096];
    struct stat st;
    struct timeval begin, end;
    struct dirent *dp = NULL;

    if(stat(path, &st) < 0){
        goto ERROR;
    }

    gettimeofday(&begin, NULL);

    if(file_type == REGFILE){
        fd = open(path, O_RDONLY);
        if(fd < 0){
            goto ERROR;
        }
        /* posix_fadvise returns the error number instead of setting errno,
         * so copy it over for the strerror() call in the error path. */
        rc = posix_fadvise(fd, 0, st.st_size, POSIX_FADV_WILLNEED);
        if(rc != 0){
            errno = rc;
            goto ERROR;
        }
    }else if(file_type == DIRECTORY){
        if((dir = opendir(path)) == NULL){
            goto ERROR;
        }
        while((dp = readdir(dir)) != NULL){
            /* Skips ".", ".." and hidden entries. */
            if(dp->d_name[0] == '.'){
                continue;
            }

            /* Bounded join of "<path>/<name>"; an entry that does not fit is
             * skipped rather than overflowing the buffer. */
            written = snprintf(file_name, sizeof(file_name), "%s/%s", path, dp->d_name);
            if(written < 0 || (size_t)written >= sizeof(file_name)){
                fprintf(stderr, "File:%s/%s path too long, skipped\n", path, dp->d_name);
                continue;
            }

            if(dp->d_type == DT_REG){
                warmup(file_name, REGFILE);
            }else if(dp->d_type == DT_DIR){
                warmup(file_name, DIRECTORY);
            }else{
                /* d_type is a small integer code, so print it as a number. */
                fprintf(stdout, "%s:%d type unsupported!\n", dp->d_name, dp->d_type);
            }
        }
    }

    gettimeofday(&end, NULL);
    time_used = getUsedTime(&begin, &end);

    if(file_type == REGFILE){
        fprintf(stdout, "Warmup File:%s TimeUsed:%d ms\n", path, time_used);
        close(fd);
    }else if(file_type == DIRECTORY){
        fprintf(stdout, "Warmup Dir:%s TimeUsed:%d ms\n", path, time_used);
        closedir(dir);
    }
    return true;

ERROR:
    fprintf(stderr, "File:%s %s\n", path, strerror(errno));
    if(fd >= 0) close(fd);
    if(dir) closedir(dir);
    return false;
}
/**
 * Measures the average cost of calling a procedure with 0 to 7 integer
 * arguments.
 *
 * @param result receives eight averages: result[k] is the mean number of
 *               timestamp-counter ticks for one call of fun_k, taken over
 *               TIMES calls. The vector is grown to eight elements if it is
 *               shorter; a longer vector keeps its extra elements untouched.
 */
void CPUBenchmark::getProcedureOverhead(vector<double> &result){
    const vector<double>::size_type caseCount = 8;
    if (result.size() < caseCount) {
        // The writes below index result[0..7]; make sure they are in range.
        result.resize(caseCount);
    }

    // A local macro is used instead of a helper function so that the timed
    // region contains exactly the direct call, with no extra call layer
    // between the two timestamp reads.
#define MEASURE_CALL(index, call)                                     \
    do {                                                              \
        uint64_t totalTime = 0;                                       \
        warmup();                                                     \
        for (int i = 0; i < TIMES; i++) {                             \
            const uint64_t start = rdtscStart();                      \
            call;                                                     \
            const uint64_t end = rdtscEnd();                          \
            totalTime += (end - start);                               \
        }                                                             \
        result[index] = (double)totalTime / (double)TIMES;            \
    } while (0)

    MEASURE_CALL(0, fun_0());
    MEASURE_CALL(1, fun_1(1));
    MEASURE_CALL(2, fun_2(1, 2));
    MEASURE_CALL(3, fun_3(1, 2, 3));
    MEASURE_CALL(4, fun_4(1, 2, 3, 4));
    MEASURE_CALL(5, fun_5(1, 2, 3, 4, 5));
    MEASURE_CALL(6, fun_6(1, 2, 3, 4, 5, 6));
    MEASURE_CALL(7, fun_7(1, 2, 3, 4, 5, 6, 7));

#undef MEASURE_CALL
}
/*
 * Runs the multi-threaded allocator stress test once for every thread count
 * from min_threads to max_threads.
 *
 *   sleep_cnt    seconds the workers are left running per round
 *   min_threads  first thread count to test
 *   max_threads  last thread count to test
 *   chperthread  number of block slots handed to each thread
 *   num_rounds   multiplied by chperthread to give each thread's NumBlocks
 *
 * For each thread count: warm up the slots added since the previous round,
 * start the workers on exercise_heap, sleep, raise stopflag, wait until every
 * worker reports `finished`, then print per-thread counters, the allocation
 * throughput and the memory usage reported by mem_usage().
 *
 * NOTE(review): de_area has MAX_THREADS entries and is indexed up to
 * num_threads-1; nothing in this function checks max_threads against
 * MAX_THREADS — confirm the caller does.
 *
 * num_threads, blkp, blksize, min_size, max_size, rgen and stopflag are not
 * declared in this function — presumably file-level globals.
 */
void runthreads(long sleep_cnt, int min_threads, int max_threads, int chperthread, int num_rounds)
{
  thread_data de_area[MAX_THREADS] ;
  int nperthread ;
  int sum_threads ;
  int sum_allocs ;
  int sum_frees ;
  double duration ;
  long ticks_per_sec ;
  long start_cnt, end_cnt ;
  _int64 ticks ;
  double rate_1=0, rate_n ;
  double reqd_space ;
  ptrdiff_t used_space ;
  int prevthreads ;
  int i ;

  QueryPerformanceFrequency( &ticks_per_sec ) ;

  // NOTE(review): only the first element is zeroed here; the fields assigned
  // in the setup loop below are set for every thread before it starts.
  memset(&de_area[0], 0, sizeof(thread_data)) ;

  prevthreads = 0 ;
  for(num_threads=min_threads; num_threads <= max_threads; num_threads++ )
    {
      // Warm up only the slots that were not covered by the previous round.
      warmup(&blkp[prevthreads*chperthread], (num_threads-prevthreads)*chperthread );

      nperthread = chperthread ;
      stopflag = FALSE ;

      for(i=0; i< num_threads; i++){
        de_area[i].threadno = i+1 ;
        de_area[i].NumBlocks = num_rounds*nperthread;
        // Each thread works on its own slice of blkp / blksize.
        de_area[i].array = &blkp[i*nperthread] ;
        de_area[i].blksize = &blksize[i*nperthread] ;
        de_area[i].asize = nperthread ;
        de_area[i].min_size = min_size ;
        de_area[i].max_size = max_size ;
        de_area[i].seed = lran2(&rgen) ; ;
        de_area[i].finished = 0 ;
        de_area[i].cAllocs = 0 ;
        de_area[i].cFrees = 0 ;
        de_area[i].cThreads = 0 ;
        de_area[i].finished = FALSE ;
        lran2_init(&de_area[i].rgen, de_area[i].seed) ;

        _beginthread(exercise_heap, 0, &de_area[i]) ;
      }

      QueryPerformanceCounter( &start_cnt) ;

      //printf ("Sleeping for %ld seconds.\n", sleep_cnt);
      Sleep(sleep_cnt * 1000L) ;

      // Ask the workers to stop, then poll until each one has finished.
      stopflag = TRUE ;

      for(i=0; i<num_threads; i++){
        while( !de_area[i].finished ){
          sched_yield();
        }
      }

      QueryPerformanceCounter( &end_cnt) ;

      // Collect and print the per-thread counters, then reset them for the
      // next round.
      sum_frees = sum_allocs =0 ;
      sum_threads = 0 ;
      for(i=0;i< num_threads; i++){
        sum_allocs += de_area[i].cAllocs ;
        sum_frees += de_area[i].cFrees ;
        sum_threads += de_area[i].cThreads ;
        printf("area %d: %d allocs, %d threads\n",i,de_area[i].cAllocs,de_area[i].cThreads);
        de_area[i].cAllocs = de_area[i].cFrees = 0;
      }

      ticks = end_cnt - start_cnt ;
      duration = (double)ticks/ticks_per_sec ;

      for( i=0; i<num_threads; i++){
        if( !de_area[i].finished )
          printf("Thread at %d not finished\n", i) ;
      }

      rate_n = sum_allocs/duration ;
      // rate_1 keeps the rate of the first round; it is only read by the
      // disabled (#if 0) report below.
      if( rate_1 == 0){
        rate_1 = rate_n ;
      }

      // Expected live size: the mean of min_size and max_size for every slot
      // of every thread.
      reqd_space = (0.5*(min_size+max_size)*num_threads*chperthread) ;
      used_space = mem_usage();

      printf ("Throughput = %8.0f operations per second.\n", sum_allocs / duration);
      printf ("Memory used = %ld bytes, required %.0lf, ratio %lf\n",used_space,reqd_space,used_space/reqd_space);

#if 0
      printf("%2d ", num_threads ) ;
      printf("%6.3f", duration ) ;
      printf("%6.3f", rate_n/rate_1 ) ;
      printf("%8.0f", sum_allocs/duration ) ;
      printf(" %6.3f %.3f", (double)used_space/(1024*1024), used_space/reqd_space) ;
      printf("\n") ;
#endif

      Sleep(5000L) ; // wait 5 sec for old threads to die

      prevthreads = num_threads ;

      printf ("Done sleeping...\n");
    }
}