// Performance test for estimateAffine2D: builds a random 2x3 affine map,
// applies it to random source points, corrupts the tail of the destination
// points and times the robust estimation on the result.
PERF_TEST_P( EstimateAffine, EstimateAffine2D, ESTIMATE_PARAMS )
{
    AffineParams params = GetParam();
    const int numPoints = get<0>(params);
    const double confidence = get<1>(params);
    const int method = get<2>(params);
    const size_t refining = get<3>(params);

    // Ground-truth transform with coefficients drawn from [-2, 2).
    Mat trueAffine(2, 3, CV_64F);
    cv::randu(trueAffine, -2., 2.);

    // Points [numClean, numPoints) get corrupted below. LMEDS can't handle
    // more than 50% outliers (by design), so it keeps 3/5 of the points clean
    // while the other methods keep only 2/5.
    const int numClean = (method == LMEDS) ? 3*numPoints/5 : 2*numPoints/5;
    const float outlierShift = 15.f;
    const float noiseLevel = 20.f;

    Mat srcPts(1, numPoints, CV_32FC2);
    Mat dstPts(1, numPoints, CV_32FC2);

    randu(srcPts, 0., 100.);
    transform(srcPts, dstPts, trueAffine);

    // Corrupt the tail: constant shift on every coordinate plus uniform noise.
    Mat corrupted = dstPts.colRange(numClean, numPoints);
    corrupted.reshape(1) += outlierShift;

    Mat noise (corrupted.size(), corrupted.type());
    randu(noise, 0., noiseLevel);
    corrupted += noise;

    Mat estimated;
    vector<uchar> inlierMask (numPoints);

    warmup(inlierMask, WARMUP_WRITE);
    warmup(srcPts, WARMUP_READ);
    warmup(dstPts, WARMUP_READ);

    TEST_CYCLE()
    {
        estimated = estimateAffine2D(srcPts, dstPts, inlierMask, method, 3, 2000, confidence, refining);
    }

    // Accuracy is covered by separate tests; nothing to verify here.
    SANITY_CHECK_NOTHING();
}
// Returns the mean rdtsc tick count of one system call over TIMES samples.
// isCached selects getpid() (which the original author notes "has cache");
// otherwise getppid() is measured.
double CPUBenchmark::getSystemCallOverhead(bool isCached) {
  uint64_t tBegin, tEnd;
  uint64_t totalTicks = 0;

  warmup();
  int sample = 0;
  while (sample < TIMES) {
    // The timestamps are taken inside each branch so that the branch itself
    // is not part of the measured interval.
    if (!isCached) {
      tBegin = rdtscStart();
      getppid();
      tEnd = rdtscEnd();
    } else {
      tBegin = rdtscStart();
      getpid();
      tEnd = rdtscEnd();
    }

    totalTicks += (tEnd - tBegin);
    ++sample;
  }

  return (double) totalTicks / (double) TIMES;
}
Beispiel #3
0
// Runs sample() repeatedly until the elapsed time reported by secondtime()
// exceeds the budget, then prints iterations / elapsed * 1000.0.
// Aborts as soon as sample() reports failure.
int main(int argc, char** argv) {
  // NOTE(review): the budget is parsed from argv[0], which is normally the
  // program name -- confirm that the harness really passes it there.
  int budget = atoi(argv[0]);

  warmup();

  int completed = 0;
  double elapsed;
  double t0 = secondtime();

  // At least one sample always runs; the budget is checked after each one.
  do {
     if (!sample()) {
        abort();
     }

     ++completed;
     elapsed = secondtime() - t0;
  } while (!(elapsed > budget));

  double score = completed / elapsed * 1000.0;

  printf("%f\n", score);
}
// Returns the rdtsc tick count spent running an empty counting loop of
// `times` iterations (a single measurement, not an average).
uint64_t CPUBenchmark::getLoopOverhead(int times) {
  uint64_t tBegin;
  uint64_t tEnd;

  warmup();
  tBegin = rdtscStart();
  for (int k = 0; k < times; k++) {
    // intentionally empty: only the loop bookkeeping is being timed
  }
  tEnd = rdtscEnd();

  const uint64_t ticks = tEnd - tBegin;
  return ticks;
}
// Returns the mean tick count between a back-to-back rdtscStart()/rdtscEnd()
// pair over TIMES samples, i.e. the cost of taking the measurement itself.
double CPUBenchmark::getReadOverhead() {
  uint64_t tBegin, tEnd;
  uint64_t totalTicks = 0;

  warmup();
  int sample = 0;
  while (sample < TIMES) {
    tBegin = rdtscStart();
    tEnd = rdtscEnd();
    totalTicks += (tEnd - tBegin);
    ++sample;
  }

  return (double)totalTicks / (double)TIMES;
}
Beispiel #6
0
/*
 * Sweeps the benchmark held in a BS_test_t over problem size, grid size and
 * block size, appending one CSV row per successful run to `filename`.
 *
 *   sizeMin/sizeMax    problem sizes; the size is doubled on every step
 *   warmup_num_iters   iteration count handed to warmup()
 *   run_num_iters      iteration count handed to run()/post()/output()
 *   blockMin/blockMax  block-size range (step 64), clamped to t.maxBlockSize
 *   gridMin/gridMax    grid-size range (step 16*1024)
 *   filename           CSV file opened in append mode; if it cannot be
 *                      opened the sweep still runs, only the CSV is skipped
 *   gen                generator passed through to setup_buffers()
 *
 * Combinations where the grid size is not a multiple of the block size are
 * skipped.
 */
void test_BS (cl_uint sizeMin, cl_uint sizeMax, int warmup_num_iters, int run_num_iters, 
			  size_t blockMin, size_t blockMax, 
			  size_t gridMin, size_t gridMax,
			  char *filename, cl_float(*gen)(cl_float low, cl_float high))
{
	BS_test_t t;
	t.num = sizeMin;
	t.blockSize = blockMin;
	t.gridSize = gridMin;
	t.Riskfree = R;
	t.Volatility = V;
	bool result = false;

	FILE * csv = fopen(filename, "a");
	if (csv == NULL)
		fprintf(stderr, "can't open output file\n");
	else
		fprintf(csv, "size, block size, grid size, total time\n");

	setup_cl(&t);
	setup_buffers(&t, gen);

	/* never request more than t.maxBlockSize (set up by the calls above) */
	if (t.maxBlockSize < blockMin)
		blockMin = t.maxBlockSize;
	if (t.maxBlockSize < blockMax)
		blockMax = t.maxBlockSize;

	/* size_t must not be printed with %d; cast explicitly so the format and
	 * the arguments agree on every platform */
	printf("blockMIN: %lu blockMAX: %lu t.maxBlockSize: %lu\n",
		   (unsigned long)blockMin, (unsigned long)blockMax,
		   (unsigned long)t.maxBlockSize);

	warmup(&t, warmup_num_iters);

	for (t.num = sizeMin; t.num <= sizeMax; t.num <<= 1) {
		for (t.gridSize = gridMin; t.gridSize <= gridMax; t.gridSize += 16*1024) {
			for (t.blockSize = blockMin; t.blockSize <= blockMax; t.blockSize += 64) {
				/* a zero block size would divide by zero in the check below */
				if (t.blockSize == 0 || t.gridSize % t.blockSize != 0)
					continue;
				setup_buffers(&t, gen);
				result = run(&t, run_num_iters);
				post(&t, run_num_iters, result);
				if (result && csv != NULL)
					output(&t, csv, run_num_iters);
				cleanup(&t);
			}
		}
	}

	if (csv != NULL)
		fclose(csv);
}
// Returns the tick difference between a timestamp taken on this thread right
// after spawning a helper thread and the value the helper leaves in `tEnd`.
// The helper `foo` receives &tEnd; presumably it stores its own rdtsc reading
// there -- verify against foo's definition.
uint64_t CPUBenchmark::calculateThreadSwitchTime(){
  pthread_t helper;
  uint64_t tBegin;
  uint64_t tEnd;

  warmup();
  pthread_create(&helper, NULL, foo, &tEnd);

  tBegin = rdtscStart();
  // Joining guarantees the helper has finished before tEnd is read.
  pthread_join(helper, NULL);

  const uint64_t ticks = tEnd - tBegin;
  return ticks;
}
// Returns the mean tick count of creating a thread and joining it again,
// averaged over TASK_OP_TIMES rounds.
double CPUBenchmark::getKernelThreadCreationTime() {
  pthread_t worker;
  uint64_t tBegin, tEnd;
  double totalTicks = 0;

  warmup();
  int round = 0;
  while (round < TASK_OP_TIMES) {
    tBegin = rdtscStart();
    pthread_create(&worker, NULL, &threadStartRountine, NULL);
    // The join is inside the timed region: the caller waits for the new
    // thread before the end timestamp is taken.
    pthread_join(worker, NULL);
    tEnd = rdtscEnd();
    totalTicks += tEnd - tBegin;
    ++round;
  }

  return (double) totalTicks / (double) TASK_OP_TIMES;
}
// Measures the tick difference between a timestamp taken in the parent right
// after fork() and a timestamp taken in the child, which the child sends back
// through the pipe.
//
// fd: pipe descriptors owned by the caller; fd[0] is read from in the parent
//     and fd[1] is written to in the child. Neither end is closed here.
//
// Returns end - start when the child's timestamp is later than the parent's,
// and 0 otherwise. 0 is also returned when fork() fails or when the timestamp
// cannot be read back completely, instead of blocking on an empty pipe or
// using an uninitialised value.
uint64_t CPUBenchmark::calculateProcessSwitchTime(int *fd){
  uint64_t start = 0;
  uint64_t end = 0;

  warmup();
  const pid_t cpid = fork();
  if (cpid < 0) {
    // No child exists, so nothing will ever be written to the pipe.
    return 0;
  }

  if (cpid == 0) {
    // Child: take the timestamp, hand it to the parent and terminate.
    end = rdtscEnd();
    write(fd[1], (void*)&end, sizeof(uint64_t));
    exit(1);
  }

  // Parent: timestamp first, then let the child run to completion.
  start = rdtscStart();
  wait(NULL);
  if (read(fd[0], (void*)&end, sizeof(uint64_t)) != (ssize_t)sizeof(uint64_t)) {
    return 0;
  }

  return (end > start) ? (end - start) : 0;
}
Beispiel #10
0
/* Test-suite setup: creates the session, (re)creates the "davtest/"
 * collection below i_path and makes it the new i_path, runs warmup() and
 * prints the run parameters.
 *
 * Returns OK on success and FAILHARD when the collection cannot be created;
 * a failing init_session() is handled by the CALL macro. */
int begin(void)
{
    static const char stars[] = "**********************************";
    static const char blanks[] = "          ";
    const char *scheme;
    char *test_root;

    scheme = use_secure ? "https" : "http";

    i_session = ne_session_create(scheme, i_hostname, i_port);
    CALL(init_session(i_session));
    ne_hook_pre_send(i_session, i_pre_send, "X-Prestan");

    /* Remove whatever an earlier run left behind, then start from a fresh
     * collection. The result of the delete is deliberately not checked. */
    test_root = ne_concat(i_path, "davtest/", NULL);
    ne_delete(i_session, test_root);
    if (ne_mkcol(i_session, test_root) != 0) {
	t_context("Could not create new collection `%s' for tests: %s\n"
	  "Server must allow `MKCOL %s' for tests to proceed", 
	  test_root, ne_get_error(i_session), test_root);
	return FAILHARD;
    }

    /* From here on every test works below the new collection. */
    free(i_path);
    i_path = test_root;

    warmup();

    printf("\nStart Testing %s:\n\n",pget_option.URL) ;
    printf("\n%s%s\n", blanks, stars);
    printf("\n%s* Number of Requests\t\t%d\n", blanks, pget_option.requests);
    printf("\n%s* Number of Dead Properties\t%d\n", blanks, pget_option.numprops);
    printf("\n%s* Depth of Collection\t\t%d\n", blanks, pget_option.depth);
    printf("\n%s* Width of Collection\t\t%d\n", blanks, pget_option.width);
    printf("\n%s* Type of Methods\t\t%s\n", blanks, pget_option.methods);
    printf("\n%s%s\n", blanks, stars);
    printf("\n\n");

    return OK;
}
int main(int argc, char *argv[]){
    if(argc<=2){
        usage(argv[0]);
        exit(-1);
    } 

    int pagesize = getpagesize();
    CmdParam cmd;
    CacheStat stat;
    memset(&stat, 0, sizeof(stat));
    memset(&cmd, 0, sizeof(cmd));
    stat.isPrint = true; //default true
    parseArgs(argc, argv, &cmd);

    if(cmd.isDaemon)
        daemonMe();

    if(cmd.cmd_type == CLEAR){
        clear(cmd.path, cmd.file_type);
    }else if(cmd.cmd_type == STAT){
        normalStat(cmd.path, cmd.file_type, &stat);
        if(cmd.file_type == DIRECTORY){
            fprintf(stdout, "\nTotal Cache of Directory:%s size:%s cached:%s\n", 
                    cmd.path,
                    sizeFit(stat.pageCount*pagesize, buf1), 
                    sizeFit(stat.inCache*pagesize, buf2));
        }
    }else if(cmd.cmd_type == RSTAT){
        realStat(cmd.path, cmd.interval, cmd.file_type, cmd.isSuppress);
    }else if(cmd.cmd_type == LOCK){
        lock(cmd.path, cmd.file_type);
        select(0, NULL, NULL, NULL, NULL);
    }else if(cmd.cmd_type == WARM){
        warmup(cmd.path, cmd.file_type);
    }

    return 0;
}
// Returns the mean tick count of fork() plus waiting for the child to exit,
// averaged over TASK_OP_TIMES rounds.
double CPUBenchmark::getProcessCreationTime() {
  pid_t child;
  uint64_t tBegin, tEnd;
  uint64_t totalTicks = 0;

  warmup();
  int round = 0;
  while (round < TASK_OP_TIMES) {
    tBegin = rdtscStart();
    child = fork();
    if (child == 0) {
      // Child: nothing to do, terminate right away.
      exit(1);
    }

    // Parent: the interval ends once the child has been reaped.
    wait(NULL);
    tEnd = rdtscEnd();
    totalTicks += (tEnd - tBegin);
    ++round;
  }

  return (double) totalTicks / (double) TASK_OP_TIMES;
}
Beispiel #13
0
/*
 * Driver for the tiled Cholesky (LLT) test.
 *
 * Command line: N NB memsize(M)
 *   argv[1] -> N, argv[2] -> NB, argv[3] -> memsize, given in MiB and
 *   converted to bytes below.
 *
 * Steps visible here: set up a 1x1 BLACS grid, initialise the backend chosen
 * at compile time (USE_MIC offload, cuBLAS v2 handles, or cublasInit), fill
 * test_A with pdmatgen, run Cholesky() on a QUARK instance, print the
 * time/Gflops line and, when VERIFY is defined, solve against a regenerated
 * copy of the matrix with dpotrs and report the distance from the identity.
 */
int main(int argc, char **argv)
{
    // Element accessors that use N as the column stride; LDA is set to
    // N + 0 below, so both strides coincide in this file.
    #define test_A(i,j) test_A[(size_t)(j)*N+(i)]
    #define test_A2(i,j) test_A2[(size_t)(j)*N+(i)]
    int N,NB,w,LDA,BB;
    size_t memsize; //bytes
    int iam, nprocs, mydevice;
    int ICTXT, nprow, npcol, myprow, mypcol;
    int i_one = 1, i_zero = 0, i_negone = -1;
    double d_one = 1.0, d_zero = 0.0, d_negone = -1.0;
    int IASEED = 100;
/*  printf("N=?\n");
    scanf("%ld",&N);
    printf("NB=?\n");
    scanf("%d", &NB);
    printf("width of Y panel=?\n");
    scanf("%ld",&w);
*/
    if(argc < 4){
        printf("invalid arguments N NB memsize(M)\n");
        exit(1);
    }
    N = atoi(argv[1]);
    NB = atoi(argv[2]);
    memsize = (size_t)atoi(argv[3])*1024*1024;
    // BB = ceil(N / NB), i.e. the number of NB-wide blocks along one dimension.
    BB = (N + NB - 1) / NB;
    // NOTE(review): w is only checked by the assert below and not used again
    // in this function; presumably it is a panel width that must fit into
    // memsize -- confirm against Cholesky().
    w = memsize/sizeof(double)/BB/NB/NB - 1;
    assert(w > 0);
    LDA = N + 0; //padding

    // Matrices are only printed when N does not exceed NSIZE.
    int do_io = (N <= NSIZE);
    double llttime;
    double gflops;
    
    // Single-process 1x1 BLACS grid.
    nprow = npcol = 1;
    blacs_pinfo_(&iam, &nprocs);
    blacs_get_(&i_negone, &i_zero, &ICTXT);
    blacs_gridinit_(&ICTXT, "R", &nprow, &npcol);
    blacs_gridinfo_(&ICTXT, &nprow, &npcol, &myprow, &mypcol);
    #ifdef USE_MIC
        #ifdef __INTEL_OFFLOAD
            printf("offload compilation enabled\ninitialize each MIC\n");
            offload_init(&iam, &mydevice);
            #pragma offload target(mic:0)
            {
                mkl_peak_mem_usage(MKL_PEAK_MEM_ENABLE);
            }
        #else
            // NOTE(review): `isroot` is not declared in this function; it must
            // come from elsewhere. The exit(0) below is unconditional.
            if(isroot)
                printf("offload compilation not enabled\n");
            exit(0);
        #endif
    #else
        #ifdef USE_CUBLASV2
        {
            // One cuBLAS handle per worker thread.
            cublasStatus_t cuStatus;
            for(int r = 0; r < OOC_NTHREADS; r++){
                cuStatus = cublasCreate(&worker_handle[r]);
                assert(cuStatus == CUBLAS_STATUS_SUCCESS);
            }
        }
        #else
            cublasInit();
        #endif
    #endif

    double *test_A = (double*)memalign(64,(size_t)LDA*N*sizeof(double)); // for chol
#ifdef VERIFY
    double *test_A2 = (double*)memalign(64,(size_t)LDA*N*sizeof(double)); // for verify
#endif
    
    /*Initialize A */
    int i,j;
    printf("Initialize A ... "); fflush(stdout);
    // llttime is reused as a stopwatch for the generation step.
    llttime = MPI_Wtime();
    pdmatgen(&ICTXT, "Symm", "Diag", &N,
         &N, &NB, &NB,
         test_A, &LDA, &i_zero, &i_zero,
         &IASEED, &i_zero, &N, &i_zero, &N,
         &myprow, &mypcol, &nprow, &npcol); 
    llttime = MPI_Wtime() - llttime;
    printf("time %lf\n", llttime);
              
    /*print test_A*/
    if(do_io){
        printf("Original A=\n\n");
        matprint(test_A, N, LDA, 'A');
    }

    /*Use directed unblocked Cholesky factorization*/    
    /*
    t1 = clock();
    Test_dpotrf(test_A2,N);
    t2 = clock();
    printf ("time for unblocked Cholesky factorization on host %f \n",
        ((float) (t2 - t1)) / CLOCKS_PER_SEC);
    */
    
    /*print test_A*/
    /*
    if(do_io){
        printf("Unblocked result:\n\n");
        matprint(test_A2,N,'L');   
    }
    */ 

    /*Use tile algorithm*/
    Quark *quark = QUARK_New(OOC_NTHREADS);
    // DAG output is switched off first and then set from the
    // QUARK_DOT_DAG_ENABLE environment variable (default 0) after the
    // optional MIC warm-up.
    QUARK_DOT_DAG_Enable(quark, 0);
    #ifdef USE_MIC
//      mklmem(NB);
        printf("QUARK MIC affinity binding\n");
        QUARK_bind(quark);
        printf("offload warm up\n");
        warmup(quark);
    #endif
    QUARK_DOT_DAG_Enable(quark, quark_getenv_int("QUARK_DOT_DAG_ENABLE", 0));
    printf("LLT start %lf\n", MPI_Wtime());
    // The value returned by Cholesky() is used as the elapsed time in the
    // Gflops computation below.
    llttime = Cholesky(quark,test_A,N,NB,LDA,memsize);
    printf("LLT end %lf\n", MPI_Wtime());
    QUARK_Delete(quark);
    #ifdef USE_MIC
        offload_destroy();
    #else
        #ifdef USE_CUBLASV2
        {
            cublasStatus_t cuStatus;
            for(int r = 0; r < OOC_NTHREADS; r++){ 
                cuStatus = cublasDestroy(worker_handle[r]);
                assert(cuStatus == CUBLAS_STATUS_SUCCESS);
            }
        }
        #else
            cublasShutdown();
        #endif
    #endif

    // Operation count (N/3 + 1/2) * N^2 divided by the run time; note that the
    // scaling uses 1024^3 rather than 10^9.
    gflops = (double) N;
    gflops = gflops/3.0 + 0.5;
    gflops = gflops*(double)(N)*(double)(N);
    gflops = gflops/llttime/1024.0/1024.0/1024.0;
    printf ("N NB memsize(MB) quark_pthreads time Gflops\n%d %d %lf %d %lf %lf\n",
        N, NB, (double)memsize/1024/1024, OOC_NTHREADS, llttime, gflops);
    #ifdef USE_MIC
        // memsize is overwritten here with the value returned by
        // mkl_peak_mem_usage and only used for the printout below.
        #pragma offload target(mic:0)
        {
            memsize = mkl_peak_mem_usage(MKL_PEAK_MEM_RESET);
        }
        printf("mkl_peak_mem_usage %lf MB\n", (double)memsize/1024.0/1024.0);
    #endif

    /*Update and print L*/             
    if(do_io){
        printf("L:\n\n");
        matprint(test_A,N,LDA,'L');
    }
#ifdef VERIFY
    printf("Verify... ");
    // llttime is reused once more, now as the start time of the verification.
    llttime = MPI_Wtime();
  /*
   * ------------------------
   * check difference betwen 
   * test_A and test_A2
   * ------------------------
   */
    /*
    {
    double maxerr = 0;
    double maxerr2 = 0;

    for (j = 0; j < N; j++)
      {
        for (i = j; i < N; i++)
          {
            double err = (test_A (i, j) - test_A2 (i, j));
            err = ABS (err);
            maxerr = MAX (err, maxerr);
            maxerr2 = maxerr2 + err * err;
          };
      };
    maxerr2 = sqrt (ABS (maxerr2));
    printf ("max difference between test_A and test_A2 %lf \n", maxerr);
    printf ("L2 difference between test_A and test_A2 %lf \n", maxerr2);
    };
    */

  /*
   * ------------------
   * over-write test_A2
   * ------------------
   */
   
    // Regenerate the original matrix with the same seed into test_A2.
    pdmatgen(&ICTXT, "Symm", "Diag", &N,
         &N, &NB, &NB,
         test_A2, &LDA, &i_zero,
         &i_zero, &IASEED, &i_zero, &N, &i_zero, &N,
         &myprow, &mypcol, &nprow, &npcol);

  /*
   * ---------------------------------------
   * after solve, test_A2 should be identity
   * ---------------------------------------
   */
  // test_A = chol(B) = L;
  // test_A2 = B
  // solve L*L'*X = B
  // if L is correct, X is identity */
     
    {
    int uplo = 'L';
    const char *uplo_char = ((uplo == (int) 'U')
                    || (uplo == (int) 'u')) ? "U" : "L";
    int info = 0;
    int nrhs = N;
    // This LDA shadows the outer LDA for the duration of this block.
    int LDA = N;
    int ldb = N;
    dpotrs(uplo_char, &N, &nrhs, test_A, &LDA, test_A2, &ldb, &info);
    assert (info == 0);
    }

    {
    // maxerr: largest absolute deviation from the identity;
    // maxerr2: square root of the sum of squared deviations.
    double maxerr = 0;
    double maxerr2 = 0;

    for (j = 0; j < N; j++)
      {
        for (i = 0; i < N; i++)
          {
            double eyeij = (i == j) ? 1.0 : 0.0;
            double err = (test_A2 (i, j) - eyeij);
            err = ABS (err);
            maxerr = MAX (maxerr, err);
            maxerr2 = maxerr2 + err * err;
          };
      };

    maxerr2 = sqrt (ABS (maxerr2));
    printf("time %lf\n", MPI_Wtime() - llttime);
    printf ("max error %lf \n", maxerr);
    printf ("max L2 error %lf \n", maxerr2);
    }
#endif

    free(test_A);test_A=NULL;
#ifdef VERIFY
    free(test_A2);test_A2=NULL;
#endif
    blacs_gridexit_(&ICTXT);
    blacs_exit_(&i_zero);
    return 0;
    #undef test_A
    #undef test_A2
}
Beispiel #14
0
//#ifdef _MT
// Runs the multi-threaded allocation benchmark once for every thread count
// from min_threads to max_threads and prints the throughput of each run.
//
//   sleep_cnt     seconds the worker threads are left running per run
//   min_threads   first thread count tried
//   max_threads   last thread count tried; also the size of the per-thread
//                 data array allocated here
//   chperthread   number of block slots handed to each thread
//   num_rounds    multiplied by chperthread to give each thread's NumBlocks
//
// num_threads, stopflag, blkp, blksize, min_size, max_size and rgen are not
// declared in this function; they are defined elsewhere in the file.
void runthreads(long sleep_cnt, int min_threads, int max_threads, int chperthread, int num_rounds)
{
  thread_data *de_area = new thread_data[max_threads] ;
  thread_data *pdea;
  int           nperthread ;
  int           sum_threads ;
  unsigned long  sum_allocs ;
  unsigned long  sum_frees ;
  double        duration ;
#ifdef __WIN32__
	_LARGE_INTEGER ticks_per_sec, start_cnt, end_cnt;
#else
	long ticks_per_sec ;
  long start_cnt, end_cnt ;
#endif
	_int64        ticks ;
  double        rate_1=0, rate_n ;
  double        reqd_space ;
  ULONG         used_space ;
  int           prevthreads ;
  int           i ;

  QueryPerformanceFrequency( &ticks_per_sec ) ;

  // pdea is assigned here but not read again in this function.
  pdea = &de_area[0] ;
  // Only the first element is zeroed; every field used below is assigned
  // explicitly for each thread before the thread is started.
  memset(&de_area[0], 0, sizeof(thread_data)) ;

  prevthreads = 0 ;
  for(num_threads=min_threads; num_threads <= max_threads; num_threads++ )
    {

      // Warm up only the block slots of the threads added since the last run.
      warmup(&blkp[prevthreads*chperthread], (num_threads-prevthreads)*chperthread );

      nperthread = chperthread ;
      stopflag   = FALSE ;
		
      // Give each thread its own slice of blkp/blksize and its own seed,
      // then start it.
      for(i=0; i< num_threads; i++){
	de_area[i].threadno    = i+1 ;
	de_area[i].NumBlocks   = num_rounds*nperthread;
	de_area[i].array       = &blkp[i*nperthread] ;
	de_area[i].blksize     = &blksize[i*nperthread] ;
	de_area[i].asize       = nperthread ;
	de_area[i].min_size    = min_size ;
	de_area[i].max_size    = max_size ;
	de_area[i].seed        = lran2(&rgen) ; ;
	de_area[i].finished    = 0 ;
	de_area[i].cAllocs     = 0 ;
	de_area[i].cFrees      = 0 ;
	de_area[i].cThreads    = 0 ;
	de_area[i].finished    = FALSE ;
	lran2_init(&de_area[i].rgen, de_area[i].seed) ;

#ifdef __WIN32__
	_beginthread((void (__cdecl*)(void *)) exercise_heap, 0, &de_area[i]) ;  
#else
	_beginthread(exercise_heap, 0, &de_area[i]) ;  
#endif

	}

      QueryPerformanceCounter( &start_cnt) ;

      // printf ("Sleeping for %ld seconds.\n", sleep_cnt);
      Sleep(sleep_cnt * 1000L) ;

      // Ask the workers to stop, then poll until each one reports finished.
      stopflag = TRUE ;

      for(i=0; i<num_threads; i++){
	while( !de_area[i].finished ){
#ifdef __WIN32__
		Sleep(1);
#elif defined(__SVR4)
		thr_yield();
#else
		sched_yield();
#endif
	}
      }


      QueryPerformanceCounter( &end_cnt) ;

      // Collect the per-thread counters and reset them for the next run.
      sum_frees = sum_allocs =0  ;
      sum_threads = 0 ;
      for(i=0;i< num_threads; i++){
	sum_allocs    += de_area[i].cAllocs ;
	sum_frees     += de_area[i].cFrees ;
	sum_threads   += de_area[i].cThreads ;
	de_area[i].cAllocs = de_area[i].cFrees = 0;
      }

 
#ifdef __WIN32__
      ticks = end_cnt.QuadPart - start_cnt.QuadPart ;
     duration = (double)ticks/ticks_per_sec.QuadPart ;
#else
      ticks = end_cnt - start_cnt ;
     duration = (double)ticks/ticks_per_sec ;
#endif

      for( i=0; i<num_threads; i++){
	if( !de_area[i].finished )
	  printf("Thread at %d not finished\n", i) ;
      }


      // rate_1 keeps the rate of the first run; together with reqd_space and
      // used_space it is only consumed by the disabled report below.
      rate_n = sum_allocs/duration ;
      if( rate_1 == 0){
	rate_1 = rate_n ;
      }
		
      reqd_space = (0.5*(min_size+max_size)*num_threads*chperthread) ;
      // used_space = CountReservedSpace() - init_space;
      used_space = 0;
      
      printf ("Throughput = %8.0f operations per second.\n", sum_allocs / duration);

#if 0
      printf("%2d ", num_threads ) ;
      printf("%6.3f", duration  ) ;
      printf("%6.3f", rate_n/rate_1 ) ;
      printf("%8.0f", sum_allocs/duration ) ;
      printf(" %6.3f %.3f", (double)used_space/(1024*1024), used_space/reqd_space) ;
      printf("\n") ;
#endif

      Sleep(5000L) ; // wait 5 sec for old threads to die

      prevthreads = num_threads ;

      printf ("Done sleeping...\n");

    }
  delete [] de_area;
}
/*
 * Asks the kernel to pull a file, or every non-hidden entry of a directory
 * tree, into the page cache via posix_fadvise(POSIX_FADV_WILLNEED).
 *
 *   path       file or directory to warm up
 *   file_type  REGFILE: advise the whole file;
 *              DIRECTORY: recurse into regular files and sub-directories,
 *              skipping every entry whose name starts with '.'
 *
 * Returns true on success. Returns false after printing the reason to stderr
 * when path cannot be stat'ed or opened, or when the advice call fails.
 * Failures of individual directory entries are reported by the recursive
 * call but do not fail the directory as a whole.
 */
bool warmup(const char *path, enum FileType file_type){
    int fd = -1;            /* -1 means "nothing to close"; 0 is a valid fd */
    DIR *dir = NULL;
    int time_used = 0;
    int rc = 0;
    int len = 0;
    char file_name[4096];
    struct stat st;
    struct timeval begin, end;
    struct dirent *dp = NULL;

    if(stat(path, &st)<0){
        goto ERROR;
    }

    gettimeofday(&begin, NULL);
    if(file_type == REGFILE){
        fd = open(path, O_RDONLY);
        if(fd<0){
            goto ERROR;
        }

        /* posix_fadvise returns the error number instead of setting errno;
         * copy it over so the shared error path prints the right reason. */
        rc = posix_fadvise(fd, 0, st.st_size, POSIX_FADV_WILLNEED);
        if(rc != 0){
            errno = rc;
            goto ERROR;
        }
    }else if(file_type == DIRECTORY){
        if((dir = opendir(path)) == NULL){
            goto ERROR;
        }

        while((dp = readdir(dir)) != NULL){
            if(dp->d_name[0] != '.'){
                /* Build "<path>/<entry>" with a bounds check; an entry that
                 * does not fit is skipped instead of overflowing the buffer. */
                len = snprintf(file_name, sizeof(file_name), "%s/%s", path, dp->d_name);
                if(len < 0 || (size_t)len >= sizeof(file_name)){
                    fprintf(stderr, "File:%s/%s path too long, skipped\n", path, dp->d_name);
                    continue;
                }
                if(dp->d_type == DT_REG){
                    warmup(file_name, REGFILE);
                }else if(dp->d_type == DT_DIR){
                    warmup(file_name, DIRECTORY);
                }else{
                    fprintf(stdout, "%s:%c type unsupported!\n", dp->d_name, dp->d_type);
                }
            }
        }
    }

    gettimeofday(&end, NULL);
    time_used = getUsedTime(&begin, &end);

    if(file_type == REGFILE){
        fprintf(stdout, "Warmup File:%s TimeUsed:%d ms\n", path, time_used);
        close(fd);
    }else if(file_type == DIRECTORY){
        fprintf(stdout, "Warmup Dir:%s TimeUsed:%d ms\n", path, time_used);
        closedir(dir);
    }
    return true;

ERROR:
    fprintf(stderr, "File:%s %s\n", path, strerror(errno));
    if(fd >= 0) close(fd);
    if(dir) closedir(dir);
    return false;
}
// Measures the mean tick count of calling fun_0 .. fun_7, i.e. procedures
// taking 0 to 7 integer arguments, TIMES calls each.
// result[k] receives the average for the k-argument call; the vector must
// already hold at least 8 elements because it is indexed, not appended to.
void CPUBenchmark::getProcedureOverhead(vector<double> &result){
    uint64_t tBegin, tEnd;
    uint64_t ticks;

    // no arguments
    ticks = 0;
    warmup();
    for (int rep = 0; rep < TIMES; ++rep) {
      tBegin = rdtscStart();
      fun_0();
      tEnd = rdtscEnd();
      ticks += (tEnd - tBegin);
    }
    result[0] = (double)ticks / (double)TIMES;

    // one argument
    warmup();
    ticks = 0;
    for (int rep = 0; rep < TIMES; ++rep) {
      tBegin = rdtscStart();
      fun_1(1);
      tEnd = rdtscEnd();
      ticks += (tEnd - tBegin);
    }
    result[1] = (double)ticks / (double)TIMES;

    // two arguments
    warmup();
    ticks = 0;
    for (int rep = 0; rep < TIMES; ++rep) {
      tBegin = rdtscStart();
      fun_2(1, 2);
      tEnd = rdtscEnd();
      ticks += (tEnd - tBegin);
    }
    result[2] = (double)ticks / (double)TIMES;

    // three arguments
    warmup();
    ticks = 0;
    for (int rep = 0; rep < TIMES; ++rep) {
      tBegin = rdtscStart();
      fun_3(1, 2, 3);
      tEnd = rdtscEnd();
      ticks += (tEnd - tBegin);
    }
    result[3] = (double)ticks / (double)TIMES;

    // four arguments
    warmup();
    ticks = 0;
    for (int rep = 0; rep < TIMES; ++rep) {
      tBegin = rdtscStart();
      fun_4(1, 2, 3, 4);
      tEnd = rdtscEnd();
      ticks += (tEnd - tBegin);
    }
    result[4] = (double)ticks / (double)TIMES;

    // five arguments
    warmup();
    ticks = 0;
    for (int rep = 0; rep < TIMES; ++rep) {
      tBegin = rdtscStart();
      fun_5(1, 2, 3, 4, 5);
      tEnd = rdtscEnd();
      ticks += (tEnd - tBegin);
    }
    result[5] = (double)ticks / (double)TIMES;

    // six arguments
    warmup();
    ticks = 0;
    for (int rep = 0; rep < TIMES; ++rep) {
      tBegin = rdtscStart();
      fun_6(1, 2, 3, 4, 5, 6);
      tEnd = rdtscEnd();
      ticks += (tEnd - tBegin);
    }
    result[6] = (double)ticks / (double)TIMES;

    // seven arguments
    warmup();
    ticks = 0;
    for (int rep = 0; rep < TIMES; ++rep) {
      tBegin = rdtscStart();
      fun_7(1, 2, 3, 4, 5, 6, 7);
      tEnd = rdtscEnd();
      ticks += (tEnd - tBegin);
    }
    result[7] = (double)ticks / (double)TIMES;
}
Beispiel #17
0
// Runs the multi-threaded allocation benchmark once for every thread count
// from min_threads to max_threads and prints throughput and memory usage for
// each run.
//
//   sleep_cnt     seconds the worker threads are left running per run
//   min_threads   first thread count tried
//   max_threads   last thread count tried
//   chperthread   number of block slots handed to each thread
//   num_rounds    multiplied by chperthread to give each thread's NumBlocks
//
// num_threads, stopflag, blkp, blksize, min_size, max_size and rgen are not
// declared in this function; they are defined elsewhere in the file.
//
// NOTE(review): de_area has MAX_THREADS entries and nothing visible here
// checks max_threads against that bound -- confirm the caller does.
void runthreads(long sleep_cnt, int min_threads, int max_threads, int chperthread, int num_rounds)
{
  thread_data  de_area[MAX_THREADS] ;
  int           nperthread ;
  int           sum_threads ;
  int           sum_allocs ;
  int           sum_frees ;
  double        duration ;
  long ticks_per_sec ;
  long start_cnt, end_cnt ;
  _int64        ticks ;
  double        rate_1=0, rate_n ;
  double        reqd_space ;
  ptrdiff_t     used_space ;
  int           prevthreads ;
  int           i ;

  QueryPerformanceFrequency( &ticks_per_sec ) ;

  // Only the first element is zeroed; every field used below is assigned
  // explicitly for each thread before the thread is started.
  memset(&de_area[0], 0, sizeof(thread_data)) ;

  prevthreads = 0 ;
  for(num_threads=min_threads; num_threads <= max_threads; num_threads++ )
    {

      // Warm up only the block slots of the threads added since the last run.
      warmup(&blkp[prevthreads*chperthread], (num_threads-prevthreads)*chperthread );

      nperthread = chperthread ;
      stopflag   = FALSE ;
		
      // Give each thread its own slice of blkp/blksize and its own seed,
      // then start it.
      for(i=0; i< num_threads; i++){
	de_area[i].threadno    = i+1 ;
	de_area[i].NumBlocks   = num_rounds*nperthread;
	de_area[i].array       = &blkp[i*nperthread] ;
	de_area[i].blksize     = &blksize[i*nperthread] ;
	de_area[i].asize       = nperthread ;
	de_area[i].min_size    = min_size ;
	de_area[i].max_size    = max_size ;
	de_area[i].seed        = lran2(&rgen) ; ;
	de_area[i].finished    = 0 ;
	de_area[i].cAllocs     = 0 ;
	de_area[i].cFrees      = 0 ;
	de_area[i].cThreads    = 0 ;
	de_area[i].finished    = FALSE ;
	lran2_init(&de_area[i].rgen, de_area[i].seed) ;

	_beginthread(exercise_heap, 0, &de_area[i]) ;  

      }

      QueryPerformanceCounter( &start_cnt) ;

      //printf ("Sleeping for %ld seconds.\n", sleep_cnt);
      Sleep(sleep_cnt * 1000L) ;
      // Ask the workers to stop, then yield until each one reports finished.
      stopflag = TRUE ;

      for(i=0; i<num_threads; i++){
	while( !de_area[i].finished ){
		sched_yield();
	}
      }


      QueryPerformanceCounter( &end_cnt) ;

      // Collect and print the per-thread counters, then reset them for the
      // next run.
      sum_frees = sum_allocs =0  ;
      sum_threads = 0 ;
      for(i=0;i< num_threads; i++){
	sum_allocs    += de_area[i].cAllocs ;
	sum_frees     += de_area[i].cFrees ;
	sum_threads   += de_area[i].cThreads ;
	printf("area %d: %d allocs, %d threads\n",i,de_area[i].cAllocs,de_area[i].cThreads);
	de_area[i].cAllocs = de_area[i].cFrees = 0;
      }

 
      ticks = end_cnt - start_cnt ;
     duration = (double)ticks/ticks_per_sec ;

      for( i=0; i<num_threads; i++){
	if( !de_area[i].finished )
	  printf("Thread at %d not finished\n", i) ;
      }


      // rate_1 keeps the rate of the first run; it is only consumed by the
      // disabled report below.
      rate_n = sum_allocs/duration ;
      if( rate_1 == 0){
	rate_1 = rate_n ;
      }
		
      reqd_space = (0.5*(min_size+max_size)*num_threads*chperthread) ;
      used_space = mem_usage();
      
      printf ("Throughput = %8.0f operations per second.\n", sum_allocs / duration);
      // NOTE(review): used_space is a ptrdiff_t but is printed with %ld.
      printf ("Memory used = %ld bytes, required %.0lf, ratio %lf\n",used_space,reqd_space,used_space/reqd_space);

#if 0
      printf("%2d ", num_threads ) ;
      printf("%6.3f", duration  ) ;
      printf("%6.3f", rate_n/rate_1 ) ;
      printf("%8.0f", sum_allocs/duration ) ;
      printf(" %6.3f %.3f", (double)used_space/(1024*1024), used_space/reqd_space) ;
      printf("\n") ;
#endif

      Sleep(5000L) ; // wait 5 sec for old threads to die

      prevthreads = num_threads ;

      printf ("Done sleeping...\n");

    }
}