예제 #1
0
파일: dgemm_3.c 프로젝트: yoyz/mpi
/*
 * Triad-style memory bandwidth benchmark: A[j] = B[j] + scalar * C[j].
 *
 * Allocates three SIZE x SIZE arrays of doubles with mkl_malloc (64-byte
 * alignment), fills them, runs the triad kernel NTIME times and prints the
 * measured rate in GB/s, counting three 8-byte accesses per element per pass.
 *
 * Returns 0 on success, 1 when any of the allocations fails.
 */
int bench_stream_triad()
{
    double *A, *B, *C;
    double t;
    int64_t m, n, k, i, j;
    m = SIZE, k = SIZE, n = SIZE;
    double scalar=3.14;
    A = (double *)mkl_malloc( m*k*sizeof( double ), 64 );
    B = (double *)mkl_malloc( k*n*sizeof( double ), 64 );
    C = (double *)mkl_malloc( m*n*sizeof( double ), 64 );

    /* Validate the allocations BEFORE the first write: the fill loops below
       would otherwise dereference a NULL pointer. */
    if (A == NULL || B == NULL || C == NULL) {
      printf( "\n ERROR: Can't allocate memory for matrices. Aborting... \n\n");
      mkl_free(A);
      mkl_free(B);
      mkl_free(C);
      return 1;
    }

#pragma omp parallel for 
    for (i = 0; i < (m*k); i++) {
        A[i] = (double)(i+1);
    }
#pragma omp parallel for 
    for (i = 0; i < (k*n); i++) {
        B[i] = (double)(-i-1);
    }

#pragma omp parallel for 
    for (i = 0; i < (m*n); i++) {
        C[i] = 0.0;
    }

    /* Timed section: NTIME passes of the triad over all m*k elements. */
    t=stoptime();
    for (i=0;i<NTIME;i++)
#pragma omp parallel for    
      for (j=0; j<(m*k); j++)
	A[j] = B[j]+scalar*C[j];
    t=stoptime()-t;

    /* bytes moved = elements * 3 arrays * 8 bytes * NTIME passes */
    printf("GB/s         : %f\n",(((((m*k)*3)*8)*NTIME)/t)*1E-9);
    DPRINTF("\n Deallocating memory \n\n");
    mkl_free(A);
    mkl_free(B);
    mkl_free(C);
    return 0;
}
예제 #2
0
파일: mydgemm_1.c 프로젝트: yoyz/dgemm
/*
 * Naive triple-loop N x N matrix multiplication benchmark.
 *
 * Allocates three N x N matrices as arrays of row pointers, fills the two
 * inputs with deterministic values, zeroes the result, times
 * res1 = mul1 * mul2 and prints the elapsed time and the achieved GFLOP/s
 * (2*N^3 floating-point operations).
 *
 * Returns 0 on success, 1 when an allocation fails.
 */
int main()
{
  double t;
  long   i,j,k;
  double ** mul1;
  double ** mul2;
  double ** res1;
  int status = 1;

  /* calloc the row-pointer tables so every slot starts out NULL; that keeps
     the cleanup path safe after a partial allocation failure. */
  mul1=calloc(N, sizeof *mul1);
  mul2=calloc(N, sizeof *mul2);
  res1=calloc(N, sizeof *res1);
  if (mul1 == NULL || mul2 == NULL || res1 == NULL)
    {
      printf("ERROR: can't allocate memory for matrices\n");
      goto cleanup;
    }

  for (i = 0; i < N; ++i)
    {
      mul1[i]=malloc(sizeof(double)*N);
      mul2[i]=malloc(sizeof(double)*N);
      /* The kernel accumulates with +=, so the result must start at zero. */
      res1[i]=calloc(N, sizeof(double));
      if (mul1[i] == NULL || mul2[i] == NULL || res1[i] == NULL)
        {
          printf("ERROR: can't allocate memory for matrices\n");
          goto cleanup;
        }

      /* Give the inputs defined contents (reading malloc'ed memory without
         writing it first is undefined behaviour). */
      for (j = 0; j < N; ++j)
        {
          mul1[i][j] = (double)(i*(long)(N) + j + 1);
          mul2[i][j] = (double)(-(i*(long)(N) + j) - 1);
        }
    }

  t=stoptime();

  for (i = 0; i < N; ++i)
    for (j = 0; j < N; ++j)
      for (k = 0; k < N; ++k)
        res1[i][j] += mul1[i][k] * mul2[k][j];

  t=stoptime()-t;

  printf("calculation time : %f\n",t);
  printf("gflops/s         : %f\n",((2.0*N*N*N)*1E-9)/t);

  /* Print the last element so the computation has an observable result. */
  printf("res1[i][j]:%f\n",res1[(N)-1][(N)-1]);
  status = 0;

cleanup:
  for (i = 0; i < N; ++i)
    {
      if (mul1 != NULL) free(mul1[i]);
      if (mul2 != NULL) free(mul2[i]);
      if (res1 != NULL) free(res1[i]);
    }
  free(mul1);
  free(mul2);
  free(res1);
  return status;
}
예제 #3
0
/*
 * SIGTSTP handler: tidy up the screen, suspend the process group with the
 * default SIGTSTP action, and re-arm this handler once execution resumes.
 * The signal number argument is not used.
 */
static void stop_catcher(int signo UNUSED)
{
    sigset_t tstp_only, saved_mask;

    /* Calls stoptime(), then switches curses out of raw mode, turns echo
       back on and moves the cursor to the last screen line. */
    stoptime();
    noraw();
    echo();
    move(nlines - 1, 0);
    refresh();

    /* Restore the default disposition and unblock SIGTSTP so that the
       signal sent below actually stops us. */
    signal(SIGTSTP, SIG_DFL);
    sigemptyset(&tstp_only);
    sigaddset(&tstp_only, SIGTSTP);
    sigprocmask(SIG_UNBLOCK, &tstp_only, &saved_mask);

    /* pid 0 addresses the whole process group of the caller. */
    kill(0, SIGTSTP);

    /* Execution continues here after the process is resumed: put the
       previous signal mask back and reinstall this handler. */
    sigprocmask(SIG_SETMASK, &saved_mask, (sigset_t *) 0);
    signal(SIGTSTP, stop_catcher);
}
예제 #4
0
void CDlgHistoryLogUser::OnBnClickedButtonHlQuery()
{
	UpdateData(true);

	int i = 0, j = 0;
	char s_starttime[128]={0};
	char s_stoptime[128]={0};

	CTime starttime(m_StartDay.GetYear(), m_StartDay.GetMonth(), m_StartDay.GetDay(),
		m_StartTime.GetHour(), m_StartTime.GetMinute(), m_StartTime.GetSecond());
	CTime stoptime(m_StopDay.GetYear(), m_StopDay.GetMonth(), m_StopDay.GetDay(),
		m_StopTime.GetHour(), m_StopTime.GetMinute(), m_StopTime.GetSecond());

	if (stoptime <= starttime)
	{
		MessageBox("时间选择错误:开始时间大于结束时间","视频监视");
		return;
	}

	sprintf(s_starttime, "%04d-%02d-%02d %02d:%02d:%02d",
		m_StartDay.GetYear(), m_StartDay.GetMonth(), m_StartDay.GetDay(),
		m_StartTime.GetHour(), m_StartTime.GetMinute(), m_StartTime.GetSecond());
	sprintf(s_stoptime, "%04d-%02d-%02d %02d:%02d:%02d",
		m_StopDay.GetYear(), m_StopDay.GetMonth(), m_StopDay.GetDay(),
		m_StopTime.GetHour(), m_StopTime.GetMinute(), m_StopTime.GetSecond());

	//鼠标为等待状态
	AfxGetApp()->DoWaitCursor(1); 

	m_ListCtrl_UserLog.DeleteAllItems();

	int nSelectIndex = m_ComboType.GetCurSel();

	if (nSelectIndex == 0)    //管理端操作
		SearchAndSetHistoryListInfo(s_starttime,s_stoptime,(char *)(LPCTSTR)m_strNodeName);
	else if(nSelectIndex == 1)    //客户端操作
		SearchAndSetHistoryListInfo2(s_starttime,s_stoptime,(char *)(LPCTSTR)m_strNodeName);
	else if(nSelectIndex == 2)    //辅助系统管理端操作
		SearchAndSetHistoryListInfo3(s_starttime,s_stoptime,(char *)(LPCTSTR)m_strNodeName);
	//恢复鼠标为正常状态
	AfxGetApp()->DoWaitCursor(0);
}
예제 #5
0
파일: fix_test.c 프로젝트: cgcym1234/heaven
/*
 * Times TIMES calls to malloc(sizeof(test)) followed by TIMES calls to
 * free() on the results, using the starttime()/stoptime() helpers, and
 * prints the measured cost divided by 1000.
 */
void test2()
{
    struct timeval tv;
    long long int cost = 0;
    int idx;

    starttime(&tv);

    /* Allocation phase: one object per slot of p[]. */
    idx = 0;
    while (idx < TIMES) {
        p[idx] = (test *)malloc(sizeof(test));
        idx++;
    }

    /* Release phase, in the same order the objects were allocated. */
    idx = 0;
    while (idx < TIMES) {
        free(p[idx]);
        idx++;
    }

    /* The measurement covers both phases. */
    cost = stoptime(tv);
    printf("%lld\n", cost/1000);
}
예제 #6
0
/*
 * Thread-pool smoke test: opens log.txt, creates a pool, dispatches 1000
 * jobs plus one job tagged EMG_PRI, destroys the pool and prints the
 * measured cost divided by 1000.
 *
 * Returns 0 on success, 1 if the log file or the pool cannot be created.
 */
int main()
{
    fp = fopen("log.txt","w+");
    if (fp == NULL)
    {
        /* Without the log stream there is nothing useful to run. */
        perror("log.txt");
        return 1;
    }

    long long int cost = 0;
    struct timeval tv;
    starttime(&tv);

    thread_pool_t *pool = threadpool_create(2, 4, 10000);
    if (pool == NULL)
    {
        /* presumably threadpool_create reports failure with NULL -
           TODO confirm against its implementation */
        fprintf(stderr, "threadpool_create failed\n");
        fclose(fp);
        return 1;
    }

    int i;
    for(i = 0; i < 1000; i++)
    {
        /* NOTE(review): this call passes 3 arguments while the call after
           the loop passes 4; confirm dispatch() is variadic or a macro. */
        dispatch(pool, test_fun, (void *)i);
    }
    dispatch(pool, test_fun, (void *)i, EMG_PRI);
    threadpool_destroy(pool, 1);

    cost = stoptime(tv);
    printf("%lld\n", cost/1000);

    /* Close the log explicitly instead of relying on process exit. */
    fclose(fp);

    return 0;
}
예제 #7
0
//**************************************************************************
// ApptDialog :: command - Process Commands                               *
//                                                                        *
// DID_OK     : copies start time, end time and subject from the dialog   *
//              fields into the appointment object, plus the location     *
//              (MEETING) or phone number (CCALL), then dismisses.        *
// DID_CANCEL : dismisses the dialog without touching the appointment.    *
// Returns true when the command was handled here, false otherwise.       *
//**************************************************************************
Boolean ApptDialog :: command(ICommandEvent& cmdevt)
{
  Environment *ev = somGetGlobalEnvironment();
  ITime starttime(fldStarthr.value(), fldStartmin.value());
  ITime stoptime(fldStophr.value(), fldStopmin.value());

  switch(cmdevt.commandId()) {
    case DID_CANCEL:
      dismiss(DID_CANCEL);
      return(true);

    case DID_OK:
      // Only the two known appointment types are written back; any other
      // type just dismisses the dialog.
      if (apptType == MEETING || apptType == CCALL) {
        apptObject->_set_start(ev,starttime.asSeconds());
        apptObject->_set_end(ev,stoptime.asSeconds());
        apptObject->_set_subject(ev,mleSubj.text());

        // Type-specific attribute.
        if (apptType == MEETING)
          ((Meeting *)apptObject)->_set_location(ev,fldLoc.text());
        else
          ((ConferenceCall *)apptObject)->_set_phoneNumber(ev,fldPhone.text());
      }
      dismiss(DID_OK);
      return(true);
  }/* end switch */

  return(false);  //Allow Default Processing to occur
}
예제 #8
0
파일: fix_test.c 프로젝트: cgcym1234/heaven
/*
 * Times TIMES calls to fmem_alloc() followed by TIMES calls to fmem_free()
 * on a pool created with fmem_create(TIMES, sizeof(test)), and prints the
 * measured cost divided by 1000. Pool creation and destruction are outside
 * the timed region.
 */
void test1()
{
    fix_mpool_t *pool = fmem_create(TIMES, sizeof(test));
    struct timeval tv;
    long long int cost = 0;
    int idx;

    starttime(&tv);

    /* Allocation phase: one pool object per slot of p[]. */
    idx = 0;
    while (idx < TIMES) {
        p[idx] = (test *)fmem_alloc(pool);
        idx++;
    }

    /* Release phase, in allocation order. */
    idx = 0;
    while (idx < TIMES) {
        fmem_free(pool, p[idx]);
        idx++;
    }

    cost = stoptime(tv);
    fmem_destroy(pool);
    printf("%lld\n", cost/1000);

}
예제 #9
0
// Main input routine
// - doesn't accept words longer than MAXWORDLEN or containing caps
//
// Reads characters from timerch() into the buffer q until a newline,
// carriage return or space is seen, echoing accepted characters with
// addch() and handling the editing/control keys listed in the switch.
// Only characters for which islower() is true are stored.
//
// q must have room for MAXWORDLEN + 1 characters (the word plus the
// terminating NUL written at the end).
//
// Returns q on normal termination, or NULL when ^D was typed.
char *boggle_getline(char *q)
{
    int ch, done;
    char *p;
    int row, col;

    // p is the write cursor into q; p - q is the current word length.
    p = q;
    done = 0;
    while (!done) {
	ch = timerch();
	switch (ch) {
	    // Word terminators: finish the line.
	    case '\n':
	    case '\r':
	    case ' ':
		done = 1;
		break;
	    case '\033':
		// <esc>
		findword();
		break;
	    case '\177':      // <del>
	    case '\010':      // <bs>
		// Nothing to erase on an empty word.
		if (p == q)
		    break;
		// Drop the last character and wipe it from the screen.
		p--;
		getyx(stdscr, row, col);
		move(row, col - 1);
		clrtoeol();
		refresh();
		break;
	    case '\025':      // <^u>
	    case '\027':      // <^w>
		if (p == q)
		    break;
		// Erase the whole word: move the cursor back by its length.
		getyx(stdscr, row, col);
		move(row, col - (int) (p - q));
		p = q;
		clrtoeol();
		refresh();
		break;
#ifdef SIGTSTP
	    case '\032':      // <^z>
		// Call the SIGTSTP handler directly.
		stop_catcher(0);
		break;
#endif
	    case '\023':      // <^s>
		// Pause: wait until ^q ('\021') or another ^s is typed, then
		// move to (crow, ccol), clear to end of line and call
		// starttime() again.
		stoptime();
		printw("<PAUSE>");
		refresh();
		while ((ch = inputch()) != '\021' && ch != '\023');
		move(crow, ccol);
		clrtoeol();
		refresh();
		starttime();
		break;
	    case '\003':      // <^c>
		cleanup();
		exit(0);
	     /*NOTREACHED*/ case '\004':	// <^d>
		// Finish and make the function return NULL (see below).
		done = 1;
		ch = EOF;
		break;
	    case '\014':      // <^l>
	    case '\022':      // <^r>
		redraw();
		break;
	    case '?':
		// Show help; stoptime()/starttime() bracket the help() call.
		stoptime();
		if (help() < 0)
		    showstr("Can't open help file", 1);
		starttime();
		break;
	    default:
		// Ignore anything that is not a lowercase letter.
		if (!islower(ch))
		    break;
		// Word already at maximum length: discard it and report it
		// through badword().
		if ((int) (p - q) == MAXWORDLEN) {
		    p = q;
		    badword();
		    break;
		}
		*p++ = ch;
		addch(ch);
		refresh();
		break;
	}
    }
    *p = '\0';
    // ch is EOF only when the loop was ended by ^D.
    if (ch == EOF)
	return (char *) NULL;
    return q;
}
예제 #10
0
int main(int argc, char** argv) {

   // Set up the data on the host	
   clock_t start, start0;
   start0 = clock();
   start = clock();
   // Rows and columns in the input image
   int imageHeight;
   int imageWidth;

   const char* inputFile = "input.bmp";
   const char* outputFile = "output.bmp";



   // Homegrown function to read a BMP from file
   float* inputImage = readImage(inputFile, &imageWidth, 
      &imageHeight);

   // Size of the input and output images on the host
   int dataSize = imageHeight*imageWidth*sizeof(float);

   // Pad the number of columns 
#ifdef NON_OPTIMIZED
   int deviceWidth = imageWidth;
#else  // READ_ALIGNED || READ4
   int deviceWidth = roundUp(imageWidth, WGX);
#endif
   int deviceHeight = imageHeight;
   // Size of the input and output images on the device
   int deviceDataSize = imageHeight*deviceWidth*sizeof(float);

   // Output image on the host
   float* outputImage = NULL;
   outputImage = (float*)malloc(dataSize);
   int i, j;
   for(i = 0; i < imageHeight; i++) {
       for(j = 0; j < imageWidth; j++) {
           outputImage[i*imageWidth+j] = 0;
       }
   }

   // 45 degree motion blur
   float filter[49] = 
      {0,      0,      0,      0,      0, 0.0145,      0,
       0,      0,      0,      0, 0.0376, 0.1283, 0.0145,
       0,      0,      0, 0.0376, 0.1283, 0.0376,      0,
       0,      0, 0.0376, 0.1283, 0.0376,      0,      0,
       0, 0.0376, 0.1283, 0.0376,      0,      0,      0,
  0.0145, 0.1283, 0.0376,      0,      0,      0,      0,
       0, 0.0145,      0,      0,      0,      0,      0};
 
   int filterWidth = 7;
   int paddingPixels = (int)(filterWidth/2) * 2; 
   stoptime(start, "set up input, output.");
   start = clock();
   // Set up the OpenCL environment

   // Discovery platform
   cl_platform_id platform;
   clGetPlatformIDs(1, &platform, NULL);

   // Discover device
   cl_device_id device;
   clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device,
      NULL);

    size_t time_res;
    clGetDeviceInfo(device, CL_DEVICE_PROFILING_TIMER_RESOLUTION,
            sizeof(time_res), &time_res, NULL);
    printf("Device profiling timer resolution: %zu ns.\n", time_res);

   // Create context
   cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 
       (cl_context_properties)(platform), 0};
   cl_context context; 
   context = clCreateContext(props, 1, &device, NULL, NULL, 
      NULL);

   // Create command queue
   cl_ulong time_start, time_end, exec_time;
   cl_event timing_event;
   cl_command_queue queue;
   queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL);

   // Create memory buffers
   cl_mem d_inputImage;
   cl_mem d_outputImage;
   cl_mem d_filter;
   d_inputImage = clCreateBuffer(context, CL_MEM_READ_ONLY, 
       deviceDataSize, NULL, NULL);
   d_outputImage = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
       deviceDataSize, NULL, NULL);
   d_filter = clCreateBuffer(context, CL_MEM_READ_ONLY, 
       49*sizeof(float),NULL, NULL);
   
   // Write input data to the device
#ifdef NON_OPTIMIZED
   clEnqueueWriteBuffer(queue, d_inputImage, CL_TRUE, 0, deviceDataSize,
       inputImage, 0, NULL, NULL);
#else // READ_ALIGNED || READ4
   size_t buffer_origin[3] = {0,0,0};
   size_t host_origin[3] = {0,0,0};
   size_t region[3] = {deviceWidth*sizeof(float), 
      imageHeight, 1};
   clEnqueueWriteBufferRect(queue, d_inputImage, CL_TRUE, 
      buffer_origin, host_origin, region, 
      deviceWidth*sizeof(float), 0, imageWidth*sizeof(float), 0,
      inputImage, 0, NULL, NULL);
#endif
	
   // Write the filter to the device
   clEnqueueWriteBuffer(queue, d_filter, CL_TRUE, 0, 
      49*sizeof(float), filter, 0, NULL, NULL);
	
   // Read in the program from file
   char* source = readSource("convolution.cl");

   // Create the program
   cl_program program;
	
   // Create and compile the program
   program = clCreateProgramWithSource(context, 1, 
       (const char**)&source, NULL, NULL);
   cl_int build_status;
   build_status = clBuildProgram(program, 1, &device, NULL, NULL,
      NULL);
      
   // Create the kernel
   cl_kernel kernel;
#if defined NON_OPTIMIZED || defined READ_ALIGNED
   // Only the host-side code differs for the aligned reads
   kernel = clCreateKernel(program, "convolution", NULL);
#else // READ4
   kernel = clCreateKernel(program, "convolution_read4", NULL);
#endif
	
   // Selected work group size is 16x16
   int wgWidth = WGX;
   int wgHeight = WGY;

   // When computing the total number of work items, the 
   // padding work items do not need to be considered
   int totalWorkItemsX = roundUp(imageWidth-paddingPixels, 
      wgWidth);
   int totalWorkItemsY = roundUp(imageHeight-paddingPixels, 
      wgHeight);

   // Size of a work group
   size_t localSize[2] = {wgWidth, wgHeight};
   // Size of the NDRange
   size_t globalSize[2] = {totalWorkItemsX, totalWorkItemsY};

   // The amount of local data that is cached is the size of the
   // work groups plus the padding pixels
#if defined NON_OPTIMIZED || defined READ_ALIGNED
   int localWidth = localSize[0] + paddingPixels;
#else // READ4
   // Round the local width up to 4 for the read4 kernel
   int localWidth = roundUp(localSize[0]+paddingPixels, 4);
#endif
   int localHeight = localSize[1] + paddingPixels;

   // Compute the size of local memory (needed for dynamic 
   // allocation)
   size_t localMemSize = (localWidth * localHeight * 
      sizeof(float));

   // Set the kernel arguments
   clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage);
   clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage);
   clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_filter);
   clSetKernelArg(kernel, 3, sizeof(int), &deviceHeight);
   clSetKernelArg(kernel, 4, sizeof(int), &deviceWidth); 
   clSetKernelArg(kernel, 5, sizeof(int), &filterWidth);
   clSetKernelArg(kernel, 6, localMemSize, NULL);
   clSetKernelArg(kernel, 7, sizeof(int), &localHeight); 
   clSetKernelArg(kernel, 8, sizeof(int), &localWidth);

   stoptime(start, "set up kernel");
   start = clock();
   // Execute the kernel
   clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, 
      localSize, 0, NULL, &timing_event);

   // Wait for kernel to complete
   clFinish(queue);
   stoptime(start, "run kernel");
   clGetEventProfilingInfo(timing_event, CL_PROFILING_COMMAND_START,
           sizeof(time_start), &time_start, NULL);
   clGetEventProfilingInfo(timing_event, CL_PROFILING_COMMAND_END,
           sizeof(time_end), &time_end, NULL);
   exec_time = time_end-time_start;
   printf("Profile execution time = %.3lf sec.\n", (double) exec_time/1000000000);

   // Read back the output image
#ifdef NON_OPTIMIZED
   clEnqueueReadBuffer(queue, d_outputImage, CL_TRUE, 0, 
      deviceDataSize, outputImage, 0, NULL, NULL);
#else // READ_ALIGNED || READ4
   // Begin reading output from (3,3) on the device 
   // (for 7x7 filter with radius 3)
   buffer_origin[0] = 3*sizeof(float);
   buffer_origin[1] = 3;
   buffer_origin[2] = 0;

   // Read data into (3,3) on the host
   host_origin[0] = 3*sizeof(float);
   host_origin[1] = 3;
   host_origin[2] = 0;
	
   // Region is image size minus padding pixels
   region[0] = (imageWidth-paddingPixels)*sizeof(float);
   region[1] = (imageHeight-paddingPixels);
   region[2] = 1;
	
	// Perform the read
   clEnqueueReadBufferRect(queue, d_outputImage, CL_TRUE, 
      buffer_origin, host_origin, region, 
      deviceWidth*sizeof(float), 0, imageWidth*sizeof(float), 0, 
      outputImage, 0, NULL, NULL);
#endif
  
   // Homegrown function to write the image to file
   storeImage(outputImage, outputFile, imageHeight, 
      imageWidth, inputFile);
   
   // Free OpenCL objects
   clReleaseMemObject(d_inputImage);
   clReleaseMemObject(d_outputImage);
   clReleaseMemObject(d_filter);
   clReleaseKernel(kernel);
   clReleaseProgram(program);
   clReleaseCommandQueue(queue);
   clReleaseContext(context);

   return 0;
}
예제 #11
0
파일: dgemm_3.c 프로젝트: yoyz/mpi
/*
 * DGEMM benchmark: C = alpha*A*B + beta*C with alpha = 1, beta = 0 on
 * SIZE x SIZE row-major matrices, computed through cblas_dgemm.
 *
 * Allocates the matrices with mkl_malloc (64-byte alignment), fills A and B
 * with deterministic values, zeroes C, times one cblas_dgemm call and prints
 * the elapsed time and GFLOP/s (2*m*n*k operations). With DPRINTF enabled
 * the top-left 6x6 corner of each matrix is printed as well.
 *
 * Returns 0 on success, 1 when any of the allocations fails.
 */
int bench_dgemm()
{
    double *A, *B, *C;
    int m, n, k, i, j;
    /* Element counts and fill indices are 64-bit: m*k computed in int
       overflows once SIZE exceeds 46340. */
    int64_t a_len, b_len, c_len, idx;
    double alpha, beta;
    double t;

    m = SIZE, k = SIZE, n = SIZE;
    a_len = (int64_t)m * k;
    b_len = (int64_t)k * n;
    c_len = (int64_t)m * n;
    DPRINTF(" Initializing data for matrix multiplication C=A*B for matrix \n"
            " A(%ix%i) and matrix B(%ix%i)\n\n", m, k, k, n);
    alpha = 1.0; beta = 0.0;

    DPRINTF(" Allocating memory for matrices aligned on 64-byte boundary for better \n"
            " performance \n\n");
    A = (double *)mkl_malloc( (size_t)a_len*sizeof( double ), 64 );
    B = (double *)mkl_malloc( (size_t)b_len*sizeof( double ), 64 );
    C = (double *)mkl_malloc( (size_t)c_len*sizeof( double ), 64 );
    if (A == NULL || B == NULL || C == NULL) {
      printf( "\n ERROR: Can't allocate memory for matrices. Aborting... \n\n");
      mkl_free(A);
      mkl_free(B);
      mkl_free(C);
      return 1;
    }

    DPRINTF(" Intializing matrix data \n\n");
#pragma omp parallel for 
    for (idx = 0; idx < a_len; idx++) {
        A[idx] = (double)(idx+1);
    }
#pragma omp parallel for 
    for (idx = 0; idx < b_len; idx++) {
        B[idx] = (double)(-idx-1);
    }

#pragma omp parallel for 
    for (idx = 0; idx < c_len; idx++) {
        C[idx] = 0.0;
    }

    DPRINTF(" Computing matrix product using Intel(R) MKL dgemm function via CBLAS interface \n\n");
    /* Timed section: a single dgemm call. Leading dimensions are the row
       lengths because the layout is row-major. */
    t=stoptime();
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 
                m, n, k, alpha, A, k, B, n, beta, C, n);
    t=stoptime()-t;
    printf("calculation time : %f\n",t);
    printf("gflops/s         : %f\n",((2.0*m*n*k)*1E-9)/t);

    DPRINTF("\n Computations completed.\n\n");

    DPRINTF(" Top left corner of matrix A: \n");
    for (i=0; i<min(m,6); i++) {
      for (j=0; j<min(k,6); j++) {
        DPRINTF("%12.0f", A[j+i*k]);
      }
      DPRINTF("\n");
    }

    DPRINTF("\n Top left corner of matrix B: \n");
    for (i=0; i<min(k,6); i++) {
      for (j=0; j<min(n,6); j++) {
        DPRINTF("%12.0f", B[j+i*n]);
      }
      DPRINTF("\n");
    }
    
    DPRINTF("\n Top left corner of matrix C: \n");
    for (i=0; i<min(m,6); i++) {
      for (j=0; j<min(n,6); j++) {
        DPRINTF("%12.5G", C[j+i*n]);
      }
      DPRINTF("\n");
    }

    DPRINTF("\n Deallocating memory \n\n");
    mkl_free(A);
    mkl_free(B);
    mkl_free(C);

    DPRINTF(" Example completed. \n\n");
    return 0;
}
예제 #12
0
파일: gflops.c 프로젝트: yoyz/mpi
/*
 * Per-rank floating-point throughput benchmark.
 *
 * Each MPI rank calls addmul_sse() and/or addmul_avx() `iter` times
 * (argv[1], default 1), depending on which of __SSE__ / __AVX__ the
 * compiler defines, and prints its host name, the elapsed time and the
 * derived GFLOP/s figure.
 *
 * Always returns 0.
 */
int main(int argc, char** argv)
{
  double t;
  /* Accumulator for the kernel results; must start at zero because the
     loops below add to it. */
  double x=0.0;
  double i=0;
  double iter=1;
  int size, rank;
  char hostname[1024];

  if (argc > 1) { iter = atoi(argv[1]); }

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);


  /* Pre-terminate the buffer in case gethostname() truncates the name
     without writing a NUL. */
  hostname[1023] = '\0';
  gethostname(hostname, 1023);
  t=stoptime();  

  /* NOTE(review): when both __SSE__ and __AVX__ are defined, both kernels
     run inside the same timed region and both report lines below use the
     combined time - confirm that is intended. */
#ifdef __SSE__
  printf("calling addmul_sse\n");
  for (i=0;i<iter;i++)
    x+=addmul_sse();
#endif

#ifdef __AVX__
  printf("calling addmul_avx\n");
  printf("AVX\n");
  for (i=0;i<iter;i++)
    x+=addmul_avx();
#endif


  t=stoptime()-t;

  // Here we launch max1*max2*iteration 
  // 16 assembly instruction on 16 register
  // storing 2 data on SSE
  // storing 4 data on AVX
#ifdef __SSE__
  printf("rank: %.4d\thost: %s\tgflops:\t %.3f s, %.3f Gflops, rank: %.4d res=%f\n",
         rank,
         hostname,
         t,
         (double)max1*max2*iter*16*2/t/1e9,
         rank,
         x);
#endif
#ifdef __AVX__
  printf("rank: %.4d\thost: %s\tgflops:\t %.3f s, %.3f Gflops, rank: %.4d res=%f\n",
         rank,
         hostname,
         t,
         (double)max1*max2*iter*16*4/t/1e9,
         rank,
         x);
#endif

  MPI_Finalize();

  return 0;
}