int bench_stream_triad() { double *A, *B, *C; double t; int64_t m, n, k, i, j; m = SIZE, k = SIZE, n = SIZE; double scalar=3.14; A = (double *)mkl_malloc( m*k*sizeof( double ), 64 ); B = (double *)mkl_malloc( k*n*sizeof( double ), 64 ); C = (double *)mkl_malloc( m*n*sizeof( double ), 64 ); #pragma omp parallel for for (i = 0; i < (m*k); i++) { A[i] = (double)(i+1); } #pragma omp parallel for for (i = 0; i < (k*n); i++) { B[i] = (double)(-i-1); } #pragma omp parallel for for (i = 0; i < (m*n); i++) { C[i] = 0.0; } if (A == NULL || B == NULL || C == NULL) { printf( "\n ERROR: Can't allocate memory for matrices. Aborting... \n\n"); mkl_free(A); mkl_free(B); mkl_free(C); return 1; } t=stoptime(); for (i=0;i<NTIME;i++) #pragma omp parallel for for (j=0; j<(m*k); j++) A[j] = B[j]+scalar*C[j]; t=stoptime()-t; printf("GB/s : %f\n",(((((m*k)*3)*8)*NTIME)/t)*1E-9); DPRINTF("\n Deallocating memory \n\n"); mkl_free(A); mkl_free(B); mkl_free(C); return 0; }
/*
 * Naive triple-loop N x N matrix multiplication benchmark.
 *
 * Builds three row-pointer matrices, multiplies mul1 by mul2 into res1
 * between two stoptime() calls and prints the elapsed value, the derived
 * gflops/s figure and the last element of the result.
 *
 * Returns 0 on success, 1 if an allocation fails.
 */
int main()
{
    double t;
    long i, j, k;
    double **mul1;
    double **mul2;
    double **res1;

    /* Each top-level array holds N row pointers (double *), not double **. */
    mul1 = malloc((size_t)(N) * sizeof *mul1);
    mul2 = malloc((size_t)(N) * sizeof *mul2);
    res1 = malloc((size_t)(N) * sizeof *res1);
    if (mul1 == NULL || mul2 == NULL || res1 == NULL) {
        fprintf(stderr, "cannot allocate row pointer arrays\n");
        return 1;
    }

    for (i = 0; i < N; ++i) {
        mul1[i] = malloc((size_t)(N) * sizeof *mul1[i]);
        mul2[i] = malloc((size_t)(N) * sizeof *mul2[i]);
        /* res1 is accumulated with +=, so it must start out zeroed. */
        res1[i] = calloc((size_t)(N), sizeof *res1[i]);
        if (mul1[i] == NULL || mul2[i] == NULL || res1[i] == NULL) {
            fprintf(stderr, "cannot allocate matrix row %ld\n", i);
            return 1;
        }
        /*
         * Give the operands defined values; reading them straight after
         * malloc would use indeterminate data.
         */
        for (j = 0; j < N; ++j) {
            mul1[i][j] = 1.0;
            mul2[i][j] = 2.0;
        }
    }

    t = stoptime();
    for (i = 0; i < N; ++i)
        for (j = 0; j < N; ++j)
            for (k = 0; k < N; ++k)
                res1[i][j] += mul1[i][k] * mul2[k][j];
    t = stoptime() - t;

    printf("calculation time : %f\n", t);
    /* 2*N^3 floating point operations (one multiply and one add each). */
    printf("gflops/s : %f\n", ((2.0*N*N*N)*1E-9)/t);
    /* After the loops i == N and j == N: this prints the last element. */
    printf("res1[i][j]:%f\n", res1[i-1][j-1]);

    for (i = 0; i < N; ++i) {
        free(mul1[i]);
        free(mul2[i]);
        free(res1[i]);
    }
    free(mul1);
    free(mul2);
    free(res1);
    return 0;
}
/*
 * Handler used for SIGTSTP.
 *
 * Calls stoptime(), switches the terminal out of raw/no-echo mode and
 * parks the cursor on the last screen line, then delivers a real SIGTSTP
 * to the process group with the default disposition in place.  Once
 * execution continues past kill(), the previous signal mask is restored
 * and this handler is installed again.
 *
 * The signal number argument is not used.
 */
static void
stop_catcher(int signo UNUSED)
{
	sigset_t tstp_only, saved_mask;

	/* Leave the screen in a sane state before stopping. */
	stoptime();
	noraw();
	echo();
	move(nlines - 1, 0);
	refresh();

	/* Default action + unblocked SIGTSTP so the kill() below takes effect. */
	signal(SIGTSTP, SIG_DFL);
	sigemptyset(&tstp_only);
	sigaddset(&tstp_only, SIGTSTP);
	sigprocmask(SIG_UNBLOCK, &tstp_only, &saved_mask);
	kill(0, SIGTSTP);

	/* Back from the stop: put the mask and the handler back. */
	sigprocmask(SIG_SETMASK, &saved_mask, (sigset_t *) 0);
	signal(SIGTSTP, stop_catcher);
}
void CDlgHistoryLogUser::OnBnClickedButtonHlQuery() { UpdateData(true); int i = 0, j = 0; char s_starttime[128]={0}; char s_stoptime[128]={0}; CTime starttime(m_StartDay.GetYear(), m_StartDay.GetMonth(), m_StartDay.GetDay(), m_StartTime.GetHour(), m_StartTime.GetMinute(), m_StartTime.GetSecond()); CTime stoptime(m_StopDay.GetYear(), m_StopDay.GetMonth(), m_StopDay.GetDay(), m_StopTime.GetHour(), m_StopTime.GetMinute(), m_StopTime.GetSecond()); if (stoptime <= starttime) { MessageBox("时间选择错误:开始时间大于结束时间","视频监视"); return; } sprintf(s_starttime, "%04d-%02d-%02d %02d:%02d:%02d", m_StartDay.GetYear(), m_StartDay.GetMonth(), m_StartDay.GetDay(), m_StartTime.GetHour(), m_StartTime.GetMinute(), m_StartTime.GetSecond()); sprintf(s_stoptime, "%04d-%02d-%02d %02d:%02d:%02d", m_StopDay.GetYear(), m_StopDay.GetMonth(), m_StopDay.GetDay(), m_StopTime.GetHour(), m_StopTime.GetMinute(), m_StopTime.GetSecond()); //鼠标为等待状态 AfxGetApp()->DoWaitCursor(1); m_ListCtrl_UserLog.DeleteAllItems(); int nSelectIndex = m_ComboType.GetCurSel(); if (nSelectIndex == 0) //管理端操作 SearchAndSetHistoryListInfo(s_starttime,s_stoptime,(char *)(LPCTSTR)m_strNodeName); else if(nSelectIndex == 1) //客户端操作 SearchAndSetHistoryListInfo2(s_starttime,s_stoptime,(char *)(LPCTSTR)m_strNodeName); else if(nSelectIndex == 2) //辅助系统管理端操作 SearchAndSetHistoryListInfo3(s_starttime,s_stoptime,(char *)(LPCTSTR)m_strNodeName); //恢复鼠标为正常状态 AfxGetApp()->DoWaitCursor(0); }
/*
 * Times TIMES malloc() calls followed by TIMES free() calls.
 *
 * Every allocation is sizeof(test) bytes and is stored in the file-scope
 * array p.  The interval is opened with starttime() and closed with
 * stoptime(); the value returned by stoptime() is printed divided by 1000.
 */
void test2()
{
    struct timeval tv;
    long long int cost = 0;
    int slot;

    starttime(&tv);

    /* Allocation phase. */
    slot = 0;
    while (slot < TIMES) {
        p[slot] = (test *)malloc(sizeof(test));
        slot++;
    }

    /* Release phase, same order as allocation. */
    slot = 0;
    while (slot < TIMES) {
        free(p[slot]);
        slot++;
    }

    cost = stoptime(tv);
    printf("%lld\n", cost/1000);
}
int main() { fp = fopen("log.txt","w+"); long long int cost = 0; struct timeval tv; starttime(&tv); thread_pool_t *pool = threadpool_create(2, 4, 10000); int i; for(i = 0; i < 1000; i++) { dispatch(pool, test_fun, (void *)i); } //sleep(5); dispatch(pool, test_fun, (void *)i, EMG_PRI); threadpool_destroy(pool, 1); cost = stoptime(tv); printf("%lld\n", cost/1000); return 0; }
//************************************************************************** // ApptDialog :: command - Process Commands * //************************************************************************** Boolean ApptDialog :: command(ICommandEvent& cmdevt) { Environment *ev = somGetGlobalEnvironment(); ITime starttime(fldStarthr.value(), fldStartmin.value()); ITime stoptime(fldStophr.value(), fldStopmin.value()); switch(cmdevt.commandId()) { case DID_OK: switch (apptType) { case MEETING: apptObject->_set_start(ev,starttime.asSeconds()); apptObject->_set_end(ev,stoptime.asSeconds()); apptObject->_set_subject(ev,mleSubj.text()); ((Meeting *)apptObject)->_set_location(ev,fldLoc.text()); break; case CCALL: apptObject->_set_start(ev,starttime.asSeconds()); apptObject->_set_end(ev,stoptime.asSeconds()); apptObject->_set_subject(ev,mleSubj.text()); ((ConferenceCall *)apptObject)->_set_phoneNumber(ev,fldPhone.text()); break; default: break; } /* End switch*/ dismiss(DID_OK); return(true); break; case DID_CANCEL: dismiss(DID_CANCEL); return(true); break; }/* end switch */ return(false); //Allow Default Processing to occur }
void test1() { fix_mpool_t *pool = fmem_create(TIMES, sizeof(test)); int i; long long int cost = 0; struct timeval tv; starttime(&tv); //pool_t *pool = mem_init(TIMES, sizeof(test)); for(i = 0; i < TIMES; i++) { p[i] = (test *)fmem_alloc(pool); //memset(p[i], 0, sizeof(test)); } for(i = 0; i < TIMES; i++) { fmem_free(pool, p[i]); } // mem_info(pool); cost = stoptime(tv); fmem_destroy(pool); printf("%lld\n", cost/1000); }
// Main input routine // - doesn't accept words longer than MAXWORDLEN or containing caps char *boggle_getline(char *q) { int ch, done; char *p; int row, col; p = q; done = 0; while (!done) { ch = timerch(); switch (ch) { case '\n': case '\r': case ' ': done = 1; break; case '\033': findword(); break; case '\177': // <del> case '\010': // <bs> if (p == q) break; p--; getyx(stdscr, row, col); move(row, col - 1); clrtoeol(); refresh(); break; case '\025': // <^u> case '\027': // <^w> if (p == q) break; getyx(stdscr, row, col); move(row, col - (int) (p - q)); p = q; clrtoeol(); refresh(); break; #ifdef SIGTSTP case '\032': // <^z> stop_catcher(0); break; #endif case '\023': // <^s> stoptime(); printw("<PAUSE>"); refresh(); while ((ch = inputch()) != '\021' && ch != '\023'); move(crow, ccol); clrtoeol(); refresh(); starttime(); break; case '\003': // <^c> cleanup(); exit(0); /*NOTREACHED*/ case '\004': // <^d> done = 1; ch = EOF; break; case '\014': // <^l> case '\022': // <^r> redraw(); break; case '?': stoptime(); if (help() < 0) showstr("Can't open help file", 1); starttime(); break; default: if (!islower(ch)) break; if ((int) (p - q) == MAXWORDLEN) { p = q; badword(); break; } *p++ = ch; addch(ch); refresh(); break; } } *p = '\0'; if (ch == EOF) return (char *) NULL; return q; }
int main(int argc, char** argv) { // Set up the data on the host clock_t start, start0; start0 = clock(); start = clock(); // Rows and columns in the input image int imageHeight; int imageWidth; const char* inputFile = "input.bmp"; const char* outputFile = "output.bmp"; // Homegrown function to read a BMP from file float* inputImage = readImage(inputFile, &imageWidth, &imageHeight); // Size of the input and output images on the host int dataSize = imageHeight*imageWidth*sizeof(float); // Pad the number of columns #ifdef NON_OPTIMIZED int deviceWidth = imageWidth; #else // READ_ALIGNED || READ4 int deviceWidth = roundUp(imageWidth, WGX); #endif int deviceHeight = imageHeight; // Size of the input and output images on the device int deviceDataSize = imageHeight*deviceWidth*sizeof(float); // Output image on the host float* outputImage = NULL; outputImage = (float*)malloc(dataSize); int i, j; for(i = 0; i < imageHeight; i++) { for(j = 0; j < imageWidth; j++) { outputImage[i*imageWidth+j] = 0; } } // 45 degree motion blur float filter[49] = {0, 0, 0, 0, 0, 0.0145, 0, 0, 0, 0, 0, 0.0376, 0.1283, 0.0145, 0, 0, 0, 0.0376, 0.1283, 0.0376, 0, 0, 0, 0.0376, 0.1283, 0.0376, 0, 0, 0, 0.0376, 0.1283, 0.0376, 0, 0, 0, 0.0145, 0.1283, 0.0376, 0, 0, 0, 0, 0, 0.0145, 0, 0, 0, 0, 0}; int filterWidth = 7; int paddingPixels = (int)(filterWidth/2) * 2; stoptime(start, "set up input, output."); start = clock(); // Set up the OpenCL environment // Discovery platform cl_platform_id platform; clGetPlatformIDs(1, &platform, NULL); // Discover device cl_device_id device; clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL); size_t time_res; clGetDeviceInfo(device, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(time_res), &time_res, NULL); printf("Device profiling timer resolution: %zu ns.\n", time_res); // Create context cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(platform), 0}; cl_context context; context = clCreateContext(props, 1, &device, NULL, 
NULL, NULL); // Create command queue cl_ulong time_start, time_end, exec_time; cl_event timing_event; cl_command_queue queue; queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL); // Create memory buffers cl_mem d_inputImage; cl_mem d_outputImage; cl_mem d_filter; d_inputImage = clCreateBuffer(context, CL_MEM_READ_ONLY, deviceDataSize, NULL, NULL); d_outputImage = clCreateBuffer(context, CL_MEM_WRITE_ONLY, deviceDataSize, NULL, NULL); d_filter = clCreateBuffer(context, CL_MEM_READ_ONLY, 49*sizeof(float),NULL, NULL); // Write input data to the device #ifdef NON_OPTIMIZED clEnqueueWriteBuffer(queue, d_inputImage, CL_TRUE, 0, deviceDataSize, inputImage, 0, NULL, NULL); #else // READ_ALIGNED || READ4 size_t buffer_origin[3] = {0,0,0}; size_t host_origin[3] = {0,0,0}; size_t region[3] = {deviceWidth*sizeof(float), imageHeight, 1}; clEnqueueWriteBufferRect(queue, d_inputImage, CL_TRUE, buffer_origin, host_origin, region, deviceWidth*sizeof(float), 0, imageWidth*sizeof(float), 0, inputImage, 0, NULL, NULL); #endif // Write the filter to the device clEnqueueWriteBuffer(queue, d_filter, CL_TRUE, 0, 49*sizeof(float), filter, 0, NULL, NULL); // Read in the program from file char* source = readSource("convolution.cl"); // Create the program cl_program program; // Create and compile the program program = clCreateProgramWithSource(context, 1, (const char**)&source, NULL, NULL); cl_int build_status; build_status = clBuildProgram(program, 1, &device, NULL, NULL, NULL); // Create the kernel cl_kernel kernel; #if defined NON_OPTIMIZED || defined READ_ALIGNED // Only the host-side code differs for the aligned reads kernel = clCreateKernel(program, "convolution", NULL); #else // READ4 kernel = clCreateKernel(program, "convolution_read4", NULL); #endif // Selected work group size is 16x16 int wgWidth = WGX; int wgHeight = WGY; // When computing the total number of work items, the // padding work items do not need to be considered int totalWorkItemsX = 
roundUp(imageWidth-paddingPixels, wgWidth); int totalWorkItemsY = roundUp(imageHeight-paddingPixels, wgHeight); // Size of a work group size_t localSize[2] = {wgWidth, wgHeight}; // Size of the NDRange size_t globalSize[2] = {totalWorkItemsX, totalWorkItemsY}; // The amount of local data that is cached is the size of the // work groups plus the padding pixels #if defined NON_OPTIMIZED || defined READ_ALIGNED int localWidth = localSize[0] + paddingPixels; #else // READ4 // Round the local width up to 4 for the read4 kernel int localWidth = roundUp(localSize[0]+paddingPixels, 4); #endif int localHeight = localSize[1] + paddingPixels; // Compute the size of local memory (needed for dynamic // allocation) size_t localMemSize = (localWidth * localHeight * sizeof(float)); // Set the kernel arguments clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage); clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage); clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_filter); clSetKernelArg(kernel, 3, sizeof(int), &deviceHeight); clSetKernelArg(kernel, 4, sizeof(int), &deviceWidth); clSetKernelArg(kernel, 5, sizeof(int), &filterWidth); clSetKernelArg(kernel, 6, localMemSize, NULL); clSetKernelArg(kernel, 7, sizeof(int), &localHeight); clSetKernelArg(kernel, 8, sizeof(int), &localWidth); stoptime(start, "set up kernel"); start = clock(); // Execute the kernel clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, localSize, 0, NULL, &timing_event); // Wait for kernel to complete clFinish(queue); stoptime(start, "run kernel"); clGetEventProfilingInfo(timing_event, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL); clGetEventProfilingInfo(timing_event, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL); exec_time = time_end-time_start; printf("Profile execution time = %.3lf sec.\n", (double) exec_time/1000000000); // Read back the output image #ifdef NON_OPTIMIZED clEnqueueReadBuffer(queue, d_outputImage, CL_TRUE, 0, deviceDataSize, outputImage, 0, 
NULL, NULL); #else // READ_ALIGNED || READ4 // Begin reading output from (3,3) on the device // (for 7x7 filter with radius 3) buffer_origin[0] = 3*sizeof(float); buffer_origin[1] = 3; buffer_origin[2] = 0; // Read data into (3,3) on the host host_origin[0] = 3*sizeof(float); host_origin[1] = 3; host_origin[2] = 0; // Region is image size minus padding pixels region[0] = (imageWidth-paddingPixels)*sizeof(float); region[1] = (imageHeight-paddingPixels); region[2] = 1; // Perform the read clEnqueueReadBufferRect(queue, d_outputImage, CL_TRUE, buffer_origin, host_origin, region, deviceWidth*sizeof(float), 0, imageWidth*sizeof(float), 0, outputImage, 0, NULL, NULL); #endif // Homegrown function to write the image to file storeImage(outputImage, outputFile, imageHeight, imageWidth, inputFile); // Free OpenCL objects clReleaseMemObject(d_inputImage); clReleaseMemObject(d_outputImage); clReleaseMemObject(d_filter); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseContext(context); return 0; }
int bench_dgemm() { double *A, *B, *C; int m, n, k, i, j; double alpha, beta; double t; m = SIZE, k = SIZE, n = SIZE; DPRINTF(" Initializing data for matrix multiplication C=A*B for matrix \n" " A(%ix%i) and matrix B(%ix%i)\n\n", m, k, k, n); alpha = 1.0; beta = 0.0; DPRINTF(" Allocating memory for matrices aligned on 64-byte boundary for better \n" " performance \n\n"); A = (double *)mkl_malloc( m*k*sizeof( double ), 64 ); B = (double *)mkl_malloc( k*n*sizeof( double ), 64 ); C = (double *)mkl_malloc( m*n*sizeof( double ), 64 ); if (A == NULL || B == NULL || C == NULL) { printf( "\n ERROR: Can't allocate memory for matrices. Aborting... \n\n"); mkl_free(A); mkl_free(B); mkl_free(C); return 1; } DPRINTF(" Intializing matrix data \n\n"); #pragma omp parallel for for (i = 0; i < (m*k); i++) { A[i] = (double)(i+1); } #pragma omp parallel for for (i = 0; i < (k*n); i++) { B[i] = (double)(-i-1); } #pragma omp parallel for for (i = 0; i < (m*n); i++) { C[i] = 0.0; } DPRINTF(" Computing matrix product using Intel(R) MKL dgemm function via CBLAS interface \n\n"); t=stoptime(); cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, k, B, n, beta, C, n); t=stoptime()-t; printf("calculation time : %f\n",t); printf("gflops/s : %f\n",((2.0*m*n*k)*1E-9)/t); DPRINTF("\n Computations completed.\n\n"); DPRINTF(" Top left corner of matrix A: \n"); for (i=0; i<min(m,6); i++) { for (j=0; j<min(k,6); j++) { DPRINTF("%12.0f", A[j+i*k]); } DPRINTF("\n"); } DPRINTF("\n Top left corner of matrix B: \n"); for (i=0; i<min(k,6); i++) { for (j=0; j<min(n,6); j++) { DPRINTF("%12.0f", B[j+i*n]); } DPRINTF("\n"); } DPRINTF("\n Top left corner of matrix C: \n"); for (i=0; i<min(m,6); i++) { for (j=0; j<min(n,6); j++) { DPRINTF("%12.5G", C[j+i*n]); } DPRINTF("\n"); } DPRINTF("\n Deallocating memory \n\n"); mkl_free(A); mkl_free(B); mkl_free(C); DPRINTF(" Example completed. \n\n"); return 0; }
int main(int argc, char** argv) { double t; double x; double i=0; double iter=1; int size, rank; char hostname[1024]; if (argc > 1) { iter = atoi(argv[1]); } MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); hostname[1023] = '\0'; gethostname(hostname, 1023); t=stoptime(); #ifdef __SSE__ printf("calling addmul_sse\n"); for (i=0;i<iter;i++) x+=addmul_sse(); #endif #ifdef __AVX__ printf("calling addmul_avx\n"); printf("AVX\n"); for (i=0;i<iter;i++) x+=addmul_avx(); #endif t=stoptime()-t; // Here we launch max1*max2*iteration // 16 assembly instruction on 16 register // storing 2 data on SSE // storing 4 data on AVX #ifdef __SSE__ printf("rank: %.4d\thost: %s\tgflops:\t %.3f s, %.3f Gflops, rank: %.4d res=%f\n", rank, hostname, t, (double)max1*max2*iter*16*2/t/1e9, rank, x); #endif #ifdef __AVX__ printf("rank: %.4d\thost: %s\tgflops:\t %.3f s, %.3f Gflops, rank: %.4d res=%f\n", rank, hostname, t, (double)max1*max2*iter*16*4/t/1e9, rank, x); #endif MPI_Finalize(); return 0; }