int replayfs_fs_init(void) { int err; perftimer_init(); debugk("Sizeof pgoff_t is %u, loff_t %u\n", sizeof(pgoff_t), sizeof(loff_t)); if (PAGE_SIZE != sizeof(struct replayfs_dir_page)) { printk("REPLAYFS ERROR: PAGE_SIZE == %lu, replayfs_dir_page size == %d\n", PAGE_SIZE, sizeof(struct replayfs_dir_page)); return 1; } /* Init memory management */ if (replayfs_init_allocators()) { return 1; } /* All is going well so far, setup our super block */ err = register_filesystem(&replayfs_type); if (err) { replayfs_destroy_allocators(); return 1; } return 0; }
/** * \brief Creates and initializes the working data for the plan * \param [in] plan The data and memory location for the plan. * \return int Error flag value * \sa parseCUDAMEMPlan * \sa makeCUDAMEMPlan * \sa execCUDAMEMPlan * \sa perfCUDAMEMPlan * \sa killCUDAMEMPlan */ int initCUDAMEMPlan(void *plan) { size_t avail, total, arraybytes; int M,i; int ret = make_error(ALLOC,generic_err); double gputhreads; cudaError_t cudaStat; struct cudaDeviceProp prop; Plan *p; CUDAMEMdata *d = NULL; p = (Plan *)plan; if (p) { d = (CUDAMEMdata*)p->vptr; p->exec_count = 0; perftimer_init(&p->timers, NUM_TIMERS); } if(d) { CUDA_CALL( cudaSetDevice(d->device) ); CUDA_CALL( cudaMemGetInfo(&avail, &total) ); CUDA_CALL( cudaGetDeviceProperties(&prop, d->device) ); if (d->nGpuThreads != 0) { // use the user spec'd number of threads or default to warp*cores gputhreads = (double)(d->nGpuThreads); } else { gputhreads = d->nGpuThreads = prop.warpSize * prop.multiProcessorCount; } if (prop.major < 2) { // check results on older devices d->resultCheck = 1; } else { d->resultCheck = 0; } // calculate M for 6 M*M arrays to fill 100%/75%/50% of GPU free memory // M = (d->nGpuThreads) * (int)(sqrt(0.75*avail/(6.0*sizeof(double)*gputhreads*gputhreads))); // M = (d->nGpuThreads) * (int)(sqrt(0.50*avail/(6.0*sizeof(double)*gputhreads*gputhreads))); M = (d->nGpuThreads) * (int)(sqrt(1.00*avail/(6.0*sizeof(double)*gputhreads*gputhreads))); // assume one will fit in host memory d->M = M; arraybytes = (size_t)(0.99*avail); d->arraybytes = arraybytes; d->arrayelems = arraybytes / sizeof(int); // host array and device arrays CUDA_CALL( cudaMallocHost((void **)(&(d->hostarray)), arraybytes) ); CUDA_CALL( cudaMalloc ((void **)(&(d->devicearray)), arraybytes) ); // initialize so that results are M*PI**2/100 //for(i=0; i<3*M*M; i++) d->HA[i] = (double)0.31415926535; //CUDA_CALL( cudaMemcpy( (d->DA), (d->HA), arraybytes, cudaMemcpyHostToDevice) ); //CUDA_CALL( cudaMemcpy( (d->DB), (d->DA), arraybytes, 
cudaMemcpyDeviceToDevice) ); ret = ERR_CLEAN; } return ret; }
/** * \brief Creates and initializes the working data for the plan * \param [in] plan The struct that holds the plan's data values. * \return int Error flag value * \sa parseCBAPlan * \sa makeCBAPlan * \sa execCBAPlan * \sa perfCBAPlan * \sa killCBAPlan */ int initCBAPlan(void *plan){ int ret = make_error(ALLOC,generic_err); int i; int nrow, ncol; Plan *p; CBA_data *ci = NULL; p = (Plan *)plan; #ifdef HAVE_PAPI int temp_event, k; int PAPI_Events [NUM_PAPI_EVENTS] = PAPI_COUNTERS; char *PAPI_units [NUM_PAPI_EVENTS] = PAPI_UNITS; #endif //HAVE_PAPI if(p){ ci = (CBA_data *)p->vptr; p->exec_count = 0; if(DO_PERF){ perftimer_init(&p->timers, NUM_TIMERS); #ifdef HAVE_PAPI /* Initialize plan's PAPI data */ p->PAPI_EventSet = PAPI_NULL; p->PAPI_Num_Events = 0; TEST_PAPI(PAPI_create_eventset(&p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME); //Add the desired events to the Event Set; ensure the dsired counters // are on the system then add, ignore otherwise for(k = 0; k < TOTAL_PAPI_EVENTS && k < NUM_PAPI_EVENTS; k++){ temp_event = PAPI_Events[k]; if(PAPI_query_event(temp_event) == PAPI_OK){ p->PAPI_Num_Events++; TEST_PAPI(PAPI_add_event(p->PAPI_EventSet, temp_event), PAPI_OK, MyRank, 9999, PRINT_SOME); } } PAPIRes_init(p->PAPI_Results, p->PAPI_Times); PAPI_set_units(p->name, PAPI_units, NUM_PAPI_EVENTS); TEST_PAPI(PAPI_start(p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME); #endif //HAVE_PAPI } //DO_PERF } if(ci){ brand_init(&(ci->br), ci->seed); nrow = ci->nrows; ncol = ci->ncols; ci->niter *= 64; /* we'll do iterations in blocks of 64 */ if((ci->ncols % BLOCKSIZE) != 0){ return make_error(0,specific_err); //fprintf(stderr, "ERROR (plan_cba): BLOCKSIZE (%ld) must divide" //" ncol (%ld)\n", BLOCKSIZE, ncol); } assert ((NITERS % 64) == 0); ci->work = (uint64_t *)calloc((size_t)((nrow * ncol + PAD + NITERS) * 2), sizeof(uint64_t)); ret = (ci->work == NULL) ? 
make_error(ALLOC,generic_err) : ERR_CLEAN; ci->out = &(ci->work[nrow * ncol + PAD]); ci->data = &(ci->out[NITERS]); ci->chk = &(ci->data[nrow * ncol + PAD]); for(i = 0; i < (nrow * ncol); i++){ ci->data[i] = brand(&(ci->br)); } blockit (ci->data, nrow, ncol, ci->work); } return ret; } /* initCBAPlan */
/**
 * \brief Creates and initializes the working data for the plan
 * \param plan The Plan struct that holds the plan's data values.
 * \return Error flag value (make_error(ALLOC, generic_err) if plan is NULL,
 *         otherwise ERR_CLEAN; OpenCL failures abort via assert).
 *
 * Sets up the full OpenCL stack for this plan: platform/device discovery,
 * context, command queue, a device-memory-sized write-only buffer, a
 * page-aligned host readback buffer, and the "write_pattern" kernel built
 * from the opencl_program source string.  The call sequence is
 * order-dependent and must not be rearranged.
 */
int initOPENCL_MEMPlan(void *plan){
    if(!plan){
        return make_error(ALLOC, generic_err);  /* no plan supplied */
    }
    Plan *p;
    OPENCL_MEM_DATA *d;
    p = (Plan *)plan;
#ifdef HAVE_PAPI
    int temp_event, i;
    int PAPI_Events [NUM_PAPI_EVENTS] = PAPI_COUNTERS;
    char *PAPI_units [NUM_PAPI_EVENTS] = PAPI_UNITS;
#endif //HAVE_PAPI
    if(p){
        d = (OPENCL_MEM_DATA *)p->vptr;
        p->exec_count = 0;  /* Initialize the plan execution count to zero. */
        perftimer_init(&p->timers, NUM_TIMERS);  /* Initialize all performance timers to zero. */
#ifdef HAVE_PAPI
        /* Initialize plan's PAPI data */
        p->PAPI_EventSet = PAPI_NULL;
        p->PAPI_Num_Events = 0;
        TEST_PAPI(PAPI_create_eventset(&p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME);
        /* Add the desired events to the Event Set; ensure the desired
         * counters exist on the system before adding, ignore otherwise. */
        for(i = 0; i < TOTAL_PAPI_EVENTS && i < NUM_PAPI_EVENTS; i++){
            temp_event = PAPI_Events[i];
            if(PAPI_query_event(temp_event) == PAPI_OK){
                p->PAPI_Num_Events++;
                TEST_PAPI(PAPI_add_event(p->PAPI_EventSet, temp_event), PAPI_OK, MyRank, 9999, PRINT_SOME);
            }
        }
        PAPIRes_init(p->PAPI_Results, p->PAPI_Times);
        PAPI_set_units(p->name, PAPI_units, NUM_PAPI_EVENTS);
        TEST_PAPI(PAPI_start(p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME);
#endif //HAVE_PAPI
    }
    if(d){
        cl_int error;
        /* Platform queries are serialized process-wide behind this mutex —
         * presumably because multiple plan threads init concurrently
         * (TODO confirm against callers). */
        pthread_mutex_lock(&opencl_platform_mutex);
        error = clGetPlatformIDs(0, NULL,&(d->num_platforms));  /* count first */
        pthread_mutex_unlock(&opencl_platform_mutex);
        assert(error == CL_SUCCESS);
        /* NOTE(review): malloc results are not NULL-checked here. */
        d->platforms = (cl_platform_id *)malloc(sizeof(cl_platform_id) * d->num_platforms);
        pthread_mutex_lock(&opencl_platform_mutex);
        error = clGetPlatformIDs(d->num_platforms, d->platforms, NULL);  /* then fetch */
        pthread_mutex_unlock(&opencl_platform_mutex);
        assert(error == CL_SUCCESS);
        /* Enumerate every device on the first platform only. */
        error = clGetDeviceIDs(d->platforms[0],CL_DEVICE_TYPE_ALL, 0, NULL, &(d->num_devices));
        assert(error == CL_SUCCESS);
        d->devices = (cl_device_id *)malloc(sizeof(cl_device_id) * d->num_devices);
        error = clGetDeviceIDs(d->platforms[0],CL_DEVICE_TYPE_ALL, d->num_devices, d->devices, NULL);
        assert(error == CL_SUCCESS);
        /* Context and in-order queue on the plan's selected device. */
        d->context = clCreateContext(NULL, 1, &(d->devices[d->device_id]), NULL, NULL, &error);
        assert(error == CL_SUCCESS);
        d->opencl_queue = clCreateCommandQueue(d->context, d->devices[d->device_id], 0, &error);
        assert(error == CL_SUCCESS);
        /* Size the test buffer from total global memory, minus a safety margin. */
        error = clGetDeviceInfo(d->devices[d->device_id], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &(d->device_memory), NULL);
        assert(error == CL_SUCCESS);
        d->device_memory -= SUB_FACTOR;
        d->buffer = clCreateBuffer(d->context, CL_MEM_WRITE_ONLY, d->device_memory, NULL, &error);
        assert(error == CL_SUCCESS);
        /* Page-aligned host buffer for reading the pattern back. */
        size_t page_size = sysconf(_SC_PAGESIZE);
        error = posix_memalign((void **)&(d->return_buffer), page_size, d->device_memory);
        assert(error == 0);
        /* Build the kernel from the embedded source string. */
        d->program = clCreateProgramWithSource(d->context, 1, (const char **)&opencl_program,NULL,&error);
        assert(error == CL_SUCCESS);
        error = clBuildProgram(d->program,1,&(d->devices[d->device_id]),NULL,NULL,NULL);
        assert(error == CL_SUCCESS);
        d->kernel = clCreateKernel(d->program, "write_pattern", &error);
        assert(error == CL_SUCCESS);
    }
    return ERR_CLEAN;  /* clean run; NULL-plan case returned earlier */
} /* initOPENCL_MEMPlan */
/**
 * \brief Creates and initializes the working data for the plan
 * \param [in] plan Holds the data and memory for the plan.
 * \return int Error flag value (ERR_CLEAN when both FFTW plans were created;
 *         specific_err when buffers were allocated but plan creation failed;
 *         ALLOC/generic_err when plan/vptr was missing).
 * \sa parseFFT2Plan
 * \sa makeFFT2Plan
 * \sa execFFT2Plan
 * \sa perfFFT2Plan
 * \sa killFFT2Plan
 *
 * Allocates three M*M complex buffers, builds forward/backward 2-D FFTW
 * plans under the global FFTW_Lock (the FFTW planner is not thread-safe),
 * then fills the input with deterministic pseudo-random data (srand(0)).
 */
int initFFT2Plan(void *plan){
    int i,k;
    size_t M;
    int ret = make_error(ALLOC,generic_err);  /* pessimistic default */
    Plan *p;
    FFTdata *d = NULL;
    p = (Plan *)plan;
#ifdef HAVE_PAPI
    int temp_event, j;
    int PAPI_Events [NUM_PAPI_EVENTS] = PAPI_COUNTERS;
    char *PAPI_units [NUM_PAPI_EVENTS] = PAPI_UNITS;
#endif //HAVE_PAPI
    if(p){
        d = (FFTdata *)p->vptr;
        p->exec_count = 0;
        if(DO_PERF){
            perftimer_init(&p->timers, NUM_TIMERS);
#ifdef HAVE_PAPI
            /* Initialize plan's PAPI data */
            p->PAPI_EventSet = PAPI_NULL;
            p->PAPI_Num_Events = 0;
            TEST_PAPI(PAPI_create_eventset(&p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME);
            /* Add the desired events to the Event Set; ensure the desired
             * counters exist on the system before adding, ignore otherwise. */
            for(j = 0; j < TOTAL_PAPI_EVENTS && j < NUM_PAPI_EVENTS; j++){
                temp_event = PAPI_Events[j];
                if(PAPI_query_event(temp_event) == PAPI_OK){
                    p->PAPI_Num_Events++;
                    TEST_PAPI(PAPI_add_event(p->PAPI_EventSet, temp_event), PAPI_OK, MyRank, 9999, PRINT_SOME);
                }
            }
            PAPIRes_init(p->PAPI_Results, p->PAPI_Times);
            PAPI_set_units(p->name, PAPI_units, NUM_PAPI_EVENTS);
            TEST_PAPI(PAPI_start(p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME);
#endif //HAVE_PAPI
        } //DO_PERF
    }
    if(d){
        M = d->M;
        /* Hold the writer lock for both fftw_malloc and plan creation —
         * the FFTW planner must be serialized across threads. */
        pthread_rwlock_wrlock(&FFTW_Lock);
        d->in_original = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * M * M);
        assert(d->in_original);
        d->out = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * M * M);
        assert(d->out);
        d->mid = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * M * M);
        assert(d->mid);
        /* Allocations succeeded: pre-set ret to "plan creation failed"; it
         * is promoted to ERR_CLEAN below once both plans exist. */
        if(d->in_original && d->out && d->mid){
            ret = make_error(0,specific_err); // Error in getting the plan set
        }
        d->forward = fftw_plan_dft_2d(M,M,d->in_original,d->mid,FFTW_FORWARD, FFTW_ESTIMATE);
        d->backward = fftw_plan_dft_2d(M,M,d->mid,d->out,FFTW_BACKWARD, FFTW_ESTIMATE);
        pthread_rwlock_unlock(&FFTW_Lock);
        if(d->forward && d->backward){
            ret = ERR_CLEAN;
        }
        /* Deterministic input so runs are reproducible.
         * NOTE(review): loop counters are int while M is size_t — fine while
         * M fits in int; confirm M stays small enough at the call sites. */
        srand(0);
        for(i = 0; i < M; i++){
            for(k = 0; k < M; k++){
                d->in_original[i * M + k][0] = rand();
                d->in_original[i * M + k][1] = rand();
            }
        }
    }
    return ret;
} /* initFFT2Plan */
/**
 * \brief Creates and initializes the working data for the plan
 * \param plan The Plan struct that holds the plan's data values.
 * \return Error flag value (make_error(ALLOC, generic_err) if plan is NULL,
 *         otherwise ERR_CLEAN; allocation failures abort via assert).
 *
 * Selects the OpenACC device, sizes M so that three M*M double matrices fit
 * in d->device_memory, allocates page-aligned A/B/C buffers, and seeds them
 * with fixed values (A=4.5, B=2.0, C=0.0) for the GEMM benchmark.
 */
int initDOPENACCGEMMPlan(void *plan){
    if(!plan){
        return make_error(ALLOC, generic_err);  /* no plan supplied */
    }
    Plan *p;
    DOPENACCGEMM_DATA *d;
    p = (Plan *)plan;
#ifdef HAVE_PAPI
    int temp_event, i;
    int PAPI_Events [NUM_PAPI_EVENTS] = PAPI_COUNTERS;
    char *PAPI_units [NUM_PAPI_EVENTS] = PAPI_UNITS;
#endif //HAVE_PAPI
    if(p){
        d = (DOPENACCGEMM_DATA *)p->vptr;
        p->exec_count = 0;  /* Initialize the plan execution count to zero. */
        perftimer_init(&p->timers, NUM_TIMERS);  /* Initialize all performance timers to zero. */
#ifdef HAVE_PAPI
        /* Initialize plan's PAPI data */
        p->PAPI_EventSet = PAPI_NULL;
        p->PAPI_Num_Events = 0;
        TEST_PAPI(PAPI_create_eventset(&p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME);
        /* Add the desired events to the Event Set; ensure the desired
         * counters exist on the system before adding, ignore otherwise. */
        for(i = 0; i < TOTAL_PAPI_EVENTS && i < NUM_PAPI_EVENTS; i++){
            temp_event = PAPI_Events[i];
            if(PAPI_query_event(temp_event) == PAPI_OK){
                p->PAPI_Num_Events++;
                TEST_PAPI(PAPI_add_event(p->PAPI_EventSet, temp_event), PAPI_OK, MyRank, 9999, PRINT_SOME);
            }
        }
        PAPIRes_init(p->PAPI_Results, p->PAPI_Times);
        PAPI_set_units(p->name, PAPI_units, NUM_PAPI_EVENTS);
        TEST_PAPI(PAPI_start(p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME);
#endif //HAVE_PAPI
    }
    if(d){
        int error;
        /* Bind this plan to its configured accelerator. */
        acc_device_t my_device = acc_get_device_type();
        acc_set_device_num(d->device_id, my_device);
        //When OpenACC can report back on accelerator size, these two lines should be enabled
        //d->device_memory = system_burn_accelerator_memory(d->device_id);
        //d->device_memory -= SUB_FACTOR;
        /* NOTE(review): d->device_memory is read here but never assigned in
         * this function (the lines above are disabled) — presumably set by
         * the parse/make step; confirm before relying on it. */
        /* Three M*M double matrices must fit in device_memory. */
        d->M = ((int)sqrt(d->device_memory/sizeof(double))) / 3;
        size_t page_size = sysconf(_SC_PAGESIZE);
        /* Page-aligned host buffers for A, B, and C. */
        error = posix_memalign((void **)&(d->A_buffer),page_size,d->M*d->M*sizeof(double));
        assert(error==0);
        error = posix_memalign((void **)&(d->B_buffer),page_size,d->M*d->M*sizeof(double));
        assert(error==0);
        error = posix_memalign((void **)&(d->C_buffer),page_size,d->M*d->M*sizeof(double));
        assert(error==0);
        /* Fixed operand values so the GEMM result is predictable. */
        for(size_t idx=0; idx < d->M*d->M; idx++) {
            d->A_buffer[idx] = (double)4.5;
            d->B_buffer[idx] = (double)2.0;
            d->C_buffer[idx] = (double)0.0;
        }
    }
    return ERR_CLEAN;  /* clean run; NULL-plan case returned earlier */
} /* initDOPENACCGEMMPlan */