예제 #1
0
/**
 * Host program entry point.
 *
 * Checks that the grid dimensions are multiples of the block dimensions,
 * initializes the model, applies emissions, writes the initial concentration
 * data and then advances the model from G->tstart until G->time reaches
 * G->tend in increments of G->dt.  On every step each monitored species
 * field is uploaded to the device, swept in the symmetric order
 * x(dt/2), y(dt/2), z(dt), y(dt/2), x(dt/2), and downloaded again.
 * Afterwards the metrics are written as CSV and the device buffers and
 * context are released.
 *
 * @param argc  Argument count (not used by this function).
 * @param argv  Argument vector (not used by this function).
 * @return 0 on normal completion.  The process exits with status 1 when
 *         NX, NY or NZ is not a multiple of BLOCK_X, BLOCK_Y or BLOCK_Z.
 */
int main(int argc, char** argv)
{
    /* Global model state */
    fixedgrid_t* const G = &G_GLOBAL;

    /* step: 1-based time step counter; m: index into MONITOR */
    uint32_t step;
    uint32_t m;

    /* The wall clock covers everything up to the metrics dump */
    timer_start(&G->metrics.wallclock);

    /* Every grid dimension must be a whole number of blocks */
    if((NX % BLOCK_X) != 0)
    {
        fprintf(stderr, "NX must be a multiple of %d\n", BLOCK_X);
        exit(1);
    }
    if((NY % BLOCK_Y) != 0)
    {
        fprintf(stderr, "NY must be a multiple of %d\n", BLOCK_Y);
        exit(1);
    }
    if((NZ % BLOCK_Z) != 0)
    {
        fprintf(stderr, "NZ must be a multiple of %d\n", BLOCK_Z);
        exit(1);
    }

    /* Model setup, emissions, banner */
    init_model(G);
    process_emissions(G);
    print_start_banner(G);

    /* Snapshot of the initial state */
    printf("Writing initial concentration data... ");
    write_conc(G, 0, 0);
    printf("done.\n");

    printf("\n!!!!FIXME: Report # FPEs\n");

    /* BEGIN CALCULATIONS */
    step = 1;
    G->time = G->tstart;
    while(G->time < G->tend)
    {
        start_saprc99(G);

        for(m = 0; m < NLOOKAT; ++m)
        {
            /* Size of one species field and its location in host memory */
            const size_t field_bytes = NX*NY*NZ*sizeof(real_t);
            void* const host_field = (void*)&G->conc(0, 0, 0, MONITOR[m]);

            /* Host -> device: input lands in dev_conc */
            CU_SAFE_CALL(cuMemcpyHtoD(G->dev_conc, host_field, field_bytes));

            /* Half steps in x and y on either side of a full step in z */
            discretize_all_x(G, G->dt*0.5);
            discretize_all_y(G, G->dt*0.5);
            discretize_all_z(G, G->dt);
            discretize_all_y(G, G->dt*0.5);
            discretize_all_x(G, G->dt*0.5);

            /* Device -> host: result is read from dev_conc_out */
            CU_SAFE_CALL(cuMemcpyDtoH(host_field, G->dev_conc_out, field_bytes));
        }

        update_model(G);

        #if WRITE_EACH_ITER == 1
        write_conc(G, step, 0);
        #endif

        printf("  After iteration %02d: Model time = %07.2f sec.\n", step, step*G->dt);

        G->time += G->dt;
        ++step;
    }
    /* END CALCULATIONS */

    /* Only the final state is stored unless every iteration was written */
    #if WRITE_EACH_ITER != 1
    write_conc(G, step-1, 0);
    #endif

    /* step is one past the last completed iteration */
    printf("\nFinal time: %f seconds.\n", (step-1)*G->dt);

    timer_stop(&G->metrics.wallclock);

    /* Write metrics to CSV file */
    write_metrics_as_csv(G, "NVidia CUDA");

    /* Release device buffers, then drop the driver context */
    CU_SAFE_CALL(cuMemFree(G->dev_conc));
    CU_SAFE_CALL(cuMemFree(G->dev_wind));
    CU_SAFE_CALL(cuMemFree(G->dev_diff));
    CU_SAFE_CALL(cuMemFree(G->dev_buff));
    CU_SAFE_CALL(cuMemFree(G->dev_conc_out));
    CU_SAFE_CALL_NO_SYNC(cuCtxDetach(cu_context_global));

    return 0;
}
예제 #2
0
/**
 * Fills the model state with its initial values and sets up the device.
 *
 * In order: initializes the metrics record, derives the start/end times and
 * step size from the START_*, END_* and STEP_SIZE constants, fills the
 * concentration, wind, diffusion and temperature fields with their *_INIT
 * values (or, when DO_CHEMISTRY == 1, with the per-species values produced
 * by saprc99_Initialize), then loads the CUDA module, allocates five device
 * buffers of NZ*NY*NX real_t elements each and calls
 * init_discretization_kernel().
 *
 * @param G  Model state to initialize; its device buffer handles are
 *           written by this function.
 */
void init_model(fixedgrid_t* G)
{
    /* Grid indices along x, y and z */
    uint32_t ix, iy, iz;

    /* Initialize metrics */
    metrics_init(&G->metrics, "PPE");

    /* Initialize time frame */
    /* FIXME: year is ignored */
    G->tstart = day2sec(START_DOY) + hour2sec(START_HOUR) + minute2sec(START_MIN);
    G->tend   = day2sec(END_DOY)   + hour2sec(END_HOUR)   + minute2sec(END_MIN);
    G->dt     = STEP_SIZE;
    G->time   = G->tstart;

    /* ---- Chemistry and concentration data ---- */
    printf("Loading chemistry and concentration data... ");
    timer_start(&G->metrics.array_init);

    #if DO_CHEMISTRY == 1
    {
        /* One initial value per species, broadcast over the whole grid */
        real_t chemBuff[NSPEC];
        uint32_t spec;

        saprc99_Initialize(chemBuff);

        for(spec = 0; spec < NSPEC; ++spec)
            for(iz = 0; iz < NZ; ++iz)
                for(iy = 0; iy < NY; ++iy)
                    for(ix = 0; ix < NX; ++ix)
                        G->conc(ix, iy, iz, spec) = chemBuff[spec];
    }
    #else
    /* Without chemistry only the ind_O3 species is given a value */
    for(iz = 0; iz < NZ; ++iz)
        for(iy = 0; iy < NY; ++iy)
            for(ix = 0; ix < NX; ++ix)
                G->conc(ix, iy, iz, ind_O3) = O3_INIT;
    #endif

    timer_stop(&G->metrics.array_init);
    printf("done.\n");

    /* ---- Wind field ---- */
    printf("Loading wind field data... ");
    timer_start(&G->metrics.array_init);

    for(iz = 0; iz < NZ; ++iz)
        for(iy = 0; iy < NY; ++iy)
            for(ix = 0; ix < NX; ++ix)
            {
                G->wind_u(ix, iy, iz) = WIND_U_INIT;
                G->wind_v(ix, iy, iz) = WIND_V_INIT;
                G->wind_w(ix, iy, iz) = WIND_W_INIT;
            }

    timer_stop(&G->metrics.array_init);
    printf("done.\n");

    /* ---- Diffusion field ---- */
    printf("Loading diffusion field data... ");
    timer_start(&G->metrics.array_init);

    for(iz = 0; iz < NZ; ++iz)
        for(iy = 0; iy < NY; ++iy)
            for(ix = 0; ix < NX; ++ix)
            {
                G->diff_h(ix, iy, iz) = DIFF_H_INIT;
                G->diff_v(ix, iy, iz) = DIFF_V_INIT;
            }

    timer_stop(&G->metrics.array_init);
    printf("done.\n");

    /* ---- Temperature field ---- */
    printf("Loading temperature field data... ");
    timer_start(&G->metrics.array_init);

    for(iz = 0; iz < NZ; ++iz)
        for(iy = 0; iy < NY; ++iy)
            for(ix = 0; ix < NX; ++ix)
                G->temp(ix, iy, iz) = TEMP_INIT;

    timer_stop(&G->metrics.array_init);
    printf("done.\n");

    /* ---- CUDA module and device memory ---- */
    printf("Initializing CUDA driver interface... ");
    //FIXME: Start a timer here
    {
        /* Every device buffer holds one full NZ*NY*NX field of real_t */
        const size_t field_bytes = NZ*NY*NX*sizeof(real_t);

        CU_SAFE_CALL(init_cuda_driver("data/discretize.cubin"));
        CU_SAFE_CALL(cuMemAlloc(&G->dev_conc,     field_bytes));
        CU_SAFE_CALL(cuMemAlloc(&G->dev_wind,     field_bytes));
        CU_SAFE_CALL(cuMemAlloc(&G->dev_diff,     field_bytes));
        CU_SAFE_CALL(cuMemAlloc(&G->dev_buff,     field_bytes));
        CU_SAFE_CALL(cuMemAlloc(&G->dev_conc_out, field_bytes));
    }

    init_discretization_kernel(G);

    //FIXME: Stop a timer here
    printf("done.\n");
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA.
//!
//! Loads the "pass_kernel" function from "pass_kernel.cubin" through
//! initCuda(), builds its parameter block by hand with the cuParamSet* calls
//! (NUM_ARG ints holding 0..NUM_ARG-1, a device pointer to an array of
//! NUM_ARG ints, and a device pointer to a single int), launches it on a
//! 1x1 grid with a NUM_ARG x 1 x 1 block, then copies both device buffers
//! back to the host and prints their contents.
//!
//! @param argc  Argument count, forwarded to initCuda().
//! @param argv  Argument vector, forwarded to initCuda(); argv[0] is also
//!              passed as the executable path.
////////////////////////////////////////////////////////////////////////////////
void
runTest(int argc, char** argv)
{
    // Start from NULL so the detach check at the end never reads an
    // indeterminate handle.
    CUcontext cuContext = NULL;

    // initialize CUDA
    CUfunction pk = NULL;
    const char cubin_name [] = "pass_kernel.cubin";
    const char kernel_name [] = "pass_kernel";

    CU_SAFE_CALL(initCuda(cuContext, argv[0], &pk, argc, argv, cubin_name, kernel_name));
    printf("initCuda-returned CUfunction:\n");

    // cuParamSetx, x=i f v
    // http://visionexperts.blogspot.com/2010/07/cuda-parameter-alignment.html - check alignment
    #define ALIGN_UP(offset, alignment)					\
        (offset) = ((offset) + (alignment) - 1) & ~((alignment) - 1)

    // Running byte offset into the kernel parameter block.
    size_t offset = 0;

    // input integers
    // CU paramset i.
    for(int i = 0 ; i < NUM_ARG ; i++)
    {
        int align = __alignof(int);
        ALIGN_UP(offset, align);
        CU_SAFE_CALL(cuParamSeti(pk, (int)offset, i));
        // offset is a size_t: convert explicitly so it matches %d.
        printf ("offset %d = %d\n", i, (int)offset);
        offset += sizeof(int);
    }

    // return array for updated inputs
    int size_int = sizeof(int);

    int size_array = size_int * NUM_ARG;
    CUdeviceptr d_return_values;
    CU_SAFE_CALL(cuMemAlloc(&d_return_values, size_array));
    void* ptr = (void*)(size_t)d_return_values;
    int align = __alignof(ptr);
    ALIGN_UP(offset, align);
    CU_SAFE_CALL(cuParamSetv(pk, (int)offset, &ptr, sizeof(ptr)));
    printf("return values offset:%d\n", (int)offset);
    offset += sizeof(ptr);

    // single int the kernel reports back through
    CUdeviceptr d_return_N;
    CU_SAFE_CALL(cuMemAlloc(&d_return_N, size_int));
    void* ptrN = (void*)(size_t)d_return_N;
    int alignN = __alignof(ptrN);
    ALIGN_UP(offset, alignN);
    CU_SAFE_CALL(cuParamSetv(pk, (int)offset, &ptrN, sizeof(ptrN)));
    printf("return int offset:%d\n", (int)offset);
    offset += sizeof(ptrN);

    // Calling kernel
    int BLOCK_SIZE_X = NUM_ARG;
    int BLOCK_SIZE_Y = 1;
    int BLOCK_SIZE_Z = 1;
    int GRID_SIZE = 1;
    cutilDrvSafeCallNoSync(cuFuncSetBlockShape(pk, BLOCK_SIZE_X, BLOCK_SIZE_Y, BLOCK_SIZE_Z));

    printf("paramsetsize:%d\n", (int)offset);
    CU_SAFE_CALL(cuParamSetSize(pk, (unsigned int)offset));
    CU_SAFE_CALL(cuLaunchGrid(pk, GRID_SIZE, GRID_SIZE));

    // Host staging buffer for the returned array.
    int* h_return_values = (int*)malloc(NUM_ARG * sizeof(int));
    if(h_return_values == NULL)
    {
        // Out of host memory: release what was acquired and give up.
        fprintf(stderr, "runTest: failed to allocate host result buffer\n");
        CU_SAFE_CALL(cuMemFree(d_return_values));
        CU_SAFE_CALL(cuMemFree(d_return_N));
        if(cuContext != NULL) cuCtxDetach(cuContext);
        return;
    }
    CU_SAFE_CALL(cuMemcpyDtoH((void*)h_return_values, d_return_values, size_array));
    CU_SAFE_CALL(cuMemFree(d_return_values));

    for(int i=0;i<NUM_ARG;i++)
        printf("%dth value = %d\n", i, h_return_values[i]);
    free(h_return_values);

    // A single int needs no heap allocation; the stack copy cannot leak.
    int h_return_N = 0;
    CU_SAFE_CALL(cuMemcpyDtoH((void*)&h_return_N, d_return_N, size_int));
    CU_SAFE_CALL(cuMemFree(d_return_N));

    printf("%d sizeof array\n", h_return_N);

    if(cuContext != NULL) cuCtxDetach(cuContext);
}