예제 #1
0
/*
 * Allocate a flight recorder and fill in its sampling parameters.
 *
 *   sc             - accepted, but not referenced anywhere in this function.
 *   maxSamples     - copied into the recorder's maxSamples field.
 *   sampleInterval - copied into the recorder's sampleInterval field.
 *
 * The recorder's data array is set up through double_array_init() and
 * sampleTimeStamp starts at 0. The memory comes from smalloc(); the result
 * is used without a NULL check, exactly as before.
 * NOTE(review): presumably smalloc() never returns NULL - confirm.
 */
OOflightrecorder*
ooNewFlightRecorder(sim_spacecraft_t *sc, int maxSamples, int sampleInterval)
{
  OOflightrecorder *recorder = smalloc(sizeof *recorder);

  /* Sample storage first, then the scalar bookkeeping fields. */
  double_array_init(&recorder->data);

  recorder->maxSamples = maxSamples;
  recorder->sampleTimeStamp = 0;
  recorder->sampleInterval = sampleInterval;

  return recorder;
}
예제 #2
0
int main(int argc, char** argv)
{
    /* Iterators */
    int i, j, k;
    
    uint32_t block;
    
    /* Time (seconds) */
    long t_0;
    long t_end;
    long dt;
    long steps;
    long iter;
    
    /* Emission control */
    bool emflag = TRUE;
    
    /* Start wall clock timer */
    timer_start(TIMER_WALLCLOCK);
    
    /* Initialize parallelization */
    nprocs = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
    nprocs = nprocs > MAX_THREADS ? MAX_THREADS : nprocs;
    
    if(argc > 1)
    {
        i = atoi(argv[1]);
        if(i < 1)
        {
            fprintf(stderr, "Invalid number of SPUs: %d < 1.\n", i);
            exit(1);
        }
        
        if(i < nprocs)
        {
            nprocs = i;
        }
        else 
        {
            printf("%d SPUs unavailable.  Using %d instead.\n", i, nprocs);
        }
    }
    
    /* Create SPE threads */
    for(i=0; i<nprocs; i++) 
    {
        threads[i].argp = (void*)(&spe_argvs[i]);
        
        /* Create context */
        if((threads[i].speid = spe_context_create(0, NULL)) == NULL) 
        {
            fprintf(stderr, "Failed spe_context_create(errno=%d strerror=%s)\n", errno, strerror(errno));
            exit(1);
        }
        
        /* Load program into context */
        if(spe_program_load(threads[i].speid, &fixedgrid_spu)) 
        {
            fprintf(stderr, "Failed spe_program_load(errno=%d strerror=%s)\n", errno, strerror(errno));
            exit(1);
        }
            
        /* Create thread for each SPE context */
        if(pthread_create(&threads[i].pthread, NULL, &ppu_pthread_function, &threads[i])) 
        {
            fprintf(stderr, "Failed pthread_create(errno=%d strerror=%s)\n", errno, strerror(errno));
            exit(1);
        }
        
        spe_set_status(i, SPE_STATUS_WAITING);
    }
    
    printf("\nRunning %d threads (%d SPU + 1 PPU).\n", (nprocs+1), nprocs);
    
    /* Allocate concentration memory */
    //conc = _malloc_align(NROWS*NCOLS*sizeof(double), 7);
    //conc_buff = (double*)_malloc_align(MAX_THREADS*NY*sizeof(double), 7);

    /* Allocation wind vector filed memory */
    //wind_u = _malloc_align(NROWS*NCOLS*sizeof(double), 7);
    //wind_v = _malloc_align(NROWS*NCOLS*sizeof(double), 7);
    //wind_u_buff = (double*)_malloc_align(MAX_THREADS*NY*sizeof(double), 7);
    //wind_v_buff = (double*)_malloc_align(MAX_THREADS*NY*sizeof(double), 7);
 
    /* Allocation diffusion tensor memory */
    //diff = _malloc_align(NROWS*NCOLS*sizeof(double), 7);
    //diff_buff = (double*)_malloc_align(MAX_THREADS*NY*sizeof(double), 7);

    /* Initialize concentration data */
    double_array_init(NROWS*NCOLS, conc, O3_INIT);
        
    /* Initialize wind field */
    double_array_init(NROWS*NCOLS, wind_u, WIND_U_INIT);
    double_array_init(NROWS*NCOLS, wind_v, WIND_V_INIT);
    
    /* Initialize diffusion field */
    double_array_init(NROWS*NCOLS, diff, DIFF_INIT);
    
    /* Initialize time */
    t_0 = 0.0;
    t_end = year2sec(END_YEAR - START_YEAR) + day2sec(END_DOY - START_DOY) + 
            hour2sec(END_HOUR - START_HOUR) + minute2sec(END_MIN - START_MIN);
    dt = STEP_SIZE;
    steps = (long)( (t_end - t_0)/dt );
    
    /* Print startup banner */
    print_start_banner(NX*DX, NY*DY, 0.0, t_end, steps);
    
    /* Store initial concentration */
    write_conc(&(conc[0]), 0, 0);
    
    /* BEGIN CALCULATIONS */
    for(iter = 1; iter <= steps; iter++)
    {
        emflag = iter*dt < 6*3600.0 ? TRUE : FALSE;
        
        timer_start(TIMER_ROW_DISCRET);
        
        /* Discretize rows 1/2 timestep */
        block = NROWS / nprocs;
        for(i=0; i<nprocs; i++)
        {
            /* Configure SPE arguments */
            spe_argvs[i].arg[0].u64 = (uint64_t)(&conc[i*block*NX]);
            spe_argvs[i].arg[1].u64 = (uint64_t)(&wind_u[i*block*NX]);
            spe_argvs[i].arg[2].u64 = (uint64_t)(&diff[i*block*NX]);
            spe_argvs[i].arg[3].dbl = dt/2;
            spe_argvs[i].arg[4].dbl = DX;
            spe_argvs[i].arg[5].u32[0] = NX;
            spe_argvs[i].arg[5].u32[1] = (i == nprocs - 1 ? block + NROWS % nprocs : block);  //FIXME
            
            /* Signal SPE */
            spe_set_status(i, SPE_STATUS_WORKING);
        }
        
        /* Wait for SPEs to finish */
        wait_all_spes();
        
        timer_stop(TIMER_ROW_DISCRET);
        
        timer_start(TIMER_COL_DISCRET);
        
        /* Discretize colums 1 timestep */
        for(i=0; i<NCOLS; i++)
        {
            k = i % nprocs;

            while(spe_get_status(k) > 0) ; //intentional wait
            
            if(i >= nprocs)
            {
                timer_start(TIMER_ARRAY_COPY);
                for(j=0; j<NY; j++)
                {
                    conc[i-nprocs + j*NX] = ccol[k*NY+j];
                }
                timer_stop(TIMER_ARRAY_COPY);
            }
            
            timer_start(TIMER_ARRAY_COPY);
            for(j=0; j<NY; j++)
            {
                ccol[k*NY + j] = conc[i + j*NX];
                wcol[k*NY + j] = wind_v[i + j*NX];
                dcol[k*NY + j] = diff[i + j*NX];
            }
            timer_stop(TIMER_ARRAY_COPY);

            // Configure SPE arguments 
            spe_argvs[k].arg[0].u64 = (uint64_t)(&ccol[k*NY]);
            spe_argvs[k].arg[1].u64 = (uint64_t)(&wcol[k*NY]);
            spe_argvs[k].arg[2].u64 = (uint64_t)(&dcol[k*NY]);
            spe_argvs[k].arg[3].dbl = dt;
            spe_argvs[k].arg[4].dbl = DY;
            spe_argvs[k].arg[5].u32[0] = NY;
            spe_argvs[k].arg[5].u32[1] = 1;

            // Signal SPE 
            spe_set_status(k, SPE_STATUS_WORKING);
        }

        /* Wait for SPEs to finish */
        wait_all_spes();
        
        timer_stop(TIMER_COL_DISCRET);
        
        timer_start(TIMER_ROW_DISCRET);
        
        /* Discretize rows 1/2 timestep */
        block = NROWS / nprocs;
        for(i=0; i<nprocs; i++)
        {
            /* Configure SPE arguments */
            spe_argvs[i].arg[0].u64 = (uint64_t)(&conc[i*block*NX]);
            spe_argvs[i].arg[1].u64 = (uint64_t)(&wind_u[i*block*NX]);
            spe_argvs[i].arg[2].u64 = (uint64_t)(&diff[i*block*NX]);
            spe_argvs[i].arg[3].dbl = dt/2;
            spe_argvs[i].arg[4].dbl = DX;
            spe_argvs[i].arg[5].u32[0] = NX;
            spe_argvs[i].arg[5].u32[1] = (i == nprocs - 1 ? block + NROWS % nprocs : block);  //FIXME
            
            /* Signal SPE */
            spe_set_status(i, SPE_STATUS_WORKING);
        }
        
        /* Wait for SPEs to finish */
        wait_all_spes();
        
        timer_stop(TIMER_ROW_DISCRET);
        
        /*
         * Could update wind field here...
         */
         
        /*
         * Could update diffusion tensor here...
         */
        
        /* Add emissions */
        if(emflag)
        {
            conc[SOURCE_Y*NX + SOURCE_X] += dt * (SOURCE_RATE) / (DX * DY * 1000.0);
        }
        
        /* Store concentration */
        #ifdef WRITE_EACH_ITER
        write_conc(conc, iter, 0);
        #endif
        
        /* Indicate progress */
        if(iter % 10 == 0)
        {
            printf("Iteration %ld of %ld.  Time = %ld seconds.\n", iter, steps, iter*dt);
        }
        
    }
    /* END CALCULATIONS */
    
    /* Wait for SPU-thread to complete execution. */
    for(i=0; i<nprocs; i++) 
    {
        spe_set_status(i, SPE_STATUS_STOPPED);
        if(pthread_join(threads[i].pthread, NULL)) 
        {
            perror("Failed pthread_join");
            exit(1);
        }
    }
    
    /* Store concentration */
    write_conc(conc, iter-1, 0);
    
    /* Show final time */
    printf("Final time: %ld seconds.\n", (iter-1)*dt);
    
    timer_stop(TIMER_WALLCLOCK);

    print_timer_summary("===PPU Timers===");    
    
    /* Cleanup and exit */
    return 0;
}