// The samples are not contiguous in PPU memory, so they are gathered with a
// DMA list: one list element per station.
//
// For every station, the low effective-address word of its list element is
// set from the [channel][stat][time] entry of the PPU sample array, and a
// single list-get into samples[buffer] is then issued, using `buffer` as the
// DMA tag as well. This function does not wait for the transfer to complete.
//
// The whole body is compiled out when DO_DMA is 0.
//
// NOTE(review): only ea_low is written here; the size field of each list
// element is presumably initialised elsewhere -- confirm.
static inline void load_samples(int channel, int time, int buffer) {
#if DO_DMA
  ppu_samples_type *ppu_samples = (ppu_samples_type*)spu_arguments.ppu_samples;
  int stat = 0;

  while (stat < NR_STATIONS) {
    samples_dma_list[stat].ea_low = (unsigned) (*ppu_samples)[channel][stat][time];
    stat++;
  }

  mfc_getl(samples[buffer], 0, samples_dma_list,
           sizeof(struct spu_dma_list_elt) * NR_STATIONS, buffer, 0, 0);
#endif
}
// Example #2
0
/**
 * Fetches chemistry data from main memory.
 *
 * Rebuilds DMA list clist[i] with conc[i].length elements. Element k
 * transfers 16 bytes starting at
 * conc[i].ea_base + ea_off + k*NX_ALIGNED_SIZE*NY. A list-get into
 * conc[i].data is then issued with i as the DMA tag; this function does
 * not wait for the transfer to complete.
 *
 * @param i       Index of the conc/clist slot to fill; also the DMA tag.
 * @param ea_off  Offset added to conc[i].ea_base for every list element.
 */
void fetch_chem_buffer(uint32_t i, uint64_t ea_off)
{
    /* Address of the first element; the others follow at a fixed stride. */
    const uint64_t first_ea = conc[i].ea_base + ea_off;
    uint32_t k = 0;

    clist[i].length = conc[i].length;

    /* Build the DMA list */
    while (k < clist[i].length)
    {
        clist[i].data[k].size = 16;
        clist[i].data[k].eal = first_ea + k*NX_ALIGNED_SIZE*NY;
        ++k;
    }

    /* Start the gather */
    mfc_getl(conc[i].data,
             conc[i].ea_base,
             clist[i].data,
             clist[i].length*sizeof(mfc_list_element_t),
             i, 0, 0);
}
// Example #3
0
/**
 * SPE entry point.
 *
 * Fetches the control block found at effective address argp, then streams
 * the input data through local store in batches of up to NUM_LIST_ELEMENTS
 * chunks of CHUNK_SIZE floats: DMA-list gather into local_buffer_in,
 * process_data_simd() into local_buffer_out, DMA-list scatter back to
 * system memory. Every DMA is waited for before the next step starts.
 *
 * Only whole chunks are processed: num_elements_per_spe is divided by
 * CHUNK_SIZE with integer division, so a trailing partial chunk is ignored.
 *
 * @param speid  unused
 * @param argp   effective address of the control_block_t for this SPE
 * @param envp   unused
 * @return 0 on success, 1 if no MFC tag could be reserved
 */
int main(unsigned long long speid __attribute__ ((unused)),
         unsigned long long argp, 
         unsigned long long envp __attribute__ ((unused)))
{
  unsigned int tag;
  unsigned long long in_addr, out_addr;
  unsigned int i, num_chunks, batch;
  mfc_list_element_t* dma_list_in;
  unsigned int tmp_addr;

#ifdef USE_TIMER
  uint64_t start, time_working;
    
  spu_slih_register (MFC_DECREMENTER_EVENT, spu_clock_slih);
  spu_clock_start();
  start = spu_clock_read();
#endif /* USE_TIMER */

  /* First, we reserve a MFC tag for use */
  tag = mfc_tag_reserve();
  if (tag == MFC_TAG_INVALID)
  {
    printf ("SPU ERROR, unable to reserve tag\n");
    return 1;
  }

  /* Calculate the address inside the local buffer that the dma_list_in
   * pointer is set to.
   * NOTE(review): if local_buffer_in is declared as an array of float, the
   * sizeof-scaled offsets below are applied in units of float rather than
   * bytes and the result lies outside the buffer -- confirm against the
   * declaration, which is not visible here. */
  tmp_addr = (unsigned int)((local_buffer_in + sizeof(float)*CHUNK_SIZE * NUM_LIST_ELEMENTS) - 
      (sizeof (mfc_list_element_t) * NUM_LIST_ELEMENTS));
  dma_list_in = (mfc_list_element_t*) (tmp_addr);

  /* issue DMA transfer to get the control block information from 
   * system memory */
  mfc_get (&control_block, argp, sizeof (control_block_t), tag, 0, 0);

  /* Wait for the DMA get to complete. The mask is built from an unsigned
   * constant so that shifting by a tag value of 31 is well defined. */
  mfc_write_tag_mask (1u << tag);
  mfc_read_tag_status_all ();

  /* calculate the number of blocks (chunks) that this spe is assigned 
   * to process */
  num_chunks = control_block.num_elements_per_spe/CHUNK_SIZE;

  /*
   * This is the main loop. We go through the num_chunks chunks of data, up
   * to NUM_LIST_ELEMENTS at a time. Each list element moves one chunk of
   * CHUNK_SIZE floats. Data is moved into local store, processed, and
   * written back to system memory, one batch of chunks per loop iteration.
   */
  for (i = 0; i <num_chunks; i+= NUM_LIST_ELEMENTS)
  {
    /* The last batch is shorter when num_chunks is not a multiple of
     * NUM_LIST_ELEMENTS; clamp it so nothing outside the range assigned to
     * this SPE is read, processed or written. */
    batch = num_chunks - i;
    if (batch > NUM_LIST_ELEMENTS)
    {
      batch = NUM_LIST_ELEMENTS;
    }

    /* Set the in_addr and out_addr variables; they are used for issuing the
     * DMA get and put commands. The byte offset is computed in 64 bits so
     * that it cannot wrap for large data sets. */
    in_addr = control_block.in_addr + ((unsigned long long) i * CHUNK_SIZE * sizeof (float));
    out_addr = control_block.out_addr + ((unsigned long long) i * CHUNK_SIZE * sizeof (float));

    /* fill the dma list with the appropriate lower 32bit effective address and size for
     * each dma list element. This dma list is used to gather the input data 
     * from system memory */
    fill_dma_list (dma_list_in, batch, in_addr, CHUNK_SIZE * sizeof(float)); 

    /* issue a DMA get list command to gather this batch of chunks from system memory.
     * The data will be gathered into local buffer local_buffer_in */
    mfc_getl (local_buffer_in, in_addr, dma_list_in, batch * sizeof(mfc_list_element_t), tag, 0, 0);

    /* wait for the DMA get list command to complete */
    mfc_write_tag_mask (1u << tag);
    mfc_read_tag_status_all ();

    /* invoke process_data_simd to work on the data that has just been moved into local store */
    process_data_simd (local_buffer_in, local_buffer_out, CHUNK_SIZE * batch);

    /* fill the dma list with the appropriate lower 32 bit ea and size for each
     * dma list element. This dma list is used to scatter the output data to system memory  */
    fill_dma_list (dma_list_out, batch, out_addr, CHUNK_SIZE * sizeof(float)); 

    /* issue the DMA put list command to scatter the result from local memory to 
     * different places in system memory */
    mfc_putl (local_buffer_out, out_addr, dma_list_out, batch * sizeof(mfc_list_element_t), 
        tag, 0, 0);

    /* wait for the DMA put list to complete */
    mfc_write_tag_mask (1u << tag);
    mfc_read_tag_status_all ();

  }

#ifdef USE_TIMER
  time_working = (spu_clock_read() - start);
  spu_clock_stop();
  /* time_working is unsigned: print it with a matching conversion */
  printf ("SPE time_working = %llu\n", (unsigned long long) time_working);
#endif /* USE_TIMER */

  return 0;
}
// Example #4
0
/**
 * Fetches z data from main memory
 */
void fetch_z_buffer(uint32_t i, uint64_t ea_off)
{
    uint32_t j, x;

    /* Build DMA lists */
    //timer_start(&metrics.array_copy);

#define UNROLL_ELEMENT \
clist[i].data[j].eal = conc[i].ea_base + ea_off + j*NX_ALIGNED_SIZE*NY; \
wlist[i].data[j].eal = wind[i].ea_base + ea_off + j*NX_ALIGNED_SIZE*NY; \
dlist[i].data[j].eal = diff[i].ea_base + ea_off + j*NX_ALIGNED_SIZE*NY; \
clist[i].data[j].size = 16; \
wlist[i].data[j].size = 16; \
dlist[i].data[j].size = 16

    j=0;
    x = clist[i].length;
    while(x > 8)
    {
        UNROLL_ELEMENT;
        ++j;
        UNROLL_ELEMENT;
        ++j;
        UNROLL_ELEMENT;
        ++j;
        UNROLL_ELEMENT;
        ++j;
        UNROLL_ELEMENT;
        ++j;
        UNROLL_ELEMENT;
        ++j;
        UNROLL_ELEMENT;
        ++j;
        UNROLL_ELEMENT;
        ++j;
        x -= 8;
    }
    while(x > 4)
    {
        UNROLL_ELEMENT;
        ++j;
        UNROLL_ELEMENT;
        ++j;
        UNROLL_ELEMENT;
        ++j;
        UNROLL_ELEMENT;
        ++j;
        x -= 4;
    }
    while(x > 0)
    {
        UNROLL_ELEMENT;
        ++j;
        --x;
    }

#undef UNROLL_ELEMENT

    //timer_stop(&metrics.array_copy);

    /* Fetch data */
    mfc_getlb(conc[i].data, conc[i].ea_base, clist[i].data,
              clist[i].length*sizeof(mfc_list_element_t), i, 0, 0);
    mfc_getl(wind[i].data, wind[i].ea_base, wlist[i].data,
             wlist[i].length*sizeof(mfc_list_element_t), i, 0, 0);
    mfc_getl(diff[i].data, diff[i].ea_base, dlist[i].data,
             dlist[i].length*sizeof(mfc_list_element_t), i, 0, 0);
}