示例#1
0
/**
 * Writes layer data to main memory.
 *
 * Issues one DMA-list put whose local source is buff[i].data. The
 * effective-address argument and the DMA list come from conc[i].ea_base
 * and clist[i]; the index i is also passed as the DMA tag. No wait on
 * that tag is performed inside this function.
 *
 * NOTE(review): the source buffer comes from buff[] while the address and
 * list come from conc[]/clist[] -- confirm this pairing is intended.
 *
 * @param i  index into buff, conc and clist; also used as the DMA tag
 */
void write_z_buffer(uint32_t i)
{
    mfc_putl(buff[i].data,                                   /* local source   */
             conc[i].ea_base,                                /* main-memory EA */
             clist[i].data,                                  /* DMA list       */
             sizeof(mfc_list_element_t) * clist[i].length,   /* list size      */
             i,                                              /* tag            */
             0,
             0);
}
示例#2
0
/**
 * SPU program entry point: streams float data through local store in
 * batches of NUM_LIST_ELEMENTS chunks using DMA lists.
 *
 * Steps: reserve an MFC tag, fetch the control block located at argp, then
 * for every batch gather the input with a DMA-list get, run
 * process_data_simd on it and scatter the result with a DMA-list put.
 *
 * @param speid  unused
 * @param argp   effective address of the control block (control_block_t)
 * @param envp   unused
 * @return 0 on success, 1 if no MFC tag could be reserved
 */
int main(unsigned long long speid __attribute__ ((unused)),
         unsigned long long argp,
         unsigned long long envp __attribute__ ((unused)))
{
  unsigned int tag;
  unsigned long long in_addr, out_addr;
  unsigned long long byte_offset;
  unsigned int i, num_chunks;
  mfc_list_element_t *dma_list_in;
  unsigned int tmp_addr;

#ifdef USE_TIMER
  uint64_t start, time_working;

  spu_slih_register (MFC_DECREMENTER_EVENT, spu_clock_slih);
  spu_clock_start();
  start = spu_clock_read();
#endif /* USE_TIMER */

  /* First, reserve an MFC tag; every DMA below is issued on this tag.
   * NOTE(review): the tag is never released before main returns. */
  tag = mfc_tag_reserve();
  if (tag == MFC_TAG_INVALID)
  {
    printf ("SPU ERROR, unable to reserve tag\n");
    return 1;
  }

  /* Compute the address, relative to local_buffer_in, that dma_list_in
   * will point to: the data-size expression minus room for
   * NUM_LIST_ELEMENTS list elements.
   * NOTE(review): both offsets are expressed in bytes; if local_buffer_in
   * is not a byte-typed pointer this arithmetic is scaled by its element
   * size -- confirm against the buffer's declaration. */
  tmp_addr = (unsigned int)((local_buffer_in + sizeof(float)*CHUNK_SIZE * NUM_LIST_ELEMENTS) -
      (sizeof (mfc_list_element_t) * NUM_LIST_ELEMENTS));
  dma_list_in = (mfc_list_element_t *) (tmp_addr);

  /* Issue a DMA transfer to get the control block information from
   * system memory. */
  mfc_get (&control_block, argp, sizeof (control_block_t), tag, 0, 0);

  /* Wait for the DMA get to complete. */
  mfc_write_tag_mask (1 << tag);
  mfc_read_tag_status_all ();

  /* Number of chunks this SPE is assigned to process; the integer
   * division drops any partial trailing chunk. */
  num_chunks = control_block.num_elements_per_spe/CHUNK_SIZE;

  /*
   * Main loop: walk the num_chunks chunks NUM_LIST_ELEMENTS at a time.
   * Each list element moves one chunk of CHUNK_SIZE floats. Per iteration
   * the data is gathered into local store, processed, and written back to
   * system memory.
   * NOTE(review): every iteration transfers a full NUM_LIST_ELEMENTS
   * chunks, so if num_chunks is not a multiple of NUM_LIST_ELEMENTS the
   * last iteration reaches past the assigned range -- confirm callers
   * guarantee the multiple.
   */
  for (i = 0; i < num_chunks; i += NUM_LIST_ELEMENTS)
  {
    /* Byte offset of this batch. Widen to 64 bits before multiplying so
     * the product cannot wrap in a narrower type before it is added to
     * the 64-bit effective addresses. */
    byte_offset = (unsigned long long) i * CHUNK_SIZE * sizeof (float);

    /* Addresses used for the DMA list get and put of this batch. */
    in_addr = control_block.in_addr + byte_offset;
    out_addr = control_block.out_addr + byte_offset;

    /* Fill the DMA list with the lower 32-bit effective address and size
     * for each element. This list gathers the input data from system
     * memory. */
    fill_dma_list (dma_list_in, NUM_LIST_ELEMENTS, in_addr, CHUNK_SIZE * sizeof(float));

    /* Issue a DMA get-list command to gather NUM_LIST_ELEMENTS chunks of
     * data from system memory into local_buffer_in. */
    mfc_getl (local_buffer_in, in_addr, dma_list_in, NUM_LIST_ELEMENTS * sizeof(mfc_list_element_t), tag, 0, 0);

    /* Wait for the DMA get-list command to complete. */
    mfc_write_tag_mask (1 << tag);
    mfc_read_tag_status_all ();

    /* Process the data that has just been moved into local store. */
    process_data_simd (local_buffer_in, local_buffer_out, CHUNK_SIZE * NUM_LIST_ELEMENTS);

    /* Fill the DMA list with the lower 32-bit effective address and size
     * for each element. This list scatters the output data to system
     * memory. */
    fill_dma_list (dma_list_out, NUM_LIST_ELEMENTS, out_addr, CHUNK_SIZE * sizeof(float));

    /* Issue the DMA put-list command to scatter the result from local
     * memory to system memory. */
    mfc_putl (local_buffer_out, out_addr, dma_list_out, NUM_LIST_ELEMENTS * sizeof(mfc_list_element_t),
        tag, 0, 0);

    /* Wait for the DMA put-list to complete. */
    mfc_write_tag_mask (1 << tag);
    mfc_read_tag_status_all ();
  }

#ifdef USE_TIMER
  time_working = (spu_clock_read() - start);
  spu_clock_stop();
  /* time_working is unsigned; print it with an unsigned conversion and an
   * explicit cast so the specifier matches the argument type. */
  printf ("SPE time_working = %llu\n", (unsigned long long) time_working);
#endif /* USE_TIMER */

  return 0;
}
示例#3
0
/**
 * Writes chemistry data to main memory.
 *
 * Issues one DMA-list put whose local source is conc[i].data, using
 * conc[i].ea_base as the effective-address argument and clist[i] as the
 * DMA list, with i as the DMA tag. It then calls wait_for_dma(i)
 * (presumably blocks until tag i has completed -- confirm against its
 * definition).
 *
 * @param i  index into conc and clist; also used as the DMA tag
 */
void write_chem_buffer(uint32_t i)
{
    mfc_putl(conc[i].data,                                   /* local source   */
             conc[i].ea_base,                                /* main-memory EA */
             clist[i].data,                                  /* DMA list       */
             sizeof(mfc_list_element_t) * clist[i].length,   /* list size      */
             i,                                              /* tag            */
             0,
             0);

    wait_for_dma(i);
}