/**
 * Writes layer data to main memory.
 *
 * Issues a DMA list put that sends buff[i].data to the effective address
 * base conc[i].ea_base, using the transfer list stored in clist[i].
 * The buffer index doubles as the MFC tag group for the transfer.
 *
 * This function only enqueues the transfer; it does not call wait_for_dma.
 * NOTE(review): presumably the caller waits on tag i before reusing
 * buff[i] -- confirm against the call sites.
 *
 * @param i  Index into buff, conc and clist; also used as the DMA tag.
 */
void write_z_buffer(uint32_t i)
{
    /* One tag group per buffer index. */
    const uint32_t tag = i;

    mfc_putl(buff[i].data,                                   /* local source      */
             conc[i].ea_base,                                /* destination base  */
             clist[i].data,                                  /* DMA list          */
             sizeof(mfc_list_element_t) * clist[i].length,   /* list size (bytes) */
             tag,
             0,
             0);
}
int main(unsigned long long speid __attribute__ ((unused)), unsigned long long argp, unsigned long long envp __attribute__ ((unused))) { unsigned int tag; unsigned long long in_addr, out_addr; unsigned int i, num_chunks; mfc_list_element_t* dma_list_in; unsigned int tmp_addr; #ifdef USE_TIMER uint64_t start, time_working; spu_slih_register (MFC_DECREMENTER_EVENT, spu_clock_slih); spu_clock_start(); start = spu_clock_read(); #endif /* USE_TIMER */ /* First, we reserve a MFC tag for use */ tag = mfc_tag_reserve(); if (tag == MFC_TAG_INVALID) { printf ("SPU ERROR, unable to reserve tag\n"); return 1; } /* calculate the address of the local buffer where we can point the * dma_list_in pointer to */ tmp_addr = (unsigned int)((local_buffer_in + sizeof(float)*CHUNK_SIZE * NUM_LIST_ELEMENTS) - (sizeof (mfc_list_element_t) * NUM_LIST_ELEMENTS)); dma_list_in = (mfc_list_element_t*) (tmp_addr); /* issue DMA transfer to get the control block information from * system memory */ mfc_get (&control_block, argp, sizeof (control_block_t), tag, 0, 0); /* wait for the DMA get to complete */ mfc_write_tag_mask (1 << tag); mfc_read_tag_status_all (); /* calculate the number of blocks (chunks) that this spe is assigned * to process */ num_chunks = control_block.num_elements_per_spe/CHUNK_SIZE; /* * This is the main loop. We basically goes through the num_chunks of data * NUM_LIST_ELEMENTS at a time. Each list element is going to move CHUNK_SIZE * of data into system memory. Data is moved into local store, processed, and * written back to system memory NUM_LIST_ELEMENT chunks per loop iteration. 
*/ for (i = 0; i <num_chunks; i+= NUM_LIST_ELEMENTS) { /* set the in_addr and out_addr variables, we will use these for * issuing DMA get and put commands */ in_addr = control_block.in_addr + (i * CHUNK_SIZE * sizeof (float)); out_addr = control_block.out_addr + (i * CHUNK_SIZE * sizeof (float)); /* fill the dma list with the appropriate lower 32bit effective address and size for * each dma list element. This dma list is used to gather the input data * from system memory */ fill_dma_list (dma_list_in, NUM_LIST_ELEMENTS, in_addr, CHUNK_SIZE * sizeof(float)); /* issue a DMA get list command to gather the NUM_LIST_ELEMENT chunks of data from system memory. * The data will be gathered into local buffer local_buffer_in */ mfc_getl (local_buffer_in, in_addr, dma_list_in, NUM_LIST_ELEMENTS * sizeof(mfc_list_element_t), tag, 0, 0); /* wait for the DMA get list command to complete */ mfc_write_tag_mask (1 << tag); mfc_read_tag_status_all (); /* invoke process_data to work on the data that's just been moved into local store*/ process_data_simd (local_buffer_in, local_buffer_out, CHUNK_SIZE * NUM_LIST_ELEMENTS); /* fill the dma list with the appropriate lower 32 bit ea and size for each * dma list element. This dma list is used to scatter the output data to system memory */ fill_dma_list (dma_list_out, NUM_LIST_ELEMENTS, out_addr, CHUNK_SIZE * sizeof(float)); /* issue the DMA put list command to scatter the result from local memory to * different places in system memory */ mfc_putl (local_buffer_out, out_addr, dma_list_out, NUM_LIST_ELEMENTS * sizeof(mfc_list_element_t), tag, 0, 0); /* wait for the DMA put list to complete */ mfc_write_tag_mask (1 << tag); mfc_read_tag_status_all (); } #ifdef USE_TIMER time_working = (spu_clock_read() - start); spu_clock_stop(); printf ("SPE time_working = %lld\n", time_working); #endif /* USE_TIMER */ return 0; }
/**
 * Writes chemistry data to main memory.
 *
 * Issues a DMA list put that sends conc[i].data to the effective address
 * base conc[i].ea_base, using the transfer list stored in clist[i], and
 * then calls wait_for_dma on the same index before returning.
 *
 * @param i  Index into conc and clist; also used as the DMA tag and as the
 *           argument to wait_for_dma.
 */
void write_chem_buffer(uint32_t i)
{
    /* One tag group per buffer index. */
    const uint32_t tag = i;

    mfc_putl(conc[i].data,                                   /* local source      */
             conc[i].ea_base,                                /* destination base  */
             clist[i].data,                                  /* DMA list          */
             sizeof(mfc_list_element_t) * clist[i].length,   /* list size (bytes) */
             tag,
             0,
             0);

    /* Do not return until the transfer issued above has been waited on. */
    wait_for_dma(tag);
}