/*
 * Deliver a response word to the PPU and then raise the sync flag.
 *
 * The value is stored in the local `spu` structure together with sync = 1,
 * after which both fields are DMA'd to the same field offsets relative to
 * the effective address `spu_ea`.
 *
 * x: response value to publish.
 */
static void send_response(uint32_t x)
{
    spu.response = x;
    spu.sync = 1;

    /* Local-store address of the structure, used to derive field offsets. */
    const uint32_t local_base = (uint32_t)&spu;

    /* Push the response word to the PPU-side variable. */
    const uint64_t response_ea = spu_ea + ((uint32_t)&spu.response) - local_base;
    mfc_put(&spu.response, response_ea, 4, TAG, 0, 0);

    /*
     * Push the sync word with a fenced put: the fence orders it behind the
     * earlier put on the same tag, so sync is written AFTER response.
     */
    const uint64_t sync_ea = spu_ea + ((uint32_t)&spu.sync) - local_base;
    mfc_putf(&spu.sync, sync_ea, 4, TAG, 0, 0);
}
/*
 * Write the current node's TAIL index for the given side out to
 * mcb[am].idx_addr[side] with a fenced DMA put.
 *
 * Nothing is sent when the node is a leaf, or when mcb[am].local[side] is
 * below 255 (NOTE(review): the meaning of the 255 threshold is not visible
 * here - confirm against where `local` is set).
 *
 * side: index selecting which side's TAIL index is written.
 */
void update_tail(int side)
{
    /* Leaf nodes never write a tail index. */
    if (mcb[am].leaf_node)
        return;

    /* Skip sides whose `local` value is under the threshold. */
    if (mcb[am].local[side] < 255)
        return;

    mfc_putf(&md[am].idx[side][TAIL],
             mcb[am].idx_addr[side],
             sizeof(vector signed int),
             ctrl_dma_tag,
             0, 0);
}
/*
 * Deliver a response word to the PPU and then raise the PPU's sync flag.
 *
 * sync_ea:     effective address of the PPU sync variable.
 * response_ea: effective address of the PPU response variable.
 * x:           response value to publish.
 */
static void send_response(uint64_t sync_ea, uint64_t response_ea, uint32_t x)
{
    /* Staging word for the response (memory alignment is required by dma). */
    static uint32_t response_word __attribute__((aligned(16)));
    /* Constant "ready" flag (memory alignment is required by dma). */
    static uint32_t sync_word __attribute__((aligned(16))) = 1;

    response_word = x;

    /* Push the response to the PPU variable. */
    mfc_put(&response_word, response_ea, 4, TAG, 0, 0);

    /*
     * Push the sync flag with a fenced put: the fence orders it behind the
     * earlier put on the same tag, so sync is written AFTER response.
     */
    mfc_putf(&sync_word, sync_ea, 4, TAG, 0, 0);
}
/* * Colour the given framebuffer address pix. * i and params may be used to select the colour. */ static void write_colour(struct pixel *pix, float i, struct fractal_params *params) { // Mask for keeping track of ppe finishing with buffers static int valid = 0xff; static vector unsigned int sentinel = {1,0,0,0}; ++cmap_calls; uint colour; // Various colouring alternatives are possible here // ignore the first few steps - reduces backgroud noise if(i<20) return; /* if(i==0) return; if(params->i_max < 10000) { colour = 0x00010000; } else if(params->i_max < 20000) { if(i<10000) return; colour = 0x00000200; } else { if(i<20000) return; colour = 0x00000004; }*/ colour = 0x00010100; // If starting to fill a new buffer, check that any earlier use // is finished with - ppe signals completion if(fill%2048 == 0) { while(!(valid&(1<<(fill/2048)))) { valid |= spu_read_signal1(); } } // set values points[fill].addr = (uint*)pix; points[fill].i = colour; ++fill; // if we just filled a buffer, send it to ppe if(fill%2048==0) { // select the specific buffer that is full int f = (fill / 2048) - 1; mfc_put(&points[f*2048], (uint)params->pointbuf[f], 16384, 0, 0, 0); // fence a sentinel - the ppe will spin on this completing... // What's a better way to achieve sync? mfc_putf(&sentinel, (uint)params->sentinel[f], 16, 0, 0, 0); // interrupt the ppe spu_write_out_intr_mbox(f); // unmask the relevant bit valid&=~(1<<f); if(fill==16384) { fill = 0; } ++dma_puts; } }
void push(){ int avail_out = num_in_buffer(OUT); if(!avail_out) return; int avail_parent = num_free_in_buffer(PARENT); if(mcb[am].id == 0) avail_parent = mcb[am].data_size[LEFT] + mcb[am].data_size[RIGHT]; int num_send = avail_out < avail_parent ? avail_out : avail_parent; num_send = num_send < MAX_DMA_SIZE ? num_send : MAX_DMA_SIZE; if(!num_send) return; int tag = mfc_tag_reserve(); if(tag == MFC_TAG_INVALID){ return; } else md[am].held_tag[OUT] = tag; // send num_send vectors, in up to three DMA-put's while(num_send > 0){ int parent_head = spu_extract(md[am].idx[PARENT][HEAD],0); int free_from_head = mcb[am].buffer_size[PARENT] - parent_head; int tail = spu_extract(md[am].idx[OUT][TAIL],0); int avail_from_tail = mcb[am].buffer_size[OUT] - tail; int part_send = num_send < free_from_head ? num_send : free_from_head; part_send = part_send < avail_from_tail ? part_send : avail_from_tail; unsigned int to = mcb[am].block_addr[OUT] + parent_head*sizeof(vector signed int); mfc_put(&md[am].buffer[OUT][tail], to, part_send * sizeof(vector signed int), md[am].held_tag[OUT], 0,0); md[am].idx[PARENT][HEAD] = spu_add(md[am].idx[PARENT][HEAD], part_send); parent_head = spu_extract(md[am].idx[PARENT][HEAD],0); if(parent_head == mcb[am].buffer_size[PARENT]) md[am].idx[PARENT][HEAD] = spu_splats(0); md[am].idx[OUT][TAIL] = spu_add(md[am].idx[OUT][TAIL], part_send); tail = spu_extract(md[am].idx[OUT][TAIL],0); if(tail == mcb[am].buffer_size[OUT]) md[am].idx[OUT][TAIL] = spu_splats(0); num_send -= part_send; } // Inner nodes updates parent in buffer head idx if(mcb[am].id) mfc_putf(&md[am].idx[PARENT][HEAD], mcb[am].idx_addr[OUT], sizeof(vector signed int), md[am].held_tag[OUT], 0,0); }