Example #1
0
/*
 * SPE entry point for the fractal renderer.
 *
 * argv holds the address that the PPE provided as the 4th argument to
 * spe_context_run(); it is used as the DMA source for a struct spe_args.
 * speid and envp are not used by this function.
 *
 * The function fetches the argument block, runs render_fractal() eight
 * times with different offsets, flushes a partially filled point buffer
 * (if any) back to the PPE, prints a few statistics and returns 0.
 */
int main(uint64_t speid, uint64_t argv, uint64_t envp)
{
    struct spe_args args __attribute__((aligned(SPE_ALIGN)));

    // Fetch the argument block on DMA tag 0 ...
    mfc_get(&args, argv, sizeof(args), 0, 0, 0);

    // ... and block until that transfer has completed.
    mfc_write_tag_mask(1 << 0);
    mfc_read_tag_status_all();

    // Reset the statistics counters and load the decrementer with all ones
    // so the tick count can be derived from its value at the end.
    cmap_calls = 0;
    dma_puts = 0;
    spu_write_decrementer(-1);

    // Run multiple renders with offsets.  Should be factored into render_fractal()
    render_fractal(&args.fractal, args.thread_idx, args.n_threads, 0.);
    render_fractal(&args.fractal, args.thread_idx, args.n_threads,
                   args.fractal.delta * 7 / 8);
    render_fractal(&args.fractal, args.thread_idx, args.n_threads,
                   args.fractal.delta * 3 / 4);
    render_fractal(&args.fractal, args.thread_idx, args.n_threads,
                   args.fractal.delta * 5 / 8);
    render_fractal(&args.fractal, args.thread_idx, args.n_threads,
                   args.fractal.delta / 2);
    render_fractal(&args.fractal, args.thread_idx, args.n_threads,
                   args.fractal.delta * 3 / 8);
    render_fractal(&args.fractal, args.thread_idx, args.n_threads,
                   args.fractal.delta / 4);
    render_fractal(&args.fractal, args.thread_idx, args.n_threads,
                   args.fractal.delta / 8);

    // Send remaining points: a non-zero remainder means the buffer currently
    // being filled has not been handed over yet.
    if(fill%2048) {
        // select the last buffer used
        int f = fill / 2048;
        // The transfer size is fixed at 16384 bytes regardless of how many
        // points are valid; the valid count is sent separately below.
        mfc_put(&points[f*2048], (uint)args.fractal.pointbuf[f], 16384, 0, 0, 0);
        // Block for completion
        mfc_write_tag_mask(1<<0);
        mfc_read_tag_status_all();
        // Send a message with top bit set to indicate final item.
        // The flag is built as an unsigned constant: shifting a signed 1
        // into bit 31 is undefined behaviour for a 32-bit int.
        spu_write_out_intr_mbox((1u << 31) | f);
        // Send another message indicating count
        spu_write_out_intr_mbox(fill%2048);
        ++dma_puts;
    }

    // Report some stats.  The decrementer was loaded with -1 above, so
    // (-1 - current value) is the number of ticks counted since then.
    uint ticks = -1 - spu_read_decrementer();
    printf("cmap calls %d ticks %u calls/tick %f\n",
           cmap_calls, ticks, (double)cmap_calls/ticks );
    printf("dma puts %d\n", dma_puts);

    return 0;
}
Example #2
0
/*
 * Append one entry to the log located at log_base_ea.
 *
 * Nothing happens when log_base_ea is 0.  Otherwise the entry is stamped
 * with the next sequence number and the current decrementer value, copied
 * into one of the two tmp_buffer staging slots and written to position
 * log_idx of the log with mfc_put().  The staging slot alternates on every
 * call and log_idx advances modulo (log_idx_mask + 1).
 */
void
_gc_log_write(gc_log_entry_t entry)
{
  const int slot = tmp_buffer_idx;		// staging slot used by this call
  const int slot_bit = 1 << slot;		// its flag in tmp_buffer_busy
  const int slot_tag = log_tags + slot;		// DMA tag tied to this slot

  if (!log_base_ea)
    return;

  entry.seqno = log_seqno++;
  entry.timestamp = spu_read_decrementer();

  // This slot has been handed to mfc_put() before: wait on its tag so the
  // earlier transfer is finished before the slot is overwritten.
  if (tmp_buffer_busy & slot_bit){
    mfc_write_tag_mask(1 << slot_tag);
    mfc_read_tag_status_all();
  }

  // Keep a local copy that stays untouched while the put is outstanding.
  tmp_buffer[slot] = entry;

  mfc_put(&tmp_buffer[slot],
	  log_base_ea + log_idx * sizeof(entry), sizeof(entry),
	  slot_tag, 0, 0);

  tmp_buffer_busy |= slot_bit;
  tmp_buffer_idx = slot ^ 0x1;			// flip to the other slot
  log_idx = (log_idx + 1) & log_idx_mask;
}
Example #3
0
/*
 * SPU entry point for the merge phase.
 *
 * argp is handed to setup_spu(); spe_id and envp are not used here.
 * The mergers of this SPU (spu_ctrlblock.num_mergers of them) are visited
 * in round-robin order through the global index `am` until
 * num_active_mergers drops to zero.  With TRACE_TIME defined, timing
 * figures derived from the decrementer are printed at the end.
 * Always returns 0.
 */
int main(unsigned long long spe_id,
	 unsigned long long argp,
	 unsigned long long envp)
{
  #ifdef TRACE_TIME
  spu_write_decrementer(0);
  #endif
  setup_spu(argp);

  num_active_mergers = spu_ctrlblock.num_mergers;

  // Every `continue` below lands on the for-increment, which moves `am`
  // on to the next merger (wrapping around).
  for( ; num_active_mergers > 0; am = (am+1) % spu_ctrlblock.num_mergers){
    // Finished mergers are skipped.
    if(md[am].done)
      continue;

    // Leaf nodes: poll the pull DMAs and skip this merger while any
    // input is still being waited for.
    if(mcb[am].leaf_node){
      check_pull_dma(LEFT);
      check_pull_dma(RIGHT);

      if(md[am].num_waiting[LEFT] + md[am].num_waiting[RIGHT])
        continue;
    }

    // Parent not local (local[OUT] == 255): poll the push DMA and skip
    // this merger while held_tag[OUT] is below 32.
    if(mcb[am].local[OUT] == 255){
      check_push_dma();
      if(md[am].held_tag[OUT] < 32)
        continue;
    }

    // Re-test `done` after the DMA checks above.
    if(md[am].done)
      continue;

    // Produce
    #ifdef TRACE_TIME
    dec_val = spu_read_decrementer();
    #endif
    {
      const int left_dry = (md[am].depleted[LEFT] != 0);
      const int right_dry = (md[am].depleted[RIGHT] != 0);

      if(left_dry == right_dry){
        // Both sides or neither side depleted.
        merge_buffers();
      } else if(left_dry){
        cp_buffer(RIGHT);
      } else {
        cp_buffer(LEFT);
      }
    }
    #ifdef TRACE_TIME
    merge_ticks += -(spu_read_decrementer() - dec_val);
    #endif

    // Push, if parent not local
    if(mcb[am].local[OUT] == 255){
      push();
    }

    // Pull if leaf node
    if(mcb[am].leaf_node){
      pull(LEFT);
      pull(RIGHT);
    }
  }

  #ifdef TRACE_TIME
  {
    // Divisor used to turn decrementer ticks into the "ms" figures printed.
    const double ticks_per_ms = 79.8*1000;
    float tot_ms = -spu_read_decrementer() / ticks_per_ms;
    float sort_ms = sort_v_ticks / ticks_per_ms;
    float merge_ms = merge_ticks / ticks_per_ms;
    float merge_loop_ms = merge_loop_ticks / ticks_per_ms;

    printf("SPU%d: Total %fms, merge %fms, inner loop %fms, sort %fms\n", spu_ctrlblock.spu_id,tot_ms, merge_ms, merge_loop_ms, sort_ms);
  }
  #endif

  return 0;
}
Example #4
0
/*
 * Merge data from the LEFT and RIGHT input buffers of the active merger
 * (md[am] / mcb[am]) into its OUT buffer, one 4-element vector per loop
 * iteration, for as long as both inputs have data and the output has room.
 *
 * After the loop the per-side consumed counters and depleted flags are
 * updated, and a merger whose parent is local (local[OUT] < 255) is marked
 * done once both inputs are depleted.
 *
 * Takes no arguments and returns nothing; all state is reached through the
 * file-level md, mcb, am and num_active_mergers.
 */
void merge_buffers(){
  vector unsigned int cmp_v, cmp_v2;

  // Per-lane decrement/increment constants; lane 0 = LEFT, 1 = RIGHT,
  // 2 = OUT, matching the lane layout of avail_v and size_v below.
  const vector signed int one_at_0 = {1,0,0,0};
  const vector signed int one_at_1 = {0,1,0,0};
  const vector signed int one_at_2 = {0,0,1,0};
  const vector signed int ones = {1,1,1,1};
  const vector signed int zeros = {0,0,0,0};

  // Shuffle pattern that replicates one byte of the compare result into
  // every byte (see the "left[3] > right[3]" comment in the loop).
  const vector unsigned char cmp_v_shuffle_mask = {31,31,31,31,
						   31,31,31,31,
						   31,31,31,31,
						   31,31,31,31};
  vector unsigned char rev_mask;
  // Shuffle patterns that reverse the word order of the first (bytes 0-15)
  // or second (bytes 16-31) shuffle operand.
  const vector unsigned char rev_left = {12,13,14,15,
					 8,9,10,11,
					 4,5,6,7,
					 0,1,2,3};

  const vector unsigned char rev_right = {28,29,30,31,
					  24,25,26,27,
					  20,21,22,23,
					  16,17,18,19};
  // Head index of the destination: with a local parent this is the head
  // index of one of the parent's input sides (chosen from the merger id),
  // otherwise the merger's own OUT head.
  vector signed int *out_head_idx;
  if(mcb[am].local[OUT] < 255){
    int parent_idx = mcb[am].local[OUT];
    int side = (mcb[am].id+1)&1;
    out_head_idx = (vector signed int*) &md[parent_idx].idx[side][HEAD];
  } else {
    out_head_idx = (vector signed int*) &md[am].idx[OUT][HEAD];
  }

  // NOTE(review): the index slots are accessed as whole vectors from here
  // on (spu_add with `ones`, assignment of `zeros`), so all four lanes at
  // each address are rewritten — assumes each idx entry is vector-sized;
  // confirm against the declaration of md[].idx.
  vector signed int *left_tail_idx = (vector signed int*) &md[am].idx[LEFT][TAIL];
  vector signed int *right_tail_idx = (vector signed int*) &md[am].idx[RIGHT][TAIL];

  // Lane layout: {LEFT, RIGHT, OUT, spare}.  The spare lane of avail_v is 1
  // so that it never blocks the "all lanes > 0" test.
  vector signed int size_v = {mcb[am].buffer_size[LEFT], mcb[am].buffer_size[RIGHT], mcb[am].buffer_size[OUT], 0};
  vector signed int avail_v = {num_in_buffer(LEFT), num_in_buffer(RIGHT), num_free_in_buffer(OUT), 1};
  // Snapshot of the input availability, used after the loop to work out
  // how much was consumed from each side.
  vector signed int avail_before = { spu_extract(avail_v, 0), spu_extract(avail_v, 1), 0, 0 };
  vector unsigned int avail = spu_gather( spu_cmpgt(avail_v, zeros) ); // avail = 0x0F if all avail_v > zeros

  // Working pointers into the three buffers at their current indices.
  vector signed int *left, *right, *out;
  left = (vector signed int*) &md[am].buffer[LEFT][ spu_extract(*left_tail_idx,0) ];
  right = (vector signed int*) &md[am].buffer[RIGHT][ spu_extract(*right_tail_idx,0) ];
  out = (vector signed int*) &md[am].buffer[OUT][ spu_extract(*out_head_idx,0) ];

  #ifdef TRACE_TIME
    dec_val2 = spu_read_decrementer();
  #endif

  // Loop while LEFT and RIGHT both have data and OUT has free space.
  while(spu_extract(avail,0) == 0x0F){
    // cmp left and right to determine who gets eaten
    cmp_v = spu_cmpgt(*left,*right);
    cmp_v = spu_shuffle(cmp_v, cmp_v, cmp_v_shuffle_mask);
    // cmp_v = {FFFF,FFFF,FFFF,FFFF} if left[3] > right[3]

    // One input vector goes to *out, the other (word-reversed) to *left;
    // cmp_v decides which is which.
    *out = spu_sel(*left,*right,cmp_v);
    rev_mask = spu_sel(rev_right,rev_left,(vector unsigned char)cmp_v);
    *left = spu_shuffle(*left,*right,rev_mask);
    // data to be sorted is now in out and left, left in descending order

    sort_vectors(out,left);

    // update index of the used side
    if( spu_extract(cmp_v,0) ){
      // left[3] > right[3]
      *right_tail_idx = spu_add(*right_tail_idx,ones);
      avail_v = spu_sub(avail_v, one_at_1);
      right++;
      // modulus hack
      // NOTE(review): lane 0 of cmp_v2 compares the RIGHT tail index with
      // size_v lane 0, i.e. buffer_size[LEFT] rather than buffer_size[RIGHT]
      // — only equivalent if both buffers have the same size; confirm.
      cmp_v2 = spu_cmpeq(*right_tail_idx, size_v);
      if( __builtin_expect( spu_extract(cmp_v2,0) ,0) ){
	*right_tail_idx = zeros;
	right = (vector signed int*) &md[am].buffer[RIGHT][0];
      }
    } else {
      // The LEFT slot is about to be passed over, so its current content
      // is copied into the current RIGHT slot first (presumably so the
      // remainder of this step is compared again next iteration — confirm
      // against sort_vectors()).
      *right = *left;
      *left_tail_idx = spu_add(*left_tail_idx,ones);
      avail_v = spu_sub(avail_v, one_at_0);
      left++;
      // modulus hack
      cmp_v2 = spu_cmpeq(*left_tail_idx, size_v);      
      if( __builtin_expect( spu_extract(cmp_v2,0) ,0) ){	
	*left_tail_idx = zeros;
	left = (vector signed int*) &md[am].buffer[LEFT][0];
      }
    }

    // update out head idx
    *out_head_idx = spu_add(*out_head_idx,ones);
    avail_v = spu_sub(avail_v, one_at_2);
    out++;
    // modulus hack
    // NOTE(review): as above, lane 0 compares the OUT head index with
    // buffer_size[LEFT], not buffer_size[OUT] — confirm sizes are equal.
    cmp_v2 = spu_cmpeq(*out_head_idx, size_v);
    if( __builtin_expect(spu_extract(cmp_v2,0),0) ){
      out = (vector signed int*) &md[am].buffer[OUT][0];
      *out_head_idx = zeros;
    }

    // is there data still available?
    avail = spu_gather(spu_cmpgt(avail_v, zeros));
  }

  #ifdef TRACE_TIME
  merge_loop_ticks += -(spu_read_decrementer() - dec_val2);
  #endif

  // how much got produced?
  // (availability before minus availability now, per input side)
  vector signed int consumed = spu_sub(avail_before, avail_v);
  int consumed_left = spu_extract(consumed, 0);
  int consumed_right = spu_extract(consumed, 1);

  if(consumed_left)
    update_tail(LEFT);

  if(consumed_right)
    update_tail(RIGHT);

  md[am].consumed[LEFT] += consumed_left;
  md[am].consumed[RIGHT] += consumed_right;
    
  // A side is depleted once everything announced in data_size was consumed.
  if(md[am].consumed[LEFT] == mcb[am].data_size[LEFT])
    md[am].depleted[LEFT] = 1;
  
  if(md[am].consumed[RIGHT] == mcb[am].data_size[RIGHT])
    md[am].depleted[RIGHT] = 1;

  // Only mergers with a local parent are retired here; the
  // local[OUT] == 255 case is not handled in this function.
  if(mcb[am].local[OUT] < 255 && md[am].depleted[LEFT] && md[am].depleted[RIGHT]){
    md[am].done = 1;
    --num_active_mergers;
  }
}