/* Writes an output datum to an indexed buffer.
 * Operates on a block of model instances in parallel. The scan over
 * pos->scratch and the size update performed by thread 0 imply that every
 * thread of the block calls this function, whether or not it has data.
 *
 * modelid     - model instance id stored in the sort entry
 * outputid    - output id stored in the sort entry
 * outputsize  - number of quantities written by each participating thread
 * quantities  - array of output data of length (outputsize * blocksize),
 *               addressed through VEC_IDX
 * pos         - indexed output buffer; pos->scratch is used as the scan
 *               workspace (one int per thread) and pos->size is advanced by
 *               the total number of quantities written by the block
 * threadid    - index of the calling thread within the block
 * blocksize   - number of threads in the block
 * participate - nonzero if the calling thread has a datum to write
 */
__DEVICE__ void buffer_indexed_output (unsigned int modelid, unsigned int outputid, unsigned int outputsize, CDATAFORMAT *quantities, indexed_output_buffer *pos, unsigned int threadid, unsigned int blocksize, int participate) {
  unsigned int i, offset;
  CDATAFORMAT *buffer;
  indexed_sort_data *sort;
  int *index = pos->scratch;

  index[threadid] = !!participate; // ensures index is 1 or 0
  // The code below uses the scan result as an inclusive prefix sum:
  // index[threadid] is this thread's 1-based rank among participants and
  // index[blocksize-1] is the number of participants in the block.
  // NOTE(review): assumes parallel_scan is inclusive and leaves every
  // thread with a consistent view of index[] on return -- confirm.
  parallel_scan(index,threadid,blocksize);

  if (participate) {
    // Each participant owns outputsize consecutive slots, so the rank must
    // be scaled by outputsize. Advancing by one slot per participant made
    // neighbouring writes overlap whenever outputsize > 1, while pos->size
    // below already advances by (participants * outputsize).
    offset = pos->size + (index[threadid] - 1) * outputsize;

    buffer = pos->buffer;
    for (i=0; i<outputsize; i++) {
      buffer[i+offset] = quantities[VEC_IDX(outputsize,i,blocksize,threadid)];
    }

    // The sort entry is keyed by the datum's starting offset in the buffer
    // and records which model/output produced it.
    sort = pos->sort + offset;
    sort->modelid = modelid;
    sort->outputid = outputid;
    sort->offset = offset;
  }

  // A single thread publishes the new fill level for the whole block.
  // NOTE(review): other threads read pos->size above and there is no barrier
  // in this function between that read and this write; confirm that the
  // caller (or parallel_scan) provides the required synchronization.
  if (0 == threadid) {
    pos->size += index[blocksize-1] * outputsize;
  }
}
Exemple #2
0
 TestScan( const WorkSpec & Start , const WorkSpec & N )
   {
     typedef Kokkos::RangePolicy<execution_space> exec_policy ;
     parallel_scan( exec_policy( Start , N ) , *this );
   }
Exemple #3
0
 // Runs parallel_scan as soon as the object is constructed, passing the
 // work specification N and this object as the functor.
 TestScan( const WorkSpec & N )
   {
     parallel_scan( N , *this );
   }