/* Writes an output datum to an indexed buffer.
 * Operates on a block of model instances in parallel.
 *
 * modelid     - model id stored in the sort record for this datum
 * outputid    - output id stored in the sort record for this datum
 * outputsize  - number of quantities that make up one datum
 * quantities  - array of output data of length (outputsize * blocksize),
 *               addressed through VEC_IDX
 * pos         - indexed output buffer; pos->scratch is used as scan
 *               workspace and is indexed by threadid and blocksize-1
 * threadid    - index of the calling thread within the block
 * blocksize   - number of threads in the block
 * participate - nonzero if the calling thread has a datum to write
 *
 * The code assumes parallel_scan leaves an inclusive prefix sum in
 * pos->scratch: a participating thread's value minus one is its rank
 * among the participants, and the last element is the participant count.
 *
 * NOTE(review): parallel_scan is called unconditionally, so presumably
 * every thread of the block must call this function -- confirm.
 */
__DEVICE__ void buffer_indexed_output (unsigned int modelid, unsigned int outputid, unsigned int outputsize, CDATAFORMAT *quantities, indexed_output_buffer *pos, unsigned int threadid, unsigned int blocksize, int participate) {
  unsigned int i, rank, slot, offset;
  CDATAFORMAT *buffer;
  indexed_sort_data *sort;
  int *index = pos->scratch;

  index[threadid] = !!participate; // ensures index is 1 or 0
  parallel_scan(index,threadid,blocksize);

  if (participate) {
    // Rank of this thread among the participating threads (0-based).
    rank = (unsigned int)(index[threadid] - 1);

    // Each datum occupies outputsize consecutive elements, so consecutive
    // participants must be outputsize apart. Spacing them one element apart
    // would make their writes overlap whenever outputsize > 1, while
    // pos->size below still advances by outputsize per participant.
    offset = pos->size + rank * outputsize;

    // The sort record keeps the slot it always had (pos->size + rank);
    // only the data offset it stores changes, and only for outputsize > 1.
    // NOTE(review): confirm the consumer of pos->sort expects this layout.
    slot = pos->size + rank;

    buffer = pos->buffer;
    for (i=0; i<outputsize; i++) {
      buffer[i+offset] = quantities[VEC_IDX(outputsize,i,blocksize,threadid)];
    }

    sort = pos->sort + slot;
    sort->modelid = modelid;
    sort->outputid = outputid;
    sort->offset = offset;
  }

  // NOTE(review): thread 0 advances pos->size while other threads may still
  // be reading it above, and no barrier is visible in this function --
  // confirm that parallel_scan or the caller provides the synchronization.
  if (0 == threadid) {
    pos->size += index[blocksize-1] * outputsize;
  }
}
TestScan( const WorkSpec & Start , const WorkSpec & N ) { typedef Kokkos::RangePolicy<execution_space> exec_policy ; parallel_scan( exec_policy( Start , N ) , *this ); }
// Constructs the test and immediately runs parallel_scan with this object
// as the functor, passing the work specification N straight through.
TestScan( const WorkSpec & N )
{
  parallel_scan( N , *this );
}