void OrcBlockImpl::work(const InputItems &ins, const OutputItems &outs) { //calculate production/consumption params size_t num_input_items, num_output_items; if (_params.production_factor > 1.0) { num_output_items = std::min(size_t(ins.min()*_params.production_factor), outs.min()); num_input_items = size_t(num_output_items/_params.production_factor); } else { num_input_items = std::min(size_t(outs.min()/_params.production_factor), ins.min()); num_output_items = size_t(num_input_items*_params.production_factor); } //load the executor with source and dest buffers size_t input_index = 0, output_index = 0; for (size_t i = 0; i < ORC_N_VARIABLES; i++) { if (_orc_program->vars[i].vartype == ORC_VAR_TYPE_DEST) { void *dst = outs[output_index++].cast<void *>(); orc_executor_set_array(_orc_executor.get(), i, dst); } if (_orc_program->vars[i].vartype == ORC_VAR_TYPE_SRC) { void *src = const_cast<void *>(ins[input_index++].cast<const void *>()); orc_executor_set_array(_orc_executor.get(), i, src); } } //execute the orc code; orc_executor_set_n(_orc_executor.get(), size_t(_params.kernel_factor*num_input_items)); orc_executor_run(_orc_executor.get()); //produce consume fixed this->consume(num_input_items-_params.consumption_offset); this->produce(num_output_items); }
/*!
 * Element-wise product of all inputs, written to output 0
 * (complex float variant using the VOLK multiply kernel).
 *
 * The first two inputs are multiplied into the output buffer; every
 * further input is then multiplied into the output in place.
 * When only one input is present, that input is copied to the output
 * unchanged so the produced items are always defined.
 *
 * \param ins the input buffers; ins[0] is the first factor
 * \param outs the output buffers; only outs[0] is written
 */
void Multiply<std::complex<float> >::work(
    const InputItems &ins, const OutputItems &outs
){
    const size_t n_nums = std::min(ins.min(), outs.min());

    // Each item holds _vlen elements, so this is the flat element count.
    const size_t n_elems = n_nums * _vlen;

    std::complex<float> *out = outs[0].cast<std::complex<float> *>();
    const std::complex<float> *in0 = ins[0].cast<const std::complex<float> *>();

    // With a single input the loop below never executes, which would
    // leave the output buffer unwritten while still producing n_nums
    // items. Pass the input through instead. The pointer check skips the
    // copy if the output buffer is the input buffer itself.
    if (ins.size() == 1 && out != in0)
    {
        std::copy(in0, in0 + n_elems, out);
    }

    for (size_t n = 1; n < ins.size(); n++)
    {
        const std::complex<float> *in = ins[n].cast<const std::complex<float> *>();
        volk_32fc_x2_multiply_32fc(out, in0, in, n_elems);
        in0 = out; //for next input, we do output *= input
    }

    this->consume(n_nums);
    this->produce(n_nums);
}
/*!
 * Element-wise sum of all inputs, written to output 0
 * (float variant using the VOLK add kernel).
 *
 * The first two inputs are added into the output buffer; every
 * further input is then added to the output in place.
 * When only one input is present, that input is copied to the output
 * unchanged so the produced items are always defined.
 *
 * \param ins the input buffers; ins[0] is the first addend
 * \param outs the output buffers; only outs[0] is written
 */
void Add<float>::work(
    const InputItems &ins, const OutputItems &outs
){
    const size_t n_nums = std::min(ins.min(), outs.min());

    // Each item holds _vlen elements, so this is the flat element count.
    const size_t n_elems = n_nums * _vlen;

    float *out = outs[0].cast<float *>();
    const float *in0 = ins[0].cast<const float *>();

    // With a single input the loop below never executes, which would
    // leave the output buffer unwritten while still producing n_nums
    // items. Pass the input through instead. The pointer check skips the
    // copy if the output buffer is the input buffer itself.
    if (ins.size() == 1 && out != in0)
    {
        std::copy(in0, in0 + n_elems, out);
    }

    for (size_t n = 1; n < ins.size(); n++)
    {
        const float *in = ins[n].cast<const float *>();
        volk_32f_x2_add_32f(out, in0, in, n_elems);
        in0 = out; //for next input, we do output += input
    }

    this->consume(n_nums);
    this->produce(n_nums);
}
/*!
 * Element-wise product of all inputs, written to output 0
 * (generic variant using a plain loop).
 *
 * The first two inputs are multiplied into the output buffer; every
 * further input is then multiplied into the output in place.
 * When only one input is present, that input is copied to the output
 * unchanged so the produced items are always defined.
 *
 * \param ins the input buffers; ins[0] is the first factor
 * \param outs the output buffers; only outs[0] is written
 */
void Multiply<type>::work(
    const InputItems &ins, const OutputItems &outs
){
    const size_t n_nums = std::min(ins.min(), outs.min());

    // Each item holds _vlen elements, so this is the flat element count.
    // Computed once rather than in every inner-loop condition.
    const size_t n_elems = n_nums * _vlen;

    type *out = outs[0].cast<type *>();
    const type *in0 = ins[0].cast<const type *>();

    // With a single input the loop below never executes, which would
    // leave the output buffer unwritten while still producing n_nums
    // items. Pass the input through instead. The pointer check skips the
    // copy if the output buffer is the input buffer itself.
    if (ins.size() == 1 && out != in0)
    {
        std::copy(in0, in0 + n_elems, out);
    }

    for (size_t n = 1; n < ins.size(); n++)
    {
        const type *in = ins[n].cast<const type *>();
        for (size_t i = 0; i < n_elems; i++)
        {
            out[i] = in0[i] * in[i];
        }
        in0 = out; //for next input, we do output *= input
    }

    this->consume(n_nums);
    this->produce(n_nums);
}