// execution function for the clamp instruction:
//   a       supplies the element count (a.size()), the dtype (a.dtype) and
//           a mutable data pointer (a.get<void*>()).
//   b       carries a pointer to a clamp_args_t whose lo / hi fields are
//           forwarded to the launcher.
//   opcode  is not read by this function.
// NOTE(review): a is the only buffer handed to launch_clamp and it is passed
// as a non-const pointer, so the clamp presumably happens in place -- confirm
// against launch_clamp.
void execute_clamp(opcode_t opcode,
                   const argument& a,
                   const argument& b)
{
    // Bounds are fetched first, as a pointer stored inside argument b.
    const clamp_args_t* const bounds = b.get<const clamp_args_t*>();

    // The launch configuration is derived from a's element count.
    usize_t count = a.size();
    launchcfg launch = make_elemwise_launchcfg(count);

    void* const data = a.get<void*>();

    launch_clamp(launch.gdim, launch.bdim, launch.smem, launch.stream,
                 count, a.dtype,
                 data,
                 bounds->lo, bounds->hi);
}
// execution function: // Called when the virtual machine gets around to executing // the lerp instruction, long after it was validated and inserted // into the execution stream. // A CUDA execution function configures and calls a CUDA kernel. // void execute_lerp(opcode_t opcode, const argument& a, const argument& b, const argument& c, const argument& alpha) { usize_t size = a.size(); launchcfg cfg = make_elemwise_launchcfg(size); launch_lerp(cfg.gdim,cfg.bdim,cfg.smem,cfg.stream, size,a.dtype, a.get<const void*>(), b.get<const void*>(), c.get< void*>(), alpha.get<double>()); }