void push_arg(device_vector<T> arg) { K.set_arg(argpos++, arg.raw()); }
void push_arg(const device_vector<T> &arg) { push_arg(arg.raw()); }