int reduce_by_key_sink( IKTuple &&ikeys, vector<V> const &ivals, OKTuple &&okeys, vector<V> &ovals, Comp, Oper ) { namespace fusion = boost::fusion; typedef typename extract_value_types<IKTuple>::type K; static_assert( std::is_same<K, typename extract_value_types<OKTuple>::type>::value, "Incompatible input and output key types"); precondition( fusion::at_c<0>(ikeys).nparts() == 1 && ivals.nparts() == 1, "reduce_by_key is only supported for single device contexts" ); precondition(fusion::at_c<0>(ikeys).size() == ivals.size(), "keys and values should have same size" ); const auto &queue = fusion::at_c<0>(ikeys).queue_list(); backend::select_context(queue[0]); const int NT_cpu = 1; const int NT_gpu = 256; const int NT = is_cpu(queue[0]) ? NT_cpu : NT_gpu; size_t count = fusion::at_c<0>(ikeys).size(); size_t num_blocks = (count + NT - 1) / NT; size_t scan_buf_size = alignup(num_blocks, NT); backend::device_vector<int> key_sum (queue[0], scan_buf_size); backend::device_vector<V> pre_sum (queue[0], scan_buf_size); backend::device_vector<V> post_sum (queue[0], scan_buf_size); backend::device_vector<V> offset_val(queue[0], count); backend::device_vector<int> offset (queue[0], count); /***** Kernel 0 *****/ auto krn0 = offset_calculation<K, Comp>(queue[0]); krn0.push_arg(count); boost::fusion::for_each(ikeys, do_push_arg(krn0)); krn0.push_arg(offset); krn0(queue[0]); VEX_FUNCTION(int, plus, (int, x)(int, y), return x + y;);
/*
 * Emit the arguments of a call.
 *
 * Every entry of the `symbol` list is first handed to push_arg_buf().  Then
 * each assignment expression in the argument-expression list starting at
 * `node` is evaluated, loaded and pushed, following the
 * `argumentExpressionList` link for as long as the current node has type 1.
 * cast_arg() is invoked once all arguments have been pushed.
 */
void argument_expression_list(struct ArgumentExpressionList* node, struct SymbolList* symbol) {
    /* Pre-supplied symbols go to the argument buffer first. */
    for (; symbol; symbol = symbol->next) {
        push_arg_buf(symbol->symbol);
    }

    /* The head node always contributes one argument; type 1 means another
     * node follows. */
    for (;;) {
        push_arg(load_symbol(assignment_expression(node->assignmentExpression)));
        if (node->type != 1) {
            break;
        }
        node = node->argumentExpressionList;
    }

    cast_arg();
}
//____________________________________________________________________ int optionmm::basic_option::handle(const std::string& arg, int pos) { if (_long_name.empty() || arg.compare(CMP_ARG(2,_long_name))) return 0; if (need_argument()) { std::string::size_type eq = arg.find_last_of('='); if (eq == std::string::npos) { std::cerr << "Option --" << _long_name << " need an argument" << std::endl; return -1; } std::string value(arg); value.erase(0, eq+1); push_arg(value.c_str(), pos); } else push_arg(pos); return 1; }
//____________________________________________________________________ int optionmm::basic_option::handle(char*& opt, char*& arg, int pos) { if (_short_name == '\0' || opt[0] != _short_name) return 0; if (need_argument()) { if (opt[1] != '\0') { push_arg(&(opt[1]), pos); opt[1] = '\0'; } else if (arg) { push_arg(arg, pos); // Flag argument as used. arg = 0; } else { std::cerr << "Option -" << _short_name << " need an argument" << std::endl; return -1; } } else push_arg(pos); return 1; }
/*
 * Get an expression.
 *
 * Copies the next lexical element of `str` into the shared `token` buffer
 * and returns `token`:
 *   - a leading "PI" yields "PI";
 *   - an operator at the very start yields that operator's tag (a leading
 *     '-' followed by a digit or '.' is instead kept as part of a number;
 *     any other leading '-' first pushes 0.0 as its left operand);
 *   - otherwise characters are copied until an operator is reached, except
 *     that a '-' directly after 'E' is copied as part of the element.
 */
static char * get_exp(char *str)
{
      char *src = str;
      char *dst = token;
      struct operator *found;

      if (SUCCESS == strncmp(str, "PI", 2))
            return strcpy(token, "PI");

      while (*src)
      {
            const int at_start = (src == str);

            found = get_op(src);
            if (NULL != found)
            {
                  if ('-' != *src)
                  {
                        /* Any other operator either is the element (when it
                         * comes first) or terminates the element. */
                        if (!at_start)
                              break;
                        strcpy(token, found->tag);
                        return token;
                  }

                  if (at_start)
                  {
                        if (!isdigit(src[1]) && '.' != src[1])
                        {
                              push_arg(0.0);
                              strcpy(token, found->tag);
                              return token;
                        }
                        /* sign of a numeric literal: fall through and copy */
                  }
                  else if ('E' != src[-1])
                        break;
                  /* '-' right after 'E': copied like any other character */
            }
            *dst++ = *src++;
      }
      *dst = '\0';

      return token;
}
/*
 * Pushes the value held in the low four bits of `byte` by passing it,
 * together with `thread`, to push_arg().
 */
static void op_push_arg_immed(int byte, struct thread *thread)
{
    const int immediate = byte & 0x0f;

    push_arg(thread, immediate);
}
int evaluate(char *line, double *val) //-------------------------------------------------------------------- // Evaluates an ASCII mathematical expression // INPUT: line: String to evaluate // val: Storage to receive double result // // RETURN: SUCCESS = 0 if successful // E_ERROR = -1 if syntax error // R_ERROR = -2 if runtime error // DUV_ZERO= -3 Division by 0 // // Side effects: Removes all whitespace from the string and converts // it to U.C. //-------------------------------------------------------------------- { double arg; char *ptr = line, *str, *endptr; int ercode; struct operator *op; strupr(line); rmallws(line); state = op_sptr = arg_sptr = parens = 0; while (*ptr) { switch (state) { case 0: if (NULL != (str = get_exp(ptr))) { if (NULL != (op = get_op(str)) && strlen(str) == op->taglen) { push_op(op->token); ptr += op->taglen; break; } if (SUCCESS == strcmp(str, "-")) { push_op(*str); ++ptr; break; } if (SUCCESS == strcmp(str, "PI")) push_arg(Pi); else { if (0.0 == (arg = strtod(str, &endptr)) && NULL == strchr(str, '0')) { return E_ERROR; } push_arg(arg); } ptr += strlen(str); } else return E_ERROR; state = 1; break; case 1: if (NULL != (op = get_op(ptr))) { if (')' == *ptr) { if (SUCCESS > (ercode = do_paren())) return ercode; } else { while (op_sptr && op->precedence <= getTOSprec()) { do_op(); } push_op(op->token); state = 0; } ptr += op->taglen; } else return E_ERROR; break; } } while (1 < arg_sptr) { if (SUCCESS > (ercode = do_op())) return ercode; } if (!op_sptr) return pop_arg(val); else return E_ERROR; }
/* Evaluate stacked arguments and operands */ static int do_op(void) { double arg1, arg2; int op; if (E_ERROR == pop_op(&op)) return E_ERROR; pop_arg(&arg1); pop_arg(&arg2); switch (op) { case '+': push_arg(arg2 + arg1); break; case '-': push_arg(arg2 - arg1); break; case '*': push_arg(arg2 * arg1); break; case '/': if (0.0 == arg1) return(DIV_ZERO); push_arg(arg2 / arg1); break; case '\\': if (0.0 == arg1) return R_ERROR; push_arg(fmod(arg2, arg1)); break; case '^': push_arg(pow(arg2, arg1)); break; case 't': ++arg_sptr; push_arg(atan(arg1)); break; case 'S': ++arg_sptr; push_arg(sin(arg1)); break; case 's': if (0.0 > arg2) return R_ERROR; ++arg_sptr; push_arg(sqrt(arg1)); break; case 'C': ++arg_sptr; push_arg(cos(arg1)); break; case 'A': ++arg_sptr; push_arg(fabs(arg1)); break; case 'L': if (0.0 < arg1) { ++arg_sptr; push_arg(log(arg1)); break; } else return R_ERROR; case 'E': ++arg_sptr; push_arg(exp(arg1)); break; case '(': arg_sptr += 2; break; default: return E_ERROR; } if (1 > arg_sptr) return E_ERROR; else return op; }
int main(int argc, char **argv) { size_t letters_from_global_id = 4, letters_to_iter_over_in_kernel = 2; size_t global_work_size = 1 << (letters_from_global_id * 4), // 2^16 local_work_size = 256; // opencl context opencl::Context context(argc, argv); context.init(); // memory allocation - both CPU & GPU char cpu_buf[LETTER_COUNT+1] = {0,0,0, 0,0,0, 0,0,0, 0}; auto gpu_buf = context.allocate(CL_MEM_WRITE_ONLY, sizeof(cl_char) * LETTER_COUNT); context.write_buffer(gpu_buf, 0, sizeof(cl_char) * LETTER_COUNT, cpu_buf, true); cl_int cpu_flag = 0; auto gpu_flag = context.allocate(CL_MEM_READ_WRITE, sizeof(cl_int)); context.write_buffer(gpu_flag, 0, sizeof(cl_int), (void*)&cpu_flag, true); std::cout << "cpu/gpu buffers pair allocated" << std::endl; auto kernel = context.create_kernel(cSourceFile, "-D LETTER_COUNT=" STRINGIFY(LETTER_COUNT)); // calculate cpu iteration count auto letters_to_iter_over_on_cpu = LETTER_COUNT - letters_from_global_id - letters_to_iter_over_in_kernel; ull repeatCnt = 1 << (letters_to_iter_over_on_cpu * 4); int percent_done = 0, repeats_per_percent = repeatCnt / 100 + 1; for (ull i = 0; i < repeatCnt; i++) { // report progress if (i % repeats_per_percent == 0) { std::cout << "\r["; for(auto i=0; i<10;i++){ std::cout << (percent_done > i * 10 ? 
"=":" "); } std::cout << "] " << percent_done << "%"; ++percent_done; } // kernel args kernel->push_arg(gpu_buf); kernel->push_arg(gpu_flag); kernel->push_arg(sizeof(cl_long),(void *)&target_hash); kernel->push_arg(sizeof(cl_int), (void *)&letters_from_global_id); kernel->push_arg(sizeof(cl_int), (void *)&i); kernel->push_arg(sizeof(cl_int), (void *)&letters_to_iter_over_in_kernel); // Launch kernel cl_event finish_token = kernel->execute(1, &global_work_size, &local_work_size); // Synchronous/blocking read of results context.read_buffer(gpu_flag, (void *)&cpu_flag, true, &finish_token, 1); // done if(cpu_flag){ context.read_buffer(gpu_buf, (void *)cpu_buf, true); char* result_buffer = (char *)cpu_buf; result_buffer[LETTER_COUNT] = '\0'; std::cout << std::endl << "found: '" << result_buffer << "'" << std::endl; break; } } exit(EXIT_SUCCESS); }
/*
 * Run the search by building a command line for an external "grep" and
 * handing batches of index paths to flush_grep().
 *
 * opt    - grep options; translated into command line switches below.
 * paths  - pathspec; index entries are filtered with pathspec_matches().
 * cached - not referenced in this function.
 *
 * Returns -1 without doing anything when opt->extended is set, or when
 * opt->relative is set together with a non-zero opt->prefix_length.
 * Otherwise returns 1 if any flush_grep() call returned a positive status,
 * else 0.
 *
 * NOTE(review): push_arg() is not defined in this block; it is presumably a
 * macro that appends to the local argv[] and advances nr -- confirm against
 * its definition before renaming any of these locals.
 */
static int external_grep(struct grep_opt *opt, const char **paths, int cached)
{
	int i, nr, argc, hit, len, status;
	const char *argv[MAXARGS+1];
	char randarg[ARGBUF];		/* storage for the formatted context counts */
	char *argptr = randarg;
	struct grep_pat *p;

	if (opt->extended || (opt->relative && opt->prefix_length))
		return -1;
	len = nr = 0;
	push_arg("grep");
	if (opt->fixed)
		push_arg("-F");
	if (opt->linenum)
		push_arg("-n");
	if (!opt->pathname)
		push_arg("-h");
	if (opt->regflags & REG_EXTENDED)
		push_arg("-E");
	if (opt->regflags & REG_ICASE)
		push_arg("-i");
	if (opt->binary == GREP_BINARY_NOMATCH)
		push_arg("-I");
	if (opt->word_regexp)
		push_arg("-w");
	if (opt->name_only)
		push_arg("-l");
	if (opt->unmatch_name_only)
		push_arg("-L");
	if (opt->null_following_name)
		/* in GNU grep git's "-z" translates to "-Z" */
		push_arg("-Z");
	if (opt->count)
		push_arg("-c");
	if (opt->post_context || opt->pre_context) {
		/* Differing before/after counts need -B/-A; equal counts use -C. */
		if (opt->post_context != opt->pre_context) {
			if (opt->pre_context) {
				push_arg("-B");
				/* +1 accounts for the terminating NUL of the number */
				len += snprintf(argptr, sizeof(randarg)-len,
						"%u", opt->pre_context) + 1;
				if (sizeof(randarg) <= len)
					die("maximum length of args exceeded");
				push_arg(argptr);
				argptr += len;
			}
			if (opt->post_context) {
				push_arg("-A");
				len += snprintf(argptr, sizeof(randarg)-len,
						"%u", opt->post_context) + 1;
				if (sizeof(randarg) <= len)
					die("maximum length of args exceeded");
				push_arg(argptr);
				argptr += len;
			}
		}
		else {
			push_arg("-C");
			len += snprintf(argptr, sizeof(randarg)-len,
					"%u", opt->post_context) + 1;
			if (sizeof(randarg) <= len)
				die("maximum length of args exceeded");
			push_arg(argptr);
			argptr += len;
		}
	}
	for (p = opt->pattern_list; p; p = p->next) {
		push_arg("-e");
		push_arg(p->pattern);
	}

	hit = 0;
	/* argv[0..nr-1] holds the fixed options; paths are appended after them. */
	argc = nr;
	for (i = 0; i < active_nr; i++) {
		struct cache_entry *ce = active_cache[i];
		char *name;
		int kept;
		if (!S_ISREG(ce->ce_mode))
			continue;
		if (!pathspec_matches(paths, ce->name))
			continue;
		name = ce->name;
		if (name[0] == '-') {
			/* Prefix "./" so the name is not taken for an option. */
			int len = ce_namelen(ce);
			name = xmalloc(len + 3);
			memcpy(name, "./", 2);
			memcpy(name + 2, ce->name, len + 1);
		}
		argv[argc++] = name;
		if (MAXARGS <= argc) {
			/* Argument vector is full: run this batch now. */
			status = flush_grep(opt, argc, nr, argv, &kept);
			if (0 < status)
				hit = 1;
			argc = nr + kept;
		}
		if (ce_stage(ce)) {
			/* Skip the following entries that share this entry's name. */
			do {
				i++;
			} while (i < active_nr &&
				 !strcmp(ce->name, active_cache[i]->name));
			i--; /* compensate for loop control */
		}
	}
	if (argc > nr) {
		/* Run whatever paths are left over from the last partial batch. */
		status = flush_grep(opt, argc, nr, argv, NULL);
		if (0 < status)
			hit = 1;
	}
	return hit;
}
/**
 * Implementation of the MUF debugger
 *
 * This implements the command parsing for the MUF debugger.  It also clears
 * temporary breakpoints if this was triggered from a temporary one.
 *
 * This relies on some static globals, so it is not threadsafe.  If the
 * 'prim' debugger command is ever used to trigger the MUF debugger somehow
 * in a recursive fashion, and then you call 'prim' again, it will probably
 * cause havoc.
 *
 * Commands that resume execution (finish, step, stepi, next, nexti, exec,
 * prim) install a temporary breakpoint and set fr->brkpt.bypass; most of the
 * other commands call add_muf_read_event() to wait for further debugger
 * input.
 *
 * @param descr the descriptor of the debugging player
 * @param player the debugging player
 * @param program the program we are debugging
 * @param text the input text from the user
 * @param fr the current frame pointer
 * @return boolean true if the program should exit, false if not
 */
int
muf_debugger(int descr, dbref player, dbref program, const char *text, struct frame *fr)
{
    char cmd[BUFFER_LEN];
    char buf[BUFFER_LEN];
    char buf2[BUFFER_LEN];
    char *ptr, *ptr2, *arg;
    struct inst *pinst;
    int i, j, cnt;

    /* Scratch program used by the 'prim' command; static because fr->pc is
     * left pointing into it after this function returns. */
    static struct inst primset[5];
    static struct muf_proc_data temp_muf_proc_data = {
        "__Temp_Debugger_Proc",
        0,
        0,
        NULL
    };

    /*
     * Basic massaging of the input - clearing spaces, finding the
     * argument.
     */
    skip_whitespace(&text);
    strcpyn(cmd, sizeof(cmd), text);

    ptr = cmd;
    remove_ending_whitespace(&ptr);

    /* Split "command argument": NUL-terminate the first word, arg points
     * just past it (or at the terminating NUL when there is no argument). */
    for (arg = cmd; *arg && !isspace(*arg); arg++) ;

    if (*arg)
        *arg++ = '\0';

    /*
     * Empty command means repeat last command, if available.
     *
     * NOTE(review): only the command word is remembered (cmd was already
     * cut at the first space), and in the repeat case arg still equals cmd,
     * so after the copy the handlers below appear to see the command word
     * itself as their argument -- confirm whether that is intended.
     */
    if (!*cmd && fr->brkpt.lastcmd) {
        strcpyn(cmd, sizeof(cmd), fr->brkpt.lastcmd);
    } else {
        free(fr->brkpt.lastcmd);

        if (*cmd)
            fr->brkpt.lastcmd = strdup(cmd);
    }

    /* delete triggering breakpoint, if it's only temp. */
    j = fr->brkpt.breaknum;

    if (j >= 0 && fr->brkpt.temp[j]) {
        /* Close the gap by shifting every later breakpoint down one slot. */
        for (j++; j < fr->brkpt.count; j++) {
            fr->brkpt.temp[j - 1] = fr->brkpt.temp[j];
            fr->brkpt.level[j - 1] = fr->brkpt.level[j];
            fr->brkpt.line[j - 1] = fr->brkpt.line[j];
            fr->brkpt.linecount[j - 1] = fr->brkpt.linecount[j];
            fr->brkpt.pc[j - 1] = fr->brkpt.pc[j];
            fr->brkpt.pccount[j - 1] = fr->brkpt.pccount[j];
            fr->brkpt.prog[j - 1] = fr->brkpt.prog[j];
        }

        fr->brkpt.count--;
    }

    fr->brkpt.breaknum = -1;

    /**
     * @TODO This giant if statement is pretty gnarly; we'd be better
     *       off looping over an array of callbacks to break this up
     *       nicer and make it more extensible.
     */

    if (!strcasecmp(cmd, "cont")) {
        /* Nothing to do -- this will continue to next breakpoint */
    } else if (!strcasecmp(cmd, "finish")) {
        if (fr->brkpt.count >= MAX_BREAKS) {
            notify_nolisten(player,
                            "Cannot finish because there are too many breakpoints set.", 1);
            add_muf_read_event(descr, player, program, fr);
            return 0;
        }

        /* Temporary breakpoint keyed on system stack level top - 1. */
        j = fr->brkpt.count++;
        fr->brkpt.temp[j] = 1;
        fr->brkpt.level[j] = fr->system.top - 1;
        fr->brkpt.line[j] = -1;
        fr->brkpt.linecount[j] = -2;
        fr->brkpt.pc[j] = NULL;
        fr->brkpt.pccount[j] = -2;
        fr->brkpt.prog[j] = program;
        fr->brkpt.bypass = 1;

        return 0;
    } else if (!strcasecmp(cmd, "stepi")) {
        /* Optional numeric argument; defaults to 1. */
        i = atoi(arg);

        if (!i)
            i = 1;

        if (fr->brkpt.count >= MAX_BREAKS) {
            notify_nolisten(player,
                            "Cannot stepi because there are too many breakpoints set.", 1);
            add_muf_read_event(descr, player, program, fr);
            return 0;
        }

        /* Temporary breakpoint counted in pccount, not tied to a program. */
        j = fr->brkpt.count++;
        fr->brkpt.temp[j] = 1;
        fr->brkpt.level[j] = -1;
        fr->brkpt.line[j] = -1;
        fr->brkpt.linecount[j] = -2;
        fr->brkpt.pc[j] = NULL;
        fr->brkpt.pccount[j] = i;
        fr->brkpt.prog[j] = NOTHING;
        fr->brkpt.bypass = 1;

        return 0;
    } else if (!strcasecmp(cmd, "step")) {
        i = atoi(arg);

        if (!i)
            i = 1;

        if (fr->brkpt.count >= MAX_BREAKS) {
            notify_nolisten(player,
                            "Cannot step because there are too many breakpoints set.", 1);
            add_muf_read_event(descr, player, program, fr);
            return 0;
        }

        /* Same as stepi, but the count goes into linecount. */
        j = fr->brkpt.count++;
        fr->brkpt.temp[j] = 1;
        fr->brkpt.level[j] = -1;
        fr->brkpt.line[j] = -1;
        fr->brkpt.linecount[j] = i;
        fr->brkpt.pc[j] = NULL;
        fr->brkpt.pccount[j] = -2;
        fr->brkpt.prog[j] = NOTHING;
        fr->brkpt.bypass = 1;

        return 0;
    } else if (!strcasecmp(cmd, "nexti")) {
        i = atoi(arg);

        if (!i)
            i = 1;

        if (fr->brkpt.count >= MAX_BREAKS) {
            notify_nolisten(player,
                            "Cannot nexti because there are too many breakpoints set.", 1);
            add_muf_read_event(descr, player, program, fr);
            return 0;
        }

        /* Like stepi, but bound to the current stack level and program. */
        j = fr->brkpt.count++;
        fr->brkpt.temp[j] = 1;
        fr->brkpt.level[j] = fr->system.top;
        fr->brkpt.line[j] = -1;
        fr->brkpt.linecount[j] = -2;
        fr->brkpt.pc[j] = NULL;
        fr->brkpt.pccount[j] = i;
        fr->brkpt.prog[j] = program;
        fr->brkpt.bypass = 1;

        return 0;
    } else if (!strcasecmp(cmd, "next")) {
        i = atoi(arg);

        if (!i)
            i = 1;

        if (fr->brkpt.count >= MAX_BREAKS) {
            notify_nolisten(player,
                            "Cannot next because there are too many breakpoints set.", 1);
            add_muf_read_event(descr, player, program, fr);
            return 0;
        }

        /* Like step, but bound to the current stack level and program. */
        j = fr->brkpt.count++;
        fr->brkpt.temp[j] = 1;
        fr->brkpt.level[j] = fr->system.top;
        fr->brkpt.line[j] = -1;
        fr->brkpt.linecount[j] = i;
        fr->brkpt.pc[j] = NULL;
        fr->brkpt.pccount[j] = -2;
        fr->brkpt.prog[j] = program;
        fr->brkpt.bypass = 1;

        return 0;
    } else if (!strcasecmp(cmd, "exec")) {
        /* NOTE(review): the message below says "finish" although this is
         * the 'exec' command. */
        if (fr->brkpt.count >= MAX_BREAKS) {
            notify_nolisten(player,
                            "Cannot finish because there are too many breakpoints set.", 1);
            add_muf_read_event(descr, player, program, fr);
            return 0;
        }

        if (!(pinst = funcname_to_pc(program, arg))) {
            notify_nolisten(player, "I don't know a function by that name.", 1);
            add_muf_read_event(descr, player, program, fr);
            return 0;
        }

        if (fr->system.top >= STACK_SIZE) {
            notify_nolisten(player,
                            "That would exceed the system stack size for this program.", 1);
            add_muf_read_event(descr, player, program, fr);
            return 0;
        }

        /* Push the current pc on the system stack, then jump to the
         * requested function. */
        fr->system.st[fr->system.top].progref = program;
        fr->system.st[fr->system.top++].offset = fr->pc;
        fr->pc = pinst;

        j = fr->brkpt.count++;
        fr->brkpt.temp[j] = 1;
        fr->brkpt.level[j] = fr->system.top - 1;
        fr->brkpt.line[j] = -1;
        fr->brkpt.linecount[j] = -2;
        fr->brkpt.pc[j] = NULL;
        fr->brkpt.pccount[j] = -2;
        fr->brkpt.prog[j] = program;
        fr->brkpt.bypass = 1;

        return 0;
    } else if (!strcasecmp(cmd, "prim")) {
        /*
         * @TODO The way this works is a little funky.  It looks like
         *       it would be possible to cause some weird havoc if we
         *       manage to run a primitive that in turn triggers muf_debugger
         *
         *       I am uncertain if this is possible; looks like the only
         *       way it could happen is by typing 'prim debugger_break'
         *       but I don't know much about how muf_debugger is
         *       triggered.  Some digging should be done to make this
         *       safe.
         *
         *       Even better would be to not use statics for this somehow
         *       without introducing a memory leak. (tanabi)
         */

        /* NOTE(review): the message below says "finish" although this is
         * the 'prim' command. */
        if (fr->brkpt.count >= MAX_BREAKS) {
            notify_nolisten(player,
                            "Cannot finish because there are too many breakpoints set.", 1);
            add_muf_read_event(descr, player, program, fr);
            return 0;
        }

        if (!primitive(arg)) {
            notify_nolisten(player, "I don't recognize that primitive.", 1);
            add_muf_read_event(descr, player, program, fr);
            return 0;
        }

        if (fr->system.top >= STACK_SIZE) {
            notify_nolisten(player,
                            "That would exceed the system stack size for this program.", 1);
            add_muf_read_event(descr, player, program, fr);
            return 0;
        }

        /* Build a tiny program in the static primset[]: a function header,
         * the requested primitive, then IN_RET. */
        primset[0].type = PROG_FUNCTION;
        primset[0].line = 0;
        primset[0].data.mufproc = &temp_muf_proc_data;
        primset[0].data.mufproc->vars = 0;
        primset[0].data.mufproc->args = 0;
        primset[0].data.mufproc->varnames = NULL;

        primset[1].type = PROG_PRIMITIVE;
        primset[1].line = 0;
        primset[1].data.number = get_primitive(arg);

        primset[2].type = PROG_PRIMITIVE;
        primset[2].line = 0;
        primset[2].data.number = IN_RET;

        /* primset[3].data.number = primitive("EXIT"); */

        /* Save the real pc on the system stack and continue execution at
         * the primitive instruction. */
        fr->system.st[fr->system.top].progref = program;
        fr->system.st[fr->system.top++].offset = fr->pc;
        fr->pc = &primset[1];

        /* Temporary breakpoint on the IN_RET instruction. */
        j = fr->brkpt.count++;
        fr->brkpt.temp[j] = 1;
        fr->brkpt.level[j] = -1;
        fr->brkpt.line[j] = -1;
        fr->brkpt.linecount[j] = -2;
        fr->brkpt.pc[j] = &primset[2];
        fr->brkpt.pccount[j] = -2;
        fr->brkpt.prog[j] = program;
        fr->brkpt.bypass = 1;
        fr->brkpt.dosyspop = 1;

        return 0;
    } else if (!strcasecmp(cmd, "break")) {
        add_muf_read_event(descr, player, program, fr);

        if (fr->brkpt.count >= MAX_BREAKS) {
            notify_nolisten(player, "Too many breakpoints set.", 1);
            return 0;
        }

        /* The argument is either a line number or a function name. */
        if (number(arg)) {
            i = atoi(arg);
        } else {
            if (!(pinst = funcname_to_pc(program, arg))) {
                notify_nolisten(player, "I don't know a function by that name.", 1);
                return 0;
            } else {
                i = pinst->line;
            }
        }

        /* Line 0 falls back to the line currently being executed. */
        if (!i)
            i = fr->pc->line;

        /* Permanent (temp = 0) breakpoint on line i. */
        j = fr->brkpt.count++;
        fr->brkpt.temp[j] = 0;
        fr->brkpt.level[j] = -1;
        fr->brkpt.line[j] = i;
        fr->brkpt.linecount[j] = -2;
        fr->brkpt.pc[j] = NULL;
        fr->brkpt.pccount[j] = -2;
        fr->brkpt.prog[j] = program;
        notify_nolisten(player, "Breakpoint set.", 1);

        return 0;
    } else if (!strcasecmp(cmd, "delete")) {
        add_muf_read_event(descr, player, program, fr);
        i = atoi(arg);

        if (!i) {
            notify_nolisten(player, "Which breakpoint did you want to delete?", 1);
            return 0;
        }

        /* Breakpoint numbers are 1-based for the user. */
        if (i < 1 || i > fr->brkpt.count) {
            notify_nolisten(player, "No such breakpoint.", 1);
            return 0;
        }

        j = i - 1;

        /* Shift every later breakpoint down over the deleted slot. */
        for (j++; j < fr->brkpt.count; j++) {
            fr->brkpt.temp[j - 1] = fr->brkpt.temp[j];
            fr->brkpt.level[j - 1] = fr->brkpt.level[j];
            fr->brkpt.line[j - 1] = fr->brkpt.line[j];
            fr->brkpt.linecount[j - 1] = fr->brkpt.linecount[j];
            fr->brkpt.pc[j - 1] = fr->brkpt.pc[j];
            fr->brkpt.pccount[j - 1] = fr->brkpt.pccount[j];
            fr->brkpt.prog[j - 1] = fr->brkpt.prog[j];
        }

        fr->brkpt.count--;
        notify_nolisten(player, "Breakpoint deleted.", 1);

        return 0;
    } else if (!strcasecmp(cmd, "breaks")) {
        notify_nolisten(player, "Breakpoints:", 1);

        for (i = 0; i < fr->brkpt.count; i++) {
            ptr = unparse_breakpoint(fr, i);
            notify_nolisten(player, ptr, 1);
        }

        notify_nolisten(player, "*done*", 1);
        add_muf_read_event(descr, player, program, fr);

        return 0;
    } else if (!strcasecmp(cmd, "where")) {
        i = atoi(arg);
        muf_backtrace(player, program, i, fr);
        add_muf_read_event(descr, player, program, fr);
        return 0;
    } else if (!strcasecmp(cmd, "stack")) {
        notify_nolisten(player, "*Argument stack top*", 1);
        /* Optional limit on the number of lines shown; 0 means no limit
         * beyond STACK_SIZE. */
        i = atoi(arg);

        if (!i)
            i = STACK_SIZE;

        ptr = "";

        /* Walk from the top of the argument stack downwards, collapsing
         * runs of items whose text form is identical. */
        for (j = fr->argument.top; j > 0 && i-- > 0;) {
            cnt = 0;

            do {
                /* buf keeps the previous item's text for comparison. */
                strcpyn(buf, sizeof(buf), ptr);
                ptr = insttotext(NULL, 0, &fr->argument.st[--j], buf2, sizeof(buf2), 4000,
                                 program, 1);
                cnt++;
            } while (!strcasecmp(ptr, buf) && j > 0);

            if (cnt > 1)
                notifyf(player, "     [repeats %d times]", cnt);

            if (strcasecmp(ptr, buf))
                notifyf(player, "%3d) %s", j + 1, ptr);
        }

        notify_nolisten(player, "*done*", 1);
        add_muf_read_event(descr, player, program, fr);

        return 0;
    } else if (!strcasecmp(cmd, "list") || !strcasecmp(cmd, "listi")) {
        int startline, endline;

        add_muf_read_event(descr, player, program, fr);

        /* Argument form is "start[,end]"; split it at the comma. */
        if ((ptr2 = (char *) strchr(arg, ','))) {
            *ptr2++ = '\0';
        } else {
            ptr2 = "";
        }

        if (!*arg) {
            /* No start given: continue after the last listing, or start at
             * the line currently being executed. */
            if (fr->brkpt.lastlisted) {
                startline = fr->brkpt.lastlisted + 1;
            } else {
                startline = fr->pc->line;
            }

            endline = startline + 15;
        } else {
            if (!number(arg)) {
                if (!(pinst = funcname_to_pc(program, arg))) {
                    notify_nolisten(player,
                                    "I don't know a function by that name. (starting arg, 1)", 1);
                    return 0;
                } else {
                    startline = pinst->line;
                    endline = startline + 15;
                }
            } else {
                if (*ptr2) {
                    endline = startline = atoi(arg);
                } else {
                    /* A lone line number is shown with surrounding context. */
                    startline = atoi(arg) - 7;
                    endline = startline + 15;
                }
            }
        }

        if (*ptr2) {
            if (!number(ptr2)) {
                if (!(pinst = funcname_to_pc(program, ptr2))) {
                    notify_nolisten(player,
                                    "I don't know a function by that name. (ending arg, 1)", 1);
                    return 0;
                } else {
                    endline = pinst->line;
                }
            } else {
                endline = atoi(ptr2);
            }
        }

        /* i = line number of the program's last instruction. */
        i = (PROGRAM_CODE(program) + PROGRAM_SIZ(program) - 1)->line;

        if (startline > i) {
            notify_nolisten(player, "Starting line is beyond end of program.", 1);
            return 0;
        }

        /* Clamp the requested range to the program. */
        if (startline < 1)
            startline = 1;

        if (endline > i)
            endline = i;

        if (endline < startline)
            endline = startline;

        notify_nolisten(player, "Listing:", 1);

        if (!strcasecmp(cmd, "listi")) {
            for (i = startline; i <= endline; i++) {
                pinst = linenum_to_pc(program, i);

                if (pinst) {
                    /* The line being executed is rendered from fr->pc with
                     * a different final flag than the other lines. */
                    notifyf_nolisten(player, "line %d: %s", i, (i == fr->pc->line) ?
                                     show_line_prims(program, fr->pc, STACK_SIZE, 1) :
                                     show_line_prims(program, pinst, STACK_SIZE, 0));
                }
            }
        } else {
            list_proglines(player, program, fr, startline, endline);
        }

        /* Remembered so that a bare 'list' continues from here. */
        fr->brkpt.lastlisted = endline;
        notify_nolisten(player, "*done*", 1);

        return 0;
    } else if (!strcasecmp(cmd, "quit")) {
        notify_nolisten(player, "Halting execution.", 1);
        /* The only path that returns true, i.e. asks for the program to exit. */
        return 1;
    } else if (!strcasecmp(cmd, "trace")) {
        add_muf_read_event(descr, player, program, fr);

        if (!strcasecmp(arg, "on")) {
            fr->brkpt.showstack = 1;
            notify_nolisten(player, "Trace turned on.", 1);
        } else if (!strcasecmp(arg, "off")) {
            fr->brkpt.showstack = 0;
            notify_nolisten(player, "Trace turned off.", 1);
        } else {
            notifyf_nolisten(player, "Trace is currently %s.",
                             fr->brkpt.showstack ? "on" : "off");
        }

        return 0;
    } else if (!strcasecmp(cmd, "words")) {
        list_program_functions(player, program, arg);
        add_muf_read_event(descr, player, program, fr);
        return 0;
    } else if (!strcasecmp(cmd, "print")) {
        debug_printvar(player, program, fr, arg);
        add_muf_read_event(descr, player, program, fr);
        return 0;
    } else if (!strcasecmp(cmd, "push")) {
        push_arg(player, fr, arg);
        add_muf_read_event(descr, player, program, fr);
        return 0;
    } else if (!strcasecmp(cmd, "pop")) {
        add_muf_read_event(descr, player, program, fr);

        if (fr->argument.top < 1) {
            notify_nolisten(player, "Nothing to pop.", 1);
            return 0;
        }

        /* Drop the top argument stack item and clear it. */
        fr->argument.top--;
        CLEAR(fr->argument.st + fr->argument.top);
        notify_nolisten(player, "Stack item popped.", 1);

        return 0;
    } else if (!strcasecmp(cmd, "help")) {
        do_helpfile(player, tp_file_man_dir, tp_file_man, "debugger_commands", "");

        return 0;
    } else {
        notify_nolisten(player,
                        "I don't understand that debugger command. Type 'help' for help.", 1);
        add_muf_read_event(descr, player, program, fr);
        return 0;
    }

    /* Reached only by 'cont'. */
    return 0;
}
/// Variadic launch helper: binds the leading argument as the next kernel
/// argument, then re-invokes the call operator with the same queue and the
/// remaining arguments.
void operator()(const boost::compute::command_queue &q, Arg1 &&arg1, OtherArgs&&... other_args) {
    this->push_arg(std::forward<Arg1>(arg1));

    // Peel off one argument per call; the remaining pack is forwarded as is.
    this->operator()(q, std::forward<OtherArgs>(other_args)...);
}
void push_arg(const device_vector<T> &arg) { push_arg(arg.raw()); }
void scan( backend::command_queue const &queue, backend::device_vector<T> const &input, backend::device_vector<T> &output, T init, bool exclusive, Oper ) { precondition( input.size() == output.size(), "Wrong output size in inclusive_scan" ); backend::select_context(queue); const int NT_cpu = 1; const int NT_gpu = 256; const int NT = is_cpu(queue) ? NT_cpu : NT_gpu; const int NT2 = 2 * NT; int do_exclusive = exclusive ? 1 : 0; const size_t count = input.size(); const size_t num_blocks = (count + NT2 - 1) / NT2; const size_t scan_buf_size = alignup(num_blocks, NT2); backend::device_vector<T> pre_sum1(queue, scan_buf_size); backend::device_vector<T> pre_sum2(queue, scan_buf_size); backend::device_vector<T> post_sum(queue, scan_buf_size); // Kernel0 auto krn0 = is_cpu(queue) ? block_inclusive_scan<NT_cpu, T, Oper>(queue) : block_inclusive_scan<NT_gpu, T, Oper>(queue); krn0.push_arg(count); krn0.push_arg(input); krn0.push_arg(init); krn0.push_arg(pre_sum1); krn0.push_arg(pre_sum2); krn0.push_arg(do_exclusive); krn0.config(num_blocks, NT); krn0(queue); // Kernel1 auto krn1 = is_cpu(queue) ? intra_block_inclusive_scan<NT_cpu, T, Oper>(queue) : intra_block_inclusive_scan<NT_gpu, T, Oper>(queue); uint work_per_thread = std::max<uint>(1U, static_cast<uint>(scan_buf_size / NT)); krn1.push_arg(num_blocks); krn1.push_arg(post_sum); krn1.push_arg(pre_sum1); krn1.push_arg(init); krn1.push_arg(work_per_thread); krn1.config(1, NT); krn1(queue); // Kernel2 auto krn2 = is_cpu(queue) ? block_addition<NT_cpu, T, Oper>(queue) : block_addition<NT_gpu, T, Oper>(queue); krn2.push_arg(count); krn2.push_arg(input); krn2.push_arg(output); krn2.push_arg(post_sum); krn2.push_arg(pre_sum2); krn2.push_arg(init); krn2.push_arg(do_exclusive); krn2.config(num_blocks * 2, NT); krn2(queue); }
/*
 * Obtains a value from decode_arg(thread) and passes it, together with
 * `thread`, to push_arg().  The `byte` parameter is not used by this
 * function.
 *
 * NOTE(review): presumably the general form of op_push_arg_immed, used when
 * the operand does not fit in the opcode byte itself -- confirm.
 */
static void op_push_arg(int byte, struct thread *thread)
{
    push_arg(thread, decode_arg(thread));
}
int muf_debugger(dbref player, dbref program, const char *text, struct frame *fr) { char cmd[BUFFER_LEN]; char buf[BUFFER_LEN]; char *ptr, *ptr2, *arg; struct inst *pinst; int i, j, cnt; while (isspace(*text)) text++; strcpy(cmd, text); ptr = cmd + strlen(cmd); if (ptr > cmd) ptr--; while (ptr >= cmd && isspace(*ptr)) *ptr-- = '\0'; for (arg = cmd; *arg && !isspace(*arg); arg++); if (*arg) *arg++ = '\0'; if (!*cmd && fr->brkpt.lastcmd) { strcpy(cmd, fr->brkpt.lastcmd); } else { if (fr->brkpt.lastcmd) free(fr->brkpt.lastcmd); if (*cmd) fr->brkpt.lastcmd = string_dup(cmd); } /* delete triggering breakpoint, if it's only temp. */ j = fr->brkpt.breaknum; if (j >= 0 && fr->brkpt.temp[j]) { for (j++; j < fr->brkpt.count; j++) { fr->brkpt.temp[j-1] = fr->brkpt.temp[j]; fr->brkpt.level[j-1] = fr->brkpt.level[j]; fr->brkpt.line[j-1] = fr->brkpt.line[j]; fr->brkpt.linecount[j-1] = fr->brkpt.linecount[j]; fr->brkpt.pc[j-1] = fr->brkpt.pc[j]; fr->brkpt.pccount[j-1] = fr->brkpt.pccount[j]; fr->brkpt.prog[j-1] = fr->brkpt.prog[j]; } fr->brkpt.count--; } fr->brkpt.breaknum = -1; if (!string_compare(cmd, "cont")) { } else if (!string_compare(cmd, "finish")) { if (fr->brkpt.count >= MAX_BREAKS) { anotify_nolisten(player, CFAIL "Cannot finish because there are too many breakpoints set.", 1); add_muf_read_event(player, program, fr); return 0; } j = fr->brkpt.count++; fr->brkpt.temp[j] = 1; fr->brkpt.level[j] = fr->system.top - 1; fr->brkpt.line[j] = -1; fr->brkpt.linecount[j] = -2; fr->brkpt.pc[j] = NULL; fr->brkpt.pccount[j] = -2; fr->brkpt.prog[j] = program; fr->brkpt.bypass = 1; return 0; } else if (!string_compare(cmd, "stepi")) { i = atoi(arg); if (!i) i = 1; if (fr->brkpt.count >= MAX_BREAKS) { anotify_nolisten(player, CFAIL "Cannot stepi because there are too many breakpoints set.", 1); add_muf_read_event(player, program, fr); return 0; } j = fr->brkpt.count++; fr->brkpt.temp[j] = 1; fr->brkpt.level[j] = -1; fr->brkpt.line[j] = -1; fr->brkpt.linecount[j] = -2; fr->brkpt.pc[j] 
= NULL; fr->brkpt.pccount[j] = i; fr->brkpt.prog[j] = NOTHING; fr->brkpt.bypass = 1; return 0; } else if (!string_compare(cmd, "step")) { i = atoi(arg); if (!i) i = 1; if (fr->brkpt.count >= MAX_BREAKS) { anotify_nolisten(player, CFAIL "Cannot step because there are too many breakpoints set.", 1); add_muf_read_event(player, program, fr); return 0; } j = fr->brkpt.count++; fr->brkpt.temp[j] = 1; fr->brkpt.level[j] = -1; fr->brkpt.line[j] = -1; fr->brkpt.linecount[j] = i; fr->brkpt.pc[j] = NULL; fr->brkpt.pccount[j] = -2; fr->brkpt.prog[j] = NOTHING; fr->brkpt.bypass = 1; return 0; } else if (!string_compare(cmd, "nexti")) { i = atoi(arg); if (!i) i = 1; if (fr->brkpt.count >= MAX_BREAKS) { anotify_nolisten(player, CFAIL "Cannot nexti because there are too many breakpoints set.", 1); add_muf_read_event(player, program, fr); return 0; } j = fr->brkpt.count++; fr->brkpt.temp[j] = 1; fr->brkpt.level[j] = fr->system.top; fr->brkpt.line[j] = -1; fr->brkpt.linecount[j] = -2; fr->brkpt.pc[j] = NULL; fr->brkpt.pccount[j] = i; fr->brkpt.prog[j] = program; fr->brkpt.bypass = 1; return 0; } else if (!string_compare(cmd, "next")) { i = atoi(arg); if (!i) i = 1; if (fr->brkpt.count >= MAX_BREAKS) { anotify_nolisten(player, CFAIL "Cannot next because there are too many breakpoints set.", 1); add_muf_read_event(player, program, fr); return 0; } j = fr->brkpt.count++; fr->brkpt.temp[j] = 1; fr->brkpt.level[j] = fr->system.top; fr->brkpt.line[j] = -1; fr->brkpt.linecount[j] = i; fr->brkpt.pc[j] = NULL; fr->brkpt.pccount[j] = -2; fr->brkpt.prog[j] = program; fr->brkpt.bypass = 1; return 0; } else if (!string_compare(cmd, "exec")) { if (fr->brkpt.count >= MAX_BREAKS) { anotify_nolisten(player, CFAIL "Cannot finish because there are too many breakpoints set.", 1); add_muf_read_event(player, program, fr); return 0; } if (!(pinst = funcname_to_pc(program, arg))) { anotify_nolisten(player, CINFO "I don't know a function by that name.", 1); add_muf_read_event(player, program, fr); return 0; 
} if (fr->system.top >= STACK_SIZE) { anotify_nolisten(player, CFAIL "That would exceed the system stack size for this program.", 1); add_muf_read_event(player, program, fr); return 0; } fr->system.st[fr->system.top].progref = program; fr->system.st[fr->system.top++].offset = fr->pc; fr->pc = pinst; j = fr->brkpt.count++; fr->brkpt.temp[j] = 1; fr->brkpt.level[j] = fr->system.top - 1; fr->brkpt.line[j] = -1; fr->brkpt.linecount[j] = -2; fr->brkpt.pc[j] = NULL; fr->brkpt.pccount[j] = -2; fr->brkpt.prog[j] = program; fr->brkpt.bypass = 1; return 0; } else if (!string_compare(cmd, "prim")) { if (fr->brkpt.count >= MAX_BREAKS) { anotify_nolisten(player, CFAIL "Cannot finish because there are too many breakpoints set.", 1); add_muf_read_event(player, program, fr); return 0; } if (!(i = primitive(arg))) { anotify_nolisten(player, CINFO "I don't recognize that primitive.", 1); add_muf_read_event(player, program, fr); return 0; } if (fr->system.top >= STACK_SIZE) { anotify_nolisten(player, CFAIL "That would exceed the system stack size for this program.", 1); add_muf_read_event(player, program, fr); return 0; } shstr.data[0] = '\0'; shstr.links = 1; shstr.length= strlen(shstr.data); primset[0].type = PROG_FUNCTION; primset[0].line = 0; primset[0].data.string = &shstr; primset[1].type = PROG_PRIMITIVE; primset[1].line = 0; primset[1].data.number = i; primset[2].type = PROG_PRIMITIVE; primset[2].line = 0; primset[2].data.number = primitive("EXIT"); fr->system.st[fr->system.top].progref = program; fr->system.st[fr->system.top++].offset = fr->pc; fr->pc = primset; j = fr->brkpt.count++; fr->brkpt.temp[j] = 1; fr->brkpt.level[j] = fr->system.top - 1; fr->brkpt.line[j] = -1; fr->brkpt.linecount[j] = -2; fr->brkpt.pc[j] = NULL; fr->brkpt.pccount[j] = -2; fr->brkpt.prog[j] = program; fr->brkpt.bypass = 1; return 0; } else if (!string_compare(cmd, "break")) { add_muf_read_event(player, program, fr); if (fr->brkpt.count >= MAX_BREAKS) { anotify_nolisten(player, CFAIL "Too many 
breakpoints set.", 1); return 0; } if (number(arg)) { i = atoi(arg); } else { if (!(pinst = funcname_to_pc(program, arg))) { anotify_nolisten(player, CINFO "I don't know a function by that name.", 1); return 0; } else { i = pinst->line; } } if (!i) i = fr->pc->line; j = fr->brkpt.count++; fr->brkpt.temp[j] = 0; fr->brkpt.level[j] = -1; fr->brkpt.line[j] = i; fr->brkpt.linecount[j] = -2; fr->brkpt.pc[j] = NULL; fr->brkpt.pccount[j] = -2; fr->brkpt.prog[j] = program; anotify_nolisten(player, CSUCC "Breakpoint set.", 1); return 0; } else if (!string_compare(cmd, "delete")) { add_muf_read_event(player, program, fr); i = atoi(arg); if (!i) { anotify_nolisten(player, CINFO "Which breakpoint did you want to delete?", 1); return 0; } if (i < 1 || i > fr->brkpt.count) { anotify_nolisten(player, CFAIL "No such breakpoint.", 1); return 0; } j = i - 1; for (j++; j < fr->brkpt.count; j++) { fr->brkpt.temp[j-1] = fr->brkpt.temp[j]; fr->brkpt.level[j-1] = fr->brkpt.level[j]; fr->brkpt.line[j-1] = fr->brkpt.line[j]; fr->brkpt.linecount[j-1] = fr->brkpt.linecount[j]; fr->brkpt.pc[j-1] = fr->brkpt.pc[j]; fr->brkpt.pccount[j-1] = fr->brkpt.pccount[j]; fr->brkpt.prog[j-1] = fr->brkpt.prog[j]; } fr->brkpt.count--; anotify_nolisten(player, CSUCC "Breakpoint deleted.", 1); return 0; } else if (!string_compare(cmd, "breaks")) { anotify_nolisten(player, CINFO "Breakpoints:", 1); for (i = 0; i < fr->brkpt.count; i++) { ptr = unparse_breakpoint(fr, i); notify_nolisten(player, ptr, 1); } anotify_nolisten(player, CINFO "Done.", 1); add_muf_read_event(player, program, fr); return 0; } else if (!string_compare(cmd, "where")) { i = atoi(arg); muf_backtrace(player, program, i, fr); add_muf_read_event(player, program, fr); return 0; } else if (!string_compare(cmd, "stack")) { anotify_nolisten(player, CINFO "*Argument stack top*", 1); i = atoi(arg); if (!i) i = STACK_SIZE; ptr = ""; for (j = fr->argument.top; j>0 && i-->0;) { cnt = 0; do { strcpy(buf, ptr); ptr = insttotext(&fr->argument.st[--j], 
4000, program); cnt++; } while (!string_compare(ptr, buf) && j>0); if (cnt > 1) notify_fmt(player, " [repeats %d times]", cnt); if (string_compare(ptr, buf)) notify_fmt(player, "%3d) %s", j+1, ptr); } anotify_nolisten(player, CINFO "Done.", 1); add_muf_read_event(player, program, fr); return 0; } else if (!string_compare(cmd, "list") || !string_compare(cmd, "listi")) { int startline, endline; startline = endline = 0; add_muf_read_event(player, program, fr); if ((ptr2 = (char *)index(arg, ','))) { *ptr2++ = '\0'; } else { ptr2 = ""; } if (!*arg) { if (fr->brkpt.lastlisted) { startline = fr->brkpt.lastlisted + 1; } else { startline = fr->pc->line; } endline = startline + 15; } else { if (!number(arg)) { if (!(pinst = funcname_to_pc(program, arg))) { anotify_nolisten(player, CINFO "I don't know a function by that name. (starting arg, 1)", 1); return 0; } else { startline = pinst->line; endline = startline + 15; } } else { if (*ptr2) { endline = startline = atoi(arg); } else { startline = atoi(arg) - 7; endline = startline + 15; } } } if (*ptr2) { if (!number(ptr2)) { if (!(pinst = funcname_to_pc(program, ptr2))) { anotify_nolisten(player, CINFO "I don't know a function by that name. (ending arg, 1)", 1); return 0; } else { endline = pinst->line; } } else { endline = atoi(ptr2); } } i = (DBFETCH(program)->sp.program.code + DBFETCH(program)->sp.program.siz - 1)->line; if (startline > i) { anotify_nolisten(player, CFAIL "Starting line is beyond end of program.", 1); return 0; } if (startline < 1) startline = 1; if (endline > i) endline = i; if (endline < startline) endline = startline; anotify_nolisten(player, CINFO "Listing:", 1); if (!string_compare(cmd, "listi")) { for (i = startline; i <= endline; i++) { pinst = linenum_to_pc(program, i); if (pinst) { sprintf(buf, "line %d: %s", i, (i == fr->pc->line) ? 
show_line_prims(program, fr->pc, STACK_SIZE, 1) : show_line_prims(program, pinst, STACK_SIZE, 0)); notify_nolisten(player, buf, 1); } } } else { list_proglines(player, program, fr, startline, endline); } fr->brkpt.lastlisted = endline; anotify_nolisten(player, CINFO "Done.", 1); return 0; } else if (!string_compare(cmd, "quit")) { anotify_nolisten(player, CINFO "Halting execution.", 1); return 1; } else if (!string_compare(cmd, "trace")) { add_muf_read_event(player, program, fr); if (!string_compare(arg, "on")) { fr->brkpt.showstack = 1; anotify_nolisten(player, CSUCC "Trace turned on.", 1); } else if (!string_compare(arg, "off")) { fr->brkpt.showstack = 0; anotify_nolisten(player, CSUCC "Trace turned off.", 1); } else { sprintf(buf, CINFO "Trace is currently %s.", fr->brkpt.showstack? "on" : "off"); anotify_nolisten(player, buf, 1); } return 0; } else if (!string_compare(cmd, "words")) { list_program_functions(player, program, arg); add_muf_read_event(player, program, fr); return 0; } else if (!string_compare(cmd, "print")) { debug_printvar(player, fr, arg); add_muf_read_event(player, program, fr); return 0; } else if (!string_compare(cmd, "push")) { push_arg(player, fr, arg); add_muf_read_event(player, program, fr); return 0; } else if (!string_compare(cmd, "pop")) { add_muf_read_event(player, program, fr); if (fr->argument.top < 1) { anotify_nolisten(player, CFAIL "Nothing to pop.", 1); return 0; } fr->argument.top--; CLEAR(fr->argument.st + fr->argument.top); anotify_nolisten(player, CSUCC "Stack item popped.", 1); return 0; } else if (!string_compare(cmd, "help")) { notify_nolisten(player, "cont continues execution until a breakpoint is hit.", 1); notify_nolisten(player, "finish completes execution of current function.", 1); notify_nolisten(player, "step [NUM] executes one (or NUM, 1) lines of muf.", 1); notify_nolisten(player, "stepi [NUM] executes one (or NUM, 1) muf instructions.", 1); notify_nolisten(player, "next [NUM] like step, except skips CALL and 
EXECUTE.", 1); notify_nolisten(player, "nexti [NUM] like stepi, except skips CALL and EXECUTE.", 1); notify_nolisten(player, "break LINE# sets breakpoint at given LINE number.", 1); notify_nolisten(player, "break FUNCNAME sets breakpoint at start of given function.", 1); notify_nolisten(player, "breaks lists all currently set breakpoints.", 1); notify_nolisten(player, "delete NUM deletes breakpoint by NUM, as listed by 'breaks'", 1); notify_nolisten(player, "where [LEVS] displays function call backtrace of up to num levels deep.", 1); notify_nolisten(player, "stack [NUM] shows the top num items on the stack.", 1); notify_nolisten(player, "print v# displays the value of given global variable #.", 1); notify_nolisten(player, "print lv# displays the value of given local variable #.", 1); notify_nolisten(player, "trace [on|off] turns on/off debug stack tracing.", 1); notify_nolisten(player, "list [L1,[L2]] lists source code of given line range.", 1); notify_nolisten(player, "list FUNCNAME lists source code of given function.", 1); notify_nolisten(player, "listi [L1,[L2]] lists instructions in given line range.", 1); notify_nolisten(player, "listi FUNCNAME lists instructions in given function.", 1); notify_nolisten(player, "words lists all function word names in program.", 1); notify_nolisten(player, "words PATTERN lists all function word names that match PATTERN.", 1); notify_nolisten(player, "exec FUNCNAME calls given function with the current stack data.", 1); notify_nolisten(player, "prim PRIMITIVE executes given primitive with current stack data.", 1); notify_nolisten(player, "push DATA pushes an int, dbref, var, or string onto the stack.", 1); notify_nolisten(player, "pop pops top data item off the stack.", 1); notify_nolisten(player, "help displays this help screen.", 1); notify_nolisten(player, "quit stop execution here.", 1); add_muf_read_event(player, program, fr); return 0; } else { anotify_nolisten(player, CINFO "I don't understand that debugger command. 
Type 'help' for help.", 1); add_muf_read_event(player, program, fr); return 0; } return 0; }
int reduce_by_key_sink( IKTuple &&ikeys, vector<V> const &ivals, OKTuple &&okeys, vector<V> &ovals, Comp, Oper ) { namespace fusion = boost::fusion; typedef typename extract_value_types<IKTuple>::type K; static_assert( std::is_same<K, typename extract_value_types<OKTuple>::type>::value, "Incompatible input and output key types"); precondition( fusion::at_c<0>(ikeys).nparts() == 1 && ivals.nparts() == 1, "Sorting is only supported for single device contexts" ); precondition(fusion::at_c<0>(ikeys).size() == ivals.size(), "keys and values should have same size" ); const auto &queue = fusion::at_c<0>(ikeys).queue_list(); backend::select_context(queue[0]); const int NT_cpu = 1; const int NT_gpu = 256; const int NT = is_cpu(queue[0]) ? NT_cpu : NT_gpu; size_t count = fusion::at_c<0>(ikeys).size(); size_t num_blocks = (count + NT - 1) / NT; size_t scan_buf_size = alignup(num_blocks, NT); backend::device_vector<int> key_sum (queue[0], scan_buf_size); backend::device_vector<V> pre_sum (queue[0], scan_buf_size); backend::device_vector<V> post_sum (queue[0], scan_buf_size); backend::device_vector<V> offset_val(queue[0], count); backend::device_vector<int> offset (queue[0], count); /***** Kernel 0 *****/ auto krn0 = detail::offset_calculation<K, Comp>(queue[0]); krn0.push_arg(count); boost::fusion::for_each(ikeys, do_push_arg(krn0)); krn0.push_arg(offset); krn0(queue[0]); VEX_FUNCTION(plus, int(int, int), "return prm1 + prm2;"); detail::scan(queue[0], offset, offset, 0, false, plus); /***** Kernel 1 *****/ auto krn1 = is_cpu(queue[0]) ? detail::block_scan_by_key<NT_cpu, V, Oper>(queue[0]) : detail::block_scan_by_key<NT_gpu, V, Oper>(queue[0]); krn1.push_arg(count); krn1.push_arg(offset); krn1.push_arg(ivals(0)); krn1.push_arg(offset_val); krn1.push_arg(key_sum); krn1.push_arg(pre_sum); krn1.config(num_blocks, NT); krn1(queue[0]); /***** Kernel 2 *****/ uint work_per_thread = std::max<uint>(1U, static_cast<uint>(scan_buf_size / NT)); auto krn2 = is_cpu(queue[0]) ? 
detail::block_inclusive_scan_by_key<NT_cpu, V, Oper>(queue[0]) : detail::block_inclusive_scan_by_key<NT_gpu, V, Oper>(queue[0]); krn2.push_arg(num_blocks); krn2.push_arg(key_sum); krn2.push_arg(pre_sum); krn2.push_arg(post_sum); krn2.push_arg(work_per_thread); krn2.config(1, NT); krn2(queue[0]); /***** Kernel 3 *****/ auto krn3 = detail::block_sum_by_key<V, Oper>(queue[0]); krn3.push_arg(count); krn3.push_arg(key_sum); krn3.push_arg(post_sum); krn3.push_arg(offset); krn3.push_arg(offset_val); krn3.config(num_blocks, NT); krn3(queue[0]); /***** resize okeys and ovals *****/ int out_elements; offset.read(queue[0], count - 1, 1, &out_elements, true); ++out_elements; boost::fusion::for_each(okeys, do_vex_resize(queue, out_elements)); ovals.resize(ivals.queue_list(), out_elements); /***** Kernel 4 *****/ auto krn4 = detail::key_value_mapping<K, V>(queue[0]); krn4.push_arg(count); boost::fusion::for_each(ikeys, do_push_arg(krn4)); boost::fusion::for_each(okeys, do_push_arg(krn4)); krn4.push_arg(ovals(0)); krn4.push_arg(offset); krn4.push_arg(offset_val); krn4(queue[0]); return out_elements; }
/// Variadic call operator for an OpenCL command queue.
///
/// Each invocation consumes exactly one argument: `arg1` is handed to
/// push_arg(), after which operator() is invoked on this object with the
/// same queue and the rest of the pack. The overload that handles the
/// shortened argument list is defined outside this snippet.
///
/// @param q          OpenCL command queue passed along unchanged.
/// @param arg1       first pending argument.
/// @param other_args arguments still to be processed.
void operator()(const cl::CommandQueue &q, Arg1 &&arg1, OtherArgs&&... other_args)
{
    // Register the head of the pack.
    push_arg(std::forward<Arg1>(arg1));

    // Continue with the tail.
    this->operator()(q, std::forward<OtherArgs>(other_args)...);
}