static FFTW(plan) mkplan_real(bench_problem *p, unsigned flags) { FFTW(plan) pln = 0; int i; ptrdiff_t ntot; vn = p->vecsz->rnk == 1 ? p->vecsz->dims[0].n : 1; if (p->sz->rnk < 2 || p->split || !tensor_real_contiguousp(p->sz, p->sign, vn) || tensor_rowmajor_transposedp(p->sz) || p->vecsz->rnk > 1 || (p->vecsz->rnk == 1 && (p->vecsz->dims[0].is != 1 || p->vecsz->dims[0].os != 1))) return 0; alloc_rnk(p->sz->rnk); for (i = 0; i < rnk; ++i) { total_ni[i] = total_no[i] = p->sz->dims[i].n; local_ni[i] = local_no[i] = total_ni[i]; local_starti[i] = local_starto[i] = 0; } local_ni[rnk-1] = local_no[rnk-1] = total_ni[rnk-1] = total_no[rnk-1] = p->sz->dims[rnk-1].n / 2 + 1; { ptrdiff_t n, start, nT, startT; ntot = FFTW(mpi_local_size_many_transposed) (p->sz->rnk, total_ni, vn, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, MPI_COMM_WORLD, &n, &start, &nT, &startT); if (flags & FFTW_MPI_TRANSPOSED_IN) { local_ni[1] = nT; local_starti[1] = startT; } else { local_ni[0] = n; local_starti[0] = start; } if (flags & FFTW_MPI_TRANSPOSED_OUT) { local_no[1] = nT; local_starto[1] = startT; } else { local_no[0] = n; local_starto[0] = start; } } alloc_local(ntot * 2, p->in == p->out); total_ni[rnk - 1] = p->sz->dims[rnk - 1].n; if (p->sign < 0) pln = FFTW(mpi_plan_many_dft_r2c)(p->sz->rnk, total_ni, vn, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, local_in, (FFTW(complex) *) local_out, MPI_COMM_WORLD, flags); else
int main(int argc, char **argv) { errval_t err; debug_printf("Xeon Phi Test started on the card.\n"); err = xeon_phi_client_init(disp_xeon_phi_id()); EXPECT_SUCCESS(err, "xeon_phi_client_init"); xeon_phi_client_set_callbacks(&callbacks); alloc_local(); wait_for_connection(); char iface[30]; snprintf(iface, 30, "xphi_ump_bench.%u", XPHI_BENCH_CORE_HOST); debug_printf("sending open to host domain..\n"); err = xeon_phi_client_chan_open(disp_xeon_phi_id(), domainid, 0, local_frame, 2); EXPECT_SUCCESS(err, "xeon_phi_client_init"); #if XPHI_BENCH_INITIATOR_HOST debug_printf("giving time for host to initialize...\n"); for (uint32_t i = 0; i < 10; ++i) { delay_ms(4000); thread_yield(); } #endif #if XPHI_BENCH_INITIATOR_HOST debug_printf("---------------- normal run -----------------\n"); xphi_bench_start_echo(&xphi_uc); debug_printf("---------------- reversed run -----------------\n"); xphi_bench_start_echo(&xphi_uc_rev); #else #ifndef XPHI_BENCH_THROUGHPUT debug_printf("---------------- normal run -----------------\n"); xphi_bench_start_initator_rtt(&xphi_uc); debug_printf("---------------- reversed run -----------------\n"); xphi_bench_start_initator_rtt(&xphi_uc_rev); #else #ifdef XPHI_BENCH_SEND_SYNC debug_printf("---------------- normal run -----------------\n"); xphi_bench_start_initator_sync(&xphi_uc); debug_printf("---------------- reversed run -----------------\n"); xphi_bench_start_initator_sync(&xphi_uc_rev); #else debug_printf("---------------- normal run -----------------\n"); xphi_bench_start_initator_async(&xphi_uc); debug_printf("---------------- reversed run -----------------\n"); xphi_bench_start_initator_async(&xphi_uc_rev); #endif #endif #endif }
/*
 * Create (or look up) a variable for the given method.
 *
 *  - With maybeslot set, an existing slot found by find_slot() is returned;
 *    a local index is still allocated in that case and discarded.
 *  - Unnamed variables just get a fresh local index.
 *  - Named variables are registered in state->method->allvars and, unless the
 *    method has no_variable_scoping set, also in state->vars (a duplicate
 *    there is a syntax error).
 *  - With no_variable_scoping in pass 2, a name already in allvars is reused
 *    after a type compatibility check.
 */
variable_t* new_variable2(methodstate_t*method, const char*name, classinfo_t*type, char init, char maybeslot)
{
    if(maybeslot) {
        variable_t*slot = find_slot(method, name);
        if(slot) {
            alloc_local();
            return slot;
        }
    }

    NEW(variable_t, v);
    v->type = type;
    v->kill = init;
    v->init = v->kill;

    if(!name) {
        /* anonymous variable: nothing to register */
        v->index = alloc_local();
        return v;
    }

    if(method->no_variable_scoping) {
        if(as3_pass==2 && dict_contains(state->method->allvars, name)) {
            variable_t*known = dict_lookup(state->method->allvars, name);
            if(known->type != type && (!known->type || known->type->kind!=INFOTYPE_UNRESOLVED)) {
                syntaxerror("variable %s already defined.", name);
            }
            return known;
        }
        v->index = alloc_local();
    } else {
        if(dict_contains(state->vars, name)) {
            syntaxerror("variable %s already defined", name);
        }
        v->index = alloc_local();
        dict_put(state->vars, name, v);
    }

    dict_put(state->method->allvars, name, v);
    return v;
}
static void mod_neko_do_init() { int tmp = 0; if( init_done ) return; init_done = 1; memset(&config,0,sizeof(config)); config.use_cache = 1; config.gc_period = 1; config.max_post_size = MOD_NEKO_POST_SIZE; # ifdef APACHE_2_X putenv(strdup("MOD_NEKO=2")); # else putenv(strdup("MOD_NEKO=1")); # endif neko_global_init(); cache_root = alloc_local(); }
EXTERN void neko_global_init() { # ifdef NEKO_DIRECT_THREADED op_last = neko_get_ttable()[Last]; # endif empty_array.ptr = val_null; neko_gc_init(); neko_vm_context = alloc_local(); neko_fields_lock = alloc_lock(); neko_fields = (objtable*)alloc_root((NEKO_FIELDS_MASK+1) * sizeof(struct _objtable) / sizeof(value)); { int i; for(i=0;i<=NEKO_FIELDS_MASK;i++) otable_init(&neko_fields[i]); } neko_init_builtins(); kind_names = (kind_list**)alloc_root(1); *kind_names = NULL; id_loader = val_id("loader"); id_exports = val_id("exports"); id_cache = val_id("cache"); id_path = val_id("path"); id_loader_libs = val_id("__libs"); neko_id_module = val_id("__module"); INIT_ID(compare); INIT_ID(string); INIT_ID(add); INIT_ID(radd); INIT_ID(sub); INIT_ID(rsub); INIT_ID(mult); INIT_ID(rmult); INIT_ID(div); INIT_ID(rdiv); INIT_ID(mod); INIT_ID(rmod); INIT_ID(get); INIT_ID(set); apply_string = alloc_root(1); *apply_string = alloc_string("apply"); neko_init_jit(); }
/*
 * Append the scope setup opcodes for method m to the code list c and return
 * the new list.
 *
 * Local 0 is pushed onto the scope stack when the method uses slots, has
 * inner functions, or is late-binding and not itself an inner function.
 * When the method uses slots, the activation object is additionally pushed:
 * freshly created and stored in m->activation_var if init is set, otherwise
 * reloaded from m->activation_var.
 */
code_t* add_scope_code(code_t*c, methodstate_t*m, char init)
{
    int needs_this_scope = m->uses_slots
                        || m->innerfunctions
                        || (m->late_binding && !m->inner);

    if(needs_this_scope) {
        c = abc_getlocal_0(c);
        c = abc_pushscope(c);
    }

    if(!m->uses_slots)
        return c;

    /* FIXME: this alloc_local() causes variable indexes to be
       different in pass2 than in pass1 */
    if(!m->activation_var) {
        m->activation_var = alloc_local();
    }

    if(!init) {
        /* reuse the activation object stored earlier */
        c = abc_getlocal(c, m->activation_var);
        c = abc_pushscope(c);
        return c;
    }

    /* create the activation object, push it and remember it */
    c = abc_newactivation(c);
    c = abc_dup(c);
    c = abc_pushscope(c);
    c = abc_setlocal(c, m->activation_var);
    return c;
}
int main(int argc, char **argv) { errval_t err; debug_printf("Xeon Phi Test started on the card %u.\n", disp_xeon_phi_id()); debug_printf("Msg Buf Size = %lx, Buf Frame Size = %lx\n", XPHI_BENCH_MSG_FRAME_SIZE, XPHI_BENCH_BUF_FRAME_SIZE); xeon_phi_client_set_callbacks(&callbacks); err = xeon_phi_client_init(disp_xeon_phi_id()); if (err_is_fail(err)) { USER_PANIC_ERR(err, "could not init the service\n"); } err = alloc_local(); assert(err_is_ok(err)); if (disp_xeon_phi_id() == 0) { char *iface = xeon_phi_domain_build_iface("xeon_phi_inter", 1, 2); err = xeon_phi_domain_blocking_lookup(iface, &domid); if (err_is_fail(err)) { USER_PANIC_ERR(err, "looking up domain id\n"); } debug_printf("sending open message to %s on node 1\n", iface); err = xeon_phi_client_chan_open(1, domid, 0xcafebabe, local_frame, 2); if (err_is_fail(err)) { USER_PANIC_ERR(err, "could not open channel"); } } while (!connected) { messages_wait_and_handle_next(); } debug_printf("Initializing UMP channel...\n"); if (disp_xeon_phi_id() != 0) { err = xeon_phi_client_chan_open(0, domid, 0xdeadbeef, local_frame, 2); if (err_is_fail(err)) { USER_PANIC_ERR(err, "could not open channel"); } } else { debug_printf("Other node reply: %s\n", (char *) local_buf); } err = ump_chan_init(&uc, inbuf, XPHI_BENCH_MSG_FRAME_SIZE, outbuf, XPHI_BENCH_MSG_FRAME_SIZE); err = ump_chan_init(&uc_rev, inbuf_rev, XPHI_BENCH_MSG_FRAME_SIZE, outbuf_rev, XPHI_BENCH_MSG_FRAME_SIZE); if (err_is_fail(err)) { USER_PANIC_ERR(err, "Could not initialize UMP"); } if (disp_xeon_phi_id() == 1) { #ifndef XPHI_BENCH_THROUGHPUT debug_printf("---------------- normal run -----------------\n"); xphi_bench_start_initator_rtt(&bufs, &uc); debug_printf("---------------- reversed run -----------------\n"); xphi_bench_start_initator_rtt(&bufs_rev, &uc_rev); #else #ifdef XPHI_BENCH_SEND_SYNC debug_printf("---------------- normal run -----------------\n"); xphi_bench_start_initator_sync(&bufs, &uc); debug_printf("---------------- reversed run 
-----------------\n"); xphi_bench_start_initator_sync(&bufs_rev, &uc_rev); #else debug_printf("---------------- normal run -----------------\n"); xphi_bench_start_initator_async(&bufs, &uc); debug_printf("---------------- reversed run -----------------\n"); xphi_bench_start_initator_async(&bufs_rev, &uc_rev); #endif #endif } else { #ifndef XPHI_BENCH_THROUGHPUT debug_printf("---------------- normal run -----------------\n"); xphi_bench_start_echo(&bufs, &uc); debug_printf("---------------- reversed run -----------------\n"); xphi_bench_start_echo(&bufs_rev, &uc_rev); #else debug_printf("---------------- normal run -----------------\n"); xphi_bench_start_processor(&bufs, &uc); debug_printf("---------------- reversed run -----------------\n"); xphi_bench_start_processor(&bufs_rev, &uc_rev); #endif } err = dma_manager_wait_for_driver(DMA_DEV_TYPE_XEON_PHI, disp_xeon_phi_id()); if (err_is_fail(err)) { USER_PANIC_ERR(err, "waiting for drive"); } struct dma_client_info info = { .type = DMA_CLIENT_INFO_TYPE_NAME, .device_type = DMA_DEV_TYPE_XEON_PHI, .args = { .name = XEON_PHI_DMA_SERVICE_NAME } }; struct dma_client_device *xdev; err = dma_client_device_init(&info, &xdev); if (err_is_fail(err)) { USER_PANIC_ERR(err, "could not initialize client device"); } struct dma_device *dev = (struct dma_device *) xdev; err = dma_register_memory((struct dma_device *) dev, local_frame); if (err_is_fail(err)) { USER_PANIC_ERR(err, "could not register memory"); } err = dma_register_memory((struct dma_device *) dev, remote_frame); if (err_is_fail(err)) { USER_PANIC_ERR(err, "could not register memory"); } if (disp_xeon_phi_id() == 1) { debug_printf("+++++++ DMA / MEMCOPY Benchmark ++++++++\n"); debug_printf("\n"); debug_printf("========================================\n"); debug_printf("\n"); debug_printf("DMA-BENCH: LOCAL -> REMOTE \n"); debug_printf("\n"); debug_printf("========================================\n"); debug_printf("\n"); xphi_bench_memcpy(dev, remote_buf + 2 * 
XPHI_BENCH_MSG_FRAME_SIZE, local_buf + 2 * XPHI_BENCH_MSG_FRAME_SIZE, XPHI_BENCH_BUF_FRAME_SIZE / 2, remote_base + 2 * XPHI_BENCH_MSG_FRAME_SIZE, local_base + 2 * XPHI_BENCH_MSG_FRAME_SIZE); debug_printf("\n"); debug_printf("========================================\n"); debug_printf("\n"); debug_printf("DMA-BENCH: REMOTE -> LOCAL \n"); debug_printf("\n"); debug_printf("========================================\n"); debug_printf("\n"); xphi_bench_memcpy(dev, local_buf + 2 * XPHI_BENCH_MSG_FRAME_SIZE, remote_buf + 2 * XPHI_BENCH_MSG_FRAME_SIZE, XPHI_BENCH_BUF_FRAME_SIZE / 2, local_base + 2 * XPHI_BENCH_MSG_FRAME_SIZE, remote_base + 2 * XPHI_BENCH_MSG_FRAME_SIZE); } debug_printf("benchmark done."); while (1) { messages_wait_and_handle_next(); } }
/*
 * Build an MPI plan for a complex DFT benchmark problem.
 *
 * Returns 0 when the problem is not handled here: rank below 1, split
 * format, non-contiguous or row-major-transposed size tensor, a vector
 * tensor of rank above 1, or a rank-1 vector tensor with non-unit strides.
 *
 * Otherwise fills the file-level total_*, local_* and local_start* arrays
 * from the local size queries, calls alloc_local(), creates the plan with
 * mpi_plan_many_dft and, for rank > 1 with transposed input/output flags,
 * also creates plan_scramble_in / plan_unscramble_out.
 */
static FFTW(plan) mkplan_complex(bench_problem *p, unsigned flags)
{
     FFTW(plan) plan = 0;
     ptrdiff_t local_total;
     int d;

     if (p->vecsz->rnk == 1)
          vn = p->vecsz->dims[0].n;
     else
          vn = 1;

     /* applicability checks, evaluated in the same order as before */
     if (p->sz->rnk < 1 || p->split)
          return 0;
     if (!tensor_contiguousp(p->sz, vn))
          return 0;
     if (tensor_rowmajor_transposedp(p->sz))
          return 0;
     if (p->vecsz->rnk > 1)
          return 0;
     if (p->vecsz->rnk == 1
         && (p->vecsz->dims[0].is != 1 || p->vecsz->dims[0].os != 1))
          return 0;

     alloc_rnk(p->sz->rnk);

     /* start from the full, undistributed extents in every dimension */
     for (d = 0; d < rnk; ++d) {
          total_no[d] = p->sz->dims[d].n;
          total_ni[d] = total_no[d];
          local_no[d] = total_ni[d];
          local_ni[d] = local_no[d];
          local_starto[d] = 0;
          local_starti[d] = local_starto[d];
     }

     if (rnk > 1) {
          ptrdiff_t rows, row0, rowsT, row0T;

          local_total = FFTW(mpi_local_size_many_transposed)
               (p->sz->rnk, total_ni, vn,
                FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
                MPI_COMM_WORLD,
                &rows, &row0, &rowsT, &row0T);

          /* transposed data is distributed along dimension 1, else along 0 */
          if (flags & FFTW_MPI_TRANSPOSED_IN) {
               local_ni[1] = rowsT;
               local_starti[1] = row0T;
          } else {
               local_ni[0] = rows;
               local_starti[0] = row0;
          }

          if (flags & FFTW_MPI_TRANSPOSED_OUT) {
               local_no[1] = rowsT;
               local_starto[1] = row0T;
          } else {
               local_no[0] = rows;
               local_starto[0] = row0;
          }
     } else if (rnk == 1) {
          local_total = FFTW(mpi_local_size_many_1d)
               (total_ni[0], vn, MPI_COMM_WORLD, p->sign, flags,
                local_ni, local_starti, local_no, local_starto);
     }

     alloc_local(local_total * 2, p->in == p->out);

     plan = FFTW(mpi_plan_many_dft)(p->sz->rnk, total_ni, vn,
                                    FFTW_MPI_DEFAULT_BLOCK,
                                    FFTW_MPI_DEFAULT_BLOCK,
                                    (FFTW(complex) *) local_in,
                                    (FFTW(complex) *) local_out,
                                    MPI_COMM_WORLD, p->sign, flags);

     /* NOTE(review): presumably counts reals per complex element from here on */
     vn *= 2;

     if (rnk > 1) {
          ptrdiff_t inner = 1;

          /* product of all extents beyond the first two dimensions */
          for (d = 2; d < rnk; ++d)
               inner *= p->sz->dims[d].n;

          if (flags & FFTW_MPI_TRANSPOSED_IN)
               plan_scramble_in = mkplan_transpose_local(
                    p->sz->dims[0].n, local_ni[1], vn * inner,
                    local_in, local_in);

          if (flags & FFTW_MPI_TRANSPOSED_OUT)
               plan_unscramble_out = mkplan_transpose_local(
                    local_no[1], p->sz->dims[0].n, vn * inner,
                    local_out, local_out);
     }

     return plan;
}