// Incoming output data for a block void flow_t::process_output(Array<Vector<super_t,2>>* buffer, MPI_Status* status) { { // How many elements did we receive? const int tag = status->MPI_TAG; const local_id_t local_block_id = request_block_id(tag); const uint8_t dimension = request_dimension(tag); const auto event = output_blocks.local_block_line_event(local_block_id,dimension); thread_time_t time(output_recv_kind,event); if (PENTAGO_MPI_COMPRESS_OUTPUTS) { const int count = get_count(status,MPI_BYTE); PENTAGO_MPI_TRACE("process output block: source %d, local block id %d, dimension %d, count %d, tag %d, event 0x%llx",status->MPI_SOURCE,local_block_id.id,dimension,count,tag,event); const auto compressed = char_view_own(*buffer).slice_own(0,count); // Schedule an accumulate as soon as possible to conserve memory threads_schedule(CPU,curry(absorb_compressed_output,&output_blocks,local_block_id,dimension,compressed,*buffer),true); } else { const int count = get_count(status,MPI_LONG_LONG_INT); PENTAGO_MPI_TRACE("process output block: source %d, local block id %d, dimension %d, count %g, tag %d, event 0x%llx",status->MPI_SOURCE,local_block_id.id,dimension,count/8.,tag,event); GEODE_ASSERT(!(count&7)); const auto block_data = buffer->slice_own(0,count/8); // Schedule an accumulate as soon as possible to conserve memory threads_schedule(CPU,curry(&accumulating_block_store_t::accumulate,&output_blocks,local_block_id,dimension,block_data),true); } buffer->clean_memory(); } // One step closer... progress.progress(); countdown.decrement(); post_output_recv(buffer); }
int main(int argc, char *argv[]) { int (*f) (int, int); // holds function to be curried char *op_str; if(argc < 5) return -1; printf("pc = 0lx%.16x\n", &&pc); pc: // select function switch(argv[1][0]) { case 'a': f = &op_add; op_str = "op_add"; break; case 's': f = &op_sub; op_str = "op_sub"; break; case 'm': f = &op_mul; op_str = "op_mul"; break; case 'd': f = &op_div; op_str = "op_div"; break; default: f = &op_zero; op_str = "op_zero"; break; } // read arguments int a = atoi(argv[2]); int b = atoi(argv[3]); int x = atoi(argv[4]); int (*(*c1) (int)) (int) = curry(f); int (*c2)(int) = c1(a); int r1 = c2(b); int r2 = c2(x); printf("c1 = curry(%s) = 0x%.8x\n", op_str, c1); printf("c2 = c1(%d) = 0x%.8x\n", a, c2); printf("r1 = c2(%d) = %d\n", b, r1); printf("r2 = c2(%d) = %d\n", x, r2); return 0; }
// Handle the response to one of our earlier block requests
void flow_t::process_response(block_request_t* request, MPI_Status* status) {
  { // Scope so the thread_time_t timer stops before schedule_lines runs
    thread_time_t time(response_recv_kind,request->block_lines_event());
    PENTAGO_MPI_TRACE("process response: owner %d, owner block id %d, dimension %d",status->MPI_SOURCE,request_block_id(status->MPI_TAG).id,request->dimensions.data);
    // Erase block request
    const int index = block_requests.find(request);
    GEODE_ASSERT(block_requests.valid(index));
    block_requests.remove_index_lazy(index);
    // Decrement input response counters; a line whose counter hits zero
    // frees up one line gather slot
    for (auto line : request->dependent_lines)
      if (!line->decrement_input_responses())
        free_line_gathers++;
    // Data has already been received into the first dependent line, but may be compressed.
    // Schedule a decompression and/or copying job.  No need to put this at the front of the queue,
    // since there's no memory to be deallocated.
    const int recv_size = get_count(status,MPI_BYTE);
    threads_schedule(CPU,curry(absorb_response,request,recv_size));
  }
  // We may be able to schedule more lines if any line gathers completed
  schedule_lines();
}
int main() { auto sum = [](int x, int y) { return x + y; }; (void)curry(sum)(1)(1); }
// Find dependencies for all dvents as a consistency check void check_dependencies(const vector<vector<Array<const history_t>>>& event_sorted_history, const int direction) { const int jobs = 16; for (const int thread : range((int)event_sorted_history.size())) for (const int kind : range((int)event_sorted_history[thread].size())) { const auto events = event_sorted_history[thread][kind].raw(); for (const int job : range(jobs)) threads_schedule(CPU,curry(check_helper,&event_sorted_history,direction,thread,kind,events.slice(partition_loop(events.size(),jobs,job)))); } threads_wait_all(); }
// Demonstrate curry on lambdas and free functions of arity 2 through 6.
int main() {
  const auto triple_product = curry([](int x, int y, int z) { return x*y*z; });
  const auto six_arg = [](int a, int b, int c, int d, int e, int f) { return a*b*c+d*e*f; };

  // One statement per result; each std::endl matches the original's flush points.
  std::cout << " 14 + 1 = " << curry(add)(14)(1) << std::endl;
  std::cout << " 1 * 3 * 5 = " << triple_product(1)(3)(5) << std::endl;
  std::cout << " 1 + 2 + 4 + 8 = " << curry(add4)(1)(2)(4)(8) << std::endl;
  std::cout << " 1 + 2 + 3 + 4 + 5 = "
            << curry([](int a, int b, int c, int d, int e) { return a+b+c+d+e; })(1)(2)(3)(4)(5)
            << std::endl;
  std::cout << "1 * 2 * 3 + 1 * 3 * 3 = " << curry(six_arg)(1)(2)(3)(1)(3)(3) << std::endl;
  return 0;
}
// Register a wakeup callback for the communication thread void flow_t::post_wakeup(line_details_t& line, const wakeup_block_t b) { #if PENTAGO_MPI_FUNNEL requests.add_immediate(curry(&flow_t::wakeup,this,&line,b)); #else static_assert(sizeof(line_details_t*)==sizeof(long long int),""); // Send a pointer to ourselves to the communication thread MPI_Request request; CHECK(MPI_Isend((void*)&line.self,1,MPI_LONG_LONG_INT,0,wakeup_tag(b),comms.wakeup_comm,&request)); // Since requests_t::free is not thread safe, we're forced to use MPI_Request_free here. // This is bad, because http://blogs.cisco.com/performance/mpi_request_free-is-evil. CHECK(MPI_Request_free(&request)); #endif }
// Post a receive for the next incoming output block into the given buffer
void flow_t::post_output_recv(Array<Vector<super_t,2>>* buffer) {
  PENTAGO_MPI_TRACE("post output recv");
  MPI_Request request;
  // Lazily allocate the buffer: one full block plus one extra entry in compressed mode
  if (!buffer->size())
    *buffer = large_buffer<Vector<super_t,2>>(sqr(sqr(block_size))+PENTAGO_MPI_COMPRESS_OUTPUTS,uninit);
  GEODE_ASSERT(buffer->size()==sqr(sqr(block_size))+PENTAGO_MPI_COMPRESS_OUTPUTS);
  {
    thread_time_t time(mpi_kind,unevent);
    if (PENTAGO_MPI_COMPRESS_OUTPUTS)
      // Compressed mode: receive raw bytes, up to the buffer's full capacity
      CHECK(MPI_Irecv(buffer->data(),memory_usage(*buffer),MPI_BYTE,MPI_ANY_SOURCE,MPI_ANY_TAG,comms.output_comm,&request));
    else
      // Uncompressed mode: receive 64-bit words, 8 per Vector<super_t,2> entry
      CHECK(MPI_Irecv((uint64_t*)buffer->data(),8*buffer->size(),datatype<uint64_t>(),MPI_ANY_SOURCE,MPI_ANY_TAG,comms.output_comm,&request));
  }
  // process_output fires once the receive completes
  requests.add(request,curry(&flow_t::process_output,this,buffer),true);
}
// Compile-time check that curry(sum), applied at every possible argument split
// point, equals a direct call.  Exactly one of VR_BASELINE / VR_CURRY is
// presumably defined by the build — with neither, s0..s7 are undeclared.
int main() {
  const auto sum = [](auto a, auto b, auto c, auto d, auto e, auto f, auto g, auto h) constexpr { return a + b + c + d + e + f + g + h; };
  constexpr auto expected = sum(0, 1, 2, 3, 4, 5, 6, 7);
#if defined(VR_BASELINE)
  // Baseline: eight direct calls, for comparison (e.g. of compile time)
  constexpr auto s0 = sum(0, 1, 2, 3, 4, 5, 6, 7);
  constexpr auto s1 = sum(0, 1, 2, 3, 4, 5, 6, 7);
  constexpr auto s2 = sum(0, 1, 2, 3, 4, 5, 6, 7);
  constexpr auto s3 = sum(0, 1, 2, 3, 4, 5, 6, 7);
  constexpr auto s4 = sum(0, 1, 2, 3, 4, 5, 6, 7);
  constexpr auto s5 = sum(0, 1, 2, 3, 4, 5, 6, 7);
  constexpr auto s6 = sum(0, 1, 2, 3, 4, 5, 6, 7);
  constexpr auto s7 = sum(0, 1, 2, 3, 4, 5, 6, 7);
#elif defined(VR_CURRY)
  // Curried: sk splits the eight arguments after the first k
  constexpr auto s0 = curry(sum)(0, 1, 2, 3, 4, 5, 6, 7);
  constexpr auto s1 = curry(sum)(0)(1, 2, 3, 4, 5, 6, 7);
  constexpr auto s2 = curry(sum)(0, 1)(2, 3, 4, 5, 6, 7);
  constexpr auto s3 = curry(sum)(0, 1, 2)(3, 4, 5, 6, 7);
  constexpr auto s4 = curry(sum)(0, 1, 2, 3)(4, 5, 6, 7);
  constexpr auto s5 = curry(sum)(0, 1, 2, 3, 4)(5, 6, 7);
  constexpr auto s6 = curry(sum)(0, 1, 2, 3, 4, 5)(6, 7);
  constexpr auto s7 = curry(sum)(0, 1, 2, 3, 4, 5, 6)(7);
#endif
  // All variants must agree with the direct call, at compile time
  static_assert(s0 == expected);
  static_assert(s1 == expected);
  static_assert(s2 == expected);
  static_assert(s3 == expected);
  static_assert(s4 == expected);
  static_assert(s5 == expected);
  static_assert(s6 == expected);
  static_assert(s7 == expected);
  // Also consume the values at runtime so nothing is optimized into silence
  return s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7;
}
// Exercise curry over a callable `foo` that presumably has several operator()
// overloads, plus is_invokable introspection on both the raw and curried forms.
int main() {
  auto f = curry(foo);
  // Call the 3rd overload:
  f("world");
  // testing the ability to "jump over" the second overload:
  std::cout << f(3.14,10,nullptr)(nullptr) << "\n";
  // call the 2nd overload:
  auto x = f('a',2);
  std::cout << x << "\n";
  // again:
  x = f('a')(2);
  std::cout << x << "\n";
  // Compare what is invokable on the raw callable vs its curried wrapper
  std::cout << is_invokable<decltype(foo)(double, int)>{} << "\n";
  std::cout << is_invokable<decltype(foo)(double)>{} << "\n";
  std::cout << is_invokable<decltype(f)(double, int)>{} << "\n";
  std::cout << is_invokable<decltype(f)(double)>{} << "\n";
  std::cout << is_invokable<decltype(f(3.14))(int)>{} << "\n";
  // decltype((foo)) is an lvalue reference type; this checks that a direct
  // (double,int) call on foo compiles and that its result brace-initializes from 3
  decltype(std::declval<decltype((foo))>()(std::declval<double>(), std::declval<int>())) y = {3};
  (void)y;
  // std::cout << << "\n";
}
/* Turn the arrow expression v into a closure, capturing the variables that
 * are free in its body.  em0(v) is presumably the parameter list and em1(v)
 * the body — confirm against the tArrow constructor. */
static obj enclose(obj v){
  /* vto_close accumulates the free variables found while scanning the body */
  vto_close = Assoc();
  assert(v->type == tArrow);
  obj vs = Assoc();
  /* Bind the arrow's own parameters so they are not reported as free */
  pbind_vars(&vs, em0(v));
  penv = op(vs, nil);
  enclose0(em1(v));
  release(penv);
  assert(vto_close->type == tAssoc);
  /* No free variables: build the closure with the body as-is */
  if(! ul(vto_close)) return render(tClosure, list3(retain(em0(v)), retain(em1(v)), nil));
  list varlist = nil, vallist = nil;
  /* Pair each free variable name with its current value */
  for(list l = ul(vto_close); l; l=rest(l)){
    varlist = cons(retain(car(first(l))), varlist);
    vallist = cons(find_var(car(first(l))), vallist);
  }
  release(vto_close);
  /* Curry the captured names/values onto the body, then wrap as a closure */
  obj rr = curry(List2v(varlist), List2v(vallist), retain(em1(v)));
  rr = render(tClosure, list3(retain(em0(v)), rr, nil));
  return rr;
}
// Optionally compress an output block and send it void flow_t::send_output(line_details_t* const line, const int b) { const auto block = line->pre.line.block(b); const auto owner_and_id = output_blocks.partition->find_block(line->pre.line.section,block); const int owner = owner_and_id.x; const local_id_t owner_block_id = owner_and_id.y; const int tag = request_id(owner_block_id,line->pre.line.dimension); const auto event = line->pre.line.block_line_event(b); MPI_Request request; #if PENTAGO_MPI_COMPRESS_OUTPUTS // Send compressed block thread_time_t time(output_send_kind,event); const auto compressed = line->compressed_output_block_data(b); CHECK(MPI_Isend((void*)compressed.data(),compressed.size(),MPI_BYTE,owner,tag,comms.output_comm,&request)); PENTAGO_MPI_TRACE("send output %p: owner %d, owner block id %d, dimension %d, count %d, tag %d, event 0x%llx",line,owner,owner_block_id.id,line->pre.line.dimension,compressed.size(),tag,event); #else // Send without compression thread_time_t time(output_send_kind,event); const auto block_data = line->output_block_data(b); CHECK(MPI_Isend((void*)block_data.data(),8*block_data.size(),MPI_LONG_LONG_INT,owner,tag,comms.output_comm,&request)); PENTAGO_MPI_TRACE("send output %p: owner %d, owner block id %d, dimension %d, count %d, tag %d, event 0x%llx",line,owner,owner_block_id.id,line->pre.line.dimension,block_data.size(),tag,event); #endif requests.add(request,curry(&flow_t::finish_output_send,this,line)); }
// Convenience overload: curry f and immediately apply the given arguments xs...
// NOTE(review): the template parameter list (presumably template<class F, class... Ts>)
// is not visible in this chunk — confirm it immediately precedes this definition.
constexpr auto curry(F && f, Ts&& ...xs) { return curry(std::forward<F>(f)).apply(std::forward<Ts>(xs)...); }
// Post a receive for the next wakeup message (a line pointer sent from rank 0's
// compute side, matching the MPI_Isend in post_wakeup)
void flow_t::post_wakeup_recv() {
  PENTAGO_MPI_TRACE("post wakeup recv");
  MPI_Request request;
  // In compressed-output mode the wakeup tag varies, so accept any tag;
  // otherwise only tag 0 is used
  CHECK(MPI_Irecv(&wakeup_buffer,1,MPI_LONG_LONG_INT,0,PENTAGO_MPI_COMPRESS_OUTPUTS?MPI_ANY_TAG:0,comms.wakeup_comm,&request));
  // process_wakeup fires once the receive completes
  requests.add(request,curry(&flow_t::process_wakeup,this),true);
}
// Post a receive for the next incoming block request (two ints, from any rank)
void flow_t::post_request_recv(Vector<int,2>* buffer) {
  PENTAGO_MPI_TRACE("post request recv");
  MPI_Request request;
  CHECK(MPI_Irecv((int*)buffer,2,MPI_INT,MPI_ANY_SOURCE,MPI_ANY_TAG,comms.request_comm,&request));
  // process_request fires once the receive completes
  requests.add(request,curry(&flow_t::process_request,this,buffer),true);
}
// Wrap f in a Callable_as_<Fun, ...> adapter, then curry the wrapper.
// NOTE(review): the template parameter list declaring Fun and F is not visible
// in this chunk — confirm it immediately precedes this definition.  The
// trailing semicolon after the body is redundant but harmless.
constexpr auto curry_as(F && f) { return curry(Callable_as_<Fun, std::decay_t<F>>(std::forward<F>(f))); };
// Curry explicit instantiations of sum at arities 2, 3, and 4 and print each result.
int main() {
  // Compute-then-print per arity, preserving the original evaluation order.
  const auto pair_sum = curry(&sum<int, int>)(1)(2);
  cout << pair_sum << endl;
  const auto triple_sum = curry(&sum<int, int, int>)(1)(2)(3);
  cout << triple_sum << endl;
  const auto quad_sum = curry(&sum<int, int, int, int>)(1)(2)(3)(4);
  cout << quad_sum << endl;
}
int main() { IntToVoid f2 = curry(foo, 20); VoidToVoid f4 = curry(f2, 10); f4(); }