int FrontProtocol::Barrier() { int tag; PacketPtr p; unsigned int countACKs = 0; #if defined(CONTROL_STREAM_BLOCKING) cerr << "[FE] Entering barrier..." << endl; MRN_STREAM_RECV(mrnApp->stControl, &tag, p, TAG_ACK); p->unpack("%d", &countACKs); cerr << "[FE] Barrier received " << countACKs << " ACK's..." << endl; #else for (int i=0; i<mrnApp->stControl->size(); i++) { int x = 0; MRN_STREAM_RECV(mrnApp->stControl, &tag, p, TAG_ACK); p->unpack("%d", &x); countACKs += x; } #endif cerr << "[FE] Barrier broadcasting " << countACKs << " ACK's..." << endl; MRN_STREAM_SEND(mrnApp->stControl, TAG_ACK, "%d", countACKs); if (countACKs != mrnApp->stControl->size()) { cerr << "[FE] ERROR: FrontProtocol::Barrier: " << countACKs << " ACKs received, expected " << mrnApp->stControl->size() << endl; return -1; } cerr << "[FE] Exiting barrier" << endl; return 0; }
/** * Automatically publishes all the streams that are queued in 'registeredStreams' to the back-ends. * return 0 on success; -1 otherwise. */ int FrontProtocol::AnnounceStreams() { int tag, NumberOfStreams=0; PacketPtr p; /* Announce streams to the back-ends */ unsigned int countACKs = 0; /* DEBUG std::cout << "[FE] FrontProtocol::AnnounceStreams: Sending " << registeredStreams.size() << " streams" << std::endl; */ /* Send the number of streams */ NumberOfStreams = registeredStreams.size(); MRN_STREAM_SEND(mrnApp->stControl, TAG_STREAM, "%d", registeredStreams.size()); for (int i=0; i<NumberOfStreams; i++) { STREAM *st = registeredStreams.front(); /* DEBUG std::cout << "[FE] FrontProtocol::AnnounceStreams: Publishing stream #" << st->get_Id() << " streams" << std::endl; */ /* Send a message through every stream */ MRN_STREAM_SEND(st, TAG_STREAM, ""); /* Remove the stream from the queue */ registeredStreams.pop(); } /* Read ACKs */ #if defined(CONTROL_STREAM_BLOCKING) MRN_STREAM_RECV(mrnApp->stControl, &tag, p, TAG_ACK); p->unpack("%d", &countACKs); #else for (int i=0; i<mrnApp->stControl->size(); i++) { int x = 0; MRN_STREAM_RECV(mrnApp->stControl, &tag, p, TAG_ACK); p->unpack("%d", &x); countACKs += x; } #endif if (countACKs != mrnApp->stControl->size()) { cerr << "[FE] Error announcing streams! (" << countACKs << " ACKs received, expected " << mrnApp->stControl->size() << ")" << endl; return -1; } return 0; }
/**
 * Sub-test "test_Count": exercises the "aggr_Count" filter loaded from so_file.
 *
 * A stream over the broadcast communicator is created with the loaded filter
 * and SFILTER_WAITFORALL, an empty PROT_COUNT message is sent and flushed, and
 * the single integer received back must equal stream->size().
 *
 * Every failure path reports the sub-test as MRNTEST_FAILURE before returning.
 *
 * @param net      network the stream is created on.
 * @param so_file  shared object that provides "aggr_Count".
 * @return 0 on success, -1 on any failure.
 */
int test_CountFilter( Network * net, const char * so_file )
{
    int retval, tag, recv_val=0;
    PacketPtr buf;
    std::string testname("test_Count");

    test->start_SubTest(testname);

    int filter_id = net->load_FilterFunc( so_file, "aggr_Count" );
    if( filter_id == -1 ){
        test->print("Stream::load_FilterFunc() failure\n", testname);
        test->end_SubTest(testname, MRNTEST_FAILURE);
        return -1;
    }

    Communicator * comm_BC = net->get_BroadcastCommunicator( );
    Stream * stream = net->new_Stream(comm_BC, filter_id, SFILTER_WAITFORALL);

    if( stream->send(PROT_COUNT, "") == -1 ){
        test->print("stream::send() failure\n", testname);
        test->end_SubTest(testname, MRNTEST_FAILURE);
        return -1;
    }

    if( stream->flush( ) == -1 ){
        test->print("stream::flush() failure\n", testname);
        test->end_SubTest(testname, MRNTEST_FAILURE);
        return -1;
    }

    retval = stream->recv(&tag, buf);
    assert( retval != 0 ); //shouldn't be 0, either error or block till data
    if( retval == -1 ){
        //recv error
        test->print("stream::recv() failure\n", testname);
        test->end_SubTest(testname, MRNTEST_FAILURE);
        return -1;
    }

    //Got data
    if( buf->unpack( "%d", &recv_val ) == -1 ){
        test->print("stream::unpack() failure\n", testname);
        // Close the sub-test here too; previously this path left it open.
        test->end_SubTest(testname, MRNTEST_FAILURE);
        return -1;
    }

    const int num_end_points = static_cast<int>( stream->size( ) );
    if( recv_val != num_end_points ){
        char tmp_buf[256];
        // Bounded write; the end-point count is passed as the int that "%d" expects.
        snprintf(tmp_buf, sizeof(tmp_buf),
                 "recv_val(%d) != NumEndPoints(%d). Failure.\n",
                 recv_val, num_end_points );
        test->print(tmp_buf, testname);
        test->end_SubTest(testname, MRNTEST_FAILURE);
        return -1;
    }

    test->end_SubTest(testname, MRNTEST_SUCCESS);
    return 0;
}
/**
 * Sub-test "test_CountOddsAndEvens": exercises the "aggr_CountOddsAndEvens"
 * filter loaded from so_file.
 *
 * An empty PROT_COUNTODDSANDEVENS message is sent on a broadcast stream that
 * uses the loaded filter with SFILTER_WAITFORALL; the two integers received
 * back (odd and even counts) are printed through the test harness.
 *
 * @param net      network the stream is created on.
 * @param so_file  shared object that provides "aggr_CountOddsAndEvens".
 * @return 0 on success, -1 on any failure (the sub-test is then marked failed).
 */
int test_CountOddsAndEvensFilter( Network * net, const char * so_file )
{
    int odd_count = 0, even_count = 0;
    int recv_status, recv_tag = 0;
    PacketPtr reply;
    std::string testname("test_CountOddsAndEvens");

    // Every failure is handled the same way, so only the message is recorded
    // at the point of failure and the reporting is done once at the end.
    const char * failure = NULL;

    test->start_SubTest(testname);

    int filter_id = net->load_FilterFunc( so_file, "aggr_CountOddsAndEvens" );
    if( filter_id == -1 ){
        failure = "Stream::load_FilterFunc() failure\n";
    }
    else{
        Communicator * broadcast = net->get_BroadcastCommunicator( );
        Stream * stream = net->new_Stream(broadcast, filter_id, SFILTER_WAITFORALL);

        if( stream->send(PROT_COUNTODDSANDEVENS, "") == -1 ){
            failure = "stream::send() failure\n";
        }
        else if( stream->flush( ) == -1 ){
            failure = "stream::flush() failure\n";
        }
        else{
            recv_status = stream->recv(&recv_tag, reply);
            assert( recv_status != 0 ); //shouldn't be 0, either error or block till data

            if( recv_status == -1 ){
                failure = "stream::recv() failure\n";
            }
            else if( reply->unpack( "%d %d", &odd_count, &even_count ) == -1 ){
                failure = "stream::unpack() failure\n";
            }
            else{
                char tmp_buf[256];
                sprintf(tmp_buf, "num_odds = %d; num_evens = %d\n", odd_count, even_count);
                test->print(tmp_buf, testname);
            }
        }
    }

    if( failure != NULL ){
        test->print(failure, testname);
        test->end_SubTest(testname, MRNTEST_FAILURE);
        return -1;
    }

    test->end_SubTest(testname, MRNTEST_SUCCESS);
    return 0;
}
/**
 * Event loop of the front-end: waits on the domains' notification descriptors
 * and dispatches every packet that arrives to the owning probe.
 *
 * Each iteration select()s on the descriptor set returned by Domain::getFdSet()
 * (plus stdin when breakOnEnter is set) with a timeout of
 * Frontend::selectTimeout seconds. The loop ends when
 *   - select() fails (reported as DysectAPI::OK, as before),
 *   - stdin becomes readable while breakOnEnter is set,
 *   - a timeout occurs with breakOnTimeout set and numEvents exhausted, or
 *   - `running` becomes false.
 *
 * @return DysectAPI::OK on a normal stop; the value of Err::warn(Error, ...)
 *         when a stream is missing or a receive/unpack fails.
 */
DysectAPI::DysectErrorCode Frontend::listen() {
  int ret;

  // Install handler for (ctrl-c) abort
  // signal(SIGINT, Frontend::interrupt);
  //

  printf("Waiting for events (! denotes captured event)\n");
  printf("Hit <enter> to stop session\n");
  fflush(stdout);

  do {
    // select() overwrites fd_set with ready fd's
    // Copy fd_set structure
    fd_set fdRead = Domain::getFdSet();

    if(breakOnEnter)
      FD_SET(0, &fdRead); //STDIN

    struct timeval timeout;
    timeout.tv_sec = Frontend::selectTimeout;
    timeout.tv_usec = 0;

    ret = select(Domain::getMaxFd() + 1, &fdRead, NULL, NULL, &timeout);

    if(ret < 0) {
      //return Err::warn(DysectAPI::Error, "select() failed to listen on file descriptor set.");
      return DysectAPI::OK;
    }

    if(FD_ISSET(0, &fdRead) && breakOnEnter) {
      Err::info(true, "Stopping session - enter key was hit");
      break;
    }

    // Look for owners
    vector<Domain*> doms = Domain::getFdsFromSet(fdRead);

    if(doms.empty()) {
      // Nothing was ready: this wake-up counts against the event budget.
      if(Frontend::breakOnTimeout && (--Frontend::numEvents < 0)) {
        Err::info(true, "Stopping session - increase numEvents for longer sessions");
        break;
      }
    } else {
      printf("\n");
      fflush(stdout);
    }

    for(vector<Domain*>::size_type i = 0; i < doms.size(); i++) {
      Domain* dom = doms[i];

      PacketPtr packet;
      int tag;

      if(!dom->getStream()) {
        return Err::warn(Error, "Stream not available for domain %x", dom->getId());
      }

      // Drain the stream: non-blocking receives until no packet is left.
      do {
        ret = dom->getStream()->recv(&tag, packet, false);
        if(ret == -1) {
          return Err::warn(Error, "Receive error");

        } else if(ret == 0) {
          break;
        }

        int count;
        char *payload;
        int len;

        // NOTE(review): ownership of `payload` after unpack is not visible
        // here; it is handed to handleActions() and not released by this loop.
        if(packet->unpack("%d %auc", &count, &payload, &len) == -1) {
          return Err::warn(Error, "Unpack error");
        }

        if(Domain::isProbeEnabledTag(tag)) {
          // The tag, not the stream the packet arrived on, selects the domain.
          Domain* tagDom = 0;

          if(!Domain::getDomainFromTag(tagDom, tag)) {
            Err::warn(false, "Could not get domain from tag %x", tag);
          } else {
            //Err::info(true, "[%d] Probe %x enabled (payload size %d)", count, dom->getId(), len);
            //Err::info(true, "[%d] Probe %x enabled", count, dom->getId());
          }

          if(!tagDom) {
            // No domain for this tag: skip the packet instead of
            // dereferencing a null pointer below.
            continue;
          }

          Probe* probe = tagDom->owner;
          if(!probe) {
            Err::warn(false, "Probe object not found for %x", tagDom->getId());
          } else {
            probe->handleActions(count, payload, len);
          }

          // Empty bodied probe
          // Check wether backends are waiting for releasing processes
          if(tagDom->isBlocking()) {
            tagDom->sendContinue();
          }
        }

      } while(1);

      dom->getStream()->clear_DataNotificationFd();
    }
  } while(running);

  // Normal termination; previously control fell off the end of a non-void
  // function here.
  return DysectAPI::OK;
}
int main(int argc, char **argv) { Network * net = NULL; Communicator * comm_BC; Stream * stream; int32_t send_val=57, recv_val=0; if( (argc < 3) || (argc > 4) ) { fprintf( stderr, "Usage: %s topology_file num_backends [num_threads_per_be]\n", argv[0] ); exit(-1); } char* topology_file = argv[1]; unsigned int num_backends = atoi( argv[2] ); unsigned int num_be_thrds = 1; if( argc == 4 ) num_be_thrds = atoi( argv[3] ); // If backend_exe (2nd arg) and backend_args (3rd arg) are both NULL, // then all nodes specified in the topology are internal tree nodes. net = Network::CreateNetworkFE( topology_file, NULL, NULL ); bool cbrett = net->register_EventCallback( Event::TOPOLOGY_EVENT, TopologyEvent::TOPOL_ADD_BE, BE_Add_Callback, NULL ); if(cbrett == false) { fprintf( stdout, "Failed to register callback for back-end add topology event\n"); delete net; return -1; } cbrett = net->register_EventCallback( Event::TOPOLOGY_EVENT, TopologyEvent::TOPOL_REMOVE_NODE, BE_Remove_Callback, NULL ); if(cbrett == false) { fprintf( stdout, "Failed to register callback for back-end remove topology event\n"); delete net; return -1; } // Query net for topology object NetworkTopology * topology = net->get_NetworkTopology(); vector< NetworkTopology::Node * > internal_leaves; topology->get_Leaves(internal_leaves); topology->print(stdout); // Write connection information to temporary file write_be_connections( internal_leaves, num_backends ); // Wait for backends to attach unsigned int waitfor_count = num_backends * num_be_thrds; fprintf( stdout, "Please start backends now.\n\nWaiting for %u backends to connect\n", waitfor_count ); fflush(stdout); unsigned curr_count = 0; do { sleep(1); cb_lock.Lock(); curr_count = num_attach_callbacks; cb_lock.Unlock(); } while( curr_count != waitfor_count ); fprintf( stdout, "All %u backends have attached!\n", waitfor_count); // A simple broadcast/gather comm_BC = net->get_BroadcastCommunicator(); stream = net->new_Stream(comm_BC, TFILTER_NULL, 
SFILTER_DONTWAIT); fprintf( stdout, "broadcasting int %d to back-ends\n", send_val ); if( (stream->send(PROT_INT, "%d", send_val) == -1) || (stream->flush() == -1) ){ printf("stream::send(%d) failure\n", send_val); return -1; } fprintf( stdout, "waiting for response from %d back-ends\n", waitfor_count ); for( unsigned int i = 0; i < waitfor_count; i++ ){ int tag; PacketPtr p; int retval = stream->recv(&tag, p, true); if( retval == -1){ //recv error printf("stream::recv() int failure\n"); return -1; } if( p->unpack( "%d", &recv_val ) == -1 ){ printf("stream::unpack() failure\n"); return -1; } printf("FE received int = %d\n", recv_val); } if( (stream->send(PROT_EXIT, "") == -1) || (stream->flush() == -1) ){ printf("stream::send_exit() failure\n"); return -1; } sleep(1); //delete stream; #if 0 // TESTING detach before shutdown fprintf( stdout, "Waiting for %u backends to detach\n", waitfor_count ); fflush(stdout); curr_count = 0; do { sleep(1); cb_lock.Lock(); curr_count = num_detach_callbacks; cb_lock.Unlock(); } while( curr_count != waitfor_count ); fprintf( stdout, "All %u backends have detached!\n", waitfor_count); #endif // The Network destructor causes internal and leaf nodes to exit delete net; return 0; }
int main( int argc, char* argv[] ) { Stream *stream = NULL; Stream *grp_stream = NULL; Stream *be_stream = NULL; int tag = -1; unsigned int val = 0; PacketPtr pkt; // join the MRNet net Network * net = Network::CreateNetworkBE( argc, argv ); bool done = false; while( !done ) { tag = 0; int rret = net->recv( &tag, pkt, &stream ); if( rret == -1 ) { cerr << "BE: Network::recv() failed" << endl; break; } if( tag == SC_GROUP ) { grp_stream = stream; } else if( tag == SC_SINGLE ) { be_stream = stream; pkt->unpack( "%ud", &val ); std::cout << "BE: sending val on BE stream" << std::endl; if( (be_stream->send(tag, "%ud", val) == -1) || (be_stream->flush() == -1) ) { cerr << "BE: val send single failed" << endl; } val = 1; } else { cerr << "BE: unexpected tag " << tag << endl; done = true; } if( grp_stream && (val != 0) ) done = true; } // send our value for the reduction std::cout << "BE: sending val on group stream" << std::endl; if( (grp_stream->send(SC_GROUP, "%ud", val) == -1) || (grp_stream->flush() == -1) ) { cerr << "BE: val send group failed" << endl; } // cleanup // receive a go-away message tag = 0; int rret = net->recv( &tag, pkt, &stream ); if( rret == -1) { cerr << "BE: failed to receive go-away tag" << endl; } else if( tag != SC_EXIT ) { cerr << "BE: received unexpected go-away tag " << tag << endl; } // wait for FE to delete network, which will shut us down net->waitfor_ShutDown(); delete net; return 0; }
int main(int argc, char **argv) { Stream * stream = NULL; PacketPtr p; int rc, tag=0, num_iters=0; int send_val = 10 ; int recv_val ; int sendAr[TOTAL_PACKET_SIZE]; Network * net = Network::CreateNetworkBE( argc, argv ); do { rc = net->recv(&tag, p, &stream); if( rc == -1 ) { fprintf( stderr, "BE: Network::recv() failure\n" ); break; } else if( rc == 0 ) { // a stream was closed continue; } switch(tag) { case PROT_CONCAT: p->unpack( "%d %d", &recv_val, &num_iters ); printf("Init BE pid : %d : values :", getpid()); // Send integer arrays as waves - simulate buffer waves for( int i=0; i<num_iters; i++ ) { //init array for each wave initBuffer(sendAr); fprintf( stdout, "BE: Sending wave %u ...\n", i ); // if( stream->send(tag, "%d", send_val + i) == -1 ) { if( stream->send(tag, "%ad", sendAr, TOTAL_PACKET_SIZE) == -1 ) { fprintf( stderr, "BE: stream::send(%%d) failure in PROT_CONCAT\n" ); tag = PROT_EXIT; break; } if( stream->flush() == -1 ) { fprintf( stderr, "BE: stream::flush() failure in PROT_CONCAT\n" ); break; } fflush(stdout); sleep(2); // stagger sends } break; case PROT_EXIT: if( stream->send(tag, "%d", 0) == -1 ) { fprintf( stderr, "BE: stream::send(%%s) failure in PROT_EXIT\n" ); break; } if( stream->flush( ) == -1 ) { fprintf( stderr, "BE: stream::flush() failure in PROT_EXIT\n" ); } break; default: fprintf( stderr, "BE: Unknown Protocol: %d\n", tag ); tag = PROT_EXIT; break; } fflush(stderr); } while( tag != PROT_EXIT ); if( stream != NULL ) { while( ! stream->is_Closed() ) sleep(1); delete stream; } // FE delete of the net will cause us to exit, wait for it net->waitfor_ShutDown(); delete net; return 0; }
SymReader *MRNetSymbolReaderFactory::openSymbolReader(std::string pathName) { const char *pathStr = pathName.c_str(); bool localLib = true; int tag = PROT_LIB_REQ, ret; long size; uint64_t fileContentsLength = 0; char *fileName = NULL, *fileContents = NULL; FILE *fp; PacketPtr packet; MRNetSymbolReader *msr; std::map<std::string, MRNetSymbolReader* >::iterator iter; ////GLL comment: This is a temp feature to look for a copy in /tmp RAM disk first //std::string fileBaseName = basename(pathName.c_str()); //std::string tmpFilePath = "/tmp/" + fileBaseName; //struct stat fileStat; //if (stat(tmpFilePath.c_str(), &fileStat) == 0) //{ //fprintf(stderr, "%s %s\n", pathName.c_str(), tmpFilePath.c_str()); //pathName = tmpFilePath; //} mrn_dbg(2, mrn_printf(__FILE__, __LINE__, "openSymbolReader", gStatOutFp, "Interposed lib functions called openSymbolReader(%s)\n", pathName.c_str())); iter = openReaders_.find(pathName); if (iter == openReaders_.end()) { mrn_dbg(2, mrn_printf(__FILE__, __LINE__, "openSymbolReader", gStatOutFp, "no existing reader for %s\n", pathStr)); AsyncGlobalFileStatus myStat(pathStr); /* TODO: this is a workaround for BlueGene where FGFS is reporting incorrectly */ #ifdef BGL if (true) #else if (IS_YES(myStat.isUnique())) #endif { localLib = false; mrn_dbg(2, mrn_printf(__FILE__, __LINE__, "openSymbolReader", gStatOutFp, "requesting contents for %s\n", pathStr)); if (stream_->send(tag, "%s", pathStr) == -1) { mrn_dbg(2, mrn_printf(__FILE__, __LINE__, "openSymbolReader", gStatOutFp, "BE: stream::send() failure\n")); return NULL; } if (stream_->flush() == -1) { mrn_dbg(2, mrn_printf(__FILE__, __LINE__, "openSymbolReader", gStatOutFp, "BE: stream::flush() failure\n")); return NULL; } //ret = network_->recv(&tag, packet, &stream_); ret = stream_->recv(&tag, packet); if (ret != 1) { mrn_dbg(2, mrn_printf(__FILE__, __LINE__, "openSymbolReader", gStatOutFp, "BE: network::recv() failure\n")); return NULL; } if (tag == PROT_LIB_REQ_ERR) { mrn_dbg(2, 
mrn_printf(__FILE__, __LINE__, "openSymbolReader", gStatOutFp, "FE reported error sending contents of %s\n", pathStr)); localLib = true; } if (tag != PROT_LIB_REQ_RESP) { mrn_dbg(2, mrn_printf(__FILE__, __LINE__, "openSymbolReader", gStatOutFp, "Unexpected tag %d when trying to receive contents of %s\n", tag, pathStr)); localLib = true; } #ifdef MRNET40 else if (packet->unpack("%Ac %s", &fileContents, &fileContentsLength, &fileName) == -1) #else else if (packet->unpack("%ac %s", &fileContents, &fileContentsLength, &fileName) == -1) #endif { mrn_dbg(2, mrn_printf(__FILE__, __LINE__, "openSymbolReader", gStatOutFp, "Failed to unpack contents of %s, length %d\n", pathStr, fileContentsLength)); localLib = true; } free(fileName); fileName = NULL; }
void dysectAPIUpStream(vector<PacketPtr> &packetsIn, vector<PacketPtr> &packetsOut, vector<PacketPtr> &packetsOutReverse, void **filterState, PacketPtr ¶ms, const TopologyLocalInfo &topology) { //cpPrintMsg(STAT_LOG_MESSAGE, __FILE__, __LINE__, "dysectAPIUpStream entry\n"); int streamId = packetsIn[0]->get_StreamId(); int tag = packetsIn[0]->get_Tag(); UpstreamFilter upstreamFilter(streamId); struct packet* newPacket = 0; int newPacketLen = 0; int newCount = 0; struct timeval startTime, endTime; double elapsedTime = 0.0; gettimeofday(&startTime, NULL); for(int i = 0; i < packetsIn.size(); i++) { //cpPrintMsg(STAT_LOG_MESSAGE, __FILE__, __LINE__, "Incoming packet %d\n", i); PacketPtr currentPacket = packetsIn[i]; int tag = currentPacket->get_Tag(); int count; int payloadLen; char *payload; if(currentPacket->unpack("%d %auc", &count, &payload, &payloadLen) == -1) { cpPrintMsg(STAT_LOG_MESSAGE, __FILE__, __LINE__, "Could not unpack packet!\n"); continue; } //cpPrintMsg(STAT_LOG_MESSAGE, __FILE__, __LINE__, "Incoming packet unpack count '%d' payload size %d\n", count, payloadLen); if(payloadLen > 1) { //cpPrintMsg(STAT_LOG_MESSAGE, __FILE__, __LINE__, "Handle probe packet with payload\n"); if(newPacket == 0) { newCount = count; //cpPrintMsg(STAT_LOG_MESSAGE, __FILE__, __LINE__, "Setting probe packet with base\n"); newPacket = (struct packet*)payload; newPacketLen = payloadLen; } else { newCount += count; // Merge packets struct packet* mergedPacket = 0; AggregateFunction::mergePackets(newPacket, (struct packet*)payload, mergedPacket, newPacketLen); newPacket = mergedPacket; } } else if(upstreamFilter.isControlTag(tag) && (payloadLen <= 1)) { cpPrintMsg(STAT_LOG_MESSAGE, __FILE__, __LINE__, "Handle control packet\n"); upstreamFilter.aggregateControlPacket(tag, count); } } if(newPacket != 0) { //cpPrintMsg(STAT_LOG_MESSAGE, __FILE__, __LINE__, "Setting probe packet with base (npl: %d)\n", newPacketLen); PacketPtr packet(new Packet(streamId, tag, "%d %auc", newCount, 
(unsigned char*)newPacket, newPacketLen)); packetsOut.push_back(packet); } if(upstreamFilter.anyControlPackets()) { upstreamFilter.getControlPackets(packetsOut); } gettimeofday(&endTime, NULL); elapsedTime += (endTime.tv_sec - startTime.tv_sec) * 1000.0; elapsedTime += (endTime.tv_usec - startTime.tv_usec) / 1000.0; cpPrintMsg(STAT_LOG_MESSAGE, __FILE__, __LINE__, "Aggregated %d packets in %.3f ms\n", packetsIn.size(), elapsedTime); }
int main(int argc, char **argv) { int send_val=32, recv_val=0; int tag, retval; PacketPtr p; if (argc != 4){ fprintf(stderr, "Usage: %s <topology file> <backend_exe> <so_file>\n", argv[0]); exit(-1); } const char * topology_file = argv[1]; const char * backend_exe = argv[2]; const char * so_file = argv[3]; const char * dummy_argv=NULL; // This Network() cnstr instantiates the MRNet internal nodes, according to the // organization in "topology_file," and the application back-end with any // specified cmd line args Network * net = Network::CreateNetworkFE( topology_file, backend_exe, &dummy_argv); if (net->has_Error()) { net->perror("Network creation failed"); exit(-1); } if (!net->set_FailureRecovery(false)) { fprintf( stdout, "Failed to disable failure recovery\n" ); delete net; return -1; } bool cbrett = net->register_EventCallback( Event::TOPOLOGY_EVENT, TopologyEvent::TOPOL_REMOVE_NODE, Failure_Callback, NULL); if( cbrett == false ) { printf("Failed to register callback for node failure event\n" ); delete net; return -1; } std::vector<const char*> filterNames; filterNames.push_back("IntegerAdd"); filterNames.push_back("TreeInit"); std::vector<int> filterIds; // Make sure path to "so_file" is in LD_LIBRARY_PATH retval = net->load_FilterFuncs(so_file, filterNames, filterIds); if (retval == -1){ fprintf( stderr, "Network::load_FilterFunc() failure\n" ); delete net; return -1; } // A Broadcast communicator contains all the back-ends Communicator * comm_BC = net->get_BroadcastCommunicator( ); // Create a stream that will use the Integer_Add filter for aggregation Stream * add_stream = net->new_Stream( comm_BC, filterIds[0], SFILTER_WAITFORALL, filterIds[1] ); int num_backends = int(comm_BC->get_EndPoints().size()); // Broadcast a control message to back-ends to send us "num_iters" // waves of integers tag = PROT_SUM; unsigned int num_iters=5; std::cout << "sending packet!\n"; if( add_stream->send( tag, "%lf %lf %lf %lf %lf %lf %lf %lf %d %lf %lf %lf %lf", .3, 4., 
.95, .1, -1.0, 1.0, -1.0, 1.0, 16, -1.0, 1.0, -1.0, 1.0) == -1 ){ fprintf( stderr, "stream::send() failure\n" ); return -1; } std::cout << "flushing all packets!\n"; if( add_stream->flush( ) == -1 ){ fprintf( stderr, "stream::flush() failure\n" ); return -1; } // We expect "num_iters" aggregated responses from all back-ends for( unsigned int i=0; i < num_iters; i++ ){ std::cout << "receiving packet\n"; retval = add_stream->recv(&tag, p); std::cout << "received packet\n"; if( retval == 0 ) { //shouldn't be 0, either error or block for data, unless a failure occured fprintf( stderr, "stream::recv() returned zero\n" ); if( saw_failure ) break; return -1; } if( retval == -1 ) { //recv error fprintf( stderr, "stream::recv() unexpected failure\n" ); if( saw_failure ) break; return -1; } if( p->unpack( "%d", &recv_val ) == -1 ){ fprintf( stderr, "stream::unpack() failure\n" ); return -1; } int expected_val = num_backends * i * send_val; if( recv_val != expected_val ){ fprintf(stderr, "FE: Iteration %d: Failure! recv_val(%d) != %d*%d*%d=%d (send_val*i*num_backends)\n", i, recv_val, send_val, i, num_backends, expected_val ); } else{ fprintf(stdout, "FE: Iteration %d: Success! 
recv_val(%d) == %d*%d*%d=%d (send_val*i*num_backends)\n", i, recv_val, send_val, i, num_backends, expected_val ); } } if( saw_failure ) { fprintf( stderr, "FE: a network process has failed, killing network\n" ); delete net; } else { delete add_stream; // Tell back-ends to exit Stream * ctl_stream = net->new_Stream( comm_BC, TFILTER_MAX, SFILTER_WAITFORALL ); if(ctl_stream->send(PROT_EXIT, "") == -1){ fprintf( stderr, "stream::send(exit) failure\n" ); return -1; } if(ctl_stream->flush() == -1){ fprintf( stderr, "stream::flush() failure\n" ); return -1; } retval = ctl_stream->recv(&tag, p); if( retval == -1){ //recv error fprintf( stderr, "stream::recv() failure\n" ); return -1; } delete ctl_stream; if( tag == PROT_EXIT ) { // The Network destructor will cause all internal and leaf tree nodes to exit delete net; } } return 0; }
void data_recving (MYSQL* conn, char * tablename, int nr_be) { int nr_exited = 0; int tag; int count = 0; // PacketPtr pack; int rank, eid, tid, data, finish, nr_record, mpi_rank, type, src_rank, dst_rank, sendsize, sendtype, recvsize, recvtype; int mpi_comm, mpi_tag; MRN::Network * net = GetNetwork(); unsigned pid; long long unsigned time; unsigned long time_s, time_us; char sql[2048]; char buffer[500]; char instance[40], metric[40]; char *ename = NULL; char *hostname = NULL; char *procdata = NULL; Communicator *comm = net->get_BroadcastCommunicator(); Stream *stream = net->new_Stream( comm, TFILTER_NULL, SFILTER_DONTWAIT ); stream->send( PROT_DATA, "%d", 0 ); printf_d("[MRNFE] recieving data...nr_exited is %d nr_be is %d\n",nr_exited,nr_be); if (mysql_autocommit (conn, 0) != 0) printf_d ("[MRNFE] %s\n", mysql_error(conn)); while( nr_exited != nr_be ) { PacketPtr pack; stream->recv( &tag, pack ); count ++; switch (tag) { case PROT_DATA: pack->unpack("%d %d %d %uld %s %d %s", &tid, &pid, &eid, &time, &hostname, &finish, &ename); char *sql_tmp; sql_tmp = (char *) calloc (sizeof (char *), 200); snprintf_d(sql_tmp, 200, "CALL insertdata ( -1,%d, \"%s\" , %d, \"%s\", 7,\"FFFFFFFF\", '%llu', %d );" , pid, hostname, eid, ename, time, finish); if (mysql_query (conn, sql_tmp)) printf_d ("[MRNFE] %s\n", mysql_error(conn)); free (sql_tmp); free (hostname); free (ename); hostname = NULL; ename = NULL; break; case PROT_MPIDATA: /*MPI wrapper trace 数据*/ pack->unpack("%d %ud %s %d %s %d %d %d %d %d %d %d %d %d %uld %d", &mpi_rank, &pid, &hostname, &eid, &ename, &type, &mpi_comm, &mpi_tag, &src_rank, &dst_rank, &sendsize, &sendtype, &recvsize, &recvtype, &time, &finish); #ifdef debug printf_d ("[DEBUG]:\tRecv_Data: %d %u %s %d %s %d %x %d %d %d %d %d %d %d %llu %d\n", mpi_rank, pid, hostname, eid, ename, type, mpi_comm, mpi_tag, src_rank, dst_rank, sendsize, sendtype, recvsize, recvtype, time, finish); #endif snprintf_d (sql, 2048,"CALL insertdata (%d, %u, \"%s\", %d, \"%s\", %d, 
\"%x\", %llu, %d);", mpi_rank, pid, hostname, eid, ename, type, mpi_comm, time, finish); #ifdef DEBUG printf_d ("[DEBUG]:\t%s\n", sql); #endif if (mysql_query (conn, sql)) { printf_d ("[MRNFE] %s\n", mysql_error(conn)); } if (type != 0) { snprintf_d (sql, 2048,"CALL insert_comm_data (%d, %u, \"%s\", %d, \"%s\", %d, %d, %d, %d, %d, %d, %d, %llu, %d);", mpi_rank, pid, hostname, eid, ename, mpi_tag, src_rank, dst_rank, sendsize, sendtype, recvsize, recvtype, time, finish); if (mysql_query (conn, sql)) { printf_d ("[MRNFE] %s\n", mysql_error(conn)); } } free (ename); free (hostname); ename = NULL; hostname = NULL; break; case PROT_PROC: /*proc数据*/ if(!use_proc) { printf_d ("[MRNFE] no proc!!! ignored\n"); break; } { pack->unpack("%ud %ud %s %d %s %ud", &time_s, &time_us, &hostname, &nr_record, &procdata, &pid ); //printf_d(" [DEBUG] %ld, %ld, %d %s\n", time_s, time_us, nr_record, procdata ); char *p = procdata; float fdata; for( int i = 0; i < nr_record; i++ ){ sscanf_d( p, "%s", buffer ); p+=strlen(buffer)+1; sscanf_d( buffer, "%[^#]#%[^#]#%f", instance, metric, &fdata ); snprintf_d(sql, 2048, "CALL insertproc( %ld, %ld, \"%s\", %d, \"%s\", \"%s\", '%f')" , time_s, time_us, hostname, pid, metric, instance, fdata); if (mysql_query (conn, sql)) { printf_d ("[MRNFE] %s\n", mysql_error(conn)); } //printf_d ("%s\n", sql); //mysql_query( conn, sql ); } free(procdata); free(hostname); procdata = hostname = NULL; } break; case PROT_PAPI: if (!use_papi) { printf_d ("[MRNFE] no papi!!! 
ignored\n"); break; } else { long long papi_data; char *papi_event; pack->unpack("%s %ud %d %s %s %ld %uld %d", &ename, &pid, &tid, &hostname, &papi_event, &papi_data, &time, &finish); snprintf_d (sql, 2048, "CALL insert_papi_data(\"%s\", %u, %d, \"%s\", \"%s\", %lld, %llu, %d);", ename, pid, tid, hostname, papi_event, papi_data, time, finish); if (mysql_query (conn, sql)) printf_d ("[MRNFE] %s\n", mysql_error(conn)); free (hostname); free (ename); free (papi_event); hostname = NULL; ename = NULL; papi_event = NULL; } break; case PROT_IO: char *io_data; long long unsigned stime, etime; pack->unpack ("%s %ud %d %s %s %uld %uld", &ename, &pid, &tid, &hostname, &io_data, &stime, &etime); snprintf_d (sql, 2048, "CALL insert_io_data(\"%s\", %u, %d, \"%s\", \"%s\", %llu, %llu);", ename, pid, tid, hostname, io_data, stime, etime); if (mysql_query (conn, sql)) printf_d ("[MRNFE] %s\n", mysql_error(conn)); free (hostname); free (ename); free (io_data); hostname = NULL; ename = NULL; io_data = NULL; break; case PROT_MPIRANK: /*MPI rank数据*/ pack->unpack("%d %d", &rank, &data); snprintf_d( sql, 2048, "update `%s_nodelist` set mpirank=%d where rank=%d", tablename, data, rank ); mysql_query( conn, sql ); break; case PROT_NODEDATA: /*MPI节点数据*/ pack->unpack ("%ud %d", &pid, &mpi_rank); snprintf_d (sql, 2048, "update `%s_nodelist` set mpirank=%d where pid=%u", tablename, mpi_rank, pid); mysql_query( conn, sql ); break; case PROT_HALT: /*发送停止*/ nr_exited ++ ; printf_d( "[DEBUG] %d Back-ends Halted\n", nr_exited ); break; default: break; } if (count % 10000 == 0) mysql_commit(conn); } FE::write_exiting_info(); }
int main(int argc, char **argv){ Stream * stream; PacketPtr pkt; int tag; char recv_char; char recv_uchar; int16_t recv_short; uint16_t recv_ushort; int32_t recv_int; uint32_t recv_uint; int64_t recv_long; uint64_t recv_ulong; float recv_float; double recv_double; char * recv_string; bool success=true; fprintf(stderr, "BE args...\n"); for(int i =0; i < argc; i++){ fprintf(stderr, "Args were: %s\n", argv[i]); } /*if( argc != 6 ) { fprintf(stderr, "Usage: %s parent_hostname parent_port parent_rank my_hostname my_rank\n", argv[0]); exit( -1 ); }*/ Network * net = Network::CreateNetworkBE( argc, argv ); do { if ( net->recv( &tag, pkt, &stream ) != 1 ) { fprintf(stderr, "BE: stream::recv() failure ... exiting\n"); exit (-1); } switch(tag){ case PROT_CHAR: #if defined(DEBUG) fprintf( stderr, "BE: Processing PROT_CHAR ...\n"); #endif if( pkt->unpack( "%c", &recv_char ) == -1 ) { fprintf(stderr, "BE: stream::unpack(%%c) failure\n"); success=false; } if( stream->send( tag, "%c", recv_char ) == -1 ) { fprintf(stderr, "BE: stream::send(%%c) failure\n"); success=false; } break; case PROT_INT: #if defined(DEBUG) fprintf( stderr, "BE: Processing PROT_INT ...\n"); #endif if( pkt->unpack( "%d", &recv_int ) == -1 ) { fprintf(stderr, "BE: stream::unpack(%%d) failure\n"); success=false; } if( stream->send( tag, "%d", recv_int ) == -1 ) { fprintf(stderr, "BE: stream::send(%%d) failure\n"); success=false; } break; case PROT_UINT: #if defined(DEBUG) fprintf( stderr, "BE: Processing PROT_UINT ...\n"); #endif if( pkt->unpack( "%ud", &recv_uint ) == -1 ) { fprintf(stderr, "BE: stream::unpack(%%ud) failure\n"); success=false; } if( stream->send( tag, "%ud", recv_uint ) == -1 ) { fprintf(stderr, "BE: stream::send(%%ud) failure\n"); success=false; } break; case PROT_SHORT: #if defined(DEBUG) fprintf( stderr, "BE: Processing PROT_SHORT ...\n"); #endif if( pkt->unpack( "%hd", &recv_short ) == -1 ) { fprintf(stderr, "BE: stream::unpack(%%hd) failure\n"); success=false; } if( stream->send( tag, 
"%hd", recv_short ) == -1 ) { fprintf(stderr, "BE: stream::send(%%hd) failure\n"); success=false; } break; case PROT_USHORT: #if defined(DEBUG) fprintf( stderr, "BE: Processing PROT_USHORT ...\n"); #endif if( pkt->unpack( "%uhd", &recv_ushort ) == -1 ) { fprintf(stderr, "BE: stream::unpack(%%uhd) failure\n"); success=false; } if( stream->send( tag, "%uhd", recv_ushort ) == -1 ) { fprintf(stderr, "BE: stream::send(%%uhd) failure\n"); success=false; } break; case PROT_LONG: #if defined(DEBUG) fprintf( stderr, "BE: Processing PROT_LONG ...\n"); #endif if( pkt->unpack( "%ld", &recv_long ) == -1 ) { fprintf(stderr, "BE: stream::unpack(%%ld) failure\n"); success=false; } if( stream->send( tag, "%ld", recv_long ) == -1 ) { fprintf(stderr, "BE: stream::send(%%ld) failure\n"); success=false; } break; case PROT_ULONG: #if defined(DEBUG) fprintf( stderr, "BE: Processing PROT_ULONG ...\n"); #endif if( pkt->unpack( "%uld", &recv_ulong ) == -1 ) { fprintf(stderr, "BE: stream::unpack(%%uld) failure\n"); success=false; } if( stream->send( tag, "%uld", recv_ulong ) == -1 ) { fprintf(stderr, "BE: stream::send(%%uld) failure\n"); success=false; } break; case PROT_FLOAT: #if defined(DEBUG) fprintf( stderr, "BE: Processing PROT_FLOAT ...\n"); #endif if( pkt->unpack( "%f", &recv_float ) == -1 ) { fprintf(stderr, "BE: stream::unpack(%%f) failure\n"); success=false; } if( stream->send( tag, "%f", recv_float ) == -1 ) { fprintf(stderr, "BE: stream::send(%%f) failure\n"); success=false; } break; case PROT_DOUBLE: #if defined(DEBUG) fprintf( stderr, "BE: Processing PROT_DOUBLE ...\n"); #endif if( pkt->unpack( "%lf", &recv_double ) == -1 ) { fprintf(stderr, "BE: stream::unpack(%%lf) failure\n"); success=false; } if( stream->send( tag, "%lf", recv_double ) == -1 ) { fprintf(stderr, "BE: stream::send(%%lf) failure\n"); success=false; } break; case PROT_STRING: #if defined(DEBUG) fprintf( stderr, "BE: Processing PROT_STRING ...\n"); #endif if( pkt->unpack( "%s", &recv_string ) == -1 ) { 
fprintf(stderr, "BE: stream::unpack(%%s) failure\n"); success=false; } if( stream->send( tag, "%s", recv_string ) == -1 ) { fprintf(stderr, "BE: stream::send(%%s) failure\n"); success=false; } if( stream->flush() == -1){ fprintf(stderr, "BE: stream::flush() failure\n"); return -1; } free(recv_string); break; case PROT_ALL: #if defined(DEBUG) fprintf( stderr, "BE: Processing PROT_ALL ...\n"); #endif if( pkt->unpack( "%c %uc %hd %uhd %d %ud %ld %uld %f %lf %s", &recv_char, &recv_uchar, &recv_short, &recv_ushort, &recv_int, &recv_uint, &recv_long, &recv_ulong, &recv_float, &recv_double, &recv_string ) == 1 ) { fprintf(stderr, "BE: stream::unpack(all) failure\n"); success = false; } if( stream->send( tag, "%c %uc %hd %uhd %d %ud %ld %uld %f %lf %s", recv_char, recv_uchar, recv_short, recv_ushort, recv_int, recv_uint, recv_long, recv_ulong, recv_float, recv_double, recv_string ) == 1 ) { fprintf(stderr, "BE: stream::send(all) failure\n"); success=false; } break; case PROT_EXIT: #if defined(DEBUG) fprintf( stderr, "BE: Processing PROT_EXIT ...\n"); #endif break; default: fprintf(stderr, "BE: Unknown Protocol: %d\n", tag); exit(-1); } if( tag != PROT_EXIT ) { if( stream->flush() == -1){ fprintf(stderr, "BE: stream::flush() failure\n"); return -1; } } } while( tag != PROT_EXIT ); // FE delete net will shut us down, wait for it net->waitfor_ShutDown(); if( net != NULL ) delete net; return 0; }
int main(int argc, char **argv) { int send_val=32; int recv_val; int tag, retval; PacketPtr p; if( (argc != 4) && (argc != 5) ){ fprintf(stderr, "Usage: %s <topology file> <so_file> <num BEs>\n", argv[0]); exit(-1); } const char * topology_file = argv[1]; const char * so_file = argv[2]; const char * dummy_argv=NULL; FILE * structureFile; structureFile = fopen ("/home/usw/Install/sight/sight/sight/mrnet/bin/mrnet.Attrib/structure","ab+"); if (structureFile!=NULL) { #ifdef DEBUG_ON printf("OUT File opened.. \n"); #endif } int nets = 1; int num_backends = 1; if( argc == 4 ){ num_backends = atoi( argv[3] ); } int n = 0; while( n++ < nets ) { saw_failure = false; if( nets > 1 ) fprintf(stdout, "\n\n---------- Network Instance %d ----------\n\n", n); #ifdef DEBUG_ON fprintf(stdout, "PID: %d top : %s , BE : %s dummy : %s num BEs : %d \n", getpid(), topology_file, "no initialized BE", dummy_argv, num_backends); #endif // If backend_exe (2nd arg) and backend_args (3rd arg) are both NULL, // then all nodes specified in the topology are internal tree nodes. Network * net = Network::CreateNetworkFE( topology_file, NULL, NULL ); if( net->has_Error() ) { net->perror("Network creation failed!!!"); exit(-1); } if( ! 
net->set_FailureRecovery(false) ) { fprintf( stdout, "Failed to disable failure recovery\n" ); delete net; return -1; } bool cbrett = net->register_EventCallback( Event::TOPOLOGY_EVENT, TopologyEvent::TOPOL_ADD_BE, BE_Add_Callback, NULL ); if(cbrett == false) { fprintf( stdout, "Failed to register callback for back-end add topology event\n"); delete net; return -1; } cbrett = net->register_EventCallback( Event::TOPOLOGY_EVENT, TopologyEvent::TOPOL_REMOVE_NODE, BE_Remove_Callback, NULL ); if(cbrett == false) { fprintf( stdout, "Failed to register callback for back-end remove topology event\n"); delete net; return -1; } // Query net for topology object NetworkTopology * topology = net->get_NetworkTopology(); std::vector< NetworkTopology::Node * > internal_leaves; topology->get_Leaves(internal_leaves); topology->print(stdout); //print leaf info printLeafInfo(internal_leaves, num_backends); // Wait for backends to attach unsigned int waitfor_count = num_backends; fprintf( stdout, "Please start backends now.\n\nWaiting for %u backends to connect\n", waitfor_count ); fflush(stdout); unsigned curr_count = 0; do { sleep(1); locker.set_mutex_lock(&cb_lock); curr_count = num_attach_callbacks; locker.set_mutex_unlock(&cb_lock); fprintf( stdout, " %d backends have been attached!\n", curr_count); } while( curr_count != waitfor_count ); #ifdef DEBUG_ON fprintf( stdout, "All %u backends have attached!\n", waitfor_count); #endif // Make sure path to "so_file" is in LD_LIBRARY_PATH int filter_id = net->load_FilterFunc( so_file, "SightStreamAggregator" ); if( filter_id == -1 ){ fprintf( stderr, "Network::load_FilterFunc() failure\n" ); delete net; return -1; } // A Broadcast communicator contains all the back-ends Communicator * comm_BC = net->get_BroadcastCommunicator( ); // Create a stream that will use the Integer_Add filter for aggregation Stream * add_stream = net->new_Stream( comm_BC, filter_id, // TFILTER_SUM, // SFILTER_WAITFORALL ); SFILTER_DONTWAIT ); int num_backends2 = 
int(comm_BC->get_EndPoints().size()); // Broadcast a control message to back-ends to send us "num_iters" // waves of integers tag = PROT_CONCAT; //total number of waves are calculated using --> total number of integers we like to send / number of intergers per wave #ifdef DEBUG_ON fprintf( stdout, "preparing to send INIT tags.. num_be : %d\n", num_backends2); #endif unsigned int num_iters= TOTAL_STREAM_SIZE / TOTAL_PACKET_SIZE; if( add_stream->send( tag, "%d %d", send_val, num_iters ) == -1 ){ fprintf( stderr, "stream::send() failure\n" ); return -1; } if( add_stream->flush( ) == -1 ){ fprintf( stderr, "stream::flush() failure\n" ); return -1; } #ifdef DEBUG_ON fprintf( stdout, "INIT tag flush() done... \n"); #endif /* * Main loop where merged output is recieved from child nodes * */ char* recv_Ar; unsigned length; int total_len = 0 ; // We expect "num_iters" aggregated responses from all back-ends while(true){ retval = add_stream->recv(&tag, p); #ifdef DEBUG_ON fprintf(stdout, "\n[FE: STREM->recv done ; retval : %d] \n", retval); #endif if(tag == PROT_END_PHASE){ //print any stream coming with protocol end phase #ifdef DEBUG_ON fprintf(stdout, "FE: Iteration PROTOCOL END SUCCESS %d: \n", 0); #endif if( p->unpack( "%ac", &recv_Ar, &length ) == -1 ){ fprintf( stderr, "PROTOCOL END stream::unpack() failure\n" ); return -1; } total_len+= length; for(int j = 0 ; j < length ; j++){ fprintf(structureFile, "%c", recv_Ar[j]); } fprintf(stdout, "\n[FE: PROTOCOL SUCCESS: Output stored: bytes written => most recent : [%d] total : [%d] ] \n", length, total_len); fflush(structureFile); break; } if( retval == 0 ) { //shouldn't be 0, either error or block for data, unless a failure occured fprintf( stderr, "stream::recv() returned zero\n" ); if( saw_failure ) break; return -1; } if( retval == -1 ) { //recv error fprintf( stderr, "stream::recv() unexpected failure\n" ); if( saw_failure ) break; return -1; } if( p->unpack( "%ac", &recv_Ar, &length ) == -1 ){ fprintf( stderr, 
"stream::unpack() failure\n" ); return -1; } total_len += length; for(int j = 0 ; j < length ; j++){ fprintf(structureFile, "%c", recv_Ar[j]); } // fprintf(stdout, "\n[FE: Display values done!] \n"); } if( saw_failure ) { fprintf( stderr, "FE: a network process has failed, killing network\n" ); fflush(structureFile); fclose(structureFile); delete net; } else { delete add_stream; fflush(structureFile); fclose(structureFile); // Tell back-ends to exit Stream * ctl_stream = net->new_Stream( comm_BC, TFILTER_MAX, SFILTER_WAITFORALL ); if(ctl_stream->send(PROT_EXIT, "") == -1){ fprintf( stderr, "stream::send(exit) failure\n" ); return -1; } if(ctl_stream->flush() == -1){ fprintf( stderr, "stream::flush() failure\n" ); return -1; } retval = ctl_stream->recv(&tag, p); if( retval == -1){ //recv error fprintf( stderr, "stream::recv() failure\n" ); return -1; } delete ctl_stream; if( tag == PROT_EXIT ) { // The Network destructor will cause all internal and leaf tree nodes to exit delete net; break; } } } return 0; }