void be_a_slave(int argc, char** argv, struct mw_api_spec *f) { int p; // parse command line arg for success probability if (argc == 3) { float temp = atof(argv[2]); if (temp > .0 && temp < 1.) p = temp; } int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); DEBUG_PRINT(("Seeded srand with %u", (unsigned) time(NULL) + rank)); srand((unsigned)time(NULL) + rank); mw_work_t work; MPI_Request request_master, request_sup; MPI_Status status_master, status_sup; int flag_master = 0, flag_sup = 0; mw_result_t * computedResult; MPI_Irecv(&work, f->work_sz, MPI_CHAR, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &request_master); MPI_Irecv(&work, f->work_sz, MPI_CHAR, 1, MPI_ANY_TAG, MPI_COMM_WORLD, &request_sup); int master_failed = 0; while(1) { MPI_Test(&request_master, &flag_master, &status_master); MPI_Test(&request_sup, &flag_sup, &status_sup); if ((flag_master || flag_sup) && (status_master.MPI_TAG == KILL_TAG || status_sup.MPI_TAG == KILL_TAG)) return; if (!master_failed && flag_sup && (status_sup.MPI_TAG == M_FAIL_TAG)) { //printf("MASTER FAILED\n"); master_failed = 1; MPI_Irecv(&work, f->work_sz, MPI_CHAR, 1, MPI_ANY_TAG, MPI_COMM_WORLD, &request_sup); } if (!master_failed && flag_master && status_master.MPI_TAG == WORK_TAG) { computedResult = f->compute(&work); F_Send(computedResult, f->res_sz, MPI_CHAR, 0, WORK_TAG, MPI_COMM_WORLD, rank); MPI_Irecv(&work, f->work_sz, MPI_CHAR, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &request_master); } else if (master_failed && flag_sup && status_sup.MPI_TAG == WORK_TAG) { //printf("MASTER FAILED SENDING RESULT\n"); computedResult = f->compute(&work); F_Send(computedResult, f->res_sz, MPI_CHAR, 1, WORK_TAG, MPI_COMM_WORLD, rank); MPI_Irecv(&work, f->work_sz, MPI_CHAR, 1, MPI_ANY_TAG, MPI_COMM_WORLD, &request_sup); } } }
void be_a_slave(int argc, char** argv, struct mw_api_spec *f) { mw_work_t work; mw_result_t * computedResult; //int ping = 1; MPI_Status status; // parse command line arg for success probability if (argc == 3) { float temp = atof(argv[2]); if (temp > .0 && temp < 1.) p = temp; } int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); DEBUG_PRINT(("Seeded srand with %u", (unsigned) time(NULL) + rank)); srand((unsigned)time(NULL) + rank); MPI_Request request_master_fail; MPI_Status status_master_fail; int flag_master_fail = 0; int master_fail = 0; MPI_Irecv(&master_fail, 1, MPI_INT, 1, M_FAIL_TAG, MPI_COMM_WORLD, &request_master_fail); while(1) { MPI_Test(&request_master_fail, &flag_master_fail, &status_master_fail); // don't Irecv again because only need to recv once if (flag_master_fail) { MPI_Recv(&work, f->work_sz, MPI_CHAR, 1, MPI_ANY_TAG, MPI_COMM_WORLD, &status); if(status.MPI_TAG == KILL_TAG) { return; } computedResult = f->compute(&work); F_Send(computedResult, f->res_sz, MPI_CHAR, 1, WORK_TAG, MPI_COMM_WORLD, rank); } else { MPI_Recv(&work, f->work_sz, MPI_CHAR, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); if(status.MPI_TAG == KILL_TAG) { return; } computedResult = f->compute(&work); F_Send(computedResult, f->res_sz, MPI_CHAR, 0, WORK_TAG, MPI_COMM_WORLD, rank); } } }
void slave(struct mw_api_spec *f, int sz, int myid) { // Slave Code mw_work_t *my_work = (mw_work_t *) malloc(f->work_sz); mw_result_t **res = (mw_result_t **) malloc(sizeof(mw_result_t *)); int curr_master = 0; // Dynamic int message[3]; int status[2]; message[0] = myid; while(1) { // Send Request for work message[1] = REQUEST; int work_index; F_Send(message, 3, MPI_INT, curr_master, MESSAGE_CHANNEL, MPI_COMM_WORLD, slave_fail_p); // Receive current status if(!Timeout_recv(status, 2, MPI_INT, curr_master, STATUS_CHANNEL, MPI_COMM_WORLD, MPI_STATUS_IGNORE, 1.0)){ curr_master = 1; continue; } if(status[0] == MOVEON){ work_index = status[1]; } else { break; } if(!Timeout_recv(my_work, 1, f->work_type, curr_master, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, 1.0)) { curr_master = 1; continue; } // Compute int size = f->compute(my_work, res); // Send request for send back the result message[1] = RECEIVE; message[2] = work_index; F_Send(message, 3, MPI_INT, curr_master, MESSAGE_CHANNEL, MPI_COMM_WORLD, slave_fail_p); F_Send(&size, 1, MPI_INT, curr_master, 1, MPI_COMM_WORLD, slave_fail_p); F_Send(*res, size, f->result_type, curr_master, 2, MPI_COMM_WORLD, slave_fail_p); } }
void be_a_slave(int argc, char** argv, struct mw_api_spec *f) { mw_work_t work; mw_result_t * computedResult; //int ping = 1; MPI_Status status; // parse command line arg for success probability if (argc == 3) { float temp = atof(argv[2]); if (temp > .0 && temp < 1.) p = temp; } int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); DEBUG_PRINT(("Seeded srand with %u", (unsigned) time(NULL) + rank)); srand((unsigned)time(NULL) + rank); while(1) { // recv unit of work from master MPI_Recv(&work, f->work_sz, MPI_CHAR, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); if(status.MPI_TAG == KILL_TAG) { return; } // check for kill signal for non-blocking recv computedResult = f->compute(&work); //DEBUG_PRINT(("Result computed!")); // send unit of work to master with probability p F_Send(computedResult, f->res_sz, MPI_CHAR, 0, WORK_TAG, MPI_COMM_WORLD, rank); //DEBUG_PRINT(("result sent")); // send ping after unit of work is possibly sent //MPI_Send(&ping, 1, MPI_INT, 1, WORK_SUP_TAG, MPI_COMM_WORLD); } }
void master(struct mw_api_spec *f, int sz, int myid){ // Master Code mw_work_t** work_list; // Dynamic int curr; int message[3]; // 0 for dest, 1 for message, 2 for result index int status[2]; // 0 for status, 1 for work index int dest = 0; mw_result_t** result_list; int *res_size = (int *)malloc((WORK_NUM) * sizeof(int)); result_list = (mw_result_t **) malloc((WORK_NUM) * sizeof(mw_result_t *)); std::unordered_set<int> result_set; std::unordered_set<int> waiting_list; std::queue<int> work_queue; double *work_start_time = (double *)malloc((WORK_NUM) * sizeof(double)); for(int i = 0;i < WORK_NUM; i++){ work_queue.push(i); } work_list = f->create(WORK_NUM); if(myid != 0) { message[0] = myid; message[1] = MASTERCHECK; while(1) { MPI_Isend(message, 3, MPI_INT, 0, MESSAGE_CHANNEL, MPI_COMM_WORLD, &request); MPI_Wait(&request, &mpi_status); if(!Timeout_recv(status, 2, MPI_INT, 0, MESSAGE_CHANNEL, MPI_COMM_WORLD, MPI_STATUS_IGNORE, timeout)) { //MPI_Barrier(MPI_COMM_WORLD); break; } else{ if(status[0] != MOVEON) { return; } } } } while(1) { //Receive a message and do correspond operation. int flag; if(!Timeout_recv(message, 3, MPI_INT, MPI_ANY_SOURCE, MESSAGE_CHANNEL, MPI_COMM_WORLD, MPI_STATUS_IGNORE, 3.0)) { printf("%d Master No requests from slaves.\n", myid); break; } dest = message[0]; // Send a piece of work if(message[1] == REQUEST) { printf("%d Master get request from %d.\n", myid, dest); if(result_set.size() != WORK_NUM) { int work_index = work_queue.front(); work_queue.pop(); status[0] = MOVEON; status[1] = work_index; if(myid == 0){ F_Send(status, 2, MPI_INT, dest, STATUS_CHANNEL, MPI_COMM_WORLD, master_fail_p); } else { MPI_Send(status, 2, MPI_INT, dest, STATUS_CHANNEL, MPI_COMM_WORLD); } mw_work_t *temp = *(work_list + work_index); if(myid == 0){ F_Send(temp, 1, f->work_type, dest, 0, MPI_COMM_WORLD, master_fail_p); } else { MPI_Send(temp, 1, f->work_type, dest, 0, MPI_COMM_WORLD); } waiting_list.insert(work_index); work_start_time[work_index] = MPI_Wtime(); } else { status[0] = SHUTDOWN; F_Send(status, 2, MPI_INT, dest, STATUS_CHANNEL, MPI_COMM_WORLD, master_fail_p); } } // Receive a piece of result if(message[1] == RECEIVE) { printf("%d Master receive result from %d.\n", myid, dest); int result_index = message[2]; int *curr_size = res_size + result_index; if(!Timeout_recv(curr_size, 1, MPI_INT, dest, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, timeout)){ continue; } //printf("received size:%d\n", *curr_size); *(result_list + result_index) = (mw_result_t *) malloc((*curr_size) * f->res_sz); //printf("2 Flag: %d\n", flag); if(!Timeout_recv(*(result_list + result_index), *curr_size, f->result_type, dest, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE, timeout)) { continue; } waiting_list.erase(result_index); result_set.insert(result_index); } if(myid == 0 && message[1] == MASTERCHECK) { status[0] = MOVEON; printf("MASTERCHECK\n"); F_Send(status, 2, MPI_INT, dest, STATUS_CHANNEL, MPI_COMM_WORLD, master_fail_p); } double curr_time = MPI_Wtime(); //printf("Check waiting list.\n"); for(auto iter = waiting_list.begin(); iter != waiting_list.end(); ++iter) { int curr_index = *iter; double interval = curr_time - work_start_time[curr_index]; if(interval > timeout) { work_queue.push(curr_index); waiting_list.erase(curr_index); } } } if(result_set.size() == WORK_NUM){ f->result(WORK_NUM, res_size, result_list); } else if(myid == 0){ printf("Don't exist any active slaves. Tasks are aborted! Please restart!\n"); } printf("Master Exit!\n"); }