/**
 * Callback to signal successful startup of the controller process.
 *
 * On failure (status == #GNUNET_SYSERR) the slave and all link-controller
 * forwarding state are torn down and the whole service shuts down; on success
 * a controller connection is opened and the queued link-controller request
 * (slave->lcc) is answered.  In every path the pending service client is
 * continued and the LinkControllersContext is freed.
 *
 * @param cls the handle to the slave whose status is to be found here
 * @param cfg the configuration with which the controller has been started;
 *        NULL if status is not #GNUNET_OK
 * @param status #GNUNET_OK if the startup is successful; #GNUNET_SYSERR if not,
 *        GNUNET_TESTBED_controller_stop() shouldn't be called in this case
 */
static void slave_status_cb (void *cls,
                             const struct GNUNET_CONFIGURATION_Handle *cfg,
                             int status)
{
  struct Slave *slave = cls;
  struct LinkControllersContext *lcc;

  /* Keep a local copy: destroy_slave() below invalidates 'slave'. */
  lcc = slave->lcc;
  if (GNUNET_SYSERR == status)
  {
    slave->controller_proc = NULL;
    /* Stop all link controller forwarding tasks since we shutdown here anyway
       and as these tasks depend on the operation queues which are created
       through GNUNET_TESTBED_controller_connect() and in kill_slave() we call
       the destructor function GNUNET_TESTBED_controller_disconnect() */
    GST_free_lcf ();
    kill_slave (slave);
    destroy_slave (slave);
    slave = NULL; /* prevents the slave->lcc reset at the end */
    LOG (GNUNET_ERROR_TYPE_WARNING, "Unexpected slave shutdown\n");
    GNUNET_SCHEDULER_shutdown (); /* We too shutdown */
    goto clean_lcc;
  }
  slave->controller =
      GNUNET_TESTBED_controller_connect (GST_host_list[slave->host_id],
                                         EVENT_MASK, &slave_event_cb, slave);
  if (NULL != slave->controller)
  {
    /* Startup + connect succeeded: report the delegated configuration. */
    send_controller_link_response (lcc->client, lcc->operation_id, cfg, NULL);
  }
  else
  {
    /* Controller process came up but we failed to talk to it. */
    send_controller_link_response (lcc->client, lcc->operation_id, NULL,
                                   "Could not connect to delegated controller");
    kill_slave (slave);
    destroy_slave (slave);
    slave = NULL;
  }

clean_lcc:
  /* Common cleanup: resume the blocked client and free the request context. */
  if (NULL != lcc)
  {
    if (NULL != lcc->client)
    {
      GNUNET_SERVICE_client_continue (lcc->client);
      lcc->client = NULL;
    }
    GNUNET_free (lcc);
  }
  if (NULL != slave)
    slave->lcc = NULL;
}
size_t EscapedSlavesEvent::normal() { // following constants may be tweaked to change game mechanics const size_t ESCAPED_PERCENTAGE = 10; size_t escaped_count = _raid_slaves.size() * MAX_STAT_VALUE / ESCAPED_PERCENTAGE; for (size_t i = 0; i < escaped_count; ++i) { kill_slave(); } return EO_NORMAL; }
/*
 * Report a fatal condition: log the formatted message to syslog, tear the
 * slave process down, and terminate immediately without running atexit
 * handlers.  Never returns.
 */
void
fatalx(char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vsyslog(LOG_ERR, fmt, args);
	va_end(args);

	kill_slave("fatal error");
	_exit(0);
}
/*
 * Write exactly len bytes from buf to sock.  A zero-length write (peer gone)
 * kills the slave and exits; EINTR/EAGAIN are retried; any other write error
 * is fatal.  Only returns once the whole buffer has been sent.
 */
void send_data(int sock, void *buf, size_t len)
{
	char *base = buf;
	size_t done = 0;

	while (done < len) {
		ssize_t n = write(sock, base + done, len - done);

		if (n > 0) {
			done += n;
		} else if (n == 0) {
			kill_slave("write failure");
			_exit(0);
			/* NOTREACHED */
		} else {
			/* n == -1: retry only the transient errors */
			if (errno != EINTR && errno != EAGAIN)
				fatalx("send_data: %m");
		}
	}
}
/**
 * Cleans up the slave list: first kills every slave's controller process,
 * then (in a separate pass, so that inter-slave dependencies are gone)
 * destroys the slave structures, and finally releases the list itself.
 */
void
GST_slave_list_clear ()
{
  unsigned int i;
  struct Slave *s;

  /* Pass 1: terminate all controller processes. */
  for (i = 0; i < GST_slave_list_size; i++)
  {
    s = GST_slave_list[i];
    if (NULL != s)
      kill_slave (s);
  }
  /* Pass 2: free the slave structures. */
  for (i = 0; i < GST_slave_list_size; i++)
  {
    s = GST_slave_list[i];
    if (NULL != s)
      destroy_slave (s);
  }
  GNUNET_free_non_null (GST_slave_list);
  GST_slave_list = NULL;
}
/**
 * Fault-tolerant master loop: creates the work list, hands one unit to each
 * worker (ranks >= number_of_nonslaves), then keeps reassigning work as
 * results, failure notices, and kill signals arrive.  Rank 1 acts as a
 * supervisor: the master pings it, mirrors the per-slave start-time array to
 * it after every assignment change, and learns about failed workers from it.
 *
 * NOTE(review): received_results is indexed as mw_result_t[] but each Irecv
 * transfers f->res_sz MPI_CHARs — assumes res_sz == sizeof(mw_result_t);
 * confirm against the API spec.
 */
void do_master_stuff(int argc, char ** argv, struct mw_api_spec *f)
{
  DEBUG_PRINT(("master starting"));
  int number_of_nonslaves = 3;
  int number_of_slaves;
  MPI_Comm_size(MPI_COMM_WORLD, &number_of_slaves);
  number_of_slaves = number_of_slaves - number_of_nonslaves;
  // needed for F_Send
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  DEBUG_PRINT(("Seeded srand with %u", (unsigned) time(NULL) + rank));
  srand((unsigned)time(NULL) + rank);
  LinkedList * work_list;
  double start, end, start_create, end_create, start_results, end_results;
  start = MPI_Wtime();
  DEBUG_PRINT(("creating work list..."));
  start_create = MPI_Wtime();
  // save work_array separately so we can find index later on
  mw_work_t ** work_array = f->create(argc, argv);
  work_list = listFromArray(work_array);
  end_create = MPI_Wtime();
  DEBUG_PRINT(("created work in %f seconds!", end_create - start_create));
  int slave=1, num_work_units=0;
  num_work_units = get_total_units(work_array);
  mw_result_t * received_results = malloc(f->res_sz * num_work_units);
  if (received_results == NULL)
  {
    fprintf(stderr, "ERROR: insufficient memory to allocate received_results\n");
    exit(0);
  }
  int num_results_received = 0;
  // make array keeping track of pointers for work that's active
  LinkedList* assignment_ptrs[number_of_slaves];
  // create array of start times (0.0 means "idle")
  double assignment_time[number_of_slaves];
  // create array indicating if slaves are down
  int are_you_down[number_of_slaves];
  // current pointer into the work queue
  LinkedList * next_work_node = work_list, * list_end = NULL;
  // have supervisor so starting at number_of_nonslaves
  for(slave=number_of_nonslaves; slave<(number_of_slaves+number_of_nonslaves); ++slave)
  {
    are_you_down[slave-number_of_nonslaves] = 0; //slaves are all working in the beginning
    DEBUG_PRINT(("assigning work to slave"));
    if(next_work_node == NULL)
    {
      DEBUG_PRINT(("reached the end of the work, breaking!"));
      break;
    }
    mw_work_t * work_unit = next_work_node->data;
    send_to_slave(work_unit, f->work_sz, MPI_CHAR, slave, WORK_TAG, MPI_COMM_WORLD);
    // save next_work_node to assigned work
    assignment_ptrs[slave-number_of_nonslaves] = next_work_node;
    assert(assignment_ptrs[slave-number_of_nonslaves] != NULL);
    // save start time
    assignment_time[slave-number_of_nonslaves] = MPI_Wtime();
    // update next_work_node (remember the tail so reassigned nodes can be found)
    if(next_work_node->next == NULL)
    {
      list_end = next_work_node;
    }
    next_work_node=next_work_node->next;
    DEBUG_PRINT(("work sent to slave"));
  }
  // send time array to supervisor
  DEBUG_PRINT(("Sending supervisor first time update"));
  MPI_Send(assignment_time, number_of_slaves, MPI_DOUBLE, 1, SUPERVISOR_TAG, MPI_COMM_WORLD);
  // failure id
  int failure_id, kill_signal;
  MPI_Status status_fail, status_res, status_kill;
  MPI_Request request_fail, request_res, request_kill;
  int flag_fail = 0, flag_res = 0, flag_kill = 0;
  // receive failure from supervisor as non-blocking recv
  MPI_Irecv(&failure_id, 1, MPI_INT, 1, FAIL_TAG, MPI_COMM_WORLD, &request_fail);
  // receive result from workers as non-blocking recv
  MPI_Irecv(&received_results[num_results_received], f->res_sz, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &request_res);
  // receive kill from supervisor as non-blocking recv
  MPI_Irecv(&kill_signal, 1, MPI_INT, 1, KILL_TAG, MPI_COMM_WORLD, &request_kill);
  int ping_sup = 0;
  // send units of work while haven't received all results
  while(num_results_received < num_work_units)
  {
    // send ping to supervisor (lets it detect a dead master)
    MPI_Send(&ping_sup, 1, MPI_INT, 1, M_PING_TAG, MPI_COMM_WORLD);
    // check for flag_fail
    MPI_Test(&request_fail, &flag_fail, &status_fail);
    // check for flag_res
    MPI_Test(&request_res, &flag_res, &status_res);
    // check for flag_kill
    MPI_Test(&request_kill, &flag_kill, &status_kill);
    // send work if have failures or got results
    if (flag_fail)
    {
      DEBUG_PRINT(("received failure from supervisor, process %d", failure_id));
      // get work_unit that needs to be reassigned
      LinkedList * work_unit = assignment_ptrs[failure_id];
      if(work_unit != NULL)
      {
        DEBUG_PRINT(("Moving assignment at %p to end of the queue", work_unit));
        move_node_to_end(work_unit);
        // if the queue had been drained, the requeued node becomes the new head
        if(next_work_node == NULL)
        {
          next_work_node = work_unit;
        }
        assert(next_work_node != NULL);
      }
      if(assignment_time[failure_id] == 0.0)
      {
        DEBUG_PRINT(("Failure on idle process %d. WTF??", failure_id));
      }
      if(are_you_down[failure_id] == 1)
      {
        DEBUG_PRINT(("Failure on a process which is already failed. WTF??"));
      }
      are_you_down[failure_id] = 1; //this slave is considered dead :(
      assignment_ptrs[failure_id] = NULL;
      assignment_time[failure_id] = 0.0;
      MPI_Send(assignment_time, number_of_slaves, MPI_DOUBLE, 1, SUPERVISOR_TAG, MPI_COMM_WORLD);
      flag_fail = 0;
      // continue to receive failures from supervisor as non-blocking recv
      MPI_Irecv(&failure_id, 1, MPI_INT, 1, FAIL_TAG, MPI_COMM_WORLD, &request_fail);
    }
    // find a live, idle worker (assignment_time 0.0 marks idle)
    int idle_process = -1, i;
    for(i=0; i<number_of_slaves; ++i)
    {
      if(assignment_time[i] == 0.0 && !are_you_down[i])
      {
        idle_process = i;
        break;
      }
    }
    if(next_work_node != NULL && idle_process > -1)
    {
      send_to_slave(next_work_node->data, f->work_sz, MPI_CHAR, idle_process+number_of_nonslaves, WORK_TAG, MPI_COMM_WORLD);
      assignment_ptrs[idle_process] = next_work_node;
      assignment_time[idle_process] = MPI_Wtime();
      MPI_Send(assignment_time, number_of_slaves, MPI_DOUBLE, 1, SUPERVISOR_TAG, MPI_COMM_WORLD);
      DEBUG_PRINT(("Gave an assignment to previously idle process %d, assignment at %p", idle_process, next_work_node));
      if(next_work_node->next == NULL)
      {
        list_end = next_work_node;
      }
      next_work_node = next_work_node->next;
    }
    if (flag_res)
    {
      int worker_number = status_res.MPI_SOURCE-number_of_nonslaves;
      if(!are_you_down[worker_number]) //If this slave is marked dead, just ignore him
      {
        // update number of results received
        num_results_received++;
        // a failed worker's unit may have been requeued after list_end
        if(next_work_node == NULL && list_end != NULL && list_end->next != NULL)
        {
          DEBUG_PRINT(("Found more work to do, now an idle process can get an assignment"));
          next_work_node = list_end->next;
          list_end = NULL;
        }
        if(next_work_node != NULL)
        {
          // get work_unit
          mw_work_t* work_unit = next_work_node->data;
          // send new unit of work
          send_to_slave(work_unit, f->work_sz, MPI_CHAR, status_res.MPI_SOURCE, WORK_TAG, MPI_COMM_WORLD);
          // update pointer
          if(next_work_node->next == NULL)
          {
            list_end = next_work_node;
          }
          // update work index for new_pid
          assignment_ptrs[status_res.MPI_SOURCE-number_of_nonslaves] = next_work_node;
          assert(assignment_ptrs[status_res.MPI_SOURCE-number_of_nonslaves] != NULL);
          assignment_time[status_res.MPI_SOURCE-number_of_nonslaves] = MPI_Wtime();
          // send updated array of times to supervisor
          MPI_Send(assignment_time, number_of_slaves, MPI_DOUBLE, 1, SUPERVISOR_TAG, MPI_COMM_WORLD);
          DEBUG_PRINT(("SENT TIME TO SUP"));
          next_work_node = next_work_node->next;
          if(next_work_node == NULL)
          {
            DEBUG_PRINT(("Reached the end of the work list, should get idle processors after this"));
          }
        }
        else
        {
          DEBUG_PRINT(("Worker %d is now idle, I ain't got shit for him to do", worker_number));
          assignment_time[worker_number] = 0.0;
          assignment_ptrs[worker_number] = NULL;
          assert(!are_you_down[worker_number]);
          MPI_Send(assignment_time, number_of_slaves, MPI_DOUBLE, 1, SUPERVISOR_TAG, MPI_COMM_WORLD);
        }
      }
      // continue to receive results from workers as non-blocking recv
      // NOTE(review): this re-post uses WORK_TAG while the first Irecv above
      // used MPI_ANY_TAG — confirm workers always send results with WORK_TAG.
      MPI_Irecv(&received_results[num_results_received], f->res_sz, MPI_CHAR, MPI_ANY_SOURCE, WORK_TAG, MPI_COMM_WORLD, &request_res);
    }
    // supervisor told us to die (it has taken over as master)
    if (flag_kill)
    {
      return;
    }
  }
  // send kill signal to other processes, including supervisor
  for(slave=1; slave<number_of_slaves+number_of_nonslaves; ++slave)
  {
    DEBUG_PRINT(("Murdering slave"));
    kill_slave(slave);
  }
  start_results = MPI_Wtime();
  int err_code = f->result(num_results_received, received_results);
  end_results = MPI_Wtime();
  end = MPI_Wtime();
  DEBUG_PRINT(("all %f s\n", end-start));
  DEBUG_PRINT(("create %f s\n", end_create-start_create));
  DEBUG_PRINT(("process %f s\n", end_results-start_results));
}
/**
 * Supervisor-as-master loop: runs after the original master died.  Rebuilds
 * the outstanding work list from recovery.txt (results already saved by the
 * previous master), tells the workers to report to the supervisor, then runs
 * the master protocol itself — with its own timeout-based failure detection
 * (threshold = mean + 10*stddev of observed completion times) instead of a
 * separate supervisor process.  Appends each new result to recovery.txt and
 * deletes the file on successful completion.
 *
 * NOTE(review): received_results is indexed as mw_result_t[] but each Irecv
 * transfers f->res_sz MPI_CHARs — assumes res_sz == sizeof(mw_result_t).
 */
void do_supervisor_as_master_stuff(int argc, char ** argv, struct mw_api_spec *f)
{
  DEBUG_PRINT(("supervisor taking over"));
  int number_of_nonslaves = 2;
  int number_of_slaves;
  MPI_Comm_size(MPI_COMM_WORLD, &number_of_slaves);
  number_of_slaves = number_of_slaves - number_of_nonslaves;
  DEBUG_PRINT(("NUMBER OF SLAVES %d", number_of_slaves));
  /** slave failure detection **/
  // keep track of start times
  //if (assignment_time1 == NULL)
  //double * assignment_time2 = malloc(sizeof(double)*number_of_slaves);
  // determine how long each worker took
  //if (complete_time == NULL)
  double * complete_time = malloc(sizeof(double)*number_of_slaves);
  // initialize threshold to 0.1
  double threshold = 0.1, tot_time = 0.0, sq_err = 0.0, mean = 0.0, stddev = 0.0;
  /** end slave failure detection **/
  double start, end, start_create, end_create, start_results, end_results;
  start = MPI_Wtime();
  DEBUG_PRINT(("creating work list..."));
  start_create = MPI_Wtime();
  // save work_array separately so we can find index later on
  mw_work_t ** work_array = f->create(argc, argv);
  // create work_list later
  end_create = MPI_Wtime();
  DEBUG_PRINT(("created work in %f seconds!", end_create - start_create));
  int num_work_units=0;
  num_work_units = get_total_units(work_array);
  DEBUG_PRINT(("num_work_units %d\n", num_work_units));
  mw_result_t * received_results = calloc(num_work_units, f->res_sz);
  if (received_results == NULL)
  {
    fprintf(stderr, "ERROR: insufficient memory to allocate received_results\n");
    exit(0);
  }
  int * has_result_array = calloc(num_work_units, sizeof(int));
  int num_results_received = 0;
  /** read through contents of recovery file: each line is "<index> <result-string>" **/
  FILE *file = fopen("recovery.txt","r");
  if (file != NULL) //there are results to process
  {
    int result_index = 0;
    char str[1000];
    // NOTE(review): "%s" has no field-width limit — a recovery line longer
    // than 999 chars overflows str; consider "%999s".
    while(fscanf(file, "%d %s", &result_index, str) != EOF)
    {
      //printf("%d %s\n", result_index, str);
      // update received results
      mw_result_t * result = f->from_str(str);
      //printf("here\n");
      received_results[result_index] = *result;
      //printf("now here\n");
      // update has_results_array
      has_result_array[result_index] = 1;
      // update num_results_received
      num_results_received++;
    }
  }
  DEBUG_PRINT(("num_results_received %d\n", num_results_received));
  // create linked list of indices not in the results array
  LinkedList * work_list = new_linkedlist_node();
  LinkedList * next_work_node = work_list;
  LinkedList * head = work_list;
  // cycle through has_result_array to find indices not in results array
  int i;
  int num_results_needed = 0;
  for (i = 0; i < num_work_units; i++)
  {
    if (has_result_array[i] == 0)
    {
      next_work_node->index = i;
      next_work_node->data = work_array[i];
      if (num_results_needed < (num_work_units-num_results_received)-1) addNode(next_work_node);
      // NOTE(review): stray semicolon — this 'if' is an empty statement and
      // guards nothing; the advance below always runs.  Confirm intent.
      if (next_work_node->next == NULL);
      next_work_node = next_work_node->next;
      num_results_needed++;
    }
  }
  DEBUG_PRINT(("num_results_needed %d", num_results_needed));
  // reset next_work_node to head
  next_work_node = head;
  // tell slaves to send to supervisor now
  int slave;
  for(slave=number_of_nonslaves; slave<(number_of_slaves+number_of_nonslaves); ++slave)
  {
    DEBUG_PRINT(("Telling slave"));
    MPI_Send(0, 0, MPI_CHAR, slave, M_FAIL_TAG, MPI_COMM_WORLD);
  }
  // make array keeping track of pointers for work that's active
  LinkedList* assignment_ptrs[number_of_slaves];
  // create array of start times (0.0 means "idle")
  double assignment_time[number_of_slaves];
  // work-unit index currently assigned to each slave (-1 if none)
  int assignment_indices[number_of_slaves];
  // create array indicating if slaves are down
  int are_you_down[number_of_slaves];
  // pointer for end of list
  LinkedList * list_end = NULL;
  // have supervisor so starting at number_of_nonslaves
  for(slave=number_of_nonslaves; slave<(number_of_slaves+number_of_nonslaves); ++slave)
  {
    are_you_down[slave-number_of_nonslaves] = 0; //slaves are all working in the beginning
    DEBUG_PRINT(("assigning work to slave"));
    if(next_work_node == NULL)
    {
      DEBUG_PRINT(("reached the end of the work, breaking!"));
      break;
    }
    mw_work_t * work_unit = next_work_node->data;
    send_to_slave(work_unit, f->work_sz, MPI_CHAR, slave, WORK_TAG, MPI_COMM_WORLD);
    // save next_work_node to assigned work
    assignment_ptrs[slave-number_of_nonslaves] = next_work_node;
    assert(assignment_ptrs[slave-number_of_nonslaves] != NULL);
    // save start time
    assignment_time[slave-number_of_nonslaves] = MPI_Wtime();
    // save assignment indices
    assignment_indices[slave-number_of_nonslaves] = next_work_node->index;
    // update next_work_node
    if(next_work_node->next == NULL)
    {
      list_end = next_work_node;
    }
    next_work_node=next_work_node->next;
    DEBUG_PRINT(("work %d sent to slave %d", assignment_indices[slave-number_of_nonslaves], slave));
  }
  // no need to send time array to supervisor (we ARE the supervisor now)
  MPI_Status status_res;
  MPI_Request request_res;
  int flag_res = 0;
  // receive result from workers as non-blocking recv
  MPI_Irecv(&received_results[num_results_received], f->res_sz, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &request_res);
  // don't clear out file; will append new results to recovery.txt
  FILE * fptr;
  // send units of work while haven't received all results
  while(num_results_received < num_work_units)
  {
    // check for flag_res
    MPI_Test(&request_res, &flag_res, &status_res);
    // send work if have failures or got results
    /** slave failure detection **/
    // check if slave has not responded for a long time
    for(i=0; i<number_of_slaves; i++)
    {
      // not failed and not idle
      if (!are_you_down[i] && assignment_time[i] != 0.0)
      {
        if (i == 4) DEBUG_PRINT(("NOT FAILED NOT IDLE rank %d %f",i+2, MPI_Wtime()-assignment_time[i]));
        // declare a slave dead once it exceeds the adaptive timeout
        if(threshold>0 && MPI_Wtime() - assignment_time[i] > threshold)
        {
          DEBUG_PRINT(("methinks someone is slacking of rank %d", i+2));
          are_you_down[i] = 1;
          assignment_time[i] = 0.0;
          assignment_indices[i] = -1;
          // get work_unit that needs to be reassigned
          LinkedList * work_unit = assignment_ptrs[i];
          if (work_unit == NULL) DEBUG_PRINT(("work_unit is NULL"));
          if(work_unit != NULL)
          {
            DEBUG_PRINT(("Moving assignment at %p to end of the queue", work_unit));
            move_node_to_end(work_unit);
            if(next_work_node == NULL)
            {
              next_work_node = work_unit;
            }
            assert(next_work_node != NULL);
          }
        }
      }
    }
    /** end slave failure detection **/
    // find an idle process to assign work to
    // NOTE(review): this inner 'i' shadows the outer loop variable above.
    int idle_process = -1, i;
    for(i=0; i<number_of_slaves; ++i)
    {
      if(assignment_time[i] == 0.0 && !are_you_down[i])
      {
        idle_process = i;
        break;
      }
    }
    // assign idle process unit of work
    if(next_work_node != NULL && idle_process > -1)
    {
      send_to_slave(next_work_node->data, f->work_sz, MPI_CHAR, idle_process+number_of_nonslaves, WORK_TAG, MPI_COMM_WORLD);
      /** slave failure detection **/
      //a previously idle worker got assigned something
      DEBUG_PRINT(("Worker of rank %d just got off his lazy ass", i+2));
      /** end slave failure detection **/
      assignment_ptrs[idle_process] = next_work_node;
      if (idle_process == 4) DEBUG_PRINT(("changing rank 6 time idle"));
      assignment_time[idle_process] = MPI_Wtime();
      assignment_indices[idle_process] = next_work_node->index;
      //MPI_Send(assignment_time, number_of_slaves, MPI_DOUBLE, 1, SUPERVISOR_TAG, MPI_COMM_WORLD);
      DEBUG_PRINT(("Gave an assignment to previously idle process rank %d, assignment at %p", idle_process+number_of_nonslaves, next_work_node));
      if(next_work_node->next == NULL)
      {
        list_end = next_work_node;
      }
      next_work_node = next_work_node->next;
    }
    if (flag_res)
    {
      int worker_number = status_res.MPI_SOURCE-number_of_nonslaves;
      DEBUG_PRINT(("Got result from rank %d", worker_number+number_of_nonslaves));
      if(!are_you_down[worker_number]) //If this slave is marked dead, just ignore him
      {
        // save index and result received to file
        char * str = f->to_str(received_results[num_results_received]);
        fptr = fopen("recovery.txt", "a");
        fprintf(fptr, "%d %s\n", assignment_indices[worker_number], str);
        fclose(fptr);
        // update number of results received
        num_results_received++;
        /** slave failure detection **/
        //DEBUG_PRINT(("supervisor is impressed by his good worker %d", i));
        // running mean/stddev of completion times drives the timeout
        int i = worker_number;
        complete_time[i] = MPI_Wtime() - assignment_time[i];
        tot_time += complete_time[i];
        mean = tot_time/num_results_received;
        sq_err += pow(complete_time[i] - mean, 2);
        stddev = sqrt(sq_err/num_results_received);
        //we have enough data to update threshold
        if(num_results_received >= number_of_slaves/2)
        {
          //DEBUG_PRINT(("the stddev is %f", stddev));
          threshold = mean + 10*stddev + 0.1;
          //DEBUG_PRINT(("the threshold is %f", threshold));
        }
        //assignment_time1[i] = assignment_time2[i];
        //found_change = 1;
        /** end slave failure detection **/
        //DEBUG_PRINT(("num results received %d\n", num_results_received));
        if(next_work_node == NULL && list_end != NULL && list_end->next != NULL)
        {
          DEBUG_PRINT(("Found more work to do, now an idle process can get an assignment"));
          next_work_node = list_end->next;
          list_end = NULL;
        }
        if(next_work_node != NULL)
        {
          // get work_unit
          mw_work_t* work_unit = next_work_node->data;
          //DEBUG_PRINT(("Sending new unit of work"));
          // send new unit of work
          send_to_slave(work_unit, f->work_sz, MPI_CHAR, status_res.MPI_SOURCE, WORK_TAG, MPI_COMM_WORLD);
          // update pointer
          if(next_work_node->next == NULL)
          {
            list_end = next_work_node;
          }
          // update work index for new_pid
          assignment_ptrs[status_res.MPI_SOURCE-number_of_nonslaves] = next_work_node;
          assert(assignment_ptrs[status_res.MPI_SOURCE-number_of_nonslaves] != NULL);
          if (status_res.MPI_SOURCE == 4) DEBUG_PRINT(("changing process of rank 6 time in res recv"));
          assignment_time[status_res.MPI_SOURCE-number_of_nonslaves] = MPI_Wtime();
          assignment_indices[status_res.MPI_SOURCE-number_of_nonslaves] = next_work_node->index;
          // send updated array of times to supervisor
          //MPI_Send(assignment_time, number_of_slaves, MPI_DOUBLE, 1, SUPERVISOR_TAG, MPI_COMM_WORLD);
          //DEBUG_PRINT(("SENT TIME TO SUP"));
          next_work_node = next_work_node->next;
          if(next_work_node == NULL)
          {
            DEBUG_PRINT(("Reached the end of the work list, should get idle processors after this"));
          }
        }
        else
        {
          DEBUG_PRINT(("Worker of rank %d is now idle, I ain't got shit for him to do", worker_number+2));
          if (worker_number == 4) DEBUG_PRINT(("changing processof rank 6 time in else"));
          assignment_time[worker_number] = 0.0;
          assignment_ptrs[worker_number] = NULL;
          assignment_indices[worker_number] = -1;
          assert(!are_you_down[worker_number]);
          //MPI_Send(assignment_time, number_of_slaves, MPI_DOUBLE, 1, SUPERVISOR_TAG, MPI_COMM_WORLD);
        }
      }
      // continue to receive results from workers as non-blocking recv
      // NOTE(review): re-posted with WORK_TAG vs. the initial MPI_ANY_TAG —
      // confirm workers always tag results WORK_TAG.
      MPI_Irecv(&received_results[num_results_received], f->res_sz, MPI_CHAR, MPI_ANY_SOURCE, WORK_TAG, MPI_COMM_WORLD, &request_res);
    }
  }
  // send kill signal to other processes
  for(slave=number_of_nonslaves; slave<number_of_slaves+number_of_nonslaves; ++slave)
  {
    DEBUG_PRINT(("Murdering slave"));
    kill_slave(slave);
  }
  start_results = MPI_Wtime();
  int err_code = f->result(num_results_received, received_results);
  end_results = MPI_Wtime();
  end = MPI_Wtime();
  DEBUG_PRINT(("all %f s\n", end-start));
  DEBUG_PRINT(("create %f s\n", end_create-start_create));
  DEBUG_PRINT(("process %f s\n", end_results-start_results));
  // remove recovery file since it is no longer useful
  remove("recovery.txt");
}
void do_master_stuff(int argc, char ** argv, struct mw_api_spec *f) { DEBUG_PRINT("master starting"); int number_of_slaves; MPI_Comm_size(MPI_COMM_WORLD, &number_of_slaves); mw_work_t ** work_list; printf("argc: %d\n", argc); int i; for(i=0; i<argc; ++i) { printf("arg[%d]: %s\n",i, argv[i]); } DEBUG_PRINT("creating work list..."); work_list = f->create(argc, argv); DEBUG_PRINT("created work!"); int slave=1, num_work_units=0; num_work_units = get_total_units(work_list); mw_result_t * received_results = malloc(f->res_sz * num_work_units); if (received_results == NULL) { fprintf(stderr, "ERROR: insufficient memory to allocate received_results\n"); free(received_results); } int num_results_received = 0; for(slave=1; slave<number_of_slaves; ++slave) { DEBUG_PRINT("assigning work to slave"); mw_work_t * work_unit = work_list[i]; i++; if(work_unit == NULL) { DEBUG_PRINT("reached the end of the work, breaking!"); break; } send_to_slave(work_unit, f->work_sz, MPI_CHAR, slave, WORK_TAG, MPI_COMM_WORLD); //MPI_Send(work_unit, f->work_sz, MPI_CHAR, slave, WORK_TAG, MPI_COMM_WORLD); DEBUG_PRINT("work sent to slave"); } while(work_list[i] != NULL) { DEBUG_PRINT("Waiting to receive a result..."); MPI_Status status; MPI_Recv(&received_results[num_results_received], f->res_sz, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); DEBUG_PRINT("Received a result!"); num_results_received++; send_to_slave(work_list[i], f->work_sz, MPI_CHAR, status.MPI_SOURCE, WORK_TAG, MPI_COMM_WORLD); i++; } while(num_results_received < num_work_units) { DEBUG_PRINT("Waiting to receive a result..."); MPI_Status status; MPI_Recv(&received_results[num_results_received], f->res_sz, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); DEBUG_PRINT("Received a result!"); num_results_received++; } DEBUG_PRINT("Received all results!"); for(slave=1; slave<number_of_slaves; ++slave) { DEBUG_PRINT("Murdering slave"); kill_slave(slave); } int err_code = f->result(num_results_received, 
received_results); }