int main(int argc, char *argv[]) { double t1, t2; Node root; StealStack *ss; /* initialize stealstacks and comm. layer */ ss = ss_init(&argc, &argv); /* determine benchmark parameters */ uts_parseParams(argc, argv); /* Initialize trace collection structures */ ss_initStats(ss); /* show parameter settings */ if (ss_get_thread_num() == 0) { uts_printParams(); } fflush(NULL); // Workers will return 1 from ss_start(), all others (managers) // will return 0 here once the computation ends if (ss_start(sizeof(Node), chunkSize)) { /* initialize root node and push on thread 0 stack */ if (ss_get_thread_num() == 0) { uts_initRoot(&root, type); #ifdef TRACE ss_markSteal(ss, 0); // first session is own "parent session" #endif ss_put_work(ss, &root); } /* time parallel search */ t1 = uts_wctime(); parTreeSearch(ss); t2 = uts_wctime(); ss->walltime = t2 - t1; #ifdef TRACE ss->startTime = t1; ss->sessionRecords[SS_IDLE][ss->entries[SS_IDLE] - 1].endTime = t2; #endif } ss_stop(); /* display results */ showStats(); ss_finalize(); return 0; }
/**
 * Work queue manager (WQM) main loop.
 *
 * Serves `size` ranks (0 .. size-1) over MPI_COMM_WORLD:
 *   - MPI_MAKEWORKGLOBAL_TAG messages carry one chunk of released work,
 *     which is copied and pushed onto globalQueue;
 *   - MPI_WORKREQUEST_TAG messages carry the sender's message counter and
 *     mark that rank as waiting for work;
 *   - waiting ranks are answered with a chunk on MPI_RESPONDWORK_TAG as
 *     soon as globalQueue is non-empty.
 *
 * The loop ends when every rank is waiting and, for every rank, the number
 * of messages received equals the last counter value that rank reported.
 * Every rank is then sent a zero-length MPI_RESPONDWORK_TAG message.
 *
 * @param size  number of ranks to serve
 * @param s     steal stack of the manager; work_size, chunk_size and
 *              globalWork are read/updated and its state is set via
 *              ss_setState()
 */
void doWorkQueueManager(int size, StealStack *s)
{
    /* One array for all requests so a single wait call can cover them. */
    MPI_Request request[size*3];
    MPI_Request *req_make_global  = &request[0];
    MPI_Request *req_work_request = &request[size];
    MPI_Request *req_response     = &request[2*size];
    MPI_Status request_status;
    MPI_Status wait_all_status[3*size];
    void *shared_work_buf[size];
    unsigned long work_request_buf[size];
    int flag, who, i;
    struct waiting_entry waiting[size];
    unsigned long timestamps[size];
    unsigned long msg_counts[size];
    int work_response_send_count = 0;
    int done = 0;

#ifdef TRACE_RELEASES
    /* Track releases */
    ss_setState(s, SS_WORK);
#else
    /* Attribute the WQM's time to overhead */
    ss_setState(s, SS_WORK);
    ss_setState(s, SS_IDLE);
#endif

    /* Initialize the receive buffers and per-rank bookkeeping. */
    for (i = 0; i < size; i++) {
        waiting[i].flag = 0;    /* not waiting */
        waiting[i].buf  = NULL; /* no send buffer in flight */
        timestamps[i]   = 0;
        msg_counts[i]   = 0;
        shared_work_buf[i] = malloc(s->work_size*s->chunk_size);
        if (!shared_work_buf[i])
            ss_error("doWorkQueueManager(): Out of virtual memory.", 10);
    }

    /* Set up non-blocking receives for communicating with the workers. */
    for (i = 0; i < size; i++) {
        /* Listen for work releases */
        MPI_Irecv(shared_work_buf[i], s->work_size*s->chunk_size, MPI_BYTE, i,
                  MPI_MAKEWORKGLOBAL_TAG, MPI_COMM_WORLD, &req_make_global[i]);

        /* Listen for work requests (a WORKREQUEST carries one long) */
        MPI_Irecv(&work_request_buf[i], 1, MPI_LONG, i,
                  MPI_WORKREQUEST_TAG, MPI_COMM_WORLD, &req_work_request[i]);
    }

    /** BEGIN WORK MANAGEMENT LOOP */
    while (!done) {
        /* Wait for someone to send work or to request work.  Only the
         * first 2*size requests (the receives) are considered here. */
        MPI_Waitany(2*size, request, &who, &request_status);

        if (who < size) {
            /* A rank released a chunk of work to the global queue. */
            void *w = malloc(s->work_size*s->chunk_size);
            if (!w)
                ss_error("doWorkQueueManager(): Out of virtual memory.", 10);

#ifdef TRACE_RELEASES
            /* Mark this release as a "steal" event */
            ss_markSteal(s, who);
            ss_setState(s, SS_SEARCH);
            ss_setState(s, SS_WORK);
#endif

            /* Count the message */
            msg_counts[who]++;

            /* Copy out of the receive buffer so it can be re-posted. */
            memcpy(w, shared_work_buf[who], s->work_size*s->chunk_size);
            deq_pushFront(globalQueue, w);
            s->globalWork += s->chunk_size;

            MPI_Irecv(shared_work_buf[who], s->work_size*s->chunk_size, MPI_BYTE, who,
                      MPI_MAKEWORKGLOBAL_TAG, MPI_COMM_WORLD, &req_make_global[who]);
        } else {
            /* (who >= size): a rank is requesting work from the queue. */
            who -= size;

            /* Mark this rank as waiting for work */
            waiting[who].flag = 1;

            /* Count the message and record the sender's reported counter */
            msg_counts[who]++;
            timestamps[who] = work_request_buf[who];

            /* This should be an invariant: we can never have received more
             * messages than the sender says it has sent. */
            if (timestamps[who] < msg_counts[who]) {
                ss_error("WQM: message delivery failure!\n", 10);
            }

            MPI_Irecv(&work_request_buf[who], 1, MPI_LONG, who,
                      MPI_WORKREQUEST_TAG, MPI_COMM_WORLD, &req_work_request[who]);
        }

        /* Finish the last round of sends before starting to send more data. */
        if (work_response_send_count) {
            MPI_Waitall(work_response_send_count, req_response, wait_all_status);

            /* Free all the buffers used in the last round */
            for (i = 0; i < size; i++) {
                if (waiting[i].buf != NULL) {
                    free(waiting[i].buf);
                    waiting[i].buf = NULL;
                }
            }
        }

        /* Attempt to send work to everyone who is waiting. */
        work_response_send_count = 0;
        for (i = 0; i < size; i++) {
            if (waiting[i].flag && !deq_isEmpty(globalQueue)) {
                void *work_ptr = deq_popFront(globalQueue);

                MPI_Isend(work_ptr, s->work_size*s->chunk_size, MPI_BYTE, i,
                          MPI_RESPONDWORK_TAG, MPI_COMM_WORLD,
                          &req_response[work_response_send_count]);
                work_response_send_count++;
                s->globalWork -= s->chunk_size;
                waiting[i].flag = 0;
                /* Remember the buffer so it is freed once the send completes. */
                waiting[i].buf = work_ptr;
            }
        }

        /** Check for termination **/
        /* If everyone is still waiting and there are no outstanding
         * messages then we are done. */
        done = 1;
        for (i = 0; i < size; i++) {
            if (!waiting[i].flag || (msg_counts[i] != timestamps[i])) {
                done = 0;
                break; /* no need to check everyone else */
            }
        }

        /* Sanity check */
        if (done && !deq_isEmpty(globalQueue)) {
            ss_error("WQM: Something evil happened. We are terminating but I still have work!", 13);
        }
    } /* END: while (!done) */

    if (DEBUG & 2)
        printf("Queue Manager: We are done. Letting everyone know.\n");

    /* This is a sanity test to make sure our prioritization above works.
     * If this testany were to return true, the cancels below would error
     * out. */
    MPI_Testany(2*size, request, &who, &flag, &request_status);
    if (flag) {
        ss_error("WQM: Attempted to terminate with inbound work!", 13);
    }

    /* Cancel the outstanding MPI_Irecvs */
    for (i = 0; i < size; i++) {
        MPI_Cancel(&req_make_global[i]);
        MPI_Cancel(&req_work_request[i]);
    }

    /* A cancelled request must still be completed; only after that may the
     * receive buffers be released. */
    MPI_Waitall(2*size, request, wait_all_status);
    for (i = 0; i < size; i++) {
        free(shared_work_buf[i]);
        shared_work_buf[i] = NULL;
    }

    /* Send a message to everyone that no work exists; everyone should be
     * waiting on an MPI_Recv here. */
    work_response_send_count = 0;
    for (i = 0; i < size; i++) {
        MPI_Isend(NULL, 0, MPI_BYTE, i, MPI_RESPONDWORK_TAG, MPI_COMM_WORLD,
                  &req_response[i]);
        work_response_send_count++;
    }
    MPI_Waitall(work_response_send_count, req_response, wait_all_status);

    ss_setState(s, SS_IDLE);
}
/**
 * Ensure local work exists; request a chunk from the work queue manager
 * if it does not.
 *
 * When s->localWork is zero, a work request carrying the incremented
 * msg_counter is sent to the rank returned by getWorkQueueId(), and the
 * call blocks in MPI_Recv until a reply arrives.  A full chunk is wrapped
 * in a StealStackNode and pushed onto the back of localQueue; a zero-byte
 * reply is taken as the termination signal.
 *
 * @param s  steal stack of the calling worker
 * @return   0 if local work already existed;
 *           the work queue manager's id if a chunk was received;
 *           -1 if the manager replied with an empty message (no more work).
 *           NOTE(review): the "already had work" value 0 is not
 *           distinguishable from a manager id of 0 — confirm callers only
 *           test for -1.
 */
int ensureLocalWork(StealStack *s)
{
    int work_queue_id = getWorkQueueId();
    int work_rcv;
    void *work_ptr;
    StealStackNode *node;
    MPI_Status status;

    if (s->localWork < 0)
        ss_error("ensureLocalWork(): localWork count is less than 0!", 2);

    /* Local work already exists: nothing to do. */
    if (s->localWork != 0)
        return 0;

    /* No more work */
    ss_setState(s, SS_SEARCH);

    work_ptr = malloc(s->work_size*s->chunk_size);
    node     = malloc(sizeof *node);

    if (!work_ptr || !node)
        ss_error("ensureLocalWork(): Out of virtual memory.", 10);

    /* Send a work request - will block until work is available or the
     * program terminates. */
#ifdef NONBLOCK
    MPI_Wait(&rls_handle, &status);
#endif
    ++msg_counter; /* Increase our timestamp */
    MPI_Send(&msg_counter, 1, MPI_LONG, work_queue_id, MPI_WORKREQUEST_TAG,
             MPI_COMM_WORLD);
    MPI_Recv(work_ptr, s->work_size*s->chunk_size, MPI_BYTE, work_queue_id,
             MPI_RESPONDWORK_TAG, MPI_COMM_WORLD, &status);

    /* FIXME: Should we check the tag instead? */
    MPI_Get_count(&status, MPI_BYTE, &work_rcv);

    if (work_rcv == 0) {
        /* No more work, time to terminate.  Nothing was handed to the
         * local queue, so release the buffers allocated above. */
        free(node);
        free(work_ptr);
        if (DEBUG & 2)
            printf("Thread %d terminating\n", comm_rank);
        fflush(NULL);
        return -1;
    } else if (work_rcv != s->work_size * s->chunk_size) {
        ss_error("ensureLocalWork(): Work received size does not equal chunk size", 10);
    }

    /* FIXME: Safe to assume chunk is full? */
    node->head = s->chunk_size;
    node->work = work_ptr;

    ctrk_get(comm_rank, work_ptr);

    s->nSteal++;
    s->localWork += s->chunk_size;

#ifdef TRACE
    ss_markSteal(s, getWorkQueueId());
#endif

    /* Push stolen work onto the back of the queue */
    deq_pushBack(localQueue, node);

    return work_queue_id;
}