int main(int argc, char *argv[]) { unsigned int power2[12] = {1,2,4,8,16,32,64,128,256,512,1024,2048}; int numMSG; int dim; int MASTER; /******************* Number of MSGs **********/ numMSG = atoi(argv[1]); dim = atoi(argv[2]); /******************* MPI Init ****************** */ unsigned int rank, size; int MPI_ERR; MPI_Status status; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); MASTER = size -1 ; /************************** MASTER ******************************/ if (rank == MASTER){ int *msg = (int *)calloc(MSGSIZE, sizeof(int)); int i=0; int numberOfPrints = (size-1) * (numMSG*2); int dest = 0; int next; int doneCounter = 0; int printCounter = 0; int treeDone = 0; makeMessage(msg, MASTER, ZERO, TREE, 0); MPI_Send(&msg[0], MSGSIZE, MPI_INT, ZERO, DATA_TAG, MPI_COMM_WORLD); while(1){ MPI_Recv(&msg[0], MSGSIZE, MPI_INT, ZERO, DATA_TAG, MPI_COMM_WORLD, &status); if(msg[TYPE] == TREEDONE){ treeDone += 1; if(treeDone < 2 ){ makeMessage(msg, MASTER, size-2, INITDONE, 0); MPI_Send(&msg[0], MSGSIZE, MPI_INT, size-2, DATA_TAG, MPI_COMM_WORLD); } } if(msg[TYPE] == PRINT){ printCounter += 1; print(msg, msg[SOURCEORIGINAL], dim, power2); } if(msg[TYPE] == DONE){ doneCounter += 1; } if(doneCounter == (size-1) && printCounter == ( (numMSG * 2) * (size-1) )){ break; } } // Send a STOP messages using Reduce Forward to Zero dest = ZERO; makeMessage(msg, MASTER, dest, REDUCEFORWARD, 0); next = compute_next_dest(rank, dest, power2); MPI_Send(&msg[0], MSGSIZE, MPI_INT, dest, DATA_TAG, MPI_COMM_WORLD); // printf("%d\n", printCounter); } /************************** Workers ******************************/ else{ int i; int x; int dest; int *msg = (int *)calloc(MSGSIZE, sizeof(int)); int *children = (int *)calloc(dim, sizeof(int)); int childCounter = 0; int msgno = 0; unsigned int next; int flag; int msgCounter = 0; int makeMsgCount = 0; int ackCounter = 0; int stopCount = 0; int parent = 0; int parentSent = 0; int parentCounter = 0; int initDone = 0; int forBcastCounter = 0; int bcastCouter = 0; int bSent = 0; int reduceForwardCount = 0; int reduceBackwardCount = 0; int reduceFlag = 1; while(1){ MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, &status); if(flag == 1){ MPI_Recv(&msg[0] , MSGSIZE, MPI_INT, status.MPI_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); if(rank == ZERO){ if(msg[TYPE] == PRINT){ MPI_Send(&msg[0] , MSGSIZE, MPI_INT, MASTER, DATA_TAG, MPI_COMM_WORLD); } else if(msg[TYPE] == DONE){ MPI_Send(&msg[0] , MSGSIZE, MPI_INT, MASTER, DATA_TAG, MPI_COMM_WORLD); } else if(msg[TYPE] == TREEDONE){ msg[SOURCE] = ZERO; msg[DEST] = MASTER; MPI_Send(&msg[0] , MSGSIZE, MPI_INT, MASTER, DATA_TAG, MPI_COMM_WORLD); } } dest = msg[DEST]; /* If the message is for you */ if(dest == rank){ /* If you got an ACK, increament your counter */ if(msg[TYPE] == ACK){ ackCounter += 1; /* Send for Printing */ msg[SOURCE] = rank; msg[DEST] = ZERO; msg[TYPEORIGINAL] = ACK ; //msg[TYPE]; msg[TYPE] = PRINT; next = compute_next_dest(rank, ZERO, power2); if(rank == ZERO){ MPI_Send(&msg[0] , MSGSIZE, MPI_INT, MASTER, DATA_TAG, MPI_COMM_WORLD); }else{ MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); } } /* If you got a message destined to you, send an ACK */ else if(msg[TYPE] == MSG){ /* Send for printing */ msg[SOURCE] = rank; msg[DEST] = ZERO; msg[TYPEORIGINAL] = MSG ; msg[TYPE] = PRINT; next = compute_next_dest(rank, ZERO, power2); if(rank == ZERO){ MPI_Send(&msg[0] , MSGSIZE, MPI_INT, MASTER, DATA_TAG, MPI_COMM_WORLD); }else{ 
MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); } /* Send Ack */ dest = msg[SOURCEORIGINAL]; msgno = msg[MSGNO]; makeMessage(msg, rank, dest, ACK, msgno); next = compute_next_dest(rank, dest, power2); msg[HOPCNT] = 1; msg[SOURCEORIGINAL] = rank; MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); } else if(msg[TYPE] == TREE && msg[SOURCE] == ZERO){ parent = findParent(rank, dim); makeMessage(msg, rank, parent, PARENT, msgno); next = compute_next_dest(rank, parent, power2); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); // printf("Rank %d sending to %d \n", rank, msg[DEST]); } else if(msg[TYPE] == TREE && msg[SOURCE] == MASTER){ int layer = (int)(pow(2,dim)) - (((int)(pow(2,dim))) / 2); for(i = 0 ; i < layer ; i++){ makeMessage(msg, ZERO, i, TREE, 0); next = compute_next_dest(ZERO, i, power2); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); } } else if(msg[TYPE] == PARENT){ children[childCounter] = msg[SOURCE]; childCounter += 1; if(rank != size-2 && parentSent == 0){ parent = findParent(rank, dim); makeMessage(msg, rank, parent, PARENT, msgno); next = compute_next_dest(rank, parent, power2); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); parentSent = 1; } if(rank == size-2){ parentCounter += 1; if(parentCounter == ((int)pow(2,(dim-1)) - 1)){ dest = ZERO; makeMessage(msg, rank, dest, TREEDONE, msgno); next = compute_next_dest(rank, dest, power2); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); } } } else if(msg[TYPE] == STOP){ if(childCounter > 0){ for(i = 0 ; i < childCounter ; i++){ dest = children[i]; makeMessage(msg, rank, dest, STOP, msgno); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, dest, DATA_TAG, MPI_COMM_WORLD); } } break; } else if(msg[TYPE] == INITDONE){ if(childCounter > 0){ for(i = 0 ; i < childCounter ; i++){ dest = children[i]; makeMessage(msg, rank, dest, INITDONE, msgno); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, dest, DATA_TAG, MPI_COMM_WORLD); } } initDone = 1; } /* Only 11..1 gets this */ else if(msg[TYPE] == FORBCAST){ if(childCounter > 0){ for(i = 0 ; i < childCounter ; i++){ dest = children[i]; makeMessage(msg, rank, dest, BCAST, msgno); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, dest, DATA_TAG, MPI_COMM_WORLD); bcastCouter += 1; // printf("Rank %d send bcast type %d to %d - bcounter %d \n", rank, msg[TYPE], msg[DEST], bcastCouter); } // bcastCouter += childCounter; } /* FORBCASTACK immediately to Zero */ makeMessage(msg, rank, ZERO, FORBCASTACK, msgno); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, ZERO, DATA_TAG, MPI_COMM_WORLD); // printf("Rank %d send forbcastack type %d to %d - bcounter %d \n", rank, msg[TYPE], msg[DEST], bcastCouter); } else if(msg[TYPE] == BCAST){ if(childCounter > 0){ for(i = 0 ; i < childCounter ; i++){ dest = children[i]; makeMessage(msg, rank, dest, BCAST, msgno); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, dest, DATA_TAG, MPI_COMM_WORLD); bcastCouter += 1; // printf("Rank %d send bcast type %d to %d - bcounter %d \n", rank, msg[TYPE], msg[DEST], bcastCouter); } } else{ /* BCASTACK to parent only when you received as many acks as you sent bcasts */ makeMessage(msg, rank, parent, BCASTACK, msgno); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, parent, DATA_TAG, MPI_COMM_WORLD); // printf("BCAST ACK from %d to %d \n", rank, msg[DEST]); } } /* Only Zero receives this */ else if(msg[TYPE] == FORBCASTACK){ // printf("Rank %d got Type %d from %d \n",rank, msg[TYPE], msg[SOURCE] ); forBcastCounter -= 1; } else if(msg[TYPE] == BCASTACK){ bcastCouter -= 1; 
if(bcastCouter == 0 && rank != size-2){ /* BCASTACK to parent only when you received as many acks as you sent bcasts */ makeMessage(msg, rank, parent, BCASTACK, msgno); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, parent, DATA_TAG, MPI_COMM_WORLD); // printf("BCAST ACK from %d to %d \n", rank, msg[DEST]); } } else if(msg[TYPE] == REDUCEFORWARD){ // if(msg[SOURCEORIGINAL] == rank){ // reduceFlag = 0; // printf("Returned to Source : %d - Loop Finished!\n", rank); // } // else{ int msgNumber = msg[MSGNO]; int source = msg[SOURCEORIGINAL]; dest = myNext(rank, dim); makeMessage(msg, rank, dest, REDUCEFORWARD, msgNumber); msg[SOURCEORIGINAL] = source; MPI_Send(&msg[0], MSGSIZE, MPI_INT, dest, DATA_TAG, MPI_COMM_WORLD); reduceFlag = 0; printf("HOP : Rank %d myNext : %d - Message Number : %d - TERMINATED !\n", rank, dest, msg[MSGNO]); break; // } } else if(msg[TYPE] == REDUCEBACKWARD){ if(msg[SOURCEORIGINAL] == rank){ reduceFlag = 0; printf("Returned to Source : %d - Loop Finished!\n", rank); } else{ int msgNumber = msg[MSGNO]; int source = msg[SOURCEORIGINAL]; dest = myPrev(rank, dim); makeMessage(msg, rank, dest, REDUCEBACKWARD, msgNumber); msg[SOURCEORIGINAL] = source; MPI_Send(&msg[0], MSGSIZE, MPI_INT, dest, DATA_TAG, MPI_COMM_WORLD); reduceFlag = 0; printf("HOP : Rank %d myPrev : %d - Message Number : %d \n", rank, dest, msg[MSGNO]); } } } /* If you are not destination */ else{ /* MSG or ACK */ if(msg[TYPE] == MSG || msg[TYPE] == ACK){ /* add to number of hobs */ msg[HOPCNT] += 1; int nHubs = msg[HOPCNT]; msg[(HOPCNT + nHubs)] = rank; next = compute_next_dest(rank, dest, power2); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); } /* Pass it along : print, done, stop, parent, parentfound */ else{ next = compute_next_dest(rank, dest, power2); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); } } /* if you are done send a DONE message to Zero (forBcastCounter == 0 && bcastCouter == 0 && reduceFlag == 0) */ if(ackCounter == numMSG ){ // printf("Rank %d is DoNe !!!! \n", rank); dest = ZERO; makeMessage(msg, rank, dest, DONE, msgno); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, dest, DATA_TAG, MPI_COMM_WORLD); ackCounter += 1; // printf("Rank %d TYPE %d to %d\n", rank, msg[TYPE], msg[DEST]); } } /* Flag = 0 */ else{ if(initDone == 1){ /* Still have to make your messages */ if(makeMsgCount < numMSG){ dest = randomGenerator(size-1) ; makeMessage(msg, rank, dest, MSG, msgno); next = compute_next_dest(rank, dest, power2); msg[HOPCNT] = 1; msg[SOURCEORIGINAL] = rank; MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); makeMsgCount += 1; } } } } } MPI_Finalize(); return 0; }
bool Slave::checkMessages() {
    //if (engine.decisionLevel() <= engine.assumptions.size()) return false;
    double t = wallClockTime();
    if (t <= next_check && engine.decisionLevel() > 0) return false;
    real_time += t;
    // cpu_time += cpuTime();
    int received;
    //fprintf(stderr, "%d: CheckMessages! \n", thread_no);
    while (true) {
        MPI_Iprobe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &received, &s);
        if (!received) break;
        switch (s.MPI_TAG) {
            case INTERRUPT_TAG:
                MPI_Recv(NULL, 0, MPI_INT, 0, INTERRUPT_TAG, MPI_COMM_WORLD, &s);
                if (PAR_DEBUG) fprintf(stderr, "%d: Interrupted! %f\n", thread_no, wallClockTime());
                real_time -= wallClockTime();
                // cpu_time -= cpuTime();
                return true;
            case STEAL_TAG:
                //fprintf(stderr, "%d asked to split job\n", thread_no);
                splitJob();
                break;
            case LEARNTS_TAG:
                //fprintf(stderr, "%d receives learnts\n", thread_no);
                receiveLearnts();
                break;
            case SOLUTION_TAG:
                receiveSolution();
                break;
            case INT_SOLUTION_TAG:
                assert(false && "This version does not support forwarding solution! ");
                break;
            case NEW_EXPORT_LIMIT:
                // Only 1 integer: the new length
                int newLength;
                //fprintf(stderr, "Receiving new export limit! \n");
                MPI_Recv(&newLength, 1, MPI_INT, 0, NEW_EXPORT_LIMIT, MPI_COMM_WORLD, &s);
                so.maxClSz = newLength;
                //fprintf(stderr, "Done, set to %d \n", newLength);
                break;
            case NEW_STATE_TAG:
                int newState;
                MPI_Recv(&newState, 1, MPI_INT, 0, NEW_STATE_TAG, MPI_COMM_WORLD, &s);
                if (_state != newState) {
                    if (_state == RUNNING_GREEDY) engine.stop_init_phase = true;
                    _state = newState;
                }
                MPI_Bsend(&_state, 1, MPI_INT, 0, SLAVE_NEW_STATE_TAG, MPI_COMM_WORLD);
                break;
            default:
                fprintf(stderr, "assert false!\n");
                assert(false);
        }
    }
    if ((++checks % int(report_freq/check_freq) == 0) || unitFound) {
        unitFound = false;
        sendReport();
    }
    t = wallClockTime();
    if (engine.decisionLevel() > 0) next_check = t + check_freq;
    real_time -= t;
    if (engine.decisionLevel() == 0) {
        for (int i = 0; i < storedClauses.size(); i++) {
            assert(storedClauses[i].size() > 0);
            assert(storedClauses[i][0] == tmp_header[i]);
            // TODO:
            vec<int> tmp;
            for (int j = 0; j < storedClauses[i].size(); j++)
                tmp.push(storedClauses[i][j]);
            sat.convertToClause(tmp);
            sat.addLearnt();
        }
        storedClauses.clear();
        tmp_header.clear();
    }
    return false;
}
void comm_receive(const switchboard_t* sb)
{
#ifdef WITH_MPI
  MPI_Status status;
  status.MPI_ERROR = MPI_SUCCESS;
  int flag = 0;
  MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, &status);
  if (status.MPI_ERROR != MPI_SUCCESS)
    mpi_fatal(sb, &status, "Got error while probing for messages");
  while (flag) { /* only execute the body if `flag` is true */
    int size;
    MPI_Get_count(&status, MPI_BYTE, &size);
    void* xa = xmalloc(size);
    switch (status.MPI_TAG) {
    case TAG_ROW_SPARSE: {
      sparse_row_t* new_row = sparse_row_alloc_placed(xa, size);
      assert(NULL != new_row);
      MPI_Recv(new_row, size, MPI_BYTE, status.MPI_SOURCE, status.MPI_TAG,
               MPI_COMM_WORLD, &status);
      if (status.MPI_ERROR != MPI_SUCCESS)
        mpi_fatal(sb, &status, "Got error while receiving sparse row");
# ifndef NDEBUG
      int recv_size;
      MPI_Get_count(&status, MPI_BYTE, &recv_size);
      assert(size == recv_size);
# endif
      assert(is_local(sb, new_row->starting_column_));
      vpu_t* vpu = vpu_for_column(sb, new_row->starting_column_);
      assert(vpu != NULL);
      vpu_recv_row(vpu, new_row, ROW_SPARSE);
      break;
    }
    case TAG_ROW_DENSE: {
      dense_row_t* new_row = dense_row_alloc_placed(xa, size);
      assert(NULL != new_row);
      MPI_Recv(new_row, size, MPI_BYTE, status.MPI_SOURCE, status.MPI_TAG,
               MPI_COMM_WORLD, &status);
      if (status.MPI_ERROR != MPI_SUCCESS)
        mpi_fatal(sb, &status, "Got error while receiving dense row");
# ifndef NDEBUG
      int recv_size;
      MPI_Get_count(&status, MPI_BYTE, &recv_size);
      assert(size == recv_size);
# endif
      assert(is_local(sb, new_row->starting_column_));
      vpu_t* vpu = vpu_for_column(sb, new_row->starting_column_);
      assert(NULL != vpu);
      vpu_recv_row(vpu, new_row, ROW_DENSE);
      break;
    }
    case TAG_END: {
      // "end" message received; no new rows will be coming.
      // But some other rows could have arrived or could
      // already be in the `inbox`, so we need to make another
      // pass anyway.  All this boils down to: set a flag,
      // make another iteration, and end the loop next time.
      coord_t column;
      MPI_Recv(&column, 1, MPI_LONG, status.MPI_SOURCE, status.MPI_TAG,
               MPI_COMM_WORLD, &status);
      if (status.MPI_ERROR != MPI_SUCCESS)
        mpi_fatal(sb, &status, "Got error while receiving sparse END tag");
# ifndef NDEBUG
      int recv_size;
      MPI_Get_count(&status, MPI_BYTE, &recv_size);
      assert(size == recv_size);
# endif
      assert(column >= 0 && is_local(sb, column));
      vpu_t* vpu = vpu_for_column(sb, column);
      assert(NULL != vpu);
      vpu_end_phase(vpu);
      break;
    }
    } // switch(status.MPI_TAG)
    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, &status);
    if (status.MPI_ERROR != MPI_SUCCESS)
      mpi_fatal(sb, &status, "Got error while probing for messages");
  } // while(MPI_Iprobe)
#endif
}
int main(int argc, char** argv)
{
    int ROOT = _ROOT;
    int ACK_COUNT = 0;
    int MSG_SIZE = _MSG_SIZE;
    int i, j, k, source, my_rank, num_nodes, my_state, tmpmsg;
    MPI_Status status;

    // initialize mpi stuff
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &num_nodes);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    int msg[MSG_SIZE];
    for (i = 0; i < MSG_SIZE; i++) msg[i] = -1; //build msg

    int done = 0;
    int flag = 0;

    if (ROOT == my_rank) {
        my_state = R0;
        while (!done) {
            tmpmsg = get_random_msg();
            for (i = 0; i < MSG_SIZE; i++) msg[i] = tmpmsg; //build msg

            // send msg to nodes
            for (j = 1; j < num_nodes; j++)
                MPI_Send(&msg, MSG_SIZE, MPI_INT, j, 0, MPI_COMM_WORLD); // blocking send, not ideal for efficiency

            // check for ACK: a non-blocking check for ACK message from nodes in FINAL state
            flag = 0;
            MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, &status);
            if (1 == flag) {
                source = status.MPI_SOURCE;
                MPI_Recv(&msg, MSG_SIZE, MPI_INT, source, 0, MPI_COMM_WORLD, &status);
                if (num_nodes - 1 == ++ACK_COUNT) ++done;
            }
        }
    } else {
        my_state = Q0;
        int done = 0;
        while (!done) {
            MPI_Recv(&msg, MSG_SIZE, MPI_INT, ROOT, 0, MPI_COMM_WORLD, &status);
            //printf("Node %d received MSG=%d from Node %d\n", my_rank, msg, ROOT);

            // react based on msg
            // (presumably, the first element of the msg array is the same as all other elements)
            switch (msg[0]) {
            case A:
                if (Q0 == my_state) {
                    my_state = next_state_proc(my_state, msg[0]);
                    printf("Node %d now in state %d\n", my_rank, my_state);
                }
                break;
            case B:
                if (Q1 == my_state) {
                    my_state = next_state_proc(my_state, msg[0]);
                    printf("Node %d now in state %d\n", my_rank, my_state);
                }
                break;
            case C:
                if (Q2 == my_state) {
                    my_state = next_state_proc(my_state, msg[0]);
                    printf("Node %d now in FINAL state %d (shutting down...)\n", my_rank, my_state);
                    for (i = 0; i < MSG_SIZE; i++) msg[i] = ACK; //build msg
                    MPI_Send(&msg, MSG_SIZE, MPI_INT, ROOT, 0, MPI_COMM_WORLD); // blocking send, not ideal for efficiency
                    ++done;
                }
                break;
            }
        }
    }

    MPI_Finalize();
    exit(EXIT_SUCCESS);
}
void Broker::checkReceivedRanking(int* flag, MPI_Status* status){ MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, flag, status); }
int main(int argc, char *argv[]) { unsigned int power2[12] = {1,2,4,8,16,32,64,128,256,512,1024,2048}; int numMSG; int dim; int MASTER; /******************* Number of MSGs **********/ numMSG = atoi(argv[1]); dim = atoi(argv[2]); /******************* MPI Init ****************** */ unsigned int rank, size; int MPI_ERR; MPI_Status status; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); MASTER = size -1 ; /************************** MASTER ******************************/ if (rank == MASTER){ int *msg = (int *)calloc(MSGSIZE, sizeof(int)); int i=0; int numberOfPrints = (size-1) * (numMSG*2); int dest = 0; int doneCounter = 0; int printCounter = 0; int treeDone = 0; makeMessage(msg, MASTER, ZERO, TREE, 0); MPI_Send(&msg[0], MSGSIZE, MPI_INT, ZERO, DATA_TAG, MPI_COMM_WORLD); while(1){ MPI_Recv(&msg[0], MSGSIZE, MPI_INT, ZERO, DATA_TAG, MPI_COMM_WORLD, &status); if(msg[TYPE] == TREEDONE){ treeDone = 1; } if(msg[TYPE] == PRINT){ printCounter += 1; print(msg, msg[SOURCEORIGINAL], dim, power2); } if(msg[TYPE] == DONE){ doneCounter += 1; } if(treeDone == 1 && doneCounter == (size-1) && printCounter == ( (numMSG * 2) * (size-1) )){ break; } } // Send a STOP messages to One dest = size-2; makeMessage(msg, MASTER, dest, STOP, 0); MPI_Send(&msg[0], MSGSIZE, MPI_INT, dest, DATA_TAG, MPI_COMM_WORLD); // printf("%d\n", printCounter); } /************************** Workers ******************************/ else{ int i; int x; int dest; int *msg = (int *)calloc(MSGSIZE, sizeof(int)); int *children = (int *)calloc(dim, sizeof(int)); int childCounter = 0; int msgno = 0; unsigned int next; int flag; int msgCounter = 0; int makeMsgCount = 0; int ackCounter = 0; int stopCount = 0; int parent = 0; int parentSent = 0; int parentCounter = 0; while(1){ MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, &status); if(flag == 1){ MPI_Recv(&msg[0] , MSGSIZE, MPI_INT, status.MPI_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); if(rank == ZERO){ if(msg[TYPE] == PRINT){ MPI_Send(&msg[0] , MSGSIZE, MPI_INT, MASTER, DATA_TAG, MPI_COMM_WORLD); } else if(msg[TYPE] == DONE){ MPI_Send(&msg[0] , MSGSIZE, MPI_INT, MASTER, DATA_TAG, MPI_COMM_WORLD); } // else if(msg[TYPE] == STOP){ // stopCount += 1; // msg[SOURCE] = ZERO; // msg[DEST] = (size-1) - stopCount; // next = compute_next_dest(ZERO, msg[DEST], power2); // MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); // } else if(msg[TYPE] == TREEDONE){ msg[SOURCE] = ZERO; msg[DEST] = MASTER; MPI_Send(&msg[0] , MSGSIZE, MPI_INT, MASTER, DATA_TAG, MPI_COMM_WORLD); } } dest = msg[DEST]; /* If the message is for you */ if(dest == rank){ /* you are done so STOP */ // if(msg[TYPE] == STOP && msg[SOURCE] == ZERO){ // break; // } /* If you got an ACK, increament your counter */ if(msg[TYPE] == ACK){ ackCounter += 1; /* Send for Printing */ msg[SOURCE] = rank; msg[DEST] = ZERO; msg[TYPEORIGINAL] = ACK ; //msg[TYPE]; msg[TYPE] = PRINT; next = compute_next_dest(rank, ZERO, power2); if(rank == ZERO){ MPI_Send(&msg[0] , MSGSIZE, MPI_INT, MASTER, DATA_TAG, MPI_COMM_WORLD); }else{ MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); } } /* If you got a message destined to you, send an ACK */ else if(msg[TYPE] == MSG){ /* Send for printing */ msg[SOURCE] = rank; msg[DEST] = ZERO; msg[TYPEORIGINAL] = MSG ; msg[TYPE] = PRINT; next = compute_next_dest(rank, ZERO, power2); if(rank == ZERO){ MPI_Send(&msg[0] , MSGSIZE, MPI_INT, MASTER, DATA_TAG, MPI_COMM_WORLD); }else{ MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, 
DATA_TAG, MPI_COMM_WORLD); } /* Send Ack */ dest = msg[SOURCEORIGINAL]; msgno = msg[MSGNO]; makeMessage(msg, rank, dest, ACK, msgno); next = compute_next_dest(rank, dest, power2); msg[HOPCNT] = 1; msg[SOURCEORIGINAL] = rank; MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); } else if(msg[TYPE] == TREE && msg[SOURCE] == ZERO){ parent = findParent(rank, dim); makeMessage(msg, rank, parent, PARENT, msgno); next = compute_next_dest(rank, parent, power2); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); // printf("Rank %d sending to %d \n", rank, msg[DEST]); } else if(msg[TYPE] == TREE && msg[SOURCE] == MASTER){ int layer = (int)(pow(2,dim)) - (((int)(pow(2,dim))) / 2); for(i = 0 ; i < layer ; i++){ makeMessage(msg, ZERO, i, TREE, 0); next = compute_next_dest(ZERO, i, power2); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); } } else if(msg[TYPE] == PARENT){ children[childCounter] = msg[SOURCE]; childCounter += 1; if(rank != size-2 && parentSent == 0){ parent = findParent(rank, dim); makeMessage(msg, rank, parent, PARENT, msgno); next = compute_next_dest(rank, parent, power2); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); parentSent = 1; } if(rank == size-2){ parentCounter += 1; if(parentCounter == ((int)pow(2,(dim-1)) - 1)){ dest = ZERO; makeMessage(msg, rank, dest, TREEDONE, msgno); next = compute_next_dest(rank, dest, power2); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); } } } else if(msg[TYPE] == STOP){ if(childCounter > 0){ for(i = 0 ; i < childCounter ; i++){ dest = children[i]; makeMessage(msg, rank, dest, STOP, msgno); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, dest, DATA_TAG, MPI_COMM_WORLD); } } break; } } /* If you are not destination */ else{ /* MSG or ACK */ if(msg[TYPE] == MSG || msg[TYPE] == ACK){ /* add to number of hobs */ msg[HOPCNT] += 1; int nHubs = msg[HOPCNT]; msg[(HOPCNT + nHubs)] = rank; next = compute_next_dest(rank, dest, power2); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); } /* Pass it along : print, done, stop, parent, parentfound */ else{ next = compute_next_dest(rank, dest, power2); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); } } /* if you are done send a DONE message to Zero */ if(ackCounter == numMSG){ dest = ZERO; makeMessage(msg, rank, dest, DONE, msgno); MPI_Send(&msg[0] , MSGSIZE, MPI_INT, dest, DATA_TAG, MPI_COMM_WORLD); ackCounter += 1; // printf("Rank %d TYPE %d to %d\n", rank, msg[TYPE], msg[DEST]); } } /* Flag = 0 */ else{ /* Still have to make your messages */ if(makeMsgCount < numMSG){ dest = randomGenerator(size-1) ; makeMessage(msg, rank, dest, MSG, msgno); next = compute_next_dest(rank, dest, power2); msg[HOPCNT] = 1; msg[SOURCEORIGINAL] = rank; MPI_Send(&msg[0] , MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD); makeMsgCount += 1; } } } } MPI_Finalize(); return 0; }
FC_FUNC(mpi_iprobe, MPI_IPROBE)(int * source, int * tag, int * comm, int * flag, int *status, int * ierr) { *ierr = MPI_Iprobe(*source, *tag, *comm, flag, mpi_c_status(status)); }
int main(int argc, char **argv) { int procid, nproc, i; MPI_Win llist_win; llist_ptr_t head_ptr, tail_ptr; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &procid); MPI_Comm_size(MPI_COMM_WORLD, &nproc); #ifdef TEST_MPI3_ROUTINES MPIX_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &llist_win); /* Process 0 creates the head node */ if (procid == 0) head_ptr.disp = alloc_elem(-1, llist_win); /* Broadcast the head pointer to everyone */ head_ptr.rank = 0; MPI_Bcast(&head_ptr.disp, 1, MPI_AINT, 0, MPI_COMM_WORLD); tail_ptr = head_ptr; /* All processes concurrently append NUM_ELEMS elements to the list */ for (i = 0; i < NUM_ELEMS; i++) { llist_ptr_t new_elem_ptr; int success; /* Create a new list element and register it with the window */ new_elem_ptr.rank = procid; new_elem_ptr.disp = alloc_elem(procid, llist_win); /* Append the new node to the list. This might take multiple attempts if others have already appended and our tail pointer is stale. */ do { llist_ptr_t next_tail_ptr = nil; MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win); MPIX_Compare_and_swap((void*) &new_elem_ptr.rank, (void*) &nil.rank, (void*) &next_tail_ptr.rank, MPI_INT, tail_ptr.rank, (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.rank), llist_win); MPI_Win_unlock(tail_ptr.rank, llist_win); success = (next_tail_ptr.rank == nil.rank); if (success) { int i, flag; MPI_Aint result; MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win); MPIX_Fetch_and_op(&new_elem_ptr.disp, &result, MPI_AINT, tail_ptr.rank, (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp), MPI_REPLACE, llist_win); /* Note: accumulate is faster, since we don't need the result. Replacing with Fetch_and_op to create a more complete test case. */ /* MPI_Accumulate(&new_elem_ptr.disp, 1, MPI_AINT, tail_ptr.rank, (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp), 1, MPI_AINT, MPI_REPLACE, llist_win); */ MPI_Win_unlock(tail_ptr.rank, llist_win); tail_ptr = new_elem_ptr; /* For implementations that use pt-to-pt messaging, force progress for other threads' RMA operations. */ for (i = 0; i < NPROBE; i++) MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE); } else { /* Tail pointer is stale, fetch the displacement. May take multiple tries if it is being updated. */ do { MPI_Aint junk = 0; MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win); MPIX_Fetch_and_op(NULL, &next_tail_ptr.disp, MPI_AINT, tail_ptr.rank, (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp), MPIX_NO_OP, llist_win); MPI_Win_unlock(tail_ptr.rank, llist_win); } while (next_tail_ptr.disp == nil.disp); tail_ptr = next_tail_ptr; } } while (!success); } MPI_Barrier(MPI_COMM_WORLD); /* Traverse the list and verify that all processes inserted exactly the correct number of elements. 
*/ if (procid == 0) { int have_root = 0; int errors = 0; int *counts, count = 0; counts = (int*) malloc(sizeof(int) * nproc); assert(counts != NULL); for (i = 0; i < nproc; i++) counts[i] = 0; tail_ptr = head_ptr; /* Walk the list and tally up the number of elements inserted by each rank */ while (tail_ptr.disp != nil.disp) { llist_elem_t elem; MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win); MPI_Get(&elem, sizeof(llist_elem_t), MPI_BYTE, tail_ptr.rank, tail_ptr.disp, sizeof(llist_elem_t), MPI_BYTE, llist_win); MPI_Win_unlock(tail_ptr.rank, llist_win); tail_ptr = elem.next; /* This is not the root */ if (have_root) { assert(elem.value >= 0 && elem.value < nproc); counts[elem.value]++; count++; if (verbose) { int last_elem = tail_ptr.disp == nil.disp; printf("%2d%s", elem.value, last_elem ? "" : " -> "); if (count % ELEM_PER_ROW == 0 && !last_elem) printf("\n"); } } /* This is the root */ else { assert(elem.value == -1); have_root = 1; } } if (verbose) printf("\n\n"); /* Verify the counts we collected */ for (i = 0; i < nproc; i++) { int expected = NUM_ELEMS; if (counts[i] != expected) { printf("Error: Rank %d inserted %d elements, expected %d\n", i, counts[i], expected); errors++; } } printf("%s\n", errors == 0 ? " No Errors" : "FAIL"); free(counts); } MPI_Win_free(&llist_win); /* Free all the elements in the list */ for ( ; my_elems_count > 0; my_elems_count--) MPI_Free_mem(my_elems[my_elems_count-1]); #else /* ! TEST_MPI3_ROUTINES */ if (procid == 0) printf(" No Errors\n"); #endif MPI_Finalize(); return 0; }
/* data1, odata1 and odata2 are packed in the format (for communication): data[0] = is_max, no of is data[1] = size of is[0] ... data[is_max] = size of is[is_max-1] data[is_max + 1] = data(is[0]) ... data[is_max+1+sum(size of is[k]), k=0,...,i-1] = data(is[i]) ... data2 is packed in the format (for creating output is[]): data[0] = is_max, no of is data[1] = size of is[0] ... data[is_max] = size of is[is_max-1] data[is_max + 1] = data(is[0]) ... data[is_max + 1 + Mbs*i) = data(is[i]) ... */ static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C,PetscInt is_max,IS is[]) { Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data; PetscErrorCode ierr; PetscMPIInt size,rank,tag1,tag2,*len_s,nrqr,nrqs,*id_r1,*len_r1,flag,len,*iwork; const PetscInt *idx_i; PetscInt idx,isz,col,*n,*data1,**data1_start,*data2,*data2_i,*data,*data_i; PetscInt Mbs,i,j,k,*odata1,*odata2; PetscInt proc_id,**odata2_ptr,*ctable=0,*btable,len_max,len_est; PetscInt proc_end=0,len_unused,nodata2; PetscInt ois_max; /* max no of is[] in each of processor */ char *t_p; MPI_Comm comm; MPI_Request *s_waits1,*s_waits2,r_req; MPI_Status *s_status,r_status; PetscBT *table; /* mark indices of this processor's is[] */ PetscBT table_i; PetscBT otable; /* mark indices of other processors' is[] */ PetscInt bs=C->rmap->bs,Bn = c->B->cmap->n,Bnbs = Bn/bs,*Bowners; IS garray_local,garray_gl; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)C,&comm); CHKERRQ(ierr); size = c->size; rank = c->rank; Mbs = c->Mbs; ierr = PetscObjectGetNewTag((PetscObject)C,&tag1); CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2); CHKERRQ(ierr); /* create tables used in step 1: table[i] - mark c->garray of proc [i] step 3: table[i] - mark indices of is[i] when whose=MINE table[0] - mark incideces of is[] when whose=OTHER */ len = PetscMax(is_max, size); CHKERRQ(ierr); ierr = PetscMalloc2(len,&table,(Mbs/PETSC_BITS_PER_BYTE+1)*len,&t_p); CHKERRQ(ierr); for (i=0; i<len; i++) { table[i] = t_p + (Mbs/PETSC_BITS_PER_BYTE+1)*i; } ierr = MPIU_Allreduce(&is_max,&ois_max,1,MPIU_INT,MPI_MAX,comm); CHKERRQ(ierr); /* 1. 
Send this processor's is[] to other processors */ /*---------------------------------------------------*/ /* allocate spaces */ ierr = PetscMalloc1(is_max,&n); CHKERRQ(ierr); len = 0; for (i=0; i<is_max; i++) { ierr = ISGetLocalSize(is[i],&n[i]); CHKERRQ(ierr); len += n[i]; } if (!len) { is_max = 0; } else { len += 1 + is_max; /* max length of data1 for one processor */ } ierr = PetscMalloc1(size*len+1,&data1); CHKERRQ(ierr); ierr = PetscMalloc1(size,&data1_start); CHKERRQ(ierr); for (i=0; i<size; i++) data1_start[i] = data1 + i*len; ierr = PetscMalloc4(size,&len_s,size,&btable,size,&iwork,size+1,&Bowners); CHKERRQ(ierr); /* gather c->garray from all processors */ ierr = ISCreateGeneral(comm,Bnbs,c->garray,PETSC_COPY_VALUES,&garray_local); CHKERRQ(ierr); ierr = ISAllGather(garray_local, &garray_gl); CHKERRQ(ierr); ierr = ISDestroy(&garray_local); CHKERRQ(ierr); ierr = MPI_Allgather(&Bnbs,1,MPIU_INT,Bowners+1,1,MPIU_INT,comm); CHKERRQ(ierr); Bowners[0] = 0; for (i=0; i<size; i++) Bowners[i+1] += Bowners[i]; if (is_max) { /* hash table ctable which maps c->row to proc_id) */ ierr = PetscMalloc1(Mbs,&ctable); CHKERRQ(ierr); for (proc_id=0,j=0; proc_id<size; proc_id++) { for (; j<C->rmap->range[proc_id+1]/bs; j++) ctable[j] = proc_id; } /* hash tables marking c->garray */ ierr = ISGetIndices(garray_gl,&idx_i); CHKERRQ(ierr); for (i=0; i<size; i++) { table_i = table[i]; ierr = PetscBTMemzero(Mbs,table_i); CHKERRQ(ierr); for (j = Bowners[i]; j<Bowners[i+1]; j++) { /* go through B cols of proc[i]*/ ierr = PetscBTSet(table_i,idx_i[j]); CHKERRQ(ierr); } } ierr = ISRestoreIndices(garray_gl,&idx_i); CHKERRQ(ierr); } /* if (is_max) */ ierr = ISDestroy(&garray_gl); CHKERRQ(ierr); /* evaluate communication - mesg to who, length, and buffer space */ for (i=0; i<size; i++) len_s[i] = 0; /* header of data1 */ for (proc_id=0; proc_id<size; proc_id++) { iwork[proc_id] = 0; *data1_start[proc_id] = is_max; data1_start[proc_id]++; for (j=0; j<is_max; j++) { if (proc_id == rank) { *data1_start[proc_id] = n[j]; } else { *data1_start[proc_id] = 0; } data1_start[proc_id]++; } } for (i=0; i<is_max; i++) { ierr = ISGetIndices(is[i],&idx_i); CHKERRQ(ierr); for (j=0; j<n[i]; j++) { idx = idx_i[j]; *data1_start[rank] = idx; data1_start[rank]++; /* for local proccessing */ proc_end = ctable[idx]; for (proc_id=0; proc_id<=proc_end; proc_id++) { /* for others to process */ if (proc_id == rank) continue; /* done before this loop */ if (proc_id < proc_end && !PetscBTLookup(table[proc_id],idx)) continue; /* no need for sending idx to [proc_id] */ *data1_start[proc_id] = idx; data1_start[proc_id]++; len_s[proc_id]++; } } /* update header data */ for (proc_id=0; proc_id<size; proc_id++) { if (proc_id== rank) continue; *(data1 + proc_id*len + 1 + i) = len_s[proc_id] - iwork[proc_id]; iwork[proc_id] = len_s[proc_id]; } ierr = ISRestoreIndices(is[i],&idx_i); CHKERRQ(ierr); } nrqs = 0; nrqr = 0; for (i=0; i<size; i++) { data1_start[i] = data1 + i*len; if (len_s[i]) { nrqs++; len_s[i] += 1 + is_max; /* add no. 
of header msg */ } } for (i=0; i<is_max; i++) { ierr = ISDestroy(&is[i]); CHKERRQ(ierr); } ierr = PetscFree(n); CHKERRQ(ierr); ierr = PetscFree(ctable); CHKERRQ(ierr); /* Determine the number of messages to expect, their lengths, from from-ids */ ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&nrqr); CHKERRQ(ierr); ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,len_s,&id_r1,&len_r1); CHKERRQ(ierr); /* Now post the sends */ ierr = PetscMalloc2(size,&s_waits1,size,&s_waits2); CHKERRQ(ierr); k = 0; for (proc_id=0; proc_id<size; proc_id++) { /* send data1 to processor [proc_id] */ if (len_s[proc_id]) { ierr = MPI_Isend(data1_start[proc_id],len_s[proc_id],MPIU_INT,proc_id,tag1,comm,s_waits1+k); CHKERRQ(ierr); k++; } } /* 2. Receive other's is[] and process. Then send back */ /*-----------------------------------------------------*/ len = 0; for (i=0; i<nrqr; i++) { if (len_r1[i] > len) len = len_r1[i]; } ierr = PetscFree(len_r1); CHKERRQ(ierr); ierr = PetscFree(id_r1); CHKERRQ(ierr); for (proc_id=0; proc_id<size; proc_id++) len_s[proc_id] = iwork[proc_id] = 0; ierr = PetscMalloc1(len+1,&odata1); CHKERRQ(ierr); ierr = PetscMalloc1(size,&odata2_ptr); CHKERRQ(ierr); ierr = PetscBTCreate(Mbs,&otable); CHKERRQ(ierr); len_max = ois_max*(Mbs+1); /* max space storing all is[] for each receive */ len_est = 2*len_max; /* estimated space of storing is[] for all receiving messages */ ierr = PetscMalloc1(len_est+1,&odata2); CHKERRQ(ierr); nodata2 = 0; /* nodata2+1: num of PetscMalloc(,&odata2_ptr[]) called */ odata2_ptr[nodata2] = odata2; len_unused = len_est; /* unused space in the array odata2_ptr[nodata2]-- needs to be >= len_max */ k = 0; while (k < nrqr) { /* Receive messages */ ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status); CHKERRQ(ierr); if (flag) { ierr = MPI_Get_count(&r_status,MPIU_INT,&len); CHKERRQ(ierr); proc_id = r_status.MPI_SOURCE; ierr = MPI_Irecv(odata1,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req); CHKERRQ(ierr); ierr = MPI_Wait(&r_req,&r_status); CHKERRQ(ierr); /* Process messages */ /* make sure there is enough unused space in odata2 array */ if (len_unused < len_max) { /* allocate more space for odata2 */ ierr = PetscMalloc1(len_est+1,&odata2); CHKERRQ(ierr); odata2_ptr[++nodata2] = odata2; len_unused = len_est; } ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,odata1,OTHER,odata2,&otable); CHKERRQ(ierr); len = 1 + odata2[0]; for (i=0; i<odata2[0]; i++) len += odata2[1 + i]; /* Send messages back */ ierr = MPI_Isend(odata2,len,MPIU_INT,proc_id,tag2,comm,s_waits2+k); CHKERRQ(ierr); k++; odata2 += len; len_unused -= len; len_s[proc_id] = len; /* num of messages sending back to [proc_id] by this proc */ } } ierr = PetscFree(odata1); CHKERRQ(ierr); ierr = PetscBTDestroy(&otable); CHKERRQ(ierr); /* 3. Do local work on this processor's is[] */ /*-------------------------------------------*/ /* make sure there is enough unused space in odata2(=data) array */ len_max = is_max*(Mbs+1); /* max space storing all is[] for this processor */ if (len_unused < len_max) { /* allocate more space for odata2 */ ierr = PetscMalloc1(len_est+1,&odata2); CHKERRQ(ierr); odata2_ptr[++nodata2] = odata2; } data = odata2; ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,data1_start[rank],MINE,data,table); CHKERRQ(ierr); ierr = PetscFree(data1_start); CHKERRQ(ierr); /* 4. 
Receive work done on other processors, then merge */ /*------------------------------------------------------*/ /* get max number of messages that this processor expects to recv */ ierr = MPIU_Allreduce(len_s,iwork,size,MPI_INT,MPI_MAX,comm); CHKERRQ(ierr); ierr = PetscMalloc1(iwork[rank]+1,&data2); CHKERRQ(ierr); ierr = PetscFree4(len_s,btable,iwork,Bowners); CHKERRQ(ierr); k = 0; while (k < nrqs) { /* Receive messages */ ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag2,comm,&flag,&r_status); CHKERRQ(ierr); if (flag) { ierr = MPI_Get_count(&r_status,MPIU_INT,&len); CHKERRQ(ierr); proc_id = r_status.MPI_SOURCE; ierr = MPI_Irecv(data2,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req); CHKERRQ(ierr); ierr = MPI_Wait(&r_req,&r_status); CHKERRQ(ierr); if (len > 1+is_max) { /* Add data2 into data */ data2_i = data2 + 1 + is_max; for (i=0; i<is_max; i++) { table_i = table[i]; data_i = data + 1 + is_max + Mbs*i; isz = data[1+i]; for (j=0; j<data2[1+i]; j++) { col = data2_i[j]; if (!PetscBTLookupSet(table_i,col)) data_i[isz++] = col; } data[1+i] = isz; if (i < is_max - 1) data2_i += data2[1+i]; } } k++; } } ierr = PetscFree(data2); CHKERRQ(ierr); ierr = PetscFree2(table,t_p); CHKERRQ(ierr); /* phase 1 sends are complete */ ierr = PetscMalloc1(size,&s_status); CHKERRQ(ierr); if (nrqs) { ierr = MPI_Waitall(nrqs,s_waits1,s_status); CHKERRQ(ierr); } ierr = PetscFree(data1); CHKERRQ(ierr); /* phase 2 sends are complete */ if (nrqr) { ierr = MPI_Waitall(nrqr,s_waits2,s_status); CHKERRQ(ierr); } ierr = PetscFree2(s_waits1,s_waits2); CHKERRQ(ierr); ierr = PetscFree(s_status); CHKERRQ(ierr); /* 5. Create new is[] */ /*--------------------*/ for (i=0; i<is_max; i++) { data_i = data + 1 + is_max + Mbs*i; ierr = ISCreateGeneral(PETSC_COMM_SELF,data[1+i],data_i,PETSC_COPY_VALUES,is+i); CHKERRQ(ierr); } for (k=0; k<=nodata2; k++) { ierr = PetscFree(odata2_ptr[k]); CHKERRQ(ierr); } ierr = PetscFree(odata2_ptr); CHKERRQ(ierr); PetscFunctionReturn(0); }
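/* Illustration only: the packed "data1/odata" message layout described in the
   comment at the top of MatIncreaseOverlap_MPISBAIJ_Once can be reduced to a
   small, self-contained sketch.  The index sets and sizes below (is_max = 2,
   is[0] = {3,7,9}, is[1] = {4,8}) are made-up values chosen for illustration;
   only the layout (header of counts followed by the concatenated index lists)
   is taken from the comment above. */
#include <stdio.h>

int main(void)
{
  int is0[] = {3, 7, 9};            /* hypothetical is[0] */
  int is1[] = {4, 8};               /* hypothetical is[1] */
  int buf[1 + 2 + 3 + 2], pos = 0, i, k;

  buf[pos++] = 2;                   /* buf[0]        = is_max            */
  buf[pos++] = 3;                   /* buf[1]        = size of is[0]     */
  buf[pos++] = 2;                   /* buf[2]        = size of is[1]     */
  for (i = 0; i < 3; i++) buf[pos++] = is0[i];   /* data(is[0])          */
  for (i = 0; i < 2; i++) buf[pos++] = is1[i];   /* data(is[1])          */

  /* unpack the way a receiver would: read the header, then walk each block */
  int is_max = buf[0], offset = 1 + is_max;
  for (k = 0; k < is_max; k++) {
    printf("is[%d]:", k);
    for (i = 0; i < buf[1 + k]; i++) printf(" %d", buf[offset + i]);
    printf("\n");
    offset += buf[1 + k];
  }
  return 0;
}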
int main(int argc, char *argv[])
{
    unsigned int power2[12] = {1,2,4,8,16,32,64,128,256,512,1024,2048};
    int numMSG;
    int dim;
    int MASTER;

    /******************* Number of MSGs **********/
    numMSG = atoi(argv[1]);
    dim = atoi(argv[2]);

    /******************* MPI Init *****************/
    unsigned int rank, size;
    int MPI_ERR;
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MASTER = size - 1;

    /************************** MASTER ******************************/
    if (rank == MASTER){
    }
    /************************** Workers ******************************/
    else{
        int i;
        int x;
        int dest;
        int *msg = (int *)calloc(MSGSIZE, sizeof(int));
        int msgno = 0;
        unsigned int next;
        int flag;
        int msgCounter = 0;
        int makeMsgCount = 0;

        while(1){
            MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, &status);
            if(flag == 1){
                MPI_Recv(&msg[0], MSGSIZE, MPI_INT, status.MPI_SOURCE, DATA_TAG, MPI_COMM_WORLD, &status);
                dest = msg[1];
                next = compute_next_dest(rank, dest, power2);
                if(dest == rank){
                    print(msg, rank, dim, power2);
                }else{
                    /* increment the hop count, record this hop, then forward */
                    int nHops = msg[HOPCNT];
                    msg[HOPCNT] += 1;
                    msg[(HOPCNT + nHops + 1)] = rank;
                    MPI_Send(&msg[0], MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD);
                }
            }
            /* Flag = 0 */
            else{
                if(makeMsgCount < numMSG){
                    dest = randomGenerator(size);
                    makeMessage(msg, rank, dest, MSG, msgno);
                    next = compute_next_dest(rank, dest, power2);
                    MPI_Send(&msg[0], MSGSIZE, MPI_INT, next, DATA_TAG, MPI_COMM_WORLD);
                    makeMsgCount += 1;
                    // printf("Rank %d sending \n", rank);
                }
            }
        }
    }

    MPI_Finalize();
    return 0;
}
/*
 * === FUNCTION ======================================================================
 *         Name:  main
 *  Description:
 * =====================================================================================
 */
int main ( int argc, char *argv[] )
{
    // initialize MPI
    MPI_Init(&argc, &argv);

    // we have to remember the number of PEs
    int numpes;
    MPI_Comm_size(MPI_COMM_WORLD, &numpes);

    // for this we need 3 PEs
    assert(numpes == 3);

    // which rank does this process have?
    int myid;
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    switch ( myid ) {
        case 0: {
            printf("0: I have A....sending it!\n");
            char *msg = "A";
            MPI_Send(msg, strlen(msg) + 1, MPI_CHAR, 1, 0, MPI_COMM_WORLD);
            MPI_Send(msg, strlen(msg) + 1, MPI_CHAR, 2, 0, MPI_COMM_WORLD);
            break;
        }
        case 1: {
            char *msg = "B";
            MPI_Status status;
            MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
            int msglen;
            MPI_Get_count(&status, MPI_CHAR, &msglen);
            assert(msglen > 0);
            char *recMsg = malloc(msglen * sizeof(char));
            MPI_Recv(recMsg, msglen, MPI_CHAR, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, &status);
            printf("1: I have B....\n");
            printf("1: Received an %s so sending my B\n", recMsg);
            MPI_Send(msg, strlen(msg) + 1, MPI_CHAR, 2, 0, MPI_COMM_WORLD);
            break;
        }
        case 2: {
            int hasMail;
            int receivedNotAll = 2;
            MPI_Status status;
            while (receivedNotAll) {
                MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &hasMail, &status);
                if (hasMail) {
                    int msglen;
                    MPI_Get_count(&status, MPI_CHAR, &msglen);
                    assert(msglen > 0);
                    char *msg = malloc(msglen * sizeof(char));
                    if (!msg) {
                        fprintf(stderr, "Could not allocate memory for %d bytes in message\n", msglen);
                        exit(-2);
                    }
                    MPI_Recv(msg, msglen, MPI_CHAR, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, &status);
                    printf("2: Received a %s\n", msg);
                    free(msg);
                    receivedNotAll--;
                }
            }
            break;
        }
        default:
            break;
    } /* ----- end switch ----- */

    MPI_Finalize();
    return EXIT_SUCCESS;
} /* ---------- end of function main ---------- */
void Master::solve() { lastCubeFinished = wallClockTime(); num_threads = so.num_threads; job_start_time.growTo(num_threads, DONT_DISTURB); job_start_time_backup.growTo(num_threads, DONT_DISTURB); cur_job.growTo(num_threads, NULL); lhead.growTo(num_threads, 0); last_send_learnts.growTo(num_threads, 0); global_learnts.reserve(10000000); long maxCycleTime = 0; stoppedInit = false; if(engine.opt_var){ bestObjReceived = engine.opt_type ? engine.opt_var->getMin() : engine.opt_var->getMax(); } if(so.purePortfolio){ for(int i = 0 ; i < num_threads ; i++) job_queue.push(new SClause()); } else job_queue.push(new SClause()); for(int i = 0 ; i < num_threads ; i++){ if(so.greedyInit && (i % 3 < 2)){ slaveStates.push_back(RUNNING_GREEDY); slavesRunningGreedy++; setState(i, RUNNING_GREEDY); } else{ slaveStates.push_back(NORMAL_SEARCH); setState(i, NORMAL_SEARCH); } } MPI_Buffer_attach(malloc(MPI_BUFFER_SIZE), MPI_BUFFER_SIZE); // Search: int lastPrinted = time(NULL); int tStart = lastPrinted; long lastSleep = clock(); bool stealJobsNow = true; while (status == RES_UNK && time(NULL) < so.time_out) { //fprintf(stderr, "Trying to send jobs...\n"); while (num_free_slaves > 0 && job_queue.size() > 0) { if(!sendJob()) break; } /************************************************************************** * Ask all jobs to finish the first phase, if this has not happened yet. * */ if(!stoppedInit && time(NULL) > engine.half_time){ if(PAR_DEBUG) fprintf(stderr, "Asking remaining init-workers to continue with normal search...\n"); for(int i = 0 ; i < num_threads ; i++){ if(slaveStates[i] == RUNNING_GREEDY){ setState(i, NORMAL_SEARCH); } } stoppedInit = true; } int received; MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &received, &s); if (received) { double t; profile_start(); // printf("Profiling started...\n"); switch (s.MPI_TAG) { case REPORT_TAG: // printf("It was a report!\n"); receiveReport(); // printf("ReceiveReport done!\n"); profile_end("receive report", 0); continue; case SPLIT_TAG: // printf("Split tag received!\n"); receiveJobs(); profile_end("receive jobs", 0); continue; case REPORT_SOLUTION_TAG: if(PAR_DEBUG) fprintf(stderr, "Received solution! \n"); receiveSolution(); continue; case REPORT_OPTIMUM_TAG: receiveOptObj(); continue; case SOLUTION_PHASE_TAG: receivePhase(); continue; case SLAVE_NEW_STATE_TAG: int dummy; MPI_Recv(&dummy, 1, MPI_INT, s.MPI_SOURCE, SLAVE_NEW_STATE_TAG, MPI_COMM_WORLD, &s); if(PAR_DEBUG) fprintf(stderr, "Setting state of slave %d to %d\n", s.MPI_SOURCE-1, dummy); if(slaveStates[s.MPI_SOURCE-1] == RUNNING_GREEDY && dummy == NORMAL_SEARCH) slavesRunningGreedy--; slaveStates[s.MPI_SOURCE-1] = dummy; continue; continue; default: assert(false); } } if (job_queue.size() < 2*num_threads-2 && !so.purePortfolio ) { // Steal jobs if // - normal mode // - greedy-init, and at least one job finished now... 
if(!so.greedyInit || slavesRunningGreedy < num_threads){ stealJobs(); //continue; } } long now = clock(); maxCycleTime = std::max(maxCycleTime, now - lastSleep); usleep(500); lastSleep = clock(); } if (PAR_DEBUG){ fprintf(stderr, "Waiting for slaves to terminate...\n"); fprintf(stderr, "Max cycle time: %d (%lf)\n", maxCycleTime, (double)maxCycleTime/CLOCKS_PER_SEC); fprintf(stderr, "End of problem called\n"); } MPI_Request r; for (int i = 0; i < num_threads; i++) { MPI_Isend(NULL, 0, MPI_INT, i+1, INTERRUPT_TAG, MPI_COMM_WORLD, &r); MPI_Isend(NULL, 0, MPI_INT, i+1, FINISH_TAG, MPI_COMM_WORLD, &r); } while (num_free_slaves != num_threads) { MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &s); int thread_no = s.MPI_SOURCE-1; MPI_Get_count(&s, MPI_INT, &message_length); message = (int*) malloc(message_length*sizeof(int)); MPI_Recv(message, message_length, MPI_INT, s.MPI_SOURCE, s.MPI_TAG, MPI_COMM_WORLD, &s); if (s.MPI_TAG == REPORT_TAG) { if (message[0] != RES_SEA) { assert(job_start_time[thread_no] != NOT_WORKING); num_free_slaves++; job_start_time[thread_no] = NOT_WORKING; if (PAR_DEBUG) fprintf(stderr, "%d is free, %f\n", thread_no, wallClockTime()); } } free(message); } collectStats(); if(PAR_DEBUG) fprintf(stderr, "Master terminating with obj in %d and %d, bestResult= %d\n", engine.opt_var->getMin(), engine.opt_var->getMax(), bestObjReceived); if(so.verbosity > 0) printStats(); }
double farmer(int numprocs)
{
    MPI_Status status;
    int i, flag, source, w_id;
    double result = 0.0, incoming[5], *derp;
    int w_out[numprocs];   /* indexed by worker rank; nonzero while that worker has a task outstanding */

    // Set up stack
    stack *stack = new_stack();
    double data[5] = {A, B, F(A), F(B), (F(A)+F(B)) * (B-A)/2};
    push(data, stack);

    // Set up queue of idle workers
    queue *queue = new_queue();
    for (i = 1; i < numprocs; i++) {
        push_queue(i, queue);
        w_out[i] = 0;
    }

    while (1) {
        if (!is_empty(stack)) {
            derp = pop(stack);
            w_id = pop_queue(queue);
            w_out[w_id] = 1;
            MPI_Send(derp, 5, MPI_DOUBLE, w_id, TAG_DO_TASK, MPI_COMM_WORLD);
            tasks_per_process[w_id]++;
        }

        // peek for messages
        MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, &status);

        // if there is a message
        if (flag) {
            switch (status.MPI_TAG) {
            case TAG_ADD_TASK:
                // receive data and push onto stack
                source = status.MPI_SOURCE;
                MPI_Recv(&incoming, 5, MPI_DOUBLE, source, TAG_ADD_TASK, MPI_COMM_WORLD, &status);
                push(incoming, stack);
                if (w_out[source]) {
                    push_queue(source, queue);
                    w_out[source] = 0;
                }
                break;
            case TAG_RESULT:
                source = status.MPI_SOURCE;
                MPI_Recv(&incoming, 5, MPI_DOUBLE, source, TAG_RESULT, MPI_COMM_WORLD, &status);
                result += incoming[4];
                if (w_out[source]) {
                    push_queue(source, queue);
                    w_out[source] = 0;
                }
                break;
            }
        }

        // ready to finish?
        if (workers_available(queue) == numprocs-1 && is_empty(stack)) {
            break;
        }
    }

    // kill and free
    for (i = 1; i < numprocs; i++) {
        MPI_Send(&data, 5, MPI_DOUBLE, i, TAG_KILL, MPI_COMM_WORLD);
    }
    free_stack(stack);
    free_queue(queue);

    return result;
}
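/* For context, a matching worker loop for the farmer above might look like the
   sketch below.  This is an assumption, not the original code: the tags
   (TAG_DO_TASK, TAG_ADD_TASK, TAG_RESULT, TAG_KILL), the 5-double task layout
   {left, right, f(left), f(right), area} and the F() macro are taken from the
   farmer, while EPSILON and the adaptive-quadrature splitting policy are
   guessed for illustration. */
#include <math.h>
#include <mpi.h>

void worker(int farmer_rank)
{
    double task[5], left[5], right[5];
    MPI_Status status;

    while (1) {
        /* take the next task (or the kill signal) from the farmer */
        MPI_Recv(task, 5, MPI_DOUBLE, farmer_rank, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        if (status.MPI_TAG == TAG_KILL) break;

        double mid   = (task[0] + task[1]) / 2.0;
        double fmid  = F(mid);
        double larea = (task[2] + fmid) * (mid - task[0]) / 2.0;
        double rarea = (fmid + task[3]) * (task[1] - mid) / 2.0;

        if (fabs(larea + rarea - task[4]) < EPSILON) {
            /* accurate enough: report the refined area back to the farmer */
            task[4] = larea + rarea;
            MPI_Send(task, 5, MPI_DOUBLE, farmer_rank, TAG_RESULT, MPI_COMM_WORLD);
        } else {
            /* not accurate enough: hand both halves back as new tasks */
            left[0]  = task[0]; left[1]  = mid;     left[2]  = task[2]; left[3]  = fmid;    left[4]  = larea;
            right[0] = mid;     right[1] = task[1]; right[2] = fmid;    right[3] = task[3]; right[4] = rarea;
            MPI_Send(left,  5, MPI_DOUBLE, farmer_rank, TAG_ADD_TASK, MPI_COMM_WORLD);
            MPI_Send(right, 5, MPI_DOUBLE, farmer_rank, TAG_ADD_TASK, MPI_COMM_WORLD);
        }
    }
}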
void nomad_fun(const rksvm_problem *prob, const rksvm_parameter *param, Scheduler *scheduler, double *Q) { int l = prob->l; int global_l = prob->global_l; int thread_count = param->thread_count; int nr_ranks = param->nr_ranks; int current_rank = mpi_get_rank(); int *nr_send = scheduler->nr_send; int *nr_recv = scheduler->nr_recv; //atomic variables atomic<int> count_setup_threads; count_setup_threads = 0; atomic<int> computed_data_nodes;//record the number of data_nodes that have been utilized. computed_data_nodes = 0; atomic<int> sended_count;//record the number of data_nodes that have been sended. sended_count = 0; atomic<int> recvd_count;////record the number of data_nodes that have been received. recvd_count = 0; // two auxiliary atomic flags for both sending and receiving atomic<bool> flag_send_ready; flag_send_ready = false; atomic<bool> flag_receive_ready; flag_receive_ready = false; //build several job queues and one sending queue con_queue *job_queues = callocator<con_queue>().allocate(thread_count); for(int i=0;i<thread_count;i++) callocator<con_queue>().construct(job_queues + i); con_queue send_queue; //initilize job queues int interval = (int)ceil((double)prob->l/thread_count); int thread_id = 0; for(int i=0;i<l;i++) { data_node *copy_x = nullptr; copy_x = scheduler->pop(); if((i!=0)&&(i%interval==0)) thread_id++; job_queues[thread_id].push(copy_x); } //the first function auto QMatrix = [&](struct data_node *copy_x)->void{//{{{ int i = 0; int global_index = copy_x->global_index; for(i=0;i<l;i++) { rksvm_node *s = prob->x[i]; rksvm_node *t = copy_x->x; Q[global_index + i*global_l] = k_function(s,t,*param); } return; };//}}} //the second function auto computer_fun = [&](int thread_id)->void{///{{{ count_setup_threads++; while(count_setup_threads < thread_count) { std::this_thread::yield(); } while(true) { if(computed_data_nodes == global_l) break; data_node *copy_x = nullptr; bool success = job_queues[thread_id].try_pop(copy_x); if(success) { if(copy_x->first_time) { QMatrix(copy_x); computed_data_nodes++; if(nr_ranks==1) { int lth = copy_x->length; callocator<rksvm_node>().deallocate(copy_x->x, lth); callocator<data_node>().destroy(copy_x); callocator<data_node>().deallocate(copy_x,1); } else { copy_x->first_time = false; send_queue.push(copy_x); flag_send_ready = true; } } else { QMatrix(copy_x); computed_data_nodes++; copy_x->current_rank = current_rank; int next_rank = cyclic_loading_rank(copy_x->current_rank, nr_ranks); if(next_rank==copy_x->initial_rank) { int lth = copy_x->length; callocator<rksvm_node>().deallocate(copy_x->x, lth); callocator<data_node>().destroy(copy_x); callocator<data_node>().deallocate(copy_x,1); } else { send_queue.push(copy_x); } } } } return; };///}}} //the third function auto sender_fun = [&]()->void{///{{{ while(flag_send_ready == false) { std::this_thread::yield(); } int lth; int msg_bytenum; while(true) { if(sended_count == nr_send[current_rank]) break; data_node *copy_x = nullptr; bool success = send_queue.try_pop(copy_x); if(success) { int next_rank = cyclic_loading_rank(copy_x->current_rank, nr_ranks); if(next_rank == copy_x->initial_rank) { lth = copy_x->length; callocator<rksvm_node>().deallocate(copy_x->x, lth); callocator<data_node>().destroy(copy_x); callocator<data_node>().deallocate(copy_x,1); } else { lth = copy_x->length; msg_bytenum = sizeof(bool)+4*sizeof(int)+lth*sizeof(rksvm_node); char *send_message = sallocator<char>().allocate(msg_bytenum); *(reinterpret_cast<bool *>(send_message)) = copy_x->first_time; *(reinterpret_cast<int 
*>(send_message + sizeof(bool))) = copy_x->length; *(reinterpret_cast<int *>(send_message + sizeof(bool) + sizeof(int))) = copy_x->initial_rank; *(reinterpret_cast<int *>(send_message + sizeof(bool) + 2*sizeof(int))) = copy_x->current_rank; *(reinterpret_cast<int *>(send_message + sizeof(bool) + 3*sizeof(int))) = copy_x->global_index; rksvm_node *dest = reinterpret_cast<rksvm_node *>(send_message + sizeof(bool) + 4*sizeof(int)); std::copy(copy_x->x, copy_x->x + lth, dest); flag_receive_ready = true; MPI_Ssend(send_message, msg_bytenum, MPI_CHAR, next_rank, 1, MPI_COMM_WORLD); //destroying callocator<rksvm_node>().deallocate(copy_x->x, lth); callocator<data_node>().destroy(copy_x); callocator<data_node>().deallocate(copy_x,1); //record the sended count sended_count++; sallocator<char>().deallocate(send_message, msg_bytenum); } } } return; };///}}} //the fourth function auto receiver_fun = [&]()->void{///{{{ while(flag_receive_ready == false) { std::this_thread::yield(); } int flag = 0; int src_rank; int lth; MPI_Status status; while(true) { if(recvd_count == nr_recv[mpi_get_rank()]) break; MPI_Iprobe(MPI_ANY_SOURCE, 1, MPI_COMM_WORLD, &flag, &status); if(flag == 0) { std::this_thread::yield(); } else { src_rank = status.MPI_SOURCE; int msg_size = 0; MPI_Get_count(&status, MPI_CHAR, &msg_size); char *recv_message = sallocator<char>().allocate(msg_size); MPI_Recv(recv_message, msg_size, MPI_CHAR, src_rank, 1, MPI_COMM_WORLD, &status); //recovering data_node *copy_x = callocator<data_node>().allocate(1); copy_x->first_time = *(reinterpret_cast<bool *>(recv_message)); copy_x->length = *(reinterpret_cast<int *>(recv_message + sizeof(bool))); copy_x->initial_rank = *(reinterpret_cast<int *>(recv_message + sizeof(bool) + sizeof(int))); copy_x->current_rank = *(reinterpret_cast<int *>(recv_message + sizeof(bool) + 2*sizeof(int))); copy_x->global_index = *(reinterpret_cast<int *>(recv_message + sizeof(bool) + 3*sizeof(int))); rksvm_node *dest = reinterpret_cast<rksvm_node *>(recv_message + sizeof(bool) + 4*sizeof(int)); //please notice that the approach to recover cp_x->x lth = copy_x->length; copy_x->x = callocator<rksvm_node>().allocate(lth); memcpy(copy_x->x, dest, (size_t)sizeof(rksvm_node)*lth); sallocator<char>().deallocate(recv_message, msg_size); //push an item to the job_queue who has the smallest number of items. //In doing so, the dynamic loading balancing can be achieved. 
int smallest_items_thread_id = 0; auto smallest_items = job_queues[0].unsafe_size(); for(int i=1;i<thread_count;i++) { auto tmp = job_queues[i].unsafe_size(); if(tmp < smallest_items) { smallest_items_thread_id = i; smallest_items = tmp; } } job_queues[smallest_items_thread_id].push(copy_x); recvd_count++; } } return; };///}}} //notice that tht above functions are important to our program //create some functional threads std::vector<std::thread> computers; std::thread *sender = nullptr; std::thread *receiver = nullptr; for (int i=0; i < thread_count; i++){ computers.push_back(std::thread(computer_fun, i)); } if(nr_ranks>1) { sender = new std::thread(sender_fun); receiver = new std::thread(receiver_fun); } //wait until data loading and initialization //the main thread is used to test the results while(count_setup_threads < thread_count){ std::this_thread::yield(); } if(current_rank==0) { printf("Start to compute kernel matrix!\n"); fflush(stdout); } //test the time used to compute Q tbb::tick_count start_time = tbb::tick_count::now(); while(true) { if(nr_ranks==1) { if(computed_data_nodes == global_l) break; } else { if((computed_data_nodes==global_l)&& (sended_count==nr_send[current_rank])&& (recvd_count==nr_recv[current_rank])) break; } } MPI_Barrier(MPI_COMM_WORLD);//sychronization double elapsed_seconds = (tbb::tick_count::now() - start_time).seconds(); if(current_rank==0) { printf("Computing Q has done!, the elapsed time is %f secs\n", elapsed_seconds); fflush(stdout); } callocator<con_queue>().deallocate(job_queues, thread_count); for(auto &th: computers) th.join(); if(nr_ranks > 1) { sender->join(); receiver->join(); delete sender; delete receiver; } return; }
static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) { PetscErrorCode ierr; PetscMPIInt nrecvs,tag,*tags,done,i; MPI_Aint lb,unitbytes; char *tdata; MPI_Request *sendreqs,*usendreqs,*req,barrier; PetscSegBuffer segrank,segdata,segreq; PetscBool barrier_started; PetscFunctionBegin; ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); ierr = PetscMalloc1(ntags,&tags);CHKERRQ(ierr); for (i=0; i<ntags; i++) { ierr = PetscCommGetNewTag(comm,&tags[i]);CHKERRQ(ierr); } ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); tdata = (char*)todata; ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); ierr = PetscMalloc1(nto*ntags,&usendreqs);CHKERRQ(ierr); /* Post synchronous sends */ for (i=0; i<nto; i++) { ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); } /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the * synchronous messages above. */ for (i=0; i<nto; i++) { PetscMPIInt k; for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL; ierr = (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);CHKERRQ(ierr); } ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); ierr = PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);CHKERRQ(ierr); nrecvs = 0; barrier = MPI_REQUEST_NULL; /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, * but we need to work around it. 
*/ barrier_started = PETSC_FALSE; for (done=0; !done; ) { PetscMPIInt flag; MPI_Status status; ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); if (flag) { /* incoming message */ PetscMPIInt *recvrank,k; void *buf; ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); *recvrank = status.MPI_SOURCE; ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); ierr = PetscSegBufferGet(segreq,ntags,&req);CHKERRQ(ierr); for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL; ierr = (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);CHKERRQ(ierr); nrecvs++; } if (!barrier_started) { PetscMPIInt sent,nsends; ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); if (sent) { #if defined(PETSC_HAVE_MPI_IBARRIER) ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); #elif defined(PETSC_HAVE_MPIX_IBARRIER) ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr); #endif barrier_started = PETSC_TRUE; } } else { ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); } } *nfrom = nrecvs; ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); *toreqs = usendreqs; ierr = PetscSegBufferExtractAlloc(segreq,fromreqs);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segreq);CHKERRQ(ierr); ierr = PetscFree(sendreqs);CHKERRQ(ierr); ierr = PetscFree(tags);CHKERRQ(ierr); ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int argc, char *argv[]) { initTracer(); int i, p; int rank, size; scene *curscene; int res; TGAFILE* tga = NULL; vector *scanline; MPI_Status status; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (rank == 0) { curscene = sceneLoad(); tga = openTGA(curscene); } else curscene = calloc(1, sizeof(scene)); double start, end; start = MPI_Wtime(); MPI_Bcast((void*)curscene, sizeof(scene) / sizeof(float), MPI_FLOAT, 0, MPI_COMM_WORLD); res = sceneResolution(curscene); scanline = calloc(res, sizeof(vector)); if (rank == 0) { int lastline = 0; int waiting = res; for (lastline = 0; lastline < size - 1; lastline++) { MPI_Send(&lastline, 1, MPI_INT, lastline + 1, 0, MPI_COMM_WORLD); } int flag; while(1) { if (waiting == 0) break; MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, &status); if (flag) { MPI_Recv(scanline, (sizeof(vector) / sizeof(float)) * res, MPI_FLOAT, status.MPI_SOURCE, status.MPI_TAG, MPI_COMM_WORLD, &status); if (lastline < res) { MPI_Send(&lastline, 1, MPI_INT, status.MPI_SOURCE, 0, MPI_COMM_WORLD); lastline++; } for (i = 0; i < res; i++) writeTGAColor(tga, curscene, status.MPI_TAG, i, scanline[i].x, scanline[i].y, scanline[i].z); waiting--; } } flag = -1; for (p = 1; p < size; p++) { MPI_Send(&flag, 1, MPI_INT, p, 0, MPI_COMM_WORLD); } } else { int line; while(1) { MPI_Recv(&line, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status); if (line < 0) { break; } printf("Process %d scanning horizontal line %d.\n", rank, line); for (i = 0; i < res; i++) scanline[i] = pixelraytrace(curscene, line, i); MPI_Send(scanline, (sizeof(vector) / sizeof(float)) * res, MPI_FLOAT, 0, line, MPI_COMM_WORLD); } } free(curscene); end = MPI_Wtime(); if (rank == 0) printf("Entire process took %.4f seconds.\n", end - start); MPI_Finalize(); return 0; }
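/*
 * Added sketch (not part of the original raytracer): the broadcast above ships the
 * scene as sizeof(scene)/sizeof(float) MPI_FLOATs, which silently assumes the struct
 * contains only floats and is padded to a float boundary. A minimal alternative,
 * assuming a homogeneous cluster and a plain-old-data struct (both assumptions here,
 * not stated by the source), is to broadcast the raw bytes; the scene_t layout below
 * is purely illustrative.
 */
#include <mpi.h>

typedef struct { float eye[3]; int width, height; } scene_t; /* hypothetical layout */

static void bcast_scene(scene_t *s, MPI_Comm comm)
{
    /* every rank passes the same count; rank 0 supplies the data */
    MPI_Bcast(s, (int)sizeof(*s), MPI_BYTE, 0, comm);
}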
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) { PetscErrorCode ierr; PetscMPIInt nrecvs,tag,done,i; MPI_Aint lb,unitbytes; char *tdata; MPI_Request *sendreqs,barrier; PetscSegBuffer segrank,segdata; PetscBool barrier_started; PetscFunctionBegin; ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); tdata = (char*)todata; ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); for (i=0; i<nto; i++) { ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); } ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); nrecvs = 0; barrier = MPI_REQUEST_NULL; /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, * but we need to work around it. */ barrier_started = PETSC_FALSE; for (done=0; !done; ) { PetscMPIInt flag; MPI_Status status; ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); if (flag) { /* incoming message */ PetscMPIInt *recvrank; void *buf; ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); *recvrank = status.MPI_SOURCE; ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); nrecvs++; } if (!barrier_started) { PetscMPIInt sent,nsends; ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); if (sent) { #if defined(PETSC_HAVE_MPI_IBARRIER) ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); #elif defined(PETSC_HAVE_MPIX_IBARRIER) ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr); #endif barrier_started = PETSC_TRUE; ierr = PetscFree(sendreqs);CHKERRQ(ierr); } } else { ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); } } *nfrom = nrecvs; ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); PetscFunctionReturn(0); }
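/*
 * Added sketch (not PETSc code): the two routines above implement the NBX /
 * non-blocking consensus pattern -- post MPI_Issend for every outgoing message,
 * poll MPI_Iprobe for whatever arrives, and once all local synchronous sends have
 * completed enter MPI_Ibarrier; when that barrier completes, no rank can still hold
 * an unmatched send, so reception is finished. A stripped-down, self-contained
 * version (one int per message, fixed tag, illustrative names) looks like this:
 */
#include <mpi.h>
#include <stdlib.h>

static void sparse_exchange(MPI_Comm comm, int nto, const int *to, int payload)
{
    MPI_Request *sreqs  = (MPI_Request *)malloc(nto * sizeof(MPI_Request));
    MPI_Request barrier = MPI_REQUEST_NULL;
    int i, done = 0, barrier_started = 0, tag = 42;

    for (i = 0; i < nto; i++)   /* synchronous sends: completion implies the receive was matched */
        MPI_Issend(&payload, 1, MPI_INT, to[i], tag, comm, &sreqs[i]);

    while (!done) {
        int flag;
        MPI_Status status;
        MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status);
        if (flag) {             /* an incoming message of unknown origin: receive it */
            int data;
            MPI_Recv(&data, 1, MPI_INT, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE);
            /* ... record (status.MPI_SOURCE, data) ... */
        }
        if (!barrier_started) {
            int sent;
            MPI_Testall(nto, sreqs, &sent, MPI_STATUSES_IGNORE);
            if (sent) {         /* all my sends have been matched somewhere */
                MPI_Ibarrier(comm, &barrier);
                barrier_started = 1;
            }
        } else {
            MPI_Test(&barrier, &done, MPI_STATUS_IGNORE); /* everyone reached the barrier */
        }
    }
    free(sreqs);
}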
void declareBindings (void) { /* === Point-to-point === */ void* buf; int count; MPI_Datatype datatype; int dest; int tag; MPI_Comm comm; MPI_Send (buf, count, datatype, dest, tag, comm); // L12 int source; MPI_Status status; MPI_Recv (buf, count, datatype, source, tag, comm, &status); // L15 MPI_Get_count (&status, datatype, &count); MPI_Bsend (buf, count, datatype, dest, tag, comm); MPI_Ssend (buf, count, datatype, dest, tag, comm); MPI_Rsend (buf, count, datatype, dest, tag, comm); void* buffer; int size; MPI_Buffer_attach (buffer, size); // L22 MPI_Buffer_detach (buffer, &size); MPI_Request request; MPI_Isend (buf, count, datatype, dest, tag, comm, &request); // L25 MPI_Ibsend (buf, count, datatype, dest, tag, comm, &request); MPI_Issend (buf, count, datatype, dest, tag, comm, &request); MPI_Irsend (buf, count, datatype, dest, tag, comm, &request); MPI_Irecv (buf, count, datatype, source, tag, comm, &request); MPI_Wait (&request, &status); int flag; MPI_Test (&request, &flag, &status); // L32 MPI_Request_free (&request); MPI_Request* array_of_requests; int index; MPI_Waitany (count, array_of_requests, &index, &status); // L36 MPI_Testany (count, array_of_requests, &index, &flag, &status); MPI_Status* array_of_statuses; MPI_Waitall (count, array_of_requests, array_of_statuses); // L39 MPI_Testall (count, array_of_requests, &flag, array_of_statuses); int incount; int outcount; int* array_of_indices; MPI_Waitsome (incount, array_of_requests, &outcount, array_of_indices, array_of_statuses); // L44--45 MPI_Testsome (incount, array_of_requests, &outcount, array_of_indices, array_of_statuses); // L46--47 MPI_Iprobe (source, tag, comm, &flag, &status); // L48 MPI_Probe (source, tag, comm, &status); MPI_Cancel (&request); MPI_Test_cancelled (&status, &flag); MPI_Send_init (buf, count, datatype, dest, tag, comm, &request); MPI_Bsend_init (buf, count, datatype, dest, tag, comm, &request); MPI_Ssend_init (buf, count, datatype, dest, tag, comm, &request); MPI_Rsend_init (buf, count, datatype, dest, tag, comm, &request); MPI_Recv_init (buf, count, datatype, source, tag, comm, &request); MPI_Start (&request); MPI_Startall (count, array_of_requests); void* sendbuf; int sendcount; MPI_Datatype sendtype; int sendtag; void* recvbuf; int recvcount; MPI_Datatype recvtype; MPI_Datatype recvtag; MPI_Sendrecv (sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, &status); // L67--69 MPI_Sendrecv_replace (buf, count, datatype, dest, sendtag, source, recvtag, comm, &status); // L70--71 MPI_Datatype oldtype; MPI_Datatype newtype; MPI_Type_contiguous (count, oldtype, &newtype); // L74 int blocklength; { int stride; MPI_Type_vector (count, blocklength, stride, oldtype, &newtype); // L78 } { MPI_Aint stride; MPI_Type_hvector (count, blocklength, stride, oldtype, &newtype); // L82 } int* array_of_blocklengths; { int* array_of_displacements; MPI_Type_indexed (count, array_of_blocklengths, array_of_displacements, oldtype, &newtype); // L87--88 } { MPI_Aint* array_of_displacements; MPI_Type_hindexed (count, array_of_blocklengths, array_of_displacements, oldtype, &newtype); // L92--93 MPI_Datatype* array_of_types; MPI_Type_struct (count, array_of_blocklengths, array_of_displacements, array_of_types, &newtype); // L95--96 } void* location; MPI_Aint address; MPI_Address (location, &address); // L100 MPI_Aint extent; MPI_Type_extent (datatype, &extent); // L102 MPI_Type_size (datatype, &size); MPI_Aint displacement; MPI_Type_lb (datatype, &displacement); // L105 MPI_Type_ub 
(datatype, &displacement); MPI_Type_commit (&datatype); MPI_Type_free (&datatype); MPI_Get_elements (&status, datatype, &count); void* inbuf; void* outbuf; int outsize; int position; MPI_Pack (inbuf, incount, datatype, outbuf, outsize, &position, comm); // L114 int insize; MPI_Unpack (inbuf, insize, &position, outbuf, outcount, datatype, comm); // L116--117 MPI_Pack_size (incount, datatype, comm, &size); /* === Collectives === */ MPI_Barrier (comm); // L121 int root; MPI_Bcast (buffer, count, datatype, root, comm); // L123 MPI_Gather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm); // L124--125 int* recvcounts; int* displs; MPI_Gatherv (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm); // L128--130 MPI_Scatter (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm); // L131--132 int* sendcounts; MPI_Scatterv (sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm); // L134--135 MPI_Allgather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); // L136--137 MPI_Allgatherv (sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, comm); // L138--140 MPI_Alltoall (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); // L141--142 int* sdispls; int* rdispls; MPI_Alltoallv (sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm); // L145--147 MPI_Op op; MPI_Reduce (sendbuf, recvbuf, count, datatype, op, root, comm); // L149 #if 0 MPI_User_function function; int commute; MPI_Op_create (function, commute, &op); // L153 #endif MPI_Op_free (&op); // L155 MPI_Allreduce (sendbuf, recvbuf, count, datatype, op, comm); MPI_Reduce_scatter (sendbuf, recvbuf, recvcounts, datatype, op, comm); MPI_Scan (sendbuf, recvbuf, count, datatype, op, comm); /* === Groups, contexts, and communicators === */ MPI_Group group; MPI_Group_size (group, &size); // L162 int rank; MPI_Group_rank (group, &rank); // L164 MPI_Group group1; int n; int* ranks1; MPI_Group group2; int* ranks2; MPI_Group_translate_ranks (group1, n, ranks1, group2, ranks2); // L170 int result; MPI_Group_compare (group1, group2, &result); // L172 MPI_Group newgroup; MPI_Group_union (group1, group2, &newgroup); // L174 MPI_Group_intersection (group1, group2, &newgroup); MPI_Group_difference (group1, group2, &newgroup); int* ranks; MPI_Group_incl (group, n, ranks, &newgroup); // L178 MPI_Group_excl (group, n, ranks, &newgroup); extern int ranges[][3]; MPI_Group_range_incl (group, n, ranges, &newgroup); // L181 MPI_Group_range_excl (group, n, ranges, &newgroup); MPI_Group_free (&group); MPI_Comm_size (comm, &size); MPI_Comm_rank (comm, &rank); MPI_Comm comm1; MPI_Comm comm2; MPI_Comm_compare (comm1, comm2, &result); MPI_Comm newcomm; MPI_Comm_dup (comm, &newcomm); MPI_Comm_create (comm, group, &newcomm); int color; int key; MPI_Comm_split (comm, color, key, &newcomm); // L194 MPI_Comm_free (&comm); MPI_Comm_test_inter (comm, &flag); MPI_Comm_remote_size (comm, &size); MPI_Comm_remote_group (comm, &group); MPI_Comm local_comm; int local_leader; MPI_Comm peer_comm; int remote_leader; MPI_Comm newintercomm; MPI_Intercomm_create (local_comm, local_leader, peer_comm, remote_leader, tag, &newintercomm); // L204--205 MPI_Comm intercomm; MPI_Comm newintracomm; int high; MPI_Intercomm_merge (intercomm, high, &newintracomm); // L209 int keyval; #if 0 MPI_Copy_function copy_fn; MPI_Delete_function delete_fn; void* extra_state; MPI_Keyval_create (copy_fn, delete_fn, &keyval, extra_state); // L215 
#endif MPI_Keyval_free (&keyval); // L217 void* attribute_val; MPI_Attr_put (comm, keyval, attribute_val); // L219 MPI_Attr_get (comm, keyval, attribute_val, &flag); MPI_Attr_delete (comm, keyval); /* === Environmental inquiry === */ char* name; int resultlen; MPI_Get_processor_name (name, &resultlen); // L226 MPI_Errhandler errhandler; #if 0 MPI_Handler_function function; MPI_Errhandler_create (function, &errhandler); // L230 #endif MPI_Errhandler_set (comm, errhandler); // L232 MPI_Errhandler_get (comm, &errhandler); MPI_Errhandler_free (&errhandler); int errorcode; char* string; MPI_Error_string (errorcode, string, &resultlen); // L237 int errorclass; MPI_Error_class (errorcode, &errorclass); // L239 MPI_Wtime (); MPI_Wtick (); int argc; char** argv; MPI_Init (&argc, &argv); // L244 MPI_Finalize (); MPI_Initialized (&flag); MPI_Abort (comm, errorcode); }
int main(int argc, char *argv[]) { int rank, nprocs, x, y; Window win; // initialization for a window GC gc; // graphics context Display *display = NULL; Colormap screenColourmap; XColor colours[15]; unsigned int width = X_RESN, height = Y_RESN; /* window size */ clock_t start, end, elapsed; int pixelToDraw[3]; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &rank); ComplexNumber c; ComplexNumber z; int depth = 1000; int cSize = 4; double cX = -2; double cY = -2; double cMinR = cX; double cMinI = cY; double cMaxR = cMinR + cSize; double cMaxI = cMinI + cSize; double increaseX = (cMaxR - cMinR) / X_RESN; double increaseY = (cMaxI - cMinI) / Y_RESN; int drawable[X_RESN][Y_RESN]; if(rank==0) { display = x11setup(&win, &gc, width, height); screenColourmap = DefaultColormap(display, DefaultScreen(display)); setupColours(display, colours, screenColourmap); int probeFlag = 0; MPI_Status status; int count = 0; int start = 1; for (int i = 1; i < nprocs; i++) { MPI_Send(&start, 1, MPI_INT, i, 98, MPI_COMM_WORLD); } for (int y = 0; y < Y_RESN; y++) { for (int x = 0; x < X_RESN; x++) { while (!probeFlag) { MPI_Iprobe(MPI_ANY_SOURCE, 99, MPI_COMM_WORLD, &probeFlag, &status); if (probeFlag) { int coords[2] = {x, y}; int slave = status.MPI_SOURCE; MPI_Recv(&pixelToDraw, 3, MPI_INT, MPI_ANY_SOURCE, 99, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Send(&coords, 2, MPI_INT, slave, 99, MPI_COMM_WORLD); drawable[pixelToDraw[0]][pixelToDraw[1]] = pixelToDraw[2]; count++; } else { MPI_Iprobe(MPI_ANY_SOURCE, 98, MPI_COMM_WORLD, &probeFlag, &status); if (probeFlag) { int slave = status.MPI_SOURCE; int coords[2] = {x, y}; MPI_Recv(&start, 1, MPI_INT, MPI_ANY_SOURCE, 98, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Send(&coords, 2, MPI_INT, slave, 99, MPI_COMM_WORLD); } } } probeFlag = 0; } } MPI_Recv(&pixelToDraw, 3, MPI_INT, MPI_ANY_SOURCE, 99, MPI_COMM_WORLD, MPI_STATUS_IGNORE); drawable[pixelToDraw[0]][pixelToDraw[1]] = pixelToDraw[2]; for (int y = 0; y < Y_RESN; y++) { for (int x = 0; x < X_RESN; x++) { drawPoint(display, win, gc, colours, x, y, depth, drawable[x][y]); } } } else { int flag = 1; int coords[2]; MPI_Recv(&start, 1, MPI_INT, 0, 98, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Send(&start, 1, MPI_INT, 0, 98, MPI_COMM_WORLD); while (flag) { MPI_Recv(&coords, 2, MPI_INT, 0, 99, MPI_COMM_WORLD, MPI_STATUS_IGNORE); c.r = coords[0] * increaseX - 2; c.i = coords[1] * increaseY - 2; resetComplexNumber(&z); int limit = seriesDiverges(depth, &z, &c); pixelToDraw[0] = coords[0]; pixelToDraw[1] = coords[1]; pixelToDraw[2] = limit; MPI_Send(&pixelToDraw, 3, MPI_INT, 0, 99, MPI_COMM_WORLD); } } // main loop int running = 1; // loop variable start = clock(); while(running) { // checks to see if there have been any events, // will exit the main loop if any key is pressed if(rank==0) { if(XPending(display)) { XEvent ev; XNextEvent(display, &ev); switch(ev.type) { case KeyPress: running = 0; break; } } } end = clock(); elapsed = end - start; // only update the display if > 1 millisecond has passed since the last update if(elapsed / (CLOCKS_PER_SEC/1000) > 1 && rank==0) { //XClearWindow(display, win); start = end; XFlush(display); } } if(rank==0 && display) { XCloseDisplay(display); // close the display window printf("Display closed\n"); } else { MPI_Finalize(); return 0; } MPI_Finalize(); return 0; }
void manager_initialize(WSET *ws, int NUM_GROUP, int stages, int numbprocs, char filename[50]) { int i,slv,eqnid=0,eqnpack[NUM_GROUP],new_eqnpack[NUM_GROUP], quotient,remain,eqnsize,flag,quit_collect=0,cnt_recv; MPI_Status status; /* Initialization phase: sending eqn. index to workers */ if(NUM_GROUP<=numbprocs-1) /* worker with id<=NUM_GROUP will get one eqn. */ { if(v>3) printf("NUM_GROUP<=numbprocs-1\n"); for(slv=1;slv<=NUM_GROUP;slv++) { eqnid = slv; MPI_Send(&eqnid,1,MPI_INT,slv,EQN_TAG,MPI_COMM_WORLD); if(v>3 ) printf("manager sent eqn. index %d to %d\n", eqnid, slv); } } else /* NUM_GROUP>numbprocs-1 */ { if(v>3) printf("NUM_GROUP>numbprocs-1\n"); quotient = NUM_GROUP/(numbprocs-1); remain = (int)fmod(NUM_GROUP,numbprocs-1); if(v>3) printf("quotient=%d,remain=%d\n",quotient,remain); eqnid = 1; for(slv=1;slv<=remain;slv++) { for(i=1;i<=quotient+1;i++) { MPI_Send(&eqnid,1,MPI_INT,slv,EQN_TAG,MPI_COMM_WORLD); eqnid++; } } for(slv=remain+1;slv<=numbprocs-1;slv++) { for(i=1;i<=quotient;i++) { MPI_Send(&eqnid,1,MPI_INT,slv,EQN_TAG,MPI_COMM_WORLD); eqnid++; } } if(v>3) printf("the last eqnid=%d\n", eqnid-1); assert((eqnid-1)<=NUM_GROUP); } /* send 0 to everyone */ if(v>3) printf("manager sent 0 to everyone.\n"); eqnid = 0; for(slv=1;slv<=numbprocs-1;slv++) MPI_Send(&eqnid,1,MPI_INT,slv,EQN_TAG,MPI_COMM_WORLD); /* initial collect */ cnt_recv = 0; while(!quit_collect) { MPI_Iprobe(MPI_ANY_SOURCE,EQN_TAG,MPI_COMM_WORLD,&flag,&status); while(flag) /* while flag -- pending recv */ { slv = status.MPI_SOURCE; /* which slave sent this JOB back */ MPI_Get_count(&status, MPI_INT, &eqnsize); MPI_Recv(new_eqnpack,eqnsize,MPI_INT,slv,EQN_TAG,MPI_COMM_WORLD,&status); if(v>3) { printf("manager recv returned eqn index from node %d with eqnsize=%d\n", slv, eqnsize); fflush; for(i=0;i<eqnsize;i++) printf("%d\n", new_eqnpack[i]); fflush; } cnt_recv++; if(cnt_recv==numbprocs-1) /* all workers have sent back */ { quit_collect=1; break;} MPI_Iprobe(MPI_ANY_SOURCE,EQN_TAG,MPI_COMM_WORLD,&flag,&status); } } printf("Initialization phase is DONE!\n"); fflush; /* End of Initialization phase */ /* mainloop initialization */ mainloop_initialize(ws, NUM_GROUP, stages, numbprocs, filename); }
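/*
 * Added sketch: the initialization above hands out NUM_GROUP equation indices in
 * consecutive blocks, giving each of the numbprocs-1 workers
 * NUM_GROUP/(numbprocs-1) equations and the first NUM_GROUP%(numbprocs-1) workers
 * one extra. The helper below (illustrative name, not in the source) states that
 * distribution directly.
 */
static int eqns_for_worker(int slv /* 1-based worker id */, int num_group, int workers)
{
    int quotient = num_group / workers;
    int remain   = num_group % workers;
    return quotient + (slv <= remain ? 1 : 0);
}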
void newflow_inbox(file_object_vector &inbox_file_objects, SeqGraph *resident_sgraph, SplitBuckets *buckets, uintptr_t &total_bucket_nodes) { #ifdef VELOUR_TBB tbb::tick_count::interval_t time_inboxload, time_relatedcomponents, time_insertnodes; tbb::tick_count::interval_t time_removetips, time_concatenate; tbb::tick_count::interval_t time_slicing, time_splitting, time_cleanup; #endif if (!inbox_file_objects.empty()) { // no inbox for the first partition // incrementally load components from the inbox files for (file_object_vector::iterator itr=inbox_file_objects.begin() ; itr != inbox_file_objects.end() ; ++itr) { if (itr->filetype != BUCKET) { fprintf(stderr, "ERROR: Cannot use %s file %s as flow input. Exiting...\n", FILE_TYPES[itr->filetype], itr->filename); exit(EXIT_FAILURE); } #ifdef VELOUR_MPI unsigned inbox_partition = itr->fileindex; #endif // VELOUR_MPI printf(" flowing inbox: %s\n", itr->filename); fflush(stdout); uintptr_t round = 1; size_t file_offset = 0; #ifdef VELOUR_MPI int producer_finished = 0; size_t safe_length = 0; do { //if (safe_length < p__MPI_SAFE_LENGTH_ARRAY[inbox_partition]) // printf("DBG: Partition %u read new safe_length %lli\n", g__PARTITION_INDEX, safe_length); fflush(stdout); size_t new_safe_length = * static_cast<volatile size_t *>(&p__MPI_SAFE_LENGTH_ARRAY[inbox_partition]); if (new_safe_length == safe_length) { MPI_Status mpi_status; MPI_Iprobe(PART_TO_RANK(inbox_partition), 42, MPI_COMM_WORLD, &producer_finished, &mpi_status); // TODO: do MPI_Recv() on the message() continue; } else { safe_length = new_safe_length; } #endif // VELOUR_MPI int filedes = open(itr->filename, O_RDONLY); if (filedes == -1) { fprintf(stderr, "ERROR: failed to open file: %s\n", itr->filename); perror("REASON: "); exit(EXIT_FAILURE); } struct stat file_stat; if (fstat(filedes, &file_stat) != 0) { fprintf(stderr,"ERROR: failed to stat file: %s\n", itr->filename); exit(EXIT_FAILURE); } size_t file_length = file_stat.st_size; //itr->length = file_stat.st_size; #ifdef VELOUR_MPI file_length = min(file_length, safe_length); #endif if (file_length == 0) { // otherwise, mmap will fail if (close(filedes) == -1) { fprintf(stderr, "ERROR: failed to close file: %s\n", itr->filename); perror("REASON: "); exit(EXIT_FAILURE); } #ifdef VELOUR_MPI MPI_Status mpi_status; MPI_Iprobe(PART_TO_RANK(inbox_partition), 42, MPI_COMM_WORLD, &producer_finished, &mpi_status); if (producer_finished && safe_length == * static_cast<volatile size_t *>(&p__MPI_SAFE_LENGTH_ARRAY[inbox_partition])) { // TODO: do MPI_Recv() on the message() //printf("DBG: Partition %u producer finished break\n", g__PARTITION_INDEX); fflush(stdout); break; // break out of do loop to start next file } else { //printf("DBG: Partition %u not finished continue\n", g__PARTITION_INDEX); fflush(stdout); continue; // continue do loop } #else continue; // continue to start next file #endif // VELOUR_MPI } char *file_mmap = static_cast<char *>( mmap( 0, file_length, PROT_READ, MAP_PRIVATE, filedes, 0) ); if (file_mmap == reinterpret_cast<char *>(-1)) { fprintf(stderr, "ERROR: failed to mmap file: %s\n", itr->filename); perror("REASON: "); exit(EXIT_FAILURE); } flow_nodelist_t flowlist; #ifdef VELOUR_TBB flowlist.reserve(1000000); // XXX: constant, should relate to value below #endif while (file_offset < file_length) { uintptr_t round_inbox_nodes = 0; uintptr_t round_import_nodes = 0; #ifdef VELOUR_TBB tbb::tick_count time0 = tbb::tick_count::now(); #endif SeqNode *inbox_chain = NULL; uintptr_t inbox_chain_length = 0; uintptr_t 
min_inbox_chain_length = max(100000, resident_sgraph->node_count >> 5); // about 3% // // CHOOSE WHICH MODE: PERFORMANCE or MINIMUM PEAK FOOTPRINT MODE FOR PUBLICATIONS // uintptr_t max_inbox_chain_length; if (g__MINIMIZE_FOOTPRINT) { max_inbox_chain_length = min_inbox_chain_length; } else { max_inbox_chain_length = max(min_inbox_chain_length, (g__NODE_ALLOCATORS->GetMaxSafeAllocation() / 100)); // XXX: max average seqnode size constant } while (file_offset < file_length && inbox_chain_length < max_inbox_chain_length) { size_t component_size; uintptr_t component_length = sg_load_mmap_stream_component((file_mmap+file_offset), &component_size, &inbox_chain); file_offset += component_size; inbox_chain_length += component_length; } total_bucket_nodes += inbox_chain_length; round_inbox_nodes += inbox_chain_length; p__peakSeqNodes = max(p__peakSeqNodes, resident_sgraph->node_count + inbox_chain_length); p__peakSeqLiveMemory = max(p__peakSeqLiveMemory, g__SEQNODE_ALLOCATOR->GetSeqLiveMemory()); #ifdef VELOUR_TBB tbb::tick_count time1 = tbb::tick_count::now(); #endif #ifdef VELOUR_TBB uintptr_t nodes_memoized = sg_parallel_chain_memoize_related_components(inbox_chain, resident_sgraph, &flowlist); //uintptr_t nodes_memoized = sg_serial_chain_memoize_related_components(inbox_chain, resident_sgraph, &flowlist); #else uintptr_t nodes_memoized = sg_serial_chain_memoize_related_components(inbox_chain, resident_sgraph, &flowlist); #endif round_import_nodes += nodes_memoized; #ifdef VELOUR_TBB tbb::tick_count time2 = tbb::tick_count::now(); #endif while (inbox_chain != NULL) { SeqNode *node = inbox_chain; inbox_chain = inbox_chain->head_next; node->head_next = NULL; // XXX flowlist.push_back(node); resident_sgraph->insertNodeAndUpdateColors(node); } #ifdef VELOUR_TBB tbb::tick_count time3 = tbb::tick_count::now(); time_inboxload += time1 - time0; time_relatedcomponents += time2 - time1; time_insertnodes += time3 - time2; #endif //printf("(flow) inbox component nodes: %"PRIuPTR"\n", round_inbox_nodes); // XXX: remove me //printf("(flow) inbox component related nodes: %"PRIuPTR"\n", round_import_nodes); fflush(stdout); // XXX: remove me #ifdef VERIFY resident_sgraph->verify(true); #endif #ifdef VELOUR_TBB tbb::tick_count time4 = tbb::tick_count::now(); #endif #ifdef VELOUR_TBB //TODO sg_parallel_nodelist_remove_tips(resident_sgraph, true, &flowlist); sg_nodelist_remove_tips(resident_sgraph, true, &flowlist); #else sg_nodelist_remove_tips(resident_sgraph, true, &flowlist); #endif #ifdef VELOUR_TBB tbb::tick_count time5 = tbb::tick_count::now(); #endif #ifdef VERIFY resident_sgraph->verify(true); #endif //sg_concatenate(resident_sgraph, true); sg_nodelist_concatenate(resident_sgraph, true, &flowlist); #ifdef VELOUR_TBB tbb::tick_count time6 = tbb::tick_count::now(); #endif #ifdef VERIFY resident_sgraph->verify(true); #endif // FIXME: execution time is counted with concatenation //sg_nodelist_pop_bubbles(resident_sgraph, true, &flowlist); #ifdef VELOUR_TBB //tbb::tick_count time6 = tbb::tick_count::now(); #endif #ifdef VERIFY //resident_sgraph->verify(true); #endif // emit sub-components that are no longer relevant //slice2_graph(resident_sgraph, g__PARTITION_INDEX); if (g__SLICING) slice2_nodelist(resident_sgraph, g__PARTITION_INDEX, &flowlist); #ifdef VELOUR_TBB tbb::tick_count time7 = tbb::tick_count::now(); #endif // emit components that are no longer relevant to the working graph //buckets->split(resident_sgraph); buckets->split_nodelist(resident_sgraph, &flowlist); #ifdef VELOUR_TBB tbb::tick_count time8 
= tbb::tick_count::now(); #endif // lastly, reset claim tid on nodes for future iterations for (flow_nodelist_t::iterator it = flowlist.begin(); it != flowlist.end() ; ++it) { SeqNode *node = *it; //assert( node->claim_tid == 1 ); // don't assert this as not true for grown nodes and for inbox nodes node->claim_tid = 0; } flowlist.clear(); #ifdef VELOUR_TBB tbb::tick_count time9 = tbb::tick_count::now(); time_removetips += time5 - time4; time_concatenate += time6 - time5; time_slicing += time7 - time6; time_splitting += time8 - time7; time_cleanup += time9 - time8; #endif // initiate garbage collection if needed if (g__NODE_ALLOCATORS->flag_gc_needed_) { g__NODE_ALLOCATORS->GarbageCollect(); } ++ round; } if (munmap(file_mmap, file_length) == -1) { fprintf(stderr, "ERROR: failed to munmap file: %s\n", itr->filename); perror("REASON: "); exit(EXIT_FAILURE); } if (close(filedes) == -1) { fprintf(stderr, "ERROR: failed to close file: %s\n", itr->filename); perror("REASON: "); exit(EXIT_FAILURE); } #ifdef VELOUR_MPI checkdone: MPI_Status mpi_status; MPI_Iprobe(PART_TO_RANK(inbox_partition), 42, MPI_COMM_WORLD, &producer_finished, &mpi_status); // TODO: do MPI_Recv() on the message() } while (!producer_finished || safe_length != * static_cast<volatile size_t *>(&p__MPI_SAFE_LENGTH_ARRAY[inbox_partition])); #endif // VELOUR_MPI } /*if( g__FULL_STATISTICS ) { sg_stat_components(resident_sgraph, stdout); }*/ // last, split the resident graph to outboxes // FIXME: should this be empty!? printf("FLOW: %zu nodes (FIXME maybe dead) in resident graph at end of flowing.... slicing and splitting resident graph.\n", resident_sgraph->node_count); fflush(stdout); // XXX: HACK if (g__SLICING) { slice2_graph(resident_sgraph, g__PARTITION_INDEX); } // XXX: HACK HACK HACK buckets->split(resident_sgraph); //assert( resident_sgraph->node_count == 0 ); } #ifdef VELOUR_TBB tbb::tick_count::interval_t time_total = time_inboxload + time_relatedcomponents + time_insertnodes + time_removetips + time_concatenate + time_slicing + time_splitting + time_cleanup; printf(" flow time: %lfs inbox: %02.1lf%% related: %02.1lf%% insert: %02.1lf%%" " remove: %02.1lf%% concat: %02.1lf%% slice: %02.1lf%% split: %02.1lf%%" " clean: %02.1lf%%\n", time_total.seconds(), 100.0 * (time_inboxload.seconds() / time_total.seconds()), 100.0 * (time_relatedcomponents.seconds() / time_total.seconds()), 100.0 * (time_insertnodes.seconds() / time_total.seconds()), 100.0 * (time_removetips.seconds() / time_total.seconds()), 100.0 * (time_concatenate.seconds() / time_total.seconds()), 100.0 * (time_slicing.seconds() / time_total.seconds()), 100.0 * (time_splitting.seconds() / time_total.seconds()), 100.0 * (time_cleanup.seconds() / time_total.seconds()) ); fflush(stdout); #endif }
void slave_initialize(int id, int NUM_GROUP, char filename[50]) { int flag,*buffer,*newbuffer,count,i,eqn_1,dim_1,solution_1,eqn_2, cnt_eqns,eqnid,eqn_pack[NUM_GROUP],quit_flag=0, fail,k,n; char outfile[80]; MPI_Status status; if(v>2) printf("Node %d knows the filename: %s\n",id, filename);fflush; fail = read_named_target_without_solutions ((int) strlen(filename),filename); fail = copy_target_system_to_container(); cnt_eqns = 0; while(!quit_flag) { MPI_Iprobe(0,MPI_ANY_TAG,MPI_COMM_WORLD,&flag,&status); while(flag) /* while flag -- pending recv */ { if(status.MPI_TAG==EQN_TAG) { MPI_Recv(&eqnid,1,MPI_INT,0,EQN_TAG,MPI_COMM_WORLD,&status); if(eqnid!=0) { eqn_pack[cnt_eqns] = eqnid; cnt_eqns++; if(v>2) printf("node %d solving eqn. %d; cnt_eqns=%d \n", id, eqnid, cnt_eqns); fflush; /* solving eqn. */ sprintf(outfile, "%s_%d", filename, eqnid ); n = (int) strlen(outfile); fail = hypersurface_witness_set(eqnid,n,outfile); /* end of solving eqn. */ } else { if(v>2) printf("node %d recv end signal and send back: \n", id); /* send the index of the eqns for which the worker has computed back */ eqn_pack[cnt_eqns] = 0; /* the last number is 0 */ cnt_eqns++; if(v>2) { printf("cnt_eqns=%d\n", cnt_eqns); for(i=0;i<cnt_eqns;i++) printf("%d\n", eqn_pack[i]); } MPI_Send(eqn_pack,cnt_eqns,MPI_INT,0,EQN_TAG,MPI_COMM_WORLD); /* clean the system and solution container */ fail = solcon_clear_standard_solutions( ); if(fail>0) printf("fail to clear solution container.\n"); fail = syscon_clear_standard_system( ); if(fail>0) printf("fail to clear system container.\n"); /* end of cleaning the system and solution container */ quit_flag =1; } fflush; } /* status.MPI_TAG */ MPI_Iprobe(0,MPI_ANY_TAG,MPI_COMM_WORLD,&flag,&status); } /* flag */ } /* quit_flag */ }
int wc_mpi_iprobe(int src_id, int tag, int *flag, wc_mpistatus_t *status) { int rc = MPI_Iprobe(src_id, tag, MPI_COMM_WORLD, flag, status); WC_HANDLE_MPI_ERROR(MPI_Iprobe, rc); return (rc == MPI_SUCCESS) ? 0 : -1; }
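/*
 * Hypothetical usage of the wrapper above (assumptions: wc_mpistatus_t is a typedef
 * for MPI_Status and WC_HANDLE_MPI_ERROR only reports the failure -- neither is
 * shown in this file).
 */
static void poll_once(int tag)
{
    int flag = 0;
    wc_mpistatus_t status;
    /* non-blocking check; if nothing is pending, return and do other work */
    if (wc_mpi_iprobe(MPI_ANY_SOURCE, tag, &flag, &status) == 0 && flag) {
        /* a message with `tag` is waiting from status.MPI_SOURCE; MPI_Recv it here */
    }
}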
void send_collect(WSET *ws, int num_group, int stages, int cnt_stage, int numbprocs, int cnt_step, IDLE_ELEMENT **ie, LISTELEMENT **listpointer, int *TaskCount, int NUM_GROUP, int *cnt_index, int update_flag, int n1, int n2, int cd) { int slv,flght,flag,*buffer,*package,count,cnt_count, idle_procs[numbprocs],idle_pos,cnt_idle, r,c,s_max,s_min, deg,n=cd,m[2],send[3],label_sol,fail,i=0,j=0; double sol1[2*n1+5], sol2[2*n2+5], ps[2*cd+5]; WSET *ws_p; JOB *temp_j; MPI_Status status; if(v>3) printf("inside of send_collect\n"); fflush; temp_j = (JOB *)((*listpointer)->dataitem); count = temp_j->num_eqns_1 + temp_j->num_eqns_2 + 8; if(cnt_step==2 && update_flag) { package = (int*) malloc(count*sizeof(int)); pack(count, temp_j, package); } cnt_idle = num_idle(*ie); /* Initial JOB distribution */ while(*ie != NULL) /* there are idle processors */ { if(*listpointer!=NULL) /* JOB queue is not empty */ { /* pop one JOB from the queue */ *listpointer = removeitem (*listpointer); slv = (*ie)->data; *ie = removeslv(*ie); /* obtain start solution & send */ if(v>3) printf("sent a job & path to node %d.\n",slv); if(cnt_step==1) { if(v>3) fail = get_next_start_product (&i,&j,1,temp_j->num_vars_1,temp_j->num_vars_2, temp_j->dim_1,temp_j->dim_2, temp_j->deg_1,temp_j->deg_2,cd,sol1,sol2,ps); else fail = get_next_start_product (&i,&j,0,temp_j->num_vars_1,temp_j->num_vars_2, temp_j->dim_1,temp_j->dim_2, temp_j->deg_1,temp_j->deg_2,cd,sol1,sol2,ps); m[0] = n; m[1] = 1; send[0] = slv; send[1] = m[1]; send[2] = m[0]; MPI_Send(send,3,MPI_INT,slv,SEND_SMUL,MPI_COMM_WORLD); MPI_Send(ps,2*n+5,MPI_DOUBLE,slv,SEND_SSOL,MPI_COMM_WORLD); } if(cnt_step==2) { fail = solcon_read_next_solution(n,&m[1],ps); if(fail>0) printf("solcon_read_next_solution fail!\n"); m[0] = n; send[0] = slv; send[1] = m[1]; send[2] = m[0]; MPI_Send(send,3,MPI_INT,slv,SEND_SMUL,MPI_COMM_WORLD); MPI_Send(ps,2*n+5,MPI_DOUBLE,slv,SEND_SSOL,MPI_COMM_WORLD); } /* end of obtaining start solution & sending */ *(TaskCount+slv-1)=*(TaskCount+slv-1)+1; } else break; } flght = cnt_idle - num_idle(*ie); flag = 0; while(flght>0 || *listpointer!=NULL) /* JOB queue loop */ { if(flght>0) { MPI_Iprobe(MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&flag,&status); while(flag) /* while flag -- pending recv */ { if(v>3) printf("manager starting recv... \n"); slv = status.MPI_SOURCE; /* which slave sent this JOB back */ /* recv end solution */ MPI_Recv(ps,2*n+7,MPI_DOUBLE,MPI_ANY_SOURCE,SEND_TSOL, MPI_COMM_WORLD,&status); m[1] = (int) ps[2*n+5]; label_sol = (int) ps[2*n+6] - 1; fail = solcon_write_next_solution_to_defined_output_file (&label_sol,n,m[1],ps); /* end of recv. 
end solution */ /* update idle processor list */ *ie = addslv(*ie, slv); /* update corresponding cell when cnt_step==2 && update_flag is TRUE */ if(cnt_step==2 && update_flag) { c = cnt_stage; r = *cnt_index; ws_p = ws+r*(stages+1)+c; if(ws_p->count==0) /* first returned JOB for current witness set */ { printf("update num_eqns, source, dim, deg\n"); ws_p->num_eqns = package[0]; ws_p->source = (int*) malloc(package[0]*sizeof(int)); deg = 1; for(cnt_count=0;cnt_count<package[0];cnt_count++) { ws_p->source[cnt_count] = package[cnt_count+1]; printf("ws[%d][%d].source[%d]=%d\n",r,c,cnt_count,ws_p->source[cnt_count]); deg = (ws+(package[cnt_count+1]-1)*(stages+1))->deg*deg; } ws_p->dim = package[package[0]+1]; /*ws_p->sols = (int*) malloc(deg*sizeof(int));*/ ws_p->deg = deg; } /*ws_p->sols[ws_p->count]=ws_p->count+1; */ ws_p->count++; if(ws_p->count==ws_p->deg) /* this witness set is complete */ { if(ws_p->num_eqns==NUM_GROUP) { printf("\nrecord [%d][%d]\n", r,c); printf("ALL DONE! aha........\n"); print_ws(ws_p); } } /* if: count==deg */ } /* cnt_step == 2 && update_flag */ if(*listpointer!=NULL) /* JOB queue is not empty */ { /* pop one JOB from the queue */ *listpointer = removeitem (*listpointer); slv = (*ie)->data; *ie = removeslv(*ie); if(v>3) printf("sending a job & path to node %d.\n",slv); /* obtain start solution & send */ if(cnt_step==1) { if(v>3) fail = get_next_start_product (&i,&j,1,temp_j->num_vars_1,temp_j->num_vars_2, temp_j->dim_1,temp_j->dim_2, temp_j->deg_1,temp_j->deg_2,cd,sol1,sol2,ps); else fail = get_next_start_product (&i,&j,0,temp_j->num_vars_1,temp_j->num_vars_2, temp_j->dim_1,temp_j->dim_2, temp_j->deg_1,temp_j->deg_2,cd,sol1,sol2,ps); m[0] = n; m[1] = 1; send[0] = slv; send[1] = m[1]; send[2] = m[0]; MPI_Send(send,3,MPI_INT,slv,SEND_SMUL,MPI_COMM_WORLD); MPI_Send(ps,2*n+5,MPI_DOUBLE,slv,SEND_SSOL,MPI_COMM_WORLD); } if(cnt_step==2) { fail = solcon_read_next_solution(n,&m[1],ps); m[0] = n; send[0] = slv; send[1] = m[1]; send[2] = m[0]; MPI_Send(send,3,MPI_INT,slv,SEND_SMUL,MPI_COMM_WORLD); MPI_Send(ps,2*n+5,MPI_DOUBLE,slv,SEND_SSOL,MPI_COMM_WORLD); } /* end of obtaining start solution & sending */ *(TaskCount+slv-1)=*(TaskCount+slv-1)+1; } else /* JOB queue is empty */ { flght=flght-1; /* one in-flight task less */ if(v>3) {printf ("Job queue empty!\n"); printf("flght=%d\n",flght); } } MPI_Iprobe(MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&flag,&status); } /* while flag */ } /* if flght */ } /* while flght listpointer */ /* send termination to all workers */ if(v>3) printf("\nmanager sent termination\n"); for(slv=1;slv<=numbprocs-1;slv++) { count = -1; MPI_Send(&count,1,MPI_INT,slv,COUNT_TAG,MPI_COMM_WORLD); } }
int main(int argc, char **argv) { int rank, size, i; int root = 0; int hits = 0; // index used for 'hits' int total = 1; // index used for 'total' int msg_waiting = 0; double results[2] = {0}; MPI_Init(&argc, &argv); MPI_Comm comm = MPI_COMM_WORLD; MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size); MPI_Status status; MPI_Request request; // is root process if(rank == root) { double area; double total_hits = 0; double total_pokes = 0; while (1) { // check each slave process for results (non-blocking) for (i = 1; i < size; i++) { MPI_Iprobe(i, 0, comm, &msg_waiting, &status); // if slave process is sending results if (msg_waiting) { MPI_Recv(&results, 2, MPI_DOUBLE, i, 0, comm, &status); total_hits += results[hits]; total_pokes += results[total]; } } if (total_pokes >= 15000000000) { area = (total_hits / total_pokes) * 4; printf("Area=%.12lf\n", area); // send terminating message to each slave process for (i = 1; i < size; i++) { MPI_Isend(&area, 1, MPI_DOUBLE, i, 0, comm, &request); } break; } } // is slave process } else { int cpu_count = get_cpu_count(); double shared_results[cpu_count * 2]; double l_hits = 0; double l_total = 0; pthread_t threads[cpu_count]; t_data thread_data[cpu_count]; for (i = 0; i < cpu_count; i++) { thread_data[i].id = i; thread_data[i].rank = rank; thread_data[i].results = shared_results; pthread_create(&threads[i], NULL, &throw_darts, &thread_data[i]); } // periodically reads results from shared memory; sends to root process while(1) { sleep(3); // first checks for termination flag from root process MPI_Iprobe(root, 0, comm, &msg_waiting, &status); if (msg_waiting) { // terminate threads for (i = 0; i < cpu_count; i++) { pthread_cancel(threads[i]); } break; } else { results[hits] = 0; results[total] = 0; for (i = 0; i < cpu_count; i++) { results[hits] += shared_results[i * 2]; results[total] += shared_results[i * 2 + 1]; } results[hits] -= l_hits; results[total] -= l_total; l_hits += results[hits]; l_total += results[total]; // send results to root process MPI_Isend(&results, 2, MPI_DOUBLE, root, 0, comm, &request); } } } MPI_Finalize(); return 0; }
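/*
 * Alternative sketch (not the author's code): instead of probing each worker rank
 * in turn, the root can probe MPI_ANY_SOURCE once per loop iteration and let MPI
 * pick the sender. Returns 1 if a result pair was consumed.
 */
#include <mpi.h>

static int try_collect(MPI_Comm comm, double *total_hits, double *total_pokes)
{
    int waiting = 0;
    MPI_Status status;
    double results[2];

    MPI_Iprobe(MPI_ANY_SOURCE, 0, comm, &waiting, &status);
    if (!waiting) return 0;
    MPI_Recv(results, 2, MPI_DOUBLE, status.MPI_SOURCE, 0, comm, &status);
    *total_hits  += results[0];   /* hits  */
    *total_pokes += results[1];   /* total pokes */
    return 1;
}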
int main( int argc, char **argv) { int rank, size, i, recv_flag, ret, passed; MPI_Status Status; char message[17]; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (rank == 0) { Test_Init("barrier", rank); /* Receive the startup messages from each of the other clients */ for (i = 0; i < size - 1; i++) { MPI_Recv(message, 17, MPI_CHAR, MPI_ANY_SOURCE, 2000, MPI_COMM_WORLD, &Status); } /* Now use Iprobe to make sure no more messages arrive for a while */ passed = 1; for (i = 0; i < WAIT_TIMES; i++){ recv_flag = 0; MPI_Iprobe(MPI_ANY_SOURCE, 2000, MPI_COMM_WORLD, &recv_flag, &Status); if (recv_flag) passed = 0; } if (passed) Test_Passed("Barrier Test 1"); else Test_Failed("Barrier Test 1"); /* Now go into the barrier myself */ MPI_Barrier(MPI_COMM_WORLD); /* And get the message from everyone who came out */ for (i = 0; i < size - 1; i++) { MPI_Recv(message, 13, MPI_CHAR, MPI_ANY_SOURCE, 2000, MPI_COMM_WORLD, &Status); } /* Now use Iprobe to make sure no more messages arrive for a while */ passed = 1; for (i = 0; i < WAIT_TIMES; i++){ recv_flag = 0; MPI_Iprobe(MPI_ANY_SOURCE, 2000, MPI_COMM_WORLD, &recv_flag, &Status); if (recv_flag) passed = 0; } if (passed) Test_Passed("Barrier Test 2"); else Test_Failed("Barrier Test 2"); Test_Waitforall( ); ret = Summarize_Test_Results(); Test_Finalize(); MPI_Finalize(); return ret; } else { MPI_Send("Entering Barrier", 17, MPI_CHAR, 0, 2000, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); MPI_Send("Past Barrier", 13, MPI_CHAR, 0, 2000, MPI_COMM_WORLD); Test_Waitforall( ); MPI_Finalize(); return 0; } }
void frog_spin(int rank, int size, int nfrog, long * rng_state, MPI_Comm comm) { list frogs = NULL; int i; int start; int end; int chunk; int msg; chunk = nfrog/(size - 3); start = chunk * (rank - 3); end = start + chunk; if ( rank == size - 1 ) { end = nfrog; } /* Build initial list of frogs */ for ( i = start; i < end; i++ ) { frog f = make_frog(0,0); frogHop(0, 0, &f->x, &f->y, rng_state); /* random starting position */ push(f, &frogs); } i = 0; /* Loop as for cell list, looking for interrupts. To ensure we * don't get deadlocks, lots of magic happens in update_frog_list */ do { MPI_Status status; int flag; msg = -1; update_frog_list(&frogs, rng_state, comm); MPI_Iprobe(0, CONTROL_TAG, comm, &flag, &status); if ( flag ) { MPI_Recv(&msg, 1, MPI_INT, status.MPI_SOURCE, status.MPI_TAG, comm, &status); } if ( msg == YEAR_END ) { int nfrogs[2] = {0,0}; list it = frogs; /* Calculate global number of frogs (and diseased * frogs) */ while ( it ) { nfrogs[0]++; nfrogs[1] += ((frog)(it->data))->diseased; it = it->next; } /* Should do this by building a sub-communicator and doing * a reduction, but that's hard work */ if ( rank == 3 ) { int tmp[2]; int j; for ( j = 4; j < size; j++ ) { MPI_Recv(&tmp, 2, MPI_INT, j, FROG_REDUCTION, comm, &status); nfrogs[0] += tmp[0]; nfrogs[1] += tmp[1]; } printf("There are %d frogs (%d infected) in year %d\n", nfrogs[0], nfrogs[1], i++); } else { MPI_Send(&nfrogs, 2, MPI_INT, 3, FROG_REDUCTION, comm); } } } while ( msg != SIMULATION_END ) ; delete_list(&frogs, &delete_frog); }
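/*
 * Sketch of the reduction the comment above alludes to ("Should do this by building
 * a sub-communicator and doing a reduction") -- an alternative to the hand-rolled
 * gather onto rank 3, not what the code currently does. For brevity the
 * sub-communicator is created per call; in practice it would be created once and
 * reused.
 */
#include <mpi.h>

static void frog_census(MPI_Comm comm, int rank, int local[2], int global[2])
{
    MPI_Comm frog_comm;
    /* color 0 for frog ranks (>= 3); MPI_UNDEFINED excludes the control/cell ranks */
    MPI_Comm_split(comm, rank >= 3 ? 0 : MPI_UNDEFINED, rank, &frog_comm);
    if (frog_comm != MPI_COMM_NULL) {
        /* global[0] = total frogs, global[1] = total diseased, on the lowest frog rank */
        MPI_Reduce(local, global, 2, MPI_INT, MPI_SUM, 0, frog_comm);
        MPI_Comm_free(&frog_comm);
    }
}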
static void test_pair (void) { int prev, next, count, tag, index, i, outcount, indices[2]; int rank, size, flag, ierr, reqcount; double send_buf[TEST_SIZE], recv_buf[TEST_SIZE]; double buffered_send_buf[TEST_SIZE * 2 + MPI_BSEND_OVERHEAD]; /* factor of two is based on guessing - only dynamic allocation would be safe */ void *buffer; MPI_Status statuses[2]; MPI_Status status; MPI_Request requests[2]; MPI_Comm dupcom, intercom; #ifdef V_T struct _VT_FuncFrameHandle { char *name; int func; int frame; }; typedef struct _VT_FuncFrameHandle VT_FuncFrameHandle_t; VT_FuncFrameHandle_t normal_sends, buffered_sends, buffered_persistent_sends, ready_sends, sync_sends, nblock_sends, nblock_rsends, nblock_ssends, pers_sends, pers_rsends, pers_ssends, sendrecv, sendrecv_repl, intercomm; int classid; VT_classdef( "Application:test_pair", &classid ); #define VT_REGION_DEF( _name, _nameframe, _class ) \ (_nameframe).name=_name; \ VT_funcdef( (_nameframe).name, _class, &((_nameframe).func) ); #define VT_BEGIN_REGION( _nameframe ) \ LOCDEF(); \ VT_begin( (_nameframe).func ) #define VT_END_REGION( _nameframe ) \ LOCDEF(); VT_end( (_nameframe).func ) #else #define VT_REGION_DEF( _name, _nameframe, _class ) #define VT_BEGIN_REGION( _nameframe ) #define VT_END_REGION( _nameframe ) #endif ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); ierr = MPI_Comm_size(MPI_COMM_WORLD, &size); if ( size < 2 ) { if ( rank == 0 ) { printf("Program needs to be run on at least 2 processes.\n"); } ierr = MPI_Abort( MPI_COMM_WORLD, 66 ); } ierr = MPI_Comm_dup(MPI_COMM_WORLD, &dupcom); if ( rank >= 2 ) { /* printf( "%d Calling finalize.\n", rank ); */ ierr = MPI_Finalize( ); exit(0); } next = rank + 1; if (next >= 2) next = 0; prev = rank - 1; if (prev < 0) prev = 1; VT_REGION_DEF( "Normal_Sends", normal_sends, classid ); VT_REGION_DEF( "Buffered_Sends", buffered_sends, classid ); VT_REGION_DEF( "Buffered_Persistent_Sends", buffered_persistent_sends, classid ); VT_REGION_DEF( "Ready_Sends", ready_sends, classid ); VT_REGION_DEF( "Sync_Sends", sync_sends, classid ); VT_REGION_DEF( "nblock_Sends", nblock_sends, classid ); VT_REGION_DEF( "nblock_RSends", nblock_rsends, classid ); VT_REGION_DEF( "nblock_SSends", nblock_ssends, classid ); VT_REGION_DEF( "Pers_Sends", pers_sends, classid ); VT_REGION_DEF( "Pers_RSends", pers_rsends, classid ); VT_REGION_DEF( "Pers_SSends", pers_ssends, classid ); VT_REGION_DEF( "SendRecv", sendrecv, classid ); VT_REGION_DEF( "SendRevc_Repl", sendrecv_repl, classid ); VT_REGION_DEF( "InterComm", intercomm, classid ); /* * Normal sends */ VT_BEGIN_REGION( normal_sends ); if (rank == 0) printf ("Send\n"); tag = 0x100; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); LOCDEF(); MPI_Send(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check(recv_buf, prev, tag, count, &status, TEST_SIZE, "send and recv"); } else { LOCDEF(); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE,"send and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( normal_sends ); /* * Buffered sends */ VT_BEGIN_REGION( buffered_sends ); if (rank == 0) printf ("Buffered Send\n"); tag = 138; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { 
init_test_data(send_buf,TEST_SIZE,0); LOCDEF(); MPI_Buffer_attach(buffered_send_buf, sizeof(buffered_send_buf)); MPI_Bsend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); MPI_Buffer_detach(&buffer, &size); if(buffer != buffered_send_buf || size != sizeof(buffered_send_buf)) { printf ("[%d] Unexpected buffer returned by MPI_Buffer_detach(): %p/%d != %p/%d\n", rank, buffer, size, buffered_send_buf, (int)sizeof(buffered_send_buf)); MPI_Abort(MPI_COMM_WORLD, 201); } MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check(recv_buf, prev, tag, count, &status, TEST_SIZE, "send and recv"); } else { LOCDEF(); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE,"send and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( buffered_sends ); /* * Buffered sends */ VT_BEGIN_REGION( buffered_persistent_sends ); if (rank == 0) printf ("Buffered Persistent Send\n"); tag = 238; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); LOCDEF(); MPI_Buffer_attach(buffered_send_buf, sizeof(buffered_send_buf)); MPI_Bsend_init(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, requests); MPI_Start(requests); MPI_Wait(requests, statuses); MPI_Request_free(requests); MPI_Buffer_detach(&buffer, &size); if(buffer != buffered_send_buf || size != sizeof(buffered_send_buf)) { printf ("[%d] Unexpected buffer returned by MPI_Buffer_detach(): %p/%d != %p/%d\n", rank, buffer, size, buffered_send_buf, (int)sizeof(buffered_send_buf)); MPI_Abort(MPI_COMM_WORLD, 201); } MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check(recv_buf, prev, tag, count, &status, TEST_SIZE, "send and recv"); } else { LOCDEF(); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE,"send and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( buffered_persistent_sends ); /* * Ready sends. Note that we must insure that the receive is posted * before the rsend; this requires using Irecv. 
*/ VT_BEGIN_REGION( ready_sends ); if (rank == 0) printf ("Rsend\n"); tag = 1456; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Recv(MPI_BOTTOM, 0, MPI_INT, next, tag, MPI_COMM_WORLD, &status); MPI_Rsend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); MPI_Probe(MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &status); if (status.MPI_SOURCE != prev) printf ("Incorrect src, expected %d, got %d\n",prev, status.MPI_SOURCE); if (status.MPI_TAG != tag) printf ("Incorrect tag, expected %d, got %d\n",tag, status.MPI_TAG); MPI_Get_count(&status, MPI_DOUBLE, &i); if (i != count) printf ("Incorrect count, expected %d, got %d\n",count,i); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "rsend and recv"); } else { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); MPI_Send( MPI_BOTTOM, 0, MPI_INT, next, tag, MPI_COMM_WORLD); MPI_Wait(requests, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "rsend and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( ready_sends ); /* * Synchronous sends */ VT_BEGIN_REGION( sync_sends ); if (rank == 0) printf ("Ssend\n"); tag = 1789; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Iprobe(MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &flag, &status); if (flag) printf ("Iprobe succeeded! source %d, tag %d\n",status.MPI_SOURCE, status.MPI_TAG); MPI_Ssend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); while (!flag) MPI_Iprobe(MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &flag, &status); if (status.MPI_SOURCE != prev) printf ("Incorrect src, expected %d, got %d\n",prev, status.MPI_SOURCE); if (status.MPI_TAG != tag) printf ("Incorrect tag, expected %d, got %d\n",tag, status.MPI_TAG); MPI_Get_count(&status, MPI_DOUBLE, &i); if (i != count) printf ("Incorrect count, expected %d, got %d\n",count,i); MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "ssend and recv"); } else { MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "ssend and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Ssend(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( sync_sends ); /* * Nonblocking normal sends */ VT_BEGIN_REGION( nblock_sends ); if (rank == 0) printf ("Isend\n"); tag = 2123; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); init_test_data(send_buf,TEST_SIZE,0); MPI_Isend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, (requests+1)); MPI_Waitall(2, requests, statuses); rq_check( requests, 2, "isend and irecv" ); msg_check(recv_buf,prev,tag,count,statuses, TEST_SIZE,"isend and irecv"); } else { MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check(recv_buf,prev,tag,count,&status, TEST_SIZE,"isend and irecv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Isend(recv_buf, count, MPI_DOUBLE, next, tag,MPI_COMM_WORLD, (requests)); MPI_Wait((requests), &status); rq_check(requests, 1, "isend (and 
recv)"); } VT_END_REGION( nblock_sends ); /* * Nonblocking ready sends */ VT_BEGIN_REGION( nblock_rsends ); if (rank == 0) printf ("Irsend\n"); tag = 2456; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); init_test_data(send_buf,TEST_SIZE,0); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, next, 0, MPI_BOTTOM, 0, MPI_INT, next, 0, dupcom, &status); MPI_Irsend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, (requests+1)); reqcount = 0; while (reqcount != 2) { MPI_Waitany( 2, requests, &index, statuses); if( index == 0 ) { memcpy( &status, statuses, sizeof(status) ); } reqcount++; } rq_check( requests, 1, "irsend and irecv"); msg_check(recv_buf,prev,tag,count,&status, TEST_SIZE,"irsend and irecv"); } else { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); MPI_Sendrecv( MPI_BOTTOM, 0, MPI_INT, next, 0, MPI_BOTTOM, 0, MPI_INT, next, 0, dupcom, &status); flag = 0; while (!flag) MPI_Test(requests, &flag, &status); rq_check( requests, 1, "irsend and irecv (test)"); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "irsend and irecv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Irsend(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, requests); MPI_Waitall(1, requests, statuses); rq_check( requests, 1, "irsend and irecv"); } VT_END_REGION( nblock_rsends ); /* * Nonblocking synchronous sends */ VT_BEGIN_REGION( nblock_ssends ); if (rank == 0) printf ("Issend\n"); tag = 2789; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { MPI_Irecv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests ); init_test_data(send_buf,TEST_SIZE,0); MPI_Issend(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, (requests+1)); flag = 0; while (!flag) MPI_Testall(2, requests, &flag, statuses); rq_check( requests, 2, "issend and irecv (testall)"); msg_check( recv_buf, prev, tag, count, statuses, TEST_SIZE, "issend and recv"); } else { MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "issend and recv"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Issend(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD,requests); flag = 0; while (!flag) MPI_Testany(1, requests, &index, &flag, statuses); rq_check( requests, 1, "issend and recv (testany)"); } VT_END_REGION( nblock_ssends ); /* * Persistent normal sends */ VT_BEGIN_REGION( pers_sends ); if (rank == 0) printf ("Send_init\n"); tag = 3123; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); MPI_Send_init(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, requests); MPI_Recv_init(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, (requests+1)); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Startall(2, requests); MPI_Waitall(2, requests, statuses); msg_check( recv_buf, prev, tag, count, (statuses+1), TEST_SIZE, "persistent send/recv"); } else { MPI_Start((requests+1)); MPI_Wait((requests+1), &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "persistent send/recv"); init_test_data(send_buf,TEST_SIZE,1); MPI_Start(requests); MPI_Wait(requests, &status); } MPI_Request_free(requests); MPI_Request_free((requests+1)); VT_END_REGION( pers_sends ); /* * Persistent ready sends */ VT_BEGIN_REGION( pers_rsends ); if (rank == 0) printf ("Rsend_init\n"); tag = 3456; 
count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); MPI_Rsend_init(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, requests); MPI_Recv_init(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, (requests+1)); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Barrier( MPI_COMM_WORLD ); MPI_Startall(2, requests); reqcount = 0; while (reqcount != 2) { MPI_Waitsome(2, requests, &outcount, indices, statuses); for (i=0; i<outcount; i++) { if (indices[i] == 1) { msg_check( recv_buf, prev, tag, count, (statuses+i), TEST_SIZE, "waitsome"); } reqcount++; } } } else { MPI_Start((requests+1)); MPI_Barrier( MPI_COMM_WORLD ); flag = 0; while (!flag) MPI_Test((requests+1), &flag, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "test"); init_test_data(send_buf,TEST_SIZE,1); MPI_Start(requests); MPI_Wait(requests, &status); } MPI_Request_free(requests); MPI_Request_free((requests+1)); VT_END_REGION( pers_rsends ); /* * Persistent synchronous sends */ VT_BEGIN_REGION( pers_ssends ); if (rank == 0) printf ("Ssend_init\n"); tag = 3789; count = TEST_SIZE / 3; clear_test_data(recv_buf,TEST_SIZE); MPI_Ssend_init(send_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, (requests+1)); MPI_Recv_init(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, requests); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Startall(2, requests); reqcount = 0; while (reqcount != 2) { MPI_Testsome(2, requests, &outcount, indices, statuses); for (i=0; i<outcount; i++) { if (indices[i] == 0) { msg_check( recv_buf, prev, tag, count, (statuses+i), TEST_SIZE, "testsome"); } reqcount++; } } } else { MPI_Start(requests); flag = 0; while (!flag) MPI_Testany(1, requests, &index, &flag, statuses); msg_check( recv_buf, prev, tag, count, statuses, TEST_SIZE, "testany" ); init_test_data(send_buf,TEST_SIZE,1); MPI_Start((requests+1)); MPI_Wait((requests+1), &status); } MPI_Request_free(requests); MPI_Request_free((requests+1)); VT_END_REGION( pers_ssends ); /* * Send/receive. */ VT_BEGIN_REGION( sendrecv ); if (rank == 0) printf ("Sendrecv\n"); tag = 4123; count = TEST_SIZE / 5; clear_test_data(recv_buf,TEST_SIZE); if (rank == 0) { init_test_data(send_buf,TEST_SIZE,0); MPI_Sendrecv(send_buf, count, MPI_DOUBLE, next, tag, recv_buf, count, MPI_DOUBLE, prev, tag, MPI_COMM_WORLD, &status ); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "sendrecv"); } else { MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE, "recv/send"); init_test_data(recv_buf,TEST_SIZE,1); MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD); } VT_END_REGION( sendrecv ); #ifdef V_T VT_flush(); #endif /* * Send/receive replace. 
     */

    VT_BEGIN_REGION( sendrecv_repl );

    if (rank == 0) printf ("Sendrecv_replace\n");

    tag = 4456;
    count = TEST_SIZE / 3;

    if (rank == 0) {
        init_test_data(recv_buf, TEST_SIZE,0);
        for (i=count; i< TEST_SIZE; i++)
            recv_buf[i] = 0.0;
        MPI_Sendrecv_replace(recv_buf, count, MPI_DOUBLE, next, tag,
                             prev, tag, MPI_COMM_WORLD, &status);
        msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE,
                   "sendrecvreplace");
    }
    else {
        clear_test_data(recv_buf,TEST_SIZE);
        MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG,
                 MPI_COMM_WORLD, &status);
        msg_check( recv_buf, prev, tag, count, &status, TEST_SIZE,
                   "recv/send for replace");
        init_test_data(recv_buf,TEST_SIZE,1);
        MPI_Send(recv_buf, count, MPI_DOUBLE, next, tag, MPI_COMM_WORLD);
    }

    VT_END_REGION( sendrecv_repl );

    /*
     * Send/Receive via inter-communicator
     */

    VT_BEGIN_REGION( intercomm );

    MPI_Intercomm_create(MPI_COMM_SELF, 0, MPI_COMM_WORLD, next, 1, &intercom);

    if (rank == 0) printf ("Send via inter-communicator\n");

    tag = 4018;
    count = TEST_SIZE / 5;

    clear_test_data(recv_buf,TEST_SIZE);

    if (rank == 0) {
        init_test_data(send_buf,TEST_SIZE,0);
        LOCDEF();
        MPI_Send(send_buf, count, MPI_DOUBLE, 0, tag, intercom);
        MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG,
                 intercom, &status);
        msg_check(recv_buf, 0, tag, count, &status, TEST_SIZE,
                  "send and recv via inter-communicator");
    }
    else if (rank == 1) {
        LOCDEF();
        MPI_Recv(recv_buf, TEST_SIZE, MPI_DOUBLE,MPI_ANY_SOURCE, MPI_ANY_TAG,
                 intercom, &status);
        msg_check( recv_buf, 0, tag, count, &status, TEST_SIZE,
                   "send and recv via inter-communicator");
        init_test_data(recv_buf,TEST_SIZE,0);
        MPI_Send(recv_buf, count, MPI_DOUBLE, 0, tag, intercom);
    }

    VT_END_REGION( intercomm );

    MPI_Comm_free(&intercom);
    MPI_Comm_free(&dupcom);
}
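
/*
 * A minimal standalone sketch of the persistent-request pattern exercised
 * in the pers_* regions above: build the requests once with
 * MPI_Send_init/MPI_Recv_init, then start and complete them repeatedly.
 * The buffer size, tag, and ring neighbours used here are illustrative
 * assumptions, not values taken from the test.
 */
#include <stdio.h>
#include <mpi.h>

#define RING_COUNT 16
#define RING_TAG   99

int main(int argc, char *argv[])
{
    int rank, size, iter, i;
    double send_buf[RING_COUNT], recv_buf[RING_COUNT];
    MPI_Request reqs[2];
    MPI_Status  stats[2];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int next = (rank + 1) % size;          /* ring neighbours */
    int prev = (rank + size - 1) % size;

    for (i = 0; i < RING_COUNT; i++)
        send_buf[i] = rank + i;

    /* Build the send/recv requests once ... */
    MPI_Send_init(send_buf, RING_COUNT, MPI_DOUBLE, next, RING_TAG,
                  MPI_COMM_WORLD, &reqs[0]);
    MPI_Recv_init(recv_buf, RING_COUNT, MPI_DOUBLE, prev, RING_TAG,
                  MPI_COMM_WORLD, &reqs[1]);

    /* ... then start and complete them as many times as needed. */
    for (iter = 0; iter < 3; iter++) {
        MPI_Startall(2, reqs);
        MPI_Waitall(2, reqs, stats);
    }

    /* Persistent requests must be freed explicitly. */
    MPI_Request_free(&reqs[0]);
    MPI_Request_free(&reqs[1]);

    if (rank == 0)
        printf("persistent ring exchange done\n");

    MPI_Finalize();
    return 0;
}
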
void img (const char *FileNameImg)
{
    FILE *FileImg;
    COLOR *TabColor, *Color, *TileColor;
    STRING Name;
    INDEX i, j, rank;
    BYTE Byte;
    int N = 18988, err, provided;
    int next_proc;
    MPI_Request rs;
    MPI_Status status;

    MPI_Init_thread(NULL, NULL, MPI_THREAD_MULTIPLE, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &P);
    next_proc = (rank + 1) % P;
    if (next_proc == 0) next_proc++;
    P--;

    MPI_Type_vector(1, 3, 0, MPI_FLOAT, &MPI_COLOR);
    MPI_Type_commit(&MPI_COLOR);

    if (rank == 0) {
        strcpy (Name, FileNameImg);
        strcat (Name, ".ppm");
        INIT_FILE (FileImg, Name, "w");
        fprintf (FileImg, "P6\n%d %d\n255\n", Img.Pixel.i, Img.Pixel.j);
    }

    // number of tiles
    Ci = Img.Pixel.i / TILE_SIZE + (Img.Pixel.i % TILE_SIZE?1:0); // number of tiles in dimension i
    Cj = Img.Pixel.j / TILE_SIZE + (Img.Pixel.j % TILE_SIZE?1:0); // number of tiles in dimension j
    int C = Ci * Cj;
    int q = (C+P-1)/P;
    int size = Img.Pixel.i * Img.Pixel.j ;
    N = C/2+1;

    // buffer for each tile
    INIT_MEM (TileColor, TILE_SIZE * TILE_SIZE, COLOR);

    if (rank != 0) {
        struct TileQueue tiles = {NULL, NULL};

        // Init tasks
        init(&tiles,rank,q,N,C);

        // Init mutex, semaphores & threads
        pthread_mutex_init(&mutex,NULL);
        pthread_mutex_init(&mutex_time,NULL);
        sem_init(&wait_work, 0, 0);
        sem_init(&ask_work, 0, 0);
        pthread_t tid[NB_THREADS];
        for (i = 0; i < NB_THREADS; i++){
            err = pthread_create(&(tid[i]), NULL, (void*)tile_fill, (void*)&tiles);
            if (err != 0)
                printf("\ncan't create thread :[%s]", strerror(err));
        }

        // work stealing?
        if (vol){
            // Main thread: Communicator
            while (!terminated) {
                int flag = 0, msg;
                MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, &status);
                if (flag) {
                    // We DID receive a communication so we CAN do a blocking receive
                    MPI_Recv(&msg, 1, MPI_INT, status.MPI_SOURCE, status.MPI_TAG,
                             MPI_COMM_WORLD, &status);
                    // TAG contains the information about what the message is
                    switch (status.MPI_TAG){
                    case TERMINATE:
                        // No more jobs; threads can finish their jobs and return
                        MPI_Isend(&msg, 1, MPI_INT, next_proc, TERMINATE, MPI_COMM_WORLD, &rs);
                        terminated = 1;
                        break;
                    case WORK_ASK:
                        // process msg is asking for work
                        pthread_mutex_lock(&mutex);
                        if(!isEmpty(&tiles)){
                            int tile = firstElement(&tiles);
                            pop(&tiles);
                            pthread_mutex_unlock(&mutex);
                            MPI_Isend(&tile, 1, MPI_INT, msg, WORK_SEND, MPI_COMM_WORLD, &rs);
                        } else {
                            pthread_mutex_unlock(&mutex);
                            if (msg == rank){
                                MPI_Isend(&msg, 1, MPI_INT, next_proc, TERMINATE, MPI_COMM_WORLD, &rs);
                                terminated = 1;
                            } else {
                                MPI_Isend(&msg, 1, MPI_INT, next_proc, WORK_ASK, MPI_COMM_WORLD, &rs);
                            }
                        }
                        break;
                    case WORK_SEND:
                        // Received a job
                        pthread_mutex_lock(&mutex);
                        addTile(&tiles, msg);
                        pthread_mutex_unlock(&mutex);
                        sem_post(&wait_work);
                        break;
                    default:
                        fprintf(stderr, "Err: Unknown message: %d, with tag %d\n", msg,status.MPI_TAG);
                        break;
                    }
                }
                if (sem_trywait(&ask_work) == 0){
                    MPI_Isend(&rank, 1, MPI_INT, next_proc, WORK_ASK, MPI_COMM_WORLD, &rs);
                }
            }
            for (i = 0; i < NB_THREADS; i++){
                sem_post(&wait_work);
            }
        }
        for (i = 0; i < NB_THREADS; i++){
            pthread_join(tid[i],NULL);
        }
        pthread_mutex_destroy(&mutex);
        pthread_mutex_destroy(&mutex_time);
        sem_destroy(&wait_work);
        sem_destroy(&ask_work);
        fprintf(stderr, "%d %ld\n", rank, local_time);
    }

    // process 0 gathers all the tiles
    if (rank == 0){
        // If fake tasks: we don't receive anything and don't write the image
        FILE* fd = fopen("config","r");
        if (fd != NULL){
            int fake;
            fscanf(fd,"%d\n",&fake);
            if (fake){
                EXIT_FILE(FileImg);
                EXIT_MEM(TileColor);
                MPI_Finalize();
                return;
            }
        }
        // final image buffer that will receive the tiles
        INIT_MEM (TabColor, size, COLOR);

        // Receive tiles from other procs
        for (i = 0; i < C ; i++){
            MPI_Recv(TileColor, TILE_SIZE * TILE_SIZE, MPI_COLOR, MPI_ANY_SOURCE,
                     MPI_ANY_TAG, MPI_COMM_WORLD, &status);
            int current_tile = status.MPI_TAG - TILE_TAG_INDEX;
            int j_begin = rank_j(current_tile,Cj);
            int index_begin = rank_i(current_tile,Ci) + j_begin * Img.Pixel.i;
            for (j = 0; j < TILE_SIZE && j_begin + j < Img.Pixel.j; j++) {
                memcpy(&TabColor[index_begin + j * Img.Pixel.i],
                       &TileColor[j * TILE_SIZE],
                       MIN(Img.Pixel.i - rank_i(current_tile,Ci),TILE_SIZE) * sizeof(COLOR));
            }
        }

        // write the image to the file
        for (j = 0, Color = TabColor; j < size; j++, Color++) {
            Byte = Color->r < 1.0 ? 255.0*Color->r : 255.0;
            putc (Byte, FileImg);
            Byte = Color->g < 1.0 ? 255.0*Color->g : 255.0;
            putc (Byte, FileImg);
            Byte = Color->b < 1.0 ? 255.0*Color->b : 255.0;
            putc (Byte, FileImg);
        }
        EXIT_FILE (FileImg);
        printf("Copied in file\n");
        EXIT_MEM (TabColor);
    }
    EXIT_MEM (TileColor);
    MPI_Finalize();
}
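
/*
 * A minimal sketch of the reassembly step above: copying one received
 * TILE_SIZE x TILE_SIZE tile into the row-major image buffer, clamping the
 * copy at the right and bottom image edges. The project's COLOR type and
 * rank_i()/rank_j() helpers are not reproduced here; the tile index is
 * assumed to be row-major over a grid with Ci tile columns, which may
 * differ from the real helpers.
 */
#include <string.h>

typedef struct { float r, g, b; } color_sketch_t;   /* stand-in for COLOR */

static void place_tile(color_sketch_t *image, int width, int height,
                       const color_sketch_t *tile, int tile_index,
                       int Ci, int tile_size)
{
    int i0 = (tile_index % Ci) * tile_size;   /* pixel column of tile origin */
    int j0 = (tile_index / Ci) * tile_size;   /* pixel row of tile origin    */
    int copy_w = (width - i0 < tile_size) ? width - i0 : tile_size;
    int j;

    /* Copy row by row; partial tiles at the image border copy fewer pixels. */
    for (j = 0; j < tile_size && j0 + j < height; j++)
        memcpy(&image[i0 + (j0 + j) * width],
               &tile[j * tile_size],
               (size_t)copy_w * sizeof(color_sketch_t));
}
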
/**
 * Check for and process incoming messages via MPI.
 */
void mpi_handle()
{
    MPI_Status mpi_status;
    int mpi_flag;
    unsigned int l;
    unsigned int v = 1;
    char *b = NULL;

    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &mpi_flag, &mpi_status);

    // send the lowest penalty found so far (conditional; see the function body)
    mpi_send_penalty();

    if(mpi_flag) {
        srpdebug("msg", node, "message <tag=%d>", mpi_status.MPI_TAG);
        switch(mpi_status.MPI_TAG) {
        case MSG_REQUEST:
            srpdebug("msg", node, "MSG_REQUEST");
            MPI_Recv(&v, 1, MPI_UNSIGNED, mpi_status.MPI_SOURCE, MSG_REQUEST,
                     MPI_COMM_WORLD, &mpi_status);
            srpdebug("mpi", node, "work request received <src=%d>",
                     mpi_status.MPI_SOURCE);
            // TODO split the work and reply with MSG_STACK or MSG_NOSTACK
            if(s->s_real > 1) {
                mpi_send_stack(2, mpi_status.MPI_SOURCE);
            } else {
                mpi_send_nostack(mpi_status.MPI_SOURCE);
            }
            break;
        case MSG_STACK:
            srpdebug("msg", node, "MSG_STACK");
            mpi_recv_stack(mpi_status.MPI_SOURCE);
            break;
        case MSG_NOSTACK:
            srpdebug("msg", node, "MSG_NOSTACK");
            mpi_recv_nostack(mpi_status.MPI_SOURCE);
            break;
        case MSG_PENALTY:
            srpdebug("msg", node, "MSG_PENALTY");
            mpi_recv_penalty(mpi_status.MPI_SOURCE);
            break;
        case MSG_TOKEN:
            srpdebug("msg", node, "MSG_TOKEN");
            mpi_recv_token(mpi_status.MPI_SOURCE);
            break;
        case MSG_FINALIZE:
            srpdebug("msg", node, "MSG_FINALIZE");
            mpi_recv_finalize(mpi_status.MPI_SOURCE);
            finalize();
            break;
        default:
            // unknown message
            srpprintf(stderr, "msg", node, "unknown message");
            exit(EXIT_FAILURE);
        }
    }
    /*
    else {
        srpdebug("mpi", node, "no messages");
    }
    */
}
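
/*
 * A minimal sketch of the probe-and-dispatch pattern used by mpi_handle():
 * poll with MPI_Iprobe, and only when a message is pending do a matching
 * blocking MPI_Recv, then branch on the tag. The tag values and the single
 * int payload are illustrative assumptions, not the project's MSG_*
 * constants or srp* helpers.
 */
#include <stdio.h>
#include <mpi.h>

enum { TAG_WORK = 1, TAG_RESULT = 2, TAG_SHUTDOWN = 3 };

/* Returns 1 when a shutdown message was consumed, 0 otherwise. */
static int poll_messages(void)
{
    int flag = 0;
    int payload;
    MPI_Status st;

    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, &st);
    if (!flag)
        return 0;                       /* nothing pending: do not block */

    /* Receive exactly the message that was probed (same source and tag). */
    MPI_Recv(&payload, 1, MPI_INT, st.MPI_SOURCE, st.MPI_TAG,
             MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    switch (st.MPI_TAG) {
    case TAG_WORK:
        printf("work item %d from rank %d\n", payload, st.MPI_SOURCE);
        break;
    case TAG_RESULT:
        printf("result %d from rank %d\n", payload, st.MPI_SOURCE);
        break;
    case TAG_SHUTDOWN:
        return 1;
    default:
        fprintf(stderr, "unknown tag %d\n", st.MPI_TAG);
        break;
    }
    return 0;
}
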