void Create_parms_datatype(MPI_Datatype *Parmstype){ struct s_mc_parms *x; x = calloc(1,sizeof(struct s_mc_parms)); if (!x) Error("Cannot allocate mc_parms"); MPI_Aint adress[68]; MPI_Get_address(x, &adress[0]); MPI_Get_address(&(*x).npol, &adress[1]); MPI_Get_address(&(*x).nstep, &adress[2]); MPI_Get_address(&(*x).seed, &adress[3]); MPI_Get_address(&(*x).dw_flip, &adress[4]); MPI_Get_address(&(*x).dw_pivot, &adress[5]); MPI_Get_address(&(*x).dw_mpivot, &adress[6]); MPI_Get_address(&(*x).dw_lpivot, &adress[7]); MPI_Get_address(&(*x).dw_mflip, &adress[8]); MPI_Get_address(&(*x).fntrj, &adress[9]); MPI_Get_address(&(*x).fne, &adress[10]); MPI_Get_address(&(*x).flastp, &adress[11]); MPI_Get_address(&(*x).fnproc, &adress[12]); MPI_Get_address(&(*x).nprinttrj, &adress[13]); MPI_Get_address(&(*x).nprintlog, &adress[14]); MPI_Get_address(&(*x).nprinte, &adress[15]); MPI_Get_address(&(*x).shell, &adress[16]); MPI_Get_address(&(*x).nshell, &adress[17]); MPI_Get_address(&(*x).r2shell, &adress[18]); MPI_Get_address(&(*x).ntemp, &adress[19]); MPI_Get_address(&(*x).T, &adress[20]); MPI_Get_address(&(*x).randdw, &adress[21]); MPI_Get_address(&(*x).debug, &adress[22]); MPI_Get_address(&(*x).movetype, &adress[23]); MPI_Get_address(&(*x).nmul_mpivot, &adress[24]); MPI_Get_address(&(*x).nmul_lpivot, &adress[25]); MPI_Get_address(&(*x).nmul_mflip, &adress[26]); MPI_Get_address(&(*x).nosidechains, &adress[27]); MPI_Get_address(&(*x).noangpot, &adress[28]); MPI_Get_address(&(*x).nodihpot, &adress[29]); MPI_Get_address(&(*x).nrun, &adress[30]); MPI_Get_address(&(*x).always_restart, &adress[31]); MPI_Get_address(&(*x).record_native, &adress[32]); MPI_Get_address(&(*x).acc, &adress[33]); MPI_Get_address(&(*x).mov, &adress[34]); MPI_Get_address(&(*x).disentangle, &adress[35]); MPI_Get_address(&(*x).stempering, &adress[36]); MPI_Get_address(&(*x).dx_com, &adress[37]); MPI_Get_address(&(*x).dx_clm, &adress[38]); MPI_Get_address(&(*x).r_cloose, &adress[39]); MPI_Get_address(&(*x).a_cloose, &adress[40]); MPI_Get_address(&(*x).d_cloose, &adress[41]); MPI_Get_address(&(*x).hb, &adress[42]); MPI_Get_address(&(*x).anneal, &adress[43]); MPI_Get_address(&(*x).anneal_often, &adress[44]); MPI_Get_address(&(*x).anneal_step, &adress[45]); MPI_Get_address(&(*x).anneal_t, &adress[46]); MPI_Get_address(&(*x).anneal_recov, &adress[47]); #ifdef OPTIMIZEPOT MPI_Get_address(&(*x).op_minim, &adress[48]); MPI_Get_address(&(*x).op_itermax, &adress[49]); MPI_Get_address(&(*x).op_step, &adress[50]); MPI_Get_address(&(*x).op_T, &adress[51]); MPI_Get_address(&(*x).op_deltat, &adress[52]); MPI_Get_address(&(*x).op_stop, &adress[53]); MPI_Get_address(&(*x).op_print, &adress[54]); MPI_Get_address(&(*x).op_emin, &adress[55]); MPI_Get_address(&(*x).op_emax, &adress[56]); MPI_Get_address(&(*x).op_wait, &adress[57]); MPI_Get_address(&(*x).op_r, &adress[58]); MPI_Get_address(&(*x).op_r0, &adress[59]); MPI_Get_address(&(*x).nstep_exchange, &adress[60]); MPI_Get_address(&(*x).nmul_local,&adress[61]); MPI_Get_address(&(*x).chi2start,&adress[62]); MPI_Get_address(&(*x).ishell,&adress[63]); MPI_Get_address(&(*x).bgs_a,&adress[64]); MPI_Get_address(&(*x).bgs_b,&adress[65]); MPI_Get_address(&(*x).dtheta,&adress[66]); MPI_Get_address(&(*x).iT_bias,&adress[67]); #else MPI_Get_address(&(*x).nstep_exchange, &adress[48]); #endif #ifdef OPTIMIZEPOT MPI_Datatype type[67]={MPI_INT, MPI_INT, MPI_LONG, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_CHAR, MPI_CHAR, MPI_CHAR, MPI_CHAR, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_DOUBLE, MPI_INT, 
MPI_DOUBLE, MPI_INT, MPI_INT, MPI_INT, MPI_INT,MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_DOUBLE, MPI_INT, MPI_CHAR, MPI_INT, MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_DOUBLE, MPI_INT, MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_INT,MPI_INT,MPI_INT,MPI_DOUBLE,MPI_DOUBLE,MPI_DOUBLE,MPI_INT}; int blocklen[67]={1,1,1,1,1,1,1,1,50,50,50,50,1,1,1,1,1,1,1,NREPMAX,1,1,NMOVES,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,50,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; MPI_Aint disp[67]; int i; for(i=0; i<67; i++) {disp[i]=adress[i+1]-adress[0];} MPI_Type_create_struct(67,blocklen,disp,type,Parmstype); MPI_Type_commit(Parmstype); free(x); #else MPI_Datatype type[48]={MPI_INT, MPI_INT, MPI_LONG, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_CHAR, MPI_CHAR, MPI_CHAR, MPI_CHAR, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_DOUBLE, MPI_INT, MPI_DOUBLE, MPI_INT, MPI_INT, MPI_INT, MPI_INT,MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_DOUBLE, MPI_INT, MPI_INT}; int blocklen[48]={1,1,1,1,1,1,1,1,50,50,50,50,1,1,1,1,1,1,1,NREPMAX,1,1,NMOVES,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; MPI_Aint disp[48]; int i; for(i=0; i<48; i++) {disp[i]=adress[i+1]-adress[0];} MPI_Type_create_struct(48,blocklen,disp,type,Parmstype); MPI_Type_commit(Parmstype); free(x); #endif }
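/* Create_parms_datatype above fills four parallel arrays (addresses, types, block lengths,
 * displacements) whose entry counts must stay in lockstep across the two OPTIMIZEPOT branches.
 * A minimal sketch of a pattern that keeps them in sync with a running counter, using a small
 * hypothetical struct (not the real s_mc_parms): */
#include <mpi.h>
#include <stdlib.h>

struct demo_parms { int npol; long nstep; double dw_flip; char fntrj[50]; };

static void build_demo_parms_type(MPI_Datatype *newtype)
{
    struct demo_parms *x = calloc(1, sizeof *x);
    enum { MAXFIELDS = 8 };
    MPI_Aint base, addr, disp[MAXFIELDS];
    MPI_Datatype types[MAXFIELDS];
    int blocklen[MAXFIELDS];
    int n = 0;

    MPI_Get_address(x, &base);

    /* One macro adds a field to all three arrays at once, so the count,
     * type, block-length and displacement entries cannot fall out of step. */
#define ADD_FIELD(member, mpitype, count)                      \
    do { MPI_Get_address(&x->member, &addr);                   \
         disp[n] = addr - base;                                \
         types[n] = (mpitype); blocklen[n] = (count); n++; } while (0)

    ADD_FIELD(npol,    MPI_INT,    1);
    ADD_FIELD(nstep,   MPI_LONG,   1);
    ADD_FIELD(dw_flip, MPI_DOUBLE, 1);
    ADD_FIELD(fntrj,   MPI_CHAR,  50);
#undef ADD_FIELD

    MPI_Type_create_struct(n, blocklen, disp, types, newtype);
    MPI_Type_commit(newtype);
    free(x);
}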
/* * Dump the timing information to a file. * Called both from C and Fortran API's (adios.c and adiosf.c) */ void adios_timing_write_xml_common (int64_t fd_p, const char* filename) { #if defined ADIOS_TIMER_EVENTS && !defined _NOMPI //No timing information on single process struct adios_file_struct * fd = (struct adios_file_struct *) fd_p; if (!fd) { adios_error (err_invalid_file_pointer, "Invalid handle passed to adios_get_timing_name\n"); return; } if (!fd->group || !fd->group->prev_timing_obj) { // No timing info, don't write anything. return; } int size=1, rank=0, i, global_event_count, count_to_send; int * counts; int * displs; struct adios_timing_event_struct* events; MPI_Datatype event_type; if (fd->comm != MPI_COMM_NULL) { MPI_Comm_size (fd->comm, &size); MPI_Comm_rank (fd->comm, &rank); } if (rank == 0) { counts = (int*) malloc (sizeof (int) * size); } // Collect all of the events on proc 0 // First, per proc event counts count_to_send = (fd->group->prev_timing_obj->event_count > ADIOS_TIMING_MAX_EVENTS) ? ADIOS_TIMING_MAX_EVENTS : fd->group->prev_timing_obj->event_count; MPI_Gather ( &count_to_send, // sendbuf 1, // sendcount MPI_INT, // sendtype counts, // recvbuf 1, // recvcount MPI_INT, // recvtype 0, // root fd->comm // comm ); if (rank == 0) { displs = (int*) malloc (sizeof (int) * size); displs[0] = 0; global_event_count = counts[0]; for (i = 1; i < size; i++) { displs[i] = displs[i-1] + counts[i-1]; global_event_count += counts[i]; } events = (struct adios_timing_event_struct*) malloc ( sizeof (struct adios_timing_event_struct) * global_event_count); } // structure of the adios_timing_event_struct (int, int, double) int blocklens[] = {2,1}; MPI_Aint disps[] = {0,2*sizeof(int)}; MPI_Datatype types[] = {MPI_INT,MPI_DOUBLE}; MPI_Type_create_struct ( 2, // count blocklens, // array_of_blocklengths disps, // array_of_displacements types, // array_of_types &event_type ); MPI_Type_commit (&event_type); // Now the events MPI_Gatherv ( &fd->group->prev_timing_obj->events, // sendbuf count_to_send, // sendcount event_type, // sendtype events, //recvbuf counts, // recvcounts displs, // displacements event_type, // recvtype 0, // root fd->comm // comm ); // Gather the write sizes int *write_sizes = NULL; if (rank == 0) { write_sizes = (int*) malloc (sizeof(int) * size); } MPI_Gather ( &fd->write_size_bytes, //sendbuf 1, //sendcount MPI_INT, //sendtype write_sizes, //recvbuf 1, //recvcount MPI_INT, //recvtype 0, //root fd->comm //comm ); // Write the events to a file if (rank == 0) { FILE* f = fopen (filename, "a"); int event_rank; for (i = 0; i < size; i++) { fprintf (f, "'%i'%i\n", i, write_sizes[i]); } // Write the labels for (i = 0; i < fd->group->prev_timing_obj->internal_count; i++) { fprintf (f, ":%i:%s\n", ADIOS_TIMING_MAX_USER_TIMERS + i, fd->group->prev_timing_obj->names[ADIOS_TIMING_MAX_USER_TIMERS + i]); } // Now the event data i = 0; for (event_rank = 0; event_rank < size; event_rank++) { for ( ; i < displs[event_rank] + counts[event_rank]; i++) { fprintf (f, "%i,%i%s,%f\n", event_rank, events[i].type, events[i].is_start?"S":"E", events[i].time); } } fclose(f); } if (rank == 0) { if (counts) free (counts); } #else log_warn ("Timing events are not currently available.\n" "To use the timing events, you must enable them when building ADIOS.\n" "Use --enable-timer-events during the configuration step.\n"); #endif }
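/* The event_type above describes {2 x int, 1 x double} with byte displacements {0, 2*sizeof(int)}.
 * Because MPI_Gatherv sends an array of such structs, the datatype's extent must equal the C
 * struct size so that element i starts at i * sizeof(struct). A hedged sketch using a stand-in
 * struct (not the real adios_timing_event_struct) that pins the extent explicitly: */
#include <mpi.h>
#include <stddef.h>

struct timing_event { int type; int is_start; double time; };

static MPI_Datatype make_event_type(void)
{
    int          blocklens[2] = { 2, 1 };
    MPI_Aint     disps[2]     = { offsetof(struct timing_event, type),
                                  offsetof(struct timing_event, time) };
    MPI_Datatype types[2]     = { MPI_INT, MPI_DOUBLE };
    MPI_Datatype tmp, event_type;

    MPI_Type_create_struct(2, blocklens, disps, types, &tmp);
    /* Force the extent to the C struct size, covering any trailing padding. */
    MPI_Type_create_resized(tmp, 0, (MPI_Aint) sizeof(struct timing_event), &event_type);
    MPI_Type_commit(&event_type);
    MPI_Type_free(&tmp);
    return event_type;
}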
void SocketServer::handle_conn(int sockfd) { //MPI_CONNECTION_INIT // TODO: check this! int argc = 0; #ifndef NDEBUG std::cout << "INFO" << ": trying MPI_Init " << std::endl; #endif MPI_Init( &argc, NULL ); #ifndef NDEBUG std::cout << "INFO" << ": ... done " << std::endl; #endif // Create MPI Structure int sizeOfData; MPI_Type_size( MPI_INT,&sizeOfData ); int array_of_block_lengths[2] = {1, 1}; MPI_Aint array_of_displacements[2] = {0, sizeOfData}; MPI_Datatype array_of_types[2] = { MPI_INT, MPI_INT }; MPI_Type_create_struct(2, array_of_block_lengths, array_of_displacements, array_of_types, &ArgListType); MPI_Type_commit(&ArgListType); // End of MPI struct client = MPI_COMM_WORLD; #ifndef NDEBUG std::cout << "DEBUG: Waiting for IR\n" << std::endl; #endif MPI_Status status; int mpi_server_tag = MPI_SERVER_TAG; int myrank; MPI_Comm_rank(client, &myrank); int mpi_server_rank =0; // TODO: check this! if(myrank==0) mpi_server_rank = 1; int incomingMessageSize=0; MPI_Probe(MPI_ANY_SOURCE, mpi_server_tag, client, &status); MPI_Get_count(&status,MPI_CHAR,&incomingMessageSize); char *module_ir_buffer = (char *) calloc(incomingMessageSize + 1 , sizeof(char)); MPI_Recv(module_ir_buffer, incomingMessageSize + 1, MPI_CHAR, MPI_ANY_SOURCE, mpi_server_tag, client, &status); #ifndef NDEBUG std::cout << "DEBUG: Recieved IR\n" << std::endl; #endif auto backend = parseIRtoBackend(module_ir_buffer); // notify client that calls can be accepted now by sending time taken for optimizing module and initialising backend const std::string readyStr(std::to_string(TimeDiffOpt.count()) + ":" + std::to_string(TimeDiffInit.count())); MPI_Send((void *)readyStr.c_str(), readyStr.size() , MPI_CHAR, mpi_server_rank, mpi_server_tag, client); free(module_ir_buffer); // initialise msg_buffer std::shared_ptr<char> msg_buffer((char*)calloc(MSG_BUFFER_SIZE, sizeof(char)), &free); while (1) { bzero(msg_buffer.get(), MSG_BUFFER_SIZE); // first acquire message length unsigned msg_length; auto UINT_MAX_str_len = std::to_string(UINT_MAX).length(); int num_chars = recv(sockfd, msg_buffer.get(), UINT_MAX_str_len + 1, 0); if (num_chars == 0) { std::cout << "Client assigned to process " << getpid() << " has closed its socket 3 \n"; exit(0); } if (num_chars < 0) error("ERROR, could not read from socket"); #ifndef NDEBUG //std::cout << getpid() << ": got message \"" << msg_buffer << "\"\n"; // TODO command line argument to print messages std::cout << getpid() << ": got message \n"; #endif llvm::Function* calledFunction = nullptr; std::vector<llvm::GenericValue> args; std::list<std::vector<llvm::GenericValue>::size_type> indexesOfPointersInArgs; llvm::GenericValue result = handleCall(backend.get(), msg_buffer.get(), calledFunction, args, indexesOfPointersInArgs); // reset buffer and write time taken to buffer bzero(msg_buffer.get(), MSG_BUFFER_SIZE); sprintf(msg_buffer.get(), ";%ld", (long)TimeDiffLastExecution.count()); //MPI_DATA_MOVEMENT //Send data back to the client //Create the MPI data structure //allocate memory for struct #ifndef TIMING auto StartTime = std::chrono::high_resolution_clock::now(); #endif struct ArgumentList argList[MAX_NUMBER_OF_ARGUMENTS]; MPI_Status status; //Create the structure int structSize=0; for (const auto& indexOfPtr : indexesOfPointersInArgs) { auto paramType = calledFunction->getFunctionType()->getParamType(indexOfPtr); while (paramType->getTypeID() == llvm::Type::ArrayTyID || paramType->getTypeID() == llvm::Type::PointerTyID) paramType = llvm::cast<llvm::SequentialType>(paramType)->getElementType(); if 
(paramType->getTypeID() == llvm::Type::IntegerTyID) { argList[structSize].typeofArg = ENUM_MPI_INT; } else { argList[structSize].typeofArg = ENUM_MPI_DOUBLE; } argList[structSize].sizeOfArg =argumentList[indexOfPtr].sizeOfArg; structSize++; } #ifndef NDEBUG std::cout << "\nMPI SERVER: Sending message back from server to client"; std::cout.flush(); #endif #ifndef NDEBUG std::cout << "\nMPI SERVER: Sending MPI Header"; std::cout.flush(); for (int i=0; i<structSize; i++) { std::cout << "\n MPI Sent DS : Size : " << argList[i].sizeOfArg << " Type" << argList[i].typeofArg ; std::cout.flush(); } #endif MPI_Send(argList, structSize, ArgListType, mpi_server_rank, mpi_server_tag, client); #ifndef NDEBUG std::cout << "\nMPI SERVER: Sent MPI Header"; std::cout.flush(); std::cout << "\nMPI SERVER: Sending data"; std::cout.flush(); #endif //Start sending individual arrrays for (const auto& indexOfPtr : indexesOfPointersInArgs) { auto paramType = calledFunction->getFunctionType()->getParamType(indexOfPtr); while (paramType->getTypeID() == llvm::Type::ArrayTyID || paramType->getTypeID() == llvm::Type::PointerTyID) paramType = llvm::cast<llvm::SequentialType>(paramType)->getElementType(); if (paramType->getTypeID() == llvm::Type::IntegerTyID) { MPI_Send(args[indexOfPtr].PointerVal,argList[indexOfPtr].sizeOfArg, MPI_INT, mpi_server_rank, mpi_server_tag, client); } else { MPI_Send(args[indexOfPtr].PointerVal, argList[indexOfPtr].sizeOfArg, MPI_DOUBLE, mpi_server_rank, mpi_server_tag, client); } free(args[indexOfPtr].PointerVal); } #ifndef TIMING auto EndTime = std::chrono::high_resolution_clock::now(); std::cout << "\n SERVR: MPI_DATA_TRANSFER S->C = " << std::chrono::duration_cast<std::chrono::microseconds>(EndTime - StartTime).count() << "\n"; #endif #ifndef NDEBUG std::cout << "\nMPI SERVER: Data sent"; std::cout.flush(); std::cout << "\nMPI SERVER: Return Messages sent"; std::cout.flush(); #endif char returnValStr[MAX_VAL_SIZE]; switch (calledFunction->getReturnType()->getTypeID()) { case llvm::Type::VoidTyID: sprintf(returnValStr, ":"); break; case llvm::Type::FloatTyID: sprintf(returnValStr, ":%a", result.FloatVal); break; case llvm::Type::DoubleTyID: sprintf(returnValStr, ":%la", result.DoubleVal); break; case llvm::Type::X86_FP80TyID: returnValStr[0]=':'; llvm::APFloat(llvm::APFloat::x87DoubleExtended, result.IntVal).convertToHexString(returnValStr+1, 0U, false, llvm::APFloat::roundingMode::rmNearestTiesToEven); break; case llvm::Type::FP128TyID: returnValStr[0]=':'; llvm::APFloat(llvm::APFloat::IEEEquad, result.IntVal).convertToHexString(returnValStr+1, 0U, false, llvm::APFloat::roundingMode::rmNearestTiesToEven); break; case llvm::Type::IntegerTyID: // Note: LLVM does not differentiate between signed/unsiged int types sprintf(returnValStr, ":%s", result.IntVal.toString(16,false).c_str()); break; default: error(std::string("ERROR, LLVM TypeID " + std::to_string(calledFunction->getReturnType()->getTypeID()) + " of result of function \"" + calledFunction->getName().str() + "\" is not supported").c_str()); } strcat(msg_buffer.get(), returnValStr); //Send the message MPI_Send(msg_buffer.get(), strlen(msg_buffer.get()), MPI_CHAR, mpi_server_rank, mpi_server_tag, client); MPI_Type_free(&ArgListType); // TODO: check this! MPI_Finalize(); } }
static int test_indexed_with_zeros(char *filename, int testcase) { int i, rank, np, buflen, num, err, nr_errors=0; int nelms[MAXLEN], buf[MAXLEN], indices[MAXLEN], blocklen[MAXLEN]; MPI_File fh; MPI_Status status; MPI_Datatype filetype; MPI_Datatype types[MAXLEN]; MPI_Aint addrs[MAXLEN]; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &np); /* set up the number of integers to write in each iteration */ for (i=0; i<MAXLEN; i++) nelms[i] = 0; if (rank == 0) nelms[4]=nelms[5]=nelms[7]=1; if (rank == 1) nelms[0]=nelms[1]=nelms[2]=nelms[3]=nelms[6]=nelms[8]=1; /* pre-fill the file with integers -999 */ if (rank == 0) { for (i=0; i<MAXLEN; i++) buf[i] = -999; err =MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open"); err = MPI_File_write(fh, buf, MAXLEN, MPI_INT, &status); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_write"); err = MPI_File_close(&fh); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close"); } MPI_Barrier(MPI_COMM_WORLD); /* define a filetype with spurious leading zeros */ buflen = num = 0; for (i=0; i<MAXLEN; i++) { buflen += nelms[i]; indices[num] = i; addrs[num] = i*sizeof(int); blocklen[num] = nelms[i]; types[num] = MPI_INT; num++; } switch (testcase) { case INDEXED: MPI_Type_indexed(num, blocklen, indices, MPI_INT, &filetype); break; case HINDEXED: MPI_Type_hindexed(num, blocklen, addrs, MPI_INT, &filetype); break; case STRUCT: MPI_Type_create_struct(num, blocklen, addrs, types, &filetype); break; default: fprintf(stderr, "unknown testcase!\n"); return(-100); } MPI_Type_commit(&filetype); /* initialize write buffer and write to file*/ for (i=0; i<MAXLEN; i++) buf[i] = 1; err =MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open"); err = MPI_File_set_view(fh, 0, MPI_INT, filetype, "native", MPI_INFO_NULL); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_set_view"); err = MPI_File_write_all(fh, buf, buflen, MPI_INT, &status); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_write_all"); MPI_Type_free(&filetype); err = MPI_File_close(&fh); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close"); /* read back and check */ if (rank == 0) { err = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open"); err = MPI_File_read(fh,buf, MAXLEN, MPI_INT, &status); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_read"); err = MPI_File_close(&fh); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close"); for (i=0; i<MAXLEN; i++) { if (buf[i] < 0) { nr_errors++; printf("Error: unexpected value for case %d at buf[%d] == %d\n", testcase,i,buf[i]); } } } return nr_errors; }
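/* The three testcases above (INDEXED, HINDEXED, STRUCT) are meant to describe the same file
 * layout: num blocks of MPI_INT, with the indexed variant giving block starts in units of
 * MPI_INT and the other two giving them in bytes. (MPI_Type_hindexed is the deprecated
 * pre-MPI-3.0 name; MPI_Type_create_hindexed is the current form.) A small, hedged sanity
 * check that two such constructions agree in data size and extent could look like this: */
#include <mpi.h>
#include <assert.h>

static void check_equivalent(MPI_Datatype a, MPI_Datatype b)
{
    int sa, sb;
    MPI_Aint lba, exta, lbb, extb;
    MPI_Type_size(a, &sa);                 /* bytes of actual data */
    MPI_Type_size(b, &sb);
    MPI_Type_get_extent(a, &lba, &exta);   /* span covered, including gaps */
    MPI_Type_get_extent(b, &lbb, &extb);
    assert(sa == sb && lba == lbb && exta == extb);
}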
int getparams(int argc, char *argv[], pparams *params, FILE **gridfile, FILE **statusfile, MPI_Datatype *pparams_dt, int rank) { MPI_Aint pparams_displ[NUM_PARAMS]; int arg; /* Compute the displacements necessary to create a new MPI datatype. */ pparams_displ[0] = (size_t)&(params->dx) - (size_t)params; pparams_displ[1] = (size_t)&(params->dt) - (size_t)params; pparams_displ[2] = (size_t)&(params->D) - (size_t)params; pparams_displ[3] = (size_t)&(params->ntotal) - (size_t)params; pparams_displ[4] = (size_t)&(params->ttotal) - (size_t)params; pparams_displ[5] = (size_t)&(params->l) - (size_t)params; pparams_displ[6] = (size_t)&(params->h) - (size_t)params; pparams_displ[7] = (size_t)&(params->freq) - (size_t)params; /* Create new MPI datatype. */ MPI_Type_create_struct(NUM_PARAMS, pparams_blength, pparams_displ, pparams_type, pparams_dt); MPI_Type_commit(pparams_dt); /* Only rank 0 has to parse the parameters. */ if (rank > 0) return EX_OK; params->dx = -1; params->dt = -1; params->D = -1; params->l = 0; params->h = 0; params->freq = -1; *gridfile = NULL; while ((arg = getopt(argc, argv, "x:D:t:f:s:h:l:g:")) != -1) { switch (arg) { case 'x': params->dx = (grid_type)strtof(optarg, NULL); break; case 'D': params->D = (grid_type)strtof(optarg, NULL); break; case 't': params->dt = (grid_type)strtof(optarg, NULL); break; case 'g': if ((*gridfile = fopen(optarg, "w")) == NULL) return EX_CANTCREAT; break; case 's': if ((*statusfile = fopen(optarg, "a")) == NULL) return EX_CANTCREAT; break; case 'l': params->l = (int)strtol(optarg, NULL, 10); break; case 'h': params->h = (int)strtol(optarg, NULL, 10); break; case 'f': params->freq = (int)strtol(optarg, NULL, 10); break; default: usage(); } } argc -= optind; argv += optind; /* Although this could be computed every time, we prefer storing the values. */ params->ntotal = (int)(1 / params->dx); params->ttotal = (int)(1 / params->dt); /* Do some sanity check. */ if (params->ntotal < 1) { warnx("ntotal > 1"); usage(); } if (params->D < 0) { warnx("D >= 0"); usage(); } if (*gridfile == NULL) { warnx("Could not open a file to store grid points."); usage(); } if (params->l == 0 || params->h == 0) { warnx("please specify the processor dimensions of the Grid."); usage(); } if (params->freq < 0) { warnx("frequency >= 0"); usage(); } return EX_OK; }
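/* getparams() computes displacements by casting member addresses to size_t and subtracting the
 * struct's base address. That usually works, but offsetof (or MPI_Get_address plus MPI_Aint
 * subtraction) expresses the same thing without pointer-to-integer casts. A hedged sketch,
 * assuming a hypothetical pparams layout with the eight fields used above: */
#include <mpi.h>
#include <stddef.h>

typedef double grid_type;   /* assumption: matches the entries of pparams_type[] */
typedef struct { grid_type dx, dt, D; int ntotal, ttotal, l, h, freq; } pparams_demo;

static void build_pparams_type(MPI_Datatype *newtype)
{
    int          blength[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
    MPI_Datatype ptype[8]   = { MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE,
                                MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT };
    MPI_Aint     pdispl[8]  = {
        offsetof(pparams_demo, dx),     offsetof(pparams_demo, dt),
        offsetof(pparams_demo, D),      offsetof(pparams_demo, ntotal),
        offsetof(pparams_demo, ttotal), offsetof(pparams_demo, l),
        offsetof(pparams_demo, h),      offsetof(pparams_demo, freq) };

    MPI_Type_create_struct(8, blength, pdispl, ptype, newtype);
    MPI_Type_commit(newtype);
}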
void compute_process(int agents_total, int nreps, int world_width, int world_height) { int np, pid; MPI_Comm_rank(MPI_COMM_WORLD, &pid); MPI_Comm_size(MPI_COMM_WORLD, &np); int server_process = np - 1; MPI_Status status; /* create a type for struct agent */ const int nitems=5; int blocklengths[5] = {1,1,1,1,1}; MPI_Datatype types[5] = {MPI_INT, MPI_INT, MPI_INT, MPI_FLOAT, MPI_FLOAT}; MPI_Datatype mpi_agent_type; MPI_Aint offsets[5]; offsets[0] = offsetof(agent, id); offsets[1] = offsetof(agent, x); offsets[2] = offsetof(agent, y); offsets[3] = offsetof(agent, z); offsets[4] = offsetof(agent, w); MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_agent_type); MPI_Type_commit(&mpi_agent_type); unsigned int num_bytes = agents_total * sizeof(float4); unsigned int num_halo_points = RADIO * world_width; unsigned int num_halo_bytes = num_halo_points * sizeof(short int); //unsigned int world_node_height = (world_height / (np-1)) + (RADIO * 2); //if(pid == 0 or pid == np - 2) // world_node_height -= RADIO; size_t size_world = world_width * world_height * sizeof(short int); short int *h_world = (short int *)malloc(size_world); *h_world = 0; short int *d_world; for(int j = 0; j < world_width * world_height; j++) { h_world[j] = 0; } /* alloc host memory */ agent *h_agents_in = (agent *)malloc(num_bytes); //agent *d_agents_in; float4 *h_agents_pos; float4 *d_agents_pos; //MPI_Recv(rcv_address, num_points, MPI_FLOAT, server_process, MPI_ANY_TAG, MPI_COMM_WORLD, &status); MPI_Recv(h_agents_in, agents_total, mpi_agent_type, server_process, 0, MPI_COMM_WORLD, &status); //Iniatialize world for( int i = 0; i < agents_total; i++) { h_world[(world_width * (h_agents_in[i].y - 1) ) + h_agents_in[i].x] = (h_agents_in[i].x!=0?1:0); //if(h_world[(world_width * (h_agents_in[i].y - 1) ) + h_agents_in[i].x] == 1) //printf("world x: %d, y: %d\n", h_agents_in[i].x, h_agents_in[i].y); h_agents_pos[i].x = h_agents_in[i].x; h_agents_pos[i].y = h_agents_in[i].y; h_agents_pos[i].z = h_agents_in[i].z; h_agents_pos[i].w = h_agents_in[i].w; } /*** if(pid ==1) { int k=0; for(int j = 0; j < world_width * world_height; j++) { if ( j%96 == 0 and j>0) { k++; printf("%d row: %d\n", h_world[j], k); } else printf("%d ", h_world[j]); } } ***/ // Error code to check return values for CUDA calls cudaError_t err = cudaSuccess; // Allocate the device pointer err = cudaMalloc((void **)&d_world, size_world); if (err != cudaSuccess) { fprintf(stderr, "Failed to allocate device pointer (error code %s)!\n", cudaGetErrorString(err)); exit(EXIT_FAILURE); } err = cudaMemcpy(d_world, h_world, size_world, cudaMemcpyHostToDevice); if (err != cudaSuccess) { fprintf(stderr, "Failed to copy pointer from host to device (error code %s)!\n", cudaGetErrorString(err)); exit(EXIT_FAILURE); } //http://cuda-programming.blogspot.com.es/2013/02/cuda-array-in-cuda-how-to-use-cuda.html //http://stackoverflow.com/questions/17924705/structure-of-arrays-vs-array-of-structures-in-cuda // Allocate the device pointer err = cudaMalloc((void **)&d_agents_pos, num_bytes); if (err != cudaSuccess) { fprintf(stderr, "Failed to allocate device pointer (error code %s)!\n", cudaGetErrorString(err)); exit(EXIT_FAILURE); } err = cudaMemcpy(d_agents_pos, h_agents_pos, num_bytes, cudaMemcpyHostToDevice); if (err != cudaSuccess) { fprintf(stderr, "Failed to copy pointer from host to device (error code %s)!\n", cudaGetErrorString(err)); exit(EXIT_FAILURE); } launch_kernel(d_agents_pos, d_world, world_width, world_height ); MPI_Barrier( MPI_COMM_WORLD); #ifdef DEBUG // 
printf("pid: %d\n", pid); // display_data(h_agents_in, agents_total ); #endif MPI_Send(h_agents_in, agents_total, mpi_agent_type, server_process, DATA_COLLECT, MPI_COMM_WORLD); /* Release resources */ free(h_agents_in); /* free(h_output); cudaFreeHost(h_left_boundary); cudaFreeHost(h_right_boundary); cudaFreeHost(h_left_halo); cudaFreeHost(h_right_halo); cudaFree(d_input); cudaFree(d_output); */ }
/** * @brief Island-based genetic algorithm model running in different modes: Sequential, CPU or GPU only and Heterogeneous (full cooperation between all available OpenCL devices) * @param subpops The initial subpopulations * @param devicesObject Structure containing the OpenCL variables of a device * @param trDataBase The training database which will contain the instances and the features * @param selInstances The instances choosen as initial centroids * @param conf The structure with all configuration parameters */ void agIslands(Individual *const subpops, CLDevice *const devicesObject, const float *const trDataBase, const int *const selInstances, const Config *const conf) { /********** MPI variables ***********/ MPI_Datatype Individual_MPI_type; MPI_Datatype array_of_types[3] = {MPI_UNSIGNED_CHAR, MPI_FLOAT, MPI_INT}; int array_of_blocklengths[3] = {conf -> nFeatures, conf -> nObjectives + 1, 2}; MPI_Aint array_of_displacement[3]; MPI_Status status; /******* Measure and start the master-worker algorithm *******/ MPI_Barrier(MPI_COMM_WORLD); /******* Each process dinamically will request subpopulations *******/ // Master if (conf -> mpiRank == 0) { double timeStart = omp_get_wtime(); int *nIndsFronts0 = new int[conf -> nSubpopulations]; int finalFront0; // The master receives the number of subpopulations that each worker can process int workerCapacities[conf -> mpiSize - 1]; MPI_Request requests[conf -> mpiSize - 1]; for (int p = 1; p < conf -> mpiSize; ++p) { MPI_Irecv(&workerCapacities[p - 1], 1, MPI_INT, p, MPI_ANY_TAG, MPI_COMM_WORLD, &requests[p - 1]); } // The "Individual" datatype must be converted to a MPI datatype and commit it array_of_displacement[0] = (size_t) &(subpops[0].chromosome[0]) - (size_t) &(subpops[0]); array_of_displacement[1] = (size_t) &(subpops[0].fitness[0]) - (size_t) &(subpops[0]); array_of_displacement[2] = (size_t) &(subpops[0].rank) - (size_t) &(subpops[0]); MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacement, array_of_types, &Individual_MPI_type); MPI_Type_commit(&Individual_MPI_type); MPI_Waitall(conf -> mpiSize - 1, requests, MPI_STATUSES_IGNORE); int maxChunk = std::min(*std::max_element(workerCapacities, workerCapacities + conf -> mpiSize - 1), conf -> nSubpopulations); /********** In each migration the individuals are exchanged between subpopulations of different nodes ***********/ for (int gMig = 0; gMig < conf -> nGlobalMigrations; ++gMig) { // Send some work to the workers int nextWork = 0; int sent = 0; int mpiTag = (gMig == 0) ? 
INITIALIZE : IGNORE_VALUE; for (int p = 1; p < conf -> mpiSize && nextWork < conf -> nSubpopulations; ++p) { int finallyWork = std::min(workerCapacities[p - 1], conf -> nSubpopulations - nextWork); int popIndex = nextWork * conf -> familySize; MPI_Isend(subpops + popIndex, finallyWork * conf -> familySize, Individual_MPI_type, p, mpiTag, MPI_COMM_WORLD, &requests[p - 1]); nextWork += finallyWork; ++sent; } MPI_Waitall(sent, requests, MPI_STATUSES_IGNORE); // Dynamically distribute the subpopulations int receivedWork = 0; int receivedPtr = 0; while (nextWork < conf -> nSubpopulations) { MPI_Recv(subpops + (receivedPtr * conf -> familySize), maxChunk * conf -> familySize, Individual_MPI_type, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); MPI_Recv(nIndsFronts0 + receivedPtr, maxChunk, MPI_INT, status.MPI_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); MPI_Get_count(&status, MPI_INT, &receivedWork); receivedPtr += receivedWork; int finallyWork = std::min(workerCapacities[status.MPI_SOURCE - 1], conf -> nSubpopulations - nextWork); int popIndex = nextWork * conf -> familySize; MPI_Send(subpops + popIndex, finallyWork * conf -> familySize, Individual_MPI_type, status.MPI_SOURCE, mpiTag, MPI_COMM_WORLD); nextWork += finallyWork; } // Receive the remaining work while (receivedPtr < conf -> nSubpopulations) { MPI_Recv(subpops + (receivedPtr * conf -> familySize), maxChunk * conf -> familySize, Individual_MPI_type, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); MPI_Recv(nIndsFronts0 + receivedPtr, maxChunk, MPI_INT, status.MPI_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); MPI_Get_count(&status, MPI_INT, &receivedWork); receivedPtr += receivedWork; } // Migration process between subpopulations of different nodes if (gMig != conf -> nGlobalMigrations - 1 && conf -> nSubpopulations > 1) { migration(subpops, conf -> nSubpopulations, nIndsFronts0, conf); #pragma omp parallel for for (int sp = 0; sp < conf -> nSubpopulations; ++sp) { int popIndex = sp * conf -> familySize; // The crowding distance of the subpopulation is initialized again for the next nonDominationSort for (int i = popIndex; i < popIndex + conf -> subpopulationSize; ++i) { subpops[i].crowding = 0.0f; } nonDominationSort(subpops + popIndex, conf -> subpopulationSize, conf); } } } // Notify to all workers that the work has finished for (int p = 1; p < conf -> mpiSize; ++p) { MPI_Isend(0, 0, MPI_INT, p, FINISH, MPI_COMM_WORLD, &requests[p - 1]); } /********** Recombination process ***********/ if (conf -> nSubpopulations > 1) { for (int sp = 0; sp < conf -> nSubpopulations; ++sp) { memcpy(subpops + (sp * conf -> subpopulationSize), subpops + (sp * conf -> familySize), conf -> subpopulationSize * sizeof(Individual)); } // The crowding distance of the subpopulation is initialized again for the next nonDominationSort #pragma omp parallel for for (int i = 0; i < conf -> worldSize; ++i) { subpops[i].crowding = 0.0f; } finalFront0 = std::min(conf -> subpopulationSize, nonDominationSort(subpops, conf -> worldSize, conf)); } else { finalFront0 = nIndsFronts0[0]; } // All process must reach this point in order to provide a real time measure MPI_Waitall(conf -> mpiSize - 1, requests, MPI_STATUSES_IGNORE); MPI_Barrier(MPI_COMM_WORLD); fprintf(stdout, "%.10g\n", (omp_get_wtime() - timeStart) * 1000.0); // Get the hypervolume fprintf(stdout, "%.6g\n", getHypervolume(subpops, finalFront0, conf)); // Generation of the data file for Gnuplot generateDataPlot(subpops, finalFront0, conf); // Exclusive variables used by the master are released 
delete[] nIndsFronts0; MPI_Type_free(&Individual_MPI_type); } // Workers else { // This is only for sequential benchmark const bool isSequential = (conf -> nDevices == 0 && conf -> ompThreads < 2); const int nDevices = (isSequential) ? conf -> nSubpopulations : std::max(1, conf -> nDevices + (conf -> ompThreads > 0)); int nChildren[nDevices]; int nIndsFronts0[nDevices]; MPI_Request requests[2]; // The worker tells to the master how many subpopulations can be processed MPI_Isend(&nDevices, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &(requests[0])); MPI_Request_free(&(requests[0])); // Each worker will compute as many subpopulations as OpenCL devices at most Individual *subpops = new Individual[nDevices * conf -> familySize]; // Create MPI datatype for the individuals and commit it array_of_displacement[0] = (size_t) &(subpops[0].chromosome[0]) - (size_t) &(subpops[0]); array_of_displacement[1] = (size_t) &(subpops[0].fitness[0]) - (size_t) &(subpops[0]); array_of_displacement[2] = (size_t) &(subpops[0].rank) - (size_t) &(subpops[0]); MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacement, array_of_types, &Individual_MPI_type); MPI_Type_commit(&Individual_MPI_type); // The worker receives as many subpopulations as number of OpenCL devices at most MPI_Recv(subpops, nDevices * conf -> familySize, Individual_MPI_type, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); while (status.MPI_TAG != FINISH) { int receivedWork; MPI_Get_count(&status, Individual_MPI_type, &receivedWork); int nSubpopulations = receivedWork / conf -> familySize; int nThreads = (isSequential) ? 1 : std::min(nDevices, nSubpopulations); if (status.MPI_TAG == INITIALIZE) { /********** Multi-objective individuals evaluation over all subpopulations ***********/ omp_set_nested(1); #pragma omp parallel for num_threads(nThreads) schedule(dynamic, 1) for (int sp = 0; sp < nSubpopulations; ++sp) { int popIndex = sp * conf -> familySize; if (isSequential) { evaluationCPU(subpops + popIndex, conf -> subpopulationSize, trDataBase, selInstances, 1, conf); } else if (nSubpopulations == 1) { evaluationHET(subpops + popIndex, conf -> subpopulationSize, devicesObject, nDevices, trDataBase, selInstances, conf); } else { evaluationHET(subpops + popIndex, conf -> subpopulationSize, &devicesObject[omp_get_thread_num()], 1, trDataBase, selInstances, conf); } // Fitness normalization normalizeFitness(subpops + popIndex, conf -> subpopulationSize, conf); } /********** Sort each subpopulation with the "Non-Domination-Sort" method ***********/ #pragma omp parallel for for (int sp = 0; sp < nSubpopulations; ++sp) { int popIndex = sp * conf -> familySize; nIndsFronts0[sp] = nonDominationSort(subpops + popIndex, conf -> subpopulationSize, conf); } } /********** In each migration the individuals are exchanged between subpopulations of the same node ***********/ int nLocalMigrations = (nSubpopulations > 1) ? 
conf -> nLocalMigrations : 1; for (int lMig = 0; lMig < nLocalMigrations; ++lMig) { /********** Start the evolution process ***********/ for (int g = 0; g < conf -> nGenerations; ++g) { /********** Fill the mating pool and perform crossover ***********/ #pragma omp parallel for for (int sp = 0; sp < nSubpopulations; ++sp) { const int *const pool = getPool(conf); int popIndex = sp * conf -> familySize; nChildren[sp] = crossoverUniform(subpops + popIndex, pool, conf); // Local resources used are released delete[] pool; } /********** Multi-objective individuals evaluation over all subpopulations ***********/ #pragma omp parallel for num_threads(nThreads) schedule(dynamic, 1) for (int sp = 0; sp < nSubpopulations; ++sp) { int popIndex = sp * conf -> familySize; if (isSequential) { evaluationCPU(subpops + popIndex + conf -> subpopulationSize, nChildren[sp], trDataBase, selInstances, 1, conf); } else if (nSubpopulations == 1) { evaluationHET(subpops + popIndex + conf -> subpopulationSize, nChildren[sp], devicesObject, nDevices, trDataBase, selInstances, conf); } else { evaluationHET(subpops + popIndex + conf -> subpopulationSize, nChildren[sp], &devicesObject[omp_get_thread_num()], 1, trDataBase, selInstances, conf); } // Fitness normalization normalizeFitness(subpops + popIndex + conf -> subpopulationSize, nChildren[sp], conf); } /********** The crowding distance of the parents is initialized again for the next nonDominationSort ***********/ #pragma omp parallel for for (int sp = 0; sp < nSubpopulations; ++sp) { int popIndex = sp * conf -> familySize; for (int i = popIndex; i < popIndex + conf -> subpopulationSize; ++i) { subpops[i].crowding = 0.0f; } // Replace subpopulation // Parents and children are sorted by rank and crowding distance. // The first "subpopulationSize" individuals will advance the next generation nIndsFronts0[sp] = nonDominationSort(subpops + popIndex, conf -> subpopulationSize + nChildren[sp], conf); } } // Migration process between subpopulations of the same node if (lMig != nLocalMigrations - 1 && nSubpopulations > 1) { migration(subpops, nSubpopulations, nIndsFronts0, conf); #pragma omp parallel for for (int sp = 0; sp < nSubpopulations; ++sp) { int popIndex = sp * conf -> familySize; // The crowding distance of the subpopulation is initialized again for the next nonDominationSort for (int i = popIndex; i < popIndex + conf -> subpopulationSize; ++i) { subpops[i].crowding = 0.0f; } nonDominationSort(subpops + popIndex, conf -> subpopulationSize, conf); } } } // The worker send to the master the subpopulations already evaluated and will request new work MPI_Isend(subpops, receivedWork, Individual_MPI_type, 0, 0, MPI_COMM_WORLD, &(requests[0])); MPI_Isend(nIndsFronts0, nSubpopulations, MPI_INT, 0, 0, MPI_COMM_WORLD, &(requests[1])); MPI_Waitall(2, requests, MPI_STATUSES_IGNORE); MPI_Recv(subpops, nDevices * conf -> familySize, Individual_MPI_type, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); } // All process must reach this point in order to provide a real time measure MPI_Barrier(MPI_COMM_WORLD); // Exclusive variables used by the workers are released delete[] subpops; MPI_Type_free(&Individual_MPI_type); } }
int main(int argc, char ** argv){ int my_id, root, ierr, num_procs; MPI_Status status; ierr = MPI_Init(&argc, &argv);//Creat processes ierr = MPI_Comm_rank(MPI_COMM_WORLD, &my_id); ierr = MPI_Comm_size(MPI_COMM_WORLD, &num_procs); /*Make MPI data type for Vars*/ const int nitems=5; int blocklengths[5] = {1, 1, 1, 1, 1}; MPI_Datatype types[5] = { MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE}; MPI_Datatype mpi_Vars; MPI_Aint offsets[5]; offsets[0] = offsetof(Vars, mass); offsets[1] = offsetof(Vars, xvelocity); offsets[2] = offsetof(Vars, yvelocity); offsets[3] = offsetof(Vars, energy); offsets[4] = offsetof(Vars, press); MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_Vars); MPI_Type_commit(&mpi_Vars); /*start the program*/ int N, type; N = num_procs*100; type = 1; int zones_to_do = N/num_procs; double dt; int count = 0;char str[80]; FILE *fid, *finit; double dx = 1./(double)N; double t, T; t = 0.; T = .2; int num = 30; Vars * U = malloc((N+4)*(N+4)*sizeof(Vars)); init_sys(N+4, N+4, U, dx, dx, 1); if(my_id == 0){ /*I am root*/ finit = fopen("2Dinit.dat","w"); Write_Cons(N+4, N+4, U, dx, dx, finit); fclose(finit); int count = 0; } while(t<T){ //printf("before\n"); dt = advance_system(N+4, N+4, U, dx, dx, my_id, zones_to_do, num_procs, mpi_Vars); t+=dt; //break; //printf("what time is it = %f\n", dt); /*Broadcast U*/ ierr = MPI_Bcast(U, (N+4)*(N+4), mpi_Vars, 0, MPI_COMM_WORLD); /* if(my_id == 0){ if( count % 1 == 0){ sprintf(str, "T_%d.dat", count); fid = fopen(str, "w"); Write_Cons(N+4, N+4, U, dx, dx, fid); fclose(fid); //printf("T=%f\n", t); } count += 1; }*/ } if(my_id == 0){ /*I am Root*/ printf("%d\n", count); fid = fopen("22data.dat","w"); Write_Cons(N+4, N+4, U, dx, dx, fid); fclose(fid); } free(U); MPI_Finalize(); }
void main(int argc, char **argv) { double start_t; double end_t; int my_rank, p, my_loc_rank, loc_p; complex *A; complex *B; complex *C; /* initialize MPI */ MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &p); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /* Create MPI Datatype for Complex */ const float nitems=2; int blocklengths[2] = {1,1}; MPI_Datatype types[2] = {MPI_FLOAT, MPI_FLOAT}; MPI_Aint offsets[2]; offsets[0] = offsetof(complex, r); offsets[1] = offsetof(complex, i); MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_complex); MPI_Type_commit(&mpi_complex); int workload = 512 / p; complex a[512*workload]; complex b[512*workload]; complex c[512*workload]; /* Split the first two groups and a collector group*/ if(my_rank == 0) { MPI_Comm_split(MPI_COMM_WORLD, 0, my_rank, &comm1); MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, my_rank, &comm2); MPI_Comm_split(MPI_COMM_WORLD, 0, my_rank, &commR2); } else if(my_rank < p/2) { MPI_Comm_split(MPI_COMM_WORLD, 0, my_rank, &comm1); MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, my_rank, &comm2); MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, my_rank, &commR2); } else { MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, my_rank-(p/2), &comm1); MPI_Comm_split(MPI_COMM_WORLD, 0, my_rank-(p/2), &comm2); MPI_Comm_split(MPI_COMM_WORLD, 0, my_rank-(p/2), &commR2); } /* Split the group for the latter two tasks */ /* All processors may participate, we only need one group for the two tasks */ MPI_Comm_split(MPI_COMM_WORLD, 0, my_rank, &comm3); /* Initialize Data*/ workload = 512 / (p/2); if(my_rank == 0) { A = malloc(512*512 * sizeof(complex)); B = malloc(512*512 * sizeof(complex)); C = malloc(512*512 * sizeof(complex)); initialize_data(f1_name, A); start_t = MPI_Wtime(); } else if(my_rank == p/2 || p == 1) { B = malloc(512*512 * sizeof(complex)); initialize_data(f2_name, B); } if(my_rank < p/2) { MPI_Scatter(A, 512*workload, mpi_complex, a, 512*workload, mpi_complex, 0, comm1); } else { MPI_Scatter(B, 512*workload, mpi_complex, b, 512*workload, mpi_complex, 0, comm2); } /* 2D FFT on A */ if(my_rank < p/2) { MPI_Comm_rank(comm1, &my_loc_rank); MPI_Comm_size(comm1, &loc_p); execute_fft(a, 1, loc_p, my_loc_rank); MPI_Gather(a, 512*workload, mpi_complex, A, 512*workload, mpi_complex, 0, comm1); if(my_loc_rank == 0) { transpose(A); } MPI_Scatter(A, 512*workload, mpi_complex, a, 512*workload, mpi_complex, 0, comm1); execute_fft(a, 1, loc_p, my_loc_rank); } else if(my_rank >= p/2 || p == 1) { /* 2D FFT on B */ MPI_Comm_rank(comm2, &my_loc_rank); MPI_Comm_size(comm2, &loc_p); execute_fft(b, 1, loc_p, my_loc_rank); MPI_Gather(b, 512*workload, mpi_complex, B, 512*workload, mpi_complex, 0, comm2); if(my_loc_rank == 0) { transpose(B); } MPI_Scatter(B, 512*workload, mpi_complex, b, 512*workload, mpi_complex, 0, comm2); execute_fft(b, 1, loc_p, my_loc_rank); } /* Multiplication Step */ workload = 512 / p; sync_tasks(a, b, A, B, p, my_rank); MPI_Scatter(A, 512*workload, mpi_complex, b, 512*workload, mpi_complex, 0, comm3); MPI_Scatter(B, 512*workload, mpi_complex, b, 512*workload, mpi_complex, 0, comm3); execute_mm(a, b, c, p, my_rank); /* 2D FFT on C */ execute_fft(c, -1, p, my_rank); MPI_Gather(c, 512*workload, mpi_complex, C, 512*workload, mpi_complex, 0, comm3); if(my_rank == 0) { transpose(C); } MPI_Scatter(C, 512*workload, mpi_complex, c, 512*workload, mpi_complex, 0, comm3); execute_fft(c, -1, p, my_rank); MPI_Gather(c, 512*workload, mpi_complex, C, 512*workload, mpi_complex, 0, comm3); end_t = MPI_Wtime(); if(my_rank == 0) { output_data(f_out, 
C); printf("\nElapsed time = %g s\n", end_t - start_t); printf("--------------------------------------------\n"); /* A, B and C were each allocated with a single malloc, so they must be freed once as whole blocks, not element by element. */ free(A); free(B); free(C); } MPI_Finalize(); }
int main(int argc, char* argv[]) { int* bodies_off; int* n_bodies_split; int n_local_bodies; const MPI_Comm comm = MPI_COMM_WORLD; FILE *inputf; FILE *outputf; double clockStart, clockEnd; int rc, n_proc, rank; rc = MPI_Init(&argc, &argv); if (rc != MPI_SUCCESS) { puts("MPI_Init failed"); exit(-1); } MPI_Comm_size(comm, &n_proc); MPI_Comm_rank(comm, &rank); //creazione datatype per mpi! MPI_Datatype bodytype; MPI_Datatype type[6] = { MPI_LB, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_UB }; int block_len[6] = {1, 1, 3, 3, 3, 1}; MPI_Aint disp[6]; leaf_t example[2]; MPI_Get_address(&example[0], &disp[0]); MPI_Get_address(&(example[0].mass), &disp[1]); MPI_Get_address(&(example[0].pos), &disp[2]); MPI_Get_address(&(example[0].vel), &disp[3]); MPI_Get_address(&(example[0].acc), &disp[4]); MPI_Get_address(&(example[1].acc), &disp[5]); // int i; // for(i = 6; i >= 0; --i) // disp[i] -= disp[0]; disp[1] = disp[1] - disp[0]; disp[2] = disp[2] - disp[0]; disp[3] = disp[3] - disp[0]; disp[4] = disp[4] - disp[0]; disp[5] = disp[5] - disp[0]; MPI_Type_create_struct(6, block_len, disp, type, &bodytype); MPI_Type_commit(&bodytype); bodies_off = malloc((n_proc + 1) * sizeof(int)); n_bodies_split = malloc((n_proc) * sizeof(int)); bodies = malloc(nbodies * sizeof(node_t*)); leafs = malloc(nbodies * sizeof(leaf_t)); char* inputfile = argv[1]; inputf = fopen(inputfile, "r"); if (inputf == NULL) { printf("impossibile leggere da file"); exit(1); } fscanf(inputf, "%d", &nbodies); fscanf(inputf, "%d", &steps); fscanf(inputf, "%lf", &dt); fscanf(inputf, "%lf", &eps); fscanf(inputf, "%lf", &tol); fclose(inputf); if (rank == 0) { int i; create_bodies(); quicksort(0, nbodies - 1); // bublesort(); // int i = 0; // for (i = 0; i < nbodies; i++) { // printf("%lf, %lf, %lf \n", bodies[i]->pos[0], bodies[i]->pos[1], // bodies[i]->pos[2]); // } n_local_bodies = nbodies / n_proc; //split delle particelle secondo shark & fish // split_bodies(n_proc, bodies_off, n_bodies_split); // n_local_bodies = n_bodies_split[rank]; // // MPI_Bcast(n_bodies_split, n_proc, MPI_INT, 0, comm); MPI_Bcast(leafs, nbodies, bodytype, 0, comm); dthf = 0.5 * dt; epssq = eps * eps; itolsq = 1.0 / (tol * tol); clockStart = MPI_Wtime(); int step = 0; root = NULL; for (step = 0; step < steps; step++) { compute_center_and_diameter(); root = malloc(sizeof(struct node_t)); // "new" is like "malloc" double mass_root = 0.0; root->type = 1; root->mass = &mass_root; root->pos = center; root->cell.childs[0] = NULL; root->cell.childs[1] = NULL; root->cell.childs[2] = NULL; root->cell.childs[3] = NULL; root->cell.childs[4] = NULL; root->cell.childs[5] = NULL; root->cell.childs[6] = NULL; root->cell.childs[7] = NULL; double radius = diameter * 0.5; int i = 0; for (i = 0; i < nbodies; i++) { insert(root, bodies[i], radius); // questo è il modo per passare i dati per riferimento... cioè mandare l'indirizzo della struttura puntata dal puntatore } curr = 0; compute_center_of_mass(&(*root)); for (i = 0; i < n_local_bodies; i++) { compute_force(&(*root), &(*bodies[i]), diameter, step); } // for (i = 0; i < nbodies; i++) { // } deallocate_tree(root); //inserire all gather MPI_Allgather(leafs, n_local_bodies, bodytype, leafs, n_local_bodies, bodytype, comm); for (i = 0; i < nbodies; i++) { advance(&(*bodies[i])); } // int p = 0; // for (p = 0; p < nbodies; p++) // printf("%lf, %lf, %lf \n", bodies[p]->pos[0], bodies[p]->pos[1], // bodies[p]->pos[2]); // printf("*************************************** \n"); } // int i = 0; // dopo l'esecuzione!! 
// int proc_rec = 1; // while (proc_rec < n_proc) { // MPI_Status status; // int proc_rank; // int cap = nbodies / n_proc; // node_t temp[cap]; // MPI_Recv(temp, cap, bodytype, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, // &status); // proc_rank = status.MPI_SOURCE; // // int idx = 0; // for (idx = proc_rec * (cap); idx < cap; idx++) // *bodies[idx] = temp[idx]; // proc_rec++; // } clockEnd = MPI_Wtime(); if (nbodies == 16384) { system("echo 'Host:' `hostname` >> output16384 "); outputf = fopen("output16384", "a"); fprintf(outputf, "Tempo di esecuzione: %lf \n", clockEnd - clockStart); for (i = 0; i < nbodies; i++) { fprintf(outputf, "%lf, %lf, %lf \n", bodies[i]->pos[0], bodies[i]->pos[1], bodies[i]->pos[2]); } } else if (nbodies == 32768) { system("echo 'Host:' `hostname` >> output32768 "); outputf = fopen("output32768", "a"); fprintf(outputf, "Tempo di esecuzione: %lf \n", clockEnd - clockStart); for (i = 0; i < nbodies; i++) { fprintf(outputf, "%lf, %lf, %lf \n", bodies[i]->pos[0], bodies[i]->pos[1], bodies[i]->pos[2]); } } else if (nbodies == 65536) { system("echo 'Host:' `hostname` >> output65536 "); outputf = fopen("output65536", "a"); fprintf(outputf, "Tempo di esecuzione: %lf \n", clockEnd - clockStart); for (i = 0; i < nbodies; i++) { fprintf(outputf, "%lf, %lf, %lf \n", bodies[i]->pos[0], bodies[i]->pos[1], bodies[i]->pos[2]); } } else { system("echo 'Host:' `hostname` >> output "); outputf = fopen("output", "a"); fprintf(outputf, "Tempo di esecuzione: %lf \n", clockEnd - clockStart); for (i = 0; i < nbodies; i++) { fprintf(outputf, "%lf, %lf, %lf \n", bodies[i]->pos[0], bodies[i]->pos[1], bodies[i]->pos[2]); } } fflush(outputf); fclose(outputf); printf("Esecuzione completata\n"); } else { int low = 1, up = 0; int i; dthf = 0.5 * dt; epssq = eps * eps; itolsq = 1.0 / (tol * tol); // if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) { // printf("Inizializzazione della libreria di papi fallita \n"); // exit(1); // } // // if (PAPI_create_eventset(&event_set) != PAPI_OK) { // printf("E' andata a male la creazione dell'eventSet \n"); // exit(1); // } // // if (PAPI_add_events(event_set, events, 2) != PAPI_OK) { // printf("E' andata a male l'aggiunta degli eventi\n"); // exit(1); // } n_local_bodies = nbodies / n_proc; MPI_Bcast(leafs, nbodies, bodytype, 0, comm); int step = 0; root = NULL; low += (rank * n_local_bodies); up = low + n_local_bodies; // PAPI_start(event_set); // clockStart = PAPI_get_real_usec(); for (step = 0; step < steps; step++) { compute_center_and_diameter(); root = malloc(sizeof(struct node_t)); // "new" is like "malloc" root->type = 1; *(root->mass) = 0.0; root->pos = center; root->cell.childs[0] = NULL; root->cell.childs[1] = NULL; root->cell.childs[2] = NULL; root->cell.childs[3] = NULL; root->cell.childs[4] = NULL; root->cell.childs[5] = NULL; root->cell.childs[6] = NULL; root->cell.childs[7] = NULL; double radius = diameter * 0.5; for (i = 0; i < nbodies; i++) { bodies[i] = malloc(sizeof(node_t)); bodies[i]->cell.leaf = &leafs[i]; bodies[i]->mass = &leafs[i].mass; bodies[i]->pos = leafs[i].pos; insert(&(*root), &(*bodies[i]), radius); // questo è il modo per passare i dati per riferimento... 
cioè mandare l'indirizzo della struttura puntata dal puntatore } curr = 0; compute_center_of_mass(&(*root)); for (i = low; i < up; i++) { compute_force(&(*root), &(*bodies[i]), diameter, step); } // for (i = 0; i < nbodies; i++) { // } deallocate_tree(root); local_leafs = &leafs[low]; //inserire all_gather MPI_Allgather(local_leafs, up - low, bodytype, leafs, up - low, bodytype, comm); for (i = 0; i < nbodies; i++) { advance(&(*bodies[i])); } // int p = 0; // for (p = 0; p < nbodies; p++) // printf("%lf, %lf, %lf \n", bodies[p]->pos[0], bodies[p]->pos[1], // bodies[p]->pos[2]); // printf("*************************************** \n"); } // clockEnd = PAPI_get_real_usec(); // PAPI_stop(event_set, values); // int i = 0; // MPI_Send(bodies[low], up - low + 1, bodytype, 0, MPI_ANY_TAG, comm); } MPI_Finalize(); return 0; }
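/* The bodytype above brackets the element with the MPI_LB / MPI_UB marker types to set its
 * extent. Those markers were deprecated in MPI-2 and removed in MPI-3.0; the current way to
 * pin the extent is MPI_Type_create_resized. A hedged sketch, assuming leaf_t holds a double
 * mass followed by three double[3] arrays (pos, vel, acc): */
#include <mpi.h>
#include <stddef.h>

typedef struct { double mass; double pos[3]; double vel[3]; double acc[3]; } leaf_demo;

static void build_body_type(MPI_Datatype *bodytype)
{
    int          blocklen[4] = { 1, 3, 3, 3 };
    MPI_Datatype types[4]    = { MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE };
    MPI_Aint     disp[4]     = { offsetof(leaf_demo, mass), offsetof(leaf_demo, pos),
                                 offsetof(leaf_demo, vel),  offsetof(leaf_demo, acc) };
    MPI_Datatype tmp;

    MPI_Type_create_struct(4, blocklen, disp, types, &tmp);
    /* Replaces the old MPI_LB/MPI_UB bracketing: lower bound 0, extent = sizeof(leaf_demo). */
    MPI_Type_create_resized(tmp, 0, (MPI_Aint) sizeof(leaf_demo), bodytype);
    MPI_Type_commit(bodytype);
    MPI_Type_free(&tmp);
}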
int main(int argc, char **argv) { size_t dimensions; size_t i, j; scanf("%zu", &dimensions); struct complex *matrix = calloc(dimensions * dimensions, sizeof(struct complex)); struct complex temp; for (i = 0; i < dimensions; ++i) { for (j = 0; j < dimensions; ++j) { scanf("%lf", &temp.re); scanf("%lf", &temp.im); temp.x = (int) i; temp.y = (int) j; matrix[i * dimensions + j] = temp; } } int counter, size; double begin, end; begin = omp_get_wtime(); MPI_Init(&argc, &argv); MPI_Datatype complex_t; MPI_Datatype type[4] = {MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_INT}; int blocklen[4] = {1, 1, 1, 1}; /* because readability is our main concern */ /* the displacements must describe the actual struct layout; they were previously left uninitialized (offsetof comes from <stddef.h>) */ MPI_Aint disp[4] = {offsetof(struct complex, re), offsetof(struct complex, im), offsetof(struct complex, x), offsetof(struct complex, y)}; MPI_Type_create_struct(4, blocklen, disp, type, &complex_t); MPI_Type_commit(&complex_t); MPI_Comm_rank(MPI_COMM_WORLD, &counter); MPI_Comm_size(MPI_COMM_WORLD, &size); printf("%d %d", counter, size); struct complex thread_min = matrix[0]; thread_min.x = counter; thread_min.y = 0; struct complex thread_max = matrix[0]; thread_max.x = counter; thread_max.y = 0; for (i = (size_t) counter; i < dimensions; i += size) { for (j = 0; j < dimensions; ++j) { if (length(matrix[i * dimensions + j]) < length(thread_min)) { thread_min = matrix[i * dimensions + j]; } if (length(matrix[i * dimensions + j]) > length(thread_max)) { thread_max = matrix[i * dimensions + j]; } } } if (counter != 0) { MPI_Send(&thread_min, 1, complex_t, 0, 0, MPI_COMM_WORLD); MPI_Send(&thread_max, 1, complex_t, 0, 0, MPI_COMM_WORLD); } if (counter == 0) { struct complex min = thread_min; struct complex max = thread_max; for (i = 1; i < (size_t) size; ++i) { /* receive each worker's local extrema from rank i; the original received from rank 0, which never sends */ MPI_Recv(&thread_min, 1, complex_t, (int) i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Recv(&thread_max, 1, complex_t, (int) i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); printf("%.2f+i*%.2f", thread_min.re, thread_min.im); printf("%.2f+i*%.2f", thread_max.re, thread_max.im); if (length(thread_min) < length(min)) { min = thread_min; } if (length(thread_max) > length(max)) { max = thread_max; } } printf("max complex number %.2f+i*%.2f position x:%d y:%d \n", max.re, max.im, max.x, max.y); printf("min complex number %.2f+i*%.2f position x:%d, y:%d \n", min.re, min.im, min.x, min.y); } MPI_Finalize(); end = omp_get_wtime(); printf("execution time: %f\n", end - begin); free(matrix); return 0; }
FORT_DLL_SPEC void FORT_CALL mpi_type_create_struct_ ( MPI_Fint *v1, MPI_Fint v2[], MPI_Aint * v3, MPI_Fint v4[], MPI_Fint *v5, MPI_Fint *ierr ){ *ierr = MPI_Type_create_struct( *v1, v2, v3, (MPI_Datatype *)(v4), (MPI_Datatype *)(v5) ); }
/** * main function * divided to two brances for master & slave processors respectively * @param argc commandline argument count * @param argv array of commandline arguments * @return 0 if success */ int main(int argc, char* argv[]) { int rank; int size; int num_clusters; int num_points; int dex; int job_size; int job_done=0; Point* centroids; Point* points; Point* received_points; int * slave_clusters; int * former_clusters; int * latter_clusters; MPI_Init(&argc, &argv); MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); //creation of derived MPI structure MPI_Datatype MPI_POINT; MPI_Datatype type=MPI_DOUBLE; int blocklen=2; MPI_Aint disp=0; MPI_Type_create_struct(1,&blocklen,&disp,&type,&MPI_POINT); MPI_Type_commit(&MPI_POINT); /******** MASTER PROCESSOR WORKS HERE******************************************************/ if(rank==MASTER) { //inputting from file FILE *input; input=fopen(argv[1],"r"); readHeaders(input,&num_clusters,&num_points); points=(Point*)malloc(sizeof(Point)*num_points); readPoints(input,points,num_points); fclose(input); //other needed memory locations former_clusters=(int*)malloc(sizeof(int)*num_points); latter_clusters=(int*)malloc(sizeof(int)*num_points); job_size=num_points/(size-1); centroids=malloc(sizeof(Point)*num_clusters); //reseting and initializing to default behaviour initialize(centroids,num_clusters); resetData(former_clusters,num_points); resetData(latter_clusters,num_points); //Sending the essential data to slave processors for(dex=1;dex<size;dex++) { printf("Sending to [%d]\n",dex); MPI_Send(&job_size ,1 , MPI_INT ,dex,0,MPI_COMM_WORLD); MPI_Send(&num_clusters ,1 , MPI_INT ,dex,0,MPI_COMM_WORLD); MPI_Send(centroids ,num_clusters, MPI_POINT ,dex,0,MPI_COMM_WORLD); MPI_Send(points+(dex-1)*job_size,job_size , MPI_POINT ,dex,0,MPI_COMM_WORLD); } printf("Sent!\n"); MPI_Barrier(MPI_COMM_WORLD); //Main job of master processor is done here while(1) { MPI_Barrier(MPI_COMM_WORLD); printf("Master Receiving\n"); for(dex=1;dex<size;dex++) MPI_Recv(latter_clusters+(job_size*(dex-1)),job_size,MPI_INT,dex,0,MPI_COMM_WORLD,&status); printf("Master Received\n"); calculateNewCentroids(points,latter_clusters,centroids,num_clusters,num_points); printf("New Centroids are done!\n"); if(checkConvergence(latter_clusters,former_clusters,num_points)==0) { printf("Converged!\n"); job_done=1; } else { printf("Not converged!\n"); for(dex=0;dex<num_points;dex++) former_clusters[dex]=latter_clusters[dex]; } //Informing slaves that no more job to be done for(dex=1;dex<size;dex++) MPI_Send(&job_done,1, MPI_INT,dex,0,MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); if(job_done==1) break; //Sending the recently created centroids for(dex=1;dex<size;dex++) MPI_Send(centroids,num_clusters, MPI_POINT,dex,0, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); } //Outputting to the output file FILE* output=fopen(argv[2],"w"); fprintf(output,"%d\n",num_clusters); fprintf(output,"%d\n",num_points); for(dex=0;dex<num_clusters;dex++) fprintf(output,"%lf,%lf\n",centroids[dex]._x,centroids[dex]._y); for(dex=0;dex<num_points;dex++) fprintf(output,"%lf,%lf,%d\n",points[dex]._x,points[dex]._y,latter_clusters[dex]+1); fclose(output); } /*************END OF MASTER PROCESSOR'S BRANCH -- SLAVE PROCESSORS' JOB IS TO FOLLOW ************************/ else { //Receiving the essential data printf("Receiving\n"); MPI_Recv(&job_size ,1 ,MPI_INT ,MASTER,0,MPI_COMM_WORLD,&status); MPI_Recv(&num_clusters,1 ,MPI_INT ,MASTER,0,MPI_COMM_WORLD,&status); 
centroids=malloc(sizeof(Point)*num_clusters); MPI_Recv(centroids ,num_clusters,MPI_POINT,MASTER,0,MPI_COMM_WORLD,&status); printf("part_size =%d\n",job_size); received_points=(Point*)malloc(sizeof(Point)*job_size); slave_clusters=(int*)malloc(sizeof(int)*job_size); MPI_Recv(received_points,job_size,MPI_POINT ,MASTER,0,MPI_COMM_WORLD,&status); printf("Received [%d]\n",rank); MPI_Barrier(MPI_COMM_WORLD); while(1) { printf("Calculation of new clusters [%d]\n",rank); for(dex=0;dex<job_size;dex++) { slave_clusters[dex]=whoIsYourDaddy(received_points[dex],centroids,num_clusters); } printf("sending to master [%d]\n",rank); MPI_Send(slave_clusters,job_size, MPI_INT,MASTER, 0, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); MPI_Recv(&job_done,1, MPI_INT,MASTER,0,MPI_COMM_WORLD,&status); if(job_done==1) //No more work to be done break; //Receiving recently created centroids from master MPI_Recv(centroids,num_clusters,MPI_POINT,MASTER,0, MPI_COMM_WORLD,&status); MPI_Barrier(MPI_COMM_WORLD); } } //End of all MPI_Finalize(); return 0; }
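/* MPI_POINT above is built as a single block of two doubles at displacement 0, so the full
 * MPI_Type_create_struct machinery is not strictly required. Assuming Point really contains
 * only the two doubles _x and _y, a contiguous type is an equivalent, simpler construction: */
#include <mpi.h>

static void build_point_type(MPI_Datatype *point_type)
{
    /* Two consecutive doubles: the same layout the struct-based version describes. */
    MPI_Type_contiguous(2, MPI_DOUBLE, point_type);
    MPI_Type_commit(point_type);
}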
int main(int argc, char *argv[]) { clock_t startTime, endTime; startTime = clock(); int p, my_rank; /* initialize MPI stuff */ MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD,&p); MPI_Comm_rank(MPI_COMM_WORLD,&my_rank); srand(time(NULL)); int row_num, nz, col_num, i; FILE *fp; fp = fopen("crs48x48.txt", "r"); fscanf(fp, "%d", &nz); while (fgetc(fp) != '\n'); fscanf(fp, "%d", &row_num); while (fgetc(fp) != '\n'); fscanf(fp, "%d", &col_num); while (fgetc(fp) != '\n'); printf("%d => NZ = %d\n",my_rank, nz); FILE *fpseed; int seed[p]; //int *column_partition = (int *)malloc(sizeof(int)*col_num); int *column_ptr; int *hash_weights; int num_cols_per_process[p]; int *YPartition = (int*)calloc(row_num, sizeof(int)); int *YmaxPartition = (int*)calloc(row_num, sizeof(int)); const int nitems = 2; int blocklengths[2] = {1, 1}; MPI_Datatype types[2] = {MPI_INT, MPI_INT}; MPI_Datatype mpi_pair; MPI_Aint offsets[2]; offsets[0] = offsetof(pair, col); offsets[1] = offsetof(pair, nz); MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_pair); MPI_Type_commit(&mpi_pair); //printf("datatype created\n"); pair A_partition_column[p]; pair *A_partition[p]; pair *my_columns; column_ptr = (int *)malloc(sizeof(int) * (col_num+1)); // I need how many non-zeros in each column in the matrix data for (i=0; i <= col_num; i++) { fscanf(fp, "%d", &column_ptr[i]); while (fgetc(fp) != '\n'); } //column_ptr[i] = nz; if (my_rank == 0) { fpseed = fopen("seed48x48.txt", "r"); for(i=0; i<p; i++) { fscanf(fpseed, "%d\n", &seed[i]); printf("seed[%d]: %d\n", i, seed[i]); } fclose(fpseed); int i; int prime_arr[prime_arr_len] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541 }; hash_weights = (int *)malloc(sizeof(int)*row_num); genHashWeightsArr(hash_weights, row_num, prime_arr); /*for (i=0; i<row_num; i++) { printf("hashweights[%d]: %d\n",i, hash_weights[i]); }*/ int *current_column_rows = (int *)malloc(sizeof(int)*row_num); //printf("check 1\n"); HASHTABLE hash_columns; hash_columns = createHashtable(p, row_num); // read row_arr and insert in the hashtable for each column int j,c, flag; //insert seed cols for (c=0; c<p; c++) { j = seed[c]; int nz_in_current_col = column_ptr[j+1] - column_ptr[j]; fseek(fp, col_block_size*(col_num+1) + init_block_size*3 + rowVal_block_size*(column_ptr[j]), SEEK_SET); //printf("inserting\n"); for (i=0; i<nz_in_current_col; i++) { fscanf(fp, "%d,", &current_column_rows[i]); while (fgetc(fp) != '\n'); } hash_columns = insert_hash(hash_columns, current_column_rows, nz_in_current_col, j, p, hash_weights, row_num, c); } printf("\nSeeds:\n"); print_hash(hash_columns, p); fseek(fp, col_block_size*(col_num+1) + init_block_size*3, SEEK_SET); //#pragma omp parallel for private(fp, j) num_threads(8) for (j=0; j<col_num; j++) { int nz_in_current_col = column_ptr[j+1] - column_ptr[j]; flag =1; for (i=0; i<nz_in_current_col; i++) { fscanf(fp, "%d,", &current_column_rows[i]); while (fgetc(fp) != '\n'); } //current_column_rows[i] = -1; /*if (j==0) { for (i=0; i<nz_in_current_col; i++) { printf("cur col[%d]: %d\n",i, current_column_rows[i]); } */ for(c =0 ; c<p; c++) { if(seed[c] == j) { flag =
0; } } if(flag == 1) { hash_columns = insert_hash(hash_columns, current_column_rows, nz_in_current_col, j, p, hash_weights, row_num, -1); } //} } // Load balancing //printf("inserted in hash\n"); print_hash(hash_columns, p); // Generate a column-wise index storing the partition alloted to each column NODE temp; int max; #pragma omp parallel for num_threads(p) for (i=0; i<p; i++) { max = 0; A_partition_column[i].col = hash_columns->col_counts[i]; A_partition[i] = (pair *)malloc(sizeof(pair)*A_partition_column[i].col); temp = hash_columns->buckets[i]; for (j = 0; j < A_partition_column[i].col; j++) { A_partition[i][j].col = temp->col_index; A_partition[i][j].nz = temp->col_nz; if (temp->col_nz > max) { max = temp->col_nz; } temp = temp->next; } for (j=0; j<row_num; j++) { if(hash_columns->row_indices[i][j] > YmaxPartition[j]) { YmaxPartition[j] = hash_columns->row_indices[i][j]; YPartition[j] = i; } } A_partition_column[i].nz = max; } } // Broadcast the column-wise partition array MPI_Bcast(A_partition_column, p, mpi_pair, 0, MPI_COMM_WORLD); if (my_rank == 0) { my_columns = *A_partition; } else { my_columns = (pair *)malloc(sizeof(struct _pair)*A_partition_column[my_rank].col); } if (my_rank == 0) { for (i=1; i<p; i++) { MPI_Send(A_partition[i], A_partition_column[i].col, mpi_pair, i, 0, MPI_COMM_WORLD); } } else { MPI_Recv(my_columns, A_partition_column[my_rank].col, mpi_pair, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } MPI_Bcast(YPartition, row_num, MPI_INT, 0, MPI_COMM_WORLD); //check what recvd in mycolumns /*for(i=0; i<A_partition_column[my_rank].col; i++) { printf("Rank %d , Col no: %d, myNz : %d\n", my_rank, my_columns[i].col, my_columns[i].nz); }*/ //partition_fp = (FILE **)malloc(sizeof(FILE*)*p); FILE *my_output; char f_name[20]; int colIndex, myNz, rowIndex, j; float val; char *buffer; float *Y; Y = (float*)calloc(row_num, sizeof(float)); //Read X FILE *fp2; fp2 = fopen("Xvector_algo2.txt", "r"); int *X; X = (int*)malloc(sizeof(int)*A_partition_column[my_rank].col); printf("Rank %d recvd %d columns\n", my_rank, A_partition_column[my_rank].col); #pragma omp parallel for private(fp2, colIndex, i) for(i=0; i<A_partition_column[my_rank].col; i++) { colIndex = my_columns[i].col; fseek(fp2, colIndex*vector_block_size, SEEK_SET); fscanf(fp2, "%d\n", &X[i]); } fclose(fp2); /*for(i=0; i<A_partition_column[my_rank].col; i++) { printf("Rank %d ::, X[%d] = %d\n",my_rank, i, X[i]); }*/ //for each column in A_partition_column[my_rank]... //Read non zeroes and multiply (computing local Y)... #pragma omp parallel for private(fp, colIndex, myNz, rowIndex, val) for(i=0; i<A_partition_column[my_rank].col; i++) { //printf("proc: %d, Operating on col %d \n", my_rank, colIndex); colIndex = my_columns[i].col; myNz = my_columns[i].nz; //seek to non-zeroes corresponding to this column in file fseek(fp, col_block_size*(col_num+1) + init_block_size*3 + rowVal_block_size*(column_ptr[colIndex]), SEEK_SET); //fread(buffer, myNz*rowVal_block_size,1,fp); //for each non zero... 
for(j=0; j<myNz; j++) { fscanf(fp, "%d, %f", &rowIndex, &val); while (fgetc(fp) != '\n'); if(rowIndex>=row_num) { //printf("\n\n***********ERROR %d\n\n\n\n", rowIndex); } #pragma omp atomic Y[rowIndex]+= X[i]*val; } } //printf("end of loop: %d\n", my_rank); pairF *sendOthers[p]; int numRowsInPartition[p], part; //numRowsInPartition = (int*) malloc(sizeof(int)*p); #pragma omp parallel for for(i=0; i<p; i++) { numRowsInPartition[i] = 0; } /* for(i=0; i<row_num; i++) { printf("YPartition[%d] = %d\n", i, YPartition[i]); } */ #pragma omp parallel for private(part) for(i=0; i<row_num; i++) { part = YPartition[i]; #pragma omp atomic numRowsInPartition[part]++; } if (my_rank == 0) { for(i=0;i < p; i++) { printf("Rank %d got %d rows of Y vector\n", i, numRowsInPartition[i]); } } //make the arrays that have to be sent to other processes. //pair arrays that store rowIndex and val. //allocate! for(i=0; i<p;i++) { //if(i!=my_rank) //{ sendOthers[i] = (pairF*)malloc(sizeof(pairF)*numRowsInPartition[i]); //} } int *current = (int*) calloc(p, sizeof(int)); int other, other_pos; //populate! for(i=0; i<row_num; i++) { other = YPartition[i]; //if(other!=my_rank) //{ other_pos = current[other]; sendOthers[other][other_pos].row = i; sendOthers[other][other_pos].val = Y[i]; current[other]++; //} } //write to respective files FILE *partition_fp[p]; //open output files for (i=0; i< p; i++) { sprintf(f_name, "%d", i); //printf("open file %d\n", i); partition_fp[i] = fopen(f_name, "a"); } //FILE *fp21 = fopen("hehe.txt", "a"); for(i=0; i<p; i++) { if(i!=my_rank) { other = i; for(j=0; j< numRowsInPartition[other]; j++) { if(sendOthers[other][j].val!=0){ fprintf(partition_fp[other], "%d, %f, process %d\n",sendOthers[other][j].row, sendOthers[other][j].val, my_rank); } } } } //read from respective files and add! MPI_Barrier(MPI_COMM_WORLD); for (i = 0; i < p; ++i) { fclose(partition_fp[i]); } //printf("all files closed by rank %d\n", my_rank); sprintf(f_name, "%d", my_rank); partition_fp[my_rank] = fopen(f_name, "r"); strcat(f_name, "_output.txt"); my_output = fopen(f_name, "w"); while (fscanf(partition_fp[my_rank], "%d, %f", &rowIndex, &val) == 2) // expect 2 successful conversions { while (fgetc(partition_fp[my_rank]) != '\n'); //update local y //printf("\n****\nRank %d read value %f\n\n", my_rank, val); Y[rowIndex]+=val; } for(i=0; i<numRowsInPartition[my_rank]; i++) { rowIndex = sendOthers[my_rank][i].row; sendOthers[my_rank][i].val = Y[rowIndex]; //these are the final values! fprintf(my_output, "%d, %f, process %d\n",sendOthers[my_rank][i].row, sendOthers[my_rank][i].val, my_rank); } fclose(partition_fp[my_rank]); fclose(my_output); endTime = clock(); printf("\nrank = %d, Time taken: %lf\n", my_rank, (double)(endTime - startTime)/CLOCKS_PER_SEC); MPI_Finalize(); return 0; }
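/* Editor's note (hedged alternative, not the original approach): the program above
 * combines the per-rank partial Y vectors by appending "row, value" lines to shared
 * files and re-reading them. Because every rank already holds a full-length partial Y
 * (zero in rows it never touched), the same combination can be expressed as a single
 * element-wise MPI_Reduce onto one rank, sketched here. */
#include <stdlib.h>
#include <mpi.h>

static float *reduce_partial_y(const float *partial_y, int row_num,
                               int root, MPI_Comm comm)
{
    int    rank;
    float *global_y = NULL;

    MPI_Comm_rank(comm, &rank);
    if (rank == root)
        global_y = (float *) calloc(row_num, sizeof(float));

    /* Sums partial_y element-wise across all ranks; the result lands on root. */
    MPI_Reduce(partial_y, global_y, row_num, MPI_FLOAT, MPI_SUM, root, comm);
    return global_y;   /* NULL on non-root ranks */
}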
/* test case from tt#1030 ported to C * * Thanks to Matthias Lieber for reporting the bug and providing a good test * program. */ int struct_struct_test(void) { int err, errs = 0; int i, j, dt_size = 0; MPI_Request req[2]; #define COUNT (2) MPI_Aint displ[COUNT]; int blens[COUNT]; MPI_Datatype types[COUNT]; MPI_Datatype datatype; /* A slight difference from the F90 test: F90 arrays are column-major, C * arrays are row-major. So we invert the order of dimensions. */ #define N (2) #define M (4) int array[N][M] = { {-1, -1, -1, -1}, {-1, -1, -1, -1} }; int expected[N][M] = { {-1, 1, 2, 5}, {-1, 3, 4, 6} }; int seq_array[N*M]; MPI_Aint astart, aend; MPI_Aint size_exp = 0; /* 1st section selects elements 1 and 2 out of 2nd dimension, complete 1st dim. * should receive the values 1, 2, 3, 4 */ astart = 1; aend = 2; err = build_array_section_type(M, astart, aend, &types[0]); if (err) { errs++; if (verbose) fprintf(stderr, "build_array_section_type failed\n"); return errs; } blens[0] = N; displ[0] = 0; size_exp = size_exp + N * (aend-astart+1) * sizeof(int); /* 2nd section selects last element of 2nd dimension, complete 1st dim. * should receive the values 5, 6 */ astart = 3; aend = 3; err = build_array_section_type(M, astart, aend, &types[1]); if (err) { errs++; if (verbose) fprintf(stderr, "build_array_section_type failed\n"); return errs; } blens[1] = N; displ[1] = 0; size_exp = size_exp + N * (aend-astart+1) * sizeof(int); /* create type */ err = MPI_Type_create_struct(COUNT, blens, displ, types, &datatype); check_err(MPI_Type_create_struct); err = MPI_Type_commit(&datatype); check_err(MPI_Type_commit); err = MPI_Type_size(datatype, &dt_size); check_err(MPI_Type_size); if (dt_size != size_exp) { errs++; if (verbose) fprintf(stderr, "unexpected type size\n"); } /* send the type to ourselves to make sure that the type describes data correctly */ for (i = 0; i < (N*M) ; ++i) seq_array[i] = i + 1; /* source values 1..(N*M) */ err = MPI_Isend(&seq_array[0], dt_size/sizeof(int), MPI_INT, 0, 42, MPI_COMM_SELF, &req[0]); check_err(MPI_Isend); err = MPI_Irecv(&array[0][0], 1, datatype, 0, 42, MPI_COMM_SELF, &req[1]); check_err(MPI_Irecv); err = MPI_Waitall(2, req, MPI_STATUSES_IGNORE); check_err(MPI_Waitall); /* check against expected */ for (i = 0; i < N; ++i) { for (j = 0; j < M; ++j) { if (array[i][j] != expected[i][j]) { errs++; if (verbose) fprintf(stderr, "array[%d][%d]=%d, should be %d\n", i, j, array[i][j], expected[i][j]); } } } err = MPI_Type_free(&datatype); check_err(MPI_Type_free); err = MPI_Type_free(&types[0]); check_err(MPI_Type_free); err = MPI_Type_free(&types[1]); check_err(MPI_Type_free); return errs; #undef M #undef N #undef COUNT }
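/* Editor's sketch (hedged): build_array_section_type() is called above but not shown
 * in this excerpt. One plausible implementation that matches the test's expectations
 * is a 1-D subarray selecting elements [astart..aend] out of a row of length `full`,
 * whose extent spans the whole row so that a blocklength of N steps over N rows. */
static int build_array_section_type(MPI_Aint full, MPI_Aint astart, MPI_Aint aend,
                                    MPI_Datatype *newtype)
{
    int sizes[1], subsizes[1], starts[1];

    sizes[0]    = (int) full;                 /* row length in elements */
    subsizes[0] = (int) (aend - astart + 1);  /* elements actually used */
    starts[0]   = (int) astart;               /* first selected element */

    return MPI_Type_create_subarray(1, sizes, subsizes, starts,
                                    MPI_ORDER_C, MPI_INT, newtype);
}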
int main(int argc, char **argv) { char * dirPrefix = "/mirror/local/vita/input/"; FILE * fp; News *news; News Test; int count = atoi(argv[1]); //printf("Count %d",count); //news = (News *)malloc(sizeof(News)*(count)); char *buffer = (char *)malloc(sizeof(char)*(count*630)); // 30 + 100 + 500 int p = 0; for (int i = 1 ; i <= count ; i++ ) { char * line = NULL; size_t len = 0; ssize_t read; char *filePath = (char *)malloc(sizeof(char)*200); strcpy(filePath,dirPrefix); char *arg = (char *)malloc(sizeof(char)*32); snprintf(arg, 32, "%d", i); strcat(filePath,arg); //printf("So file path is : %s\n",filePath); fp = fopen(filePath, "r"); if (fp == NULL) { exit(EXIT_FAILURE); } //news[i] = (News *)malloc(sizeof(News)); int j = 0 ; while ((read = getline(&line, &len, fp)) != -1) { //printf("Retrieved line of length %zu :\n", read); //printf("%s", line); if (j == 0) { //news[i]->timeStamp = (char *)malloc(sizeof(char)*read); //strncpy(news[i].timeStamp,line,read); //news[i].timeStamp[read] = '\0'; strncpy(&(buffer[p]),line,read); if (read < 30) { int k = p + read; for (; k < 30 ; k++) { buffer[k] = '\0'; } } else { buffer[29] = '\0'; } j++; p = p+ 30; } else if ( j == 1) { //news[i]->title = (char *)malloc(sizeof(char)*read); //strncpy(news[i].title,line,read); //news[i].title[read] = '\0'; strncpy(&(buffer[p]),line,read); if (read < 100) { int k = p + read; for (; k < 100 ; k++) { buffer[k] = '\0'; } } else { buffer[99] = '\0'; } j++; p = p+ 100; } else { //news[i]->details = (char *)malloc(sizeof(char)*read); //strncpy(news[i].details,line,read); //news[i].details[read] = '\0'; strncpy(&(buffer[p]),line,read); if (read < 500) { int k = p + read; for (; k < 500 ; k++) { buffer[k] = '\0'; } } else { buffer[99] = '\0'; } j++; p = p + 500; j = 0; } } fclose(fp); if (line) free(line); } /* int limit = count * 630 -1; for (int i = 0 ; i < limit; i++) { printf("%c", buffer[i]); } */ //printf("Time Stamp : %s\n",news1.timeStamp); //printf("Title : %s\n",news1.title); //printf("Details : %s\n",news1.details); /* for (int i = 1 ; i <= count ; i++) { printf("News item : %d \n", i); printf("News TimeStamp: %s \n", news[i].timeStamp); printf("News Title: %s \n", news[i].title); printf("News Details: %s \n", news[i].details); } */ /* News *latestNews = findLatest(news, count); printf("Latest News TimeStamp: %s \n", latestNews->timeStamp); printf("Latest News Title: %s \n", latestNews->title); printf("Latest News Details: %s \n", latestNews->details); */ //%a %b %e %T %Z %Y => Thu Mar 3 22:32:41 IST 2016 //struct tm time; //strptime(news1.timeStamp,"%a %b %e %T %Z %Y",&time); //time_t loctime = mktime(&time); //printf ( "Current local time and date: %s", asctime (&time) ); const int tag = 0; int world_size, world_rank; int rep_size, rep_rank; int *process_rank; MPI_Group world_group, new_group; MPI_Comm rep_comm,world_comm; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &world_size); MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); //number of items inside structure Test const int nitems = 3; //count of item of each type inside Test in order int blocklengths[3] = {1, 1, 1}; MPI_Datatype mpi_timestamp; MPI_Datatype mpi_title; MPI_Datatype mpi_details; MPI_Type_contiguous(100,MPI_CHAR,&mpi_title); MPI_Type_commit(&mpi_title); MPI_Type_contiguous(30,MPI_CHAR,&mpi_timestamp); MPI_Type_commit(&mpi_timestamp); MPI_Type_contiguous(500,MPI_CHAR,&mpi_details); MPI_Type_commit(&mpi_details); //data types present inside Test in order MPI_Datatype types[3] = {mpi_timestamp, mpi_title, mpi_details}; //name of derived data 
type MPI_Datatype mpi_test_type; //array to store starting address of each item inside Test MPI_Aint offsets[3]; //offset of each item in Test with respect to base address of Test offsets[0] = offsetof(News, timeStamp); offsets[1] = offsetof(News, title); offsets[2] = offsetof(News, details); //create the new derived data type MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_test_type); //commit the new data type MPI_Type_commit(&mpi_test_type); //get rank of current process //MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); //Code for the creation of REPORTER COMM goes here. process_rank = (int*)malloc(sizeof(int) * (world_size - 1)); for(int i = 1 ; i < world_size ; i++){ process_rank[i] = i; } MPI_Comm_dup(MPI_COMM_WORLD, &world_comm); MPI_Comm_group(world_comm, &world_group); MPI_Group_incl(world_group, (world_size - 1), process_rank, &new_group); MPI_Comm_create(world_comm, new_group, &rep_comm); //printf("%d\n",error); //Get the size of the Comm REPORTER. if(world_rank == 0){ //Do editor's task }else{ MPI_Comm_size(rep_comm, &rep_size); MPI_Comm_rank(rep_comm, &rep_rank); /* for (int i = 1 ; i <= count ; i++) { printf("News item : %d \n", i); printf("News TimeStamp: %s \n", news[i].timeStamp); printf("News Title: %s \n", news[i].title); printf("News Details: %s \n", news[i].details); } MPI_Barrier(MPI_COMM_WORLD); /* if(rank == 1) { // News send; // News.one = 1; // News.two = 2.0; // strncpy(send.news,"This is simple news.",sizeof(send.news)); const int dest = 2; MPI_Send(news[1], 1, mpi_test_type, dest, tag, MPI_COMM_WORLD); printf("\nRank %d sending \n %s \n %s \n %s\n", rank, news[1]->timeStamp, news[1]->title, news[1]->details); } if(rank == 2) { MPI_Status status; const int src = 1; News recv; MPI_Recv(&recv, 1, mpi_test_type, src, tag, MPI_COMM_WORLD, &status); printf("\nRank %d received \n %s \n %s \n %s \n", rank, recv.timeStamp,recv.title,recv.details); } */ //News *recvNews = (News *)malloc(sizeof(News )*(count)); char *recvBuffer = (char *)malloc(sizeof(char) * (count*630)); int status = MPI_Alltoall(buffer,630,MPI_CHAR,recvBuffer,630,MPI_CHAR,rep_comm); if(status != 0) { printf("MPI_Alltoall failed with status %d\n", status); exit(EXIT_FAILURE); } MPI_Barrier(rep_comm); printf(" \n \n \n"); /* if (rank == 1) { int limit = count * 630 -1; for (int i = 0 ; i < limit; i++) { printf("%c", recvBuffer[i]); } } */ //MPI_Barrier(MPI_COMM_WORLD); News* newsArray = getNewsArray(recvBuffer,count); for( int my_rank = 0; my_rank < count; my_rank++) { if( my_rank == rep_rank ) { /*for (int i = 0 ; i < size ; i++) { printf("News item : %d rank %d\n", i , rank); printf("News TimeStamp: %s rank %d\n", newsArray[i].timeStamp,rank); printf("News Title: %s rank %d\n", newsArray[i].title,rank); printf("News Details: %s rank %d \n", newsArray[i].details,rank); } */ News latestNews = findLatest(newsArray,count); printf("Latest news : \n"); printf("News TimeStamp: %s\n", latestNews.timeStamp); printf("News Title: %s\n", latestNews.title); printf("News Details: %s\n", latestNews.details); } } /* else if(rank == 1) { for (int i = 1 ; i <= size ; i++) { printf("News item : %d rank %d\n", i , rank); printf("News TimeStamp: %s rank %d\n", recvNews[i].timeStamp,rank); printf("News Title: %s rank %d\n", recvNews[i].title,rank); printf("News Details: %s rank %d \n", recvNews[i].details,rank); } } */ //dumpRecvNews(,recvNews,size); //free the derived data type } MPI_Type_free(&mpi_test_type); MPI_Finalize(); exit(EXIT_SUCCESS); }
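/* Editor's sketch (hedged): before sending arrays of News with mpi_test_type, it is
 * worth checking that the committed type's extent equals sizeof(News); if the struct
 * carries padding the two can differ and array strides would be wrong. The resize
 * step below is an illustrative safeguard, not part of the original program. */
static MPI_Datatype fit_news_type(MPI_Datatype news_type)
{
    MPI_Aint     lb, extent;
    MPI_Datatype fitted;

    MPI_Type_get_extent(news_type, &lb, &extent);
    if (extent == (MPI_Aint) sizeof(News))
        return news_type;                    /* already safe for News arrays */

    MPI_Type_create_resized(news_type, 0, sizeof(News), &fitted);
    MPI_Type_commit(&fitted);
    return fitted;
}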
/* regression for tt#1030, checks for bad offset math in the * blockindexed and indexed dataloop flattening code */ int flatten_test(void) { int err, errs = 0; #define ARR_SIZE (9) /* real indices 0 1 2 3 4 5 6 7 8 * indices w/ &array[3] -3 -2 -1 0 1 2 3 4 5 */ int array[ARR_SIZE] = {-1,-1,-1,-1,-1,-1,-1,-1,-1}; int expected[ARR_SIZE] = {-1, 0, 1,-1, 2,-1, 3,-1, 4}; MPI_Datatype idx_type = MPI_DATATYPE_NULL; MPI_Datatype blkidx_type = MPI_DATATYPE_NULL; MPI_Datatype combo = MPI_DATATYPE_NULL; #define COUNT (2) int displ[COUNT]; MPI_Aint adispl[COUNT]; int blens[COUNT]; MPI_Datatype types[COUNT]; /* indexed type layout: * XX_X * 2101 <-- pos (left of 0 is neg) * * different blens to prevent optimization into a blockindexed */ blens[0] = 2; displ[0] = -2; /* elements, puts byte after block end at 0 */ blens[1] = 1; displ[1] = 1; /*elements*/ err = MPI_Type_indexed(COUNT, blens, displ, MPI_INT, &idx_type); check_err(MPI_Type_indexed); err = MPI_Type_commit(&idx_type); check_err(MPI_Type_commit); /* indexed type layout: * _X_X * 2101 <-- pos (left of 0 is neg) */ displ[0] = -1; displ[1] = 1; err = MPI_Type_create_indexed_block(COUNT, 1, displ, MPI_INT, &blkidx_type); check_err(MPI_Type_indexed_block); err = MPI_Type_commit(&blkidx_type); check_err(MPI_Type_commit); /* struct type layout: * II_I_B_B (I=idx_type, B=blkidx_type) * 21012345 <-- pos (left of 0 is neg) */ blens[0] = 1; adispl[0] = 0; /*bytes*/ types[0] = idx_type; blens[1] = 1; adispl[1] = 4 * sizeof(int); /* bytes */ types[1] = blkidx_type; /* must be a struct in order to trigger flattening code */ err = MPI_Type_create_struct(COUNT, blens, adispl, types, &combo); check_err(MPI_Type_indexed); err = MPI_Type_commit(&combo); check_err(MPI_Type_commit); /* pack/unpack with &array[3] */ errs += pack_and_check_expected(combo, "combo", 3, ARR_SIZE, array, expected); MPI_Type_free(&combo); MPI_Type_free(&idx_type); MPI_Type_free(&blkidx_type); return errs; #undef COUNT }
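/* Editor's sketch (hedged guess): pack_and_check_expected() is used above but not
 * shown here. Its apparent job: pack a contiguous run of ints 0..n-1, unpack them
 * through the datatype rooted at &array[start], and compare the whole array against
 * `expected`. Assumes <stdio.h>, <stdlib.h> and <mpi.h> are available. */
static int pack_and_check_expected(MPI_Datatype type, const char *name,
                                   int start, int size,
                                   int *array, const int *expected)
{
    int   i, n, type_size, pack_size, pos = 0, errs = 0;
    int  *src;
    void *packbuf;

    MPI_Type_size(type, &type_size);
    n = type_size / (int) sizeof(int);        /* number of ints the type holds */

    src = (int *) malloc(n * sizeof(int));
    for (i = 0; i < n; i++) src[i] = i;       /* source values 0..n-1 */

    MPI_Pack_size(n, MPI_INT, MPI_COMM_SELF, &pack_size);
    packbuf = malloc(pack_size);
    MPI_Pack(src, n, MPI_INT, packbuf, pack_size, &pos, MPI_COMM_SELF);

    pos = 0;
    MPI_Unpack(packbuf, pack_size, &pos, &array[start], 1, type, MPI_COMM_SELF);

    for (i = 0; i < size; i++) {
        if (array[i] != expected[i]) {
            errs++;
            fprintf(stderr, "%s: array[%d]=%d, expected %d\n",
                    name, i, array[i], expected[i]);
        }
    }
    free(src);
    free(packbuf);
    return errs;
}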
int main(int argc, char **argv){ int task, len, chunks = CHUNK; MPI_Status status; char hostname[MPI_MAX_PROCESSOR_NAME]; #ifdef GETTIME double start = MPI_Wtime(); #endif const int nitems=3; int blocklengths[3] = {2,2,1}; MPI_Datatype types[3] = {MPI_CHAR, MPI_UNSIGNED_SHORT, MPI_UNSIGNED}; MPI_Aint offsets[3]; offsets[0] = offsetof(world_cell, type); offsets[1] = offsetof(world_cell, breeding_period); offsets[2] = offsetof(world_cell, number); /* MPI Initialization */ if (MPI_Init(&argc, &argv) != MPI_SUCCESS) { printf ("Error starting MPI program. Terminating.\n"); /*MPI_Abort(MPI_COMM_WORLD, ret);*/ return -1; } MPI_Get_processor_name(hostname, &len); MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_world_cell_type); MPI_Type_commit(&mpi_world_cell_type); MPI_Comm_size(MPI_COMM_WORLD, &numtasks); MPI_Comm_rank(MPI_COMM_WORLD, &taskid); if(taskid == MASTER){ MPI_Request size_reqs[numtasks-1]; info[1] = wolf_breeding_period = atoi(argv[2]); info[2] = squirrel_breeding_period = atoi(argv[3]); info[3] = wolf_starvation_period = atoi(argv[4]); info[4] = number_of_generations = atoi(argv[5]); parse_input(argv[1]); info[0] = grid_size; bottom = 0; top = chunk_size = CHUNK; payload = top + 2; for(task = 1; task < numtasks; task++) MPI_Isend(info, 5, MPI_INT, task, INIT_TAG, MPI_COMM_WORLD, &size_reqs[task-1]); MPI_Waitall(numtasks - 1, size_reqs, MPI_STATUS_IGNORE); for(task = 1; task < numtasks; task++){ int bottom_task = FLIMIT_INF_CHUNK(task), top_task = FLIMIT_SUP_CHUNK(task), chunk_size = top_task-bottom_task; bottom_task -= 2; if (task == numtasks-1) top_task += CHUNK_REMAINDER; else top_task += 2; for( ; bottom_task < top_task; bottom_task++) MPI_Send(world[bottom_task], grid_size, mpi_world_cell_type, task, FILL_TAG, MPI_COMM_WORLD); } } else { int j = 0; MPI_Recv(info, 5, MPI_INT, MASTER, INIT_TAG, MPI_COMM_WORLD, &status); grid_size = info[0]; wolf_breeding_period = info[1]; squirrel_breeding_period = info[2]; wolf_starvation_period = info[3]; number_of_generations = info[4]; bottom = 2; if(taskid == numtasks-1){ chunk_size = CHUNK + CHUNK_REMAINDER; payload = top = chunk_size+bottom; } else { chunk_size = CHUNK; top = chunk_size+bottom; payload = top + 2; } initialize_world_array(payload ); for( ; j < payload; j++) MPI_Recv(world[j], grid_size, mpi_world_cell_type, MASTER, FILL_TAG, MPI_COMM_WORLD, &status); } start_world_simulation(); gather(); #ifdef GETTIME if(taskid == MASTER){ printf("MPI time: %lf\n", MPI_Wtime() - start); print_world(grid_size); } #endif //freemem(); MPI_Finalize(); return 0; }
void data_server(int agents_total, int world_width, int world_height) { int np; MPI_Comm_size(MPI_COMM_WORLD, &np); /* create a type for struct agent */ const int nitems=5; int blocklengths[5] = {1,1,1,1,1}; MPI_Datatype types[5] = {MPI_INT, MPI_INT, MPI_INT, MPI_FLOAT, MPI_FLOAT}; MPI_Datatype mpi_agent_type; MPI_Aint offsets[5]; offsets[0] = offsetof(agent, id); offsets[1] = offsetof(agent, x); offsets[2] = offsetof(agent, y); offsets[3] = offsetof(agent, z); offsets[4] = offsetof(agent, w); MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_agent_type); MPI_Type_commit(&mpi_agent_type); int num_comp_nodes = np -1; unsigned int num_bytes = agents_total * sizeof(agent); agent *h_agents_in, *h_agents_out; /* allocate input data */ h_agents_in = (agent *)malloc(num_bytes); h_agents_out = (agent *)malloc(num_bytes); if(h_agents_in == NULL || h_agents_out == NULL) { printf("server couldn't allocate memory\n"); MPI_Abort(MPI_COMM_WORLD, 1); } /* initialize input data */ init_data(h_agents_in, agents_total); #ifdef DEBUG printf("Init data\n"); display_data(h_agents_in, agents_total); #endif int world_height_node = world_height / num_comp_nodes; // printf("world_height: %d\n", world_height_node); agent h_agents_node_in[num_comp_nodes][agents_total], h_agents_node_out[num_comp_nodes][agents_total]; for(int process = 0; process < num_comp_nodes; process++) { for(int i = 0; i < agents_total; i++) { if( ( h_agents_in[i].y >= (process * world_height_node) ) and ( h_agents_in[i].y < ( (process + 1) * world_height_node ) ) ) h_agents_node_in[process][i] = h_agents_in[i]; } } /*** printf("copy data 0\n"); display_data(h_agents_node_in[0], agents_total); printf("copy data 1\n"); display_data(h_agents_node_in[1], agents_total); printf("copy data 2\n"); display_data(h_agents_node_in[2], agents_total); ***/ /* send data to compute nodes */ for(int process = 0; process < num_comp_nodes; process++) MPI_Send(h_agents_node_in[process], agents_total, mpi_agent_type, process, 0, MPI_COMM_WORLD); /* Wait for nodes to compute */ MPI_Barrier(MPI_COMM_WORLD); /* Collect output data */ MPI_Status status; for(int process = 0; process < num_comp_nodes; process++) MPI_Recv(h_agents_node_out[process], agents_total, mpi_agent_type, process, DATA_COLLECT, MPI_COMM_WORLD, &status); #ifdef DEBUG printf("Final Data\n"); /* display output data */ // display_data(h_agents_out, agents_total); #endif /* release resources */ free(h_agents_in); free(h_agents_out); // free(h_agents_node_in); // free(h_agents_node_out); }
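/* Editor's sketch (hedged): only the server side is shown above. A matching
 * compute-node routine presumably mirrors it: receive one slice of agents (tag 0),
 * compute, hit the same barrier, and send results back with the DATA_COLLECT tag.
 * The server rank is inferred to be the last rank, since data_server() sends to
 * ranks 0..np-2; everything else here is illustrative. */
void compute_node(int agents_total, MPI_Datatype mpi_agent_type)
{
    int        np, server;
    MPI_Status status;
    agent     *agents;

    MPI_Comm_size(MPI_COMM_WORLD, &np);
    server = np - 1;                                   /* assumed server rank */

    agents = (agent *) malloc(agents_total * sizeof(agent));
    MPI_Recv(agents, agents_total, mpi_agent_type, server, 0,
             MPI_COMM_WORLD, &status);

    /* ... local agent update would go here ... */

    MPI_Barrier(MPI_COMM_WORLD);                       /* matches the server */

    MPI_Send(agents, agents_total, mpi_agent_type, server, DATA_COLLECT,
             MPI_COMM_WORLD);
    free(agents);
}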
/* struct_of_basics_test(void) * * There's nothing simple about structs :). Although this is an easy one. * * Returns number of errors encountered. * * NOT TESTED. */ int struct_of_basics_test(void) { MPI_Datatype parent_type; int s_count = 3, s_blocklengths[3] = { 3, 2, 1 }; MPI_Aint s_displacements[3] = { 10, 20, 30 }; MPI_Datatype s_types[3] = { MPI_CHAR, MPI_INT, MPI_FLOAT }; int nints, nadds, ntypes, combiner, *ints; MPI_Aint *adds = NULL; MPI_Datatype *types; int err, errs = 0; /* set up type */ err = MPI_Type_create_struct(s_count, s_blocklengths, s_displacements, s_types, &parent_type); /* decode */ err = MPI_Type_get_envelope(parent_type, &nints, &nadds, &ntypes, &combiner); if (nints != 4) errs++; if (nadds != 3) errs++; if (ntypes != 3) errs++; if (combiner != MPI_COMBINER_STRUCT) errs++; if (verbose) { if (nints != 4) fprintf(stderr, "nints = %d; should be 3\n", nints); if (nadds != 3) fprintf(stderr, "nadds = %d; should be 0\n", nadds); if (ntypes != 3) fprintf(stderr, "ntypes = %d; should be 3\n", ntypes); if (combiner != MPI_COMBINER_STRUCT) fprintf(stderr, "combiner = %s; should be struct\n", combiner_to_string(combiner)); } ints = malloc(nints * sizeof(*ints)); adds = malloc(nadds * sizeof(*adds)); types = malloc(ntypes *sizeof(*types)); err = MPI_Type_get_contents(parent_type, nints, nadds, ntypes, ints, adds, types); if (ints[0] != s_count) errs++; if (ints[1] != s_blocklengths[0]) errs++; if (ints[2] != s_blocklengths[1]) errs++; if (ints[3] != s_blocklengths[2]) errs++; if (adds[0] != s_displacements[0]) errs++; if (adds[1] != s_displacements[1]) errs++; if (adds[2] != s_displacements[2]) errs++; if (types[0] != s_types[0]) errs++; if (types[1] != s_types[1]) errs++; if (types[2] != s_types[2]) errs++; if (verbose) { if (ints[0] != s_count) fprintf(stderr, "count = %d; should be %d\n", ints[0], s_count); if (ints[1] != s_blocklengths[0]) fprintf(stderr, "blocklength[0] = %d; should be %d\n", ints[1], s_blocklengths[0]); if (ints[2] != s_blocklengths[1]) fprintf(stderr, "blocklength[1] = %d; should be %d\n", ints[2], s_blocklengths[1]); if (ints[3] != s_blocklengths[2]) fprintf(stderr, "blocklength[2] = %d; should be %d\n", ints[3], s_blocklengths[2]); if (adds[0] != s_displacements[0]) fprintf(stderr, "displacement[0] = %d; should be %d\n", adds[0], s_displacements[0]); if (adds[1] != s_displacements[1]) fprintf(stderr, "displacement[1] = %d; should be %d\n", adds[1], s_displacements[1]); if (adds[2] != s_displacements[2]) fprintf(stderr, "displacement[2] = %d; should be %d\n", adds[2], s_displacements[2]); if (types[0] != s_types[0]) fprintf(stderr, "type[0] does not match\n"); if (types[1] != s_types[1]) fprintf(stderr, "type[1] does not match\n"); if (types[2] != s_types[2]) fprintf(stderr, "type[2] does not match\n"); } free(ints); free(adds); free(types); MPI_Type_free( &parent_type ); return errs; }
int main(int argc, char **argv) { int i, size, ierr, instructionmsg, ctr; char hostname[MAX_LINE]; int hostnamelen, filepoolnumel, claimedfilepoolnumel; int *nodepoolentriesk, *nodepoolentriesv; int *filepoolkeys, *claimedfilepoolkeys; mpiconfig_t mpicfg; MPI_Datatype instructmsg_mpi_t; MPI_Datatype array_of_types[3]; int array_of_blocklengths[3]; MPI_Aint array_of_displaysments[3]; MPI_Aint intex, charex, lb; MPI_Init(&argc, &argv); ierr = MPI_Comm_size(MPI_COMM_WORLD, &mpicfg.num_procs); ierr = MPI_Comm_rank(MPI_COMM_WORLD, &mpicfg.rank); MPI_Get_processor_name(hostname, &hostnamelen); ierr = MPI_Type_get_extent(MPI_INT, &lb, &intex); ierr = MPI_Type_get_extent(MPI_CHAR, &lb, &charex); //Says the type of every block array_of_types[0] = MPI_CHAR; array_of_types[1] = MPI_INT; array_of_types[2] = MPI_INT; //Says how many elements for block array_of_blocklengths[0] = MAX_LINE; array_of_blocklengths[1] = 1; array_of_blocklengths[2] = 1; /*Says where every block starts in memory, counting from the beginning of the struct.*/ array_of_displaysments[0] = 0; array_of_displaysments[1] = MAX_LINE * charex; array_of_displaysments[2] = MAX_LINE * charex + intex; /*Create MPI Datatype and commit*/ MPI_Type_create_struct(3, array_of_blocklengths, array_of_displaysments, array_of_types, &instructmsg_mpi_t); MPI_Type_commit(&instructmsg_mpi_t); mpicfg.imsg_t = instructmsg_mpi_t; if(EBUG){printf("Hello world! I am process number: %d on host %s\n", mpicfg.rank, hostname);} //hashtable_t *nodepool; /* Create node/file pool hash table */ mpicfg.nodepool = ht_create( mpicfg.num_procs ); mpicfg.filepool = ht_create( mpicfg.num_procs ); //Initialize structs: mpicfg.procstatus = (int*)calloc(mpicfg.num_procs,sizeof(int)); mpicfg.aliveprocs = (int*)calloc(mpicfg.num_procs,sizeof(int)); for ( i = 0 ; i < mpicfg.num_procs ; i++) { mpicfg.aliveprocs[i] = -1; } mpicfg.id = -1; mpicfg.stopexecution = 0; if( mpicfg.rank == 0 ) { /*This is the coordinator process.*/ //Load instructin list: /* the coordinator node must be different from the others!! */ /* Execute next instruction */ join( &mpicfg, 1 ); join( &mpicfg, 4 ); join( &mpicfg, 8 ); join( &mpicfg, 6 ); insert( &mpicfg, 9 ); insert( &mpicfg, 3 ); insert( &mpicfg, 4 ); insert( &mpicfg, 9 ); find( &mpicfg, 4 ); del( &mpicfg, 4 ); find( &mpicfg, 4 ); find( &mpicfg, 3 ); leave( &mpicfg, 8 ); find( &mpicfg, 3 ); /*find( &mpicfg, 8 ); insert( &mpicfg, 5 ); insert( &mpicfg, 6 ); insert( &mpicfg, 7 ); insert( &mpicfg, 8 ); */ end( &mpicfg); }else{ /*All other ranks */ while (!mpicfg.stopexecution){ /* Wait instruction message: */ MPI_Recv(&mpicfg.imsg, 1, mpicfg.imsg_t, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); /* Execute whatever instruction in that message */ executeinstruction(&mpicfg, mpicfg.imsg); } } free(mpicfg.procstatus); free(mpicfg.aliveprocs); ierr = MPI_Finalize(); return 0; }
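/* Editor's sketch (hedged): the manual displacements above (0, MAX_LINE*charex,
 * MAX_LINE*charex + intex) imply an instruction message holding a MAX_LINE char
 * buffer followed by two ints, and they assume the compiler inserts no padding.
 * Building the same type from offsetof() stays correct even with padding; the
 * struct and field names below are illustrative, not the original definition. */
#include <stddef.h>

typedef struct {
    char cmd[MAX_LINE];   /* instruction text       */
    int  key;             /* first integer operand  */
    int  target;          /* second integer operand */
} instructmsg_sketch_t;

static MPI_Datatype build_instructmsg_type(void)
{
    MPI_Datatype t;
    int          blocklengths[3] = { MAX_LINE, 1, 1 };
    MPI_Datatype types[3]        = { MPI_CHAR, MPI_INT, MPI_INT };
    MPI_Aint     displs[3]       = { offsetof(instructmsg_sketch_t, cmd),
                                     offsetof(instructmsg_sketch_t, key),
                                     offsetof(instructmsg_sketch_t, target) };

    MPI_Type_create_struct(3, blocklengths, displs, types, &t);
    MPI_Type_commit(&t);
    return t;
}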
static PetscErrorCode TestCellShape(DM dm) { PetscMPIInt rank; PetscInt dim, c, cStart, cEnd, count = 0; ex1_stats_t stats, globalStats; PetscReal *J, *invJ, min = 0, max = 0, mean = 0, stdev = 0; MPI_Comm comm = PetscObjectComm((PetscObject)dm); DM dmCoarse; PetscErrorCode ierr; PetscFunctionBegin; stats.min = PETSC_MAX_REAL; stats.max = PETSC_MIN_REAL; stats.sum = stats.squaresum = 0.; stats.count = 0; ierr = DMGetDimension(dm,&dim);CHKERRQ(ierr); ierr = PetscMalloc2(dim * dim, &J, dim * dim, &invJ);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm,0,&cStart,&cEnd);CHKERRQ(ierr); for (c = cStart; c < cEnd; c++) { PetscInt i; PetscReal frobJ = 0., frobInvJ = 0., cond2, cond, detJ; ierr = DMPlexComputeCellGeometryAffineFEM(dm,c,NULL,J,invJ,&detJ);CHKERRQ(ierr); for (i = 0; i < dim * dim; i++) { frobJ += J[i] * J[i]; frobInvJ += invJ[i] * invJ[i]; } cond2 = frobJ * frobInvJ; cond = PetscSqrtReal(cond2); stats.min = PetscMin(stats.min,cond); stats.max = PetscMax(stats.max,cond); stats.sum += cond; stats.squaresum += cond2; stats.count++; } { PetscMPIInt blockLengths[2] = {4,1}; MPI_Aint blockOffsets[2] = {offsetof(ex1_stats_t,min),offsetof(ex1_stats_t,count)}; MPI_Datatype blockTypes[2] = {MPIU_REAL,MPIU_INT}, statType; MPI_Op statReduce; ierr = MPI_Type_create_struct(2,blockLengths,blockOffsets,blockTypes,&statType);CHKERRQ(ierr); ierr = MPI_Type_commit(&statType);CHKERRQ(ierr); ierr = MPI_Op_create(ex1_stats_reduce, PETSC_TRUE, &statReduce);CHKERRQ(ierr); ierr = MPI_Reduce(&stats,&globalStats,1,statType,statReduce,0,comm);CHKERRQ(ierr); ierr = MPI_Op_free(&statReduce);CHKERRQ(ierr); ierr = MPI_Type_free(&statType);CHKERRQ(ierr); } ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); if (!rank) { count = globalStats.count; min = globalStats.min; max = globalStats.max; mean = globalStats.sum / globalStats.count; stdev = PetscSqrtReal(globalStats.squaresum / globalStats.count - mean * mean); } ierr = PetscPrintf(comm,"Mesh with %d cells, shape condition numbers: min = %g, max = %g, mean = %g, stddev = %g\n", count, (double) min, (double) max, (double) mean, (double) stdev); ierr = PetscFree2(J,invJ);CHKERRQ(ierr); ierr = DMPlexGetCoarseDM(dm,&dmCoarse);CHKERRQ(ierr); if (dmCoarse) { ierr = TestCellShape(dmCoarse);CHKERRQ(ierr); } PetscFunctionReturn(0); }
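/* Editor's sketch (hedged): ex1_stats_reduce() is registered as the user reduction
 * above but its body is not shown in this excerpt. To match the {4 reals, 1 integer}
 * layout of statType it has to combine the fields roughly like this. */
static void ex1_stats_reduce(void *a, void *b, int *len, MPI_Datatype *datatype)
{
    ex1_stats_t *in = (ex1_stats_t *) a, *inout = (ex1_stats_t *) b;
    int          i;

    for (i = 0; i < *len; i++) {
        inout[i].min        = PetscMin(inout[i].min, in[i].min);
        inout[i].max        = PetscMax(inout[i].max, in[i].max);
        inout[i].sum       += in[i].sum;
        inout[i].squaresum += in[i].squaresum;
        inout[i].count     += in[i].count;
    }
}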
int main(int argc, char *argv[]) { int numtasks, taskid; int n = atoi(argv[1]); const int num_item =2; int blocklengths[2] = {1,1}; int seed = clock(); int i,j; clock_t endt,start; srand(seed); MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD,&taskid); MPI_Comm_size(MPI_COMM_WORLD,&numtasks); if(n % numtasks != 0){ printf("points number not dividable by number of processors\n"); MPI_Finalize(); return -1; } MPI_Status status; MPI_Datatype types[2] = {MPI_INT, MPI_INT}; MPI_Datatype mpi_point_type; MPI_Aint offsets[2]; offsets[0] = offsetof(point, x); offsets[1] = offsetof(point, y); MPI_Type_create_struct(num_item, blocklengths, offsets, types, &mpi_point_type); MPI_Type_commit(&mpi_point_type); point s[n]; struct point_struct *p_x = (struct point_struct*)malloc(n*sizeof(struct point_struct)); double *dist_closest_pair = (double*)malloc(numtasks*sizeof(double)); int offset[numtasks], share_len = (n/numtasks); offset[taskid] = taskid * share_len; if (taskid == MASTER){ for(i = 0; i < n ; i++) { s[i].x = rand()%1000; s[i].y = rand()%1000; } for(i=0 ; i<n ; i++){ p_x[i].x = s[i].x; p_x[i].y = s[i].y; } start = clock(); b_s_x(n,p_x); }//MASTER MPI_Scatter(&p_x[0], share_len, mpi_point_type, &p_x[offset[taskid]], share_len, mpi_point_type, MASTER , MPI_COMM_WORLD); for(i=0 ;i < numtasks; i++){ if(taskid == i ){ dist_closest_pair[taskid] = Closest_Pair(taskid,offset[taskid], offset[taskid]+share_len-1, share_len, p_x); } } MPI_Gather(&dist_closest_pair[taskid] , 1, MPI_DOUBLE, &dist_closest_pair[taskid] , 1 , MPI_DOUBLE , MASTER , MPI_COMM_WORLD); if(taskid == MASTER){ point p_y[2*share_len]; int x[numtasks-1]; for(i=0 ;i< numtasks-1 ; i++){ x[i]= (i*share_len)+share_len; } double d_boundary[numtasks-1], d_min_proc=dist_closest_pair[0]; for(i=1 ; i<numtasks ; i++){ if(d_min_proc > dist_closest_pair[i]) d_min_proc=dist_closest_pair[i]; } for(i=0 ; i<numtasks-1 ; i++){ for(j=x[i]-share_len ; j<x[i]+share_len ; j++){ p_y[j] = p_x[j]; } b_s_y(2*share_len,p_y); d_boundary[i] = boundary_check(x[i]-share_len, 2*share_len, p_y, x[i], d_min_proc ); } double D_min = d_min_proc; for(i=0 ; i<numtasks-1 ; i++){ if(d_boundary[i] < D_min ) D_min = d_boundary[i]; } printf("\n minimum distanse is : %f.\n",D_min); } MPI_Finalize(); return 0; }
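/* Editor's note (hedged): the MPI_Gather above passes overlapping send and receive
 * buffers on the root (assuming MASTER is rank 0), which the MPI standard does not
 * allow. A sketch of the usual fix, reusing the program's taskid, MASTER and
 * dist_closest_pair: the root contributes its value in place, the other ranks send
 * theirs normally. */
if (taskid == MASTER) {
    MPI_Gather(MPI_IN_PLACE, 1, MPI_DOUBLE,
               dist_closest_pair, 1, MPI_DOUBLE, MASTER, MPI_COMM_WORLD);
} else {
    MPI_Gather(&dist_closest_pair[taskid], 1, MPI_DOUBLE,
               NULL, 0, MPI_DOUBLE, MASTER, MPI_COMM_WORLD);
}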
int main (int argc, char *argv[]) { int my_rank, size; int right, left; struct buff{ int i; float f; } snd_buf, rcv_buf, sum; int i; int array_of_blocklengths[COUNT]; MPI_Aint array_of_displacements[COUNT], first_var_address, second_var_address; MPI_Datatype array_of_types[COUNT], datatype; MPI_Status status; /* Get process and neighbour info. */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &size); right = (my_rank+1) % size; left = (my_rank-1+size) % size; /* ... this SPMD-style neighbor computation with modulo has the same meaning as: */ /* right = my_rank + 1; */ /* if (right == size) right = 0; */ /* left = my_rank - 1; */ /* if (left == -1) left = size-1;*/ /* Set MPI datatypes for sending and receiving partial sums. */ array_of_blocklengths[0] = 1; array_of_blocklengths[1] = 1; MPI_Get_address(&snd_buf.i, &first_var_address); MPI_Get_address(&snd_buf.f, &second_var_address); array_of_displacements[0] = (MPI_Aint) 0; array_of_displacements[1] = second_var_address - first_var_address; array_of_types[0] = MPI_INT; array_of_types[1] = MPI_FLOAT; MPI_Type_create_struct(COUNT, array_of_blocklengths, array_of_displacements, array_of_types, &datatype); MPI_Type_commit(&datatype); /* Compute global sum. */ sum.i = 0; sum.f = 0; snd_buf.i = my_rank; snd_buf.f = my_rank; /* Step 1 = init */ for( i = 0; i < size; i++) { MPI_Sendrecv(&snd_buf, 1, datatype, right, to_right, /* Step 2 */ &rcv_buf, 1, datatype, left, to_right, /* Step 3 */ MPI_COMM_WORLD, &status); snd_buf = rcv_buf; /* Step 4 */ sum.i += rcv_buf.i; sum.f += rcv_buf.f; /* Step 5 */ } printf ("PE%i:\tSum = %i\t%f\n", my_rank, sum.i, sum.f); MPI_Finalize(); }
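/* Editor's note (hedged): the datatype above is built only from the addresses of
 * snd_buf.i and snd_buf.f, so its extent is just the span of those two members.
 * That is fine for the count-1 transfers used here; for arrays of struct buff the
 * usual safeguard is to resize the extent to the full struct size, as in this
 * illustrative sketch (struct and function names are not from the original). */
#include <stddef.h>

struct buff_sketch { int i; float f; };   /* mirrors the local struct buff */

static MPI_Datatype make_buff_type_for_arrays(void)
{
    MPI_Datatype raw, fitted;
    int          blocklengths[2] = { 1, 1 };
    MPI_Datatype types[2]        = { MPI_INT, MPI_FLOAT };
    MPI_Aint     displs[2]       = { offsetof(struct buff_sketch, i),
                                     offsetof(struct buff_sketch, f) };

    MPI_Type_create_struct(2, blocklengths, displs, types, &raw);
    MPI_Type_create_resized(raw, 0, sizeof(struct buff_sketch), &fitted);
    MPI_Type_commit(&fitted);
    MPI_Type_free(&raw);
    return fitted;
}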
int main( int argc, char **argv ) { int vcount, vstride; int32_t counts[2]; int v2stride, typesize, packsize, i, position, errs = 0; double *outbuf, *outbuf2; double *vsource; MPI_Datatype vtype, stype; MPI_Aint lb, extent; double t0, t1; double tspack, tvpack, tmanual; int ntry; int blocklengths[2]; MPI_Aint displacements[2]; MPI_Datatype typesArray[2]; MPI_Init( &argc, &argv ); /* Create a struct consisting of a two 32-bit ints, followed by a vector of stride 3 but count 128k (less than a few MB of data area) */ vcount = 128000; vstride = 3; MPI_Type_vector( vcount, 1, vstride, MPI_DOUBLE, &vtype ); vsource = (double *)malloc( (vcount + 1) * (vstride + 1) * sizeof(double) ); if (!vsource) { fprintf( stderr, "Unable to allocate vsource\n" ); MPI_Abort( MPI_COMM_WORLD, 1 ); } for (i=0; i<vcount*vstride; i++) { vsource[i] = i; } blocklengths[0] = 2; MPI_Get_address( &counts[0], &displacements[0] ); blocklengths[1] = 1; MPI_Get_address( vsource, &displacements[1] ); if (verbose) { printf( "%p = %p?\n", vsource, (void *)displacements[1] ); } typesArray[0] = MPI_INT32_T; typesArray[1] = vtype; MPI_Type_create_struct( 2, blocklengths, displacements, typesArray, &stype ); MPI_Type_commit( &stype ); MPI_Type_commit( &vtype ); #if defined(MPICH) && defined(PRINT_DATATYPE_INTERNALS) /* To use MPIDU_Datatype_debug to print the datatype internals, you must configure MPICH with --enable-g=log */ if (verbose) { printf( "Original struct datatype:\n" ); MPIDU_Datatype_debug( stype, 10 ); } #endif MPI_Pack_size( 1, stype, MPI_COMM_WORLD, &packsize ); outbuf = (double *)malloc( packsize ); outbuf2 = (double *)malloc( packsize ); if (!outbuf) { fprintf( stderr, "Unable to allocate %ld for outbuf\n", (long)packsize ); MPI_Abort( MPI_COMM_WORLD, 1 ); } if (!outbuf2) { fprintf( stderr, "Unable to allocate %ld for outbuf2\n", (long)packsize ); MPI_Abort( MPI_COMM_WORLD, 1 ); } position = 0; /* Warm up the code and data */ MPI_Pack( MPI_BOTTOM, 1, stype, outbuf, packsize, &position, MPI_COMM_WORLD ); tspack = 1e12; for (ntry = 0; ntry < 5; ntry++) { position = 0; t0 = MPI_Wtime(); MPI_Pack( MPI_BOTTOM, 1, stype, outbuf, packsize, &position, MPI_COMM_WORLD ); t1 = MPI_Wtime() - t0; if (t1 < tspack) tspack = t1; } MPI_Type_free( &stype ); /* An equivalent packing, using the 2 ints and the vector separately */ tvpack = 1e12; for (ntry = 0; ntry < 5; ntry++) { position = 0; t0 = MPI_Wtime(); MPI_Pack( counts, 2, MPI_INT32_T, outbuf, packsize, &position, MPI_COMM_WORLD ); MPI_Pack( vsource, 1, vtype, outbuf, packsize, &position, MPI_COMM_WORLD ); t1 = MPI_Wtime() - t0; if (t1 < tvpack) tvpack = t1; } MPI_Type_free( &vtype ); /* Note that we exploit the fact that the vector type contains vblock instances of a contiguous type of size 24, or a single block of 24*vblock bytes. 
*/ tmanual = 1e12; for (ntry = 0; ntry < 5; ntry++) { const double * restrict ppe = (const double *)vsource; double * restrict ppo = outbuf2; int j; t0 = MPI_Wtime(); position = 0; *(int32_t *)ppo = counts[0]; *( ((int32_t *)ppo) + 1) = counts[1]; ppo++; /* Some hand optimization because this file is not normally compiled with optimization by the test suite */ j = vcount; while (j) { *ppo++ = *ppe; ppe += vstride; *ppo++ = *ppe; ppe += vstride; *ppo++ = *ppe; ppe += vstride; *ppo++ = *ppe; ppe += vstride; j -= 4; } position += (1 + vcount); position *= sizeof(double); t1 = MPI_Wtime() - t0; if (t1 < tmanual) tmanual = t1; /* Check on correctness */ #ifdef PACK_IS_NATIVE if (memcmp( outbuf, outbuf2, position ) != 0) { printf( "Panic(manual) - pack buffers differ\n" ); for (j=0; j<8; j++) { printf( "%d: %llx\t%llx\n", j, (long long unsigned)outbuf[j], (long long unsigned)outbuf2[j] ); } } #endif } if (verbose) { printf( "Bytes packed = %d\n", position ); printf( "MPI_Pack time = %e (struct), = %e (vector), manual pack time = %e\n", tspack, tvpack, tmanual ); } if (4 * tmanual < tspack) { errs++; printf( "MPI_Pack time using struct with vector = %e, manual pack time = %e\n", tspack, tmanual ) ; printf( "MPI_Pack time should be less than 4 times the manual time\n" ); printf( "For most informative results, be sure to compile this test with optimization\n" ); } if (4 * tmanual < tvpack) { errs++; printf( "MPI_Pack using vector = %e, manual pack time = %e\n", tvpack, tmanual ); printf( "MPI_Pack time should be less than 4 times the manual time\n" ); printf( "For most informative results, be sure to compile this test with optimization\n" ); } if (4 * tvpack < tspack) { errs++; printf( "MPI_Pack using a vector = %e, using a struct with vector = %e\n", tvpack, tspack ); printf( "MPI_Pack time using vector should be about the same as the struct containing the vector\n" ); printf( "For most informative results, be sure to compile this test with optimization\n" ); } if (errs) { printf( " Found %d errors\n", errs ); } else { printf( " No Errors\n" ); } free( vsource ); free( outbuf ); free( outbuf2 ); MPI_Finalize(); return 0; }
// TODO: add player payments for chance/chest int main(int argc, char ** argv) { struct timeval t1, t2; MPI_Init(&argc, &argv); gettimeofday(&t1, NULL); int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &globalrank); MPI_Comm_size(MPI_COMM_WORLD, &size); globalsize = size; srand(time(NULL) + globalrank); struct location board[BSIZE]; struct player players[NUMPLAYERS]; int itr = 10000; long long bills[4]; // how much you owe each player at end of round init_players(players); init_board(board); char plocation; int pvalue; int numcomms = 1; MPI_Group world_group; MPI_Comm_group(MPI_COMM_WORLD, &world_group); playerdata d; d.money[0] = 0; d.money[1] = 0; d.money[2] = 0; d.money[3] = 0; output = (FILE **) malloc(size * sizeof(FILE *)); // if 1 process created just run sequentially if (size == 1) { int done[4] = {1, 1, 1, 1}; while (itr) { itr--; int i; for (i = 0; i < NUMPLAYERS; i++) { plocation = 0; pvalue = 0; if (players[i].money > 0) { move(players, board, i, &pvalue, &plocation); if (plocation) { board[plocation].owner = i; players[i].money -= pvalue; } } else { players[i].order = -1; if (done[i]) { remove_properties(board, i); done[i] = 0; } } } } gettimeofday(&t2, NULL); results(players, board); double exectime = (t2.tv_sec - t1.tv_sec) * 1000000 + ((t2.tv_usec - t1.tv_usec)); printf("Exec Time %lf\n", exectime); return 0; } // create a communicator for each monopoly game (for n > 4) MPI_Group * gamesel; MPI_Comm * games; int ranksel[4]; if (size > 4) { numcomms = size / 4; games = (MPI_Comm *) malloc(numcomms * sizeof(MPI_Comm)); gamesel = (MPI_Group *) malloc(numcomms * sizeof(MPI_Group)); int i; for (i = 0; i < numcomms; i++) { ranksel[0] = 4 * i; ranksel[1] = 4 * i + 1; ranksel[2] = 4 * i + 2; ranksel[3] = 4 * i + 3; MPI_Group_incl(world_group, 4, ranksel, &gamesel[i]); MPI_Comm_create(MPI_COMM_WORLD, gamesel[i], &games[i]); } } else { // n < 4 so use MPI_COMM_WORLD games = (MPI_Comm *) malloc(1 * sizeof(MPI_Comm)); games[0] = MPI_COMM_WORLD; numcomms = 1; } // create an MPI type so that we can use our player data struct in MPI communication calls const int nitems = 5; int blocklengths[5] = {4, 1, 1, 1, 1}; MPI_Datatype types[5] = {MPI_LONG_LONG, MPI_INT, MPI_CHAR, MPI_CHAR, MPI_CHAR}; MPI_Datatype MPI_MONO_DATA; MPI_Aint offsets[5]; offsets[0] = offsetof(playerdata, money); offsets[1] = offsetof(playerdata, pvalue); offsets[2] = offsetof(playerdata, plocation); offsets[3] = offsetof(playerdata, order); offsets[4] = offsetof(playerdata, trade); MPI_Type_create_struct(nitems, blocklengths, offsets, types, &MPI_MONO_DATA); MPI_Type_commit(&MPI_MONO_DATA); MPI_Comm_rank(games[globalrank / 4], &rank); #ifdef DEBUG char fname[10]; snprintf(fname, 10, "mon%d.dbg", globalrank); output[globalrank] = fopen(fname, "w"); fprintf(output[globalrank], "MAIN begin loop\n"); print_board_info(board); #endif // run the game for 40000 turns (10000 per player) while (itr > 0) { itr--; pvalue = 0; plocation = 0; d.trade = 0; d.order = rank; #ifdef DEBUG fprintf(output[globalrank], "MAIN tag 1 rank %d\n", rank); #endif move(players, board, rank, &pvalue, &plocation); d.pvalue = pvalue; d.plocation = plocation; #ifdef DEBUG fprintf(output[globalrank], "using comm %d\n", globalrank / 4); if (games[globalrank / 4] != MPI_COMM_WORLD) { fprintf(output[globalrank], "COMM ERROR\n"); } #endif send_info(&d, players, board, rank, games[globalrank / 4], MPI_MONO_DATA); #ifdef DEBUG fprintf(output[globalrank], "MAIN tag 3 rank %d\n", rank); print_board_info(board); #endif } #ifdef DEBUG fprintf(output[globalrank], "MAIN 
last tag rank %d\n", rank); #endif // get results from each process gather_results(players, board, games, numcomms, globalrank); gettimeofday(&t2, NULL); if (globalrank == 0) { results(players, board); } #ifdef DEBUG fclose(output[globalrank]); #endif double exectime = (t2.tv_sec - t1.tv_sec) * 1000000 + ((t2.tv_usec - t1.tv_usec)); if (globalrank == 0) { printf("Exec Time %lf\n", exectime); } MPI_Finalize(); return 0; }
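/* Editor's sketch (hedged): the offsets, block lengths and element types used for
 * MPI_MONO_DATA in the program above imply a playerdata layout roughly like the
 * following; the real definition is not shown in this excerpt, so this is inferred
 * from the offsetof() calls only. */
typedef struct {
    long long money[4];   /* blocklength 4, MPI_LONG_LONG */
    int       pvalue;     /* MPI_INT  */
    char      plocation;  /* MPI_CHAR */
    char      order;      /* MPI_CHAR */
    char      trade;      /* MPI_CHAR */
} playerdata_sketch;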