extern "C" int main(int ac, char **av) { MPI_CALL(Init(&ac, &av)); ospcommon::tasking::initTaskingSystem(); maml::init(); std::mt19937 rng(std::random_device{}()); std::uniform_int_distribution<int> distrib(0, 255); int numRuns = 1000000; int rank = -1; int numRanks = 0; MPI_CALL(Comm_size(MPI_COMM_WORLD,&numRanks)); MPI_CALL(Comm_rank(MPI_COMM_WORLD,&rank)); int numMessages = 100; int payloadSize = 100000; MyHandler handler; maml::registerHandlerFor(MPI_COMM_WORLD,&handler); char *payload = (char*)malloc(payloadSize); for (int i=0;i<payloadSize;i++) payload[i] = distrib(rng); for (int run=0;run<numRuns;run++) { MPI_CALL(Barrier(MPI_COMM_WORLD)); double t0 = ospcommon::getSysTime(); maml::start(); for (int mID=0;mID<numMessages;mID++) { for (int r=0;r<numRanks;r++) { maml::sendTo(MPI_COMM_WORLD,r,std::make_shared<maml::Message>(payload,payloadSize)); } } while (handler.numReceived != numRanks*numMessages*(run+1)) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); } maml::stop(); double t1 = ospcommon::getSysTime(); double bytes = numRanks * numMessages * payloadSize / (t1-t0); std::string rate = ospcommon::prettyNumber(bytes); printf("rank %i: received %i messages in %lf secs; that is %sB/s\n",rank,numRanks*numMessages,t1-t0, rate.c_str()); MPI_CALL(Barrier(MPI_COMM_WORLD)); } maml::shutdown(); MPI_CALL(Barrier(MPI_COMM_WORLD)); MPI_Finalize(); }
// Write
bool Write(const void *data, unsigned int len, unsigned int &written)
{
  // Check pointer and size
  if (State != FSTATE_Config)
    return false;
  written = len;
  //xprintf("Conf:write(%d)\n", len);

  if (GetIn(PIN_FPGA_DONE) == IN_LOW) {
    // Configuration has not finished yet
    // Transmit
    if (SPI::Open() == false)
      return false;
    SetOut(PIN_FPGA_CONF, OUT_HIGH);
    Barrier();
    if (len & 1) {
      // Odd length
      SPI::SetMode(SPI::MODE_8BIT);
      SPI::SetupTxDMA(data, len);
    } else {
      // Even length
      SPI::SetMode(SPI::MODE_16BIT);
      SPI::SetupTxDMA(data, len / 2);
    }
    SPI::StartDMA();
    SPI::WaitTxDMA();
    SPI::StopDMA();
    Barrier();
    SetOut(PIN_FPGA_CONF, OUT_LOW);
    SPI::Close();
  }
  return true;
}
bool Write(int address, const void *data, int length, bool noinc)
{
  if (State != FSTATE_Running)
    return false;

  bool result = SPI::Open();
  if (result == true) {
    unsigned short header[2];
    address &= 0x1FFF;
    if (noinc == false) {
      address |= 0x6000;
    } else {
      address |= 0x4000;
    }
    header[0] = address;
    header[1] = length;

    SetOut(PIN_FPGA_nCS, OUT_LOW);
    Barrier();
    SPI::SetMode(SPI::MODE_16BIT);
    SPI::SetupTxDMA(header, 2);
    SPI::SetupTxDMA(data, length);
    SPI::StartDMA();
    SPI::WaitTxDMA();
    SPI::StopDMA();
    Barrier();
    SetOut(PIN_FPGA_nCS, OUT_HIGH);
    SPI::Close();
  }
  return result;
}
/*-------------------------------------------------------------------
 * Function:       Bitonic_sort_decr
 * Purpose:        Use parallel bitonic sort to sort a list into
 *                 decreasing order.  This implements a butterfly
 *                 communication scheme among the threads
 * In args:        th_count:  the number of threads participating
 *                    in this sort
 *                 dim:  base 2 log of th_count
 *                 my_first:  the subscript of my first element in l_a
 *                 local_n:  the number of elements assigned to each
 *                    thread
 *                 my_rank:  the calling thread's global rank
 * In/out global:  l_a:  pointer to current list
 * Scratch global: l_b:  pointer to temporary list
 */
void Bitonic_sort_decr(int th_count, int dim, int my_first, int local_n,
      int my_rank) {
   int stage;
   int partner;
   int* tmp;
   unsigned eor_bit = 1 << (dim - 1);

   for (stage = 0; stage < dim; stage++) {
      partner = my_rank ^ eor_bit;
      if (my_rank > partner)
         Merge_split_lo(my_rank, my_first, local_n, partner);
      else
         Merge_split_hi(my_rank, my_first, local_n, partner);
      eor_bit >>= 1;
      Barrier();
      if (my_rank == 0) {
#        ifdef DEBUG
         char title[1000];
#        endif
         tmp = l_a;
         l_a = l_b;
         l_b = tmp;
#        ifdef DEBUG
         sprintf(title, "Th_count = %d, stage = %d", th_count, stage);
         Print_list(title, l_a, n);
#        endif
      }
      Barrier();
   }
}  /* Bitonic_sort_decr */
void comm_simopts(SIMOPTS *simopts, MPI_Comm world)

/*=======================================================================*/
/*             Begin routine                                             */
{ /*begin routine */
/*=======================================================================*/
/*             Local variable declarations                               */

#include "../typ_defs/typ_mask.h"

  MPI_Datatype simopts_comm;
  MPI_Datatype types[1];
  MPI_Aint displs[1];
  int blockcounts[1];

  Address(&(simopts->md), &displs[0]);
  types[0] = MPI_INT;
  blockcounts[0] = 18;
  Barrier(world);
  Type_struct(1, blockcounts, displs, types, &simopts_comm);
  Barrier(world);
  Type_commit(&simopts_comm);
  Barrier(world);
  Bcast(MPI_BOTTOM, 1, simopts_comm, 0, world);
  Barrier(world);
  Type_free(&simopts_comm);
  Barrier(world);

  Bcast(&(simopts->ann_rate), 1, MPI_DOUBLE, 0, world);
  Bcast(&(simopts->ann_start_temp), 1, MPI_DOUBLE, 0, world);
  Bcast(&(simopts->ann_target_temp), 1, MPI_DOUBLE, 0, world);

/*------------------------------------------------------------------------*/
} /*end routine*/
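/* The capitalized communication calls used by the comm_* routines here
 * (Barrier, Bcast, Address, Type_struct, Type_commit, Type_free, ...) are not
 * standard MPI names; they appear to be supplied by the included
 * ../typ_defs/typ_mask.h.  A plausible sketch of such a mapping onto the
 * MPI-1 API is given below -- this is an assumption made for readability,
 * not the project's actual header. */
#define Barrier(comm)                       MPI_Barrier(comm)
#define Bcast(buf, count, type, root, comm) MPI_Bcast(buf, count, type, root, comm)
#define Address(location, address)          MPI_Address(location, address)
#define Type_struct(n, blens, displs, types, newtype) \
        MPI_Type_struct(n, blens, displs, types, newtype)
#define Type_commit(type)                   MPI_Type_commit(type)
#define Type_free(type)                     MPI_Type_free(type)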
void comm_filenames(FILENAMES *filenames, MPI_Comm world)

/*=======================================================================*/
/*             Begin routine                                             */
{ /*begin routine */
/*=======================================================================*/
/*             Local variable declarations                               */

#include "../typ_defs/typ_mask.h"

  int filenames_num = 14;
  MPI_Datatype filenames_comm;
  MPI_Datatype types[1];
  MPI_Aint displs[1];
  int blockcounts[1];

  Address(&(filenames->iwrite_screen), &displs[0]);
  types[0] = MPI_INT;
  blockcounts[0] = filenames_num;
  Barrier(world);
  Type_struct(1, blockcounts, displs, types, &filenames_comm);
  Barrier(world);
  Type_commit(&filenames_comm);
  Barrier(world);
  Bcast(MPI_BOTTOM, 1, filenames_comm, 0, world);
  Barrier(world);
  Type_free(&filenames_comm);

/*------------------------------------------------------------------------*/
} /*end routine*/
void comm_cpopts(CPOPTS *cpopts, MPI_Comm world)

/*=======================================================================*/
/*             Begin routine                                             */
{ /*begin routine */
/*=======================================================================*/
/*             Local variable declarations                               */

#include "../typ_defs/typ_mask.h"

  int cpopts_num = 39;
  MPI_Datatype cpopts_comm;
  MPI_Datatype types[1];
  MPI_Aint displs[1];
  int blockcounts[1];

  Address(&(cpopts->cp_lda), &displs[0]);
  types[0] = MPI_INT;
  blockcounts[0] = cpopts_num;
  Barrier(world);
  Type_struct(1, blockcounts, displs, types, &cpopts_comm);
  Barrier(world);
  Type_commit(&cpopts_comm);
  Barrier(world);
  Bcast(MPI_BOTTOM, 1, cpopts_comm, 0, world);
  Barrier(world);
  Type_free(&cpopts_comm);

/*------------------------------------------------------------------------*/
} /*end routine*/
void comm_ensopts(ENSOPTS *ensopts, MPI_Comm world)

/*=======================================================================*/
/*             Begin routine                                             */
{ /*begin routine */
/*=======================================================================*/
/*             Local variable declarations                               */

#include "../typ_defs/typ_mask.h"

  int nensopts = 5;
  MPI_Datatype ensopts_comm;
  MPI_Datatype types[1];
  MPI_Aint displs[1];
  int blockcounts[1];

  Address(&(ensopts->nve), &displs[0]);
  types[0] = MPI_INT;
  blockcounts[0] = nensopts;
  Barrier(world);
  Type_struct(1, blockcounts, displs, types, &ensopts_comm);
  Barrier(world);
  Type_commit(&ensopts_comm);
  Barrier(world);
  Bcast(MPI_BOTTOM, 1, ensopts_comm, 0, world);
  Barrier(world);
  Type_free(&ensopts_comm);

/*------------------------------------------------------------------------*/
} /*end routine*/
void comm_cp_wannier(CP_WANNIER *cp_wannier, MPI_Comm world)

/*=======================================================================*/
/*             Begin routine                                             */
{ /*begin routine */
/*=======================================================================*/
/*             Local variable declarations                               */

#include "../typ_defs/typ_mask.h"

  int ninfo = 4;
  MPI_Datatype cp_wannier_comm;
  MPI_Datatype types[1];
  MPI_Aint displs[1];
  int blockcounts[1];

  Address(&(cp_wannier->cp_wan_calc_frq), &displs[0]);
  types[0] = MPI_INT;
  blockcounts[0] = ninfo;
  Barrier(world);
  Type_struct(1, blockcounts, displs, types, &cp_wannier_comm);
  Barrier(world);
  Type_commit(&cp_wannier_comm);
  Barrier(world);
  Bcast(MPI_BOTTOM, 1, cp_wannier_comm, 0, world);
  Barrier(world);
  Type_free(&cp_wannier_comm);
  Barrier(world);

  Bcast(&(cp_wannier->rcut_wan_orb), 1, MPI_DOUBLE, 0, world);
  Bcast(&(cp_wannier->rcut_wan_nl), 1, MPI_DOUBLE, 0, world);

/*------------------------------------------------------------------------*/
} /*end routine*/
void comm_cpcoeffs_info(CPCOEFFS_INFO *cpcoeffs_info, MPI_Comm world)

/*=======================================================================*/
/*             Begin routine                                             */
{ /*begin routine */
/*=======================================================================*/
/*             Local variable declarations                               */

#include "../typ_defs/typ_mask.h"

  int ninfo = 17;
  MPI_Datatype cpcoeffs_info_comm;
  MPI_Datatype types[1];
  MPI_Aint displs[1];
  int blockcounts[1];

  Address(&(cpcoeffs_info->pi_beads), &displs[0]);
  types[0] = MPI_INT;
  blockcounts[0] = ninfo;
  Barrier(world);
  Type_struct(1, blockcounts, displs, types, &cpcoeffs_info_comm);
  Barrier(world);
  Type_commit(&cpcoeffs_info_comm);
  Barrier(world);
  Bcast(MPI_BOTTOM, 1, cpcoeffs_info_comm, 0, world);
  Barrier(world);
  Type_free(&cpcoeffs_info_comm);
  Barrier(world);

  Bcast(&(cpcoeffs_info->ecut), 1, MPI_DOUBLE, 0, world);
  Bcast(&(cpcoeffs_info->cp_hess_cut), 1, MPI_DOUBLE, 0, world);

/*------------------------------------------------------------------------*/
} /*end routine*/
void comm_cpconstrnt(CPCONSTRNT *cpconstrnt, MPI_Comm world)

/*=======================================================================*/
/*             Begin routine                                             */
{ /*begin routine */
/*=======================================================================*/
/*             Local variable declarations                               */

#include "../typ_defs/typ_mask.h"

  int nscal_cpconstrnt = 3;
  MPI_Datatype cpconstrnt_comm;
  MPI_Datatype types[1];
  MPI_Aint displs[1];
  int blockcounts[1];

  Address(&(cpconstrnt->c_tolshake), &displs[0]);
  types[0] = MPI_DOUBLE;
  blockcounts[0] = nscal_cpconstrnt;
  Barrier(world);
  Type_struct(1, blockcounts, displs, types, &cpconstrnt_comm);
  Barrier(world);
  Type_commit(&cpconstrnt_comm);
  Barrier(world);
  Bcast(MPI_BOTTOM, 1, cpconstrnt_comm, 0, world);
  Barrier(world);
  Type_free(&cpconstrnt_comm);

/*------------------------------------------------------------------------*/
} /*end routine*/
void comm_pseudo(PSEUDO *pseudo, MPI_Comm world, int myid)

/*=======================================================================*/
/*             Begin routine                                             */
{ /*begin routine */
/*=======================================================================*/
/*             Local variable declarations                               */

#include "../typ_defs/typ_mask.h"

  if(myid != 0){
    pseudo->vxc_typ  = (char *)cmalloc(MAXWORD*sizeof(char));
    pseudo->ggax_typ = (char *)cmalloc(MAXWORD*sizeof(char));
    pseudo->ggac_typ = (char *)cmalloc(MAXWORD*sizeof(char));
  }/*endif*/

  Barrier(world);
  Bcast(&(pseudo->vxc_typ[0]), MAXWORD, MPI_CHAR, 0, world); /* must be from 0*/
  Bcast(&(pseudo->ggax_typ[0]), MAXWORD, MPI_CHAR, 0, world);
  Bcast(&(pseudo->ggac_typ[0]), MAXWORD, MPI_CHAR, 0, world);
  Bcast(&(pseudo->gga_cut), 1, MPI_DOUBLE, 0, world);
  Bcast(&(pseudo->b3_cut), 1, MPI_DOUBLE, 0, world);
  Bcast(&(pseudo->b3_alp), 1, MPI_DOUBLE, 0, world);
  Bcast(&(pseudo->alpha_conv_dual), 1, MPI_DOUBLE, 0, world);
  Bcast(&(pseudo->n_interp_pme_dual), 1, MPI_INT, 0, world);
  Bcast(&(pseudo->nsplin_g), 1, MPI_INT, 0, world);
  Bcast(&(pseudo->nl_cut_on), 1, MPI_INT, 0, world);
  Bcast(&(pseudo->nlvps_skin), 1, MPI_DOUBLE, 0, world);
  Barrier(world);

/*------------------------------------------------------------------------*/
} /*end routine*/
void *Worker(void *arg) {
  int myid = (int) arg;
  double maxdiff, temp, tmp = 0.0;
  int i, j, iters;
  int first, last;
  double **grid1;
  double **grid2;
  unsigned long mask = 128; //24+myid;
  unsigned long maxnode = 8*sizeof(unsigned long);

  printf("worker %d (pthread id %d) has started\n", myid, pthread_self());

  grid1 = (double**)mai_malloc((stripSize+3)*sizeof(double*));
  grid2 = (double**)mai_malloc((stripSize+3)*sizeof(double*));
  for(i = 0; i <= stripSize; i++) {
    grid1[i] = (double*)mai_malloc((gridSize+3)*sizeof(double));
    grid2[i] = (double*)mai_malloc((gridSize+3)*sizeof(double));
  }

  Barrier();
  InitializeGrids(grid1, grid2);

  /* determine first and last rows of my strip of the grids */
  for (iters = 1; iters <= numIters; iters++) {
    /* update my points */
    for (i = 1; i < stripSize; i++) {
      for (j = 1; j <= gridSize; j++) {
        grid2[i][j] = (grid1[i-1][j] + grid1[i+1][j] +
                       grid1[i][j-1] + grid1[i][j+1]) * 0.25;
      }
    }
    Barrier();
    /* update my points again */
    for (i = 1; i < stripSize; i++) {
      for (j = 1; j <= gridSize; j++) {
        grid1[i][j] = (grid2[i-1][j] + grid2[i+1][j] +
                       grid2[i][j-1] + grid2[i][j+1]) * 0.25;
      }
    }
    Barrier();
  }

  /* compute the maximum difference in my strip and set global variable */
  maxdiff = 0.0;
  for (i = 1; i <= stripSize; i++) {
    for (j = 1; j <= gridSize; j++) {
      temp = grid1[i][j] - grid2[i][j];
      if (temp < 0)
        temp = -temp;
      if (maxdiff < temp)
        maxdiff = temp;
    }
  }
  maxDiff[myid] = maxdiff;
}
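/* The bare Barrier() used by these pthreads workers is not defined in this
 * excerpt.  Below is a minimal sketch of a reusable counting barrier built on
 * a mutex and condition variable; the shared worker count is assumed to live
 * in a global (called numWorkers here, as in the summation example later in
 * this collection), and the other names are hypothetical. */
#include <pthread.h>

extern int numWorkers;                    /* number of participating threads */

static pthread_mutex_t barrier_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  barrier_cond  = PTHREAD_COND_INITIALIZER;
static int barrier_arrived = 0;           /* threads that reached this round  */
static int barrier_round   = 0;           /* generation count, allows reuse   */

void Barrier(void)
{
  pthread_mutex_lock(&barrier_mutex);
  int my_round = barrier_round;
  if (++barrier_arrived == numWorkers) {
    /* last arrival: reset the counter, advance the round, wake everyone */
    barrier_arrived = 0;
    barrier_round++;
    pthread_cond_broadcast(&barrier_cond);
  } else {
    while (my_round == barrier_round)
      pthread_cond_wait(&barrier_cond, &barrier_mutex);
  }
  pthread_mutex_unlock(&barrier_mutex);
}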
void Zoo::StartPS() {
  int role = ParsePSRole(MV_CONFIG_ps_role);
  CHECK(role != -1);

  nodes_.resize(size());
  nodes_[rank()].rank = rank();
  nodes_[rank()].role = role;
  mailbox_.reset(new MtQueue<MessagePtr>);

  // NOTE(feiga): the start order is non-trivial, communicator should be last.
  if (rank() == kController) {
    Actor* controler = new Controller();
    controler->Start();
  }
  Actor* communicator = new Communicator();
  communicator->Start();
  // activate the system
  RegisterNode();

  if (node::is_server(role)) {
    Actor* server = Server::GetServer();
    server->Start();
  }
  if (node::is_worker(role)) {
    Actor* worker = new Worker();
    worker->Start();
  }

  Barrier();
  Log::Info("Rank %d: Multiverso start successfully\n", rank());
}
void communicate_test_energy_pimd(double *vgen, double *vgen2, double *vgen3,
                                  double *cpu, MPI_Comm world)

/*==========================================================================*/
/*             Begin Routine                                                */
{ /*begin routine*/
/*==========================================================================*/

#include "../typ_defs/typ_mask.h"

  int iii;
  double vgen_temp, vgen2_temp, vgen3_temp, cpu_temp;

  cpu_temp   = *cpu;
  vgen_temp  = *vgen;
  vgen2_temp = *vgen2;
  vgen3_temp = *vgen3;

  Reduce(&cpu_temp, cpu, 1, MPI_DOUBLE, MPI_MAX, 0, world);
  Allreduce(&vgen_temp, vgen, 1, MPI_DOUBLE, MPI_SUM, 0, world);
  Allreduce(&vgen2_temp, vgen2, 1, MPI_DOUBLE, MPI_SUM, 0, world);
  Allreduce(&vgen3_temp, vgen3, 1, MPI_DOUBLE, MPI_SUM, 0, world);

  Barrier(world);

} /*end routine*/
void GenericBuffer::CopyFrom(vk::CommandBuffer commandBuffer, Texture& srcTexture)
{
  auto textureSize = srcTexture.GetWidth() * srcTexture.GetHeight() *
                     GetBytesPerPixel(srcTexture.GetFormat());
  if (textureSize != mSize)
  {
    throw std::runtime_error("Cannot copy texture of different sizes");
  }

  srcTexture.Barrier(commandBuffer,
                     vk::ImageLayout::eGeneral,
                     vk::AccessFlagBits::eShaderWrite | vk::AccessFlagBits::eColorAttachmentWrite,
                     vk::ImageLayout::eTransferSrcOptimal,
                     vk::AccessFlagBits::eTransferRead);

  auto info = vk::BufferImageCopy()
                  .setImageSubresource({vk::ImageAspectFlagBits::eColor, 0, 0, 1})
                  .setImageExtent({srcTexture.GetWidth(), srcTexture.GetHeight(), 1});

  commandBuffer.copyImageToBuffer(srcTexture.mImage,
                                  vk::ImageLayout::eTransferSrcOptimal,
                                  mBuffer,
                                  info);

  srcTexture.Barrier(commandBuffer,
                     vk::ImageLayout::eTransferSrcOptimal,
                     vk::AccessFlagBits::eTransferRead,
                     vk::ImageLayout::eGeneral,
                     vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eColorAttachmentRead);

  Barrier(commandBuffer, vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eShaderRead);
}
/* Each worker sums the values in one strip of the matrix.
   After a barrier, worker(0) computes and prints the total */
void *Worker(void *arg) {
  long myid = (long) arg;
  int total, i, j, first, last;

#ifdef DEBUG
  printf("worker %d (pthread id %d) has started\n", myid, pthread_self());
#endif

  /* determine first and last rows of my strip */
  first = myid*stripSize;
  last = (myid == numWorkers - 1) ? (size - 1) : (first + stripSize - 1);

  /* sum values in my strip */
  total = 0;
  for (i = first; i <= last; i++)
    for (j = 0; j < size; j++)
      total += matrix[i][j];
  sums[myid] = total;

  Barrier();
  if (myid == 0) {
    total = 0;
    for (i = 0; i < numWorkers; i++)
      total += sums[i];
    /* get end time */
    end_time = read_timer();
    /* print results */
    printf("The total is %d\n", total);
    printf("The execution time is %g sec\n", end_time - start_time);
  }
}
void slaveLoop(void *id) {
  int myid = *((int*)id);
  int i, j, ret;
  int local_sense = 0;
  int total_iter = 0;
  long long int temp;
  char localbuf[1024];
  unsigned int input_array[6];
  int mybinding;
  unsigned long long c;
  emptyPage_t** pagesVector;

  tm_bind_to_cabinet(myid+1);

  Barrier(&local_sense, myid, num_threads);

  if (myid == num_threads-1) {
    set_handler_address();
    set_log_base(myid);
    int k = 0;
    BEGIN_ESCAPE
    while (1) {
      while (!cleanus_counter);
      for (c=0; c<num_threads+1; c++) {
        memcntl(GlobalPagesVector[c], 8192, MC_SYNC,
                (caddr_t) (MS_SYNC|MS_INVALIDATE), 0, 0);
      }
      pthread_mutex_lock(&clean_mutex);
      cleanus_counter = 0;
      pthread_mutex_unlock(&clean_mutex);
    }
    END_ESCAPE
  } else {
void communicate_cp_data(CP *cp, int natm_mall, int natm_typ_mall,
                         MPI_Comm world, int myid)

/*=======================================================================*/
/*             Begin routine                                             */
{ /*begin routine */
/*=======================================================================*/
/*             Local variable declarations                               */

#include "../typ_defs/typ_mask.h"

  comm_cpconstrnt(&(cp->cpconstrnt), world);
  Barrier(world);
  comm_pseudo(&(cp->pseudo), world, myid);
  Barrier(world);
  comm_cpopts_data(&(cp->cpopts), world);
  Barrier(world);

/*------------------------------------------------------------------------*/
} /*end routine*/
void comm_cp_parse_info(CP_PARSE *cp_parse, MPI_Comm world)

/*=======================================================================*/
/*             Begin routine                                             */
{ /*begin routine */
/*=======================================================================*/
/*             Local variable declarations                               */

#include "../typ_defs/typ_mask.h"

  int ncp_parse_int = 6;
  int ncp_parse_double = 11;
  MPI_Datatype cp_parse_info_comm_int;
  MPI_Datatype cp_parse_info_comm_double;
  MPI_Datatype types_int[1];
  MPI_Datatype types_double[1];
  MPI_Aint displs_int[1];
  MPI_Aint displs_double[1];
  int blockcounts_int[1];
  int blockcounts_double[1];

  Address(&(cp_parse->istart_cp), &displs_int[0]);
  Address(&(cp_parse->cp_mass_tau_def), &displs_double[0]);
  types_int[0] = MPI_INT;
  types_double[0] = MPI_DOUBLE;
  blockcounts_int[0] = ncp_parse_int;
  blockcounts_double[0] = ncp_parse_double;

  Barrier(world);
  Type_struct(1, blockcounts_int, displs_int, types_int,
              &cp_parse_info_comm_int);
  Barrier(world);
  Type_struct(1, blockcounts_double, displs_double, types_double,
              &cp_parse_info_comm_double);
  Barrier(world);
  Type_commit(&cp_parse_info_comm_int);
  Barrier(world);
  Type_commit(&cp_parse_info_comm_double);
  Barrier(world);
  Bcast(MPI_BOTTOM, 1, cp_parse_info_comm_int, 0, world);
  Barrier(world);
  Bcast(MPI_BOTTOM, 1, cp_parse_info_comm_double, 0, world);
  Barrier(world);
  Type_free(&cp_parse_info_comm_int);
  Barrier(world);
  Type_free(&cp_parse_info_comm_double);
  Barrier(world);

/*------------------------------------------------------------------------*/
} /*end routine*/
/*
 **************************************************************************
 * Check whether there is a receivable message, for use in guarding
 * against errant messages (messages from an unrelated communication)
 * that may be mistakenly received.  This check is imperfect: it can
 * detect messages that have arrived, but it cannot detect messages
 * that have not arrived.
 *
 * The barriers prevent processes from starting or finishing the check
 * too early.  An early start may miss recently sent errant messages
 * from slower processes.  An early finish can allow the process to get
 * ahead and send a valid message that may be mistaken as an errant
 * message by the receiver doing the Iprobe.
 **************************************************************************
 */
bool
SAMRAI_MPI::hasReceivableMessage(
   Status* status,
   int source,
   int tag) const
{
   int flag = false;
   if (s_mpi_is_initialized) {
      SAMRAI_MPI::Status tmp_status;
      Barrier();
      int mpi_err = Iprobe(source, tag, &flag, status ? status : &tmp_status);
      if (mpi_err != MPI_SUCCESS) {
         TBOX_ERROR("SAMRAI_MPI::hasReceivableMessage: Error probing for message."
            << std::endl);
      }
      Barrier();
   }
   return flag == true;
}
void avtTimeIteratorExpression::Execute(void)
{
    FinalizeTimeLoop();

    avtContract_p contract = ConstructContractWithVarnames();
    contract->DisableExtentsCalculations();

    // Store off the original expression list.
    ParsingExprList *pel = ParsingExprList::Instance();
    ExpressionList orig_list = *(pel->GetList());

    InitializeOutput();

    std::string db = GetInput()->GetInfo().GetAttributes().GetFullDBName();
    ref_ptr<avtDatabase> dbp = avtCallback::GetDatabase(db, 0, NULL);
    if (*dbp == NULL)
        EXCEPTION1(InvalidFilesException, db.c_str());

    // The first EEF already set up its expressions ... we need a new one
    // to set up filters for the CMFE expressions.
    avtExpressionEvaluatorFilter myeef;
    myeef.SetInput(GetInput());
    for (int i = 0 ; i < numTimeSlicesToProcess ; i++)
    {
        int timeSlice = firstTimeSlice + i*timeStride;
        if (timeSlice > actualLastTimeSlice)
            timeSlice = actualLastTimeSlice;
        debug1 << "Time iterating expression working on time slice "
               << timeSlice << endl;
        UpdateExpressions(timeSlice);
        // won't re-execute without setting modified to true, because
        // it doesn't check to see if expression definitions change.
        myeef.ReleaseData();
        myeef.GetOutput()->Update(contract);

        avtCallback::ResetTimeout(5*60);
        Barrier();
        avtCallback::ResetTimeout(5*60);

        avtDatabaseMetaData *md = dbp->GetMetaData(timeSlice, false, false, false);
        currentCycle = md->GetCycles()[timeSlice];
        currentTime  = md->GetTimes()[timeSlice];

        ProcessDataTree(myeef.GetTypedOutput()->GetDataTree(), i);
        debug1 << "Time iterating expression done working on time slice "
               << timeSlice << endl;
    }

    // Get the upstream filters back the way they are supposed to be.
    GetInput()->Update(executionContract);

    FinalizeOutput();

    // Restore the original expression list ... i.e. undo the temporary
    // expressions we put in.
    *(pel->GetList()) = orig_list;
}
void comm_vel_samp_cp(VEL_SAMP_CP *vel_samp_cp, MPI_Comm world, int myid)

/*=======================================================================*/
/*             Begin routine                                             */
{ /* begin routine */
/*=======================================================================*/
/*             Local variable declarations                               */

#include "../typ_defs/typ_mask.h"

  int i, itemp;
  double temp, qseed;
  int nint = 8;
  MPI_Datatype vel_samp_comm;
  MPI_Datatype types[1];
  MPI_Aint displs[1];
  int blockcounts[1];

  Address(&(vel_samp_cp->ivelc_smpl_on), &displs[0]);
  types[0] = MPI_INT;
  blockcounts[0] = nint;
  Barrier(world);
  Type_struct(1, blockcounts, displs, types, &vel_samp_comm);
  Barrier(world);
  Type_commit(&vel_samp_comm);
  Barrier(world);
  Bcast(MPI_BOTTOM, 1, vel_samp_comm, 0, world);
  Barrier(world);
  Type_free(&vel_samp_comm);
  Barrier(world);

  Bcast(&(vel_samp_cp->qseed), 1, MPI_DOUBLE, 0, world);
  Bcast(&(vel_samp_cp->vc_scal_tol), 1, MPI_DOUBLE, 0, world);

  /* Randomize random seed */
  qseed = vel_samp_cp->qseed;
  for(i=1; i<=myid; i++){
    temp  = 10000.0*ran_essl(&qseed);
    itemp = temp;
  }/*endfor*/
  if(myid>0){ vel_samp_cp->qseed = (double)itemp; }

/*------------------------------------------------------------------------*/
} /*end routine*/
void *Worker(void *arg) {
  int myid = (int) arg;
  double maxdiff, temp;
  int i, j, iters;
  int first, last;

  printf("worker %d (pthread id %d) has started\n", myid, pthread_self());

  /* determine first and last rows of my strip of the grids */
  first = myid*stripSize + 1;
  last = first + stripSize - 1;

  for (iters = 1; iters <= numIters; iters++) {
    /* update my points */
    for (i = first; i <= last; i++) {
      for (j = 1; j <= gridSize; j++) {
        grid2[i][j] = (grid1[i-1][j] + grid1[i+1][j] +
                       grid1[i][j-1] + grid1[i][j+1]) * 0.25;
      }
    }
    Barrier();
    /* update my points again */
    for (i = first; i <= last; i++) {
      for (j = 1; j <= gridSize; j++) {
        grid1[i][j] = (grid2[i-1][j] + grid2[i+1][j] +
                       grid2[i][j-1] + grid2[i][j+1]) * 0.25;
      }
    }
    Barrier();
  }

  /* compute the maximum difference in my strip and set global variable */
  maxdiff = 0.0;
  for (i = first; i <= last; i++) {
    for (j = 1; j <= gridSize; j++) {
      temp = grid1[i][j] - grid2[i][j];
      if (temp < 0)
        temp = -temp;
      if (maxdiff < temp)
        maxdiff = temp;
    }
  }
  maxDiff[myid] = maxdiff;
}
void GenericBuffer::CopyFrom(vk::CommandBuffer commandBuffer, GenericBuffer& srcBuffer)
{
  if (mSize != srcBuffer.mSize)
  {
    throw std::runtime_error("Cannot copy buffers of different sizes");
  }

  // TODO improve barriers
  srcBuffer.Barrier(commandBuffer, vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eTransferRead);
  Barrier(commandBuffer, vk::AccessFlagBits::eShaderRead, vk::AccessFlagBits::eTransferWrite);

  auto region = vk::BufferCopy().setSize(mSize);
  commandBuffer.copyBuffer(srcBuffer.Handle(), mBuffer, region);

  Barrier(commandBuffer, vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eShaderRead);
  srcBuffer.Barrier(commandBuffer, vk::AccessFlagBits::eTransferRead, vk::AccessFlagBits::eShaderRead);
}
void Zoo::StopPS() {
  if (MV_CONFIG_sync) {
    FinishTrain();
  }
  Barrier();
  // Stop all actors
  for (auto actor : zoo_) {
    actor.second->Stop();
  }
}
int main(int argc, char** argv) {
  if (argc == 2) {
    int threadCounter = atoi(argv[1]);
    printf("%i threads\n", threadCounter);
    int i = 0;

    pthread_mutex_init(&barMutex, NULL);
    sem_unlink("bar_sync_semaphore");
    barSem = sem_open("bar_sync_semaphore", O_CREAT, 0777, 0);

    pthread_t thread[threadCounter];
    Barrier B = Barrier(threadCounter);
    BufStruct *arg = (BufStruct*) malloc(threadCounter*sizeof(BufStruct));

    for (i = 0; i < threadCounter; i++) {
      arg[i].pBar = &B;
      arg[i].id = i+1;
      if (pthread_create(&thread[i], NULL, singleint, &arg[i]) != 0) {
        printf("FAIL");
        perror("fail");
      }
    }
    for (i = 0; i < threadCounter; i++) {
      pthread_join(thread[i], NULL);
    }

    pthread_mutex_destroy(&barMutex);
    sem_close(barSem);
    sem_unlink("bar_sync_semaphore");
    printf("END\n");
  }
  return 0;
}
void comm_cptherm_info(CPTHERM_INFO *cptherm_info, MPI_Comm world)

/*=======================================================================*/
/*             Begin routine                                             */
{ /*begin routine */
/*=======================================================================*/
/*             Local variable declarations                               */

#include "../typ_defs/typ_mask.h"

  int ninfo = 5;
  MPI_Datatype cptherm_info_comm;
  MPI_Datatype types[1];
  MPI_Aint displs[1];
  int blockcounts[1];

  Address(&(cptherm_info->len_c_nhc), &displs[0]);
  types[0] = MPI_INT;
  blockcounts[0] = ninfo;
  Barrier(world);
  Type_struct(1, blockcounts, displs, types, &cptherm_info_comm);
  Barrier(world);
  Type_commit(&cptherm_info_comm);
  Barrier(world);
  Bcast(MPI_BOTTOM, 1, cptherm_info_comm, 0, world);
  Barrier(world);
  Type_free(&cptherm_info_comm);
  Barrier(world);

  Bcast(&(cptherm_info->cp_therm_heat_fact), 1, MPI_DOUBLE, 0, world);
  Barrier(world);

/*------------------------------------------------------------------------*/
} /*end routine*/
void ThreadedDevice::Execute(Task* task, int thrid) {
  PreExecute();
#ifndef NDEBUG
  WallTimer memory_timer;
  memory_timer.Start();
#endif
  DataList input_shards;
  for (auto& i : task->inputs) {
    auto& input_data = i.physical_data;
    if (input_data.device_id == device_id_) {  // Input is local
      DLOG(INFO) << Name() << " input task data #" << i.id << " is local";
      CHECK_EQ(local_data_.Count(input_data.data_id), 1);
    } else {
      lock_guard<mutex> lck(copy_locks_[input_data.data_id]);
      if (!remote_data_.Count(input_data.data_id)) {  // Input is remote and not copied
        DLOG(INFO) << Name() << " input task data #" << i.id
                   << " is remote and not copied";
        size_t size = input_data.size.Prod() * sizeof(float);
        auto ptr = data_store_->CreateData(input_data.data_id, size);
        DoCopyRemoteData(ptr,
                         MinervaSystem::Instance().GetPtr(input_data.device_id,
                                                          input_data.data_id).second,
                         size, thrid);
        CHECK(remote_data_.Insert(input_data.data_id));
      }
    }
    input_shards.emplace_back(data_store_->GetData(input_data.data_id),
                              input_data.size);
  }
  DataList output_shards;
  for (auto& i : task->outputs) {
    size_t size = i.physical_data.size.Prod() * sizeof(float);
    DLOG(INFO) << Name() << " create output for task data #" << i.id;
    auto ptr = data_store_->CreateData(i.physical_data.data_id, size);
    CHECK(local_data_.Insert(i.physical_data.data_id));
    output_shards.emplace_back(ptr, i.physical_data.size);
  }
  auto& op = task->op;
  CHECK(op.compute_fn);
  if (!FLAGS_no_execute) {
#ifndef NDEBUG
    Barrier(thrid);
    memory_timer.Stop();
    MinervaSystem::Instance().profiler().RecordTime(TimerType::kMemory,
                                                    op.compute_fn->Name(),
                                                    memory_timer);
    WallTimer calculate_timer;
    calculate_timer.Start();
#endif
    DLOG(INFO) << Name() << " execute task #" << task->id << ": "
               << op.compute_fn->Name();
    DoExecute(input_shards, output_shards, op, thrid);
    DLOG(INFO) << Name() << " finished execute task #" << task->id << ": "
               << op.compute_fn->Name();
#ifndef NDEBUG
    calculate_timer.Stop();
    MinervaSystem::Instance().profiler().RecordTime(TimerType::kCalculation,
                                                    op.compute_fn->Name(),
                                                    calculate_timer);
#endif
  }
  listener_->OnOperationComplete(task);
}
void MPIdata::printAll(std::string instr) const {
  // Prints the processor number and instr out sequentially.
  // Guaranteed to print each separately so we don't get overlapping output.
  for (int i = 0; i < total_n_v(); i++) {
    Barrier();
    if (i == my_n_v()) {
      std::cout << "Proc " << my_n_v() << std::endl;
      std::cout << instr << std::endl;
    }
  }
}