void comm_init()
{
    int i;
    static int firsttime = 1;

    if (!firsttime) {
        return;
    }
    firsttime = 0;

    gethostname(hostname, 128);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int gpus_per_node = getGpuCount();
    comm_partition();

    back_nbr = (rank - 1 + size) % size;
    fwd_nbr  = (rank + 1) % size;
    num_nodes = size / gpus_per_node;
    if (num_nodes == 0) {
        num_nodes = 1;
    }

    // determine which gpu this MPI process is going to use
    char* hostname_recv_buf = (char*)malloc(128 * size);
    if (hostname_recv_buf == NULL) {
        printf("ERROR: malloc failed for hostname_recv_buf\n");
        comm_exit(1);
    }

    int rc = MPI_Allgather(hostname, 128, MPI_CHAR,
                           hostname_recv_buf, 128, MPI_CHAR, MPI_COMM_WORLD);
    if (rc != MPI_SUCCESS) {
        printf("ERROR: MPI_Allgather failed for hostname\n");
        comm_exit(1);
    }

    // count how many lower-ranked processes share this hostname;
    // that count is this process's local GPU index
    which_gpu = 0;
    for (i = 0; i < size; i++) {
        if (i == rank) {
            break;
        }
        if (strncmp(hostname, hostname_recv_buf + 128 * i, 128) == 0) {
            which_gpu++;
        }
    }

    if (which_gpu >= gpus_per_node) {
        printf("ERROR: invalid gpu(%d) to use in rank=%d mpi process\n",
               which_gpu, rank);
        comm_exit(1);
    }

    srand(rank * 999);
    free(hostname_recv_buf);
    return;
}
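/*
 * Illustrative sketch (not part of the original source): after comm_init()
 * has computed which_gpu, each MPI rank would typically bind to that device
 * before doing any CUDA work. The helper name bind_local_gpu() is
 * hypothetical; it assumes <cuda_runtime.h> is included and reuses the
 * global rank and comm_exit() from the surrounding file.
 */
static void bind_local_gpu(int which_gpu)
{
    cudaError_t err = cudaSetDevice(which_gpu);
    if (err != cudaSuccess) {
        printf("ERROR: cudaSetDevice(%d) failed in rank=%d: %s\n",
               which_gpu, rank, cudaGetErrorString(err));
        comm_exit(1);
    }
}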
/*
 * Author: Huy Phung
 * Constructor of A5Cuda
 * Sets the parameters used by the worker slices
 * @param: max_rounds, condition
 * @return: A5Cuda instance
 */
A5Cuda::A5Cuda(uint32_t max_rounds, int condition)
{
    mRunning = true;
    mMaxRound = max_rounds;
    mCondition = condition;
    //mProcessThread = new std::thread(&A5Cuda::Process, this);

    // create one worker slice per available GPU
    // NOTE: a5slices is local to the constructor, so the created slices are
    // unreachable once it returns; they presumably belong in a member of A5Cuda
    int nGpus = getGpuCount();
    std::vector<A5CudaSlice*> a5slices(nGpus);
    for (int i = 0; i < nGpus; i++) {
        a5slices[i] = new A5CudaSlice(this, i, mCondition, mMaxRound);
    }
}
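/*
 * Hypothetical usage sketch (not part of the original source). Only the
 * constructor shown above is assumed; the header name "A5Cuda.h" and the
 * parameter values are illustrative, not taken from the project.
 */
#include <cstdint>
#include "A5Cuda.h"

int main()
{
    uint32_t max_rounds = 8;  // illustrative round count
    int condition = 12;       // illustrative condition (stop) bits
    A5Cuda* engine = new A5Cuda(max_rounds, condition);
    // ... submit start values / collect results via the engine's API ...
    delete engine;
    return 0;
}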