// Demo step: scatter 100 random 2-D points, sample 10 of them as initial
// cluster centers, and label every point with its nearest center.
// @param ctx  Thrill execution context this worker participates in.
void Process(thrill::Context& ctx) {
    // Per-worker RNG, seeded from the OS entropy source.
    std::default_random_engine rng(std::random_device { } ());
    std::uniform_real_distribution<double> dist(0.0, 1000.0);

    // generate 100 random points using uniform distribution
    auto points = Generate(
        ctx, /* size */ 100,
        [&](const size_t&) {
            return Point { dist(rng), dist(rng) };
        })
        .Cache();

    // print out the points
    points.Print("points");

    //! [step2 sample]
    // pick some initial random cluster centers
    auto centers = points.Sample(/* num_clusters */ 10);
    //! [step2 sample]

    //! [step2 classify]
    // collect centers in a local vector on each worker
    std::vector<Point> local_centers = centers.AllGather();

    // calculate the closest center for each point
    auto closest = points.Map(
        [local_centers](const Point& p) {
            double min_dist = p.DistanceSquare(local_centers[0]);
            size_t cluster_id = 0;
            for (size_t i = 1; i < local_centers.size(); ++i) {
                // Renamed from `dist`: the old name shadowed the
                // distribution object in the enclosing scope (-Wshadow).
                double d = p.DistanceSquare(local_centers[i]);
                if (d < min_dist) {
                    min_dist = d;
                    cluster_id = i;
                }
            }
            return ClosestCenter { cluster_id, p };
        });

    closest.Print("closest");
    //! [step2 classify]
}
// Bootstraps a multi-node training run: wires up one communication channel
// per peer (RDMA when built with INFINIBAND, TCP sockets otherwise),
// exchanges peer addresses via AllGather, then drives the solver(s) —
// one P2P sync per GPU, or a single CPU solver under CPU_ONLY.
// @param root_solver  solver run on this process's primary device/thread.
// @param gpus         device ids assigned to this process.
// @param total_gpus   cluster-wide GPU count, installed as the solver count
//                     once data readers are ready.
void MiniCluster<Dtype>::run(shared_ptr<Solver<Dtype> > root_solver,
                             const vector<int>& gpus,
                             int total_gpus) {
#ifdef INFINIBAND
  RDMAAdapter adapter;
  LOG(INFO) << "Found RDMA adapter " << adapter.name();
  // Create channel for each peer (no channel to ourselves: slot rank_ stays null)
  vector<shared_ptr<RDMAChannel> > peers(size_);
  for (int i = 0; i < size_; ++i) {
    if (i != rank_) {
      peers[i].reset(new RDMAChannel(adapter));
    }
  }
  // Connect channels all to all. Round i publishes this process's channel
  // address *for peer i*; after the AllGather, addresses[j] holds the address
  // peer j advertised in this round, and only rank i dials out.
  for (int i = 0; i < size_; ++i) {
    vector<string> addresses(1);
    if (i != rank_) {
      addresses[0] = peers[i]->address();
    }
    AllGather(&addresses);
    for (int j = 0; j < addresses.size(); ++j)
      LOG(INFO) << addresses[j];
    if (i == rank_) {
      for (int j = 0; j < size_; ++j) {
        if (j != rank_) {
          peers[j]->Connect(addresses[j]);
        }
      }
    }
  }
  vector<shared_ptr<P2PSync<Dtype> > > syncs(gpus.size());
  // RDMASync will create all necessary buffers
  syncs[0].reset(new RDMASync<Dtype>(root_solver, peers, rank_));
#else
  // Create channel for each peer (slot rank_ intentionally left null)
  vector<shared_ptr<SocketChannel> > peers(size_);
  for (int i = 0; i < size_; ++i) {
    if (i != rank_) {
      peers[i].reset(new SocketChannel());
    }
  }
  SocketAdapter adapter(&peers);
  // Brief pause so the adapter's listening socket is up before peers
  // learn our address — presumably a startup-race workaround; TODO confirm.
  usleep(10000);
  // Get all channels to connect to
  vector<string> addresses(1);
  // Set local address to send to master in AllGather.
  // If you are master, you still need to set it, so
  // that it is sent to everyone during regular broadcast in AllGather
  addresses[0] = adapter.address();
  LOG(INFO) << "Adapter address " << adapter.address().c_str();
  AllGather(&addresses);
  for (int j = 0; j < addresses.size(); ++j)
    LOG(INFO) << "ADDRESS [" << addresses.at(j).c_str() << "]";
  // Connect to all channels (every rank dials every other rank)
  for (int j = 0; j < size_; ++j) {
    if (j != rank_) {
      LOG(INFO) << "Connecting to [" << addresses[j].c_str() << "]";
      peers[j]->Connect(addresses[j]);
    }
  }
#ifndef CPU_ONLY
  // One sync slot per local GPU; slot 0 owns the socket-backed root sync.
  vector<shared_ptr<P2PSync<Dtype> > > syncs(gpus.size());
  syncs[0].reset(new SocketSync<Dtype>(root_solver, peers, rank_));
#else
  // CPU build: a single solver, still socket-synced across nodes.
  vector<shared_ptr<P2PSyncCPU<Dtype> > > syncs(1);
  syncs[0].reset(new SocketSyncCPU<Dtype>(root_solver, peers, rank_));
#endif
#endif
#ifndef CPU_ONLY
  // Let the root sync allocate per-GPU workers into the remaining slots.
  syncs[0]->prepare(gpus, &syncs);
  LOG(INFO)<< "Starting Optimization";
  // Switch to total number of GPUs once the datareaders are ready
  Caffe::set_solver_count(total_gpus);
  for (int i = 1; i < syncs.size(); ++i) {
    syncs[i]->StartInternalThread();
  }
  // Run root solver on current thread
  syncs[0]->solver()->Solve();
  // Root finished: wind down the worker threads started above.
  for (int i = 1; i < syncs.size(); ++i) {
    syncs[i]->StopInternalThread();
  }
#else
  Caffe::set_solver_count(1);
  LOG(INFO) << "Starting solver...";
  syncs[0]->solver()->Solve();
#endif
}