// Applies (on the root) or requests (on any other rank) one update of the
// parameter blob stored at params[param_id].
//
// Rank 0: folds the blob's diff into its data in place through dragon_axpy
// with a coefficient of -1 (presumably data -= diff; confirm against
// dragon_axpy). The axpy is wrapped in update_locks[param_id] when `locking`
// is set. Per the original note, this path is triggered from do_listen() or
// called by the server itself.
//
// Any other rank: sends the blob's diff to rank 0, then blocks until the
// blob's data is received back from rank 0. Both messages use param_id as the
// MPI tag. Per the original note, this path is called by Net.
//
// @param param_id  index into params / update_locks; also used as the MPI tag.
void ParameterServer<BlobType>::update(int param_id) {
  // Running count of root-side updates, shared across all calls.
  // NOTE(review): plain static int incremented outside the lock — confirm the
  // root-side path is never entered concurrently.
  static int update_cnt = 0;
  MPI_Status status;
  boost::shared_ptr<BlobType> param = params[param_id];
  DLOG(INFO) << "[Public]: request update " << param_id << " from" << rank;

  if (rank == 0) {
    // Root node: update the parameters held by the ParameterServer.
    if (locking) update_locks[param_id].lock();
    dragon_axpy(param->count(), Dtype(-1), param->cpu_diff(),
                param->mutable_cpu_data());
    if (locking) update_locks[param_id].unlock();
    DLOG(INFO) << "[Server]: finish update";

    update_cnt++;
    if (update_cnt % 100000 == 0)
      DLOG(INFO) << "[Server]: reports: " << update_cnt << " updates";
  } else {
    // Worker node: send the diff to the ParameterServer (root node).
    MPI_Send((void*)param->cpu_diff(), param->count(), getType(Dtype(0.0)), 0,
             param_id, MPI_COMM_WORLD);
    DLOG(INFO) << "[Node]: " << rank << " send param diff" << param_id;

    // The receive datatype must describe the same element type as the buffer
    // and as the send above. The previous code passed a bare double literal
    // (getType(0.0)), which picks the datatype for double regardless of Dtype.
    MPI_Recv((void*)param->mutable_cpu_data(), param->count(),
             getType(Dtype(0.0)), 0, param_id, MPI_COMM_WORLD, &status);
    DLOG(INFO) << "[Node]: " << rank << " receive param data" << param_id;
  }
}
// CPU forward pass: combines all bottom blobs element by element into top[0]
// according to op_.
//
//   PROD - top = bottom[0] * bottom[1] * ... (running product kept in top).
//   SUM  - top is zeroed, then each bottom[i] is accumulated through
//          dragon_axpy with coefficient coeffs_[i].
//   MAX  - top holds the per-element maximum and max_idx_ holds the index of
//          the bottom that supplied it. Bottom 0 wins over bottom 1 only when
//          strictly greater; later bottoms replace the current maximum only
//          when strictly greater.
//
// Any other op_ value is reported through LOG(FATAL).
// PROD and MAX read bottom[0] and bottom[1] unconditionally.
void EltwiseLayer<Dtype>::forward_cpu(const vector<Blob<Dtype>*>& bottom,
                                      const vector<Blob<Dtype>*>& top) {
  const int count = top[0]->count();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int num_bottoms = static_cast<int>(bottom.size());

  switch (op_) {
    case EltwiseParameter_EltwiseOp_PROD: {
      // Seed the product with the first pair, then fold in the rest.
      dragon_mul(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(), top_data);
      for (int k = 2; k < num_bottoms; ++k) {
        dragon_mul(count, top_data, bottom[k]->cpu_data(), top_data);
      }
      break;
    }

    case EltwiseParameter_EltwiseOp_SUM: {
      dragon_set(count, Dtype(0), top_data);
      // TODO(shelhamer) does BLAS optimize to sum for coeff = 1?
      for (int k = 0; k < num_bottoms; ++k) {
        dragon_axpy(count, coeffs_[k], bottom[k]->cpu_data(), top_data);
      }
      break;
    }

    case EltwiseParameter_EltwiseOp_MAX: {
      // Reset the winner indices and the running maximum.
      int* winner = max_idx_.mutable_cpu_data();
      dragon_set(count, -1, winner);
      dragon_set(count, Dtype(-FLT_MAX), top_data);

      // First pass: compare bottom 0 against bottom 1.
      const Dtype* first = bottom[0]->cpu_data();
      const Dtype* second = bottom[1]->cpu_data();
      for (int n = 0; n < count; ++n) {
        const bool first_wins = first[n] > second[n];
        top_data[n] = first_wins ? first[n] : second[n];  // max value
        winner[n] = first_wins ? 0 : 1;                    // max index
      }

      // Remaining bottoms challenge the running maximum.
      for (int b = 2; b < num_bottoms; ++b) {
        const Dtype* challenger = bottom[b]->cpu_data();
        for (int n = 0; n < count; ++n) {
          if (challenger[n] > top_data[n]) {
            top_data[n] = challenger[n];  // max value
            winner[n] = b;                // max index
          }
        }
      }
      break;
    }

    default:
      LOG(FATAL) << "Unknown elementwise operation.";
  }
}
// Folds this blob's diff into its data in place, on whichever side data_
// reports as the current head.
//
//   HEAD_AT_CPU          - dragon_axpy on the CPU pointers.
//   HEAD_AT_GPU / SYNCED - dragon_gpu_axpy on the GPU pointers; this call is
//                          compiled out when CPU_ONLY is defined, leaving the
//                          branch a no-op in that build.
//   anything else        - (e.g. uninitialized memory) nothing is done.
//
// Both axpy calls use a coefficient of -1 (presumably data -= diff; confirm
// against dragon_axpy / dragon_gpu_axpy).
void Blob<Dtype>::update() {
  const Dtype coeff = Dtype(-1);

  switch (data_->head()) {
    case SyncedMemory::SyncedHead::HEAD_AT_CPU: {
      const Dtype* diff = cpu_diff();
      Dtype* data = mutable_cpu_data();
      dragon_axpy(count_, coeff, diff, data);
      break;
    }

    case SyncedMemory::SyncedHead::HEAD_AT_GPU:
    case SyncedMemory::SyncedHead::SYNCED: {
#ifndef CPU_ONLY
      const Dtype* diff = gpu_diff();
      Dtype* data = mutable_gpu_data();
      dragon_gpu_axpy<Dtype>(count_, coeff, diff, data);
#endif
      break;
    }

    default:
      // Uninitialized: nothing to update.
      break;
  }
}