// run prediction and return a pointer into the internal prediction buffer;
// bit 0 of option_mask requests margin output, bit 1 requests leaf indices
inline const float *Pred(const DataMatrix &dmat, int option_mask,
                         unsigned ntree_limit, bst_ulong *len) {
  this->CheckInitModel();
  this->Predict(dmat, (option_mask & 1) != 0, &this->preds_,
                ntree_limit, (option_mask & 2) != 0);
  *len = static_cast<bst_ulong>(this->preds_.size());
  return BeginPtr(this->preds_);
}
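
// A hedged sketch of the contract Pred() exposes: the returned pointer
// aliases an internal buffer (preds_), so it is only valid until the next
// call. MockBooster, its fake scores, and the flag names are illustrative
// stand-ins, not the real booster API; only the bit layout mirrors the call.
#include <cstdio>
#include <vector>

typedef unsigned long bst_ulong_demo;  // stands in for the C-API length type

class MockBooster {
 public:
  const float *Pred(int option_mask, bst_ulong_demo *len) {
    bool output_margin = (option_mask & 1) != 0;  // bit 0: margin scores
    bool pred_leaf = (option_mask & 2) != 0;      // bit 1: leaf indices
    preds_.assign(3, output_margin ? 0.0f : 0.5f);
    if (pred_leaf) preds_.assign(3, 7.0f);        // fake leaf ids
    *len = static_cast<bst_ulong_demo>(preds_.size());
    return preds_.empty() ? NULL : &preds_[0];    // BeginPtr-style access
  }
 private:
  std::vector<float> preds_;  // reused across calls; old pointers go stale
};

int main() {
  MockBooster bst;
  bst_ulong_demo len;
  const float *out = bst.Pred(/*option_mask=*/1, &len);
  for (bst_ulong_demo i = 0; i < len; ++i) std::printf("%g\n", out[i]);
  return 0;
}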
/*! \brief save a LibSVM format file as DMatrixPage */
inline void LoadText(const char *uri, const char *cache_file,
                     bool silent, bool loadsplit) {
  if (!silent) {
    utils::Printf("start generating text file from %s\n", uri);
  }
  int rank = 0, npart = 1;
  if (loadsplit) {
    rank = rabit::GetRank();
    npart = rabit::GetWorldSize();
  }
  this->set_cache_file(cache_file);
  std::string fname_row = std::string(cache_file) + ".row.blob";
  utils::FileStream fo(utils::FopenCheck(fname_row.c_str(), "wb"));
  SparsePage page;
  size_t bytes_write = 0;
  double tstart = rabit::utils::GetTime();
  LibSVMParser parser(
      dmlc::InputSplit::Create(uri, rank, npart, "text"), 16);
  info.Clear();
  while (parser.Next()) {
    const LibSVMPage &batch = parser.Value();
    size_t nlabel = info.labels.size();
    info.labels.resize(nlabel + batch.label.size());
    if (batch.label.size() != 0) {
      std::memcpy(BeginPtr(info.labels) + nlabel,
                  BeginPtr(batch.label),
                  batch.label.size() * sizeof(float));
    }
    page.Push(batch);
    for (size_t i = 0; i < batch.data.size(); ++i) {
      info.info.num_col = std::max(info.info.num_col,
                                   static_cast<size_t>(batch.data[i].index + 1));
    }
    if (page.MemCostBytes() >= kPageSize) {
      bytes_write += page.MemCostBytes();
      page.Save(&fo);
      page.Clear();
      double tdiff = rabit::utils::GetTime() - tstart;
      if (!silent) {
        utils::Printf("Writing to %s in %g MB/s, %lu MB written\n",
                      cache_file, (bytes_write >> 20UL) / tdiff,
                      (bytes_write >> 20UL));
      }
    }
  }
  // the snippet was truncated here; what follows is an assumed minimal
  // completion: flush the final partial page, record the row count, close
  if (page.MemCostBytes() != 0) {
    bytes_write += page.MemCostBytes();
    page.Save(&fo);
  }
  info.info.num_row = info.labels.size();
  fo.Close();
}
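
// A hedged sketch of the spill-to-disk paging pattern LoadText uses: rows
// accumulate in an in-memory page, and once the page's byte cost crosses a
// threshold it is appended to the cache file and cleared. kPageSizeDemo,
// DemoPage, and the float "rows" are illustrative stand-ins for SparsePage.
#include <cstdio>
#include <vector>

static const size_t kPageSizeDemo = 1 << 10;  // tiny 1KB pages for the demo

struct DemoPage {
  std::vector<float> data;
  size_t MemCostBytes() const { return data.size() * sizeof(float); }
  void Save(std::FILE *fo) const {
    std::fwrite(&data[0], sizeof(float), data.size(), fo);
  }
  void Clear() { data.clear(); }
};

int main() {
  std::FILE *fo = std::fopen("demo.page.blob", "wb");
  if (fo == NULL) return 1;
  DemoPage page;
  size_t bytes_write = 0;
  for (int i = 0; i < 10000; ++i) {
    page.data.push_back(static_cast<float>(i));
    if (page.MemCostBytes() >= kPageSizeDemo) {  // page full: spill to disk
      bytes_write += page.MemCostBytes();
      page.Save(fo);
      page.Clear();
    }
  }
  if (!page.data.empty()) {  // flush the final partial page
    bytes_write += page.MemCostBytes();
    page.Save(fo);
  }
  std::fclose(fo);
  std::printf("%lu bytes written\n", static_cast<unsigned long>(bytes_write));
  return 0;
}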
// synchronize the best solution of each node
virtual void SyncBestSolution(const std::vector<int> &qexpand) {
  std::vector<SplitEntry> vec;
  for (size_t i = 0; i < qexpand.size(); ++i) {
    const int nid = qexpand[i];
    // merge the per-thread candidates into this node's local best
    for (int tid = 0; tid < this->nthread; ++tid) {
      this->snode[nid].best.Update(this->stemp[tid][nid].best);
    }
    vec.push_back(this->snode[nid].best);
  }
  // TODO(tqchen) lazy version
  // communicate best solution
  reducer.Allreduce(BeginPtr(vec), vec.size());
  // assign solution back
  for (size_t i = 0; i < qexpand.size(); ++i) {
    const int nid = qexpand[i];
    this->snode[nid].best = vec[i];
  }
}
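
// A hedged sketch of the reduction SyncBestSolution relies on: each worker
// holds one candidate split per expanded node, and an elementwise "take the
// better split" reduction yields a globally consistent best. DemoSplit mimics
// the role of SplitEntry; ReduceBest stands in for the rabit reducer.
#include <cstdio>
#include <vector>

struct DemoSplit {
  float loss_chg;
  int split_index;
  DemoSplit() : loss_chg(0.0f), split_index(-1) {}
  // keep the candidate with the larger loss reduction
  bool Update(const DemoSplit &e) {
    if (e.loss_chg > loss_chg) { *this = e; return true; }
    return false;
  }
};

// elementwise reduction over per-node vectors, one slot per expanded node
void ReduceBest(std::vector<DemoSplit> *dst, const std::vector<DemoSplit> &src) {
  for (size_t i = 0; i < dst->size(); ++i) (*dst)[i].Update(src[i]);
}

int main() {
  std::vector<DemoSplit> worker0(2), worker1(2);
  worker0[0].loss_chg = 1.5f; worker0[0].split_index = 3;
  worker1[0].loss_chg = 2.0f; worker1[0].split_index = 8;  // wins node 0
  worker0[1].loss_chg = 0.7f; worker0[1].split_index = 1;  // wins node 1
  ReduceBest(&worker0, worker1);  // in the real code: reducer.Allreduce
  for (size_t i = 0; i < worker0.size(); ++i)
    std::printf("node %lu: best feature %d (gain %g)\n",
                static_cast<unsigned long>(i),
                worker0[i].split_index, worker0[i].loss_chg);
  return 0;
}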
virtual bool Next(void) {
  // a return of 1 means one label was scanned; EOF or a match failure (0)
  // ends iteration instead of proceeding with an uninitialized label
  if (fscanf(fplst_, "%f", BeginPtr(labels_)) == 1) {
    for (int i = 1; i < label_width_; ++i) {
      utils::Check(fscanf(fplst_, ",%f", &labels_[i]) == 1,
                   "CSVIterator: Error when reading label. "
                   "Possible incorrect file or label_width.");
    }
    for (index_t i = 0; i < shape_[0]; ++i) {
      for (index_t j = 0; j < shape_[1]; ++j) {
        for (index_t k = 0; k < shape_[2]; ++k) {
          utils::Check(fscanf(fplst_, ",%f", &data_[i][j][k]) == 1,
                       "CSVIterator: Error when reading data. "
                       "Possible incorrect file or input_shape.");
        }
      }
    }
    out_.data = data_;
    out_.index = data_index_++;
    mshadow::Tensor<cpu, 1> label_(&(labels_[0]), mshadow::Shape1(label_width_));
    out_.label = label_;
    return true;
  }
  return false;
}
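
// A hedged sketch of the record layout Next() expects: label_width labels
// followed by shape[0]*shape[1]*shape[2] data values, all on one comma
// separated record. The dimensions and file name below are illustrative.
#include <cstdio>

int main() {
  const int label_width = 2;
  const int shape[3] = {1, 2, 3};  // e.g. channels x height x width
  std::FILE *fp = std::fopen("demo.csv", "w");
  if (fp == NULL) return 1;
  // first field has no leading comma, matching the "%f" then ",%f" scans
  std::fprintf(fp, "%f", 1.0);
  for (int i = 1; i < label_width; ++i) std::fprintf(fp, ",%f", 0.0);
  int n = shape[0] * shape[1] * shape[2];
  for (int i = 0; i < n; ++i) std::fprintf(fp, ",%f", i * 0.1);
  std::fprintf(fp, "\n");
  std::fclose(fp);
  return 0;
}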
virtual void SetNonDefaultPosition(const std::vector<int> &qexpand,
                                   IFMatrix *p_fmat, const RegTree &tree) {
  // step 2: classify the non-default data into the right places
  std::vector<unsigned> fsplits;
  for (size_t i = 0; i < qexpand.size(); ++i) {
    const int nid = qexpand[i];
    if (!tree[nid].is_leaf()) {
      fsplits.push_back(tree[nid].split_index());
    }
  }
  // get the candidate split index
  std::sort(fsplits.begin(), fsplits.end());
  fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
  while (fsplits.size() != 0 && fsplits.back() >= p_fmat->NumCol()) {
    fsplits.pop_back();
  }
  // bitmap is only word concurrent, set to bool first
  {
    bst_omp_uint ndata = static_cast<bst_omp_uint>(this->position.size());
    boolmap.resize(ndata);
    #pragma omp parallel for schedule(static)
    for (bst_omp_uint j = 0; j < ndata; ++j) {
      boolmap[j] = 0;
    }
  }
  utils::IIterator<ColBatch> *iter = p_fmat->ColIterator(fsplits);
  while (iter->Next()) {
    const ColBatch &batch = iter->Value();
    for (size_t i = 0; i < batch.size; ++i) {
      ColBatch::Inst col = batch[i];
      const bst_uint fid = batch.col_index[i];
      const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length);
      #pragma omp parallel for schedule(static)
      for (bst_omp_uint j = 0; j < ndata; ++j) {
        const bst_uint ridx = col[j].index;
        const float fvalue = col[j].fvalue;
        const int nid = this->DecodePosition(ridx);
        if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
          if (fvalue < tree[nid].split_cond()) {
            if (!tree[nid].default_left()) boolmap[ridx] = 1;
          } else {
            if (tree[nid].default_left()) boolmap[ridx] = 1;
          }
        }
      }
    }
  }
  bitmap.InitFromBool(boolmap);
  // communicate bitmap
  rabit::Allreduce<rabit::op::BitOR>(BeginPtr(bitmap.data), bitmap.data.size());
  const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
  // get the new position
  const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
  #pragma omp parallel for schedule(static)
  for (bst_omp_uint i = 0; i < ndata; ++i) {
    const bst_uint ridx = rowset[i];
    const int nid = this->DecodePosition(ridx);
    if (bitmap.Get(ridx)) {
      utils::Assert(!tree[nid].is_leaf(), "inconsistent reduce information");
      if (tree[nid].default_left()) {
        this->SetEncodePosition(ridx, tree[nid].cright());
      } else {
        this->SetEncodePosition(ridx, tree[nid].cleft());
      }
    }
  }
}
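
// A hedged sketch of the "bitmap is only word concurrent" trick above:
// concurrent bit writes into one word would race, so each OpenMP thread
// flips a whole bool, a serial pass packs the bools into 32-bit words, and a
// bitwise OR across workers merges the marks (rabit's op::BitOR in the real
// code). PackBits and the two worker vectors are illustrative stand-ins.
#include <cstdio>
#include <vector>

// pack a bool-per-instance map into 32-bit words
std::vector<unsigned> PackBits(const std::vector<int> &boolmap) {
  std::vector<unsigned> words((boolmap.size() + 31) / 32, 0u);
  for (size_t i = 0; i < boolmap.size(); ++i)
    if (boolmap[i]) words[i / 32] |= (1u << (i % 32));
  return words;
}

int main() {
  std::vector<int> worker0(40, 0), worker1(40, 0);
  worker0[3] = 1;   // this worker saw instance 3 go the non-default way
  worker1[35] = 1;  // another worker marked instance 35
  std::vector<unsigned> a = PackBits(worker0), b = PackBits(worker1);
  for (size_t w = 0; w < a.size(); ++w) a[w] |= b[w];  // BitOR "allreduce"
  for (size_t i = 0; i < 40; ++i)
    if (a[i / 32] & (1u << (i % 32)))
      std::printf("instance %lu marked\n", static_cast<unsigned long>(i));
  return 0;
}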
// expose the tree position each buffered instance ended up in after Update
virtual const int* GetLeafPosition(void) const {
  return BeginPtr(this->position);
}
// update the tree, do pruning
virtual void Update(const std::vector<bst_gpair> &gpair, IFMatrix *p_fmat,
                    const BoosterInfo &info, const std::vector<RegTree*> &trees) {
  if (trees.size() == 0) return;
  // thread temporal space
  std::vector< std::vector<TStats> > stemp;
  std::vector<RegTree::FVec> fvec_temp;
  // setup temp space for each thread; number of threads is
  // probed inside a parallel region (all threads write the same value)
  int nthread;
  #pragma omp parallel
  {
    nthread = omp_get_num_threads();
  }
  fvec_temp.resize(nthread, RegTree::FVec());
  stemp.resize(nthread, std::vector<TStats>());
  #pragma omp parallel
  {
    int tid = omp_get_thread_num();
    int num_nodes = 0;
    for (size_t i = 0; i < trees.size(); ++i) {
      num_nodes += trees[i]->param.num_nodes;
    }
    stemp[tid].resize(num_nodes, TStats(param));
    std::fill(stemp[tid].begin(), stemp[tid].end(), TStats(param));
    fvec_temp[tid].Init(trees[0]->param.num_feature);
  }
  // if it is C++11, use lazy evaluation for Allreduce,
  // to gain speedup in recovery
#if __cplusplus >= 201103L
  auto lazy_get_stats = [&]()
#endif
  {
    // start accumulating statistics
    utils::IIterator<RowBatch> *iter = p_fmat->RowIterator();
    iter->BeforeFirst();
    while (iter->Next()) {
      const RowBatch &batch = iter->Value();
      utils::Check(batch.size < std::numeric_limits<unsigned>::max(),
                   "too large batch size");
      const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);
      #pragma omp parallel for schedule(static)
      for (bst_omp_uint i = 0; i < nbatch; ++i) {
        RowBatch::Inst inst = batch[i];
        const int tid = omp_get_thread_num();
        const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
        RegTree::FVec &feats = fvec_temp[tid];
        feats.Fill(inst);
        int offset = 0;
        for (size_t j = 0; j < trees.size(); ++j) {
          AddStats(*trees[j], feats, gpair, info, ridx,
                   BeginPtr(stemp[tid]) + offset);
          offset += trees[j]->param.num_nodes;
        }
        feats.Drop(inst);
      }
    }
    // aggregate the statistics across threads
    int num_nodes = static_cast<int>(stemp[0].size());
    #pragma omp parallel for schedule(static)
    for (int nid = 0; nid < num_nodes; ++nid) {
      for (int tid = 1; tid < nthread; ++tid) {
        stemp[0][nid].Add(stemp[tid][nid]);
      }
    }
  };
#if __cplusplus >= 201103L
  reducer.Allreduce(BeginPtr(stemp[0]), stemp[0].size(), lazy_get_stats);
#else
  reducer.Allreduce(BeginPtr(stemp[0]), stemp[0].size());
#endif
  // rescale learning rate according to size of trees
  float lr = param.learning_rate;
  param.learning_rate = lr / trees.size();
  int offset = 0;
  for (size_t i = 0; i < trees.size(); ++i) {
    for (int rid = 0; rid < trees[i]->param.num_roots; ++rid) {
      this->Refresh(BeginPtr(stemp[0]) + offset, rid, trees[i]);
    }
    offset += trees[i]->param.num_nodes;
  }
  // set learning rate back
  param.learning_rate = lr;
}
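
// A hedged sketch of the lazy-evaluation pattern guarded by the
// __cplusplus >= 201103L blocks above: the reducer receives a callback that
// fills the local buffer, and can skip it entirely when the reduced result
// is restored from a checkpoint during failure recovery. LazyAllreduce is a
// mock of that idea, not rabit's actual API.
#include <cstdio>
#include <functional>
#include <vector>

void LazyAllreduce(std::vector<double> *buf, bool have_cached_result,
                   std::function<void()> prepare) {
  if (!have_cached_result) {
    prepare();  // compute local statistics only when they are really needed
    // ... a real reducer would now combine buffers across workers ...
  }
  // else: the reduced buffer comes from the checkpoint, prepare() is skipped
}

int main() {
  std::vector<double> stats(4, 0.0);
  auto lazy_get_stats = [&]() {
    std::printf("computing local statistics\n");
    for (size_t i = 0; i < stats.size(); ++i) stats[i] = 1.0;
  };
  LazyAllreduce(&stats, /*have_cached_result=*/false, lazy_get_stats);
  LazyAllreduce(&stats, /*have_cached_result=*/true, lazy_get_stats);  // skipped
  return 0;
}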