virtual void reset_data_for_test(const AzOut &out, const AzSmat *m_data) { bool doSparse = false; if (m_data->rowNum()*m_data->colNum() > Az_max_test_entries) { /* large data */ /*--- dense is faster but uses up more memory if data is sparse ---*/ double nz_ratio; m_data->nonZeroNum(&nz_ratio); if (nz_ratio < 0.6) { /* relatively sparse */ doSparse = true; AzBytArr s; s.c("Large and sparse test data (nonzero ratio=", nz_ratio); s.c("); treated as sparse data."); AzPrint::writeln(out, s); } } data_num = m_data->colNum(); m_tran_dense.reset(); m_tran_sparse.reset(); if (doSparse) { m_data->transpose(&m_tran_sparse); } else { m_tran_dense.transpose_from(m_data); } sorted_arr.reset(); feat.reset(m_data->rowNum()); }
virtual void reset_data(const AzOut &out, const AzSmat *m_data, AzParam &p, bool beTight, const AzSvFeatInfo *inp_feat=NULL) { resetParam(p); printParam(out); /*--- count nonzero components ---*/ double nz_ratio; m_data->nonZeroNum(&nz_ratio); AzBytArr s("Training data: "); s.cn(m_data->rowNum());s.c("x");s.cn(m_data->colNum()); s.c(", nonzero_ratio=", nz_ratio, 4); /*--- decide sparse or dense ---*/ AzBytArr s_dp("; managed as dense data"); bool doSparse = false; if (dataproc == dataproc_Auto && nz_ratio < Az_nz_ratio_threshold || dataproc == dataproc_Sparse) { doSparse = true; s_dp.reset("; managed as sparse data"); } if (dataproc != dataproc_Auto) s_dp.concat(" as requested."); else s_dp.concat("."); AzPrint::writeln(out, "-------------"); AzPrint::writeln(out, s, s_dp); AzPrint::writeln(out, "-------------"); /*--- pre-sort data ---*/ m_tran_sparse.reset(); m_tran_dense.unlock(); m_tran_dense.reset(); data_num = m_data->colNum(); if (doSparse) { m_data->transpose(&m_tran_sparse); sorted_arr.reset_sparse(&m_tran_sparse, beTight); } else { m_tran_dense.transpose_from(m_data); sorted_arr.reset_dense(&m_tran_dense, beTight); /* prohibit any action to change the pointers to the column vectors */ m_tran_dense.lock(); } if (inp_feat != NULL) { feat.reset(inp_feat); if (feat.featNum() != m_data->rowNum()) { throw new AzException(AzInputError, "AzDataForTrTree::reset", "#feat mismatch"); } } else { feat.reset(m_data->rowNum()); } }
/* <class V>: AzSvect (sparse vector) | AzDvect (dense vector) */
/* Fill v_pred with one score per class: ws * <w_c, x> for each column c of m_w. */
template <class V>
void apply(const V *v_x, AzDvect *v_pred) const {
  const int class_num = classNum();
  v_pred->reform(class_num); /* resize to #classes, zero-filled */
  double *pred = v_pred->point_u();
  for (int cx = 0; cx < class_num; ++cx) {
    pred[cx] = ws * m_w.col(cx)->innerProduct(v_x);
  }
}
/* Accumulate a histogram of the values in m: for each entry, bump the
   population of the first bin whose border is >= the value.
   NOTE(review): a value greater than the last border is not counted in
   any bin — presumably the last border is +inf; TODO confirm. */
void count(const AzPmat *m) {
  if (v_border.rowNum() == 0) {
    init_count(); /* lazy-initialize the borders on first use */
  }
  AzDmat md;
  m->get(&md); /* copy device matrix to host-side dense matrix */
  const double *border = v_border.point();
  const int border_num = v_border.rowNum();
  for (int col = 0; col < md.colNum(); ++col) {
    for (int row = 0; row < md.rowNum(); ++row) {
      const double val = md.get(row, col);
      for (int bx = 0; bx < border_num; ++bx) {
        if (val <= border[bx]) {
          v_pop.add(bx, 1);
          break; /* count each value in exactly one bin */
        }
      }
    }
  }
}
/* True iff the value of feature fx for data point dx is <= border_val.
   Reads from whichever transposed matrix (sparse or dense) is in use. */
virtual inline bool isLE(int dx, int fx, double border_val) const {
  const double value = AzSmat::isNull(&m_tran_sparse)
                       ? m_tran_dense.get(dx, fx)
                       : m_tran_sparse.get(dx, fx);
  return (value <= border_val);
}
/* Fold the pending scalar ws into the weight matrix so that ws becomes 1. */
inline void flush_ws() {
  if (ws == 1) return; /* nothing pending */
  m_w.multiply(ws);
  ws = 1;
}
/*------------------------------------------------------------*/
/* Number of classes = number of weight-vector columns in m_w. */
inline int classNum() const {
  return m_w.colNum();
}
/* Binary-classification score: ws * <w_0, x>.
   @throws AzException* if the model has more than one class. */
template <class V>
double apply(const V *v_x) const {
  if (classNum() != 1) {
    throw new AzException("AzsLmod::apply", "apply(x) is only for binary classification.");
  }
  return ws * m_w.col(0)->innerProduct(v_x);
}
/* Replace the model: copy the weight matrix and set the scalar multiplier. */
virtual void reset(const AzDmat *inp_m_w, double inp_ws) {
  m_w.set(inp_m_w);
  ws = inp_ws;
}
/* v_dst += column[col] * coeff, reading from whichever matrix (dense md
   or sparse ms) is currently set.
   @throws AzException* if neither matrix is set. */
inline void add_to(AzDvect *v_dst, int col, double coeff) const {
  if (md != NULL) {
    v_dst->add(md->col(col), coeff); /* dst += md[,col]*coeff */
    return;
  }
  if (ms != NULL) {
    v_dst->add(ms->col(col), coeff);
    return;
  }
  throw new AzException("AzDSmat::add", "No data");
}
/* Column count of the underlying matrix; 0 if neither dense nor sparse is set. */
inline int colNum() const {
  return (md != NULL) ? md->colNum()
                      : ((ms != NULL) ? ms->colNum() : 0);
}
/* Row count of the underlying matrix; 0 if neither dense nor sparse is set. */
inline int rowNum() const {
  return (md != NULL) ? md->rowNum()
                      : ((ms != NULL) ? ms->rowNum() : 0);
}