virtual void reset_data_for_test(const AzOut &out, const AzSmat *m_data) { bool doSparse = false; if (m_data->rowNum()*m_data->colNum() > Az_max_test_entries) { /* large data */ /*--- dense is faster but uses up more memory if data is sparse ---*/ double nz_ratio; m_data->nonZeroNum(&nz_ratio); if (nz_ratio < 0.6) { /* relatively sparse */ doSparse = true; AzBytArr s; s.c("Large and sparse test data (nonzero ratio=", nz_ratio); s.c("); treated as sparse data."); AzPrint::writeln(out, s); } } data_num = m_data->colNum(); m_tran_dense.reset(); m_tran_sparse.reset(); if (doSparse) { m_data->transpose(&m_tran_sparse); } else { m_tran_dense.transpose_from(m_data); } sorted_arr.reset(); feat.reset(m_data->rowNum()); }
virtual void reset_data(const AzOut &out, const AzSmat *m_data, AzParam &p, bool beTight, const AzSvFeatInfo *inp_feat=NULL) { resetParam(p); printParam(out); /*--- count nonzero components ---*/ double nz_ratio; m_data->nonZeroNum(&nz_ratio); AzBytArr s("Training data: "); s.cn(m_data->rowNum());s.c("x");s.cn(m_data->colNum()); s.c(", nonzero_ratio=", nz_ratio, 4); /*--- decide sparse or dense ---*/ AzBytArr s_dp("; managed as dense data"); bool doSparse = false; if (dataproc == dataproc_Auto && nz_ratio < Az_nz_ratio_threshold || dataproc == dataproc_Sparse) { doSparse = true; s_dp.reset("; managed as sparse data"); } if (dataproc != dataproc_Auto) s_dp.concat(" as requested."); else s_dp.concat("."); AzPrint::writeln(out, "-------------"); AzPrint::writeln(out, s, s_dp); AzPrint::writeln(out, "-------------"); /*--- pre-sort data ---*/ m_tran_sparse.reset(); m_tran_dense.unlock(); m_tran_dense.reset(); data_num = m_data->colNum(); if (doSparse) { m_data->transpose(&m_tran_sparse); sorted_arr.reset_sparse(&m_tran_sparse, beTight); } else { m_tran_dense.transpose_from(m_data); sorted_arr.reset_dense(&m_tran_dense, beTight); /* prohibit any action to change the pointers to the column vectors */ m_tran_dense.lock(); } if (inp_feat != NULL) { feat.reset(inp_feat); if (feat.featNum() != m_data->rowNum()) { throw new AzException(AzInputError, "AzDataForTrTree::reset", "#feat mismatch"); } } else { feat.reset(m_data->rowNum()); } }