void warm_start(const AzTreeEnsemble *inp_ens, const AzDataForTrTree *data, AzParam ¶m, const AzBytArr *s_temp_prefix, const AzOut &out, int max_t_num, int search_t_num, AzDvect *v_p, /* inout */ const AzIntArr *inp_ia_tr_dx=NULL) { const char *eyec = "AzTrTreeEnsemble::warmup"; if (max_t_num < inp_ens->size()) { throw new AzException(eyec, "maximum #tree is less than the #tree we already have"); } reset(); a_tree.alloc(&t, max_t_num, "AzTrTreeEnsemble::warmup"); t_num = inp_ens->size(); const_val = inp_ens->constant(); org_dim = inp_ens->orgdim(); if (org_dim > 0 && org_dim != data->featNum()) { throw new AzException(AzInputError, eyec, "feature dimensionality mismatch"); } const AzIntArr *ia_tr_dx = inp_ia_tr_dx; AzIntArr ia_temp; if (ia_tr_dx == NULL) { ia_temp.range(0, data->dataNum()); ia_tr_dx = &ia_temp; } v_p->reform(data->dataNum()); v_p->add(const_val, ia_tr_dx); T dummy_tree(param); if (dummy_tree.usingInternalNodes()) { throw new AzException(AzInputError, eyec, "warm start is not allowed with use of internal nodes"); } dummy_tree.printParam(out); temp_files.reset(&dummy_tree, data->dataNum(), s_temp_prefix); s_param.reset(param.c_str()); dt_param = s_param.c_str(); AzParam p(dt_param, false); int tx; for (tx = 0; tx < t_num; ++tx) { t[tx] = new T(p); t[tx]->forStoringDataIndexes(temp_files.point_file()); if (search_t_num > 0 && tx < t_num-search_t_num) { t[tx]->quick_warmup(inp_ens->tree(tx), data, v_p, ia_tr_dx); } else { t[tx]->warmup(inp_ens->tree(tx), data, v_p, ia_tr_dx); } } }
inline void vLoss(const char *kw, AzLossType *out_loss) { if (param == NULL) return; AzBytArr s; vStr(kw, &s); if (s.length() > 0) { *out_loss = AzLoss::lossType(s.c_str()); } if (doCheck) sp_used_kw.put(kw); }
inline void print(const char *name, T val, int width_prec=-1, bool doZero_doSci=false) { if (o == NULL) return; itemBegin(); if (name != NULL) { *o<<name; if (name_dlm != NULL) *o<<name_dlm; } AzBytArr s; s.cn(val, width_prec, doZero_doSci); *o<<s.c_str(); }
/*-------------------*/ void toplevel_header(const char *desc, AzByte dlm='*') { if (out.isNull()) return; newline(); AzBytArr s; s.fill(dlm, 3); int dlm_len = Az64::cstrlen(desc) + s.length()*2; dlm_len = MIN(line_width, dlm_len); AzBytArr s_long; s_long.fill(dlm, dlm_len); AzPrint::writeln(out, s_long.c_str()); AzPrint::write(out, s.c_str()); AzPrint::write(out, desc); AzPrint::writeln(out, s.c_str()); AzPrint::writeln(out, s_long.c_str()); }
/*--------*/ inline void ppBegin(const char *caller, const char *desc=NULL, const char *inp_dlm=NULL) { AzBytArr s; if (level > 0) { s.concat(caller); } else { s.concat(desc); } printBegin(s.c_str(), inp_dlm); }
/*
 * Build the parameter set from command-line style arguments.
 * The arguments are merged into s_param by concat_args(), which also
 * receives file_mark, the delimiter and cmt; param then points at the
 * merged string.
 *
 *   inp_doCheck    stored in doCheck
 *   inp_dlm        stored in dlm
 *   inp_kwval_dlm  stored in kwval_dlm
 */
AzParam(int argc, const char *argv[],
        bool inp_doCheck=true,
        char file_mark='@',
        char inp_dlm=',',
        char inp_kwval_dlm='=',
        char cmt='#')
  : sp_used_kw(100, 30)
{
  doCheck = inp_doCheck;
  kwval_dlm = inp_kwval_dlm;
  dlm = inp_dlm;

  /* dlm has to be set before this call since it is passed along */
  concat_args(argc, argv, &s_param, file_mark, dlm, cmt);
  param = s_param.c_str();
}
//! copy nodes only; not split void copy_nodes_from(const AzTrTreeEnsemble_ReadOnly *inp) { reset(); const_val = inp->constant(); org_dim = inp->orgdim(); t_num = inp->size(); s_param.reset(inp->param_c_str()); dt_param = s_param.c_str(); AzParam p(dt_param, false); a_tree.alloc(&t, t_num, "AzTrTreeEnsemble::copy_nodes_from"); for (int tx = 0; tx < t_num; ++tx) { t[tx] = new T(p); t[tx]->copy_nodes_from(inp->tree(tx)); } }
/*-------------------------------------------------------------------------*/ template <class Vmat> /* Vmat: AzSmatVar | AzSmatcVar */ int AzPrepText2::_write_XY(AzDataArr<AzSmat> &am_xy, /* destroyed to save memory */ int data_num, const AzBytArr &s_batch_id, const char *outnm, const char *xy_ext) const { Vmat mv_xy; mv_xy.transfer_from(&am_xy, data_num); am_xy.reset(); AzBytArr s_xy(": "); AzTools::show_smat_stat(*mv_xy.data(), s_xy); AzBytArr s_xy_fn(outnm, xy_ext); if (s_batch_id.length() > 0) s_xy_fn << "." << s_batch_id.c_str(); const char *xy_fn = s_xy_fn.c_str(); AzTimeLog::print(xy_fn, s_xy.c_str(), out); if (AzBytArr::endsWith(xy_ext, "smat")) mv_xy.write_matrix(xy_fn); else mv_xy.write(xy_fn); return mv_xy.rowNum(); }
inline void cold_start( AzParam ¶m, const AzBytArr *s_temp_prefix, /* may be NULL */ int data_num, /* to estimate the data size for temp */ const AzOut &out, int tree_num_max, int inp_org_dim) { reset(); T dummy_tree(param); dummy_tree.printParam(out); s_param.reset(param.c_str()); dt_param = s_param.c_str(); alloc(tree_num_max, "AzTrTreeEnsemble::reset"); //@ allocate forest space org_dim = inp_org_dim; temp_files.reset(&dummy_tree, data_num, s_temp_prefix); //@ estimate the data size for temp and do something? }
/*-------------------------------------------------------------------------*/ void AzPrepText2::check_batch_id(const AzBytArr &s_batch_id) const { const char *eyec = "AzPrepText::check_batch_id"; if (s_batch_id.length() <= 0) return; AzBytArr s(kw_batch_id); s << " should look like \"1of5\""; const char *batch_id = s_batch_id.c_str(); const char *of_str = strstr(batch_id, "of"); AzX::throw_if((of_str == NULL), AzInputError, eyec, s.c_str()); for (const char *wp = batch_id; wp < batch_id+s_batch_id.length(); ++wp) { if (wp >= of_str && wp < of_str+2) continue; AzX::throw_if((*wp < '0' || *wp > '9'), AzInputError, eyec, s.c_str()); } int batch_no = atol(batch_id); int batch_num = atol(of_str+2); AzX::throw_if((batch_no < 1 || batch_no > batch_num), AzInputError, eyec, s.c_str(), " batch# must start with 1 and must not exceed the number of batches. "); }
inline void printBegin(const char *kw, const char *inp_dlm=NULL, const char *inp_name_dlm=NULL, int indent=0) { if (o == NULL) return; dlm = inp_dlm; name_dlm = inp_name_dlm; if (name_dlm == NULL) name_dlm = dlm; if (indent > 0) { /* indentation */ AzBytArr s; s.fill(' ', indent); *o<<s.c_str(); } if (kw != NULL && strlen(kw) > 0) { *o<<kw<<": "; } count = 0; }
/*------------------------------------------*/ virtual void reset_data(const AzOut &_out, const char *nm, int dummy_ydim, AzpData_batch_info *bi=NULL) { const char *eyec = "AzpData_img::reset_data"; out = _out; s_nm.reset(nm); double min_val = 0, max_val = 0, abssum = 0, abssum_pop = 0; if (bi != NULL) bi->reset(batch_num); total_data_num = 0; int x_row = -1, y_row = -1; int bx; for (bx = batch_num-1; bx >= 0; --bx) { AzBytArr s_batchnm; _reset_data(bx); if (bx == batch_num-1) { x_row = m_x.rowNum(); y_row = ms_y.rowNum(); } else { AzX::throw_if((m_x.rowNum() != x_row || ms_y.rowNum() != y_row), AzInputError, eyec, "Data dimensionality conflict between batches"); } if (bi != NULL) bi->update(bx, data_num, NULL); total_data_num += m_x.colNum(); current_batch = bx; AzTimeLog::print("#data = ", data_num, out); abssum += m_x.absSum(); abssum_pop += m_x.size(); if (bx == batch_num-1) { min_val = m_x.min(); max_val = m_x.max(); min_tar = ms_y.min(); max_tar = ms_y.max(); } else { min_val = MIN(min_val, m_x.min()); max_val = MAX(max_val, m_x.max()); min_tar = MIN(min_tar, ms_y.min()); max_tar = MAX(max_tar, ms_y.max()); } } AzBytArr s; s << "#total_data=" << total_data_num << ",min,max=" << min_val << "," << max_val; if (abssum_pop > 0) s << ",absavg=" << abssum/abssum_pop; s << ", target-min,max=" << min_tar << "," << max_tar; AzTimeLog::print(s.c_str(), out); }
/* AzBytArr convenience form: forwards s.c_str() to the C-string
 * inBrackets() overload; skipped when there is no output stream. */
inline void inBrackets(const AzBytArr &s) {
  if (o != NULL) {
    inBrackets(s.c_str());
  }
}
/* AzBytArr convenience form: forwards s.c_str() to the C-string
 * inParen() overload; skipped when there is no output stream. */
inline void inParen(const AzBytArr &s) {
  if (o != NULL) {
    inParen(s.c_str());
  }
}
inline void inBrackets(double val, int prec=-1, bool doSci=false) { if (o == NULL) return; itemBegin(); AzBytArr s; inBrackets(&s, val, prec, doSci); *o<<s.c_str(); }
inline void inBrackets(int val, int width=-1, bool doFillZero=false) { if (o == NULL) return; itemBegin(); AzBytArr s; inBrackets(&s, val, width, doFillZero); *o<<s.c_str(); }
/* Write kw immediately followed by the contents of s as one item, but
 * only when s is non-empty and there is an output stream. */
inline void printV_if_not_empty(const char *kw, const AzBytArr &s) {
  if (o == NULL || s.length() <= 0) {
    return;
  }
  itemBegin();
  *o << kw << s.c_str();
}
/*-------------------*/
/* AzBytArr convenience form: forwards s.c_str() to the C-string print()
 * overload; skipped when there is no output stream. */
inline void print(const AzBytArr &s) {
  if (o != NULL) {
    print(s.c_str());
  }
}
/* Write kw immediately followed by the contents of s as one item.
 * Does nothing when there is no output stream. */
inline void printV(const char *kw, const AzBytArr &s) {
  if (o != NULL) {
    itemBegin();
    *o << kw << s.c_str();
  }
}
const char *configuration() const { return s_config.c_str(); }
/* Returns the contents of s_param as a C string. */
inline const char *param_c_str() const {
  const char *param_str = s_param.c_str();
  return param_str;
}
static inline void writeln(const AzOut &out, const AzBytArr &s) { writeln(out, s.c_str()); }
/* Write the contents of s followed by endl to the output stream.
 * Does nothing when there is no output stream. */
inline void writeln(const AzBytArr &s) {
  if (o != NULL) {
    *o << s.c_str() << endl;
  }
}
/* Write the contents of s to the output stream without a line break.
 * Does nothing when there is no output stream. */
inline void write(const AzBytArr &s) {
  if (o != NULL) {
    *o << s.c_str();
  }
}
/* Close the underlying stream.  Throws AzException* with AzFileIOError
 * and the file name when the stream reports failure afterwards. */
inline void close() {
  ofs.close();
  if (!ofs.fail()) {
    return;
  }
  throw new AzException(AzFileIOError, "AzOfs::close", "Failed to close:", s_fn.c_str());
}
static inline void force_writeln(const AzBytArr &s) { force_writeln(s.c_str()); }
inline static void print(const AzBytArr &s, const AzOut &out) { print(s.c_str(), out); }
inline static void print(int number, const char *msg, const AzOut &out) { AzBytArr s; s.cn(number); print(s.c_str(), msg, out); }
const char *signature() const { return s_sign.c_str(); }