virtual void reset_data_for_test(const AzOut &out, const AzSmat *m_data) { bool doSparse = false; if (m_data->rowNum()*m_data->colNum() > Az_max_test_entries) { /* large data */ /*--- dense is faster but uses up more memory if data is sparse ---*/ double nz_ratio; m_data->nonZeroNum(&nz_ratio); if (nz_ratio < 0.6) { /* relatively sparse */ doSparse = true; AzBytArr s; s.c("Large and sparse test data (nonzero ratio=", nz_ratio); s.c("); treated as sparse data."); AzPrint::writeln(out, s); } } data_num = m_data->colNum(); m_tran_dense.reset(); m_tran_sparse.reset(); if (doSparse) { m_data->transpose(&m_tran_sparse); } else { m_tran_dense.transpose_from(m_data); } sorted_arr.reset(); feat.reset(m_data->rowNum()); }
void _item(bool isRequired, const char *kw, const char *desc, const char *dflt=NULL) { AzBytArr s; if (dflt != NULL && strlen(dflt) > 0) { s.c(" (Default:"); s.c(dflt); s.c(")"); } _item(isRequired, kw, desc, s); }
/*------------------------------------------------------------*/
/*
 * Append this layer's statistics to s.
 * When do_topthru is set, only the tag "[topthru]" is appended.
 * Otherwise: the stats of cs.weight, then (if using_lm2) the separator
 * ";-lm2-;" followed by the stats of cs.weight2, then the output of
 * activ_show_stat().
 */
void AzpLayer::show_stat(AzBytArr &s) const
{
  if (!do_topthru) {
    cs.weight->show_stat(s);
    if (using_lm2) {
      s.c(";-lm2-;");
      cs.weight2->show_stat(s);
    }
    activ_show_stat(s);
  }
  else {
    s.c("[topthru]");
  }
}
/*
 * Append per-border population statistics to s.
 * Nothing is written when the activation type is AzpActivDflt_None or
 * when statistics are turned off (p.do_stat).
 *
 * Output: a new line, then for every row ix of the normalized copy of
 * v_pop_last:  b<border>=<value>(<running total>),
 * with the value and the running total written via cn(..., 3).
 */
virtual void show_stat(AzBytArr &s) const {
  if (p.typ == AzpActivDflt_None || !p.do_stat) {
    return;
  }

  /* work on a copy so that the member itself is left untouched */
  AzDvect v_frac(&v_pop_last);
  v_frac.normalize1();

  s.nl();
  double cum = 0;
  for (int bx = 0; bx < v_frac.rowNum(); ++bx) {
    cum += v_frac.get(bx);
    s.c("b");
    s.cn(v_border.get(bx));
    s.c("=");
    s.cn(v_frac.get(bx),3);
    s.c("(");
    s.cn(cum,3);
    s.c("),");
  }
}
/*-------------------------------------------------------------------------*/ int AzTools_text::replace_utf8dashes(AzByte *data, int len) /* inout */ { const char *eyec = "AzTools_text:;replace_utf8dashes"; AzBytArr s_mydata; AzByte *mydata = s_mydata.reset(len*2, 0); AzByte *mywp = mydata; /* 0xe28093: en dash (often used as in 1900-1935) */ /* 0xe28094: em dash (long dash) */ /* 0xe2809c: double quote begin -> [ " ] */ /* 0xe2809d: duoble quote end -> [ " ] */ /* 0xe28098: single quote begin -> [ ' ]*/ /* 0xe28099: single quote end -> [ 's] if ending a token; [ ' ] otherwise */ const AzByte *data_end = data+len, *wp = data; for ( ; wp < data_end; ) { const AzByte *ptr = (AzByte *)memchr(wp, 0xE2, data_end-wp); if (ptr == NULL) { ptr = data_end; } int mvlen = Az64::ptr_diff(ptr-wp, eyec); if (mvlen > 0) { memcpy(mywp, wp, mvlen); mywp += mvlen; /* string before 0xE2 */ } if (ptr+3 <= data_end) { AzByte prevch = (ptr-1 >= data) ? *(ptr-1) : 0; AzByte nextch = (ptr+3 < data_end) ? *(ptr+3) : 0; AzBytArr s; if (*(ptr+1) == 0x80) { if (*(ptr+2) == 0x93) { /* en dash */ if (prevch>='0' && prevch<='9' && nextch>='0' && nextch<='9') s.c("-"); /* between digits */ else s.c(" - "); } else if (*(ptr+2) == 0x94) s.c(" - "); /* em dash */ else if (*(ptr+2) == 0x98) s.c(" ' "); else if (*(ptr+2) == 0x9c || *(ptr+2) == 0x9d) s.c(" \" "); /* double quote */ else if (*(ptr+2) == 0x99) { if (ptr+5<=data_end && memcmp(ptr+3, "s ", 2)==0) s.c(" '"); else s.c(" ' "); } else s.c(ptr, 3); } else s.c(ptr, 3); memcpy(mywp, s.point(), s.length()); mywp += s.length(); wp = ptr+3; } else { mvlen = Az64::ptr_diff(data_end-ptr, eyec); if (mvlen > 0) { memcpy(mywp, ptr, mvlen); mywp += mvlen; } wp = data_end; } } int mydata_len = Az64::ptr_diff(mywp-mydata); memcpy(data, mydata, mydata_len); mydata[mydata_len] = '\0'; return mydata_len; }
/*
 * Append the sizes held in ia_sz to s, separated by " x "
 * (e.g., "<a> x <b> x <c>").  If do_reset is true, s is cleared first.
 * Surrounding parentheses are intentionally not emitted.
 */
virtual void format(AzBytArr &s, bool do_reset=false) const {
  if (do_reset) {
    s.reset();
  }
  int dx = 0;
  while (dx < ia_sz.size()) {
    if (dx != 0) {
      s.c(" x "); /* separator goes between entries only */
    }
    s.cn(ia_sz.get(dx));
    ++dx;
  }
}
/*--------------------------------------------------------*/ void AzTreeEnsemble::show_weights(const AzOut &out, AzSvFeatInfo *fi) const { AzIIFarr iifa_tx_nx_posiw, iifa_tx_nx_negaw; int tx; for (tx = 0; tx < t_num; ++tx) { int nx; for (nx = 0; nx < t[tx]->nodeNum(); ++nx) { const AzTreeNode *np = t[tx]->node(nx); if (np->weight > 0) { iifa_tx_nx_posiw.put(tx, nx, np->weight); } else if (np->weight < 0) { iifa_tx_nx_negaw.put(tx, nx, np->weight); } } } iifa_tx_nx_posiw.sort_Float(false); /* descending order */ iifa_tx_nx_negaw.sort_Float(true); /* ascending order */ AzPrint::writeln(out, "Positive weights -------------------"); int ix; for (ix = 0; ix < iifa_tx_nx_posiw.size(); ++ix) { int tx, nx; double w = iifa_tx_nx_posiw.get(ix, &tx, &nx); AzBytArr s_desc; t[tx]->genDesc(fi, nx, &s_desc); AzBytArr s; s.cn(w, 6, false); s.c(' '); s.c(&s_desc); AzPrint::writeln(out, s); } AzPrint::writeln(out, "Negative weights -------------------"); for (ix = 0; ix < iifa_tx_nx_negaw.size(); ++ix) { int tx, nx; double w = iifa_tx_nx_negaw.get(ix, &tx, &nx); AzBytArr s_desc; t[tx]->genDesc(fi, nx, &s_desc); AzBytArr s; s.cn(w, 6, false); s.c(' '); s.c(&s_desc); AzPrint::writeln(out, s); } }
inline virtual void show(const char *header, const AzOut &out) const { AzBytArr s; s.c(header); format(s); AzPrint::writeln(out, s); }