void append_const(double const_to_add) { int r_num = m_feat.rowNum()+1, c_num = m_feat.colNum(); m_feat.resize(r_num, c_num); int col; for (col = 0; col < c_num; ++col) { AzIFarr ifa; m_feat.col(col)->nonZero(&ifa); ifa.put(r_num-1, const_to_add); m_feat.col_u(col)->load(&ifa); } }
/*
 * Fills *v with the floating-point values found in str.
 *
 *   str : input text; it is broken into tokens by getStrings(str, dlm, ...).
 *   dlm : delimiter byte forwarded to getStrings.
 *   v   : output; reformed to one component per token, component ix holding
 *         atof() of token ix.
 *
 * The values are written straight into *v; no intermediate array is needed
 * because the number of tokens is already known from the string pool.
 */
static void getFloats(const char *str, AzByte dlm, AzDvect *v) {
  AzStrPool sp;
  getStrings(str, dlm, &sp);

  const int num = sp.size();
  v->reform(num);
  for (int ix = 0; ix < num; ++ix) {
    v->set(ix, atof(sp.c_str(ix)));
  }
}
/*-------------------------------------------------------------*/
/*
 * Writes the contents of this dense matrix into the sparse matrix *m_out.
 * m_out is reformed to row_num x col_num; for every column that has data,
 * its nonzero entries are loaded into the corresponding column of m_out.
 */
void AzDmat::convert(AzSmat *m_out) {
  m_out->reform(row_num, col_num);

  for (int cx = 0; cx < col_num; ++cx) {
    if (column[cx] == NULL) continue;   /* null column pointer: nothing to copy */

    AzIFarr nz;
    column[cx]->nonZero(&nz);
    if (nz.size() <= 0) continue;       /* no nonzero entries: leave column as is */

    m_out->col_u(cx)->load(&nz);
  }
}
/*------------------------------------------*/
/*
 * Replaces, in place, every entry x enumerated by AzSvect::next() in each
 * column of *m_x with a sign-preserving log:
 *     x > 0  ->  log(x + 1)
 *     else   -> -log(-x + 1)
 */
static void apply_log(AzSmat *m_x) {
  for (int cx = 0; cx < m_x->colNum(); ++cx) {
    const AzSvect *src = m_x->col(cx);
    AzIFarr transformed;

    AzCursor cursor;
    double val;
    int row;
    /* next() returns a negative row index once the column is exhausted. */
    while ((row = src->next(cursor, val)) >= 0) {
      const double mapped = (val > 0) ? log(val + 1) : -log(-val + 1);
      transformed.put(row, mapped);
    }

    m_x->col_u(cx)->load(&transformed);
  }
}
/*------------------------------------------*/ static void find_max(const AzSmat *m_x, double ratio, int topk, AzDvect *v_max) { AzSmat m_tran; m_x->transpose(&m_tran); int kk = MAX(topk, (int)((double)m_x->colNum()*ratio)); v_max->reform(m_x->rowNum()); double *max = v_max->point_u(); int row; for (row = 0; row < m_x->rowNum(); ++row) { AzDvect v(m_tran.col(row)); v.abs(); AzIFarr ifa; v.nonZero(&ifa); ifa.sort_Float(false); /* descending order */ ifa.cut(kk); max[row] = ifa.sum() / (double)kk; } }
/*------------------------------------------*/ static void cap(double capval, AzSmat *m_x) { if (capval < 0) throw new AzException("AzSvDataS::cap", "cap must be non-negative"); int col; for (col = 0; col < m_x->colNum(); ++col) { AzIFarr ifa; const AzSvect *v_x = m_x->col(col); AzCursor cur; for ( ; ; ) { double val; int row = v_x->next(cur, val); if (row < 0) break; if (val > 0) val = MIN(val, capval); else val = MAX(val, -capval); ifa.put(row, val); } m_x->col_u(col)->load(&ifa); } }
/*------------------------------------------------------------------*/ void AzSvDataS::_parseDataLine_Sparse(const AzByte *inp, int inp_len, int f_num, const char *data_fn, int line_no, /*--- output ---*/ AzIFarr &ifa_ex_val) { const char *eyec = "AzSvDataS::_parseDataLine_Sparse"; AzIntArr ia_isUsed; ia_isUsed.reset(f_num, 0); int *isUsed = ia_isUsed.point_u(); const AzByte *wp = inp, *line_end = inp + inp_len; // AzIFarr ifa_ex_val; for ( ; ; ) { if (wp >= line_end) break; AzBytArr str_token; AzTools::getString(&wp, line_end, &str_token); if (str_token.getLen() > 0) { int ex; double val; decomposeFeat(str_token.c_str(), line_no, &ex, &val); if (ex < 0 || ex >= f_num) { AzBytArr s("Error in line# "); s.cn(line_no); s.c(": invalid feature# "); s.cn(ex); throw new AzException(AzInputError, eyec, s.c_str()); } if (isUsed[ex]) { AzBytArr s("Error in line# "); s.cn(line_no); s.c(": feature# "); s.cn(ex); s.c(" appears more than once."); throw new AzException(AzInputError, eyec, s.c_str()); } if (val != 0) { ifa_ex_val.put(ex, val); } isUsed[ex] = 1; } } // m_feat->load(col, &ifa_ex_val); }
/*-------------------------------------------------------------------------*/ void AzPrepText2::set_ifeat(const AzSmat *m_feat, int top_num, int col, int offs, AzIFarr *ifa_ctx, feat_info fi[2]) const { if (col < 0 || col >= m_feat->colNum()) return; AzIFarr ifa; m_feat->col(col)->nonZero(&ifa); fi[0].update(ifa); if (top_num > 0 && ifa.size() > top_num) { ifa.sort_FloatInt(false); /* descending order */ ifa.cut(top_num); } fi[1].update(ifa); if (offs == 0) ifa_ctx->concat(&ifa); else { for (int ix = 0; ix < ifa.size(); ++ix) { int row = ifa.getInt(ix); double val = ifa.get(ix); ifa_ctx->put(row+offs, val); } } }
/*------------------------------------------------------------------*/ void AzSvDataS::_parseDataLine(const AzByte *inp, int inp_len, int f_num, const char *data_fn, int line_no, /*--- output ---*/ AzIFarr &ifa_ex_val) { const char *eyec = "AzSvDataS::_parseDataLine"; ifa_ex_val.prepare(f_num); const AzByte *wp = inp, *line_end = inp + inp_len; // AzIFarr ifa_ex_val; int ex = 0; for ( ; ; ) { if (wp >= line_end) break; #if 1 int str_len; const AzByte *str = AzTools::getString(&wp, line_end, &str_len); if (str_len > 0) { if (ex >= f_num) { AzBytArr s("Error in "); s.c(data_fn); s.c(": Line#="); s.cn(line_no); AzPrint::writeln(log_out, s); s.reset(); s.c("Too many values per line: expected "); s.cn(f_num); s.c(" values."); throw new AzException(AzInputNotValid, eyec, s.c_str()); } #if 1 double val = my_atof((char *)str, eyec, line_no); #else double val = atof((char *)str); if (val == 0 && *str != '0' && *str != '+' && *str != '-') { AzBytArr s("Invalid token "); s.c(str, str_len); s.c(" in "); s.c(data_fn); s.c(": Line#="); s.cn(line_no); AzPrint::writeln(log_out, s); throw new AzException(AzInputNotValid, eyec, s.c_str()); } #endif if (val != 0) { ifa_ex_val.put(ex, val); } ++ex; } #else AzBytArr str_token; AzTools::getString(&wp, line_end, &str_token); if (str_token.getLen() > 0) { if (ex >= f_num) { AzBytArr s("Error in "); s.c(data_fn); s.c(": Line#="); s.cn(line_no); AzPrint::writeln(log_out, s); s.reset(); s.c("Too many values per line: expected "); s.cn(f_num); s.c(" values."); throw new AzException(AzInputNotValid, eyec, s.c_str()); } /* double val = atof(str_token.c_str()); */ double val = my_atof(str_token.c_str(), eyec, line_no); if (val != 0) { ifa_ex_val.put(ex, val); } ++ex; } #endif } if (ex < f_num) { AzTimeLog::print("Error in Line#=", line_no, log_out); throw new AzException(AzInputNotValid, eyec, "Too few values"); } // m_feat->load(col, &ifa_ex_val); }