void show_below_above(const AzIntArr &ia_below, const AzIntArr &ia_above, AzBytArr &s) const { s << "("; for (int ix = 0; ix < ia_below.size(); ++ix) { if (ix > 0) s << ","; s << ia_below.get(ix); } s << ") -> ("; for (int ix = 0; ix < ia_above.size(); ++ix) { if (ix > 0) s << ","; s << ia_above.get(ix); } s << ")"; }
/*-------------------------------------------------------------------------*/ void AzPrepText2::gen_Y(const AzIntArr &ia_tokno, int dic_sz, const AzIntArr &ia_pos, int xpch_sz, /* patch size used to generate X */ int min_dist, int max_dist, AzSmat *m_y) const { AzX::throw_if_null(m_y, "AzPrepText2::gen_Y", "m_y"); int t_num; const int *tokno = ia_tokno.point(&t_num); m_y->reform(dic_sz*2, ia_pos.size()); /* *2 for left and right */ for (int ix = 0; ix < ia_pos.size(); ++ix) { int xtx0 = ia_pos.get(ix); int xtx1 = xtx0 + xpch_sz; AzIntArr ia_ctx0, ia_ctx1; for (int tx = MAX(0,xtx0+min_dist); tx < MIN(t_num,xtx0); ++tx) if (tokno[tx] >= 0) ia_ctx0.put(tokno[tx]); ia_ctx0.unique(); for (int tx = MAX(0,xtx1); tx < MIN(t_num,xtx1+max_dist); ++tx) if (tokno[tx] >= 0) ia_ctx1.put(tokno[tx]); ia_ctx1.unique(); ia_ctx1.add(dic_sz); AzIntArr ia_ctx; ia_ctx.concat(&ia_ctx0); ia_ctx.concat(&ia_ctx1); if (ia_ctx.size() > 0) { ia_ctx.unique(); m_y->col_u(ix)->load(&ia_ctx, 1); } } }
/*-------------------------------------------------------------------------*/ void AzPrepText2::gen_Y_ifeat(int top_num_each, int top_num_total, const AzSmat *m_feat, const AzIntArr &ia_tokno, const AzIntArr &ia_pos, int xpch_sz, int min_dist, int max_dist, bool do_nolr, int f_pch_sz, int f_pch_step, int f_padding, AzSmat *m_y, feat_info fi[2]) const { const char *eyec = "AzPrepText2::gen_neigh_topfeat"; AzX::throw_if_null(m_feat, eyec, "m_feat"); AzX::throw_if_null(m_y, eyec, "m_y"); int t_num; const int *tokno = ia_tokno.point(&t_num); int feat_sz = m_feat->rowNum(); int f_pch_num = DIVUP(t_num+f_padding*2-f_pch_sz, f_pch_step) + 1; if (m_feat->colNum() != f_pch_num) { AzBytArr s("#patch mismatch: Expcected: "); s << f_pch_num << " Actual: " << m_feat->colNum(); AzX::throw_if(true, AzInputError, eyec, s.c_str()); } if (do_nolr) m_y->reform(feat_sz, ia_pos.size()); else m_y->reform(feat_sz*2, ia_pos.size()); for (int ix = 0; ix < ia_pos.size(); ++ix) { int xtx0 = ia_pos[ix]; int xtx1 = xtx0 + xpch_sz; AzIFarr ifa_ctx; int offs = 0; for (int tx = xtx0+min_dist; tx < xtx0; ++tx) { if (tx + f_pch_sz > xtx0) break; set_ifeat(m_feat, top_num_each, (tx+f_padding)/f_pch_step, offs, &ifa_ctx, fi); } if (!do_nolr) offs = feat_sz; for (int tx = xtx1; tx < xtx1+max_dist; ++tx) { if (tx + f_pch_sz > xtx1+max_dist) break; set_ifeat(m_feat, top_num_each, (tx+f_padding)/f_pch_step, offs, &ifa_ctx, fi); } ifa_ctx.squeeze_Max(); if (top_num_total > 0 && ifa_ctx.size() > top_num_total) { ifa_ctx.sort_Float(false); ifa_ctx.cut(top_num_total); } m_y->col_u(ix)->load(&ifa_ctx); } }
/*-------------------------------------------------------------------------*/ void AzTools_text::tokenize(AzByte *buff, int &len, const AzDic *dic, AzIntArr &ia_nn, bool do_lower, bool do_utf8dashes, /*--- output ---*/ AzDataArr<AzIntArr> &aia_tokno) { const char *eyec = "AzTools_text::tokenize(multi n)"; AzStrPool sp_tok; tokenize(buff, len, do_utf8dashes, do_lower, &sp_tok); int t_num = sp_tok.size(); aia_tokno.reset(ia_nn.size()); for (int ix = 0; ix < ia_nn.size(); ++ix) { identify_tokens(&sp_tok, ia_nn[ix], dic, aia_tokno(ix)); if (aia_tokno[ix]->size() != t_num) throw new AzException(eyec, "Conflict in #tokens"); } }
/*
 * Put the cursor at its starting point: ia_index.size() when
 * _shouldDoBackward is set, 0 otherwise.
 */
inline void rewind(AzCursor &cur) const {
  const int start = _shouldDoBackward ? ia_index.size() : 0;
  cur.set(start);
}
/*
 * Append the sizes in ia_sz to s, separated by " x " (e.g. "3 x 4 x 5").
 * No enclosing parentheses are written.  If do_reset is true, s is cleared
 * first; otherwise the text is appended to whatever s already holds.
 */
virtual void format(AzBytArr &s, bool do_reset=false) const {
  if (do_reset) {
    s.reset();
  }
  const int dim_num = ia_sz.size();
  for (int dx = 0; dx < dim_num; ++dx) {
    if (dx != 0) s.c(" x ");
    s.cn(ia_sz.get(dx));
  }
}
/*-------------------------------------------------------------------------*/ void AzPrepText2::gen_Y_ngram_bow(const AzIntArr &ia_nn, const AzDataArr<AzIntArr> &aia_tokno, int dic_sz, const AzIntArr &ia_pos, int xpch_sz, /* patch size used to generate X */ int min_dist, int max_dist, bool do_nolr, AzSmat *m_y) const { const char *eyec = "AzPrepText2::gen_Y_ngram_bow"; int t_num = aia_tokno[0]->size(); if (do_nolr) m_y->reform(dic_sz, ia_pos.size()); else m_y->reform(dic_sz*2, ia_pos.size()); /* *2 for left and right */ for (int ix = 0; ix < ia_pos.size(); ++ix) { int xtx0 = ia_pos.get(ix); int xtx1 = xtx0 + xpch_sz; AzIntArr ia_ctx; int base = xtx0+min_dist; for (int nx = 0; nx < aia_tokno.size(); ++nx) { const AzIntArr *ia_tokno = aia_tokno[nx]; int nn = ia_nn[nx]; for (int tx = MAX(0,base); tx <= MIN(t_num,xtx0)-nn; ++tx) { int tokno = ia_tokno->get(tx); if (tokno >= 0) ia_ctx.put(tokno); } } base = xtx1; for (int nx = 0; nx < aia_tokno.size(); ++nx) { const AzIntArr *ia_tokno = aia_tokno[nx]; int nn = ia_nn[nx]; for (int tx = MAX(0,base); tx <= MIN(t_num,xtx1+max_dist)-nn; ++tx) { int tokno = ia_tokno->get(tx); if (tokno >= 0) { if (do_nolr) ia_ctx.put(tokno); else ia_ctx.put(dic_sz+tokno); } } } ia_ctx.unique(); m_y->col_u(ix)->load(&ia_ctx, 1); } }
/*
 * Return the size stored for dimension dx.
 * Raises via AzX::throw_if when dx is outside [0, ia_sz.size()).
 */
virtual int sz(int dx) const {
  const bool is_out_of_range = (dx < 0 || dx >= ia_sz.size());
  AzX::throw_if(is_out_of_range, "AzxD::sz", "dim is out of range");
  return ia_sz.get(dx);
}
/* Return the number of dimensions, i.e. the number of entries in ia_sz. */
virtual int get_dim() const {
  const int dim_num = ia_sz.size();
  return dim_num;
}
/*
 * Return true only if there is at least one dimension and every
 * dimension size in ia_sz is positive.
 */
virtual bool is_valid() const {
  const int dim_num = ia_sz.size();
  bool ok = (dim_num > 0);
  for (int dx = 0; ok && dx < dim_num; ++dx) {
    ok = (ia_sz.get(dx) > 0);
  }
  return ok;
}
/*
 * Return the last entry of ia_layer_order.
 * NOTE(review): when ia_layer_order is empty this calls get(-1); behavior
 * then depends on AzIntArr::get -- confirm.
 */
virtual int top_lay_ind() const {
  const int last = ia_layer_order.size() - 1;
  return ia_layer_order.get(last);
}
/*------------------------------------------------------------*/ void AzpCNet3_multi::insert_connectors(AzIntArr &ia_order, /* inout */ AzDataArr<AzIntArr> &aia_below, /* inout */ AzDataArr<AzIntArr> &aia_above, /* inout */ AzDataArr<AzpLayerConn> &conn) const { const char *eyec = "AzpCNet3_multi::insert_connectors"; int layer_num = ia_order.size(); /*--- count connectors to be inserted ---*/ int conn_num = 0; for (int lx = 0; lx < layer_num; ++lx) { if (aia_below[lx]->size() > 1) ++conn_num; if (aia_above[lx]->size() > 1) ++conn_num; } /*--- copy the current edges ---*/ AzDataArr<AzIntArr> aia_b(layer_num + conn_num); AzDataArr<AzIntArr> aia_a(layer_num + conn_num); conn.reset(layer_num + conn_num); for (int lx = 0; lx < layer_num; ++lx) { aia_b(lx)->reset(aia_below[lx]); aia_a(lx)->reset(aia_above[lx]); } /*--- insert connection where multiple input/output ---*/ AzIntArr ia_o; int cx = layer_num; for (int ix = 0; ix < ia_order.size(); ++ix) { int lx = ia_order.get(ix); if (aia_b[lx]->size() > 1) { /* multiple inputs */ aia_b(cx)->reset(aia_b[lx]); aia_a(cx)->put(lx); for (int ix = 0; ix < aia_b[cx]->size(); ++ix) { int below = aia_b[cx]->get(ix); int count = aia_a(below)->replace(lx, cx); AzX::throw_if((count != 1), eyec, "something is wrong"); } aia_b(lx)->reset(); aia_b(lx)->put(cx); ia_o.put(cx); ++cx; } ia_o.put(lx); if (aia_above[lx]->size() > 1) { aia_b(cx)->put(lx); aia_a(cx)->reset(aia_above[lx]); for (int ix = 0; ix < aia_a[cx]->size(); ++ix) { int above = aia_a[cx]->get(ix); int count = aia_b(above)->replace(lx, cx); AzX::throw_if((count != 1), eyec, "something is wrong-2"); } aia_a(lx)->reset(); aia_a(lx)->put(cx); ia_o.put(cx); ++cx; } } /*--- output ---*/ aia_below.reset(&aia_b); aia_above.reset(&aia_a); ia_order.reset(&ia_o); }