コード例 #1
0
 /*
  * Streams "(b0,b1,...) -> (a0,a1,...)" into s, where the b's are the entries 
  * of ia_below and the a's are the entries of ia_above, in index order.  
  * Entries are separated by a single comma; an empty list yields "()".
  */
 void show_below_above(const AzIntArr &ia_below, const AzIntArr &ia_above, AzBytArr &s) const {
   const int below_num = ia_below.size(); 
   const int above_num = ia_above.size(); 

   /*---  left-hand list  ---*/
   s << "("; 
   for (int bx = 0; bx < below_num; ++bx) {
     if (bx != 0) s << ","; 
     s << ia_below.get(bx); 
   }

   /*---  right-hand list  ---*/
   s << ") -> ("; 
   for (int ax = 0; ax < above_num; ++ax) {
     if (ax != 0) s << ","; 
     s << ia_above.get(ax); 
   }
   s << ")"; 
 }
コード例 #2
0
ファイル: AzPrepText2.cpp プロジェクト: DeercoderCourse/NLP
/*-------------------------------------------------------------------------*/
void AzPrepText2::gen_Y(const AzIntArr &ia_tokno, int dic_sz, 
                        const AzIntArr &ia_pos, 
                        int xpch_sz, /* patch size used to generate X */
                        int min_dist, int max_dist, 
                        AzSmat *m_y) const
{
  AzX::throw_if_null(m_y, "AzPrepText2::gen_Y", "m_y"); 
  int t_num; 
  const int *tokno = ia_tokno.point(&t_num); 

  m_y->reform(dic_sz*2, ia_pos.size()); /* *2 for left and right */
  for (int ix = 0; ix < ia_pos.size(); ++ix) {
    int xtx0 = ia_pos.get(ix); 
    int xtx1 = xtx0 + xpch_sz; 
        
    AzIntArr ia_ctx0, ia_ctx1;      
    for (int tx = MAX(0,xtx0+min_dist); tx < MIN(t_num,xtx0); ++tx) if (tokno[tx] >= 0) ia_ctx0.put(tokno[tx]); 
    ia_ctx0.unique(); 

    for (int tx = MAX(0,xtx1); tx < MIN(t_num,xtx1+max_dist); ++tx) if (tokno[tx] >= 0) ia_ctx1.put(tokno[tx]); 
    ia_ctx1.unique(); 
    ia_ctx1.add(dic_sz); 
    
    AzIntArr ia_ctx; 
    ia_ctx.concat(&ia_ctx0); 
    ia_ctx.concat(&ia_ctx1); 
    if (ia_ctx.size() > 0) {
      ia_ctx.unique(); 
      m_y->col_u(ix)->load(&ia_ctx, 1); 
    }
  }
}
コード例 #3
0
ファイル: AzPrepText2.cpp プロジェクト: DeercoderCourse/NLP
/*-------------------------------------------------------------------------*/
/*
 * Fills m_y (one column per entry of ia_pos) from columns of the feature 
 * matrix m_feat that lie around each patch.  
 *
 *   top_num_each   passed through to set_ifeat for every feature column visited.  
 *   top_num_total  if positive, at most this many entries are kept per output 
 *                  column (after sort_Float(false) and cut).  
 *   m_feat         feature matrix; must have exactly 
 *                  DIVUP(#tokens + 2*f_padding - f_pch_sz, f_pch_step) + 1 columns.  
 *   ia_tokno       only its length (#tokens) is used here.  
 *   ia_pos         start position of each patch; xpch_sz is the patch length.  
 *   min_dist, max_dist   
 *                  the window before the patch starts at pos+min_dist and the 
 *                  window after the patch ends at pos+xpch_sz+max_dist.  
 *   do_nolr        true: m_y gets feat_sz rows and both windows use offset 0; 
 *                  false: m_y gets feat_sz*2 rows and the window after the 
 *                  patch uses offset feat_sz.  
 *   f_pch_sz, f_pch_step, f_padding   
 *                  patch size/step/padding with which m_feat was produced 
 *                  (assumed from the column-count check -- confirm with caller).  
 *   fi             forwarded to set_ifeat unchanged.  
 *
 * Raises AzInputError through AzX::throw_if on a column-count mismatch; 
 * m_feat and m_y are checked with AzX::throw_if_null.  
 */
void AzPrepText2::gen_Y_ifeat(int top_num_each, int top_num_total, const AzSmat *m_feat, 
                              const AzIntArr &ia_tokno, const AzIntArr &ia_pos, 
                              int xpch_sz, int min_dist, int max_dist, 
                              bool do_nolr, 
                              int f_pch_sz, int f_pch_step, int f_padding, 
                              AzSmat *m_y, 
                              feat_info fi[2]) const
{
  /* The eyecatcher used to name a different routine ("gen_neigh_topfeat"), */
  /* which made error reports point at the wrong function.                  */
  const char *eyec = "AzPrepText2::gen_Y_ifeat"; 
  AzX::throw_if_null(m_feat, eyec, "m_feat"); 
  AzX::throw_if_null(m_y, eyec, "m_y"); 
  int t_num; 
  ia_tokno.point(&t_num); /* only the token count is needed; the data pointer is not used */

  int feat_sz = m_feat->rowNum(); 
  int f_pch_num = DIVUP(t_num+f_padding*2-f_pch_sz, f_pch_step) + 1; 
  if (m_feat->colNum() != f_pch_num) {
    AzBytArr s("#patch mismatch: Expected: "); s << f_pch_num << " Actual: " << m_feat->colNum(); 
    AzX::throw_if(true, AzInputError, eyec, s.c_str()); 
  }
  
  if (do_nolr) m_y->reform(feat_sz,    ia_pos.size()); 
  else         m_y->reform(feat_sz*2,  ia_pos.size()); /* *2: separate rows for before/after */
  for (int ix = 0; ix < ia_pos.size(); ++ix) {
    int xtx0 = ia_pos[ix];        /* first token of the patch */
    int xtx1 = xtx0 + xpch_sz;    /* one past the last token of the patch */
     
    AzIFarr ifa_ctx; 

    /*---  window before the patch: feature patches that end at or before xtx0  ---*/
    int offs = 0;     
    for (int tx = xtx0+min_dist; tx < xtx0; ++tx) {
      if (tx + f_pch_sz > xtx0) break; /* would overlap the patch itself */
      set_ifeat(m_feat, top_num_each, (tx+f_padding)/f_pch_step, offs, &ifa_ctx, fi); 
    }
    
    /*---  window after the patch: feature patches that end at or before xtx1+max_dist  ---*/
    if (!do_nolr) offs = feat_sz; 
    for (int tx = xtx1; tx < xtx1+max_dist; ++tx) {
      if (tx + f_pch_sz > xtx1+max_dist) break; /* would run past the window */
      set_ifeat(m_feat, top_num_each, (tx+f_padding)/f_pch_step, offs, &ifa_ctx, fi); 
    }

    /* NOTE(review): squeeze_Max presumably merges duplicate indexes keeping the */
    /* largest value -- confirm against AzIFarr.                                 */
    ifa_ctx.squeeze_Max(); 
    if (top_num_total > 0 && ifa_ctx.size() > top_num_total) {
      ifa_ctx.sort_Float(false); 
      ifa_ctx.cut(top_num_total);       
    }
    m_y->col_u(ix)->load(&ifa_ctx); 
  }
}
コード例 #4
0
ファイル: AzTools_text.cpp プロジェクト: DeercoderCourse/NLP
/*-------------------------------------------------------------------------*/
/*
 * Tokenizes buff once into a string pool and then calls identify_tokens 
 * for every entry of ia_nn, writing the result for ia_nn[nx] to aia_tokno(nx).  
 * aia_tokno is reset to ia_nn.size() entries first.  
 * Throws a heap-allocated AzException (by pointer) when a result array does 
 * not have exactly one entry per token.  
 */
void AzTools_text::tokenize(AzByte *buff, int &len, 
                       const AzDic *dic,
                       AzIntArr &ia_nn, 
                       bool do_lower, bool do_utf8dashes, 
                       /*---  output  ---*/                       
                       AzDataArr<AzIntArr> &aia_tokno)
{
  const char *eyec = "AzTools_text::tokenize(multi n)"; 

  /*---  split the text into tokens  ---*/
  AzStrPool sp_tok; 
  tokenize(buff, len, do_utf8dashes, do_lower, &sp_tok);   
  const int tok_num = sp_tok.size(); 

  /*---  one token-number array per requested n  ---*/
  aia_tokno.reset(ia_nn.size()); 
  for (int nx = 0; nx < ia_nn.size(); ++nx) {
    identify_tokens(&sp_tok, ia_nn[nx], dic, aia_tokno(nx)); 
    if (aia_tokno[nx]->size() == tok_num) continue; 
    throw new AzException(eyec, "Conflict in #tokens"); 
  }
} 
コード例 #5
0
 /*---  Sets cur to 0, or to ia_index.size() when _shouldDoBackward is set.  ---*/
 inline void rewind(AzCursor &cur) const {
   if (!_shouldDoBackward) {
     cur.set(0); 
     return; 
   }
   cur.set(ia_index.size()); 
 }
コード例 #6
0
ファイル: AzxD.hpp プロジェクト: DeercoderCourse/NLP
  /*
   * Writes the entries of ia_sz to s in index order: each entry goes through 
   * s.cn() and consecutive entries are separated by s.c(" x ").  
   * When do_reset is true, s.reset() is called first.  
   * No surrounding parentheses are written.  
   */
  virtual void format(AzBytArr &s, bool do_reset=false) const {
    if (do_reset) {
      s.reset(); 
    }
    for (int dx = 0; dx < ia_sz.size(); ++dx) {
      if (dx != 0) s.c(" x "); 
      s.cn(ia_sz.get(dx)); 
    }
  }  
コード例 #7
0
ファイル: AzPrepText2.cpp プロジェクト: DeercoderCourse/NLP
/*-------------------------------------------------------------------------*/
void AzPrepText2::gen_Y_ngram_bow(const AzIntArr &ia_nn, 
                             const AzDataArr<AzIntArr> &aia_tokno, int dic_sz, 
                             const AzIntArr &ia_pos, 
                             int xpch_sz, /* patch size used to generate X */
                             int min_dist, int max_dist, 
                             bool do_nolr, 
                             AzSmat *m_y) const
{
  const char *eyec = "AzPrepText2::gen_Y_ngram_bow"; 
  int t_num = aia_tokno[0]->size(); 
  if (do_nolr) m_y->reform(dic_sz, ia_pos.size());
  else         m_y->reform(dic_sz*2, ia_pos.size()); /* *2 for left and right */
  for (int ix = 0; ix < ia_pos.size(); ++ix) {
    int xtx0 = ia_pos.get(ix); 
    int xtx1 = xtx0 + xpch_sz; 
    
    AzIntArr ia_ctx;     
    int base = xtx0+min_dist;  
    for (int nx = 0; nx < aia_tokno.size(); ++nx) {
      const AzIntArr *ia_tokno = aia_tokno[nx];      
      int nn = ia_nn[nx]; 
      for (int tx = MAX(0,base); tx <= MIN(t_num,xtx0)-nn; ++tx) {         
        int tokno = ia_tokno->get(tx); 
        if (tokno >= 0) ia_ctx.put(tokno); 
      }
    }
   
    base = xtx1; 
    for (int nx = 0; nx < aia_tokno.size(); ++nx) {  
      const AzIntArr *ia_tokno = aia_tokno[nx];     
      int nn = ia_nn[nx];   
      for (int tx = MAX(0,base); tx <= MIN(t_num,xtx1+max_dist)-nn; ++tx) {           
        int tokno = ia_tokno->get(tx); 
        if (tokno >= 0) {
          if (do_nolr) ia_ctx.put(tokno); 
          else         ia_ctx.put(dic_sz+tokno); 
        }
      }
    }
    ia_ctx.unique();
    m_y->col_u(ix)->load(&ia_ctx, 1); 
  }
}
コード例 #8
0
ファイル: AzxD.hpp プロジェクト: DeercoderCourse/NLP
 /*---  Returns entry dx of ia_sz; an out-of-range dx is reported through AzX::throw_if.  ---*/
 virtual int sz(int dx) const {
   const bool is_out_of_range = (dx < 0) || (dx >= ia_sz.size()); 
   AzX::throw_if(is_out_of_range, "AzxD::sz", "dim is out of range"); 
   return ia_sz.get(dx); 
 }
コード例 #9
0
ファイル: AzxD.hpp プロジェクト: DeercoderCourse/NLP
 /*---  Number of entries held in ia_sz.  ---*/
 virtual int get_dim() const {
   return ia_sz.size(); 
 }
コード例 #10
0
ファイル: AzxD.hpp プロジェクト: DeercoderCourse/NLP
 /*---  True only if ia_sz has at least one entry and every entry is positive.  ---*/
 virtual bool is_valid() const {
   const int dim = ia_sz.size(); 
   if (dim <= 0) return false; 
   for (int dx = 0; dx < dim; ++dx) {
     if (ia_sz.get(dx) <= 0) return false; 
   }
   return true; 
 }
コード例 #11
0
 /*---  Returns the last entry of ia_layer_order.  ---*/
 virtual int top_lay_ind() const {
   const int last = ia_layer_order.size() - 1; 
   return ia_layer_order.get(last); 
 }
コード例 #12
0
/*------------------------------------------------------------*/ 
/*
 * Inserts a connector node in front of every layer with more than one input 
 * and behind every layer with more than one output.  
 *
 *   ia_order    (inout) layer indexes in processing order; on return it also 
 *               contains the connector indexes, each input-side connector 
 *               placed right before its layer and each output-side connector 
 *               right after it.  
 *   aia_below   (inout) per node, the indexes of the nodes feeding it.  
 *   aia_above   (inout) per node, the indexes of the nodes it feeds.  
 *   conn        reset to (#layers + #connectors) entries.  
 *
 * Connectors get the indexes #layers, #layers+1, ... in order of insertion.  
 * Every edge that is re-routed through a connector must be found exactly 
 * once on the other end; otherwise AzX::throw_if is triggered.  
 */
void AzpCNet3_multi::insert_connectors(AzIntArr &ia_order,  /* inout */
                               AzDataArr<AzIntArr> &aia_below, /* inout */
                               AzDataArr<AzIntArr> &aia_above, /* inout */
                               AzDataArr<AzpLayerConn> &conn)
const                               
{
  const char *eyec = "AzpCNet3_multi::insert_connectors"; 

  const int layer_num = ia_order.size();

  /*---  how many connectors will be needed?  ---*/
  int conn_num = 0; 
  for (int lx = 0; lx < layer_num; ++lx) {
    conn_num += (aia_below[lx]->size() > 1) ? 1 : 0; 
    conn_num += (aia_above[lx]->size() > 1) ? 1 : 0; 
  }
  const int node_num = layer_num + conn_num; 
  
  /*---  working copies of the edges, with room for the connectors  ---*/
  AzDataArr<AzIntArr> aia_b(node_num); 
  AzDataArr<AzIntArr> aia_a(node_num); 
  conn.reset(node_num); 
  for (int lx = 0; lx < layer_num; ++lx) {
    aia_b(lx)->reset(aia_below[lx]); 
    aia_a(lx)->reset(aia_above[lx]); 
  }

  /*---  walk the layers in order and splice in the connectors  ---*/
  AzIntArr ia_o;         /* new order, connectors included */
  int cx = layer_num;    /* index of the next connector */
  for (int ox = 0; ox < layer_num; ++ox) {
    const int lx = ia_order.get(ox); 

    if (aia_b[lx]->size() > 1) { /* multiple inputs */
      /* the connector takes over all inputs of lx and feeds lx alone */
      aia_b(cx)->reset(aia_b[lx]); 
      aia_a(cx)->put(lx); 
      for (int bx = 0; bx < aia_b[cx]->size(); ++bx) {
        const int below = aia_b[cx]->get(bx);        
        const int count = aia_a(below)->replace(lx, cx); 
        AzX::throw_if((count != 1), eyec, "something is wrong"); 
      }
      aia_b(lx)->reset(); 
      aia_b(lx)->put(cx); 
      ia_o.put(cx);       
      ++cx; 
    }

    ia_o.put(lx); 
    
    if (aia_above[lx]->size() > 1) { /* multiple outputs */
      /* the connector is fed by lx alone and takes over all outputs of lx */
      aia_b(cx)->put(lx); 
      aia_a(cx)->reset(aia_above[lx]); 
      for (int ax = 0; ax < aia_a[cx]->size(); ++ax) {
        const int above = aia_a[cx]->get(ax); 
        const int count = aia_b(above)->replace(lx, cx); 
        AzX::throw_if((count != 1), eyec, "something is wrong-2"); 
      }
      aia_a(lx)->reset(); 
      aia_a(lx)->put(cx); 
      ia_o.put(cx); 
      ++cx; 
    }
  }
  
  /*---  hand the results back  ---*/
  aia_below.reset(&aia_b); 
  aia_above.reset(&aia_a); 
  ia_order.reset(&ia_o); 
}