コード例 #1
0
ファイル: AzPrepText2.cpp プロジェクト: DeercoderCourse/NLP
/*-------------------------------------------------------------------------*/
void AzPrepText2::gen_Y(const AzIntArr &ia_tokno, int dic_sz, 
                        const AzIntArr &ia_pos, 
                        int xpch_sz, /* patch size used to generate X */
                        int min_dist, int max_dist, 
                        AzSmat *m_y) const
{
  AzX::throw_if_null(m_y, "AzPrepText2::gen_Y", "m_y"); 
  int t_num; 
  const int *tokno = ia_tokno.point(&t_num); 

  m_y->reform(dic_sz*2, ia_pos.size()); /* *2 for left and right */
  for (int ix = 0; ix < ia_pos.size(); ++ix) {
    int xtx0 = ia_pos.get(ix); 
    int xtx1 = xtx0 + xpch_sz; 
        
    AzIntArr ia_ctx0, ia_ctx1;      
    for (int tx = MAX(0,xtx0+min_dist); tx < MIN(t_num,xtx0); ++tx) if (tokno[tx] >= 0) ia_ctx0.put(tokno[tx]); 
    ia_ctx0.unique(); 

    for (int tx = MAX(0,xtx1); tx < MIN(t_num,xtx1+max_dist); ++tx) if (tokno[tx] >= 0) ia_ctx1.put(tokno[tx]); 
    ia_ctx1.unique(); 
    ia_ctx1.add(dic_sz); 
    
    AzIntArr ia_ctx; 
    ia_ctx.concat(&ia_ctx0); 
    ia_ctx.concat(&ia_ctx1); 
    if (ia_ctx.size() > 0) {
      ia_ctx.unique(); 
      m_y->col_u(ix)->load(&ia_ctx, 1); 
    }
  }
}
コード例 #2
0
ファイル: AzPrepText2.cpp プロジェクト: DeercoderCourse/NLP
/*-------------------------------------------------------------------------*/
/*
 * Generate bag-of-word columns, one per text patch.  
 *
 * A window of pch_sz token positions starts at -padding and slides to the 
 * right; the non-negative token numbers found in the part of the window that 
 * overlaps [0, #tokens) become the row indexes of one column of m_feat 
 * (loaded with the value 1).  
 *
 * do_skip_stopunk: when true, windows containing no non-negative token are 
 *                  dropped, and the window may slide by more than pch_step so 
 *                  that the same bag is not produced repeatedly.  
 * m_feat (output): reformed to dic_sz rows, then resized to the number of 
 *                  columns actually produced.  
 * ia_pos (output): start position of each produced patch; may be NULL.  
 *
 * Throws (via AzX::throw_if_null) if m_feat is NULL.  
 */
void AzPrepText2::gen_X_bow(const AzIntArr &ia_tokno, int dic_sz, 
                       int pch_sz, int pch_step, int padding,  
                       bool do_skip_stopunk, 
                       /*---  output  ---*/
                       AzSmat *m_feat, 
                       AzIntArr *ia_pos) const /* patch position: may be NULL */
{
  const char *eyec = "AzPrepText2::gen_X_bow"; 
  AzX::throw_if_null(m_feat, eyec, "m_feat"); 

  int tok_num = 0; 
  const int *tok = ia_tokno.point(&tok_num); 

  /* upper bound on #patches: the window never advances by less than pch_step */
  const int max_pch_num = DIVUP(tok_num+padding*2-pch_sz, pch_step) + 1; 
  m_feat->reform(dic_sz, max_pch_num); 
  if (ia_pos != NULL) ia_pos->reset(); 

  const int padded_end = tok_num + padding; 
  int out_col = 0; 
  int pch_begin = -padding; 
  for (int pch_no = 0; pch_no < max_pch_num; ++pch_no) {
    const int pch_end = pch_begin + pch_sz; 

    /*---  collect the tokens covered by this window  ---*/
    AzIntArr ia_rows; 
    const int scan_end = MIN(tok_num, pch_end); 
    for (int tx = MAX(0, pch_begin); tx < scan_end; ++tx) {
      const int id = tok[tx]; 
      if (id >= 0) ia_rows.put(id); 
    }

    const bool do_emit = (!do_skip_stopunk || ia_rows.size() > 0); 
    if (do_emit) {
      ia_rows.unique();  /* also sorts */
      m_feat->col_u(out_col)->load(&ia_rows, 1); 
      if (ia_pos != NULL) ia_pos->put(pch_begin); 
      ++out_col; 
    }

    if (pch_end >= padded_end) break; /* window has reached the padded end */

    int slide = 1; 
    if (do_skip_stopunk) {
      /*---  smallest slide that changes the bag  ---*/
      /* slide needed to drop the first counted token of this window */
      int lose_tx = pch_begin; 
      while (lose_tx < tok_num && (lose_tx < 0 || tok[lose_tx] < 0)) ++lose_tx; 
      const int dist_lose = lose_tx - pch_begin + 1; 

      /* slide needed to take in the next counted token after this window */
      int gain_tx = pch_end; 
      while (gain_tx < tok_num && (gain_tx < 0 || tok[gain_tx] < 0)) ++gain_tx; 
      const int dist_gain = gain_tx - pch_end + 1; 

      slide = MIN(dist_lose, dist_gain); 
    }
    pch_begin += MAX(slide, pch_step); 
  }
  m_feat->resize(out_col); /* drop the columns that were never filled */
} 
コード例 #3
0
ファイル: AzPrepText2.cpp プロジェクト: DeercoderCourse/NLP
/*-------------------------------------------------------------------------*/
void AzPrepText2::gen_Y_ifeat(int top_num_each, int top_num_total, const AzSmat *m_feat, 
                              const AzIntArr &ia_tokno, const AzIntArr &ia_pos, 
                              int xpch_sz, int min_dist, int max_dist, 
                              bool do_nolr, 
                              int f_pch_sz, int f_pch_step, int f_padding, 
                              AzSmat *m_y, 
                              feat_info fi[2]) const
{
  const char *eyec = "AzPrepText2::gen_neigh_topfeat"; 
  AzX::throw_if_null(m_feat, eyec, "m_feat"); 
  AzX::throw_if_null(m_y, eyec, "m_y"); 
  int t_num; 
  const int *tokno = ia_tokno.point(&t_num); 

  int feat_sz = m_feat->rowNum(); 
  int f_pch_num = DIVUP(t_num+f_padding*2-f_pch_sz, f_pch_step) + 1; 
  if (m_feat->colNum() != f_pch_num) {
    AzBytArr s("#patch mismatch: Expcected: "); s << f_pch_num << " Actual: " << m_feat->colNum(); 
    AzX::throw_if(true, AzInputError, eyec, s.c_str()); 
  }
  
  if (do_nolr) m_y->reform(feat_sz,    ia_pos.size()); 
  else         m_y->reform(feat_sz*2,  ia_pos.size()); 
  for (int ix = 0; ix < ia_pos.size(); ++ix) {
    int xtx0 = ia_pos[ix]; 
    int xtx1 = xtx0 + xpch_sz; 
     
    AzIFarr ifa_ctx; 
    int offs = 0;     
    for (int tx = xtx0+min_dist; tx < xtx0; ++tx) {
      if (tx + f_pch_sz > xtx0) break; 
      set_ifeat(m_feat, top_num_each, (tx+f_padding)/f_pch_step, offs, &ifa_ctx, fi); 
    }
    
    if (!do_nolr) offs = feat_sz; 
    for (int tx = xtx1; tx < xtx1+max_dist; ++tx) {
      if (tx + f_pch_sz > xtx1+max_dist) break; 
      set_ifeat(m_feat, top_num_each, (tx+f_padding)/f_pch_step, offs, &ifa_ctx, fi); 
    }
    ifa_ctx.squeeze_Max(); 
    if (top_num_total > 0 && ifa_ctx.size() > top_num_total) {
      ifa_ctx.sort_Float(false); 
      ifa_ctx.cut(top_num_total);       
    }
    m_y->col_u(ix)->load(&ifa_ctx); 
  }
}                                   
コード例 #4
0
ファイル: AzPrepText2.cpp プロジェクト: DeercoderCourse/NLP
/*-------------------------------------------------------------------------*/
void AzPrepText2::gen_X_seq(const AzIntArr &ia_tokno, int dic_sz, 
                       int pch_sz, int pch_step, int padding,  
                       bool do_allow_zero, bool do_skip_stopunk, 
                       /*---  output  ---*/
                       AzSmat *m_feat, 
                       AzIntArr *ia_pos) const /* patch position: may be NULL */
{
  const char *eyec = "AzPrepText2::gen_X_seq"; 
  AzX::throw_if_null(m_feat, eyec, "m_feat"); 
  AzX::no_support(do_skip_stopunk, eyec, "variable strides with Seq"); 
  int t_num; 
  const int *tokno = ia_tokno.point(&t_num); 
    
  int pch_num = DIVUP(t_num+padding*2-pch_sz, pch_step) + 1; 
  m_feat->reform(dic_sz*pch_sz, pch_num);   
  if (ia_pos != NULL) ia_pos->reset(); 

  int col = 0; 
  int tx0 = -padding; 
  for (int pch_no = 0; pch_no < pch_num; ++pch_no) {
    int tx1 = tx0 + pch_sz; 
    
    AzSmat m; 
    for (int tx = tx0; tx < tx1; ++tx) {
      AzSmat m0(dic_sz, 1); 
      if (tx >= 0 && tx < t_num && tokno[tx] >= 0) {
        AzIntArr ia_row; ia_row.put(tokno[tx]); 
        m0.col_u(0)->load(&ia_row, 1); 
      }
      if (tx == tx0) m.set(&m0); 
      else           m.rbind(&m0); 
    }
    if (do_allow_zero || !m.isZero()) {
      m_feat->col_u(col)->set(m.col(0)); 
      if (ia_pos != NULL) ia_pos->put(tx0); 
      ++col; 
    }

    if (tx1 >= t_num+padding) break; 
    tx0 += pch_step; 
  }
  m_feat->resize(col);   
}