Example #1
0
/*-------------------------------------------------------------------------*/
void AzPrepText2::gen_Y(const AzIntArr &ia_tokno, int dic_sz, 
                        const AzIntArr &ia_pos, 
                        int xpch_sz, /* patch size used to generate X */
                        int min_dist, int max_dist, 
                        AzSmat *m_y) const
{
  AzX::throw_if_null(m_y, "AzPrepText2::gen_Y", "m_y"); 
  int t_num; 
  const int *tokno = ia_tokno.point(&t_num); 

  m_y->reform(dic_sz*2, ia_pos.size()); /* *2 for left and right */
  for (int ix = 0; ix < ia_pos.size(); ++ix) {
    int xtx0 = ia_pos.get(ix); 
    int xtx1 = xtx0 + xpch_sz; 
        
    AzIntArr ia_ctx0, ia_ctx1;      
    for (int tx = MAX(0,xtx0+min_dist); tx < MIN(t_num,xtx0); ++tx) if (tokno[tx] >= 0) ia_ctx0.put(tokno[tx]); 
    ia_ctx0.unique(); 

    for (int tx = MAX(0,xtx1); tx < MIN(t_num,xtx1+max_dist); ++tx) if (tokno[tx] >= 0) ia_ctx1.put(tokno[tx]); 
    ia_ctx1.unique(); 
    ia_ctx1.add(dic_sz); 
    
    AzIntArr ia_ctx; 
    ia_ctx.concat(&ia_ctx0); 
    ia_ctx.concat(&ia_ctx1); 
    if (ia_ctx.size() > 0) {
      ia_ctx.unique(); 
      m_y->col_u(ix)->load(&ia_ctx, 1); 
    }
  }
}
 /*--------------------------------------*/
 virtual void writeText(const char *fn, int digits, 
                        bool doSparse=false,
                        bool doAppend=false) const {
   AzIntArr ia; 
   ia.range(0, colNum()); 
   writeText(fn, &ia, digits, doSparse, doAppend); 
 }
 /* Copy assignment: copies ia_zero, ia_index, v_value, _shouldDoBackward and
    data_num from inp.  Self-assignment is a no-op.  (Original note: never tested.) */
 AzSortedFeat_Sparse & operator =(const AzSortedFeat_Sparse &inp) { /* never tested */
   if (this != &inp) {
     ia_zero.reset(&inp.ia_zero); 
     ia_index.reset(&inp.ia_index); 
     v_value.set(&inp.v_value); 
     _shouldDoBackward = inp._shouldDoBackward; 
     data_num = inp.data_num; 
   }
   return *this; 
 }
Example #4
0
  /* Append the sizes held in ia_sz to s, separated by " x " (e.g. "3 x 4").
     When do_reset is true, s is reset first. */
  virtual void format(AzBytArr &s, bool do_reset=false) const {
    if (do_reset) {
      s.reset(); 
    }
//    s.c("("); 
    const int dim = ia_sz.size(); 
    for (int dx = 0; dx < dim; ++dx) {
      if (dx != 0) s.c(" x "); /* separator goes before every entry but the first */
      s.cn(ia_sz.get(dx)); 
    }
//    s.c(")"); 
  }  
Example #5
0
 /*------------------------------------------------------------*/
 inline void show(const AzOut &out) const {
   AzPrint::writeln(out, "AzpPoolingDflt::show"); 
   int sz = pia2_out2inp.size(); 
   int ox;
   for (ox = 0; ox < sz; ++ox) {
     AzBytArr s("["); s.cn(ox); s.c("] "); 
     AzIntArr ia; 
     pia2_out2inp.get(ox, &ia); 
     ia.print(out, s.c_str()); 
   }
 }
Example #6
0
 void show_below_above(const AzIntArr &ia_below, const AzIntArr &ia_above, AzBytArr &s) const {
   s << "("; 
   for (int ix = 0; ix < ia_below.size(); ++ix) {
     if (ix > 0) s << ","; 
     s << ia_below.get(ix); 
   }
   s << ") -> ("; 
   for (int ix = 0; ix < ia_above.size(); ++ix) {
     if (ix > 0) s << ","; 
     s << ia_above.get(ix); 
   }    
   s << ")"; 
 }
 /* Return the pointer obtained from ia_index.point_u(len).
    Throws (a heap-allocated AzException) when isOriginal is set. */
 inline int *base_index_for_update(int *len) {
   if (!isOriginal) {
     return ia_index.point_u(len); 
   }
   throw new AzException("AzSortedFeat_Dense::base_index_for_update", 
                         "Not allowed"); 
 }
 /* Release the dense and sparse arrays and clear the bookkeeping
    (feature count, active flags, active count). */
 void reset() {
   /* storage */
   a_dense.free(&arrd); 
   a_sparse.free(&arrs); 

   /* bookkeeping */
   ia_isActive.reset(); 
   f_num = 0; 
   active_num = 0; 
 }
Example #9
0
/*-------------------------------------------------------------------------*/
void AzPrepText2::gen_Y_ifeat(int top_num_each, int top_num_total, const AzSmat *m_feat, 
                              const AzIntArr &ia_tokno, const AzIntArr &ia_pos, 
                              int xpch_sz, int min_dist, int max_dist, 
                              bool do_nolr, 
                              int f_pch_sz, int f_pch_step, int f_padding, 
                              AzSmat *m_y, 
                              feat_info fi[2]) const
{
  const char *eyec = "AzPrepText2::gen_neigh_topfeat"; 
  AzX::throw_if_null(m_feat, eyec, "m_feat"); 
  AzX::throw_if_null(m_y, eyec, "m_y"); 
  int t_num; 
  const int *tokno = ia_tokno.point(&t_num); 

  int feat_sz = m_feat->rowNum(); 
  int f_pch_num = DIVUP(t_num+f_padding*2-f_pch_sz, f_pch_step) + 1; 
  if (m_feat->colNum() != f_pch_num) {
    AzBytArr s("#patch mismatch: Expcected: "); s << f_pch_num << " Actual: " << m_feat->colNum(); 
    AzX::throw_if(true, AzInputError, eyec, s.c_str()); 
  }
  
  if (do_nolr) m_y->reform(feat_sz,    ia_pos.size()); 
  else         m_y->reform(feat_sz*2,  ia_pos.size()); 
  for (int ix = 0; ix < ia_pos.size(); ++ix) {
    int xtx0 = ia_pos[ix]; 
    int xtx1 = xtx0 + xpch_sz; 
     
    AzIFarr ifa_ctx; 
    int offs = 0;     
    for (int tx = xtx0+min_dist; tx < xtx0; ++tx) {
      if (tx + f_pch_sz > xtx0) break; 
      set_ifeat(m_feat, top_num_each, (tx+f_padding)/f_pch_step, offs, &ifa_ctx, fi); 
    }
    
    if (!do_nolr) offs = feat_sz; 
    for (int tx = xtx1; tx < xtx1+max_dist; ++tx) {
      if (tx + f_pch_sz > xtx1+max_dist) break; 
      set_ifeat(m_feat, top_num_each, (tx+f_padding)/f_pch_step, offs, &ifa_ctx, fi); 
    }
    ifa_ctx.squeeze_Max(); 
    if (top_num_total > 0 && ifa_ctx.size() > top_num_total) {
      ifa_ctx.sort_Float(false); 
      ifa_ctx.cut(top_num_total);       
    }
    m_y->col_u(ix)->load(&ifa_ctx); 
  }
}                                   
Example #10
0
/*-------------------------------------------------------------------------*/
/*
 * Tokenize buff once and then, for every n listed in ia_nn, identify the
 * tokens against dic and store the resulting token-id array in aia_tokno
 * (one array per entry of ia_nn).
 *
 * Throws (a heap-allocated AzException) if any of the resulting arrays does
 * not have as many entries as there are tokens.
 */
void AzTools_text::tokenize(AzByte *buff, int &len, 
                       const AzDic *dic,
                       AzIntArr &ia_nn, 
                       bool do_lower, bool do_utf8dashes, 
                       /*---  output  ---*/                       
                       AzDataArr<AzIntArr> &aia_tokno)
{
  const char *eyec = "AzTools_text::tokenize(multi n)"; 

  /*---  split the text into tokens  ---*/
  AzStrPool sp_tok; 
  tokenize(buff, len, do_utf8dashes, do_lower, &sp_tok);   
  const int tok_num = sp_tok.size(); 

  /*---  one token-id array per n  ---*/
  const int n_num = ia_nn.size(); 
  aia_tokno.reset(n_num); 
  for (int nx = 0; nx < n_num; ++nx) {
    identify_tokens(&sp_tok, ia_nn[nx], dic, aia_tokno(nx)); 
    if (aia_tokno[nx]->size() == tok_num) continue; 
    throw new AzException(eyec, "Conflict in #tokens"); 
  }
} 
 /* Put the cursor at its starting point: at ia_index.size() when
    _shouldDoBackward is set, otherwise at 0. */
 inline void rewind(AzCursor &cur) const {
   cur.set(_shouldDoBackward ? ia_index.size() : 0); 
 }
Example #12
0
 /* Read this object from file: first the AzpCNet3 part, then a header,
    then the members below.  The order of the calls defines the file layout. */
 virtual void read(AzFile *file) {
   AzpCNet3::read(file); 
   /* NOTE(review): the value returned by read_header is stored but not used in this function */
   int my_version = AzTools::read_header(file, reserved_len); 
   iia_layer_conn.read(file); 
   ia_layer_order.read(file); 
   layer_info.read(file); 
   conn.read(file); 
   sp_conn.read(file); 
 }
Example #13
0
 /* Write this object to file: first the AzpCNet3 part, then a header built
    from version and reserved_len, then the members below.  The order of the
    calls defines the file layout. */
 virtual void write(AzFile *file) {
   AzpCNet3::write(file); 
   AzTools::write_header(file, version, reserved_len); 
   iia_layer_conn.write(file); 
   ia_layer_order.writec(file); /* write2: const */
   layer_info.write(file); 
   conn.write(file); 
   sp_conn.writec(file); 
 }
Example #14
0
/*-------------------------------------------------------------------------*/
void AzPrepText2::gen_Y_ngram_bow(const AzIntArr &ia_nn, 
                             const AzDataArr<AzIntArr> &aia_tokno, int dic_sz, 
                             const AzIntArr &ia_pos, 
                             int xpch_sz, /* patch size used to generate X */
                             int min_dist, int max_dist, 
                             bool do_nolr, 
                             AzSmat *m_y) const
{
  const char *eyec = "AzPrepText2::gen_Y_ngram_bow"; 
  int t_num = aia_tokno[0]->size(); 
  if (do_nolr) m_y->reform(dic_sz, ia_pos.size());
  else         m_y->reform(dic_sz*2, ia_pos.size()); /* *2 for left and right */
  for (int ix = 0; ix < ia_pos.size(); ++ix) {
    int xtx0 = ia_pos.get(ix); 
    int xtx1 = xtx0 + xpch_sz; 
    
    AzIntArr ia_ctx;     
    int base = xtx0+min_dist;  
    for (int nx = 0; nx < aia_tokno.size(); ++nx) {
      const AzIntArr *ia_tokno = aia_tokno[nx];      
      int nn = ia_nn[nx]; 
      for (int tx = MAX(0,base); tx <= MIN(t_num,xtx0)-nn; ++tx) {         
        int tokno = ia_tokno->get(tx); 
        if (tokno >= 0) ia_ctx.put(tokno); 
      }
    }
   
    base = xtx1; 
    for (int nx = 0; nx < aia_tokno.size(); ++nx) {  
      const AzIntArr *ia_tokno = aia_tokno[nx];     
      int nn = ia_nn[nx];   
      for (int tx = MAX(0,base); tx <= MIN(t_num,xtx1+max_dist)-nn; ++tx) {           
        int tokno = ia_tokno->get(tx); 
        if (tokno >= 0) {
          if (do_nolr) ia_ctx.put(tokno); 
          else         ia_ctx.put(dic_sz+tokno); 
        }
      }
    }
    ia_ctx.unique();
    m_y->col_u(ix)->load(&ia_ctx, 1); 
  }
}
Example #15
0
/*-------------------------------------------------------------------------*/
/*
 * Generate bag-of-words feature columns, one per patch of the token sequence.
 *
 * A window of pch_sz tokens starts at position -padding and slides to the
 * right.  For each window, the non-negative token ids inside it (clipped to
 * the text) are loaded into the next column of m_feat with value 1, and the
 * window start is recorded in ia_pos when ia_pos is given.
 *
 * When do_skip_stopunk is true, windows without any non-negative token id are
 * not emitted, and the stride may be larger than pch_step (see below).
 * m_feat is reformed to the maximum number of patches first and resized to
 * the number of columns actually emitted at the end.
 *
 * Throws (via AzX) if m_feat is NULL.
 */
void AzPrepText2::gen_X_bow(const AzIntArr &ia_tokno, int dic_sz, 
                       int pch_sz, int pch_step, int padding,  
                       bool do_skip_stopunk, 
                       /*---  output  ---*/
                       AzSmat *m_feat, 
                       AzIntArr *ia_pos) const /* patch position: may be NULL */
{
  const char *eyec = "AzPrepText2::gen_X_bow"; 
  AzX::throw_if_null(m_feat, eyec, "m_feat"); 
  int t_num; 
  const int *tokno = ia_tokno.point(&t_num); 
    
  /* upper bound on the number of patches (DIVUP is presumably a ceiling division -- confirm) */
  int pch_num = DIVUP(t_num+padding*2-pch_sz, pch_step) + 1; 
  m_feat->reform(dic_sz, pch_num);   
  if (ia_pos != NULL) ia_pos->reset(); 
  
  int col = 0; /* next output column */
  int tx0 = -padding; /* start of the current window; negative while inside the left padding */
  for (int pch_no = 0; pch_no < pch_num; ++pch_no) {
    int tx1 = tx0 + pch_sz; 
    
    /* collect the non-negative token ids inside the window, clipped to [0, t_num) */
    AzIntArr ia_rows; 
    for (int tx = MAX(0, tx0); tx < MIN(t_num, tx1); ++tx) {
      if (tokno[tx] >= 0) ia_rows.put(tokno[tx]); 
    }
    /* with do_skip_stopunk, empty windows produce no column */
    if (!do_skip_stopunk || ia_rows.size() > 0) {
      ia_rows.unique();  /* sorting too */
      m_feat->col_u(col)->load(&ia_rows, 1); 
      if (ia_pos != NULL) ia_pos->put(tx0); 
      ++col; 
    }

    /* stop once the window end has reached the end of the right padding */
    if (tx1 >= t_num+padding) break; 
    
    int dist = 1; 
    if (do_skip_stopunk) {
      /*---  to avoid repeating the same bow  ---*/
      int tx; 
      for (tx = tx0; tx < t_num; ++tx) if (tx >= 0 && tokno[tx] >= 0) break;    
      int dist0 = tx-tx0+1; /* to lose a word, we have to slide a window this much */

      /* NOTE(review): this assignment is repeated by the for-initializer on the next line */
      tx = tx1; 
      for (tx = tx1; tx < t_num; ++tx) if (tx >= 0 && tokno[tx] >= 0) break; 
      int dist1 = tx-tx1+1; /* to get a new word, we have to slide a window this much */
      dist = MIN(dist0, dist1); 
    }
    /* advance by at least pch_step; more when the window content would not change */
    tx0 += MAX(dist, pch_step); 
  }
  m_feat->resize(col);     
} 
Example #16
0
/*------------------------------------------------------------------*/
/*
 * Average loss of the predictions v_p against the targets v_y.
 *
 *   loss_type  loss to evaluate; AzLoss_None makes the function return -1
 *   v_p        predictions; each one is multiplied by p_coeff before the
 *              loss is computed
 *   v_y        targets
 *   inp_ia_dx  indexes of the data points to evaluate; if NULL, the index
 *              list range(0, v_p->rowNum()) is used
 *   p_coeff    multiplier applied to predictions
 *
 * Returns the mean of AzLoss::getLoss over the selected data points, or 0
 * when no data point is selected.
 */
double AzTaskTools::analyzeLoss(AzLossType loss_type, 
                            const AzDvect *v_p, 
                            const AzDvect *v_y, 
                            const AzIntArr *inp_ia_dx, 
                            double p_coeff)
{
  if (loss_type == AzLoss_None) return -1; 

  const double *pred = v_p->point(); 
  const double *target = v_y->point(); 

  /*---  decide which data points to look at  ---*/
  AzIntArr ia_all; 
  const AzIntArr *ia_dx = inp_ia_dx; 
  if (ia_dx == NULL) {
    ia_all.range(0, v_p->rowNum()); 
    ia_dx = &ia_all; 
  }
  int dx_num; 
  const int *dx_arr = ia_dx->point(&dx_num); 

  /*---  accumulate the loss  ---*/
  double loss_sum = 0; 
  for (int ix = 0; ix < dx_num; ++ix) {
    const int dx = dx_arr[ix]; 
    double loss = AzLoss::getLoss(loss_type, pred[dx]*p_coeff, target[dx]); 
    loss_sum += loss; 
  }

  if (dx_num <= 0) return 0; 
  return loss_sum / (double)dx_num; 
}
Example #17
0
/*-------------------------------------------------------------------------*/
void AzPrepText2::gen_X_seq(const AzIntArr &ia_tokno, int dic_sz, 
                       int pch_sz, int pch_step, int padding,  
                       bool do_allow_zero, bool do_skip_stopunk, 
                       /*---  output  ---*/
                       AzSmat *m_feat, 
                       AzIntArr *ia_pos) const /* patch position: may be NULL */
{
  const char *eyec = "AzPrepText2::gen_X_seq"; 
  AzX::throw_if_null(m_feat, eyec, "m_feat"); 
  AzX::no_support(do_skip_stopunk, eyec, "variable strides with Seq"); 
  int t_num; 
  const int *tokno = ia_tokno.point(&t_num); 
    
  int pch_num = DIVUP(t_num+padding*2-pch_sz, pch_step) + 1; 
  m_feat->reform(dic_sz*pch_sz, pch_num);   
  if (ia_pos != NULL) ia_pos->reset(); 

  int col = 0; 
  int tx0 = -padding; 
  for (int pch_no = 0; pch_no < pch_num; ++pch_no) {
    int tx1 = tx0 + pch_sz; 
    
    AzSmat m; 
    for (int tx = tx0; tx < tx1; ++tx) {
      AzSmat m0(dic_sz, 1); 
      if (tx >= 0 && tx < t_num && tokno[tx] >= 0) {
        AzIntArr ia_row; ia_row.put(tokno[tx]); 
        m0.col_u(0)->load(&ia_row, 1); 
      }
      if (tx == tx0) m.set(&m0); 
      else           m.rbind(&m0); 
    }
    if (do_allow_zero || !m.isZero()) {
      m_feat->col_u(col)->set(m.col(0)); 
      if (ia_pos != NULL) ia_pos->put(tx0); 
      ++col; 
    }

    if (tx1 >= t_num+padding) break; 
    tx0 += pch_step; 
  }
  m_feat->resize(col);   
}
Example #18
0
 /* Read the size array ia_sz from file. */
 void read(AzFile *file) {
   ia_sz.read(file); 
 }
Example #19
0
 /* Write the size array ia_sz to file. */
 void write(AzFile *file) {
   ia_sz.write(file); 
 }
Example #20
0
 /* Return the size stored for dimension dx.
    Throws (via AzX::throw_if) when dx is negative or not below ia_sz.size(). */
 virtual int sz(int dx) const {
   const bool is_out_of_range = (dx < 0 || dx >= ia_sz.size()); 
   AzX::throw_if(is_out_of_range, "AzxD::sz", "dim is out of range"); 
   return ia_sz.get(dx); 
 }
Example #21
0
 /* Number of dimensions, i.e. the number of entries in ia_sz. */
 virtual int get_dim() const { return ia_sz.size(); }
Example #22
0
 /* Reset the sizes from the array arr and its length len (forwarded to ia_sz.reset). */
 virtual void reset(const int *arr, int len) {
   ia_sz.reset(arr, len); 
 }
Example #23
0
 /* Reset the sizes from the integer array ia (forwarded to ia_sz.reset). */
 virtual void reset(const AzIntArr *ia) {
   ia_sz.reset(ia); 
 }
Example #24
0
 /* Reset the sizes from those of inp.  inp is dereferenced without a NULL check. */
 virtual void reset(const AzxD *inp) {
   ia_sz.reset(&inp->ia_sz); 
 }
Example #25
0
 /* True when comparing ia_sz with inp's ia_sz yields 0.
    inp is dereferenced without a NULL check. */
 virtual bool isSame(const AzxD *inp) const {
   return ia_sz.compare(&inp->ia_sz) == 0; 
 }
Example #26
0
 /* Generate a unit size region: clear the sizes and, unless dim is 0,
    call ia_sz.reset(dim, 1). */
 virtual void reset(int dim=0) { /* generate a unit size region */
   ia_sz.reset(); 
   if (dim != 0) {
     ia_sz.reset(dim, 1); 
   }
 }  
Example #27
0
 /* Valid means: at least one dimension, and every size is positive. */
 virtual bool is_valid() const {
   const int dim = ia_sz.size(); 
   if (dim <= 0) return false; 
   for (int dx = 0; dx < dim; ++dx) {
     if (ia_sz.get(dx) <= 0) return false; 
   }
   return true; 
 }
Example #28
0
 /* Construct with the sizes of inp.  inp is dereferenced without a NULL check. */
 AzxD(const AzxD *inp) {
   ia_sz.reset(&inp->ia_sz); 
 }
Example #29
0
 /* Return the last entry of ia_layer_order.
    NOTE(review): with an empty ia_layer_order this asks for index -1 -- confirm callers never do that. */
 virtual int top_lay_ind() const {
   const int last = ia_layer_order.size()-1; 
   return ia_layer_order.get(last); 
 }
 /*--------------------------------------*/
 virtual void writeText(const char *fn, int digits) const {
   AzIntArr ia; 
   ia.range(0, rowNum()); 
   writeText(fn, &ia, digits);  
 }