示例#1
0
 void append_const(double const_to_add) {
   int r_num = m_feat.rowNum()+1, c_num = m_feat.colNum(); 
   m_feat.resize(r_num, c_num); 
   int col; 
   for (col = 0; col < c_num; ++col) {
     AzIFarr ifa; 
     m_feat.col(col)->nonZero(&ifa); 
     ifa.put(r_num-1, const_to_add); 
     m_feat.col_u(col)->load(&ifa); 
   }
 }
/*----------------------------------------------------------------------*/
void AzSvDataS::mergeData(const AzSmat *m_x,
                         const AzSvFeatInfo *feat,
                         const char *fn_template,
                         const char *str,
                         bool doSparse,
                         int digits,
                         const char *out_x_fn,
                         const char *out_n_fn,
                         int num,
                         const char *names[])
{
  const char *eyec = "AzSvDataS::mergeData";

  int data_num = m_x->colNum();
  int f_num = m_x->rowNum();
  if (feat->featNum() != f_num) {
    throw new AzException(eyec, "Conflict btw m_x and featInfo");
  }
  AzFile n_file(out_n_fn);
  n_file.open("wb");
  int fx;
  for (fx = 0; fx < feat->featNum(); ++fx) {
    AzBytArr s; feat->desc(fx, &s); s.nl();
    s.writeText(&n_file);
  }

  AzSmat m;
  m_x->transpose(&m);
  m.resize(data_num, f_num+num);
  AzStrPool sp_names;
  for (fx = 0; fx < num; ++fx) {
    AzBytArr s_fn(fn_template);
    s_fn.replace("*", names[fx]);

    AzDvect v;
    AzSvDataS::readVector(s_fn.c_str(), &v);
    if (v.rowNum() != m.rowNum()) {
      throw new AzException(AzInputError, eyec, "conflict in #data:", s_fn.c_str());
    }
    m.col_u(f_num+fx)->set(&v);

    AzBytArr s_nm;
    if (AzTools::isSpecified(str)) s_nm.c(str);
    s_nm.c(names[fx]); s_nm.nl();
    s_nm.writeText(&n_file);
  }
  n_file.close(true);

  AzSmat m1;
  m.transpose(&m1);
  m1.writeText(out_x_fn, digits, doSparse);
}
  virtual void reset_data_for_test(const AzOut &out, 
                     const AzSmat *m_data) {
    bool doSparse = false; 
    if (m_data->rowNum()*m_data->colNum() > Az_max_test_entries) { /* large data */
      /*---  dense is faster but uses up more memory if data is sparse  ---*/
      double nz_ratio; 
      m_data->nonZeroNum(&nz_ratio); 
      if (nz_ratio < 0.6) { /* relatively sparse */
        doSparse = true; 
        AzBytArr s; s.c("Large and sparse test data (nonzero ratio=", nz_ratio); 
        s.c("); treated as sparse data.");
        AzPrint::writeln(out, s); 
      }
    }

    data_num = m_data->colNum(); 
    m_tran_dense.reset(); 
    m_tran_sparse.reset(); 
    if (doSparse) {    
      m_data->transpose(&m_tran_sparse); 
    }
    else {
      m_tran_dense.transpose_from(m_data); 
    }
    sorted_arr.reset(); 
    feat.reset(m_data->rowNum()); 
  }
  virtual void reset_data(const AzOut &out, 
                  const AzSmat *m_data, 
                  AzParam &p, 
                  bool beTight, 
                  const AzSvFeatInfo *inp_feat=NULL)
  {
    resetParam(p); 
    printParam(out); 

    /*---  count nonzero components  ---*/
    double nz_ratio; 
    m_data->nonZeroNum(&nz_ratio); 
    AzBytArr s("Training data: "); 
    s.cn(m_data->rowNum());s.c("x");s.cn(m_data->colNum()); 
    s.c(", nonzero_ratio=", nz_ratio, 4); 

    /*---  decide sparse or dense  ---*/
    AzBytArr s_dp("; managed as dense data"); 
    bool doSparse = false; 
    if (dataproc == dataproc_Auto && 
        nz_ratio < Az_nz_ratio_threshold || 
        dataproc == dataproc_Sparse) { 
      doSparse = true; 
      s_dp.reset("; managed as sparse data"); 
    }
    if (dataproc != dataproc_Auto) s_dp.concat(" as requested."); 
    else                           s_dp.concat("."); 
    AzPrint::writeln(out, "-------------"); 
    AzPrint::writeln(out, s, s_dp); 
    AzPrint::writeln(out, "-------------"); 

   /*---  pre-sort data  ---*/
    m_tran_sparse.reset(); 
    m_tran_dense.unlock(); 
    m_tran_dense.reset(); 
    data_num = m_data->colNum(); 
    if (doSparse) {
      m_data->transpose(&m_tran_sparse); 
      sorted_arr.reset_sparse(&m_tran_sparse, beTight); 
    }
    else {
      m_tran_dense.transpose_from(m_data); 
      sorted_arr.reset_dense(&m_tran_dense, beTight); 
      /* prohibit any action to change the pointers to the column vectors */
      m_tran_dense.lock(); 
    }
    if (inp_feat != NULL) {
      feat.reset(inp_feat); 
      if (feat.featNum() != m_data->rowNum()) {
        throw new AzException(AzInputError, "AzDataForTrTree::reset", "#feat mismatch"); 
      }
    }
    else {
      feat.reset(m_data->rowNum()); 
    }
  }
示例#5
0
/*-------------------------------------------------------------------------*/
void AzPrepText2::gen_X_seq(const AzIntArr &ia_tokno, int dic_sz, 
                       int pch_sz, int pch_step, int padding,  
                       bool do_allow_zero, bool do_skip_stopunk, 
                       /*---  output  ---*/
                       AzSmat *m_feat, 
                       AzIntArr *ia_pos) const /* patch position: may be NULL */
{
  const char *eyec = "AzPrepText2::gen_X_seq"; 
  AzX::throw_if_null(m_feat, eyec, "m_feat"); 
  AzX::no_support(do_skip_stopunk, eyec, "variable strides with Seq"); 
  int t_num; 
  const int *tokno = ia_tokno.point(&t_num); 
    
  int pch_num = DIVUP(t_num+padding*2-pch_sz, pch_step) + 1; 
  m_feat->reform(dic_sz*pch_sz, pch_num);   
  if (ia_pos != NULL) ia_pos->reset(); 

  int col = 0; 
  int tx0 = -padding; 
  for (int pch_no = 0; pch_no < pch_num; ++pch_no) {
    int tx1 = tx0 + pch_sz; 
    
    AzSmat m; 
    for (int tx = tx0; tx < tx1; ++tx) {
      AzSmat m0(dic_sz, 1); 
      if (tx >= 0 && tx < t_num && tokno[tx] >= 0) {
        AzIntArr ia_row; ia_row.put(tokno[tx]); 
        m0.col_u(0)->load(&ia_row, 1); 
      }
      if (tx == tx0) m.set(&m0); 
      else           m.rbind(&m0); 
    }
    if (do_allow_zero || !m.isZero()) {
      m_feat->col_u(col)->set(m.col(0)); 
      if (ia_pos != NULL) ia_pos->put(tx0); 
      ++col; 
    }

    if (tx1 >= t_num+padding) break; 
    tx0 += pch_step; 
  }
  m_feat->resize(col);   
}
示例#6
0
 /*---  For the sparse data format  ---*/
 inline static void parseDataLine_Sparse(const AzByte *inp, 
                             int inp_len, 
                             int f_num, 
                             const char *data_fn, 
                             int line_no, 
                             /*---  output  ---*/
                             AzSmat *m_feat, 
                             int col) {
   AzIFarr ifa_ex_val; 
   _parseDataLine_Sparse(inp, inp_len, f_num, data_fn, line_no, ifa_ex_val); 
   m_feat->load(col, &ifa_ex_val);
 }   
 virtual inline bool isLE(int dx, 
             int fx, 
             double border_val) const
 {
   double value; 
   if (AzSmat::isNull(&m_tran_sparse)) {
     value = m_tran_dense.get(dx, fx); 
   }
   else {
     value = m_tran_sparse.get(dx, fx); 
   }
   if (value <= border_val) return true; 
   return false; 
 }
示例#8
0
 inline void add_to(AzDvect *v_dst, int col, double coeff) const {
   if      (md != NULL) v_dst->add(md->col(col), coeff); /* dst += md[,col]*coeff */
   else if (ms != NULL) v_dst->add(ms->col(col), coeff); 
   else                 throw new AzException("AzDSmat::add", "No data"); 
 }
示例#9
0
 inline int colNum() const { 
   if (md != NULL) return md->colNum(); 
   if (ms != NULL) return ms->colNum(); 
   return 0; 
 }
示例#10
0
 inline int rowNum() const { 
   if (md != NULL) return md->rowNum(); 
   if (ms != NULL) return ms->rowNum(); 
   return 0; 
 }
示例#11
0
 /*-----------------------------------------------*/
 void normalize_feat() {
   m_feat.normalize();  /* to unit vectors */
 }
示例#12
0
 inline int size() const { return m_feat.colNum(); }
示例#13
0
 inline int featNum() const { return m_feat.rowNum(); }