Пример #1
0
  void warm_start(const AzTreeEnsemble *inp_ens, 
              const AzDataForTrTree *data, 
              AzParam &param,              
              const AzBytArr *s_temp_prefix, 
              const AzOut &out,           
              int max_t_num, 
              int search_t_num, 
              AzDvect *v_p, /* inout */
              const AzIntArr *inp_ia_tr_dx=NULL)
  {
    const char *eyec = "AzTrTreeEnsemble::warmup"; 
    if (max_t_num < inp_ens->size()) {
      throw new AzException(eyec, "maximum #tree is less than the #tree we already have"); 
    }

    reset(); 
    a_tree.alloc(&t, max_t_num, "AzTrTreeEnsemble::warmup"); 
    t_num = inp_ens->size(); 
    const_val = inp_ens->constant(); 
    org_dim = inp_ens->orgdim(); 
    if (org_dim > 0 && org_dim != data->featNum()) {
      throw new AzException(AzInputError, eyec, "feature dimensionality mismatch"); 
    }

    const AzIntArr *ia_tr_dx = inp_ia_tr_dx; 
    AzIntArr ia_temp; 
    if (ia_tr_dx == NULL) {
      ia_temp.range(0, data->dataNum()); 
      ia_tr_dx = &ia_temp; 
    }
    v_p->reform(data->dataNum()); 
    v_p->add(const_val, ia_tr_dx); 

    T dummy_tree(param); 
    if (dummy_tree.usingInternalNodes()) {
      throw new AzException(AzInputError, eyec, 
                "warm start is not allowed with use of internal nodes"); 
    }
    dummy_tree.printParam(out); 

    temp_files.reset(&dummy_tree, data->dataNum(), s_temp_prefix); 

    s_param.reset(param.c_str());   
    dt_param = s_param.c_str(); 
    AzParam p(dt_param, false); 
 
    int tx; 
    for (tx = 0; tx < t_num; ++tx) {
      t[tx] = new T(p); 
      t[tx]->forStoringDataIndexes(temp_files.point_file()); 
      if (search_t_num > 0 && tx < t_num-search_t_num) {
        t[tx]->quick_warmup(inp_ens->tree(tx), data, v_p, ia_tr_dx); 
      }
      else {
        t[tx]->warmup(inp_ens->tree(tx), data, v_p, ia_tr_dx); 
      }
    }    
  }
Пример #2
0
 inline void vLoss(const char *kw, AzLossType *out_loss) {
   if (param == NULL) return; 
   AzBytArr s; 
   vStr(kw, &s); 
   if (s.length() > 0) {
     *out_loss = AzLoss::lossType(s.c_str()); 
   }
   if (doCheck) sp_used_kw.put(kw); 
 }
Пример #3
0
 inline void print(const char *name, T val, int width_prec=-1, 
                   bool doZero_doSci=false) {
   if (o == NULL) return; 
   itemBegin(); 
   if (name != NULL) {
     *o<<name; 
     if (name_dlm != NULL) *o<<name_dlm; 
   }
   AzBytArr s; s.cn(val, width_prec, doZero_doSci);
   *o<<s.c_str(); 
 }
Пример #4
0
  /*-------------------*/
  void toplevel_header(const char *desc, AzByte dlm='*') 
  {
    if (out.isNull()) return; 

    newline(); 

    AzBytArr s; 
    s.fill(dlm, 3); 
    int dlm_len = Az64::cstrlen(desc) + s.length()*2; 
    dlm_len = MIN(line_width, dlm_len); 

    AzBytArr s_long; 
    s_long.fill(dlm, dlm_len); 

    AzPrint::writeln(out, s_long.c_str()); 
    AzPrint::write(out, s.c_str()); 
    AzPrint::write(out, desc); 
    AzPrint::writeln(out, s.c_str()); 
    AzPrint::writeln(out, s_long.c_str()); 
  }
Пример #5
0
 /*--------*/
 inline void ppBegin(const char *caller, 
                     const char *desc=NULL, 
                     const char *inp_dlm=NULL) {
   AzBytArr s; 
   if (level > 0) {
     s.concat(caller); 
   }
   else {
     s.concat(desc); 
   }
   printBegin(s.c_str(), inp_dlm); 
 }
Пример #6
0
 /* Build the parameter string from command-line style arguments. 
      argc, argv      arguments handed to concat_args(). 
      inp_doCheck     stored in doCheck. 
      file_mark, cmt  passed through to concat_args(). 
      inp_dlm         delimiter stored in dlm and also given to concat_args(). 
      inp_kwval_dlm   stored in kwval_dlm. 
    concat_args() fills s_param, and param is then pointed at its contents. */
 AzParam(int argc, const char *argv[], 
         bool inp_doCheck=true, 
         char file_mark='@', 
         char inp_dlm=',', 
         char inp_kwval_dlm='=',
         char cmt='#') : sp_used_kw(100, 30)
 {
   /* settings first: the delimiter is needed by concat_args() below */
   kwval_dlm = inp_kwval_dlm; 
   dlm = inp_dlm; 
   doCheck = inp_doCheck; 

   concat_args(argc, argv, &s_param, file_mark, dlm, cmt); 
   param = s_param.c_str(); 
 }
Пример #7
0
  //! Copy nodes only (not splits) from another ensemble.
  //! This ensemble is reset first; the constant, original dimensionality,
  //! tree count and parameter string are taken from inp, and every tree is
  //! re-created from that parameter string before its nodes are copied.
  void copy_nodes_from(const AzTrTreeEnsemble_ReadOnly *inp) {
    reset(); 

    /* header information */
    t_num = inp->size(); 
    org_dim = inp->orgdim(); 
    const_val = inp->constant(); 

    /* our own copy of the parameter string */
    s_param.reset(inp->param_c_str()); 
    dt_param = s_param.c_str(); 

    /* trees */
    a_tree.alloc(&t, t_num, "AzTrTreeEnsemble::copy_nodes_from"); 
    AzParam tree_param(dt_param, false); 
    int tx = 0; 
    while (tx < t_num) {
      t[tx] = new T(tree_param);
      t[tx]->copy_nodes_from(inp->tree(tx)); 
      ++tx; 
    }
  }
Пример #8
0
/*-------------------------------------------------------------------------*/
/* Move the matrices in am_xy into a Vmat, log a one-line summary, and write 
 * the result to a file named <outnm><xy_ext>[.<batch id>]. 
 * An extension ending with "smat" selects write_matrix(); anything else 
 * selects write().  Returns the row count of the written data. 
 */
template <class Vmat> /* Vmat: AzSmatVar | AzSmatcVar */
int AzPrepText2::_write_XY(AzDataArr<AzSmat> &am_xy, /* destroyed to save memory */
                           int data_num, 
                           const AzBytArr &s_batch_id, 
                           const char *outnm, const char *xy_ext) const {
  /* take the data over and release the source right away */
  Vmat mv_xy; 
  mv_xy.transfer_from(&am_xy, data_num); 
  am_xy.reset(); 

  /* statistics text for the log line */
  AzBytArr s_stat(": "); 
  AzTools::show_smat_stat(*mv_xy.data(), s_stat); 

  /* output file name, with the batch id appended when there is one */
  AzBytArr s_fn(outnm, xy_ext); 
  const bool has_batch_id = (s_batch_id.length() > 0); 
  if (has_batch_id) {
    s_fn << "." << s_batch_id.c_str(); 
  }
  AzTimeLog::print(s_fn.c_str(), s_stat.c_str(), out); 

  if (!AzBytArr::endsWith(xy_ext, "smat")) {
    mv_xy.write(s_fn.c_str()); 
  }
  else {
    mv_xy.write_matrix(s_fn.c_str()); 
  }
  return mv_xy.rowNum(); 
}  
Пример #9
0
  /*
   * Cold start: reset this ensemble and prepare it for training from scratch. 
   * A throwaway tree built from param is used to print the tree parameters 
   * and to set up temp_files; space for tree_num_max trees is allocated and 
   * the original feature dimensionality is recorded. 
   */
  inline void cold_start(
                    AzParam &param, 
                    const AzBytArr *s_temp_prefix, /* may be NULL */
                    int data_num, /* to estimate the data size for temp */
                    const AzOut &out, 
                    int tree_num_max, 
                    int inp_org_dim) {
    reset(); 

    T probe_tree(param); 
    probe_tree.printParam(out); 

    /* keep our own copy of the parameter string */
    s_param.reset(param.c_str()); 
    dt_param = s_param.c_str(); 

    /* room for the trees to come */
    alloc(tree_num_max, "AzTrTreeEnsemble::reset"); 
    org_dim = inp_org_dim; 

    /* temporary storage, set up from the probe tree and the data size */
    temp_files.reset(&probe_tree, data_num, s_temp_prefix); 
  }
Пример #10
0
/*-------------------------------------------------------------------------*/
/* Validate a batch id of the form "<batch#>of<#batches>", e.g., "1of5". 
 * An empty id is accepted as is.  Otherwise AzInputError is raised when 
 *   - the id does not contain "of", 
 *   - any character other than the first "of" is not a decimal digit, or 
 *   - the batch# is less than 1 or greater than the number of batches. 
 */
void AzPrepText2::check_batch_id(const AzBytArr &s_batch_id) const
{
  const char *eyec = "AzPrepText::check_batch_id"; 
  if (s_batch_id.length() <= 0) return; 
  AzBytArr s(kw_batch_id); s << " should look like \"1of5\""; 
  const char *batch_id = s_batch_id.c_str(); 
  const char *of_str = strstr(batch_id, "of"); 
  AzX::throw_if((of_str == NULL), AzInputError, eyec, s.c_str()); 
  for (const char *wp = batch_id; wp < batch_id+s_batch_id.length(); ++wp) {
    if (wp >= of_str && wp < of_str+2) continue; /* skip the "of" itself */
    AzX::throw_if((*wp < '0' || *wp > '9'), AzInputError, eyec, s.c_str());  
  }
  /* Keep the numbers as long: narrowing them to int could wrap an oversized 
     batch# into the valid range and let it slip through the check below. 
     strtol is used since, unlike atol, it is well defined for out-of-range 
     input (it returns LONG_MAX). */
  const long batch_no = strtol(batch_id, NULL, 10); 
  const long batch_num = strtol(of_str+2, NULL, 10); 
  AzX::throw_if((batch_no < 1 || batch_no > batch_num), AzInputError, eyec, 
                s.c_str(), " batch# must start with 1 and must not exceed the number of batches. "); 
} 
Пример #11
0
 inline void printBegin(const char *kw, 
                        const char *inp_dlm=NULL, 
                        const char *inp_name_dlm=NULL, 
                        int indent=0) {
   if (o == NULL) return; 
   dlm = inp_dlm; 
   name_dlm = inp_name_dlm; 
   if (name_dlm == NULL) name_dlm = dlm; 
   if (indent > 0) { /* indentation */
     AzBytArr s; s.fill(' ', indent);  
     *o<<s.c_str(); 
   }
   if (kw != NULL && strlen(kw) > 0) {   
     *o<<kw<<": "; 
   }
   count = 0; 
 }
Пример #12
0
  /*------------------------------------------*/  
  /* Go through all the batches once to check consistency and collect statistics. 
   *   _out        copied to the member out and used for logging. 
   *   nm          stored in s_nm. 
   *   dummy_ydim  not referenced in this body. 
   *   bi          optional; if given, it is reset to batch_num entries and 
   *               updated with data_num for every batch. 
   * Batches are visited from the last one down to batch 0, so that 
   * current_batch is 0 when the loop ends (provided that batch_num > 0). 
   * AzInputError is raised if the row counts of m_x or ms_y differ between batches. 
   * NOTE(review): _reset_data(bx) presumably loads batch bx into m_x/ms_y and 
   * sets data_num -- confirm against its definition. 
   */
  virtual void reset_data(const AzOut &_out, const char *nm, int dummy_ydim, 
                          AzpData_batch_info *bi=NULL) {
    const char *eyec = "AzpData_img::reset_data"; 
    out = _out; 
    s_nm.reset(nm); 

    /* statistics over m_x, accumulated across batches */
    double min_val = 0, max_val = 0, abssum = 0, abssum_pop = 0; 
    if (bi != NULL) bi->reset(batch_num);     
    total_data_num = 0; 
    /* row counts of the first visited batch; the other batches must match them */
    int x_row = -1, y_row = -1; 
    int bx; 
    for (bx = batch_num-1; bx >= 0; --bx) {
      /* NOTE(review): s_batchnm is not used anywhere in this loop */
      AzBytArr s_batchnm; 
      _reset_data(bx); 
      if (bx == batch_num-1) {
        /* first visited batch: remember the dimensions */
        x_row = m_x.rowNum(); 
        y_row = ms_y.rowNum(); 
      }
      else {
        AzX::throw_if((m_x.rowNum() != x_row || ms_y.rowNum() != y_row), 
                      AzInputError, eyec, "Data dimensionality conflict between batches"); 
      }
      if (bi != NULL) bi->update(bx, data_num, NULL); 
      /* the total is counted by the columns of m_x, whereas data_num is what is logged */
      total_data_num += m_x.colNum(); 
      current_batch = bx; 
      AzTimeLog::print("#data = ", data_num, out); 
      abssum += m_x.absSum(); abssum_pop += m_x.size(); 
      if (bx == batch_num-1) {
        /* first visited batch: initialize the ranges */
        min_val = m_x.min(); 
        max_val = m_x.max();
        min_tar = ms_y.min(); 
        max_tar = ms_y.max(); 
      }
      else {
        /* later batches: widen the ranges */
        min_val = MIN(min_val, m_x.min()); 
        max_val = MAX(max_val, m_x.max()); 
        min_tar = MIN(min_tar, ms_y.min()); 
        max_tar = MAX(max_tar, ms_y.max()); 
      }
    }  
    /* one-line summary; the average of absolute values is shown only if there was any data */
    AzBytArr s; s << "#total_data=" << total_data_num << ",min,max=" << min_val << "," << max_val; 
    if (abssum_pop > 0) s << ",absavg=" << abssum/abssum_pop; 
    s << ", target-min,max=" << min_tar << "," << max_tar; 
    AzTimeLog::print(s.c_str(), out); 
  }
Пример #13
0
 /* AzBytArr convenience form: forwards the text of s to the C-string 
    inBrackets().  No-op without a stream. */
 inline void inBrackets(const AzBytArr &s) {
   if (o != NULL) {
     inBrackets(s.c_str()); 
   }
 }
Пример #14
0
 /* AzBytArr convenience form: forwards the text of s to the C-string 
    inParen().  No-op without a stream. */
 inline void inParen(const AzBytArr &s) {
   if (o != NULL) {
     inParen(s.c_str()); 
   }
 }
Пример #15
0
 inline void inBrackets(double val, int prec=-1, bool doSci=false) {
   if (o == NULL) return; 
   itemBegin(); 
   AzBytArr s; inBrackets(&s, val, prec, doSci); 
   *o<<s.c_str(); 
 }
Пример #16
0
 inline void inBrackets(int val, int width=-1, bool doFillZero=false) {
   if (o == NULL) return; 
   itemBegin(); 
   AzBytArr s; inBrackets(&s, val, width, doFillZero); 
   *o<<s.c_str(); 
 }
Пример #17
0
 /* Print kw immediately followed by the text of s as one item, but only if 
    s is not empty.  No-op without a stream. 
    A NULL kw is skipped rather than streamed, as print() does with its name: 
    inserting a null char pointer into an ostream is undefined behavior. */
 inline void printV_if_not_empty(const char *kw, const AzBytArr &s) {
   if (o == NULL) return; 
   if (s.length() <= 0) return; 
   itemBegin(); 
   if (kw != NULL) *o<<kw; 
   *o<<s.c_str(); 
 }
Пример #18
0
 /*-------------------*/
 /* AzBytArr convenience form: forwards the text of s to the C-string 
    print().  No-op without a stream. */
 inline void print(const AzBytArr &s) {
   if (o != NULL) {
     print(s.c_str()); 
   }
 }
Пример #19
0
 /* Print kw immediately followed by the text of s as one item.  No-op 
    without a stream. 
    A NULL kw is skipped rather than streamed, as print() does with its name: 
    inserting a null char pointer into an ostream is undefined behavior. */
 inline void printV(const char *kw, const AzBytArr &s) {
   if (o == NULL) return; 
   itemBegin(); 
   if (kw != NULL) *o<<kw; 
   *o<<s.c_str(); 
 }
Пример #20
0
 /* Text held in s_config, as a C string owned by this object. */
 const char *configuration() const {
   return s_config.c_str(); 
 }
Пример #21
0
 /* Text held in s_param, as a C string owned by this object. */
 inline const char *param_c_str() const { return s_param.c_str(); }
Пример #22
0
 static inline void writeln(const AzOut &out, const AzBytArr &s) {
   writeln(out, s.c_str()); 
 }
Пример #23
0
 /* Write the text of s followed by endl.  No-op without a stream. */
 inline void writeln(const AzBytArr &s) {
   if (o != NULL) {
     *o<<s.c_str()<<endl; 
   }
 }
Пример #24
0
 /* Write the text of s as is, with no line end.  No-op without a stream. */
 inline void write(const AzBytArr &s) {
   if (o != NULL) {
     *o<<s.c_str(); 
   }
 }
Пример #25
0
 /* Close the file stream; throws AzException* (AzFileIOError) with the file 
    name if the stream reports failure afterwards. */
 inline void close() {
   ofs.close(); 
   if (!ofs.fail()) {
     return; 
   }
   throw new AzException(AzFileIOError, "AzOfs::close", "Failed to close:", s_fn.c_str()); 
 }
Пример #26
0
 static inline void force_writeln(const AzBytArr &s) {
   force_writeln(s.c_str()); 
 }
Пример #27
0
 inline static void print(const AzBytArr &s, const AzOut &out) {
   print(s.c_str(), out); 
 }
Пример #28
0
 inline static void print(int number, const char *msg, const AzOut &out) {
   AzBytArr s; s.cn(number); 
   print(s.c_str(), msg, out); 
 }
Пример #29
0
 /* Text held in s_sign, as a C string owned by this object. */
 const char *signature() const {
   return s_sign.c_str(); 
 }