예제 #1
0
  virtual void reset_data_for_test(const AzOut &out, 
                     const AzSmat *m_data) {
    bool doSparse = false; 
    if (m_data->rowNum()*m_data->colNum() > Az_max_test_entries) { /* large data */
      /*---  dense is faster but uses up more memory if data is sparse  ---*/
      double nz_ratio; 
      m_data->nonZeroNum(&nz_ratio); 
      if (nz_ratio < 0.6) { /* relatively sparse */
        doSparse = true; 
        AzBytArr s; s.c("Large and sparse test data (nonzero ratio=", nz_ratio); 
        s.c("); treated as sparse data.");
        AzPrint::writeln(out, s); 
      }
    }

    data_num = m_data->colNum(); 
    m_tran_dense.reset(); 
    m_tran_sparse.reset(); 
    if (doSparse) {    
      m_data->transpose(&m_tran_sparse); 
    }
    else {
      m_tran_dense.transpose_from(m_data); 
    }
    sorted_arr.reset(); 
    feat.reset(m_data->rowNum()); 
  }
예제 #2
0
 void _item(bool isRequired, 
            const char *kw, const char *desc, 
            const char *dflt=NULL) 
 {
   AzBytArr s; 
   if (dflt != NULL && strlen(dflt) > 0) {
     s.c(" (Default:"); s.c(dflt); s.c(")"); 
   }
   _item(isRequired, kw, desc, s); 
 }
예제 #3
0
/*------------------------------------------------------------*/  
void AzpLayer::show_stat(AzBytArr &s) const {
  if (do_topthru) {
    s.c("[topthru]"); 
    return; 
  }
  cs.weight->show_stat(s); 
  if (using_lm2) {
    s.c(";-lm2-;"); 
    cs.weight2->show_stat(s); 
  }
  activ_show_stat(s); 
}
예제 #4
0
 virtual void show_stat(AzBytArr &s) const {
   if (p.typ == AzpActivDflt_None) return;     
   if (!p.do_stat) return; 
   AzDvect my_v_pop(&v_pop_last); 
   my_v_pop.normalize1(); 
   s.nl(); 
   double accum = 0; 
   int ix; 
   for (ix = 0; ix < my_v_pop.rowNum(); ++ix) {
     accum += my_v_pop.get(ix); 
     s.c("b"); s.cn(v_border.get(ix)); s.c("="); s.cn(my_v_pop.get(ix),3); s.c("("); s.cn(accum,3); s.c("),"); 
   }
 }
예제 #5
0
/*-------------------------------------------------------------------------*/
int AzTools_text::replace_utf8dashes(AzByte *data, int len) /* inout */
{
  const char *eyec = "AzTools_text:;replace_utf8dashes"; 
  AzBytArr s_mydata;
  AzByte *mydata = s_mydata.reset(len*2, 0); 
  AzByte *mywp = mydata;  
  /* 0xe28093: en dash (often used as in 1900-1935) */
  /* 0xe28094: em dash (long dash) */ 
  /* 0xe2809c: double quote begin -> [ " ]  */
  /* 0xe2809d: duoble quote end   -> [ " ]  */
  /* 0xe28098: single quote begin -> [ ' ]*/
  /* 0xe28099: single quote end -> [ 's] if ending a token; [ ' ] otherwise */
  const AzByte *data_end = data+len, *wp = data; 
  for ( ; wp < data_end; ) {
    const AzByte *ptr = (AzByte *)memchr(wp,  0xE2, data_end-wp); 
    if (ptr == NULL) {
      ptr = data_end; 
    }
    int mvlen = Az64::ptr_diff(ptr-wp, eyec); 
    if (mvlen > 0) {
      memcpy(mywp, wp, mvlen);  mywp += mvlen; /* string before 0xE2 */
    }
    if (ptr+3 <= data_end) {
      AzByte prevch = (ptr-1 >= data) ? *(ptr-1) : 0; 
      AzByte nextch = (ptr+3 < data_end) ? *(ptr+3) : 0; 
      AzBytArr s; 
      if (*(ptr+1) == 0x80) {
        if (*(ptr+2) == 0x93) { /* en dash */
          if (prevch>='0' && prevch<='9' && nextch>='0' && nextch<='9') s.c("-");  /* between digits */
          else                                                          s.c(" - "); 
        }
        else if (*(ptr+2) == 0x94)                          s.c(" - "); /* em dash */
        else if (*(ptr+2) == 0x98)                          s.c(" ' "); 
        else if (*(ptr+2) == 0x9c || *(ptr+2) == 0x9d)      s.c(" \" "); /* double quote */        
        else if (*(ptr+2) == 0x99) {
          if (ptr+5<=data_end && memcmp(ptr+3, "s ", 2)==0) s.c(" '");    
          else                                              s.c(" ' "); 
        }
        else s.c(ptr, 3); 
      }
      else s.c(ptr, 3); 
      memcpy(mywp, s.point(), s.length());  mywp += s.length(); 
      wp = ptr+3;     
    }
    else {
      mvlen = Az64::ptr_diff(data_end-ptr, eyec);
      if (mvlen > 0) {
        memcpy(mywp, ptr, mvlen); mywp += mvlen; 
      }
      wp = data_end; 
    }
  }
  int mydata_len = Az64::ptr_diff(mywp-mydata); 
  memcpy(data, mydata, mydata_len); 
  mydata[mydata_len] = '\0'; 
  return mydata_len; 
}
예제 #6
0
  virtual void format(AzBytArr &s, bool do_reset=false) const {
    if (do_reset) s.reset(); 
//    s.c("("); 
    int ix; 
    for (ix = 0; ix < ia_sz.size(); ++ix) {
      if (ix > 0) s.c(" x "); 
      s.cn(ia_sz.get(ix)); 
    }
//    s.c(")"); 
  }  
예제 #7
0
/*--------------------------------------------------------*/
void AzTreeEnsemble::show_weights(const AzOut &out, AzSvFeatInfo *fi) const
{
  AzIIFarr iifa_tx_nx_posiw, iifa_tx_nx_negaw; 
  int tx; 
  for (tx = 0; tx < t_num; ++tx) {
    int nx; 
    for (nx = 0; nx < t[tx]->nodeNum(); ++nx) {
      const AzTreeNode *np = t[tx]->node(nx); 
      if (np->weight > 0) {
        iifa_tx_nx_posiw.put(tx, nx, np->weight); 
      }
      else if (np->weight < 0) {
        iifa_tx_nx_negaw.put(tx, nx, np->weight); 
      }
    }
  }
  iifa_tx_nx_posiw.sort_Float(false); /* descending order */
  iifa_tx_nx_negaw.sort_Float(true); /* ascending order */

  AzPrint::writeln(out, "Positive weights -------------------"); 
  int ix; 
  for (ix = 0; ix < iifa_tx_nx_posiw.size(); ++ix) {
    int tx, nx; 
    double w = iifa_tx_nx_posiw.get(ix, &tx, &nx); 
    AzBytArr s_desc; 
    t[tx]->genDesc(fi, nx, &s_desc); 
    AzBytArr s; s.cn(w, 6, false); s.c(' '); s.c(&s_desc); 
    AzPrint::writeln(out, s); 
  }

  AzPrint::writeln(out, "Negative weights -------------------"); 
  for (ix = 0; ix < iifa_tx_nx_negaw.size(); ++ix) {
    int tx, nx; 
    double w = iifa_tx_nx_negaw.get(ix, &tx, &nx); 
    AzBytArr s_desc; 
    t[tx]->genDesc(fi, nx, &s_desc); 
    AzBytArr s; s.cn(w, 6, false); s.c(' '); s.c(&s_desc); 
    AzPrint::writeln(out, s); 
  }
}
예제 #8
0
 inline virtual void show(const char *header, const AzOut &out) const {
   AzBytArr s; 
   s.c(header); 
   format(s); 
   AzPrint::writeln(out, s); 
 }