/*------------------------------------------------------------------*/
/*
 * Check whether s_line is a sparse-format header line and, if so, return
 * the feature dimensionality it declares.
 *
 *   s_line          the line to examine; it is split into tokens with
 *                   AzTools::getStrings.
 *   expected_f_num  if positive, the dimensionality the header must match.
 *   str             the keyword the first token is compared against.
 *
 * Returns -1 when the line has no tokens or its first token does not compare
 * equal to str; otherwise the value parsed (via atol) from the second token.
 *
 * Throws (by pointer) AzException with AzInputError when the keyword matches
 * but the second token is missing or parses to a non-positive number, or when
 * expected_f_num is positive and differs from the parsed value.
 */
int AzSvDataS::if_sparse(AzBytArr &s_line,
                         int expected_f_num,
                         const char *str)
{
  const char *eyec = "AzSvDataS::if_sparse";

  AzBytArr s_keyword(str);
  AzStrPool sp_tokens;
  AzTools::getStrings(s_line.point(), s_line.length(), &sp_tokens);

  /* Not a header line: nothing to parse, nothing to validate. */
  if (sp_tokens.size() <= 0 ||
      s_keyword.compare(sp_tokens.c_str(0)) != 0) {
    return -1;
  }

  /* Keyword matched; the dimensionality is expected in the second token. */
  int f_num = -1;
  if (sp_tokens.size() >= 2) {
    f_num = atol(sp_tokens.c_str(1));
  }

  if (f_num <= 0) {
    throw new AzException(AzInputError, eyec,
          "1st line of sparse data file must be \"sparse dd\" where dd is the feature dimensionality.");
  }

  /* A non-positive expected_f_num disables the consistency check. */
  const bool check_expected = (expected_f_num > 0);
  if (check_expected && f_num != expected_f_num) {
    throw new AzException(AzInputError, eyec,
          "Conflict in feature dim: feature definition file vs. data file.");
  }

  return f_num;
}
Beispiel #2
0
/*-------------------------------------------------------------------------*/
/*
 * Rewrite, in place, a handful of 3-byte UTF-8 punctuation sequences
 * (all of the form E2 80 xx) as ASCII:
 *
 *   E2 80 93  en dash            -> "-" when the neighbouring bytes are both
 *                                   ASCII digits (e.g. 1900-1935), else " - "
 *   E2 80 94  em dash            -> " - "
 *   E2 80 98  single quote begin -> " ' "
 *   E2 80 99  single quote end   -> " '" when followed by "s " (so the text
 *                                   reads " 's "), else " ' "
 *   E2 80 9C  double quote begin -> " \" "
 *   E2 80 9D  double quote end   -> " \" "
 *
 * Any other sequence starting with E2 is copied through unchanged (3 bytes at
 * a time, or whatever is left if fewer than 3 bytes remain).
 *
 *   data  buffer of len bytes; overwritten with the converted text.
 *   len   number of bytes in data.
 *
 * Returns the number of bytes now valid in data.  Every replacement is at
 * most 3 bytes long, so the result never exceeds len.  No terminating NUL is
 * written to data; callers must rely on the returned length.
 */
int AzTools_text::replace_utf8dashes(AzByte *data, int len) /* inout */
{
  const char *eyec = "AzTools_text::replace_utf8dashes";
  AzBytArr s_mydata;
  AzByte *mydata = s_mydata.reset(len*2, 0);  /* scratch output buffer */
  AzByte *mywp = mydata;                      /* write position in scratch */

  const AzByte *data_end = data+len, *wp = data;
  while (wp < data_end) {
    const AzByte *ptr = static_cast<const AzByte *>(memchr(wp, 0xE2, data_end-wp));
    if (ptr == NULL) {
      ptr = data_end;
    }

    /* Copy the text preceding the 0xE2 byte (or the rest of the input). */
    int mvlen = Az64::ptr_diff(ptr-wp, eyec);
    if (mvlen > 0) {
      memcpy(mywp, wp, mvlen);  mywp += mvlen;
    }

    /* Compare distances rather than forming ptr+3 / ptr-1, which could */
    /* point outside the buffer.                                        */
    if (data_end-ptr >= 3) {
      const AzByte prevch = (ptr > data) ? *(ptr-1) : 0;
      const AzByte nextch = (data_end-ptr > 3) ? *(ptr+3) : 0;

      const char *ascii = NULL;  /* NULL: keep the 3 bytes as they are */
      if (ptr[1] == 0x80) {
        switch (ptr[2]) {
        case 0x93: { /* en dash */
          const bool between_digits = (prevch>='0' && prevch<='9' &&
                                       nextch>='0' && nextch<='9');
          ascii = between_digits ? "-" : " - ";
          break;
        }
        case 0x94: /* em dash */
          ascii = " - ";
          break;
        case 0x98: /* single quote begin */
          ascii = " ' ";
          break;
        case 0x9c: /* double quote begin */
        case 0x9d: /* double quote end */
          ascii = " \" ";
          break;
        case 0x99: /* single quote end */
          if (data_end-ptr >= 5 && memcmp(ptr+3, "s ", 2)==0) ascii = " '";
          else                                                ascii = " ' ";
          break;
        default:
          break;
        }
      }

      if (ascii != NULL) {
        const size_t ascii_len = strlen(ascii);
        memcpy(mywp, ascii, ascii_len);  mywp += ascii_len;
      }
      else {
        memcpy(mywp, ptr, 3);  mywp += 3;
      }
      wp = ptr+3;
    }
    else {
      /* Fewer than 3 bytes remain: copy them through untouched. */
      mvlen = Az64::ptr_diff(data_end-ptr, eyec);
      if (mvlen > 0) {
        memcpy(mywp, ptr, mvlen);  mywp += mvlen;
      }
      wp = data_end;
    }
  }

  int mydata_len = Az64::ptr_diff(mywp-mydata, eyec);
  if (mydata_len > 0) {
    memcpy(data, mydata, mydata_len);
  }
  return mydata_len;
}
Beispiel #3
0
 /*
  * Refresh this object's settings from azp, using pfx as the prefix while
  * doing so.
  *
  * On a cold start (is_warmstart == false) s_activ_typ and trunc are handed
  * to azp.vStr / azp.vFloat under kw_activ_typ / kw_trunc, and typ is set to
  * the first byte of s_activ_typ when that string is non-empty (typ is left
  * untouched otherwise).  On a warm start those are skipped.
  * do_stat is handed to azp.swOn under kw_do_stat in both cases.
  * The prefix is cleared again with azp.reset_prefix() before returning.
  */
 virtual void resetParam(AzParam &azp, const char *pfx, bool is_warmstart) {
   azp.reset_prefix(pfx);

   const bool cold_start = !is_warmstart;
   if (cold_start) {
     azp.vStr(kw_activ_typ, &s_activ_typ);
     const bool has_activ_typ = (s_activ_typ.length() > 0);
     if (has_activ_typ) {
       typ = s_activ_typ.point()[0];
     }
     azp.vFloat(kw_trunc, &trunc);
   }

   azp.swOn(&do_stat, kw_do_stat);

   azp.reset_prefix();
 }
Beispiel #4
0
 /*
  * Refresh the pooling settings from azp, using pfx as the prefix while
  * doing so.
  *
  * Nothing is read on a warm start (is_warmstart == true).  Otherwise:
  *   - s_pl_type is handed to azp.vStr under kw_pl_type; ptyp becomes its
  *     first byte, or AzpPoolingDflt_None when the string is empty;
  *   - pl_num, pl_sz and pl_step are handed to azp.vInt under their keywords;
  *   - do_pl_simple_grid is handed to azp.swOff under kw_no_pl_simple_grid.
  * The prefix is cleared again with azp.reset_prefix() before returning.
  */
 void resetParam(AzParam &azp, const char *pfx, bool is_warmstart) {
   azp.reset_prefix(pfx);

   const bool cold_start = !is_warmstart;
   if (cold_start) {
     azp.vStr(kw_pl_type, &s_pl_type);
     if (s_pl_type.length() <= 0) {
       ptyp = AzpPoolingDflt_None;
     }
     else {
       ptyp = s_pl_type.point()[0];
     }

     azp.vInt(kw_pl_num, &pl_num);
     azp.vInt(kw_pl_sz, &pl_sz);
     azp.vInt(kw_pl_step, &pl_step);

     azp.swOff(&do_pl_simple_grid, kw_no_pl_simple_grid);
   }

   azp.reset_prefix();
 }