Beispiel #1
0
// Counts, for every distinct word, the number of documents in `x` that
// contain it, optionally dropping stop words.
//
// x      List whose elements are each converted to a CharacterVector; every
//        element is treated as one document.
// stop_  NULL, or a character vector whose first element is handed to
//        _loadStopWordDict() together with the set to fill. Words present in
//        that set are omitted from the result. A zero-length vector is treated
//        like NULL instead of being indexed out of bounds.
//
// Returns wrap() of a word -> document-count map, converted to a List.
// NOTE(review): despite the function name, this variant returns raw document
// counts; no logarithm is applied here — confirm that callers expect this.
// [[Rcpp::export]]
List get_idf_cpp(List x,Nullable<CharacterVector> stop_) {

  // Build the per-word statistics; the position of a document inside `x` is
  // its id, which inner_find uses to count each word once per document.
  IDFmap m;
  const R_xlen_t n_docs = x.size();
  for (R_xlen_t i = 0; i < n_docs; ++i) {
    // inner_find takes a non-const reference, so a named vector is required.
    CharacterVector doc = as<CharacterVector>(x[i]);
    inner_find(doc, m, static_cast<unsigned int>(i));
  }

  // Stays empty when no stop-word file is supplied, in which case the filter
  // below keeps every word.
  unordered_set<string> st;
  if (!stop_.isNull()) {
    CharacterVector stop_value = stop_.get();
    if (stop_value.size() > 0) {
      const char *const stop_path = stop_value[0];
      _loadStopWordDict(stop_path, st);
    }
  }

  RCPP_UNORDERED_MAP< string,unsigned int > res;
  for (const auto& entry : m) {
    if (st.find(entry.first) == st.end()) {
      // .second.second is the number of documents containing the word.
      res[entry.first] = entry.second.second;
    }
  }
  return wrap(res);
}
Beispiel #2
0
// Computes the inverse document frequency, log(N / df), of every distinct
// word in `x` and returns it as a data.frame-classed list.
//
// x      List whose elements are each converted to a CharacterVector; every
//        element is treated as one document. N is x.size().
// stop_  NULL, or a character vector whose first element is handed to
//        _loadStopWordDict() together with the set to fill. Words present in
//        that set are omitted from the result. A zero-length vector is treated
//        like NULL instead of being indexed out of bounds.
//
// Returns a List with columns "name" (the word) and "count" (its IDF value),
// with "row.names", "names" and "class" = "data.frame" attributes set.
//
// The IDF formula is applied whether or not a stop-word file is given.
// Previously the no-stop-word path stored the raw document count in the
// "count" column, so the column's meaning depended on `stop_`.
// [[Rcpp::export]]
List get_idf_cpp(List x,Nullable<CharacterVector> stop_) {

  // Build the per-word statistics; the position of a document inside `x` is
  // its id, which inner_find uses to count each word once per document.
  IDFmap m;
  const R_xlen_t n_docs = x.size();
  for (R_xlen_t i = 0; i < n_docs; ++i) {
    // inner_find takes a non-const reference, so a named vector is required.
    CharacterVector doc = as<CharacterVector>(x[i]);
    inner_find(doc, m, static_cast<unsigned int>(i));
  }

  // Stays empty when no stop-word file is supplied, in which case the filter
  // below keeps every word.
  unordered_set<string> st;
  if (!stop_.isNull()) {
    CharacterVector stop_value = stop_.get();
    if (stop_value.size() > 0) {
      const char *const stop_path = stop_value[0];
      _loadStopWordDict(stop_path, st);
    }
  }

  vector<string> sts;
  vector<double> stn;
  sts.reserve(m.size());
  stn.reserve(m.size());

  const double xsize = x.size();
  for (const auto& entry : m) {
    if (st.find(entry.first) != st.end()) {
      continue;  // stop word
    }
    sts.push_back(entry.first);
    // .second.second is the number of documents containing the word; it is
    // at least 1 for every key that inner_find inserted.
    stn.push_back( log(xsize / entry.second.second) );
  }

  // Row names are the zero-based positions rendered as strings.
  vector<string> row_names;
  row_names.reserve(sts.size());
  for (unsigned int i = 0; i < sts.size(); ++i) {
    row_names.emplace_back(int64tos(i));
  }

  List res = List::create(_["name"] = wrap(sts),_["count"] = wrap(stn));
  res.attr("row.names") = row_names;
  res.attr("names") = CharacterVector::create("name","count");
  res.attr("class") = "data.frame";
  return res;
}
Beispiel #3
0
// Folds the words of one document into the word-statistics map.
//
// y    words of the document.
// m    map from word to a pair: .first is the id of the last document in
//      which the word was seen, .second is the number of distinct document
//      ids seen for it so far.
// dis  id of the document being processed.
//
// A word repeated within the same document is counted once, because the
// counter only advances when the stored document id differs from `dis`.
void inner_find(CharacterVector& y,IDFmap& m,unsigned int dis){
  for (auto word_it = y.begin(), last = y.end(); word_it != last; ++word_it) {
    const string word = as<string>(*word_it);
    IDFmap::iterator found = m.find(word);

    if (found != m.end()) {
      auto& stats = found->second;
      if (stats.first == dis) {
        continue;  // already counted for this document
      }
      stats.first = dis;
      stats.second = stats.second + 1;
      continue;
    }

    // First sighting of this word in any document.
    auto& fresh = m[word];
    fresh.first = dis;
    fresh.second = 1;
  }
}