Beispiel #1
0
//' @export
// [[Rcpp::export]]
arma::vec th(Rcpp::StringVector strata_rsid, Rcpp::StringVector rsid, Rcpp::NumericVector r2){
	
	// Inputs:
	//
	// strata_rsid is the overall index of rsid returned from stratify()
	// rsid is the rsid column from LdList
	// r2 is the r2 column from LdList
	
	// Create the output vector of th numbers
	arma::vec th(strata_rsid.size(), fill::zeros);

	// Find the indices of where the rsid are in strata_rsid
	for(int i = 0; i < rsid.size(); i++){

		// Pull out each of the RSIDs successively 
		// std::string id = rsid(i);

		// Find the index in strata_rsid where the RSID string is
		// and fill it in with the r2 at the given id.
//		th( arma::find( strata_rsid == rsid(i) ) ) = r2(i);
	}

	// Create duplicate vector to fill in discretized values
	arma::vec out = th;

	out.elem( find( th >= 0.2 ) ).fill(0.2);
	out.elem( find( th >= 0.4 ) ).fill(0.4);
	out.elem( find( th >= 0.6 ) ).fill(0.6);
	out.elem( find( th >= 0.8 ) ).fill(0.8);
	out.elem( find( th >= 0.9 ) ).fill(0.9);
	out.elem( find( th >= 1 ) ).fill(1);
	
	return out;

}
Beispiel #2
0
Rcpp::StringMatrix DataFrame_to_StringMatrix( Rcpp::DataFrame df ){
  Rcpp::StringVector sv = df(0);
  Rcpp::StringMatrix sm(sv.size(), df.size());
  
  sm.attr("col.names") = df.attr("col.names");
  sm.attr("row.names") = df.attr("row.names");

  for(int i=0; i < df.size(); i++){
    sv = df(i);
    for(int j=0; j < sv.size(); j++){
      sm(j, i) = sv(j);
    }
  }

  return sm;
}
Beispiel #3
0
void TabEditor::displayEditor(RCore *rExe,std::string name, DataEditor *dataEditor, VariableEditor *variableEditor){
    qDebug("Display Editor");
    rExe->diplayData(name);
    //----Update
    dataEditor->setVarTypes(rExe->getVarTypes());
    QStringList varNames;
    Rcpp::StringVector sv = rExe->getColNames();
    for(int i = 0; i < sv.size();i++){
        varNames.push_back(QString(sv[i]));
    }
    dataEditor->setVarNames(varNames);
    dataEditor->loadData(rExe->getDataFrame(),rExe->getColNames());
    //---
    variableEditor->loadVariable(rExe,rExe->getColNames());

    addTab(dataEditor,"Data");
    addTab(variableEditor, "Variable");
    setTabPosition(West);
    //Update 30 Juni
    connect(this, SIGNAL(currentChanged(int)), variableEditor, SLOT(checkWidgetVisibility()));
}
Beispiel #4
0
// [[Rcpp::export]]
Rcpp::NumericMatrix infoContentMethod_cpp(
    Rcpp::StringVector&  id1_,
    Rcpp::StringVector&  id2_,
    Rcpp::List&          anc_,
    Rcpp::NumericVector& ic_,
    const std::string&   method_,
    const std::string&   ont_
) {
    go_dist_func_t* go_dist;
    // Resnik does not consider how distant the terms are from their common ancestor.
    //  Lin and Jiang take that distance into account.
    if (method_ == "Resnik") {
        go_dist = &go_dist_Resnik;
    }
    else if (method_ == "Lin") {
        go_dist = &go_dist_Lin;
    }
    else if (method_ == "Jiang") {
        go_dist = &go_dist_Jiang;
    }
    else if (method_ == "Rel") {
        go_dist = &go_dist_Rel;
    }
    else {
        throw std::runtime_error( "Unknown GO distance method" );
    }

    typedef std::string term_id_t;
    typedef std::set<term_id_t> term_set_t;

    // calculate the maximum IC and build the map of normalized IC
    typedef std::map<term_id_t, double> ic_map_t;
    ic_map_t normIcMap;
    // more specific term, larger IC value.
    // Normalized, all divide the most informative IC.
    // all IC values range from 0(root node) to 1(most specific node)
    double mic = NA_REAL;
    {
        Rcpp::StringVector icNames( ic_.names() );
        for (std::size_t i=0; i < ic_.size(); i++ ) {
            const double cic = ic_[i];
            if ( Rcpp::NumericVector::is_na( cic ) || cic == R_PosInf ) continue;
            if ( Rcpp::NumericVector::is_na( mic ) || mic < cic ) mic = cic;
        }
        LOG_DEBUG( "mic=" << mic );
        for (std::size_t i=0; i < ic_.size(); i++ ) {
            const double cic = ic_[i];
            if ( Rcpp::NumericVector::is_na( cic ) || cic == R_PosInf ) continue;
            normIcMap.insert( std::make_pair( (std::string) icNames[i], cic / mic ) );
        }
    }

    // set root node IC to 0
    if(ont_ == "DO") {
        normIcMap["DOID:4"] = 0;
    } else {
        normIcMap["all"] = 0;
    }

    // convert anc_ into map of sets
    typedef std::map<term_id_t, term_set_t> anc_map_t;
    anc_map_t ancMap;
    {
        Rcpp::StringVector goTerms( anc_.names() );
        for (std::size_t i=0; i < anc_.size(); i++ ) {
            const std::vector<std::string> ancVec = Rcpp::as<std::vector<std::string> >( anc_[i] );
            term_set_t ancestors( ancVec.begin(), ancVec.end() );
            // term itself is also considered an ancestor
            ancestors.insert( (std::string)goTerms[i] );
            ancMap.insert( std::make_pair( (std::string) goTerms[i], ancestors ) );
        }
    }

    Rcpp::NumericMatrix res( id1_.size(), id2_.size() );
    res.attr("dimnames") = Rcpp::Rcpp_list2( id1_, id2_ );
    for ( std::size_t i = 0; i < id1_.size(); i++ ) {
        const std::string id1_term = (std::string)id1_[i];
        const ic_map_t::const_iterator iIcIt = normIcMap.find( id1_term );
        if ( iIcIt != normIcMap.end() && iIcIt->second != 0 ) {
            const double iIc = iIcIt->second;
            LOG_DEBUG( "ic[" << id1_term << "]=" << iIc );
            const anc_map_t::const_iterator iAncsIt = ancMap.find( id1_term );
            for ( std::size_t j = 0; j < id2_.size(); j++ ) {
                const std::string id2_term = (std::string)id2_[j];
                const ic_map_t::const_iterator jIcIt = normIcMap.find( id2_term );
                if ( jIcIt != normIcMap.end() && jIcIt->second != 0 ) {
                    const anc_map_t::const_iterator jAncsIt = ancMap.find( id2_term );
                    // find common ancestors
                    term_set_t commonAncs;
                    if ( iAncsIt != ancMap.end() && jAncsIt != ancMap.end() ) {
                        std::set_intersection( iAncsIt->second.begin(), iAncsIt->second.end(),
                                               jAncsIt->second.begin(), jAncsIt->second.end(),
                                               std::inserter( commonAncs, commonAncs.end() ) );
                    }
                    LOG_DEBUG( "n(commonAncs(" << id1_term << "," << id2_term << "))=" << commonAncs.size() );

                    // Information Content of the most informative common ancestor (MICA)
                    double mica = 0;
                    for ( term_set_t::const_iterator termIt = commonAncs.begin(); termIt != commonAncs.end(); ++termIt ) {
                        ic_map_t::const_iterator ancIcIt = normIcMap.find( *termIt );
                        if ( ancIcIt != normIcMap.end() && mica < ancIcIt->second ) mica = ancIcIt->second;
                    }
                    LOG_DEBUG( "mica(" << id1_term << "," << id2_term << ")=" << mica );
                    res(i,j) = go_dist( mica, iIc, jIcIt->second, mic );
                } else {
                    res(i,j) = NA_REAL;
                }
            }
        } else {
            for ( std::size_t j = 0; j < id2_.size(); j++ ) {
                res(i,j) = NA_REAL;
            }
        }
    }
    return ( res );
}
Beispiel #5
0
// [[Rcpp::export]]
void write_vcf_body_gz( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) {
  // http://stackoverflow.com/a/5649224
  
  // fix DataFrame
  Rcpp::StringVector chrom  = fix["CHROM"];
  Rcpp::StringVector pos    = fix["POS"];
  Rcpp::StringVector id     = fix["ID"];
  Rcpp::StringVector ref    = fix["REF"];
  Rcpp::StringVector alt    = fix["ALT"];
  Rcpp::StringVector qual   = fix["QUAL"];
  Rcpp::StringVector filter = fix["FILTER"];
  Rcpp::StringVector info   = fix["INFO"];
  
  // gt DataFrame
  Rcpp::StringMatrix gt_cm = DataFrame_to_StringMatrix(gt);
  Rcpp::StringVector column_names(gt.size());
  column_names = gt.attr("names");
  
  int i = 0;
  int j = 0;
  
  gzFile *fi = (gzFile *)gzopen(filename.c_str(),"ab");
//  gzFile *fi = (gzFile *)gzopen(filename.c_str(),"abw");
  for(i=0; i<chrom.size(); i++){
    Rcpp::checkUserInterrupt();
    if(mask == 1 && filter(i) != "PASS" ){
      // Don't print variant.
    } else {
      std::string tmpstring;
      tmpstring = chrom(i);
      tmpstring = tmpstring + "\t" + pos(i) + "\t";
      if(id(i) == NA_STRING){
        tmpstring = tmpstring + ".";
      } else {
        tmpstring = tmpstring + id(i);
      }
      tmpstring = tmpstring + "\t" + ref(i) + "\t" + alt(i) + "\t";
      if(qual(i) == NA_STRING){
        tmpstring = tmpstring + "." + "\t";
      } else {
        tmpstring = tmpstring + qual(i) + "\t";
      }
      if(filter(i) == NA_STRING){
        tmpstring = tmpstring + "." + "\t";
      } else {
        tmpstring = tmpstring + filter(i) + "\t";
      }
      tmpstring = tmpstring + info(i);

      // gt portion
      for(j=0; j<column_names.size(); j++){
        if(gt_cm(i, j) == NA_STRING){
          tmpstring = tmpstring + "\t" + "./.";
        } else {
          tmpstring = tmpstring + "\t" + gt_cm(i, j);
        }
      }


//      gzwrite(fi,"my decompressed data",strlen("my decompressed data"));
//      gzwrite(fi,"\n",strlen("\n"));
//      std::string tmpstring = "test string\n";
      gzwrite(fi, (char *)tmpstring.c_str(), tmpstring.size());
      
      gzwrite(fi,"\n",strlen("\n"));
    }
  }
  gzclose(fi);
  
  
  return;
}
Beispiel #6
0
// [[Rcpp::export]]
void write_vcf_body( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) {
//int write_vcf_body( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) {

  // fix DataFrame
  Rcpp::StringVector chrom  = fix["CHROM"];
  Rcpp::StringVector pos    = fix["POS"];
  Rcpp::StringVector id     = fix["ID"];
  Rcpp::StringVector ref    = fix["REF"];
  Rcpp::StringVector alt    = fix["ALT"];
  Rcpp::StringVector qual   = fix["QUAL"];
  Rcpp::StringVector filter = fix["FILTER"];
  Rcpp::StringVector info   = fix["INFO"];

  // gt DataFrame
  Rcpp::StringMatrix gt_cm = DataFrame_to_StringMatrix(gt);
  Rcpp::StringVector column_names(gt.size());
  column_names = gt.attr("names");
//  column_names = gt_cm.attr("col.names");
//  delete gt;
  
  int i = 0;
  int j = 0;

  // Uncompressed.
  std::ofstream myfile;
  myfile.open (filename.c_str(), std::ios::out | std::ios::app | std::ios::binary);
  
//  gzFile *fi = (gzFile *)gzopen("file.gz","wb");
  

  for(i=0; i<chrom.size(); i++){
    Rcpp::checkUserInterrupt();
    if(mask == 1 && filter(i) == "PASS" ){
      // Don't print variant.
    } else {
      myfile << chrom(i);
      myfile << "\t";
      myfile << pos(i);
      myfile << "\t";
      if(id(i) == NA_STRING){
        myfile << ".";
        myfile << "\t";
      } else {
        myfile << id(i);
        myfile << "\t";
      }
      myfile << ref(i);
      myfile << "\t";
      myfile << alt(i);
      myfile << "\t";
      if(qual(i) == NA_STRING){
        myfile << ".";
        myfile << "\t";
      } else {
        myfile << qual(i);
        myfile << "\t";
      }
      if(filter(i) == NA_STRING){
        myfile << ".";
        myfile << "\t";
      } else {
        myfile << filter(i);
        myfile << "\t";
      }
      if(info(i) == NA_STRING){
        myfile << ".";
        myfile << "\t";
      } else {
        myfile << info(i);
      }
      
      // gt region.
      myfile << "\t";
      myfile << gt_cm(i, 0);
      for(j=1; j<column_names.size(); j++){
        myfile << "\t";
        myfile << gt_cm(i, j);
      }

      myfile << "\n";
    }
  }

  myfile.close();
  
  return;
}
//' rcpp_lines_as_network
//'
//' Return OSM data in Simple Features format
//'
//' @param sf_lines An sf collection of LINESTRING objects
//' @param pr Rcpp::DataFrame containing the weighting profile
//'
//' @return Rcpp::List objects of OSM data
//'
//' @noRd
// [[Rcpp::export]]
Rcpp::List rcpp_lines_as_network (const Rcpp::List &sf_lines,
        Rcpp::DataFrame pr)
{
    std::map <std::string, float> profile;
    Rcpp::StringVector hw = pr [1];
    Rcpp::NumericVector val = pr [2];
    for (int i = 0; i != hw.size (); i ++)
        profile.insert (std::make_pair (std::string (hw [i]), val [i]));

    Rcpp::CharacterVector nms = sf_lines.attr ("names");
    if (nms [nms.size () - 1] != "geometry")
        throw std::runtime_error ("sf_lines have no geometry component");
    if (nms [0] != "osm_id")
        throw std::runtime_error ("sf_lines have no osm_id component");
    int one_way_index = -1;
    int one_way_bicycle_index = -1;
    int highway_index = -1;
    for (int i = 0; i < nms.size (); i++)
    {
        if (nms [i] == "oneway")
            one_way_index = i;
        if (nms [i] == "oneway.bicycle")
            one_way_bicycle_index = i;
        if (nms [i] == "highway")
            highway_index = i;
    }
    Rcpp::CharacterVector ow = NULL;
    Rcpp::CharacterVector owb = NULL;
    Rcpp::CharacterVector highway = NULL;
    if (one_way_index >= 0)
        ow = sf_lines [one_way_index];
    if (one_way_bicycle_index >= 0)
        owb = sf_lines [one_way_bicycle_index];
    if (highway_index >= 0)
        highway = sf_lines [highway_index];
    if (ow.size () > 0)
    {
        if (ow.size () == owb.size ())
        {
            for (unsigned i = 0; i != ow.size (); ++ i)
                if (ow [i] == "NA" && owb [i] != "NA")
                    ow [i] = owb [i];
        } else if (owb.size () > ow.size ())
            ow = owb;
    }

    Rcpp::List geoms = sf_lines [nms.size () - 1];
    std::vector<bool> isOneWay (geoms.length ());
    std::fill (isOneWay.begin (), isOneWay.end (), false);
    // Get dimension of matrix
    size_t nrows = 0;
    int ngeoms = 0;
    for (auto g = geoms.begin (); g != geoms.end (); ++g)
    {
        // Rcpp uses an internal proxy iterator here, NOT a direct copy
        Rcpp::NumericMatrix gi = (*g);
        int rows = gi.nrow () - 1;
        nrows += rows;
        if (ngeoms < ow.size ())
        {
            if (!(ow [ngeoms] == "yes" || ow [ngeoms] == "-1"))
            {
                nrows += rows;
                isOneWay [ngeoms] = true;
            }
        }
        ngeoms ++;
    }

    Rcpp::NumericMatrix nmat = Rcpp::NumericMatrix (Rcpp::Dimension (nrows, 6));
    Rcpp::CharacterMatrix idmat = Rcpp::CharacterMatrix (Rcpp::Dimension (nrows,
                3));

    nrows = 0;
    ngeoms = 0;
    int fake_id = 0;
    for (auto g = geoms.begin (); g != geoms.end (); ++ g)
    {
        Rcpp::NumericMatrix gi = (*g);
        std::string hway = std::string (highway [ngeoms]);
        float hw_factor = profile [hway];
        if (hw_factor == 0.0) hw_factor = 1e-5;
        hw_factor = 1.0 / hw_factor;

        Rcpp::List ginames = gi.attr ("dimnames");
        Rcpp::CharacterVector rnms;
        if (ginames.length () > 0)
            rnms = ginames [0];
        else
        {
            rnms = Rcpp::CharacterVector (gi.nrow ());
            for (int i = 0; i < gi.nrow (); i ++)
                rnms [i] = fake_id ++;
        }
        if (rnms.size () != gi.nrow ())
            throw std::runtime_error ("geom size differs from rownames");

        for (int i = 1; i < gi.nrow (); i ++)
        {
            float d = haversine (gi (i-1, 0), gi (i-1, 1), gi (i, 0),
                    gi (i, 1));
            nmat (nrows, 0) = gi (i-1, 0);
            nmat (nrows, 1) = gi (i-1, 1);
            nmat (nrows, 2) = gi (i, 0);
            nmat (nrows, 3) = gi (i, 1);
            nmat (nrows, 4) = d;
            nmat (nrows, 5) = d * hw_factor;
            idmat (nrows, 0) = rnms (i-1);
            idmat (nrows, 1) = rnms (i);
            idmat (nrows, 2) = hway;
            nrows ++;
            if (isOneWay [ngeoms])
            {
                nmat (nrows, 0) = gi (i, 0);
                nmat (nrows, 1) = gi (i, 1);
                nmat (nrows, 2) = gi (i-1, 0);
                nmat (nrows, 3) = gi (i-1, 1);
                nmat (nrows, 4) = d;
                nmat (nrows, 5) = d * hw_factor;
                idmat (nrows, 0) = rnms (i);
                idmat (nrows, 1) = rnms (i-1);
                idmat (nrows, 2) = hway;
                nrows ++;
            }
        }
        ngeoms ++;
    }

    Rcpp::List res (2);
    res [0] = nmat;
    res [1] = idmat;

    return res;
}