Beispiel #1
0
// Extract the rows of myData that belong to a single BED feature.
//
// myBed is one BED record held as a StringVector:
//   myBed(0) = chrom
//   myBed(1) = chromStart
//   myBed(2) = chromEnd
//   myBed(3) = name (not read by this function)
//
// myData is a StringMatrix:
//   column 0 = CHROM
//   column 1 = POS
//
// fill_missing:
//   != 1  Return the run of rows on the feature's chromosome whose POS lies
//         in [start, end].  A zero-row matrix is returned when the
//         chromosome or the start position is never reached.
//   == 1  Return one row per position in [start, end].  Positions with no
//         matching data row get CHROM, POS and (when present) an NA in
//         column 2; any further columns keep their initial value.
//
// NOTE(review): the scans stop at the first row that breaks their condition,
// so this presumably expects myData to be grouped by chromosome and sorted
// by position -- confirm against callers.
Rcpp::StringMatrix proc_feature( Rcpp::StringVector myBed,
                                 Rcpp::StringMatrix myData,
                                 int fill_missing
                                 ){

  const int n_rows = myData.nrow();
  const bool fill = ( fill_missing == 1 );

  // Create an empty matrix to return in exceptions.
  Rcpp::StringMatrix MT_matrix(0, myData.ncol());

  // A record without chrom, chromStart and chromEnd cannot be processed.
  if( myBed.size() < 3 ){
    return MT_matrix;
  }

  // Convert POS to ints (one entry per row of myData is assumed).
  std::vector< int > POS = get_pos(myData);

  // Rcpp::StringVector myBed includes start and stop integer coordinates.
  // Convert Rcpp::StringVector elements to int
  std::string temp = Rcpp::as< std::string >( myBed(1) );
  int start = atoi(temp.c_str());
  temp = Rcpp::as< std::string >( myBed(2) );
  int end = atoi(temp.c_str());

  // Manage if feature is on reverse strand
  if( end < start ){
    int tmp = start;
    start = end;
    end = tmp;
  }

  // Advance i to the first row on the feature's chromosome.
  // The bound is tested first so that no row past the end is ever read.
  int i = 0; // Data row counter
  while( i < n_rows && myData(i,0) != myBed(0) ){
    i++;
  }

  // If we didn't find the chromosome return an empty matrix.
  if( i == n_rows && !fill ){
    return MT_matrix;
  }

  // Advance i to the first row at or beyond the start of the annotation.
  while( i < n_rows && myData(i,0) == myBed(0) && POS[i] < start ){
    i++;
  }

  // If we didn't find the POS return an empty matrix.
  if( i == n_rows && !fill ){
    return MT_matrix;
  }

  if( !fill ){
    // Do not fill missing data.
    // Advance j to one past the last row inside the feature.
    int j = i;
    while( j < n_rows && myData(j,0) == myBed(0) && POS[j] <= end ){
      j++;
    }

    Rcpp::StringMatrix myMatrix( j - i, myData.ncol() );
    Rcpp::colnames(myMatrix) = Rcpp::colnames(myData);

    // Populate the return matrix
    for(int k = 0; k < myMatrix.nrow(); k++){
      Rcpp::checkUserInterrupt();
      myMatrix(k, Rcpp::_) = myData(k + i, Rcpp::_);
    }
    return myMatrix;
  }

  // Fill missing data: one output row per position in [start, end].
  Rcpp::StringMatrix myMatrix( end - start + 1, myData.ncol() );
  Rcpp::colnames(myMatrix) = Rcpp::colnames(myData);
  const bool has_third_col = ( myMatrix.ncol() > 2 );

  int row = i; // Next candidate data row.
  for(int k = 0; k < myMatrix.nrow(); k++){
    Rcpp::checkUserInterrupt();
    const int pos = start + k;

    // A data row is used only if it exists, is on the feature's chromosome
    // and sits exactly at the position this output row represents.
    bool have_data = false;
    if( row < n_rows && myData(row,0) == myBed(0) ){
      temp = Rcpp::as< std::string >( myData(row,1) );
      have_data = ( atoi(temp.c_str()) == pos );
    }

    if( have_data ){
      myMatrix(k, Rcpp::_) = myData(row, Rcpp::_);
      row++;
    } else {
      // No data here: label the row with its own coordinate.
      myMatrix(k,0) = myBed(0);
      std::ostringstream stm;
      stm << pos;
      myMatrix(k,1) = stm.str();

      if( has_third_col ){
        myMatrix(k,2) = NA_STRING;
      }
    }
  }
  return myMatrix;
}
Beispiel #2
0
// Score every genotype in a matrix as heterozygous or not.
//
// x            StringMatrix of genotype strings; each cell is split into
//              alleles with vcfRCommon::gtsplit().
// na_is_false  Only the first element is read.  When it is false, genotypes
//              with a missing allele are scored NA; otherwise (true, NA or
//              a zero-length vector) they are scored FALSE.
//
// Returns a LogicalMatrix with the dimensions and dimnames of x.  A cell is
// TRUE when at least one allele differs from the first allele, FALSE when
// all alleles match, and the missing score when the cell is NA, yields no
// alleles, or contains a "." allele.
//
//' @rdname is_het
//' @name is_het
//' 
//' 
//' 
//' @export
// [[Rcpp::export]]
Rcpp::LogicalMatrix is_het(Rcpp::StringMatrix x,
                           Rcpp::LogicalVector na_is_false = true
){

  // Initialize return data matrix.
  Rcpp::LogicalMatrix hets( x.nrow(), x.ncol() );
  hets.attr("dimnames") = x.attr("dimnames");

  // The score for a missing genotype is the same for every cell, so it is
  // resolved once.  The size check keeps an empty vector from being indexed.
  int missing_score = 0; // FALSE
  if( na_is_false.size() > 0 && na_is_false[0] == false ){
    missing_score = NA_LOGICAL;
  }

  for( int i = 0; i < x.nrow(); i++ ){
    for( int j = 0; j < x.ncol(); j++ ){

      // Parse genotype string into alleles.
      std::string my_string;
      if( x(i,j) == NA_STRING ){
        my_string = ".";
      } else {
        my_string = x(i,j);
      }

      std::vector < std::string > allele_vec;
      int unphased_as_na = 0; // 0 == FALSE
      vcfRCommon::gtsplit( my_string, allele_vec, unphased_as_na );

      // A genotype with no alleles, or with any "." allele, is missing.
      // Otherwise it is heterozygous when some allele differs from the first.
      bool is_missing = allele_vec.empty();
      bool differs = false;
      for( std::vector < std::string >::size_type k = 0; k < allele_vec.size(); k++ ){
        if( allele_vec[k] == "." ){
          is_missing = true;
          break;
        }
        if( allele_vec[k] != allele_vec[0] ){
          differs = true;
        }
      }

      // Score return value.
      if( is_missing ){
        hets(i,j) = missing_score;
      } else {
        hets(i,j) = differs;
      }

    }
  }

  return( hets );
}