// Extract the rows of myData that fall inside a single BED feature.
//
// myBed is a StringVector holding one BED record:
//   myBed(0) = chrom
//   myBed(1) = chromStart
//   myBed(2) = chromEnd
//   myBed(3) = name
//
// myData is a StringMatrix:
//   column 0 = CHROM
//   column 1 = POS
//
// fill_missing:
//   != 1  Return only the rows of myData on the feature's chromosome whose
//         POS lies in [start, end]. If the scan for the chromosome or for
//         the start position runs off the end of myData, an empty matrix
//         (0 rows, same number of columns) is returned.
//   == 1  Return one row per position from start to end inclusive. Positions
//         present in myData are copied; absent positions get a placeholder
//         row (CHROM, POS, NA in column 2).
//
// The scans below walk forward only, so they presumably expect myData to be
// grouped by chromosome and sorted by POS within a chromosome -- confirm
// against callers.
Rcpp::StringMatrix proc_feature( Rcpp::StringVector myBed,
                                 Rcpp::StringMatrix myData,
                                 int fill_missing
                               ){

  // Convert POS to ints.
  // NOTE(review): assumes get_pos() returns one entry per row of myData.
  std::vector< int > POS = get_pos(myData);

  const int nrows = myData.nrow();

  // Create an empty matrix to return when nothing is found.
  Rcpp::StringMatrix MT_matrix(0, myData.ncol());

  // myBed includes start and stop coordinates as strings.
  // Convert them to int.
  std::string temp = Rcpp::as< std::string >( myBed(1) );
  int start = atoi(temp.c_str());
  temp = Rcpp::as< std::string >( myBed(2) );
  int end = atoi(temp.c_str());

  // Manage if feature is on reverse strand.
  if( end < start ){
    int tmp = start;
    start = end;
    end = tmp;
  }

  // Advance i to the first row whose chromosome matches the BED record.
  // The bounds test comes first so that myData is never indexed at nrows.
  int i = 0; // Data row counter
  while( i < nrows && myData(i,0) != myBed(0) ){
    i++;
  }

  // If we didn't find the chromosome return an empty matrix.
  if( i == nrows && fill_missing != 1 ){
    return MT_matrix;
  }

  // i is now at the correct chromosome (or at nrows).
  // Advance to the first row at or beyond the start of the annotation.
  while( i < nrows && myData(i,0) == myBed(0) && POS[i] < start ){
    i++;
  }

  // If we didn't find the POS return an empty matrix.
  if( i == nrows && fill_missing != 1 ){
    return MT_matrix;
  }

  // Advance j to one past the last row of the feature.
  // Rows [i, j) are on the feature's chromosome with POS <= end.
  int j = i;
  while( j < nrows && myData(j,0) == myBed(0) && POS[j] <= end ){
    j++;
  }

  if( fill_missing != 1 ){
    // Do not fill missing data: copy rows [i, j) verbatim.
    Rcpp::StringMatrix myMatrix( j - i, myData.ncol() );
    Rcpp::colnames(myMatrix) = Rcpp::colnames(myData);

    for(int k = 0; k < myMatrix.nrow(); k++){
      Rcpp::checkUserInterrupt();
      myMatrix(k, Rcpp::_) = myData(k + i, Rcpp::_);
    }
    return myMatrix;
  }

  // Fill missing data: one output row per position in [start, end].
  Rcpp::StringMatrix myMatrix( end - start + 1, myData.ncol() );
  Rcpp::colnames(myMatrix) = Rcpp::colnames(myData);

  int l = 0; // Number of data rows consumed so far.
  for(int k = 0; k < myMatrix.nrow(); k++){
    Rcpp::checkUserInterrupt();

    // Only rows in [i, j) belong to this feature. Bounding by j rather than
    // nrows keeps rows from a following chromosome from being matched.
    bool have_row = false;
    if( i + l < j ){
      temp = Rcpp::as< std::string >( myData( i + l, 1 ) );
      int myPOS = atoi(temp.c_str());
      have_row = ( myPOS == start + k );
    }

    if( have_row ){
      myMatrix(k, Rcpp::_) = myData( i + l, Rcpp::_ );
      l++;
    } else {
      // Placeholder row. It is labelled with the position being filled
      // (start + k), not with the position of the next available data row.
      // NOTE(review): writing column 2 assumes myData has at least three
      // columns; remaining columns are left at their default value.
      std::ostringstream stm;
      stm << start + k;
      myMatrix(k,0) = myBed(0);
      myMatrix(k,1) = stm.str();
      myMatrix(k,2) = NA_STRING;
    }
  }

  return myMatrix;
}
//' @rdname is_het //' @name is_het //' //' //' //' @export // [[Rcpp::export]] Rcpp::LogicalMatrix is_het(Rcpp::StringMatrix x, Rcpp::LogicalVector na_is_false = true ){ // NA matrix to return in case of unexpected results. // Rcpp::LogicalMatrix nam( 1, 1 ); // nam(0,0) = NA_LOGICAL; // Initialize return data matrix. Rcpp::LogicalMatrix hets( x.nrow(), x.ncol() ); hets.attr("dimnames") = x.attr("dimnames"); int i; int j; int k; for( i=0; i<x.nrow(); i++){ for( j=0; j<x.ncol(); j++){ // Parse genotype string into alleles. std::string my_string; if( x(i,j) == NA_STRING ){ my_string = "."; } else { my_string = x(i,j); } std::vector < std::string > allele_vec; // vcfRCommon::strsplit(my_string, allele_vec, my_split); int unphased_as_na = 0; // 0 == FALSE vcfRCommon::gtsplit( my_string, allele_vec, unphased_as_na ); // Rcpp::Rcout << "gtsplit returned: " << allele_vec[0]; // for( k=1; k<allele_vec.size(); k++){ // Rcpp::Rcout << "," << allele_vec[k]; // } // Rcpp::Rcout << "\n"; // Initialize new vector of alleles with first element of allele_vec. std::vector < std::string > allele_vec2; // Scroll through vector looking for alleles. for(k=0; k<allele_vec.size(); k++){ if( allele_vec[k] == "." ){ // Found missing value. // Delete and bail out. while( allele_vec2.size() > 0 ){ allele_vec2.erase( allele_vec2.begin() ); } k = allele_vec.size(); } else if( allele_vec2.size() == 0 ){ // Initialize. allele_vec2.push_back( allele_vec[k] ); } else if( allele_vec2[0] != allele_vec[k] ){ allele_vec2.push_back( allele_vec[k] ); } } // Rcpp::Rcout << "allele_vec2.size(): " << allele_vec2.size(); // Rcpp::Rcout << "\n"; // Rcpp::Rcout << "\n"; // Score return value. if( allele_vec2.size() == 0){ if( na_is_false[0] == true ){ hets(i,j) = false; } else if( na_is_false[0] == false ){ hets(i,j) = NA_LOGICAL; } } else if( allele_vec2.size() == 1){ hets(i,j) = false; } else if( allele_vec2.size() > 1){ hets(i,j) = true; } } } return( hets ); }