// Splits a sam file into individual files, one per chromosome. The files are created in the specified directory. // Each splitted file contains the same header as the original samFile. The generated files may only contain // a header if no alignments to that chromosome exist. The names of the split files come from the sam header // with an additional .sam extension. The unmapped alignments are collected in the file splitChrSam_unaligned.sam // Returns the chromosome names in the order in which they occur in the sam file header SEXP split_sam_chr(SEXP samFile, SEXP outDir) { if (!Rf_isString(samFile) || 1 != Rf_length(samFile)){ Rf_error("'samFile' must be character(1)"); } if (!Rf_isString(outDir) || 1 != Rf_length(outDir)){ Rf_error("'outDir' must be character(1)"); } const char * sam_file = Rf_translateChar(STRING_ELT(samFile, 0)); const char * out_dir = Rf_translateChar(STRING_ELT(outDir, 0)); // open the input sam file samfile_t *fin = _bam_tryopen(sam_file, "r", NULL); if (fin->header == 0) { samclose(fin); Rf_error("invalid header"); } // remove \r from header if exists (for windows) int j, k = 0; for(j = 0; j<fin->header->l_text; j++){ if(fin->header->text[j] != '\r'){ fin->header->text[k++] = fin->header->text[j]; } } if(j != k){ fin->header->text[k] = '\0'; fin->header->l_text = (uint32_t)strlen(fin->header->text); } // allocate memory for a list of filehandles (n+1 because of the unaligned reads) samfile_t **foutList = (samfile_t**)calloc((size_t)(fin->header->n_targets+1), sizeof(samfile_t*)); // open the output file handles (n+1 due to the unaligned reads) int i; SEXP chrNames; PROTECT(chrNames = allocVector(STRSXP, (fin->header->n_targets+1))); // protect from garbage collector for (i = 0; i < (fin->header->n_targets); i++) { foutList[i] = _bam_tryopen(_assemble_file_name(out_dir,fin->header->target_name[i]), "wh", fin->header); SET_STRING_ELT(chrNames, i, mkChar(fin->header->target_name[i])); } foutList[fin->header->n_targets] = 
_bam_tryopen(_assemble_file_name(out_dir,"splitChrSam_unaligned"), "wh", fin->header); SET_STRING_ELT(chrNames, fin->header->n_targets, mkChar("splitChrSam_unaligned")); // split the sam file based on chromosome _walk_through_sam_and_split(fin,foutList); // close all the file handles for (i = 0; i < (fin->header->n_targets+1); i++){samclose(foutList[i]);} samclose(fin); UNPROTECT(1); // release return chrNames; }
// Fills `v` with element `i` of the wrapped R character vector.
//
// `v` is set to VT_NULL when `i` is at or beyond `len`, or when the element
// is NA_STRING. Otherwise `v` becomes a VT_BSTR whose string is built from the
// translated element text; the BSTR is detached from its temporary wrapper and
// handed over to `v`.
//
// Always returns true.
bool cols_wrap<std::string>::get(size_t i, VARIANT &v) const
{
    // Index past the end: report an empty cell rather than failing.
    if (i >= len) {
        v.vt = VT_NULL;
        return true;
    }

    SEXP element = STRING_ELT(vect, static_cast<R_len_t>(i));

    // A missing string maps to a null variant.
    if (element == NA_STRING) {
        v.vt = VT_NULL;
        return true;
    }

    v.vt = VT_BSTR;
    const char* text = Rf_translateChar(element);
    _bstr_t wrapper(text);
    // Detach so the BSTR is not freed when `wrapper` goes out of scope.
    v.bstrVal = wrapper.Detach();
    return true;
}