示例#1
0
// Splits a sam file into individual files, one per chromosome. The files are created in the specified directory.
// Each split file contains the same header as the original samFile. A generated file may contain only
// a header if no alignments to that chromosome exist. The names of the split files come from the sam header
// with an additional .sam extension. The unmapped alignments are collected in the file splitChrSam_unaligned.sam
//
// samFile: character(1), path of the sam file to split
// outDir:  character(1), directory passed to _assemble_file_name() to build each output path
//
// Returns a character vector with the chromosome names in the order in which they occur in the
// sam file header, followed by one extra element, "splitChrSam_unaligned".
// Raises an R error if an argument is not character(1), if the header is missing, or if the
// table of output file handles cannot be allocated.
SEXP split_sam_chr(SEXP samFile, SEXP outDir)
{
  if (!Rf_isString(samFile) || 1 != Rf_length(samFile)){
    Rf_error("'samFile' must be character(1)");
  }

  if (!Rf_isString(outDir) || 1 != Rf_length(outDir)){
    Rf_error("'outDir' must be character(1)");
  }

  const char * sam_file =  Rf_translateChar(STRING_ELT(samFile, 0));
  const char * out_dir =  Rf_translateChar(STRING_ELT(outDir, 0));

  // open the input sam file
  // NOTE(review): fin is dereferenced without a NULL check; this presumes _bam_tryopen
  // never returns NULL (e.g. raises an R error itself on failure) -- confirm.
  samfile_t *fin = _bam_tryopen(sam_file, "r", NULL);
  if (fin->header == 0) {
    samclose(fin);
    Rf_error("invalid header");
  }

  // remove \r from header if exists (for windows); the text is compacted in place
  int j, k = 0;
  for(j = 0; j<fin->header->l_text; j++){
    if(fin->header->text[j] != '\r'){
      fin->header->text[k++] = fin->header->text[j];
    }
  }
  if(j != k){
    // at least one \r was dropped: re-terminate the shortened text and update its length
    fin->header->text[k] = '\0';
    fin->header->l_text = (uint32_t)strlen(fin->header->text);
  }

  // one output file per target plus one for the unaligned reads
  const int n_targets = fin->header->n_targets;
  const int n_files = n_targets + 1;

  // allocate the R result first: if this R allocation fails, no C-level memory
  // for the file handle table has been acquired yet
  SEXP chrNames;
  PROTECT(chrNames = allocVector(STRSXP, n_files)); // protect from garbage collector

  // allocate memory for a list of filehandles (n+1 because of the unaligned reads)
  samfile_t **foutList = (samfile_t**)calloc((size_t)n_files, sizeof(samfile_t*));
  if (foutList == NULL) {
    samclose(fin);
    Rf_error("could not allocate memory for the output file handles");
  }

  // open the output file handles (n+1 due to the unaligned reads)
  // NOTE(review): the string returned by _assemble_file_name is never released here;
  // if that helper returns heap memory this leaks one path per file -- confirm.
  int i;
  for (i = 0; i < n_targets; i++) {
    foutList[i] = _bam_tryopen(_assemble_file_name(out_dir,fin->header->target_name[i]), "wh", fin->header);
    SET_STRING_ELT(chrNames, i, mkChar(fin->header->target_name[i]));
  }
  foutList[n_targets] = _bam_tryopen(_assemble_file_name(out_dir,"splitChrSam_unaligned"), "wh", fin->header);
  SET_STRING_ELT(chrNames, n_targets, mkChar("splitChrSam_unaligned"));

  // split the sam file based on chromosome
  _walk_through_sam_and_split(fin,foutList);

  // close all the file handles, then release the handle table itself
  for (i = 0; i < n_files; i++){samclose(foutList[i]);}
  free(foutList);
  samclose(fin);

  UNPROTECT(1); // release
  return chrNames;
}
示例#2
0
/*
 * Allocate a BAM_FILE and attach an opened BAM file and/or a loaded BAM index.
 *
 * filename:  if non-empty, its first element is opened with _bam_tryopen()
 *            using the mode string in filemode[0]; the handle must carry the
 *            TYPE_BAM flag or an R error is raised.
 * indexname: if non-empty, its first element is loaded with
 *            _bam_tryindexload(); a NULL result raises an R error.
 * filemode:  mode string, read only when 'filename' is non-empty.
 *
 * Returns the new BAM_FILE with 'iter' and 'pbuffer' set to NULL. 'file' and
 * 'index' are NULL when the corresponding argument was empty; 'pos0' and
 * 'irange0' are assigned only when a file was opened.
 * On either error path the opened file (if any) is closed and the BAM_FILE is
 * freed before Rf_error() is called.
 */
static BAM_FILE _bamfile_open_r(SEXP filename, SEXP indexname, SEXP filemode)
{
    BAM_FILE bfile = (BAM_FILE) Calloc(1, _BAM_FILE);

    bfile->file = NULL;
    if (0 != Rf_length(filename)) {
        const char *cfile = translateChar(STRING_ELT(filename, 0));
        /* NOTE(review): the result is dereferenced without a NULL check;
         * presumably _bam_tryopen raises an R error itself on failure (in
         * which case 'bfile' would leak) -- confirm. */
        bfile->file = _bam_tryopen(cfile, CHAR(STRING_ELT(filemode, 0)), 0);
        /* test the flag bit itself instead of comparing the masked value
         * with the literal 1, which is only right if TYPE_BAM == 1 */
        if ((bfile->file->type & TYPE_BAM) == 0) {
            samclose(bfile->file);
            Free(bfile);
            Rf_error("'filename' is not a BAM file\n  file: %s", cfile);
        }
        /* remember the file position right after opening */
        bfile->pos0 = bam_tell(bfile->file->x.bam);
        bfile->irange0 = 0;
    }

    bfile->index = NULL;
    if (0 != Rf_length(indexname)) {
        const char *cindex = translateChar(STRING_ELT(indexname, 0));
        bfile->index = _bam_tryindexload(cindex);
        if (NULL == bfile->index) {
            /* no file was opened when 'filename' was empty, so only close
             * a handle that actually exists */
            if (NULL != bfile->file) {
                samclose(bfile->file);
            }
            Free(bfile);
            Rf_error("failed to open BAM index\n  index: %s\n", cindex);
        }
    }

    bfile->iter = NULL;
    bfile->pbuffer = NULL;
    return bfile;
}
示例#3
0
/*
 * Open 'file0' for BAM output ("wb"), passing it the header read from the
 * existing BAM file 'file1', and wrap the output handle in a newly allocated
 * BAM_FILE.
 *
 * file0: path of the file to write; its first element is used.
 * file1: path of a BAM file opened with mode "rb" to obtain the header; it is
 *        closed again as soon as the output file has been opened.
 *
 * Returns the new BAM_FILE with 'file' set to the output handle, 'pos0' set
 * to the bam_tell() position right after opening, and 'irange0' set to 0.
 * Raises an R error if either argument is empty.
 */
static BAM_FILE _bamfile_open_w(SEXP file0, SEXP file1)
{
    samfile_t *infile, *outfile;
    BAM_FILE bfile;

    if (0 == Rf_length(file1)) {
        Rf_error("'file1' must be a character(1) path to a valid bam file");
    }
    /* file0 is indexed below as well, so it needs the same guard as file1;
     * check before anything is opened so that no handle can leak */
    if (0 == Rf_length(file0)) {
        Rf_error("'file0' must be a character(1) path for the output bam file");
    }

    infile = _bam_tryopen(translateChar(STRING_ELT(file1, 0)), "rb", 0);
    /* NOTE(review): if _bam_tryopen raises an R error when the output file
     * cannot be opened, 'infile' is never closed -- confirm and, if so,
     * consider a variant that reports failure by return value. */
    outfile = _bam_tryopen(translateChar(STRING_ELT(file0, 0)), "wb",
                           infile->header);
    samclose(infile);

    bfile = (BAM_FILE) Calloc(1, _BAM_FILE);
    bfile->file = outfile;
    bfile->pos0 = bam_tell(bfile->file->x.bam);
    bfile->irange0 = 0;

    return bfile;
}