/**
 * Writes one unmapped read to a BAM file.
 *
 * A temporary alignment is built from the read (with an empty cigar),
 * converted to a BAM record, written to `bam_file` and then released.
 *
 * @param fq_read  read to write; its id is duplicated, while its sequence
 *                 and quality buffers are only borrowed and are left
 *                 untouched for the caller
 * @param bam_file destination BAM file
 *
 * Nothing is written when either argument is NULL.
 */
void write_unmapped_read(fastq_read_t *fq_read, bam_file_t *bam_file) {
  // without a read or a destination there is nothing to do
  if (fq_read == NULL || bam_file == NULL) {
    return;
  }

  alignment_t *alig = alignment_new();

  // the alignment receives its own copy of the id, but shares the read's
  // sequence and quality buffers; the cigar is an empty string literal
  alignment_init_single_end(strdup(fq_read->id), fq_read->sequence, fq_read->quality,
                            0, -1, -1, "", 0, 0, 0, 0, 0, NULL, alig);

  // NOTE(review): 33 is presumably the base quality offset -- confirm
  // against convert_to_bam()
  bam1_t *bam1 = convert_to_bam(alig, 33);
  bam_fwrite(bam1, bam_file);
  bam_destroy1(bam1);

  // detach the borrowed buffers and the literal cigar before releasing the
  // alignment, presumably so that alignment_free() does not try to free
  // memory the alignment does not own
  alig->sequence = NULL;
  alig->quality = NULL;
  alig->cigar = NULL;
  alignment_free(alig);
}
/**
 * Writes every alignment of a mapping list to a BAM file, releasing each
 * alignment after it has been written and the list itself at the end.
 *
 * @param array_list list of alignment_t pointers; may be NULL, in which
 *                   case the function does nothing
 * @param bam_file   destination BAM file
 *
 * A NULL entry inside the list is reported through LOG_FATAL_F.
 */
void write_mapped_read(array_list_t *array_list, bam_file_t *bam_file) {
  // check the list before asking for its size, not after using it
  if (array_list == NULL) {
    return;
  }

  size_t num_items = array_list_size(array_list);
  for (size_t j = 0; j < num_items; j++) {
    alignment_t *alig = (alignment_t *) array_list_get(j, array_list);

    LOG_DEBUG("writting bam..\n");
    if (alig != NULL) {
      // NOTE(review): 33 is presumably the base quality offset -- confirm
      bam1_t *bam1 = convert_to_bam(alig, 33);
      bam_fwrite(bam1, bam_file);
      bam_destroy1(bam1);
      alignment_free(alig);
    } else {
      // cast so that the argument type matches the %lu conversion
      LOG_FATAL_F("alig is NULL, num_items = %lu\n", (unsigned long) num_items);
    }
  }

  // the alignments were already released above, so only the container
  // is freed here (no per-item callback)
  array_list_free(array_list, NULL);
}
Пример #3
0
int sa_bam_writer(void *data) {
  sa_wf_batch_t *wf_batch = (sa_wf_batch_t *) data;
  
  sa_mapping_batch_t *mapping_batch = (sa_mapping_batch_t *) wf_batch->mapping_batch;
  if (mapping_batch == NULL) {
    printf("bam_writer1: error, NULL mapping batch\n");
    return 0;
  }

  //  for (int i = 0; i < NUM_COUNTERS; i++) {
  //    counters[i] += mapping_batch->counters[i];
  //  }

  #ifdef _TIMING
  for (int i = 0; i < NUM_TIMING; i++) {
    func_times[i] += mapping_batch->func_times[i];
  }
  #endif

  int flag, len;
  char *sequence, *quality;

  fastq_read_t *read;
  array_list_t *read_list = mapping_batch->fq_reads;

  bam1_t *bam1;
  alignment_t *alig;
  array_list_t *mapping_list;
  bam_file_t *out_file = wf_batch->writer_input->bam_file;

  sa_genome3_t *genome = wf_batch->sa_index->genome;

  size_t num_reads, num_mappings, num_mate_mappings;
  num_reads = mapping_batch->num_reads;
  for (size_t i = 0; i < num_reads; i++) {
    read = (fastq_read_t *) array_list_get(i, read_list);
    mapping_list = mapping_batch->mapping_lists[i];
    num_mappings = array_list_size(mapping_list);
    num_total_mappings += num_mappings;

    #ifdef _VERBOSE
    if (num_mappings > 1) {
      num_dup_reads++;
      num_total_dup_reads += num_mappings;
    }
    #endif

    if (num_mappings > 0) {
      num_mapped_reads++;
      if (num_mappings > 1) {
	num_multihit_reads++;
      }
      for (size_t j = 0; j < num_mappings; j++) {
	alig = (alignment_t *) array_list_get(j, mapping_list);

	// update alignment
	if (num_mappings > 1) {
	  alig->map_quality = 0;
	} else {
	  alig->map_quality = alig->mapq;
	}

	bam1 = convert_to_bam(alig, 33);
	bam_fwrite(bam1, out_file);
	bam_destroy1(bam1);
	alignment_free(alig);
      }
    } else {
      num_unmapped_reads++;

      if (read->adapter) {
	// sequences and cigar
	len = read->length + abs(read->adapter_length);
	sequence = (char *) malloc(len + 1);
	quality = (char *) malloc(len + 1);

	if (read->adapter_length < 0) {
	  strcpy(quality, read->adapter_quality);
	  strcat(quality, read->quality);
	} else {
	  strcpy(quality, read->quality);
	  strcat(quality, read->adapter_quality);
	}
	
	if ((read->adapter_strand == 0 && read->adapter_length < 0) || 
	    (read->adapter_strand == 1 && read->adapter_length > 0)) {
	  strcpy(sequence, read->adapter);
	  strcat(sequence, read->sequence);
	} else {
	  strcpy(sequence, read->sequence);
	  strcat(sequence, read->adapter);
	}
	sequence[len] = 0; 
	quality[len] = 0; 
      } else {
	// sequences
	sequence = read->sequence;
	quality = read->quality;
      }
      
      alig = alignment_new();       
      alignment_init_single_end(strdup(read->id), sequence, quality,
				0, -1, -1, /*strdup(aux)*/"", 0, 0, 0, 0, 0, NULL, alig);
      
      bam1 = convert_to_bam(alig, 33);
      bam_fwrite(bam1, out_file);
        
      // free memory
      bam_destroy1(bam1);
      alig->sequence = NULL;
      alig->quality = NULL;
      alig->cigar = NULL;
      alignment_free(alig);
      if (read->adapter) {
	free(sequence);
	free(quality);
      }
    }
    array_list_free(mapping_list, (void *) NULL);
  }

  // free memory
  sa_mapping_batch_free(mapping_batch);

  if (wf_batch) sa_wf_batch_free(wf_batch);

  return 0;
}
Пример #4
0
/**
 * Drains the write list of `input_p` and writes each batch to its output.
 *
 * Batches flagged MATCH_FLAG or MISMATCH_FLAG hold an array of alignments;
 * each one is converted to a BAM record, written to the BAM file opened on
 * `match_filename`, and freed. Batches flagged SPLICE_EXACT_FLAG or
 * SPLICE_EXTEND_FLAG hold a raw byte buffer that is written to the
 * corresponding splice file. Batches with any other flag are discarded.
 * The loop ends when list_remove_item() returns NULL.
 *
 * A splice file that cannot be opened is reported on stderr; batches
 * destined for it are skipped.
 *
 * @param input_p writer configuration: output file names, BAM header file
 *                name and the list to consume
 */
void batch_writer(batch_writer_input_t* input_p) {

    char* match_filename = input_p->match_filename;
    char* splice_exact_filename = input_p->splice_exact_filename;
    char* splice_extend_filename = input_p->splice_extend_filename;
    list_t* list_p = input_p->list_p;

    printf("batch_writer (%i): START\n", omp_get_thread_num());

    FILE* splice_exact_fd = fopen(splice_exact_filename, "w");
    if (splice_exact_fd == NULL) {
        fprintf(stderr, "batch_writer: warning, could not open %s for writing\n",
                splice_exact_filename);
    }
    FILE* splice_extend_fd = fopen(splice_extend_filename, "w");
    if (splice_extend_fd == NULL) {
        fprintf(stderr, "batch_writer: warning, could not open %s for writing\n",
                splice_extend_filename);
    }

    bam_header_t* bam_header_p = bam_header_new(HUMAN, NCBI37, input_p->header_filename);
    bam_file_t* bam_file_p = bam_fopen_mode(match_filename, bam_header_p, "w");
    bam_fwrite_header(bam_header_p, bam_file_p);

    // main loop
    list_item_t *item_p = NULL;
    while ( (item_p = list_remove_item(list_p)) != NULL ) {

        if (time_on) {
            timing_start(BATCH_WRITER, 0, timing_p);
        }

        write_batch_t* batch_p = (write_batch_t*) item_p->data_p;

        if (batch_p->flag == MATCH_FLAG || batch_p->flag == MISMATCH_FLAG) {
            // the buffer is an array of `size` alignments
            alignment_t **buffer_p = (alignment_t **) batch_p->buffer_p;
            for (int i = 0; i < batch_p->size; i++) {
                bam1_t* bam1_p = convert_to_bam(buffer_p[i], 33);
                bam_fwrite(bam1_p, bam_file_p);
                bam_destroy1(bam1_p);
                alignment_free(buffer_p[i]);
            }
        } else {
            // the buffer is `size` raw bytes for one of the splice files
            FILE* fd = NULL;
            if (batch_p->flag == SPLICE_EXACT_FLAG) {
                fd = splice_exact_fd;
            } else if (batch_p->flag == SPLICE_EXTEND_FLAG) {
                fd = splice_extend_fd;
            }

            // fd stays NULL for unknown flags or when the file failed to open
            if (fd != NULL) {
                size_t written = fwrite((char *)batch_p->buffer_p, batch_p->size, 1, fd);
                if (batch_p->size > 0 && written != 1) {
                    fprintf(stderr, "batch_writer: warning, failed to write splice batch\n");
                }
            }
        }

        write_batch_free(batch_p);
        list_item_free(item_p);

        if (time_on) {
            timing_stop(BATCH_WRITER, 0, timing_p);
        }
    } // end of batch loop

    // fclose() must not be called on a stream that failed to open
    if (splice_exact_fd != NULL) {
        fclose(splice_exact_fd);
    }
    if (splice_extend_fd != NULL) {
        fclose(splice_extend_fd);
    }

    // NOTE(review): the BAM header is not released here (the call was
    // already disabled in the original code) -- confirm whether
    // bam_fclose() takes care of it
    bam_fclose(bam_file_p);
    printf("batch_writer: END\n");
}
Пример #5
0
/**
 * Drains the write list of `input` and writes every read of each aligner
 * batch to the BAM file opened on `input->match_filename`.
 *
 * For each read of a batch:
 *  - no mappings (NULL or empty list): a temporary alignment is built
 *    directly on top of the fastq batch buffers, with the cigar
 *    "<read_len>X", and written;
 *  - otherwise every non-NULL alignment of the list is written and freed.
 * The mapping list, the batch and the list item are released afterwards.
 * The loop ends when list_remove_item() returns NULL.
 *
 * @param input writer configuration: output file name, BAM header file
 *              name and the list to consume
 */
void batch_writer2(batch_writer_input_t* input) {

    printf("START: batch_writer (%i): START, for file %s\n",
           omp_get_thread_num(), input->match_filename);

    list_t *write_list = input->list_p;

    // large enough for any size_t in decimal plus 'X' and the terminator
    char aux[32];

    bam_header_t *bam_header = bam_header_new(HUMAN, NCBI37, input->header_filename);
    bam_file_t *bam_file = bam_fopen_mode(input->match_filename, bam_header, "w");

    bam_fwrite_header(bam_header, bam_file);

    size_t total_mappings = 0;

    // main loop
    list_item_t *item = NULL;
    while ( (item = list_remove_item(write_list)) != NULL ) {

        aligner_batch_t *batch = (aligner_batch_t *) item->data_p;
        fastq_batch_t *fq_batch = batch->fq_batch;
        size_t num_reads = batch->num_mapping_lists;

        for (size_t i = 0; i < num_reads; i++) {

            array_list_t *array_list = batch->mapping_lists[i];
            size_t num_items = (array_list == NULL ? 0 : array_list_size(array_list));

            // consecutive data indices delimit the read; one position is
            // subtracted (presumably a separator/terminator -- confirm)
            size_t read_len = fq_batch->data_indices[i + 1] - fq_batch->data_indices[i] - 1;

            // mapped or not mapped ?
            if (num_items == 0) {

                // calculating cigar (bounded write, cannot overrun aux)
                snprintf(aux, sizeof aux, "%zuX", read_len);

                alignment_t *alig = alignment_new();
                // the header is used from its second character on
                alignment_init_single_end(&(fq_batch->header[fq_batch->header_indices[i]])+1,
                                          &(fq_batch->seq[fq_batch->data_indices[i]]),
                                          &(fq_batch->quality[fq_batch->data_indices[i]]),
                                          0,
                                          0,
                                          0,
                                          aux, 1, 255, 0, 0, alig);

                bam1_t *bam1 = convert_to_bam(alig, 33);
                bam_fwrite(bam1, bam_file);
                bam_destroy1(bam1);

                // every field points into memory the alignment does not own,
                // so detach them all in order not to free them twice
                alig->query_name = NULL;
                alig->sequence = NULL;
                alig->quality = NULL;
                alig->cigar = NULL;
                alignment_free(alig);

            } else {
                for (size_t j = 0; j < num_items; j++) {
                    alignment_t *alig = (alignment_t *) array_list_get(j, array_list);
                    if (alig != NULL) {

                        bam1_t *bam1 = convert_to_bam(alig, 33);
                        bam_fwrite(bam1, bam_file);
                        bam_destroy1(bam1);

                        alignment_free(alig);

                        // keep the figure reported at the end up to date
                        total_mappings++;
                    }
                }
            }
            if (array_list != NULL) array_list_free(array_list, NULL);
        }

        aligner_batch_free(batch);
        list_item_free(item);

        // NOTE(review): no matching timing_start() call is visible in this
        // function -- confirm where the timer is started
        if (time_on) {
            timing_stop(BATCH_WRITER, 0, timing_p);
        }
    } // end of batch loop
    bam_fclose(bam_file);
    // cast so that the argument type matches the %lu conversion
    printf("END: batch_writer (total mappings %lu)\n", (unsigned long) total_mappings);
}