예제 #1
0
/**
 * Convert a BAM file into its textual SAM representation.
 *
 * The BAM header text is written first, followed by one formatted line per
 * alignment record. The file-scope counter num_alignments is incremented for
 * every record written and number_of_batchs is set to 1 on completion. When
 * time_flag is set, the conversion is timed through t1_convert, t2_convert
 * and convert_time.
 *
 * @param bam_input  path of the BAM file to read
 * @param sam_input  path of the SAM (text) file to create
 */
void convert_bam_to_sam(char* bam_input, char* sam_input) {
    int read_bytes;
    char* bam_string;
    char log_message[200];

    LOG_DEBUG("CONVERT-START: bam to sam\n");

    if (time_flag) {
        start_timer(t1_convert);
    }

    //open BAM file for read
    bam_file_t* bam_file_p = bam_fopen_mode(bam_input, NULL, "r");
    if (bam_file_p == NULL) {
        snprintf(log_message, sizeof log_message,
                 "Error opening file '%.150s' in mode 'r' !!!!!\n", bam_input);
        LOG_FATAL(log_message);
        // NOTE(review): LOG_FATAL may already terminate the process; the
        // explicit return keeps this path safe if it does not.
        return;
    }

    //open SAM file for write, SAM file is a text file!!!
    FILE* sam_fd = fopen(sam_input, "w");
    if (sam_fd == NULL) {
        snprintf(log_message, sizeof log_message,
                 "Error opening file '%.150s' in mode 'w' !!!!!\n", sam_input);
        LOG_FATAL(log_message);
        bam_fclose(bam_file_p);
        return;
    }

    //the alignment buffer is created only once both files are open, so the
    //early exits above have nothing else to release
    bam1_t* bam_p = bam_init1();

    //header for BAM file has been done in the opening
    bam_header_t* bam_header_p = bam_file_p->bam_header_p;

    //write header text to SAM file
    fprintf(sam_fd, "%s", bam_header_p->text);

    //write string alignments to SAM file
    while ((read_bytes = bam_read1(bam_file_p->bam_fd, bam_p)) > 0) {
        bam_string = bam_format1(bam_header_p, bam_p);
        fprintf(sam_fd, "%s\n", bam_string);
        free(bam_string); // it was allocated by the sam-tools, we must free it !!
        num_alignments++;
    }

    //close BAM and SAM files, free bam alignment and bam file object
    bam_fclose(bam_file_p);
    fclose(sam_fd);
    bam_destroy1(bam_p);
    if (time_flag) {
        stop_timer(t1_convert, t2_convert, convert_time);
    }

    //number_of_batchs = 1, convention value for statistics (not real batch)
    number_of_batchs = 1;

    LOG_DEBUG("CONVERT-END: bam to sam\n");
}
예제 #2
0
/**
 * Convert a text SAM file into a BAM file.
 *
 * The SAM header is read and written to the BAM file, then every alignment
 * returned by sam_read1() is appended with bam_write1(). The file-scope
 * counter num_alignments is incremented per record and number_of_batchs is
 * set to 1 on completion. When time_flag is set, the conversion is timed
 * through t1_convert, t2_convert and convert_time.
 *
 * If either file cannot be opened, or the header cannot be read, the error
 * is reported through LOG_FATAL and nothing further is written.
 *
 * @param sam_input  path of the SAM file to read
 * @param bam_input  path of the BAM file to create
 */
void convert_sam_to_bam(char* sam_input, char* bam_input) {
    char log_message[200];

    LOG_DEBUG("CONVERT-START: sam to bam\n");

    if (time_flag) {
        start_timer(t1_convert);
    }

    //open SAM file for read
    tamFile sam_fd = sam_open(sam_input);
    if (sam_fd == NULL) {
        snprintf(log_message, sizeof log_message,
                 "Error opening file '%.150s' in mode 'r' !!!!!\n", sam_input);
        LOG_FATAL(log_message);
        // NOTE(review): LOG_FATAL may already terminate the process; the
        // explicit return keeps this path safe if it does not.
        return;
    }

    //open BAM file for write
    bam_file_t* bam_file_p = bam_fopen_mode(bam_input, NULL, "w");
    if (bam_file_p == NULL) {
        snprintf(log_message, sizeof log_message,
                 "Error opening file '%.150s' in mode 'w' !!!!!\n", bam_input);
        LOG_FATAL(log_message);
        sam_close(sam_fd);
        return;
    }

    //read header from SAM file
    bam_header_t* bam_header_p = sam_header_read(sam_fd);
    if (bam_header_p == NULL) {
        snprintf(log_message, sizeof log_message,
                 "Error reading header from file '%.150s' !!!!!\n", sam_input);
        LOG_FATAL(log_message);
        bam_fclose(bam_file_p);
        sam_close(sam_fd);
        return;
    }

    //write header to BAM file
    bam_header_write(bam_file_p->bam_fd, bam_header_p);

    //write alignments to BAM file, reusing a single record buffer
    bam1_t* bam_p = bam_init1();
    while (sam_read1(sam_fd, bam_header_p, bam_p) > 0) {
        bam_write1(bam_file_p->bam_fd, bam_p);
        num_alignments++;
    }

    //close BAM and SAM files, free bam alignment and bam file object
    bam_fclose(bam_file_p);
    sam_close(sam_fd);
    bam_header_destroy(bam_header_p);
    bam_destroy1(bam_p);
    if (time_flag) {
        stop_timer(t1_convert, t2_convert, convert_time);
    }

    //number_of_batchs = 1, convention value for statistics (not real batch)
    number_of_batchs = 1;
}
예제 #3
0
/**
 * Writer loop: removes write batches from input_p->list_p until
 * list_remove_item() returns NULL, and stores each one according to its flag.
 *
 *  - MATCH_FLAG / MISMATCH_FLAG: the batch buffer is an array of alignment_t*
 *    of batch_p->size entries; each is converted to a BAM record, written to
 *    the BAM file input_p->match_filename, and freed.
 *  - SPLICE_EXACT_FLAG / SPLICE_EXTEND_FLAG: the batch buffer holds
 *    batch_p->size raw bytes that are appended to the matching splice file.
 *  - any other flag: the batch is discarded.
 *
 * Every batch and list item is freed here. If a splice output file cannot be
 * opened, the problem is reported on stderr and batches for that file are
 * dropped; the list is still drained.
 *
 * @param input_p  writer configuration: output file names, header file name
 *                 and the list to consume
 */
void batch_writer(batch_writer_input_t* input_p) {

    alignment_t **buffer_p;
    bam1_t* bam1_p;
    bam_header_t* bam_header_p;
    bam_file_t* bam_file_p;

    char* match_filename = input_p->match_filename;
    char* splice_exact_filename = input_p->splice_exact_filename;
    char* splice_extend_filename = input_p->splice_extend_filename;

    list_t* list_p = input_p->list_p;

    printf("batch_writer (%i): START\n", omp_get_thread_num());

    list_item_t *item_p = NULL;
    write_batch_t* batch_p;

    FILE* fd;

    FILE* splice_exact_fd = fopen(splice_exact_filename, "w");
    if (splice_exact_fd == NULL) {
        fprintf(stderr, "batch_writer: cannot open '%s' for writing, exact splice batches will be dropped\n",
                splice_exact_filename);
    }

    FILE* splice_extend_fd = fopen(splice_extend_filename, "w");
    if (splice_extend_fd == NULL) {
        fprintf(stderr, "batch_writer: cannot open '%s' for writing, extended splice batches will be dropped\n",
                splice_extend_filename);
    }

    // NOTE(review): the result of bam_fopen_mode() is not checked here; its
    // failure behaviour is not visible from this function — confirm.
    bam_header_p = bam_header_new(HUMAN, NCBI37, input_p->header_filename);
    bam_file_p = bam_fopen_mode(match_filename, bam_header_p, "w");
    bam_fwrite_header(bam_header_p, bam_file_p);

    // main loop
    while ( (item_p = list_remove_item(list_p)) != NULL ) {

        if (time_on) {
            timing_start(BATCH_WRITER, 0, timing_p);
        }

        batch_p = (write_batch_t*) item_p->data_p;

        if (batch_p->flag == MATCH_FLAG || batch_p->flag == MISMATCH_FLAG) {
            // alignments: one BAM record per entry, then release the entry
            buffer_p = (alignment_t **)batch_p->buffer_p;
            for(int i = 0; i < batch_p->size; i++) {
                bam1_p = convert_to_bam(buffer_p[i], 33);
                bam_fwrite(bam1_p, bam_file_p);
                bam_destroy1(bam1_p);
                alignment_free(buffer_p[i]);
            }
        } else {
            // raw bytes: pick the destination stream from the flag
            if (batch_p->flag == SPLICE_EXACT_FLAG) {
                fd = splice_exact_fd;
            }
            else if (batch_p->flag == SPLICE_EXTEND_FLAG) {
                fd = splice_extend_fd;
            }
            else {
                fd = NULL;
            }

            // fd is NULL for unknown flags and for files that failed to open.
            // An empty batch is skipped because fwrite() returns 0 for a
            // zero-sized item, which would look like a failure.
            if (fd != NULL && batch_p->size > 0) {
                if (fwrite((char *)batch_p->buffer_p, batch_p->size, 1, fd) != 1) {
                    fprintf(stderr, "batch_writer: error writing splice batch\n");
                }
            }
        }

        write_batch_free(batch_p);
        list_item_free(item_p);

        if (time_on) {
            timing_stop(BATCH_WRITER, 0, timing_p);
        }
    } // end of batch loop

    // fclose(NULL) is undefined, so only close the streams that were opened
    if (splice_exact_fd != NULL) {
        fclose(splice_exact_fd);
    }
    if (splice_extend_fd != NULL) {
        fclose(splice_extend_fd);
    }

    // NOTE(review): bam_header_p is not released here; presumably
    // bam_fclose() takes care of it — confirm.
    bam_fclose(bam_file_p);
    printf("batch_writer: END\n");
}
예제 #4
0
파일: bfwork.c 프로젝트: opencb/hpg-aligner
/**
 * Run framework contexts.
 *
 * Executes every context in fwork->v_context in order. For each context the
 * input BAM is opened (the original input, or the output left by a previous
 * context), an output BAM is created when the context has an output path,
 * the context is run threaded or sequentially depending on
 * omp_get_max_threads(), the optional reduce step is applied, and the files
 * are closed. If fwork->reference_str is set, the reference genome is opened
 * before the first context and released after the last one.
 *
 * @param fwork  framework to run; input_file_str and regions_list must be set
 * @return NO_ERROR when no contexts are registered, otherwise the value
 *         returned by the run of the last context
 */
int
bfwork_run(bam_fwork_t *fwork)
{
	//Size of the buffers allocated here for context output paths
	const size_t output_path_size = 256;

	int err = 0, c;
	double times;


	assert(fwork);
	assert(fwork->input_file_str);
	assert(fwork->regions_list);

	printf("============== BEGIN RUN  ==============\n");

	//Check if contexts present
	if(fwork->v_context_l == 0)
	{
		LOG_WARN("No contexts have been specified to run!\n");
		printf("============== END RUN  ==============\n\n");

		return NO_ERROR;
	}

	//Open reference
	if(fwork->reference_str)
	{
		//Obtain reference filename and dirpath from full path.
		//ref_name keeps its leading '/', ref_path is everything before it.
		char *ref_path = NULL, *ref_name = NULL, *aux;

		ref_path = strdup(fwork->reference_str);
		assert(ref_path);
		aux = strrchr(ref_path, '/');
		if (aux) {
			ref_name = strdup(aux);
			assert(ref_name);
			*aux = '\0';
		} else {
			//No directory component: use "." as the directory and build
			//"/<file>" so ref_name is never NULL and keeps the same
			//leading-slash shape as the branch above
			size_t name_size = strlen(ref_path) + 2;

			ref_name = malloc(name_size);
			assert(ref_name);
			snprintf(ref_name, name_size, "/%s", ref_path);

			free(ref_path);
			ref_path = strdup(".");
			assert(ref_path);
		}
		printf("Reference path: %s\n", ref_path);
		printf("Reference name: %s\n", ref_name);
		printf("Opening reference genome from \"%s%s\" ...\n", ref_path, ref_name);
		fwork->reference = genome_new(ref_name, ref_path, BWT_MODE);
		assert(fwork->reference);
		printf("Reference opened!...\n");

		free(ref_path);
		free(ref_name);
	}

	printf("--------------------------------------\n");

	for(c = 0; c < fwork->v_context_l; c++)
	{
		//Select next context
		fwork->context = fwork->v_context[c];
		assert(fwork->context);

#ifdef D_TIME_DEBUG
		times = omp_get_wtime();
#endif

		//Open input bam
		{
			//If last context had no output
			if(!fwork->last_temp_file_str)
			{
				//Open initial input file
				printf("Opening BAM from \"%s\" ...\n", fwork->input_file_str);
				fwork->input_file = bam_fopen(fwork->input_file_str);
				assert(fwork->input_file);
				printf("BAM opened!...\n");
			}
			else
			{
				//Open last context output
				printf("Opening intermediate BAM from \"%s\" ...\n", fwork->last_temp_file_str);
				fwork->input_file = bam_fopen(fwork->last_temp_file_str);
				assert(fwork->input_file);
				printf("Intermediate BAM opened!...\n");
			}
		}

		//Create new output bam if last context
		fwork->erase_tmp = 0;
		if(c == fwork->v_context_l - 1)
		{
			if(fwork->output_file_str)
			{
				if(fwork->context->output_file_str == NULL)
				{
					//Allocate
					fwork->context->output_file_str = malloc(output_path_size);
					assert(fwork->context->output_file_str);
				}

				//Set final output; snprintf always terminates the copy,
				//which strncpy does not when the source fills the buffer.
				//NOTE(review): a buffer supplied by the caller is assumed
				//to hold at least output_path_size bytes — confirm.
				snprintf(fwork->context->output_file_str, output_path_size, "%s", fwork->output_file_str);
			}
		}
		else
		{
			//Temporary file?
			if(fwork->context->output_temp)
			{
				//Allocate
				//NOTE(review): any previous output_file_str is overwritten
				//without being freed; its ownership is not visible here.
				fwork->context->output_file_str = malloc(output_path_size);
				assert(fwork->context->output_file_str);

				//Set output temp path
				snprintf(fwork->context->output_file_str, output_path_size, "/tmp/bfwork_tmp_%d.tmp", c);
				fwork->erase_tmp = 1;
			}
		}

		//Create new temporary bam if context have output
		if(fwork->context->output_file_str != NULL)
		{
			printf("Creating new intermediate bam file in \"%s\"...\n", fwork->context->output_file_str);
			fwork->output_file = bam_fopen_mode(fwork->context->output_file_str, fwork->input_file->bam_header_p, "w");
			assert(fwork->output_file);
			bam_fwrite_header(fwork->output_file->bam_header_p, fwork->output_file);
			//The header belongs to the input file: detach it from the output
			//handle. Presumably this stops bam_fclose() on the output from
			//releasing it — confirm.
			fwork->output_file->bam_header_p = NULL;
			printf("New intermediate BAM initialized!...\n");
		}

#ifdef D_TIME_DEBUG
		times = omp_get_wtime() - times;
		if(fwork->context->time_stats)
			time_add_time_slot(D_FWORK_INIT, fwork->context->time_stats, times);
#endif

		//Logging
		if(fwork->context->tag != NULL)
		{
			printf("Context %s is now running\n", fwork->context->tag);
		}
		else
		{
			printf("Context %d is now running\n", c);
		}

#ifdef D_TIME_DEBUG
		times = omp_get_wtime();
#endif

		//Run this context
		//NOTE(review): err is overwritten by every context and is not
		//inspected before "Context SUCCESS!" is logged below; only the last
		//context's result reaches the caller.
		if(omp_get_max_threads() > 1)
		{
			//Run in multithreaded mode
			err = bfwork_run_threaded(fwork);
		}
		else
		{
			//Run in sequential mode
			err = bfwork_run_sequential(fwork);
		}

		//Reduce needed?
		if(fwork->context->reduce != NULL && fwork->context->reduce_dest != NULL)
		{
			//Reduce into context reduce data
			bfwork_context_local_user_data_reduce(fwork->context, fwork->context->reduce_dest, fwork->context->reduce);
		}

#ifdef D_TIME_DEBUG
		times = omp_get_wtime() - times;
		if(fwork->context->time_stats)
			time_add_time_slot(D_FWORK_TOTAL, fwork->context->time_stats, times);
#endif

		//Close input BAM
		printf("\nClosing BAM file...\n");
		bam_fclose(fwork->input_file);
		fwork->input_file = NULL;
		printf("BAM closed.\n");

		//Close output file
		if(fwork->output_file != NULL)
		{
			printf("Closing \"%s\" BAM file...\n", fwork->output_file->filename);
			bam_fclose(fwork->output_file);
			fwork->output_file = NULL;
			printf("BAM closed.\n");
		}

		//Remove last temporary file
		//NOTE(review): erase_tmp describes the output of the CURRENT context,
		//while last_temp_file_str is the output of a PREVIOUS one. The final
		//context always leaves erase_tmp at 0, so a temporary file feeding it
		//appears never to be deleted — confirm the intended behaviour.
		if(fwork->last_temp_file_str != NULL && fwork->erase_tmp)
		{
			//Delete file
			printf("Deleting %s...\n", fwork->last_temp_file_str);
			remove(fwork->last_temp_file_str);
			fwork->last_temp_file_str = NULL;
			fwork->erase_tmp = 0;
		}

		//Set last file
		if(fwork->context->output_file_str)
		{
			//Set last temporary file if not the last context
			if(c < fwork->v_context_l - 1)
				fwork->last_temp_file_str = fwork->context->output_file_str;
		}

		//Logging
		printf("--------------------------------------\n");
		LOG_INFO("Context SUCCESS!\n");
	}

	//Remove last temporary file
	if(fwork->last_temp_file_str != NULL && fwork->erase_tmp)
	{
		//Delete file
		printf("Deleting %s...\n", fwork->last_temp_file_str);
		remove(fwork->last_temp_file_str);
	}

	//Close reference
	if(fwork->reference != NULL)
	{
		printf("\nClosing reference file...\n");
		genome_free(fwork->reference);
		printf("Reference closed.\n");
	}

	//Logging
	LOG_INFO("Framework SUCCESS!\n");

	printf("============== END RUN  ==============\n\n");

	return err;
}
예제 #5
0
/**
 * Writer loop: removes aligner batches from input->list_p until
 * list_remove_item() returns NULL and writes their reads to the BAM file
 * input->match_filename.
 *
 * For each read of a batch:
 *  - if its mapping list is NULL or empty, a placeholder alignment is built
 *    from the read's FASTQ header, sequence and quality with the cigar
 *    "<read_len>X", and written;
 *  - otherwise every non-NULL alignment in the mapping list is written and
 *    then freed.
 * The mapping list, the batch and the list item are freed here. The number
 * of mapped alignments written is reported when the loop ends.
 *
 * @param input  writer configuration: BAM output name, header file name and
 *               the list to consume
 */
void batch_writer2(batch_writer_input_t* input) {

    printf("START: batch_writer (%i): START, for file %s\n",
           omp_get_thread_num(), input->match_filename);

    bam1_t *bam1;
    bam_header_t *bam_header;
    bam_file_t *bam_file;
    alignment_t *alig;

    char* match_filename = input->match_filename;

    list_t *write_list = input->list_p;
    array_list_t *array_list;

    list_item_t *item = NULL;
    aligner_batch_t *batch = NULL;
    fastq_batch_t *fq_batch = NULL;

    // Cigar text for unmapped reads. A local buffer (not static) so that
    // concurrent writers do not share it; 32 bytes hold any size_t plus 'X'.
    char aux[32];

    size_t read_len;

    bam_header = bam_header_new(HUMAN, NCBI37, input->header_filename);
    bam_file = bam_fopen_mode(match_filename, bam_header, "w");

    bam_fwrite_header(bam_header, bam_file);

    size_t num_reads = 0, num_items = 0, total_mappings = 0;

    // main loop
    while ( (item = list_remove_item(write_list)) != NULL ) {

        // pairs with the timing_stop() at the end of the iteration
        if (time_on) {
            timing_start(BATCH_WRITER, 0, timing_p);
        }

        batch = (aligner_batch_t *) item->data_p;
        fq_batch = batch->fq_batch;
        num_reads = batch->num_mapping_lists;

        for (size_t i = 0; i < num_reads; i++) {

            array_list = batch->mapping_lists[i];
            num_items = (array_list == NULL ? 0 : array_list_size(array_list));

            // length of read i: distance between consecutive data offsets,
            // minus one
            read_len = fq_batch->data_indices[i + 1] - fq_batch->data_indices[i] - 1;

            // mapped or not mapped ?
            if (num_items == 0) {

                // calculating cigar
                snprintf(aux, sizeof aux, "%zuX", read_len);

                // the "+ 1" skips the first character of the stored header
                // (presumably the FASTQ '@' — confirm)
                alig = alignment_new();
                alignment_init_single_end(&(fq_batch->header[fq_batch->header_indices[i]])+1,
                                          &(fq_batch->seq[fq_batch->data_indices[i]]),
                                          &(fq_batch->quality[fq_batch->data_indices[i]]),
                                          0,
                                          0,
                                          0,
                                          aux, 1, 255, 0, 0, alig);

                bam1 = convert_to_bam(alig, 33);
                bam_fwrite(bam1, bam_file);
                bam_destroy1(bam1);

                // some cosmetic stuff before freeing the alignment,
                // (in order to not free twice some fields): these point into
                // the fastq batch and into aux, which this alignment does
                // not own
                alig->query_name = NULL;
                alig->sequence = NULL;
                alig->quality = NULL;
                alig->cigar = NULL;
                alignment_free(alig);

            } else {
                for (size_t j = 0; j < num_items; j++) {
                    alig = (alignment_t *) array_list_get(j, array_list);
                    if (alig != NULL) {

                        bam1 = convert_to_bam(alig, 33);
                        bam_fwrite(bam1, bam_file);
                        bam_destroy1(bam1);

                        alignment_free(alig);
                        total_mappings++;
                    }
                }
            }
            // the elements were freed above, so only the container is released
            if (array_list != NULL) array_list_free(array_list, NULL);
        }

        // batch and item were already dereferenced above, so both are non-NULL
        aligner_batch_free(batch);
        list_item_free(item);

        if (time_on) {
            timing_stop(BATCH_WRITER, 0, timing_p);
        }
    } // end of batch loop
    bam_fclose(bam_file);
    printf("END: batch_writer (total mappings %zu)\n", total_mappings);
}