/*
 * Adds a float-valued attribute to a SAM attribute set.
 *   A fresh gt_sam_attribute is allocated, filled in by
 *   gt_sam_attribute_set_fvalue() with (tag,type_id,value) and then handed
 *   to gt_sam_attributes_add_attribute().
 */
GT_INLINE void gt_sam_attributes_add_fvalue(
    gt_sam_attributes* const sam_attributes,char* const tag,char type_id,const float value) {
  GT_SAM_ATTRIBUTES_CHECK(sam_attributes);
  GT_NULL_CHECK(tag);
  // Build the new attribute
  gt_sam_attribute* const new_attribute = gt_alloc(gt_sam_attribute);
  gt_sam_attribute_set_fvalue(new_attribute,tag,type_id,value);
  // Register it in the set
  gt_sam_attributes_add_attribute(sam_attributes,new_attribute);
}
/*
 * Creates a step-range stats vector.
 *   The inclusive interval [min_value,max_value] is split into buckets of
 *   width 'step' and one zero-initialised counter is allocated per bucket.
 *   'out_of_range_bucket_size' is stored for the 'out_values' hash, which is
 *   created empty here.
 *   'step' and 'out_of_range_bucket_size' go through GT_ZERO_CHECK and
 *   min_value>max_value raises VSTATS_INVALID_MIN_MAX.
 */
GT_INLINE gt_stats_vector* gt_stats_vector_step_range_new(
    const uint64_t min_value,const uint64_t max_value,const uint64_t step,
    const uint64_t out_of_range_bucket_size) {
  GT_ZERO_CHECK(step);
  gt_cond_fatal_error(min_value>max_value,VSTATS_INVALID_MIN_MAX);
  GT_ZERO_CHECK(out_of_range_bucket_size);
  // Number of buckets = ceil((max-min+1)/step), written as (max-min)/step+1
  // so that neither 'max-min+1' nor 'range+step-1' can wrap around
  const uint64_t num_values = (max_value-min_value)/step + 1;
  // Allocate handler
  gt_stats_vector* const stats_vector = gt_alloc(gt_stats_vector);
  // Init
  stats_vector->type = GT_STATS_VECTOR_STEP_RANGE;
  stats_vector->counters = gt_calloc(num_values,uint64_t,true);
  // Step Range
  stats_vector->num_values = num_values;
  stats_vector->min_value = min_value;
  stats_vector->max_value = max_value;
  stats_vector->step = step;
  // Not used by this vector type; set explicitly because
  // gt_stats_vector_new_from_template() copies the field
  stats_vector->customed_range_values = NULL;
  stats_vector->out_of_range_bucket_size = out_of_range_bucket_size;
  stats_vector->out_values = gt_ihash_new();
  // Nested
  stats_vector->template_vector = NULL;
  stats_vector->nested_vectors = NULL;
  return stats_vector;
}
/*
 * SegmentedSEQ Constructor
 *   Returns a new segmented sequence with an empty block vector
 *   (initial capacity GT_SEQ_ARCHIVE_NUM_BLOCKS, elements of type
 *   gt_compact_dna_string*), a total length of zero and a freshly
 *   created name string.
 */
GT_INLINE gt_segmented_sequence* gt_segmented_sequence_new(void) {
  gt_segmented_sequence* const new_sequence = gt_alloc(gt_segmented_sequence);
  new_sequence->sequence_total_length = 0;
  // Storage for the sequence blocks
  new_sequence->blocks =
      gt_vector_new(GT_SEQ_ARCHIVE_NUM_BLOCKS,sizeof(gt_compact_dna_string*));
  // Sequence name
  new_sequence->seq_name = gt_string_new(10);
  return new_sequence;
}
/* * Basic I/O functions */ gt_input_file* gt_input_stream_open(FILE* stream) { GT_NULL_CHECK(stream); // Allocate handler gt_input_file* input_file = gt_alloc(gt_input_file); // Input file input_file->file_name = GT_STREAM_FILE_NAME; input_file->file_type = STREAM; input_file->file = stream; input_file->fildes = -1; input_file->eof = feof(stream); input_file->file_size = UINT64_MAX; input_file->file_format = FILE_FORMAT_UNKNOWN; gt_cond_fatal_error(pthread_mutex_init(&input_file->input_mutex, NULL),SYS_MUTEX_INIT); // Auxiliary Buffer (for synch purposes) input_file->file_buffer = gt_malloc(GT_INPUT_BUFFER_SIZE); input_file->buffer_size = 0; input_file->buffer_begin = 0; input_file->buffer_pos = 0; input_file->global_pos = 0; input_file->processed_lines = 0; // ID generator input_file->processed_id = 0; // Detect file format gt_input_file_detect_file_format(input_file); return input_file; }
/*
 * Constructor
 *   Returns a new DNA read whose tag, read and qualities strings are
 *   created with their initial lengths and whose attribute set is new.
 */
GT_INLINE gt_dna_read* gt_dna_read_new(void) {
  gt_dna_read* const dna_read = gt_alloc(gt_dna_read);
  // Text fields
  dna_read->tag = gt_string_new(GT_DNA_READ_TAG_INITIAL_LENGTH);
  dna_read->read = gt_string_new(GT_DNA_READ_INITIAL_LENGTH);
  dna_read->qualities = gt_string_new(GT_DNA_READ_INITIAL_LENGTH);
  // Attributes
  dna_read->attributes = gt_attributes_new();
  return dna_read;
}
/*
 * Adds a function-backed attribute to a SAM attribute set.
 *   A fresh gt_sam_attribute is allocated, bound to 's_func' through
 *   gt_sam_attribute_set_sfunc() with (tag,type_id) and then handed to
 *   gt_sam_attributes_add_attribute().
 */
GT_INLINE void gt_sam_attributes_add_sfunc(
    gt_sam_attributes* const sam_attributes,char* const tag,char type_id,
    gt_status (*s_func)(gt_sam_attribute_func_params*)) {
  GT_SAM_ATTRIBUTES_CHECK(sam_attributes);
  GT_NULL_CHECK(tag);
  GT_NULL_CHECK(s_func);
  // Build the new attribute
  gt_sam_attribute* const new_attribute = gt_alloc(gt_sam_attribute);
  gt_sam_attribute_set_sfunc(new_attribute,tag,type_id,s_func);
  // Register it in the set
  gt_sam_attributes_add_attribute(sam_attributes,new_attribute);
}
/*
 * Adds a string-valued attribute to a SAM attribute set.
 *   A fresh gt_sam_attribute is allocated, filled in by
 *   gt_sam_attribute_set_svalue() with (tag,type_id,string) and then handed
 *   to gt_sam_attributes_add_attribute().
 */
GT_INLINE void gt_sam_attributes_add_svalue(
    gt_sam_attributes* const sam_attributes,char* const tag,char type_id,
    gt_string* const string) {
  GT_SAM_ATTRIBUTES_CHECK(sam_attributes);
  GT_NULL_CHECK(tag);
  GT_STRING_CHECK(string);
  // Build the new attribute
  gt_sam_attribute* const new_attribute = gt_alloc(gt_sam_attribute);
  gt_sam_attribute_set_svalue(new_attribute,tag,type_id,string);
  // Register it in the set
  gt_sam_attributes_add_attribute(sam_attributes,new_attribute);
}
/*
 * Generic Printer Attributes
 *   Returns a new attribute holder with the SAM, FASTA and MAP output
 *   attribute pointers cleared, then configured for 'file_format' by
 *   gt_generic_printer_attributes_set_format().
 */
GT_INLINE gt_generic_printer_attributes* gt_generic_printer_attributes_new(const gt_file_format file_format) {
  gt_generic_printer_attributes* const printer_attributes =
      gt_alloc(gt_generic_printer_attributes);
  // No format-specific attributes yet
  printer_attributes->output_map_attributes = NULL;
  printer_attributes->output_fasta_attributes = NULL;
  printer_attributes->output_sam_attributes = NULL;
  // Configure for the requested format
  gt_generic_printer_attributes_set_format(printer_attributes,file_format);
  return printer_attributes;
}
/*
 * Creates an empty SAM headers container.
 *   The header line is a new string; read groups, programs and comments
 *   are new vectors of gt_string* with GT_ATTR_SAM_INIT_ELEMENTS initial
 *   capacity. No sequence archive is attached.
 */
GT_INLINE gt_sam_headers* gt_sam_header_new(void) {
  gt_sam_headers* const headers = gt_alloc(gt_sam_headers);
  const size_t line_size = sizeof(gt_string*);
  // @HD
  headers->header = gt_string_new(50);
  // @RG
  headers->read_group = gt_vector_new(GT_ATTR_SAM_INIT_ELEMENTS,line_size);
  // @PG
  headers->program = gt_vector_new(GT_ATTR_SAM_INIT_ELEMENTS,line_size);
  // @CO
  headers->comments = gt_vector_new(GT_ATTR_SAM_INIT_ELEMENTS,line_size);
  // @SQ
  headers->sequence_archive = NULL;
  return headers;
}
/*
 * Creates the parameter block passed to SAM attribute functions.
 *   The return string buffer and the attribute set are created first and
 *   gt_sam_attribute_func_params_clear() is then called on the new block.
 *   The parameter list is spelled '(void)' so the definition is a real
 *   prototype and calls with arguments are rejected by the compiler.
 */
GT_INLINE gt_sam_attribute_func_params* gt_sam_attribute_func_params_new(void) {
  gt_sam_attribute_func_params* const func_params = gt_alloc(gt_sam_attribute_func_params);
  /* String (gt_string) buffer */
  func_params->return_s = gt_string_new(GT_SAM_ATTR_FUNC_PARAMS_RETURN_S_INIT_LENGTH);
  /* Attributes */
  func_params->attributes = gt_attributes_new();
  /* Reset defaults */
  gt_sam_attribute_func_params_clear(func_params);
  return func_params;
}
/*
 * Creates an empty SAM headers container.
 *   There is no @HD record yet (NULL). Read groups, programs and the
 *   sequence dictionary are new vectors of gt_sam_header_record*, comments
 *   is a new vector of gt_string*, all with GT_ATTR_SAM_INIT_ELEMENTS
 *   initial capacity. The three lookup hashes start as NULL.
 */
GT_INLINE gt_sam_headers* gt_sam_header_new(void) {
  gt_sam_headers* const headers = gt_alloc(gt_sam_headers);
  const size_t record_size = sizeof(gt_sam_header_record*);
  // @HD
  headers->header = NULL;
  // @RG
  headers->read_group = gt_vector_new(GT_ATTR_SAM_INIT_ELEMENTS,record_size);
  // @PG
  headers->program = gt_vector_new(GT_ATTR_SAM_INIT_ELEMENTS,record_size);
  // @SQ
  headers->sequence_dictionary = gt_vector_new(GT_ATTR_SAM_INIT_ELEMENTS,record_size);
  // @CO
  headers->comments = gt_vector_new(GT_ATTR_SAM_INIT_ELEMENTS,sizeof(gt_string*));
  // Lookup hashes (none yet)
  headers->program_id_hash = NULL;
  headers->read_group_id_hash = NULL;
  headers->sequence_dictionary_sn_hash = NULL;
  return headers;
}
/*
 * Vector's Buckets getters
 */
/*
 * Returns the counter of the 'out_values' hash bucket that 'value' falls in.
 *   The bucket index is value/out_of_range_bucket_size. If the hash has no
 *   counter for that bucket, one is allocated, set to zero and inserted
 *   before being returned.
 */
GT_INLINE uint64_t* gt_stats_hvector_get_counter(gt_stats_vector* const stats_vector,const uint64_t value) {
  // Check the argument (the original passed the type name 'gt_stats_vector')
  GT_STATS_VECTOR_CHECK(stats_vector);
  const uint64_t bucket_index = value/stats_vector->out_of_range_bucket_size;
  // Fetch counter
  uint64_t* counter = gt_ihash_get_element(stats_vector->out_values,bucket_index);
  if (counter!=NULL) return counter;
  // Allocate new counter
  counter = gt_alloc(uint64_t);
  *counter = 0;
  gt_ihash_insert(stats_vector->out_values,bucket_index,counter,uint64_t);
  return counter;
}
/*
 * Buffered map file handlers
 */
/*
 * Creates a buffered reader bound to 'input_file'.
 *   The block buffer is a new byte vector of GT_BMI_BUFFER_SIZE and the
 *   cursor points at its memory. Block id and current line number start at
 *   their maximum values (UINT32_MAX / UINT64_MAX). A vector able to hold
 *   attached gt_buffered_output_file* handlers is created with room for 2.
 */
gt_buffered_input_file* gt_buffered_input_file_new(gt_input_file* const input_file) {
  GT_NULL_CHECK(input_file);
  gt_buffered_input_file* const buffered_input = gt_alloc(gt_buffered_input_file);
  /* Input file */
  buffered_input->input_file = input_file;
  /* Block/line identifiers */
  buffered_input->block_id = UINT32_MAX;
  buffered_input->current_line_num = UINT64_MAX;
  /* Block buffer and cursor */
  buffered_input->block_buffer = gt_vector_new(GT_BMI_BUFFER_SIZE,sizeof(uint8_t));
  buffered_input->cursor =
      (char*) gt_vector_get_mem(buffered_input->block_buffer,uint8_t);
  /* Attached output buffer */
  buffered_input->attached_buffered_output_file =
      gt_vector_new(2,sizeof(gt_buffered_output_file*));
  return buffered_input;
}
/*
 * Creates an iterator over 'stats_vector'.
 *   The index window is initialised to [0,UINT64_MAX]. Array iteration is
 *   enabled when the vector has at least one value. The 'out_values' hash
 *   is sorted by key and a hash iterator is created over it.
 *   Returns the new iterator (the original fell off the end of the function
 *   without returning it).
 */
GT_INLINE gt_stats_vector_iterator* gt_stats_vector_iterator_new(gt_stats_vector* const stats_vector) {
  // Check the argument (the original passed the type name 'gt_stats_vector')
  GT_STATS_VECTOR_CHECK(stats_vector);
  // Allocate
  gt_stats_vector_iterator* const stats_vector_iterator = gt_alloc(gt_stats_vector_iterator);
  // Init
  stats_vector_iterator->stats_vector = stats_vector;
  stats_vector_iterator->start_index = 0;
  stats_vector_iterator->end_index = UINT64_MAX;
  // NOTE(review): 'eoi' is set when there IS something to iterate; if the
  // field means "end of iteration" this looks inverted -- confirm against
  // the iterator's consumers before changing it
  stats_vector_iterator->eoi =
      (stats_vector->num_values>0 || gt_ihash_get_num_elements(stats_vector->out_values)>0);
  // Array iteration
  stats_vector_iterator->is_index_in_array = (stats_vector->num_values>0);
  stats_vector_iterator->array_index = 0;
  // Hash iteration
  gt_ihash_sort_by_key(stats_vector->out_values);
  stats_vector_iterator->ihash_iterator = gt_ihash_iterator_new(stats_vector->out_values);
  return stats_vector_iterator;
}
/*
 * Creates a raw stats vector with 'num_values' counters.
 *   Counters are zero-initialised; when 'num_values' is zero no counter
 *   array is allocated (NULL). 'out_of_range_bucket_size' goes through
 *   GT_ZERO_CHECK and is stored for the 'out_values' hash, which is created
 *   empty here.
 */
GT_INLINE gt_stats_vector* gt_stats_vector_raw_new(
    const uint64_t num_values,const uint64_t out_of_range_bucket_size) {
  GT_ZERO_CHECK(out_of_range_bucket_size);
  // Allocate handler
  gt_stats_vector* const stats_vector = gt_alloc(gt_stats_vector);
  // Init
  stats_vector->type = GT_STATS_VECTOR_RAW;
  stats_vector->counters = (num_values) ? gt_calloc(num_values,uint64_t,true) : NULL;
  // Raw
  stats_vector->num_values = num_values;
  stats_vector->out_of_range_bucket_size = out_of_range_bucket_size;
  stats_vector->out_values = gt_ihash_new();
  // Range fields are not used by this vector type; they are set explicitly
  // because gt_stats_vector_new_from_template() copies them
  stats_vector->min_value = 0;
  stats_vector->max_value = 0;
  stats_vector->step = 0;
  stats_vector->customed_range_values = NULL;
  // Nested
  stats_vector->template_vector = NULL;
  stats_vector->nested_vectors = NULL;
  return stats_vector;
}
/*
 * Creates an empty stats vector with the same layout as 'stats_vector'.
 *   Type, range description (min/max/step/customed values), number of
 *   values and out-of-range bucket size are copied from the template. The
 *   'customed_range_values' pointer itself is copied, not the array it
 *   points to. Counters are new and zero-initialised, the 'out_values' hash
 *   is new, and nested information is not copied.
 */
GT_INLINE gt_stats_vector* gt_stats_vector_new_from_template(gt_stats_vector* const stats_vector) {
  GT_NULL_CHECK(stats_vector);
  // Allocate handler
  gt_stats_vector* const stats_vector_copy = gt_alloc(gt_stats_vector);
  // Copy template
  stats_vector_copy->type = stats_vector->type;
  stats_vector_copy->min_value = stats_vector->min_value;
  stats_vector_copy->max_value = stats_vector->max_value;
  stats_vector_copy->step = stats_vector->step;
  stats_vector_copy->customed_range_values = stats_vector->customed_range_values;
  stats_vector_copy->num_values = stats_vector->num_values;
  stats_vector_copy->out_of_range_bucket_size = stats_vector->out_of_range_bucket_size;
  // Init (an empty template gets no counter array, as in gt_stats_vector_raw_new)
  stats_vector_copy->counters = (stats_vector->num_values) ?
      gt_calloc(stats_vector->num_values,uint64_t,true) : NULL;
  stats_vector_copy->out_values = gt_ihash_new();
  // Nested (No copy)
  stats_vector_copy->template_vector = NULL;
  stats_vector_copy->nested_vectors = NULL;
  return stats_vector_copy;
}
/*
 * Adds an @SQ record to the sequence dictionary of 'sam_headers'.
 *   A missing "SN" or "LN" tag is reported with PARSE_SAM_HEADER_MISSING_TAG.
 *   Without an "SN" tag the record is not stored (and not deleted here).
 *   Records are indexed by their SN string in 'sequence_dictionary_sn_hash'
 *   (created on first use); the hash maps the name to the record's position
 *   in the 'sequence_dictionary' vector.
 *   If a record with the same SN already exists, the old record is deleted,
 *   the new one takes its slot and PARSE_SAM_HEADER_DUPLICATE_TAG is reported.
 */
GT_INLINE void gt_sam_header_add_sequence_record(gt_sam_headers* const sam_headers,gt_sam_header_record* const header_record) {
  GT_SAM_HEADERS_CHECK(sam_headers);
  gt_string *sn_tag=gt_sam_header_record_get_tag(header_record,"SN");
  gt_cond_error(!sn_tag,PARSE_SAM_HEADER_MISSING_TAG,"SQ","SN");
  gt_cond_error(!gt_sam_header_record_get_tag(header_record,"LN"),PARSE_SAM_HEADER_MISSING_TAG,"SQ","LN");
  if(sn_tag) {
    // Lazily create the SN -> index hash
    if(!sam_headers->sequence_dictionary_sn_hash) sam_headers->sequence_dictionary_sn_hash=gt_shash_new();
    char *sn_str=gt_string_get_string(sn_tag);
    size_t* ix=gt_shash_get_element(sam_headers->sequence_dictionary_sn_hash,sn_str);
    // If SN Tag already exists, overwrite.
    if(ix) {
      gt_sam_header_record_delete(*(gt_sam_header_record **)gt_vector_get_elm(sam_headers->sequence_dictionary,*ix,gt_sam_header_record*));
      gt_vector_set_elm(sam_headers->sequence_dictionary,*ix,gt_sam_header_record*,header_record);
      gt_error(PARSE_SAM_HEADER_DUPLICATE_TAG,"SQ","SN",sn_str);
    } else {
      // New name: remember the position the record is about to occupy
      ix=gt_alloc(size_t);
      *ix=gt_vector_get_used(sam_headers->sequence_dictionary);
      // NOTE(review): the type argument is 'size_t*' although the stored
      // element is a size_t -- confirm what gt_shash_insert expects
      gt_shash_insert(sam_headers->sequence_dictionary_sn_hash,sn_str,ix,size_t*);
      gt_vector_insert(sam_headers->sequence_dictionary,header_record,gt_sam_header_record*);
    }
  }
}
/*
 * Constructors
 */
/*
 * Creates a stats vector whose buckets are delimited by a caller-supplied
 * array of range values.
 *   'customed_range_values' holds 'num_values' entries and the vector gets
 *   num_values-1 ranges, so at least 2 values are required. The array
 *   pointer is stored as is (not copied).
 *   'out_of_range_bucket_size' goes through GT_ZERO_CHECK and is stored for
 *   the 'out_values' hash, which is created empty here.
 */
GT_INLINE gt_stats_vector* gt_stats_vector_customed_range_new(
    const uint64_t* const customed_range_values,const uint64_t num_values,
    const uint64_t out_of_range_bucket_size) {
  GT_NULL_CHECK(customed_range_values);
  // gt_cond_fatal_error() fires when its condition holds, so the INVALID
  // case must be tested (the original aborted on every valid num_values>=2)
  gt_cond_fatal_error(num_values<2,INVALID_VALUE,"'num_values'",">=2");
  GT_ZERO_CHECK(out_of_range_bucket_size);
  // Allocate handler
  gt_stats_vector* const stats_vector = gt_alloc(gt_stats_vector);
  // Init
  stats_vector->type = GT_STATS_VECTOR_CUSTOMED_RANGE;
  stats_vector->counters = gt_calloc(num_values,uint64_t,true);
  // Customed range vector
  stats_vector->customed_range_values = customed_range_values;
  stats_vector->num_values = num_values-1;
  stats_vector->out_of_range_bucket_size = out_of_range_bucket_size;
  stats_vector->out_values = gt_ihash_new();
  // Step-range fields are not used by this vector type; they are set
  // explicitly because gt_stats_vector_new_from_template() copies them
  stats_vector->min_value = 0;
  stats_vector->max_value = 0;
  stats_vector->step = 0;
  // Nested
  stats_vector->template_vector = NULL;
  stats_vector->nested_vectors = NULL;
  return stats_vector;
}
gt_input_file* gt_input_file_open(char* const file_name,const bool mmap_file) { GT_NULL_CHECK(file_name); // Allocate handler gt_input_file* input_file = gt_alloc(gt_input_file); // Input file struct stat stat_info; unsigned char tbuf[4]; int i; gt_cond_fatal_error(stat(file_name,&stat_info)==-1,FILE_STAT,file_name); input_file->file_name = file_name; input_file->file_size = stat_info.st_size; input_file->eof = (input_file->file_size==0); input_file->file_format = FILE_FORMAT_UNKNOWN; gt_cond_fatal_error(pthread_mutex_init(&input_file->input_mutex,NULL),SYS_MUTEX_INIT); if (mmap_file) { input_file->file = NULL; input_file->fildes = open(file_name,O_RDONLY,0); // TODO: O_NOATIME condCompl (Thanks Jordi Camps) gt_cond_fatal_error(input_file->fildes==-1,FILE_OPEN,file_name); input_file->file_buffer = (uint8_t*) mmap(0,input_file->file_size,PROT_READ,MAP_PRIVATE,input_file->fildes,0); gt_cond_fatal_error(input_file->file_buffer==MAP_FAILED,SYS_MMAP_FILE,file_name); input_file->file_type = MAPPED_FILE; } else { input_file->fildes = -1; gt_cond_fatal_error(!(input_file->file=fopen(file_name,"r")),FILE_OPEN,file_name); input_file->file_type = REGULAR_FILE; if(S_ISREG(stat_info.st_mode)) { // Regular file - check if gzip or bzip compressed i=(int)fread(tbuf,(size_t)1,(size_t)4,input_file->file); if(tbuf[0]==0x1f && tbuf[1]==0x8b && tbuf[2]==0x08) { input_file->file_type=GZIPPED_FILE; fclose(input_file->file); #ifdef HAVE_ZLIB gt_cond_fatal_error(!(input_file->file=(void *)gzopen(file_name,"r")),FILE_GZIP_OPEN,file_name); #else gt_fatal_error(FILE_GZIP_NO_ZLIB,file_name); #endif } else if(tbuf[0]=='B' && tbuf[1]=='Z' && tbuf[2]=='h' && tbuf[3]>='0' && tbuf[3]<='9') { fseek(input_file->file,0L,SEEK_SET); input_file->file_type=BZIPPED_FILE; #ifdef HAVE_BZLIB input_file->file=BZ2_bzReadOpen(&i,input_file->file,0,0,NULL,0); gt_cond_fatal_error(i!=BZ_OK,FILE_BZIP2_OPEN,file_name); #else gt_fatal_error(FILE_BZIP2_NO_BZLIB,file_name); #endif } else { 
fseek(input_file->file,0L,SEEK_SET); } } else { input_file->eof=0; } input_file->file_buffer = gt_malloc(GT_INPUT_BUFFER_SIZE); } // Auxiliary Buffer (for synch purposes) input_file->buffer_size = 0; input_file->buffer_begin = 0; input_file->buffer_pos = 0; input_file->global_pos = 0; input_file->processed_lines = 0; // ID generator input_file->processed_id = 0; // Detect file format gt_input_file_detect_file_format(input_file); return input_file; }