Beispiel #1
0
/*
 * Adds a float-valued SAM attribute to @sam_attributes.
 * A gt_sam_attribute is allocated with gt_alloc, filled in with
 * (tag,type_id,value) through gt_sam_attribute_set_fvalue() and then passed
 * to gt_sam_attributes_add_attribute().
 * NOTE(review): who releases the allocated attribute is not visible here.
 */
GT_INLINE void gt_sam_attributes_add_fvalue(
    gt_sam_attributes* const sam_attributes,char* const tag,
    char type_id,const float value) {
  // Argument checks
  GT_SAM_ATTRIBUTES_CHECK(sam_attributes);
  GT_NULL_CHECK(tag);
  // Build the attribute and register it
  gt_sam_attribute* const new_attribute = gt_alloc(gt_sam_attribute);
  gt_sam_attribute_set_fvalue(new_attribute,tag,type_id,value);
  gt_sam_attributes_add_attribute(sam_attributes,new_attribute);
}
/*
 * Creates a stats vector of type GT_STATS_VECTOR_STEP_RANGE.
 *   min_value,max_value       Inclusive range covered by the counters array
 *                             (min_value>max_value is a fatal error)
 *   step                      Width of each bucket (checked with GT_ZERO_CHECK)
 *   out_of_range_bucket_size  Bucket width stored for values kept in the
 *                             out_values hash (checked with GT_ZERO_CHECK)
 * Returns the newly allocated vector; nested/template pointers are NULL.
 */
GT_INLINE gt_stats_vector* gt_stats_vector_step_range_new(
    const uint64_t min_value,const uint64_t max_value,const uint64_t step,
    const uint64_t out_of_range_bucket_size) {
  // Validate arguments
  GT_ZERO_CHECK(step);
  gt_cond_fatal_error(min_value>max_value,VSTATS_INVALID_MIN_MAX);
  GT_ZERO_CHECK(out_of_range_bucket_size);
  // Buckets needed to span the inclusive range, rounding up to a whole step
  const uint64_t span = max_value-min_value+1;
  const uint64_t total_buckets = (span+(step-1))/step;
  // Allocate handler
  gt_stats_vector* const vector = gt_alloc(gt_stats_vector);
  // Step-range description
  vector->type = GT_STATS_VECTOR_STEP_RANGE;
  vector->min_value = min_value;
  vector->max_value = max_value;
  vector->step = step;
  vector->num_values = total_buckets;
  vector->out_of_range_bucket_size = out_of_range_bucket_size;
  // Storage: in-range counters array + hash for out-of-range values
  vector->counters = gt_calloc(total_buckets,uint64_t,true);
  vector->out_values = gt_ihash_new();
  // No nested vectors yet
  vector->template_vector = NULL;
  vector->nested_vectors = NULL;
  return vector;
}
/*
 * SegmentedSEQ Constructor
 */
/*
 * Allocates a gt_segmented_sequence and initializes its fields:
 * a blocks vector created with GT_SEQ_ARCHIVE_NUM_BLOCKS elements of
 * gt_compact_dna_string* size, a total length of zero and a new name string.
 */
GT_INLINE gt_segmented_sequence* gt_segmented_sequence_new(void) {
  gt_segmented_sequence* const segmented_seq = gt_alloc(gt_segmented_sequence);
  segmented_seq->sequence_total_length = 0;
  // Block storage and sequence name
  segmented_seq->blocks = gt_vector_new(GT_SEQ_ARCHIVE_NUM_BLOCKS,sizeof(gt_compact_dna_string*));
  segmented_seq->seq_name = gt_string_new(10);
  return segmented_seq;
}
Beispiel #4
0
/*
 * Basic I/O functions
 */
gt_input_file* gt_input_stream_open(FILE* stream) {
    GT_NULL_CHECK(stream);
    // Allocate handler
    gt_input_file* input_file = gt_alloc(gt_input_file);
    // Input file
    input_file->file_name = GT_STREAM_FILE_NAME;
    input_file->file_type = STREAM;
    input_file->file = stream;
    input_file->fildes = -1;
    input_file->eof = feof(stream);
    input_file->file_size = UINT64_MAX;
    input_file->file_format = FILE_FORMAT_UNKNOWN;
    gt_cond_fatal_error(pthread_mutex_init(&input_file->input_mutex, NULL),SYS_MUTEX_INIT);
    // Auxiliary Buffer (for synch purposes)
    input_file->file_buffer = gt_malloc(GT_INPUT_BUFFER_SIZE);
    input_file->buffer_size = 0;
    input_file->buffer_begin = 0;
    input_file->buffer_pos = 0;
    input_file->global_pos = 0;
    input_file->processed_lines = 0;
    // ID generator
    input_file->processed_id = 0;
    // Detect file format
    gt_input_file_detect_file_format(input_file);
    return input_file;
}
/*
 * Constructor
 */
/*
 * Allocates a gt_dna_read together with its tag, read and qualities strings
 * (created with the GT_DNA_READ_* initial lengths) and a new attributes
 * container.
 */
GT_INLINE gt_dna_read* gt_dna_read_new(void) {
  gt_dna_read* const dna_read = gt_alloc(gt_dna_read);
  // Strings
  dna_read->tag = gt_string_new(GT_DNA_READ_TAG_INITIAL_LENGTH);
  dna_read->read = gt_string_new(GT_DNA_READ_INITIAL_LENGTH);
  dna_read->qualities = gt_string_new(GT_DNA_READ_INITIAL_LENGTH);
  // Attributes
  dna_read->attributes = gt_attributes_new();
  return dna_read;
}
Beispiel #6
0
/*
 * Adds a SAM attribute whose value is produced by the callback @s_func.
 * A gt_sam_attribute is allocated with gt_alloc, set up with
 * (tag,type_id,s_func) through gt_sam_attribute_set_sfunc() and then passed
 * to gt_sam_attributes_add_attribute().
 * NOTE(review): who releases the allocated attribute is not visible here.
 */
GT_INLINE void gt_sam_attributes_add_sfunc(
    gt_sam_attributes* const sam_attributes,char* const tag,char type_id,
    gt_status (*s_func)(gt_sam_attribute_func_params*)) {
  // Argument checks
  GT_SAM_ATTRIBUTES_CHECK(sam_attributes);
  GT_NULL_CHECK(tag);
  GT_NULL_CHECK(s_func);
  // Build the attribute and register it
  gt_sam_attribute* const new_attribute = gt_alloc(gt_sam_attribute);
  gt_sam_attribute_set_sfunc(new_attribute,tag,type_id,s_func);
  gt_sam_attributes_add_attribute(sam_attributes,new_attribute);
}
Beispiel #7
0
/*
 * Adds a string-valued SAM attribute to @sam_attributes.
 * A gt_sam_attribute is allocated with gt_alloc, set up with
 * (tag,type_id,string) through gt_sam_attribute_set_svalue() and then passed
 * to gt_sam_attributes_add_attribute().
 * NOTE(review): whether @string is copied or referenced is decided by
 * gt_sam_attribute_set_svalue(), which is not visible here.
 */
GT_INLINE void gt_sam_attributes_add_svalue(
    gt_sam_attributes* const sam_attributes,char* const tag,
    char type_id,gt_string* const string) {
  // Argument checks
  GT_SAM_ATTRIBUTES_CHECK(sam_attributes);
  GT_NULL_CHECK(tag);
  GT_STRING_CHECK(string);
  // Build the attribute and register it
  gt_sam_attribute* const new_attribute = gt_alloc(gt_sam_attribute);
  gt_sam_attribute_set_svalue(new_attribute,tag,type_id,string);
  gt_sam_attributes_add_attribute(sam_attributes,new_attribute);
}
/*
 * Generic Printer Attributes
 */
/*
 * Allocates a gt_generic_printer_attributes with every per-format output
 * attribute pointer set to NULL, then applies @file_format through
 * gt_generic_printer_attributes_set_format().
 */
GT_INLINE gt_generic_printer_attributes* gt_generic_printer_attributes_new(const gt_file_format file_format) {
  gt_generic_printer_attributes* const printer_attributes = gt_alloc(gt_generic_printer_attributes);
  // Start without any format-specific attributes
  printer_attributes->output_map_attributes = NULL;
  printer_attributes->output_fasta_attributes = NULL;
  printer_attributes->output_sam_attributes = NULL;
  // Select the requested format
  gt_generic_printer_attributes_set_format(printer_attributes,file_format);
  return printer_attributes;
}
Beispiel #9
0
/*
 * Allocates a gt_sam_headers holding a header string (@HD), vectors sized
 * for gt_string* elements for read groups (@RG), programs (@PG) and
 * comments (@CO), and no sequence archive (@SQ).
 */
GT_INLINE gt_sam_headers* gt_sam_header_new(void) {
  gt_sam_headers* const headers = gt_alloc(gt_sam_headers);
  // @HD
  headers->header = gt_string_new(50);
  // @RG
  headers->read_group = gt_vector_new(GT_ATTR_SAM_INIT_ELEMENTS,sizeof(gt_string*));
  // @PG
  headers->program = gt_vector_new(GT_ATTR_SAM_INIT_ELEMENTS,sizeof(gt_string*));
  // @CO
  headers->comments = gt_vector_new(GT_ATTR_SAM_INIT_ELEMENTS,sizeof(gt_string*));
  // @SQ
  headers->sequence_archive = NULL;
  return headers;
}
Beispiel #10
0
/*
 * Allocates a gt_sam_attribute_func_params with its return string buffer and
 * attributes container, then resets it through
 * gt_sam_attribute_func_params_clear().
 */
GT_INLINE gt_sam_attribute_func_params* gt_sam_attribute_func_params_new() {
  gt_sam_attribute_func_params* const params = gt_alloc(gt_sam_attribute_func_params);
  // String (gt_string) buffer
  params->return_s = gt_string_new(GT_SAM_ATTR_FUNC_PARAMS_RETURN_S_INIT_LENGTH);
  // Attributes
  params->attributes = gt_attributes_new();
  // Reset defaults
  gt_sam_attribute_func_params_clear(params);
  return params;
}
/*
 * Allocates a gt_sam_headers with no @HD record, vectors sized for
 * gt_sam_header_record* elements for @RG, @PG and @SQ, a vector sized for
 * gt_string* elements for @CO, and all lookup hashes set to NULL.
 */
GT_INLINE gt_sam_headers* gt_sam_header_new(void) {
  gt_sam_headers* const headers = gt_alloc(gt_sam_headers);
  // @HD
  headers->header = NULL;
  // @RG
  headers->read_group = gt_vector_new(GT_ATTR_SAM_INIT_ELEMENTS,sizeof(gt_sam_header_record*));
  // @PG
  headers->program = gt_vector_new(GT_ATTR_SAM_INIT_ELEMENTS,sizeof(gt_sam_header_record*));
  // @SQ
  headers->sequence_dictionary = gt_vector_new(GT_ATTR_SAM_INIT_ELEMENTS,sizeof(gt_sam_header_record*));
  // @CO
  headers->comments = gt_vector_new(GT_ATTR_SAM_INIT_ELEMENTS,sizeof(gt_string*));
  // Lookup hashes start unset
  headers->program_id_hash = NULL;
  headers->read_group_id_hash = NULL;
  headers->sequence_dictionary_sn_hash = NULL;
  return headers;
}
/*
 * Vector's Buckets getters
 */
/*
 * Returns a pointer to the counter of the out_values hash bucket that holds
 * @value, creating the counter (set to zero) the first time the bucket is
 * requested.
 *   stats_vector  Vector whose out_values hash is queried/updated
 *   value         Value to locate; its bucket is value/out_of_range_bucket_size
 */
GT_INLINE uint64_t* gt_stats_hvector_get_counter(gt_stats_vector* const stats_vector,const uint64_t value) {
  // Check the vector instance (the argument, not the type name)
  GT_STATS_VECTOR_CHECK(stats_vector);
  const uint64_t bucket_index = value/stats_vector->out_of_range_bucket_size;
  // Fetch counter
  uint64_t* counter = gt_ihash_get_element(stats_vector->out_values,bucket_index);
  if (counter!=NULL) return counter;
  // Allocate new counter
  counter = gt_alloc(uint64_t);
  *counter = 0;
  gt_ihash_insert(stats_vector->out_values,bucket_index,counter,uint64_t);
  return counter;
}
/*
 * Buffered map file handlers
 */
/*
 * Allocates a gt_buffered_input_file bound to @input_file.
 * The block buffer is a byte vector created with GT_BMI_BUFFER_SIZE, the
 * cursor points at that vector's memory, block_id/current_line_num start at
 * their MAX sentinels and a vector for attached buffered output files is
 * created.
 */
gt_buffered_input_file* gt_buffered_input_file_new(gt_input_file* const input_file) {
  GT_NULL_CHECK(input_file);
  gt_buffered_input_file* const buffered_file = gt_alloc(gt_buffered_input_file);
  // Input file
  buffered_file->input_file = input_file;
  // Block buffer and cursors
  buffered_file->block_id = UINT32_MAX;
  buffered_file->current_line_num = UINT64_MAX;
  buffered_file->block_buffer = gt_vector_new(GT_BMI_BUFFER_SIZE,sizeof(uint8_t));
  buffered_file->cursor = (char*) gt_vector_get_mem(buffered_file->block_buffer,uint8_t);
  // Attached output buffer
  buffered_file->attached_buffered_output_file = gt_vector_new(2,sizeof(gt_buffered_output_file*));
  return buffered_file;
}
/*
 * Creates an iterator over @stats_vector.
 * The iterator covers indexes [0,UINT64_MAX], starts on the counters array
 * when the vector has num_values>0, and holds a hash iterator over
 * out_values (which is sorted by key before the iterator is created).
 * Returns the newly allocated iterator.
 */
GT_INLINE gt_stats_vector_iterator* gt_stats_vector_iterator_new(gt_stats_vector* const stats_vector) {
  // Check the vector instance (the argument, not the type name)
  GT_STATS_VECTOR_CHECK(stats_vector);
  // Allocate
  gt_stats_vector_iterator* const stats_vector_iterator = gt_alloc(gt_stats_vector_iterator);
  // Init
  stats_vector_iterator->stats_vector = stats_vector;
  stats_vector_iterator->start_index = 0;
  stats_vector_iterator->end_index = UINT64_MAX;
  // NOTE(review): eoi becomes true when the vector HAS array or hash values;
  //   if eoi stands for "end of iteration" this looks inverted -- confirm
  //   against the code that consumes the iterator before changing it
  stats_vector_iterator->eoi = (stats_vector->num_values>0 || gt_ihash_get_num_elements(stats_vector->out_values)>0);
  // Array iteration
  stats_vector_iterator->is_index_in_array = (stats_vector->num_values>0);
  stats_vector_iterator->array_index = 0;
  // Hash iteration
  gt_ihash_sort_by_key(stats_vector->out_values);
  stats_vector_iterator->ihash_iterator = gt_ihash_iterator_new(stats_vector->out_values);
  // Hand the iterator back to the caller
  return stats_vector_iterator;
}
/*
 * Creates a stats vector of type GT_STATS_VECTOR_RAW.
 *   num_values                Number of counters in the array; when zero no
 *                             array is allocated (counters is NULL)
 *   out_of_range_bucket_size  Bucket width stored for values kept in the
 *                             out_values hash (checked with GT_ZERO_CHECK)
 * Returns the newly allocated vector; nested/template pointers are NULL.
 */
GT_INLINE gt_stats_vector* gt_stats_vector_raw_new(
    const uint64_t num_values,const uint64_t out_of_range_bucket_size) {
  GT_ZERO_CHECK(out_of_range_bucket_size);
  // Allocate handler
  gt_stats_vector* const vector = gt_alloc(gt_stats_vector);
  vector->type = GT_STATS_VECTOR_RAW;
  // Counters array only exists for a non-empty raw range
  if (num_values) {
    vector->counters = gt_calloc(num_values,uint64_t,true);
  } else {
    vector->counters = NULL;
  }
  // Raw
  vector->num_values = num_values;
  vector->out_of_range_bucket_size = out_of_range_bucket_size;
  vector->out_values = gt_ihash_new();
  // Nested
  vector->nested_vectors = NULL;
  vector->template_vector = NULL;
  return vector;
}
/*
 * Creates a new stats vector with the same configuration as @stats_vector
 * (type, range, step, customed range values, number of values and
 * out-of-range bucket size) but with fresh counters and a new, empty-created
 * out_values hash. Nested/template pointers are not copied (set to NULL).
 * The customed_range_values pointer is shared with the template, not cloned.
 */
GT_INLINE gt_stats_vector* gt_stats_vector_new_from_template(gt_stats_vector* const stats_vector) {
  // Allocate handler
  gt_stats_vector* const stats_vector_copy = gt_alloc(gt_stats_vector);
  // Copy template
  stats_vector_copy->type = stats_vector->type;
  stats_vector_copy->min_value = stats_vector->min_value;
  stats_vector_copy->max_value = stats_vector->max_value;
  stats_vector_copy->step = stats_vector->step;
  stats_vector_copy->customed_range_values = stats_vector->customed_range_values;
  stats_vector_copy->num_values = stats_vector->num_values;
  stats_vector_copy->out_of_range_bucket_size = stats_vector->out_of_range_bucket_size;
  // Init (a template without counters, e.g. a raw vector with num_values==0,
  //   yields a NULL array as in gt_stats_vector_raw_new instead of a
  //   zero-sized allocation request)
  stats_vector_copy->counters = (stats_vector->num_values) ?
      gt_calloc(stats_vector->num_values,uint64_t,true) : NULL;
  stats_vector_copy->out_values = gt_ihash_new();
  // Nested (No copy)
  stats_vector_copy->template_vector = NULL;
  stats_vector_copy->nested_vectors = NULL;
  return stats_vector_copy;
}
/*
 * Adds an SQ header record to @sam_headers->sequence_dictionary, indexed by
 * its SN tag in sequence_dictionary_sn_hash (created on first use).
 *   - A missing SN or LN tag is reported through gt_cond_error.
 *   - Without an SN tag nothing is stored.
 *   - If a record with the same SN already exists, the old record is deleted,
 *     replaced in place by @header_record, and a duplicate-tag error is
 *     reported.
 * NOTE(review): when SN is missing @header_record is neither stored nor
 *   released here -- confirm the caller keeps ownership in that case.
 */
GT_INLINE void gt_sam_header_add_sequence_record(gt_sam_headers* const sam_headers,gt_sam_header_record* const header_record) {
  GT_SAM_HEADERS_CHECK(sam_headers);
  gt_string *sn_tag=gt_sam_header_record_get_tag(header_record,"SN");
  gt_cond_error(!sn_tag,PARSE_SAM_HEADER_MISSING_TAG,"SQ","SN");
  gt_cond_error(!gt_sam_header_record_get_tag(header_record,"LN"),PARSE_SAM_HEADER_MISSING_TAG,"SQ","LN");
  if(sn_tag) {
    if(!sam_headers->sequence_dictionary_sn_hash) sam_headers->sequence_dictionary_sn_hash=gt_shash_new();
    char *sn_str=gt_string_get_string(sn_tag);
    // The hash maps the SN string to the record's position in the dictionary vector
    size_t* ix=gt_shash_get_element(sam_headers->sequence_dictionary_sn_hash,sn_str);
    // If SN Tag already exists, overwrite.
    if(ix) {
      gt_sam_header_record_delete(*(gt_sam_header_record **)gt_vector_get_elm(sam_headers->sequence_dictionary,*ix,gt_sam_header_record*));
      gt_vector_set_elm(sam_headers->sequence_dictionary,*ix,gt_sam_header_record*,header_record);
      gt_error(PARSE_SAM_HEADER_DUPLICATE_TAG,"SQ","SN",sn_str);
    } else {
      // New SN: remember the index the record is about to take, then append it
      ix=gt_alloc(size_t);
      *ix=gt_vector_get_used(sam_headers->sequence_dictionary);
      gt_shash_insert(sam_headers->sequence_dictionary_sn_hash,sn_str,ix,size_t*);
      gt_vector_insert(sam_headers->sequence_dictionary,header_record,gt_sam_header_record*);
    }
  }
}
/*
 * Constructors
 */
/*
 * Creates a stats vector of type GT_STATS_VECTOR_CUSTOMED_RANGE.
 *   customed_range_values     Array of range boundaries; the pointer is
 *                             stored as-is (not copied), so it must outlive
 *                             the vector
 *   num_values                Number of boundaries in the array; at least 2
 *                             are required, giving num_values-1 ranges
 *   out_of_range_bucket_size  Bucket width stored for values kept in the
 *                             out_values hash (checked with GT_ZERO_CHECK)
 * Returns the newly allocated vector; nested/template pointers are NULL.
 */
GT_INLINE gt_stats_vector* gt_stats_vector_customed_range_new(
    const uint64_t* const customed_range_values,const uint64_t num_values,
    const uint64_t out_of_range_bucket_size) {
  GT_NULL_CHECK(customed_range_values);
  // Fewer than two boundaries cannot delimit a range (and num_values-1
  //   below would otherwise be zero or wrap around)
  gt_cond_fatal_error(num_values<2,INVALID_VALUE,"'num_values'",">=2");
  GT_ZERO_CHECK(out_of_range_bucket_size);
  // Allocate handler
  gt_stats_vector* const stats_vector = gt_alloc(gt_stats_vector);
  // Init
  stats_vector->type = GT_STATS_VECTOR_CUSTOMED_RANGE;
  stats_vector->counters = gt_calloc(num_values,uint64_t,true);
  // Customed range vector
  stats_vector->customed_range_values = customed_range_values;
  stats_vector->num_values = num_values-1;
  stats_vector->out_of_range_bucket_size = out_of_range_bucket_size;
  stats_vector->out_values = gt_ihash_new();
  // Nested
  stats_vector->template_vector = NULL;
  stats_vector->nested_vectors = NULL;
  return stats_vector;
}
Beispiel #19
0
/*
 * Opens @file_name and returns a newly allocated gt_input_file handler.
 *   file_name  Path of the file; the pointer is stored in the handler as-is
 *   mmap_file  If true the file is opened with open() and mapped read-only
 *              with mmap() (file_type MAPPED_FILE). Otherwise it is opened
 *              with fopen() and, for regular files, its first bytes are
 *              inspected to detect gzip/bzip2 compression (file_type
 *              REGULAR_FILE, GZIPPED_FILE or BZIPPED_FILE).
 * Failures of stat/open/mmap/fopen/mutex init, and compressed input without
 * the matching library compiled in, are raised as fatal errors.
 * The file format is detected before returning.
 */
gt_input_file* gt_input_file_open(char* const file_name,const bool mmap_file) {
    GT_NULL_CHECK(file_name);
    // Allocate handler
    gt_input_file* input_file = gt_alloc(gt_input_file);
    // Input file
    struct stat stat_info;
    gt_cond_fatal_error(stat(file_name,&stat_info)==-1,FILE_STAT,file_name);
    input_file->file_name = file_name;
    input_file->file_size = stat_info.st_size;
    input_file->eof = (input_file->file_size==0);
    input_file->file_format = FILE_FORMAT_UNKNOWN;
    gt_cond_fatal_error(pthread_mutex_init(&input_file->input_mutex,NULL),SYS_MUTEX_INIT);
    if (mmap_file) {
        input_file->file = NULL;
        input_file->fildes = open(file_name,O_RDONLY,0); // TODO: O_NOATIME condCompl (Thanks Jordi Camps)
        gt_cond_fatal_error(input_file->fildes==-1,FILE_OPEN,file_name);
        input_file->file_buffer =
            (uint8_t*) mmap(0,input_file->file_size,PROT_READ,MAP_PRIVATE,input_file->fildes,0);
        gt_cond_fatal_error(input_file->file_buffer==MAP_FAILED,SYS_MMAP_FILE,file_name);
        input_file->file_type = MAPPED_FILE;
    } else {
        input_file->fildes = -1;
        gt_cond_fatal_error(!(input_file->file=fopen(file_name,"r")),FILE_OPEN,file_name);
        input_file->file_type = REGULAR_FILE;
        if(S_ISREG(stat_info.st_mode)) {
            // Regular file - check if gzip or bzip compressed.
            // The magic buffer is zeroed and the number of bytes actually read
            // is honoured, so files shorter than the signature are never
            // matched against indeterminate bytes.
            unsigned char tbuf[4] = {0,0,0,0};
            const size_t magic_len = fread(tbuf,(size_t)1,sizeof(tbuf),input_file->file);
            if(magic_len>=3 && tbuf[0]==0x1f && tbuf[1]==0x8b && tbuf[2]==0x08) {
                input_file->file_type=GZIPPED_FILE;
                // The plain stream is closed; the file is reopened through zlib
                fclose(input_file->file);
#ifdef HAVE_ZLIB
                gt_cond_fatal_error(!(input_file->file=(void *)gzopen(file_name,"r")),FILE_GZIP_OPEN,file_name);
#else
                gt_fatal_error(FILE_GZIP_NO_ZLIB,file_name);
#endif
            } else if(magic_len==4 && tbuf[0]=='B' && tbuf[1]=='Z' && tbuf[2]=='h' && tbuf[3]>='0' && tbuf[3]<='9') {
                // Rewind so the bzip2 reader starts at the beginning of the stream
                fseek(input_file->file,0L,SEEK_SET);
                input_file->file_type=BZIPPED_FILE;
#ifdef HAVE_BZLIB
                int bz_error = BZ_OK;
                input_file->file=BZ2_bzReadOpen(&bz_error,input_file->file,0,0,NULL,0);
                gt_cond_fatal_error(bz_error!=BZ_OK,FILE_BZIP2_OPEN,file_name);
#else
                gt_fatal_error(FILE_BZIP2_NO_BZLIB,file_name);
#endif
            } else {
                // Plain file: undo the signature read
                fseek(input_file->file,0L,SEEK_SET);
            }
        } else {
            // Not a regular file: the size reported by stat is not used to
            // decide end-of-file
            input_file->eof=0;
        }
        input_file->file_buffer = gt_malloc(GT_INPUT_BUFFER_SIZE);
    }
    // Auxiliary Buffer (for synch purposes)
    input_file->buffer_size = 0;
    input_file->buffer_begin = 0;
    input_file->buffer_pos = 0;
    input_file->global_pos = 0;
    input_file->processed_lines = 0;
    // ID generator
    input_file->processed_id = 0;
    // Detect file format
    gt_input_file_detect_file_format(input_file);
    return input_file;
}