Ejemplo n.º 1
0
GT_INLINE gt_compact_dna_string* gt_segmented_sequence_get_block(gt_segmented_sequence* const sequence,const uint64_t position) {
  const uint64_t num_block = position/GT_SEQ_ARCHIVE_BLOCK_SIZE;
  const uint64_t blocks_used = gt_vector_get_used(sequence->blocks);
  // Allocate new blocks (if needed)
  if (num_block>=blocks_used) {
    uint64_t i;
    for (i=blocks_used;i<num_block;++i) {
      gt_vector_insert(sequence->blocks,NULL,gt_compact_dna_string*);
    }
    gt_compact_dna_string* const block = gt_cdna_string_new(GT_SEQ_ARCHIVE_BLOCK_SIZE);
    gt_vector_insert(sequence->blocks,block,gt_compact_dna_string*);
    return block;
  } else {
Ejemplo n.º 2
0
/*
 * Basic line functions
 */
/*
 * Appends the pending bytes of the input file's internal buffer -- the
 * range [buffer_begin, buffer_pos) of file_buffer -- to the end of
 * buffer_dst, then moves buffer_begin up to buffer_pos.
 * Returns the number of bytes appended (0 when nothing is pending, in
 * which case neither the file state nor buffer_dst is touched).
 */
GT_INLINE size_t gt_input_file_dump_to_buffer(gt_input_file* const input_file,gt_vector* const buffer_dst) { // FIXME: If mmap file, internal buffer is just pointers to mem
    GT_INPUT_FILE_CHECK(input_file);
    // Bytes consumed from the file buffer but not yet handed to buffer_dst
    const uint64_t pending_bytes = input_file->buffer_pos-input_file->buffer_begin;
    if (gt_expect_false(pending_bytes==0)) {
        return 0;
    }
    // Reserve first; the destination address is only taken afterwards
    gt_vector_reserve_additional(buffer_dst,pending_bytes);
    void* const dst_tail = gt_vector_get_mem(buffer_dst,uint8_t)+gt_vector_get_used(buffer_dst);
    const void* const src_chunk = input_file->file_buffer+input_file->buffer_begin;
    memcpy(dst_tail,src_chunk,pending_bytes);
    gt_vector_add_used(buffer_dst,pending_bytes);
    // Everything up to buffer_pos has now been dumped
    input_file->buffer_begin=input_file->buffer_pos;
    return pending_bytes;
}
Ejemplo n.º 3
0
GT_INLINE size_t gt_input_file_next_record(
    gt_input_file* const input_file,gt_vector* const buffer_dst,gt_string* const first_field,
    uint64_t* const num_blocks,uint64_t* const num_tabs) {
    GT_INPUT_FILE_CHECK(input_file);
    GT_VECTOR_CHECK(buffer_dst);
    GT_INPUT_FILE_CHECK_BUFFER__DUMP(input_file,buffer_dst);
    if (input_file->eof) return GT_INPUT_FILE_EOF;
    // Read line
    uint64_t const begin_line_pos_at_file = input_file->buffer_pos;
    uint64_t const begin_line_pos_at_buffer = gt_vector_get_used(buffer_dst);
    uint64_t current_pfield = 0, length_first_field = 0;
    while (gt_expect_true(!input_file->eof &&
                          GT_INPUT_FILE_CURRENT_CHAR(input_file)!=EOL &&
                          GT_INPUT_FILE_CURRENT_CHAR(input_file)!=DOS_EOL)) {
        if (current_pfield==0) {
            if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==TAB)) {
                ++current_pfield;
                ++(*num_tabs);
            } else {
                ++length_first_field;
            }
        } else if (current_pfield==1) {
            if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==SPACE)) {
                ++(*num_blocks);
            } else if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==TAB)) {
                ++current_pfield;
                ++(*num_tabs);
                ++(*num_blocks);
            }
        } else {
            if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==TAB)) {
                ++current_pfield;
                ++(*num_tabs);
            }
        }
        GT_INPUT_FILE_NEXT_CHAR__DUMP(input_file,buffer_dst);
    }
    // Handle EOL
    GT_INPUT_FILE_HANDLE_EOL(input_file,buffer_dst);
    // Set first field (from the input_file_buffer or the buffer_dst)
    if (first_field) {
        char* first_field_begin;
        if (input_file->buffer_pos <= begin_line_pos_at_file) {
            gt_input_file_dump_to_buffer(input_file,buffer_dst); // Forced to dump to buffer
            first_field_begin = gt_vector_get_elm(buffer_dst,begin_line_pos_at_buffer,char);
        } else {
GT_INLINE void gt_sam_header_add_sequence_record(gt_sam_headers* const sam_headers,gt_sam_header_record* const header_record) {
  GT_SAM_HEADERS_CHECK(sam_headers);
  gt_string *sn_tag=gt_sam_header_record_get_tag(header_record,"SN");
  gt_cond_error(!sn_tag,PARSE_SAM_HEADER_MISSING_TAG,"SQ","SN");
  gt_cond_error(!gt_sam_header_record_get_tag(header_record,"LN"),PARSE_SAM_HEADER_MISSING_TAG,"SQ","LN");
  if(sn_tag) {
  	if(!sam_headers->sequence_dictionary_sn_hash) sam_headers->sequence_dictionary_sn_hash=gt_shash_new();
  	char *sn_str=gt_string_get_string(sn_tag);
  	size_t* ix=gt_shash_get_element(sam_headers->sequence_dictionary_sn_hash,sn_str);
  	// If SN Tag already exists, overwrite.
  	if(ix) {
  		gt_sam_header_record_delete(*(gt_sam_header_record **)gt_vector_get_elm(sam_headers->sequence_dictionary,*ix,gt_sam_header_record*));
  		gt_vector_set_elm(sam_headers->sequence_dictionary,*ix,gt_sam_header_record*,header_record);
  		gt_error(PARSE_SAM_HEADER_DUPLICATE_TAG,"SQ","SN",sn_str);
  	} else {
  		ix=gt_alloc(size_t);
  		*ix=gt_vector_get_used(sam_headers->sequence_dictionary);
  	  gt_shash_insert(sam_headers->sequence_dictionary_sn_hash,sn_str,ix,size_t*);
  		gt_vector_insert(sam_headers->sequence_dictionary,header_record,gt_sam_header_record*);
  	}
  }
Ejemplo n.º 5
0
/*
 * End-of-block test: returns true once the cursor position reported by
 * gt_buffered_input_file_get_cursor_pos() is at or beyond the used size
 * of block_buffer, false while it is still short of it.
 */
GT_INLINE bool gt_buffered_input_file_eob(gt_buffered_input_file* const buffered_input_file) {
  GT_BUFFERED_INPUT_FILE_CHECK(buffered_input_file);
  // The cursor has not yet reached the used size of the block buffer
  if (gt_vector_get_used(buffered_input_file->block_buffer) > gt_buffered_input_file_get_cursor_pos(buffered_input_file)) {
    return false;
  }
  return true;
}