GT_INLINE size_t gt_input_file_next_record( gt_input_file* const input_file,gt_vector* const buffer_dst,gt_string* const first_field, uint64_t* const num_blocks,uint64_t* const num_tabs) { GT_INPUT_FILE_CHECK(input_file); GT_VECTOR_CHECK(buffer_dst); GT_INPUT_FILE_CHECK_BUFFER__DUMP(input_file,buffer_dst); if (input_file->eof) return GT_INPUT_FILE_EOF; // Read line uint64_t const begin_line_pos_at_file = input_file->buffer_pos; uint64_t const begin_line_pos_at_buffer = gt_vector_get_used(buffer_dst); uint64_t current_pfield = 0, length_first_field = 0; while (gt_expect_true(!input_file->eof && GT_INPUT_FILE_CURRENT_CHAR(input_file)!=EOL && GT_INPUT_FILE_CURRENT_CHAR(input_file)!=DOS_EOL)) { if (current_pfield==0) { if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==TAB)) { ++current_pfield; ++(*num_tabs); } else { ++length_first_field; } } else if (current_pfield==1) { if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==SPACE)) { ++(*num_blocks); } else if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==TAB)) { ++current_pfield; ++(*num_tabs); ++(*num_blocks); } } else { if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==TAB)) { ++current_pfield; ++(*num_tabs); } } GT_INPUT_FILE_NEXT_CHAR__DUMP(input_file,buffer_dst); } // Handle EOL GT_INPUT_FILE_HANDLE_EOL(input_file,buffer_dst); // Set first field (from the input_file_buffer or the buffer_dst) if (first_field) { char* first_field_begin; if (input_file->buffer_pos <= begin_line_pos_at_file) { gt_input_file_dump_to_buffer(input_file,buffer_dst); // Forced to dump to buffer first_field_begin = gt_vector_get_elm(buffer_dst,begin_line_pos_at_buffer,char); } else {
/*
 * Basic line functions
 */
/*
 * Copies the not-yet-dumped part of the internal file buffer, i.e. the bytes
 * in [buffer_begin, buffer_pos), to the end of 'buffer_dst' and then moves
 * 'buffer_begin' up to 'buffer_pos'.
 *   @input_file  Input file whose internal buffer is dumped
 *   @buffer_dst  Vector receiving the bytes (its used count grows accordingly)
 * Returns the number of bytes copied (0 if there was nothing pending).
 */
GT_INLINE size_t gt_input_file_dump_to_buffer(gt_input_file* const input_file,gt_vector* const buffer_dst) {
  // FIXME: If mmap file, internal buffer is just pointers to mem
  GT_INPUT_FILE_CHECK(input_file);
  // Amount of data read since the last dump
  const uint64_t pending_bytes = input_file->buffer_pos-input_file->buffer_begin;
  if (gt_expect_false(pending_bytes==0)) {
    return 0; // Nothing pending; buffer_dst and buffer_begin stay untouched
  }
  // Make room in the destination before taking its write pointer
  // (NOTE(review): presumably the reserve may relocate the vector memory - confirm)
  gt_vector_reserve_additional(buffer_dst,pending_bytes);
  void* const dst_mem = gt_vector_get_mem(buffer_dst,uint8_t)+gt_vector_get_used(buffer_dst);
  const void* const src_mem = input_file->file_buffer+input_file->buffer_begin;
  // Append the pending bytes and account for them in the vector
  memcpy(dst_mem,src_mem,pending_bytes);
  gt_vector_add_used(buffer_dst,pending_bytes);
  // Everything up to the current position has now been dumped
  input_file->buffer_begin = input_file->buffer_pos;
  // Number of bytes written to buffer_dst
  return pending_bytes;
}
/*
 * Appends the first 'length' characters of 'string' to 'cdna_string'.
 * Each character is encoded with gt_cdna_encode() and stored through
 * GT_CDNA_SET_CHAR(), starting at the current end of the compact string.
 * gt_cdna_string_resize() is invoked beforehand whenever the index of the
 * last appended character is not below 'cdna_string->allocated'.
 *   @cdna_string  Compact DNA string being extended ('length' field is updated)
 *   @string       Characters to append (at least 'length' of them are read)
 *   @length       Number of characters to append; 0 leaves cdna_string untouched
 */
GT_INLINE void gt_cdna_string_append_string(gt_compact_dna_string* const cdna_string,char* const string,const uint64_t length) {
  GT_COMPACT_DNA_STRING_CHECK(cdna_string);
  // Empty append: nothing to store. Returning here also keeps the unsigned
  // 'length-1' below from wrapping to UINT64_MAX on an empty compact string,
  // which would otherwise be passed on to gt_cdna_string_resize().
  if (gt_expect_false(length==0)) return;
  // Check allocated bitmaps (total_chars = index of the last char once appended)
  register const uint64_t total_chars = cdna_string->length+length-1;
  if (total_chars >= cdna_string->allocated) {
    gt_cdna_string_resize(cdna_string,total_chars);
  }
  // Copy string, beginning at the block/offset derived from the current length
  register uint64_t block_num, block_pos, i;
  GT_CDNA_GET_BLOCK_POS(cdna_string->length,block_num,block_pos);
  register uint64_t* block_mem = GT_CDNA_GET_MEM_BLOCK(cdna_string->bitmaps,block_num);
  for (i=0; i<length; ++i,++block_pos) {
    if (gt_expect_false(block_pos==GT_CDNA_BLOCK_CHARS)) {
      // End of the current block reached: continue at the start of the next one
      block_pos=0;
      block_mem+=GT_CDNA_BLOCK_BITMAPS;
    }
    register const uint8_t enc_char = gt_cdna_encode(string[i]);
    GT_CDNA_SET_CHAR(block_mem,block_pos,enc_char);
  }
  // Update total length
  cdna_string->length = total_chars+1;
}