/*
 * Appends more lines from the underlying input file to the block buffer
 * of @buffered_input_file without disturbing the current read cursor.
 *
 *   @num_lines  Number of lines requested; 0 selects GT_BMI_NUM_LINES.
 *
 * Returns GT_BMI_EOF if the input file is already flagged as EOF, otherwise
 * the value reported by gt_input_file_get_lines() (also added to
 * lines_in_buffer).
 *
 * NOTE(review): unlike gt_buffered_input_file_get_block(), this function does
 * not take the input-file lock itself; presumably the caller is expected to
 * handle that -- confirm against call sites.
 */
GT_INLINE gt_status gt_buffered_input_file_add_lines_to_block(
    gt_buffered_input_file* const buffered_input_file,const uint64_t num_lines) {
  GT_BUFFERED_INPUT_FILE_CHECK(buffered_input_file);
  gt_input_file* const in_file = buffered_input_file->input_file;
  // Nothing left to read
  if (in_file->eof) {
    return GT_BMI_EOF;
  }
  // Keep the cursor as an offset from the start of the block buffer; it is
  // re-derived from the buffer after the read (presumably because the buffer
  // memory may move while growing -- TODO confirm)
  const uint64_t cursor_offset =
      buffered_input_file->cursor - gt_vector_get_mem(buffered_input_file->block_buffer,char);
  // Resolve how many lines to ask for
  uint64_t lines_wanted;
  if (gt_expect_true(num_lines)) {
    lines_wanted = num_lines;
  } else {
    lines_wanted = GT_BMI_NUM_LINES;
  }
  // Read the lines into the block buffer
  const uint64_t lines_read =
      gt_input_file_get_lines(in_file,buffered_input_file->block_buffer,lines_wanted);
  buffered_input_file->lines_in_buffer += lines_read;
  // Re-anchor the cursor at the same offset
  buffered_input_file->cursor =
      gt_vector_get_elm(buffered_input_file->block_buffer,cursor_offset,char);
  return lines_read;
}
/*
 * Consumes one line from @input_file, using @buffer_dst as the destination
 * vector handed to the *__DUMP / HANDLE_EOL macros.
 *
 * Returns GT_INPUT_FILE_EOF if the file is already flagged as EOF on entry,
 * GT_INPUT_FILE_LINE_READ otherwise.
 */
GT_INLINE size_t gt_input_file_next_line(gt_input_file* const input_file,gt_vector* const buffer_dst) {
  GT_INPUT_FILE_CHECK(input_file);
  GT_VECTOR_CHECK(buffer_dst);
  GT_INPUT_FILE_CHECK_BUFFER__DUMP(input_file,buffer_dst);
  if (input_file->eof) {
    return GT_INPUT_FILE_EOF;
  }
  // Advance character by character until the input ends or a line terminator
  // (EOL or DOS_EOL) is the current character. The eof flag is tested first so
  // the current character is never inspected once EOF has been reached.
  for (;;) {
    if (input_file->eof) break;
    if (GT_INPUT_FILE_CURRENT_CHAR(input_file)==EOL) break;
    if (GT_INPUT_FILE_CURRENT_CHAR(input_file)==DOS_EOL) break;
    GT_INPUT_FILE_NEXT_CHAR__DUMP(input_file,buffer_dst);
  }
  // Let the shared macro deal with the terminator itself
  GT_INPUT_FILE_HANDLE_EOL(input_file,buffer_dst);
  return GT_INPUT_FILE_LINE_READ;
}
GT_INLINE size_t gt_input_file_next_record( gt_input_file* const input_file,gt_vector* const buffer_dst,gt_string* const first_field, uint64_t* const num_blocks,uint64_t* const num_tabs) { GT_INPUT_FILE_CHECK(input_file); GT_VECTOR_CHECK(buffer_dst); GT_INPUT_FILE_CHECK_BUFFER__DUMP(input_file,buffer_dst); if (input_file->eof) return GT_INPUT_FILE_EOF; // Read line uint64_t const begin_line_pos_at_file = input_file->buffer_pos; uint64_t const begin_line_pos_at_buffer = gt_vector_get_used(buffer_dst); uint64_t current_pfield = 0, length_first_field = 0; while (gt_expect_true(!input_file->eof && GT_INPUT_FILE_CURRENT_CHAR(input_file)!=EOL && GT_INPUT_FILE_CURRENT_CHAR(input_file)!=DOS_EOL)) { if (current_pfield==0) { if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==TAB)) { ++current_pfield; ++(*num_tabs); } else { ++length_first_field; } } else if (current_pfield==1) { if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==SPACE)) { ++(*num_blocks); } else if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==TAB)) { ++current_pfield; ++(*num_tabs); ++(*num_blocks); } } else { if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==TAB)) { ++current_pfield; ++(*num_tabs); } } GT_INPUT_FILE_NEXT_CHAR__DUMP(input_file,buffer_dst); } // Handle EOL GT_INPUT_FILE_HANDLE_EOL(input_file,buffer_dst); // Set first field (from the input_file_buffer or the buffer_dst) if (first_field) { char* first_field_begin; if (input_file->buffer_pos <= begin_line_pos_at_file) { gt_input_file_dump_to_buffer(input_file,buffer_dst); // Forced to dump to buffer first_field_begin = gt_vector_get_elm(buffer_dst,begin_line_pos_at_buffer,char); } else {
/*
 * Handlers
 */
/*
 * Makes position @pos addressable in @cdna_string.
 *
 * If @pos is beyond the current length, the storage is resized when @pos is
 * not covered by the allocated size (with GT_CDNA_BLOCK_CHARS*10 of extra
 * headroom), every block from the one following the current last block up to
 * the block containing @pos is initialized, and the length becomes pos+1.
 * If @pos is already within the current length, nothing happens.
 */
GT_INLINE void gt_cdna_allocate__init_blocks(gt_compact_dna_string* const cdna_string,const uint64_t pos) {
  GT_COMPACT_DNA_STRING_CHECK(cdna_string);
  // Position already inside the string: nothing to allocate or initialize
  if (pos < cdna_string->length) {
    return;
  }
  // Grow the storage if pos is not covered by what is allocated
  if (pos >= cdna_string->allocated) {
    gt_cdna_string_resize(cdna_string,pos+(GT_CDNA_BLOCK_CHARS*10));
  }
  // First block that has not been touched yet. For an empty string this is
  // block 1 (block 0 is presumably initialized elsewhere -- TODO confirm)
  uint64_t first_fresh_block;
  if (gt_expect_true(cdna_string->length>0)) {
    first_fresh_block = ((cdna_string->length-1)/GT_CDNA_BLOCK_CHARS)+1;
  } else {
    first_fresh_block = 1;
  }
  // Block that holds pos
  const uint64_t last_block = pos/GT_CDNA_BLOCK_CHARS;
  // Initialize every newly reachable block
  if (first_fresh_block <= last_block) {
    uint64_t* block = GT_CDNA_GET_MEM_BLOCK(cdna_string->bitmaps,first_fresh_block);
    uint64_t block_num;
    for (block_num=first_fresh_block; block_num<=last_block; ++block_num, block+=GT_CDNA_BLOCK_BITMAPS) {
      GT_CDNA_INIT_BLOCK(block);
    }
  }
  // Update total length
  cdna_string->length = pos+1;
}
/*
 * Loads the next chunk of @input_file into its internal buffer.
 *
 * On entry global_pos is advanced by the size of the chunk being discarded
 * and buffer_pos / buffer_begin are rewound to 0. The data source depends on
 * file_type:
 *   - STREAM / REGULAR_FILE   fread() of up to GT_INPUT_BUFFER_SIZE bytes
 *   - MAPPED_FILE             no read call; buffer_size becomes the number of
 *                             bytes between global_pos and file_size
 *   - GZIPPED_FILE            gzread()      (only with HAVE_ZLIB)
 *   - BZIPPED_FILE            BZ2_bzRead()  (only with HAVE_BZLIB)
 *
 * Returns the number of bytes now available (buffer_size). A return of 0
 * means no more data could be obtained; in that case the eof flag is set.
 *
 * Decompression errors are treated as end of input: gzread() reports errors
 * with a negative value and BZ2_bzRead() leaves its return value undefined
 * unless the status is BZ_OK or BZ_STREAM_END, so neither result may be
 * stored into buffer_size unchecked (a negative count would come back to the
 * caller as a huge size_t).
 */
GT_INLINE size_t gt_input_file_fill_buffer(gt_input_file* const input_file) {
  GT_INPUT_FILE_CHECK(input_file);
  input_file->global_pos += input_file->buffer_size;
  input_file->buffer_pos = 0;
  input_file->buffer_begin = 0;
  // Plain stdio sources (streams and regular files)
  if (gt_expect_true(
      (input_file->file_type==STREAM || input_file->file_type==REGULAR_FILE) &&
      !feof(input_file->file))) {
    input_file->buffer_size =
        fread(input_file->file_buffer,sizeof(uint8_t),GT_INPUT_BUFFER_SIZE,input_file->file);
    if (input_file->buffer_size==0) {
      input_file->eof = true;
    }
    return input_file->buffer_size;
  }
  // Memory-mapped file: expose whatever lies between global_pos and file_size
  if (input_file->file_type==MAPPED_FILE && input_file->global_pos < input_file->file_size) {
    input_file->buffer_size = input_file->file_size-input_file->global_pos;
    return input_file->buffer_size;
  }
#ifdef HAVE_ZLIB
  if (input_file->file_type==GZIPPED_FILE && !gzeof((gzFile)input_file->file)) {
    const int gz_bytes =
        gzread((gzFile)input_file->file,input_file->file_buffer,GT_INPUT_BUFFER_SIZE);
    if (gz_bytes <= 0) {
      // 0 = nothing left, negative = read/decompression error
      input_file->buffer_size = 0;
      input_file->eof = true;
      return 0;
    }
    input_file->buffer_size = gz_bytes;
    return input_file->buffer_size;
  }
#endif
#ifdef HAVE_BZLIB
  if (input_file->file_type==BZIPPED_FILE) {
    int bzerr = BZ_OK;
    const int bz_bytes =
        BZ2_bzRead(&bzerr,input_file->file,input_file->file_buffer,GT_INPUT_BUFFER_SIZE);
    // The byte count is only meaningful for BZ_OK / BZ_STREAM_END
    if ((bzerr!=BZ_OK && bzerr!=BZ_STREAM_END) || bz_bytes <= 0) {
      input_file->buffer_size = 0;
      input_file->eof = true;
      return 0;
    }
    input_file->buffer_size = bz_bytes;
    return input_file->buffer_size;
  }
#endif
  // Source exhausted (or file type not handled in this build).
  // NOTE(review): buffer_size keeps its previous value on this path; confirm
  // that callers rely only on the eof flag / return value here.
  input_file->eof = true;
  return 0;
}
/*
 * Loads a fresh block of lines from the underlying input file into the block
 * buffer of @buffered_input_file and resets the cursor to the buffer start.
 *
 *   @num_lines  Number of lines requested; 0 selects GT_BMI_NUM_LINES.
 *
 * The input file is locked while the block id, the starting line number and
 * the lines themselves are obtained. The eof flag is tested once before
 * taking the lock and once more after acquiring it.
 *
 * Returns GT_BMI_EOF if the input file is flagged as EOF, otherwise the new
 * value of lines_in_buffer.
 */
GT_INLINE gt_status gt_buffered_input_file_get_block(
    gt_buffered_input_file* const buffered_input_file,const uint64_t num_lines) {
  GT_BUFFERED_INPUT_FILE_CHECK(buffered_input_file);
  gt_input_file* const in_file = buffered_input_file->input_file;
  // Early exit without touching the lock
  if (in_file->eof) {
    return GT_BMI_EOF;
  }
  gt_input_file_lock(in_file);
  if (!in_file->eof) {
    // Resolve how many lines to ask for
    uint64_t lines_wanted;
    if (gt_expect_true(num_lines)) {
      lines_wanted = num_lines;
    } else {
      lines_wanted = GT_BMI_NUM_LINES;
    }
    // Tag the block and read it while holding the lock
    buffered_input_file->block_id = gt_input_file_next_id(in_file) % UINT32_MAX;
    buffered_input_file->current_line_num = in_file->processed_lines+1;
    buffered_input_file->lines_in_buffer =
        gt_input_file_get_lines(in_file,buffered_input_file->block_buffer,lines_wanted);
    gt_input_file_unlock(in_file);
    // Setup the block: cursor goes back to the first byte of the buffer
    buffered_input_file->cursor = gt_vector_get_mem(buffered_input_file->block_buffer,char);
    return buffered_input_file->lines_in_buffer;
  }
  // EOF was flagged by the time the lock was acquired
  gt_input_file_unlock(in_file);
  return GT_BMI_EOF;
}