/*
 * Appends more lines from the underlying input file to the block buffer
 * of @buffered_input_file, keeping the read cursor at the same offset.
 *   @num_lines  Number of lines requested; zero selects GT_BMI_NUM_LINES.
 * Returns GT_BMI_EOF if the input file is already flagged as exhausted,
 * otherwise the value reported by gt_input_file_get_lines().
 */
GT_INLINE gt_status gt_buffered_input_file_add_lines_to_block(
    gt_buffered_input_file* const buffered_input_file,const uint64_t num_lines) {
  GT_BUFFERED_INPUT_FILE_CHECK(buffered_input_file);
  gt_input_file* const source = buffered_input_file->input_file;
  if (source->eof) {
    return GT_BMI_EOF;
  }
  // Save the cursor as an offset from the start of the block buffer
  // (presumably because appending may relocate the buffer memory -- TODO confirm)
  const uint64_t cursor_offset =
      buffered_input_file->cursor - gt_vector_get_mem(buffered_input_file->block_buffer,char);
  // Pick the amount of lines to request (default when none is given)
  uint64_t lines_requested;
  if (gt_expect_true(num_lines)) {
    lines_requested = num_lines;
  } else {
    lines_requested = GT_BMI_NUM_LINES;
  }
  // Read the lines into the block buffer
  const uint64_t appended =
      gt_input_file_get_lines(source,buffered_input_file->block_buffer,lines_requested);
  buffered_input_file->lines_in_buffer += appended;
  // Re-derive the cursor from the saved offset
  buffered_input_file->cursor =
      gt_vector_get_elm(buffered_input_file->block_buffer,cursor_offset,char);
  return appended;
}
Beispiel #2
0
/*
 * Advances @input_file past one line, dumping the traversed characters
 * into @buffer_dst through the GT_INPUT_FILE_*__DUMP macros.
 * Returns GT_INPUT_FILE_EOF if the file is already flagged as exhausted
 * on entry, GT_INPUT_FILE_LINE_READ otherwise.
 */
GT_INLINE size_t gt_input_file_next_line(gt_input_file* const input_file,gt_vector* const buffer_dst) {
    GT_INPUT_FILE_CHECK(input_file);
    GT_VECTOR_CHECK(buffer_dst);
    GT_INPUT_FILE_CHECK_BUFFER__DUMP(input_file,buffer_dst);
    if (input_file->eof) {
        return GT_INPUT_FILE_EOF;
    }
    // Consume characters until the input ends or an end-of-line marker shows up
    // (the eof flag is tested first, before the current character is inspected)
    while (gt_expect_true(!(input_file->eof ||
                            GT_INPUT_FILE_CURRENT_CHAR(input_file)==EOL ||
                            GT_INPUT_FILE_CURRENT_CHAR(input_file)==DOS_EOL))) {
        GT_INPUT_FILE_NEXT_CHAR__DUMP(input_file,buffer_dst);
    }
    // Let the EOL handler deal with the line terminator
    GT_INPUT_FILE_HANDLE_EOL(input_file,buffer_dst);
    return GT_INPUT_FILE_LINE_READ;
}
Beispiel #3
0
GT_INLINE size_t gt_input_file_next_record(
    gt_input_file* const input_file,gt_vector* const buffer_dst,gt_string* const first_field,
    uint64_t* const num_blocks,uint64_t* const num_tabs) {
    GT_INPUT_FILE_CHECK(input_file);
    GT_VECTOR_CHECK(buffer_dst);
    GT_INPUT_FILE_CHECK_BUFFER__DUMP(input_file,buffer_dst);
    if (input_file->eof) return GT_INPUT_FILE_EOF;
    // Read line
    uint64_t const begin_line_pos_at_file = input_file->buffer_pos;
    uint64_t const begin_line_pos_at_buffer = gt_vector_get_used(buffer_dst);
    uint64_t current_pfield = 0, length_first_field = 0;
    while (gt_expect_true(!input_file->eof &&
                          GT_INPUT_FILE_CURRENT_CHAR(input_file)!=EOL &&
                          GT_INPUT_FILE_CURRENT_CHAR(input_file)!=DOS_EOL)) {
        if (current_pfield==0) {
            if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==TAB)) {
                ++current_pfield;
                ++(*num_tabs);
            } else {
                ++length_first_field;
            }
        } else if (current_pfield==1) {
            if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==SPACE)) {
                ++(*num_blocks);
            } else if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==TAB)) {
                ++current_pfield;
                ++(*num_tabs);
                ++(*num_blocks);
            }
        } else {
            if (gt_expect_false(GT_INPUT_FILE_CURRENT_CHAR(input_file)==TAB)) {
                ++current_pfield;
                ++(*num_tabs);
            }
        }
        GT_INPUT_FILE_NEXT_CHAR__DUMP(input_file,buffer_dst);
    }
    // Handle EOL
    GT_INPUT_FILE_HANDLE_EOL(input_file,buffer_dst);
    // Set first field (from the input_file_buffer or the buffer_dst)
    if (first_field) {
        char* first_field_begin;
        if (input_file->buffer_pos <= begin_line_pos_at_file) {
            gt_input_file_dump_to_buffer(input_file,buffer_dst); // Forced to dump to buffer
            first_field_begin = gt_vector_get_elm(buffer_dst,begin_line_pos_at_buffer,char);
        } else {
/*
 * Handlers
 */
/*
 * Makes position @pos addressable in @cdna_string: resizes the storage when
 * @pos reaches the allocated amount, initializes every block between the
 * current end and the block holding @pos, and sets the length to pos+1.
 * Does nothing when @pos is already below the current length.
 */
GT_INLINE void gt_cdna_allocate__init_blocks(gt_compact_dna_string* const cdna_string,const uint64_t pos) {
  GT_COMPACT_DNA_STRING_CHECK(cdna_string);
  // Position already covered by the current length
  if (pos < cdna_string->length) {
    return;
  }
  // Grow the allocation (with some slack) if @pos is not covered by it
  if (pos >= cdna_string->allocated) {
    gt_cdna_string_resize(cdna_string,pos+(GT_CDNA_BLOCK_CHARS*10));
  }
  // First block that still has to be initialized.
  // NOTE(review): for an empty string this starts at block 1, so block 0 is
  // presumably initialized elsewhere (e.g. at construction) -- confirm.
  uint64_t first_new_block;
  if (gt_expect_true(cdna_string->length>0)) {
    first_new_block = ((cdna_string->length-1)/GT_CDNA_BLOCK_CHARS)+1;
  } else {
    first_new_block = 1;
  }
  // Block holding @pos
  const uint64_t last_new_block = pos/GT_CDNA_BLOCK_CHARS;
  if (first_new_block <= last_new_block) {
    uint64_t* block = GT_CDNA_GET_MEM_BLOCK(cdna_string->bitmaps,first_new_block);
    uint64_t pending = (last_new_block-first_new_block)+1;
    while (pending > 0) {
      GT_CDNA_INIT_BLOCK(block);
      block += GT_CDNA_BLOCK_BITMAPS;
      --pending;
    }
  }
  // The string now extends up to (and including) @pos
  cdna_string->length = pos+1;
}
Beispiel #5
0
/*
 * Loads the next chunk of @input_file into its internal buffer.
 *
 * The global position is first advanced by the size of the chunk being
 * replaced and the buffer cursors are reset to zero. Then, by file type:
 *   - STREAM / REGULAR_FILE: reads up to GT_INPUT_BUFFER_SIZE bytes with fread().
 *   - MAPPED_FILE: nothing is read; the buffer size becomes the number of
 *     bytes left between the global position and the file size.
 *   - GZIPPED_FILE (HAVE_ZLIB): reads with gzread().
 *   - BZIPPED_FILE (HAVE_BZLIB): reads with BZ2_bzRead().
 * Any other situation (unknown type, source already at its end) flags EOF.
 *
 * A read that yields no data flags input_file->eof. Read errors reported by
 * gzread() (negative return) or BZ2_bzRead() (error code other than
 * BZ_OK/BZ_STREAM_END) are handled the same way, so an error value is never
 * stored as a buffer size.
 *
 * Returns the number of bytes now available in the buffer (0 on EOF/error).
 */
GT_INLINE size_t gt_input_file_fill_buffer(gt_input_file* const input_file) {
    GT_INPUT_FILE_CHECK(input_file);
    // Account for the chunk being replaced and rewind the buffer cursors
    input_file->global_pos += input_file->buffer_size;
    input_file->buffer_pos = 0;
    input_file->buffer_begin = 0;
    if (gt_expect_true(
                (input_file->file_type==STREAM || input_file->file_type==REGULAR_FILE) &&
                !feof(input_file->file))) {
        input_file->buffer_size =
            fread(input_file->file_buffer,sizeof(uint8_t),GT_INPUT_BUFFER_SIZE,input_file->file);
        if (input_file->buffer_size==0) {
            input_file->eof = true;
        }
        return input_file->buffer_size;
    } else if (input_file->file_type==MAPPED_FILE && input_file->global_pos < input_file->file_size) {
        // Mapped files expose everything that is left in one go
        input_file->buffer_size = input_file->file_size-input_file->global_pos;
        return input_file->buffer_size;
#ifdef HAVE_ZLIB
    } else if (input_file->file_type==GZIPPED_FILE && !gzeof((gzFile)input_file->file)) {
        // gzread() returns a negative value on error; keep it in a signed
        // local so it cannot end up as a (huge) buffer size
        const int gz_bytes =
            gzread((gzFile)input_file->file,input_file->file_buffer,GT_INPUT_BUFFER_SIZE);
        if (gz_bytes <= 0) {
            input_file->buffer_size = 0;
            input_file->eof = true;
        } else {
            input_file->buffer_size = gz_bytes;
        }
        return input_file->buffer_size;
#endif
#ifdef HAVE_BZLIB
    } else if (input_file->file_type==BZIPPED_FILE) {
        // The byte count is only meaningful for BZ_OK and BZ_STREAM_END
        int bzerr = BZ_OK;
        const int bz_bytes =
            BZ2_bzRead(&bzerr,input_file->file,input_file->file_buffer,GT_INPUT_BUFFER_SIZE);
        if ((bzerr!=BZ_OK && bzerr!=BZ_STREAM_END) || bz_bytes <= 0) {
            input_file->buffer_size = 0;
            input_file->eof = true;
        } else {
            input_file->buffer_size = bz_bytes;
        }
        return input_file->buffer_size;
#endif
    } else {
        input_file->eof = true;
        return 0;
    }
}
/*
 * Loads a new block of lines from the underlying input file into
 * @buffered_input_file (block id, starting line number, line count and cursor).
 *   @num_lines  Number of lines requested; zero selects GT_BMI_NUM_LINES.
 * The input file is locked while the block is fetched.
 * Returns GT_BMI_EOF if the input file is flagged as exhausted, otherwise
 * the number of lines stored in the block buffer.
 */
GT_INLINE gt_status gt_buffered_input_file_get_block(
    gt_buffered_input_file* const buffered_input_file,const uint64_t num_lines) {
  GT_BUFFERED_INPUT_FILE_CHECK(buffered_input_file);
  gt_input_file* const source = buffered_input_file->input_file;
  // Quick test before taking the lock
  if (source->eof) {
    return GT_BMI_EOF;
  }
  gt_input_file_lock(source);
  // Test again now that the lock is held
  if (!source->eof) {
    // Pick the amount of lines to request (default when none is given)
    uint64_t lines_requested;
    if (gt_expect_true(num_lines)) {
      lines_requested = num_lines;
    } else {
      lines_requested = GT_BMI_NUM_LINES;
    }
    // Tag the block and fetch its lines
    buffered_input_file->block_id = gt_input_file_next_id(source) % UINT32_MAX;
    buffered_input_file->current_line_num = source->processed_lines+1;
    buffered_input_file->lines_in_buffer =
        gt_input_file_get_lines(source,buffered_input_file->block_buffer,lines_requested);
    gt_input_file_unlock(source);
    // Point the cursor at the beginning of the freshly loaded block
    buffered_input_file->cursor = gt_vector_get_mem(buffered_input_file->block_buffer,char);
    return buffered_input_file->lines_in_buffer;
  }
  gt_input_file_unlock(source);
  return GT_BMI_EOF;
}