/*
 * Parse the textual <start> and <end> columns of the current BED line into
 * <range> and convert them to 1-based coordinates.
 *
 * range     output range; only transformed (1-based conversion, offset) when
 *           the function succeeds
 * start/end strings holding the BED start and end positions
 * offset    if non-zero, applied to the range via gt_range_offset()
 * bed_file  supplies the filename and line number used in error messages
 * thick     if true, a range of length 0 is not treated as an error (it is
 *           left for the caller to handle)
 * err       receives the error message on failure
 *
 * Returns 0 on success, the non-zero result of gt_parse_range() if parsing
 * fails, and -1 if a non-thick feature has length 0.
 */
static int parse_bed_range(GtRange *range, GtStr *start, GtStr *end,
                           GtWord offset, GtIO *bed_file, bool thick,
                           GtError *err)
{
  int had_err;
  gt_error_check(err);
  had_err = gt_parse_range(range, gt_str_get(start), gt_str_get(end),
                           gt_io_get_line_number(bed_file),
                           gt_io_get_filename(bed_file), err);
  /* Do not post-process a range that could not be parsed. */
  if (had_err)
    return had_err;
  /* BED has a weird numbering scheme: positions are 0-based, but the end
     position is not part of the feature. Transform to 1-based coordinates. */
  range->start++;
  /* Ranges defining a 'thick' region sometimes come with length 0 to designate
     that there are no thick regions. So do not fail here and handle that case
     later. */
  if (!thick && range->start > range->end) {
    gt_error_set(err, "file \"%s\": line "GT_WU": BED feature has length 0",
                 gt_io_get_filename(bed_file),
                 gt_io_get_line_number(bed_file));
    /* Bail out before the offset step: the range is inverted here.
       NOTE(review): gt_range_offset() presumably expects start <= end --
       confirm against its implementation. */
    return -1;
  }
  /* A zero-length thick range still reaches this point and is offset exactly
     as before. */
  if (offset)
    *range = gt_range_offset(range, offset);
  return 0;
}
/*
 * Parse the textual <start> and <end> columns of the current BED line into
 * <range> and convert them to 1-based coordinates.
 *
 * range     output range; only transformed (1-based conversion, offset) when
 *           the function succeeds
 * start/end strings holding the BED start and end positions
 * offset    if non-zero, applied to the range via gt_range_offset()
 * bed_file  supplies the filename and line number used in error messages
 * err       receives the error message on failure
 *
 * Returns 0 on success, the non-zero result of gt_parse_range() if parsing
 * fails, and -1 if the feature has length 0.
 */
static int parse_bed_range(GtRange *range, GtStr *start, GtStr *end,
                           long offset, GtIO *bed_file, GtError *err)
{
  int had_err;
  gt_error_check(err);
  had_err = gt_parse_range(range, gt_str_get(start), gt_str_get(end),
                           gt_io_get_line_number(bed_file),
                           gt_io_get_filename(bed_file), err);
  /* Do not post-process a range that could not be parsed. */
  if (had_err)
    return had_err;
  /* BED has a weird numbering scheme: positions are 0-based, but the end
     position is not part of the feature. Transform to 1-based coordinates. */
  range->start++;
  if (range->start > range->end) {
    gt_error_set(err, "file \"%s\": line %lu: BED feature has length 0",
                 gt_io_get_filename(bed_file),
                 gt_io_get_line_number(bed_file));
    /* Bail out before the offset step: the range is inverted here.
       NOTE(review): gt_range_offset() presumably expects start <= end --
       confirm against its implementation. */
    return -1;
  }
  if (offset)
    *range = gt_range_offset(range, offset);
  return 0;
}
static void make_sequence_region(GtHashmap *sequence_regions, GtStr *sequenceid, GthRegionFactory *srf, GthInput *input, GtUword filenum, GtUword seqnum) { GtUword offset_is_defined = false; GtRange range, descrange; GtGenomeNode *sr = NULL; gt_assert(sequence_regions && sequenceid && srf && input); if (gth_input_use_substring_spec(input)) { range.start = gth_input_genomic_substring_from(input); range.end = gth_input_genomic_substring_to(input); } else { range = gth_input_get_relative_genomic_range(input, filenum, seqnum); } if (srf->use_desc_ranges) { GtStr *description = gt_str_new(); gth_input_get_genomic_description(input, description, filenum, seqnum); if (!gt_parse_description_range(gt_str_get(description), &descrange)) offset_is_defined = true; gt_str_delete(description); } if (offset_is_defined) range = gt_range_offset(&range, descrange.start); else range = gt_range_offset(&range, 1); /* 1-based */ if (!gt_str_length(sequenceid) || (gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid)) && !offset_is_defined)) { /* sequenceid is empty or exists already (and no offset has been parsed) -> make one up */ GtStr *seqid; char *base; base = gt_basename(gth_input_get_genomic_filename(input, filenum)); seqid = gt_str_new_cstr(base); gt_free(base); gt_str_append_char(seqid, '|'); gt_str_append_uword(seqid, seqnum + 1); /* 1-based */ seqid_store_add(srf->seqid_store, filenum, seqnum, seqid, GT_UNDEF_UWORD); gt_assert(!gt_cstr_table_get(srf->used_seqids, gt_str_get(seqid))); gt_cstr_table_add(srf->used_seqids, gt_str_get(seqid)); sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum, seqnum), range.start, range.end); gt_hashmap_add(sequence_regions, (void*) gt_cstr_table_get(srf->used_seqids, gt_str_get(seqid)), sr); gt_str_delete(seqid); } else { /* sequenceid does not exists already (or an offset has been parsed) -> use this one */ if (!gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid))) { /* no sequence region with this id exists -> create 
one */ gt_cstr_table_add(srf->used_seqids, gt_str_get(sequenceid)); seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid, offset_is_defined ? descrange.start : GT_UNDEF_UWORD); sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum, seqnum), range.start, range.end); gt_hashmap_add(sequence_regions, (void*) gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid)), sr); } else { GtRange prev_range, new_range; /* sequence region with this id exists already -> modify range */ sr = gt_hashmap_get(sequence_regions, gt_str_get(sequenceid)); gt_assert(sr); prev_range = gt_genome_node_get_range(sr); new_range = gt_range_join(&prev_range, &range); gt_genome_node_set_range(sr, &new_range); seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid, offset_is_defined ? descrange.start : GT_UNDEF_UWORD); } } gt_assert(sr); }