static int process_blocks(GtBEDParser *bed_parser, GtFeatureNode *fn, unsigned long block_count, GtStr *block_sizes, GtStr *block_starts, GtIO *bed_file, GtError *err) { GtSplitter *size_splitter = NULL , *start_splitter = NULL; int had_err = 0; gt_error_check(err); gt_assert(fn && block_count && block_sizes && block_starts); if (!gt_str_length(block_sizes)) { gt_error_set(err, "file \"%s\": line %lu: blockCount given without blockSizes", gt_io_get_filename(bed_file), gt_io_get_line_number(bed_file)); had_err = -1; } if (!had_err && !gt_str_length(block_starts)) { gt_error_set(err, "file \"%s\": line %lu: blockCount given without blockStarts", gt_io_get_filename(bed_file), gt_io_get_line_number(bed_file)); had_err = -1; } if (!had_err) { /* remove terminal commas found in real-world BED files */ remove_terminal_comma(block_sizes); remove_terminal_comma(block_starts); } if (!had_err) { size_splitter = gt_splitter_new(); gt_splitter_split(size_splitter, gt_str_get(block_sizes), gt_str_length(block_sizes), ','); if (gt_splitter_size(size_splitter) != block_count) { gt_error_set(err, "file \"%s\": line %lu: blockSizes column does not " "have blockCount=%lu many comma separated fields", gt_io_get_filename(bed_file), gt_io_get_line_number(bed_file), block_count); had_err = -1; } } if (!had_err) { start_splitter = gt_splitter_new(); gt_splitter_split(start_splitter, gt_str_get(block_starts), gt_str_length(block_starts), ','); if (gt_splitter_size(start_splitter) != block_count) { gt_error_set(err, "file \"%s\": line %lu: blockStarts column does not " "have " "blockCount=%lu many comma separated fields", gt_io_get_filename(bed_file), gt_io_get_line_number(bed_file), block_count); had_err = -1; } } if (!had_err) { had_err = create_block_features(bed_parser, fn, block_count, size_splitter, start_splitter, bed_file, err); } gt_splitter_delete(start_splitter); gt_splitter_delete(size_splitter); return had_err; }
/* Look up the targets listed in <target> in all sequence files given in
   <seqfiles>.
   <target> is treated as a comma separated list. Of every list entry only the
   part before the first blank is used; it is GFF3-unescaped and then searched
   (via gt_string_matching_bmh()) in the description of every sequence of every
   file, with show_target() passed as the match callback.
   Returns 0 on success, the error code of gt_gff3_unescape() if unescaping
   fails, and -1 if gt_bioseq_new() returns NULL for one of the files. */
static int extracttarget_from_seqfiles(const char *target,
                                       GtStrArray *seqfiles, GtError *err)
{
  GtStr *unescaped_target;
  char *escaped_target;
  GtSplitter *splitter;
  unsigned long i;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(target && seqfiles);
  splitter = gt_splitter_new();
  unescaped_target = gt_str_new();
  escaped_target = gt_cstr_dup(target); /* the splitter modifies its input */
  gt_splitter_split(splitter, escaped_target, strlen(escaped_target), ',');
  for (i = 0; !had_err && i < gt_splitter_size(splitter); i++) {
    GtSplitter *blank_splitter;
    const char *target_id;
    char *token = gt_splitter_get_token(splitter, i);
    blank_splitter = gt_splitter_new();
    gt_splitter_split(blank_splitter, token, strlen(token), ' ');
    target_id = gt_splitter_get_token(blank_splitter, 0);
    /* Start every target with an empty buffer. gt_gff3_unescape() is expected
       to append to its output string (NOTE(review): confirm against
       gff3_escaping.h); without the reset the second and later targets would
       be searched with all previous targets prepended. */
    gt_str_reset(unescaped_target);
    had_err = gt_gff3_unescape(unescaped_target, target_id, strlen(target_id),
                               err);
    if (!had_err) {
      unsigned long j;
      for (j = 0; j < gt_str_array_size(seqfiles); j++) {
        unsigned long k;
        GtBioseq *bioseq;
        if (!(bioseq = gt_bioseq_new(gt_str_array_get(seqfiles, j), err))) {
          had_err = -1;
          break;
        }
        for (k = 0; k < gt_bioseq_number_of_sequences(bioseq); k++) {
          TargetInfo target_info;
          const char *desc = gt_bioseq_get_description(bioseq, k);
          target_info.bioseq = bioseq;
          target_info.seqnum = k;
          gt_string_matching_bmh(desc, strlen(desc),
                                 gt_str_get(unescaped_target),
                                 gt_str_length(unescaped_target), show_target,
                                 &target_info);
        }
        gt_bioseq_delete(bioseq);
      }
    }
    gt_splitter_delete(blank_splitter);
  }
  gt_free(escaped_target);
  gt_str_delete(unescaped_target);
  gt_splitter_delete(splitter);
  return had_err;
}
/* Create a new XRF checker from the abbreviation definitions in <file_path>.
   Every entry of the parsed file is registered in the checker's lookup table
   under its "abbreviation" value and, if it has one, also under its "synonym"
   value. Returns NULL if gt_xrf_abbr_parse_tree_new() fails for <file_path>
   (<err> is passed on to it). */
GtXRFChecker* gt_xrf_checker_new(const char *file_path, GtError *err)
{
  GtXRFChecker *checker;
  GtUword idx = 0;
  gt_error_check(err);
  gt_assert(file_path);

  checker = gt_calloc(1UL, sizeof (GtXRFChecker));
  checker->xpt = gt_xrf_abbr_parse_tree_new(file_path, err);
  if (checker->xpt == NULL) {
    gt_xrf_checker_delete(checker);
    return NULL;
  }

  /* keys and values are owned by the parse tree, hence no free functions */
  checker->abbrvs = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  while (idx < gt_xrf_abbr_parse_tree_num_of_entries(checker->xpt)) {
    const GtXRFAbbrEntry *entry =
                           gt_xrf_abbr_parse_tree_get_entry(checker->xpt, idx);
    const char *alias;
    gt_hashmap_add(checker->abbrvs,
                   (void*) gt_xrf_abbr_entry_get_value(entry, "abbreviation"),
                   (void*) entry);
    alias = gt_xrf_abbr_entry_get_value(entry, "synonym");
    if (alias != NULL)
      gt_hashmap_add(checker->abbrvs, (void*) alias, (void*) entry);
    idx++;
  }

  checker->splitter = gt_splitter_new();
  return checker;
}
static void proc_env_options(void) { int argc; char *env_options, **argv; GtSplitter *splitter; GtError *err; /* construct argument vector from $GT_ENV_OPTIONS */ env_options = getenv("GT_ENV_OPTIONS"); if (!env_options) return; env_options = gt_cstr_dup(env_options); /* make writeable copy */ splitter = gt_splitter_new(); gt_splitter_split(splitter, env_options, strlen(env_options), ' '); argc = gt_splitter_size(splitter); argv = gt_cstr_array_preprend((const char**) gt_splitter_get_tokens(splitter), "env"); argc++; /* parse options contained in $GT_ENV_OPTIONS */ err = gt_error_new(); switch (parse_env_options(argc, (const char**) argv, err)) { case GT_OPTION_PARSER_OK: break; case GT_OPTION_PARSER_ERROR: fprintf(stderr, "error parsing $GT_ENV_OPTIONS: %s\n", gt_error_get(err)); gt_error_unset(err); break; case GT_OPTION_PARSER_REQUESTS_EXIT: break; } gt_error_delete(err); gt_free(env_options); gt_splitter_delete(splitter); gt_cstr_array_delete(argv); }
/* Determine the directory of <file> and store it in <path>.
   If <file> already contains a directory part (as computed by
   gt_file_dirname()), that part is the result. Otherwise the environment
   variable <env> is split at GT_PATH_VAR_SEPARATOR and the first component for
   which <file_exists> accepts "<component><GT_PATH_SEPARATOR><file>" is stored
   in <path>; <path> is left empty if no component matches.
   Returns 0 on success and -1 (with <err> set) if <env> is not defined. */
static int file_find_in_env_generic(GtStr *path, const char *file,
                                    const char *env,
                                    FileExistsFunc file_exists, GtError *err)
{
  const char *env_value;
  char *search_path, *dir = NULL;
  GtSplitter *dirs;
  GtUword idx;
  int found = 0;
  gt_error_check(err);
  gt_assert(file);
  gt_assert(file_exists);

  /* a file given with a directory part is not searched for */
  gt_file_dirname(path, file);
  if (gt_str_length(path) != 0)
    return 0;

  /* 'file' has no dirname -> scan $env */
  env_value = getenv(env);
  if (env_value == NULL) {
    gt_error_set(err, "environment variable $%s is not defined", env);
    return -1;
  }

  search_path = gt_cstr_dup(env_value); /* make writeable copy */
  dirs = gt_splitter_new();
  gt_splitter_split(dirs, search_path, (GtUword) strlen(search_path),
                    GT_PATH_VAR_SEPARATOR);
  for (idx = 0; !found && idx < gt_splitter_size(dirs); idx++) {
    dir = gt_splitter_get_token(dirs, idx);
    /* build the candidate "<dir><separator><file>" in path */
    gt_str_reset(path);
    gt_str_append_cstr(path, dir);
    gt_str_append_char(path, GT_PATH_SEPARATOR);
    gt_str_append_cstr(path, file);
    if (file_exists(gt_str_get(path)))
      found = 1;
  }

  /* report only the directory (or nothing if the file was not found) */
  gt_str_reset(path);
  if (found)
    gt_str_append_cstr(path, dir);

  gt_free(search_path);
  gt_splitter_delete(dirs);
  return 0;
}
/* Unit test for the GtSplitter class. Splits a number of fixed strings and
   checks the number and content of the resulting tokens with gt_ensure().
   The cases cover blank and tab delimiters, the empty string (one empty
   token), two consecutive delimiters (an empty token in between), a trailing
   delimiter (an empty last token) and a string without any delimiter.
   Returns the value of <had_err> maintained by gt_ensure(). */
int gt_splitter_unit_test(GtError *err)
{
  /* the splitter modifies its input, therefore writeable arrays are used;
     string_4 must contain TWO consecutive blanks, otherwise the empty middle
     token checked below cannot occur */
  static char string_1[] = "a bb ccc dddd eeeee",
              string_2[] = "a\tbb\tccc\tdddd\teeeee",
              string_3[] = "",
              string_4[] = "a  b",
              string_5[] = "ac bc ",
              string_6[] = "test";
  GtSplitter *s;
  int had_err = 0;
  gt_error_check(err);
  s = gt_splitter_new();

  /* string_1: blank separated tokens */
  gt_ensure(!gt_splitter_size(s));
  gt_splitter_split(s, string_1, strlen(string_1), ' ');
  gt_ensure(gt_splitter_size(s) == 5);
  gt_ensure(strcmp(gt_splitter_get_token(s, 0), "a") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 1), "bb") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 2), "ccc") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 3), "dddd") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 4), "eeeee") == 0);
  gt_splitter_reset(s);

  /* string_2: tab separated tokens */
  gt_ensure(!gt_splitter_size(s));
  gt_splitter_split(s, string_2, strlen(string_2), '\t');
  gt_ensure(gt_splitter_size(s) == 5);
  gt_ensure(strcmp(gt_splitter_get_token(s, 0), "a") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 1), "bb") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 2), "ccc") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 3), "dddd") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 4), "eeeee") == 0);
  gt_splitter_reset(s);

  /* string_3: the empty string yields exactly one empty token */
  gt_ensure(!gt_splitter_size(s));
  gt_splitter_split(s, string_3, strlen(string_3), '\t');
  gt_ensure(gt_splitter_size(s) == 1);
  gt_ensure(strcmp(gt_splitter_get_token(s, 0), "") == 0);
  gt_splitter_reset(s);

  /* string_4: consecutive delimiters yield an empty token in between */
  gt_ensure(!gt_splitter_size(s));
  gt_splitter_split(s, string_4, strlen(string_4), ' ');
  gt_ensure(gt_splitter_size(s) == 3);
  gt_ensure(strcmp(gt_splitter_get_token(s, 0), "a") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 1), "") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 2), "b") == 0);
  gt_splitter_reset(s);

  /* string_5: a trailing delimiter yields an empty last token */
  gt_ensure(!gt_splitter_size(s));
  gt_splitter_split(s, string_5, strlen(string_5), ' ');
  gt_ensure(gt_splitter_size(s) == 3);
  gt_ensure(strcmp(gt_splitter_get_token(s, 0), "ac") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 1), "bc") == 0);
  gt_ensure(strcmp(gt_splitter_get_token(s, 2), "") == 0);
  gt_splitter_reset(s);

  /* string_6: delimiter does not occur, the whole string is one token */
  gt_ensure(!gt_splitter_size(s));
  gt_splitter_split(s, string_6, strlen(string_6), ';');
  gt_ensure(gt_splitter_size(s) == 1);
  gt_ensure(strcmp(gt_splitter_get_token(s, 0), "test") == 0);

  /* free */
  gt_splitter_delete(s);
  return had_err;
}
/* Check and complete the arguments of the tool after option parsing.
   - At least one file has to be given with "-files".
   - If no name was given, it is derived from the basename of the (single)
     input file by removing its last '.'-separated suffix; a basename without
     any '.' is used unchanged. With more than one file a name is mandatory.
   - The sampling method ("page", "regular" or "none") is translated into the
     corresponding flags and the sampling rate is defaulted/validated.
   - If a quality range was given, it is copied into <qrng>, provided that
     both borders fit into an unsigned int.
   Returns 0 on success and -1 (with <err> set) otherwise. */
static int gt_compreads_compress_arguments_check(GT_UNUSED int rest_argc,
                                                 void *tool_arguments,
                                                 GtError *err)
{
  int had_err = 0;
  GtCsrHcrEncodeArguments *arguments = tool_arguments;
  GtSplitter *splitter = NULL;
  GtStr *buffer;
  gt_error_check(err);
  gt_assert(arguments);

  if (gt_str_array_size(arguments->files) == 0) {
    gt_error_set(err, "option \"-files\" is mandatory and requires"
                      " at least one filename as argument!");
    had_err = -1;
  }

  if (!had_err) {
    if (gt_str_length(arguments->name) == 0) {
      if (gt_str_array_size(arguments->files) > 1UL) {
        gt_error_set(err, "option \"-name\" needs to be specified"
                          " if more than one file is given");
        had_err = -1;
      }
      else {
        GtUword i, num_parts;
        char *basename;
        splitter = gt_splitter_new();
        basename = gt_basename(gt_str_array_get(arguments->files, 0));
        /* the splitter modifies its input, so split a copy */
        buffer = gt_str_new_cstr(basename);
        gt_splitter_split(splitter, gt_str_get(buffer), gt_str_length(buffer),
                          '.');
        num_parts = gt_splitter_size(splitter);
        if (num_parts == 1UL) {
          /* no '.' in the basename, i.e. there is no suffix to strip:
             use the basename as it is instead of leaving the name empty */
          gt_str_append_cstr(arguments->name,
                             gt_splitter_get_token(splitter, 0));
        }
        else {
          /* re-join all parts except the last one (the suffix);
             "i + 1 < num_parts" cannot wrap around, unlike "num_parts - 1" */
          for (i = 0; i + 1 < num_parts; i++) {
            if (i > 0)
              gt_str_append_char(arguments->name, '.');
            gt_str_append_cstr(arguments->name,
                               gt_splitter_get_token(splitter, i));
          }
        }
        gt_free(basename);
        gt_splitter_delete(splitter);
        gt_str_delete(buffer);
      }
    }
  }

  if (!had_err) {
    char *sampling_type = gt_str_get(arguments->method);
    static const char *methods[] = { "page", "regular", "none" };
    if (!strcmp(methods[0], sampling_type)) {
      arguments->pagewise = true;
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = GT_SAMPLING_DEFAULT_PAGE_RATE;
      else if (arguments->srate == 0) {
        gt_error_set(err, "page sampling was chosen, but sampling"
                          " rate was set to "GT_WU"! this seems wrong.",
                     arguments->srate);
        had_err = -1;
      }
    }
    else if (!strcmp(methods[1], sampling_type)) {
      arguments->regular = true;
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = GT_SAMPLING_DEFAULT_REGULAR_RATE;
      else if (arguments->srate == 0) {
        gt_error_set(err, "regular sampling was chosen, but sampling rate "
                          " was set to "GT_WU"! this seems wrong.",
                     arguments->srate);
        had_err = -1;
      }
    }
    else if (!strcmp(methods[2], sampling_type)) {
      /* without sampling the only acceptable rate is 0 */
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = 0;
      else if (arguments->srate != 0) {
        gt_error_set(err, "no sampling was chosen, but sampling rate was"
                          " set to "GT_WU"! this seems wrong.",
                     arguments->srate);
        had_err = -1;
      }
    }
    else {
      gt_error_set(err, "somethings wrong with the stype option");
      had_err = -1;
    }
  }

  if (!had_err) {
    if (arguments->arg_range.start != GT_UNDEF_UWORD) {
      /* both borders have to fit into the unsigned int fields of qrng */
      if (arguments->arg_range.start <= (GtUword) UINT_MAX) {
        gt_safe_assign(arguments->qrng.start, arguments->arg_range.start);
        if (arguments->arg_range.end <= (GtUword) UINT_MAX)
          gt_safe_assign(arguments->qrng.end, arguments->arg_range.end);
        else
          had_err = -1;
      }
      else
        had_err = -1;
    }
    if (had_err)
      gt_error_set(err, "Range for qualities: value to large! larger than %u",
                   UINT_MAX);
  }
  return had_err;
}
static int hmmsearch_process_coarse_hits( char *table_filename, GtCondenseq *ces, GtCondenseqHmmsearchArguments *arguments, GtLogger *logger, GtError *err) { int had_err = 0; GtStr *line = gt_str_new(); FILE *table = NULL; GtSplitter *splitter = gt_splitter_new(); GtStr *query = gt_str_new(), *fine_fasta_filename = gt_str_new_cstr("condenseq"); GtRBTree *sequences = NULL; GtUword filecount = (GtUword) 1; unsigned int querycount = 0; const GtUword fine_fasta_name_length = gt_str_length(fine_fasta_filename); const GtUword table_name_length = gt_str_length(arguments->outtable_filename); table = gt_xfopen(table_filename, "r"); sequences = gt_rbtree_new(hmmsearch_cmp_seqnum, hmmsearch_tree_free_node, NULL); while (!had_err && gt_str_read_next_line(line, table) == 0) { char *c_line = gt_str_get(line); GtUword uid; const GtUword target_column = 0, query_column = (GtUword) 3; if (c_line[0] != '#') { gt_splitter_split_non_empty(splitter, c_line, gt_str_length(line), ' '); gt_assert(gt_splitter_size(splitter) == (GtUword) 23); if (sscanf(gt_splitter_get_token(splitter, target_column), GT_WU, &uid) != 1) { gt_error_set(err, "couldn't parse target number: %s", gt_splitter_get_token(splitter, target_column)); had_err = -1; } if (gt_str_length(query) == 0 || strcmp(gt_str_get(query), gt_splitter_get_token(splitter, query_column)) != 0) { gt_str_set(query, gt_splitter_get_token(splitter, query_column)); gt_logger_log(logger, "new query: %s", gt_str_get(query)); querycount++; } if (!had_err && querycount == arguments->max_queries) { hmmsearch_create_fine_fas(fine_fasta_filename, sequences, ces); if (table_name_length != 0) gt_str_append_uword(arguments->outtable_filename, filecount++); had_err = hmmsearch_call_fine_search(table_name_length != 0 ? 
arguments->outtable_filename : NULL, gt_str_get(fine_fasta_filename), gt_str_get(arguments->hmmsearch_path), gt_str_get(arguments->hmm), logger, err); gt_rbtree_clear(sequences); gt_str_set_length(fine_fasta_filename, fine_fasta_name_length); if (table_name_length != 0) gt_str_set_length(arguments->outtable_filename, table_name_length); querycount = 0; } if (!had_err) { if (gt_condenseq_each_redundant_seq(ces, uid, hmmsearch_process_seq, sequences, err) == 0) { had_err = -1; } } gt_splitter_reset(splitter); } gt_str_reset(line); } gt_splitter_delete(splitter); gt_str_delete(line); gt_str_delete(query); gt_xfclose(table); if (!had_err) { hmmsearch_create_fine_fas(fine_fasta_filename, sequences, ces); if (table_name_length != 0) gt_str_append_uword(arguments->outtable_filename, filecount++); had_err = hmmsearch_call_fine_search(table_name_length != 0 ? arguments->outtable_filename : NULL, gt_str_get(fine_fasta_filename), gt_str_get(arguments->hmmsearch_path), gt_str_get(arguments->hmm), logger, err); } gt_log_log("created " GT_WU " files", filecount); gt_rbtree_delete(sequences); gt_str_delete(fine_fasta_filename); return had_err; }
/* Show the help for <progname> by executing the corresponding Lua doc file.
   <progname> is split at blanks; its first token is used to locate the gtdata
   directory. The doc file is "<gtdata>/doc/<name>.lua", where <name> is the
   basename of <progname> if it consists of a single token and its last token
   otherwise. The doc directory is made available to the script as Lua global
   "gtdata_doc_dir".
   Returns 0 on success and -1 (with <err> set) if the gtdata path cannot be
   determined, no Lua state can be created or the doc file cannot be run. */
int gt_gtdata_show_help(const char *progname, GT_UNUSED void *unused,
                        GtError *err)
{
  GtSplitter *name_parts;
  GtStr *doc_file;
  lua_State *L = NULL;
  char *progname_copy;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(progname);

  /* the splitter modifies its input, hence work on a copy */
  progname_copy = gt_cstr_dup(progname);
  name_parts = gt_splitter_new();
  gt_splitter_split(name_parts, progname_copy, strlen(progname_copy), ' ');

  doc_file = gt_get_gtdata_path(gt_splitter_get_token(name_parts, 0), err);
  if (doc_file == NULL)
    had_err = -1;
  else {
    gt_str_append_cstr(doc_file, "/doc/");
    /* create Lua & push gtdata_doc_dir to Lua */
    L = luaL_newstate();
    if (L == NULL) {
      gt_error_set(err, "out of memory (cannot create new Lua state)");
      had_err = -1;
    }
    else {
      luaL_openlibs(L);
      lua_pushstring(L, gt_str_get(doc_file));
      lua_setglobal(L, "gtdata_doc_dir");

      /* finish creating doc_file */
      if (gt_splitter_size(name_parts) != 1) {
        /* general case for the tools: the last token names the tool */
        gt_str_append_cstr(doc_file,
                           gt_splitter_get_token(name_parts,
                                         gt_splitter_size(name_parts) - 1));
      }
      else {
        /* special case for `gt` */
        char *base = gt_basename(progname);
        gt_str_append_cstr(doc_file, base);
        gt_free(base);
      }
      gt_str_append_cstr(doc_file, ".lua");

      /* execute doc_file */
      if (luaL_loadfile(L, gt_str_get(doc_file)) || lua_pcall(L, 0, 0, 0)) {
        gt_error_set(err, "cannot run doc file: %s", lua_tostring(L, -1));
        had_err = -1;
      }
    }
  }

  /* free */
  if (L != NULL)
    lua_close(L);
  gt_str_delete(doc_file);
  gt_splitter_delete(name_parts);
  gt_free(progname_copy);
  return had_err;
}
int gt_gtf_parser_parse(GtGTFParser *parser, GtQueue *genome_nodes, GtStr *filenamestr, GtFile *fpin, bool be_tolerant, GtError *err) { GtStr *seqid_str, *source_str, *line_buffer; char *line; size_t line_length; GtUword i, line_number = 0; GtGenomeNode *gn; GtRange range; GtPhase phase_value; GtStrand gt_strand_value; GtSplitter *splitter, *attribute_splitter; float score_value; char *seqname, *source, *feature, *start, *end, *score, *strand, *frame, *attributes, *token, *gene_id, *gene_name = NULL, *transcript_id, *transcript_name = NULL, **tokens; GtHashmap *transcript_id_hash; /* map from transcript id to array of genome nodes */ GtArray *gt_genome_node_array; ConstructionInfo cinfo; GTF_feature_type gtf_feature_type; GT_UNUSED bool gff_type_is_valid = false; const char *type = NULL; const char *filename; bool score_is_defined; int had_err = 0; gt_assert(parser && genome_nodes); gt_error_check(err); filename = gt_str_get(filenamestr); /* alloc */ line_buffer = gt_str_new(); splitter = gt_splitter_new(), attribute_splitter = gt_splitter_new(); #define HANDLE_ERROR \ if (had_err) { \ if (be_tolerant) { \ fprintf(stderr, "skipping line: %s\n", gt_error_get(err)); \ gt_error_unset(err); \ gt_str_reset(line_buffer); \ had_err = 0; \ continue; \ } \ else { \ had_err = -1; \ break; \ } \ } while (gt_str_read_next_line_generic(line_buffer, fpin) != EOF) { line = gt_str_get(line_buffer); line_length = gt_str_length(line_buffer); line_number++; had_err = 0; if (line_length == 0) { gt_warning("skipping blank line " GT_WU " in file \"%s\"", line_number, filename); } else if (line[0] == '#') { /* storing comment */ if (line_length >= 2 && line[1] == '#') gn = gt_comment_node_new(line+2); /* store '##' line as '#' line */ else gn = gt_comment_node_new(line+1); gt_genome_node_set_origin(gn, filenamestr, line_number); gt_queue_add(genome_nodes, gn); } else { /* process tab delimited GTF line */ gt_splitter_reset(splitter); gt_splitter_split(splitter, line, line_length, '\t'); 
if (gt_splitter_size(splitter) != 9UL) { gt_error_set(err, "line " GT_WU " in file \"%s\" contains " GT_WU " tab (\\t) " "separated fields instead of 9", line_number, filename, gt_splitter_size(splitter)); had_err = -1; break; } tokens = gt_splitter_get_tokens(splitter); seqname = tokens[0]; source = tokens[1]; feature = tokens[2]; start = tokens[3]; end = tokens[4]; score = tokens[5]; strand = tokens[6]; frame = tokens[7]; attributes = tokens[8]; /* parse feature */ if (GTF_feature_type_get(>f_feature_type, feature) == -1) { /* we skip unknown features */ fprintf(stderr, "skipping line " GT_WU " in file \"%s\": unknown " "feature: \"%s\"\n", line_number, filename, feature); gt_str_reset(line_buffer); continue; } /* translate into GFF3 feature type */ switch (gtf_feature_type) { case GTF_CDS: case GTF_stop_codon: gff_type_is_valid = gt_type_checker_is_valid(parser->type_checker, gt_ft_CDS); type = gt_ft_CDS; break; case GTF_exon: gff_type_is_valid = gt_type_checker_is_valid(parser->type_checker, gt_ft_exon); type = gt_ft_exon; } gt_assert(gff_type_is_valid); /* parse the range */ had_err = gt_parse_range(&range, start, end, line_number, filename, err); HANDLE_ERROR; /* process seqname (we have to do it here because we need the range) */ gt_region_node_builder_add_region(parser->region_node_builder, seqname, range); /* parse the score */ had_err = gt_parse_score(&score_is_defined, &score_value, score, line_number, filename, err); HANDLE_ERROR; /* parse the strand */ had_err = gt_parse_strand(>_strand_value, strand, line_number, filename, err); HANDLE_ERROR; /* parse the frame */ had_err = gt_parse_phase(&phase_value, frame, line_number, filename, err); HANDLE_ERROR; /* parse the attributes */ gt_splitter_reset(attribute_splitter); gene_id = NULL; transcript_id = NULL; gt_splitter_split(attribute_splitter, attributes, strlen(attributes), ';'); for (i = 0; i < gt_splitter_size(attribute_splitter); i++) { token = gt_splitter_get_token(attribute_splitter, i); /* skip 
leading blanks */ while (*token == ' ') token++; /* look for the two mandatory attributes */ if (strncmp(token, GENE_ID_ATTRIBUTE, strlen(GENE_ID_ATTRIBUTE)) == 0) { if (strlen(token) + 2 < strlen(GENE_ID_ATTRIBUTE)) { gt_error_set(err, "missing value to attribute \"%s\" on line " GT_WU "in file \"%s\"", GENE_ID_ATTRIBUTE, line_number, filename); had_err = -1; } HANDLE_ERROR; gene_id = token + strlen(GENE_ID_ATTRIBUTE) + 1; } else if (strncmp(token, TRANSCRIPT_ID_ATTRIBUTE, strlen(TRANSCRIPT_ID_ATTRIBUTE)) == 0) { if (strlen(token) + 2 < strlen(TRANSCRIPT_ID_ATTRIBUTE)) { gt_error_set(err, "missing value to attribute \"%s\" on line " GT_WU "in file \"%s\"", TRANSCRIPT_ID_ATTRIBUTE, line_number, filename); had_err = -1; } HANDLE_ERROR; transcript_id = token + strlen(TRANSCRIPT_ID_ATTRIBUTE) + 1; } else if (strncmp(token, GENE_NAME_ATTRIBUTE, strlen(GENE_NAME_ATTRIBUTE)) == 0) { if (strlen(token) + 2 < strlen(GENE_NAME_ATTRIBUTE)) { gt_error_set(err, "missing value to attribute \"%s\" on line " GT_WU "in file \"%s\"", GENE_NAME_ATTRIBUTE, line_number, filename); had_err = -1; } HANDLE_ERROR; gene_name = token + strlen(GENE_NAME_ATTRIBUTE) + 1; /* for output we want to strip quotes */ if (*gene_name == '"') gene_name++; if (gene_name[strlen(gene_name)-1] == '"') gene_name[strlen(gene_name)-1] = '\0'; } else if (strncmp(token, TRANSCRIPT_NAME_ATTRIBUTE, strlen(TRANSCRIPT_NAME_ATTRIBUTE)) == 0) { if (strlen(token) + 2 < strlen(TRANSCRIPT_NAME_ATTRIBUTE)) { gt_error_set(err, "missing value to attribute \"%s\" on line " GT_WU "in file \"%s\"", TRANSCRIPT_NAME_ATTRIBUTE, line_number, filename); had_err = -1; } HANDLE_ERROR; transcript_name = token + strlen(TRANSCRIPT_NAME_ATTRIBUTE) + 1; /* for output we want to strip quotes */ if (*transcript_name == '"') transcript_name++; if (transcript_name[strlen(transcript_name)-1] == '"') transcript_name[strlen(transcript_name)-1] = '\0'; } } /* check for the mandatory attributes */ if (!gene_id) { gt_error_set(err, "missing 
attribute \"%s\" on line " GT_WU " in file \"%s\"", GENE_ID_ATTRIBUTE, line_number, filename); had_err = -1; } HANDLE_ERROR; if (!transcript_id) { gt_error_set(err, "missing attribute \"%s\" on line " GT_WU " in file \"%s\"", TRANSCRIPT_ID_ATTRIBUTE, line_number, filename); had_err = -1; } HANDLE_ERROR; /* process the mandatory attributes */ if (!(transcript_id_hash = gt_hashmap_get(parser->gene_id_hash, gene_id))) { transcript_id_hash = gt_hashmap_new(GT_HASH_STRING, gt_free_func, (GtFree) gt_array_delete); gt_hashmap_add(parser->gene_id_hash, gt_cstr_dup(gene_id), transcript_id_hash); } gt_assert(transcript_id_hash); if (!(gt_genome_node_array = gt_hashmap_get(transcript_id_hash, transcript_id))) { gt_genome_node_array = gt_array_new(sizeof (GtGenomeNode*)); gt_hashmap_add(transcript_id_hash, gt_cstr_dup(transcript_id), gt_genome_node_array); } gt_assert(gt_genome_node_array); /* save optional gene_name and transcript_name attributes */ if (transcript_name && !gt_hashmap_get(parser->transcript_id_to_name_mapping, transcript_id)) { gt_hashmap_add(parser->transcript_id_to_name_mapping, gt_cstr_dup(transcript_id), gt_cstr_dup(transcript_name)); } if (gene_name && !gt_hashmap_get(parser->gene_id_to_name_mapping, gene_id)) { gt_hashmap_add(parser->gene_id_to_name_mapping, gt_cstr_dup(gene_id), gt_cstr_dup(gene_name)); } /* get seqid */ seqid_str = gt_hashmap_get(parser->seqid_to_str_mapping, seqname); if (!seqid_str) { seqid_str = gt_str_new_cstr(seqname); gt_hashmap_add(parser->seqid_to_str_mapping, gt_str_get(seqid_str), seqid_str); } gt_assert(seqid_str); /* construct the new feature */ gn = gt_feature_node_new(seqid_str, type, range.start, range.end, gt_strand_value); gt_genome_node_set_origin(gn, filenamestr, line_number); /* set source */ source_str = gt_hashmap_get(parser->source_to_str_mapping, source); if (!source_str) { source_str = gt_str_new_cstr(source); gt_hashmap_add(parser->source_to_str_mapping, gt_str_get(source_str), source_str); } 
gt_assert(source_str); gt_feature_node_set_source((GtFeatureNode*) gn, source_str); if (score_is_defined) gt_feature_node_set_score((GtFeatureNode*) gn, score_value); if (phase_value != GT_PHASE_UNDEFINED) gt_feature_node_set_phase((GtFeatureNode*) gn, phase_value); gt_array_add(gt_genome_node_array, gn); } gt_str_reset(line_buffer); } /* process all region nodes */ if (!had_err) gt_region_node_builder_build(parser->region_node_builder, genome_nodes); /* process all feature nodes */ cinfo.genome_nodes = genome_nodes; cinfo.gene_id_to_name_mapping = parser->gene_id_to_name_mapping; cinfo.transcript_id_to_name_mapping = parser->transcript_id_to_name_mapping; if (!had_err) { had_err = gt_hashmap_foreach(parser->gene_id_hash, construct_genes, &cinfo, err); } /* free */ gt_splitter_delete(splitter); gt_splitter_delete(attribute_splitter); gt_str_delete(line_buffer); return had_err; }