/* Create a GtFastaReader of the class returned by gt_fasta_reader_rec_class()
   and open its input via gt_io_new() in mode "r". If <sequence_filename> is
   NULL, NULL is passed to gt_io_new() as the path. */
GtFastaReader* gt_fasta_reader_rec_new(GtStr *sequence_filename)
{
  const char *path = NULL;
  GtFastaReader *reader;
  GtFastaReaderRec *rec;

  reader = gt_fasta_reader_create(gt_fasta_reader_rec_class());
  rec = gt_fasta_reader_rec_cast(reader);

  /* only dereference the filename string if one was given */
  if (sequence_filename)
    path = gt_str_get(sequence_filename);
  rec->seqio = gt_io_new(path, "r");

  return reader;
}
/* Build a GtOBOParseTree from the file at <obo_file_path>.
   Returns the new parse tree, or NULL if parse_obo_file() reports a failure
   (in which case the partially built tree is deleted again). The input file
   is closed on both paths. */
GtOBOParseTree* gt_obo_parse_tree_new(const char *obo_file_path, GtError *err)
{
  GtOBOParseTree *tree;
  GtIO *input;

  gt_error_check(err);
  gt_assert(obo_file_path);

  input = gt_io_new(obo_file_path, "r");

  /* set up an empty tree: header plus an array of stanza pointers */
  tree = gt_malloc(sizeof *tree);
  tree->obo_header = obo_header_new();
  tree->stanzas = gt_array_new(sizeof (GtOBOStanza*));

  if (parse_obo_file(tree, input, err)) {
    /* parsing failed: discard the tree and report NULL to the caller */
    gt_obo_parse_tree_delete(tree);
    tree = NULL;
  }

  gt_io_delete(input);
  return tree;
}
/* Parse the BED file <filename> with <bed_parser> and append the results to
   <genome_nodes>: first whatever the region node builder produces, then all
   nodes queued in <bed_parser->feature_nodes>. The region node builder is
   reset and the feature node queue is drained even if parsing failed.
   Returns the result of parse_bed_file(). */
int gt_bed_parser_parse(GtBEDParser *bed_parser, GtQueue *genome_nodes,
                        const char *filename, GtError *err)
{
  GtIO *input;
  int status;

  gt_error_check(err);
  gt_assert(bed_parser && genome_nodes);

  input = gt_io_new(filename, "r");

  /* parse BED file */
  status = parse_bed_file(bed_parser, input, err);

  /* hand over the region nodes, then clear the builder */
  gt_region_node_builder_build(bed_parser->region_node_builder, genome_nodes);
  gt_region_node_builder_reset(bed_parser->region_node_builder);

  /* move every queued feature node into the output queue */
  while (gt_queue_size(bed_parser->feature_nodes) != 0) {
    void *node = gt_queue_get(bed_parser->feature_nodes);
    gt_queue_add(genome_nodes, node);
  }

  gt_io_delete(input);
  return status;
}
/* Build a GtXRFAbbrParseTree from the file at <xrf_abbr_file_path>.
   Returns the new parse tree, or NULL if parse_xrf_abbr_file() reports a
   failure (in which case the partially built tree is deleted again). The
   input file is closed on both paths. */
GtXRFAbbrParseTree* gt_xrf_abbr_parse_tree_new(const char *xrf_abbr_file_path,
                                               GtError *err)
{
  GtXRFAbbrParseTree *tree;
  GtIO *input;

  gt_error_check(err);
  gt_assert(xrf_abbr_file_path);

  input = gt_io_new(xrf_abbr_file_path, "r");

  /* set up an empty tree holding an array of entry pointers */
  tree = gt_malloc(sizeof *tree);
  tree->entries = gt_array_new(sizeof (GtXRFAbbrEntry*));

  if (parse_xrf_abbr_file(tree, input, err)) {
    /* parsing failed: discard the tree and report NULL to the caller */
    gt_xrf_abbr_parse_tree_delete(tree);
    tree = NULL;
  }

  gt_io_delete(input);
  return tree;
}
/* The score matrix parser.
   Reads the file at <path> into <sm>: comment lines are skipped, then the
   alphabet line is parsed, then score lines are parsed for as long as the
   tokenizer has tokens. Returns 0 on success; on failure a non-zero value is
   returned. If the number of score lines differs from the number of entries
   collected from the alphabet line, <err> is set and -1 is returned. */
static int parse_score_matrix(GtScoreMatrix *sm, const char *path,
                              GtError *err)
{
  GtTokenizer *tokenizer;
  GtArray *alpha_chars;
  unsigned int num_score_lines = 0;
  /* NOTE(review): this table has UCHAR_MAX entries; if parse_score_line()
     indexes it by character code, UCHAR_MAX + 1 would be needed -- confirm */
  char parsed_characters[UCHAR_MAX] = { 0 };
  int had_err;

  gt_error_check(err);
  gt_assert(sm && path && sm->alphabet);

  /* no separate handle to the GtIO is kept; it is only reachable through
     the tokenizer from here on */
  tokenizer = gt_tokenizer_new(gt_io_new(path, "r"));
  alpha_chars = gt_array_new(sizeof (char));
  gt_tokenizer_skip_comment_lines(tokenizer);

  had_err = parse_alphabet_line(alpha_chars, tokenizer, err);

  /* consume score lines until the input is exhausted or one fails */
  while (!had_err && gt_tokenizer_has_token(tokenizer)) {
    had_err = parse_score_line(sm, tokenizer, alpha_chars, parsed_characters,
                               err);
    if (!had_err)
      num_score_lines++;
  }

  /* check the number of parsed score lines */
  if (!had_err && num_score_lines != gt_array_size(alpha_chars)) {
    gt_error_set(err, "the score matrix given in '%s' is not symmetric", path);
    had_err = -1;
  }

  gt_array_delete(alpha_chars);
  gt_tokenizer_delete(tokenizer);
  return had_err;
}
/* Runner of the gdiffcalc tool.

   Steps:
   1. append the remaining command line arguments to <arguments->filenames>,
   2. load the encoded sequence named by <arguments->indexname>,
   3. create the unit info (and read the unit file if <with_units> is set),
   4. read a table of ';'-separated floating point values from the first
      file argument into a num_of_genomes x num_of_genomes matrix (elements
      starting with '#' are only logged as names),
   5. pass the matrix to gt_genomediff_calculate_div_from_avg().

   <tool_arguments> must point to a GtGenomediffArguments object.
   Returns 0 on success and a non-zero value on failure; on failure a message
   is stored in <err> by this function or by the failing callee. */
static int gt_gdiffcalc_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GT_UNUSED GtError *err)
{
  GtGenomediffArguments *arguments = tool_arguments;
  int had_err = 0, i;
  GtUword lcounter = 0, /* number of table tokens (rows) processed so far */
          zcounter = 0; /* number of values stored for the current row */
  double **shusums = NULL;
  GtEncseq *encseq = NULL;
  GtLogger *logger;
  GtShuUnitFileInfo *unit_info = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stdout);
  gt_assert(logger);

  for (i = parsed_args; i < argc; i++) {
    gt_str_array_add_cstr(arguments->filenames, argv[i]);
  }

  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("load encseq");
    /* validate the timer before it is used */
    gt_assert(timer);
    gt_timer_start(timer);
  }

  if (arguments->with_units) {
    gt_logger_log(logger, "unitfile option set, filename is %s\n",
                  gt_str_get(arguments->unitfile));
  }

  if (!had_err) {
    GtEncseqLoader *el = gt_encseq_loader_new_from_options(arguments->loadopts,
                                                           err);
    encseq = gt_encseq_loader_load(el, gt_str_get(arguments->indexname), err);
    gt_encseq_loader_delete(el);
  }
  if (encseq == NULL)
    had_err = -1;

  if (timer != NULL)
    gt_timer_show_progress(timer, "load units", stdout);

  if (!had_err) {
    unit_info = gt_shu_unit_info_new(encseq);
    if (arguments->with_units)
      had_err = gt_shu_unit_file_info_read(arguments->unitfile, unit_info,
                                           logger, err);
  }

  if (timer != NULL)
    gt_timer_show_progress(timer, "read table", stdout);

  if (!had_err) {
    GtIO *table_file = NULL;
    GtTokenizer *tokenizer = NULL;
    GtStr *line = NULL;

    gt_assert(unit_info != NULL);
    gt_array2dim_calloc(shusums, unit_info->num_of_genomes,
                        unit_info->num_of_genomes);

    table_file = gt_io_new(gt_str_array_get(arguments->filenames, 0), "r");
    /* the tokenizer takes over <table_file>; both are released together by
       gt_tokenizer_delete() below */
    tokenizer = gt_tokenizer_new(table_file);

    line = gt_tokenizer_get_token(tokenizer);
    while (line != NULL && !had_err) {
      /* strtok() splits the token's own buffer in place */
      char *cline = gt_str_get(line);
      char *elem = strtok(cline, ";");
      zcounter = 0;
      while (elem != NULL && !had_err) {
        if (*elem != '#') {
          /* never write outside the num_of_genomes x num_of_genomes matrix,
             whatever the input file contains */
          if (lcounter >= unit_info->num_of_genomes ||
              zcounter >= unit_info->num_of_genomes) {
            gt_error_set(err, "table in '%s' has more rows or columns than "
                         "the expected "GT_WU,
                         gt_str_array_get(arguments->filenames, 0),
                         unit_info->num_of_genomes);
            had_err = -1;
            break;
          }
          if (1 != sscanf(elem, "%lf", &shusums[lcounter][zcounter])) {
            had_err = 1;
            gt_error_set(err, "couldn't scan");
            break;
          }
          gt_logger_log(logger,"wert: %lf", shusums[lcounter][zcounter]);
          zcounter++;
        }
        else {
          gt_logger_log(logger, "name: %s", elem);
        }
        elem = strtok(NULL, ";");
      }
      if (had_err) {
        /* release the current token and stop; do not fetch another one that
           nobody would free */
        gt_str_delete(line);
        line = NULL;
      }
      else {
        gt_tokenizer_next_token(tokenizer);
        gt_str_delete(line);
        line = gt_tokenizer_get_token(tokenizer);
        lcounter++;
        /* lcounter is unsigned, so it is printed with GT_WU */
        gt_logger_log(logger, "line "GT_WU"", lcounter);
      }
    }
    gt_tokenizer_delete(tokenizer);
  }

  if (!had_err) {
    GtUword num_of_seq, file_idx, seq_idx, startpos;
    GT_UNUSED GtUword oldpos = 0;

    gt_assert(unit_info != NULL);
    /* the table is expected to be square with one row per genome */
    gt_assert(lcounter == zcounter);
    gt_assert(lcounter == unit_info->num_of_genomes);

    /* debug output: which sequence belongs to which file and genome */
    num_of_seq = gt_encseq_num_of_sequences(unit_info->encseq);
    for (seq_idx = 0; seq_idx < num_of_seq; seq_idx++) {
      startpos = gt_encseq_seqstartpos(unit_info->encseq, seq_idx);
      file_idx = gt_encseq_filenum(unit_info->encseq, startpos);
      gt_log_log("seq: "GT_WU" starts at: "GT_WU"\n"
                 "belonges to file: "GT_WU" which is part of genome: %s",
                 seq_idx, startpos, file_idx,
                 gt_str_array_get(unit_info->genome_names,
                                  unit_info->map_files[file_idx]));
      gt_assert(oldpos <= startpos);
      oldpos = startpos;
    }
  }

  if (shusums != NULL) {
    if (!had_err) {
      had_err = gt_genomediff_calculate_div_from_avg(shusums, arguments,
                                                     unit_info, logger, timer,
                                                     err);
    }
    /* free the matrix on the error path as well */
    gt_array2dim_delete(shusums);
  }

  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  gt_logger_delete(logger);
  gt_encseq_delete(encseq);
  gt_shu_unit_info_delete(unit_info);

  return had_err;
}