/*
 * Runner of the sequence encoding tool.
 *
 * Every command line argument from position <parsed_args> on is treated as an
 * input file. If no index name was supplied in the tool arguments, the
 * basename of the single input file is used as index name; with more than one
 * input file an explicit index name is required.
 *
 * argc, argv      the command line
 * parsed_args     index of the first argument that is not an option
 * tool_arguments  pointer to the GtEncseqEncodeArguments of this run
 * err             receives the error message on failure
 *
 * Returns 0 on success, -1 if the index name cannot be determined, otherwise
 * the value returned by encode_sequence_files().
 */
static int gt_encseq_encode_runner(int argc, const char **argv,
                                   int parsed_args, void *tool_arguments,
                                   GtError *err)
{
  int had_err = 0, i;
  GtEncseqEncodeArguments *arguments =
                                  (GtEncseqEncodeArguments*) tool_arguments;
  GtStrArray *infiles;

  gt_error_check(err);

  infiles = gt_str_array_new();
  for (i = parsed_args; i < argc; i++) {
    gt_str_array_add_cstr(infiles, argv[i]);
  }

  if (gt_str_length(arguments->indexname) == 0UL) {
    if (gt_str_array_size(infiles) == 0UL) {
      /* without this guard the basename derivation below would read element
         0 of an empty array */
      gt_error_set(err, "no input file given");
      had_err = -1;
    }
    else if (gt_str_array_size(infiles) > 1UL) {
      gt_error_set(err,"if more than one input file is given, then "
                       "option -indexname is mandatory");
      had_err = -1;
    }
    else {
      /* exactly one input file: derive the index name from its basename */
      char *basenameptr;
      basenameptr = gt_basename(gt_str_array_get(infiles, 0UL));
      gt_str_set(arguments->indexname, basenameptr);
      gt_free(basenameptr);
    }
  }

  if (!had_err) {
    gt_assert(gt_str_length(arguments->indexname) > 0UL);
    had_err = encode_sequence_files(infiles,
                                    arguments->eopts,
                                    gt_str_get(arguments->indexname),
                                    arguments->verbose,
                                    arguments->no_esq_header,
                                    err);
  }

  if (!had_err && arguments->showstats)
    show_encoded_statistics(infiles, gt_str_get(arguments->indexname));

  gt_str_array_delete(infiles);
  return had_err;
}
/*
 * Creates the ID string under which <fn> is stored. The "ID" attribute of
 * <fn> is taken as is unless it is already contained in the table of used IDs
 * of <gff3_visitor>; in that case id_string_is_unique() is called with an
 * increasing counter (starting at 1) until it reports success, a warning is
 * printed and the replacement left in the buffer is used instead.
 *
 * The resulting ID is added to the table of used IDs and registered for <fn>
 * in the feature_node_to_unique_id_str map before it is returned.
 */
static GtStr* make_id_unique(GtGFF3Visitor *gff3_visitor, GtFeatureNode *fn)
{
  GtStr *id = gt_str_new_cstr(gt_feature_node_get_attribute(fn, "ID"));

  if (gt_cstr_table_get(gff3_visitor->used_ids, gt_str_get(id))) {
    GtUword counter = 1;
    GtStr *replacement = gt_str_new();

    /* try counter values 1, 2, 3, ... until an unused ID was produced */
    for (;;) {
      if (id_string_is_unique(id, replacement, gff3_visitor->used_ids,
                              counter++))
        break;
    }
    gt_warning("feature ID \"%s\" not unique: changing to %s",
               gt_str_get(id), gt_str_get(replacement));
    gt_str_set(id, gt_str_get(replacement));
    gt_str_delete(replacement);
  }

  /* remember the (possibly changed) ID as used */
  gt_cstr_table_add(gff3_visitor->used_ids, gt_str_get(id));
  /* associate the unique ID with its feature node */
  gt_hashmap_add(gff3_visitor->feature_node_to_unique_id_str, fn, id);

  return id;
}
/*
 * Writes the VCF fields stored in <r> to the file of <v>. Each entry of the
 * VCF array of <r> is written followed by a tab. The last entry is extended
 * beforehand by ";" and by one tag for every result counter of <r> which is
 * not zero (EX, NSF, NSM, NSN, ASS, DSS), each tag terminated by ";".
 */
void vcfoutput_write(VcfOutput *v, ResultSet *r)
{
  GtStr *field;
  unsigned long idx, num_fields;

  gt_assert(v);
  gt_assert(r);

  field = gt_str_new();
  num_fields = gt_str_array_size(resultset_get_vcf_array(r));

  for (idx = 0; idx < num_fields; idx++) {
    gt_str_set(field, gt_str_array_get(resultset_get_vcf_array(r), idx));

    if (idx + 1 == num_fields) {
      /* only the last field carries the annotation tags */
      gt_str_append_cstr(field, ";");
      if (resultset_get_exon(r) != 0)
        gt_str_append_cstr(field, "EX;");
      if (resultset_get_frms(r) != 0)
        gt_str_append_cstr(field, "NSF;");
      if (resultset_get_miss(r) != 0)
        gt_str_append_cstr(field, "NSM;");
      if (resultset_get_nons(r) != 0)
        gt_str_append_cstr(field, "NSN;");
      if (resultset_get_threeprime(r) != 0)
        gt_str_append_cstr(field, "ASS;");
      if (resultset_get_fiveprime(r) != 0)
        gt_str_append_cstr(field, "DSS;");
    }

    gt_str_append_cstr(field, "\t");
    gt_file_xwrite(v->file, gt_str_get(field), gt_str_length(field));
    gt_str_reset(field);
  }

  gt_str_delete(field);
}
/*
 * Runner of the read decompression tool.
 *
 * An alphabet is created (from the symbol map file if one is given in the
 * arguments, the DNA alphabet otherwise) and a decoder is opened for the
 * file named in the arguments. Then either the benchmark is run (if
 * arguments->bench is not 0) or a range of reads is decoded: the range given
 * in the arguments if both of its borders are defined, all reads otherwise.
 * If no output name is given, the basename of the input file is used.
 *
 * tool_arguments  pointer to the GtCsrHcrDecodeArguments of this run
 * err             receives the error message on failure
 *
 * Returns 0 on success and a non-zero value on failure.
 */
static int gt_compreads_decompress_runner(GT_UNUSED int argc,
                                          GT_UNUSED const char **argv,
                                          GT_UNUSED int parsed_args,
                                          void *tool_arguments,
                                          GtError *err)
{
  GtCsrHcrDecodeArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlphabet *alpha = NULL;
  GtHcrDecoder *hcrd = NULL;
  GtTimer *timer = NULL;
  unsigned long start, end;

  gt_error_check(err);
  gt_assert(arguments);

  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("start");
    /* check the timer before its first use, not afterwards */
    gt_assert(timer);
    gt_timer_start(timer);
  }

  if (gt_str_length(arguments->smap) > 0) {
    alpha = gt_alphabet_new_from_file_no_suffix(gt_str_get(arguments->smap),
                                                err);
    if (!alpha)
      had_err = -1;
  }
  else {
    alpha = gt_alphabet_new_dna();
    if (!alpha)
      had_err = -1;
  }

  if (!had_err) {
    if (timer != NULL)
      gt_timer_show_progress(timer, "decoding", stdout);

    if (gt_str_length(arguments->name) == 0) {
      /* no output name given: fall back to the basename of the input */
      char *basenameptr;
      basenameptr = gt_basename(gt_str_get(arguments->file));
      gt_str_set(arguments->name, basenameptr);
      gt_free(basenameptr);
    }

    hcrd = gt_hcr_decoder_new(gt_str_get(arguments->file), alpha,
                              arguments->descs, timer, err);
    if (hcrd == NULL)
      had_err = -1;
    else {
      if (arguments->bench != 0) {
        had_err = gt_compreads_decompress_benchmark(hcrd,
                                                    arguments->bench,
                                                    timer, err);
      }
      else {
        if (arguments->rng.start != GT_UNDEF_ULONG
              && arguments->rng.end != GT_UNDEF_ULONG) {
          /* both borders have to refer to existing reads */
          if (arguments->rng.start >= gt_hcr_decoder_num_of_reads(hcrd)
                || arguments->rng.end >= gt_hcr_decoder_num_of_reads(hcrd)) {
            gt_error_set(err, "range %lu-%lu includes a read number exceeding "
                              "the total number of reads (%lu)",
                              arguments->rng.start,
                              arguments->rng.end,
                              gt_hcr_decoder_num_of_reads(hcrd));
            had_err = -1;
          }
          start = arguments->rng.start;
          end = arguments->rng.end;
        }
        else {
          /* no complete range given: decode all reads */
          start = 0;
          end = gt_hcr_decoder_num_of_reads(hcrd) - 1;
        }
        if (!had_err) {
          gt_log_log("filebasename: %s", gt_str_get(arguments->name));
          if (gt_hcr_decoder_decode_range(hcrd, gt_str_get(arguments->name),
                                          start, end, timer, err)
                != 0)
            had_err = -1;
        }
      }
    }
    gt_hcr_decoder_delete(hcrd);
  }

  gt_alphabet_delete(alpha);
  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  if (had_err)
    gt_assert(gt_error_is_set(err));
  return had_err;
}
/*
 * Processes the table of coarse hits stored in the file <table_filename>.
 *
 * Lines starting with '#' are skipped. Every other line is split at blanks
 * and is asserted to consist of 23 tokens; token 0 is parsed as target
 * number, token 3 is the name of the query. For each target,
 * hmmsearch_process_seq() is applied to its redundant sequences in <ces>,
 * collecting them in a tree. Each time the number of distinct consecutive
 * queries reaches arguments->max_queries, a fine FASTA file is created from
 * the collected sequences, the fine search is called on it and the tree is
 * cleared. After the last line this is done once more for the remaining
 * sequences.
 *
 * If an output table name is set in <arguments>, a running file number is
 * appended to it for every fine search (and removed again afterwards).
 *
 * Returns 0 on success and a non-zero value (with <err> set by this function
 * for unparsable target numbers) on failure.
 */
static int hmmsearch_process_coarse_hits(
                                     char *table_filename,
                                     GtCondenseq *ces,
                                     GtCondenseqHmmsearchArguments *arguments,
                                     GtLogger *logger,
                                     GtError *err)
{
  const GtUword target_column = 0,
                query_column = (GtUword) 3;
  int had_err = 0;
  GtStr *line = gt_str_new();
  FILE *table = NULL;
  GtSplitter *splitter = gt_splitter_new();
  GtStr *query = gt_str_new(),
        *fine_fasta_filename = gt_str_new_cstr("condenseq");
  GtRBTree *sequences = NULL;
  GtUword filecount = (GtUword) 1;
  unsigned int querycount = 0;
  /* original lengths, used to strip what gets appended to the names */
  const GtUword fine_fasta_name_length = gt_str_length(fine_fasta_filename);
  const GtUword table_name_length =
                                gt_str_length(arguments->outtable_filename);

  table = gt_xfopen(table_filename, "r");
  sequences = gt_rbtree_new(hmmsearch_cmp_seqnum,
                            hmmsearch_tree_free_node, NULL);

  while (!had_err && gt_str_read_next_line(line, table) == 0) {
    char *row = gt_str_get(line);

    if (row[0] != '#') {
      GtUword uid;
      const char *target_token,
                 *query_token;

      gt_splitter_split_non_empty(splitter, row, gt_str_length(line), ' ');
      gt_assert(gt_splitter_size(splitter) == (GtUword) 23);
      target_token = gt_splitter_get_token(splitter, target_column);
      query_token = gt_splitter_get_token(splitter, query_column);

      if (sscanf(target_token, GT_WU, &uid) != 1) {
        gt_error_set(err, "couldn't parse target number: %s", target_token);
        had_err = -1;
      }

      /* a query name differing from the previous one starts a new query */
      if (gt_str_length(query) == 0 ||
          strcmp(gt_str_get(query), query_token) != 0) {
        gt_str_set(query, query_token);
        gt_logger_log(logger, "new query: %s", gt_str_get(query));
        querycount++;
      }

      if (!had_err && querycount == arguments->max_queries) {
        /* enough queries collected: run the fine search on them */
        hmmsearch_create_fine_fas(fine_fasta_filename, sequences, ces);
        if (table_name_length != 0)
          gt_str_append_uword(arguments->outtable_filename, filecount++);
        had_err =
          hmmsearch_call_fine_search(table_name_length != 0 ?
                                       arguments->outtable_filename :
                                       NULL,
                                     gt_str_get(fine_fasta_filename),
                                     gt_str_get(arguments->hmmsearch_path),
                                     gt_str_get(arguments->hmm),
                                     logger, err);
        gt_rbtree_clear(sequences);
        /* cut the names back to their original length */
        gt_str_set_length(fine_fasta_filename, fine_fasta_name_length);
        if (table_name_length != 0)
          gt_str_set_length(arguments->outtable_filename, table_name_length);
        querycount = 0;
      }

      if (!had_err) {
        /* a result of 0 is treated as failure */
        if (gt_condenseq_each_redundant_seq(ces, uid,
                                            hmmsearch_process_seq,
                                            sequences, err) == 0) {
          had_err = -1;
        }
      }
      gt_splitter_reset(splitter);
    }
    gt_str_reset(line);
  }

  gt_splitter_delete(splitter);
  gt_str_delete(line);
  gt_str_delete(query);
  gt_xfclose(table);

  if (!had_err) {
    /* fine search for whatever was collected after the last flush */
    hmmsearch_create_fine_fas(fine_fasta_filename, sequences, ces);
    if (table_name_length != 0)
      gt_str_append_uword(arguments->outtable_filename, filecount++);
    had_err =
      hmmsearch_call_fine_search(table_name_length != 0 ?
                                   arguments->outtable_filename :
                                   NULL,
                                 gt_str_get(fine_fasta_filename),
                                 gt_str_get(arguments->hmmsearch_path),
                                 gt_str_get(arguments->hmm),
                                 logger, err);
  }
  gt_log_log("created " GT_WU " files", filecount);

  gt_rbtree_delete(sequences);
  gt_str_delete(fine_fasta_filename);
  return had_err;
}
/*
 * Sets the reference ID stored in <sa> to the C string <id>.
 * <sa> must not be NULL.
 */
void gth_sa_set_ref_id(GthSA *sa, const char *id)
{
  gt_assert(sa != NULL);

  gt_str_set(sa->ref_id, id);
}
/*
 * Runner of the condenseq compression tool.
 *
 * Loads the encoded sequence named by the first non-option argument, fills
 * in all parameters which were left undefined, checks the parameters for
 * consistency and finally creates the compressed index.
 *
 * Undefined parameters are derived as follows (in this order):
 *   - minalignlength := initsize / 3 (if initsize is defined)
 *   - windowsize     := minalignlength / 5 (if minalignlength is defined);
 *                       a windowsize below 4 is raised to 4
 *   - kmersize       := rounded logarithm of 100000 to the base of the
 *                       alphabet size
 *   - windowsize     := 5 * kmersize
 *   - minalignlength := 3 * windowsize
 *   - initsize       := 3 * minalignlength
 *
 * argv, parsed_args  argv[parsed_args] is the name of the input index
 * tool_arguments     pointer to the GtCondenseqCompressArguments of this run
 * err                receives the error message on failure
 *
 * Returns 0 on success and a non-zero value on failure.
 */
static int gt_condenseq_compress_runner(GT_UNUSED int argc, const char **argv,
                                        int parsed_args, void *tool_arguments,
                                        GtError *err)
{
  GtCondenseqCompressArguments *arguments = tool_arguments;
  GtLogger *logger,
           *kdb_logger;
  FILE *kmer_fp = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);
  kdb_logger = gt_logger_new(arguments->kdb, GT_LOGGER_DEFLT_PREFIX, stderr);
  if (arguments->kdb) {
    kmer_fp = gt_fa_fopen("kmer_db.out", "w", err);
    /* do not hand a NULL stream to the logger if the file cannot be
       opened; gt_fa_fopen() is expected to have set err in that case */
    if (kmer_fp == NULL)
      had_err = -1;
    else
      gt_logger_set_target(kdb_logger, kmer_fp);
  }

  if (gt_str_length(arguments->indexname) == 0UL) {
    /* no index name given: use the basename of the input */
    char *basenameptr;
    basenameptr = gt_basename(argv[parsed_args]);
    gt_str_set(arguments->indexname, basenameptr);
    gt_free(basenameptr);
  }

  if (!had_err) {
    GtEncseqLoader *es_l = gt_encseq_loader_new();
    arguments->input_es = gt_encseq_loader_load(es_l, argv[parsed_args], err);
    if (arguments->input_es == NULL)
      had_err = -1;
    gt_encseq_loader_delete(es_l);
  }

  if (!had_err) {
    /* derive undefined parameters from the defined ones */
    if (arguments->minalignlength == GT_UNDEF_UWORD)
      arguments->minalignlength = arguments->initsize != GT_UNDEF_UWORD ?
                                    arguments->initsize / (GtUword) 3UL :
                                    GT_UNDEF_UWORD;
    if (arguments->windowsize == GT_UNDEF_UINT)
      arguments->windowsize = arguments->minalignlength != GT_UNDEF_UWORD ?
                       (unsigned int) (arguments->minalignlength / 5U) :
                       GT_UNDEF_UINT;
    if (arguments->windowsize < 4U)
      arguments->windowsize = 4U;
    if (arguments->kmersize == GT_UNDEF_UINT) {
      unsigned int size =
        gt_alphabet_num_of_chars(gt_encseq_alphabet(arguments->input_es));
      /* size^k ~= 100000 */
      gt_safe_assign(arguments->kmersize,
                     gt_round_to_long(gt_log_base(100000.0, (double) size)));
      gt_logger_log(logger, "|A|: %u, k: %u",
                    size, arguments->kmersize);
    }
    if (arguments->windowsize == GT_UNDEF_UINT) {
      arguments->windowsize = 5U * arguments->kmersize;
    }
    if (arguments->minalignlength == GT_UNDEF_UWORD) {
      arguments->minalignlength = (GtUword) (3UL * arguments->windowsize);
    }
    if (arguments->initsize == GT_UNDEF_UWORD) {
      arguments->initsize = (GtUword) (3UL * arguments->minalignlength);
    }
  }

  /* consistency checks: kmersize < windowsize <= minalignlength <= initsize */
  if (!had_err && arguments->windowsize <= arguments->kmersize) {
    gt_error_set(err, "-windowsize (%u) must be larger -kmersize (%u)!",
                 arguments->windowsize, arguments->kmersize);
    had_err = -1;
  }
  if (!had_err && arguments->minalignlength < (GtUword) arguments->windowsize) {
    gt_error_set(err, "-alignlength (" GT_WU ") must be at least "
                 "-windowsize (%u)!", arguments->minalignlength,
                 arguments->windowsize);
    had_err = -1;
  }
  if (!had_err && (arguments->initsize < arguments->minalignlength)) {
    gt_error_set(err, "-initsize (" GT_WU ") must be at least "
                 "-alignlength (" GT_WU ")!", arguments->initsize,
                 arguments->minalignlength);
    had_err = -1;
  }

  if (!had_err) {
    GtCondenseqCreator *ces_c =
      gt_condenseq_creator_new(arguments->initsize,
                               arguments->minalignlength,
                               arguments->xdrop,
                               &(arguments->scores),
                               arguments->kmersize,
                               arguments->windowsize,
                               logger,
                               err);
    if (ces_c == NULL)
      had_err = -1;
    else {
      /* configure the creator according to the arguments */
      if (arguments->cutoff_value == GT_UNDEF_UWORD)
        gt_condenseq_creator_use_mean_cutoff(ces_c);
      else if (arguments->cutoff_value == 0)
        gt_condenseq_creator_disable_cutoff(ces_c);
      else
        gt_condenseq_creator_set_cutoff(ces_c, arguments->cutoff_value);
      gt_condenseq_creator_set_mean_fraction(ces_c, arguments->fraction);
      if (arguments->prune)
        gt_condenseq_creator_disable_prune(ces_c);
      if (arguments->brute)
        gt_condenseq_creator_enable_brute_force(ces_c);
      if (!arguments->diags)
        gt_condenseq_creator_disable_diagonals(ces_c);
      if (arguments->full_diags)
        gt_condenseq_creator_enable_full_diagonals(ces_c);
      if (arguments->clean_percent != GT_UNDEF_UINT)
        gt_condenseq_creator_set_diags_clean_limit(ces_c,
                                                   arguments->clean_percent);

      had_err = gt_condenseq_creator_create(ces_c,
                                            arguments->indexname,
                                            arguments->input_es,
                                            logger, kdb_logger, err);
      gt_condenseq_creator_delete(ces_c);
    }
  }

  gt_logger_delete(logger);
  gt_logger_delete(kdb_logger);
  /* only close the k-mer database file if it was actually opened */
  if (kmer_fp != NULL)
    gt_fa_fclose(kmer_fp);
  return had_err;
}
/*
 * Draws the element <elem> on the cairo canvas <canvas>.
 *
 * Bar height, arrow width, stroke colour/width and fill colour are queried
 * from the style of the canvas; type-specific settings are queried after the
 * "format"-wide ones and write to the same variables. The element range is
 * mapped to drawing coordinates inside the margins of the canvas.
 *
 * If the canvas has a bittab and the element is at most about one unit wide,
 * it is drawn as a single vertical line, and only if no line was drawn at
 * that position before. Otherwise it is drawn according to its "style"
 * ("box", "rectangle", "caret", "dashes", "line"; unset or unknown values
 * are drawn as box). If the canvas has an image info object, the coordinates
 * of the element are registered there. Finally grey arrowheads are drawn at
 * the margins on the sides on which the element was clipped.
 *
 * Returns 0 on success and -1 if the element does not overlap the view range
 * or if a style query failed (<err> is passed to the style queries).
 */
int gt_canvas_cairo_visit_element(GtCanvas *canvas, GtElement *elem,
                                  GtError *err)
{
  int had_err = 0, arrow_status = ARROW_NONE;
  GtRange elem_range;
  GtDrawingRange draw_range;
  double elem_start = GT_UNDEF_DOUBLE,
         elem_width = GT_UNDEF_DOUBLE,
         stroke_width = STROKE_WIDTH_DEFAULT,
         bar_height = BAR_HEIGHT_DEFAULT,
         arrow_width = ARROW_WIDTH_DEFAULT;
  GtColor elem_color, grey, fill_color;
  const char *type;
  GtStyleQueryStatus rval;
  GtStr *style;
  GtStrand strand;

  /* check the arguments before <elem> is used for the first time */
  gt_assert(canvas && elem);
  elem_range = gt_element_get_range(elem);
  strand = gt_element_get_strand(elem);

  /* This shouldn't happen. */
  if (!gt_range_overlap(&elem_range, &canvas->pvt->viewrange))
    return -1;

  type = gt_element_get_type(elem);
  grey.red = grey.green = grey.blue = .85;
  grey.alpha = 0.5;

  /* get default or image-wide bar height */
  if (gt_style_get_num(canvas->pvt->sty,
                       "format", "bar_height",
                       &bar_height,
                       NULL, err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }
  /* try to get type-specific bar height */
  if (gt_style_get_num_with_track(canvas->pvt->sty,
                       type, "bar_height",
                       &bar_height,
                       gt_element_get_node_ref(elem),
                       gt_track_get_title(canvas->pvt->current_track),
                       err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }
  /* get default or image-wide arrow width */
  if (gt_style_get_num(canvas->pvt->sty,
                       "format", "arrow_width",
                       &arrow_width,
                       NULL, err)== GT_STYLE_QUERY_ERROR) {
    return -1;
  }
  /* try to get type-specific arrow width */
  if (gt_style_get_num_with_track(canvas->pvt->sty,
                       type, "arrow_width",
                       &arrow_width,
                       gt_element_get_node_ref(elem),
                       gt_track_get_title(canvas->pvt->current_track),
                       err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }

  /* derive the arrow directions from the strand */
  if (strand == GT_STRAND_REVERSE || strand == GT_STRAND_BOTH)
    arrow_status = ARROW_LEFT;
  if (strand == GT_STRAND_FORWARD || strand == GT_STRAND_BOTH)
    arrow_status = (arrow_status == ARROW_LEFT ? ARROW_BOTH : ARROW_RIGHT);

  gt_log_log("processing element from %lu to %lu, strand %d\n",
             elem_range.start, elem_range.end, (int) strand);

  /* map the element range to drawing coordinates inside the margins */
  draw_range = gt_coords_calc_generic_range(elem_range,
                                            canvas->pvt->viewrange);
  draw_range.start *= (canvas->pvt->width-2*canvas->pvt->margins);
  draw_range.end *= (canvas->pvt->width-2*canvas->pvt->margins);
  elem_start = draw_range.start + canvas->pvt->margins;
  elem_width = draw_range.end - draw_range.start;
  gt_assert(elem_start != GT_UNDEF_DOUBLE && elem_width != GT_UNDEF_DOUBLE);

  if (gt_element_is_marked(elem)) {
    if (gt_style_get_color_with_track(canvas->pvt->sty,
                       type, "stroke_marked",
                       &elem_color,
                       gt_element_get_node_ref(elem),
                       gt_track_get_title(canvas->pvt->current_track),
                       err) == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
    if (gt_style_get_num_with_track(canvas->pvt->sty,
                       "format", "stroke_marked_width",
                       &stroke_width,
                       gt_element_get_node_ref(elem),
                       gt_track_get_title(canvas->pvt->current_track),
                       err) == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
  }
  else {
    if (gt_style_get_color_with_track(canvas->pvt->sty,
                       type, "stroke",
                       &elem_color,
                       gt_element_get_node_ref(elem),
                       gt_track_get_title(canvas->pvt->current_track),
                       err) == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
    /* image-wide stroke width first, then the type-specific one */
    if (gt_style_get_num_with_track(canvas->pvt->sty,
                       "format", "stroke_width",
                       &stroke_width,
                       gt_element_get_node_ref(elem),
                       gt_track_get_title(canvas->pvt->current_track),
                       err) == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
    if (gt_style_get_num_with_track(canvas->pvt->sty,
                       type, "stroke_width",
                       &stroke_width,
                       gt_element_get_node_ref(elem),
                       gt_track_get_title(canvas->pvt->current_track),
                       err) == GT_STYLE_QUERY_ERROR) {
      return -1;
    }
  }
  if (gt_style_get_color_with_track(canvas->pvt->sty,
                       type, "fill",
                       &fill_color,
                       gt_element_get_node_ref(elem),
                       gt_track_get_title(canvas->pvt->current_track),
                       err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }

  /* very narrow elements are drawn as one vertical line per position */
  if (canvas->pvt->bt
        && gt_double_smaller_double(draw_range.end-draw_range.start, 1.1)) {
    /* an index equal to the size is already out of range
       NOTE(review): assumes valid bit indices are 0..size-1 -- confirm */
    if ((unsigned long) draw_range.start >= gt_bittab_size(canvas->pvt->bt))
      return had_err;
    /* position already occupied by a line drawn earlier */
    if (gt_bittab_bit_is_set(canvas->pvt->bt, (unsigned long) draw_range.start))
      return had_err;
    gt_graphics_draw_vertical_line(canvas->pvt->g,
                                   elem_start,
                                   canvas->pvt->y - bar_height/2,
                                   elem_color,
                                   bar_height,
                                   stroke_width);
    gt_bittab_set_bit(canvas->pvt->bt, (unsigned long) draw_range.start);
  }

  /* register coordinates in GtImageInfo object if available */
  if (canvas->pvt->ii) {
    GtRecMap *rm = gt_rec_map_new(elem_start,
                                  canvas->pvt->y - bar_height/2,
                                  elem_start+elem_width,
                                  canvas->pvt->y+bar_height/2,
                                  (GtFeatureNode*)
                                    gt_element_get_node_ref(elem));
    gt_image_info_add_rec_map(canvas->pvt->ii, rm);
  }

  /* narrow elements have been handled above, nothing more to draw */
  if (canvas->pvt->bt && draw_range.end-draw_range.start <= 1.1) {
    return had_err;
  }

  gt_log_log("drawing element from %f to %f, arrow status: %d",
             draw_range.start, draw_range.end, arrow_status);

  /* draw each element according to style set in the style */
  style = gt_str_new();
  rval = gt_style_get_str_with_track(canvas->pvt->sty,
                       type, "style",
                       style,
                       gt_element_get_node_ref(elem),
                       gt_track_get_title(canvas->pvt->current_track),
                       err);
  switch (rval) {
    case GT_STYLE_QUERY_NOT_SET:
      gt_str_set(style, "box");    /* default style */
      break;
    case GT_STYLE_QUERY_ERROR:
      gt_str_delete(style);
      gt_assert(gt_error_is_set(err));
      return -1;
    default:
      break;
  }

  if (strcmp(gt_str_get(style), "box") == 0) {
    gt_graphics_draw_box(canvas->pvt->g,
                         elem_start,
                         canvas->pvt->y - bar_height/2,
                         elem_width,
                         bar_height,
                         fill_color,
                         arrow_status,
                         arrow_width,
                         stroke_width,
                         elem_color,
                         false);
  }
  else if (strcmp(gt_str_get(style), "rectangle") == 0) {
    /* like "box", but never with arrows */
    gt_graphics_draw_box(canvas->pvt->g,
                         elem_start,
                         canvas->pvt->y - bar_height/2,
                         elem_width,
                         bar_height,
                         fill_color,
                         ARROW_NONE,
                         arrow_width,
                         stroke_width,
                         elem_color,
                         false);
  }
  else if (strcmp(gt_str_get(style), "caret") == 0) {
    gt_graphics_draw_caret(canvas->pvt->g,
                           elem_start,
                           canvas->pvt->y - bar_height/2,
                           elem_width,
                           bar_height,
                           ARROW_NONE,
                           arrow_width,
                           stroke_width,
                           elem_color);
  }
  else if (strcmp(gt_str_get(style), "dashes") == 0) {
    gt_graphics_draw_dashes(canvas->pvt->g,
                            elem_start,
                            canvas->pvt->y - bar_height/2,
                            elem_width,
                            bar_height,
                            arrow_status,
                            arrow_width,
                            stroke_width,
                            elem_color);
  }
  else if (strcmp(gt_str_get(style), "line") == 0) {
    gt_graphics_draw_horizontal_line(canvas->pvt->g,
                                     elem_start,
                                     canvas->pvt->y - bar_height/2,
                                     elem_color,
                                     elem_width,
                                     1.0);
  }
  else {
    /* unknown style: draw as box */
    gt_graphics_draw_box(canvas->pvt->g,
                         elem_start,
                         canvas->pvt->y - bar_height/2,
                         elem_width,
                         bar_height,
                         fill_color,
                         arrow_status,
                         arrow_width,
                         stroke_width,
                         elem_color,
                         false);
  }
  gt_str_delete(style);

  /* draw arrowheads at clipped margins */
  if (draw_range.clip == CLIPPED_LEFT || draw_range.clip == CLIPPED_BOTH)
    gt_graphics_draw_arrowhead(canvas->pvt->g,
                               canvas->pvt->margins - 10,
                               canvas->pvt->y - 4,
                               grey,
                               ARROW_LEFT);
  if (draw_range.clip == CLIPPED_RIGHT || draw_range.clip == CLIPPED_BOTH)
    gt_graphics_draw_arrowhead(canvas->pvt->g,
                               canvas->pvt->width-canvas->pvt->margins + 10,
                               canvas->pvt->y - 4,
                               grey,
                               ARROW_RIGHT);
  return had_err;
}
/*
 * Reads the sequence part of a FASTQ entry from <seqit>, i.e. all characters
 * up to (but not including) the quality separator character, and stores it
 * in the sequence buffer of <seqit>. Newlines and blanks are not stored;
 * each newline increases the line counter of <seqit>.
 *
 * If <seqit> is in color space mode, the sequence is decoded first. If a
 * symbol map is set, every character is replaced by its code; characters
 * without a code are reported as error. For mapped sequences the length of
 * the current run of special characters and (if present) the character
 * distribution table are updated.
 *
 * The separator character is pushed back before returning.
 *
 * Returns 0 on success, EOF if the input ended before the separator was
 * found, -2 for an empty sequence or an illegal character (<err> is set),
 * or the error code of gt_colorspace_decode_string().
 */
static int parse_fastq_sequence(GtSeqIteratorFastQ *seqit, GtError *err)
{
  int had_err = 0;
  /* int instead of char, so that a comparison with EOF neither fails on
     platforms where char is unsigned nor mistakes a data byte for EOF */
  int currentchar;
  GtStr *tmp_str = gt_str_new();

  gt_error_check(err);
  gt_assert(seqit);
  gt_assert(gt_str_length(seqit->sequencebuffer) == 0);

  /* read sequence */
  if ((currentchar = fastq_buf_getchar(seqit)) == EOF) {
    gt_str_delete(tmp_str);
    return EOF;
  }
  while (currentchar != GT_FASTQ_QUAL_SEPARATOR_CHAR) {
    if (currentchar != '\n' && currentchar != ' ') {
      gt_str_append_char(tmp_str, (char) currentchar);
    }
    else if (currentchar == '\n') {
      seqit->curline++;
    }
    if ((currentchar = fastq_buf_getchar(seqit)) == EOF) {
      gt_str_delete(tmp_str);
      return EOF;
    }
    seqit->currentread++;
  }

  if (!gt_str_length(tmp_str)) {
    gt_error_set(err, "empty sequence given in file '%s', line %lu",
                 gt_str_array_get(seqit->filenametab, seqit->filenum),
                 seqit->curline-1);
    had_err = -2;
  }

  if (!had_err && seqit->is_color_space) {
    /* replace the raw string by its decoded version */
    GtStr *translated = gt_str_new();
    had_err = gt_colorspace_decode_string(tmp_str, translated, err);
    gt_str_delete(tmp_str);
    tmp_str = translated;
  }

  if (!had_err) {
    if (seqit->symbolmap) {
      int charcode;
      char *input_str = gt_str_get(tmp_str);
      unsigned long str_len = gt_str_length(tmp_str),
                    idx;

      for (idx = 0; !had_err && idx < str_len; idx++) {
        /* index via unsigned char: a negative char value converted directly
           to unsigned int would give an index far outside of the table */
        charcode = seqit->symbolmap[(unsigned char) input_str[idx]];
        if (charcode == UNDEFCHAR) {
          gt_error_set(err, "illegal character '%c': file \"%s\", line %lu",
                       input_str[idx],
                       gt_str_array_get(seqit->filenametab, seqit->filenum),
                       (unsigned long) seqit->curline);
          had_err = -2;
        }
        if (ISSPECIAL(charcode)) {
          seqit->lastspeciallength++;
        }
        else {
          /* a regular character ends the current run of specials */
          if (seqit->lastspeciallength > 0)
            seqit->lastspeciallength = 0;
          if (seqit->chardisttab)
            seqit->chardisttab[(int) charcode]++;
        }
        gt_str_append_char(seqit->sequencebuffer, charcode);
      }
    }
    else {
      gt_str_set(seqit->sequencebuffer, gt_str_get(tmp_str));
    }
  }

  /* push back the separator for the caller */
  fastq_buf_ungetchar(seqit);
  gt_str_delete(tmp_str);
  return had_err;
}
/*
 * Checks the search functionality of a packed index (BWT sequence) against
 * a suffix array of the same project.
 *
 * After option parsing, the index is loaded and its integrity is verified.
 * Then the suffix array is mapped and params.numOfSamples patterns are
 * enumerated from the encoded sequence. For each pattern the matches found
 * by the suffix array search are compared with those of the packed index:
 * position by position if the index has locate information, by number of
 * matches otherwise. Negative pattern lengths in the options are replaced
 * by values derived from the recommended prefix length.
 *
 * Progress and result messages are written to stderr.
 *
 * Returns 0 on success (or if the option parser requested exit) and -1 on
 * failure.
 */
extern int gt_packedindex_chk_search(int argc, const char *argv[],
                                     GtError *err)
{
  struct chkSearchOptions params;
  Suffixarray suffixarray;
  Enumpatterniterator *epi = NULL;
  bool saIsLoaded = false;
  BWTSeq *bwtSeq = NULL;
  GtStr *inputProject = NULL;
  int parsedArgs;
  bool had_err = false;
  BWTSeqExactMatchesIterator EMIter;
  bool EMIterInitialized = false;
  GtLogger *logger = NULL;

  inputProject = gt_str_new();

  /* the loop runs once; break is used to jump to the cleanup code */
  do {
    gt_error_check(err);
    {
      bool exitNow = false;
      switch (parseChkBWTOptions(&parsedArgs, argc, argv, &params,
                                 inputProject, err))
      {
        case GT_OPTION_PARSER_OK:
          break;
        case GT_OPTION_PARSER_ERROR:
          had_err = true;
          exitNow = true;
          break;
        case GT_OPTION_PARSER_REQUESTS_EXIT:
          exitNow = true;
          break;
      }
      if (exitNow)
        break;
    }
    gt_str_set(inputProject, argv[parsedArgs]);

    logger = gt_logger_new(params.verboseOutput,
                           GT_LOGGER_DEFLT_PREFIX, stdout);

    bwtSeq = gt_availBWTSeq(&params.idx.final, logger, err);
    if ((had_err = bwtSeq == NULL))
      break;

    {
      enum verifyBWTSeqErrCode retval =
        gt_BWTSeqVerifyIntegrity(bwtSeq, gt_str_get(inputProject),
                                 params.flags, params.progressInterval,
                                 stderr, logger, err);
      if ((had_err = (retval != VERIFY_BWTSEQ_NO_ERROR)))
      {
        fprintf(stderr, "index integrity check failed: %s\n",
                gt_error_get(err));
        gt_error_set(err, "aborted because of index integrity check fail");
        break;
      }
    }

    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      if ((had_err = !gt_initEmptyEMIterator(&EMIter, bwtSeq)))
      {
        gt_error_set(err, "Cannot create matches iterator for sequence index.");
        break;
      }
      EMIterInitialized = true;
    }

    {
      unsigned long totalLen, dbstart;
      unsigned long trial, patternLen;

      if ((had_err =
           gt_mapsuffixarray(&suffixarray, SARR_SUFTAB | SARR_ESQTAB,
                             gt_str_get(inputProject), NULL, err) != 0))
      {
        gt_error_set(err, "Can't load suffix array project with"
                  " demand for encoded sequence and suffix table files\n");
        break;
      }
      totalLen = gt_encseq_total_length(suffixarray.encseq);
      saIsLoaded = true;

      if ((had_err = (params.minPatLen >= 0L && params.maxPatLen >= 0L
                      && params.minPatLen > params.maxPatLen)))
      {
        gt_error_set(err, "Invalid pattern lengths selected: min=%ld, max=%ld;"
                  " min <= max is required.", params.minPatLen,
                  params.maxPatLen);
        break;
      }
      /* negative lengths mean "not set": derive them from the index */
      if (params.minPatLen < 0 || params.maxPatLen < 0)
      {
        unsigned int numofchars
          = gt_alphabet_num_of_chars(
                                 gt_encseq_alphabet(suffixarray.encseq));
        if (params.minPatLen < 0)
          params.minPatLen
            = gt_recommendedprefixlength(numofchars, totalLen,
                                         GT_RECOMMENDED_MULTIPLIER_DEFAULT,
                                         true);
        if (params.maxPatLen < 0)
          params.maxPatLen =
            MAX(params.minPatLen,
                125 * gt_recommendedprefixlength(numofchars,totalLen,
                                         GT_RECOMMENDED_MULTIPLIER_DEFAULT,
                                         true)/100);
        else
          params.maxPatLen = MAX(params.maxPatLen, params.minPatLen);
      }
      /* both lengths are non-negative here; the casts make the arguments
         match the %lu conversions */
      fprintf(stderr, "Using patterns of lengths %lu to %lu\n",
              (unsigned long) params.minPatLen,
              (unsigned long) params.maxPatLen);

      if ((had_err = totalLen + 1 != BWTSeqLength(bwtSeq)))
      {
        gt_error_set(err, "base suffix array and index have diferrent lengths!"
                          "%lu vs. %lu",  totalLen + 1,
                          BWTSeqLength(bwtSeq));
        break;
      }
      if ((had_err =
           (epi = gt_newenumpatterniterator(params.minPatLen,
                                            params.maxPatLen,
                                            suffixarray.encseq,
                                            err)) == NULL))
      {
        fputs("Creation of pattern iterator failed!\n", stderr);
        break;
      }

      for (trial = 0; !had_err && trial < params.numOfSamples; ++trial)
      {
        const GtUchar *pptr = gt_nextEnumpatterniterator(&patternLen, epi);
        GtMMsearchiterator *mmsi =
          gt_mmsearchiterator_new_complete_olain(suffixarray.encseq,
                                                 suffixarray.suftab,
                                                 0,  /* leftbound */
                                                 totalLen, /* rightbound */
                                                 0, /* offset */
                                                 suffixarray.readmode,
                                                 pptr,
                                                 patternLen);
        if (BWTSeqHasLocateInformation(bwtSeq))
        {
          if ((had_err = !gt_reinitEMIterator(&EMIter, bwtSeq, pptr,
                                              patternLen, false)))
          {
            fputs("Internal error: failed to reinitialize pattern match"
                  " iterator", stderr);
            abort();
          }
          gt_assert(gt_EMINumMatchesTotal(&EMIter)
                    == gt_BWTSeqMatchCount(bwtSeq, pptr, patternLen, false));
          gt_assert(gt_EMINumMatchesTotal(&EMIter)
                    == gt_mmsearchiterator_count(mmsi));

          /* compare the match positions one by one */
          while (gt_mmsearchiterator_next(&dbstart,mmsi))
          {
            unsigned long matchPos = 0;
            bool match = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = !match))
            {
              gt_error_set(err,
                           "matches of packedindex expired before mmsearch!");
              break;
            }
            if ((had_err = matchPos != dbstart))
            {
              gt_error_set(err, "packedindex match doesn't equal mmsearch "
                           "match result!\n%lu vs. %lu\n",
                           matchPos, dbstart);
            }
          }
          if (!had_err)
          {
            /* the packed index must not deliver additional matches */
            unsigned long matchPos;
            bool trailingMatch = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = trailingMatch))
            {
              gt_error_set(err,
                           "matches of mmsearch expired before fmindex!");
              /* the break leaves the for loop, so the iterator of this
                 trial has to be released here */
              gt_mmsearchiterator_delete(mmsi);
              mmsi = NULL;
              break;
            }
          }
        }
        else
        {
          unsigned long numFMIMatches = gt_BWTSeqMatchCount(bwtSeq, pptr,
                                                            patternLen,
                                                            false),
                        numMMSearchMatches = gt_mmsearchiterator_count(mmsi);
          if ((had_err = numFMIMatches != numMMSearchMatches))
          {
            /* argument order follows the message: suffix array first */
            gt_error_set(err, "Number of matches not equal for suffix array ("
                              "%lu) and fmindex (%lu).\n",
                         numMMSearchMatches, numFMIMatches);
          }
        }
        gt_mmsearchiterator_delete(mmsi);
        mmsi = NULL;
        if (params.progressInterval && !((trial + 1) % params.progressInterval))
          putc('.', stderr);
      }
      if (params.progressInterval)
        putc('\n', stderr);
      fprintf(stderr, "Finished %lu of %lu matchings successfully.\n",
              trial, params.numOfSamples);
    }
  } while (0);

  /* release only what was actually set up */
  if (EMIterInitialized) gt_destructEMIterator(&EMIter);
  if (saIsLoaded) gt_freesuffixarray(&suffixarray);
  if (epi != NULL) gt_freeEnumpatterniterator(epi);
  if (bwtSeq) gt_deleteBWTSeq(bwtSeq);
  if (logger) gt_logger_delete(logger);
  if (inputProject) gt_str_delete(inputProject);
  return had_err?-1:0;
}
/*
 * Checks the search functionality of a packed index (BWT sequence) against
 * a suffix array of the same project.
 *
 * After option parsing, the index is loaded and its integrity is verified.
 * Then the suffix array is mapped and params.numOfSamples patterns are
 * enumerated from the encoded sequence. For each pattern the matches found
 * by the suffix array search are compared with those of the packed index:
 * position by position if the index has locate information, by number of
 * matches otherwise. Negative pattern lengths in the options are replaced
 * by values derived from the recommended prefix length.
 *
 * Progress and result messages are written to stderr.
 *
 * Returns 0 on success (or if the option parser requested exit) and -1 on
 * failure.
 */
extern int gt_packedindex_chk_search(int argc, const char *argv[],
                                     GtError *err)
{
  struct chkSearchOptions params;
  Suffixarray suffixarray;
  Enumpatterniterator *epi = NULL;
  bool saIsLoaded = false;
  BWTSeq *bwtSeq = NULL;
  GtStr *inputProject = NULL;
  int parsedArgs;
  bool had_err = false;
  BWTSeqExactMatchesIterator EMIter;
  bool EMIterInitialized = false;
  Verboseinfo *verbosity = NULL;

  inputProject = gt_str_new();

  /* the loop runs once; break is used to jump to the cleanup code */
  do {
    gt_error_check(err);
    {
      bool exitNow = false;
      switch (parseChkBWTOptions(&parsedArgs, argc, argv, &params,
                                 inputProject, err))
      {
        case OPTIONPARSER_OK:
          break;
        case OPTIONPARSER_ERROR:
          had_err = true;
          exitNow = true;
          break;
        case OPTIONPARSER_REQUESTS_EXIT:
          exitNow = true;
          break;
      }
      if (exitNow)
        break;
    }
    gt_str_set(inputProject, argv[parsedArgs]);

    verbosity = newverboseinfo(params.verboseOutput);

    bwtSeq = availBWTSeq(&params.idx.final, verbosity, err);
    if ((had_err = bwtSeq == NULL))
      break;

    {
      enum verifyBWTSeqErrCode retval =
        BWTSeqVerifyIntegrity(bwtSeq, inputProject, params.flags,
                              params.progressInterval, stderr, verbosity,
                              err);
      if ((had_err = (retval != VERIFY_BWTSEQ_NO_ERROR)))
      {
        fprintf(stderr, "index integrity check failed: %s\n",
                gt_error_get(err));
        gt_error_set(err, "aborted because of index integrity check fail");
        break;
      }
    }

    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      if ((had_err = !initEmptyEMIterator(&EMIter, bwtSeq)))
      {
        gt_error_set(err, "Cannot create matches iterator for sequence index.");
        break;
      }
      EMIterInitialized = true;
    }

    {
      Seqpos totalLen, dbstart;
      unsigned long trial, patternLen;

      if ((had_err =
           mapsuffixarray(&suffixarray, SARR_SUFTAB | SARR_ESQTAB,
                          inputProject, NULL, err) != 0))
      {
        gt_error_set(err, "Can't load suffix array project with"
                  " demand for encoded sequence and suffix table files\n");
        break;
      }
      totalLen = getencseqtotallength(suffixarray.encseq);
      saIsLoaded = true;

      if ((had_err = (params.minPatLen >= 0L && params.maxPatLen >= 0L
                      && params.minPatLen > params.maxPatLen)))
      {
        gt_error_set(err, "Invalid pattern lengths selected: min=%ld, max=%ld;"
                  " min <= max is required.", params.minPatLen,
                  params.maxPatLen);
        break;
      }
      /* negative lengths mean "not set": derive them from the index */
      if (params.minPatLen < 0 || params.maxPatLen < 0)
      {
        unsigned int numofchars
          = getencseqAlphabetnumofchars(suffixarray.encseq);
        if (params.minPatLen < 0)
          params.minPatLen = recommendedprefixlength(numofchars, totalLen);
        if (params.maxPatLen < 0)
          params.maxPatLen =
            MAX(params.minPatLen,
                125 * recommendedprefixlength(numofchars, totalLen) / 100);
        else
          params.maxPatLen = MAX(params.maxPatLen, params.minPatLen);
      }
      /* both lengths are non-negative here; the casts make the arguments
         match the %lu conversions */
      fprintf(stderr, "Using patterns of lengths %lu to %lu\n",
              (unsigned long) params.minPatLen,
              (unsigned long) params.maxPatLen);

      if ((had_err = totalLen + 1 != BWTSeqLength(bwtSeq)))
      {
        gt_error_set(err, "base suffix array and index have diferrent lengths!"
                  FormatSeqpos" vs. "FormatSeqpos,  totalLen + 1,
                  BWTSeqLength(bwtSeq));
        break;
      }
      if ((had_err =
           (epi = newenumpatterniterator(params.minPatLen, params.maxPatLen,
                                         suffixarray.encseq,
                                         err)) == NULL))
      {
        fputs("Creation of pattern iterator failed!\n", stderr);
        break;
      }

      for (trial = 0; !had_err && trial < params.numOfSamples; ++trial)
      {
        const GtUchar *pptr = nextEnumpatterniterator(&patternLen, epi);
        MMsearchiterator *mmsi =
          newmmsearchiterator(suffixarray.encseq,
                              suffixarray.suftab,
                              0,  /* leftbound */
                              totalLen, /* rightbound */
                              0, /* offset */
                              suffixarray.readmode,
                              pptr,
                              patternLen);
        if (BWTSeqHasLocateInformation(bwtSeq))
        {
          Seqpos numMatches;
          if ((had_err = !reinitEMIterator(&EMIter, bwtSeq, pptr, patternLen,
                                           false)))
          {
            fputs("Internal error: failed to reinitialize pattern match"
                  " iterator", stderr);
            abort();
          }
          numMatches = EMINumMatchesTotal(&EMIter);
          gt_assert(numMatches == BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                   false));
          gt_assert(EMINumMatchesTotal(&EMIter)
                    == countmmsearchiterator(mmsi));

          /* compare the match positions one by one */
          while (nextmmsearchiterator(&dbstart,mmsi))
          {
            Seqpos matchPos = 0;
            bool match = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = !match))
            {
              gt_error_set(err,
                           "matches of packedindex expired before mmsearch!");
              break;
            }
            if ((had_err = matchPos != dbstart))
            {
              gt_error_set(err, "packedindex match doesn't equal mmsearch "
                        "match result!\n"FormatSeqpos" vs. "FormatSeqpos"\n",
                        matchPos, dbstart);
            }
          }
          if (!had_err)
          {
            /* the packed index must not deliver additional matches */
            Seqpos matchPos;
            bool trailingMatch = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = trailingMatch))
            {
              gt_error_set(err,
                           "matches of mmsearch expired before fmindex!");
              /* the break leaves the for loop, so the iterator of this
                 trial has to be released here */
              freemmsearchiterator(&mmsi);
              break;
            }
          }
        }
        else
        {
          Seqpos numFMIMatches = BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                  false),
                 numMMSearchMatches = countmmsearchiterator(mmsi);
          if ((had_err = numFMIMatches != numMMSearchMatches))
          {
            /* argument order follows the message: suffix array first;
               the closing parenthesis was missing in the message */
            gt_error_set(err, "Number of matches not equal for suffix array ("
                      FormatSeqpos") and fmindex ("FormatSeqpos").\n",
                      numMMSearchMatches, numFMIMatches);
          }
        }
        freemmsearchiterator(&mmsi);
        if (params.progressInterval && !((trial + 1) % params.progressInterval))
          putc('.', stderr);
      }
      if (params.progressInterval)
        putc('\n', stderr);
      fprintf(stderr, "Finished %lu of %lu matchings successfully.\n",
              trial, params.numOfSamples);
    }
  } while (0);

  /* release only what was actually set up */
  if (EMIterInitialized) destructEMIterator(&EMIter);
  if (saIsLoaded) freesuffixarray(&suffixarray);
  if (epi) freeEnumpatterniterator(&epi);
  if (bwtSeq) deleteBWTSeq(bwtSeq);
  if (verbosity) freeverboseinfo(&verbosity);
  if (inputProject) gt_str_delete(inputProject);
  return had_err?-1:0;
}
/*
 * Parses the command line of the FM-index construction tool and stores the
 * result in <mkfmcallinfo>, whose string members are created here.
 *
 * If option -fmout is not set, the name of the FM-index is derived from the
 * basename of the single input index; with more than one input index the
 * option is mandatory. Arguments remaining after the options are rejected.
 *
 * Returns the status of the option parser, or GT_OPTION_PARSER_ERROR (with
 * <err> set) if one of the additional checks fails.
 */
static GtOPrval parsemkfmindex(Mkfmcallinfo *mkfmcallinfo,
                               int argc,
                               const char **argv,
                               GtError *err)
{
  GtOptionParser *op;
  GtOption *option,
           *optionfmout;
  GtOPrval oprval;
  int parsed_args;

  gt_error_check(err);

  /* containers receiving the option values */
  mkfmcallinfo->indexnametab = gt_str_array_new();
  mkfmcallinfo->outfmindex = gt_str_new();
  mkfmcallinfo->leveldesc = gt_str_new();

  op = gt_option_parser_new("[option ...] -ii indexfile [...]",
                            "Compute FM-index.");
  gt_option_parser_set_mail_address(op, "<*****@*****.**>");

  /* -fmout */
  optionfmout = gt_option_new_string("fmout",
                             "specify name of FM-index to be generated\n"
                             "(mandatory if more than one input index "
                             "is specified)",
                             mkfmcallinfo->outfmindex, NULL);
  gt_option_parser_add_option(op, optionfmout);

  /* -ii */
  option = gt_option_new_filename_array("ii", "specify indices to be used",
                                        mkfmcallinfo->indexnametab);
  gt_option_is_mandatory(option);
  gt_option_parser_add_option(op, option);

  /* -size */
  option = gt_option_new_string("size",
                                "specify size (tiny, small, medium, big)",
                                mkfmcallinfo->leveldesc, "medium");
  gt_option_parser_add_option(op, option);

  /* -noindexpos */
  option = gt_option_new_bool("noindexpos",
                              "store no index positions (hence the positions of\n"
                              "matches in the index cannot be retrieved)",
                              &mkfmcallinfo->noindexpos,false);
  gt_option_parser_add_option(op, option);

  oprval = gt_option_parser_parse(op, &parsed_args, argc, argv,
                                  gt_versionfunc, err);

  if (oprval == GT_OPTION_PARSER_OK && !gt_option_is_set(optionfmout))
  {
    if (gt_str_array_size(mkfmcallinfo->indexnametab) > 1UL)
    {
      gt_error_set(err,"if more than one index is given, then "
                       "option -fmout is mandatory");
      oprval = GT_OPTION_PARSER_ERROR;
    }
    else
    {
      /* single index: name the FM-index after it */
      char *basenameptr
        = gt_basename(gt_str_array_get(mkfmcallinfo->indexnametab, 0));
      gt_str_set(mkfmcallinfo->outfmindex,basenameptr);
      gt_free(basenameptr);
    }
  }
  gt_option_parser_delete(op);

  if (oprval == GT_OPTION_PARSER_OK && parsed_args != argc)
  {
    gt_error_set(err,"superfluous program parameters");
    oprval = GT_OPTION_PARSER_ERROR;
  }
  return oprval;
}
/*
 * Register and parse the command line options of the index construction
 * tools.
 *
 * <doesa> selects the variant: true registers the enhanced suffix array
 * options ("Compute enhanced suffix array."), false registers the packed
 * index options ("Compute packed index.").
 *
 * The function creates so->indexname, so->inputindex and so->db and stores
 * the option handles returned by the registration functions in so->encopts,
 * so->loadopts and so->idxopts. None of these are released here.
 *
 * <parsed_args> is handed through to gt_option_parser_parse(); <err> receives
 * the error set by the parser, if any.
 *
 * Returns the value delivered by gt_option_parser_parse().
 */
static GtOPrval parse_options(int *parsed_args,
                              bool doesa,
                              Suffixeratoroptions *so,
                              int argc,
                              const char **argv,
                              GtError *err)
{
  GtOptionParser *op;
  GtOption *option,
           *optionshowprogress,
           *optiongenomediff,
           *optionii;
  GtOPrval oprval;

  gt_error_check(err);
  op = gt_option_parser_new("[option ...] (-db file [...] | -ii index)",
                            doesa ? "Compute enhanced suffix array."
                                  : "Compute packed index.");
  gt_option_parser_set_mail_address(op, "<*****@*****.**>");

  /* input info */
  so->indexname = gt_str_new();
  so->inputindex = gt_str_new();
  so->db = gt_str_array_new();

  /* register options for encoded sequence handling */
  so->encopts = gt_encseq_options_register_encoding(op, so->indexname, so->db);
  so->loadopts = gt_encseq_options_register_loading(op, so->indexname);

  /* register options for index handling */
  if (doesa)
    so->idxopts = gt_index_options_register_esa(op, so->encopts);
  else
    so->idxopts = gt_index_options_register_packedidx(op, so->indexname,
                                                      so->encopts);

  /* verbosity */
  option = gt_option_new_verbose(&so->beverbose);
  gt_option_parser_add_option(op, option);

  optionshowprogress = gt_option_new_bool("showprogress",
                                          "show a progress bar",
                                          &so->showprogress,
                                          false);
  gt_option_parser_add_option(op, optionshowprogress);

  /* -ii names an already existing encoded sequence; one of -db and -ii is
     required, and -ii excludes -db as well as the encoding options listed
     below */
  optionii = gt_option_new_filename("ii", "specify existing encoded sequence",
                                    so->inputindex);
  gt_option_parser_add_option(op, optionii);
  gt_option_is_mandatory_either(gt_encseq_options_db_option(so->encopts),
                                optionii);
  gt_option_exclude(gt_encseq_options_db_option(so->encopts), optionii);
  gt_option_exclude(optionii, gt_encseq_options_smap_option(so->encopts));
  gt_option_exclude(optionii, gt_encseq_options_dna_option(so->encopts));
  gt_option_exclude(optionii, gt_encseq_options_protein_option(so->encopts));
  gt_option_exclude(optionii, gt_encseq_options_plain_option(so->encopts));
  gt_option_exclude(optionii, gt_encseq_options_sat_option(so->encopts));

  /* The help text used to be split by a physical line break inside the
     string literal, which is not valid C; the literal is joined again. */
  optiongenomediff = gt_option_new_bool("genomediff",
                                   "directly process the lcp intervals using "
                                   "the genomediff algorithm (suffix array and "
                                   "lcp-tables are not output)",
                                   &so->genomediff,
                                   false);
  gt_option_is_extended_option(optiongenomediff);
  /* the suffix table output option may be absent for the chosen variant */
  if (gt_index_options_outsuftab_option(so->idxopts) != NULL) {
    gt_option_exclude(optiongenomediff,
                      gt_index_options_outsuftab_option(so->idxopts));
  }
  gt_option_parser_add_option(op, optiongenomediff);

  /* suffixerator and friends do not take arguments */
  gt_option_parser_set_min_max_args(op, 0U, 0U);

  oprval = gt_option_parser_parse(op, parsed_args, argc, argv,
                                  gt_versionfunc, err);

  /* NOTE(review): this fallback runs regardless of <oprval>; when parsing
     did not succeed so->inputindex may still be empty -- confirm that no
     caller relies on so->indexname in that case. */
  if (gt_str_length(so->indexname) == 0UL) {
    /* we do not have an indexname yet, so there was none given in the
       -indexname option and it could not be derived from the input filenames.
       So it must be in the -ii parameter. */
    char *basenameptr;

    basenameptr = gt_basename(gt_str_get(so->inputindex));
    gt_str_set(so->indexname, basenameptr);
    gt_free(basenameptr);
  }
  gt_option_parser_delete(op);
  return oprval;
}