/*
  Produces the next fragment for <shredder>.

  Returns NULL once <shredder->seqnum> is no longer a valid sequence number of
  <shredder->bioseq>. Otherwise the buffer obtained from
  gt_bioseq_get_sequence_range() is returned, its length is stored in
  <fragment_length> and <desc> is overwritten with the description of the
  sequence the fragment was taken from. The shredder state (<seqnum>, <pos>)
  is advanced for the following call.
*/
static char* generate_fragment(GtShredder *shredder,
                               unsigned long *fragment_length, GtStr *desc)
{
  unsigned long seqlen, fraglen, extra;
  char *fragment;

  gt_assert(shredder && fragment_length);

  /* every sequence has been consumed already */
  if (!(shredder->seqnum < gt_bioseq_number_of_sequences(shredder->bioseq)))
    return NULL;

  seqlen = gt_bioseq_get_sequence_length(shredder->bioseq, shredder->seqnum);

  /* minimum length plus a random extra; the random source is only consulted
     when the length interval is not a single value */
  if (shredder->maxlength == shredder->minlength)
    extra = 0;
  else
    extra = gt_rand_max(shredder->maxlength - shredder->minlength);
  fraglen = extra + shredder->minlength;
  gt_assert(fraglen >= shredder->minlength);

  /* clip the fragment at the end of the sequence */
  if (shredder->pos + fraglen > seqlen)
    fraglen = seqlen - shredder->pos;
  *fragment_length = fraglen;

  gt_str_reset(desc);
  gt_str_append_cstr(desc, gt_bioseq_get_description(shredder->bioseq,
                                                     shredder->seqnum));

  gt_assert(shredder->pos + fraglen <= seqlen);
  fragment = gt_bioseq_get_sequence_range(shredder->bioseq, shredder->seqnum,
                                          shredder->pos,
                                          shredder->pos + fraglen -1);

  if (shredder->pos + fraglen == seqlen) {
    /* that was the last fragment of this sequence: move on to the next one */
    shredder->seqnum++;
    shredder->pos = 0;
  }
  else if (fraglen > shredder->overlap) {
    /* the next fragment starts <overlap> bases before the end of this one */
    shredder->pos += fraglen - shredder->overlap;
  }
  else {
    /* always advance by at least one base */
    shredder->pos++;
  }

  return fragment;
}
/*
  Shows every sequence of <bs> whose description is reported as a match for
  <pattern> by gt_grep(). Matching entries are passed to gt_fasta_show_entry()
  with line width <width> and output file <outfp>.

  Returns 0 on success. If gt_grep() fails, iteration stops and its non-zero
  return value is returned (<err> is handed to gt_grep()).
*/
static int extractseq_match(GtFile *outfp, GtBioseq *bs, const char *pattern,
                            unsigned long width, GtError *err)
{
  const char *desc;
  unsigned long i;
  bool match;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(bs && pattern);

  for (i = 0; !had_err && i < gt_bioseq_number_of_sequences(bs); i++) {
    desc = gt_bioseq_get_description(bs, i);
    gt_assert(desc);
    had_err = gt_grep(&match, pattern, desc, err);
    if (!had_err && match) {
      /* The other users of gt_bioseq_get_sequence() in this code base treat
         the returned buffer as owned by the caller and release it with
         gt_free(); do the same here so that it is not leaked for every
         matching sequence. */
      char *seq = gt_bioseq_get_sequence(bs, i);
      gt_fasta_show_entry(desc, seq, gt_bioseq_get_sequence_length(bs, i),
                          width, outfp);
      gt_free(seq);
    }
  }

  return had_err;
}
/*
  Tool runner: iterates over all sequence files given in
  <argv>[<parsed_args>..<argc>-1], shows the sequences which pass the filter
  options stored in <tool_arguments> (a SeqFilterArguments object) and finally
  prints a statistics line to stderr.

  A sequence passes if all of the following hold:
  - the step counter selects it (<step> == 1, or it is the <step>-th sequence
    since the last selected position; the counter runs across files),
  - <sample_prob> == 1.0 or gt_rand_0_to_1() <= <sample_prob>,
  - its length is within <minlength>/<maxlength> (each bound is ignored if it
    equals GT_UNDEF_UWORD).
  Processing of a file stops as soon as <maxseqnum> sequences have passed
  (unless <maxseqnum> is GT_UNDEF_UWORD); the remaining sequences of that file
  are counted as filtered.

  Returns 0 on success or the non-zero value returned by
  gt_bioseq_iterator_next().
*/
static int gt_seqfilter_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  SeqFilterArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bioseq;
  GtUint64 passed = 0, filtered = 0, num_of_sequences = 0, steps = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(tool_arguments);

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);

  while (!(had_err = gt_bioseq_iterator_next(bsi, &bioseq, err)) &&
         bioseq != NULL) {
    GtUword i;
    GtUint64 current_num = gt_bioseq_number_of_sequences(bioseq);
    for (i = 0; i < current_num &&
                (arguments->maxseqnum == GT_UNDEF_UWORD ||
                 passed + 1 <= arguments->maxseqnum); i++) {
      char *seq;
      if ((arguments->step == 1 || steps + 1 == arguments->step) &&
          (arguments->sample_prob == 1.0 ||
           gt_rand_0_to_1() <= arguments->sample_prob) &&
          (arguments->minlength == GT_UNDEF_UWORD ||
           gt_bioseq_get_sequence_length(bioseq, i) >= arguments->minlength) &&
          (arguments->maxlength == GT_UNDEF_UWORD ||
           gt_bioseq_get_sequence_length(bioseq, i) <= arguments->maxlength)) {
        seq = gt_bioseq_get_sequence(bioseq, i);
        gt_fasta_show_entry(gt_bioseq_get_description(bioseq, i), seq,
                            gt_bioseq_get_sequence_length(bioseq, i),
                            arguments->width, arguments->outfp);
        gt_free(seq);
        passed++;
      }
      else {
        filtered++;
      }
      /* wrap the step counter after every <step>-th sequence */
      steps = (steps + 1 == arguments->step) ? 0 : steps + 1;
    }
    /* sequences not visited because <maxseqnum> was reached */
    filtered += current_num - i;
    num_of_sequences += current_num;
    gt_bioseq_delete(bioseq);
  }

  /* show statistics */
  if (!had_err) {
    double removed_percent = 0.0;
    gt_assert(passed + filtered == num_of_sequences);
    /* without any input sequence the ratio would be 0/0 and show up as NaN */
    if (num_of_sequences > 0)
      removed_percent = ((double) filtered / num_of_sequences) * 100.0;
    fprintf(stderr,
            "# " GT_LLU " out of " GT_LLU
            " sequences have been removed (%.3f%%)\n",
            filtered, num_of_sequences, removed_percent);
  }

  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
/*
  Looks up the sequence whose description matches <seqid> in the sequence
  files of <bsc> and stores its position in <filenum> and <seqnum>.

  Results are memoized in <bsc->grep_cache> (created on first use), keyed by
  <seqid>; a cached entry is returned without searching again.

  The search pattern is the escaped <seqid>. If <bsc->matchdescstart> is set,
  it is anchored: "^" is prepended and "([[:space:]]|$)" is appended.

  Returns 0 on success and a non-zero value with <err> set if gt_grep()
  fails, if more than one description matches, or if none matches.
  An entry is added to the cache only when the search ended without error, so
  an ambiguous <seqid> is reported again on every lookup instead of being
  silently resolved to its first match by a stale cache entry.
*/
static int grep_desc(GtBioseqCol *bsc, GtUword *filenum, GtUword *seqnum,
                     GtStr *seqid, GtError *err)
{
  GtUword i, j, num_matches = 0;
  const GtSeqInfo *seq_info_ptr;
  GtSeqInfo seq_info;
  GtStr *pattern, *escaped;
  bool match = false;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(bsc && filenum && seqnum && seqid);

  /* create cache */
  if (!bsc->grep_cache)
    bsc->grep_cache = gt_seq_info_cache_new();

  /* try to read from cache */
  seq_info_ptr = gt_seq_info_cache_get(bsc->grep_cache, gt_str_get(seqid));
  if (seq_info_ptr) {
    *filenum = seq_info_ptr->filenum;
    *seqnum = seq_info_ptr->seqnum;
    return 0;
  }

  /* build the (optionally anchored) search pattern */
  pattern = gt_str_new();
  escaped = gt_str_new();
  gt_grep_escape_extended(escaped, gt_str_get(seqid), gt_str_length(seqid));
  if (bsc->matchdescstart)
    gt_str_append_cstr(pattern, "^");
  gt_str_append_str(pattern, escaped);
  if (bsc->matchdescstart)
    gt_str_append_cstr(pattern, "([[:space:]]|$)");

  for (i = 0; !had_err && i < bsc->num_of_seqfiles; i++) {
    GtBioseq *bioseq = bsc->bioseqs[i];
    for (j = 0; !had_err && j < gt_bioseq_number_of_sequences(bioseq); j++) {
      const char *desc = gt_bioseq_get_description(bioseq, j);
      had_err = gt_grep(&match, gt_str_get(pattern), desc, err);
      if (!had_err && match) {
        num_matches++;
        if (num_matches > 1) {
          gt_error_set(err, "query seqid '%s' could match more than one "
                            "sequence description", gt_str_get(seqid));
          had_err = -1;
          break;
        }
        *filenum = i;
        *seqnum = j;
        /* remember the hit; it is cached below once it is known to be the
           only one */
        seq_info.filenum = i;
        seq_info.seqnum = j;
      }
    }
    /* NOTE(review): <match> holds the result of the last gt_grep() call of
       this file only, so the remaining files are skipped exactly when the
       last inspected description matched -- kept as in the original */
    if (match)
      break;
  }

  gt_str_delete(pattern);
  gt_str_delete(escaped);

  if (!had_err && num_matches == 0) {
    gt_error_set(err, "no description matched sequence ID '%s'",
                 gt_str_get(seqid));
    had_err = -1;
  }

  /* cache results (here num_matches == 1, hence <seq_info> is initialized) */
  if (!had_err) {
    gt_seq_info_cache_add(bsc->grep_cache, gt_str_get(seqid), &seq_info);
  }

  return had_err;
}
/*
  Unit test for gt_pbs_find().

  A temporary tRNA library with two entries ("test1", "test2") is written to
  disk, a synthetic sequence <fullseq> with two marked sites (see the inline
  markers in the literal) is searched, and the two ranked hits are compared
  against hard-coded expectations (length, edit distance, offset, tRNA start,
  tRNA name, coordinates, strand, relative score).

  Returns <had_err>, which starts at 0 and is handed to every ensure() check.
  NOTE(review): statements outside of ensure() (e.g. the hit getters assigned
  to <rng>, <score1>, <score2>) run unconditionally -- presumably ensure()
  only skips its own expression after a failure; confirm against the macro.
*/
int gt_pbs_unit_test(GtError *err)
{
  int had_err = 0;
  GtLTRElement element;
  GtPBSOptions o;
  GtStr *tmpfilename;
  FILE *tmpfp;
  GtPBSResults *res;
  GtPBSHit *hit;
  double score1, score2;
  GtRange rng;
  char *rev_seq, *seq, tmp[BUFSIZ];
  /* test sequence; the hit coordinates checked below (120..136 and 506..519)
     are used as indices into this string */
  const char *fullseq = "aaaaaaaaaaaaaaaaaaaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "acatactaggatgctag" /* <- PBS forward */
                        "aatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatag"
                        /* PBS reverse -> */
                        "gatcctaaggctac"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "aaaaaaaaaaaaaaaaaaaa";

  /* notice previous errors */
  gt_error_check(err);

  /* create temporary tRNA library file (two FASTA entries) */
  tmpfilename = gt_str_new();
  tmpfp = gt_xtmpfp(tmpfilename);
  fprintf(tmpfp, ">test1\nccccccccccccccctagcatcctagtatgtccc\n"
                 ">test2\ncccccccccgatcctagggctaccctttc\n");
  gt_fa_xfclose(tmpfp);
  ensure(had_err, gt_file_exists(gt_str_get(tmpfilename)));

  /* setup testing parameters */
  o.radius = 30;
  o.max_edist = 1;
  o.alilen.start = 11;
  o.alilen.end = 30;
  o.offsetlen.start = 0;
  o.offsetlen.end = 5;
  o.trnaoffsetlen.start = 0;
  o.trnaoffsetlen.end = 40;
  o.ali_score_match = 5;
  o.ali_score_mismatch = -10;
  o.ali_score_insertion = o.ali_score_deletion = -20;
  /* the library is read back from the temporary file written above */
  o.trna_lib = gt_bioseq_new(gt_str_get(tmpfilename), err);
  ensure(had_err, gt_bioseq_number_of_sequences(o.trna_lib) == 2);

  /* LTR boundaries of the test element */
  element.leftLTR_5 = 20;
  element.leftLTR_3 = 119;
  element.rightLTR_5 = 520;
  element.rightLTR_3 = 619;

  /* setup sequences: both buffers receive the 600 characters of <fullseq>
     starting at index 20; the second one is then reverse-complemented.
     The buffers are not NUL-terminated. */
  seq = gt_malloc(600 * sizeof (char));
  rev_seq = gt_malloc(600 * sizeof (char));
  memcpy(seq, fullseq + 20, 600);
  memcpy(rev_seq, fullseq + 20, 600);
  gt_reverse_complement(rev_seq, 600, err);

  /* try to find PBS in sequences */
  res = gt_pbs_find(seq, rev_seq, &element, &o, err);
  ensure(had_err, res != NULL);
  ensure(had_err, gt_pbs_results_get_number_of_hits(res) == 2);

  /* check first hit on forward strand */
  hit = gt_pbs_results_get_ranked_hit(res, 0);
  ensure(had_err, hit != NULL);
  ensure(had_err, gt_pbs_hit_get_alignment_length(hit) == 17);
  ensure(had_err, gt_pbs_hit_get_edist(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_offset(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_tstart(hit) == 3);
  ensure(had_err, strcmp(gt_pbs_hit_get_trna(hit), "test1") == 0);
  rng = gt_pbs_hit_get_coords(hit);
  ensure(had_err, rng.start == 120);
  ensure(had_err, rng.end == 136);
  score1 = gt_pbs_hit_get_score(hit);
  ensure(had_err, gt_pbs_hit_get_strand(hit) == GT_STRAND_FORWARD);
  /* extract the reported range from <fullseq>; <tmp> is zeroed first (all
     but its last byte) so that the copied text is NUL-terminated */
  memset(tmp, 0, BUFSIZ-1);
  memcpy(tmp, fullseq + (rng.start * sizeof (char)),
         (rng.end - rng.start + 1) * sizeof (char));
  ensure(had_err, strcmp(tmp, "acatactaggatgctag" ) == 0);

  /* check second hit on reverse strand */
  hit = gt_pbs_results_get_ranked_hit(res, 1);
  ensure(had_err, hit != NULL);
  ensure(had_err, gt_pbs_hit_get_alignment_length(hit) == 14);
  ensure(had_err, gt_pbs_hit_get_edist(hit) == 1);
  ensure(had_err, gt_pbs_hit_get_offset(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_tstart(hit) == 6);
  ensure(had_err, strcmp(gt_pbs_hit_get_trna(hit), "test2") == 0);
  rng = gt_pbs_hit_get_coords(hit);
  ensure(had_err, rng.start == 506);
  ensure(had_err, rng.end == 519);
  score2 = gt_pbs_hit_get_score(hit);
  /* the first-ranked hit must have the strictly better score */
  ensure(had_err, gt_double_compare(score1, score2) > 0);
  ensure(had_err, gt_pbs_hit_get_strand(hit) == GT_STRAND_REVERSE);
  /* the expected text is read from <fullseq> itself, i.e. as written in the
     literal above, not reverse-complemented */
  memset(tmp, 0, BUFSIZ-1);
  memcpy(tmp, fullseq + (rng.start * sizeof (char)),
         (rng.end - rng.start + 1) * sizeof (char));
  ensure(had_err, strcmp(tmp, "gatcctaaggctac" ) == 0);

  /* clean up: remove the temporary library and release all objects */
  gt_xremove(gt_str_get(tmpfilename));
  ensure(had_err, !gt_file_exists(gt_str_get(tmpfilename)));
  gt_str_delete(tmpfilename);
  gt_bioseq_delete(o.trna_lib);
  gt_free(rev_seq);
  gt_free(seq);
  gt_pbs_results_delete(res);

  return had_err;
}
/*
  Searches for primer binding sites of <element>.

  Two windows of 2 * <o->radius> + 1 characters are cut out: one from <seq>,
  starting <o->radius> characters before offset gt_ltrelement_leftltrlen(), and
  one from <rev_seq>, starting <o->radius> characters before offset
  gt_ltrelement_rightltrlen(). The reverse complement of every sequence in
  <o->trna_lib> is aligned against both windows with gt_swalign() (scores taken
  from <o>) and each alignment is handed to gt_pbs_add_hit(), tagged with
  GT_STRAND_FORWARD or GT_STRAND_REVERSE respectively.

  Returns a new results object whose hit array has been sorted with
  gt_pbs_hit_compare(). The return value of gt_reverse_complement() is
  ignored.
*/
GtPBSResults* gt_pbs_find(const char *seq, const char *rev_seq,
                          GtLTRElement *element, GtPBSOptions *o,
                          GtError *err)
{
  GtAlphabet *a = gt_alphabet_new_dna();
  GtScoreFunction *sf = gt_dna_scorefunc_new(a,
                                             o->ali_score_match,
                                             o->ali_score_mismatch,
                                             o->ali_score_insertion,
                                             o->ali_score_deletion);
  GtSeq *fwd_window, *rev_window;
  GtPBSResults *found;
  GtAlignment *alignment;
  unsigned long idx;

  gt_assert(seq && rev_seq && sf && a && element);

  found = gt_pbs_results_new(element, o);

  /* search windows around the inner LTR boundaries */
  fwd_window = gt_seq_new(seq + (gt_ltrelement_leftltrlen(element))
                              - (o->radius),
                          2*o->radius + 1, a);
  rev_window = gt_seq_new(rev_seq + (gt_ltrelement_rightltrlen(element))
                                  - (o->radius),
                          2*o->radius + 1, a);

  for (idx = 0; idx < gt_bioseq_number_of_sequences(o->trna_lib); idx++) {
    GtSeq *trna, *trna_revcomp;
    char *revcomp_buf;
    unsigned long trna_len;

    /* make a private, reverse-complemented copy of the tRNA sequence */
    trna = gt_bioseq_get_seq(o->trna_lib, idx);
    trna_len = gt_seq_length(trna);
    revcomp_buf = gt_calloc(trna_len, sizeof (char));
    memcpy(revcomp_buf, gt_seq_get_orig(trna), sizeof (char)*trna_len);
    (void) gt_reverse_complement(revcomp_buf, trna_len, err);
    /* the buffer is not freed here directly; presumably gt_seq_new_own()
       hands it over to the sequence object deleted below */
    trna_revcomp = gt_seq_new_own(revcomp_buf, trna_len, a);

    /* forward strand */
    alignment = gt_swalign(fwd_window, trna_revcomp, sf);
    gt_pbs_add_hit(found->hits, alignment, o, trna_len,
                   gt_seq_get_description(trna), GT_STRAND_FORWARD, found);
    gt_alignment_delete(alignment);

    /* reverse strand */
    alignment = gt_swalign(rev_window, trna_revcomp, sf);
    gt_pbs_add_hit(found->hits, alignment, o, trna_len,
                   gt_seq_get_description(trna), GT_STRAND_REVERSE, found);
    gt_alignment_delete(alignment);

    gt_seq_delete(trna_revcomp);
  }

  gt_seq_delete(fwd_window);
  gt_seq_delete(rev_window);
  gt_score_function_delete(sf);
  gt_alphabet_delete(a);

  /* order the collected hits */
  gt_array_sort(found->hits, gt_pbs_hit_compare);

  return found;
}
/*
  Parameterizes the model of <bssm_param> selected by <termtype> from the
  training files below <path>.

  The version number is set to MYVERSION, the model belonging to <termtype>
  is marked as set and its window sizes are initialized. Then, for each of the
  NUMOFFILES entries of <filenames>, the file
  <path>/{GT_donor,GC_donor,AG_acceptor}/<filenames[i]> (with ".gz" appended
  if <gzip> is true) is read, validated and passed to build_bssm() as
  hypothesis <i>.

  Validation: every sequence must have length STRINGSIZE and carry the
  dinucleotide matching <termtype> (GT, GC or AG) at positions 51 and 52,
  i.e. at the 0-based indices 50 and 51.

  Returns 0 on success, -1 (with <err> set) if a file cannot be read or a
  sequence fails validation.
*/
int gth_bssm_param_parameterize(GthBSSMParam *bssm_param, const char *path,
                                Termtype termtype, bool gzip, GtError *err)
{
  GtAlphabet *alphabet = NULL;
  GtBioseq *bioseq;
  GtStr *file2proc;
  GtUword i, j;
  int had_err = 0;

  gt_error_check(err);

  file2proc = gt_str_new();

  /* set version number */
  bssm_param->version_num = (unsigned char) MYVERSION;

  /* set model to true and set window sizes */
  switch (termtype) {
    case GT_DONOR_TYPE:
      bssm_param->gt_donor_model_set = true;
      set_window_sizes_in_Bssmmodel(&bssm_param->gt_donor_model);
      break;
    case GC_DONOR_TYPE:
      bssm_param->gc_donor_model_set = true;
      set_window_sizes_in_Bssmmodel(&bssm_param->gc_donor_model);
      break;
    case AG_ACCEPTOR_TYPE:
      bssm_param->ag_acceptor_model_set = true;
      set_window_sizes_in_Bssmmodel(&bssm_param->ag_acceptor_model);
      break;
    default: gt_assert(0);
  }

  for (i = 0; !had_err && i < NUMOFFILES; i++) {
    /* process datafile */
    gt_str_append_cstr(file2proc, path);
    switch (termtype) {
      case GT_DONOR_TYPE:
        gt_str_append_cstr(file2proc, "/GT_donor/");
        gt_str_append_cstr(file2proc, filenames[i]);
        break;
      case GC_DONOR_TYPE:
        gt_str_append_cstr(file2proc, "/GC_donor/");
        gt_str_append_cstr(file2proc, filenames[i]);
        break;
      case AG_ACCEPTOR_TYPE:
        gt_str_append_cstr(file2proc, "/AG_acceptor/");
        gt_str_append_cstr(file2proc, filenames[i]);
        break;
      default: gt_assert(0);
    }

    if (gzip)
      gt_str_append_cstr(file2proc, ".gz");

    if (!(bioseq = gt_bioseq_new(gt_str_get(file2proc), err)))
      had_err = -1;

    if (!had_err)
      alphabet = gt_bioseq_get_alphabet(bioseq);

    /* check here if all sequences have the length 102 and correct bases at
       positions 51 and 52 (i.e., GT, GC, or AG);
       the loop is not entered if the file could not be read */
    for (j = 0; !had_err && j < gt_bioseq_number_of_sequences(bioseq); j++) {
      GtUchar encoded_seq[2];

      /* check length */
      if (gt_bioseq_get_sequence_length(bioseq, j) != STRINGSIZE) {
        gt_error_set(err, "sequence "GT_WU" in file \"%s\" does not have "
                          "length %u", j, gt_str_get(file2proc), STRINGSIZE);
        had_err = -1;
      }

      if (!had_err) {
        /* fetch the dinucleotide only after the length check has passed, so
           that indices 50 and 51 are never read from a shorter sequence */
        encoded_seq[0] = gt_bioseq_get_encoded_char(bioseq, j, 50);
        encoded_seq[1] = gt_bioseq_get_encoded_char(bioseq, j, 51);

        /* check base correctness */
        switch (termtype) {
          case GT_DONOR_TYPE:
            if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'G') ||
                encoded_seq[1] != gt_alphabet_encode(alphabet, 'T')) {
              gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a GT "
                                "sequence", j, gt_str_get(file2proc));
              had_err = -1;
            }
            break;
          case GC_DONOR_TYPE:
            if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'G') ||
                encoded_seq[1] != gt_alphabet_encode(alphabet, 'C')) {
              gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a GC "
                                "sequence", j, gt_str_get(file2proc));
              had_err = -1;
            }
            break;
          case AG_ACCEPTOR_TYPE:
            if (encoded_seq[0] != gt_alphabet_encode(alphabet, 'A') ||
                encoded_seq[1] != gt_alphabet_encode(alphabet, 'G')) {
              gt_error_set(err, "sequence "GT_WU" in file \"%s\" is not a AG "
                                "sequence", j, gt_str_get(file2proc));
              had_err = -1;
            }
            break;
          default: gt_assert(0);
        }
      }
    }

    if (!had_err) {
      /* the file index doubles as the hypothesis number */
      switch (termtype) {
        case GT_DONOR_TYPE:
          build_bssm(bioseq, &bssm_param->gt_donor_model, i);
          break;
        case GC_DONOR_TYPE:
          build_bssm(bioseq, &bssm_param->gc_donor_model, i);
          break;
        case AG_ACCEPTOR_TYPE:
          build_bssm(bioseq, &bssm_param->ag_acceptor_model, i);
          break;
        default: gt_assert(0);
      }
    }

    /* reset */
    gt_str_reset(file2proc);

    /* free space */
    gt_bioseq_delete(bioseq);
  }
  gt_str_delete(file2proc);

  return had_err;
}
/*
  Fills slot <hypothesisnum> of <bssm_model->hypotables.hypo7table> with
  transition frequencies estimated from the sequences of <bioseq>.

  For every position but the last, the occurrences of each symbol and of each
  pair (symbol, following symbol) are counted over all sequences. Slot 0 of
  the table receives the relative symbol frequencies at the first position;
  slot k (k >= 1) receives, for each pair, the pair frequency divided by the
  symbol frequency at position k-1, or NULLPROB if that symbol never occurs
  there. Rows containing a NULLPROB entry are smoothed afterwards.
  Every sequence is asserted to have length STRINGSIZE.
*/
static void build_bssm(GtBioseq *bioseq, GthBSSMModel *bssm_model,
                       unsigned int hypothesisnum)
{
  GtUword single_ct[STRINGSIZE-1][ALPHSIZE],          /* symbol counts */
          pair_ct[STRINGSIZE-1][ALPHSIZE][ALPHSIZE];  /* pair counts */
  GtUword pos, from, to, seqidx,
          len, bufsize = 0,
          total = gt_bioseq_number_of_sequences(bioseq);
  GtUchar *encoded_seq = NULL;
  double single_rel, /* relative symbol frequency */
         pair_rel;   /* relative pair frequency */
  int row_has_null;

  /* reset both count tables */
  for (pos = 0; pos < (STRINGSIZE-1); pos++) {
    for (from = 0; from < ALPHSIZE; from++)
      single_ct[pos][from] = INITVAL_INT;
  }
  for (pos = 0; pos < (STRINGSIZE-1); pos++) {
    for (from = 0; from < ALPHSIZE; from++) {
      for (to = 0; to < ALPHSIZE; to++)
        pair_ct[pos][from][to] = INITVAL_INT;
    }
  }

  /* first pass: count single symbols per position */
  for (seqidx = 0; seqidx < total; seqidx++) {
    len = gt_bioseq_get_sequence_length(bioseq, seqidx);
    gt_assert(len == STRINGSIZE);
    if (len > bufsize) {
      /* grow the decoding buffer on demand */
      encoded_seq = gt_realloc(encoded_seq, len);
      bufsize = len;
    }
    gt_bioseq_get_encoded_sequence(bioseq, encoded_seq, seqidx);
    for (pos = 0; pos < (STRINGSIZE-1); pos++) {
      gt_assert(encoded_seq[pos] < ALPHSIZE);
      mono_count_done:
      single_ct[pos][encoded_seq[pos]]++;
    }
  }

  /* second pass: count adjacent symbol pairs per position */
  for (seqidx = 0; seqidx < total; seqidx++) {
    len = gt_bioseq_get_sequence_length(bioseq, seqidx);
    gt_assert(len == STRINGSIZE);
    if (len > bufsize) {
      encoded_seq = gt_realloc(encoded_seq, len);
      bufsize = len;
    }
    gt_bioseq_get_encoded_sequence(bioseq, encoded_seq, seqidx);
    for (pos = 0; pos < (STRINGSIZE-1); pos++)
      pair_ct[pos][encoded_seq[pos]][encoded_seq[pos + 1]]++;
  }
  gt_free(encoded_seq);

  /* slot 0: equilibrium frequencies of the first position, replicated over
     the last table dimension */
  for (from = 0; from < ALPHSIZE; from++) {
    for (to = 0; to < ALPHSIZE; to++) {
      bssm_model->hypotables.hypo7table[hypothesisnum][0][from][to] =
        (GthFlt) single_ct[0][from] / total;
    }
  }

  /* slots 1..STRINGSIZE-1: transition frequencies */
  for (pos = 1; pos < STRINGSIZE; pos++) {
    for (from = 0; from < ALPHSIZE; from++) {
      single_rel = (double) single_ct[pos-1][from] / total;
      for (to = 0; to < ALPHSIZE; to++) {
        pair_rel = (double) pair_ct[pos-1][from][to] / total;
        if (single_rel == 0.0) {
          bssm_model->hypotables.hypo7table[hypothesisnum][pos][from][to] =
            (GthFlt) NULLPROB;
        }
        else {
          bssm_model->hypotables.hypo7table[hypothesisnum][pos][from][to] =
            (GthFlt) (pair_rel / single_rel);
        }
      }

      /* Smoothing: a row is rewritten as soon as one of its entries equals
         NULLPROB. In such a row NULLPROB entries become PSEUDOPROB and every
         other entry p becomes p * (1 - 4 * PSEUDOPROB) + PSEUDOPROB.
         Rows without a NULLPROB entry are left untouched. */
      row_has_null = 0;
      for (to = 0; to < ALPHSIZE && !row_has_null; to++) {
        if (bssm_model->hypotables.hypo7table[hypothesisnum][pos][from][to]
            == NULLPROB)
          row_has_null = 1;
      }
      if (!row_has_null)
        continue;

      for (to = 0; to < ALPHSIZE; to++) {
        if (bssm_model->hypotables.hypo7table[hypothesisnum][pos][from][to]
            == NULLPROB) {
          bssm_model->hypotables.hypo7table[hypothesisnum][pos][from][to] =
            (GthFlt) PSEUDOPROB;
        }
        else {
          bssm_model->hypotables.hypo7table[hypothesisnum][pos][from][to] =
            (GthFlt)
            (bssm_model->hypotables.hypo7table[hypothesisnum][pos][from][to] *
             (1 - (4 * PSEUDOPROB)) + PSEUDOPROB);
        }
      }
    }
  }
}
/*
  Tool runner: reads all sequences from the files
  <argv>[<parsed_args>..<argc>-1], shows each sequence that
  gt_md5set_add_sequence() reports as GT_MD5SET_NOT_FOUND and counts all
  others (except errors) as duplicates. Finally a statistics line is printed
  to stderr.

  <tool_arguments> is a GtSequniqArguments object. If its <seqit> flag is
  unset, the files are loaded one by one as GtBioseq objects; otherwise they
  are streamed through a GtSeqIterator (with a progress bar if <verbose> is
  set). <rev> is passed on to gt_md5set_add_sequence(), <width> and <outfp>
  to gt_fasta_show_entry().

  Returns 0 on success and -1 if a file cannot be opened, the MD5 set reports
  an error, or the sequence iterator returns a negative value.
*/
static int gt_sequniq_runner(int argc, const char **argv, int parsed_args,
                             void *tool_arguments, GtError *err)
{
  GtSequniqArguments *arguments = tool_arguments;
  GtUint64 duplicates = 0, num_of_sequences = 0;
  int i, had_err = 0;
  GtMD5Set *md5set;

  gt_error_check(err);
  gt_assert(arguments);

  md5set = gt_md5set_new(arguments->nofseqs);

  if (!arguments->seqit) {
    GtUword j;
    GtBioseq *bs;

    for (i = parsed_args; !had_err && i < argc; i++) {
      if (!(bs = gt_bioseq_new(argv[i], err)))
        had_err = -1;
      if (!had_err) {
        GtMD5SetStatus retval;
        for (j = 0; j < gt_bioseq_number_of_sequences(bs) && !had_err; j++) {
          char *seq = gt_bioseq_get_sequence(bs, j);
          retval = gt_md5set_add_sequence(md5set, seq,
                                          gt_bioseq_get_sequence_length(bs, j),
                                          arguments->rev, err);
          if (retval == GT_MD5SET_NOT_FOUND)
            gt_fasta_show_entry(gt_bioseq_get_description(bs, j), seq,
                                gt_bioseq_get_sequence_length(bs, j),
                                arguments->width, arguments->outfp);
          else if (retval != GT_MD5SET_ERROR)
            duplicates++;
          else
            had_err = -1;
          num_of_sequences++;
          gt_free(seq);
        }
        gt_bioseq_delete(bs);
      }
    }
  }
  else {
    GtSeqIterator *seqit;
    GtStrArray *files;
    off_t totalsize;
    const GtUchar *sequence;
    char *desc;
    GtUword len;

    files = gt_str_array_new();
    for (i = parsed_args; i < argc; i++)
      gt_str_array_add_cstr(files, argv[i]);
    totalsize = gt_files_estimate_total_size(files);
    seqit = gt_seq_iterator_sequence_buffer_new(files, err);
    if (!seqit)
      had_err = -1;
    if (!had_err) {
      if (arguments->verbose) {
        gt_progressbar_start(gt_seq_iterator_getcurrentcounter(seqit,
                                                         (GtUint64) totalsize),
                             (GtUint64) totalsize);
      }
      while (!had_err) {
        GtMD5SetStatus retval;
        int rval = gt_seq_iterator_next(seqit, &sequence, &len, &desc, err);
        /* 1 means that a sequence was delivered. A negative value is treated
           as a failure instead of a regular end of input, so that it is not
           reported as success (assumes the iterator signals errors with a
           negative return value and sets <err> -- TODO confirm) */
        if (rval < 0)
          had_err = -1;
        if (rval != 1)
          break;
        retval = gt_md5set_add_sequence(md5set, (const char*) sequence, len,
                                        arguments->rev, err);
        if (retval == GT_MD5SET_NOT_FOUND)
          gt_fasta_show_entry(desc, (const char*) sequence, len,
                              arguments->width, arguments->outfp);
        else if (retval != GT_MD5SET_ERROR)
          duplicates++;
        else
          had_err = -1;
        num_of_sequences++;
      }
      if (arguments->verbose)
        gt_progressbar_stop();
      gt_seq_iterator_delete(seqit);
    }
    gt_str_array_delete(files);
  }

  /* show statistics */
  if (!had_err) {
    double removed_percent = 0.0;
    /* without any input sequence the ratio would be 0/0 and show up as NaN */
    if (num_of_sequences > 0) {
      removed_percent = ((double) duplicates / (double)num_of_sequences)
                        * 100.0;
    }
    fprintf(stderr,
            "# "GT_WU" out of "GT_WU" sequences have been removed (%.3f%%)\n",
            (GtUword)duplicates, (GtUword)num_of_sequences, removed_percent);
  }

  gt_md5set_delete(md5set);

  return had_err;
}