/*
 * Build a sequence iterator over the files named in <filenametab>.
 *
 * A sequence buffer is created with gt_sequence_buffer_new_guess_type() and
 * handed to gt_seq_iterator_sequence_buffer_new_with_buffer().
 *
 * Returns the new iterator, or NULL when the sequence buffer could not be
 * created (<err> is passed through to the buffer constructor).
 */
GtSeqIterator* gt_seq_iterator_sequence_buffer_new(const GtStrArray *filenametab,
                                                   GtError *err)
{
  GtSeqIterator *iterator = NULL;
  GtSequenceBuffer *buffer = gt_sequence_buffer_new_guess_type(filenametab,
                                                               err);

  if (buffer != NULL) {
    iterator = gt_seq_iterator_sequence_buffer_new_with_buffer(buffer);
    /* Give up the local handle on the buffer; presumably the iterator keeps
       its own reference (the buffer is not used here afterwards). */
    gt_sequence_buffer_delete(buffer);
  }
  return iterator;
}
/*
 * Runner of the sequence translation tool.
 *
 * All command line arguments from index <parsed_args> onwards are treated as
 * input files. Every sequence read from them is passed to
 * gt_seqtranslate_do_translation(); if arguments->reverse is set, a
 * reverse-complemented copy of the sequence is passed as well. Sequences
 * shorter than GT_CODON_LENGTH are skipped with a warning.
 *
 * Returns 0 on success and a non-zero value if opening the input, iterating
 * over it, reverse-complementing or translating failed.
 */
static int gt_seqtranslate_runner(int argc, const char **argv, int parsed_args,
                                  void *tool_arguments, GT_UNUSED GtError *err)
{
  GtTranslateArguments *arguments = tool_arguments;
  GtSeqIterator *seqit = NULL;
  GtSequenceBuffer *seqbuf = NULL;
  GtStrArray *files;
  GtStr *translations[3];
  char *desc;
  const GtUchar *sequence;
  GtUword len;
  int had_err = 0, status, idx;

  /* scratch strings handed to the translation routine */
  for (idx = 0; idx < 3; idx++)
    translations[idx] = gt_str_new();

  gt_error_check(err);
  gt_assert(arguments);

  /* collect the input file names */
  files = gt_str_array_new();
  idx = parsed_args;
  while (idx < argc) {
    gt_str_array_add_cstr(files, argv[idx]);
    idx++;
  }

  /* open the input */
  seqbuf = gt_sequence_buffer_new_guess_type(files, err);
  if (seqbuf != NULL)
    seqit = gt_seq_iterator_sequence_buffer_new_with_buffer(seqbuf);
  if (seqbuf == NULL || seqit == NULL)
    had_err = -1;

  /* process one sequence per round until the input ends or an error occurs */
  while (!had_err) {
    status = gt_seq_iterator_next(seqit, &sequence, &len, &desc, err);
    if (status == 0)
      break;                        /* no further sequence */
    if (status < 0) {
      had_err = -1;
      break;
    }

    if (len < GT_CODON_LENGTH) {
      gt_warning("sequence '%s' is shorter than codon length of %d, skipping",
                 desc, GT_CODON_LENGTH);
      continue;
    }

    /* forward strand */
    had_err = gt_seqtranslate_do_translation(arguments, (char*) sequence, len,
                                             desc, translations, false, err);
    if (had_err || !arguments->reverse)
      continue;

    /* reverse strand: work on a private copy of the sequence */
    {
      char *revseq = gt_cstr_dup_nt((char*) sequence, len);
      had_err = gt_reverse_complement(revseq, len, err);
      if (!had_err) {
        had_err = gt_seqtranslate_do_translation(arguments, revseq, len, desc,
                                                 translations, true, err);
      }
      gt_free(revseq);
    }
  }

  for (idx = 0; idx < 3; idx++)
    gt_str_delete(translations[idx]);
  gt_str_array_delete(files);
  gt_seq_iterator_delete(seqit);
  gt_sequence_buffer_delete(seqbuf);
  return had_err;
}
static int gt_convertseq_runner(int argc, const char **argv, int parsed_args, void *tool_arguments, GtError *err) { GtConvertseqArguments *arguments = tool_arguments; int had_err = 0, i; GtFilelengthvalues *flv; GtSeqIterator *seqit; GtSequenceBuffer *sb = NULL; GtStrArray *files; const GtUchar *sequence; char *desc; GtUword len, j; off_t totalsize; gt_error_check(err); gt_assert(arguments != NULL); files = gt_str_array_new(); for (i = parsed_args; i < argc; i++) { gt_str_array_add_cstr(files, argv[i]); } totalsize = gt_files_estimate_total_size(files); flv = gt_calloc((size_t) gt_str_array_size(files), sizeof (GtFilelengthvalues)); sb = gt_sequence_buffer_new_guess_type(files, err); if (!sb) { had_err = -1; } if (!had_err) { gt_sequence_buffer_set_filelengthtab(sb, flv); /* read input using seqiterator */ seqit = gt_seq_iterator_sequence_buffer_new_with_buffer(sb); if (arguments->verbose) { gt_progressbar_start(gt_seq_iterator_getcurrentcounter(seqit, (GtUint64) totalsize), (GtUint64) totalsize); } while (true) { GtUchar *seq = NULL; desc = NULL; j = 0UL; had_err = gt_seq_iterator_next(seqit, &sequence, &len, &desc, err); if (had_err != 1) break; if (arguments->revcomp) { GtUchar *newseq = gt_calloc((size_t) len+1, sizeof (GtUchar)); memcpy(newseq, sequence, (size_t) len*sizeof (GtUchar)); had_err = gt_reverse_complement((char*) newseq, len, err); if (had_err) break; seq = newseq; } else seq = (GtUchar*) sequence; if (!arguments->showseq) { bool in_wildcard = false; gt_file_xprintf(arguments->outfp, ">%s\n", desc); for (i = 0; (GtUword) i < len; i++) { if (arguments->reduce_wc_dna) { switch (seq[i]) { case 'a': case 'A': case 'c': case 'C': case 'g': case 'G': case 't': case 'u': case 'T': case 'U': in_wildcard = false; gt_file_xfputc((int) seq[i], arguments->outfp); j++; break; default: if (!in_wildcard) { in_wildcard = true; if (isupper((int) seq[i])) gt_file_xfputc((int) 'N', arguments->outfp); else gt_file_xfputc((int) 'n', arguments->outfp); j++; } } } else 
if (arguments->reduce_wc_prot) { switch (seq[i]) { case 'X': case 'B': case 'Z': if (!in_wildcard) { in_wildcard = true; gt_file_xfputc((int) 'N', arguments->outfp); j++; } break; case 'x': case 'b': case 'z': if (!in_wildcard) { in_wildcard = true; gt_file_xfputc((int) 'n', arguments->outfp); j++; } break; default: in_wildcard = false; gt_file_xfputc((int) seq[i], arguments->outfp); j++; } } else { gt_file_xfputc((int) seq[i], arguments->outfp); j++; } if (arguments->fastawidth > 0 && j % arguments->fastawidth == 0) { j = 0; gt_file_xprintf(arguments->outfp, "\n"); } } if (arguments->fastawidth == 0 || len % arguments->fastawidth != 0) gt_file_xprintf(arguments->outfp, "\n"); } if (arguments->revcomp) { gt_free(seq); } } if (arguments->showflv) { for (j=0;j<gt_str_array_size(files);j++) { fprintf(stderr, "file "GT_WU" (%s): "GT_WU"/"GT_WU"\n", j, gt_str_array_get(files, j), (GtUword) flv[j].length, (GtUword) flv[j].effectivelength); } } if (arguments->verbose) { gt_progressbar_stop(); } gt_sequence_buffer_delete(sb); gt_seq_iterator_delete(seqit); } gt_str_array_delete(files); gt_free(flv); return had_err; }