/* Emits the aligned segment <as> as one FASTQ entry on <outfp>.
   If <may_be_gapped> is set, sequence and qualities are ungapped first.
   For a segment flagged as reverse, the quality string is reversed in place
   and the sequence is reverse-complemented in place before it is shown; if
   the reverse complement fails, the error is printed to stderr and the
   process exits with EXIT_FAILURE.
   <desc> is used as entry description unless it is NULL, in which case the
   description stored in <as> is used. */
static void gt_hpol_processor_output_segment(GtAlignedSegment *as,
                                             bool may_be_gapped,
                                             GtFile *outfp,
                                             const char *desc)
{
  unsigned long slen;
  const char *entry_desc;

  if (may_be_gapped)
    gt_aligned_segment_ungap_seq_and_qual(as);

  slen = (unsigned long) strlen(gt_aligned_segment_seq(as));
  gt_assert(slen == (unsigned long) strlen(gt_aligned_segment_qual(as)));

  if (gt_aligned_segment_is_reverse(as))
  {
    GtError *err = gt_error_new();
    char *qual = gt_aligned_segment_qual(as);
    unsigned long lo, hi;

    /* reverse the qualities in place, walking inwards from both ends */
    for (lo = 0, hi = slen; lo + 1UL < hi; lo++)
    {
      char swapped;
      hi--;
      swapped = qual[lo];
      qual[lo] = qual[hi];
      qual[hi] = swapped;
    }
    gt_assert((unsigned long) strlen(gt_aligned_segment_qual(as)) == slen);

    if (gt_reverse_complement(gt_aligned_segment_seq(as), slen, err) != 0)
    {
      fprintf(stderr, "error: %s", gt_error_get(err));
      exit(EXIT_FAILURE);
    }
    gt_error_delete(err);
  }

  if (desc != NULL)
    entry_desc = desc;
  else
    entry_desc = gt_aligned_segment_description(as);
  gt_fastq_show_entry(entry_desc, gt_aligned_segment_seq(as),
                      gt_aligned_segment_qual(as), slen, 0, false, outfp);
}
/* Reverse-complements the spliced sequence stored in <ss> in place.
   On success the position mapping array is reversed as well and the
   <forward> flag is toggled. On failure <ss>'s mapping and flag are left
   untouched and the non-zero result of gt_reverse_complement() is returned
   (with <err> passed through to it). */
int gt_splicedseq_reverse(Splicedseq *ss, GtError *err)
{
  int rc_result;

  gt_error_check(err);
  gt_assert(ss);

  rc_result = gt_reverse_complement(gt_str_get(ss->splicedseq),
                                    gt_str_length(ss->splicedseq), err);
  if (rc_result)
    return rc_result;

  gt_array_reverse(ss->positionmapping);
  ss->forward = !ss->forward;
  return rc_result;
}
/* Looks up the upper-cased sequence <seq> of length <seqlen> in <set> by its
   MD5 sum.

   Returns GT_MD5SET_FOUND if the MD5 sum of the sequence is found.
   If <both_strands> is set and the sequence itself was not found, the
   reverse complement is hashed and looked up as well:
   GT_MD5SET_RC_FOUND is returned if it is found, GT_MD5SET_ERROR if the
   reverse complement could not be computed (<err> is passed to
   gt_reverse_complement()). In every other case GT_MD5SET_NOT_FOUND is
   returned.

   NOTE(review): the two lookups pass true/false as last argument to
   md5set_search(); presumably this controls whether the sum is inserted when
   missing -- confirm against md5set_search(). */
GtMD5SetStatus gt_md5set_add_sequence(GtMD5Set *set, const char* seq,
                                      GtUword seqlen, bool both_strands,
                                      GtError *err)
{
  md5_t md5sum, md5sum_rc;
  GtUword i;
  int retval = 0;
  bool found;

  gt_assert(set != NULL);
  gt_assert(set->table != NULL);

  md5set_prepare_buffer(set, seqlen);
  /* toupper() is only defined for EOF and values representable as unsigned
     char; plain char may be signed, so convert before the call */
  for (i = 0; i < seqlen; i++)
    set->buffer[i] = toupper((unsigned char) seq[i]);

  MD5SET_HASH_STRING(set->buffer, seqlen, md5sum);
  found = md5set_search(set, md5sum, true);
  if (found)
    return GT_MD5SET_FOUND;

  if (both_strands)
  {
    retval = gt_reverse_complement(set->buffer, seqlen, err);
    if (retval != 0)
    {
      gt_assert(retval < 0);
      return GT_MD5SET_ERROR;
    }
    MD5SET_HASH_STRING(set->buffer, seqlen, md5sum_rc);
    /* if the MD5 sum of the reverse complement equals the MD5 sum of the
       sequence itself we don't check if the reverse complement is in the
       set. Otherwise such sequences would never be added to the set at
       all. */
    if (md5sum_rc.l == md5sum.l && md5sum_rc.h == md5sum.h)
    {
      return GT_MD5SET_NOT_FOUND;
    }
    found = md5set_search(set, md5sum_rc, false);
    if (found)
      return GT_MD5SET_RC_FOUND;
  }
  return GT_MD5SET_NOT_FOUND;
}
/* Looks up the upper-cased sequence <seq> of length <seqlen> in <set> by its
   MD5 sum.

   Returns GT_MD5SET_FOUND if the MD5 sum of the sequence is found.
   If <both_strands> is set and the sequence itself was not found, the
   reverse complement is considered as well: GT_MD5SET_RC_FOUND is returned
   if its MD5 sum is found, GT_MD5SET_ERROR if the reverse complement could
   not be computed (<err> is passed to gt_reverse_complement()). In every
   other case GT_MD5SET_NOT_FOUND is returned.

   NOTE(review): the two lookups pass true/false as last argument to
   gt_md5set_search(); presumably this controls whether the sum is inserted
   when missing -- confirm against gt_md5set_search(). */
GtMD5SetStatus gt_md5set_add_sequence(GtMD5Set *set, const char* seq,
                                      unsigned long seqlen, bool both_strands,
                                      GtError *err)
{
  gt_md5_t md5sum, md5sum_rc;
  unsigned long i;
  int retval = 0;
  bool found;

  gt_assert(set != NULL);
  gt_assert(set->table != NULL);

  gt_md5set_prepare_buffer(set, seqlen);
  /* toupper() is only defined for EOF and values representable as unsigned
     char; plain char may be signed, so convert before the call */
  for (i = 0; i < seqlen; i++)
    set->buffer[i] = toupper((unsigned char) seq[i]);

  GT_MD5SET_HASH_STRING(set->buffer, seqlen, md5sum);
  found = gt_md5set_search(set, md5sum, true);
  if (found)
    return GT_MD5SET_FOUND;

  if (both_strands)
  {
    retval = gt_reverse_complement(set->buffer, seqlen, err);
    if (retval != 0)
    {
      gt_assert(retval < 0);
      return GT_MD5SET_ERROR;
    }
    /* A sequence that equals its own reverse complement has the same MD5 sum
       on both strands. That sum was not in the set a moment ago (otherwise
       GT_MD5SET_FOUND would have been returned above), so the sequence must
       not be reported as "reverse complement found" merely because of the
       lookup of its own forward strand. */
    for (i = 0; i < seqlen; i++)
    {
      if ((unsigned char) set->buffer[i] !=
          (unsigned char) toupper((unsigned char) seq[i]))
        break;
    }
    if (i == seqlen)
      return GT_MD5SET_NOT_FOUND;

    GT_MD5SET_HASH_STRING(set->buffer, seqlen, md5sum_rc);
    found = gt_md5set_search(set, md5sum_rc, false);
    if (found)
      return GT_MD5SET_RC_FOUND;
  }
  return GT_MD5SET_NOT_FOUND;
}
static int run_orffinder(GtRegionMapping *rmap, GtFeatureNode *gf, unsigned long start, GT_UNUSED unsigned long end, unsigned int min, unsigned int max, bool all, GtError *err) { int had_err = 0, i; unsigned long sum; GtCodonIterator* ci = NULL; GtTranslator* translator = NULL; GtORFIterator* orfi = NULL; GtORFIteratorStatus state; GtRange orf_rng, tmp_orf_rng[3]; GtStr *seq; unsigned int orf_frame; /* forward strand */ seq = gt_str_new(); had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) gf, gt_feature_node_get_type(gf), false, NULL, NULL, rmap, err); ci = gt_codon_iterator_simple_new(gt_str_get(seq), gt_str_length(seq), err); gt_assert(ci); translator = gt_translator_new(ci); gt_assert(translator); orfi = gt_orf_iterator_new(ci, translator); gt_assert(orfi); for (i = 0; i < 3; i++) { tmp_orf_rng[i].start = GT_UNDEF_ULONG; tmp_orf_rng[i].end = GT_UNDEF_ULONG; } while ((state = gt_orf_iterator_next(orfi, &orf_rng, &orf_frame, err)) == GT_ORF_ITERATOR_OK) { if (all) { process_orf(orf_rng, orf_frame, GT_STRAND_FORWARD, gf, start, min, max, err); } else { if (gt_range_length(&orf_rng) > gt_range_length(&tmp_orf_rng[orf_frame])) { tmp_orf_rng[orf_frame].start = orf_rng.start; tmp_orf_rng[orf_frame].end = orf_rng.end; } } } if (state == GT_ORF_ITERATOR_ERROR) had_err = -1; if (!had_err) { if (!all) { for (i = 0; i < 3; i++) { if (tmp_orf_rng[i].start != GT_UNDEF_ULONG) { process_orf(tmp_orf_rng[i], (unsigned int) i, GT_STRAND_FORWARD, gf, start, min, max, err); } } } gt_codon_iterator_delete(ci); gt_translator_delete(translator); gt_orf_iterator_delete(orfi); orfi = NULL; ci = NULL; translator = NULL; for (i = 0; i < 3; i++) { tmp_orf_rng[i].start = GT_UNDEF_ULONG; tmp_orf_rng[i].end = GT_UNDEF_ULONG; } /* reverse strand */ if (!had_err) { GT_UNUSED int rval = 0; unsigned long length = gt_str_length(seq); char *strp = (char*) gt_str_get_mem(seq); rval = gt_reverse_complement(strp, gt_str_length(seq), err); gt_assert(!rval); /* XXX */ ci = 
gt_codon_iterator_simple_new(gt_str_get(seq), gt_str_length(seq), err); gt_assert(ci); translator = gt_translator_new(ci); gt_assert(translator); orfi = gt_orf_iterator_new(ci, translator); gt_assert(orfi); sum = start + length - 1; while ((state = gt_orf_iterator_next(orfi, &orf_rng, &orf_frame, err)) == GT_ORF_ITERATOR_OK) { if (all) { process_orf(orf_rng, orf_frame, GT_STRAND_REVERSE, gf, sum, min, max, err); } else { if (gt_range_length(&orf_rng) > gt_range_length(&tmp_orf_rng[orf_frame])) { tmp_orf_rng[orf_frame].start = orf_rng.start; tmp_orf_rng[orf_frame].end = orf_rng.end; } } } if (state == GT_ORF_ITERATOR_ERROR) had_err = -1; if (!had_err) { if (!all) { for (i = 0; i < 3; i++) { if (tmp_orf_rng[i].start != GT_UNDEF_ULONG) { process_orf(tmp_orf_rng[i], (unsigned int) i, GT_STRAND_REVERSE, gf, sum, min, max, err); } } } } } gt_str_delete(seq); gt_codon_iterator_delete(ci); gt_translator_delete(translator); gt_orf_iterator_delete(orfi); } return had_err; }
static int gt_snp_annotator_visitor_prepare_gene(GtSNPAnnotatorVisitor *sav, GtError *err) { GtFeatureNodeIterator *fni, *mrnafni; GtFeatureNode *curnode, *last_mRNA = NULL; GtStr *mrnaseq, *seqid; int had_err = 0; mrnaseq = gt_str_new(); seqid = gt_genome_node_get_seqid((GtGenomeNode*) sav->gene); fni = gt_feature_node_iterator_new(sav->gene); while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) { if (gt_feature_node_get_type(curnode) == sav->mRNA_type) { GtFeatureNode *curnode2; if (last_mRNA) { char *mrna_charseq = gt_calloc(gt_str_length(mrnaseq)+1, sizeof (char)); (void) strncpy(mrna_charseq, gt_str_get(mrnaseq), gt_str_length(mrnaseq)); if (gt_feature_node_get_strand(sav->gene) == GT_STRAND_REVERSE) { had_err = gt_reverse_complement(mrna_charseq, gt_str_length(mrnaseq), err); } if (!had_err) { gt_hashmap_add(sav->rnaseqs, last_mRNA, mrna_charseq); last_mRNA = curnode; gt_str_reset(mrnaseq); } } else last_mRNA = curnode; if (!had_err) { mrnafni = gt_feature_node_iterator_new(curnode); while (!had_err && (curnode2 = gt_feature_node_iterator_next(mrnafni))) { if (gt_feature_node_get_type(curnode2) == sav->CDS_type) { char *tmp; GtRange rng = gt_genome_node_get_range((GtGenomeNode*) curnode2); had_err = gt_region_mapping_get_sequence(sav->rmap, &tmp, seqid, rng.start, rng.end, err); if (!had_err) { gt_str_append_cstr_nt(mrnaseq, tmp, gt_range_length(&rng)); gt_free(tmp); } } } gt_feature_node_iterator_delete(mrnafni); } } } if (!had_err && last_mRNA) { char *mrna_charseq = gt_calloc(gt_str_length(mrnaseq)+1, sizeof (char)); (void) strncpy(mrna_charseq, gt_str_get(mrnaseq), gt_str_length(mrnaseq)); if (gt_feature_node_get_strand(sav->gene) == GT_STRAND_REVERSE) { had_err = gt_reverse_complement(mrna_charseq, gt_str_length(mrnaseq), err); } if (!had_err) { gt_hashmap_add(sav->rnaseqs, last_mRNA, mrna_charseq); } } gt_feature_node_iterator_delete(fni); gt_str_delete(mrnaseq); return had_err; }
/* Unit test for gt_pbs_find().
   A two-entry tRNA library ("test1", "test2") is written to a temporary
   file, a 600 character window of the synthetic sequence <fullseq> and its
   reverse complement are searched, and the two ranked hits (one per strand)
   are checked for alignment length, edit distance, offsets, tRNA name,
   coordinates, strand and matched substring.
   Returns <had_err>, which starts out as 0.
   NOTE(review): ensure() is defined elsewhere; presumably it records a
   failed condition in <had_err>/<err> -- confirm. Statements that are not
   wrapped in ensure() run unconditionally. */
int gt_pbs_unit_test(GtError *err)
{
  int had_err = 0;
  GtLTRElement element;
  GtPBSOptions o;
  GtStr *tmpfilename;
  FILE *tmpfp;
  GtPBSResults *res;
  GtPBSHit *hit;
  double score1, score2;
  GtRange rng;
  char *rev_seq, *seq, tmp[BUFSIZ];
  /* 640 characters in total; the two marked pieces are the substrings the
     hits are compared against further below */
  const char *fullseq = "aaaaaaaaaaaaaaaaaaaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "acatactaggatgctag" /* <- PBS forward */
                        "aatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatag"
                                     /* PBS reverse -> */ "gatcctaaggctac"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
                        "aaaaaaaaaaaaaaaaaaaa";

  /* notice previous errors */
  gt_error_check(err);

  /* create temporary tRNA library file */
  tmpfilename = gt_str_new();
  tmpfp = gt_xtmpfp(tmpfilename);
  fprintf(tmpfp, ">test1\nccccccccccccccctagcatcctagtatgtccc\n"
                 ">test2\ncccccccccgatcctagggctaccctttc\n");
  gt_fa_xfclose(tmpfp);
  ensure(had_err, gt_file_exists(gt_str_get(tmpfilename)));

  /* setup testing parameters */
  o.radius = 30;
  o.max_edist = 1;
  o.alilen.start = 11;
  o.alilen.end = 30;
  o.offsetlen.start = 0;
  o.offsetlen.end = 5;
  o.trnaoffsetlen.start = 0;
  o.trnaoffsetlen.end = 40;
  o.ali_score_match = 5;
  o.ali_score_mismatch = -10;
  o.ali_score_insertion = o.ali_score_deletion = -20;
  /* the result of gt_bioseq_new() is used without a NULL check */
  o.trna_lib = gt_bioseq_new(gt_str_get(tmpfilename), err);
  ensure(had_err, gt_bioseq_number_of_sequences(o.trna_lib) == 2);

  element.leftLTR_5 = 20;
  element.leftLTR_3 = 119;
  element.rightLTR_5 = 520;
  element.rightLTR_3 = 619;

  /* setup sequences */
  /* both buffers hold the same 600 characters starting at offset 20 of
     fullseq; neither buffer is NUL-terminated */
  seq = gt_malloc(600 * sizeof (char));
  rev_seq = gt_malloc(600 * sizeof (char));
  memcpy(seq, fullseq + 20, 600);
  memcpy(rev_seq, fullseq + 20, 600);
  /* the return value of the reverse complement is not checked here */
  gt_reverse_complement(rev_seq, 600, err);

  /* try to find PBS in sequences */
  res = gt_pbs_find(seq, rev_seq, &element, &o, err);
  ensure(had_err, res != NULL);
  ensure(had_err, gt_pbs_results_get_number_of_hits(res) == 2);

  /* check first hit on forward strand */
  hit = gt_pbs_results_get_ranked_hit(res, 0);
  ensure(had_err, hit != NULL);
  ensure(had_err, gt_pbs_hit_get_alignment_length(hit) == 17);
  ensure(had_err, gt_pbs_hit_get_edist(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_offset(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_tstart(hit) == 3);
  ensure(had_err, strcmp(gt_pbs_hit_get_trna(hit), "test1") == 0);
  rng = gt_pbs_hit_get_coords(hit);
  ensure(had_err, rng.start == 120);
  ensure(had_err, rng.end == 136);
  score1 = gt_pbs_hit_get_score(hit);
  ensure(had_err, gt_pbs_hit_get_strand(hit) == GT_STRAND_FORWARD);
  /* the hit coordinates are used as indices into fullseq (not into the
     600 character window) to cut out the matched substring */
  memset(tmp, 0, BUFSIZ-1);
  memcpy(tmp, fullseq + (rng.start * sizeof (char)),
         (rng.end - rng.start + 1) * sizeof (char));
  ensure(had_err, strcmp(tmp, "acatactaggatgctag" ) == 0);

  /* check second hit on reverse strand */
  hit = gt_pbs_results_get_ranked_hit(res, 1);
  ensure(had_err, hit != NULL);
  ensure(had_err, gt_pbs_hit_get_alignment_length(hit) == 14);
  ensure(had_err, gt_pbs_hit_get_edist(hit) == 1);
  ensure(had_err, gt_pbs_hit_get_offset(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_tstart(hit) == 6);
  ensure(had_err, strcmp(gt_pbs_hit_get_trna(hit), "test2") == 0);
  rng = gt_pbs_hit_get_coords(hit);
  ensure(had_err, rng.start == 506);
  ensure(had_err, rng.end == 519);
  score2 = gt_pbs_hit_get_score(hit);
  /* the hit ranked first is expected to score strictly higher */
  ensure(had_err, gt_double_compare(score1, score2) > 0);
  ensure(had_err, gt_pbs_hit_get_strand(hit) == GT_STRAND_REVERSE);
  memset(tmp, 0, BUFSIZ-1);
  memcpy(tmp, fullseq + (rng.start * sizeof (char)),
         (rng.end - rng.start + 1) * sizeof (char));
  ensure(had_err, strcmp(tmp, "gatcctaaggctac" ) == 0);

  /* clean up */
  gt_xremove(gt_str_get(tmpfilename));
  ensure(had_err, !gt_file_exists(gt_str_get(tmpfilename)));
  gt_str_delete(tmpfilename);
  gt_bioseq_delete(o.trna_lib);
  gt_free(rev_seq);
  gt_free(seq);
  gt_pbs_results_delete(res);
  return had_err;
}
/* Searches for primer binding sites around the LTRs of <element>.
   A window of 2 * o->radius + 1 characters is cut from <seq> (starting
   o->radius characters before the offset given by the left LTR length) and
   from <rev_seq> (same, using the right LTR length). Each sequence of
   o->trna_lib is reverse-complemented and aligned with gt_swalign() against
   both windows; every alignment is passed to gt_pbs_add_hit() labelled with
   the forward or reverse strand respectively. The collected hits are sorted
   with gt_pbs_hit_compare before the results object is returned.
   NOTE(review): the result of gt_reverse_complement() is discarded, so a
   tRNA that cannot be reverse-complemented is still aligned -- confirm this
   is intended. */
GtPBSResults* gt_pbs_find(const char *seq, const char *rev_seq,
                          GtLTRElement *element, GtPBSOptions *o,
                          GtError *err)
{
  GtAlphabet *a = gt_alphabet_new_dna();
  GtScoreFunction *sf = gt_dna_scorefunc_new(a,
                                             o->ali_score_match,
                                             o->ali_score_mismatch,
                                             o->ali_score_insertion,
                                             o->ali_score_deletion);
  GtPBSResults *results;
  GtSeq *seq_forward, *seq_rev;
  unsigned long j = 0;

  gt_assert(seq && rev_seq && sf && a && element);

  results = gt_pbs_results_new(element, o);

  /* search windows on both strands */
  seq_forward = gt_seq_new(seq + (gt_ltrelement_leftltrlen(element))
                               - (o->radius),
                           2*o->radius + 1,
                           a);
  seq_rev = gt_seq_new(rev_seq + (gt_ltrelement_rightltrlen(element))
                               - (o->radius),
                       2*o->radius + 1,
                       a);

  while (j < gt_bioseq_number_of_sequences(o->trna_lib))
  {
    GtSeq *trna_seq = gt_bioseq_get_seq(o->trna_lib, j);
    unsigned long trna_seqlen = gt_seq_length(trna_seq);
    char *rc_buffer = gt_calloc(trna_seqlen, sizeof (char));
    GtSeq *trna_from3;
    GtAlignment *ali;

    /* private reverse-complemented copy of the tRNA; the GtSeq created with
       gt_seq_new_own() is deleted at the end of this iteration */
    memcpy(rc_buffer, gt_seq_get_orig(trna_seq), sizeof (char)*trna_seqlen);
    (void) gt_reverse_complement(rc_buffer, trna_seqlen, err);
    trna_from3 = gt_seq_new_own(rc_buffer, trna_seqlen, a);

    /* forward strand window */
    ali = gt_swalign(seq_forward, trna_from3, sf);
    gt_pbs_add_hit(results->hits, ali, o, trna_seqlen,
                   gt_seq_get_description(trna_seq), GT_STRAND_FORWARD,
                   results);
    gt_alignment_delete(ali);

    /* reverse strand window */
    ali = gt_swalign(seq_rev, trna_from3, sf);
    gt_pbs_add_hit(results->hits, ali, o, trna_seqlen,
                   gt_seq_get_description(trna_seq), GT_STRAND_REVERSE,
                   results);
    gt_alignment_delete(ali);

    gt_seq_delete(trna_from3);
    j++;
  }

  gt_seq_delete(seq_forward);
  gt_seq_delete(seq_rev);
  gt_score_function_delete(sf);
  gt_alphabet_delete(a);
  gt_array_sort(results->hits, gt_pbs_hit_compare);
  return results;
}
/* Tool runner: translates every sequence of the input files named in
   argv[parsed_args..argc-1].
   Sequences shorter than GT_CODON_LENGTH are skipped with a warning. Each
   remaining sequence is handed to gt_seqtranslate_do_translation(); if
   arguments->reverse is set, a reverse-complemented copy is translated as
   well. Processing stops at the first error.
   Returns 0 on success, -1 if the input could not be opened or iterated,
   otherwise the error code of the failing translation or reverse
   complement. */
static int gt_seqtranslate_runner(int argc, const char **argv,
                                  int parsed_args, void *tool_arguments,
                                  GT_UNUSED GtError *err)
{
  GtTranslateArguments *arguments = tool_arguments;
  GtSeqIterator *si = NULL;
  GtSequenceBuffer *sb = NULL;
  GtStrArray *infiles;
  int had_err = 0, rval, i;
  GtStr *translations[3];

  for (i = 0; i < 3; i++)
    translations[i] = gt_str_new();

  gt_error_check(err);
  gt_assert(arguments);

  infiles = gt_str_array_new();
  i = parsed_args;
  while (i < argc) {
    gt_str_array_add_cstr(infiles, argv[i]);
    i++;
  }

  sb = gt_sequence_buffer_new_guess_type(infiles, err);
  if (sb) {
    si = gt_seq_iterator_sequence_buffer_new_with_buffer(sb);
    if (!si)
      had_err = -1;
  }
  else
    had_err = -1;

  if (!had_err) {
    char *desc;
    const GtUchar *sequence;
    GtUword len;

    while (!had_err) {
      rval = gt_seq_iterator_next(si, &sequence, &len, &desc, err);
      if (!rval)
        break;                /* no more sequences */
      if (rval < 0) {
        had_err = -1;
        break;
      }

      if (len < GT_CODON_LENGTH) {
        gt_warning("sequence '%s' is shorter than codon length of %d, skipping",
                   desc, GT_CODON_LENGTH);
        continue;
      }

      had_err = gt_seqtranslate_do_translation(arguments, (char*) sequence,
                                               len, desc, translations, false,
                                               err);
      if (had_err || !arguments->reverse)
        continue;

      /* translate the opposite strand from a private copy */
      {
        char *revseq = gt_cstr_dup_nt((char*) sequence, len);
        had_err = gt_reverse_complement(revseq, len, err);
        if (!had_err) {
          had_err = gt_seqtranslate_do_translation(arguments, revseq, len,
                                                   desc, translations, true,
                                                   err);
        }
        gt_free(revseq);
      }
    }
  }

  gt_str_delete(translations[0]);
  gt_str_delete(translations[1]);
  gt_str_delete(translations[2]);
  gt_str_array_delete(infiles);
  gt_seq_iterator_delete(si);
  gt_sequence_buffer_delete(sb);
  return had_err;
}
/* Node visitor callback for feature nodes.
   Looks for a node of type lv->root_type in the subgraph of <fn>, extracts
   its sequence, translates it in all frames on both strands into
   lv->fwd[] / lv->rev[], feeds the six translations as FASTA to an external
   "hmmscan" process and lets the visitor parse and process its output.
   Finally gt_ltrdigest_pdom_visitor_choose_strand() is called.
   Returns 0 on success and a non-zero value on error; on Windows the HMMER
   step always fails with an error set in <err>. */
static int gt_ltrdigest_pdom_visitor_feature_node(GtNodeVisitor *nv,
                                                  GtFeatureNode *fn,
                                                  GtError *err)
{
  GtLTRdigestPdomVisitor *lv;
  GtFeatureNodeIterator *fni;
  GtFeatureNode *curnode = NULL;
  int had_err = 0;
  GtRange rng;
  GtUword i;
  lv = gt_ltrdigest_pdom_visitor_cast(nv);
  gt_assert(lv);
  gt_error_check(err);

  /* traverse annotation subgraph and find LTR element */
  /* the loop does not stop at the first match, so the last node of the root
     type wins; lv->ltr_retrotrans is not reset here if none is found */
  fni = gt_feature_node_iterator_new(fn);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (strcmp(gt_feature_node_get_type(curnode), lv->root_type) == 0) {
      lv->ltr_retrotrans = curnode;
    }
  }
  gt_feature_node_iterator_delete(fni);

  if (!had_err && lv->ltr_retrotrans != NULL) {
    GtCodonIterator *ci;
    GtTranslator *tr;
    GtTranslatorStatus status;
    GtUword seqlen;
    char translated, *rev_seq;
#ifndef _WIN32
    FILE *instream;
    GtHMMERParseStatus *pstatus;
#endif
    unsigned int frame;
    GtStr *seq;

    seq = gt_str_new();
    rng = gt_genome_node_get_range((GtGenomeNode*) lv->ltr_retrotrans);
    /* element borders are stored shifted down by one relative to the node
       range */
    lv->leftLTR_5 = rng.start - 1;
    lv->rightLTR_3 = rng.end - 1;
    /* the sequence length is derived from the node range, not from the
       length of the extracted string */
    seqlen = gt_range_length(&rng);

    had_err = gt_extract_feature_sequence(seq,
                                          (GtGenomeNode*) lv->ltr_retrotrans,
                                          lv->root_type,
                                          false, NULL, NULL, lv->rmap, err);

    if (!had_err) {
      for (i = 0UL; i < 3UL; i++) {
        gt_str_reset(lv->fwd[i]);
        gt_str_reset(lv->rev[i]);
      }

      /* create translations */
      ci = gt_codon_iterator_simple_new(gt_str_get(seq), seqlen, NULL);
      gt_assert(ci);
      tr = gt_translator_new(ci);
      /* only the first call gets <err>; the calls inside the loop pass
         NULL */
      status = gt_translator_next(tr, &translated, &frame, err);
      while (status == GT_TRANSLATOR_OK && translated) {
        gt_str_append_char(lv->fwd[frame], translated);
        status = gt_translator_next(tr, &translated, &frame, NULL);
      }
      if (status == GT_TRANSLATOR_ERROR) had_err = -1;
      if (!had_err) {
        /* exactly seqlen bytes: the copy is not NUL-terminated and is only
           ever used together with its length */
        rev_seq = gt_malloc((size_t) seqlen * sizeof (char));
        strncpy(rev_seq, gt_str_get(seq), (size_t) seqlen * sizeof (char));
        /* result deliberately discarded, no error object is passed */
        (void) gt_reverse_complement(rev_seq, seqlen, NULL);
        /* replace the forward codon iterator by one over the reverse
           strand; the translator is reused */
        gt_codon_iterator_delete(ci);
        ci = gt_codon_iterator_simple_new(rev_seq, seqlen, NULL);
        gt_translator_set_codon_iterator(tr, ci);
        status = gt_translator_next(tr, &translated, &frame, err);
        while (status == GT_TRANSLATOR_OK && translated) {
          gt_str_append_char(lv->rev[frame], translated);
          status = gt_translator_next(tr, &translated, &frame, NULL);
        }
        if (status == GT_TRANSLATOR_ERROR) had_err = -1;
        gt_free(rev_seq);
      }
      /* deletes whichever codon iterator is current (forward one if the
         reverse pass was skipped) */
      gt_codon_iterator_delete(ci);
      gt_translator_delete(tr);
    }

    /* run HMMER and handle results */
    if (!had_err) {
#ifndef _WIN32
      /* pc: parent -> child (child's stdin), cp: child -> parent (child's
         stdout) */
      int pid, pc[2], cp[2];
      GT_UNUSED int rval;

      (void) signal(SIGCHLD, SIG_IGN); /* XXX: for now, ignore child's
                                               exit status */
      rval = pipe(pc);
      gt_assert(rval == 0);
      rval = pipe(cp);
      gt_assert(rval == 0);

      switch ((pid = (int) fork())) {
        case -1:
          perror("Can't fork");
          exit(1);   /* XXX: error handling */
        case 0:    /* child */
          (void) close(1);    /* close current stdout. */
          rval = dup(cp[1]);  /* make stdout go to write end of pipe. */
          (void) close(0);    /* close current stdin. */
          rval = dup(pc[0]);  /* make stdin come from read end of pipe. */
          (void) close(pc[0]);
          (void) close(pc[1]);
          (void) close(cp[0]);
          (void) close(cp[1]);
          (void) execvp("hmmscan", lv->args); /* XXX: read path from env */
          /* only reached if execvp() failed */
          perror("couldn't execute hmmscan!");
          exit(1);
        default: /* parent */
          /* send the six translations as FASTA records named "<frame>+" and
             "<frame>-"; the results of write() are stored but never
             checked */
          for (i = 0UL; i < 3UL; i++) {
            /* NOTE(review): buf has room for '>', one digit, the strand
               character, '\n' and the terminating NUL, assuming GT_WU
               formats i as a plain decimal number -- confirm; only the first
               four bytes are written */
            char buf[5];
            GT_UNUSED ssize_t written;
            (void) sprintf(buf, ">"GT_WU"%c\n", i, '+');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->fwd[i]),
                            (size_t) gt_str_length(lv->fwd[i]) * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
            (void) sprintf(buf, ">"GT_WU"%c\n", i, '-');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->rev[i]),
                            (size_t) gt_str_length(lv->rev[i]) * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
          }
          /* closing the write end signals end of input to the child; all
             input is written before any output is read */
          (void) close(pc[0]);
          (void) close(pc[1]);
          (void) close(cp[1]);
          /* the result of fdopen() is used without a NULL check */
          instream = fdopen(cp[0], "r");
          pstatus = gt_hmmer_parse_status_new();
          had_err = gt_ltrdigest_pdom_visitor_parse_output(lv, pstatus,
                                                           instream, err);
          (void) fclose(instream);
          if (!had_err)
            had_err = gt_ltrdigest_pdom_visitor_process_hits(lv, pstatus, err);
          gt_hmmer_parse_status_delete(pstatus);
      }
#else
      /* XXX */
      gt_error_set(err, "HMMER call not implemented on Windows\n");
      had_err = -1;
#endif
    }
    gt_str_delete(seq);
  }
  if (!had_err)
    had_err = gt_ltrdigest_pdom_visitor_choose_strand(lv);
  return had_err;
}
static int gt_convertseq_runner(int argc, const char **argv, int parsed_args, void *tool_arguments, GtError *err) { GtConvertseqArguments *arguments = tool_arguments; int had_err = 0, i; GtFilelengthvalues *flv; GtSeqIterator *seqit; GtSequenceBuffer *sb = NULL; GtStrArray *files; const GtUchar *sequence; char *desc; GtUword len, j; off_t totalsize; gt_error_check(err); gt_assert(arguments != NULL); files = gt_str_array_new(); for (i = parsed_args; i < argc; i++) { gt_str_array_add_cstr(files, argv[i]); } totalsize = gt_files_estimate_total_size(files); flv = gt_calloc((size_t) gt_str_array_size(files), sizeof (GtFilelengthvalues)); sb = gt_sequence_buffer_new_guess_type(files, err); if (!sb) { had_err = -1; } if (!had_err) { gt_sequence_buffer_set_filelengthtab(sb, flv); /* read input using seqiterator */ seqit = gt_seq_iterator_sequence_buffer_new_with_buffer(sb); if (arguments->verbose) { gt_progressbar_start(gt_seq_iterator_getcurrentcounter(seqit, (GtUint64) totalsize), (GtUint64) totalsize); } while (true) { GtUchar *seq = NULL; desc = NULL; j = 0UL; had_err = gt_seq_iterator_next(seqit, &sequence, &len, &desc, err); if (had_err != 1) break; if (arguments->revcomp) { GtUchar *newseq = gt_calloc((size_t) len+1, sizeof (GtUchar)); memcpy(newseq, sequence, (size_t) len*sizeof (GtUchar)); had_err = gt_reverse_complement((char*) newseq, len, err); if (had_err) break; seq = newseq; } else seq = (GtUchar*) sequence; if (!arguments->showseq) { bool in_wildcard = false; gt_file_xprintf(arguments->outfp, ">%s\n", desc); for (i = 0; (GtUword) i < len; i++) { if (arguments->reduce_wc_dna) { switch (seq[i]) { case 'a': case 'A': case 'c': case 'C': case 'g': case 'G': case 't': case 'u': case 'T': case 'U': in_wildcard = false; gt_file_xfputc((int) seq[i], arguments->outfp); j++; break; default: if (!in_wildcard) { in_wildcard = true; if (isupper((int) seq[i])) gt_file_xfputc((int) 'N', arguments->outfp); else gt_file_xfputc((int) 'n', arguments->outfp); j++; } } } else 
if (arguments->reduce_wc_prot) { switch (seq[i]) { case 'X': case 'B': case 'Z': if (!in_wildcard) { in_wildcard = true; gt_file_xfputc((int) 'N', arguments->outfp); j++; } break; case 'x': case 'b': case 'z': if (!in_wildcard) { in_wildcard = true; gt_file_xfputc((int) 'n', arguments->outfp); j++; } break; default: in_wildcard = false; gt_file_xfputc((int) seq[i], arguments->outfp); j++; } } else { gt_file_xfputc((int) seq[i], arguments->outfp); j++; } if (arguments->fastawidth > 0 && j % arguments->fastawidth == 0) { j = 0; gt_file_xprintf(arguments->outfp, "\n"); } } if (arguments->fastawidth == 0 || len % arguments->fastawidth != 0) gt_file_xprintf(arguments->outfp, "\n"); } if (arguments->revcomp) { gt_free(seq); } } if (arguments->showflv) { for (j=0;j<gt_str_array_size(files);j++) { fprintf(stderr, "file "GT_WU" (%s): "GT_WU"/"GT_WU"\n", j, gt_str_array_get(files, j), (GtUword) flv[j].length, (GtUword) flv[j].effectivelength); } } if (arguments->verbose) { gt_progressbar_stop(); } gt_sequence_buffer_delete(sb); gt_seq_iterator_delete(seqit); } gt_str_array_delete(files); gt_free(flv); return had_err; }
static int gt_extract_feature_sequence_generic(GtStr *sequence, GtGenomeNode *gn, const char *type, bool join, GtStr *seqid, GtStrArray *target_ids, unsigned int *out_phase_offset, GtRegionMapping *region_mapping, GtError *err) { GtFeatureNode *fn; GtRange range; unsigned int phase_offset = 0; char *outsequence; const char *target; int had_err = 0; gt_error_check(err); fn = gt_genome_node_cast(gt_feature_node_class(), gn); gt_assert(fn); if (seqid) gt_str_append_str(seqid, gt_genome_node_get_seqid(gn)); if (target_ids && (target = gt_feature_node_get_attribute(fn, GT_GFF_TARGET))) { had_err = gt_gff3_parser_parse_all_target_attributes(target, false, target_ids, NULL, NULL, "", 0, err); } if (!had_err) { if (join) { GtFeatureNodeIterator *fni; GtFeatureNode *child; bool reverse_strand = false, first_child = true, first_child_of_type_seen = false; GtPhase phase = GT_PHASE_UNDEFINED; /* in this case we have to traverse the children */ fni = gt_feature_node_iterator_new_direct(gt_feature_node_cast(gn)); while (!had_err && (child = gt_feature_node_iterator_next(fni))) { if (first_child) { if (target_ids && (target = gt_feature_node_get_attribute(child, GT_GFF_TARGET))) { gt_str_array_reset(target_ids); had_err = gt_gff3_parser_parse_all_target_attributes(target, false, target_ids, NULL, NULL, "", 0, err); } first_child = false; } if (!had_err) { if (extract_join_feature((GtGenomeNode*) child, type, region_mapping, sequence, &reverse_strand, &first_child_of_type_seen, &phase, err)) { had_err = -1; } if (phase != GT_PHASE_UNDEFINED) { phase_offset = (int) phase; } } } gt_feature_node_iterator_delete(fni); gt_assert(phase_offset <= (unsigned int) GT_PHASE_UNDEFINED); if (!had_err && gt_str_length(sequence)) { if (reverse_strand) { had_err = gt_reverse_complement(gt_str_get(sequence), gt_str_length(sequence), err); } } } else if (gt_feature_node_get_type(fn) == type) { GtPhase phase = gt_feature_node_get_phase(fn); gt_assert(!had_err); if (phase != GT_PHASE_UNDEFINED) 
phase_offset = (unsigned int) phase; /* otherwise we only have to look at this feature */ range = gt_genome_node_get_range(gn); gt_assert(range.start); /* 1-based coordinates */ had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence, gt_genome_node_get_seqid(gn), range.start, range.end, err); if (!had_err) { gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range)); gt_free(outsequence); if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) { had_err = gt_reverse_complement(gt_str_get(sequence), gt_str_length(sequence), err); } } } } if (out_phase_offset && phase_offset != GT_PHASE_UNDEFINED) { *out_phase_offset = phase_offset; } return had_err; }
GtLTRdigestStream *ls, #ifdef HAVE_HMMER GtError *err) #else GT_UNUSED GtError *err) #endif { int had_err = 0; char *rev_seq; unsigned long seqlen = gt_ltrelement_length(element); GtStrand canonical_strand = GT_STRAND_UNKNOWN; /* create reverse strand sequence */ rev_seq = gt_calloc((size_t) seqlen+1, sizeof (char)); memcpy(rev_seq, seq, sizeof (char) * seqlen); had_err = gt_reverse_complement(rev_seq, seqlen, err); if (!had_err) { #ifdef HAVE_HMMER /* Protein domain finding * ----------------------*/ if (ls->tests_to_run & GT_LTRDIGEST_RUN_PDOM) { GtPdomResults *pdom_results = NULL; if (!ls->pdf) { gt_error_set(err, "No PdomFinder object found -- " "how could that happen?"); had_err = -1; } else