static int gt_pbs_hit_compare(const void *h1, const void *h2) { GtPBSHit *hp1 = *(GtPBSHit**) h1; GtPBSHit *hp2 = *(GtPBSHit**) h2; return (gt_double_compare(hp2->score, hp1->score)); }
/* Callback processing all hits recorded for one model.
   <key> is the model name (used for logging only), <val> the GtHMMERModelHit
   holding the per-strand hit arrays and <data> the GtLTRdigestPdomVisitor.
   <err> is not used.

   The forward hit list is used if best_fwd compares smaller than or equal to
   best_rev, the reverse hit list otherwise. With more than one hit on the
   chosen strand, the hits are turned into fragments and passed to the global
   chaining; a single hit is directly assigned to chain number 0. Afterwards
   every hit of the chosen strand is handed to
   gt_ltrdigest_pdom_visitor_attach_hit().

   Always returns 0; return values of the attach step are deliberately
   discarded. */
static int gt_ltrdigest_pdom_visitor_process_hit(GT_UNUSED void *key, void *val,
                                                 void *data,
                                                 GT_UNUSED GtError *err)
{
  GtHMMERModelHit *mh = (GtHMMERModelHit*) val;
  GtLTRdigestPdomVisitor *lv = (GtLTRdigestPdomVisitor*) data;
  const char *mdl = (const char*) key;
  GtArray *hits = NULL;
  GtUword nof_hits;

  /* pick the strand to work on, forward wins ties */
  if (gt_double_compare(mh->best_fwd, mh->best_rev) <= 0)
    hits = mh->fwd_hits;
  else
    hits = mh->rev_hits;
  gt_assert(hits);

  nof_hits = gt_array_size(hits);
  if (nof_hits == 0)
    return 0;

  if (nof_hits > 1UL) {
    /* The address of <chainno> is handed to the chain processing callback as
       its user data. It must not be left indeterminate; numbering starts at
       0, consistent with the single-hit case below. */
    GtUword i,
            chainno = 0UL;
    GtFragment *frags = gt_malloc((size_t) nof_hits * sizeof (GtFragment));

    /* one fragment per hit: dimension 1 is the position in the model,
       dimension 2 the position in the alignment target */
    for (i = 0; i < nof_hits; i++) {
      GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, i);
      gt_assert(h);
      frags[i].startpos1 = h->hmmfrom;
      frags[i].endpos1 = h->hmmto;
      frags[i].startpos2 = h->alifrom;
      frags[i].endpos2 = h->alito;
      /* the cast binds to the aligned length only, the product with the
         score is converted on assignment */
      frags[i].weight = (GtWord) (h->alito - h->alifrom + 1) * h->score;
      frags[i].data = h;
    }
    qsort(frags, (size_t) nof_hits, sizeof (GtFragment),
          gt_ltrdigest_pdom_visitor_fragcmp);
    gt_log_log("%s: chaining "GT_WU" frags", mdl, nof_hits);
    gt_globalchaining_max(frags, nof_hits,
                          (GtUword) lv->chain_max_gap_length,
                          gt_ltrdigest_pdom_visitor_chainproc, &chainno);
    /* the fragments only reference the hits, which remain owned by <hits> */
    gt_free(frags);

    for (i = 0; i < nof_hits; i++) {
      GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, i);
      (void) gt_ltrdigest_pdom_visitor_attach_hit(lv, mh, h);
    }
  } else {
    /* a single hit trivially forms chain 0 on its own */
    GtUword chainno = 0UL;
    GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, 0);
    gt_array_add(h->chains, chainno);
    (void) gt_ltrdigest_pdom_visitor_attach_hit(lv, mh, h);
  }
  return 0;
}
/* Unit test for the tolerant floating point helpers gt_double_equals_one(),
   gt_double_equals_double(), gt_double_compare() and
   gt_double_smaller_double(). Returns the value of <had_err> after all
   checks, which is 0 if no check changed it. The checked expressions are
   kept verbatim, since the check macro may use their text in its message. */
int gt_mathsupport_unit_test(GtError *err)
{
  int had_err = 0;
  /* offset which the checks below expect to be absorbed by the tolerance */
  double less_than_epsilon = 0.0000000000000001;
  gt_error_check(err);

  /* comparison against 1.0 */
  gt_ensure(had_err, !gt_double_equals_one(1.1));
  gt_ensure(had_err, gt_double_equals_one(1));
  gt_ensure(had_err, gt_double_equals_one(1+less_than_epsilon));
  gt_ensure(had_err, !gt_double_equals_one(-1-less_than_epsilon));

  /* equality: clearly different values */
  gt_ensure(had_err, !gt_double_equals_double(1.0, 2.0));
  gt_ensure(had_err, !gt_double_equals_double(-1.0, 1.0));
  gt_ensure(had_err, !gt_double_equals_double(1.0, -1.0));
  gt_ensure(had_err, !gt_double_equals_double(-1.0, 1+less_than_epsilon));
  gt_ensure(had_err, !gt_double_equals_double(1.0, 1.1));

  /* equality: identical values and values within the tolerance */
  gt_ensure(had_err, gt_double_equals_double(1.0, 1+less_than_epsilon));
  gt_ensure(had_err, gt_double_equals_double(1.0, 1.0));
  gt_ensure(had_err, gt_double_equals_double(0.0, 0.0));
  gt_ensure(had_err, gt_double_equals_double(-1.0, -1.0));
  gt_ensure(had_err, gt_double_equals_double(-1.0+less_than_epsilon, -1.0));
  gt_ensure(had_err, gt_double_equals_double(-1.0, -1.0+less_than_epsilon));
  gt_ensure(had_err, gt_double_equals_double(1.0+less_than_epsilon, 1.0));
  gt_ensure(had_err, gt_double_equals_double(1.0, 1.0+less_than_epsilon));

  /* three-way comparison */
  gt_ensure(had_err, gt_double_compare(1.0, 1.0) == 0);
  gt_ensure(had_err, gt_double_compare(1.0, 1.1) < 0);
  gt_ensure(had_err, gt_double_compare(1.1, 1.0) > 0);
  gt_ensure(had_err, gt_double_compare(1.1, -1.0) > 0);
  gt_ensure(had_err, gt_double_compare(-1.1, -1.0) < 0);
  gt_ensure(had_err, gt_double_compare(1+less_than_epsilon, 1.0) == 0);
  gt_ensure(had_err, gt_double_compare(1+less_than_epsilon, -1.0) > 0);
  gt_ensure(had_err, gt_double_compare(-1+less_than_epsilon, -1.0) == 0);
  gt_ensure(had_err, gt_double_compare(-1+less_than_epsilon, 1.0) < 0);

  /* strict "smaller than" */
  gt_ensure(had_err, gt_double_smaller_double(1.0, 1.1));
  gt_ensure(had_err, gt_double_smaller_double(-1.0, 1.1));
  gt_ensure(had_err, gt_double_smaller_double(-1.1, -1.0));
  gt_ensure(had_err, !gt_double_smaller_double(-1.0, -1.1));
  gt_ensure(had_err, !gt_double_smaller_double(1.0-less_than_epsilon, 1.0));

  return had_err;
}
/* Files <hit> under the model currently named by s->cur_model.
   On the first hit for a model, a GtHMMERModelHit with empty forward and
   reverse hit arrays is created and stored in s->models, keyed by its own
   copy of the model name; both "best" values start out as DBL_MAX.
   The hit is appended to the forward array if its strand is
   GT_STRAND_FORWARD and to the reverse array for any other strand. The "best"
   value of that strand is lowered to the hit's E-value if that is smaller. */
static void gt_hmmer_parse_status_add_hit(GtHMMERParseStatus *s,
                                          GtHMMERSingleHit *hit)
{
  GtHMMERModelHit *mh;
  gt_assert(s);

  mh = gt_hashmap_get(s->models, gt_str_get(s->cur_model));
  if (mh == NULL) {
    /* model not seen before, set up a fresh record */
    mh = gt_calloc((size_t) 1, sizeof (*mh));
    mh->fwd_hits = gt_array_new(sizeof (GtHMMERSingleHit*));
    mh->rev_hits = gt_array_new(sizeof (GtHMMERSingleHit*));
    mh->best_fwd = DBL_MAX;
    mh->best_rev = mh->best_fwd;
    mh->modelname = gt_cstr_dup(gt_str_get(s->cur_model));
    gt_hashmap_add(s->models, mh->modelname, mh);
  }
  gt_assert(mh && mh->fwd_hits &&mh->rev_hits);

  if (hit->strand != GT_STRAND_FORWARD) {
    if (gt_double_compare(mh->best_rev, hit->evalue) > 0)
      mh->best_rev = hit->evalue;
    gt_array_add(mh->rev_hits, hit);
  } else {
    if (gt_double_compare(mh->best_fwd, hit->evalue) > 0)
      mh->best_fwd = hit->evalue;
    gt_array_add(mh->fwd_hits, hit);
  }
}
/* Unit test for the PBS search.
   Writes a temporary two-sequence tRNA library, runs gt_pbs_find() on a
   600 base window (offset 20) of a synthetic sequence and its reverse
   complement, and checks that exactly two hits are reported: the forward one
   against "test1" ranked first and the reverse one against "test2" ranked
   second. Returns the value of <had_err> after all checks.
   NOTE(review): the statements between the checks (e.g. fetching a ranked hit
   or its coordinates) run regardless of earlier failed checks. */
int gt_pbs_unit_test(GtError *err)
{
  int had_err = 0;
  GtLTRElement element;
  GtPBSOptions o;
  GtStr *tmpfilename;
  FILE *tmpfp;
  GtPBSResults *res;
  GtPBSHit *hit;
  double score1,
         score2;
  GtRange rng;
  char *rev_seq,
       *seq,
       tmp[BUFSIZ];
  const char *fullseq =
    "aaaaaaaaaaaaaaaaaaaa"
    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
    "acatactaggatgctag" /* <- PBS forward */
    "aatatagtttcgaatatagcactgcatttcgaa"
    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
    "tatagcactgcatttcgaatatagtttcgaatatag"
    /* PBS reverse -> */ "gatcctaaggctac"
    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
    "tatagcactgcatttcgaatatagtttcgaatatagcactgcatttcgaa"
    "aaaaaaaaaaaaaaaaaaaa";

  /* notice previous errors */
  gt_error_check(err);

  /* create temporary tRNA library file */
  tmpfilename = gt_str_new();
  tmpfp = gt_xtmpfp(tmpfilename);
  fprintf(tmpfp, ">test1\nccccccccccccccctagcatcctagtatgtccc\n"
                 ">test2\ncccccccccgatcctagggctaccctttc\n");
  gt_fa_xfclose(tmpfp);
  ensure(had_err, gt_file_exists(gt_str_get(tmpfilename)));

  /* setup testing parameters */
  o.radius = 30;
  o.max_edist = 1;
  o.alilen.start = 11;
  o.alilen.end = 30;
  o.offsetlen.start = 0;
  o.offsetlen.end = 5;
  o.trnaoffsetlen.start = 0;
  o.trnaoffsetlen.end = 40;
  o.ali_score_match = 5;
  o.ali_score_mismatch = -10;
  o.ali_score_insertion = o.ali_score_deletion = -20;
  o.trna_lib = gt_bioseq_new(gt_str_get(tmpfilename), err);
  ensure(had_err, gt_bioseq_number_of_sequences(o.trna_lib) == 2);

  element.leftLTR_5 = 20;
  element.leftLTR_3 = 119;
  element.rightLTR_5 = 520;
  element.rightLTR_3 = 619;

  /* setup sequences: both buffers start as the same window, the second one
     is then reverse complemented in place */
  seq = gt_malloc(600 * sizeof (char));
  rev_seq = gt_malloc(600 * sizeof (char));
  memcpy(seq, fullseq + 20, 600);
  memcpy(rev_seq, fullseq + 20, 600);
  gt_reverse_complement(rev_seq, 600, err);

  /* try to find PBS in sequences */
  res = gt_pbs_find(seq, rev_seq, &element, &o, err);
  ensure(had_err, res != NULL);
  ensure(had_err, gt_pbs_results_get_number_of_hits(res) == 2);

  /* check first hit on forward strand */
  hit = gt_pbs_results_get_ranked_hit(res, 0);
  ensure(had_err, hit != NULL);
  ensure(had_err, gt_pbs_hit_get_alignment_length(hit) == 17);
  ensure(had_err, gt_pbs_hit_get_edist(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_offset(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_tstart(hit) == 3);
  ensure(had_err, strcmp(gt_pbs_hit_get_trna(hit), "test1") == 0);
  rng = gt_pbs_hit_get_coords(hit);
  ensure(had_err, rng.start == 120);
  ensure(had_err, rng.end == 136);
  score1 = gt_pbs_hit_get_score(hit);
  ensure(had_err, gt_pbs_hit_get_strand(hit) == GT_STRAND_FORWARD);
  /* extract the reported range from the full sequence for comparison */
  memset(tmp, 0, BUFSIZ-1);
  memcpy(tmp, fullseq + (rng.start * sizeof (char)),
         (rng.end - rng.start + 1) * sizeof (char));
  ensure(had_err, strcmp(tmp, "acatactaggatgctag" ) == 0);

  /* check second hit on reverse strand */
  hit = gt_pbs_results_get_ranked_hit(res, 1);
  ensure(had_err, hit != NULL);
  ensure(had_err, gt_pbs_hit_get_alignment_length(hit) == 14);
  ensure(had_err, gt_pbs_hit_get_edist(hit) == 1);
  ensure(had_err, gt_pbs_hit_get_offset(hit) == 0);
  ensure(had_err, gt_pbs_hit_get_tstart(hit) == 6);
  ensure(had_err, strcmp(gt_pbs_hit_get_trna(hit), "test2") == 0);
  rng = gt_pbs_hit_get_coords(hit);
  ensure(had_err, rng.start == 506);
  ensure(had_err, rng.end == 519);
  score2 = gt_pbs_hit_get_score(hit);
  /* the hit ranked first must carry the higher score */
  ensure(had_err, gt_double_compare(score1, score2) > 0);
  ensure(had_err, gt_pbs_hit_get_strand(hit) == GT_STRAND_REVERSE);
  memset(tmp, 0, BUFSIZ-1);
  memcpy(tmp, fullseq + (rng.start * sizeof (char)),
         (rng.end - rng.start + 1) * sizeof (char));
  ensure(had_err, strcmp(tmp, "gatcctaaggctac" ) == 0);

  /* clean up */
  gt_xremove(gt_str_get(tmpfilename));
  ensure(had_err, !gt_file_exists(gt_str_get(tmpfilename)));
  gt_str_delete(tmpfilename);
  gt_bioseq_delete(o.trna_lib);
  gt_free(rev_seq);
  gt_free(seq);
  gt_pbs_results_delete(res);

  return had_err;
}
/* Sets the strand of lv->ltr_retrotrans according to the protein match
   features found when iterating over it.
   - matches on one strand only: that strand is set
   - no matches on either strand: nothing is changed
   - matches on both strands: the logarithms of the match scores are summed
     per strand (presumably the scores are E-values, going by the variable
     names -- TODO confirm); forward is chosen if its sum compares smaller,
     reverse otherwise, and all protein matches on the other strand are
     removed from the feature.
   Always returns 0. */
static int gt_ltrdigest_pdom_visitor_choose_strand(GtLTRdigestPdomVisitor *lv)
{
  int had_err = 0;
  bool seen_fwd = false,
       seen_rev = false;
  double log_eval_fwd = 0.0,
         log_eval_rev = 0.0;
  GtFeatureNodeIterator *fni;
  GtFeatureNode *curnode = NULL;
  GtStrand strand;
  GtArray *to_delete;
  GtUword i;

  /* first pass: accumulate per-strand evidence */
  fni = gt_feature_node_iterator_new(lv->ltr_retrotrans);
  while ((curnode = gt_feature_node_iterator_next(fni)) != NULL) {
    GtStrand match_strand;
    double score;
    if (strcmp(gt_feature_node_get_type(curnode), gt_ft_protein_match) != 0)
      continue;
    match_strand = gt_feature_node_get_strand(curnode);
    score = (double) gt_feature_node_get_score(curnode);
    switch (match_strand) {
      case GT_STRAND_FORWARD:
        log_eval_fwd += log(score);
        seen_fwd = true;
        break;
      case GT_STRAND_REVERSE:
        log_eval_rev += log(score);
        seen_rev = true;
        break;
      default:
        /* matches on any other strand value do not count */
        break;
    }
  }
  gt_feature_node_iterator_delete(fni);

  /* nothing to decide on */
  if (!seen_fwd && !seen_rev)
    return had_err;

  /* unambiguous: evidence on exactly one strand */
  if (!seen_fwd) {
    gt_feature_node_set_strand(lv->ltr_retrotrans, GT_STRAND_REVERSE);
    return had_err;
  }
  if (!seen_rev) {
    gt_feature_node_set_strand(lv->ltr_retrotrans, GT_STRAND_FORWARD);
    return had_err;
  }

  /* conflicting evidence: the smaller sum wins, reverse wins ties */
  gt_assert(seen_rev && seen_fwd);
  strand = (gt_double_compare(log_eval_fwd, log_eval_rev) < 0)
             ? GT_STRAND_FORWARD
             : GT_STRAND_REVERSE;
  gt_feature_node_set_strand(lv->ltr_retrotrans, strand);

  /* second pass: collect the matches on the losing strand first, they are
     removed only after the iterator is gone */
  to_delete = gt_array_new(sizeof (GtFeatureNode*));
  fni = gt_feature_node_iterator_new(lv->ltr_retrotrans);
  while ((curnode = gt_feature_node_iterator_next(fni)) != NULL) {
    if (strcmp(gt_feature_node_get_type(curnode), gt_ft_protein_match) == 0
          && strand != gt_feature_node_get_strand(curnode)) {
      gt_array_add(to_delete, curnode);
    }
  }
  gt_feature_node_iterator_delete(fni);

  gt_assert(gt_array_size(to_delete) > 0);
  for (i = 0; i < gt_array_size(to_delete); i++) {
    gt_feature_node_remove_leaf(lv->ltr_retrotrans,
                                *(GtFeatureNode**) gt_array_get(to_delete, i));
  }
  gt_array_delete(to_delete);

  return had_err;
}
/* Computes the path matrix for a contig by dynamic programming over the
   combined score matrix and passes both matrices on to the gene prediction.

   combinedscore_matrix: score matrix, indexed [row][position in query DNA]
   hit_information, parsestruct_ptr: handed through to the gene prediction
                         (parsestruct_ptr presumably also backs the
                         ARGUMENTSSTRUCT() macro -- TODO confirm)
   rows:                 number of matrix rows to process
   contig_len:           number of matrix columns
   Returns the result of mg_compute_gene_prediction().

   NOTE(review): the path matrix is allocated with a fixed 7 rows while the
   loops run up to <rows>; this assumes rows <= 7 -- TODO confirm.
   NOTE(review): column 0 is written unconditionally, which assumes
   contig_len > 0 -- TODO confirm. */
int mg_computepath(CombinedScoreMatrixEntry **combinedscore_matrix,
                   HitInformation *hit_information,
                   unsigned long rows,
                   unsigned long contig_len,
                   ParseStruct *parsestruct_ptr, GtError * err)
{
  int had_err = 0;

  /* matrix for the path computation */
  PathMatrixEntry **path_matrix;

  /* row:       current matrix row
     k:         index into the precursor list (0 up to at most 2)
     prev_row:  matrix row of the precursor under consideration
     best_row:  row of the precursor the current maximum stems from */
  unsigned short row = 0,
                 k = 0,
                 prev_row = 0,
                 best_row = 0;

  /* position in the query DNA */
  unsigned long col = 0;

  /* reading frame of the current row, reading frame of the precursor under
     consideration and the list of possible precursor frames */
  short frame = 0,
        prev_frame = 0,
        precursors[NUM_PRECURSORS];

  /* q is the value applied when a gene is left or entered on the forward or
     reverse strand.
     NOTE(review): <best> starts at 1 for the very first cell, but is reset
     to DBL_MIN for all later cells -- confirm this is intended. */
  double q = ARGUMENTSSTRUCT(leavegene_value),
         candidate = 1,
         best = 1;

  /* allocate the path matrix, sized like the combined score matrix */
  gt_array2dim_calloc(path_matrix, 7, contig_len);

  gt_error_check(err);

  /* the first column of the path matrix is taken over from the first column
     of the combined score matrix */
  for (row = 0; row < rows; row++) {
    path_matrix[row][0].path_frame = row;
    path_matrix[row][0].score = combinedscore_matrix[row][0].matrix_score;
  }

  /* column-wise computation of the optimal path */
  for (col = 1; col < contig_len; col++) {
    for (row = 0; row < rows; row++) {
      /* convert the row counter into the corresponding reading frame */
      frame = get_current_frame(row);

      /* determine the possible precursor reading frames from the current
         reading frame and the position in the query DNA */
      compute_precursors(frame, col, precursors);

      /* maximise over the possible precursors (at most 3); the list ends at
         the first UNDEFINED entry */
      for (k = 0;
           k < NUM_PRECURSORS && (precursors[k] != UNDEFINED);
           ++k) {
        int strand_switch,
            frame_switch;

        prev_frame = precursors[k];
        /* convert the precursor frame into its matrix row */
        prev_row = get_matrix_row(prev_frame);

        /* case 1: change from the reverse to the forward strand or back */
        strand_switch = (frame < 0 && prev_frame > 0) ||
                        (frame > 0 && prev_frame < 0);
        /* case 2: plain change of the reading frame into a coding frame */
        frame_switch = frame != 0 && prev_frame != frame;

        if (strand_switch) {
          candidate = path_matrix[prev_row][col-1].score +
                      combinedscore_matrix[row][col].matrix_score + 2*q;
        }
        else if (frame_switch) {
          candidate = path_matrix[prev_row][col-1].score +
                      combinedscore_matrix[row][col].matrix_score + q;
        }
        else {
          /* case 3: the reading frame is kept, or the current row is the
             non-coding one */
          candidate = path_matrix[prev_row][col-1].score +
                      combinedscore_matrix[row][col].matrix_score;
        }

        /* keep the maximum and the row it originates from */
        if (gt_double_compare(candidate, best) > 0) {
          best = candidate;
          best_row = prev_row;
        }
      }

      /* store the maximum and its precursor row */
      path_matrix[row][col].score = best;
      path_matrix[row][col].path_frame = best_row;

      /* reset for the next cell.
         NOTE(review): DBL_MIN is the smallest positive normalized double,
         not the most negative value; with it as the starting point, only
         candidates comparing greater than DBL_MIN can be selected. If scores
         may be negative, -DBL_MAX was probably intended -- confirm against
         the gene prediction before changing. */
      candidate = DBL_MIN;
      best = DBL_MIN;
      best_row = 0;
    }
  }

  /* run the gene prediction on the finished matrices */
  had_err = mg_compute_gene_prediction(combinedscore_matrix,
                                       path_matrix,
                                       contig_len,
                                       hit_information,
                                       parsestruct_ptr, err);

  gt_array2dim_delete(path_matrix);

  return had_err;
}
/* Returns true if gt_double_compare() reports <d1> as larger than <d2>,
   false otherwise (including when it reports them as equal). */
bool gt_double_larger_double(double d1, double d2)
{
  const int order = gt_double_compare(d1, d2);

  return order > 0;
}
/* Returns true if gt_double_compare() reports <d1> as smaller than <d2>,
   false otherwise (including when it reports them as equal). */
bool gt_double_smaller_double(double d1, double d2)
{
  const int order = gt_double_compare(d1, d2);

  return order < 0;
}