// Compute the union of the sequence IDs found in the reference and prediction
// feature indexes.
//
// refrfeats: feature index holding the reference annotations
// predfeats: feature index holding the prediction annotations
// logger:    receives an error message for each index whose seqids could not
//            be fetched
//
// Returns the result of agn_gt_str_array_union() on the two seqid lists, or
// NULL when the logger reports an error once both fetches have been attempted.
// NOTE(review): the failure test is agn_logger_has_error(), so an error that
// was presumably logged before this call would also yield NULL -- confirm this
// is intended.
GtStrArray* agn_seq_union(GtFeatureIndex *refrfeats, GtFeatureIndex *predfeats,
                          AgnLogger *logger)
{
  GtStrArray *merged = NULL;
  GtError *fetch_err = gt_error_new();

  // Reference seqids; a failure is handed to the logger and then cleared so
  // the same error object can be reused for the prediction index
  GtStrArray *refr_ids = gt_feature_index_get_seqids(refrfeats, fetch_err);
  if(gt_error_is_set(fetch_err))
  {
    agn_logger_log_error(logger, "error fetching seqids for reference: %s",
                         gt_error_get(fetch_err));
    gt_error_unset(fetch_err);
  }

  // Prediction seqids, handled the same way
  GtStrArray *pred_ids = gt_feature_index_get_seqids(predfeats, fetch_err);
  if(gt_error_is_set(fetch_err))
  {
    agn_logger_log_error(logger, "error fetching seqids for prediction: %s",
                         gt_error_get(fetch_err));
    gt_error_unset(fetch_err);
  }
  gt_error_delete(fetch_err);

  // Only build the union when the logger is free of errors
  if(!agn_logger_has_error(logger))
    merged = agn_gt_str_array_union(refr_ids, pred_ids);

  // The two intermediate lists are released on every path
  gt_str_array_delete(refr_ids);
  gt_str_array_delete(pred_ids);
  return merged;
}
static void proc_env_options(void) { int argc; char *env_options, **argv; GtSplitter *splitter; GtError *err; /* construct argument vector from $GT_ENV_OPTIONS */ env_options = getenv("GT_ENV_OPTIONS"); if (!env_options) return; env_options = gt_cstr_dup(env_options); /* make writeable copy */ splitter = gt_splitter_new(); gt_splitter_split(splitter, env_options, strlen(env_options), ' '); argc = gt_splitter_size(splitter); argv = gt_cstr_array_preprend((const char**) gt_splitter_get_tokens(splitter), "env"); argc++; /* parse options contained in $GT_ENV_OPTIONS */ err = gt_error_new(); switch (parse_env_options(argc, (const char**) argv, err)) { case GT_OPTION_PARSER_OK: break; case GT_OPTION_PARSER_ERROR: fprintf(stderr, "error parsing $GT_ENV_OPTIONS: %s\n", gt_error_get(err)); gt_error_unset(err); break; case GT_OPTION_PARSER_REQUESTS_EXIT: break; } gt_error_delete(err); gt_free(env_options); gt_splitter_delete(splitter); gt_cstr_array_delete(argv); }
/* Unit test for the translator interface.

   Exercises gt_translator_next() (3-frame translation),
   gt_translator_find_startcodon(), gt_translator_find_stopcodon() and
   gt_translator_find_codon() on valid input, on input containing an invalid
   character, and on input in which nothing can be found.

   All calls under test receive the private error object <test_err>; <err> is
   only passed to gt_error_check() here.
   NOTE(review): <had_err> is never assigned directly in this function;
   presumably the gt_ensure() macro sets it (and <err>) when a condition
   fails -- confirm against the macro definition.

   Returns <had_err>. */
int gt_translator_unit_test(GtError *err) {
  int had_err = 0;
  GtTranslatorStatus test_errnum;
  GtTranslator *tr;
  GtCodonIterator *ci;
  GtError *test_err;
  GtStrArray *codons, *invalidcodons;
  /* valid nucleotide sequence used by all positive tests */
  const char *seq = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGT"
                    "GGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGAACTGGT"
                    "TACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGG";
  /* sequence used by the three 'nothing found' tests at the end */
  const char *no_startcodon = "AAAAAAAAAATCATCTCCCCATTTTTTT";
  /* 'Z' as the very first character */
  const char *invalidseq = "ZAGCTTTTCATTCTGACTGCAAATATGTCTCTGTGT";
  /* 'Z' in the interior of the sequence */
  const char *invalidseq2 = "AGCTTTTCATTCTGACZTGCAAATATGTCTCTGTGT";
  char translated;
  unsigned int frame;
  GtUword pos = 0;
  GtStr *protein[3]; /* one translation per reading frame */
  gt_error_check(err);

  test_err = gt_error_new();
  ci = gt_codon_iterator_simple_new(seq, (GtUword) strlen(seq), test_err);
  tr = gt_translator_new(ci);
  protein[0] = gt_str_new();
  protein[1] = gt_str_new();
  protein[2] = gt_str_new();
  /* codon list consisting of well-formed triplets only */
  codons = gt_str_array_new();
  gt_str_array_add_cstr(codons, "ACG");
  gt_str_array_add_cstr(codons, "ACT");
  /* codon list containing the two-character entry "AC" */
  invalidcodons = gt_str_array_new();
  gt_str_array_add_cstr(invalidcodons, "ACG");
  gt_str_array_add_cstr(invalidcodons, "AC");

  /* do 3-frame translation: each translated character is appended to the
     string belonging to the frame reported for it */
  gt_error_unset(test_err);
  test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  while (!test_errnum && translated) {
    gt_str_append_char(protein[frame], translated);
    test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
    gt_ensure(
      test_errnum != GT_TRANSLATOR_ERROR && !gt_error_is_set(test_err));
  }
  /* the loop must have stopped at the end of the sequence, without error */
  gt_ensure(
    test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* check 3-frame translation against the expected protein strings */
  gt_ensure(strcmp(gt_str_get(protein[0]),
                   "SFSF*LQRAICLCVD*KKSV**QLLNWLPAVSKLKFY*LR") == 0);
  gt_ensure(strcmp(gt_str_get(protein[1]),
                   "AFHSDCNGQYVSVWIKKRVSDSSF*TGYLP*VN*NFIDL") == 0);
  gt_ensure(strcmp(gt_str_get(protein[2]),
                   "LFILTATGNMSLCGLKKECLIAASELVTCRE*IKILLT*") == 0);

  /* find start codon -- positive
     (offset 11 of <seq> holds "CTG"; presumably accepted as a start codon by
     the translation scheme in use -- confirm) */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_startcodon(tr, &pos, test_err);
  gt_ensure(!test_errnum &&
            !gt_error_is_set(test_err));
  gt_ensure(pos == 11UL);

  /* find stop codon -- positive (offset 12 of <seq> holds "TGA") */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_stopcodon(tr, &pos, test_err);
  gt_ensure(!test_errnum && !gt_error_is_set(test_err));
  gt_ensure(pos == 12UL);

  /* find arbitrary codons -- positive (offset 14 of <seq> holds "ACT") */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_codon(tr, codons, &pos, test_err);
  gt_ensure(!test_errnum && !gt_error_is_set(test_err));
  gt_ensure(pos == 14UL);

  /* find arbitrary codons -- negative (invalid codons):
     an error status and a set error are expected */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_codon(tr, invalidcodons, &pos, test_err);
  gt_ensure(
    test_errnum == GT_TRANSLATOR_ERROR && gt_error_is_set(test_err));

  /* replace the codon iterator with one over <invalidseq> and attach it to
     the existing translator */
  gt_error_unset(test_err);
  gt_codon_iterator_delete(ci);
  ci = gt_codon_iterator_simple_new(invalidseq,
                                    (GtUword) strlen(invalidseq), test_err);
  gt_ensure(ci && !gt_error_is_set(test_err));
  gt_translator_reset(tr, ci);
  /* check translation of sequence with invalid beginning:
     the very first call is expected to fail */
  test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  gt_ensure(test_errnum && gt_error_is_set(test_err));

  /* check translation of sequence with invalid character within */
  gt_error_unset(test_err);
  gt_codon_iterator_delete(ci);
  ci = gt_codon_iterator_simple_new(invalidseq2,
                                    (GtUword) strlen(invalidseq2), test_err);
  gt_ensure(ci && !gt_error_is_set(test_err));
  gt_translator_reset(tr, ci);
  test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  /* translate until a non-zero status is returned; the characters appended
     to <protein> here are not inspected afterwards */
  while (!test_errnum && translated) {
    gt_str_append_char(protein[frame], translated);
    test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  }
  gt_ensure(
    test_errnum == GT_TRANSLATOR_ERROR && gt_error_is_set(test_err));

  /* find start codon -- fail: the end of the sequence is expected to be
     reached without an error being set */
  gt_error_unset(test_err);
  gt_codon_iterator_delete(ci);
  ci = gt_codon_iterator_simple_new(no_startcodon,
                                    (GtUword) strlen(no_startcodon), test_err);
  gt_ensure(ci &&
            !gt_error_is_set(test_err));
  gt_translator_reset(tr, ci);
  test_errnum = gt_translator_find_startcodon(tr, &pos, test_err);
  gt_ensure(
    test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* find stop codon -- fail */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_stopcodon(tr, &pos, test_err);
  gt_ensure(
    test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* find arbitrary codons -- negative (none there) */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_codon(tr, codons, &pos, test_err);
  gt_ensure(
    test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* release everything created by this test */
  gt_codon_iterator_delete(ci);
  gt_translator_delete(tr);
  gt_str_delete(protein[0]);
  gt_str_delete(protein[1]);
  gt_str_delete(protein[2]);
  gt_str_array_delete(codons);
  gt_str_array_delete(invalidcodons);
  gt_error_delete(test_err);

  return had_err;
}
/* Load the BSSM parameters stored in the file <filename>.

   The file is located as follows:
   1. If <filename> exists as given, it is used directly.
   2. Otherwise, provided <filename> does not contain GT_PATH_SEPARATOR, it is
      looked up via gt_file_find_in_env() using the environment variable
      named by BSSMENVNAME; the path found is joined with <filename>.
   3. If step 2 produced an error, the location of the running executable is
      determined via gt_file_find_exec_in_path() and the file is looked for in
      its "bssm" subdirectory. If it exists there, the earlier error is
      discarded.

   The located file is read with bssm_param_plain_read() first; if that fails,
   load_old_binary_format() is tried and, on success, the error set by the
   first attempt is discarded.

   Returns the loaded parameters, or NULL in case of failure, in which case
   <err> carries the message set by the step that failed. */
GthBSSMParam* gth_bssm_param_load(const char *filename, GtError *err)
{
  GthBSSMParam *loaded = NULL;
  GtStr *location = gt_str_new();
  int had_err = 0;

  gt_error_check(err);

  if (gt_file_exists(filename)) {
    /* usable exactly as given */
    gt_str_append_cstr(location, filename);
  }
  else {
    if (strchr(filename, GT_PATH_SEPARATOR) != NULL) {
      gt_error_set(err, "filename \"%s\" contains illegal symbol '%c': the "
                        "path list specified by environment variable \"%s\" "
                        "cannot be searched for it", filename,
                   GT_PATH_SEPARATOR, BSSMENVNAME);
      had_err = -1;
    }
    else {
      /* check for file path in environment variable */
      had_err = gt_file_find_in_env(location, filename, BSSMENVNAME, err);
      if (!had_err && gt_str_length(location) == 0) {
        gt_error_set(err, "file \"%s\" not found in directory list specified "
                          "by environment variable %s", filename,
                     BSSMENVNAME);
        had_err = -1;
      }
    }

    if (had_err) {
      /* check for file path relative to binary; errors of this lookup are
         deliberately not recorded (NULL error object) */
      if (gt_file_find_exec_in_path(location, gt_error_get_progname(err),
                                    NULL) == 0) {
        /* NOTE(review): assumes a successful lookup always yields a
           non-empty path -- confirm */
        gt_assert(gt_str_length(location));
        gt_str_append_char(location, GT_PATH_SEPARATOR);
        gt_str_append_cstr(location, "bssm");
        gt_str_append_char(location, GT_PATH_SEPARATOR);
        gt_str_append_cstr(location, filename);
        if (gt_file_exists(gt_str_get(location))) {
          /* fallback location is usable -> forget the earlier failure */
          gt_error_unset(err);
          had_err = 0;
        }
      }
    }
    else {
      /* path found -> append filename */
      gt_str_append_char(location, GT_PATH_SEPARATOR);
      gt_str_append_cstr(location, filename);
    }
  }

  if (!had_err) {
    loaded = bssm_param_plain_read(gt_str_get(location), err);
    if (loaded == NULL) {
      /* loading new plain text format didn't work -> try old binary format */
      loaded = load_old_binary_format(location, filename, NULL);
      if (loaded != NULL) {
        /* loading binary format worked -> unset error */
        gt_error_unset(err);
      }
      else
        had_err = -1;
    }
  }

  gt_str_delete(location);

  if (had_err) {
    gth_bssm_param_delete(loaded);
    return NULL;
  }
  return loaded;
}
// Compute the intersection of the sequence IDs found in the reference and
// prediction feature indexes.
//
// refrfeats: feature index holding the reference annotations
// predfeats: feature index holding the prediction annotations
// logger:    receives
//              - an error for each index whose seqids could not be fetched,
//              - a warning for every seqid present in only one of the indexes,
//              - an error if the intersection is empty
//
// Returns the result of agn_gt_str_array_intersection() on the two seqid
// lists (also when it is empty), or NULL when the logger reports an error
// once both fetches have been attempted.
// NOTE(review): the failure test is agn_logger_has_error(), so an error that
// was presumably logged before this call would also yield NULL -- confirm this
// is intended.
GtStrArray* agn_seq_intersection(GtFeatureIndex *refrfeats,
                                 GtFeatureIndex *predfeats, AgnLogger *logger)
{
  GtStrArray *shared = NULL;
  GtError *fetch_err = gt_error_new();

  // Reference seqids; a failure is handed to the logger and then cleared so
  // the same error object can be reused for the prediction index
  GtStrArray *refr_ids = gt_feature_index_get_seqids(refrfeats, fetch_err);
  if(gt_error_is_set(fetch_err))
  {
    agn_logger_log_error(logger, "error fetching seqids for reference: %s",
                         gt_error_get(fetch_err));
    gt_error_unset(fetch_err);
  }

  // Prediction seqids, handled the same way
  GtStrArray *pred_ids = gt_feature_index_get_seqids(predfeats, fetch_err);
  if(gt_error_is_set(fetch_err))
  {
    agn_logger_log_error(logger, "error fetching seqids for prediction: %s",
                         gt_error_get(fetch_err));
    gt_error_unset(fetch_err);
  }
  gt_error_delete(fetch_err);

  if(!agn_logger_has_error(logger))
  {
    GtUword outer, inner;
    shared = agn_gt_str_array_intersection(refr_ids, pred_ids);

    // Warn about reference sequences that lack prediction annotations
    for(outer = 0; outer < gt_str_array_size(refr_ids); outer++)
    {
      const char *candidate = gt_str_array_get(refr_ids, outer);
      int hits = 0;
      for(inner = 0; inner < gt_str_array_size(shared); inner++)
      {
        if(!strcmp(candidate, gt_str_array_get(shared, inner)))
          hits++;
      }
      if(!hits)
      {
        agn_logger_log_warning(logger, "no prediction annotations found for "
                               "sequence '%s'", candidate);
      }
    }

    // Warn about prediction sequences that lack reference annotations
    for(outer = 0; outer < gt_str_array_size(pred_ids); outer++)
    {
      const char *candidate = gt_str_array_get(pred_ids, outer);
      int hits = 0;
      for(inner = 0; inner < gt_str_array_size(shared); inner++)
      {
        if(!strcmp(candidate, gt_str_array_get(shared, inner)))
          hits++;
      }
      if(!hits)
      {
        agn_logger_log_warning(logger, "no reference annotations found for "
                               "sequence '%s'", candidate);
      }
    }

    // An empty intersection is reported as an error, but still returned
    if(gt_str_array_size(shared) == 0)
    {
      agn_logger_log_error(logger, "no sequences in common between reference and "
                           "prediction");
    }
  }

  // The two intermediate lists are released on every path
  gt_str_array_delete(refr_ids);
  gt_str_array_delete(pred_ids);
  return shared;
}