/**
 * Build the union of the sequence IDs found in the reference and prediction
 * feature indexes.
 *
 * Problems while fetching seqids from either index are reported through
 * logger. If the logger holds an error afterwards, NULL is returned;
 * otherwise the result of agn_gt_str_array_union() is handed to the caller.
 */
GtStrArray* agn_seq_union(GtFeatureIndex *refrfeats, GtFeatureIndex *predfeats,
                          AgnLogger *logger)
{
  GtError *err = gt_error_new();
  GtStrArray *refr_ids;
  GtStrArray *pred_ids;
  GtStrArray *merged;

  /* Reference seqids; the error object is reset so it can be reused below. */
  refr_ids = gt_feature_index_get_seqids(refrfeats, err);
  if(gt_error_is_set(err))
  {
    agn_logger_log_error(logger, "error fetching seqids for reference: %s",
                         gt_error_get(err));
    gt_error_unset(err);
  }

  /* Prediction seqids. */
  pred_ids = gt_feature_index_get_seqids(predfeats, err);
  if(gt_error_is_set(err))
  {
    agn_logger_log_error(logger, "error fetching seqids for prediction: %s",
                         gt_error_get(err));
    gt_error_unset(err);
  }
  gt_error_delete(err);

  /* Only compute the union when nothing has been logged as an error. */
  merged = agn_logger_has_error(logger)
             ? NULL
             : agn_gt_str_array_union(refr_ids, pred_ids);

  gt_str_array_delete(refr_ids);
  gt_str_array_delete(pred_ids);
  return merged;
}
/**
 * Load the features of one type from a set of GFF3 files into a new in-memory
 * feature index.
 *
 * @param numfiles   number of entries in filenames
 * @param filenames  GFF3 input files
 * @param type       feature type passed to the filter stream
 * @param logger     receives an error message if pulling the stream fails
 * @return the populated index, or NULL if the logger holds an error
 */
GtFeatureIndex *agn_import_simple(int numfiles, const char **filenames,
                                  char *type, AgnLogger *logger)
{
  GtFeatureIndex *index = gt_feature_index_memory_new();

  /* GFF3 reader with ID attribute checking and tidy mode enabled. */
  GtNodeStream *reader = gt_gff3_in_stream_new_unsorted(numfiles, filenames);
  GtGFF3InStream *gff3reader = (GtGFF3InStream *)reader;
  gt_gff3_in_stream_check_id_attributes(gff3reader);
  gt_gff3_in_stream_enable_tidy_mode(gff3reader);

  /* NOTE(review): this map is not released in this function; confirm whether
     agn_filter_stream_new() takes ownership of it. */
  GtHashmap *wanted = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(wanted, type, type);

  /* reader -> type filter -> feature index sink */
  GtNodeStream *filter = agn_filter_stream_new(reader, wanted);
  GtNodeStream *sink = gt_feature_out_stream_new(filter, index);

  GtError *err = gt_error_new();
  if(gt_node_stream_pull(sink, err) == -1)
  {
    agn_logger_log_error(logger, "error processing node stream: %s",
                         gt_error_get(err));
  }
  gt_error_delete(err);

  /* Any logged error invalidates the index. */
  if(agn_logger_has_error(logger))
  {
    gt_feature_index_delete(index);
    index = NULL;
  }

  gt_node_stream_delete(reader);
  gt_node_stream_delete(filter);
  gt_node_stream_delete(sink);
  return index;
}
/**
 * Fill queue with the feature nodes obtained by running
 * data/gff3/grape-codons.gff3 through the infer-CDS stream.
 *
 * The nodes are sorted with agn_genome_node_compare, the array is reversed,
 * and the nodes are then popped off its end into the queue. A stream error is
 * printed to stderr; whatever was collected is still queued.
 */
static void infer_cds_visitor_test_data(GtQueue *queue)
{
  const char *infile = "data/gff3/grape-codons.gff3";
  GtError *err = gt_error_new();

  /* Input stream: ID checking plus tidy mode. */
  GtNodeStream *reader = gt_gff3_in_stream_new_unsorted(1, &infile);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)reader);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)reader);

  /* reader -> infer CDS -> array sink */
  GtLogger *log = gt_logger_new(true, "", stderr);
  GtNodeStream *infer = agn_infer_cds_stream_new(reader, NULL, log);
  GtArray *nodes = gt_array_new( sizeof(GtFeatureNode *) );
  GtNodeStream *sink = gt_array_out_stream_new(infer, nodes, err);

  if(gt_node_stream_pull(sink, err) == -1)
  {
    fprintf(stderr, "[AgnInferCDSVisitor::infer_cds_visitor_test_data] error "
            "processing features: %s\n", gt_error_get(err));
  }
  gt_node_stream_delete(reader);
  gt_node_stream_delete(infer);
  gt_node_stream_delete(sink);
  gt_logger_delete(log);

  /* Sort, reverse, then drain the array from its end into the queue. */
  gt_array_sort(nodes, (GtCompare)agn_genome_node_compare);
  gt_array_reverse(nodes);
  for(; gt_array_size(nodes) > 0; )
  {
    GtFeatureNode **slot = (GtFeatureNode **)gt_array_pop(nodes);
    gt_queue_add(queue, *slot);
  }

  gt_array_delete(nodes);
  gt_error_delete(err);
}
/**
 * Load a contained-reads list from filename into *cntlist, terminating the
 * process on any failure.
 *
 * The program exits with EXIT_FAILURE when the file does not exist, when
 * parsing fails, or when the number of reads recorded in the file differs
 * from expected_nofreads.
 *
 * @return the value of gt_cntlist_count() for the loaded list
 */
GtUword gt_cntlist_xload(const char *filename, GtBitsequence **cntlist,
                         GtUword expected_nofreads)
{
  GtUword nofreads_in_file;
  GtError *parse_err;
  int rc;

  if (!gt_file_exists(filename))
  {
    fprintf(stderr, "FATAL: error by loading contained reads list: "
            "file %s does not exist\n", filename);
    exit(EXIT_FAILURE);
  }

  parse_err = gt_error_new();
  rc = gt_cntlist_parse(filename, true, cntlist, &nofreads_in_file, parse_err);
  if (rc != 0)
  {
    fprintf(stderr, "FATAL: error by parsing contained reads list: %s\n",
            gt_error_get(parse_err));
    exit(EXIT_FAILURE);
  }
  gt_error_delete(parse_err);

  /* The list must describe exactly the read set the caller expects. */
  if (nofreads_in_file != expected_nofreads)
  {
    fprintf(stderr, "FATAL: error by parsing contained reads list: "
            "file specifies a wrong number of reads\nexpected "GT_WU", found "
            GT_WU"\n", expected_nofreads, nofreads_in_file);
    exit(EXIT_FAILURE);
  }

  return gt_cntlist_count(*cntlist, nofreads_in_file);
}
/**
 * Write an aligned segment to outfp as a FASTQ entry.
 *
 * If may_be_gapped is set, the sequence and qualities are ungapped first.
 * For reverse segments the quality string is reversed in place and the
 * sequence is reverse-complemented in place; a failing reverse complement
 * terminates the process. desc, when non-NULL, replaces the segment's own
 * description in the output.
 */
static void gt_hpol_processor_output_segment(GtAlignedSegment *as,
                                             bool may_be_gapped,
                                             GtFile *outfp, const char *desc)
{
  unsigned long slen;
  const char *header;

  if (may_be_gapped)
    gt_aligned_segment_ungap_seq_and_qual(as);

  slen = (unsigned long)strlen(gt_aligned_segment_seq(as));
  gt_assert(slen == (unsigned long)strlen(gt_aligned_segment_qual(as)));

  if (gt_aligned_segment_is_reverse(as))
  {
    GtError *err = gt_error_new();
    char *qual = gt_aligned_segment_qual(as);
    const unsigned long half = (slen + 1UL) >> 1;
    unsigned long left;

    /* Mirror the quality string around its centre. */
    for (left = 0; left < half; left++)
    {
      const unsigned long right = slen - left - 1UL;
      const char held = qual[left];
      qual[left] = qual[right];
      qual[right] = held;
    }
    gt_assert((unsigned long)strlen(gt_aligned_segment_qual(as)) == slen);

    if (gt_reverse_complement(gt_aligned_segment_seq(as), slen, err) != 0)
    {
      fprintf(stderr, "error: %s", gt_error_get(err));
      exit(EXIT_FAILURE);
    }
    gt_error_delete(err);
  }

  header = (desc != NULL) ? desc : gt_aligned_segment_description(as);
  gt_fastq_show_entry(header, gt_aligned_segment_seq(as),
                      gt_aligned_segment_qual(as), slen, 0, false, outfp);
}
/**
 * Load GFF3 files through the canonical-gene stream into a new in-memory
 * feature index.
 *
 * @param numfiles   number of entries in filenames
 * @param filenames  GFF3 input files
 * @param logger     passed to the canonical-gene stream; also receives an
 *                   error message if pulling the stream fails
 * @return the populated index, or NULL if the logger holds an error
 */
GtFeatureIndex *agn_import_canonical(int numfiles, const char **filenames,
                                     AgnLogger *logger)
{
  /* GFF3 reader with ID attribute checking and tidy mode enabled. */
  GtNodeStream *reader = gt_gff3_in_stream_new_unsorted(numfiles, filenames);
  GtGFF3InStream *gff3reader = (GtGFF3InStream *)reader;
  gt_gff3_in_stream_check_id_attributes(gff3reader);
  gt_gff3_in_stream_enable_tidy_mode(gff3reader);

  /* reader -> canonical genes -> feature index sink */
  GtFeatureIndex *index = gt_feature_index_memory_new();
  GtNodeStream *canon = agn_canon_gene_stream_new(reader, logger);
  GtNodeStream *sink = gt_feature_out_stream_new(canon, index);

  GtError *err = gt_error_new();
  if(gt_node_stream_pull(sink, err) == -1)
  {
    agn_logger_log_error(logger, "error processing node stream: %s",
                         gt_error_get(err));
  }
  gt_error_delete(err);

  /* Any logged error invalidates the index. */
  if(agn_logger_has_error(logger))
  {
    gt_feature_index_delete(index);
    index = NULL;
  }

  gt_node_stream_delete(reader);
  gt_node_stream_delete(canon);
  gt_node_stream_delete(sink);
  return index;
}
static void proc_env_options(void) { int argc; char *env_options, **argv; GtSplitter *splitter; GtError *err; /* construct argument vector from $GT_ENV_OPTIONS */ env_options = getenv("GT_ENV_OPTIONS"); if (!env_options) return; env_options = gt_cstr_dup(env_options); /* make writeable copy */ splitter = gt_splitter_new(); gt_splitter_split(splitter, env_options, strlen(env_options), ' '); argc = gt_splitter_size(splitter); argv = gt_cstr_array_preprend((const char**) gt_splitter_get_tokens(splitter), "env"); argc++; /* parse options contained in $GT_ENV_OPTIONS */ err = gt_error_new(); switch (parse_env_options(argc, (const char**) argv, err)) { case GT_OPTION_PARSER_OK: break; case GT_OPTION_PARSER_ERROR: fprintf(stderr, "error parsing $GT_ENV_OPTIONS: %s\n", gt_error_get(err)); gt_error_unset(err); break; case GT_OPTION_PARSER_REQUESTS_EXIT: break; } gt_error_delete(err); gt_free(env_options); gt_splitter_delete(splitter); gt_cstr_array_delete(argv); }
/**
 * Check that the intermediate output written to outputfilename can be read
 * back into a spliced alignment collection equal to orig_sa_collection.
 *
 * *outfp is closed and reopened for reading with its original file mode; on
 * return it refers to the reopened file. A parse failure terminates the
 * process.
 *
 * @return true if the re-read collection equals orig_sa_collection
 */
bool gth_intermediate_output_is_correct(char *outputfilename,
                                        GthSACollection *orig_sa_collection,
                                        GthInput *input,
                                        GtFile **outfp,
                                        GtError *err)
{
  SACollectionData reread;
  GthSACollection *reread_collection;
  GtFileMode mode;
  bool equal;
#ifndef NDEBUG
  GtUword numofgenomicfiles, numofreferencefiles;
#endif

  gt_error_check(err);
  gt_assert(outputfilename);
  gt_assert(*outfp);

#ifndef NDEBUG
  /* Remember the file counts so the asserts below can detect growth. */
  numofgenomicfiles = gth_input_num_of_gen_files(input);
  numofreferencefiles = gth_input_num_of_ref_files(input);
#endif

  /* Target collection for the parser callback. */
  reread_collection = gth_sa_collection_new(GTH_DC_NONE);
  reread.sa_collection = reread_collection;
  reread.sa_filter = NULL;
  reread.stat = NULL;

  /* Switch the output file from writing to reading, keeping its mode. */
  mode = gt_file_mode(*outfp);
  gt_file_delete(*outfp);
  *outfp = gt_file_xopen_file_mode(mode, outputfilename, "r");
  gt_assert(*outfp);

  /* Read the intermediate output back in. */
  if (gt_parse_intermediate_output(input, store_in_sa_collection, &reread,
                                   outputfilename, *outfp, err))
  {
    fprintf(stderr, "error: %s\n", gt_error_get(err));
    exit(EXIT_FAILURE);
  }

  /* Parsing must not have added genomic or reference files. */
  gt_assert(numofgenomicfiles == gth_input_num_of_gen_files(input));
  gt_assert(numofreferencefiles == gth_input_num_of_ref_files(input));

  equal = gth_sa_collections_are_equal(orig_sa_collection, reread_collection);

  gth_sa_collection_delete(reread_collection);
  return equal;
}
/**
 * Unit test for coverage and integrity calculation of the GAEVAL visitor.
 *
 * The alignments and the gene models are both read from
 * data/gff3/gaeval-stream-unit-test-2.gff3. The two resulting features are
 * scored and compared against fixed expected values; the outcome is recorded
 * in test under the label "calculate integrity".
 *
 * If the GFF3 stream cannot be processed, a message is printed to stderr and
 * the function returns without recording a result. All resources are
 * released on that path as well.
 */
static void gv_test_calc_integrity(AgnUnitTest *test)
{
  const char *filename = "data/gff3/gaeval-stream-unit-test-2.gff3";

  /* Visitor under test, constructed from the alignment stream. */
  GtNodeStream *align_in = gt_gff3_in_stream_new_unsorted(1, &filename);
  AgnGaevalParams params = { 0.6, 0.3, 0.05, 0.05, 400, 200, 100 };
  GtNodeVisitor *nv = agn_gaeval_visitor_new(align_in, params);
  AgnGaevalVisitor *gv = gaeval_visitor_cast(nv);
  gt_node_stream_delete(align_in);

  /* Gene models: GFF3 -> mRNA filter -> infer CDS -> infer exons -> array. */
  GtNodeStream *gff3in = gt_gff3_in_stream_new_unsorted(1, &filename);
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, "mRNA", "mRNA");
  GtNodeStream *filtstream = agn_filter_stream_new(gff3in, typestokeep);
  GtLogger *logger = gt_logger_new(true, "", stderr);
  GtNodeStream *ics = agn_infer_cds_stream_new(filtstream, NULL, logger);
  GtNodeStream *ies = agn_infer_exons_stream_new(ics, NULL, logger);
  GtError *error = gt_error_new();
  GtArray *feats = gt_array_new( sizeof(GtFeatureNode *) );
  GtNodeStream *featstream = gt_array_out_stream_new(ies, feats, error);

  int result = gt_node_stream_pull(featstream, error);
  if(result == -1)
  {
    fprintf(stderr, "[AgnGaevalVisitor::gv_test_calc_integrity] error "
            "processing GFF3: %s\n", gt_error_get(error));
  }

  /* The pipeline is no longer needed, whether or not the pull succeeded. */
  gt_node_stream_delete(gff3in);
  gt_node_stream_delete(filtstream);
  gt_node_stream_delete(featstream);
  gt_node_stream_delete(ics);
  gt_node_stream_delete(ies);
  gt_logger_delete(logger);
  gt_hashmap_delete(typestokeep);

  if(result == -1)
  {
    /* Release everything that is still alive before bailing out, including
       any nodes collected before the failure. */
    while(gt_array_size(feats) > 0)
    {
      GtGenomeNode *gn = *(GtGenomeNode **)gt_array_pop(feats);
      gt_genome_node_delete(gn);
    }
    gt_array_delete(feats);
    gt_error_delete(error);
    gt_node_visitor_delete(nv);
    return;
  }

  agn_assert(gt_array_size(feats) == 2);
  GtFeatureNode *g1 = *(GtFeatureNode **)gt_array_get(feats, 0);
  GtFeatureNode *g2 = *(GtFeatureNode **)gt_array_get(feats, 1);

  /* Integrity is computed from the coverage of the same feature. */
  double cov1 = gaeval_visitor_calculate_coverage(gv, g1, error);
  double cov2 = gaeval_visitor_calculate_coverage(gv, g2, error);
  double int1 = gaeval_visitor_calculate_integrity(gv, g1, cov1, NULL, error);
  double int2 = gaeval_visitor_calculate_integrity(gv, g2, cov2, NULL, error);
  bool test1 = fabs(cov1 - 1.000) < 0.001 && fabs(cov2 - 0.997) < 0.001 &&
               fabs(int1 - 0.850) < 0.001 && fabs(int2 - 0.863) < 0.001;
  agn_unit_test_result(test, "calculate integrity", test1);

  gt_error_delete(error);
  gt_array_delete(feats);
  gt_genome_node_delete((GtGenomeNode *)g1);
  gt_genome_node_delete((GtGenomeNode *)g2);
  gt_node_visitor_delete(nv);
}
/**
 * Raise a Lua error whose message is the text of err, prefixed with the
 * position information produced by luaL_where(L, 1).
 *
 * err must be set; it is deleted by this function. The return statement only
 * exists so callers can write `return gt_lua_error(L, err);`.
 */
int gt_lua_error(lua_State *L, GtError *err)
{
  gt_assert(L);
  gt_assert(err);
  gt_assert(gt_error_is_set(err));

  /* Stack after the two pushes: [..., position, message]. */
  luaL_where(L, 1);
  lua_pushstring(L, gt_error_get(err));

  /* The message is on the Lua stack now; the error object can go. */
  gt_error_delete(err);

  lua_concat(L, 2);
  return lua_error(L);
}
/**
 * Tool entry point: load a packed index and verify its integrity.
 *
 * @param argc  number of command line arguments
 * @param argv  command line arguments; the first non-option argument names
 *              the index project
 * @param err   receives a description of the failure
 * @return 0 on success or when the option parser requests an exit,
 *         -1 on option errors, load failures, or a failed integrity check
 */
extern int gt_packedindex_chk_integrity(int argc, const char *argv[],
                                        GtError *err)
{
  struct encIdxSeq *seq;
  struct chkIndexOptions params;
  GtStr *inputProject;
  int parsedArgs;
  int had_err = 0;
  Verboseinfo *verbosity = NULL;
  gt_error_check(err);

  switch (parseChkIndexOptions(&parsedArgs, argc, argv, &params, err))
  {
    case OPTIONPARSER_OK:
      break;
    case OPTIONPARSER_ERROR:
      return -1;
    case OPTIONPARSER_REQUESTS_EXIT:
      return 0;
  }

  inputProject = gt_str_new_cstr(argv[parsedArgs]);
  verbosity = newverboseinfo(params.verboseOutput);

  seq = loadEncIdxSeq(inputProject, params.encType, params.EISFeatureSet,
                      verbosity, err);
  if ((had_err = seq == NULL))
  {
    gt_error_set(err, "Failed to load index: %s", gt_str_get(inputProject));
  }
  else
  {
    fprintf(stderr, "# Using index over sequence "FormatSeqpos
            " symbols long.\n", EISLength(seq));
    {
      int corrupt
        = EISVerifyIntegrity(seq, inputProject, params.skipCount,
                             params.progressInterval, stderr,
                             params.checkFlags, verbosity, err);
      if ((had_err = corrupt != 0))
      {
        /* Print the detailed message before it is replaced by the summary. */
        fputs(gt_error_get(err), stderr);
        fputs("\n", stderr);
        gt_error_set(err, "Integrity check failed for index: %s",
                     EISIntegrityCheckResultStrings[corrupt]);
      }
    }
  }

  if (seq) deleteEncIdxSeq(seq);
  if (inputProject) gt_str_delete(inputProject);
  if (verbosity) freeverboseinfo(&verbosity);
  return had_err?-1:0;
}
int main(int argc, char *argv[]) { GtError *err; GtWord readlong; bool haserr = false, withsequence = false; char *indexname = NULL, *mumlength_string = NULL; if (argc == 3) { withsequence = false; indexname = argv[1]; mumlength_string = argv[2]; } else { if (argc == 4) { if (strcmp(argv[1],"-s") != 0) { usage(argv[0]); return EXIT_FAILURE; } withsequence = true; indexname = argv[2]; mumlength_string = argv[3]; } else { usage(argv[0]); return EXIT_FAILURE; } } if (sscanf(mumlength_string,GT_WD,&readlong) != 1 || readlong < 0) { usage(argv[0]); return EXIT_FAILURE; } gt_lib_init(); err = gt_error_new(); if (gt_stree_mum(indexname, (GtUword) readlong, withsequence, err) != 0) { fprintf(stderr,"%s: failure when enumerating branch locs for " " index %s: %s\n", argv[0],indexname,gt_error_get(err)); haserr = true; } gt_error_delete(err); if (gt_lib_clean() != 0) { return GT_EXIT_PROGRAMMING_ERROR; } return haserr ? EXIT_FAILURE : EXIT_SUCCESS; }
/**
 * Map file1 and file2 into memory and run checkfunction on their contents
 * twice: first with the forward flag set, then with it cleared.
 *
 * If either file cannot be mapped, the error is printed and the process
 * exits with GT_EXIT_PROGRAMMING_ERROR.
 */
void gt_runcheckfunctionontwofiles(Checkcmppairfuntype checkfunction,
                                   const char *file1,
                                   const char *file2)
{
  const GtUchar *seq1, *seq2;
  size_t len1, len2;
  GtError *err = gt_error_new();

  seq1 = (const GtUchar *) gt_fa_mmap_read(file1,&len1,err);
  if (seq1 == NULL)
  {
    fprintf(stderr, "error: %s\n", gt_error_get(err));
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  seq2 = (const GtUchar *) gt_fa_mmap_read(file2,&len2,err);
  if (seq2 == NULL)
  {
    fprintf(stderr, "error: %s\n", gt_error_get(err));
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  gt_error_delete(err);

  /* Forward pass first, then the non-forward pass. */
  checkfunction(true, seq1, (GtUword) len1, seq2, (GtUword) len2);
  checkfunction(false, seq1, (GtUword) len1, seq2, (GtUword) len2);

  gt_fa_xmunmap((void *) seq1);
  gt_fa_xmunmap((void *) seq2);
}
int main(int argc, const char *argv[]) { GtNodeStream *gff3_in_stream; GtGenomeNode *gn; GtError *err; int had_err; if (gt_version_check(GT_MAJOR_VERSION, GT_MINOR_VERSION, GT_MICRO_VERSION)) { fprintf(stderr, "error: %s\n", gt_version_check(GT_MAJOR_VERSION, GT_MINOR_VERSION, GT_MICRO_VERSION)); return EXIT_FAILURE; } /* initialize */ gt_lib_init(); /* create error object */ err = gt_error_new(); /* create GFF3 input stream (with ID attribute checking) */ gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc-1, argv+1); gt_gff3_in_stream_check_id_attributes((GtGFF3InStream*) gff3_in_stream); /* pull the features through the stream and free them afterwards */ while (!(had_err = gt_node_stream_next(gff3_in_stream, &gn, err)) && gn) gt_genome_node_delete(gn); /* handle error */ if (had_err) fprintf(stderr, "%s: error: %s\n", argv[0], gt_error_get(err)); else printf("input is valid GFF3\n"); /* free */ gt_node_stream_delete(gff3_in_stream); gt_error_delete(err); if (had_err) return EXIT_FAILURE; return EXIT_SUCCESS; }
/**
 * Run a tool function inside a fully initialised allocator environment.
 *
 * An error left in the error object is printed to stderr together with the
 * program name.
 *
 * @return 2 if allocator cleanup reports a problem, EXIT_FAILURE if the tool
 *         returned non-zero, EXIT_SUCCESS otherwise
 */
int gt_tooldriver(int(*tool)(int argc, const char **argv, GtError*),
                  int argc, char *argv[])
{
  GtError *err;
  int tool_rval;

  gt_allocators_init();

  err = gt_error_new();
  gt_error_set_progname(err, argv[0]);
  tool_rval = tool(argc, (const char**) argv, err);

  if (gt_error_is_set(err))
  {
    fprintf(stderr, "%s: error: %s\n", gt_error_get_progname(err),
            gt_error_get(err));
    /* a set error must come with a non-zero return value */
    gt_assert(tool_rval);
  }
  gt_error_delete(err);

  if (gt_allocators_clean())
    return 2; /* programmer error */
  return tool_rval ? EXIT_FAILURE : EXIT_SUCCESS;
}
/**
 * Construct a tool object, run it, and delete it inside a fully initialised
 * allocator environment.
 *
 * An error left in the error object is printed to stderr together with the
 * program name.
 *
 * @return 2 if allocator cleanup reports a problem, EXIT_FAILURE if the tool
 *         returned non-zero, EXIT_SUCCESS otherwise
 */
int gt_toolobjdriver(GtToolConstructor tool_constructor, int argc, char *argv[])
{
  GtTool *toolobj;
  GtError *err;
  int tool_rval;

  gt_allocators_init();

  err = gt_error_new();
  gt_error_set_progname(err, argv[0]);

  toolobj = tool_constructor();
  tool_rval = gt_tool_run(toolobj, argc, (const char**) argv, err);
  gt_tool_delete(toolobj);

  if (gt_error_is_set(err))
  {
    fprintf(stderr, "%s: error: %s\n", gt_error_get_progname(err),
            gt_error_get(err));
    /* a set error must come with a non-zero return value */
    gt_assert(tool_rval);
  }
  gt_error_delete(err);

  if (gt_allocators_clean())
    return 2; /* programmer error */
  return tool_rval ? EXIT_FAILURE : EXIT_SUCCESS;
}
/**
 * Tool entry point: cross-check pattern searches on a packed (BWT) index
 * against searches on the corresponding suffix array.
 *
 * After verifying index integrity, params.numOfSamples patterns are drawn
 * from the encoded sequence. For each pattern the matches reported by the
 * packed index must agree with those found by the suffix array search:
 * position by position when the index carries locate information, by count
 * otherwise.
 *
 * The body is wrapped in do { ... } while (0) so that `break` jumps to the
 * common cleanup code at the end.
 *
 * @return 0 on success or when the option parser requests an exit, -1 on
 *         any failure (described in err)
 */
extern int gt_packedindex_chk_search(int argc, const char *argv[],
                                     GtError *err)
{
  struct chkSearchOptions params;
  Suffixarray suffixarray;
  Enumpatterniterator *epi = NULL;
  bool saIsLoaded = false;
  BWTSeq *bwtSeq = NULL;
  GtStr *inputProject = NULL;
  int parsedArgs;
  bool had_err = false;
  BWTSeqExactMatchesIterator EMIter;
  bool EMIterInitialized = false;
  GtLogger *logger = NULL;
  inputProject = gt_str_new();

  do {
    gt_error_check(err);
    {
      bool exitNow = false;
      switch (parseChkBWTOptions(&parsedArgs, argc, argv, &params,
                                 inputProject, err))
      {
        case GT_OPTION_PARSER_OK:
          break;
        case GT_OPTION_PARSER_ERROR:
          had_err = true;
          exitNow = true;
          break;
        case GT_OPTION_PARSER_REQUESTS_EXIT:
          exitNow = true;
          break;
      }
      if (exitNow)
        break;
    }
    gt_str_set(inputProject, argv[parsedArgs]);

    logger = gt_logger_new(params.verboseOutput,
                           GT_LOGGER_DEFLT_PREFIX, stdout);

    bwtSeq = gt_availBWTSeq(&params.idx.final, logger, err);
    if ((had_err = bwtSeq == NULL))
      break;

    {
      enum verifyBWTSeqErrCode retval =
        gt_BWTSeqVerifyIntegrity(bwtSeq, gt_str_get(inputProject),
                                 params.flags, params.progressInterval,
                                 stderr, logger, err);
      if ((had_err = (retval != VERIFY_BWTSEQ_NO_ERROR)))
      {
        fprintf(stderr, "index integrity check failed: %s\n",
                gt_error_get(err));
        gt_error_set(err, "aborted because of index integrity check fail");
        break;
      }
    }
    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      if ((had_err = !gt_initEmptyEMIterator(&EMIter, bwtSeq)))
      {
        gt_error_set(err, "Cannot create matches iterator for sequence index.");
        break;
      }
      EMIterInitialized = true;
    }
    {
      unsigned long totalLen, dbstart;
      unsigned long trial, patternLen;

      if ((had_err =
           gt_mapsuffixarray(&suffixarray, SARR_SUFTAB | SARR_ESQTAB,
                             gt_str_get(inputProject), NULL, err) != 0))
      {
        gt_error_set(err, "Can't load suffix array project with"
                     " demand for encoded sequence and suffix table files\n");
        break;
      }
      totalLen = gt_encseq_total_length(suffixarray.encseq);
      saIsLoaded = true;
      if ((had_err = (params.minPatLen >= 0L && params.maxPatLen >= 0L
                      && params.minPatLen > params.maxPatLen)))
      {
        gt_error_set(err, "Invalid pattern lengths selected: min=%ld, max=%ld;"
                     " min <= max is required.", params.minPatLen,
                     params.maxPatLen);
        break;
      }
      /* Negative lengths mean "not specified": derive them from the
         recommended prefix length. */
      if (params.minPatLen < 0 || params.maxPatLen < 0)
      {
        unsigned int numofchars
          = gt_alphabet_num_of_chars(
              gt_encseq_alphabet(suffixarray.encseq));
        if (params.minPatLen < 0)
          params.minPatLen
            = gt_recommendedprefixlength(numofchars, totalLen,
                                         GT_RECOMMENDED_MULTIPLIER_DEFAULT,
                                         true);
        if (params.maxPatLen < 0)
          params.maxPatLen =
            MAX(params.minPatLen,
                125 * gt_recommendedprefixlength(numofchars,totalLen,
                                         GT_RECOMMENDED_MULTIPLIER_DEFAULT,
                                         true)/100);
        else
          params.maxPatLen = MAX(params.maxPatLen, params.minPatLen);
      }
      fprintf(stderr, "Using patterns of lengths %lu to %lu\n",
              params.minPatLen, params.maxPatLen);
      if ((had_err = totalLen + 1 != BWTSeqLength(bwtSeq)))
      {
        gt_error_set(err, "base suffix array and index have diferrent lengths!"
                     "%lu vs. %lu", totalLen + 1, BWTSeqLength(bwtSeq));
        break;
      }
      if ((had_err =
           (epi = gt_newenumpatterniterator(params.minPatLen, params.maxPatLen,
                                            suffixarray.encseq,
                                            err)) == NULL))
      {
        fputs("Creation of pattern iterator failed!\n", stderr);
        break;
      }
      for (trial = 0; !had_err && trial < params.numOfSamples; ++trial)
      {
        const GtUchar *pptr = gt_nextEnumpatterniterator(&patternLen, epi);
        GtMMsearchiterator *mmsi =
          gt_mmsearchiterator_new_complete_olain(suffixarray.encseq,
                                                 suffixarray.suftab,
                                                 0,  /* leftbound */
                                                 totalLen, /* rightbound */
                                                 0, /* offset */
                                                 suffixarray.readmode,
                                                 pptr,
                                                 patternLen);
        if (BWTSeqHasLocateInformation(bwtSeq))
        {
          if ((had_err = !gt_reinitEMIterator(&EMIter, bwtSeq, pptr,
                                              patternLen, false)))
          {
            fputs("Internal error: failed to reinitialize pattern match"
                  " iterator", stderr);
            abort();
          }
          gt_assert(gt_EMINumMatchesTotal(&EMIter)
                    == gt_BWTSeqMatchCount(bwtSeq, pptr, patternLen, false));
          gt_assert(gt_EMINumMatchesTotal(&EMIter)
                    == gt_mmsearchiterator_count(mmsi));
          /* Both iterators must yield the same positions in the same order. */
          while (gt_mmsearchiterator_next(&dbstart,mmsi))
          {
            unsigned long matchPos = 0;
            bool match = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = !match))
            {
              gt_error_set(err,
                           "matches of packedindex expired before mmsearch!");
              break;
            }
            if ((had_err = matchPos != dbstart))
            {
              gt_error_set(err, "packedindex match doesn't equal mmsearch "
                           "match result!\n%lu vs. %lu\n", matchPos, dbstart);
              /* stop here: another iteration would overwrite had_err */
              break;
            }
          }
          if (!had_err)
          {
            /* The packed index must not report more matches than mmsearch. */
            unsigned long matchPos;
            bool trailingMatch = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = trailingMatch))
            {
              gt_error_set(err, "matches of mmsearch expired before fmindex!");
            }
          }
        }
        else
        {
          unsigned long numFMIMatches
            = gt_BWTSeqMatchCount(bwtSeq, pptr, patternLen, false),
            numMMSearchMatches = gt_mmsearchiterator_count(mmsi);
          if ((had_err = numFMIMatches != numMMSearchMatches))
          {
            /* suffix array count is the mmsearch count, fmindex count is the
               packed index count */
            gt_error_set(err, "Number of matches not equal for suffix array ("
                         "%lu) and fmindex (%lu).\n",
                         numMMSearchMatches, numFMIMatches);
          }
        }
        /* Always release the search iterator before leaving the iteration. */
        gt_mmsearchiterator_delete(mmsi);
        mmsi = NULL;
        if (had_err)
          break; /* a failed trial is not counted as finished */
        if (params.progressInterval && !((trial + 1) % params.progressInterval))
          putc('.', stderr);
      }
      if (params.progressInterval)
        putc('\n', stderr);
      fprintf(stderr, "Finished %lu of %lu matchings successfully.\n",
              trial, params.numOfSamples);
    }
  } while (0);

  /* Common cleanup; each resource is released only if it was acquired. */
  if (EMIterInitialized) gt_destructEMIterator(&EMIter);
  if (saIsLoaded) gt_freesuffixarray(&suffixarray);
  gt_freeEnumpatterniterator(epi);
  if (bwtSeq) gt_deleteBWTSeq(bwtSeq);
  if (logger) gt_logger_delete(logger);
  if (inputProject) gt_str_delete(inputProject);
  return had_err?-1:0;
}
/*
  End-tag callback for the XML parser reading intermediate output.

  info is the Parseinfo state; name is the tag being closed. The character
  data collected for the element is taken from parseinfo->databuf and, based
  on the tag name, is either stored in the spliced alignment currently under
  construction (parseinfo->currentSA) or in one of the scratch structures in
  parseinfo (eoptype, cutoffs, exoninfo, introninfo, file name buffers).

  Closing SPLICEDALIGNMENT_TAG finishes the current alignment: its backtrace
  path is reversed and passed to parseinfo->saprocessfunc; a non-zero return
  from that function terminates the process.

  SCANUINT, SCANDOUBLE and ILLEGAL_DATA are macros defined outside this
  function. NOTE(review): they presumably parse `data` into `ret` /
  `retdouble` and report malformed content; confirm against their
  definitions before renaming any local variable here.
*/
static void end_element_handler(void *info, const XML_Char *name)
{
  Parseinfo *parseinfo = (Parseinfo*) info;
  GthSA *sa = parseinfo->currentSA;
  GtUword datalength;
  double retdouble;
  GtWord ret;
  char *data;

  /* save data and data length */
  data       = gt_str_get(parseinfo->databuf);
  datalength = gt_str_length(parseinfo->databuf);

  /* perform actions depending on end tag */
  if (strcmp(name, SPLICEDALIGNMENT_TAG) == 0) {
    /* before we store the spliced alignment we have to reverse its edit
       operations */
    gt_assert(sa && gth_sa_backtrace_path(sa));
    gth_backtrace_path_reverse(gth_sa_backtrace_path(sa));

    /* ensure that before an intron which is not in phase the edit operation
       has length 1 (only for protein spliced alignments) */
    gth_backtrace_path_ensure_length_1_before_introns(
                                                    gth_sa_backtrace_path(sa));

    if (parseinfo->saprocessfunc(parseinfo->data , sa,
                                 parseinfo->outputfilename, parseinfo->err)) {
      /* XXX */
      fprintf(stderr, "error: %s\n", gt_error_get(parseinfo->err));
      exit(EXIT_FAILURE);
    }
    /* reset current spliced alignment */
    parseinfo->currentSA = NULL;
  }
  else if (strcmp(name, REFERENCEALPHATYPE_TAG) == 0) {
    if (strcmp(data, "DNA_ALPHA") == 0)
      gth_sa_set_alphatype(sa, DNA_ALPHA);
    else if (strcmp(data, "PROTEIN_ALPHA") == 0) {
      gth_sa_set_alphatype(sa, PROTEIN_ALPHA);
    }
    else {
      ILLEGAL_DATA;
    }
  }
  else if (strcmp(name, DNA_EOP_TYPE_TAG) == 0) {
    /* remember the edit operation type until its length tag is closed */
    if (strcmp(data, "match") == 0)
      parseinfo->eoptype = EOP_TYPE_MATCH;
    else if (strcmp(data, "deletion") == 0)
      parseinfo->eoptype = EOP_TYPE_DELETION;
    else if (strcmp(data, "insertion") == 0)
      parseinfo->eoptype = EOP_TYPE_INSERTION;
    else if (strcmp(data, "mismatch") == 0)
      parseinfo->eoptype = EOP_TYPE_MISMATCH;
    else if (strcmp(data, "intron") == 0)
      parseinfo->eoptype = EOP_TYPE_INTRON;
    else {
      ILLEGAL_DATA;
    }
  }
  else if (strcmp(name, DNA_EOP_LENGTH_TAG) == 0) {
    SCANUINT;
    gth_backtrace_path_add_eop(gth_sa_backtrace_path(sa), parseinfo->eoptype,
                               ret);
  }
  else if (strcmp(name, PROTEIN_EOP_TYPE_TAG) == 0) {
    /* protein alignments accept additional edit operation types */
    if (strcmp(data, "match") == 0)
      parseinfo->eoptype = EOP_TYPE_MATCH;
    else if (strcmp(data, "deletion") == 0)
      parseinfo->eoptype = EOP_TYPE_DELETION;
    else if (strcmp(data, "insertion") == 0)
      parseinfo->eoptype = EOP_TYPE_INSERTION;
    else if (strcmp(data, "mismatch") == 0)
      parseinfo->eoptype = EOP_TYPE_MISMATCH;
    else if (strcmp(data, "intron") == 0)
      parseinfo->eoptype = EOP_TYPE_INTRON;
    else if (strcmp(data, "mismatch_with_1_gap") == 0)
      parseinfo->eoptype = EOP_TYPE_MISMATCH_WITH_1_GAP;
    else if (strcmp(data, "mismatch_with_2_gaps") == 0)
      parseinfo->eoptype = EOP_TYPE_MISMATCH_WITH_2_GAPS;
    else if (strcmp(data, "deletion_with_1_gap") == 0)
      parseinfo->eoptype = EOP_TYPE_DELETION_WITH_1_GAP;
    else if (strcmp(data, "deletion_with_2_gaps") == 0)
      parseinfo->eoptype = EOP_TYPE_DELETION_WITH_2_GAPS;
    else if (strcmp(data, "intron_with_1_base_left") == 0)
      parseinfo->eoptype = EOP_TYPE_INTRON_WITH_1_BASE_LEFT;
    else if (strcmp(data, "intron_with_2_bases_left") == 0)
      parseinfo->eoptype = EOP_TYPE_INTRON_WITH_2_BASES_LEFT;
    else {
      ILLEGAL_DATA;
    }
  }
  else if (strcmp(name, PROTEIN_EOP_LENGTH_TAG) == 0) {
    SCANUINT;
    gth_backtrace_path_add_eop(gth_sa_backtrace_path(sa), parseinfo->eoptype,
                               ret);
  }
  else if (strcmp(name, INDELCOUNT_TAG) == 0) {
    SCANUINT;
    /* ignore indelcount, gets recomputed anyway */
  }
  else if (strcmp(name, GENOMICLENGTHDP_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_dp_length(sa, ret);
  }
  else if (strcmp(name, GENOMICLENGTHTOTAL_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_total_length(sa, ret);
  }
  else if (strcmp(name, GENOMICOFFSET_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_offset(sa, ret);
  }
  else if (strcmp(name, REFERENCELENGTH_TAG) == 0) {
    SCANUINT;
    gth_sa_set_ref_total_length(sa, ret);
  }
  else if (strcmp(name, DPSTARTPOS_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_dp_start(sa, ret);
  }
  else if (strcmp(name, DPENDPOS_TAG) == 0) {
    SCANUINT;
    /* ignore DP end pos, gets recomputed from gen_dp_length anyway */
    gt_assert(gth_sa_gen_dp_end(sa) == ret);
  }
  else if (strcmp(name, GENOMICFILENAME_TAG) == 0) {
    /* save genomic file name */
    gt_str_append_cstr_nt(parseinfo->genomicfilename, data, datalength);
  }
  else if (strcmp(name, GENOMICFILEHASH_TAG) == 0) {
    /* file name was buffered by the preceding file name tag; data holds the
       content of the hash tag */
    gth_sa_set_gen_file_num(sa, process_file(parseinfo->input,
                                        gt_str_get(parseinfo->genomicfilename),
                                             data, false, UNDEF_ALPHA));
    /* reset genomic filename */
    gt_str_reset(parseinfo->genomicfilename);
  }
  else if (strcmp(name, GENOMICSEQNUM_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_seq_num(sa, ret);
  }
  else if (strcmp(name, REFERENCEFILENAME_TAG) == 0) {
    /* save reference file name */
    gt_str_append_cstr_nt(parseinfo->referencefilename, data, datalength);
  }
  else if (strcmp(name, REFERENCEFILEHASH_TAG) == 0) {
    /* same as for the genomic file, but using the alignment's alphabet type */
    gth_sa_set_ref_file_num(sa, process_file(parseinfo->input,
                                      gt_str_get(parseinfo->referencefilename),
                                             data, true,
                                             gth_sa_alphatype(sa)));
    /* reset reference filename */
    gt_str_reset(parseinfo->referencefilename);
  }
  else if (strcmp(name, REFERENCESEQNUM_TAG) == 0) {
    SCANUINT;
    gth_sa_set_ref_seq_num(sa, ret);
  }
  else if (strcmp(name, GENOMICID_TAG) == 0)
    gth_sa_set_gen_id(sa, data);
  else if (strcmp(name, REFERENCEID_TAG) == 0)
    gth_sa_set_ref_id(sa, data);
  else if (strcmp(name, GENOMICSTRANDISFORWARD_TAG) == 0)
    gth_sa_set_gen_strand(sa, parse_boolean(data, parseinfo));
  else if (strcmp(name, REFERENCESTRANDISFORWARD_TAG) == 0)
    gth_sa_set_ref_strand(sa, parse_boolean(data, parseinfo));
  /* cutoff values are collected in parseinfo->cutoffs and committed when the
     enclosing start/end cutoffs tag is closed */
  else if (strcmp(name, GENOMICCUTOFF_TAG) == 0) {
    SCANUINT;
    parseinfo->cutoffs.genomiccutoff = ret;
  }
  else if (strcmp(name, REFERENCECUTOFF_TAG) == 0) {
    SCANUINT;
    parseinfo->cutoffs.referencecutoff = ret;
  }
  else if (strcmp(name, EOPCUTOFF_TAG) == 0) {
    SCANUINT;
    parseinfo->cutoffs.eopcutoff = ret;
  }
  else if (strcmp(name, CUTOFFSSTART_TAG) == 0)
    gth_sa_set_cutoffs_start(sa, &parseinfo->cutoffs);
  else if (strcmp(name, CUTOFFSEND_TAG) == 0)
    gth_sa_set_cutoffs_end(sa, &parseinfo->cutoffs);
  /* exon fields are collected in parseinfo->exoninfo and added to the
     alignment when the exon info tag is closed */
  else if (strcmp(name, LEFTGENOMICEXONBORDER_TAG) == 0) {
    SCANUINT;
    parseinfo->exoninfo.leftgenomicexonborder = ret;
  }
  else if (strcmp(name, RIGHTGENOMICEXONBORDER_TAG) == 0) {
    SCANUINT;
    parseinfo->exoninfo.rightgenomicexonborder = ret;
  }
  else if (strcmp(name, LEFTREFERENCEEXONBORDER_TAG) == 0) {
    SCANUINT;
    parseinfo->exoninfo.leftreferenceexonborder = ret;
  }
  else if (strcmp(name, RIGHTREFERENCEEXONBORDER_TAG) == 0) {
    SCANUINT;
    parseinfo->exoninfo.rightreferenceexonborder = ret;
  }
  else if (strcmp(name, EXONSCORE_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->exoninfo.exonscore = retdouble;
  }
  else if (strcmp(name, EXONINFO_TAG) == 0)
    gth_sa_add_exon(sa, &parseinfo->exoninfo);
  /* intron fields are collected in parseinfo->introninfo and added to the
     alignment when the intron info tag is closed */
  else if (strcmp(name, DONORSITEPROBABILITY_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->introninfo.donorsiteprobability = (GthFlt) retdouble;
  }
  else if (strcmp(name, ACCEPTORSITEPROBABILITY_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->introninfo.acceptorsiteprobability = (GthFlt) retdouble;
  }
  else if (strcmp(name, DONORSITESCORE_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->introninfo.donorsitescore = retdouble;
  }
  else if (strcmp(name, ACCEPTORSITESCORE_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->introninfo.acceptorsitescore = retdouble;
  }
  else if (strcmp(name, INTRONINFO_TAG) == 0)
    gth_sa_add_intron(sa, &parseinfo->introninfo);
  else if (strcmp(name, POLYASTART_TAG) == 0) {
    SCANUINT;
    gth_sa_set_polyAtail_start(sa, ret);
  }
  else if (strcmp(name, POLYAEND_TAG) == 0) {
    SCANUINT;
    gth_sa_set_polyAtail_stop(sa, ret);
  }
  else if (strcmp(name, ALIGNMENTSCORE_TAG) == 0) {
    SCANDOUBLE;
    gth_sa_set_score(sa, retdouble);
  }
  else if (strcmp(name, COVERAGE_TAG) == 0) {
    SCANDOUBLE;
    gth_sa_set_coverage(sa, retdouble);
  }
  else if (strcmp(name, COVERAGEOFGENOMICSEGMENTISHIGHEST_TAG) == 0) {
    gth_sa_set_highest_cov(sa, parse_boolean(data, parseinfo));
  }
  else if (strcmp(name, CUMULATIVELENGTHOFSCOREDEXONS_TAG) == 0) {
    SCANUINT;
    gth_sa_set_cumlen_scored_exons(sa, ret);
  }
}
/*
  Validate all entries of an XRF abbreviation parse tree.

  For each entry, in order:
  - the abbreviation label must be present,
  - the abbreviation must not have occurred in an earlier entry,
  - a shorthand name, if present, must be shorter than 10 characters,
  - a local ID syntax, if present, must be accepted by gt_grep() as a
    regular expression.

  Validation stops at the first violation. Returns 0 if all entries are
  valid, -1 otherwise (with err set).
*/
static int gt_xrf_abbr_parse_tree_validate_entries(
                                 const GtXRFAbbrParseTree *xrf_abbr_parse_tree,
                                 GtError *err)
{
  GtHashmap *seen;
  GtUword idx;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(xrf_abbr_parse_tree);

  seen = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  for (idx = 0;
       idx < gt_xrf_abbr_parse_tree_num_of_entries(xrf_abbr_parse_tree);
       idx++) {
    GtXRFAbbrEntry *entry = *(GtXRFAbbrEntry**)
                            gt_array_get(xrf_abbr_parse_tree->entries, idx);
    const char *abbrev, *shorthand, *id_syntax;

    /* mandatory abbreviation */
    abbrev = gt_xrf_abbr_entry_get_value(entry, XRF_LABEL_ABBREVIATION);
    if (!abbrev) {
      gt_error_set(err, "file \"%s\": line "GT_WU": required "
                        "label \"" XRF_LABEL_ABBREVIATION "\" missing",
                   gt_xrf_abbr_entry_filename(entry),
                   gt_xrf_abbr_entry_line(entry));
      had_err = -1;
      break;
    }

    /* abbreviations must be unique across all entries */
    if (gt_hashmap_get(seen, abbrev)) {
      gt_error_set(err, "file \"%s\": line "GT_WU": duplicate abbreviation "
                        "\"%s\", must be unique",
                   gt_xrf_abbr_entry_filename(entry),
                   gt_xrf_abbr_entry_line(entry),
                   abbrev);
      had_err = -1;
      break;
    }
    gt_hashmap_add(seen, (void*) abbrev, (void*) abbrev);

    /* optional shorthand name: at most 9 characters */
    shorthand = gt_xrf_abbr_entry_get_value(entry, XRF_LABEL_SHORTHAND_NAME);
    if (shorthand && strlen(shorthand) >= 10) {
      gt_error_set(err, "file \"%s\": line "GT_WU": length of "
                        "shorthand name \"%s\" "
                        "is not less than 10 characters",
                   gt_xrf_abbr_entry_filename(entry),
                   gt_xrf_abbr_entry_line(entry), shorthand);
      had_err = -1;
      break;
    }

    /* optional local ID syntax: checked by matching it against "" */
    id_syntax = gt_xrf_abbr_entry_get_value(entry, XRF_LABEL_LOCAL_ID_SYNTAX);
    if (id_syntax) {
      GtError *regex_error = gt_error_new();
      bool match;
      if (gt_grep(&match, id_syntax, "", regex_error)) {
        gt_error_set(err, "file \"%s\": line "GT_WU": invalid "
                          "regular expression \"%s\" (%s)",
                   gt_xrf_abbr_entry_filename(entry),
                   gt_xrf_abbr_entry_line(entry), id_syntax,
                   gt_error_get(regex_error));
        had_err = -1;
      }
      gt_error_delete(regex_error);
      if (had_err)
        break;
    }
  }
  gt_hashmap_delete(seen);
  return had_err;
}
/**
 * Build the intersection of the sequence IDs found in the reference and
 * prediction feature indexes.
 *
 * Problems while fetching seqids are reported through logger; if the logger
 * holds an error afterwards, NULL is returned. Otherwise a warning is logged
 * for every seqid that occurs in only one of the two indexes, and an error is
 * logged if the intersection is empty. The intersection is returned in both
 * of those cases.
 */
GtStrArray* agn_seq_intersection(GtFeatureIndex *refrfeats,
                                 GtFeatureIndex *predfeats, AgnLogger *logger)
{
  GtError *err = gt_error_new();
  GtStrArray *refr_ids, *pred_ids, *shared;
  GtUword outer, inner;

  /* Fetch seqids from both indexes, reusing one error object. */
  refr_ids = gt_feature_index_get_seqids(refrfeats, err);
  if(gt_error_is_set(err))
  {
    agn_logger_log_error(logger, "error fetching seqids for reference: %s",
                         gt_error_get(err));
    gt_error_unset(err);
  }
  pred_ids = gt_feature_index_get_seqids(predfeats, err);
  if(gt_error_is_set(err))
  {
    agn_logger_log_error(logger, "error fetching seqids for prediction: %s",
                         gt_error_get(err));
    gt_error_unset(err);
  }
  gt_error_delete(err);

  if(agn_logger_has_error(logger))
  {
    gt_str_array_delete(refr_ids);
    gt_str_array_delete(pred_ids);
    return NULL;
  }
  shared = agn_gt_str_array_intersection(refr_ids, pred_ids);

  /* Warn about reference sequences that have no prediction annotations. */
  for(outer = 0; outer < gt_str_array_size(refr_ids); outer++)
  {
    const char *refrseq = gt_str_array_get(refr_ids, outer);
    bool found = false;
    for(inner = 0; !found && inner < gt_str_array_size(shared); inner++)
    {
      found = (strcmp(refrseq, gt_str_array_get(shared, inner)) == 0);
    }
    if(!found)
    {
      agn_logger_log_warning(logger, "no prediction annotations found for "
                             "sequence '%s'", refrseq);
    }
  }

  /* Warn about prediction sequences that have no reference annotations. */
  for(outer = 0; outer < gt_str_array_size(pred_ids); outer++)
  {
    const char *predseq = gt_str_array_get(pred_ids, outer);
    bool found = false;
    for(inner = 0; !found && inner < gt_str_array_size(shared); inner++)
    {
      found = (strcmp(predseq, gt_str_array_get(shared, inner)) == 0);
    }
    if(!found)
    {
      agn_logger_log_warning(logger, "no reference annotations found for "
                             "sequence '%s'", predseq);
    }
  }

  if(gt_str_array_size(shared) == 0)
  {
    agn_logger_log_error(logger, "no sequences in common between reference and "
                         "prediction");
  }

  gt_str_array_delete(refr_ids);
  gt_str_array_delete(pred_ids);
  return shared;
}
// Main method int main(int argc, char * const *argv) { GtError *error; GtLogger *logger; GtQueue *streams; GtNodeStream *stream, *last_stream; CanonGFF3Options options = { NULL, NULL, false }; gt_lib_init(); error = gt_error_new(); canon_gff3_parse_options(argc, argv + 0, &options, error); streams = gt_queue_new(); logger = gt_logger_new(true, "", stderr); stream = gt_gff3_in_stream_new_unsorted(argc - optind, (const char **) argv+optind); gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)stream); gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)stream); gt_queue_add(streams, stream); last_stream = stream; if(options.infer) { GtHashmap *type_parents = gt_hashmap_new(GT_HASH_STRING, gt_free_func, gt_free_func); gt_hashmap_add(type_parents, gt_cstr_dup("mRNA"), gt_cstr_dup("gene")); gt_hashmap_add(type_parents, gt_cstr_dup("tRNA"), gt_cstr_dup("gene")); stream = agn_infer_parent_stream_new(last_stream, type_parents); gt_hashmap_delete(type_parents); gt_queue_add(streams, stream); last_stream = stream; } stream = agn_gene_stream_new(last_stream, logger); gt_queue_add(streams, stream); last_stream = stream; if(options.source != NULL) { GtNodeVisitor *ssv = gt_set_source_visitor_new(options.source); stream = gt_visitor_stream_new(last_stream, ssv); gt_queue_add(streams, stream); last_stream = stream; } stream = gt_gff3_out_stream_new(last_stream, options.outstream); if(!options.infer) gt_gff3_out_stream_retain_id_attributes((GtGFF3OutStream *)stream); gt_queue_add(streams, stream); last_stream = stream; if(gt_node_stream_pull(last_stream, error) == -1) { fprintf(stderr, "[CanonGFF3] error processing node stream: %s", gt_error_get(error)); } while(gt_queue_size(streams) > 0) { stream = gt_queue_get(streams); gt_node_stream_delete(stream); } gt_queue_delete(streams); if(options.source != NULL) gt_str_delete(options.source); if(options.outstream != NULL) gt_file_delete(options.outstream); gt_error_delete(error); gt_logger_delete(logger); gt_lib_clean(); return 
0; }
// Unit test for gaeval_visitor_intersect(): load the test GFF3 file, pair
// individual gene features with individual alignment features, and compare the
// ranges returned for each pair against hard-coded expectations. Each
// comparison is reported through agn_unit_test_result(). If the GFF3 file
// cannot be processed, the error is printed and no results are recorded.
static void gv_test_intersect(AgnUnitTest *test)
{
  GtArray *feats = gt_array_new( sizeof(GtFeatureNode *) );
  GtError *error = gt_error_new();
  const char *filename = "data/gff3/gaeval-stream-unit-test-1.gff3";
  GtNodeStream *gff3in = gt_gff3_in_stream_new_unsorted(1, &filename);
  GtNodeStream *fstream = gt_array_out_stream_new(gff3in, feats, error);
  int result = gt_node_stream_pull(fstream, error);
  if(result == -1)
  {
    fprintf(stderr, "[AgnGaevalVisitor::gv_test_intersect] error "
            "processing GFF3: %s\n", gt_error_get(error));
  }
  gt_error_delete(error);
  gt_node_stream_delete(gff3in);
  gt_node_stream_delete(fstream);
  if(result == -1)
  {
    // Release whatever was loaded before the failure instead of leaking it;
    // as at the end of this function, the nodes in `feats` are ours to delete.
    while(gt_array_size(feats) > 0)
    {
      GtGenomeNode *gn = *(GtGenomeNode **)gt_array_pop(feats);
      gt_genome_node_delete(gn);
    }
    gt_array_delete(feats);
    return;
  }

  agn_assert(gt_array_size(feats) == 9);
  GtGenomeNode *g1   = *(GtGenomeNode **)gt_array_get(feats, 1);
  GtGenomeNode *g2   = *(GtGenomeNode **)gt_array_get(feats, 3);
  GtGenomeNode *g3   = *(GtGenomeNode **)gt_array_get(feats, 7);
  GtGenomeNode *est1 = *(GtGenomeNode **)gt_array_get(feats, 0);
  GtGenomeNode *est2 = *(GtGenomeNode **)gt_array_get(feats, 2);
  GtGenomeNode *est3 = *(GtGenomeNode **)gt_array_get(feats, 4);
  GtGenomeNode *est4 = *(GtGenomeNode **)gt_array_get(feats, 5);
  GtGenomeNode *est5 = *(GtGenomeNode **)gt_array_get(feats, 6);
  GtGenomeNode *est6 = *(GtGenomeNode **)gt_array_get(feats, 8);

  // Test 1 has two parts: g1/est1 must yield no result at all, and g1/est2
  // must yield exactly one range. Both parts must hold for the test to pass.
  GtArray *cov = gaeval_visitor_intersect(g1, est1);
  bool test1 = cov == NULL;
  gt_array_delete(cov);
  cov = gaeval_visitor_intersect(g1, est2);
  test1 = test1 && gt_array_size(cov) == 1;
  if(test1)
  {
    GtRange *range01 = gt_array_pop(cov);
    GtRange testrange = { 400, 500 };
    test1 = gt_range_compare(range01, &testrange) == 0;
  }
  agn_unit_test_result(test, "intersect (1)", test1);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g2, est3);
  bool test2 = gt_array_size(cov) == 2;
  if(test2)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 800, 900 };
    GtRange testrange2 = { 1050, 1075 };
    test2 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (2)", test2);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g2, est4);
  bool test3 = gt_array_size(cov) == 2;
  if(test3)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 1070, 1125 };
    GtRange testrange2 = { 1250, 1310 };
    test3 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (3)", test3);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g3, est5);
  bool test4 = gt_array_size(cov) == 2;
  if(test4)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 2000, 3000 };
    GtRange testrange2 = { 4000, 5000 };
    test4 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (4)", test4);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g3, est6);
  bool test5 = gt_array_size(cov) == 2;
  if(test5)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 2500, 3000 };
    GtRange testrange2 = { 4000, 5000 };
    test5 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (5)", test5);
  gt_array_delete(cov);

  // Release the feature array and the nodes it held
  gt_array_delete(feats);
  gt_genome_node_delete(g1);
  gt_genome_node_delete(g2);
  gt_genome_node_delete(g3);
  gt_genome_node_delete(est1);
  gt_genome_node_delete(est2);
  gt_genome_node_delete(est3);
  gt_genome_node_delete(est4);
  gt_genome_node_delete(est5);
  gt_genome_node_delete(est6);
}
/* Print the message stored in <err> to stderr, prefixed with "error: ", and
   terminate the process with EXIT_FAILURE. Does not return. */
static void handle_error(GtError *err)
{
  const char *message = gt_error_get(err);

  fprintf(stderr, "error: %s\n", message);
  exit(EXIT_FAILURE);
}
// Create a GAEVAL node visitor. Alignment features of type cDNA_match,
// EST_match and nucleotide_match are pulled from `astream` into an in-memory
// feature index owned by the visitor; `gparams` is stored as the visitor's
// parameters. A warning is printed if the four weights in `gparams` do not sum
// to 1.0 (within 0.0001). Returns the new visitor, or NULL if the alignment
// stream could not be processed.
GtNodeVisitor*
agn_gaeval_visitor_new(GtNodeStream *astream, AgnGaevalParams gparams)
{
  agn_assert(astream);

  // Create the node visitor
  GtNodeVisitor *nv = gt_node_visitor_create(gaeval_visitor_class());
  AgnGaevalVisitor *v = gaeval_visitor_cast(nv);
  v->alignments = gt_feature_index_memory_new();
  v->tsvout = NULL;
  v->params = gparams;

  // Check that sum of weights is 1.0
  double weights_total = gparams.alpha + gparams.beta +
                         gparams.gamma + gparams.epsilon;
  if(fabs(weights_total - 1.0) > 0.0001)
  {
    fprintf(stderr, "[AgnGaevalVisitor::agn_gaeval_visitor_new] warning: "
            "sum of weights is not 1.0 %.3lf; integrity calculations will be "
            "incorrect\n", weights_total);
  }

  // Set up node stream to load alignment features into memory. Every stream
  // created here is queued so that all of them can be released together.
  GtQueue *streams = gt_queue_new();
  GtNodeStream *stream, *last_stream;
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, "cDNA_match", "cDNA_match");
  gt_hashmap_add(typestokeep, "EST_match", "EST_match");
  gt_hashmap_add(typestokeep, "nucleotide_match", "nucleotide_match");
  stream = agn_filter_stream_new(astream, typestokeep);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_feature_out_stream_new(last_stream, v->alignments);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "cDNA_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "EST_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "nucleotide_match",
                                       "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  // Process the node stream
  GtError *error = gt_error_new();
  int result = gt_node_stream_pull(last_stream, error);
  if(result == -1)
  {
    fprintf(stderr, "[AEGeAn::AgnGaevalStream] error parsing alignments: %s\n",
            gt_error_get(error));
  }

  // The temporary objects are released on both the success and the failure
  // path so that a parse error does not leak them.
  gt_error_delete(error);
  gt_hashmap_delete(typestokeep);
  while(gt_queue_size(streams) > 0)
  {
    stream = gt_queue_get(streams);
    gt_node_stream_delete(stream);
  }
  gt_queue_delete(streams);

  if(result == -1)
  {
    gt_node_visitor_delete(nv);
    return NULL;
  }
  return nv;
}
/* Consistency check for a packed (BWT) index: parse the command line, load
   the index and verify its integrity, load the matching suffix array project,
   then search params.numOfSamples enumerated patterns in both structures and
   compare the reported matches.
   Returns 0 on success (or if the option parser requested an exit) and -1 if
   any step failed; in the failure case <err> is set by this function or by
   the callee that failed. */
extern int gt_packedindex_chk_search(int argc, const char *argv[], GtError *err)
{
  struct chkSearchOptions params;
  Suffixarray suffixarray;
  Enumpatterniterator *epi = NULL;
  bool saIsLoaded = false;
  BWTSeq *bwtSeq = NULL;
  GtStr *inputProject = NULL;
  int parsedArgs;
  bool had_err = false;
  BWTSeqExactMatchesIterator EMIter;
  bool EMIterInitialized = false;
  Verboseinfo *verbosity = NULL;
  inputProject = gt_str_new();

  /* single-pass loop: `break` jumps to the common cleanup code below */
  do
  {
    gt_error_check(err);
    {
      bool exitNow = false;
      switch (parseChkBWTOptions(&parsedArgs, argc, argv, &params,
                                 inputProject, err))
      {
        case OPTIONPARSER_OK:
          break;
        case OPTIONPARSER_ERROR:
          had_err = true;
          exitNow = true;
          break;
        case OPTIONPARSER_REQUESTS_EXIT:
          exitNow = true;
          break;
      }
      if (exitNow)
        break;
    }
    gt_str_set(inputProject, argv[parsedArgs]);

    verbosity = newverboseinfo(params.verboseOutput);

    bwtSeq = availBWTSeq(&params.idx.final, verbosity, err);
    if ((had_err = bwtSeq == NULL))
      break;

    {
      enum verifyBWTSeqErrCode retval =
        BWTSeqVerifyIntegrity(bwtSeq, inputProject, params.flags,
                              params.progressInterval, stderr, verbosity, err);
      if ((had_err = (retval != VERIFY_BWTSEQ_NO_ERROR)))
      {
        fprintf(stderr, "index integrity check failed: %s\n",
                gt_error_get(err));
        gt_error_set(err, "aborted because of index integrity check fail");
        break;
      }
    }
    /* the match iterator is only needed (and only created) if the index
       carries locate information */
    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      if ((had_err = !initEmptyEMIterator(&EMIter, bwtSeq)))
      {
        gt_error_set(err, "Cannot create matches iterator for sequence index.");
        break;
      }
      EMIterInitialized = true;
    }
    {
      Seqpos totalLen, dbstart;
      unsigned long trial, patternLen;

      if ((had_err =
           mapsuffixarray(&suffixarray, SARR_SUFTAB | SARR_ESQTAB,
                          inputProject, NULL, err) != 0))
      {
        gt_error_set(err, "Can't load suffix array project with"
                     " demand for encoded sequence and suffix table files\n");
        break;
      }
      totalLen = getencseqtotallength(suffixarray.encseq);
      saIsLoaded = true;
      if ((had_err = (params.minPatLen >= 0L && params.maxPatLen >= 0L
                      && params.minPatLen > params.maxPatLen)))
      {
        gt_error_set(err, "Invalid pattern lengths selected: min=%ld, max=%ld;"
                     " min <= max is required.", params.minPatLen,
                     params.maxPatLen);
        break;
      }
      /* negative lengths mean "not specified": derive them from the
         recommended prefix length */
      if (params.minPatLen < 0 || params.maxPatLen < 0)
      {
        unsigned int numofchars
          = getencseqAlphabetnumofchars(suffixarray.encseq);
        if (params.minPatLen < 0)
          params.minPatLen = recommendedprefixlength(numofchars, totalLen);
        if (params.maxPatLen < 0)
          params.maxPatLen =
            MAX(params.minPatLen,
                125 * recommendedprefixlength(numofchars, totalLen) / 100);
        else
          params.maxPatLen = MAX(params.maxPatLen, params.minPatLen);
      }
      fprintf(stderr, "Using patterns of lengths %lu to %lu\n",
              params.minPatLen, params.maxPatLen);
      if ((had_err = totalLen + 1 != BWTSeqLength(bwtSeq)))
      {
        gt_error_set(err, "base suffix array and index have diferrent lengths!"
                     FormatSeqpos" vs. "FormatSeqpos,  totalLen + 1,
                     BWTSeqLength(bwtSeq));
        break;
      }
      if ((had_err =
           (epi = newenumpatterniterator(params.minPatLen, params.maxPatLen,
                                         suffixarray.encseq, err)) == NULL))
      {
        fputs("Creation of pattern iterator failed!\n", stderr);
        break;
      }
      for (trial = 0; !had_err && trial < params.numOfSamples; ++trial)
      {
        const GtUchar *pptr = nextEnumpatterniterator(&patternLen, epi);
        MMsearchiterator *mmsi =
          newmmsearchiterator(suffixarray.encseq,
                              suffixarray.suftab,
                              0,  /* leftbound */
                              totalLen, /* rightbound */
                              0, /* offset */
                              suffixarray.readmode,
                              pptr,
                              patternLen);
        if (BWTSeqHasLocateInformation(bwtSeq))
        {
          Seqpos numMatches;
          if ((had_err = !reinitEMIterator(&EMIter, bwtSeq, pptr, patternLen,
                                           false)))
          {
            fputs("Internal error: failed to reinitialize pattern match"
                  " iterator", stderr);
            abort();
          }
          numMatches = EMINumMatchesTotal(&EMIter);
          gt_assert(numMatches == BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                   false));
          gt_assert(EMINumMatchesTotal(&EMIter)
                    == countmmsearchiterator(mmsi));
          /* both iterators must report the same positions in the same order */
          while (nextmmsearchiterator(&dbstart,mmsi))
          {
            Seqpos matchPos = 0;
            bool match = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = !match))
            {
              gt_error_set(err,
                           "matches of packedindex expired before mmsearch!");
              break;
            }
            if ((had_err = matchPos != dbstart))
            {
              gt_error_set(err, "packedindex match doesn't equal mmsearch "
                           "match result!\n"FormatSeqpos" vs. "FormatSeqpos"\n",
                           matchPos, dbstart);
              /* stop here: another iteration would overwrite had_err and
                 could make the mismatch go unreported */
              break;
            }
          }
          if (!had_err)
          {
            Seqpos matchPos;
            bool trailingMatch = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = trailingMatch))
            {
              gt_error_set(err, "matches of mmsearch expired before fmindex!");
            }
          }
        }
        else
        {
          Seqpos numFMIMatches = BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                  false),
            numMMSearchMatches = countmmsearchiterator(mmsi);
          if ((had_err = numFMIMatches != numMMSearchMatches))
          {
            /* arguments follow the order named in the message: suffix array
               (mmsearch) count first, fmindex count second */
            gt_error_set(err, "Number of matches not equal for suffix array ("
                              FormatSeqpos") and fmindex ("FormatSeqpos".\n",
                         numMMSearchMatches, numFMIMatches);
          }
        }
        /* the per-trial iterator is released on every path, including the
           failing ones */
        freemmsearchiterator(&mmsi);
        if (had_err)
          break; /* a failed trial is not counted as finished */
        if (params.progressInterval && !((trial + 1) % params.progressInterval))
          putc('.', stderr);
      }
      if (params.progressInterval)
        putc('\n', stderr);
      fprintf(stderr, "Finished %lu of %lu matchings successfully.\n",
              trial, params.numOfSamples);
    }
  } while (0);

  /* common cleanup: release only what was actually set up */
  if (EMIterInitialized)
    destructEMIterator(&EMIter);
  if (saIsLoaded)
    freesuffixarray(&suffixarray);
  if (epi)
    freeEnumpatterniterator(&epi);
  if (bwtSeq)
    deleteBWTSeq(bwtSeq);
  if (verbosity)
    freeverboseinfo(&verbosity);
  if (inputProject)
    gt_str_delete(inputProject);
  return had_err?-1:0;
}