/*
 * Tool runner for read compression.
 *
 * Obtains an alphabet (from the file named by <arguments->smap> if that string
 * is non-empty, otherwise the DNA alphabet), creates an HCR encoder for
 * <arguments->files>, configures its sampling and encodes into the output
 * named by <arguments->name>.
 *
 * Returns 0 on success and 1 if the alphabet or encoder could not be created
 * or the encoding call reported a failure.
 */
static int gt_compreads_compress_runner(GT_UNUSED int argc,
                                        GT_UNUSED const char **argv,
                                        GT_UNUSED int parsed_args,
                                        void *tool_arguments,
                                        GtError *err)
{
  GtCsrHcrEncodeArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlphabet *alpha = NULL;
  GtHcrEncoder *hcre = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("start");
    /* verify the timer before it is used, not afterwards */
    gt_assert(timer);
    gt_timer_start(timer);
  }

  if (gt_str_length(arguments->smap) > 0) {
    alpha = gt_alphabet_new_from_file_no_suffix(gt_str_get(arguments->smap),
                                                err);
    if (!alpha) {
      had_err = 1;
    }
  }
  else {
    alpha = gt_alphabet_new_dna();
    if (!alpha) {
      had_err = 1;
    }
  }

  if (!had_err) {
    if (timer != NULL) {
      gt_timer_show_progress(timer, "encoding", stdout);
    }
    hcre = gt_hcr_encoder_new(arguments->files, alpha, arguments->descs,
                              arguments->qrng, timer, err);
    if (!hcre) {
      had_err = 1;
    }
    else {
      /* pagewise sampling takes precedence over regular sampling; if neither
         flag is set the encoder's sampling method is left untouched */
      if (arguments->pagewise) {
        gt_hcr_encoder_set_sampling_page(hcre);
      }
      else if (arguments->regular) {
        gt_hcr_encoder_set_sampling_regular(hcre);
      }
      gt_hcr_encoder_set_sampling_rate(hcre, arguments->srate);

      if (gt_hcr_encoder_encode(hcre, gt_str_get(arguments->name), timer,
                                err) != 0) {
        had_err = 1;
      }
    }
    /* hcre is NULL here if creation failed */
    gt_hcr_encoder_delete(hcre);
  }

  gt_alphabet_delete(alpha);
  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  return had_err;
}
/*
 * Unit test for the encseq-based codon iterator.
 *
 * Builds an encoded sequence from a fixed DNA string and runs the single-mode
 * test helper for the forward, complement, reverse-complement and reverse
 * read modes, then checks that iterators over ranges of length 0, 1 and 2
 * never yield a codon.
 *
 * Returns 0 if all checks pass, a non-zero value otherwise.
 */
int gt_codon_iterator_encseq_unit_test(GtError *err)
{
  int had_err = 0, i, j;
  const char *testseq    = "gctgatcgactgaacatagctagcacggccgcgcgatcgtacgatg",
             *testseq_rc = "catcgtacgatcgcgcggccgtgctagctatgttcagtcgatcagc",
             *testseq_rv = "gtagcatgctagcgcgccggcacgatcgatacaagtcagctagtcg",
             *testseq_cm = "cgactagctgacttgtatcgatcgtgccggcgcgctagcatgctac";
  GtEncseq *encseq;
  GtEncseqBuilder *eb;
  GtCodonIterator *ci;
  GtAlphabet *alpha;
  char n1, n2, n3;
  unsigned int frame;

  gt_error_check(err);

  alpha = gt_alphabet_new_dna();
  eb = gt_encseq_builder_new(alpha);
  gt_encseq_builder_add_cstr(eb, testseq, strlen(testseq), "foo");
  encseq = gt_encseq_builder_build(eb, NULL);

  /* Each read-mode test only runs if all previous ones succeeded, so that a
     failure is neither overwritten by a later success nor followed by a call
     with an already-set error object. */

  /* forward tests */
  had_err = gt_codon_iterator_encseq_single_test(encseq, testseq, testseq,
                                                 GT_READMODE_FORWARD, err);

  /* complement tests */
  if (!had_err) {
    had_err = gt_codon_iterator_encseq_single_test(encseq, testseq,
                                                   testseq_cm,
                                                   GT_READMODE_COMPL, err);
  }

  /* revcompl tests */
  if (!had_err) {
    had_err = gt_codon_iterator_encseq_single_test(encseq, testseq,
                                                   testseq_rc,
                                                   GT_READMODE_REVCOMPL, err);
  }

  /* reverse tests */
  if (!had_err) {
    had_err = gt_codon_iterator_encseq_single_test(encseq, testseq,
                                                   testseq_rv,
                                                   GT_READMODE_REVERSE, err);
  }

  /* lengths < 3 */
  for (j = 0; !had_err && j < 3; j++) {
    ci = gt_codon_iterator_encseq_new_with_readmode(encseq, 10, j,
                                                    GT_READMODE_REVCOMPL,
                                                    NULL);
    i = 10;
    /* any codon delivered for such a short range is a failure */
    while (!(gt_codon_iterator_next(ci, &n1, &n2, &n3, &frame, NULL))) {
      gt_ensure(had_err, false);
    }
    /* i is not modified above, so this check cannot fail as written */
    gt_ensure(had_err, i == 10);
    gt_codon_iterator_delete(ci);
  }

  gt_encseq_delete(encseq);
  gt_encseq_builder_delete(eb);
  gt_alphabet_delete(alpha);
  return had_err;
}
/*
 * Guess the alphabet of <sequence> (of length <seqlen>, which must be > 0).
 *
 * Inspects at most ALPHABET_GUESS_MAX_LENGTH leading characters. As soon as
 * one of them occurs in ALPHABET_GUESS_PROTEIN_CHARS a new protein alphabet
 * is returned; if none does, a new DNA alphabet is returned.
 */
GtAlphabet* gt_alphabet_guess(const char *sequence, unsigned long seqlen)
{
  unsigned long pos, limit;

  gt_assert(sequence && seqlen);

  /* number of characters to inspect: the shorter of sequence and cap */
  limit = (unsigned long) ALPHABET_GUESS_MAX_LENGTH;
  if (seqlen < limit) {
    limit = seqlen;
  }

  pos = 0;
  while (pos < limit) {
    if (strchr(ALPHABET_GUESS_PROTEIN_CHARS, sequence[pos]) != NULL) {
      return gt_alphabet_new_protein();
    }
    pos++;
  }
  return gt_alphabet_new_dna();
}
/*
 * Lua binding: create a new DNA alphabet.
 *
 * Allocates a userdata slot holding a GtAlphabet pointer, stores a freshly
 * created DNA alphabet in it and attaches the ALPHABET_METATABLE metatable.
 * Leaves the userdata on the Lua stack and returns 1 (one result).
 */
static int alphabet_lua_new_dna(lua_State *L)
{
  GtAlphabet **alpha;

  gt_assert(L);

  /* the userdata only stores the pointer, not the alphabet itself */
  alpha = lua_newuserdata(L, sizeof (GtAlphabet*));
  gt_assert(alpha);

  *alpha = gt_alphabet_new_dna();
  gt_assert(*alpha);

  /* the metatable is pushed on top, so the userdata sits at index -2 */
  luaL_getmetatable(L, ALPHABET_METATABLE);
  lua_setmetatable(L, -2);

  return 1;
}
/*
 * Initialise <tyrsearchinfo> for searching in <tyrindex>.
 *
 * Copies mer size, mer table and last-mer pointer from the index, stores the
 * given <showmode> and <searchstrand>, creates a DNA alphabet and allocates
 * two GtUchar buffers: <bytecode> with as many elements as the index reports
 * bytes per mer, and <rcbuf> with <mersize> elements.
 */
static void gt_tyrsearchinfo_init(Tyrsearchinfo *tyrsearchinfo,
                                  const Tyrindex *tyrindex,
                                  unsigned int showmode,
                                  unsigned int searchstrand)
{
  const unsigned long merbytes = gt_tyrindex_merbytes(tyrindex);

  /* caller-supplied settings */
  tyrsearchinfo->showmode = showmode;
  tyrsearchinfo->searchstrand = searchstrand;

  /* values taken over from the index */
  tyrsearchinfo->mersize = gt_tyrindex_mersize(tyrindex);
  tyrsearchinfo->mertable = gt_tyrindex_mertable(tyrindex);
  tyrsearchinfo->lastmer = gt_tyrindex_lastmer(tyrindex);

  /* resources created here */
  tyrsearchinfo->dnaalpha = gt_alphabet_new_dna();
  ALLOCASSIGNSPACE(tyrsearchinfo->bytecode,NULL,GtUchar,merbytes);
  ALLOCASSIGNSPACE(tyrsearchinfo->rcbuf,NULL,GtUchar,tyrsearchinfo->mersize);
}
/*
 * Create a new homopolymer processor for <encseq> with minimum homopolymer
 * length <hmin> (must be > 0; <encseq> must not be NULL).
 *
 * The given <encseq> pointer is stored as is. All thresholds, counters,
 * output settings and optional members start out zero/false/NULL, except
 * for <clenmax> (GT_UNDEF_ULONG), <altmax> (1.0) and <refmin> (0.0). Two
 * distribution objects and a DNA alphabet are created for the new processor.
 */
GtHpolProcessor *gt_hpol_processor_new(GtEncseq *encseq, unsigned long hmin)
{
  GtHpolProcessor *hpp = gt_malloc(sizeof *hpp);

  gt_assert(encseq != NULL);
  gt_assert(hmin > 0);

  /* input and thresholds */
  hpp->encseq = encseq;
  hpp->hmin = hmin;
  hpp->read_hmin = 0;
  hpp->qmax = 0;
  hpp->mapqmin = 0;
  hpp->covmin = 0;
  hpp->clenmax = GT_UNDEF_ULONG;
  hpp->altmax = 1.0;
  hpp->refmin = 0.0;

  /* behaviour switches */
  hpp->allow_partial = false;
  hpp->allow_multiple = false;
  hpp->adjust_s_hlen = false;

  /* owned helper objects */
  hpp->hdist = gt_disc_distri_new();
  hpp->hdist_e = gt_disc_distri_new();
  hpp->alpha = gt_alphabet_new_dna();

  /* counters */
  hpp->nof_h = 0;
  hpp->nof_h_e = 0;
  hpp->hlen_max = 0;
  hpp->nof_complete_edited = 0;
  hpp->nof_complete_not_edited = 0;
  hpp->nof_skipped = 0;
  hpp->nof_unmapped = 0;
  hpp->nof_multihits = 0;
  hpp->nof_replaced = 0;

  /* optional collaborators, unset until configured */
  hpp->cds_oracle = NULL;
  hpp->asp = NULL;
  hpp->processed_segments = NULL;
  hpp->reads_iters = NULL;

  /* output configuration */
  hpp->output_segments = false;
  hpp->outfp_segments = NULL;
  hpp->output_stats = false;
  hpp->output_multihit_stats = false;
  hpp->outfp_stats = NULL;
  hpp->outfiles = NULL;
  hpp->nfiles = 0;

  return hpp;
}
/*
 * Validate the parsed LTRdigest options in <tool_arguments>.
 *
 * - Emits a deprecation warning if <nthreads> is greater than zero.
 * - If a tRNA library file name was given, checks that the file exists.
 * - Checks that a PPT HMM can be built from the PPT options.
 *
 * Returns 0 if everything is fine, -1 (with <err> set) otherwise.
 */
int gt_ltrdigest_arguments_check(GT_UNUSED int rest_argc,
                                 void *tool_arguments,
                                 GtError* err)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  int had_err = 0;

  if (arguments->nthreads > 0) {
    /* note the trailing space after '-j': the adjacent literals are
       concatenated and would otherwise read "'-j'option" */
    gt_warning("The '-threads' option is deprecated. Please use the '-j' "
               "option of the 'gt' call instead, e.g.:\n"
               " gt -j %lu ltrdigest ...",
               arguments->nthreads);
  }

  /* -trnas */
  if (!had_err && arguments->trna_lib
        && gt_str_length(arguments->trna_lib) > 0) {
    if (!gt_file_exists(gt_str_get(arguments->trna_lib))) {
      gt_error_set(err, "File '%s' does not exist!",
                   gt_str_get(arguments->trna_lib));
      had_err = -1;
    }
  }

  if (!had_err) {
    GtHMM *hmm;
    GtAlphabet *alpha;

    /* build the HMM once just to validate the parameters, then discard it */
    alpha = gt_alphabet_new_dna();
    hmm = gt_ppt_hmm_new(alpha, &arguments->ppt_opts);
    if (!hmm) {
      gt_error_set(err, "PPT HMM parameters are not valid!");
      had_err = -1;
    }
    else {
      gt_hmm_delete(hmm);
    }
    gt_alphabet_delete(alpha);
  }

  return had_err;
}
/*
 * Unit test for alphabet equality.
 *
 * Uses a DNA alphabet <a>, a protein alphabet <b> and a clone <c> of <a>, and
 * checks that each alphabet equals itself, that <a> and <b> differ and that
 * <a> equals its clone.
 *
 * Returns 0 if all checks pass, a non-zero value otherwise.
 */
int gt_alphabet_unit_test(GtError *err)
{
  GtAlphabet *a,
             *b,
             *c;
  int had_err = 0;

  gt_error_check(err);

  a = gt_alphabet_new_dna();
  b = gt_alphabet_new_protein();
  c = gt_alphabet_clone(a);

  /* reflexivity */
  gt_ensure(had_err, gt_alphabet_equals(a, a));
  gt_ensure(had_err, gt_alphabet_equals(b, b));
  gt_ensure(had_err, gt_alphabet_equals(c, c));

  /* different alphabets differ, a clone equals its source */
  gt_ensure(had_err, !gt_alphabet_equals(a, b));
  gt_ensure(had_err, gt_alphabet_equals(a, c));

  gt_alphabet_delete(a);
  gt_alphabet_delete(b);
  gt_alphabet_delete(c);

  return had_err;
}
/*
 * Read the leading metadata values of the encoded sequence file
 * "<indexname><GT_ENCSEQFILESUFFIX>" into <emd>.
 *
 * The values are read in file order: platform flag, format version, access
 * type, the length/count fields, the alphabet type and the alphabet
 * definition length. Depending on the alphabet type, <emd->alpha> becomes a
 * DNA alphabet (0), a protein alphabet (1) or an alphabet parsed from the
 * definition stored in the file (2).
 *
 * NOTE(review): NEXTFREAD/NEXTFREADWSIZE are defined outside this block; they
 * are invoked even after a failure (including a failed open), so they
 * presumably do nothing once <had_err> is set, and presumably set <had_err>
 * and <err> themselves on a read error -- confirm against their definition.
 *
 * Returns 0 on success and -1 on failure.
 */
static int readfirstvaluesfromfile(GtEncseqMetadata *emd,
                                   const char *indexname,
                                   GtError *err)
{
  FILE *fp;
  bool had_err = false;
  unsigned long cc, byteoffset = 0, alphatype;
  char *alphadef;

  gt_error_check(err);

  fp = gt_fa_fopen_with_suffix(indexname, GT_ENCSEQFILESUFFIX, "rb", err);
  if (fp == NULL) {
    had_err = true;
  }

  NEXTFREAD(emd->is64bit);
  if (!had_err) {
    if ((int) emd->is64bit > 1) {
      gt_error_set(err, "illegal platform code %u in \"%s%s\"",
                   emd->is64bit, indexname, GT_ENCSEQFILESUFFIX);
      had_err = true;
    }
    /* the index must have been written on a platform with the same word
       size as the one it is read on */
    if (!had_err
          && ((emd->is64bit && sizeof (unsigned long) != (size_t) 8)
              || (!emd->is64bit && sizeof (unsigned long) == (size_t) 8))) {
      gt_error_set(err, "trying to load 64-bit index \"%s%s\" on a 32-bit "
                        "system or vice versa -- please use correct index "
                        "for this platform",
                   indexname, GT_ENCSEQFILESUFFIX);
      had_err = true;
    }
  }

  NEXTFREAD(emd->version);
  if (!had_err) {
    if (emd->version < GT_ENCSEQ_VERSION) {
      gt_error_set(err, "index \"%s%s\" is format version %lu, current is "
                        "%lu -- please re-encode",
                   indexname, GT_ENCSEQFILESUFFIX, emd->version,
                   GT_ENCSEQ_VERSION);
      had_err = true;
    }
  }

  NEXTFREAD(cc);
  if (!had_err) {
    if (cc >= (unsigned long) GT_ACCESS_TYPE_UNDEFINED) {
      gt_error_set(err, "illegal type %lu in \"%s%s\"", cc,
                   indexname, GT_ENCSEQFILESUFFIX);
      had_err = true;
    }
  }

  if (!had_err) {
    emd->sat = (GtEncseqAccessType) cc;
    NEXTFREAD(emd->totallength);
    NEXTFREAD(emd->numofdbsequences);
    NEXTFREAD(emd->numofdbfiles);
    NEXTFREAD(emd->lengthofdbfilenames);
    NEXTFREAD(emd->specialcharinfo);
    NEXTFREAD(emd->minseqlen);
    NEXTFREAD(emd->maxseqlen);
  }

  NEXTFREAD(alphatype);
  if (!had_err) {
    if (alphatype > 2UL) {
      gt_error_set(err, "illegal alphabet type %lu in \"%s%s\"", alphatype,
                   indexname, GT_ENCSEQFILESUFFIX);
      had_err = true;
    }
  }

  if (!had_err) {
    NEXTFREAD(emd->lengthofalphadef);
  }

  /* only construct an alphabet if everything up to here was read cleanly */
  if (!had_err) {
    switch (alphatype) {
      case 0:
        emd->alpha = gt_alphabet_new_dna();
        break;
      case 1:
        emd->alpha = gt_alphabet_new_protein();
        break;
      case 2:
        if (!(emd->lengthofalphadef > 0)) {
          /* the length comes from the file, so report it instead of
             asserting */
          gt_error_set(err, "empty custom alphabet definition in \"%s%s\"",
                       indexname, GT_ENCSEQFILESUFFIX);
          had_err = true;
        }
        else {
          emd->customalphabet = true;
          alphadef = gt_malloc(sizeof (char) * emd->lengthofalphadef);
          NEXTFREADWSIZE(*(alphadef), emd->lengthofalphadef);
          /* do not parse a buffer that could not be read */
          if (!had_err) {
            emd->alpha = gt_alphabet_new_from_string(alphadef,
                                                     emd->lengthofalphadef,
                                                     err);
            if (!emd->alpha) {
              had_err = true;
            }
          }
          gt_free(alphadef);
        }
        break;
    }
    /* a missing alphabet is only a programming error if no failure was
       reported; a failed custom alphabet is a regular error */
    gt_assert(had_err || emd->alpha != NULL);
  }

  gt_fa_xfclose(fp);
  return had_err ? -1 : 0;
}
/*
 * Search for primer binding site (PBS) candidates for <element>.
 *
 * Two windows of length 2 * <o->radius> + 1 are cut out: one from <seq>,
 * starting <o->radius> positions before offset leftltrlen(<element>), and
 * one from <rev_seq>, likewise relative to rightltrlen(<element>). Each
 * sequence of <o->trna_lib> is copied, reverse-complemented and aligned
 * with gt_swalign() against both windows; every alignment is passed to
 * gt_pbs_add_hit() with the forward resp. reverse strand. Finally the hits
 * are sorted with gt_pbs_hit_compare.
 *
 * <seq>, <rev_seq>, <element> and <o> must not be NULL.
 * Returns the new results object.
 */
GtPBSResults* gt_pbs_find(const char *seq,
                          const char *rev_seq,
                          GtLTRElement *element,
                          GtPBSOptions *o,
                          GtError *err)
{
  GtSeq *seq_forward, *seq_rev;
  GtPBSResults *results;
  unsigned long j, nof_trnas;
  GtAlignment *ali;
  GtAlphabet *a;
  GtScoreFunction *sf;

  /* validate the inputs before <o> is dereferenced for the first time */
  gt_assert(seq && rev_seq && element && o);

  a = gt_alphabet_new_dna();
  sf = gt_dna_scorefunc_new(a,
                            o->ali_score_match,
                            o->ali_score_mismatch,
                            o->ali_score_insertion,
                            o->ali_score_deletion);
  gt_assert(sf && a);

  results = gt_pbs_results_new(element, o);

  seq_forward = gt_seq_new(seq + (gt_ltrelement_leftltrlen(element))
                               - (o->radius),
                           2*o->radius + 1,
                           a);
  seq_rev     = gt_seq_new(rev_seq + (gt_ltrelement_rightltrlen(element))
                                   - (o->radius),
                           2*o->radius + 1,
                           a);

  /* the library is not modified in the loop, so query its size once */
  nof_trnas = gt_bioseq_number_of_sequences(o->trna_lib);
  for (j = 0; j < nof_trnas; j++) {
    GtSeq *trna_seq, *trna_from3;
    char *trna_from3_full;
    unsigned long trna_seqlen;

    trna_seq = gt_bioseq_get_seq(o->trna_lib, j);
    trna_seqlen = gt_seq_length(trna_seq);

    /* work on a private copy of the tRNA sequence */
    trna_from3_full = gt_calloc(trna_seqlen, sizeof (char));
    memcpy(trna_from3_full, gt_seq_get_orig(trna_seq),
           sizeof (char)*trna_seqlen);
    /* the return value is deliberately ignored, as in the original code */
    (void) gt_reverse_complement(trna_from3_full, trna_seqlen, err);
    /* NOTE(review): presumably gt_seq_new_own() takes ownership of the
       buffer, which is why it is not freed here -- confirm */
    trna_from3 = gt_seq_new_own(trna_from3_full, trna_seqlen, a);

    ali = gt_swalign(seq_forward, trna_from3, sf);
    gt_pbs_add_hit(results->hits, ali, o, trna_seqlen,
                   gt_seq_get_description(trna_seq), GT_STRAND_FORWARD,
                   results);
    gt_alignment_delete(ali);

    ali = gt_swalign(seq_rev, trna_from3, sf);
    gt_pbs_add_hit(results->hits, ali, o, trna_seqlen,
                   gt_seq_get_description(trna_seq), GT_STRAND_REVERSE,
                   results);
    gt_alignment_delete(ali);

    gt_seq_delete(trna_from3);
  }

  gt_seq_delete(seq_forward);
  gt_seq_delete(seq_rev);
  gt_score_function_delete(sf);
  gt_alphabet_delete(a);

  gt_array_sort(results->hits, gt_pbs_hit_compare);
  return results;
}
/*
 * Tool runner for read decompression.
 *
 * Obtains an alphabet (from the file named by <arguments->smap> if that string
 * is non-empty, otherwise the DNA alphabet) and creates an HCR decoder for
 * <arguments->file>. If <arguments->name> is empty it is set to the basename
 * of <arguments->file>. Then either runs the benchmark (<arguments->bench>
 * != 0) or decodes a range of reads: the range given in <arguments->rng> if
 * both of its ends are defined, all reads otherwise.
 *
 * Returns 0 on success, -1 on failure.
 */
static int gt_compreads_decompress_runner(GT_UNUSED int argc,
                                          GT_UNUSED const char **argv,
                                          GT_UNUSED int parsed_args,
                                          void *tool_arguments,
                                          GtError *err)
{
  GtCsrHcrDecodeArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlphabet *alpha = NULL;
  GtHcrDecoder *hcrd = NULL;
  GtTimer *timer = NULL;
  unsigned long start, end;

  gt_error_check(err);
  gt_assert(arguments);

  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("start");
    /* verify the timer before it is used, not afterwards */
    gt_assert(timer);
    gt_timer_start(timer);
  }

  if (gt_str_length(arguments->smap) > 0) {
    alpha = gt_alphabet_new_from_file_no_suffix(gt_str_get(arguments->smap),
                                                err);
    if (!alpha) {
      had_err = -1;
    }
  }
  else {
    alpha = gt_alphabet_new_dna();
    if (!alpha) {
      had_err = -1;
    }
  }

  if (!had_err) {
    if (timer != NULL) {
      gt_timer_show_progress(timer, "decoding", stdout);
    }

    /* default the output name to the basename of the input file */
    if (gt_str_length(arguments->name) == 0) {
      char *basenameptr;
      basenameptr = gt_basename(gt_str_get(arguments->file));
      gt_str_set(arguments->name, basenameptr);
      gt_free(basenameptr);
    }

    hcrd = gt_hcr_decoder_new(gt_str_get(arguments->file), alpha,
                              arguments->descs, timer, err);
    if (hcrd == NULL) {
      had_err = -1;
    }
    else if (arguments->bench != 0) {
      had_err = gt_compreads_decompress_benchmark(hcrd, arguments->bench,
                                                  timer, err);
    }
    else {
      /* the decoder is not modified before decoding, so ask it once */
      unsigned long nof_reads = gt_hcr_decoder_num_of_reads(hcrd);

      if (arguments->rng.start != GT_UNDEF_ULONG
            && arguments->rng.end != GT_UNDEF_ULONG) {
        if (arguments->rng.start >= nof_reads
              || arguments->rng.end >= nof_reads) {
          gt_error_set(err, "range %lu-%lu includes a read number exceeding "
                            "the total number of reads (%lu)",
                       arguments->rng.start, arguments->rng.end, nof_reads);
          had_err = -1;
        }
        start = arguments->rng.start;
        end = arguments->rng.end;
      }
      else {
        /* NOTE(review): for nof_reads == 0 this wraps around to the largest
           unsigned long, exactly as in the original code -- confirm that
           gt_hcr_decoder_decode_range() copes with that */
        start = 0;
        end = nof_reads - 1;
      }

      if (!had_err) {
        gt_log_log("filebasename: %s", gt_str_get(arguments->name));
        if (gt_hcr_decoder_decode_range(hcrd, gt_str_get(arguments->name),
                                        start, end, timer, err) != 0) {
          had_err = -1;
        }
      }
    }
    /* hcrd is NULL here if creation failed */
    gt_hcr_decoder_delete(hcrd);
  }

  gt_alphabet_delete(alpha);
  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }

  if (had_err) {
    gt_assert(gt_error_is_set(err));
  }
  return had_err;
}
/*
 * Tool runner for linear-space alignment.
 *
 * Steps, in this order:
 *  1. create the two sequence tables, the alignment object and the space
 *     manager (with the configured time/square factor);
 *  2. fill the tables either from the first two entries of
 *     <arguments->strings> or from the first two <arguments->files>;
 *  3. create a DNA or protein alphabet and encode both tables with it;
 *  4. derive the score handler from the arguments; for global protein
 *     alignments without a cost matrix it is replaced by the cost handler
 *     derived from it;
 *  5. optionally parse the two diagonal band bounds;
 *  6. run the all-against-all alignment with linear or affine gap costs;
 *  7. optionally print space peak and overall time;
 *  8. release everything.
 *
 * Returns 0 on success, a non-zero value on failure.
 */
static int gt_linspace_align_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtLinspaceArguments *arguments = tool_arguments;
  GtSequenceTable *sequence_table1, *sequence_table2;
  GtLinspaceManagement *spacemanager;
  GtAlignment *align;
  GtAlphabet *alphabet = NULL;
  GtScoreHandler *scorehandler = NULL;
  GtTimer *linspacetimer = NULL;
  GtWord left_dist = 0, right_dist = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  sequence_table1 = gt_sequence_table_new();
  sequence_table2 = gt_sequence_table_new();
  align = gt_alignment_new();
  spacemanager = gt_linspace_management_new();
  gt_linspace_management_set_TSfactor(spacemanager,
                                      arguments->timesquarefactor);

  /* get sequences */
  if (gt_str_array_size(arguments->strings) > 0) {
    get_onesequence(sequence_table1, arguments->strings, 0);
    sequence_table1->size++;
    get_onesequence(sequence_table2, arguments->strings, 1);
    sequence_table2->size++;
  }
  else if (gt_str_array_size(arguments->files) > 0) {
    had_err = get_fastasequences(sequence_table1,
                                 gt_str_array_get_str(arguments->files, 0),
                                 err);
    if (!had_err) {
      had_err = get_fastasequences(sequence_table2,
                                   gt_str_array_get_str(arguments->files, 1),
                                   err);
    }
  }

  /* choose the alphabet and encode both tables with it */
  if (arguments->dna) {
    alphabet = gt_alphabet_new_dna();
  }
  else {
    gt_assert(arguments->protein);
    alphabet = gt_alphabet_new_protein();
  }
  gt_encode_sequence_table(alphabet, sequence_table1);
  gt_encode_sequence_table(alphabet, sequence_table2);

  /* score handler, converted to a cost handler where required */
  if (!had_err) {
    scorehandler = gt_arguments2scorehandler(arguments, err);
    if (scorehandler == NULL) {
      had_err = -1;
    }
    else if (arguments->global && arguments->protein
               && !arguments->has_costmatrix) {
      GtScoreHandler *costhandler = gt_scorehandler2costhandler(scorehandler);
      gt_scorehandler_delete(scorehandler);
      scorehandler = costhandler;
    }
  }

  /* get diagonal band */
  if (!had_err && arguments->diagonal
        && gt_str_array_size(arguments->diagonalbonds) > 0) {
    had_err = gt_parse_score_value(__LINE__, &left_dist,
                                   gt_str_array_get(arguments->diagonalbonds,
                                                    0),
                                   false, err);
    if (!had_err) {
      had_err = gt_parse_score_value(__LINE__, &right_dist,
                                     gt_str_array_get(arguments->diagonalbonds,
                                                      1),
                                     false, err);
    }
  }

  if (!had_err && arguments->spacetime) {
    linspacetimer = gt_timer_new();
  }

  /* alignment functions with linear or affine gap costs */
  if (!had_err) {
    /* affine costs are used exactly when no linear costs were given */
    bool affine = !(gt_str_array_size(arguments->linearcosts) > 0);

    if (affine) {
      gt_assert(gt_str_array_size(arguments->affinecosts) > 0);
    }
    had_err = gt_all_against_all_alignment_check(
                                        affine,
                                        align,
                                        arguments,
                                        spacemanager,
                                        scorehandler,
                                        gt_alphabet_characters(alphabet),
                                        gt_alphabet_wildcard_show(alphabet),
                                        sequence_table1,
                                        sequence_table2,
                                        left_dist,
                                        right_dist,
                                        linspacetimer,
                                        err);
  }

  /* spacetime option */
  if (!had_err && arguments->spacetime) {
    printf("# combined space peak in kilobytes: %f\n",
           GT_KILOBYTES(gt_linspace_management_get_spacepeak(spacemanager)));
    gt_timer_show_formatted(linspacetimer,
                            "# TIME overall " GT_WD ".%02ld\n", stdout);
  }

  /* cleanup */
  gt_timer_delete(linspacetimer);
  gt_linspace_management_delete(spacemanager);
  gt_sequence_table_delete(sequence_table1);
  gt_sequence_table_delete(sequence_table2);
  gt_alignment_delete(align);
  gt_alphabet_delete(alphabet);
  gt_scorehandler_delete(scorehandler);

  return had_err;
}
/*
 * Unit test for gt_encseq_get_gc().
 *
 * Builds four encoded sequences over the DNA alphabet and compares the
 * returned per-sequence values with the expected ones:
 *  1. "aaaaaa"                       -> 0.0
 *  2. "cccccc"                       -> 1.0
 *  3. "acgtacgt" and "acgtn"         -> 0.5 / 0.5 with the first flag false,
 *                                       0.5 / 0.4 with the first flag true
 *  4. as 3., but mirrored            -> four values, see the checks below
 *
 * Each stage only runs if the previous ones succeeded.
 * Returns 0 if all checks pass, a non-zero value otherwise.
 */
int gt_encseq_gc_unit_test(GtError *err)
{
  int had_err = 0;
  double *results;
  GtEncseqBuilder *eb;
  GtEncseq *encseq;
  const char testseq1[] = "aaaaaa",
             testseq2[] = "cccccc",
             testseq3[] = "acgtacgt",
             testseq4[] = "acgtn";
  GtAlphabet *alpha;

  gt_error_check(err);
  alpha = gt_alphabet_new_dna();

  /* test a-seq */
  eb = gt_encseq_builder_new(alpha);
  gt_encseq_builder_create_ssp_tab(eb);
  gt_encseq_builder_enable_description_support(eb);
  gt_encseq_builder_add_cstr(eb, testseq1, 6UL, "only a");
  encseq = gt_encseq_builder_build(eb, err);
  if ((results = gt_encseq_get_gc(encseq, false, true, err)) != NULL) {
    gt_ensure(had_err, gt_double_equals_double(results[0], 0.0));
  }
  else {
    had_err = -1;
  }
  gt_free(results);
  gt_encseq_builder_delete(eb);
  gt_encseq_delete(encseq);

  if (!had_err) {
    /* test c-seq */
    eb = gt_encseq_builder_new(alpha);
    gt_encseq_builder_create_ssp_tab(eb);
    gt_encseq_builder_enable_description_support(eb);
    gt_encseq_builder_add_cstr(eb, testseq2, 6UL, "only c");
    encseq = gt_encseq_builder_build(eb, err);
    if ((results = gt_encseq_get_gc(encseq, false, true, err)) != NULL) {
      gt_ensure(had_err, gt_double_equals_one(results[0]));
    }
    else {
      had_err = -1;
    }
    gt_free(results);
    gt_encseq_builder_delete(eb);
    gt_encseq_delete(encseq);
  }

  if (!had_err) {
    /* test dna-seq and dna+special-seq */
    eb = gt_encseq_builder_new(alpha);
    gt_encseq_builder_create_ssp_tab(eb);
    gt_encseq_builder_enable_description_support(eb);
    gt_encseq_builder_add_cstr(eb, testseq3, 8UL, "0.5");
    gt_encseq_builder_add_cstr(eb, testseq4, 5UL, "0.5+special");
    encseq = gt_encseq_builder_build(eb, err);
    if ((results = gt_encseq_get_gc(encseq, false, true, err)) != NULL) {
      gt_ensure(had_err, gt_double_equals_double(results[0], 0.5));
      gt_ensure(had_err, gt_double_equals_double(results[1], 0.5));
    }
    else {
      had_err = -1;
    }
    gt_free(results);

    if (!had_err) {
      /* count special chars */
      if ((results = gt_encseq_get_gc(encseq, true, true, err)) != NULL) {
        gt_ensure(had_err, gt_double_equals_double(results[0], 0.5));
        gt_ensure(had_err, gt_double_equals_double(results[1], (2.0/5.0)));
      }
      else {
        had_err = -1;
      }
      gt_free(results);
    }
    gt_encseq_builder_delete(eb);
    gt_encseq_delete(encseq);
  }

  if (!had_err) {
    /* test mirrored dna-seq and dna+special-seq */
    eb = gt_encseq_builder_new(alpha);
    gt_encseq_builder_create_ssp_tab(eb);
    gt_encseq_builder_enable_description_support(eb);
    gt_encseq_builder_add_cstr(eb, testseq3, 8UL, "0.5");
    gt_encseq_builder_add_cstr(eb, testseq4, 5UL, "0.5+special");
    encseq = gt_encseq_builder_build(eb, err);

    /* add mirrored sequence */
    had_err = gt_encseq_mirror(encseq, err);

    /* only query the GC content if mirroring succeeded; otherwise the error
       object is already set and the failure must be reported as is */
    if (!had_err) {
      /* sequence wise */
      if ((results = gt_encseq_get_gc(encseq, false, true, err)) != NULL) {
        gt_ensure(had_err, gt_double_equals_double(results[0], 0.5));
        gt_ensure(had_err, gt_double_equals_double(results[1], 0.5));
        gt_ensure(had_err, gt_double_equals_double(results[2], 0.5));
        gt_ensure(had_err, gt_double_equals_double(results[3], 0.5));
      }
      else {
        had_err = -1;
      }
      gt_free(results);
    }

    if (!had_err) {
      /* count special chars */
      if ((results = gt_encseq_get_gc(encseq, true, true, err)) != NULL) {
        gt_ensure(had_err, gt_double_equals_double(results[0], 0.5));
        gt_ensure(had_err, gt_double_equals_double(results[1], (2.0/5.0)));
        gt_ensure(had_err, gt_double_equals_double(results[2], (2.0/5.0)));
        gt_ensure(had_err, gt_double_equals_double(results[3], 0.5));
      }
      else {
        had_err = -1;
      }
      gt_free(results);
    }
    gt_encseq_builder_delete(eb);
    gt_encseq_delete(encseq);
  }

  gt_alphabet_delete(alpha);
  return had_err;
}