/*
 * Guess the alphabet of <sequence> by inspecting at most
 * ALPHABET_GUESS_MAX_LENGTH of its first <seqlen> characters.
 *
 * Returns a new protein alphabet as soon as one inspected character is
 * contained in ALPHABET_GUESS_PROTEIN_CHARS, and a new DNA alphabet if none
 * of the inspected characters is.
 *
 * <sequence> must be non-NULL and <seqlen> must be non-zero (asserted).
 */
GtAlphabet* gt_alphabet_guess(const char *sequence, unsigned long seqlen)
{
  unsigned long i;

  gt_assert(sequence && seqlen);
  for (i = 0;
       i < seqlen && i < (unsigned long) ALPHABET_GUESS_MAX_LENGTH;
       i++) {
    /* strchr() considers the terminating '\0' of the searched string to be
       part of it, so a NUL byte in <sequence> would always "match". Skip NUL
       bytes so they do not force the protein guess. */
    if (sequence[i] != '\0' &&
        strchr(ALPHABET_GUESS_PROTEIN_CHARS, sequence[i]) != NULL) {
      return gt_alphabet_new_protein();
    }
  }
  return gt_alphabet_new_dna();
}
/*
 * Lua C function: allocates a userdata large enough for one GtAlphabet
 * pointer, stores a newly created protein alphabet in it and attaches the
 * metatable registered under ALPHABET_METATABLE.
 *
 * Returns 1, i.e. one result (the userdata) is left on the Lua stack.
 */
static int alphabet_lua_new_protein(lua_State *L)
{
  GtAlphabet **alpha;

  gt_assert(L);

  /* the userdata only holds the pointer to the alphabet object */
  alpha = lua_newuserdata(L, sizeof (GtAlphabet*));
  gt_assert(alpha);

  *alpha = gt_alphabet_new_protein();
  gt_assert(*alpha);

  /* push the metatable, then bind it to the userdata just below it */
  luaL_getmetatable(L, ALPHABET_METATABLE);
  lua_setmetatable(L, -2);

  return 1;
}
/*
 * Unit test for alphabet comparison: builds a DNA alphabet (a), a protein
 * alphabet (b) and a clone of the DNA alphabet (c), then checks that
 * gt_alphabet_equals() holds for each alphabet against itself, holds between
 * the DNA alphabet and its clone, and does not hold between DNA and protein.
 *
 * Returns the value of <had_err> after all gt_ensure() checks.
 */
int gt_alphabet_unit_test(GtError *err)
{
  int had_err = 0;
  GtAlphabet *a;
  GtAlphabet *b;
  GtAlphabet *c;

  gt_error_check(err);

  /* fixtures */
  a = gt_alphabet_new_dna();
  b = gt_alphabet_new_protein();
  c = gt_alphabet_clone(a);

  /* every alphabet equals itself */
  gt_ensure(had_err, gt_alphabet_equals(a, a));
  gt_ensure(had_err, gt_alphabet_equals(b, b));
  gt_ensure(had_err, gt_alphabet_equals(c, c));

  /* different alphabets differ, a clone compares equal to its source */
  gt_ensure(had_err, !gt_alphabet_equals(a, b));
  gt_ensure(had_err, gt_alphabet_equals(a, c));

  /* tear down */
  gt_alphabet_delete(a);
  gt_alphabet_delete(b);
  gt_alphabet_delete(c);

  return had_err;
}
/*
 * Create a score matrix over the protein alphabet and fill it by parsing the
 * matrix file at <path>.
 *
 * Returns the new score matrix, or NULL if parse_score_matrix() reports an
 * error (the partially built matrix is deleted in that case).
 */
GtScoreMatrix* gt_score_matrix_new_read_protein(const char *path, GtError *err)
{
  GtAlphabet *alphabet;
  GtScoreMatrix *matrix;

  gt_error_check(err);
  gt_assert(path);

  /* build the matrix; the local alphabet handle is released right after
     construction, exactly as the original code does */
  alphabet = gt_alphabet_new_protein();
  matrix = gt_score_matrix_new(alphabet);
  gt_alphabet_delete(alphabet);

  /* fill it from the file; a failed parse discards the matrix */
  if (parse_score_matrix(matrix, path, err)) {
    gt_score_matrix_delete(matrix);
    return NULL;
  }
  return matrix;
}
/*
 * Read the leading header fields of the file <indexname> +
 * GT_ENCSEQFILESUFFIX into <emd> and create the alphabet described there.
 *
 * The fields are consumed in this order: platform flag, format version,
 * access type, totallength, numofdbsequences, numofdbfiles,
 * lengthofdbfilenames, specialcharinfo, minseqlen, maxseqlen, alphabet type,
 * alphabet definition length and -- for alphabet type 2 only -- the alphabet
 * definition itself.
 *
 * Returns 0 on success and -1 on failure. Every validation failure detected
 * in this function stores a message in <err>.
 *
 * NOTE(review): NEXTFREAD/NEXTFREADWSIZE are defined outside this block. They
 * are invoked even after the file could not be opened, so they presumably do
 * nothing once <had_err> is set, and presumably refer to the locals <fp>,
 * <had_err> and <byteoffset> by name -- confirm before renaming any of them.
 */
static int readfirstvaluesfromfile(GtEncseqMetadata *emd,
                                   const char *indexname,
                                   GtError *err)
{
  FILE *fp;
  bool had_err = false;
  unsigned long cc, byteoffset = 0, alphatype;
  char *alphadef;

  gt_error_check(err);
  fp = gt_fa_fopen_with_suffix(indexname, GT_ENCSEQFILESUFFIX, "rb", err);
  if (fp == NULL) {
    had_err = true;
  }

  /* platform flag: must be 0 or 1 and must match the word size of this
     build */
  NEXTFREAD(emd->is64bit);
  if (!had_err) {
    if ((int) emd->is64bit > 1) {
      gt_error_set(err, "illegal platform code %u in \"%s%s\"", emd->is64bit,
                   indexname, GT_ENCSEQFILESUFFIX);
      had_err = true;
    }
    if (!had_err &&
        ((emd->is64bit && sizeof (unsigned long) != (size_t) 8) ||
         (!emd->is64bit && sizeof (unsigned long) == (size_t) 8))) {
      gt_error_set(err, "trying to load 64-bit index \"%s%s\" on a 32-bit "
                        "system or vice versa -- please use correct index "
                        "for this platform", indexname, GT_ENCSEQFILESUFFIX);
      had_err = true;
    }
  }

  /* format version: anything older than the current version is rejected */
  NEXTFREAD(emd->version);
  if (!had_err) {
    if (emd->version < GT_ENCSEQ_VERSION) {
      gt_error_set(err, "index \"%s%s\" is format version %lu, current is "
                        "%lu -- please re-encode", indexname,
                   GT_ENCSEQFILESUFFIX, emd->version, GT_ENCSEQ_VERSION);
      had_err = true;
    }
  }

  /* access type: must be below GT_ACCESS_TYPE_UNDEFINED */
  NEXTFREAD(cc);
  if (!had_err) {
    if (cc >= (unsigned long) GT_ACCESS_TYPE_UNDEFINED) {
      gt_error_set(err, "illegal type %lu in \"%s%s\"", cc, indexname,
                   GT_ENCSEQFILESUFFIX);
      had_err = true;
    }
  }

  /* sequence statistics */
  if (!had_err) {
    emd->sat = (GtEncseqAccessType) cc;
    NEXTFREAD(emd->totallength);
    NEXTFREAD(emd->numofdbsequences);
    NEXTFREAD(emd->numofdbfiles);
    NEXTFREAD(emd->lengthofdbfilenames);
    NEXTFREAD(emd->specialcharinfo);
    NEXTFREAD(emd->minseqlen);
    NEXTFREAD(emd->maxseqlen);
  }

  /* alphabet type: 0 = DNA, 1 = protein, 2 = definition stored in the file */
  NEXTFREAD(alphatype);
  if (!had_err) {
    if (alphatype > 2UL) {
      gt_error_set(err, "illegal alphabet type %lu in \"%s%s\"", alphatype,
                   indexname, GT_ENCSEQFILESUFFIX);
      had_err = true;
    }
  }

  if (!had_err) {
    NEXTFREAD(emd->lengthofalphadef);
    switch (alphatype) {
      case 0:
        emd->alpha = gt_alphabet_new_dna();
        break;
      case 1:
        emd->alpha = gt_alphabet_new_protein();
        break;
      case 2:
        /* The length comes from the file, so it is checked with a regular
           error rather than an assertion that may be compiled out. */
        if (emd->lengthofalphadef > 0) {
          emd->customalphabet = true;
          alphadef = gt_malloc(sizeof (char) * emd->lengthofalphadef);
          NEXTFREADWSIZE(*(alphadef), emd->lengthofalphadef);
          emd->alpha = gt_alphabet_new_from_string(alphadef,
                                                   emd->lengthofalphadef,
                                                   err);
          if (!emd->alpha) {
            had_err = true;
          }
          gt_free(alphadef);
        } else {
          gt_error_set(err, "illegal alphabet definition length in \"%s%s\"",
                       indexname, GT_ENCSEQFILESUFFIX);
          had_err = true;
        }
        break;
    }
    /* An alphabet must exist only if nothing failed above; a failed custom
       alphabet has to be reported via the return value, not by aborting. */
    gt_assert(had_err || emd->alpha != NULL);
  }

  gt_fa_xfclose(fp);
  return had_err ? -1 : 0;
}
/*
 * Tool runner for the linear-space alignment tool.
 *
 * Steps, in order:
 *   1. load the two input sequence sets, either from the two strings in
 *      arguments->strings or from the two FASTA files in arguments->files;
 *   2. create a DNA or protein alphabet and encode both sequence tables;
 *   3. build the score handler (converted with gt_scorehandler2costhandler()
 *      for global protein alignments without a cost matrix);
 *   4. parse the optional diagonal band bounds;
 *   5. run gt_all_against_all_alignment_check() on both tables;
 *   6. with arguments->spacetime, print the space peak and the timer.
 *
 * <tool_arguments> is used as a GtLinspaceArguments object. Returns 0 on
 * success and a non-zero error code otherwise.
 */
static int gt_linspace_align_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtLinspaceArguments *arguments = tool_arguments;
  GtSequenceTable *first_table, *second_table;
  GtAlignment *alignment;
  GtLinspaceManagement *space;
  GtScoreHandler *scores = NULL;
  GtTimer *timer = NULL;
  GtAlphabet *alpha = NULL;
  GtWord band_left = 0, band_right = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  first_table = gt_sequence_table_new();
  second_table = gt_sequence_table_new();
  alignment = gt_alignment_new();
  space = gt_linspace_management_new();
  gt_linspace_management_set_TSfactor(space, arguments->timesquarefactor);

  /* input: strings given directly take precedence over FASTA files */
  if (gt_str_array_size(arguments->strings) > 0) {
    get_onesequence(first_table, arguments->strings, 0);
    first_table->size++;
    get_onesequence(second_table, arguments->strings, 1);
    second_table->size++;
  } else if (gt_str_array_size(arguments->files) > 0) {
    had_err = get_fastasequences(first_table,
                                 gt_str_array_get_str(arguments->files, 0),
                                 err);
    if (!had_err) {
      had_err = get_fastasequences(second_table,
                                   gt_str_array_get_str(arguments->files, 1),
                                   err);
    }
  }

  /* alphabet: anything that is not DNA has to be protein */
  if (!arguments->dna) {
    gt_assert(arguments->protein);
  }
  alpha = arguments->dna ? gt_alphabet_new_dna() : gt_alphabet_new_protein();

  /* encoding is not guarded by had_err, matching the original behavior */
  gt_encode_sequence_table(alpha, first_table);
  gt_encode_sequence_table(alpha, second_table);

  /* scoring scheme */
  if (!had_err) {
    scores = gt_arguments2scorehandler(arguments, err);
    if (scores == NULL) {
      had_err = -1;
    } else if (arguments->global && arguments->protein &&
               !arguments->has_costmatrix) {
      GtScoreHandler *costs = gt_scorehandler2costhandler(scores);
      gt_scorehandler_delete(scores);
      scores = costs;
    }
  }

  /* diagonal band: left bound first, right bound only if the left parsed */
  if (!had_err && arguments->diagonal &&
      gt_str_array_size(arguments->diagonalbonds) > 0) {
    had_err = gt_parse_score_value(__LINE__, &band_left,
                                   gt_str_array_get(arguments->diagonalbonds,
                                                    0),
                                   false, err);
    if (!had_err) {
      had_err = gt_parse_score_value(__LINE__, &band_right,
                                     gt_str_array_get(arguments->diagonalbonds,
                                                      1),
                                     false, err);
    }
  }

  if (!had_err && arguments->spacetime) {
    timer = gt_timer_new();
  }

  /* alignment: linear gap costs if given, affine gap costs otherwise */
  if (!had_err) {
    const bool affine = gt_str_array_size(arguments->linearcosts) == 0;

    if (affine) {
      gt_assert(gt_str_array_size(arguments->affinecosts) > 0);
    }
    had_err = gt_all_against_all_alignment_check(affine, alignment, arguments,
                                             space, scores,
                                             gt_alphabet_characters(alpha),
                                             gt_alphabet_wildcard_show(alpha),
                                             first_table, second_table,
                                             band_left, band_right,
                                             timer, err);
  }

  /* spacetime option */
  if (!had_err && arguments->spacetime) {
    printf("# combined space peak in kilobytes: %f\n",
           GT_KILOBYTES(gt_linspace_management_get_spacepeak(space)));
    gt_timer_show_formatted(timer, "# TIME overall " GT_WD ".%02ld\n", stdout);
  }

  /* release everything; objects that were never created are still NULL */
  gt_timer_delete(timer);
  gt_linspace_management_delete(space);
  gt_sequence_table_delete(first_table);
  gt_sequence_table_delete(second_table);
  gt_alignment_delete(alignment);
  gt_alphabet_delete(alpha);
  gt_scorehandler_delete(scores);

  return had_err;
}