/* Lua binding: fetches the encoded sequence handle from stack index 1 via
   check_encseq(), pushes the result of gt_encseq_has_multiseq_support() for
   it as a Lua boolean and reports one result value to the interpreter. */
static int encseq_lua_has_multiseq_support(lua_State *L)
{
  GtEncseq **handle = check_encseq(L, 1);
  const int supported = gt_encseq_has_multiseq_support(*handle);

  lua_pushboolean(L, supported);
  return 1; /* one value left on the Lua stack */
}
static void gt_querysubstringmatch(bool selfmatch, const GtEncseq *dbencseq, const ESASuffixptr *suftabpart, GtReadmode readmode, GtUword numberofsuffixes, uint64_t queryunitnum, GtQueryrepresentation *queryrep, GtUword minmatchlength, GtProcessquerymatch processquerymatch, void *processquerymatchinfo, GtQuerymatch *querymatchspaceptr) { GtMMsearchiterator *mmsi; GtUword totallength, localqueryoffset = 0; uint64_t localqueryunitnum = queryunitnum; GtQuerysubstring querysubstring; gt_assert(numberofsuffixes > 0); totallength = gt_encseq_total_length(dbencseq); querysubstring.queryrep = queryrep; for (querysubstring.currentoffset = 0; querysubstring.currentoffset <= queryrep->seqlen - minmatchlength; querysubstring.currentoffset++) { GtUword dbstart; mmsi = gt_mmsearchiterator_new(dbencseq, suftabpart, 0, /* leftbound */ numberofsuffixes - 1, /* rightbound */ 0, /* offset */ readmode, &querysubstring, minmatchlength); while (gt_mmsearchiterator_next(&dbstart,mmsi)) { if (gt_mmsearch_isleftmaximal(dbencseq, readmode, dbstart, &querysubstring)) { GtUword dbseqnum, dbseqstartpos, dbseqlen, extend; extend = gt_mmsearch_extendright(dbencseq, mmsi->esr, readmode, totallength, dbstart + minmatchlength, &querysubstring, minmatchlength); if (gt_encseq_has_multiseq_support(dbencseq)) { dbseqnum = gt_encseq_seqnum(dbencseq,dbstart); dbseqstartpos = gt_encseq_seqstartpos(dbencseq,dbseqnum); dbseqlen = gt_encseq_seqlength(dbencseq,dbseqnum); } else { dbseqnum = dbseqstartpos = dbseqlen = 0; } gt_querymatch_init(querymatchspaceptr, minmatchlength + extend, dbstart, dbseqnum, dbstart - dbseqstartpos, dbseqlen, 0, /* score */ 0, /* edist */ selfmatch, localqueryunitnum, minmatchlength + extend, localqueryoffset, queryrep->seqlen); processquerymatch(processquerymatchinfo,querymatchspaceptr); } } gt_mmsearchiterator_delete(mmsi); mmsi = NULL; if (gt_mmsearch_accessquery(queryrep,querysubstring.currentoffset) == (GtUchar) SEPARATOR) { localqueryunitnum++; localqueryoffset = 0; } else { 
localqueryoffset++; } } }
/*
  Tool runner: loads the encoded sequence named by argv[parsed_args] and runs
  the consistency checks on it.

  The start position check comes first, followed by
  gt_encseq_check_consistency() for read modes 0..3 (for a non-DNA alphabet
  only the forward and reverse read modes are checked). If these succeed, the
  special range, mark position and min/max checks are run, and finally, if
  arguments->prefixlength is positive, gt_verifymappedstr().

  Returns 0 on success and a non-zero value if loading or any check failed.
*/
static int gt_encseq_check_runner(GT_UNUSED int argc, const char **argv,
                                  int parsed_args, void *tool_arguments,
                                  GtError *err)
{
  GtEncseqCheckArguments *arguments = tool_arguments;
  GtEncseqLoader *encseq_loader;
  GtEncseq *encseq;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  encseq_loader = gt_encseq_loader_new();
  encseq = gt_encseq_loader_load(encseq_loader, argv[parsed_args], err);
  if (encseq == NULL)
  {
    had_err = -1;
  } else
  {
    int mode;

    gt_encseq_check_startpositions(encseq);
    for (mode = 0; mode < 4; mode++)
    {
      const GtReadmode current = (GtReadmode) mode;

      /* skip every read mode except forward/reverse for non-DNA input */
      if (!gt_alphabet_is_dna(gt_encseq_alphabet(encseq)) &&
          current != GT_READMODE_FORWARD &&
          current != GT_READMODE_REVERSE)
      {
        continue;
      }
      if (gt_encseq_check_consistency(encseq,
                                      gt_encseq_filenames(encseq),
                                      current,
                                      arguments->scantrials,
                                      arguments->multicharcmptrials,
                                      gt_encseq_has_multiseq_support(encseq),
                                      err) != 0)
      {
        had_err = -1;
        break;
      }
    }
    if (!had_err)
    {
      gt_encseq_check_specialranges(encseq);
      gt_encseq_check_markpos(encseq);
      had_err = gt_encseq_check_minmax(encseq, err);
    }
    if (!had_err && arguments->prefixlength > 0 &&
        gt_verifymappedstr(encseq, arguments->prefixlength, err) != 0)
    {
      had_err = -1;
    }
  }
  gt_encseq_delete(encseq);
  gt_encseq_loader_delete(encseq_loader);
  return had_err;
}
static int gt_encseq_info_runner(GT_UNUSED int argc, const char **argv, int parsed_args, void *tool_arguments, GtError *err) { GtEncseqInfoArguments *arguments = tool_arguments; int had_err = 0; GtAlphabet *alpha; const GtUchar *chars; gt_error_check(err); gt_assert(arguments); if (arguments->nomap) { GtEncseqMetadata *emd = gt_encseq_metadata_new(argv[parsed_args], err); if (!emd) had_err = -1; if (!had_err) { if (!arguments->noindexname) { gt_file_xprintf(arguments->outfp, "index name: "); gt_file_xprintf(arguments->outfp, "%s\n", argv[parsed_args]); } gt_file_xprintf(arguments->outfp, "file format version: "); gt_file_xprintf(arguments->outfp, ""GT_WU"\n", gt_encseq_metadata_version(emd)); gt_file_xprintf(arguments->outfp, "64-bit file: "); gt_file_xprintf(arguments->outfp, "%s\n", gt_encseq_metadata_is64bit(emd) ? "yes" : "no"); gt_file_xprintf(arguments->outfp, "total length: "); gt_file_xprintf(arguments->outfp, ""GT_WU"\n", gt_encseq_metadata_total_length(emd)); gt_file_xprintf(arguments->outfp, "number of sequences: "); gt_file_xprintf(arguments->outfp, ""GT_WU"\n", gt_encseq_metadata_num_of_sequences(emd)); gt_file_xprintf(arguments->outfp, "number of files: "); gt_file_xprintf(arguments->outfp, ""GT_WU"\n", gt_encseq_metadata_num_of_files(emd)); gt_file_xprintf(arguments->outfp, "length of shortest/longest " "sequence: "); gt_file_xprintf(arguments->outfp, ""GT_WU"/"GT_WU"\n", gt_encseq_metadata_min_seq_length(emd), gt_encseq_metadata_max_seq_length(emd)); gt_file_xprintf(arguments->outfp, "accesstype: "); gt_file_xprintf(arguments->outfp, "%s\n", gt_encseq_access_type_str(gt_encseq_metadata_accesstype(emd))); alpha = gt_encseq_metadata_alphabet(emd); chars = gt_alphabet_characters(alpha); gt_file_xprintf(arguments->outfp, "alphabet size: "); gt_file_xprintf(arguments->outfp, "%u\n", gt_alphabet_num_of_chars(alpha)); gt_file_xprintf(arguments->outfp, "alphabet characters: "); gt_file_xprintf(arguments->outfp, "%.*s", gt_alphabet_num_of_chars(alpha), 
(char*) chars); if (gt_alphabet_is_dna(alpha)) gt_file_xprintf(arguments->outfp, " (DNA)"); if (gt_alphabet_is_protein(alpha)) gt_file_xprintf(arguments->outfp, " (Protein)"); gt_file_xprintf(arguments->outfp, "\n"); if (arguments->show_alphabet) { GtStr *out = gt_str_new(); gt_alphabet_to_str(alpha, out); gt_file_xprintf(arguments->outfp, "alphabet definition:\n"); gt_file_xprintf(arguments->outfp, "%s\n", gt_str_get(out)); gt_str_delete(out); } } gt_encseq_metadata_delete(emd); } else { GtEncseqLoader *encseq_loader; GtEncseq *encseq; encseq_loader = gt_encseq_loader_new(); if (arguments->mirror) gt_encseq_loader_mirror(encseq_loader); if (!(encseq = gt_encseq_loader_load(encseq_loader, argv[parsed_args], err))) had_err = -1; if (!had_err) { const GtStrArray *filenames; GtUword i; if (!arguments->noindexname) { gt_file_xprintf(arguments->outfp, "index name: "); gt_file_xprintf(arguments->outfp, "%s\n", argv[parsed_args]); } gt_file_xprintf(arguments->outfp, "file format version: "); gt_file_xprintf(arguments->outfp, ""GT_WU"\n", gt_encseq_version(encseq)); gt_file_xprintf(arguments->outfp, "64-bit file: "); gt_file_xprintf(arguments->outfp, "%s\n", gt_encseq_is_64_bit(encseq) ? 
"yes" : "no"); gt_file_xprintf(arguments->outfp, "total length: "); gt_file_xprintf(arguments->outfp, ""GT_WU"\n", gt_encseq_total_length(encseq)); gt_file_xprintf(arguments->outfp, "compressed size: "); gt_file_xprintf(arguments->outfp, ""GT_WU" bytes\n", gt_encseq_sizeofrep(encseq)); gt_file_xprintf(arguments->outfp, "number of sequences: "); gt_file_xprintf(arguments->outfp, ""GT_WU"\n", gt_encseq_num_of_sequences(encseq)); gt_file_xprintf(arguments->outfp, "number of files: "); gt_file_xprintf(arguments->outfp, ""GT_WU"\n", gt_encseq_num_of_files(encseq)); gt_file_xprintf(arguments->outfp, "length of shortest/longest " "sequence: "); gt_file_xprintf(arguments->outfp, ""GT_WU"/"GT_WU"\n", gt_encseq_min_seq_length(encseq), gt_encseq_max_seq_length(encseq)); filenames = gt_encseq_filenames(encseq); gt_file_xprintf(arguments->outfp, "original filenames:\n"); for (i = 0; i < gt_str_array_size(filenames); i++) { gt_file_xprintf(arguments->outfp, "\t%s ("GT_WU" characters)\n", gt_str_array_get(filenames, i), (GtUword) gt_encseq_effective_filelength(encseq, i)); } alpha = gt_encseq_alphabet(encseq); chars = gt_alphabet_characters(alpha); gt_file_xprintf(arguments->outfp, "alphabet size: "); gt_file_xprintf(arguments->outfp, "%u\n", gt_alphabet_num_of_chars(alpha)); gt_file_xprintf(arguments->outfp, "alphabet characters: "); gt_file_xprintf(arguments->outfp, "%.*s", gt_alphabet_num_of_chars(alpha), (char*) chars); if (gt_alphabet_is_dna(alpha)) gt_file_xprintf(arguments->outfp, " (DNA)"); if (gt_alphabet_is_protein(alpha)) gt_file_xprintf(arguments->outfp, " (Protein)"); gt_file_xprintf(arguments->outfp, "\n"); if (arguments->show_alphabet) { GtStr *out = gt_str_new(); gt_alphabet_to_str(alpha, out); gt_file_xprintf(arguments->outfp, "alphabet definition:\n"); gt_file_xprintf(arguments->outfp, "%s\n", gt_str_get(out)); gt_str_delete(out); } gt_file_xprintf(arguments->outfp, "character distribution:\n"); for (i = 0; i < gt_alphabet_num_of_chars(alpha); i++) { GtUword cc; 
cc = gt_encseq_charcount(encseq, gt_alphabet_encode(alpha, chars[i])); gt_file_xprintf(arguments->outfp, "\t%c: "GT_WU" (%.2f%%)\n", (char) chars[i], cc, (cc /(double) (gt_encseq_total_length(encseq) - gt_encseq_num_of_sequences(encseq)+1))*100); } gt_file_xprintf(arguments->outfp, "number of wildcards: "); gt_file_xprintf(arguments->outfp, ""GT_WU" ("GT_WU" range(s))\n", gt_encseq_wildcards(encseq), gt_encseq_realwildcardranges(encseq)); gt_file_xprintf(arguments->outfp, "number of special characters: "); gt_file_xprintf(arguments->outfp, ""GT_WU" ("GT_WU" range(s))\n", gt_encseq_specialcharacters(encseq), gt_encseq_realspecialranges(encseq)); gt_file_xprintf(arguments->outfp, "length of longest non-special " "character stretch: "); gt_file_xprintf(arguments->outfp, ""GT_WU"\n", gt_encseq_lengthoflongestnonspecial(encseq)); gt_file_xprintf(arguments->outfp, "accesstype: "); gt_file_xprintf(arguments->outfp, "%s\n", gt_encseq_access_type_str(gt_encseq_accesstype_get(encseq))); gt_file_xprintf(arguments->outfp, "bits used per character: "); gt_file_xprintf(arguments->outfp, "%f\n", (double) ((uint64_t) CHAR_BIT * (uint64_t) gt_encseq_sizeofrep(encseq)) / (double) gt_encseq_total_length(encseq)); gt_file_xprintf(arguments->outfp, "has special ranges: "); gt_file_xprintf(arguments->outfp, "%s\n", gt_encseq_has_specialranges(encseq) ? "yes" : "no"); gt_file_xprintf(arguments->outfp, "has description support: "); gt_file_xprintf(arguments->outfp, "%s\n", gt_encseq_has_description_support(encseq) ? "yes" : "no"); if (gt_encseq_has_description_support(encseq)) { gt_file_xprintf(arguments->outfp, "length of longest description: "); gt_file_xprintf(arguments->outfp, ""GT_WU"\n", gt_encseq_max_desc_length(encseq)); } gt_file_xprintf(arguments->outfp, "has multiple sequence support: "); gt_file_xprintf(arguments->outfp, "%s\n", gt_encseq_has_multiseq_support(encseq) ? "yes" : "no"); } gt_encseq_delete(encseq); gt_encseq_loader_delete(encseq_loader); } return had_err; }