/**
 * Fill in a BWTSeq from an already constructed encoded index sequence.
 *
 * The alphabet is extended by the BWT terminator symbol (added to range 1)
 * and the cumulative symbol count table is computed from rank queries over
 * the full length of seqIdx.
 *
 * @param bwtSeq object to initialize, must not be NULL
 * @param seqIdx index the BWT sequence is built on, must not be NULL
 * @param alphabet ownership of alphabet is with the newly produced
 * sequence object if return value is not 0
 * @param counts storage for the cumulative counts; entries 0 up to and
 * including the extended alphabet size are written here
 * @param rangeSort stored in bwtSeq as is
 * @param defaultRangeSort forwarded to gt_BWTSeqInitLocateHandling
 * @return 0 if the alphabet reports size 0, 1 otherwise
 */
static int
initBWTSeqFromEncSeqIdx(BWTSeq *bwtSeq, struct encIdxSeq *seqIdx,
                        MRAEnc *alphabet, GtUword *counts,
                        enum rangeSortMode *rangeSort,
                        const enum rangeSortMode *defaultRangeSort)
{
  size_t numSyms;
  Symbol terminatorFlat;
  EISHint rankHint;

  gt_assert(bwtSeq && seqIdx);
  bwtSeq->alphabet = alphabet;
  numSyms = gt_MRAEncGetSize(alphabet);
  if (numSyms == 0)
  {
    /* should never happen, but an error return beats a segfault if the
     * input has been tampered with */
    return 0;
  }

  /* FIXME: a unique mapping should probably be provided by chardef.h */
  /* FIXME: relies on the alphabet having exactly two ranges */
  gt_MRAEncAddSymbolToRange(alphabet, bwtTerminatorSym, 1);
  gt_assert(gt_MRAEncGetSize(alphabet) == numSyms + 1);
  numSyms = gt_MRAEncGetSize(alphabet);

  terminatorFlat = MRAEncMapSymbol(alphabet, UNDEFBWTCHAR);
  bwtSeq->bwtTerminatorFallback = terminatorFlat;
  bwtSeq->bwtTerminatorFallbackRange = 1;
  bwtSeq->count = counts;
  bwtSeq->rangeSort = rangeSort;
  bwtSeq->seqIdx = seqIdx;
  bwtSeq->alphabetSize = numSyms;
  rankHint = newEISHint(seqIdx);
  bwtSeq->hint = rankHint;

  {
    GtUword seqLen = EISLength(seqIdx);
    GtUword *cumul = bwtSeq->count;
    Symbol sym = 0;

    cumul[0] = 0;
    /* symbols below the one the terminator is mapped to: plain prefix sums */
    while (sym < terminatorFlat)
    {
      cumul[sym + 1] = cumul[sym]
        + EISSymTransformedRank(seqIdx, sym, seqLen, rankHint);
      ++sym;
    }
    /* the symbol the terminator has been mapped to contributes one
     * occurrence less */
    cumul[sym + 1] = cumul[sym]
      + EISSymTransformedRank(seqIdx, sym, seqLen, rankHint) - 1;
    gt_assert(cumul[sym + 1] >= cumul[sym]);
    /* remaining symbols, indexed one ahead of the symbol being ranked */
    for (sym += 2; sym < numSyms; ++sym)
    {
      cumul[sym] = cumul[sym - 1]
        + EISSymTransformedRank(seqIdx, sym - 1, seqLen, rankHint);
    }
    /* last slot accounts for the single terminator occurrence */
    cumul[sym] = cumul[sym - 1] + 1;
#ifdef EIS_DEBUG
    gt_log_log("count[alphabetSize]="GT_WU", len="GT_WU"",cumul[numSyms],
               seqLen);
    for (sym = 0; sym <= numSyms; ++sym)
    {
      gt_log_log("count[%u]="GT_WU"", (unsigned)sym, cumul[sym]);
    }
#endif
    gt_assert(cumul[numSyms] == seqLen);
  }

  gt_BWTSeqInitLocateHandling(bwtSeq, defaultRangeSort);
  return 1;
}
/**
 * Tool entry point: load the encoded index sequence named on the command
 * line and run EISVerifyIntegrity on it.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments; the first argument not consumed by
 * option parsing is used as the index project name
 * @param err error object, set when loading or verification fails
 * @return 0 on success or when the option parser requests exit, -1 if
 * option parsing fails, the index cannot be loaded, or the integrity
 * check reports a problem
 */
extern int
gt_packedindex_chk_integrity(int argc, const char *argv[], GtError *err)
{
  struct encIdxSeq *seq;
  struct chkIndexOptions params;
  GtStr *inputProject;
  int parsedArgs;
  int had_err = 0;
  Verboseinfo *verbosity = NULL;
  gt_error_check(err);

  /* the options struct must be passed by address so the parser can fill it
   * in (the argument had been corrupted to a non-C token) */
  switch (parseChkIndexOptions(&parsedArgs, argc, argv, &params, err))
  {
    case OPTIONPARSER_OK:
      break;
    case OPTIONPARSER_ERROR:
      return -1;
    case OPTIONPARSER_REQUESTS_EXIT:
      return 0;
  }

  inputProject = gt_str_new_cstr(argv[parsedArgs]);

  verbosity = newverboseinfo(params.verboseOutput);

  seq = loadEncIdxSeq(inputProject, params.encType, params.EISFeatureSet,
                      verbosity, err);
  if ((had_err = seq == NULL))
  {
    gt_error_set(err, "Failed to load index: %s", gt_str_get(inputProject));
  }
  else
  {
    fprintf(stderr, "# Using index over sequence "FormatSeqpos
            " symbols long.\n", EISLength(seq));
    {
      int corrupt
        = EISVerifyIntegrity(seq, inputProject, params.skipCount,
                             params.progressInterval, stderr,
                             params.checkFlags, verbosity, err);
      if ((had_err = corrupt != 0))
      {
        /* print the detailed message before it is replaced by the
         * summary set below */
        fputs(gt_error_get(err), stderr);
        fputs("\n", stderr);
        gt_error_set(err, "Integrity check failed for index: %s",
                     EISIntegrityCheckResultStrings[corrupt]);
      }
    }
  }

  /* release everything acquired above, whether or not an error occurred */
  if (seq)
    deleteEncIdxSeq(seq);
  if (inputProject)
    gt_str_delete(inputProject);
  if (verbosity)
    freeverboseinfo(&verbosity);
  return had_err?-1:0;
}