Exemplo n.º 1
0
/**
 * Sets up a BWTSeq object on top of an existing encoded index sequence:
 * adds the BWT terminator symbol to the alphabet, stores the passed-in
 * tables in the object, builds the cumulative symbol count table and
 * finally calls gt_BWTSeqInitLocateHandling.
 *
 * @param bwtSeq object to fill in, must not be NULL
 * @param seqIdx index sequence to reference from bwtSeq, must not be NULL
 * @param alphabet ownership of alphabet is with the newly produced
 * sequence object if return value is not 0
 * @param counts table that receives the cumulative symbol counts; it is
 * stored in bwtSeq and written up to the index equal to the alphabet
 * size after the terminator has been added
 * @param rangeSort stored in bwtSeq unchanged
 * @param defaultRangeSort passed on to gt_BWTSeqInitLocateHandling
 * @return 0 if the alphabet reports a size of zero, 1 otherwise
 */
static int
initBWTSeqFromEncSeqIdx(BWTSeq *bwtSeq, struct encIdxSeq *seqIdx,
                        MRAEnc *alphabet, GtUword *counts,
                        enum rangeSortMode *rangeSort,
                        const enum rangeSortMode *defaultRangeSort)
{
  size_t numSyms;
  Symbol terminatorFlat;
  EISHint rankHint;
  gt_assert(bwtSeq && seqIdx);
  bwtSeq->alphabet = alphabet;
  numSyms = gt_MRAEncGetSize(alphabet);
  if (numSyms == 0)
  {
    /* not expected to happen, but an error return is preferable to a
     * crash further down if the input was tampered with */
    return 0;
  }
  /* FIXME: the terminator mapping should probably live in chardef.h so
   * that it is unique */
  /* FIXME: relies on the alphabet having exactly two ranges */
  gt_MRAEncAddSymbolToRange(alphabet, bwtTerminatorSym, 1);
  gt_assert(gt_MRAEncGetSize(alphabet) ==  numSyms + 1);
  /* from here on numSyms includes the terminator symbol */
  numSyms = gt_MRAEncGetSize(alphabet);
  terminatorFlat = MRAEncMapSymbol(alphabet, UNDEFBWTCHAR);
  bwtSeq->bwtTerminatorFallback = terminatorFlat;
  bwtSeq->bwtTerminatorFallbackRange = 1;
  bwtSeq->count = counts;
  bwtSeq->rangeSort = rangeSort;
  bwtSeq->seqIdx = seqIdx;
  bwtSeq->alphabetSize = numSyms;
  rankHint = newEISHint(seqIdx);
  bwtSeq->hint = rankHint;
  {
    GtUword seqLen = EISLength(seqIdx), *cumul = bwtSeq->count;
    Symbol sym = 0;
    cumul[0] = 0;
    /* symbols below the one the terminator is mapped to: plain prefix sum
     * of their ranks over the whole sequence */
    while (sym < terminatorFlat)
    {
      cumul[sym + 1] = cumul[sym]
        + EISSymTransformedRank(seqIdx, sym, seqLen, rankHint);
      ++sym;
    }
    /* the symbol the terminator has been mapped to gets one occurrence
     * subtracted; that occurrence is accounted for at the very end */
    cumul[sym + 1] = cumul[sym]
      + EISSymTransformedRank(seqIdx, sym, seqLen, rankHint) - 1;
    gt_assert(cumul[sym + 1] >= cumul[sym]);
    /* remaining symbols, continuing the prefix sum */
    sym += 2;
    while (sym < numSyms)
    {
      cumul[sym] = cumul[sym - 1]
        + EISSymTransformedRank(seqIdx, sym - 1, seqLen, rankHint);
      ++sym;
    }
    /* last entry: add the single occurrence of the terminator */
    cumul[sym] = cumul[sym - 1] + 1;
#ifdef EIS_DEBUG
    gt_log_log("count[alphabetSize]="GT_WU", len="GT_WU"",cumul[numSyms],
               seqLen);
    for (sym = 0; sym <= numSyms; ++sym)
      gt_log_log("count[%u]="GT_WU"", (unsigned)sym, cumul[sym]);
#endif
    /* the counts of all symbols together must cover the whole sequence */
    gt_assert(cumul[numSyms] == seqLen);
  }
  gt_BWTSeqInitLocateHandling(bwtSeq, defaultRangeSort);
  return 1;
}
Exemplo n.º 2
0
/**
 * Command line driver for the packed index integrity check.
 *
 * Parses the options, loads the index whose project name is found in
 * argv at the position reported by parseChkIndexOptions, and runs
 * EISVerifyIntegrity on it. Progress and diagnostic output goes to
 * stderr.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments
 * @param err error object; receives a message if loading the index or
 * the integrity check fails
 * @return -1 if option parsing reports an error, the index cannot be
 * loaded or the integrity check returns a non-zero result; 0 otherwise
 * (including the case where the option parser requests an exit)
 */
extern int
gt_packedindex_chk_integrity(int argc, const char *argv[], GtError *err)
{
  struct encIdxSeq *index;
  struct chkIndexOptions options;
  GtStr *projectName;
  int firstOperand;
  int had_err = 0;
  Verboseinfo *verbosity = NULL;
  gt_error_check(err);

  switch (parseChkIndexOptions(&firstOperand, argc, argv, &options, err))
  {
    case OPTIONPARSER_ERROR:
      return -1;
    case OPTIONPARSER_REQUESTS_EXIT:
      return 0;
    case OPTIONPARSER_OK:
      break;
  }

  projectName = gt_str_new_cstr(argv[firstOperand]);

  verbosity = newverboseinfo(options.verboseOutput);

  index = loadEncIdxSeq(projectName, options.encType, options.EISFeatureSet,
                        verbosity, err);
  if (index == NULL)
  {
    had_err = 1;
    gt_error_set(err, "Failed to load index: %s", gt_str_get(projectName));
  }
  else
  {
    int checkResult;
    fprintf(stderr, "# Using index over sequence "FormatSeqpos
            " symbols long.\n", EISLength(index));
    checkResult
      = EISVerifyIntegrity(index, projectName, options.skipCount,
                           options.progressInterval, stderr,
                           options.checkFlags, verbosity, err);
    if (checkResult != 0)
    {
      had_err = 1;
      /* print the message currently held by err before it is replaced
       * by the summary message below */
      fputs(gt_error_get(err), stderr);
      fputs("\n", stderr);
      gt_error_set(err, "Integrity check failed for index: %s",
                   EISIntegrityCheckResultStrings[checkResult]);
    }
  }

  /* release everything acquired above, whichever path was taken */
  if (index)
  {
    deleteEncIdxSeq(index);
  }
  if (projectName)
  {
    gt_str_delete(projectName);
  }
  if (verbosity)
  {
    freeverboseinfo(&verbosity);
  }
  return had_err ? -1 : 0;
}