Exemple #1
0
/*
  Test driver for the merger trie. Streams the index named by indexname
  (requesting SARR_ESQTAB), sets up a Mergertrierep with a single
  Encseqreadinfo entry referring to the streamed encoded sequence and calls
  maketrie() on it. If onlyins is true the trie is only shown/checked, and
  only when compiled with WITHTRIEIDENT (and WITHTRIESHOW); otherwise
  successivelydeletesmallest() is applied to it.
  Returns 0 on success and -1 if streamsuffixarray() reported an error.
*/
int test_trieins(bool onlyins,const GtStr *indexname,GtError *err)
{
  Suffixarray suffixarray;
  bool haserr;

  gt_error_check(err);
  haserr = (streamsuffixarray(&suffixarray,
                              SARR_ESQTAB,
                              indexname,
                              NULL,
                              err) != 0) ? true : false;
  if (!haserr)
  {
    Mergertrierep trierep;
    const GtUchar *characters;
    Seqpos totallength = getencseqtotallength(suffixarray.encseq);

    /* a single read-info entry which refers to the streamed sequence */
    ALLOCASSIGNSPACE(trierep.encseqreadinfo,NULL,Encseqreadinfo,1);
    trierep.encseqreadinfo[0].encseqptr = suffixarray.encseq;
    trierep.encseqreadinfo[0].readmode = suffixarray.readmode;
    characters = getencseqAlphabetcharacters(suffixarray.encseq);
    mergertrie_initnodetable(&trierep,totallength,1U);
    maketrie(&trierep,characters,totallength);
    if (!onlyins)
    {
#ifdef WITHTRIEIDENT
#ifdef WITHTRIESHOW
      showallnoderelations(trierep.root);
#endif
#endif
      successivelydeletesmallest(&trierep,totallength,characters,err);
    } else
    {
      /* insertion-only mode: nothing happens here unless the checking
         code is compiled in */
#ifdef WITHTRIEIDENT
#ifdef WITHTRIESHOW
      showtrie(&trierep,characters);
#endif
      checktrie(&trierep,totallength+1,totallength,err);
#endif
    }
    mergertrie_delete(&trierep);
  }
  /* called on the error path as well, exactly like on success */
  freesuffixarray(&suffixarray);
  if (haserr)
  {
    return -1;
  }
  return 0;
}
Exemple #2
0
static void showexpandcode(const GtBucketspec2 *bucketspec2,
                           unsigned int prefixlength)
{
  Codetype ecode, code2;
  const GtUchar *characters = getencseqAlphabetcharacters(bucketspec2->encseq);

  for (code2 = 0; code2 < (Codetype) bucketspec2->numofcharssquared; code2++)
  {
    char buffer[100];

    ecode = expandtwocharcode(code2,bucketspec2);
    fromkmercode2string(buffer,
                        ecode,
                        bucketspec2->numofchars,
                        prefixlength,
                        (const char *) characters);
    printf("code2=%u = %lu %s\n",(unsigned int) code2,ecode,buffer);
  }
}
Exemple #3
0
static void producelongutput(const LTRharvestoptions *lo,
                             const LTRboundaries *boundaries,
                             const Encodedsequence *encseq,
                             Seqpos offset)
{
  const GtUchar *characters = getencseqAlphabetcharacters(encseq);

  printf(FormatSeqpos "  ",
      PRINTSeqposcast(boundaries->leftLTR_5 -offset + 1));
  printf(FormatSeqpos "  ",
      PRINTSeqposcast(boundaries->rightLTR_3 -offset  + 1));
  printf(FormatSeqpos "  ",
      PRINTSeqposcast((boundaries->rightLTR_3 - boundaries->leftLTR_5
          + 1)));
  printf(FormatSeqpos "  ",
      PRINTSeqposcast(boundaries->leftLTR_5 -offset  + 1));
  printf(FormatSeqpos "  ",
      PRINTSeqposcast(boundaries->leftLTR_3 -offset  + 1));
  printf(FormatSeqpos "  ",
      PRINTSeqposcast((boundaries->leftLTR_3 - boundaries->leftLTR_5
          + 1)));
  if (lo->minlengthTSD > 1U)
  {
    Seqpos j;

    for (j = 0; j < boundaries->lenleftTSD; j++)
    {
      printf("%c",(char) characters[getencodedchar(encseq,
                                                   boundaries->leftLTR_5 -
                                                   boundaries->lenleftTSD + j,
                                                   Forwardmode)]);
    }
    printf("  " FormatSeqpos "  ",
           PRINTSeqposcast(boundaries->lenleftTSD));
  }
  if (lo->motif.allowedmismatches < 4U)
  {
    printf("%c%c..%c%c  ",
        (char) characters[getencodedchar(encseq,/* Random access */
                       boundaries->leftLTR_5,
                       Forwardmode)],
        (char) characters[getencodedchar(encseq,/* Random access */
                       boundaries->leftLTR_5+1,
                       Forwardmode)],
        (char) characters[getencodedchar(encseq,/* Random access */
                       boundaries->leftLTR_3-1,
                       Forwardmode)],
        (char) characters[getencodedchar(encseq,/* Random access */
                       boundaries->leftLTR_3,
                       Forwardmode)] );
  }
  /* increase by 1 */
  printf(FormatSeqpos "  ",
      PRINTSeqposcast(boundaries->rightLTR_5 -offset + 1));
  /* increase by 1 */
  printf(FormatSeqpos "  ",PRINTSeqposcast(boundaries->rightLTR_3 -offset + 1));
  printf(FormatSeqpos "  ",PRINTSeqposcast(boundaries->rightLTR_3
                                           - boundaries->rightLTR_5 + 1));
  if (lo->minlengthTSD > 1U)
  {
    Seqpos j;

    for (j = 0; j < boundaries->lenrightTSD; j++)
    {
      printf("%c", (char) characters[getencodedchar(encseq,
                                                    boundaries->rightLTR_3+j+1,
                                                    Forwardmode)]);
    }
    printf("  " FormatSeqpos "  ",PRINTSeqposcast(boundaries->lenrightTSD));
  }
  if (lo->motif.allowedmismatches < 4U)
  {
    printf("%c%c..%c%c",
        (char) characters[getencodedchar(encseq,/* Randomaccess */
                       boundaries->rightLTR_5,
                       Forwardmode)],
        (char) characters[getencodedchar(encseq,/* Randomaccess */
                       boundaries->rightLTR_5+1,
                       Forwardmode)],
        (char) characters[getencodedchar(encseq,/* Randomaccess */
                       boundaries->rightLTR_3-1,
                       Forwardmode)],
        (char) characters[getencodedchar(encseq,/* Random access */
                       boundaries->rightLTR_3,/* Randomaccess */
                       Forwardmode)] );
  }
  /* print similarity */
  printf("  %.2f", boundaries->similarity);
  /* print sequence number */
  printf("  %lu\n", boundaries->contignumber);
}
Exemple #4
0
/*
  Run the local alignment search configured by idxlocalioptions for every
  sequence delivered by the sequence iterator over
  idxlocalioptions->queryfiles.

  If doonline is set, only the encoded sequence is mapped; otherwise a
  Genericindex is loaded and its encoded sequence is used. Each query is
  processed by multiapplysmithwaterman() if doonline or docompare is set,
  and by indexbasedlocali() if doonline is not set or docompare is set.
  With docompare the matches of both runs are stored and passed to
  checkandresetstorematch(); without it they are reported via showmatch.

  NOTE(review): doonline combined with docompare leaves genericindex NULL
  and therefore trips the gt_assert() before newLimdfsresources();
  presumably the option parser excludes this combination - confirm.

  Returns 0 on success and -1 if loading the index/encoded sequence,
  creating the sequence iterator or fetching a query failed. All resources
  acquired up to the point of failure are released on every path.
*/
int runidxlocali(const IdxlocaliOptions *idxlocalioptions,GtError *err)
{
  Genericindex *genericindex = NULL;
  bool haserr = false;
  Verboseinfo *verboseinfo;
  const Encodedsequence *encseq = NULL;

  verboseinfo = newverboseinfo(idxlocalioptions->verbose);

  if (idxlocalioptions->doonline)
  {
    encseq = mapencodedsequence (true,
                                 idxlocalioptions->indexname,
                                 true,
                                 false,
                                 false,
                                 true,
                                 verboseinfo,
                                 err);
    if (encseq == NULL)
    {
      haserr = true;
    }
  } else
  {
    genericindex = genericindex_new(idxlocalioptions->indexname,
                                    idxlocalioptions->withesa,
                                    idxlocalioptions->withesa ||
                                    idxlocalioptions->docompare,
                                    false,
                                    true,
                                    0,
                                    verboseinfo,
                                    err);
    if (genericindex == NULL)
    {
      haserr = true;
    } else
    {
      encseq = genericindex_getencseq(genericindex);
    }
  }
  if (!haserr)
  {
    GtSeqIterator *seqit;
    const GtUchar *query;
    unsigned long querylen;
    char *desc = NULL;
    int retval;
    Limdfsresources *limdfsresources = NULL;
    const AbstractDfstransformer *dfst;
    SWdpresource *swdpresource = NULL;
    Showmatchinfo showmatchinfo;
    Processmatch processmatch;
    void *processmatchinfoonline, *processmatchinfooffline;
    Storematchinfo storeonline, storeoffline;

    if (idxlocalioptions->docompare)
    {
      /* collect the matches of both runs separately for the comparison */
      processmatch = storematch;
      initstorematch(&storeonline,encseq);
      initstorematch(&storeoffline,encseq);
      processmatchinfoonline = &storeonline;
      processmatchinfooffline = &storeoffline;
    } else
    {
      processmatch = showmatch;
      showmatchinfo.encseq = encseq;
      showmatchinfo.characters = getencseqAlphabetcharacters(encseq);
      showmatchinfo.wildcardshow = getencseqAlphabetwildcardshow(encseq);
      showmatchinfo.showalignment = idxlocalioptions->showalignment;
      processmatchinfoonline = processmatchinfooffline = &showmatchinfo;
    }
    if (idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      swdpresource = newSWdpresource(idxlocalioptions->matchscore,
                                     idxlocalioptions->mismatchscore,
                                     idxlocalioptions->gapextend,
                                     idxlocalioptions->threshold,
                                     idxlocalioptions->showalignment,
                                     processmatch,
                                     processmatchinfoonline);
    }
    dfst = locali_AbstractDfstransformer();
    if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      gt_assert(genericindex != NULL);
      limdfsresources = newLimdfsresources(genericindex,
                                           true,
                                           0,
                                           0,    /* maxpathlength */
                                           true, /* keepexpandedonstack */
                                           processmatch,
                                           processmatchinfooffline,
                                           NULL, /* processresult */
                                           NULL, /* processresult info */
                                           dfst);
    }
    seqit = gt_seqiterator_new(idxlocalioptions->queryfiles, err);
    if (seqit == NULL)
    {
      haserr = true;
    } else
    {
      gt_seqiterator_set_symbolmap(seqit, getencseqAlphabetsymbolmap(encseq));
      /* queryunit serves as the running query number in all modes */
      for (showmatchinfo.queryunit = 0; /* Nothing */;
           showmatchinfo.queryunit++)
      {
        retval = gt_seqiterator_next(seqit,
                                     &query,
                                     &querylen,
                                     &desc,
                                     err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
        printf("process sequence " Formatuint64_t " of length %lu\n",
                PRINTuint64_tcast(showmatchinfo.queryunit),querylen);
        if (idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          multiapplysmithwaterman(swdpresource,encseq,query,querylen);
        }
        if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          indexbasedlocali(limdfsresources,
                           idxlocalioptions->matchscore,
                           idxlocalioptions->mismatchscore,
                           idxlocalioptions->gapstart,
                           idxlocalioptions->gapextend,
                           idxlocalioptions->threshold,
                           query,
                           querylen,
                           dfst);
        }
        if (idxlocalioptions->docompare)
        {
          checkandresetstorematch(showmatchinfo.queryunit,
                                  &storeonline,&storeoffline);
        }
        gt_free(desc);
      }
    }
    /* the search resources were created before the iterator, so they have
       to be released even if creating the iterator failed */
    if (limdfsresources != NULL)
    {
      freeLimdfsresources(&limdfsresources,dfst);
    }
    if (swdpresource != NULL)
    {
      freeSWdpresource(swdpresource);
      swdpresource = NULL;
    }
    if (seqit != NULL)
    {
      gt_seqiterator_delete(seqit);
    }
    if (idxlocalioptions->docompare)
    {
      freestorematch(&storeonline);
      freestorematch(&storeoffline);
    }
  }
  /* after a failed load both pointers are NULL and nothing is to be freed;
     the encoded sequence is only freed here if it was mapped directly,
     i.e. there is no generic index to delete */
  if (genericindex != NULL)
  {
    genericindex_delete(genericindex);
  } else if (encseq != NULL)
  {
    encodedsequence_free((Encodedsequence **) &encseq);
  }
  freeverboseinfo(&verboseinfo);
  return haserr ? -1 : 0;
}