Beispiel #1
0
/*
  Iterate over all sequences delivered by a sequence-buffer based
  GtSeqIterator created for <filenametab> and write each one, together with
  its description, to stdout via gt_symbolstring2fasta() (NULL is passed as
  alphabet argument, 70UL as the final width argument).

  Returns 0 once the iterator reports that it is exhausted. Returns -1 if the
  iterator cannot be created or if gt_seqiterator_next() returns a negative
  value; <err> is handed to both calls.
*/
int gt_echodescriptionandsequence(const GtStrArray *filenametab,GtError *err)
{
    GtSeqIterator *iter = gt_seqiterator_sequence_buffer_new(filenametab, err);
    const GtUchar *symbols;
    unsigned long numofsymbols;
    char *header = NULL;
    int status;

    if (iter == NULL)
    {
        return -1;
    }
    /* a positive status means a sequence was delivered; 0 ends the
       iteration normally, a negative value signals an error */
    while ((status = gt_seqiterator_next(iter,
                                         &symbols,
                                         &numofsymbols,
                                         &header,
                                         err)) > 0)
    {
        gt_symbolstring2fasta(stdout,header,NULL,symbols,numofsymbols,70UL);
    }
    gt_seqiterator_delete(iter);
    return (status < 0) ? -1 : 0;
}
Beispiel #2
0
/*
  Self-test for the maximal-pair matching code.

  Loads the encoded sequence stored under <indexname> (description, sequence
  separator and sds tables are not required) and then runs <samples> rounds.
  In each round two buffers of at most <substringlength> symbols are filled by
  samplesubstring() (presumably random substrings of the encoded sequence --
  confirm against its definition), and the matches of length >= <minlength>
  reported by gt_sarrquerysubstringmatch() are compared with those reported by
  sarrselfsubstringmatch(). If the two sorted result sets differ, both sets
  and both sequences are printed to stdout and the process terminates with
  GT_EXIT_PROGRAMMING_ERROR.

  <substringlength> is capped at half the total length of the encoded
  sequence. Progress messages are sent to <logger>.

  Returns 0 on success and -1 if loading the index or one of the two matching
  functions fails; <err> is passed on to these functions.

  All per-round allocations (both result arrays and the query mark positions)
  are released on every path, including the error paths.
*/
int gt_testmaxpairs(const char *indexname,
                    GtUword samples,
                    unsigned int minlength,
                    GtUword substringlength,
                    GtLogger *logger,
                    GtError *err)
{
  GtEncseq *encseq;
  GtUword totallength = 0, dblen, querylen;
  GtUchar *dbseq = NULL, *query = NULL;
  bool haserr = false;
  GtUword s;
  GtArray *tabmaxquerymatches;
  Maxmatchselfinfo maxmatchselfinfo;
  GtEncseqLoader *el;

  gt_logger_log(logger,"draw "GT_WU" samples",samples);

  el = gt_encseq_loader_new();
  gt_encseq_loader_do_not_require_des_tab(el);
  gt_encseq_loader_do_not_require_ssp_tab(el);
  gt_encseq_loader_do_not_require_sds_tab(el);
  gt_encseq_loader_set_logger(el, logger);
  encseq = gt_encseq_loader_load(el, indexname, err);
  gt_encseq_loader_delete(el);

  if (encseq == NULL)
  {
    haserr = true;
  } else
  {
    totallength = gt_encseq_total_length(encseq);
    /* never sample more than half of the available symbols */
    if (substringlength > totallength/2)
    {
      substringlength = totallength/2;
    }
    dbseq = gt_malloc(sizeof *dbseq * substringlength);
    query = gt_malloc(sizeof *query * substringlength);
  }
  for (s=0; s<samples && !haserr; s++)
  {
    dblen = samplesubstring(dbseq,encseq,substringlength);
    querylen = samplesubstring(query,encseq,substringlength);
    gt_logger_log(logger,"run query match for dblen="GT_WU""
                         ",querylen= "GT_WU", minlength=%u",
                         dblen,
                         querylen,
                         minlength);
    tabmaxquerymatches = gt_array_new(sizeof (Substringmatch));
    if (gt_sarrquerysubstringmatch(dbseq,
                                   dblen,
                                   query,
                                   querylen,
                                   minlength,
                                   gt_encseq_alphabet(encseq),
                                   storemaxmatchquery,
                                   tabmaxquerymatches,
                                   logger,
                                   err) != 0)
    {
      haserr = true;
    }
    if (!haserr)
    {
      gt_logger_log(logger,"run self match for dblen="GT_WU""
                           ",querylen= "GT_WU", minlength=%u",
                           dblen,
                           querylen,
                           minlength);
      maxmatchselfinfo.results = gt_array_new(sizeof (Substringmatch));
      maxmatchselfinfo.dblen = dblen;
      maxmatchselfinfo.querylen = querylen;
      maxmatchselfinfo.querymarkpos
        = sequence2markpositions(&maxmatchselfinfo.numofquerysequences,
                                 query,querylen);
      if (sarrselfsubstringmatch(dbseq,
                                 dblen,
                                 query,
                                 querylen,
                                 minlength,
                                 gt_encseq_alphabet(encseq),
                                 storemaxmatchself,
                                 &maxmatchselfinfo,
                                 logger,
                                 err) != 0)
      {
        haserr = true;
      }
      if (!haserr)
      {
        /* both result sets must contain the same matches once sorted */
        gt_array_sort(tabmaxquerymatches,orderSubstringmatch);
        gt_array_sort(maxmatchselfinfo.results,orderSubstringmatch);
        if (!gt_array_equal(tabmaxquerymatches,maxmatchselfinfo.results,
                            orderSubstringmatch))
        {
          const GtUword width = 60UL;
          printf("failure for query of length "GT_WU"\n",querylen);
          printf("querymatches\n");
          (void) gt_array_iterate(tabmaxquerymatches,showSubstringmatch,NULL,
                                  err);
          printf("dbmatches\n");
          (void) gt_array_iterate(maxmatchselfinfo.results,showSubstringmatch,
                                  NULL,err);
          gt_symbolstring2fasta(stdout,"dbseq",
                                gt_encseq_alphabet(encseq),
                                dbseq,
                                dblen,
                                width);
          gt_symbolstring2fasta(stdout,"queryseq",
                                gt_encseq_alphabet(encseq),
                                query,
                                querylen,
                                width);
          exit(GT_EXIT_PROGRAMMING_ERROR);
        }
        printf("# numberofmatches="GT_WU"\n",
               gt_array_size(tabmaxquerymatches));
      }
      /* released whether or not the self match succeeded */
      gt_free(maxmatchselfinfo.querymarkpos);
      gt_array_delete(maxmatchselfinfo.results);
    }
    /* released whether or not the query match succeeded */
    gt_array_delete(tabmaxquerymatches);
  }
  gt_free(dbseq);
  gt_free(query);
  gt_encseq_delete(encseq);
  encseq = NULL;
  return haserr ? -1 : 0;
}