예제 #1
0
/* Write a plain-text listing of <cntlist> to <file>: first a header line
   "[n: <nofreads>]", then one line for every index in [0, nofreads) whose
   bit is set in <cntlist>. <file> must not be NULL. */
static inline void gt_cntlist_show_ascii(GtBitsequence *cntlist,
    GtUword nofreads, FILE *file)
{
  GtUword readnum = 0;

  gt_assert(file != NULL);
  fprintf(file, "[n: "GT_WU"]\n", nofreads);
  while (readnum < nofreads)
  {
    if (GT_ISIBITSET(cntlist, readnum))
    {
      fprintf(file, ""GT_WU"\n", readnum);
    }
    readnum++;
  }
}
예제 #2
0
/* Return the number of indices in [0, nofreads) whose bit is set in
   <cntlist>. */
GtUword gt_cntlist_count(const GtBitsequence *cntlist,
    GtUword nofreads)
{
  GtUword nofsetbits = 0, readnum = 0;

  while (readnum < nofreads)
  {
    if ((bool)GT_ISIBITSET(cntlist, readnum))
    {
      nofsetbits++;
    }
    readnum++;
  }
  return nofsetbits;
}
예제 #3
0
/* Check the lcp values stored in <outlcpinfosample> against lcp values
   recomputed from <encseq>.

   For every pair of neighbouring entries (idx-1, idx) of <sortedsample>,
   with 1 <= idx < effectivesamplesize, the two suffixes are compared via
   gt_encseq_check_comparetwosuffixes(), which delivers the real lcp. The
   stored value at position idx is then checked:
     - if <checkequality> is true, it must equal the real lcp;
     - otherwise it must not exceed the real lcp.
   On the first violation a diagnostic and both suffixes (up to depth 50)
   are written to stderr and the program exits with
   GT_EXIT_PROGRAMMING_ERROR. Nothing is done for an empty sample. */
void gt_Outlcpinfo_check_lcpvalues(const GtEncseq *encseq,
                                   GtReadmode readmode,
                                   const GtSuffixsortspace *sortedsample,
                                   GtUword effectivesamplesize,
                                   const GtOutlcpinfo *outlcpinfosample,
                                   bool checkequality)
{
  GT_UNUSED int cmp;
  GtUword idx, startpos1, startpos2, reallcp, currentlcp,
          totalcmpmissing = 0;
  bool mismatch;

  if (effectivesamplesize == 0)
  {
    return;
  }
  startpos1 = gt_suffixsortspace_getdirect(sortedsample,0);
  for (idx = 1UL; idx < effectivesamplesize; idx++, startpos1 = startpos2)
  {
    startpos2 = gt_suffixsortspace_getdirect(sortedsample,idx);
    cmp = gt_encseq_check_comparetwosuffixes(encseq, readmode, &reallcp,
                                             false, false, 0,
                                             startpos1, startpos2,
                                             NULL, NULL);
    /* the earlier suffix must not compare greater than the later one */
    gt_assert(cmp <= 0);
    gt_assert(GT_ISIBITSET(outlcpinfosample->lcpsubtab.tableoflcpvalues
                                                      .isset,idx));
    currentlcp = (GtUword) outlcpinfosample->lcpsubtab.tableoflcpvalues.
                                 bucketoflcpvalues[idx];
    mismatch = checkequality ? (currentlcp != reallcp)
                             : (currentlcp > reallcp);
    if (mismatch)
    {
      fprintf(stderr,"idx="GT_WU",suffixpair="GT_WU","GT_WU": "
                     "currentlcp = "GT_WU" %s "GT_WU" = reallcp\n",
                      idx,startpos1,startpos2,currentlcp,
                      checkequality ? "!=" : ">",reallcp);
      gt_encseq_showatstartposwithdepth(stderr,encseq,readmode,startpos1,50UL);
      fprintf(stderr,"\n");
      gt_encseq_showatstartposwithdepth(stderr,encseq,readmode,startpos2,50UL);
      fprintf(stderr,"\n");
      exit(GT_EXIT_PROGRAMMING_ERROR);
    }
    /* only reached for consistent values, hence reallcp >= currentlcp */
    totalcmpmissing += (reallcp - currentlcp);
  }
  /* totalcmpmissing is accumulated but currently not reported. */
}
예제 #4
0
/* Leaf-edge callback. If <fatherdepth> is at least state->shortest and the
   suffix at <leafnumber> is delimited by set bits in state->sspbittab on both
   sides (leafnumber is 0 or bit leafnumber-1 is set, and bit
   leafnumber+fatherdepth is set), the sequence containing <leafnumber> is
   passed to processcontained(). Advances state->progress when the progress
   bar is enabled. Always returns 0. */
static inline int processleafedge_rdjcv(GT_UNUSED bool firstsucc,
    unsigned long fatherdepth,
    GT_UNUSED GtBUinfo_rdjcv *father, unsigned long leafnumber,
    GtBUstate_rdjcv *state, GT_UNUSED GtError *err)
{
  /* the tests are kept in this order: later bit lookups are only performed
     if the earlier conditions hold */
  if (fatherdepth >= state->shortest &&
      (leafnumber == 0 ||
       GT_ISIBITSET(state->sspbittab, leafnumber-1)) &&
      GT_ISIBITSET(state->sspbittab, leafnumber + fatherdepth))
  {
    processcontained(gt_encseq_seqnum(state->encseq, leafnumber), state);
  }
  if (state->show_progressbar)
  {
    state->progress++;
  }
  return 0;
}
예제 #5
0
/* Record <seqnum> in <state>. If state->firstrevcompl is positive, the
   number is first mapped through GT_READJOINER_READNUM. The resulting number
   is marked in the bit table state->contained (state->counter counts newly
   marked entries), state->cmin is lowered to it if it is smaller (or if this
   is the first recorded entry, i.e. state->csize is 0), and state->csize is
   incremented on every call. */
static inline void processcontained(unsigned long seqnum,
    ContfindBUstate *state)
{
  unsigned long readnum = seqnum;

  if (state->firstrevcompl > 0)
  {
    readnum = GT_READJOINER_READNUM(seqnum, state->firstrevcompl,
        state->nofsequences);
  }
  if (!GT_ISIBITSET(state->contained, readnum))
  {
    GT_SETIBIT(state->contained, readnum);
    state->counter++;
  }
  if (state->csize == 0 || readnum < state->cmin)
  {
    state->cmin = readnum;
  }
  state->csize++;
}
예제 #6
0
/* Count the sequences marked in storeonline->hasmatch, print the count as
   "matching sequences: <count>" to stdout, and clear the hasmatch tables of
   both <storeonline> and <storeoffline>. The number of sequences is taken
   from storeonline->encseq.

   Unless NDEBUG is defined, the two tables are additionally compared entry by
   entry; on the first disagreement a message naming <queryunit> and the
   sequence number is written to stderr and the program exits with
   GT_EXIT_PROGRAMMING_ERROR. */
void gt_checkandresetstorematch(GT_UNUSED uint64_t queryunit,
                             Storematchinfo *storeonline,
                             Storematchinfo *storeoffline)
{
  unsigned long seqnum = 0, countmatchseq = 0,
    numofdbsequences = gt_encseq_num_of_sequences(storeonline->encseq);

  while (seqnum < numofdbsequences)
  {
    const bool online = GT_ISIBITSET(storeonline->hasmatch,seqnum)
                          ? true : false;
#ifndef NDEBUG
    const bool offline = GT_ISIBITSET(storeoffline->hasmatch,seqnum)
                           ? true : false;

    if (online && !offline)
    {
      fprintf(stderr,"query " Formatuint64_t " refseq %lu: "
                     "online has match but offline not\n",
                     PRINTuint64_tcast(queryunit),seqnum);
      exit(GT_EXIT_PROGRAMMING_ERROR);
    }
    if (!online && offline)
    {
      fprintf(stderr,"query " Formatuint64_t " refseq %lu: "
                     "offline has match but online not\n",
                     PRINTuint64_tcast(queryunit),seqnum);
      exit(GT_EXIT_PROGRAMMING_ERROR);
    }
#endif
    if (online)
    {
      countmatchseq++;
    }
    seqnum++;
  }
  /* reset both tables */
  GT_CLEARBITTAB(storeonline->hasmatch,numofdbsequences);
  GT_CLEARBITTAB(storeoffline->hasmatch,numofdbsequences);
  printf("matching sequences: %lu\n",countmatchseq);
}
예제 #7
0
/* Match callback: <info> is a Storematchinfo. Determines the database
   sequence number of <match> (derived from match->dbstartpos via
   gt_encseq_seqnum() if match->dbabsolute is set, otherwise taken from
   match->dbseqnum) and makes sure its bit in the hasmatch table is set. */
static void storematch(void *info,const GtIdxMatch *match)
{
  Storematchinfo *store = (Storematchinfo *) info;
  unsigned long seqnum;

  seqnum = match->dbabsolute
             ? gt_encseq_seqnum(store->encseq, match->dbstartpos)
             : match->dbseqnum;
  if (GT_ISIBITSET(store->hasmatch,seqnum))
  {
    return; /* already recorded */
  }
  GT_SETIBIT(store->hasmatch,seqnum);
}
예제 #8
0
/* Pairwise overlap search over the reads stored in <encseq>.

   Every pair (u, v) of reads with index i <= j < n is decoded and passed,
   through the shared struct Data <d>, either to find_approx_overlaps()
   (if <use_dp>; uses <max_error>) or to find_exact_overlaps() (otherwise;
   uses <use_kmp>). <min_length> and <find_nonmaximal> are forwarded to both.
   <proc>, <proc_a> and <procdata> are stored in <d> by
   GT_RDJ_PAIRWISE_INIT_STRUCT_DATA (macro defined elsewhere).

   n is the number of sequences in <encseq>, halved if <revcompl> is set; in
   that case v is additionally compared using the sequence at the mirrored
   index nofsequences - j - 1.

   Containment bookkeeping: the bit table <cntreads> is <cntreads_in> if that
   is non-NULL, otherwise it is freshly initialized unless <m> is
   GT_OVLFIND_SPM. For modes other than GT_OVLFIND_SPM the result of each
   comparison is passed to mark_contained(). If <cntfilter> is set, reads
   already marked in <cntreads> are skipped, and GT_OVLFIND_ALL is narrowed
   to GT_OVLFIND_PROPER_SPM.

   Outputs: if <cntreads_out> is non-NULL it receives <cntreads> (ownership
   is then not released here); otherwise a table allocated here is freed.
   If <nofreads> is non-NULL it receives n. */
static inline void rdj_pairwise_generic(bool use_dp, GtOvlfindMode m,
    GtEncseq *encseq, bool revcompl, bool show_progressbar, bool use_kmp,
    double max_error, GtUword min_length, bool find_nonmaximal,
    GtSpmproc proc, GtSpmprocA proc_a, void* procdata, bool cntfilter,
    GtBitsequence *cntreads_in, GtBitsequence **cntreads_out,
    GtUword *nofreads)
{
  GtContfind containment_status;
  GtBitsequence *cntreads = NULL;
  GtUint64 progress = 0;
  GtUword i, j, startpos, v_seqnum, nofsequences, n;
  struct Read u, v;
  struct Data d;
  gt_kmp_t** kmp_values = NULL;

  GT_RDJ_PAIRWISE_INIT_STRUCT_DATA(d, proc, proc_a, procdata, &u, &v, 0);

  gt_assert(encseq != NULL);

  d.mode = m;
  if ((m == GT_OVLFIND_ALL) && cntfilter)
    d.mode = GT_OVLFIND_PROPER_SPM;

  n = gt_encseq_num_of_sequences(encseq);
  /* kmp values are prepared for all sequences, i.e. before n is halved */
  if (use_kmp)
    kmp_values = prepare_kmp_values(encseq, n);
  nofsequences = n;
  if (revcompl)
    n = n >> 1;
  if (cntreads_in != NULL)
    cntreads = cntreads_in;
  else if (m != GT_OVLFIND_SPM)
    GT_INITBITTAB(cntreads, n);
  /* NOTE(review): the total passed here is n*(n-1)/2, while the inner loop
     below also visits j == i -- confirm this is intended */
  if (show_progressbar) gt_progressbar_start(&progress, (GtUint64)n *
      ((GtUint64)n - 1ULL) / 2ULL);

  for (i = 0; i < n; i++)
  {
    /* decode read u into a NUL-terminated buffer */
    u.seqnum = i;
    u.direct = true;
    u.len = gt_encseq_seqlength(encseq, i);
    u.seq = gt_malloc(sizeof (char) * (u.len + 1));
    startpos = gt_encseq_seqstartpos(encseq, i);
    gt_encseq_extract_decoded(encseq, u.seq, startpos, startpos + u.len - 1);
    u.seq[u.len] = '\0';
    if (use_kmp)
    {
      gt_assert(kmp_values != NULL);
      u.pi = kmp_values[i];
    }

    for (j = i; j < n; j++)
    {
      if (cntfilter)
      {
        /* NOTE(review): cntreads stays NULL if m is GT_OVLFIND_SPM and no
           cntreads_in was given -- callers presumably avoid combining that
           with cntfilter */
        gt_assert(cntreads != NULL);
        /* u marked: nothing more to do for this i; v marked: skip this j
           (both happen before v.seq is allocated) */
        if ((bool)GT_ISIBITSET(cntreads, i)) break;
        if ((bool)GT_ISIBITSET(cntreads, j)) continue;
      }

      v.seqnum = j;

      /* find overlaps using direct v */
      v.direct = true;
      v.len = gt_encseq_seqlength(encseq, j);
      v.seq = gt_malloc(sizeof (char) * (v.len + 1));
      startpos = gt_encseq_seqstartpos(encseq, j);
      gt_encseq_extract_decoded(encseq, v.seq, startpos,
          startpos + v.len - 1);
      v.seq[v.len] = '\0';
      if (use_kmp)
      {
        gt_assert(kmp_values != NULL);
        v.pi = kmp_values[j];
      }
      containment_status = use_dp
          ? find_approx_overlaps(&d, max_error, min_length, find_nonmaximal)
          : find_exact_overlaps(&d, use_kmp, min_length, find_nonmaximal);
      if (m != GT_OVLFIND_SPM)
        mark_contained(containment_status, u.seqnum, v.seqnum, cntreads);

      /* find overlaps using reverse complement of v */
      if (revcompl)
      {
        v_seqnum =  nofsequences - j - 1;
        v.direct = false;
        /* same length as read j, so the buffer and its terminator written
           above are reused; v.seqnum remains j */
        gt_assert(gt_encseq_seqlength(encseq, j) ==
            gt_encseq_seqlength(encseq, v_seqnum));
        startpos = gt_encseq_seqstartpos(encseq, v_seqnum);
        gt_encseq_extract_decoded(encseq, v.seq, startpos,
            startpos + v.len - 1);
        if (use_kmp)
        {
          gt_assert(kmp_values != NULL);
          v.pi = kmp_values[v_seqnum];
        }
        containment_status = use_dp
          ? find_approx_overlaps(&d, max_error, min_length, find_nonmaximal)
          : find_exact_overlaps(&d, use_kmp, min_length, find_nonmaximal);
        if (m != GT_OVLFIND_SPM)
          mark_contained(containment_status, u.seqnum, v.seqnum, cntreads);
      }
      gt_free(v.seq);
      progress++;
    }
    gt_free(u.seq);
  }

  /* hand the table to the caller, or free it if it was allocated here */
  if (cntreads_out != NULL)
    *cntreads_out = cntreads;
  else if (cntreads_in == NULL)
    gt_free(cntreads);
  if (nofreads != NULL)
    *nofreads = n;
  /* n was halved for revcompl; n << 1 restores the count used when the kmp
     values were prepared */
  if (use_kmp)
    free_kmp_values(kmp_values, revcompl ? n << 1 : n);
  if (show_progressbar)
    gt_progressbar_stop();
}
예제 #9
0
/* Tool runner: builds a compressed bit sequence from a bit vector, writes it
   to a temporary file, reads it back and optionally verifies it.

   The bit vector is either read from arguments->filename (if that option is
   set): first an unsigned long long giving the number of bits, then the
   corresponding number of GtBitsequence words; or it is generated with
   arguments->size words, each 0xAAAA...A XORed with a random value (if
   arguments->fill_random) or with the word index.

   If arguments->check_consistency is set, every bit of the in-memory and the
   re-read compressed sequence is compared with the original (via gt_assert).

   Returns 0 on success, a non-zero value on failure. Note that a short read
   from the input file yields -1 without a message being stored in <err>. */
static int gt_compressedbits_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtCompressdbitsArguments *arguments = tool_arguments;
  GtCompressedBitsequence *cbs = NULL, *read_cbs = NULL;
  GtBitsequence *bits = NULL;
  GtStr *tmpfilename = gt_str_new();
  FILE *tmpfp = NULL;
  unsigned long long num_of_bits = 0ULL;
  unsigned long idx;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);
  gt_assert(argc == parsed_args);

  if (!gt_option_is_set(arguments->filename_op)) {
    /* no input file: generate arguments->size words */
    bits = gt_calloc(sizeof (*bits), (size_t) arguments->size);
    num_of_bits = (unsigned long long) (GT_INTWORDSIZE * arguments->size);

    if (arguments->fill_random) {
      for (idx = 0; idx < arguments->size; idx++) {
        bits[idx] =
          (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ gt_rand_max(ULONG_MAX));
      }
    }
    else {
      for (idx = 0; idx < arguments->size; idx++) {
        bits[idx] = (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ idx);
      }
    }
  }
  else {
    /* input file: bit count followed by the words holding the bits */
    FILE *infile = NULL;
    gt_assert(arguments->filename != NULL);

    infile = gt_xfopen(gt_str_get(arguments->filename), "r");
    if (gt_xfread(&num_of_bits, sizeof (num_of_bits), (size_t) 1, infile)
          != (size_t) 1) {
      had_err = -1;
    }
    else {
      gt_log_log("bits to read: %llu", num_of_bits);
      arguments->size = (unsigned long) GT_NUMOFINTSFORBITS(num_of_bits);
      bits = gt_malloc(sizeof (*bits) * arguments->size);
      if (gt_xfread(bits, sizeof (*bits), (size_t) arguments->size, infile)
            != (size_t) arguments->size) {
        had_err = -1;
      }
    }
    gt_xfclose(infile);
  }

  if (!had_err) {
    /* the temporary file is opened and closed right away; only its name in
       tmpfilename is used below (presumably filled in by gt_xtmpfp) */
    tmpfp = gt_xtmpfp(tmpfilename);
    gt_fa_xfclose(tmpfp);
    tmpfp = NULL;

    gt_log_log("filename: %s", gt_str_get(tmpfilename));
    gt_log_log("size in words: %lu", arguments->size);
    cbs = gt_compressed_bitsequence_new(bits, arguments->samplerate,
                                        (unsigned long) num_of_bits);
    gt_log_log("original size in MB: %2.3f",
               (sizeof (*bits) * arguments->size) / (1024.0 * 1024.0));
    gt_log_log("compressed size in MB: %2.3f",
               gt_compressed_bitsequence_size(cbs) / (1024.0 * 1024.0));
    gt_log_log("popcount table size thereof in MB: %2.3f",
               gt_popcount_tab_calculate_size(15U) / (1024.0 * 1024.0));
    had_err = gt_compressed_bitsequence_write(cbs, gt_str_get(tmpfilename),
                                              err);
  }
  if (!had_err) {
    read_cbs =
      gt_compressed_bitsequence_new_from_file(gt_str_get(tmpfilename), err);
    if (read_cbs == NULL) {
      had_err = -1;
    }
  }
  if (!had_err && bits != NULL && arguments->check_consistency) {
    /* both compressed sequences must reproduce every original bit */
    for (idx = 0; (unsigned long long) idx < num_of_bits; ++idx) {
      int GT_UNUSED bit = gt_compressed_bitsequence_access(read_cbs, idx);
      int GT_UNUSED original = GT_ISIBITSET(bits, idx) ? 1 : 0;
      gt_assert(gt_compressed_bitsequence_access(cbs, idx) == bit);
      gt_assert(original == bit);
    }
  }
  gt_compressed_bitsequence_delete(cbs);
  gt_compressed_bitsequence_delete(read_cbs);
  gt_free(bits);
  gt_str_delete(tmpfilename);
  return had_err;
}