Example #1
0
/*
 * Fill <fragments> with one entry per accepted match of <storematchtab>.
 *
 * fragments         output array; at most one fragment is written per match,
 *                   so at most <numofmatches> entries are written
 * num_of_fragments  out: number of fragments written
 * storematchtab     array of <numofmatches> input matches
 * numofmatches      number of entries in <storematchtab>
 * rare              if non-zero, a match is only accepted while the number of
 *                   matches seen so far with the same reference start position
 *                   (including the current one) does not exceed <rare>
 * fragweightfactor  factor applied to the absolute match score to obtain the
 *                   fragment weight
 *
 * A match is additionally skipped if gth_matches_are_equal() reports it equal
 * to its direct predecessor in <storematchtab>.
 */
static void gthinitfragments(GtFragment *fragments,
                             GtUword *num_of_fragments,
                             GthMatch *storematchtab,
                             GtUword numofmatches,
                             GtUword rare,
                             double fragweightfactor)
{
  GthMatch *mptr;
  GtFragment *fragmentptr;
  GtDiscDistri *startpointdistri = NULL;

  /* init number of fragments */
  *num_of_fragments = 0;

  /* the start point distribution is only needed for the <rare> filter */
  if (rare)
    startpointdistri = gt_disc_distri_new();

  for (mptr = storematchtab, fragmentptr = fragments;
       mptr < storematchtab + numofmatches;
       mptr++) {
    /* every match is counted, even if it is rejected as a duplicate below */
    if (rare)
      gt_disc_distri_add(startpointdistri, mptr->Storepositionreference);
    if ((!rare ||
         gt_disc_distri_get(startpointdistri, mptr->Storepositionreference)
         <= rare) &&
        (mptr == storematchtab || /* is the first match */
         !gth_matches_are_equal(mptr, mptr-1))) { /* or is different from last
                                                     one */
      /* NOTE(review): abs() takes an int; if Storescore is wider than int the
         value is truncated before taking the absolute value -- confirm the
         field type */
      fragmentptr->weight     = (GtWord) (fragweightfactor *
                                        (double) abs(mptr->Storescore));
      /* end positions are inclusive: start + length - 1 */
      fragmentptr->startpos1  = mptr->Storepositionreference;
      fragmentptr->endpos1    = mptr->Storepositionreference
                                + mptr->Storelengthreference - 1;
      fragmentptr->startpos2  = mptr->Storepositiongenomic;
      fragmentptr->endpos2    = mptr->Storepositiongenomic
                                + mptr->Storelengthgenomic - 1;
      fragmentptr++;
      (*num_of_fragments)++;
    }
  }

  /* called with NULL when <rare> is 0, exactly as before */
  gt_disc_distri_delete(startpointdistri);

  gt_assert(*num_of_fragments <= numofmatches);
}
/*
 * Collect the <len> value of each of the <ldb_nelems> entries in
 * <condenseq->links> into a newly created distribution and return it.
 * The distribution is not freed here.
 */
GtDiscDistri *gt_condenseq_link_length_dist(const GtCondenseq *condenseq)
{
  GtUword link_no = 0;
  GtDiscDistri *length_dist = gt_disc_distri_new();

  while (link_no < condenseq->ldb_nelems) {
    gt_disc_distri_add(length_dist, condenseq->links[link_no].len);
    link_no++;
  }

  return length_dist;
}
Example #3
0
/*
 * Allocate a GtHpolProcessor and set every field written below to its
 * initial value.
 *
 * encseq  must not be NULL; the pointer is stored as is
 * hmin    must be greater than 0; stored in the <hmin> field
 *
 * Two empty distributions (<hdist>, <hdist_e>) and a DNA alphabet (<alpha>)
 * are created for the new object.
 */
GtHpolProcessor *gt_hpol_processor_new(GtEncseq *encseq, unsigned long hmin)
{
  GtHpolProcessor *hpp;

  /* check the preconditions before anything else */
  gt_assert(encseq != NULL);
  gt_assert(hmin > 0);

  hpp = gt_malloc(sizeof *hpp);

  /* values handed in by the caller */
  hpp->encseq = encseq;
  hpp->hmin = hmin;

  /* thresholds and switches */
  hpp->read_hmin = 0;
  hpp->qmax = 0;
  hpp->mapqmin = 0;
  hpp->covmin = 0;
  hpp->clenmax = GT_UNDEF_ULONG;
  hpp->altmax = 1.0;
  hpp->refmin = 0.0;
  hpp->allow_partial = false;
  hpp->allow_multiple = false;
  hpp->adjust_s_hlen = false;

  /* objects created for this processor */
  hpp->hdist = gt_disc_distri_new();
  hpp->hdist_e = gt_disc_distri_new();
  hpp->alpha = gt_alphabet_new_dna();

  /* counters */
  hpp->nof_h = 0;
  hpp->nof_h_e = 0;
  hpp->hlen_max = 0;
  hpp->nof_complete_edited = 0;
  hpp->nof_complete_not_edited = 0;
  hpp->nof_skipped = 0;
  hpp->nof_unmapped = 0;
  hpp->nof_multihits = 0;
  hpp->nof_replaced = 0;

  /* optional collaborators, not set yet */
  hpp->cds_oracle = NULL;
  hpp->asp = NULL;
  hpp->processed_segments = NULL;
  hpp->reads_iters = NULL;

  /* output configuration, everything switched off */
  hpp->output_segments = false;
  hpp->outfp_segments = NULL;
  hpp->output_stats = false;
  hpp->output_multihit_stats = false;
  hpp->outfp_stats = NULL;
  hpp->outfiles = NULL;
  hpp->nfiles = 0;

  return hpp;
}
Example #4
0
/*
 * Print the header line "sequence length distribution:" followed by the
 * distribution of the lengths of all sequences in <bs> to <outfp>.
 * <bs> must not be NULL. The temporary distribution is deleted before
 * returning.
 */
void gt_bioseq_show_seqlengthdistri(GtBioseq *bs, GtFile *outfp)
{
  GtDiscDistri *length_distri;
  GtUword seqnum = 0;

  gt_assert(bs);

  /* gather one length per sequence */
  length_distri = gt_disc_distri_new();
  while (seqnum < gt_bioseq_number_of_sequences(bs)) {
    gt_disc_distri_add(length_distri,
                       gt_bioseq_get_sequence_length(bs, seqnum));
    seqnum++;
  }

  /* report and clean up */
  gt_file_xprintf(outfp, "sequence length distribution:\n");
  gt_disc_distri_show(length_distri, outfp);
  gt_disc_distri_delete(length_distri);
}
Example #5
0
/*
 * Create a stat visitor. For every flag that is true the corresponding
 * container of the visitor is created (a distribution for the six *_distri
 * flags, a C string table for <used_sources>). Fields whose flag is false
 * are not touched here and keep whatever gt_node_visitor_create() left in
 * them.
 */
GtNodeVisitor* gt_stat_visitor_new(bool gene_length_distri,
                                   bool gene_score_distri,
                                   bool exon_length_distri,
                                   bool exon_number_distri,
                                   bool intron_length_distri,
                                   bool cds_length_distri,
                                   bool used_sources)
{
    GtNodeVisitor *visitor = gt_node_visitor_create(gt_stat_visitor_class());
    GtStatVisitor *stat_visitor = stat_visitor_cast(visitor);

    /* gene statistics */
    if (gene_length_distri) {
        stat_visitor->gene_length_distribution = gt_disc_distri_new();
    }
    if (gene_score_distri) {
        stat_visitor->gene_score_distribution = gt_disc_distri_new();
    }

    /* exon statistics */
    if (exon_length_distri) {
        stat_visitor->exon_length_distribution = gt_disc_distri_new();
    }
    if (exon_number_distri) {
        stat_visitor->exon_number_distribution = gt_disc_distri_new();
    }

    /* intron and CDS statistics */
    if (intron_length_distri) {
        stat_visitor->intron_length_distribution = gt_disc_distri_new();
    }
    if (cds_length_distri) {
        stat_visitor->cds_length_distribution = gt_disc_distri_new();
    }

    /* table of sources */
    if (used_sources) {
        stat_visitor->used_sources = gt_cstr_table_new();
    }

    return visitor;
}
/*
 * For each of the <ldb_nelems> entries in <condenseq->links>, add the value
 * (editscript size / editscript target length * 100), truncated to an
 * integer, to a newly created distribution and return that distribution.
 * The distribution is not freed here.
 */
GtDiscDistri *gt_condenseq_link_comp_dist(const GtCondenseq *condenseq)
{
  GtDiscDistri *ratio_dist = gt_disc_distri_new();
  GtUword link_no = 0;

  while (link_no < condenseq->ldb_nelems) {
    GtEditscript *script = condenseq->links[link_no].editscript;
    GtUword target_len = gt_editscript_get_target_len(script);
    size_t script_size = gt_editscript_size(script);

    /* NOTE(review): no guard against target_len == 0; presumably links never
       have an empty target -- confirm */
    gt_disc_distri_add(ratio_dist,
                       (GtUword) ((double) script_size /
                                  (double) target_len * 100));
    link_no++;
  }

  return ratio_dist;
}
/*
 * Read a distribution from <fp>: first the number of entries, then for each
 * entry a symbol followed by its frequency. Each entry is added to a newly
 * created distribution with gt_disc_distri_add_multi().
 *
 * dist  the value passed in is ignored; the parameter is only used as a
 *       local variable for the new distribution
 * fp    stream to read from
 * err   passed on to gt_io_error_fread_one()
 *
 * Returns the new distribution, or NULL if any read reported an error (the
 * partially filled distribution is deleted in that case).
 */
static inline GtDiscDistri *read_zero_disc_distri(GtDiscDistri *dist,
                                                  FILE *fp, GtError *err)
{
  int had_err = 0;
  GtUword leaf_no, leaf_symbol;
  GtUint64 leaf_freq;
  GtUword zero_leaf_count;

  dist = gt_disc_distri_new();

  had_err = gt_io_error_fread_one(zero_leaf_count, fp, err);
  if (!had_err) {
    for (leaf_no = 0; leaf_no < zero_leaf_count; leaf_no++) {
      had_err = gt_io_error_fread_one(leaf_symbol, fp, err);
      if (!had_err)
        had_err = gt_io_error_fread_one(leaf_freq, fp, err);
      /* stop at the first failed read; nothing is added for that entry */
      if (had_err)
        break;
      gt_disc_distri_add_multi(dist, leaf_symbol, leaf_freq);
    }
  }

  if (had_err) {
    gt_disc_distri_delete(dist);
    return NULL;
  }
  return dist;
}
Example #8
0
/*
 * Unit test for GtDiscDistri: checks the values reported by
 * gt_disc_distri_get() for the keys 0 and 100 before and after adding
 * entries. Returns <had_err> as maintained by the ensure() checks.
 */
int gt_disc_distri_unit_test(GtError *err)
{
  int had_err = 0;
  GtDiscDistri *distri;

  gt_error_check(err);

  distri = gt_disc_distri_new();

  /* nothing has been added yet: both keys are expected to report 0 */
  ensure(had_err, gt_disc_distri_get(distri, 0) == 0);
  ensure(had_err, gt_disc_distri_get(distri, 100) == 0);

  /* add key 0 once and key 100 with a count of 256 */
  if (!had_err) {
    gt_disc_distri_add(distri, 0);
    gt_disc_distri_add_multi(distri, 100, 256);
  }

  /* the counts added above must be reported back */
  ensure(had_err, gt_disc_distri_get(distri, 0) == 1);
  ensure(had_err, gt_disc_distri_get(distri, 100) == 256);

  gt_disc_distri_delete(distri);

  return had_err;
}
Example #9
0
File: stat.c Project: 9beckert/TIR
/*
 * Allocate a GthStat object: the four *distri switches are set to their
 * GTH_DEFAULT_* values, the two mode flags to false, all counters to 0, and
 * six empty distributions are created.
 */
GthStat *gth_stat_new(void)
{
  GthStat *stat = gt_malloc(sizeof *stat);

  /* switches */
  stat->exondistri = GTH_DEFAULT_EXONDISTRI;
  stat->introndistri = GTH_DEFAULT_INTRONDISTRI;
  stat->matchnumdistri = GTH_DEFAULT_MATCHNUMDISTRI;
  stat->refseqcovdistri = GTH_DEFAULT_REFSEQCOVDISTRI;
  stat->sa_stats = false;
  stat->gthfilestat_mode = false;

  /* event counters */
  stat->numofchains = 0;
  stat->numofremovedzerobaseexons = 0;
  stat->numofautointroncutoutcalls = 0;
  stat->numofunsuccessfulintroncutoutDPs = 0;
  stat->numoffailedDPparameterallocations = 0;
  stat->numoffailedmatrixallocations = 0;
  stat->numofundeterminedSAs = 0;
  stat->numoffilteredpolyAtailmatches = 0;

  /* counters for the memory statistics */
  stat->numofSAs = 0;
  stat->numofPGLs_stored = 0;
  stat->totalsizeofbacktracematricesinMB = 0;
  stat->numofbacktracematrixallocations = 0;

  /* empty distributions */
  stat->exondistribution = gt_disc_distri_new();
  stat->introndistribution = gt_disc_distri_new();
  stat->matchnumdistribution = gt_disc_distri_new();
  stat->refseqcoveragedistribution = gt_disc_distri_new();
  stat->sa_alignment_score_distribution = gt_disc_distri_new();
  stat->sa_coverage_distribution = gt_disc_distri_new();

  return stat;
}
Example #10
0
/*
 * Create a GtOutlcpinfo object.
 *
 * indexname                  if not NULL, two output files are opened for
 *                            writing with the suffixes GT_LCPTABSUFFIX and
 *                            GT_LARGELCPTABSUFFIX, and the lcp2file
 *                            sub-structure is set up
 * numofchars, prefixlength   if <prefixlength> is greater than 0, a turning
 *                            wheel is created from these two values
 * withdistribution           if true, a distribution for lcp values is created
 * swallow_tail_lcpvalues     stored in the new object
 * final_process_bucket,
 * final_process_bucket_info  only used if <indexname> is NULL and
 *                            <final_process_bucket> is not NULL: both are
 *                            stored in a newly allocated lcpprocess structure
 * err                        passed on to gt_fa_fopen_with_suffix()
 *
 * Returns the new object, or NULL if one of the output files could not be
 * opened. In the error case everything allocated or opened by this function
 * is released again before returning.
 */
GtOutlcpinfo *gt_Outlcpinfo_new(const char *indexname,
                                unsigned int numofchars,
                                unsigned int prefixlength,
                                bool withdistribution,
                                bool swallow_tail_lcpvalues,
                                GtFinalProcessBucket final_process_bucket,
                                void *final_process_bucket_info,
                                GtError *err)
{
  bool haserr = false;
  GtOutlcpinfo *outlcpinfo;

  outlcpinfo = gt_malloc(sizeof (*outlcpinfo));
  /* sizeofinfo accumulates the size of the object and its sub-structures */
  outlcpinfo->sizeofinfo = sizeof (*outlcpinfo);
  outlcpinfo->lcpsubtab.lcptabsum = 0.0;
  outlcpinfo->swallow_tail_lcpvalues = swallow_tail_lcpvalues;
  if (withdistribution)
  {
    outlcpinfo->lcpsubtab.distlcpvalues = gt_disc_distri_new();
  } else
  {
    outlcpinfo->lcpsubtab.distlcpvalues = NULL;
  }
  if (indexname == NULL)
  {
    /* no files: optionally hand the lcp values to a callback */
    outlcpinfo->lcpsubtab.lcp2file = NULL;
    if (final_process_bucket != NULL)
    {
      outlcpinfo->lcpsubtab.lcpprocess
        = gt_malloc(sizeof (*outlcpinfo->lcpsubtab.lcpprocess));
      outlcpinfo->lcpsubtab.lcpprocess->final_process_bucket
        = final_process_bucket;
      outlcpinfo->lcpsubtab.lcpprocess->final_process_bucket_info
        = final_process_bucket_info;
    } else
    {
      outlcpinfo->lcpsubtab.lcpprocess = NULL;
    }
  } else
  {
    /* file output: set up lcp2file and open both output streams */
    outlcpinfo->lcpsubtab.lcpprocess = NULL;
    outlcpinfo->lcpsubtab.lcp2file
      = gt_malloc(sizeof (*outlcpinfo->lcpsubtab.lcp2file));
    outlcpinfo->sizeofinfo += sizeof (*outlcpinfo->lcpsubtab.lcp2file);
    outlcpinfo->lcpsubtab.lcp2file->countoutputlcpvalues = 0;
    outlcpinfo->lcpsubtab.lcp2file->maxbranchdepth = 0;
    outlcpinfo->lcpsubtab.lcp2file->totalnumoflargelcpvalues = 0;
    outlcpinfo->lcpsubtab.lcp2file->reservoir = NULL;
    outlcpinfo->lcpsubtab.lcp2file->sizereservoir = 0;
    outlcpinfo->lcpsubtab.lcp2file->smalllcpvalues = NULL;
    GT_INITARRAY(&outlcpinfo->lcpsubtab.lcp2file->largelcpvalues,
                 Largelcpvalue);
    /* make sure the second stream pointer is never left indeterminate if
       opening the first stream fails */
    outlcpinfo->lcpsubtab.lcp2file->outfpllvtab = NULL;
    outlcpinfo->lcpsubtab.lcp2file->outfplcptab
      = gt_fa_fopen_with_suffix(indexname,GT_LCPTABSUFFIX,"wb",err);
    if (outlcpinfo->lcpsubtab.lcp2file->outfplcptab == NULL)
    {
      haserr = true;
    }
    if (!haserr)
    {
      outlcpinfo->lcpsubtab.lcp2file->outfpllvtab
        = gt_fa_fopen_with_suffix(indexname,GT_LARGELCPTABSUFFIX,"wb",err);
      if (outlcpinfo->lcpsubtab.lcp2file->outfpllvtab == NULL)
      {
        haserr = true;
      }
    }
  }
  outlcpinfo->numsuffixes2output = 0;
  outlcpinfo->minchanged = 0;
  if (!haserr && prefixlength > 0)
  {
    outlcpinfo->turnwheel = gt_turningwheel_new(prefixlength,numofchars);
    outlcpinfo->sizeofinfo += gt_turningwheel_size();
  } else
  {
    outlcpinfo->turnwheel = NULL;
  }
#ifdef SKDEBUG
  outlcpinfo->previoussuffix.startpos = 0;
#endif
  outlcpinfo->previoussuffix.code = 0;
  outlcpinfo->previoussuffix.prefixindex = 0;
  outlcpinfo->previoussuffix.defined = false;
  outlcpinfo->previousbucketwasempty = false;
  outlcpinfo->lcpsubtab.tableoflcpvalues.bucketoflcpvalues = NULL;
  outlcpinfo->lcpsubtab.tableoflcpvalues.numofentries = 0;
#ifndef NDEBUG
  outlcpinfo->lcpsubtab.tableoflcpvalues.isset = NULL;
#endif
  if (haserr)
  {
    /* An error can only occur on the file output path. Release what was
       acquired above instead of leaking it: the stream(s) that were opened,
       the lcp2file structure and the optional distribution. No turning wheel
       exists at this point (it is only created if !haserr).
       NOTE(review): gt_fa_fclose() is assumed to be declared together with
       gt_fa_fopen_with_suffix() -- confirm the header is included. */
    if (outlcpinfo->lcpsubtab.lcp2file != NULL)
    {
      if (outlcpinfo->lcpsubtab.lcp2file->outfplcptab != NULL)
      {
        gt_fa_fclose(outlcpinfo->lcpsubtab.lcp2file->outfplcptab);
      }
      if (outlcpinfo->lcpsubtab.lcp2file->outfpllvtab != NULL)
      {
        gt_fa_fclose(outlcpinfo->lcpsubtab.lcp2file->outfpllvtab);
      }
      gt_free(outlcpinfo->lcpsubtab.lcp2file);
    }
    if (outlcpinfo->lcpsubtab.distlcpvalues != NULL)
    {
      gt_disc_distri_delete(outlcpinfo->lcpsubtab.distlcpvalues);
    }
    gt_free(outlcpinfo);
    return NULL;
  }
  return outlcpinfo;
}