Esempio n. 1
0
/* Allocates a zeroed GtSequniqArguments via gt_calloc() and equips it with a
   new output file info object. The result is returned as an untyped
   pointer. */
static void* gt_sequniq_arguments_new(void)
{
  GtSequniqArguments *args;

  args = gt_calloc((size_t) 1, sizeof (*args));
  args->ofi = gt_output_file_info_new();
  return args;
}
Esempio n. 2
0
static void* gt_seqorder_arguments_new(void)
{
  GtSeqorderArguments *arguments = gt_calloc((size_t)1, sizeof *arguments);
  return arguments;
}
/* Allocates a GtFeatureNodeObserver via gt_calloc() and returns it; no member
   is set explicitly.
   The parameter list is spelled (void) so that the definition provides a real
   prototype, like the other constructors in this code base. */
GtFeatureNodeObserver* gt_feature_node_observer_new(void)
{
  GtFeatureNodeObserver *fno = gt_calloc(1, sizeof *fno);
  return fno;
}
static int gt_snp_annotator_visitor_prepare_gene(GtSNPAnnotatorVisitor *sav,
                                                 GtError *err)
{
  GtFeatureNodeIterator *fni,
                        *mrnafni;
  GtFeatureNode *curnode,
                *last_mRNA = NULL;
  GtStr *mrnaseq,
        *seqid;
  int had_err = 0;

  mrnaseq = gt_str_new();
  seqid = gt_genome_node_get_seqid((GtGenomeNode*) sav->gene);
  fni = gt_feature_node_iterator_new(sav->gene);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (gt_feature_node_get_type(curnode) == sav->mRNA_type) {
      GtFeatureNode *curnode2;
      if (last_mRNA) {
        char *mrna_charseq = gt_calloc(gt_str_length(mrnaseq)+1, sizeof (char));
        (void) strncpy(mrna_charseq, gt_str_get(mrnaseq),
                       gt_str_length(mrnaseq));
        if (gt_feature_node_get_strand(sav->gene) == GT_STRAND_REVERSE) {
          had_err = gt_reverse_complement(mrna_charseq, gt_str_length(mrnaseq),
                                          err);
        }
        if (!had_err) {
          gt_hashmap_add(sav->rnaseqs, last_mRNA, mrna_charseq);
          last_mRNA = curnode;
          gt_str_reset(mrnaseq);
        }
      } else last_mRNA = curnode;
      if (!had_err) {
        mrnafni = gt_feature_node_iterator_new(curnode);
        while (!had_err && (curnode2 =
                                      gt_feature_node_iterator_next(mrnafni))) {
          if (gt_feature_node_get_type(curnode2) == sav->CDS_type) {
            char *tmp;
            GtRange rng = gt_genome_node_get_range((GtGenomeNode*) curnode2);
            had_err = gt_region_mapping_get_sequence(sav->rmap, &tmp, seqid,
                                                     rng.start, rng.end, err);
            if (!had_err) {
              gt_str_append_cstr_nt(mrnaseq, tmp, gt_range_length(&rng));
              gt_free(tmp);
            }
          }
        }
        gt_feature_node_iterator_delete(mrnafni);
      }
    }
  }
  if (!had_err && last_mRNA) {
    char *mrna_charseq = gt_calloc(gt_str_length(mrnaseq)+1, sizeof (char));
    (void) strncpy(mrna_charseq, gt_str_get(mrnaseq), gt_str_length(mrnaseq));
    if (gt_feature_node_get_strand(sav->gene) == GT_STRAND_REVERSE) {
      had_err = gt_reverse_complement(mrna_charseq, gt_str_length(mrnaseq),
                                      err);
    }
    if (!had_err) {
      gt_hashmap_add(sav->rnaseqs, last_mRNA, mrna_charseq);
    }
  }
  gt_feature_node_iterator_delete(fni);
  gt_str_delete(mrnaseq);
  return had_err;
}
Esempio n. 5
0
/* Allocates a zeroed SeqFilterArguments via gt_calloc(), attaches a new
   output file info object and returns the structure as an untyped pointer. */
static void* gt_seqfilter_arguments_new(void)
{
  SeqFilterArguments *args;

  args = gt_calloc(1, sizeof (*args));
  args->ofi = gt_output_file_info_new();
  return args;
}
/* Fetches the next node from the wrapped input stream into <*gn>. If it is a
   feature node, its subgraph is visited with <ls->lv> to fill <ls->element>;
   when a main node was found, one tab-separated line is written to
   <ls->tabout_file> and FASTA entries are written to the PPT, PBS, 5' LTR,
   3' LTR and whole-element output files.
   Non-feature nodes are passed through untouched (return value 0).
   Returns 0 on success, otherwise the non-zero code of the first failing
   step; <err> is set by the failing call. The per-element scratch data in
   <ls->element> (pdoms, pdomorder, seqid) is released before returning. */
int gt_ltrfileout_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *err)
{
  GtLTRdigestFileOutStream *ls;
  GtFeatureNode *fn;
  GtRange lltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          rltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          ppt_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          pbs_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD};
  int had_err;
  GtUword i=0;

  gt_error_check(err);
  ls = gt_ltrdigest_file_out_stream_cast(ns);

  /* initialize this element */
  memset(&ls->element, 0, sizeof (GtLTRElement));

  /* get annotations from parser */
  had_err = gt_node_stream_next(ls->in_stream, gn, err);
  if (!had_err && *gn)
  {
    GtFeatureNodeIterator* gni;
    GtFeatureNode *mygn;

    /* only process feature nodes */
    if (!(fn = gt_feature_node_try_cast(*gn)))
      return 0;

    ls->element.pdomorder = gt_array_new(sizeof (const char*));

    /* fill LTRElement structure from GFF3 subgraph */
    gni = gt_feature_node_iterator_new(fn);
    for (mygn = fn; mygn != NULL; mygn = gt_feature_node_iterator_next(gni))
      (void) gt_genome_node_accept((GtGenomeNode*) mygn,
                                   (GtNodeVisitor*) ls->lv,
                                   err);
    gt_feature_node_iterator_delete(gni);
  }

  if (!had_err && ls->element.mainnode != NULL)
  {
    /* desc is only filled in when the description lookup below succeeds;
       every later use is guarded by !had_err */
    char desc[GT_MAXFASTAHEADER];
    GtFeatureNode *ltr3, *ltr5;
    GtStr *sdesc, *sreg, *seq;

    /* find sequence in GtEncseq */
    sreg = gt_genome_node_get_seqid((GtGenomeNode*) ls->element.mainnode);

    sdesc = gt_str_new();
    had_err = gt_region_mapping_get_description(ls->rmap, sdesc, sreg, err);

    if (!had_err) {
      GtRange rng;
      /* seqid holds at most seqnamelen characters of the description, with
         blanks replaced by underscores */
      ls->element.seqid = gt_calloc((size_t) ls->seqnamelen+1, sizeof (char));
      (void) snprintf(ls->element.seqid,
                      MIN((size_t) gt_str_length(sdesc),
                          (size_t) ls->seqnamelen)+1,
                      "%s", gt_str_get(sdesc));
      gt_cstr_rep(ls->element.seqid, ' ', '_');
      if (gt_str_length(sdesc) > (GtUword) ls->seqnamelen)
        ls->element.seqid[ls->seqnamelen] = '\0';

      (void) gt_ltrelement_format_description(&ls->element,
                                              ls->seqnamelen,
                                              desc,
                                              (size_t) (GT_MAXFASTAHEADER-1));

      /* output basic retrotransposon data */
      lltr_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.leftLTR);
      rltr_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.rightLTR);
      rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.mainnode);
      gt_file_xprintf(ls->tabout_file,
                      GT_WU"\t"GT_WU"\t"GT_WU"\t%s\t"GT_WU"\t"GT_WU"\t"GT_WU"\t"
                      GT_WU"\t"GT_WU"\t"GT_WU"\t",
                      rng.start, rng.end, gt_ltrelement_length(&ls->element),
                      ls->element.seqid, lltr_rng.start, lltr_rng.end,
                      gt_ltrelement_leftltrlen(&ls->element), rltr_rng.start,
                      rltr_rng.end, gt_ltrelement_rightltrlen(&ls->element));
    }
    /* released on both the success and the error path; sdesc is not used
       beyond this point */
    gt_str_delete(sdesc);
    seq = gt_str_new();

    /* output TSDs */
    if (!had_err && ls->element.leftTSD != NULL)
    {
      GtRange tsd_rng;
      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.leftTSD);
      had_err = gt_extract_feature_sequence(seq,
                                       (GtGenomeNode*) ls->element.leftTSD,
                                       gt_symbol(gt_ft_target_site_duplication),
                                       false,
                                       NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                         ""GT_WU"\t"GT_WU"\t%s\t",
                         tsd_rng.start,
                         tsd_rng.end,
                         gt_str_get(seq));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    if (!had_err && ls->element.rightTSD != NULL)
    {
      GtRange tsd_rng;

      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.rightTSD);
      had_err = gt_extract_feature_sequence(seq,
                                       (GtGenomeNode*) ls->element.rightTSD,
                                       gt_symbol(gt_ft_target_site_duplication),
                                       false,
                                       NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                           ""GT_WU"\t"GT_WU"\t%s\t",
                           tsd_rng.start,
                           tsd_rng.end,
                           gt_str_get(seq));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    /* output PPT */
    if (!had_err && ls->element.ppt != NULL)
    {
      GtStrand ppt_strand = gt_feature_node_get_strand(ls->element.ppt);

      ppt_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.ppt);
      had_err = gt_extract_feature_sequence(seq,
                                            (GtGenomeNode*) ls->element.ppt,
                                            gt_symbol(gt_ft_RR_tract), false,
                                            NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&ppt_rng),
                            GT_FSWIDTH, ls->pptout_file);
        /* last column: distance between the PPT and the adjacent LTR
           boundary, chosen according to the PPT strand */
        gt_file_xprintf(ls->tabout_file,
                           ""GT_WU"\t"GT_WU"\t%s\t%c\t%d\t",
                           ppt_rng.start,
                           ppt_rng.end,
                           gt_str_get(seq),
                           GT_STRAND_CHARS[ppt_strand],
                           (ppt_strand == GT_STRAND_FORWARD ?
                               abs((int) (rltr_rng.start - ppt_rng.end)) :
                               abs((int) (lltr_rng.end - ppt_rng.start))));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t");

    /* output PBS */
    if (!had_err && ls->element.pbs != NULL)
    {
      GtStrand pbs_strand;

      pbs_strand = gt_feature_node_get_strand(ls->element.pbs);
      pbs_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.pbs);
      had_err = gt_extract_feature_sequence(seq,
                                           (GtGenomeNode*) ls->element.pbs,
                                           gt_symbol(gt_ft_primer_binding_site),
                                           false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&pbs_rng),
                            GT_FSWIDTH, ls->pbsout_file);
        /* NOTE(review): the attribute lookups are printed with %s unchecked;
           this assumes the PBS node always carries these attributes --
           confirm */
        gt_file_xprintf(ls->tabout_file,
                         ""GT_WU"\t"GT_WU"\t%c\t%s\t%s\t%s\t%s\t%s\t",
                         pbs_rng.start,
                         pbs_rng.end,
                         GT_STRAND_CHARS[pbs_strand],
                         gt_feature_node_get_attribute(ls->element.pbs, "trna"),
                         gt_str_get(seq),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "pbsoffset"),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "trnaoffset"),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "edist"));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t\t\t\t");

    /* output protein domains */
    if (!had_err && ls->element.pdoms != NULL)
    {
      GtStr *pdomorderstr = gt_str_new();
      for (i=0; !had_err && i<gt_array_size(ls->element.pdomorder); i++)
      {
        const char* key = *(const char**) gt_array_get(ls->element.pdomorder,
                                                       i);
        GtArray *entry = (GtArray*) gt_hashmap_get(ls->element.pdoms, key);
        had_err = write_pdom(ls, entry, key, ls->rmap, desc, err);
      }

      /* domain names are listed in element orientation */
      if (GT_STRAND_REVERSE == gt_feature_node_get_strand(ls->element.mainnode))
        gt_array_reverse(ls->element.pdomorder);

      for (i=0 ;!had_err && i<gt_array_size(ls->element.pdomorder); i++)
      {
        const char* name = *(const char**) gt_array_get(ls->element.pdomorder,
                                                        i);
        gt_str_append_cstr(pdomorderstr, name);
        if (i != gt_array_size(ls->element.pdomorder)-1)
          gt_str_append_cstr(pdomorderstr, "/");
      }
      gt_file_xprintf(ls->tabout_file, "%s", gt_str_get(pdomorderstr));
      gt_str_delete(pdomorderstr);
    }

    /* output LTRs (we just expect them to exist) */
    switch (gt_feature_node_get_strand(ls->element.mainnode))
    {
      case GT_STRAND_REVERSE:
        ltr5 = ls->element.rightLTR;
        ltr3 = ls->element.leftLTR;
        break;
      case GT_STRAND_FORWARD:
      default:
        ltr5 = ls->element.leftLTR;
        ltr3 = ls->element.rightLTR;
        break;
    }

    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr5,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false,
                                          NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr5out_file);
      gt_str_reset(seq);
    }
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr3,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false,
                                          NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr3out_file);
      gt_str_reset(seq);
    }

    /* output complete oriented element */
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq,
                                           (GtGenomeNode*) ls->element.mainnode,
                                           gt_symbol(gt_ft_LTR_retrotransposon),
                                           false,
                                           NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc,gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->elemout_file);
      gt_str_reset(seq);
    }
    gt_file_xprintf(ls->tabout_file, "\n");
    gt_str_delete(seq);
  }
  /* release per-element scratch data; the members are NULL if unused because
     the element was zeroed at the top */
  gt_hashmap_delete(ls->element.pdoms);
  gt_array_delete(ls->element.pdomorder);
  gt_free(ls->element.seqid);
  return had_err;
}
Esempio n. 7
0
/* Allocates a zeroed MutateArguments via gt_calloc(), attaches a new output
   file info object and returns the structure as an untyped pointer. */
static void* gt_seqmutate_arguments_new(void)
{
  MutateArguments *args;

  args = gt_calloc(1, sizeof (*args));
  args->ofi = gt_outputfileinfo_new();
  return args;
}
Esempio n. 8
0
/* Allocates a zeroed InterFeatArguments via gt_calloc(), attaches a new
   output file info object and returns the structure as an untyped pointer. */
static void* gt_mergefeat_arguments_new(void)
{
  InterFeatArguments *args;

  args = gt_calloc(1, sizeof (*args));
  args->ofi = gt_output_file_info_new();
  return args;
}
static void* gt_script_filter_arguments_new(void)
{
  GtScriptFilterArguments *arguments = gt_calloc(1, sizeof *arguments);
  return arguments;
}
/* Allocates a zeroed GtTranslateArguments via gt_calloc(), attaches a new
   output file info object and returns the structure as an untyped pointer. */
static void* gt_seqtranslate_arguments_new(void)
{
  GtTranslateArguments *args;

  args = gt_calloc(1, sizeof (*args));
  args->ofi = gt_output_file_info_new();
  return args;
}
Esempio n. 11
0
/* Allocates a GtError object via gt_calloc() and returns it. */
GtError* gt_error_new(void)
{
  GtError *new_error = gt_calloc(1, sizeof *new_error);

  return new_error;
}
Esempio n. 12
0
/* Parses the per-domain hit section of the HMMER output read from <instream>,
   line by line through <buf>, until a line starting with "Internal" is
   reached.
   A line starting with ">>" opens a new model: its name (the first blank-
   separated token after the marker) becomes <status>->cur_model. Each hit
   line matching the expected eight-field layout is turned into a
   GtHMMERSingleHit and added to <status>; if at least one hit was read for
   the current model, the alignment section is parsed next.
   Returns 0 on success. A ">>" line without a model name, or any failure of
   a called parser routine, yields a non-zero return value with <err> set. */
static int gt_ltrdigest_pdom_visitor_parse_domainhits(GtLTRdigestPdomVisitor
                                                                            *lv,
                                                     GtHMMERParseStatus *status,
                                                     char *buf,
                                                     FILE *instream,
                                                     GtError *err)
{
  int had_err = 0;
  GtUword i, nof_hits = 0;
  gt_assert(lv && instream && status);
  gt_error_check(err);

  had_err = pdom_parser_get_next_line(buf, instream, err);
  gt_assert(buf != NULL);
  while (!had_err && strncmp("Internal", buf, (size_t) 8)) {
    GtUword no, hmmfrom, hmmto, alifrom, alito;
    double score, evalue;
    char threshold_ok = '-';
    if ((buf[0] == '>' && buf[1] == '>')) {
      /* only tokenize when there is text after the ">>" marker; otherwise
         buf+3 would point past the string terminator */
      char *b = (buf[2] != '\0') ? strtok(buf+3, " ") : NULL;
      if (b == NULL) {
        gt_error_set(err, "malformed domain hit header in HMMER output: "
                          "missing model name");
        had_err = -1;
        break;
      }
      gt_str_reset(status->cur_model);
      gt_str_append_cstr(status->cur_model, b);
      had_err = pdom_parser_get_next_line(buf, instream, err);
      /* unless the model reports no individual hits, skip two more lines
         (presumably the table header -- confirm against HMMER output) */
      if (!had_err && strncmp("   [No individual", buf, (size_t) 17)) {
        for (i = 0UL; i < 2UL && !had_err; i++)
          had_err = pdom_parser_get_next_line(buf, instream, err);
      }
      nof_hits = 0UL;
      gt_hmmer_parse_status_mark_frame_finished(status);
    }
    /* consume consecutive hit lines; the loop ends at the first line that
       does not match the eight-field layout */
    while (!had_err &&
             8 == sscanf(buf, ""GT_WU" %c %lf %*f %*f %lf "GT_WU" "GT_WU" %*s "
                         GT_WU" "GT_WU"", &no,  &threshold_ok, &score, &evalue,
                         &hmmfrom, &hmmto, &alifrom, &alito)) {
      GtHMMERSingleHit *shit = gt_calloc((size_t) 1, sizeof (*shit));
      shit->hmmfrom = hmmfrom;
      shit->hmmto = hmmto;
      shit->alifrom = alifrom;
      shit->alito = alito;
      shit->score = score;
      shit->evalue = evalue;
      shit->strand = status->strand;
      shit->frame = (GtUword) status->frame;
      /* '!' in the threshold column marks the hit as reported */
      shit->reported = (threshold_ok == '!');
      shit->chains = gt_array_new(sizeof (GtUword));
      gt_hmmer_parse_status_add_hit(status, shit);
      nof_hits++;
      had_err = pdom_parser_get_next_line(buf, instream, err);
    }
    if (!had_err) {
      if (nof_hits > 0)
        had_err = gt_ltrdigest_pdom_visitor_parse_alignments(lv, status, buf,
                                                             instream, err);
      else
        had_err = pdom_parser_get_next_line(buf, instream, err);
    }
  }
  return had_err;
}
Esempio n. 13
0
/* Allocates a GtDiscDistri object via gt_calloc() and returns it. */
GtDiscDistri* gt_disc_distri_new(void)
{
  GtDiscDistri *distri = gt_calloc(1, sizeof *distri);

  return distri;
}
Esempio n. 14
0
/* Allocates a GtDlist via gt_calloc() and records <cmp_func> as its
   comparison function. Returns the new list. */
GtDlist* gt_dlist_new(GtCompare cmp_func)
{
  GtDlist *list;

  list = gt_calloc(1, sizeof (*list));
  list->cmp_func = cmp_func;
  return list;
}
Esempio n. 15
0
/* Searches for primer binding site candidates of <element>.
   A window of 2*<o>->radius+1 characters is taken from <seq> (positioned
   <o>->radius characters before the offset gt_ltrelement_leftltrlen(element))
   and likewise from <rev_seq> (relative to the right LTR length). Every
   sequence in <o>->trna_lib is reverse-complemented and aligned against both
   windows with gt_swalign(); the alignments are passed to gt_pbs_add_hit()
   for the forward and reverse strand respectively.
   Returns a new GtPBSResults object whose hits are sorted with
   gt_pbs_hit_compare. */
GtPBSResults* gt_pbs_find(const char *seq,
                          const char *rev_seq,
                          GtLTRElement *element,
                          GtPBSOptions *o,
                          GtError *err)
{
  GtSeq *seq_forward, *seq_rev;
  GtPBSResults *results;
  unsigned long j;
  GtAlignment *ali;
  GtAlphabet *a;
  GtScoreFunction *sf;

  /* validate the arguments before <o> is dereferenced for the first time */
  gt_assert(seq && rev_seq && element && o);

  a = gt_alphabet_new_dna();
  sf = gt_dna_scorefunc_new(a,
                            o->ali_score_match,
                            o->ali_score_mismatch,
                            o->ali_score_insertion,
                            o->ali_score_deletion);
  gt_assert(sf && a);

  results = gt_pbs_results_new(element, o);

  /* NOTE(review): assumes the LTR lengths are at least o->radius and that
     both input sequences extend far enough for the windows -- confirm
     against callers */
  seq_forward = gt_seq_new(seq + (gt_ltrelement_leftltrlen(element))
                               - (o->radius),
                           2*o->radius + 1,
                           a);

  seq_rev     = gt_seq_new(rev_seq + (gt_ltrelement_rightltrlen(element))
                                   - (o->radius),
                           2*o->radius + 1,
                           a);

  for (j=0;j<gt_bioseq_number_of_sequences(o->trna_lib);j++)
  {
    GtSeq *trna_seq, *trna_from3;
    char *trna_from3_full;
    unsigned long trna_seqlen;

    trna_seq = gt_bioseq_get_seq(o->trna_lib, j);
    trna_seqlen = gt_seq_length(trna_seq);

    /* work on a private copy, the library sequence stays untouched; the copy
       is handed to gt_seq_new_own() below and is not freed separately here */
    trna_from3_full = gt_calloc(trna_seqlen, sizeof (char));
    memcpy(trna_from3_full, gt_seq_get_orig(trna_seq),
           sizeof (char)*trna_seqlen);
    /* NOTE(review): the result of gt_reverse_complement() is discarded; if it
       fails, <err> may be left set while this function still returns
       results -- confirm callers cope with that */
    (void) gt_reverse_complement(trna_from3_full, trna_seqlen, err);
    trna_from3 = gt_seq_new_own(trna_from3_full, trna_seqlen, a);

    ali = gt_swalign(seq_forward, trna_from3, sf);
    gt_pbs_add_hit(results->hits, ali, o, trna_seqlen,
                   gt_seq_get_description(trna_seq), GT_STRAND_FORWARD,
                   results);
    gt_alignment_delete(ali);

    ali = gt_swalign(seq_rev, trna_from3, sf);
    gt_pbs_add_hit(results->hits, ali, o, trna_seqlen,
                   gt_seq_get_description(trna_seq), GT_STRAND_REVERSE,
                   results);
    gt_alignment_delete(ali);

    gt_seq_delete(trna_from3);
  }
  gt_seq_delete(seq_forward);
  gt_seq_delete(seq_rev);
  gt_score_function_delete(sf);
  gt_alphabet_delete(a);
  gt_array_sort(results->hits, gt_pbs_hit_compare);
  return results;
}
Esempio n. 16
0
/* Allocates a GthBSSMParam object via gt_calloc() and returns it. */
GthBSSMParam* gth_bssm_param_new(void)
{
    GthBSSMParam *bssm_param = gt_calloc(1, sizeof *bssm_param);

    return bssm_param;
}
Esempio n. 17
0
static void* gt_encseq_check_arguments_new(void)
{
  GtEncseqCheckArguments *arguments = gt_calloc(1, sizeof *arguments);
  return arguments;
}
Esempio n. 18
0
/* Collects the sequence identifiers of <orig_es> into <condenseq>->orig_ids.
   The ID length of every description is determined with condenseq_idlen() and
   a histogram of these lengths is built. From it two storage schemes are
   compared: padding every ID to the maximum length (cost: the "wasted"
   padding bytes) versus storing IDs back to back plus an integer set of end
   positions (cost: its estimated memory size). The cheaper one is chosen and
   logged via <logger>; for the constant-length scheme <condenseq>->id_len is
   set, otherwise <condenseq>->sdstab is created and filled.
   An input without sequences is handled: the histogram is then never
   indexed and the total ID length stays 0. */
static void condenseq_process_descriptions(GtCondenseq *condenseq,
                                           const GtEncseq *orig_es,
                                           GtLogger *logger)
{
  GtUword    *dist;
  const char *desc;
  char       *cur_id_startptr;
  GtUword     desclen,
              dist_idx,
              distsize = (GtUword) 128,
              idlen,
              idx,
              maxendidx = 0,
              maxlen = 0,
              minlen = GT_UWORD_MAX,
              wastedmem = 0,
              sdssize,
              cur_total_id_len = 0;
  bool        use_const_len;

  condenseq->ids_total_len = 0;
  /* dist[l] counts the IDs of length l */
  dist = gt_calloc((size_t) distsize, sizeof (*dist));

  for (idx = 0; idx < condenseq->orig_num_seq; ++idx) {
    desc = gt_encseq_description(orig_es, &desclen, idx);
    idlen = condenseq_idlen(desc, desclen);
    if (distsize <= idlen) {
      /* grow the histogram and zero the newly added counters */
      dist = gt_realloc(dist, (size_t) (idlen + 1) * sizeof (*dist));
      for (dist_idx = distsize; dist_idx <= idlen; dist_idx++)
        dist[dist_idx] = 0;
      distsize = idlen + 1;
    }
    dist[idlen]++;
    if (idlen > maxlen)
      maxlen = idlen;
    if (idlen < minlen)
      minlen = idlen;
    maxendidx += idlen;
  }

  /* calculate memory we would waste if we assume equal length, and size if we
     store actual descriptions.
     The bound is inclusive: the term for maxlen adds nothing to wastedmem,
     and with no sequences at all (minlen > maxlen) the loop body never runs,
     so dist is never indexed out of range. */
  for (dist_idx = minlen; dist_idx <= maxlen; dist_idx++) {
    wastedmem += dist[dist_idx] * (maxlen - dist_idx);
    condenseq->ids_total_len += dist[dist_idx] * dist_idx;
  }

  sdssize = (GtUword) gt_intset_best_memory_size(maxendidx,
                                                 condenseq->orig_num_seq);
  use_const_len = wastedmem < sdssize;

  if (use_const_len) {
    gt_logger_log(logger, "Condenseq descriptions will use const len, " GT_WU
                  ", \"wasting\" " GT_WU " bytes. SDS would use "
                  GT_WU " bytes",
                  maxlen, wastedmem, sdssize);
    condenseq->id_len = maxlen;
    condenseq->ids_total_len = maxlen * condenseq->orig_num_seq;
  }
  else {
    gt_logger_log(logger, "Condenseq descriptions will use sdstab with size "
                  GT_WU ". Const length would have wasted " GT_WU " bytes.",
                  sdssize, wastedmem);
    condenseq->sdstab = gt_intset_best_new(maxendidx, condenseq->orig_num_seq);
  }
  condenseq->orig_ids = gt_calloc((size_t) condenseq->ids_total_len,
                                  sizeof (*condenseq->orig_ids));

  /* second pass: copy the IDs into the buffer, either in fixed-size slots or
     back to back with their end positions recorded in sdstab */
  cur_id_startptr = condenseq->orig_ids;
  for (idx = 0; idx < condenseq->orig_num_seq; ++idx) {
    desc = gt_encseq_description(orig_es, &desclen, idx);
    idlen = condenseq_idlen(desc, desclen);
    gt_assert(idlen <= maxlen);
    (void) memcpy(cur_id_startptr, desc, (size_t) idlen);
    if (use_const_len) {
      cur_id_startptr += maxlen;
      cur_total_id_len += maxlen;
    }
    else {
      cur_id_startptr += idlen;
      cur_total_id_len += idlen;
      gt_intset_add(condenseq->sdstab, cur_total_id_len);
    }
  }
  gt_assert(cur_total_id_len == condenseq->ids_total_len);
  gt_free(dist);
}
/* Writes the run parameters to the file "<ls->fileprefix>_conditions.csv" as
   tab-separated lines. Which parameter groups are written depends on the
   GT_LTRDIGEST_RUN_PPT / _PBS / _PDOM bits in <tests_to_run>. File names that
   do not start with GT_PATH_SEPARATOR are prefixed with the current working
   directory; a NULL <gfffilename> is reported as "<stdin>".
   Returns 0 on success and -1 if the metadata file could not be opened (in
   which case <err> is set by gt_file_open()). */
int gt_ltrdigest_file_out_stream_write_metadata(GtLTRdigestFileOutStream *ls,
                                         int tests_to_run,
                                         const char *trnafilename,
                                         const char *gfffilename,
                                         GtRange ppt_len,
                                         GtRange ubox_len,
                                         unsigned int ppt_radius,
                                         GtRange alilen,
                                         unsigned int max_edist,
                                         GtRange offsetlen,
                                         GtRange trnaoffsetlen,
                                         unsigned int pbs_radius,
                                         GtStrArray *hmm_files,
                                         unsigned int chain_max_gap_length,
                                         double evalue_cutoff,
                                         GtError *err)
{
  int buflen = 1024;
  GtFile *metadata_file;
  char *buffer,
       fn[GT_MAXFILENAMELEN];

  (void) snprintf(fn, (size_t) (GT_MAXFILENAMELEN-1),
                  "%s_conditions.csv", ls->fileprefix);
  metadata_file = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, fn, "w+", err);
  if (!metadata_file)
    return -1;

  buffer = gt_calloc((size_t) (buflen+1), sizeof (char));
  /* get working directory, enlarging the buffer until the path fits.
     NOTE(review): the loop retries on every getcwd() failure, not only on a
     too-small buffer; a persistent failure for another reason would never
     terminate -- consider checking errno for ERANGE */
  while (getcwd(buffer, (size_t) buflen) == NULL) {
    buflen += 1024;
    buffer = gt_realloc(buffer, (buflen+1) * sizeof (char));
  }
  gt_assert(buffer && strlen(buffer) > 0);

  /* append working dir to relative paths if necessary */
  if (gfffilename == NULL) {
    gt_file_xprintf(metadata_file,
                       "GFF3 input used\t<stdin>\n");
  } else {
    if (gfffilename[0] != GT_PATH_SEPARATOR)
      gt_file_xprintf(metadata_file,
                         "GFF3 input used\t%s/%s\n", buffer, gfffilename);
    else
      gt_file_xprintf(metadata_file,
                         "GFF3 input used\t%s\n", gfffilename);
  }

  if (tests_to_run & GT_LTRDIGEST_RUN_PPT)
  {
    gt_file_xprintf(metadata_file,
                       "PPT length\t"GT_WU"-"GT_WU"nt\t8-30nt\n",
                       ppt_len.start,
                       ppt_len.end);
    gt_file_xprintf(metadata_file,
                       "U-box length\t"GT_WU"-"GT_WU"nt\t3-30nt\n",
                       ubox_len.start,
                       ubox_len.end);
    gt_file_xprintf(metadata_file,
                       "PPT search radius\t%u\t30\n", ppt_radius);
  }

  if (tests_to_run & GT_LTRDIGEST_RUN_PBS)
  {
    /* NOTE(review): unlike gfffilename, trnafilename is dereferenced without
       a NULL check; assumes it is always set when PBS detection runs */
    if (trnafilename[0] != GT_PATH_SEPARATOR)
      gt_file_xprintf(metadata_file,
                         "tRNA library for PBS detection\t%s/%s\n",
                         buffer, trnafilename);
    else
      gt_file_xprintf(metadata_file,
                         "tRNA library for PBS detection\t%s\n",
                         trnafilename);
    gt_file_xprintf(metadata_file,
                       "allowed PBS/tRNA alignment length"
                       " range\t"GT_WU"-"GT_WU"nt\t11-30nt\n",
                       alilen.start,
                       alilen.end);
    gt_file_xprintf(metadata_file,
                       "PBS/tRNA maximum unit edit distance\t%u\t1\n",
                       max_edist);
    gt_file_xprintf(metadata_file,
                       "allowed PBS offset from 5' LTR range"
                       "\t"GT_WU"-"GT_WU"nt\t0-5nt\n",
                       offsetlen.start,
                       offsetlen.end);
    gt_file_xprintf(metadata_file,
                       "allowed PBS offset from 3' tRNA end"
                       " range\t"GT_WU"-"GT_WU"nt\t0-5nt\n",
                       trnaoffsetlen.start,
                       trnaoffsetlen.end);
    /* pbs_radius is unsigned, so it is printed with %u */
    gt_file_xprintf(metadata_file,
                       "PBS search radius\t%u\t30\n", pbs_radius);
  }

  if (tests_to_run & GT_LTRDIGEST_RUN_PDOM)
  {
    GtUword i;
    gt_file_xprintf(metadata_file,
                       "Protein domain models\t"GT_WU" (",
                       gt_str_array_size(hmm_files));
    /* comma-separated list of model files, no trailing separator */
    for (i=0;i<gt_str_array_size(hmm_files);i++)
    {
      gt_file_xprintf(metadata_file, "%s", gt_str_array_get(hmm_files, i));
      if (i != gt_str_array_size(hmm_files)-1)
        gt_file_xprintf(metadata_file, ", ");
    }
    gt_file_xprintf(metadata_file, ")\n");
    gt_file_xprintf(metadata_file,
                       "pHMM e-value cutoff \t%g\t%g\n",
                       evalue_cutoff, 0.000001);
    /* the constant is written as unsigned to match its %u conversion */
    gt_file_xprintf(metadata_file,
                       "maximal allowed gap length between fragments to chain"
                       " \t%u\t%u\n",
                       chain_max_gap_length, 50U);
  }

  gt_file_xprintf(metadata_file, "\n");
  /* metadata_file is known to be non-NULL here (checked after opening) */
  gt_file_delete(metadata_file);
  gt_free(buffer);
  return 0;
}
Esempio n. 20
0
/* Transfers the members of <condenseq> through <io_func> on <fp>, in a fixed
   order: original length, format version, sequence count, link count,
   unique count, the link and unique entries, ssptab (only for more than one
   sequence), id_len, sdstab (only if id_len is GT_UNDEF_UWORD),
   ids_total_len and finally the ID buffer itself.
   NOTE(review): gt_condenseq_io_one() is not defined in this view; it
   presumably is a macro that passes one value through io_func using fp and
   err from this scope, so the same routine can serve reading and writing --
   confirm.
   Returns 0 on success and a non-zero value on failure. */
static int condenseq_io(GtCondenseq *condenseq,
                        FILE* fp,
                        GtIOFunc io_func,
                        GtError *err)
{
  int had_err = 0;
  int file_format = GT_CONDENSEQ_VERSION;
  GtUword idx;
  had_err = gt_condenseq_io_one(condenseq->orig_length);
  if (!had_err)
    had_err = gt_condenseq_io_one(file_format);
  /* a differing version can only show up if the call above changed
     file_format, i.e. presumably when reading an existing index */
  if (!had_err && file_format != GT_CONDENSEQ_VERSION) {
    gt_error_set(err, "condenseq index is format version %d, current is "
                 "%d -- please re-encode",
                 file_format, GT_CONDENSEQ_VERSION);
    had_err = -1;
  }
  if (!had_err)
    had_err = gt_condenseq_io_one(condenseq->orig_num_seq);
  if (!had_err)
    had_err = gt_condenseq_io_one(condenseq->ldb_nelems);
  if (!had_err) {
    if (condenseq->ldb_nelems == 0) {
      gt_warning("compression of condenseq did not succeed in finding any "
                 "compressable similarities, maybe the input is to small or "
                 "the chosen parameters should be reconsidered.");
    }
    /* allocate the link table if it does not exist yet */
    if (condenseq->links == NULL) {
      condenseq->links = gt_calloc((size_t) condenseq->ldb_nelems,
                                   sizeof (*condenseq->links));
      condenseq->ldb_allocated = condenseq->ldb_nelems;
    }

    had_err = gt_condenseq_io_one(condenseq->udb_nelems);
  }

  if (!had_err) {
    gt_assert(condenseq->udb_nelems > 0);

    /* allocate the table of unique entries if it does not exist yet; unlike
       the link table it is not zero-initialized */
    if (condenseq->uniques == NULL) {
      condenseq->uniques = gt_malloc(sizeof (*condenseq->uniques) *
                                     condenseq->udb_nelems );
      condenseq->udb_allocated = condenseq->udb_nelems;
    }
  }

  /* transfer the individual link entries, stopping at the first error */
  for (idx = 0; !had_err && idx < condenseq->ldb_nelems; idx++) {
    had_err = condenseq_linkentry_io(&condenseq->links[idx], fp, io_func, err);
  }

  /* transfer the individual unique entries, stopping at the first error */
  for (idx = 0; !had_err && idx < condenseq->udb_nelems; idx++) {
    had_err = condenseq_uniqueentry_io(&condenseq->uniques[idx], fp, io_func,
                                       err);
  }
  /* ssptab is only transferred when there is more than one sequence */
  if (!had_err && condenseq->orig_num_seq > (GtUword) 1) {
    condenseq->ssptab = gt_intset_io(condenseq->ssptab, fp, err);
    if (condenseq->ssptab == NULL)
      had_err = 1;
  }
  if (!had_err)
    had_err = gt_condenseq_io_one(condenseq->id_len);
  if (!had_err) {
    /* an undefined id_len means sdstab is transferred as well */
    if (condenseq->id_len == GT_UNDEF_UWORD) {
      condenseq->sdstab = gt_intset_io(condenseq->sdstab, fp, err);
      if (condenseq->sdstab == NULL)
        had_err = 1;
    }
  }
  if (!had_err)
    had_err = gt_condenseq_io_one(condenseq->ids_total_len);
  if (!had_err) {
    /* size the ID buffer to ids_total_len bytes before transferring it */
    condenseq->orig_ids = gt_realloc(condenseq->orig_ids,
                                     (size_t) condenseq->ids_total_len);
    had_err = io_func(condenseq->orig_ids, sizeof (*condenseq->orig_ids),
                      (size_t) condenseq->ids_total_len, fp, err);
  }
  return had_err;
}
Esempio n. 21
0
/* Allocates a zeroed GtEncseqBitextractArguments via gt_calloc(), gives it a
   new, empty readmode string and returns it as an untyped pointer. */
static void* gt_encseq_bitextract_arguments_new(void)
{
  GtEncseqBitextractArguments *args;

  args = gt_calloc(1, sizeof (*args));
  args->readmode = gt_str_new();
  return args;
}
Esempio n. 22
0
/* Allocates a GtToolinfo object via gt_calloc() and returns it. */
GtToolinfo* gt_toolinfo_new(void)
{
  GtToolinfo *toolinfo = gt_calloc(1, sizeof *toolinfo);

  return toolinfo;
}
Esempio n. 23
0
/* Allocates the argument record of the encseq encode tool with gt_calloc()
   and gives its <indexname> member a string created by gt_str_new().
   The record is returned as an untyped pointer. */
static void* gt_encseq_encode_arguments_new(void)
{
  GtEncseqEncodeArguments *args;

  args = gt_calloc(1, sizeof *args);
  args->indexname = gt_str_new();
  return args;
}
Esempio n. 24
0
/* Returns a new GtTypecheckInfo allocated with gt_calloc() whose
   <typecheck> member holds a string created by gt_str_new(). */
GtTypecheckInfo* gt_typecheck_info_new(void)
{
  GtTypecheckInfo *info;

  info = gt_calloc(1, sizeof *info);
  info->typecheck = gt_str_new();
  return info;
}
Esempio n. 25
0
/* Creates a new HCR encoder for the FASTQ files named in <files>.

   <files> and <alpha> are stored by pointer in the returned object (no copy
   is made here). If <descs> is true, a description encoder is created as
   well and, if <timer> is not NULL, the timer is handed to it. <qrange> is
   stored in the sequence encoder and passed to the <base,qual> distribution;
   when qrange.start is defined it must differ from qrange.end.

   Every file is scanned once: all reads of one file have to be of equal
   length, and every read (including the first one of each file) is added to
   the <base,qual> pair distribution from which the Huffman code is built.
   For file i, fileinfos[i].readnum receives the cumulative number of reads
   of files 0..i and fileinfos[i].readlength the read length of that file
   (0 if the file contains no read).

   Returns the new encoder, or NULL on failure. */
GtHcrEncoder *gt_hcr_encoder_new(GtStrArray *files, GtAlphabet *alpha,
                                 bool descs, GtQualRange qrange, GtTimer *timer,
                                 GtError *err)
{
    GtBaseQualDistr *bqd;
    GtHcrEncoder *hcr_enc;
    GtSeqIterator *seqit;
    GtStrArray *file;
    int had_err = 0,
        status;
    GtUword len1,
            len2,
            i,
            num_of_reads;
    const GtUchar *seq,
          *qual;
    char *desc;

    gt_error_check(err);
    gt_assert(alpha && files);

    if (timer != NULL)
        gt_timer_show_progress(timer, "get <base,qual> distr", stdout);

    /* reject an empty quality range before anything is allocated */
    if (qrange.start != GT_UNDEF_UINT && qrange.start == qrange.end) {
        gt_error_set(err, "qrange.start must unequal qrange.end");
        return NULL;
    }

    hcr_enc = gt_malloc(sizeof (GtHcrEncoder));
    hcr_enc->files = files;
    hcr_enc->num_of_files = gt_str_array_size(files);
    hcr_enc->num_of_reads = 0;
    hcr_enc->page_sampling = false;
    hcr_enc->regular_sampling = false;
    hcr_enc->sampling_rate = 0;
    hcr_enc->pagesize = gt_pagesize();
    if (descs) {
        hcr_enc->encdesc_encoder = gt_encdesc_encoder_new();
        if (timer != NULL)
            gt_encdesc_encoder_set_timer(hcr_enc->encdesc_encoder, timer);
    }
    else
        hcr_enc->encdesc_encoder = NULL;

    hcr_enc->seq_encoder = gt_malloc(sizeof (GtHcrSeqEncoder));
    hcr_enc->seq_encoder->alpha = alpha;
    hcr_enc->seq_encoder->sampling = NULL;
    hcr_enc->seq_encoder->fileinfos = gt_calloc((size_t) hcr_enc->num_of_files,
                                      sizeof (*(hcr_enc->seq_encoder->fileinfos)));
    hcr_enc->seq_encoder->qrange = qrange;
    bqd = hcr_base_qual_distr_new(alpha, qrange);

    /* check if reads in the same file are of same length and get
       <base, quality> pair distribution; stop at the first failing file so
       that <err> is not handed to further calls once it has been set */
    for (i = 0; !had_err && i < hcr_enc->num_of_files; i++) {
        /* per-file state: a file without reads must not inherit the count of
           the previous file or store an uninitialised read length */
        num_of_reads = 0;
        len1 = 0;

        file = gt_str_array_new();
        gt_str_array_add(file, gt_str_array_get_str(files, i));
        seqit = gt_seq_iterator_fastq_new(file, err);
        if (!seqit) {
            gt_error_set(err, "cannot initialize GtSeqIteratorFastQ object");
            had_err = -1;
        }
        if (!had_err) {
            gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(alpha));
            gt_seq_iterator_set_quality_buffer(seqit, &qual);
            status = gt_seq_iterator_next(seqit, &seq, &len1, &desc, err);

            if (status == 1) {
                num_of_reads = 1UL;
                /* the first read defines the read length of this file and
                   contributes to the distribution like every other read */
                if (hcr_base_qual_distr_add(bqd, qual, seq, len1) != 0)
                    had_err = -1;
                while (!had_err) {
                    status = gt_seq_iterator_next(seqit, &seq, &len2, &desc, err);
                    if (status == -1)
                        had_err = -1;
                    if (status != 1)
                        break;
                    if (len2 != len1) {
                        gt_error_set(err, "reads have to be of equal length");
                        had_err = -1;
                        break;
                    }
                    if (hcr_base_qual_distr_add(bqd, qual, seq, len1) != 0)
                        had_err = -1;
                    num_of_reads++;
                }
            }
            else if (status == -1)
                had_err = -1;

            if (!had_err) {
                /* readnum is cumulative over all files seen so far */
                if (i == 0)
                    hcr_enc->seq_encoder->fileinfos[i].readnum = num_of_reads;
                else
                    hcr_enc->seq_encoder->fileinfos[i].readnum =
                        hcr_enc->seq_encoder->fileinfos[i - 1].readnum + num_of_reads;
                hcr_enc->seq_encoder->fileinfos[i].readlength = len1;
            }
        }
        hcr_enc->num_of_reads += num_of_reads;
        gt_str_array_delete(file);
        gt_seq_iterator_delete(seqit);
    }

    if (!had_err) {
        hcr_base_qual_distr_trim(bqd);
        if (timer != NULL)
            gt_timer_show_progress(timer, "build huffman tree for sequences and"
                                   " qualities", stdout);
        hcr_enc->seq_encoder->huffman =
            gt_huffman_new(bqd,
                           hcr_base_qual_distr_func,
                           (GtUword) bqd->ncols * bqd->nrows);
        hcr_enc->seq_encoder->qual_offset = bqd->qual_offset;
    }

    /* the distribution is only needed while building the Huffman code;
       release it on the failure path as well */
    hcr_base_qual_distr_delete(bqd);

    if (had_err) {
        /* NOTE(review): hcr_enc, its seq_encoder, the fileinfos array and the
           optional encdesc_encoder are still not released here; doing so
           needs this module's matching release functions. */
        return NULL;
    }
    return hcr_enc;
}
Esempio n. 26
0
/* Returns a new GtXRFCheckInfo allocated with gt_calloc() whose <xrfcheck>
   member holds a string created by gt_str_new(). */
GtXRFCheckInfo* gt_xrfcheck_info_new(void)
{
  GtXRFCheckInfo *info;

  info = gt_calloc(1, sizeof *info);
  info->xrfcheck = gt_str_new();
  return info;
}
Esempio n. 27
0
/* Allocates the argument record of the GFF3 validator tool with gt_calloc()
   and attaches a new type check info object (gt_typecheck_info_new()) to
   its <tci> member. The record is returned as an untyped pointer. */
static void* gt_gff3validator_arguments_new(void)
{
  GFF3ValidatorArguments *args;

  args = gt_calloc(1, sizeof *args);
  args->tci = gt_typecheck_info_new();
  return args;
}
Esempio n. 28
0
/* Unit test for the uint8_t variants of the bit string accessors.
 *
 * The visible part
 *  1. stores a random number of random uint8_t values back to back in
 *     bitStore, each with the bit width reported by gt_requiredUInt8Bits(),
 *     and reads them back with gt_bsGetUInt8();
 *  2. checks single-bit access (gt_bsGetBit, bsSetBit, gt_bsClearBit,
 *     bsToggleBit) against the first stored value.
 *
 * Failures are recorded in had_err via gt_ensure().
 *
 * NOTE(review): the function is truncated in this listing (no closing
 * brace); freeResourcesAndReturn() is defined outside this excerpt and
 * presumably releases the buffers and returns its argument -- verify. */
int
gt_bitPackStringInt8_unit_test(GtError *err)
{
  BitString bitStore = NULL;
  BitString bitStoreCopy = NULL;
  uint8_t *randSrc = NULL; /*< create random ints here for input as bit
                                *  store */
  uint8_t *randCmp = NULL; /*< used for random ints read back */
  unsigned *numBitsList = NULL;
  size_t i, numRnd;
  BitOffset offsetStart, offset;
  int had_err = 0;
  /* random start offset smaller than the bit width of one uint8_t */
  offset = offsetStart = random()%(sizeof (uint8_t) * CHAR_BIT);
  /* random number of test values in 0..MAX_RND_NUMS_uint8_t */
  numRnd = random() % (MAX_RND_NUMS_uint8_t + 1);
  gt_log_log("offset=%lu, numRnd=%lu\n",
          (long unsigned)offsetStart, (long unsigned)numRnd);
  {
    /* enough bits for numRnd full-width values placed after the offset */
    BitOffset numBits = sizeof (uint8_t) * CHAR_BIT * numRnd + offsetStart;
    randSrc = gt_malloc(sizeof (uint8_t)*numRnd);
    bitStore = gt_malloc(bitElemsAllocSize(numBits) * sizeof (BitElem));
    /* unlike bitStore, the copy is allocated with gt_calloc() */
    bitStoreCopy = gt_calloc(bitElemsAllocSize(numBits), sizeof (BitElem));
    randCmp = gt_malloc(sizeof (uint8_t)*numRnd);
  }
  /* first test unsigned types */
  gt_log_log("gt_bsStoreUInt8/gt_bsGetUInt8: ");
  /* write phase: generate each value and store it with its minimal width */
  for (i = 0; i < numRnd; ++i)
  {
    /* 8 > 32 is false, so the #else branch is always the one compiled */
#if 8 > 32 && LONG_BIT < 8
    uint8_t v = randSrc[i] = (uint8_t)random() << 32 | random();
#else /* 8 > 32 && LONG_BIT < 8 */
    uint8_t v = randSrc[i] = random();
#endif /* 8 > 32 && LONG_BIT < 8 */
    int bits = gt_requiredUInt8Bits(v);
    gt_bsStoreUInt8(bitStore, offset, bits, v);
    offset += bits;
  }
  /* read phase: recompute the widths and compare with the source values */
  offset = offsetStart;
  for (i = 0; i < numRnd; ++i)
  {
    uint8_t v = randSrc[i];
    int bits = gt_requiredUInt8Bits(v);
    uint8_t r = gt_bsGetUInt8(bitStore, offset, bits);
    gt_ensure(had_err, r == v);
    if (had_err)
    {
      gt_log_log("Expected %"PRIu8", got %"PRIu8", i = %lu\n",
              v, r, (unsigned long)i);
      freeResourcesAndReturn(had_err);
    }
    offset += bits;
  }
  gt_log_log("passed\n");
  /* single-bit operations, checked against the first stored value */
  if (numRnd > 0)
  {
    uint8_t v = randSrc[0], r = 0;
    unsigned numBits = gt_requiredUInt8Bits(v);
    /* this i shadows the size_t i of the enclosing scope */
    BitOffset i = offsetStart + numBits;
    /* mask keeps the low numBits bits (all 8 bits if numBits is 8) */
    uint8_t mask = ~(uint8_t)0;
    if (numBits < 8)
      mask = ~(mask << numBits);
    gt_log_log("bsSetBit, gt_bsClearBit, bsToggleBit, gt_bsGetBit: ");
    /* compare the bits of v, lowest first, with the stored bits at
       descending positions starting at offsetStart + numBits - 1 */
    while (v)
    {
      int lowBit = v & 1;
      v >>= 1;
      gt_ensure(had_err, lowBit == (r = gt_bsGetBit(bitStore, --i)));
      if (had_err)
      {
        gt_log_log("Expected %d, got %d, i = %llu\n",
                lowBit, (int)r, (unsigned long long)i);
        freeResourcesAndReturn(had_err);
      }
    }
    /* rebuild the value bit by bit in bitStoreCopy, after presetting the
       target range with a random bit value */
    i = offsetStart + numBits;
    gt_bsClear(bitStoreCopy, offsetStart, numBits, random()&1);
    v = randSrc[0];
    /* runs until i reaches 0, so positions below offsetStart are visited
       too; presumably v is 0 by then and those bits are cleared -- verify */
    while (i)
    {
      int lowBit = v & 1;
      v >>= 1;
      if (lowBit)
        bsSetBit(bitStoreCopy, --i);
      else
        gt_bsClearBit(bitStoreCopy, --i);
    }
    v = randSrc[0];
    r = gt_bsGetUInt8(bitStoreCopy, offsetStart, numBits);
    gt_ensure(had_err, r == v);
    if (had_err)
    {
      gt_log_log("Expected %"PRIu8", got %"PRIu8"\n", v, r);
      freeResourcesAndReturn(had_err);
    }
    /* toggling all numBits bits must yield the masked complement of v */
    for (i = 0; i < numBits; ++i)
      bsToggleBit(bitStoreCopy, offsetStart + i);
    r = gt_bsGetUInt8(bitStoreCopy, offsetStart, numBits);
    gt_ensure(had_err, r == (v = (~v & mask)));
    if (had_err)
    {
      gt_log_log("Expected %"PRIu8", got %"PRIu8"\n", v, r);
      freeResourcesAndReturn(had_err);
    }
    gt_log_log("passed\n");
  }
Esempio n. 29
0
/* Returns a new GtSplitter object obtained from gt_calloc(); no member is
   set explicitly here. */
GtSplitter* gt_splitter_new(void)
{
  GtSplitter *splitter = gt_calloc(1, sizeof *splitter);
  return splitter;
}
Esempio n. 30
0
/* Allocates the argument record of the splitfasta tool with gt_calloc() and
   gives its <splitdesc> member a string created by gt_str_new().
   The record is returned as an untyped pointer. */
static void* gt_splitfasta_arguments_new(void)
{
  SplitfastaArguments *args;

  args = gt_calloc(1, sizeof *args);
  args->splitdesc = gt_str_new();
  return args;
}