Beispiel #1
0
/* Visitor callback for meta nodes: writes the node as "##<directive> <data>"
   to the output string if one is set, to the output file otherwise.
   If no version string has been shown yet, a directive starting with the GFF
   or GVF version directive is accepted as that version string; for any other
   directive gff3_version_string() is invoked first. Always returns 0. */
static int gff3_visitor_meta_node(GtNodeVisitor *nv, GtMetaNode *mn,
                                  GT_UNUSED GtError *err)
{
  GtGFF3Visitor *visitor;
  gt_error_check(err);
  visitor = gff3_visitor_cast(nv);
  gt_assert(nv && mn);

  if (!visitor->version_string_shown) {
    const char *directive = gt_meta_node_get_directive(mn);
    bool is_version_directive =
      !strncmp(directive, GT_GFF_VERSION_DIRECTIVE,
               strlen(GT_GFF_VERSION_DIRECTIVE)) ||
      !strncmp(directive, GT_GVF_VERSION_DIRECTIVE,
               strlen(GT_GVF_VERSION_DIRECTIVE));
    if (is_version_directive)
      visitor->version_string_shown = true;
    else
      gff3_version_string(nv);
  }

  if (visitor->outstr) {
    GtStr *out = visitor->outstr;
    gt_str_append_cstr(out, "##");
    gt_str_append_cstr(out, gt_meta_node_get_directive(mn));
    gt_str_append_char(out, ' ');
    gt_str_append_cstr(out, gt_meta_node_get_data(mn));
    gt_str_append_char(out, '\n');
  }
  else {
    gt_file_xprintf(visitor->outfp, "##%s %s\n",
                    gt_meta_node_get_directive(mn),
                    gt_meta_node_get_data(mn));
  }
  return 0;
}
Beispiel #2
0
/* Visitor callback for region nodes: makes sure the version string has been
   emitted, then writes "<GT_GFF_SEQUENCE_REGION>   <seqid> <start> <end>"
   followed by a newline to the output string (if set) or the output file.
   Always returns 0. */
static int gff3_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                    GT_UNUSED GtError *err)
{
  GtGFF3Visitor *visitor;
  GtGenomeNode *region;
  gt_error_check(err);
  visitor = gff3_visitor_cast(nv);
  gt_assert(nv && rn);
  region = (GtGenomeNode*) rn;
  gff3_version_string(nv);
  if (visitor->outstr) {
    GtStr *out = visitor->outstr;
    gt_str_append_cstr(out, GT_GFF_SEQUENCE_REGION);
    gt_str_append_cstr(out, "   ");
    gt_str_append_cstr(out, gt_str_get(gt_genome_node_get_seqid(region)));
    gt_str_append_char(out, ' ');
    gt_str_append_ulong(out, gt_genome_node_get_start(region));
    gt_str_append_char(out, ' ');
    gt_str_append_ulong(out, gt_genome_node_get_end(region));
    gt_str_append_char(out, '\n');
  }
  else {
    gt_file_xprintf(visitor->outfp, "%s   %s "GT_WU" "GT_WU"\n",
                    GT_GFF_SEQUENCE_REGION,
                    gt_str_get(gt_genome_node_get_seqid(region)),
                    gt_genome_node_get_start(region),
                    gt_genome_node_get_end(region));
  }
  return 0;
}
Beispiel #3
0
/* Extends Lua's package.path in state <L> by "<gtdata>/modules/?.lua" and
   "<gtdata>/modules/external/?.lua", where <gtdata> is the path returned by
   gt_get_gtdata_path(). Returns 0 on success and -1 (with <err> handed to
   gt_get_gtdata_path()) if the gtdata path could not be determined. */
int gt_lua_set_modules_path(lua_State *L, GtError *err)
{
  GtStr *gt_modules, *ext_modules, *lua_path;
  gt_error_check(err);
  gt_assert(L);

  gt_modules = gt_get_gtdata_path(gt_error_get_progname(err), err);
  if (!gt_modules)
    return -1;

  /* derive both search patterns from the gtdata directory */
  ext_modules = gt_str_clone(gt_modules);
  gt_str_append_cstr(gt_modules, "/modules/?.lua");
  gt_str_append_cstr(ext_modules, "/modules/external/?.lua");

  /* read the current package.path ... */
  lua_getglobal(L, "package");
  gt_assert(lua_istable(L, -1));
  lua_getfield(L, -1, "path");
  gt_assert(lua_isstring(L, -1));
  lua_path = gt_str_new_cstr(lua_tostring(L, -1));
  lua_pop(L, 1);

  /* ... append the two patterns ... */
  gt_str_append_char(lua_path, ';');
  gt_str_append_str(lua_path, gt_modules);
  gt_str_append_char(lua_path, ';');
  gt_str_append_str(lua_path, ext_modules);

  /* ... and store it back; the final pop removes the package table */
  lua_pushstring(L, gt_str_get(lua_path));
  lua_setfield(L, -2, "path");
  lua_pop(L, 1);

  gt_str_delete(lua_path);
  gt_str_delete(gt_modules);
  gt_str_delete(ext_modules);
  return 0;
}
/* Fills the (empty) <description> with "<type>_<counter>", followed by the
   optional markers " (joined)" and " (translated)", an optional
   " [seqid <seqid>]" part and, if <target_ids> is non-empty, a
   " [target IDs <id>,<id>,...]" part. */
static void construct_description(GtStr *description, const char *type,
                                  GtUword counter, bool join,
                                  bool translate, GtStr *seqid,
                                  GtStrArray *target_ids)
{
  GtUword idx, num_targets;
  gt_assert(!gt_str_length(description));

  gt_str_append_cstr(description, type);
  gt_str_append_char(description, '_');
  gt_str_append_ulong(description, counter);

  if (join)
    gt_str_append_cstr(description, " (joined)");
  if (translate)
    gt_str_append_cstr(description, " (translated)");

  if (seqid) {
    gt_assert(gt_str_length(seqid));
    gt_str_append_cstr(description, " [seqid ");
    gt_str_append_str(description, seqid);
    gt_str_append_char(description, ']');
  }

  num_targets = target_ids ? gt_str_array_size(target_ids) : 0;
  if (num_targets) {
    gt_str_append_cstr(description, " [target IDs ");
    for (idx = 0; idx < num_targets; idx++) {
      if (idx)  /* comma between entries, none before the first */
        gt_str_append_char(description, ',');
      gt_str_append_cstr(description, gt_str_array_get(target_ids, idx));
    }
    gt_str_append_char(description, ']');
  }
}
/* Attaches <singlehit> as a protein match child feature to the LTR
   retrotransposon currently stored in <lv>, provided the hit has chains or
   <lv>->output_all_chains is set. The new feature carries the alignment and
   amino acid strings as user data, the e-value as score, the reading frame,
   the model name (if any) and -- for multiple chains with output_all_chains
   set -- a "chains" attribute of the form "<model>:<chain>,...".
   The chains array of <singlehit> is deleted in any case. Always returns 0. */
static int gt_ltrdigest_pdom_visitor_attach_hit(GtLTRdigestPdomVisitor *lv,
                                                GtHMMERModelHit *modelhit,
                                                GtHMMERSingleHit *singlehit)
{
  GtRange coords;
  gt_assert(lv && singlehit);

  coords = gt_ltrdigest_pdom_visitor_coords(lv, singlehit);

  if (gt_array_size(singlehit->chains) > 0 || lv->output_all_chains) {
    char frame_str[32];
    GtGenomeNode *match;
    GtFeatureNode *match_fn;

    match = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*)
                                                         lv->ltr_retrotrans),
                                gt_ft_protein_match,
                                coords.start,
                                coords.end,
                                singlehit->strand);
    match_fn = (GtFeatureNode*) match;

    gt_genome_node_add_user_data(match, "pdom_alignment",
                                 gt_str_ref(singlehit->alignment),
                                 (GtFree) gt_str_delete);
    gt_genome_node_add_user_data(match, "pdom_aaseq",
                                 gt_str_ref(singlehit->aastring),
                                 (GtFree) gt_str_delete);
    gt_feature_node_set_source(match_fn, lv->tag);
    gt_feature_node_set_score(match_fn, (float) singlehit->evalue);

    (void) snprintf(frame_str, sizeof frame_str, "%d",
                    (int) singlehit->frame);
    gt_feature_node_add_attribute(match_fn, "reading_frame", frame_str);

    if (modelhit->modelname != NULL)
      gt_feature_node_add_attribute(match_fn, "name", modelhit->modelname);

    if (gt_array_size(singlehit->chains) > 1UL && lv->output_all_chains) {
      GtStr *chain_list;
      GtUword k, nof_chains;
      gt_assert(singlehit->chains != NULL);
      chain_list = gt_str_new();
      nof_chains = gt_array_size(singlehit->chains);
      for (k = 0UL; k < nof_chains; k++) {
        if (k > 0UL)
          gt_str_append_char(chain_list, ',');
        gt_str_append_cstr(chain_list, modelhit->modelname);
        gt_str_append_char(chain_list, ':');
        gt_str_append_ulong(chain_list,
                            *(GtUword*) gt_array_get(singlehit->chains, k));
      }
      gt_feature_node_set_attribute(match_fn, "chains",
                                    gt_str_get(chain_list));
      gt_str_delete(chain_list);
    }
    gt_feature_node_add_child(lv->ltr_retrotrans, match_fn);
  }

  /* the chains are not needed any more once the hit has been processed */
  gt_array_delete(singlehit->chains);
  singlehit->chains = NULL;
  return 0;
}
Beispiel #6
0
/* Appends a textual definition of <alphabet> to <dest>.
   If the alphabet stores a definition string (alphadef), that string is
   appended unchanged. Otherwise the definition is rebuilt from the mapping
   tables: the characters of mapdomain are written one after another, and a
   new line is started whenever the symbolmap value of the current character
   differs from that of the previous one. */
void gt_alphabet_to_str(const GtAlphabet *alphabet, GtStr *dest)
{
  GtUchar chartoshow, currentcc, previouscc = 0, firstinline = 0;
  unsigned int cnum, linenum = 0;
  bool afternewline = true;
  gt_assert(alphabet && dest);
  if (alphabet->alphadef != NULL) {
    gt_assert(gt_str_length(alphabet->alphadef));
    gt_str_append_str(dest, alphabet->alphadef);
  } else {
    for (cnum=0; cnum < alphabet->domainsize; cnum++)
    {
      currentcc = alphabet->mapdomain[cnum];
      if (cnum > 0)
      {
        /* a change of the mapped symbol terminates the current line */
        if (alphabet->symbolmap[currentcc] != alphabet->symbolmap[previouscc])
        {
          /* character to show for the finished line: taken from characters[]
             for all but the last line, which uses wildcardshow instead */
          if (linenum < alphabet->mapsize-1)
          {
            chartoshow = alphabet->characters[linenum];
          } else
          {
            chartoshow = alphabet->wildcardshow;
          }
          /* it is only written if it differs from the first character of the
             line */
          if (firstinline != chartoshow)
          {
            gt_str_append_char(dest, (char) chartoshow);
          }
          gt_str_append_char(dest, '\n');
          afternewline = true;
          linenum++;
        } else
        {
          afternewline = false;
        }
      }
      gt_str_append_char(dest, (char) currentcc);
      /* remember the first character written on each line */
      if (afternewline)
      {
        firstinline = currentcc;
      }
      previouscc = currentcc;
    }
    /* terminate the last line in the same way as the lines inside the loop */
    if (linenum < alphabet->mapsize-1)
    {
      chartoshow = alphabet->characters[linenum];
    } else
    {
      chartoshow = alphabet->wildcardshow;
    }
    if (firstinline != chartoshow)
    {
      gt_str_append_char(dest, (char) chartoshow);
    }
    gt_str_append_char(dest, '\n');
  }
}
/* Reads lines from <instream> into <buf> (via pdom_parser_get_next_line())
   and collects the alignment text and amino acid sequence of each domain hit
   stored in <status>. Reading stops on an error, or when a line starts with
   "Internal pipeline statistics" or ">>"; that line is left in <buf>.
   Returns the error status of the last pdom_parser_get_next_line() call.
   NOTE(review): <buf> is indexed up to position 17 without a visible length
   check -- presumably the caller supplies a sufficiently large line buffer;
   confirm against the caller. */
static int gt_ltrdigest_pdom_visitor_parse_alignments(GT_UNUSED
                                                      GtLTRdigestPdomVisitor
                                                                            *lv,
                                                     GtHMMERParseStatus *status,
                                                     char *buf,
                                                     FILE *instream,
                                                     GtError *err)
{
  int had_err = 0, cur_domain = GT_UNDEF_INT, line = -1;
  GtHMMERSingleHit *hit = NULL;
  gt_assert(lv && instream && status);
  gt_error_check(err);
  had_err = pdom_parser_get_next_line(buf, instream, err);
  gt_assert(buf != NULL);
  while (!had_err && strncmp("Internal pipeline statistics",
                             buf, (size_t) 28) &&
                     strncmp(">>", buf, (size_t) 2)) {
    /* a line with "==" at offset 2 starts the alignment of a new domain */
    if ((buf[2] == '=' && buf[3] == '=')) {
      /* cut the line at offset 17 and parse the domain number at offset 12 */
      buf[17] = '\0';
      cur_domain = atoi(buf+12);
      gt_assert(cur_domain != GT_UNDEF_INT && cur_domain > 0);
      /* domain numbers are 1-based, hits are looked up 0-based */
      hit = gt_hmmer_parse_status_get_hit(status,
                                          (unsigned long) cur_domain - 1);
      gt_assert(hit && !hit->alignment);
      hit->alignment = gt_str_new();
      hit->aastring = gt_str_new();
      /* line counter restarts at -2: for the next two lines (-2 and -1)
         neither case of the switch below matches */
      line = -2;
    } else {
      gt_assert(hit && hit->alignment);
      /* every alignment line is stored verbatim */
      gt_str_append_cstr(hit->alignment, buf);
      gt_str_append_char(hit->alignment, '\n');
      switch (line % 4) {
        case 1:
          /* extra blank line after every line with line % 4 == 1 */
          gt_str_append_char(hit->alignment, '\n');
          break;
        case 0:
          {
            /* split the line at blanks (this modifies <buf>) and pass the
               third token to the amino acid sequence collector */
            char *b = buf;
            b = strtok(buf, " ");
            gt_assert(strspn(b, "012+-") == (size_t) 2);
            b = strtok(NULL, " ");
            gt_assert(strlen(b) > 0);
            b = strtok(NULL, " ");
            gt_ltrdigest_pdom_visitor_add_aaseq(b, hit->aastring);
          }
          break;
      }
      line++;
    }
    had_err = pdom_parser_get_next_line(buf, instream, err);
  }
  return had_err;
}
/* Extracts the sequence of <feature_node> for features of the given <type>
   (joined if <join> is set, resolved via <rm>) and translates it, starting at
   the phase offset reported by the extraction. The amino acids of reading
   frames 0, 1 and 2 are appended to <translation_fr1>, <translation_fr2> and
   <translation_fr3>, respectively; each of them may be NULL, in which case
   the frame is skipped. <ttable> is used as translation table if not NULL.
   Returns 0 on success, the error code of the extraction, or -1 if the
   translator reported an error. */
int gt_extract_and_translate_feature_sequence(GtFeatureNode *feature_node,
                                              const char *type,
                                              bool join,
                                              GtRegionMapping *rm,
                                              GtTransTable *ttable,
                                              GtStr *translation_fr1,
                                              GtStr *translation_fr2,
                                              GtStr *translation_fr3,
                                              GtError *err)
{
  GtStr *seq = gt_str_new();
  GtStr *frame_out[3];
  GtTranslator *translator = NULL;
  GtCodonIterator *codons = NULL;
  GtTranslatorStatus state;
  unsigned int frame, phase_offset = 0;
  char aa;
  int had_err;
  gt_assert(feature_node && type);

  frame_out[0] = translation_fr1;
  frame_out[1] = translation_fr2;
  frame_out[2] = translation_fr3;

  had_err = gt_extract_feature_sequence_generic(seq,
                                                (GtGenomeNode*) feature_node,
                                                type, join, NULL, NULL,
                                                &phase_offset, rm, err);

  /* translate only if at least one complete codon is available */
  if (!had_err && gt_str_length(seq) > phase_offset + 2) {
    codons = gt_codon_iterator_simple_new(gt_str_get(seq) + phase_offset,
                                          gt_str_length(seq) - phase_offset,
                                          NULL);
    translator = gt_translator_new(codons);
    if (ttable)
      gt_translator_set_translation_table(translator, ttable);
    for (state = gt_translator_next(translator, &aa, &frame, NULL);
         state == GT_TRANSLATOR_OK;
         state = gt_translator_next(translator, &aa, &frame, NULL)) {
      /* frames without a destination string are dropped */
      if (frame < 3 && frame_out[frame])
        gt_str_append_char(frame_out[frame], aa);
    }
    if (state == GT_TRANSLATOR_ERROR)
      had_err = -1;
  }

  gt_translator_delete(translator);
  gt_codon_iterator_delete(codons);
  gt_str_delete(seq);

  return had_err;
}
/* Appends the characters of <str> to <dest>, skipping every character for
   which gt_ltrdigest_pdom_visitor_isgap() is true. A '*' is stored as 'X',
   all other characters are stored in upper case. */
static void gt_ltrdigest_pdom_visitor_add_aaseq(const char *str, GtStr *dest)
{
  const char *p;
  gt_assert(str && dest);
  for (p = str; *p != '\0'; p++) {
    if (gt_ltrdigest_pdom_visitor_isgap(*p))
      continue;
    if (*p == '*') {
      /* replace stop codons by 'X'es */
      gt_str_append_char(dest, 'X');
    } else {
      /* toupper() is only defined for EOF and unsigned char values, so a
         plain (possibly negative) char has to be converted first */
      gt_str_append_char(dest, (char) toupper((unsigned char) *p));
    }
  }
}
Beispiel #10
0
/* Visitor callback for sequence nodes: makes sure the version string has been
   emitted, writes the FASTA directive line once per visitor (tracked via
   fasta_directive_shown) and then shows the sequence as a FASTA entry with
   the visitor's fasta_width. Output goes to the output string if one is set,
   to the output file otherwise. Always returns 0. */
static int gff3_visitor_sequence_node(GtNodeVisitor *nv, GtSequenceNode *sn,
                                      GT_UNUSED GtError *err)
{
  GtGFF3Visitor *visitor;
  gt_error_check(err);
  visitor = gff3_visitor_cast(nv);
  gt_assert(nv && sn);
  gff3_version_string(nv);

  if (visitor->outstr) {
    if (!visitor->fasta_directive_shown) {
      gt_str_append_cstr(visitor->outstr, GT_GFF_FASTA_DIRECTIVE);
      gt_str_append_char(visitor->outstr, '\n');
      visitor->fasta_directive_shown = true;
    }
    gt_fasta_show_entry_str(gt_sequence_node_get_description(sn),
                            gt_sequence_node_get_sequence(sn),
                            gt_sequence_node_get_sequence_length(sn),
                            visitor->fasta_width, visitor->outstr);
  }
  else {
    if (!visitor->fasta_directive_shown) {
      gt_file_xprintf(visitor->outfp, "%s\n", GT_GFF_FASTA_DIRECTIVE);
      visitor->fasta_directive_shown = true;
    }
    gt_fasta_show_entry(gt_sequence_node_get_description(sn),
                        gt_sequence_node_get_sequence(sn),
                        gt_sequence_node_get_sequence_length(sn),
                        visitor->fasta_width, visitor->outfp);
  }
  return 0;
}
Beispiel #11
0
/* Lua binding: expects a DNA string as first argument, runs it through a
   translator and pushes the collected characters as one string.
   Returns 1 (the number of Lua results).
   NOTE(review): every character delivered by the translator is appended,
   regardless of the reported frame -- confirm that this is intended. */
static int translate_dna_lua(lua_State *L)
{
  const char *dna = luaL_checkstring(L, 1);
  GtStr *protein = gt_str_new();
  GtCodonIterator *codons = gt_codon_iterator_simple_new(dna, strlen(dna),
                                                         NULL);
  GtTranslator *translator = gt_translator_new(codons);
  unsigned int frame;
  char aa;
  int state;

  /* stop on the first non-zero status or on a '\0' character */
  for (state = gt_translator_next(translator, &aa, &frame, NULL);
       !state && aa;
       state = gt_translator_next(translator, &aa, &frame, NULL)) {
    gt_str_append_char(protein, aa);
  }

  lua_pushstring(L, gt_str_get(protein));
  gt_str_delete(protein);
  gt_translator_delete(translator);
  gt_codon_iterator_delete(codons);
  return 1;
}
Beispiel #12
0
/* Reads one name line from <seqit> into the (empty) <buffer>.
   The line has to begin with <startchar>, which is not stored; all following
   characters up to (but excluding) GT_FASTQ_NEWLINESYMBOL are appended to
   <buffer>. The read counter of <seqit> is advanced for every character
   consumed and the line counter once the newline has been read.
   Returns 0 on success, EOF if the input ends before the line is complete,
   and -2 (with <err> set) if the first character is not <startchar>. */
static inline int parse_fastq_seqname(GtSeqIteratorFastQ *seqit,
                                      GtStr *buffer,
                                      char startchar,
                                      GtError *err)
{
  /* kept as int: narrowing the result to char before comparing it with EOF
     can make EOF undetectable (unsigned char) or make a data byte look like
     EOF (signed char) */
  int currentchar;
  bool firstsymbol = true;
  gt_error_check(err);
  gt_assert(seqit && buffer);
  gt_assert(gt_str_length(buffer) == 0);
  if ((currentchar = fastq_buf_getchar(seqit)) == EOF)
    return EOF;
  seqit->currentread++;
  if ((char) currentchar != startchar) {
    gt_error_set(err, "'%c' expected, '%c' encountered instead in line %lu",
                      startchar,
                      currentchar,
                      seqit->curline);
    return -2;
  }
  while (currentchar != GT_FASTQ_NEWLINESYMBOL) {
    /* the first symbol is the start character and is skipped */
    if (!firstsymbol)
      gt_str_append_char(buffer, (char) currentchar);
    else
      firstsymbol = false;
    if ((currentchar = fastq_buf_getchar(seqit)) == EOF)
      return EOF;
    seqit->currentread++;
  }
  seqit->curline++;
  return 0;
}
Beispiel #13
0
/* Writes the manpage source of <toolname> to the file
   "<outdir><GT_PATH_SEPARATOR><toolname>.mansrc", where blanks in the tool
   name are replaced by underscores. <outdir> is created if it does not exist.
   Returns 0 on success and a non-zero value if the file could not be opened
   or the manpage could not be generated. */
static int create_manpage(const char *outdir, const char *toolname,
                          GtOptionParser *option_parser, GtError *err)
{
  GtFile *mansrc_file;
  GtStr *manpage, *mansrc_path;
  char *file_stem;
  int had_err;
  gt_error_check(err);
  gt_assert(outdir && toolname && option_parser);

  manpage = gt_str_new();
  mansrc_path = gt_str_new_cstr(outdir);

  /* the file name uses the tool name with '_' instead of ' ' */
  file_stem = gt_cstr_dup(toolname);
  gt_cstr_rep(file_stem, ' ', '_');

  if (!gt_file_exists(gt_str_get(mansrc_path)))
    gt_xmkdir(gt_str_get(mansrc_path));

  gt_str_append_char(mansrc_path, GT_PATH_SEPARATOR);
  gt_str_append_cstr(mansrc_path, file_stem);
  gt_str_append_cstr(mansrc_path, ".mansrc");
  gt_free(file_stem);

  mansrc_file = gt_file_new(gt_str_get(mansrc_path), "w+", err);
  if (mansrc_file)
    had_err = gt_option_parser_manpage(option_parser, toolname, manpage, err);
  else
    had_err = -1;
  if (!had_err)
    gt_file_xprintf(mansrc_file, "%s", gt_str_get(manpage));

  gt_file_delete(mansrc_file);
  gt_str_delete(mansrc_path);
  gt_str_delete(manpage);
  return had_err;
}
/* Parses the contained reads lists "<readset>.<i><suffix>" for
   i = 0 .. nspmfiles-1 into <contained>. The first file is parsed with the
   flag argument of gt_cntlist_parse() set to true, all further files with
   false. Parsing stops at the first error; its code is returned (0 on
   success). */
static int gt_readjoiner_assembly_build_contained_reads_list(
    GtReadjoinerAssemblyArguments *arguments, GtBitsequence **contained,
    GtError *err)
{
  GtUword nofreads, nofreads_i;
  unsigned int fileidx = 1U;
  int had_err;
  GtStr *cntlist_name = gt_str_clone(arguments->readset);

  gt_str_append_cstr(cntlist_name, ".0" GT_READJOINER_SUFFIX_CNTLIST);
  had_err = gt_cntlist_parse(gt_str_get(cntlist_name), true, contained,
                             &nofreads, err);

  while (fileidx < arguments->nspmfiles && had_err == 0)
  {
    gt_str_reset(cntlist_name);
    gt_str_append_str(cntlist_name, arguments->readset);
    gt_str_append_char(cntlist_name, '.');
    gt_str_append_uint(cntlist_name, fileidx);
    gt_str_append_cstr(cntlist_name, GT_READJOINER_SUFFIX_CNTLIST);
    had_err = gt_cntlist_parse(gt_str_get(cntlist_name), false, contained,
                               &nofreads_i, err);
    /* all lists have to report the same number of reads */
    gt_assert(had_err || nofreads == nofreads_i);
    fileidx++;
  }

  gt_str_delete(cntlist_name);
  return had_err;
}
Beispiel #15
0
/* Parses a word enclosed in QUOTE_CHARs from <bed_file> into <word> (which
   is reset first). The content ends at the next quote, carriage return,
   end-of-line or end-of-file; a closing QUOTE_CHAR is then expected.
   Returns the result of the failing gt_io_expect() call, or 0 on success. */
static int quoted_word(GtStr *word, GtIO *bed_file, GtError *err)
{
  bool at_end = false;
  int had_err;
  gt_error_check(err);
  gt_str_reset(word);

  had_err = gt_io_expect(bed_file, QUOTE_CHAR, err);
  if (had_err)
    return had_err;

  while (!at_end) {
    switch (gt_io_peek(bed_file)) {
      case QUOTE_CHAR:
      case GT_CARRIAGE_RETURN:
      case GT_END_OF_LINE:
      case GT_END_OF_FILE:
        at_end = true;
        break;
      default:
        gt_str_append_char(word, gt_io_next(bed_file));
        break;
    }
  }

  return gt_io_expect(bed_file, QUOTE_CHAR, err);
}
/* Shows <sequence> with <description> as FASTA entry of the given <width> on
   <outfp>. If <translate> is set, the characters the translator reports for
   frame 0 are shown instead of the sequence itself. Returns 0 on success and
   -1 if the translator reported an error (the characters collected up to
   that point are shown nevertheless). */
static int show_entry(GtStr *description, GtStr *sequence, bool translate,
                      GtUword width, GtFile *outfp)
{
  GtTranslatorStatus state;
  GtCodonIterator *codons;
  GtTranslator *translator;
  GtStr *aa_seq;
  unsigned int frame;
  char aa;

  if (!translate) {
    gt_fasta_show_entry(gt_str_get(description), gt_str_get(sequence),
                        gt_str_length(sequence), width, outfp);
    return 0;
  }

  aa_seq = gt_str_new();
  codons = gt_codon_iterator_simple_new(gt_str_get(sequence),
                                        gt_str_length(sequence),
                                        NULL);
  translator = gt_translator_new(codons);
  for (state = gt_translator_next(translator, &aa, &frame, NULL);
       state == GT_TRANSLATOR_OK;
       state = gt_translator_next(translator, &aa, &frame, NULL)) {
    if (frame == 0)
      gt_str_append_char(aa_seq, aa);
  }

  gt_fasta_show_entry(gt_str_get(description), gt_str_get(aa_seq),
                      gt_str_length(aa_seq), width, outfp);
  gt_str_delete(aa_seq);
  gt_translator_delete(translator);
  gt_codon_iterator_delete(codons);

  return state == GT_TRANSLATOR_ERROR ? -1 : 0;
}
Beispiel #17
0
/* Consumes the next character of <obo_file> and appends it to <capture> if
   any_char() accepts it. Otherwise nothing is consumed, <err> is set to a
   message naming the file, the line number and the kind of unexpected input
   (end-of-file, newline or the offending character) and -1 is returned.
   Returns 0 on success. */
static int proc_any_char(GtIO *obo_file, GtStr *capture, bool be_permissive,
                         GtError *err)
{
  gt_error_check(err);
  gt_assert(obo_file && capture);

  if (any_char(obo_file, be_permissive)) {
    gt_str_append_char(capture, gt_io_next(obo_file));
    return 0;
  }

  if (gt_io_peek(obo_file) == GT_END_OF_FILE) {
    gt_error_set(err, "file \"%s\": line %lu: unexpected end-of-file",
                 gt_io_get_filename(obo_file),
                 gt_io_get_line_number(obo_file));
  }
  else if ((gt_io_peek(obo_file) == GT_CARRIAGE_RETURN) ||
           (gt_io_peek(obo_file) == GT_END_OF_LINE)) {
    gt_error_set(err, "file \"%s\": line %lu: unexpected newline",
                 gt_io_get_filename(obo_file),
                 gt_io_get_line_number(obo_file));
  }
  else {
    gt_error_set(err, "file \"%s\": line %lu: unexpected character '%c'",
                 gt_io_get_filename(obo_file),
                 gt_io_get_line_number(obo_file),
                 gt_io_peek(obo_file));
  }
  return -1;
}
/* Parses the SPM lists "<readset>.<i><suffix>" for i = 0 .. nspmfiles-1 and
   counts the suffix-prefix matches in <strgraph>.
   With <eqlen> set, gt_spmproc_strgraph_count is invoked directly on
   <strgraph>; otherwise the matches are passed through gt_spmproc_skip with
   <contained> as the set of reads to skip.
   Parsing stops at the first file that fails, so that the reported error is
   not overwritten by later files and <err> is not reused while set.
   Returns 0 on success, the error code of the failing parse otherwise. */
static int gt_readjoiner_assembly_count_spm(const char *readset, bool eqlen,
    unsigned int minmatchlength, unsigned int nspmfiles, GtStrgraph *strgraph,
    GtBitsequence *contained, GtLogger *default_logger, GtError *err)
{
  GtSpmprocSkipData skipdata;
  int had_err = 0;
  unsigned int i;
  GtStr *filename = gt_str_new();
  gt_logger_log(default_logger, GT_READJOINER_ASSEMBLY_MSG_COUNTSPM);
  if (!eqlen)
  {
    /* skipdata is only needed (and only initialized) in the skipping case */
    skipdata.out.e.proc = gt_spmproc_strgraph_count;
    skipdata.to_skip = contained;
    skipdata.out.e.data = strgraph;
  }
  for (i = 0; i < nspmfiles && !had_err; i++)
  {
    gt_str_reset(filename);
    gt_str_append_cstr(filename, readset);
    gt_str_append_char(filename, '.');
    gt_str_append_uint(filename, i);
    gt_str_append_cstr(filename, GT_READJOINER_SUFFIX_SPMLIST);
    had_err = gt_spmlist_parse(gt_str_get(filename),
        (GtUword)minmatchlength,
        eqlen ? gt_spmproc_strgraph_count : gt_spmproc_skip,
        eqlen ? (void*)strgraph : (void*)&skipdata, err);
  }
  gt_str_delete(filename);
  return had_err;
}
/* Stores "<seqid of feature>\t<target_id>" in <key>, replacing its previous
   content. */
static void build_key(GtStr *key, GtFeatureNode *feature, GtStr *target_id)
{
  GtStr *seqid;
  gt_assert(key && feature && target_id);
  gt_str_reset(key);
  seqid = gt_genome_node_get_seqid((GtGenomeNode*) feature);
  gt_str_append_str(key, seqid);
  /* the tabulator separates both parts, because it cannot occur in seqid or
     target_id */
  gt_str_append_char(key, '\t');
  gt_str_append_str(key, target_id);
}
Beispiel #20
0
/* Visitor callback for comment nodes: makes sure the version string has been
   emitted and writes the comment as "#<comment>" line to the output string
   (if set) or the output file. Always returns 0. */
static int gff3_visitor_comment_node(GtNodeVisitor *nv, GtCommentNode *cn,
                                     GT_UNUSED GtError *err)
{
  GtGFF3Visitor *visitor;
  gt_error_check(err);
  visitor = gff3_visitor_cast(nv);
  gt_assert(nv && cn);
  gff3_version_string(nv);
  if (visitor->outstr) {
    GtStr *out = visitor->outstr;
    gt_str_append_char(out, '#');
    gt_str_append_cstr(out, gt_comment_node_get_comment(cn));
    gt_str_append_char(out, '\n');
  }
  else {
    gt_file_xprintf(visitor->outfp, "#%s\n", gt_comment_node_get_comment(cn));
  }
  return 0;
}
/* Tool entry point: extracts the BSSM parameters of every species
   (0 .. LASTSPECIESNUM) and saves each of them to the file
   "<dir><GT_PATH_SEPARATOR><species>[.old].<BSSMFILEENDING>", where <dir> is
   the single non-option argument. Stops at the first error.
   Returns 0 on success (or if the option parser requests an exit) and a
   non-zero value on error. */
int gt_gthmkbssmfiles(int argc, const char **argv, GtError *err)
{
  GtUword species;
  GtStr *bssm_path;
  int parsed_args, had_err = 0;

  /* option parsing */
  switch (gthmkbssmfiles_parse_options(&parsed_args, argc, argv, err)) {
    case GT_OPTION_PARSER_OK: break;
    case GT_OPTION_PARSER_ERROR: return -1;
    case GT_OPTION_PARSER_REQUESTS_EXIT: return 0;
  }

  gt_assert(parsed_args + 1 == argc);
  bssm_path = gt_str_new();

  for (species = 0; !had_err && species <= LASTSPECIESNUM; species++) {
    GthBSSMParam *param;

    /* start every file name from scratch */
    gt_str_reset(bssm_path);
    gt_str_append_cstr(bssm_path, argv[parsed_args]);
    gt_str_append_char(bssm_path, GT_PATH_SEPARATOR);
    gt_str_append_cstr(bssm_path, speciestab[species]);

    /* for files which are obsolete due to new model files produced by
       gthbssmbuild add an .old after the species name */
    if (species >= 8)
      gt_str_append_cstr(bssm_path, ".old");

    gt_str_append_char(bssm_path, '.');
    gt_str_append_cstr(bssm_path, BSSMFILEENDING);

    param = gth_bssm_param_extract(species, err);
    if (param)
      had_err = gth_bssm_param_save(param, gt_str_get(bssm_path), err);
    else
      had_err = -1;
    gth_bssm_param_delete(param);
  }

  gt_str_delete(bssm_path);

  return had_err;
}
Beispiel #22
0
/* Determines the directory of <file> and stores it in <path>.
   If <file> already has a directory part, that part is used. Otherwise the
   components of the environment variable <env> (separated by
   GT_PATH_VAR_SEPARATOR) are searched in order and the first component for
   which <file_exists> accepts "<component><GT_PATH_SEPARATOR><file>" is
   stored; <path> is left empty if there is no such component.
   Returns 0 on success and -1 (with <err> set) if <env> is not defined. */
static int file_find_in_env_generic(GtStr *path, const char *file,
                                    const char *env, FileExistsFunc file_exists,
                                    GtError *err)
{
  const char *env_value;
  char *search_dirs, *dir = NULL;
  GtSplitter *splitter;
  GtUword i;
  bool found = false;

  gt_error_check(err);
  gt_assert(file);
  gt_assert(file_exists);

  /* check if 'file' has dirname */
  gt_file_dirname(path, file);
  if (gt_str_length(path))
    return 0;

  /* 'file' has no dirname -> scan $env */
  env_value = getenv(env);
  if (env_value == NULL) {
    gt_error_set(err, "environment variable $%s is not defined", env);
    return -1;
  }

  /* the splitter works on a writeable copy */
  search_dirs = gt_cstr_dup(env_value);
  splitter = gt_splitter_new();
  gt_splitter_split(splitter, search_dirs,
                    (GtUword) strlen(search_dirs), GT_PATH_VAR_SEPARATOR);

  for (i = 0; i < gt_splitter_size(splitter); i++) {
    dir = gt_splitter_get_token(splitter, i);
    gt_str_reset(path);
    gt_str_append_cstr(path, dir);
    gt_str_append_char(path, GT_PATH_SEPARATOR);
    gt_str_append_cstr(path, file);
    if (file_exists(gt_str_get(path))) {
      found = true;
      break;
    }
  }

  /* report the matching directory only, or nothing at all */
  gt_str_reset(path);
  if (found)
    gt_str_append_cstr(path, dir);

  /* free */
  gt_free(search_dirs);
  gt_splitter_delete(splitter);

  return 0;
}
Beispiel #23
0
/* Creates the GFF3 target attribute string of <sa> (which must not have one
   yet): the escaped reference ID -- prefixed by
   "<GT_MD5_SEQID_PREFIX><md5>:" if <md5ids> is set -- followed by the blank
   separated start position, end position and strand character. */
static void set_gff3_target_attribute(GthSA *sa, bool md5ids)
{
  GtStr *target;
  gt_assert(sa && !sa->gff3_target_attribute);
  target = gt_str_new();
  sa->gff3_target_attribute = target;

  if (md5ids) {
    gt_assert(sa->ref_md5);
    gt_str_append_cstr(target, GT_MD5_SEQID_PREFIX);
    gt_str_append_str(target, sa->ref_md5);
    gt_str_append_char(target, ':');
  }
  gt_gff3_escape(target, gt_str_get(sa->ref_id), gt_str_length(sa->ref_id));

  /* XXX: use reference dpstartpos */
  gt_str_append_char(target, ' ');
  gt_str_append_uword(target, gth_sa_referencecutoff_start(sa) + 1);

  /* XXX */
  gt_str_append_char(target, ' ');
  gt_str_append_uword(target,
                      gth_sa_ref_total_length(sa) -
                      gth_sa_referencecutoff_end(sa));

  gt_str_append_char(target, ' ');
  gt_str_append_char(target,
                     sa->ref_strand_forward
                       ? GT_STRAND_CHARS[GT_STRAND_FORWARD]
                       : GT_STRAND_CHARS[GT_STRAND_REVERSE]);
}
Beispiel #24
0
/* Attribute iteration callback: writes "<attr_name>=<attr_value>" to the
   output string or file given in <data> (a ShowAttributeInfo). The ID and
   Parent attributes are skipped. Every attribute but the first one shown is
   preceded by ';'; the attribute_shown flag in <data> keeps track of that. */
static void show_attribute(const char *attr_name, const char *attr_value,
                           void *data)
{
  ShowAttributeInfo *info = (ShowAttributeInfo*) data;
  gt_assert(attr_name && attr_value && info);

  if (!strcmp(attr_name, GT_GFF_ID) || !strcmp(attr_name, GT_GFF_PARENT))
    return;

  if (!*info->attribute_shown)
    *info->attribute_shown = true;
  else if (info->outstr)
    gt_str_append_char(info->outstr, ';');
  else
    gt_file_xfputc(';', info->outfp);

  if (info->outstr) {
    gt_str_append_cstr(info->outstr, attr_name);
    gt_str_append_char(info->outstr, '=');
    gt_str_append_cstr(info->outstr, attr_value);
  }
  else
    gt_file_xprintf(info->outfp, "%s=%s", attr_name, attr_value);
}
Beispiel #25
0
/* Reports the progress message
   "process file <filenum + 1>/<numoffiles>: <filename>" via <showverbose>.
   <filenum> is the 0-based index of the file being processed. */
static void show_parse_file_status(GthShowVerbose showverbose,
                                   GtUword filenum,
                                   GtUword numoffiles,
                                   const char *filename)
{
  GtStr *buf = gt_str_new();
  gt_str_append_cstr(buf, "process file ");
  gt_str_append_uword(buf, filenum + 1);
  /* "n of m" separator: a literal '/' rather than the path separator macro,
     so the message does not depend on how that macro is defined */
  gt_str_append_char(buf, '/');
  gt_str_append_uword(buf, numoffiles);
  gt_str_append_cstr(buf, ": ");
  gt_str_append_cstr(buf, filename);
  showverbose(gt_str_get(buf));
  gt_str_delete(buf);
}
Beispiel #26
0
/* Writes the GFF version line
   ("<GT_GFF_VERSION_PREFIX>   <GT_GFF_VERSION>") to the output string (if
   set) or the output file, unless a version string has already been shown
   for this visitor. */
static void gff3_version_string(GtNodeVisitor *nv)
{
  GtGFF3Visitor *visitor = gff3_visitor_cast(nv);
  gt_assert(visitor);

  if (visitor->version_string_shown)
    return;

  if (visitor->outstr) {
    GtStr *out = visitor->outstr;
    gt_str_append_cstr(out, GT_GFF_VERSION_PREFIX);
    gt_str_append_cstr(out, "   ");
    gt_str_append_uint(out, GT_GFF_VERSION);
    gt_str_append_char(out, '\n');
  }
  else {
    gt_file_xprintf(visitor->outfp, "%s   %u\n", GT_GFF_VERSION_PREFIX,
                    GT_GFF_VERSION);
  }
  visitor->version_string_shown = true;
}
Beispiel #27
0
/* Default track selector: stores
   "<basename of source file><GT_FILENAME_TYPE_SEPARATOR><block type>" in
   <result>, replacing its previous content. <data> is not used. */
static void default_track_selector(GtBlock *block, GtStr *result,
                                   GT_UNUSED void *data)
{
  GtGenomeNode *top_feature;
  char *file_base;
  gt_assert(block && result);
  gt_str_reset(result);
  top_feature = (GtGenomeNode*) gt_block_get_top_level_feature(block);
  /* only the basename of the file is used to get nicer output in the
     generated graphic. as a consequence, tracks of two files with different
     paths but the same basename can end up ``collapsed''. */
  file_base = gt_basename(gt_genome_node_get_filename(top_feature));
  gt_str_append_cstr(result, file_base);
  gt_str_append_char(result, GT_FILENAME_TYPE_SEPARATOR);
  gt_str_append_cstr(result, gt_block_get_type(block));
  gt_free(file_base);
}
/* Translates <sequence> of the given <length>, collecting the characters of
   each frame in the corresponding entry of <translations>. Every non-empty
   translation is then shown as FASTA entry with the description
   "<desc> (<frame number><+|->)" -- '-' if <rev> is set -- and reset.
   Returns 0 on success and -1 if the translator reported an error (nothing
   is shown in that case). */
static int gt_seqtranslate_do_translation(GtTranslateArguments *arguments,
                                       const char *sequence,
                                       GtUword length,
                                       const char *desc,
                                       GtStr **translations,
                                       bool rev,
                                       GtError *err)
{
  GtCodonIterator *codons;
  GtTranslator *translator;
  GtTranslatorStatus state;
  GtStr *header;
  unsigned int frame, f;
  char aa;

  codons = gt_codon_iterator_simple_new(sequence, length, err);
  translator = gt_translator_new(codons);
  for (state = gt_translator_next(translator, &aa, &frame, err);
       state == GT_TRANSLATOR_OK;
       state = gt_translator_next(translator, &aa, &frame, err)) {
    gt_str_append_char(translations[frame], aa);
  }
  gt_codon_iterator_delete(codons);
  gt_translator_delete(translator);
  if (state == GT_TRANSLATOR_ERROR)
    return -1;

  header = gt_str_new();
  for (f = 0; f < 3; f++) {
    GtStr *translation = translations[f];
    if (gt_str_length(translation) == 0)
      continue;
    gt_str_reset(header);
    gt_str_append_cstr(header, desc);
    gt_str_append_cstr(header, " (");
    gt_str_append_ulong(header, f+1);
    gt_str_append_cstr(header, rev ? "-" : "+");
    gt_str_append_cstr(header, ")");
    gt_fasta_show_entry(gt_str_get(header), gt_str_get(translation),
                        gt_str_length(translation),
                        arguments->fasta_width, arguments->outfp);
    gt_str_reset(translation);
  }
  gt_str_delete(header);
  return 0;
}
Beispiel #29
0
/* Reads characters from <bed_file> into <word> (which is reset first) up to,
   but not including, the next blank, tabulator, pair separator, carriage
   return, end-of-line or end-of-file. */
static void word(GtStr *word, GtIO *bed_file)
{
  bool at_delimiter = false;
  gt_str_reset(word);
  while (!at_delimiter) {
    switch (gt_io_peek(bed_file)) {
      case BLANK_CHAR:
      case TABULATOR_CHAR:
      case PAIR_SEPARATOR:
      case GT_CARRIAGE_RETURN:
      case GT_END_OF_LINE:
      case GT_END_OF_FILE:
        /* the delimiter itself stays in the input */
        at_delimiter = true;
        break;
      default:
        gt_str_append_char(word, gt_io_next(bed_file));
        break;
    }
  }
}
Beispiel #30
0
/* Visitor callback for feature nodes: makes sure the version string has been
   emitted, stores the IDs of <fn> and its children (store_ids) and then shows
   all of them (gff3_show_feature_node). Afterwards the ID hashmaps of the
   visitor are reset and a terminator line is written if required.
   Returns 0 on success, the error code of the failing traversal otherwise. */
static int gff3_visitor_feature_node(GtNodeVisitor *nv, GtFeatureNode *fn,
                                     GtError *err)
{
  GtGFF3Visitor *visitor;
  int had_err;
  gt_error_check(err);
  visitor = gff3_visitor_cast(nv);

  gff3_version_string(nv);

  had_err = gt_feature_node_traverse_children(fn, visitor, store_ids, true,
                                              err);
  if (!had_err && gt_feature_node_is_tree(fn)) {
    had_err = gt_feature_node_traverse_children(fn, visitor,
                                                gff3_show_feature_node, true,
                                                err);
  }
  else if (!had_err) {
    /* got a DAG -> traverse in topologically sorted depth first fashion to
       make sure that the 'Parent' attributes are shown in correct order */
    had_err = gt_feature_node_traverse_children_top(fn, visitor,
                                                    gff3_show_feature_node,
                                                    err);
  }

  /* reset hashmaps */
  gt_hashmap_reset(visitor->feature_node_to_id_array);
  gt_hashmap_reset(visitor->feature_node_to_unique_id_str);

  /* show terminator, if the feature has children (otherwise it is clear that
     the feature is complete, because no ID attribute has been shown) */
  if (gt_feature_node_has_children(fn) ||
      (visitor->retain_ids && gt_feature_node_get_attribute(fn, "ID"))) {
    if (visitor->outstr) {
      gt_str_append_cstr(visitor->outstr, GT_GFF_TERMINATOR);
      gt_str_append_char(visitor->outstr, '\n');
    }
    else
      gt_file_xprintf(visitor->outfp, "%s\n", GT_GFF_TERMINATOR);
  }

  return had_err;
}