コード例 #1
0
/*
 * Append the description belonging to <seqid> to <desc>.
 *
 * Lookup strategy, in order:
 *   - rm->userawseq: the literal "<rawseq>" is appended;
 *   - <seqid> carries an MD5 prefix: resolved through
 *     gt_seq_col_md5_to_description();
 *   - rm->usedesc: <seqid> is mapped to (filenum, seqnum) through
 *     rm->seqid2seqnum_mapping;
 *   - rm->useseqno: <seqid> must have the form "seqX", X indexing rm->encseq;
 *   - rm->matchdesc: <seqid> is resolved to an MD5 with
 *     gt_seq_col_grep_desc_md5() and then looked up like an MD5 seqid;
 *   - otherwise: the description of sequence 0 in file 0 of rm->seq_col.
 *
 * Returns 0 on success and a non-zero value on failure; <err> is set here for
 * malformed or out-of-range "seqX" ids (other failures are presumably reported
 * by the failing callee).
 */
int gt_region_mapping_get_description(GtRegionMapping *rm, GtStr *desc,
                                      GtStr *seqid, GtError *err)
{
  int had_err = 0;
  gt_error_check(err);
  gt_assert(rm && desc && seqid);
  if (rm->userawseq) {
    gt_str_append_cstr(desc, "<rawseq>");
    return 0;
  }
  had_err = update_seq_col_if_necessary(rm, seqid, err);
  if (had_err)
    return had_err;
  /* MD5 seqids are resolved directly; only this case returns early, so the
     other lookup strategies below stay reachable */
  if (gt_md5_seqid_has_prefix(gt_str_get(seqid)))
    return gt_seq_col_md5_to_description(rm->seq_col, desc, seqid, err);

  if (rm->usedesc) {
    unsigned long filenum, seqnum;
    gt_assert(rm->seqid2seqnum_mapping);
    had_err = gt_seqid2seqnum_mapping_map(rm->seqid2seqnum_mapping,
                                          gt_str_get(seqid), NULL, &seqnum,
                                          &filenum, NULL, err);
    if (!had_err) {
      char *cdesc;
      cdesc = gt_seq_col_get_description(rm->seq_col, filenum, seqnum);
      gt_assert(cdesc);
      gt_str_append_cstr(desc, cdesc);
      gt_free(cdesc);
    }
  }
  else if (rm->useseqno) {
    unsigned long seqno = GT_UNDEF_ULONG;
    gt_assert(rm->encseq);
    if (1 != sscanf(gt_str_get(seqid), "seq%lu", &seqno)) {
      gt_error_set(err, "seqid '%s' does not have the form 'seqX' "
                        "where X is a sequence number in the encoded "
                        "sequence", gt_str_get(seqid));
      had_err = -1;
    }
    gt_assert(had_err || seqno != GT_UNDEF_ULONG);
    if (!had_err && seqno >= gt_encseq_num_of_sequences(rm->encseq)) {
      gt_error_set(err, "trying to access sequence %lu, but encoded "
                        "sequence contains only %lu sequences",
                        seqno, gt_encseq_num_of_sequences(rm->encseq));
      had_err = -1;
    }
    if (!had_err) {
      unsigned long desclen;
      const char *edesc;
      /* the encseq description is not NUL-terminated at desclen, hence the
         length-bounded append */
      edesc = gt_encseq_description(rm->encseq, &desclen, seqno);
      gt_str_append_cstr_nt(desc, edesc, desclen);
    }
  }
  else if (rm->matchdesc) {
    const char *md5;
    /* XXX: not beautiful, but works -- this may be LOTS faster */
    had_err = gt_seq_col_grep_desc_md5(rm->seq_col, &md5, seqid, err);
    if (!had_err) {
      GtStr *md5_seqid = gt_str_new_cstr(md5);
      had_err = gt_seq_col_md5_to_description(rm->seq_col, desc, md5_seqid,
                                              err);
      gt_str_delete(md5_seqid);
    }
  }
  else {
    /* no mapping mode selected: fall back to the first sequence */
    char *cdesc;
    cdesc = gt_seq_col_get_description(rm->seq_col, 0, 0);
    gt_assert(cdesc);
    gt_str_append_cstr(desc, cdesc);
    gt_free(cdesc);
  }
  return had_err;
}
コード例 #2
0
ファイル: gtdatahelp.c プロジェクト: jamescasbon/genometools
/*
 * Show the help for <progname> by running the matching Lua doc file.
 *
 * The first blank-separated token of <progname> is used to locate the gtdata
 * directory; the doc file is "<gtdata>/doc/<name>.lua", where <name> is the
 * basename of <progname> if it consists of a single token and the last token
 * otherwise. The doc directory is exported to Lua as the global
 * "gtdata_doc_dir" before the file is executed.
 *
 * Returns 0 on success and -1 on failure (with <err> set).
 */
int gt_gtdata_show_help(const char *progname, GT_UNUSED void *unused,
                        GtError *err)
{
  GtSplitter *tokens;
  GtStr *doc_path;
  lua_State *lua = NULL;
  char *progname_copy;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(progname);

  /* the splitter modifies its input, so work on a private copy */
  progname_copy = gt_cstr_dup(progname);
  tokens = gt_splitter_new();
  gt_splitter_split(tokens, progname_copy, strlen(progname_copy), ' ');

  doc_path = gt_get_gtdata_path(gt_splitter_get_token(tokens, 0), err);
  if (doc_path == NULL) {
    had_err = -1;
  }
  else {
    gt_str_append_cstr(doc_path, "/doc/");
    /* set up the Lua interpreter */
    lua = luaL_newstate();
    if (lua == NULL) {
      gt_error_set(err, "out of memory (cannot create new Lua state)");
      had_err = -1;
    }
  }

  if (!had_err) {
    luaL_openlibs(lua);
    /* at this point doc_path still names the doc directory only */
    lua_pushstring(lua, gt_str_get(doc_path));
    lua_setglobal(lua, "gtdata_doc_dir");

    /* complete doc_path with the name of the doc file */
    if (gt_splitter_size(tokens) != 1) {
      /* a tool: its name is the last token */
      gt_str_append_cstr(doc_path,
                         gt_splitter_get_token(tokens,
                                               gt_splitter_size(tokens) - 1));
    }
    else {
      /* plain `gt`: use the basename of the program */
      char *base = gt_basename(progname);
      gt_str_append_cstr(doc_path, base);
      gt_free(base);
    }
    gt_str_append_cstr(doc_path, ".lua");

    /* load and run the doc file; Lua leaves its message on the stack top */
    if (luaL_loadfile(lua, gt_str_get(doc_path)) != 0 ||
        lua_pcall(lua, 0, 0, 0) != 0) {
      gt_error_set(err, "cannot run doc file: %s", lua_tostring(lua, -1));
      had_err = -1;
    }
  }

  /* release everything */
  if (lua != NULL)
    lua_close(lua);
  gt_str_delete(doc_path);
  gt_splitter_delete(tokens);
  gt_free(progname_copy);

  return had_err;
}
コード例 #3
0
ファイル: gt_speck.c プロジェクト: teythoon/genometools
/*
 * Tool runner: checks the GFF3 input given in argv[parsed_args..argc-1]
 * against the specification file arguments->specfile and renders the results
 * with an output driver.
 *
 * The output driver is arguments->format if that names an existing file,
 * otherwise "<gtdata>/spec/output_drivers/<format>". Optionally a region
 * mapping, a type checker, a sort stream and a feature index are plugged in,
 * depending on the tool arguments.
 *
 * Returns 0 on success; a non-zero value with <err> set on failure. All
 * failures run through the common cleanup at the end of the function.
 */
static int gt_speck_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  GtNodeStream *gff3_in_stream = NULL, *checker_stream = NULL,
               *a_in_stream = NULL, *a_out_stream = NULL,
               *feature_stream = NULL, *sort_stream = NULL,
               *last_stream = NULL;
  GtNodeVisitor *spec_visitor = NULL;
  GtSpecResults *res = NULL;
  GtFeatureIndex *fi = NULL;
  GtTypeChecker *type_checker = NULL;
  GtTimer *t = NULL;
  GtRegionMapping *rm = NULL;
  GtArray *arr = gt_array_new(sizeof (GtFeatureNode*));
  GtStr *prog, *speclib = NULL;
  SpeccheckArguments *arguments = tool_arguments;

  int had_err = 0;
  gt_error_check(err);

  res = gt_spec_results_new();
  gt_assert(res);

  /* locate the output driver */
  if (gt_file_exists(gt_str_get(arguments->format))) {
    speclib = gt_str_ref(arguments->format);
  } else {
    prog = gt_str_new();
    gt_str_append_cstr_nt(prog, gt_error_get_progname(err),
                    gt_cstr_length_up_to_char(gt_error_get_progname(err), ' '));
    speclib = gt_get_gtdata_path(gt_str_get(prog), err);
    gt_str_delete(prog);
    if (!speclib) {
      /* gtdata directory could not be determined */
      had_err = -1;
    }
    else {
      gt_str_append_cstr(speclib, "/spec/output_drivers/");
      gt_str_append_str(speclib, arguments->format);

      if (!gt_file_exists(gt_str_get(speclib))) {
        gt_error_set(err, "output driver file \"%s\" does not exist",
                     gt_str_get(speclib));
        had_err = -1;
      }
    }
  }

  if (!had_err) {
    spec_visitor = gt_spec_visitor_new(gt_str_get(arguments->specfile), res,
                                       err);
    /* do not return here: the common cleanup below releases res, arr and
       speclib */
    if (!spec_visitor)
      had_err = -1;
  }

  t = gt_timer_new();
  gt_assert(t);

  /* add region mapping if given */
  if (!had_err && gt_seqid2file_option_used(arguments->s2fi)) {
    rm = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rm)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_region_mapping((GtSpecVisitor*) spec_visitor, rm);
  }

  /* set type checker if necessary */
  if (!had_err && gt_typecheck_info_option_used(arguments->tci)) {
    type_checker = gt_typecheck_info_create_type_checker(arguments->tci, err);
    if (!type_checker)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_type_checker((GtSpecVisitor*) spec_visitor,
                                       type_checker);
  }

  if (!had_err) {
    /* set runtime error behaviour */
    if (arguments->fail_hard)
      gt_spec_visitor_fail_on_runtime_error((GtSpecVisitor*) spec_visitor);
    else
      gt_spec_visitor_report_runtime_errors((GtSpecVisitor*) spec_visitor);

    /* redirect warnings */
    gt_warning_set_handler(gt_speck_record_warning, res);

    last_stream = gff3_in_stream = gt_gff3_in_stream_new_unsorted(
                                                            argc - parsed_args,
                                                            argv + parsed_args);
    gt_assert(gff3_in_stream);
    gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) gff3_in_stream);

    /* insert sort stream if requested */
    if (arguments->sort) {
      last_stream = sort_stream = gt_sort_stream_new(last_stream);
    }

    /* if -provideindex is given, collect input features and index them first */
    if (arguments->provideindex) {
      fi = gt_feature_index_memory_new();
      gt_assert(fi);

      last_stream = feature_stream = gt_feature_stream_new(last_stream, fi);
      gt_assert(feature_stream);

      last_stream = a_out_stream = gt_array_out_stream_all_new(last_stream, arr,
                                                               err);
      if (!a_out_stream)
        had_err = -1;

      gt_timer_start(t);

      /* first pass: read the whole input into arr and the feature index */
      if (!had_err)
        had_err = gt_node_stream_pull(last_stream, err);

      if (!had_err) {
        gt_spec_visitor_add_feature_index((GtSpecVisitor*) spec_visitor,
                                          gt_feature_index_ref(fi));
        /* second pass replays the collected nodes from arr */
        last_stream = a_in_stream = gt_array_in_stream_new(arr, NULL, err);
        if (!a_in_stream)
          had_err = -1;
      }
    } else {
      gt_timer_start(t);
    }

    if (!had_err) {
      checker_stream = gt_visitor_stream_new(last_stream, spec_visitor);
      gt_assert(checker_stream);
    }

    /* perform checking  */
    if (!had_err)
      had_err = gt_node_stream_pull(checker_stream, err);

    gt_timer_stop(t);

    /* reset warnings output */
    gt_warning_set_handler(gt_warning_default_handler, NULL);

    /* output results */
    if (!had_err) {
      GtStr *runtime = gt_str_new();
      gt_timer_get_formatted(t, GT_WD ".%06ld", runtime);
      had_err = gt_spec_results_render_template(res, gt_str_get(speclib),
                                                arguments->outfp,
                                                gt_str_get(arguments->specfile),
                                                arguments->verbose,
                                                arguments->colored,
                                                gt_str_get(runtime), err);
      gt_str_delete(runtime);
    }
  }

  /* free */
  /* NOTE(review): spec_visitor and rm are not released here; presumably the
     visitor stream / visitor take ownership -- confirm for the error paths */
  gt_node_stream_delete(gff3_in_stream);
  gt_node_stream_delete(a_in_stream);
  gt_node_stream_delete(a_out_stream);
  gt_node_stream_delete(checker_stream);
  gt_node_stream_delete(feature_stream);
  gt_node_stream_delete(sort_stream);
  gt_spec_results_delete(res);
  gt_feature_index_delete(fi);
  gt_type_checker_delete(type_checker);
  gt_timer_delete(t);
  gt_array_delete(arr);
  gt_str_delete(speclib);

  return had_err;
}
コード例 #4
0
/*
 * Create a protein domain visitor for LTRdigest which calls hmmscan.
 *
 * First verifies that `hmmscan -h` can be run through system(); if not, <err>
 * is set and NULL is returned. Otherwise the visitor is initialised from the
 * given parameters and the hmmscan command line
 *   "hmmscan --cpu <gt_jobs> <cutoff option> <model filename> -"
 * is assembled, stored in the visitor and split at blanks into its argument
 * vector.
 */
GtNodeVisitor* gt_ltrdigest_pdom_visitor_new(GtPdomModelSet *model,
                                             double eval_cutoff,
                                             unsigned int chain_max_gap_length,
                                             GtPdomCutoff cutoff,
                                             GtRegionMapping *rmap,
                                             GtError *err)
{
  GtNodeVisitor *visitor;
  GtLTRdigestPdomVisitor *pdom_visitor;
  GtStr *cmdline;
  int frame, status;
  gt_assert(model && rmap);

  /* make sure hmmscan is callable before building anything */
  status = system("hmmscan -h > /dev/null");
  if (status == -1) {
    gt_error_set(err, "error executing system(hmmscan)");
    return NULL;
  }
  if (WEXITSTATUS(status) != 0) {
    gt_error_set(err, "cannot find the hmmscan executable in PATH");
    return NULL;
  }

  visitor = gt_node_visitor_create(gt_ltrdigest_pdom_visitor_class());
  pdom_visitor = gt_ltrdigest_pdom_visitor_cast(visitor);
  pdom_visitor->eval_cutoff = eval_cutoff;
  pdom_visitor->cutoff = cutoff;
  pdom_visitor->chain_max_gap_length = chain_max_gap_length;
  pdom_visitor->rmap = rmap;
  pdom_visitor->output_all_chains = false;
  pdom_visitor->tag = gt_str_new_cstr(GT_LTRDIGEST_TAG);

  /* one string per entry of the fwd/rev arrays (three each) */
  frame = 0;
  while (frame < 3) {
    pdom_visitor->fwd[frame] = gt_str_new();
    pdom_visitor->rev[frame] = gt_str_new();
    frame++;
  }

  /* assemble the hmmscan call */
  cmdline = gt_str_new_cstr("hmmscan --cpu ");
  gt_str_append_uint(cmdline, gt_jobs);
  gt_str_append_cstr(cmdline, " ");
  if (cutoff == GT_PHMM_CUTOFF_GA) {
    gt_str_append_cstr(cmdline, "--cut_ga");
  }
  else if (cutoff == GT_PHMM_CUTOFF_TC) {
    gt_str_append_cstr(cmdline, "--cut_tc");
  }
  else if (cutoff == GT_PHMM_CUTOFF_NONE) {
    /* no model-defined cutoff: filter by domain E-value instead */
    gt_str_append_cstr(cmdline, "--domE ");
    gt_str_append_double(cmdline, eval_cutoff, 50);
  }
  gt_str_append_cstr(cmdline, " ");
  gt_str_append_cstr(cmdline, gt_pdom_model_set_get_filename(model));
  gt_str_append_cstr(cmdline, " -");

  pdom_visitor->cmdline = cmdline;
  pdom_visitor->args = gt_cstr_split(gt_str_get(pdom_visitor->cmdline), ' ');
  gt_log_log("HMMER cmdline: %s", gt_str_get(cmdline));

  return visitor;
}
コード例 #5
0
/*
 * Parse the alignment section of the HMMER output read from <instream>.
 *
 * Lines are fetched into <buf> with pdom_parser_get_next_line() until an error
 * occurs or a line starting with "Internal pipeline statistics" or ">>" is
 * read. A line with "==" at buf[2..3] starts the alignment of a new domain;
 * all other lines are appended to the alignment text of the current hit, and
 * selected lines contribute to its amino acid string.
 *
 * Returns the error status of the last pdom_parser_get_next_line() call.
 */
static int gt_ltrdigest_pdom_visitor_parse_alignments(GT_UNUSED
                                                      GtLTRdigestPdomVisitor
                                                                            *lv,
                                                     GtHMMERParseStatus *status,
                                                     char *buf,
                                                     FILE *instream,
                                                     GtError *err)
{
  int had_err = 0, cur_domain = GT_UNDEF_INT, line = GT_UNDEF_INT;
  bool first_align_line = false;
  /* number of output lines per alignment block; raised to 5 when an extra
     annotation line is detected below */
  int mod_val = 4;
  GtHMMERSingleHit *hit = NULL;
  gt_assert(lv && instream && status);
  gt_error_check(err);
  had_err = pdom_parser_get_next_line(buf, instream, err);
  gt_assert(buf != NULL);
  while (!had_err && strncmp("Internal pipeline statistics",
                             buf, (size_t) 28) &&
                     strncmp(">>", buf, (size_t) 2)) {
    if ((buf[2] == '=' && buf[3] == '=')) {
      /* domain header line: the domain number is read from the fixed column
         range buf[12..16] */
      /* NOTE(review): buf[17] is written without checking the line length --
         confirm the line is always long enough */
      buf[17] = '\0';
      cur_domain = atoi(buf+12);
      gt_assert(cur_domain != GT_UNDEF_INT && cur_domain > 0);
      /* domain numbers in the output are 1-based, hit indices 0-based */
      hit = gt_hmmer_parse_status_get_hit(status,
                                          (GtUword) cur_domain - 1);
      gt_assert(hit && !hit->alignment);
      hit->alignment = gt_str_new();
      hit->aastring = gt_str_new();
      first_align_line = true;
      mod_val = 4;
    } else {
      bool run = true;
      char junkbuf[BUFSIZ];
      if (first_align_line) {
        /* some models contain consensus structure annotation -- in this case
           there is an additional line in the output which must be taken
           into account */
        line = 0;
        /* NOTE(review): "%s" has no field width, so a second token longer
           than BUFSIZ-1 would overflow junkbuf -- confirm input is bounded */
        if (1 == sscanf(buf, "%*s %s", junkbuf)) {
          if (0 == strcmp(junkbuf, "CS") || 0 == strcmp(junkbuf, "RF")) {
            /* annotation line: skip it and restart counting at -1 so that the
               next line becomes line 0 after the increment below */
            mod_val = 5;
            line = -1;
            run = false;
          }
        }
        first_align_line = false;
      }
      if (run) {
        gt_assert(hit && hit->alignment);
        gt_str_append_cstr(hit->alignment, buf);
        gt_str_append_char(hit->alignment, '\n');
        /* position of this line within the current alignment block */
        switch (line % mod_val) {
          case 1:
            gt_str_append_char(hit->alignment, '\n');
            break;
          case 2:
            {
              /* third blank-separated token of this line is added to the
                 amino acid string; strtok() modifies buf in place (after it
                 has been appended to the alignment above) */
              /* NOTE(review): the strtok() results are only checked by
                 assertions; presumably never NULL for well-formed output */
              GT_UNUSED char *b = buf;
              b = strtok(buf, " ");
              gt_assert(strspn(b, "012+-") == (size_t) 2);
              b = strtok(NULL, " ");
              gt_assert(strlen(b) > 0);
              b = strtok(NULL, " ");
              gt_ltrdigest_pdom_visitor_add_aaseq(b, hit->aastring);
            }
            break;
        }
      }
      line++;
    }
    had_err = pdom_parser_get_next_line(buf, instream, err);
  }
  return had_err;
}
コード例 #6
0
/*
 * Tool runner: draws the features of a GFF3 file as a multi-page PDF or
 * PostScript document using cairo.
 *
 * argv[parsed_args] is the output file, argv[parsed_args+1] the GFF3 input
 * file. The displayed range is cut into slices of arguments->width positions;
 * each slice is laid out and sketched below the previous one, and a new page
 * is started when the next slice would not fit any more. Every page gets a
 * header drawn by draw_header().
 *
 * Returns 0 on success, a non-zero value with <err> set otherwise.
 */
static int gt_sketch_page_runner(GT_UNUSED int argc,
                                 const char **argv,
                                 int parsed_args,
                                 void *tool_arguments,
                                 GtError *err)
{
  SketchPageArguments *arguments = tool_arguments;
  int had_err = 0;
  GtFeatureIndex *features = NULL;
  GtRange qry_range, sequence_region_range;
  GtStyle *sty = NULL;
  GtStr *prog, *gt_style_file;
  GtDiagram *d = NULL;
  GtLayout *l = NULL;
  GtBioseq *bioseq = NULL;
  GtCanvas *canvas = NULL;
  char *seqid = NULL;
  const char *outfile = NULL;
  GtUword start, height, num_pages = 0;
  double offsetpos, usable_height;
  cairo_surface_t *surf = NULL;
  cairo_t *cr = NULL;
  bool has_seqid;
  GtTextWidthCalculator *twc;
  gt_error_check(err);

  features = gt_feature_index_memory_new();

  if (cairo_version() < CAIRO_VERSION_ENCODE(1, 8, 6))
    gt_warning("Your cairo library (version %s) is older than version 1.8.6! "
               "These versions contain a bug which may result in "
               "corrupted PDF output!", cairo_version_string());

  /* get style */
  sty = gt_style_new(err);
  if (gt_str_length(arguments->stylefile) == 0)
  {
    /* no style file given: use the default one from the gtdata directory,
       located via the first blank-separated token of argv[0] */
    prog = gt_str_new();
    gt_str_append_cstr_nt(prog, argv[0],
                          gt_cstr_length_up_to_char(argv[0], ' '));
    /* NOTE(review): the result is used without a NULL check -- confirm
       gt_get_gtdata_path() cannot fail here */
    gt_style_file = gt_get_gtdata_path(gt_str_get(prog), err);
    gt_str_delete(prog);
    gt_str_append_cstr(gt_style_file, "/sketch/default.style");
  }
  else
  {
    gt_style_file = gt_str_ref(arguments->stylefile);
  }
  had_err = gt_style_load_file(sty, gt_str_get(gt_style_file), err);
  if (!had_err) {
    /* NOTE(review): has_seqid is queried before the GFF3 file is added to the
       (freshly created) feature index below -- confirm this order is
       intended */
    had_err = gt_feature_index_has_seqid(features, &has_seqid,
                                         gt_str_get(arguments->seqid), err);
  }

  outfile = argv[parsed_args];
  if (!had_err)
  {
    /* get features */
    had_err = gt_feature_index_add_gff3file(features, argv[parsed_args+1], err);
     if (!had_err && gt_str_length(arguments->seqid) == 0) {
      /* no sequence region requested: take the first one in the index */
      seqid = gt_feature_index_get_first_seqid(features, err);
      if (seqid == NULL)
      {
        gt_error_set(err, "GFF input file must contain a sequence region!");
        had_err = -1;
      }
    }
    else if (!had_err && !has_seqid)
    {
      gt_error_set(err, "sequence region '%s' does not exist in GFF input file",
                   gt_str_get(arguments->seqid));
      had_err = -1;
    }
    else if (!had_err)
      seqid = gt_str_get(arguments->seqid);
  }

  /* set text */
  /* the header text defaults to the name of the GFF3 input file */
  if (gt_str_length(arguments->text) == 0)
  {
    gt_str_delete(arguments->text);
    arguments->text = gt_str_new_cstr(argv[parsed_args+1]);
  }

  if (!had_err)
  {
    /* set display range */
    had_err = gt_feature_index_get_range_for_seqid(features,
                                                   &sequence_region_range,
                                                   seqid, err);
  }
  if (!had_err)
  {
    /* range boundaries not given by the user default to the boundaries of
       the sequence region */
    qry_range.start = (arguments->range.start == GT_UNDEF_UWORD ?
                         sequence_region_range.start :
                         arguments->range.start);
    qry_range.end   = (arguments->range.end == GT_UNDEF_UWORD ?
                         sequence_region_range.end :
                         arguments->range.end);

    /* set output format */
    /* NOTE(review): surf stays NULL for any format other than "pdf" or "ps";
       presumably the option parser restricts the format -- confirm */
    if (strcmp(gt_str_get(arguments->format), "pdf") == 0)
    {
      surf = cairo_pdf_surface_create(outfile,
                                      mm_to_pt(arguments->pwidth),
                                      mm_to_pt(arguments->pheight));
    }
    else if (strcmp(gt_str_get(arguments->format), "ps") == 0)
    {
      surf =  cairo_ps_surface_create(outfile,
                                      mm_to_pt(arguments->pwidth),
                                      mm_to_pt(arguments->pheight));
    }
    gt_log_log("created page with %.2f:%.2f dimensions\n",
                                                  mm_to_pt(arguments->pwidth),
                                                  mm_to_pt(arguments->pheight));

    /* vertical position of the first diagram on a page and the height
       available for diagrams */
    offsetpos = TEXT_SPACER + arguments->theight + TEXT_SPACER;
    usable_height = mm_to_pt(arguments->pheight)
                              - arguments->theight
                              - arguments->theight
                              - 4*TEXT_SPACER;

    if (gt_str_length(arguments->seqfile) > 0) {
      bioseq = gt_bioseq_new(gt_str_get(arguments->seqfile), err);
    }

    cr = cairo_create(surf);
    cairo_set_font_size(cr, 8);
    twc = gt_text_width_calculator_cairo_new(cr, sty, err);
    /* one diagram per slice of arguments->width positions */
    for (start = qry_range.start; start <= qry_range.end;
         start += arguments->width)
    {
      GtRange single_range;
      GtCustomTrack *ct = NULL;
      const char *seq;
      single_range.start = start;
      single_range.end = start + arguments->width;

      if (had_err)
        break;

      d = gt_diagram_new(features, seqid, &single_range, sty, err);
      if (!d) {
        had_err = -1;
        break;
      }
      if (bioseq) {
        /* a sequence file was given: add a GC content track computed from
           its first sequence */
        seq = gt_bioseq_get_sequence(bioseq, 0);
        ct = gt_custom_track_gc_content_new(seq,
                                      gt_bioseq_get_sequence_length(bioseq, 0),
                                      800, 70, 0.4, true);
        gt_diagram_add_custom_track(d, ct);
      }

      /* NOTE(review): the layout pointer is not checked before use */
      l = gt_layout_new_with_twc(d, mm_to_pt(arguments->width), sty, twc, err);
      had_err = gt_layout_get_height(l, &height, err);
      if (!had_err) {
        /* diagram does not fit on the current page any more: finish the page
           and continue at the top of a new one */
        if (gt_double_smaller_double(usable_height - 10 - 2*TEXT_SPACER
              - arguments->theight, offsetpos + height))
        {
            draw_header(cr, gt_str_get(arguments->text), argv[parsed_args+1],
                        seqid, num_pages, mm_to_pt(arguments->pwidth),
                        mm_to_pt(arguments->pheight),
                        arguments->theight);
          cairo_show_page(cr);
          offsetpos = TEXT_SPACER + arguments->theight + TEXT_SPACER;
          num_pages++;
        }
        canvas = gt_canvas_cairo_context_new(sty,
                                             cr,
                                             offsetpos,
                                             mm_to_pt(arguments->pwidth),
                                             height,
                                             NULL,
                                             err);
        if (!canvas)
          had_err = -1;
        offsetpos += height;
        if (!had_err)
          had_err = gt_layout_sketch(l, canvas, err);
      }
      /* NOTE(review): canvas is not reset after deletion; if
         gt_layout_get_height() fails in a later iteration, the pointer from
         the previous iteration would be deleted again -- confirm */
      gt_canvas_delete(canvas);
      gt_layout_delete(l);
      gt_diagram_delete(d);
      if (ct)
        gt_custom_track_delete(ct);
    }
    /* header for the last page */
    draw_header(cr, gt_str_get(arguments->text), argv[parsed_args+1], seqid,
                num_pages, mm_to_pt(arguments->pwidth),
                mm_to_pt(arguments->pheight),
                arguments->theight);
    cairo_show_page(cr);
    num_pages++;
    gt_log_log("finished, should be "GT_WU" pages\n", num_pages);
    /* NOTE(review): all cleanup below lives inside this block, so sty,
       gt_style_file and features are not released when an earlier step
       failed; also seqid may point into arguments->seqid (see above) when it
       is passed to gt_free() -- confirm ownership */
    gt_text_width_calculator_delete(twc);
    cairo_destroy(cr);
    cairo_surface_flush(surf);
    cairo_surface_finish(surf);
    cairo_surface_destroy(surf);
    cairo_debug_reset_static_data();
    if (bioseq)
      gt_bioseq_delete(bioseq);
    gt_style_delete(sty);
    gt_free(seqid);
    gt_str_delete(gt_style_file);
    gt_feature_index_delete(features);
  }
  return had_err;
}
コード例 #7
0
/* Append the option text <opt> verbatim to the string stored in <call>. */
void gt_blast_process_call_set_opt(GtBlastProcessCall *call,
                                   const char *opt)
{
  GtStr *call_str = call->str;

  gt_str_append_cstr(call_str, opt);
}
コード例 #8
0
ファイル: gt_splitfasta.c プロジェクト: 9beckert/TIR
static int split_fasta_file(const char *filename, unsigned long max_filesize,
                            bool force, GtError *err)
{
  GtFile *srcfp = NULL, *destfp = NULL;
  GtStr *destfilename = NULL;
  unsigned long filenum = 0, bytecount = 0, separator_pos;
  int read_bytes, had_err = 0;
  char buf[BUFSIZ];

  gt_error_check(err);
  gt_assert(filename && max_filesize);

  /* open source file */
  srcfp = gt_file_xopen(filename, "r");
  gt_assert(srcfp);

  /* read start characters */
  if ((read_bytes = gt_file_xread(srcfp, buf, BUFSIZ)) == 0) {
    gt_error_set(err, "file \"%s\" is empty", filename);
    had_err = -1;
  }
  bytecount += read_bytes;

  /* make sure the file is in fasta format */
  if (!had_err && buf[0] != '>') {
    gt_error_set(err, "file is not in FASTA format");
    had_err = -1;
  }

  if (!had_err) {
    /* open destination file */
    destfilename = gt_str_new();
    gt_str_append_cstr_nt(destfilename, filename,
                          gt_file_basename_length(filename));
    gt_str_append_char(destfilename, '.');
    gt_str_append_ulong(destfilename, ++filenum);
    gt_str_append_cstr(destfilename,
                       gt_file_mode_suffix(gt_file_mode(srcfp)));
    if (!(destfp = gt_outputfile_xopen_forcecheck(gt_str_get(destfilename), "w",
                                                  force, err))) {
      had_err = -1;
    }
    if (!had_err)
      gt_file_xwrite(destfp, buf, read_bytes);

    while (!had_err &&
           (read_bytes = gt_file_xread(srcfp, buf, BUFSIZ)) != 0) {
      if (bytecount + read_bytes > max_filesize) {
        int offset = bytecount < max_filesize ? max_filesize - bytecount : 0;
        if ((separator_pos = buf_contains_separator(buf, offset, read_bytes))) {
          separator_pos--;
          gt_assert(separator_pos < read_bytes);
          if (separator_pos)
            gt_file_xwrite(destfp, buf, separator_pos);
          /* close current file */
          gt_file_delete(destfp);
          /* open new file */
          gt_str_reset(destfilename);
          gt_str_append_cstr_nt(destfilename, filename,
                                gt_file_basename_length(filename));
          gt_str_append_char(destfilename, '.');
          gt_str_append_ulong(destfilename, ++filenum);
          gt_str_append_cstr(destfilename,
                             gt_file_mode_suffix(gt_file_mode(srcfp)));
          if (!(destfp =
                  gt_outputfile_xopen_forcecheck(gt_str_get(destfilename), "w",
                                                 force, err))) {
            had_err = -1;
            break;
          }
          bytecount = read_bytes - separator_pos; /* reset */
          gt_assert(buf[separator_pos] == '>');
          gt_file_xwrite(destfp, buf + separator_pos,
                         read_bytes - separator_pos);
          continue;
        }
      }
      bytecount += read_bytes;
      gt_file_xwrite(destfp, buf, read_bytes);
    }
  }

  /* free */
  gt_str_delete(destfilename);

  /* close current file */
  gt_file_delete(destfp);

  /* close source file */
  gt_file_delete(srcfp);

  return had_err;
}
コード例 #9
0
/*
 * Create a model set from the HMM files in <hmmfiles>.
 *
 * The files are concatenated into "<TMPDIR or /tmp>/<md5>", where <md5> is
 * the fingerprint of the concatenated file names, and the result is indexed
 * with hmmpress. Both steps are skipped if the index file
 * (<that path> + GT_HMM_INDEX_SUFFIX) already exists.
 *
 * Returns the new model set, or NULL with <err> set if hmmpress cannot be
 * run, one of the HMM files does not exist or cannot be read, the
 * concatenated file cannot be created, or hmmpress fails.
 */
GtPdomModelSet* gt_pdom_model_set_new(GtStrArray *hmmfiles, GtError *err)
{
  GtStr *concat_dbnames, *cmdline, *indexfilename = NULL;
  GtUword i;
  char *md5_hash;
  const char *tmpdir;
  /* ch must be an int: fgetc() returns EOF out of band, a char cannot
     distinguish it from a valid byte */
  int had_err = 0, rval, ch;
  FILE *dest;
  GtPdomModelSet *pdom_model_set;
  gt_assert(hmmfiles);
  gt_error_check(err);

  /* check that hmmpress is callable at all */
  rval = system("hmmpress -h > /dev/null");
  if (rval == -1) {
    gt_error_set(err, "error executing system(hmmpress)");
    return NULL;
  }
#ifndef _WIN32
  if (WEXITSTATUS(rval) != 0) {
    gt_error_set(err, "cannot find the hmmpress executable in PATH");
    return NULL;
  }
#else
  /* XXX */
  gt_error_set(err, "hmmpress for Windows not implemented");
  return NULL;
#endif

  pdom_model_set = gt_calloc((size_t) 1, sizeof (GtPdomModelSet));
  concat_dbnames = gt_str_new();
  for (i = 0; i < gt_str_array_size(hmmfiles); i++) {
    const char *filename = gt_str_array_get(hmmfiles, i);
    if (!gt_file_exists(filename)) {
      gt_error_set(err, "invalid HMM file: %s", filename);
      gt_str_delete(concat_dbnames);
      gt_free(pdom_model_set);
      return NULL;
    }
    gt_str_append_cstr(concat_dbnames, filename);
  }

  /* name of the concatenated model file: <tmpdir>/<md5 of file names> */
  pdom_model_set->filename = gt_str_new();
  if (!(tmpdir = getenv("TMPDIR")))
    tmpdir = "/tmp";
  gt_str_append_cstr(pdom_model_set->filename, tmpdir);
  gt_str_append_char(pdom_model_set->filename, GT_PATH_SEPARATOR);
  md5_hash = gt_md5_fingerprint(gt_str_get(concat_dbnames),
                                gt_str_length(concat_dbnames));
  gt_str_append_cstr(pdom_model_set->filename, md5_hash);
  gt_free(md5_hash);
  gt_str_delete(concat_dbnames);
  indexfilename = gt_str_new_cstr(gt_str_get(pdom_model_set->filename));
  gt_str_append_cstr(indexfilename, GT_HMM_INDEX_SUFFIX);

  if (!gt_file_exists(gt_str_get(indexfilename))) {
    /* concatenate all HMM files into one */
    dest = fopen(gt_str_get(pdom_model_set->filename), "w+");
    if (!dest) {
      gt_error_set(err, "could not create file %s",
                 gt_str_get(pdom_model_set->filename));
      had_err = -1;
    }
    else {
      for (i = 0; !had_err && i < gt_str_array_size(hmmfiles); i++) {
        const char *filename = gt_str_array_get(hmmfiles, i);
        FILE *source = fopen(filename, "r");
        if (!source) {
          gt_error_set(err, "could not open HMM file %s", filename);
          had_err = -1;
        }
        else {
          while ((ch = fgetc(source)) != EOF)
            (void) fputc(ch, dest);
          (void) fclose(source);
        }
      }
      (void) fclose(dest);
    }

    /* index the concatenated file, but only if it was written completely */
    if (!had_err) {
      /* XXX: read hmmer path from env */
      cmdline = gt_str_new_cstr("hmmpress -f ");
      gt_str_append_str(cmdline, pdom_model_set->filename);
      gt_str_append_cstr(cmdline, "> /dev/null");   /* XXX: portability? */

      rval = system(gt_str_get(cmdline));
      gt_str_delete(cmdline);
      if (rval == -1) {
        /* fall through to the cleanup below instead of returning directly */
        gt_error_set(err, "error executing system(hmmpress)");
        had_err = -1;
      }
#ifndef _WIN32
      else if (WEXITSTATUS(rval) != 0) {
        gt_error_set(err, "an error occurred during HMM preprocessing");
        had_err = -1;
      }
#else
      else {
        gt_error_set(err, "WEXITSTATUS not implemented on Windows");
        had_err = -1;
      }
#endif
    }
  }

  if (had_err) {
    gt_pdom_model_set_delete(pdom_model_set);
    pdom_model_set = NULL;
  }
  gt_str_delete(indexfilename);
  return pdom_model_set;
}
コード例 #10
0
ファイル: fmi-map.c プロジェクト: AlexWoroschilow/uni_hamburg
/*
 * Map the FM index stored under <indexname> into <fmindex>.
 *
 * Steps: scan the file with suffix FMASCIIFILESUFFIX, map the BWT encoding,
 * take a reference to its alphabet, compute the derived key values and fill
 * the remaining pointers from the file with suffix FMDATAFILESUFFIX. If any
 * step fails, <fmindex> is released with gt_freefmindex().
 *
 * Returns 0 on success and -1 on failure.
 */
int gt_mapfmindex (Fmindex *fmindex,const char *indexname,
                GtLogger *logger,GtError *err)
{
  FILE *fmafile;
  bool failed = false, storeindexpos = true;
  GtSpecialcharinfo specialcharinfo;

  gt_error_check(err);
  /* start from a defined state so that a cleanup after failure is safe */
  fmindex->mappedptr = NULL;
  fmindex->bwtformatching = NULL;
  fmindex->alphabet = NULL;

  /* step 1: scan the ASCII part of the index */
  fmafile = gt_fa_fopen_with_suffix(indexname,FMASCIIFILESUFFIX,"rb",err);
  if (fmafile == NULL ||
      scanfmafileviafileptr(fmindex,
                            &specialcharinfo,
                            &storeindexpos,
                            indexname,
                            fmafile,
                            logger,
                            err) != 0)
  {
    failed = true;
  }
  gt_fa_xfclose(fmafile);

  /* step 2: map the BWT */
  if (!failed)
  {
    fmindex->bwtformatching = mapbwtencoding(indexname,logger,err);
    failed = (fmindex->bwtformatching == NULL);
  }

  /* step 3: initialise the special positions and fetch the alphabet */
  if (!failed)
  {
    fmindex->specpos.nextfreeGtPairBwtidx
      = (GtUword) gt_determinenumberofspecialstostore(&specialcharinfo);
    fmindex->specpos.spaceGtPairBwtidx = NULL;
    fmindex->specpos.allocatedGtPairBwtidx = 0;
    fmindex->alphabet = gt_alphabet_ref(
                                  gt_encseq_alphabet(fmindex->bwtformatching));
    failed = (fmindex->alphabet == NULL);
  }

  /* step 4: compute the key values and map the data file */
  if (!failed)
  {
    GtStr *fmdatafilename;

    gt_computefmkeyvalues (fmindex,
                           &specialcharinfo,
                           fmindex->bwtlength,
                           fmindex->log2bsize,
                           fmindex->log2markdist,
                           gt_alphabet_num_of_chars(fmindex->alphabet),
                           fmindex->suffixlength,
                           storeindexpos);
    fmdatafilename = gt_str_new_cstr(indexname);
    gt_str_append_cstr(fmdatafilename,FMDATAFILESUFFIX);
    failed = (gt_fillfmmapspecstartptr(fmindex,storeindexpos,fmdatafilename,
                                       err) != 0);
    gt_str_delete(fmdatafilename);
  }

  if (failed)
  {
    gt_freefmindex(fmindex);
    return -1;
  }
  return 0;
}
コード例 #11
0
ファイル: gtf_parser.c プロジェクト: AnnSeidel/genometools
/* Strip one leading and one trailing double quote (if present) from <value>.
   A trailing quote is overwritten in place with '\0'; the returned pointer
   refers to the (possibly advanced) start of <value>. Safe for empty
   strings. */
static char* gtf_strip_quotes(char *value)
{
  size_t len;
  if (*value == '"')
    value++;
  len = strlen(value);
  if (len > 0 && value[len - 1] == '"')
    value[len - 1] = '\0';
  return value;
}

/* Parse the GTF data read line by line from <fpin>.

   - Blank lines are skipped with a warning.
   - Lines starting with '#' are turned into comment nodes and appended to
     <genome_nodes>.
   - All other lines must consist of 9 tab separated fields. Lines with an
     unknown feature type and start codon lines are skipped. For the remaining
     lines a feature node is created and stored in <parser>->gene_id_hash under
     its gene_id/transcript_id attributes.

   After all lines have been read, the region nodes are built and
   construct_genes() is run on every collected gene; both add to
   <genome_nodes>.

   <filenamestr> is used for error messages and as node origin.
   If <be_tolerant> is set, a line whose range, score, strand, frame or
   attributes cannot be parsed is reported on stderr and skipped as a whole;
   otherwise parsing stops at the first such line.

   Returns 0 on success and a negative value on failure, in which case <err>
   is set. */
int gt_gtf_parser_parse(GtGTFParser *parser, GtQueue *genome_nodes,
                        GtStr *filenamestr, GtFile *fpin, bool be_tolerant,
                        GtError *err)
{
  GtStr *seqid_str, *source_str, *line_buffer;
  char *line;
  size_t line_length;
  GtUword i, line_number = 0;
  GtGenomeNode *gn;
  GtRange range;
  GtPhase phase_value;
  GtStrand gt_strand_value;
  GtSplitter *splitter, *attribute_splitter;
  float score_value;
  char *seqname,
       *source,
       *feature,
       *start,
       *end,
       *score,
       *strand,
       *frame,
       *attributes,
       *token,
       *gene_id,
       *gene_name = NULL,
       *transcript_id,
       *transcript_name = NULL,
       **tokens;
  GtHashmap *transcript_id_hash; /* map from transcript id to array of genome
                                    nodes */
  GtArray *gt_genome_node_array;
  ConstructionInfo cinfo;
  GTF_feature_type gtf_feature_type;
  GT_UNUSED bool gff_type_is_valid = false;
  const char *type = NULL;
  const char *filename;
  bool score_is_defined;
  int had_err = 0;

  gt_assert(parser && genome_nodes);
  gt_error_check(err);

  filename = gt_str_get(filenamestr);

  /* alloc */
  line_buffer = gt_str_new();
  splitter = gt_splitter_new();
  attribute_splitter = gt_splitter_new();

/* Must only be used directly inside the line-reading while loop below (never
   inside a nested loop), because its continue/break are meant to skip the
   current line or to stop reading lines, respectively. */
#define HANDLE_ERROR                                                   \
        if (had_err) {                                                 \
          if (be_tolerant) {                                           \
            fprintf(stderr, "skipping line: %s\n", gt_error_get(err)); \
            gt_error_unset(err);                                       \
            gt_str_reset(line_buffer);                                 \
            had_err = 0;                                               \
            continue;                                                  \
          }                                                            \
          else {                                                       \
            had_err = -1;                                              \
            break;                                                     \
          }                                                            \
        }

  while (gt_str_read_next_line_generic(line_buffer, fpin) != EOF) {
    line = gt_str_get(line_buffer);
    line_length = gt_str_length(line_buffer);
    line_number++;
    gene_name = gene_id = transcript_id = transcript_name = NULL;
    had_err = 0;

    if (line_length == 0) {
      gt_warning("skipping blank line " GT_WU " in file \"%s\"", line_number,
                 filename);
    }
    else if (line[0] == '#') {
      /* storing comment */
      if (line_length >= 2 && line[1] == '#')
        gn = gt_comment_node_new(line+2); /* store '##' line as '#' line */
      else
        gn = gt_comment_node_new(line+1);
      gt_genome_node_set_origin(gn, filenamestr, line_number);
      gt_queue_add(genome_nodes, gn);
    }
    else {
      bool stop_codon = false;
      char *tokendup, *attrkey;
      GtStrArray *attrkeys, *attrvals;

      /* process tab delimited GTF line */
      gt_splitter_reset(splitter);
      gt_splitter_split(splitter, line, line_length, '\t');
      if (gt_splitter_size(splitter) != 9UL) {
        gt_error_set(err, "line " GT_WU " in file \"%s\" contains " GT_WU
                     " tab (\\t) " "separated fields instead of 9", line_number,
                     filename,
                  gt_splitter_size(splitter));
        had_err = -1;
        break;
      }
      tokens = gt_splitter_get_tokens(splitter);
      seqname    = tokens[0];
      source     = tokens[1];
      feature    = tokens[2];
      start      = tokens[3];
      end        = tokens[4];
      score      = tokens[5];
      strand     = tokens[6];
      frame      = tokens[7];
      attributes = tokens[8];

      /* parse feature */
      if (GTF_feature_type_get(&gtf_feature_type, feature) == -1) {
        /* we skip unknown features */
        fprintf(stderr, "skipping line " GT_WU " in file \"%s\": unknown "
                "feature: \"%s\"\n", line_number, filename, feature);
        gt_str_reset(line_buffer);
        continue;
      }

      /* translate into GFF3 feature type */
      switch (gtf_feature_type) {
        case GTF_stop_codon:
          stop_codon = true;
          /* fallthrough: stop codons are stored as flagged CDS features */
        case GTF_CDS:
          gff_type_is_valid = gt_type_checker_is_valid(parser->type_checker,
                                                       gt_ft_CDS);
          type = gt_ft_CDS;
          break;
        case GTF_exon:
          gff_type_is_valid = gt_type_checker_is_valid(parser->type_checker,
                                                       gt_ft_exon);
          type = gt_ft_exon;
          break;
        case GTF_start_codon:
          /* we can skip the start codons, they are part of the CDS anyway */
          gt_str_reset(line_buffer);
          continue;
      }
      gt_assert(gff_type_is_valid);

      /* parse the range */
      had_err = gt_parse_range(&range, start, end, line_number, filename, err);
      HANDLE_ERROR;

      /* process seqname (we have to do it here because we need the range) */
      gt_region_node_builder_add_region(parser->region_node_builder, seqname,
                                        range);

      /* parse the score */
      had_err = gt_parse_score(&score_is_defined, &score_value, score,
                               line_number, filename, err);
      HANDLE_ERROR;

      /* parse the strand */
      had_err = gt_parse_strand(&gt_strand_value, strand, line_number, filename,
                               err);
      HANDLE_ERROR;

      /* parse the frame */
      had_err = gt_parse_phase(&phase_value, frame, line_number, filename, err);
      HANDLE_ERROR;

      /* parse the attributes */
      attrkeys = gt_str_array_new();
      attrvals = gt_str_array_new();
      gt_splitter_reset(attribute_splitter);
      gene_id = NULL;
      transcript_id = NULL;
      gt_splitter_split(attribute_splitter, attributes, strlen(attributes),
                        ';');
      for (i = 0; i < gt_splitter_size(attribute_splitter); i++) {
        token = gt_splitter_get_token(attribute_splitter, i);
        /* skip leading blanks */
        while (*token == ' ')
          token++;

        /* split a copy into key and value, the original token is still needed
           below to extract values which may contain blanks */
        tokendup = gt_cstr_dup(token);
        attrkey = strtok(tokendup, " ");
        if (attrkey) {
          char *attrval = strtok(NULL, " ");
          if (attrval != NULL)
            attrval = gtf_strip_quotes(attrval);
          /* a value is also missing if nothing remains after removing the
             quotes (e.g. `""` or a lone `"`) */
          if (attrval == NULL || *attrval == '\0') {
            gt_error_set(err, "missing value to attribute \"%s\" on line "
                         GT_WU " in file \"%s\"", attrkey,line_number,filename);
            had_err = -1;
          }
          else {
            gt_assert(strlen(attrkey) > 0);
            gt_str_array_add_cstr(attrkeys, attrkey);
            gt_str_array_add_cstr(attrvals, attrval);
          }
        }
        gt_free(tokendup);
        if (had_err)
          break; /* leave the attribute loop, the error is handled below */

        /* Look for the two mandatory and the two optional name attributes.
           A token matching one of these prefixes has passed the key/value
           check above, i.e., it contains at least the key, a blank and one
           more character, so the value offset is inside the token. For output
           we want to strip quotes. */
        if (strncmp(token, GENE_ID_ATTRIBUTE, strlen(GENE_ID_ATTRIBUTE)) == 0) {
          gene_id = gtf_strip_quotes(token + strlen(GENE_ID_ATTRIBUTE) + 1);
        }
        else if (strncmp(token, TRANSCRIPT_ID_ATTRIBUTE,
                         strlen(TRANSCRIPT_ID_ATTRIBUTE)) == 0) {
          transcript_id = gtf_strip_quotes(token +
                                           strlen(TRANSCRIPT_ID_ATTRIBUTE) + 1);
        }
        else if (strncmp(token, GENE_NAME_ATTRIBUTE,
                         strlen(GENE_NAME_ATTRIBUTE)) == 0) {
          gene_name = gtf_strip_quotes(token + strlen(GENE_NAME_ATTRIBUTE) + 1);
        }
        else if (strncmp(token, TRANSCRIPT_NAME_ATTRIBUTE,
                         strlen(TRANSCRIPT_NAME_ATTRIBUTE)) == 0) {
          transcript_name = gtf_strip_quotes(token +
                                             strlen(TRANSCRIPT_NAME_ATTRIBUTE)
                                             + 1);
        }
      }

      /* check for the mandatory attributes */
      if (!had_err) {
        if (!gene_id) {
          gt_error_set(err, "missing attribute \"%s\" on line " GT_WU
                       " in file \"%s\"", GENE_ID_ATTRIBUTE, line_number,
                       filename);
          had_err = -1;
        }
        else if (!transcript_id) {
          gt_error_set(err, "missing attribute \"%s\" on line " GT_WU
                       " in file \"%s\"", TRANSCRIPT_ID_ATTRIBUTE, line_number,
                       filename);
          had_err = -1;
        }
      }
      if (had_err) {
        /* the line is dropped, so nobody will consume the attribute lists */
        gt_str_array_delete(attrkeys);
        gt_str_array_delete(attrvals);
      }
      HANDLE_ERROR;

      /* process the mandatory attributes */
      if (!(transcript_id_hash = gt_hashmap_get(parser->gene_id_hash,
                                             gene_id))) {
        transcript_id_hash = gt_hashmap_new(GT_HASH_STRING, gt_free_func,
                                            (GtFree) gt_array_delete);
        gt_hashmap_add(parser->gene_id_hash, gt_cstr_dup(gene_id),
                    transcript_id_hash);
      }
      gt_assert(transcript_id_hash);

      if (!(gt_genome_node_array = gt_hashmap_get(transcript_id_hash,
                                            transcript_id))) {
        gt_genome_node_array = gt_array_new(sizeof (GtGenomeNode*));
        gt_hashmap_add(transcript_id_hash, gt_cstr_dup(transcript_id),
                    gt_genome_node_array);
      }
      gt_assert(gt_genome_node_array);

      /* save optional gene_name and transcript_name attributes */
      if (transcript_name && strlen(transcript_name) > 0
            && !gt_hashmap_get(parser->transcript_id_to_name_mapping,
                             transcript_id)) {
        gt_hashmap_add(parser->transcript_id_to_name_mapping,
                    gt_cstr_dup(transcript_id),
                    gt_cstr_dup(transcript_name));
      }
      if (gene_name && strlen(gene_name) > 0
            && !gt_hashmap_get(parser->gene_id_to_name_mapping,
                                    gene_id)) {
        gt_hashmap_add(parser->gene_id_to_name_mapping,
                    gt_cstr_dup(gene_id),
                    gt_cstr_dup(gene_name));
      }

      /* get seqid */
      seqid_str = gt_hashmap_get(parser->seqid_to_str_mapping, seqname);
      if (!seqid_str) {
        seqid_str = gt_str_new_cstr(seqname);
        gt_hashmap_add(parser->seqid_to_str_mapping, gt_str_get(seqid_str),
                       seqid_str);
      }
      gt_assert(seqid_str);

      /* construct the new feature */
      gn = gt_feature_node_new(seqid_str, type, range.start, range.end,
                                 gt_strand_value);
      gt_genome_node_set_origin(gn, filenamestr, line_number);
      if (stop_codon) {
        gt_feature_node_add_attribute((GtFeatureNode*) gn,
                                      GTF_PARSER_STOP_CODON_FLAG, "true");
      }
      for (i = 0; i < gt_str_array_size(attrkeys); i++) {
        GtFeatureNode *fn = (GtFeatureNode *)gn;
        const char *key = gt_str_array_get(attrkeys, i);
        const char *val = gt_str_array_get(attrvals, i);

        /* Not a comprehensive solution to ensure correct encoding, just bare
           minimum required to get Cufflinks output parsed */
        if (strcmp(val, "=") == 0)
          val = "%26";

        /* repeated keys are merged into one comma separated value */
        if (gt_feature_node_get_attribute(fn, key) != NULL) {
          const char *oldval = gt_feature_node_get_attribute(fn, key);
          GtStr *newval = gt_str_new_cstr(oldval);
          gt_str_append_char(newval, ',');
          gt_str_append_cstr(newval, val);
          gt_feature_node_set_attribute(fn, key, gt_str_get(newval));
          gt_str_delete(newval);
        }
        else
          gt_feature_node_add_attribute(fn, key, val);
      }
      gt_str_array_delete(attrkeys);
      gt_str_array_delete(attrvals);

      /* set source */
      source_str = gt_hashmap_get(parser->source_to_str_mapping, source);
      if (!source_str) {
        source_str = gt_str_new_cstr(source);
        gt_hashmap_add(parser->source_to_str_mapping, gt_str_get(source_str),
                    source_str);
      }
      gt_assert(source_str);
      gt_feature_node_set_source((GtFeatureNode*) gn, source_str);

      if (score_is_defined)
        gt_feature_node_set_score((GtFeatureNode*) gn, score_value);
      if (phase_value != GT_PHASE_UNDEFINED)
        gt_feature_node_set_phase((GtFeatureNode*) gn, phase_value);
      gt_array_add(gt_genome_node_array, gn);
    }

    gt_str_reset(line_buffer);
  }

  /* process all region nodes */
  if (!had_err)
    gt_region_node_builder_build(parser->region_node_builder, genome_nodes);

  /* process all feature nodes */
  cinfo.genome_nodes = genome_nodes;
  cinfo.tidy = be_tolerant;
  cinfo.gene_id_to_name_mapping = parser->gene_id_to_name_mapping;
  cinfo.transcript_id_to_name_mapping = parser->transcript_id_to_name_mapping;
  if (!had_err) {
    had_err = gt_hashmap_foreach(parser->gene_id_hash, construct_genes,
                                 &cinfo, err);
  }
  gt_hashmap_foreach(parser->gene_id_hash, delete_genes, NULL, err);

  /* free */
  gt_splitter_delete(splitter);
  gt_splitter_delete(attribute_splitter);
  gt_str_delete(line_buffer);

  return had_err;
}
コード例 #12
0
ファイル: gtr.c プロジェクト: yesimon/genometools
/* Create a new runtime object.

   The seed is taken from the GT_SEED environment variable if it is set (and
   0 otherwise). A new Lua state is created and the standard, GenomeTools,
   LPeg, MD5, Lua filesystem and DES56 libraries are opened in it. Unless
   compiled with WITHOUT_CAIRO, a style object sharing that Lua state is
   created and "sketch/default.style" from the gtdata directory is loaded
   into it, if that file exists.

   Returns NULL on failure; everything allocated up to that point is released
   again. <err> is set explicitly for an invalid GT_SEED and for a failed Lua
   state creation, the other failures rely on the called function to set
   it. */
GtR* gtr_new(GtError *err)
{
  GtR *gtr;
  char *seedstr = NULL;
  int had_err = 0;
#ifndef WITHOUT_CAIRO
  GtStr *style_file = NULL;
#endif
  /* zero-initialized, i.e., all members not set below are NULL/0 */
  gtr = gt_calloc(1, sizeof (GtR));
  if ((seedstr = getenv("GT_SEED"))) {
    if (gt_parse_uint(&gtr->seed, seedstr) != 0) {
      gt_error_set(err, "invalid seed in GT_SEED environment variable: %s",
                   seedstr);
      had_err = -1;
    }
  } else gtr->seed = 0;
  if (!had_err) {
    gtr->debugfp = gt_str_new();
    gtr->testspacepeak = gt_str_new();
    gtr->test_only = gt_str_new();
    gtr->manoutdir = gt_str_new();
    gtr->L = luaL_newstate();
    if (!gtr->L) {
      gt_error_set(err, "out of memory (cannot create new lua state)");
      had_err = -1;
    }
  }
  if (!had_err) {
    luaL_openlibs(gtr->L);    /* open the standard libraries */
    gt_lua_open_lib(gtr->L);  /* open the GenomeTools library */
    lua_pushcfunction(gtr->L, luaopen_lpeg);
    lua_pushstring(gtr->L, "lpeg");
    lua_call(gtr->L, 1, 0);   /* open LPeg library */
    lua_pushcfunction(gtr->L, luaopen_md5_core);
    lua_pushstring(gtr->L, "md5");
    lua_call(gtr->L, 1, 0);   /* open MD5 library */
    lua_pushcfunction(gtr->L, luaopen_lfs);
    lua_pushstring(gtr->L, "lfs");
    lua_call(gtr->L, 1, 0);   /* open Lua filesystem */
    lua_pushcfunction(gtr->L, luaopen_des56);
    lua_pushstring(gtr->L, "des56");
    lua_call(gtr->L, 1, 0);   /* open DES56 library */
    had_err = gt_lua_set_modules_path(gtr->L, err);
  }
#ifndef WITHOUT_CAIRO
  if (!had_err) {
    lua_settop(gtr->L, 0);
    if (!(gtr->style = gt_style_new_with_state(gtr->L)))
      had_err = -1;
  }
  if (!had_err) {
    if (!(style_file = gt_get_gtdata_path(gt_error_get_progname(err), err)))
      had_err = -1;
  }
  if (!had_err) {
    gt_str_append_cstr(style_file, "/sketch/default.style");
    /* a missing default style file is not an error */
    if (gt_file_exists(gt_str_get(style_file))) {
      if (gt_style_load_file(gtr->style, gt_str_get(style_file), err))
        had_err = -1;
      else
        gt_lua_put_style_in_registry(gtr->L, gtr->style);
    }
  }
  gt_str_delete(style_file);
#endif
  if (had_err) {
    /* Freeing only the struct would leak the strings, the Lua state and the
       style created above.
       NOTE(review): relies on gtr_delete() accepting an object whose
       remaining members are still NULL -- confirm. */
    gtr_delete(gtr);
    return NULL;
  }
  return gtr;
}
コード例 #13
0
/* Tool runner performing a two-stage BLAST search on a condensed database.

   If neither blastn nor blastp is requested, only the logger and the file
   name are set up and released again. Otherwise:
   1. a BLAST database is created from "<dbpath>.fas" and searched with the
      query (coarse search), collecting the index and range of each hit,
   2. the hit sequences are extracted into "coarse_<db basename>.fas" in the
      current directory,
   3. a second BLAST database is created from that file and searched again
      (fine search); its matches are written tab separated to
      <arguments>->outfp.

   If the coarse or the fine E-value is undefined, a raw E-value is derived
   from the bitscore and the average query length; the fine E-value, if
   undefined, is this raw value multiplied with the length of the extracted
   sequences.

   <tool_arguments> must point to a GtCondenserSearchArguments object.
   Returns 0 on success and a negative value on failure. */
static int gt_condenser_search_runner(GT_UNUSED int argc,
                                      GT_UNUSED const char **argv,
                                      GT_UNUSED int parsed_args,
                                      void *tool_arguments,
                                      GtError *err)
{
  GtCondenserSearchArguments *arguments = tool_arguments;
  int i, had_err = 0;
  char *querypath;
  GtStr* coarse_fname;
  char *db_basename = NULL;
  char *suffix_ptr = NULL;
  GtTimer *timer = NULL;
  GtLogger *logger = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  /* only touch <arguments> after it has been checked */
  querypath = gt_str_get(arguments->querypath);
  coarse_fname = gt_str_new_cstr("coarse_");

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  db_basename = gt_basename(gt_str_get(arguments->dbpath));
  /* if first char is '.' this might be a hidden file */
  if (strlen(db_basename) > (size_t) 1 &&
      (suffix_ptr = strrchr(db_basename + 1, '.')) != NULL) {
    /* remove suffix */
    *suffix_ptr = '\0';
  }
  gt_str_append_cstr(coarse_fname, db_basename);
  gt_str_append_cstr(coarse_fname, ".fas");
  gt_free(db_basename);
  db_basename = NULL;
  suffix_ptr = NULL;

  if (arguments->blastn || arguments->blastp) {
    GtMatch              *match;
    GtMatchIterator      *mp = NULL;
    GtNREncseq           *nrencseq = NULL;
    GtStr                *fastaname = gt_str_clone(arguments->dbpath);
    HitPosition          *hits;
    double                eval,
                          raw_eval = 0.0;
    GtUword               coarse_db_len = 0;
    GtMatchIteratorStatus status;
    int                   curr_hits = 0,
                          max_hits = 100;

    hits = gt_malloc(sizeof (*hits) * (size_t) max_hits);

    gt_str_append_cstr(fastaname, ".fas");

    /* every slot owns its range, so that matches can be stored in place */
    for (i=0; i < max_hits; i++) {
      hits[i].range = gt_malloc(sizeof (*hits[i].range) * (size_t) 1);
    }

    if (gt_showtime_enabled()) {
      timer = gt_timer_new_with_progress_description("initialization");
      gt_timer_start(timer);
    }

    /*extract sequences from compressed database*/
    if (!had_err) {
      nrencseq = gt_n_r_encseq_new_from_file(gt_str_get(arguments->dbpath),
                                             logger, err);
      if (nrencseq == NULL)
        had_err = -1;
    }
    if (!had_err) {
      if (arguments->ceval == GT_UNDEF_DOUBLE ||
          arguments->feval == GT_UNDEF_DOUBLE) {
        /* from NCBI BLAST tutorial:
           E = Kmne^{-lambdaS}
           calculates E-value for score S with natural scale parameters K for
           search space size and lambda for the scoring system
           E = mn2^-S'
           m being the subject (total) length, n the length of ONE query
           calculates E-value for bit-score S'
         */
        GtFastaReader *reader;
        GtCondenserSearchAvg avg = {0,0};
        reader = gt_fasta_reader_rec_new(arguments->querypath);
        had_err = gt_fasta_reader_run(reader, NULL, NULL,
                                      gt_condenser_search_cum_moving_avg,
                                      &avg,
                                      err);
        if (!had_err) {
          GtUword S = arguments->bitscore;
          gt_log_log(GT_WU " queries, avg query size: " GT_WU,
                     avg.count, avg.avg);
          gt_assert(avg.avg != 0);
          raw_eval = 1/pow(2.0, (double) S) * avg.avg;
          gt_logger_log(logger, "Raw E-value set to %.4e", raw_eval);
        }
        gt_fasta_reader_delete(reader);
      }
    }

    /*create BLAST database from compressed database fasta file*/
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create coarse BLAST db", stderr);
      if (arguments->blastn)
        had_err = gt_condenser_search_create_nucl_blastdb(gt_str_get(fastaname),
                                                          err);
      else
        had_err = gt_condenser_search_create_prot_blastdb(gt_str_get(fastaname),
                                                          err);
    }

    /* perform coarse BLAST search */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "coarse BLAST run", stderr);

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();
      gt_blast_process_call_set_db(call, gt_str_get(fastaname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, arguments->ceval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      while (!had_err &&
             (status = gt_match_iterator_next(mp, &match, err)) !=
             GT_MATCHER_STATUS_END)
      {
        if (status == GT_MATCHER_STATUS_OK) {
          GtUword hit_seq_id;
          char string[7];
          const char *dbseqid = gt_match_get_seqid2(match);
          if (sscanf(dbseqid,"%6s" GT_WU, string, &hit_seq_id) == 2) {
            gt_match_get_range_seq2(match, hits[curr_hits].range);
            hits[curr_hits].idx = hit_seq_id;
            gt_match_delete(match);
            curr_hits++;
            if (curr_hits == max_hits) {
              /* grow the hit table by another 100 slots */
              max_hits += 100;
              hits = gt_realloc(hits, sizeof (*hits) * (size_t) max_hits);
              for (i=max_hits - 100; i < max_hits; i++) {
                hits[i].range = gt_malloc(sizeof (*hits[i].range));
              }
            }
          } else {
            /* set the error first, <dbseqid> belongs to <match> */
            gt_error_set(err, "could not parse unique db header %s", dbseqid);
            gt_match_delete(match);
            had_err = -1;
          }
        } else if (status == GT_MATCHER_STATUS_ERROR) {
          had_err = -1;
        }
      }
      gt_match_iterator_delete(mp);
    }
    /*extract sequences*/
    if (!had_err) {
      GtNREncseqDecompressor *decomp;
      GtFile *coarse_hits;
      if (timer != NULL)
        gt_timer_show_progress(timer, "extract coarse search hits", stderr);
      decomp = gt_n_r_encseq_decompressor_new(nrencseq);
      coarse_hits = gt_file_new(gt_str_get(coarse_fname),"w", err);
      if (coarse_hits == NULL) {
        /* the output file could not be opened, do not extract anything */
        had_err = -1;
      }
      /* TODO DW do NOT extract complete uniques! these could be complete
         chromosomes!! just extract something around it? maybe +- max query
         length*/
      if (!had_err) {
        for (i = 0; i < curr_hits; i++) {
          gt_n_r_encseq_decompressor_add_unique_idx_to_extract(decomp,
                                                               hits[i].idx);
        }
        had_err =
          gt_n_r_encseq_decompressor_start_unique_extraction(coarse_hits,
                                                             decomp,
                                                             &coarse_db_len,
                                                             err);
        /* the length is only meaningful if the extraction succeeded */
        gt_assert(had_err || coarse_db_len != 0);
      }
      if (coarse_hits != NULL)
        gt_file_delete(coarse_hits);
      gt_n_r_encseq_decompressor_delete(decomp);
    }
    gt_n_r_encseq_delete(nrencseq);

    /* create BLAST database from decompressed database file */
    if (!had_err) {
      if (timer != NULL)
        gt_timer_show_progress(timer, "create fine BLAST db", stderr);
      if (arguments->blastn)
        had_err =
          gt_condenser_search_create_nucl_blastdb(gt_str_get(coarse_fname),
                                                  err);
      else
        had_err =
          gt_condenser_search_create_prot_blastdb(gt_str_get(coarse_fname),
                                                  err);
    }
    /* perform fine BLAST search */
    if (!had_err) {
      GtBlastProcessCall *call;

      if (timer != NULL)
        gt_timer_show_progress(timer, "fine BLAST run", stderr);

      if (arguments->feval == GT_UNDEF_DOUBLE) {
        eval = raw_eval * coarse_db_len;
      } else {
        eval = arguments->feval;
      }

      if (arguments->blastp)
        call = gt_blast_process_call_new_prot();
      else
        call = gt_blast_process_call_new_nucl();

      gt_blast_process_call_set_db(call, gt_str_get(coarse_fname));
      gt_blast_process_call_set_query(call, querypath);
      gt_blast_process_call_set_evalue(call, eval);
      gt_blast_process_call_set_num_threads(call, arguments->blthreads);

      gt_logger_log(logger, "Fine E-value set to: %.4e (len)" GT_WU, eval,
                    coarse_db_len);

      mp = gt_match_iterator_blast_process_new(call, err);
      if (!mp)
        had_err = -1;

      gt_blast_process_call_delete(call);

      if (!had_err) {
        GtUword numofhits = 0;
        while (!had_err &&
               (status = gt_match_iterator_next(mp, &match, err)) !=
               GT_MATCHER_STATUS_END) {
          if (status == GT_MATCHER_STATUS_OK) {
            GtMatchBlast *matchb = (GtMatchBlast*) match;
            GtRange range_seq1;
            GtRange range_seq2;
            numofhits++;
            gt_match_get_range_seq1(match, &range_seq1);
            gt_match_get_range_seq2(match, &range_seq2);
            gt_file_xprintf(
                    arguments->outfp,
                    "%s\t%s\t%.2f\t" GT_WU "\t" GT_WU "\t" GT_WU "\t" GT_WU "\t"
                    GT_WU "\t%g\t%.3f\n",
                    gt_match_get_seqid1(match),
                    gt_match_get_seqid2(match),
                    gt_match_blast_get_similarity(matchb),
                    gt_match_blast_get_align_length(matchb),
                    range_seq1.start,
                    range_seq1.end,
                    range_seq2.start,
                    range_seq2.end,
                    gt_match_blast_get_evalue(matchb),
                    (double) gt_match_blast_get_bitscore(matchb));
            gt_match_delete(match);
          } else if (status == GT_MATCHER_STATUS_ERROR) {
            had_err = -1;
          }
        }
        gt_log_log(GT_WU " hits found\n", numofhits);
      }
      gt_match_iterator_delete(mp);

    }
    if (!had_err)
      if (timer != NULL)
        gt_timer_show_progress_final(timer, stderr);
    gt_timer_delete(timer);

    /*cleanup*/
    for (i=0; i < max_hits; i++) {
      gt_free(hits[i].range);
    }
    gt_free(hits);
    gt_str_delete(fastaname);
  }
  gt_str_delete(coarse_fname);
  gt_logger_delete(logger);
  return had_err;
}
コード例 #14
0
ファイル: canvas_cairo.c プロジェクト: 9beckert/TIR
/* Renders a ruler with dynamic scale labeling and optional grid.

   The major tick distance is the largest power of ten smaller than the length
   of <viewrange> (but at least 1), minor ticks are drawn at a tenth of that
   distance if this is at least 1. Major ticks are labeled using the "unit"
   style option. The "show_grid", "ruler_font_size", "ruler_left_text" and
   "ruler_right_text" options of the "format" style section are honored; the
   left and right texts fall back to FIVE_PRIME_STRING and
   THREE_PRIME_STRING, respectively, if not set.

   Returns 0 on success and -1 if a style query failed. */
int gt_canvas_cairo_draw_ruler(GtCanvas *canvas, GtRange viewrange,
                                GtError *err)
{
  double step, minorstep, vmajor, vminor, theight = TOY_TEXT_HEIGHT;
  long base_length, tick;
  GtColor rulercol, gridcol;
  GtStr *left_str, *right_str, *unit;
  char str[BUFSIZ];
  GtStyleQueryStatus rval;
  bool showgrid = true;
  gt_assert(canvas);

  if (gt_style_get_bool(canvas->pvt->sty, "format", "show_grid", &showgrid,
                        NULL, err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }
  if (gt_style_get_num(canvas->pvt->sty, "format", "ruler_font_size",
                       &theight, NULL, err) == GT_STYLE_QUERY_ERROR) {
    return -1;
  }

  /* get unit value from style, default: empty */
  unit = gt_str_new();
  if (gt_style_get_str(canvas->pvt->sty,
                       "format", "unit",
                       unit, NULL, err) == GT_STYLE_QUERY_ERROR) {
    gt_str_delete(unit);
    return -1;
  }

  /* get additional description texts from style */
  left_str = gt_str_new();
  rval = gt_style_get_str(canvas->pvt->sty, "format",
                          "ruler_left_text", left_str, NULL, err);
  switch (rval) {
    case GT_STYLE_QUERY_NOT_SET:
      gt_str_append_cstr(left_str, FIVE_PRIME_STRING);
      break;
    case GT_STYLE_QUERY_ERROR:
      gt_str_delete(unit);
      gt_str_delete(left_str);
      return -1;
      break;  /* shouldn't reach this */
    default:
      break;
  }
  right_str = gt_str_new();
  rval = gt_style_get_str(canvas->pvt->sty, "format",
                        "ruler_right_text", right_str, NULL, err);
  switch (rval) {
    case GT_STYLE_QUERY_NOT_SET:
      gt_str_append_cstr(right_str, THREE_PRIME_STRING);
      break;
    case GT_STYLE_QUERY_ERROR:
      gt_str_delete(unit);
      gt_str_delete(left_str);
      gt_str_delete(right_str);
      return -1;
      break;  /* shouldn't reach this */
    default:
      break;
  }

  /* reset font to default */
  gt_graphics_set_font(canvas->pvt->g,
                       "sans-serif",
                       SLANT_NORMAL,
                       WEIGHT_NORMAL,
                       theight);

  rulercol.red = rulercol.green = rulercol.blue = RULER_GREY;
  rulercol.alpha = 1.0;
  gridcol.red = gridcol.green = gridcol.blue = GRID_GREY;
  gridcol.alpha = 1.0;

  /* determine range and step of the scale */
  base_length = gt_range_length(&viewrange);

  /* determine tick steps */
  step = pow(10,ceil(log10(base_length))-1);
  /* Tick positions are integers: with a step below 1 (which the formula
     yields for a view range of length 1) adding the step to <tick> would
     never advance it and the major tick loop would not terminate. */
  if (step < 1.0)
    step = 1.0;
  minorstep = step/10.0;

  /* calculate starting positions */
  vminor = (double) (floor(viewrange.start / minorstep))*minorstep;
  vmajor = (double) (floor(viewrange.start / step))*step;

  /* draw major ticks */
  for (tick = vmajor; tick <= viewrange.end; tick += step)
  {
    double drawtick = (gt_coords_convert_point(viewrange, tick)
                       * (canvas->pvt->width-2*canvas->pvt->margins))
                       + canvas->pvt->margins;
    /* the first tick position may lie left of the visible range */
    if (tick < viewrange.start) continue;
    gt_graphics_draw_vertical_line(canvas->pvt->g,
                                   drawtick,
                                   canvas->pvt->y + 30,
                                   rulercol,
                                   10,
                                   1.0);
    gt_format_ruler_label(str, tick, gt_str_get(unit), BUFSIZ);
    gt_graphics_draw_text_centered(canvas->pvt->g,
                                   drawtick,
                                   canvas->pvt->y + 20,
                                   str);
  }
  /* draw minor ticks (only if they fall on integer positions) */
  if (minorstep >= 1)
  {
    for (tick = vminor; tick <= viewrange.end; tick += minorstep)
    {
      double drawtick;
      if (tick < viewrange.start)
        continue;
      drawtick = (gt_coords_convert_point(viewrange, tick)
                    * (canvas->pvt->width-2*canvas->pvt->margins))
                  + canvas->pvt->margins;
      if (showgrid)
      {
        gt_graphics_draw_vertical_line(canvas->pvt->g,
                                       drawtick,
                                       canvas->pvt->y + 40,
                                       gridcol,
                                       canvas->pvt->height - 40 - 15,
                                       1.0);
      }
      gt_graphics_draw_vertical_line(canvas->pvt->g,
                                     drawtick,
                                     canvas->pvt->y + 35,
                                     rulercol,
                                     5,
                                     1.0);
    }
  }
  /* draw ruler line */
  gt_graphics_draw_horizontal_line(canvas->pvt->g,
                                   canvas->pvt->margins,
                                   canvas->pvt->y + 40,
                                   rulercol,
                                   canvas->pvt->width - 2
                                     * canvas->pvt->margins,
                                   1.25);

  /* draw the description texts left and right of the ruler line */
  gt_graphics_draw_text_right(canvas->pvt->g,
                              canvas->pvt->margins - 10,
                              canvas->pvt->y + 39 + (theight/2),
                              gt_str_get(left_str));
  gt_graphics_draw_text(canvas->pvt->g,
                        canvas->pvt->width - canvas->pvt->margins + 10,
                        canvas->pvt->y + 39 + (theight/2),
                        gt_str_get(right_str));

  gt_str_delete(unit);
  gt_str_delete(left_str);
  gt_str_delete(right_str);

  return 0;
}
コード例 #15
0
ファイル: gff3_visitor.c プロジェクト: ggonnella/genometools
/* Emits one output line for the feature node <fn>: the leading part, then the
   attributes (first the unique id stored for <fn>, then the ids of its
   parents, then all remaining attributes) and finally a newline. A single '.'
   is written in place of the attributes if none were shown.
   <data> is the GtGFF3Visitor. Everything is appended to its <outstr> if that
   is set and written to its <outfp> otherwise. Always returns 0. */
static int gff3_show_feature_node(GtFeatureNode *fn, void *data,
                                  GT_UNUSED GtError *err)
{
  GtGFF3Visitor *gff3_visitor = (GtGFF3Visitor*) data;
  GtStr *outstr, *id;
  GtArray *parent_features;
  ShowAttributeInfo info;
  GtUword i, nof_parents;
  bool part_shown = false;

  gt_error_check(err);
  gt_assert(fn && gff3_visitor);
  outstr = gff3_visitor->outstr;

  /* leading part */
  if (outstr)
    gt_gff3_output_leading_str(fn, outstr);
  else
    gt_gff3_output_leading(fn, gff3_visitor->outfp);

  /* unique id, if one has been registered for this node */
  id = gt_hashmap_get(gff3_visitor->feature_node_to_unique_id_str, fn);
  if (id) {
    if (outstr) {
      gt_str_append_cstr(outstr, GT_GFF_ID);
      gt_str_append_char(outstr, '=');
      gt_str_append_cstr(outstr, gt_str_get(id));
    }
    else
      gt_file_xprintf(gff3_visitor->outfp, "%s=%s", GT_GFF_ID, gt_str_get(id));
    part_shown = true;
  }

  /* comma-separated list of parent ids */
  parent_features = gt_hashmap_get(gff3_visitor->feature_node_to_id_array, fn);
  nof_parents = gt_array_size(parent_features);
  if (nof_parents) {
    if (outstr) {
      if (part_shown)
        gt_str_append_char(outstr, ';');
      gt_str_append_cstr(outstr, GT_GFF_PARENT);
      gt_str_append_char(outstr, '=');
    }
    else {
      if (part_shown)
        gt_file_xfputc(';', gff3_visitor->outfp);
      gt_file_xprintf(gff3_visitor->outfp, "%s=", GT_GFF_PARENT);
    }
    for (i = 0; i < nof_parents; i++) {
      const char *parent_id = *(char**) gt_array_get(parent_features, i);
      if (outstr) {
        if (i)
          gt_str_append_char(outstr, ',');
        gt_str_append_cstr(outstr, parent_id);
      }
      else {
        if (i)
          gt_file_xfputc(',', gff3_visitor->outfp);
        gt_file_xprintf(gff3_visitor->outfp, "%s", parent_id);
      }
    }
    part_shown = true;
  }

  /* all other attributes; the callback updates <part_shown> via <info> */
  info.attribute_shown = &part_shown;
  info.outfp = gff3_visitor->outfp;
  info.outstr = outstr;
  gt_feature_node_foreach_attribute(fn, show_attribute, &info);

  /* placeholder for an empty attribute part */
  if (!part_shown) {
    if (outstr)
      gt_str_append_char(outstr, '.');
    else
      gt_file_xfputc('.', gff3_visitor->outfp);
  }

  /* line terminator */
  if (outstr)
    gt_str_append_char(outstr, '\n');
  else
    gt_file_xfputc('\n', gff3_visitor->outfp);

  return 0;
}
コード例 #16
0
/* Runner of the sketch tool. Reads the annotation files given after the output
   file name on the command line into an in-memory feature index, builds a
   diagram and layout for one sequence region and writes the rendered image to
   the output file argv[parsed_args].
   <tool_arguments> is the GtSketchArguments structure holding the parsed
   options. Returns 0 on success and a non-zero error code otherwise. */
static int gt_sketch_runner(int argc, const char **argv, int parsed_args,
                              void *tool_arguments, GT_UNUSED GtError *err)
{
  GtSketchArguments *arguments = tool_arguments;
  GtNodeStream *in_stream = NULL,
               *add_introns_stream = NULL,
               *gff3_out_stream = NULL,
               *feature_stream = NULL,
               *sort_stream = NULL,
               *last_stream;
  GtFeatureIndex *features = NULL;
  const char *file;
  /* <seqid> is the identifier actually used; <first_seqid> is only set when
     the identifier was obtained from the feature index and is the only one of
     the two that this function owns and has to free */
  char *seqid = NULL,
       *first_seqid = NULL;
  GtRange qry_range, sequence_region_range;
  GtArray *results = NULL;
  GtStyle *sty = NULL;
  GtStr *prog, *defaultstylefile = NULL;
  GtDiagram *d = NULL;
  GtLayout *l = NULL;
  GtImageInfo* ii = NULL;
  GtCanvas *canvas = NULL;
  GtUword height;
  bool has_seqid;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(arguments);

  /* locate the default style file relative to the program name, which is
     argv[0] up to its first blank */
  prog = gt_str_new();
  gt_str_append_cstr_nt(prog, argv[0],
                        gt_cstr_length_up_to_char(argv[0], ' '));
  defaultstylefile = gt_get_gtdata_path(gt_str_get(prog), err);
  gt_str_delete(prog);
  if (!defaultstylefile)
    had_err = -1;
  if (!had_err) {
    gt_str_append_cstr(defaultstylefile, "/sketch/default.style");
  }

  /* first non-option argument: name of the image file to write */
  file = argv[parsed_args];
  if (!had_err) {
    /* create feature index */
    features = gt_feature_index_memory_new();
    parsed_args++;

    /* create an input stream */
    if (strcmp(gt_str_get(arguments->input), "gff") == 0)
    {
      in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                 argv + parsed_args);
      if (arguments->verbose)
        gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) in_stream);
    } else if (strcmp(gt_str_get(arguments->input), "bed") == 0)
    {
      if (argc - parsed_args == 0)
        in_stream = gt_bed_in_stream_new(NULL);
      else
        in_stream = gt_bed_in_stream_new(argv[parsed_args]);
    } else if (strcmp(gt_str_get(arguments->input), "gtf") == 0)
    {
      if (argc - parsed_args == 0)
        in_stream = gt_gtf_in_stream_new(NULL);
      else
        in_stream = gt_gtf_in_stream_new(argv[parsed_args]);
    }
    last_stream = in_stream;

    /* create add introns stream if -addintrons was used */
    if (arguments->addintrons) {
      sort_stream = gt_sort_stream_new(last_stream);
      add_introns_stream = gt_add_introns_stream_new(sort_stream);
      last_stream = add_introns_stream;
    }

    /* create gff3 output stream if -pipe was used */
    if (arguments->pipe) {
      gff3_out_stream = gt_gff3_out_stream_new(last_stream, NULL);
      last_stream = gff3_out_stream;
    }

    /* create feature stream */
    feature_stream = gt_feature_stream_new(last_stream, features);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(feature_stream, err);

    gt_node_stream_delete(feature_stream);
    gt_node_stream_delete(gff3_out_stream);
    gt_node_stream_delete(sort_stream);
    gt_node_stream_delete(add_introns_stream);
    gt_node_stream_delete(in_stream);
  }

  if (!had_err) {
    had_err = gt_feature_index_has_seqid(features,
                                         &has_seqid,
                                         gt_str_get(arguments->seqid),
                                         err);
  }

  /* if seqid is empty, take first one added to index */
  if (!had_err && strcmp(gt_str_get(arguments->seqid),"") == 0) {
    first_seqid = gt_feature_index_get_first_seqid(features, err);
    seqid = first_seqid;
    if (seqid == NULL) {
      gt_error_set(err, "GFF input file must contain a sequence region!");
      had_err = -1;
    }
  }
  else if (!had_err && !has_seqid) {
    gt_error_set(err, "sequence region '%s' does not exist in GFF input file",
                 gt_str_get(arguments->seqid));
    had_err = -1;
  }
  else if (!had_err) {
    /* borrowed from the option string, must not be freed here */
    seqid = gt_str_get(arguments->seqid);
  }

  results = gt_array_new(sizeof (GtGenomeNode*));
  if (!had_err) {
    had_err = gt_feature_index_get_range_for_seqid(features,
                                                   &sequence_region_range,
                                                   seqid,
                                                   err);
  }
  if (!had_err) {
    /* undefined start/end options default to the sequence region borders */
    qry_range.start = (arguments->start == GT_UNDEF_UWORD ?
                         sequence_region_range.start :
                         arguments->start);
    qry_range.end   = (arguments->end == GT_UNDEF_UWORD ?
                         sequence_region_range.end :
                         arguments->end);
  }

  if (!had_err) {
    if (arguments->verbose)
      fprintf(stderr, "# of results: "GT_WU"\n", gt_array_size(results));

    /* find and load style file */
    if (!(sty = gt_style_new(err)))
      had_err = -1;
    if (gt_str_length(arguments->stylefile) == 0) {
      gt_str_append_str(arguments->stylefile, defaultstylefile);
    } else if (!had_err) {
      /* only inspect the given style file if no error is pending, so that a
         failure to create the style object is not reported as a missing
         file */
      if (gt_file_exists(gt_str_get(arguments->stylefile))) {
        if (arguments->unsafe)
          gt_style_unsafe_mode(sty);
      }
      else
      {
        had_err = -1;
        gt_error_set(err, "style file '%s' does not exist!",
                          gt_str_get(arguments->stylefile));
      }
    }
    if (!had_err)
      had_err = gt_style_load_file(sty, gt_str_get(arguments->stylefile), err);
  }

  if (!had_err) {
    /* create and write image file */
    if (!(d = gt_diagram_new(features, seqid, &qry_range, sty, err)))
      had_err = -1;
    if (!had_err && arguments->flattenfiles)
      gt_diagram_set_track_selector_func(d, flattened_file_track_selector,
                                         NULL);
    if (had_err || !(l = gt_layout_new(d, arguments->width, sty, err)))
      had_err = -1;
    if (!had_err)
      had_err = gt_layout_get_height(l, &height, err);
    if (!had_err) {
      ii = gt_image_info_new();

      /* every format other than pdf, ps and svg is rendered as PNG */
      if (strcmp(gt_str_get(arguments->format),"pdf")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PDF,
                                          arguments->width,
                                          height, ii, err);
      }
      else if (strcmp(gt_str_get(arguments->format),"ps")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PS,
                                          arguments->width,
                                          height, ii, err);
      }
      else if (strcmp(gt_str_get(arguments->format),"svg")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_SVG,
                                          arguments->width,
                                          height, ii, err);
      }
      else {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PNG,
                                          arguments->width,
                                          height, ii, err);
      }
      if (!canvas)
        had_err = -1;
      if (!had_err) {
        had_err = gt_layout_sketch(l, canvas, err);
      }
      if (!had_err) {
        if (arguments->showrecmaps) {
          /* print the image map coordinates and feature type of each
             recorded element to stdout */
          GtUword i;
          const GtRecMap *rm;
          for (i = 0; i < gt_image_info_num_of_rec_maps(ii) ;i++) {
            char buf[BUFSIZ];
            rm = gt_image_info_get_rec_map(ii, i);
            (void) gt_rec_map_format_html_imagemap_coords(rm, buf, BUFSIZ);
            printf("%s, %s\n",
                   buf,
                   gt_feature_node_get_type(gt_rec_map_get_genome_feature(rm)));
          }
        }
        if (arguments->use_streams) {
          /* render into a string first and write that string to the file */
          GtFile *outfile;
          GtStr *str = gt_str_new();
          gt_canvas_cairo_file_to_stream((GtCanvasCairoFile*) canvas, str);
          outfile = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, file, "w+", err);
          if (outfile) {
            gt_file_xwrite(outfile, gt_str_get_mem(str), gt_str_length(str));
            gt_file_delete(outfile);
          } else {
            had_err = -1;
          }
          gt_str_delete(str);
        } else {
          had_err = gt_canvas_cairo_file_to_file((GtCanvasCairoFile*) canvas,
                                                 file,
                                                 err);
        }
      }
    }
  }

  /* free; only the identifier obtained from the feature index is released,
     the one taken from <arguments->seqid> still belongs to that string */
  gt_free(first_seqid);
  gt_canvas_delete(canvas);
  gt_layout_delete(l);
  gt_image_info_delete(ii);
  gt_style_delete(sty);
  gt_diagram_delete(d);
  gt_array_delete(results);
  gt_str_delete(defaultstylefile);
  gt_feature_index_delete(features);

  return had_err;
}
コード例 #17
0
ファイル: bioseq.c プロジェクト: AnnSeidel/genometools
/* Loads the encoded sequence of <bs> into <bs->encseq>, which must not be set
   yet. The index files are (re)built beforehand via construct_bioseq_files()
   if <recreate> is true, if <bs> reads from stdin, if any of the five index
   files is missing, or if the sequence file is newer than the main index
   file. Returns 0 on success and a non-zero error code otherwise. */
static int bioseq_fill(GtBioseq *bs, bool recreate, GtError *err)
{
  GtStr *base, *index_fn, *ois_fn, *sds_fn, *md5_fn, *des_fn;
  bool rebuild;
  int had_err = 0;

  gt_assert(!bs->encseq);

  /* input from stdin has no file name; derive a unique one from the address
     of the bioseq object */
  if (!bs->use_stdin)
    base = bs->sequence_file;
  else {
    base = gt_str_new_cstr("stdin.");
    gt_str_append_uword(base, (GtUword) bs);
  }

  /* names of the individual index files */
  index_fn = gt_str_clone(base);
  gt_str_append_cstr(index_fn, GT_ENCSEQFILESUFFIX);
  ois_fn = gt_str_clone(base);
  gt_str_append_cstr(ois_fn, GT_OISTABFILESUFFIX);
  sds_fn = gt_str_clone(base);
  gt_str_append_cstr(sds_fn, GT_SDSTABFILESUFFIX);
  md5_fn = gt_str_clone(base);
  gt_str_append_cstr(md5_fn, GT_MD5TABFILESUFFIX);
  des_fn = gt_str_clone(base);
  gt_str_append_cstr(des_fn, GT_DESTABFILESUFFIX);

  /* decide whether the index files have to be (re)built; the age comparison
     is only reached for real files because of the short-circuit evaluation */
  rebuild = recreate || bs->use_stdin ||
            !gt_file_exists(gt_str_get(index_fn)) ||
            !gt_file_exists(gt_str_get(ois_fn)) ||
            !gt_file_exists(gt_str_get(sds_fn)) ||
            !gt_file_exists(gt_str_get(md5_fn)) ||
            !gt_file_exists(gt_str_get(des_fn)) ||
            gt_file_is_newer(gt_str_get(bs->sequence_file),
                             gt_str_get(index_fn));
  if (rebuild)
    had_err = construct_bioseq_files(bs, base, err);

  /* load the encoded sequence with exactly the tables needed here */
  if (!had_err) {
    GtEncseqLoader *loader = gt_encseq_loader_new();
    gt_encseq_loader_disable_autosupport(loader);
    gt_encseq_loader_require_lossless_support(loader);
    gt_encseq_loader_require_description_support(loader);
    gt_encseq_loader_require_md5_support(loader);
    gt_encseq_loader_require_multiseq_support(loader);
    bs->encseq = gt_encseq_loader_load(loader, gt_str_get(base), err);
    if (!bs->encseq) {
      had_err = -1;
      gt_assert(gt_error_is_set(err));
    }
    gt_encseq_loader_delete(loader);
  }
  gt_assert(had_err || bs->encseq);

  /* the base name is only owned by this function in the stdin case */
  if (bs->use_stdin)
    gt_str_delete(base);
  gt_str_delete(index_fn);
  gt_str_delete(ois_fn);
  gt_str_delete(sds_fn);
  gt_str_delete(md5_fn);
  gt_str_delete(des_fn);

  return had_err;
}
コード例 #18
0
ファイル: canvas.c プロジェクト: AnnSeidel/genometools
/* Formats a given position number for short display in the ruler.
   The result is written to <txt>, using at most <buflen> bytes including the
   terminating null byte. Absolute values of at least 1000 are scaled down and
   get the suffix "k", "M" or "G" (for thousands, millions and billions)
   followed by <unitstr>, e.g. 1500000 becomes "1.5M" plus <unitstr>. Smaller
   values are printed as an integer preceded by a blank. Negative positions
   carry a '-' sign. */
void gt_format_ruler_label(char *txt, GtWord pos,
                           const char *unitstr, size_t buflen)
{
  const bool negative = (pos < 0);
  const char *prefix = NULL;
  GtUword upos, divisor = 0;

  gt_assert(txt);

  /* take the absolute value in unsigned arithmetic, as -pos would overflow
     for the smallest representable GtWord */
  upos = negative ? (GtUword) 0 - (GtUword) pos : (GtUword) pos;

  if (upos >= 1000000000) {
    divisor = 1000000000;
    prefix = "G";
  }
  else if (upos >= 1000000) {
    divisor = 1000000;
    prefix = "M";
  }
  else if (upos >= 1000) {
    divisor = 1000;
    prefix = "k";
  }

  if (prefix != NULL) {
    double fpos = (double) upos / (double) divisor;
    /* number of decimals: order of magnitude of the value minus its number of
       trailing decimal zeros. The logarithm is only taken here, where
       upos >= 1000, because log10(0) cannot be converted to an int.
       NOTE(review): the magnitude of the whole value is used rather than that
       of the unit, so e.g. 12000 is shown as "12.0k"; kept unchanged. */
    int decimals = (int) floor(log10((double) upos));
    while (upos % 10 == 0)
    {
      upos /= 10;
      decimals--;
    }
    /*@ignore@*/
    (void) snprintf(txt, buflen, "%s%.*f%s%s", negative ? "-" : "", decimals,
                    fpos, prefix, unitstr);
    /*@end@*/
  } else {
    /*@ignore@*/
    (void) snprintf(txt, buflen, " %s"GT_WU"%s", negative ? "-" : "", upos,
        unitstr);
    /*@end@*/
  }
}
コード例 #19
0
ファイル: hcr.c プロジェクト: AnnSeidel/genometools
/* Creates a sequence decoder for the file <name> with HCRFILESUFFIX appended.
   The file info, the base/quality distribution and the offset at which the
   encoding ends are read from the file, then the Huffman decoder is set up
   and, if data follows that offset, the sampling is read as well.
   Returns the new decoder, or NULL if the file could not be opened or the
   Huffman initialisation failed. */
static GtHcrSeqDecoder *hcr_seq_decoder_new(GtAlphabet *alpha, const char *name,
                                            GtError *err)
{
  GtHcrSeqDecoder *decoder = gt_malloc(sizeof (GtHcrSeqDecoder));
  GtBaseQualDistr *distr = NULL;
  GtWord sampling_offset = 0;
  FILE *hcr_file;
  GT_UNUSED size_t nitems;
  GT_UNUSED const size_t one = (size_t) 1;

  /* start with an empty decoder so that it can be deleted at any point */
  decoder->alpha = alpha;
  decoder->alphabet_size = gt_alphabet_size(alpha);
  decoder->cur_read = 0;
  decoder->data_iter = NULL;
  decoder->file_info_rbt = NULL;
  decoder->fileinfos = NULL;
  decoder->huff_dec = NULL;
  decoder->huffman = NULL;
  decoder->sampling = NULL;
  decoder->symbols = NULL;
  decoder->filename = gt_str_new_cstr(name);
  gt_str_append_cstr(decoder->filename, HCRFILESUFFIX);

  hcr_file = gt_fa_fopen_with_suffix(name, HCRFILESUFFIX, "rb", err);
  if (hcr_file == NULL) {
    hcr_seq_decoder_delete(decoder);
    decoder = NULL;
  }
  else {
    /* the items below are read in the order in which they occur in the file */
    hcr_read_file_info(decoder, hcr_file);

    distr = hcr_base_qual_distr_new_from_file(hcr_file, decoder->alpha);
    decoder->qual_offset = distr->qual_offset;

    nitems = gt_xfread_one(&sampling_offset, hcr_file);
    gt_assert(nitems == one);

    decoder->start_of_encoding = decoder_calc_start_of_encoded_data(hcr_file);

    if (seq_decoder_init_huffman(decoder, sampling_offset, distr, err)) {
      hcr_seq_decoder_delete(decoder);
      decoder = NULL;
    }
    else {
      size_t file_size;

      /* determine the file size to see whether anything follows the
         encoded data */
      gt_xfseek(hcr_file, 0, SEEK_END);
      file_size = ftell(hcr_file);

      gt_xfseek(hcr_file, sampling_offset, SEEK_SET);
      if (sampling_offset < file_size)
        decoder->sampling = gt_sampling_read(hcr_file);
      else
        decoder->sampling = NULL;

      decoder->file_info_rbt =
        seq_decoder_init_file_info(decoder->fileinfos, decoder->num_of_files);
    }
  }

  hcr_base_qual_distr_delete(distr);
  gt_fa_fclose(hcr_file);
  return decoder;
}
コード例 #20
0
/* Parses the domain hit part of the output read from <instream>, line by line,
   until a line starting with "Internal" is reached or an error occurs.
   <buf> is the line buffer shared with the other parsing functions; it is
   refilled by pdom_parser_get_next_line() and modified in place.
   Every hit line found is stored as a GtHMMERSingleHit in <status>.
   Returns 0 on success, otherwise the error code of the failing helper. */
static int gt_ltrdigest_pdom_visitor_parse_domainhits(GtLTRdigestPdomVisitor
                                                                            *lv,
                                                     GtHMMERParseStatus *status,
                                                     char *buf,
                                                     FILE *instream,
                                                     GtError *err)
{
  int had_err = 0;
  /* <nof_targets> is counted below but not evaluated in this function */
  GtUword i, nof_targets = 0, nof_hits = 0;
  gt_assert(lv && instream && status);
  gt_error_check(err);

  had_err = pdom_parser_get_next_line(buf, instream, err);
  gt_assert(buf != NULL);
  /* strncmp() returns non-zero as long as the line does not start with
     "Internal" */
  while (!had_err && strncmp("Internal", buf, (size_t) 8)) {
    GtUword no, hmmfrom, hmmto, alifrom, alito;
    double score, evalue;
    char threshold_ok = '-';
    /* a line starting with ">>" begins a new section; the text from the
       fourth character up to the next blank becomes the current model name */
    if ((buf[0] == '>' && buf[1] == '>')) {
      char *b = buf;
      b = strtok(buf+3, " ");
      gt_str_reset(status->cur_model);
      gt_str_append_cstr(status->cur_model, b);
      had_err = pdom_parser_get_next_line(buf, instream, err);
      /* unless the section reports "[No individual ...", read two further
         lines (presumably to skip the header of the hit table -- TODO
         confirm against the output format) */
      if (!had_err && strncmp("   [No individual", buf, (size_t) 17)) {
        for (i = 0UL; i < 2UL && !had_err; i++)
          had_err = pdom_parser_get_next_line(buf, instream, err);
      }
      nof_targets++;
      nof_hits = 0UL;
      gt_hmmer_parse_status_mark_frame_finished(status);
    }
    /* consume consecutive hit lines: a line counts as a hit if all eight
       requested fields can be converted; fields matched with %* are
       skipped */
    while (!had_err &&
             8 == sscanf(buf, ""GT_WU" %c %lf %*f %*f %lf "GT_WU" "GT_WU" %*s "
                         GT_WU" "GT_WU"", &no,  &threshold_ok, &score, &evalue,
                         &hmmfrom, &hmmto, &alifrom, &alito)) {
      GtHMMERSingleHit *shit = gt_calloc((size_t) 1, sizeof (*shit));
      shit->hmmfrom = hmmfrom;
      shit->hmmto = hmmto;
      shit->alifrom = alifrom;
      shit->alito = alito;
      shit->score = score;
      shit->evalue = evalue;
      /* strand and frame are taken from the current parse status */
      shit->strand = status->strand;
      shit->frame = (GtUword) status->frame;
      /* a hit is marked as reported if its second column is '!' */
      shit->reported = (threshold_ok == '!');
      shit->chains = gt_array_new(sizeof (GtUword));
      gt_hmmer_parse_status_add_hit(status, shit);
      nof_hits++;
      had_err = pdom_parser_get_next_line(buf, instream, err);
    }
    if (!had_err) {
      /* with hits, continue with the alignments of the current section;
         otherwise just advance to the next line */
      if (nof_hits > 0)
        had_err = gt_ltrdigest_pdom_visitor_parse_alignments(lv, status, buf,
                                                             instream, err);
      else
        had_err = pdom_parser_get_next_line(buf, instream, err);
    }
  }
  return had_err;
}
コード例 #21
0
/* Runner of the readjoiner cnttest tool. Depending on <arguments->test> the
   list of contained reads is either parsed from the contained-reads list file
   of the readset, computed by pairwise exact comparison (brute force or KMP)
   or computed bottom-up from the enhanced suffix array. The resulting bit
   table is then shown via gt_cntlist_show().
   Returns 0 on success and a non-zero error code otherwise. */
static int gt_readjoiner_cnttest_runner(GT_UNUSED int argc,
    GT_UNUSED const char **argv, GT_UNUSED int parsed_args,
    void *tool_arguments, GT_UNUSED GtError *err)
{
  GtReadjoinerCnttestArguments *arguments = tool_arguments;
  GtBitsequence *bits = NULL;
  GtUword nofreads;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->test == GT_READJOINER_CNTTEST_SHOWLIST)
  {
    /* read an existing list: <readset> plus the list file suffix */
    GtStr *listname = gt_str_clone(arguments->readset);
    gt_str_append_cstr(listname, GT_READJOINER_SUFFIX_CNTLIST);
    had_err = gt_cntlist_parse(gt_str_get(listname), true, &bits, &nofreads,
        err);
    gt_str_delete(listname);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_BRUTEFORCE ||
      arguments->test == GT_READJOINER_CNTTEST_KMP)
  {
    /* pairwise comparison on the encoded reads, mirrored unless only a
       single strand is considered */
    GtEncseqLoader *loader = gt_encseq_loader_new();
    GtEncseq *readset;
    gt_encseq_loader_drop_description_support(loader);
    gt_encseq_loader_disable_autosupport(loader);
    if (!arguments->singlestrand)
      gt_encseq_loader_mirror(loader);
    readset = gt_encseq_loader_load(loader, gt_str_get(arguments->readset),
        err);
    if (readset != NULL)
    {
      gt_rdj_pairwise_exact(GT_OVLFIND_CNT, readset, !arguments->singlestrand,
          false, arguments->test == GT_READJOINER_CNTTEST_KMP, 1UL, true,
          NULL, NULL, false, NULL, &bits, &nofreads);
    }
    else
      had_err = -1;
    gt_encseq_delete(readset);
    gt_encseq_loader_delete(loader);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_ESA)
  {
    GtUword readlength = 0, firstrevcompl = 0;
    GtLogger *verbose_logger = gt_logger_new(arguments->verbose,
        GT_LOGGER_DEFLT_PREFIX, stderr);
    Sequentialsuffixarrayreader *reader =
      gt_newSequentialsuffixarrayreaderfromfile(gt_str_get(
            arguments->readset), SARR_LCPTAB | SARR_SUFTAB | SARR_SSPTAB,
          true, verbose_logger, err);
    if (!gt_error_is_set(err))
    {
      nofreads = gt_encseq_num_of_sequences(reader->encseq);
      if (!arguments->singlestrand)
      {
        /* with both strands only the first half of the sequences are reads,
           the second half starts at index <firstrevcompl> */
        nofreads = GT_DIV2(nofreads);
        firstrevcompl = nofreads;
      }
      GT_INITBITTAB(bits, nofreads);
      /* the read length is only determined for double-stranded equal-length
         readsets and stays 0 otherwise */
      if (!arguments->singlestrand &&
          gt_encseq_accesstype_get(reader->encseq)
            == GT_ACCESS_TYPE_EQUALLENGTH)
      {
        readlength = gt_encseq_seqlength(reader->encseq, 0);
      }
      /* <firstrevcompl> is still 0 in the single-strand case */
      (void)gt_contfind_bottomup(reader, false, bits, firstrevcompl,
          readlength);
    }
    else
      had_err = -1;
    if (reader != NULL)
      gt_freeSequentialsuffixarrayreader(&reader);
    gt_logger_delete(verbose_logger);
  }
  else
  {
    /* no other test is known */
    gt_assert(false);
  }

  if (!had_err)
    had_err = gt_cntlist_show(bits, nofreads, NULL, false, err);
  gt_free(bits);
  return had_err;
}