예제 #1
0
/*
 * Parse the CanonGFF3 command line and store the chosen settings in <options>.
 *
 * -h/--help prints the usage text to stdout and exits with status 0;
 * -v/--version prints the version banner to stdout and exits with status 0.
 * -i/--infer sets options->infer.  -o/--outfile and -s/--source replace any
 * value stored by an earlier occurrence, releasing the old one first.
 * Option characters not listed below are ignored by this function.
 */
static void canon_gff3_parse_options(int argc, char * const *argv,
                                     CanonGFF3Options *options, GtError *error)
{
  static const char short_opts[] = "hio:s:v";
  const struct option long_opts[] =
  {
    { "help",    no_argument,       NULL, 'h' },
    { "infer",   no_argument,       NULL, 'i' },
    { "outfile", required_argument, NULL, 'o' },
    { "source",  required_argument, NULL, 's' },
    { "version", no_argument,       NULL, 'v' },
    { NULL,      no_argument,       NULL, 0 },
  };
  int long_index = 0;
  int flag;

  while((flag = getopt_long(argc, argv, short_opts, long_opts, &long_index))
        != -1)
  {
    switch(flag)
    {
      case 'h':
        print_usage(stdout);
        exit(0);

      case 'i':
        options->infer = true;
        break;

      case 'o':
        /* a repeated -o replaces the previously opened output stream */
        if(options->outstream != NULL)
          gt_file_delete(options->outstream);
        options->outstream = gt_file_new(optarg, "w", error);
        break;

      case 's':
        /* a repeated -s replaces the previously stored source label */
        if(options->source != NULL)
          gt_str_delete(options->source);
        options->source = gt_str_new_cstr(optarg);
        break;

      case 'v':
        agn_print_version("CanonGFF3", stdout);
        exit(0);

      default:
        /* anything else is left untouched, as before */
        break;
    }
  }
}
예제 #2
0
/* Returns true if stat(2) succeeds for the file name obtained by appending
   <suffix> to <path>, and false otherwise. Both arguments must be non-NULL. */
bool gt_file_exists_with_suffix(const char *path, const char *suffix)
{
  struct stat sb;
  GtStr *fullname;
  bool found;

  gt_assert(path && suffix);

  /* assemble "<path><suffix>" in a temporary string */
  fullname = gt_str_new_cstr(path);
  gt_str_append_cstr(fullname, suffix);

  found = (stat(gt_str_get(fullname), &sb) == 0);

  gt_str_delete(fullname);
  return found;
}
예제 #3
0
/* Creates a new left-border output buffer labelled <name>. The buffer starts
   empty with room for 1024 uint32_t-sized entries; the size of that array is
   reported to <fcsl> under <name> via GT_FCI_ADDWORKSPACE. The file pointer
   is obtained from gt_xtmpfp(), which is handed the (initially empty)
   outfilename string. */
GtLeftborderOutbuffer *gt_leftborderbuffer_new(const char *name,
                                               GtFirstcodesspacelog *fcsl)
{
  GtLeftborderOutbuffer *outbuf = gt_malloc(sizeof (*outbuf));
  size_t workspace;

  /* nothing written, nothing buffered yet */
  outbuf->totalwrite = 0;
  outbuf->nextfree = 0;
  outbuf->name = gt_str_new_cstr(name);

  /* backing file */
  outbuf->outfilename = gt_str_new();
  outbuf->fp = gt_xtmpfp(outbuf->outfilename);

  /* in-memory staging area; its size is accounted for in the space log */
  outbuf->allocated = 1024UL;
  workspace = sizeof (*outbuf->spaceuint32_t) * outbuf->allocated;
  outbuf->spaceuint32_t = gt_malloc(workspace);
  GT_FCI_ADDWORKSPACE(fcsl,name,workspace);

  return outbuf;
}
예제 #4
0
/* Hashmap traversal callback: <key> is a type name (C string), <value> the
   GtArray of GtBlock pointers collected for that type and <data> a
   GtLayoutTraverseInfo. Builds one GtTrack per type, fills it with the sorted
   blocks and registers it in the layout's track map under a copy of the type
   name. Always returns 0. */
static int layout_tracks(void *key, void *value, void *data,
                         GT_UNUSED GtError *err)
{
  GtLayoutTraverseInfo *lti = (GtLayoutTraverseInfo*) data;
  GtArray *blocks = (GtArray*) value;
  const char *type = key;
  GtStr *track_name;
  GtTrack *new_track;
  GtBlock *current;
  unsigned long idx, max_lines;
  bool split_lines;
  double num;
  gt_assert(type && blocks);

  /* sorting the blocks of each type makes the resulting layout
     deterministic */
  gt_array_sort_stable(blocks, blocklist_block_compare);

  /* the first block is read here exactly as in the original code, although
     the value is overwritten before its next use */
  current = *(GtBlock**) gt_array_get(blocks, 0);

  /* line splitting defaults to on; the per-type setting is consulted only
     while the global "format" setting leaves it enabled */
  if (!gt_style_get_bool(lti->layout->style, "format", "split_lines",
                         &split_lines, NULL))
    split_lines = true;
  if (split_lines
        && !gt_style_get_bool(lti->layout->style, type, "split_lines",
                              &split_lines, NULL))
    split_lines = true;

  /* per-type line limit, 50 when the style does not define one */
  max_lines = gt_style_get_num(lti->layout->style, type, "max_num_lines",
                               &num, NULL)
                ? (unsigned long) num
                : 50;

  track_name = gt_str_new_cstr((char*) key);
  new_track = gt_track_new(track_name, max_lines, split_lines,
                           gt_line_breaker_captions_new(lti->layout,
                                                        lti->layout->width,
                                                        lti->layout->style));
  lti->layout->nof_tracks++;

  for (idx = 0; idx < gt_array_size(blocks); idx++) {
    current = *(GtBlock**) gt_array_get(blocks, idx);
    gt_track_insert_block(new_track, current);
  }

  /* the map receives its own duplicate of the key string */
  gt_hashmap_add(lti->layout->tracks, gt_cstr_dup(gt_str_get(track_name)),
                 new_track);
  gt_str_delete(track_name);
  return 0;
}
예제 #5
0
파일: hcr.c 프로젝트: mader/genometools
/* Runs the encoding for <hcr_enc>, using <name> as the output name: first the
   description encoder (if one is attached), then the sequence/quality data.
   If <timer> is non-NULL a progress message is shown on stdout. When logging
   is enabled and no error occurred, a summary of the encoding is logged.
   Returns 0 on success and the failing step's error code otherwise. */
int gt_hcr_encoder_encode(GtHcrEncoder *hcr_enc, const char *name,
                          GtTimer *timer, GtError *err)
{
    int had_err = 0;
    GtStr *encoded_file;

    gt_error_check(err);
    if (timer != NULL)
        gt_timer_show_progress(timer, "write encoding", stdout);

    /* descriptions are only encoded when an encoder for them exists */
    if (hcr_enc->encdesc_encoder != NULL) {
        GtCstrIterator *headers =
            gt_fasta_header_iterator_new(hcr_enc->files, err);
        had_err = gt_encdesc_encoder_encode(hcr_enc->encdesc_encoder,
                                            headers, name, err);
        gt_cstr_iterator_delete(headers);
    }

    if (!had_err)
        had_err = hcr_write_seq_qual_data(name, hcr_enc, timer, err);

    /* the remainder is purely informational */
    if (had_err || !gt_log_enabled())
        return had_err;

    encoded_file = gt_str_new_cstr(name);
    gt_str_append_cstr(encoded_file, HCRFILESUFFIX);

    gt_log_log("sequences with qualities encoding overview:");
    gt_log_log("**>");
    if (hcr_enc->page_sampling) {
        gt_log_log("applied sampling technique: sampling every "GT_WU"th page",
                   hcr_enc->sampling_rate);
    }
    else if (hcr_enc->regular_sampling) {
        gt_log_log("applied sampling technique: sampling every "GT_WU"th read",
                   hcr_enc->sampling_rate);
    }
    else {
        gt_log_log("applied sampling technique: none");
    }

    gt_log_log("total number of encoded nucleotide sequences with qualities: "
               ""GT_WU"", hcr_enc->num_of_reads);
    gt_log_log("total number of encoded nucleotides: "GT_LLU"",
               hcr_enc->seq_encoder->total_num_of_symbols);
    /* estimated file size in bits divided by the number of symbols */
    gt_log_log("bits per nucleotide encoding: %f",
               (gt_file_estimate_size(gt_str_get(encoded_file)) * 8.0) /
               hcr_enc->seq_encoder->total_num_of_symbols);
    gt_log_log("<**");

    gt_str_delete(encoded_file);
    return had_err;
}
/* Benchmark helper: decodes <amount> randomly chosen reads from <hcrd> and
   logs each decoded description, sequence and quality string.

   If <timer> is NULL a private timer is created and started for the duration
   of the call; otherwise a progress message is shown on the caller's timer.
   Only a timer created here is deleted here -- the caller keeps ownership of
   a timer it passed in. (Previously deletion depended on
   gt_showtime_enabled(), which could leak the private timer or free the
   caller's one.)

   Decoding stops at the first error; buffers of a failed decode are not
   logged. Returns 0 on success, the decoder's error code otherwise. */
static int gt_compreads_decompress_benchmark(GtHcrDecoder *hcrd,
                                             unsigned long amount,
                                             GtTimer *timer,
                                             GtError *err) {
  char qual[BUFSIZ] = {0},
       seq[BUFSIZ] = {0};
  int had_err = 0,
      own_timer = 0;
  unsigned long readnum,
                max_rand = gt_hcr_decoder_num_of_reads(hcrd) - 1,
                count;

  GtStr *timer_comment = gt_str_new_cstr("extracting ");
  GtStr *desc = gt_str_new();

  gt_str_append_ulong(timer_comment, amount);
  gt_str_append_cstr(timer_comment, " reads of ");
  gt_str_append_ulong(timer_comment, max_rand + 1);
  gt_str_append_cstr(timer_comment, "!");

  if (timer == NULL) {
    timer = gt_timer_new_with_progress_description("extract random reads");
    gt_timer_start(timer);
    own_timer = 1;
  }
  else {
    gt_timer_show_progress(timer, "extract random reads", stdout);
  }

  gt_log_log("%s",gt_str_get(timer_comment));
  for (count = 0; !had_err && count < amount; count++) {
    readnum = gt_rand_max(max_rand);
    gt_log_log("get read: %lu", readnum);
    had_err = gt_hcr_decoder_decode(hcrd, readnum, seq, qual, desc, err);
    if (!had_err) {
      gt_log_log("%s",gt_str_get(desc));
      gt_log_log("%s",seq);
      gt_log_log("%s",qual);
    }
  }
  gt_str_delete(timer_comment);
  gt_str_delete(desc);
  if (own_timer)
    gt_timer_delete(timer);
  return had_err;
}
예제 #7
0
/* Extracts the sequences whose keys are listed in <fastakeyfile>.
   With exactly one file argument for which gt_deskeysfileexists() reports
   true, the keys are looked up through that index; in every other case all
   <argc> arguments are handed over as reference FASTA files and the result
   is written to <outfp>. Returns 0 on success and -1 on error (including the
   case of no file argument, for which <err> is set). */
static int process_fastakeyfile(GtStr *fastakeyfile, int argc,
                                const char **argv, unsigned long width,
                                GtFile *outfp, GtError *err)
{
  GtStr *indexname;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(gt_str_length(fastakeyfile));

  if (argc == 0) {
    gt_error_set(err,"option -keys requires at least one file argument");
    return -1;
  }

  indexname = gt_str_new_cstr(argv[0]);

  if (argc != 1 || !gt_deskeysfileexists(indexname))
  {
    /* no usable index: treat every argument as a reference file */
    GtStrArray *referencefiletab = gt_str_array_new();
    int argnum = 0;

    while (argnum < argc)
    {
      gt_str_array_add_cstr(referencefiletab, argv[argnum]);
      argnum++;
    }
    /* this extraction routine signals success with the value 1 */
    if (gt_extractkeysfromfastafile(true, outfp, width, fastakeyfile,
                                    referencefiletab, err) != 1)
    {
      had_err = -1;
    }
    gt_str_array_delete(referencefiletab);
  } else
  {
    /* single argument with key index available */
    if (gt_extractkeysfromfastaindex(indexname,fastakeyfile,width,err) != 0)
    {
      had_err = -1;
    }
  }

  gt_str_delete(indexname);
  return had_err;
}
예제 #8
0
/* Builds an ID for <fn> consisting of its feature type followed by the
   per-type counter value taken after incrementing that counter, registers
   the ID for <fn> in the visitor's node-to-ID map and returns it. */
static GtStr* create_unique_id(GtGFF3Visitor *gff3_visitor, GtFeatureNode *fn)
{
  const char *fntype;
  GtStr *unique_id;

  gt_assert(gff3_visitor && fn);

  fntype = gt_feature_node_get_type(fn);
  unique_id = gt_str_new_cstr(fntype);

  /* count this occurrence of the type, then use the new count as suffix */
  gt_string_distri_add(gff3_visitor->id_counter, fntype);
  gt_str_append_ulong(unique_id,
                      gt_string_distri_get(gff3_visitor->id_counter, fntype));

  /* remember which ID belongs to this node */
  gt_hashmap_add(gff3_visitor->feature_node_to_unique_id_str, fn, unique_id);

  return unique_id;
}
예제 #9
0
/* Allocates and initializes a GtSfxmappedrange for the table <tablename>
   holding <numofentries> entries of kind <type>. No memory is mapped yet:
   all pointers start out NULL, the index range is undefined and the object
   is not writable. <transformfunc> and <transformfunc_data> are stored as
   given. The unit size and unit count are derived from <type>; for an
   unknown <type> the assertion fires and, where assertions are compiled out,
   both values are set to 0 instead of being left uninitialized. */
GtSfxmappedrange *gt_Sfxmappedrange_new(const char *tablename,
                                        GtUword numofentries,
                                        GtSfxmappedrangetype type,
                                        GtSfxmappedrangetransformfunc
                                          transformfunc,
                                        const void *transformfunc_data)
{
  GtSfxmappedrange *sfxmappedrange;

  sfxmappedrange = gt_malloc(sizeof (*sfxmappedrange));
  sfxmappedrange->ptr = NULL;
  sfxmappedrange->pagesize = gt_pagesize();
  sfxmappedrange->usedptrptr = NULL;
  sfxmappedrange->filename = NULL;
  sfxmappedrange->writable = false;
  sfxmappedrange->entire = NULL;
  sfxmappedrange->transformfunc = transformfunc;
  sfxmappedrange->transformfunc_data = transformfunc_data;
  sfxmappedrange->type = type;
  sfxmappedrange->tablename = gt_str_new_cstr(tablename);
  sfxmappedrange->currentminindex = sfxmappedrange->currentmaxindex = 0;
  sfxmappedrange->indexrange_defined = false;
  switch (type)
  {
    case GtSfxGtBitsequence:
      /* bit tables are stored in whole GtBitsequence words */
      sfxmappedrange->sizeofunit = sizeof (GtBitsequence);
      sfxmappedrange->numofunits = GT_NUMOFINTSFORBITS(numofentries);
      break;
    case GtSfxuint32_t:
      sfxmappedrange->sizeofunit = sizeof (uint32_t);
      sfxmappedrange->numofunits = (size_t) numofentries;
      break;
    case GtSfxunsignedlong:
      sfxmappedrange->sizeofunit = sizeof (GtUword);
      sfxmappedrange->numofunits = (size_t) numofentries;
      break;
    default:
      gt_assert(false);
      /* keep the object in a defined state if the assertion is inactive */
      sfxmappedrange->sizeofunit = 0;
      sfxmappedrange->numofunits = 0;
      break;
  }
  return sfxmappedrange;
}
예제 #10
0
/* Returns an ID for <fn> based on its "ID" attribute. If that ID is already
   in the visitor's table of used IDs, id_string_is_unique() is called with
   increasing counters (starting at 1) until it reports success; a warning
   is issued and the ID is replaced by the content of the scratch buffer.
   The final ID is added to the used-ID table and stored for <fn> in the
   node-to-ID map. */
static GtStr* make_id_unique(GtGFF3Visitor *gff3_visitor, GtFeatureNode *fn)
{
  GtStr *id = gt_str_new_cstr(gt_feature_node_get_attribute(fn, "ID"));

  if (gt_cstr_table_get(gff3_visitor->used_ids, gt_str_get(id))) {
    GtStr *candidate = gt_str_new();
    GtUword suffix = 1;

    /* try suffix 1, 2, 3, ... until an unused ID has been found */
    for (;;) {
      if (id_string_is_unique(id, candidate, gff3_visitor->used_ids, suffix++))
        break;
    }
    gt_warning("feature ID \"%s\" not unique: changing to %s", gt_str_get(id),
               gt_str_get(candidate));
    gt_str_set(id, gt_str_get(candidate));
    gt_str_delete(candidate);
  }

  /* mark the (possibly changed) ID as used */
  gt_cstr_table_add(gff3_visitor->used_ids, gt_str_get(id));
  /* and remember it for this node */
  gt_hashmap_add(gff3_visitor->feature_node_to_unique_id_str, fn, id);

  return id;
}
예제 #11
0
/* Pops the next entry from the iterator's tool stack and reports it through
   <name> and <tool>. If a prefix string was registered with the iterator it
   is reset and, when the entry has a prefix, set to that prefix followed by
   the separator character. If the popped tool is a toolbox, its tools are
   collected (tagged with a prefix made of the entry's prefix plus its name)
   and pushed onto the stack. Returns false when the stack is empty, true
   otherwise. */
bool gt_tool_iterator_next(GtToolIterator *tool_iterator, const char **name,
                           GtTool **tool)
{
  ToolEntry *entry;
  gt_assert(tool_iterator && name && tool);

  if (!gt_array_size(tool_iterator->tool_stack))
    return false;

  entry = gt_array_pop(tool_iterator->tool_stack);
  *name = entry->name;
  *tool = entry->tool;

  if (tool_iterator->prefixptr) {
    gt_str_reset(tool_iterator->prefixptr);
    if (entry->prefix) {
      gt_str_append_str(tool_iterator->prefixptr, entry->prefix);
      gt_str_append_char(tool_iterator->prefixptr, tool_iterator->prefixsep);
    }
  }

  if (!gt_tool_is_toolbox(entry->tool)) {
    /* NOTE(review): the entry's prefix is released only on this path, not
       for toolbox entries -- confirm that this matches the intended
       ownership */
    gt_str_delete(entry->prefix);
  }
  else {
    ToolIterationInfo tii;
    const char *parent_prefix = entry->prefix ? gt_str_get(entry->prefix) : "";
    GtStr *child_prefix = gt_str_new_cstr(parent_prefix);
    GtArray *children = gt_array_new(sizeof (ToolEntry));
    GtToolbox *toolbox;

    gt_str_append_cstr(child_prefix, entry->name);
    toolbox = gt_tool_get_toolbox(entry->tool);

    tii.arr = children;
    tii.str = child_prefix;
    gt_toolbox_iterate(toolbox, add_tool_to_stack, &tii);

    if (gt_array_size(children)) {
      gt_array_reverse(children); /* alphabetical order */
      gt_array_add_array(tool_iterator->tool_stack, children);
    }
    gt_array_delete(children);
    gt_str_delete(child_prefix);
  }
  return true;
}
예제 #12
0
/* Creates the type checker selected by <tci>.
   With the built-in option a built-in checker is returned. Otherwise the OBO
   file is chosen as follows:
     a. no argument given:            <obo path> + "sofa.obo"
     b. argument is an existing file: that file
     c. anything else:                <obo path> + argument + ".obo"
   Returns NULL if the OBO path cannot be determined or if creating the
   checker from the file fails. */
GtTypeChecker* gt_typecheck_info_create_type_checker(const GtTypecheckInfo *tci,
                                                     GtError *err)
{
  GtTypeChecker *checker = NULL;
  GtStr *obo_file = NULL;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(tci);

  if (tci->typecheck_built_in)
    return gt_type_checker_builtin_new();

  gt_assert(gt_option_is_set(tci->typecheck_option));

  if (gt_str_length(tci->typecheck)
        && gt_file_exists(gt_str_get(tci->typecheck))) {
    /* b. */
    obo_file = gt_str_new_cstr(gt_str_get(tci->typecheck));
  }
  else {
    /* a. and c. both start from the OBO directory */
    obo_file = get_obo_path(err);
    if (!obo_file)
      had_err = -1;
    else if (!gt_str_length(tci->typecheck)) {
      /* a. */
      gt_str_append_cstr(obo_file, "sofa.obo");
    }
    else {
      /* c. */
      gt_str_append_str(obo_file, tci->typecheck);
      gt_str_append_cstr(obo_file, ".obo");
    }
  }

  if (!had_err)
    checker = gt_type_checker_obo_new(gt_str_get(obo_file), err);

  gt_str_delete(obo_file);
  return checker;
}
예제 #13
0
/* Attaches an ORF feature to the feature tree rooted at <gf>.
   <orf_rng> is shifted by one on both ends before use. The tree is searched
   for nodes that are not of type GT_ORF_TYPE and whose range contains the
   shifted range; the last such node found becomes the parent. If a parent
   exists, a new GT_ORF_TYPE feature with the given <strand>, source
   GT_ORF_FINDER_TAG and a "frame" attribute holding <orf_frame> is added as
   its child; otherwise nothing is attached. */
static void orf_attach_results_to_gff3(GtFeatureNode *gf,
                                       GtRange orf_rng, unsigned int orf_frame,
                                       GtStrand strand, GT_UNUSED GtError *err)
{
  GtGenomeNode *child;
  GtStr *tag;
  GtFeatureNodeIterator *gfi;
  GtFeatureNode *curnode = NULL, *parent_node = NULL;
  GtRange gfi_range;
  /* large enough for any unsigned int in decimal plus the terminator; the
     former 3-byte buffer overflowed for values >= 100 */
  char frame_buf[16];

  tag = gt_str_new_cstr(GT_ORF_FINDER_TAG);

  orf_rng.start++; orf_rng.end++;

  /* bounded write, and %u matches the unsigned argument */
  (void) snprintf(frame_buf, sizeof frame_buf, "%u", orf_frame);

  gfi = gt_feature_node_iterator_new(gf);

  while ((curnode = gt_feature_node_iterator_next(gfi))) {
    if (strcmp(gt_feature_node_get_type(curnode),
                                              (const char*) GT_ORF_TYPE) != 0) {
      gfi_range = gt_genome_node_get_range((GtGenomeNode*) curnode);
      if (gt_range_contains(&gfi_range, &orf_rng)) {
        /* later matches override earlier ones */
        parent_node = curnode;
      }
    }
  }
  if (parent_node) {
    child = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*) gf),
                                GT_ORF_TYPE,
                                orf_rng.start,
                                orf_rng.end,
                                strand);
    gt_feature_node_set_source((GtFeatureNode*) child, tag);
    gt_feature_node_set_attribute((GtFeatureNode*) child, "frame", frame_buf);
    gt_feature_node_add_child(parent_node,(GtFeatureNode*) child);
  }
  gt_str_delete(tag);
  gt_feature_node_iterator_delete(gfi);
}
예제 #14
0
/* Creates a plain GFF3 input stream reading from <files>; takes ownership of
   <files>. The stream starts at the first file with nothing opened yet, ID
   and region checks as well as the progress bar switched off, and a parser
   created without a type checker argument. */
static GtNodeStream* gff3_in_stream_plain_new(GtStrArray *files,
                                              bool ensure_sorting)
{
  GtNodeStream *ns = gt_node_stream_create(gt_gff3_in_stream_plain_class(),
                                           ensure_sorting);
  GtGFF3InStreamPlain *is = gff3_in_stream_plain_cast(ns);

  /* input files and reading position */
  is->files              = files;
  is->next_file          = 0;
  is->fpin               = NULL;
  is->file_is_open       = false;
  is->line_number        = 0;
  is->stdinstr           = gt_str_new_cstr("stdin");
  is->stdin_argument     = false;

  /* behaviour switches */
  is->ensure_sorting     = ensure_sorting;
  is->checkids           = false;
  is->checkregions       = false;
  is->progress_bar       = false;

  /* helper objects owned by the stream */
  is->genome_node_buffer = gt_queue_new();
  is->gff3_parser        = gt_gff3_parser_new(NULL);
  is->used_types         = gt_cstr_table_new();

  return ns;
}
예제 #15
0
/* Prepares <stream> for iteration: fetches the sequence IDs from the feature
   index, resets the sequence counter and queues one region node per sequence
   ID in the stream's region cache. Depending on stream->useorig, either the
   original or the regular range of each sequence is used.

   Errors cannot be reported to the caller (the function returns void), so
   they are handled defensively: if no sequence IDs could be obtained, the
   cache is left untouched; if a range lookup fails, filling the cache stops
   at that sequence instead of building a region node from an uninitialized
   range. This assumes the range lookup functions return 0 on success, as is
   the GenomeTools convention. */
void feature_in_stream_init(GtFeatureInStream *stream)
{
  GtUword i;
  GtError *error = gt_error_new();

  stream->seqids = gt_feature_index_get_seqids(stream->fi, error);
  stream->seqindex = 0;
  if (stream->seqids == NULL)
  {
    gt_error_delete(error);
    return;
  }

  for (i = 0; i < gt_str_array_size(stream->seqids); i++)
  {
    const char *seqid = gt_str_array_get(stream->seqids, i);
    GtRange seqrange;
    GtStr *seqstr;
    GtGenomeNode *rn;
    int rval;

    if (stream->useorig)
      rval = gt_feature_index_get_orig_range_for_seqid(stream->fi, &seqrange,
                                                       seqid, error);
    else
      rval = gt_feature_index_get_range_for_seqid(stream->fi, &seqrange, seqid,
                                                  error);
    if (rval != 0)
      break; /* seqrange is not valid, and <error> is already set */

    seqstr = gt_str_new_cstr(seqid);
    rn = gt_region_node_new(seqstr, seqrange.start, seqrange.end);
    gt_queue_add(stream->regioncache, rn);
    gt_str_delete(seqstr);
  }
  gt_error_delete(error);
}
예제 #16
0
/* Parses the GTF input of <gtf_in_stream> into its genome node buffer.
   If a file name is set, that file is opened for reading; otherwise the
   parser is given a NULL file and the label "stdin". Returns 0 on success
   and a non-zero error code if opening or parsing fails. */
static int gtf_in_stream_process_file(GtGTFInStream *gtf_in_stream,
                                      GtError *err)
{
  GtGTFParser *parser;
  GtFile *infile = NULL;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(gtf_in_stream);

  parser = gt_gtf_parser_new(gtf_in_stream->type_checker);

  /* open the input file, if one was named */
  if (gtf_in_stream->filename) {
    infile = gt_file_new(gtf_in_stream->filename, "r", err);
    if (!infile)
      had_err = -1;
  }

  /* parse */
  if (!had_err) {
    GtStr *label = gt_str_new_cstr(gtf_in_stream->filename
                                   ? gtf_in_stream->filename : "stdin");
    had_err = gt_gtf_parser_parse(parser, gtf_in_stream->genome_node_buffer,
                                  label, infile, gtf_in_stream->tidy, err);
    gt_str_delete(label);
  }

  /* release the file (may be NULL) and the parser */
  gt_file_delete(infile);
  gt_gtf_parser_delete(parser);

  return had_err;
}
예제 #17
0
/* Creates the XRF checker selected by <xci>. The abbreviation file is chosen
   as follows:
     a. no argument given:            <xrf path> + "GO.xrf_abbr"
     b. argument is an existing file: that file
     c. anything else:                <xrf path> + argument + ".xrf_abbr"
   Returns NULL if the XRF path cannot be determined or if creating the
   checker from the file fails. */
GtXRFChecker* gt_xrfcheck_info_create_xrf_checker(const GtXRFCheckInfo *xci,
                                                  GtError *err)
{
  GtXRFChecker *checker = NULL;
  GtStr *xrf_file = NULL;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(xci);
  gt_assert(gt_option_is_set(xci->xrfcheck_option));

  if (gt_str_length(xci->xrfcheck)
        && gt_file_exists(gt_str_get(xci->xrfcheck))) {
    /* b. */
    xrf_file = gt_str_new_cstr(gt_str_get(xci->xrfcheck));
  }
  else {
    /* a. and c. both start from the XRF directory */
    xrf_file = get_xrf_path(err);
    if (!xrf_file)
      had_err = -1;
    else if (!gt_str_length(xci->xrfcheck)) {
      /* a. */
      gt_str_append_cstr(xrf_file, "GO.xrf_abbr");
    }
    else {
      /* c. */
      gt_str_append_str(xrf_file, xci->xrfcheck);
      gt_str_append_cstr(xrf_file, ".xrf_abbr");
    }
  }

  if (!had_err)
    checker = gt_xrf_checker_new(gt_str_get(xrf_file), err);

  gt_str_delete(xrf_file);
  return checker;
}
예제 #18
0
/* Allocates a GtIndexOptions object with default settings: direction "fwd",
   one part, automatic prefix length, no output tables requested, undefined
   index type, and all option handles unset (NULL). */
static GtIndexOptions* gt_index_options_new(void)
{
  GtIndexOptions *opts = gt_malloc(sizeof *opts);

  /* objects allocated together with the options */
  opts->algbounds = gt_str_array_new();
  opts->dir = gt_str_new_cstr("fwd");
  opts->kysargumentstring = gt_str_new();
  opts->memlimit = gt_str_new();
  opts->indexname = NULL;

  /* numeric settings */
  opts->maximumspace = 0UL; /* in bytes */
  opts->numofparts = 1U;
  opts->prefixlength = GT_PREFIXLENGTH_AUTOMATIC;
  opts->type = GT_INDEX_OPTIONS_UNDEFINED;

  /* flags */
  opts->lcpdist = false;
  opts->outbcktab = false;
  opts->outbwttab = false;
  opts->outkyssort = false;
  opts->outkystab = false;
  opts->outlcptab = false;
  opts->outsuftab = false; /* only defined for GT_INDEX_OPTIONS_ESA */
  opts->swallow_tail = false;

  /* option handles, not yet registered */
  opts->option = NULL;
  opts->optionalgbounds = NULL;
  opts->optioncmpcharbychar = NULL;
  opts->optiondifferencecover = NULL;
  opts->optionmaxwidthrealmedian = NULL;
  opts->optionmemlimit = NULL;
  opts->optionoutbcktab = NULL;
  opts->optionoutbwttab = NULL;
  opts->optionoutlcptab = NULL;
  opts->optionoutsuftab = NULL;
  opts->optionparts = NULL;
  opts->optionprefixlength = NULL;
  opts->optionspmopt = NULL;
  opts->optionstorespecialcodes = NULL;

  return opts;
}
예제 #19
0
/* Lua constructor for region nodes.
   Lua arguments: (1) sequence ID string, (2) start position, (3) end
   position. Both positions must be > 0 and start must not exceed end;
   violations raise a Lua argument error. Pushes the new region node as
   userdata with the genome node metatable and returns 1 (one result).

   The positions are validated as signed values before being converted to
   the unsigned coordinate type, so negative arguments are rejected instead
   of wrapping around to huge positive numbers. */
static int sequence_region_lua_new(lua_State *L)
{
  GtGenomeNode **rn;
  long startval, endval;
  const char *seqid;
  GtStr *seqid_str;
  gt_assert(L);
  /* get_check parameters */
  seqid = luaL_checkstring(L, 1);
  startval = luaL_checklong(L, 2);
  endval   = luaL_checklong(L, 3);
  luaL_argcheck(L, startval > 0, 2, "must be > 0");
  luaL_argcheck(L, endval > 0, 3, "must be > 0");
  luaL_argcheck(L, startval <= endval, 2, "must be <= endpos");
  /* construct object */
  rn = lua_newuserdata(L, sizeof (GtGenomeNode*));
  seqid_str = gt_str_new_cstr(seqid);
  *rn = gt_region_node_new(seqid_str, (GtUword) startval, (GtUword) endval);
  /* the temporary string is released right after node creation */
  gt_str_delete(seqid_str);
  gt_assert(*rn);
  luaL_getmetatable(L, GENOME_NODE_METATABLE);
  lua_setmetatable(L, -2);
  return 1;
}
예제 #20
0
/* Writes the unique and link elements of <condenseq> as GFF3 features to the
   file "<basefilename>.gff3".

   Every element becomes an "experimental_feature" with source "Condenseq" on
   the sequence it belongs to (sequence ID = sequence description), using
   1-based coordinates relative to the start of that sequence. Uniques get
   the attributes Name=unique<idx> and ID=U<idx>; links get Name=link<idx>,
   ID=L<idx> and Derives_from=U<unique_id>.

   Returns 0 on success. If the output file cannot be opened, -1 is returned
   (with <err> as set by gt_file_new()) and nothing is written; otherwise the
   first error reported while emitting a node is returned. */
int gt_condenseq_output_to_gff3(const GtCondenseq *condenseq,
                                GtError *err)
{
  int had_err = 0;
  GtUword idx,
          name_len,
          seqnum = 0, seqstart = 0, seqend = 0,
          desclen;
  GtStr *filename = NULL,
        *id = gt_str_new_cstr("U"),
        *name = gt_str_new_cstr("unique"),
        *parent_unique = gt_str_new_cstr("U"),
        *seqid = gt_str_new(),
        *source = gt_str_new_cstr("Condenseq");
  GtFile *outfile = NULL;
  GtGFF3Visitor *gffv = NULL;
  GtNodeVisitor *nodev = NULL;
  GtFeatureNode *fnode = NULL;
  GtGenomeNode *node = NULL;
  GtRange range;

  gt_assert(condenseq != NULL);

  filename = gt_str_new_cstr(gt_condenseq_basefilename(condenseq));

  name_len = gt_str_length(name);
  gt_str_append_cstr(filename, ".gff3");
  outfile = gt_file_new(gt_str_get(filename), "w", err);
  if (outfile == NULL)
    had_err = -1; /* do not continue without an output file */

  if (!had_err) {
    nodev = gt_gff3_visitor_new(outfile);
    gffv = (GtGFF3Visitor *) nodev;
    gt_gff3_visitor_retain_id_attributes(gffv);

    /* one node is reused for all elements of a sequence; only range and
       attributes change between visits */
    node = gt_feature_node_new(seqid, "experimental_feature", (GtUword) 1,
                               (GtUword) 1, GT_STRAND_BOTH);
    fnode = (GtFeatureNode*) node;
    gt_feature_node_set_source(fnode, source);
  }

  /* pass 1: unique elements */
  for (idx = 0; !had_err && idx < condenseq->udb_nelems; ++idx) {
    GtCondenseqUnique uq = condenseq->uniques[idx];
    if (seqend <= uq.orig_startpos) {
      /* element lies beyond the current sequence: switch to the sequence
         containing it and start a fresh node with its description as ID */
      const char *desc;
      gt_genome_node_delete(node);
      seqnum = gt_condenseq_pos2seqnum(condenseq, uq.orig_startpos);
      seqstart = gt_condenseq_seqstartpos(condenseq, seqnum);
      seqend = seqstart + condenseq_seqlength_help(condenseq, seqnum, seqstart);
      desc = gt_condenseq_description(condenseq, &desclen, seqnum);
      gt_str_reset(seqid);
      gt_str_append_cstr_nt(seqid, desc, desclen);
      node = gt_feature_node_new(seqid, "experimental_feature", (GtUword) 1,
                                 (GtUword) 1, GT_STRAND_BOTH);
      fnode = (GtFeatureNode*) node;
      gt_feature_node_set_source(fnode, source);
    }
    /* truncate to the fixed prefix, then append the running index */
    gt_str_set_length(name, name_len);
    gt_str_append_uword(name, idx);
    gt_str_set_length(id, (GtUword) 1);
    gt_str_append_uword(id, idx);
    gt_feature_node_set_attribute(fnode, "Name", gt_str_get(name));
    gt_feature_node_set_attribute(fnode, "ID", gt_str_get(id));
    /* 1 Based coordinates! */
    range.start = uq.orig_startpos + 1 - seqstart;
    range.end = uq.orig_startpos + uq.len - seqstart;
    gt_genome_node_set_range(node, &range);
    had_err = gt_genome_node_accept(node, nodev, err);
  }

  /* pass 2: link elements, with their own name and ID prefixes */
  gt_str_reset(name);
  gt_str_append_cstr(name, "link");
  gt_str_reset(id);
  gt_str_append_cstr(id, "L");
  name_len = gt_str_length(name);
  seqend = 0;
  for (idx = 0; !had_err && idx < condenseq->ldb_nelems; ++idx) {
    GtCondenseqLink link = condenseq->links[idx];
    if (seqend <= link.orig_startpos) {
      const char *desc;
      gt_genome_node_delete(node);
      seqnum = gt_condenseq_pos2seqnum(condenseq, link.orig_startpos);
      seqstart = gt_condenseq_seqstartpos(condenseq, seqnum);
      seqend = seqstart + condenseq_seqlength_help(condenseq, seqnum, seqstart);
      desc = gt_condenseq_description(condenseq, &desclen, seqnum);
      gt_str_reset(seqid);
      gt_str_append_cstr_nt(seqid, desc, desclen);
      node = gt_feature_node_new(seqid, "experimental_feature", (GtUword) 1,
                                 (GtUword) 1, GT_STRAND_BOTH);
      fnode = (GtFeatureNode*) node;
      gt_feature_node_set_source(fnode, source);
    }
    gt_str_set_length(name, name_len);
    gt_str_append_uword(name, idx);
    gt_str_set_length(id, (GtUword) 1);
    gt_str_append_uword(id, idx);
    gt_feature_node_set_attribute(fnode, "Name", gt_str_get(name));
    gt_feature_node_set_attribute(fnode, "ID", gt_str_get(id));
    /* reference to the unique element this link points to */
    gt_str_set_length(parent_unique, (GtUword) 1);
    gt_str_append_uword(parent_unique, link.unique_id);
    gt_feature_node_set_attribute(fnode, "Derives_from",
                                  gt_str_get(parent_unique));
    /* 1 Based coordinates! */
    range.start = link.orig_startpos + 1 - seqstart;
    range.end = link.orig_startpos + link.len - seqstart;
    gt_genome_node_set_range(node, &range);
    had_err = gt_genome_node_accept(node, nodev, err);
  }

  /* cleanup; node and visitor do not exist if the file could not be opened */
  gt_file_delete(outfile);
  if (node != NULL)
    gt_genome_node_delete(node);
  if (nodev != NULL)
    gt_node_visitor_delete(nodev);
  gt_str_delete(filename);
  gt_str_delete(id);
  gt_str_delete(name);
  gt_str_delete(parent_unique);
  gt_str_delete(seqid);
  gt_str_delete(source);
  return had_err;
}
예제 #21
0
/* Loads the encoded sequence for <bs> into bs->encseq, (re)building the
   on-disk index files first when necessary. The files are rebuilt if
   <recreate> is set, if the input comes from stdin, if any of the five
   index files is missing, or if the sequence file is newer than the encseq
   file. The base name of the index files is "stdin" for stdin input and the
   sequence file name otherwise. Returns 0 on success, non-zero on error. */
static int bioseq_fill(GtBioseq *bs, bool recreate, GtError *err)
{
  GtStr *stem,
        *encseq_file,
        *ois_file,
        *sds_file,
        *md5_file,
        *des_file;
  bool must_build;
  int had_err = 0;

  gt_assert(!bs->encseq);

  /* for stdin a temporary base name is allocated (and freed below) */
  stem = bs->use_stdin ? gt_str_new_cstr("stdin") : bs->sequence_file;

  /* derive the names of the individual index files */
  encseq_file = gt_str_clone(stem);
  gt_str_append_cstr(encseq_file, GT_ENCSEQFILESUFFIX);
  ois_file = gt_str_clone(stem);
  gt_str_append_cstr(ois_file, GT_OISTABFILESUFFIX);
  sds_file = gt_str_clone(stem);
  gt_str_append_cstr(sds_file, GT_SDSTABFILESUFFIX);
  md5_file = gt_str_clone(stem);
  gt_str_append_cstr(md5_file, GT_MD5TABFILESUFFIX);
  des_file = gt_str_clone(stem);
  gt_str_append_cstr(des_file, GT_DESTABFILESUFFIX);

  /* decide whether the index files have to be (re)built; the checks are
     evaluated in this order and stop at the first one that applies */
  must_build = recreate
               || bs->use_stdin
               || !gt_file_exists(gt_str_get(encseq_file))
               || !gt_file_exists(gt_str_get(ois_file))
               || !gt_file_exists(gt_str_get(sds_file))
               || !gt_file_exists(gt_str_get(md5_file))
               || !gt_file_exists(gt_str_get(des_file))
               || gt_file_is_newer(gt_str_get(bs->sequence_file),
                                   gt_str_get(encseq_file));
  if (must_build)
    had_err = construct_bioseq_files(bs, stem, err);

  if (!had_err) {
    GtEncseqLoader *loader = gt_encseq_loader_new();
    gt_encseq_loader_disable_autosupport(loader);
    gt_encseq_loader_require_lossless_support(loader);
    gt_encseq_loader_require_description_support(loader);
    gt_encseq_loader_require_md5_support(loader);
    gt_encseq_loader_require_multiseq_support(loader);
    bs->encseq = gt_encseq_loader_load(loader, gt_str_get(stem), err);
    if (bs->encseq == NULL) {
      had_err = -1;
      gt_assert(gt_error_is_set(err));
    }
    gt_encseq_loader_delete(loader);
  }
  /* success implies a loaded encseq */
  gt_assert(had_err || bs->encseq);

  /* free */
  gt_str_delete(des_file);
  gt_str_delete(md5_file);
  gt_str_delete(sds_file);
  gt_str_delete(ois_file);
  gt_str_delete(encseq_file);
  if (bs->use_stdin)
    gt_str_delete(stem);

  return had_err;
}
예제 #22
0
/* Tool runner producing a multi-page sketch (PDF or PostScript) of the
   features in a GFF3 file.

   argv[parsed_args] is the output file, argv[parsed_args+1] the GFF3 input.
   The style is loaded from the file given in the arguments or, if none was
   given, from "/sketch/default.style" below the gtdata path. The selected
   sequence region is cut into windows of arguments->width positions; each
   window is laid out and drawn below the previous one, and a new page (with
   header) is started whenever the next window would not fit.

   Returns 0 on success and a non-zero value on error.

   Compared to earlier revisions:
   - style, style file name and feature index are released on every path,
     not only after successful setup;
   - a failing gtdata path lookup or sequence file load is reported as an
     error instead of being ignored;
   - the canvas pointer is reset for every window so that a canvas deleted in
     a previous iteration is never deleted a second time. */
static int gt_sketch_page_runner(GT_UNUSED int argc,
                                 const char **argv,
                                 int parsed_args,
                                 void *tool_arguments,
                                 GtError *err)
{
  SketchPageArguments *arguments = tool_arguments;
  int had_err = 0;
  GtFeatureIndex *features = NULL;
  GtRange qry_range, sequence_region_range;
  GtStyle *sty = NULL;
  GtStr *prog, *gt_style_file;
  GtDiagram *d = NULL;
  GtLayout *l = NULL;
  GtBioseq *bioseq = NULL;
  GtCanvas *canvas = NULL;
  const char *seqid = NULL, *outfile;
  unsigned long start, height, num_pages = 0;
  double offsetpos, usable_height;
  cairo_surface_t *surf = NULL;
  cairo_t *cr = NULL;
  GtTextWidthCalculator *twc;
  gt_error_check(err);

  features = gt_feature_index_memory_new();

  if (cairo_version() < CAIRO_VERSION_ENCODE(1, 8, 6))
    gt_warning("Your cairo library (version %s) is older than version 1.8.6! "
               "These versions contain a bug which may result in "
               "corrupted PDF output!", cairo_version_string());

  /* get style */
  sty = gt_style_new(err);
  if (gt_str_length(arguments->stylefile) == 0)
  {
    /* no style file given: use the default one from the gtdata directory;
       only the part of argv[0] before the first space is the program name */
    prog = gt_str_new();
    gt_str_append_cstr_nt(prog, argv[0],
                          gt_cstr_length_up_to_char(argv[0], ' '));
    gt_style_file = gt_get_gtdata_path(gt_str_get(prog), err);
    gt_str_delete(prog);
    if (gt_style_file == NULL)
      had_err = -1;
    else
      gt_str_append_cstr(gt_style_file, "/sketch/default.style");
  }
  else
  {
    gt_style_file = gt_str_ref(arguments->stylefile);
  }
  if (!had_err)
    had_err = gt_style_load_file(sty, gt_str_get(gt_style_file), err);

  outfile = argv[parsed_args];
  if (!had_err)
  {
    /* get features */
    had_err = gt_feature_index_add_gff3file(features, argv[parsed_args+1], err);
     if (!had_err && gt_str_length(arguments->seqid) == 0) {
      seqid = gt_feature_index_get_first_seqid(features);
      if (seqid == NULL)
      {
        gt_error_set(err, "GFF input file must contain a sequence region!");
        had_err = -1;
      }
    }
    else if (!had_err
               && !gt_feature_index_has_seqid(features,
                                              gt_str_get(arguments->seqid)))
    {
      gt_error_set(err, "sequence region '%s' does not exist in GFF input file",
                   gt_str_get(arguments->seqid));
      had_err = -1;
    }
    else if (!had_err)
      seqid = gt_str_get(arguments->seqid);
  }

  /* set text: the header text defaults to the input file name */
  if (gt_str_length(arguments->text) == 0)
  {
    gt_str_delete(arguments->text);
    arguments->text = gt_str_new_cstr(argv[parsed_args+1]);
  }

  if (!had_err)
  {
    /* set display range */
    gt_feature_index_get_range_for_seqid(features, &sequence_region_range,
                                         seqid);
    qry_range.start = (arguments->range.start == GT_UNDEF_ULONG ?
                         sequence_region_range.start :
                         arguments->range.start);
    qry_range.end   = (arguments->range.end == GT_UNDEF_ULONG ?
                         sequence_region_range.end :
                         arguments->range.end);

    /* set output format */
    if (strcmp(gt_str_get(arguments->format), "pdf") == 0)
    {
      surf = cairo_pdf_surface_create(outfile,
                                      mm_to_pt(arguments->pwidth),
                                      mm_to_pt(arguments->pheight));
    }
    else if (strcmp(gt_str_get(arguments->format), "ps") == 0)
    {
      surf =  cairo_ps_surface_create(outfile,
                                      mm_to_pt(arguments->pwidth),
                                      mm_to_pt(arguments->pheight));
    }
    gt_log_log("created page with %.2f:%.2f dimensions\n",
                                                  mm_to_pt(arguments->pwidth),
                                                  mm_to_pt(arguments->pheight));

    /* vertical layout of a page: header text at the top, drawing below */
    offsetpos = TEXT_SPACER + arguments->theight + TEXT_SPACER;
    usable_height = mm_to_pt(arguments->pheight)
                              - arguments->theight
                              - arguments->theight
                              - 4*TEXT_SPACER;

    if (gt_str_length(arguments->seqfile) > 0) {
      bioseq = gt_bioseq_new(gt_str_get(arguments->seqfile), err);
      if (bioseq == NULL)
        had_err = -1; /* makes the window loop below stop immediately */
    }

    cr = cairo_create(surf);
    cairo_set_font_size(cr, 8);
    twc = gt_text_width_calculator_cairo_new(cr, sty);
    for (start = qry_range.start; start <= qry_range.end;
         start += arguments->width)
    {
      GtRange single_range;
      GtCustomTrack *ct = NULL;
      const char *seq;
      single_range.start = start;
      single_range.end = start + arguments->width;

      if (had_err)
        break;

      /* forget the canvas of the previous window, it has been deleted */
      canvas = NULL;

      d = gt_diagram_new(features, seqid, &single_range, sty, err);
      if (!d) {
        had_err = -1;
        break;
      }
      if (bioseq) {
        /* add a GC content track computed from the first sequence */
        seq = gt_bioseq_get_sequence(bioseq, 0);
        ct = gt_custom_track_gc_content_new(seq,
                                      gt_bioseq_get_sequence_length(bioseq, 0),
                                      800, 70, 0.4, true);
        gt_diagram_add_custom_track(d, ct);
      }

      l = gt_layout_new_with_twc(d, mm_to_pt(arguments->width), sty, twc, err);
      had_err = gt_layout_get_height(l, &height, err);
      if (!had_err) {
        /* window does not fit on the current page: finish it and start a
           new one */
        if (gt_double_smaller_double(usable_height - 10 - 2*TEXT_SPACER
              - arguments->theight, offsetpos + height))
        {
            draw_header(cr, gt_str_get(arguments->text), argv[parsed_args+1],
                        seqid, num_pages, mm_to_pt(arguments->pwidth),
                        mm_to_pt(arguments->pheight),
                        arguments->theight);
          cairo_show_page(cr);
          offsetpos = TEXT_SPACER + arguments->theight + TEXT_SPACER;
          num_pages++;
        }
        canvas = gt_canvas_cairo_context_new(sty,
                                             cr,
                                             offsetpos,
                                             mm_to_pt(arguments->pwidth),
                                             height,
                                             NULL,
                                             err);
        if (!canvas)
          had_err = -1;
        offsetpos += height;
        if (!had_err)
          had_err = gt_layout_sketch(l, canvas, err);
      }
      gt_canvas_delete(canvas);
      gt_layout_delete(l);
      gt_diagram_delete(d);
      if (ct)
        gt_custom_track_delete(ct);
    }
    /* finish the last page */
    draw_header(cr, gt_str_get(arguments->text), argv[parsed_args+1], seqid,
                num_pages, mm_to_pt(arguments->pwidth),
                mm_to_pt(arguments->pheight),
                arguments->theight);
    cairo_show_page(cr);
    num_pages++;
    gt_log_log("finished, should be %lu pages\n", num_pages);
    gt_text_width_calculator_delete(twc);
    cairo_destroy(cr);
    cairo_surface_flush(surf);
    cairo_surface_finish(surf);
    cairo_surface_destroy(surf);
    cairo_debug_reset_static_data();
    if (bioseq)
      gt_bioseq_delete(bioseq);
  }

  /* released on success and on every error path */
  if (sty != NULL)
    gt_style_delete(sty);
  gt_str_delete(gt_style_file);
  gt_feature_index_delete(features);
  return had_err;
}
/*
 * Validates and completes the parsed arguments of the read compression tool.
 *
 * The checks run in this order and stop at the first failure:
 *  1. at least one input file must have been given via "-files";
 *  2. if no "-name" was given, a name is derived from the single input file
 *     (its basename without the last '.'-separated suffix); with more than
 *     one input file a missing name is an error;
 *  3. the sampling method string ("page", "regular" or "none") sets the
 *     matching flag and the sampling rate is defaulted or checked against it;
 *  4. an explicitly given quality range must fit into unsigned int and is
 *     copied into arguments->qrng.
 *
 * tool_arguments must point to a GtCsrHcrEncodeArguments object.
 * Returns 0 on success and -1 with <err> set otherwise.
 */
static int gt_compreads_compress_arguments_check(GT_UNUSED int rest_argc,
                                       void *tool_arguments,
                                       GtError *err)
{
  int had_err = 0;
  GtCsrHcrEncodeArguments *arguments = tool_arguments;
  gt_error_check(err);
  gt_assert(arguments);

  if (gt_str_array_size(arguments->files) == 0) {
    gt_error_set(err, "option \"-files\" is mandatory and requires"
                      " at least one filename as argument!");
    had_err = -1;
  }

  if (!had_err && gt_str_length(arguments->name) == 0) {
    if (gt_str_array_size(arguments->files) > 1UL) {
      gt_error_set(err, "option \"-name\" needs to be specified"
                        " if more than one file is given");
      had_err = -1;
    }
    else {
      GtUword i,
              num_tokens;
      char *basename = gt_basename(gt_str_array_get(arguments->files, 0));
      /* split a private copy so that <basename> itself stays intact for the
         fallback below */
      GtStr *buffer = gt_str_new_cstr(basename);
      GtSplitter *splitter = gt_splitter_new();

      gt_splitter_split(splitter, gt_str_get(buffer), gt_str_length(buffer),
                        '.');
      num_tokens = gt_splitter_size(splitter);
      /* join all tokens but the last one (the suffix) with '.';
         "i + 1 < num_tokens" cannot wrap around, unlike "num_tokens - 1" */
      for (i = 0; i + 1 < num_tokens; i++) {
        if (i > 0)
          gt_str_append_char(arguments->name, '.');
        gt_str_append_cstr(arguments->name,
                           gt_splitter_get_token(splitter, i));
      }
      /* files without a suffix ("reads") or dot-files (".reads") would leave
         the name empty; use the complete basename for them */
      if (gt_str_length(arguments->name) == 0)
        gt_str_append_cstr(arguments->name, basename);
      gt_splitter_delete(splitter);
      gt_str_delete(buffer);
      gt_free(basename);
    }
  }

  if (!had_err) {
    char *sampling_type = gt_str_get(arguments->method);
    static const char *methods[] = { "page", "regular", "none" };

    if (!strcmp(methods[0], sampling_type)) {
      arguments->pagewise = true;
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = GT_SAMPLING_DEFAULT_PAGE_RATE;
      else if (arguments->srate == 0) {
        gt_error_set(err, "page sampling was chosen, but sampling"
                          " rate was set to "GT_WU"! this seems wrong.",
                     arguments->srate);
        had_err = -1;
      }
    }
    else if (!strcmp(methods[1], sampling_type)) {
      arguments->regular = true;
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = GT_SAMPLING_DEFAULT_REGULAR_RATE;
      else if (arguments->srate == 0) {
        gt_error_set(err, "regular sampling was chosen, but sampling rate "
                          " was set to "GT_WU"! this seems wrong.",
                     arguments->srate);
        had_err = -1;
      }
    }
    else if (!strcmp(methods[2], sampling_type)) {
      /* no sampling: the only acceptable rate is 0 */
      if (arguments->srate == GT_UNDEF_UWORD)
        arguments->srate = 0;
      else if (arguments->srate != 0) {
        gt_error_set(err, "no sampling was chosen, but sampling rate was"
                          " set to "GT_WU"! this seems wrong.",
                          arguments->srate);
        had_err = -1;
      }
    }
    else {
      gt_error_set(err, "somethings wrong with the stype option");
      had_err = -1;
    }
  }

  if (!had_err) {
    /* an undefined range start means that no range was given at all */
    if (arguments->arg_range.start != GT_UNDEF_UWORD) {
      if (arguments->arg_range.start <= (GtUword) UINT_MAX) {
        gt_safe_assign(arguments->qrng.start, arguments->arg_range.start);
        if (arguments->arg_range.end <= (GtUword) UINT_MAX)
          gt_safe_assign(arguments->qrng.end, arguments->arg_range.end);
        else
          had_err = -1;
      }
      else
        had_err = -1;
    }
    if (had_err)
      gt_error_set(err, "Range for qualities: value to large! larger than %u",
                   UINT_MAX);
  }
  return had_err;
}
/*
 * Determines the effect of a single-nucleotide variant on the codon it hits
 * within <mRNA> and records it in the GT_GVF_VARIANT_EFFECT attribute of
 * <snp>.
 *
 * The mRNA sequence is looked up in sav->rnaseqs. The codon containing
 * <variant_pos> is copied twice, once unchanged and once with <variant_char>
 * substituted, and both copies are compared via the translation table
 * sav->tt:
 *   stop  -> stop   : GT_SNP_SYNONYMOUS_STOP_EFFECT
 *   stop  -> other  : GT_SNP_STOP_LOST_EFFECT
 *   other -> stop   : GT_SNP_NONSENSE_EFFECT
 *   same amino acid : GT_SNP_SYNONYMOUS_AMINO_EFFECT
 *   otherwise       : GT_SNP_MISSENSE_EFFECT
 *
 * The attribute value written is "<effect> <variant_idx> <mRNA type>
 * <mRNA ID>", appended with a ',' to any value the attribute already had.
 *
 * Returns 0 on success or the error code of a failed codon translation.
 * Nothing is written (and 0 is returned) if no sequence is stored for <mRNA>.
 */
static int snp_annotator_classify_snp(GtSNPAnnotatorVisitor *sav,
                                      GtFeatureNode *mRNA,
                                      GtFeatureNode *snp,
                                      GtUword variant_pos,
                                      GtUword variant_idx,
                                      char variant_char,
#ifndef NDEBUG
                                      GT_UNUSED char reference_char,
#endif
                                      GT_UNUSED GtError *err)
{
  int had_err = 0,
      codon_is_stop,
      variant_is_stop;
  char *mrnaseq;
  const char *variant_effect = NULL,
             *previous_effects;
  char codon[3],
       variant_codon[3],
       oldamino,
       newamino;
  GtStr *annotation;
  GT_UNUSED GtUword mrnalen;
  GtUword codon_idx,
          variantoffset,
          k;

  gt_assert(mRNA && snp && sav);
  gt_log_log("processing variant char %c for SNP %s\n",
               variant_char, gt_feature_node_get_attribute(snp, "Dbxref"));

  mrnaseq = gt_hashmap_get(sav->rnaseqs, mRNA);
  gt_assert(mrnaseq);
  if (!mrnaseq)
    return had_err;

  /* locate the affected codon and the position of the variant inside it */
  codon_idx = variant_pos / GT_CODON_LENGTH;
  variantoffset = variant_pos % GT_CODON_LENGTH;
  mrnalen = strlen(mrnaseq);
  gt_assert(variant_pos < mrnalen);

  /* reference codon and its mutated twin */
  for (k = 0; k < 3; k++)
    variant_codon[k] = codon[k] = mrnaseq[3*codon_idx + k];
  variant_codon[variantoffset] = variant_char;
#ifndef NDEBUG
  gt_assert(toupper(codon[variantoffset]) == toupper(reference_char));
#endif

  codon_is_stop = gt_trans_table_is_stop_codon(sav->tt, codon[0], codon[1],
                                               codon[2]);
  variant_is_stop = gt_trans_table_is_stop_codon(sav->tt, variant_codon[0],
                                                 variant_codon[1],
                                                 variant_codon[2]);

  if (codon_is_stop) {
    if (variant_is_stop)
      variant_effect = gt_symbol(GT_SNP_SYNONYMOUS_STOP_EFFECT);
    else
      variant_effect = gt_symbol(GT_SNP_STOP_LOST_EFFECT);
  }
  else if (variant_is_stop) {
    variant_effect = gt_symbol(GT_SNP_NONSENSE_EFFECT);
  }
  else {
    /* neither codon is a stop codon: compare the encoded amino acids */
    had_err = gt_trans_table_translate_codon(sav->tt, codon[0], codon[1],
                                             codon[2], &oldamino, err);
    if (!had_err) {
      had_err = gt_trans_table_translate_codon(sav->tt, variant_codon[0],
                                               variant_codon[1],
                                               variant_codon[2],
                                               &newamino, err);
    }
    if (had_err)
      return had_err;
    if (newamino == oldamino)
      variant_effect = gt_symbol(GT_SNP_SYNONYMOUS_AMINO_EFFECT);
    else
      variant_effect = gt_symbol(GT_SNP_MISSENSE_EFFECT);
  }

  gt_assert(variant_effect != NULL);

  /* extend an existing effect list, or start a new one */
  previous_effects = gt_feature_node_get_attribute(snp, GT_GVF_VARIANT_EFFECT);
  if (previous_effects) {
    annotation = gt_str_new_cstr(previous_effects);
    gt_str_append_cstr(annotation, ",");
    gt_str_append_cstr(annotation, variant_effect);
  }
  else {
    annotation = gt_str_new_cstr(variant_effect);
  }
  gt_str_append_cstr(annotation, " ");
  gt_str_append_ulong(annotation, variant_idx);
  gt_str_append_cstr(annotation, " ");
  gt_str_append_cstr(annotation, gt_feature_node_get_type(mRNA));
  gt_str_append_cstr(annotation, " ");
  gt_str_append_cstr(annotation,
                     gt_feature_node_get_attribute(mRNA, GT_GFF_ID));
  gt_feature_node_set_attribute(snp, GT_GVF_VARIANT_EFFECT,
                                gt_str_get(annotation));
  gt_str_reset(annotation);
  gt_str_delete(annotation);

  return had_err;
}
예제 #25
0
/* Requests the DNA sequence of one hit from NCBI's efetch CGI script via
   libcurl. The request URL is assembled from the database name, the GI
   number of the hit and the hit_from/hit_to entries at index <hit_counter>;
   the reply is collected in a MemoryStruct and searched for the
   <GBSeq_sequence> element. Failures are reported through <err> and
   had_err is set to -1. */
int mg_curl(ParseStruct *parsestruct_ptr,
            GtUword hit_counter, GtError * err)
{
  int had_err = 0,
    curl_errornr = 0;

  /* length of the hit DNA sequence taken from the XML file */
  GtUword seq_len;
  GtWord numb_from = 0, numb_to = 0, numb_diff = 0;

  GtStr *seq_var,
   *http_adr;

  MemoryStruct memorystruct;

  /* char pointer to the HTTP address of NCBI's efetch CGI script */
  char *http_adr_ptr,
   *seq_pos;                           /* char pointer used to read the
                                          sequence information out of the
                                          XML document that is the result
                                          of the efetch request */
  const char *curlerror;

  /* curl handle */
  CURL *curl_handle;

  /* the data pointer starts out as NULL */
  memorystruct.memory = NULL;
  /* no data has been entered or stored yet */
  memorystruct.size = 0;

  /* intermediate buffer for the sequence information, because the
     GtStrArray class offers no function for inserting a length-limited
     string; also set the first part of the HTTP address */
  seq_var = gt_str_new();
  http_adr =
    gt_str_new_cstr
    ("http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=");

  /* check the incoming error object */
  gt_error_check(err);

  curl_global_init(CURL_GLOBAL_ALL);

  /* initialize the curl session */
  /* NOTE(review): the returned handle is not checked for NULL before it is
     used below -- confirm whether that can happen here */
  curl_handle = curl_easy_init();

  /* assemble the HTTP address by appending the GI number of the hit, the
     hit-from value, the hit-to value and the return type to the first
     part of the HTTP address */
  gt_str_append_str(http_adr, ARGUMENTSSTRUCT(curl_fcgi_db));
  gt_str_append_cstr(http_adr, "&id=gi|");
  gt_str_append_str(http_adr, parsestruct_ptr->hit_gi_nr_tmp);
  gt_str_append_cstr(http_adr, "&seq_start=");
  gt_str_append_cstr(http_adr,
                  gt_str_array_get(MATRIXSTRUCT(hit_from), hit_counter));
  gt_str_append_cstr(http_adr, "&seq_stop=");
  gt_str_append_cstr(http_adr,
                  gt_str_array_get(MATRIXSTRUCT(hit_to), hit_counter));
  gt_str_append_cstr(http_adr, "&retmode=xml");

  /* a char pointer is needed because curl_easy_setopt expects a char
     pointer as its third parameter */
  http_adr_ptr = gt_str_get(http_adr);

  /* set the HTTP address that is to be requested */
  curl_easy_setopt(curl_handle, CURLOPT_URL, http_adr_ptr);

  /* the received data is passed to the function WriteMemoryCallback,
     where memory is reserved and the data is copied into that memory
     area */
  curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION,
                   WriteMemoryCallback);

  /* the data is entered into this structure */
  curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *) &memorystruct);

  /* set the user-agent field, because some servers require it */
  curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");

  /* perform the request */
  curl_errornr = curl_easy_perform(curl_handle);
  curlerror = curl_easy_strerror(curl_errornr);

  if (curl_errornr)
  {
    gt_error_set(err,
              "an error occurred during curl-processing (error-code %d):\
               \"%s\"", curl_errornr, curlerror);
    had_err = -1;
  }

  if (!had_err)
  {
    /* the hit DNA is located between the <GBSeq_sequence> and the
       </GBSeq_sequence> XML tags; pointer to the '<' character of
       <GBSeq_sequence> */
    /* NOTE(review): memorystruct.memory is still NULL at this point if the
       write callback never stored anything -- verify that strstr cannot be
       reached with a NULL haystack */
    seq_pos = strstr(memorystruct.memory, "<GBSeq_sequence>");

    if (!seq_pos)
    {
      gt_error_set(err,
                "an error occurred while retrieving sequence-information\
                 with the following request: \"%s\"", http_adr_ptr);
      had_err = -1;
    }
예제 #26
0
/*
 * Builds one track from the block list <value> stored under the C string
 * <key> and adds it to the track table of the layout referenced by <data>
 * (a GtLayoutTraverseInfo).
 *
 * The "split_lines" and "max_num_lines" settings are read first from the
 * "format" section of the layout's style and then from the section named
 * after the type of the first block in the list, so a type-specific value
 * overrides the general one. All blocks of the list are then inserted into
 * the new track.
 *
 * Returns 0 on success, 1 if a style query failed, or the error code of a
 * failed block insertion. On failure the track is deleted again and is not
 * added to the layout.
 */
static int layout_tracks(void *key, void *value, void *data,
                         GtError *err)
{
  GtLayoutTraverseInfo *info = (GtLayoutTraverseInfo*) data;
  GtArray *list = (GtArray*) value;
  GtBlock *first_block;
  GtTrack *new_track = NULL;
  GtStr *track_title;
  bool split_lines = true;
  double max_lines = 50;
  unsigned long idx;
  int had_err = 0;
  gt_assert(list);

  /* sorting the blocks of each type keeps the layout deterministic */
  if (info->layout->block_ordering_func) {
    gt_array_sort_stable_with_data(list, blocklist_block_compare,
                                   info->layout);
  }

  /* XXX: the first block supplies the section name for the track-specific
     style lookups, this should be reworked to allow arbitrary track keys! */
  first_block = *(GtBlock**) gt_array_get(list, 0);
  track_title = gt_str_new_cstr((char*) key);

  /* general defaults first, then the track-specific overrides; the chain
     stops at the first query that reports an error */
  if (gt_style_get_bool(info->layout->style, "format", "split_lines",
                        &split_lines, NULL, err) == GT_STYLE_QUERY_ERROR
      || gt_style_get_num(info->layout->style, "format", "max_num_lines",
                          &max_lines, NULL, err) == GT_STYLE_QUERY_ERROR
      || gt_style_get_bool(info->layout->style,
                           gt_block_get_type(first_block), "split_lines",
                           &split_lines, NULL, err) == GT_STYLE_QUERY_ERROR
      || gt_style_get_num(info->layout->style,
                          gt_block_get_type(first_block), "max_num_lines",
                          &max_lines, NULL, err) == GT_STYLE_QUERY_ERROR) {
    had_err = 1;
  }

  if (!had_err) {
    new_track = gt_track_new(track_title, (unsigned long) max_lines,
                             split_lines,
                             gt_line_breaker_captions_new(info->layout,
                                                       info->layout->width,
                                                       info->layout->style));
    info->layout->nof_tracks++;
    for (idx = 0; idx < gt_array_size(list); idx++) {
      had_err = gt_track_insert_block(new_track,
                                      *(GtBlock**) gt_array_get(list, idx),
                                      err);
      if (had_err)
        break;
    }
  }

  if (had_err) {
    gt_track_delete(new_track);
  }
  else {
    /* the table gets its own copy of the key string */
    gt_hashmap_add(info->layout->tracks,
                   gt_cstr_dup(gt_str_get(track_title)), new_track);
  }

  gt_str_delete(track_title);
  return had_err;
}
static int hmmsearch_process_coarse_hits(
                                       char *table_filename,
                                       GtCondenseq *ces,
                                       GtCondenseqHmmsearchArguments *arguments,
                                       GtLogger *logger,
                                       GtError *err) {
  int had_err = 0;
  GtStr *line = gt_str_new();
  FILE *table = NULL;
  GtSplitter *splitter = gt_splitter_new();
  GtStr *query = gt_str_new(),
        *fine_fasta_filename = gt_str_new_cstr("condenseq");
  GtRBTree *sequences = NULL;
  GtUword filecount = (GtUword) 1;
  unsigned int querycount = 0;
  const GtUword fine_fasta_name_length = gt_str_length(fine_fasta_filename);
  const GtUword table_name_length = gt_str_length(arguments->outtable_filename);

  table = gt_xfopen(table_filename, "r");

  sequences = gt_rbtree_new(hmmsearch_cmp_seqnum,
                            hmmsearch_tree_free_node, NULL);

  while (!had_err && gt_str_read_next_line(line, table) == 0) {
    char *c_line = gt_str_get(line);
    GtUword uid;
    const GtUword target_column = 0,
          query_column = (GtUword) 3;

    if (c_line[0] != '#') {
      gt_splitter_split_non_empty(splitter, c_line, gt_str_length(line), ' ');
      gt_assert(gt_splitter_size(splitter) == (GtUword) 23);
      if (sscanf(gt_splitter_get_token(splitter, target_column),
                 GT_WU, &uid) != 1) {
        gt_error_set(err, "couldn't parse target number: %s",
                     gt_splitter_get_token(splitter, target_column));
        had_err = -1;
      }
      if (gt_str_length(query) == 0 ||
          strcmp(gt_str_get(query),
                 gt_splitter_get_token(splitter, query_column)) != 0) {
        gt_str_set(query, gt_splitter_get_token(splitter, query_column));
        gt_logger_log(logger, "new query: %s", gt_str_get(query));
        querycount++;
      }
      if (!had_err && querycount == arguments->max_queries) {
        hmmsearch_create_fine_fas(fine_fasta_filename, sequences, ces);
        if (table_name_length != 0)
          gt_str_append_uword(arguments->outtable_filename, filecount++);
        had_err =
          hmmsearch_call_fine_search(table_name_length != 0 ?
                                       arguments->outtable_filename :
                                       NULL,
                                     gt_str_get(fine_fasta_filename),
                                     gt_str_get(arguments->hmmsearch_path),
                                     gt_str_get(arguments->hmm),
                                     logger, err);
        gt_rbtree_clear(sequences);
        gt_str_set_length(fine_fasta_filename, fine_fasta_name_length);
        if (table_name_length != 0)
          gt_str_set_length(arguments->outtable_filename, table_name_length);
        querycount = 0;
      }
      if (!had_err) {
        if (gt_condenseq_each_redundant_seq(ces, uid,
                                            hmmsearch_process_seq,
                                            sequences, err) == 0) {
          had_err = -1;
        }
      }
      gt_splitter_reset(splitter);
    }
    gt_str_reset(line);
  }
  gt_splitter_delete(splitter);
  gt_str_delete(line);
  gt_str_delete(query);
  gt_xfclose(table);

  if (!had_err) {
    hmmsearch_create_fine_fas(fine_fasta_filename, sequences, ces);
    if (table_name_length != 0)
      gt_str_append_uword(arguments->outtable_filename, filecount++);
    had_err =
      hmmsearch_call_fine_search(table_name_length != 0 ?
                                 arguments->outtable_filename :
                                 NULL,
                                 gt_str_get(fine_fasta_filename),
                                 gt_str_get(arguments->hmmsearch_path),
                                 gt_str_get(arguments->hmm),
                                 logger, err);
  }
  gt_log_log("created " GT_WU " files", filecount);
  gt_rbtree_delete(sequences);
  gt_str_delete(fine_fasta_filename);
  return had_err;
}
예제 #28
0
/*
 * Unit test for GtBlock: covers element insertion, range, caption and
 * strand accessors, and gt_block_get_max_height() under changing
 * "bar_height" style settings.
 *
 * Errors raised by gt_block_get_max_height() are collected in the private
 * error object <testerr>, so that the !gt_error_is_set(testerr) checks
 * following each call actually observe them.
 *
 * Returns had_err as maintained by the gt_ensure() checks.
 */
int gt_block_unit_test(GtError *err)
{
  GtRange r1, r2, r_temp, b_range;
  GtStrand s;
  GtGenomeNode *gn1, *gn2;
  GtElement *e1, *e2;
  double height;
  GtBlock *b;
  GtStr *seqid, *caption1, *caption2;
  int had_err = 0;
  GtStyle *sty;
  GtError *testerr;
  gt_error_check(err);

  seqid = gt_str_new_cstr("seqid");
  caption1 = gt_str_new_cstr("foo");
  caption2 = gt_str_new_cstr("bar");
  testerr = gt_error_new();

  r1.start = 10UL;
  r1.end = 50UL;

  r2.start = 40UL;
  r2.end = 50UL;

  gn1 = gt_feature_node_new(seqid, gt_ft_gene, r1.start, r1.end,
                            GT_STRAND_FORWARD);
  gn2 = gt_feature_node_new(seqid, gt_ft_exon, r2.start, r2.end,
                            GT_STRAND_FORWARD);

  e1 = gt_element_new((GtFeatureNode*) gn1);
  e2 = gt_element_new((GtFeatureNode*) gn2);

  b = gt_block_new();

  /* test gt_block_insert_elements */
  gt_ensure((0UL == gt_block_get_size(b)));
  gt_block_insert_element(b, (GtFeatureNode*) gn1);
  gt_ensure((1UL == gt_block_get_size(b)));
  gt_block_insert_element(b, (GtFeatureNode*) gn2);
  gt_ensure((2UL == gt_block_get_size(b)));

  /* test gt_block_set_range & gt_block_get_range */
  r_temp = gt_range_join(&r1, &r2);
  gt_block_set_range(b, r_temp);
  b_range = gt_block_get_range(b);
  gt_ensure((0 == gt_range_compare(&b_range, &r_temp)));
  gt_ensure((1 == gt_range_compare(&r2, &r_temp)));

  /* tests gt_block_set_caption & gt_block_get_caption */
  gt_block_set_caption(b, caption1);
  gt_ensure((0 == gt_str_cmp(gt_block_get_caption(b), caption1)));
  gt_ensure((0 != gt_str_cmp(gt_block_get_caption(b), caption2)));

  /* tests gt_block_set_strand & gt_block_get_strand */
  s = gt_block_get_strand(b);
  gt_ensure((GT_STRAND_UNKNOWN == s));
  gt_block_set_strand(b, GT_STRAND_FORWARD);
  s = gt_block_get_strand(b);
  gt_ensure((GT_STRAND_FORWARD == s));

  /* test gt_block_get_max_height(): the block holds a gene and an exon, so
     the expected height follows whichever "bar_height" setting is largest */
  sty = gt_style_new(err);
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == BAR_HEIGHT_DEFAULT);
  gt_style_set_num(sty, "exon", "bar_height", 42);
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 42);
  gt_style_set_num(sty, "gene", "bar_height", 23);
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 42);
  gt_style_unset(sty, "exon", "bar_height");
  gt_ensure(gt_block_get_max_height(b, &height, sty, testerr) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 23);

  /* NOTE(review): caption1 is not deleted here -- presumably the block took
     it over in gt_block_set_caption(); confirm against the block code */
  gt_str_delete(caption2);
  gt_str_delete(seqid);
  gt_element_delete(e1);
  gt_element_delete(e2);
  gt_block_delete(b);
  gt_style_delete(sty);
  gt_error_delete(testerr);
  gt_genome_node_delete(gn1);
  gt_genome_node_delete(gn2);

  return had_err;
}
예제 #29
0
/*
 * Unit test for GtTrack.
 *
 * Four blocks, each holding a gene with one exon child, are inserted into a
 * track one after another. After the insertions the test checks the number
 * of lines in the track and the track height; the expected height is a
 * fixed per-track part (t_rest) plus, for every line, the bar spacing
 * (l_rest) and the bar height. Finally the bar height is changed through
 * the "exon", "gene" and "format" style sections and the height is checked
 * again.
 *
 * Returns had_err as maintained by the gt_ensure() checks.
 */
int gt_track_unit_test(GtError *err)
{
    /* ranges of the four test features, in insertion order */
    static const unsigned long range_start[4] = { 100UL, 1001UL, 700UL, 10UL },
                               range_end[4] = { 1000UL, 1500UL, 1200UL, 200UL };
    /* "format" settings that add up to the per-track part of the height */
    const char *caption_key[3] = { "track_caption_font_size",
                                   "track_caption_space",
                                   "track_vspace" };
    const double caption_default[3] = { TEXT_SIZE_DEFAULT,
                                        CAPTION_BAR_SPACE_DEFAULT,
                                        TRACK_VSPACE_DEFAULT };
    int had_err = 0;
    GtBlock *b[4];
    GtRange r[4];
    GtTrack *track;
    GtGenomeNode *parent[4], *gn[4];
    GtStr *title;
    double height, tmp;
    GtStyle *sty;
    unsigned long i;
    GtLineBreaker *lb;
    double t_rest = 0,
           l_rest = 0;
    gt_error_check(err);

    title = gt_str_new_cstr("test");

    /* one gene with a single exon child per range */
    for (i=0; i<4; i++)
    {
        GtFeatureNode *gene, *exon;

        r[i].start = range_start[i];
        r[i].end = range_end[i];

        parent[i] = gt_feature_node_new(title, gt_ft_gene, r[i].start, r[i].end,
                                        GT_STRAND_FORWARD);
        gn[i] = gt_feature_node_new(title, gt_ft_exon, r[i].start, r[i].end,
                                    GT_STRAND_FORWARD);
        gene = (GtFeatureNode*) parent[i];
        exon = (GtFeatureNode*) gn[i];

        gt_feature_node_add_child(gene, exon);
        gt_feature_node_add_attribute(gene, GT_GFF_NAME, "parent");
        gt_feature_node_add_attribute(exon, GT_GFF_NAME, "child");
    }

    /* one block per gene/exon pair */
    for (i=0; i<4; i++)
    {
        b[i] = gt_block_new();
        gt_block_set_range(b[i], r[i]);
        gt_block_insert_element(b[i], (GtFeatureNode*) parent[i]);
        gt_block_insert_element(b[i], (GtFeatureNode*) gn[i]);
    }

    lb = gt_line_breaker_bases_new();

    sty = gt_style_new(err);

    /* sum up the per-track part, using the default for unset values */
    for (i=0; i<3; i++)
    {
        if (gt_style_get_num(sty, "format", caption_key[i], &tmp,
                             NULL, err) == GT_STYLE_QUERY_NOT_SET) {
            tmp = caption_default[i];
        }
        t_rest += tmp;
    }
    /* spacing added for every line */
    if (gt_style_get_num(sty, "format", "bar_vspace", &l_rest,
                         NULL, err) == GT_STYLE_QUERY_NOT_SET) {
        l_rest = BAR_VSPACE_DEFAULT;
    }

    /* empty track */
    track = gt_track_new(title, GT_UNDEF_ULONG, true, lb);
    gt_ensure(had_err, track);
    gt_ensure(had_err, gt_track_get_title(track) == title);

    gt_ensure(had_err, gt_track_get_number_of_lines(track) == 0);
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest);
    gt_ensure(had_err, !gt_error_is_set(err));

    /* first block: one line */
    gt_ensure(had_err, gt_track_insert_block(track, b[0], err) == 0);
    gt_ensure(had_err, !gt_error_is_set(err));
    gt_ensure(had_err, gt_track_get_number_of_lines(track) == 1);
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest + l_rest + BAR_HEIGHT_DEFAULT);
    gt_ensure(had_err, !gt_error_is_set(err));

    /* second block: expected to leave the line count at one */
    gt_ensure(had_err, gt_track_insert_block(track, b[1], err) == 0);
    gt_ensure(had_err, !gt_error_is_set(err));
    gt_ensure(had_err, gt_track_get_number_of_lines(track) == 1);
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest + l_rest + BAR_HEIGHT_DEFAULT);
    gt_ensure(had_err, !gt_error_is_set(err));

    /* third and fourth block: expected to end up with two lines */
    gt_ensure(had_err, gt_track_insert_block(track, b[2], err) == 0);
    gt_ensure(had_err, !gt_error_is_set(err));
    gt_ensure(had_err, gt_track_get_number_of_lines(track) == 2);
    gt_ensure(had_err, gt_track_insert_block(track, b[3], err) == 0);
    gt_ensure(had_err, !gt_error_is_set(err));
    gt_ensure(had_err, gt_track_get_number_of_lines(track) == 2);
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest + 2*(l_rest + BAR_HEIGHT_DEFAULT));
    gt_ensure(had_err, !gt_error_is_set(err));

    /* bar height set per type and in the "format" section */
    gt_style_set_num(sty, "exon", "bar_height", 42);
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest + 2*(l_rest+42));
    gt_ensure(had_err, !gt_error_is_set(err));
    gt_style_set_num(sty, "gene", "bar_height", 23);
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest + 2*(l_rest+42));
    gt_ensure(had_err, !gt_error_is_set(err));
    gt_style_unset(sty, "exon", "bar_height");
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest + 2*(l_rest+23));
    gt_ensure(had_err, !gt_error_is_set(err));
    gt_style_unset(sty, "gene", "bar_height");
    gt_style_set_num(sty, "format", "bar_height", 99);
    gt_ensure(had_err, gt_track_get_height(track, &height, sty, err) == 0);
    gt_ensure(had_err, height == t_rest + 2*(l_rest+99));
    gt_ensure(had_err, !gt_error_is_set(err));

    gt_ensure(had_err, gt_track_get_number_of_discarded_blocks(track) == 0);

    /* only the parents are deleted explicitly; the exon nodes were attached
       to them as children above */
    gt_track_delete(track);
    gt_str_delete(title);
    gt_style_delete(sty);
    for (i=0; i<4; i++)
    {
        gt_block_delete(b[i]);
        gt_genome_node_delete(parent[i]);
    }
    return had_err;
}
예제 #30
0
/*
 * Creates or extends the sequence region node for the genomic sequence
 * <seqnum> of file <filenum> and records the sequence ID used for it in
 * srf->seqid_store.
 *
 * The range is taken from the substring specification of <input> if one is
 * in use, otherwise from the relative genomic range of the sequence. If
 * srf->use_desc_ranges is set and a range can be parsed from the sequence
 * description, the range is shifted by the start of that description range,
 * otherwise by 1 (1-based coordinates).
 *
 * Three cases are distinguished:
 *  - <sequenceid> is empty, or it is already in use and no description
 *    offset was parsed: a new ID "<basename of genomic file>|<seqnum + 1>"
 *    is made up and a new region node is added to <sequence_regions>;
 *  - <sequenceid> has not been used yet: it is registered and a new region
 *    node is added to <sequence_regions>;
 *  - <sequenceid> is already in use and a description offset was parsed:
 *    the range of the existing region node is joined with the new range.
 */
static void make_sequence_region(GtHashmap *sequence_regions,
                                 GtStr *sequenceid,
                                 GthRegionFactory *srf,
                                 GthInput *input,
                                 GtUword filenum,
                                 GtUword seqnum)
{
    /* true once a range was parsed from the description; only then is
       descrange valid */
    bool offset_is_defined = false;
    GtRange range, descrange;
    GtGenomeNode *sr = NULL;
    gt_assert(sequence_regions && sequenceid && srf && input);
    if (gth_input_use_substring_spec(input)) {
        range.start = gth_input_genomic_substring_from(input);
        range.end   = gth_input_genomic_substring_to(input);
    }
    else {
        range = gth_input_get_relative_genomic_range(input, filenum, seqnum);
    }
    if (srf->use_desc_ranges) {
        GtStr *description = gt_str_new();
        gth_input_get_genomic_description(input, description, filenum, seqnum);
        /* a return value of 0 is treated as "range found" */
        if (!gt_parse_description_range(gt_str_get(description), &descrange))
            offset_is_defined = true;
        gt_str_delete(description);
    }
    if (offset_is_defined)
        range = gt_range_offset(&range, descrange.start);
    else
        range = gt_range_offset(&range, 1); /* 1-based */
    if (!gt_str_length(sequenceid) ||
            (gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid)) &&
             !offset_is_defined)) {
        /* sequenceid is empty or exists already (and no offset has been parsed)
           -> make one up */
        GtStr *seqid;
        char *base;
        base = gt_basename(gth_input_get_genomic_filename(input, filenum));
        seqid = gt_str_new_cstr(base);
        gt_free(base);
        gt_str_append_char(seqid, '|');
        gt_str_append_uword(seqid, seqnum + 1); /* 1-based */
        seqid_store_add(srf->seqid_store, filenum, seqnum, seqid, GT_UNDEF_UWORD);
        gt_assert(!gt_cstr_table_get(srf->used_seqids, gt_str_get(seqid)));
        gt_cstr_table_add(srf->used_seqids, gt_str_get(seqid));
        sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum, seqnum),
                                range.start, range.end);
        /* the region is stored under the string returned by the table of
           used IDs, not under the local seqid that is deleted below */
        gt_hashmap_add(sequence_regions,
                       (void*) gt_cstr_table_get(srf->used_seqids,
                               gt_str_get(seqid)),
                       sr);
        gt_str_delete(seqid);
    }
    else {
        /* sequenceid does not exist yet (or an offset has been parsed)
           -> use this one */
        if (!gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid))) {
            /* no sequence region with this id exists -> create one */
            gt_cstr_table_add(srf->used_seqids, gt_str_get(sequenceid));
            seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid,
                            offset_is_defined ? descrange.start : GT_UNDEF_UWORD);
            sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum,
                                                    seqnum), range.start, range.end);
            gt_hashmap_add(sequence_regions,
                           (void*) gt_cstr_table_get(srf->used_seqids,
                                   gt_str_get(sequenceid)),
                           sr);
        }
        else {
            GtRange prev_range, new_range;
            /* sequence region with this id exists already -> modify range */
            sr = gt_hashmap_get(sequence_regions, gt_str_get(sequenceid));
            gt_assert(sr);
            prev_range = gt_genome_node_get_range(sr);
            new_range = gt_range_join(&prev_range, &range);
            gt_genome_node_set_range(sr, &new_range);
            seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid,
                            offset_is_defined ? descrange.start : GT_UNDEF_UWORD);
        }
    }
    gt_assert(sr);
}