/* Runner of the inline sequence splitting tool.
   Opens the optional sequence and GFF3 output files named in the tool
   arguments, chains a GFF3 input stream (built from the remaining command
   line arguments), a sequence node output stream and a GFF3 output stream,
   and pulls the complete chain.
   Returns 0 on success and a non-zero value on failure; -1 is returned when
   one of the output files could not be opened. */
static int gt_inlineseq_split_runner(int argc, const char **argv,
                                     int parsed_args,
                              void *tool_arguments, GtError *err)
{
  GtInlineseqSplitArguments *arguments = tool_arguments;
  GtNodeStream *in_stream = NULL,
               *seq_stream = NULL,
               *out_stream = NULL;
  GtFile *seqfile = NULL,
         *gfffile = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  /* both output files are optional; an empty name means "not requested" */
  if (gt_str_length(arguments->seqoutfile) > 0) {
    seqfile = gt_file_new(gt_str_get(arguments->seqoutfile), "w+", err);
    had_err = seqfile ? 0 : -1;
  }
  if (!had_err && gt_str_length(arguments->gffoutfile) > 0) {
    gfffile = gt_file_new(gt_str_get(arguments->gffoutfile), "w+", err);
    if (!gfffile)
      had_err = -1;
  }

  if (!had_err) {
    /* input -> sequence node output -> GFF3 output */
    in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                               argv + parsed_args);
    gt_assert(in_stream);
    seq_stream = gt_sequence_node_out_stream_new(in_stream, seqfile, err);
    gt_assert(seq_stream);
    out_stream = gt_gff3_out_stream_new(seq_stream, gfffile);
    had_err = gt_node_stream_pull(out_stream, err);
  }

  /* cleanup; the pointers are still NULL if a stage was never reached */
  gt_node_stream_delete(in_stream);
  gt_node_stream_delete(out_stream);
  gt_node_stream_delete(seq_stream);
  gt_file_delete(seqfile);
  gt_file_delete(gfffile);

  return had_err;
}
/* Parses all FASTA entries available from the reader's I/O object and hands
   every entry to the given callbacks (each of them is optional, but at least
   one has to be given): <proc_description> receives the description,
   <proc_sequence_part> the complete sequence of the entry and
   <proc_sequence_length> its length. <data> is passed through to the
   callbacks unchanged.
   An empty input is reported as an error (-1). Processing stops at the first
   parse error or the first callback returning a non-zero value; that value is
   returned. Returns 0 if everything was processed. */
static int gt_fasta_reader_rec_run(GtFastaReader *fasta_reader,
                                   GtFastaReaderProcDescription
                                   proc_description,
                                   GtFastaReaderProcSequencePart
                                   proc_sequence_part,
                                   GtFastaReaderProcSequenceLength
                                   proc_sequence_length, void *data,
                                   GtError *err)
{
  GtFastaReaderRec *reader = gt_fasta_reader_rec_cast(fasta_reader);
  GtStr *desc, *seq;
  int had_err = 0;
  gt_error_check(err);

  /* a run without any callback would be pointless */
  gt_assert(proc_description || proc_sequence_part || proc_sequence_length);

  /* buffers which are reused for every entry */
  desc = gt_str_new();
  seq = gt_str_new();

  /* refuse empty input */
  if (!gt_io_has_char(reader->seqio)) {
    gt_error_set(err, "sequence file \"%s\" is empty",
                 gt_io_get_filename(reader->seqio));
    had_err = -1;
  }

  while (!had_err && gt_io_has_char(reader->seqio)) {
    gt_str_reset(desc);
    gt_str_reset(seq);

    had_err = parse_fasta_entry(desc, seq, reader->seqio, err);
    if (had_err)
      break;

    /* feed the entry to the callbacks, stopping at the first failure */
    if (proc_description) {
      had_err = proc_description(gt_str_get(desc), gt_str_length(desc), data,
                                 err);
      if (had_err)
        break;
    }
    if (proc_sequence_part) {
      had_err = proc_sequence_part(gt_str_get(seq), gt_str_length(seq), data,
                                   err);
      if (had_err)
        break;
    }
    if (proc_sequence_length)
      had_err = proc_sequence_length(gt_str_length(seq), data, err);
  }

  gt_str_delete(desc);
  gt_str_delete(seq);

  return had_err;
}
/* Extracts the sequence of the features of the given <type> below
   <feature_node> (via gt_extract_feature_sequence_generic(), using the region
   mapping <rm>) and translates it in all three reading frames.
   The translated characters of frame 1, 2 and 3 are appended to
   <translation_fr1>, <translation_fr2> and <translation_fr3>, respectively;
   each of these may be NULL, in which case the frame is skipped.
   If <ttable> is not NULL it is used as translation table.
   The translation starts <phase_offset> characters into the extracted
   sequence and is only attempted if at least one complete codon remains.
   Returns 0 on success and a non-zero value on failure, in which case <err>
   is set. */
int gt_extract_and_translate_feature_sequence(GtFeatureNode *feature_node,
                                              const char *type,
                                              bool join,
                                              GtRegionMapping *rm,
                                              GtTransTable *ttable,
                                              GtStr *translation_fr1,
                                              GtStr *translation_fr2,
                                              GtStr *translation_fr3,
                                              GtError *err)
{
  GtTranslator *tr = NULL;
  GtTranslatorStatus status;
  GtCodonIterator *ci = NULL;
  unsigned int frame, phase_offset = 0;
  char translated;
  int had_err = 0;
  GtStr *sequence = gt_str_new();
  gt_assert(feature_node && type);

  had_err = gt_extract_feature_sequence_generic(sequence,
                                                (GtGenomeNode*) feature_node,
                                                type, join, NULL, NULL,
                                                &phase_offset, rm, err);

  /* do translation if we have at least one codon */
  if (!had_err && gt_str_length(sequence) > phase_offset + 2) {
    ci = gt_codon_iterator_simple_new(gt_str_get(sequence) + phase_offset,
                                      gt_str_length(sequence) - phase_offset,
                                      NULL);
    tr = gt_translator_new(ci);
    if (ttable)
      gt_translator_set_translation_table(tr, ttable);
    /* pass <err> on, so that a failing translation does not result in an
       error return value without an error message */
    status = gt_translator_next(tr, &translated, &frame, err);
    while (status == GT_TRANSLATOR_OK) {
      if (frame == 0 && translation_fr1)
        gt_str_append_char(translation_fr1, translated);
      else if (frame == 1 && translation_fr2)
        gt_str_append_char(translation_fr2, translated);
      else if (frame == 2 && translation_fr3)
        gt_str_append_char(translation_fr3, translated);
      status = gt_translator_next(tr, &translated, &frame, err);
    }
    if (status == GT_TRANSLATOR_ERROR)
      had_err = -1;
  }
  /* <tr> and <ci> are still NULL if no translation was attempted */
  gt_translator_delete(tr);
  gt_codon_iterator_delete(ci);
  gt_str_delete(sequence);

  return had_err;
}
Beispiel #4
0
/* Checks the consistency of the genomediff options against the number of
   remaining (file) arguments <rest_argc> and derives the with_esa, with_pck
   and with_units flags from the parsed options.
   Inconsistent combinations which can be tolerated only trigger a warning;
   all others set <err> and make the function return -1.
   Returns 0 if the arguments are usable. */
static int gt_genomediff_arguments_check(int rest_argc,
        void *tool_arguments,
        GtError *err)
{
    GtGenomediffArguments *arguments = tool_arguments;
    bool prepared_index;
    int had_err = 0;
    gt_error_check(err);
    gt_assert(arguments);

    if (rest_argc == 0) {
        gt_error_set(err, "give at least one file (base)name!");
        had_err = -1;
    }
    else {
        /* translate the index type name into the corresponding flag */
        const char *indextype = gt_str_get(arguments->indextype);
        if (!strcmp("esa", indextype))
            arguments->with_esa = true;
        else if (!strcmp("pck", indextype))
            arguments->with_pck = true;
    }
    prepared_index = (arguments->with_esa || arguments->with_pck);

    if (!had_err) {
        if (arguments->user_max_depth != -1 && !arguments->with_pck)
            gt_warning("option -maxdepth does only apply to -indextype pck");

        if (prepared_index &&
                gt_encseq_options_mirrored_value(arguments->loadopts))
            gt_warning("option -mirrored is ignored with esa and pck index");

        if (prepared_index && rest_argc > 1) {
            gt_error_set(err, "there should be only one basename argument with "
                         "-indextype esa|pck");
            had_err = -1;
        }
    }

    /* this test is performed regardless of the current error state */
    if (rest_argc == 1 && gt_str_length(arguments->indexname) != 0) {
        gt_error_set(err, "Option -indexname is only needed with sequence files, "
                     "if one file is given as argument, this should be an index.");
        had_err = -1;
    }
    if (!had_err && rest_argc > 1 && gt_str_length(arguments->indexname) == 0) {
        gt_error_set(err, "more than one input file given, please use -indexname "
                     "for basename of indices created during run.");
        had_err = -1;
    }

    if (!had_err)
        arguments->with_units = gt_option_is_set(arguments->ref_unitfile);

    return had_err;
}
Beispiel #5
0
/* Parses one FASTQ block from <seqit>, consisting of a description line, the
   sequence, a separator line (optionally repeating the description) and the
   qualities. The parts are stored in the buffers of <seqit>.
   Returns the (possibly non-zero) result of the last parsing step, or -2 with
   <err> set if the two descriptions differ or if sequence and qualities have
   different lengths. */
static inline int parse_fastq_block(GtSeqIteratorFastQ *seqit, GtError *err)
{
  int had_err = 0;
  gt_assert(seqit);
  gt_error_check(err);

  /* parse @<seqname> */
  had_err = parse_fastq_seqname(seqit,
                                seqit->descbuffer,
                                GT_FASTQ_BLOCK_START_CHAR,
                                err);
  if (!had_err) {
    /* parse sequence */
    had_err = parse_fastq_sequence(seqit, err);
    /* NOTE(review): gt_fastq_premature_end_check() is not defined in this
       part of the file; presumably it reports an unexpected end of file and
       leaves this function -- confirm against its definition */
    gt_fastq_premature_end_check(had_err, seqit);
  }
  if (!had_err) {
    /* parse +[seqname] */
    had_err = parse_fastq_seqname(seqit,
                                  seqit->qdescbuffer,
                                  GT_FASTQ_QUAL_SEPARATOR_CHAR,
                                  err);
    gt_fastq_premature_end_check(had_err, seqit);
  }
  /* the description after the separator is optional, but if it is given it
     has to be identical to the sequence description */
  if (!had_err
      && gt_str_length(seqit->qdescbuffer)
      && gt_str_cmp(seqit->descbuffer, seqit->qdescbuffer) != 0)
  {
      gt_error_set(err, "sequence description '%s' is not equal to "
                        "qualities description '%s' in line %lu",
                        gt_str_get(seqit->descbuffer),
                        gt_str_get(seqit->qdescbuffer),
                        seqit->curline-1);
      return -2;
  }
  if (!had_err) {
    /* parse qualities */
    had_err = parse_fastq_qualities(seqit, err);
    /* the length comparison is performed whatever parse_fastq_qualities()
       returned; NOTE(review): presumably intended, so that a block ending
       at the end of the file is still validated -- confirm */
    if (gt_str_length(seqit->qualsbuffer)
          != gt_str_length(seqit->sequencebuffer))
    {
      gt_error_set(err, "lengths of character sequence and qualities "
                        "sequence differ (%lu <-> %lu)",
                        gt_str_length(seqit->qualsbuffer),
                        gt_str_length(seqit->sequencebuffer));
      return -2;
    }
  }
  return had_err;
}
Beispiel #6
0
/* Reads one line from <seqit> which has to begin with <startchar> and appends
   everything between <startchar> and the end of the line (both excluded) to
   <buffer>, which has to be empty.
   seqit->currentread is incremented for every character read and
   seqit->curline once the end of the line has been reached.
   Returns 0 on success, EOF if the input ended before the end of the line
   and -2 (with <err> set) if the line does not begin with <startchar>. */
static inline int parse_fastq_seqname(GtSeqIteratorFastQ *seqit,
                                      GtStr *buffer,
                                      char startchar,
                                      GtError *err)
{
  /* an int (not a char) is needed to tell EOF apart from every valid
     character value: with a char, EOF is either never detected (unsigned
     char) or confused with the byte 0xFF (signed char) */
  int currentchar;
  bool firstsymbol = true;
  gt_error_check(err);
  gt_assert(seqit && buffer);
  gt_assert(gt_str_length(buffer) == 0);
  if ((currentchar = fastq_buf_getchar(seqit)) == EOF)
    return EOF;
  seqit->currentread++;
  if (currentchar != startchar) {
    gt_error_set(err, "'%c' expected, '%c' encountered instead in line %lu",
                      startchar,
                      currentchar,
                      seqit->curline);
    return -2;
  }
  while (currentchar != GT_FASTQ_NEWLINESYMBOL) {
    /* the first symbol is the start character, which is not stored */
    if (!firstsymbol)
      gt_str_append_char(buffer, (char) currentchar);
    else
      firstsymbol = false;
    if ((currentchar = fastq_buf_getchar(seqit)) == EOF)
      return EOF;
    seqit->currentread++;
  }
  seqit->curline++;
  return 0;
}
Beispiel #7
0
/* Runner of the splitfasta tool. The file to split is the first argument
   after the parsed options.
   If a split description was given, the file is split via
   split_description(). Otherwise it is split via split_fasta_file() with a
   maximum file size which is either derived from the requested number of
   files (estimated file size divided by that number) or taken from the
   maximum file size option (given in MB).
   Returns the result of the function performing the split. */
static int gt_splitfasta_runner(GT_UNUSED int argc, const char **argv,
                                int parsed_args, void *tool_arguments,
                                GtError *err)
{
  SplitfastaArguments *arguments = tool_arguments;
  const char *filename;
  unsigned int num_files;
  unsigned long max_filesize;

  gt_error_check(err);
  gt_assert(arguments);

  num_files = arguments->num_files;
  filename = argv[parsed_args];

  /* splitting by description takes precedence */
  if (gt_str_length(arguments->splitdesc)) {
    return split_description(filename, arguments->splitdesc,
                             arguments->width, arguments->force, err);
  }

  if (num_files) {
    /* distribute the (estimated) file size over the requested files */
    off_t file_size = gt_file_estimate_size(filename);
    max_filesize = file_size / num_files;
  }
  else {
    /* MB -> bytes */
    max_filesize = arguments->max_filesize_in_MB << 20;
  }
  return split_fasta_file(filename, max_filesize, arguments->force, err);
}
Beispiel #8
0
/* Validates the arguments of the spaced seed tool: an input index name is
   mandatory and no further file arguments are accepted. In addition, the
   withesa flag is derived from the index option which was set.
   Returns 0 if the arguments are valid and -1 (with <err> set) otherwise. */
static int gt_cge_spacedseed_arguments_check(int rest_argc,
                                             void *tool_arguments,
                                             GtError *err)
{
  Cge_spacedseed_options *arguments = tool_arguments;

  if (gt_str_length(arguments->str_inputindex) == 0)
  {
    gt_error_set(err,"missing indexname");
    return -1;
  }
  if (!gt_option_is_set(arguments->refoptionesaindex))
  {
    /* not the esa index, so the pck index option must have been used */
    gt_assert(gt_option_is_set(arguments->refoptionpckindex));
    arguments->withesa = false;
  } else
  {
    arguments->withesa = true;
  }
  if (rest_argc == 0)
  {
    return 0;
  }
  gt_error_set(err,"superfluous file arguments");
  return -1;
}
/* Applies <checkfunction> to the input selected in <opt>. The first of the
   following inputs which is available is used:
   - two strings: the function is called twice for the pair, first with the
     forward flag set and then with the flag unset; 2 is returned,
   - two files: either all pairs of the FASTA sequences stored in <opt> are
     checked (if opt->fasta is set) or the two files are handed to
     gt_runcheckfunctionontwofiles(); 2 is returned,
   - a character list with a length: the result of
     gt_runcheckfunctiononalphalen() is returned,
   - a text: the result of gt_runcheckfunctionontext() is returned.
   It is a programming error if none of these inputs is available. */
static GtUword applycheckfunctiontosimpleoptions(
                                  Checkcmppairfuntype checkfunction,
                                  const Cmppairwiseopt *opt)
{
  if (gt_str_array_size(opt->strings) > 0)
  {
    const char *first = gt_str_array_get(opt->strings,0),
               *second = gt_str_array_get(opt->strings,1UL);
    int pass;

    /* pass 0 is the forward run, pass 1 the run with the flag unset */
    for (pass = 0; pass < 2; pass++)
    {
      checkfunction(pass == 0,
                    (const GtUchar *) first,
                    (GtUword) strlen(first),
                    (const GtUchar *) second,
                    (GtUword) strlen(second));
    }
    return 2UL; /* number of testcases */
  }
  if (gt_str_array_size(opt->files) > 0)
  {
    if (!opt->fasta)
    {
      gt_runcheckfunctionontwofiles(checkfunction,
                                    gt_str_array_get(opt->files,0),
                                    gt_str_array_get(opt->files,1UL));
    }
    else
    {
      GtUword idx0, idx1;

      /* all against all */
      for (idx0 = 0; idx0 < gt_str_array_size(opt->fastasequences0); idx0++)
      {
        const char *seq0 = gt_str_array_get(opt->fastasequences0,idx0);

        for (idx1 = 0; idx1 < gt_str_array_size(opt->fastasequences1); idx1++)
        {
          const char *seq1 = gt_str_array_get(opt->fastasequences1,idx1);

          checkfunction(true,
                        (const GtUchar *) seq0,
                        (GtUword) strlen(seq0),
                        (const GtUchar *) seq1,
                        (GtUword) strlen(seq1));
        }
      }
    }
    return 2UL;
  }
  if (opt->charlistlen != NULL)
  {
    return gt_runcheckfunctiononalphalen(checkfunction,
                                      gt_str_get(opt->charlistlen->charlist),
                                      opt->charlistlen->len);
  }
  if (gt_str_length(opt->text) > 0)
  {
    return gt_runcheckfunctionontext(checkfunction, gt_str_get(opt->text));
  }
  /* no input at all */
  gt_assert(false);
  return 0;
}
Beispiel #10
0
/* Builds the GFF3 target attribute of <sa> and stores it in
   sa->gff3_target_attribute, which must not have been set before.
   The attribute consists of the GFF3-escaped reference ID (prefixed with
   GT_MD5_SEQID_PREFIX, the reference MD5 and a ':' if <md5ids> is set),
   a start value, an end value and the strand character, separated by single
   blanks. */
static void set_gff3_target_attribute(GthSA *sa, bool md5ids)
{
  GtStr *attr;
  gt_assert(sa && !sa->gff3_target_attribute);
  attr = gt_str_new();
  sa->gff3_target_attribute = attr;

  /* target ID */
  if (md5ids) {
    gt_assert(sa->ref_md5);
    gt_str_append_cstr(attr, GT_MD5_SEQID_PREFIX);
    gt_str_append_str(attr, sa->ref_md5);
    gt_str_append_char(attr, ':');
  }
  gt_gff3_escape(attr, gt_str_get(sa->ref_id), gt_str_length(sa->ref_id));

  /* start */
  gt_str_append_char(attr, ' ');
  gt_str_append_uword(attr,
                      gth_sa_referencecutoff_start(sa) + 1); /* XXX: use
                                                                reference
                                                                dpstartpos */
  /* end */
  gt_str_append_char(attr, ' ');
  gt_str_append_uword(attr,
                      gth_sa_ref_total_length(sa) - /* XXX */
                      gth_sa_referencecutoff_end(sa));

  /* strand */
  gt_str_append_char(attr, ' ');
  gt_str_append_char(attr,
                     GT_STRAND_CHARS[sa->ref_strand_forward
                                     ? GT_STRAND_FORWARD
                                     : GT_STRAND_REVERSE]);
}
Beispiel #11
0
/* Returns the GFF3 target attribute of <sa> as C string. The attribute is
   built on the first call for which <md5ids> is set or the reference ID of
   <sa> is not empty, and is reused afterwards (that is, <md5ids> has no
   effect once the attribute exists).
   Returns NULL if no attribute is available. */
const char* gth_sa_gff3_target_attribute(GthSA *sa, bool md5ids)
{
  gt_assert(sa);
  if (!sa->gff3_target_attribute && (md5ids || gt_str_length(sa->ref_id)))
    set_gff3_target_attribute(sa, md5ids);
  /* without MD5 IDs and with an empty reference ID nothing was built; do not
     hand a NULL string object to gt_str_get() in this case */
  if (!sa->gff3_target_attribute)
    return NULL;
  return gt_str_get(sa->gff3_target_attribute);
}
Beispiel #12
0
/* Runner of the extractseq tool.
   If a FASTA key file was given, the work is delegated to
   process_fastakeyfile(). Otherwise all sequence collections obtained from
   the remaining arguments are visited, and from each one sequences are
   extracted either by position (if a start position was given) or by
   matching the given pattern.
   Returns 0 on success and a non-zero value on the first failure. */
static int gt_extractseq_runner(int argc, const char **argv, int parsed_args,
                                void *tool_arguments, GtError *err)
{
  ExtractSeqArguments *arguments = tool_arguments;
  GtBioseqIterator *bsi;
  GtBioseq *bs;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  if (gt_str_length(arguments->fastakeyfile)) {
    return process_fastakeyfile(arguments->fastakeyfile, argc - parsed_args,
                                argv + parsed_args, arguments->width,
                                arguments->outfp, err);
  }

  bsi = gt_bioseq_iterator_new(argc - parsed_args, argv + parsed_args);
  for (;;) {
    /* stop after a failed extraction, a failed iteration step or when the
       iterator delivers nothing anymore */
    if (had_err)
      break;
    had_err = gt_bioseq_iterator_next(bsi, &bs, err);
    if (had_err || !bs)
      break;

    if (arguments->frompos) {
      had_err = extractseq_pos(arguments->outfp, bs, arguments->frompos,
                               arguments->topos, arguments->width, err);
    }
    else {
      had_err = extractseq_match(arguments->outfp, bs,
                                 gt_str_get(arguments->pattern),
                                 arguments->width, err);
    }
    gt_bioseq_delete(bs);
  }
  gt_bioseq_iterator_delete(bsi);

  return had_err;
}
Beispiel #13
0
/* Region node handler of the select visitor.
   <rn> is deleted if a sequence ID was specified which differs from the one
   of <rn>, or if a contain range was defined which does not overlap the
   range of <rn>. Otherwise <rn> is added to the node buffer of the visitor;
   if a contain range was defined, the range of <rn> is clipped to it before.
   Always returns 0. */
static int select_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                      GT_UNUSED GtError *err)
{
  GtSelectVisitor *sv;
  GtGenomeNode *node = (GtGenomeNode*) rn;
  GtRange range;
  gt_error_check(err);
  sv = select_visitor_cast(nv);

  /* a seqid was specified and it is not the one of <rn> -> drop <rn> */
  if (gt_str_length(sv->seqid) &&
      gt_str_cmp(sv->seqid, gt_genome_node_get_seqid(node))) {
    gt_genome_node_delete(node);
    return 0;
  }

  /* no contain range was defined -> keep <rn> as it is */
  if (sv->contain_range.start == GT_UNDEF_ULONG) {
    gt_queue_add(sv->node_buffer, rn);
    return 0;
  }

  range = gt_genome_node_get_range(node);

  /* contain range does not overlap with <rn> range -> drop <rn> */
  if (!gt_range_overlap(&range, &sv->contain_range)) {
    gt_genome_node_delete(node);
    return 0;
  }

  /* clip the range of <rn> to the contain range and keep it */
  range.start = MAX(range.start, sv->contain_range.start);
  range.end = MIN(range.end, sv->contain_range.end);
  gt_genome_node_set_range(node, &range);
  gt_queue_add(sv->node_buffer, rn);
  return 0;
}
Beispiel #14
0
/* Lua binding for the sequence extraction of feature nodes.
   Lua arguments: (1) a genome node which has to be a feature node, (2) the
   feature type as string, (3) the join flag, (4) a region mapping.
   Pushes the extracted sequence, or nil if the sequence is empty, and
   returns 1. If the extraction fails, the result of gt_lua_error() is
   returned. */
static int feature_node_lua_extract_sequence(lua_State *L)
{
  GtGenomeNode **gn;
  GtFeatureNode *fn;
  GtRegionMapping **region_mapping;
  GtStr *sequence;
  GtError *err;
  const char *type;
  bool join;
  int had_err;

  /* validate all Lua arguments before anything is allocated */
  gn = check_genome_node(L, 1);
  fn = gt_feature_node_try_cast(*gn); /* NULL if not a feature node */
  luaL_argcheck(L, fn, 1, "not a feature node");
  type = luaL_checkstring(L, 2);
  join = lua_toboolean(L, 3);
  region_mapping = check_region_mapping(L, 4);

  err = gt_error_new();
  sequence = gt_str_new();
  had_err = gt_extract_feature_sequence(sequence, *gn, type, join, NULL, NULL,
                                        *region_mapping, err);
  if (had_err) {
    gt_str_delete(sequence);
    return gt_lua_error(L, err);
  }

  /* an empty sequence is reported as nil */
  if (gt_str_length(sequence) > 0)
    lua_pushstring(L, gt_str_get(sequence));
  else
    lua_pushnil(L);

  gt_str_delete(sequence);
  gt_error_delete(err);
  return 1;
}
/* Prints a one line comment to stdout describing the input selected in
   <opt>: two strings, two files, a character list with a length, or a text
   (checked in this order, only the first available input is shown).
   The line for two strings is suppressed if opt->showedist is set. Nothing
   is printed if no input is available. */
static void showsimpleoptions(const Cmppairwiseopt *opt)
{
  if (gt_str_array_size(opt->strings) > 0)
  {
    if (opt->showedist)
    {
      return;
    }
    printf("# two strings \"%s\" \"%s\"\n", gt_str_array_get(opt->strings,0),
           gt_str_array_get(opt->strings,1UL));
  }
  else if (gt_str_array_size(opt->files) > 0)
  {
    printf("# two files \"%s\" \"%s\"\n", gt_str_array_get(opt->files,0),
           gt_str_array_get(opt->files,1UL));
  }
  else if (opt->charlistlen != NULL)
  {
    printf("# alphalen \"%s\" " GT_WU "\n",
           gt_str_get(opt->charlistlen->charlist),
           opt->charlistlen->len);
  }
  else if (gt_str_length(opt->text) > 0)
  {
    printf("# text \"%s\"\n", gt_str_get(opt->text));
  }
}
Beispiel #16
0
/* Returns true if position <pos> of the spliced sequence <ss> is a border,
   that is, if the position mapped to <pos> is not directly adjacent to the
   position mapped to its neighbour. In forward direction, <pos> is compared
   with its successor (the mapped values are expected to increase by one);
   otherwise it is compared with its predecessor (the mapped values are
   expected to decrease by one). Positions without the respective neighbour
   are never borders. */
bool gt_splicedseq_pos_is_border(const Splicedseq *ss, unsigned long pos)
{
  unsigned long mapped, neighbour;
  gt_assert(ss &&
         gt_str_length(ss->splicedseq) == gt_array_size(ss->positionmapping));
  gt_assert(pos < gt_str_length(ss->splicedseq)); /* legal position */

  if (ss->forward) {
    /* the last position has no successor */
    if (pos + 1 >= gt_array_size(ss->positionmapping))
      return false;
    mapped = *(unsigned long*) gt_array_get(ss->positionmapping, pos);
    neighbour = *(unsigned long*) gt_array_get(ss->positionmapping, pos+1);
    return mapped + 1 != neighbour;
  }

  /* the first position has no predecessor */
  if (!pos)
    return false;
  neighbour = *(unsigned long*) gt_array_get(ss->positionmapping, pos-1);
  mapped = *(unsigned long*) gt_array_get(ss->positionmapping, pos);
  return neighbour - 1 != mapped;
}
/* Translates <sequence> (of the given <length>) in three reading frames and
   shows every non-empty translation as FASTA entry on arguments->outfp.
   The translated characters are collected in translations[0..2] (indexed by
   frame); a buffer is reset after it has been shown. The description of an
   entry is <desc> followed by " (<frame number><strand>)", where the strand
   is "-" if <rev> is set and "+" otherwise.
   Returns 0 on success and -1 if the translation failed (in this case
   nothing is shown and the translation buffers are not reset). */
static int gt_seqtranslate_do_translation(GtTranslateArguments *arguments,
                                       const char *sequence,
                                       GtUword length,
                                       const char *desc,
                                       GtStr **translations,
                                       bool rev,
                                       GtError *err)
{
  GtCodonIterator *ci;
  GtTranslator *tr;
  GtTranslatorStatus trst;
  GtStr *header;
  char translated;
  unsigned int frame,
               i;

  /* collect the translated characters frame by frame */
  ci = gt_codon_iterator_simple_new(sequence, length, err);
  tr = gt_translator_new(ci);
  for (trst = gt_translator_next(tr, &translated, &frame, err);
       trst == GT_TRANSLATOR_OK;
       trst = gt_translator_next(tr, &translated, &frame, err)) {
    gt_str_append_char(translations[frame], translated);
  }
  gt_codon_iterator_delete(ci);
  gt_translator_delete(tr);
  if (trst == GT_TRANSLATOR_ERROR)
    return -1;

  /* output */
  header = gt_str_new();
  for (i = 0; i < 3; i++) {
    if (gt_str_length(translations[i]) == 0)
      continue;
    gt_str_append_cstr(header, desc);
    gt_str_append_cstr(header, " (");
    gt_str_append_ulong(header, i+1);
    gt_str_append_cstr(header, rev ? "-" : "+");
    gt_str_append_cstr(header, ")");
    gt_fasta_show_entry(gt_str_get(header), gt_str_get(translations[i]),
                        gt_str_length(translations[i]),
                        arguments->fasta_width, arguments->outfp);
    gt_str_reset(translations[i]);
    gt_str_reset(header);
  }
  gt_str_delete(header);
  return 0;
}
/* Runner of the read compression tool.
   Creates the alphabet (from the symbol map file if one was given, the DNA
   alphabet otherwise), sets up an encoder for the input files with the
   requested sampling method and rate, and encodes the reads under the given
   name. If showing of timings is enabled, progress is reported on stdout.
   Returns 0 on success and 1 on failure. */
static int gt_compreads_compress_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments, GtError *err)
{
  GtCsrHcrEncodeArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlphabet *alpha = NULL;
  GtHcrEncoder *hcre = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  if (gt_showtime_enabled()) {
    timer = gt_timer_new_with_progress_description("start");
    /* check the timer before it is used, not afterwards */
    gt_assert(timer);
    gt_timer_start(timer);
  }

  if (gt_str_length(arguments->smap) > 0) {
    alpha = gt_alphabet_new_from_file_no_suffix(gt_str_get(arguments->smap),
                                                err);
  }
  else {
    alpha = gt_alphabet_new_dna();
  }
  if (!alpha)
    had_err = 1;

  if (!had_err) {
    if (timer != NULL)
      gt_timer_show_progress(timer, "encoding", stdout);
    hcre = gt_hcr_encoder_new(arguments->files, alpha, arguments->descs,
                              arguments->qrng, timer, err);
    if (!hcre)
      had_err = 1;
    else {
      /* if neither sampling method was requested, none is set here */
      if (arguments->pagewise)
        gt_hcr_encoder_set_sampling_page(hcre);
      else if (arguments->regular)
        gt_hcr_encoder_set_sampling_regular(hcre);

      gt_hcr_encoder_set_sampling_rate(hcre, arguments->srate);

      if (gt_hcr_encoder_encode(hcre, gt_str_get(arguments->name),
                                timer, err) != 0)
        had_err = 1;
    }
    gt_hcr_encoder_delete(hcre);
  }
  gt_alphabet_delete(alpha);
  if (timer != NULL) {
    gt_timer_show_progress_final(timer, stdout);
    gt_timer_delete(timer);
  }
  return had_err;
}
Beispiel #19
0
/* Runner of the encseq encode tool.
   Collects all arguments following the parsed options as input files and
   encodes them via encode_sequence_files(). If no index name was given, the
   base name of the input file is used as index name; this is only possible
   for a single input file, for more files -1 is returned with <err> set.
   After a successful encoding, statistics are shown if requested.
   NOTE(review): <argc> and <tool_arguments> are marked GT_UNUSED although
   both are used. */
static int gt_encseq_encode_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, GT_UNUSED void *tool_arguments,
                               GtError *err)
{
  GtEncseqEncodeArguments *arguments =
                                      (GtEncseqEncodeArguments*) tool_arguments;
  GtStrArray *infiles;
  bool indexname_missing;
  int had_err = 0,
      argidx;
  gt_error_check(err);

  /* everything after the options is an input file */
  infiles = gt_str_array_new();
  for (argidx = parsed_args; argidx < argc; argidx++)
    gt_str_array_add_cstr(infiles, argv[argidx]);

  indexname_missing = gt_str_length(arguments->indexname) == 0UL;
  if (indexname_missing && gt_str_array_size(infiles) > 1UL) {
    gt_error_set(err,"if more than one input file is given, then "
                     "option -indexname is mandatory");
    had_err = -1;
  }
  else if (indexname_missing) {
    /* derive the index name from the input file */
    char *basenameptr = gt_basename(gt_str_array_get(infiles, 0UL));
    gt_str_set(arguments->indexname, basenameptr);
    gt_free(basenameptr);
  }

  if (!had_err) {
    gt_assert(gt_str_length(arguments->indexname) > 0UL);
    had_err = encode_sequence_files(infiles,
                                    arguments->eopts,
                                    gt_str_get(arguments->indexname),
                                    arguments->verbose,
                                    arguments->no_esq_header,
                                    err);
  }

  if (!had_err && arguments->showstats)
    show_encoded_statistics(infiles, gt_str_get(arguments->indexname));

  gt_str_array_delete(infiles);
  return had_err;
}
Beispiel #20
0
/* Appends a textual representation of <alphabet> to <dest>.
   If the alphabet has a stored definition (alphadef), it is appended as it
   is. Otherwise the representation is generated from the mapping tables:
   the characters of the domain are written in their stored order, and a new
   line is started whenever the symbol a character is mapped to differs from
   the one of the previous character. Each line is completed by the character
   to show for this line, unless it equals the first character of the line. */
void gt_alphabet_to_str(const GtAlphabet *alphabet, GtStr *dest)
{
  GtUchar chartoshow, currentcc, previouscc = 0, firstinline = 0;
  unsigned int cnum, linenum = 0;
  bool afternewline = true;
  gt_assert(alphabet && dest);
  if (alphabet->alphadef != NULL) {
    gt_assert(gt_str_length(alphabet->alphadef));
    gt_str_append_str(dest, alphabet->alphadef);
  } else {
    for (cnum=0; cnum < alphabet->domainsize; cnum++)
    {
      currentcc = alphabet->mapdomain[cnum];
      if (cnum > 0)
      {
        /* a change of the mapped symbol terminates the current line */
        if (alphabet->symbolmap[currentcc] != alphabet->symbolmap[previouscc])
        {
          /* all lines but the last one are shown with the character stored
             for the line, the last one with the wildcard character */
          if (linenum < alphabet->mapsize-1)
          {
            chartoshow = alphabet->characters[linenum];
          } else
          {
            chartoshow = alphabet->wildcardshow;
          }
          /* do not repeat the character if the line already begins with it */
          if (firstinline != chartoshow)
          {
            gt_str_append_char(dest, (char) chartoshow);
          }
          gt_str_append_char(dest, '\n');
          afternewline = true;
          linenum++;
        } else
        {
          afternewline = false;
        }
      }
      gt_str_append_char(dest, (char) currentcc);
      /* remember the first character of each line (<afternewline> is
         initially true, so this includes the very first character) */
      if (afternewline)
      {
        firstinline = currentcc;
      }
      previouscc = currentcc;
    }
    /* complete the final line in the same way as the lines before */
    if (linenum < alphabet->mapsize-1)
    {
      chartoshow = alphabet->characters[linenum];
    } else
    {
      chartoshow = alphabet->wildcardshow;
    }
    if (firstinline != chartoshow)
    {
      gt_str_append_char(dest, (char) chartoshow);
    }
    gt_str_append_char(dest, '\n');
  }
}
Beispiel #21
0
/* Converts the option argument <gt_strand_char> into a strand and stores it
   in <strand>. An empty argument leaves <strand> untouched. The argument has
   to consist of exactly one character which gt_strand_get() accepts;
   otherwise <err> is set (naming the option <optstr>) and -1 is returned.
   Returns 0 on success. */
static int process_gt_strand_arg(GtStr *gt_strand_char, GtStrand *strand,
                                 const char *optstr, GtError *err)
{
  GtStrand parsed;
  gt_error_check(err);

  /* option was not used */
  if (!gt_str_length(gt_strand_char))
    return 0;

  parsed = gt_strand_get(gt_str_get(gt_strand_char)[0]);
  if (gt_str_length(gt_strand_char) > 1 ||
      parsed == GT_NUM_OF_STRAND_TYPES) {
    gt_error_set(err, "argument to option -%s must be one of '"
                      GT_STRAND_CHARS"'", optstr);
    return -1;
  }

  *strand = parsed;
  return 0;
}
Beispiel #22
0
/* Determines the directory of <file> and stores it in <path>.
   If <file> contains a directory part, this part is stored. Otherwise all
   components of the environment variable <env> are searched (in order) for
   an entry named <file> for which <file_exists> returns true; the first such
   component is stored in <path>. If no component qualifies, <path> is empty
   afterwards.
   Returns 0 in all these cases and -1 (with <err> set) if <env> is not
   defined. */
static int file_find_in_env_generic(GtStr *path, const char *file,
                                    const char *env, FileExistsFunc file_exists,
                                    GtError *err)
{
  char *env_copy;
  GtSplitter *splitter = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(file);
  gt_assert(file_exists);

  /* a file name with a directory part is not searched for */
  gt_file_dirname(path, file);
  if (gt_str_length(path))
    return had_err;

  /* no directory part -> scan $env */
  env_copy = getenv(env);
  if (env_copy == NULL) {
    gt_error_set(err, "environment variable $%s is not defined", env);
    had_err = -1;
  }
  else
    env_copy = gt_cstr_dup(env_copy); /* make writeable copy */

  if (!had_err) {
    GtUword idx, num_of_components;
    bool found = false;

    splitter = gt_splitter_new();
    gt_splitter_split(splitter, env_copy,
                      (GtUword) strlen(env_copy), GT_PATH_VAR_SEPARATOR);
    num_of_components = gt_splitter_size(splitter);
    for (idx = 0; idx < num_of_components; idx++) {
      char *component = gt_splitter_get_token(splitter, idx);

      /* test <component>/<file> */
      gt_str_reset(path);
      gt_str_append_cstr(path, component);
      gt_str_append_char(path, GT_PATH_SEPARATOR);
      gt_str_append_cstr(path, file);
      if (file_exists(gt_str_get(path))) {
        /* found: only the component itself is reported */
        gt_str_reset(path);
        gt_str_append_cstr(path, component);
        found = true;
        break;
      }
    }
    if (!found)
      gt_str_reset(path);
  }

  /* free */
  gt_free(env_copy);
  gt_splitter_delete(splitter);

  return had_err;
}
Beispiel #23
0
/* Writes the textual representation of <alphabet>, as generated by
   gt_alphabet_to_str(), to <fpout>. */
void gt_alphabet_output(const GtAlphabet *alphabet, FILE *fpout)
{
  GtStr *text;
  size_t num_of_chars;

  gt_assert(alphabet && fpout);
  text = gt_str_new();
  gt_alphabet_to_str(alphabet, text);
  num_of_chars = (size_t) gt_str_length(text);
  gt_xfwrite(gt_str_get(text), sizeof (char), num_of_chars, fpout);
  gt_str_delete(text);
}
Beispiel #24
0
/* Builds the index files for <bs> under the name <bioseq_indexname> by
   encoding the sequence file of <bs> with description, MD5, multiple
   sequence and lossless support enabled.
   If <bs> reads from stdin, the input is copied to a temporary file first,
   which is then encoded. Otherwise a signal handler is registered for the
   duration of the encoding which removes incomplete files upon termination.
   Returns 0 on success and a non-zero value on failure, in which case <err>
   is set. */
static int construct_bioseq_files(GtBioseq *bs, GtStr *bioseq_indexname,
                                  GtError *err)
{
  GtStr *sequence_filename;
  GtEncseqEncoder *ee;
  GtStrArray *indexfn;
  int had_err = 0;

  gt_error_check(err);

  /* register the signal handler to remove incomplete files upon termination */
  if (!bs->use_stdin) {
    gt_bioseq_index_filename = gt_str_get(bs->sequence_file);
    gt_sig_register_all(remove_bioseq_files);
  }

  /* if stdin is used as input, we need to create a tempfile containing the
     sequence as GtEncseq cannot be built from stdin directly */
  if (bs->use_stdin) {
    GtStr *tmpfilename;
    FILE *tmpfp = NULL; /* not named "tmpfile" to avoid shadowing stdio */
    size_t nread;
    char buf[BUFSIZ];
    tmpfilename = gt_str_new();
    tmpfp = gt_xtmpfp(tmpfilename);
    gt_assert(tmpfp);
    while (!had_err && (nread = fread(buf, 1, sizeof buf, stdin)) > 0) {
      /* a short write would silently truncate the sequence data */
      if (fwrite(buf, 1, nread, tmpfp) != nread) {
        gt_error_set(err, "could not write to temporary file \"%s\"",
                     gt_str_get(tmpfilename));
        had_err = -1;
      }
    }
    gt_fa_xfclose(tmpfp);
    if (had_err) {
      /* nothing else has been set up for stdin input at this point */
      gt_str_delete(tmpfilename);
      return had_err;
    }
    sequence_filename = tmpfilename;
  } else {
    sequence_filename = gt_str_ref(bs->sequence_file);
  }
  gt_assert(gt_str_length(sequence_filename) > 0);
  ee = gt_encseq_encoder_new();
  gt_encseq_encoder_enable_description_support(ee);
  gt_encseq_encoder_enable_md5_support(ee);
  gt_encseq_encoder_enable_multiseq_support(ee);
  gt_encseq_encoder_enable_lossless_support(ee);
  indexfn = gt_str_array_new();
  gt_str_array_add(indexfn, sequence_filename);
  gt_str_delete(sequence_filename);
  had_err = gt_encseq_encoder_encode(ee, indexfn,
                                     gt_str_get(bioseq_indexname), err);
  /* unregister the signal handler */
  if (!bs->use_stdin)
    gt_sig_unregister_all();

  gt_str_array_delete(indexfn);
  gt_encseq_encoder_delete(ee);
  return had_err;
}
Beispiel #25
0
/* Write the image held by <gg> to the file <filename>.
   Nothing is written (and 0 is returned) if the graphics object was created
   from an existing context. For PNG graphics the rendered surface is blended
   over the background color and written via cairo; for all other types the
   surface is finished and the buffered output is written to the file.
   Returns 0 on success and -1 on failure. For PNG failures <err> is set
   here; when the output file cannot be opened, gt_file_open() receives
   <err>. */
int gt_graphics_cairo_save_to_file(const GtGraphics *gg, const char *filename,
                                GtError *err)
{
  const GtGraphicsCairo *g = (const GtGraphicsCairo*) gg;
  cairo_surface_t *bgsurf = NULL;
  cairo_t *bgc = NULL;
  cairo_status_t rval;
  GtFile *outfile;
  gt_error_check(err);
  gt_assert(g && filename);

  /* do nothing if no surface was created */
  if (g->from_context)
    return 0;
  switch (g->type)
  {
    case GT_GRAPHICS_PNG:
      /* blend rendered image with background color */
      bgsurf = cairo_image_surface_create(CAIRO_FORMAT_ARGB32, g->width,
                                          g->height);
      bgc = cairo_create(bgsurf);
      cairo_set_source_rgba(bgc, g->bg_color.red, g->bg_color.green,
                                 g->bg_color.blue, g->bg_color.alpha);
      cairo_paint(bgc);
      cairo_set_source_surface(bgc, g->surf, 0, 0);
      cairo_paint(bgc);
      rval = cairo_surface_write_to_png(bgsurf, filename);
      /* the temporary context and surface are no longer needed, regardless
         of whether writing succeeded */
      cairo_destroy(bgc);
      cairo_surface_destroy(bgsurf);
      if (rval == CAIRO_STATUS_WRITE_ERROR)
      {
        gt_error_set(err, "an I/O error occurred while attempting "
                          "to write image file \"%s\"", filename);
        return -1;
      }
      /* cairo can report other failures here as well (e.g. out of memory);
         treat every non-success status as an error instead of relying on an
         assertion */
      if (rval != CAIRO_STATUS_SUCCESS)
      {
        gt_error_set(err, "could not write image file \"%s\" "
                          "(cairo status %d)", filename, (int) rval);
        return -1;
      }
      break;
    default:
      /* finish the surface, then write the buffered output to the file */
      cairo_show_page(g->cr);
      cairo_surface_flush(g->surf);
      cairo_surface_finish(g->surf);
      outfile = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, filename, "w+", err);
      if (!outfile)
        return -1;
      gt_file_xwrite(outfile, gt_str_get_mem(g->outbuf),
                     gt_str_length(g->outbuf));
      gt_file_delete(outfile);
      break;
  }
  return 0;
}
/* Determine the output file pointer described by the GtOutputFileInfo passed
   in <data>.
   If no output filename was given, *ofi->outfp is set to NULL (meaning
   stdout). Otherwise the file mode is chosen from the gzip/bzip2 flags, the
   matching suffix is appended to the filename if it is missing (with a
   warning), and the file is opened for writing.
   Returns 0 on success. Returns -1 and sets <err> if the file already exists
   and the force flag is not set. */
static int determine_outfp(void *data, GtError *err)
{
  GtOutputFileInfo *ofi = (GtOutputFileInfo*) data;
  GtFileMode file_mode;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(ofi);
  if (!gt_str_length(ofi->output_filename))
    *ofi->outfp = NULL; /* no output file given -> use stdout */
  else { /* outputfile given -> create generic file pointer */
    gt_assert(!(ofi->gzip && ofi->bzip2));
    if (ofi->gzip)
      file_mode = GT_FILE_MODE_GZIP;
    else if (ofi->bzip2)
      file_mode = GT_FILE_MODE_BZIP2;
    else
      file_mode = GT_FILE_MODE_UNCOMPRESSED;
    if (file_mode != GT_FILE_MODE_UNCOMPRESSED) {
      const char *suffix = gt_file_mode_suffix(file_mode);
      size_t suffix_len = strlen(suffix),
             fname_len = (size_t) gt_str_length(ofi->output_filename);
      /* a filename shorter than the suffix cannot end with it; checking the
         length first keeps the tail pointer inside the filename buffer */
      if (fname_len < suffix_len ||
          strcmp(gt_str_get(ofi->output_filename) + fname_len - suffix_len,
                 suffix)) {
        gt_warning("output file '%s' doesn't have correct suffix '%s', "
                   "appending it", gt_str_get(ofi->output_filename), suffix);
        gt_str_append_cstr(ofi->output_filename, suffix);
      }
    }
    if (!ofi->force && gt_file_exists(gt_str_get(ofi->output_filename))) {
        gt_error_set(err, "file \"%s\" exists already, use option -%s to "
                     "overwrite", gt_str_get(ofi->output_filename),
                     GT_FORCE_OPT_CSTR);
        had_err = -1;
    }
    if (!had_err) {
      *ofi->outfp = gt_file_xopen_file_mode(file_mode,
                                            gt_str_get(ofi->output_filename),
                                            "w");
      gt_assert(*ofi->outfp);
    }
  }
  return had_err;
}
/* Read characters from <seqio> and append them to the (initially empty)
   <sequence> until FASTA_SEPARATOR is seen or gt_io_get_char() returns
   non-zero. Newlines and blanks are skipped. If reading stopped at a
   FASTA_SEPARATOR, it is pushed back onto <seqio>.
   Returns 0 on success; returns -1 and sets <err> if no sequence character
   was read. */
static int parse_fasta_sequence(GtStr *sequence, GtIO *seqio, GtError *err)
{
  char cc;
  gt_error_check(err);
  gt_assert(sequence && seqio);
  gt_assert(!gt_str_length(sequence));
  /* read sequence */
  for (;;) {
    if (gt_io_get_char(seqio, &cc) || cc == FASTA_SEPARATOR)
      break;
    if (cc == '\n' || cc == ' ')
      continue; /* whitespace is not part of the sequence */
    gt_str_append_char(sequence, cc);
  }
  if (gt_str_length(sequence) == 0) {
    gt_error_set(err, "empty sequence given in line %lu",
              gt_io_get_line_number(seqio));
    return -1;
  }
  /* hand the separator back so that it is seen by the next read */
  if (cc == FASTA_SEPARATOR)
    gt_io_unget_char(seqio, FASTA_SEPARATOR);
  return 0;
}
Beispiel #28
0
/* Store a clone of the non-empty <seqid> together with <offset> in the
   previously unused slot (<filenum>, <seqnum>) of <ss>. An <offset> of
   GT_UNDEF_UWORD is stored as 1. */
static void seqid_store_add(SeqidStore *ss, GtUword filenum,
                            GtUword seqnum, GtStr *seqid,
                            GtUword offset)
{
    gt_assert(ss && seqid);
    gt_assert(gt_str_length(seqid)); /* is not empty */
    gt_assert(filenum < ss->num_of_files);
    gt_assert(seqnum < ss->num_of_sequences[filenum]);
    gt_assert(!ss->store[filenum][seqnum]); /* is unused */
    /* an undefined offset is replaced by 1 */
    if (offset == GT_UNDEF_UWORD)
        offset = 1;
    ss->store[filenum][seqnum] = gt_str_clone(seqid);
    ss->offsets[filenum][seqnum] = offset;
}
Beispiel #29
0
/* Split <target> at commas, take the first blank-separated word of every
   entry, unescape it with gt_gff3_unescape() and search for it (via
   gt_string_matching_bmh()) in the descriptions of all sequences contained
   in <seqfiles>; show_target() is passed as the match callback.
   Returns 0 on success and -1 if unescaping fails or a sequence file cannot
   be opened as a GtBioseq; <err> is passed to both calls. */
static int extracttarget_from_seqfiles(const char *target,
                                       GtStrArray *seqfiles,
                                       GtError *err)
{
  GtSplitter *comma_splitter;
  GtStr *unescaped_target;
  char *target_copy;
  unsigned long entry_idx = 0;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(target && seqfiles);
  comma_splitter = gt_splitter_new();
  unescaped_target = gt_str_new();
  /* the splitter is handed a non-const buffer, hence work on a private copy */
  target_copy = gt_cstr_dup(target);
  gt_splitter_split(comma_splitter, target_copy, strlen(target_copy), ',');
  while (!had_err && entry_idx < gt_splitter_size(comma_splitter)) {
    unsigned long file_idx;
    char *entry = gt_splitter_get_token(comma_splitter, entry_idx),
         *first_word;
    GtSplitter *blank_splitter = gt_splitter_new();
    gt_splitter_split(blank_splitter, entry, strlen(entry), ' ');
    first_word = gt_splitter_get_token(blank_splitter, 0);
    /* NOTE(review): <unescaped_target> is not cleared between entries; if
       gt_gff3_unescape() appends to it, later entries are searched as a
       concatenation of all previous ones -- confirm */
    had_err = gt_gff3_unescape(unescaped_target, first_word,
                               strlen(first_word), err);
    for (file_idx = 0;
         !had_err && file_idx < gt_str_array_size(seqfiles);
         file_idx++) {
      unsigned long seq_idx;
      GtBioseq *bioseq = gt_bioseq_new(gt_str_array_get(seqfiles, file_idx),
                                       err);
      if (!bioseq) {
        had_err = -1;
        break;
      }
      /* match the target against every sequence description of this file */
      for (seq_idx = 0;
           seq_idx < gt_bioseq_number_of_sequences(bioseq);
           seq_idx++) {
        TargetInfo target_info;
        const char *desc = gt_bioseq_get_description(bioseq, seq_idx);
        target_info.bioseq = bioseq;
        target_info.seqnum = seq_idx;
        gt_string_matching_bmh(desc, strlen(desc),
                               gt_str_get(unescaped_target),
                               gt_str_length(unescaped_target), show_target,
                               &target_info);
      }
      gt_bioseq_delete(bioseq);
    }
    gt_splitter_delete(blank_splitter);
    entry_idx++;
  }
  gt_free(target_copy);
  gt_str_delete(unescaped_target);
  gt_splitter_delete(comma_splitter);
  return had_err;
}
Beispiel #30
0
/* Return the sequence ID stored in slot (<filenum>, <seqnum>) of <ss>. The
   slot must be in range, in use and hold a non-empty string. The returned
   pointer is the stored one, not a copy. */
static GtStr* seqid_store_get(SeqidStore *ss, GtUword filenum,
                              GtUword seqnum)
{
    GtStr *stored;
    gt_assert(ss);
    gt_assert(filenum < ss->num_of_files);
    gt_assert(seqnum < ss->num_of_sequences[filenum]);
    stored = ss->store[filenum][seqnum];
    gt_assert(stored); /* is used */
    gt_assert(gt_str_length(stored)); /* is not empty */
    return stored;
}