Esempio n. 1
0
/* Creates a sequence collection of the bioseq class with one GtBioseq per
   entry of <sequence_files> (which must be non-empty). If opening any file
   via gt_bioseq_new() fails, the partially filled collection is handed to
   gt_bioseq_col_delete() and NULL is returned; <err> is the error object
   passed to gt_bioseq_new(). */
GtSeqCol* gt_bioseq_col_new(GtStrArray *sequence_files, GtError *err)
{
  GtSeqCol *col;
  GtBioseqCol *bcol;
  GtUword filenum;

  gt_error_check(err);
  gt_assert(sequence_files);
  gt_assert(gt_str_array_size(sequence_files));

  col = gt_seq_col_create(gt_bioseq_col_class());
  bcol = gt_bioseq_col_cast(col);
  bcol->num_of_seqfiles = gt_str_array_size(sequence_files);
  /* zero-initialised, so slots behind a failing file stay NULL */
  bcol->bioseqs = gt_calloc(bcol->num_of_seqfiles, sizeof (GtBioseq*));

  for (filenum = 0; filenum < bcol->num_of_seqfiles; filenum++) {
    GtBioseq *opened = gt_bioseq_new(gt_str_array_get(sequence_files, filenum),
                                     err);
    bcol->bioseqs[filenum] = opened;
    if (opened == NULL) {
      /* stop at the first file that cannot be opened */
      gt_bioseq_col_delete(col);
      return NULL;
    }
  }

  bcol->matchdescstart = false;
  return col;
}
/* Fills <description> (which must be empty on entry) with
   "<type>_<counter>", followed by the optional markers " (joined)" and
   " (translated)", an optional " [seqid <seqid>]" part (<seqid> may be NULL,
   but must be non-empty if given) and an optional " [target IDs a,b,...]"
   part (<target_ids> may be NULL or empty). */
static void construct_description(GtStr *description, const char *type,
                                  GtUword counter, bool join,
                                  bool translate, GtStr *seqid,
                                  GtStrArray *target_ids)
{
  GtUword num_targets, idx;

  gt_assert(!gt_str_length(description));

  /* mandatory "<type>_<counter>" prefix */
  gt_str_append_cstr(description, type);
  gt_str_append_char(description, '_');
  gt_str_append_ulong(description, counter);

  if (join)
    gt_str_append_cstr(description, " (joined)");
  if (translate)
    gt_str_append_cstr(description, " (translated)");

  if (seqid) {
    gt_assert(gt_str_length(seqid));
    gt_str_append_cstr(description, " [seqid ");
    gt_str_append_str(description, seqid);
    gt_str_append_char(description, ']');
  }

  num_targets = target_ids ? gt_str_array_size(target_ids) : 0;
  if (num_targets == 0)
    return;

  gt_str_append_cstr(description, " [target IDs ");
  for (idx = 0; idx < num_targets; idx++) {
    if (idx > 0)
      gt_str_append_char(description, ','); /* separator between IDs */
    gt_str_append_cstr(description, gt_str_array_get(target_ids, idx));
  }
  gt_str_append_char(description, ']');
}
Esempio n. 3
0
/* Prints a single "# ..." line to stdout describing the first input source
   set in <opt>, checked in this order: two strings, two files, alphabet and
   length, text. For the two-strings case the line is suppressed when
   opt->showedist is set. Nothing is printed if no source is set. */
static void showsimpleoptions(const Cmppairwiseopt *opt)
{
  if (gt_str_array_size(opt->strings) > 0)
  {
    if (!opt->showedist)
    {
      const char *first = gt_str_array_get(opt->strings,0),
                 *second = gt_str_array_get(opt->strings,1UL);

      printf("# two strings \"%s\" \"%s\"\n", first, second);
    }
  } else if (gt_str_array_size(opt->files) > 0)
  {
    const char *first = gt_str_array_get(opt->files,0),
               *second = gt_str_array_get(opt->files,1UL);

    printf("# two files \"%s\" \"%s\"\n", first, second);
  } else if (opt->charlistlen != NULL)
  {
    printf("# alphalen \"%s\" " GT_WU "\n",
           gt_str_get(opt->charlistlen->charlist),
           opt->charlistlen->len);
  } else if (gt_str_length(opt->text) > 0)
  {
    printf("# text \"%s\"\n", gt_str_get(opt->text));
  }
}
Esempio n. 4
0
/* Applies <checkfunction> to the first input source set in <opt>, checked in
   this order: two strings, two files (optionally as pairs of FASTA
   sequences), alphabet and length, text. Returns the value reported as the
   number of test cases. At least one source must be set.
   NOTE(review): the FASTA branch returns 2 irrespective of how many sequence
   pairs were checked -- confirm that callers expect this. */
static GtUword applycheckfunctiontosimpleoptions(
                                  Checkcmppairfuntype checkfunction,
                                  const Cmppairwiseopt *opt)
{
  if (gt_str_array_size(opt->strings) > 0)
  {
    const char *useq = gt_str_array_get(opt->strings,0),
               *vseq = gt_str_array_get(opt->strings,1UL);
    const GtUword ulen = (GtUword) strlen(useq),
                  vlen = (GtUword) strlen(vseq);

    /* first the forward pass, then the non-forward pass */
    checkfunction(true, (const GtUchar *) useq, ulen,
                  (const GtUchar *) vseq, vlen);
    checkfunction(false, (const GtUchar *) useq, ulen,
                  (const GtUchar *) vseq, vlen);
    return 2UL; /* number of testcases */
  }
  if (gt_str_array_size(opt->files) > 0)
  {
    if (!opt->fasta)
    {
      gt_runcheckfunctionontwofiles(checkfunction,
                                    gt_str_array_get(opt->files,0),
                                    gt_str_array_get(opt->files,1UL));
    } else
    {
      GtUword idx0, idx1;

      /* all pairs (sequence from first set, sequence from second set) */
      for (idx0 = 0; idx0 < gt_str_array_size(opt->fastasequences0); idx0++)
      {
        const char *useq = gt_str_array_get(opt->fastasequences0,idx0);
        const GtUword ulen = (GtUword) strlen(useq);

        for (idx1 = 0; idx1 < gt_str_array_size(opt->fastasequences1); idx1++)
        {
          const char *vseq = gt_str_array_get(opt->fastasequences1,idx1);

          checkfunction(true, (const GtUchar *) useq, ulen,
                        (const GtUchar *) vseq, (GtUword) strlen(vseq));
        }
      }
    }
    return 2UL;
  }
  if (opt->charlistlen != NULL)
  {
    return gt_runcheckfunctiononalphalen(checkfunction,
                                      gt_str_get(opt->charlistlen->charlist),
                                      opt->charlistlen->len);
  }
  if (gt_str_length(opt->text) > 0)
  {
    return gt_runcheckfunctionontext(checkfunction, gt_str_get(opt->text));
  }
  gt_assert(false); /* no input source was set */
  return 0;
}
Esempio n. 5
0
/* Allocates a GtFastaBuffer and initialises it to its start state for the
   files named in <filenametab>. <symbolmap>, <descptr> and
   <characterdistribution> are stored as given. If <filelengthtab> is not
   NULL, a zeroed table with one entry per file name is allocated, stored in
   *<filelengthtab> and also referenced from the new buffer. */
GtFastaBuffer* gt_fastabuffer_new(const GtStrArray *filenametab,
                                  const GtUchar *symbolmap,
                                  bool plainformat,
                                  Filelengthvalues **filelengthtab,
                                  GtQueue *descptr,
                                  unsigned long *characterdistribution)
{
  GtFastaBuffer *fb = gt_calloc(1, sizeof (GtFastaBuffer));

  /* values supplied by the caller */
  fb->filenametab = filenametab;
  fb->symbolmap = symbolmap;
  fb->plainformat = plainformat;
  fb->descptr = descptr;
  fb->characterdistribution = characterdistribution;

  /* initial reading state */
  fb->filenum = 0;
  fb->nextfree = 0;
  fb->nextread = 0;
  fb->lastspeciallength = 0;
  fb->firstoverallseq = true;
  fb->firstseqinfile = true;
  fb->nextfile = true;
  fb->complete = false;

  if (filelengthtab != NULL)
  {
    *filelengthtab = gt_calloc(gt_str_array_size(filenametab),
                               sizeof (Filelengthvalues));
  }
  fb->filelengthtab = (filelengthtab != NULL) ? *filelengthtab : NULL;

  GT_INITARRAY(&fb->headerbuffer, char);
  return fb;
}
/* Parses the three arguments of option -algbds from <algbounds> into
   sfxstrategy->maxinsertionsort, ->maxbltriesort and ->maxcountingsort and
   checks that they are in non-decreasing order. Returns 0 on success and -1
   on failure, in which case a message is stored in <err>.

   The ordering checks are only performed while no error has been recorded:
   otherwise they would compare fields that may not have been parsed and
   replace the first (most relevant) error message by a misleading one. */
int gt_parse_algbounds(Sfxstrategy *sfxstrategy,
                       const GtStrArray *algbounds,
                       GtError *err)
{
  bool haserr = false;
  /* NOTE(review): arg and readint are not referenced directly below; they
     are presumably used by GT_IDXOPTS_READMAXBOUND -- check the macro before
     touching them. */
  const char *arg;
  GtWord readint;

  if (gt_str_array_size(algbounds) != 3UL)
  {
    gt_error_set(err,"option -algbds must have exactly 3 arguments");
    haserr = true;
  }
  GT_IDXOPTS_READMAXBOUND(maxinsertionsort, 0);
  GT_IDXOPTS_READMAXBOUND(maxbltriesort, 1UL);
  if (!haserr && sfxstrategy->maxinsertionsort > sfxstrategy->maxbltriesort)
  {
    gt_error_set(err,"first argument of option -algbds must not be larger "
                     "than second argument");
    haserr = true;
  }
  GT_IDXOPTS_READMAXBOUND(maxcountingsort, 2UL);
  if (!haserr && sfxstrategy->maxbltriesort > sfxstrategy->maxcountingsort)
  {
    gt_error_set(err,"second argument of option -algbds must not be larger "
                     "than third argument");
    haserr = true;
  }
  return haserr ? -1 : 0;
}
Esempio n. 7
0
int gt_seqiterator_fastq_next(GtSeqIterator *seqit,
                              const GtUchar **sequence,
                              unsigned long *len,
                              char **desc,
                              GtError *err)
{
  int errstatus = 0;
  GtSeqIteratorFastQ *seqitf;
  gt_assert(seqit);
  seqitf = gt_seqiterator_fastq_cast((GtSeqIterator*) seqit);
  gt_assert(seqit && len && desc);

  seqitf = gt_seqiterator_fastq_cast(seqit);
  gt_str_reset(seqitf->qualsbuffer);
  gt_str_reset(seqitf->qdescbuffer);
  gt_str_reset(seqitf->sequencebuffer);
  gt_str_reset(seqitf->descbuffer);

  /* parse file */
  errstatus = parse_fastq_block(seqitf, err);

  if (!errstatus) {
    *sequence = (GtUchar*) gt_str_get(seqitf->sequencebuffer);
    *len = gt_str_length(seqitf->sequencebuffer);
    *desc = gt_str_get(seqitf->descbuffer);
    if (seqitf->qualities)
      *seqitf->qualities = (GtUchar*) gt_str_get(seqitf->qualsbuffer);
    errstatus = 1;
  } else {
    if (errstatus == EOF) {
      /* we could not get a next entry from this file */
      /* can we open another? */
      if (seqitf->filenum+1 < gt_str_array_size(seqitf->filenametab)) {
        const char *filename;
        filename = gt_str_array_get(seqitf->filenametab, ++seqitf->filenum);
        gt_file_delete(seqitf->curfile);
        seqitf->curfile = gt_file_xopen(filename, "r");
        seqitf->curline = 1;
        /* get first entry from next file*/
        errstatus = parse_fastq_block(seqitf, err);
        if (!errstatus) {
          *sequence = (GtUchar*) gt_str_get(seqitf->sequencebuffer);
          *len = gt_str_length(seqitf->sequencebuffer);
          *desc = gt_str_get(seqitf->descbuffer);
          if (seqitf->qualities)
            *seqitf->qualities = (GtUchar*) gt_str_get(seqitf->qualsbuffer);
          errstatus = 1;
        } else {
          errstatus = -1;
        }
      } else {
        /* all entries read from all files */
        errstatus = 0;
      }
    } else {
      errstatus = -1;
    }
  }
  return errstatus;
}
Esempio n. 8
0
/* Encodes the sequence files <infiles> into an encoded sequence named
   <indexname>, using an encoder configured from <opts>. Progress is logged
   to stderr with prefix "# " if <verbose> is set; <esq_no_header> disables
   the esq header in the encoder. Returns -1 if the encoder cannot be created
   and otherwise the result of gt_encseq_encoder_encode(). */
static int encode_sequence_files(GtStrArray *infiles, GtEncseqOptions *opts,
                                 const char *indexname, bool verbose,
                                 bool esq_no_header,
                                 GtError *err)
{
  GtLogger *logger;
  GtEncseqEncoder *encoder;
  int status;

  gt_error_check(err);
  gt_assert(infiles && gt_str_array_size(infiles) > 0 && opts);

  logger = gt_logger_new(verbose, "# ", stderr);
  encoder = gt_encseq_encoder_new_from_options(opts, err);
  if (encoder != NULL) {
    gt_encseq_encoder_set_logger(encoder, logger);
    if (esq_no_header)
      gt_encseq_encoder_disable_esq_header(encoder);
    status = gt_encseq_encoder_encode(encoder, infiles, indexname, err);
  } else {
    status = -1;
  }

  /* released on success and on failure alike */
  gt_encseq_encoder_delete(encoder);
  gt_logger_delete(logger);
  return status;
}
Esempio n. 9
0
/* Tool runner: echoes the index name (labelled "esa" or "pck" depending on
   arguments->withesa), all query file names and the threshold as "# ..."
   lines on stdout, then calls runidxlocali(). Returns 0 if that call
   returns 0 and -1 otherwise. */
static int gt_idxlocali_runner (GT_UNUSED int argc,
                                GT_UNUSED const char **argv,
                                GT_UNUSED int parsed_args,
                                void *tool_arguments,
                                GtError * err)
{
  IdxlocaliOptions *arguments = tool_arguments;
  unsigned long numofqueryfiles, filenum = 0;

  gt_error_check (err);
  gt_assert (arguments != NULL);

  gt_assert (parsed_args == argc);
  printf ("# indexname(%s)=%s\n", arguments->withesa ? "esa" : "pck",
          gt_str_get (arguments->indexname));

  numofqueryfiles = gt_str_array_size (arguments->queryfiles);
  while (filenum < numofqueryfiles)
  {
    printf ("# queryfile=%s\n",
            gt_str_array_get (arguments->queryfiles, filenum));
    filenum++;
  }
  printf ("# threshold=%lu\n", arguments->threshold);

  return (runidxlocali (arguments, err) != 0) ? -1 : 0;
}
Esempio n. 10
0
/* Opens one suffix array per index name in <indexnametab> via
   streamsuffixarray(), storing it at the same position of
   <suffixarraytable>, and sets the matching entry of <nextpostable> to 0.
   *<numofchars> is set from the alphabet of the first index. Returns 0 on
   success and -1 as soon as one index cannot be opened; entries filled
   before the failure are not released here. */
static int inputthesequences(unsigned int *numofchars,
                             unsigned long *nextpostable,
                             Suffixarray *suffixarraytable,
                             const GtStrArray *indexnametab,
                             unsigned int demand,
                             GtLogger *logger,
                             GtError *err)
{
  unsigned long numofindexes, current;

  gt_error_check(err);
  numofindexes = gt_str_array_size(indexnametab);
  for (current = 0; current < numofindexes; current++)
  {
    Suffixarray *suffixarray = suffixarraytable + current;

    if (streamsuffixarray(suffixarray,
                          demand,
                          gt_str_array_get(indexnametab,current),
                          logger,
                          err) != 0)
    {
      return -1;
    }
    if (current == 0)
    {
      /* the alphabet size is taken from the first index only */
      *numofchars
        = gt_alphabet_num_of_chars(gt_encseq_alphabet(suffixarray->encseq));
    }
    nextpostable[current] = 0;
  }
  return 0;
}
Esempio n. 11
0
/* Tool runner: reads the GFF3 files given as remaining arguments (unsorted
   input stream), collects IDs with a collect-ids visitor into a string
   table and, if the stream was pulled without error, prints every collected
   entry on its own line to stdout. Returns the result of
   gt_node_stream_pull(). */
static int gt_seqids_runner(GT_UNUSED int argc, const char **argv,
                                  int parsed_args,
                                  GT_UNUSED void *tool_arguments, GtError *err)
{
  GtCstrTable *cst;
  GtNodeStream *in_stream, *v_stream;
  int had_err;

  gt_error_check(err);

  /* set up: GFF3 input -> visitor stream filling <cst> */
  cst = gt_cstr_table_new();
  in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                             argv + parsed_args);
  v_stream = gt_visitor_stream_new(in_stream, gt_collect_ids_visitor_new(cst));

  had_err = gt_node_stream_pull(v_stream, err);
  if (!had_err) {
    GtStrArray *seqids = gt_cstr_table_get_all(cst);
    const GtUword numofseqids = gt_str_array_size(seqids);
    GtUword idx = 0;

    while (idx < numofseqids) {
      printf("%s\n", gt_str_array_get(seqids, idx));
      idx++;
    }
    gt_str_array_delete(seqids);
  }

  gt_node_stream_delete(v_stream);
  gt_node_stream_delete(in_stream);
  gt_cstr_table_delete(cst);
  return had_err;
}
Esempio n. 12
0
/* single sequences (-ss): stores a new GtStr copy of entry <idx> of
   <strings> in slot 0 of sequence_table->seqarray. The previous content of
   that slot is overwritten and not released here. */
static void get_onesequence(const GtSequenceTable *sequence_table,
                            const GtStrArray *strings,
                            GtUword idx)
{
  const char *selected;

  gt_assert(sequence_table != NULL && strings != NULL &&
            idx < gt_str_array_size(strings));

  selected = gt_str_array_get(strings,idx);
  sequence_table->seqarray[0] = gt_str_new_cstr(selected);
}
Esempio n. 13
0
/* Returns the sum of gt_file_estimate_size() over all entries of
   <filenames>; 0 for an empty array. */
off_t gt_files_estimate_total_size(const GtStrArray *filenames)
{
  const GtUword numoffiles = gt_str_array_size(filenames);
  off_t sum = 0;
  GtUword idx = 0;

  while (idx < numoffiles) {
    sum += gt_file_estimate_size(gt_str_array_get(filenames, idx));
    idx++;
  }
  return sum;
}
Esempio n. 14
0
/* Applies <checkfunction> to the first input source set in <opt>, checked in
   this order: two strings, two files, alphabet and length, text. Returns the
   value reported as the number of test cases. At least one source must be
   set. */
static unsigned long applycheckfunctiontosimpleoptions(
                                  Checkcmppairfuntype checkfunction,
                                  const Cmppairwiseopt *opt)
{
  if (gt_str_array_size(opt->strings) > 0)
  {
    const char *useq = gt_str_array_get(opt->strings,0),
               *vseq = gt_str_array_get(opt->strings,1UL);
    const unsigned long ulen = (unsigned long) strlen(useq),
                        vlen = (unsigned long) strlen(vseq);

    /* first the forward pass, then the non-forward pass */
    checkfunction(true, (const GtUchar *) useq, ulen,
                  (const GtUchar *) vseq, vlen);
    checkfunction(false, (const GtUchar *) useq, ulen,
                  (const GtUchar *) vseq, vlen);
    return 2UL; /* number of testcases */
  }
  if (gt_str_array_size(opt->files) > 0)
  {
    const char *file0 = gt_str_array_get(opt->files,0),
               *file1 = gt_str_array_get(opt->files,1UL);

    gt_runcheckfunctionontwofiles(checkfunction, file0, file1);
    return 2UL;
  }
  if (opt->charlistlen != NULL)
  {
    return gt_runcheckfunctiononalphalen(checkfunction,
                                      gt_str_get(opt->charlistlen->charlist),
                                      opt->charlistlen->len);
  }
  if (gt_str_length(opt->text) > 0)
  {
    return gt_runcheckfunctionontext(checkfunction,gt_str_get(opt->text));
  }
  gt_assert(false); /* no input source was set */
  return 0;
}
Esempio n. 15
0
/* Pushes a new Lua table onto the stack of <L> which maps the integer keys
   1..n to the n strings stored in <sa> (in array order). */
void gt_lua_push_strarray_as_table(lua_State *L, GtStrArray *sa)
{
  unsigned long luakey, numofstrings;

  gt_assert(L && sa);
  lua_newtable(L);
  numofstrings = gt_str_array_size(sa);
  /* Lua keys start at 1, array positions at 0 */
  for (luakey = 1; luakey <= numofstrings; luakey++) {
    lua_pushinteger(L, luakey);
    lua_pushstring(L, gt_str_array_get(sa, luakey - 1));
    lua_rawset(L, -3);
  }
}
Esempio n. 16
0
/* Node stream "next" callback of the LTR cluster stream.

   On the first call the complete input stream is consumed: a reference to
   every node is stored in lcs->nodes and the node is passed to the visitor
   lcs->lcv; afterwards process_feature() is run for every key delivered by
   the visitor. Only if all of this succeeds, the stream switches to the
   output phase; on failure the error code is returned and the next call
   starts over with this phase.

   In the output phase (from the first successful call on) the stored nodes
   are handed out one per call via *<gn>; *<gn> is set to NULL once all nodes
   have been delivered. This also covers an empty input stream, for which the
   first call now delivers NULL instead of reading element 0 of the empty
   node array. */
static int gt_ltr_cluster_stream_next(GtNodeStream *ns,
                                      GtGenomeNode **gn,
                                      GtError *err)
{
  GtLTRClusterStream *lcs;
  int had_err = 0;

  gt_error_check(err);
  lcs = gt_ltr_cluster_stream_cast(ns);

  if (lcs->first_next) {
    unsigned long i;

    /* collect phase: buffer and visit all input nodes */
    while (!(had_err = gt_node_stream_next(lcs->in_stream, gn, err)) && *gn) {
      GtGenomeNode *ref_gn;

      gt_assert(*gn && !had_err);
      ref_gn = gt_genome_node_ref(*gn);
      gt_array_add(lcs->nodes, ref_gn);
      had_err = gt_genome_node_accept(*gn, (GtNodeVisitor*) lcs->lcv, err);
      if (had_err) {
        gt_genome_node_delete(*gn);
        *gn = NULL;
        break;
      }
    }
    lcs->feat_to_encseq =
                       gt_ltr_cluster_prepare_seq_visitor_get_encseqs(lcs->lcv);
    lcs->feat_to_encseq_keys =
                      gt_ltr_cluster_prepare_seq_visitor_get_features(lcs->lcv);

    for (i = 0;
         !had_err && i < gt_str_array_size(lcs->feat_to_encseq_keys);
         i++) {
      had_err = process_feature(lcs,
                                gt_str_array_get(lcs->feat_to_encseq_keys, i),
                                err);
    }
    if (had_err)
      return had_err;
    lcs->first_next = false;
  }

  /* output phase: hand out the next buffered node, if any is left */
  if (lcs->next_index >= gt_array_size(lcs->nodes))
    *gn = NULL;
  else {
    *gn = *(GtGenomeNode**) gt_array_get(lcs->nodes, lcs->next_index);
    lcs->next_index++;
  }
  return 0;
}
Esempio n. 17
0
/* Creates an iterator over matches of query substrings against the database
   described by <dbencseq>, <suftabpart>, <db_readmode>, <numberofsuffixes>
   and <totallength>. The queries come either from <query_files> (if it is
   non-NULL and non-empty; then <query_encseq> must be NULL) or from
   <query_encseq> (which then must not be NULL). Returns NULL if the sequence
   iterator over the query files cannot be created; <err> is passed to its
   constructor. */
GtQuerysubstringmatchiterator *gt_querysubstringmatchiterator_new(
                                     const GtEncseq *dbencseq,
                                     GtUword totallength,
                                     const ESASuffixptr *suftabpart,
                                     GtReadmode db_readmode,
                                     GtUword numberofsuffixes,
                                     const GtStrArray *query_files,
                                     const GtEncseq *query_encseq,
                                     GtReadmode query_readmode,
                                     unsigned int userdefinedleastlength,
                                     GtError *err)
{
  GtQuerysubstringmatchiterator *qsmi = gt_malloc(sizeof *qsmi);

  /* database side */
  qsmi->dbencseq = dbencseq;
  qsmi->suftabpart = suftabpart;
  qsmi->db_readmode = db_readmode;
  qsmi->numberofsuffixes = numberofsuffixes;
  qsmi->totallength = totallength;
  qsmi->userdefinedleastlength = (GtUword) userdefinedleastlength;

  /* query side */
  qsmi->queryunitnum = 0;
  qsmi->desc = NULL;
  qsmi->query_for_seqit = NULL;
  qsmi->query_seqlen = 0;
  qsmi->queryrep.sequence = NULL;
  qsmi->queryrep.encseq = query_encseq;
  qsmi->queryrep.readmode = query_readmode;
  qsmi->queryrep.startpos = 0;
  qsmi->querysubstring.queryrep = &qsmi->queryrep;

  /* match state */
  qsmi->dbstart = 0;
  qsmi->matchlength = 0;
  qsmi->mmsi = gt_mmsearchiterator_new_empty();
  qsmi->mmsi_defined = false;

  if (query_files != NULL && gt_str_array_size(query_files) > 0)
  {
    /* queries are read from files */
    gt_assert(query_encseq == NULL);
    qsmi->seqit = gt_seq_iterator_sequence_buffer_new(query_files, err);
    if (qsmi->seqit == NULL)
    {
      gt_querysubstringmatchiterator_delete(qsmi);
      return NULL;
    }
    gt_seq_iterator_set_symbolmap(qsmi->seqit,
                        gt_alphabet_symbolmap(gt_encseq_alphabet(dbencseq)));
  } else
  {
    /* queries are taken from the encoded sequence */
    gt_assert(query_encseq != NULL);
    qsmi->seqit = NULL;
    qsmi->query_encseq_numofsequences
      = (uint64_t) gt_encseq_num_of_sequences(query_encseq);
  }
  return qsmi;
}
Esempio n. 18
0
/* Searches the sequence files <seqfiles> for the targets listed in <target>.
   <target> is split at ','; of each part only the text before the first
   blank is used. That text is GFF3-unescaped and then searched (via
   gt_string_matching_bmh()) in the description of every sequence of every
   file; show_target() is called as the match callback with the bioseq and
   sequence number. Returns 0 on success and a non-zero value if unescaping
   fails or a sequence file cannot be opened. */
static int extracttarget_from_seqfiles(const char *target,
                                       GtStrArray *seqfiles,
                                       GtError *err)
{
  GtStr *unescaped_target;
  char *escaped_target;
  GtSplitter *splitter;
  unsigned long i;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(target && seqfiles);
  splitter = gt_splitter_new();
  unescaped_target = gt_str_new();
  /* the splitter works on a private copy of <target> */
  escaped_target = gt_cstr_dup(target);
  gt_splitter_split(splitter, escaped_target, strlen(escaped_target), ',');
  for (i = 0; !had_err && i < gt_splitter_size(splitter); i++) {
    GtSplitter *blank_splitter;
    const char *escaped_id;
    char *token = gt_splitter_get_token(splitter, i);
    blank_splitter = gt_splitter_new();
    gt_splitter_split(blank_splitter, token, strlen(token), ' ');
    escaped_id = gt_splitter_get_token(blank_splitter, 0);
    /* Start from an empty buffer for every target, so that the text of a
       previous target cannot become part of the current search pattern.
       NOTE(review): this assumes gt_gff3_unescape() appends to its output
       string; if it overwrites, the reset is merely redundant. */
    gt_str_reset(unescaped_target);
    had_err = gt_gff3_unescape(unescaped_target, escaped_id,
                               strlen(escaped_id), err);
    if (!had_err) {
      unsigned long j;
      for (j = 0; j < gt_str_array_size(seqfiles); j++) {
        unsigned long k;
        GtBioseq *bioseq;
        if (!(bioseq =  gt_bioseq_new(gt_str_array_get(seqfiles, j), err))) {
          had_err = -1;
          break;
        }
        for (k = 0; k < gt_bioseq_number_of_sequences(bioseq); k++) {
          TargetInfo target_info;
          const char *desc = gt_bioseq_get_description(bioseq, k);
          target_info.bioseq = bioseq;
          target_info.seqnum = k;
          gt_string_matching_bmh(desc, strlen(desc),
                                 gt_str_get(unescaped_target),
                                 gt_str_length(unescaped_target), show_target,
                                 &target_info);
        }
        gt_bioseq_delete(bioseq);
      }
    }
    gt_splitter_delete(blank_splitter);
  }
  gt_free(escaped_target);
  gt_str_delete(unescaped_target);
  gt_splitter_delete(splitter);
  return had_err;
}
Esempio n. 19
0
/* Parses intermediate output with gt_parse_intermediate_output(), passing
   <input>, <saprocessfunc> and <data> through. If <consensusfiles> is empty,
   stdin is parsed (under the name "stdin"); otherwise the listed files are
   opened and parsed one after the other until the first failure. If
   <showverbose> is set, the parse status is reported for every file.
   Returns the result of the last parser call. */
int gth_process_intermediate_files(GthInput *input, GtStrArray *consensusfiles,
                                   GthSAProcessFunc saprocessfunc, void *data,
                                   GthShowVerbose showverbose, GtError *err)
{
  GtUword filenum;
  int had_err = 0;

  gt_error_check(err);

  if (gt_str_array_size(consensusfiles) == 0) {
    /* no files given: read from stdin, which must stay open afterwards */
    GtFile *genfile = gt_file_new_from_fileptr(stdin);

    had_err = gt_parse_intermediate_output(input, saprocessfunc, data, "stdin",
                                           genfile, err);
    gt_file_delete_without_handle(genfile);
    return had_err;
  }

  /* process all files */
  for (filenum = 0;
       !had_err && filenum < gt_str_array_size(consensusfiles);
       filenum++) {
    GtFile *fp = gt_file_xopen(gt_str_array_get(consensusfiles, filenum), "r");

    if (showverbose) {
      show_parse_file_status(showverbose, filenum,
                             gt_str_array_size(consensusfiles),
                             gt_str_array_get(consensusfiles, filenum));
    }

    had_err = gt_parse_intermediate_output(input, saprocessfunc, data,
                                           gt_str_array_get(consensusfiles,
                                                            filenum),
                                           fp, err);
    gt_file_delete(fp);
  }

  return had_err;
}
Esempio n. 20
0
/* Writes every entry of the VCF array of <r> to the file of <v>, each one
   followed by a tab. The last entry is extended beforehand by ";" and one
   "<TAG>;" marker for every result value of <r> that is non-zero, in the
   order EX (exon), NSF (frms), NSM (miss), NSN (nons), ASS (threeprime),
   DSS (fiveprime). */
void vcfoutput_write(VcfOutput *v, ResultSet *r) {
  gt_assert(v);
  gt_assert(r);
  GtStr *field = gt_str_new();
  const GtStrArray *vcf_fields = resultset_get_vcf_array(r);
  const unsigned long vcf_size = gt_str_array_size(vcf_fields);
  unsigned long idx;

  for (idx = 0; idx < vcf_size; idx++) {
    const bool is_last = (idx + 1 == vcf_size);

    gt_str_set(field, gt_str_array_get(vcf_fields, idx));

    if (is_last) {
      /* only the final column carries the annotation markers */
      gt_str_append_cstr(field, ";");
      if (resultset_get_exon(r) != 0)
        gt_str_append_cstr(field, "EX;");
      if (resultset_get_frms(r) != 0)
        gt_str_append_cstr(field, "NSF;");
      if (resultset_get_miss(r) != 0)
        gt_str_append_cstr(field, "NSM;");
      if (resultset_get_nons(r) != 0)
        gt_str_append_cstr(field, "NSN;");
      if (resultset_get_threeprime(r) != 0)
        gt_str_append_cstr(field, "ASS;");
      if (resultset_get_fiveprime(r) != 0)
        gt_str_append_cstr(field, "DSS;");
    }

    gt_str_append_cstr(field, "\t");
    gt_file_xwrite(v->file, gt_str_get(field), gt_str_length(field));
    gt_str_reset(field);
  }
  gt_str_delete(field);
}
Esempio n. 21
0
/* Advances the codon iterator of <translator> until a codon is found which
   equals one of the strings in <codons>. All strings in <codons> must have
   length GT_CODON_LENGTH, otherwise an error is set in <err> and
   GT_TRANSLATOR_ERROR is returned before anything is read.
   Returns GT_TRANSLATOR_OK on a hit, with *<pos> set to the current position
   of the codon iterator minus 1; GT_TRANSLATOR_END if the iterator reports
   GT_CODON_ITERATOR_END; GT_TRANSLATOR_ERROR for any other iterator
   status. */
GtTranslatorStatus gt_translator_find_codon(GtTranslator *translator,
                                            GtStrArray *codons,
                                            GtUword *pos,
                                            GtError *err)
{
  char n1, n2, n3;
  unsigned int frame;
  GtUword idx;
  GtCodonIteratorStatus retval;
  gt_assert(translator && codons && pos);
  gt_error_check(err);

  /* validate all search codons up front */
  for (idx = 0; idx < gt_str_array_size(codons); idx++) {
    const char *candidate = gt_str_array_get(codons, idx);
    const int len = (int) strlen(candidate);

    if (len != GT_CODON_LENGTH) {
      gt_error_set(err, "invalid codon length for codon %s: %d",
                   candidate, len);
      return GT_TRANSLATOR_ERROR;
    }
  }

  for (;;) {
    retval = gt_codon_iterator_next(translator->ci,
                                    &n1, &n2, &n3, &frame, err);
    if (retval)
      break; /* end of sequence or iterator error */
    for (idx = 0; idx < gt_str_array_size(codons); idx++) {
      const char *candidate = gt_str_array_get(codons, idx);

      if (candidate[0] == n1 && candidate[1] == n2 && candidate[2] == n3) {
        *pos = gt_codon_iterator_current_position(translator->ci)-1;
        return GT_TRANSLATOR_OK;
      }
    }
  }

  if (retval == GT_CODON_ITERATOR_END)
    return GT_TRANSLATOR_END;
  return GT_TRANSLATOR_ERROR;
}
Esempio n. 22
0
/* Checks the arguments of the tool after option parsing: no superfluous
   arguments may remain (<rest_argc> must be 0), every entry of
   arguments->showmodespec must be a known output mode and
   arguments->strandspec must be a known strand specification. The
   corresponding bits are added to arguments->showmode and arguments->strand.
   Returns 0 on success and -1 on failure, with the message in <err>.

   The strand specification is checked under the option name "-strand"
   (formerly "-output", which made messages about a bad strand argument
   refer to the wrong option).
   NOTE(review): "-strand" is assumed to be the name under which strandspec
   is registered -- confirm against the option parser of this tool. */
static int gt_tyr_search_arguments_check(int rest_argc,
                                         void *tool_arguments,
                                         GtError *err)
{
  Optionargmodedesc showmodedesctable[] =
  {
    {"qseqnum","query sequence number",SHOWQSEQNUM},
    {"qpos","query position",SHOWQPOS},
    {"counts","number of occurrence counts",SHOWCOUNTS},
    {"sequence","mer-sequence",SHOWSEQUENCE}
  };

  Optionargmodedesc stranddesctable[] =
  {
    {"f","forward strand",STRAND_FORWARD},
    {"p","reverse strand",STRAND_REVERSE},
    {"fp","forward and reverse strand",STRAND_FORWARD | STRAND_REVERSE}
  };
  unsigned long idx;
  Tyr_search_options *arguments = tool_arguments;

  if (rest_argc != 0)
  {
    gt_error_set(err,"superfluous arguments");
    return -1;
  }
  /* each output mode given by the user contributes one bit to showmode */
  for (idx=0; idx<gt_str_array_size(arguments->showmodespec); idx++)
  {
    if (optionargaddbitmask(showmodedesctable,
                         sizeof (showmodedesctable)/
                         sizeof (showmodedesctable[0]),
                         &arguments->showmode,
                         "-output",
                         gt_str_array_get(arguments->showmodespec,idx),
                         err) != 0)
    {
      return -1;
    }
  }
  if (optionargaddbitmask(stranddesctable,
                          sizeof (stranddesctable)/
                          sizeof (stranddesctable[0]),
                          &arguments->strand,
                          "-strand",
                          gt_str_get(arguments->strandspec),err) != 0)
  {
    return -1;
  }
  return 0;
}
Esempio n. 23
0
/* Lua binding: expects an encseq as first argument and pushes a new table
   which maps the integer keys 1..n to the n file names reported by
   gt_encseq_filenames(). Returns 1, the number of values pushed as result. */
static int encseq_lua_filenames(lua_State *L)
{
  GtEncseq **encseq = check_encseq(L, 1);
  const GtStrArray *filenames = gt_encseq_filenames(*encseq);
  GtUword filenum = 0;

  lua_newtable(L);
  while (filenum < gt_str_array_size(filenames)) {
    const char *filename = gt_str_array_get(filenames, filenum);

    filenum++;
    lua_pushinteger(L, filenum); /* Lua keys start at 1 */
    lua_pushstring(L, filename);
    lua_rawset(L, -3);
  }
  return 1;
}
/* Node stream "next" callback. As long as the input stream delivers nodes,
   each node is passed to the collecting visitor s->collect_vis and handed
   on via *<gn>. Once the input is exhausted, every further call delivers
   one new sequence node for the next entry of the seqid table (fetched on
   first use), with its sequence obtained from the region mapping s->rm;
   *<gn> is NULL when all entries have been handled. Returns 0 on success
   and the error code of the failing call otherwise. */
static int sequence_node_add_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                         GtError *err)
{
  GtSequenceNodeAddStream *s;
  GtStr *seqid, *seqstr;
  GtUword len;
  char *seq = NULL;
  int had_err;
  gt_error_check(err);
  s = gt_sequence_node_add_stream_cast(ns);

  /* stream nodes as long as we have some, record seen seqids */
  had_err = gt_node_stream_next(s->in_stream, gn, err);
  if (!had_err && *gn)
    had_err = gt_genome_node_accept(*gn, s->collect_vis, err);
  if (had_err || *gn)
    return had_err;

  /* the input is exhausted: continue with the collected seqids */
  if (!s->seqids)
    s->seqids = gt_cstr_table_get_all(s->seqid_table);
  gt_assert(s->seqids);
  if (s->cur_seqid >= gt_str_array_size(s->seqids)) {
    *gn = NULL;
    return 0;
  }

  seqid = gt_str_new();
  seqstr = gt_str_new();
  gt_str_append_cstr(seqid, gt_str_array_get(s->seqids, s->cur_seqid));
  had_err = gt_region_mapping_get_sequence_length(s->rm, &len, seqid, err);
  if (!had_err) {
    had_err = gt_region_mapping_get_sequence(s->rm, &seq, seqid, 1, len,
                                             err);
  }
  if (!had_err) {
    gt_str_append_cstr_nt(seqstr, seq, len);
    *gn = gt_sequence_node_new(gt_str_get(seqid), seqstr);
  }
  /* the entry counts as handled even if its sequence could not be fetched */
  s->cur_seqid++;
  gt_free(seq);
  gt_str_delete(seqid);
  gt_str_delete(seqstr);

  return had_err;
}
Esempio n. 25
0
/* Unit test for the translation table module. The only active check
   compares the number of entries returned by
   gt_trans_table_get_scheme_descriptions() with GT_NUMOFTRANSSCHEMES; all
   other checks below are disabled (commented out). Returns had_err.
   NOTE(review): had_err is never assigned directly in this function;
   presumably the gt_ensure() macro updates it (and uses err) -- confirm
   against the macro definition before renaming either variable.
   NOTE(review): <schemes> is not released in this function -- verify who
   owns the array returned by gt_trans_table_get_scheme_descriptions(). */
int gt_trans_table_unit_test(GtError *err)
{
    int had_err = 0;
    GtStrArray *schemes;
    gt_error_check(err);

    /* check retrieval of table descriptions */
    schemes = gt_trans_table_get_scheme_descriptions();
    gt_ensure(
        gt_str_array_size(schemes) == (GtUword) GT_NUMOFTRANSSCHEMES);

    /* check switching translation scheme */
    /* test_errnum = gt_translator_set_translation_scheme(tr, 3, test_err);
    gt_ensure(!test_errnum && !gt_error_is_set(test_err)); */

    /* check switching to invalid translation scheme */
    /* test_errnum = gt_translator_set_translation_scheme(tr, 7, test_err);
    gt_ensure(test_errnum && gt_error_is_set(test_err)); */

    /* switch back to default translation scheme */
    /* gt_error_unset(test_err);
    test_errnum = gt_translator_set_translation_scheme(tr, 1, test_err);
    gt_ensure(!test_errnum && !gt_error_is_set(test_err)); */

    /* check single codon translation */
    /*
     *  char *bases = "AaCcGgTt";
     *  gt_error_unset(test_err);
    for (i=0; i<8; i++) {
      char c1 = bases[i];
      for (j=0; j<8; j++) {
        char c2 = bases[j];
        for (k=0; k<8; k++) {
          char c3 = bases[k], ret1, ret2;
          test_errnum = gt_translator_codon2amino(tr, c1, c2, c3, &ret1,
                                                  test_err);
          gt_ensure(!test_errnum && !gt_error_is_set(test_err));
          ret2 = gt_transa(tr->scheme->aminos, true, c1, c2, c3, NULL,
                             test_err);
          gt_ensure(ret1 == ret2);
        }
      }
    } */

    return had_err;
}
Esempio n. 26
0
/* Iterates over all sequences of the single query file named in
   <queryfilenames> (read with the symbol map of the alphabet of
   suffixarray->encseq) and adds the value of gt_esa2shulengthquery() for
   each of them to *<totalgmatchlength>. Returns 0 on success and -1 if the
   sequence iterator cannot be created or reports an error. */
int gt_esa2shulengthqueryfiles(unsigned long *totalgmatchlength,
                               const Suffixarray *suffixarray,
                               const GtStrArray *queryfilenames,
                               GtError *err)
{
  GtSeqIterator *seqit;
  GtAlphabet *alphabet;
  const GtUchar *query;
  unsigned long querylen;
  char *desc = NULL;
  int retval;

  gt_error_check(err);
  alphabet = gt_encseq_alphabet(suffixarray->encseq);
  gt_assert(gt_str_array_size(queryfilenames) == 1UL);
  seqit = gt_seq_iterator_sequence_buffer_new(queryfilenames, err);
  if (!seqit)
  {
    return -1;
  }
  gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(alphabet));

  /* a positive value means: one more query sequence was delivered */
  while ((retval = gt_seq_iterator_next(seqit,
                                        &query,
                                        &querylen,
                                        &desc,
                                        err)) > 0)
  {
    *totalgmatchlength += gt_esa2shulengthquery(suffixarray,query,querylen);
  }
  gt_seq_iterator_delete(seqit);

  /* 0: all sequences read, negative: iterator error */
  return (retval < 0) ? -1 : 0;
}
/* Replaces the sequence IDs inside the Target attribute of <fn>.
   <target> is parsed into target IDs, ranges and strands. For every target ID
   the description is looked up via <region_mapping>, a new sequence ID is
   derived from that description with gt_regular_seqid_save() and stored in
   place of the old ID. If all lookups succeed, the Target string is rebuilt
   from the updated IDs and set as the GT_GFF_TARGET attribute of <fn>.
   Returns 0 on success; otherwise the non-zero error code of the failing
   parse or lookup is returned and <fn> is left unchanged. */
static int m2i_change_target_seqids(GtFeatureNode *fn, const char *target,
                                    GtRegionMapping *region_mapping,
                                    GtError *err)
{
  GtStrArray *target_ids;
  GtArray *target_ranges, *target_strands;
  GtStr *desc, *new_seqid;
  unsigned long i;
  int had_err;
  gt_error_check(err);
  gt_assert(fn && target && region_mapping);
  target_ids = gt_str_array_new();
  target_ranges = gt_array_new(sizeof (GtRange));
  target_strands = gt_array_new(sizeof (GtStrand));
  desc = gt_str_new();
  new_seqid = gt_str_new();
  had_err = gt_gff3_parser_parse_all_target_attributes(target, false,
                                                       target_ids,
                                                       target_ranges,
                                                       target_strands, "", 0,
                                                       err);
  for (i = 0; !had_err && i < gt_str_array_size(target_ids); i++) {
    GtStr *seqid;
    /* both buffers are reused for every target ID */
    gt_str_reset(desc);
    gt_str_reset(new_seqid);
    seqid = gt_str_array_get_str(target_ids, i);
    had_err = gt_region_mapping_get_description(region_mapping, desc, seqid,
                                                err);
    /* only overwrite the stored ID if the lookup succeeded; otherwise
       <new_seqid> is still empty and must not replace the original ID */
    if (!had_err) {
      gt_regular_seqid_save(new_seqid, desc);
      gt_str_array_set(target_ids, i, new_seqid);
    }
  }
  if (!had_err) {
    GtStr *new_target = gt_str_new();
    gt_gff3_parser_build_target_str(new_target, target_ids, target_ranges,
                                    target_strands);
    gt_feature_node_set_attribute(fn, GT_GFF_TARGET, gt_str_get(new_target));
    gt_str_delete(new_target);
  }
  gt_str_delete(new_seqid);
  gt_str_delete(desc);
  gt_array_delete(target_strands);
  gt_array_delete(target_ranges);
  gt_str_array_delete(target_ids);
  return had_err;
}
Esempio n. 28
0
/* Tool runner: collects the input file names argv[parsed_args..argc-1],
   determines the index name and encodes the files.
   If no index name was given, it is derived from the base name of the input
   file; with more than one input file this is an error and -indexname has to
   be supplied. After successful encoding, statistics are shown if requested
   via the showstats argument.
   Returns 0 on success, otherwise a non-zero error code (with <err> set by
   this function or passed on to encode_sequence_files()). */
static int gt_encseq_encode_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, GT_UNUSED void *tool_arguments,
                               GtError *err)
{
  GtEncseqEncodeArguments *arguments =
                                      (GtEncseqEncodeArguments*) tool_arguments;
  GtStrArray *inputs;
  int argidx,
      status = 0;
  gt_error_check(err);

  /* everything after the parsed options is an input file */
  inputs = gt_str_array_new();
  argidx = parsed_args;
  while (argidx < argc) {
    gt_str_array_add_cstr(inputs, argv[argidx]);
    argidx++;
  }

  if (gt_str_length(arguments->indexname) == 0UL) {
    if (gt_str_array_size(inputs) <= 1UL) {
      /* no explicit index name: fall back to the base name of the input */
      char *derived = gt_basename(gt_str_array_get(inputs, 0UL));
      gt_str_set(arguments->indexname, derived);
      gt_free(derived);
    } else {
      gt_error_set(err,"if more than one input file is given, then "
                       "option -indexname is mandatory");
      status = -1;
    }
  }

  if (status == 0) {
    gt_assert(gt_str_length(arguments->indexname) > 0UL);
    status = encode_sequence_files(inputs,
                                   arguments->eopts,
                                   gt_str_get(arguments->indexname),
                                   arguments->verbose,
                                   arguments->no_esq_header,
                                   err);
    if (status == 0 && arguments->showstats)
      show_encoded_statistics(inputs, gt_str_get(arguments->indexname));
  }

  gt_str_array_delete(inputs);
  return status;
}
Esempio n. 29
0
/* Entry point of the index merging tool.
   Parses the command line into the name of the index to store and the table
   of input index names, prints both to stdout and then calls
   gt_performtheindexmerging().
   Returns 0 on success or if the option parser requests an exit, and -1 if
   option parsing or the merging failed. All locally created objects are
   released on every path. */
int gt_mergeesa(int argc, const char **argv, GtError *err)
{
    GtStr *storeindex;
    GtStrArray *indexnametab;
    bool haserr = false,
         domerge = true;
    int parsed_args;

    gt_error_check(err);

    storeindex = gt_str_new();
    indexnametab = gt_str_array_new();
    switch (parse_options(storeindex, indexnametab, &parsed_args, argc, argv,
                          err)) {
    case GT_OPTION_PARSER_OK:
        break;
    case GT_OPTION_PARSER_ERROR:
        haserr = true;
        break;
    case GT_OPTION_PARSER_REQUESTS_EXIT:
        /* nothing to do, but fall through to the common cleanup below instead
           of returning directly, which would leak both objects */
        domerge = false;
        break;
    }
    if (domerge && !haserr)
    {
        GtUword i;
        GtLogger *logger;

        printf("# storeindex=%s\n",gt_str_get(storeindex));
        for (i=0; i<gt_str_array_size(indexnametab); i++)
        {
            printf("# input=%s\n",gt_str_array_get(indexnametab,i));
        }
        logger = gt_logger_new(false, GT_LOGGER_DEFLT_PREFIX, stdout);
        if (gt_performtheindexmerging(storeindex,
                                      indexnametab,
                                      logger,
                                      err) != 0)
        {
            haserr = true;
        }
        gt_logger_delete(logger);
    }
    gt_str_delete(storeindex);
    gt_str_array_delete(indexnametab);
    return haserr ? -1 : 0;
}
Esempio n. 30
0
/* Delivers the next node of the feature-in stream <ns> in <*gn>.
   On the first call the stream is initialized via feature_in_stream_init().
   Nodes queued in the region cache are handed out first. After that, the
   features are delivered one sequence ID at a time: the features of the
   current sequence ID are fetched from the feature index, sorted with
   gt_genome_node_compare() and reversed, so that popping from the end of the
   array yields them in sorted order. Each delivered feature is a new
   reference. <*gn> is set to NULL once all sequence IDs have been processed.
   Returns 0 on success (including end of stream) and -1 if the feature index
   could not deliver the features for a sequence ID. */
static int feature_in_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                   GtError *error)
{
  GtFeatureInStream *stream = feature_in_stream_cast(ns);
  GtGenomeNode *feat;
  gt_error_check(error);

  if (!stream->init)
  {
    feature_in_stream_init(stream);
    stream->init = true;
  }

  if (gt_queue_size(stream->regioncache) > 0)
  {
    GtGenomeNode *region = gt_queue_get(stream->regioncache);
    *gn = region;
    return 0;
  }

  /* Refill the feature cache. This is a loop rather than a single attempt so
     that a sequence ID without any features is skipped instead of popping
     from an empty array below. */
  while (stream->featurecache == NULL ||
         gt_array_size(stream->featurecache) == 0)
  {
    const char *seqid;

    if (stream->featurecache != NULL)
    {
      gt_array_delete(stream->featurecache);
      stream->featurecache = NULL;
    }

    if (stream->seqindex == gt_str_array_size(stream->seqids))
    {
      /* all sequence IDs consumed: end of stream */
      *gn = NULL;
      return 0;
    }

    seqid = gt_str_array_get(stream->seqids, stream->seqindex++);
    stream->featurecache = gt_feature_index_get_features_for_seqid(stream->fi,
                                                                   seqid,
                                                                   error);
    if (stream->featurecache == NULL)
    {
      /* NOTE(review): assumes the feature index reports failure by returning
         NULL with <error> set -- confirm against the feature index API */
      *gn = NULL;
      return -1;
    }
    gt_array_sort(stream->featurecache, (GtCompare)gt_genome_node_compare);
    gt_array_reverse(stream->featurecache);
  }

  feat = *(GtGenomeNode **)gt_array_pop(stream->featurecache);
  *gn = gt_genome_node_ref(feat);
  return 0;
}