Ejemplo n.º 1
0
// Build the union of the sequence IDs found in two feature indexes.
//
// refrfeats: feature index holding the reference annotations
// predfeats: feature index holding the prediction annotations
// logger:    receives one error message per index whose seqids could not be
//            fetched
//
// Returns the result of agn_gt_str_array_union() on the two seqid arrays, or
// NULL when the logger reports an error after both fetch attempts. The two
// intermediate seqid arrays are deleted before returning in either case.
GtStrArray* agn_seq_union(GtFeatureIndex *refrfeats, GtFeatureIndex *predfeats,
                          AgnLogger *logger)
{
  GtError *error = gt_error_new();
  GtStrArray *unionids = NULL;

  // Reference seqids; a failure is forwarded to the logger and then cleared so
  // the same error object can be reused for the prediction lookup
  GtStrArray *refrids = gt_feature_index_get_seqids(refrfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for reference: %s",
                         gt_error_get(error));
    gt_error_unset(error);
  }

  // Prediction seqids, handled the same way
  GtStrArray *predids = gt_feature_index_get_seqids(predfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for prediction: %s",
                         gt_error_get(error));
    gt_error_unset(error);
  }
  gt_error_delete(error);

  // NOTE(review): this inspects the logger as a whole, so an error logged
  // before this call presumably also leads to a NULL result -- confirm that
  // this is intended.
  if(!agn_logger_has_error(logger))
    unionids = agn_gt_str_array_union(refrids, predids);

  gt_str_array_delete(refrids);
  gt_str_array_delete(predids);
  return unionids;
}
Ejemplo n.º 2
0
Archivo: init.c Proyecto: 9beckert/TIR
static void proc_env_options(void)
{
  int argc;
  char *env_options, **argv;
  GtSplitter *splitter;
  GtError *err;
  /* construct argument vector from $GT_ENV_OPTIONS */
  env_options = getenv("GT_ENV_OPTIONS");
  if (!env_options)
    return;
  env_options = gt_cstr_dup(env_options); /* make writeable copy */
  splitter = gt_splitter_new();
  gt_splitter_split(splitter, env_options, strlen(env_options), ' ');
  argc = gt_splitter_size(splitter);
  argv = gt_cstr_array_preprend((const char**) gt_splitter_get_tokens(splitter),
                             "env");
  argc++;
  /* parse options contained in $GT_ENV_OPTIONS */
  err = gt_error_new();
  switch (parse_env_options(argc, (const char**) argv, err)) {
    case GT_OPTION_PARSER_OK: break;
    case GT_OPTION_PARSER_ERROR:
      fprintf(stderr, "error parsing $GT_ENV_OPTIONS: %s\n", gt_error_get(err));
      gt_error_unset(err);
      break;
    case GT_OPTION_PARSER_REQUESTS_EXIT: break;
  }
  gt_error_delete(err);
  gt_free(env_options);
  gt_splitter_delete(splitter);
  gt_cstr_array_delete(argv);
}
Ejemplo n.º 3
0
/* Unit test for GtTranslator.

   Scenarios exercised, in this order:
     - 3-frame translation of a valid sequence and comparison with the
       expected protein strings
     - search for a start codon, a stop codon and a list of arbitrary codons
       in that sequence (expected to succeed)
     - search with a codon list containing a two-character entry (expected to
       fail with GT_TRANSLATOR_ERROR)
     - translation of sequences containing the character 'Z' (expected to
       fail)
     - the three searches on a sequence where they are expected to end with
       GT_TRANSLATOR_END

   All translator calls report into a separate error object (test_err),
   which is unset before each scenario.

   err is checked with gt_error_check() on entry.
   NOTE(review): gt_ensure() is defined elsewhere; it presumably records a
   failed check in had_err/err -- confirm against its definition.

   Returns had_err, which is initialized to 0. */
int gt_translator_unit_test(GtError *err)
{
  int had_err = 0;
  GtTranslatorStatus test_errnum;
  GtTranslator *tr;
  GtCodonIterator *ci;
  GtError *test_err;
  GtStrArray *codons, *invalidcodons;
  /* valid input for the translation and the positive codon searches */
  const char *seq = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGT"
                    "GGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGAACTGGT"
                    "TACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGG";
  /* input for the searches that are expected to find nothing */
  const char *no_startcodon = "AAAAAAAAAATCATCTCCCCATTTTTTT";
  /* 'Z' as first character */
  const char *invalidseq  = "ZAGCTTTTCATTCTGACTGCAAATATGTCTCTGTGT";
  /* 'Z' in the interior of the sequence */
  const char *invalidseq2 = "AGCTTTTCATTCTGACZTGCAAATATGTCTCTGTGT";

  char translated;
  unsigned int frame;
  GtUword pos = 0;
  GtStr *protein[3]; /* one output string per reading frame */
  gt_error_check(err);

  test_err = gt_error_new();
  ci = gt_codon_iterator_simple_new(seq, (GtUword) strlen(seq), test_err);
  tr = gt_translator_new(ci);
  protein[0] = gt_str_new();
  protein[1] = gt_str_new();
  protein[2] = gt_str_new();
  /* codon list used for the arbitrary-codon searches */
  codons = gt_str_array_new();
  gt_str_array_add_cstr(codons, "ACG");
  gt_str_array_add_cstr(codons, "ACT");
  /* codon list whose second entry has only two characters */
  invalidcodons = gt_str_array_new();
  gt_str_array_add_cstr(invalidcodons, "ACG");
  gt_str_array_add_cstr(invalidcodons, "AC");

  /* do 3-frame translation */
  gt_error_unset(test_err);
  test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  while (!test_errnum && translated) {
    /* frame selects the output string the translated character belongs to */
    gt_str_append_char(protein[frame], translated);
    test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
    gt_ensure(
           test_errnum != GT_TRANSLATOR_ERROR && !gt_error_is_set(test_err));
  }
  /* the loop must have stopped because the input was exhausted */
  gt_ensure(
         test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* check 3-frame translation */
  gt_ensure(strcmp(gt_str_get(protein[0]),
                         "SFSF*LQRAICLCVD*KKSV**QLLNWLPAVSKLKFY*LR") == 0);
  gt_ensure(strcmp(gt_str_get(protein[1]),
                         "AFHSDCNGQYVSVWIKKRVSDSSF*TGYLP*VN*NFIDL") == 0);
  gt_ensure(strcmp(gt_str_get(protein[2]),
                         "LFILTATGNMSLCGLKKECLIAASELVTCRE*IKILLT*") == 0);

  /* find start codon -- positive */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_startcodon(tr, &pos, test_err);
  gt_ensure(!test_errnum && !gt_error_is_set(test_err));
  gt_ensure(pos == 11UL);

  /* find stop codon -- positive */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_stopcodon(tr, &pos, test_err);
  gt_ensure(!test_errnum && !gt_error_is_set(test_err));
  gt_ensure(pos == 12UL);

  /* find arbitrary codons -- positive */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_codon(tr, codons, &pos, test_err);
  gt_ensure(!test_errnum && !gt_error_is_set(test_err));
  gt_ensure(pos == 14UL);

  /* find arbitrary codons -- negative (invalid codons) */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_codon(tr, invalidcodons, &pos, test_err);
  gt_ensure(
         test_errnum == GT_TRANSLATOR_ERROR && gt_error_is_set(test_err));

  /* replace the codon iterator by one over the sequence starting with 'Z' */
  gt_error_unset(test_err);
  gt_codon_iterator_delete(ci);
  ci = gt_codon_iterator_simple_new(invalidseq,
                                    (GtUword) strlen(invalidseq),
                                    test_err);
  gt_ensure(ci && !gt_error_is_set(test_err));
  gt_translator_reset(tr, ci);
  /* check translation of sequence with invalid beginning */
  test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  gt_ensure(test_errnum && gt_error_is_set(test_err));

  /* check translation of sequence with invalid character within */
  gt_error_unset(test_err);
  gt_codon_iterator_delete(ci);
  ci = gt_codon_iterator_simple_new(invalidseq2,
                                    (GtUword) strlen(invalidseq2),
                                    test_err);
  gt_ensure(ci && !gt_error_is_set(test_err));
  gt_translator_reset(tr, ci);
  test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  /* translate until the translator stops; the characters appended to
     protein[] here are not checked afterwards, only the final status is */
  while (!test_errnum && translated) {
    gt_str_append_char(protein[frame], translated);
    test_errnum = gt_translator_next(tr, &translated, &frame, test_err);
  }
  gt_ensure(
         test_errnum == GT_TRANSLATOR_ERROR && gt_error_is_set(test_err));

  /* find start codon -- fail */
  gt_error_unset(test_err);
  gt_codon_iterator_delete(ci);
  ci = gt_codon_iterator_simple_new(no_startcodon,
                                    (GtUword) strlen(no_startcodon),
                                    test_err);
  gt_ensure(ci && !gt_error_is_set(test_err));
  gt_translator_reset(tr, ci);
  test_errnum = gt_translator_find_startcodon(tr, &pos, test_err);
  /* not finding anything is expected to end with GT_TRANSLATOR_END and
     without an error being set */
  gt_ensure(
         test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* find stop codon -- fail */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_stopcodon(tr, &pos, test_err);
  gt_ensure(
         test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* find arbitrary codons -- negative (none there) */
  gt_error_unset(test_err);
  gt_codon_iterator_rewind(ci);
  test_errnum = gt_translator_find_codon(tr, codons, &pos, test_err);
  gt_ensure(
         test_errnum == GT_TRANSLATOR_END && !gt_error_is_set(test_err));

  /* free all objects created by this test */
  gt_codon_iterator_delete(ci);
  gt_translator_delete(tr);
  gt_str_delete(protein[0]);
  gt_str_delete(protein[1]);
  gt_str_delete(protein[2]);
  gt_str_array_delete(codons);
  gt_str_array_delete(invalidcodons);
  gt_error_delete(test_err);

  return had_err;
}
Ejemplo n.º 4
0
/* Load the BSSM parameters stored in the file named filename.

   The file is located as follows:
     1. filename is used as given if gt_file_exists() accepts it.
     2. Otherwise the directory list of the environment variable BSSMENVNAME
        is searched; this is refused if filename contains GT_PATH_SEPARATOR.
     3. If step 2 failed, "<dir>/bssm/<filename>" is tried, where <dir> is
        what gt_file_find_exec_in_path() delivers for the program name stored
        in err. The earlier error is only discarded if that file exists.

   The located file is read with bssm_param_plain_read(); if that fails,
   load_old_binary_format() is tried, and its success discards the error of
   the first attempt.

   Returns the loaded parameters, or NULL if locating or loading failed. */
GthBSSMParam* gth_bssm_param_load(const char *filename, GtError *err)
{
    GthBSSMParam *loaded = NULL;
    GtStr *path = gt_str_new();
    int had_err = 0;

    gt_error_check(err);

    if (!gt_file_exists(filename)) {
        /* a filename with a path separator cannot be combined with the
           entries of the directory list */
        if (strchr(filename, GT_PATH_SEPARATOR) != NULL) {
            gt_error_set(err, "filename \"%s\" contains illegal symbol '%c': the "
                         "path list specified by environment variable \"%s\" "
                         "cannot be searched for it", filename,
                         GT_PATH_SEPARATOR, BSSMENVNAME);
            had_err = -1;
        }

        /* search the directory list of the environment variable */
        if (had_err == 0)
            had_err = gt_file_find_in_env(path, filename, BSSMENVNAME, err);

        /* a successful search which delivered no directory is an error, too */
        if (had_err == 0 && gt_str_length(path) == 0) {
            gt_error_set(err, "file \"%s\" not found in directory list specified "
                         "by environment variable %s", filename, BSSMENVNAME);
            had_err = -1;
        }

        if (had_err != 0) {
            /* fall back to the "bssm" directory relative to the binary */
            if (gt_file_find_exec_in_path(path, gt_error_get_progname(err),
                                          NULL) == 0) {
                gt_assert(gt_str_length(path));
                gt_str_append_char(path, GT_PATH_SEPARATOR);
                gt_str_append_cstr(path, "bssm");
                gt_str_append_char(path, GT_PATH_SEPARATOR);
                gt_str_append_cstr(path, filename);
                /* the fallback only counts if the file is really there */
                if (gt_file_exists(gt_str_get(path))) {
                    gt_error_unset(err);
                    had_err = 0;
                }
            }
        }
        else {
            /* directory found -> complete it with the filename */
            gt_str_append_char(path, GT_PATH_SEPARATOR);
            gt_str_append_cstr(path, filename);
        }
    }
    else
        gt_str_append_cstr(path, filename);

    if (had_err == 0) {
        /* new plain text format first, old binary format as second attempt */
        loaded = bssm_param_plain_read(gt_str_get(path), err);
        if (loaded == NULL) {
            loaded = load_old_binary_format(path, filename, NULL);
            if (loaded != NULL)
                gt_error_unset(err); /* binary format worked -> no error */
            else
                had_err = -1;
        }
    }

    gt_str_delete(path);

    if (had_err != 0) {
        gth_bssm_param_delete(loaded);
        return NULL;
    }
    return loaded;
}
Ejemplo n.º 5
0
// Count the entries of `seqids` that compare equal (strcmp) to `seqid`.
// Every entry is inspected; the scan does not stop at the first match.
static int agn_seq_intersection_count_seqid(GtStrArray *seqids,
                                            const char *seqid)
{
  GtUword k;
  int hits = 0;
  for(k = 0; k < gt_str_array_size(seqids); k++)
  {
    if(strcmp(seqid, gt_str_array_get(seqids, k)) == 0)
      hits++;
  }
  return hits;
}

// Build the intersection of the sequence IDs found in two feature indexes.
//
// refrfeats: feature index holding the reference annotations
// predfeats: feature index holding the prediction annotations
// logger:    receives errors for failed seqid lookups, a warning for every
//            seqid present in only one of the two indexes, and an error if
//            the intersection is empty
//
// Returns the result of agn_gt_str_array_intersection() on the two seqid
// arrays (also when it is empty), or NULL when the logger reports an error
// after both fetch attempts. The two intermediate seqid arrays are deleted
// before returning in either case.
GtStrArray* agn_seq_intersection(GtFeatureIndex *refrfeats,
                                 GtFeatureIndex *predfeats, AgnLogger *logger)
{
  GtError *error = gt_error_new();

  // Reference seqids; a failure is forwarded to the logger and then cleared so
  // the same error object can be reused for the prediction lookup
  GtStrArray *refrids = gt_feature_index_get_seqids(refrfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for reference: %s",
                         gt_error_get(error));
    gt_error_unset(error);
  }

  // Prediction seqids, handled the same way
  GtStrArray *predids = gt_feature_index_get_seqids(predfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for prediction: %s",
                         gt_error_get(error));
    gt_error_unset(error);
  }
  gt_error_delete(error);

  // NOTE(review): this inspects the logger as a whole, so an error logged
  // before this call presumably also leads to a NULL result -- confirm that
  // this is intended.
  if(agn_logger_has_error(logger))
  {
    gt_str_array_delete(refrids);
    gt_str_array_delete(predids);
    return NULL;
  }

  GtStrArray *shared = agn_gt_str_array_intersection(refrids, predids);
  GtUword idx;

  // Warn about reference sequences missing from the intersection
  for(idx = 0; idx < gt_str_array_size(refrids); idx++)
  {
    const char *refrid = gt_str_array_get(refrids, idx);
    if(agn_seq_intersection_count_seqid(shared, refrid) == 0)
    {
      agn_logger_log_warning(logger, "no prediction annotations found for "
                             "sequence '%s'", refrid);
    }
  }

  // Warn about prediction sequences missing from the intersection
  for(idx = 0; idx < gt_str_array_size(predids); idx++)
  {
    const char *predid = gt_str_array_get(predids, idx);
    if(agn_seq_intersection_count_seqid(shared, predid) == 0)
    {
      agn_logger_log_warning(logger, "no reference annotations found for "
                             "sequence '%s'", predid);
    }
  }

  // An empty intersection is logged as an error, but the (empty) array is
  // still returned
  if(gt_str_array_size(shared) == 0)
  {
    agn_logger_log_error(logger, "no sequences in common between reference and "
                         "prediction");
  }

  gt_str_array_delete(refrids);
  gt_str_array_delete(predids);
  return shared;
}