예제 #1
0
GtAlphabet* gt_alphabet_guess(const char *sequence, unsigned long seqlen)
{
  unsigned long i;
  gt_assert(sequence && seqlen);
  for (i = 0;
       i < seqlen && i < (unsigned long) ALPHABET_GUESS_MAX_LENGTH;
        i++) {
    if (strchr(ALPHABET_GUESS_PROTEIN_CHARS, sequence[i]) != NULL)
      return gt_alphabet_new_protein();
  }
  return gt_alphabet_new_dna();
}
예제 #2
0
static int alphabet_lua_new_protein(lua_State *L)
{
  GtAlphabet **alpha;
  gt_assert(L);
  alpha = lua_newuserdata(L, sizeof *alpha);
  gt_assert(alpha);
  *alpha = gt_alphabet_new_protein();
  gt_assert(*alpha);
  luaL_getmetatable(L, ALPHABET_METATABLE);
  lua_setmetatable(L, -2);
  return 1;
}
예제 #3
0
int gt_alphabet_unit_test(GtError *err)
{
  int had_err = 0;
  GtAlphabet *a, *b, *c;
  gt_error_check(err);

  a = gt_alphabet_new_dna();
  b = gt_alphabet_new_protein();
  c = gt_alphabet_clone(a);

  gt_ensure(had_err, gt_alphabet_equals(a, a));
  gt_ensure(had_err, gt_alphabet_equals(b, b));
  gt_ensure(had_err, gt_alphabet_equals(c, c));

  gt_ensure(had_err, !gt_alphabet_equals(a, b));
  gt_ensure(had_err, gt_alphabet_equals(a, c));

  gt_alphabet_delete(a);
  gt_alphabet_delete(b);
  gt_alphabet_delete(c);

  return had_err;
}
예제 #4
0
파일: score_matrix.c 프로젝트: 9beckert/TIR
GtScoreMatrix* gt_score_matrix_new_read_protein(const char *path, GtError *err)
{
  GtAlphabet *protein_alpha;
  GtScoreMatrix *sm;
  int had_err;

  gt_error_check(err);
  gt_assert(path);

  /* create score matrix */
  protein_alpha = gt_alphabet_new_protein();
  sm = gt_score_matrix_new(protein_alpha);
  gt_alphabet_delete(protein_alpha);

  /* parse matrix file */
  had_err = parse_score_matrix(sm, path, err);

  if (had_err) {
    gt_score_matrix_delete(sm);
    return NULL;
  }
  return sm;
}
예제 #5
0
static int readfirstvaluesfromfile(GtEncseqMetadata *emd,
                                   const char *indexname, GtError *err)
{
  FILE *fp;
  bool had_err = false;
  unsigned long cc, byteoffset = 0, alphatype;
  char *alphadef;

  gt_error_check(err);
  fp = gt_fa_fopen_with_suffix(indexname, GT_ENCSEQFILESUFFIX, "rb", err);
  if (fp == NULL)
  {
    had_err = true;
  }
  NEXTFREAD(emd->is64bit);
  if (!had_err)
  {
    if ((int) emd->is64bit > 1)
    {
      gt_error_set(err, "illegal platform code %u in \"%s%s\"", emd->is64bit,
                   indexname, GT_ENCSEQFILESUFFIX);
      had_err = true;
    }
    if (!had_err && ((emd->is64bit && sizeof (unsigned long) != (size_t) 8)
          || (!emd->is64bit && sizeof (unsigned long) == (size_t) 8)))
    {
      gt_error_set(err, "trying to load 64-bit index \"%s%s\" on a 32-bit "
                        "system or vice versa -- please use correct index "
                        "for this platform", indexname, GT_ENCSEQFILESUFFIX);
      had_err = true;
    }
  }
  NEXTFREAD(emd->version);
  if (!had_err)
  {
    if (emd->version < GT_ENCSEQ_VERSION)    {
      gt_error_set(err, "index \"%s%s\" is format version %lu, current is "
                        "%lu -- please re-encode",
                        indexname, GT_ENCSEQFILESUFFIX,
                        emd->version, GT_ENCSEQ_VERSION);
      had_err = true;
    }
  }
  NEXTFREAD(cc);
  if (!had_err)
  {
    if (cc >= (unsigned long) GT_ACCESS_TYPE_UNDEFINED)
    {
      gt_error_set(err, "illegal type %lu in \"%s%s\"", cc,
                   indexname, GT_ENCSEQFILESUFFIX);
      had_err = true;
    }
  }
  if (!had_err) {
    emd->sat = (GtEncseqAccessType) cc;
    NEXTFREAD(emd->totallength);
    NEXTFREAD(emd->numofdbsequences);
    NEXTFREAD(emd->numofdbfiles);
    NEXTFREAD(emd->lengthofdbfilenames);
    NEXTFREAD(emd->specialcharinfo);
    NEXTFREAD(emd->minseqlen);
    NEXTFREAD(emd->maxseqlen);
  }
  NEXTFREAD(alphatype);
  if (!had_err) {
    if (alphatype > 2UL) {
      gt_error_set(err, "illegal alphabet type %lu in \"%s%s\"", alphatype,
                   indexname, GT_ENCSEQFILESUFFIX);
      had_err = true;
    }
  }
  if (!had_err) {
    NEXTFREAD(emd->lengthofalphadef);
    switch (alphatype) {
      case 0:
        emd->alpha = gt_alphabet_new_dna();
        break;
      case 1:
        emd->alpha = gt_alphabet_new_protein();
        break;
      case 2:
        gt_assert(emd->lengthofalphadef > 0);
        emd->customalphabet = true;
        alphadef = gt_malloc(sizeof (char) * emd->lengthofalphadef);
        NEXTFREADWSIZE(*(alphadef), emd->lengthofalphadef);
        emd->alpha = gt_alphabet_new_from_string(alphadef,
                                                 emd->lengthofalphadef,
                                                 err);
        if (!emd->alpha) {
          had_err = true;
        }
        gt_free(alphadef);
        break;
    }
    gt_assert(emd->alpha != NULL);
  }
  gt_fa_xfclose(fp);
  return had_err ? -1 : 0;
}
예제 #6
0
static int gt_linspace_align_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtLinspaceArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlignment *align;
  GtWord left_dist = 0, right_dist = 0;
  GtSequenceTable *sequence_table1, *sequence_table2;
  GtLinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler = NULL;
  GtTimer *linspacetimer = NULL;
  GtAlphabet *alphabet = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  sequence_table1 = gt_sequence_table_new();
  sequence_table2 = gt_sequence_table_new();
  align = gt_alignment_new();
  spacemanager = gt_linspace_management_new();
  gt_linspace_management_set_TSfactor(spacemanager,arguments->timesquarefactor);

  /* get sequences */
  if (gt_str_array_size(arguments->strings) > 0)
  {
    get_onesequence(sequence_table1, arguments->strings, 0);
    sequence_table1->size++;
    get_onesequence(sequence_table2, arguments->strings, 1);
    sequence_table2->size++;
  }
  else if (gt_str_array_size(arguments->files) > 0)
  {
    had_err = get_fastasequences(sequence_table1,
                                 gt_str_array_get_str(arguments->files,0),err);
    if (!had_err)
    {
      had_err = get_fastasequences(sequence_table2,
                                  gt_str_array_get_str(arguments->files,1),err);
    }
  }
  if (arguments->dna)
  {
    alphabet = gt_alphabet_new_dna();
  } else
  {
    gt_assert(arguments->protein);
    alphabet = gt_alphabet_new_protein();
  }
  gt_encode_sequence_table(alphabet,sequence_table1);
  gt_encode_sequence_table(alphabet,sequence_table2);
  if (!had_err)
  {
    scorehandler = gt_arguments2scorehandler(arguments,err);
    if (scorehandler == NULL)
    {
      had_err = -1;
    } else
    {
      if (arguments->global && arguments->protein && !arguments->has_costmatrix)
      {
        GtScoreHandler *costhandler = gt_scorehandler2costhandler(scorehandler);
        gt_scorehandler_delete(scorehandler);
        scorehandler = costhandler;
      }
    }
  }
  /* get diagonal band */
  if (!had_err && arguments->diagonal)
  {
    if (gt_str_array_size(arguments->diagonalbonds) > 0)
    {
      had_err = gt_parse_score_value(__LINE__,&left_dist,
                                  gt_str_array_get(arguments->diagonalbonds,0),
                                  false, err);
      if (!had_err)
      {
        had_err = gt_parse_score_value(__LINE__,&right_dist,
                                  gt_str_array_get(arguments->diagonalbonds,1),
                                  false, err);
      }
    }
  }
  if (!had_err && arguments->spacetime)
  {
    linspacetimer = gt_timer_new();
  }

  /* alignment functions with linear gap costs */
  if (!had_err)
  {
    bool affine;

    if (gt_str_array_size(arguments->linearcosts) > 0)
    {
      affine = false;
    } else
    {
      gt_assert(gt_str_array_size(arguments->affinecosts) > 0);
      affine = true;
    }
    had_err = gt_all_against_all_alignment_check (
                            affine, align, arguments,
                            spacemanager,
                            scorehandler,
                            gt_alphabet_characters(alphabet),
                            gt_alphabet_wildcard_show(alphabet),
                            sequence_table1,
                            sequence_table2,
                            left_dist,
                            right_dist,
                            linspacetimer,err);
  }
  /*spacetime option*/
  if (!had_err && arguments->spacetime)
  {
    printf("# combined space peak in kilobytes: %f\n",
           GT_KILOBYTES(gt_linspace_management_get_spacepeak(spacemanager)));
    gt_timer_show_formatted(linspacetimer,"# TIME overall " GT_WD ".%02ld\n",
                            stdout);
  }
  gt_timer_delete(linspacetimer);
  gt_linspace_management_delete(spacemanager);
  gt_sequence_table_delete(sequence_table1);
  gt_sequence_table_delete(sequence_table2);
  gt_alignment_delete(align);
  gt_alphabet_delete(alphabet);
  gt_scorehandler_delete(scorehandler);
  return had_err;
}