Ejemplo n.º 1
0
/*
 * Create a score function over alphabet <a>.
 *
 * A score matrix is created for <a> and handed, together with the
 * <insertion> and <deletion> scores, to gt_score_function_new(). Every
 * matrix cell on the diagonal is then set to <match> and every other cell to
 * <mismatch>; finally the cell pairing the last alphabet symbol with itself
 * is overwritten with <mismatch> (the original note calls this "N-N").
 *
 * Returns the score function obtained from gt_score_function_new().
 */
static GtScoreFunction* gt_dna_scorefunc_new(GtAlphabet *a, int match,
                                             int mismatch, int insertion,
                                             int deletion)
{
  GtScoreMatrix *matrix = gt_score_matrix_new(a);
  GtScoreFunction *scorefunc = gt_score_function_new(matrix, insertion,
                                                     deletion);
  unsigned int row, col;

  for (row = 0; row < gt_alphabet_size(a); row++) {
    for (col = 0; col < gt_alphabet_size(a); col++) {
      int cell_score;

      if (col == row)
        cell_score = match;
      else
        cell_score = mismatch;
      gt_score_matrix_set_score(matrix, row, col, cell_score);
    }
  }

  /* the last symbol never matches itself: override its diagonal cell */
  gt_score_matrix_set_score(matrix,
                            gt_alphabet_size(a) - 1,
                            gt_alphabet_size(a) - 1,
                            mismatch);
  return scorefunc;
}
Ejemplo n.º 2
0
GtScoreHandler *gt_scorehandler2costhandler(const GtScoreHandler *scorehandler)
{
  GtScoreHandler *costhandler;

  gt_assert(scorehandler != NULL);
  if (scorehandler->scorematrix == NULL)
  {
    GtWord matchscore, mismatchscore, gap_extension, gap_opening,
           maxscore = MAX(MAX(GT_DIV2(scorehandler->matchscore+1),
                         GT_DIV2(scorehandler->mismatchscore+1)),
                     MAX(1 + scorehandler->gap_extension,0));

    matchscore = 2 * maxscore - scorehandler->matchscore;
    mismatchscore = 2 * maxscore - scorehandler->mismatchscore;
    gap_extension = maxscore - scorehandler->gap_extension;
    gap_opening = -scorehandler->gap_opening;
    costhandler = gt_scorehandler_new(matchscore,
                                      mismatchscore,
                                      gap_opening,
                                      gap_extension);
    if (!scorehandler->mappedsequence)
    {
      gt_scorehandler_plain(costhandler);
    }
  } else
  {
    int maxscore;
    GtWord gap_extension, gap_opening;
    unsigned int i, j,
                 dim = gt_score_matrix_get_dimension(scorehandler->scorematrix);
    GtScoreMatrix *costmatrix
      = gt_score_matrix_clone_empty(scorehandler->scorematrix);

    for (maxscore = 0, i = 0; i < dim; i++)
    {
      for (j = 0; j < dim; j++)
      {
        int val = gt_score_matrix_get_score(scorehandler->scorematrix, i, j);

        if (val > maxscore)
        {
          maxscore = val;
        }
      }
    }
    maxscore = MAX(GT_DIV2(maxscore+1), 1 + scorehandler->gap_extension);
    for (i = 0; i < dim; i++)
    {
      for (j = 0; j < dim; j++)
      {
        /* translate */
        int score = gt_score_matrix_get_score(scorehandler->scorematrix,i,j);
        gt_score_matrix_set_score(costmatrix, i, j, 2 * maxscore - score);
      }
    }
    gap_extension = maxscore - scorehandler->gap_extension;
    gap_opening = -scorehandler->gap_opening;
    costhandler = gt_scorehandler_new( 0,0, gap_opening, gap_extension);
    gt_scorehandler_add_scorematrix(costhandler,costmatrix);
  }
  return costhandler;
}
Ejemplo n.º 3
0
/*
 * Parse one score line of a score matrix file and store its scores in <sm>.
 *
 * The current token of <tz> must be the single character that labels the
 * line. It is followed by integer scores, the i-th of which is stored for
 * the pair (line character, i-th entry of <index_to_alpha_char_mapping>).
 * Characters that gt_alphabet_encode() maps to WILDCARD are stored at index
 * gt_alphabet_num_of_chars(sm->alphabet). Parsing stops once the tokenizer
 * reports the start of a new line or runs out of tokens.
 *
 * <parsed_characters> is a table indexed by character value; a non-zero
 * entry means that a line for this character was already seen. The entry of
 * the line character is set to GT_UNDEF_CHAR.
 * NOTE(review): the index is taken as unsigned char, so the table is assumed
 * to have at least UCHAR_MAX + 1 entries -- confirm at the caller.
 *
 * Returns 0 on success. On failure a non-zero value is returned and <err> is
 * set: the label is not a single character, the character was seen before, a
 * score is not a valid integer (as decided by gt_parse_int_line()), or the
 * line holds more scores than <index_to_alpha_char_mapping> has entries.
 */
static int parse_score_line(GtScoreMatrix *sm, GtTokenizer *tz,
                            GtArray *index_to_alpha_char_mapping,
                            char *parsed_characters, GtError *err)
{
  unsigned int num_of_chars, i = 0;
  char amino_acid = '\0';
  int score, had_err = 0;
  GtStr *token;
  gt_assert(sm && tz && index_to_alpha_char_mapping && parsed_characters);
  gt_error_check(err);
  token = gt_tokenizer_get_token(tz);
  gt_assert(token);
  if (gt_str_length(token) != 1) {
    gt_error_set(err, "illegal character token '%s' on line %lu in file '%s'",
                 gt_str_get(token), gt_tokenizer_get_line_number(tz),
                 gt_tokenizer_get_filename(tz));
    had_err = -1;
  }
  /* the duplicate check only runs for a well-formed label, so a rejected
     token is reported exactly once */
  if (!had_err) {
    unsigned char table_idx;
    amino_acid = gt_str_get(token)[0];
    /* plain char may be signed: never index the table with a negative value */
    table_idx = (unsigned char) amino_acid;
    /* check for character duplications */
    if (parsed_characters[table_idx]) {
      gt_error_set(err,
                   "multiple character '%c' entry on line %lu in file '%s'",
                   amino_acid, gt_tokenizer_get_line_number(tz),
                   gt_tokenizer_get_filename(tz));
      had_err = -1;
    }
    parsed_characters[table_idx] = GT_UNDEF_CHAR;
  }
  gt_str_delete(token);
  if (!had_err) {
    num_of_chars = gt_alphabet_num_of_chars(sm->alphabet);
    gt_tokenizer_next_token(tz);
    while ((token = gt_tokenizer_get_token(tz))) {
      unsigned int idx1, idx2;
      /* the tokenizer can return tokens which are empty except for a newline
         -> skip these */
      if (!strcmp(gt_str_get(token), "\n")) {
        gt_str_delete(token);
        gt_tokenizer_next_token(tz);
        if (gt_tokenizer_line_start(tz))
          break;
        continue;
      }
      /* token is not empty -> parse score */
      had_err = gt_parse_int_line(&score, gt_str_get(token),
                                  gt_tokenizer_get_line_number(tz),
                                  gt_tokenizer_get_filename(tz), err);
      if (had_err) {
        /* the token is still ours: release it before bailing out */
        gt_str_delete(token);
        break;
      }
      /* a line must not carry more scores than there are columns */
      if (i >= gt_array_size(index_to_alpha_char_mapping)) {
        gt_error_set(err, "too many scores on line %lu in file '%s'",
                     gt_tokenizer_get_line_number(tz),
                     gt_tokenizer_get_filename(tz));
        had_err = -1;
        gt_str_delete(token);
        break;
      }
      idx1 = gt_alphabet_encode(sm->alphabet, amino_acid);
      idx2 = gt_alphabet_encode(sm->alphabet, *(char*)
                                gt_array_get(index_to_alpha_char_mapping, i));
      gt_score_matrix_set_score(sm,
                                idx1 == WILDCARD ? num_of_chars : idx1,
                                idx2 == WILDCARD ? num_of_chars : idx2,
                                score);
      i++;
      gt_str_delete(token);
      gt_tokenizer_next_token(tz);
      if (gt_tokenizer_line_start(tz))
        break;
    }
  }
  return had_err;
}