/*
  Return the scaled score for aligning the amino acid <amino> against the
  reference character <origreferencechar>.

  The cases, in order of precedence:
  1.) either character is DASH: SCALEFACTOR * INDEL_PENALTY
  2.) both characters are accepted by <score_matrix_alphabet> (and <amino> is
      not WILDCARD):
      - both are GT_STOP_AMINO:         SCALEFACTOR * -2 * INDEL_PENALTY
      - exactly one is GT_STOP_AMINO:   SCALEFACTOR *  2 * INDEL_PENALTY
      - otherwise: SCALEFACTOR times the <score_matrix> entry of the two
        encoded characters
  3.) anything else: the neutral score 0.0
*/
static GthFlt get_score(GtScoreMatrix *score_matrix,
                        GtAlphabet *score_matrix_alphabet,
                        unsigned char amino,
                        unsigned char origreferencechar)
{
  GthFlt scalefactor   = SCALEFACTOR,
         indel_penalty = INDEL_PENALTY,
         score         = 0.0;

  if (amino == DASH || origreferencechar == DASH) {
    /* gap on either side: scaled indel penalty, irrespective of indel size */
    score = scalefactor * indel_penalty;
  }
  else if (amino != WILDCARD && amino <= CHAR_MAX &&
           gt_alphabet_valid_input(score_matrix_alphabet, amino) &&
           origreferencechar <= CHAR_MAX &&
           gt_alphabet_valid_input(score_matrix_alphabet,
                                   origreferencechar)) {
    if (amino == GT_STOP_AMINO && origreferencechar == GT_STOP_AMINO) {
      /* stop codon aligned with stop codon */
      score = scalefactor * -2 * indel_penalty;
    }
    else if (amino == GT_STOP_AMINO || origreferencechar == GT_STOP_AMINO) {
      /* stop codon aligned with something that is not a stop codon */
      score = scalefactor *  2 * indel_penalty;
    }
    else {
      /* regular amino acid substitution: look it up in the score matrix */
      const GtUchar amino_code = gt_alphabet_encode(score_matrix_alphabet,
                                                    amino),
                    ref_code   = gt_alphabet_encode(score_matrix_alphabet,
                                                    origreferencechar);
      const int wildcard_idx = gt_alphabet_size(score_matrix_alphabet) - 1;
      int row = amino_code,
          col = ref_code;

      /* codes equal to WILDCARD are redirected to the last matrix index */
      if (amino_code == WILDCARD)
        row = wildcard_idx;
      if (ref_code == WILDCARD)
        col = wildcard_idx;
      score = scalefactor * gt_score_matrix_get_score(score_matrix, row, col);
    }
  }
  /* else: the neutral score 0.0 is kept, e.g. for wild-card characters */

  return score;
}
Exemple #2
0
/* Print the score matrix <sm> to <fp>: first a header line with the decoded
   characters of the matrix alphabet, then one line per alphabet character
   consisting of that character followed by its row of scores. */
void gt_score_matrix_show(const GtScoreMatrix *sm, FILE *fp)
{
  unsigned row, col, alpha_size;

  gt_assert(sm && fp);
  alpha_size = gt_alphabet_size(sm->alphabet);

  /* header: a blank in the row-label column, then the column labels */
  gt_xfputc(' ', fp);
  for (col = 0; col < alpha_size; col++) {
    fprintf(fp, "  %c", gt_alphabet_decode(sm->alphabet, col));
  }
  gt_xfputc('\n', fp);

  /* body: row label followed by the scores of that row */
  for (row = 0; row < alpha_size; row++) {
    gt_xfputc(gt_alphabet_decode(sm->alphabet, row), fp);
    for (col = 0; col < alpha_size; col++) {
      fprintf(fp, " %2d", gt_score_matrix_get_score(sm, row, col));
    }
    gt_xfputc('\n', fp);
  }
}
Exemple #3
0
/* Return the score for replacing character <a> by character <b> according
   to <scorehandler>.

   - With a score matrix, the matrix entry for (<a>,<b>) is returned; the
     handler is asserted to work on mapped sequences in that case.
   - Without a score matrix and with mapped sequences, the match score is
     returned only if <a> equals <b> and neither is a special character;
     otherwise the mismatch score is returned.
   - Without a score matrix and with unmapped sequences, the characters are
     compared directly, after lowercasing both if the handler's downcase flag
     is set. */
GtWord gt_scorehandler_get_replacement(const GtScoreHandler *scorehandler,
                                       GtUchar a, GtUchar b)
{
  gt_assert(scorehandler != NULL);

  if (scorehandler->scorematrix != NULL)
  {
    gt_assert(scorehandler->mappedsequence);
    return gt_score_matrix_get_score(scorehandler->scorematrix,a,b);
  }

  if (scorehandler->mappedsequence)
  {
    if (ISSPECIAL(a) || ISSPECIAL(b) || a != b)
    {
      return scorehandler->mismatchscore;
    }
    return scorehandler->matchscore;
  }

  if (scorehandler->downcase)
  {
    a = tolower((int) a);
    b = tolower((int) b);
  }
  if (a == b)
  {
    return scorehandler->matchscore;
  }
  return scorehandler->mismatchscore;
}
Exemple #4
0
/* Create a new handler whose values are derived from the scores stored in
   <scorehandler>, and return it.

   With a reference value maxscore (computed below from the halved, rounded-up
   replacement scores and from 1 + gap extension score), the translation is:
     replacement value   = 2 * maxscore - replacement score
     gap extension value = maxscore - gap extension score
     gap opening value   = -(gap opening score)

   If <scorehandler> has a score matrix, every matrix entry is translated into
   a fresh matrix which is added to the new handler. Otherwise the match and
   mismatch scores are translated, and gt_scorehandler_plain() is applied to
   the new handler if <scorehandler> does not operate on mapped sequences. */
GtScoreHandler *gt_scorehandler2costhandler(const GtScoreHandler *scorehandler)
{
  GtScoreHandler *costhandler;

  gt_assert(scorehandler != NULL);
  if (scorehandler->scorematrix != NULL)
  {
    int maxscore = 0;
    GtWord ext_cost, open_cost;
    unsigned int row, col;
    const unsigned int dim
      = gt_score_matrix_get_dimension(scorehandler->scorematrix);
    GtScoreMatrix *costmatrix
      = gt_score_matrix_clone_empty(scorehandler->scorematrix);

    /* first pass: largest matrix entry, but at least 0 */
    for (row = 0; row < dim; row++)
    {
      for (col = 0; col < dim; col++)
      {
        const int entry
          = gt_score_matrix_get_score(scorehandler->scorematrix, row, col);

        if (maxscore < entry)
        {
          maxscore = entry;
        }
      }
    }
    maxscore = MAX(GT_DIV2(maxscore+1), 1 + scorehandler->gap_extension);

    /* second pass: translate every entry into the new matrix */
    for (row = 0; row < dim; row++)
    {
      for (col = 0; col < dim; col++)
      {
        const int entry
          = gt_score_matrix_get_score(scorehandler->scorematrix, row, col);

        gt_score_matrix_set_score(costmatrix, row, col, 2 * maxscore - entry);
      }
    }
    ext_cost = maxscore - scorehandler->gap_extension;
    open_cost = -scorehandler->gap_opening;
    /* match/mismatch values are passed as 0; the matrix is attached below */
    costhandler = gt_scorehandler_new(0, 0, open_cost, ext_cost);
    gt_scorehandler_add_scorematrix(costhandler, costmatrix);
  } else
  {
    const GtWord half_match = GT_DIV2(scorehandler->matchscore+1),
                 half_mismatch = GT_DIV2(scorehandler->mismatchscore+1),
                 gap_bound = MAX(1 + scorehandler->gap_extension,0),
                 maxscore = MAX(MAX(half_match, half_mismatch), gap_bound);
    const GtWord match_cost = 2 * maxscore - scorehandler->matchscore,
                 mismatch_cost = 2 * maxscore - scorehandler->mismatchscore,
                 ext_cost = maxscore - scorehandler->gap_extension,
                 open_cost = -scorehandler->gap_opening;

    costhandler = gt_scorehandler_new(match_cost,
                                      mismatch_cost,
                                      open_cost,
                                      ext_cost);
    if (!scorehandler->mappedsequence)
    {
      gt_scorehandler_plain(costhandler);
    }
  }
  return costhandler;
}
Exemple #5
0
/* Compute the score of <alignment> under an affine gap model and return it.
   An alignment of length 0 scores 0.

   The multi edit operations of the alignment are processed from the last
   list entry down to the first one.

   Replacement columns (Match, Mismatch, Replacement) are scored per column:
   - <mapped> with <scorematrix>: the matrix entry of the two characters
   - <mapped> without <scorematrix>: <matchscore> if neither character is
     special and both map to the same entry of <characters>, otherwise
     <mismatchscore>
   - not <mapped>: direct character comparison, after lowercasing both
     characters if <downcase> is set
   <mapped> and <downcase> must not both be set.

   A Deletion or Insertion entry contributes <gap_extension> times its number
   of steps; <gap_opening> is added unless the entry processed immediately
   before it was of the same gap type. */
static GtWord gt_alignment_eval_generic_with_affine_score(
                                               bool mapped,
                                               bool downcase,
                                               const GtUchar *characters,
                                               const GtAlignment *alignment,
                                               const GtScoreMatrix *scorematrix,
                                               GtWord matchscore,
                                               GtWord mismatchscore,
                                               GtWord gap_opening,
                                               GtWord gap_extension)
{
  GtUword eop_nr, step, pos_u = 0, pos_v = 0, num_eops;
  GtWord total = 0, gap_score;
  GtMultieop meop;
  /* type of the entry handled in the previous iteration; starts with a value
     different from Insertion */
  AlignmentEoptype prev_type = Insertion + 1;

  gt_assert(alignment != NULL && (!mapped || !downcase));
  if (gt_alignment_get_length(alignment) == 0)
    return 0;
#ifndef NDEBUG
  gt_assert(gt_alignment_is_valid(alignment));
#endif

  num_eops = gt_multieoplist_get_num_entries(alignment->eops);
  for (eop_nr = num_eops; eop_nr > 0; eop_nr--) {
    meop = gt_multieoplist_get_entry(alignment->eops, eop_nr - 1);
    switch (meop.type) {
      case Mismatch:
      case Match:
      case Replacement:
        for (step = 0; step < meop.steps; step++) {
          GtUchar cu = alignment->u[pos_u],
                  cv = alignment->v[pos_v];

          if (!mapped)
          {
            if (downcase)
            {
              cu = tolower((int) cu);
              cv = tolower((int) cv);
            }
            total += (cu != cv) ? mismatchscore : matchscore;
          } else if (scorematrix != NULL)
          {
            total += gt_score_matrix_get_score(scorematrix, cu, cv);
          } else if (ISSPECIAL(cu) || ISSPECIAL(cv) ||
                     characters[cu] != characters[cv])
          {
            total += mismatchscore;
          } else
          {
            total += matchscore;
          }
          pos_u++;
          pos_v++;
        }
        break;
      case Deletion:
        gap_score = gap_extension * meop.steps;
        /* the first processed entry, or a change of type, opens a new gap */
        if (eop_nr >= num_eops || prev_type != Deletion)
        {
          gap_score += gap_opening;
        }
        total += gap_score;
        pos_u += meop.steps;
        break;
      case Insertion:
        gap_score = gap_extension * meop.steps;
        /* the first processed entry, or a change of type, opens a new gap */
        if (eop_nr >= num_eops || prev_type != Insertion)
        {
          gap_score += gap_opening;
        }
        total += gap_score;
        pos_v += meop.steps;
        break;
    }
    prev_type = meop.type;
  }
  return total;
}