Exemplo n.º 1
0
/*
 * Compute the ungapped Karlin parameters for the current match/mismatch
 * scoring scheme and store them in stat_ungapH, stat_ungapK and
 * stat_ungapLambda.
 *
 * dbCharProb     probability of each of the 16 codes on the database side
 * queryCharProb  probability of each of the 16 codes on the query side
 * scoringMatrix  score awarded for aligning database code i with query code j
 *
 * A histogram over the score range [stat_scoreBlock.mismatch,
 * stat_scoreBlock.match] is built: each (i, j) pair contributes
 * dbCharProb[i] * queryCharProb[j] to the bucket of scoringMatrix[i][j].
 * The histogram is then passed to BlastKarlinBlkCalc (see karlin.c).
 *
 * The process is terminated with exit(-1) when the score range is empty,
 * when memory cannot be allocated, when the matrix holds a score outside
 * the range, or when the resulting K is <= 0.001.
 */
void stat_calcUngapKarlinParameters(double *dbCharProb, double *queryCharProb, int scoringMatrix[16][16])
{
  int4 scoreProbSize, highest, lowest;
  double *scoreProb;
  int i, j, score;

  lowest = stat_scoreBlock.mismatch;
  highest = stat_scoreBlock.match;

  // An inverted range would give a zero or negative histogram size
  if (highest < lowest)
  {
    fprintf(stderr,"ERROR: invalid scoring scheme, match score is lower than mismatch score.\n");
    exit(-1);
  }

  scoreProbSize = highest-lowest+1;
  scoreProb = calloc((size_t) scoreProbSize, sizeof *scoreProb);
  if (scoreProb == NULL)
  {
    fprintf(stderr,"ERROR: unable to allocate memory for the score probabilities.\n");
    exit(-1);
  }

  for (i=0; i<16; i++) {
    for (j=0; j<16; j++) {
      score = scoringMatrix[i][j];

      // Refuse to write outside the histogram if the matrix disagrees
      // with the match/mismatch range
      if (score < lowest || score > highest)
      {
        fprintf(stderr,"ERROR: scoring matrix value %d is outside the range [%d, %d].\n",
                score, (int) lowest, (int) highest);
        exit(-1);
      }

      scoreProb[score - lowest] += dbCharProb[i] * queryCharProb[j];
    }
  }

  // calculate the Karlin parameters, refer to karlin.c
  // NOTE(review): the pointer is biased by -lowest, presumably because
  // BlastKarlinBlkCalc indexes the array by raw score in [lowest, highest];
  // confirm against karlin.c
  BlastKarlinBlkCalc(scoreProb-lowest, lowest, highest);

  // The histogram is not used past this point, so release it before the
  // early-exit check below
  free(scoreProb);

  stat_ungapH = BlastKarlin_H;
  stat_ungapK = BlastKarlin_K;
  stat_ungapLambda = BlastKarlin_lambda;

  if (stat_ungapK  <= 0.001)
  {
    fprintf(stderr,"ERROR: unable to calculate the KARLIN statistic for the given scoring scheme.\n");
    exit(-1);
  }
}
Exemplo n.º 2
0
/*
 * Derive the ungapped Karlin parameters (H, K and Lambda) for the query
 * described by PSSMatrix and store them in statistics_ungappedH,
 * statistics_ungappedK and statistics_ungappedLambda.
 *
 * A probability is accumulated for every score between
 * PSSMatrix.lowestValue and PSSMatrix.highestValue: each query position
 * whose code differs from encoding_numRegularLetters is paired with every
 * regular subject code, weighted by that code's background frequency and
 * by 1 / (number of such query positions). The result is normalized by the
 * sum of the background frequencies and passed to BlastKarlinBlkCalc
 * (see karlin.c).
 *
 * Terminates the process with exit(-1) if the resulting K is <= 0.0001.
 */
void statistics_calculateUngappedKarlinParameters(struct PSSMatrix PSSMatrix)
{
	// NOTE(review): declared static in the original although it is
	// reassigned on every call; kept as-is
	static float* residueFreqs;
	int4 maxScore, minScore;
	double* histogramBase;
	double* histogram;
	double contribution, freqTotal = 0;
	int4 regularCount = 0;
	int4 pos, cell;
	unsigned char residue;

	// Pick the background frequency table for the active alphabet
	if (encoding_alphabetType == encoding_nucleotide)
	{
		residueFreqs = Nucleotide_prob;
	}
	else
	{
		// Robinson&Robinson frequencies for the protein alphabet
		residueFreqs = Robinson_prob;
	}

	maxScore = PSSMatrix.highestValue;
	minScore = PSSMatrix.lowestValue;

	// One slot per possible score; `histogram` is biased so that it can be
	// indexed directly by score in [minScore, maxScore]
	histogramBase = (double*)global_malloc(sizeof(double) * (maxScore - minScore + 1));
	histogram = histogramBase - minScore;
	for (cell = minScore; cell <= maxScore; cell++)
	{
		histogram[cell] = 0.0;
	}

	// Count query positions treated as regular (non-ambiguous) letters.
	// The test is "!=" rather than "<" encoding_numRegularLetters.
	for (pos = 0; pos < statistics_querySize; pos++)
	{
		if (PSSMatrix.queryCodes[pos] != encoding_numRegularLetters)
		{
			regularCount++;
		}
	}

	// Accumulate, for every regular query position and regular subject code,
	// the chance of drawing that (position, code) pair at random
	for (pos = 0; pos < statistics_querySize; pos++)
	{
		if (PSSMatrix.queryCodes[pos] == encoding_numRegularLetters)
		{
			continue;
		}

		for (residue = 0; residue < encoding_numRegularLetters; residue++)
		{
			// Probability of this subject code combined with a uniformly
			// chosen regular query position
			contribution = residueFreqs[residue] / (float)regularCount;

			// Score of aligning this query position with the subject code
			cell = PSSMatrix.matrix[pos][residue];

			histogram[cell] += contribution;
		}
	}

	// Total of the background frequencies, used as the normalizing value
	for (residue = 0; residue < encoding_numRegularLetters; residue++)
	{
		freqTotal += residueFreqs[residue];
	}

	// Normalize every score probability by that total
	for (cell = minScore; cell <= maxScore; cell++)
	{
		histogram[cell] /= freqTotal;
	}

	// Compute Lambda, H and K from the score probabilities (see karlin.c)
	BlastKarlinBlkCalc(histogram, minScore, maxScore);

	statistics_ungappedH = BlastKarlin_H;
	statistics_ungappedK = BlastKarlin_K;
	statistics_ungappedLambda = BlastKarlin_lambda;

	if (statistics_ungappedK <= 0.0001)
	{
		fprintf(stderr, "Error: unable to calculate statistical significance for given scoring scheme\n");
		fflush(stderr);
		exit(-1);
	}

	// Release through the unbiased pointer obtained from the allocator
	free(histogramBase);
}