/**
 * Jaccard coefficient (intersection over union) for class n.
 *
 * The intersection size is the diagonal entry _matrix[n][n]; the union size
 * is rowSum(n) + colSum(n) minus that diagonal entry. When the two sizes
 * compare equal (which includes the case where both are zero) the result is
 * 1.0 and no division is performed.
 *
 * CHECK-fails unless the matrix has more than n rows and row n has more
 * than n entries.
 */
double ConfusionMatrix::jaccard(int n) const
{
    const size_t cls = static_cast<size_t>(n);
    CHECK((_matrix.size() > cls) && (_matrix[n].size() > cls));

    const double overlap = static_cast<double>(_matrix[n][n]);
    const double combined = static_cast<double>(rowSum(n) + colSum(n) - _matrix[n][n]);

    if (overlap == combined) {
        return 1.0;
    }
    return overlap / combined;
}
void ConfusionMatrix::printPrecisionRecall(const char *header) const { if (header == NULL) { LOG(INFO) << "--- class-specific recall/precision ---"; } else { LOG(INFO) << header; } // recall LOG(INFO) << ROW_BEGIN; for (size_t i = 0; i < _matrix.size(); i++) { if (i > 0) { LOG(INFO) << COL_SEP; } double r = (_matrix[i].size() > i) ? (double)_matrix[i][i] / (double)rowSum(i) : 0.0; LOG(INFO) << r; } LOG(INFO) << ROW_END; // precision LOG(INFO) << ROW_BEGIN; for (size_t i = 0; i < _matrix.size(); i++) { if (i > 0) { LOG(INFO) << COL_SEP; } double p = (_matrix[i].size() > i) ? (double)_matrix[i][i] / (double)colSum(i) : 1.0; LOG(INFO) << p; } LOG(INFO) << ROW_END; }
int main() { generateMatrix(); std::vector<int> colSum(MatrixSize); for(int i = 0; (i < 100); i++) { columnSum2(colSum); } std::cout << "colSum[10] = " << colSum[10] << std::endl; return 0; }
/**
 * Mean of the per-class precision values.
 *
 * Each class c contributes _matrix[c][c] / colSum(c); a row too short to
 * contain its own diagonal entry contributes 1.0. The sum is divided by the
 * number of rows in the matrix.
 */
double ConfusionMatrix::avgPrecision() const
{
    const size_t numClasses = _matrix.size();

    double precisionSum = 0.0;
    for (size_t c = 0; c < numClasses; ++c) {
        if (_matrix[c].size() > c) {
            precisionSum += static_cast<double>(_matrix[c][c]) / static_cast<double>(colSum(c));
        } else {
            precisionSum += 1.0;
        }
    }

    return precisionSum / static_cast<double>(numClasses);
}
/**
 * Mean of the per-class Jaccard coefficients.
 *
 * For each class c whose row is long enough to hold its diagonal entry, the
 * coefficient is _matrix[c][c] divided by
 * (rowSum(c) + colSum(c) - _matrix[c][c]); when numerator and denominator
 * compare equal the coefficient is taken as 1.0. Rows that are too short
 * add nothing to the sum but still count in the divisor, which is the
 * number of rows in the matrix.
 */
double ConfusionMatrix::avgJaccard() const
{
    const size_t numClasses = _matrix.size();

    double jaccardSum = 0.0;
    for (size_t c = 0; c < numClasses; ++c) {
        if (_matrix[c].size() > c) {
            const double overlap = static_cast<double>(_matrix[c][c]);
            const double combined = static_cast<double>(rowSum(c) + colSum(c) - _matrix[c][c]);
            // Equal sizes (including zero over zero) score 1.0, which also
            // keeps us from dividing by zero.
            jaccardSum += (overlap == combined) ? 1.0 : (overlap / combined);
        }
    }

    return jaccardSum / static_cast<double>(numClasses);
}
void ConfusionMatrix::printJaccard(const char *header) const { if (header == NULL) { LOG(INFO) << "--- class-specific Jaccard coefficient ---"; } else { LOG(INFO) << header; } LOG(INFO) << ROW_BEGIN; for (size_t i = 0; i < _matrix.size(); i++) { if (i > 0) { LOG(INFO) << COL_SEP; } double p = (_matrix[i].size() > i) ? (double)_matrix[i][i] / (double)(rowSum(i) + colSum(i) - _matrix[i][i]) : 0.0; LOG(INFO) << p; } LOG(INFO) << ROW_END; }
void ConfusionMatrix::printF1Score(const char *header) const { if (header == NULL) { LOG(INFO) << "--- class-specific F1 score ---"; } else { LOG(INFO) << header; } LOG(INFO) << ROW_BEGIN; for (size_t i = 0; i < _matrix.size(); i++) { if (i > 0) { LOG(INFO) << COL_SEP; } // recall double r = (_matrix[i].size() > i) ? (double)_matrix[i][i] / (double)rowSum(i) : 0.0; // precision double p = (_matrix[i].size() > i) ? (double)_matrix[i][i] / (double)colSum(i) : 1.0; LOG(INFO) << ((2.0 * p * r) / (p + r)); } LOG(INFO) << ROW_END; }
void ConfusionMatrix::printColNormalized(const char *header) const { std::vector<double> totals; for (size_t i = 0; i < _matrix[0].size(); i++) { totals.push_back(colSum(i)); } if (header == NULL) { LOG(INFO) << "--- confusion matrix: (actual, predicted) ---"; } else { LOG(INFO) << header; } for (size_t i = 0; i < _matrix.size(); i++) { LOG(INFO) << ROW_BEGIN; for (size_t j = 0; j < _matrix[i].size(); j++) { if (j > 0) { LOG(INFO) << COL_SEP; } LOG(INFO) << ((double)_matrix[i][j] / totals[j]); } LOG(INFO) << ROW_END; } }
/*
	Compute entropy-based measures from the cells of a Confusion table.

	With `sum` the total over all cells, the quantities are:
		h    : - sum over cells of p * log2 (p), p = cell / sum (positive cells only)
		hx   : the same expression over the column totals
		hy   : the same expression over the row totals
		hygx : h - hx
		hxgy : h - hy
		uygx : (hy - hygx) / (hy + TINY)
		uxgy : (hx - hxgy) / (hx + TINY)
		uxy  : 2 * (hx + hy - h) / (hx + hy + TINY)
	Every output pointer is optional; a null pointer means that value is not stored.
*/
void Confusion_getEntropies (Confusion me, double *p_h, double *p_hx, double *p_hy, double *p_hygx, double *p_hxgy, double *p_uygx, double *p_uxgy, double *p_uxy) {
	autoNUMvector<double> rowTotal (1, my numberOfRows);
	autoNUMvector<double> colTotal (1, my numberOfColumns);

	/* Marginal totals and the grand total in a single pass. */
	double total = 0.0;
	for (long irow = 1; irow <= my numberOfRows; irow ++) {
		for (long icol = 1; icol <= my numberOfColumns; icol ++) {
			rowTotal [irow] += my data [irow] [icol];
			colTotal [icol] += my data [irow] [icol];
			total += my data [irow] [icol];
		}
	}

	/* Entropy of the row totals; empty rows are skipped. */
	double hy = 0.0;
	for (long irow = 1; irow <= my numberOfRows; irow ++) {
		if (rowTotal [irow] > 0.0) {
			const double p = rowTotal [irow] / total;
			hy -= p * NUMlog2 (p);
		}
	}

	/* Entropy of the column totals; empty columns are skipped. */
	double hx = 0.0;
	for (long icol = 1; icol <= my numberOfColumns; icol ++) {
		if (colTotal [icol] > 0.0) {
			const double p = colTotal [icol] / total;
			hx -= p * NUMlog2 (p);
		}
	}

	/* Entropy over the individual cells; non-positive cells are skipped. */
	double h = 0.0;
	for (long irow = 1; irow <= my numberOfRows; irow ++) {
		for (long icol = 1; icol <= my numberOfColumns; icol ++) {
			if (my data [irow] [icol] > 0.0) {
				h -= my data [irow] [icol] / total * NUMlog2 (my data [irow] [icol] / total);
			}
		}
	}

	/* Derived quantities; TINY keeps the denominators away from zero. */
	const double hygx = h - hx;
	const double hxgy = h - hy;
	const double uygx = (hy - hygx) / (hy + TINY);
	const double uxgy = (hx - hxgy) / (hx + TINY);
	const double uxy = 2.0 * (hx + hy - h) / (hx + hy + TINY);

	if (p_h) *p_h = h;
	if (p_hx) *p_hx = hx;
	if (p_hy) *p_hy = hy;
	if (p_hygx) *p_hygx = hygx;
	if (p_hxgy) *p_hxgy = hxgy;
	if (p_uygx) *p_uygx = uygx;
	if (p_uxgy) *p_uxgy = uxgy;
	if (p_uxy) *p_uxy = uxy;
}
boolean isCassette(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2,
		   int *altBpStartV, int *altBpEndV, int *startV, int *endV)
/* Return TRUE if vs, ve1 and ve2 describe a SIMPLE cassette exon.
 * Looking for the pattern:
 *
 *     he--->hs---->he---->hs
 *       \----------------/
 *
 * that is: vs is a hard end with edges to the two hard starts ve1 and
 * ve2, and some hard end vertex has an edge from ve1 and an edge to ve2.
 * To count as simple, that hard end and ve1 must each have a row sum of
 * 1, ve1 must have a column sum of 1, and edgesInArea() over
 * (ve2-1, vs+1) must report exactly 4 edges.
 *
 * On success *startV and *endV receive the closest upstream vertex of vs
 * and closest downstream vertex of ve2, *altBpStartV receives ve1 and
 * *altBpEndV receives the hard end that closes the cassette. If altRegion
 * is not NULL, three 6-field bed records (the alt region plus flankingSize
 * bases on either side) are written to altRegion, upStream100 and
 * downStream100. Output parameters are left untouched on FALSE. */
{
const int expectedEdges = 4;
unsigned char *types = ag->vTypes;
int *pos = ag->vPositions;
int mid = 0;

/* Quick check on the vertex types. */
if(!(types[vs] == ggHardEnd && types[ve1] == ggHardStart && types[ve2] == ggHardStart))
    return FALSE;

/* Both alternative edges out of vs have to exist. */
if(!em[vs][ve1] || !em[vs][ve2])
    return FALSE;

/* Try to find a hard end that connects ve1 and ve2. */
for(mid = 0; mid < ag->vertexCount; mid++)
    {
    struct bed bedUp, bedDown, bedAlt;
    struct bed *lowSide = NULL, *highSide = NULL;

    if(types[mid] != ggHardEnd || !em[ve1][mid] || !em[mid][ve2])
	continue;

    /* Make sure that our cassette only connects downstream,
       otherwise it is not so simple... */
    if(rowSum(em[mid], types, ag->vertexCount) != 1)
	continue;
    if(rowSum(em[ve1], types, ag->vertexCount) != 1)
	continue;
    if(colSum(em, types, ag->vertexCount, ve1) != 1)
	continue;
    if(edgesInArea(ag, em, ve2-1, vs+1) != expectedEdges)
	continue;

    *startV = findClosestUpstreamVertex(ag, em, vs);
    *endV = findClosestDownstreamVertex(ag, em, ve2);

    /* Initialize the beds used for reporting. */
    bedAlt.chrom = ag->tName;
    bedDown.chrom = ag->tName;
    bedUp.chrom = ag->tName;
    bedAlt.name = ag->name;
    bedDown.name = ag->name;
    bedUp.name = ag->name;
    bedAlt.score = altCassette;
    bedDown.score = altCassette;
    bedUp.score = altCassette;
    safef(bedUp.strand, sizeof(bedUp.strand), "%s", ag->strand);
    safef(bedDown.strand, sizeof(bedDown.strand), "%s", ag->strand);
    safef(bedAlt.strand, sizeof(bedAlt.strand), "%s", ag->strand);

    /* Alt spliced region. */
    bedAlt.chromStart = pos[ve1];
    bedAlt.chromEnd = pos[mid];

    /* Upstream/downstream flanks: on the "+" strand the flank at the
       lower coordinates is the upstream one, otherwise the roles swap. */
    if(sameString(ag->strand, "+"))
	{
	lowSide = &bedUp;
	highSide = &bedDown;
	}
    else
	{
	lowSide = &bedDown;
	highSide = &bedUp;
	}
    lowSide->chromStart = pos[ve1] - flankingSize;
    lowSide->chromEnd = pos[ve1];
    highSide->chromStart = pos[mid];
    highSide->chromEnd = pos[mid] + flankingSize;

    if(altRegion != NULL)
	{
	bedOutputN(&bedAlt, 6, altRegion, '\t','\n');
	bedOutputN(&bedUp, 6, upStream100, '\t', '\n');
	bedOutputN(&bedDown, 6, downStream100, '\t', '\n');
	}
    *altBpStartV = ve1;
    *altBpEndV = mid;
    return TRUE;
    }
return FALSE;
}
void lookForAltSplicing(char *db, struct altGraphX *ag, struct altSpliceSite **aSpliceList, int *altSpliceSites, int *altSpliceLoci, int *totalSpliceSites) /* Walk throught the altGraphX graph and look for evidence of altSplicing. */ { struct altSpliceSite *notAlt = NULL, *notAltList = NULL; bool **em = altGraphXCreateEdgeMatrix(ag); int vCount = ag->vertexCount; unsigned char *vTypes = ag->vTypes; int altSpliceSitesOrig = *altSpliceSites; int i,j,k; int altCount = 0; occassionalDot(); totalLoci++; for(i=0; i<vCount; i++) { struct altSpliceSite *aSplice = NULL; for(j=0; j<vCount; j++) { if(em[i][j] && areConsSplice(em, vCount, vTypes,i,j) && (agxEvCount(ag, i,j) >= minConfidence)) { for(k=j+1; k<vCount; k++) { if(em[i][k] && areConsSplice(em, vCount, vTypes, i, k) && (agxEvCount(ag, i,k) >= minConfidence)) { totalSplices++; if(aSplice == NULL) { splicedLoci++; aSplice = initASplice(ag, em, i, j, k); (*altSpliceSites)++; } else addSpliceSite(ag, em, aSplice, k); } } } /* Only want non alt-spliced exons for our controls. Some of these checks are historical and probably redundant....*/ if(em[i][j] && rowSum(em[i], ag->vTypes, ag->vertexCount) == 1 && rowSum(em[j], ag->vTypes, ag->vertexCount) == 1 && colSum(em, ag->vTypes, ag->vertexCount, j) == 1 && colSum(em, ag->vTypes, ag->vertexCount, j) == 1 && altGraphXEdgeVertexType(ag, i, j) == ggExon && areHard(ag, i, j) && areConstitutive(ag, em, i, j)) { notAlt = initASplice(ag, em, i, j, j); if(altRegion != NULL) outputControlExonBeds(ag, i, j); slAddHead(¬AltList, notAlt); } } if(aSplice != NULL) { if(altLogFile) fprintf(altLogFile, "%s\n", ag->name); slAddHead(aSpliceList, aSplice); } /* If we have a simple splice classfy it and log it. 
*/ if(aSplice != NULL && aSplice->altCount == 2) { altTotalCount++; fixOtherStrand(aSplice); logSpliceType(aSplice->spliceTypes[1], abs(aSplice->altBpEnds[1] - aSplice->altBpStarts[1])); if(aSplice->spliceTypes[1] == alt3Prime && (aSplice->altBpEnds[1] - aSplice->altBpStarts[1] == 3)) reportThreeBpBed(aSplice); if(doSScores) fillInSscores(db, aSplice, 1); if(RData != NULL) { outputForR(aSplice, 1, RData); } } /* Otherwise log it as altOther. Start at 1 as 0->1 is the first * splice, 1->2 is the first alt spliced.*/ else if(aSplice != NULL) { for(altCount=1; altCount<aSplice->altCount; altCount++) { altTotalCount++; altOtherCount++; } } } for(notAlt = notAltList; notAlt != NULL; notAlt = notAlt->next) { if(doSScores) fillInSscores(db, notAlt, 1); if(RData != NULL) { fixOtherStrand(notAlt); outputForR(notAlt, 1, RDataCont); } } if(*altSpliceSites != altSpliceSitesOrig) (*altSpliceLoci)++; altGraphXFreeEdgeMatrix(&em, vCount); }
/**
 * Precision for class n: _matrix[n][n] divided by colSum(n).
 *
 * CHECK-fails unless the matrix has more than n rows. If row n is too
 * short to contain its own diagonal entry, 1.0 is returned instead.
 */
double ConfusionMatrix::precision(int n) const
{
    const size_t cls = static_cast<size_t>(n);
    CHECK(_matrix.size() > cls);

    if (_matrix[n].size() <= cls) {
        return 1.0;
    }
    return static_cast<double>(_matrix[n][n]) / static_cast<double>(colSum(n));
}