int findPositiveMedian(double *data, int count, double minVal)
/* Collect the entries of data[0..count-1] that are >= minVal and pass them
 * to doubleMedian().  The median is only computed when at least three
 * entries qualify; otherwise the result stays at -1.  The double result
 * is converted to int on return. */
{
double *kept;
int keptCount = 0;
int idx = 0;
double result = -1;

/* Scratch array sized for the worst case where every entry qualifies. */
AllocArray(kept, count);
while (idx < count)
    {
    double value = data[idx++];
    /* Written as a negated >= so that values which do not compare
     * (e.g. NaN) are skipped, exactly like a plain ">= minVal" test. */
    if (!(value >= minVal))
        continue;
    kept[keptCount++] = value;
    }

if (keptCount >= 3)
    result = doubleMedian(keptCount, kept);

freez(&kept);
return (int) result;
}
static void processResult(struct hash *chrHash, struct hash *coordHash, char *accName, unsigned querySize, int partsConsidered) { struct hashCookie cookie; struct hashEl *hashEl; int highCount = 0; int secondHighest = 0; char *ctgName = (char *)NULL; char *secondHighestName = (char *)NULL; struct hashEl *coords; int coordCount; int i; unsigned lowMark = BIGNUM; unsigned highMark = 0; unsigned range = 0; struct coordEl *coord; struct coordEl **coordListPt = (struct coordEl **) NULL; double *midPoints; double sum; double sumData = 0.0; double sumSquares = 0.0; double variance; double stddev; unsigned median; unsigned mean; int strandSum; /* find highest count chrom name */ cookie=hashFirst(chrHash); while ((hashEl = hashNext(&cookie)) != NULL) { int count = ptToInt(hashEl->val); verbose(2,"# %s %d\n", hashEl->name, count); if (count >= highCount) { secondHighest = highCount; if (secondHighestName) freeMem(secondHighestName); highCount = count; if (ctgName) { secondHighestName = cloneString(ctgName); freeMem(ctgName); } ctgName = cloneString(hashEl->name); } } verbose(2,"# %s %d highest count, next: %s %d\n", ctgName, highCount, secondHighestName, secondHighest); if (highCount == 0) return; if (highCount == secondHighest) { int baseCount0 = 0; int baseCount1 = 0; /* Try to break the tie by examining the number of bases covered in * each and take the one with the most */ baseCount0 = countBases(coordHash, ctgName); baseCount1 = countBases(coordHash, secondHighestName); if (baseCount0 == baseCount1) { verbose(1,"# ERROR TIE for high count %s %d highest count, next: %s %d TIE *\n", ctgName, highCount, secondHighestName, secondHighest); verbose(1,"# ERROR TIE base count0: %d, base count1: %d\n", baseCount0, baseCount1); } else if (baseCount1 > baseCount0) /* switch the names */ { char *t; t = cloneString(ctgName); freeMem(ctgName); ctgName = cloneString(secondHighestName); freeMem(secondHighestName); secondHighestName = cloneString(t); freeMem(t); } } /* for that highest 
count chrom, examine its coordinates, find high * and low */ coords = hashLookup(coordHash, ctgName); if (coords) coordListPt = coords->val; else coordListPt = NULL; if (coordListPt) coord = *coordListPt; else coord = NULL; coordCount = 0; sum = 0.0; sumData = 0.0; sumSquares = 0.0; strandSum = 0; while (coord != NULL) { double midPoint; if (coord->start < lowMark) lowMark = coord->start; if (coord->end > highMark) highMark = coord->end; midPoint = (double) coord->start + (double)(coord->end - coord->start) / 2.0; sum += midPoint; strandSum += coord->strand; sumData += midPoint; sumSquares += midPoint * midPoint; verbose(2,"# %d %s %u - %u %u %c\n", ++coordCount, coord->name, coord->start, coord->end, coord->qSize, (coord->strand == 1) ? '+' : '-'); coord = coord->next; } range = highMark - lowMark; variance = 0.0; stddev = 0.0; if (coordCount > 0) { unsigned usStdDev; unsigned startExtended; unsigned endExtended; int partsUsed = 0; mean = (unsigned) (sum / coordCount); if (coordCount > 1) { variance = (sumSquares - ((sumData*sumData)/coordCount)) / (coordCount - 1); if (variance > 0.0) stddev = sqrt(variance); } usStdDev = (unsigned) stddev; verbose(2,"# range: %u:%u = %u, Mean: %u, stddev: %u\n", lowMark, highMark, range, mean, usStdDev); midPoints = (double *) needMem(coordCount * sizeof(double)); coordListPt = coords->val; coord = *coordListPt; i = 0; while (coord != NULL) { midPoints[i++] = (double) coord->start + (double)(coord->end - coord->start) / 2.0; coord = coord->next; } median = (unsigned) doubleMedian(coordCount, midPoints); partsUsed = extendLimits(coordListPt, median, querySize, &startExtended, &endExtended, ctgName, partsConsidered); verbose(2, "# qSize: %u, Median: %u implies %u-%u %s\n#\textended to %u-%u\n", querySize, median, median - (querySize/2), median+(querySize/2), accName, startExtended, endExtended); verbose(2,"# %s total parts %d, parts used %d, percent used %% %7.2f\n", accName, partsConsidered, partsUsed, 100.0 * (double) partsUsed 
/ (double) partsConsidered); /* if BED output, output the line here, AGP was done in extendLimits */ if (!agp) { printf("%s\t%u\t%u\t%s\t%c\n", ctgName, startExtended, endExtended, accName, strandSum > (coordCount/2) ? '+' : '-'); } freeMem(midPoints); } else { verbose(1,"# ERROR %s - no coordinates found ? %s\n", accName, ctgName); } /* free the chrom coordinates lists */ cookie=hashFirst(chrHash); while ((hashEl = hashNext(&cookie)) != NULL) { coords = hashLookup(coordHash, hashEl->name); if (coords) coordListPt = coords->val; else coordListPt = NULL; if (coordListPt) coord = *coordListPt; else coord = NULL; while (coord != NULL) { freeMem(coord->name); coord = coord->next; } if (coordListPt) slFreeList(coordListPt); } freeMem(ctgName); } /* static void processResult() */