int ZLStatistics::correlation(const ZLStatistics& candidate, const ZLStatistics& pattern) { if (&candidate == &pattern) { return 1000000; } const unsigned long long candidateSum = candidate.getVolume(); const unsigned long long patternSum = pattern.getVolume(); const unsigned long long candidateSum2 = candidate.getSquaresVolume(); const unsigned long long patternSum2 = pattern.getSquaresVolume(); fb::shared_ptr<ZLStatisticsItem> ptrA = candidate.begin(); fb::shared_ptr<ZLStatisticsItem> ptrB = pattern.begin(); const fb::shared_ptr<ZLStatisticsItem> endA = candidate.end(); const fb::shared_ptr<ZLStatisticsItem> endB = pattern.end(); size_t count = 0; long long correlationSum = 0; while ((*ptrA != *endA) && (*ptrB != *endB)) { ++count; const int comparison = ptrA->sequence().compareTo(ptrB->sequence()); if (comparison < 0) { ptrA->next(); } else if (comparison > 0) { ptrB->next(); } else { correlationSum += ptrA->frequency() * ptrB->frequency(); ptrA->next(); ptrB->next(); } } while (*ptrA != *endA) { ++count; ptrA->next(); } while (*ptrB != *endB) { ++count; ptrB->next(); } const long long patternDispersion = patternSum2 * count - patternSum * patternSum; const long long candidateDispersion = candidateSum2 * count - candidateSum * candidateSum; const long long numerator = correlationSum * count - candidateSum * patternSum ; if ((patternDispersion == 0) || (candidateDispersion == 0)) { return 0; } const long long quotient1 = (1000 * numerator / patternDispersion); const long long quotient2 = (1000 * numerator / candidateDispersion); const int sign = (numerator >= 0) ? 1 : -1; return sign * quotient1 * quotient2; }
int ZLStatistics::correlation(const ZLStatistics& candidate, const ZLStatistics& pattern) { if (&candidate == &pattern) { return 1000000; } const unsigned long long candidateSum = candidate.getVolume(); const unsigned long long patternSum = pattern.getVolume(); const unsigned long long candidateSum2 = candidate.getSquaresVolume(); const unsigned long long patternSum2 = pattern.getSquaresVolume(); shared_ptr<ZLStatisticsItem> ptrA = candidate.begin(); shared_ptr<ZLStatisticsItem> ptrB = pattern.begin(); const shared_ptr<ZLStatisticsItem> endA = candidate.end(); const shared_ptr<ZLStatisticsItem> endB = pattern.end(); size_t count = 0; long long correlationSum = 0; while ((*ptrA != *endA) && (*ptrB != *endB)) { ++count; const int comparison = ptrA->sequence().compareTo(ptrB->sequence()); if (comparison < 0) { ptrA->next(); } else if (comparison > 0) { ptrB->next(); } else { correlationSum += ptrA->frequency() * ptrB->frequency(); ptrA->next(); ptrB->next(); } } while (*ptrA != *endA) { ++count; ptrA->next(); } while (*ptrB != *endB) { ++count; ptrB->next(); } const long long patternDispersion = patternSum2 * count - patternSum * patternSum; const long long candidateDispersion = candidateSum2 * count - candidateSum * candidateSum; const long long numerator = correlationSum * count - candidateSum * patternSum ; if ((patternDispersion == 0) || (candidateDispersion == 0)) { return 0; } int orderDiff = ::log10(patternDispersion) - ::log10(candidateDispersion); int patternMult = 1000; if (orderDiff >= 5) { patternMult = ::power10(6); } else if (orderDiff >= 3) { patternMult = ::power10(5); } else if (orderDiff >= 1) { patternMult = ::power10(4); } else if (orderDiff <= -1) { patternMult = ::power10(2); } else if (orderDiff <= -3) { patternMult = ::power10(1); } else if (orderDiff <= -5) { patternMult = ::power10(0); } int candidateMult = 1000000 / patternMult; const long long quotient1 = (patternMult * numerator / patternDispersion); const long long quotient2 = (candidateMult * numerator / candidateDispersion); const int sign = (numerator >= 0) ? 1 : -1; return sign * quotient1 * quotient2; }