/*
 * Calculate adjustment for missing data in pairwise comparisons. This will
 * return a square matrix that is used to multiply the raw differences of a
 * distance matrix in order to scale the differences by the number of observed
 * loci.
 *
 * Every off-diagonal cell (i, j) and its mirror (j, i) hold N / (N - M), where
 * N is `nloc` and M is the value count_unique() returns for the missing-data
 * vectors of samples i and j. Every diagonal cell is 1. An empty `nas` list
 * yields a 0 x 0 matrix.
 *
 * Parameters:
 *  nas  a list where each element represents a sample containing an integer
 *       vector representing positions of missing data for that individual
 *  nloc an integer specifying the number of loci observed in the entire set
 *
 * Return:
 *  a square matrix
 */
SEXP adjust_missing(SEXP nas, SEXP nloc)
{
    int i;
    int j;
    int NLOC = asInteger(nloc);
    int n = length(nas);
    SEXP out = PROTECT(allocMatrix(REALSXP, n, n));
    double *cells = REAL(out);

    // R matrices are stored column by column, so cell (row, col) lives at
    // cells[row + col*n]. Walking column pointers instead of forming
    // row + col*n in `int` avoids integer overflow for large n.
    double *col_i = cells;

    // Visit every sample, including the last one, so each diagonal cell is
    // written inside the loop. A separate store to cells[n*n - 1] would land
    // on index -1 when the list is empty.
    for (i = 0; i < n; i++, col_i += n)
    {
        // Missing-data positions for sample i
        SEXP nai = VECTOR_ELT(nas, i);
        // Start of column i + 1; advanced one column per inner iteration
        double *col_j = col_i + n;

        // set diag to one
        col_i[i] = 1.0;

        for (j = i + 1; j < n; j++, col_j += n)
        {
            // Missing-data positions for sample j
            SEXP naj = VECTOR_ELT(nas, j);

            // Scale by N/(N - M)
            double u = (double)NLOC/(double)(NLOC - count_unique(nai, naj));

            col_j[i] = u; // cell (i, j)
            col_i[j] = u; // cell (j, i)
        }
    }

    UNPROTECT(1);
    return(out);
}
void print_stat(int* data, int start, int end_exclusive) {//UNTESTED int unique_values= count_unique(data, start, end_exclusive); printf("[%d,%d,%d]\n",start,end_exclusive-1, unique_values); }
// Two-iterator convenience overload: forwards to the three-argument
// count_unique, passing a default-constructed std::equal_to over the
// iterator's value type as the comparison.
// NOTE(review): `Iter` must be introduced by a template header that is not
// visible at this point in the file -- confirm it precedes this definition.
inline std::size_t count_unique(Iter first, Iter last)
{
    using elem_t = typename std::iterator_traits<Iter>::value_type;
    using same_elem_t = std::equal_to<elem_t>;

    return count_unique(first, last, same_elem_t());
} // count_unique