/*
 * nj -- neighbor-joining tree construction on a condensed distance vector.
 *
 * Parameters:
 *   D           pairwise distances between the *N OTUs, stored as one value
 *               per pair (i, j), i < j, in the order (1,2), (1,3), ...,
 *               (n-1,n).  D is OVERWRITTEN: after every agglomeration the
 *               reduced matrix is copied back into it.
 *   N           pointer to the number of OTUs; only read.
 *   edge1       output, parent node of each edge  (2 * *N - 3 entries).
 *   edge2       output, child node of each edge   (2 * *N - 3 entries).
 *   edge_length output, length of each edge       (2 * *N - 3 entries).
 *
 * Node numbering: the tips are 1..*N; internal nodes are handed out from
 * 2 * *N - 2 downwards, and the last remaining three OTUs are attached to
 * node *N + 1.
 *
 * Work arrays come from R_alloc() and are not released here.
 * NOTE(review): presumably they are reclaimed by R when the calling
 * .C/.Call returns -- confirm against the R version in use.
 *
 * If *N < 3 the function returns without touching any output: the final
 * three-way join writes at index 2 * *N - 6, which would be negative.
 *
 * sum_dist_to_i(), give_index() and DINDEX() are defined elsewhere in the
 * file.  DINDEX is a macro and is deliberately invoked with the plain
 * identifiers i and j while a local called n is in scope.
 * NOTE(review): presumably it expands to an expression using n -- do not
 * rename these locals without checking its definition.
 */
void nj(double *D, int *N, int *edge1, int *edge2, double *edge_length)
{
    double *S, *new_dist, A, B, smallest_S, x, y;
    int n, i, j, k, ij, smallest, OTU1, OTU2, cur_nod, *otu_label;

    n = *N;
    if (n < 3)
        return; /* nothing sensible to build; avoids negative indexing below */

    cur_nod = 2*n - 2;

    /* S and otu_label are used with 1-based indices, hence n + 1 slots. */
    S = (double*)R_alloc(n + 1, sizeof(double));
    new_dist = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
    otu_label = (int*)R_alloc(n + 1, sizeof(int));

    for (i = 1; i <= n; i++)
        otu_label[i] = i; /* otu_label[0] is not used */

    k = 0;

    while (n > 3) {
        for (i = 1; i <= n; i++)
            S[i] = sum_dist_to_i(n, D, i); /* S[0] is not used */

        /*
         * Pick the pair minimising (n - 2) * d(i,j) - S[i] - S[j].
         * The selection starts from the first pair so that OTU1, OTU2 and
         * smallest are always valid indices, even when no candidate
         * compares below the 1e50 sentinel (e.g. NaN distances).
         */
        OTU1 = 1;
        OTU2 = 2;
        smallest = 0;
        ij = 0;
        smallest_S = 1e50;
        B = n - 2;
        for (i = 1; i < n; i++) {
            for (j = i + 1; j <= n; j++) {
                A = B*D[ij] - S[i] - S[j];
                if (A < smallest_S) {
                    OTU1 = i;
                    OTU2 = j;
                    smallest_S = A;
                    smallest = ij;
                }
                ij++;
            }
        }

        /* Both selected OTUs become children of the new internal node. */
        edge2[k] = otu_label[OTU1];
        edge2[k + 1] = otu_label[OTU2];
        edge1[k] = edge1[k + 1] = cur_nod;

        /*
         * Distances from the new node to every remaining OTU go to the
         * front of new_dist, because the new node becomes OTU number 1.
         */
        A = D[smallest];
        ij = 0;
        for (i = 1; i <= n; i++) {
            if (i == OTU1 || i == OTU2)
                continue;
            x = D[give_index(i, OTU1, n)]; /* dist between OTU1 and i */
            y = D[give_index(i, OTU2, n)]; /* dist between OTU2 and i */
            new_dist[ij] = (x + y - A)/2;
            ij++;
        }

        /* Branch lengths; B is reused, its old value (n - 2) is the divisor. */
        B = (S[OTU1] - S[OTU2])/B;
        edge_length[k] = (A + B)/2;
        edge_length[k + 1] = (A - B)/2;

        /*
         * Update the labels (OTU1 < OTU2 always holds): shift the labels
         * before OTU1 one slot up, close the gap left by OTU2, and put the
         * new node in slot 1.
         */
        if (OTU1 != 1)
            for (i = OTU1; i > 1; i--)
                otu_label[i] = otu_label[i - 1];
        if (OTU2 != n)
            for (i = OTU2; i < n; i++)
                otu_label[i] = otu_label[i + 1];
        otu_label[1] = cur_nod;

        /* Append the untouched distances, keeping their relative order. */
        for (i = 1; i < n; i++) {
            if (i == OTU1 || i == OTU2)
                continue;
            for (j = i + 1; j <= n; j++) {
                if (j == OTU1 || j == OTU2)
                    continue;
                new_dist[ij] = D[DINDEX(i, j)];
                ij++;
            }
        }

        n--;
        for (i = 0; i < n*(n - 1)/2; i++)
            D[i] = new_dist[i];

        cur_nod--;
        k = k + 2;
    }

    /* Three OTUs remain: attach all of them to the last internal node. */
    for (i = 0; i < 3; i++) {
        edge1[*N*2 - 4 - i] = cur_nod;
        edge2[*N*2 - 4 - i] = otu_label[i + 1];
    }

    edge_length[*N*2 - 4] = (D[0] + D[1] - D[2])/2;
    edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
    edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;
}
/*
 * ape_nj -- neighbor-joining tree construction using malloc()ed work space.
 *
 * Parameters:
 *   D           pairwise distances between the *N OTUs, stored as one value
 *               per pair (i, j), i < j, in the order (1,2), (1,3), ...,
 *               (n-1,n).  D is OVERWRITTEN: after every agglomeration the
 *               reduced matrix is copied back into it.
 *   N           pointer to the number of OTUs; only read.
 *   edge1       output, parent node of each edge  (2 * *N - 3 entries).
 *   edge2       output, child node of each edge   (2 * *N - 3 entries).
 *   edge_length output, length of each edge       (2 * *N - 3 entries).
 *
 * Node numbering: the tips are 1..*N; internal nodes are handed out from
 * 2 * *N - 2 downwards, and the last remaining three OTUs are attached to
 * node *N + 1.
 *
 * How the distances are kept: the distance from a freshly created node to
 * a remaining OTU is stored as (x + y)/2, i.e. without subtracting the
 * distance between the two joined OTUs.  That distance is remembered in
 * DI[node - *N - 1], and a final pass subtracts half of it from the length
 * of the edge whose child is that node.
 *
 * Errors: if any allocation fails a message is written to stderr and the
 * process exits with status 1.  If *N < 3 the function returns without
 * touching any output (the sizes and indices below would be negative).
 *
 * sum_dist_to_i(), give_index() and DINDEX() are defined elsewhere in the
 * file.  DINDEX is a macro and is deliberately invoked with the plain
 * identifiers i and j while a local called n is in scope.
 * NOTE(review): presumably it expands to an expression using n -- do not
 * rename these locals without checking its definition.
 */
void ape_nj(double *D, int *N, int *edge1, int *edge2, double *edge_length)
{
    double *S, *new_dist, A, B, smallest_S, *DI, x, y;
    int n, i, j, k, ij, smallest, OTU1, OTU2, cur_nod, *otu_label;

    n = *N;
    if (n < 3)
        return; /* nothing sensible to build; avoids bad sizes and indices */

    cur_nod = 2*n - 2;

    S = (double*) malloc(n * sizeof *S);
    new_dist = (double*) malloc(n*(n - 1)/2 * sizeof *new_dist);
    otu_label = (int*) malloc(n * sizeof *otu_label);
    DI = (double*) malloc((n - 2) * sizeof *DI);

    if (S == NULL || new_dist == NULL || otu_label == NULL || DI == NULL) {
        fprintf(stderr, "Memory allocation fails!\n");
        exit(1);
    }

    /* S and otu_label are 0-based here; OTU numbers stay 1-based. */
    for (i = 0; i < n; i++)
        otu_label[i] = i + 1;

    k = 0;

    while (n > 3) {
        for (i = 0; i < n; i++)
            S[i] = sum_dist_to_i(n, D, i + 1);

        /*
         * Pick the pair minimising d(i,j) - (S[i] + S[j])/(n - 2).
         * The selection starts from the first pair so that OTU1, OTU2 and
         * smallest are always valid, even when no candidate compares below
         * the 1e50 sentinel (e.g. NaN distances); a default of 0 would make
         * otu_label[OTU1 - 1] read before the start of the array.
         */
        OTU1 = 1;
        OTU2 = 2;
        smallest = 0;
        ij = 0;
        smallest_S = 1e50;
        B = n - 2;
        for (i = 0; i < n - 1; i++) {
            for (j = i + 1; j < n; j++) {
                A = D[ij] - (S[i] + S[j])/B;
                if (A < smallest_S) {
                    OTU1 = i + 1;
                    OTU2 = j + 1;
                    smallest_S = A;
                    smallest = ij;
                }
                ij++;
            }
        }

        /* Both selected OTUs become children of the new internal node. */
        edge2[k] = otu_label[OTU1 - 1];
        edge2[k + 1] = otu_label[OTU2 - 1];
        edge1[k] = edge1[k + 1] = cur_nod;

        /*
         * Distances from the new node to every remaining OTU go to the
         * front of new_dist, because the new node becomes OTU number 1.
         * A and B accumulate the distances from OTU1 and OTU2 to the rest.
         */
        A = B = ij = 0;
        for (i = 1; i <= n; i++) {
            if (i == OTU1 || i == OTU2)
                continue;
            x = D[give_index(i, OTU1, n)]; /* dist between OTU1 and i */
            y = D[give_index(i, OTU2, n)]; /* dist between OTU2 and i */
            new_dist[ij] = (x + y)/2;
            A += x;
            B += y;
            ij++;
        }

        /* Branch lengths from the mean distances to the remaining OTUs. */
        A /= n - 2;
        B /= n - 2;
        edge_length[k] = (D[smallest] + A - B)/2;
        edge_length[k + 1] = (D[smallest] + B - A)/2;
        DI[cur_nod - *N - 1] = D[smallest];

        /*
         * Update the labels.  OTU1 < OTU2 always holds here (the scan only
         * visits pairs with i < j), so no swap is needed: shift the labels
         * before OTU1 one slot up, close the gap left by OTU2, and put the
         * new node in slot 0.
         */
        if (OTU1 != 1)
            for (i = OTU1 - 1; i > 0; i--)
                otu_label[i] = otu_label[i - 1];
        if (OTU2 != n)
            for (i = OTU2; i < n; i++)
                otu_label[i - 1] = otu_label[i];
        otu_label[0] = cur_nod;

        /* Append the untouched distances, keeping their relative order. */
        for (i = 1; i < n; i++) {
            if (i == OTU1 || i == OTU2)
                continue;
            for (j = i + 1; j <= n; j++) {
                if (j == OTU1 || j == OTU2)
                    continue;
                new_dist[ij] = D[DINDEX(i, j)];
                ij++;
            }
        }

        n--;
        for (i = 0; i < n*(n - 1)/2; i++)
            D[i] = new_dist[i];

        cur_nod--;
        k = k + 2;
    }

    /* Three OTUs remain: attach all of them to the last internal node. */
    for (i = 0; i < 3; i++) {
        edge1[*N*2 - 4 - i] = cur_nod;
        edge2[*N*2 - 4 - i] = otu_label[i];
    }

    edge_length[*N*2 - 4] = (D[0] + D[1] - D[2])/2;
    edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
    edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;

    /* Correct the edges leading to internal nodes (tips are <= *N). */
    for (i = 0; i < *N*2 - 3; i++) {
        if (edge2[i] <= *N)
            continue;
        /* In case there are zero branch lengths: */
        if (DI[edge2[i] - *N - 1] == 0)
            continue;
        edge_length[i] -= DI[edge2[i] - *N - 1]/2;
    }

    free(S);
    free(new_dist);
    free(otu_label);
    free(DI);
} /* End of ape_nj(). */