/*
 * ape_nj() -- agglomerate a packed pairwise distance matrix into an
 * unrooted tree, two taxa (OTUs) at a time, until three remain.
 *
 *   D            packed distances, one entry per unordered pair, i.e.
 *                (*N)*(*N - 1)/2 values; the pair (i, j), i < j, is located
 *                with give_index()/DINDEX().  OVERWRITTEN: after every merge
 *                the reduced matrix is copied back into D.
 *   N            pointer to the number of taxa (not modified).
 *   edge1/edge2  outputs, 2*(*N) - 3 entries each: parent node and child
 *                node of every branch.  Taxa are numbered 1..*N, the nodes
 *                created here *N + 1 .. 2*(*N) - 2.
 *   edge_length  output, 2*(*N) - 3 entries: length of every branch.
 *
 * Work buffers are malloc()ed and freed here.  If an allocation fails the
 * function prints a message and terminates the process with exit(1).
 *
 * Fewer than three taxa cannot be processed: the final stage always writes
 * three branches, which would land at negative indices.  In that case the
 * function returns without touching any output.
 *
 * NOTE(review): DINDEX is a macro defined elsewhere; it presumably refers to
 * the local variable `n' by name, so that variable must keep its name and
 * the macro is only ever given plain identifiers as arguments.
 */
void ape_nj(double *D, int *N, int *edge1, int *edge2, double *edge_length)
{
    double *S, *new_dist, *DI, A, B, smallest_S, x, y;
    int *otu_label;
    int n, i, j, k, ij, smallest, OTU1, OTU2, cur_nod;
    size_t n_pairs;

    n = *N;
    if (n < 3)
        return;

    cur_nod = 2*n - 2;

    /* Do the size arithmetic in size_t so that it cannot overflow `int'
       before being multiplied by the element size. */
    n_pairs = (size_t) n * (size_t) (n - 1) / 2;

    S = malloc((size_t) n * sizeof *S);
    new_dist = malloc(n_pairs * sizeof *new_dist);
    otu_label = malloc((size_t) n * sizeof *otu_label);
    DI = malloc((size_t) (n - 2) * sizeof *DI);

    if (S == NULL || new_dist == NULL || otu_label == NULL || DI == NULL) {
        printf("Memory allocation fails!\n");
        exit(1);
    }

    /* otu_label[] is 0-based here while OTU1/OTU2 are 1-based. */
    for (i = 0; i < n; i++)
        otu_label[i] = i + 1;

    k = 0;

    while (n > 3) {
        for (i = 0; i < n; i++)
            S[i] = sum_dist_to_i(n, D, i + 1);

        /*
         * Deterministic fallback: if no pair beats the sentinel below
         * (e.g. every criterion value is NaN) merge the first pair instead
         * of indexing otu_label[] out of bounds.
         */
        OTU1 = 1;
        OTU2 = 2;
        smallest = 0;

        /* Pick the pair with the smallest criterion; ij walks D in storage
           order, which is the order in which the (i, j) pairs are visited. */
        ij = 0;
        smallest_S = 1e50;
        B = n - 2;
        for (i = 0; i < n - 1; i++) {
            for (j = i + 1; j < n; j++) {
                A = D[ij] - (S[i] + S[j])/B;
                if (A < smallest_S) {
                    OTU1 = i + 1;
                    OTU2 = j + 1;
                    smallest_S = A;
                    smallest = ij;
                }
                ij++;
            }
        }

        edge2[k] = otu_label[OTU1 - 1];
        edge2[k + 1] = otu_label[OTU2 - 1];
        edge1[k] = edge1[k + 1] = cur_nod;

        /*
         * Distances from the new node to every remaining OTU go to the
         * front of new_dist[]; A and B accumulate the distances from
         * OTU1 and OTU2 to the others.
         */
        ij = 0;
        A = 0;
        B = 0;
        for (i = 1; i <= n; i++) {
            if (i == OTU1 || i == OTU2)
                continue;
            x = D[give_index(i, OTU1, n)]; /* dist between OTU1 and i */
            y = D[give_index(i, OTU2, n)]; /* dist between OTU2 and i */
            new_dist[ij] = (x + y)/2;
            A += x;
            B += y;
            ij++;
        }

        /* compute the branch lengths */
        A /= n - 2;
        B /= n - 2;
        edge_length[k] = (D[smallest] + A - B)/2;
        edge_length[k + 1] = (D[smallest] + B - A)/2;

        /* Remember the distance between the merged pair; it is used for
           the correction applied after the main loop. */
        DI[cur_nod - *N - 1] = D[smallest];

        /*
         * Update the labels: shift the entries before OTU1 up by one, close
         * the gap left by OTU2 and put the new node first.  OTU1 < OTU2
         * always holds because the scan above only visits pairs with j > i.
         */
        for (i = OTU1 - 1; i > 0; i--)
            otu_label[i] = otu_label[i - 1];
        for (i = OTU2; i < n; i++)
            otu_label[i - 1] = otu_label[i];
        otu_label[0] = cur_nod;

        /* Append the distances among the untouched OTUs. */
        for (i = 1; i < n; i++) {
            if (i == OTU1 || i == OTU2)
                continue;
            for (j = i + 1; j <= n; j++) {
                if (j == OTU1 || j == OTU2)
                    continue;
                new_dist[ij] = D[DINDEX(i, j)];
                ij++;
            }
        }

        n--;
        for (i = 0; i < n*(n - 1)/2; i++)
            D[i] = new_dist[i];

        cur_nod--;
        k = k + 2;
    }

    /* The last three OTUs all hang off the one remaining node. */
    for (i = 0; i < 3; i++) {
        edge1[*N*2 - 4 - i] = cur_nod;
        edge2[*N*2 - 4 - i] = otu_label[i];
    }

    edge_length[*N*2 - 4] = (D[0] + D[1] - D[2])/2;
    edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
    edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;

    /*
     * Branches that lead to a node created above are shortened by half the
     * distance recorded for that node.  DI[0] belongs to the last node,
     * which never appears in edge2[], so it is never read.
     */
    for (i = 0; i < *N*2 - 3; i++) {
        if (edge2[i] <= *N)
            continue;
        /* In case there are zero branch lengths: */
        if (DI[edge2[i] - *N - 1] == 0)
            continue;
        edge_length[i] -= DI[edge2[i] - *N - 1]/2;
    }

    free(S);
    free(new_dist);
    free(otu_label);
    free(DI);
} /* End of ape_nj(). */
/*
 * give_index() -- order-insensitive front end to DINDEX: the macro is
 * always invoked with the smaller of the two indices first.
 *
 * NOTE(review): `n' is not referenced directly in this function; DINDEX is
 * defined elsewhere and presumably picks it up by name -- confirm before
 * renaming the parameter.
 */
int give_index(int i, int j, int n)
{
    return (i > j) ? (DINDEX(j, i)) : (DINDEX(i, j));
} /* End of give_index(). */
/*
 * Returns 0, 1 or 2 for number of solutions. 2 means `any number
 * more than one', or more accurately `we were unable to prove
 * there was only one'.
 *
 * Outputs in a `placements' array, indexed the same way as the one
 * within this function (see below); entries in there are <0 for a
 * placement ruled out, 0 for an uncertain placement, and 1 for a
 * definite one.
 *
 * Parameters, as used by the code below:
 *   w, h    grid width and height; `grid' is read as a w*h row-major
 *           array (index y*w+x).
 *   n       only used directly by the SOLVER_DIAGNOSTICS dumps (which
 *           loop over 0..n) and to size `heads' through DCOUNT(n).
 *   grid    the number in each square; pairs of adjacent numbers are
 *           mapped to a domino index with DINDEX().
 *   output  may be NULL.  Otherwise it receives one value per slot of
 *           the 2*w*h placement array.  Slots that never represented a
 *           plausible placement (-3 internally) are not written, and
 *           nothing at all is written when the function returns 0 via
 *           the early exit.
 *
 * DCOUNT, DINDEX, snewn, sfree and find_overlaps are defined elsewhere.
 */
static int solver(int w, int h, int n, int *grid, int *output)
{
    int wh = w*h, dc = DCOUNT(n);
    int *placements, *heads;
    int i, j, x, y, ret;

    /*
     * This array has one entry for every possible domino
     * placement. Vertical placements are indexed by their top
     * half, at (y*w+x)*2; horizontal placements are indexed by
     * their left half at (y*w+x)*2+1.
     *
     * This array is used to link domino placements together into
     * linked lists, so that we can track all the possible
     * placements of each different domino. It's also used as a
     * quick means of looking up an individual placement to see
     * whether we still think it's possible. Actual values stored
     * in this array are -2 (placement not possible at all), -1
     * (end of list), or the array index of the next item.
     *
     * Oh, and -3 for `not even valid', used for array indices
     * which don't even represent a plausible placement.
     */
    placements = snewn(2*wh, int);
    for (i = 0; i < 2*wh; i++)
        placements[i] = -3;            /* not even valid */

    /*
     * This array has one entry for every domino, and it is an
     * index into `placements' denoting the head of the placement
     * list for that domino.
     */
    heads = snewn(dc, int);
    for (i = 0; i < dc; i++)
        heads[i] = -1;

    /*
     * Set up the initial possibility lists by scanning the grid.
     * Each placement is pushed onto the front of its domino's list:
     * first all vertical placements, then all horizontal ones.
     */
    for (y = 0; y < h-1; y++)
        for (x = 0; x < w; x++) {
            int di = DINDEX(grid[y*w+x], grid[(y+1)*w+x]);
            placements[(y*w+x)*2] = heads[di];
            heads[di] = (y*w+x)*2;
        }
    for (y = 0; y < h; y++)
        for (x = 0; x < w-1; x++) {
            int di = DINDEX(grid[y*w+x], grid[y*w+(x+1)]);
            placements[(y*w+x)*2+1] = heads[di];
            heads[di] = (y*w+x)*2+1;
        }

#ifdef SOLVER_DIAGNOSTICS
    printf("before solver:\n");
    for (i = 0; i <= n; i++)
        for (j = 0; j <= i; j++) {
            int k, m;
            m = 0;
            printf("%2d [%d %d]:", DINDEX(i, j), i, j);
            for (k = heads[DINDEX(i,j)]; k >= 0; k = placements[k])
                printf(" %3d [%d,%d,%c]", k, k/2%w, k/2/w, k%2?'h':'v');
            printf("\n");
        }
#endif

    /*
     * Apply the two deductions below repeatedly until a full pass
     * makes no change.
     */
    while (1) {
        int done_something = FALSE;

        /*
         * For each domino, look at its possible placements, and
         * for each placement consider the placements (of any
         * domino) it overlaps. Any placement overlapped by all
         * placements of this domino can be ruled out.
         *
         * Each domino placement overlaps only six others, so we
         * need not do serious set theory to work this out.
         */
        for (i = 0; i < dc; i++) {
            int permset[6], permlen = 0, p;

            if (heads[i] == -1) {      /* no placement for this domino */
                ret = 0;               /* therefore puzzle is impossible */
                goto done;
            }
            for (j = heads[i]; j >= 0; j = placements[j]) {
                assert(placements[j] != -2);

                if (j == heads[i]) {
                    /* First placement on the list seeds the running
                     * intersection. */
                    permlen = find_overlaps(w, h, j, permset);
                } else {
                    /* Note: this `n' shadows the function parameter. */
                    int tempset[6], templen, m, n, k;

                    templen = find_overlaps(w, h, j, tempset);

                    /*
                     * Pathetically primitive set intersection
                     * algorithm, which I'm only getting away with
                     * because I know my sets are bounded by a very
                     * small size.
                     */
                    for (m = n = 0; m < permlen; m++) {
                        for (k = 0; k < templen; k++)
                            if (tempset[k] == permset[m])
                                break;
                        if (k < templen)
                            permset[n++] = permset[m];
                    }
                    permlen = n;
                }
            }
            for (p = 0; p < permlen; p++) {
                j = permset[p];
                if (placements[j] != -2) {
                    int p1, p2, di;

                    done_something = TRUE;

                    /*
                     * Rule out this placement. First find what
                     * domino it is...
                     */
                    p1 = j / 2;
                    p2 = (j & 1) ? p1 + 1 : p1 + w;
                    di = DINDEX(grid[p1], grid[p2]);
#ifdef SOLVER_DIAGNOSTICS
                    printf("considering domino %d: ruling out placement %d"
                           " for %d\n", i, j, di);
#endif

                    /*
                     * ... then walk that domino's placement list,
                     * removing this placement when we find it.
                     */
                    if (heads[di] == j)
                        heads[di] = placements[j];
                    else {
                        int k = heads[di];
                        while (placements[k] != -1 && placements[k] != j)
                            k = placements[k];
                        assert(placements[k] == j);
                        placements[k] = placements[j];
                    }
                    placements[j] = -2;
                }
            }
        }

        /*
         * For each square, look at the available placements
         * involving that square. If all of them are for the same
         * domino, then rule out any placements for that domino
         * _not_ involving this square.
         */
        for (i = 0; i < wh; i++) {
            /* Note: this `n' shadows the function parameter. */
            int list[4], k, n, adi;

            x = i % w;
            y = i / w;

            /* Collect the up-to-four placement slots covering square i:
             * horizontal from the left neighbour, horizontal from here,
             * vertical from the square above, vertical from here. */
            j = 0;
            if (x > 0)
                list[j++] = 2*(i-1)+1;
            if (x+1 < w)
                list[j++] = 2*i+1;
            if (y > 0)
                list[j++] = 2*(i-w);
            if (y+1 < h)
                list[j++] = 2*i;

            /* Keep only the ones still considered possible
             * (-1 or a list link, i.e. neither -2 nor -3). */
            for (n = k = 0; k < j; k++)
                if (placements[list[k]] >= -1)
                    list[n++] = list[k];

            adi = -1;

            for (j = 0; j < n; j++) {
                int p1, p2, di;

                k = list[j];
                p1 = k / 2;
                p2 = (k & 1) ? p1 + 1 : p1 + w;
                di = DINDEX(grid[p1], grid[p2]);

                if (adi == -1)
                    adi = di;
                if (adi != di)
                    break;
            }

            if (j == n) {
                int nn;

                assert(adi >= 0);
                /*
                 * We've found something. All viable placements
                 * involving this square are for domino `adi'. If
                 * the current placement list for that domino is
                 * longer than n, reduce it to precisely this
                 * placement list and we've done something.
                 */
                nn = 0;
                for (k = heads[adi]; k >= 0; k = placements[k])
                    nn++;
                if (nn > n) {
                    done_something = TRUE;
#ifdef SOLVER_DIAGNOSTICS
                    printf("considering square %d,%d: reducing placements "
                           "of domino %d\n", x, y, adi);
#endif
                    /*
                     * Set all other placements on the list to
                     * impossible.
                     */
                    k = heads[adi];
                    while (k >= 0) {
                        int tmp = placements[k];
                        placements[k] = -2;
                        k = tmp;
                    }
                    /*
                     * Set up the new list.
                     */
                    heads[adi] = list[0];
                    for (k = 0; k < n; k++)
                        placements[list[k]] = (k+1 == n ? -1 : list[k+1]);
                }
            }
        }

        if (!done_something)
            break;
    }

#ifdef SOLVER_DIAGNOSTICS
    printf("after solver:\n");
    for (i = 0; i <= n; i++)
        for (j = 0; j <= i; j++) {
            int k, m;
            m = 0;
            printf("%2d [%d %d]:", DINDEX(i, j), i, j);
            for (k = heads[DINDEX(i,j)]; k >= 0; k = placements[k])
                printf(" %3d [%d,%d,%c]", k, k/2%w, k/2/w, k%2?'h':'v');
            printf("\n");
        }
#endif

    /*
     * Classify every slot.  A placement is certain when it is the only
     * entry left on its domino's list (it is the head and also the end
     * of the list); any other surviving placement is uncertain and
     * makes the result 2.
     */
    ret = 1;
    for (i = 0; i < wh*2; i++) {
        if (placements[i] == -2) {
            if (output)
                output[i] = -1;        /* ruled out */
        } else if (placements[i] != -3) {
            int p1, p2, di;

            p1 = i / 2;
            p2 = (i & 1) ? p1 + 1 : p1 + w;
            di = DINDEX(grid[p1], grid[p2]);

            if (i == heads[di] && placements[i] == -1) {
                if (output)
                    output[i] = 1;     /* certain */
            } else {
                if (output)
                    output[i] = 0;     /* uncertain */
                ret = 2;
            }
        }
    }

    done:
    /*
     * Free working data.
     */
    sfree(placements);
    sfree(heads);

    return ret;
}
static int do_compress ( const lzo_byte *in , lzo_uint in_len, lzo_byte *out, lzo_uint *out_len, lzo_voidp wrkmem ) { #if 1 && defined(__GNUC__) && defined(__i386__) register const lzo_byte *ip __asm__("%esi"); #else register const lzo_byte *ip; #endif lzo_uint32 dv; lzo_byte *op; const lzo_byte * const in_end = in + in_len; const lzo_byte * const ip_end = in + in_len - 9; const lzo_byte *ii; const lzo_bytepp const dict = (const lzo_bytepp) wrkmem; op = out; ip = in; ii = ip; DVAL_FIRST(dv,ip); UPDATE_D(dict,cycle,dv,ip); ip++; DVAL_NEXT(dv,ip); while (1) { #if 1 && defined(__GNUC__) && defined(__i386__) register const lzo_byte *m_pos __asm__("%edi"); #else register const lzo_byte *m_pos; #endif lzo_uint m_len; lzo_ptrdiff_t m_off; lzo_uint lit; { lzo_uint dindex = DINDEX(dv,ip); m_pos = dict[dindex]; UPDATE_I(dict,cycle,dindex,ip); } if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M3_MAX_OFFSET)) { } #if defined(LZO_UNALIGNED_OK_2) else if (* (lzo_ushortp) m_pos != * (lzo_ushortp) ip) #else else if (m_pos[0] != ip[0] || m_pos[1] != ip[1]) #endif { } else { if (m_pos[2] == ip[2]) { m_pos += 3; if (m_off <= M2_MAX_OFFSET) goto match; #if 1 if (ip - ii <= 3) goto match; #else if (ip - ii == 3) /* better compression, but slower */ goto match; #endif if (*m_pos == ip[3]) goto match; } } /* a literal */ ++ip; if (ip >= ip_end) break; DVAL_NEXT(dv,ip); continue; /* a match */ match: /* store current literal run */ lit = ip - ii; if (lit > 0) { register lzo_uint t = lit; if (t < 4 && op > out) op[-2] |= LZO_BYTE(t); else if (t <= 31) *op++ = LZO_BYTE(t); else { register lzo_uint tt = t - 31; *op++ = 0; while (tt > 255) { tt -= 255; *op++ = 0; } assert(tt > 0); *op++ = LZO_BYTE(tt); } do *op++ = *ii++; while (--t > 0); } assert(ii == ip); /* code the match */ ip += 3; if (*m_pos++ != *ip++ || *m_pos++ != *ip++ || *m_pos++ != *ip++ || *m_pos++ != *ip++ || *m_pos++ != *ip++ || *m_pos++ != *ip++) { --ip; m_len = ip - ii; assert(m_len >= 3); assert(m_len <= 8); if (m_off <= 
M2_MAX_OFFSET) { m_off -= 1; *op++ = LZO_BYTE(((m_len - 2) << 5) | ((m_off & 7) << 2)); *op++ = LZO_BYTE(m_off >> 3); } else if (m_len == 3 && m_off <= 2*M2_MAX_OFFSET && lit > 0)
/*
 * C_nj() -- agglomerate a packed pairwise distance matrix into an unrooted
 * tree, two taxa (OTUs) at a time, until three remain.
 *
 *   D            packed distances, one entry per unordered pair, i.e.
 *                (*N)*(*N - 1)/2 values; the pair (i, j), i < j, is located
 *                with give_index()/DINDEX().  OVERWRITTEN: after every merge
 *                the reduced matrix is copied back into D.
 *   N            pointer to the number of taxa (not modified).
 *   edge1/edge2  outputs, 2*(*N) - 3 entries each: parent node and child
 *                node of every branch.  Taxa are numbered 1..*N, the nodes
 *                created here *N + 1 .. 2*(*N) - 2.
 *   edge_length  output, 2*(*N) - 3 entries: length of every branch.
 *
 * Work buffers come from R_alloc() and are not released here.
 *
 * Fewer than three taxa cannot be processed: the final stage always writes
 * three branches, which would land at negative indices.  In that case the
 * function returns without touching any output.
 *
 * NOTE(review): DINDEX is a macro defined elsewhere; it presumably refers to
 * the local variable `n' by name, so that variable must keep its name and
 * the macro is only ever given plain identifiers as arguments.
 */
void C_nj(double *D, int *N, int *edge1, int *edge2, double *edge_length)
{
    double *S, *new_dist, A, B, smallest_S, x, y;
    int *otu_label;
    int n, i, j, k, ij, smallest, OTU1, OTU2, cur_nod;

    n = *N;
    if (n < 3)
        return;

    cur_nod = 2*n - 2;

    /* S[] and otu_label[] are used 1-based; element 0 is never touched. */
    S = (double*)R_alloc(n + 1, sizeof(double));
    new_dist = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
    otu_label = (int*)R_alloc(n + 1, sizeof(int));

    for (i = 1; i <= n; i++)
        otu_label[i] = i;

    k = 0;

    while (n > 3) {
        for (i = 1; i <= n; i++)
            S[i] = sum_dist_to_i(n, D, i);

        /*
         * Deterministic fallback: if no pair beats the sentinel below
         * (e.g. every criterion value is NaN) merge the first pair instead
         * of reading indeterminate values.
         */
        OTU1 = 1;
        OTU2 = 2;
        smallest = 0;

        /* Pick the pair with the smallest criterion; ij walks D in storage
           order, which is the order in which the (i, j) pairs are visited. */
        ij = 0;
        smallest_S = 1e50;
        B = n - 2;
        for (i = 1; i < n; i++) {
            for (j = i + 1; j <= n; j++) {
                A = B*D[ij] - S[i] - S[j];
                if (A < smallest_S) {
                    OTU1 = i;
                    OTU2 = j;
                    smallest_S = A;
                    smallest = ij;
                }
                ij++;
            }
        }

        edge2[k] = otu_label[OTU1];
        edge2[k + 1] = otu_label[OTU2];
        edge1[k] = edge1[k + 1] = cur_nod;

        /* Distances from the new node to every remaining OTU go to the
           front of new_dist[]. */
        A = D[smallest];
        ij = 0;
        for (i = 1; i <= n; i++) {
            if (i == OTU1 || i == OTU2)
                continue;
            x = D[give_index(i, OTU1, n)]; /* dist between OTU1 and i */
            y = D[give_index(i, OTU2, n)]; /* dist between OTU2 and i */
            new_dist[ij] = (x + y - A)/2;
            ij++;
        }

        /* compute the branch lengths (B is recycled: its old value, n - 2,
           is not needed any more) */
        B = (S[OTU1] - S[OTU2])/B;
        edge_length[k] = (A + B)/2;
        edge_length[k + 1] = (A - B)/2;

        /*
         * Update the labels: shift the entries before OTU1 up by one, close
         * the gap left by OTU2 and put the new node first.  OTU1 < OTU2
         * always holds because the scan above only visits pairs with j > i.
         */
        for (i = OTU1; i > 1; i--)
            otu_label[i] = otu_label[i - 1];
        for (i = OTU2; i < n; i++)
            otu_label[i] = otu_label[i + 1];
        otu_label[1] = cur_nod;

        /* Append the distances among the untouched OTUs. */
        for (i = 1; i < n; i++) {
            if (i == OTU1 || i == OTU2)
                continue;
            for (j = i + 1; j <= n; j++) {
                if (j == OTU1 || j == OTU2)
                    continue;
                new_dist[ij] = D[DINDEX(i, j)];
                ij++;
            }
        }

        n--;
        for (i = 0; i < n*(n - 1)/2; i++)
            D[i] = new_dist[i];

        cur_nod--;
        k = k + 2;
    }

    /* The last three OTUs all hang off the one remaining node. */
    for (i = 0; i < 3; i++) {
        edge1[*N*2 - 4 - i] = cur_nod;
        edge2[*N*2 - 4 - i] = otu_label[i + 1];
    }

    edge_length[*N*2 - 4] = (D[0] + D[1] - D[2])/2;
    edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
    edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;
}
/*
 * C_njs() -- variant of C_nj() for distance matrices with missing entries.
 * A missing distance is denoted by -1.
 *
 *   D            packed distances, one entry per unordered pair, i.e.
 *                (*N)*(*N - 1)/2 values, located with give_index()/DINDEX().
 *                OVERWRITTEN: the reduced matrix is copied back after every
 *                merge, and missing entries among the last three taxa are
 *                filled in before the final branch lengths are computed.
 *   N            pointer to the number of taxa (not modified).
 *   edge1/edge2  outputs, 2*(*N) - 3 entries each: parent node and child
 *                node of every branch.  Taxa are numbered 1..*N, the nodes
 *                created here *N + 1 .. 2*(*N) - 2.
 *   edge_length  output, 2*(*N) - 3 entries: length of every branch.
 *   fsS          *fsS is handed unchanged to choosePair().
 *                NOTE(review): its meaning is defined by choosePair(), which
 *                is not visible here.
 *
 * For every pair (x, y) the function maintains
 *   s[xy]  the number of taxa k (x and y included) whose distances to both
 *          x and y are known, and
 *   R[xy]  the sum of those known distances,
 * for as long as the matrix is incomplete (sw == 1).  choosePair() selects
 * the pair to merge in that mode and is expected to clear `sw' once no
 * distance is missing any more; from then on the pair is chosen from the
 * plain row sums S[].
 *
 * Calls error() when the chosen pair shares known distances with no third
 * taxon.  Work buffers come from R_alloc() and are not released here.
 *
 * Fewer than three taxa cannot be processed: the final stage always writes
 * three branches, which would land at negative indices.  In that case the
 * function returns without touching any output.
 *
 * NOTE(review): DINDEX is a macro defined elsewhere; it presumably refers to
 * the local variable `n' by name, so that variable must keep its name and
 * the macro is only ever given plain identifiers as arguments.
 */
void C_njs(double *D, int *N, int *edge1, int *edge2, double *edge_length, int *fsS)
{
    double *S, *R, *newR, *new_dist, A, B, smallest_S;
    int *otu_label;
    int *s;      /* s contains |Sxy|, which is all we need for agglomeration */
    int *newS;
    int n, i, j, k, ij, OTU1, OTU2, cur_nod;
    int fS = *fsS;
    int sw = 1;  /* if 1 then the distance matrix is incomplete */

    n = *N;
    if (n < 3)
        return;

    cur_nod = 2*n - 2;

    R = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
    S = (double*)R_alloc(n + 1, sizeof(double));
    newR = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
    new_dist = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
    otu_label = (int*)R_alloc(n + 1, sizeof(int));
    s = (int*)R_alloc(n*(n - 1)/2, sizeof(int));
    newS = (int*)R_alloc(n*(n - 1)/2, sizeof(int));

    /* otu_label[] and S[] are used 1-based; element 0 is never touched. */
    for (i = 1; i <= n; i++)
        otu_label[i] = i;

    /* compute Sxy and Rxy */
    for (i = 0; i < n*(n - 1)/2; i++) {
        newR[i] = 0;
        newS[i] = 0;
        s[i] = 0;
        R[i] = 0;
    }

    for (i = 1; i < n; i++) {
        for (j = i + 1; j <= n; j++) {
            /* algorithm assumes i,j in Sij, so skip pair if it is not known */
            if (D[give_index(i, j, n)] == -1)
                continue;

            for (k = 1; k <= n; k++) {
                /* (i, j) is the pair for which we compute */
                if (k == i || k == j) {
                    /* the pair's own members always count */
                    s[give_index(i, j, n)]++;
                    if (i != k)
                        R[give_index(i, j, n)] += D[give_index(i, k, n)];
                    if (j != k)
                        R[give_index(i, j, n)] += D[give_index(j, k, n)];
                    continue;
                }
                /* skip k unless its distances to both i and j are known */
                if (D[give_index(i, k, n)] == -1 || D[give_index(j, k, n)] == -1)
                    continue;
                s[give_index(i, j, n)]++;
                R[give_index(i, j, n)] += D[give_index(i, k, n)];
                R[give_index(i, j, n)] += D[give_index(j, k, n)];
            }
        }
    }

    k = 0;

    while (n > 3) {
        double sum, dxy, d1, d2;
        int down;

        /*
         * Deterministic fallback: if the selection below assigns nothing
         * (e.g. every criterion value is NaN) merge the first pair instead
         * of reading indeterminate values.
         */
        OTU1 = 1;
        OTU2 = 2;

        for (i = 1; i < n; i++) {
            for (j = i + 1; j <= n; j++) {
                newR[give_index(i, j, n)] = 0;
                newS[give_index(i, j, n)] = 0;
            }
        }

        smallest_S = -1e50;
        B = n - 2;

        if (sw == 1) {
            choosePair(D, n, R, s, &sw, &OTU1, &OTU2, fS);
        } else {
            /* distance matrix is now complete: pick the pair that
               maximises S[i] + S[j] - (n - 2)*d(i, j) */
            for (i = 1; i <= n; i++)
                S[i] = 0;
            for (i = 1; i <= n; i++) {
                for (j = 1; j <= n; j++) {
                    if (i == j)
                        continue;
                    S[i] += D[give_index(i, j, n)];
                }
            }
            for (i = 1; i < n; i++) {
                for (j = i + 1; j <= n; j++) {
                    A = S[i] + S[j] - B*D[give_index(i, j, n)];
                    if (A > smallest_S) {
                        OTU1 = i;
                        OTU2 = j;
                        smallest_S = A;
                    }
                }
            }
        }

        /* update Rxy and Sxy, only if matrix still incomplete */
        if (sw == 1) {
            for (i = 1; i < n; i++) {
                if (i == OTU1 || i == OTU2)
                    continue;
                for (j = i + 1; j <= n; j++) {
                    if (j == OTU1 || j == OTU2)
                        continue;
                    if (D[give_index(i, j, n)] == -1)
                        continue;
                    if (D[give_index(i, OTU1, n)] != -1 && D[give_index(j, OTU1, n)] != -1) {
                        /* OTU1 was considered for Rij, so now subtract */
                        R[give_index(i, j, n)] -= (D[give_index(i, OTU1, n)] + D[give_index(j, OTU1, n)]);
                        s[give_index(i, j, n)]--;
                    }
                    if (D[give_index(i, OTU2, n)] != -1 && D[give_index(j, OTU2, n)] != -1) {
                        /* OTU2 was considered for Rij, so now subtract */
                        R[give_index(i, j, n)] -= (D[give_index(i, OTU2, n)] + D[give_index(j, OTU2, n)]);
                        s[give_index(i, j, n)]--;
                    }
                }
            }
        }

        edge2[k] = otu_label[OTU1];
        edge2[k + 1] = otu_label[OTU2];
        edge1[k] = edge1[k + 1] = cur_nod;

        /* Sum of d(OTU1, i) - d(OTU2, i) over every i for which both
           distances are known. */
        sum = 0;
        for (i = 1; i <= n; i++) {
            if (i == OTU1 || i == OTU2)
                continue;
            d1 = D[give_index(OTU1, i, n)];
            d2 = D[give_index(OTU2, i, n)];
            if (d1 == -1 || d2 == -1)
                continue;
            sum += (d1 - d2);
        }

        /* although s was updated above, s[OTU1,OTU2] has remained unchanged
           so it is safe to use it here;
           if complete distances, use n - 2, else use s */
        down = n - 2;
        if (sw == 1)
            down = s[give_index(OTU1, OTU2, n)] - 2;
        if (down <= 0) {
            error("distance information insufficient to construct a tree, leaves %i and %i isolated from tree",OTU1,OTU2);
        }

        sum *= (1.0/(2*(down)));
        dxy = D[give_index(OTU1, OTU2, n)]/2;
        edge_length[k] = dxy + sum;     /* OTU1 */
        edge_length[k + 1] = dxy - sum; /* OTU2 */

        /*
         * Distances from the new node to every remaining OTU go to the
         * front of new_dist[].  Use both merged taxa when both distances
         * are known, whichever one is known otherwise, and keep the entry
         * missing when neither is.  No special case is needed for complete
         * matrices, as pairs automatically fall in the right category.
         */
        ij = 0;
        for (i = 1; i <= n; i++) {
            if (i == OTU1 || i == OTU2)
                continue;
            d1 = D[give_index(OTU1, i, n)];
            d2 = D[give_index(OTU2, i, n)];
            if (d1 != -1 && d2 != -1)
                new_dist[ij] = 0.5*(d1 - edge_length[k] + d2 - edge_length[k + 1]);
            else if (d1 != -1)
                new_dist[ij] = d1 - edge_length[k];
            else if (d2 != -1)
                new_dist[ij] = d2 - edge_length[k + 1];
            else
                new_dist[ij] = -1;
            ij++;
        }

        /* Append the distances among the untouched OTUs. */
        for (i = 1; i < n; i++) {
            if (i == OTU1 || i == OTU2)
                continue;
            for (j = i + 1; j <= n; j++) {
                if (j == OTU1 || j == OTU2)
                    continue;
                new_dist[ij] = D[DINDEX(i, j)];
                ij++;
            }
        }

        /*
         * compute Rui, only if distance matrix is still incomplete.
         * In the reduced matrix (n - 1 taxa) the new node is taxon 1.  ij
         * is advanced once per pair (1, i) so that afterwards it points at
         * the first pair not involving the new node.
         */
        ij = 0;
        if (sw == 1) {
            for (i = 2; i < n; i++) {
                ij++;
                if (new_dist[give_index(i, 1, n - 1)] == -1)
                    continue;
                for (j = 1; j < n; j++) {
                    if (j == 1 || j == i) {
                        if (i != j)
                            newR[give_index(1, i, n - 1)] += new_dist[give_index(i, j, n - 1)];
                        if (j != 1)
                            newR[give_index(1, i, n - 1)] += new_dist[give_index(1, j, n - 1)];
                        newS[give_index(1, i, n - 1)]++;
                        continue;
                    }
                    if (new_dist[give_index(i, j, n - 1)] != -1 && new_dist[give_index(1, j, n - 1)] != -1) {
                        newS[give_index(1, i, n - 1)]++;
                        newR[give_index(1, i, n - 1)] += new_dist[give_index(i, j, n - 1)];
                        newR[give_index(1, i, n - 1)] += new_dist[give_index(1, j, n - 1)];
                    }
                }
            }
        }

        /* fill in the rest of R and S, again only if distance matrix still
           incomplete */
        if (sw == 1) { /* added 2012-04-02 */
            for (i = 1; i < n; i++) {
                if (i == OTU1 || i == OTU2)
                    continue;
                for (j = i + 1; j <= n; j++) {
                    if (j == OTU1 || j == OTU2)
                        continue;
                    newR[ij] = R[give_index(i, j, n)];
                    newS[ij] = s[give_index(i, j, n)];
                    ij++;
                }
            }
        }

        /* update newR and newS with the new taxon, again only if distance
           matrix is still incomplete */
        if (sw == 1) {
            for (i = 2; i < n - 1; i++) {
                if (new_dist[give_index(1, i, n - 1)] == -1)
                    continue;
                for (j = i + 1; j <= n - 1; j++) {
                    if (new_dist[give_index(1, j, n - 1)] == -1)
                        continue;
                    if (new_dist[give_index(i, j, n - 1)] == -1)
                        continue;
                    newR[give_index(i, j, n - 1)] += (new_dist[give_index(1, i, n - 1)] + new_dist[give_index(1, j, n - 1)]);
                    newS[give_index(i, j, n - 1)]++;
                }
            }
        }

        /*
         * Update the labels before the next loop: shift the entries before
         * OTU1 up by one, close the gap left by OTU2 and put the new node
         * first.  This relies on OTU1 < OTU2.
         */
        for (i = OTU1; i > 1; i--)
            otu_label[i] = otu_label[i - 1];
        for (i = OTU2; i < n; i++)
            otu_label[i] = otu_label[i + 1];
        otu_label[1] = cur_nod;

        n--;
        for (i = 0; i < n*(n - 1)/2; i++) {
            D[i] = new_dist[i];
            if (sw == 1) {
                R[i] = newR[i];
                s[i] = newS[i];
            }
        }

        cur_nod--;
        k = k + 2;
    }

    {
        int dK = 0;   /* number of known distances in final distance matrix */
        int iUK = -1; /* index of unknown distance, if exactly one is missing */
        int iK = -1;  /* index of only known distance, only needed if dK==1 */

        /* The last three OTUs all hang off the one remaining node. */
        for (i = 0; i < 3; i++) {
            edge1[*N*2 - 4 - i] = cur_nod;
            edge2[*N*2 - 4 - i] = otu_label[i + 1];
            if (D[i] != -1) {
                dK++;
                iK = i;
            } else {
                iUK = i;
            }
        }

        if (dK == 2) {
            /* if two distances are known: assume our leaves are x,y,z,
               d(x,z) unknown and edge weights of three edges are a,b,c,
               then any b,c>0 that satisfy c-b=d(y,z)-d(x,y) a+c=d(y,z) are
               good edge weights, but for simplicity we assume a=c if
               d(yz)<d(xy) a=b otherwise, and after some algebra we get that
               we can set the missing distance equal to the maximum of the
               already present distances */
            double max = -1e50;
            for (i = 0; i < 3; i++) {
                if (i == iUK)
                    continue;
                if (D[i] > max)
                    max = D[i];
            }
            D[iUK] = max;
        }
        if (dK == 1) {
            /* through similar motivation as above, if we have just one
               known distance we set the other two distances equal to it */
            for (i = 0; i < 3; i++) {
                if (i == iK)
                    continue;
                D[i] = D[iK];
            }
        }
        if (dK == 0) {
            /* no distances are known, we just set them to 1 */
            for (i = 0; i < 3; i++)
                D[i] = 1;
        }
    }

    edge_length[*N*2 - 4] = (D[0] + D[1] - D[2])/2;
    edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
    edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;
}