void sppermute(long n)
{ /* permute the species order as given in array sppord */
  /* In-place Fisher-Yates shuffle of sppord[n-1][0..spp-1]: each slot
   * trades places with a uniformly chosen slot at or below it. */
  long slot, pick, held;

  for (slot = 1; slot <= spp - 1; slot++) {
    pick = (long)((slot + 1) * randum(seed));  /* uniform in [0, slot] */
    held = sppord[n - 1][slot];
    sppord[n - 1][slot] = sppord[n - 1][pick];
    sppord[n - 1][pick] = held;
  }
} /* sppermute */
void charpermute(long m, long n)
{ /* permute the n+1 characters of species m+1 */
  /* Fisher-Yates shuffle of charorder[m][0..n-1], drawing the swap
   * partner uniformly from the already-visited prefix. */
  long slot, pick, held;

  for (slot = 1; slot <= n - 1; slot++) {
    pick = (long)((slot + 1) * randum(seed));  /* uniform in [0, slot] */
    held = charorder[m][slot];
    charorder[m][slot] = charorder[m][pick];
    charorder[m][pick] = held;
  }
} /* charpermute */
void permute_vec(long *a, long n)
{
  /* In-place Fisher-Yates shuffle of a[0..n-1] using the global
   * random-number seed. */
  long idx, pick, held;

  for (idx = 1; idx < n; idx++) {
    pick = (long)((idx + 1) * randum(seed));  /* uniform in [0, idx] */
    held = a[idx];
    a[idx] = a[pick];
    a[pick] = held;
  }
}
void bootweights() { /* sets up weights by resampling data */ long i, j, k, blocks; double p, q, r; long grp = 0, site = 0; ws = newgroups; for (i = 0; i < (ws); i++) weight[i] = 0; blocks = fracsample * newgroups / blocksize; for (i = 1; i <= (blocks); i++) { j = (long)(newgroups * randum(seed)) + 1; for (k = 0; k < blocksize; k++) { weight[j - 1]++; j++; if (j > newgroups) j = 1; } } /* Count number of replicated groups */ newergroups = 0; newersites = 0; for (i = 0; i < newgroups; i++) { newergroups += weight[i]; newersites += newhowmany[i] * weight[i]; } if (newergroups < 1) { fprintf(stdout, "ERROR: sampling frequency or number of sites is too small\n"); exxit(-1); } /* reallocate "newer" arrays, sized by output groups: * newerfactor, newerwhere, newerhowmany, and charorder */ allocnewer(newergroups, newersites); /* Replicate each group i weight[i] times */ grp = 0; site = 0; for (i = 0; i < newgroups; i++) { for (j = 0; j < weight[i]; j++) { for (k = 0; k < newhowmany[i]; k++) { newerfactor[site] = grp + 1; site++; } newerwhere[grp] = newwhere[i]; newerhowmany[grp] = newhowmany[i]; grp++; } } } /* bootweights */
static void permute(int *perm, int n, long *seed)
{
  /* Knuth shuffle of perm[0..n-1]: slot i swaps with a slot drawn
   * uniformly from the not-yet-fixed tail [i, n-1]. */
  int i;

  for (i = 0; i < n; i++) {
    int offset = (int)((double)(n - i) * randum(seed));  /* 0 .. n-i-1 */
    int saved = perm[i];
    perm[i] = perm[i + offset];
    perm[i + offset] = saved;
    /*assert(i + offset < n);*/
  }
}
static void makePermutationFast(int *perm, int n, tree *tr)
{
  /* Fill perm[1..n] with the identity, then Fisher-Yates shuffle it
   * in place (1-based indexing) using the tree's own RNG seed. */
  int slot;

  for (slot = 1; slot <= n; slot++)
    perm[slot] = slot;

  for (slot = 1; slot <= n; slot++) {
    double draw = randum(&tr->randomNumberSeed);
    int hop = (int)((double)(n + 1 - slot) * draw);  /* 0 .. n-slot */
    int keep = perm[slot];
    perm[slot] = perm[slot + hop];
    perm[slot + hop] = keep;
  }
}
void bootweights()
{ /* sets up weights by resampling data */
  /* Fills the global weight[] array according to the selected resampling
   * mode (jackknife / permute / bootstrap / plain rewrite), then expands
   * the weighted groups into the "newer" group/site bookkeeping arrays
   * (newerfactor, newerwhere, newerhowmany, newergroups, newersites). */
  long i, j, k, blocks;
  double p, q, r;

  ws = newgroups;
  for (i = 0; i < (ws); i++)
    weight[i] = 0;
  if (jackknife) {
    /* Delete-fraction jackknife: choose q = round(newgroups*fracsample)
     * groups to keep.  When the target count is not (nearly) an integer,
     * randomly round up or down with probability equal to the fractional
     * part, so the expected sample size matches exactly. */
    if (fabs(newgroups*fracsample - (long)(newgroups*fracsample+0.5)) > 0.00001) {
      if (randum(seed)
          < (newgroups*fracsample - (long)(newgroups*fracsample))
            /((long)(newgroups*fracsample+1.0)-(long)(newgroups*fracsample)))
        q = (long)(newgroups*fracsample)+1;
      else
        q = (long)(newgroups*fracsample);
    } else
      q = (long)(newgroups*fracsample+0.5);
    /* Sequential sampling without replacement: keep group i with
     * probability (groups still needed) / (groups still available). */
    r = newgroups;
    p = q / r;
    ws = 0;
    for (i = 0; i < (newgroups); i++) {
      if (randum(seed) < p) {
        weight[i]++;
        ws++;
        q--;
      }
      r--;
      if (i + 1 < newgroups)
        p = q / r;
    }
  } else if (permute) {
    /* Permutation test: every group kept once; the shuffling itself is
     * presumably done elsewhere (e.g. charpermute) — confirm in caller. */
    for (i = 0; i < (newgroups); i++)
      weight[i] = 1;
  } else if (bootstrap) {
    /* Block bootstrap: sample `blocks` random starting groups, each
     * incrementing the weight of `blocksize` consecutive groups,
     * wrapping past the last group. */
    blocks = fracsample * newgroups / blocksize;
    for (i = 1; i <= (blocks); i++) {
      j = (long)(newgroups * randum(seed)) + 1;
      for (k = 0; k < blocksize; k++) {
        weight[j - 1]++;
        j++;
        if (j > newgroups)
          j = 1;
      }
    }
  } else /* case of rewriting data */
    for (i = 0; i < (newgroups); i++)
      weight[i] = 1;

  for (i = 0; i < (newgroups); i++)
    newerwhere[i] = 0;
  for (i = 0; i < (newgroups); i++)
    newerhowmany[i] = 0;
  /* Expand each group i into weight[i] copies, numbering output groups
   * from 1 and recording, per output site, which output group it is in. */
  newergroups = 0;
  newersites = 0;
  for (i = 0; i < (newgroups); i++) {
    for (j = 1; j <= (weight[i]); j++) {
      newergroups++;
      for (k = 1; k <= (newhowmany[i]); k++) {
        newersites++;
        newerfactor[newersites - 1] = newergroups;
      }
      newerwhere[newergroups - 1] = newwhere[i];
      newerhowmany[newergroups - 1] = newhowmany[i];
    }
  }
} /* bootweights */
void computeNextReplicate(tree *tr, long *randomSeed, int *originalRateCategories,
                          int *originalInvariant, boolean isRapid, boolean fixRates)
{
  /* Draws one bootstrap replicate of the alignment: per-partition
   * multinomial resampling of site weights, then compacts sequences and
   * per-site metadata down to the sites with non-zero weight, and finally
   * re-derives the model indices for the new site count. */
  int j, model, w, *weightBuffer, endsite, *weights, i, l;

  /* Reset all per-site resampled weights. */
  for(j = 0; j < tr->originalCrunchedLength; j++)
    tr->cdta->aliaswgt[j] = 0;

  /* Resample each partition independently so each keeps its original
   * total weight (number of uncompressed sites). */
  for(model = 0; model < tr->NumberOfModels; model++) {
    int nonzero = 0, pos = 0;

    /* nonzero = total original weight of this partition. */
    for (j = 0; j < tr->originalCrunchedLength; j++) {
      if(tr->originalModel[j] == model)
        nonzero += tr->originalWeights[j];
    }

    /* Draw `nonzero` samples with replacement over the partition's
     * uncompressed site positions. */
    weightBuffer = (int *)rax_calloc(nonzero, sizeof(int));
    for (j = 0; j < nonzero; j++)
      weightBuffer[(int) (nonzero*randum(randomSeed))]++;

    /* Fold the uncompressed-position counts back onto the crunched
     * (pattern-compressed) sites: each crunched site j collects the
     * counts of its tr->originalWeights[j] underlying positions. */
    for(j = 0; j < tr->originalCrunchedLength; j++) {
      if(model == tr->originalModel[j]) {
        for(w = 0; w < tr->originalWeights[j]; w++) {
          tr->cdta->aliaswgt[j] += weightBuffer[pos];
          pos++;
        }
      }
    }
    rax_free(weightBuffer);
  }

  /* Count how many crunched sites survived the resampling. */
  endsite = 0;
  for (j = 0; j < tr->originalCrunchedLength; j++) {
    if(tr->cdta->aliaswgt[j] > 0)
      endsite++;
  }
  weights = tr->cdta->aliaswgt;

  /* Compact each taxon's sequence: copy only the surviving sites from
   * the pristine backup yBUF into the working array y0. */
  for(i = 0; i < tr->rdta->numsp; i++) {
    unsigned char *yPos    = &(tr->rdta->y0[((size_t)tr->originalCrunchedLength) * ((size_t)i)]),
                  *origSeq = &(tr->rdta->yBUF[((size_t)tr->originalCrunchedLength) * ((size_t)i)]);
    for(j = 0, l = 0; j < tr->originalCrunchedLength; j++)
      if(tr->cdta->aliaswgt[j] > 0)
        yPos[l++] = origSeq[j];
  }

  /* Compact the per-site metadata in lock-step with the sequences.
   * Note aliaswgt is compacted in place; safe because l <= j always. */
  for(j = 0, l = 0; j < tr->originalCrunchedLength; j++) {
    if(weights[j]) {
      tr->cdta->aliaswgt[l] = tr->cdta->aliaswgt[j];
      tr->dataVector[l]     = tr->originalDataVector[j];
      tr->model[l]          = tr->originalModel[j];
      if(isRapid) {
        /* Rapid bootstrap reuses the rate categories of the original run. */
        tr->cdta->rateCategory[l] = originalRateCategories[j];
        tr->invariant[l]          = originalInvariant[j];
      }
      l++;
    }
  }

  tr->cdta->endsite = endsite;
  fixModelIndices(tr, endsite, fixRates);

  /* Sanity check: the resampled weights must still sum to the original
   * number of alignment sites. */
  {
    int count = 0;
    for(j = 0; j < tr->cdta->endsite; j++)
      count += tr->cdta->aliaswgt[j];
    if(count != tr->rdta->sites)
      printf("count=%d\ttr->rdta->sites=%d\n",count, tr->rdta->sites );
    assert(count == tr->rdta->sites);
  }
}
void computeBOOTRAPID (tree *tr, analdef *adef, long *radiusSeed)
{
  /* One rapid-bootstrap tree search: up to two rounds of thorough SPR
   * optimization with a randomized rearrangement radius, keeping the
   * best tree found in `bestT` and restoring it into `tr` at the end. */
  int i, bestTrav, impr;
  double lh, previousLh, difference, epsilon;
  bestlist *bestT, *bt;
  int countIT;

  /* bestT holds the single best tree seen; bt holds the top 5
   * candidates produced by each rearrangement sweep. */
  bestT = (bestlist *) malloc(sizeof(bestlist));
  bestT->ninit = 0;
  initBestTree(bestT, 1, tr->mxtips);
  saveBestTree(bestT, tr);

  bt = (bestlist *) malloc(sizeof(bestlist));
  bt->ninit = 0;
  initBestTree(bt, 5, tr->mxtips);
  initInfoList(10);

  difference = 10.0;
  epsilon = 0.01;  /* minimum log-likelihood gain to count as improvement */

  /* Randomized rearrangement radius in [5, 15]. */
  bestTrav = adef->bestTrav = 5 + 11 * randum(radiusSeed);

  Thorough = 1;  /* global flag: use thorough SPR evaluation */
  impr = 1;
  if(tr->doCutoff)
    tr->itCount = 0;
  tr->bigCutoff = TRUE;

  /* At most 2 optimization rounds; stop early if a round yields no
   * improvement beyond epsilon. */
  for(countIT = 0; countIT < 2 && impr; countIT++) {
    recallBestTree(bestT, 1, tr);
    treeEvaluate(tr, 1);
    saveBestTree(bestT, tr);
    lh = previousLh = tr->likelihood;

    treeOptimizeRapid(tr, 1, bestTrav, adef, bt);

    impr = 0;
    /* Re-score each candidate from the sweep; accept it if it beats
     * the current best by more than epsilon. */
    for(i = 1; i <= bt->nvalid; i++) {
      recallBestTree(bt, i, tr);
      treeEvaluate(tr, 0.25);
      difference = ((tr->likelihood > previousLh)
                    ? tr->likelihood - previousLh
                    : previousLh - tr->likelihood);
      if(tr->likelihood > lh && difference > epsilon) {
        impr = 1;
        lh = tr->likelihood;
        saveBestTree(bestT, tr);
      }
    }
  }

  tr->bigCutoff = FALSE;
  /* Leave tr holding the overall best tree before tearing down. */
  recallBestTree(bestT, 1, tr);

  freeBestTree(bestT);
  free(bestT);
  freeBestTree(bt);
  free(bt);
  freeInfoList();
}
int *permutationSH(tree *tr, int nBootstrap, long _randomSeed)
{
  /* Generates nBootstrap bootstrap weight vectors (one per replicate,
   * each tr->cdta->endsite ints long) for the SH-like test, resampling
   * each model partition independently.  Returns a rax_calloc'd
   * endsite * nBootstrap array; caller owns and must rax_free it. */
  int
    replicate,
    model,
    maxNonZero = 0,
    *weightBuffer,
    *col     = (int*)rax_calloc(((size_t)tr->cdta->endsite) * ((size_t)nBootstrap), sizeof(int)),
    *nonzero = (int*)rax_calloc(tr->NumberOfModels, sizeof(int));
  long
    randomSeed = _randomSeed;
  size_t
    bufferSize;

  /* nonzero[model] = total original weight (uncompressed sites) of each
   * partition; track the max to size one shared scratch buffer. */
  for(model = 0; model < tr->NumberOfModels; model++) {
    int j;
    for (j = 0; j < tr->cdta->endsite; j++) {
      if(tr->originalModel[j] == model)
        nonzero[model] += tr->originalWeights[j];
    }
    if(nonzero[model] > maxNonZero)
      maxNonZero = nonzero[model];
  }

  /* One scratch buffer reused (and re-zeroed) for every partition of
   * every replicate, sized for the largest partition. */
  bufferSize = ((size_t)maxNonZero) * sizeof(int);
  weightBuffer = (int*)rax_malloc(bufferSize);

  for(replicate = 0; replicate < nBootstrap; replicate++) {
    int
      j,
      *wgt = &col[((size_t)tr->cdta->endsite) * ((size_t)replicate)];  /* this replicate's column */

    for(model = 0; model < tr->NumberOfModels; model++) {
      int
        pos,
        nonz = nonzero[model];

      /* Draw nonz samples with replacement over the partition's
       * uncompressed site positions. */
      memset(weightBuffer, 0, bufferSize);
      for(j = 0; j < nonz; j++)
        weightBuffer[(int) (nonz * randum(&randomSeed))]++;

      /* Fold uncompressed-position counts back onto the crunched sites:
       * site j absorbs the counts of its originalWeights[j] positions. */
      for(j = 0, pos = 0; j < tr->cdta->endsite; j++) {
        if(model == tr->originalModel[j]) {
          int w;
          for(w = 0; w < tr->originalWeights[j]; w++) {
            wgt[j] += weightBuffer[pos];
            pos++;
          }
        }
      }
    }
  }

  rax_free(weightBuffer);
  rax_free(nonzero);

  return col;
}
void bootweights()
{ /* sets up weights by resampling data */
  /* Fills the global weight[] array according to the selected resampling
   * mode (jackknife / permute / bootstrap / plain rewrite), sizes and
   * reallocates the "newer" output arrays, then expands each weighted
   * group into its replicated copies. */
  long i, j, k, blocks;
  double p, q, r;
  long grp = 0, site = 0;

  ws = newgroups;
  for (i = 0; i < (ws); i++)
    weight[i] = 0;
  if (jackknife) {
    /* Delete-fraction jackknife: choose q = round(newgroups*fracsample)
     * groups to keep.  When the target is not (nearly) an integer,
     * randomly round up or down with probability equal to the fractional
     * part so the expected sample size matches exactly. */
    if (fabs(newgroups*fracsample - (long)(newgroups*fracsample+0.5)) > 0.00001) {
      if (randum(seed)
          < (newgroups*fracsample - (long)(newgroups*fracsample))
            /((long)(newgroups*fracsample+1.0)-(long)(newgroups*fracsample)))
        q = (long)(newgroups*fracsample)+1;
      else
        q = (long)(newgroups*fracsample);
    } else
      q = (long)(newgroups*fracsample+0.5);
    /* Sequential sampling without replacement: keep group i with
     * probability (groups still needed) / (groups still available). */
    r = newgroups;
    p = q / r;
    ws = 0;
    for (i = 0; i < (newgroups); i++) {
      if (randum(seed) < p) {
        weight[i]++;
        ws++;
        q--;
      }
      r--;
      if (i + 1 < newgroups)
        p = q / r;
    }
  } else if (permute) {
    /* Permutation test: keep every group once; the actual shuffling is
     * presumably done elsewhere (e.g. charpermute) — confirm in caller. */
    for (i = 0; i < (newgroups); i++)
      weight[i] = 1;
  } else if (bootstrap) {
    /* Block bootstrap: sample `blocks` random starting groups, each
     * incrementing the weight of `blocksize` consecutive groups,
     * wrapping past the last group. */
    blocks = fracsample * newgroups / blocksize;
    for (i = 1; i <= (blocks); i++) {
      j = (long)(newgroups * randum(seed)) + 1;
      for (k = 0; k < blocksize; k++) {
        weight[j - 1]++;
        j++;
        if (j > newgroups)
          j = 1;
      }
    }
  } else /* case of rewriting data */
    for (i = 0; i < (newgroups); i++)
      weight[i] = 1;

  /* Count number of replicated groups */
  newergroups = 0;
  newersites = 0;
  for (i = 0; i < newgroups; i++) {
    newergroups += weight[i];
    newersites += newhowmany[i] * weight[i];
  }

  if (newergroups < 1) {
    fprintf(stdout, "ERROR: sampling frequency or number of sites is too small\n");
    exxit(-1);
  }

  /* reallocate "newer" arrays, sized by output groups:
   * newerfactor, newerwhere, newerhowmany, and charorder */
  allocnewer(newergroups, newersites);

  /* Replicate each group i weight[i] times */
  grp = 0;
  site = 0;
  for (i = 0; i < newgroups; i++) {
    for (j = 0; j < weight[i]; j++) {
      for (k = 0; k < newhowmany[i]; k++) {
        newerfactor[site] = grp + 1;  /* 1-based output-group id per site */
        site++;
      }
      newerwhere[grp] = newwhere[i];
      newerhowmany[grp] = newhowmany[i];
      grp++;
    }
  }
} /* bootweights */