/**
 * Serialize a vision spot into an S-expression tree.
 *
 * The returned list holds six key/value entries, in this order:
 *   "center"     -> (rawX, rawY), stored unshifted
 *   "topLeft"    -> (xLo, yHi), each shifted by half the image size
 *   "lowerRight" -> (xHi, yLo), each shifted by half the image size
 *   "inner"      -> b.innerDiam
 *   "outer"      -> b.outerDiam
 *   "spottype"   -> b.spotType
 *
 * @param b      spot to describe
 * @param width  image width; width / 2 (integer division) is added to x bounds
 * @param height image height; height / 2 (integer division) is added to y bounds
 * @return the assembled S-expression list
 */
SExpr treeFromSpot(man::vision::Spot & b, int width, int height)
{
    const int halfWidth = width / 2;
    const int halfHeight = height / 2;

    // Bounding-box edges, offset by half the image dimensions.
    SExpr loX(b.xLo() + halfWidth);
    SExpr hiX(b.xHi() + halfWidth);
    SExpr loY(b.yLo() + halfHeight);
    SExpr hiY(b.yHi() + halfHeight);

    // Raw center coordinates are emitted as-is.
    SExpr centerX(b.rawX);
    SExpr centerY(b.rawY);

    SExpr centerPoint = SExpr::list({centerX, centerY});
    SExpr upperLeftPoint = SExpr::list({loX, hiY});
    SExpr lowerRightPoint = SExpr::list({hiX, loY});

    SExpr centerEntry = SExpr::keyValue("center", centerPoint);
    SExpr topLeftEntry = SExpr::keyValue("topLeft", upperLeftPoint);
    SExpr lowerRightEntry = SExpr::keyValue("lowerRight", lowerRightPoint);
    SExpr innerEntry = SExpr::keyValue("inner", b.innerDiam);
    SExpr outerEntry = SExpr::keyValue("outer", b.outerDiam);
    SExpr typeEntry = SExpr::keyValue("spottype", b.spotType);

    return SExpr::list({centerEntry, topLeftEntry, lowerRightEntry,
                        innerEntry, outerEntry, typeEntry});
}
static lbfgsfloatval_t PLMNegLogPosteriorDO(void *instance,
    const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n,
    const lbfgsfloatval_t step) {
    /* Compute the negative log posterior, which is the negative penalized
       log-(pseudo)likelihood and the objective for MAP inference, with a
       random 0/1 mask drawn over the parameters for every sequence.

       instance  array of three pointers:
                 {alignment_t *, options_t *, numeric_t *lambdas}
       x         current parameters (read through the xHi / xEij macros)
       g         gradient output; zeroed here, then filled via dHi / dEij
       n, step   part of the callback signature; not used in this function

       Returns the value PostCondition() yields for the penalized objective.
       Side effects: overwrites g, stores the unpenalized negative
       log-likelihood in ali->negLogLk, and advances the rand() state.

       NOTE(review): bitHi / bitEij are macros defined elsewhere; they
       presumably look up drop_mask for the given parameter - confirm. */
    void **d = (void **)instance;
    alignment_t *ali = (alignment_t *) d[0];
    options_t *options = (options_t *) d[1];
    numeric_t *lambdas = (numeric_t *) d[2];

    /* Initialize log-likelihood and gradient */
    lbfgsfloatval_t fx = 0.0;
    for (int i = 0; i < ali->nParams; i++) g[i] = 0;

    /* Workspace: site potentials, site conditional distribution, and mask */
    numeric_t *H = (numeric_t *) malloc(ali->nCodes * sizeof(numeric_t));
    numeric_t *P = (numeric_t *) malloc(ali->nCodes * sizeof(numeric_t));
    int *drop_mask = (int *) malloc(ali->nParams * sizeof(int));
    if (H == NULL || P == NULL || drop_mask == NULL) {
        fprintf(stderr,
            "ERROR: Failed to allocate a memory block for the objective.\n");
        exit(1);
    }

    for (int s = 0; s < ali->nSeqs; s++) {
        /* Generate random bit mask over parameters */
        for (int p = 0; p < ali->nParams; p ++)
            drop_mask[p] = (int) rand() % 2;

        /* Pseudolikelihood objective */
        for (int i = 0; i < ali->nSites; i++) {
            /* Masked potential of every code at site i given the rest of
               sequence s */
            for (int a = 0; a < ali->nCodes; a++)
                H[a] = bitHi(i, a) * xHi(i, a);
            for (int a = 0; a < ali->nCodes; a++)
                for (int j = 0; j < ali->nSites; j++) {
                    if (j == i) continue;
                    H[a] += bitEij(i, j, a, seq(s, j))
                          * xEij(i, j, a, seq(s, j));
                }

            /* Compute distribution from potential. The maximum potential
               is subtracted before exponentiating so that exp() cannot
               overflow; the normalized distribution is unchanged. */
            numeric_t scale = H[0];
            for (int a = 1; a < ali->nCodes; a++)
                scale = (scale >= H[a] ? scale : H[a]);
            for (int a = 0; a < ali->nCodes; a++) P[a] = exp(H[a] - scale);
            numeric_t Z = 0;
            for (int a = 0; a < ali->nCodes; a++) Z += P[a];
            numeric_t Zinv = 1.0 / Z;
            for (int a = 0; a < ali->nCodes; a++) P[a] *= Zinv;

            /* Log-likelihood contributions */
            fx -= ali->weights[s] * log(P[seq(s, i)]);

            /* Field gradient: observed code minus expectation under P */
            dHi(i, seq(s, i)) -= bitHi(i, seq(s, i)) * ali->weights[s];
            for (int a = 0; a < ali->nCodes; a++)
                dHi(i, a) -= -bitHi(i, a) * ali->weights[s] * P[a];

            /* Couplings gradient: observed pairs */
            for (int j = 0; j < ali->nSites; j++) {
                if (j == i) continue;
                dEij(i, j, seq(s, i), seq(s, j)) -=
                    bitEij(i, j, seq(s, i), seq(s, j)) * ali->weights[s];
            }

            /* Couplings gradient: expectation under P */
            for (int j = 0; j < ali->nSites; j++) {
                if (j == i) continue;
                for (int a = 0; a < ali->nCodes; a++)
                    dEij(i, j, a, seq(s, j)) -=
                        -bitEij(i, j, a, seq(s, j)) * ali->weights[s] * P[a];
            }
        }
    }

    free(H);
    free(P);
    free(drop_mask);

    ali->negLogLk = fx;

    /* Gaussian priors */
    for (int i = 0; i < ali->nSites; i++)
        for (int ai = 0; ai < ali->nCodes; ai++) {
            dHi(i, ai) += lambdaHi(i) * 2.0 * xHi(i, ai);
            fx += lambdaHi(i) * xHi(i, ai) * xHi(i, ai);
        }

    for (int i = 0; i < ali->nSites-1; i++)
        for (int j = i + 1; j < ali->nSites; j++)
            for (int ai = 0; ai < ali->nCodes; ai++)
                for (int aj = 0; aj < ali->nCodes; aj++) {
                    dEij(i, j, ai, aj) += lambdaEij(i, j)
                        * 2.0 * xEij(i, j, ai, aj);
                    fx += lambdaEij(i, j)
                        * xEij(i, j, ai, aj) * xEij(i, j, ai, aj);
                }

    fx = PostCondition(x, g, fx, ali, options);
    return fx;
}
numeric_t *InferPairModel(alignment_t *ali, options_t *options) {
    /* Estimate the parameters of a maximum entropy model for a
       multiple sequence alignment.

       ali      alignment; ali->nParams is set here, and ali->nCodes is
                reduced to strlen(ali->alphabet) - 1 when
                options->estimatorMAP == INFER_MAP_PLM_GAPREDUCE
       options  estimator selection and regularization strengths

       Returns a malloc'd vector of ali->nParams parameters; this function
       does not free it. Exits the process if an allocation fails.

       For the gap-reduced estimator every sequence code is shifted down by
       one for the duration of inference and shifted back before returning.

       NOTE(review): lambdas is never freed in this function. If
       EstimatePairModelMAP does not keep the pointer, it can be released
       before returning - confirm against that function. */

    /* Initialize the regularization parameters */
    numeric_t *lambdas =
        (numeric_t *) malloc((ali->nSites + ali->nSites * (ali->nSites - 1) / 2)
            * sizeof(numeric_t));
    if (lambdas == NULL) {
        fprintf(stderr,
            "ERROR: Failed to allocate a memory block for regularization.\n");
        exit(1);
    }
    for (int i = 0; i < ali->nSites; i++) lambdaHi(i) = options->lambdaH;
    for (int i = 0; i < ali->nSites - 1; i++)
        for (int j = i + 1; j < ali->nSites; j++)
            lambdaEij(i, j) = options->lambdaE;

    /* For gap-reduced problems, eliminate the gaps and reduce the alphabet */
    if (options->estimatorMAP == INFER_MAP_PLM_GAPREDUCE) {
        ali->nCodes = strlen(ali->alphabet) - 1;
        for (int i = 0; i < ali->nSites; i++)
            for (int s = 0; s < ali->nSeqs; s++)
                seq(s, i) -= 1;
    }

    /* Initialize parameters */
    ali->nParams = ali->nSites * ali->nCodes
        + ali->nSites * (ali->nSites - 1) / 2 * ali->nCodes * ali->nCodes;
    numeric_t *x = (numeric_t *) malloc(sizeof(numeric_t) * ali->nParams);
    if (x == NULL) {
        fprintf(stderr,
            "ERROR: Failed to allocate a memory block for variables.\n");
        exit(1);
    }
    for (int i = 0; i < ali->nParams; i++) x[i] = 0.0;

    /* Initialize site parameters with the ML estimates
            hi = log(fi) + C
       A single pseudocount is added for stability
       (Laplace's rule or Morcos et al. with lambda = nCodes) */
    if (options->zeroAPC != 1) {
        numeric_t pseudoC = (numeric_t) ali->nCodes;
        numeric_t Zinv = 1.0 / (ali->nEff + pseudoC);

        /* Seed every (site, code) field with its pseudocount share. The
           inner loop must run over codes, not sites: otherwise fields are
           left at zero (log(0) below) or pseudocounts are written outside
           the field block. */
        for (int i = 0; i < ali->nSites; i++)
            for (int ai = 0; ai < ali->nCodes; ai++)
                xHi(i, ai) = Zinv * pseudoC / (numeric_t) ali->nCodes;

        /* Accumulate weighted code frequencies */
        for (int s = 0; s < ali->nSeqs; s++)
            for (int i = 0; i < ali->nSites; i++)
                xHi(i, seq(s, i)) += ali->weights[s] * Zinv;

        for (int i = 0; i < ali->nSites; i++)
            for (int ai = 0; ai < ali->nCodes; ai++)
                xHi(i, ai) = log(xHi(i, ai));

        /* Zero-sum gauge */
        for (int i = 0; i < ali->nSites; i++) {
            numeric_t hSum = 0.0;
            for (int ai = 0; ai < ali->nCodes; ai++) hSum += xHi(i, ai);
            numeric_t hShift = hSum / (numeric_t) ali->nCodes;
            for (int ai = 0; ai < ali->nCodes; ai++)
                xHi(i, ai) -= hShift;
        }
    }

    switch(options->estimator) {
        /* Point estimates */
        case INFER_MAP:
            /* Maximum a posteriori estimates of model parameters */
            EstimatePairModelMAP(x, lambdas, ali, options);
            break;
        /* For: future alternative estimators */
        default:
            /* Maximum a posteriori estimates of model parameters */
            EstimatePairModelMAP(x, lambdas, ali, options);
    }

    /* Restore the alignment encoding after inference */
    if (options->estimatorMAP == INFER_MAP_PLM_GAPREDUCE) {
        for (int i = 0; i < ali->nSites; i++)
            for (int s = 0; s < ali->nSeqs; s++)
                seq(s, i) += 1;
    }

    return (numeric_t *) x;
}
static lbfgsfloatval_t PLMNegLogPosteriorBlock(void *instance,
    const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n,
    const lbfgsfloatval_t step) {
    /* Compute the negative log posterior, which is the negative penalized
       log-(pseudo)likelihood and the objective for MAP inference, using
       dense (site, code) x (site, code) copies of the couplings so that
       each sequence is processed with contiguous block updates.

       instance  array of three pointers:
                 {alignment_t *, options_t *, numeric_t *lambdas}
       x         current parameters (read through the xHi / xEij macros)
       g         gradient output; zeroed here, then filled via dHi / dEij
       n, step   part of the callback signature; not used in this function

       Returns the value PostCondition() yields for the penalized objective.
       Side effects: overwrites g and stores the unpenalized negative
       log-likelihood in ali->negLogLk.

       NOTE(review): Hi, gHi, Eij, gEij and Hp are accessor macros defined
       elsewhere; they presumably index the local arrays hi, gHi, eij, gEij
       and H - confirm. */
    void **d = (void **)instance;
    alignment_t *ali = (alignment_t *) d[0];
    options_t *options = (options_t *) d[1];
    numeric_t *lambdas = (numeric_t *) d[2];

    /* Initialize log-likelihood and gradient */
    lbfgsfloatval_t fx = 0.0;
    for (int i = 0; i < ali->nParams; i++) g[i] = 0;

    /* Block fields hi */
    numeric_t *hi = (numeric_t *)
        malloc(ali->nSites * ali->nCodes * sizeof(numeric_t));
    numeric_t *gHi = (numeric_t *)
        malloc(ali->nSites * ali->nCodes * sizeof(numeric_t));

    /* Block couplings eij */
    numeric_t *eij = (numeric_t *) malloc(ali->nSites * ali->nSites
        * ali->nCodes * ali->nCodes * sizeof(numeric_t));
    numeric_t *gEij = (numeric_t *) malloc(ali->nSites * ali->nSites
        * ali->nCodes * ali->nCodes * sizeof(numeric_t));

    /* Per-sequence workspace, allocated once and reused: H holds the
       potentials (later probabilities) for every (site, code), Z the
       per-site partition functions */
    numeric_t *H = (numeric_t *)
        malloc(ali->nCodes * ali->nSites * sizeof(numeric_t));
    numeric_t *Z = (numeric_t *) malloc(ali->nSites * sizeof(numeric_t));

    if (hi == NULL || gHi == NULL || eij == NULL || gEij == NULL
            || H == NULL || Z == NULL) {
        fprintf(stderr,
            "ERROR: Failed to allocate a memory block for the objective.\n");
        exit(1);
    }

    for (int i = 0; i < ali->nSites; i++)
        for (int ai = 0; ai < ali->nCodes; ai++)
            Hi(i, ai) = xHi(i, ai);
    for (int i = 0; i < ali->nSites * ali->nCodes; i++) gHi[i] = 0;

    for (int i = 0; i < ali->nSites * ali->nSites * ali->nCodes * ali->nCodes;
            i++) eij[i] = 0.0;
    for (int i = 0; i < ali->nSites * ali->nSites * ali->nCodes * ali->nCodes;
            i++) gEij[i] = 0.0;

    /* Mirror every coupling into both (i, j) and (j, i) blocks; diagonal
       blocks stay zero */
    for (int i = 0; i < ali->nSites - 1; i++)
        for (int j = i + 1; j < ali->nSites; j++)
            for (int ai = 0; ai < ali->nCodes; ai++)
                for (int aj = 0; aj < ali->nCodes; aj++)
                    Eij(j, aj, i, ai) = Eij(i, ai, j, aj) = xEij(i, j, ai, aj);

    /* Negative log-pseudolikelihood */
    for (int s = 0; s < ali->nSeqs; s++) {
        /* Form potential for conditional log likelihoods at every site */

        /* Initialize potentials with fields */
        for(int jx = 0; jx < ali->nSites * ali->nCodes; jx++) H[jx] = hi[jx];

        /* Contribute coupling block due to i, ai */
        for (int i = 0; i < ali->nSites; i++) {
            const letter_t ai = seq(s, i);
            const numeric_t *jB = &(Eij(i, ai, 0, 0));
            for(int jx = 0; jx < ali->nSites * ali->nCodes; jx++)
                H[jx] += jB[jx];
        }

        /* Conditional log likelihoods: normalize over the codes of each
           site (H only holds nCodes entries per site) */
        for (int i = 0; i < ali->nSites * ali->nCodes; i++) H[i] = exp(H[i]);
        for (int i = 0; i < ali->nSites; i++) Z[i] = 0;
        for (int i = 0; i < ali->nSites; i++)
            for (int ai = 0; ai < ali->nCodes; ai++) Z[i] += Hp(i, ai);
        for (int i = 0; i < ali->nSites; i++)
            for (int ai = 0; ai < ali->nCodes; ai++) Hp(i, ai) /= Z[i];

        numeric_t seqFx = 0;
        for (int i = 0; i < ali->nSites; i++)
            seqFx -= ali->weights[s] * log(Hp(i, seq(s, i)));

        /* From here on H holds -weight * probability */
        for(int jx = 0; jx < ali->nSites * ali->nCodes; jx++)
            H[jx] *= -ali->weights[s];

        /* Field gradient: observed code minus expectation */
        for (int i = 0; i < ali->nSites; i++)
            gHi(i, seq(s, i)) -= ali->weights[s];
        for(int jx = 0; jx < ali->nSites * ali->nCodes; jx++)
            gHi[jx] -= H[jx];

        /* Couplings gradient, observed pairs. The pair (i, j) is observed
           once by the conditional at site i and once by the conditional at
           site j, so it enters with twice the sequence weight. Only the
           i < j block is written here; the fold into dEij below sums the
           (i, j) and (j, i) blocks. */
        for (int i = 0; i < ali->nSites - 1; i++)
            for (int j = i + 1; j < ali->nSites; j++)
                gEij(i, seq(s, i), j, seq(s, j)) -= 2.0 * ali->weights[s];

        /* Couplings gradient, expectation: one contiguous block per site */
        for (int i = 0; i < ali->nSites; i++) {
            const letter_t ai = seq(s, i);
            numeric_t *jgBlock = &(gEij(i, ai, 0, 0));
            for (int jx = 0; jx < ali->nSites * ali->nCodes; jx++)
                jgBlock[jx] -= H[jx];
        }

        fx += seqFx;
    }

    free(H);
    free(Z);

    /* Fold the block gradients back into the parameter-ordered gradient */
    for (int i = 0; i < ali->nSites; i++)
        for (int ai = 0; ai < ali->nCodes; ai++)
            dHi(i, ai) += gHi(i, ai);
    for (int i = 0; i < ali->nSites - 1; i++)
        for (int j = i + 1; j < ali->nSites; j++)
            for (int ai = 0; ai < ali->nCodes; ai++)
                for (int aj = 0; aj < ali->nCodes; aj++)
                    dEij(i, j, ai, aj) +=
                        gEij(j, aj, i, ai) + gEij(i, ai, j, aj);

    free(hi);
    free(gHi);
    free(eij);
    free(gEij);

    ali->negLogLk = fx;

    /* Gaussian priors */
    for (int i = 0; i < ali->nSites; i++)
        for (int ai = 0; ai < ali->nCodes; ai++) {
            dHi(i, ai) += lambdaHi(i) * 2.0 * xHi(i, ai);
            fx += lambdaHi(i) * xHi(i, ai) * xHi(i, ai);
        }

    for (int i = 0; i < ali->nSites-1; i++)
        for (int j = i + 1; j < ali->nSites; j++)
            for (int ai = 0; ai < ali->nCodes; ai++)
                for (int aj = 0; aj < ali->nCodes; aj++) {
                    dEij(i, j, ai, aj) += lambdaEij(i, j)
                        * 2.0 * xEij(i, j, ai, aj);
                    fx += lambdaEij(i, j)
                        * xEij(i, j, ai, aj) * xEij(i, j, ai, aj);
                }

    fx = PostCondition(x, g, fx, ali, options);
    return fx;
}
static lbfgsfloatval_t PLMNegLogPosteriorGapReduce(void *instance,
    const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n,
    const lbfgsfloatval_t step) {
    /* Negative log posterior (negative penalized log-pseudolikelihood) in
       which positions carrying a negative code are left out: a sequence
       contributes to site i only when its code there is >= 0, and
       neighbouring positions with a negative code add nothing to the
       potential or to the coupling gradient.

       instance  array of three pointers:
                 {alignment_t *, options_t *, numeric_t *lambdas}
       x         current parameters (read through the xHi / xEij macros)
       g         gradient output; zeroed here, then filled via dHi / dEij
       n, step   part of the callback signature; not used in this function

       Returns the value PostCondition() yields for the penalized objective.
       Side effects: overwrites g and stores the unpenalized negative
       log-likelihood in ali->negLogLk.

       NOTE(review): siteE / siteH and siteDE / siteDH are accessor macros
       defined elsewhere; they presumably index the per-site buffers Xi and
       Di respectively - confirm. */
    void **d = (void **)instance;
    alignment_t *ali = (alignment_t *) d[0];
    options_t *options = (options_t *) d[1];
    numeric_t *lambdas = (numeric_t *) d[2];

    /* Start from a zero objective and a zero gradient */
    lbfgsfloatval_t fx = 0.0;
    for (int i = 0; i < ali->nParams; i++) g[i] = 0;

    /* Negative log-pseudolikelihood, one site per loop iteration */
    #pragma omp parallel for
    for (int i = 0; i < ali->nSites; i++) {
        numeric_t *H = (numeric_t *) malloc(ali->nCodes * sizeof(numeric_t));
        numeric_t *P = (numeric_t *) malloc(ali->nCodes * sizeof(numeric_t));
        numeric_t siteFx = 0.0;

        /* Local copy of the parameters that involve site i */
        numeric_t *Xi = (numeric_t *) malloc(ali->nCodes * ali->nCodes
            * ali->nSites * sizeof(numeric_t));
        for (int j = 0; j < ali->nSites; j++) {
            if (j == i) continue;
            for (int a = 0; a < ali->nCodes; a++)
                for (int b = 0; b < ali->nCodes; b++)
                    siteE(j, a, b) = xEij(i, j, a, b);
        }
        for (int a = 0; a < ali->nCodes; a++) siteH(i, a) = xHi(i, a);

        /* Local gradient accumulator for site i, zero-initialized */
        numeric_t *Di = (numeric_t *) malloc(ali->nCodes * ali->nCodes
            * ali->nSites * sizeof(numeric_t));
        for (int k = 0; k < ali->nCodes * ali->nCodes * ali->nSites; k++)
            Di[k] = 0.0;

        /* Site negative conditional log likelihoods */
        for (int s = 0; s < ali->nSeqs; s++) {
            /* Only ungapped sites are considered in the model */
            if (seq(s, i) >= 0) {
                /* Potentials: field plus couplings to every ungapped
                   neighbour */
                for (int a = 0; a < ali->nCodes; a++) H[a] = siteH(i, a);
                for (int j = 0; j < ali->nSites; j++) {
                    if (j == i) continue;
                    if (seq(s, j) >= 0)
                        for (int a = 0; a < ali->nCodes; a++)
                            H[a] += siteE(j, a, seq(s, j));
                }

                /* Conditional distribution given sequence background;
                   the largest potential is subtracted before exp() */
                numeric_t scale = H[0];
                for (int a = 1; a < ali->nCodes; a++)
                    scale = (scale >= H[a] ? scale : H[a]);
                numeric_t Z = 0;
                for (int a = 0; a < ali->nCodes; a++)
                    P[a] = exp(H[a] - scale);
                for (int a = 0; a < ali->nCodes; a++) Z += P[a];
                numeric_t Zinv = 1.0 / Z;
                for (int a = 0; a < ali->nCodes; a++) P[a] *= Zinv;

                /* Log-likelihood contributions are scaled by sequence
                   weight */
                numeric_t w = ali->weights[s];
                siteFx -= w * log(P[seq(s, i)]);

                /* Field gradient: observed code, then expectation */
                siteDH(i, seq(s, i)) -= w;
                for (int a = 0; a < ali->nCodes; a++)
                    siteDH(i, a) += w * P[a];

                /* Couplings gradient: observed pairs first ... */
                int ix = seq(s, i);
                for (int j = 0; j < ali->nSites; j++) {
                    if (j == i) continue;
                    if (seq(s, j) >= 0) siteDE(j, ix, seq(s, j)) -= w;
                }

                /* ... then the expectation under P */
                for (int j = 0; j < ali->nSites; j++) {
                    if (j == i) continue;
                    if (seq(s, j) >= 0)
                        for (int a = 0; a < ali->nCodes; a++)
                            siteDE(j, a, seq(s, j)) += w * P[a];
                }
            }
        }

        /* Contribute local loglk and gradient to global */
        #pragma omp critical
        {
            fx += siteFx;
            for (int j = 0; j < ali->nSites; j++) {
                if (j == i) continue;
                for (int a = 0; a < ali->nCodes; a++)
                    for (int b = 0; b < ali->nCodes; b++)
                        dEij(i, j, a, b) += siteDE(j, a, b);
            }
            for (int a = 0; a < ali->nCodes; a++) dHi(i, a) += siteDH(i, a);
            free(Xi);
            free(Di);
        }

        free(H);
        free(P);
    }

    ali->negLogLk = fx;

    /* Gaussian priors on the fields ... */
    for (int i = 0; i < ali->nSites; i++)
        for (int ai = 0; ai < ali->nCodes; ai++) {
            dHi(i, ai) += lambdaHi(i) * 2.0 * xHi(i, ai);
            fx += lambdaHi(i) * xHi(i, ai) * xHi(i, ai);
        }

    /* ... and on the couplings */
    for (int i = 0; i < ali->nSites-1; i++)
        for (int j = i + 1; j < ali->nSites; j++)
            for (int ai = 0; ai < ali->nCodes; ai++)
                for (int aj = 0; aj < ali->nCodes; aj++) {
                    dEij(i, j, ai, aj) += lambdaEij(i, j)
                        * 2.0 * xEij(i, j, ai, aj);
                    fx += lambdaEij(i, j)
                        * xEij(i, j, ai, aj) * xEij(i, j, ai, aj);
                }

    fx = PostCondition(x, g, fx, ali, options);
    return fx;
}