コード例 #1
0
ファイル: Helpers.cpp プロジェクト: northern-bites/nbites
/**
 * Build an s-expression describing a vision spot.
 *
 * The spot's x/y bounds are shifted by half of the supplied width/height
 * before being stored (presumably moving from a center-origin to a
 * corner-origin coordinate frame -- confirm against callers); the raw
 * center coordinates are stored unshifted.
 *
 * @param b      spot to describe
 * @param width  width whose half is added to the x bounds
 * @param height height whose half is added to the y bounds
 * @return list of key/value pairs, in order: center, topLeft, lowerRight,
 *         inner, outer, spottype
 */
SExpr treeFromSpot(man::vision::Spot & b, int width, int height)
{
    const int halfWidth = width / 2;
    const int halfHeight = height / 2;

    // Shifted bounding-box edges.
    SExpr loX(b.xLo() + halfWidth);
    SExpr hiX(b.xHi() + halfWidth);
    SExpr loY(b.yLo() + halfHeight);
    SExpr hiY(b.yHi() + halfHeight);

    // Raw (unshifted) center.
    SExpr rawX(b.rawX);
    SExpr rawY(b.rawY);

    // Coordinate pairs: topLeft pairs the low x with the high y,
    // lowerRight pairs the high x with the low y.
    SExpr centerPair = SExpr::list({rawX, rawY});
    SExpr upperLeftPair = SExpr::list({loX, hiY});
    SExpr lowerRightPair = SExpr::list({hiX, loY});

    SExpr centerEntry = SExpr::keyValue("center", centerPair);
    SExpr topLeftEntry = SExpr::keyValue("topLeft", upperLeftPair);
    SExpr lowerRightEntry = SExpr::keyValue("lowerRight", lowerRightPair);
    SExpr innerEntry = SExpr::keyValue("inner", b.innerDiam);
    SExpr outerEntry = SExpr::keyValue("outer", b.outerDiam);
    SExpr typeEntry = SExpr::keyValue("spottype", b.spotType);

    return SExpr::list({centerEntry, topLeftEntry, lowerRightEntry,
        innerEntry, outerEntry, typeEntry});
}
コード例 #2
0
ファイル: inference.c プロジェクト: debbiemarkslab/plmc
static lbfgsfloatval_t PLMNegLogPosteriorDO(void *instance,
    const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n,
    const lbfgsfloatval_t step) {
    /* Compute the negative log posterior, which is the negative
       penalized log-(pseudo)likelihood and the objective for MAP inference.
       In this variant every field/coupling term is multiplied by
       bitHi()/bitEij(), with a fresh random 0/1 mask drawn per sequence
       (presumably those macros read drop_mask -- confirm in the header).

       instance: array of three pointers
                 {alignment_t *, options_t *, numeric_t *lambdas}
       x:        current parameter vector (read-only)
       g:        gradient output; its ali->nParams entries are zeroed and
                 then accumulated into
       n, step:  not referenced directly in this body

       Side effect: stores the unpenalized negative log-likelihood in
       ali->negLogLk.
       Returns the penalized objective as transformed by PostCondition().
    */
    void **d = (void **)instance;
    alignment_t *ali = (alignment_t *) d[0];
    options_t *options = (options_t *) d[1];
    numeric_t *lambdas = (numeric_t *) d[2];

    /* Initialize log-likelihood and gradient */
    lbfgsfloatval_t fx = 0.0;
    for (int i = 0; i < ali->nParams; i++) g[i] = 0;

    /* Scratch: H holds site potentials, P the conditional distribution */
    numeric_t *H = (numeric_t *) malloc(ali->nCodes * sizeof(numeric_t));
    numeric_t *P = (numeric_t *) malloc(ali->nCodes * sizeof(numeric_t));
    int *drop_mask = (int *) malloc(ali->nParams * sizeof(int));
    for (int s = 0; s < ali->nSeqs; s++) {
        /* Generate random bit mask over parameters */
        for (int p = 0; p < ali->nParams; p ++)
            drop_mask[p] = (int) rand() % 2;

        /* Pseudolikelihood objective */
        for (int i = 0; i < ali->nSites; i++) {
            /* Masked potential of every code a at site i given the rest of
               sequence s (all sites j != i) */
            for (int a = 0; a < ali->nCodes; a++) H[a] = bitHi(i, a)
                                               * xHi(i, a);
            for (int a = 0; a < ali->nCodes; a++)
                for (int j = 0; j < i; j++)
                    H[a] += bitEij(i, j, a, seq(s, j))
                            * xEij(i, j, a, seq(s, j));
            for (int a = 0; a < ali->nCodes; a++)
                for (int j = i + 1; j < ali->nSites; j++)
                    H[a] += bitEij(i, j, a, seq(s, j))
                            * xEij(i, j, a, seq(s, j));

            /* Compute distribution from potential. The potentials are
               shifted by their maximum before exponentiation so that exp()
               cannot overflow; the shift cancels in the normalization. */
            numeric_t scale = H[0];
            for (int a = 1; a < ali->nCodes; a++)
                scale = (scale >= H[a] ? scale : H[a]);
            for (int a = 0; a < ali->nCodes; a++) P[a] = exp(H[a] - scale);
            numeric_t Z = 0;
            for (int a = 0; a < ali->nCodes; a++) Z += P[a];
            numeric_t Zinv = 1.0 / Z;
            for (int a = 0; a < ali->nCodes; a++) P[a] *= Zinv;

            /* Log-likelihood contributions */
            fx -= ali->weights[s] * log(P[seq(s, i)]);

            /* Field gradient: observed term, then expected term */
            dHi(i, seq(s, i)) -= bitHi(i, seq(s, i)) * ali->weights[s];
            for (int a = 0; a < ali->nCodes; a++)
                dHi(i, a) -= -bitHi(i, a) * ali->weights[s] * P[a];

            /* Couplings gradient: observed term for every j != i */
            for (int j = 0; j < i; j++)
                dEij(i, j, seq(s, i), seq(s, j)) -=
                    bitEij(i, j, seq(s, i), seq(s, j)) * ali->weights[s];
            for (int j = i + 1; j < ali->nSites; j++)
                dEij(i, j, seq(s, i), seq(s, j)) -=
                    bitEij(i, j, seq(s, i), seq(s, j)) * ali->weights[s];

            /* Couplings gradient: expected term for every j != i */
            for (int j = 0; j < i; j++)
                for (int a = 0; a < ali->nCodes; a++)
                    dEij(i, j, a, seq(s, j)) -=
                        -bitEij(i, j, a, seq(s, j)) * ali->weights[s] * P[a];
            for (int j = i + 1; j < ali->nSites; j++)
                for (int a = 0; a < ali->nCodes; a++)
                    dEij(i, j, a, seq(s, j)) -=
                        -bitEij(i, j, a, seq(s, j)) * ali->weights[s] * P[a];
        }
    }
    free(H);
    free(P);
    free(drop_mask);

    /* Record the likelihood term before the priors are added */
    ali->negLogLk = fx;

    /* Gaussian priors */
    for (int i = 0; i < ali->nSites; i++)
        for (int ai = 0; ai < ali->nCodes; ai++) {
            dHi(i, ai) += lambdaHi(i) * 2.0 * xHi(i, ai);
            fx += lambdaHi(i) * xHi(i, ai) * xHi(i, ai);
        }

    for (int i = 0; i < ali->nSites-1; i++)
        for (int j = i + 1; j < ali->nSites; j++)
            for (int ai = 0; ai < ali->nCodes; ai++)
                for (int aj = 0; aj < ali->nCodes; aj++) {
                    dEij(i, j, ai, aj) += lambdaEij(i, j)
                        * 2.0 * xEij(i, j, ai, aj);
                    fx += lambdaEij(i, j)
                        * xEij(i, j, ai, aj) * xEij(i, j, ai, aj);
                }

    fx = PostCondition(x, g, fx, ali, options);
    return fx;
}
コード例 #3
0
ファイル: inference.c プロジェクト: debbiemarkslab/plmc
numeric_t *InferPairModel(alignment_t *ali, options_t *options) {
    /* Estimate the parameters of a maximum entropy model for a
       multiple sequence alignment.

       Side effects on ali: sets ali->nParams. When
       options->estimatorMAP == INFER_MAP_PLM_GAPREDUCE it also sets
       ali->nCodes to strlen(ali->alphabet) - 1 (not restored here) and
       shifts every sequence code down by one for the duration of
       inference, shifting it back afterwards.

       Returns the malloc'd parameter vector of ali->nParams entries; it is
       not freed here, so ownership passes to the caller. Exits the process
       with status 1 if an allocation fails. */

    /* Initialize the regularization parameters: one per site followed by
       one per site pair */
    numeric_t *lambdas =
    (numeric_t *) malloc((ali->nSites + ali->nSites * (ali->nSites - 1) / 2)
            * sizeof(numeric_t));
    if (lambdas == NULL) {
        fprintf(stderr,
            "ERROR: Failed to allocate a memory block for regularization.\n");
        exit(1);
    }
    for (int i = 0; i < ali->nSites; i++) lambdaHi(i) = options->lambdaH;
    for (int i = 0; i < ali->nSites - 1; i++)
        for (int j = i + 1; j < ali->nSites; j++)
            lambdaEij(i, j) = options->lambdaE;

    /* For gap-reduced problems, eliminate the gaps and reduce the alphabet */
    if (options->estimatorMAP == INFER_MAP_PLM_GAPREDUCE) {
        ali->nCodes = strlen(ali->alphabet) - 1;
        for (int i = 0; i < ali->nSites; i++)
            for (int s = 0; s < ali->nSeqs; s++)
                seq(s, i) -= 1;
    }

    /* Initialize parameters: nSites * nCodes fields plus nCodes^2 couplings
       for each of the nSites * (nSites - 1) / 2 site pairs */
    ali->nParams = ali->nSites * ali->nCodes
        + ali->nSites * (ali->nSites - 1) / 2 * ali->nCodes * ali->nCodes;
    numeric_t *x = (numeric_t *) malloc(sizeof(numeric_t) * ali->nParams);
    if (x == NULL) {
        fprintf(stderr,
            "ERROR: Failed to allocate a memory block for variables.\n");
        exit(1);
    }
    for (int i = 0; i < ali->nParams; i++) x[i] = 0.0;

    /* Initialize site parameters with the ML estimates
        hi = log(fi) + C
        A single pseudocount is added for stability
       (Laplace's rule or Morcos et al. with lambda = nCodes) */
    if (options->zeroAPC != 1) {
        numeric_t pseudoC = (numeric_t) ali->nCodes;
        numeric_t Zinv = 1.0 / (ali->nEff + pseudoC);
        /* Seed every (site, code) field with the pseudocount share. The
           inner index is a code, so it is bounded by nCodes. */
        for (int i = 0; i < ali->nSites; i++)
            for (int ai = 0; ai < ali->nCodes; ai++)
                xHi(i, ai) = Zinv * pseudoC / (numeric_t) ali->nCodes;
        /* Add the weighted observed counts */
        for (int s = 0; s < ali->nSeqs; s++)
            for (int i = 0; i < ali->nSites; i++)
                xHi(i, seq(s, i)) += ali->weights[s] * Zinv;
        for (int i = 0; i < ali->nSites; i++)
            for (int ai = 0; ai < ali->nCodes; ai++)
                xHi(i, ai) = log(xHi(i, ai));
        /* Zero-sum gauge: subtract the per-site mean field */
        for (int i = 0; i < ali->nSites; i++) {
            numeric_t hSum = 0.0;
            for (int ai = 0; ai < ali->nCodes; ai++) hSum += xHi(i, ai);
            numeric_t hShift = hSum / (numeric_t) ali->nCodes;
            for (int ai = 0; ai < ali->nCodes; ai++)
                xHi(i, ai) -= hShift;
        }
    }

    switch(options->estimator) {
        /* Point estimates */
        case INFER_MAP:
            /* Maximum a posteriori estimates of model parameters */
            EstimatePairModelMAP(x, lambdas, ali, options);
            break;
        /* For: future alternative estimators */
        default:
            /* Maximum a posteriori estimates of model parameters */
            EstimatePairModelMAP(x, lambdas, ali, options);
    }

    /* NOTE(review): lambdas is never freed in this function. If
       EstimatePairModelMAP does not retain the pointer, it can be released
       here -- confirm before adding free(lambdas). */

    /* Restore the alignment encoding after inference */
    if (options->estimatorMAP == INFER_MAP_PLM_GAPREDUCE) {
        for (int i = 0; i < ali->nSites; i++)
            for (int s = 0; s < ali->nSeqs; s++)
                seq(s, i) += 1;
    }

    return (numeric_t *) x;
}
コード例 #4
0
ファイル: inference.c プロジェクト: debbiemarkslab/plmc
static lbfgsfloatval_t PLMNegLogPosteriorBlock(void *instance,
    const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n,
    const lbfgsfloatval_t step) {
    /* Compute the negative log posterior, which is the negative
       penalized log-(pseudo)likelihood and the objective for MAP inference.
       This variant first copies the parameters into dense blocks (hi, eij)
       so that the potentials of all sites can be accumulated with flat
       loops over nSites * nCodes entries.

       instance: array of three pointers
                 {alignment_t *, options_t *, numeric_t *lambdas}
       x:        current parameter vector (read-only)
       g:        gradient output; its ali->nParams entries are zeroed and
                 then accumulated into
       n, step:  not referenced directly in this body

       Side effect: stores the unpenalized negative log-likelihood in
       ali->negLogLk.
       Returns the penalized objective as transformed by PostCondition().
    */
    void **d = (void **)instance;
    alignment_t *ali = (alignment_t *) d[0];
    options_t *options = (options_t *) d[1];
    numeric_t *lambdas = (numeric_t *) d[2];

    /* Initialize log-likelihood and gradient */
    lbfgsfloatval_t fx = 0.0;
    for (int i = 0; i < ali->nParams; i++) g[i] = 0;

    /* Block fields hi */
    numeric_t *hi = (numeric_t *)
        malloc(ali->nSites * ali->nCodes * sizeof(numeric_t));
    numeric_t *gHi = (numeric_t *)
        malloc(ali->nSites * ali->nCodes * sizeof(numeric_t));
    for (int i = 0; i < ali->nSites; i++)
        for (int ai = 0; ai < ali->nCodes; ai++) Hi(i, ai) = xHi(i, ai);
    for (int i = 0; i < ali->nSites * ali->nCodes; i++) gHi[i] = 0;

    /* Block couplings eij: dense and symmetric, with the (i, i) blocks
       left at zero */
    numeric_t *eij = (numeric_t *) malloc(ali->nSites * ali->nSites
        * ali->nCodes * ali->nCodes * sizeof(numeric_t));
    numeric_t *gEij = (numeric_t *) malloc(ali->nSites * ali->nSites
        * ali->nCodes * ali->nCodes * sizeof(numeric_t));
    for (int i = 0; i < ali->nSites * ali->nSites * ali->nCodes * ali->nCodes;
        i++) eij[i] = 0.0;
    for (int i = 0; i < ali->nSites * ali->nSites * ali->nCodes * ali->nCodes;
        i++) gEij[i] = 0.0;
    for (int i = 0; i < ali->nSites - 1; i++)
        for (int j = i + 1; j < ali->nSites; j++)
            for (int ai = 0; ai < ali->nCodes; ai++)
                for (int aj = 0; aj < ali->nCodes; aj++)
                    Eij(j, aj, i, ai) = Eij(i, ai, j, aj) = xEij(i, j, ai, aj);

    /* Per-sequence scratch, allocated once and reused for every sequence:
       H holds the potentials (later probabilities) of every (site, code),
       Z the per-site normalizers */
    numeric_t *H = (numeric_t *)
        malloc(ali->nCodes * ali->nSites * sizeof(numeric_t));
    numeric_t *Z = (numeric_t *) malloc(ali->nSites * sizeof(numeric_t));

    /* Negative log-pseudolikelihood */
    for (int s = 0; s < ali->nSeqs; s++) {
        /* Initialize potentials with fields */
        for(int jx = 0; jx < ali->nSites * ali->nCodes; jx++) H[jx] = hi[jx];

        /* Contribute coupling block due to i, ai */
        for (int i = 0; i < ali->nSites; i++) {
            const letter_t ai = seq(s, i);
            const numeric_t *jB = &(Eij(i, ai, 0, 0));
            for(int jx = 0; jx < ali->nSites * ali->nCodes; jx++)
                H[jx] += jB[jx];
        }

        /* Conditional log likelihoods. The normalization runs over the
           codes of each site, so the inner index is bounded by nCodes
           (H only holds nSites * nCodes entries). */
        for (int i = 0; i < ali->nSites * ali->nCodes; i++) H[i] = exp(H[i]);
        for (int i = 0; i < ali->nSites; i++) Z[i] = 0;
        for (int i = 0; i < ali->nSites; i++)
            for (int ai = 0; ai < ali->nCodes; ai++) Z[i] += Hp(i, ai);
        for (int i = 0; i < ali->nSites; i++)
            for (int ai = 0; ai < ali->nCodes; ai++) Hp(i, ai) /= Z[i];

        numeric_t seqFx = 0;
        for (int i = 0; i < ali->nSites; i++)
            seqFx -= ali->weights[s] * log(Hp(i, seq(s, i)));

        /* From here on H holds -weight * probability */
        for(int jx = 0; jx < ali->nSites * ali->nCodes; jx++)
            H[jx] *= -ali->weights[s];

        /* Field gradient: observed term, then expected term */
        for (int i = 0; i < ali->nSites; i++)
            gHi(i, seq(s, i)) -= ali->weights[s];
        for(int jx = 0; jx < ali->nSites * ali->nCodes; jx++) gHi[jx] -= H[jx];

        /* Coupling gradient, observed term. j starts at i, so the (i, i)
           entry is decremented too; diagonal blocks are never read back
           when the gradient is folded into g below.
           NOTE(review): this subtracts the weight once per unordered pair,
           whereas PLMNegLogPosteriorDO subtracts it for every ordered
           (i, j) with j != i -- confirm the intended factor. */
        for (int i = 0; i < ali->nSites - 1; i++)
            for (int j = i; j < ali->nSites; j++)
                gEij(i, seq(s, i), j, seq(s, j)) -= ali->weights[s];

        /* Coupling gradient, expected term: one flat block per (i, ai) */
        for (int i = 0; i < ali->nSites; i++) {
            const letter_t ai = seq(s, i);
            numeric_t *jgBlock = &(gEij(i, ai, 0, 0));
            for (int jx = 0; jx < ali->nSites * ali->nCodes; jx++)
                jgBlock[jx] -= H[jx];
        }

        fx += seqFx;
    }
    free(H);
    free(Z);

    /* Fold the block gradients back into the packed gradient g */
    for (int i = 0; i < ali->nSites; i++)
        for (int ai = 0; ai < ali->nCodes; ai++)
            dHi(i, ai) += gHi(i, ai);

    for (int i = 0; i < ali->nSites - 1; i++)
        for (int j = i + 1; j < ali->nSites; j++)
            for (int ai = 0; ai < ali->nCodes; ai++)
                for (int aj = 0; aj < ali->nCodes; aj++)
                    dEij(i, j, ai, aj) += gEij(j, aj, i, ai) + gEij(i, ai, j, aj);
    free(hi);
    free(gHi);
    free(eij);
    free(gEij);

    /* Record the likelihood term before the priors are added */
    ali->negLogLk = fx;

    /* Gaussian priors */
    for (int i = 0; i < ali->nSites; i++)
        for (int ai = 0; ai < ali->nCodes; ai++) {
            dHi(i, ai) += lambdaHi(i) * 2.0 * xHi(i, ai);
            fx += lambdaHi(i) * xHi(i, ai) * xHi(i, ai);
        }

    for (int i = 0; i < ali->nSites-1; i++)
        for (int j = i + 1; j < ali->nSites; j++)
            for (int ai = 0; ai < ali->nCodes; ai++)
                for (int aj = 0; aj < ali->nCodes; aj++) {
                    dEij(i, j, ai, aj) += lambdaEij(i, j)
                        * 2.0 * xEij(i, j, ai, aj);
                    fx += lambdaEij(i, j)
                        * xEij(i, j, ai, aj) * xEij(i, j, ai, aj);
                }

    fx = PostCondition(x, g, fx, ali, options);
    return fx;
}
コード例 #5
0
ファイル: inference.c プロジェクト: debbiemarkslab/plmc
static lbfgsfloatval_t PLMNegLogPosteriorGapReduce(void *instance,
    const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n,
    const lbfgsfloatval_t step) {
    /* Objective for MAP inference: the negative penalized
       log-(pseudo)likelihood, evaluated one site at a time. Positions whose
       code is negative are left out of both the conditioning set and the
       likelihood.

       instance: array of three pointers
                 {alignment_t *, options_t *, numeric_t *lambdas}
       x:        current parameter vector (read-only)
       g:        gradient output; its ali->nParams entries are zeroed and
                 then accumulated into
       n, step:  not referenced directly in this body

       Side effect: stores the unpenalized negative log-likelihood in
       ali->negLogLk.
       Returns the penalized objective as transformed by PostCondition().
    */
    void **d = (void **)instance;
    alignment_t *ali = (alignment_t *) d[0];
    options_t *options = (options_t *) d[1];
    numeric_t *lambdas = (numeric_t *) d[2];

    /* Start from a zero objective and a zero gradient */
    lbfgsfloatval_t fx = 0.0;
    for (int p = 0; p < ali->nParams; p++) g[p] = 0;

    /* Negative log-pseudolikelihood, one loop iteration per site */
    #pragma omp parallel for
    for (int i = 0; i < ali->nSites; i++) {
        const int blockLen = ali->nCodes * ali->nCodes * ali->nSites;

        /* Scratch for the potentials and the conditional distribution */
        numeric_t *H = (numeric_t *) malloc(ali->nCodes * sizeof(numeric_t));
        numeric_t *P = (numeric_t *) malloc(ali->nCodes * sizeof(numeric_t));

        /* Local copy of the parameters touching site i */
        numeric_t *Xi = (numeric_t *) malloc(blockLen * sizeof(numeric_t));
        for (int j = 0; j < ali->nSites; j++) {
            if (j == i) continue;
            for (int a = 0; a < ali->nCodes; a++)
                for (int b = 0; b < ali->nCodes; b++)
                    siteE(j, a, b) = xEij(i, j, a, b);
        }
        for (int a = 0; a < ali->nCodes; a++) siteH(i, a) = xHi(i, a);

        /* Local gradient accumulator, same size as the local parameters */
        numeric_t *Di = (numeric_t *) malloc(blockLen * sizeof(numeric_t));
        for (int k = 0; k < blockLen; k++) Di[k] = 0.0;

        /* Site negative conditional log likelihoods */
        numeric_t siteFx = 0.0;
        for (int s = 0; s < ali->nSeqs; s++) {
            /* Only ungapped sites are considered in the model */
            if (seq(s, i) < 0) continue;

            /* Potentials: field plus couplings to every ungapped j != i */
            for (int a = 0; a < ali->nCodes; a++) H[a] = siteH(i, a);
            for (int j = 0; j < ali->nSites; j++) {
                if (j == i) continue;
                if (seq(s, j) < 0) continue;
                for (int a = 0; a < ali->nCodes; a++)
                    H[a] += siteE(j, a, seq(s, j));
            }

            /* Conditional distribution given the sequence background;
               the potentials are shifted by their maximum before exp() */
            numeric_t shift = H[0];
            for (int a = 1; a < ali->nCodes; a++)
                if (!(shift >= H[a])) shift = H[a];
            for (int a = 0; a < ali->nCodes; a++) P[a] = exp(H[a] - shift);
            numeric_t norm = 0;
            for (int a = 0; a < ali->nCodes; a++) norm += P[a];
            numeric_t normInv = 1.0 / norm;
            for (int a = 0; a < ali->nCodes; a++) P[a] *= normInv;

            /* Every contribution is scaled by the sequence weight */
            numeric_t weight = ali->weights[s];
            siteFx -= weight * log(P[seq(s, i)]);

            /* Field gradient: observed term, then expected term */
            siteDH(i, seq(s, i)) -= weight;
            for (int a = 0; a < ali->nCodes; a++)
                siteDH(i, a) -= -weight * P[a];

            /* Couplings gradient for every ungapped j != i: observed term
               first, then the expected term, as in a two-pass traversal */
            int observed = seq(s, i);
            for (int j = 0; j < ali->nSites; j++) {
                if (j == i) continue;
                if (seq(s, j) < 0) continue;
                siteDE(j, observed, seq(s, j)) -= weight;
                for (int a = 0; a < ali->nCodes; a++)
                    siteDE(j, a, seq(s, j)) -= -weight * P[a];
            }
        }

        /* Merge the local objective and gradient into the shared ones;
           only this part needs mutual exclusion */
        #pragma omp critical
        {
            fx += siteFx;
            for (int j = 0; j < ali->nSites; j++) {
                if (j == i) continue;
                for (int a = 0; a < ali->nCodes; a++)
                    for (int b = 0; b < ali->nCodes; b++)
                        dEij(i, j, a, b) += siteDE(j, a, b);
            }
            for (int a = 0; a < ali->nCodes; a++) dHi(i, a) += siteDH(i, a);
        }

        /* All four buffers belong to this iteration only */
        free(Xi);
        free(Di);
        free(H);
        free(P);
    }

    /* Record the likelihood term before the priors are added */
    ali->negLogLk = fx;

    /* Gaussian prior on the fields */
    for (int i = 0; i < ali->nSites; i++) {
        for (int ai = 0; ai < ali->nCodes; ai++) {
            dHi(i, ai) += lambdaHi(i) * 2.0 * xHi(i, ai);
            fx += lambdaHi(i) * xHi(i, ai) * xHi(i, ai);
        }
    }

    /* Gaussian prior on the couplings */
    for (int i = 0; i < ali->nSites-1; i++) {
        for (int j = i + 1; j < ali->nSites; j++) {
            for (int ai = 0; ai < ali->nCodes; ai++) {
                for (int aj = 0; aj < ali->nCodes; aj++) {
                    dEij(i, j, ai, aj) += lambdaEij(i, j)
                        * 2.0 * xEij(i, j, ai, aj);
                    fx += lambdaEij(i, j)
                        * xEij(i, j, ai, aj) * xEij(i, j, ai, aj);
                }
            }
        }
    }

    fx = PostCondition(x, g, fx, ali, options);
    return fx;
}