Exemple #1
0
double* calcMIp_line(char m[],int n,int l, int pos)
{
/*
Compute one row (row `pos`) of the MIp matrix.

m   is the FASTA sequences concatenated into a single array,
n   is the number of sequences and l is the length of each one;
    len(m) must equal l*n.
pos is the index of the requested row, 0 <= pos < l.

The l*l matrix returned by calcMI(m,n,l) is read row by row.  For each row
the sum of all l entries is divided by (l-1) to give mean[i]; allmean is the
average of those row means.  Entry j of the result is
    mi[pos*l+j] - mean[pos]*mean[j]/allmean

Returns a newly malloc'ed array of l doubles that the caller must free, or
NULL when l <= 0, pos is out of range, calcMI returns NULL, or an
allocation fails.
*/
  double *mi, *mean, *mip;
  double allmean = 0;
  int i, j, k;

  /* Reject arguments that would index outside the l*l matrix. */
  if (l <= 0 || pos < 0 || pos >= l)
    return NULL;

  mi = calcMI(m, n, l);
  if (mi == NULL)
    return NULL;
  /* NOTE(review): mi is not released here.  If calcMI hands ownership of a
     heap buffer to its caller, this function leaks it -- confirm against
     calcMI and add free(mi) before each return below if so. */

  /* Heap storage instead of a variable-length array: l comes from the caller
     and may be too large for the stack. */
  mean = malloc((size_t)l * sizeof *mean);
  mip = malloc((size_t)l * sizeof *mip);
  if (mean == NULL || mip == NULL)
  {
    free(mean);
    free(mip);
    return NULL;
  }

  k = 0;
  for (i = 0; i < l; i++)
  {
    mean[i] = 0;
    for (j = 0; j < l; j++)
    {
      mean[i] += mi[k];
      k++;
    }
    /* The divisor is l-1 although all l entries of the row were summed. */
    mean[i] = mean[i] / (l - 1);
    allmean += mean[i];
  }
  allmean = allmean / l;

  for (j = 0; j < l; j++)
  {
    mip[j] = mi[pos * l + j] - ((mean[pos] * mean[j]) / allmean);
  }

  free(mean);
  return mip;
}
Exemple #2
0
/*
 * Python entry point: fill `mutinfo` with the mutual information between
 * every pair of columns of the alignment `msa`.
 *
 * Arguments (positional or keyword):
 *   msa        array read as number x length chars (one row per sequence)
 *   mutinfo    array written as length x length doubles, in place
 *   ambiguity  non-zero (default 1): spread the probability of B, J, Z and X
 *              over the residues they stand for and call sortJoint()
 *   turbo      non-zero (default 1): cache the refined character of every
 *              column (length x number bytes) so each column is converted
 *              only once; silently switched off when that memory cannot be
 *              allocated
 *   norm       non-zero: divide each value by jointEntropy(joint)
 *   debug      non-zero: print intermediate tables
 *
 * Returns a new reference to `mutinfo`, or NULL with an exception set.
 *
 * NOTE(review): the shape and dtype of `msa` and `mutinfo` are not checked
 * here; the code assumes msa is 2-D and that mutinfo holds at least
 * length * length contiguous doubles -- confirm that callers guarantee this.
 */
static PyObject *msamutinfo(PyObject *self, PyObject *args, PyObject *kwargs) {

    PyArrayObject *msa, *mutinfo;
    PyObject *result = NULL;
    int ambiguity = 1, turbo = 1, debug = 0, norm = 0;

    static char *kwlist[] = {"msa", "mutinfo",
                             "ambiguity", "turbo", "norm", "debug", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|iiii", kwlist,
                                     &msa, &mutinfo,
                                     &ambiguity, &turbo, &norm, &debug))
        return NULL;

    /* make sure to have a contiguous and well-behaved array; this yields a
       NEW reference that is released at the cleanup label */
    msa = PyArray_GETCONTIGUOUS(msa);
    if (!msa)
        return NULL;

    /* check dimensions */
    long number = PyArray_DIMS(msa)[0], length = PyArray_DIMS(msa)[1];

    /* get pointers to data */
    char *seq = (char *) PyArray_DATA(msa); /*size: number x length */
    double *mut = (double *) PyArray_DATA(mutinfo);

    /* Everything the cleanup label touches is declared and initialised
       before the first goto. */
    long i, j, k, l, diff, offset, ioffset;
    unsigned char a, b;
    unsigned char *iseq_buf = NULL;  /* owned: refined chars of column 0 */
    unsigned char *iseq, *jseq;      /* working aliases, never freed */
    unsigned char **trans = NULL;    /* owned in turbo mode only */
    double **probs = NULL, *prow;    /* length x NUMCHARS */
    double **joint = NULL, *jrow;    /* NUMCHARS x NUMCHARS */
    double p_incr = 1. / number, prb = 0;

    /* allocate memory */
    iseq_buf = malloc(number * sizeof(unsigned char));
    if (!iseq_buf) {
        PyErr_NoMemory();
        goto cleanup;
    }
    iseq = iseq_buf;
    jseq = iseq_buf; /* so that we don't get uninitialized warning*/

    /* hold transpose of the sorted character array */
    if (turbo) {
        trans = malloc(length * sizeof(unsigned char *));
        if (!trans)
            turbo = 0;
    }

    if (turbo) {
        /* allocate rows that will store columns of MSA; row 0 aliases
           iseq_buf and is therefore never freed through trans */
        trans[0] = iseq_buf;
        for (i = 1; i < length; i++) {
            trans[i] = malloc(number * sizeof(unsigned char));
            if (!trans[i]) {
                /* not enough memory for the cache: drop it completely and
                   continue in the slower non-turbo mode */
                for (j = 1; j < i; j++)
                    free(trans[j]);
                free(trans);
                trans = NULL;
                turbo = 0;
                break;
            }
        }
    }

    /* length*27, a row for each column in the MSA */
    probs = malloc(length * sizeof(double *));
    if (!probs) {
        PyErr_NoMemory();
        goto cleanup;
    }
    for (i = 0; i < length; i++)
        probs[i] = NULL; /* lets cleanup free a partially built table */

    /* 27x27, alphabet characters and a gap*/
    joint = malloc(NUMCHARS * sizeof(double *));
    if (!joint) {
        PyErr_NoMemory();
        goto cleanup;
    }
    for (i = 0; i < NUMCHARS; i++)
        joint[i] = NULL;

    for (i = 0; i < length; i++) {
        prow = malloc(NUMCHARS * sizeof(double));
        if (!prow) {
            PyErr_NoMemory();
            goto cleanup;
        }
        probs[i] = prow;
        for (j = 0; j < NUMCHARS; j++)
            prow[j] = 0;
    }

    for (i = 0; i < NUMCHARS; i++)  {
        joint[i] = malloc(NUMCHARS * sizeof(double));
        if (!joint[i]) {
            PyErr_NoMemory();
            goto cleanup;
        }
    }

    if (debug)
        printProbs(probs, length);

    prow = probs[0];

    /* START mutinfo calculation */
    /* calculate first row of MI matrix and all column probabilities */
    i = 0;
    mut[0] = 0;
    for (j = 1; j < length; j++) {
        mut[j * length + j] = 0; /* using empty, so needed for diagonal */
        jrow = probs[j];
        zeroJoint(joint);
        diff = j - 1;
        if (turbo) /* in turbo mode, there is a row for refined sequences */
            jseq = trans[j];
        for (k = 0; k < number; k++) {
            offset = k * length;
            if (diff) {
                /* column 0 was already refined while handling j == 1 */
                a = iseq[k];
            } else {
                a = (unsigned char) seq[offset + i];
                if (a > 90)
                    a -= 96;
                else
                    a -= 64;
                if (a < 1 || a > 26)
                    a = 0; /* gap character */
                iseq[k] = a;
                prow[a] += p_incr;
            }

            b = (unsigned char) seq[offset + j];
            if (b > 90)
                b -= 96;
            else
                b -= 64;
            if (b < 1 || b > 26)
                b = 0; /* gap character */
            if (turbo)  /* we keep the refined chars for all sequences*/
                jseq[k] = b;
            joint[a][b] += p_incr;
            jrow[b] += p_incr;
        }

        if (ambiguity) {

            if (debug)
                printProbs(probs, length);
            /* on the first pass (j == 1) column 0 is adjusted as well */
            if (diff)
                k = j;
            else
                k = 0;
            for (; k <= j; k++) {
                prow = probs[k];
                prb = prow[2];
                if (prb > 0) { /* B -> D, N  */
                    prb = prb / 2.;
                    prow[4] += prb;
                    prow[14] += prb;
                    prow[2] = 0;
                }
                prb = prow[10];
                if (prb > 0) { /* J -> I, L  */
                    prb = prb / 2.;
                    prow[9] += prb;
                    prow[12] += prb;
                    prow[10] = 0;
                }
                prb = prow[26];
                if (prb > 0) { /* Z -> E, Q  */
                    prb = prb / 2.;
                    prow[5] += prb;
                    prow[17] += prb;
                    prow[26] = 0;
                }
                if (prow[24] > 0) { /* X -> 20 AA */
                    prb = prow[24] / 20.;
                    for (l = 0; l < 20; l++)
                        prow[twenty[l]] += prb;
                    prow[24] = 0;
                }
            }

            if (debug)
                printProbs(probs, length);
            if (debug)
                printJoint(joint, i, j);
            sortJoint(joint);
            if (debug)
                printJoint(joint, i, j);
        }
        if (norm)
            mut[j] = mut[length * j] = calcMI(joint, probs, i, j, debug) /
                                      jointEntropy(joint);
        else
            mut[j] = mut[length * j] = calcMI(joint, probs, i, j, debug);
    }
    if (debug)
        printProbs(probs, length);

    /* calculate rest of MI matrix */
    for (i = 1; i < length; i++) {
        ioffset = i * length;
        if (turbo)
            iseq = trans[i];

        for (j = i + 1; j < length; j++) {
            zeroJoint(joint);

            if (turbo) {
                jseq = trans[j];
                for (k = 0; k < number; k++)
                    joint[iseq[k]][jseq[k]] += p_incr;

            } else {
                diff = j - i - 1;
                for (k = 0; k < number; k++) {
                    offset = k * length;
                    if (diff) {
                        a = iseq[k];
                    } else {
                        /* first partner of column i: refine column i and
                           remember it for the remaining partners */
                        a = (unsigned char) seq[offset + i];
                        if (a > 90)
                            a -= 96;
                        else
                            a -= 64;
                        if (a < 1 || a > 26)
                            a = 0; /* gap character */
                        iseq[k] = a;
                    }

                    b = (unsigned char) seq[offset + j];
                    if (b > 90)
                        b -= 96;
                    else
                        b -= 64;
                    if (b < 1 || b > 26)
                        b = 0; /* gap character */
                    joint[a][b] += p_incr;
                }
            }
            if (ambiguity)
                sortJoint(joint);
            if (norm)
                mut[ioffset + j] = mut[i + length * j] =
                    calcMI(joint, probs, i, j, debug) / jointEntropy(joint);
            else
                mut[ioffset + j] = mut[i + length * j] =
                    calcMI(joint, probs, i, j, debug);
        }
    }

    result = Py_BuildValue("O", mutinfo);

cleanup:
    /* single exit: release whatever was allocated, on success and failure */
    if (joint) {
        for (i = 0; i < NUMCHARS; i++)
            free(joint[i]);
        free(joint);
    }
    if (probs) {
        for (i = 0; i < length; i++)
            free(probs[i]);
        free(probs);
    }
    if (trans) {
        /* trans[0] is iseq_buf, released separately below */
        for (i = 1; i < length; i++)
            free(trans[i]);
        free(trans);
    }
    free(iseq_buf);
    Py_DECREF(msa);

    return result;
}