/*
 * Copy the elements of a NumPy array into a freshly malloc'ed C buffer.
 *
 * data  - receives the new buffer; the caller owns it and must free() it.
 *         Set to NULL when nothing could be copied.
 * array - NumPy array object. The raw bytes are copied as doubles, so the
 *         array is assumed to hold float64 values (not verified here).
 *
 * Returns the number of elements copied, or 0 if the contiguous view or the
 * buffer could not be obtained.
 */
unsigned int numpyToC(double **data, PyObject *array)
{
    PyArrayObject *arrayObj;
    unsigned int frames;
    size_t nbytes;

    *data = NULL;

    /* PyArray_GETCONTIGUOUS hands back a reference we own. */
    arrayObj = (PyArrayObject *)PyArray_GETCONTIGUOUS((PyArrayObject *)array);
    if (arrayObj == NULL)
        return 0;

    frames = (unsigned int)PyArray_SIZE(arrayObj);
    nbytes = (size_t)frames * sizeof(double);

    *data = (double *)malloc(nbytes);
    if (*data != NULL)
        memcpy(*data, PyArray_BYTES(arrayObj), nbytes);
    else
        frames = 0;

    /* The data has been copied out; drop the contiguous view. */
    Py_DECREF(arrayObj);
    return frames;
}
/*
 * checksum(array) -> int
 *
 * Compute a checksum over a NumPy array of 32-bit integers: each sample is
 * reduced modulo MODULUS, accumulated, and the running sum is reduced again.
 * The absolute value of the final sum is returned.
 *
 * Raises GSEError if the argument is missing, is not a NumPy array, or does
 * not have dtype int32.
 */
static PyObject* gse_checksum(PyObject *dummy, PyObject *args) {
    int checksum;
    npy_intp length, i;
    PyObject *array = NULL;
    PyArrayObject *carray = NULL;
    int *data;

    if (!PyArg_ParseTuple(args, "O", &array )) {
        PyErr_SetString(GSEError, "usage checksum(array)" );
        return NULL;
    }
    if (!PyArray_Check(array)) {
        PyErr_SetString(GSEError, "Data must be given as NumPy array." );
        return NULL;
    }
    if (PyArray_TYPE((PyArrayObject*)array) != NPY_INT32) {
        PyErr_SetString(GSEError, "Data must be 32-bit integers.");
        return NULL;
    }

    /* New reference: must be released before returning. */
    carray = PyArray_GETCONTIGUOUS((PyArrayObject*)array);
    if (carray == NULL)
        return NULL;

    length = PyArray_SIZE(carray);
    data = (int*)PyArray_DATA(carray);

    checksum = 0;
    for (i = 0; i < length; i++) {
        checksum += data[i] % MODULUS;
        checksum %= MODULUS;
    }

    Py_DECREF(carray);
    return Py_BuildValue("i", abs(checksum));
}
/*
 * readData(filename, nsymbt, datamode, data, size, compress)
 *
 * Read `size` elements from `filename` into the buffer of the NumPy array
 * `data`, starting at byte offset 1024 + nsymbt. The element width is chosen
 * from `datamode` (0 -> 1, 1 -> 2, 2 -> 4, 5 -> 1, 6 -> 2, otherwise 4 bytes).
 * When `compress` is non-zero the file is read through zlib.
 *
 * Returns the (contiguous) data array on success. On failure this function
 * follows its existing convention of returning the tuple (None, message)
 * instead of raising.
 */
static PyObject *readData(PyObject *self, PyObject *args, PyObject *kwargs)
{
    char *filename = NULL;
    PyObject *data_arg = NULL;
    PyArrayObject *data = NULL;
    FILE *m_fp = NULL;
    gzFile gzfp = NULL;
    int nsymbt = 0, datamode = -1, size = 0, bytesize = 4, compress = 0;
    size_t nbytes;
    unsigned char *matrix;
    const char *errmsg = NULL;

    static char *kwlist[] = {"filename", "nsymbt", "datamode", "data", "size", "compress", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "siiOii", kwlist,
                                     &filename, &nsymbt, &datamode, &data_arg, &size, &compress)) {
        /* A result must not be returned while an exception is pending. */
        PyErr_Clear();
        return Py_BuildValue("Os", Py_None, "Couldn't parse variable from C function.");
    }
    if (!PyArray_Check(data_arg))
        return Py_BuildValue("Os", Py_None, "Couldn't parse variable from C function.");

    /* New reference; handed to the caller on success, released on failure. */
    data = PyArray_GETCONTIGUOUS((PyArrayObject *)data_arg);
    if (data == NULL) {
        PyErr_Clear();
        return Py_BuildValue("Os", Py_None, "Couldn't parse variable from C function.");
    }
    matrix = (unsigned char *) PyArray_DATA(data);

    if (compress) {
        gzfp = gzopen(filename, "rb");
        if (gzfp == NULL) {
            errmsg = "Couldn't read file.";
            goto fail;
        }
        if (gzseek(gzfp, (1024 + nsymbt), SEEK_SET) == -1) {
            errmsg = "File header is not complete.";
            goto fail;
        }
    }
    else {
        m_fp = fopen(filename, "rb");
        if (m_fp == NULL) {
            errmsg = "Couldn't read file.";
            goto fail;
        }
        if (fseek(m_fp, (long)(1024 + nsymbt), SEEK_SET) != 0) {
            errmsg = "Matrix data couldn't be located.";
            goto fail;
        }
    }

    switch (datamode) {
        case 0: bytesize = 1; break;
        case 1: bytesize = 2; break;
        case 2: bytesize = 4; break;
        case 5: bytesize = 1; break;
        case 6: bytesize = 2; break;
        default: break; /* keep the 4-byte default */
    }

    /* Never read more bytes than the destination array can hold. */
    if (size < 0 ||
        (size_t)size * (size_t)bytesize > (size_t)PyArray_NBYTES(data)) {
        errmsg = "Parsing data Error.";
        goto fail;
    }
    nbytes = (size_t)size * (size_t)bytesize;

    if (compress) {
        int got = gzread(gzfp, matrix, (unsigned)nbytes);
        if (got < 0 || (size_t)got != nbytes) {
            errmsg = "Parsing data Error.";
            goto fail;
        }
        gzclose(gzfp);
    }
    else {
        if (fread(matrix, (size_t)bytesize, (size_t)size, m_fp) != (size_t)size) {
            errmsg = "Parsing data Error.";
            goto fail;
        }
        fclose(m_fp);
    }

    /* Transfer our reference to the caller (no decref-then-use). */
    return (PyObject *)data;

fail:
    if (gzfp != NULL)
        gzclose(gzfp);
    if (m_fp != NULL)
        fclose(m_fp);
    Py_XDECREF(data);
    return Py_BuildValue("Os", Py_None, errmsg);
}
/*
 * Return a C-contiguous array with element type `typenum` built from `array`.
 *
 * The overhead should be small unless the numpy array has been reordered in
 * some way or the data type doesn't quite match.
 *
 * The returned pointer is the "new" array object created by PyArray_Cast;
 * the caller owns it and has to call Py_DECREF on it. NULL is returned (with
 * the Python error left as set by NumPy) if either step fails.
 */
static inline PyArrayObject *getContiguous(PyArrayObject *array, int typenum)
{
    PyArrayObject *tmp_arr;
    PyArrayObject *new_owner;

    tmp_arr = PyArray_GETCONTIGUOUS(array);
    if (tmp_arr == NULL)
        return NULL;

    new_owner = (PyArrayObject *) PyArray_Cast(tmp_arr, typenum);

    /* The cast result holds its own data; the intermediate can go. */
    Py_DECREF(tmp_arr);
    return new_owner;
}
/*
 * finddRdz(Gmu, alpha, f, Gamma, zofA) -> array
 *
 * For every entry of the NumPy array `zofA`, evaluate dR/dz from the
 * cosmology functions returned by XLALCSCosmoFunctions() and return the
 * values in a new 1-D float64 array of the same length as the first
 * dimension of `zofA`.
 *
 * NOTE(review): the input buffer is read as doubles without a dtype check -
 * confirm callers always pass float64.
 *
 * Raises ValueError if any computed value is NaN.
 */
static PyObject *cs_gamma_finddRdz(PyObject *self, PyObject *args)
{
    PyArrayObject *Numpy_zofA;
    PyObject *Numpy_dRdz;
    double Gmu, alpha, f, Gamma, *zofA, *dRdz;
    long int Namp;
    cs_cosmo_functions_t cosmofns;
    long int j;
    (void)self; /* silence unused parameter warning */

    if (!PyArg_ParseTuple(args, "ddddO!", &Gmu, &alpha, &f, &Gamma, &PyArray_Type, &Numpy_zofA))
        return NULL;

    /* New reference, released before returning. */
    Numpy_zofA = PyArray_GETCONTIGUOUS(Numpy_zofA);
    if (!Numpy_zofA)
        return NULL;
    Namp = PyArray_DIM(Numpy_zofA, 0);
    zofA = PyArray_DATA(Numpy_zofA);

    {
        npy_intp dims[1] = {Namp};
        Numpy_dRdz = PyArray_SimpleNew(1, dims, NPY_DOUBLE);
    }
    if (!Numpy_dRdz) {
        Py_DECREF(Numpy_zofA);
        return NULL;
    }
    dRdz = PyArray_DATA((PyArrayObject *) Numpy_dRdz);

    cosmofns = XLALCSCosmoFunctions(zofA, Namp);

    for (j = 0; j < Namp; j++) {
        /* A theta > 1 cut-off,
         *   theta = pow((1+z) * f * alpha * phit / H0, -1.0/3.0),
         * was disabled in the original code and remains disabled here. */
        dRdz[j] = 0.5 * H0 * pow(f/H0, -2.0/3.0) * pow(alpha, -5.0/3.0) / (Gamma*Gmu)
                  * pow(cosmofns.phit[j], -14.0/3.0) * cosmofns.phiV[j]
                  * pow(1 + cosmofns.z[j], -5.0/3.0);
        if (gsl_isnan(dRdz[j])) {
            /* Returning NULL requires an exception to be set. */
            PyErr_SetString(PyExc_ValueError, "dRdz evaluated to NaN");
            Py_DECREF(Numpy_dRdz);
            Numpy_dRdz = NULL;
            break;
        }
    }

    XLALCSCosmoFunctionsFree(cosmofns);
    Py_DECREF(Numpy_zofA);

    return Numpy_dRdz;
}
/*
 * msadipretest(msa, refine) -> (l, k)
 *
 * Scan the character array `msa` (rows x columns) and return:
 *   l - the number of columns considered: all columns when `refine` is 0,
 *       otherwise only the columns whose character in the first row is an
 *       upper-case letter (codes 65..90);
 *   k - the largest value of `alignlist` found for any upper-case letter in
 *       a considered column.
 *
 * NOTE(review): `msa` is indexed as a 2-D array of single bytes without a
 * check - confirm callers guarantee this.
 */
static PyObject *msadipretest(PyObject *self, PyObject *args, PyObject *kwargs)
{
    PyArrayObject *msa;
    int refine = 0;
    /* Maps 'A'..'Z' (by offset from 65) to a class index; 0 marks letters
       that are not assigned a class. */
    int alignlist[26] = {1, 0, 2, 3, 4, 5, 6, 7, 8, 0, 9, 10, 11, 12,
                         0, 13, 14, 15, 16, 17, 0, 18, 19, 0, 20, 0};
    static char *kwlist[] = {"msa", "refine", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi", kwlist, &msa, &refine))
        return NULL;

    /* New reference, released on every exit path below. */
    msa = PyArray_GETCONTIGUOUS(msa);
    if (!msa)
        return NULL;

    long number = PyArray_DIMS(msa)[0], length = PyArray_DIMS(msa)[1];
    char *seq = (char *) PyArray_DATA(msa);
    long i, j, k = 0, l = 0;

    /* Allocate at least one element so a zero-column input is not mistaken
       for an out-of-memory condition. */
    int *ind = malloc((size_t)(length > 0 ? length : 1) * sizeof(int));
    if (!ind) {
        Py_DECREF(msa);
        return PyErr_NoMemory();
    }

    if (!refine) {
        for (i = 0; i < length; i++)
            ind[i] = (int)(i + 1);
        l = length;
    }
    else {
        for (i = 0; i < length; i++) {
            if (seq[i] <= 90 && seq[i] >= 65) {
                l += 1;
                ind[i] = (int)l;
            }
            else {
                ind[i] = 0;
            }
        }
    }

    for (i = 0; i < number; i++) {
        for (j = 0; j < length; j++) {
            char ch;
            if (!ind[j])
                continue;
            ch = seq[i * length + j];
            if (ch >= 65 && ch <= 90 && alignlist[ch - 65] > k)
                k = alignlist[ch - 65];
        }
    }

    free(ind);
    Py_DECREF(msa);
    /* l and k are long, so the format must be "l", not "i". */
    return Py_BuildValue("ll", l, k);
}
/*
 * msaomes(msa, omes, ambiguity=1, turbo=1, debug=0) -> omes
 *
 * Fill the caller-supplied array `omes` with the symmetric matrix of OMES
 * scores (as computed by calcOMES) for every pair of columns of the
 * character array `msa`, and return `omes`.
 *
 *   ambiguity - when non-zero, the probabilities of B, J, Z and X are
 *               redistributed and the joint table is passed to sortJoint().
 *   turbo     - when non-zero, the refined (0..26) codes of every column are
 *               cached; if that memory cannot be allocated the function
 *               silently falls back to the slower path.
 *   debug     - when non-zero, intermediate tables are printed.
 *
 * NOTE(review): `omes` is written as a length x length array of doubles and
 * `msa` is read as a 2-D array of bytes; neither is verified here.
 */
static PyObject *msaomes(PyObject *self, PyObject *args, PyObject *kwargs)
{
    PyArrayObject *msa, *omes;
    int ambiguity = 1, turbo = 1, debug = 0;

    static char *kwlist[] = {"msa", "omes", "ambiguity", "turbo", "debug", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|iii", kwlist,
                                     &msa, &omes, &ambiguity, &turbo, &debug))
        return NULL;

    /* make sure to have a contiguous and well-behaved array
       (new reference, released in the cleanup section) */
    msa = PyArray_GETCONTIGUOUS(msa);
    if (!msa)
        return NULL;

    /* check dimensions */
    long number = PyArray_DIMS(msa)[0], length = PyArray_DIMS(msa)[1];

    /* get pointers to data */
    char *seq = (char *) PyArray_DATA(msa); /* size: number x length */
    double *data = (double *) PyArray_DATA(omes);

    long i, j, k, l, diff, offset, ioffset;
    unsigned char a, b;
    double p_incr = 1. / number;
    double prb = 0;
    double *prow = NULL, *jrow = NULL;
    PyObject *result = NULL;

    /* Everything released at `cleanup` starts out NULL. */
    unsigned char *row0 = NULL;      /* refined codes of the current column i */
    unsigned char *iseq = NULL, *jseq = NULL;
    unsigned char **trans = NULL;    /* turbo: refined codes per column */
    double **probs = NULL;           /* length x NUMCHARS column probabilities */
    double **joint = NULL;           /* NUMCHARS x NUMCHARS joint probabilities */

    /* Nothing to compute for an alignment without columns. */
    if (length < 1) {
        result = Py_BuildValue("O", omes);
        goto cleanup;
    }

    /* allocate memory */
    row0 = malloc((size_t)(number > 0 ? number : 1) * sizeof(unsigned char));
    if (!row0) {
        PyErr_NoMemory();
        goto cleanup;
    }
    iseq = jseq = row0;

    /* hold transpose of the sorted character array */
    trans = calloc((size_t)length, sizeof(unsigned char *));
    if (!trans)
        turbo = 0;

    if (turbo) {
        /* allocate rows that will store columns of MSA */
        trans[0] = row0;
        for (i = 1; i < length; i++) {
            trans[i] = malloc((size_t)number * sizeof(unsigned char));
            if (!trans[i]) {
                /* Fall back to the slow path. Stop allocating and forget
                   the table so it is neither written to nor freed again. */
                for (j = 1; j < i; j++)
                    free(trans[j]);
                free(trans);
                trans = NULL;
                turbo = 0;
                break;
            }
        }
    }

    /* length*27, a row for each column in the MSA */
    probs = calloc((size_t)length, sizeof(double *));
    /* 27x27, alphabet characters and a gap */
    joint = calloc((size_t)NUMCHARS, sizeof(double *));
    if (!probs || !joint) {
        PyErr_NoMemory();
        goto cleanup;
    }

    for (i = 0; i < length; i++) {
        prow = malloc(NUMCHARS * sizeof(double));
        if (!prow) {
            PyErr_NoMemory();
            goto cleanup;
        }
        probs[i] = prow;
        for (j = 0; j < NUMCHARS; j++)
            prow[j] = 0;
    }

    for (i = 0; i < NUMCHARS; i++) {
        joint[i] = malloc(NUMCHARS * sizeof(double));
        if (!joint[i]) {
            PyErr_NoMemory();
            goto cleanup;
        }
    }

    if (debug)
        printProbs(probs, length);

    prow = probs[0];

    /* START OMES calculation */
    /* calculate first row of OMES matrix and all column probabilities */
    i = 0;
    data[0] = 0;
    for (j = 1; j < length; j++) {
        data[j * length + j] = 0; /* using empty, so needed for diagonal */
        jrow = probs[j];
        zeroJoint(joint);
        diff = j - 1;
        if (turbo) /* in turbo mode, there is a row for refined sequences */
            jseq = trans[j];
        for (k = 0; k < number; k++) {
            offset = k * length;
            if (diff) {
                a = iseq[k];
            }
            else {
                /* first pass over column 0: refine and cache its codes */
                a = (unsigned char) seq[offset + i];
                if (a > 90)
                    a -= 96;
                else
                    a -= 64;
                if (a < 1 || a > 26)
                    a = 0; /* gap character */
                iseq[k] = a;
                prow[a] += p_incr;
            }

            b = (unsigned char) seq[offset + j];
            if (b > 90)
                b -= 96;
            else
                b -= 64;
            if (b < 1 || b > 26)
                b = 0; /* gap character */
            if (turbo) /* we keep the refined chars for all sequences */
                jseq[k] = b;
            joint[a][b] += p_incr;
            jrow[b] += p_incr;
        }

        if (ambiguity) {
            if (debug)
                printProbs(probs, length);
            /* column 0 is only complete after the j == 1 pass, so it is
               redistributed together with column 1 */
            if (diff)
                k = j;
            else
                k = 0;
            for (; k <= j; k++) {
                prow = probs[k];
                prb = prow[2];
                if (prb > 0) { /* B -> D, N */
                    prb = prb / 2.;
                    prow[4] += prb;
                    prow[14] += prb;
                    prow[2] = 0;
                }
                prb = prow[10];
                if (prb > 0) { /* J -> I, L */
                    prb = prb / 2.;
                    prow[9] += prb;
                    prow[12] += prb;
                    prow[10] = 0;
                }
                prb = prow[26];
                if (prb > 0) { /* Z -> E, Q */
                    prb = prb / 2.;
                    prow[5] += prb;
                    prow[17] += prb;
                    prow[26] = 0;
                }
                if (prow[24] > 0) { /* X -> 20 AA */
                    prb = prow[24] / 20.;
                    for (l = 0; l < 20; l++)
                        prow[twenty[l]] += prb;
                    prow[24] = 0;
                }
            }

            if (debug)
                printProbs(probs, length);
            if (debug)
                printJoint(joint, i, j);
            sortJoint(joint);
            if (debug)
                printJoint(joint, i, j);
        }
        data[j] = data[length * j] = calcOMES(joint, probs, i, j, number);
    }
    if (debug)
        printProbs(probs, length);

    /* calculate rest of OMES matrix */
    for (i = 1; i < length; i++) {
        ioffset = i * length;
        if (turbo)
            iseq = trans[i];

        for (j = i + 1; j < length; j++) {
            zeroJoint(joint);

            if (turbo) {
                jseq = trans[j];
                for (k = 0; k < number; k++)
                    joint[iseq[k]][jseq[k]] += p_incr;
            }
            else {
                diff = j - i - 1;
                for (k = 0; k < number; k++) {
                    offset = k * length;
                    if (diff) {
                        a = iseq[k];
                    }
                    else {
                        a = (unsigned char) seq[offset + i];
                        if (a > 90)
                            a -= 96;
                        else
                            a -= 64;
                        if (a < 1 || a > 26)
                            a = 0; /* gap character */
                        iseq[k] = a;
                    }

                    b = (unsigned char) seq[offset + j];
                    if (b > 90)
                        b -= 96;
                    else
                        b -= 64;
                    if (b < 1 || b > 26)
                        b = 0; /* gap character */
                    joint[a][b] += p_incr;
                }
            }
            if (ambiguity)
                sortJoint(joint);
            data[ioffset + j] = data[i + length * j] =
                calcOMES(joint, probs, i, j, number);
        }
    }

    result = Py_BuildValue("O", omes);

cleanup:
    /* free memory; partially built tables hold NULL in unused slots */
    if (probs) {
        for (i = 0; i < length; i++)
            free(probs[i]);
        free(probs);
    }
    if (joint) {
        for (i = 0; i < NUMCHARS; i++)
            free(joint[i]);
        free(joint);
    }
    if (trans) {
        /* trans[0] aliases row0 and is released below */
        for (j = 1; j < length; j++)
            free(trans[j]);
        free(trans);
    }
    free(row0);
    Py_DECREF(msa);
    return result;
}
/*
 * findzofA(Gmu, alpha, amp) -> array
 *
 * For every amplitude in the NumPy array `amp`, find the redshift z by
 * linear interpolation of a tabulated function fz(z) built from the
 * cosmology functions returned by XLALCSCosmoFunctionsAlloc(). The result is
 * a new 1-D float64 array with one entry per amplitude.
 *
 * NOTE(review): the input buffer is read as doubles without a dtype check -
 * confirm callers always pass float64.
 *
 * Raises MemoryError if a work buffer cannot be allocated and ValueError if
 * an interpolated value is NaN.
 */
static PyObject *cs_gamma_findzofA(PyObject *self, PyObject *args)
{
    PyArrayObject *Numpy_amp;
    PyObject *Numpy_zofA;
    double Gmu, alpha, *zofA, *amp;
    unsigned long int Namp;
    (void)self; /* silence unused parameter warning */

    double z_min = 1e-20, z_max = 1e10;
    double dlnz = 0.05;
    unsigned numz = floor( (log(z_max) - log(z_min)) / dlnz );
    unsigned long int i;
    cs_cosmo_functions_t cosmofns;
    double *fz = NULL, *z = NULL;
    double a;
    gsl_interp *zofa_interp = NULL;
    gsl_interp_accel *acc_zofa = NULL;

    if (!PyArg_ParseTuple(args, "ddO!", &Gmu, &alpha, &PyArray_Type, &Numpy_amp))
        return NULL;

    /* New reference, released before returning. */
    Numpy_amp = PyArray_GETCONTIGUOUS(Numpy_amp);
    if (!Numpy_amp)
        return NULL;
    Namp = PyArray_DIM(Numpy_amp, 0);
    amp = PyArray_DATA(Numpy_amp);

    {
        npy_intp dims[1] = {(npy_intp) Namp};
        Numpy_zofA = PyArray_SimpleNew(1, dims, NPY_DOUBLE);
    }
    if (!Numpy_zofA) {
        Py_DECREF(Numpy_amp);
        return NULL;
    }
    zofA = PyArray_DATA((PyArrayObject *) Numpy_zofA);

    cosmofns = XLALCSCosmoFunctionsAlloc( z_min, dlnz, numz );

    /* All work buffers are allocated only after the arguments are known to
       be good, so the early returns above cannot leak them. */
    zofa_interp = gsl_interp_alloc (gsl_interp_linear, cosmofns.n);
    acc_zofa = gsl_interp_accel_alloc();
    fz = calloc( cosmofns.n, sizeof( *fz ) );
    z = calloc( cosmofns.n, sizeof( *z ) );

    if (!zofa_interp || !acc_zofa || !fz || !z) {
        PyErr_NoMemory();
        Py_DECREF(Numpy_zofA);
        Numpy_zofA = NULL;
    }
    else {
        /* first compute the function that relates A and z */
        /* invert order; b/c fz is a monotonically decreasing func of z */
        for ( i = cosmofns.n ; i > 0; i-- ) {
            unsigned long int j = cosmofns.n - i;
            z[j] = cosmofns.z[i-1];
            fz[j] = pow(cosmofns.phit[i-1], 2.0/3.0) * pow(1+z[j], -1.0/3.0) / cosmofns.phiA[i-1];
        }

        gsl_interp_init (zofa_interp, fz, z, cosmofns.n);

        /* now compute the amplitudes (suitably multiplied) that are equal
           to fz for some z */
        for ( i = 0; i < Namp; i++ ) {
            a = amp[i] * pow(H0,-1.0/3.0) * pow(alpha,-2.0/3.0) / Gmu;
            /* evaluate z(fz) at fz=a */
            zofA[i] = gsl_interp_eval (zofa_interp, fz, z, a, acc_zofa );
            if (gsl_isnan(zofA[i])) {
                /* Returning NULL requires an exception to be set. */
                PyErr_SetString(PyExc_ValueError, "zofA evaluated to NaN");
                Py_DECREF(Numpy_zofA);
                Numpy_zofA = NULL;
                break;
            }
        }
    }

    XLALCSCosmoFunctionsFree( cosmofns );
    Py_DECREF(Numpy_amp);
    free(fz);
    free(z);
    if (zofa_interp)
        gsl_interp_free (zofa_interp);
    if (acc_zofa)
        gsl_interp_accel_free(acc_zofa);

    return Numpy_zofA;
}
/// @brief Construct a Mat from an NDArray object. static void construct(PyObject* object, boost::python::converter::rvalue_from_python_stage1_data* data) { namespace python = boost::python; // Object is a borrowed reference, so create a handle indicting it is // borrowed for proper reference counting. python::handle<> handle(python::borrowed(object)); // Obtain a handle to the memory block that the converter has allocated // for the C++ type. typedef python::converter::rvalue_from_python_storage<Mat> storage_type; void* storage = reinterpret_cast<storage_type*>(data)->storage.bytes; // Allocate the C++ type into the converter's memory block, and assign // its handle to the converter's convertible variable. The C++ // container is populated by passing the begin and end iterators of // the python object to the container's constructor. PyArrayObject* oarr = (PyArrayObject*) object; bool needcopy = false, needcast = false; int typenum = PyArray_TYPE(oarr), new_typenum = typenum; int type = typenum == NPY_UBYTE ? CV_8U : typenum == NPY_BYTE ? CV_8S : typenum == NPY_USHORT ? CV_16U : typenum == NPY_SHORT ? CV_16S : typenum == NPY_INT ? CV_32S : typenum == NPY_INT32 ? CV_32S : typenum == NPY_FLOAT ? CV_32F : typenum == NPY_DOUBLE ? 
CV_64F : -1; if (type < 0) { needcopy = needcast = true; new_typenum = NPY_INT; type = CV_32S; } #ifndef CV_MAX_DIM const int CV_MAX_DIM = 32; #endif int ndims = PyArray_NDIM(oarr); int size[CV_MAX_DIM + 1]; size_t step[CV_MAX_DIM + 1]; size_t elemsize = CV_ELEM_SIZE1(type); const npy_intp* _sizes = PyArray_DIMS(oarr); const npy_intp* _strides = PyArray_STRIDES(oarr); bool ismultichannel = ndims == 3 && _sizes[2] <= CV_CN_MAX; for (int i = ndims - 1; i >= 0 && !needcopy; i--) { // these checks handle cases of // a) multi-dimensional (ndims > 2) arrays, as well as simpler 1- and 2-dimensional cases // b) transposed arrays, where _strides[] elements go in non-descending order // c) flipped arrays, where some of _strides[] elements are negative if ((i == ndims - 1 && (size_t) _strides[i] != elemsize) || (i < ndims - 1 && _strides[i] < _strides[i + 1])) needcopy = true; } if (ismultichannel && _strides[1] != (npy_intp) elemsize * _sizes[2]) needcopy = true; if (needcopy) { if (needcast) { object = PyArray_Cast(oarr, new_typenum); oarr = (PyArrayObject*) object; } else { oarr = PyArray_GETCONTIGUOUS(oarr); object = (PyObject*) oarr; } _strides = PyArray_STRIDES(oarr); } for (int i = 0; i < ndims; i++) { size[i] = (int) _sizes[i]; step[i] = (size_t) _strides[i]; } // handle degenerate case if (ndims == 0) { size[ndims] = 1; step[ndims] = elemsize; ndims++; } if (ismultichannel) { ndims--; type |= CV_MAKETYPE(0, size[2]); } if (!needcopy) { Py_INCREF(object); } cv::Mat* m = new (storage) cv::Mat(ndims, size, type, PyArray_DATA(oarr), step); m->u = g_numpyAllocator.allocate(object, ndims, size, type, step); m->allocator = &g_numpyAllocator; m->addref(); data->convertible = storage; }
/// Convert a NumPy array object into a cv::Mat.
///
/// The Mat shares the NumPy buffer when the layout allows it; otherwise a
/// contiguous copy is made. 64-bit integer arrays (and NPY_LONG) are cast to
/// int32. On failure `failmsg` is called and an empty Mat (with the NumPy
/// allocator installed) is returned.
Mat fromNDArrayToMat(PyObject* o) {
    cv::Mat m;
    bool allowND = true;

    if (!PyArray_Check(o)) {
        failmsg("argument is not a numpy array");
        if (!m.data)
            m.allocator = &g_numpyAllocator;
        return m;
    }

    PyArrayObject* oarr = (PyArrayObject*) o;

    bool needcopy = false, needcast = false;
    int typenum = PyArray_TYPE(oarr), new_typenum = typenum;
    int type = typenum == NPY_UBYTE ? CV_8U :
               typenum == NPY_BYTE ? CV_8S :
               typenum == NPY_USHORT ? CV_16U :
               typenum == NPY_SHORT ? CV_16S :
               typenum == NPY_INT ? CV_32S :
               typenum == NPY_INT32 ? CV_32S :
               typenum == NPY_FLOAT ? CV_32F :
               typenum == NPY_DOUBLE ? CV_64F : -1;

    if (type < 0) {
        // `type` is -1 here, so the NPY_LONG test must look at `typenum`.
        if (typenum == NPY_INT64 || typenum == NPY_UINT64
                || typenum == NPY_LONG) {
            needcopy = needcast = true;
            new_typenum = NPY_INT;
            type = CV_32S;
        } else {
            failmsg("Argument data type is not supported");
            m.allocator = &g_numpyAllocator;
            return m;
        }
    }

#ifndef CV_MAX_DIM
    const int CV_MAX_DIM = 32;
#endif

    int ndims = PyArray_NDIM(oarr);
    if (ndims >= CV_MAX_DIM) {
        failmsg("Dimensionality of argument is too high");
        if (!m.data)
            m.allocator = &g_numpyAllocator;
        return m;
    }

    int size[CV_MAX_DIM + 1];
    size_t step[CV_MAX_DIM + 1];
    size_t elemsize = CV_ELEM_SIZE1(type);
    const npy_intp* _sizes = PyArray_DIMS(oarr);
    const npy_intp* _strides = PyArray_STRIDES(oarr);
    bool ismultichannel = ndims == 3 && _sizes[2] <= CV_CN_MAX;

    for (int i = ndims - 1; i >= 0 && !needcopy; i--) {
        // these checks handle cases of
        //  a) multi-dimensional (ndims > 2) arrays, as well as simpler 1- and 2-dimensional cases
        //  b) transposed arrays, where _strides[] elements go in non-descending order
        //  c) flipped arrays, where some of _strides[] elements are negative
        if ((i == ndims - 1 && (size_t) _strides[i] != elemsize)
                || (i < ndims - 1 && _strides[i] < _strides[i + 1]))
            needcopy = true;
    }

    if (ismultichannel && _strides[1] != (npy_intp) elemsize * _sizes[2])
        needcopy = true;

    if (needcopy) {
        // Both calls return a new reference (or NULL with an exception set).
        if (needcast) {
            o = PyArray_Cast(oarr, new_typenum);
            oarr = (PyArrayObject*) o;
        } else {
            oarr = PyArray_GETCONTIGUOUS(oarr);
            o = (PyObject*) oarr;
        }
        if (!o) {
            m.allocator = &g_numpyAllocator;
            return m;
        }

        _strides = PyArray_STRIDES(oarr);
    }

    for (int i = 0; i < ndims; i++) {
        size[i] = (int) _sizes[i];
        step[i] = (size_t) _strides[i];
    }

    // handle degenerate case
    if (ndims == 0) {
        size[ndims] = 1;
        step[ndims] = elemsize;
        ndims++;
    }

    if (ismultichannel) {
        ndims--;
        type |= CV_MAKETYPE(0, size[2]);
    }

    if (ndims > 2 && !allowND) {
        // No format placeholder: there is no argument name to substitute.
        failmsg("argument has more than 2 dimensions");
        if (needcopy) {
            Py_DECREF(o); // the copy is not handed to a Mat
        }
    } else {
        m = Mat(ndims, size, type, PyArray_DATA(oarr), step);
        m.u = g_numpyAllocator.allocate(o, ndims, size, type, step);
        m.addref();

        // A copy already carries its own reference; the original array
        // needs one taken on behalf of the Mat.
        if (!needcopy) {
            Py_INCREF(o);
        }
    }
    m.allocator = &g_numpyAllocator;
    return m;
}
/*
 * store_traces(traces, filename) -> None
 *
 * Pack every trace in the sequence `traces` with mst_pack() and write the
 * records to `filename` through record_handler. Each trace must be a tuple
 *   (network, station, location, channel, starttime, endtime, samprate, data)
 * where `data` is a NumPy array of dtype int32, int8, float32 or float64.
 *
 * Raises MSeedError on bad arguments, if the file cannot be opened or
 * closed, or if packing fails; MemoryError if a buffer cannot be allocated.
 * The output file is closed on every path.
 */
static PyObject* mseed_store_traces (PyObject *dummy, PyObject *args) {
    static const char *bad_trace =
        "Trace record must be a tuple of (network, station, location, channel, starttime, endtime, samprate, data).";

    char          *filename;
    MSTrace       *mst = NULL;
    PyObject      *array = NULL;
    PyObject      *in_traces = NULL;
    PyObject      *in_trace = NULL;
    PyArrayObject *contiguous_array = NULL;
    Py_ssize_t    i, ntraces;
    char          *network, *station, *location, *channel;
    char          mstype;
    int           msdetype;
    int           psamples, precords;
    int           numpytype;
    int           length;
    FILE          *outfile;

    if (!PyArg_ParseTuple(args, "Os", &in_traces, &filename)) {
        PyErr_SetString(MSeedError, "usage store_traces(traces, filename)" );
        return NULL;
    }
    if (!PySequence_Check( in_traces )) {
        PyErr_SetString(MSeedError, "Traces is not of sequence type." );
        return NULL;
    }

    /* The records are binary, so open the file in binary mode. */
    outfile = fopen(filename, "wb" );
    if (outfile == NULL) {
        PyErr_SetString(MSeedError, "Error opening file.");
        return NULL;
    }

    ntraces = PySequence_Length(in_traces);
    if (ntraces < 0)
        goto fail; /* exception already set */

    for (i = 0; i < ntraces; i++) {

        in_trace = PySequence_GetItem(in_traces, i); /* new reference */
        if (in_trace == NULL)
            goto fail; /* exception already set */
        if (!PyTuple_Check(in_trace)) {
            PyErr_SetString(MSeedError, bad_trace);
            goto fail;
        }

        mst = mst_init (NULL);
        if (mst == NULL) {
            PyErr_NoMemory();
            goto fail;
        }

        if (!PyArg_ParseTuple(in_trace, "ssssLLdO",
                              &network, &station, &location, &channel,
                              &(mst->starttime), &(mst->endtime),
                              &(mst->samprate), &array )) {
            PyErr_SetString(MSeedError, bad_trace);
            goto fail;
        }

        /* Copy at most 10 characters of each code and terminate. */
        strncpy( mst->network, network, 10);
        strncpy( mst->station, station, 10);
        strncpy( mst->location, location, 10);
        strncpy( mst->channel, channel, 10);
        mst->network[10] = '\0';
        mst->station[10] = '\0';
        mst->location[10] ='\0';
        mst->channel[10] = '\0';

        if (!PyArray_Check(array)) {
            PyErr_SetString(MSeedError, "Data must be given as NumPy array." );
            goto fail;
        }

        /* Map the NumPy dtype to a sample type and an encoding. */
        numpytype = PyArray_TYPE((PyArrayObject*)array);
        switch (numpytype) {
            case NPY_INT32:
                assert( ms_samplesize('i') == 4 );
                mstype = 'i';
                msdetype = DE_STEIM1;
                break;
            case NPY_INT8:
                assert( ms_samplesize('a') == 1 );
                mstype = 'a';
                msdetype = DE_ASCII;
                break;
            case NPY_FLOAT32:
                assert( ms_samplesize('f') == 4 );
                mstype = 'f';
                msdetype = DE_FLOAT32;
                break;
            case NPY_FLOAT64:
                assert( ms_samplesize('d') == 8 );
                mstype = 'd';
                msdetype = DE_FLOAT64;
                break;
            default:
                PyErr_SetString(MSeedError, "Data must be of type float64, float32, int32 or int8.");
                goto fail;
        }
        mst->sampletype = mstype;

        contiguous_array = PyArray_GETCONTIGUOUS((PyArrayObject*)array);
        if (contiguous_array == NULL)
            goto fail; /* exception already set */

        length = PyArray_SIZE(contiguous_array);
        mst->numsamples = length;
        mst->samplecnt = length;

        /* The trace gets its own copy of the samples; mst_free releases it. */
        mst->datasamples = calloc(length, ms_samplesize(mstype));
        if (length > 0) {
            if (mst->datasamples == NULL) {
                PyErr_NoMemory();
                goto fail;
            }
            memcpy(mst->datasamples, PyArray_DATA(contiguous_array),
                   length*ms_samplesize(mstype));
        }
        Py_DECREF(contiguous_array);
        contiguous_array = NULL;

        precords = mst_pack (mst, &record_handler, outfile, 4096, msdetype,
                             1, &psamples, 1, 0, NULL);
        if (precords < 0) {
            PyErr_SetString(MSeedError, "Error packing data.");
            goto fail;
        }

        mst_free( &mst );
        mst = NULL;
        Py_DECREF(in_trace);
        in_trace = NULL;
    }

    if (fclose( outfile ) != 0) {
        PyErr_SetString(MSeedError, "Error closing file.");
        return NULL;
    }

    Py_INCREF(Py_None);
    return Py_None;

fail:
    Py_XDECREF(contiguous_array);
    if (mst != NULL)
        mst_free( &mst );
    Py_XDECREF(in_trace);
    fclose( outfile );
    return NULL;
}
/*
 * msaeye(msa, array, unique=0, turbo=1) -> array
 *
 * Compute pairwise sequence identities between the rows of the character
 * array `msa` and store the outcome in the caller-supplied `array`, which is
 * also the return value.
 *
 *   unique == 0 : `array` is filled as a number x number matrix of doubles
 *                 holding the identity of every pair (1 on the diagonal).
 *   unique != 0 : `array` is treated as `number` booleans; entry j is
 *                 cleared when row j has identity >= `unique` with an
 *                 earlier row that is still set.
 *   turbo       : when non-zero, refined (0..26) codes of every row are
 *                 cached; if that memory cannot be allocated the function
 *                 silently falls back to the slower path.
 *
 * Identity is matches / columns, counting only columns where at least one
 * of the two rows has a letter.
 *
 * NOTE(review): neither the shape nor the dtype of `msa` and `array` is
 * verified here.
 */
static PyObject *msaeye(PyObject *self, PyObject *args, PyObject *kwargs)
{
    PyArrayObject *msa, *array;
    double unique = 0;
    int turbo = 1;

    static char *kwlist[] = {"msa", "array", "unique", "turbo", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|di", kwlist,
                                     &msa, &array, &unique, &turbo))
        return NULL;

    /* make sure to have a contiguous and well-behaved array
       (new reference, released before returning) */
    msa = PyArray_GETCONTIGUOUS(msa);
    if (!msa)
        return NULL;

    /* get dimensions */
    long number = PyArray_DIMS(msa)[0], length = PyArray_DIMS(msa)[1];

    /* get pointers to data */
    char *iraw, *jraw, *raw = (char *) PyArray_DATA(msa);

    long i, j;
    /* point at `raw` first to avoid uninitialized warnings */
    double *jrow, *sim = (double *) raw;
    _Bool *unq = (_Bool *) raw;
    if (unique)
        unq = (_Bool *) PyArray_DATA(array);
    else
        sim = (double *) PyArray_DATA(array);

    /* arrays to store refined sequences */
    unsigned char *row0 = malloc((size_t)(length > 0 ? length : 1) * sizeof(unsigned char));
    if (!row0) {
        Py_DECREF(msa);
        return PyErr_NoMemory();
    }
    unsigned char *iseq = row0;

    unsigned char **seq = malloc((size_t)number * sizeof(unsigned char *));
    if (!seq)
        turbo = 0;

    if (turbo) {
        /* allocate rows that will store refined rows of the MSA */
        seq[0] = row0;
        for (i = 1; i < number; i++) {
            seq[i] = malloc((size_t)length * sizeof(unsigned char));
            if (!seq[i]) {
                /* Fall back to the slow path. Stop allocating and forget
                   the table so it is neither written to nor freed again. */
                for (j = 1; j < i; j++)
                    free(seq[j]);
                free(seq);
                seq = NULL;
                turbo = 0;
                break;
            }
        }
    }

    /* initialize jseq, so that we don't get uninitialized warning */
    unsigned char *jseq = iseq;

    unsigned char a, b;
    long k, diff;

    /* initialise the output */
    if (unique) {
        for (i = 0; i < number; i++)
            unq[i] = 1;
    }
    else {
        for (i = 0; i < number; i++) {
            jrow = sim + i * number;
            for (j = 0; j < number; j++)
                jrow[j] = 0;
            jrow[i] = 1;
        }
    }

    double ncols, score, seqid;

    /* START calculation */
    /* compare the first row with every other row, refining as we go */
    i = 0;
    iraw = raw;
    for (j = 1; j < number; j++) {
        ncols = score = 0.;
        jraw = raw + length * j;
        diff = j - 1;
        if (turbo) /* in turbo mode, there is a row for refined sequences */
            jseq = seq[j];
        for (k = 0; k < length; k++) {
            if (diff) {
                a = iseq[k];
            }
            else {
                a = (unsigned char) iraw[k];
                if (a > 90)
                    a -= 96;
                else
                    a -= 64;
                if (a < 1 || a > 26)
                    a = 0; /* gap character */
                iseq[k] = a;
            }

            b = (unsigned char) jraw[k];
            if (b > 90)
                b -= 96;
            else
                b -= 64;
            if (b < 1 || b > 26)
                b = 0; /* gap character */
            if (turbo) /* we keep the refined chars for all sequences */
                jseq[k] = b;

            if (a || b) {
                ncols++;
                if (a == b)
                    score++;
            }
        }

        seqid = score / ncols;
        if (unique) {
            if (seqid >= unique)
                unq[j] = 0;
        }
        else if (ncols) {
            sim[j] = sim[number * j] = seqid;
        }
    }

    /* calculate rest of identities */
    for (i = 1; i < number; i++) {

        if (unique && !unq[i])
            continue;

        if (turbo)
            iseq = seq[i];
        else
            iraw = raw + length * i;

        for (j = i + 1; j < number; j++) {
            ncols = score = 0.;

            if (turbo) {
                jseq = seq[j];
                for (k = 0; k < length; k++) {
                    a = iseq[k];
                    b = jseq[k];
                    if (a || b) {
                        ncols++;
                        if (a == b)
                            score++;
                    }
                }
            }
            else {
                jraw = raw + length * j;
                diff = j - i - 1;
                for (k = 0; k < length; k++) {
                    if (diff) {
                        a = iseq[k];
                    }
                    else {
                        a = (unsigned char) iraw[k];
                        if (a > 90)
                            a -= 96;
                        else
                            a -= 64;
                        if (a < 1 || a > 26)
                            a = 0; /* gap character */
                        iseq[k] = a;
                    }

                    b = (unsigned char) jraw[k];
                    if (b > 90)
                        b -= 96;
                    else
                        b -= 64;
                    if (b < 1 || b > 26)
                        b = 0; /* gap character */

                    if (a || b) {
                        ncols++;
                        if (a == b)
                            score++;
                    }
                }
            }

            seqid = score / ncols;
            if (unique) {
                if (seqid >= unique)
                    unq[j] = 0;
            }
            else if (ncols) {
                sim[i * number + j] = sim[i + number * j] = seqid;
            }
        }
    }

    /* free memory; seq[0] aliases row0 and is released through row0 */
    if (turbo)
        for (j = 1; j < number; j++)
            free(seq[j]);
    free(seq);
    free(row0);
    Py_DECREF(msa);

    return Py_BuildValue("O", array);
}
/*
 * writeSelex(filename, labels, msa, stockholm=0, label_length=31) -> filename
 *
 * Write MSA where inputs are: labels in the form of a Python list and
 * sequences in the form of a Python numpy array, and write them in SELEX
 * (default) or Stockholm format in the specified filename.
 *
 * Each output line is the label, space-padded or truncated to exactly
 * `label_length` characters, followed by the sequence and a newline.
 *
 * Raises ValueError if the number of labels differs from the number of
 * sequences or `label_length` is negative, IOError if the file cannot be
 * opened, and MemoryError if the line buffer cannot be allocated.
 */
static PyObject *writeSelex(PyObject *self, PyObject *args, PyObject *kwargs)
{
    char *filename;
    PyObject *labels;
    PyArrayObject *msa;
    int stockholm = 0; /* optional argument: must have a defined default */
    int label_length = 31;

    static char *kwlist[] = {"filename", "labels", "msa", "stockholm",
                             "label_length", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sOO|ii", kwlist, &filename,
                                     &labels, &msa, &stockholm, &label_length))
        return NULL;

    if (label_length < 0) {
        PyErr_SetString(PyExc_ValueError, "label_length must be non-negative");
        return NULL;
    }

    /* make sure to have a contiguous and well-behaved array
       (new reference, released on every exit path below) */
    msa = PyArray_GETCONTIGUOUS(msa);
    if (!msa)
        return NULL;

    long numseq = PyArray_DIMS(msa)[0], lenseq = PyArray_DIMS(msa)[1];

    if (numseq != PyList_Size(labels)) {
        PyErr_SetString(PyExc_ValueError,
                        "size of labels and msa array does not match");
        Py_DECREF(msa);
        return NULL;
    }

    FILE *file = fopen(filename, "wb");
    if (!file) {
        PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
        Py_DECREF(msa);
        return NULL;
    }

    long i, j;
    long pos = 0; /* running index into the sequence data */
    char *seq = PyArray_DATA(msa);

    /* label + sequence + '\n' + '\0' */
    char *outline = (char *) malloc((size_t)(label_length + lenseq + 2) * sizeof(char));
    if (!outline) {
        fclose(file);
        Py_DECREF(msa);
        return PyErr_NoMemory();
    }

    if (stockholm)
        fprintf(file, "# STOCKHOLM 1.0\n");

    outline[label_length + lenseq] = '\n';
    outline[label_length + lenseq + 1] = '\0';

    for (i = 0; i < numseq; i++) {
        PyObject *item = PyList_GetItem(labels, (Py_ssize_t) i); /* borrowed */
#if PY_MAJOR_VERSION >= 3
        PyObject *plabel = PyUnicode_AsEncodedString(item, "utf-8",
                                                     "label encoding");
        char *label = plabel ? PyBytes_AsString(plabel) : NULL;
#else
        char *label = PyString_AsString(item);
#endif
        if (!label) {
            /* exception already set by the failing conversion */
#if PY_MAJOR_VERSION >= 3
            Py_XDECREF(plabel);
#endif
            free(outline);
            fclose(file);
            Py_DECREF(msa);
            return NULL;
        }

        /* Copy at most label_length characters so the label can neither
           overrun the buffer nor clobber the trailing newline, then pad. */
        size_t lablen = strlen(label);
        size_t ncopy = lablen < (size_t) label_length ? lablen
                                                      : (size_t) label_length;
        memcpy(outline, label, ncopy);
        for (j = (long) ncopy; j < label_length; j++)
            outline[j] = ' ';

#if PY_MAJOR_VERSION >= 3
        /* `label` points into plabel, so release it only after the copy. */
        Py_DECREF(plabel);
#endif

        for (j = label_length; j < (lenseq + label_length); j++)
            outline[j] = seq[pos++];

        fprintf(file, "%s", outline);
    }

    if (stockholm)
        fprintf(file, "//\n");

    free(outline);
    fclose(file);
    Py_DECREF(msa);
    return Py_BuildValue("s", filename);
}
static PyObject *writeData(PyObject *self, PyObject *args, PyObject *kwargs) { char *filename=NULL, SymData[80], *tempchar; PyArrayObject *data; PyObject *header; MRCHeader m_header; FILE *m_fp=NULL; gzFile gzfp=NULL; int i,j,k,bytesize=4,compress=0; void *matrix; static char *kwlist[] = {"header", "data", "filename", "compress", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OOsi", kwlist, &header, &data, &filename, &compress)) return Py_BuildValue("Os", Py_None,"Couldn't parse variable from C function."); data = PyArray_GETCONTIGUOUS(data); matrix = (void *) PyArray_DATA(data); if (compress){ gzfp = gzopen(filename,"wb"); if(gzfp==NULL) return Py_BuildValue("Os", Py_None,"Couldn't write file."); } else{ m_fp=fopen(filename,"w"); if(m_fp==NULL) return Py_BuildValue("Os", Py_None,"Couldn't write file."); } m_header.nx=PyInt_AsLong(PyObject_GetAttrString(header, "nx")); m_header.ny=PyInt_AsLong(PyObject_GetAttrString(header, "ny")); m_header.nz=PyInt_AsLong(PyObject_GetAttrString(header, "nz")); m_header.mode=PyInt_AsLong(PyObject_GetAttrString(header, "mode")); m_header.nxstart=PyInt_AsLong(PyObject_GetAttrString(header, "nxstart")); m_header.nystart=PyInt_AsLong(PyObject_GetAttrString(header, "nystart")); m_header.nzstart=PyInt_AsLong(PyObject_GetAttrString(header, "nzstart")); m_header.mx=PyInt_AsLong(PyObject_GetAttrString(header, "mx")); m_header.my=PyInt_AsLong(PyObject_GetAttrString(header, "my")); m_header.mz=PyInt_AsLong(PyObject_GetAttrString(header, "mz")); m_header.mapc=PyInt_AsLong(PyObject_GetAttrString(header, "mapc")); m_header.mapr=PyInt_AsLong(PyObject_GetAttrString(header, "mapr")); m_header.maps=PyInt_AsLong(PyObject_GetAttrString(header, "maps")); m_header.ispg=PyInt_AsLong(PyObject_GetAttrString(header, "ispg")); m_header.nsymbt=PyInt_AsLong(PyObject_GetAttrString(header, "nsymbt")); m_header.machst=PyInt_AsLong(PyObject_GetAttrString(header, "machst")); m_header.nlabels=PyInt_AsLong(PyObject_GetAttrString(header, "nlabels")); 
m_header.dmin=(float)PyFloat_AsDouble(PyObject_GetAttrString(header, "dmin")); m_header.dmax=(float)PyFloat_AsDouble(PyObject_GetAttrString(header, "dmax")); m_header.dmean=(float)PyFloat_AsDouble(PyObject_GetAttrString(header, "dmean")); m_header.rms=(float)PyFloat_AsDouble(PyObject_GetAttrString(header, "rms")); tempchar=PyString_AsString(PyObject_GetAttrString(header, "map")); strncpy(m_header.map,tempchar,4); for(i=0;i<4;i++) if (m_header.map[i]=='\0'){ for(j=i+1;j<4;j++) m_header.map[j]='\0'; break; } tempchar=PyString_AsString(PyObject_GetAttrString(header, "extra")); strncpy(m_header.extra,tempchar,100); for(i=0;i<100;i++) if (m_header.extra[i]=='\0'){ for(j=i+1;j<100;j++) m_header.extra[j]='\0'; break; } for (i=0;i<3;i++){ m_header.cella[i]=(float)PyFloat_AsDouble(PyList_GetItem(PyObject_GetAttrString(header,"cella"), i)); m_header.cellb[i]=(float)PyFloat_AsDouble(PyList_GetItem(PyObject_GetAttrString(header,"cellb"), i)); m_header.origin[i]=(float)PyFloat_AsDouble(PyList_GetItem(PyObject_GetAttrString(header,"origin"), i)); } for (i=0;i<10;i++){ tempchar=PyString_AsString(PyList_GetItem(PyObject_GetAttrString(header,"label"), i)); strncpy(m_header.label[i],tempchar,80); for(j=0;j<80;j++) if (m_header.label[i][j]=='\0'){ for(k=j+1;k<80;k++) m_header.label[i][k]='\0'; break; } } if (m_header.nsymbt==80){ tempchar=PyString_AsString(PyObject_GetAttrString(header, "symdata")); strncpy(SymData,tempchar,80); for(i=0;i<80;i++) if (SymData[i]=='\0'){ for(j=i+1;j<80;j++) SymData[j]='\0'; break; } } else m_header.nsymbt=0; switch(m_header.mode) { case 0: bytesize=1;break; case 1: bytesize=2;break; case 2: bytesize=4;break; case 5: bytesize=1;break; case 6: bytesize=2;break; } // Write file. 
if (compress){ if (gzwrite(gzfp,&m_header,1024)!=1024){ gzclose(gzfp); return Py_BuildValue("Os", Py_None,"Couldn't write the header."); } if (m_header.nsymbt==80){ if (gzwrite(gzfp, SymData, 80)!=80){ gzclose(gzfp); return Py_BuildValue("Os", Py_None,"Couldn't write Symmetry Data."); } } if (gzwrite(gzfp, matrix, bytesize*m_header.nz*m_header.ny*m_header.nx)!=bytesize*m_header.nz*m_header.ny*m_header.nx){ gzclose(gzfp); return Py_BuildValue("Os", Py_None,"Couldn't write Matrix."); } gzclose(gzfp); } else{ if (fwrite(&m_header,1,1024,m_fp)!=1024){ fclose(m_fp); return Py_BuildValue("Os", Py_None,"Couldn't write the header."); } if (m_header.nsymbt==80){ if (fwrite(SymData, 1, (size_t)80, m_fp)!=80){ fclose(m_fp); return Py_BuildValue("Os", Py_None,"Couldn't write Symmetry Data."); } } if (fwrite(matrix, bytesize, m_header.nz*m_header.ny*m_header.nx, m_fp)!=m_header.nz*m_header.ny*m_header.nx){ fclose(m_fp); return Py_BuildValue("Os", Py_None,"Couldn't write Matrix."); } fclose(m_fp); } Py_XDECREF(data); return Py_BuildValue("i", 0); }
/*
 * msadirectinfo2(n, l, c, p, di, q) -> di
 *
 * For every pair of positions i < j (0 <= i, j < l) this routine
 *   1. fills a q x q work matrix w with exp(-c) for the first q-1 states and
 *      with 1 in the last row and column,
 *   2. iterates the vectors mu1/mu2 until the largest change drops to
 *      epsilon or below,
 *   3. normalises w * mu1 * mu2 and stores
 *      sum(w * log((w + tiny) / (p_i * p_j + tiny))) in di(i,j) and di(j,i).
 * The diagonal of di is set to zero.
 *
 * Arguments:
 *   n  - parsed for interface compatibility; not used here.
 *   l  - number of positions.
 *   c  - addressed as a square matrix with l*(q-1) columns.
 *   p  - addressed as l rows of q values.
 *   di - addressed as an l x l matrix; written in place.
 *   q  - number of states; must be positive.
 *
 * NOTE(review): the buffers are read as C doubles and their sizes are not
 * verified against l and q -- presumably the Python caller allocates them
 * with matching shape and dtype float64; confirm against the caller.
 *
 * Returns a new reference to the (contiguous) di array, or NULL with an
 * exception set.
 */
static PyObject *msadirectinfo2(PyObject *self, PyObject *args,
                                PyObject *kwargs) {

    PyArrayObject *cinfo = NULL, *pinfo = NULL, *diinfo = NULL;
    PyArrayObject *carr = NULL, *parr = NULL, *diarr = NULL;
    PyObject *result = NULL;
    double *c = NULL, *prob = NULL, *di = NULL;
    double *w = NULL, *mu1 = NULL, *mu2 = NULL, *scra1 = NULL, *scra2 = NULL;
    long number = 0, l = 0, q = 0;
    long i, j, k1, k2;
    static char *kwlist[] = {"n", "l", "c", "p", "di", "q", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "llOOOl", kwlist,
                                     &number, &l, &cinfo, &pinfo, &diinfo, &q))
        return NULL;

    if (q < 1) {
        PyErr_SetString(PyExc_ValueError, "q must be a positive integer");
        return NULL;
    }

    /* PyArray_GETCONTIGUOUS hands back a NEW reference (either the same
       object or a contiguous copy); each one is released at `done`. */
    carr = (PyArrayObject *) PyArray_GETCONTIGUOUS(cinfo);
    if (!carr)
        goto done;
    parr = (PyArrayObject *) PyArray_GETCONTIGUOUS(pinfo);
    if (!parr)
        goto done;
    diarr = (PyArrayObject *) PyArray_GETCONTIGUOUS(diinfo);
    if (!diarr)
        goto done;

    /* These point INTO the NumPy buffers; they must never be free()d. */
    c = (double *) PyArray_DATA(carr);
    prob = (double *) PyArray_DATA(parr);
    di = (double *) PyArray_DATA(diarr);

    w = calloc((size_t) q * (size_t) q, sizeof *w);
    mu1 = malloc((size_t) q * sizeof *mu1);
    mu2 = malloc((size_t) q * sizeof *mu2);
    scra1 = malloc((size_t) q * sizeof *scra1);
    scra2 = malloc((size_t) q * sizeof *scra2);
    if (!w || !mu1 || !mu2 || !scra1 || !scra2) {
        PyErr_NoMemory();
        goto done;
    }

    #define W(x,y) w[(x)*q + (y)]
    #define C(x,y) c[(x)*l*(q-1) + (y)]
    #define PROB(x,y) prob[(x)*q + (y)]
    #define DI(x,y) di[(x)*l + (y)]

    const double epsilon = 1e-4, tiny = 1.0e-100;
    double diff, sum1, sum2, sumpdir, sumdi;

    for (i = 0; i < l; i++) {
        DI(i, i) = 0.0;
        for (j = i + 1; j < l; j++) {

            /* work matrix for this pair */
            for (k1 = 0; k1 < q - 1; k1++)
                for (k2 = 0; k2 < q - 1; k2++)
                    W(k1, k2) = exp(-C((q - 1) * i + k1, (q - 1) * j + k2));
            for (k1 = 0; k1 < q; k1++)
                W(q - 1, k1) = W(k1, q - 1) = 1.;

            /* start from uniform vectors */
            for (k1 = 0; k1 < q; k1++) {
                mu1[k1] = 1. / q;
                mu2[k1] = 1. / q;
            }

            diff = 1.0;
            while (diff > epsilon) {
                for (k1 = 0; k1 < q; k1++) {
                    scra1[k1] = 0.0;
                    scra2[k1] = 0.0;
                }
                for (k1 = 0; k1 < q; k1++) {
                    for (k2 = 0; k2 < q; k2++) {
                        scra1[k1] += mu2[k2] * W(k1, k2);
                        scra2[k1] += mu1[k2] * W(k2, k1);
                    }
                }

                sum1 = 0.0;
                sum2 = 0.0;
                for (k1 = 0; k1 < q; k1++) {
                    scra1[k1] = PROB(i, k1) / scra1[k1];
                    sum1 += scra1[k1];
                    scra2[k1] = PROB(j, k1) / scra2[k1];
                    sum2 += scra2[k1];
                }
                for (k1 = 0; k1 < q; k1++) {
                    scra1[k1] /= sum1;
                    scra2[k1] /= sum2;
                }

                /* largest absolute change decides convergence */
                diff = -1.0;
                for (k1 = 0; k1 < q; k1++) {
                    if (fabs(mu1[k1] - scra1[k1]) > diff)
                        diff = fabs(mu1[k1] - scra1[k1]);
                    if (fabs(mu2[k1] - scra2[k1]) > diff)
                        diff = fabs(mu2[k1] - scra2[k1]);
                    mu1[k1] = scra1[k1];
                    mu2[k1] = scra2[k1];
                }
            }

            sumpdir = 0.0;
            for (k1 = 0; k1 < q; k1++) {
                for (k2 = 0; k2 < q; k2++) {
                    W(k1, k2) = W(k1, k2) * mu1[k1] * mu2[k2];
                    sumpdir += W(k1, k2);
                }
            }

            sumdi = 0.0;
            for (k1 = 0; k1 < q; k1++) {
                for (k2 = 0; k2 < q; k2++) {
                    W(k1, k2) /= sumpdir;
                    sumdi += W(k1, k2) *
                             log((W(k1, k2) + tiny) /
                                 (PROB(i, k1) * PROB(j, k2) + tiny));
                }
            }
            DI(i, j) = DI(j, i) = sumdi;
        }
    }

    #undef W
    #undef C
    #undef PROB
    #undef DI

    /* Transfer our reference on the result array to the caller. */
    result = (PyObject *) diarr;
    diarr = NULL;

done:
    free(w);
    free(mu1);
    free(mu2);
    free(scra1);
    free(scra2);
    Py_XDECREF(carr);
    Py_XDECREF(parr);
    Py_XDECREF(diarr);
    return result;
}
/*
 * msadirectinfo1(msa, c, prob, theta, pseudocount_weight, refine[, q])
 *     -> (meff, number, l, c, prob)
 *
 * Calls msameff() with meff_only=2 to obtain meff, the number of sequences,
 * the number of retained columns l, the per-sequence weights w and the
 * integer alignment matrix `align`.  It then
 *   - fills prob (l rows of q values) with pseudocount-weighted single-site
 *     frequencies, and
 *   - fills c (square, l*(q-1) columns) symmetrically with
 *     joint(k1,k2) - prob(i,k1) * prob(j,k2) for the first q-1 states.
 *
 * msameff() returns w and align as raw heap pointers packed into Python
 * integers; this function takes ownership of both and frees them.
 *
 * NOTE(review): entries of `align` produced by msameff() range over the
 * values in its `alignlist` table (0..20), so q is presumably expected to be
 * at least 21; only q >= 1 is enforced here.  The sizes and dtype (double) of
 * c and prob are not verified -- confirm against the Python caller.
 *
 * Returns a new tuple, or NULL with an exception set.
 */
static PyObject *msadirectinfo1(PyObject *self, PyObject *args,
                                PyObject *kwargs) {

    PyArrayObject *msa = NULL, *cinfo = NULL, *pinfo = NULL;
    PyArrayObject *carr = NULL, *parr = NULL;
    PyObject *meffargs = NULL, *meffkw = NULL, *meffinfo = NULL;
    PyObject *result = NULL;
    double theta = 0.2, pseudocount_weight = 0.5;
    int refine = 0, q = 0;
    double meff = -1.;
    long number = 0, l = 0;
    long wptr = 0, alignptr = 0;
    double *w = NULL, *joint = NULL;
    int *align = NULL;
    long i, j, k, k1, k2;
    static char *kwlist[] = {"msa", "c", "prob", "theta", "pseudocount_weight",
                             "refine", "q", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OOOddi|i", kwlist,
                                     &msa, &cinfo, &pinfo, &theta,
                                     &pseudocount_weight, &refine, &q))
        return NULL;

    /* q is optional in the format string but a non-positive value makes the
       joint buffer empty while it is still written to. */
    if (q < 1) {
        PyErr_SetString(PyExc_ValueError, "q must be a positive integer");
        return NULL;
    }

    /* New references; released at `done`. */
    carr = (PyArrayObject *) PyArray_GETCONTIGUOUS(cinfo);
    if (!carr)
        goto done;
    parr = (PyArrayObject *) PyArray_GETCONTIGUOUS(pinfo);
    if (!parr)
        goto done;

    double *c = (double *) PyArray_DATA(carr);
    double *prob = (double *) PyArray_DATA(parr);

    /* Calculate meff, w and align. */
    meffargs = Py_BuildValue("(O)", msa);
    meffkw = Py_BuildValue("{s:d,s:i,s:i}", "theta", theta,
                           "meff_only", 2, "refine", refine);
    if (!meffargs || !meffkw)
        goto done;
    meffinfo = msameff(NULL, meffargs, meffkw);
    if (!meffinfo)
        goto done;
    if (!PyArg_ParseTuple(meffinfo, "dllll", &meff, &number, &l,
                          &wptr, &alignptr))
        goto done;
    /* From here on the two buffers belong to this function. */
    w = (double *) wptr;
    align = (int *) alignptr;

    /* Build single-site probability, weighted by pseudocount_weight. */
    double pse_weight_val = pseudocount_weight / q;
    double pro_weight = 1. - pseudocount_weight;
    for (i = 0; i < q * l; i++)
        prob[i] = pse_weight_val;

    #define PROB(x,y) prob[(x)*q + (y)]
    #define ALIGN(x,y) align[(x)*l + (y)]
    #define JOINT(x,y) joint[(x)*q + (y)]
    #define C(x,y) c[(x)*l*(q-1) + (y)]

    for (i = 0; i < number; i++)
        for (j = 0; j < l; j++)
            PROB(j, ALIGN(i, j)) += pro_weight * w[i];

    /* Calculate C matrix. */
    joint = malloc((size_t) q * (size_t) q * sizeof *joint);
    if (!joint) {
        PyErr_NoMemory();
        goto done;
    }

    for (i = 0; i < l; i++) {
        for (j = i; j < l; j++) {

            if (i == j) {
                /* same column: pseudocount only on the diagonal */
                for (k = 0; k < q * q; k++)
                    joint[k] = 0.;
                pse_weight_val = pseudocount_weight / q;
                for (k = 0; k < q; k++)
                    JOINT(k, k) = pse_weight_val;
            }
            else {
                pse_weight_val = pseudocount_weight / q / q;
                for (k = 0; k < q * q; k++)
                    joint[k] = pse_weight_val;
            }

            for (k = 0; k < number; k++)
                JOINT(ALIGN(k, i), ALIGN(k, j)) += pro_weight * w[k];

            for (k1 = 0; k1 < q - 1; k1++) {
                for (k2 = 0; k2 < q - 1; k2++) {
                    C((q - 1) * j + k2, (q - 1) * i + k1) =
                        C((q - 1) * i + k1, (q - 1) * j + k2) =
                            JOINT(k1, k2) - PROB(i, k1) * PROB(j, k2);
                }
            }
        }
    }

    #undef PROB
    #undef ALIGN
    #undef JOINT
    #undef C

    /* "O" takes its own references; ours are dropped below. */
    result = Py_BuildValue("dllOO", meff, number, l, carr, parr);

done:
    free(w);
    free(align);
    free(joint);
    Py_XDECREF(meffargs);
    Py_XDECREF(meffkw);
    Py_XDECREF(meffinfo);
    Py_XDECREF(carr);
    Py_XDECREF(parr);
    return result;
}
/*
 * msameff(msa, theta, meff_only, refine[, w])
 *
 * Computes per-sequence weights and their sum (meff) for a 2-D character
 * array `msa` (rows = sequences, columns = positions).
 *
 *   - Letters 'A'..'Z' are mapped to integers through `alignlist`; every
 *     other byte maps to 0.
 *   - With refine != 0 only columns where the FIRST sequence holds an
 *     upper-case letter are kept.
 *   - Two sequences count as neighbours when the fraction of differing kept
 *     columns is below theta; a sequence's weight is 1 / (1 + neighbours).
 *
 * Return value depends on meff_only:
 *   1     -> float meff.
 *   2     -> tuple (meff, number, l, w_ptr, align_ptr) where the last two are
 *            raw malloc'ed pointers packed as C longs.  w has been divided by
 *            meff.  The CALLER owns both buffers and must free() them.
 *            NOTE(review): this only round-trips where a pointer fits in a
 *            long.
 *   other -> tuple (meff, w); the weights are copied into the `w` array
 *            argument, which is required in this mode.
 *            NOTE(review): `w` is assumed to hold at least `number` doubles;
 *            its size and dtype are not verified here.
 *
 * Returns NULL with an exception set on failure.
 */
static PyObject *msameff(PyObject *self, PyObject *args, PyObject *kwargs) {

    PyArrayObject *msa = NULL, *pythonw = NULL;
    PyArrayObject *msarr = NULL, *warr = NULL;
    PyObject *result = NULL;
    double theta = 0.0;
    int meff_only = 1, refine = 0;
    int alignlist[26] = {1, 0, 2, 3, 4, 5, 6, 7, 8, 0, 9, 10, 11, 12,
                         0, 13, 14, 15, 16, 17, 0, 18, 19, 0, 20, 0};
    int *ind = NULL, *align = NULL;
    double *w = NULL;
    long i, j, k, l = 0;
    static char *kwlist[] = {"msa", "theta", "meff_only", "refine", "w", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Odii|O", kwlist,
                                     &msa, &theta, &meff_only, &refine,
                                     &pythonw))
        return NULL;

    if (!PyArray_Check((PyObject *) msa) || PyArray_NDIM(msa) != 2) {
        PyErr_SetString(PyExc_ValueError, "msa must be a 2-dimensional array");
        return NULL;
    }

    /* make sure to have a contiguous and well-behaved array (new reference) */
    msarr = (PyArrayObject *) PyArray_GETCONTIGUOUS(msa);
    if (!msarr)
        return NULL;

    long number = PyArray_DIMS(msarr)[0], length = PyArray_DIMS(msarr)[1];
    char *seq = (char *) PyArray_DATA(msarr); /* size: number x length */

    /* Set ind and get l first.  Allocate at least one element so that an
       empty dimension is not mistaken for an allocation failure. */
    ind = malloc((length > 0 ? (size_t) length : 1) * sizeof *ind);
    if (!ind) {
        PyErr_NoMemory();
        goto done;
    }

    if (!refine) {
        for (i = 0; i < length; i++) {
            l += 1;
            ind[i] = l;
        }
    }
    else {
        for (i = 0; i < length; i++) {
            /* the first row decides; with no rows nothing is kept */
            if (number > 0 && seq[i] <= 90 && seq[i] >= 65) {
                l += 1;
                ind[i] = l;
            }
            else
                ind[i] = 0;
        }
    }

    /* Use l to set align and w size. */
    size_t nalign = (size_t) number * (size_t) l;
    align = calloc(nalign ? nalign : 1, sizeof *align);
    w = malloc((number > 0 ? (size_t) number : 1) * sizeof *w);
    if (!align || !w) {
        PyErr_NoMemory();
        goto done;
    }

    #define ALIGN(x,y) align[(x)*l + (y)]

    /* Set align matrix */
    for (i = 0; i < number; i++) {
        for (j = 0; j < length; j++) {
            if (ind[j] == 0)
                continue;
            char ch = seq[i * length + j];
            if (ch >= 65 && ch <= 90)
                ALIGN(i, ind[j] - 1) = alignlist[ch - 65];
            else
                ALIGN(i, ind[j] - 1) = 0;
        }
    }

    /* Calculate weight(w) for each sequence, sum of w is Meff */
    for (i = 0; i < number; i++)
        w[i] = 1.;
    for (i = 0; i < number; i++) {
        for (j = i + 1; j < number; j++) {
            double temp = 0.;
            for (k = 0; k < l; k++) {
                if (ALIGN(i, k) != ALIGN(j, k))
                    temp += 1.;
            }
            temp /= l;
            if (temp < theta) {
                w[i] += 1.;
                w[j] += 1.;
            }
        }
    }

    #undef ALIGN

    double meff = 0.0;
    for (i = 0; i < number; i++) {
        w[i] = 1. / w[i];
        meff += w[i];
    }

    if (meff_only == 1) {
        result = Py_BuildValue("d", meff);
    }
    else if (meff_only == 2) {
        for (i = 0; i < number; i++)
            w[i] /= meff;
        result = Py_BuildValue("dllll", meff, number, l,
                               (long) w, (long) align);
        if (result) {
            /* ownership of both buffers moved to the caller */
            w = NULL;
            align = NULL;
        }
    }
    else {
        if (!pythonw || !PyArray_Check((PyObject *) pythonw)) {
            PyErr_SetString(PyExc_TypeError,
                            "w must be given as an array for this meff_only");
            goto done;
        }
        warr = (PyArrayObject *) PyArray_GETCONTIGUOUS(pythonw);
        if (!warr)
            goto done;
        double *pw = (double *) PyArray_DATA(warr);
        for (i = 0; i < number; i++)
            pw[i] = w[i];
        /* "O" takes its own reference; ours is dropped at `done` */
        result = Py_BuildValue("dO", meff, warr);
    }

done:
    /* Clean up memory. */
    free(ind);
    free(align);
    free(w);
    Py_XDECREF(warr);
    Py_XDECREF(msarr);
    return result;
}
/*
 * msaentropy(msa, entropy[, ambiguity=1, omitgaps=0]) -> entropy
 *
 * For every column of the 2-D character array `msa` this computes
 * -sum(p * log(p)) over the letters found in that column and stores it in
 * entropy[column].
 *
 *   - Lower-case letters are counted together with their upper-case form.
 *   - With ambiguity != 0 the counts of B, Z and J are split evenly over
 *     (D, N), (E, Q) and (I, L), and the count of X is spread evenly over the
 *     twenty letters listed in `twenty`.
 *   - Everything that is not a letter is treated as a gap.  With
 *     omitgaps != 0 gaps are left out of the denominator; otherwise they
 *     contribute as one additional symbol.
 *
 * NOTE(review): `entropy` is written as C doubles, one per column; its size
 * and dtype are not verified here -- confirm against the Python caller.
 *
 * Returns a new reference to `entropy`, or NULL with an exception set.
 */
static PyObject *msaentropy(PyObject *self, PyObject *args, PyObject *kwargs) {

    PyArrayObject *msa = NULL, *entropy = NULL, *msarr = NULL;
    int ambiguity = 1, omitgaps = 0;
    static char *kwlist[] = {"msa", "entropy", "ambiguity", "omitgaps", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|ii", kwlist,
                                     &msa, &entropy, &ambiguity, &omitgaps))
        return NULL;

    if (!PyArray_Check((PyObject *) msa) || PyArray_NDIM(msa) != 2) {
        PyErr_SetString(PyExc_ValueError, "msa must be a 2-dimensional array");
        return NULL;
    }
    if (!PyArray_Check((PyObject *) entropy)) {
        PyErr_SetString(PyExc_TypeError, "entropy must be an array");
        return NULL;
    }

    /* make sure to have a contiguous and well-behaved array (new reference) */
    msarr = (PyArrayObject *) PyArray_GETCONTIGUOUS(msa);
    if (!msarr)
        return NULL;

    long number = PyArray_DIMS(msarr)[0], length = PyArray_DIMS(msarr)[1];
    char *seq = (char *) PyArray_DATA(msarr);
    double *ent = (double *) PyArray_DATA(entropy);

    long size = number * length;
    double count[128]; /* number of ASCII characters */
    double shannon = 0, probability = 0, numgap = 0, denom = number;
    double ambiguous = 0;
    long i = 0, j = 0;
    int twenty[20] = {65, 67, 68, 69, 70, 71, 72, 73, 75, 76,
                      77, 78, 80, 81, 82, 83, 84, 86, 87, 89};

    for (i = 0; i < length; i++) {

        /* zero ALL counters: non-letter slots are incremented below too */
        for (j = 0; j < 128; j++)
            count[j] = 0;

        /* count characters in a column; bytes outside 0..127 (including
           negative values of a signed char) have no slot and are skipped,
           which leaves them counted as gaps */
        for (j = i; j < size; j += length) {
            unsigned char ch = (unsigned char) seq[j];
            if (ch < 128)
                count[ch]++;
        }
        /* fold lower case into upper case */
        for (j = 65; j < 91; j++)
            count[j] += count[j + 32];

        /* handle ambiguous amino acids */
        if (ambiguity) {
            if (count[66]) {
                ambiguous = count[66] / 2.; /* B */
                count[66] = 0;
                count[68] += ambiguous; /* D */
                count[78] += ambiguous; /* N */
            }
            if (count[90]) {
                ambiguous = count[90] / 2.; /* Z */
                count[90] = 0;
                count[69] += ambiguous; /* E */
                count[81] += ambiguous; /* Q */
            }
            if (count[74]) {
                ambiguous = count[74] / 2.; /* J */
                count[74] = 0;
                count[73] += ambiguous; /* I */
                count[76] += ambiguous; /* L */
            }
            if (count[88]) {
                ambiguous = count[88] / 20.; /* X */
                count[88] = 0;
                for (j = 0; j < 20; j++)
                    count[twenty[j]] += ambiguous;
            }
        }

        /* whatever is not a letter is a gap */
        numgap = number;
        for (j = 65; j < 91; j++)
            numgap -= count[j];

        shannon = 0;
        denom = number;
        if (omitgaps)
            denom = number - numgap;
        else if (numgap > 0) {
            probability = numgap / number;
            shannon += probability * log(probability);
        }

        for (j = 65; j < 91; j++) {
            if (count[j] > 0) {
                probability = count[j] / denom;
                shannon += probability * log(probability);
            }
        }
        ent[i] = -shannon;
    }

    Py_DECREF(msarr);
    return Py_BuildValue("O", entropy);
}
/* Free a table of `n` heap rows and the table itself; NULL is accepted. */
static void msasca_free_rows(double **rows, long n) {
    long r;
    if (!rows)
        return;
    for (r = 0; r < n; r++)
        free(rows[r]);
    free(rows);
}

/*
 * msasca(msa, sca[, turbo=1]) -> sca
 *
 * Fills the length x length matrix `sca` from the 2-D character array `msa`
 * (rows = sequences, columns = positions):
 *
 *   1. For each column the letter frequencies are computed (index = letter
 *      position in the alphabet + 1, index 0 for anything else), ambiguous
 *      letters B, J, Z and X are redistributed, and each frequency is
 *      weighted by |log(p (1 - q) / ((1 - p) q))| against the background
 *      table q, then normalised.
 *   2. sca(i, j) = sca(j, i) = |mean(xi * xj) - mean(xi) * mean(xj)| where
 *      xi is the weighted value of the letter each sequence has in column i.
 *
 * With turbo != 0 the per-sequence weighted values are cached in a
 * length x number table; if that table cannot be allocated the function
 * silently falls back to looking the values up on the fly.
 *
 * Relies on the file-level NUMCHARS and `twenty` definitions.
 *
 * NOTE(review): `sca` is written as length*length C doubles; its size and
 * dtype are not verified here -- confirm against the Python caller.
 *
 * Returns a new reference to `sca`, or NULL with an exception set.
 */
static PyObject *msasca(PyObject *self, PyObject *args, PyObject *kwargs) {

    PyArrayObject *msa = NULL, *scainfo = NULL, *msarr = NULL;
    PyObject *result = NULL;
    double **wprob = NULL; /* weighted probability matrix, length x NUMCHARS */
    double **wx = NULL;    /* weighted x~ matrix, length x number (turbo) */
    double *prob;          /* single column probability */
    long number = 0, length = 0;
    long i, j, k;
    int turbo = 1;
    static char *kwlist[] = {"msa", "sca", "turbo", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|i", kwlist,
                                     &msa, &scainfo, &turbo))
        return NULL;

    if (!PyArray_Check((PyObject *) msa) || PyArray_NDIM(msa) != 2) {
        PyErr_SetString(PyExc_ValueError, "msa must be a 2-dimensional array");
        return NULL;
    }
    if (!PyArray_Check((PyObject *) scainfo)) {
        PyErr_SetString(PyExc_TypeError, "sca must be an array");
        return NULL;
    }

    /* make sure to have a contiguous and well-behaved array (new reference) */
    msarr = (PyArrayObject *) PyArray_GETCONTIGUOUS(msa);
    if (!msarr)
        return NULL;

    number = PyArray_DIMS(msarr)[0];
    length = PyArray_DIMS(msarr)[1];

    char *seq = (char *) PyArray_DATA(msarr); /* size: number x length */
    double *sca = (double *) PyArray_DATA(scainfo);

    double q[NUMCHARS] = {0., 0.073, 0., 0.025, 0.05, 0.061, 0.042, 0.072,
        0.023, 0.053, 0., 0.064, 0.089, 0.023, 0.043, 0., 0.052, 0.04, 0.052,
        0.073, 0.056, 0., 0.063, 0.013, 0., 0.033, 0.};
    int qlist[21] = {0, 1, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 16, 17, 18,
        19, 20, 22, 23, 25};

    /* calloc leaves unallocated row slots NULL so cleanup can free them all;
       at least one slot is requested so length == 0 is not an error */
    size_t nrows = length > 0 ? (size_t) length : 1;

    wprob = calloc(nrows, sizeof *wprob);
    if (!wprob) {
        PyErr_NoMemory();
        goto done;
    }
    for (i = 0; i < length; i++) {
        wprob[i] = calloc(NUMCHARS, sizeof **wprob);
        if (!wprob[i]) {
            PyErr_NoMemory();
            goto done;
        }
    }

    /* The cache is optional: any allocation failure just disables turbo. */
    if (turbo) {
        wx = calloc(nrows, sizeof *wx);
        if (!wx)
            turbo = 0;
        for (i = 0; turbo && i < length; i++) {
            wx[i] = malloc((number > 0 ? (size_t) number : 1) * sizeof **wx);
            if (!wx[i])
                turbo = 0;
        }
        if (!turbo) {
            msasca_free_rows(wx, length);
            wx = NULL;
        }
    }

    /* build weighted probability prob */
    for (i = 0; i < length; i++) {
        prob = wprob[i];
        double phi[NUMCHARS];
        for (j = 0; j < NUMCHARS; j++) {
            prob[j] = 0.0;
            phi[j] = 0.0;
        }

        for (j = 0; j < number; j++) {
            int temp = seq[j * length + i];
            temp = (temp > 96) ? temp - 97 : temp - 65;
            if ((temp >= 0) && (temp <= 25))
                prob[temp + 1] += 1.0;
        }
        for (j = 0; j < NUMCHARS; j++)
            prob[j] = prob[j] / number;

        if (prob[2] > 0) { /* B -> D, N */
            prob[4] += prob[2] / 2.;
            prob[14] += prob[2] / 2.;
            prob[2] = 0.;
        }
        if (prob[10] > 0) { /* J -> I, L */
            prob[9] += prob[10] / 2.;
            prob[12] += prob[10] / 2.;
            prob[10] = 0.;
        }
        if (prob[26] > 0) { /* Z -> E, Q (E is index 5; 4 is D) */
            prob[5] += prob[26] / 2.;
            prob[17] += prob[26] / 2.;
            prob[26] = 0.;
        }
        if (prob[24] > 0) { /* X -> 20 AA */
            for (k = 0; k < 20; k++)
                prob[twenty[k]] += prob[24] / 20.;
            prob[24] = 0.;
        }

        double sum = 0.0;
        for (j = 0; j < 21; j++) {
            int a = qlist[j];
            phi[a] = (prob[a] == 0.0 || q[a] == 0.0 ||
                      prob[a] == 1.0 || q[a] == 1.0)
                     ? 0.0
                     : log(prob[a] * (1 - q[a]) / (1 - prob[a]) / q[a]);
            phi[a] = (phi[a] >= 0.) ? phi[a] : -phi[a];
            /* phi is applied once before and once after accumulating the
               norm, exactly as in the original formulation */
            prob[a] = prob[a] * phi[a];
            sum += prob[a] * prob[a];
            prob[a] = prob[a] * phi[a];
        }
        sum = sqrt(sum);
        if (sum == 0.) {
            for (j = 0; j < 21; j++)
                prob[qlist[j]] = 0.;
        }
        else {
            for (j = 0; j < 21; j++)
                prob[qlist[j]] = prob[qlist[j]] / sum;
        }

        /* ambiguous letters take the mean of the letters they stand for */
        prob[2] = (prob[4] + prob[14]) / 2.0;  /* B = D, N */
        prob[10] = (prob[9] + prob[12]) / 2.0; /* J = I, L */
        prob[26] = (prob[5] + prob[17]) / 2.0; /* Z = E, Q */
        sum = 0.0;
        for (k = 0; k < 20; k++)
            sum += prob[twenty[k]];
        sum = sum / 20.0;
        prob[24] = sum;                        /* X */

        if (turbo) {
            for (j = 0; j < number; j++) {
                int temp = seq[j * length + i];
                temp = (temp > 96) ? temp - 97 : temp - 65;
                if (temp >= 0 && temp <= 25)
                    wx[i][j] = prob[temp + 1];
                else
                    wx[i][j] = 0.0;
            }
        }
    }

    /* Calculate SCA Matrix */
    for (i = 0; i < length; i++) {
        for (j = i; j < length; j++) {
            double *icol, *jcol, sumi = 0.0, sumj = 0.0, sum = 0.0;
            if (turbo) {
                icol = wx[i];
                jcol = wx[j];
                for (k = 0; k < number; k++) {
                    sumi += icol[k];
                    sumj += jcol[k];
                    sum += icol[k] * jcol[k];
                }
            }
            else {
                for (k = 0; k < number; k++) {
                    int tempi = (seq[k * length + i] > 96)
                                ? seq[k * length + i] - 97
                                : seq[k * length + i] - 65;
                    double xi = (tempi >= 0 && tempi <= 25)
                                ? wprob[i][tempi + 1] : wprob[i][0];
                    int tempj = (seq[k * length + j] > 96)
                                ? seq[k * length + j] - 97
                                : seq[k * length + j] - 65;
                    double xj = (tempj >= 0 && tempj <= 25)
                                ? wprob[j][tempj + 1] : wprob[j][0];
                    sumi += xi;
                    sumj += xj;
                    sum += xi * xj;
                }
            }
            sum /= number;
            sumj /= number;
            sumi /= number;
            sum = sum - sumi * sumj;
            sum = sum >= 0 ? sum : -sum;
            sca[i * length + j] = sca[j * length + i] = sum;
        }
    }

    result = Py_BuildValue("O", scainfo);

done:
    /* free memory, including row 0 of both tables */
    msasca_free_rows(wprob, length);
    msasca_free_rows(wx, length);
    Py_XDECREF(msarr);
    return result;
}
static PyObject* w_store_get(PyObject *dummy, PyObject *args) { PyObject *capsule; uint64_t irecord; store_t *store; gf_dtype *adata; trace_t trace; PyArrayObject *array = NULL; npy_intp array_dims[1] = {0}; int32_t itmin; int32_t nsamples; int i; store_error_t err; (void)dummy; /* silence warning */ if (!PyArg_ParseTuple(args, "OKii", &capsule, &irecord, &itmin, &nsamples)) { PyErr_SetString(StoreExtError, "usage store_get(cstore, irecord, itmin, nsamples)"); return NULL; } #ifdef HAVE_CAPSULE if (!PyCapsule_IsValid(capsule, NULL)) { #else if (!PyCObject_Check(capsule)) { #endif PyErr_SetString(StoreExtError, "invalid cstore argument"); return NULL; } if (!inlimits(itmin)) { PyErr_SetString(StoreExtError, "invalid itmin argument"); return NULL; } if (!(inposlimits(nsamples) || -1 == nsamples)) { PyErr_SetString(StoreExtError, "invalid nsamples argument"); return NULL; } #ifdef HAVE_CAPSULE store = (store_t*)PyCapsule_GetPointer(capsule, NULL); #else store = (store_t*)PyCObject_AsVoidPtr(capsule); #endif err = store_get(store, irecord, &trace); if (SUCCESS != err) { PyErr_SetString(StoreExtError, store_error_names[err]); return NULL; } if (-1 != nsamples) { trace_trim(&trace, itmin, nsamples); } array_dims[0] = trace.nsamples; array = (PyArrayObject*)PyArray_EMPTY(1, array_dims, NPY_FLOAT32, 0); adata = (gf_dtype*)PyArray_DATA(array); for (i=0; i<trace.nsamples; i++) { adata[i] = fe32toh(trace.data[i]); } return Py_BuildValue("Nififf", array, trace.itmin, store->deltat, trace.is_zero, trace.begin_value, trace.end_value); } static PyObject* w_store_sum(PyObject *dummy, PyObject *args) { PyObject *capsule, *irecords_arr, *delays_arr, *weights_arr; store_t *store; gf_dtype *adata; trace_t result; PyArrayObject *array = NULL; npy_intp array_dims[1] = {0}; PyArrayObject *c_irecords_arr, *c_delays_arr, *c_weights_arr; uint64_t *irecords; float32_t *delays, *weights; npy_intp n, n1, n2; int32_t itmin; int32_t nsamples; store_error_t err; (void)dummy; /* silence warning */ 
if (!PyArg_ParseTuple(args, "OOOOii", &capsule, &irecords_arr, &delays_arr, &weights_arr, &itmin, &nsamples)) { PyErr_SetString(StoreExtError, "usage: store_sum(cstore, irecords, delays, weights, itmin, nsamples)"); return NULL; } #ifdef HAVE_CAPSULE if (!PyCapsule_IsValid(capsule, NULL)) { #else if (!PyCObject_Check(capsule)) { #endif PyErr_SetString(StoreExtError, "invalid cstore argument"); return NULL; } if (!PyArray_Check(irecords_arr) || NPY_UINT64 != PyArray_TYPE((PyArrayObject*)irecords_arr)) { PyErr_SetString(StoreExtError, "store_sum: 'irecords' must be a NumPy array of type uint64"); return NULL; } if (!PyArray_Check(delays_arr) || NPY_FLOAT32 != PyArray_TYPE((PyArrayObject*)delays_arr)) { PyErr_SetString(StoreExtError, "store_sum: 'delays' must be a NumPy array of type float32"); return NULL; } if (!PyArray_Check(weights_arr) || NPY_FLOAT32 != PyArray_TYPE((PyArrayObject*)weights_arr)) { PyErr_SetString(StoreExtError, "store_sum: 'weights' must be a NumPy array of type float32"); return NULL; } if (!inlimits(itmin)) { PyErr_SetString(StoreExtError, "invalid itmin argument"); return NULL; } if (!(inposlimits(nsamples) || -1 == nsamples)) { PyErr_SetString(StoreExtError, "invalid nsamples argument"); return NULL; } #ifdef HAVE_CAPSULE store = (store_t*)PyCapsule_GetPointer(capsule, NULL); #else store = (store_t*)PyCObject_AsVoidPtr(capsule); #endif c_irecords_arr = PyArray_GETCONTIGUOUS((PyArrayObject*)irecords_arr); c_delays_arr = PyArray_GETCONTIGUOUS((PyArrayObject*)delays_arr); c_weights_arr = PyArray_GETCONTIGUOUS((PyArrayObject*)weights_arr); n = PyArray_SIZE(c_irecords_arr); n1 = PyArray_SIZE(c_delays_arr); n2 = PyArray_SIZE(c_weights_arr); if (n != n1 || n != n2) { PyErr_SetString(StoreExtError, "store_sum: 'irecords', 'delays', and 'weights' must have same length"); return NULL; } irecords = PyArray_DATA(c_irecords_arr); delays = PyArray_DATA(c_delays_arr); weights = PyArray_DATA(c_weights_arr); err = store_sum(store, irecords, delays, weights, 
n, itmin, nsamples, &result); if (SUCCESS != err) { PyErr_SetString(StoreExtError, store_error_names[err]); return NULL; } Py_DECREF(c_irecords_arr); Py_DECREF(c_delays_arr); Py_DECREF(c_weights_arr); array_dims[0] = result.nsamples; array = (PyArrayObject*)PyArray_EMPTY(1, array_dims, NPY_FLOAT32, 0); adata = (gf_dtype*)PyArray_DATA(array); memcpy(adata, result.data, result.nsamples*sizeof(gf_dtype)); free(result.data); return Py_BuildValue("Nififf", array, result.itmin, store->deltat, result.is_zero, result.begin_value, result.end_value); } static PyMethodDef StoreExtMethods[] = { {"store_init", w_store_init, METH_VARARGS, "Initialize store struct." }, {"store_get", w_store_get, METH_VARARGS, "Get a GF trace." }, {"store_sum", w_store_sum, METH_VARARGS, "Get weight-and-delay-sum of GF traces." }, {NULL, NULL, 0, NULL} /* Sentinel */ }; PyMODINIT_FUNC initstore_ext(void) { PyObject *m; m = Py_InitModule("store_ext", StoreExtMethods); if (m == NULL) return; import_array(); StoreExtError = PyErr_NewException("store_ext.error", NULL, NULL); Py_INCREF(StoreExtError); /* required, because other code could remove `error` from the module, what would create a dangling pointer. */ PyModule_AddObject(m, "StoreExtError", StoreExtError); }
/*
 * writeFasta(filename, labels, msa[, line_length=60]) -> filename
 *
 * Writes the rows of the 2-D character array `msa` to `filename`, one record
 * per row: a ">label" header line taken from the list `labels`, followed by
 * the row's characters wrapped at `line_length` characters per line.
 *
 * Output layout per record (unchanged from the previous implementation):
 * every full line ends with a newline, then the remaining characters (if
 * any) are written, then one more newline.  A row whose length is an exact
 * multiple of line_length is therefore followed by an empty line.
 * NOTE(review): confirm whether that empty line is wanted.
 *
 * NOTE(review): the array is read one byte per element; presumably the
 * caller passes a single-character string dtype -- confirm.
 *
 * Raises ValueError when the number of labels and rows differ or when
 * line_length is not positive, and IOError when the file cannot be opened or
 * written.  Returns the file name as a Python string.
 */
static PyObject *writeFasta(PyObject *self, PyObject *args, PyObject *kwargs) {

    char *filename;
    int line_length = 60;
    PyObject *labels;
    PyArrayObject *msa, *msarr = NULL;
    PyObject *result = NULL;
    FILE *file = NULL;
#if PY_MAJOR_VERSION >= 3
    PyObject *plabel = NULL;
#endif
    static char *kwlist[] = {"filename", "labels", "msa", "line_length", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sOO|i", kwlist,
                                     &filename, &labels, &msa, &line_length))
        return NULL;

    if (line_length < 1) {
        PyErr_SetString(PyExc_ValueError, "line_length must be positive");
        return NULL;
    }
    if (!PyArray_Check((PyObject *) msa) || PyArray_NDIM(msa) != 2) {
        PyErr_SetString(PyExc_ValueError, "msa must be a 2-dimensional array");
        return NULL;
    }

    /* make sure to have a contiguous and well-behaved array (new reference) */
    msarr = (PyArrayObject *) PyArray_GETCONTIGUOUS(msa);
    if (!msarr)
        return NULL;

    long numseq = PyArray_DIMS(msarr)[0], lenseq = PyArray_DIMS(msarr)[1];

    if (numseq != PyList_Size(labels)) {
        PyErr_SetString(PyExc_ValueError,
                        "size of labels and msa array does not match");
        goto done;
    }

    file = fopen(filename, "wb");
    if (!file) {
        PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
        goto done;
    }

    long nlines = lenseq / line_length;
    long remainder = lenseq - (long) line_length * nlines;
    /* The buffer is NOT NUL-terminated; rows are addressed by shape. */
    const char *seq = PyArray_DATA(msarr);
    long i, j;

    for (i = 0; i < numseq; i++) {
        const char *label;
        PyObject *item = PyList_GetItem(labels, (Py_ssize_t) i);
        if (!item)
            goto done;

#if PY_MAJOR_VERSION >= 3
        plabel = PyUnicode_AsEncodedString(item, "utf-8", "strict");
        if (!plabel)
            goto done;
        label = PyBytes_AsString(plabel);
#else
        label = PyString_AsString(item);
#endif
        if (!label)
            goto done;

        fprintf(file, ">%s\n", label);

#if PY_MAJOR_VERSION >= 3
        /* `label` points into plabel, so release it only after writing */
        Py_DECREF(plabel);
        plabel = NULL;
#endif

        const char *row = seq + i * lenseq;
        for (j = 0; j < nlines; j++) {
            fwrite(row + j * line_length, 1, (size_t) line_length, file);
            fputc('\n', file);
        }
        if (remainder)
            fwrite(row + nlines * line_length, 1, (size_t) remainder, file);
        fputc('\n', file);
    }

    if (ferror(file)) {
        PyErr_SetString(PyExc_IOError, "could not write FASTA file");
        goto done;
    }
    if (fclose(file) != 0) {
        file = NULL;
        PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
        goto done;
    }
    file = NULL;

    result = Py_BuildValue("s", filename);

done:
#if PY_MAJOR_VERSION >= 3
    Py_XDECREF(plabel);
#endif
    if (file)
        fclose(file);
    Py_XDECREF(msarr);
    return result;
}