Exemplo n.º 1
0
/* Compute log(exp(logx) + exp(logy)) while staying in log space.
 *
 * Python arguments: two objects that PyNumber_AsDouble can convert.
 *
 * Returns a new reference.  When the two values differ by more than 100
 * the smaller term is negligible, so the larger argument object itself is
 * returned; otherwise a new float is created.  Returns NULL with an
 * exception set if argument parsing or either conversion fails.
 */
static PyObject *cMarkovModel__logadd(PyObject *self, PyObject *args)
{
    PyObject *py_logx, *py_logy;
    double logx, logy, minxy;
    double sum;

    if(!PyArg_ParseTuple(args, "OO", &py_logx, &py_logy))
        return NULL;

    /* Check the error indicator after each conversion so that the second
       conversion is never attempted while an exception is already pending. */
    logx = PyNumber_AsDouble(py_logx);
    if(PyErr_Occurred())
        return NULL;
    logy = PyNumber_AsDouble(py_logy);
    if(PyErr_Occurred())
        return NULL;

    if(logy-logx > 100.0) {
        Py_INCREF(py_logy);
        return py_logy;
    } else if (logx-logy > 100.0) {
        Py_INCREF(py_logx);
        return py_logx;
    }

    /* Factor out the smaller exponent; the remaining exponents lie in
       [0, 100], so exp() cannot overflow here. */
    minxy = (logx < logy) ? logx : logy;
    sum = minxy + log(exp(logx-minxy) + exp(logy-minxy));
    return PyFloat_FromDouble(sum);
}
Exemplo n.º 2
0
/* Return the score for aligning element i of sequence A with element j of
 * sequence B.
 *
 * Fast path: when both use_sequence_cstring and use_match_mismatch_scores
 * are set, the characters sequenceA[i] and sequenceB[j] are compared
 * directly and `match` or `mismatch` is returned without touching any
 * Python object.
 *
 * Slow path: the two items are fetched from py_sequenceA / py_sequenceB
 * and py_match_fn(itemA, itemB) is called; its result is converted with
 * PyNumber_AsDouble.
 *
 * On failure in the slow path a Python exception is left set and the
 * value computed so far (0 if the call never completed) is returned, so
 * callers must check PyErr_Occurred().
 */
double _get_match_score(PyObject *py_sequenceA, PyObject *py_sequenceB,
			PyObject *py_match_fn, int i, int j,
			char *sequenceA, char *sequenceB,
			int use_sequence_cstring,
			double match, double mismatch,
			int use_match_mismatch_scores)
{
    PyObject *py_A=NULL, *py_B=NULL;
    PyObject *py_arglist=NULL, *py_result=NULL;
    double score = 0;

    if(use_sequence_cstring && use_match_mismatch_scores)
        return (sequenceA[i] == sequenceB[j]) ? match : mismatch;

    /* Calculate the match score by calling back into Python. */
    if(!(py_A = PySequence_GetItem(py_sequenceA, i)))
        goto _get_match_score_cleanup;
    if(!(py_B = PySequence_GetItem(py_sequenceB, j)))
        goto _get_match_score_cleanup;
    if(!(py_arglist = Py_BuildValue("(OO)", py_A, py_B)))
        goto _get_match_score_cleanup;

    /* PyObject_CallObject replaces the deprecated PyEval_CallObject; it
       takes the same callable and argument tuple. */
    if(!(py_result = PyObject_CallObject(py_match_fn, py_arglist)))
        goto _get_match_score_cleanup;
    score = PyNumber_AsDouble(py_result);

 _get_match_score_cleanup:
    Py_XDECREF(py_A);
    Py_XDECREF(py_B);
    Py_XDECREF(py_arglist);
    Py_XDECREF(py_result);
    return score;
}
Exemplo n.º 3
0
/* This function is a more-or-less straightforward port of the
 * equivalent function in pairwise2.  Please see there for algorithm
 * documentation.
 *
 * Python arguments (format "OOOddddiiii"):
 *   sequenceA, sequenceB  - sequences to align (must be non-empty)
 *   match_fn              - callable scoring a pair of items; if it has
 *                           numeric `match` and `mismatch` attributes
 *                           those are used directly instead of calling it
 *   open_A, extend_A, open_B, extend_B - gap penalties
 *   penalize_extend_when_opening, penalize_end_gaps,
 *   align_globally, score_only          - integer flags
 *
 * Returns a new (score_matrix, trace_matrix) tuple.  Both are lists of
 * lenA rows with lenB entries each.  score_matrix holds floats.  Each
 * trace_matrix entry is [None] on the first row/column and a list of
 * (row, col) tuples elsewhere; when score_only is set the entries are
 * None because no traceback is exported.
 *
 * Returns NULL with an exception set on failure (TypeError for bad
 * argument types, ValueError for empty sequences, MemoryError when the
 * matrices cannot be allocated, or whatever match_fn raised).
 */
static PyObject *cpairwise2__make_score_matrix_fast(
    PyObject *self, PyObject *args)
{
    int i;
    int row, col;

    PyObject *py_sequenceA, *py_sequenceB, *py_match_fn;
#if PY_MAJOR_VERSION >= 3
    PyObject *py_bytesA = NULL, *py_bytesB = NULL;
#endif
    char *sequenceA=NULL, *sequenceB=NULL;
    int use_sequence_cstring;
    double open_A, extend_A, open_B, extend_B;
    int penalize_extend_when_opening, penalize_end_gaps;
    int align_globally, score_only;

    PyObject *py_match=NULL, *py_mismatch=NULL;
    double first_A_gap, first_B_gap;
    double match, mismatch;
    int use_match_mismatch_scores;
    Py_ssize_t py_lenA, py_lenB;
    /* Initialised so the cleanup code may always read them. */
    int lenA = 0, lenB = 0;
    size_t num_cells, num_row_cache, num_col_cache;
    double *score_matrix = NULL;
    struct IndexList *trace_matrix = NULL;
    PyObject *py_score_matrix=NULL, *py_trace_matrix=NULL;

    double *row_cache_score = NULL,
        *col_cache_score = NULL;
    struct IndexList *row_cache_index = NULL,
        *col_cache_index = NULL;

    PyObject *py_retval = NULL;

    if(!PyArg_ParseTuple(args, "OOOddddiiii", &py_sequenceA, &py_sequenceB,
                         &py_match_fn, &open_A, &extend_A, &open_B, &extend_B,
                         &penalize_extend_when_opening, &penalize_end_gaps,
                         &align_globally, &score_only))
        return NULL;
    if(!PySequence_Check(py_sequenceA) || !PySequence_Check(py_sequenceB)) {
        PyErr_SetString(PyExc_TypeError,
                        "py_sequenceA and py_sequenceB should be sequences.");
        return NULL;
    }
    /* Validate the callable before acquiring any resource so that this
       early return cannot leak the bytes objects created below. */
    if(!PyCallable_Check(py_match_fn)) {
        PyErr_SetString(PyExc_TypeError, "py_match_fn must be callable.");
        return NULL;
    }

    /* Optimize for the common case.  Check to see if py_sequenceA and
       py_sequenceB are strings.  If they are, use the c string
       representation. */
#if PY_MAJOR_VERSION < 3
    use_sequence_cstring = 0;
    if(PyString_Check(py_sequenceA) && PyString_Check(py_sequenceB)) {
        sequenceA = PyString_AS_STRING(py_sequenceA);
        sequenceB = PyString_AS_STRING(py_sequenceB);
        use_sequence_cstring = 1;
    }
#else
    py_bytesA = _create_bytes_object(py_sequenceA);
    py_bytesB = _create_bytes_object(py_sequenceB);
    if (py_bytesA && py_bytesB) {
        sequenceA = PyBytes_AS_STRING(py_bytesA);
        sequenceB = PyBytes_AS_STRING(py_bytesB);
        use_sequence_cstring = 1;
    }
    else {
        /* Release with the same ownership rule as the final cleanup (a
           bytes object identical to the input is not ours to release),
           then forget the pointers so cleanup cannot release them again. */
        if (py_bytesA != NULL && py_bytesA != py_sequenceA)
            Py_DECREF(py_bytesA);
        if (py_bytesB != NULL && py_bytesB != py_sequenceB)
            Py_DECREF(py_bytesB);
        py_bytesA = NULL;
        py_bytesB = NULL;
        use_sequence_cstring = 0;
    }
#endif

    /* Optimize for the common case.  Check to see if py_match_fn is
       an identity_match.  If so, pull out the match and mismatch
       member variables and calculate the scores myself. */
    match = mismatch = 0;
    use_match_mismatch_scores = 0;
    if(!(py_match = PyObject_GetAttrString(py_match_fn, "match")))
        goto cleanup_after_py_match_fn;
    match = PyNumber_AsDouble(py_match);
    if(PyErr_Occurred())
        goto cleanup_after_py_match_fn;
    if(!(py_mismatch = PyObject_GetAttrString(py_match_fn, "mismatch")))
        goto cleanup_after_py_match_fn;
    mismatch = PyNumber_AsDouble(py_mismatch);
    if(PyErr_Occurred())
        goto cleanup_after_py_match_fn;
    use_match_mismatch_scores = 1;
cleanup_after_py_match_fn:
    /* Missing or non-numeric attributes are not an error: fall back to
       calling py_match_fn for every pair. */
    if(PyErr_Occurred())
        PyErr_Clear();
    Py_XDECREF(py_match);
    Py_XDECREF(py_mismatch);

    /* Cache some commonly used gap penalties */
    first_A_gap = calc_affine_penalty(1, open_A, extend_A,
                                      penalize_extend_when_opening);
    first_B_gap = calc_affine_penalty(1, open_B, extend_B,
                                      penalize_extend_when_opening);

    /* Determine and validate the matrix dimensions. */
    py_lenA = PySequence_Length(py_sequenceA);
    if(py_lenA < 0)
        goto _cleanup_make_score_matrix_fast;
    py_lenB = PySequence_Length(py_sequenceB);
    if(py_lenB < 0)
        goto _cleanup_make_score_matrix_fast;
    if(py_lenA == 0 || py_lenB == 0) {
        /* The first row/column initialisation below indexes element 0 of
           both sequences, so empty input cannot be processed. */
        PyErr_SetString(PyExc_ValueError,
                        "py_sequenceA and py_sequenceB must not be empty.");
        goto _cleanup_make_score_matrix_fast;
    }
    /* All offsets are computed in int, so lenA*lenB must fit in an int. */
    if(py_lenA > INT_MAX || py_lenB > INT_MAX ||
       py_lenA > INT_MAX / py_lenB) {
        PyErr_SetString(PyExc_MemoryError, "Out of memory");
        goto _cleanup_make_score_matrix_fast;
    }
    lenA = (int)py_lenA;
    lenB = (int)py_lenB;
    num_cells = (size_t)lenA * (size_t)lenB;

    /* Allocate matrices for storing the results and initialize them.
       calloc checks the size multiplication and zero-fills, so every
       IndexList is already in the same all-zero state that the caches
       below start from if we have to bail out before the init loop. */
    score_matrix = calloc(num_cells, sizeof(*score_matrix));
    trace_matrix = calloc(num_cells, sizeof(*trace_matrix));
    if(!score_matrix || !trace_matrix) {
        PyErr_SetString(PyExc_MemoryError, "Out of memory");
        goto _cleanup_make_score_matrix_fast;
    }
    for(i=0; i<lenA*lenB; i++) {
        score_matrix[i] = 0;
        IndexList_init(&trace_matrix[i]);
    }

    /* Initialize the first row and col of the score matrix. */
    for(i=0; i<lenA; i++) {
        double score = _get_match_score(py_sequenceA, py_sequenceB,
                                        py_match_fn, i, 0,
                                        sequenceA, sequenceB,
                                        use_sequence_cstring,
                                        match, mismatch,
                                        use_match_mismatch_scores);
        if(PyErr_Occurred())
            goto _cleanup_make_score_matrix_fast;
        if(penalize_end_gaps)
            score += calc_affine_penalty(i, open_B, extend_B,
                                         penalize_extend_when_opening);
        score_matrix[i*lenB] = score;
    }
    for(i=0; i<lenB; i++) {
        double score = _get_match_score(py_sequenceA, py_sequenceB,
                                        py_match_fn, 0, i,
                                        sequenceA, sequenceB,
                                        use_sequence_cstring,
                                        match, mismatch,
                                        use_match_mismatch_scores);
        if(PyErr_Occurred())
            goto _cleanup_make_score_matrix_fast;
        if(penalize_end_gaps)
            score += calc_affine_penalty(i, open_A, extend_A,
                                         penalize_extend_when_opening);
        score_matrix[i] = score;
    }

    /* Now initialize the row and col cache.  The caches are zero-filled
       on allocation; at least one element is requested so that a
       length-1 sequence does not depend on what a zero-size allocation
       returns. */
    num_row_cache = (size_t)(lenA-1);
    num_col_cache = (size_t)(lenB-1);
    row_cache_score = calloc(num_row_cache ? num_row_cache : 1,
                             sizeof(*row_cache_score));
    row_cache_index = calloc(num_row_cache ? num_row_cache : 1,
                             sizeof(*row_cache_index));
    col_cache_score = calloc(num_col_cache ? num_col_cache : 1,
                             sizeof(*col_cache_score));
    col_cache_index = calloc(num_col_cache ? num_col_cache : 1,
                             sizeof(*col_cache_index));
    if(!row_cache_score || !row_cache_index ||
       !col_cache_score || !col_cache_index) {
        PyErr_SetString(PyExc_MemoryError, "Out of memory");
        goto _cleanup_make_score_matrix_fast;
    }
    for(i=0; i<lenA-1; i++) {
        row_cache_score[i] = score_matrix[i*lenB] + first_A_gap;
        IndexList_append(&row_cache_index[i], i, 0);
    }
    for(i=0; i<lenB-1; i++) {
        col_cache_score[i] = score_matrix[i] + first_B_gap;
        IndexList_append(&col_cache_index[i], 0, i);
    }

    /* Fill in the score matrix. */
    for(row=1; row<lenA; row++) {
        for(col=1; col<lenB; col++) {
            double nogap_score, row_score, col_score, best_score;
            int best_score_rint;
            struct IndexList *il;

            double score, open_score, extend_score;
            int open_score_rint, extend_score_rint;

            /* Calculate the best score. */
            nogap_score = score_matrix[(row-1)*lenB+col-1];
            if(col > 1) {
                row_score = row_cache_score[row-1];
            } else {
                row_score = nogap_score-1; /* Make sure it's not best score */
            }
            if(row > 1) {
                col_score = col_cache_score[col-1];
            } else {
                col_score = nogap_score-1; /* Make sure it's not best score */
            }

            best_score = (row_score > col_score) ? row_score : col_score;
            if(nogap_score > best_score)
                best_score = nogap_score;
            best_score_rint = rint(best_score);

            /* Set the score and traceback matrices. */
            score = best_score + _get_match_score(py_sequenceA, py_sequenceB,
                                                  py_match_fn, row, col,
                                                  sequenceA, sequenceB,
                                                  use_sequence_cstring,
                                                  match, mismatch,
                                                  use_match_mismatch_scores);
            if(PyErr_Occurred())
                goto _cleanup_make_score_matrix_fast;
            /* Local alignments never carry a negative score forward. */
            if(!align_globally && score < 0)
                score_matrix[row*lenB+col] = 0;
            else
                score_matrix[row*lenB+col] = score;

            /* Record every predecessor whose rounded score ties the best. */
            il = &trace_matrix[row*lenB+col];
            if(best_score_rint == rint(nogap_score)) {
                IndexList_append(il, row-1, col-1);
            }
            if(best_score_rint == rint(row_score)) {
                IndexList_extend(il, &row_cache_index[row-1]);
            }
            if(best_score_rint == rint(col_score)) {
                IndexList_extend(il, &col_cache_index[col-1]);
            }

            /* Update the cached column scores. */
            open_score = score_matrix[(row-1)*lenB+col-1] + first_B_gap;
            extend_score = col_cache_score[col-1] + extend_B;
            open_score_rint = rint(open_score);
            extend_score_rint = rint(extend_score);
            if(open_score_rint > extend_score_rint) {
                col_cache_score[col-1] = open_score;
                IndexList_clear(&col_cache_index[col-1]);
                IndexList_append(&col_cache_index[col-1], row-1, col-1);
            } else if(extend_score_rint > open_score_rint) {
                col_cache_score[col-1] = extend_score;
            } else {
                col_cache_score[col-1] = open_score;
                if(!IndexList_contains(&col_cache_index[col-1], row-1, col-1))
                    IndexList_append(&col_cache_index[col-1], row-1, col-1);
            }

            /* Update the cached row scores. */
            open_score = score_matrix[(row-1)*lenB+col-1] + first_A_gap;
            extend_score = row_cache_score[row-1] + extend_A;
            open_score_rint = rint(open_score);
            extend_score_rint = rint(extend_score);
            if(open_score_rint > extend_score_rint) {
                row_cache_score[row-1] = open_score;
                IndexList_clear(&row_cache_index[row-1]);
                IndexList_append(&row_cache_index[row-1], row-1, col-1);
            } else if(extend_score_rint > open_score_rint) {
                row_cache_score[row-1] = extend_score;
            } else {
                row_cache_score[row-1] = open_score;
                if(!IndexList_contains(&row_cache_index[row-1], row-1, col-1))
                    IndexList_append(&row_cache_index[row-1], row-1, col-1);
            }
        }
    }

    /* Save the score and traceback matrices into real python objects. */
    if(!(py_score_matrix = PyList_New(lenA)))
        goto _cleanup_make_score_matrix_fast;
    if(!(py_trace_matrix = PyList_New(lenA)))
        goto _cleanup_make_score_matrix_fast;
    for(row=0; row<lenA; row++) {
        PyObject *py_score_row, *py_trace_row;
        /* Each row is handed to its matrix immediately, so releasing the
           matrix during cleanup also releases partially filled rows. */
        if(!(py_score_row = PyList_New(lenB)))
            goto _cleanup_make_score_matrix_fast;
        PyList_SET_ITEM(py_score_matrix, row, py_score_row);
        if(!(py_trace_row = PyList_New(lenB)))
            goto _cleanup_make_score_matrix_fast;
        PyList_SET_ITEM(py_trace_matrix, row, py_trace_row);

        for(col=0; col<lenB; col++) {
            int k;
            PyObject *py_score, *py_indexlist;
            int offset = row*lenB + col;
            struct IndexList *il = &trace_matrix[offset];

            /* Set py_score_matrix[row][col] to the score. */
            if(!(py_score = PyFloat_FromDouble(score_matrix[offset])))
                goto _cleanup_make_score_matrix_fast;
            PyList_SET_ITEM(py_score_row, col, py_score);

            if(score_only) {
                /* No traceback is exported, but the slot must still hold
                   a real object before the list is visible to Python. */
                Py_INCREF(Py_None);
                PyList_SET_ITEM(py_trace_row, col, Py_None);
                continue;
            }
            /* Set py_trace_matrix[row][col] to a list of indexes.  On
               the edges of the matrix (row or column is 0), the
               matrix should be [None]. */
            if(!row || !col) {
                if(!(py_indexlist = PyList_New(1)))
                    goto _cleanup_make_score_matrix_fast;
                Py_INCREF(Py_None);
                PyList_SET_ITEM(py_indexlist, 0, Py_None);
            }
            else {
                if(!(py_indexlist = PyList_New(il->num_used)))
                    goto _cleanup_make_score_matrix_fast;
                for(k=0; k<il->num_used; k++) {
                    PyObject *py_index;
                    int prev_row = il->indexes[k*2],
                        prev_col = il->indexes[k*2+1];
                    if(!(py_index = Py_BuildValue("(ii)",
                                                  prev_row, prev_col))) {
                        /* Not yet owned by py_trace_row: release it here. */
                        Py_DECREF(py_indexlist);
                        goto _cleanup_make_score_matrix_fast;
                    }
                    PyList_SET_ITEM(py_indexlist, k, py_index);
                }
            }
            PyList_SET_ITEM(py_trace_row, col, py_indexlist);
        }
    }

    /* "(OO)" takes its own references; ours are dropped in cleanup. */
    py_retval = Py_BuildValue("(OO)", py_score_matrix, py_trace_matrix);


 _cleanup_make_score_matrix_fast:
    free(score_matrix);
    if(trace_matrix) {
        for(i=0; i<lenA*lenB; i++)
            IndexList_free(&trace_matrix[i]);
        free(trace_matrix);
    }
    free(row_cache_score);
    free(col_cache_score);
    if(row_cache_index) {
        for(i=0; i<lenA-1; i++)
            IndexList_free(&row_cache_index[i]);
        free(row_cache_index);
    }
    if(col_cache_index) {
        for(i=0; i<lenB-1; i++)
            IndexList_free(&col_cache_index[i]);
        free(col_cache_index);
    }
    Py_XDECREF(py_score_matrix);
    Py_XDECREF(py_trace_matrix);
#if PY_MAJOR_VERSION >= 3
    if (py_bytesA != NULL && py_bytesA != py_sequenceA) Py_DECREF(py_bytesA);
    if (py_bytesB != NULL && py_bytesB != py_sequenceB) Py_DECREF(py_bytesB);
#endif

    return py_retval;
}