static PyObject *cMarkovModel__logadd(PyObject *self, PyObject *args) { PyObject *py_logx, *py_logy; double logx, logy, minxy; double sum; if(!PyArg_ParseTuple(args, "OO", &py_logx, &py_logy)) return NULL; logx = PyNumber_AsDouble(py_logx); logy = PyNumber_AsDouble(py_logy); if(PyErr_Occurred()) return NULL; if(logy-logx > 100.0) { Py_INCREF(py_logy); return py_logy; } else if (logx-logy > 100.0) { Py_INCREF(py_logx); return py_logx; } minxy = (logx < logy) ? logx : logy; sum = minxy + log(exp(logx-minxy) + exp(logy-minxy)); return PyFloat_FromDouble(sum); }
double _get_match_score(PyObject *py_sequenceA, PyObject *py_sequenceB, PyObject *py_match_fn, int i, int j, char *sequenceA, char *sequenceB, int use_sequence_cstring, double match, double mismatch, int use_match_mismatch_scores) { PyObject *py_A=NULL, *py_B=NULL; PyObject *py_arglist=NULL, *py_result=NULL; double score = 0; if(use_sequence_cstring && use_match_mismatch_scores) { score = (sequenceA[i] == sequenceB[j]) ? match : mismatch; return score; } /* Calculate the match score. */ if(!(py_A = PySequence_GetItem(py_sequenceA, i))) goto _get_match_score_cleanup; if(!(py_B = PySequence_GetItem(py_sequenceB, j))) goto _get_match_score_cleanup; if(!(py_arglist = Py_BuildValue("(OO)", py_A, py_B))) goto _get_match_score_cleanup; if(!(py_result = PyEval_CallObject(py_match_fn, py_arglist))) goto _get_match_score_cleanup; score = PyNumber_AsDouble(py_result); _get_match_score_cleanup: if(py_A) { Py_DECREF(py_A); } if(py_B) { Py_DECREF(py_B); } if(py_arglist) { Py_DECREF(py_arglist); } if(py_result) { Py_DECREF(py_result); } return score; }
/* This function is a more-or-less straightforward port of the * equivalent function in pairwise2. Please see there for algorithm * documentation. */ static PyObject *cpairwise2__make_score_matrix_fast( PyObject *self, PyObject *args) { int i; int row, col; PyObject *py_sequenceA, *py_sequenceB, *py_match_fn; #if PY_MAJOR_VERSION >= 3 PyObject *py_bytesA, *py_bytesB; #endif char *sequenceA=NULL, *sequenceB=NULL; int use_sequence_cstring; double open_A, extend_A, open_B, extend_B; int penalize_extend_when_opening, penalize_end_gaps; int align_globally, score_only; PyObject *py_match=NULL, *py_mismatch=NULL; double first_A_gap, first_B_gap; double match, mismatch; int use_match_mismatch_scores; int lenA, lenB; double *score_matrix = (double *)NULL; struct IndexList *trace_matrix = (struct IndexList *)NULL; PyObject *py_score_matrix=NULL, *py_trace_matrix=NULL; double *row_cache_score = (double *)NULL, *col_cache_score = (double *)NULL; struct IndexList *row_cache_index = (struct IndexList *)NULL, *col_cache_index = (struct IndexList *)NULL; PyObject *py_retval = NULL; if(!PyArg_ParseTuple(args, "OOOddddiiii", &py_sequenceA, &py_sequenceB, &py_match_fn, &open_A, &extend_A, &open_B, &extend_B, &penalize_extend_when_opening, &penalize_end_gaps, &align_globally, &score_only)) return NULL; if(!PySequence_Check(py_sequenceA) || !PySequence_Check(py_sequenceB)) { PyErr_SetString(PyExc_TypeError, "py_sequenceA and py_sequenceB should be sequences."); return NULL; } /* Optimize for the common case. Check to see if py_sequenceA and py_sequenceB are strings. If they are, use the c string representation. */ #if PY_MAJOR_VERSION < 3 use_sequence_cstring = 0; if(PyString_Check(py_sequenceA) && PyString_Check(py_sequenceB)) { sequenceA = PyString_AS_STRING(py_sequenceA); sequenceB = PyString_AS_STRING(py_sequenceB); use_sequence_cstring = 1; } #else py_bytesA = _create_bytes_object(py_sequenceA); py_bytesB = _create_bytes_object(py_sequenceB); if (py_bytesA && py_bytesB) { sequenceA = PyBytes_AS_STRING(py_bytesA); sequenceB = PyBytes_AS_STRING(py_bytesB); use_sequence_cstring = 1; } else { Py_XDECREF(py_bytesA); Py_XDECREF(py_bytesB); use_sequence_cstring = 0; } #endif if(!PyCallable_Check(py_match_fn)) { PyErr_SetString(PyExc_TypeError, "py_match_fn must be callable."); return NULL; } /* Optimize for the common case. Check to see if py_match_fn is an identity_match. If so, pull out the match and mismatch member variables and calculate the scores myself. */ match = mismatch = 0; use_match_mismatch_scores = 0; if(!(py_match = PyObject_GetAttrString(py_match_fn, "match"))) goto cleanup_after_py_match_fn; match = PyNumber_AsDouble(py_match); if(PyErr_Occurred()) goto cleanup_after_py_match_fn; if(!(py_mismatch = PyObject_GetAttrString(py_match_fn, "mismatch"))) goto cleanup_after_py_match_fn; mismatch = PyNumber_AsDouble(py_mismatch); if(PyErr_Occurred()) goto cleanup_after_py_match_fn; use_match_mismatch_scores = 1; cleanup_after_py_match_fn: if(PyErr_Occurred()) PyErr_Clear(); if(py_match) { Py_DECREF(py_match); } if(py_mismatch) { Py_DECREF(py_mismatch); } /* Cache some commonly used gap penalties */ first_A_gap = calc_affine_penalty(1, open_A, extend_A, penalize_extend_when_opening); first_B_gap = calc_affine_penalty(1, open_B, extend_B, penalize_extend_when_opening); /* Allocate matrices for storing the results and initialize them. */ lenA = PySequence_Length(py_sequenceA); lenB = PySequence_Length(py_sequenceB); score_matrix = malloc(lenA*lenB*sizeof(*score_matrix)); trace_matrix = malloc(lenA*lenB*sizeof(*trace_matrix)); if(!score_matrix || !trace_matrix) { PyErr_SetString(PyExc_MemoryError, "Out of memory"); goto _cleanup_make_score_matrix_fast; } for(i=0; i<lenA*lenB; i++) { score_matrix[i] = 0; IndexList_init(&trace_matrix[i]); } /* Initialize the first row and col of the score matrix. */ for(i=0; i<lenA; i++) { double score = _get_match_score(py_sequenceA, py_sequenceB, py_match_fn, i, 0, sequenceA, sequenceB, use_sequence_cstring, match, mismatch, use_match_mismatch_scores); if(PyErr_Occurred()) goto _cleanup_make_score_matrix_fast; if(penalize_end_gaps) score += calc_affine_penalty(i, open_B, extend_B, penalize_extend_when_opening); score_matrix[i*lenB] = score; } for(i=0; i<lenB; i++) { double score = _get_match_score(py_sequenceA, py_sequenceB, py_match_fn, 0, i, sequenceA, sequenceB, use_sequence_cstring, match, mismatch, use_match_mismatch_scores); if(PyErr_Occurred()) goto _cleanup_make_score_matrix_fast; if(penalize_end_gaps) score += calc_affine_penalty(i, open_A, extend_A, penalize_extend_when_opening); score_matrix[i] = score; } /* Now initialize the row and col cache. */ row_cache_score = malloc((lenA-1)*sizeof(*row_cache_score)); row_cache_index = malloc((lenA-1)* sizeof(*row_cache_index)); col_cache_score = malloc((lenB-1)*sizeof(*col_cache_score)); col_cache_index = malloc((lenB-1)* sizeof(*col_cache_index)); if(!row_cache_score || !row_cache_index || !col_cache_score || !col_cache_index) { PyErr_SetString(PyExc_MemoryError, "Out of memory"); goto _cleanup_make_score_matrix_fast; } memset((void *)row_cache_score, 0, (lenA-1)*sizeof(*row_cache_score)); memset((void *)row_cache_index, 0, (lenA-1)*sizeof(*row_cache_index)); memset((void *)col_cache_score, 0, (lenB-1)*sizeof(*col_cache_score)); memset((void *)col_cache_index, 0, (lenB-1)*sizeof(*col_cache_index)); for(i=0; i<lenA-1; i++) { row_cache_score[i] = score_matrix[i*lenB] + first_A_gap; IndexList_append(&row_cache_index[i], i, 0); } for(i=0; i<lenB-1; i++) { col_cache_score[i] = score_matrix[i] + first_B_gap; IndexList_append(&col_cache_index[i], 0, i); } /* Fill in the score matrix. */ for(row=1; row<lenA; row++) { for(col=1; col<lenB; col++) { double nogap_score, row_score, col_score, best_score; int best_score_rint; struct IndexList *il; double score, open_score, extend_score; int open_score_rint, extend_score_rint; /* Calculate the best score. */ nogap_score = score_matrix[(row-1)*lenB+col-1]; if(col > 1) { row_score = row_cache_score[row-1]; } else { row_score = nogap_score-1; /* Make sure it's not best score */ } if(row > 1) { col_score = col_cache_score[col-1]; } else { col_score = nogap_score-1; /* Make sure it's not best score */ } best_score = (row_score > col_score) ? row_score : col_score; if(nogap_score > best_score) best_score = nogap_score; best_score_rint = rint(best_score); /* Set the score and traceback matrices. */ score = best_score + _get_match_score(py_sequenceA, py_sequenceB, py_match_fn, row, col, sequenceA, sequenceB, use_sequence_cstring, match, mismatch, use_match_mismatch_scores); if(PyErr_Occurred()) goto _cleanup_make_score_matrix_fast; if(!align_globally && score < 0) score_matrix[row*lenB+col] = 0; else score_matrix[row*lenB+col] = score; il = &trace_matrix[row*lenB+col]; if(best_score_rint == rint(nogap_score)) { IndexList_append(il, row-1, col-1); } if(best_score_rint == rint(row_score)) { IndexList_extend(il, &row_cache_index[row-1]); } if(best_score_rint == rint(col_score)) { IndexList_extend(il, &col_cache_index[col-1]); } /* Update the cached column scores. */ open_score = score_matrix[(row-1)*lenB+col-1] + first_B_gap; extend_score = col_cache_score[col-1] + extend_B; open_score_rint = rint(open_score); extend_score_rint = rint(extend_score); if(open_score_rint > extend_score_rint) { col_cache_score[col-1] = open_score; IndexList_clear(&col_cache_index[col-1]); IndexList_append(&col_cache_index[col-1], row-1, col-1); } else if(extend_score_rint > open_score_rint) { col_cache_score[col-1] = extend_score; } else { col_cache_score[col-1] = open_score; if(!IndexList_contains(&col_cache_index[col-1], row-1, col-1)) IndexList_append(&col_cache_index[col-1], row-1, col-1); } /* Update the cached row scores. */ open_score = score_matrix[(row-1)*lenB+col-1] + first_A_gap; extend_score = row_cache_score[row-1] + extend_A; open_score_rint = rint(open_score); extend_score_rint = rint(extend_score); if(open_score_rint > extend_score_rint) { row_cache_score[row-1] = open_score; IndexList_clear(&row_cache_index[row-1]); IndexList_append(&row_cache_index[row-1], row-1, col-1); } else if(extend_score_rint > open_score_rint) { row_cache_score[row-1] = extend_score; } else { row_cache_score[row-1] = open_score; if(!IndexList_contains(&row_cache_index[row-1], row-1, col-1)) IndexList_append(&row_cache_index[row-1], row-1, col-1); } } } /* Save the score and traceback matrices into real python objects. */ if(!(py_score_matrix = PyList_New(lenA))) goto _cleanup_make_score_matrix_fast; if(!(py_trace_matrix = PyList_New(lenA))) goto _cleanup_make_score_matrix_fast; for(row=0; row<lenA; row++) { PyObject *py_score_row, *py_trace_row; if(!(py_score_row = PyList_New(lenB))) goto _cleanup_make_score_matrix_fast; PyList_SET_ITEM(py_score_matrix, row, py_score_row); if(!(py_trace_row = PyList_New(lenB))) goto _cleanup_make_score_matrix_fast; PyList_SET_ITEM(py_trace_matrix, row, py_trace_row); for(col=0; col<lenB; col++) { int i; PyObject *py_score, *py_indexlist; int offset = row*lenB + col; struct IndexList *il = &trace_matrix[offset]; /* Set py_score_matrix[row][col] to the score. */ if(!(py_score = PyFloat_FromDouble(score_matrix[offset]))) goto _cleanup_make_score_matrix_fast; PyList_SET_ITEM(py_score_row, col, py_score); if(score_only) continue; /* Set py_trace_matrix[row][col] to a list of indexes. On the edges of the matrix (row or column is 0), the matrix should be [None]. */ if(!row || !col) { if(!(py_indexlist = PyList_New(1))) goto _cleanup_make_score_matrix_fast; Py_INCREF(Py_None); PyList_SET_ITEM(py_indexlist, 0, Py_None); } else { if(!(py_indexlist = PyList_New(il->num_used))) goto _cleanup_make_score_matrix_fast; for(i=0; i<il->num_used; i++) { PyObject *py_index=NULL; int row = il->indexes[i*2], col = il->indexes[i*2+1]; if(!(py_index = Py_BuildValue("(ii)", row, col))) goto _cleanup_make_score_matrix_fast; PyList_SET_ITEM(py_indexlist, i, py_index); } } PyList_SET_ITEM(py_trace_row, col, py_indexlist); } } py_retval = Py_BuildValue("(OO)", py_score_matrix, py_trace_matrix); _cleanup_make_score_matrix_fast: if(score_matrix) free(score_matrix); if(trace_matrix) { for(i=0; i<lenA*lenB; i++) IndexList_free(&trace_matrix[i]); free(trace_matrix); } if(row_cache_score) free(row_cache_score); if(col_cache_score) free(col_cache_score); if(row_cache_index) { for(i=0; i<lenA-1; i++) IndexList_free(&row_cache_index[i]); free(row_cache_index); } if(col_cache_index) { for(i=0; i<lenB-1; i++) { IndexList_free(&col_cache_index[i]); } free(col_cache_index); } if(py_score_matrix) { Py_DECREF(py_score_matrix); } if(py_trace_matrix) { Py_DECREF(py_trace_matrix); } #if PY_MAJOR_VERSION >= 3 if (py_bytesA != NULL && py_bytesA != py_sequenceA) Py_DECREF(py_bytesA); if (py_bytesB != NULL && py_bytesB != py_sequenceB) Py_DECREF(py_bytesB); #endif return py_retval; }