/*
 * Produce a difference trace between two readings over the region where
 * they overlap in the contig.
 *
 * xx       - editor state; supplies positions, cutoff settings and the
 *            optional diff_trace_size window around the cursor.
 * r1, seq1 - input trace and its editor sequence number (0 == consensus).
 * r2, seq2 - reference trace and its editor sequence number (0 == consensus).
 * off1/2   - not used by this function.
 * startp   - set to the consensus position at which the compared region
 *            starts.
 * start1p  - passed to TraceDiffGetDifference() to receive its start value;
 *            when seq1 is the consensus it is then offset by start2-1.
 *
 * Returns a newly duplicated Read (via read_dup) holding the difference,
 * or NULL if the two sequences are both consensus, differ in orientation,
 * either trace is missing, there is no overlap, or the trace-diff fails.
 */
static Read *diff_readings(EdStruct *xx,
                           Read *r1, int seq1, int off1,
                           Read *r2, int seq2, int off2,
                           int *startp, int *start1p)
{
    int start1, end1, start2, end2, start, end;
    Read *r;
    tracediff_t td;

    /* One consensus trace works well, but not two. */
    if (!seq1 && !seq2)
        return NULL;

    /* Sequences must be in the same orientation */
    if (DB_Comp(xx, seq1) != DB_Comp(xx, seq2))
        return NULL;

    if (r1 == NULL || r2 == NULL)
        return NULL;

    /*
     * Compute start and end of the overlap point in each reading. This is
     * done using the non cutoff data only. We take 1 off the end position
     * to ensure that we always have a next base to find the position of.
     */

    /* This sets start and end to be positions in the consensus */
    if (xx->diff_trace_size) {
        /* Restrict the comparison to a window either side of the cursor. */
        start = positionInContig(xx, xx->cursorSeq, xx->cursorPos)
            - xx->diff_trace_size;
        start1 = MAX(start, DB_RelPos(xx, seq1) - DB_Start(xx, seq1)-1);
        start2 = MAX(start, DB_RelPos(xx, seq2) - DB_Start(xx, seq2)-1);

        end = positionInContig(xx, xx->cursorSeq, xx->cursorPos)
            + xx->diff_trace_size;
        end1 = MIN(end, DB_RelPos(xx, seq1) - DB_Start(xx, seq1)-1
                   + DB_Length2(xx, seq1) - 1);
        end2 = MIN(end, DB_RelPos(xx, seq2) - DB_Start(xx, seq2)-1
                   + DB_Length2(xx, seq2) - 1);
    } else if (xx->reveal_cutoffs) {
        /* Cutoffs shown: use the full-length extents. */
        start1 = DB_RelPos(xx, seq1) - DB_Start(xx, seq1)-1;
        start2 = DB_RelPos(xx, seq2) - DB_Start(xx, seq2)-1;
        end1 = DB_RelPos(xx, seq1) - DB_Start(xx, seq1)-1
            + DB_Length2(xx, seq1) - 1;
        end2 = DB_RelPos(xx, seq2) - DB_Start(xx, seq2)-1
            + DB_Length2(xx, seq2) - 1;
    } else {
        /* Cutoffs hidden: use only the displayed extents. */
        start1 = DB_RelPos(xx, seq1);
        start2 = DB_RelPos(xx, seq2);
        end1 = DB_RelPos(xx, seq1) + DB_Length(xx, seq1) - 1;
        end2 = DB_RelPos(xx, seq2) + DB_Length(xx, seq2) - 1;
    }

    /* Intersect the two ranges and clamp to the contig (sequence 0). */
    start = MAX(start1, start2);
    end   = MIN(end1, end2);

    start = MAX(start, 1);
    end   = MAX(end, 1);

    start = MIN(start, DB_Length(xx, 0));
    end   = MIN(end, DB_Length(xx, 0));

    if (end <= start)
        return NULL;

    /* Now we convert these to positions in the full-length editor sequences */
    start1 = start - (DB_RelPos(xx, seq1)-1) + DB_Start(xx, seq1);
    start2 = start - (DB_RelPos(xx, seq2)-1) + DB_Start(xx, seq2);
    end1   = end   - (DB_RelPos(xx, seq1)-1) + DB_Start(xx, seq1);
    end2   = end   - (DB_RelPos(xx, seq2)-1) + DB_Start(xx, seq2);

    /* Change from first/last used base to last/first clipped base */
    start1--;
    start2--;
    end1++;
    end2++;

    /*
     * And now we convert these to positions in the trace (orig orientation)
     * When comparing against a consensus trace we just use the entire lot
     * as this is how it's been generated in the first place (we hope).
     */
    if (seq1) {
        start1 = origpos(xx, seq1, start1);
        end1   = origpos(xx, seq1, end1);
    } else {
        end1  -= start1;
        start1 = 0;
    }

    if (seq2) {
        start2 = origpos(xx, seq2, start2);
        end2   = origpos(xx, seq2, end2);
    } else {
        end2  -= start2;
        start2 = 0;
    }

    /*
     * If complemented, change the start and end positions so that they map
     * on to a trace counting with base 1 at the left, as the tracediff
     * library does not have access to the trace display widget details (which
     * contains the real numbering order).
     */
    if (start1 > end1) {
        start1 = r1->NBases - start1 + 1;
        end1   = r1->NBases - end1 + 1;
    }

    if (start2 > end2) {
        start2 = r2->NBases - start2 + 1;
        end2   = r2->NBases - end2 + 1;
    }

    *startp = start;

    /* Initialise Mark's trace diff code */
    TraceDiffInit(&td);
    if (xx->compare_trace_yscale)
        TraceDiffSetParameter(&td, TRACEDIFF_PARAMETER_YSCALE, 1.0);
    TraceDiffSetReference(&td, r2, MUTLIB_STRAND_FORWARD, start2, end2);
    TraceDiffSetInput(&td, r1, MUTLIB_STRAND_FORWARD, start1, end1);

    /* Do the difference without analysis of the results */
    TraceDiffExecute(&td, TRACEDIFF_ALGORITHM_DEFAULT_DIFFERENCE_ONLY);
    if (TraceDiffGetResultCode(&td)) {
        /*
         * Report first (the message is read from td), then release the
         * TraceDiff instance so the failure path does not leak it.
         */
        verror(ERR_WARN, "diff_readings", "%s",
               TraceDiffGetResultString(&td));
        TraceDiffDestroy(&td);
        return NULL;
    }

    /* Get a copy of the result and then destroy the TraceDiff instance */
    r = TraceDiffGetDifference(&td, start1p, NULL);
    if (!seq1) {
        *start1p += start2-1;
    }
    if (r) {
        /* Duplicate before TraceDiffDestroy() so the caller gets its own copy */
        r = read_dup(r, NULL);
    }
    TraceDiffDestroy(&td);

    return r;
}
/*
 * FDB_tp_as_mapping.mp_length
 *
 * Length slot for FDB objects: passes the value of tcfdbrnum(self->fdb)
 * through DB_Length() and returns the result as a Py_ssize_t.
 *
 * NOTE(review): DB_Length is called with a single argument here, whereas
 * the other functions in this file call DB_Length(xx, seq) with two.
 * Presumably this wrapper expects a different DB_Length definition -
 * confirm it belongs in this translation unit.
 */
static Py_ssize_t FDB_Length(FDB *self) { return DB_Length(tcfdbrnum(self->fdb)); }
static int do_cons_base(EdStruct *xx, char *cons, int pos, int start, int count, int *seqList, diff_cons_seq *rlist, Read *r, int off, int match, int *max_points) { int i, j, istart; int width; static diff_cons_trace *tr = NULL; static int diff_count = 0; int used_count; double avg_back, avg_wid; if (count == 0) return do_empty_cons_base(xx, cons, pos, start, r, off, max_points); if (count > diff_count) { tr = xrealloc(tr, count * 2 * sizeof(*tr)); if (NULL == tr) return -1; diff_count = count * 2; } /* printf("--- At position %3d ---", pos); */ width = 0; used_count = 0; avg_back = 0; avg_wid = 0; for (i = 0; i < count; i++) { int seq = seqList[i]; int p = pos-DB_RelPos(xx, seq); if (match && cons_matches(xx, cons, start, seq, pos) == 0) { tr[i].opos = -1; continue; } /* * printf(" %d:%c", * DB_Number(xx, seq), * rlist[i].seq[p]); */ tr[i].opos = rlist[i].opos[p]; if (p > 0) tr[i].back = (double)(rlist[i].opos[p-1] + rlist[i].opos[p])/2.0; else tr[i].back = tr[i].opos; if (p + 1 < DB_Length(xx, seq)) tr[i].forw = (double)(rlist[i].opos[p] + rlist[i].opos[p+1])/2.0; else tr[i].forw = tr[i].opos; width += tr[i].forw - tr[i].back; avg_back += tr[i].opos - tr[i].back; avg_wid += tr[i].forw - tr[i].back; used_count++; } /* putchar('\n'); */ if (used_count == 0) return do_empty_cons_base(xx, cons, pos, start, r, off, max_points); /* width = ABS((int)((double)width / used_count + 0.5)); */ /* width = 2 * (int)((double)width / used_count / 2 + 0.5); */ /* * We have to set the width to a fixed amount to get the consensus trace * to be smooth. I'm certain that variable widths will work provided that * we iron out the rounding problems involved with this. 
*/ width = TWIDTH; if (-1 == cons_realloc_trace(r, max_points, off + width/2 + 1)) return -1; if (off-width/2 < 0) istart = -(off-width/2); else istart = 0; for (i = istart; i < width; i++) { double scale_a = 0.0, scale_c = 0.0, scale_g = 0.0, scale_t = 0.0; double ratio; double posd, posm; int posi; int i1, i2; for (j = 0; j < count; j++) { if (tr[j].opos == -1) continue; if (DB_Comp(xx, seqList[j]) == UNCOMPLEMENTED) { ratio = (double)(tr[j].forw - tr[j].back) / width; posd = (double)i * ratio; posi = (signed int)posd; posm = posd - posi; i1 = (int)(tr[j].back+0.5) + posi; i2 = i1+1; scale_a += (rlist[j].r->traceA[i2] - rlist[j].r->traceA[i1]) * posm + rlist[j].r->traceA[i1]; scale_c += (rlist[j].r->traceC[i2] - rlist[j].r->traceC[i1]) * posm + rlist[j].r->traceC[i1]; scale_g += (rlist[j].r->traceG[i2] - rlist[j].r->traceG[i1]) * posm + rlist[j].r->traceG[i1]; scale_t += (rlist[j].r->traceT[i2] - rlist[j].r->traceT[i1]) * posm + rlist[j].r->traceT[i1]; } else { ratio = (double)(tr[j].forw - tr[j].back) / width; posd = (double)i * ratio; posi = (signed int)posd; posm = posi - posd; i1 = (int)(tr[j].back+0.5) + posi; i2 = i1-1; scale_t += (rlist[j].r->traceA[i2] - rlist[j].r->traceA[i1]) * posm + rlist[j].r->traceA[i1]; scale_g += (rlist[j].r->traceC[i2] - rlist[j].r->traceC[i1]) * posm + rlist[j].r->traceC[i1]; scale_c += (rlist[j].r->traceG[i2] - rlist[j].r->traceG[i1]) * posm + rlist[j].r->traceG[i1]; scale_a += (rlist[j].r->traceT[i2] - rlist[j].r->traceT[i1]) * posm + rlist[j].r->traceT[i1]; } } /* printf("i=%d a=%f c=%f g=%f t=%f\n", * i, scale_a, scale_c, scale_g, scale_t); */ r->traceA[off+i-width/2] = (TRACE)(scale_a / used_count); r->traceC[off+i-width/2] = (TRACE)(scale_c / used_count); r->traceG[off+i-width/2] = (TRACE)(scale_g / used_count); r->traceT[off+i-width/2] = (TRACE)(scale_t / used_count); } /* * Reposition base positions to the centre of peaks. This is vitally * important when combined with the trace_diff code. 
*/ { signed int tmp; if (avg_wid) if (avg_back) tmp = off-width/2 + avg_back / avg_wid * width + 0.5; else tmp = off; else tmp = off-width/2 + 0.5; if (tmp < 0) tmp = 0; r->base[pos-start] = cons[pos-start]; r->basePos[pos-start] = tmp; r->prob_A[pos-start] = 0; r->prob_C[pos-start] = 0; r->prob_G[pos-start] = 0; r->prob_T[pos-start] = 0; } return width; }
/* * Produce a consensus trace from a specific region of this contig. */ Read *cons_trace(EdStruct *xx, int start, int end, int strand, int match, int exception) { int *seqList, i, j, count, next; Read *r; int max_points = 10000; char *con = NULL; diff_cons_seq *rlist = NULL; char fileName[256]; char t_type[5]; int form; int offset = 0, w; /* Get the consensus sequence */ if (NULL == (con = (char *)xmalloc(end - start + 2))) goto error; DBcalcConsensus(xx, start, end - start + 1, con, NULL, BOTH_STRANDS); /* Allocate a list of read pointers and positions */ if (NULL == (rlist = (diff_cons_seq *)xcalloc(DBI_gelCount(xx), sizeof(*rlist)))) goto error; /* Allocate a read structure */ if (NULL == (r = read_allocate(max_points, end - start + 1))) goto error; /* Derive the initial list of sequences covering the start point */ count = 0; seqList = DBI_list(xx); for (i = 1; i <= DBI_gelCount(xx) && DB_RelPos(xx, DBI_order(xx)[i]) <= start; i++) { int seq = DBI_order(xx)[i]; DBgetSeq(DBI(xx), seq); if (DB_RelPos(xx, seq) + DB_Length(xx, seq) > start && strand_matches(xx, seq, strand) && seq != exception) { if (get_trace_path(xx, seq, fileName, t_type) == 0) { form = trace_type_str2int(t_type); rlist[count].r = read_reading(fileName, form); if (rlist[count].r) { rlist[count].seq = DBgetSeq(DBI(xx), seq); rlist[count].opos = get_trace_pos(rlist[count].r, xx, seq, 0, DB_Start(xx, seq), DB_Start(xx, seq) + DB_Length(xx, seq), DB_Seq(xx, seq), 0); seqList[count++] = seq; } } } } if (i <= DBI_gelCount(xx)) next = i; else next = 0; /* * Loop along the sequence updating seqList as we go. * At each point we know how many sequences there are so we can * produce the consensus from these sequences. 
*/ for (i = start; i <= end; i++) { w = do_cons_base(xx, con, i, start, count, seqList, rlist, r, offset, match, &max_points); if (w == -1) goto error; offset += w; /* Update seqList for the next position */ if (i < end) { /* Remove sequences */ for (j = 0; j < count; j++) { int seq = seqList[j]; if (DB_RelPos(xx, seq) + DB_Length(xx, seq) - 1 <= i) { read_deallocate(rlist[j].r); xfree(rlist[j].opos); memmove(&seqList[j], &seqList[j+1], (count-1-j) * sizeof(*seqList)); memmove(&rlist[j], &rlist[j+1], (count-1-j) * sizeof(*rlist)); count--; j--; } } /* Add sequences */ while (next && DB_RelPos(xx, next) <= i+1) { /* printf("next=%d %d %d\n", next, DB_RelPos(xx, next), i+1); */ DBgetSeq(DBI(xx), next); if (strand_matches(xx, next, strand) && get_trace_path(xx, next, fileName, t_type) == 0) { form = trace_type_str2int(t_type); rlist[count].r = read_reading(fileName, form); if (rlist[count].r) { rlist[count].seq = DBgetSeq(DBI(xx), next); rlist[count].opos = get_trace_pos(rlist[count].r, xx, next, 0, DB_Start(xx, next), DB_Start(xx,next)+DB_Length(xx,next), DB_Seq(xx, next), 0); seqList[count++] = next; } } if (++next > DBI_gelCount(xx)) next = 0; } } } for (i = 0; i < count; i++) { read_deallocate(rlist[i].r); xfree(rlist[i].opos); } tidy_up(r, end-start + 1, offset); xfree(con); xfree(rlist); return r; error: if (con) xfree(con); if (rlist) xfree(rlist); return NULL; }
/*
 * Align the overlapping region of the two contig editor windows.
 *
 * The overlap is derived from the locked offset between the editors and is
 * widened by XTRA_PERC of its length at either end before being handed to
 * align(). Display positions are restored if the alignment fails, or
 * re-applied through U_adjust_display() if it succeeds.
 *
 * Returns:
 *    0        - aligned ok
 *    non-zero - not ok (1 when not in join mode or there is no overlap,
 *               otherwise the value returned by align())
 */
int alignOverlap(EdStruct *xx[2])
{
    int lock = editorLockedPos(xx, 1/*force recalculation*/);
    int from[2];        /* first base to align in each contig */
    int span[2];        /* number of bases to align in each contig */
    int total[2];       /* contig lengths */
    int shown[2];       /* display positions on entry */
    int last0, overlap, status, k;

    if (!inJoinMode(xx[0]))
        return 1;

    /* Compute overlap position and sizes */
    from[0] = (lock < 0) ? 1 - lock : 1;
    from[1] = (lock < 0) ? 1 : 1 + lock;

    total[0] = DB_Length(xx[0],0);
    total[1] = DB_Length(xx[1],0);

    last0 = (lock + total[0] < total[1]) ? total[0] : total[1] - lock;

    overlap = last0 - from[0] + 1;
    if (overlap <= 0)
        return 1;

    /* Add on extra data either end to allow for padding */
#define XTRA_PERC 0.30
    from[0] -= (int)(overlap * XTRA_PERC);
    from[1] -= (int)(overlap * XTRA_PERC);
    span[0] = overlap + (int)(overlap * XTRA_PERC * 2);
    span[1] = overlap + (int)(overlap * XTRA_PERC * 2);

    shown[0] = xx[0]->displayPos;
    shown[1] = xx[1]->displayPos;

    /* Both starts fell off the left end: shift both displays together */
    if (from[0] < 1 && from[1] < 1) {
        int shift = MAX(from[0], from[1]) - 1;

        xx[0]->displayPos += shift;
        xx[1]->displayPos += shift;
    }

    /* Clip each start to position 1, shrinking its span to match */
    for (k = 0; k < 2; k++) {
        if (from[k] < 1) {
            int clipped = 1 - from[k];

            span[k] -= clipped;
            xx[k]->displayPos += clipped;
            from[k] = 1;
        }
    }

    /* Clip each span so it does not run past the end of its contig */
    for (k = 0; k < 2; k++) {
        int room = total[k] - from[k] + 1;

        if (span[k] > room)
            span[k] = room;
    }

    xx[0]->link->lockOffset = xx[1]->displayPos - xx[0]->displayPos;

    openUndo(DBI(xx[0]));
    openUndo(DBI(xx[1]));

    /* Do the actual alignment */
    status = align(xx[0], from[0], span[0], xx[1], from[1], span[1]);

    if (status) {
        /* Alignment failed - put back display positions before returning */
        xx[0]->displayPos = shown[0];
        xx[1]->displayPos = shown[1];
    } else {
        /*
         * If displayPos has changed, put it back to the original position
         * and adjust it once more using U_adjust_display. This will cause
         * the undo information to be stored correctly.
         */
        for (k = 0; k < 2; k++) {
            if (shown[k] != xx[k]->displayPos) {
                int moved = xx[k]->displayPos - shown[k];

                xx[k]->displayPos = shown[k];
                U_adjust_display(xx[k], moved);
            }
        }
    }

    closeUndo(xx[1], DBI(xx[1]));
    closeUndo(xx[0], DBI(xx[0]));

    return status;
}