void aliStringToPsl(struct lineFile *lf, char *qNameParm, char *tNameParm, int qSize, int tSize, int aliSize, int qStart, int qEnd, int tStart, int tEnd, char strand, FILE *f, struct chain *chain, struct hash *tHash, struct hash *qHash, struct dlList *fileCache ) /* Output alignment in a pair of strings with insert chars * to a psl line in a file. */ { static char *tName = NULL, *qName = NULL; static struct dnaSeq *tSeq = NULL, *qSeq = NULL; static bool onNegStrand = FALSE; //struct dyString *q = newDyString(16*1024); //struct dyString *t = newDyString(16*1024); unsigned match = 0; /* Number of bases that match */ unsigned misMatch = 0; /* Number of bases that don't match */ unsigned repMatch = 0; /* Number of bases that match but are part of repeats */ unsigned qNumInsert = 0; /* Number of inserts in query */ int qBaseInsert = 0; /* Number of bases inserted in query */ unsigned tNumInsert = 0; /* Number of inserts in target */ int tBaseInsert = 0; /* Number of bases inserted in target */ boolean eitherInsert = FALSE; /* True if either in insert state. 
*/ int qOffset = 0; int tOffset = 0; boolean qIsNib = FALSE; static boolean tIsNib ; int blockCount = 1, blockIx=0; int i,j; int qs,qe,ts,te; int *blocks = NULL, *qStarts = NULL, *tStarts = NULL; struct cBlock *b, *nextB; int qbSize = 0, tbSize = 0; /* sum of block sizes */ /* Don't ouput if either query or target is zero length */ if ((qStart == qEnd) || (tStart == tEnd)) return; if (qName == NULL || !sameString(qName, qNameParm)) { freeDnaSeq(&qSeq); freez(&qName); qName = cloneString(qNameParm); readCachedSeqPart(qName, qStart, qEnd-qStart, FALSE, qHash, fileCache, &qSeq, &qOffset , &qIsNib); onNegStrand = FALSE; if (qIsNib && strand == '-') qOffset = qSize - qEnd; } if (tIsNib || tName == NULL || !sameString(tName, tNameParm) ) { freeDnaSeq(&tSeq); freez(&tName); tName = cloneString(tNameParm); readCachedSeqPart(tName, tStart, tEnd-tStart, tMasked, tHash, fileCache, &tSeq, &tOffset, &tIsNib); } if ((!onNegStrand && (strand == '-')) || (onNegStrand && (strand == '+')) ) { reverseComplement(qSeq->dna, qSeq->size); onNegStrand = !onNegStrand; } for (b = chain->blockList; b != NULL; b = nextB) { blockCount++; qbSize += b->qEnd - b->qStart + 1; tbSize += b->tEnd - b->tStart + 1; nextB = b->next; } /* Allocate dynamic memory for block lists. */ AllocArray(blocks, blockCount); AllocArray(qStarts, blockCount); AllocArray(tStarts, blockCount); /* Figure block sizes and starts. */ eitherInsert = FALSE; qs = qe = qStart; ts = te = tStart; nextB = NULL; for (b = chain->blockList; b != NULL; b = nextB) { qStarts[blockIx] = b->qStart; tStarts[blockIx] = b->tStart; blocks[blockIx] = b->tEnd - b->tStart; j = tIsNib ? b->tStart-tStart : b->tStart; // cmclean change to correctly find target coordinates from all files i = qIsNib ? 
b->qStart-qStart : b->qStart; //printf("tStart %d b->tStart %d tEnd %d size %d block %d\n",tStart, b->tStart, tEnd,tSeq->size, b->tEnd-b->tStart); //printf("qStart %d b->qStart %d qEnd %d size %d qend-qstart %d loop start %d loopend %d\n",qStart, b->qStart, qEnd, qSeq->size, qEnd-qStart, (b->qStart)-qStart, b->qStart+(b->tEnd - b->tStart)-qStart); int counter; for (counter = 0 ; counter < (b->tEnd - b->tStart); counter++) { char qq ; char tt ; if (j > tSeq->size || i > qSeq->size) { break; //printf("tStart %d b->tStart %d tEnd %d size %d block %d\n",tStart, b->tStart, tEnd,tSeq->size, b->tEnd-b->tStart); //printf("qStart %d b->qStart %d qEnd %d size %d qend-qstart %d loop start %d loopend %d\n",qStart, b->qStart, qEnd, qSeq->size, qEnd-qStart, (b->qStart)-qStart, b->qStart+(b->tEnd - b->tStart)-qStart); assert(j <= tSeq->size); assert(i <= qSeq->size); } qq = qSeq->dna[i++]; tt = tSeq->dna[j++]; if (toupper(qq) == toupper(tt)) { if (tMasked && islower(tt)) ++repMatch; else ++match; } else ++misMatch; } ++blockIx; eitherInsert = TRUE; nextB = b->next; } assert(blockIx == blockCount-1); /* qs = qStart; qe = qStart + match + misMatch + tBaseInsert; assert(qe == qEnd); assert(qs < qe); te = tStart + match + misMatch + qBaseInsert; assert(te == tEnd); assert(tStart < te); */ /* Output header */ fprintf(f, "%d\t", match); fprintf(f, "%d\t", misMatch); fprintf(f, "%d\t", repMatch); fprintf(f, "0\t"); fprintf(f, "%d\t", qNumInsert); fprintf(f, "%d\t", qBaseInsert); fprintf(f, "%d\t", tNumInsert); fprintf(f, "%d\t", tBaseInsert); fprintf(f, "%c\t", strand); fprintf(f, "%s\t", qNameParm); fprintf(f, "%d\t", qSize); if (strand == '+') { fprintf(f, "%d\t", qStart); fprintf(f, "%d\t", qEnd); } else { fprintf(f, "%d\t", qSize - qEnd); fprintf(f, "%d\t", qSize - qStart); } fprintf(f, "%s\t", tNameParm); fprintf(f, "%d\t", tSize); fprintf(f, "%d\t", tStart); fprintf(f, "%d\t", tEnd); fprintf(f, "%d\t", blockCount-1); if (ferror(f)) { perror("Error writing psl file\n"); 
errAbort("\n"); } /* Output block sizes */ for (i=0; i<blockCount-1; ++i) fprintf(f, "%d,", blocks[i]); fprintf(f, "\t"); /* Output qStarts */ for (i=0; i<blockCount-1; ++i) fprintf(f, "%d,", qStarts[i]); fprintf(f, "\t"); /* Output tStarts */ for (i=0; i<blockCount-1; ++i) fprintf(f, "%d,", tStarts[i]); fprintf(f, "\n"); /* Clean Up. */ freez(&blocks); freez(&qStarts); freez(&tStarts); }
void prettyOne(struct psl *psl, struct hash *qHash, struct hash *tHash,
        struct dlList *fileCache, FILE *f, boolean axt, FILE *checkFile)
/* Build the query and target alignment rows for one psl and write them to f.
 *
 * Sequences are fetched with readCachedSeqPart and held in function-level
 * statics.  Blocks are appended to two growing strings, with writeGap /
 * writeInsert filling the space between consecutive blocks.  The rows are
 * emitted with axtOutString when axt is set and prettyOutString otherwise,
 * truncated to the shorter of the two rows.  When checkFile is non-NULL,
 * outputCheck is also called.
 *
 * NOTE(review): readCachedSeqPart is called here with eight arguments while
 * aliStringToPsl passes nine (an extra masking flag) - confirm both match
 * the helper's declaration. */
{
    static char *tName = NULL, *qName = NULL;
    static struct dnaSeq *tSeq = NULL, *qSeq = NULL;
    struct dyString *qRow = newDyString(16*1024);
    struct dyString *tRow = newDyString(16*1024);
    int qDone = 0, tDone = 0;   /* end of the previous block in each sequence */
    int qOffset = 0;
    int tOffset = 0;
    boolean qIsPartial = FALSE;
    boolean tIsPartial = FALSE;
    int ix;

    /* Load the query unless the cached one has the same name. */
    if (qName == NULL || !sameString(qName, psl->qName)) {
        freeDnaSeq(&qSeq);
        freez(&qName);
        qName = cloneString(psl->qName);
        readCachedSeqPart(qName, psl->qStart, psl->qEnd-psl->qStart,
                qHash, fileCache, &qSeq, &qOffset, &qIsPartial);
        if (qIsPartial && psl->strand[0] == '-')
            qOffset = psl->qSize - psl->qEnd;
    }

    /* Same for the target.  tIsPartial is still FALSE at this point (it is a
     * fresh local), so that term never triggers; partially loaded names are
     * instead dropped at the end of this function to force a reload. */
    if (tName == NULL || !sameString(tName, psl->tName) || tIsPartial) {
        freeDnaSeq(&tSeq);
        freez(&tName);
        tName = cloneString(psl->tName);
        readCachedSeqPart(tName, psl->tStart, psl->tEnd-psl->tStart,
                tHash, fileCache, &tSeq, &tOffset, &tIsPartial);
    }
    if (tIsPartial && psl->strand[1] == '-')
        tOffset = psl->tSize - psl->tEnd;

    /* Flip sequences onto the strands the psl coordinates refer to. */
    if (psl->strand[0] == '-')
        reverseComplement(qSeq->dna, qSeq->size);
    if (psl->strand[1] == '-')
        reverseComplement(tSeq->dna, tSeq->size);

    for (ix = 0; ix < psl->blockCount; ++ix) {
        int qPos = psl->qStarts[ix] - qOffset;
        int tPos = psl->tStarts[ix] - tOffset;
        int len = psl->blockSizes[ix];

        /* Fill the space between the previous block and this one. */
        if (ix > 0) {
            int qGap = qPos - qDone;
            int tGap = tPos - tDone;

            if (qGap > 0 && tGap > 0)
                writeGap(qRow, qGap, qSeq->dna + qDone, tRow, tGap, tSeq->dna + tDone);
            else if (qGap > 0)
                writeInsert(qRow, tRow, qSeq->dna + qDone, qGap);
            else if (tGap > 0)
                writeInsert(tRow, qRow, tSeq->dna + tDone, tGap);
        }

        /* Append the aligned block itself. */
        dyStringAppendN(qRow, qSeq->dna + qPos, len);
        qDone = qPos + len;
        dyStringAppendN(tRow, tSeq->dna + tPos, len);
        tDone = tPos + len;
    }

    if (checkFile != NULL)
        outputCheck(psl, qSeq, qOffset, tSeq, tOffset, checkFile);

    /* Restore fully loaded sequences to their original orientation so the
     * cached copy can be reused; partial loads are discarded below. */
    if (psl->strand[0] == '-' && !qIsPartial)
        reverseComplement(qSeq->dna, qSeq->size);
    if (psl->strand[1] == '-' && !tIsPartial)
        reverseComplement(tSeq->dna, tSeq->size);

    /* The two rows are not asserted to be the same length; output is cut to
     * the shorter one. */
    if (axt)
        axtOutString(qRow->string, tRow->string,
                min(qRow->stringSize, tRow->stringSize), 60, psl, f);
    else
        prettyOutString(qRow->string, tRow->string,
                min(qRow->stringSize, tRow->stringSize), 60, psl, f);

    dyStringFree(&qRow);
    dyStringFree(&tRow);

    /* Forget the name of a partially loaded sequence so the next call reloads it. */
    if (qIsPartial)
        freez(&qName);
    if (tIsPartial)
        freez(&tName);
}