Esempio n. 1
0
void aliStringToPsl(struct lineFile *lf, char *qNameParm, char *tNameParm, 
	int qSize, int tSize, int aliSize, 
	int qStart, int qEnd, int tStart, int tEnd, char strand, FILE *f, struct chain *chain, struct hash *tHash, struct hash *qHash, struct dlList *fileCache )
/* Write one psl line to f for the blocks of chain.
 *
 * The query and target sequences are fetched with readCachedSeqPart() and
 * compared base by base over every block to obtain the match, mismatch and
 * repeat-match counts.  The insert counters are never incremented here, so
 * they are always written as 0.
 *
 *   lf, aliSize          - not used by this function.
 *   qNameParm, tNameParm - sequence names; looked up in qHash/tHash and
 *                          written to the psl line.
 *   qSize, tSize         - written to the psl line; qSize is also used to
 *                          flip the query range when strand is '-'.
 *   qStart..tEnd         - ranges passed to readCachedSeqPart() and written
 *                          to the psl line.  Nothing is written when either
 *                          range is empty.
 *   strand               - '+' or '-'; the cached query is reverse
 *                          complemented as needed to agree with it.
 *   chain                - supplies the block list (qStart, tStart, tEnd).
 *
 * The loaded sequences are kept in static variables between calls, so the
 * function is not reentrant.  The flag tMasked is defined outside this
 * function. */
{
static char *tName = NULL, *qName = NULL;
static struct dnaSeq *tSeq = NULL, *qSeq = NULL;
static bool onNegStrand = FALSE;	/* True if cached qSeq is reverse complemented. */
/* Both "partial load" flags are static because they describe the cached
 * sequences, which outlive a single call. */
static boolean qIsNib = FALSE;
static boolean tIsNib = FALSE;
unsigned match = 0;	/* Number of bases that match */
unsigned misMatch = 0;	/* Number of bases that don't match */
unsigned repMatch = 0;	/* Number of bases that match but are part of repeats */
unsigned qNumInsert = 0;	/* Number of inserts in query */
int qBaseInsert = 0;	/* Number of bases inserted in query */
unsigned tNumInsert = 0;	/* Number of inserts in target */
int tBaseInsert = 0;	/* Number of bases inserted in target */
int qOffset = 0;	/* Filled in by readCachedSeqPart; not used further here. */
int tOffset = 0;	/* Filled in by readCachedSeqPart; not used further here. */
/* blockCount starts at 1, so it is always one more than the number of
 * blocks.  Presumably this keeps the array allocations non-empty for a
 * chain without blocks - confirm against AllocArray. */
int blockCount = 1, blockIx = 0;
int numBlocks;
int i;
int *blocks = NULL, *qStarts = NULL, *tStarts = NULL;
struct cBlock *b;

/* Don't output if either query or target is zero length */
if ((qStart == qEnd) || (tStart == tEnd))
    return;

/* A partially loaded query covers only the previous call's range, so it has
 * to be read again even when the name has not changed.  The target side is
 * handled the same way below. */
if (qIsNib || qName == NULL || !sameString(qName, qNameParm))
    {
    freeDnaSeq(&qSeq);
    freez(&qName);
    qName = cloneString(qNameParm);
    readCachedSeqPart(qName, qStart, qEnd-qStart, FALSE,
    	qHash, fileCache, &qSeq, &qOffset, &qIsNib);
    onNegStrand = FALSE;
    }
if (tIsNib || tName == NULL || !sameString(tName, tNameParm))
    {
    freeDnaSeq(&tSeq);
    freez(&tName);
    tName = cloneString(tNameParm);
    readCachedSeqPart(tName, tStart, tEnd-tStart, tMasked,
	tHash, fileCache, &tSeq, &tOffset, &tIsNib);
    }

/* Bring the cached query onto the requested strand. */
if ((!onNegStrand && (strand == '-')) || (onNegStrand && (strand == '+')))
    {
    reverseComplement(qSeq->dna, qSeq->size);
    onNegStrand = !onNegStrand;
    }

/* Count blocks and allocate the per-block arrays. */
for (b = chain->blockList; b != NULL; b = b->next)
    blockCount++;
numBlocks = blockCount - 1;
AllocArray(blocks, blockCount);
AllocArray(qStarts, blockCount);
AllocArray(tStarts, blockCount);

/* Record block sizes and starts, and compare the bases of each block. */
for (b = chain->blockList; b != NULL; b = b->next)
    {
    int blockSize = b->tEnd - b->tStart;
    /* A partially loaded sequence begins at qStart/tStart, so block
     * coordinates are shifted to be relative to the loaded part. */
    int qIx = qIsNib ? b->qStart - qStart : b->qStart;
    int tIx = tIsNib ? b->tStart - tStart : b->tStart;
    int k;

    qStarts[blockIx] = b->qStart;
    tStarts[blockIx] = b->tStart;
    blocks[blockIx] = blockSize;
    for (k = 0; k < blockSize; ++k, ++qIx, ++tIx)
        {
        char qq, tt;
        /* Stop comparing this block as soon as either index leaves the
         * loaded sequence; an index equal to size is already one past the
         * last base. */
        if (qIx < 0 || qIx >= qSeq->size || tIx < 0 || tIx >= tSeq->size)
            break;
        qq = qSeq->dna[qIx];
        tt = tSeq->dna[tIx];
        if (toupper((unsigned char)qq) == toupper((unsigned char)tt))
            {
            /* With tMasked set, a lower case target base counts as a
             * repeat match. */
            if (tMasked && islower((unsigned char)tt))
                ++repMatch;
            else
                ++match;
            }
        else
            ++misMatch;
        }
    ++blockIx;
    }

assert(blockIx == numBlocks);

/* Output the fixed psl columns. */
fprintf(f, "%u\t%u\t%u\t0\t", match, misMatch, repMatch);
fprintf(f, "%u\t%d\t%u\t%d\t", qNumInsert, qBaseInsert, tNumInsert, tBaseInsert);
fprintf(f, "%c\t%s\t%d\t", strand, qNameParm, qSize);
if (strand == '+')
    fprintf(f, "%d\t%d\t", qStart, qEnd);
else
    fprintf(f, "%d\t%d\t", qSize - qEnd, qSize - qStart);
fprintf(f, "%s\t%d\t%d\t%d\t", tNameParm, tSize, tStart, tEnd);
fprintf(f, "%d\t", numBlocks);

/* Output block sizes */
for (i = 0; i < numBlocks; ++i)
    fprintf(f, "%d,", blocks[i]);
fprintf(f, "\t");

/* Output qStarts */
for (i = 0; i < numBlocks; ++i)
    fprintf(f, "%d,", qStarts[i]);
fprintf(f, "\t");

/* Output tStarts */
for (i = 0; i < numBlocks; ++i)
    fprintf(f, "%d,", tStarts[i]);
fprintf(f, "\n");

/* The stream error flag is sticky, so one check after the whole line has
 * been written covers every fprintf above. */
if (ferror(f))
    {
    perror("Error writing psl file\n");
    errAbort("\n");
    }

/* Clean Up. */
freez(&blocks);
freez(&qStarts);
freez(&tStarts);
}
Esempio n. 2
0
void prettyOne(struct psl *psl, struct hash *qHash, struct hash *tHash,
	struct dlList *fileCache, FILE *f, boolean axt, FILE *checkFile)
/* Build the gapped query and target text for one psl and write it to f.
 *
 * Both sequences are fetched with readCachedSeqPart() and kept in static
 * variables, so a following call with the same names reuses them.  The
 * text is assembled block by block, with writeGap()/writeInsert() filling
 * the space between consecutive blocks.  The result goes through
 * axtOutString() when axt is set and through prettyOutString() otherwise;
 * when the two texts differ in length only the shorter length is passed on.
 * If checkFile is not NULL, outputCheck() is called on it as well. */
{
static char *tName = NULL, *qName = NULL;
static struct dnaSeq *tSeq = NULL, *qSeq = NULL;
struct dyString *qText = newDyString(16*1024);
struct dyString *tText = newDyString(16*1024);
boolean qMinus = (psl->strand[0] == '-');
boolean tMinus = (psl->strand[1] == '-');
boolean qIsPartial = FALSE, tIsPartial = FALSE;
int qOffset = 0, tOffset = 0;
int qPrevEnd = 0, tPrevEnd = 0;	/* End of previous block in each sequence. */
int ix;

/* Load the query unless the cached one carries the same name. */
if (qName == NULL || !sameString(qName, psl->qName))
    {
    freeDnaSeq(&qSeq);
    freez(&qName);
    qName = cloneString(psl->qName);
    readCachedSeqPart(qName, psl->qStart, psl->qEnd - psl->qStart,
	qHash, fileCache, &qSeq, &qOffset, &qIsPartial);
    if (qIsPartial && qMinus)
	qOffset = psl->qSize - psl->qEnd;
    }

/* Same for the target. */
if (tName == NULL || !sameString(tName, psl->tName))
    {
    freeDnaSeq(&tSeq);
    freez(&tName);
    tName = cloneString(psl->tName);
    readCachedSeqPart(tName, psl->tStart, psl->tEnd - psl->tStart,
	tHash, fileCache, &tSeq, &tOffset, &tIsPartial);
    if (tIsPartial && tMinus)
	tOffset = psl->tSize - psl->tEnd;
    }

/* Put each sequence on the strand the psl coordinates refer to. */
if (qMinus)
    reverseComplement(qSeq->dna, qSeq->size);
if (tMinus)
    reverseComplement(tSeq->dna, tSeq->size);

for (ix = 0; ix < psl->blockCount; ++ix)
    {
    int qPos = psl->qStarts[ix] - qOffset;
    int tPos = psl->tStarts[ix] - tOffset;
    int blockLen = psl->blockSizes[ix];

    /* Every block but the first is preceded by whatever lies between it
     * and the previous block. */
    if (ix > 0)
	{
	int qGap = qPos - qPrevEnd;
	int tGap = tPos - tPrevEnd;
	if (qGap > 0 && tGap > 0)
	    writeGap(qText, qGap, qSeq->dna + qPrevEnd, tText, tGap, tSeq->dna + tPrevEnd);
	else if (qGap > 0)
	    writeInsert(qText, tText, qSeq->dna + qPrevEnd, qGap);
	else if (tGap > 0)
	    writeInsert(tText, qText, tSeq->dna + tPrevEnd, tGap);
	}

    /* The aligned bases of the block itself. */
    dyStringAppendN(qText, qSeq->dna + qPos, blockLen);
    qPrevEnd = qPos + blockLen;
    dyStringAppendN(tText, tSeq->dna + tPos, blockLen);
    tPrevEnd = tPos + blockLen;
    }

if (checkFile != NULL)
    outputCheck(psl, qSeq, qOffset, tSeq, tOffset, checkFile);

/* A fully loaded sequence stays cached, so it is flipped back to its
 * original strand.  A partial one is dropped below and is left as is. */
if (qMinus && !qIsPartial)
    reverseComplement(qSeq->dna, qSeq->size);
if (tMinus && !tIsPartial)
    reverseComplement(tSeq->dna, tSeq->size);

if (axt)
    axtOutString(qText->string, tText->string,
	min(qText->stringSize, tText->stringSize), 60, psl, f);
else
    prettyOutString(qText->string, tText->string,
	min(qText->stringSize, tText->stringSize), 60, psl, f);

dyStringFree(&qText);
dyStringFree(&tText);

/* Clearing the cached name makes the next call load the sequence again;
 * this is done for sequences that were only partially loaded. */
if (qIsPartial)
    freez(&qName);
if (tIsPartial)
    freez(&tName);
}