Exemplo n.º 1
struct axt *pslToAxt(struct psl *psl, struct hash *qHash, char *tNibDir, 
	struct dlList *fileCache)
/* Build an axt alignment from one psl record.  The query sequence is read
 * with twoBitReadSeqFrag and the target region with nibInfoLoadStrand; the
 * psl blocks are then walked, appending block bases plus gap/insert fill to
 * two dyStrings that are finally handed to axtCreate.
 * A NULL return follows each of the two warn() calls below (query sequence
 * not found, or its size differs from psl->qSize).
 * qHash is not referenced in the visible code, and fileCache appears only in
 * commented-out code.
 * NOTE(review): this excerpt has no braces and some lines (for example
 * `else` keywords) appear to be missing, so the extent of each if/for body
 * cannot be read off the text - confirm against the original file. */
/* static: these keep their values from one call to the next. */
static char *tName = NULL, *qName = NULL;
static struct dnaSeq *tSeq = NULL;
/* Accumulators for the aligned query (q) and target (t) text.  No matching
 * free is visible in this excerpt. */
struct dyString *q = newDyString(16*1024);
struct dyString *t = newDyString(16*1024);
int blockIx;
int qs, ts ;
int lastQ = 0, lastT = 0, size;
int qOffset = 0;
int tOffset = 0;
struct axt *axt = NULL;
/* qIsNib and tIsNib are never assigned after this point in the visible code. */
boolean qIsNib = FALSE;
boolean tIsNib = FALSE;
/* cnt is only read (in the warn below); no increment is visible. */
int cnt = 0;
//struct dnaSeq *tSeq = NULL;
struct nibInfo *tNib = NULL;

/* Load the query sequence.  twoBitFile, mrnaList and mrna are not declared
 * in this excerpt - presumably file-level variables; verify. */
struct dnaSeq *qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0);
   // hGenBankGetMrna(psl->qName, NULL);
assert(mrnaList != NULL);
/* Scan mrnaList for an entry named psl->qName and clone it into qSeq. */
for (mrna = mrnaList; mrna != NULL ; mrna = mrna->next)
    assert(mrna != NULL);
    if (sameString(mrna->name, psl->qName))
        qSeq = cloneDnaSeq(mrna);
        assert(qSeq != NULL);
if (qSeq == NULL)
    warn("mrna sequence data not found %s, searched %d sequences\n",psl->qName,cnt);
    return NULL;
/* The loaded query must have the size recorded in the psl. */
if (qSeq->size != psl->qSize)
    warn("sequence %s aligned is different size %d from mrna.fa file %d \n",psl->qName,psl->qSize,qSeq->size);
    return NULL;
/* qName is re-cloned here; no free of the previous value is visible. */
qName = cloneString(psl->qName);
/* NOTE(review): an `else` presumably preceded `qOffset = 0;` - as printed the
 * second assignment immediately follows the first. */
if (qIsNib && psl->strand[0] == '-')
    qOffset = psl->qSize - psl->qEnd;
    qOffset = 0;
verbose(5,"qString len = %d qOffset = %d\n",qSeq->size,qOffset);
/* Load the target region [tStart,tEnd) on the '+' strand when the target name
 * is unset or differs from the one remembered in the static tName.  tOffset
 * is set to psl->tStart so block starts can be made relative to the loaded
 * fragment.  nibHash is not declared in this excerpt. */
if (tName == NULL || !sameString(tName, psl->tName) || tIsNib)
    tName = cloneString(psl->tName);
    tNib = nibInfoFromCache(nibHash, tNibDir, tName);
    assert(tNib !=NULL);
    tSeq = nibInfoLoadStrand(tNib, psl->tStart, psl->tEnd, '+');
    assert(tSeq !=NULL);
    tOffset = psl->tStart;
    //readCachedSeqPart(tName, psl->tStart, psl->tEnd-psl->tStart, 
//	tHash, fileCache, &tSeq, &tOffset, &tIsNib);
verbose(4,"strand t %s \n",psl->strand);
/* NOTE(review): an `else` presumably preceded the errAbort line. */
if (tSeq != NULL)
    verbose(5,"tString len = %d tOffset = %d\n",tSeq->size,tOffset);
    errAbort("tSeq is NULL\n");
/* Query on the minus strand: reverse-complement the query bases in place. */
if (psl->strand[0] == '-')
    reverseComplement(qSeq->dna, qSeq->size);
//if (strlen(psl->strand) > 1 )
//    if (psl->strand[1] == '-')
//        reverseComplement(tSeq->dna, tSeq->size);
/* Walk the psl blocks, appending aligned bases to q and t. */
for (blockIx=0; blockIx < psl->blockCount; ++blockIx)
    /* Block starts relative to the loaded sequences. */
    qs = psl->qStarts[blockIx] - qOffset;
    ts = psl->tStarts[blockIx] - tOffset;

    /* For every block after the first, emit whatever lies between the end of
     * the previous block (lastQ/lastT) and the start of this one: writeGap
     * when both sides advanced, otherwise writeInsert for the side that did. */
    if (blockIx != 0)
	int qGap, tGap, minGap;
	qGap = qs - lastQ;
	tGap = ts - lastT;
	minGap = min(qGap, tGap);
	if (minGap > 0)
	    writeGap(q, qGap, qSeq->dna + lastQ, t, tGap, tSeq->dna + lastT);
	else if (qGap > 0)
	    writeInsert(q, t, qSeq->dna + lastQ, qGap);
	else if (tGap > 0)
	    writeInsert(t, q, tSeq->dna + lastT, tGap);
    /* Append the block itself to both strings and remember where it ended. */
    size = psl->blockSizes[blockIx];
    assert(qSeq != NULL);
    dyStringAppendN(q, qSeq->dna + qs, size);
    lastQ = qs + size;
    dyStringAppendN(t, tSeq->dna + ts, size);
    lastT = ts + size;

/* The two strings are expected to have equal length; a mismatch only warns. */
if (strlen(q->string) != strlen(t->string))
    warn("Symbol count(t) %d != %d inconsistent at t %s:%d and qName %s\n%s\n%s\n",
    	(int)strlen(t->string), (int)strlen(q->string), psl->tName, psl->tStart, psl->qName, t->string, q->string);
/* Minus-strand query: reverse-complement both assembled strings. */
if (psl->strand[0] == '-')
    reverseComplement(q->string, q->stringSize);
    reverseComplement(t->string, t->stringSize);
/* Create the axt using the shorter of the two string sizes. */
axt = axtCreate(q->string, t->string, min(q->stringSize,t->stringSize), psl);
/* NOTE(review): the statement this `if` governed is not visible in the
 * excerpt. */
if (qIsNib)
//if (tIsNib)
//    freez(&tName);
return axt;
Exemplo n.º 2
struct gapInfo *findLargeGaps(struct xaAli *xa, struct gapInfo *oldList)
/* Find large gaps in alignment and classify them.
 * Scans xa->hSym for runs of a single symbol; for runs of 'Q' or 'T' longer
 * than 32 a gapInfo is filled in and, if uniqueGap(oldList, gap) accepts it,
 * pushed onto the returned list and classified.
 * Returns the head of the new gap list (NULL if nothing was added).
 * NOTE(review): this excerpt has no braces and several statements appear to
 * be missing (allocation of `gap`, the increment of runSize, the bodies of
 * the two trailing ifs) - confirm against the original file. */
struct gdfGene *gdfList;
struct gapInfo *gapList = NULL, *gap;
/* ceIx/cbIx/symIx are current positions; ce*, cb* and sym* Start mark where
 * the current run began.  ce offsets are added to xa->tStart and cb offsets
 * to xa->qStart below. */
int ceIx=0, cbIx=0, symIx=0;
int ceStart=0, cbStart=0, symStart=0;
int runSize = 0;
char sym, lastSym = 0;
int symCount = xa->symCount;

/* Fetch C. elegans region. */
gdfList = wormGdfGenesInRange(xa->target, xa->tStart, xa->tEnd, &wormSangerGdfCache);

/* Run a little state machine that does something at the end of each solid run 
 * of a symbol. */
/* The bound is inclusive, so hSym[symCount] is also read - presumably the
 * string terminator, which would close the final run; confirm. */
for (symIx = 0; symIx <= symCount; ++symIx)
    sym = xa->hSym[symIx];
    if (sym != lastSym)
        if (runSize > 32)       /* Introns need to be at least this long. */
            /* We're at end of a solid run. */
            if (lastSym == 'Q' || lastSym == 'T')
                /* Target coordinates of the run just ended. */
                int ceGapStart = xa->tStart + ceStart;
                int ceGapEnd = xa->tStart + ceIx;
                struct gdfGene *gdf;
                /* Symbols flanking the run.
                 * NOTE(review): reads hSym[symStart-1]; if a qualifying run
                 * can begin at symStart == 0 this indexes before the array -
                 * confirm. */
                char hBefore = xa->hSym[symStart-1];
                char hAfter = sym;
                char strand = '.';

                /* NOTE(review): `gap` is declared above without an
                 * initializer and no allocation is visible before these
                 * writes. */
                gap->query = cloneString(xa->query);
                gap->qStart = xa->qStart + cbStart;
                gap->qEnd = xa->qStart + cbIx;
                gap->target = cloneString(xa->target);
                gap->tStart = ceGapStart;
                gap->tEnd = ceGapEnd;
                gap->name = cloneString(xa->name);
                gap->size = runSize;
                gap->hSym = lastSym;
                /* Keep the gap only if uniqueGap accepts it against oldList. */
                if (uniqueGap(oldList, gap))
                    slAddHead(&gapList, gap);

                    /* classifyGap is handed the addresses of gap->type and
                     * gdf; a non-NULL gdf supplies the strand used below,
                     * otherwise strand stays '.'. */
                    classifyGap(gdfList, xa->target, ceGapStart, ceGapEnd, lastSym, &gap->type, &gdf);
                    if (gdf != NULL)
                        strand = gdf->strand;
                    gap->hasIntronEnds = isIntron(xa, symStart, symIx, lastSym, strand, &gap->slideCount, &gap->isRc);
                    if (gap->hasIntronEnds)
                        slideGap(gap, xa, lastSym, symStart, symIx);
                    /* Strong homology: both flanking symbols pass isConserved. */
                    if (isConserved(hBefore) && isConserved(hAfter))
                        gap->hasStrongHomology = TRUE;
                    /* `out` is not declared in this excerpt - presumably a
                     * file-level FILE *; verify. */
                    if (gap->hasStrongHomology)
                        if (lastSym == 'T')
                            writeGap(gap, xa, symStart+gap->slideCount, symIx+gap->slideCount, strand, out);
        /* Start tracking a new run at the current position. */
        runSize = 0;
        ceStart = ceIx;
        cbStart = cbIx;
        symStart = symIx;
        lastSym = sym;
    /* NOTE(review): the bodies of these two ifs are not visible; presumably
     * they advance cbIx and ceIx for non-gap columns - verify. */
    if (xa->qSym[symIx] != '-')
    if (xa->tSym[symIx] != '-')

return gapList;
Exemplo n.º 3
void prettyOne(struct psl *psl, struct hash *qHash, struct hash *tHash,
	struct dlList *fileCache, FILE *f, boolean axt, FILE *checkFile)
/* Make pretty output for one psl.  Find target and query
 * sequence in hash.  Load them.  Output bases.
 * Sequences are fetched with readCachedSeqPart, the psl blocks are walked to
 * build aligned query/target strings, and the result is written to f with
 * axtOutString and/or prettyOutString.  When checkFile is non-NULL,
 * outputCheck is also called with it.
 * NOTE(review): this excerpt has no braces and some lines (an `else`, the
 * bodies of several ifs) appear to be missing - confirm against the original
 * file. */
/* static: names and sequences are remembered from one call to the next. */
static char *tName = NULL, *qName = NULL;
static struct dnaSeq *tSeq = NULL, *qSeq = NULL;
/* Accumulators for the aligned query (q) and target (t) text.  No matching
 * free is visible in this excerpt. */
struct dyString *q = newDyString(16*1024);
struct dyString *t = newDyString(16*1024);
int blockIx;
int qs, ts;
int lastQ = 0, lastT = 0, size;
/* Offsets and partial flags are plain locals, so they start at 0/FALSE on
 * every call and are only filled in when readCachedSeqPart runs.
 * NOTE(review): when the remembered static sequence is reused (same name as
 * the previous call) these stay 0/FALSE even if that sequence was loaded as a
 * partial fragment - confirm this is intended. */
int qOffset = 0;
int tOffset = 0;
boolean qIsPartial = FALSE;
boolean tIsPartial = FALSE;

/* (Re)load the query when its name differs from the remembered one. */
if (qName == NULL || !sameString(qName, psl->qName))
    qName = cloneString(psl->qName);
    readCachedSeqPart(qName, psl->qStart, psl->qEnd-psl->qStart, 
    	qHash, fileCache, &qSeq, &qOffset, &qIsPartial);
    /* Partial minus-strand query: offset is measured from the other end. */
    if (qIsPartial && psl->strand[0] == '-')
	    qOffset = psl->qSize - psl->qEnd;
/* (Re)load the target likewise.  tIsPartial is still FALSE when this
 * condition is evaluated, since nothing assigns it before here. */
if (tName == NULL || !sameString(tName, psl->tName) || tIsPartial)
    tName = cloneString(psl->tName);
    readCachedSeqPart(tName, psl->tStart, psl->tEnd-psl->tStart, 
	tHash, fileCache, &tSeq, &tOffset, &tIsPartial);
if (tIsPartial && psl->strand[1] == '-')
    tOffset = psl->tSize - psl->tEnd;
/* Reverse-complement in place whichever side is on the minus strand. */
if (psl->strand[0] == '-')
    reverseComplement(qSeq->dna, qSeq->size);
if (psl->strand[1] == '-')
    reverseComplement(tSeq->dna, tSeq->size);
/* Walk the psl blocks, appending aligned bases to q and t. */
for (blockIx=0; blockIx < psl->blockCount; ++blockIx)
    /* Block starts relative to the loaded sequences. */
    qs = psl->qStarts[blockIx] - qOffset;
    ts = psl->tStarts[blockIx] - tOffset;

    /* Output gaps except in first case. */
    /* writeGap is used when both sides advanced since the previous block,
     * otherwise writeInsert for the one side that did. */
    if (blockIx != 0)
	int qGap, tGap, minGap;
	qGap = qs - lastQ;
	tGap = ts - lastT;
	minGap = min(qGap, tGap);
	if (minGap > 0)
	    writeGap(q, qGap, qSeq->dna + lastQ, t, tGap, tSeq->dna + lastT);
	else if (qGap > 0)
	    writeInsert(q, t, qSeq->dna + lastQ, qGap);
	else if (tGap > 0)
	    writeInsert(t, q, tSeq->dna + lastT, tGap);
    /* Output sequence. */
    size = psl->blockSizes[blockIx];
    dyStringAppendN(q, qSeq->dna + qs, size);
    lastQ = qs + size;
    dyStringAppendN(t, tSeq->dna + ts, size);
    lastT = ts + size;
    /* NOTE(review): the only visible body of this `if` is the commented-out
     * printf below. */
    if(q->stringSize != t->stringSize)
//        printf("%d BLK %s q size %d t size %d diff %d qs size %d ts size %d\n",blockIx, psl->qName, q->stringSize, t->stringSize, q->stringSize - t->stringSize, qSeq->size, tSeq->size );

if (checkFile != NULL)
    outputCheck(psl, qSeq, qOffset, tSeq, tOffset, checkFile);
/* A second in-place reverse complement of the same buffers, skipped for
 * partial loads - presumably to restore the remembered sequences to their
 * original orientation for the next call; confirm. */
if (psl->strand[0] == '-' && !qIsPartial)
    reverseComplement(qSeq->dna, qSeq->size);
if (psl->strand[1] == '-' && !tIsPartial)
    reverseComplement(tSeq->dna, tSeq->size);

/* NOTE(review): as above, only a commented-out printf follows this `if`. */
if(q->stringSize != t->stringSize)
 //   printf("AF %s q size %d t size %d qs size %d ts size %d\n",psl->qName, q->stringSize, t->stringSize, qSeq->size, tSeq->size );
//assert(q->stringSize == t->stringSize);
/* Both writers receive the shorter of the two string sizes and a literal 60.
 * NOTE(review): an `else` presumably separated the two calls. */
if (axt)
    axtOutString(q->string, t->string, min(q->stringSize,t->stringSize), 60, psl, f);
    prettyOutString(q->string, t->string, min(q->stringSize,t->stringSize), 60, psl, f);
/* NOTE(review): the bodies of these two ifs are not visible in the excerpt. */
if (qIsPartial)
if (tIsPartial)