Ejemplo n.º 1
0
void copyPslXaToTab(char *pslFile, char *tabFile)
/* copy a single PSL XA to the tab file */
{
struct xAli *xa;
char *row[23];
struct lineFile *lf = lineFileOpen(pslFile, TRUE);
struct pipeline *pl = NULL;
FILE *tabFh = NULL;
if (noSort)
    tabFh = mustOpen(tabFile, "w");
else
    {
    if (pslCreateOpts & PSL_WITH_BIN)
	pl = pipelineOpen(outPipeBin, pipelineWrite, tabFile, NULL);
    else
	pl = pipelineOpen(outPipeNoBin, pipelineWrite, tabFile, NULL);
    tabFh = pipelineFile(pl);
    }
while (lineFileRow(lf, row))
    {
    xa = xAliLoad(row);
    if (pslCreateOpts & PSL_WITH_BIN)
        fprintf(tabFh, "%u\t", hFindBin(xa->tStart, xa->tEnd));
    xAliTabOut(xa, tabFh);
    xAliFree(&xa);
    }
lineFileClose(&lf);
if (noSort)
    carefulClose(&tabFh);
else
    {
    pipelineWait(pl);
    pipelineFree(&pl);
    }
}
void liftPsl(char *destFile, struct hash *liftHash, int sourceCount, char *sources[],
	boolean querySide, boolean isExtended)
/* Lift up coordinates in .psl file. */
{
FILE *dest = mustOpen(destFile, "w");
char *source;
int i,j;
struct lineFile *lf;
struct psl *psl;
struct xAli *xa = NULL;
unsigned *starts;
unsigned *blockSizes;
struct liftSpec *spec;
int offset;
int blockCount;
char *seqName;
int dotMod = dots;
int seqSize;
int strandChar = (querySide ? 0 : 1);

if (!nohead)
    pslWriteHead(dest);
for (i=0; i<sourceCount; ++i)
    {
    source = sources[i];
    if (!fileExists(source))
	{
	warn("%s doesn't exist!", source);
	continue;
	}
    verbose(1, "Lifting %s\n", source);
    lf = pslFileOpenWithMeta(source, dest);
    for (;;)
        {
	if (isExtended)
	    {
	    xa = xAliNext(lf);
	    psl = (struct psl *)xa;
	    }
	else
	    psl = pslNext(lf);
	if (psl == NULL)
	    break;
	boolean isProt = pslIsProtein(psl);

	doDots(&dotMod);
	if (querySide)
	    seqName = psl->qName;
	else
	    seqName = psl->tName;
	spec = findLift(liftHash, seqName, lf);
	if (spec == NULL)
	    {
	    if (how != carryMissing)
	        {
		freePslOrXa(psl, isExtended);
		continue;
		}
	    }
	else
	    {
	    offset = spec->offset;
	    blockSizes = psl->blockSizes;
	    if (querySide)
	        {
		if (!isPtoG)
		    {
		    cantHandleSpecRevStrand(spec);
		    psl->qStart += offset;
		    psl->qEnd += offset;
		    }
		else
		    {
		    psl->match *= 3;
		    psl->misMatch *= 3;
		    if (spec->strand == '-')
			{
			int tmp = psl->qEnd;
			psl->qEnd = psl->qStart;
			psl->qStart = tmp;
			psl->qStart *= -3;
			psl->qEnd *= -3;
			psl->qStart += offset;
			psl->qEnd += offset;
			}
		    else if (spec->strand == '+')
			{
			psl->qStart *= 3;
			psl->qStart += offset;
			psl->qEnd *= 3;
			psl->qEnd += offset;
			}
		    }
		starts = psl->qStarts;
		seqSize = psl->qSize;
		}
	    else
	        {
		if (spec->strand == '-')
		    reverseIntRange(&psl->tStart, &psl->tEnd, psl->tSize);
		psl->tStart += offset;
		psl->tEnd += offset;
		starts = psl->tStarts;
		seqSize = psl->tSize;
		}
	    blockCount = psl->blockCount;
	    if (isPtoG && (spec->strand == '-'))
	        {
		psl->strand[strandChar] = spec->strand;
		for (j=0; j<blockCount; ++j)
		    {
		    starts[j] *= -3;
		    starts[j] += offset;
		    starts[j] = spec->newSize - starts[j];
		    }
		}
	    else if (isPtoG && (spec->strand == '+'))
	        {
		psl->strand[strandChar] = spec->strand;
		for (j=0; j<blockCount; ++j)
		    {
		    starts[j] *= 3;
		    starts[j] += offset;
		    }
		}
	    else /* mRNA case. */
		{
		if (spec->strand == '+')
		    {
		    if (psl->strand[strandChar] == '-')
			{
			for (j=0; j<blockCount; ++j)
			    {
			    int tr = seqSize - starts[j];
			    tr += offset;
			    starts[j] = spec->newSize - tr;
			    }
			}
		    else
			{
			for (j=0; j<blockCount; ++j)
			    starts[j] += offset;
			}
		    }
		else
		    {
		    if (isProt)
			{
			/* if it's protein, we can't reverse the query */
			if (psl->strand[strandChar] == '-')
			    {
			    for (j=0; j<blockCount; ++j)
				starts[j] += offset;
			    }
			else
			    {
			    for (j=0; j<blockCount; ++j)
				{
				int tr = seqSize - starts[j];
				tr += offset;
				starts[j] = spec->newSize - tr;
				}
			    }
			psl->strand[strandChar] = 
			    flipStrand(psl->strand[strandChar]);
			}
		    else
			{
			if (psl->strand[strandChar] == '-')
			     errAbort("Can't handle all these minus strands! line %d",lf->lineIx);
			else
			    {
			    for (j=0; j<blockCount; ++j)
				{
				psl->tStarts[j] = psl->tSize - 
				    (psl->tStarts[j] + blockSizes[j]) + offset;
				psl->qStarts[j] = psl->qSize - 
				    (psl->qStarts[j] + blockSizes[j]);	/* no offset. */
				}
			    psl->strand[1-strandChar] = 
				flipStrand(psl->strand[1-strandChar]);
			    reverseUnsigned(blockSizes, blockCount);
			    reverseUnsigned(psl->qStarts, blockCount);
			    reverseUnsigned(psl->tStarts, blockCount);
			    }
			}
		    }
		}

	    if (isPtoG)
		for (j=0; j<blockCount; ++j)
		    blockSizes[j] *= 3;
	    if (querySide)
	        {
		psl->qSize = spec->newSize;
		psl->qName = spec->newName;
		}
	    else
	        {
		psl->tSize = spec->newSize;
		psl->tName = spec->newName;
		}
	    }
	if (isExtended)
	    {
	    xAliTabOut(xa, dest);
	    }
	else
	    {
	    pslTabOut(psl, dest);
	    }
	if (querySide)
	    psl->qName = seqName;
	else
	    psl->tName = seqName;
	freePslOrXa(psl, isExtended);
	}
    lineFileClose(&lf);
    if (dots)
        verbose(1, "\n");
    }
if (ferror(dest))
    errAbort("error writing %s", destFile);
fclose(dest);
}