Ejemplo n.º 1
0
static void addUngappedBlock(struct psl* psl, int* pslSpace, struct block* blk, unsigned flags)
/* Append blk as the next ungapped block of psl.  The block arrays are grown
 * through pslGrow() once the new index reaches *pslSpace, the overall
 * query/target bounds of the psl are brought up to date, and, when flags
 * has bldPslx set, the aligned text of both sides is copied in as well. */
{
unsigned idx = psl->blockCount;
/* length is taken from the query side so that protein psls come out right */
unsigned len = blk->qEnd - blk->qStart;

if (idx >= *pslSpace)
    pslGrow(psl, pslSpace);

/* the block itself, scaled by the per-side coordinate multipliers */
psl->blockSizes[idx] = blk->qCoordMult * len;
psl->qStarts[idx] = blk->qCoordMult * blk->qStart;
psl->tStarts[idx] = blk->tCoordMult * blk->tStart;

/* query bounds: start of first block through end of this one,
 * flipped when the query strand is '-' */
psl->qStart = psl->qStarts[0];
psl->qEnd = psl->qStarts[idx] + (blk->qCoordMult * len);
if (psl->strand[0] == '-')
    reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize);

/* target bounds, same idea using the query-to-target size multiplier */
psl->tStart = psl->tStarts[0];
psl->tEnd = psl->tStarts[idx] + (blk->q2tBlkSizeMult * len);
if (psl->strand[1] == '-')
    reverseIntRange(&psl->tStart, &psl->tEnd, psl->tSize);

if ((flags & bldPslx) != 0)
    {
    /* pslx: keep len characters of each aligned string for this block */
    psl->qSequence[idx] = cloneStringZ(blk->qAln + blk->alnStart, len);
    psl->tSequence[idx] = cloneStringZ(blk->tAln + blk->alnStart, len);
    }
psl->blockCount += 1;
}
static void convertToPsl(struct mafComp *qc, struct mafComp *tc, FILE *pslFh)
/* Convert two maf components (qc as query, tc as target) to a psl and
 * write it to pslFh.  Component ranges on the '-' strand are flipped with
 * reverseIntRange() before being handed to pslFromAlign().  If the
 * resulting psl has a '-' target strand it is reverse-complemented, and the
 * target strand is then dropped so only the query strand is written.
 * Nothing is written when pslFromAlign() returns NULL. */
{
struct psl* psl;
int qStart = qc->start;
int qEnd = qc->start+qc->size;
int tStart = tc->start;
int tEnd = tc->start+tc->size;
char strand[3];
strand[0] = qc->strand;
strand[1] = tc->strand;
strand[2] = '\0';

if (qc->strand == '-')
    reverseIntRange(&qStart, &qEnd, qc->srcSize);
if (tc->strand == '-')
    reverseIntRange(&tStart, &tEnd, tc->srcSize);

psl = pslFromAlign(skipDot(qc->src), qc->srcSize, qStart, qEnd, qc->text,
                   skipDot(tc->src), tc->srcSize, tStart, tEnd, tc->text,
                   strand, 0);
if (psl != NULL)
    {
    /* drop target strand */
    if (psl->strand[1] == '-')
        pslRc(psl);
    psl->strand[1] = '\0';
    pslTabOut(psl, pslFh);
    /* the psl is owned here; release it so repeated calls do not leak */
    pslFree(&psl);
    }
}
Ejemplo n.º 3
0
void axtOutString(char *q, char *t, int size, int lineSize, 
	struct psl *psl, FILE *f)
/* Output string side-by-side in Scott's axt format.
 * A summary line (running index, target name/range, query name/range,
 * strand(s), score) is written to f, followed by the target text t, the
 * query text q, and a blank line.  Ranges on a '-' strand are flipped with
 * reverseIntRange() and starts are printed one-based.  A warning is issued
 * when either string's length differs from size; at most size characters
 * of each string are written, stopping early at the terminating NUL.
 * lineSize is not used by this function. */
{
int i;
static int ix = 0;	/* running alignment number, kept across calls */
int qs = psl->qStart, qe = psl->qEnd;
int ts = psl->tStart, te = psl->tEnd;
int score = axtScoreSym(ss, size, q, t);
size_t tLen = strlen(t);
size_t qLen = strlen(q);

if (psl->strand[0] == '-')
    reverseIntRange(&qs, &qe, psl->qSize);

if (psl->strand[1] == '-')
    reverseIntRange(&ts, &te, psl->tSize);

if (psl->strand[1] != 0)
    fprintf(f, "%d %s %d %d %s %d %d %c%c %d\n", ++ix, psl->tName, ts+1, 
            te, psl->qName, qs+1, qe, psl->strand[1], psl->strand[0], score);
else
    fprintf(f, "%d %s %d %d %s %d %d %c %d\n", ++ix, psl->tName, psl->tStart+1, 
            psl->tEnd, psl->qName, qs+1, qe, psl->strand[0], score);

if (tLen != (size_t)size)
    warn("size of T %ld and Q %d differ on line %d\n",(long)tLen, size, ix);
/* never read past the end of t, even if it is shorter than size */
for (i=0; i<size && t[i] != '\0'; i++) 
    fputc(t[i],f);
fputc('\n',f);

if (qLen != (size_t)size)
    /* this check is about q, so label the reported length as Q */
    warn("size of Q %ld and T %d differ on line %d\n",(long)qLen, size, ix);
/* same bound for q */
for (i=0; i<size && q[i] != '\0'; i++) 
    fputc(q[i],f);
fputc('\n',f);
fputc('\n',f);
}
Ejemplo n.º 4
0
struct dnaSeq *gfiExpandAndLoadCached(struct gfRange *range, 
	struct hash *tFileCache, char *tSeqDir, int querySize, 
	int *retTotalSeqSize, boolean respectFrame, boolean isRc, int expansion)
/* Widen range by expansion bases on each side (via gfiExpandRange) and load
 * the corresponding piece of target sequence.  The two steps are combined
 * because the target size is only known once the file has been looked up.
 * Open nib/2bit handles are kept in tFileCache, keyed by file name.  When
 * isRc is set the range is flipped before expanding, the loaded sequence is
 * reverse-complemented, and the range is flipped back afterwards.  The full
 * size of the target sequence is returned in *retTotalSeqSize. */
{
char path[PATH_LEN+256];
struct dnaSeq *seq = NULL;

safef(path, sizeof(path), "%s/%s", tSeqDir, range->tName);
if (!nibIsFile(path))
    {
    /* Not a nib: expect "file.2bit:seqName" and split it at the colon. */
    char *seqName = strchr(path, ':');
    struct twoBitFile *tbf = NULL;
    int seqSize = 0;

    if (seqName == NULL)
        errAbort("No colon in .2bit response from gfServer");
    *seqName++ = 0;

    tbf = hashFindVal(tFileCache, path);
    if (tbf == NULL)
        {
        tbf = twoBitOpen(path);
        hashAdd(tFileCache, path, tbf);
        }
    seqSize = twoBitSeqSize(tbf, seqName);

    if (isRc)
        reverseIntRange(&range->tStart, &range->tEnd, seqSize);
    gfiExpandRange(range, querySize, seqSize, respectFrame, isRc, expansion);
    seq = twoBitReadSeqFragLower(tbf, seqName, range->tStart, range->tEnd);
    if (isRc)
        {
        reverseComplement(seq->dna, seq->size);
        reverseIntRange(&range->tStart, &range->tEnd, seqSize);
        }
    *retTotalSeqSize = seqSize;
    }
else
    {
    /* Nib file: one sequence per file, size comes from the cached info. */
    struct nibInfo *nib = hashFindVal(tFileCache, path);
    if (nib == NULL)
        {
        nib = nibInfoNew(path);
        hashAdd(tFileCache, path, nib);
        }

    if (isRc)
        reverseIntRange(&range->tStart, &range->tEnd, nib->size);
    gfiExpandRange(range, querySize, nib->size, respectFrame, isRc, expansion);
    seq = nibLdPart(path, nib->f, nib->size, 
        range->tStart, range->tEnd - range->tStart);
    if (isRc)
        {
        reverseComplement(seq->dna, seq->size);
        reverseIntRange(&range->tStart, &range->tEnd, nib->size);
        }
    *retTotalSeqSize = nib->size;
    }
return seq;
}
Ejemplo n.º 5
0
void correctOne(struct dnaSeq *est, struct psl *psl, char *nibDir, 
   struct hash *nibHash, FILE *f)
/* Write one corrected EST to file f as a fasta record.  Stretches of the
 * EST covered by alignment blocks of psl are replaced with the aligned
 * genomic bases (upper-cased); stretches not covered by any block are
 * copied from the EST itself. */
{
struct dnaSeq *geno = readCachedNib(nibHash, nibDir, psl->tName, 
	psl->tStart, psl->tEnd - psl->tStart);
struct dyString *fixed = newDyString(est->size+20);
struct mrnaBlock *blocks, *blk;
int qSize = psl->qSize, tSize = psl->tSize;
int genoBase = psl->tStart;	/* target coordinate of geno->dna[0] */
int qDone = 0;			/* query bases emitted so far */

/* Upper-case genomic sequence (helps debugging) and fetch the blocks. */
toUpperN(geno->dna, geno->size);
blocks = mrnaBlockFromPsl(psl);

/* On the reverse strand flip the sequence, its offset, and every block,
 * then put the blocks back in ascending order. */
if (psl->strand[0] == '-')
    {
    reverseComplement(geno->dna, geno->size);
    genoBase = tSize - psl->tEnd;
    for (blk = blocks; blk != NULL; blk = blk->next)
        {
        reverseIntRange(&blk->tStart, &blk->tEnd, tSize);
        reverseIntRange(&blk->qStart, &blk->qEnd, qSize);
        }
    slReverse(&blocks);
    }

/* Build the corrected sequence block by block. */
for (blk = blocks; blk != NULL; blk = blk->next)
    {
    int gap = blk->qStart - qDone;
    if (gap > 0)
        dyStringAppendN(fixed, est->dna + qDone, gap);
    dyStringAppendN(fixed, geno->dna + blk->tStart - genoBase, 
        blk->tEnd - blk->tStart);
    qDone = blk->qEnd;
    }
/* Whatever is left of the EST past the last block. */
if (qDone != qSize)
    dyStringAppendN(fixed, est->dna + qDone, qSize - qDone);

faWriteNext(f, est->name, fixed->string, fixed->stringSize);

slFreeList(&blocks);
freeDyString(&fixed);
freeDnaSeq(&geno);
}
static struct mapAln *chainToPsl(struct chain *ch)
/* Build a psl from the blocks of chain ch and wrap it in a mapAln carrying
 * the chain id.  Match counts are not real: match is simply the sum of the
 * block sizes.  If the swapMap global is set, query and target of the psl
 * are swapped before wrapping. */
{
int qStart = ch->qStart, qEnd = ch->qEnd;
int n = 0;
char strand[2];
struct psl *psl;
struct cBlock *blk;

strand[0] = ch->qStrand;
strand[1] = '\0';
/* flip the chain's query range when it is on the '-' strand */
if (ch->qStrand == '-')
    reverseIntRange(&qStart, &qEnd, ch->qSize);

psl = pslNew(ch->qName, ch->qSize, qStart, qEnd,
             ch->tName, ch->tSize, ch->tStart, ch->tEnd,
             strand, slCount(ch->blockList), 0);

blk = ch->blockList;
while (blk != NULL)
    {
    psl->qStarts[n] = blk->qStart;
    psl->tStarts[n] = blk->tStart;
    psl->blockSizes[n] = (blk->tEnd - blk->tStart);
    psl->match += psl->blockSizes[n];
    n++;
    blk = blk->next;
    }
psl->blockCount = n;

if (swapMap)
    pslSwap(psl, FALSE);
return mapAlnNew(psl, ch->id);
}
Ejemplo n.º 7
0
static void setPslBounds(struct psl* mappedPsl)
/* Recompute qStart/qEnd and tStart/tEnd of a mapped psl from its first and
 * last blocks, flipping each range when that side is on the '-' strand. */
{
struct psl *p = mappedPsl;
int last = p->blockCount-1;

/* query side */
p->qStart = p->qStarts[0];
p->qEnd = p->qStarts[last] + p->blockSizes[last];
if (pslQStrand(p) == '-')
    reverseIntRange(&p->qStart, &p->qEnd, p->qSize);

/* target side */
p->tStart = p->tStarts[0];
p->tEnd = p->tStarts[last] + p->blockSizes[last];
if (pslTStrand(p) == '-')
    reverseIntRange(&p->tStart, &p->tEnd, p->tSize);
}
Ejemplo n.º 8
0
void axtToPsl(char *inName, char *tSizeFile, char *qSizeFile, char *outName)
/* axtToPsl - Convert axt to psl format.  Sequence sizes are looked up in
 * the tables read from tSizeFile and qSizeFile; every axt record read from
 * inName for which pslFromAlign() yields a psl is written to outName. */
{
struct hash *tSizes = readSizes(tSizeFile);
struct hash *qSizes = readSizes(qSizeFile);
struct lineFile *in = lineFileOpen(inName, TRUE);
FILE *out = mustOpen(outName, "w");
struct axt *axt;
char strand[2];

strand[1] = '\0';
for (;;)
    {
    struct psl *psl;
    int qSize, tSize;
    int qStart, qEnd;

    axt = axtRead(in);
    if (axt == NULL)
        break;

    /* query range, flipped when the axt query is on '-' */
    qSize = findSize(qSizes, axt->qName);
    qStart = axt->qStart;
    qEnd = axt->qEnd;
    if (axt->qStrand == '-')
        reverseIntRange(&qStart, &qEnd, qSize);
    strand[0] = axt->qStrand;

    tSize = findSize(tSizes, axt->tName);
    psl = pslFromAlign(axt->qName, qSize, qStart, qEnd, axt->qSym, 
                       axt->tName, tSize,
                       axt->tStart, axt->tEnd, axt->tSym, strand,
                       PSL_IS_SOFTMASK);
    if (psl != NULL)
        {
        pslTabOut(psl, out);
        pslFree(&psl);
        }
    axtFree(&axt);
    }
lineFileClose(&in);
carefulClose(&out);
}
Ejemplo n.º 9
0
void samToOpenBed(char *samIn, FILE *f)
/* Read the SAM file samIn and write one six-column bed line
 * (chrom, start, end, ".", 0, strand) per mapped read to the already open
 * file f.  Reads without a target (tid < 0) are skipped.  The end is only
 * approximate: it is start plus the read length, the cigar is not parsed.
 * For reads flagged BAM_FREVERSE the strand is '-' and the range is flipped
 * within the target length.  Aborts if the file cannot be opened or if
 * samread reports an error other than -1. */
{
    samfile_t *sf = samopen(samIn, "r", NULL);
    /* samopen hands back NULL on failure; don't dereference it */
    if (sf == NULL)
        errnoAbort("Couldn't open %s", samIn);
    bam_header_t *bamHeader = sf->header;
    bam1_t one;
    ZeroVar(&one);
    int err;
    while ((err = samread(sf, &one)) >= 0)
    {
        int32_t tid = one.core.tid;
        if (tid < 0)
            continue;
        char *chrom = bamHeader->target_name[tid];
        // Approximate here... can do better if parse cigar.
        int start = one.core.pos;
        int size = one.core.l_qseq;
        int end = start + size;
        boolean isRc = (one.core.flag & BAM_FREVERSE);
        char strand = '+';
        if (isRc)
        {
            strand = '-';
            reverseIntRange(&start, &end, bamHeader->target_len[tid]);
        }
        fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand);
    }
    /* the loop above ends on -1 as well; anything else negative is an error */
    if (err < 0 && err != -1)
        errnoAbort("samread err %d", err);
    samclose(sf);
}
Ejemplo n.º 10
0
boolean closeToTop(struct psl *psl, int *scoreTrack)
/* Returns TRUE if psl is near the top scorer for enough bases: walks every
 * query position covered by a block of psl and counts those where
 * scoreTrack[] is at or below the psl's own sized score scaled by
 * (1 + nearTop), returning TRUE as soon as minNearTopSize such positions
 * have been seen. */
{
int threshold = round(calcSizedScore(psl) * (1.0+nearTop));
int isMinus = (psl->strand[0] == '-');
int nearCount = 0;
int blk;

for (blk = 0; blk < psl->blockCount; ++blk)
    {
    int pos;
    int start = psl->qStarts[blk];
    int end = start + psl->blockSizes[blk];

    /* scoreTrack is indexed in forward query coordinates */
    if (isMinus)
        reverseIntRange(&start, &end, psl->qSize);
    for (pos = start; pos < end; ++pos)
        {
        if (scoreTrack[pos] <= threshold && ++nearCount >= minNearTopSize)
            return TRUE;
        }
    }
return FALSE;
}
Ejemplo n.º 11
0
static void qFillOtherRange(struct fill *fill)
/* fill->start/end hold bounds in q coordinates.  Clip the blocks of
 * fill->chain to those bounds, then store the q extent actually covered
 * back into fill->start/end and the matching t extent into
 * fill->oStart/oEnd.  For a '-' strand chain the q bounds are flipped
 * before clipping and the resulting q extent is flipped back. */
{
struct chain *chain = fill->chain;
boolean isMinus = (chain->qStrand == '-');
int lo = fill->start, hi = fill->end;	/* clip window */
int qLo = BIGNUM, qHi = -BIGNUM;
int tLo = BIGNUM, tHi = -BIGNUM;
struct cBlock *blk;

if (isMinus)
    reverseIntRange(&lo, &hi, chain->qSize);

for (blk = chain->blockList; blk != NULL; blk = blk->next)
    {
    int qs = blk->qStart, qe = blk->qEnd;
    int ts = blk->tStart, te = blk->tEnd;

    if (qe <= lo)
        continue;	/* entirely before the window */
    if (qs >= hi)
        break;		/* this block starts past the window; stop scanning */

    /* trim both sides of the block by the same amount */
    if (qs < lo)
        {
        ts += (lo - qs);
        qs = lo;
        }
    if (qe > hi)
        {
        te -= (qe - hi);
        qe = hi;
        }

    /* grow the covered extents */
    if (qs < qLo) qLo = qs;
    if (qe > qHi) qHi = qe;
    if (ts < tLo) tLo = ts;
    if (te > tHi) tHi = te;
    }

if (isMinus)
    reverseIntRange(&qLo, &qHi, chain->qSize);
fill->start = qLo;
fill->end = qHi;
fill->oStart = tLo;
fill->oEnd = tHi;
assert(tLo < tHi);
}
Ejemplo n.º 12
0
void reverseBlocksQ(struct cBlock **pList, int qSize)
/* Reverse the order of the block list and flip the query range of every
 * block within a query of qSize bases. */
{
struct cBlock *blk;

slReverse(pList);
blk = *pList;
while (blk != NULL)
    {
    reverseIntRange(&blk->qStart, &blk->qEnd, qSize);
    blk = blk->next;
    }
}
void flipExonList(struct range **pList, int regionSize)
/* Flip exon list to the other strand: each exon's range is reversed within
 * a region of regionSize bases, then the list order is reversed. */
{
struct range *cur = *pList;

while (cur != NULL)
    {
    reverseIntRange(&cur->start, &cur->end, regionSize);
    cur = cur->next;
    }
slReverse(pList);
}
Ejemplo n.º 14
0
struct axt *axtCreate(char *q, char *t, int size, struct psl *psl)
/* Allocate and fill in an axt from the aligned query text q, target text t
 * and the coordinates in psl.  Ranges on a '-' strand are flipped; starts
 * are stored as start+1.  The target strand of the axt is always '+'.
 * symCount is the length of t; a warning is issued if q has a different
 * length.  The score comes from axtScoreFilterRepeats() and is also
 * reported through verbose().  The size argument is not used. */
{
struct axt *axt = NULL;
int qs = psl->qStart, qe = psl->qEnd;
int ts = psl->tStart, te = psl->tEnd;
int symCount = 0;

AllocVar(axt);

if (psl->strand[0] == '-')
    reverseIntRange(&qs, &qe, psl->qSize);
if (psl->strand[1] == '-')
    reverseIntRange(&ts, &te, psl->tSize);

/* names and strands */
axt->qName = cloneString(psl->qName);
axt->tName = cloneString(psl->tName);
axt->qStrand = psl->strand[0];
axt->tStrand = '+';

/* coordinates; ts/te still equal psl->tStart/tEnd unless the target strand
 * is '-', so a psl without a target strand needs no separate case */
axt->qStart = qs+1;
axt->qEnd = qe;
axt->tStart = ts+1;
axt->tEnd = te;

/* alignment text */
axt->symCount = symCount = strlen(t);
axt->tSym = cloneString(t);
if (strlen(q) != symCount)
    warn("Symbol count %d != %d inconsistent at t %s:%d and qName %s\n%s\n%s\n",
    	symCount, (int)strlen(q), psl->tName, psl->tStart, psl->qName, t, q);
axt->qSym = cloneString(q);

axt->score = axtScoreFilterRepeats(axt, ss);
verbose(1,"axt score = %d\n",axt->score);
return axt;
}
Ejemplo n.º 15
0
void chainSwap(struct chain *chain)
/* Swap target and query side of chain. */
{
struct chain old = *chain;
struct cBlock *b;

/* Copy basic stuff swapping t and q. */
chain->qName = old.tName;
chain->tName = old.qName;
chain->qStart = old.tStart;
chain->qEnd = old.tEnd;
chain->tStart = old.qStart;
chain->tEnd = old.qEnd;
chain->qSize = old.tSize;
chain->tSize = old.qSize;

/* Swap t and q in blocks. */
for (b = chain->blockList; b != NULL; b = b->next)
    {
    struct cBlock old = *b;
    b->qStart = old.tStart;
    b->qEnd = old.tEnd;
    b->tStart = old.qStart;
    b->tEnd = old.qEnd;
    }

/* Cope with the minus strand. */
if (chain->qStrand == '-')
    {
    /* chain's are really set up so that the target is on the
     * + strand and the query is on the minus strand.
     * Therefore we need to reverse complement both 
     * strands while swapping to preserve this. */
    for (b = chain->blockList; b != NULL; b = b->next)
        {
	reverseIntRange(&b->tStart, &b->tEnd, chain->tSize);
	reverseIntRange(&b->qStart, &b->qEnd, chain->qSize);
	}
    reverseIntRange(&chain->tStart, &chain->tEnd, chain->tSize);
    reverseIntRange(&chain->qStart, &chain->qEnd, chain->qSize);
    slReverse(&chain->blockList);
    }
}
void bedWriteAxt(struct axt *axt, int qSize, int tSize, double idRatio, FILE *f)
/* Write out bounds of axt to a bed file.  One tab-separated line is
 * written to f: target name, target start, target end, query name,
 * idRatio expressed in parts per thousand, and the query strand.
 * Only the target range is written, so qSize and tSize are not used; they
 * remain in the signature for existing callers. */
{
/* truncating conversion: 0.9876 becomes 987 */
int idPpt = idRatio * 1000;
fprintf(f, "%s\t%d\t%d\t", axt->tName, axt->tStart, axt->tEnd);
fprintf(f, "%s\t%d\t%c\n", axt->qName, idPpt, axt->qStrand);
}
Ejemplo n.º 17
0
struct genbankCds getCds(struct sqlConnection *conn, struct psl *psl)
/* Obtain the CDS for the query of psl.
 *  - gNoCds set: start and end are both -1, neither end complete.
 *  - gAllCds set: the CDS covers the aligned part of the query (range
 *    flipped when the query strand is '-'), both ends complete.
 *  - otherwise the CDS string is fetched with getCdsForAcc() and parsed
 *    with genbankCdsParse().  If no CDS string is found, or the parsed CDS
 *    is longer than the query, start and end are both -1.  If parsing
 *    fails, the struct is returned as genbankCdsParse() left it.
 * Warnings go to stderr unless gQuiet is set. */
{
    struct genbankCds cds;
    char cdsBuf[4096];
    char *cdsStr;

    ZeroVar(&cds);

    if (gNoCds)
    {
        cds.start = -1;
        cds.end = -1;
        cds.startComplete = FALSE;
        cds.endComplete = FALSE;
        return cds;
    }

    if (gAllCds)
    {
        cds.start = psl->qStart;
        cds.end = psl->qEnd;
        if (psl->strand[0] == '-')
            reverseIntRange(&cds.start, &cds.end, psl->qSize);
        cds.startComplete = TRUE;
        cds.endComplete = TRUE;
        return cds;
    }

    cdsStr = getCdsForAcc(conn, psl->qName, cdsBuf, sizeof(cdsBuf));
    if (cdsStr == NULL)
    {
        if (!gQuiet)
            fprintf(stderr, "Warning: no CDS for %s\n", psl->qName);
        cds.start = cds.end = -1;
        return cds;
    }

    if (!genbankCdsParse(cdsStr, &cds))
    {
        if (!gQuiet)
            fprintf(stderr, "Warning: invalid CDS for %s: %s\n",
                    psl->qName, cdsStr);
    }
    else if ((cds.end-cds.start) > psl->qSize)
    {
        /* a CDS cannot be longer than the sequence it lives on */
        if (!gQuiet)
            fprintf(stderr, "Warning: CDS for %s (%u..%u) longer than qSize (%u)\n",
                    psl->qName, cds.start, cds.end, psl->qSize);
        cds.start = cds.end = -1;
    }
    return cds;
}
Ejemplo n.º 18
0
static boolean breakUpIfOnDiagonal(struct block *blockList, boolean isRc,
	char *qName, char *tName, int qSize, int tSize,
	struct block *retBlockLists[], int maxBlockLists, int *retCount) 
/* If any blocks are on diagonal, remove the blocks and separate the lists 
 * of blocks before and after the diagonal. Store block list pointers in 
 * retBlockLists, the number of lists in retCount, and return TRUE if 
 * we found any blocks on diagonal so we know to rescore afterwards.
 *
 * The search is only made when qName and tName are the same string; 
 * otherwise retBlockLists[0] is blockList, *retCount is 1 and FALSE is
 * returned.  A block counts as "on diagonal" when its target range 
 * intersects its own query range (the query range is flipped within qSize
 * first when isRc is set).  Removed blocks are released with freez().
 * Each resulting list is passed through removeFrayedEnds().  Aborts if 
 * more than maxBlockLists lists would be needed.  tSize is not used. */
{
int blockListIndex = 0;
boolean brokenUp = FALSE;

/* Until a diagonal block is found the whole input is the first list. */
retBlockLists[blockListIndex] = blockList;
if (sameString(qName, tName))
    {
    struct block *block = NULL, *lastBlock = NULL;
    int i = 0;
    for (block = blockList;  block != NULL;  block = block->next)
	{
	int qStart = block->qStart;
	int qEnd   = block->qEnd;
	/* The current list head is only ever advanced to the block after a
	 * diagonal one, so if this block is the head and there was a 
	 * previous block, that previous block was the removed one and no
	 * list refers to it any more. */
	if (lastBlock != NULL && block == retBlockLists[blockListIndex])
	    freez(&lastBlock);
	if (isRc)
	    reverseIntRange(&qStart, &qEnd, qSize);
	if (rangeIntersection(block->tStart, block->tEnd, qStart, qEnd) > 0)
	    {
	    brokenUp = TRUE;
	    /* Blocks precede this one in the current list: cut the list off 
	     * at the previous block and move on to the next list slot.  If 
	     * this block is itself the head, the current slot is reused. */
	    if (block != retBlockLists[blockListIndex])
		{
		assert(lastBlock != NULL);
		lastBlock->next = NULL;
		blockListIndex++;
		if (blockListIndex >= maxBlockLists)
		    errAbort("breakUpIfOnDiagonal: Too many fragmented block lists!");
		}
	    /* The (possibly empty) remainder after this block starts the 
	     * current list. */
	    retBlockLists[blockListIndex] = block->next;
	    }
	lastBlock = block;
	}
    /* An empty final list means the last block visited was a removed one
     * (or the input was empty): drop the empty slot and release that
     * block. */
    if (retBlockLists[blockListIndex] == NULL)
	{
	blockListIndex--;
	if (lastBlock != NULL)
	    freez(&lastBlock);
	}
    for (i=0;  i <= blockListIndex; i++)
	{
	retBlockLists[i] = removeFrayedEnds(retBlockLists[i]);
	}
    }
*retCount = blockListIndex + 1;
return brokenUp;
}
Ejemplo n.º 19
0
void addChainT(struct chrom *chrom, struct chrom *otherChrom, struct chain *chain)
/* Add T side of chain to the fill/gap tree of chrom.  This is the easier
 * side since target coordinates need no strand handling; only the query
 * side of each gap is flipped when the chain's query strand is '-'.
 * For every space found within the chain's target range that fillSpace()
 * manages to fill, the gaps between consecutive chain blocks that lie
 * strictly inside the space are added to the fill's gap list, and the
 * space itself is freed. */
{
struct slRef *spaces = findSpaces(chrom->spaces,chain->tStart,chain->tEnd);
struct slRef *el;
struct cBlock *first = chain->blockList;

for (el = spaces; el != NULL; el = el->next)
    {
    struct space *space = el->val;
    struct fill *fill;
    struct cBlock *cur, *next;

    /* Skip ahead while the following block still starts at or before the
     * beginning of the space. */
    while ((next = first->next) != NULL && next->tStart <= space->start)
        first = next;

    fill = fillSpace(chrom, space, chain, first, FALSE);
    if (fill == NULL)
        continue;

    /* Look at the gap after each block from here to the end of the chain. */
    for (cur = first; (next = cur->next) != NULL; cur = next)
        {
        int gapStart = cur->tEnd;
        int gapEnd = next->tStart;
        int qs, qe;
        struct gap *gap;

        if (!strictlyInside(space->start, space->end, gapStart, gapEnd))
            continue;
        qs = cur->qEnd;
        qe = next->qStart;
        if (chain->qStrand == '-')
            reverseIntRange(&qs, &qe, chain->qSize);
        gap = gapNew(gapStart, gapEnd, qs, qe);
        addSpaceForGap(chrom, gap);
        slAddHead(&fill->gapList, gap);
        }
    freez(&el->val);	/* aka space */
    }
slFreeList(&spaces);
}
Ejemplo n.º 20
0
boolean mafNeedSubset(struct mafAli *maf, char *componentSource,
	int newStart, int newEnd)
/* Return TRUE if the component of maf named componentSource is not
 * entirely contained in newStart/newEnd, i.e. the maf only partially
 * fits.  The requested range is flipped first when that component is on
 * the '-' strand. */
{
struct mafComp *comp = mafFindComponent(maf, componentSource);

if (comp->strand == '-')
    reverseIntRange(&newStart, &newEnd, comp->srcSize);

/* fits completely only if the range covers both component ends */
return !(newStart <= comp->start && newEnd >= comp->start + comp->size);
}
static void addChainQBlocks(struct chromAnn* ca, unsigned opts, struct chain* chain)
/* Add the query range of every block of chain to ca, flipping each range
 * when the chain's query strand is '-'.  opts is not used here. */
{
int isMinus = (chain->qStrand == '-');
struct cBlock *cur = chain->blockList;

while (cur != NULL)
    {
    int qs = cur->qStart, qe = cur->qEnd;
    if (isMinus)
        reverseIntRange(&qs, &qe, chain->qSize);
    chromAnnBlkNew(ca, qs, qe);
    cur = cur->next;
    }
}
Ejemplo n.º 22
0
static struct range getHapQRangePartContained(struct hapChrom *hapChrom, struct psl *refPsl)
/* Find the range of an mRNA that is aligned to a haplotype region of a ref
 * chrom when the alignment is not completely contained in the haplotype
 * range.  The haplotype bounds are flipped when the psl's target strand is
 * '-', and the resulting query range is flipped when its query strand is
 * '-'. */
{
    char qStrand = refPsl->strand[0];
    char tStrand = refPsl->strand[1];
    unsigned hapTStart = hapChrom->refStart;
    unsigned hapTEnd = hapChrom->refEnd;
    struct range qRange = {0, 0};

    if (tStrand == '-')
        reverseUnsignedRange(&hapTStart, &hapTEnd, refPsl->tSize);

    qRange.start = getHapQRangePartContainedStart(hapTStart, refPsl);
    qRange.end = getHapQRangePartContainedEnd(hapTEnd, refPsl);

    if (qStrand == '-')
        reverseIntRange(&qRange.start, &qRange.end, refPsl->qSize);
    return qRange;
}
Ejemplo n.º 23
0
static struct coords blastToUcsc(int blastStart, int blastEnd, int size, int blastFrame)
/* Convert coordinates from blast to UCSC convention.  The smaller of the
 * two blast coordinates, minus one, becomes the start and the larger one
 * the end.  A negative blastFrame means the '-' strand, in which case the
 * range is flipped within size. */
{
struct coords ucsc;

// blastStart >= blastEnd for queries with blastFrame < 0
// blastStart <= blastEnd for hits with blastFrame < 0
if (blastStart <= blastEnd)
    {
    ucsc.start = blastStart-1;
    ucsc.end = blastEnd;
    }
else
    {
    ucsc.start = blastEnd-1;
    ucsc.end = blastStart;
    }
ucsc.size = size;

if (blastFrame >= 0)
    ucsc.strand = '+';
else
    {
    ucsc.strand = '-';
    reverseIntRange(&ucsc.start, &ucsc.end, size);
    }
assert(ucsc.start < ucsc.end);
return ucsc;
}
static int basesShared(struct genePred *gp, struct psl *psl)
/* Return the number of bases shared by gp and psl: the sum, over all psl
 * blocks, of gpRangeIntersection() with the block's target range (flipped
 * when the psl target strand is '-'). */
{
int isMinus = (psl->strand[1] == '-');
int count = psl->blockCount;
int total = 0;
int blk;

for (blk = 0; blk < count; ++blk)
    {
    int tStart = psl->tStarts[blk];
    int tEnd = tStart + psl->blockSizes[blk];
    if (isMinus)
        reverseIntRange(&tStart, &tEnd, psl->tSize);
    total += gpRangeIntersection(gp, tStart, tEnd);
    }
return total;
}
Ejemplo n.º 25
0
static void exonFramesCheck(struct cdsExon *exon, struct exonFrames *ef)
/* Sanity check an exonFrames object against the exon it should belong to;
 * aborts with a message naming the gene on the first failed check. */
{
int start = ef->srcStart;
int end = ef->srcEnd;

/* bring the source range into genomic coordinates */
if (ef->srcStrand == '-')
    reverseIntRange(&start, &end, exon->gene->chromSize);

if (ef->exon != exon)
    errAbort("%s: exonFrames linked to wrong exon", exon->gene->name);
/* start must fall in [chromStart, chromEnd) */
if (!(start >= exon->chromStart && start < exon->chromEnd))
    errAbort("%s: exonFrames srcStart not in the range of it's exon", exon->gene->name);
/* end must fall in (chromStart, chromEnd] */
if (!(end > exon->chromStart && end <= exon->chromEnd))
    errAbort("%s: exonFrames srcEnd not in the range of it's exon", exon->gene->name);
if (!(start < end))
    errAbort("%s: exonFrames srcStart>=srcEnd", exon->gene->name);
}
static void addPslBlocks(struct chromAnn* ca, unsigned opts, struct psl* psl)
/* Add the blocks of psl to ca.  The query side is used when opts has
 * chromAnnUseQSide set, otherwise the target side.  Block sizes are
 * multiplied by 3 for protein psls, and each range is flipped when the
 * chosen side is on the '-' strand. */
{
int useQ = (opts & chromAnnUseQSide) != 0;
/* the strand is a character and the multiplier a small integer; keep them
 * in types that say so rather than in boolean */
char blkStrand = useQ ? pslQStrand(psl) : pslTStrand(psl);
int size = useQ ? psl->qSize : psl->tSize;
unsigned *blocks = useQ ? psl->qStarts : psl->tStarts;
/* NOTE(review): the x3 is applied on whichever side is selected; confirm
 * that this is intended when the query side of a protein psl is used. */
int blkSizeMult = pslIsProtein(psl) ? 3 : 1;
int iBlk;
for (iBlk = 0; iBlk < psl->blockCount; iBlk++)
    {
    int start = blocks[iBlk];
    int end = start + (blkSizeMult * psl->blockSizes[iBlk]);
    if (blkStrand == '-')
        reverseIntRange(&start, &end, size);
    chromAnnBlkNew(ca, start, end);
    }
}
Ejemplo n.º 27
0
static void printAxtTargetBlastTab(FILE *f, struct axt *axt, int targetSize)
/* Print the target range of axt as two tab-terminated fields in tabular
 * blast orientation: one-based start then end when target and query
 * strands agree, end then one-based start when they differ.  The range is
 * flipped within targetSize first when the target strand is '-'. */
{
int lo = axt->tStart, hi = axt->tEnd;
int first, second;

if (axt->tStrand == '-')
    reverseIntRange(&lo, &hi, targetSize);

if (axt->tStrand != axt->qStrand)
    {
    first = hi;
    second = lo+1;
    }
else
    {
    first = lo+1;
    second = hi;
    }
fprintf(f, "%d\t%d\t", first, second);
}
static void liftSide(char *desc, struct hash *seqSizes, struct psl *psl, char *name, char strand, unsigned *seqSize, int *start, int *end, unsigned *starts)
/* Lift one side of the alignment.  Nothing is changed unless
 * parseName(desc, name, &regStart, &regEnd) succeeds.  In that case
 * *seqSize is replaced by the size recorded for name in seqSizes,
 * *start and *end are shifted by regStart, and every entry of starts[] is
 * shifted by the region start (taken after flipping the region within the
 * sequence when strand is '-').  Aborts when *end is beyond the sequence
 * size. */
{
int regStart, regEnd, i;
if (parseName(desc, name, &regStart, &regEnd))
    {
    *seqSize = hashIntVal(seqSizes, name);
    /* NOTE(review): this compares the not-yet-lifted *end with the full
     * sequence size; confirm whether regEnd was meant to be checked. */
    if (*end > *seqSize)
        errAbort("subrange %s:%d-%d extends past sequence end %u", name, regStart, regEnd, *seqSize);
    *start += regStart;
    *end += regStart;
    /* block starts are strand-relative, so use the region start as seen
     * from the alignment's strand */
    if (strand == '-')
        reverseIntRange(&regStart, &regEnd, *seqSize);
    for (i = 0; i < psl->blockCount; i++)
        starts[i] += regStart;
    }
}
Ejemplo n.º 29
0
static void scanSam(char *samIn, FILE *f, struct genomeRangeTree *grt, long long *retHit, 
    long long *retMiss,  long long *retTotalBasesInHits)
/* Scan through sam file doing several things: counting how many reads hit
 * and how many miss target during mapping phase, copying those that hit to
 * a little bed file (when f is not NULL), and also defining regions
 * covered in a genomeRangeTree.
 * A read counts as a miss when it has no target (tid < 0).  The end of a
 * hit is approximate: start plus read length, without parsing the cigar.
 * For reads flagged BAM_FREVERSE the strand is '-' and the range is
 * flipped within the target length; a negative start is clamped to 0.
 * Aborts if the file cannot be opened or if samread reports an error other
 * than -1. */
{
samfile_t *sf = samopen(samIn, "r", NULL);
/* samopen hands back NULL on failure; don't dereference it */
if (sf == NULL)
    errnoAbort("Couldn't open %s", samIn);
bam_header_t *bamHeader = sf->header;
bam1_t one;
ZeroVar(&one);
int err;
long long hit = 0, miss = 0, totalBasesInHits = 0;
while ((err = samread(sf, &one)) >= 0)
    {
    int32_t tid = one.core.tid;
    if (tid < 0)
        {
        ++miss;
        continue;
        }
    ++hit;
    char *chrom = bamHeader->target_name[tid];
    // Approximate here... can do better if parse cigar.
    int start = one.core.pos;
    int size = one.core.l_qseq;
    int end = start + size;
    totalBasesInHits += size;
    boolean isRc = (one.core.flag & BAM_FREVERSE);
    char strand = '+';
    if (isRc)
        {
        strand = '-';
        reverseIntRange(&start, &end, bamHeader->target_len[tid]);
        }
    if (start < 0) start=0;
    if (f != NULL)
        fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand);
    genomeRangeTreeAdd(grt, chrom, start, end);
    }
/* the loop above ends on -1 as well; anything else negative is an error */
if (err < 0 && err != -1)
    errnoAbort("samread err %d", err);
samclose(sf);
*retHit = hit;
*retMiss = miss;
*retTotalBasesInHits = totalBasesInHits;
}
Ejemplo n.º 30
0
void mafAliToFa(struct mafAli *maf, FILE *of)
/* Convert a MAF alignment to fasta.  Every component is written to of as
 * one record whose header is ">src.start.end.strand.srcSize", followed by
 * the component text; a blank line ends the alignment.  When the
 * stripDotsDashes global is set, '.' and '-' are removed from the
 * component text in place before it is written.  The range of a component
 * on the '-' strand is flipped within srcSize; '+' strand ranges are
 * written as they are. */
{
struct mafComp *c;
for (c = maf->components ; c ; c = c->next )
    {
    int start = c->start;
    int end   = c->start+c->size;
    if (stripDotsDashes)
        {
        stripChar(c->text, '.');
        stripChar(c->text, '-');
        }
    /* only '-' strand components need flipping; doing it unconditionally
     * would report wrong coordinates for '+' strand components */
    if (c->strand == '-')
        reverseIntRange(&start, &end, c->srcSize);
    fprintf(of, ">%s.%d.%d.%c.%d\n%s\n", c->src, start, end, c->strand, c->srcSize, c->text);
    }
fprintf(of,"\n");
}