static void addUngappedBlock(struct psl* psl, int* pslSpace, struct block* blk, unsigned flags) /* add the next ungapped block to a psl */ { unsigned newIBlk = psl->blockCount; unsigned blkSize = blk->qEnd - blk->qStart; // uses query size so protein psl is right if (newIBlk >= *pslSpace) pslGrow(psl, pslSpace); psl->qStarts[newIBlk] = blk->qCoordMult * blk->qStart; psl->tStarts[newIBlk] = blk->tCoordMult * blk->tStart; psl->blockSizes[newIBlk] = blk->qCoordMult * blkSize; /* keep bounds current */ psl->qStart = psl->qStarts[0]; psl->qEnd = psl->qStarts[newIBlk] + (blk->qCoordMult * blkSize); if (psl->strand[0] == '-') reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize); psl->tStart = psl->tStarts[0]; psl->tEnd = psl->tStarts[newIBlk] + (blk->q2tBlkSizeMult * blkSize); if (psl->strand[1] == '-') reverseIntRange(&psl->tStart, &psl->tEnd, psl->tSize); if (flags & bldPslx) { psl->qSequence[newIBlk] = cloneStringZ(blk->qAln + blk->alnStart, blkSize); psl->tSequence[newIBlk] = cloneStringZ(blk->tAln + blk->alnStart, blkSize); } psl->blockCount++; }
static void convertToPsl(struct mafComp *qc, struct mafComp *tc, FILE *pslFh) /* convert two components to a psl */ { struct psl* psl; int qStart = qc->start; int qEnd = qc->start+qc->size; int tStart = tc->start; int tEnd = tc->start+tc->size; char strand[3]; strand[0] = qc->strand; strand[1] = tc->strand; strand[2] = '\0'; if (qc->strand == '-') reverseIntRange(&qStart, &qEnd, qc->srcSize); if (tc->strand == '-') reverseIntRange(&tStart, &tEnd, tc->srcSize); psl = pslFromAlign(skipDot(qc->src), qc->srcSize, qStart, qEnd, qc->text, skipDot(tc->src), tc->srcSize, tStart, tEnd, tc->text, strand, 0); if (psl != NULL) { /* drop target strand */ if (psl->strand[1] == '-') pslRc(psl); psl->strand[1] = '\0'; pslTabOut(psl, pslFh); } }
void axtOutString(char *q, char *t, int size, int lineSize, struct psl *psl, FILE *f) /* Output string side-by-side in Scott's axt format. */ { int i; static int ix = 0; int qs = psl->qStart, qe = psl->qEnd; int ts = psl->tStart, te = psl->tEnd; int score = axtScoreSym(ss, size, q, t); if (psl->strand[0] == '-') reverseIntRange(&qs, &qe, psl->qSize); if (psl->strand[1] == '-') reverseIntRange(&ts, &te, psl->tSize); if (psl->strand[1] != 0) fprintf(f, "%d %s %d %d %s %d %d %c%c %d\n", ++ix, psl->tName, ts+1, te, psl->qName, qs+1, qe, psl->strand[1], psl->strand[0], score); else fprintf(f, "%d %s %d %d %s %d %d %c %d\n", ++ix, psl->tName, psl->tStart+1, psl->tEnd, psl->qName, qs+1, qe, psl->strand[0], score); if (strlen(t) != size) warn("size of T %ld and Q %d differ on line %d\n",(long)strlen(t), size, ix); for (i=0; i<size ; i++) fputc(t[i],f); fputc('\n',f); if (strlen(q) != size) warn("size of T %ld and Q %d differ on line %d\n",(long)strlen(q), size, ix); for (i=0; i<size ; i++) fputc(q[i],f); fputc('\n',f); fputc('\n',f); }
struct dnaSeq *gfiExpandAndLoadCached(struct gfRange *range, struct hash *tFileCache, char *tSeqDir, int querySize, int *retTotalSeqSize, boolean respectFrame, boolean isRc, int expansion)
/* Expand range by expansion bases via gfiExpandRange, load the covered
 * target sequence and return it.  (Done together because the target size is
 * not known before the file is looked up.)
 *   range            - range to expand; tStart/tEnd are updated in place
 *   tFileCache       - hash of already opened nib/2bit handles keyed by file
 *                      name; newly opened files are added to it
 *   tSeqDir          - directory prefixed to range->tName
 *   retTotalSeqSize  - receives the full size of the target sequence
 *   isRc             - when set, the range is flipped before expansion, the
 *                      loaded DNA is reverse-complemented, and the range is
 *                      flipped again afterwards
 * A name that nibIsFile() rejects is treated as "file:seqName" for a .2bit. */
{
    char path[PATH_LEN+256];
    struct dnaSeq *seq = NULL;

    safef(path, sizeof(path), "%s/%s", tSeqDir, range->tName);

    if (!nibIsFile(path)) {
        struct twoBitFile *twoBit = NULL;
        char *seqName = strchr(path, ':');
        int seqSize = 0;

        if (seqName == NULL)
            errAbort("No colon in .2bit response from gfServer");
        /* cut path at the colon: path is now the file, seqName the sequence */
        *seqName++ = 0;

        twoBit = hashFindVal(tFileCache, path);
        if (twoBit == NULL) {
            twoBit = twoBitOpen(path);
            hashAdd(tFileCache, path, twoBit);
        }
        seqSize = twoBitSeqSize(twoBit, seqName);

        if (isRc)
            reverseIntRange(&range->tStart, &range->tEnd, seqSize);
        gfiExpandRange(range, querySize, seqSize, respectFrame, isRc, expansion);
        seq = twoBitReadSeqFragLower(twoBit, seqName, range->tStart, range->tEnd);
        if (isRc) {
            reverseComplement(seq->dna, seq->size);
            reverseIntRange(&range->tStart, &range->tEnd, seqSize);
        }
        *retTotalSeqSize = seqSize;
        return seq;
    }

    {
        struct nibInfo *nib = hashFindVal(tFileCache, path);

        if (nib == NULL) {
            nib = nibInfoNew(path);
            hashAdd(tFileCache, path, nib);
        }

        if (isRc)
            reverseIntRange(&range->tStart, &range->tEnd, nib->size);
        gfiExpandRange(range, querySize, nib->size, respectFrame, isRc, expansion);
        seq = nibLdPart(path, nib->f, nib->size, range->tStart, range->tEnd - range->tStart);
        if (isRc) {
            reverseComplement(seq->dna, seq->size);
            reverseIntRange(&range->tStart, &range->tEnd, nib->size);
        }
        *retTotalSeqSize = nib->size;
    }
    return seq;
}
void correctOne(struct dnaSeq *est, struct psl *psl, char *nibDir, struct hash *nibHash, FILE *f) /* Write one corrected EST to file. */ { struct dnaSeq *geno = readCachedNib(nibHash, nibDir, psl->tName, psl->tStart, psl->tEnd - psl->tStart); struct dyString *t = newDyString(est->size+20); int qSize = psl->qSize; int tSize = psl->tSize; int qLastEnd = 0; int blockIx; struct mrnaBlock *mbList, *mb; int genoOffset = psl->tStart; boolean isRc = FALSE; /* Load sequence and alignment blocks, coping with reverse * strand as necessary. */ toUpperN(geno->dna, geno->size); /* This helps debug... */ mbList = mrnaBlockFromPsl(psl); if (psl->strand[0] == '-') { reverseComplement(geno->dna, geno->size); genoOffset = tSize - psl->tEnd; for (mb = mbList; mb != NULL; mb = mb->next) { reverseIntRange(&mb->tStart, &mb->tEnd, tSize); reverseIntRange(&mb->qStart, &mb->qEnd, qSize); } slReverse(&mbList); isRc = TRUE; } /* Make t have corrected sequence. */ for (mb = mbList; mb != NULL; mb = mb->next) { int qStart = mb->qStart; int qEnd = mb->qEnd; int uncovSize = qStart - qLastEnd; if (uncovSize > 0) dyStringAppendN(t, est->dna + qLastEnd, uncovSize); dyStringAppendN(t, geno->dna + mb->tStart - genoOffset, mb->tEnd - mb->tStart); qLastEnd = qEnd; } if (qLastEnd != qSize) { int uncovSize = qSize - qLastEnd; dyStringAppendN(t, est->dna + qLastEnd, uncovSize); } /* Output */ faWriteNext(f, est->name, t->string, t->stringSize); /* Clean up time. */ slFreeList(&mbList); freeDyString(&t); freeDnaSeq(&geno); }
static struct mapAln *chainToPsl(struct chain *ch) /* convert a chain to a psl, ignoring match counts, etc */ { struct psl *psl; struct cBlock *cBlk; int iBlk; int qStart = ch->qStart, qEnd = ch->qEnd; char strand[2]; strand[0] = ch->qStrand; strand[1] = '\0'; if (ch->qStrand == '-') reverseIntRange(&qStart, &qEnd, ch->qSize); psl = pslNew(ch->qName, ch->qSize, qStart, qEnd, ch->tName, ch->tSize, ch->tStart, ch->tEnd, strand, slCount(ch->blockList), 0); for (cBlk = ch->blockList, iBlk = 0; cBlk != NULL; cBlk = cBlk->next, iBlk++) { psl->blockSizes[iBlk] = (cBlk->tEnd - cBlk->tStart); psl->qStarts[iBlk] = cBlk->qStart; psl->tStarts[iBlk] = cBlk->tStart; psl->match += psl->blockSizes[iBlk]; } psl->blockCount = iBlk; if (swapMap) pslSwap(psl, FALSE); return mapAlnNew(psl, ch->id); }
static void setPslBounds(struct psl* mappedPsl)
/* Recompute qStart/qEnd and tStart/tEnd of a mapped psl from its first and
 * last blocks.  A side whose strand (per pslQStrand / pslTStrand) is '-'
 * has its range flipped with reverseIntRange.
 * NOTE(review): indexes block blockCount-1, so this presumably relies on the
 * psl having at least one block -- confirm against callers. */
{
    struct psl *p = mappedPsl;
    int last = p->blockCount-1;

    /* query side */
    p->qStart = p->qStarts[0];
    p->qEnd = p->qStarts[last] + p->blockSizes[last];
    if (pslQStrand(p) == '-')
        reverseIntRange(&p->qStart, &p->qEnd, p->qSize);

    /* target side */
    p->tStart = p->tStarts[0];
    p->tEnd = p->tStarts[last] + p->blockSizes[last];
    if (pslTStrand(p) == '-')
        reverseIntRange(&p->tStart, &p->tEnd, p->tSize);
}
void axtToPsl(char *inName, char *tSizeFile, char *qSizeFile, char *outName) /* axtToPsl - Convert axt to psl format. */ { struct hash *tSizeHash = readSizes(tSizeFile); struct hash *qSizeHash = readSizes(qSizeFile); struct lineFile *lf = lineFileOpen(inName, TRUE); char strand[2]; FILE *f = mustOpen(outName, "w"); struct psl* psl; struct axt *axt; strand[1] = '\0'; while ((axt = axtRead(lf)) != NULL) { int qSize = findSize(qSizeHash, axt->qName); int qStart = axt->qStart; int qEnd = axt->qEnd; if (axt->qStrand == '-') reverseIntRange(&qStart, &qEnd, qSize); strand[0] = axt->qStrand; psl = pslFromAlign(axt->qName, qSize, qStart, qEnd, axt->qSym, axt->tName, findSize(tSizeHash, axt->tName), axt->tStart, axt->tEnd, axt->tSym, strand, PSL_IS_SOFTMASK); if (psl != NULL) { pslTabOut(psl, f); pslFree(&psl); } axtFree(&axt); } lineFileClose(&lf); carefulClose(&f); }
void samToOpenBed(char *samIn, FILE *f) /* Like samToOpenBed, but the output is the already open file f. */ { samfile_t *sf = samopen(samIn, "r", NULL); bam_header_t *bamHeader = sf->header; bam1_t one; ZeroVar(&one); int err; while ((err = samread(sf, &one)) >= 0) { int32_t tid = one.core.tid; if (tid < 0) continue; char *chrom = bamHeader->target_name[tid]; // Approximate here... can do better if parse cigar. int start = one.core.pos; int size = one.core.l_qseq; int end = start + size; boolean isRc = (one.core.flag & BAM_FREVERSE); char strand = '+'; if (isRc) { strand = '-'; reverseIntRange(&start, &end, bamHeader->target_len[tid]); } fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand); } if (err < 0 && err != -1) errnoAbort("samread err %d", err); samclose(sf); }
boolean closeToTop(struct psl *psl, int *scoreTrack) /* Returns TRUE if psl is near the top scorer for at least 20 bases. */ { int milliScore = calcSizedScore(psl); int threshold = round(milliScore * (1.0+nearTop)); int i, blockIx; int start, size, end; int topCount = 0; char strand = psl->strand[0]; for (blockIx = 0; blockIx < psl->blockCount; ++blockIx) { start = psl->qStarts[blockIx]; size = psl->blockSizes[blockIx]; end = start+size; if (strand == '-') reverseIntRange(&start, &end, psl->qSize); for (i=start; i<end; ++i) { if (scoreTrack[i] <= threshold) { if (++topCount >= minNearTopSize) return TRUE; } } } return FALSE; }
static void qFillOtherRange(struct fill *fill) /* Given bounds of fill in q coordinates, calculate * oStart/oEnd in t coordinates, and refine * start/end to reflect parts of chain actually used. */ { struct chain *chain = fill->chain; int clipStart = fill->start; int clipEnd = fill->end; boolean isRev = (chain->qStrand == '-'); int tMin = BIGNUM, tMax = -BIGNUM; int qMin = BIGNUM, qMax = -BIGNUM; struct cBlock *b; if (isRev) reverseIntRange(&clipStart, &clipEnd, chain->qSize); for (b = chain->blockList; b != NULL; b = b->next) { int qs, qe, ts, te; /* Clipped bounds of block */ if ((qe = b->qEnd) <= clipStart) continue; if ((qs = b->qStart) >= clipEnd) break; ts = b->tStart; te = b->tEnd; if (qs < clipStart) { ts += (clipStart - qs); qs = clipStart; } if (qe > clipEnd) { te -= (qe - clipEnd); qe = clipEnd; } if (qMin > qs) qMin = qs; if (qMax < qe) qMax = qe; if (tMin > ts) tMin = ts; if (tMax < te) tMax = te; } if (isRev) reverseIntRange(&qMin, &qMax, chain->qSize); fill->start = qMin; fill->end = qMax; fill->oStart = tMin; fill->oEnd = tMax; assert(tMin < tMax); }
void reverseBlocksQ(struct cBlock **pList, int qSize)
/* Reverse the q side of a block list: the list order is reversed with
 * slReverse and each block's qStart/qEnd is flipped with reverseIntRange
 * relative to qSize.  The t side is left untouched. */
{
    struct cBlock *cur;

    slReverse(pList);
    cur = *pList;
    while (cur != NULL) {
        reverseIntRange(&cur->qStart, &cur->qEnd, qSize);
        cur = cur->next;
    }
}
void flipExonList(struct range **pList, int regionSize)
/* Flip an exon list to the other strand: each exon's start/end is flipped
 * with reverseIntRange relative to regionSize, then the list order is
 * reversed with slReverse. */
{
    struct range *cur = *pList;

    while (cur != NULL) {
        reverseIntRange(&cur->start, &cur->end, regionSize);
        cur = cur->next;
    }
    slReverse(pList);
}
struct axt *axtCreate(char *q, char *t, int size, struct psl *psl) /* create axt */ { int qs = psl->qStart, qe = psl->qEnd; int ts = psl->tStart, te = psl->tEnd; int symCount = 0; struct axt *axt = NULL; AllocVar(axt); if (psl->strand[0] == '-') reverseIntRange(&qs, &qe, psl->qSize); if (psl->strand[1] == '-') reverseIntRange(&ts, &te, psl->tSize); axt->qName = cloneString(psl->qName); axt->tName = cloneString(psl->tName); axt->qStart = qs+1; axt->qEnd = qe; axt->qStrand = psl->strand[0]; axt->tStrand = '+'; if (psl->strand[1] != 0) { axt->tStart = ts+1; axt->tEnd = te; } else { axt->tStart = psl->tStart+1; axt->tEnd = psl->tEnd; } axt->symCount = symCount = strlen(t); axt->tSym = cloneString(t); if (strlen(q) != symCount) warn("Symbol count %d != %d inconsistent at t %s:%d and qName %s\n%s\n%s\n", symCount, (int)strlen(q), psl->tName, psl->tStart, psl->qName, t, q); axt->qSym = cloneString(q); axt->score = axtScoreFilterRepeats(axt, ss); verbose(1,"axt score = %d\n",axt->score); //for (i=0; i<size ; i++) // fputc(t[i],f); //for (i=0; i<size ; i++) // fputc(q[i],f); return axt; }
void chainSwap(struct chain *chain) /* Swap target and query side of chain. */ { struct chain old = *chain; struct cBlock *b; /* Copy basic stuff swapping t and q. */ chain->qName = old.tName; chain->tName = old.qName; chain->qStart = old.tStart; chain->qEnd = old.tEnd; chain->tStart = old.qStart; chain->tEnd = old.qEnd; chain->qSize = old.tSize; chain->tSize = old.qSize; /* Swap t and q in blocks. */ for (b = chain->blockList; b != NULL; b = b->next) { struct cBlock old = *b; b->qStart = old.tStart; b->qEnd = old.tEnd; b->tStart = old.qStart; b->tEnd = old.qEnd; } /* Cope with the minus strand. */ if (chain->qStrand == '-') { /* chain's are really set up so that the target is on the * + strand and the query is on the minus strand. * Therefore we need to reverse complement both * strands while swapping to preserve this. */ for (b = chain->blockList; b != NULL; b = b->next) { reverseIntRange(&b->tStart, &b->tEnd, chain->tSize); reverseIntRange(&b->qStart, &b->qEnd, chain->qSize); } reverseIntRange(&chain->tStart, &chain->tEnd, chain->tSize); reverseIntRange(&chain->qStart, &chain->qEnd, chain->qSize); slReverse(&chain->blockList); } }
void bedWriteAxt(struct axt *axt, int qSize, int tSize, double idRatio, FILE *f)
/* Write out bounds of axt to a bed file: one tab-separated line holding
 * tName, tStart, tEnd, qName, idRatio scaled to parts per thousand, and the
 * query strand.
 *   qSize, tSize - not used in the output; kept for interface compatibility
 *                  (the old code computed strand-flipped query bounds from
 *                  qSize but never wrote them)
 *   idRatio      - identity ratio, truncated to an int after multiplying
 *                  by 1000 */
{
    int idPpt = idRatio * 1000;

    fprintf(f, "%s\t%d\t%d\t", axt->tName, axt->tStart, axt->tEnd);
    fprintf(f, "%s\t%d\t%c\n", axt->qName, idPpt, axt->qStrand);
}
struct genbankCds getCds(struct sqlConnection *conn, struct psl *psl) /* Lookup the CDS, either in the database or hash, or generate for query. If * not found and looks like a it has a genbank version, try without the * version. If allCds is true, generate a cds that covers the query. Conn * maybe null if gCdsTable exists or gAllCds or gNoCds are true. If CDS can't be * obtained, start and end are both set to -1. If there is an error parsing * it, start and end are both set to 0. */ { struct genbankCds cds; ZeroVar(&cds); if (gNoCds) { cds.start = -1; cds.end = -1; cds.startComplete = FALSE; cds.endComplete = FALSE; } else if (gAllCds) { cds.start = psl->qStart; cds.end = psl->qEnd; if (psl->strand[0] == '-') reverseIntRange(&cds.start, &cds.end, psl->qSize); cds.startComplete = TRUE; cds.endComplete = TRUE; } else { char cdsBuf[4096]; char *cdsStr = getCdsForAcc(conn, psl->qName, cdsBuf, sizeof(cdsBuf)); if (cdsStr == NULL) { if (!gQuiet) fprintf(stderr, "Warning: no CDS for %s\n", psl->qName); cds.start = cds.end = -1; } else { if (!genbankCdsParse(cdsStr, &cds)) { if (!gQuiet) fprintf(stderr, "Warning: invalid CDS for %s: %s\n", psl->qName, cdsStr); } else if ((cds.end-cds.start) > psl->qSize) { if (!gQuiet) fprintf(stderr, "Warning: CDS for %s (%u..%u) longer than qSize (%u)\n", psl->qName, cds.start, cds.end, psl->qSize); cds.start = cds.end = -1; } } } return cds; }
static boolean breakUpIfOnDiagonal(struct block *blockList, boolean isRc, char *qName, char *tName, int qSize, int tSize, struct block *retBlockLists[], int maxBlockLists, int *retCount)
/* If any blocks are on diagonal, remove the blocks and separate the lists
 * of blocks before and after the diagonal.  Store block list pointers in
 * retBlockLists, the number of lists in retCount, and return TRUE if
 * we found any blocks on diagonal so we know to rescore afterwards.
 *   blockList     - list to examine; it is cut up in place
 *   isRc          - when set, each block's q range is flipped with
 *                   reverseIntRange (relative to qSize) before comparison
 *   qName, tName  - blocks are only examined when these are the same string
 *   tSize         - not used by this function
 *   retBlockLists - caller-supplied array with room for maxBlockLists entries
 * A block counts as on the diagonal when its t range and its (possibly
 * flipped) q range intersect.  Removed blocks are freed with freez.
 * Note that *retCount can be 0 when the only remaining list is empty. */
{
    int blockListIndex = 0;     /* index of the list currently being built */
    boolean brokenUp = FALSE;
    retBlockLists[blockListIndex] = blockList;
    if (sameString(qName, tName)) {
        struct block *block = NULL, *lastBlock = NULL;
        int i = 0;
        for (block = blockList; block != NULL; block = block->next) {
            int qStart = block->qStart;
            int qEnd = block->qEnd;
            /* block being the head of the current list with a previous block
             * present means the previous block was removed from the list on
             * the last pass, so it can be freed now that we have moved on */
            if (lastBlock != NULL && block == retBlockLists[blockListIndex])
                freez(&lastBlock);
            if (isRc)
                reverseIntRange(&qStart, &qEnd, qSize);
            if (rangeIntersection(block->tStart, block->tEnd, qStart, qEnd) > 0) {
                brokenUp = TRUE;
                /* blocks precede this one in the current list: terminate that
                 * list at the previous block and start a new one */
                if (block != retBlockLists[blockListIndex]) {
                    assert(lastBlock != NULL);
                    lastBlock->next = NULL;
                    blockListIndex++;
                    if (blockListIndex >= maxBlockLists)
                        errAbort("breakUpIfOnDiagonal: Too many fragmented block lists!");
                }
                /* the current list now begins after the on-diagonal block */
                retBlockLists[blockListIndex] = block->next;
            }
            lastBlock = block;
        }
        /* the final block was on the diagonal: drop the empty trailing list
         * and free that block */
        if (retBlockLists[blockListIndex] == NULL) {
            blockListIndex--;
            if (lastBlock != NULL)
                freez(&lastBlock);
        }
        /* every surviving list is passed through removeFrayedEnds */
        for (i=0; i <= blockListIndex; i++) {
            retBlockLists[i] = removeFrayedEnds(retBlockLists[i]);
        }
    }
    *retCount = blockListIndex + 1;
    return brokenUp;
}
void addChainT(struct chrom *chrom, struct chrom *otherChrom, struct chain *chain)
/* Add T side of chain to fill/gap tree of chromosome.
 * This is the easier case since there are no strand
 * issues to worry about on the t side; only the q coordinates stored in
 * each gap are flipped for '-' strand chains.
 *   chrom      - chromosome whose spaces are searched and filled
 *   otherChrom - not used by this function
 *   chain      - chain supplying the blocks */
{
    struct slRef *spaceList;
    struct slRef *ref;
    struct cBlock *startBlock, *block, *nextBlock;
    struct gap *gap;
    /* spaces found by findSpaces for the chain's t range */
    spaceList = findSpaces(chrom->spaces,chain->tStart,chain->tEnd);
    startBlock = chain->blockList;
    for (ref = spaceList; ref != NULL; ref = ref->next) {
        struct space *space = ref->val;
        struct fill *fill;
        int gapStart, gapEnd;
        /* advance startBlock while the following block still starts at or
         * before the start of this space; startBlock carries over from one
         * space to the next */
        for (;;) {
            nextBlock = startBlock->next;
            if (nextBlock == NULL)
                break;
            gapEnd = nextBlock->tStart;
            if (gapEnd > space->start)
                break;
            startBlock = nextBlock;
        }
        if ((fill = fillSpace(chrom, space, chain, startBlock, FALSE)) != NULL) {
            /* walk the inter-block gaps from startBlock onward and record
             * those that strictlyInside() accepts for this space */
            for (block = startBlock; ; block = nextBlock) {
                nextBlock = block->next;
                if (nextBlock == NULL)
                    break;
                gapStart = block->tEnd;
                gapEnd = nextBlock->tStart;
                if (strictlyInside(space->start, space->end, gapStart, gapEnd)) {
                    int qs = block->qEnd;
                    int qe = nextBlock->qStart;
                    if (chain->qStrand == '-')
                        reverseIntRange(&qs, &qe, chain->qSize);
                    gap = gapNew(gapStart, gapEnd, qs, qe);
                    addSpaceForGap(chrom, gap);
                    slAddHead(&fill->gapList, gap);
                }
            }
            /* the space is only freed when a fill was created for it */
            freez(&ref->val); /* aka space */
        }
    }
    slFreeList(&spaceList);
}
boolean mafNeedSubset(struct mafAli *maf, char *componentSource, int newStart, int newEnd) /* Return TRUE if maf only partially fits between newStart/newEnd * in given component. */ { struct mafComp *mcMaster = mafFindComponent(maf, componentSource); /* Reverse complement input range if necessary. */ if (mcMaster->strand == '-') reverseIntRange(&newStart, &newEnd, mcMaster->srcSize); return newStart > mcMaster->start || newEnd < mcMaster->start + mcMaster->size; }
static void addChainQBlocks(struct chromAnn* ca, unsigned opts, struct chain* chain)
/* Add the query side of every chain block to ca with chromAnnBlkNew.
 * For a '-' query strand each block range is flipped with reverseIntRange
 * relative to the chain's qSize first.  opts is not used here. */
{
    struct cBlock *cur = chain->blockList;

    while (cur != NULL) {
        int lo = cur->qStart;
        int hi = cur->qEnd;

        if (chain->qStrand == '-')
            reverseIntRange(&lo, &hi, chain->qSize);
        chromAnnBlkNew(ca, lo, hi);
        cur = cur->next;
    }
}
static struct range getHapQRangePartContained(struct hapChrom *hapChrom, struct psl *refPsl) /* find the range of an mRNA that is aligned to a haplotype region of a ref chrom when * not completely contained in haplotype range */ { struct range qRange = {0, 0}; unsigned hapTStart = hapChrom->refStart, hapTEnd = hapChrom->refEnd; if (refPsl->strand[1] == '-') reverseUnsignedRange(&hapTStart, &hapTEnd, refPsl->tSize); qRange.start = getHapQRangePartContainedStart(hapTStart, refPsl); qRange.end = getHapQRangePartContainedEnd(hapTEnd, refPsl); if (refPsl->strand[0] == '-') reverseIntRange(&qRange.start, &qRange.end, refPsl->qSize); return qRange; }
static struct coords blastToUcsc(int blastStart, int blastEnd, int size, int blastFrame) /* convert coordinates from blast to UCSC convention. */ { // blastStart >= blastEnd for queries with blastFrame < 0 // blastStart <= blastEnd for hits with blastFrame < 0 struct coords ucsc; ucsc.start = (blastStart <= blastEnd) ? blastStart-1 : blastEnd-1; ucsc.end = (blastStart <= blastEnd) ? blastEnd : blastStart; ucsc.size = size; ucsc.strand = (blastFrame >= 0) ? '+' : '-'; if (ucsc.strand == '-') reverseIntRange(&ucsc.start, &ucsc.end, size); assert(ucsc.start < ucsc.end); return ucsc; }
static int basesShared(struct genePred *gp, struct psl *psl)
/* Return the number of bases gp and psl share: the sum, over all psl
 * blocks, of gpRangeIntersection between gp and the block's target range.
 * Block ranges are flipped with reverseIntRange when the psl's target
 * strand is '-'. */
{
    int nBlocks = psl->blockCount;
    int total = 0;
    int blk;

    for (blk = 0; blk < nBlocks; ++blk) {
        int lo = psl->tStarts[blk];
        int hi = lo + psl->blockSizes[blk];

        if (psl->strand[1] == '-')
            reverseIntRange(&lo, &hi, psl->tSize);
        total += gpRangeIntersection(gp, lo, hi);
    }
    return total;
}
static void exonFramesCheck(struct cdsExon *exon, struct exonFrames *ef)
/* Sanity check an exonFrames object against the exon it should belong to.
 * The source range is flipped with reverseIntRange (relative to the gene's
 * chromSize) when srcStrand is '-', then errAbort is called if ef is linked
 * to a different exon, if either end falls outside the exon, or if the
 * range is empty or inverted. */
{
    int lo = ef->srcStart;
    int hi = ef->srcEnd;

    /* convert to genomic coords */
    if (ef->srcStrand == '-')
        reverseIntRange(&lo, &hi, exon->gene->chromSize);

    if (ef->exon != exon)
        errAbort("%s: exonFrames linked to wrong exon", exon->gene->name);

    /* start must lie in [chromStart, chromEnd) */
    if (!((lo >= exon->chromStart) && (lo < exon->chromEnd)))
        errAbort("%s: exonFrames srcStart not in the range of it's exon", exon->gene->name);

    /* end must lie in (chromStart, chromEnd] */
    if (!((hi > exon->chromStart) && (hi <= exon->chromEnd)))
        errAbort("%s: exonFrames srcEnd not in the range of it's exon", exon->gene->name);

    if (!(lo < hi))
        errAbort("%s: exonFrames srcStart>=srcEnd", exon->gene->name);
}
static void addPslBlocks(struct chromAnn* ca, unsigned opts, struct psl* psl)
/* Add the blocks of a psl to ca with chromAnnBlkNew.
 *   opts - when chromAnnUseQSide is set the query side (strand, size and
 *          block starts) is used, otherwise the target side
 * Block lengths are multiplied by 3 when pslIsProtein(psl) is true.
 * Blocks on a '-' strand are flipped with reverseIntRange relative to the
 * chosen side's sequence size. */
{
    /* strand is a character and the multiplier an integer; both used to be
     * declared boolean, which only works while boolean is a plain int type */
    char blkStrand = (opts & chromAnnUseQSide) ? pslQStrand(psl) : pslTStrand(psl);
    int size = (opts & chromAnnUseQSide) ? psl->qSize : psl->tSize;
    unsigned *blocks = (opts & chromAnnUseQSide) ? psl->qStarts : psl->tStarts;
    int blkSizeMult = pslIsProtein(psl) ? 3 : 1;
    int iBlk;

    for (iBlk = 0; iBlk < psl->blockCount; iBlk++) {
        int start = blocks[iBlk];
        int end = start + (blkSizeMult * psl->blockSizes[iBlk]);
        if (blkStrand == '-')
            reverseIntRange(&start, &end, size);
        chromAnnBlkNew(ca, start, end);
    }
}
static void printAxtTargetBlastTab(FILE *f, struct axt *axt, int targetSize)
/* Print the target coordinates of axt as two tab-terminated fields in
 * tabular blast orientation.  The range is flipped with reverseIntRange
 * when tStrand is '-'.  When target and query strands agree the fields are
 * start+1 then end; otherwise they are written in the opposite order. */
{
    int lo = axt->tStart;
    int hi = axt->tEnd;
    int first, second;

    if (axt->tStrand == '-')
        reverseIntRange(&lo, &hi, targetSize);

    if (axt->tStrand != axt->qStrand) {
        first = hi;
        second = lo+1;
    } else {
        first = lo+1;
        second = hi;
    }
    fprintf(f, "%d\t", first);
    fprintf(f, "%d\t", second);
}
static void liftSide(char *desc, struct hash *seqSizes, struct psl *psl, char *name, char strand, unsigned *seqSize, int *start, int *end, unsigned *starts) /* life one side of the alignment */ { int regStart, regEnd, i; if (parseName(desc, name, ®Start, ®End)) { *seqSize = hashIntVal(seqSizes, name); if (*end > *seqSize) errAbort("subrange %s:%d-%d extends past sequence end %ud", name, regStart, regEnd, *seqSize); *start += regStart; *end += regStart; if (strand == '-') reverseIntRange(®Start, ®End, *seqSize); for (i = 0; i < psl->blockCount; i++) starts[i] += regStart; } }
static void scanSam(char *samIn, FILE *f, struct genomeRangeTree *grt, long long *retHit, long long *retMiss, long long *retTotalBasesInHits) /* Scan through sam file doing several things:counting how many reads hit and how many * miss target during mapping phase, copying those that hit to a little bed file, and * also defining regions covered in a genomeRangeTree. */ { samfile_t *sf = samopen(samIn, "r", NULL); bam_header_t *bamHeader = sf->header; bam1_t one; ZeroVar(&one); int err; long long hit = 0, miss = 0, totalBasesInHits = 0; while ((err = samread(sf, &one)) >= 0) { int32_t tid = one.core.tid; if (tid < 0) { ++miss; continue; } ++hit; char *chrom = bamHeader->target_name[tid]; // Approximate here... can do better if parse cigar. int start = one.core.pos; int size = one.core.l_qseq; int end = start + size; totalBasesInHits += size; boolean isRc = (one.core.flag & BAM_FREVERSE); char strand = '+'; if (isRc) { strand = '-'; reverseIntRange(&start, &end, bamHeader->target_len[tid]); } if (start < 0) start=0; if (f != NULL) fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand); genomeRangeTreeAdd(grt, chrom, start, end); } if (err < 0 && err != -1) errnoAbort("samread err %d", err); samclose(sf); *retHit = hit; *retMiss = miss; *retTotalBasesInHits = totalBasesInHits; }
void mafAliToFa(struct mafAli *maf, FILE *of)
/* Convert a MAF alignment to fasta: one record per component with the
 * header ">src.start.end.strand.srcSize" followed by the component text,
 * and a blank line after the last component.
 * When the file-level stripDotsDashes flag is set, '.' and '-' characters
 * are stripped from the component text in place before writing.
 * The coordinates of '-' strand components are flipped with
 * reverseIntRange; '+' strand components are written as stored. */
{
    struct mafComp *c;

    for (c = maf->components ; c ; c = c->next ) {
        int start = c->start;
        int end = c->start+c->size;
        if (stripDotsDashes) {
            stripChar(c->text, '.');
            stripChar(c->text, '-');
        }
        /* only reverse-strand components need flipping; this used to be
         * applied to every component regardless of strand */
        if (c->strand == '-')
            reverseIntRange(&start, &end, c->srcSize);
        fprintf(of, ">%s.%d.%d.%c.%d\n%s\n", c->src, start, end, c->strand, c->srcSize, c->text);
    }
    fprintf(of,"\n");
}