static struct joinedRow *jrRowAdd(struct joinedTables *joined, char **row, int fieldCount, int keyCount) /* Add new row to joinable table. */ { if (joined->maxRowCount != 0 && joined->rowCount >= joined->maxRowCount) { warn("Stopping after %d rows, try restricting region or adding filter", joined->rowCount); return NULL; } else { struct joinedRow *jr; int i; struct lm *lm = joined->lm; lmAllocVar(lm, jr); lmAllocArray(lm, jr->fields, joined->fieldCount); lmAllocArray(lm, jr->keys, joined->keyCount); jr->passedFilter = TRUE; for (i=0; i<fieldCount; ++i) jr->fields[i] = lmSlName(lm, row[i]); row += fieldCount; for (i=0; i<keyCount; ++i) jr->keys[i] = lmSlName(lm, row[i]); slAddHead(&joined->rowList, jr); joined->rowCount += 1; return jr; } }
struct cdsEvidence *orfsOnRna(struct dnaSeq *seq, struct hash *nmdHash, struct hash *mafHash,
        int otherSpeciesCount, boolean anyStart)
/* Return a list of scored ORFs on the RNA in seq, ordered by start position.
 * A candidate is created at every position where the sequence starts with
 * "atg" and, when anyStart is set, at each of the first three positions as
 * well.  mafHash is only consulted when otherSpeciesCount is positive. */
{
    struct lm *scratch = lmInit(64*1024);
    struct cdsEvidence *found = NULL;
    DNA *bases = seq->dna;
    int finalStart = seq->size - 3;   /* Last position with three bases left. */

    /* Figure out the key piece of info for NMD. */
    int lastIntronPos = findLastIntronPos(nmdHash, seq->name);

    /* Calculate stuff useful for orthology; stays zero/NULL when there are
     * no other species or no maf for this sequence. */
    double weightPerSpecies = 0;
    struct orthoCdsArray *orthoList = NULL;
    if (otherSpeciesCount > 0)
    {
        weightPerSpecies = 1.0/otherSpeciesCount;
        struct mafAli *maf = hashFindVal(mafHash, seq->name);
        if (maf != NULL)
            orthoList = calcOrthoList(maf, scratch);
    }

    /* Allocate some arrays that keep track of bases in upstream.  This
     * dramatically speeds up processing of TTN and other long transcripts
     * which otherwise can take almost a minute each. */
    int *upAtgCount, *upKozakCount;
    lmAllocArray(scratch, upAtgCount, seq->size);
    lmAllocArray(scratch, upKozakCount, seq->size);
    calcUpstreams(seq, upAtgCount, upKozakCount);

    /* Walk the sequence, making a record for each start codon found. */
    int pos;
    for (pos = 0; pos <= finalStart; ++pos)
    {
        boolean isCandidate = startsWith("atg", bases+pos) || (anyStart && pos < 3);
        if (!isCandidate)
            continue;
        int stopPos = orfEndInSeq(seq, pos);
        struct cdsEvidence *orf = createCds(seq, pos, stopPos, upAtgCount, upKozakCount,
                lastIntronPos, orthoList, weightPerSpecies);
        slAddHead(&found, orf);
    }
    /* Records were pushed on the head, so flip into start-position order. */
    slReverse(&found);

    /* Clean up and go home. */
    lmCleanup(&scratch);
    return found;
}
struct pslSets *pslSetsNew(int numSets)
/* Construct a new pslSets object for numSets sets.  The object gets its own
 * local memory pool, from which the sets and pending arrays (numSets entries
 * each) are allocated. */
{
    struct pslSets *result;
    struct lm *pool = lmInit(1024*1024);

    AllocVar(result);
    result->lm = pool;
    result->numSets = numSets;
    lmAllocArray(pool, result->sets, numSets);
    lmAllocArray(pool, result->pending, numSets);
    return result;
}
struct annoRow *aggvIntergenicRow(struct annoGratorGpVar *self, struct variant *variant,
        boolean *retRJFilterFailed, struct lm *callerLm)
/* If intergenic variants (no overlapping or nearby genes) are to be included in output,
 * make an output row with empty genePred and a gpFx that is empty except for soNumber.
 * Scratch data comes from self->lm; the returned row is built in callerLm.
 * retRJFilterFailed may be NULL, in which case the row is not marked as failing. */
{
    struct annoGrator *grator = &(self->grator);
    struct annoStreamer *streamer = &(grator->streamer);
    int totalCols = streamer->numCols;
    int genePredCols = grator->mySource->numCols;

    char **words;
    lmAllocArray(self->lm, words, totalCols);

    /* The genePred string columns are all empty strings. */
    int col = 0;
    while (col < genePredCols)
        words[col++] = "";

    /* Build the effect record: first alt allele (uppercased in place when it
     * is all nucleotides), intergenic SO term, no detail. */
    struct gpFx *effect;
    lmAllocVar(self->lm, effect);
    effect->allele = firstAltAllele(variant->alleles);
    if (isAllNt(effect->allele, strlen(effect->allele)))
        touppers(effect->allele);
    effect->soNumber = intergenic_variant;
    effect->detailType = none;

    /* The stringified effect fills the columns after the genePred ones. */
    aggvStringifyGpFx(&words[genePredCols], effect, self->lm);

    boolean rjFail = (retRJFilterFailed && *retRJFilterFailed);
    return annoRowFromStringArray(variant->chrom, variant->chromStart, variant->chromEnd,
            rjFail, words, totalCols, callerLm);
}
struct hash *allChainsHash(char *fileName) /* Hash all the chains in a given file by their ids. */ { struct hash *chainHash = newHash(18); struct lineFile *lf = lineFileOpen(fileName, TRUE); struct chain *chain; char chainId[20]; struct lm *lm = chainHash->lm; struct rbTreeNode **stack; lmAllocArray(lm, stack, 128); while ((chain = chainRead(lf)) != NULL) { struct indexedChain *ixc; lmAllocVar(lm, ixc); ixc->chain = chain; #ifdef SOON #endif /* SOON */ ixc->blockTree = rangeTreeNewDetailed(lm, stack); struct cBlock *block; for (block = chain->blockList; block != NULL; block = block->next) { struct range *r = rangeTreeAdd(ixc->blockTree, block->tStart, block->tEnd); r->val = block; } safef(chainId, sizeof(chainId), "%x", chain->id); hashAddUnique(chainHash, chainId, ixc); } lineFileClose(&lf); return chainHash; }
static void rowBufInit(struct rowBuf *rowBuf, int size)
/* Reset rowBuf, then give it a fresh local memory pool and a buffer of
 * size entries allocated from that pool. */
{
    struct lm *pool;

    resetRowBuf(rowBuf);
    pool = lmInit(0);
    rowBuf->lm = pool;
    rowBuf->size = size;
    lmAllocArray(pool, rowBuf->buf, size);
}
static struct pslMatches *pslMatchesAlloc(struct pslSets *ps)
/* Return a matches object with numSets set from ps.  One is taken from
 * ps->matchesPool when available; otherwise a new one, with a psls array of
 * ps->numSets entries, is allocated from ps->lm. */
{
    struct pslMatches *matches = slPopHead(&ps->matchesPool);

    if (matches != NULL)
    {
        /* Recycled object: only numSets is refreshed here. */
        matches->numSets = ps->numSets;
        return matches;
    }

    lmAllocVar(ps->lm, matches);
    lmAllocArray(ps->lm, matches->psls, ps->numSets);
    matches->numSets = ps->numSets;
    return matches;
}
static struct bwgFixedStepPacked * createFixedStepItems(double *score, int len, struct lm *lm) { struct bwgFixedStepPacked *packed; lmAllocArray(lm, packed, len); int i; for (i=0; i<len; ++i) { packed[i].val = score[i]; } return packed; }
static struct bwgVariableStepPacked * createVariableStepItems(int *start, double *score, int len, struct lm *lm) { struct bwgVariableStepPacked *packed; lmAllocArray(lm, packed, len); int i; for (i=0; i<len; ++i) { packed[i].start = start[i] - 1; packed[i].val = score[i]; } return packed; }
static struct ffAli *ffFindExtendNmers(char *nStart, char *nEnd, char *hStart, char *hEnd, int seedSize) /* Find perfectly matching n-mers and extend them. */ { struct lm *lm = lmInit(32*1024); struct seqHashEl **hashTable, *hashEl, **hashSlot; struct ffAli *ffList = NULL, *ff; char *n = nStart, *h = hStart, *ne = nEnd - seedSize, *he = hEnd - seedSize; /* Hash the needle. */ lmAllocArray(lm, hashTable, 4*1024); while (n <= ne) { if (!totalDegenerateN(n, seedSize)) { hashSlot = ffHashFuncN(n, seedSize) + hashTable; lmAllocVar(lm, hashEl); hashEl->seq = n; slAddHead(hashSlot, hashEl); } ++n; } /* Scan the haystack adding hits. */ while (h <= he) { for (hashEl = hashTable[ffHashFuncN(h, seedSize)]; hashEl != NULL; hashEl = hashEl->next) { if (memcmp(hashEl->seq, h, seedSize) == 0) { AllocVar(ff); ff->hStart = h; ff->hEnd = h + seedSize; ff->nStart = hashEl->seq; ff->nEnd = hashEl->seq + seedSize; extendExactLeft(ff->nStart - nStart, ff->hStart - hStart, &ff->nStart, &ff->hStart); extendExactRight(nEnd - ff->nEnd, hEnd - ff->hEnd, &ff->nEnd, &ff->hEnd); ff->left = ffList; ffList = ff; } } ++h; } ffList = ffMakeRightLinks(ffList); ffList = ffMergeClose(ffList, nStart, hStart); lmCleanup(&lm); return ffList; }
struct annoRow *annoRowFromStringArray(char *chrom, uint start, uint end, boolean rightJoinFail,
        char **wordsIn, int numCols, struct lm *lm)
/* Allocate & return an annoRow whose data is an array of numCols strings
 * cloned from wordsIn.  The row, its chrom copy, the array and the strings
 * are all allocated from lm. */
{
    struct annoRow *row;
    char **clones;
    int col;

    lmAllocVar(lm, row);
    row->chrom = lmCloneString(lm, chrom);
    row->start = start;
    row->end = end;
    row->rightJoinFail = rightJoinFail;

    lmAllocArray(lm, clones, numCols);
    for (col = 0; col < numCols; ++col)
        clones[col] = lmCloneString(lm, wordsIn[col]);
    row->data = clones;

    return row;
}
struct orthoCdsArray *calcOrthoList(struct mafAli *maf, struct lm *lm) /* Given maf, figure out orthoCdsArray list, one for each other * species. (Assume first species is native.) */ { struct orthoCdsArray *array, *arrayList = NULL; struct mafComp *nativeComp = maf->components; int nativeSize = nativeComp->size; struct mafComp *comp; for (comp = maf->components->next; comp != NULL; comp = comp->next) { AllocVar(array); array->species = lmCloneString(lm, comp->src); array->arraySize = nativeSize; lmAllocArray(lm, array->cdsArray, nativeSize); fillInArrayFromPair(lm, nativeComp, comp, array->cdsArray, nativeSize, maf->textSize); slAddHead(&arrayList, array); } slReverse(&arrayList); return arrayList; }
static struct annoRow *aggvEffectToRow(struct annoGratorGpVar *self, struct gpFx *effect, struct annoRow *rowIn, struct lm *callerLm) // convert a single genePred annoRow and gpFx record to an augmented genePred annoRow; { struct annoGrator *gSelf = &(self->grator); struct annoStreamer *sSelf = &(gSelf->streamer); assert(sSelf->numCols > gSelf->mySource->numCols); char **wordsOut; lmAllocArray(self->lm, wordsOut, sSelf->numCols); // copy the genePred fields over int gpColCount = gSelf->mySource->numCols; char **wordsIn = (char **)rowIn->data; memcpy(wordsOut, wordsIn, sizeof(char *) * gpColCount); // stringify the gpFx structure aggvStringifyGpFx(&wordsOut[gpColCount], effect, callerLm); return annoRowFromStringArray(rowIn->chrom, rowIn->start, rowIn->end, rowIn->rightJoinFail, wordsOut, sSelf->numCols, callerLm); }
static void parseVariableStepSection(struct lineFile *lf, boolean clipDontDie, struct lm *lm,
        int itemsPerSlot, char *chrom, int chromSize, bits32 span,
        struct bwgSection **pSectionList)
/* Read the two column (start, value) data in section until get to end of
 * file or the next section, then add the data to *pSectionList as one or more
 * variable-step sections of at most itemsPerSlot items each, allocated in lm.
 * Starts in the file must be positive; they are stored minus one.  Items
 * running past chromSize are warned about and dropped when clipDontDie is set,
 * otherwise they abort.  Overlapping items and lines lacking a value column
 * abort. */
{
    struct lm *lmLocal = lmInit(0);

    /* Stream through section until get to end of file or next section,
     * adding values to list. */
    char *words[2];
    char *line;
    struct bwgVariableStepItem *item, *nextItem, *itemList = NULL;
    int originalSectionSize = 0;
    while (lineFileNextReal(lf, &line))
    {
        if (steppedSectionEnd(line, 2))
        {
            lineFileReuse(lf);
            break;
        }

        /* A line with fewer than two words would leave words[1] unset (or
         * pointing at a previous line's text), so insist on both columns
         * before parsing them. */
        int wordCount = chopLine(line, words);
        if (wordCount < 2)
            errAbort("line %d of %s: expecting 2 words (start and value) but got %d",
                     lf->lineIx, lf->fileName, wordCount);

        lmAllocVar(lmLocal, item);
        int start = lineFileNeedNum(lf, words, 0);
        if (start <= 0)
        {
            errAbort("line %d of %s: zero or negative chromosome coordinate not allowed",
                     lf->lineIx, lf->fileName);
        }
        item->start = start - 1;
        item->val = lineFileNeedDouble(lf, words, 1);
        if (item->start + span > chromSize)
        {
            warn("line %d of %s: chromosome %s has %u bases, but item ends at %u",
                 lf->lineIx, lf->fileName, chrom, chromSize, item->start + span);
            if (!clipDontDie)
                noWarnAbort();
        }
        else
        {
            slAddHead(&itemList, item);
            ++originalSectionSize;
        }
    }
    slSort(&itemList, bwgVariableStepItemCmp);

    /* Make sure no overlap between items. */
    if (itemList != NULL)
    {
        item = itemList;
        for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next)
        {
            if (item->start + span > nextItem->start)
                errAbort("Overlap on %s between items starting at %d and %d.\n"
                         "Please remove overlaps and try again",
                         chrom, item->start, nextItem->start);
            item = nextItem;
        }
    }

    /* Break up into sections of no more than items-per-slot size. */
    int sizeLeft = originalSectionSize;
    for (item = itemList; item != NULL; )
    {
        /* Figure out size of this section */
        int sectionSize = sizeLeft;
        if (sectionSize > itemsPerSlot)
            sectionSize = itemsPerSlot;
        sizeLeft -= sectionSize;

        /* Convert from list to array representation. */
        struct bwgVariableStepPacked *packed, *p;
        p = lmAllocArray(lm, packed, sectionSize);
        int i;
        for (i=0; i<sectionSize; ++i)
        {
            p->start = item->start;
            p->val = item->val;
            item = item->next;
            ++p;
        }

        /* Fill in section and add it to list. */
        struct bwgSection *section;
        lmAllocVar(lm, section);
        section->chrom = chrom;
        section->start = packed[0].start;
        section->end = packed[sectionSize-1].start + span;
        section->type = bwgTypeVariableStep;
        section->items.variableStepPacked = packed;
        section->itemSpan = span;
        section->itemCount = sectionSize;
        slAddHead(pSectionList, section);
    }
    lmCleanup(&lmLocal);
}
static void parseFixedStepSection(struct lineFile *lf, boolean clipDontDie, struct lm *lm,
        int itemsPerSlot, char *chrom, bits32 chromSize, bits32 span, bits32 sectionStart,
        bits32 step, struct bwgSection **pSectionList)
/* Read the single column data in section until get to end of file or the
 * next section, then add it to *pSectionList as one or more fixed-step
 * sections of at most itemsPerSlot items each, allocated in lm.  Items that
 * would run past chromSize are warned about and dropped when clipDontDie is
 * set; otherwise they abort. */
{
    struct lm *scratch = lmInit(0);
    struct bwgFixedStepItem *item, *items = NULL;
    char *words[1];
    char *line;
    int keptCount = 0;
    bits32 itemStart = sectionStart;   /* Start of the item being read. */

    /* Stream through section, adding values from single column to list. */
    while (lineFileNextReal(lf, &line))
    {
        if (steppedSectionEnd(line, 1))
        {
            lineFileReuse(lf);
            break;
        }
        chopLine(line, words);
        lmAllocVar(scratch, item);
        item->val = lineFileNeedDouble(lf, words, 0);
        if (itemStart + span <= chromSize)
        {
            slAddHead(&items, item);
            ++keptCount;
        }
        else
        {
            warn("line %d of %s: chromosome %s has %u bases, but item ends at %u",
                 lf->lineIx, lf->fileName, chrom, chromSize, itemStart + span);
            if (!clipDontDie)
                noWarnAbort();
        }
        /* Position advances whether or not the item was kept. */
        itemStart += step;
    }
    slReverse(&items);

    /* Break up into sections of no more than items-per-slot size, and
     * convert to packed format. */
    int remaining = keptCount;
    item = items;
    while (item != NULL)
    {
        /* Figure out size of this section */
        int sectionSize = (remaining > itemsPerSlot) ? itemsPerSlot : remaining;
        remaining -= sectionSize;

        /* Allocate and fill in section.  sectionStart is moved to the start
         * of the following section before the end is computed from it. */
        struct bwgSection *section;
        lmAllocVar(lm, section);
        section->chrom = chrom;
        section->start = sectionStart;
        sectionStart += sectionSize * step;
        section->end = sectionStart - step + span;
        section->type = bwgTypeFixedStep;
        section->itemStep = step;
        section->itemSpan = span;
        section->itemCount = sectionSize;

        /* Allocate array for data, and copy from list to array representation */
        struct bwgFixedStepPacked *packed;
        lmAllocArray(lm, packed, sectionSize);
        section->items.fixedStepPacked = packed;
        int i;
        for (i = 0; i < sectionSize; ++i)
        {
            packed[i].val = item->val;
            item = item->next;
        }

        /* Add section to list. */
        slAddHead(pSectionList, section);
    }
    lmCleanup(&scratch);
}
void fillInArrayFromPair(struct lm *lm, struct mafComp *native, struct mafComp *xeno, struct orthoCds *array, int arraySize, int symCount) /* Figure out the CDS in xeno for each position in native. */ { char *nText = native->text, *xText = xeno->text; int nSize = arraySize, xSize = symCount - countChars(xText, '-'); /* Create an array that for each point in native gives you the index of corresponding * point in xeno, and another array that does the opposite. */ int *nToX, *xToN; lmAllocArray(lm, nToX, nSize+1); lmAllocArray(lm, xToN, xSize+1); int i; int nIx = 0, xIx = 0; for (i=0; i<symCount; ++i) { char n = nText[i], x = xText[i]; if (n == '.') errAbort("Dot in native component %s of maf. Can't handle it.", native->src); nToX[nIx] = xIx; xToN[xIx] = nIx; if (n != '-') { array[nIx].base = x; nToX[nIx] = xIx; ++nIx; } if (x != '-') ++xIx; } assert(xIx == xSize); assert(nIx == nSize); /* Put an extra value at end of arrays to simplify logic. */ nToX[nSize] = xSize; xToN[xSize] = nSize; /* Create xeno sequence without the '-' chars */ char *xDna = lmCloneString(lm, xText); tolowers(xDna); stripChar(xDna, '-'); #ifdef DEBUG uglyf("xToN:"); for (i=0; i<xSize; ++i) uglyf(" %d", xToN[i]); uglyf("\n"); #endif /* DEBUG */ /* Step through this, one frame at a time, looking for best ORF */ int frame; for (frame=0; frame<3; ++frame) { /* Calculate some things constant for this frame, and deal with * ORF that starts at beginning (may not have ATG) */ int lastPos = xSize-3; int frameDnaSize = xSize-frame; int start = frame, end = findOrfEnd(xDna, frameDnaSize, frame); applyOrf(start, end, xDna, xToN, array, arraySize); for (start = end; start<=lastPos; ) { // uglyf("start %d %c%c%c\n", start, xDna[start], xDna[start+1], xDna[start+2]); if (startsWith("atg", xDna+start)) { end = findOrfEnd(xDna, frameDnaSize, start); applyOrf(start, end, xDna, xToN, array, arraySize); start = end; } else start += 3; } } }
struct wordTree *wordTreeForChainsInFile(char *fileName, int chainSize, struct lm *lm) /* Return a wordTree of all chains-of-words of length chainSize seen in file. * Allocate the structure in local memory pool lm. */ { /* Stuff for processing file a line at a time. */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line, *word; /* We'll keep a chain of three or so words in a doubly linked list. */ struct dlNode *node; struct dlList *chain = dlListNew(); int curSize = 0; /* We'll build up the tree starting with an empty root node. */ struct wordTree *wt = wordTreeNew(""); int wordCount = 0; /* Save time/space by sharing stack between all "following" rbTrees. */ struct rbTreeNode **stack; lmAllocArray(lm, stack, 256); /* Loop through each line of input file, lowercasing the whole line, and then * looping through each word of line, stripping out special chars, and finally * processing each word. */ while (lineFileNext(lf, &line, NULL)) { if (lower) tolowers(line); while ((word = nextWord(&line)) != NULL) { if (unpunc) { stripChar(word, ','); stripChar(word, '.'); stripChar(word, ';'); stripChar(word, '-'); stripChar(word, '"'); stripChar(word, '?'); stripChar(word, '!'); stripChar(word, '('); stripChar(word, ')'); if (word[0] == 0) continue; } verbose(2, "%s\n", word); /* We come to this point in the code for each word in the file. * Here we want to maintain a chain of sequential words up to * chainSize long. We do this with a doubly-linked list structure. * For the first few words in the file we'll just build up the list, * only adding it to the tree when we finally do get to the desired * chain size. Once past the initial section of the file we'll be * getting rid of the first link in the chain as well as adding a new * last link in the chain with each new word we see. 
*/ if (curSize < chainSize) { dlAddValTail(chain, cloneString(word)); ++curSize; if (curSize == chainSize) addChainToTree(wt, chain, lm, stack); } else { /* Reuse doubly-linked-list node, but give it a new value, as we move * it from head to tail of list. */ node = dlPopHead(chain); freeMem(node->val); node->val = cloneString(word); dlAddTail(chain, node); addChainToTree(wt, chain, lm, stack); } ++wordCount; } } /* Handle last few words in file, where can't make a chain of full size. Need * a special case for file that has fewer than chain size words too. */ if (curSize < chainSize) addChainToTree(wt, chain, lm, stack); while ((node = dlPopHead(chain)) != NULL) { addChainToTree(wt, chain, lm, stack); freeMem(node->val); freeMem(node); } dlListFree(&chain); lineFileClose(&lf); return wt; }
void chainNet(char *chainFile, char *tSizes, char *qSizes, char *tNet, char *qNet) /* chainNet - Make alignment nets out of chains. */ { struct lineFile *lf = lineFileOpen(chainFile, TRUE); struct hash *qHash, *tHash; struct chrom *qChromList, *tChromList, *tChrom, *qChrom; struct chain *chain; double lastScore = -1; struct lm *lm = lmInit(0); struct rbTreeNode **rbStack; FILE *tNetFile = mustOpen(tNet, "w"); FILE *qNetFile = mustOpen(qNet, "w"); lmAllocArray(lm, rbStack, 256); makeChroms(qSizes, lm, rbStack, &qHash, &qChromList); makeChroms(tSizes, lm, rbStack, &tHash, &tChromList); verbose(1, "Got %d chroms in %s, %d in %s\n", slCount(tChromList), tSizes, slCount(qChromList), qSizes); lineFileSetMetaDataOutput(lf, tNetFile); lineFileSetMetaDataOutput(lf, qNetFile); /* Loop through chain file building up net. */ while ((chain = chainRead(lf)) != NULL) { /* Make sure that input is really sorted. */ if (lastScore >= 0 && chain->score > lastScore) errAbort("%s must be sorted in order of score", chainFile); lastScore = chain->score; if (chain->score < minScore) { break; } verbose(2, "chain %f (%d els) %s %d-%d %c %s %d-%d\n", chain->score, slCount(chain->blockList), chain->tName, chain->tStart, chain->tEnd, chain->qStrand, chain->qName, chain->qStart, chain->qEnd); qChrom = hashMustFindVal(qHash, chain->qName); if (qChrom->size != chain->qSize) errAbort("%s is %d in %s but %d in %s", chain->qName, chain->qSize, chainFile, qChrom->size, qSizes); tChrom = hashMustFindVal(tHash, chain->tName); if (tChrom->size != chain->tSize) errAbort("%s is %d in %s but %d in %s", chain->tName, chain->tSize, chainFile, tChrom->size, tSizes); if (!inclQuery(chain)) verbose(2, "skipping chain on query %s\n", chain->qName); else { addChain(qChrom, tChrom, chain); verbose(2, "%s has %d inserts, %s has %d\n", tChrom->name, tChrom->spaces->n, qChrom->name, qChrom->spaces->n); } } /* Build up other side of fills. It's just for historical * reasons this is not done during the main build up. 
* It's a little less efficient this way, but to change it * some hard reverse strand issues would have to be juggled. */ verbose(1, "Finishing nets\n"); finishNet(qChromList, TRUE); finishNet(tChromList, FALSE); /* Write out basic net files. */ verbose(1, "writing %s\n", tNet); outputNetSide(tChromList, tNetFile, FALSE); verbose(1, "writing %s\n", qNet); outputNetSide(qChromList, qNetFile, TRUE); /* prevent SIGPIPE in preceding process if input is a pipe, consume remainder * of input file since we stop before EOF. */ if (isPipe(lf->fd)) { char *line; while(lineFileNext(lf, &line, NULL)) continue; } lineFileClose(&lf); if (verboseLevel() > 1) printMem(stderr); }
struct bed *bedFromRow(
        char *chrom,                  /* Chromosome bed is on. */
        char **row,                   /* Row with other data for bed. */
        int fieldCount,               /* Number of fields in final bed. */
        boolean isPsl,                /* True if in PSL format. */
        boolean isGenePred,           /* True if in GenePred format. */
        boolean isBedWithBlocks,      /* True if BED with block list. */
        boolean *pslKnowIfProtein,    /* Have we figured out if psl is protein? */
        boolean *pslIsProtein,        /* True if we know psl is protein. */
        struct lm *lm)                /* Local memory pool */
/* Create bed from a database row when we already understand
 * the format pretty well.  The bed is allocated inside of
 * the local memory pool lm.  Generally use this in conjunction
 * with the results of a SQL query constructed with the aid
 * of the bedSqlFieldsExceptForChrom function.
 *
 * Row layout as read here: [0] start, [1] end, [2] name, [3] score,
 * [4] strand, [5] thickStart, [6] thickEnd, [7] block count, [8] block sizes
 * (block ends for genePred), [9] block starts, and for psl on the '-' target
 * strand [10] target size.  Only as many columns as fieldCount calls for are
 * touched.  bed->chrom points at the chrom argument; it is not copied. */
{
    char *strand, tStrand, qStrand;
    struct bed *bed;
    int i, blockCount;

    lmAllocVar(lm, bed);
    bed->chrom = chrom;
    bed->chromStart = sqlUnsigned(row[0]);
    bed->chromEnd = sqlUnsigned(row[1]);
    if (fieldCount < 4)
        return bed;
    bed->name = lmCloneString(lm, row[2]);
    if (fieldCount < 5)
        return bed;
    bed->score = atoi(row[3]);
    if (fieldCount < 6)
        return bed;
    strand = row[4];
    qStrand = strand[0];
    /* Only look at the second character when there is a first one; for an
     * empty strand string, strand[1] is past the terminator. */
    tStrand = (qStrand != 0) ? strand[1] : 0;
    if (tStrand == 0)
        bed->strand[0] = qStrand;
    else
    {
        /* psl: use XOR of qStrand,tStrand if both are given. */
        if (tStrand == qStrand)
            bed->strand[0] = '+';
        else
            bed->strand[0] = '-';
    }
    if (fieldCount < 8)
        return bed;
    bed->thickStart = sqlUnsigned(row[5]);
    bed->thickEnd = sqlUnsigned(row[6]);
    if (fieldCount < 12)
        return bed;
    bed->blockCount = blockCount = sqlUnsigned(row[7]);
    lmAllocArray(lm, bed->blockSizes, blockCount);
    sqlUnsignedArray(row[8], bed->blockSizes, blockCount);
    lmAllocArray(lm, bed->chromStarts, blockCount);
    sqlUnsignedArray(row[9], bed->chromStarts, blockCount);
    if (isGenePred)
    {
        /* Translate end coordinates to sizes. */
        for (i=0; i<bed->blockCount; ++i)
            bed->blockSizes[i] -= bed->chromStarts[i];
    }
    else if (isPsl)
    {
        /* The protein test looks at the last block, so it can only be run
         * on a row that has at least one block; otherwise the question is
         * left open for a later row. */
        if (!*pslKnowIfProtein && blockCount > 0)
        {
            /* Figure out if is protein using a rather elaborate but
             * working test I think Angie or Brian must have figured out. */
            if (tStrand == '-')
            {
                int tSize = sqlUnsigned(row[10]);
                *pslIsProtein =
                    (bed->chromStart ==
                     tSize - (3*bed->blockSizes[bed->blockCount - 1] +
                              bed->chromStarts[bed->blockCount - 1]));
            }
            else
            {
                *pslIsProtein = (bed->chromEnd ==
                        3*bed->blockSizes[bed->blockCount - 1] +
                        bed->chromStarts[bed->blockCount - 1]);
            }
            *pslKnowIfProtein = TRUE;
        }
        if (*pslKnowIfProtein && *pslIsProtein)
        {
            /* if protein then blockSizes are in protein space */
            for (i=0; i<blockCount; ++i)
                bed->blockSizes[i] *= 3;
        }
        if (tStrand == '-')
        {
            /* psl: if target strand is '-', flip the coords.
             * (this is the target part of pslRcBoth from src/lib/psl.c) */
            int tSize = sqlUnsigned(row[10]);
            for (i=0; i<blockCount; ++i)
            {
                bed->chromStarts[i] = tSize -
                        (bed->chromStarts[i] + bed->blockSizes[i]);
            }
            reverseInts(bed->chromStarts, bed->blockCount);
            reverseInts(bed->blockSizes, bed->blockCount);
        }
    }
    if (!isBedWithBlocks)
    {
        /* non-bed: translate absolute starts to relative starts */
        for (i=0; i < bed->blockCount; i++)
            bed->chromStarts[i] -= bed->chromStart;
    }
    return bed;
}
void refSeparateButJoined(struct txGraph *graph, FILE *f) /* Flag graphs that have two non-overlapping refSeqs. */ { int sourceIx; boolean foundIt = FALSE; struct lm *lm = lmInit(0); struct rbTreeNode **stack; lmAllocArray(lm, stack, 128); /* Loop through sources looking for reference type. */ for (sourceIx=0; sourceIx<graph->sourceCount; ++sourceIx) { struct txSource *source = &graph->sources[sourceIx]; if (sameString(source->type, refType)) { /* Create a rangeTree including all exons of source. */ struct rbTree *tree = rangeTreeNewDetailed(lm, stack); struct txEdge *edge; for (edge = graph->edgeList; edge != NULL; edge = edge->next) { if (edge->type == ggExon && evOfSourceOnList(edge->evList, sourceIx)) rangeTreeAdd(tree, graph->vertices[edge->startIx].position, graph->vertices[edge->endIx].position); } /* Go through remaining reference sources looking for no overlap. */ int i; for (i=0; i<graph->sourceCount; ++i) { if (i == sourceIx) continue; struct txSource *s = &graph->sources[i]; if (sameString(s->type, refType)) { boolean gotOverlap = FALSE; for (edge = graph->edgeList; edge != NULL; edge = edge->next) { if (edge->type == ggExon && evOfSourceOnList(edge->evList, i)) { if (rangeTreeOverlaps(tree, graph->vertices[edge->startIx].position, graph->vertices[edge->endIx].position)) { gotOverlap = TRUE; break; } } } if (!gotOverlap) { foundIt = TRUE; break; } } } freez(&tree); } if (foundIt) break; } if (foundIt) { fprintf(f, "%s\t%d\t%d\t%s\t0\t%s\n", graph->tName, graph->tStart, graph->tEnd, "refJoined", graph->strand); } lmCleanup(&lm); }