static struct joinedRow *jrRowAdd(struct joinedTables *joined, char **row,
                                  int fieldCount, int keyCount)
/* Append one row to the joined table and return it.  The first fieldCount
 * entries of row are copied in as fields and the following keyCount entries
 * as keys.  The field and key arrays are sized from joined->fieldCount and
 * joined->keyCount, not from the counts passed in.  Returns NULL, after a
 * warning, once a non-zero joined->maxRowCount has been reached. */
{
    struct lm *pool = joined->lm;
    struct joinedRow *newRow;
    char **keyStrings;
    int ix;

    /* A maxRowCount of zero means no limit is enforced. */
    if (joined->maxRowCount != 0 && joined->rowCount >= joined->maxRowCount)
    {
        warn("Stopping after %d rows, try restricting region or adding filter",
             joined->rowCount);
        return NULL;
    }

    lmAllocVar(pool, newRow);
    lmAllocArray(pool, newRow->fields, joined->fieldCount);
    lmAllocArray(pool, newRow->keys, joined->keyCount);
    newRow->passedFilter = TRUE;

    for (ix = 0; ix < fieldCount; ix++)
        newRow->fields[ix] = lmSlName(pool, row[ix]);

    /* Keys follow directly after the fields in the incoming row. */
    keyStrings = row + fieldCount;
    for (ix = 0; ix < keyCount; ix++)
        newRow->keys[ix] = lmSlName(pool, keyStrings[ix]);

    slAddHead(&joined->rowList, newRow);
    joined->rowCount += 1;
    return newRow;
}
Exemplo n.º 2
0
struct cdsEvidence *orfsOnRna(struct dnaSeq *seq, struct hash *nmdHash, struct hash *mafHash,
	int otherSpeciesCount, boolean anyStart)
/* Return scored list of all ORFs on RNA.  A record is made for every position
 * where the sequence starts with "atg" and, when anyStart is set, for each of
 * the first three positions as well.  mafHash is only consulted when
 * otherSpeciesCount is positive.  All scratch data (upstream count arrays,
 * orthology arrays) is released before returning; only the ORF list itself
 * is handed back to the caller. */
{
DNA *dna = seq->dna;
int lastPos = seq->size - 3;
int startPos;
struct cdsEvidence *orfList = NULL, *orf;
struct lm *lm = lmInit(64*1024);

/* Figure out the key piece of info for NMD. */
int lastIntronPos = findLastIntronPos(nmdHash, seq->name);
double orthoWeightPer = 0;
struct orthoCdsArray *orthoList = NULL;

/* Calculate stuff useful for orthology */
if (otherSpeciesCount > 0)
    {
    orthoWeightPer = 1.0/otherSpeciesCount;
    struct mafAli *maf = hashFindVal(mafHash, seq->name);
    if (maf != NULL)
	orthoList = calcOrthoList(maf, lm);
    }

/* Allocate some arrays that keep track of bases in
 * upstream.  This dramatically speeds up processing
 * of TTN and other long transcripts which otherwise
 * can take almost a minute each. */
int *upAtgCount, *upKozakCount;
lmAllocArray(lm, upAtgCount, seq->size);
lmAllocArray(lm, upKozakCount, seq->size);
calcUpstreams(seq, upAtgCount, upKozakCount);

/* Go through sequence making up a record for each 
 * start codon we find. */
for (startPos=0; startPos<=lastPos; ++startPos)
    {
    if (startsWith("atg", dna+startPos) || (anyStart && startPos < 3))
        {
	int stopPos = orfEndInSeq(seq, startPos);
	orf = createCds(seq, startPos, stopPos, upAtgCount, upKozakCount, 
		lastIntronPos, orthoList, orthoWeightPer);
	slAddHead(&orfList, orf);
	}
    }
slReverse(&orfList);

/* Clean up and go home.  calcOrthoList puts the list nodes themselves on the
 * heap with AllocVar (only their contents live in lm), so the nodes have to
 * be freed separately or they leak on every call.  Free them before the pool
 * so nothing is left pointing into released memory. */
slFreeList(&orthoList);
lmCleanup(&lm);
return orfList;
}
Exemplo n.º 3
0
struct pslSets *pslSetsNew(int numSets)
/* Build a pslSets object with room for numSets sets.  The object gets its
 * own local memory pool, and the sets and pending arrays are allocated
 * from that pool. */
{
struct pslSets *newSets;
struct lm *pool;

AllocVar(newSets);
pool = lmInit(1024*1024);
newSets->lm = pool;
newSets->numSets = numSets;
lmAllocArray(pool, newSets->sets, numSets);
lmAllocArray(pool, newSets->pending, numSets);
return newSets;
}
Exemplo n.º 4
0
struct annoRow *aggvIntergenicRow(struct annoGratorGpVar *self, struct variant *variant,
				  boolean *retRJFilterFailed, struct lm *callerLm)
/* Make the output row for an intergenic variant (no overlapping or nearby
 * genes): every genePred column is an empty string, and the gpFx columns
 * describe an effect that carries only the first alternate allele and the
 * intergenic_variant SO number.  Scratch data is allocated in self->lm; the
 * returned row is allocated in callerLm.  retRJFilterFailed may be NULL. */
{
struct annoGrator *grator = &(self->grator);
struct annoStreamer *streamer = &(grator->streamer);
struct gpFx *fx;
char **wordsOut;
int col;

lmAllocArray(self->lm, wordsOut, streamer->numCols);

/* The leading genePred string columns are all left empty. */
int gpColCount = grator->mySource->numCols;
for (col = 0;  col < gpColCount;  col++)
    wordsOut[col] = "";

/* Describe the effect; alleles made up only of nucleotides are uppercased. */
lmAllocVar(self->lm, fx);
fx->allele = firstAltAllele(variant->alleles);
if (isAllNt(fx->allele, strlen(fx->allele)))
    touppers(fx->allele);
fx->soNumber = intergenic_variant;
fx->detailType = none;
aggvStringifyGpFx(&wordsOut[gpColCount], fx, self->lm);

boolean rjFail = FALSE;
if (retRJFilterFailed != NULL && *retRJFilterFailed)
    rjFail = TRUE;
return annoRowFromStringArray(variant->chrom, variant->chromStart, variant->chromEnd, rjFail,
			      wordsOut, streamer->numCols, callerLm);
}
Exemplo n.º 5
0
struct hash *allChainsHash(char *fileName)
/* Read every chain in fileName and return a hash keyed by the chain id
 * written in hexadecimal.  Each value is an indexedChain holding the chain
 * and a range tree with one range per block (tStart to tEnd), whose val
 * points at the block.  The indexedChain structs and the tree stack are
 * allocated from the hash's own local memory pool, which is also handed to
 * every range tree. */
{
struct hash *byId = newHash(18);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct lm *pool = byId->lm;
struct rbTreeNode **sharedStack;
struct chain *chain;
char hexId[20];

/* One stack is shared between all of the range trees. */
lmAllocArray(pool, sharedStack, 128);

for (chain = chainRead(lf); chain != NULL; chain = chainRead(lf))
    {
    struct indexedChain *indexed;
    struct cBlock *blk;

    lmAllocVar(pool, indexed);
    indexed->chain = chain;
    indexed->blockTree = rangeTreeNewDetailed(pool, sharedStack);
    for (blk = chain->blockList; blk != NULL; blk = blk->next)
	{
	struct range *added = rangeTreeAdd(indexed->blockTree, blk->tStart, blk->tEnd);
	added->val = blk;
	}

    safef(hexId, sizeof(hexId), "%x", chain->id);
    hashAddUnique(byId, hexId, indexed);
    }
lineFileClose(&lf);
return byId;
}
Exemplo n.º 6
0
static void rowBufInit(struct rowBuf *rowBuf, int size)
/* Reset rowBuf, then give it a fresh local memory pool and a buffer of
 * size entries allocated from that pool. */
{
struct lm *freshPool;

resetRowBuf(rowBuf);
freshPool = lmInit(0);
rowBuf->lm = freshPool;
rowBuf->size = size;
lmAllocArray(freshPool, rowBuf->buf, size);
}
Exemplo n.º 7
0
static struct pslMatches *pslMatchesAlloc(struct pslSets *ps)
/* Hand out a matches object.  One is taken from ps->matchesPool when the
 * pool is not empty; otherwise a new one, together with its psls array of
 * ps->numSets entries, is allocated from ps->lm.  Either way numSets is
 * set from ps before returning. */
{
struct pslMatches *matches = slPopHead(&ps->matchesPool);

if (matches != NULL)
    {
    /* Recycled object: only the set count needs refreshing. */
    matches->numSets = ps->numSets;
    return matches;
    }

lmAllocVar(ps->lm, matches);
lmAllocArray(ps->lm, matches->psls, ps->numSets);
matches->numSets = ps->numSets;
return matches;
}
Exemplo n.º 8
0
static struct bwgFixedStepPacked *
createFixedStepItems(double *score, int len, struct lm *lm)
/* Return an array of len packed fixed-step items allocated in lm, with
 * the val of item i taken from score[i]. */
{
  struct bwgFixedStepPacked *items;
  int ix = 0;

  lmAllocArray(lm, items, len);
  while (ix < len)
    {
      items[ix].val = score[ix];
      ++ix;
    }
  return items;
}
Exemplo n.º 9
0
static struct bwgVariableStepPacked *
createVariableStepItems(int *start, double *score, int len, struct lm *lm)
/* Return an array of len packed variable-step items allocated in lm.
 * Item i gets start[i] - 1 as its start and score[i] as its val. */
{
  struct bwgVariableStepPacked *items;
  int ix = 0;

  lmAllocArray(lm, items, len);
  while (ix < len)
    {
      struct bwgVariableStepPacked *out = &items[ix];
      out->start = start[ix] - 1;
      out->val = score[ix];
      ++ix;
    }
  return items;
}
Exemplo n.º 10
0
static struct ffAli *ffFindExtendNmers(char *nStart, char *nEnd, char *hStart, char *hEnd,
	int seedSize)
/* Find perfectly matching seedSize-mers between needle (nStart..nEnd) and
 * haystack (hStart..hEnd) and extend them.  The needle seeds are indexed in
 * a temporary hash table; each haystack position is then looked up in it,
 * and every byte-for-byte match becomes an ffAli that is extended left and
 * right.  The hits are passed through ffMakeRightLinks and ffMergeClose
 * before being returned.  The hash table is scratch memory; the ffAli
 * elements are allocated with AllocVar. */
{
struct lm *pool = lmInit(32*1024);
struct seqHashEl **buckets;
struct ffAli *hits = NULL;
char *lastNeedleSeed = nEnd - seedSize;
char *lastHaySeed = hEnd - seedSize;
char *nPos, *hPos;

/* Index the needle: one entry per seed start, skipping seeds that
 * totalDegenerateN rejects. */
lmAllocArray(pool, buckets, 4*1024);
for (nPos = nStart; nPos <= lastNeedleSeed; ++nPos)
    {
    struct seqHashEl **bucket;
    struct seqHashEl *entry;

    if (totalDegenerateN(nPos, seedSize))
	continue;
    bucket = buckets + ffHashFuncN(nPos, seedSize);
    lmAllocVar(pool, entry);
    entry->seq = nPos;
    slAddHead(bucket, entry);
    }

/* Walk the haystack, turning every seed that really matches into a hit. */
for (hPos = hStart; hPos <= lastHaySeed; ++hPos)
    {
    struct seqHashEl *entry;
    for (entry = buckets[ffHashFuncN(hPos, seedSize)]; entry != NULL; entry = entry->next)
	{
	struct ffAli *hit;

	/* Same bucket does not imply same sequence, so compare the bytes. */
	if (memcmp(entry->seq, hPos, seedSize) != 0)
	    continue;
	AllocVar(hit);
	hit->hStart = hPos;
	hit->hEnd = hPos + seedSize;
	hit->nStart = entry->seq;
	hit->nEnd = entry->seq + seedSize;
	extendExactLeft(hit->nStart - nStart, hit->hStart - hStart,
	    &hit->nStart, &hit->hStart);
	extendExactRight(nEnd - hit->nEnd, hEnd - hit->hEnd, &hit->nEnd, &hit->hEnd);
	/* Push onto the front of the list through the left link. */
	hit->left = hits;
	hits = hit;
	}
    }

hits = ffMakeRightLinks(hits);
hits = ffMergeClose(hits, nStart, hStart);
lmCleanup(&pool);
return hits;
}
Exemplo n.º 11
0
struct annoRow *annoRowFromStringArray(char *chrom, uint start, uint end, boolean rightJoinFail,
				       char **wordsIn, int numCols, struct lm *lm)
/* Allocate in lm and return an annoRow for chrom:start-end.  The chrom name
 * and each of the numCols strings in wordsIn are cloned into lm; the cloned
 * word array becomes the row's data. */
{
struct annoRow *row;
char **wordsCopy;
int col;

lmAllocVar(lm, row);
row->chrom = lmCloneString(lm, chrom);
row->start = start;
row->end = end;
row->rightJoinFail = rightJoinFail;

lmAllocArray(lm, wordsCopy, numCols);
for (col = 0;  col < numCols;  col++)
    wordsCopy[col] = lmCloneString(lm, wordsIn[col]);
row->data = wordsCopy;

return row;
}
Exemplo n.º 12
0
struct orthoCdsArray *calcOrthoList(struct mafAli *maf, struct lm *lm)
/* Given maf, build a list with one orthoCdsArray per non-native species, in
 * component order.  The first component is assumed to be the native species.
 * Each array has one entry per native base (native component size) and is
 * filled in by fillInArrayFromPair.  Species names and cdsArrays are
 * allocated in lm, but the list nodes themselves come from AllocVar, so they
 * are not released when lm is cleaned up. */
{
struct mafComp *native = maf->components;
struct mafComp *other;
struct orthoCdsArray *list = NULL;
int nativeSize = native->size;

other = native->next;
while (other != NULL)
    {
    struct orthoCdsArray *el;
    AllocVar(el);
    el->species = lmCloneString(lm, other->src);
    el->arraySize = nativeSize;
    lmAllocArray(lm, el->cdsArray, nativeSize);
    fillInArrayFromPair(lm, native, other, el->cdsArray, nativeSize, maf->textSize);
    slAddHead(&list, el);
    other = other->next;
    }

/* Elements were pushed on the front; restore component order. */
slReverse(&list);
return list;
}
Exemplo n.º 13
0
static struct annoRow *aggvEffectToRow(struct annoGratorGpVar *self, struct gpFx *effect,
				       struct annoRow *rowIn, struct lm *callerLm)
// Combine a genePred annoRow with one gpFx record into an augmented genePred
// annoRow: the genePred columns of rowIn come first, followed by the
// stringified effect.  The returned row is allocated in callerLm; the
// temporary word array comes from self->lm.
{
struct annoGrator *grator = &(self->grator);
struct annoStreamer *streamer = &(grator->streamer);
int gpColCount = grator->mySource->numCols;
int outColCount = streamer->numCols;
char **genePredWords = (char **)rowIn->data;
char **wordsOut;

// There must be at least one output column beyond the genePred ones.
assert(outColCount > gpColCount);

lmAllocArray(self->lm, wordsOut, outColCount);

// The genePred words are shared by pointer here; annoRowFromStringArray
// clones every word below.
memcpy(wordsOut, genePredWords, sizeof(char *) * gpColCount);

// The effect's columns go right after the genePred columns.
aggvStringifyGpFx(&wordsOut[gpColCount], effect, callerLm);

return annoRowFromStringArray(rowIn->chrom, rowIn->start, rowIn->end, rowIn->rightJoinFail,
			      wordsOut, outColCount, callerLm);
}
Exemplo n.º 14
0
static void parseVariableStepSection(struct lineFile *lf, boolean clipDontDie, struct lm *lm,
	int itemsPerSlot, char *chrom, int chromSize, bits32 span, struct bwgSection **pSectionList)
/* Read the two-column (position, value) lines of a variableStep section until
 * end of file or the start of the next section.  Positions in the file are
 * one-based and are stored zero-based.  Items that would run past chromSize
 * are warned about and left out, or abort the program when clipDontDie is
 * FALSE.  The remaining items are sorted with bwgVariableStepItemCmp, checked
 * for overlap, and added to *pSectionList as sections of at most itemsPerSlot
 * items each.  Sections and their packed arrays are allocated in lm. */
{
/* Scratch pool for the intermediate item list; released before returning. */
struct lm *scratch = lmInit(0);
struct bwgVariableStepItem *itemList = NULL, *item;
char *words[2];
char *line;
int keptCount = 0;

/* Stream through the section, collecting items in a list. */
while (lineFileNextReal(lf, &line))
    {
    if (steppedSectionEnd(line, 2))
	{
	/* Not part of this section: hand the line back to the lineFile. */
	lineFileReuse(lf);
	break;
	}
    chopLine(line, words);
    lmAllocVar(scratch, item);
    int oneBasedStart = lineFileNeedNum(lf, words, 0);
    if (oneBasedStart <= 0)
	errAbort("line %d of %s: zero or negative chromosome coordinate not allowed",
	    lf->lineIx, lf->fileName);
    item->start = oneBasedStart - 1;
    item->val = lineFileNeedDouble(lf, words, 1);
    if (item->start + span <= chromSize)
	{
	slAddHead(&itemList, item);
	++keptCount;
	continue;
	}
    /* Item runs off the end of the chromosome. */
    warn("line %d of %s: chromosome %s has %u bases, but item ends at %u",
	lf->lineIx, lf->fileName, chrom, chromSize, item->start + span);
    if (!clipDontDie)
	noWarnAbort();
    }
slSort(&itemList, bwgVariableStepItemCmp);

/* Neighbours in the sorted list must not overlap. */
for (item = itemList; item != NULL && item->next != NULL; item = item->next)
    {
    if (item->start + span > item->next->start)
	errAbort("Overlap on %s between items starting at %d and %d.\n"
	         "Please remove overlaps and try again",
		chrom, item->start, item->next->start);
    }

/* Cut the list into sections of no more than itemsPerSlot items. */
int remaining = keptCount;
item = itemList;
while (item != NULL)
    {
    int sectionSize = (remaining > itemsPerSlot) ? itemsPerSlot : remaining;
    remaining -= sectionSize;

    /* Copy this section's items from the list into a packed array. */
    struct bwgVariableStepPacked *packed;
    lmAllocArray(lm, packed, sectionSize);
    int i;
    for (i = 0; i < sectionSize; ++i)
	{
	packed[i].start = item->start;
	packed[i].val = item->val;
	item = item->next;
	}

    /* Describe the section and put it on the caller's list. */
    struct bwgSection *section;
    lmAllocVar(lm, section);
    section->type = bwgTypeVariableStep;
    section->chrom = chrom;
    section->start = packed[0].start;
    section->end = packed[sectionSize-1].start + span;
    section->itemSpan = span;
    section->itemCount = sectionSize;
    section->items.variableStepPacked = packed;
    slAddHead(pSectionList, section);
    }
lmCleanup(&scratch);
}
Exemplo n.º 15
0
static void parseFixedStepSection(struct lineFile *lf, boolean clipDontDie, struct lm *lm,
	int itemsPerSlot, char *chrom, bits32 chromSize, bits32 span, bits32 sectionStart, 
	bits32 step, struct bwgSection **pSectionList)
/* Read the single-column values of a fixedStep section until end of file or
 * the start of the next section.  The position advances by step for every
 * line read.  Items that would run past chromSize are warned about and left
 * out, or abort the program when clipDontDie is FALSE.  The remaining values
 * are added to *pSectionList as sections of at most itemsPerSlot items each.
 * Sections and their packed arrays are allocated in lm. */
{
/* Scratch pool for the intermediate item list; released before returning. */
struct lm *scratch = lmInit(0);
struct bwgFixedStepItem *itemList = NULL, *item;
char *words[1];
char *line;
int keptCount = 0;
bits32 itemStart = sectionStart;	/* Start of the item on the current line. */

/* Stream through the section, collecting values in a list. */
while (lineFileNextReal(lf, &line))
    {
    if (steppedSectionEnd(line, 1))
	{
	/* Not part of this section: hand the line back to the lineFile. */
	lineFileReuse(lf);
	break;
	}
    chopLine(line, words);
    lmAllocVar(scratch, item);
    item->val = lineFileNeedDouble(lf, words, 0);
    if (itemStart + span <= chromSize)
	{
	slAddHead(&itemList, item);
	++keptCount;
	}
    else
	{
	warn("line %d of %s: chromosome %s has %u bases, but item ends at %u",
	    lf->lineIx, lf->fileName, chrom, chromSize, itemStart + span);
	if (!clipDontDie)
	    noWarnAbort();
	}
    itemStart += step;
    }
/* Items were pushed on the front, so put them back in file order. */
slReverse(&itemList);

/* Cut the list into sections of no more than itemsPerSlot items. */
int remaining = keptCount;
item = itemList;
while (item != NULL)
    {
    int sectionSize = (remaining > itemsPerSlot) ? itemsPerSlot : remaining;
    remaining -= sectionSize;

    /* Describe the section; sectionStart moves on to where the next one begins. */
    struct bwgSection *section;
    lmAllocVar(lm, section);
    section->type = bwgTypeFixedStep;
    section->chrom = chrom;
    section->itemStep = step;
    section->itemSpan = span;
    section->itemCount = sectionSize;
    section->start = sectionStart;
    sectionStart += sectionSize * step;
    section->end = sectionStart - step + span;

    /* Copy this section's values from the list into a packed array. */
    struct bwgFixedStepPacked *packed;
    lmAllocArray(lm, packed, sectionSize);
    section->items.fixedStepPacked = packed;
    int i;
    for (i = 0; i < sectionSize; ++i)
	{
	packed[i].val = item->val;
	item = item->next;
	}

    slAddHead(pSectionList, section);
    }
lmCleanup(&scratch);
}
Exemplo n.º 16
0
void fillInArrayFromPair(struct lm *lm, struct mafComp *native, struct mafComp *xeno,
	struct orthoCds *array, int arraySize, int symCount)
/* Figure out the CDS in xeno for each position in native.
 * native and xeno are two aligned text rows, each symCount symbols long,
 * with '-' marking a gap.  array has arraySize entries, one per non-gap
 * native symbol; its base fields are set here and the rest is filled in by
 * applyOrf.  The index arrays and the gap-free copy of the xeno text are
 * allocated in lm.  errAborts if the native text contains a '.'. */
{
char *nText = native->text, *xText = xeno->text;
/* xSize is the number of non-gap symbols in the xeno row. */
int nSize = arraySize, xSize = symCount - countChars(xText, '-');

/* Create an array that for each point in native gives you the index of corresponding
 * point in xeno, and another array that does the opposite. */
int *nToX, *xToN;
lmAllocArray(lm, nToX, nSize+1);
lmAllocArray(lm, xToN, xSize+1);
int i;
int nIx = 0, xIx = 0;
/* Walk the alignment columns; nIx and xIx count the non-gap symbols seen
 * so far in native and xeno respectively. */
for (i=0; i<symCount; ++i)
    {
    char n = nText[i], x = xText[i];
    if (n == '.')
       errAbort("Dot in native component %s of maf. Can't handle it.", native->src);
    /* Recorded on every column, so a slot may be written more than once
     * before its index advances. */
    nToX[nIx] = xIx;
    xToN[xIx] = nIx;
    if (n != '-')
	{
	/* base is the xeno symbol aligned to this native base (may be '-'). */
	array[nIx].base = x;
	/* Repeats the assignment made just above with the same values. */
	nToX[nIx] = xIx;
	++nIx;
	}
    if (x != '-')
       ++xIx;
    }
/* Both rows must have produced exactly the expected number of non-gap symbols. */
assert(xIx == xSize);
assert(nIx == nSize);

/* Put an extra value at end of arrays to simplify logic. */
nToX[nSize] = xSize;
xToN[xSize] = nSize;

/* Create xeno sequence without the '-' chars */
char *xDna = lmCloneString(lm, xText);
tolowers(xDna);
stripChar(xDna, '-');

#ifdef DEBUG
uglyf("xToN:");
for (i=0; i<xSize; ++i) uglyf(" %d", xToN[i]);
uglyf("\n");
#endif /* DEBUG */

/* Step through this, one frame at a time, looking for best ORF */
int frame;
for (frame=0; frame<3; ++frame)
    {
    /* Calculate some things constant for this frame, and deal with
     * ORF that starts at beginning (may not have ATG) */
    int lastPos = xSize-3;
    /* NOTE(review): findOrfEnd is given xSize-frame as the size while the
     * start offsets are relative to xDna itself -- confirm against findOrfEnd. */
    int frameDnaSize = xSize-frame;
    int start = frame, end = findOrfEnd(xDna, frameDnaSize, frame);
    applyOrf(start, end, xDna, xToN, array, arraySize);
    /* Continue from the end of that first ORF, one codon at a time; each
     * "atg" found starts a new ORF and scanning resumes at its end. */
    for (start = end; start<=lastPos; )
        {
	// uglyf("start %d %c%c%c\n", start, xDna[start], xDna[start+1], xDna[start+2]);
	if (startsWith("atg", xDna+start))
	    {
	    end = findOrfEnd(xDna, frameDnaSize, start);
	    applyOrf(start, end, xDna, xToN, array, arraySize);
	    start = end;
	    }
	else
	    start += 3;
	}
    }

}
Exemplo n.º 17
0
struct wordTree *wordTreeForChainsInFile(char *fileName, int chainSize, struct lm *lm)
/* Return a wordTree of all chains-of-words of length chainSize seen in file.
 * The file is read a line at a time and split into words.  Lines are
 * lowercased first when the lower flag is set, and punctuation is stripped
 * from each word when the unpunc flag is set.  The rbTree stack shared by
 * all tree nodes is allocated in lm, and lm is passed on to addChainToTree. */
{
/* Characters removed from words when unpunc is set, in removal order. */
static const char punctuation[] = ",.;-\"?!()";

struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line, *word;

/* Sliding window of the most recent words, at most chainSize long. */
struct dlList *chain = dlListNew();
struct dlNode *node;
int curSize = 0;

/* The tree starts out as an empty root node. */
struct wordTree *wt = wordTreeNew("");	
/* Running count of words processed; not read anywhere in this function. */
int wordCount = 0;

/* Save time/space by sharing stack between all "following" rbTrees. */
struct rbTreeNode **stack;	
lmAllocArray(lm, stack, 256);

while (lineFileNext(lf, &line, NULL))
    {
    if (lower)
        tolowers(line);
    for (word = nextWord(&line); word != NULL; word = nextWord(&line))
	{
	if (unpunc)
	    {
	    const char *p;
	    for (p = punctuation; *p != 0; ++p)
		stripChar(word, *p);
	    /* Nothing left once the punctuation is gone: skip the word. */
	    if (word[0] == 0)
	         continue;
	    }
	verbose(2, "%s\n", word);

	if (curSize >= chainSize)
	    {
	    /* Window is full: recycle the oldest node for the new word by
	     * moving it from the head to the tail, then record the chain. */
	    node = dlPopHead(chain);
	    freeMem(node->val);
	    node->val = cloneString(word);
	    dlAddTail(chain, node);
	    addChainToTree(wt, chain, lm, stack);
	    }
	else
	    {
	    /* Still filling the window: the chain is only recorded once it
	     * reaches full length. */
	    dlAddValTail(chain, cloneString(word));
	    if (++curSize == chainSize)
		addChainToTree(wt, chain, lm, stack);
	    }
	++wordCount;
	}
    }

/* A file with fewer than chainSize words never filled the window, so its
 * short chain has not been recorded yet. */
if (curSize < chainSize)
    addChainToTree(wt, chain, lm, stack);

/* Record the ever-shorter chains at the end of the file, freeing the
 * window one node at a time. */
for (node = dlPopHead(chain); node != NULL; node = dlPopHead(chain))
    {
    addChainToTree(wt, chain, lm, stack);
    freeMem(node->val);
    freeMem(node);
    }
dlListFree(&chain);
lineFileClose(&lf);
return wt;
}
Exemplo n.º 18
0
void chainNet(char *chainFile, char *tSizes, char *qSizes, 
	char *tNet, char *qNet)
/* chainNet - Make alignment nets out of chains.
 * Reads chains from chainFile, which must be sorted by descending score, and
 * stops at the first chain scoring below minScore.  tSizes and qSizes name
 * the chromosome size files for target and query; each chain's sizes must
 * agree with them.  The target-side net is written to tNet and the
 * query-side net to qNet.  errAborts on unsorted input or size mismatch. */
{
struct lineFile *lf = lineFileOpen(chainFile, TRUE);
struct hash *qHash, *tHash;
struct chrom *qChromList, *tChromList, *tChrom, *qChrom;
struct chain *chain;
double lastScore = -1;
struct lm *lm = lmInit(0);
struct rbTreeNode **rbStack;
FILE *tNetFile = mustOpen(tNet, "w");
FILE *qNetFile = mustOpen(qNet, "w");


lmAllocArray(lm, rbStack, 256);
makeChroms(qSizes, lm, rbStack, &qHash, &qChromList);
makeChroms(tSizes, lm, rbStack, &tHash, &tChromList);
verbose(1, "Got %d chroms in %s, %d in %s\n", slCount(tChromList), tSizes,
       slCount(qChromList), qSizes);
/* Register both net files as metadata outputs of the chain file. */
lineFileSetMetaDataOutput(lf, tNetFile);
lineFileSetMetaDataOutput(lf, qNetFile);

/* Loop through chain file building up net. */
while ((chain = chainRead(lf)) != NULL)
    {
    /* Make sure that input is really sorted. */
    if (lastScore >= 0 && chain->score > lastScore)
        errAbort("%s must be sorted in order of score", chainFile);
    lastScore = chain->score;

    /* Input is sorted, so every later chain would be below minScore too. */
    if (chain->score < minScore) 
	{
    	break;
	}
    verbose(2, "chain %f (%d els) %s %d-%d %c %s %d-%d\n", 
	    chain->score, slCount(chain->blockList), 
	    chain->tName, chain->tStart, chain->tEnd, 
	    chain->qStrand, chain->qName, chain->qStart, chain->qEnd);
    qChrom = hashMustFindVal(qHash, chain->qName);
    if (qChrom->size != chain->qSize)
        errAbort("%s is %d in %s but %d in %s", chain->qName, 
		chain->qSize, chainFile,
		qChrom->size, qSizes);
    tChrom = hashMustFindVal(tHash, chain->tName);
    if (tChrom->size != chain->tSize)
        errAbort("%s is %d in %s but %d in %s", chain->tName, 
		chain->tSize, chainFile,
		tChrom->size, tSizes);
    if (!inclQuery(chain))
        verbose(2, "skipping chain on query %s\n", chain->qName);
    else
        {
        addChain(qChrom, tChrom, chain);
        verbose(2, "%s has %d inserts, %s has %d\n", tChrom->name, 
                tChrom->spaces->n, qChrom->name, qChrom->spaces->n);
        }
    }
/* Build up other side of fills.  It's just for historical 
 * reasons this is not done during the main build up.   
 * It's a little less efficient this way, but to change it
 * some hard reverse strand issues would have to be juggled. */
verbose(1, "Finishing nets\n");
finishNet(qChromList, TRUE);
finishNet(tChromList, FALSE);

/* Write out basic net files. */
verbose(1, "writing %s\n", tNet);
outputNetSide(tChromList, tNetFile, FALSE);
verbose(1, "writing %s\n", qNet);
outputNetSide(qChromList, qNetFile, TRUE);

/* prevent SIGPIPE in preceding process if input is a pipe, consume remainder
 * of input file since we stop before EOF. */
if (isPipe(lf->fd))
    {
    char *line;
    while(lineFileNext(lf, &line, NULL))
        continue;
    }
lineFileClose(&lf);

/* Close the net files explicitly so that a failed flush is reported instead
 * of being lost at exit.  This comes after lineFileClose because the
 * lineFile still refers to both files as metadata outputs until then. */
carefClose(&tNetFile);
carefClose(&qNetFile);

if (verboseLevel() > 1)
    printMem(stderr);
}
struct bed *bedFromRow(
	char *chrom, 		  /* Chromosome bed is on. */
	char **row,  		  /* Row with other data for bed. */
	int fieldCount,		  /* Number of fields in final bed. */
	boolean isPsl, 		  /* True if in PSL format. */
	boolean isGenePred,	  /* True if in GenePred format. */
	boolean isBedWithBlocks,  /* True if BED with block list. */
	boolean *pslKnowIfProtein,/* Have we figured out if psl is protein? */
	boolean *pslIsProtein,    /* True if we know psl is protein. */
	struct lm *lm)		  /* Local memory pool */
/* Create bed from a database row when we already understand
 * the format pretty well.  The bed is allocated inside of
 * the local memory pool lm.  Generally use this in conjunction
 * with the results of a SQL query constructed with the aid
 * of the bedSqlFieldsExceptForChrom function.
 *
 * Row layout as read here: row[0] chromStart, row[1] chromEnd, row[2] name,
 * row[3] score, row[4] strand, row[5] thickStart, row[6] thickEnd,
 * row[7] blockCount, row[8] block sizes (block ends for genePred),
 * row[9] block starts, row[10] target size (only read for psl).
 * Only as many columns as fieldCount calls for are looked at.  chrom is
 * stored by pointer, not copied.  For psl rows, *pslKnowIfProtein and
 * *pslIsProtein are filled in the first time they are needed. */
{
char *strand, tStrand, qStrand;
struct bed *bed;
int i, blockCount;

lmAllocVar(lm, bed);
bed->chrom = chrom;
bed->chromStart = sqlUnsigned(row[0]);
bed->chromEnd = sqlUnsigned(row[1]);

if (fieldCount < 4)
    return bed;
bed->name = lmCloneString(lm, row[2]);
if (fieldCount < 5)
    return bed;
bed->score = atoi(row[3]);
if (fieldCount < 6)
    return bed;
strand = row[4];
qStrand = strand[0];
/* Only look at the second character when there is a first one; with an
 * empty strand string strand[1] would be past the terminator. */
tStrand = (qStrand != 0) ? strand[1] : 0;
if (tStrand == 0)
    bed->strand[0] = qStrand;
else
    {
    /* psl: use XOR of qStrand,tStrand if both are given. */
    if (tStrand == qStrand)
	bed->strand[0] = '+';
    else
	bed->strand[0] = '-';
    }
if (fieldCount < 8)
    return bed;
bed->thickStart = sqlUnsigned(row[5]);
bed->thickEnd   = sqlUnsigned(row[6]);
if (fieldCount < 12)
    return bed;
bed->blockCount = blockCount = sqlUnsigned(row[7]);
lmAllocArray(lm, bed->blockSizes, blockCount);
sqlUnsignedArray(row[8], bed->blockSizes, blockCount);
lmAllocArray(lm, bed->chromStarts, blockCount);
sqlUnsignedArray(row[9], bed->chromStarts, blockCount);
if (isGenePred)
    {
    /* Translate end coordinates to sizes. */
    for (i=0; i<bed->blockCount; ++i)
	bed->blockSizes[i] -= bed->chromStarts[i];
    }
else if (isPsl)
    {
    if (!*pslKnowIfProtein)
	{
	/* Figure out if is protein using a rather elaborate but
	 * working test I think Angie or Brian must have figured out.
	 * It checks whether the last block only reaches the end of the
	 * alignment when its size is multiplied by three. */
	if (tStrand == '-')
	    {
	    int tSize = sqlUnsigned(row[10]);
	    *pslIsProtein = 
		   (bed->chromStart == 
		    tSize - (3*bed->blockSizes[bed->blockCount - 1]  + 
		    bed->chromStarts[bed->blockCount - 1]));
	    }
	else
	    {
	    *pslIsProtein = (bed->chromEnd == 
		    3*bed->blockSizes[bed->blockCount - 1]  + 
		    bed->chromStarts[bed->blockCount - 1]);
	    }
	*pslKnowIfProtein = TRUE;
	}
    if (*pslIsProtein)
	{
	/* if protein then blockSizes are in protein space */
	for (i=0; i<blockCount; ++i)
	    bed->blockSizes[i] *= 3;
	}
    if (tStrand == '-')
	{
	/* psl: if target strand is '-', flip the coords.
	 * (this is the target part of pslRcBoth from src/lib/psl.c) */
	int tSize = sqlUnsigned(row[10]);
	for (i=0; i<blockCount; ++i)
	    {
	    bed->chromStarts[i] = tSize - 
		    (bed->chromStarts[i] + bed->blockSizes[i]);
	    }
	reverseInts(bed->chromStarts, bed->blockCount);
	reverseInts(bed->blockSizes, bed->blockCount);
	}
    }
if (!isBedWithBlocks)
    {
    /* non-bed: translate absolute starts to relative starts */
    for (i=0;  i < bed->blockCount;  i++)
	bed->chromStarts[i] -= bed->chromStart;
    }
return bed;
}
Exemplo n.º 20
0
void refSeparateButJoined(struct txGraph *graph, FILE *f)
/* Flag graphs that have two non-overlapping refSeqs.  For each source of
 * type refType a range tree of its exons is built; if some other refType
 * source has no exon overlapping that tree, one "refJoined" line describing
 * the graph is written to f.  At most one line is written per graph. */
{
    struct lm *pool = lmInit(0);
    struct rbTreeNode **treeStack;
    boolean separateFound = FALSE;
    int refIx;

    lmAllocArray(pool, treeStack, 128);

    /* Try each reference source in turn until a separate pair turns up. */
    for (refIx = 0; refIx < graph->sourceCount && !separateFound; ++refIx)
    {
        struct rbTree *exonTree;
        struct txEdge *edge;
        int otherIx;

        if (!sameString(graph->sources[refIx].type, refType))
            continue;

        /* Collect all exons supported by this source in a range tree. */
        exonTree = rangeTreeNewDetailed(pool, treeStack);
        for (edge = graph->edgeList; edge != NULL; edge = edge->next)
        {
            if (edge->type == ggExon && evOfSourceOnList(edge->evList, refIx))
                rangeTreeAdd(exonTree, graph->vertices[edge->startIx].position,
                             graph->vertices[edge->endIx].position);
        }

        /* Look for another reference source none of whose exons hit the tree. */
        for (otherIx = 0; otherIx < graph->sourceCount && !separateFound; ++otherIx)
        {
            boolean overlaps = FALSE;

            if (otherIx == refIx)
                continue;
            if (!sameString(graph->sources[otherIx].type, refType))
                continue;

            for (edge = graph->edgeList; edge != NULL && !overlaps; edge = edge->next)
            {
                if (edge->type != ggExon)
                    continue;
                if (!evOfSourceOnList(edge->evList, otherIx))
                    continue;
                if (rangeTreeOverlaps(exonTree,
                                      graph->vertices[edge->startIx].position,
                                      graph->vertices[edge->endIx].position))
                    overlaps = TRUE;
            }
            if (!overlaps)
                separateFound = TRUE;
        }
        freez(&exonTree);
    }

    if (separateFound)
    {
        fprintf(f, "%s\t%d\t%d\t%s\t0\t%s\n", graph->tName,
                graph->tStart, graph->tEnd, "refJoined", graph->strand);
    }
    lmCleanup(&pool);
}