Exemple #1
0
struct altGraphX *txGraphToAltGraphX(struct txGraph *tx)
/* Copy transcription graph to altSpliceX format. */
{
/* Allocate struct and deal with easy fields. */
struct altGraphX *ag;
AllocVar(ag);
ag->tName = cloneString(tx->tName);
ag->tStart = tx->tStart;
ag->tEnd = tx->tEnd;
ag->name = cloneString(tx->name);
ag->id = 0;
ag->strand[0] = tx->strand[0];

/* Deal with vertices. */
int vertexCount = ag->vertexCount = tx->vertexCount;
AllocArray(ag->vTypes, vertexCount);
AllocArray(ag->vPositions, vertexCount);
int i;
for (i=0; i<vertexCount; ++i)
    {
    struct txVertex *v = &tx->vertices[i];
    ag->vTypes[i] = v->type;
    ag->vPositions[i] = v->position;
    }

/* Deal with edges. */
int edgeCount = ag->edgeCount = tx->edgeCount;
AllocArray(ag->edgeStarts, edgeCount);
AllocArray(ag->edgeEnds, edgeCount);
AllocArray(ag->edgeTypes, edgeCount);
struct txEdge *edge;
for (edge = tx->edgeList, i=0; edge != NULL; edge = edge->next, ++i)
    {
    assert(i < edgeCount);
    ag->edgeStarts[i] = edge->startIx;
    ag->edgeEnds[i] = edge->endIx;
    ag->edgeTypes[i] = edge->type;
    }

/* Deal with evidence inside of edges. */
for (edge = tx->edgeList; edge != NULL; edge = edge->next)
    {
    struct evidence *ev;
    AllocVar(ev);
    int *mrnaIds = AllocArray(ev->mrnaIds, edge->evCount);
    int i;
    struct txEvidence *txEv;
    for (txEv = edge->evList, i=0; txEv != NULL; txEv = txEv->next, ++i)
        {
	assert(i < edge->evCount);
	struct txSource *source = &tx->sources[txEv->sourceId];
	char *sourceType = source->type;
	if (sameString(sourceType, "refSeq") || sameString(sourceType, "mrna") || sameString(sourceType, "est"))
	    {
	    mrnaIds[ev->evCount] = txEv->sourceId;
	    ev->evCount += 1;
	    }
        }
    slAddHead(&ag->evidence, ev);
    }
slReverse(&ag->evidence);

/* Convert sources into mrnaRefs. */
int sourceCount = ag->mrnaRefCount = tx->sourceCount;
AllocArray(ag->mrnaRefs, sourceCount);
int sourceIx;
for (sourceIx=0; sourceIx<sourceCount; ++sourceIx)
    {
    struct txSource *source = &tx->sources[sourceIx];
    ag->mrnaRefs[sourceIx] = cloneString(source->accession);
    }

/* Deal with tissues and libs by just making arrays of all zero. */
AllocArray(ag->mrnaTissues, tx->sourceCount);
AllocArray(ag->mrnaLibs, tx->sourceCount);
return ag;
}
Exemple #2
0
void writeMergers(struct cdnaAliList *calList, char *cdnaName, char *bacNames[])
/* Write out any mergers indicated by this cdna. This destroys calList. */
{
struct cdnaAliList *startBac, *endBac, *cal, *prevCal, *nextCal;
int bacCount;
int bacIx;
    
    {
    if (sameString(cdnaName, "R08304_AND_R08305"))
        {
        uglyf("Got you %s\n", cdnaName);
        }
    }

slSort(&calList, cmpCal);
for (startBac = calList; startBac != NULL; startBac = endBac)
    {
    /* Scan until find a cal that isn't pointing into the same BAC. */
    bacCount = 1;
    bacIx = startBac->bacIx;
    prevCal = startBac;
    for (cal =  startBac->next; cal != NULL; cal = cal->next)
        {
        if (cal->bacIx != bacIx)
            {
            prevCal->next = NULL;
            break;
            }
        ++bacCount;
        prevCal = cal;
        }
    endBac = cal;
    if (bacCount > 1)
        {
        while (startBac != NULL)
            {
            struct cdnaAliList *clumpList = NULL, *leftoverList = NULL;
            for (cal = startBac; cal != NULL; cal = nextCal)
                {
                nextCal = cal->next;
                if (noMajorOverlap(cal, clumpList))
                    {
                    slAddHead(&clumpList, cal);
                    }
                else
                    {
                    slAddHead(&leftoverList, cal);
                    }
                }
            slReverse(&clumpList);
            slReverse(&leftoverList);
            if (slCount(clumpList) > 1)
                {
                char lastStrand = 0;
                boolean switchedStrand = FALSE;
                if (!allSameContig(clumpList))
                    {
                    fprintf(mergerOut, "%s glues %s contigs", cdnaName, bacNames[bacIx]);
                    lastStrand = clumpList->strand;
                    for (cal = clumpList; cal != NULL; cal = cal->next)
                        {
                        if (cal->strand != lastStrand)
                            switchedStrand = TRUE;
                        fprintf(mergerOut, " %d %c %c' (%d-%d) %3.1f%%", cal->seqIx, cal->strand, 
                            cal->dir,
                            cal->start, cal->end, 100.0*cal->cookedScore);
                        }
                    fprintf(mergerOut, "\n");
                    }
                }
            freeCalList(&clumpList);
            startBac = leftoverList;
            }        
        }
    else
        {
        freeCalList(&startBac);
        }
    }
}
void loadHumMusL(struct track *tg)
	/* Load humMusL track with 2 zoom levels and one normal level. 
	* Also used for loading the musHumL track (called Human Cons) 
	* on the mouse browser. It decides which of 4 tables to
	* load based on how large of a window the user is looking at*/
{
	struct sqlConnection *conn = hAllocConn(database);
	struct sqlResult *sr;
	char **row;
	int rowOffset;
	struct sample *sample;
	struct linkedFeatures *lfList = NULL, *lf;
	char *hasDense = NULL;
	char *where = NULL;
	char tableName[256];
	int z;
	float pixPerBase = 0;

	if(tl.picWidth == 0)
		errAbort("hgTracks.c::loadHumMusL() - can't have pixel width of 0");
	pixPerBase = (winEnd - winStart)/ tl.picWidth;


	/* Determine zoom level. */
	if (!strstr(tg->table,"HMRConservation"))
		z = humMusZoomLevel();
	else z=0;


	if(z == 1 )
		safef(tableName, sizeof(tableName), "%s_%s", "zoom1",
		tg->table);
	else if( z == 2)
		safef(tableName, sizeof(tableName), "%s_%s", "zoom50",
		tg->table);
	else if(z == 3)
		safef(tableName, sizeof(tableName), "%s_%s",
		"zoom2500", tg->table);
	else
		safef(tableName, sizeof(tableName), "%s", tg->table);

	//printf("(%s)", tableName );

	sr = hRangeQuery(conn, tableName, chromName, winStart, winEnd,
	where, &rowOffset);
	while ((row = sqlNextRow(sr)) != NULL)
	{
		sample = sampleLoad(row+rowOffset);
		lf = lfFromSample(sample);
		slAddHead(&lfList, lf);
		sampleFree(&sample);
	}
	if(where != NULL)
		freez(&where);
	sqlFreeResult(&sr);
	hFreeConn(&conn);
	slReverse(&lfList);

	/* sort to bring items with common names to the same line
	but only for tracks with a summary table (with name=shortLabel)
	in
	dense mode*/
	if( hasDense != NULL )
	{
		sortGroupList = tg; /* used to put track name at
							* top of sorted list. */
		slSort(&lfList, lfNamePositionCmp);
		sortGroupList = NULL;
	}
	tg->items = lfList;
}
Exemple #4
0
static void parseFixedStepSection(struct lineFile *lf, boolean clipDontDie, struct lm *lm,
	int itemsPerSlot, char *chrom, bits32 chromSize, bits32 span, bits32 sectionStart, 
	bits32 step, struct bwgSection **pSectionList)
/* Read the single column data in section until get to end. */
{
struct lm *lmLocal = lmInit(0);

/* Stream through section until get to end of file or next section,
 * adding values from single column to list. */
char *words[1];
char *line;
struct bwgFixedStepItem *item, *itemList = NULL;
int originalSectionSize = 0;
bits32 sectionEnd = sectionStart;
while (lineFileNextReal(lf, &line))
    {
    if (steppedSectionEnd(line, 1))
	{
        lineFileReuse(lf);
	break;
	}
    chopLine(line, words);
    lmAllocVar(lmLocal, item);
    item->val = lineFileNeedDouble(lf, words, 0);
    if (sectionEnd + span > chromSize)
	{
	warn("line %d of %s: chromosome %s has %u bases, but item ends at %u",
	    lf->lineIx, lf->fileName, chrom, chromSize, sectionEnd + span);
	if (!clipDontDie)
	    noWarnAbort();
	}
    else
	{
	slAddHead(&itemList, item);
	++originalSectionSize;
	}
    sectionEnd += step;
    }
slReverse(&itemList);

/* Break up into sections of no more than items-per-slot size, and convert to packed format. */
int sizeLeft = originalSectionSize;
for (item = itemList; item != NULL; )
    {
    /* Figure out size of this section  */
    int sectionSize = sizeLeft;
    if (sectionSize > itemsPerSlot)
        sectionSize = itemsPerSlot;
    sizeLeft -= sectionSize;


    /* Allocate and fill in section. */
    struct bwgSection *section;
    lmAllocVar(lm, section);
    section->chrom = chrom;
    section->start = sectionStart;
    sectionStart += sectionSize * step;
    section->end = sectionStart - step + span;
    section->type = bwgTypeFixedStep;
    section->itemStep = step;
    section->itemSpan = span;
    section->itemCount = sectionSize;

    /* Allocate array for data, and copy from list to array representation */
    struct bwgFixedStepPacked *packed;		/* An array */
    section->items.fixedStepPacked = lmAllocArray(lm, packed, sectionSize);
    int i;
    for (i=0; i<sectionSize; ++i)
        {
	packed->val = item->val;
	item = item->next;
	++packed;
	}

    /* Add section to list. */
    slAddHead(pSectionList, section);
    }
lmCleanup(&lmLocal);
}
Exemple #5
0
static void saveAxtBundle(char *chromName, int chromSize, int chromOffset,
                          struct ffAli *ali,
                          struct dnaSeq *tSeq, struct hash *t3Hash, struct dnaSeq *qSeq,
                          boolean qIsRc, boolean tIsRc,
                          enum ffStringency stringency, int minMatch, struct gfOutput *out)
/* Save alignment to axtBundle. */
{
    struct axtData *ad = out->data;
    struct ffAli *sAli, *eAli, *ff, *rt, *eFf = NULL;
    struct axt *axt;
    struct dyString *q = newDyString(1024), *t = newDyString(1024);
    struct axtBundle *gab;
    struct trans3 *t3List = NULL;

    if (t3Hash != NULL)
        t3List = hashMustFindVal(t3Hash, tSeq->name);
    AllocVar(gab);
    gab->tSize = chromSize;
    gab->qSize = qSeq->size;
    for (sAli = ali; sAli != NULL; sAli = eAli)
    {
        eAli = ffNextBreak(sAli, 8, tSeq, t3List);
        dyStringClear(q);
        dyStringClear(t);
        for (ff = sAli; ff != eAli; ff = ff->right)
        {
            dyStringAppendN(q, ff->nStart, ff->nEnd - ff->nStart);
            dyStringAppendN(t, ff->hStart, ff->hEnd - ff->hStart);
            rt = ff->right;
            if (rt != eAli)
            {
                int nGap = rt->nStart - ff->nEnd;
                int nhStart = trans3GenoPos(rt->hStart, tSeq, t3List, FALSE)
                              + chromOffset;
                int ohEnd = trans3GenoPos(ff->hEnd, tSeq, t3List, TRUE)
                            + chromOffset;
                int hGap = nhStart - ohEnd;
                int gap = max(nGap, hGap);
                if (nGap < 0 || hGap < 0)
                {
                    errAbort("Negative gap size in %s vs %s", tSeq->name, qSeq->name);
                }
                if (nGap == gap)
                {
                    dyStringAppendN(q, ff->nEnd, gap);
                    dyStringAppendMultiC(t, '-', gap);
                }
                else
                {
                    dyStringAppendN(t, ff->hEnd, gap);
                    dyStringAppendMultiC(q, '-', gap);
                }
            }
            eFf = ff;	/* Keep track of last block in bunch */
        }
        assert(t->stringSize == q->stringSize);
        AllocVar(axt);
        axt->qName = cloneString(qSeq->name);
        axt->qStart = sAli->nStart - qSeq->dna;
        axt->qEnd = eFf->nEnd - qSeq->dna;
        axt->qStrand = (qIsRc ? '-' : '+');
        axt->tName = cloneString(chromName);
        axt->tStart = trans3GenoPos(sAli->hStart, tSeq, t3List, FALSE) + chromOffset;
        axt->tEnd = trans3GenoPos(eFf->hEnd, tSeq, t3List, TRUE) + chromOffset;
        axt->tStrand = (tIsRc ? '-' : '+');
        axt->symCount = t->stringSize;
        axt->qSym = cloneString(q->string);
        axt->tSym = cloneString(t->string);
        axt->frame = trans3Frame(sAli->hStart, t3List);
        if (out->qIsProt)
            axt->score = axtScoreProteinDefault(axt);
        else
            axt->score = axtScoreDnaDefault(axt);
        slAddHead(&gab->axtList, axt);
    }
    slReverse(&gab->axtList);
    dyStringFree(&q);
    dyStringFree(&t);
    slAddHead(&ad->bundleList, gab);
}
Exemple #6
0
static struct bigBedInterval *bigBedIntervalsMatchingName(struct bbiFile *bbi, 
    struct fileOffsetSize *fosList, BbFirstWordMatch matcher, int fieldIx, 
    void *target, struct lm *lm)
/* Return list of intervals inside of sectors of bbiFile defined by fosList where the name 
 * matches target somehow. */
{
struct bigBedInterval *interval, *intervalList = NULL;
struct fileOffsetSize *fos;
boolean isSwapped = bbi->isSwapped;
for (fos = fosList; fos != NULL; fos = fos->next)
    {
    /* Read in raw data */
    udcSeek(bbi->udc, fos->offset);
    char *rawData = needLargeMem(fos->size);
    udcRead(bbi->udc, rawData, fos->size);

    /* Optionally uncompress data, and set data pointer to uncompressed version. */
    char *uncompressedData = NULL;
    char *data = NULL;
    int dataSize = 0;
    if (bbi->uncompressBufSize > 0)
	{
	data = uncompressedData = needLargeMem(bbi->uncompressBufSize);
	dataSize = zUncompress(rawData, fos->size, uncompressedData, bbi->uncompressBufSize);
	}
    else
	{
        data = rawData;
	dataSize = fos->size;
	}

    /* Set up for "memRead" routines to more or less treat memory block like file */
    char *blockPt = data, *blockEnd = data + dataSize;
    struct dyString *dy = dyStringNew(32); // Keep bits outside of chrom/start/end here


    /* Read next record into local variables. */
    while (blockPt < blockEnd)
	{
	bits32 chromIx = memReadBits32(&blockPt, isSwapped);
	bits32 s = memReadBits32(&blockPt, isSwapped);
	bits32 e = memReadBits32(&blockPt, isSwapped);
	int c;
	dyStringClear(dy);
	// TODO - can simplify this probably just to for (;;) {if ((c = *blockPt++) == 0) ...
	while ((c = *blockPt++) >= 0)
	    {
	    if (c == 0)
		break;
	    dyStringAppendC(dy, c);
	    }
	if ((*matcher)(dy->string, fieldIx, target))
	    {
	    lmAllocVar(lm, interval);
	    interval->start = s;
	    interval->end = e;
	    interval->rest = cloneString(dy->string);
	    interval->chromId = chromIx;
	    slAddHead(&intervalList, interval);
	    }
	}

    /* Clean up temporary buffers. */
    dyStringFree(&dy);
    freez(&uncompressedData);
    freez(&rawData);
    }
slReverse(&intervalList);
return intervalList;
}
Exemple #7
0
static struct rTree *rTreeFromChromRangeArray( struct lm *lm, int blockSize, int itemsPerSlot,
	void *itemArray, int itemSize, bits64 itemCount,  void *context,
	struct cirTreeRange (*fetchKey)(const void *va, void *context),
	bits64 (*fetchOffset)(const void *va, void *context), bits64 endFileOffset,
	int *retLevelCount)
{
char *items = itemArray;
struct rTree *el, *list=NULL, *tree = NULL;

/* Make first level above leaf. */
bits64 i;
bits64 nextOffset = (*fetchOffset)(items, context);
for (i=0; i<itemCount; i += itemsPerSlot)
    {
    /* Figure out if we are on final iteration through loop, and the
     * count of items in this iteration. */
    boolean finalIteration = FALSE;
    int oneSize = itemCount-i;
    if (oneSize > itemsPerSlot)
        oneSize = itemsPerSlot;
    else
        finalIteration = TRUE;

    /* Allocate element and put on list. */
    lmAllocVar(lm, el);
    slAddHead(&list, el);

    /* Fill out most of element from first item in element. */
    char *startItem = items + itemSize * i;
    struct cirTreeRange key = (*fetchKey)(startItem, context);
    el->startChromIx = el->endChromIx = key.chromIx;
    el->startBase = key.start;
    el->endBase = key.end;
    el->startFileOffset = nextOffset;

    /* Figure out end of element from offset of next element (or file size
     * for final element.) */
    if (finalIteration)
	nextOffset = endFileOffset;
    else
        {
	char *endItem = startItem + itemSize*oneSize;
        nextOffset = (*fetchOffset)(endItem, context);
	}
    el->endFileOffset = nextOffset;

    /* Expand area spanned to include all items in block. */
    int j;
    for (j=1; j<oneSize; ++j)
        {
	void *item = items + itemSize*(i+j);
	key = (*fetchKey)(item, context);
	if (key.chromIx < el->startChromIx)
	    {
	    el->startChromIx = key.chromIx;
	    el->startBase = key.start;
	    }
	else if (key.chromIx == el->startChromIx)
	    {
	    if (key.start < el->startBase)
	        el->startBase = key.start;
	    }
	if (key.chromIx > el->endChromIx)
	    {
	    el->endChromIx = key.chromIx;
	    el->endBase = key.end;
	    }
	else if (key.chromIx == el->endChromIx)
	    {
	    if (key.end > el->endBase)
	        el->endBase = key.end;
	    }
	}
    }
slReverse(&list);
verbose(2, "Made %d primary index nodes out of %llu items\n", slCount(list), itemCount);

/* Now iterate through making more and more condensed versions until have just one. */
int levelCount = 1;
tree = list;
while (tree->next != NULL || levelCount < 2)
    {
    list = NULL;
    int slotsUsed = blockSize;
    struct rTree *parent = NULL, *next;
    for (el = tree; el != NULL; el = next)
        {
	next = el->next;
	if (slotsUsed >= blockSize)
	    {
	    slotsUsed = 1;
	    lmAllocVar(lm, parent);
	    parent = lmCloneMem(lm, el, sizeof(*el));
	    parent->children = el;
	    el->parent = parent;
	    el->next = NULL;
	    slAddHead(&list, parent);
	    }
	else
	    {
	    ++slotsUsed;
	    slAddHead(&parent->children, el);
	    el->parent = parent;
	    if (el->startChromIx < parent->startChromIx)
		{
	        parent->startChromIx = el->startChromIx;
		parent->startBase = el->startBase;
		}
	    else if (el->startChromIx == parent->startChromIx)
	        {
		if (el->startBase < parent->startBase)
		    parent->startBase = el->startBase;
		}
	    if (el->endChromIx > parent->endChromIx)
		{
	        parent->endChromIx = el->endChromIx;
		parent->endBase = el->endBase;
		}
	    else if (el->endChromIx == parent->endChromIx)
	        {
		if (el->endBase > parent->endBase)
		    parent->endBase = el->endBase;
		}
	    }
	}

    slReverse(&list);
    for (el = list; el != NULL; el = el->next)
        slReverse(&el->children);
    tree = list;
    levelCount += 1;
    }
*retLevelCount = levelCount;
return tree;
}
void cdwJobCleanFailed(int submitId)
/* Check out the symlink to determine its type. */
{

struct sqlConnection *conn = sqlConnect("cdw");

struct dyString *query = dyStringNew(0);
sqlDyStringPrintf(query, 
 "select id, commandLine, startTime, endTime, returnCode, pid from cdwJob where submitId=%d "
"order by commandLine,CAST(returnCode AS unsigned)", 
 submitId);
 // NOTE we need this CAST on returnCode since it can be -1. we want success 0 first.

// TODO DO we need to add any other conditions such as distinguishing
// between running, queued, and done?

/* Scan through result set finding redundant rows beyond success row. */
struct sqlResult *sr = sqlGetResult(conn, query->string);
char **row;
char *lastCommand = "";
boolean success = FALSE;
struct slInt *list = NULL;
struct slInt *e;
while ((row = sqlNextRow(sr)) != NULL)
    {
    unsigned int id = sqlUnsigned(row[0]);
    char *commandLine = row[1];
    unsigned long startTime = sqlUnsignedLong(row[2]);
    unsigned long endTime = sqlUnsignedLong(row[3]);
    int returnCode = sqlSigned(row[4]);
    unsigned int pid = sqlUnsigned(row[5]);
    verbose(2, "%u\t%s\t%lu\t%lu\t%d\t%u\t%u\n", id, commandLine, startTime, endTime, returnCode, pid, submitId);
    if (sameString(lastCommand, commandLine))
	{
	if (success)  // we already succeeded, the old failure is unwanted baggage.
	    {
	    e = slIntNew(id);  // or add it to a list of rows whose ids should get removed
	    slAddHead(&list, e);
	    }
	}
    else
	{
	if (returnCode == 0)
	    success = TRUE;
	else
	    success = FALSE;
	}
    // note fields pid and submitId are defined as signed integers in cdwJob table, probably should be unsigned.
    lastCommand = cloneString(commandLine);
    }
                                                                              
sqlFreeResult(&sr);

slReverse(&list);
for(e=list;e;e=e->next)
    {
    dyStringClear(query);
    sqlDyStringPrintf(query, "delete from cdwJob where id=%u", (unsigned int) e->val);
    //printf("%s\n", query->string);
    sqlUpdate(conn, query->string);
    }

/* Clean up and go home */
dyStringFree(&query);

sqlDisconnect(&conn);


}
Exemple #9
0
struct bbiChromUsage *bbiChromUsageFromBedFile(struct lineFile *lf, 
	struct hash *chromSizesHash, int *retMinDiff, double *retAveSize, bits64 *retBedCount)
/* Go through bed file and collect chromosomes and statistics. */
{
char *row[3];
struct hash *uniqHash = hashNew(0);
struct bbiChromUsage *usage = NULL, *usageList = NULL;
int lastStart = -1;
bits32 id = 0;
bits64 totalBases = 0, bedCount = 0;
int minDiff = BIGNUM;

lineFileRemoveInitialCustomTrackLines(lf);

for (;;)
    {
    int rowSize = lineFileChopNext(lf, row, ArraySize(row));
    if (rowSize == 0)
        break;
    lineFileExpectWords(lf, 3, rowSize);
    char *chrom = row[0];
    int start = lineFileNeedNum(lf, row, 1);
    int end = lineFileNeedNum(lf, row, 2);
    if (start > end)
        {
	    errAbort("end (%d) before start (%d) line %d of %s",
	    	end, start, lf->lineIx, lf->fileName);
	}
    ++bedCount;
    totalBases += (end - start);
    if (usage == NULL || differentString(usage->name, chrom))
        {
	if (hashLookup(uniqHash, chrom))
	    {
	    errAbort("%s is not sorted at line %d.  Please use \"sort -k1,1 -k2,2n\" or bedSort and try again.",
	    	lf->fileName, lf->lineIx);
	    }
	hashAdd(uniqHash, chrom, NULL);
	struct hashEl *chromHashEl = hashLookup(chromSizesHash, chrom);
	if (chromHashEl == NULL)
	    errAbort("%s is not found in chromosome sizes file", chrom);
	int chromSize = ptToInt(chromHashEl->val);
	AllocVar(usage);
	usage->name = cloneString(chrom);
	usage->id = id++;
	usage->size = chromSize;
	slAddHead(&usageList, usage);
	lastStart = -1;
	}
    if (end > usage->size)
        errAbort("End coordinate %d bigger than %s size of %d line %d of %s", end, usage->name, usage->size, lf->lineIx, lf->fileName);
    usage->itemCount += 1;
    if (lastStart >= 0)
        {
	int diff = start - lastStart;
	if (diff < minDiff)
	    {
	    if (diff < 0)
		errAbort("%s is not sorted at line %d.  Please use \"sort -k1,1 -k2,2n\" or bedSort and try again.",
		    lf->fileName, lf->lineIx);
	    minDiff = diff;
	    }
	}
    lastStart = start;
    }
slReverse(&usageList);
*retMinDiff = minDiff;
*retAveSize = (double)totalBases/bedCount;
*retBedCount = bedCount;
freeHash(&uniqHash);
return usageList;
}
Exemple #10
0
struct gapInfo *findLargeGaps(struct xaAli *xa, struct gapInfo *oldList)
/* Find large gaps in alignment and classify them. */
{
struct gdfGene *gdfList;
struct gapInfo *gapList = NULL, *gap;
int ceIx=0, cbIx=0, symIx=0;
int ceStart=0, cbStart=0, symStart=0;
int runSize = 0;
char sym, lastSym = 0;
int symCount = xa->symCount;

/* Fetch C. elegans region. */
gdfList = wormGdfGenesInRange(xa->target, xa->tStart, xa->tEnd, &wormSangerGdfCache);

/* Run a little state machine that does something at the end of each solid run 
 * of a symbol. */
for (symIx = 0; symIx <= symCount; ++symIx)
    {
    sym = xa->hSym[symIx];
    if (sym != lastSym)
        {
        if (runSize > 32)       /* Introns need to be at least this long. */
            {
            /* We're at end of a solid run. */
            if (lastSym == 'Q' || lastSym == 'T')
                {
                int ceGapStart = xa->tStart + ceStart;
                int ceGapEnd = xa->tStart + ceIx;
                struct gdfGene *gdf;
                char hBefore = xa->hSym[symStart-1];
                char hAfter = sym;
                char strand = '.';

                AllocVar(gap);
                gap->query = cloneString(xa->query);
                gap->qStart = xa->qStart + cbStart;
                gap->qEnd = xa->qStart + cbIx;
                gap->target = cloneString(xa->target);
                gap->tStart = ceGapStart;
                gap->tEnd = ceGapEnd;
                gap->name = cloneString(xa->name);
                gap->size = runSize;
                gap->hSym = lastSym;
                if (uniqueGap(oldList, gap))
                    {
                    slAddHead(&gapList, gap);

                    classifyGap(gdfList, xa->target, ceGapStart, ceGapEnd, lastSym, &gap->type, &gdf);
                    if (gdf != NULL)
                        strand = gdf->strand;
                    gap->hasIntronEnds = isIntron(xa, symStart, symIx, lastSym, strand, &gap->slideCount, &gap->isRc);
                    if (gap->hasIntronEnds)
                        slideGap(gap, xa, lastSym, symStart, symIx);
                    if (isConserved(hBefore) && isConserved(hAfter))
                        gap->hasStrongHomology = TRUE;
                    if (gap->hasStrongHomology)
                        {
                        if (lastSym == 'T')
                            writeGap(gap, xa, symStart+gap->slideCount, symIx+gap->slideCount, strand, out);
                        }
                    }
                }
            }
        runSize = 0;
        ceStart = ceIx;
        cbStart = cbIx;
        symStart = symIx;
        lastSym = sym;
        }
    ++runSize;
    if (xa->qSym[symIx] != '-')
        ++cbIx;
    if (xa->tSym[symIx] != '-')
        ++ceIx;
    }

gdfFreeGeneList(&gdfList);
slReverse(&gapList);
return gapList;
}
Exemple #11
0
int main(int argc, char *argv[])
{
FILE *xaFile;
struct xaAli *xa;
struct gapInfo *gapList = NULL, *gaps;
int count = 0;
long startTime = clock1000();
char *xaName, *newName;
char *first;
boolean cbFirst;

if (argc != 4)
    usage();
first = argv[1];
xaName = argv[2];
newName = argv[3];
if (sameWord("elegans", first))
    cbFirst = FALSE;
else if (sameWord("briggsae", first))
    cbFirst = TRUE;
else
    usage();
dnaUtilOpen();
intronHash = newHash(0);
out = mustOpen(newName, "w");
xaFile = mustOpen(xaName, "r");
while ((xa = xaReadNext(xaFile, FALSE)) != NULL)
    {
    char *s;
    if (!cbFirst)
	{
	char *swaps;
	int swapi;
	char swapc;
	uglyf("Swapping....\n");
	swaps = xa->query;
	xa->query = xa->target;
	xa->target = swaps;
	swapi = xa->qStart;
	xa->qStart = xa->tStart;
	xa->tStart = swapi;
	swapi = xa->qEnd;
	xa->qEnd = xa->tEnd;
	xa->tEnd = swapi;
	swapc = xa->qStrand;
	xa->qStrand = xa->tStrand;
	xa->tStrand = swapc;
	swaps = xa->qSym;
	xa->qSym = xa->tSym;
	xa->tSym = swaps;
	swapSym(xa->hSym, xa->symCount);
	}
    uglyf("%d  query %s target %s\n", count, xa->query, xa->target);
    s = chromFromPath(xa->target);
    freeMem(xa->target);
    xa->target = s;
    if (++count % 500 == 0)
        printf("Processing %d\n", count);
    gaps = findLargeGaps(xa, gapList);
    gapList = slCat(gaps, gapList);
    xaAliFree(xa);
    }
slReverse(&gapList);
report(out, "Processing took %f seconds\n", (clock1000()-startTime)*0.001);

reportGaps(gapList, out);
printAllHistograms(out);
calcCeHomoCount();
printHomologousEndStats(out);
printSameIntronStats(out);
return 0;
}
void txGeneCanonical(char *codingCluster, char *infoFile, 
	char *noncodingGraph, char *genesBed, char *nearCoding, 
	char *outCanonical, char *outIsoforms, char *outClusters)
/* txGeneCanonical - Pick a canonical version of each gene - that is the form
 * to use when just interested in a single splicing varient. Produces final
 * transcript clusters as well. */
{
/* Read in input into lists in memory. */
struct txCluster *coding, *codingList = txClusterLoadAll(codingCluster);
struct txGraph *graph, *graphList = txGraphLoadAll(noncodingGraph);
struct bed *bed, *nextBed, *bedList = bedLoadNAll(genesBed, 12);
struct txInfo *info, *infoList = txInfoLoadAll(infoFile);
struct bed *nearList = bedLoadNAll(nearCoding, 12);

/* Make hash of all beds. */
struct hash *bedHash = hashNew(18);
for (bed = bedList; bed != NULL; bed = bed->next)
    hashAdd(bedHash, bed->name, bed);

/* Make has of all info. */
struct hash *infoHash = hashNew(18);
for (info = infoList; info != NULL; info = info->next)
    hashAdd(infoHash, info->name, info);

/* Make a binKeeper structure that we'll populate with coding genes. */
struct hash *sizeHash = minChromSizeFromBeds(bedList);
struct hash *keeperHash = minChromSizeKeeperHash(sizeHash);

/* Make list of coding genes and toss them into binKeeper.
 * This will eat up bed list, but bedHash is ok. */
struct gene *gene, *geneList = NULL;
for (coding = codingList; coding != NULL; coding = coding->next)
    {
    gene = geneFromCluster(coding, bedHash, infoHash);
    slAddHead(&geneList, gene);
    struct binKeeper *bk = hashMustFindVal(keeperHash, gene->chrom);
    binKeeperAdd(bk, gene->start, gene->end, gene);
    }

/* Go through near-coding genes and add them to the coding gene
 * they most overlap. */
for (bed = nearList; bed != NULL; bed = nextBed)
    {
    nextBed = bed->next;
    gene = mostOverlappingGene(keeperHash, bed);
    if (gene == NULL)
        errAbort("%s is near coding, but doesn't overlap any coding!?", bed->name);
    geneAddBed(gene, bed);
    }

/* Add non-coding genes. */
for (graph = graphList; graph != NULL; graph = graph->next)
    {
    gene = geneFromGraph(graph, bedHash);
    slAddHead(&geneList, gene);
    }

/* Sort so it all looks nicer. */
slSort(&geneList, geneCmp);

/* Open up output files. */
FILE *fCan = mustOpen(outCanonical, "w");
FILE *fIso = mustOpen(outIsoforms, "w");
FILE *fClus = mustOpen(outClusters, "w");

/* Loop through, making up gene name, and writing output. */
int geneId = 0;
for (gene = geneList; gene != NULL; gene = gene->next)
    {
    /* Make up name. */
    char name[16];
    safef(name, sizeof(name), "g%05d", ++geneId);

    /* Reverse transcript list just to make it look better. */
    slReverse(&gene->txList);

    /* Write out canonical file output */
    bed = hashMustFindVal(bedHash, gene->niceTx->name);
    fprintf(fCan, "%s\t%d\t%d\t%d\t%s\t%s\n",
    	bed->chrom, bed->chromStart, bed->chromEnd, geneId,
	gene->niceTx->name, gene->niceTx->name);

    /* Write out isoforms output. */
    for (bed = gene->txList; bed != NULL; bed = bed->next)
        fprintf(fIso, "%d\t%s\n", geneId, bed->name);

    /* Write out cluster output, starting with bed 6 standard fields. */
    fprintf(fClus, "%s\t%d\t%d\t%s\t%d\t%c\t",
    	gene->chrom, gene->start, gene->end, name, 0, gene->strand);

    /* Write out thick-start/thick end. */
    if (gene->isCoding)
        {
	int thickStart = gene->end, thickEnd  = gene->start;
	for (bed = gene->txList; bed != NULL; bed = bed->next)
	    {
	    if (bed->thickStart < bed->thickEnd)
	        {
		thickStart = min(thickStart, bed->thickStart);
		thickEnd = max(thickEnd, bed->thickEnd);
		}
	    }
	fprintf(fClus, "%d\t%d\t", thickStart, thickEnd);
	}
    else
        {
	fprintf(fClus, "%d\t%d\t", gene->start, gene->start);
	}

    /* We got no rgb value, just write out zero. */
    fprintf(fClus, "0\t");

    /* Get exons from exonTree. */
    struct range *exon, *exonList = rangeTreeList(gene->exonTree);
    fprintf(fClus, "%d\t", slCount(exonList));
    for (exon = exonList; exon != NULL; exon = exon->next)
	fprintf(fClus, "%d,", exon->start - gene->start);
    fprintf(fClus, "\t");
    for (exon = exonList; exon != NULL; exon = exon->next)
	fprintf(fClus, "%d,", exon->end - exon->start);
    fprintf(fClus, "\t");

    /* Write out associated transcripts. */
    fprintf(fClus, "%d\t", slCount(gene->txList));
    for (bed = gene->txList; bed != NULL; bed = bed->next)
        fprintf(fClus, "%s,", bed->name);
    fprintf(fClus, "\t");

    /* Write out nice value */
    fprintf(fClus, "%s\t", gene->niceTx->name);

    /* Write out coding/noncoding value. */
    fprintf(fClus, "%d\n", gene->isCoding);
    }

/* Close up files. */
carefulClose(&fCan);
carefulClose(&fIso);
carefulClose(&fClus);
}
void loadSimpleBed(struct track *tg)
/* Load the items in one track - just move beds in
 * window... */
{
struct bed *(*loader)(char **row);
struct bed *bed, *list = NULL;
struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb);
char **row;
int rowOffset;
char *words[3];
int wordCt;
char query[128];
char *setting = NULL;
bool doScoreCtFilter = FALSE;
int scoreFilterCt = 0;
char *topTable = NULL;

if (tg->bedSize <= 3)
    loader = bedLoad3;
else if (tg->bedSize == 4)
    loader = bedLoad;
else if (tg->bedSize == 5)
    loader = bedLoad5;
else
    loader = bedLoad6;

// pairedTagAlign loader is required for base coloring using sequence from seq1 & seq2
// after removing optional bin column, this loader assumes seq1 and seq2 are in
// row[6] and row[7] respectively of the sql result.
if ((setting = trackDbSetting(tg->tdb, BASE_COLOR_USE_SEQUENCE)) 
	&& sameString(setting, "seq1Seq2"))
    loader = bedLoadPairedTagAlign;

/* limit to a specified count of top scoring items.
 * If this is selected, it overrides selecting item by specified score */
if ((setting = trackDbSettingClosestToHome(tg->tdb, "filterTopScorers")) != NULL)
    {
    wordCt = chopLine(cloneString(setting), words);
    if (wordCt >= 3)
        {
        doScoreCtFilter = cartUsualBooleanClosestToHome(cart, tg->tdb, FALSE,
                             "filterTopScorersOn",sameString(words[0], "on"));
        scoreFilterCt = cartUsualIntClosestToHome(cart, tg->tdb, FALSE,
                             "filterTopScorersCt", atoi(words[1]));
        topTable = words[2];
        /* if there are not too many rows in the table then can define */
        /* top table as the track or subtrack table */
        if (sameWord(topTable, "self"))
            topTable = cloneString(tg->table);
        }
    }

/* Get list of items */
if (tg->isBigBed)
    {
    char *scoreFilter = cartOrTdbString(cart, tg->tdb, "scoreFilter", NULL);
    if (scoreFilter != NULL || tg->visibility != tvDense)
	{
	struct lm *lm = lmInit(0);
	struct bigBedInterval *bb, *bbList = bigBedSelectRange(tg, chromName, winStart, winEnd, lm);
	char *bedRow[32];
	char startBuf[16], endBuf[16];
	int minScore = 0;
	if (scoreFilter)
	    minScore = atoi(scoreFilter);

	for (bb = bbList; bb != NULL; bb = bb->next)
	    {
	    bigBedIntervalToRow(bb, chromName, startBuf, endBuf, bedRow, ArraySize(bedRow));
	    bed = loader(bedRow);
	    if (scoreFilter == NULL || bed->score >= minScore)
		slAddHead(&list, bed);
	    }
	lmCleanup(&lm);
	}
    }
else
    {
    struct sqlResult *sr = NULL;
    /* limit to items above a specified score */
    char *scoreFilterClause = getScoreFilterClause(cart, tg->tdb,NULL);
    if (doScoreCtFilter && (topTable != NULL) && hTableExists(database, topTable))
	{
	safef(query, sizeof(query),"select * from %s order by score desc limit %d",
	      topTable, scoreFilterCt);
	sr = sqlGetResult(conn, query);
	rowOffset = hOffsetPastBin(database, hDefaultChrom(database), topTable);
	}
    else if(scoreFilterClause != NULL && tg->bedSize >= 5)
	{
	sr = hRangeQuery(conn, tg->table, chromName, winStart, winEnd, scoreFilterClause, &rowOffset);
	}
    else
	{
	sr = hRangeQuery(conn, tg->table, chromName, winStart, winEnd, NULL, &rowOffset);
	}
    freeMem(scoreFilterClause);
    while ((row = sqlNextRow(sr)) != NULL)
	{
	bed = loader(row+rowOffset);
	slAddHead(&list, bed);
	}
    sqlFreeResult(&sr);
    }

if (doScoreCtFilter)
    {
    /* filter out items not in this window */
    struct bed *newList = bedFilterListInRange(list, NULL, chromName, winStart, winEnd);
    list = newList;
    }
slReverse(&list);
hFreeConn(&conn);
tg->items = list;
}
static void filterBed(struct track *tg, struct linkedFeatures **pLfList)
/* Apply filters if any to mRNA linked features. */
{
struct linkedFeatures *lf, *next, *newList = NULL, *oldList = NULL;
struct mrnaUiData *mud = tg->extraUiData;
struct mrnaFilter *fil;
char *type;
boolean anyFilter = FALSE;
boolean colorIx = 0;
boolean isExclude = FALSE;
boolean andLogic = TRUE;

if (*pLfList == NULL || mud == NULL)
    return;

/* First make a quick pass through to see if we actually have
 * to do the filter. */
for (fil = mud->filterList; fil != NULL; fil = fil->next)
    {
    fil->pattern = cartUsualString(cart, fil->key, "");
    if (fil->pattern[0] != 0)
        anyFilter = TRUE;
    }
if (!anyFilter)
    return;

type = cartUsualString(cart, mud->filterTypeVar, "red");
if (sameString(type, "exclude"))
    isExclude = TRUE;
else if (sameString(type, "include"))
    isExclude = FALSE;
else
    colorIx = getFilterColor(type, MG_BLACK);
type = cartUsualString(cart, mud->logicTypeVar, "and");
andLogic = sameString(type, "and");

/* Make a pass though each filter, and start setting up search for
 * those that have some text. */
for (fil = mud->filterList; fil != NULL; fil = fil->next)
    {
    fil->pattern = cartUsualString(cart, fil->key, "");
    if (fil->pattern[0] != 0)
	{
	fil->hash = newHash(10);
	}
    }

/* Scan tables id/name tables to build up hash of matching id's. */
for (fil = mud->filterList; fil != NULL; fil = fil->next)
    {
    struct hash *hash = fil->hash;
    int wordIx, wordCount;
    char *words[128];

    if (hash != NULL)
	{
	boolean anyWild;
	char *dupPat = cloneString(fil->pattern);
	wordCount = chopLine(dupPat, words);
	for (wordIx=0; wordIx <wordCount; ++wordIx)
	    {
	    char *pattern = cloneString(words[wordIx]);
	    if (lastChar(pattern) != '*')
		{
		int len = strlen(pattern)+1;
		pattern = needMoreMem(pattern, len, len+1);
		pattern[len-1] = '*';
		}
	    anyWild = (strchr(pattern, '*') != NULL || strchr(pattern, '?') != NULL);
	    touppers(pattern);
	    for(lf = *pLfList; lf != NULL; lf=lf->next)
		{
		char copy[SMALLBUF];
		boolean gotMatch;
		safef(copy, sizeof(copy), "%s", lf->name);
		touppers(copy);
		if (anyWild)
		    gotMatch = wildMatch(pattern, copy);
		else
		    gotMatch = sameString(pattern, copy);
		if (gotMatch)
		    {
		    hashAdd(hash, lf->name, NULL);
		    }
		}
	    freez(&pattern);
	    }
	freez(&dupPat);
	}
    }

/* Scan through linked features coloring and or including/excluding ones that
 * match filter. */
for (lf = *pLfList; lf != NULL; lf = next)
    {
    boolean passed = andLogic;
    next = lf->next;
    for (fil = mud->filterList; fil != NULL; fil = fil->next)
	{
	if (fil->hash != NULL)
	    {
	    if (hashLookup(fil->hash, lf->name) == NULL)
		{
		if (andLogic)
		    passed = FALSE;
		}
	    else
		{
		if (!andLogic)
		    passed = TRUE;
		}
	    }
	}
    if (passed ^ isExclude)
	{
	slAddHead(&newList, lf);
	if (colorIx > 0)
	    lf->filterColor = colorIx;
	}
    else
        {
	slAddHead(&oldList, lf);
	}
    }

slReverse(&newList);
slReverse(&oldList);
if (colorIx > 0)
   {
   /* Draw stuff that passes filter first in full mode, last in dense. */
   if (tg->visibility == tvDense)
       {
       newList = slCat(oldList, newList);
       }
   else
       {
       newList = slCat(newList, oldList);
       }
   }
*pLfList = newList;
tg->limitedVisSet = FALSE;	/* Need to recalculate this after filtering. */

/* Free up hashes, etc. */
for (fil = mud->filterList; fil != NULL; fil = fil->next)
    {
    hashFree(&fil->hash);
    }
}
struct mafAli *mafFromBed12(char *database, char *track, struct bed *bed, 
	struct slName *orgList)
/* Construct a maf out of exons in bed. */
{
/* Loop through all block in bed, collecting a list of mafs, one
 * for each block.  While we're at make a hash of all species seen. */
struct hash *speciesHash = hashNew(0);
struct mafAli *mafList = NULL, *maf, *bigMaf;
struct mafComp *comp, *bigComp;
int totalTextSize = 0;
int i;
for (i=0; i<bed->blockCount; ++i)
    {
    int start = bed->chromStart + bed->chromStarts[i];
    int end = start + bed->blockSizes[i];
    if (thickOnly)
        {
	start = max(start, bed->thickStart);
	end = min(end, bed->thickEnd);
	}
    if (start < end)
        {
	maf = hgMafFrag(database, track, bed->chrom, start, end, '+',
	   database, NULL);
	slAddHead(&mafList, maf);
	for (comp = maf->components; comp != NULL; comp = comp->next)
	    hashStore(speciesHash, comp->src);
	totalTextSize += maf->textSize; 
	}
    }
slReverse(&mafList);

/* Add species in order list too */
struct slName *org;
for (org = orgList; org != NULL; org = org->next)
    hashStore(speciesHash, org->name);

/* Allocate memory for return maf that contains all blocks concatenated together. 
 * Also fill in components with any species seen at all. */
AllocVar(bigMaf);
bigMaf->textSize = totalTextSize;
struct hashCookie it = hashFirst(speciesHash);
struct hashEl *hel;
while ((hel = hashNext(&it)) != NULL)
    {
    AllocVar(bigComp);
    bigComp->src = cloneString(hel->name);
    bigComp->text = needLargeMem(totalTextSize + 1);
    memset(bigComp->text, '.', totalTextSize);
    bigComp->text[totalTextSize] = 0;
    bigComp->strand = '+';
    bigComp->srcSize = totalTextSize;	/* It's safe if a bit of a lie. */
    hel->val = bigComp;
    slAddHead(&bigMaf->components, bigComp);
    }

/* Loop through maf list copying in data. */
int textOffset = 0;
for (maf = mafList; maf != NULL; maf = maf->next)
    {
    for (comp = maf->components; comp != NULL; comp = comp->next)
        {
	bigComp = hashMustFindVal(speciesHash, comp->src);
	memcpy(bigComp->text + textOffset, comp->text, maf->textSize);
	bigComp->size += comp->size;
	}
    textOffset += maf->textSize;
    }

/* Cope with strand of darkness. */
if (bed->strand[0] == '-')
    {
    for (comp = bigMaf->components; comp != NULL; comp = comp->next)
	reverseComplement(comp->text, bigMaf->textSize);
    }

/* If got an order list then reorder components according to it. */
if (orgList != NULL)
    {
    struct mafComp *newList = NULL;
    for (org = orgList; org != NULL; org = org->next)
        {
	comp = hashMustFindVal(speciesHash, org->name);
	slAddHead(&newList, comp);
	}
    slReverse(&newList);
    bigMaf->components = newList;
    }

/* Rename our own component to bed name */
comp = hashMustFindVal(speciesHash, database);
freeMem(comp->src);
comp->src = cloneString(bed->name);


/* Clean up and go home. */
hashFree(&speciesHash);
mafAliFreeList(&mafList);
return bigMaf;
}
Exemple #16
0
struct bed *breakUpBedAtCdsBreaks(struct cdsEvidence *cds, struct bed *bed)
/* Create a new broken-up that excludes part of gene between CDS breaks.  
 * Also jiggles cds->end coordinate to cope with the sequence we remove.
 * Deals with transcript to genome coordinate mapping including negative
 * strand.  Be afraid, be very afraid! */
{
/* Create range tree covering all breaks.  The coordinates here
 * are transcript coordinates.  While we're out it shrink outer CDS
 * since we are actually shrinking transcript. */
struct rbTree *gapTree = rangeTreeNew();
int bedSize = bed->chromEnd - bed->chromStart;
struct lm *lm = gapTree->lm;	/* Convenient place to allocate memory. */
int i, lastCds = cds->cdsCount-1;
for (i=0; i<lastCds; ++i)
    {
    int gapStart = cds->cdsStarts[i] + cds->cdsSizes[i];
    int gapEnd = cds->cdsStarts[i+1];
    int gapSize = gapEnd - gapStart;
    cds->end -= gapSize;
    rangeTreeAdd(gapTree, gapStart, gapEnd);
    }

/* Get list of exons in bed, flipped to reverse strand if need be. */
struct range *exon, *exonList = bedToExonList(bed, lm);
if (bed->strand[0] == '-')
    flipExonList(&exonList, bedSize);

/* Go through exon list, mapping each exon to transcript
 * coordinates. Check if exon needs breaking up, and if
 * so do so, as we copy it to new list. */
/* Copy exons to new list, breaking them up if need be. */
struct range *newList = NULL, *nextExon, *newExon;
int txStartPos = 0, txEndPos;
for (exon = exonList; exon != NULL; exon = nextExon)
    {
    txEndPos = txStartPos + exon->end - exon->start;
    nextExon = exon->next;
    struct range *gapList = rangeTreeAllOverlapping(gapTree, txStartPos, txEndPos);
    if (gapList != NULL)
        {
	verbose(3, "Splitting exon because of CDS gap\n");

	/* Make up exons from current position up to next gap.  This is a little
	 * complicated by possibly the gap starting before the exon. */
	int exonStart = exon->start;
	int txStart = txStartPos;
	struct range *gap;
	for (gap = gapList; gap != NULL; gap = gap->next)
	    {
	    int txEnd = gap->start;
	    int gapSize = rangeIntersection(gap->start, gap->end, txStart, txEndPos);
	    int exonSize = txEnd - txStart;
	    if (exonSize > 0)
		{
		lmAllocVar(lm, newExon);
		newExon->start = exonStart;
		newExon->end = exonStart + exonSize;
		slAddHead(&newList, newExon);
		}
	    else /* This case happens if gap starts before exon */
	        {
		exonSize = 0;
		}

	    /* Update current position in both transcript and genome space. */
	    exonStart += exonSize + gapSize;
	    txStart += exonSize + gapSize;
	    }

	/* Make up final exon from last gap to end, at least if we don't end in a gap. */
	if (exonStart < exon->end)
	    {
	    lmAllocVar(lm, newExon);
	    newExon->start = exonStart;
	    newExon->end = exon->end;
	    slAddHead(&newList, newExon);
	    }
	}
    else
        {
	/* Easy case where we don't intersect any gaps. */
	slAddHead(&newList, exon);
	}
    txStartPos= txEndPos;
    }
slReverse(&newList);

/* Flip exons back to forward strand if need be */
if (bed->strand[0] == '-')
    flipExonList(&newList, bedSize);

/* Convert exons to bed12 */
struct bed *newBed;
AllocVar(newBed);
newBed->chrom = cloneString(bed->chrom);
newBed->chromStart = newList->start + bed->chromStart;
newBed->chromEnd = newList->end + bed->chromStart;
newBed->name  = cloneString(bed->name);
newBed->score = bed->score;
newBed->strand[0] = bed->strand[0];
newBed->blockCount = slCount(newList);
AllocArray(newBed->blockSizes,  newBed->blockCount);
AllocArray(newBed->chromStarts,  newBed->blockCount);
for (exon = newList, i=0; exon != NULL; exon = exon->next, i++)
    {
    newBed->chromStarts[i] = exon->start;
    newBed->blockSizes[i] = exon->end - exon->start;
    newBed->chromEnd = exon->end + bed->chromStart;
    }

/* Clean up and go home. */
rbTreeFree(&gapTree);
return newBed;
}
Exemple #17
0
struct bigBedInterval *bigBedIntervalQuery(struct bbiFile *bbi, char *chrom,
	bits32 start, bits32 end, int maxItems, struct lm *lm)
/* Get data for interval.  Return list allocated out of lm.  Set maxItems to maximum
 * number of items to return, or to 0 for all items. */
{
struct bigBedInterval *el, *list = NULL;
int itemCount = 0;
bbiAttachUnzoomedCir(bbi);
bits32 chromId;
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bbi, bbi->unzoomedCir,
	chrom, start, end, &chromId);
struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;
struct dyString *dy = dyStringNew(32);

/* Set up for uncompression optionally. */
char *uncompressBuf = NULL;
if (bbi->uncompressBufSize > 0)
    uncompressBuf = needLargeMem(bbi->uncompressBufSize);

for (block = blockList; block != NULL; )
    {
    /* Find contigious blocks and read them into mergedBuf. */
    fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
    bits64 mergedOffset = block->offset;
    bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
    udcSeek(udc, mergedOffset);
    char *mergedBuf = needLargeMem(mergedSize);
    udcMustRead(udc, mergedBuf, mergedSize);
    char *blockBuf = mergedBuf;

    /* Loop through individual blocks within merged section. */
    for (;block != afterGap; block = block->next)
        {
	/* Uncompress if necessary. */
	char *blockPt, *blockEnd;
	if (uncompressBuf)
	    {
	    blockPt = uncompressBuf;
	    int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bbi->uncompressBufSize);
	    blockEnd = blockPt + uncSize;
	    }
	else
	    {
	    blockPt = blockBuf;
	    blockEnd = blockPt + block->size;
	    }

	while (blockPt < blockEnd)
	    {
	    /* Read next record into local variables. */
	    bits32 chr = memReadBits32(&blockPt, isSwapped);	// Read and discard chromId
	    bits32 s = memReadBits32(&blockPt, isSwapped);
	    bits32 e = memReadBits32(&blockPt, isSwapped);
	    int c;
	    dyStringClear(dy);
	    // TODO - can simplify this probably just to for (;;) {if ((c = *blockPt++) == 0) ...
	    while ((c = *blockPt++) >= 0)
		{
		if (c == 0)
		    break;
		dyStringAppendC(dy, c);
		}

	    /* If we're actually in range then copy it into a new  element and add to list. */
	    if (chr == chromId && s < end && e > start)
		{
		++itemCount;
		if (maxItems > 0 && itemCount > maxItems)
		    break;

		lmAllocVar(lm, el);
		el->start = s;
		el->end = e;
		if (dy->stringSize > 0)
		    el->rest = lmCloneString(lm, dy->string);
		el->chromId = chromId;
		slAddHead(&list, el);
		}
	    }
	if (maxItems > 0 && itemCount > maxItems)
	    break;
	blockBuf += block->size;
        }
    if (maxItems > 0 && itemCount > maxItems)
        break;
    freez(&mergedBuf);
    }
freeMem(uncompressBuf);
dyStringFree(&dy);
slFreeList(&blockList);
slReverse(&list);
return list;
}
Exemple #18
0
static struct linkedFeatures *cgapSageToLinkedFeatures(struct cgapSage *tag, 
                  struct hash *libHash, struct hash *libTotHash, enum trackVisibility vis)
/* Convert a single CGAP tag to a list of linkedFeatures. */
{
struct linkedFeatures *libList = NULL;
struct linkedFeatures *skel = skeletonLf(tag);
int i;
if (vis == tvDense)
    /* Just use the skeleton one. */
    {
    int tagTotal = 0;
    int freqTotal = 0;
    int libsUsed = 0;
    for (i = 0; i < tag->numLibs; i++)
	{
	char libId[16];
	char *libName;
	safef(libId, sizeof(libId), "%d", tag->libIds[i]);
	libName = hashMustFindVal(libHash, libId);
	if (keepThisLib(libName, libId))
	    {
	    int libTotal = hashIntVal(libTotHash, libId);
	    tagTotal += libTotal;
	    freqTotal += tag->freqs[i];
	    libsUsed++;
	    }
	}
    if (libsUsed > 0)
	{
	skel->name = cloneString("whatever");
	skel->score = (float)((double)freqTotal * (1000000/tagTotal));
	skel->grayIx = grayIxForCgap(skel->score);
	addSimpleFeature(skel);
	libList = skel;
	}
    }
else if (vis == tvPack)
    {
    /* If it's pack mode, average tissues into one linkedFeature. */
    struct hash *tpmHash = combineCgapSages(tag, libHash, libTotHash);
    struct hashEl *tpmList = hashElListHash(tpmHash);
    struct hashEl *tpmEl;
    slSort(&tpmList, slNameCmp);
    for (tpmEl = tpmList; tpmEl != NULL; tpmEl = tpmEl->next)
	{
	struct linkedFeatures *tiss = CloneVar(skel);
	struct cgapSageTpmHashEl *tpm = (struct cgapSageTpmHashEl *)tpmEl->val;
	char link[256];
	char *encTissName = NULL;
	double score = 0;
        int len = strlen(tpmEl->name) + 32;
        tiss->name = needMem(len);
	safef(tiss->name, len, "%s (%d)", tpmEl->name, tpm->count);
	encTissName = cgiEncode(tpmEl->name);
	safef(link, sizeof(link), "i=%s&tiss=%s", tag->name, encTissName);
	score = (double)tpm->freqTotal*(1000000/(double)tpm->libTotals);
	tiss->score = (float)score;
	tiss->grayIx = grayIxForCgap(score);
	tiss->extra = cloneString(link);
	freeMem(encTissName);
	addSimpleFeature(tiss);
	slAddHead(&libList, tiss);
	}
    hashElFreeList(&tpmList);
    freeHashAndVals(&tpmHash);
    }
else 
    /* full mode */
    {
    for (i = 0; i < tag->numLibs; i++)
	{
	char libId[16];
	char *libName;
	char link[256];
	struct linkedFeatures *lf;
	safef(libId, sizeof(libId), "%d", tag->libIds[i]);
	libName = hashMustFindVal(libHash, libId);
	if (keepThisLib(libName, libId))
	    {	    
	    lf = CloneVar(skel);
	    lf->name = cloneString(libName);
	    safef(link, sizeof(link), "i=%s&lib=%s", tag->name, libId);
	    lf->score = (float)tag->tagTpms[i];
	    lf->grayIx = grayIxForCgap(tag->tagTpms[i]);
	    lf->extra = cloneString(link);
	    addSimpleFeature(lf);	
	    slAddHead(&libList, lf);
	    }
	}
    }
slSort(&libList, cgapLinkedFeaturesCmp);
slReverse(&libList);
return libList;
}
Exemple #19
0
int main(int argc, char *argv[])
{
char *genoListName;
char *cdnaListName;
char *oocFileName;
char *hitFileName;
char *mergerFileName;
struct patSpace *patSpace;
long startTime, endTime;
char **genoList;
int genoListSize;
char *genoListBuf;
char **cdnaList;
int cdnaListSize;
char *cdnaListBuf;
char *genoName;
int i;
int estIx = 0;
struct dnaSeq **seqListList = NULL, *seq;

if (dumpMe)
    {
    bigHtmlFile = mustOpen("C:\\inetpub\\wwwroot\\test\\patAli.html", "w");
    littleHtmlFile = mustOpen("C:\\inetpub\\wwwroot\\test\\patSpace.html", "w");
    htmStart(bigHtmlFile, "PatSpace Alignments");
    htmStart(littleHtmlFile, "PatSpace Index");
    }

if (argc != 6)
    usage();

startTime = clock1000();
dnaUtilOpen();
makePolys();
genoListName = argv[1];
cdnaListName = argv[2];
oocFileName = argv[3];
hitFileName = argv[4];
mergerFileName = argv[5];

readAllWords(genoListName, &genoList, &genoListSize, &genoListBuf);
readAllWords(cdnaListName, &cdnaList, &cdnaListSize, &cdnaListBuf);
hitOut = mustOpen(hitFileName, "w");
mergerOut = mustOpen(mergerFileName, "w");
dumpOut = mustOpen("dump.out", "w");
seqListList = needMem(genoListSize*sizeof(seqListList[0]) );
fprintf(hitOut, "Pattern space 0.2 cDNA matcher\n");
fprintf(hitOut, "cDNA files: ", cdnaListSize);
for (i=0; i<cdnaListSize; ++i)
    fprintf(hitOut, " %s", cdnaList[i]);
fprintf(hitOut, "\n");
fprintf(hitOut, "%d genomic files\n", genoListSize);
for (i=0; i<genoListSize; ++i)
    {
    genoName = genoList[i];
    if (!startsWith("//", genoName)  )
        {
        seqListList[i] = seq = faReadAllDna(genoName);
        fprintf(hitOut, "%d els in %s ", slCount(seq), genoList[i]);
        for (; seq != NULL; seq = seq->next)
            fprintf(hitOut, "%d ", seq->size);
        fprintf(hitOut, "\n");
        }
    }

patSpace = makePatSpace(seqListList, genoListSize, oocFileName);

for (i=0; i<cdnaListSize; ++i)
    {
    FILE *f;
	char *estFileName;
    DNA *dna;
    char *estName;
    int size;
    int c;
    int maxSizeForFuzzyFind = 20000;
    int dotCount = 0;

	estFileName = cdnaList[i];
    if (startsWith("//", estFileName)  )
		continue;

	f = mustOpen(estFileName, "rb");
	while ((c = fgetc(f)) != EOF)
        if (c == '>')
            break;
    printf("%s", cdnaList[i]);
    fflush(stdout);
    while (fastFaReadNext(f, &dna, &size, &estName))
        {
        if (size < maxSizeForFuzzyFind)  /* Some day need to fix this somehow... */
            {
            struct cdnaAliList *calList = NULL;
            patSpaceFindOne(patSpace, dna, size, '+', estName, estIx, &calList);
            reverseComplement(dna, size);
            patSpaceFindOne(patSpace, dna, size, '-', estName, estIx, &calList);
            slReverse(&calList);
            writeMergers(calList, estName, size, genoList);
            ++estIx;
            if ((estIx & 0xfff) == 0)
                {
                printf(".");
                ++dotCount;
                fflush(stdout);
                }
            }
        }
    printf("\n");
    }
printf("raw %4d ffSubmitted %3d ffAccepted %3d ffOkScore %3d ffSolidMatch %2d\n",
    grandTotalHits, ffSubmitted, ffAccepted, ffOkScore, ffSolidMatch);

endTime = clock1000();

printf("Total time is %4.2f\n", 0.001*(endTime-startTime));

if (dumpMe)
    {
    htmEnd(bigHtmlFile);
    htmEnd(littleHtmlFile);
    }
return 0;
}
Exemple #20
0
void hgExpDistance(char *database, char *posTable, char *expTable, char *outTable)
/* hgExpDistance - Create table that measures expression distance between pairs. */
{
struct sqlConnection *conn = sqlConnect(database);
struct sqlResult *sr;
char query[256];
char **row;
struct hash *expHash = hashNew(16);
int realExpCount = -1;
struct microData *geneList = NULL, *curGene, *gene;
int geneIx, geneCount = 0;
struct microData **geneArray = NULL;
float *weights = NULL;
char *tempDir = ".";
FILE *f = hgCreateTabFile(tempDir, outTable);
long time1, time2;

time1 = clock1000();

/* Get list/hash of all items with expression values. */

/* uglyf("warning: temporarily limited to 1000 records\n"); */

sqlSafef(query, sizeof(query), "select name,expCount,expScores from %s", posTable);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *name = row[0];
    if (!hashLookup(expHash, name))
	{
	int expCount = sqlUnsigned(row[1]);
	int commaCount;
	float *expScores = NULL;

	sqlFloatDynamicArray(row[2], &expScores, &commaCount);
	if (expCount != commaCount)
	    errAbort("expCount and expScores don't match on %s in %s", name, posTable);
	if (realExpCount == -1)
	    realExpCount = expCount;
	if (expCount != realExpCount)
	    errAbort("In %s some rows have %d experiments others %d", 
	    	name, expCount, realExpCount);
	AllocVar(gene);
	gene->expCount = expCount;
	gene->expScores = expScores;
	hashAddSaveName(expHash, name, gene, &gene->name);
	slAddHead(&geneList, gene);
	}
    }
sqlFreeResult(&sr);
conn = sqlConnect(database);
slReverse(&geneList);
geneCount = slCount(geneList);
printf("Have %d elements in %s\n", geneCount, posTable);

weights = getWeights(realExpCount);

if (optionExists("lookup"))
    geneList = lookupGenes(conn, optionVal("lookup", NULL), geneList);
geneCount = slCount(geneList);
printf("Got %d unique elements in %s\n", geneCount, posTable);

sqlDisconnect(&conn);	/* Disconnect because next step is slow. */


if (geneCount < 1)
    errAbort("ERROR: unique gene count less than one ?");

time2 = clock1000();
verbose(2, "records read time: %.2f seconds\n", (time2 - time1) / 1000.0);

/* Get an array for sorting. */
AllocArray(geneArray, geneCount);
for (gene = geneList,geneIx=0; gene != NULL; gene = gene->next, ++geneIx)
    geneArray[geneIx] = gene;

/* Print out closest 1000 in tab file. */
for (curGene = geneList; curGene != NULL; curGene = curGene->next)
    {
    calcDistances(curGene, geneList, weights);
    qsort(geneArray, geneCount, sizeof(geneArray[0]), cmpMicroDataDistance);
    for (geneIx=0; geneIx < 1000 && geneIx < geneCount; ++geneIx)
        {
	gene = geneArray[geneIx];
	fprintf(f, "%s\t%s\t%f\n", curGene->name, gene->name, gene->distance);
	}
    dotOut();
    }

printf("Made %s.tab\n", outTable);

time1 = time2;
time2 = clock1000();
verbose(2, "distance computation time: %.2f seconds\n", (time2 - time1) / 1000.0);

/* Create and load table. */
conn = sqlConnect(database);
distanceTableCreate(conn, outTable);
hgLoadTabFile(conn, tempDir, outTable, &f);
printf("Loaded %s\n", outTable);

/* Add indices. */
sqlSafef(query, sizeof(query), "alter table %s add index(query(12))", outTable);
sqlUpdate(conn, query);
printf("Made query index\n");
if (optionExists("targetIndex"))
    {
    sqlSafef(query, sizeof(query), "alter table %s add index(target(12))", outTable);
    sqlUpdate(conn, query);
    printf("Made target index\n");
    }

hgRemoveTabFile(tempDir, outTable);

time1 = time2;
time2 = clock1000();
verbose(2, "table create/load/index time: %.2f seconds\n", (time2 - time1) / 1000.0);

}
Exemple #21
0
void cloneSpan(char *fileName)
/* cloneSpan - List clones and the amount the span by looking at .gl file. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
int wordCount, lineSize;
char *words[16], *line;
struct hash *hash = newHash(0);
struct hashEl *hel;
char *cloneName;
int start, end;
struct clone *cloneList = NULL, *clone;
int totalSpan = 0, totalBases = 0;

while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] == '#')
        continue;
    wordCount = chopLine(line, words);
    if (wordCount == 0)
        continue;
    if (wordCount < 3)
       lineFileExpectWords(lf, 3, wordCount);
    cloneName = words[0];
    chopSuffix(cloneName);
    start = sqlUnsigned(words[1]);
    end = sqlUnsigned(words[2]);
    clone = hashFindVal(hash, cloneName);
    if (clone == NULL)
        {
	AllocVar(clone);
	hel = hashAdd(hash, cloneName, clone);
	clone->name = hel->name;
	clone->start = start;
	clone->end = end;
	slAddHead(&cloneList, clone);
	}
    else
        {
	if (clone->start > start)
	    clone->start = start;
	if (clone->end < end)
	    clone->end = end;
	}
    clone->baseCount += end-start;
    }
lineFileClose(&lf);
slReverse(&cloneList);

for (clone = cloneList; clone != NULL; clone = clone->next)
    {
    int span = clone->end - clone->start;
#ifdef SOMETIMES
    printf("clone %s, bases %d, spans %d, density %4.2f%%\n",
        clone->name, clone->baseCount, span,
	100.0 * (double)clone->baseCount/(double)span);
#endif
    totalSpan += span;
    totalBases += clone->baseCount;
    }
printf("%s bases %d, spans %d, density %4.2f%%\n",
    fileName, totalBases, totalSpan,
    100.0 * (double)totalBases/(double)totalSpan);
}
struct cutter *readGcg(char *gcgFile)
/* Parse a GCG file and load it into cutter format. */
{
struct lineFile *lf = lineFileOpen(gcgFile,TRUE);
struct cutter *enzList = NULL;
char *line = "whatever", *words[10], numWords;

/* Skip to the right line. */
while (lineFileNext(lf,&line,NULL) && !startsWith("..",line));
/* */
while ((numWords=lineFileChop(lf,words)))
    {
    struct cutter *newone = NULL;
    int comIx = (numWords==7) ? 5 : 6;
    int refIx = (numWords==7) ? 6 : 7;
    int i;
    char *items[100];

    /* Skip ones */
    if (words[4][0] == '?')
	continue;
    AllocVar(newone);
    newone->semicolon = (words[0][0] == ';') ? TRUE : FALSE;
    /* Deal with the first few columns */
    if (!isdigit(words[1][0]))
	errAbort("Error: expecting a number in cut site column on line %d\n", lf->lineIx+1);
    if (!isdigit(words[3][0]) && words[3][0]!='-')
	errAbort("Error: expecting a number in the overhang column on line %d\n", lf->lineIx+1);
    if (words[comIx][0] != '>')
	errAbort("Error: expecting a \'>\' in the commercial sources column of line %d\n", lf->lineIx+1);
    newone->name = (words[0][0] == ';') ? cloneString(words[0]+1) : cloneString(words[0]);
    newone->cut = atoi(words[1]);
    newone->seq = cloneString(words[2]);
    touppers(newone->seq);
    stripChar(newone->seq,'\'');
    stripChar(newone->seq,'_');
    newone->size = strlen(newone->seq);
    newone->matchSize = newone->size - countChars(newone->seq, 'N');
    newone->palindromic = isPalindrome(newone->seq);
    newone->overhang = atoi(words[3]);
    newone->numCompanies = strlen(words[comIx]+1);
    if (newone->numCompanies > 0)
	newone->companies = cloneMem(words[comIx]+1, newone->numCompanies*sizeof(char));
    newone->numRefs = chopString(words[refIx], ",", items, ArraySize(items));
    AllocArray(newone->refs, newone->numRefs);
    for (i = 0; i < newone->numRefs; i++) 
	{
	if (i == 100)
	    errAbort("Error: Andy didn't make the array for holding references big enough\n");
	if (!isdigit(items[i][0]))
	    errAbort("Error: expecting number in references column in line %d\n", lf->lineIx+1);
	newone->refs[i] = atoi(items[i]);
	}
    /* Deal with isoscizomers. */
    if (numWords == 8)
	{
	newone->numSciz = chopString(words[5], ",", items, ArraySize(items));
	AllocArray(newone->scizs, newone->numSciz*sizeof(int));
	for (i = 0; i < newone->numSciz; i++)
	    {
	    if (i == 100)
		errAbort("Error: Andy didn't make the array for having isoscizomers big enough\n");
	    newone->scizs[i] = cloneString(items[i]);
	    }
	}
    else 
	newone->numSciz = 0;
    slAddHead(&enzList, newone);
    }
slReverse(&enzList);
lineFileClose(&lf);
return enzList;
}
struct commit* getCommits()
/* Get all commits from startTag to endTag */
{
int numCommits = 0;
safef(gitCmd,sizeof(gitCmd), ""
"git log %s..%s --name-status > commits.tmp"
, startTag, endTag);
runShell(gitCmd);
struct lineFile *lf = lineFileOpen("commits.tmp", TRUE);
int lineSize;
char *line;
struct commit *commits = NULL, *commit = NULL;
struct files *files = NULL, *f = NULL;
char *sep = "";
while (lineFileNext(lf, &line, &lineSize))
    {
    boolean isMerge = FALSE;
    char *w = nextWord(&line);
    AllocVar(commit);
    if (!sameString("commit", w))
	errAbort("expected keyword commit parsing commits.tmp\n");
    commit->commitId = cloneString(nextWord(&line));
    commit->commitNumber = ++numCommits;

    lineFileNext(lf, &line, &lineSize);
    w = nextWord(&line);
    if (sameString("Merge:", w))
	{
	isMerge = TRUE;
	lineFileNext(lf, &line, &lineSize);
	w = nextWord(&line);
	}
    if (!sameString("Author:", w))
	errAbort("expected keyword Author: parsing commits.tmp\n");

    /* by request, keep just the email account name */
    char *lc = strchr(line, '<');
    if (!lc)
	errAbort("expected '<' char in email address in Author: parsing commits.tmp\n");
    ++lc;
    char *rc = strchr(lc, '>');
    if (!rc)
	errAbort("expected '>' char in email address in Author: parsing commits.tmp\n");
    char *ac = strchr(lc, '@');
    if (ac)
	rc = ac;
    commit->author = cloneStringZ(lc, rc-lc);

    lineFileNext(lf, &line, &lineSize);
    w = nextWord(&line);
    if (!sameString("Date:", w))
	errAbort("expected keyword Date: parsing commits.tmp\n");
    commit->date = cloneString(line);

    lineFileNext(lf, &line, &lineSize);
    if (!sameString("", line))
	errAbort("expected blank line parsing commits.tmp\n");

    /* collect the comment-lines */
    struct dyString *dy = NULL;
    dy = dyStringNew(0);
    sep = "";
    files = NULL;
    while (lineFileNext(lf, &line, &lineSize))
	{
	if (sameString("", line))
	    break;
	w = skipLeadingSpaces(line);
	dyStringPrintf(dy, "%s%s", w, sep);
	sep = "\n";
	}
    commit->comment = cloneString(dy->string);
    freeDyString(&dy);

    if (!isMerge)
	{
	/* collect the files-list */
	while (lineFileNext(lf, &line, &lineSize))
	    {
	    if (sameString("", line))
		break;
	    AllocVar(f);
	    w = nextWord(&line);
	    f->type = w[0];
	    f->path = cloneString(line);
	    slAddHead(&files, f);
	    }
	slReverse(&files);
	}

    commit->files = files;

    
    if (!isMerge  /* for now, default to filtering out the records for automatic-merges */
        && !endsWith(commit->comment, "elease log update"))  /* filter out automatic release log commits */
	slAddHead(&commits, commit);

    verbose(2, 
 "commitId: %s\n"
 "author: %s\n"
 "date: %s\n"
 "comment: [%s]\n"
 "file(s): \n"
, commit->commitId
, commit->author
, commit->date
, commit->comment);

    for (f=commit->files; f; f = f->next)
	{
    	verbose(2, "%c %s\n", f->type, f->path);

	// anything other than M or A?
	if (f->type != 'M' && f->type != 'A' )
	    verbose(2, "special type: %c %s\n", f->type, f->path);
	}


    verbose(2, "------------\n");

    }
lineFileClose(&lf);
/* We want to keep them chronological order,
so do not need slReverse since the addHead reversed git log's rev chron order already */


unlink("commits.tmp");
return commits;
}
struct bed *matchEnzymes(struct cutter *cutters, struct dnaSeq *seq, int startOffset)
/* Match the enzymes to sequence and return a bed list in all cases. */
{
struct hash *sixers = newHash(8), *palinSixers = newHash(8);
struct cutter *enz;
struct cutter *ACGTo[5], *palinACGTo[5];
struct bed *bedList = NULL, *tmp;
int i;
if (!cutters)
    return NULL;
for (i = 0; i < 5; i++)
    ACGTo[i] = palinACGTo[i] = NULL;
/* Put each of the enzymes in either a hash table of six-cutters or */
enz = cutters;
while (enz != NULL)
    {
    int acgtCount = 0;
    struct cutter *next = enz->next;
    acgtCount = countChars(enz->seq,'A') + countChars(enz->seq,'C') + 
                countChars(enz->seq,'G') + countChars(enz->seq,'T');
    /* Super dumb coding here but it's quick. */
     if (enz->palindromic)
	{
	if (enz->size==6 && acgtCount==6)
	    hashAdd(palinSixers, enz->seq, enz);
	else
	    {
	    if (enz->seq[0] == 'A')
		slAddHead(&palinACGTo[0], enz);
	    else if (enz->seq[0] == 'C')
		slAddHead(&palinACGTo[1], enz);
	    else if (enz->seq[0] == 'G')
		slAddHead(&palinACGTo[2], enz);
	    else if (enz->seq[0] == 'T')
		slAddHead(&palinACGTo[3], enz);
	    else 
		{
		slAddHead(&palinACGTo[4], enz);
		}
	    }
	}
    else
	{
	if (enz->size==6 && acgtCount==6)
	    hashAdd(sixers, enz->seq, enz);
	else
	    {
	    if (enz->seq[0] == 'A')
		slAddHead(&ACGTo[0], enz);
	    else if (enz->seq[0] == 'C')
		slAddHead(&ACGTo[1], enz);
	    else if (enz->seq[0] == 'G')
		slAddHead(&ACGTo[2], enz);
	    else if (enz->seq[0] == 'T')
		slAddHead(&ACGTo[3], enz);
	    else 
		slAddHead(&ACGTo[4], enz);
	    }	
	}
    enz = next;
    }
/* At this point we got a hash for the palindromes and non-palindromic six-cutters, 
   plus an array for each too.  The array is set up so the enzymes starting with 'A' go into [0], 'C'
   into [1], 'G' into [2], 'T' into [3], and other bases into [4].  */
if (ACGTo[4])
    {
    ACGTo[0] = slCat(ACGTo[0], ACGTo[4]);
    ACGTo[1] = slCat(ACGTo[1], ACGTo[4]);
    ACGTo[2] = slCat(ACGTo[2], ACGTo[4]);
    ACGTo[3] = slCat(ACGTo[3], ACGTo[4]);
    }
if (palinACGTo[4])
    {
    palinACGTo[0] = slCat(palinACGTo[0], palinACGTo[4]);
    palinACGTo[1] = slCat(palinACGTo[1], palinACGTo[4]);
    palinACGTo[2] = slCat(palinACGTo[2], palinACGTo[4]);
    palinACGTo[3] = slCat(palinACGTo[3], palinACGTo[4]);
    }
/* Search the DNA in three ways: on the plus strand for both palindromes and nonpalindromes, and then
   just nonpalindromes on the minus strand. */
bedList = searchStrand(palinSixers, palinACGTo, seq, startOffset, '+');
tmp = searchStrand(sixers, ACGTo, seq, startOffset, '+');
bedList = slCat(bedList, tmp);
reverseComplement(seq->dna, seq->size);
tmp = searchStrand(sixers, ACGTo, seq, startOffset, '-');
bedList = slCat(bedList, tmp);
if (bedList)
    slReverse(&bedList);
return bedList;
}
void oneChrom(char *database, char *chrom, char *refAliTrack, char *bedTrack,
              struct hash *otherHash, struct stats *stats)
/* Process one chromosome. */
{
    struct bed *bedList = NULL, *bed;
    struct sqlConnection *conn = hAllocConn(database);
    struct sqlResult *sr;
    char **row;
    int rowOffset;
    int chromSize = hChromSize(database, chrom);
    struct binKeeper *bk = binKeeperNew(0, chromSize);
    struct psl *pslList = NULL;
    struct dnaSeq *chromSeq = NULL;

    if (endsWith(bedTrack, ".bed"))
    {
        struct lineFile *lf = lineFileOpen(bedTrack, TRUE);
        char *row[3];
        while (lineFileRow(lf, row))
        {
            if (sameString(chrom, row[0]))
            {
                bed = bedLoad3(row);
                slAddHead(&bedList, bed);
            }
        }
        lineFileClose(&lf);
    }
    else
    {
        sr = hChromQuery(conn, bedTrack, chrom, NULL, &rowOffset);
        while ((row = sqlNextRow(sr)) != NULL)
        {
            bed = bedLoad3(row+rowOffset);
            slAddHead(&bedList, bed);
        }
        sqlFreeResult(&sr);
    }
    slReverse(&bedList);
    uglyf("Loaded beds\n");

    sr = hChromQuery(conn, refAliTrack, chrom, NULL, &rowOffset);
    while ((row = sqlNextRow(sr)) != NULL)
    {
        struct psl *psl = pslLoad(row + rowOffset);
        slAddHead(&pslList, psl);
        binKeeperAdd(bk, psl->tStart, psl->tEnd, psl);
    }
    sqlFreeResult(&sr);
    uglyf("Loaded psls\n");

    chromSeq = hLoadChrom(database, chrom);
    /* Fetch entire chromosome into memory. */
    uglyf("Loaded human seq\n");

    for (bed = bedList; bed != NULL; bed = bed->next)
    {
        struct binElement *el, *list = binKeeperFind(bk, bed->chromStart, bed->chromEnd);
        for (el = list; el != NULL; el = el->next)
        {
            struct psl *fullPsl = el->val;
            struct psl *psl = pslTrimToTargetRange(fullPsl,
                                                   bed->chromStart, bed->chromEnd);
            if (psl != NULL)
            {
                foldPslIntoStats(psl, chromSeq, otherHash, stats);
                pslFree(&psl);
            }
        }
        slFreeList(&list);
        stats->bedCount += 1;
        stats->bedBaseCount += bed->chromEnd - bed->chromStart;
        sqlFreeResult(&sr);
    }
    freeDnaSeq(&chromSeq);
    pslFreeList(&pslList);
    binKeeperFree(&bk);
    hFreeConn(&conn);
}
Exemple #26
0
void reportAlt3Prime(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, 
		    int altBpStart, int altBpEnd, int startV, int endV, FILE *out)
/* Write out an altGraphX record for an alt3Prime splicing
event. Variable names are consistent with the rest of the program, but
can be misleading. Specifically vs = start of alt splicing, ve1 =
first end of alt splicing, etc. even though "vs" is really the end of
an exon. For an alt5Prime splice the edges are:

 Name       Vertexes         Class
 ------     ----------       -----
exon1:      startV->vs       constituative (0)
junction1:  vs->ve1          alternative (1)
junction2:  vs->ve2          alternative (2)
exon2:      ve1->e2        alternative (1)
exon3:      ve2->endV        constituative (0)
*/
{
struct altGraphX *agLoc = NULL;  /* Local altGraphX. */
struct evidence *ev = NULL, *evLoc = NULL;
int *vPos = ag->vPositions;
unsigned char *vT = ag->vTypes;
int *vPosLoc = NULL;    /* Vertex Positions. */
int *eStartsLoc = NULL; /* Edge Starts. */
int *eEndsLoc = NULL;   /* Edge ends. */
unsigned char *vTLoc = NULL;      /* Vertex Types. */
int *eTLoc = NULL;      /* Edge Types. */
int vCLoc = 0;
int eCLoc = 0;
int edgeIx = 0, vertexIx = 0;
int i =0;
struct dyString *dy = NULL;

if(out == NULL)
    return;
AllocVar(agLoc);
agLoc->tName = cloneString(ag->tName);
agLoc->name = cloneString(ag->name);
agLoc->tStart = vPos[startV];
agLoc->tEnd = vPos[endV];
agLoc->strand[0] = ag->strand[0];
agLoc->vertexCount = vCLoc = 6;
agLoc->edgeCount = eCLoc = 5;
agLoc->id = alt3Prime;
/* Allocate some arrays. */
AllocArray(vPosLoc, vCLoc);
AllocArray(eStartsLoc, eCLoc);
AllocArray(eEndsLoc, eCLoc);
AllocArray(vTLoc, vCLoc);
AllocArray(eTLoc, eCLoc);

/* Fill in the vertex positions. */
vertexIx = 0;
vPosLoc[vertexIx++] = vPos[startV]; /* 0 */
vPosLoc[vertexIx++] = vPos[vs];     /* 1 */
vPosLoc[vertexIx++] = vPos[ve1];    /* 2 */
vPosLoc[vertexIx++] = vPos[ve2];    /* 3 */
vPosLoc[vertexIx++] = vPos[ve2];    /* 4 */
vPosLoc[vertexIx++] = vPos[endV];   /* 5 */

/* Fill in the vertex types. */
vertexIx = 0;
vTLoc[vertexIx++] = vT[startV];
vTLoc[vertexIx++] = vT[vs];
vTLoc[vertexIx++] = vT[ve1];
vTLoc[vertexIx++] = vT[vs]; /* Faking a separate exon for the alt spliced portion. */
vTLoc[vertexIx++] = vT[ve2];
vTLoc[vertexIx++] = vT[endV];

edgeIx = 0;

/* Constitutive first exon. */
eStartsLoc[edgeIx] = 0;
eEndsLoc[edgeIx] = 1;
eTLoc[edgeIx] = 0;
ev = evidenceForEdge(ag, startV, vs);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Alternative1 junction (shorter). */
eStartsLoc[edgeIx] = 1;
eEndsLoc[edgeIx] = 2;
eTLoc[edgeIx] = 1;
ev = evidenceForEdge(ag, vs, ve1);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Alt2 junction (longer). */
eStartsLoc[edgeIx] = 1;
eEndsLoc[edgeIx] = 4;
eTLoc[edgeIx] = 2;
ev = evidenceForEdge(ag, vs, ve2);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Alt1 portion of second exon. */
eStartsLoc[edgeIx] = 2;
eEndsLoc[edgeIx] = 3;
eTLoc[edgeIx] = 1;
ev = evidenceForEdge(ag, ve1, endV);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Exon 2 constitutive (shorter exon) */
eStartsLoc[edgeIx] = 4;
eEndsLoc[edgeIx] = 5;
eTLoc[edgeIx] = 0;
ev = evidenceForEdge(ag, ve2, endV);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Package up the evidence, tissues, etc. */
slReverse(&agLoc->evidence);
dy = newDyString(ag->mrnaRefCount*36);
agLoc->mrnaRefCount = ag->mrnaRefCount;
for(i=0; i<ag->mrnaRefCount; i++)
    dyStringPrintf(dy, "%s,", ag->mrnaRefs[i]);
sqlStringDynamicArray(dy->string, &agLoc->mrnaRefs, &i);
dyStringFree(&dy);
agLoc->mrnaTissues = CloneArray(ag->mrnaTissues, ag->mrnaRefCount);
agLoc->mrnaLibs = CloneArray(ag->mrnaLibs, ag->mrnaRefCount);
agLoc->vPositions = vPosLoc;
agLoc->edgeStarts = eStartsLoc;
agLoc->edgeEnds = eEndsLoc;
agLoc->vTypes = vTLoc;
agLoc->edgeTypes = eTLoc;
altGraphXTabOut(agLoc, out);
altGraphXFree(&agLoc);
}
Exemple #27
0
int main(int argc, char *argv[])
{
char *genoListName;
char *cdnaListName;
char *oocFileName;
char *pairFileName;
struct patSpace *patSpace;
long startTime, endTime;
char **genoList;
int genoListSize;
char *genoListBuf;
char **cdnaList;
int cdnaListSize;
char *cdnaListBuf;
char *genoName;
int i;
int estIx = 0;
struct dnaSeq **seqListList = NULL, *seq;
static char hitFileName[512], mergerFileName[512], okFileName[512];
char *outRoot;
struct hash *pairHash;

if (dumpMe)
    {
    bigHtmlFile = mustOpen("C:\\inetpub\\wwwroot\\test\\patAli.html", "w");
    littleHtmlFile = mustOpen("C:\\inetpub\\wwwroot\\test\\patSpace.html", "w");
    htmStart(bigHtmlFile, "PatSpace Alignments");
    htmStart(littleHtmlFile, "PatSpace Index");
    }

if ((hostName = getenv("HOST")) == NULL)
    hostName = "";

if (argc != 6)
    usage();

pushWarnHandler(patSpaceWarnHandler);
startTime = clock1000();
dnaUtilOpen();
makePolys();
genoListName = argv[1];
cdnaListName = argv[2];
oocFileName = argv[3];
pairFileName = argv[4];
outRoot = argv[5];

sprintf(hitFileName, "%s.hit", outRoot);
sprintf(mergerFileName, "%s.glu", outRoot);
sprintf(okFileName, "%s.ok", outRoot);

readAllWords(genoListName, &genoList, &genoListSize, &genoListBuf);
readAllWords(cdnaListName, &cdnaList, &cdnaListSize, &cdnaListBuf);
pairHash = makePairHash(pairFileName);

hitOut = mustOpen(hitFileName, "w");
mergerOut = mustOpen(mergerFileName, "w");
dumpOut = mustOpen("dump.out", "w");
seqListList = needMem(genoListSize*sizeof(seqListList[0]) );
fprintf(hitOut, "Pattern space 0.2 cDNA matcher\n");
fprintf(hitOut, "cDNA files: ", cdnaListSize);
for (i=0; i<cdnaListSize; ++i)
    fprintf(hitOut, " %s", cdnaList[i]);
fprintf(hitOut, "\n");
fprintf(hitOut, "%d genomic files\n", genoListSize);
for (i=0; i<genoListSize; ++i)
    {
    genoName = genoList[i];
    if (!startsWith("//", genoName)  )
        {
        seqListList[i] = seq = faReadAllDna(genoName);
        fprintf(hitOut, "%d els in %s ", slCount(seq), genoList[i]);
        for (; seq != NULL; seq = seq->next)
            fprintf(hitOut, "%d ", seq->size);
        fprintf(hitOut, "\n");
        }
    }

patSpace = makePatSpace(seqListList, genoListSize, oocFileName);

for (i=0; i<cdnaListSize; ++i)
    {
    FILE *f;
    char *estFileName;
    DNA *dna;
    char *estName;
    int size;
    int c;
    int maxSizeForFuzzyFind = 20000;
    int dotCount = 0;

    estFileName = cdnaList[i];
    if (startsWith("//", estFileName)  )
		continue;

    f = mustOpen(estFileName, "rb");
    while ((c = fgetc(f)) != EOF)
    if (c == '>')
        break;
    printf("%s", cdnaList[i]);
    fflush(stdout);
    while (fastFaReadNext(f, &dna, &size, &estName))
        {
	aliSeqName = estName;
        if (size < maxSizeForFuzzyFind)  /* Some day need to fix this somehow... */
            {
            struct hashEl *hel;
            struct cdnaAliList *calList = NULL;

            hel = hashLookup(pairHash, estName);
            if (hel != NULL)    /* Do pair processing. */
                {
                struct estPair *ep;
                struct seq *thisSeq, *otherSeq;

                ep = hel->val;
                if (hel->name == ep->name3)
                    {
                    thisSeq = &ep->seq3;
                    otherSeq = &ep->seq5;
                    }
                else
                    {
                    thisSeq = &ep->seq5;
                    otherSeq = &ep->seq3;
                    }
                if (otherSeq->dna == NULL)  /* First in pair - need to save sequence. */
                    {
                    thisSeq->size = size;
                    thisSeq->dna = needMem(size);
                    memcpy(thisSeq->dna, dna, size);
                    }
                else                        /* Second in pair - do gluing and free partner. */
                    {
                    char mergedName[64];
                    thisSeq->dna = dna;
                    thisSeq->size = size;
                    sprintf(mergedName, "%s_AND_%s", ep->name5, ep->name3);

                    patSpaceFindOne(patSpace, ep->seq5.dna, ep->seq5.size,
                        '+', '5', ep->name5, &calList);
                    reverseComplement(ep->seq5.dna, ep->seq5.size);
                    patSpaceFindOne(patSpace, ep->seq5.dna, ep->seq5.size,
                        '-', '5', ep->name5, &calList);
                    patSpaceFindOne(patSpace, ep->seq3.dna, ep->seq3.size,
                        '+', '3', ep->name3, &calList);
                    reverseComplement(ep->seq3.dna, ep->seq3.size);
                    patSpaceFindOne(patSpace, ep->seq3.dna, ep->seq3.size,
                        '-', '3', ep->name3, &calList);
                    slReverse(&calList);
                    writeMergers(calList, mergedName, genoList);

                    freez(&otherSeq->dna);
                    thisSeq->dna = NULL;
                    thisSeq->size =otherSeq->size = 0;
                    }
                }
            else
                {
                patSpaceFindOne(patSpace, dna, size, '+', '5', estName, &calList);
                reverseComplement(dna, size);
                patSpaceFindOne(patSpace, dna, size, '-', '5', estName, &calList);
                slReverse(&calList);
                writeMergers(calList, estName, genoList);
                }
            ++estIx;
            if ((estIx & 0xfff) == 0)
                {
                printf(".");
                ++dotCount;
                fflush(stdout);
                }
            }
        }
    printf("\n");
    }
aliSeqName = "";
printf("ffSubmitted %3d ffAccepted %3d ffOkScore %3d ffSolidMatch %2d\n",
    ffSubmitted, ffAccepted, ffOkScore, ffSolidMatch);

endTime = clock1000();

printf("Total time is %4.2f\n", 0.001*(endTime-startTime));

/* Write out file who's presense say's we succeeded */
    {
    FILE *f = mustOpen(okFileName, "w");
    fputs("ok", f);
    fclose(f);
    }

if (dumpMe)
    {
    htmEnd(bigHtmlFile);
    htmEnd(littleHtmlFile);
    }
return 0;
}
Exemple #28
0
void reportCassette(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, 
		    int altBpStart, int altBpEnd, int startV, int endV, FILE *out)
/* Write out both an altGraphX and two bed files. For a cassette exon the
 edges are - 
 Name       Vertexes         Class
 ------     ----------       -----
 exon1:     startV->vs       constitutive (cons 0)
 junction1: vs->ve1          alternative1 (alt1 1)
 exon2:     ve1->altBpEnd    alternative1 (alt1 1)
 junction2: altBpEnd->ve2    alternative1 (alt1 1)
 exon3:     ve2->endV        constitutive (cons 0)
 junction3: vs->ve2          alternative2 (alt2 2)
*/
{
struct altGraphX *agLoc = NULL;  /* Local altGraphX. */
struct evidence *ev = NULL, *evLoc = NULL;
int *vPos = ag->vPositions;
unsigned char *vT = ag->vTypes;
int *vPosLoc = NULL;    /* Vertex Positions. */
int *eStartsLoc = NULL; /* Edge Starts. */
int *eEndsLoc = NULL;   /* Edge ends. */
unsigned char *vTLoc = NULL;      /* Vertex Types. */
int *eTLoc = NULL;      /* Edge Types. */
int vCLoc = 0;
int eCLoc = 0;
int i =0;
struct dyString *dy = NULL;
if(out == NULL)
    return;
AllocVar(agLoc);
agLoc->tName = cloneString(ag->tName);
agLoc->name = cloneString(ag->name);
agLoc->tStart = vPos[startV];
agLoc->tEnd = vPos[endV];
agLoc->strand[0] = ag->strand[0];
agLoc->vertexCount = vCLoc = 6;
agLoc->edgeCount = eCLoc = 6;
agLoc->id = altCassette;
/* Allocate some arrays. */
AllocArray(vPosLoc, vCLoc);
AllocArray(eStartsLoc, vCLoc);
AllocArray(eEndsLoc, vCLoc);
AllocArray(vTLoc, vCLoc);
AllocArray(eTLoc, vCLoc);

/* Fill in the vertex positions. */
vPosLoc[0] = vPos[startV];
vPosLoc[1] = vPos[vs];
vPosLoc[2] = vPos[ve1];
vPosLoc[3] = vPos[altBpEnd];
vPosLoc[4] = vPos[ve2];
vPosLoc[5] = vPos[endV];

/* Fill in the vertex types. */
vTLoc[0] = vT[startV];
vTLoc[1] = vT[vs];
vTLoc[2] = vT[ve1];
vTLoc[3] = vT[altBpEnd];
vTLoc[4] = vT[ve2];
vTLoc[5] = vT[endV];

/* Fill in the edges. */
/* Constitutive first exon. */
eStartsLoc[0] = 0;
eEndsLoc[0] = 1;
eTLoc[0] = 0;
ev = evidenceForEdge(ag, startV, vs);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
/* Exon inclusion junction. */
eStartsLoc[1] = 1;
eEndsLoc[1] = 2;
eTLoc[1] = 1;
ev = evidenceForEdge(ag, vs, ve1);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Exon exclusion junction. */
eStartsLoc[2] = 1;
eEndsLoc[2] = 4;
eTLoc[2] = 2;
ev = evidenceForEdge(ag, vs, ve2);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Cassette exon. */
eStartsLoc[3] = 2;
eEndsLoc[3] = 3;
eTLoc[3] = 1;
ev = evidenceForEdge(ag, ve1, altBpEnd);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Exon inclusion junction. */
eStartsLoc[4] = 3;
eEndsLoc[4] = 4;
eTLoc[4] = 1;
ev = evidenceForEdge(ag, altBpEnd, ve2);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Constitutive second exon. */
eStartsLoc[5] = 4;
eEndsLoc[5] = 5;
eTLoc[5] = 0;
ev = evidenceForEdge(ag, ve2, endV);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

slReverse(&agLoc->evidence);

dy = newDyString(ag->mrnaRefCount*36);
agLoc->mrnaRefCount = ag->mrnaRefCount;
for(i=0; i<ag->mrnaRefCount; i++)
    dyStringPrintf(dy, "%s,", ag->mrnaRefs[i]);
sqlStringDynamicArray(dy->string, &agLoc->mrnaRefs, &i);
dyStringFree(&dy);
agLoc->mrnaTissues = CloneArray(ag->mrnaTissues, ag->mrnaRefCount);
agLoc->mrnaLibs = CloneArray(ag->mrnaLibs, ag->mrnaRefCount);
agLoc->vPositions = vPosLoc;
agLoc->edgeStarts = eStartsLoc;
agLoc->edgeEnds = eEndsLoc;
agLoc->vTypes = vTLoc;
agLoc->edgeTypes = eTLoc;
altGraphXTabOut(agLoc, out);
altGraphXFree(&agLoc);
}
void loadSampleZoo(struct track *tg)
	/* Convert sample info in window to linked feature. */
{
	int maxWiggleTrackHeight = 2500;
	struct sqlConnection *conn = hAllocConn(database);
	struct sqlResult *sr;
	char **row;
	int rowOffset;
	struct sample *sample;
	struct linkedFeatures *lfList = NULL, *lf;
	char *hasDense = NULL;
	char *where = NULL;
	char query[256];
	char option[64];

	zooSpeciesHashInit();

	/*see if we have a summary table*/
	safef(query, sizeof(query), "select name from %s where name = '%s' limit 1", tg->table, tg->shortLabel);
	//errAbort( "%s", query );
	hasDense = sqlQuickQuery(conn, query, query, sizeof(query));

	/* If we're in dense mode and have a summary table load it. */
	if(tg->visibility == tvDense)
	{
		if(hasDense != NULL)
		{
			safef(query, sizeof(query), " name = '%s' ", tg->shortLabel);
			where = cloneString(query);
		}
	}

	sr = hRangeQuery(conn, tg->table, chromName, winStart, winEnd, where, &rowOffset);
	while ((row = sqlNextRow(sr)) != NULL)
	{
		sample = sampleLoad(row + rowOffset);
		lf = lfFromSample(sample);
		safef( option, sizeof(option), "zooSpecies.%s", sample->name );
		if( cartUsualBoolean(cart, option, TRUE ))
			slAddHead(&lfList, lf);
		sampleFree(&sample);
	}
	if(where != NULL)
		freez(&where);
	sqlFreeResult(&sr);
	hFreeConn(&conn);
	slReverse(&lfList);

	/* sort to bring items with common names to the same line
	* but only for tracks with a summary table 
	* (with name=shortLabel) in dense mode */
	if( hasDense != NULL )
	{
		sortGroupList = tg; /* used to put track name at top of sorted list. */
		slSort(&lfList, lfNamePositionCmp);
		sortGroupList = NULL;
	}

	/* Sort in species phylogenetic order */
	slSort(&lfList, lfZooCmp);


	tg->items = lfList;

	/*turn off full mode if there are too many rows or each row is too
	* large. A total of maxWiggleTrackHeight is allowed for number of
	* rows times the rowHeight*/
	if( tg->visibility == tvFull && sampleTotalHeight( tg, tvFull ) > maxWiggleTrackHeight  )
	{
		tg->limitedVisSet = TRUE;
		tg->limitedVis = tvDense;
	}
}
Exemple #30
0
void agpVsMap(char *agpName, char *infoName, char *gifName)
/* agpVsMap - Plot clones in agp vs. map coordinates. */
{
struct mapPos *mapList, *mp;
struct agpFrag *agpList, *bp;
struct hash *cloneHash = newHash(14);
struct hashEl *hel;
struct cloneInfo *cloneList = NULL, *clone;
struct memGfx *mg = NULL;
int pixWidth = 600;
int pixHeight = 600;
int rulerHeight = 20;
int maxMapPos = 0, maxAgpPos = 0;
double scaleMap, scaleAgp;
Color orange, green;

mapList = readInfoFile(infoName);
agpList = readAgpFile(agpName);

for (mp = mapList; mp != NULL; mp = mp->next)
    {
    if (mp->phase > 0)
        {
	AllocVar(clone);
	hel = hashAddUnique(cloneHash, mp->cloneName, clone);
	clone->name = hel->name;
	clone->mp = mp;
	slAddHead(&cloneList, clone);
	if (mp->pos > maxMapPos) maxMapPos = mp->pos;
	}
    }
slReverse(&cloneList);

for (bp = agpList; bp != NULL; bp = bp->next)
    {
    if (bp->chromStart > maxAgpPos) maxAgpPos = bp->chromStart;
    }

/* Draw scatterplot on bitmap. */
mg = mgNew(pixWidth, pixHeight);
mgClearPixels(mg);
orange = mgFindColor(mg, 210, 150, 0);
green = mgFindColor(mg, 0, 200, 0);
mgDrawRuler(mg, 0, pixHeight-rulerHeight, rulerHeight, pixWidth, MG_BLACK,
       mgSmallFont(), 0, maxMapPos+1);
scaleMap = (double)pixWidth/(double)(maxMapPos+1.0);
scaleAgp = (double)(pixHeight)/(double)(maxAgpPos+1.0);
for (bp = agpList; bp != NULL; bp = bp->next)
    {
    char cloneName[128];
    fragToCloneName(bp->frag, cloneName);
    clone = hashFindVal(cloneHash, cloneName);
    if (clone == NULL)
        warn("%s is in %s but not %s", cloneName, 
	    agpName, infoName);
    else
	{
	int x = round(scaleMap*clone->mp->pos);
	int y = pixHeight - round(scaleAgp*bp->chromStart);
	int phase = clone->mp->phase;
	int back;
	if (phase <= 1) back = green;
	else if (phase == 2) back = orange;
	else back = MG_RED;
	drawPlus(mg, x, y, back);
	}
    }

mgSaveGif(mg, gifName);
}