Exemple #1
0
void bamLoadItemsCore(struct track *tg, boolean isPaired)
/* Load BAM data into tg->items item list, unless zoomed out so far
 * that the data would just end up in dense mode and be super-slow. */
{
/* protect against temporary network error */
struct errCatch *errCatch = errCatchNew();
if (errCatchStart(errCatch))
    {
    struct hash *pairHash = isPaired ? hashNew(18) : NULL;
    int minAliQual = atoi(cartOrTdbString(cart, tg->tdb, BAM_MIN_ALI_QUAL, BAM_MIN_ALI_QUAL_DEFAULT));
    char *colorMode = cartOrTdbString(cart, tg->tdb, BAM_COLOR_MODE, BAM_COLOR_MODE_DEFAULT);
    char *grayMode = cartOrTdbString(cart, tg->tdb, BAM_GRAY_MODE, BAM_GRAY_MODE_DEFAULT);
    char *userTag = cartOrTdbString(cart, tg->tdb, BAM_COLOR_TAG, BAM_COLOR_TAG_DEFAULT);
    int aliQualShadeMin = 0, aliQualShadeMax = 99, baseQualShadeMin = 0, baseQualShadeMax = 40;
    parseIntRangeSetting(tg->tdb, "aliQualRange", &aliQualShadeMin, &aliQualShadeMax);
    parseIntRangeSetting(tg->tdb, "baseQualRange", &baseQualShadeMin, &baseQualShadeMax);
    struct bamTrackData btd = {tg, pairHash, minAliQual, colorMode, grayMode, userTag,
			       aliQualShadeMin, aliQualShadeMax, baseQualShadeMin, baseQualShadeMax};

    char *fileName = trackDbSetting(tg->tdb, "bigDataUrl");
    if (fileName == NULL)
	{
	if (tg->customPt)
	    {
	    errAbort("bamLoadItemsCore: can't find bigDataUrl for custom track %s", tg->track);
	    }
	else
	    {
	    struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb);
	    fileName = bamFileNameFromTable(conn, tg->table, chromName);
	    hFreeConn(&conn);
	    }
	}

    char *fileName2 = hReplaceGbdb(fileName);

    char posForBam[512];
    safef(posForBam, sizeof(posForBam), "%s:%d-%d", chromName, winStart, winEnd);
    char *cacheDir =  cfgOption("cramRef");
    char *refUrl = trackDbSetting(tg->tdb, "refUrl");
    if (!isPaired)
	bamFetchPlus(fileName2, posForBam, addBam, &btd, NULL, refUrl, cacheDir);
    else
	{
	char *setting = trackDbSettingClosestToHomeOrDefault(tg->tdb, "pairSearchRange", "20000");
	int pairSearchRange = atoi(setting);
	if (pairSearchRange > 0)
	    safef(posForBam, sizeof(posForBam), "%s:%d-%d", chromName,
		  max(0, winStart-pairSearchRange), winEnd+pairSearchRange);
	bamFetchPlus(fileName2, posForBam, addBamPaired, &btd, NULL, refUrl, cacheDir);
	struct hashEl *hel;
	struct hashCookie cookie = hashFirst(btd.pairHash);
	while ((hel = hashNext(&cookie)) != NULL)
	    {
	    struct linkedFeatures *lf = hel->val;
	    if (lf->start < winEnd && lf->end > winStart)
		slAddHead(&(tg->items), lfsFromLf(lf));
	    }
	}
    freez(&fileName2);

    if (tg->visibility != tvDense)
	{
	slReverse(&(tg->items));
	if (isPaired)
	    slSort(&(tg->items), linkedFeaturesSeriesCmp);
	else if (sameString(colorMode, BAM_COLOR_MODE_STRAND))
	    slSort(&(tg->items), linkedFeaturesCmpOri);
	else if (sameString(colorMode, BAM_COLOR_MODE_GRAY) &&
		 sameString(grayMode, BAM_GRAY_MODE_ALI_QUAL))
	    slSort(&(tg->items), linkedFeaturesCmpScore);
	else
	    slSort(&(tg->items), linkedFeaturesCmpStart);
	if (slCount(tg->items) > MAX_ITEMS_FOR_MAPBOX)
	    {
	    // flag drawItems to make a mapBox for the whole track
	    tg->customInt = 1;
	    tg->mapItem = dontMapItem;
	    }
	}
    }
errCatchEnd(errCatch);
if (errCatch->gotError)
    {
    tg->networkErrMsg = cloneString(errCatch->message->string);
    tg->drawItems = bigDrawWarning;
    tg->totalHeight = bigWarnTotalHeight;
    }
errCatchFree(&errCatch);
}
Exemple #2
0
struct dgNodeRef *dgFindPath(struct diGraph *dg, struct dgNode *a, struct dgNode *b)
/* Find shortest path from a to b.  Return NULL if can't be found. */
{
struct dgNodeRef *refList  = NULL, *ref;
struct dgConnection *con;
struct dgNode *node, *nNode;
struct dlList *fifo;
struct dlNode *ffNode;
struct dgNode endNode;
int fifoSize = 1;

/* Do some quick and easy tests first to return if have no way out
 * of node A, or if B directly follows A. */
if (a->nextList == NULL)
    return NULL;
if (a == b)
    {
    AllocVar(ref);
    ref->node = a;
    return ref;
    }
if ((con = dgFindNodeInConList(a->nextList, b)) != NULL)
    {
    AllocVar(refList);
    refList->node = a;
    node = con->node;
    AllocVar(ref);
    ref->node = node;
    slAddTail(&refList, ref);
    return refList;
    }

/* Set up for breadth first traversal.  Will use a doubly linked
 * list as a fifo. */
for (node = dg->nodeList; node != NULL; node = node->next)
    node->tempEntry = NULL;
fifo = newDlList();
dlAddValTail(fifo, a);
a->tempEntry = &endNode;

while ((ffNode = dlPopHead(fifo)) != NULL)
    {
    --fifoSize;
    node = ffNode->val;
    freeMem(ffNode);
    for (con = node->nextList; con != NULL; con = con->next)
	{
	nNode = con->node;
	if (nNode->tempEntry == NULL)
	    {
	    nNode->tempEntry = node;
	    if (nNode == b)
		{
		while (nNode != &endNode && nNode != NULL)
		    {
		    AllocVar(ref);
		    ref->node = nNode;
		    slAddHead(&refList, ref);
		    nNode = nNode->tempEntry;
		    }
		break;
		}
	    else
		{
		dlAddValTail(fifo, nNode);
		++fifoSize;
		if (fifoSize > 100000)
		    errAbort("Internal error in dgFindPath");
		}
	    }
	}
    }
freeDlList(&fifo);
return refList;
}
Exemple #3
0
static void clusterClone(int argc, char *argv[])
{
int i;

for (i=1; i < argc; ++i)
    {
    struct lineFile *lf;
    struct psl *psl;
    unsigned tSize;
    char *prevAccPart = (char *)NULL;
    char *prevAccName = (char *)NULL;
    char *prevTargetName = (char *)NULL;
    struct hashEl *el;
    struct hash *chrHash = newHash(0);
    struct hash *coordHash = newHash(0);
    struct coordEl *coord;
    struct coordEl **coordListPt = (struct coordEl **) NULL;
    unsigned querySize = 0;
    int partCount = 0;
    int partsConsidered = 0;

    verbose(2,"#\tprocess: %s\n", argv[i]);
    lf=pslFileOpen(argv[i]);
    while ((struct psl *)NULL != (psl = pslNext(lf)) )
	{
	char *accName = (char *)NULL;
	char *targetName = (char *)NULL;
	int chrCount = 0;
	double percentCoverage;

	accName = cloneString(psl->qName);
	if ((char *)NULL == prevAccPart)
	    {
	    prevAccPart = cloneString(psl->qName);  /* first time */
	    querySize = psl->qSize;
	    ++partsConsidered;
	    }
	chopSuffixAt(accName,'_');

	if ((char *)NULL == prevAccName)
		prevAccName = cloneString(accName);  /* first time */
	if ((char *)NULL == prevTargetName)
		prevTargetName = cloneString(psl->tName);  /* first time */

	/*	encountered a new accession name, process the one we
 	 *	were working on
	 */
	if (differentWord(accName, prevAccName))
	    {
	    if (partCount > 0)
		processResult(chrHash, coordHash, prevAccName, querySize,
		    partsConsidered);
	    else
		verbose(1,"# ERROR %s %s - no coordinates found in %d parts considered\n",
		    prevTargetName, prevAccName, partsConsidered);
	    freeMem(prevAccName);
	    prevAccName = cloneString(accName);
	    freeHash(&chrHash);
	    freeHash(&coordHash);
	    chrHash = newHash(0);
	    coordHash = newHash(0);
	    querySize = 0;
	    partCount = 0;
	    partsConsidered = 0;
	    }

	tSize = psl->tEnd - psl->tStart;
	percentCoverage = 100.0*((double)(tSize+1)/(psl->qSize + 1));
	if (differentWord(psl->qName, prevAccPart))
	    {
	    ++partsConsidered;
	    querySize += psl->qSize;
	    freeMem(prevAccPart);
	    prevAccPart = cloneString(psl->qName);
	    }

	targetName = cloneString(psl->tName);
	if (differentWord(targetName, prevTargetName))
	    {
	    freeMem(prevTargetName);
	    prevTargetName = cloneString(targetName);
	    }
	/*	keep a hash of chrom names encountered	*/
	el = hashLookup(chrHash, targetName);
	if (el == NULL)
	    {
	    if (percentCoverage > minCover)
		{
		hashAddInt(chrHash, targetName, 1);
		chrCount = 1;
		}
	    else
		{
		hashAddInt(chrHash, targetName, 0);
		chrCount = 0;
		}
	    }
	else
	    {
	    if (percentCoverage > minCover)
		{
		chrCount = ptToInt(el->val) + 1;
		el->val=intToPt(chrCount);
		}
	    }

	AllocVar(coord);
	coord->start = psl->tStart;
	coord->end = psl->tEnd;
	coord->qSize = psl->qSize;
	coord->strand = sameWord(psl->strand,"+") ? 1 : 0;
	/*	when coverage is sufficient	*/
	if (percentCoverage > minCover)
	    {
	    ++partCount;
	    coord->name = cloneString(psl->qName);
	    /*	for each chrom name, accumulate a list of coordinates */
	    el = hashLookup(coordHash, targetName);
	    if (el == NULL)
		{
		AllocVar(coordListPt);
		hashAdd(coordHash, targetName, coordListPt);
		}
	    else
		{
		coordListPt = el->val;
		}
	    slAddHead(coordListPt,coord);
	verbose(2,"# %s\t%u\t%u\t%u\t%.4f\t%d %s:%d-%d %s\n",
	    psl->qName, psl->qSize, tSize, tSize - psl->qSize,
	    percentCoverage, chrCount, psl->tName, psl->tStart, psl->tEnd,
	    psl->strand);
	    }
	else
	    {
	verbose(3,"# %s\t%u\t%u\t%u\t%.4f\t%d %s:%d-%d %s\n",
	    psl->qName, psl->qSize, tSize, tSize - psl->qSize,
	    percentCoverage, chrCount, psl->tName, psl->tStart, psl->tEnd,
	    psl->strand);
	    }


	freeMem(accName);
	freeMem(targetName);
	pslFree(&psl);
	}
    if (partCount > 0)
	processResult(chrHash, coordHash, prevAccName, querySize,
	    partsConsidered);
    else
	verbose(1,"# ERROR %s %s - no coordinates found\n",
	    prevTargetName, prevAccName);
    freeMem(prevAccName);
    freeHash(&chrHash);
    freeHash(&coordHash);
    lineFileClose(&lf);
    }
}	/*	static void clusterClone()	*/
Exemple #4
0
static void pslRefRecycle(struct pslSets *ps, struct pslRef *pr)
/* recycle a pslRef object */
{
memset(pr, 0, sizeof(struct pslRef));
slAddHead(&ps->refPool, pr);
}
Exemple #5
0
void pslSort2(char *outDir, char *tempDir, boolean noHead)
/* Do second step of sort - merge all sorted files in tempDir
 * to final outdir. */
{
char fileName[512];
struct slName *tmpList, *tmp;
struct midFile *midList = NULL, *mid;
int aliCount = 0;
FILE *f = NULL;
char lastTargetAcc[256];
char targetAcc[256];


strcpy(lastTargetAcc, "");
tmpList = listDir(tempDir, "tmp*.psl");
if (tmpList == NULL)
    errAbort("No tmp*.psl files in %s\n", tempDir);
for (tmp = tmpList; tmp != NULL; tmp = tmp->next)
    {
    sprintf(fileName, "%s/%s", tempDir, tmp->name);
    AllocVar(mid);
    mid->lf = pslFileOpen(fileName);
    slAddHead(&midList, mid);
    }
printf("writing %s", outDir);
fflush(stdout);
/* Write out the lowest sorting line from mid list until done. */
for (;;)
    {
    struct midFile *bestMid = NULL;
    if ( (++aliCount & 0xffff) == 0)
	{
	printf(".");
	fflush(stdout);
	}
    for (mid = midList; mid != NULL; mid = mid->next)
	{
	if (mid->lf != NULL && mid->psl == NULL)
	    {
	    if ((mid->psl = nextPsl(mid->lf)) == NULL)
		lineFileClose(&mid->lf);
	    }
	if (mid->psl != NULL)
	    {
	    if (bestMid == NULL || pslCmpTarget(&mid->psl, &bestMid->psl) < 0)
		bestMid = mid;
	    }
	}
    if (bestMid == NULL)
	break;
    getTargetAcc(bestMid->psl->tName, targetAcc);
    if (!sameString(targetAcc, lastTargetAcc))
	{
	strcpy(lastTargetAcc, targetAcc);
	carefulClose(&f);
	sprintf(fileName, "%s/%s.psl", outDir, targetAcc);
	f = mustOpen(fileName, "w");
	if (!noHead)
	    pslWriteHead(f);
	}
    pslTabOut(bestMid->psl, f);
    pslFree(&bestMid->psl);
    }
carefulClose(&f);
printf("\n");

printf("Cleaning up temp files\n");
for (tmp = tmpList; tmp != NULL; tmp = tmp->next)
    {
    sprintf(fileName, "%s/%s", tempDir, tmp->name);
    remove(fileName);
    }
}
Exemple #6
0
void addChainQ(struct chrom *chrom, struct chrom *otherChrom, struct chain *chain)
/* Add Q side of chain to fill/gap tree of chromosome. 
 * For this side we have to cope with reverse strand
 * issues. */
{
struct slRef *spaceList;
struct slRef *ref;
struct cBlock *startBlock, *block, *nextBlock;
int gapStart, gapEnd;
struct gap *gap;
boolean isRev = (chain->qStrand == '-'); 
int qStart = chain->qStart, qEnd = chain->qEnd;

if (isRev)
    {
    reverseIntRange(&qStart, &qEnd, chain->qSize);
    reverseBlocksQ(&chain->blockList, chain->qSize);
    }
spaceList = findSpaces(chrom->spaces,qStart,qEnd);
startBlock = chain->blockList;

for (ref = spaceList; ref != NULL; ref = ref->next)
    {
    struct space *space = ref->val;
    struct fill *fill;
    for (;;)
        {
	nextBlock = startBlock->next;
	if (nextBlock == NULL)
	    break;
	gapEnd = nextBlock->qStart;
	if (gapEnd > space->start)
	    break;
	startBlock = nextBlock;
	}
    if ((fill = fillSpace(chrom, space, chain, startBlock, TRUE)) 
    	!= NULL)
	{
	for (block = startBlock; ; block = nextBlock)
	    {
	    nextBlock = block->next;
	    if (nextBlock == NULL)
		break;
	    gapStart = block->qEnd;
	    gapEnd = nextBlock->qStart;
	    if (strictlyInside(space->start, space->end, gapStart, gapEnd))
		{
		int ts, te;
		if (chain->qStrand == '+')
		    {
		    ts = block->tEnd;
		    te = nextBlock->tStart;
		    }
		else
		    {
		    ts = nextBlock->tStart;
		    te = block->tEnd;
		    }
		gap = gapNew(gapStart, gapEnd, ts, te);
		addSpaceForGap(chrom, gap);
		slAddHead(&fill->gapList, gap);
		}
	    }
	freez(&ref->val);	/* aka space */
	}
    }
slFreeList(&spaceList);
if (isRev)
    reverseBlocksQ(&chain->blockList, chain->qSize);
}
Exemple #7
0
void doPastedIdentifiers(struct sqlConnection *conn)
/* Process submit in paste identifiers page. */
{
char *idText = trimSpaces(cartString(cart, hgtaPastedIdentifiers));
htmlOpen("Table Browser (Input Identifiers)");
if (isNotEmpty(idText))
    {
    /* Write terms to temp file, checking whether they have matches, and
     * save temp file name. */
    boolean saveIdText = (strlen(idText) < MAX_IDTEXT);
    char *idTextForLf = saveIdText ? cloneString(idText) : idText;
    struct lineFile *lf = lineFileOnString("idText", TRUE, idTextForLf);
    char *line, *word;
    struct tempName tn;
    FILE *f;
    int totalTerms = 0, foundTerms = 0;
    struct slName* missingTerms = NULL;
    struct dyString *exampleMissingIds = dyStringNew(256);
    char *actualDb = database;
    if (sameWord(curTable, WIKI_TRACK_TABLE))
	actualDb = wikiDbName();
    struct hTableInfo *hti = maybeGetHti(actualDb, curTable, conn);
    char *idField = getIdField(actualDb, curTrack, curTable, hti);
    if (idField == NULL)
	{
	warn("Sorry, I can't tell which field of table %s to treat as the "
	     "identifier field.", curTable);
	webNewSection("Table Browser");
	cartRemove(cart, hgtaIdentifierDb);
	cartRemove(cart, hgtaIdentifierTable);
	cartRemove(cart, hgtaIdentifierFile);
	mainPageAfterOpen(conn);
	htmlClose();
	return;
	}
    struct slName *allTerms = NULL, *term;
    while (lineFileNext(lf, &line, NULL))
	{
	while ((word = nextWord(&line)) != NULL)
	    {
	    term = slNameNew(word);
	    slAddHead(&allTerms, term);
	    totalTerms++;
	    }
	}
    slReverse(&allTerms);
    lineFileClose(&lf);
    char *extraWhere = NULL;
    int maxIdsInWhere = cartUsualInt(cart, "hgt_maxIdsInWhere", DEFAULT_MAX_IDS_IN_WHERE);
    if (totalTerms > 0 && totalTerms <= maxIdsInWhere)
	extraWhere = slNameToInExpression(idField, allTerms);

    struct lm *lm = lmInit(0);
    struct hash *matchHash = getAllPossibleIds(conn, lm, idField, extraWhere);
    trashDirFile(&tn, "hgtData", "identifiers", ".key");
    f = mustOpen(tn.forCgi, "w");
    for (term = allTerms;  term != NULL;  term = term->next)
	{
	struct slName *matchList = NULL, *match;
	if (matchHash == NULL)
	    {
	    matchList = slNameNew(term->name);
	    }
	else
	    {
	    /* Support multiple alias->id mappings: */
	    char upcased[1024];
	    safecpy(upcased, sizeof(upcased), term->name);
	    touppers(upcased);
	    struct hashEl *hel = hashLookup(matchHash, upcased);
	    if (hel != NULL)
		{
		matchList = slNameNew((char *)hel->val);
		while ((hel = hashLookupNext(hel)) != NULL)
		    {
		    match = slNameNew((char *)hel->val);
		    slAddHead(&matchList, match);
		    }
		}
	    }
	if (matchList != NULL)
	    {
	    foundTerms++;
	    for (match = matchList;  match != NULL;  match = match->next)
		{
		mustWrite(f, match->name, strlen(match->name));
		mustWrite(f, "\n", 1);
		}
	    }
	else 
	    {
	    slAddHead(&missingTerms, slNameNew(term->name));
	    }
	}
    slReverse(&missingTerms);
    carefulClose(&f);
    cartSetString(cart, hgtaIdentifierDb, database);
    cartSetString(cart, hgtaIdentifierTable, curTable);
    cartSetString(cart, hgtaIdentifierFile, tn.forCgi);
    if (saveIdText)
	freez(&idTextForLf);
    else
	cartRemove(cart, hgtaPastedIdentifiers);
    int missingCount = totalTerms - foundTerms;
    if (missingCount > 0)
	{
	char *xrefTable, *aliasField;
	getXrefInfo(conn, &xrefTable, NULL, &aliasField);
	boolean xrefIsSame = xrefTable && sameString(curTable, xrefTable);
	struct tempName tn;
	trashDirFile(&tn, "hgt/missingIds", cartSessionId(cart), ".tmp");
	FILE *f = mustOpen(tn.forCgi, "w");
	int exampleCount = 0;
	for (term = missingTerms;  term != NULL;  term = term->next)
	    {
	    if (exampleCount < 10)
		{
		++exampleCount;
		dyStringPrintf(exampleMissingIds, "%s\n", term->name);
		}
	    fprintf(f, "%s\n", term->name);
	    }
	carefulClose(&f);

	dyStringPrintf(exampleMissingIds, "\n<a href=%s>Complete list of missing identifiers<a>\n", tn.forHtml);

	warn("Note: %d of the %d given identifiers have no match in "
	     "table %s, field %s%s%s%s%s.  "
	     "Try the \"describe table schema\" button for more "
	     "information about the table and field.\n"
	     "%d %smissing identifier(s):\n"
	     "%s\n",
	     (totalTerms - foundTerms), totalTerms,
	     curTable, idField,
	     (xrefTable ? (xrefIsSame ? "" : " or in alias table ") : ""),
	     (xrefTable ? (xrefIsSame ? "" : xrefTable) : ""),
	     (xrefTable ? (xrefIsSame ? " or in field " : ", field ") : ""),
	     (xrefTable ? aliasField : ""),
	     exampleCount,
	     exampleCount < missingCount ? "example " : "",
	     exampleMissingIds->string
	    );
	webNewSection("Table Browser");
	}
    lmCleanup(&lm);
    hashFree(&matchHash);
    }
else
    {
    cartRemove(cart, hgtaIdentifierFile);
    }
mainPageAfterOpen(conn);
htmlClose();
}
void printSettingsWithUrls(struct hash *ra,char *urlSetting,char *nameSetting,char *idSetting)
// will print one or more urls with name and optional id.  Only Name is required!
// If more than one, then should add same number of slots to each ("fred;ethyl" & " ;wife")
{
char *names = hashFindVal(ra, nameSetting);
struct slName *nameList = slNameListFromString(names, ';');;

char *urls = NULL;
struct slName *urlList = NULL;
char *ids = NULL;
struct slName *idList = NULL;
if (idSetting != NULL)
    ids = hashFindVal(ra, idSetting);
if (ids != NULL)
    {
    idList = slNameListFromString(ids, ';');
    if (slCount(idList) > slCount(nameList))
        {
        if (slCount(nameList) == 1)
            {
            while (slCount(nameList) < slCount(idList))
                slAddHead(&nameList,slNameNew(nameList->name));
            }
        else
            errAbort("The number of of items in %s and %s must match for term %s",
                     nameSetting,idSetting,(char *)hashMustFindVal(ra,CV_TERM));
        }
    }

if (urlSetting != NULL)
    urls = hashFindVal(ra, urlSetting);
if (urls != NULL)
    {
    if (slCount(nameList) == 1)
        urlList = slNameNew(urls); // It is the case that singleton URLs sometimes have ';'!
    else
        {
        urlList = slNameListFromString(urls, ';');
        if (slCount(urlList) > slCount(nameList))
            errAbort("The number of of items in %s and %s must match for term %s",
                     nameSetting,urlSetting,(char *)hashMustFindVal(ra,CV_TERM));
        }
    }

printf("  <TD>");

// while there are items in the list of vendorNames, print the vendorName 
//  and vendorID together with the url if present
struct slName *curName = NULL;
struct slName *curId;
struct slName *curUrl;

for (curName=nameList,curId=idList,curUrl=urlList; curName != NULL; curName=curName->next)
    {
    if (curName!=nameList)  // Break between links
        printf("<BR>\n      ");

    // if there is a url, add it as a link
    char *url = NULL;
    if (curUrl != NULL)
        {
        url = trimSpaces(curUrl->name);
        if (isNotEmpty(url))
            printf("<A TARGET=_BLANK HREF=%s>", url);
        curUrl=curUrl->next;
        }

    printf("%s", curName->name);
    if (curId != NULL)
        {
        char *id = trimSpaces(curId->name);
        if (isNotEmpty(id))
            printf(" %s", id );
        curId=curId->next;
        }

    if (isNotEmpty(url))
        printf("</A>");
    }
puts("</TD>");

// Free the memory
slFreeList(&nameList);
slFreeList(&idList);
slFreeList(&urlList);
}
Exemple #9
0
struct bbiInterval *bigWigIntervalQuery(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
	struct lm *lm)
/* Get data for interval.  Return list allocated out of lm. */
{
if (bwf->typeSig != bigWigSig)
   errAbort("Trying to do bigWigIntervalQuery on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);
struct bbiInterval *el, *list = NULL;
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir, 
	chrom, start, end, NULL);
struct fileOffsetSize *block;
struct udcFile *udc = bwf->udc;
boolean isSwapped = bwf->isSwapped;
float val;
int i;

// slSort(&blockList, fileOffsetSizeCmp);
struct fileOffsetSize *mergedBlocks = fileOffsetSizeMerge(blockList);
for (block = mergedBlocks; block != NULL; block = block->next)
    {
    udcSeek(udc, block->offset);
    char *blockBuf = needLargeMem(block->size);
    udcRead(udc, blockBuf, block->size);
    char *blockPt = blockBuf, *blockEnd = blockBuf + block->size;
    while (blockPt < blockEnd)
	{
	struct bwgSectionHead head;
	bwgSectionHeadFromMem(&blockPt, &head, isSwapped);
	switch (head.type)
	    {
	    case bwgTypeBedGraph:
		{
		for (i=0; i<head.itemCount; ++i)
		    {
		    bits32 s = memReadBits32(&blockPt, isSwapped);
		    bits32 e = memReadBits32(&blockPt, isSwapped);
		    val = memReadFloat(&blockPt, isSwapped);
		    if (s < start) s = start;
		    if (e > end) e = end;
		    if (s < e)
			{
			lmAllocVar(lm, el);
			el->start = s;
			el->end = e;
			el->val = val;
			slAddHead(&list, el);
			}
		    }
		break;
		}
	    case bwgTypeVariableStep:
		{
		for (i=0; i<head.itemCount; ++i)
		    {
		    bits32 s = memReadBits32(&blockPt, isSwapped);
		    bits32 e = s + head.itemSpan;
		    val = memReadFloat(&blockPt, isSwapped);
		    if (s < start) s = start;
		    if (e > end) e = end;
		    if (s < e)
			{
			lmAllocVar(lm, el);
			el->start = s;
			el->end = e;
			el->val = val;
			slAddHead(&list, el);
			}
		    }
		break;
		}
	    case bwgTypeFixedStep:
		{
		bits32 s = head.start;
		bits32 e = s + head.itemSpan;
		for (i=0; i<head.itemCount; ++i)
		    {
		    val = memReadFloat(&blockPt, isSwapped);
		    bits32 clippedS = s, clippedE = e;
		    if (clippedS < start) clippedS = start;
		    if (clippedE > end) clippedE = end;
		    if (clippedS < clippedE)
			{
			lmAllocVar(lm, el);
			el->start = clippedS;
			el->end = clippedE;
			el->val = val;
			slAddHead(&list, el);
			}
		    s += head.itemStep;
		    e += head.itemStep;
		    }
		break;
		}
	    default:
		internalErr();
		break;
	    }
	}
    }
slFreeList(&mergedBlocks);
slFreeList(&blockList);
slReverse(&list);
return list;
}
Exemple #10
0
struct hash *agpLoadAll(char *agpFile)
/* load AGP entries into a hash of AGP lists, one per chromosome */
{
struct hash *agpHash = newHash(0);
struct lineFile *lf = lineFileOpen(agpFile, TRUE);
char *words[9];
int lastPos = 0;
int wordCount;
struct agpFrag *agpFrag;
struct agpGap *agpGap;
char *chrom;
struct agp *agp;
struct hashEl *hel;

while ((wordCount = lineFileChopNext(lf, words, ArraySize(words))) != 0)
    {
    lineFileExpectAtLeast(lf, 8, wordCount);
    chrom = words[0];
    if (!hashFindVal(agpHash, chrom))
        lastPos = 1;
    AllocVar(agp);
    if (words[4][0] != 'N' && words[4][0] != 'U')
        {
        /* not a gap */
        lineFileExpectWords(lf, 9, wordCount);
        agpFrag = agpFragLoad(words);
        if (agpFrag->chromStart != lastPos)
            errAbort(
               "Frag start (%d, %d) doesn't match previous end line %d of %s\n",
                     agpFrag->chromStart, lastPos, lf->lineIx, lf->fileName);
        if (agpFrag->chromEnd - agpFrag->chromStart != 
                        agpFrag->fragEnd - agpFrag->fragStart)
            errAbort("Sizes don't match in %s and %s line %d of %s\n",
                    agpFrag->chrom, agpFrag->frag, lf->lineIx, lf->fileName);
        lastPos = agpFrag->chromEnd + 1;
        agp->entry = agpFrag;
        agp->isFrag = TRUE;
        }
    else
        {
        /* gap */
        lineFileExpectWords(lf, 8, wordCount);
        agpGap = agpGapLoad(words);
        if (agpGap->chromStart != lastPos)
            errAbort("Gap start (%d, %d) doesn't match previous end line %d of %s\n",
                     agpGap->chromStart, lastPos, lf->lineIx, lf->fileName);
        lastPos = agpGap->chromEnd + 1;
        agp->entry = agpGap;
        agp->isFrag = FALSE;
        }
    if ((hel = hashLookup(agpHash, chrom)) == NULL)
        hashAdd(agpHash, chrom, agp);
    else
        slAddHead(&(hel->val), agp);
    }
#ifndef DEBUG
    {
struct hashCookie cookie;
struct hashEl *hel;
cookie = hashFirst(agpHash);
while ((hel = hashNext(&cookie)) != NULL)
    {
    struct agp *agpList;
    agpList = (struct agp *)hel->val;
    /*
    for (agp = agpList; agp != NULL; agp = agp->next)
        printf("isFrag: %d\n", agp->isFrag);
        */
    }
    }
#endif
/* reverse AGP lists */
//hashTraverseVals(agpHash, slReverse);
#ifndef DEBUG
    {
struct hashCookie cookie;
struct hashEl *hel;
cookie = hashFirst(agpHash);
while ((hel = hashNext(&cookie)) != NULL)
    {
    struct agp *agpList;
    slReverse(&hel->val);
    agpList = hel->val;
    /*
    agpList = (struct agp *)hel->val;
    slReverse(&agpList);
    hashRemove(agpHash, hel->name);
    hashAdd(agpHash, hel->name, agpList);
    */
    /*
    for (agp = agpList; agp != NULL; agp = agp->next)
        printf("isFrag: %d\n", agp->isFrag);
        */
    }
    }
#endif
return agpHash;
}
void loadCodeBlast(struct track *tg)
/* from the bed 6+1 codeBlast table, make a linkedFeaturesSeries and load it.  */
{
struct linkedFeaturesSeries *lfs = NULL, *originalLfs, *codeLfs, *lfsList = NULL;
struct linkedFeatures *lf;
struct slName *codes = NULL, *track=NULL, *scores=NULL;
struct codeBlast *bedList;
struct codeBlast *cb, *list=NULL;
struct sqlConnection *conn = hAllocConn(database);
struct sqlResult *sr;

char **temparray3;
char *temparray[32];
char *temparray2;
char **row;
char *tempstring;
int x;
int cutoff;
char cMode[64];

/*The most common names used to display method*/
char *codeNames[18] = {"within genus", "\t", "crenarchaea", "euryarchaea", "\t", "bacteria", 
		       "\t", "eukarya", "\t", "thermophile", "hyperthermophile","acidophile",
		       "alkaliphile", "halophile", "methanogen", "strict aerobe",
		       "strict anaerobe", "anaerobe or aerobe"}; int i;
safef(cMode, sizeof(cMode), "%s.scoreFilter", tg->tdb->track);
cutoff=cartUsualInt(cart, cMode,0 );
sr=hRangeQuery(conn, tg->table, chromName, winStart, winEnd, NULL, 0);

while ((row = sqlNextRow(sr)) != NULL)
    {
    cb = codeBlastLoad(row);
    slAddHead(&list, cb);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
slReverse(&list);
if(list == NULL)
    return;
for(cb = list; cb != NULL; cb = cb->next)
    {
    AllocVar(lfs);
    AllocVar(lf);
    lfs->name = cloneString(cb->name);
    lf = lfFromBed6(cb,0,1000);
    lf->score = cb->score;
    tempstring=cloneString(cb->code);
    
    chopString(tempstring, "," , temparray, ArraySize(temparray));
    if(sameWord(database, "pyrFur2"))
	{
	temparray3=(char**)calloc(19*8,sizeof(char**));
	for(x=0; x<19; x++)
	    {
	    temparray3[x]=(char *)calloc(256, sizeof(char*));
	    /* Fix to cloneString problem when both patricia and my track was showing at the same time */
	    if(temparray[x]!=NULL)
		{
		if(atoi(temparray[x])==1000)
		    temparray3[x]="1000";
		else if(atoi(temparray[x])==900)
		    temparray3[x]="900";
		else if(atoi(temparray[x])==800)
		    temparray3[x]="800";
		else if(atoi(temparray[x])==700)
		    temparray3[x]="700";
		else if(atoi(temparray[x])==600)
		    temparray3[x]="600";
		else if(atoi(temparray[x])==500)
		    temparray3[x]="500";
		else if(atoi(temparray[x])==400)
		    temparray3[x]="400";
		else if(atoi(temparray[x])==300)
		    temparray3[x]="300";
		else if(atoi(temparray[x])==200)
		    temparray3[x]="200";
		else if(atoi(temparray[x])==100)
		    temparray3[x]="100";
		else
		    temparray3[x]="0";
		}
	    }
	}
    else
	{
	temparray3=(char**)calloc(18*8,sizeof(char**));
	for(x=0; x<18; x++)
	    {
	    temparray3[x]=(char *)calloc(256, sizeof(char*));
	    /* Fix to cloneString problem when both patricia and my track was showing at the same time */
	    if(temparray[x]!=NULL)
		{
		if(atoi(temparray[x])==1000)
		    temparray3[x]="1000";
		else if(atoi(temparray[x])==900)
		    temparray3[x]="900";
		else if(atoi(temparray[x])==800)
		    temparray3[x]="800";
		else if(atoi(temparray[x])==700)
		    temparray3[x]="700";
		else if(atoi(temparray[x])==600)
		    temparray3[x]="600";
		else if(atoi(temparray[x])==500)
		    temparray3[x]="500";
		else if(atoi(temparray[x])==400)
		    temparray3[x]="400";
		else if(atoi(temparray[x])==300)
		    temparray3[x]="300";
		else if(atoi(temparray[x])==200)
		    temparray3[x]="200";
		else if(atoi(temparray[x])==100)
		    temparray3[x]="100";
		else
		    temparray3[x]="0";
		}
	    }
	}
    lf->extra = temparray3;
    lfs->start = lf->start;
    lfs->end = lf->end;
    lfs->features= lf;  
    slAddHead(&lfsList, lfs);
    }

tg->items=lfsList;
bedList=tg->items;
lfsList=NULL;

if(tg->limitedVis != tvDense)
    {
    originalLfs = tg->items;
    if(sameWord(database, "pyrFur2"))
	{
	for (i = 0; i < 19; i++)
            {
	    struct linkedFeatures *lfList = NULL;
	    AllocVar(codeLfs);
	    /*When doing abyssi displays differnt names at the begining*/
	    if(i == 0)
		codeLfs->name="within Pho";
	    else if (i==1)
		codeLfs->name="within Pab";
	    else if (i==2)
		codeLfs->name="\t";
	    else
		codeLfs->name = cloneString(codeNames[i-1]);
	    codeLfs->noLine = TRUE;
	    for (lfs = originalLfs; lfs != NULL; lfs = lfs->next)
            	{
		lf = lfsToLf(lfs);
		if(i>2)
            	    temparray2=((char**)(lfs->features->extra))[i-0];
		else temparray2=((char**)(lfs->features->extra))[i];
		if (i!=2 && i!=5 && i!=7 && i!=9 && atoi(temparray2)>-9997 && atoi(temparray2)!=0 && atoi(temparray2)>=cutoff)
                    {
		    lf->score=atoi(temparray2);
		    slAddHead(&lfList,lf);
		    }
                }
	    slReverse(&lfList);
	    codeLfs->features = lfList;   
	    slAddHead(&lfsList,codeLfs);
            }
	}
    else
	{   
	for (i = 0; i < 18; i++)
            {
	    struct linkedFeatures *lfList = NULL;
	    AllocVar(codeLfs);
	    codeLfs->name = cloneString(codeNames[i]);
	    codeLfs->noLine = TRUE;
	    for (lfs = originalLfs; lfs != NULL; lfs = lfs->next)
                {
		lf = lfsToLf(lfs);
		temparray2=((char**)(lfs->features->extra))[i];
		if (i!=1 && i!=4 && i!=6 && i!=8 && atoi(temparray2)>-9997 && atoi(temparray2)!=0 && atoi(temparray2)>=cutoff)
        	    {
		    lf->score=atoi(temparray2);
		    slAddHead(&lfList,lf);
	      	    }
                }
	    slReverse(&lfList);
	    codeLfs->features = lfList;   
	    slAddHead(&lfsList,codeLfs);
            }
	}
    freeLinkedFeaturesSeries(&originalLfs);
    slReverse(&lfsList);
    tg->items=lfsList;
    }
slFreeList(&track);
slFreeList(&scores);
slFreeList(&codes);
codeBlastFree(&list);
}
void oneChromInput(char *database, char *chrom, int chromSize, 	
	char *rangeTrack, char *expTrack, 
	struct hash *refLinkHash, struct hash *erHash, FILE *f)
/* Read in info for one chromosome. */
{
struct binKeeper *rangeBk = binKeeperNew(0, chromSize);
struct binKeeper *expBk = binKeeperNew(0, chromSize);
struct binKeeper *knownBk = binKeeperNew(0, chromSize);
struct bed *rangeList = NULL, *range;
struct bed *expList = NULL;
struct genePred *knownList = NULL;
struct rangeInfo *riList = NULL, *ri;
struct hash *riHash = hashNew(0); /* rangeInfo values. */
struct binElement *rangeBeList = NULL, *rangeBe, *beList = NULL, *be;

/* Load up data from database. */
rangeList = loadBed(database, chrom, rangeTrack, 12, rangeBk);
expList = loadBed(database, chrom, expTrack, 15, expBk);
knownList = loadGenePred(database, chrom, "refGene", knownBk);

/* Build range info basics. */
rangeBeList = binKeeperFindAll(rangeBk);
for (rangeBe = rangeBeList; rangeBe != NULL; rangeBe = rangeBe->next)
    {
    range = rangeBe->val;
    AllocVar(ri);
    slAddHead(&riList, ri);
    hashAddSaveName(riHash, range->name, ri, &ri->id);
    ri->range = range;
    ri->commonName = findCommonName(range, knownBk, refLinkHash);
    }
slReverse(&riList);

/* Mark split ones. */
beList = binKeeperFindAll(expBk);
for (be = beList; be != NULL; be = be->next)
    {
    struct bed *exp = be->val;
    struct binElement *subList = binKeeperFind(rangeBk, 
    	exp->chromStart, exp->chromEnd);
    if (slCount(subList) > 1)
        {
	struct binElement *sub;
	for (sub = subList; sub != NULL; sub = sub->next)
	    {
	    struct bed *range = sub->val;
	    struct rangeInfo *ri = hashMustFindVal(riHash, range->name);
	    ri->isSplit = TRUE;
	    }
	}
    slFreeList(&subList);
    }

/* Output the nice ones: not split and having some expression info. */
for (ri = riList; ri != NULL; ri = ri->next)
    {
    if (!ri->isSplit)
        {
	struct bed *range =  ri->range;
	beList = binKeeperFind(expBk, range->chromStart, range->chromEnd);
	if (beList != NULL)
	    outputAveraged(f, ri, erHash, beList);
	slFreeList(&beList);
	}
    }

/* Clean up time! */
freeHash(&riHash);
genePredFreeList(&knownList);
bedFree(&rangeList);
bedFree(&expList);
slFreeList(&rangeBeList);
slFreeList(&beList);
slFreeList(&riList);
binKeeperFree(&rangeBk);
binKeeperFree(&expBk);
binKeeperFree(&knownBk);
}
void update(struct g2cFile *old, struct g2cFile *up)
{
struct gene *oldGene, *upGene;
struct cdnaHit *oldHit, *upHit;
struct hash *geneHash;
struct hashEl *hel;
int sameHitCount = 0;
int newHitCount = 0;
int newGeneCount = 0;
int updatedGeneCount = 0;
int altCount = 0;
struct geneFamily smallFamily;
struct geneFamily *family;

printf("Updating %s with %s\n", old->name, up->name);

/* Hash the existing gene names for faster lookup. */
geneHash = newHash(12);
for (oldGene = old->geneList; oldGene != NULL; oldGene = oldGene->next)
    hashAdd(geneHash, oldGene->name, oldGene);

for (upGene = up->geneList; upGene != NULL; upGene = upGene->next)
    {
    boolean changedGene = FALSE;
    if (isAltSplicedName(upGene->name))
        {
        family = getAltFamily(geneHash, upGene->name);
        ++altCount;
        }
    else
        {
        hel = hashLookup(geneHash, upGene->name);
        if (hel != NULL)
            {
            smallFamily.gene = hel->val;
            smallFamily.next = NULL;
            family = &smallFamily;
            }
        else
            family = NULL;
        }

    /* Set corresponding gene in old file to NULL until we
     * need to find it. */
    oldGene = NULL;
    for (upHit = upGene->hitList; upHit != NULL; upHit = upHit->next)
        {
        if ((oldHit = findHitInFamily(family, upHit->name)) != NULL)
            ++sameHitCount;
        else
            {
            if (oldGene == NULL)
                {
                /* We haven't found corresponding gene yet.  First
                 * look for it in the family. */
                struct geneFamily *member;
                for (member = family; member != NULL; member = member->next)
                    {
                    if (strcmp(member->gene->name, upGene->name) == 0)
                        {
                        oldGene = member->gene;
                        break;
                        }
                    }
                /* The corresponding gene doesn't exist yet. We
                 * have to make it up and hang it on the genelist
                 * for the file, the hash list, and the family list. */
                if (oldGene == NULL)
                    {
                    oldGene = alloc(sizeof(*oldGene));
                    oldGene->name = upGene->name;
                    slAddHead(&old->geneList, oldGene);
                    hashAdd(geneHash, oldGene->name, oldGene);
                    member = alloc(sizeof(*member));
                    member->gene = oldGene;
                    slAddHead(&family, member);
                    ++newGeneCount;
                    }
                }
            oldHit = alloc(sizeof(*oldHit));
            oldHit->name = upHit->name;
            oldHit->hel = hel;
            slAddHead(&oldGene->hitList, oldHit);
            ++newHitCount;
            changedGene = TRUE;
            }
        }
    if (changedGene)
        ++updatedGeneCount;
    }
slSort(&old->geneList, cmpName);
printf("Updated %d genes (including %d alt spliced ones) with %d cdna hits (%d hits unchanged) %d new genes\n",
    updatedGeneCount, altCount, newHitCount, sameHitCount, newGeneCount);
}
struct g2cFile *loadG2cFile(char *fileName)
{
char lineBuf[1024*8];
int lineLen;
char *words[256*8];
int wordCount;
FILE *f;
int lineCount = 0;
struct g2cFile *gf = alloc(sizeof(*gf));
int hitCount = 0;
int cdnaCount = 0;
int geneCount = 0;

gf->name = fileName;
f = mustOpen(fileName, "r");
gf->cdnaHash = newHash(14);
while (fgets(lineBuf, sizeof(lineBuf), f) != NULL)
    {
    ++lineCount;
    lineLen = strlen(lineBuf);
    if (lineLen >= sizeof(lineBuf) - 1)
        {
        errAbort("%s\nLine %d of %s too long, can only handle %d chars\n",
            lineBuf, lineCount, fileName, sizeof(lineBuf)-1);
        }
    wordCount = chopString(lineBuf, whiteSpaceChopper, words, ArraySize(words));
    if (wordCount > 0)
        {
        struct gene *gene = alloc(sizeof(*gene));
        char *geneName = words[0];
        int i;
        
        /* Create new gene struct and put it on list. */
        gene->name = cloneString(geneName);
        slAddHead(&gf->geneList, gene);
        ++geneCount;

        /* Put all cdna hits on gene. */
        for (i=1; i<wordCount; ++i)
            {
            struct cdnaHit *hit;
            struct cdnaVal *cdnaVal;
            struct hashEl *hel;
            char *cdnaName = words[i];

            /* Get cdna, or if it's the first time we've seen it
             * make up a data structure for it and hang it on
             * hash list and cdna list. */
            if ((hel = hashLookup(gf->cdnaHash, cdnaName)) == NULL)
                {
                cdnaVal = alloc(sizeof(*cdnaVal));
                hel = hashAdd(gf->cdnaHash, cdnaName, cdnaVal);
                cdnaVal->name = hel->name;
                slAddHead(&gf->cdnaList, cdnaVal);
                ++cdnaCount;
                }
            else
                {
                cdnaVal = hel->val;
                }
            ++cdnaVal->useCount;

            /* Make up new cdna hit and hang it on the gene. */
            hit = alloc(sizeof(*hit));
            hit->hel = hel;
            hit->name = hel->name;
            slAddHead(&gene->hitList, hit);
            ++hitCount;
            }
        slReverse(&gene->hitList);
        }    
    }
slReverse(&gf->geneList);
slSort(&gf->geneList, cmpName);
slSort(&gf->cdnaList, cmpName);
fclose(f);
reportHashStats(gf->cdnaHash);
printf("Loaded %s.  %d genes %d cdnas %d hits\n", fileName,
    geneCount, cdnaCount, hitCount);
return gf;
}
Exemple #15
0
struct region *stitchedList(struct frag *fragList)
/* Stitch things together into list of regions. */
{
struct frag *frag, *nextFrag;
struct region *openList = NULL, *closedList = NULL, *region, *nextRegion, *stillOpenList = NULL;
struct psl *psl;
int distance = 0;

for (frag = fragList; frag != NULL; frag = nextFrag)
    {
    nextFrag = frag->next; 

    /* Clear hit flags. */
    for (region = openList; region != NULL; region = region->next)
	region->isHit = FALSE;

    /* Loop through psl's extending open regions where possible
     * and creating new open regions where not. */
    for (psl = frag->pslList; psl != NULL; psl = psl->next)
        {
	if ((region = fragConnects(openList, psl, distance)) == NULL)
	    {
	    AllocVar(region);
	    slAddHead(&openList, region);
	    region->qName = frag->chrom;
	    region->qStart = frag->start;
	    region->qEnd = frag->end;
	    region->tName = psl->tName;
	    region->tStart = psl->tStart;
	    region->tEnd = psl->tEnd;
	    region->strand[0] = psl->strand[0];
	    }
	region->match += psl->match;
	region->misMatch += psl->misMatch;
	region->repMatch += psl->repMatch;
	region->nCount += psl->nCount;
	region->qNumInsert += psl->qNumInsert;
	region->qBaseInsert += psl->qBaseInsert;
	region->tNumInsert += psl->tNumInsert;
	region->tBaseInsert += psl->tBaseInsert;
	region->isHit = TRUE;
	}

    /* Move regions not hit by this fragment to closed list. */
    for (region = openList; region != NULL; region = nextRegion)
        {
	nextRegion = region->next;
	if (region->isHit)
	    {
	    slAddHead(&stillOpenList, region);
	    }
	else
	    {
	    slAddHead(&closedList, region);
	    }
	}
    openList = stillOpenList;
    stillOpenList = NULL;
    if (nextFrag != NULL)
        distance = nextFrag->start - frag->start;
    }

/* Move remainder of open list to cloest list. */
for (region = openList; region != NULL; region = nextRegion)
    {
    nextRegion = region->next;
    slAddHead(&closedList, region);
    }
slReverse(&closedList);
return closedList;
}
Exemple #16
0
void chainFastSubsetOnT(struct chain *chain, struct cBlock *firstBlock,
	int subStart, int subEnd, struct chain **retSubChain,  struct chain **retChainToFree)
/* Get subchain as in chainSubsetOnT. Pass in initial block that may
 * be known from some index to speed things up. */
{
struct chain *sub = NULL;
struct cBlock *oldB, *b, *bList = NULL;
int qStart = BIGNUM, qEnd = -BIGNUM;
int tStart = BIGNUM, tEnd = -BIGNUM;

/* Check for easy case. */
if (subStart <= chain->tStart && subEnd >= chain->tEnd)
    {
    *retSubChain = chain;
    *retChainToFree = NULL;
    return;
    }
/* Build new block list and calculate bounds. */
for (oldB = firstBlock; oldB != NULL; oldB = oldB->next)
    {
    if (oldB->tStart >= subEnd)
        break;
    b = CloneVar(oldB);
    if (b->tStart < subStart)
        {
	b->qStart += subStart - b->tStart;
	b->tStart = subStart;
	}
    if (b->tEnd > subEnd)
        {
	b->qEnd -= b->tEnd - subEnd;
	b->tEnd = subEnd;
	}
    slAddHead(&bList, b);
    if (qStart > b->qStart)
        qStart = b->qStart;
    if (qEnd < b->qEnd)
        qEnd = b->qEnd;
    if (tStart > b->tStart)
        tStart = b->tStart;
    if (tEnd < b->tEnd)
        tEnd = b->tEnd;
    }
slReverse(&bList);

/* Make new chain based on old. */
if (bList != NULL)
    {
    double sizeRatio;
    AllocVar(sub);
    sub->blockList = bList;
    sub->qName = cloneString(chain->qName);
    sub->qSize = chain->qSize;
    sub->qStrand = chain->qStrand;
    sub->qStart = qStart;
    sub->qEnd = qEnd;
    sub->tName = cloneString(chain->tName);
    sub->tSize = chain->tSize;
    sub->tStart = tStart;
    sub->tEnd = tEnd;
    sub->id = chain->id;

    /* Fake new score. */
    sizeRatio = (sub->tEnd - sub->tStart);
    sizeRatio /= (chain->tEnd - chain->tStart);
    sub->score = sizeRatio * chain->score;
    }
*retSubChain = *retChainToFree = sub;
}
Exemple #17
0
void dupeFoo(char *pslName, char *faName, char *regionFile)
/* dupeFoo - Do some duplication analysis. */
{
struct lineFile *lf;
struct frag *fragList = NULL, *frag;
struct hash *fragHash = newHash(16);
struct psl *psl;
int fragCount=0,missCount=0,dupeCount=0,kSub=0,
   k1=0, k10=0,k100=0,k1000=0,k10000=0,diffChrom=0,distance;

/* Read in fragment list and put it in hash. */
fragList = readFragList(faName);
for (frag = fragList; frag != NULL; frag = frag->next)
    hashAdd(fragHash, frag->name, frag);

/* Read psl's and store under the fragment the belong to. */
lf = pslFileOpen(pslName);
while ((psl = pslNext(lf)) != NULL)
    {
    if ((frag = hashFindVal(fragHash, psl->qName)) == NULL)
        errAbort("Couldn't find %s in %s line %d of %s", 
		psl->qName, faName, lf->lineIx, lf->fileName);
    slAddHead(&frag->pslList, psl);
    }
lineFileClose(&lf);

/* Look through fragments and report missing and dupes. */
for (frag = fragList; frag != NULL; frag = frag->next)
    {
    ++fragCount;
    if ((psl = frag->pslList) == NULL)
        {
	++missCount;
	printf("missing %s\n", frag->name);
	}
    else
        {
	for (psl = frag->pslList; psl != NULL; psl = psl->next)
	    {
	    if (sameString(psl->tName, frag->chrom))
	        {
		distance = frag->start - psl->tStart;
		if (distance != 0)
		    {
		    if (distance < 0) distance = -distance;
		    if (distance >= 10000000) ++k10000;
		    else if (distance >= 1000000) ++k1000;
		    else if (distance >= 100000) ++k100;
		    else if (distance >= 10000) ++k10;
		    else if (distance >= 1000) ++k1;
		    else ++kSub;
		    }
		}
	    else
	        {
		++diffChrom;
		}
	    }
	}
    }
printPercent("Total", fragCount, fragCount);
printPercent("Unaligned", missCount, fragCount);
printPercent("Other Chrom", diffChrom, fragCount);
printPercent("Same Chrom >10M", k10000, fragCount);
printPercent("Same Chrom >1M", k1000, fragCount);
printPercent("Same Chrom >10Ok", k100, fragCount);
printPercent("Same Chrom >1Ok", k10, fragCount);
printPercent("Same Chrom >1k", k1, fragCount);
printPercent("Self-overlap", kSub, fragCount);
writeRegions(fragList, regionFile);
}
Exemple #18
0
void chainSubsetOnQ(struct chain *chain, int subStart, int subEnd, 
    struct chain **retSubChain,  struct chain **retChainToFree)
/* Get subchain of chain bounded by subStart-subEnd on 
 * query side.  Return result in *retSubChain.  In some
 * cases this may be the original chain, in which case
 * *retChainToFree is NULL.  When done call chainFree on
 * *retChainToFree.  The score and id fields are not really
 * properly filled in. */
{
struct chain *sub = NULL;
struct cBlock *oldB, *b, *bList = NULL;
int qStart = BIGNUM, qEnd = -BIGNUM;
int tStart = BIGNUM, tEnd = -BIGNUM;

/* Check for easy case. */
if (subStart <= chain->qStart && subEnd >= chain->qEnd)
    {
    *retSubChain = chain;
    *retChainToFree = NULL;
    return;
    }
/* Build new block list and calculate bounds. */
for (oldB = chain->blockList; oldB != NULL; oldB = oldB->next)
    {
    if (oldB->qEnd <= subStart)
        continue;
    if (oldB->qStart >= subEnd)
        break;
    b = CloneVar(oldB);
    if (b->qStart < subStart)
        {
	b->tStart += subStart - b->qStart;
	b->qStart = subStart;
	}
    if (b->qEnd > subEnd)
        {
	b->tEnd -= b->qEnd - subEnd;
	b->qEnd = subEnd;
	}
    slAddHead(&bList, b);
    if (tStart > b->tStart)
        tStart = b->tStart;
    if (tEnd < b->tEnd)
        tEnd = b->tEnd;
    if (qStart > b->qStart)
        qStart = b->qStart;
    if (qEnd < b->qEnd)
        qEnd = b->qEnd;
    }
slReverse(&bList);

/* Make new chain based on old. */
if (bList != NULL)
    {
    AllocVar(sub);
    sub->blockList = bList;
    sub->qName = cloneString(chain->qName);
    sub->qSize = chain->qSize;
    sub->qStrand = chain->qStrand;
    sub->qStart = qStart;
    sub->qEnd = qEnd;
    sub->tName = cloneString(chain->tName);
    sub->tSize = chain->tSize;
    sub->tStart = tStart;
    sub->tEnd = tEnd;
    sub->id = chain->id;
    }
*retSubChain = *retChainToFree = sub;
}
Exemple #19
0
static struct exonInfo *buildGIList(char *database, struct genePred *pred, 
    char *mafTable, unsigned options)
{
struct exonInfo *giList = NULL;
unsigned *exonStart = pred->exonStarts;
unsigned *lastStart = &exonStart[pred->exonCount];
unsigned *exonEnd = pred->exonEnds;
int *frames = pred->exonFrames;

boolean includeUtr = options & MAFGENE_INCLUDEUTR;

if (frames == NULL)
    {
    genePredAddExonFrames(pred);
    frames = pred->exonFrames;
    }

assert(frames != NULL);

int start = 0;


/* first skip 5' UTR if the includeUtr option is not set */
if (!includeUtr) 
    {
    for(; exonStart < lastStart; exonStart++, exonEnd++, frames++)
       {
       int size = *exonEnd - *exonStart;

       if (*exonStart + size > pred->cdsStart)
       	   break;
       }
    }

for(; exonStart < lastStart; exonStart++, exonEnd++, frames++)
    {
    struct exonInfo *gi;
    int thisStart = *exonStart;
    int thisEnd = *exonEnd;
    
    if (!includeUtr) 
        {
        if (thisStart > pred->cdsEnd)
	    break;

        if (thisStart < pred->cdsStart)
	    thisStart = pred->cdsStart;


        if (thisEnd > pred->cdsEnd)
	    thisEnd = pred->cdsEnd;
        
	}
    int thisSize = thisEnd - thisStart;
    if (!includeUtr)
        verbose(3, "in %d %d cds %d %d\n",*exonStart,*exonEnd, thisStart, thisEnd);
    AllocVar(gi);
    gi->frame = *frames;
    gi->name = pred->name;
    gi->ali = getAliForRange(database, mafTable, pred->chrom, 
	thisStart, thisEnd);
    gi->chromStart = thisStart;
    gi->chromEnd = thisEnd;
    gi->exonStart = start;
    gi->exonSize = thisSize;
    verbose(3, "exon size %d\n", thisSize);
    gi->strand = pred->strand[0];
    start += gi->exonSize;
    slAddHead(&giList, gi);
 
    if (!includeUtr) 
        {
        if (thisEnd == pred->cdsEnd)
	    break;
        }
    }
slReverse(&giList);

return giList;
}
Exemple #20
0
static struct rTree *rTreeFromChromRangeArray( struct lm *lm, int blockSize, int itemsPerSlot,
	void *itemArray, int itemSize, bits64 itemCount,  void *context,
	struct cirTreeRange (*fetchKey)(const void *va, void *context),
	bits64 (*fetchOffset)(const void *va, void *context), bits64 endFileOffset,
	int *retLevelCount)
{
char *items = itemArray;
struct rTree *el, *list=NULL, *tree = NULL;

/* Make first level above leaf. */
bits64 i;
bits64 nextOffset = (*fetchOffset)(items, context);
for (i=0; i<itemCount; i += itemsPerSlot)
    {
    /* Figure out if we are on final iteration through loop, and the
     * count of items in this iteration. */
    boolean finalIteration = FALSE;
    int oneSize = itemCount-i;
    if (oneSize > itemsPerSlot)
        oneSize = itemsPerSlot;
    else
        finalIteration = TRUE;

    /* Allocate element and put on list. */
    lmAllocVar(lm, el);
    slAddHead(&list, el);

    /* Fill out most of element from first item in element. */
    char *startItem = items + itemSize * i;
    struct cirTreeRange key = (*fetchKey)(startItem, context);
    el->startChromIx = el->endChromIx = key.chromIx;
    el->startBase = key.start;
    el->endBase = key.end;
    el->startFileOffset = nextOffset;

    /* Figure out end of element from offset of next element (or file size
     * for final element.) */
    if (finalIteration)
	nextOffset = endFileOffset;
    else
        {
	char *endItem = startItem + itemSize*oneSize;
        nextOffset = (*fetchOffset)(endItem, context);
	}
    el->endFileOffset = nextOffset;

    /* Expand area spanned to include all items in block. */
    int j;
    for (j=1; j<oneSize; ++j)
        {
	void *item = items + itemSize*(i+j);
	key = (*fetchKey)(item, context);
	if (key.chromIx < el->startChromIx)
	    {
	    el->startChromIx = key.chromIx;
	    el->startBase = key.start;
	    }
	else if (key.chromIx == el->startChromIx)
	    {
	    if (key.start < el->startBase)
	        el->startBase = key.start;
	    }
	if (key.chromIx > el->endChromIx)
	    {
	    el->endChromIx = key.chromIx;
	    el->endBase = key.end;
	    }
	else if (key.chromIx == el->endChromIx)
	    {
	    if (key.end > el->endBase)
	        el->endBase = key.end;
	    }
	}
    }
slReverse(&list);
verbose(2, "Made %d primary index nodes out of %llu items\n", slCount(list), itemCount);

/* Now iterate through making more and more condensed versions until have just one. */
int levelCount = 1;
tree = list;
while (tree->next != NULL || levelCount < 2)
    {
    list = NULL;
    int slotsUsed = blockSize;
    struct rTree *parent = NULL, *next;
    for (el = tree; el != NULL; el = next)
        {
	next = el->next;
	if (slotsUsed >= blockSize)
	    {
	    slotsUsed = 1;
	    lmAllocVar(lm, parent);
	    parent = lmCloneMem(lm, el, sizeof(*el));
	    parent->children = el;
	    el->parent = parent;
	    el->next = NULL;
	    slAddHead(&list, parent);
	    }
	else
	    {
	    ++slotsUsed;
	    slAddHead(&parent->children, el);
	    el->parent = parent;
	    if (el->startChromIx < parent->startChromIx)
		{
	        parent->startChromIx = el->startChromIx;
		parent->startBase = el->startBase;
		}
	    else if (el->startChromIx == parent->startChromIx)
	        {
		if (el->startBase < parent->startBase)
		    parent->startBase = el->startBase;
		}
	    if (el->endChromIx > parent->endChromIx)
		{
	        parent->endChromIx = el->endChromIx;
		parent->endBase = el->endBase;
		}
	    else if (el->endChromIx == parent->endChromIx)
	        {
		if (el->endBase > parent->endBase)
		    parent->endBase = el->endBase;
		}
	    }
	}

    slReverse(&list);
    for (el = list; el != NULL; el = el->next)
        slReverse(&el->children);
    tree = list;
    levelCount += 1;
    }
*retLevelCount = levelCount;
return tree;
}
struct tagStorm *tagStormFromFile(char *fileName)
/* Load up all tags from file.  */
{
int depth = 0, maxDepth = 32;
int indentStack[maxDepth];
indentStack[0] = 0;

/* Open up file first thing.  Abort if there's a problem here. */
struct lineFile *lf = lineFileOpen(fileName, TRUE);

/* Set up new empty tag storm and get local pointer to memory pool. */
struct tagStorm *tagStorm = tagStormNew(fileName);
struct lm *lm = tagStorm->lm;

struct tagStanza *stanza, *parent = NULL, *lastStanza = NULL;
int currentIndent = 0;
int stanzaCount = 0;
int tagCount = 0;
while (raSkipLeadingEmptyLines(lf, NULL))
    {
    ++stanzaCount;
    char *tag, *val;
    int stanzaIndent, tagIndent;
    lmAllocVar(lm, stanza);
    struct slPair *pairList = NULL, *pair;
    while (raNextTagValWithIndent(lf, &tag, &val, NULL, &tagIndent))
        {
	lmAllocVar(lm, pair);
	pair->name = lmCloneString(lm, tag);
	pair->val = lmCloneString(lm, val);
	
	if (pairList == NULL)	/* If this is first tag of a new stanza check indentation
	                         * and put stanza in appropriate level of hierarchy */
	    {
	    if (tagIndent != currentIndent)
		{
		stanzaIndent = tagIndent;
		if (stanzaIndent > currentIndent)
		    {
		    if (++depth >= maxDepth)
		        errAbort("Tags nested too deep line %d of %s. Max nesting is %d",
			    lf->lineIx, lf->fileName, maxDepth);
		    indentStack[depth] = stanzaIndent;
		    if (lastStanza == NULL)
			errAbort("Initial stanza needs to be non-indented line %d of %s", 
			    lf->lineIx, lf->fileName);
		    parent = lastStanza;
		    }
		else  /* going up */
		    {
		    /* Find stanza in parent chain at same level of indentation.  This
		     * will be an older sibling */
		    struct tagStanza *olderSibling;
		    for (olderSibling = parent; olderSibling != NULL; 
			olderSibling = olderSibling->parent)
		        {
			--depth;
			if (indentStack[depth] == stanzaIndent)
			    break;
			}
		    if (olderSibling == NULL)
		        {
			warn("Indentation inconsistent line %d of %s.", lf->lineIx, lf->fileName);
			warn("If you are using tabs, check your tab stop is set to 8.");
			warn("Otherwise check that when you are reducing indentation in a stanza");
			warn("that it is the same as the previous stanza at the same level.");
			noWarnAbort();
			}
		    parent = olderSibling->parent;
		    }
		currentIndent = tagIndent;
		}
	    if (parent == NULL)
	        slAddHead(&tagStorm->forest, stanza);
	    else
		slAddHead(&parent->children, stanza);
	    stanza->parent = parent;
	    pairList = pair;
	    lastStanza = stanza;
	    }
	else
	    {
	    if (tagIndent != currentIndent)
	         errAbort("Tags in stanza inconsistently indented line %d of %s",
		    lf->lineIx, lf->fileName);
	    slAddHead(&pairList, pair);
	    }
	++tagCount;
	}
    slReverse(&pairList);
    stanza->tagList = pairList;
    }
lineFileClose(&lf);
rReverseStanzaList(&tagStorm->forest);
return tagStorm;
}
Exemple #22
0
static void rFindOverlappingBlocks(struct cirTreeFile *crt, int level, bits64 indexFileOffset,
	bits32 chromIx, bits32 start, bits32 end, struct fileOffsetSize **retList)
/* Recursively find blocks with data. */
{
struct udcFile *udc = crt->udc;

/* Seek to start of block. */
udcSeek(udc, indexFileOffset);

/* Read block header. */
UBYTE isLeaf;
UBYTE reserved;
bits16 i, childCount;
udcMustReadOne(udc, isLeaf);
udcMustReadOne(udc, reserved);
boolean isSwapped = crt->isSwapped;
childCount = udcReadBits16(udc, isSwapped);

verbose(3, "rFindOverlappingBlocks %llu %u:%u-%u.  childCount %d. isLeaf %d\n", indexFileOffset, chromIx, start, end, (int)childCount, (int)isLeaf);

if (isLeaf)
    {
    /* Loop through node adding overlapping leaves to block list. */
    for (i=0; i<childCount; ++i)
        {
	bits32 startChromIx = udcReadBits32(udc, isSwapped);
	bits32 startBase = udcReadBits32(udc, isSwapped);
	bits32 endChromIx = udcReadBits32(udc, isSwapped);
	bits32 endBase = udcReadBits32(udc, isSwapped);
	bits64 offset = udcReadBits64(udc, isSwapped);
	bits64 size = udcReadBits64(udc, isSwapped);
	if (cirTreeOverlaps(chromIx, start, end, startChromIx, startBase, endChromIx, endBase))
	    {
	    struct fileOffsetSize *block;
	    AllocVar(block);
	    block->offset = offset;
	    block->size = size;
	    slAddHead(retList, block);
	    }
	}
    }
else
    {
    /* Read node into arrays. */
    bits32 startChromIx[childCount], startBase[childCount];
    bits32 endChromIx[childCount], endBase[childCount];
    bits64 offset[childCount];
    for (i=0; i<childCount; ++i)
        {
	startChromIx[i] = udcReadBits32(udc, isSwapped);
	startBase[i] = udcReadBits32(udc, isSwapped);
	endChromIx[i] = udcReadBits32(udc, isSwapped);
	endBase[i] = udcReadBits32(udc, isSwapped);
	offset[i] = udcReadBits64(udc, isSwapped);
	}

    /* Recurse into child nodes that we overlap. */
    for (i=0; i<childCount; ++i)
	{
	if (cirTreeOverlaps(chromIx, start, end, startChromIx[i], startBase[i], 
		endChromIx[i], endBase[i]))
	    {
	    rFindOverlappingBlocks(crt, level+1, offset[i], chromIx, start, end, retList);
	    }
	}
    }
}
static struct mafAli *mafFromBed12(char *database, char *track,
    struct bed *bed, struct slName *orgList)
/* Construct a maf out of exons in bed. */
{
/* Loop through all block in bed, collecting a list of mafs, one
 * for each block.  While we're at make a hash of all species seen. */
struct hash *speciesHash = hashNew(0);
struct mafAli *mafList = NULL, *maf, *bigMaf;
struct mafComp *comp, *bigComp;
int totalTextSize = 0;
int i;
for (i=0; i<bed->blockCount; ++i)
    {
    int start = bed->chromStart + bed->chromStarts[i];
    int end = start + bed->blockSizes[i];
    if (thickOnly)
        {
	start = max(start, bed->thickStart);
	end = min(end, bed->thickEnd);
	}
    if (start < end)
        {
	maf = hgMafFrag(database, track, bed->chrom, start, end, '+',
	   database, NULL);
	slAddHead(&mafList, maf);
	for (comp = maf->components; comp != NULL; comp = comp->next)
	    hashStore(speciesHash, comp->src);
	totalTextSize += maf->textSize; 
	}
    }
slReverse(&mafList);

/* Add species in order list too */
struct slName *org;
for (org = orgList; org != NULL; org = org->next)
    hashStore(speciesHash, org->name);

/* Allocate memory for return maf that contains all blocks concatenated together. 
 * Also fill in components with any species seen at all. */
AllocVar(bigMaf);
bigMaf->textSize = totalTextSize;
struct hashCookie it = hashFirst(speciesHash);
struct hashEl *hel;
while ((hel = hashNext(&it)) != NULL)
    {
    AllocVar(bigComp);
    bigComp->src = cloneString(hel->name);
    bigComp->text = needLargeMem(totalTextSize + 1);
    memset(bigComp->text, '.', totalTextSize);
    bigComp->text[totalTextSize] = 0;
    bigComp->strand = '+';
    bigComp->srcSize = totalTextSize;	/* It's safe if a bit of a lie. */
    hel->val = bigComp;
    slAddHead(&bigMaf->components, bigComp);
    }

/* Loop through maf list copying in data. */
int textOffset = 0;
for (maf = mafList; maf != NULL; maf = maf->next)
    {
    for (comp = maf->components; comp != NULL; comp = comp->next)
        {
	bigComp = hashMustFindVal(speciesHash, comp->src);
	memcpy(bigComp->text + textOffset, comp->text, maf->textSize);
	bigComp->size += comp->size;
	}
    textOffset += maf->textSize;
    }

/* Cope with strand of darkness. */
if (bed->strand[0] == '-')
    {
    for (comp = bigMaf->components; comp != NULL; comp = comp->next)
	reverseComplement(comp->text, bigMaf->textSize);
    }

/* If got an order list then reorder components according to it. */
if (orgList != NULL)
    {
    struct mafComp *newList = NULL;
    for (org = orgList; org != NULL; org = org->next)
        {
	comp = hashMustFindVal(speciesHash, org->name);
	slAddHead(&newList, comp);
	}
    slReverse(&newList);
    bigMaf->components = newList;
    }

/* Rename our own component to bed name */
comp = hashMustFindVal(speciesHash, database);
freeMem(comp->src);
comp->src = cloneString(bed->name);


/* Clean up and go home. */
hashFree(&speciesHash);
mafAliFreeList(&mafList);
return bigMaf;
}
void freeCal(struct cdnaAliList *cal)
/* Free up one cal. */
{
slAddHead(&calFreeList, cal);
}
void hgExpDistance(char *database, char *posTable, char *expTable, char *outTable)
/* hgExpDistance - Create table that measures expression distance between pairs. */
{
struct sqlConnection *conn = sqlConnect(database);
struct sqlResult *sr;
char query[256];
char **row;
struct hash *expHash = hashNew(16);
int realExpCount = -1;
struct microData *gene;
int rc, t;
pthread_t *threads = NULL;
pthread_attr_t attr;
int *threadID = NULL;
void *status;
char *tempDir = ".";
long time1, time2;

time1 = clock1000();

/* Get list/hash of all items with expression values. */
sqlSafef(query, sizeof(query), "select name,expCount,expScores from %s", posTable);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *name = row[0];
    if (!hashLookup(expHash, name))
	{
	int expCount = sqlUnsigned(row[1]);
	int commaCount;
	float *expScores = NULL;

	sqlFloatDynamicArray(row[2], &expScores, &commaCount);
	if (expCount != commaCount)
	    errAbort("expCount and expScores don't match on %s in %s", name, posTable);
	if (realExpCount == -1)
	    realExpCount = expCount;
	if (expCount != realExpCount)
	    errAbort("In %s some rows have %d experiments others %d", 
	    	name, expCount, realExpCount);
	AllocVar(gene);
	gene->expCount = expCount;
	gene->expScores = expScores;
	hashAddSaveName(expHash, name, gene, &gene->name);
	slAddHead(&geneList, gene);
	}
    }
sqlFreeResult(&sr);
conn = sqlConnect(database);
slReverse(&geneList);
geneCount = slCount(geneList);
printf("Have %d elements in %s\n", geneCount, posTable);

weights = getWeights(realExpCount);

if (optionExists("lookup"))
    geneList = lookupGenes(conn, optionVal("lookup", NULL), geneList);
geneCount = slCount(geneList);
printf("Got %d unique elements in %s\n", geneCount, posTable);

sqlDisconnect(&conn);	/* Disconnect because next step is slow. */


if (geneCount < 1)
    errAbort("ERROR: unique gene count less than one ?");

time2 = clock1000();
verbose(2, "records read time: %.2f seconds\n", (time2 - time1) / 1000.0);

f = hgCreateTabFile(tempDir, outTable);

/* instantiate threads */
AllocArray( threadID, numThreads );
AllocArray( threads, numThreads );
pthread_attr_init( &attr );
pthread_mutex_init( &mutexfilehandle, NULL );
pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );

for (t = 0; t < numThreads; t++) {
	threadID[t] = t;
	rc = pthread_create( &threads[t], &attr, computeDistance, 
						(void *) &threadID[t]);
	if (rc)
		errAbort("ERROR: in pthread_create() %d\n", rc );
} 

/* synchronize all threads */
for (t = 0; t < numThreads; t++) {
	rc = pthread_join( threads[t], &status);
	if (rc)
		errAbort("ERROR: in pthread_join() %d\n", rc );
} 

printf("Made %s.tab\n", outTable);

slFreeList( &geneList );

pthread_mutex_destroy( &mutexfilehandle );
pthread_attr_destroy( &attr );

time1 = time2;
time2 = clock1000();
verbose(2, "distance computation time: %.2f seconds\n", (time2 - time1) / 1000.0);

/* Create and load table. */
conn = sqlConnect(database);
distanceTableCreate(conn, outTable);
hgLoadTabFile(conn, tempDir, outTable, &f);
printf("Loaded %s\n", outTable);

/* Add indices. */
sqlSafef(query, sizeof(query), "alter table %s add index(query(12))", outTable);
sqlUpdate(conn, query);
printf("Made query index\n");
if (optionExists("targetIndex"))
    {
    sqlSafef(query, sizeof(query), "alter table %s add index(target(12))", outTable);
    sqlUpdate(conn, query);
    printf("Made target index\n");
    }

hgRemoveTabFile(tempDir, outTable);

time1 = time2;
time2 = clock1000();
verbose(2, "table create/load/index time: %.2f seconds\n", (time2 - time1) / 1000.0);

}
void writeClump(struct blockPos *first, struct blockPos *last,
    char *cdnaName, char strand, char dir, DNA *cdna, int cdnaSize, struct cdnaAliList **pList)
/* Write hitOut one clump. */
{
struct dnaSeq *seq = first->seq;
char *bacName = seq->name;
int seqIx = first->seqIx;
int start = first->offset;
int end = last->offset+last->size;
struct ffAli *ff, *left, *right;
int extraAtEnds = minMatch*patSize;
struct cdnaAliList *cal;

start -= extraAtEnds;
if (start < 0)
    start = 0;
end += extraAtEnds;
if (end >seq->size)
    end = seq->size;

++ffSubmitted;
if (dumpMe)
	fprintf(dumpOut, "%s %d %s %d-%d\n", cdnaName, cdnaSize, bacName, start, end);
ff = ffFind(cdna, cdna+cdnaSize, seq->dna+start, seq->dna+end, ffCdna);
if (dumpMe)
    {
    fprintf(dumpOut, "ffFind = %x\n", ff);
    }
if (ff != NULL)
    {
    int ffScore = ffScoreCdna(ff);
    ++ffAccepted;
    if (dumpMe) fprintf(dumpOut, "ffScore = %d\n", ffScore);
    if (ffScore >= 22)
        {
        int hiStart, hiEnd;
        int oldStart, oldEnd;

        ffFindEnds(ff, &left, &right);
        hiStart = oldStart = left->nStart - cdna;
        hiEnd = oldEnd = right->nEnd - cdna;
        ++ffOkScore;

        if (solidMatch(&left, &right, cdna, &hiStart, &hiEnd))
            {
            int solidSize = hiEnd - hiStart;
            int solidScore;
            int seqStart, seqEnd;
            double cookedScore;

            solidScore = scoreCdna(left, right);
            cookedScore = (double)solidScore/solidSize;
            if (cookedScore > 0.25)
                {
                ++ffSolidMatch;

                seqStart = left->hStart - seq->dna;
                seqEnd = right->hEnd - seq->dna;
                fprintf(hitOut, "%3.1f%% %c %s:%d-%d (old %d-%d) of %d at %s.%d:%d-%d\n", 
                    100.0 * cookedScore, strand, cdnaName, 
                    hiStart, hiEnd, oldStart, oldEnd, cdnaSize,
                    bacName, seqIx, seqStart, seqEnd);

                if (dumpMe)
                    {
                    fprintf(bigHtmlFile, "<A NAME=i%d>", htmlIx);
                    fprintf(bigHtmlFile, "<H2>%4.1f%% %4d %4d %c %s:%d-%d of %d at %s.%d:%d-%d</H2><BR>", 
                        100.0 * cookedScore, solidScore, ffScore, strand, cdnaName, 
                        hiStart, hiEnd, cdnaSize,
                        bacName, seqIx, seqStart, seqEnd);
                    fprintf(bigHtmlFile, "</A>");
                    ffShAli(bigHtmlFile, ff, cdnaName, cdna, cdnaSize, 0,
                        bacName, seq->dna+start, end-start, start, FALSE);
                    fprintf(bigHtmlFile, "<BR><BR>\n");

                    fprintf(littleHtmlFile, "<A HREF=\"patAli.html#i%d\">", htmlIx);
                    fprintf(littleHtmlFile, "%4.1f%% %4d %4d %c %s:%d-%d of %d at %s.%d:%d-%d\n", 
                        100.0 * cookedScore, solidScore, ffScore, strand, cdnaName, 
                        hiStart, hiEnd, cdnaSize,
                        bacName, seqIx, seqStart, seqEnd);
                    fprintf(littleHtmlFile, "</A><BR>");
                    ++htmlIx;
                    }

                cal = newCal(first->bacIx, seqIx, hiStart, hiEnd, cdnaSize, strand, dir, cookedScore);
                slAddHead(pList, cal);
                }
            }
        }
    ffFreeAli(&ff);
    }
}
Exemple #27
0
int main(int argc, char *argv[])
{
struct hash *bacHash;
char line[1024];
int lineCount;
char *words[256];
int wordCount;
int fileIx;
char *fileName;
FILE *f;

if (argc < 2)
    usage();
bacHash = newHash(16);

for (fileIx = 1; fileIx < argc; ++fileIx)
    {
    fileName = argv[fileIx];
    uglyf("Processing %s\n", fileName);
    f = mustOpen(fileName, "r");
    lineCount = 0;
    while (fgets(line, sizeof(line), f))
        {
        ++lineCount;
        wordCount = chopLine(line, words);
        if (wordCount == ArraySize(words))
            errAbort("Too many words line %d of %s\n", lineCount, fileName);
        if (wordCount != 0)
            {
            char *bacName;
            int cIx;
            struct contigTrack *ctList = NULL, *ct;
            struct bacTrack *bt;
            struct hashEl *hel;

            /* Check line syntax and parse it. */
            if (!sameString(words[1], "glues"))
                errAbort("Bad format line %d of %s\n", lineCount, fileName);
            bacName = words[2];
            for (cIx = 4; cIx < wordCount; cIx += 5)
                {
                char *parts[3];
                int partCount;

                AllocVar(ct);
                ct->ix = atoi(words[cIx]);
                ct->strand = words[cIx+1][0];
                ct->dir = words[cIx+2][0];
                partCount = chopString(words[cIx+3], "(-)", parts, ArraySize(parts));
                if (partCount != 2)
                    errAbort("Bad format line %d of %s\n", lineCount, fileName);
                ct->start = atoi(parts[0]);
                ct->end = atoi(parts[1]);
                ct->cookedScore = atof(words[cIx+4]);
                slAddHead(&ctList, ct);                
                }
            slSort(&ctList, cmpContigTrack);
        
            /* Lookup bacTrack and make it if new. */
            hel = hashLookup(bacHash, bacName);
            if (hel == NULL)
                {
                AllocVar(bt);
                hel = hashAdd(bacHash, bacName, bt);
                bt->name = hel->name;
                slAddHead(&bacList, bt);
                }
            else
                {
                bt = hel->val;
                }
            
            /* Process pairs into bacTrack. */
            addPairs(bt, ctList);
            slFreeList(&ctList);
            }
        }
    fclose(f);
    }
slSort(&bacList, cmpBacTrack);

printStats();
return 0;
}
void writeMergers(struct cdnaAliList *calList, char *cdnaName, char *bacNames[])
/* Write out any mergers indicated by this cdna. This destroys calList. */
{
struct cdnaAliList *startBac, *endBac, *cal, *prevCal, *nextCal;
int bacCount;
int bacIx;
    
    {
    if (sameString(cdnaName, "R08304_AND_R08305"))
        {
        uglyf("Got you %s\n", cdnaName);
        }
    }

slSort(&calList, cmpCal);
for (startBac = calList; startBac != NULL; startBac = endBac)
    {
    /* Scan until find a cal that isn't pointing into the same BAC. */
    bacCount = 1;
    bacIx = startBac->bacIx;
    prevCal = startBac;
    for (cal =  startBac->next; cal != NULL; cal = cal->next)
        {
        if (cal->bacIx != bacIx)
            {
            prevCal->next = NULL;
            break;
            }
        ++bacCount;
        prevCal = cal;
        }
    endBac = cal;
    if (bacCount > 1)
        {
        while (startBac != NULL)
            {
            struct cdnaAliList *clumpList = NULL, *leftoverList = NULL;
            for (cal = startBac; cal != NULL; cal = nextCal)
                {
                nextCal = cal->next;
                if (noMajorOverlap(cal, clumpList))
                    {
                    slAddHead(&clumpList, cal);
                    }
                else
                    {
                    slAddHead(&leftoverList, cal);
                    }
                }
            slReverse(&clumpList);
            slReverse(&leftoverList);
            if (slCount(clumpList) > 1)
                {
                char lastStrand = 0;
                boolean switchedStrand = FALSE;
                if (!allSameContig(clumpList))
                    {
                    fprintf(mergerOut, "%s glues %s contigs", cdnaName, bacNames[bacIx]);
                    lastStrand = clumpList->strand;
                    for (cal = clumpList; cal != NULL; cal = cal->next)
                        {
                        if (cal->strand != lastStrand)
                            switchedStrand = TRUE;
                        fprintf(mergerOut, " %d %c %c' (%d-%d) %3.1f%%", cal->seqIx, cal->strand, 
                            cal->dir,
                            cal->start, cal->end, 100.0*cal->cookedScore);
                        }
                    fprintf(mergerOut, "\n");
                    }
                }
            freeCalList(&clumpList);
            startBac = leftoverList;
            }        
        }
    else
        {
        freeCalList(&startBac);
        }
    }
}
Exemple #29
0
struct bigBedInterval *bigBedIntervalQuery(struct bbiFile *bbi, char *chrom,
	bits32 start, bits32 end, int maxItems, struct lm *lm)
/* Get data for interval.  Return list allocated out of lm.  Set maxItems to maximum
 * number of items to return, or to 0 for all items. */
{
struct bigBedInterval *el, *list = NULL;
int itemCount = 0;
bbiAttachUnzoomedCir(bbi);
bits32 chromId;
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bbi, bbi->unzoomedCir,
	chrom, start, end, &chromId);
struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;
struct dyString *dy = dyStringNew(32);

/* Set up for uncompression optionally. */
char *uncompressBuf = NULL;
if (bbi->uncompressBufSize > 0)
    uncompressBuf = needLargeMem(bbi->uncompressBufSize);

for (block = blockList; block != NULL; )
    {
    /* Find contigious blocks and read them into mergedBuf. */
    fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
    bits64 mergedOffset = block->offset;
    bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
    udcSeek(udc, mergedOffset);
    char *mergedBuf = needLargeMem(mergedSize);
    udcMustRead(udc, mergedBuf, mergedSize);
    char *blockBuf = mergedBuf;

    /* Loop through individual blocks within merged section. */
    for (;block != afterGap; block = block->next)
        {
	/* Uncompress if necessary. */
	char *blockPt, *blockEnd;
	if (uncompressBuf)
	    {
	    blockPt = uncompressBuf;
	    int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bbi->uncompressBufSize);
	    blockEnd = blockPt + uncSize;
	    }
	else
	    {
	    blockPt = blockBuf;
	    blockEnd = blockPt + block->size;
	    }

	while (blockPt < blockEnd)
	    {
	    /* Read next record into local variables. */
	    bits32 chr = memReadBits32(&blockPt, isSwapped);	// Read and discard chromId
	    bits32 s = memReadBits32(&blockPt, isSwapped);
	    bits32 e = memReadBits32(&blockPt, isSwapped);
	    int c;
	    dyStringClear(dy);
	    while ((c = *blockPt++) >= 0)
		{
		if (c == 0)
		    break;
		dyStringAppendC(dy, c);
		}

	    /* If we're actually in range then copy it into a new  element and add to list. */
	    if (chr == chromId && rangeIntersection(s, e, start, end) > 0)
		{
		++itemCount;
		if (maxItems > 0 && itemCount > maxItems)
		    break;

		lmAllocVar(lm, el);
		el->start = s;
		el->end = e;
		if (dy->stringSize > 0)
		    el->rest = lmCloneString(lm, dy->string);
		slAddHead(&list, el);
		}
	    }
	if (maxItems > 0 && itemCount > maxItems)
	    break;
	blockBuf += block->size;
        }
    if (maxItems > 0 && itemCount > maxItems)
        break;
    freez(&mergedBuf);
    }
freeMem(uncompressBuf);
dyStringFree(&dy);
slFreeList(&blockList);
slReverse(&list);
return list;
}
Exemple #30
0
int addBamPaired(const bam1_t *bam, void *data)
#endif
/* bam_fetch() calls this on each bam alignment retrieved.  Translate each bam
 * into a linkedFeaturesSeries item, and either store it until we find its mate
 * or add it to tg->items. */
{
const bam1_core_t *core = &bam->core;
struct bamTrackData *btd = (struct bamTrackData *)data;
if (! passesFilters(bam, btd))
    return 0;
struct linkedFeatures *lf = bamToLf(bam, data);
struct track *tg = btd->tg;
if (!(core->flag & BAM_FPAIRED) || (core->flag & BAM_FMUNMAP))
    {
    if (lf->start < winEnd && lf->end > winStart)
	slAddHead(&(tg->items), lfsFromLf(lf));
    if ((core->flag & BAM_FMUNMAP) && sameString(btd->colorMode, BAM_COLOR_MODE_GRAY) &&
	sameString(btd->grayMode, BAM_GRAY_MODE_UNPAIRED))
	// not properly paired: make it a lighter shade.
	lf->grayIx -= 4;
    }
else
    {
    struct linkedFeatures *lfMate = (struct linkedFeatures *)hashFindVal(btd->pairHash, lf->name);
    if (lfMate == NULL)
	{
	if (core->flag & BAM_FPROPER_PAIR)
	    {
	    // If we know that this is properly paired, but don't have the mate,
	    // make a bogus item off the edge of the window so that if we don't
	    // encounter its mate later, we can at least draw an arrow off the
	    // edge of the window.
	    struct linkedFeatures *stub;
	    // don't link to pair that's not on the same chrom
	    if ((core->mpos < 0) || (core->tid != core->mtid))
		{
		int offscreen;
		if (lf->orientation > 0)
		    offscreen = max(winEnd, lf->end) + 10;
		else
		    offscreen = min(winStart, lf->start) - 10;
		if (offscreen < 0) offscreen = 0;
		stub = lfStub(offscreen, -lf->orientation);
		}
	    else
		{
		stub = lfStub(core->mpos, -lf->orientation);
		}
	    lf->next = stub;
	    }
	else if (sameString(btd->colorMode, BAM_COLOR_MODE_GRAY) &&
		 sameString(btd->grayMode, BAM_GRAY_MODE_UNPAIRED))
	    // not properly paired: make it a lighter shade.
	    lf->grayIx -= 4;
	hashAdd(btd->pairHash, lf->name, lf);
	}
    else
	{
	lfMate->next = lf;
	if (min(lfMate->start, lf->start) < winEnd && max(lfMate->end, lf->end) > winStart)
	    slAddHead(&(tg->items), lfsFromLf(lfMate));
	hashRemove(btd->pairHash, lf->name);
	}
    }
return 0;
}