void bamLoadItemsCore(struct track *tg, boolean isPaired) /* Load BAM data into tg->items item list, unless zoomed out so far * that the data would just end up in dense mode and be super-slow. */ { /* protect against temporary network error */ struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { struct hash *pairHash = isPaired ? hashNew(18) : NULL; int minAliQual = atoi(cartOrTdbString(cart, tg->tdb, BAM_MIN_ALI_QUAL, BAM_MIN_ALI_QUAL_DEFAULT)); char *colorMode = cartOrTdbString(cart, tg->tdb, BAM_COLOR_MODE, BAM_COLOR_MODE_DEFAULT); char *grayMode = cartOrTdbString(cart, tg->tdb, BAM_GRAY_MODE, BAM_GRAY_MODE_DEFAULT); char *userTag = cartOrTdbString(cart, tg->tdb, BAM_COLOR_TAG, BAM_COLOR_TAG_DEFAULT); int aliQualShadeMin = 0, aliQualShadeMax = 99, baseQualShadeMin = 0, baseQualShadeMax = 40; parseIntRangeSetting(tg->tdb, "aliQualRange", &aliQualShadeMin, &aliQualShadeMax); parseIntRangeSetting(tg->tdb, "baseQualRange", &baseQualShadeMin, &baseQualShadeMax); struct bamTrackData btd = {tg, pairHash, minAliQual, colorMode, grayMode, userTag, aliQualShadeMin, aliQualShadeMax, baseQualShadeMin, baseQualShadeMax}; char *fileName = trackDbSetting(tg->tdb, "bigDataUrl"); if (fileName == NULL) { if (tg->customPt) { errAbort("bamLoadItemsCore: can't find bigDataUrl for custom track %s", tg->track); } else { struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb); fileName = bamFileNameFromTable(conn, tg->table, chromName); hFreeConn(&conn); } } char *fileName2 = hReplaceGbdb(fileName); char posForBam[512]; safef(posForBam, sizeof(posForBam), "%s:%d-%d", chromName, winStart, winEnd); char *cacheDir = cfgOption("cramRef"); char *refUrl = trackDbSetting(tg->tdb, "refUrl"); if (!isPaired) bamFetchPlus(fileName2, posForBam, addBam, &btd, NULL, refUrl, cacheDir); else { char *setting = trackDbSettingClosestToHomeOrDefault(tg->tdb, "pairSearchRange", "20000"); int pairSearchRange = atoi(setting); if (pairSearchRange > 0) safef(posForBam, sizeof(posForBam), "%s:%d-%d", chromName, max(0, winStart-pairSearchRange), winEnd+pairSearchRange); bamFetchPlus(fileName2, posForBam, addBamPaired, &btd, NULL, refUrl, cacheDir); struct hashEl *hel; struct hashCookie cookie = hashFirst(btd.pairHash); while ((hel = hashNext(&cookie)) != NULL) { struct linkedFeatures *lf = hel->val; if (lf->start < winEnd && lf->end > winStart) slAddHead(&(tg->items), lfsFromLf(lf)); } } freez(&fileName2); if (tg->visibility != tvDense) { slReverse(&(tg->items)); if (isPaired) slSort(&(tg->items), linkedFeaturesSeriesCmp); else if (sameString(colorMode, BAM_COLOR_MODE_STRAND)) slSort(&(tg->items), linkedFeaturesCmpOri); else if (sameString(colorMode, BAM_COLOR_MODE_GRAY) && sameString(grayMode, BAM_GRAY_MODE_ALI_QUAL)) slSort(&(tg->items), linkedFeaturesCmpScore); else slSort(&(tg->items), linkedFeaturesCmpStart); if (slCount(tg->items) > MAX_ITEMS_FOR_MAPBOX) { // flag drawItems to make a mapBox for the whole track tg->customInt = 1; tg->mapItem = dontMapItem; } } } errCatchEnd(errCatch); if (errCatch->gotError) { tg->networkErrMsg = cloneString(errCatch->message->string); tg->drawItems = bigDrawWarning; tg->totalHeight = bigWarnTotalHeight; } errCatchFree(&errCatch); }
struct dgNodeRef *dgFindPath(struct diGraph *dg, struct dgNode *a, struct dgNode *b) /* Find shortest path from a to b. Return NULL if can't be found. */ { struct dgNodeRef *refList = NULL, *ref; struct dgConnection *con; struct dgNode *node, *nNode; struct dlList *fifo; struct dlNode *ffNode; struct dgNode endNode; int fifoSize = 1; /* Do some quick and easy tests first to return if have no way out * of node A, or if B directly follows A. */ if (a->nextList == NULL) return NULL; if (a == b) { AllocVar(ref); ref->node = a; return ref; } if ((con = dgFindNodeInConList(a->nextList, b)) != NULL) { AllocVar(refList); refList->node = a; node = con->node; AllocVar(ref); ref->node = node; slAddTail(&refList, ref); return refList; } /* Set up for breadth first traversal. Will use a doubly linked * list as a fifo. */ for (node = dg->nodeList; node != NULL; node = node->next) node->tempEntry = NULL; fifo = newDlList(); dlAddValTail(fifo, a); a->tempEntry = &endNode; while ((ffNode = dlPopHead(fifo)) != NULL) { --fifoSize; node = ffNode->val; freeMem(ffNode); for (con = node->nextList; con != NULL; con = con->next) { nNode = con->node; if (nNode->tempEntry == NULL) { nNode->tempEntry = node; if (nNode == b) { while (nNode != &endNode && nNode != NULL) { AllocVar(ref); ref->node = nNode; slAddHead(&refList, ref); nNode = nNode->tempEntry; } break; } else { dlAddValTail(fifo, nNode); ++fifoSize; if (fifoSize > 100000) errAbort("Internal error in dgFindPath"); } } } } freeDlList(&fifo); return refList; }
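/* Illustrative sketch (not part of the original source): one plausible way to call
 * dgFindPath.  It assumes the usual diGraph helpers dgNew/dgAddNode/dgConnect/dgFree
 * with these signatures, and that dgNode carries the name it was added under; the
 * returned dgNodeRef list is freed with slFreeList. */
static void exampleFindPath()
{
struct diGraph *dg = dgNew();
struct dgNode *a = dgAddNode(dg, "a", NULL);
struct dgNode *b = dgAddNode(dg, "b", NULL);
struct dgNode *c = dgAddNode(dg, "c", NULL);
dgConnect(dg, a, b);
dgConnect(dg, b, c);
struct dgNodeRef *ref, *path = dgFindPath(dg, a, c);
for (ref = path; ref != NULL; ref = ref->next)
    printf("%s ", ref->node->name);   /* expect: a b c */
printf("\n");
slFreeList(&path);
dgFree(&dg);
}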
static void clusterClone(int argc, char *argv[]) { int i; for (i=1; i < argc; ++i) { struct lineFile *lf; struct psl *psl; unsigned tSize; char *prevAccPart = (char *)NULL; char *prevAccName = (char *)NULL; char *prevTargetName = (char *)NULL; struct hashEl *el; struct hash *chrHash = newHash(0); struct hash *coordHash = newHash(0); struct coordEl *coord; struct coordEl **coordListPt = (struct coordEl **) NULL; unsigned querySize = 0; int partCount = 0; int partsConsidered = 0; verbose(2,"#\tprocess: %s\n", argv[i]); lf=pslFileOpen(argv[i]); while ((struct psl *)NULL != (psl = pslNext(lf)) ) { char *accName = (char *)NULL; char *targetName = (char *)NULL; int chrCount = 0; double percentCoverage; accName = cloneString(psl->qName); if ((char *)NULL == prevAccPart) { prevAccPart = cloneString(psl->qName); /* first time */ querySize = psl->qSize; ++partsConsidered; } chopSuffixAt(accName,'_'); if ((char *)NULL == prevAccName) prevAccName = cloneString(accName); /* first time */ if ((char *)NULL == prevTargetName) prevTargetName = cloneString(psl->tName); /* first time */ /* encountered a new accession name, process the one we * were working on */ if (differentWord(accName, prevAccName)) { if (partCount > 0) processResult(chrHash, coordHash, prevAccName, querySize, partsConsidered); else verbose(1,"# ERROR %s %s - no coordinates found in %d parts considered\n", prevTargetName, prevAccName, partsConsidered); freeMem(prevAccName); prevAccName = cloneString(accName); freeHash(&chrHash); freeHash(&coordHash); chrHash = newHash(0); coordHash = newHash(0); querySize = 0; partCount = 0; partsConsidered = 0; } tSize = psl->tEnd - psl->tStart; percentCoverage = 100.0*((double)(tSize+1)/(psl->qSize + 1)); if (differentWord(psl->qName, prevAccPart)) { ++partsConsidered; querySize += psl->qSize; freeMem(prevAccPart); prevAccPart = cloneString(psl->qName); } targetName = cloneString(psl->tName); if (differentWord(targetName, prevTargetName)) { freeMem(prevTargetName); prevTargetName = cloneString(targetName); } /* keep a hash of chrom names encountered */ el = hashLookup(chrHash, targetName); if (el == NULL) { if (percentCoverage > minCover) { hashAddInt(chrHash, targetName, 1); chrCount = 1; } else { hashAddInt(chrHash, targetName, 0); chrCount = 0; } } else { if (percentCoverage > minCover) { chrCount = ptToInt(el->val) + 1; el->val=intToPt(chrCount); } } AllocVar(coord); coord->start = psl->tStart; coord->end = psl->tEnd; coord->qSize = psl->qSize; coord->strand = sameWord(psl->strand,"+") ? 
1 : 0; /* when coverage is sufficient */ if (percentCoverage > minCover) { ++partCount; coord->name = cloneString(psl->qName); /* for each chrom name, accumulate a list of coordinates */ el = hashLookup(coordHash, targetName); if (el == NULL) { AllocVar(coordListPt); hashAdd(coordHash, targetName, coordListPt); } else { coordListPt = el->val; } slAddHead(coordListPt,coord); verbose(2,"# %s\t%u\t%u\t%u\t%.4f\t%d %s:%d-%d %s\n", psl->qName, psl->qSize, tSize, tSize - psl->qSize, percentCoverage, chrCount, psl->tName, psl->tStart, psl->tEnd, psl->strand); } else { verbose(3,"# %s\t%u\t%u\t%u\t%.4f\t%d %s:%d-%d %s\n", psl->qName, psl->qSize, tSize, tSize - psl->qSize, percentCoverage, chrCount, psl->tName, psl->tStart, psl->tEnd, psl->strand); } freeMem(accName); freeMem(targetName); pslFree(&psl); } if (partCount > 0) processResult(chrHash, coordHash, prevAccName, querySize, partsConsidered); else verbose(1,"# ERROR %s %s - no coordinates found\n", prevTargetName, prevAccName); freeMem(prevAccName); freeHash(&chrHash); freeHash(&coordHash); lineFileClose(&lf); } } /* static void clusterClone() */
static void pslRefRecycle(struct pslSets *ps, struct pslRef *pr) /* recycle a pslRef object */ { memset(pr, 0, sizeof(struct pslRef)); slAddHead(&ps->refPool, pr); }
void pslSort2(char *outDir, char *tempDir, boolean noHead) /* Do second step of sort - merge all sorted files in tempDir * to final outdir. */ { char fileName[512]; struct slName *tmpList, *tmp; struct midFile *midList = NULL, *mid; int aliCount = 0; FILE *f = NULL; char lastTargetAcc[256]; char targetAcc[256]; strcpy(lastTargetAcc, ""); tmpList = listDir(tempDir, "tmp*.psl"); if (tmpList == NULL) errAbort("No tmp*.psl files in %s\n", tempDir); for (tmp = tmpList; tmp != NULL; tmp = tmp->next) { sprintf(fileName, "%s/%s", tempDir, tmp->name); AllocVar(mid); mid->lf = pslFileOpen(fileName); slAddHead(&midList, mid); } printf("writing %s", outDir); fflush(stdout); /* Write out the lowest sorting line from mid list until done. */ for (;;) { struct midFile *bestMid = NULL; if ( (++aliCount & 0xffff) == 0) { printf("."); fflush(stdout); } for (mid = midList; mid != NULL; mid = mid->next) { if (mid->lf != NULL && mid->psl == NULL) { if ((mid->psl = nextPsl(mid->lf)) == NULL) lineFileClose(&mid->lf); } if (mid->psl != NULL) { if (bestMid == NULL || pslCmpTarget(&mid->psl, &bestMid->psl) < 0) bestMid = mid; } } if (bestMid == NULL) break; getTargetAcc(bestMid->psl->tName, targetAcc); if (!sameString(targetAcc, lastTargetAcc)) { strcpy(lastTargetAcc, targetAcc); carefulClose(&f); sprintf(fileName, "%s/%s.psl", outDir, targetAcc); f = mustOpen(fileName, "w"); if (!noHead) pslWriteHead(f); } pslTabOut(bestMid->psl, f); pslFree(&bestMid->psl); } carefulClose(&f); printf("\n"); printf("Cleaning up temp files\n"); for (tmp = tmpList; tmp != NULL; tmp = tmp->next) { sprintf(fileName, "%s/%s", tempDir, tmp->name); remove(fileName); } }
void addChainQ(struct chrom *chrom, struct chrom *otherChrom, struct chain *chain) /* Add Q side of chain to fill/gap tree of chromosome. * For this side we have to cope with reverse strand * issues. */ { struct slRef *spaceList; struct slRef *ref; struct cBlock *startBlock, *block, *nextBlock; int gapStart, gapEnd; struct gap *gap; boolean isRev = (chain->qStrand == '-'); int qStart = chain->qStart, qEnd = chain->qEnd; if (isRev) { reverseIntRange(&qStart, &qEnd, chain->qSize); reverseBlocksQ(&chain->blockList, chain->qSize); } spaceList = findSpaces(chrom->spaces,qStart,qEnd); startBlock = chain->blockList; for (ref = spaceList; ref != NULL; ref = ref->next) { struct space *space = ref->val; struct fill *fill; for (;;) { nextBlock = startBlock->next; if (nextBlock == NULL) break; gapEnd = nextBlock->qStart; if (gapEnd > space->start) break; startBlock = nextBlock; } if ((fill = fillSpace(chrom, space, chain, startBlock, TRUE)) != NULL) { for (block = startBlock; ; block = nextBlock) { nextBlock = block->next; if (nextBlock == NULL) break; gapStart = block->qEnd; gapEnd = nextBlock->qStart; if (strictlyInside(space->start, space->end, gapStart, gapEnd)) { int ts, te; if (chain->qStrand == '+') { ts = block->tEnd; te = nextBlock->tStart; } else { ts = nextBlock->tStart; te = block->tEnd; } gap = gapNew(gapStart, gapEnd, ts, te); addSpaceForGap(chrom, gap); slAddHead(&fill->gapList, gap); } } freez(&ref->val); /* aka space */ } } slFreeList(&spaceList); if (isRev) reverseBlocksQ(&chain->blockList, chain->qSize); }
void doPastedIdentifiers(struct sqlConnection *conn)
/* Process submit in paste identifiers page. */
{
char *idText = trimSpaces(cartString(cart, hgtaPastedIdentifiers));
htmlOpen("Table Browser (Input Identifiers)");
if (isNotEmpty(idText))
    {
    /* Write terms to temp file, checking whether they have matches, and
     * save temp file name. */
    boolean saveIdText = (strlen(idText) < MAX_IDTEXT);
    char *idTextForLf = saveIdText ? cloneString(idText) : idText;
    struct lineFile *lf = lineFileOnString("idText", TRUE, idTextForLf);
    char *line, *word;
    struct tempName tn;
    FILE *f;
    int totalTerms = 0, foundTerms = 0;
    struct slName* missingTerms = NULL;
    struct dyString *exampleMissingIds = dyStringNew(256);
    char *actualDb = database;
    if (sameWord(curTable, WIKI_TRACK_TABLE))
        actualDb = wikiDbName();
    struct hTableInfo *hti = maybeGetHti(actualDb, curTable, conn);
    char *idField = getIdField(actualDb, curTrack, curTable, hti);
    if (idField == NULL)
        {
        warn("Sorry, I can't tell which field of table %s to treat as the "
             "identifier field.", curTable);
        webNewSection("Table Browser");
        cartRemove(cart, hgtaIdentifierDb);
        cartRemove(cart, hgtaIdentifierTable);
        cartRemove(cart, hgtaIdentifierFile);
        mainPageAfterOpen(conn);
        htmlClose();
        return;
        }
    struct slName *allTerms = NULL, *term;
    while (lineFileNext(lf, &line, NULL))
        {
        while ((word = nextWord(&line)) != NULL)
            {
            term = slNameNew(word);
            slAddHead(&allTerms, term);
            totalTerms++;
            }
        }
    slReverse(&allTerms);
    lineFileClose(&lf);
    char *extraWhere = NULL;
    int maxIdsInWhere = cartUsualInt(cart, "hgt_maxIdsInWhere", DEFAULT_MAX_IDS_IN_WHERE);
    if (totalTerms > 0 && totalTerms <= maxIdsInWhere)
        extraWhere = slNameToInExpression(idField, allTerms);
    struct lm *lm = lmInit(0);
    struct hash *matchHash = getAllPossibleIds(conn, lm, idField, extraWhere);
    trashDirFile(&tn, "hgtData", "identifiers", ".key");
    f = mustOpen(tn.forCgi, "w");
    for (term = allTerms; term != NULL; term = term->next)
        {
        struct slName *matchList = NULL, *match;
        if (matchHash == NULL)
            {
            matchList = slNameNew(term->name);
            }
        else
            {
            /* Support multiple alias->id mappings: */
            char upcased[1024];
            safecpy(upcased, sizeof(upcased), term->name);
            touppers(upcased);
            struct hashEl *hel = hashLookup(matchHash, upcased);
            if (hel != NULL)
                {
                matchList = slNameNew((char *)hel->val);
                while ((hel = hashLookupNext(hel)) != NULL)
                    {
                    match = slNameNew((char *)hel->val);
                    slAddHead(&matchList, match);
                    }
                }
            }
        if (matchList != NULL)
            {
            foundTerms++;
            for (match = matchList; match != NULL; match = match->next)
                {
                mustWrite(f, match->name, strlen(match->name));
                mustWrite(f, "\n", 1);
                }
            }
        else
            {
            slAddHead(&missingTerms, slNameNew(term->name));
            }
        }
    slReverse(&missingTerms);
    carefulClose(&f);
    cartSetString(cart, hgtaIdentifierDb, database);
    cartSetString(cart, hgtaIdentifierTable, curTable);
    cartSetString(cart, hgtaIdentifierFile, tn.forCgi);
    if (saveIdText)
        freez(&idTextForLf);
    else
        cartRemove(cart, hgtaPastedIdentifiers);
    int missingCount = totalTerms - foundTerms;
    if (missingCount > 0)
        {
        char *xrefTable, *aliasField;
        getXrefInfo(conn, &xrefTable, NULL, &aliasField);
        boolean xrefIsSame = xrefTable && sameString(curTable, xrefTable);
        struct tempName tn;
        trashDirFile(&tn, "hgt/missingIds", cartSessionId(cart), ".tmp");
        FILE *f = mustOpen(tn.forCgi, "w");
        int exampleCount = 0;
        for (term = missingTerms; term != NULL; term = term->next)
            {
            if (exampleCount < 10)
                {
                ++exampleCount;
                dyStringPrintf(exampleMissingIds, "%s\n", term->name);
                }
            fprintf(f, "%s\n", term->name);
            }
        carefulClose(&f);
        dyStringPrintf(exampleMissingIds, "\n<a href=%s>Complete list of missing identifiers</a>\n",
                       tn.forHtml);
        warn("Note: %d of the %d given identifiers have no match in "
             "table %s, field %s%s%s%s%s. "
             "Try the \"describe table schema\" button for more "
             "information about the table and field.\n"
             "%d %smissing identifier(s):\n"
             "%s\n",
             (totalTerms - foundTerms), totalTerms, curTable, idField,
             (xrefTable ? (xrefIsSame ? "" : " or in alias table ") : ""),
             (xrefTable ? (xrefIsSame ? "" : xrefTable) : ""),
             (xrefTable ? (xrefIsSame ? " or in field " : ", field ") : ""),
             (xrefTable ? aliasField : ""),
             exampleCount, exampleCount < missingCount ? "example " : "",
             exampleMissingIds->string);
        webNewSection("Table Browser");
        }
    lmCleanup(&lm);
    hashFree(&matchHash);
    }
else
    {
    cartRemove(cart, hgtaIdentifierFile);
    }
mainPageAfterOpen(conn);
htmlClose();
}
void printSettingsWithUrls(struct hash *ra,char *urlSetting,char *nameSetting,char *idSetting)
// will print one or more urls with name and optional id. Only Name is required!
// If more than one, then should add same number of slots to each ("fred;ethyl" & " ;wife")
{
char *names = hashFindVal(ra, nameSetting);
struct slName *nameList = slNameListFromString(names, ';');
char *urls = NULL;
struct slName *urlList = NULL;
char *ids = NULL;
struct slName *idList = NULL;
if (idSetting != NULL)
    ids = hashFindVal(ra, idSetting);
if (ids != NULL)
    {
    idList = slNameListFromString(ids, ';');
    if (slCount(idList) > slCount(nameList))
        {
        if (slCount(nameList) == 1)
            {
            while (slCount(nameList) < slCount(idList))
                slAddHead(&nameList,slNameNew(nameList->name));
            }
        else
            errAbort("The number of items in %s and %s must match for term %s",
                     nameSetting,idSetting,(char *)hashMustFindVal(ra,CV_TERM));
        }
    }
if (urlSetting != NULL)
    urls = hashFindVal(ra, urlSetting);
if (urls != NULL)
    {
    if (slCount(nameList) == 1)
        urlList = slNameNew(urls); // It is the case that singleton URLs sometimes have ';'!
    else
        {
        urlList = slNameListFromString(urls, ';');
        if (slCount(urlList) > slCount(nameList))
            errAbort("The number of items in %s and %s must match for term %s",
                     nameSetting,urlSetting,(char *)hashMustFindVal(ra,CV_TERM));
        }
    }
printf(" <TD>");
// while there are items in the list of vendorNames, print the vendorName
// and vendorID together with the url if present
struct slName *curName = NULL;
struct slName *curId;
struct slName *curUrl;
for (curName=nameList,curId=idList,curUrl=urlList; curName != NULL; curName=curName->next)
    {
    if (curName!=nameList) // Break between links
        printf("<BR>\n ");
    // if there is a url, add it as a link
    char *url = NULL;
    if (curUrl != NULL)
        {
        url = trimSpaces(curUrl->name);
        if (isNotEmpty(url))
            printf("<A TARGET=_BLANK HREF=%s>", url);
        curUrl=curUrl->next;
        }
    printf("%s", curName->name);
    if (curId != NULL)
        {
        char *id = trimSpaces(curId->name);
        if (isNotEmpty(id))
            printf(" %s", id );
        curId=curId->next;
        }
    if (isNotEmpty(url))
        printf("</A>");
    }
puts("</TD>");
// Free the memory
slFreeList(&nameList);
slFreeList(&idList);
slFreeList(&urlList);
}
struct bbiInterval *bigWigIntervalQuery(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
        struct lm *lm)
/* Get data for interval. Return list allocated out of lm. */
{
if (bwf->typeSig != bigWigSig)
    errAbort("Trying to do bigWigIntervalQuery on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);
struct bbiInterval *el, *list = NULL;
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir,
        chrom, start, end, NULL);
struct fileOffsetSize *block;
struct udcFile *udc = bwf->udc;
boolean isSwapped = bwf->isSwapped;
float val;
int i;
// slSort(&blockList, fileOffsetSizeCmp);
struct fileOffsetSize *mergedBlocks = fileOffsetSizeMerge(blockList);
for (block = mergedBlocks; block != NULL; block = block->next)
    {
    udcSeek(udc, block->offset);
    char *blockBuf = needLargeMem(block->size);
    udcRead(udc, blockBuf, block->size);
    char *blockPt = blockBuf, *blockEnd = blockBuf + block->size;
    while (blockPt < blockEnd)
        {
        struct bwgSectionHead head;
        bwgSectionHeadFromMem(&blockPt, &head, isSwapped);
        switch (head.type)
            {
            case bwgTypeBedGraph:
                {
                for (i=0; i<head.itemCount; ++i)
                    {
                    bits32 s = memReadBits32(&blockPt, isSwapped);
                    bits32 e = memReadBits32(&blockPt, isSwapped);
                    val = memReadFloat(&blockPt, isSwapped);
                    if (s < start) s = start;
                    if (e > end) e = end;
                    if (s < e)
                        {
                        lmAllocVar(lm, el);
                        el->start = s;
                        el->end = e;
                        el->val = val;
                        slAddHead(&list, el);
                        }
                    }
                break;
                }
            case bwgTypeVariableStep:
                {
                for (i=0; i<head.itemCount; ++i)
                    {
                    bits32 s = memReadBits32(&blockPt, isSwapped);
                    bits32 e = s + head.itemSpan;
                    val = memReadFloat(&blockPt, isSwapped);
                    if (s < start) s = start;
                    if (e > end) e = end;
                    if (s < e)
                        {
                        lmAllocVar(lm, el);
                        el->start = s;
                        el->end = e;
                        el->val = val;
                        slAddHead(&list, el);
                        }
                    }
                break;
                }
            case bwgTypeFixedStep:
                {
                bits32 s = head.start;
                bits32 e = s + head.itemSpan;
                for (i=0; i<head.itemCount; ++i)
                    {
                    val = memReadFloat(&blockPt, isSwapped);
                    bits32 clippedS = s, clippedE = e;
                    if (clippedS < start) clippedS = start;
                    if (clippedE > end) clippedE = end;
                    if (clippedS < clippedE)
                        {
                        lmAllocVar(lm, el);
                        el->start = clippedS;
                        el->end = clippedE;
                        el->val = val;
                        slAddHead(&list, el);
                        }
                    s += head.itemStep;
                    e += head.itemStep;
                    }
                break;
                }
            default:
                internalErr();
                break;
            }
        }
    freeMem(blockBuf);  /* done with this merged block's buffer */
    }
slFreeList(&mergedBlocks);
slFreeList(&blockList);
slReverse(&list);
return list;
}
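/* Illustrative sketch (not from the original file): a minimal caller for
 * bigWigIntervalQuery using the public kent API (bigWigFileOpen, lmInit,
 * lmCleanup, bbiFileClose); the file name and range passed in are hypothetical. */
static void exampleBigWigQuery(char *fileName, char *chrom, bits32 start, bits32 end)
{
struct bbiFile *bwf = bigWigFileOpen(fileName);
struct lm *lm = lmInit(0);      /* intervals are allocated out of this pool */
struct bbiInterval *iv, *list = bigWigIntervalQuery(bwf, chrom, start, end, lm);
for (iv = list; iv != NULL; iv = iv->next)
    printf("%s\t%u\t%u\t%g\n", chrom, iv->start, iv->end, iv->val);
lmCleanup(&lm);                 /* frees the whole interval list at once */
bbiFileClose(&bwf);
}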
struct hash *agpLoadAll(char *agpFile)
/* load AGP entries into a hash of AGP lists, one per chromosome */
{
struct hash *agpHash = newHash(0);
struct lineFile *lf = lineFileOpen(agpFile, TRUE);
char *words[9];
int lastPos = 0;
int wordCount;
struct agpFrag *agpFrag;
struct agpGap *agpGap;
char *chrom;
struct agp *agp;
struct hashEl *hel;

while ((wordCount = lineFileChopNext(lf, words, ArraySize(words))) != 0)
    {
    lineFileExpectAtLeast(lf, 8, wordCount);
    chrom = words[0];
    if (!hashFindVal(agpHash, chrom))
        lastPos = 1;
    AllocVar(agp);
    if (words[4][0] != 'N' && words[4][0] != 'U')
        {
        /* not a gap */
        lineFileExpectWords(lf, 9, wordCount);
        agpFrag = agpFragLoad(words);
        if (agpFrag->chromStart != lastPos)
            errAbort("Frag start (%d, %d) doesn't match previous end line %d of %s\n",
                     agpFrag->chromStart, lastPos, lf->lineIx, lf->fileName);
        if (agpFrag->chromEnd - agpFrag->chromStart != agpFrag->fragEnd - agpFrag->fragStart)
            errAbort("Sizes don't match in %s and %s line %d of %s\n",
                     agpFrag->chrom, agpFrag->frag, lf->lineIx, lf->fileName);
        lastPos = agpFrag->chromEnd + 1;
        agp->entry = agpFrag;
        agp->isFrag = TRUE;
        }
    else
        {
        /* gap */
        lineFileExpectWords(lf, 8, wordCount);
        agpGap = agpGapLoad(words);
        if (agpGap->chromStart != lastPos)
            errAbort("Gap start (%d, %d) doesn't match previous end line %d of %s\n",
                     agpGap->chromStart, lastPos, lf->lineIx, lf->fileName);
        lastPos = agpGap->chromEnd + 1;
        agp->entry = agpGap;
        agp->isFrag = FALSE;
        }
    if ((hel = hashLookup(agpHash, chrom)) == NULL)
        hashAdd(agpHash, chrom, agp);
    else
        slAddHead(&(hel->val), agp);
    }

#ifndef DEBUG
    {
    struct hashCookie cookie;
    struct hashEl *hel;
    cookie = hashFirst(agpHash);
    while ((hel = hashNext(&cookie)) != NULL)
        {
        struct agp *agpList;
        agpList = (struct agp *)hel->val;
        /*
        for (agp = agpList; agp != NULL; agp = agp->next)
            printf("isFrag: %d\n", agp->isFrag);
        */
        }
    }
#endif

/* reverse AGP lists */
//hashTraverseVals(agpHash, slReverse);
#ifndef DEBUG
    {
    struct hashCookie cookie;
    struct hashEl *hel;
    cookie = hashFirst(agpHash);
    while ((hel = hashNext(&cookie)) != NULL)
        {
        struct agp *agpList;
        slReverse(&hel->val);
        agpList = hel->val;
        /*
        agpList = (struct agp *)hel->val;
        slReverse(&agpList);
        hashRemove(agpHash, hel->name);
        hashAdd(agpHash, hel->name, agpList);
        */
        /*
        for (agp = agpList; agp != NULL; agp = agp->next)
            printf("isFrag: %d\n", agp->isFrag);
        */
        }
    }
#endif
return agpHash;
}
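/* Example input (a sketch of the AGP lines this loader expects, not taken from the
 * original file): column 5 of 'N' or 'U' marks a gap line (8 columns here), anything
 * else is a placed fragment line (9 columns), and coordinates are 1-based with each
 * entry starting where the previous one ended.
 *
 *   chr1  1      5000   1  F  AC000001.1  1      5000   +
 *   chr1  5001   15000  2  N  10000       contig no
 *   chr1  15001  25000  3  F  AC000002.1  1      10000  -
 */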
void loadCodeBlast(struct track *tg)
/* from the bed 6+1 codeBlast table, make a linkedFeaturesSeries and load it. */
{
struct linkedFeaturesSeries *lfs = NULL, *originalLfs, *codeLfs, *lfsList = NULL;
struct linkedFeatures *lf;
struct slName *codes = NULL, *track=NULL, *scores=NULL;
struct codeBlast *bedList;
struct codeBlast *cb, *list=NULL;
struct sqlConnection *conn = hAllocConn(database);
struct sqlResult *sr;
char **temparray3;
char *temparray[32];
char *temparray2;
char **row;
char *tempstring;
int x;
int cutoff;
char cMode[64];
/* The most common names used to display the method */
char *codeNames[18] = {"within genus", "\t", "crenarchaea", "euryarchaea", "\t",
                       "bacteria", "\t", "eukarya", "\t", "thermophile",
                       "hyperthermophile","acidophile", "alkaliphile", "halophile",
                       "methanogen", "strict aerobe", "strict anaerobe",
                       "anaerobe or aerobe"};
int i;
safef(cMode, sizeof(cMode), "%s.scoreFilter", tg->tdb->track);
cutoff=cartUsualInt(cart, cMode,0 );
sr=hRangeQuery(conn, tg->table, chromName, winStart, winEnd, NULL, 0);
while ((row = sqlNextRow(sr)) != NULL)
    {
    cb = codeBlastLoad(row);
    slAddHead(&list, cb);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
slReverse(&list);
if (list == NULL)
    return;
for (cb = list; cb != NULL; cb = cb->next)
    {
    AllocVar(lfs);
    AllocVar(lf);
    lfs->name = cloneString(cb->name);
    lf = lfFromBed6(cb,0,1000);
    lf->score = cb->score;
    tempstring=cloneString(cb->code);
    chopString(tempstring, "," , temparray, ArraySize(temparray));
    if (sameWord(database, "pyrFur2"))
        {
        temparray3=(char**)calloc(19*8,sizeof(char**));
        for (x=0; x<19; x++)
            {
            temparray3[x]=(char *)calloc(256, sizeof(char*));
            /* Fix to cloneString problem when both patricia and my track
             * were showing at the same time */
            if (temparray[x]!=NULL)
                {
                if (atoi(temparray[x])==1000) temparray3[x]="1000";
                else if (atoi(temparray[x])==900) temparray3[x]="900";
                else if (atoi(temparray[x])==800) temparray3[x]="800";
                else if (atoi(temparray[x])==700) temparray3[x]="700";
                else if (atoi(temparray[x])==600) temparray3[x]="600";
                else if (atoi(temparray[x])==500) temparray3[x]="500";
                else if (atoi(temparray[x])==400) temparray3[x]="400";
                else if (atoi(temparray[x])==300) temparray3[x]="300";
                else if (atoi(temparray[x])==200) temparray3[x]="200";
                else if (atoi(temparray[x])==100) temparray3[x]="100";
                else temparray3[x]="0";
                }
            }
        }
    else
        {
        temparray3=(char**)calloc(18*8,sizeof(char**));
        for (x=0; x<18; x++)
            {
            temparray3[x]=(char *)calloc(256, sizeof(char*));
            /* Fix to cloneString problem when both patricia and my track
             * were showing at the same time */
            if (temparray[x]!=NULL)
                {
                if (atoi(temparray[x])==1000) temparray3[x]="1000";
                else if (atoi(temparray[x])==900) temparray3[x]="900";
                else if (atoi(temparray[x])==800) temparray3[x]="800";
                else if (atoi(temparray[x])==700) temparray3[x]="700";
                else if (atoi(temparray[x])==600) temparray3[x]="600";
                else if (atoi(temparray[x])==500) temparray3[x]="500";
                else if (atoi(temparray[x])==400) temparray3[x]="400";
                else if (atoi(temparray[x])==300) temparray3[x]="300";
                else if (atoi(temparray[x])==200) temparray3[x]="200";
                else if (atoi(temparray[x])==100) temparray3[x]="100";
                else temparray3[x]="0";
                }
            }
        }
    lf->extra = temparray3;
    lfs->start = lf->start;
    lfs->end = lf->end;
    lfs->features= lf;
    slAddHead(&lfsList, lfs);
    }
tg->items=lfsList;
bedList=tg->items;
lfsList=NULL;
if (tg->limitedVis != tvDense)
    {
    originalLfs = tg->items;
    if (sameWord(database, "pyrFur2"))
        {
        for (i = 0; i < 19; i++)
            {
            struct linkedFeatures *lfList = NULL;
            AllocVar(codeLfs);
            /* When doing abyssi, display different names at the beginning */
            if (i == 0)
                codeLfs->name="within Pho";
            else if (i==1)
                codeLfs->name="within Pab";
            else if (i==2)
                codeLfs->name="\t";
            else
                codeLfs->name = cloneString(codeNames[i-1]);
            codeLfs->noLine = TRUE;
            for (lfs = originalLfs; lfs != NULL; lfs = lfs->next)
                {
                lf = lfsToLf(lfs);
                if (i>2)
                    temparray2=((char**)(lfs->features->extra))[i-0];
                else
                    temparray2=((char**)(lfs->features->extra))[i];
                if (i!=2 && i!=5 && i!=7 && i!=9 && atoi(temparray2)>-9997
                    && atoi(temparray2)!=0 && atoi(temparray2)>=cutoff)
                    {
                    lf->score=atoi(temparray2);
                    slAddHead(&lfList,lf);
                    }
                }
            slReverse(&lfList);
            codeLfs->features = lfList;
            slAddHead(&lfsList,codeLfs);
            }
        }
    else
        {
        for (i = 0; i < 18; i++)
            {
            struct linkedFeatures *lfList = NULL;
            AllocVar(codeLfs);
            codeLfs->name = cloneString(codeNames[i]);
            codeLfs->noLine = TRUE;
            for (lfs = originalLfs; lfs != NULL; lfs = lfs->next)
                {
                lf = lfsToLf(lfs);
                temparray2=((char**)(lfs->features->extra))[i];
                if (i!=1 && i!=4 && i!=6 && i!=8 && atoi(temparray2)>-9997
                    && atoi(temparray2)!=0 && atoi(temparray2)>=cutoff)
                    {
                    lf->score=atoi(temparray2);
                    slAddHead(&lfList,lf);
                    }
                }
            slReverse(&lfList);
            codeLfs->features = lfList;
            slAddHead(&lfsList,codeLfs);
            }
        }
    freeLinkedFeaturesSeries(&originalLfs);
    slReverse(&lfsList);
    tg->items=lfsList;
    }
slFreeList(&track);
slFreeList(&scores);
slFreeList(&codes);
codeBlastFree(&list);
}
void oneChromInput(char *database, char *chrom, int chromSize, char *rangeTrack, char *expTrack, struct hash *refLinkHash, struct hash *erHash, FILE *f) /* Read in info for one chromosome. */ { struct binKeeper *rangeBk = binKeeperNew(0, chromSize); struct binKeeper *expBk = binKeeperNew(0, chromSize); struct binKeeper *knownBk = binKeeperNew(0, chromSize); struct bed *rangeList = NULL, *range; struct bed *expList = NULL; struct genePred *knownList = NULL; struct rangeInfo *riList = NULL, *ri; struct hash *riHash = hashNew(0); /* rangeInfo values. */ struct binElement *rangeBeList = NULL, *rangeBe, *beList = NULL, *be; /* Load up data from database. */ rangeList = loadBed(database, chrom, rangeTrack, 12, rangeBk); expList = loadBed(database, chrom, expTrack, 15, expBk); knownList = loadGenePred(database, chrom, "refGene", knownBk); /* Build range info basics. */ rangeBeList = binKeeperFindAll(rangeBk); for (rangeBe = rangeBeList; rangeBe != NULL; rangeBe = rangeBe->next) { range = rangeBe->val; AllocVar(ri); slAddHead(&riList, ri); hashAddSaveName(riHash, range->name, ri, &ri->id); ri->range = range; ri->commonName = findCommonName(range, knownBk, refLinkHash); } slReverse(&riList); /* Mark split ones. */ beList = binKeeperFindAll(expBk); for (be = beList; be != NULL; be = be->next) { struct bed *exp = be->val; struct binElement *subList = binKeeperFind(rangeBk, exp->chromStart, exp->chromEnd); if (slCount(subList) > 1) { struct binElement *sub; for (sub = subList; sub != NULL; sub = sub->next) { struct bed *range = sub->val; struct rangeInfo *ri = hashMustFindVal(riHash, range->name); ri->isSplit = TRUE; } } slFreeList(&subList); } /* Output the nice ones: not split and having some expression info. */ for (ri = riList; ri != NULL; ri = ri->next) { if (!ri->isSplit) { struct bed *range = ri->range; beList = binKeeperFind(expBk, range->chromStart, range->chromEnd); if (beList != NULL) outputAveraged(f, ri, erHash, beList); slFreeList(&beList); } } /* Clean up time! */ freeHash(&riHash); genePredFreeList(&knownList); bedFree(&rangeList); bedFree(&expList); slFreeList(&rangeBeList); slFreeList(&beList); slFreeList(&riList); binKeeperFree(&rangeBk); binKeeperFree(&expBk); binKeeperFree(&knownBk); }
void update(struct g2cFile *old, struct g2cFile *up) { struct gene *oldGene, *upGene; struct cdnaHit *oldHit, *upHit; struct hash *geneHash; struct hashEl *hel; int sameHitCount = 0; int newHitCount = 0; int newGeneCount = 0; int updatedGeneCount = 0; int altCount = 0; struct geneFamily smallFamily; struct geneFamily *family; printf("Updating %s with %s\n", old->name, up->name); /* Hash the existing gene names for faster lookup. */ geneHash = newHash(12); for (oldGene = old->geneList; oldGene != NULL; oldGene = oldGene->next) hashAdd(geneHash, oldGene->name, oldGene); for (upGene = up->geneList; upGene != NULL; upGene = upGene->next) { boolean changedGene = FALSE; if (isAltSplicedName(upGene->name)) { family = getAltFamily(geneHash, upGene->name); ++altCount; } else { hel = hashLookup(geneHash, upGene->name); if (hel != NULL) { smallFamily.gene = hel->val; smallFamily.next = NULL; family = &smallFamily; } else family = NULL; } /* Set corresponding gene in old file to NULL until we * need to find it. */ oldGene = NULL; for (upHit = upGene->hitList; upHit != NULL; upHit = upHit->next) { if ((oldHit = findHitInFamily(family, upHit->name)) != NULL) ++sameHitCount; else { if (oldGene == NULL) { /* We haven't found corresponding gene yet. First * look for it in the family. */ struct geneFamily *member; for (member = family; member != NULL; member = member->next) { if (strcmp(member->gene->name, upGene->name) == 0) { oldGene = member->gene; break; } } /* The corresponding gene doesn't exist yet. We * have to make it up and hang it on the genelist * for the file, the hash list, and the family list. */ if (oldGene == NULL) { oldGene = alloc(sizeof(*oldGene)); oldGene->name = upGene->name; slAddHead(&old->geneList, oldGene); hashAdd(geneHash, oldGene->name, oldGene); member = alloc(sizeof(*member)); member->gene = oldGene; slAddHead(&family, member); ++newGeneCount; } } oldHit = alloc(sizeof(*oldHit)); oldHit->name = upHit->name; oldHit->hel = hel; slAddHead(&oldGene->hitList, oldHit); ++newHitCount; changedGene = TRUE; } } if (changedGene) ++updatedGeneCount; } slSort(&old->geneList, cmpName); printf("Updated %d genes (including %d alt spliced ones) with %d cdna hits (%d hits unchanged) %d new genes\n", updatedGeneCount, altCount, newHitCount, sameHitCount, newGeneCount); }
struct g2cFile *loadG2cFile(char *fileName) { char lineBuf[1024*8]; int lineLen; char *words[256*8]; int wordCount; FILE *f; int lineCount = 0; struct g2cFile *gf = alloc(sizeof(*gf)); int hitCount = 0; int cdnaCount = 0; int geneCount = 0; gf->name = fileName; f = mustOpen(fileName, "r"); gf->cdnaHash = newHash(14); while (fgets(lineBuf, sizeof(lineBuf), f) != NULL) { ++lineCount; lineLen = strlen(lineBuf); if (lineLen >= sizeof(lineBuf) - 1) { errAbort("%s\nLine %d of %s too long, can only handle %d chars\n", lineBuf, lineCount, fileName, sizeof(lineBuf)-1); } wordCount = chopString(lineBuf, whiteSpaceChopper, words, ArraySize(words)); if (wordCount > 0) { struct gene *gene = alloc(sizeof(*gene)); char *geneName = words[0]; int i; /* Create new gene struct and put it on list. */ gene->name = cloneString(geneName); slAddHead(&gf->geneList, gene); ++geneCount; /* Put all cdna hits on gene. */ for (i=1; i<wordCount; ++i) { struct cdnaHit *hit; struct cdnaVal *cdnaVal; struct hashEl *hel; char *cdnaName = words[i]; /* Get cdna, or if it's the first time we've seen it * make up a data structure for it and hang it on * hash list and cdna list. */ if ((hel = hashLookup(gf->cdnaHash, cdnaName)) == NULL) { cdnaVal = alloc(sizeof(*cdnaVal)); hel = hashAdd(gf->cdnaHash, cdnaName, cdnaVal); cdnaVal->name = hel->name; slAddHead(&gf->cdnaList, cdnaVal); ++cdnaCount; } else { cdnaVal = hel->val; } ++cdnaVal->useCount; /* Make up new cdna hit and hang it on the gene. */ hit = alloc(sizeof(*hit)); hit->hel = hel; hit->name = hel->name; slAddHead(&gene->hitList, hit); ++hitCount; } slReverse(&gene->hitList); } } slReverse(&gf->geneList); slSort(&gf->geneList, cmpName); slSort(&gf->cdnaList, cmpName); fclose(f); reportHashStats(gf->cdnaHash); printf("Loaded %s. %d genes %d cdnas %d hits\n", fileName, geneCount, cdnaCount, hitCount); return gf; }
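/* Example input (a sketch inferred from the parser above, not taken from the original
 * file): each line starts with a gene name followed by the cDNAs that hit it,
 * separated by white space.
 *
 *   geneA  cdna1  cdna2  cdna3
 *   geneB  cdna2
 *   geneC  cdna4  cdna5
 */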
struct region *stitchedList(struct frag *fragList) /* Stitch things together into list of regions. */ { struct frag *frag, *nextFrag; struct region *openList = NULL, *closedList = NULL, *region, *nextRegion, *stillOpenList = NULL; struct psl *psl; int distance = 0; for (frag = fragList; frag != NULL; frag = nextFrag) { nextFrag = frag->next; /* Clear hit flags. */ for (region = openList; region != NULL; region = region->next) region->isHit = FALSE; /* Loop through psl's extending open regions where possible * and creating new open regions where not. */ for (psl = frag->pslList; psl != NULL; psl = psl->next) { if ((region = fragConnects(openList, psl, distance)) == NULL) { AllocVar(region); slAddHead(&openList, region); region->qName = frag->chrom; region->qStart = frag->start; region->qEnd = frag->end; region->tName = psl->tName; region->tStart = psl->tStart; region->tEnd = psl->tEnd; region->strand[0] = psl->strand[0]; } region->match += psl->match; region->misMatch += psl->misMatch; region->repMatch += psl->repMatch; region->nCount += psl->nCount; region->qNumInsert += psl->qNumInsert; region->qBaseInsert += psl->qBaseInsert; region->tNumInsert += psl->tNumInsert; region->tBaseInsert += psl->tBaseInsert; region->isHit = TRUE; } /* Move regions not hit by this fragment to closed list. */ for (region = openList; region != NULL; region = nextRegion) { nextRegion = region->next; if (region->isHit) { slAddHead(&stillOpenList, region); } else { slAddHead(&closedList, region); } } openList = stillOpenList; stillOpenList = NULL; if (nextFrag != NULL) distance = nextFrag->start - frag->start; } /* Move remainder of open list to closed list. */ for (region = openList; region != NULL; region = nextRegion) { nextRegion = region->next; slAddHead(&closedList, region); } slReverse(&closedList); return closedList; }
void chainFastSubsetOnT(struct chain *chain, struct cBlock *firstBlock, int subStart, int subEnd, struct chain **retSubChain, struct chain **retChainToFree) /* Get subchain as in chainSubsetOnT. Pass in initial block that may * be known from some index to speed things up. */ { struct chain *sub = NULL; struct cBlock *oldB, *b, *bList = NULL; int qStart = BIGNUM, qEnd = -BIGNUM; int tStart = BIGNUM, tEnd = -BIGNUM; /* Check for easy case. */ if (subStart <= chain->tStart && subEnd >= chain->tEnd) { *retSubChain = chain; *retChainToFree = NULL; return; } /* Build new block list and calculate bounds. */ for (oldB = firstBlock; oldB != NULL; oldB = oldB->next) { if (oldB->tStart >= subEnd) break; b = CloneVar(oldB); if (b->tStart < subStart) { b->qStart += subStart - b->tStart; b->tStart = subStart; } if (b->tEnd > subEnd) { b->qEnd -= b->tEnd - subEnd; b->tEnd = subEnd; } slAddHead(&bList, b); if (qStart > b->qStart) qStart = b->qStart; if (qEnd < b->qEnd) qEnd = b->qEnd; if (tStart > b->tStart) tStart = b->tStart; if (tEnd < b->tEnd) tEnd = b->tEnd; } slReverse(&bList); /* Make new chain based on old. */ if (bList != NULL) { double sizeRatio; AllocVar(sub); sub->blockList = bList; sub->qName = cloneString(chain->qName); sub->qSize = chain->qSize; sub->qStrand = chain->qStrand; sub->qStart = qStart; sub->qEnd = qEnd; sub->tName = cloneString(chain->tName); sub->tSize = chain->tSize; sub->tStart = tStart; sub->tEnd = tEnd; sub->id = chain->id; /* Fake new score. */ sizeRatio = (sub->tEnd - sub->tStart); sizeRatio /= (chain->tEnd - chain->tStart); sub->score = sizeRatio * chain->score; } *retSubChain = *retChainToFree = sub; }
void dupeFoo(char *pslName, char *faName, char *regionFile) /* dupeFoo - Do some duplication analysis. */ { struct lineFile *lf; struct frag *fragList = NULL, *frag; struct hash *fragHash = newHash(16); struct psl *psl; int fragCount=0,missCount=0,dupeCount=0,kSub=0, k1=0, k10=0,k100=0,k1000=0,k10000=0,diffChrom=0,distance; /* Read in fragment list and put it in hash. */ fragList = readFragList(faName); for (frag = fragList; frag != NULL; frag = frag->next) hashAdd(fragHash, frag->name, frag); /* Read psl's and store under the fragment they belong to. */ lf = pslFileOpen(pslName); while ((psl = pslNext(lf)) != NULL) { if ((frag = hashFindVal(fragHash, psl->qName)) == NULL) errAbort("Couldn't find %s in %s line %d of %s", psl->qName, faName, lf->lineIx, lf->fileName); slAddHead(&frag->pslList, psl); } lineFileClose(&lf); /* Look through fragments and report missing and dupes. */ for (frag = fragList; frag != NULL; frag = frag->next) { ++fragCount; if ((psl = frag->pslList) == NULL) { ++missCount; printf("missing %s\n", frag->name); } else { for (psl = frag->pslList; psl != NULL; psl = psl->next) { if (sameString(psl->tName, frag->chrom)) { distance = frag->start - psl->tStart; if (distance != 0) { if (distance < 0) distance = -distance; if (distance >= 10000000) ++k10000; else if (distance >= 1000000) ++k1000; else if (distance >= 100000) ++k100; else if (distance >= 10000) ++k10; else if (distance >= 1000) ++k1; else ++kSub; } } else { ++diffChrom; } } } } printPercent("Total", fragCount, fragCount); printPercent("Unaligned", missCount, fragCount); printPercent("Other Chrom", diffChrom, fragCount); printPercent("Same Chrom >10M", k10000, fragCount); printPercent("Same Chrom >1M", k1000, fragCount); printPercent("Same Chrom >100k", k100, fragCount); printPercent("Same Chrom >10k", k10, fragCount); printPercent("Same Chrom >1k", k1, fragCount); printPercent("Self-overlap", kSub, fragCount); writeRegions(fragList, regionFile); }
void chainSubsetOnQ(struct chain *chain, int subStart, int subEnd, struct chain **retSubChain, struct chain **retChainToFree) /* Get subchain of chain bounded by subStart-subEnd on * query side. Return result in *retSubChain. In some * cases this may be the original chain, in which case * *retChainToFree is NULL. When done call chainFree on * *retChainToFree. The score and id fields are not really * properly filled in. */ { struct chain *sub = NULL; struct cBlock *oldB, *b, *bList = NULL; int qStart = BIGNUM, qEnd = -BIGNUM; int tStart = BIGNUM, tEnd = -BIGNUM; /* Check for easy case. */ if (subStart <= chain->qStart && subEnd >= chain->qEnd) { *retSubChain = chain; *retChainToFree = NULL; return; } /* Build new block list and calculate bounds. */ for (oldB = chain->blockList; oldB != NULL; oldB = oldB->next) { if (oldB->qEnd <= subStart) continue; if (oldB->qStart >= subEnd) break; b = CloneVar(oldB); if (b->qStart < subStart) { b->tStart += subStart - b->qStart; b->qStart = subStart; } if (b->qEnd > subEnd) { b->tEnd -= b->qEnd - subEnd; b->qEnd = subEnd; } slAddHead(&bList, b); if (tStart > b->tStart) tStart = b->tStart; if (tEnd < b->tEnd) tEnd = b->tEnd; if (qStart > b->qStart) qStart = b->qStart; if (qEnd < b->qEnd) qEnd = b->qEnd; } slReverse(&bList); /* Make new chain based on old. */ if (bList != NULL) { AllocVar(sub); sub->blockList = bList; sub->qName = cloneString(chain->qName); sub->qSize = chain->qSize; sub->qStrand = chain->qStrand; sub->qStart = qStart; sub->qEnd = qEnd; sub->tName = cloneString(chain->tName); sub->tSize = chain->tSize; sub->tStart = tStart; sub->tEnd = tEnd; sub->id = chain->id; } *retSubChain = *retChainToFree = sub; }
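/* Illustrative sketch (not part of the original source): the calling pattern the
 * header comment above describes -- use the subchain that comes back, then call
 * chainFree on whatever is returned in *retChainToFree (it is NULL when the
 * original chain itself was returned). */
static void exampleChainSubsetOnQ(struct chain *chain, int qStart, int qEnd)
{
struct chain *sub = NULL, *toFree = NULL;
chainSubsetOnQ(chain, qStart, qEnd, &sub, &toFree);
if (sub != NULL)
    printf("%s:%d-%d maps to %s:%d-%d\n", sub->qName, sub->qStart, sub->qEnd,
           sub->tName, sub->tStart, sub->tEnd);
chainFree(&toFree);     /* frees only a newly made subchain, not the original */
}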
static struct exonInfo *buildGIList(char *database, struct genePred *pred, char *mafTable, unsigned options) { struct exonInfo *giList = NULL; unsigned *exonStart = pred->exonStarts; unsigned *lastStart = &exonStart[pred->exonCount]; unsigned *exonEnd = pred->exonEnds; int *frames = pred->exonFrames; boolean includeUtr = options & MAFGENE_INCLUDEUTR; if (frames == NULL) { genePredAddExonFrames(pred); frames = pred->exonFrames; } assert(frames != NULL); int start = 0; /* first skip 5' UTR if the includeUtr option is not set */ if (!includeUtr) { for(; exonStart < lastStart; exonStart++, exonEnd++, frames++) { int size = *exonEnd - *exonStart; if (*exonStart + size > pred->cdsStart) break; } } for(; exonStart < lastStart; exonStart++, exonEnd++, frames++) { struct exonInfo *gi; int thisStart = *exonStart; int thisEnd = *exonEnd; if (!includeUtr) { if (thisStart > pred->cdsEnd) break; if (thisStart < pred->cdsStart) thisStart = pred->cdsStart; if (thisEnd > pred->cdsEnd) thisEnd = pred->cdsEnd; } int thisSize = thisEnd - thisStart; if (!includeUtr) verbose(3, "in %d %d cds %d %d\n",*exonStart,*exonEnd, thisStart, thisEnd); AllocVar(gi); gi->frame = *frames; gi->name = pred->name; gi->ali = getAliForRange(database, mafTable, pred->chrom, thisStart, thisEnd); gi->chromStart = thisStart; gi->chromEnd = thisEnd; gi->exonStart = start; gi->exonSize = thisSize; verbose(3, "exon size %d\n", thisSize); gi->strand = pred->strand[0]; start += gi->exonSize; slAddHead(&giList, gi); if (!includeUtr) { if (thisEnd == pred->cdsEnd) break; } } slReverse(&giList); return giList; }
static struct rTree *rTreeFromChromRangeArray( struct lm *lm, int blockSize, int itemsPerSlot, void *itemArray, int itemSize, bits64 itemCount, void *context, struct cirTreeRange (*fetchKey)(const void *va, void *context), bits64 (*fetchOffset)(const void *va, void *context), bits64 endFileOffset, int *retLevelCount) { char *items = itemArray; struct rTree *el, *list=NULL, *tree = NULL; /* Make first level above leaf. */ bits64 i; bits64 nextOffset = (*fetchOffset)(items, context); for (i=0; i<itemCount; i += itemsPerSlot) { /* Figure out if we are on final iteration through loop, and the * count of items in this iteration. */ boolean finalIteration = FALSE; int oneSize = itemCount-i; if (oneSize > itemsPerSlot) oneSize = itemsPerSlot; else finalIteration = TRUE; /* Allocate element and put on list. */ lmAllocVar(lm, el); slAddHead(&list, el); /* Fill out most of element from first item in element. */ char *startItem = items + itemSize * i; struct cirTreeRange key = (*fetchKey)(startItem, context); el->startChromIx = el->endChromIx = key.chromIx; el->startBase = key.start; el->endBase = key.end; el->startFileOffset = nextOffset; /* Figure out end of element from offset of next element (or file size * for final element.) */ if (finalIteration) nextOffset = endFileOffset; else { char *endItem = startItem + itemSize*oneSize; nextOffset = (*fetchOffset)(endItem, context); } el->endFileOffset = nextOffset; /* Expand area spanned to include all items in block. */ int j; for (j=1; j<oneSize; ++j) { void *item = items + itemSize*(i+j); key = (*fetchKey)(item, context); if (key.chromIx < el->startChromIx) { el->startChromIx = key.chromIx; el->startBase = key.start; } else if (key.chromIx == el->startChromIx) { if (key.start < el->startBase) el->startBase = key.start; } if (key.chromIx > el->endChromIx) { el->endChromIx = key.chromIx; el->endBase = key.end; } else if (key.chromIx == el->endChromIx) { if (key.end > el->endBase) el->endBase = key.end; } } } slReverse(&list); verbose(2, "Made %d primary index nodes out of %llu items\n", slCount(list), itemCount); /* Now iterate through making more and more condensed versions until have just one. */ int levelCount = 1; tree = list; while (tree->next != NULL || levelCount < 2) { list = NULL; int slotsUsed = blockSize; struct rTree *parent = NULL, *next; for (el = tree; el != NULL; el = next) { next = el->next; if (slotsUsed >= blockSize) { slotsUsed = 1; lmAllocVar(lm, parent); parent = lmCloneMem(lm, el, sizeof(*el)); parent->children = el; el->parent = parent; el->next = NULL; slAddHead(&list, parent); } else { ++slotsUsed; slAddHead(&parent->children, el); el->parent = parent; if (el->startChromIx < parent->startChromIx) { parent->startChromIx = el->startChromIx; parent->startBase = el->startBase; } else if (el->startChromIx == parent->startChromIx) { if (el->startBase < parent->startBase) parent->startBase = el->startBase; } if (el->endChromIx > parent->endChromIx) { parent->endChromIx = el->endChromIx; parent->endBase = el->endBase; } else if (el->endChromIx == parent->endChromIx) { if (el->endBase > parent->endBase) parent->endBase = el->endBase; } } } slReverse(&list); for (el = list; el != NULL; el = el->next) slReverse(&el->children); tree = list; levelCount += 1; } *retLevelCount = levelCount; return tree; }
struct tagStorm *tagStormFromFile(char *fileName) /* Load up all tags from file. */ { int depth = 0, maxDepth = 32; int indentStack[maxDepth]; indentStack[0] = 0; /* Open up file first thing. Abort if there's a problem here. */ struct lineFile *lf = lineFileOpen(fileName, TRUE); /* Set up new empty tag storm and get local pointer to memory pool. */ struct tagStorm *tagStorm = tagStormNew(fileName); struct lm *lm = tagStorm->lm; struct tagStanza *stanza, *parent = NULL, *lastStanza = NULL; int currentIndent = 0; int stanzaCount = 0; int tagCount = 0; while (raSkipLeadingEmptyLines(lf, NULL)) { ++stanzaCount; char *tag, *val; int stanzaIndent, tagIndent; lmAllocVar(lm, stanza); struct slPair *pairList = NULL, *pair; while (raNextTagValWithIndent(lf, &tag, &val, NULL, &tagIndent)) { lmAllocVar(lm, pair); pair->name = lmCloneString(lm, tag); pair->val = lmCloneString(lm, val); if (pairList == NULL) /* If this is first tag of a new stanza check indentation * and put stanza in appropriate level of hierarchy */ { if (tagIndent != currentIndent) { stanzaIndent = tagIndent; if (stanzaIndent > currentIndent) { if (++depth >= maxDepth) errAbort("Tags nested too deep line %d of %s. Max nesting is %d", lf->lineIx, lf->fileName, maxDepth); indentStack[depth] = stanzaIndent; if (lastStanza == NULL) errAbort("Initial stanza needs to be non-indented line %d of %s", lf->lineIx, lf->fileName); parent = lastStanza; } else /* going up */ { /* Find stanza in parent chain at same level of indentation. This * will be an older sibling */ struct tagStanza *olderSibling; for (olderSibling = parent; olderSibling != NULL; olderSibling = olderSibling->parent) { --depth; if (indentStack[depth] == stanzaIndent) break; } if (olderSibling == NULL) { warn("Indentation inconsistent line %d of %s.", lf->lineIx, lf->fileName); warn("If you are using tabs, check your tab stop is set to 8."); warn("Otherwise check that when you are reducing indentation in a stanza"); warn("that it is the same as the previous stanza at the same level."); noWarnAbort(); } parent = olderSibling->parent; } currentIndent = tagIndent; } if (parent == NULL) slAddHead(&tagStorm->forest, stanza); else slAddHead(&parent->children, stanza); stanza->parent = parent; pairList = pair; lastStanza = stanza; } else { if (tagIndent != currentIndent) errAbort("Tags in stanza inconsistently indented line %d of %s", lf->lineIx, lf->fileName); slAddHead(&pairList, pair); } ++tagCount; } slReverse(&pairList); stanza->tagList = pairList; } lineFileClose(&lf); rReverseStanzaList(&tagStorm->forest); return tagStorm; }
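/* Example input (a sketch of the indented, ra-style format this parser reads, not
 * taken from the original file): stanzas are blank-line separated "tag value" pairs,
 * the first stanza must be unindented, and a stanza indented more deeply than the
 * one above it becomes that stanza's child.
 *
 *   lab ucsc
 *   assay rna-seq
 *
 *       sample liver
 *       replicate 1
 *
 *       sample brain
 *       replicate 2
 */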
static void rFindOverlappingBlocks(struct cirTreeFile *crt, int level, bits64 indexFileOffset, bits32 chromIx, bits32 start, bits32 end, struct fileOffsetSize **retList) /* Recursively find blocks with data. */ { struct udcFile *udc = crt->udc; /* Seek to start of block. */ udcSeek(udc, indexFileOffset); /* Read block header. */ UBYTE isLeaf; UBYTE reserved; bits16 i, childCount; udcMustReadOne(udc, isLeaf); udcMustReadOne(udc, reserved); boolean isSwapped = crt->isSwapped; childCount = udcReadBits16(udc, isSwapped); verbose(3, "rFindOverlappingBlocks %llu %u:%u-%u. childCount %d. isLeaf %d\n", indexFileOffset, chromIx, start, end, (int)childCount, (int)isLeaf); if (isLeaf) { /* Loop through node adding overlapping leaves to block list. */ for (i=0; i<childCount; ++i) { bits32 startChromIx = udcReadBits32(udc, isSwapped); bits32 startBase = udcReadBits32(udc, isSwapped); bits32 endChromIx = udcReadBits32(udc, isSwapped); bits32 endBase = udcReadBits32(udc, isSwapped); bits64 offset = udcReadBits64(udc, isSwapped); bits64 size = udcReadBits64(udc, isSwapped); if (cirTreeOverlaps(chromIx, start, end, startChromIx, startBase, endChromIx, endBase)) { struct fileOffsetSize *block; AllocVar(block); block->offset = offset; block->size = size; slAddHead(retList, block); } } } else { /* Read node into arrays. */ bits32 startChromIx[childCount], startBase[childCount]; bits32 endChromIx[childCount], endBase[childCount]; bits64 offset[childCount]; for (i=0; i<childCount; ++i) { startChromIx[i] = udcReadBits32(udc, isSwapped); startBase[i] = udcReadBits32(udc, isSwapped); endChromIx[i] = udcReadBits32(udc, isSwapped); endBase[i] = udcReadBits32(udc, isSwapped); offset[i] = udcReadBits64(udc, isSwapped); } /* Recurse into child nodes that we overlap. */ for (i=0; i<childCount; ++i) { if (cirTreeOverlaps(chromIx, start, end, startChromIx[i], startBase[i], endChromIx[i], endBase[i])) { rFindOverlappingBlocks(crt, level+1, offset[i], chromIx, start, end, retList); } } } }
static struct mafAli *mafFromBed12(char *database, char *track, struct bed *bed, struct slName *orgList) /* Construct a maf out of exons in bed. */ { /* Loop through all block in bed, collecting a list of mafs, one * for each block. While we're at make a hash of all species seen. */ struct hash *speciesHash = hashNew(0); struct mafAli *mafList = NULL, *maf, *bigMaf; struct mafComp *comp, *bigComp; int totalTextSize = 0; int i; for (i=0; i<bed->blockCount; ++i) { int start = bed->chromStart + bed->chromStarts[i]; int end = start + bed->blockSizes[i]; if (thickOnly) { start = max(start, bed->thickStart); end = min(end, bed->thickEnd); } if (start < end) { maf = hgMafFrag(database, track, bed->chrom, start, end, '+', database, NULL); slAddHead(&mafList, maf); for (comp = maf->components; comp != NULL; comp = comp->next) hashStore(speciesHash, comp->src); totalTextSize += maf->textSize; } } slReverse(&mafList); /* Add species in order list too */ struct slName *org; for (org = orgList; org != NULL; org = org->next) hashStore(speciesHash, org->name); /* Allocate memory for return maf that contains all blocks concatenated together. * Also fill in components with any species seen at all. */ AllocVar(bigMaf); bigMaf->textSize = totalTextSize; struct hashCookie it = hashFirst(speciesHash); struct hashEl *hel; while ((hel = hashNext(&it)) != NULL) { AllocVar(bigComp); bigComp->src = cloneString(hel->name); bigComp->text = needLargeMem(totalTextSize + 1); memset(bigComp->text, '.', totalTextSize); bigComp->text[totalTextSize] = 0; bigComp->strand = '+'; bigComp->srcSize = totalTextSize; /* It's safe if a bit of a lie. */ hel->val = bigComp; slAddHead(&bigMaf->components, bigComp); } /* Loop through maf list copying in data. */ int textOffset = 0; for (maf = mafList; maf != NULL; maf = maf->next) { for (comp = maf->components; comp != NULL; comp = comp->next) { bigComp = hashMustFindVal(speciesHash, comp->src); memcpy(bigComp->text + textOffset, comp->text, maf->textSize); bigComp->size += comp->size; } textOffset += maf->textSize; } /* Cope with strand of darkness. */ if (bed->strand[0] == '-') { for (comp = bigMaf->components; comp != NULL; comp = comp->next) reverseComplement(comp->text, bigMaf->textSize); } /* If got an order list then reorder components according to it. */ if (orgList != NULL) { struct mafComp *newList = NULL; for (org = orgList; org != NULL; org = org->next) { comp = hashMustFindVal(speciesHash, org->name); slAddHead(&newList, comp); } slReverse(&newList); bigMaf->components = newList; } /* Rename our own component to bed name */ comp = hashMustFindVal(speciesHash, database); freeMem(comp->src); comp->src = cloneString(bed->name); /* Clean up and go home. */ hashFree(&speciesHash); mafAliFreeList(&mafList); return bigMaf; }
void freeCal(struct cdnaAliList *cal) /* Free up one cal. */ { slAddHead(&calFreeList, cal); }
void hgExpDistance(char *database, char *posTable, char *expTable, char *outTable) /* hgExpDistance - Create table that measures expression distance between pairs. */ { struct sqlConnection *conn = sqlConnect(database); struct sqlResult *sr; char query[256]; char **row; struct hash *expHash = hashNew(16); int realExpCount = -1; struct microData *gene; int rc, t; pthread_t *threads = NULL; pthread_attr_t attr; int *threadID = NULL; void *status; char *tempDir = "."; long time1, time2; time1 = clock1000(); /* Get list/hash of all items with expression values. */ sqlSafef(query, sizeof(query), "select name,expCount,expScores from %s", posTable); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *name = row[0]; if (!hashLookup(expHash, name)) { int expCount = sqlUnsigned(row[1]); int commaCount; float *expScores = NULL; sqlFloatDynamicArray(row[2], &expScores, &commaCount); if (expCount != commaCount) errAbort("expCount and expScores don't match on %s in %s", name, posTable); if (realExpCount == -1) realExpCount = expCount; if (expCount != realExpCount) errAbort("In %s some rows have %d experiments others %d", name, expCount, realExpCount); AllocVar(gene); gene->expCount = expCount; gene->expScores = expScores; hashAddSaveName(expHash, name, gene, &gene->name); slAddHead(&geneList, gene); } } sqlFreeResult(&sr); conn = sqlConnect(database); slReverse(&geneList); geneCount = slCount(geneList); printf("Have %d elements in %s\n", geneCount, posTable); weights = getWeights(realExpCount); if (optionExists("lookup")) geneList = lookupGenes(conn, optionVal("lookup", NULL), geneList); geneCount = slCount(geneList); printf("Got %d unique elements in %s\n", geneCount, posTable); sqlDisconnect(&conn); /* Disconnect because next step is slow. */ if (geneCount < 1) errAbort("ERROR: unique gene count less than one ?"); time2 = clock1000(); verbose(2, "records read time: %.2f seconds\n", (time2 - time1) / 1000.0); f = hgCreateTabFile(tempDir, outTable); /* instantiate threads */ AllocArray( threadID, numThreads ); AllocArray( threads, numThreads ); pthread_attr_init( &attr ); pthread_mutex_init( &mutexfilehandle, NULL ); pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE ); for (t = 0; t < numThreads; t++) { threadID[t] = t; rc = pthread_create( &threads[t], &attr, computeDistance, (void *) &threadID[t]); if (rc) errAbort("ERROR: in pthread_create() %d\n", rc ); } /* synchronize all threads */ for (t = 0; t < numThreads; t++) { rc = pthread_join( threads[t], &status); if (rc) errAbort("ERROR: in pthread_join() %d\n", rc ); } printf("Made %s.tab\n", outTable); slFreeList( &geneList ); pthread_mutex_destroy( &mutexfilehandle ); pthread_attr_destroy( &attr ); time1 = time2; time2 = clock1000(); verbose(2, "distance computation time: %.2f seconds\n", (time2 - time1) / 1000.0); /* Create and load table. */ conn = sqlConnect(database); distanceTableCreate(conn, outTable); hgLoadTabFile(conn, tempDir, outTable, &f); printf("Loaded %s\n", outTable); /* Add indices. */ sqlSafef(query, sizeof(query), "alter table %s add index(query(12))", outTable); sqlUpdate(conn, query); printf("Made query index\n"); if (optionExists("targetIndex")) { sqlSafef(query, sizeof(query), "alter table %s add index(target(12))", outTable); sqlUpdate(conn, query); printf("Made target index\n"); } hgRemoveTabFile(tempDir, outTable); time1 = time2; time2 = clock1000(); verbose(2, "table create/load/index time: %.2f seconds\n", (time2 - time1) / 1000.0); }
void writeClump(struct blockPos *first, struct blockPos *last, char *cdnaName, char strand, char dir, DNA *cdna, int cdnaSize, struct cdnaAliList **pList) /* Write one clump to hitOut. */ { struct dnaSeq *seq = first->seq; char *bacName = seq->name; int seqIx = first->seqIx; int start = first->offset; int end = last->offset+last->size; struct ffAli *ff, *left, *right; int extraAtEnds = minMatch*patSize; struct cdnaAliList *cal; start -= extraAtEnds; if (start < 0) start = 0; end += extraAtEnds; if (end > seq->size) end = seq->size; ++ffSubmitted; if (dumpMe) fprintf(dumpOut, "%s %d %s %d-%d\n", cdnaName, cdnaSize, bacName, start, end); ff = ffFind(cdna, cdna+cdnaSize, seq->dna+start, seq->dna+end, ffCdna); if (dumpMe) { fprintf(dumpOut, "ffFind = %p\n", (void *)ff); } if (ff != NULL) { int ffScore = ffScoreCdna(ff); ++ffAccepted; if (dumpMe) fprintf(dumpOut, "ffScore = %d\n", ffScore); if (ffScore >= 22) { int hiStart, hiEnd; int oldStart, oldEnd; ffFindEnds(ff, &left, &right); hiStart = oldStart = left->nStart - cdna; hiEnd = oldEnd = right->nEnd - cdna; ++ffOkScore; if (solidMatch(&left, &right, cdna, &hiStart, &hiEnd)) { int solidSize = hiEnd - hiStart; int solidScore; int seqStart, seqEnd; double cookedScore; solidScore = scoreCdna(left, right); cookedScore = (double)solidScore/solidSize; if (cookedScore > 0.25) { ++ffSolidMatch; seqStart = left->hStart - seq->dna; seqEnd = right->hEnd - seq->dna; fprintf(hitOut, "%3.1f%% %c %s:%d-%d (old %d-%d) of %d at %s.%d:%d-%d\n", 100.0 * cookedScore, strand, cdnaName, hiStart, hiEnd, oldStart, oldEnd, cdnaSize, bacName, seqIx, seqStart, seqEnd); if (dumpMe) { fprintf(bigHtmlFile, "<A NAME=i%d>", htmlIx); fprintf(bigHtmlFile, "<H2>%4.1f%% %4d %4d %c %s:%d-%d of %d at %s.%d:%d-%d</H2><BR>", 100.0 * cookedScore, solidScore, ffScore, strand, cdnaName, hiStart, hiEnd, cdnaSize, bacName, seqIx, seqStart, seqEnd); fprintf(bigHtmlFile, "</A>"); ffShAli(bigHtmlFile, ff, cdnaName, cdna, cdnaSize, 0, bacName, seq->dna+start, end-start, start, FALSE); fprintf(bigHtmlFile, "<BR><BR>\n"); fprintf(littleHtmlFile, "<A HREF=\"patAli.html#i%d\">", htmlIx); fprintf(littleHtmlFile, "%4.1f%% %4d %4d %c %s:%d-%d of %d at %s.%d:%d-%d\n", 100.0 * cookedScore, solidScore, ffScore, strand, cdnaName, hiStart, hiEnd, cdnaSize, bacName, seqIx, seqStart, seqEnd); fprintf(littleHtmlFile, "</A><BR>"); ++htmlIx; } cal = newCal(first->bacIx, seqIx, hiStart, hiEnd, cdnaSize, strand, dir, cookedScore); slAddHead(pList, cal); } } } ffFreeAli(&ff); } }
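A clump is accepted above only when scoreCdna(left, right) divided by the solid match length exceeds 0.25, i.e. the chain must score at least a quarter of a point per matched base. scoreCdna() and solidMatch() are not in this listing; the sketch below shows one plausible chain score of that general shape (aligned block sizes minus a gap penalty), walking the ffAli blocks via their right pointers. The gap divisor and the omission of per-base mismatch handling are illustrative assumptions.

static int chainScoreSketch(struct ffAli *left, struct ffAli *right)
/* Sum block sizes from left to right, docking a little for gaps between blocks. */
{
int score = 0;
struct ffAli *ff, *next;
for (ff = left; ; ff = next)
    {
    score += (int)(ff->nEnd - ff->nStart);          /* size of this aligned block */
    if (ff == right)
        break;
    next = ff->right;
    int nGap = max((int)(next->nStart - ff->nEnd), 0);  /* gap on the cDNA side */
    int hGap = max((int)(next->hStart - ff->hEnd), 0);  /* gap on the genomic side */
    score -= (nGap + hGap)/8;                       /* illustrative gap penalty */
    }
return score;
}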
int main(int argc, char *argv[]) { struct hash *bacHash; char line[1024]; int lineCount; char *words[256]; int wordCount; int fileIx; char *fileName; FILE *f; if (argc < 2) usage(); bacHash = newHash(16); for (fileIx = 1; fileIx < argc; ++fileIx) { fileName = argv[fileIx]; uglyf("Processing %s\n", fileName); f = mustOpen(fileName, "r"); lineCount = 0; while (fgets(line, sizeof(line), f)) { ++lineCount; wordCount = chopLine(line, words); if (wordCount == ArraySize(words)) errAbort("Too many words line %d of %s\n", lineCount, fileName); if (wordCount != 0) { char *bacName; int cIx; struct contigTrack *ctList = NULL, *ct; struct bacTrack *bt; struct hashEl *hel; /* Check line syntax and parse it. */ if (!sameString(words[1], "glues")) errAbort("Bad format line %d of %s\n", lineCount, fileName); bacName = words[2]; for (cIx = 4; cIx < wordCount; cIx += 5) { char *parts[3]; int partCount; AllocVar(ct); ct->ix = atoi(words[cIx]); ct->strand = words[cIx+1][0]; ct->dir = words[cIx+2][0]; partCount = chopString(words[cIx+3], "(-)", parts, ArraySize(parts)); if (partCount != 2) errAbort("Bad format line %d of %s\n", lineCount, fileName); ct->start = atoi(parts[0]); ct->end = atoi(parts[1]); ct->cookedScore = atof(words[cIx+4]); slAddHead(&ctList, ct); } slSort(&ctList, cmpContigTrack); /* Lookup bacTrack and make it if new. */ hel = hashLookup(bacHash, bacName); if (hel == NULL) { AllocVar(bt); hel = hashAdd(bacHash, bacName, bt); bt->name = hel->name; slAddHead(&bacList, bt); } else { bt = hel->val; } /* Process pairs into bacTrack. */ addPairs(bt, ctList); slFreeList(&ctList); } } fclose(f); } slSort(&bacList, cmpBacTrack); printStats(); return 0; }
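Each non-empty input line is expected to be a merger record: word 0 is the cDNA name, word 1 the literal "glues", word 2 the BAC name, word 3 a filler word such as "contigs", and then one five-word group per contig (index, strand, direction, "(start-end)", score). A worked example with invented names, showing what the parser above would extract:

/* Hypothetical input line (all names invented for illustration):
 *
 *   R12345 glues AC012345 contigs 3 + F' (120-480) 97.5% 7 - R' (10-300) 88.2%
 *
 * chopLine() splits this into 14 words, and the loop starting at word 4 builds
 * two contigTrack entries:
 *
 *   ix=3, strand='+', dir='F', start=120, end=480, cookedScore=97.5
 *   ix=7, strand='-', dir='R', start=10,  end=300, cookedScore=88.2
 *
 * chopString() with separators "(-)" splits "(120-480)" into "120" and "480",
 * and atof() stops at the trailing '%', so "97.5%" parses as 97.5. */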
void writeMergers(struct cdnaAliList *calList, char *cdnaName, char *bacNames[]) /* Write out any mergers indicated by this cdna. This destroys calList. */ { struct cdnaAliList *startBac, *endBac, *cal, *prevCal, *nextCal; int bacCount; int bacIx; { if (sameString(cdnaName, "R08304_AND_R08305")) { uglyf("Got you %s\n", cdnaName); } } slSort(&calList, cmpCal); for (startBac = calList; startBac != NULL; startBac = endBac) { /* Scan until find a cal that isn't pointing into the same BAC. */ bacCount = 1; bacIx = startBac->bacIx; prevCal = startBac; for (cal = startBac->next; cal != NULL; cal = cal->next) { if (cal->bacIx != bacIx) { prevCal->next = NULL; break; } ++bacCount; prevCal = cal; } endBac = cal; if (bacCount > 1) { while (startBac != NULL) { struct cdnaAliList *clumpList = NULL, *leftoverList = NULL; for (cal = startBac; cal != NULL; cal = nextCal) { nextCal = cal->next; if (noMajorOverlap(cal, clumpList)) { slAddHead(&clumpList, cal); } else { slAddHead(&leftoverList, cal); } } slReverse(&clumpList); slReverse(&leftoverList); if (slCount(clumpList) > 1) { char lastStrand = 0; boolean switchedStrand = FALSE; if (!allSameContig(clumpList)) { fprintf(mergerOut, "%s glues %s contigs", cdnaName, bacNames[bacIx]); lastStrand = clumpList->strand; for (cal = clumpList; cal != NULL; cal = cal->next) { if (cal->strand != lastStrand) switchedStrand = TRUE; fprintf(mergerOut, " %d %c %c' (%d-%d) %3.1f%%", cal->seqIx, cal->strand, cal->dir, cal->start, cal->end, 100.0*cal->cookedScore); } fprintf(mergerOut, "\n"); } } freeCalList(&clumpList); startBac = leftoverList; } } else { freeCalList(&startBac); } } }
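The clump-building loop above admits a cal only when noMajorOverlap() reports that it does not substantially overlap anything already accepted (the coordinates here are the cDNA range recorded by writeClump). That helper is not in this listing; the sketch below shows the kind of test it implies, using rangeIntersection() from the kent common library. The one-half threshold is an assumption.

static boolean noMajorOverlapSketch(struct cdnaAliList *cal, struct cdnaAliList *clumpList)
/* Return TRUE unless cal shares more than half of its cDNA range with a clump member. */
{
struct cdnaAliList *other;
int calSize = cal->end - cal->start;
for (other = clumpList; other != NULL; other = other->next)
    {
    int overlap = rangeIntersection(cal->start, cal->end, other->start, other->end);
    if (overlap*2 > calSize)
        return FALSE;
    }
return TRUE;
}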
struct bigBedInterval *bigBedIntervalQuery(struct bbiFile *bbi, char *chrom, bits32 start, bits32 end, int maxItems, struct lm *lm) /* Get data for interval. Return list allocated out of lm. Set maxItems to maximum * number of items to return, or to 0 for all items. */ { struct bigBedInterval *el, *list = NULL; int itemCount = 0; bbiAttachUnzoomedCir(bbi); bits32 chromId; struct fileOffsetSize *blockList = bbiOverlappingBlocks(bbi, bbi->unzoomedCir, chrom, start, end, &chromId); struct fileOffsetSize *block, *beforeGap, *afterGap; struct udcFile *udc = bbi->udc; boolean isSwapped = bbi->isSwapped; struct dyString *dy = dyStringNew(32); /* Set up for optional uncompression. */ char *uncompressBuf = NULL; if (bbi->uncompressBufSize > 0) uncompressBuf = needLargeMem(bbi->uncompressBufSize); for (block = blockList; block != NULL; ) { /* Find contiguous blocks and read them into mergedBuf. */ fileOffsetSizeFindGap(block, &beforeGap, &afterGap); bits64 mergedOffset = block->offset; bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset; udcSeek(udc, mergedOffset); char *mergedBuf = needLargeMem(mergedSize); udcMustRead(udc, mergedBuf, mergedSize); char *blockBuf = mergedBuf; /* Loop through individual blocks within merged section. */ for (;block != afterGap; block = block->next) { /* Uncompress if necessary. */ char *blockPt, *blockEnd; if (uncompressBuf) { blockPt = uncompressBuf; int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bbi->uncompressBufSize); blockEnd = blockPt + uncSize; } else { blockPt = blockBuf; blockEnd = blockPt + block->size; } while (blockPt < blockEnd) { /* Read next record into local variables. */ bits32 chr = memReadBits32(&blockPt, isSwapped); /* chromId of this record, compared against the query's chromId below */ bits32 s = memReadBits32(&blockPt, isSwapped); bits32 e = memReadBits32(&blockPt, isSwapped); int c; dyStringClear(dy); while ((c = *blockPt++) >= 0) { if (c == 0) break; dyStringAppendC(dy, c); } /* If we're actually in range then copy it into a new element and add to list. */ if (chr == chromId && rangeIntersection(s, e, start, end) > 0) { ++itemCount; if (maxItems > 0 && itemCount > maxItems) break; lmAllocVar(lm, el); el->start = s; el->end = e; if (dy->stringSize > 0) el->rest = lmCloneString(lm, dy->string); slAddHead(&list, el); } } if (maxItems > 0 && itemCount > maxItems) break; blockBuf += block->size; } freez(&mergedBuf); if (maxItems > 0 && itemCount > maxItems) break; } freeMem(uncompressBuf); dyStringFree(&dy); slFreeList(&blockList); slReverse(&list); return list; }
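A typical caller of bigBedIntervalQuery() follows the pattern sketched below: open the file, allocate a local memory pool, query one range, walk the returned list, then tear everything down. The file name, range, and lack of errCatch-style error handling are simplifications for illustration.

#include "common.h"
#include "localmem.h"
#include "bigBed.h"

void printOverlapsSketch(char *bbFileName, char *chrom, bits32 start, bits32 end)
/* Print start, end and the tab-separated "rest" of every item overlapping the range. */
{
struct bbiFile *bbi = bigBedFileOpen(bbFileName);   /* aborts if the file is unusable */
struct lm *lm = lmInit(0);                          /* intervals are allocated out of lm */
struct bigBedInterval *iv, *ivList = bigBedIntervalQuery(bbi, chrom, start, end, 0, lm);
for (iv = ivList; iv != NULL; iv = iv->next)
    printf("%s\t%u\t%u\t%s\n", chrom, iv->start, iv->end, iv->rest ? iv->rest : "");
lmCleanup(&lm);                                     /* frees the whole list at once */
bigBedFileClose(&bbi);
}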
int addBamPaired(const bam1_t *bam, void *data) /* bam_fetch() calls this on each bam alignment retrieved. Translate each bam * into a linkedFeaturesSeries item, and either store it until we find its mate * or add it to tg->items. */ { const bam1_core_t *core = &bam->core; struct bamTrackData *btd = (struct bamTrackData *)data; if (! passesFilters(bam, btd)) return 0; struct linkedFeatures *lf = bamToLf(bam, data); struct track *tg = btd->tg; if (!(core->flag & BAM_FPAIRED) || (core->flag & BAM_FMUNMAP)) { if (lf->start < winEnd && lf->end > winStart) slAddHead(&(tg->items), lfsFromLf(lf)); if ((core->flag & BAM_FMUNMAP) && sameString(btd->colorMode, BAM_COLOR_MODE_GRAY) && sameString(btd->grayMode, BAM_GRAY_MODE_UNPAIRED)) /* not properly paired: make it a lighter shade. */ lf->grayIx -= 4; } else { struct linkedFeatures *lfMate = (struct linkedFeatures *)hashFindVal(btd->pairHash, lf->name); if (lfMate == NULL) { if (core->flag & BAM_FPROPER_PAIR) { /* If we know that this is properly paired, but don't have the mate, make a bogus item off the edge of the window so that if we don't encounter its mate later, we can at least draw an arrow off the edge of the window. */ struct linkedFeatures *stub; /* don't link to pair that's not on the same chrom */ if ((core->mpos < 0) || (core->tid != core->mtid)) { int offscreen; if (lf->orientation > 0) offscreen = max(winEnd, lf->end) + 10; else offscreen = min(winStart, lf->start) - 10; if (offscreen < 0) offscreen = 0; stub = lfStub(offscreen, -lf->orientation); } else { stub = lfStub(core->mpos, -lf->orientation); } lf->next = stub; } else if (sameString(btd->colorMode, BAM_COLOR_MODE_GRAY) && sameString(btd->grayMode, BAM_GRAY_MODE_UNPAIRED)) /* not properly paired: make it a lighter shade. */ lf->grayIx -= 4; hashAdd(btd->pairHash, lf->name, lf); } else { lfMate->next = lf; if (min(lfMate->start, lf->start) < winEnd && max(lfMate->end, lf->end) > winStart) slAddHead(&(tg->items), lfsFromLf(lfMate)); hashRemove(btd->pairHash, lf->name); } } return 0; }
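When a read is flagged as properly paired but its mate is on another chromosome or otherwise unavailable, the code above fabricates a stub feature just off-screen so the pair still draws with an arrow leading out of the window. lfStub() itself is not in this listing; below is a hedged sketch of what such a helper plausibly does. Everything beyond the start/end/orientation/components fields actually used here is an assumption about the linkedFeatures layout.

static struct linkedFeatures *lfStubSketch(int xOff, int orientation)
/* Make a minimal one-base placeholder feature at xOff with the given orientation. */
{
struct linkedFeatures *lf;
struct simpleFeature *sf;
AllocVar(lf);
safecpy(lf->name, sizeof(lf->name), "stub");
lf->start = lf->tallStart = xOff;
lf->end = lf->tallEnd = xOff + 1;
lf->orientation = orientation;
AllocVar(sf);
sf->start = lf->start;
sf->end = lf->end;
lf->components = sf;
return lf;
}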