void writeAliasTable(FILE *alias) /* print out tabbed file for loading into the alias table, bacCloneAlias */ { struct hashEl *aliasList = NULL, *aliasEl = NULL; struct hashEl *bacList = NULL, *bacEl = NULL; struct alias *al = NULL; struct bac *bac = NULL; char *name = NULL; int i, j; aliasList = hashElListHash(aliasHash); bacList = hashElListHash(bacHash); if (aliasList != NULL) { /* walk through list of hash elements */ for (aliasEl = aliasList; aliasEl != NULL; aliasEl = aliasEl->next) { al = (struct alias *)aliasEl->val; name = cloneString(aliasEl->name); /* print out row for Sanger STS name and each alias */ for (i = 0; i < NUMALIASES && (al->aliases[i] != NULL); i++) { fprintf(alias, "%s\t%s\n", al->aliases[i], al->sangerName); } fflush(alias); } } else errAbort("The hash of BAC clones aliases is empty or there was an error retrieving the list of entries in the hash\n"); /* those BAC clones without an entry in the alias hash have no sangerName */ /* so do not print to this table */ hashElFreeList(&aliasList); hashElFreeList(&bacList); }
void doPrintSelectedFields() /* Actually produce selected field output as text stream. */ { char *db = cartString(cart, hgtaDatabase); char *table = cartString(cart, hgtaTable); char *varPrefix = checkVarPrefix(); int varPrefixSize = strlen(varPrefix); struct hashEl *varList = NULL, *var; struct slName *fieldList = NULL, *field; textOpen(); /* Gather together field list for primary and linked tables from cart. */ varList = cartFindPrefix(cart, varPrefix); for (var = varList; var != NULL; var = var->next) { if (!sameString(var->val, "0")) { field = slNameNew(var->name + varPrefixSize); if (primaryOrLinked(field->name)) slAddHead(&fieldList, field); } } if (fieldList == NULL) errAbort("Please go back and select at least one field"); slReverse(&fieldList); /* Do output. */ tabOutSelectedFields(db, table, NULL, fieldList); /* Clean up. */ slFreeList(&fieldList); hashElFreeList(&varList); }
static Color cgColorLikeHgGenome(struct track *tg, struct hvGfx *hvg) /* Search the cart variables and use the colors corresponding to the hgGenome */ /* graph. */ { Color ret; struct hashEl *matchingCartSettings = NULL; struct hashEl *el; char cartGenomeWildStr[256]; char *graphCartVarName = NULL; char *colorCartVarSetting = "black"; safef(cartGenomeWildStr, sizeof(cartGenomeWildStr), "hgGenome_graph_%s_*", database); /* for ex. hgGenome_graph_hg18_1_1 */ matchingCartSettings = cartFindLike(cart, cartGenomeWildStr); for (el = matchingCartSettings; el != NULL; el = el->next) { char *val = (char *)el->val; if (val && sameString(val, tg->track)) { graphCartVarName = el->name; break; } } if (graphCartVarName) { char *colorCartVarName = replaceChars(graphCartVarName, "_graph_", "_graphColor_"); colorCartVarSetting = cartUsualString(cart, colorCartVarName, "black"); freeMem(colorCartVarName); } hashElFreeList(&matchingCartSettings); ret = colorFromAscii(hvg, colorCartVarSetting); return ret; }
/* one iteration of convolution * * input is a probability histogram with N bins * bins are numbered 0 through N-1 * output is a probability histogram with (N*2)-1 bins * * The value to calculate in an output bin is a sum * of the probabilities from the input bins that contribute to that * output bin. Specifically: * * output[0] = input[0] * input[0] * output[1] = (input[0] * input[1]) + * (input[1] * input[0]) * output[2] = (input[0] * input[2]) + * (input[1] * input[1]) + * (input[2] * input[0]) * output[3] = (input[0] * input[3]) + * (input[1] * input[2]) + * (input[2] * input[1]) + * (input[3] * input[0]) * output[4] = (input[0] * input[4]) + * (input[1] * input[3]) + * (input[2] * input[2]) + * (input[3] * input[1]) + * (input[4] * input[0]) * the pattern here is that the bin numbers from the input are * summed together to get the bin number of the output. The * probabilities from those bins used from the input in each * individual sum are multiplied together, and all those resulting * multiplies are added together for the resulting output. 
* * A double for() loop would do the above calculation: * for (i = 0; i < N; ++i) { * for (j = 0; j < N; ++j) { * output[i+j] += input[i] * input[j]; * } * } */ static int iteration(struct hash *input, struct hash *output) { struct hashEl *el, *elList; int median = 0; double maxLog = -1000000.0; /* This outside loop is like (i = 0; i < N; ++i) */ elList = hashElListHash(input); for (el = elList; el != NULL; el = el->next) { struct histoGram *hg0; /* a hash element */ struct hashEl *elInput; int bin0; /* This inside loop is like (j = 0; j < N; ++j) */ hg0 = el->val; bin0 = hg0->bin; for (elInput = elList; elInput != NULL; elInput = elInput->next) { struct histoGram *hg1; /* a hash element */ int bin1; char binName[128]; struct hashEl *el; struct histoGram *hgNew; hg1 = elInput->val; bin1 = hg1->bin; /* output bin name is (i+j) == (bin0+bin1) */ snprintf(binName, sizeof(binName), "%d", bin0+bin1); el = hashLookup(output, binName); /* start new output bin if it does not yet exist */ if (el == NULL) { AllocVar(hgNew); hgNew->bin = bin0+bin1; hgNew->prob = hg0->prob * hg1->prob; hgNew->log_2 = hg0->log_2 + hg1->log_2; hashAdd(output, binName, hgNew); } else { /* Add to existing output bin the new inputs */ hgNew = el->val; hgNew->log_2 = addLogProbabilities(hgNew->log_2, (hg0->log_2+hg1->log_2)); hgNew->prob = pow(2.0,hgNew->log_2); } /* Keep track of the resulting output median */ if (hgNew->log_2 > maxLog) { maxLog = hgNew->log_2; median = hgNew->bin; } } } hashElFreeList(&elList); return median; } /* iteration() */
static void setCheckVarsForTable(char *dbTable, char *val)
/* Set every field-check cart variable belonging to this table to val.
 * (The old comment claimed this returned a list; it actually updates
 * the cart in place and returns nothing.) */
{
char prefix[128];
struct hashEl *varList, *var;
/* Build the per-table prefix that all of its check variables share. */
safef(prefix, sizeof(prefix), "%s%s.", checkVarPrefix(), dbTable);
varList = cartFindPrefix(cart, prefix);
for (var = varList; var != NULL; var = var->next)
    cartSetString(cart, var->name, val);
hashElFreeList(&varList);
}
void writeHashToFile(struct hash *countHash, char *outputFile)
/* Write each (name, int count) pair of countHash to outputFile,
 * tab separated, sorted by name. */
{
FILE *out = mustOpen(outputFile, "w");
struct hashEl *hel, *helList = hashElListHash(countHash);
/* hashEl's leading fields line up with slName, so the name-sort works. */
slNameSort((struct slName **)&helList);
for (hel = helList; hel != NULL; hel = hel->next)
    fprintf(out, "%s\t%d\n", hel->name, ptToInt(hel->val));
carefulClose(&out);
hashElFreeList(&helList);
}
void printHash(char *label, struct hash *hash)
/* Print out keys in hash alphabetically. */
{
struct hashEl *hel, *helList = hashElListHash(hash);
slSort(&helList, hashElCmp);
printf("%s:\n", label);
for (hel = helList; hel != NULL; hel = hel->next)
    printf(" %s\n", hel->name);
hashElFreeList(&helList);
}
void doEnrichmentsFromBed3Sample(struct bed3 *sampleList,
    struct sqlConnection *conn,
    struct cdwFile *ef, struct cdwValidFile *vf,
    struct cdwAssembly *assembly, struct target *targetList)
/* Given a bed3 list, calculate enrichments for targets */
{
/* Build a range tree of the sample to measure unique coverage, and get
 * one hash element per chromosome the sample touches. */
struct genomeRangeTree *sampleGrt = cdwMakeGrtFromBed3List(sampleList);
struct hashEl *chrom, *chromList = hashElListHash(sampleGrt->hash);

/* Iterate through each target - and in lockstep each associated grt to calculate unique overlap */
struct target *target;
for (target = targetList; target != NULL; target = target->next)
    {
    if (target->skip)
	continue;
    struct genomeRangeTree *grt = target->grt;
    long long uniqOverlapBases = 0;
    /* Unique overlap: intersect the deduplicated sample ranges (per chrom)
     * with the target's range tree. */
    for (chrom = chromList; chrom != NULL; chrom = chrom->next)
        {
	struct rbTree *sampleTree = chrom->val;
	struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name);
	if (targetTree != NULL)
	    {
	    struct range *range, *rangeList = rangeTreeList(sampleTree);
	    for (range = rangeList; range != NULL; range = range->next)
		{
		/* Do unique base overlap counts (since using range trees both sides) */
		int overlap = rangeTreeOverlapSize(targetTree, range->start, range->end);
		uniqOverlapBases += overlap;
		}
	    }
	}

    /* Figure out how much we overlap allowing same bases in genome
     * to part of more than one overlap. */
    long long overlapBases = 0;
    struct bed3 *sample;
    for (sample = sampleList; sample != NULL; sample = sample->next)
        {
	int overlap = genomeRangeTreeOverlapSize(grt,
	    sample->chrom, sample->chromStart, sample->chromEnd);
	overlapBases += overlap;
	}

    /* Save to database. */
    struct cdwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target,
	overlapBases, uniqOverlapBases);
    cdwQaEnrichSaveToDb(conn, enrich, "cdwQaEnrich", 128);
    cdwQaEnrichFree(&enrich);
    }

genomeRangeTreeFree(&sampleGrt);
hashElFreeList(&chromList);
}
static void printGbConf(FILE *fh, struct gbConf *conf)
/* Print contents of genbank.conf file for debugging purposes */
{
struct hashEl *el, *elList = hashElListHash(conf->hash);
slSort(&elList, hashElCmp);
for (el = elList; el != NULL; el = el->next)
    fprintf(fh, "%s = %s\n", el->name, (char*)el->val);
hashElFreeList(&elList);
}
struct slName *hashToList(struct hash *wordHash)
/* convert the hash back to a list. */
{
struct slName *names = NULL;
struct hashEl *hel, *helList = hashElListHash(wordHash);
/* Gather every key, then sort so the result is name-ordered. */
for (hel = helList; hel != NULL; hel = hel->next)
    slNameAddHead(&names, hel->name);
slNameSort(&names);
hashElFreeList(&helList);
return names;
}
struct hash *minChromSizeKeeperHash(struct hash *sizeHash)
/* Return a hash full of binKeepers that match the input sizeHash,
 * (which generally is the output of minChromSizeFromBeds). */
{
struct hash *keeperHash = hashNew(16);
struct hashEl *hel, *helList = hashElListHash(sizeHash);
for (hel = helList; hel != NULL; hel = hel->next)
    {
    /* One binKeeper spanning [0, minSize) per chromosome. */
    struct minChromSize *chrom = hel->val;
    hashAdd(keeperHash, chrom->chrom, binKeeperNew(0, chrom->minSize));
    }
hashElFreeList(&helList);
return keeperHash;
}
char *hashToRaString(struct hash *hash)
/* Convert hash to string in ra format. */
{
struct dyString *ra = dyStringNew(0);
struct hashEl *hel, *helList = hashElListHash(hash);
/* Emit "name value" lines in sorted key order. */
slSort(&helList, hashElCmp);
for (hel = helList; hel != NULL; hel = hel->next)
    {
    dyStringAppend(ra, hel->name);
    dyStringAppendC(ra, ' ');
    dyStringAppend(ra, hel->val);
    dyStringAppendC(ra, '\n');
    }
hashElFreeList(&helList);
return dyStringCannibalize(&ra);
}
static struct bed* sectionsFromChromSizes(struct hash* chromSizes)
/* return a list of bed3s used as sections using the whole chromosome sizes */
{
struct bed* sections = NULL;
struct hashEl* hel;
struct hashEl* helList = hashElListHash(chromSizes);
for (hel = helList; hel != NULL; hel = hel->next)
    {
    /* One whole-chromosome bed per size-hash entry. */
    struct bed* section;
    AllocVar(section);
    section->chrom = cloneString(hel->name);
    section->chromStart = 0;
    section->chromEnd = (unsigned)ptToInt(hel->val);
    slAddHead(&sections, section);
    }
slReverse(&sections);
hashElFreeList(&helList);
return sections;
}
void bwtool_lift(struct hash *options, char *favorites, char *regions, unsigned decimals,
		 enum wigOutType wot, char *bigfile, char *chainfile, char *outputfile)
/* bwtool_lift - main for lifting program */
{
struct hash *sizeHash = NULL;
struct hash *chainHash = readLiftOverMapChainHash(chainfile);
struct hash *gpbw = NULL;
char *size_file = hashFindVal(options, "sizes");	/* optional explicit chrom sizes */
char *bad_file = hashFindVal(options, "unlifted");	/* optional file for unlifted data */
/* Chromosome sizes come either from the user-supplied file or from the
 * query side of the chain file. */
if (size_file)
    sizeHash = readCsizeHash(size_file);
else
    sizeHash = qSizeHash(chainfile);
/* Per-base wig covering the whole genome, initialized to NA by genomePbw. */
gpbw = genomePbw(sizeHash);
struct metaBig *mb = metaBigOpen_check(bigfile, regions);
char wigfile[512];
safef(wigfile, sizeof(wigfile), "%s.tmp.wig", outputfile);
FILE *out = mustOpen(wigfile, "w");
struct hashEl *elList = hashElListHash(gpbw);
struct hashEl *el;
/* Three lifting passes over the data; see do_pass1/do_pass2/do_final_pass
 * for what each pass contributes.  Unlifted data goes to bad_file if set. */
verbose(2,"starting first pass\n");
do_pass1(mb, chainHash, gpbw);
verbose(2, "starting second pass\n");
do_pass2(mb, chainHash, gpbw);
verbose(2,"starting final pass\n");
do_final_pass(mb, chainHash, gpbw, bad_file);
/* Write lifted data to a temporary wig in sorted order, then convert it
 * to the final bigWig and remove the temporary. */
slSort(&elList, pbwHashElCmp);
for (el = elList; el != NULL; el = el->next)
    {
    struct perBaseWig *pbw = (struct perBaseWig *)el->val;
    perBaseWigOutputNASkip(pbw, out, wot, decimals, NULL, FALSE, FALSE);
    }
hashElFreeList(&elList);
carefulClose(&out);
hashFreeWithVals(&chainHash, freeChainHashMap);
hashFreeWithVals(&gpbw, perBaseWigFree);
writeBw(wigfile, outputfile, sizeHash);
hashFree(&sizeHash);
remove(wigfile);
metaBigClose(&mb);
}
void agpNotInf(char *agpFile, char *infFile)
/* agpNotInf - List clones in .agp file not in .inf file. */
{
struct hash *agpHash = readAgp(agpFile);
struct hash *infHash = readInf(infFile);
struct hashEl *list, *el;
struct clone *agpClone, *infClone;

list = hashElListHash(agpHash);
uglyf("%d clones in agpHash\n", slCount(list));
for (el = list; el != NULL; el = el->next)
    {
    agpClone = el->val;
    infClone = hashFindVal(infHash, agpClone->name);
    if (infClone == NULL)
	/* NOTE(review): this prints the clone's version, not its name, for
	 * the "missing" case — looks intentional for versioned accessions
	 * but confirm against expected output. */
	printf("%s missing\n", agpClone->version);
    else if (!sameString(agpClone->version, infClone->version))
	printf("%s updated from %s\n", agpClone->version, infClone->version);
    }
hashElFreeList(&list);
}
struct hash *genomePbw(struct hash *qSizes)
/* make a parallel hash of pbws given the size hash, also keyed on chrom name */
{
struct hash *pbwHash = newHash(10);
struct hashEl *list = hashElListHash(qSizes);
struct hashEl *el;
const double na = NANUM;	/* sentinel meaning "no data" */
int i;
for (el = list; el != NULL; el = el->next)
    {
    /* Allocate a per-base wig covering the whole chromosome and fill it
     * with NA. */
    int size = ptToInt(el->val);
    struct perBaseWig *pbw = alloc_perBaseWig(el->name, 0, size);
    for (i = 0; i < pbw->len; i++)
	pbw->data[i] = na;
    /* NOTE(review): alloc_perBaseWig already receives el->name; if it
     * stores its own copy in pbw->name, this cloneString leaks that
     * earlier copy — confirm against alloc_perBaseWig. */
    pbw->name = cloneString(el->name);
    pbw->strand[0] = '+';
    pbw->strand[1] = '\0';
    hashAdd(pbwHash, el->name, pbw);
    }
hashElFreeList(&list);
return pbwHash;
}
boolean anyRealInCart(struct cart *cart, char *wild) /* Return TRUE if variables are set matching wildcard. */ { struct hashEl *varList = NULL, *var; boolean ret = FALSE; varList = cartFindLike(cart, wild); for (var = varList; var != NULL; var = var->next) { char *s = var->val; if (s != NULL) { s = trimSpaces(s); if (s[0] != 0) { ret = TRUE; break; } } } hashElFreeList(&varList); return ret; }
static void printDir(FILE *f, struct _pf_dir *dir, struct _pf_base *base,
	struct hash *idHash)
/* Print out each key/val pair in dir. */
{
if (dir == NULL)
    fprintf(f, "()");
else
    {
    if (idHash)
	{
	/* reuseObject prints a back-reference if dir was seen before;
	 * only print the full contents the first time. */
	if (!reuseObject(f, idHash, dir))
	    {
	    struct hash *hash = dir->hash;
	    struct hashEl *hel, *helList = hashElListHash(hash);
	    /* NOTE(review): this local 'base' shadows the parameter of the
	     * same name; the element type's base is what gets used below,
	     * so the parameter is effectively ignored here — confirm the
	     * parameter is needed at all. */
	    struct _pf_base *base = dir->elType->base;
	    slSort(&helList, hashElCmp);
	    fprintf(f, "(");
	    for (hel = helList; hel != NULL; hel = hel->next)
		{
		fprintf(f, "'%s'@", hel->name);
		/* Cleanup-needing types are passed by reference to the
		 * field printer; plain values are passed directly. */
		if (base->needsCleanup)
		    _pf_printField(f, &hel->val, base, idHash);
		else
		    _pf_printField(f, hel->val, base, idHash);
		if (hel->next != NULL)
		    fprintf(f, ",");
		}
	    hashElFreeList(&helList);
	    fprintf(f, ")");
	    }
	}
    else
	{
	/* Without an idHash just print a summary, not the contents. */
	fprintf(f, "<dir of %d>", dir->hash->elCount);
	}
    }
}
void nibTwoCacheFree(struct nibTwoCache **pNtc) /* Free up resources associated with nibTwoCache. */ { struct nibTwoCache *ntc = *pNtc; if (ntc != NULL) { freez(&ntc->pathName); if (ntc->isTwoBit) twoBitClose(&ntc->tbf); else { struct hashEl *el, *list = hashElListHash(ntc->nibHash); struct nibInfo *nib; for (el = list; el != NULL; el = el->next) { nib = el->val; nibInfoFree(&nib); } hashElFreeList(&list); hashFree(&ntc->nibHash); } freez(pNtc); } }
void writeOut(FILE *sto)
/* write out STS marker information to a file */
{
struct hashEl *mapList = NULL, *mapEl = NULL, *stsList = NULL, *stsEl = NULL;
struct hashEl *aliasList = NULL, *aliasEl = NULL;
struct zfin *zfEl = NULL;
struct sts *sts = NULL, *s = NULL, *stsVal = NULL;
struct alias *aliases = NULL;
struct aliasId *aliasId = NULL;
char *alias = NULL, *id = NULL, *al = NULL, *addName = NULL, *newName = NULL;
boolean found = FALSE, aliasFound = FALSE;
int i = 0, j = 0;
char **aliasNames = NULL;

mapList = hashElListHash(zfinMarkerHash);
stsList = hashElListHash(stsHash);
aliasList = hashElListHash(aliasByNameHash);

fprintf(sto, "UniSTS ID\tUniSTS Accession\tUniSTS Name\tZFIN name\tZFIN ID\tUniSTS Alias\tZFIN Alias\tZFIN Gb Acc\tPanel\tChrom\tPosition\tUnits\n \n");
if (mapList != NULL)
    {
    /* walk through list of hash elements */
    for (mapEl = mapList; mapEl != NULL; mapEl = mapEl->next)
	{
	found = FALSE;
	zfEl = (struct zfin *)mapEl->val;
	/* check aliases in all combinations */
	/* if this name is a key in the stsHash */
	if ((sts = hashFindVal(stsHash, mapEl->name)) != NULL)
	    found = TRUE;
	/* check by ZFIN alias instead */
	else if ((zfEl->zfAlias != NULL) && (!found))
	    {
	    /* NOTE(review): this cloneString is never freed — leaks one
	     * string per marker that reaches this branch. */
	    alias = cloneString(zfEl->zfAlias);
	    touppers(alias);
	    stsVal = NULL;
	    if ((stsVal = hashFindVal(stsHash, alias)) != NULL)
		{
		found = TRUE;
		sts = stsVal;
		}
	    }
	/* check stsHash for name with extension or remove extension from zfin name */
	/* NOTE(review): because this is an else-if chain, this branch is
	 * skipped whenever zfAlias != NULL even if the alias lookup above
	 * failed — confirm that is intended. */
	else if (!found)
	    {
	    sts = (struct sts *)addExtensionAndSearch(mapEl->name, stsHash, FALSE);
	    if (sts != NULL)
		found = TRUE;
	    }
	/* check if this is an alias in alias hash keyed by alias names */
	/* need to get list of aliases in all cases but if sts ID not found */
	/* can find it here */
	/* mapEl->name is in upper case, so get lower case, zfinEl->name */
	aliasFound = FALSE;
	if (((aliasId = hashFindVal(aliasByNameHash, mapEl->name))!=NULL) || ((aliasId = hashFindVal(aliasByNameHash, zfEl->name))!=NULL))
	    aliasFound = TRUE;
	else
	    /* alias is not found, try different extensions */
	    {
	    aliasId = (struct aliasId *)addExtensionAndSearch(zfEl->name, aliasByNameHash, TRUE);
	    if (aliasId != NULL)
		aliasFound = TRUE;
	    }
	/* Drop any stale sts pointer from an earlier iteration. */
	if (!found && sts != NULL)
	    sts = NULL;
	/* all UniSTS IDs have an entry in sts and alias files so the */
	/* UniSTS ID in the sts found will also be in aliases IDs list */
	if (aliasFound)
	    {
	    /* go through each ID in the ids array and get sts struct */
	    /* and the list of aliases */
	    for (j = 0; j < MAXIDS; j++)
		{
		if (aliasId->ids[j] != NULL)
		    {
		    /* get sts and alias names */
		    sts = hashFindVal(stsIdHash, aliasId->ids[j]);
		    aliases = hashFindVal(aliasHash, aliasId->ids[j]);
		    /* NOTE(review): aliases may be NULL if the ID is missing
		     * from aliasHash; aliases->names would then crash —
		     * confirm the "all IDs present" invariant above holds. */
		    printMarkers(sto, sts, zfEl, aliases->names);
		    }
		}
	    }
	else
	    printMarkers(sto, sts, zfEl, NULL);
	}
    }
hashElFreeList(&mapList);
hashElFreeList(&stsList);
hashElFreeList(&aliasList);
}
static void jsonDyStringPrintRecurse(struct dyString *dy, struct jsonElement *ele, int indentLevel)
/* Recursively render ele as JSON text onto dy.  indentLevel >= 0 pretty
 * prints with tabs/newlines; pass < -1 for fully compact output. */
{
if (indentLevel >= -1) // Note that < -1 will result in no indenting
    indentLevel++;
char *tab = "\t";
char *nl = "\n";
if (indentLevel < 0)
    {
    /* Compact mode: suppress all whitespace. */
    tab = "";
    nl = "";
    }
char *indentBuf = makeIndentBuf(indentLevel);
switch (ele->type)
    {
    case jsonObject:
	{
	dyStringPrintf(dy,"{%s",nl);
	if(hashNumEntries(ele->val.jeHash))
	    {
	    /* Members are emitted in sorted key order for stable output. */
	    struct hashEl *el, *list = hashElListHash(ele->val.jeHash);
	    slSort(&list, hashElCmp);
	    for (el = list; el != NULL; el = el->next)
		{
		struct jsonElement *val = el->val;
		dyStringPrintf(dy,"%s%s\"%s\": ", indentBuf, tab, el->name);
		jsonDyStringPrintRecurse(dy, val, indentLevel);
		/* Comma after every member except the last. */
		dyStringPrintf(dy,"%s%s", el->next == NULL ? "" : ",",nl);
		}
	    hashElFreeList(&list);
	    }
	dyStringPrintf(dy,"%s}", indentBuf);
	break;
	}
    case jsonList:
	{
	struct slRef *el;
	dyStringPrintf(dy,"[%s",nl);
	if(ele->val.jeList)
	    {
	    for (el = ele->val.jeList; el != NULL; el = el->next)
		{
		struct jsonElement *val = el->val;
		dyStringPrintf(dy,"%s%s", indentBuf,tab);
		jsonDyStringPrintRecurse(dy, val, indentLevel);
		dyStringPrintf(dy,"%s%s", el->next == NULL ? "" : ",",nl);
		}
	    }
	dyStringPrintf(dy,"%s]", indentBuf);
	break;
	}
    case jsonString:
	{
	dyStringPrintf(dy,"\"%s\"", jsonStringEscape(ele->val.jeString));
	break;
	}
    case jsonBoolean:
	{
	dyStringPrintf(dy,"%s", ele->val.jeBoolean ? "true" : "false");
	break;
	}
    case jsonNumber:
	{
	char buf[256];
	safef(buf, sizeof(buf), "%ld", ele->val.jeNumber);
	dyStringPrintf(dy,"%s", buf);
	break;
	}
    case jsonDouble:
	{
	char buf[256];
	safef(buf, sizeof(buf), "%g", ele->val.jeDouble);
	dyStringPrintf(dy,"%s", buf);
	break;
	}
    default:
	{
	errAbort("jsonPrintRecurse; invalid type: %d", ele->type);
	break;
	}
    }
/* indentBuf is only owned/freed in the indenting case; NOTE(review): in
 * compact mode (indentLevel < 0) makeIndentBuf's result is not freed here —
 * confirm makeIndentBuf returns a static/empty buffer in that case. */
if (indentLevel >= 0)
    freez(&indentBuf);
}
static struct linkedFeatures *cgapSageToLinkedFeatures(struct cgapSage *tag,
    struct hash *libHash, struct hash *libTotHash, enum trackVisibility vis)
/* Convert a single CGAP tag to a list of linkedFeatures. */
{
struct linkedFeatures *libList = NULL;
struct linkedFeatures *skel = skeletonLf(tag);
int i;
if (vis == tvDense)
    /* Just use the skeleton one. */
    {
    /* Dense mode: collapse all kept libraries into one feature whose score
     * is the pooled tags-per-million. */
    int tagTotal = 0;
    int freqTotal = 0;
    int libsUsed = 0;
    for (i = 0; i < tag->numLibs; i++)
	{
	char libId[16];
	char *libName;
	safef(libId, sizeof(libId), "%d", tag->libIds[i]);
	libName = hashMustFindVal(libHash, libId);
	if (keepThisLib(libName, libId))
	    {
	    int libTotal = hashIntVal(libTotHash, libId);
	    tagTotal += libTotal;
	    freqTotal += tag->freqs[i];
	    libsUsed++;
	    }
	}
    if (libsUsed > 0)
	{
	skel->name = cloneString("whatever");
	/* NOTE(review): (1000000/tagTotal) is integer division, so it
	 * truncates (and is 0 once tagTotal > 1000000) before the float
	 * multiply — compare with the pack-mode formula below which
	 * divides in double; confirm which is intended. */
	skel->score = (float)((double)freqTotal * (1000000/tagTotal));
	skel->grayIx = grayIxForCgap(skel->score);
	addSimpleFeature(skel);
	libList = skel;
	}
    }
else if (vis == tvPack)
    {
    /* If it's pack mode, average tissues into one linkedFeature. */
    struct hash *tpmHash = combineCgapSages(tag, libHash, libTotHash);
    struct hashEl *tpmList = hashElListHash(tpmHash);
    struct hashEl *tpmEl;
    slSort(&tpmList, slNameCmp);
    for (tpmEl = tpmList; tpmEl != NULL; tpmEl = tpmEl->next)
	{
	/* One feature per tissue, labeled "tissue (count)" and linked via
	 * the details-page CGI parameters stashed in extra. */
	struct linkedFeatures *tiss = CloneVar(skel);
	struct cgapSageTpmHashEl *tpm = (struct cgapSageTpmHashEl *)tpmEl->val;
	char link[256];
	char *encTissName = NULL;
	double score = 0;
	int len = strlen(tpmEl->name) + 32;
	tiss->name = needMem(len);
	safef(tiss->name, len, "%s (%d)", tpmEl->name, tpm->count);
	encTissName = cgiEncode(tpmEl->name);
	safef(link, sizeof(link), "i=%s&tiss=%s", tag->name, encTissName);
	score = (double)tpm->freqTotal*(1000000/(double)tpm->libTotals);
	tiss->score = (float)score;
	tiss->grayIx = grayIxForCgap(score);
	tiss->extra = cloneString(link);
	freeMem(encTissName);
	addSimpleFeature(tiss);
	slAddHead(&libList, tiss);
	}
    hashElFreeList(&tpmList);
    freeHashAndVals(&tpmHash);
    }
else
    /* full mode */
    {
    /* Full mode: one feature per kept library, scored by that library's
     * precomputed tags-per-million. */
    for (i = 0; i < tag->numLibs; i++)
	{
	char libId[16];
	char *libName;
	char link[256];
	struct linkedFeatures *lf;
	safef(libId, sizeof(libId), "%d", tag->libIds[i]);
	libName = hashMustFindVal(libHash, libId);
	if (keepThisLib(libName, libId))
	    {
	    lf = CloneVar(skel);
	    lf->name = cloneString(libName);
	    safef(link, sizeof(link), "i=%s&lib=%s", tag->name, libId);
	    lf->score = (float)tag->tagTpms[i];
	    lf->grayIx = grayIxForCgap(tag->tagTpms[i]);
	    lf->extra = cloneString(link);
	    addSimpleFeature(lf);
	    slAddHead(&libList, lf);
	    }
	}
    }
slSort(&libList, cgapLinkedFeaturesCmp);
slReverse(&libList);
return libList;
}
void doEnrichmentsFromSampleBed(struct sqlConnection *conn,
    struct edwFile *ef, struct edwValidFile *vf,
    struct edwAssembly *assembly, struct target *targetList)
/* Figure out enrichments from sample bed file. */
{
char *sampleBed = vf->sampleBed;
if (isEmpty(sampleBed))
    {
    warn("No sample bed for %s", ef->edwFileName);
    return;
    }

/* Load sample bed, make a range tree to track unique coverage, and get list of all chroms .*/
struct bed3 *sample, *sampleList = bed3LoadAll(sampleBed);
if (sampleList == NULL)
    {
    warn("Sample bed is empty for %s", ef->edwFileName);
    return;
    }
struct genomeRangeTree *sampleGrt = edwMakeGrtFromBed3List(sampleList);
struct hashEl *chrom, *chromList = hashElListHash(sampleGrt->hash);

/* Iterate through each target - and in lockstep each associated grt to calculate unique overlap */
struct target *target;
for (target = targetList; target != NULL; target = target->next)
    {
    if (target->skip)
	continue;
    struct genomeRangeTree *grt = target->grt;
    long long uniqOverlapBases = 0;
    /* Unique overlap: intersect the deduplicated sample ranges (per chrom)
     * with the target's range tree. */
    for (chrom = chromList; chrom != NULL; chrom = chrom->next)
	{
	struct rbTree *sampleTree = chrom->val;
	struct rbTree *targetTree = genomeRangeTreeFindRangeTree(grt, chrom->name);
	if (targetTree != NULL)
	    {
	    struct range *range, *rangeList = rangeTreeList(sampleTree);
	    for (range = rangeList; range != NULL; range = range->next)
		{
		/* Do unique base overlap counts (since using range trees both sides) */
		int overlap = rangeTreeOverlapSize(targetTree, range->start, range->end);
		uniqOverlapBases += overlap;
		}
	    }
	}

    /* Figure out how much we overlap allowing same bases in genome
     * to part of more than one overlap. */
    long long overlapBases = 0;
    for (sample = sampleList; sample != NULL; sample = sample->next)
	{
	int overlap = genomeRangeTreeOverlapSize(grt,
	    sample->chrom, sample->chromStart, sample->chromEnd);
	overlapBases += overlap;
	}

    /* Save to database. */
    struct edwQaEnrich *enrich = enrichFromOverlaps(ef, vf, assembly, target,
	overlapBases, uniqOverlapBases);
    edwQaEnrichSaveToDb(conn, enrich, "edwQaEnrich", 128);
    edwQaEnrichFree(&enrich);
    }

genomeRangeTreeFree(&sampleGrt);
bed3FreeList(&sampleList);
hashElFreeList(&chromList);
}
/* the printHistorgram needs to order the entries from the hash * by bin number since the hash is not necessarily ordered. * Thus, we first count them, then allocate an array for all, * read them into the array, and then traversing the array they * are ordered. bin numbers are assumed to run from 0 to N-1 * for N bins. */ static void printHistogram(struct hash * histo, int medianBin) { int i; /* for loop counting */ int elCount = 0; /* to count the hash elements */ double *probabilities; /* will be an array of probabilities */ double *log_2; /* will be an array of the log_2 */ double *inverseProbabilities; /* inverse of probabilities */ double *inverseLog_2; /* inverse of log_2 */ struct hashEl *el, *elList; /* to traverse the hash */ double cumulativeProbability; /* to show CPD */ double cumulativeLog_2; /* to show CPD */ elList = hashElListHash(histo); /* fetch the hash as a list */ for (el = elList; el != NULL; el = el->next) { ++elCount; /* count hash elements */ } /* Allocate the arrays */ probabilities = (double *) needMem((size_t)(sizeof(double) * elCount)); log_2 = (double *) needMem((size_t) (sizeof(double) * elCount)); inverseProbabilities = (double *) needMem((size_t)(sizeof(double) * elCount)); inverseLog_2 = (double *) needMem((size_t) (sizeof(double) * elCount)); /* Traverse the list again, this time placing all values in the * arrays */ for (el = elList; el != NULL; el = el->next) { struct histoGram *hg; /* histogram hash element */ hg = el->val; probabilities[hg->bin] = hg->prob; log_2[hg->bin] = hg->log_2; } hashElFreeList(&elList); cumulativeProbability = 0.0; cumulativeLog_2 = -500.0; /* arbitrarily small number */ /* compute the inverse P(V<=v) */ for (i = elCount-1; i >= 0; --i) { if (i == (elCount-1)) { cumulativeLog_2 = log_2[i]; } else { cumulativeLog_2 = addLogProbabilities(cumulativeLog_2, log_2[i]); } if (cumulativeLog_2 > 0.0) cumulativeLog_2 = 0.0; inverseLog_2[i] = cumulativeLog_2; inverseProbabilities[i] = pow(2.0,cumulativeLog_2); } 
printf("Histogram with %d bins:\n", elCount); /* Now the array is an ordered list */ for (i = 0; i < elCount; ++i) { if (i == 0) { cumulativeLog_2 = log_2[i]; } else { cumulativeLog_2 = addLogProbabilities(cumulativeLog_2, log_2[i]); } if (cumulativeLog_2 > 0.0) cumulativeLog_2 = 0.0; cumulativeProbability = pow(2.0,cumulativeLog_2); if (html) { if (medianBin == i) printf("<TR><TH> %d <BR> (median) </TH>\n", i); printf("\t<TD ALIGN=RIGHT> %% %.2f </TD><TD ALIGN=RIGHT> %.4g </TD>\n\t<TD ALIGN=RIGHT> %% %.2f </TD><TD ALIGN=RIGHT> %.4f </TD>\n\t<TD ALIGN=RIGHT> %% %.2f </TD><TD ALIGN=RIGHT> %.4f </TD></TR>\n", 100.0 * probabilities[i], log_2[i], 100.0 * cumulativeProbability, cumulativeLog_2, 100.0 * inverseProbabilities[i], inverseLog_2[i]); } else { printf("bin %d: %% %.2f %0.6g\t%% %.2f\t%.6g\t%% %.2f\t%.6g", i, 100.0 * probabilities[i], log_2[i], 100.0 * cumulativeProbability, cumulativeLog_2, 100.0 * inverseProbabilities[i], inverseLog_2[i]); if (medianBin == i) printf(" - median\n"); else printf("\n"); } } } /* printHistogram() */
char *filterClause(char *db, char *table, char *chrom, char *extraClause)
/* Get filter clause (something to put after 'where')
 * for table.  Returns a newly allocated string the caller owns; if no
 * filter applies, returns a clone of extraClause (which may be NULL). */
{
struct sqlConnection *conn = NULL;
char varPrefix[128];
int varPrefixSize, fieldNameSize;
struct hashEl *varList, *var;
struct dyString *dy = NULL;
boolean needAnd = FALSE;	/* TRUE once a clause has been emitted */
char oldDb[128];
char dbTableBuf[256];
char explicitDb[128];
char splitTable[256];
char explicitDbTable[512];

/* Return just extraClause (which may be NULL) if no filter on us. */
if (! (anyFilter() && filteredOrLinked(db, table)))
    return cloneString(extraClause);
/* Table names like "db.table" may override db; remember whether they did
 * so the SQL can qualify the table explicitly. */
safef(oldDb, sizeof(oldDb), "%s", db);
dbOverrideFromTable(dbTableBuf, &db, &table);
if (!sameString(oldDb, db))
    safef(explicitDb, sizeof(explicitDb), "%s.", db);
else
    explicitDb[0] = 0;

/* Cope with split table and/or custom tracks. */
if (isCustomTrack(table))
    {
    conn = hAllocConn(CUSTOM_TRASH);
    struct customTrack *ct = ctLookupName(table);
    safef(explicitDbTable, sizeof(explicitDbTable), "%s", ct->dbTableName);
    }
else
    {
    conn = hAllocConn(db);
    /* Prefer the per-chromosome split table (chrom_table) if it exists. */
    safef(splitTable, sizeof(splitTable), "%s_%s", chrom, table);
    if (!sqlTableExists(conn, splitTable))
	safef(splitTable, sizeof(splitTable), "%s", table);
    safef(explicitDbTable, sizeof(explicitDbTable), "%s%s",
	  explicitDb, splitTable);
    }

/* Get list of filter variables for this table. */
safef(varPrefix, sizeof(varPrefix), "%s%s.%s.", hgtaFilterVarPrefix, db, table);
varPrefixSize = strlen(varPrefix);
varList = cartFindPrefix(cart, varPrefix);
if (varList == NULL)
    {
    hFreeConn(&conn);
    return cloneString(extraClause);
    }

/* Create filter clause string, stepping through vars. */
dy = dyStringNew(0);
for (var = varList; var != NULL; var = var->next)
    {
    /* Parse variable name into field and type.  Variable names look like
     * <varPrefix><field>.<type>. */
    char field[64], *s, *type;
    s = var->name + varPrefixSize;
    type = strchr(s, '.');
    if (type == NULL)
        internalErr();
    fieldNameSize = type - s;
    if (fieldNameSize >= sizeof(field))
        internalErr();
    memcpy(field, s, fieldNameSize);
    field[fieldNameSize] = 0;
    sqlCkId(field);	/* guard against SQL injection via field name */
    type += 1;
    /* rawLogic and rawQuery are handled below;
     * filterMaxOutputVar is not really a filter variable and is handled
     * in wiggle.c. */
    if (startsWith("raw", type) || sameString(filterMaxOutputVar, type))
	continue;
    /*	Any other variables that are missing a name:
     *		<varPrefix>..<type>
     *	are illegal	*/
    if (fieldNameSize < 1)
	{
	warn("Missing name in cart variable: %s\n", var->name);
	continue;
	}
    if (sameString(type, filterDdVar))
        {
	/* Drop-down style filter: match (or negate-match) one or more
	 * wildcard patterns against the field. */
        char *patVar = filterPatternVarName(db, table, field);
        struct slName *patList = cartOptionalSlNameList(cart, patVar);
	normalizePatList(&patList);
	if (slCount(patList) > 0)
	    {
	    char *ddVal = cartString(cart, var->name);
	    boolean neg = sameString(ddVal, ddOpMenu[1]);
	    char *fieldType = getSqlType(conn, explicitDbTable, field);
	    boolean needOr = FALSE;
	    if (needAnd) dyStringAppend(dy, " and ");
	    needAnd = TRUE;
	    if (neg) dyStringAppend(dy, "not ");
	    boolean composite = (slCount(patList) > 1);
	    if (composite || neg) dyStringAppendC(dy, '(');
	    struct slName *pat;
	    for (pat = patList;  pat != NULL;  pat = pat->next)
		{
		char *sqlPat = sqlLikeFromWild(pat->name);
		if (needOr)
		    dyStringAppend(dy, " OR ");
		needOr = TRUE;
		/* SQL set-typed fields need FIND_IN_SET; others get LIKE
		 * (when wildcards are present) or plain equality. */
		if (isSqlSetType(fieldType))
		    {
		    sqlDyStringPrintfFrag(dy, "FIND_IN_SET('%s', %s.%s)>0 ",
				   sqlPat, explicitDbTable , field);
		    }
		else
		    {
		    sqlDyStringPrintfFrag(dy, "%s.%s ", explicitDbTable, field);
		    if (sqlWildcardIn(sqlPat))
			dyStringAppend(dy, "like ");
		    else
			dyStringAppend(dy, "= ");
		    sqlDyStringPrintf(dy, "'%s'", sqlPat);
		    }
		freez(&sqlPat);
		}
	    if (composite || neg) dyStringAppendC(dy, ')');
	    }
	}
    else if (sameString(type, filterCmpVar))
        {
	/* Comparison-style filter: numeric comparison or "in range". */
        char *patVar = filterPatternVarName(db, table, field);
        char *pat = trimSpaces(cartOptionalString(cart, patVar));
        char *cmpVal = cartString(cart, var->name);
	if (cmpReal(pat, cmpVal))
	    {
	    if (needAnd) dyStringAppend(dy, " and ");
	    needAnd = TRUE;
	    if (sameString(cmpVal, "in range"))
	        {
		char *words[2];
		int wordCount;
		char *dupe = cloneString(pat);

		wordCount = chopString(dupe, ", \t\n", words, ArraySize(words));
		if (wordCount < 2)	/* Fake short input */
		    words[1] = "2000000000";
		if (strchr(pat, '.'))	/* Assume floating point */
		    {
		    double a = atof(words[0]), b = atof(words[1]);
		    sqlDyStringPrintfFrag(dy, "%s.%s >= %f && %s.%s <= %f",
			explicitDbTable, field, a, explicitDbTable, field, b);
		    }
		else
		    {
		    int a = atoi(words[0]), b = atoi(words[1]);
		    sqlDyStringPrintfFrag(dy, "%s.%s >= %d && %s.%s <= %d",
			explicitDbTable, field, a, explicitDbTable, field, b);
		    }
		freez(&dupe);
		}
	    else
	        {
		// cmpVal has been checked already above in cmpReal for legal values.
		sqlDyStringPrintfFrag(dy, "%s.%s %-s ", explicitDbTable, field, cmpVal);
		if (strchr(pat, '.'))	/* Assume floating point. */
		    dyStringPrintf(dy, "%f", atof(pat));
		else
		    dyStringPrintf(dy, "%d", atoi(pat));
		}
	    }
	}
    }
/* Handle rawQuery if any */
    {
    char *varName;
    char *logic, *query;
    varName = filterFieldVarName(db, table, "", filterRawLogicVar);
    logic = cartUsualString(cart, varName, logOpMenu[0]);
    varName = filterFieldVarName(db, table, "", filterRawQueryVar);
    query = trimSpaces(cartOptionalString(cart, varName));
    if (query != NULL && query[0] != 0)
        {
	if (needAnd) dyStringPrintf(dy, " %s ", logic);
	sqlSanityCheckWhere(query, dy);
	}
    }

/* Clean up and return */
hFreeConn(&conn);
hashElFreeList(&varList);
if (dy->stringSize == 0)
    {
    dyStringFree(&dy);
    return cloneString(extraClause);
    }
else
    {
    if (isNotEmpty(extraClause))
	dyStringPrintf(dy, " and %s", extraClause);
    return dyStringCannibalize(&dy);
    }
}