static char *mkStatusValSet(struct sqlConnection *conn) /* Generate set of CCDS status values to use, based on cmd options or * defaults. WARNING: static return. */ { static char *statValSet = NULL; if (statValSet != NULL) return statValSet; struct dyString *buf = dyStringNew(0); struct slName *val; struct hash *validStats = ccdsStatusValLoad(conn); for (val = statVals; val != NULL; val = val->next) { ccdsStatusValCheck(validStats, val->name); if (buf->stringSize > 0) dyStringAppendC(buf, ','); dyStringPrintf(buf, "\"%s\"", val->name); } hashFree(&validStats); statValSet = dyStringCannibalize(&buf); return statValSet; }
static struct targetHits *bundleIntoTargets(struct axtBundle *abList) /* BLAST typically outputs everything on the same query and target * in one clump. This routine rearranges axts in abList to do this. */ { struct targetHits *targetList = NULL, *target; struct hash *targetHash = newHash(10); struct axtBundle *ab; struct axtRef *ref; /* Build up a list of targets in database hit by query sorted by * score of hits. */ for (ab = abList; ab != NULL; ab = ab->next) { struct axt *axt; for (axt = ab->axtList; axt != NULL; axt = axt->next) { target = hashFindVal(targetHash, axt->tName); if (target == NULL) { AllocVar(target); slAddHead(&targetList, target); hashAdd(targetHash, axt->tName, target); target->name = cloneString(axt->tName); target->size = ab->tSize; } if (axt->score > target->score) target->score = axt->score; AllocVar(ref); ref->axt = axt; slAddHead(&target->axtList, ref); } } slSort(&targetList, targetHitsCmpScore); for (target = targetList; target != NULL; target = target->next) slSort(&target->axtList, axtRefCmpScore); hashFree(&targetHash); return targetList; }
void doRefGeneMrnaSequence(struct sqlConnection *conn, struct bed *bedList) /* Fetch refGene mRNA sequence. */ { struct hash *uniqHash = newHash(18); struct bed *bed; boolean gotResults = FALSE; for (bed = bedList; bed != NULL; bed = bed->next) { if (!hashLookup(uniqHash, bed->name)) { char *fa = hGetSeqAndId(conn, bed->name, NULL); hashAdd(uniqHash, bed->name, NULL); if (fa != NULL) hPrintf("%s", fa); freez(&fa); gotResults = TRUE; } } if (!gotResults) hPrintf(NO_RESULTS); hashFree(&uniqHash); }
struct slName *valsForVar(char *varName, struct taggedFile *tfList)
/* Return all distinct values for given variable, sorted by name. */
{
struct hash *seen = hashNew(7);
struct slName *valList = NULL;
struct taggedFile *tf;
for (tf = tfList; tf != NULL; tf = tf->next)
    {
    char *val = metaTagValFindVal(tf->tagList, varName);
    if (val == NULL)
        continue;
    if (hashLookup(seen, val) == NULL)
        {
        hashAdd(seen, val, NULL);
        slNameAddHead(&valList, val);
        }
    }
hashFree(&seen);
slNameSort(&valList);
return valList;
}
void raFoldIn(char *fileName, struct hash *hashOfHash) /* Read ra's in file name and fold them into hashOfHash. * This will add ra's and ra fields to whatever already * exists in the hashOfHash, overriding fields of the * same name if they exist already. */ { struct lineFile *lf = lineFileMayOpen(fileName, TRUE); if (lf != NULL) { struct hash *uniqHash = hashNew(0); char *name; while ((name = raFoldInOneRetName(lf, hashOfHash)) != NULL) { if (hashLookup(uniqHash, name)) errAbort("%s duplicated in record ending line %d of %s", name, lf->lineIx, lf->fileName); hashAdd(uniqHash, name, NULL); } lineFileClose(&lf); hashFree(&uniqHash); } }
struct hash *txgIntoKeeperHash(struct txGraph *txgList) /* Create a hash full of bin keepers (one for each chromosome or contig. * The binKeepers are full of txGraphs. */ { struct hash *sizeHash = txgChromMinSizeHash(txgList); struct hash *bkHash = hashNew(16); struct txGraph *txg; for (txg = txgList; txg != NULL; txg = txg->next) { struct binKeeper *bk = hashFindVal(bkHash, txg->tName); if (bk == NULL) { struct minChromSize *chrom = hashMustFindVal(sizeHash, txg->tName); verbose(3, "New binKeeper for %s\n", txg->tName); bk = binKeeperNew(0, chrom->minSize); hashAdd(bkHash, txg->tName, bk); } binKeeperAdd(bk, txg->tStart, txg->tEnd, txg); } hashFree(&sizeHash); return bkHash; }
struct slPair *tagListIncludingParents(struct tagStanza *stanza)
/* Return a list of all tags including ones defined in parents.  A tag
 * defined in a child shadows the same-named tag in any ancestor. */
{
struct slPair *result = NULL;
struct hash *seen = hashNew(0);
struct tagStanza *cur;
for (cur = stanza; cur != NULL; cur = cur->parent)
    {
    struct slPair *tag;
    for (tag = cur->tagList; tag != NULL; tag = tag->next)
        {
        if (hashLookup(seen, tag->name) == NULL)
            {
            hashAdd(seen, tag->name, tag);
            slPairAdd(&result, tag->name, tag->val);
            }
        }
    }
hashFree(&seen);
slReverse(&result);
return result;
}
/*
    Release all upload state held by a request: each WebsUpload stored in
    the wp->files symbol table, the in-progress wp->currentFile (if it was
    never entered into the table), and the upload temp-file descriptor.
 */
PUBLIC void websFreeUpload(Webs *wp)
{
    WebsUpload *up;
    WebsKey *s;

    /* Free every upload recorded in the files table.  If the file being
       uploaded right now is among them, clear currentFile so it is not
       freed a second time below. */
    for (s = hashFirst(wp->files); s; s = hashNext(wp->files, s)) {
        up = s->content.value.symbol;
        freeUploadFile(up);
        if (up == wp->currentFile) {
            wp->currentFile = 0;
        }
    }
    hashFree(wp->files);
    /* currentFile is only still set here if it was never added to the
       table (e.g. upload aborted mid-part). */
    if (wp->currentFile) {
        freeUploadFile(wp->currentFile);
        wp->currentFile = 0;
    }
    /* NOTE(review): assumes wp->files is a valid hash handle here —
       confirm callers never invoke this before upload parsing starts. */
    if (wp->upfd >= 0) {
        close(wp->upfd);
        wp->upfd = -1;
    }
}
static struct genePos *wildAssociationFilter(
	struct slName *wildList, boolean orLogic,
	struct column *col, struct sqlConnection *conn, struct genePos *list)
/* Handle relatively slow filtering when there is a wildcard present.
 * Loads the full association table for the column, then keeps only the
 * genes whose association list matches the wildcard terms (all terms or
 * any term, per orLogic).  Returns the weeded list. */
{
struct assocGroup *ag = assocGroupNew(16);
struct genePos *gp;
struct hash *passHash = newHash(16); /* Hash of items passing filter. */
int assocCount = 0;
struct sqlResult *sr;
char **row;

/* Build up associations: one key/value pair per row of the column's
 * full query. */
sr = sqlGetResult(conn, col->queryFull);
while ((row = sqlNextRow(sr)) != NULL)
    {
    ++assocCount;
    assocGroupAdd(ag, row[0],row[1]);
    }
sqlFreeResult(&sr);

/* Look for matching associations and put them on newList.
 * The association key is the protein id when the column is protein-keyed
 * (looked up live for KG_III schemas), otherwise the gene name. */
for (gp = list; gp != NULL; gp = gp->next)
    {
    char *key = (col->protKey
        ? (kgVersion == KG_III ? lookupProtein(conn, gp->name) : gp->protein)
        : gp->name);
    struct assocList *al = hashFindVal(ag->listHash, key);
    if (al != NULL)
        {
        /* A NULL wildList means "any association at all passes". */
        if (wildList == NULL || wildMatchRefs(wildList, al->list, orLogic))
            hashAdd(passHash, gp->name, gp);
        }
    }
list = weedUnlessInHash(list, passHash);
hashFree(&passHash);
assocGroupFree(&ag);
return list;
}
void statsOnSubsets(struct nearTest *list, int subIx, FILE *f) /* Report tests of certain subtype. */ { struct nearTest *test; struct hash *hash = newHash(0); struct slName *typeList = NULL, *type; fprintf(f, "\n%s subtotals\n", nearTestInfoTypes[subIx]); /* Get list of all types in this field. */ for (test = list; test != NULL; test = test->next) { char *info = test->info[subIx]; if (!hashLookup(hash, info)) { type = slNameNew(info); hashAdd(hash, info, type); slAddHead(&typeList, type); } } slNameSort(&typeList); hashFree(&hash); for (type = typeList; type != NULL; type = type->next) { struct qaStatistics *stats; AllocVar(stats); for (test = list; test != NULL; test = test->next) { if (sameString(type->name, test->info[subIx])) { qaStatisticsAdd(stats, test->status); } } qaStatisticsReport(stats, type->name, f); freez(&stats); } }
struct group *groupTracks(char *db, struct trackDb *tracks) /* Make up groups and assign tracks to groups. */ { struct trackDb *track; struct trackRef *tr; struct group *group, *groups = NULL; struct grp *grp; struct grp *grps = hLoadGrps(db); struct hash *groupHash = newHash(8); /* Sort groups by priority */ slSort(&grps, cmpGroupPri); /* Create hash and list of groups */ for (grp = grps; grp != NULL; grp = grp->next) { AllocVar(group); group->name = cloneString(grp->name); group->label = cloneString(grp->label); slAddTail(&groups, group); hashAdd(groupHash, grp->name, group); } /* Add tracks to group */ for (track = tracks; track != NULL; track = track->next) { AllocVar(tr); tr->track = track; group = hashFindVal(groupHash, track->grp); slAddHead(&group->tracks, tr); } /* order tracks within groups by priority */ for (group = groups; group != NULL; group = group->next) slSort(&group->tracks, cmpTrackPri); hashFree(&groupHash); return groups; }
struct raLevel *raLevelRead(char *initialFile, struct lm *lm)
/* Read initialFile and all files that are included by it. */
{
/* Allocate the level out of lm and name it after the initial file's
 * directory. */
struct raLevel *level;
lmAllocVar(lm, level);
char dir[PATH_LEN];
splitPath(initialFile, dir, NULL, NULL);
level->name = lmCloneString(lm, dir);

/* Recursively gather included files, using a visited-set to guard
 * against include cycles. */
if (fileExists(initialFile))
    {
    struct hash *visited = hashNew(0);
    hashAdd(visited, initialFile, NULL);
    recurseThroughIncludes(initialFile, lm, visited, level, &level->fileList);
    hashFree(&visited);
    slReverse(&level->fileList);
    }
level->trackHash = hashLevelTracks(level);
return level;
}
struct bed *wikiTrackGetFilteredBeds(char *name, struct region *regionList, struct lm *lm, int *retFieldCount) /* Get list of beds from the wikiTrack * in current regions and that pass * filters. You can bedFree this when done. */ { struct bed *bedList = NULL; struct hash *idHash = NULL; struct bedFilter *bf = NULL; struct region *region = NULL; /* Figure out how to filter things. */ bf = bedFilterForCustomTrack(name); idHash = identifierHash(database, name); /* Grab filtered beds for each region. */ for (region = regionList; region != NULL; region = region->next) wikiTrackFilteredBedOnRegion(region, idHash, bf, lm, &bedList); /* clean up. */ hashFree(&idHash); slReverse(&bedList); return bedList; }
struct hTableInfo *bigBedToHti(char *table, struct sqlConnection *conn) /* Get fields of bigBed into hti structure. */ { /* Get columns in asObject format. */ char *fileName = bigBedFileName(table, conn); struct bbiFile *bbi = bigBedFileOpen(fileName); struct asObject *as = bigBedAsOrDefault(bbi); /* Allocate hTableInfo structure and fill in info about bed fields. */ struct hash *colHash = asColumnHash(as); struct hTableInfo *hti; AllocVar(hti); hti->rootName = cloneString(table); hti->isPos= TRUE; fillField(colHash, "chrom", hti->chromField); fillField(colHash, "chromStart", hti->startField); fillField(colHash, "chromEnd", hti->endField); fillField(colHash, "name", hti->nameField); fillField(colHash, "score", hti->scoreField); fillField(colHash, "strand", hti->strandField); fillField(colHash, "thickStart", hti->cdsStartField); fillField(colHash, "thickEnd", hti->cdsEndField); fillField(colHash, "blockCount", hti->countField); fillField(colHash, "chromStarts", hti->startsField); fillField(colHash, "blockSizes", hti->endsSizesField); hti->hasCDS = (bbi->definedFieldCount >= 8); hti->hasBlocks = (bbi->definedFieldCount >= 12); char type[256]; safef(type, sizeof(type), "bed %d %c", bbi->definedFieldCount, (bbi->definedFieldCount == bbi->fieldCount ? '.' : '+')); hti->type = cloneString(type); freeMem(fileName); hashFree(&colHash); bbiFileClose(&bbi); return hti; }
char *altGraphXMakeImage(struct altGraphX *ag)
/* Create a drawing of the splicing pattern for ag as a temporary PNG,
 * print an <IMG> tag referencing it, and return a cloned URL string
 * (caller frees). */
{
MgFont *font = mgSmallFont();
int fontHeight = mgFontLineHeight(font);
struct spaceSaver *ssList = NULL;
struct hash *heightHash = NULL;
int rowCount = 0;
struct tempName gifTn;
/* Pixel width comes from the cart "pix" setting, falling back to the
 * default width. */
int pixWidth = atoi(cartUsualString(cart, "pix", DEFAULT_PIX_WIDTH ));
int pixHeight = 0;
struct hvGfx *hvg;
int lineHeight = 0;
double scale = 0;
Color shadesOfGray[9];
int maxShade = ArraySize(shadesOfGray)-1;

/* Scale genomic coordinates to pixels across the graph's target span. */
scale = (double)pixWidth/(ag->tEnd - ag->tStart);
lineHeight = 2 * fontHeight +1;
/* Layout fills in rowCount; heightHash itself is not needed afterward. */
altGraphXLayout(ag, ag->tStart, ag->tEnd, scale, 100, &ssList, &heightHash,
                &rowCount);
hashFree(&heightHash);
pixHeight = rowCount * lineHeight;
makeTempName(&gifTn, "hgc", ".png");
hvg = hvGfxOpenPng(pixWidth, pixHeight, gifTn.forCgi, FALSE);
makeGrayShades(hvg, maxShade, shadesOfGray);
hvGfxSetClip(hvg, 0, 0, pixWidth, pixHeight);
altGraphXDrawPack(ag, ssList, hvg, 0, 0, pixWidth, lineHeight, lineHeight-1,
                  ag->tStart, ag->tEnd, scale, font, MG_BLACK, shadesOfGray,
                  "Dummy", NULL);
hvGfxUnclip(hvg);
hvGfxClose(&hvg);
/* Emit the image tag pointing at the temp file's web-visible path. */
printf( "<IMG SRC = \"%s\" BORDER=1 WIDTH=%d HEIGHT=%d><BR>\n",
        gifTn.forHtml, pixWidth, pixHeight);
return cloneString(gifTn.forHtml);
}
void nibTwoCacheFree(struct nibTwoCache **pNtc) /* Free up resources associated with nibTwoCache. */ { struct nibTwoCache *ntc = *pNtc; if (ntc != NULL) { freez(&ntc->pathName); if (ntc->isTwoBit) twoBitClose(&ntc->tbf); else { struct hashEl *el, *list = hashElListHash(ntc->nibHash); struct nibInfo *nib; for (el = list; el != NULL; el = el->next) { nib = el->val; nibInfoFree(&nib); } hashElFreeList(&list); hashFree(&ntc->nibHash); } freez(pNtc); } }
struct bigBedInterval *bigBedMultiNameQuery(struct bbiFile *bbi, struct bptFile *index, int fieldIx, char **names, int nameCount, struct lm *lm) /* Fetch all records matching any of the names. Using given index on given field. * Return list is allocated out of lm. */ { /* Set up name index and get list of chunks that match any of our names. */ struct fileOffsetSize *fosList = bigBedChunksMatchingNames(bbi, index, names, nameCount); /* Create hash of all names. */ struct hash *hash = newHash(0); int nameIx; for (nameIx=0; nameIx < nameCount; ++nameIx) hashAdd(hash, names[nameIx], NULL); /* Get intervals where name matches hash target. */ struct bigBedInterval *intervalList = bigBedIntervalsMatchingName(bbi, fosList, bbWordIsInHash, fieldIx, hash, lm); /* Clean up and return results. */ slFreeList(&fosList); hashFree(&hash); return intervalList; }
struct gffGroup *splitGroupByChrom(struct gffFile *gff, struct gffGroup *oldGroup) /* Split up a group into multiple groups, each one chromosome specific. */ { struct gffGroup *groupList = NULL, *group; struct hash *seqHash = hashNew(0); verbose(2, "Regrouping %s with %d elements\n", oldGroup->name, slCount(oldGroup->lineList)); struct gffLine *gl, *nextGl; for (gl = oldGroup->lineList; gl != NULL; gl = nextGl) { nextGl = gl->next; group = hashFindVal(seqHash, gl->seq); if (group == NULL) { AllocVar(group); group->name = oldGroup->name; group->seq = gl->seq; group->source = oldGroup->source; group->start = gl->start; group->end = gl->end; group->strand = gl->strand; slAddHead(&groupList, group); hashAdd(seqHash, group->seq, group); } else { group->start = min(gl->start, group->start); group->end = max(gl->end, group->end); } slAddHead(&group->lineList, gl); } hashFree(&seqHash); for (group = groupList; group != NULL; group = group->next) slReverse(&group->lineList); return groupList; }
void mergeDataAndAlignments() /** Load up the psls, hash them and transform into beds. */ { char *pslFileName = NULL; char *bedOutName = NULL; char *affyFileName = NULL; char *expRecordOutName = NULL; char *expFileName = NULL; struct hash *pslHash = NULL; struct bed *bed = NULL; /* Parse some arguments and make sure they exist. */ pslFileName = optionVal("pslFile", NULL); if(pslFileName == NULL) errAbort("Must specify -pslFile flag. Use -help for usage."); bedOutName = optionVal("bedOut", NULL); if(bedOutName == NULL) errAbort("Must specify -bedOut flag. Use -help for usage."); affyFileName = optionVal("affyFile", NULL); if(affyFileName == NULL) errAbort("Must specify -affyFile flag. Use -help for usage."); expRecordOutName = optionVal("expRecordOut", NULL); if(expRecordOutName == NULL) errAbort("Must specify -expRecordOut flag. Use -help for usage."); expFileName = optionVal("expFile", NULL); if(expFileName == NULL) errAbort("Must specify -expFile flag. Use -help for usage."); /* Hash psls according to their name. */ warn("Reading psls from: %s", pslFileName); pslHash = hashPsls(pslFileName); warn("Outputing beds:"); outputBedsFromPsls(pslHash, bedOutName, expRecordOutName, affyFileName, expFileName); warn("\nFreeing Memory."); hashTraverseVals(pslHash, pslFreeListWrapper); hashFree(&pslHash); warn("Done."); }
static struct slName *parseDatabaseList(struct lineFile *lf, char *s) /* Parse out comma-separated list of databases, with * possible !db's. */ { struct slName *list, *el; struct hash *notHash; /* Get comma-separated list. */ list = slNameListFromComma(s); if (list == NULL) errAbort("Empty database name line %d of %s", lf->lineIx, lf->fileName); /* Remove !'s */ notHash = makeNotHash(list); if (notHash != NULL) { struct slName *newList = NULL, *next; for (el = list; el != NULL; el = next) { next = el->next; if (el->name[0] != '!' && !hashLookup(notHash, el->name)) { slAddHead(&newList, el); } else { freeMem(el); } } hashFree(¬Hash); slReverse(&newList); list = newList; } return list; }
static void printSomeGenomeListHtmlNamedMaybeCheck(char *customOrgCgiName,
	char *db, struct dbDb *dbList, char *onChangeText, boolean doCheck)
/* Prints to stdout the HTML to render a dropdown list
 * containing a list of the possible genomes to choose from.
 * param customOrgCgiName - CGI variable name for the drop-list;
 * falls back to the global orgCgiName when NULL.
 * param db - a database whose genome will be the default genome.
 * If NULL, no default selection.
 * param onChangeText - Optional (can be NULL) text to pass in
 * any onChange javascript.
 * param doCheck - when TRUE, only include genomes whose database exists. */
{
/* Fixed-size parallel arrays of labels and values; overflow aborts. */
char *orgList[1024];
int numGenomes = 0;
struct dbDb *cur = NULL;
struct hash *hash = hashNew(10); // 2^^10 entries = 1024
char *selGenome = hGenomeOrArchive(db);
char *values [1024];
char *cgiName;

/* One entry per distinct genome; the hash suppresses duplicates when
 * several databases share a genome. */
for (cur = dbList; cur != NULL; cur = cur->next)
    {
    if (!hashFindVal(hash, cur->genome) && (!doCheck || hDbExists(cur->name)))
        {
        hashAdd(hash, cur->genome, cur);
        orgList[numGenomes] = trackHubSkipHubName(cur->genome);
        values[numGenomes] = cur->genome;
        numGenomes++;
        /* Bounds check after the increment: aborts once the arrays are
         * full, before any out-of-bounds write could happen. */
        if (numGenomes >= ArraySize(orgList))
            internalErr();
        }
    }
cgiName = (customOrgCgiName != NULL) ? customOrgCgiName : orgCgiName;
cgiMakeDropListFull(cgiName, orgList, values, numGenomes, selGenome,
                    onChangeText);
hashFree(&hash);
}
void showGenomes(char *genome, struct pcrServer *serverList)
/* Put up drop-down list with genomes on it.  Each distinct genome appears
 * once, marked SELECTED when it matches the current genome. */
{
struct hash *shown = hashNew(8);
struct pcrServer *server;
char *onChangeText = "onchange='" ORGFORM_KEEP_PARAMS ORGFORM_KEEP_ORG ORGFORM_RESET_DB ORGFORM_RESET_TARGET ORGFORM_SUBMIT;

printf("<SELECT NAME=\"org\" %s>\n", onChangeText);
for (server = serverList; server != NULL; server = server->next)
    {
    if (hashLookup(shown, server->genome) != NULL)
        continue;       /* genome already listed */
    hashAdd(shown, server->genome, NULL);
    printf("  <OPTION%s VALUE=\"%s\">%s</OPTION>\n",
           (sameWord(genome, server->genome) ? " SELECTED" : ""),
           server->genome, server->genome);
    }
printf("</SELECT>\n");
hashFree(&shown);
}
void bigBedTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f) /* Print out selected fields from Big Bed. If fields is NULL, then print out all fields. */ { if (f == NULL) f = stdout; /* Convert comma separated list of fields to array. */ int fieldCount = chopByChar(fields, ',', NULL, 0); char **fieldArray; AllocArray(fieldArray, fieldCount); chopByChar(fields, ',', fieldArray, fieldCount); /* Get list of all fields in big bed and turn it into a hash of column indexes keyed by * column name. */ struct hash *fieldHash = hashNew(0); struct slName *bb, *bbList = bigBedGetFields(table, conn); int i; for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i) hashAddInt(fieldHash, bb->name, i); // If bigBed has name column, look up pasted/uploaded identifiers if any: struct hash *idHash = NULL; if (slCount(bbList) >= 4) idHash = identifierHash(db, table); /* Create an array of column indexes corresponding to the selected field list. */ int *columnArray; AllocArray(columnArray, fieldCount); for (i=0; i<fieldCount; ++i) { columnArray[i] = hashIntVal(fieldHash, fieldArray[i]); } /* Output row of labels */ fprintf(f, "#%s", fieldArray[0]); for (i=1; i<fieldCount; ++i) fprintf(f, "\t%s", fieldArray[i]); fprintf(f, "\n"); /* Open up bigBed file. 
*/ char *fileName = bigBedFileName(table, conn); struct bbiFile *bbi = bigBedFileOpen(fileName); struct asObject *as = bigBedAsOrDefault(bbi); struct asFilter *filter = NULL; if (anyFilter()) { filter = asFilterFromCart(cart, db, table, as); if (filter) { fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList)); } } /* Loop through outputting each region */ struct region *region, *regionList = getRegions(); for (region = regionList; region != NULL; region = region->next) { struct lm *lm = lmInit(0); struct bigBedInterval *iv, *ivList = bigBedIntervalQuery(bbi, region->chrom, region->start, region->end, 0, lm); char *row[bbi->fieldCount]; char startBuf[16], endBuf[16]; for (iv = ivList; iv != NULL; iv = iv->next) { bigBedIntervalToRow(iv, region->chrom, startBuf, endBuf, row, bbi->fieldCount); if (asFilterOnRow(filter, row)) { if ((idHash != NULL) && (hashLookup(idHash, row[3]) == NULL)) continue; int i; fprintf(f, "%s", row[columnArray[0]]); for (i=1; i<fieldCount; ++i) fprintf(f, "\t%s", row[columnArray[i]]); fprintf(f, "\n"); } } lmCleanup(&lm); } /* Clean up and exit. */ bbiFileClose(&bbi); hashFree(&fieldHash); freeMem(fieldArray); freeMem(columnArray); }
void bamTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f)
/* Print out selected fields from BAM. If fields is NULL, then print out all fields.
 * Output is tab-separated with a leading "#"-prefixed label row, filtered by
 * any cart filter and any pasted/uploaded identifier list, and capped at
 * bigFileMaxOutput() data rows. */
{
struct hTableInfo *hti = NULL;
hti = getHti(db, table, conn);
struct hash *idHash = NULL;
char *idField = getIdField(db, curTrack, table, hti);
int idFieldNum = 0;

/* if we know what field to use for the identifiers, get the hash of names */
if (idField != NULL)
    idHash = identifierHash(db, table);

if (f == NULL)
    f = stdout;

/* Convert comma separated list of fields to array. */
int fieldCount = chopByChar(fields, ',', NULL, 0);
char **fieldArray;
AllocArray(fieldArray, fieldCount);
chopByChar(fields, ',', fieldArray, fieldCount);

/* Get list of all fields in big bed and turn it into a hash of column indexes keyed by
 * column name. */
struct hash *fieldHash = hashNew(0);
struct slName *bb, *bbList = bamGetFields();
int i;
for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
    {
    /* if we know the field for identifiers, save it away */
    if ((idField != NULL) && sameString(idField, bb->name))
        idFieldNum = i;
    hashAddInt(fieldHash, bb->name, i);
    }

/* Create an array of column indexes corresponding to the selected field list. */
int *columnArray;
AllocArray(columnArray, fieldCount);
for (i=0; i<fieldCount; ++i)
    {
    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
    }

/* Output row of labels */
fprintf(f, "#%s", fieldArray[0]);
for (i=1; i<fieldCount; ++i)
    fprintf(f, "\t%s", fieldArray[i]);
fprintf(f, "\n");

struct asObject *as = bamAsObj();
struct asFilter *filter = NULL;
if (anyFilter())
    {
    filter = asFilterFromCart(cart, db, table, as);
    if (filter)
        {
        fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
        }
    }

/* Loop through outputting each region.  maxOut counts remaining rows; the
 * loops stop early once the limit is exhausted. */
struct region *region, *regionList = getRegions();
int maxOut = bigFileMaxOutput();
for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
    {
    struct lm *lm = lmInit(0);
    /* The BAM file may differ per chromosome, so it is resolved inside
     * the region loop. */
    char *fileName = bamFileName(table, conn, region->chrom);
    struct samAlignment *sam, *samList = bamFetchSamAlignment(fileName,
        region->chrom, region->start, region->end, lm);
    char *row[SAMALIGNMENT_NUM_COLS];
    char numBuf[BAM_NUM_BUF_SIZE];
    for (sam = samList; sam != NULL && (maxOut > 0); sam = sam->next)
        {
        samAlignmentToRow(sam, numBuf, row);
        if (asFilterOnRow(filter, row))
            {
            /* if we're looking for identifiers, check if this matches */
            if ((idHash != NULL)&&(hashLookup(idHash, row[idFieldNum]) == NULL))
                continue;
            int i;
            fprintf(f, "%s", row[columnArray[0]]);
            for (i=1; i<fieldCount; ++i)
                fprintf(f, "\t%s", row[columnArray[i]]);
            fprintf(f, "\n");
            maxOut --;
            }
        }
    freeMem(fileName);
    lmCleanup(&lm);
    }
if (maxOut == 0)
    warn("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
/* Clean up and exit.  NOTE(review): idHash, as, and filter are not freed
 * here — presumably acceptable for a short-lived CGI; confirm. */
hashFree(&fieldHash);
freeMem(fieldArray);
freeMem(columnArray);
}
void freeRbmTreeHash(struct hash **pTreeHash)
/* Free up a whole hash of rbmTrees of ranges. */
{
/* Release each tree hanging off a hash element, then the hash itself. */
struct hash *treeHash = *pTreeHash;
hashTraverseEls(treeHash, hashElFreeRbmTree);
hashFree(pTreeHash);
}
static struct mafAli *mafFromBed12(char *database, char *track, struct bed *bed,
	struct slName *orgList)
/* Construct a maf out of exons in bed: one maf fragment per bed block,
 * concatenated into a single returned maf whose components cover every
 * species seen (plus any in orgList), padded with '.' where a species has
 * no data.  Reverse-complemented for '-' strand beds; components are
 * reordered to match orgList when given. */
{
/* Loop through all block in bed, collecting a list of mafs, one
 * for each block. While we're at make a hash of all species seen. */
struct hash *speciesHash = hashNew(0);
struct mafAli *mafList = NULL, *maf, *bigMaf;
struct mafComp *comp, *bigComp;
int totalTextSize = 0;
int i;
for (i=0; i<bed->blockCount; ++i)
    {
    int start = bed->chromStart + bed->chromStarts[i];
    int end = start + bed->blockSizes[i];
    if (thickOnly)
        {
        /* Clip block to the thick (coding) region. */
        start = max(start, bed->thickStart);
        end = min(end, bed->thickEnd);
        }
    if (start < end)
        {
        maf = hgMafFrag(database, track, bed->chrom, start, end, '+',
                        database, NULL);
        slAddHead(&mafList, maf);
        for (comp = maf->components; comp != NULL; comp = comp->next)
            hashStore(speciesHash, comp->src);
        totalTextSize += maf->textSize;
        }
    }
slReverse(&mafList);

/* Add species in order list too */
struct slName *org;
for (org = orgList; org != NULL; org = org->next)
    hashStore(speciesHash, org->name);

/* Allocate memory for return maf that contains all blocks concatenated together.
 * Also fill in components with any species seen at all.  Text starts as all
 * '.' so species missing from some block stay padded. */
AllocVar(bigMaf);
bigMaf->textSize = totalTextSize;
struct hashCookie it = hashFirst(speciesHash);
struct hashEl *hel;
while ((hel = hashNext(&it)) != NULL)
    {
    AllocVar(bigComp);
    bigComp->src = cloneString(hel->name);
    bigComp->text = needLargeMem(totalTextSize + 1);
    memset(bigComp->text, '.', totalTextSize);
    bigComp->text[totalTextSize] = 0;
    bigComp->strand = '+';
    bigComp->srcSize = totalTextSize;   /* It's safe if a bit of a lie. */
    hel->val = bigComp;                 /* hash value now maps src -> bigComp */
    slAddHead(&bigMaf->components, bigComp);
    }

/* Loop through maf list copying in data.  textOffset advances by each
 * block's aligned text width, so every species' text stays column-aligned. */
int textOffset = 0;
for (maf = mafList; maf != NULL; maf = maf->next)
    {
    for (comp = maf->components; comp != NULL; comp = comp->next)
        {
        bigComp = hashMustFindVal(speciesHash, comp->src);
        memcpy(bigComp->text + textOffset, comp->text, maf->textSize);
        bigComp->size += comp->size;
        }
    textOffset += maf->textSize;
    }

/* Cope with strand of darkness. */
if (bed->strand[0] == '-')
    {
    for (comp = bigMaf->components; comp != NULL; comp = comp->next)
        reverseComplement(comp->text, bigMaf->textSize);
    }

/* If got an order list then reorder components according to it. */
if (orgList != NULL)
    {
    struct mafComp *newList = NULL;
    for (org = orgList; org != NULL; org = org->next)
        {
        comp = hashMustFindVal(speciesHash, org->name);
        slAddHead(&newList, comp);
        }
    slReverse(&newList);
    bigMaf->components = newList;
    }

/* Rename our own component to bed name */
comp = hashMustFindVal(speciesHash, database);
freeMem(comp->src);
comp->src = cloneString(bed->name);

/* Clean up and go home. */
hashFree(&speciesHash);
mafAliFreeList(&mafList);
return bigMaf;
}
static struct genePos *pfamAdvFilter(struct column *col,
	struct sqlConnection *defaultConn, struct genePos *list)
/* Do advanced filter on for pfam.  Each search term is either a Pfam
 * accession or a wildcard over descriptions; terms are combined with OR
 * or AND logic (AND implemented by chaining pass-hashes term by term). */
{
char *terms = advFilterVal(col, "terms");
if (terms != NULL)
    {
    struct sqlConnection *conn = sqlConnect(col->protDb);
    char query[256];
    struct sqlResult *sr;
    struct dyString *dy = newDyString(1024);
    char **row;
    boolean orLogic = advFilterOrLogic(col, "logic", TRUE);
    struct slName *term, *termList = stringToSlNames(terms);
    struct hash *passHash = newHash(17);   /* genes passing current term */
    struct hash *prevHash = NULL;          /* genes passing all prior terms (AND) */
    struct genePos *gp;

    /* Build up hash of all genes. */
    struct hash *geneHash = newHash(18);
    for (gp = list; gp != NULL; gp = gp->next)
        hashAdd(geneHash, gp->name, gp);

    for (term = termList; term != NULL; term = term->next)
        {
        /* Build up a list of IDs of descriptions that match term. */
        struct slName *idList = NULL, *id;
        if (isPfamId(term->name))
            {
            idList = slNameNew(term->name);
            }
        else
            {
            /* Wildcard term: resolve to accessions via description LIKE. */
            char *sqlWild = sqlLikeFromWild(term->name);
            sqlSafef(query, sizeof(query),
                "select pfamAC from pfamDesc where description like '%s'",
                sqlWild);
            sr = sqlGetResult(conn, query);
            while ((row = sqlNextRow(sr)) != NULL)
                {
                id = slNameNew(row[0]);
                slAddHead(&idList, id);
                }
            sqlFreeResult(&sr);
            }
        if (idList != NULL)
            {
            /* Build up query that includes all IDs. */
            dyStringClear(dy);
            sqlDyStringPrintf(dy, "select name from %s where ", col->table);
            sqlDyStringPrintf(dy, "value='%s'", idList->name);
            for (id = idList->next; id != NULL; id = id->next)
                sqlDyStringPrintf(dy, "or value='%s'", id->name);

            /* Execute query and put matchers into hash.  With AND logic a
             * gene only stays if it also passed every earlier term. */
            sr = sqlGetResult(defaultConn, dy->string);
            while ((row = sqlNextRow(sr)) != NULL)
                {
                gp = hashFindVal(geneHash, row[0]);
                if (gp != NULL)
                    {
                    char *name = gp->name;
                    if (prevHash == NULL || hashLookup(prevHash, name) != NULL)
                        hashStore(passHash, name);
                    }
                }
            sqlFreeResult(&sr);
            slFreeList(&idList);
            }
        if (!orLogic)
            {
            /* AND: current pass-set becomes the prerequisite for the next
             * term.  (With OR logic passHash simply accumulates.) */
            hashFree(&prevHash);
            if (term->next != NULL)
                {
                prevHash = passHash;
                passHash = newHash(17);
                }
            }
        }
    list = weedUnlessInHash(list, passHash);
    hashFree(&prevHash);
    hashFree(&passHash);
    dyStringFree(&dy);
    sqlDisconnect(&conn);
    /* NOTE(review): geneHash and termList are not freed — presumably fine
     * in this short-lived CGI context; confirm. */
    }
return list;
}
static void filterBed(struct track *tg, struct linkedFeatures **pLfList)
/* Apply filters if any to mRNA linked features.  Depending on the filter
 * type setting, matching items are included, excluded, or recolored (and
 * moved to the front/back of the draw order). */
{
struct linkedFeatures *lf, *next, *newList = NULL, *oldList = NULL;
struct mrnaUiData *mud = tg->extraUiData;
struct mrnaFilter *fil;
char *type;
boolean anyFilter = FALSE;
/* NOTE(review): colorIx holds a color index from getFilterColor(), so
 * "boolean" looks like the wrong type — verify boolean is int here. */
boolean colorIx = 0;
boolean isExclude = FALSE;
boolean andLogic = TRUE;

if (*pLfList == NULL || mud == NULL)
    return;

/* First make a quick pass through to see if we actually have
 * to do the filter. */
for (fil = mud->filterList; fil != NULL; fil = fil->next)
    {
    fil->pattern = cartUsualStringClosestToHome(cart, tg->tdb, FALSE,
                                                fil->suffix, "");
    if (fil->pattern[0] != 0)
        anyFilter = TRUE;
    }
if (!anyFilter)
    return;

/* Filter type is "exclude", "include", or a color name to highlight
 * matches with. */
type = cartUsualStringClosestToHome(cart, tg->tdb, FALSE,
                                    mud->filterTypeSuffix, "red");
if (sameString(type, "exclude"))
    isExclude = TRUE;
else if (sameString(type, "include"))
    isExclude = FALSE;
else
    colorIx = getFilterColor(type, MG_BLACK);
type = cartUsualStringClosestToHome(cart, tg->tdb, FALSE,
                                    mud->logicTypeSuffix, "and");
andLogic = sameString(type, "and");

/* Make a pass though each filter, and start setting up search for
 * those that have some text. */
for (fil = mud->filterList; fil != NULL; fil = fil->next)
    {
    if (fil->pattern[0] != 0) // Already retrieved above.
        fil->hash = newHash(10);
    }

/* Scan tables id/name tables to build up hash of matching id's. */
for (fil = mud->filterList; fil != NULL; fil = fil->next)
    {
    struct hash *hash = fil->hash;
    int wordIx, wordCount;
    char *words[128];
    if (hash != NULL)
        {
        boolean anyWild;
        char *dupPat = cloneString(fil->pattern);
        wordCount = chopLine(dupPat, words);
        for (wordIx=0; wordIx <wordCount; ++wordIx)
            {
            char *pattern = cloneString(words[wordIx]);
            /* Each word gets an implicit trailing '*' (prefix match). */
            if (lastChar(pattern) != '*')
                {
                int len = strlen(pattern)+1;
                pattern = needMoreMem(pattern, len, len+1);
                pattern[len-1] = '*';
                }
            anyWild = (strchr(pattern, '*') != NULL
                       || strchr(pattern, '?') != NULL);
            /* Matching is case-insensitive: uppercase both sides. */
            touppers(pattern);
            for(lf = *pLfList; lf != NULL; lf=lf->next)
                {
                char copy[SMALLBUF];
                boolean gotMatch;
                safef(copy, sizeof(copy), "%s", lf->name);
                touppers(copy);
                if (anyWild)
                    gotMatch = wildMatch(pattern, copy);
                else
                    gotMatch = sameString(pattern, copy);
                if (gotMatch)
                    {
                    hashAdd(hash, lf->name, NULL);
                    }
                }
            freez(&pattern);
            }
        freez(&dupPat);
        }
    }

/* Scan through linked features coloring and or including/excluding ones that
 * match filter.  With AND logic an item starts as passing and any miss
 * fails it; with OR logic it starts failed and any hit passes it. */
for (lf = *pLfList; lf != NULL; lf = next)
    {
    boolean passed = andLogic;
    next = lf->next;
    for (fil = mud->filterList; fil != NULL; fil = fil->next)
        {
        if (fil->hash != NULL)
            {
            if (hashLookup(fil->hash, lf->name) == NULL)
                {
                if (andLogic)
                    passed = FALSE;
                }
            else
                {
                if (!andLogic)
                    passed = TRUE;
                }
            }
        }
    if (passed ^ isExclude)
        {
        slAddHead(&newList, lf);
        if (colorIx > 0)
            lf->filterColor = colorIx;
        }
    else
        {
        slAddHead(&oldList, lf);
        }
    }
slReverse(&newList);
slReverse(&oldList);
if (colorIx > 0)
    {
    /* Draw stuff that passes filter first in full mode, last in dense. */
    if (tg->visibility == tvDense)
        {
        newList = slCat(oldList, newList);
        }
    else
        {
        newList = slCat(newList, oldList);
        }
    }
*pLfList = newList;
tg->limitedVisSet = FALSE;  /* Need to recalculate this after filtering. */

/* Free up hashes, etc. */
for (fil = mud->filterList; fil != NULL; fil = fil->next)
    {
    hashFree(&fil->hash);
    }
}
static void parseBedGraphSection(struct lineFile *lf, boolean clipDontDie,
	struct hash *chromSizeHash, struct lm *lm, int itemsPerSlot,
	struct bwgSection **pSectionList)
/* Parse out bedGraph section until we get to something that is not in
 * bedGraph format.  Items are grouped per chromosome, checked for overlap,
 * and emitted as bwgSections of at most itemsPerSlot items each, prepended
 * to *pSectionList. */
{
/* Set up hash and list to store chromosomes. */
struct hash *chromHash = hashNew(0);
struct bedGraphChrom *chrom, *chromList = NULL;

/* Collect lines in items on appropriate chromosomes. */
struct bwgBedGraphItem *item;
char *line;
while (lineFileNextReal(lf, &line))
    {
    /* Check for end of section. */
    if (stepTypeLine(line))
        {
        lineFileReuse(lf);
        break;
        }

    /* Parse out our line and make sure it has exactly 4 columns. */
    char *words[5];
    int wordCount = chopLine(line, words);
    lineFileExpectWords(lf, 4, wordCount);

    /* Get chromosome, creating its record (in the hash's own lm) on first
     * sight. */
    char *chromName = words[0];
    chrom = hashFindVal(chromHash, chromName);
    if (chrom == NULL)
        {
        lmAllocVar(chromHash->lm, chrom);
        hashAddSaveName(chromHash, chromName, chrom, &chrom->name);
        chrom->size = (chromSizeHash
            ? hashIntVal(chromSizeHash, chromName) : BIGNUM);
        slAddHead(&chromList, chrom);
        }

    /* Convert to item and add to chromosome list. */
    lmAllocVar(lm, item);
    item->start = lineFileNeedNum(lf, words, 1);
    item->end = lineFileNeedNum(lf, words, 2);
    item->val = lineFileNeedDouble(lf, words, 3);

    /* Do sanity checking on coordinates. */
    if (item->start > item->end)
        errAbort("bedGraph error: start (%u) after end line (%u) %d of %s.",
                 item->start, item->end, lf->lineIx, lf->fileName);
    if (item->end > chrom->size)
        {
        warn("bedGraph error line %d of %s: chromosome %s has size %u but item ends at %u",
             lf->lineIx, lf->fileName, chrom->name, chrom->size, item->end);
        if (!clipDontDie)
            noWarnAbort();
        /* With clipDontDie the oversize item is simply dropped. */
        }
    else
        {
        slAddHead(&chrom->itemList, item);
        }
    }
slSort(&chromList, bedGraphChromCmpName);

/* Loop through each chromosome and output the item list, broken into sections
 * for that chrom. */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    slSort(&chrom->itemList, bwgBedGraphItemCmp);

    /* Check to make sure no overlap between items.
     * NOTE(review): if clipDontDie dropped every item on this chrom,
     * itemList is NULL and item->next below dereferences NULL — confirm
     * whether that case can occur in practice. */
    struct bwgBedGraphItem *item = chrom->itemList, *nextItem;
    for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next)
        {
        if (item->end > nextItem->start)
            errAbort("Overlap between %s %d %d and %s %d %d.\nPlease remove overlaps and try again",
                     chrom->name, item->start, item->end,
                     chrom->name, nextItem->start, nextItem->end);
        item = nextItem;
        }

    /* Break up into sections of no more than items-per-slot size. */
    struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList;
    for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem)
        {
        /* Find end item of this section, and start item for next section.
         * Terminate list at end item. */
        int sectionSize = 0;
        int i;
        endItem = startItem;
        for (i=0; i<itemsPerSlot; ++i)
            {
            if (nextStartItem == NULL)
                break;
            endItem = nextStartItem;
            nextStartItem = nextStartItem->next;
            ++sectionSize;
            }
        endItem->next = NULL;

        /* Fill in section and add it to section list. */
        struct bwgSection *section;
        lmAllocVar(lm, section);
        section->chrom = cloneString(chrom->name);
        section->start = startItem->start;
        section->end = endItem->end;
        section->type = bwgTypeBedGraph;
        section->items.bedGraphList = startItem;
        section->itemCount = sectionSize;
        slAddHead(pSectionList, section);
        }
    }

/* Free up hash, no longer needed. Free's chromList as a side effect since chromList is in
 * hash's memory. */
hashFree(&chromHash);
chromList = NULL;
}
void freeFreqHash(struct hash **pFreqHash)
/* Free up the hash we created. */
{
/* Release each element's slPair list, then the hash itself. */
struct hash *freqHash = *pFreqHash;
hashTraverseEls(freqHash, hashElSlPairListFree);
hashFree(pFreqHash);
}