static struct chromInfo *loadChromInfo(struct sqlConnection *conn) { struct chromInfo *ret = NULL; char **row; char query[256]; int chromCount = 0; cInfoHash = newHash(0); if (workChr) sqlSafef(query, ArraySize(query), "SELECT * FROM chromInfo WHERE " "chrom='%s' ORDER BY chrom DESC", workChr); else sqlSafef(query, ArraySize(query), "SELECT * FROM chromInfo ORDER BY chrom DESC"); struct sqlResult *sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct chromInfo *el; struct chromInfo *ci; AllocVar(ci); el = chromInfoLoad(row); ci->chrom = cloneString(el->chrom); ci->size = el->size; slAddHead(&ret, ci); hashAddInt(cInfoHash, el->chrom, el->size); ++chromCount; } sqlFreeResult(&sr); verbose(2,"#\tchrom count: %d\n", chromCount); return (ret); }
struct raToStructReader *raToStructReaderNew(char *name, int fieldCount, char **fields,
    int requiredFieldCount, char **requiredFields)
/* Create a helper object for parsing an ra file into a C structure.  This structure will
 * contain the field names, a hash mapping each field name to its index, the indexes of
 * the required fields, and a per-field "observed" flag array.
 * Note: fields and requiredFields arrays are kept by reference (not copied), so they
 * must outlive the reader.  errAborts if a required field is absent from fields. */
{
struct raToStructReader *reader;
AllocVar(reader);
reader->name = cloneString(name);
reader->fieldCount = fieldCount;
reader->fields = fields;
reader->requiredFieldCount = requiredFieldCount;
reader->requiredFields = requiredFields;
/* Map field name -> ordinal index for quick lookup during parsing. */
struct hash *fieldIds = reader->fieldIds = hashNew(4);
int i;
for (i=0; i<fieldCount; ++i)
    hashAddInt(fieldIds, fields[i], i);
if (requiredFieldCount > 0)
    {
    /* Resolve each required field name to its index up front so per-record
     * checking can use integers rather than string lookups. */
    AllocArray(reader->requiredFieldIds, requiredFieldCount);
    for (i=0; i<requiredFieldCount; ++i)
        {
        char *required = requiredFields[i];
        struct hashEl *hel = hashLookup(fieldIds, required);
        if (hel == NULL)
            errAbort("Required field %s is not in field list", required);
        reader->requiredFieldIds[i] = ptToInt(hel->val);
        }
    }
AllocArray(reader->fieldsObserved, fieldCount);
return reader;
}
void gensatFixFull(char *captionFile) /* Fix missing captions. */ { struct lineFile *lf = lineFileOpen(captionFile, TRUE); char *row[2]; struct dyString *sql = dyStringNew(0); struct sqlConnection *conn = sqlConnect(database); struct hash *capHash = newHash(16); while (lineFileRowTab(lf, row)) { int captionId; char *submitId = row[0]; char *caption = row[1]; captionId = hashIntValDefault(capHash, caption, 0); if (captionId == 0) { dyStringClear(sql); dyStringAppend(sql, "insert into caption values(default, \""); dyStringAppend(sql, caption); dyStringAppend(sql, "\")"); sqlUpdate(conn, sql->string); verbose(1, "%s\n", sql->string); captionId = sqlLastAutoId(conn); hashAddInt(capHash, caption, captionId); } dyStringClear(sql); dyStringPrintf(sql, "update imageFile set caption=%d ", captionId); dyStringPrintf(sql, "where submissionSet=%d ", gensatId); dyStringPrintf(sql, "and submitId = \"%s\"", submitId); sqlUpdate(conn, sql->string); verbose(1, "%s\n", sql->string); } dyStringFree(&sql); }
void chainIndex(char *inChain, char *outIndex) /* chainIndex - Create simple two column file index for chain. */ { struct lineFile *lf = lineFileOpen(inChain, TRUE); FILE *f = mustOpen(outIndex, "w"); struct chain *chain, *lastChain = NULL; long pos = 0; struct hash *uniqHash = hashNew(16); while ((chain = chainRead(lf)) != NULL) { if (lastChain == NULL || !sameString(chain->tName, lastChain->tName)) { if (hashLookup(uniqHash, chain->tName)) { errAbort("%s is not sorted, %s repeated with intervening %s", inChain, chain->tName, lastChain->tName); } hashAddInt(uniqHash, chain->tName, pos); fprintf(f, "%lx\t%s\n", pos, chain->tName); } chainFree(&lastChain); lastChain = chain; pos = lineFileTell(lf); } }
static void idAdd(struct hash *hash, void *obj)
/* Add object to hash keyed by its printed pointer value; the stored value is
 * a 1-based sequence number (elCount before the add, plus one). */
{
char buf[32];	/* On 64-bit, "%p" can print "0x" plus up to 16 hex digits
		 * (18+ chars); the old buf[17] could overflow and safef
		 * aborts on truncation. */
safef(buf, sizeof(buf), "%p", obj);
hashAddInt(hash, buf, hash->elCount+1);
}
static struct hash *createIntHash(SEXP v)
/* Build a kent hash mapping each element name of the R integer vector v to
 * its integer value. */
{
SEXP names = getAttrib(v, R_NamesSymbol);
struct hash *result = hashNew(0);
int n = length(v);
int ix;
for (ix = 0; ix < n; ++ix)
    {
    char *key = (char *)CHAR(STRING_ELT(names, ix));
    hashAddInt(result, key, INTEGER(v)[ix]);
    }
return result;
}
static struct hash *buildSymHash(char **values, boolean isEnum)
/* Build a hash of values for either enum or set symbolic column.  Enum
 * members map to their ordinal position; set members map to successive
 * single-bit masks (1, 2, 4, ...). */
{
struct hash *valHash = hashNew(0);
unsigned bit = 1;	/* only meaningful for sets */
int ix;
for (ix = 0; values[ix] != NULL; ++ix)
    {
    if (isEnum)
        hashAddInt(valHash, values[ix], ix);
    else
        {
        hashAddInt(valHash, values[ix], bit);
        bit <<= 1;
        }
    }
return valHash;
}
struct hash *initCutterCountHash(struct cutter *cutters)
/* Return a hash keyed on enzyme name with every int value initialized
 * to zero. */
{
struct hash *countHash = newHash(12);
struct cutter *enz;
for (enz = cutters; enz != NULL; enz = enz->next)
    hashAddInt(countHash, enz->name, 0);
return countHash;
}
struct hash *ccdsStatusValLoad(struct sqlConnection *conn)
/* Load values from the imported ccdsStatusVals table.  Table hashes status
 * name to uid; each name is entered both as-is and lower-cased. */
{
struct hash *statusVals = hashNew(0);
char **row;
struct sqlResult *sr
    = sqlGetResult(conn, "NOSQLINJ SELECT ccds_status_val_uid, ccds_status FROM CcdsStatusVals");
while ((row = sqlNextRow(sr)) != NULL)
    {
    int uid = sqlSigned(row[0]);
    char *statName = row[1];
    hashAddInt(statusVals, statName, uid);	/* original case */
    tolowers(statName);				/* in-place lower-casing of row buffer */
    hashAddInt(statusVals, statName, uid);	/* lower case */
    }
sqlFreeResult(&sr);
return statusVals;
}
struct hash *bbiChromSizesFromFile(char *fileName)
/* Read two column file into hash keyed by chrom, value is the unsigned size
 * in column two. */
{
struct hash *hash = hashNew(0);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[2];
while (lineFileRow(lf, row))
    hashAddInt(hash, row[0], sqlUnsigned(row[1]));
lineFileClose(&lf);	/* was leaked */
return hash;
}
static struct hash* bbiChromSizes(struct bbiFile* bbi)
/* Return a hash of chrom sizes pulled from the bigBed/bigWig chrom list. */
{
struct hash* sizeHash = newHash(10);
struct bbiChromInfo* chromList = bbiChromList(bbi);
struct bbiChromInfo* info;
for (info = chromList; info != NULL; info = info->next)
    hashAddInt(sizeHash, info->name, (int)info->size);
bbiChromInfoFreeList(&chromList);
return sizeHash;
}
static struct hash *loadSizes(char *szFile)
/* Load name/size pairs from a tab-separated file into a hash. */
{
struct hash *sizes = hashNew(0);
struct lineFile *lf = lineFileOpen(szFile, TRUE);
char *row[2];
while (lineFileNextRowTab(lf, row, 2))
    hashAddInt(sizes, row[0], sqlSigned(row[1]));
lineFileClose(&lf);	/* was leaked */
return sizes;
}
struct hash *readCsizeHash(char *filename)
/* Read a chrom sizes file into a hash keyed by chrom name. */
{
struct hash *sizeHash = hashNew(10);
struct lineFile *lf = lineFileOpen(filename, TRUE);
char *cols[2];
while (lineFileRowTab(lf, cols))
    hashAddInt(sizeHash, cols[0], sqlSigned(cols[1]));
lineFileClose(&lf);
return sizeHash;
}
static struct hash *loadChromSizes(char *chromSizesFile)
/* Read the chromosome sizes file into a hash of name -> size. */
{
struct hash *sizeHash = newHash(12);
struct lineFile *lf = lineFileOpen(chromSizesFile, TRUE);
char *cols[2];
while (lineFileRow(lf, cols))
    hashAddInt(sizeHash, cols[0], sqlSigned(cols[1]));
lineFileClose(&lf);
return sizeHash;
}
struct hash *loadIntHash(char *fileName)
/* Load up file with lines of name<space>size into hash keyed by name. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[2];
struct hash *hash = hashNew(0);
while (lineFileRow(lf, row))
    hashAddInt(hash, row[0], lineFileNeedNum(lf, row, 1));
lineFileClose(&lf);	/* was leaked */
return hash;
}
struct hash *hashNameIntFile(char *fileName)
/* Given a two column file (name, integer value) return a hash keyed by name
 * with integer values. */
{
struct hash *result = hashNew(16);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *cols[2];
while (lineFileRow(lf, cols))
    hashAddInt(result, cols[0], lineFileNeedNum(lf, cols, 1));
lineFileClose(&lf);
return result;
}
void annoFormatTabSetColumnVis(struct annoFormatter *vSelf, char *sourceName, char *colName,
                               boolean enabled)
/* Explicitly include or exclude column in output. sourceName must be the same
 * as the corresponding annoStreamer source's name. */
{
struct annoFormatTab *self = (struct annoFormatTab *)vSelf;
char fullName[PATH_LEN];
if (self->columnVis == NULL)
    self->columnVis = hashNew(0);	/* lazily created on first override */
makeFullColumnName(fullName, sizeof(fullName), sourceName, colName);
hashAddInt(self->columnVis, fullName, enabled);
}
struct hash *getTotTagsHash(char *libsFile)
/* Read in the library file and hash up the total tags, keyed by the decimal
 * string form of each library id. */
{
struct hash *totTagsHash = newHash(9);
struct cgapSageLib *lib, *libList = cgapSageLibLoadAllByTab(libsFile);
for (lib = libList; lib != NULL; lib = lib->next)
    {
    char key[16];
    safef(key, sizeof(key), "%d", lib->libId);
    hashAddInt(totTagsHash, key, (int)lib->totalTags);
    }
return totTagsHash;
}
void hashIncInt(struct hash *hash, char *name)
/* Increment integer value in hash, adding name with value 1 if absent. */
{
struct hashEl *hel = hashLookup(hash, name);
if (hel == NULL)
    hashAddInt(hash, name, 1);
else
    {
    /* Add one through a char* cast: arithmetic on void* is not well
     * defined by the standard, though ++hel->val works under gcc. */
    hel->val = ((char *)hel->val) + 1;
    }
}
struct hash *getOrderHash(char *file)
/* Parse species order file into a hash mapping each name to its zero-based
 * line index. */
{
struct hash *orderHash = newHash(0);
struct lineFile *lf = lineFileOpen(file, TRUE);
char *row[1];
int ix;
for (ix = 0; lineFileRow(lf, row); ++ix)
    hashAddInt(orderHash, row[0], ix);
lineFileClose(&lf);
return orderHash;
}
void initKillList()
/* Load up a hash of the accessions to avoid (one per line of the file named
 * by the -killList option, which must be present). */
{
char *killFile = optionVal("killList", NULL);
char *words[1];
assert(killFile);
killHash = newHash(10);
struct lineFile *lf = lineFileOpen(killFile, TRUE);
while (lineFileNextRow(lf, words, ArraySize(words)))
    hashAddInt(killHash, words[0], 1);
lineFileClose(&lf);
}
void fillInExpHash(char *expFileName, struct hash **expHash, struct slName **expNames,
                   int *expCount)
/* Read all of the names from expFileName and store them in a hash mapping
 * each name to its zero-based line index.  Also returns the names as a list
 * (in file order) and the total count. */
{
struct lineFile *lf = lineFileOpen(expFileName, TRUE);
char *line = NULL;
struct slName *name = NULL;
*expCount = 0;
*expHash = newHash(5);
while (lineFileNextReal(lf, &line))
    {
    hashAddInt(*expHash, line, (*expCount)++);
    name = newSlName(line);
    slAddHead(expNames, name);
    }
slReverse(expNames);	/* restore file order after slAddHead */
lineFileClose(&lf);	/* was leaked; also removed unused lineSize local */
}
struct hash *qSizeHash(char *chainfile)
/* Read the chain file and figure out the chromosome sizes on the query end,
 * keeping the first size seen for each query chrom. */
{
struct hash *qSizes = hashNew(10);
struct lineFile *lf = lineFileOpen(chainfile, TRUE);
struct chain *ch;
while ((ch = chainRead(lf)) != NULL)
    {
    if (hashLookup(qSizes, ch->qName) == NULL)
        hashAddInt(qSizes, ch->qName, ch->qSize);
    chainFree(&ch);
    }
lineFileClose(&lf);
return qSizes;
}
static void buildChromSizes(char *db) /* build table of chromosome sizes and list of chromosomes */ { struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr; char **row; gChromSizes = hashNew(8); sr = sqlGetResult(conn, NOSQLINJ "SELECT chrom,size FROM chromInfo"); while ((row = sqlNextRow(sr)) != NULL) { unsigned sz = gbParseUnsigned(NULL, row[1]); hashAddInt(gChromSizes, row[0], sz); slSafeAddHead(&gChroms, newSlName(row[0])); } sqlFreeResult(&sr); hFreeConn(&conn); }
static char *findUniqueName(struct hash *dupeHash, char *root)
/* If root name is already in hash, return root_1, root_2 or something like
 * that.  An unseen root is recorded and returned as-is (the caller's own
 * string); a decorated name lives in a static buffer that is overwritten on
 * the next call. */
{
struct hashEl *hel = hashLookup(dupeHash, root);
if (hel == NULL)
    {
    hashAddInt(dupeHash, root, 1);
    return root;
    }
static char buf[256];
int val = ptToInt(hel->val) + 1;
hel->val = intToPt(val);	/* remember the next suffix to use */
safef(buf, sizeof(buf), "%s_%d", root, val);
return buf;
}
void consolidateTheCounts(char *inputFile, char *outputFile) /* Read the cat'ed file in, and either make a new hash item for each enzyme */ /* encountered on each line, or add to an existing one. Then output the hash. */ { struct lineFile *lf = lineFileOpen(inputFile, TRUE); struct hash *countHash = newHash(12); char *words[2]; while (lineFileRow(lf, words)) { char *name = words[0]; int count = lineFileNeedFullNum(lf, words, 1); struct hashEl *el = hashLookup(countHash, name); if (!el) hashAddInt(countHash, name, count); else el->val = intToPt(ptToInt(el->val) + count); } writeHashToFile(countHash, outputFile); freeHash(&countHash); }
struct hash *makeExpsTable(char *database, char *expTable, char *expFile, int *expCount) /* Open experiment file and use it to create experiment table. Use optional fields if present, otherwise defaults. Return a hash of expId's, keyed by name */ { struct lineFile *lf = lineFileOpen(expFile, TRUE); FILE *f = hgCreateTabFile(tabDir, expTable); int expId = 0; char *words[6]; int wordCt; struct hash *expHash = newHash(0); while ((wordCt = lineFileChopNext(lf, words, ArraySize(words)))) { char *name = words[0]; hashAddInt(expHash, name, expId); fprintf(f, "%d\t%s\t", expId++, name); fprintf(f, "%s\t", wordCt > 1 ? words[1] : name); fprintf(f, "%s\t", wordCt > 2 ? words[2] : expUrl); fprintf(f, "%s\t", wordCt > 3 ? words[3] : expRef); fprintf(f, "%s\t", wordCt > 4 ? words[4] : expCredit); fprintf(f, "0\n"); /* extras */ } if (expId <= 0) errAbort("No experiments in %s", lf->fileName); verbose(2, "%d experiments\n", expId); if (doLoad) { struct sqlConnection *conn = sqlConnect(database); expRecordCreateTable(conn, expTable); hgLoadTabFile(conn, tabDir, expTable, &f); sqlDisconnect(&conn); } lineFileClose(&lf); if (expCount) *expCount = expId; return expHash; }
void mafSplitPos(char *database, char *size, char *outFile) /* Pick best positions for split close to size. * Use middle of a gap as preferred site. * If not gaps are in range, use recent repeats (0% diverged) */ { int splitSize = 0; int chromSize = 0; struct hash *chromHash; struct hashCookie hc; struct hashEl *hel; struct sqlConnection *conn = sqlConnect(database); FILE *f; db = database; verbose(1, "Finding split positions for %s at ~%s Mbp intervals\n", database, size); splitSize = sqlSigned(size) * 1000000; if (chrom == NULL) { chromHash = hChromSizeHash(database); } else { chromHash = hashNew(6); hashAddInt(chromHash, chrom, hChromSize(database, chrom)); } conn = sqlConnect(database); f = mustOpen(outFile, "w"); hc = hashFirst(chromHash); while ((hel = hashNext(&hc)) != NULL) { chrom = hel->name; chromSize = ptToInt(hel->val); chromSplits(chrom, chromSize, splitSize, conn, f); } sqlDisconnect(&conn); carefulClose(&f); }
struct hash* searchForKeywords(struct sqlConnection* conn, char *articleTable, char *keywords) /* return hash with the articleIds that contain a given keyword in the abstract/title/authors */ { if (isEmpty(keywords)) return NULL; char query[12000]; sqlSafef(query, sizeof(query), "SELECT articleId FROM %s WHERE " "MATCH (citation, title, authors, abstract) AGAINST ('%s' IN BOOLEAN MODE)", articleTable, keywords); //printf("query %s", query); struct slName *artIds = sqlQuickList(conn, query); if (artIds==NULL || slCount(artIds)==0) return NULL; // convert list to hash struct hash *hashA = hashNew(0); struct slName *el; for (el = artIds; el != NULL; el = el->next) hashAddInt(hashA, el->name, 1); freeMem(keywords); slFreeList(artIds); return hashA; }
static struct hash *loadAllGaps(struct sqlConnection *conn, char *db)
/* Working on all chroms, fetch all per-chrom gap counts at once.
 * Returns hash keyed by chrom name whose int values are the total gap bases
 * for that chrom.  Uses an unsplit "gap" table if present, otherwise falls
 * back to a per-chrom query over chromInfoList. */
{
struct chromInfo *cInfo;
struct sqlResult *sr;
char **row;
struct hash *ret;
int totalGapSize = 0;
int gapCount = 0;
ret = newHash(0);
/* If not split, read in whole gulp, create per-chrom hash of sizes */
if (hTableExists(db, "gap"))
    {
    /* Rows arrive ordered by chrom; accumulate a running total per chrom
     * and flush it into the hash each time the chrom name changes. */
    char *prevChrom = NULL;
    int totalGapsThisChrom = 0;
    sr = sqlGetResult(conn, NOSQLINJ "select chrom,chromStart,chromEnd from gap order by chrom");
    while ((row = sqlNextRow(sr)) != NULL)
        {
        int gapSize = sqlUnsigned(row[2]) - sqlUnsigned(row[1]);
        ++gapCount;
        if (prevChrom && sameWord(prevChrom,row[0]))
            {
            /* Same chrom as last row: just accumulate. */
            totalGapsThisChrom += gapSize;
            totalGapSize += gapSize;
            }
        else
            {
            if (prevChrom)
                {
                /* Chrom changed: record the finished chrom's total, then
                 * start accumulating for the new chrom. */
                hashAddInt(ret, prevChrom, totalGapsThisChrom);
                freeMem(prevChrom);
                prevChrom = cloneString(row[0]);
                totalGapsThisChrom = gapSize;
                totalGapSize += gapSize;
                }
            else
                {
                /* Very first row: begin the first chrom's accumulation. */
                prevChrom = cloneString(row[0]);
                totalGapsThisChrom = gapSize;
                totalGapSize += gapSize;
                }
            }
        }
    /* and the last one */
    if (prevChrom && (totalGapsThisChrom > 0))
        {
        hashAddInt(ret, prevChrom, totalGapsThisChrom);
        freeMem(prevChrom);
        }
    sqlFreeResult(&sr);
    }
else
    {
    /* for each chrom name, fetch the gap count */
    for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next)
        {
        int rowOffset;
        int totalGapsThisChrom = 0;
        sr = hChromQuery(conn, "gap", cInfo->chrom, NULL, &rowOffset);
        while ((row = sqlNextRow(sr)) != NULL)
            {
            int gapSize;
            struct agpGap gap;
            ++gapCount;
            agpGapStaticLoad(row+rowOffset, &gap);
            gapSize = gap.chromEnd - gap.chromStart;
            totalGapsThisChrom += gapSize;
            totalGapSize += gapSize;
            }
        sqlFreeResult(&sr);
        /* NOTE(review): unlike the bulk branch, a chrom with zero gap bases
         * IS entered here (with value 0) — confirm callers treat a missing
         * key and a 0 value the same way. */
        hashAddInt(ret, cInfo->chrom, totalGapsThisChrom);
        }
    }
verbose(2,"#\tloaded %d gaps covering %d bases\n", gapCount, totalGapSize);
return ret;
}