int main(int argc, char *argv[]) /* read ContigLocFilter, writing to individual chrom tables */ { struct hashCookie cookie; struct hashEl *hel; char *chromName; if (argc != 3) usage(); snpDb = argv[1]; contigGroup = argv[2]; hSetDb(snpDb); /* check for needed tables */ if(!hTableExistsDb(snpDb, "ContigLocFilter")) errAbort("no ContigLocFilter table in %s\n", snpDb); if(!hTableExistsDb(snpDb, "ContigInfo")) errAbort("no ContigInfo table in %s\n", snpDb); chromHash = loadChroms(contigGroup); if (chromHash == NULL) { verbose(1, "couldn't get chrom info\n"); return 1; } writeSplitTables(); verbose(1, "closing files...\n"); cookie = hashFirst(chromHash); while (hel = hashNext(&cookie)) fclose(hel->val); verbose(1, "creating tables...\n"); cookie = hashFirst(chromHash); while ((chromName = hashNextName(&cookie)) != NULL) createTable(chromName); verbose(1, "loading database...\n"); cookie = hashFirst(chromHash); while ((chromName = hashNextName(&cookie)) != NULL) { verbose(1, "chrom = %s\n", chromName); loadDatabase(chromName); } return 0; }
char *identifierWhereClause(char *idField, struct hash *idHash) /* If the number of pasted IDs is reasonably low, return a where-clause component for the IDs. */ { if (idHash == NULL || idField == NULL) return NULL; int numIds = hashNumEntries(idHash); int maxIdsInWhere = cartUsualInt(cart, "hgt_maxIdsInWhere", DEFAULT_MAX_IDS_IN_WHERE); if (numIds > 0 && numIds <= maxIdsInWhere) { struct dyString *dy = dyStringNew(16 * numIds); dyStringPrintf(dy, "%s in (", idField); struct hashCookie hc = hashFirst(idHash); boolean first = TRUE; char *id; while ((id = hashNextName(&hc)) != NULL) { if (first) first = FALSE; else dyStringAppend(dy, ", "); dyStringPrintf(dy, "'%s'", id); } dyStringAppend(dy, ")"); return dyStringCannibalize(&dy); } return NULL; }
void doLog() { FILE *logFileHandle = mustOpen("snpGetSeqDup.log", "w"); struct hashCookie cookie = hashFirst(uniqHash); char *rsId = NULL; int count = 0; struct hashEl *hel = NULL; char *fileName = NULL; struct dyString *dy = newDyString(1024); while ((rsId = hashNextName(&cookie)) != NULL) { count = 0; for (hel = hashLookup(snpHash, rsId); hel != NULL; hel = hashLookupNext(hel)) count++; if (count == 1) continue; for (hel = hashLookup(snpHash, rsId); hel != NULL; hel = hashLookupNext(hel)) { fileName = (char *)hel->val; dyStringAppend(dy, fileName); dyStringAppend(dy, " "); } fprintf(logFileHandle, "%s\t%s\n", rsId, dy->string); dyStringClear(dy); } carefulClose(&logFileHandle); }
void writeResults() /* loop through nameHash */ /* print all coords from coordHash to outputFileHandle */ /* also print count per SNP to logFileHandle */ { struct hashCookie cookie; struct hashEl *hel= NULL; struct coords *cel = NULL; int count = 0; char *name; verbose(1, "writing results...\n"); cookie = hashFirst(nameHash); while ((name = hashNextName(&cookie)) != NULL) { count = 0; for (hel = hashLookup(coordHash, name); hel != NULL; hel= hashLookupNext(hel)) { cel = (struct coords *)hel->val; fprintf(outputFileHandle, "%s\t%d\t%d\t%s\tMultipleAlignments\n", cel->chrom, cel->start, cel->end, name); count++; } fprintf(logFileHandle, "%s\t%d\n", name, count); } }
int main(int argc, char *argv[]) { char *snpDb = NULL; char *snpTableName = NULL; struct hashCookie cookie; char *chromName = NULL; if (argc != 3) usage(); snpDb = argv[1]; hSetDb(snpDb); snpTableName = argv[2]; /* check that tables exist */ if (!hTableExists(snpTableName)) errAbort("no %s table in %s\n", snpTableName, snpDb); if (!hTableExists("chromInfo")) errAbort("no chromInfo table in %s\n", snpDb); loadChroms(); getSnps(snpTableName); verbose(1, "creating tables...\n"); cookie = hashFirst(chromHash); while ((chromName = hashNextName(&cookie)) != NULL) createTable(chromName); verbose(1, "loading database...\n"); cookie = hashFirst(chromHash); while ((chromName = hashNextName(&cookie)) != NULL) { verbose(1, "chrom = %s\n", chromName); loadDatabase(chromName); } return 0; }
void checkIds(char *inputFileName, char *outputFileName) /* report if duplicate ID found */ /* put all ids in idHash */ { struct chain *chainEl; struct lineFile *lf = lineFileOpen(inputFileName, TRUE); FILE *outputFileHandle = NULL; char idString[64]; char *idString2 = NULL; struct hashEl *hel = NULL; struct hashEl *hel2 = NULL; int chainCount = 0; int dupCount = 0; struct hashCookie cookie; idHash = newHash(0); duplicateHash = newHash(0); while ((chainEl = chainRead(lf)) != NULL) { chainCount++; safef(idString, sizeof(idString), "%d", chainEl->id); hel = hashLookup(idHash, idString); if (hel == NULL) hashAdd(idHash, cloneString(idString), NULL); else { hel2 = hashLookup(duplicateHash, idString); if (hel2 == NULL) hashAdd(duplicateHash, cloneString(idString), NULL); } } verbose(1, "chain count = %d\n", chainCount); // freeHash(&idHash); /* print contents of duplicateHash */ outputFileHandle = mustOpen(outputFileName, "w"); cookie = hashFirst(duplicateHash); while ((idString2 = hashNextName(&cookie)) != NULL) { dupCount++; fprintf(outputFileHandle, "%s\n", idString2); } verbose(1, "count of duplicate IDs = %d\n", dupCount); carefulClose(&outputFileHandle); // freeHash(&duplicateHash); }
void processSnps(char *oldTableName, char *newTableName)
/* Write to snpMissing.out every name present in oldTableName but absent from
 * newTableName, followed by a note when that name is also found in ContigLoc
 * and/or MapInfo.  At most 100000 names from the old table are examined. */
{
enum { maxNamesToCheck = 100000 };
static struct hash *contigLocHash = NULL;
static struct hash *mapInfoHash = NULL;
static struct hash *oldNameHash = NULL;
static struct hash *newNameHash = NULL;
FILE *missingOut = mustOpen("snpMissing.out", "w");
struct hashCookie cookie;
char *name;
int checked = 0;

verbose(1, "creating hashes...\n");
contigLocHash = getUniqueStringHash("snp_id", "ContigLoc");
mapInfoHash = getUniqueStringHash("snp_id", "MapInfo");
oldNameHash = getUniqueStringHash("name", oldTableName);
newNameHash = getUniqueStringHash("name", newTableName);

verbose(1, "writing results...\n");
cookie = hashFirst(oldNameHash);
while (checked < maxNamesToCheck && (name = hashNextName(&cookie)) != NULL)
    {
    checked++;
    if (hashLookup(newNameHash, name) != NULL)
        continue;       /* still present in the new table */

    fprintf(missingOut, "rs%s in %s but not in %s\n", name, oldTableName, newTableName);
    if (hashLookup(contigLocHash, name) != NULL)
        fprintf(missingOut, "found in ContigLoc\n");
    if (hashLookup(mapInfoHash, name) != NULL)
        fprintf(missingOut, "found in MapInfo\n");
    }

carefulClose(&missingOut);
}
struct locus *readSs(char *pbFile, char *strandFile) /* determine which allele matches assembly and store in details file */ { struct hash *strandHash = readStrand(strandFile); struct strand *strand = NULL; struct hash *missingHugoIdHash = newHash(16); struct hashCookie hashPtr; char *missingName; struct locus *l = NULL, *lPtr = NULL; struct alleleInfo *aPtr = NULL; struct lineFile *lf = lineFileOpen(pbFile, TRUE); /* input file */ char *row[4], *row2[3]; /* number of fields in input file */ char *pbName; char chrom[32]; int chromStart; int chromEnd; char name[32]; char *allele; while (lineFileRow(lf, row)) /* process one snp at a time */ { struct alleleInfo *ai1 = NULL, *ai2 = NULL, *aiPtr; struct locus *m = NULL; chopString(row[0], "-", row2, 3); chromStart = sqlUnsigned(row2[0]); chromEnd = chromStart+1; safef(chrom, sizeof(chrom), "chr%s", row2[2]); if(l==NULL||l->chrom==NULL||l->chromStart!=chromStart||!(sameString(l->chrom,chrom))) { AllocVar(m); safef(name, sizeof(name), "%s_%d", row2[1], ++ssnpId); m->chrom = cloneString(chrom); m->chromStart = chromStart; m->chromEnd = chromEnd; m->name = cloneString(name); m->hugoId = cloneString(row2[1]); m->strictSnp = TRUE; slAddHead(&l, m); } allele=cloneString(row[2]); convertToUppercase(allele); if ( sameString(allele,"A") || sameString(allele,"C") || sameString(allele,"G") || sameString(allele,"T") ) { for (aiPtr=l->alleles; aiPtr!=NULL; aiPtr=aiPtr->next) if (sameString(aiPtr->allele, allele)) break; if (aiPtr==NULL) { AllocVar(ai1); ai1->allele=cloneString(allele); slAddHead(&(l->alleles), ai1); l->alleleCount++; aiPtr=l->alleles; } aiPtr->count++; l->sampleSize++; } allele=cloneString(row[3]); convertToUppercase(allele); if ( sameString(allele,"A") || sameString(allele,"C") || sameString(allele,"G") || sameString(allele,"T") ) { for (aiPtr=l->alleles; aiPtr!=NULL; aiPtr=aiPtr->next) if (sameString(aiPtr->allele, allele)) break; if (aiPtr==NULL) { AllocVar(ai2); ai2->allele=cloneString(allele); 
slAddHead(&(l->alleles), ai2); l->alleleCount++; aiPtr=l->alleles; } aiPtr->count++; l->sampleSize++; } } slReverse(&l); for(lPtr=l; lPtr!=NULL; lPtr=lPtr->next) { strand = hashFindVal(strandHash, lPtr->hugoId); if (strand == NULL) { hashStore(missingHugoIdHash, lPtr->hugoId); slRemoveEl(l, lPtr); continue; } lPtr->strand = cloneString(strand->strand); } freeHash(&strandHash); hashPtr = hashFirst(missingHugoIdHash); while ( (missingName=hashNextName(&hashPtr)) != NULL ) printf("HUGO ID was not found in strand.txt (usually from proteome.hgncXref): %s\n", missingName); freeHash(&missingHugoIdHash); return l; }
static void mafOrAxtClick2(struct sqlConnection *conn, struct sqlConnection *conn2, struct trackDb *tdb, char *axtOtherDb, char *fileName)
/* Display details for MAF or AXT tracks.
 * When the window (winEnd - winStart) exceeds 30,000 bases only a "zoom in"
 * hint is printed.  Otherwise the alignments overlapping the window are
 * loaded, components switched off in the cart are removed, components are
 * optionally reordered by the speciesOrder setting, each alignment is clipped
 * to the window, and the result is printed as HTML together with
 * conservation-statistics links and the "show ... bases" control.
 *   conn, conn2, axtOtherDb, fileName - handed to mafOrAxtLoadInRegion2();
 *                                       not used when tdb->type is "bigMaf"
 *   tdb - track whose settings and cart variables drive the display */
{
hgBotDelay();
if (winEnd - winStart > 30000)
    {
    printf("Zoom so that window is 30,000 bases or less to see alignments and conservation statistics\n");
    }
else
    {
    struct mafAli *mafList = NULL, *maf, *subList = NULL;
    int aliIx = 0, realCount = 0;
    char dbChrom[64];
    char option[128];
    char *capTrack;     /* only assigned and read under ADDEXONCAPITAL */
    struct consWiggle *consWig, *consWiggles;
    struct hash *speciesOffHash = NULL;  /* organisms removed from display; created on demand */
    char *speciesOrder = NULL;
    char *speciesTarget = trackDbSetting(tdb, SPECIES_TARGET_VAR);
    char buffer[1024];
    int useTarg = FALSE;
    int useIrowChains = FALSE;

    /* Irow chains apply only when the cart option is on and the track has an
     * "irows" setting. */
    safef(option, sizeof(option), "%s.%s", tdb->track, MAF_CHAIN_VAR);
    if (cartCgiUsualBoolean(cart, option, FALSE) && trackDbSetting(tdb, "irows") != NULL)
        useIrowChains = TRUE;

    /* With irow chains, useTarg defaults to TRUE when <track>.vis is not in
     * the cart and a species target is configured; otherwise it is TRUE only
     * when <track>.vis equals "useTarg". */
    safef(buffer, sizeof(buffer), "%s.vis",tdb->track);
    if (useIrowChains)
        {
        if (!cartVarExists(cart, buffer) && (speciesTarget != NULL))
            useTarg = TRUE;
        else
            {
            char *val;
            val = cartUsualString(cart, buffer, "useCheck");
            useTarg = sameString("useTarg",val);
            }
        }

    if (sameString(tdb->type, "bigMaf"))
        {
        /* NOTE(review): this local fileName shadows the fileName parameter,
         * and no close of bbi is visible in this function -- confirm. */
        char *fileName = trackDbSetting(tdb, "bigDataUrl");
        struct bbiFile *bbi = bigBedFileOpen(fileName);
        mafList = bigMafLoadInRegion(bbi, seqName, winStart, winEnd);
        }
    else
        mafList = mafOrAxtLoadInRegion2(conn,conn2, tdb, seqName, winStart, winEnd, axtOtherDb, fileName);
    /* dbChrom is "<database without hub prefix>.<seqName>"; it is the source
     * name used below to subset each alignment and move that component to the top. */
    safef(dbChrom, sizeof(dbChrom), "%s.%s", hubConnectSkipHubPrefix(database), seqName);

    /* The cart value of <track>.speciesOrder wins over the trackDb setting. */
    safef(option, sizeof(option), "%s.speciesOrder", tdb->track);
    speciesOrder = cartUsualString(cart, option, NULL);
    if (speciesOrder == NULL)
        speciesOrder = trackDbSetting(tdb, "speciesOrder");

    for (maf = mafList; maf != NULL; maf = maf->next)
        {
        int mcCount = 0;        /* components after the first that stay displayed */
        struct mafComp *mc;
        struct mafAli *subset;
        struct mafComp *nextMc;

        /* remove empty components and configured off components
         * from MAF, and ignore
         * the entire MAF if all components are empty
         * (solely for gap annotation) */
        /* NOTE(review): when useTarg is set the loop below is skipped, mcCount
         * stays 0 and every alignment is skipped by the "continue" that
         * follows -- confirm this is intended. */
        if (!useTarg)
            {
            /* Starts at the second component; the first is never removed here. */
            for (mc = maf->components->next; mc != NULL; mc = nextMc)
                {
                char buf[64];
                char *organism;
                mafSrcDb(mc->src, buf, sizeof buf);
                organism = hOrganism(buf);
                if (!organism)
                    organism = buf;     /* fall back to the name mafSrcDb put in buf */
                nextMc = mc->next;      /* saved first: mc may be unlinked below */
                safef(option, sizeof(option), "%s.%s", tdb->track, buf);
                /* The same cart lookup is made twice: once to record the
                 * organism as hidden, once to unlink or count the component. */
                if (!cartUsualBoolean(cart, option, TRUE))
                    {
                    if (speciesOffHash == NULL)
                        speciesOffHash = newHash(4);
                    hashStoreName(speciesOffHash, organism);
                    }
                if (!cartUsualBoolean(cart, option, TRUE))
                    slRemoveEl(&maf->components, mc);
                else
                    mcCount++;
                }
            }
        if (mcCount == 0)
            continue;

        if (speciesOrder)
            {
            int speciesCt;
            char *species[2048];
            struct mafComp **newOrder, *mcThis;
            int i;

            /* Rebuild the component list: the current first component stays
             * first, followed by the components found for each listed species,
             * in list order.  Components not found via the list are left out.
             * NOTE(review): no free of the cloneString() copy or of newOrder
             * is visible in this loop. */
            mcCount = 0;
            speciesCt = chopLine(cloneString(speciesOrder), species);
            newOrder = needMem((speciesCt + 1) * sizeof (struct mafComp *));
            newOrder[mcCount++] = maf->components;

            for (i = 0; i < speciesCt; i++)
                {
                if ((mcThis = mafMayFindCompSpecies(maf, species[i], '.')) == NULL)
                    continue;
                newOrder[mcCount++] = mcThis;
                }

            maf->components = NULL;
            for (i = 0; i < mcCount; i++)
                {
                newOrder[i]->next = 0;
                slAddHead(&maf->components, newOrder[i]);
                }

            /* slAddHead built the list backwards; restore newOrder's order. */
            slReverse(&maf->components);
            }
        subset = mafSubsetE(maf, dbChrom, winStart, winEnd, TRUE);
        if (subset != NULL)
            {
            /* Reformat MAF if needed so that sequence from current
             * database is the first component and on the
             * plus strand. */
            mafMoveComponentToTop(subset, dbChrom);
            if (subset->components->strand == '-')
                mafFlipStrand(subset);
            subset->score = mafScoreMultiz(subset);
            slAddHead(&subList, subset);
            ++realCount;
            }
        }
    slReverse(&subList);
    mafAliFreeList(&mafList);
    if (subList != NULL)
        {
        char *showVarName = "hgc.showMultiBase";
        char *showVarVal = cartUsualString(cart, showVarName, "all");
        boolean onlyDiff = sameWord(showVarVal, "diff");
#ifdef ADDEXONCAPITAL
        char *codeVarName = "hgc.multiCapCoding";
        char *codeVarVal = cartUsualString(cart, codeVarName, "coding");
        boolean onlyCds = sameWord(codeVarVal, "coding");
#endif
        /* add links for conservation score statistics */
        consWiggles = wigMafWiggles(database, tdb);
        int wigCount = slCount(consWiggles);
        if (wigCount == 1)
            {
            conservationStatsLink(tdb, "Conservation score statistics", consWiggles->table);
            }
        else if (wigCount > 1)
            {
            /* multiple wiggles. List all that have been turned on with
             * checkboxes */

            /* Scan for cart variables -- do any exist, are any turned on ? */
            boolean wigSet = FALSE;
            boolean wigOn = FALSE;
            for (consWig = consWiggles; consWig != NULL; consWig = consWig->next)
                {
                char *wigVarSuffix = NULL;
                /* Called only for the suffix it stores in wigVarSuffix. */
                (void)wigMafWiggleVar(tdb->track, consWig, &wigVarSuffix);
                if (cartVarExistsAnyLevel(cart, tdb, FALSE, wigVarSuffix))
                    {
                    wigSet = TRUE;
                    if (cartBooleanClosestToHome(cart, tdb, FALSE, wigVarSuffix))
                        wigOn = TRUE;
                    }
                }
            /* If there are no cart vars, turn on the first (default) wig */
            if (!wigSet)
                {
                char *prefix = tdb->track; // use when setting things to the cart
                if (tdbIsContainerChild(tdb))
                    prefix = tdbGetContainer(tdb)->track;

                cartSetBoolean(cart, wigMafWiggleVar(prefix, consWiggles, NULL), TRUE);
                wigOn = TRUE;
                }

            if (wigOn)
                {
                boolean first = TRUE;
                for (consWig = consWiggles; consWig != NULL; consWig = consWig->next)
                    {
                    /* The heading is printed once, before the first wiggle is examined. */
                    if (first)
                        {
                        printf("Conservation score statistics:");
                        first = FALSE;
                        }
                    char *wigVarSuffix = NULL;
                    (void)wigMafWiggleVar(tdb->track, consWig, &wigVarSuffix);
                    if (cartUsualBooleanClosestToHome(cart, tdb, FALSE, wigVarSuffix,FALSE))
                        {
                        printf(" ");
                        /* presumably replaces '_' with ' ' in the label, in place -- confirm */
                        subChar(consWig->uiLabel, '_', ' ');
                        conservationStatsLink(tdb, consWig->uiLabel, consWig->table);
                        }
                    }
                }
            }
        puts("</P>\n");

        /* no alignment to display when in visibilities where only wiggle is shown */
        /* NOTE(review): this early return happens before mafAliFreeList(&subList)
         * below is reached. */
        char *vis = cartOptionalString(cart, tdb->track);
        if (vis)
            {
            enum trackVisibility tv = hTvFromStringNoAbort(vis);
            if (tv == tvSquish || tv == tvDense)
                return;
            }

#ifdef ADDEXONCAPITAL
        puts("<FORM ACTION=\"../cgi-bin/hgc\" NAME=\"gpForm\" METHOD=\"GET\">");
        cartSaveSession(cart);
        cgiContinueHiddenVar("g");
        cgiContinueHiddenVar("c");
        cgiContinueHiddenVar("i");
        printf("Capitalize ");
        cgiMakeDropListFull(codeVarName, codeAll, codeAll, ArraySize(codeAll), codeVarVal, autoSubmit);
        printf("exons based on ");
        capTrack = genePredDropDown(cart, trackHash, "gpForm", "hgc.multiCapTrack");
#endif
        printf("show ");
        cgiMakeDropListFull(showVarName, showAll, showAll, ArraySize(showAll), showVarVal, autoSubmit);
        printf("bases");
        printf("<BR>\n");
        /* NOTE(review): the matching <FORM> tag is only emitted in the
         * ADDEXONCAPITAL section above; this close tag is always printed. */
        printf("</FORM>\n");

#ifdef REVERSESTRAND
        /* notify if bases are complemented (hgTracks is on reverse strand) */
        if (cartCgiUsualBoolean(cart, COMPLEMENT_BASES_VAR, FALSE))
            puts("<EM>Alignment displayed on reverse strand</EM><BR>");
#endif
        puts("Place cursor over species for alignment detail. Click on 'B' to link to browser ");
        puts("for aligned species, click on 'D' to get DNA for aligned species.<BR>");

        printf("<TT><PRE>");

        /* notify if species removed from alignment */
        if (speciesOffHash)
            {
            char *species;
            struct hashCookie hc = hashFirst(speciesOffHash);
            puts("<B>Components not displayed:</B> ");
            while ((species = hashNextName(&hc)) != NULL)
                printf("%s ", species);
            puts("<BR>");
            }

        /* Print each clipped alignment with a header giving its 1-based
         * start, its end and its size on the first component. */
        for (maf = subList; maf != NULL; maf = maf->next)
            {
            mafLowerCase(maf);
#ifdef ADDEXONCAPITAL
            if (capTrack != NULL)
                capMafOnTrack(maf, capTrack, onlyCds);
#endif
            printf("<B>Alignment block %d of %d in window, %d - %d, %d bps </B>\n", ++aliIx,realCount,maf->components->start + 1, maf->components->start + maf->components->size, maf->components->size);
            mafPrettyOut(stdout, maf, 70,onlyDiff, aliIx);
            }
        mafAliFreeList(&subList);
        }
    else
        {
        printf("No multiple alignment in browser window");
        }
    /* NOTE(review): printed on both branches, although <TT><PRE> is only
     * opened on the branch that has alignments. */
    printf("</PRE></TT>");
    }
}