void makeTableDescriptions(char *database, char *asFile) /* makeTableDescriptions - Add table descriptions to database.. */ { struct sqlConnection *conn = sqlConnect(database); struct lineFile *lf = lineFileOpen(asFile, TRUE); FILE *f = hgCreateTabFile(".", "tableDescriptions"); /* Open a tab file with name corresponding to tableName in tmpDir. */ char *line; /* struct asObject *asList = */ asParseFile(asFile); /* Just to check syntax */ if (sqlTableExists(conn, "chromInfo")) errAbort("%s looks like a genome database, has chromInfo, aborting", database); sqlRemakeTable(conn, "tableDescriptions", "NOSQLINJ CREATE TABLE tableDescriptions (\n" " tableName varchar(255) not null,\n" " autoSqlDef longblob not null,\n" " gbdAnchor varchar(255) not null,\n" " PRIMARY KEY(tableName(32))\n" ")" ); while (lineFileNextReal(lf, &line)) { if (startsWith("table", line)) { struct dyString *as = dyStringNew(0); char *name = trimSpaces(line + 6); /* Skip over table. */ char *escaped = NULL; fprintf(f, "%s\t", name); /* Putting lines into as. */ for (;;) { char *s; dyStringAppend(as, line); dyStringAppendC(as, '\n'); s = skipLeadingSpaces(line); if (s[0] == ')') break; if (!lineFileNext(lf, &line, NULL)) errAbort("Unexpected end of file, missing closing paren in %s", lf->fileName); } escaped = needMem(2*as->stringSize+1); fprintf(f, "%s\t", sqlEscapeTabFileString2(escaped, as->string)); fprintf(f, "\n"); freez(&escaped); dyStringFree(&as); } else errAbort("Expecting table line %d of %s", lf->lineIx, lf->fileName); } hgLoadTabFile(conn, ".", "tableDescriptions", &f); }
struct asObject *getAsObj(int columnCount) /* If the -as=table.as option was given, parse the autoSql file into an asObject. * Otherwise try to deduce autoSql from type; if no type info, just return NULL. */ { struct asObject *asObj = NULL; if (as != NULL) { asObj = asParseFile(as); if (asObj->next != NULL) errAbort("Can only handle .as files containing a single object."); if (typeBedN > 0) // abort if -type=bedN columns are not standard asCompareObjAgainstStandardBed(asObj, typeBedN, TRUE); } else if (bedDetail) asObj = bedDetailAsObj(); else if (typeBedN > 0) { char *asText = bedAsDef(typeBedN, columnCount); asObj = asParseText(asText); freeMem(asText); } return asObj; }
void verticalSplitSqlTable(char *oldTab, char *oldAs, char *splitSpec, char *outDir) /* verticalSplitSqlTable - Split a database table into two new related tables that share a field. */ { struct asObject *as = asParseFile(oldAs); if (as->next != NULL) errAbort("%d records in %s, only 1 allowed\n", slCount(as), oldAs); uglyf("Read %s from %s\n", as->name, oldAs); /* Read fields from splitSpec, and make sure there are no extra. */ struct hash *ra = raReadSingle(splitSpec); char *table1 = mustFindInSplitSpec("table1", ra, splitSpec); char *fields1 = mustFindInSplitSpec("fields1", ra, splitSpec); char *description1 = mustFindInSplitSpec("description1", ra, splitSpec); char *table2 = mustFindInSplitSpec("table2", ra, splitSpec); char *fields2 = mustFindInSplitSpec("fields2", ra, splitSpec); char *description2 = mustFindInSplitSpec("description2", ra, splitSpec); char *sharedKey = mustFindInSplitSpec("sharedKey", ra, splitSpec); if (ra->elCount > 7) errAbort("Extra fields in %s", splitSpec); /* Convert this=that strings to lists of pairs. */ struct slPair *fieldList1 = slPairFromString(fields1); struct slPair *fieldList2 = slPairFromString(fields2); /* Do some more checks */ if (sameString(table1, table2)) errAbort("Error: table1 and table2 are the same (%s) in %s", table1, splitSpec); checkSharedKeyInList(sharedKey, splitSpec, fields1, fieldList1); checkSharedKeyInList(sharedKey, splitSpec, fields2, fieldList2); struct asColumn *keyCol = asColumnFind(as, sharedKey); if (keyCol == NULL) errAbort("The sharedKey '%s' is not in %s", sharedKey, oldAs); /* Make sure that all fields in splitSpec are actually in the oldAs file. */ checkFieldsInAs(fieldList1, splitSpec, as, oldAs); checkFieldsInAs(fieldList2, splitSpec, as, oldAs); /* Make sure that all old table fields are covered */ if (!partialOk) { struct hash *covered = hashNew(0); struct slPair *field; for (field = fieldList1; field != NULL; field = field->next) hashAdd(covered, field->val, NULL); for (field = fieldList2; field != NULL; field = field->next) hashAdd(covered, field->val, NULL); struct asColumn *col; for (col = as->columnList; col != NULL; col = col->next) { if (!hashLookup(covered, col->name)) errAbort("Field %s in %s not output, use -partialOk flag if this is intentional", col->name, oldAs); } } /* Ok, input is checked, start on output.. */ if (lastChar(outDir) == '/') trimLastChar(outDir); makeDirsOnPath(outDir); /* Output .as files. */ outputPartialAs(as, table1, fieldList1, description1, outDir); outputPartialAs(as, table2, fieldList2, description2, outDir); /* Output first split file - a straight up subset of columns. */ char path[PATH_LEN]; safef(path, sizeof(path), "%s/%s.tab", outDir, table1); outputPartialTab(oldTab, as, fieldList1, path); /* Output second split file */ char errPath[PATH_LEN]; safef(path, sizeof(path), "%s/%s.tab", outDir, table2); safef(errPath, sizeof(path), "%s/mergeErrs.txt", outDir); outputUniqueOnSharedKey(oldTab, as, keyCol, fieldList2, path, errPath); }
int main(int argc, char *argv[]) { optionInit(&argc, argv, optionSpecs); if (argc < 2 || argc > 3) usage(); pushCarefulMemHandler(LIMIT_2or6GB); char *db = argv[1]; char *test = NULL; boolean doAllTests = (argc == 2); if (!doAllTests) { if (sameString(argv[2], pgSnpDbToTabOut) || sameString(argv[2], pgSnpKgDbToTabOutShort) || sameString(argv[2], pgSnpKgDbToTabOutLong) || sameString(argv[2], pgSnpKgDbToGpFx) || sameString(argv[2], snpConsDbToTabOutShort) || sameString(argv[2], snpConsDbToTabOutLong) || sameString(argv[2], vcfEx1) || sameString(argv[2], vcfEx2) || sameString(argv[2], bigBedToTabOut) || sameString(argv[2], snpBigWigToTabOut) || sameString(argv[2], vepOut) || sameString(argv[2], gpFx)) test = cloneString(argv[2]); else { warn("Unrecognized test name '%s'\n", argv[2]); usage(); } } if (udcCacheTimeout() < 300) udcSetCacheTimeout(300); udcSetDefaultDir("./udcCache"); struct annoAssembly *assembly = getAnnoAssembly(db); // First test: some rows of a pgSnp table struct streamerInfo pgSnpInfo = { NULL, assembly, db, "pgNA12878", arWords, pgSnpAsObj() }; if (doAllTests || sameString(test, pgSnpDbToTabOut)) dbToTabOut(&pgSnpInfo, "stdout", "chr1", 705881, 752721, FALSE); // Second test: some rows of a pgSnp table integrated with knownGene struct streamerInfo kgInfo = { NULL, assembly, db, "knownGene", arWords, asParseFile("../knownGene.as") }; pgSnpInfo.next = &kgInfo; if (doAllTests || sameString(test, pgSnpKgDbToTabOutShort)) dbToTabOut(&pgSnpInfo, "stdout", "chr1", 705881, 752721, FALSE); // Third test: all rows of a pgSnp table integrated with knownGene if (doAllTests || sameString(test, pgSnpKgDbToTabOutLong)) dbToTabOut(&pgSnpInfo, "stdout", NULL, 0, 0, FALSE); // Fourth test: some rows of snp135 integrated with phyloP scores if (doAllTests || sameString(test, snpConsDbToTabOutShort) || sameString(test, snpConsDbToTabOutLong)) { struct streamerInfo snp135Info = { NULL, assembly, db, "snp135", arWords, asParseFile("../snp132Ext.as") }; struct streamerInfo phyloPInfo = { NULL, assembly, db, "phyloP46wayPlacental", arWig, NULL }; snp135Info.next = &phyloPInfo; if (sameString(test, snpConsDbToTabOutShort)) dbToTabOut(&snp135Info, "stdout", "chr1", 737224, 738475, FALSE); else dbToTabOut(&snp135Info, "stdout", NULL, 0, 0, FALSE); } // Fifth test: VCF with genotypes if (doAllTests || sameString(test, vcfEx1)) { #if (defined USE_TABIX && defined KNETFILE_HOOKS) knetUdcInstall(); #endif//def USE_TABIX && KNETFILE_HOOKS struct streamerInfo vcfEx1 = { NULL, assembly, NULL, "http://genome.ucsc.edu/goldenPath/help/examples/vcfExample.vcf.gz", arWords, vcfAsObj() }; dbToTabOut(&vcfEx1, "stdout", NULL, 0, 0, FALSE); } if (doAllTests || sameString(test, vcfEx2)) { struct streamerInfo vcfEx2 = { NULL, assembly, NULL, "http://genome.ucsc.edu/goldenPath/help/examples/vcfExampleTwo.vcf", arWords, vcfAsObj() }; dbToTabOut(&vcfEx2, "stdout", NULL, 0, 0, FALSE); } if (doAllTests || sameString(test, pgSnpKgDbToGpFx)) { struct streamerInfo pg2SnpInfo = { NULL, assembly, NULL, "input/annoGrator/pgForTestingGpFx.pgSnp.tab", arWords, pgSnpAsObj() }; pg2SnpInfo.next = &kgInfo; dbToTabOut(&pg2SnpInfo, "stdout", NULL, 0, 0, TRUE); /* FIXME // 3base insertion CDS - chr3:124,646,699-124,646,718 dbToTabOut(&pg2SnpInfo, "stdout", "chr3",124646699,124646718, TRUE); */ } if (doAllTests || sameString(test, bigBedToTabOut)) { struct streamerInfo bigBedInfo = { NULL, assembly, NULL, "http://genome.ucsc.edu/goldenPath/help/examples/bigBedExample.bb", arWords, NULL }; dbToTabOut(&bigBedInfo, "stdout", "chr21", 34716800, 34733700, FALSE); } if (doAllTests || sameString(test, snpBigWigToTabOut)) { struct streamerInfo snp135Info = { NULL, assembly, db, "snp135", arWords, asParseFile("../snp132Ext.as") }; struct streamerInfo bigWigInfo = { NULL, assembly, NULL, "http://genome.ucsc.edu/goldenPath/help/examples/bigWigExample.bw", arWig, NULL }; snp135Info.next = &bigWigInfo; dbToTabOut(&snp135Info, "stdout", "chr21", 34716800, 34733700, FALSE); } if (doAllTests || sameString(test, vepOut)) { struct streamerInfo vepSamplePgSnp = { NULL, assembly, NULL, "input/annoGrator/vepSample.pgSnp.tab", arWords, asParseFile("../pgSnp.as") }; struct streamerInfo kgInfo = { NULL, assembly, db, "ensGene", arWords, asParseFile("../genePredExt.as") }; struct streamerInfo snpInfo = { NULL, assembly, db, "snp135", arWords, asParseFile("../snp132Ext.as") }; vepSamplePgSnp.next = &kgInfo; kgInfo.next = &snpInfo; // Instead of dbToTabOut, we need to make a VEP config data structure and // use it to create an annoFormatVep. struct streamerInfo *primaryInfo = &vepSamplePgSnp; struct annoStreamer *primary = NULL; struct annoGrator *gratorList = NULL; sourcesFromInfoList(primaryInfo, TRUE, &primary, &gratorList); struct annoStreamer *gpVarSource = (struct annoStreamer *)gratorList; struct annoStreamer *snpSource = gpVarSource->next; struct annoFormatter *vepOut = annoFormatVepNew("stdout", FALSE, primary, "vepSamplePgSnp", gpVarSource, "UCSC Genes ...", snpSource, "just dbSNP 135"); struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList, vepOut); annoGratorQuerySetRegion(query, "chr1", 876900, 886920); annoGratorQueryExecute(query); annoGratorQuerySetRegion(query, "chr5", 135530, 145535); annoGratorQueryExecute(query); annoGratorQueryFree(&query); } if (doAllTests || sameString(test, gpFx)) { struct streamerInfo variants = { NULL, assembly, NULL, "input/annoGrator/moreVariants.pgSnp.tab", arWords, asParseFile("../pgSnp.as") }; struct streamerInfo kgInfo = { NULL, assembly, db, "knownGene", arWords, asParseFile("../knownGene.as") }; struct streamerInfo snpInfo = { NULL, assembly, db, "snp137", arWords, asParseFile("../snp132Ext.as") }; struct asObject *dbNsfpSeqChangeAs = bigBedAsFromFileName("/gbdb/hg19/dbNsfp/dbNsfpSeqChange.bb"); struct streamerInfo dbNsfpSeqChange = { NULL, assembly, NULL, "/gbdb/hg19/dbNsfp/dbNsfpSeqChange.bb", arWords, dbNsfpSeqChangeAs }; struct asObject *dbNsfpSiftAs = bigBedAsFromFileName("/gbdb/hg19/dbNsfp/dbNsfpSift.bb"); struct streamerInfo dbNsfpSift = { NULL, assembly, NULL, "/gbdb/hg19/dbNsfp/dbNsfpSift.bb", arWords, dbNsfpSiftAs }; variants.next = &kgInfo; kgInfo.next = &snpInfo; snpInfo.next = &dbNsfpSeqChange; dbNsfpSeqChange.next = &dbNsfpSift; // Instead of dbToTabOut, we need to make a VEP config data structure and // use it to create an annoFormatVep. struct streamerInfo *primaryInfo = &variants; struct annoStreamer *primary = NULL; struct annoGrator *gratorList = NULL; sourcesFromInfoList(primaryInfo, TRUE, &primary, &gratorList); struct annoStreamer *gpVarSource = (struct annoStreamer *)gratorList; struct annoStreamer *snpSource = gpVarSource->next; struct annoStreamer *dbNsfpSource = snpSource->next->next; struct annoFormatter *vepOut = annoFormatVepNew("stdout", FALSE, primary, "some more variants", gpVarSource, "UCSC Genes of course", snpSource, "now snp137."); annoFormatVepAddExtraItem(vepOut, dbNsfpSource, "SIFT", "SIFT score from dbNSFP", ""); struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList, vepOut); annoGratorQuerySetRegion(query, "chr19", 45405960, 45419476); annoGratorQueryExecute(query); annoGratorQueryFree(&query); } return 0; }