void makeTableDescriptions(char *database, char *asFile)
/* makeTableDescriptions - Add table descriptions to database.
 * Runs asFile through asParseFile as a syntax check, aborts if database
 * already has a chromInfo table, recreates the tableDescriptions table, then
 * writes one tab-separated row per "table" definition found in asFile (table
 * name, escaped autoSql text, empty third column) and loads the rows with
 * hgLoadTabFile.
 * NOTE(review): lf and conn are not released here; presumably the program
 * exits right after this call - confirm. */
{
struct sqlConnection *conn = sqlConnect(database);
struct lineFile *lf = lineFileOpen(asFile, TRUE);
FILE *f = hgCreateTabFile(".", "tableDescriptions");
char *line;

asParseFile(asFile);	/* Result unused - called just to check syntax. */

if (sqlTableExists(conn, "chromInfo"))
    errAbort("%s looks like a genome database, has chromInfo, aborting", 
        database);

sqlRemakeTable(conn, "tableDescriptions",
   "NOSQLINJ CREATE TABLE tableDescriptions (\n"
   "  tableName varchar(255) not null,\n"
   "  autoSqlDef longblob not null,\n"
   "  gbdAnchor varchar(255) not null,\n"
   "  PRIMARY KEY(tableName(32))\n"
   ")" );

while (lineFileNextReal(lf, &line))
    {
    if (startsWith("table", line))
        {
        struct dyString *as = dyStringNew(0);
        /* Skip only the 5-character keyword and rely on trimSpaces to drop the
         * separator after it.  Skipping 6 characters stepped past the string
         * terminator when the line was just "table". */
        char *name = trimSpaces(line + 5);
        char *escaped = NULL;

        if (name[0] == 0)
            errAbort("Missing table name line %d of %s", lf->lineIx, lf->fileName);

        /* First column: table name. */
        fprintf(f, "%s\t", name);

        /* Collect the definition, from the table line through the line whose
         * first non-space character is the closing paren, into as. */
        for (;;)
            {
            char *s;
            dyStringAppend(as, line);
            dyStringAppendC(as, '\n');
            s = skipLeadingSpaces(line);
            if (s[0] == ')')
                break;
            if (!lineFileNext(lf, &line, NULL))
                errAbort("Unexpected end of file, missing closing paren in %s",
                        lf->fileName);
            }

        /* Second column: escaped definition.  The buffer is sized at twice the
         * text plus a terminator.  The tab after it leaves the third column
         * empty. */
        escaped = needMem(2*as->stringSize+1);
        fprintf(f, "%s\t", sqlEscapeTabFileString2(escaped, as->string));
        fprintf(f, "\n");

        freez(&escaped);
        dyStringFree(&as);
        }
    else
        errAbort("Expecting table line %d of %s", lf->lineIx, lf->fileName);
    }
hgLoadTabFile(conn, ".", "tableDescriptions", &f);
}
Exemple #2
0
struct asObject *getAsObj(int columnCount)
/* If the -as=table.as option was given, parse the autoSql file into an asObject.
 * Otherwise try to deduce autoSql from type; if no type info, just return NULL.
 * columnCount is only used when building the default bed definition.
 * Aborts if the .as file yields no object or more than one object. */
{
struct asObject *asObj = NULL;
if (as != NULL)
    {
    asObj = asParseFile(as);
    /* Guard the ->next test below against an empty parse result. */
    if (asObj == NULL)
        errAbort("No autoSql object found in %s", as);
    if (asObj->next != NULL)
        errAbort("Can only handle .as files containing a single object.");
    if (typeBedN > 0)
        {
        // abort if -type=bedN columns are not standard
        asCompareObjAgainstStandardBed(asObj, typeBedN, TRUE);
        }
    }
else if (bedDetail)
    asObj = bedDetailAsObj();
else if (typeBedN > 0)
    {
    /* Build the autoSql text for a bed of this size, parse it, then release
     * the temporary text. */
    char *asText = bedAsDef(typeBedN, columnCount);
    asObj = asParseText(asText);
    freeMem(asText);
    }
return asObj;
}
void verticalSplitSqlTable(char *oldTab, char *oldAs, char *splitSpec, char *outDir)
/* verticalSplitSqlTable - Split a database table into two new related tables that share a field.
 *   oldTab    - tab-separated input, handed to the output helpers
 *   oldAs     - autoSql file describing oldTab; must hold exactly one record
 *   splitSpec - .ra file giving table1/fields1/description1,
 *               table2/fields2/description2 and sharedKey, and nothing else
 *   outDir    - directory for the new .as and .tab files plus mergeErrs.txt;
 *               a trailing '/' is trimmed from this string
 * Aborts on any inconsistency between splitSpec and oldAs. */
{
struct asObject *as = asParseFile(oldAs);
/* Guard the ->next test below against an empty parse result. */
if (as == NULL)
    errAbort("No records in %s", oldAs);
if (as->next != NULL)
    errAbort("%d records in %s, only 1 allowed\n", slCount(as), oldAs);
/* NOTE(review): uglyf looks like leftover debug output - confirm before removing. */
uglyf("Read %s from %s\n", as->name, oldAs);

/* Read fields from splitSpec, and make sure there are no extra. */
struct hash *ra = raReadSingle(splitSpec);
char *table1 = mustFindInSplitSpec("table1", ra, splitSpec);
char *fields1 = mustFindInSplitSpec("fields1", ra, splitSpec);
char *description1 = mustFindInSplitSpec("description1", ra, splitSpec);
char *table2 = mustFindInSplitSpec("table2", ra, splitSpec);
char *fields2 = mustFindInSplitSpec("fields2", ra, splitSpec);
char *description2 = mustFindInSplitSpec("description2", ra, splitSpec);
char *sharedKey = mustFindInSplitSpec("sharedKey", ra, splitSpec);
/* Seven required fields were looked up above; anything more is unexpected. */
if (ra->elCount > 7)
    errAbort("Extra fields in %s", splitSpec);

/* Convert this=that strings to lists of pairs. */
struct slPair *fieldList1 = slPairFromString(fields1);
struct slPair *fieldList2 = slPairFromString(fields2);

/* Do some more checks */
if (sameString(table1, table2))
    errAbort("Error: table1 and table2 are the same (%s) in %s", table1, splitSpec);
checkSharedKeyInList(sharedKey, splitSpec, fields1, fieldList1);
checkSharedKeyInList(sharedKey, splitSpec, fields2, fieldList2);
struct asColumn *keyCol = asColumnFind(as, sharedKey);
if (keyCol == NULL)
    errAbort("The sharedKey '%s' is not in %s", sharedKey, oldAs);

/* Make sure that all fields in splitSpec are actually in the oldAs file. */
checkFieldsInAs(fieldList1, splitSpec, as, oldAs);
checkFieldsInAs(fieldList2, splitSpec, as, oldAs);

/* Make sure that all old table fields are covered */
if (!partialOk)
    {
    /* Collect the val side of every pair from both lists, then require each
     * column of the old table to appear among them. */
    struct hash *covered = hashNew(0);
    struct slPair *field;
    for (field = fieldList1; field != NULL; field = field->next)
        hashAdd(covered, field->val, NULL);
    for (field = fieldList2; field != NULL; field = field->next)
        hashAdd(covered, field->val, NULL);
    struct asColumn *col;
    for (col = as->columnList; col != NULL; col = col->next)
        {
        if (!hashLookup(covered, col->name))
            errAbort("Field %s in %s not output, use -partialOk flag if this is intentional",
                col->name, oldAs);
        }
    }

/* Ok, input is checked, start on output.. */
if (lastChar(outDir) == '/')
    trimLastChar(outDir);
makeDirsOnPath(outDir);

/* Output .as files. */
outputPartialAs(as, table1, fieldList1, description1, outDir);
outputPartialAs(as, table2, fieldList2, description2, outDir);

/* Output first split file - a straight up subset of columns. */
char path[PATH_LEN];
safef(path, sizeof(path), "%s/%s.tab", outDir, table1);
outputPartialTab(oldTab, as, fieldList1, path);

/* Output second split file.  errPath is sized from its own buffer, not from
 * path, so the two cannot drift apart if one declaration changes. */
char errPath[PATH_LEN];
safef(path, sizeof(path), "%s/%s.tab", outDir, table2);
safef(errPath, sizeof(errPath), "%s/mergeErrs.txt", outDir);
outputUniqueOnSharedKey(oldTab, as, keyCol, fieldList2, path, errPath);
}
Exemple #4
0
int main(int argc, char *argv[])
/* Run the tests against database argv[1].  With no further argument every
 * test runs; otherwise argv[2] must be one of the known test names and only
 * that test runs.  Output goes to "stdout" via dbToTabOut or a VEP formatter. */
{
optionInit(&argc, argv, optionSpecs);
if (argc < 2 || argc > 3)
    usage();
pushCarefulMemHandler(LIMIT_2or6GB);
char *db = argv[1];
char *test = NULL;
boolean doAllTests = (argc == 2);
if (!doAllTests)
    {
    if (sameString(argv[2], pgSnpDbToTabOut) ||
        sameString(argv[2], pgSnpKgDbToTabOutShort) ||
        sameString(argv[2], pgSnpKgDbToTabOutLong) ||
        sameString(argv[2], pgSnpKgDbToGpFx) ||
        sameString(argv[2], snpConsDbToTabOutShort) ||
        sameString(argv[2], snpConsDbToTabOutLong) ||
        sameString(argv[2], vcfEx1) ||
        sameString(argv[2], vcfEx2) ||
        sameString(argv[2], bigBedToTabOut) ||
        sameString(argv[2], snpBigWigToTabOut) ||
        sameString(argv[2], vepOut) ||
        sameString(argv[2], gpFx))
        test = cloneString(argv[2]);
    else
        {
        warn("Unrecognized test name '%s'\n", argv[2]);
        // NOTE(review): usage() is presumably expected not to return - confirm.
        usage();
        }
    }

// Every test selection below is written (doAllTests || sameString(test, ...))
// because test stays NULL in all-tests mode and must not reach sameString.

if (udcCacheTimeout() < 300)
    udcSetCacheTimeout(300);
udcSetDefaultDir("./udcCache");

struct annoAssembly *assembly = getAnnoAssembly(db);

// First test: some rows of a pgSnp table
struct streamerInfo pgSnpInfo = { NULL, assembly, db, "pgNA12878", arWords, pgSnpAsObj() };
if (doAllTests || sameString(test, pgSnpDbToTabOut))
    dbToTabOut(&pgSnpInfo, "stdout", "chr1", 705881, 752721, FALSE);

// Second test: some rows of a pgSnp table integrated with knownGene
struct streamerInfo kgInfo = { NULL, assembly, db, "knownGene", arWords,
                               asParseFile("../knownGene.as") };
pgSnpInfo.next = &kgInfo;
if (doAllTests || sameString(test, pgSnpKgDbToTabOutShort))
    dbToTabOut(&pgSnpInfo, "stdout", "chr1", 705881, 752721, FALSE);

// Third test: all rows of a pgSnp table integrated with knownGene
if (doAllTests || sameString(test, pgSnpKgDbToTabOutLong))
    dbToTabOut(&pgSnpInfo, "stdout", NULL, 0, 0, FALSE);

// Fourth test: some rows of snp135 integrated with phyloP scores
if (doAllTests || sameString(test, snpConsDbToTabOutShort) ||
    sameString(test, snpConsDbToTabOutLong))
    {
    struct streamerInfo snp135Info = { NULL, assembly, db, "snp135", arWords,
                                       asParseFile("../snp132Ext.as") };
    struct streamerInfo phyloPInfo = { NULL, assembly, db, "phyloP46wayPlacental", arWig, NULL };
    snp135Info.next = &phyloPInfo;
    // Previously the short/long choice called sameString(test, ...) with no
    // doAllTests guard, handing it a NULL test in all-tests mode.  Each
    // variant now runs when it was named or when everything is being run.
    if (doAllTests || sameString(test, snpConsDbToTabOutShort))
        dbToTabOut(&snp135Info, "stdout", "chr1", 737224, 738475, FALSE);
    if (doAllTests || sameString(test, snpConsDbToTabOutLong))
        dbToTabOut(&snp135Info, "stdout", NULL, 0, 0, FALSE);
    }

// Fifth test: VCF with genotypes
if (doAllTests || sameString(test, vcfEx1))
    {
#if (defined USE_TABIX && defined KNETFILE_HOOKS)
    knetUdcInstall();
#endif//def USE_TABIX && KNETFILE_HOOKS
    // Local renamed so it no longer shadows the vcfEx1 test name.
    struct streamerInfo vcfEx1Info = { NULL, assembly, NULL,
                           "http://genome.ucsc.edu/goldenPath/help/examples/vcfExample.vcf.gz",
                                   arWords, vcfAsObj() };
    dbToTabOut(&vcfEx1Info, "stdout", NULL, 0, 0, FALSE);
    }

// Second VCF example file
if (doAllTests || sameString(test, vcfEx2))
    {
    struct streamerInfo vcfEx2Info = { NULL, assembly, NULL,
                           "http://genome.ucsc.edu/goldenPath/help/examples/vcfExampleTwo.vcf",
                                   arWords, vcfAsObj() };
    dbToTabOut(&vcfEx2Info, "stdout", NULL, 0, 0, FALSE);
    }

// pgSnp tab file chained to the knownGene info declared for the second test
if (doAllTests || sameString(test, pgSnpKgDbToGpFx))
    {
    struct streamerInfo pg2SnpInfo = { NULL, assembly, NULL,
                                       "input/annoGrator/pgForTestingGpFx.pgSnp.tab",
                                       arWords, pgSnpAsObj() };
    pg2SnpInfo.next = &kgInfo;

    dbToTabOut(&pg2SnpInfo, "stdout", NULL, 0, 0, TRUE);

    /*
    FIXME
    // 3base insertion CDS - chr3:124,646,699-124,646,718
    dbToTabOut(&pg2SnpInfo, "stdout", "chr3",124646699,124646718, TRUE);
    */
    }

// bigBed example file over a chr21 region
if (doAllTests || sameString(test, bigBedToTabOut))
    {
    struct streamerInfo bigBedInfo = { NULL, assembly, NULL,
                           "http://genome.ucsc.edu/goldenPath/help/examples/bigBedExample.bb",
                                       arWords, NULL };
    dbToTabOut(&bigBedInfo, "stdout", "chr21", 34716800, 34733700, FALSE);
    }

// snp135 chained to a bigWig example file over a chr21 region
if (doAllTests || sameString(test, snpBigWigToTabOut))
    {
    struct streamerInfo snp135Info = { NULL, assembly, db, "snp135", arWords,
                                       asParseFile("../snp132Ext.as") };
    struct streamerInfo bigWigInfo = { NULL, assembly, NULL,
                           "http://genome.ucsc.edu/goldenPath/help/examples/bigWigExample.bw",
                                       arWig, NULL };
    snp135Info.next = &bigWigInfo;
    dbToTabOut(&snp135Info, "stdout", "chr21", 34716800, 34733700, FALSE);
    }

// VEP-format output: pgSnp sample with ensGene and snp135
if (doAllTests || sameString(test, vepOut))
    {
    struct streamerInfo vepSamplePgSnp = { NULL, assembly, NULL,
                                           "input/annoGrator/vepSample.pgSnp.tab",
                                           arWords, asParseFile("../pgSnp.as") };
    // Named ensGeneInfo so it does not shadow the function-level kgInfo.
    struct streamerInfo ensGeneInfo = { NULL, assembly, db, "ensGene", arWords,
                                        asParseFile("../genePredExt.as") };
    struct streamerInfo snpInfo = { NULL, assembly, db, "snp135", arWords,
                                    asParseFile("../snp132Ext.as") };
    vepSamplePgSnp.next = &ensGeneInfo;
    ensGeneInfo.next = &snpInfo;
    // Instead of dbToTabOut, we need to make a VEP config data structure and
    // use it to create an annoFormatVep.
    struct streamerInfo *primaryInfo = &vepSamplePgSnp;
    struct annoStreamer *primary = NULL;
    struct annoGrator *gratorList = NULL;
    sourcesFromInfoList(primaryInfo, TRUE, &primary, &gratorList);
    // The first two entries of gratorList are used as gene and SNP sources.
    struct annoStreamer *gpVarSource = (struct annoStreamer *)gratorList;
    struct annoStreamer *snpSource = gpVarSource->next;
    struct annoFormatter *vepFormatter = annoFormatVepNew("stdout", FALSE, primary,
                                                          "vepSamplePgSnp",
                                                          gpVarSource, "UCSC Genes ...",
                                                          snpSource, "just dbSNP 135");
    struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList,
                                                       vepFormatter);
    annoGratorQuerySetRegion(query, "chr1", 876900, 886920);
    annoGratorQueryExecute(query);
    annoGratorQuerySetRegion(query, "chr5", 135530, 145535);
    annoGratorQueryExecute(query);
    annoGratorQueryFree(&query);
    }

// VEP-format output with an extra SIFT item from dbNSFP
if (doAllTests || sameString(test, gpFx))
    {
    struct streamerInfo variants = { NULL, assembly, NULL,
                                           "input/annoGrator/moreVariants.pgSnp.tab",
                                           arWords, asParseFile("../pgSnp.as") };
    // Named knownGeneInfo so it does not shadow the function-level kgInfo.
    struct streamerInfo knownGeneInfo = { NULL, assembly, db, "knownGene", arWords,
                                          asParseFile("../knownGene.as") };
    struct streamerInfo snpInfo = { NULL, assembly, db, "snp137", arWords,
                                    asParseFile("../snp132Ext.as") };
    struct asObject *dbNsfpSeqChangeAs =
        bigBedAsFromFileName("/gbdb/hg19/dbNsfp/dbNsfpSeqChange.bb");
    struct streamerInfo dbNsfpSeqChange =
        { NULL, assembly, NULL, "/gbdb/hg19/dbNsfp/dbNsfpSeqChange.bb",
          arWords, dbNsfpSeqChangeAs };
    struct asObject *dbNsfpSiftAs = bigBedAsFromFileName("/gbdb/hg19/dbNsfp/dbNsfpSift.bb");
    struct streamerInfo dbNsfpSift = { NULL, assembly, NULL, "/gbdb/hg19/dbNsfp/dbNsfpSift.bb",
                                       arWords, dbNsfpSiftAs };
    variants.next = &knownGeneInfo;
    knownGeneInfo.next = &snpInfo;
    snpInfo.next = &dbNsfpSeqChange;
    dbNsfpSeqChange.next = &dbNsfpSift;
    // Instead of dbToTabOut, we need to make a VEP config data structure and
    // use it to create an annoFormatVep.
    struct streamerInfo *primaryInfo = &variants;
    struct annoStreamer *primary = NULL;
    struct annoGrator *gratorList = NULL;
    sourcesFromInfoList(primaryInfo, TRUE, &primary, &gratorList);
    struct annoStreamer *gpVarSource = (struct annoStreamer *)gratorList;
    struct annoStreamer *snpSource = gpVarSource->next;
    // Two steps past the SNP source; presumably the dbNsfpSift entry, given
    // the chaining order above - confirm against sourcesFromInfoList.
    struct annoStreamer *dbNsfpSource = snpSource->next->next;
    struct annoFormatter *vepFormatter = annoFormatVepNew("stdout", FALSE, primary,
                                                          "some more variants",
                                                          gpVarSource, "UCSC Genes of course",
                                                          snpSource, "now snp137.");
    annoFormatVepAddExtraItem(vepFormatter, dbNsfpSource, "SIFT", "SIFT score from dbNSFP", "");
    struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList,
                                                       vepFormatter);
    annoGratorQuerySetRegion(query, "chr19", 45405960, 45419476);
    annoGratorQueryExecute(query);
    annoGratorQueryFree(&query);
    }

return 0;
}