void tagStormQueryMain(char *query) /* tagStormQuery - Find stanzas in tag storm based on SQL-like query.. */ { /* Get parsed out query */ struct lineFile *lf = lineFileOnString("query", TRUE, cloneString(query)); struct rqlStatement *rql = rqlStatementParse(lf); int stormCount = slCount(rql->tableList); if (stormCount != 1) errAbort("Can only handle one tag storm file in query, got %d", stormCount); char *tagsFileName = rql->tableList->name; /* Read in tags */ struct tagStorm *tags = tagStormFromFile(tagsFileName); /* Expand any field names with wildcards. */ struct slName *allFieldList = tagStormFieldList(tags); rql->fieldList = wildExpandList(allFieldList, rql->fieldList, TRUE); /* Traverse tree applying query */ struct lm *lm = lmInit(0); doSelect = sameWord(rql->command, "select"); traverse(tags, tags->forest, rql, lm); tagStormFree(&tags); if (sameWord(rql->command, "count")) printf("%d\n", matchCount); }
void cdwQuery(char *rqlQuery) /* cdwQuery - Get list of tagged files.. */ { /* Turn rqlQuery string into a parsed out rqlStatement. */ struct rqlStatement *rql = rqlStatementParseString(rqlQuery); /* Load tags from database */ struct sqlConnection *conn = cdwConnect(); struct tagStorm *tags = cdwTagStorm(conn); /* Get list of all tag types in tree and use it to expand wildcards in the query * field list. */ struct slName *allFieldList = tagStormFieldList(tags); slSort(&allFieldList, slNameCmp); rql->fieldList = wildExpandList(allFieldList, rql->fieldList, TRUE); /* Output header row in tab case */ if (sameString(clOut, "tab")) { char before = '#'; struct slName *field; for (field = rql->fieldList; field != NULL; field = field->next) { printf("%c%s", before, field->name); before = '\t'; } printf("\n"); } /* Traverse tag tree outputting when rql statement matches in select case, just * updateing count in count case. */ doSelect = sameWord(rql->command, "select"); struct lm *lm = lmInit(0); traverse(0, tags, tags->forest, rql, lm); if (sameWord(rql->command, "count")) printf("%d\n", matchCount); /* Clean up and go home. */ tagStormFree(&tags); }
void hcaStormToBundles(char *inTags, char *dataUrl, char *schemaFile, char *outDir) /* hcaStormToBundles - Convert a HCA formatted tagStorm to a directory full of bundles.. */ { /* Check that have full path name for dataFileDir */ if (sameString("urls", dataUrl)) gUrls = TRUE; else if (!stringIn("://", dataUrl)) errAbort("data file directory must be a url."); /* Load up schema and put it in hash */ struct tagSchema *schemaList = tagSchemaFromFile(schemaFile); struct hash *schemaHash = tagSchemaHash(schemaList); /* Load up tagStorm get leaf list */ struct tagStorm *storm = tagStormFromFile(inTags); struct tagStanzaRef *refList = tagStormListLeaves(storm); verbose(1, "Got %d leaf nodes in %s\n", slCount(refList), inTags); /* Add in assay.sample_id as just a dupe of sample.id */ dupeValToNewTag(storm, storm->forest, "sample.id", "assay.sample_id"); dupeValToNewTag(storm, storm->forest, "project.id", "sample.project_id"); addMissingUuids(storm, "assay.seq.ena_experiment", "assay.id", FALSE); addMissingUuids(storm, "assay.seq.sra_experiment", "assay.id", FALSE); /* Do some figuring based on all fields available of what objects to make */ struct slName *allFields = tagStormFieldList(storm); verbose(1, "Got %d fields in %s\n", slCount(allFields), inTags); struct slName *topLevelList = ttjUniqToDotList(allFields, NULL, 0); verbose(1, "Got %d top level objects\n", slCount(topLevelList)); /* Make list of objects */ struct slName *topEl; struct ttjSubObj *objList = NULL; for (topEl = topLevelList; topEl != NULL; topEl = topEl->next) { verbose(1, " %s\n", topEl->name); struct ttjSubObj *obj = ttjMakeSubObj(allFields, topEl->name, topEl->name); slAddHead(&objList, obj); } /* Loop through stanzas making bundles */ struct tagStanzaRef *ref; int bundleIx = 0; makeDirsOnPath(outDir); for (ref = refList; ref != NULL; ref = ref->next) { /* Fetch stanza and comma-separated list of files. */ struct tagStanza *stanza = ref->stanza; char *fileCsv = tagFindVal(stanza, "assay.seq.files"); if (fileCsv == NULL) errAbort("Stanza without a files tag. Stanza starts line %d of %s", stanza->startLineIx, inTags); /* Make subdirectory for bundle */ ++bundleIx; char bundleDir[PATH_LEN]; safef(bundleDir, sizeof(bundleDir), "%s/bundle%d", outDir, bundleIx); makeDir(bundleDir); /* Make symbolic link of all files */ char localUrl[PATH_LEN*2]; if (gUrls) { struct slName *fileList = tagMustFindValList(stanza, "assay.seq.files"); splitPath(fileList->name, localUrl, NULL, NULL); dataUrl = localUrl; slFreeList(&fileList); } makeBundleJson(storm, bundleDir, stanza, objList, dataUrl, schemaHash); } verbose(1, "wrote json files into %s/bundle* dirs\n", outDir); }