void cdwGroupFile(char *groupName, char *where) /* cdwGroupFile - Associate a file with a group.. */ { /* Get group from database, error out if no good */ struct sqlConnection *conn = cdwConnectReadWrite(); struct cdwGroup *group = cdwNeedGroupFromName(conn, groupName); /* Get list of all stanzas matching query */ struct tagStorm *tags = cdwTagStorm(conn); struct dyString *rqlQuery = dyStringNew(0); dyStringPrintf(rqlQuery, "select accession from cdwFileTags where accession"); if (where != NULL) dyStringPrintf(rqlQuery, " and %s", where); struct slRef *ref, *matchRefList = tagStanzasMatchingQuery(tags, rqlQuery->string); /* Make one pass through mostly for early error reporting and building up * hash of cdwValidFiles keyed by accession */ struct hash *validHash = hashNew(0); for (ref = matchRefList; ref != NULL; ref = ref->next) { struct tagStanza *stanza = ref->val; char *acc = tagFindVal(stanza, "accession"); if (acc != NULL) { struct cdwValidFile *vf = cdwValidFileFromLicensePlate(conn, acc); if (vf == NULL) errAbort("%s not found in cdwValidFile", acc); hashAdd(validHash, acc, vf); } } /* Second pass through matching list we call routine that actually adds * the group/file relationship. */ for (ref = matchRefList; ref != NULL; ref = ref->next) { struct tagStanza *stanza = ref->val; char *acc = tagFindVal(stanza, "accession"); if (acc != NULL) { struct cdwValidFile *vf = hashFindVal(validHash, acc); if (vf != NULL) { addGroupToValidFile(conn, vf, group); } } } if (clDry) verbose(1, "Would have %s", (clRemove ? "removed" : "added")); else verbose(1, "%s", (clRemove ? "Removed" : "Added")); verbose(1, " group %s to %d files\n", group->name, validHash->elCount); }
void output(int depth, struct rqlStatement *rql, struct tagStorm *tags, struct tagStanza *stanza) /* Output stanza according to clOut */ { char *format = clOut; if (sameString(format, "ra")) { if (stanza->children == NULL) { struct slName *field; for (field = rql->fieldList; field != NULL; field = field->next) { char *val = tagFindVal(stanza, field->name); if (val != NULL) printf("%s\t%s\n", field->name, val); } printf("\n"); } } else if (sameString(format, "tab")) { if (stanza->children == NULL) { struct slName *field; char *connector = ""; for (field = rql->fieldList; field != NULL; field = field->next) { char *val = emptyForNull(tagFindVal(stanza, field->name)); printf("%s%s", connector, val); connector = "\t"; } printf("\n"); } } else if (sameString(format, "tags")) { struct slName *field; for (field = rql->fieldList; field != NULL; field = field->next) { char *val = tagFindLocalVal(stanza, field->name); if (val != NULL) { repeatCharOut(stdout, '\t', depth); printf("%s\t%s\n", field->name, val); } } printf("\n"); } else errAbort("Unrecognized format %s", format); }
void traverse(struct tagStorm *tags, struct tagStanza *list, struct rqlStatement *rql, struct lm *lm) /* Recursively traverse stanzas on list. */ { struct tagStanza *stanza; int limit = rql->limit; for (stanza = list; stanza != NULL; stanza = stanza->next) { if (stanza->children) traverse(tags, stanza->children, rql, lm); else /* Just apply query to leaves */ { if (tagStanzaRqlMatch(rql, stanza, lm)) { ++matchCount; if (doSelect && (limit < 0 || matchCount <= limit)) { struct slName *field; for (field = rql->fieldList; field != NULL; field = field->next) { char *val = tagFindVal(stanza, field->name); if (val != NULL) printf("%s\t%s\n", field->name, val); } printf("\n"); } } } } }
struct slName *tagFindValList(struct tagStanza *stanza, char *tag)
/* Read in tag as a list, parsing its value with csvParse.  Do a slFreeList
 * on the result when done.
 * Returns NULL if no value */
{
char *val = tagFindVal(stanza, tag);

/* Honor the "NULL if no value" contract here instead of depending on how
 * csvParse treats a NULL argument. */
if (val == NULL)
    return NULL;
return csvParse(val);
}
void cdwChangeAccess(char *chmodString, char *rqlWhere) /* cdwChangeAccess - Change access to files.. */ { char cWhere, cDir, cAccess; parseChmodString(chmodString, &cWhere, &cDir, &cAccess); /* Get list of all stanzas matching query */ struct sqlConnection *conn = cdwConnectReadWrite(); struct tagStorm *tags = cdwTagStorm(conn); struct dyString *rqlQuery = dyStringNew(0); dyStringPrintf(rqlQuery, "select accession from cdwFileTags where accession and %s", rqlWhere); struct slRef *ref, *matchRefList = tagStanzasMatchingQuery(tags, rqlQuery->string); /* Make one pass through mostly for early error reporting and building up * hash of cdwValidFiles keyed by accession */ struct hash *validHash = hashNew(0); for (ref = matchRefList; ref != NULL; ref = ref->next) { struct tagStanza *stanza = ref->val; char *acc = tagFindVal(stanza, "accession"); if (acc != NULL) { struct cdwValidFile *vf = cdwValidFileFromLicensePlate(conn, acc); if (vf == NULL) errAbort("%s not found in cdwValidFile", acc); hashAdd(validHash, acc, vf); } } /* Second pass through matching list we call routine that actually adds * the group/file relationship. */ for (ref = matchRefList; ref != NULL; ref = ref->next) { struct tagStanza *stanza = ref->val; char *acc = tagFindVal(stanza, "accession"); if (acc != NULL) { struct cdwValidFile *vf = hashFindVal(validHash, acc); if (vf != NULL) { changeAccess(conn, vf->fileId, cWhere, cDir, cAccess); } } } }
static void rTagStormCountDistinct(struct tagStanza *list, char *tag, struct hash *uniq)
/* Fill in hash with number of times have seen each value of tag.  A stanza
 * is only counted when tagFindVal also finds an "accession" tag for it.
 * Recurses into the children of every stanza on list. */
{
struct tagStanza *node;
for (node = list; node != NULL; node = node->next)
    {
    /* Look up the tag value only for stanzas that carry an accession. */
    char *val = NULL;
    if (tagFindVal(node, "accession"))
        val = tagFindVal(node, tag);
    if (val != NULL)
        hashIncInt(uniq, val);

    rTagStormCountDistinct(node->children, tag, uniq);
    }
}
char *tagMustFindVal(struct tagStanza *stanza, char *name)
/* Return value of tag of given name within stanza or any of its parents.
 * Abort if not found. */
{
char *found = tagFindVal(stanza, name);
if (found != NULL)
    return found;
errAbort("Can't find tag named %s in stanza", name);
return found;
}
static void rCheck(struct tagStanza *stanzaList, char *fileName, struct slRef *wildList, struct hash *hash, struct slRef *requiredList, struct dyString *scratch) /* Recurse through tagStorm */ { struct tagStanza *stanza; struct dyString *csvScratch = dyStringNew(0); for (stanza = stanzaList; stanza != NULL; stanza = stanza->next) { struct slPair *pair; for (pair = stanza->tagList; pair != NULL; pair = pair->next) { /* Break out tag and value */ char *tag = tagSchemaFigureArrayName(pair->name, scratch); char *val = pair->val; /* Make sure val exists and is non-empty */ if (isEmpty(val)) { reportError(fileName, stanza->startLineIx, "%s tag has no value", tag); continue; } /* Check against SQL reserved words */ if (gReservedHash != NULL) { if (sqlReservedCheck(gReservedHash, tag)) { reportError(fileName, stanza->startLineIx, "%s in tag name is a SQL reserved word", tag); continue; } } /* Find schema in hash or wildSchemaList */ struct tagSchema *schema = hashFindVal(hash, tag); if (schema == NULL) { struct slRef *ref; for (ref = wildList; ref != NULL; ref = ref->next) { struct tagSchema *s = ref->val; if (wildMatch(s->name, tag)) { schema = s; break; } } } /* Do checking on tag */ if (schema == NULL) reportError(fileName, stanza->startLineIx, "Unrecognized tag %s", tag); else { char type = schema->type; char *pos = val; char *oneVal; while ((oneVal =csvParseNext(&pos, csvScratch)) != NULL) { if (type == '#') { char *end; long long v = strtoll(oneVal, &end, 10); if (end == oneVal || *end != 0) // oneVal is not integer reportError(fileName, stanza->startLineIx, "Non-integer value %s for %s", oneVal, tag); else if (v < schema->minVal) reportError(fileName, stanza->startLineIx, "Value %s too low for %s", oneVal, tag); else if (v > schema->maxVal) reportError(fileName, stanza->startLineIx, "Value %s too high for %s", oneVal, tag); } else if (type == '%') { char *end; double v = strtod(oneVal, &end); if (end == oneVal || *end != 0) // val is not just a floating point number 
reportError(fileName, stanza->startLineIx, "Non-numerical value %s for %s", oneVal, tag); else if (v < schema->minVal) reportError(fileName, stanza->startLineIx, "Value %s too low for %s", oneVal, tag); else if (v > schema->maxVal) reportError(fileName, stanza->startLineIx, "Value %s too high for %s", oneVal, tag); } else { boolean gotMatch = FALSE; struct slName *okVal; for (okVal = schema->allowedVals; okVal != NULL; okVal = okVal->next) { if (wildMatch(okVal->name, oneVal)) { gotMatch = TRUE; break; } } if (!gotMatch) reportError(fileName, stanza->startLineIx, "Unrecognized value '%s' for tag %s", oneVal, tag); } struct hash *uniqHash = schema->uniqHash; if (uniqHash != NULL) { if (hashLookup(uniqHash, oneVal)) reportError(fileName, stanza->startLineIx, "Non-unique value '%s' for tag %s", oneVal, tag); else hashAdd(uniqHash, oneVal, NULL); } } } } if (stanza->children) { rCheck(stanza->children, fileName, wildList, hash, requiredList, scratch); } else { struct slRef *ref; for (ref = requiredList; ref != NULL; ref = ref->next) { struct tagSchema *schema = ref->val; if (schema->objArrayPieces != NULL) // It's an array, complex to handle, needs own routine { checkInAllArrayItems(fileName, stanza, schema, scratch); } else { if (tagFindVal(stanza, schema->name) == NULL) reportError(fileName, stanza->startLineIx, "Missing required '%s' tag", schema->name); } } } } dyStringFree(&csvScratch); }
static char *lookupField(void *record, char *key)
/* Lookup a field in a tagStanza.  The record arrives as an untyped pointer
 * and is treated as a struct tagStanza; the result is whatever tagFindVal
 * returns for key. */
{
return tagFindVal((struct tagStanza *)record, key);
}
void hcaStormToBundles(char *inTags, char *dataUrl, char *schemaFile, char *outDir) /* hcaStormToBundles - Convert a HCA formatted tagStorm to a directory full of bundles.. */ { /* Check that have full path name for dataFileDir */ if (sameString("urls", dataUrl)) gUrls = TRUE; else if (!stringIn("://", dataUrl)) errAbort("data file directory must be a url."); /* Load up schema and put it in hash */ struct tagSchema *schemaList = tagSchemaFromFile(schemaFile); struct hash *schemaHash = tagSchemaHash(schemaList); /* Load up tagStorm get leaf list */ struct tagStorm *storm = tagStormFromFile(inTags); struct tagStanzaRef *refList = tagStormListLeaves(storm); verbose(1, "Got %d leaf nodes in %s\n", slCount(refList), inTags); /* Add in assay.sample_id as just a dupe of sample.id */ dupeValToNewTag(storm, storm->forest, "sample.id", "assay.sample_id"); dupeValToNewTag(storm, storm->forest, "project.id", "sample.project_id"); addMissingUuids(storm, "assay.seq.ena_experiment", "assay.id", FALSE); addMissingUuids(storm, "assay.seq.sra_experiment", "assay.id", FALSE); /* Do some figuring based on all fields available of what objects to make */ struct slName *allFields = tagStormFieldList(storm); verbose(1, "Got %d fields in %s\n", slCount(allFields), inTags); struct slName *topLevelList = ttjUniqToDotList(allFields, NULL, 0); verbose(1, "Got %d top level objects\n", slCount(topLevelList)); /* Make list of objects */ struct slName *topEl; struct ttjSubObj *objList = NULL; for (topEl = topLevelList; topEl != NULL; topEl = topEl->next) { verbose(1, " %s\n", topEl->name); struct ttjSubObj *obj = ttjMakeSubObj(allFields, topEl->name, topEl->name); slAddHead(&objList, obj); } /* Loop through stanzas making bundles */ struct tagStanzaRef *ref; int bundleIx = 0; makeDirsOnPath(outDir); for (ref = refList; ref != NULL; ref = ref->next) { /* Fetch stanza and comma-separated list of files. 
*/ struct tagStanza *stanza = ref->stanza; char *fileCsv = tagFindVal(stanza, "assay.seq.files"); if (fileCsv == NULL) errAbort("Stanza without a files tag. Stanza starts line %d of %s", stanza->startLineIx, inTags); /* Make subdirectory for bundle */ ++bundleIx; char bundleDir[PATH_LEN]; safef(bundleDir, sizeof(bundleDir), "%s/bundle%d", outDir, bundleIx); makeDir(bundleDir); /* Make symbolic link of all files */ char localUrl[PATH_LEN*2]; if (gUrls) { struct slName *fileList = tagMustFindValList(stanza, "assay.seq.files"); splitPath(fileList->name, localUrl, NULL, NULL); dataUrl = localUrl; slFreeList(&fileList); } makeBundleJson(storm, bundleDir, stanza, objList, dataUrl, schemaHash); } verbose(1, "wrote json files into %s/bundle* dirs\n", outDir); }
void rWriteJson(FILE *f, struct tagStorm *storm, struct tagStanza *stanza, struct ttjSubObj *obj, struct ttjSubObj *labeledObj, struct hash *schemaHash, struct dyString *scratch) /* Write out json object recursively */ { boolean isArray = allDigitNames(obj->children); struct ttjSubObj *field; if (isArray) { fprintf(f, "["); for (field = obj->children; field != NULL; field = field->next) { if (field != obj->children) // Only write comma separators after the first one fprintf(f, ","); rWriteJson(f, storm, stanza, field, labeledObj, schemaHash, scratch); } fprintf(f, "]"); } else { fprintf(f, "{"); boolean firstOut = TRUE; /* Figure out if we need to attach a core object and do so. The figuring bit is * frankly clunky. */ char *objType = labeledObj->name; if (sameString(objType, "submitter") || sameString(objType, "contributors")) objType = "contact"; else if (sameString(objType, "publications")) objType = "publication"; else if (sameString(objType, "protocol")) // protocol is actually just protocol_id objType = "string"; else if (sameString(objType, "protocols")) // but protocols array is protocol objType = "protocol"; else if (sameString(objType, "umi_barcode")) objType = "barcode"; if (objNeedsCore(objType)) printCore(f, objType, &firstOut); for (field = obj->children; field != NULL; field = field->next) { char *fieldName = field->name; if (field->children != NULL) { /* Look for funny characteristics_ as these are largely up to user. */ if (startsWith("characteristics_", field->name)) errAbort("No '.' allowed in field name after characteristics_ in %s", field->children->fullName); /* If actually have data in this stanza write our field. 
*/ if (prefixDotInStanza(field->fullName, stanza, scratch)) { writeJsonTag(f, fieldName, &firstOut); rWriteJson(f, storm, stanza, field, field, schemaHash, scratch); } } else { char *val = tagFindVal(stanza, field->fullName); if (val != NULL) { boolean isNum = FALSE; char *schemaName = tagSchemaFigureArrayName(field->fullName, scratch); struct tagSchema *schema = hashFindVal(schemaHash, schemaName); if (schema != NULL) isNum = (schema->type == '#' || schema->type == '%'); if (sameString(fieldName, "files")) { writeJsonTag(f, "lanes", &firstOut); writeLaneArray(f, stanza, val); } else { boolean isArray = FALSE; writeJsonTag(f, fieldName, &firstOut); if (schema != NULL) isArray = schema->isArray; struct slName *list = csvParse(val); if (isArray) fputc('[', f); else { if (list->next != NULL) // more than one element errAbort("Multiple vals for scalar tag %s in stanza starting line %d of %s", field->fullName, stanza->startLineIx, storm->fileName); } struct slName *el; for (el = list; el != NULL; el = el->next) { writeJsonVal(f, el->name, isNum); if (el->next != NULL) fputc(',', f); } if (isArray) fputc(']', f); slFreeList(&list); } } } } fprintf(f, "}"); } }