void cdwGroupFile(char *groupName, char *where)
/* cdwGroupFile - Associate files with a group.
 *    groupName - name of the group; resolved with cdwNeedGroupFromName()
 *    where - optional extra condition appended to the query after " and ";
 *            skipped when NULL
 * Works in two passes over the matching stanzas: the first resolves every
 * accession to a cdwValidFile (calling errAbort() on the first one that is
 * missing), the second calls addGroupToValidFile() on each resolved file.
 * NOTE(review): no braces or 'else' keywords are visible in this listing, so the
 * nesting below is read from indentation only - confirm against the full file. */
/* Get group from database, error out if no good */
struct sqlConnection *conn = cdwConnectReadWrite();
struct cdwGroup *group = cdwNeedGroupFromName(conn, groupName);

/* Get list of all stanzas matching query.  The base query only requires that
 * an accession be present; the caller's clause narrows it further. */
struct tagStorm *tags = cdwTagStorm(conn);
struct dyString *rqlQuery = dyStringNew(0);
dyStringPrintf(rqlQuery, "select accession from cdwFileTags where accession");
if (where != NULL)
    dyStringPrintf(rqlQuery, " and %s", where);
struct slRef *ref, *matchRefList = tagStanzasMatchingQuery(tags, rqlQuery->string);

/* Make one pass through mostly for early error reporting and building up 
 * hash of cdwValidFiles keyed by accession */
struct hash *validHash = hashNew(0);
for (ref = matchRefList; ref != NULL; ref = ref->next)
    struct tagStanza *stanza = ref->val;
    char *acc = tagFindVal(stanza, "accession");
    if (acc != NULL)
	struct cdwValidFile *vf = cdwValidFileFromLicensePlate(conn, acc);
	if (vf == NULL)
	    errAbort("%s not found in cdwValidFile", acc);
	hashAdd(validHash, acc, vf);

/* Second pass through matching list we call routine that actually adds
 * the group/file relationship. */
for (ref = matchRefList; ref != NULL; ref = ref->next)
    struct tagStanza *stanza = ref->val;
    char *acc = tagFindVal(stanza, "accession");
    if (acc != NULL)
	struct cdwValidFile *vf = hashFindVal(validHash, acc);
	if (vf != NULL)
	    addGroupToValidFile(conn, vf, group);
/* Summary message: a verb first, then the group name and the number of
 * entries in validHash.
 * NOTE(review): clDry and clRemove are not declared in this function.  No 'else'
 * is visible between the two verb-printing calls; indentation suggests the
 * second one is the non-dry-run alternative - confirm. */
if (clDry)
    verbose(1, "Would have %s", (clRemove ? "removed" : "added"));
    verbose(1, "%s", (clRemove ? "Removed" : "Added"));
verbose(1, " group %s to %d files\n", group->name, validHash->elCount);
void output(int depth, struct rqlStatement *rql, struct tagStorm *tags, struct tagStanza *stanza)
/* Output stanza according to clOut.
 *    depth - number of tab characters written before each line in "tags" format
 *    rql - statement whose fieldList names the fields to print
 *    tags - not referenced in the lines shown here
 *    stanza - stanza to print
 * The "ra" and "tab" formats print only stanzas without children and look
 * values up with tagFindVal(); the "tags" format uses tagFindLocalVal() and
 * does not test for children.
 * NOTE(review): no braces or bare 'else' lines are visible in this listing;
 * nesting is read from indentation only - confirm against the full file. */
char *format = clOut;
if (sameString(format, "ra"))
    /* One "name<TAB>value" line per field that has a value. */
    if (stanza->children == NULL)
	struct slName *field;
	for (field = rql->fieldList; field != NULL; field = field->next)
	    char *val = tagFindVal(stanza, field->name);
	    if (val != NULL)
		printf("%s\t%s\n", field->name, val);
else if (sameString(format, "tab"))
    /* Values only, tab separated; a missing value prints as an empty string.
     * connector is empty before the first value and a tab afterwards. */
    if (stanza->children == NULL)
	struct slName *field;
	char *connector = "";
	for (field = rql->fieldList; field != NULL; field = field->next)
	    char *val = emptyForNull(tagFindVal(stanza, field->name));
	    printf("%s%s", connector, val);
	    connector = "\t";
else if (sameString(format, "tags"))
    /* "name<TAB>value" lines, each preceded by depth tab characters. */
    struct slName *field;
    for (field = rql->fieldList; field != NULL; field = field->next)
	char *val = tagFindLocalVal(stanza, field->name);
	if (val != NULL)
	    repeatCharOut(stdout, '\t', depth);
	    printf("%s\t%s\n", field->name, val);
    /* NOTE(review): presumably the final 'else' of the format chain - confirm. */
    errAbort("Unrecognized format %s", format);
void traverse(struct tagStorm *tags, struct tagStanza *list, 
    struct rqlStatement *rql, struct lm *lm)
/* Recursively traverse stanzas on list.
 *    tags - only passed along to the recursive call
 *    list - first stanza of a sibling list, followed through ->next
 *    rql - query; supplies the limit and the list of fields to print
 *    lm - passed to tagStanzaRqlMatch()
 * Stanzas with children are descended into; the query is applied only to
 * stanzas without children.  Matching leaves print one "name<TAB>value" line
 * per field that has a value.
 * NOTE(review): doSelect and matchCount are not declared in this function, and
 * matchCount is not modified in the lines shown here - confirm where it is
 * incremented.  No braces are visible; nesting is read from indentation. */
struct tagStanza *stanza;
int limit = rql->limit;
for (stanza = list; stanza != NULL; stanza = stanza->next)
    if (stanza->children)
	traverse(tags, stanza->children, rql, lm);
    else    /* Just apply query to leaves */
	if (tagStanzaRqlMatch(rql, stanza, lm))
	    /* A negative limit disables the matchCount test. */
	    if (doSelect && (limit < 0 || matchCount <= limit))
		struct slName *field;
		for (field = rql->fieldList; field != NULL; field = field->next)
		    char *val = tagFindVal(stanza, field->name);
		    if (val != NULL)
			printf("%s\t%s\n", field->name, val);
struct slName *tagFindValList(struct tagStanza *stanza, char *tag)
/* Read in tag as a list. Do a slFreeList on this when done.
 * The value is looked up with tagFindVal() and handed to csvParse() without
 * a NULL check here.
 * Returns NULL if no value - NOTE(review): this relies on csvParse() returning
 * NULL for a NULL argument; confirm. */
char *val = tagFindVal(stanza, tag);
return csvParse(val);
void cdwChangeAccess(char *chmodString, char *rqlWhere)
/* cdwChangeAccess - Change access to files.
 *    chmodString - parsed by parseChmodString() into cWhere, cDir and cAccess
 *    rqlWhere - condition appended to the query after " and "
 * Works in two passes over the matching stanzas: the first resolves every
 * accession to a cdwValidFile (calling errAbort() on the first one that is
 * missing), the second calls changeAccess() on each resolved file.
 * NOTE(review): rqlWhere is formatted with %s without a NULL check - confirm
 * callers always supply it.  No braces are visible in this listing; nesting
 * is read from indentation only. */
char cWhere, cDir, cAccess;
parseChmodString(chmodString, &cWhere, &cDir, &cAccess);

/* Get list of all stanzas matching query */
struct sqlConnection *conn = cdwConnectReadWrite();
struct tagStorm *tags = cdwTagStorm(conn);
struct dyString *rqlQuery = dyStringNew(0);
dyStringPrintf(rqlQuery, "select accession from cdwFileTags where accession and %s", rqlWhere);
struct slRef *ref, *matchRefList = tagStanzasMatchingQuery(tags, rqlQuery->string);

/* Make one pass through mostly for early error reporting and building up 
 * hash of cdwValidFiles keyed by accession */
struct hash *validHash = hashNew(0);
for (ref = matchRefList; ref != NULL; ref = ref->next)
    struct tagStanza *stanza = ref->val;
    char *acc = tagFindVal(stanza, "accession");
    if (acc != NULL)
	struct cdwValidFile *vf = cdwValidFileFromLicensePlate(conn, acc);
	if (vf == NULL)
	    errAbort("%s not found in cdwValidFile", acc);
	hashAdd(validHash, acc, vf);

/* Second pass through matching list we call the routine that actually
 * changes access, using the fileId of each cdwValidFile found above. */
for (ref = matchRefList; ref != NULL; ref = ref->next)
    struct tagStanza *stanza = ref->val;
    char *acc = tagFindVal(stanza, "accession");
    if (acc != NULL)
	struct cdwValidFile *vf = hashFindVal(validHash, acc);
	if (vf != NULL)
	    changeAccess(conn, vf->fileId, cWhere, cDir, cAccess);
static void rTagStormCountDistinct(struct tagStanza *list, char *tag, struct hash *uniq)
/* Fill in hash with number of times have seen each value of tag.
 *    list - first stanza of a sibling list; may be NULL, in which case the
 *           loop body never runs
 *    tag - tag whose values are counted
 *    uniq - hash keyed by value; updated with hashIncInt()
 * Only stanzas for which tagFindVal() finds an "accession" value contribute
 * to the counts.
 * NOTE(review): no braces are visible in this listing; by indentation the
 * recursive call runs for every stanza, whether or not it was counted. */
char *requiredTag = "accession";
struct tagStanza *stanza;
for (stanza = list; stanza != NULL; stanza = stanza->next)
    if (tagFindVal(stanza, requiredTag))
	char *val = tagFindVal(stanza, tag);
	if (val != NULL)
	    hashIncInt(uniq, val);
    /* Descend into the children of this stanza. */
    rTagStormCountDistinct(stanza->children, tag, uniq);
char *tagMustFindVal(struct tagStanza *stanza, char *name)
/* Return value of tag of given name within stanza or any of its parents, as
 * looked up by tagFindVal().  Abort with errAbort() if not found, so the
 * returned pointer is never NULL. */
char *val = tagFindVal(stanza, name);
if (val == NULL)
    errAbort("Can't find tag named %s in stanza", name);
return val;
static void rCheck(struct tagStanza *stanzaList, char *fileName, 
    struct slRef *wildList, struct hash *hash, struct slRef *requiredList,
    struct dyString *scratch)
/* Recurse through tagStorm, reporting problems with reportError().
 *    stanzaList - first stanza of a sibling list to check
 *    fileName - passed to reportError() together with the stanza start line
 *    wildList - refs to tagSchema whose names are matched with wildMatch()
 *    hash - tagSchema keyed by tag name, tried before wildList
 *    requiredList - refs to tagSchema checked for presence in a stanza
 *    scratch - buffer passed to tagSchemaFigureArrayName() and
 *              checkInAllArrayItems()
 * NOTE(review): no braces or bare 'else' lines are visible in this listing;
 * nesting is read from indentation only, and several branches below look like
 * they lost an 'else' - confirm against the full file.  csvScratch is
 * allocated on every call and no free is visible in these lines. */
struct tagStanza *stanza;
struct dyString *csvScratch = dyStringNew(0);
for (stanza = stanzaList; stanza != NULL; stanza = stanza->next)
    struct slPair *pair;
    for (pair = stanza->tagList; pair != NULL; pair = pair->next)
	/* Break out tag and value */
	char *tag = tagSchemaFigureArrayName(pair->name, scratch);
	char *val = pair->val;

	/* Make sure val exists and is non-empty */
	if (isEmpty(val))
	    reportError(fileName, stanza->startLineIx, 
		"%s tag has no value", tag);

	/* Check against SQL reserved words */
	if (gReservedHash != NULL)
	    if (sqlReservedCheck(gReservedHash, tag))
		reportError(fileName, stanza->startLineIx, 
		    "%s in tag name is a SQL reserved word", tag);

	/* Find schema in hash or wildSchemaList */
	struct tagSchema *schema = hashFindVal(hash, tag);
	if (schema == NULL)
	    struct slRef *ref;
	    for (ref = wildList; ref != NULL; ref = ref->next)
		struct tagSchema *s = ref->val;
		if (wildMatch(s->name, tag))
		    schema = s;

	/* Do checking on tag.
	 * NOTE(review): the lines after the reportError() call dereference schema,
	 * so they presumably belong to an 'else' branch - confirm. */
	if (schema == NULL)
	    reportError(fileName, stanza->startLineIx, "Unrecognized tag %s", tag);
	    char type = schema->type;
	    char *pos = val;
	    char *oneVal;
	    /* Each item that csvParseNext() returns from val is checked separately. */
	    while ((oneVal =csvParseNext(&pos, csvScratch)) != NULL)
		/* Type '#': the whole item must parse as a base 10 integer within
		 * [minVal, maxVal]. */
		if (type == '#')
		    char *end;
		    long long v = strtoll(oneVal, &end, 10);
		    if (end == oneVal || *end != 0)	// oneVal is not integer
			reportError(fileName, stanza->startLineIx, 
			    "Non-integer value %s for %s", oneVal, tag);
		    else if (v < schema->minVal)
			reportError(fileName, stanza->startLineIx, 
			    "Value %s too low for %s", oneVal, tag);
		    else if (v > schema->maxVal)
			 reportError(fileName, stanza->startLineIx, 
			    "Value %s too high for %s", oneVal, tag);
		/* Type '%': the whole item must parse with strtod() and lie within
		 * [minVal, maxVal]. */
		else if (type == '%')
		    char *end;
		    double v = strtod(oneVal, &end);
		    if (end == oneVal || *end != 0)	// val is not just a floating point number
			reportError(fileName, stanza->startLineIx, 
			    "Non-numerical value %s for %s", oneVal, tag);
		    else if (v < schema->minVal)
			reportError(fileName, stanza->startLineIx, 
			    "Value %s too low for %s", oneVal, tag);
		    else if (v > schema->maxVal)
			reportError(fileName, stanza->startLineIx, 
			    "Value %s too high for %s", oneVal, tag);
		    /* Item must wildMatch() at least one entry of allowedVals.
		     * NOTE(review): presumably the 'else' branch for the remaining
		     * types rather than part of the '%' case - confirm. */
		    boolean gotMatch = FALSE;
		    struct slName *okVal;
		    for (okVal = schema->allowedVals; okVal != NULL; okVal = okVal->next)
			if (wildMatch(okVal->name, oneVal))
			    gotMatch = TRUE;
		    if (!gotMatch)
			reportError(fileName, stanza->startLineIx, 
			    "Unrecognized value '%s' for tag %s", oneVal, tag);

		/* When the schema has a uniqHash, values already in it are reported
		 * as non-unique.
		 * NOTE(review): the hashAdd() is presumably the 'else' of the lookup,
		 * recording first-time values - confirm. */
		struct hash *uniqHash = schema->uniqHash;
		if (uniqHash != NULL)
		    if (hashLookup(uniqHash, oneVal))
			reportError(fileName, stanza->startLineIx, 
			    "Non-unique value '%s' for tag %s", oneVal, tag);
			hashAdd(uniqHash, oneVal, NULL);
    /* Recurse into children.
     * NOTE(review): the required-tag check below is presumably the 'else' branch,
     * applied only to stanzas without children - confirm. */
    if (stanza->children)
	rCheck(stanza->children, fileName, wildList, hash, requiredList, scratch);
	struct slRef *ref;
	for (ref = requiredList; ref != NULL; ref = ref->next)
	    struct tagSchema *schema = ref->val;
	    if (schema->objArrayPieces != NULL)  // It's an array, complex to handle, needs own routine
		checkInAllArrayItems(fileName, stanza, schema, scratch);
		/* NOTE(review): presumably the 'else' for non-array schemas - confirm. */
		if (tagFindVal(stanza, schema->name) == NULL)
		    reportError(fileName, stanza->startLineIx, 
			"Missing required '%s' tag", schema->name);
static char *lookupField(void *record, char *key)
/* Lookup a field in a tagStanza.
 *    record - treated as a struct tagStanza pointer; the void pointer
 *             parameter suggests use as a generic callback - confirm at the
 *             call site
 *    key - tag name handed to tagFindVal()
 * Returns whatever tagFindVal() returns for key. */
struct tagStanza *stanza = record;
return tagFindVal(stanza, key);
void hcaStormToBundles(char *inTags, char *dataUrl, char *schemaFile, char *outDir)
/* hcaStormToBundles - Convert a HCA formatted tagStorm to a directory full of bundles.
 *    inTags - tagStorm file to read
 *    dataUrl - either the literal string "urls" (sets gUrls) or a string
 *              containing "://"; anything else aborts
 *    schemaFile - tag schema file, loaded and hashed
 *    outDir - bundle directory names are formatted as outDir/bundleN
 * NOTE(review): no braces are visible in this listing; nesting is read from
 * indentation only.  In the lines shown here bundleIx is never incremented,
 * bundleDir is formatted but no directory creation is visible, and fileList
 * is not freed - confirm against the full file. */
/* Check that have full path name for dataFileDir */
if (sameString("urls", dataUrl))
   gUrls = TRUE;
else if (!stringIn("://", dataUrl))
    errAbort("data file directory must be a url.");

/* Load up schema and put it in hash */
struct tagSchema *schemaList = tagSchemaFromFile(schemaFile);
struct hash *schemaHash = tagSchemaHash(schemaList);

/* Load up tagStorm get leaf list */
struct tagStorm *storm = tagStormFromFile(inTags);
struct tagStanzaRef *refList = tagStormListLeaves(storm);
verbose(1, "Got %d leaf nodes in %s\n", slCount(refList), inTags);

/* Add in assay.sample_id as just a dupe of sample.id, and sample.project_id
 * as a dupe of project.id.  Then call addMissingUuids() for the two
 * experiment tags / assay.id. */
dupeValToNewTag(storm, storm->forest, "sample.id", "assay.sample_id");
dupeValToNewTag(storm, storm->forest, "project.id", "sample.project_id");
addMissingUuids(storm, "assay.seq.ena_experiment", "assay.id", FALSE);
addMissingUuids(storm, "assay.seq.sra_experiment", "assay.id", FALSE);

/* Do some figuring based on all fields available of what objects to make */
struct slName *allFields = tagStormFieldList(storm);
verbose(1, "Got %d fields in %s\n", slCount(allFields), inTags);
struct slName *topLevelList = ttjUniqToDotList(allFields, NULL, 0);
verbose(1, "Got %d top level objects\n", slCount(topLevelList));

/* Make list of objects */
struct slName *topEl;
struct ttjSubObj *objList = NULL;
for (topEl = topLevelList; topEl != NULL; topEl = topEl->next)
    verbose(1, "  %s\n", topEl->name);
    struct ttjSubObj *obj = ttjMakeSubObj(allFields, topEl->name, topEl->name);
    slAddHead(&objList, obj);

/* Loop through stanzas making bundles */
struct tagStanzaRef *ref;
int bundleIx = 0;
for (ref = refList; ref != NULL; ref = ref->next)
    /* Fetch stanza and comma-separated list of files. */
    struct tagStanza *stanza = ref->stanza;
    char *fileCsv = tagFindVal(stanza, "assay.seq.files");
    if (fileCsv == NULL)
        errAbort("Stanza without a files tag. Stanza starts line %d of %s",  
		stanza->startLineIx, inTags); 

    /* Make subdirectory for bundle */
    char bundleDir[PATH_LEN];
    safef(bundleDir, sizeof(bundleDir), "%s/bundle%d", outDir, bundleIx);

    /* In gUrls mode take the directory part of the first listed file, as
     * split out by splitPath(), and use it as dataUrl for this bundle. */
    char localUrl[PATH_LEN*2];
    if (gUrls)
	struct slName *fileList = tagMustFindValList(stanza, "assay.seq.files");
	splitPath(fileList->name, localUrl, NULL, NULL);
	dataUrl = localUrl;

    makeBundleJson(storm, bundleDir, stanza, objList, dataUrl, schemaHash);
verbose(1, "wrote json files into %s/bundle* dirs\n", outDir);
void rWriteJson(FILE *f, struct tagStorm *storm, struct tagStanza *stanza, 
    struct ttjSubObj *obj, struct ttjSubObj *labeledObj, struct hash *schemaHash,
    struct dyString *scratch)
/* Write out json object recursively.
 *    f - output file
 *    storm - used here only for its fileName in an error message
 *    stanza - source of tag values
 *    obj - object whose children are written
 *    labeledObj - object whose name selects the core object type
 *    schemaHash - tagSchema keyed by name; decides numeric and array output
 *    scratch - buffer passed to prefixDotInStanza() and
 *              tagSchemaFigureArrayName()
 * When allDigitNames() is true for obj->children the children are written as
 * a "[...]" array; the "{...}" object form follows.
 * NOTE(review): no braces or bare 'else' lines are visible in this listing;
 * nesting is read from indentation only and several branches look like they
 * lost an 'else' - confirm against the full file. */
boolean isArray = allDigitNames(obj->children);
struct ttjSubObj *field; 
if (isArray)
    fprintf(f, "["); 
    for (field = obj->children; field != NULL; field = field->next)
	if (field != obj->children) // Only write comma separators after the first one
	   fprintf(f, ",");
	rWriteJson(f, storm, stanza, field, labeledObj, schemaHash, scratch);
    fprintf(f, "]");
    /* NOTE(review): presumably the 'else' (non-array) branch starts here - confirm. */
    fprintf(f, "{"); 
    boolean firstOut = TRUE;

    /* Figure out if we need to attach a core object and do so.  The figuring bit is
     * frankly clunky. */
    char *objType = labeledObj->name;
    if (sameString(objType, "submitter") || sameString(objType, "contributors"))
         objType = "contact";
    else if (sameString(objType, "publications"))
         objType = "publication";
    else if (sameString(objType, "protocol"))  // protocol is actually just protocol_id
         objType = "string";
    else if (sameString(objType, "protocols")) // but protocols array is protocol
         objType = "protocol";
    else if (sameString(objType, "umi_barcode"))
         objType = "barcode";
    if (objNeedsCore(objType))
        printCore(f, objType, &firstOut);

    for (field = obj->children; field != NULL; field = field->next)
	char *fieldName = field->name;
	if (field->children != NULL)
	     /* Look for funny characteristics_ as these are largely up to user.
	      * NOTE(review): the errAbort() call below has no closing arguments in
	      * the lines shown here - confirm against the full file. */
	     if (startsWith("characteristics_", field->name))
	         errAbort("No '.' allowed in field name after characteristics_ in %s", 

	     /* If actually have data in this stanza write our field. */
	     if (prefixDotInStanza(field->fullName, stanza, scratch))
		 writeJsonTag(f, fieldName, &firstOut);
		 rWriteJson(f, storm, stanza, field, field, schemaHash, scratch);
	    /* NOTE(review): presumably the 'else' branch for fields without
	     * children, written as plain values - confirm. */
	    char *val = tagFindVal(stanza, field->fullName);
	    if (val != NULL)
		/* Schema types '#' and '%' are written as numbers. */
		boolean isNum = FALSE;
		char *schemaName = tagSchemaFigureArrayName(field->fullName, scratch);
		struct tagSchema *schema = hashFindVal(schemaHash, schemaName);
		if (schema != NULL)
		   isNum = (schema->type == '#' || schema->type == '%');
		/* A field named "files" is written under the tag "lanes" by
		 * writeLaneArray() instead of as a plain value. */
		if (sameString(fieldName, "files"))
		    writeJsonTag(f, "lanes", &firstOut);
		    writeLaneArray(f, stanza, val);
		    /* NOTE(review): presumably the 'else' branch for all other fields.
		     * This isArray redeclares the name of the variable at the top of
		     * the function - confirm the inner scope. */
		    boolean isArray = FALSE;
		    writeJsonTag(f, fieldName, &firstOut);
		    if (schema != NULL)
			isArray = schema->isArray;
		    struct slName *list = csvParse(val);
		    if (isArray)
			fputc('[', f);
			/* NOTE(review): the multiple-value check is presumably the 'else'
			 * (scalar) branch - confirm. */
			if (list->next != NULL)  // more than one element
			   errAbort("Multiple vals for scalar tag %s in stanza starting line %d of %s",
				field->fullName, stanza->startLineIx, storm->fileName);
		    /* Write the parsed values separated by commas. */
		    struct slName *el;
		    for (el = list; el != NULL; el = el->next)
			writeJsonVal(f, el->name, isNum);
			if (el->next != NULL)
			    fputc(',', f);
		    if (isArray)
			fputc(']', f);
    fprintf(f, "}");