Exemplo n.º 1
0
void cdwGroupFile(char *groupName, char *where)
/* cdwGroupFile - Associate a file with a group.. */
{
/* Get group from database, error out if no good */
struct sqlConnection *conn = cdwConnectReadWrite();
struct cdwGroup *group = cdwNeedGroupFromName(conn, groupName);

/* Get list of all stanzas matching query */
struct tagStorm *tags = cdwTagStorm(conn);
struct dyString *rqlQuery = dyStringNew(0);
dyStringPrintf(rqlQuery, "select accession from cdwFileTags where accession");
if (where != NULL)
    dyStringPrintf(rqlQuery, " and %s", where);
struct slRef *ref, *matchRefList = tagStanzasMatchingQuery(tags, rqlQuery->string);

/* Make one pass through mostly for early error reporting and building up 
 * hash of cdwValidFiles keyed by accession */
struct hash *validHash = hashNew(0);
for (ref = matchRefList; ref != NULL; ref = ref->next)
    {
    struct tagStanza *stanza = ref->val;
    char *acc = tagFindVal(stanza, "accession");
    if (acc != NULL)
        {
	struct cdwValidFile *vf = cdwValidFileFromLicensePlate(conn, acc);
	if (vf == NULL)
	    errAbort("%s not found in cdwValidFile", acc);
	hashAdd(validHash, acc, vf);
	}
    }

/* Second pass through matching list we call routine that actually adds
 * the group/file relationship. */
for (ref = matchRefList; ref != NULL; ref = ref->next)
    {
    struct tagStanza *stanza = ref->val;
    char *acc = tagFindVal(stanza, "accession");
    if (acc != NULL)
        {
	struct cdwValidFile *vf = hashFindVal(validHash, acc);
	if (vf != NULL)
	    {
	    addGroupToValidFile(conn, vf, group);
	    }
	}
    }
if (clDry)
    verbose(1, "Would have %s", (clRemove ? "removed" : "added"));
else
    verbose(1, "%s", (clRemove ? "Removed" : "Added"));
verbose(1, " group %s to %d files\n", group->name, validHash->elCount);
}
Exemplo n.º 2
0
void output(int depth, struct rqlStatement *rql, struct tagStorm *tags, struct tagStanza *stanza)
/* Output stanza according to clOut */
{
char *format = clOut;
if (sameString(format, "ra"))
    {
    if (stanza->children == NULL)
	{
	struct slName *field;
	for (field = rql->fieldList; field != NULL; field = field->next)
	    {
	    char *val = tagFindVal(stanza, field->name);
	    if (val != NULL)
		printf("%s\t%s\n", field->name, val);
	    }
	printf("\n");
	}
    }
else if (sameString(format, "tab"))
    {
    if (stanza->children == NULL)
	{
	struct slName *field;
	char *connector = "";
	for (field = rql->fieldList; field != NULL; field = field->next)
	    {
	    char *val = emptyForNull(tagFindVal(stanza, field->name));
	    printf("%s%s", connector, val);
	    connector = "\t";
	    }
	printf("\n");
	}
    }
else if (sameString(format, "tags"))
    {
    struct slName *field;
    for (field = rql->fieldList; field != NULL; field = field->next)
	{
	char *val = tagFindLocalVal(stanza, field->name);
	if (val != NULL)
	    {
	    repeatCharOut(stdout, '\t', depth);
	    printf("%s\t%s\n", field->name, val);
	    }
	}
    printf("\n");
    }
else
    errAbort("Unrecognized format %s", format);
}
Exemplo n.º 3
0
void traverse(struct tagStorm *tags, struct tagStanza *list, 
    struct rqlStatement *rql, struct lm *lm)
/* Recursively traverse stanzas on list. */
{
struct tagStanza *stanza;
int limit = rql->limit;
for (stanza = list; stanza != NULL; stanza = stanza->next)
    {
    if (stanza->children)
	traverse(tags, stanza->children, rql, lm);
    else    /* Just apply query to leaves */
	{
	if (tagStanzaRqlMatch(rql, stanza, lm))
	    {
	    ++matchCount;
	    if (doSelect && (limit < 0 || matchCount <= limit))
		{
		struct slName *field;
		for (field = rql->fieldList; field != NULL; field = field->next)
		    {
		    char *val = tagFindVal(stanza, field->name);
		    if (val != NULL)
			printf("%s\t%s\n", field->name, val);
		    }
		printf("\n");
		}
	    }
	}
    }
}
Exemplo n.º 4
0
struct slName *tagFindValList(struct tagStanza *stanza, char *tag)
/* Read in tag as a list. Do a slFreeList on this when done.
 * Returns NULL if no value */
{
char *val = tagFindVal(stanza, tag);
return csvParse(val);
}
Exemplo n.º 5
0
void cdwChangeAccess(char *chmodString, char *rqlWhere)
/* cdwChangeAccess - Change access to files.. */
{
char cWhere, cDir, cAccess;
parseChmodString(chmodString, &cWhere, &cDir, &cAccess);

/* Get list of all stanzas matching query */
struct sqlConnection *conn = cdwConnectReadWrite();
struct tagStorm *tags = cdwTagStorm(conn);
struct dyString *rqlQuery = dyStringNew(0);
dyStringPrintf(rqlQuery, "select accession from cdwFileTags where accession and %s", rqlWhere);
struct slRef *ref, *matchRefList = tagStanzasMatchingQuery(tags, rqlQuery->string);

/* Make one pass through mostly for early error reporting and building up 
 * hash of cdwValidFiles keyed by accession */
struct hash *validHash = hashNew(0);
for (ref = matchRefList; ref != NULL; ref = ref->next)
    {
    struct tagStanza *stanza = ref->val;
    char *acc = tagFindVal(stanza, "accession");
    if (acc != NULL)
        {
	struct cdwValidFile *vf = cdwValidFileFromLicensePlate(conn, acc);
	if (vf == NULL)
	    errAbort("%s not found in cdwValidFile", acc);
	hashAdd(validHash, acc, vf);
	}
    }

/* Second pass through matching list we call routine that actually adds
 * the group/file relationship. */
for (ref = matchRefList; ref != NULL; ref = ref->next)
    {
    struct tagStanza *stanza = ref->val;
    char *acc = tagFindVal(stanza, "accession");
    if (acc != NULL)
        {
	struct cdwValidFile *vf = hashFindVal(validHash, acc);
	if (vf != NULL)
	    {
	    changeAccess(conn, vf->fileId, cWhere, cDir, cAccess);
	    }
	}
    }
}
Exemplo n.º 6
0
static void rTagStormCountDistinct(struct tagStanza *list, char *tag, struct hash *uniq)
/* Fill in hash with number of times have seen each value of tag */
{
char *requiredTag = "accession";
struct tagStanza *stanza;
for (stanza = list; stanza != NULL; stanza = stanza->next)
    {
    if (tagFindVal(stanza, requiredTag))
	{
	char *val = tagFindVal(stanza, tag);
	if (val != NULL)
	    {
	    hashIncInt(uniq, val);
	    }
	}
    rTagStormCountDistinct(stanza->children, tag, uniq);
    }
}
Exemplo n.º 7
0
char *tagMustFindVal(struct tagStanza *stanza, char *name)
/* Return value of tag of given name within stanza or any of it's parents. Abort if
 * not found. */
{
char *val = tagFindVal(stanza, name);
if (val == NULL)
    errAbort("Can't find tag named %s in stanza", name);
return val;
}
Exemplo n.º 8
0
static void rCheck(struct tagStanza *stanzaList, char *fileName, 
    struct slRef *wildList, struct hash *hash, struct slRef *requiredList,
    struct dyString *scratch)
/* Recurse through tagStorm */
{
struct tagStanza *stanza;
struct dyString *csvScratch = dyStringNew(0);
for (stanza = stanzaList; stanza != NULL; stanza = stanza->next)
    {
    struct slPair *pair;
    for (pair = stanza->tagList; pair != NULL; pair = pair->next)
	{
	/* Break out tag and value */
	char *tag = tagSchemaFigureArrayName(pair->name, scratch);
	char *val = pair->val;

	/* Make sure val exists and is non-empty */
	if (isEmpty(val))
	    {
	    reportError(fileName, stanza->startLineIx, 
		"%s tag has no value", tag);
	    continue;
	    }

	/* Check against SQL reserved words */
	if (gReservedHash != NULL)
	    {
	    if (sqlReservedCheck(gReservedHash, tag))
	        {
		reportError(fileName, stanza->startLineIx, 
		    "%s in tag name is a SQL reserved word", tag);
		continue;
		}
	    }

	/* Find schema in hash or wildSchemaList */
	struct tagSchema *schema = hashFindVal(hash, tag);
	if (schema == NULL)
	    {
	    struct slRef *ref;
	    for (ref = wildList; ref != NULL; ref = ref->next)
		{
		struct tagSchema *s = ref->val;
		if (wildMatch(s->name, tag))
		    {
		    schema = s;
		    break;
		    }
		}
	    }

	/* Do checking on tag */
	if (schema == NULL)
	    reportError(fileName, stanza->startLineIx, "Unrecognized tag %s", tag);
	else
	    {
	    char type = schema->type;
	    char *pos = val;
	    char *oneVal;
	    while ((oneVal =csvParseNext(&pos, csvScratch)) != NULL)
		{
		if (type == '#')
		    {
		    char *end;
		    long long v = strtoll(oneVal, &end, 10);
		    if (end == oneVal || *end != 0)	// oneVal is not integer
			reportError(fileName, stanza->startLineIx, 
			    "Non-integer value %s for %s", oneVal, tag);
		    else if (v < schema->minVal)
			reportError(fileName, stanza->startLineIx, 
			    "Value %s too low for %s", oneVal, tag);
		    else if (v > schema->maxVal)
			 reportError(fileName, stanza->startLineIx, 
			    "Value %s too high for %s", oneVal, tag);
		    }
		else if (type == '%')
		    {
		    char *end;
		    double v = strtod(oneVal, &end);
		    if (end == oneVal || *end != 0)	// val is not just a floating point number
			reportError(fileName, stanza->startLineIx, 
			    "Non-numerical value %s for %s", oneVal, tag);
		    else if (v < schema->minVal)
			reportError(fileName, stanza->startLineIx, 
			    "Value %s too low for %s", oneVal, tag);
		    else if (v > schema->maxVal)
			reportError(fileName, stanza->startLineIx, 
			    "Value %s too high for %s", oneVal, tag);
		    }
		else
		    {
		    boolean gotMatch = FALSE;
		    struct slName *okVal;
		    for (okVal = schema->allowedVals; okVal != NULL; okVal = okVal->next)
			{
			if (wildMatch(okVal->name, oneVal))
			    {
			    gotMatch = TRUE;
			    break;
			    }
			}
		    if (!gotMatch)
			reportError(fileName, stanza->startLineIx, 
			    "Unrecognized value '%s' for tag %s", oneVal, tag);
		    }

		struct hash *uniqHash = schema->uniqHash;
		if (uniqHash != NULL)
		    {
		    if (hashLookup(uniqHash, oneVal))
			reportError(fileName, stanza->startLineIx, 
			    "Non-unique value '%s' for tag %s", oneVal, tag);
		    else
			hashAdd(uniqHash, oneVal, NULL);
		    }
		}
	    }
	}
    if (stanza->children)
	{
	rCheck(stanza->children, fileName, wildList, hash, requiredList, scratch);
	}
    else
	{
	struct slRef *ref;
	for (ref = requiredList; ref != NULL; ref = ref->next)
	    {
	    struct tagSchema *schema = ref->val;
	    if (schema->objArrayPieces != NULL)  // It's an array, complex to handle, needs own routine
	        {
		checkInAllArrayItems(fileName, stanza, schema, scratch);
		}
	    else
		{
		if (tagFindVal(stanza, schema->name) == NULL)
		    reportError(fileName, stanza->startLineIx, 
			"Missing required '%s' tag", schema->name);
		}
	    }
	}
    }
dyStringFree(&csvScratch);
}
Exemplo n.º 9
0
static char *lookupField(void *record, char *key)
/* Lookup a field in a tagStanza. */
{
struct tagStanza *stanza = record;
return tagFindVal(stanza, key);
}
Exemplo n.º 10
0
void hcaStormToBundles(char *inTags, char *dataUrl, char *schemaFile, char *outDir)
/* hcaStormToBundles - Convert a HCA formatted tagStorm to a directory full of bundles.. */
{
/* Check that have full path name for dataFileDir */
if (sameString("urls", dataUrl))
   gUrls = TRUE;
else if (!stringIn("://", dataUrl))
    errAbort("data file directory must be a url.");

/* Load up schema and put it in hash */
struct tagSchema *schemaList = tagSchemaFromFile(schemaFile);
struct hash *schemaHash = tagSchemaHash(schemaList);

/* Load up tagStorm get leaf list */
struct tagStorm *storm = tagStormFromFile(inTags);
struct tagStanzaRef *refList = tagStormListLeaves(storm);
verbose(1, "Got %d leaf nodes in %s\n", slCount(refList), inTags);

/* Add in assay.sample_id as just a dupe of sample.id */
dupeValToNewTag(storm, storm->forest, "sample.id", "assay.sample_id");
dupeValToNewTag(storm, storm->forest, "project.id", "sample.project_id");
addMissingUuids(storm, "assay.seq.ena_experiment", "assay.id", FALSE);
addMissingUuids(storm, "assay.seq.sra_experiment", "assay.id", FALSE);


/* Do some figuring based on all fields available of what objects to make */
struct slName *allFields = tagStormFieldList(storm);
verbose(1, "Got %d fields in %s\n", slCount(allFields), inTags);
struct slName *topLevelList = ttjUniqToDotList(allFields, NULL, 0);
verbose(1, "Got %d top level objects\n", slCount(topLevelList));

/* Make list of objects */
struct slName *topEl;
struct ttjSubObj *objList = NULL;
for (topEl = topLevelList; topEl != NULL; topEl = topEl->next)
    {
    verbose(1, "  %s\n", topEl->name);
    struct ttjSubObj *obj = ttjMakeSubObj(allFields, topEl->name, topEl->name);
    slAddHead(&objList, obj);
    }

/* Loop through stanzas making bundles */
struct tagStanzaRef *ref;
int bundleIx = 0;
makeDirsOnPath(outDir);
for (ref = refList; ref != NULL; ref = ref->next)
    {
    /* Fetch stanza and comma-separated list of files. */
    struct tagStanza *stanza = ref->stanza;
    char *fileCsv = tagFindVal(stanza, "assay.seq.files");
    if (fileCsv == NULL)
        errAbort("Stanza without a files tag. Stanza starts line %d of %s",  
		stanza->startLineIx, inTags); 

    /* Make subdirectory for bundle */
    ++bundleIx;
    char bundleDir[PATH_LEN];
    safef(bundleDir, sizeof(bundleDir), "%s/bundle%d", outDir, bundleIx);
    makeDir(bundleDir);

    /* Make symbolic link of all files */
    char localUrl[PATH_LEN*2];
    if (gUrls)
        {
	struct slName *fileList = tagMustFindValList(stanza, "assay.seq.files");
	splitPath(fileList->name, localUrl, NULL, NULL);
	dataUrl = localUrl;
	slFreeList(&fileList);
	}

    makeBundleJson(storm, bundleDir, stanza, objList, dataUrl, schemaHash);
    }
verbose(1, "wrote json files into %s/bundle* dirs\n", outDir);
}
Exemplo n.º 11
0
void rWriteJson(FILE *f, struct tagStorm *storm, struct tagStanza *stanza, 
    struct ttjSubObj *obj, struct ttjSubObj *labeledObj, struct hash *schemaHash,
    struct dyString *scratch)
/* Write out json object recursively */
{
boolean isArray = allDigitNames(obj->children);
struct ttjSubObj *field; 
if (isArray)
    {
    fprintf(f, "["); 
    for (field = obj->children; field != NULL; field = field->next)
        {
	if (field != obj->children) // Only write comma separators after the first one
	   fprintf(f, ",");
	rWriteJson(f, storm, stanza, field, labeledObj, schemaHash, scratch);
	}
    fprintf(f, "]");
    }
else
    { 
    fprintf(f, "{"); 
    boolean firstOut = TRUE;

    /* Figure out if we need to attach a core object and do so.  The figuring bit is
     * frankly clunky. */
    char *objType = labeledObj->name;
    if (sameString(objType, "submitter") || sameString(objType, "contributors"))
         objType = "contact";
    else if (sameString(objType, "publications"))
         objType = "publication";
    else if (sameString(objType, "protocol"))  // protocol is actually just protocol_id
         objType = "string";
    else if (sameString(objType, "protocols")) // but protocols array is protocol
         objType = "protocol";
    else if (sameString(objType, "umi_barcode"))
         objType = "barcode";
    if (objNeedsCore(objType))
        printCore(f, objType, &firstOut);


    for (field = obj->children; field != NULL; field = field->next)
	{
	char *fieldName = field->name;
	if (field->children != NULL)
	     {
	     /* Look for funny characteristics_ as these are largely up to user. */
	     if (startsWith("characteristics_", field->name))
	         errAbort("No '.' allowed in field name after characteristics_ in %s", 
		    field->children->fullName);

	     /* If actually have data in this stanza write our field. */
	     if (prefixDotInStanza(field->fullName, stanza, scratch))
		 {
		 writeJsonTag(f, fieldName, &firstOut);
		 rWriteJson(f, storm, stanza, field, field, schemaHash, scratch);
		 }
	     }
	else
	    {
	    char *val = tagFindVal(stanza, field->fullName);
	    if (val != NULL)
		{
		boolean isNum = FALSE;
		char *schemaName = tagSchemaFigureArrayName(field->fullName, scratch);
		struct tagSchema *schema = hashFindVal(schemaHash, schemaName);
		if (schema != NULL)
		   isNum = (schema->type == '#' || schema->type == '%');
		if (sameString(fieldName, "files"))
		    {
		    writeJsonTag(f, "lanes", &firstOut);
		    writeLaneArray(f, stanza, val);
		    }
		else
		    {
		    boolean isArray = FALSE;
		    writeJsonTag(f, fieldName, &firstOut);
		    if (schema != NULL)
			isArray = schema->isArray;
		    struct slName *list = csvParse(val);
		    if (isArray)
			fputc('[', f);
		    else
			{
			if (list->next != NULL)  // more than one element
			   errAbort("Multiple vals for scalar tag %s in stanza starting line %d of %s",
				field->fullName, stanza->startLineIx, storm->fileName);
			}
		    struct slName *el;
		    for (el = list; el != NULL; el = el->next)
			{
			writeJsonVal(f, el->name, isNum);
			if (el->next != NULL)
			    fputc(',', f);
			}
		    if (isArray)
			fputc(']', f);
		    slFreeList(&list);
		    }
		}
	    }
	}
    fprintf(f, "}");
    }
}