Exemple #1
0
static char *gvfItemName(struct track *tg, void *item)
/* Label a GVF item with its own name plus "_" and a short code for its var_origin
 * attribute (appended at ISCA's request).  A missing attribute and the various
 * "not tested/reported/provided" values all become "unk"; an origin with no short
 * code is appended as-is.  The result is the string handed back by
 * dyStringCannibalize. */
{
struct bed8Attrs *gvf = item;
struct dyString *label = dyStringNew(0);
int originIx = stringArrayIx("var_origin", gvf->attrTags, gvf->attrCount);
if (originIx < 0)
    {
    /* No var_origin attribute at all: label it the same as an untested origin. */
    dyStringPrintf(label, "%s_unk", gvf->name);
    return dyStringCannibalize(&label);
    }

char *origin = gvf->attrVals[originIx];
if (sameWord(origin, "Not tested")
    || sameWord(origin, "Not reported")
    || sameWord(origin, "Tested - inconclusive")
    || sameWord(origin, "Not Provided"))
    dyStringPrintf(label, "%s_unk", gvf->name);
else if (sameWord(origin, "De novo"))
    dyStringPrintf(label, "%s_dnovo", gvf->name);
else if (sameWord(origin, "Maternal"))
    dyStringPrintf(label, "%s_mat", gvf->name);
else if (sameWord(origin, "Paternal"))
    dyStringPrintf(label, "%s_pat", gvf->name);
else if (sameWord(origin, "Biparental"))
    dyStringPrintf(label, "%s_bip", gvf->name);
else if (sameWord(origin, "Uniparental"))
    dyStringPrintf(label, "%s_unip", gvf->name);
else if (sameWord(origin, "Germline"))
    dyStringPrintf(label, "%s_germ", gvf->name);
else if (sameWord(origin, "Somatic"))
    dyStringPrintf(label, "%s_som", gvf->name);
else
    /* Unabbreviated origin: append it verbatim. */
    dyStringPrintf(label, "%s_%s", gvf->name, origin);
return dyStringCannibalize(&label);
}
void edwCorrectFileTags(char *tabFileName)
/* edwCorrectFileTags - Apply tag corrections from a tab-separated file.  The file must
 * have an "accession" column and may not contain md5_sum, size, valid_key or file_name.
 * For each row the file is looked up by accession, every other column is folded into
 * that file's tag string as var=value, and the result is stored with edwFileResetTags,
 * so no validateManifest rerun or reupload is needed. */
{
char *requiredFields[] = {"accession",};
char *forbiddenFields[] = {"md5_sum", "size", "valid_key", "file_name"};
struct sqlConnection *conn = edwConnectReadWrite();
struct fieldedTable *table = fieldedTableFromTabFile(tabFileName, tabFileName,
        requiredFields, ArraySize(requiredFields));
checkForbiddenFields(table, forbiddenFields, ArraySize(forbiddenFields));
int accessionIx = stringArrayIx("accession", table->fields, table->fieldCount);

struct fieldedRow *row;
for (row = table->rowList; row != NULL; row = row->next)
    {
    long long fileId = edwNeedFileIdForLicensePlate(conn, row->row[accessionIx]);
    struct edwFile *file = edwFileFromId(conn, fileId);

    /* Start from the stored tags and override one variable per non-accession column. */
    char *newTags = file->tags;
    int col;
    for (col = 0; col < table->fieldCount; ++col)
        {
        if (col == accessionIx)
            continue;
        newTags = cgiStringNewValForVar(newTags, table->fields[col], row->row[col]);
        }
    edwFileResetTags(conn, file, newTags);
    edwFileFree(&file);
    }
}
Exemple #3
0
static char *getAttributeVal(const struct bed8Attrs *gvf, char *tag)
/* Look tag up among gvf's attribute tags and return the value stored at the same
 * index, or NULL when the tag is absent.  The result points into gvf->attrVals, so
 * don't free it. */
{
int tagIx = stringArrayIx(tag, gvf->attrTags, gvf->attrCount);
return (tagIx < 0) ? NULL : gvf->attrVals[tagIx];
}
INLINE boolean nameIsTdbField(char *name)
/* Return TRUE if name is one of the settings kept as a tdb->{field} member
 * (e.g. "track" or "shortLabel"). */
{
static char *tdbFieldNames[] =
    {
    "track", "table", "shortLabel", "longLabel", "type",
    "priority", "grp", "parent", "subtracks", "visibility",
    };
int fieldIx = stringArrayIx(name, tdbFieldNames, ArraySize(tdbFieldNames));
return (fieldIx >= 0);
}
static char *findType(struct hash *cvHash,char **requested,int requestCount,
                      char **queryBy, char **org,boolean silent)
/* Returns the type that was requested or else the type associated with the term requested.
 * Parameters:
 *    cvHash - hash whose values are themselves hashes (one per cv stanza).
 *    requested, requestCount - array of requested values, or NULL/0 if none.
 *    queryBy - in/out: name of the stanza field the requested values are matched against.
 *              Rewritten to CV_TARGET when a CV_TERM lookup fails but a target lookup works.
 *    org - out: set to a lower-cased clone of the matching stanza's ORGANISM value when the
 *          type is taken from that stanza; otherwise left untouched.
 *    silent - if TRUE, do not abort when no type can be determined.
 * The starting type comes from the file-level typeOpt.  Aborts if the requested values
 * resolve to conflicting (non-control) types.  The result is passed through normalizeType(). */
{
struct hashCookie hc = hashFirst(cvHash);
struct hashEl *hEl;
struct hash *ra;
char *type = typeOpt;

if (requested != NULL) // if no type, find it from requested terms.  Will validate terms match type
    {              // NOTE: Enter here even if there is a type, to confirm the type
    while ((hEl = hashNext(&hc)) != NULL)  // FIXME: This should be using mdbCv APIs to get hashes.
        {                                  // One per "request[]"
        ra = (struct hash *)hEl->val;
        if (sameWord(hashMustFindVal(ra, CV_TYPE),CV_TOT)) // TOT = typeOfTerm
            continue;
        char *val = hashFindVal(ra, *queryBy);
        if (val != NULL)
            {
            int ix = stringArrayIx(val,requested,requestCount);
            if (ix != -1) // found
                {
                char *thisType = hashMustFindVal(ra, CV_TYPE);
                char *thisOrg = hashFindVal(ra, ORGANISM);
                if (type == NULL)
                    {
                    // First match decides the type (and the organism, if the stanza has one)
                    if (thisOrg != NULL)
                        {
                        *org = strLower(cloneString(thisOrg));
                        }
                    type = thisType;
                    }
                else if (differentWord(type,thisType))
                    {
                    // A control type gives way to any other type; two different
                    // non-control types are a conflict.
                    if (sameWord(CV_TERM_CONTROL,type))
                        type = thisType;
                    else if (differentWord(CV_TERM_CONTROL,thisType))
                        errAbort("Error: Requested %s of type '%s'.  But '%s' has type '%s'\n",
                                 *queryBy,type,requested[ix],thisType);
                    }
                }
            }
        }
    }
if (type == NULL && sameWord(*queryBy,CV_TERM))    // Special case of term becoming target
    {
    // Retry the whole lookup matching against CV_TARGET; only switch *queryBy if it worked.
    char *queryByTarget = CV_TARGET;
    type = findType(cvHash,requested,requestCount,&queryByTarget,org,TRUE); // silent here
    if (type != NULL)
        *queryBy = queryByTarget;
    }
if (type == NULL && !silent)    // Still not type? abort
    errAbort("Error: Required %s=%s ['%s', '%s', '%s', '%s' or '%s'] argument not found\n",
                    *queryBy,(requested != NULL) ? *requested : "?",
                    CV_TYPE, CV_TERM, CV_TAG, CV_TARGET, CV_LABEL);

// NOTE(review): type may still be NULL here when silent is TRUE; presumably
// normalizeType() tolerates NULL - confirm.
return normalizeType(type);
}
boolean objNeedsCore(char *module)
/* Return TRUE if the named module is on the list of modules that need core defined. */
{
static char *coreModules[] =
    {
    "project", "sample", "assay", "barcode", "cell_line", "contact",
    "death", "donor", "enrichment", "imaging",
    "preservation", "protocol", "publication", "rna", "seq", "single_cell",
    "well",
    };
int moduleIx = stringArrayIx(module, coreModules, ArraySize(coreModules));
return (moduleIx >= 0);
}
Exemple #7
0
char *mapType(char *chromName, boolean isOrdered)
/* Return the map type string for the info file: "PLACED" for a chromosome listed in
 * wellMapped, otherwise "ORDERED" or "RANDOM" according to isOrdered. */
{
int wellMappedIx = stringArrayIx(chromName, wellMapped, ArraySize(wellMapped));
if (wellMappedIx >= 0)
    return "PLACED";
return isOrdered ? "ORDERED" : "RANDOM";
}
Exemple #8
0
int romanToArabicChrom(char *roman, struct lineFile *lf)
/* Convert a chromosome name written as a roman numeral ("I" through "XVI") to its
 * regular number, so "I" gives 1 and "XVI" gives 16.  Aborts, reporting lf's current
 * line number and file name, if roman is not one of the sixteen listed numerals. */
{
static char *chromNames[16] = {"I", "II", "III", "IV", "V", "VI", 
	"VII", "VIII", "IX", "X", "XI", "XII", "XIII", "XIV", "XV", "XVI"};
int chromIx = stringArrayIx(roman, chromNames, ArraySize(chromNames));
if (chromIx < 0)
    errAbort("Unrecognized chromosome line %d of %s", lf->lineIx, lf->fileName);
/* chromIx is a zero-based position in chromNames, while chromosome numbers start
 * at 1 - returning the raw index would turn chromosome I into 0. */
return chromIx + 1;
}
void checkForbiddenFields(struct fieldedTable *table, char **forbiddenFields, int forbiddenCount)
/* Abort, naming the offending field and the table, if any of the forbiddenCount names
 * in forbiddenFields is one of the table's fields. */
{
int ix = 0;
while (ix < forbiddenCount)
    {
    char *fieldName = forbiddenFields[ix];
    int tableIx = stringArrayIx(fieldName, table->fields, table->fieldCount);
    if (tableIx >= 0)
        errAbort("Forbidden field %s in %s.", fieldName, table->name);
    ++ix;
    }
}
Exemple #10
0
const struct vcfGenotype *vcfRecordFindGenotype(struct vcfRecord *record, char *sampleId)
/* Return the genotype entry of record for the sample named sampleId, or NULL if
 * sampleId is NULL, the file has no genotype columns, or the sample is not among the
 * file's genotype ids.  vcfParseGenotypes is called on the record before the lookup. */
{
struct vcfFile *file = record->file;
if (sampleId == NULL || file->genotypeCount == 0)
    return NULL;
vcfParseGenotypes(record);
int sampleIx = stringArrayIx(sampleId, file->genotypeIds, file->genotypeCount);
return (sampleIx >= 0) ? &(record->genotypes[sampleIx]) : NULL;
}
Exemple #11
0
static void genePredOptions(struct trackDb *track, char *type,
	struct sqlConnection *conn)
/* Put up the page of sequence type options for a gene prediction track: one button
 * per available sequence type, followed by submit and cancel buttons. */
{
char *selected = cartUsualString(cart, hgtaGeneSeqType, genePredMenu[0]);
char *typeCopy = cloneString(type);
char *typeWords[3];

/* The type field has 1-3 words, in order: genePred pepTable mrnaTable.
 * genePredMenu is indexed the same way: 0 (genomic) 1 (protein) 2 (mrna). */
int typeWordCount = chopLine(typeCopy, typeWords);

/* Fall back to genomic if the remembered choice is not among the types this
 * track's type field covers. */
if (stringArrayIx(selected, genePredMenu, typeWordCount) < 0)
    selected = genePredMenu[0];

htmlOpen("Select sequence type for %s", track->shortLabel);
hPrintf("<FORM ACTION=\"%s\" METHOD=GET>\n", getScriptName());
cartSaveSession(cart);

/* RefGene covers all 3 types, in its own way.  Any other track always has genomic,
 * and has peptide/mrna only if the corresponding table from the type field exists. */
int refGene = isRefGeneTrack(track->table);
int menuCount = refGene ? 3 : typeWordCount;
int menuIx;
for (menuIx = 0; menuIx < menuCount; ++menuIx)
    {
    if (refGene || menuIx == 0 || sqlTableExists(conn, typeWords[menuIx]))
        {
        genePredTypeButton(genePredMenu[menuIx], selected);
        hPrintf(" %s<BR>\n", genePredMenu[menuIx]);
        }
    }

cgiMakeButton(hgtaDoGenePredSequence, "submit");
hPrintf(" ");
cgiMakeButton(hgtaDoMainPage, "cancel");
hPrintf("</FORM>\n");
cgiDown(0.9);
htmlClose();
freez(&typeCopy);
}
Exemple #12
0
struct gfOutput *gfOutputAny(char *format,
                             int goodPpt, boolean qIsProt, boolean tIsProt,
                             boolean noHead, char *databaseName,
                             int databaseSeqCount, double databaseLetters,
                             double minIdentity,
                             FILE *f)
/* Create an output handler for the named alignment format.
 * Parameters:
 *    format - one of 'psl', 'pslx', 'sim4', 'blast', 'wublast', 'blast8', 'blast9',
 *             'xml', 'axt' or 'maf'; NULL is treated as 'psl'.  Any other value aborts.
 *    goodPpt - minimum identity of alignments to output in parts per thousand
 *    qIsProt - true if query side is a protein.
 *    tIsProt - true if target (database) side is a protein.
 *    noHead - if true suppress header in psl/pslx output.
 *    databaseName - name of database.  Passed on for sim4 and blast-family output.
 *    databaseSeqCount - number of sequences in database - only for blast family
 *    databaseLetters - number of bases/aas in database - only for blast family
 *    minIdentity - minimum identity - only for blast family
 *    f - output file; handed to every handler except sim4.
 */
{
    static char *blastFamily[] = {"blast", "wublast", "blast8", "blast9", "xml"};
    struct gfOutput *result = NULL;
    char *fmt = format;

    if (fmt == NULL)
        fmt = "psl";

    if (sameWord(fmt, "psl"))
        result = gfOutputPsl(goodPpt, qIsProt, tIsProt, f, FALSE, noHead);
    else if (sameWord(fmt, "pslx"))
        result = gfOutputPsl(goodPpt, qIsProt, tIsProt, f, TRUE, noHead);
    else if (sameWord(fmt, "sim4"))
        result = gfOutputSim4(goodPpt, qIsProt, tIsProt, databaseName);
    else if (stringArrayIx(fmt, blastFamily, ArraySize(blastFamily)) >= 0)
        {
        /* The blast writer takes the format name itself to pick its flavour. */
        result = gfOutputBlast(goodPpt, qIsProt, tIsProt,
                               databaseName, databaseSeqCount, databaseLetters, fmt,
                               minIdentity, f);
        }
    else if (sameWord(fmt, "axt"))
        result = gfOutputAxt(goodPpt, qIsProt, tIsProt, f);
    else if (sameWord(fmt, "maf"))
        result = gfOutputMaf(goodPpt, qIsProt, tIsProt, f);
    else
        errAbort("Unrecognized output format '%s'", fmt);
    return result;
}
Exemple #13
0
boolean isSupportedFormat(char *format)
/* Return TRUE if format is one of our supported formats: either a name on the fixed
 * list below, or - after dropping any leading "bed_" - a format accepted by
 * edwIsSupportedBigBedFormat. */
{
static char *fixedFormats[] =
    {
    "unknown", "fastq", "bam", "bed", "gtf",
    "bigWig", "bigBed",
    "bedLogR", "bedRrbs", "bedMethyl", "broadPeak", "narrowPeak",
    "bed_bedLogR", "bed_bedRrbs", "bed_bedMethyl", "bed_broadPeak", "bed_narrowPeak",
    "bedRnaElements", "openChromCombinedPeaks", "peptideMapping", "shortFrags",
    "rcc", "idat", "fasta", "customTrack",
    };
static int fixedFormatCount = ArraySize(fixedFormats);

/* Formats on the fixed list need no further checking. */
if (stringArrayIx(format, fixedFormats, fixedFormatCount) >= 0)
    return TRUE;

/* Anything else has to be a bigBed flavour; skip over a "bed_" prefix first. */
char *bigBedName = startsWith("bed_", format) ? format + 4 : format;
return edwIsSupportedBigBedFormat(bigBedName);
}
Exemple #14
0
static Color gvfColor(struct track *tg, void *item, struct hvGfx *hvg)
/* Pick the item's color from its var_type attribute, following Deanna Church's document
 * SvRepresentation2.doc attached to redmine #34.  Items without a var_type attribute,
 * or with one that matches no rule, get the grey "unknown" color. */
{
/* Case-insensitive substrings of var_type and the color each one selects, listed in
 * the order they are tried; the first one found wins.  isUnknown marks the rule that
 * returns the unknown color looked up at the top instead of asking hvg again. */
static struct
    {
    char *substring;
    int r, g, b;
    int isUnknown;
    } rules[] =
    {
    {"Gain",      0x00, 0x00, 0xff, 0},
    {"Loss",      0xff, 0x00, 0x00, 0},
    {"Insertion", 0xff, 0xcc, 0x00, 0},
    {"Complex",   0x99, 0xcc, 0xff, 0},
    {"Unknown",   0xb2, 0xb2, 0xb2, 1},
    {"Other",     0xcc, 0x99, 0xff, 0},
    {"Inversion", 0x99, 0x33, 0xff, 0},  // Needs pattern
    {"LOH",       0x00, 0x00, 0xff, 0},  // Needs pattern
    {"Everted",   0x66, 0x66, 0x66, 0},  // Needs pattern
    {"Transchr",  0xb2, 0xb2, 0xb2, 0},  // Plus black vert. bar at broken end
    {"UPD",       0x00, 0xff, 0xff, 0},  // Needs pattern
    };
struct bed8Attrs *gvf = item;
Color unknownColor = hvGfxFindColorIx(hvg, 0xb2, 0xb2, 0xb2);
int typeIx = stringArrayIx("var_type", gvf->attrTags, gvf->attrCount);
if (typeIx < 0)
    return unknownColor;

char *varType = gvf->attrVals[typeIx];

/* Plain copy number variants are matched exactly (case-sensitively) and drawn black. */
if (sameString(varType, "CNV") || sameString(varType, "copy_number_variation"))
    return MG_BLACK;

int ruleCount = sizeof(rules) / sizeof(rules[0]);
int ruleIx;
for (ruleIx = 0; ruleIx < ruleCount; ++ruleIx)
    {
    if (strstrNoCase(varType, rules[ruleIx].substring))
        {
        if (rules[ruleIx].isUnknown)
            return unknownColor;
        return hvGfxFindColorIx(hvg, rules[ruleIx].r, rules[ruleIx].g, rules[ruleIx].b);
        }
    }
return unknownColor;
}
void doMiddle()
/* Print the body of the vocabulary page: the definition of the resolved type followed by
 * an HTML table of the controlled vocabulary terms that match the request.
 * The request comes from the file-level options termOpt, tagOpt, targetOpt, labelOpt,
 * typeOpt and organismOptLower; the vocabulary itself is read from cvFile(). */
{
struct hash *cvHash = raReadAll((char *)cvFile(), CV_TERM);
struct hashCookie hc = hashFirst(cvHash);
struct hashEl *hEl;
struct slList *termList = NULL;
struct hash *ra;
int totalPrinted = 0;
// Deprecated terms are hidden unless the "deprecated" CGI variable is present
boolean excludeDeprecated = (cgiOptionalString("deprecated") == NULL);

// Prepare an array of selected terms (if any)
// Precedence of the request options: tag, then target, then label, else term.
int requestCount = 0;
char **requested = NULL;
char *requestVal = termOpt;
char *queryBy = CV_TERM;
if (tagOpt)
    {
    requestVal = tagOpt;
    queryBy = CV_TAG;
    }
else if (targetOpt)
    {
    requestVal = targetOpt;
    queryBy = CV_TERM;  // request target is special: lookup term, convert to target, display target
    }
else if (labelOpt)
    {
    requestVal = labelOpt;
    queryBy = CV_LABEL;
    }
if (requestVal)
    {
    // Drop double quotes, then split the comma separated request into requested[]:
    // first call only counts the pieces, second call fills the array.
    (void)stripChar(requestVal,'\"');
    requestCount = chopCommas(requestVal,NULL);
    requested = needMem(requestCount * sizeof(char *));
    chopByChar(requestVal,',',requested,requestCount);
    }

char *org = NULL;
// if the org is specified in the type (eg. cell line)
// then use that for the org, otherwise use the command line option,
// otherwise use human.
// Note: findType may also change queryBy (it is passed by address).
char *type = findType(cvHash,requested,requestCount,&queryBy, &org, FALSE);
if (org == NULL)
    org = organismOptLower;
if (org == NULL)
    org = ORG_HUMAN;

// Special logic for requesting antibody by target
if (targetOpt && requestCount > 0 && sameWord(queryBy,CV_TERM) && sameWord(type,CV_TERM_ANTIBODY))
    {
    // Several antibodies may have same target.
    // requested target={antibody} and found antibody
    // Must now convert each of the requested terms to its target before displaying all targets
    char **targets = convertAntibodiesToTargets(cvHash,requested,requestCount);
    if (targets != NULL)
        {
        freeMem(requested);
        requested = targets;
        queryBy = CV_TARGET;
        }
    }
//warn("Query by: %s = '%s' type:%s",queryBy,requestVal?requestVal:"all",type);

// Get just the terms that match type and requested, then sort them
if (differentWord(type,CV_TOT) || typeOpt != NULL ) // If type resolves to typeOfTerm and
    {                                               // typeOfTerm was not requested,
    while ((hEl = hashNext(&hc)) != NULL)           // then just show definition
        {
        ra = (struct hash *)hEl->val;
        char *thisType = (char *)cvTermNormalized(hashMustFindVal(ra,CV_TYPE));
        // Keep rows of the resolved type; rows of the control type are also kept
        // when specific values were requested.
        if (differentWord(thisType,type) && (requested == NULL
        ||  differentWord(thisType,CV_TERM_CONTROL)))
            continue;
        // Skip all rows that do not match queryBy param if specified
        if (requested)
            {
            char *val = hashFindVal(ra, queryBy);
            if (val == NULL)
                {
                // Special case for input that has no target
                if (sameString(queryBy, CV_TARGET))
                    val = hashMustFindVal(ra, CV_TERM);
                else
                    continue;
                }
            if (-1 == stringArrayIx(val,requested,requestCount))
                continue;
            }
        else if (excludeDeprecated)
            {
            // The deprecated filter only applies when nothing specific was requested
            if (hashFindVal(ra, "deprecated") != NULL)
                continue;
            }
        slAddTail(&termList, ra);
        }
    }
slSort(&termList, termCmp);

// Third argument tells doTypeDefinition whether the term list came up empty
boolean described = doTypeDefinition(type,FALSE,(slCount(termList) == 0));
// Only tables with more than 5 rows get the sortable styling and script
boolean sortable = (slCount(termList) > 5);
if (sortable)
    {
    webIncludeResourceFile("HGStyle.css");
    jsIncludeFile("jquery.js",NULL);
    jsIncludeFile("utils.js",NULL);
    printf("<TABLE class='sortable' border=1 CELLSPACING=0 style='border: 2px outset #006600; "
           "background-color:%s;'>\n",COLOR_BG_DEFAULT);
    }
else
    printf("<TABLE BORDER=1 BGCOLOR=%s CELLSPACING=0 CELLPADDING=2>\n",COLOR_BG_DEFAULT);
if (slCount(termList) > 0)
    {
    doTypeHeader(type, org,sortable);

    // Print out the terms
    // (consumes termList; only rows for which doTypeRow returns TRUE are counted)
    while ((ra = slPopHead(&termList)) != NULL)
        {
        if (doTypeRow( ra, org ))
            totalPrinted++;
        }
    }
puts("</TBODY></TABLE><BR>");
if (sortable)
    jsInline("{$(document).ready(function() "
         "{sortTable.initialize($('table.sortable')[0],true,true);});}\n");
if (totalPrinted == 0)
    {
    // Nothing printed and no definition shown either: the type itself is unknown
    if (!described)
        warn("Error: Unrecognised type (%s)\n", type);
    }
else if (totalPrinted > 1)
    printf("Total = %d\n", totalPrinted);
}
Exemple #16
0
boolean cdwCheckEnrichedIn(char *enriched)
/* Return TRUE if enriched is one of the values listed in edwSupportedEnrichedIn. */
{
int enrichedIx = stringArrayIx(enriched, edwSupportedEnrichedIn, edwSupportedEnrichedInCount);
return (enrichedIx >= 0);
}
Exemple #17
0
void encode2Meta(char *database, char *manifestIn, char *outMetaRa)
/* encode2Meta - Create meta files.
 * Builds a tree of meta nodes (root -> composite -> experiment -> file) for the one
 * assembly named by database, using the manifest in manifestIn, the encodeExp table in
 * expDb and the metaDb objects returned by getMdbList, then hoists and sorts the tags
 * and writes the tree to outMetaRa.  Aborts if database is not listed in metaDbs. */
{
/* Only used to validate the database name; the loop below finds the entry again. */
int dbIx = stringArrayIx(database, metaDbs, ArraySize(metaDbs));
if (dbIx < 0)
    errAbort("Unrecognized database %s", database);

/* Create a three level meta.ra format file based on hgFixed.encodeExp
 * and database.metaDb tables. The levels are composite, experiment, file */
struct metaNode *metaTree = metaTreeNew("encode2");

/* Load up the manifest. */
struct encode2Manifest *mi, *miList = encode2ManifestShortLoadAll(manifestIn);
struct hash *miHash = hashNew(18);
for (mi = miList; mi != NULL; mi = mi->next)
    hashAdd(miHash, mi->fileName, mi);
verbose(1, "%d files in %s\n", miHash->elCount, manifestIn);

/* Load up encodeExp info. */
struct sqlConnection *expConn = sqlConnect(expDb);
struct encodeExp *expList = encodeExpLoadByQuery(expConn, "NOSQLINJ select * from encodeExp");
sqlDisconnect(&expConn);
verbose(1, "%d experiments in encodeExp\n", slCount(expList));

/* Composite name -> composite node, including the catch-all node made below. */
struct hash *compositeHash = hashNew(0);

/* Go through each  organism database in turn. */
/* Every entry except the requested database is skipped; the index i is still needed
 * to pick the matching entry of organisms[]. */
int i;
for (i=0; i<ArraySize(metaDbs); ++i)
    {
    char *db = metaDbs[i];
    if (!sameString(database, db))
        continue;

    verbose(1, "exploring %s\n", db);
    struct mdbObj *mdb, *mdbList = getMdbList(db);
    verbose(1, "%d meta objects in %s\n", slCount(mdbList), db);

    /* Get info on all composites. */
    /* Each object with objType "composite" becomes a child of the root carrying a copy
     * of all its vars plus an "assembly" var naming the database. */
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
        {
	char *objType = mdbVarLookup(mdb->vars, "objType");
	if (objType != NULL && sameString(objType, "composite"))
	    {
	    char compositeName[256];
	    safef(compositeName, sizeof(compositeName), "%s", mdb->obj);
	    struct metaNode *compositeNode = metaNodeNew(compositeName);
	    slAddHead(&metaTree->children, compositeNode);
	    compositeNode->parent = metaTree;
	    struct mdbVar *v;
	    for (v=mdb->vars; v != NULL; v = v->next)
	        {
		metaNodeAddVar(compositeNode, v->var, v->val);
		}
	    metaNodeAddVar(compositeNode, "assembly", db);
	    hashAdd(compositeHash, mdb->obj, compositeNode);
	    }
	}

    /* Make up one more for experiments with no composite. */
    char *noCompositeName = "wgEncodeZz";
    struct metaNode *noCompositeNode = metaNodeNew(noCompositeName);
    slAddHead(&metaTree->children, noCompositeNode);
    noCompositeNode->parent = metaTree;
    hashAdd(compositeHash, noCompositeName, noCompositeNode);


    /* Now go through objects trying to tie experiments to composites. */ 
    /* expToComposite maps dccAccession -> composite name.  Only objects whose composite
     * passes originalData() are considered, and the first mapping seen for an accession
     * is kept; later conflicting ones are only reported at verbosity 2. */
    struct hash *expToComposite = hashNew(16);
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
        {
	char *composite = mdbVarLookup(mdb->vars, "composite");
	if (originalData(composite))
	    {
	    char *dccAccession = mdbVarLookup(mdb->vars, "dccAccession");
	    if (dccAccession != NULL)
	        {
		char *oldComposite = hashFindVal(expToComposite, dccAccession);
		if (oldComposite != NULL)
		    {
		    if (!sameString(oldComposite, composite))
		        verbose(2, "%s maps to %s ignoring mapping to %s", dccAccession, oldComposite, composite);
		    }
		else
		    {
		    hashAdd(expToComposite, dccAccession, composite);
		    }
		}
	    }
	}
    /* Now get info on all experiments in this organism. */
    /* Experiments with an accession are hung under their composite, or under the
     * catch-all node when no composite mapping exists.  expHash is keyed by node name. */
    struct hash *expHash = hashNew(0);
    struct encodeExp *exp;
    for (exp = expList; exp != NULL; exp = exp->next)
        {
	if (sameString(exp->organism, organisms[i]))
	    {
	    if (exp->accession != NULL)
		{
		char *composite = hashFindVal(expToComposite,  exp->accession);
		struct metaNode *compositeNode;
		if (composite != NULL)
		    {
		    compositeNode = hashMustFindVal(compositeHash, composite);
		    }
		else
		    {
		    compositeNode = noCompositeNode;
		    }
		struct metaNode *expNode = wrapNodeAroundExp(exp);
		hashAdd(expHash, expNode->name, expNode);
		slAddHead(&compositeNode->children, expNode);
		expNode->parent = compositeNode;
		}
	    }
	}

    /* Finally attach file level nodes: every non-composite object that has a
     * dccAccession and a fileName var, provided the file is in the manifest and its
     * experiment node exists. */
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
	{
	char *fileName = NULL, *dccAccession = NULL;
	char *objType = mdbVarLookup(mdb->vars, "objType");
	if (objType != NULL && sameString(objType, "composite"))
	    continue;
	dccAccession = mdbVarLookup(mdb->vars, "dccAccession");
	if (dccAccession == NULL)
	    continue;
	char *composite = hashFindVal(expToComposite,  dccAccession);
	if (composite == NULL)
	    errAbort("Can't find composite for %s", mdb->obj);
	struct mdbVar *v;
	for (v = mdb->vars; v != NULL; v = v->next)
	    {
	    char *var = v->var, *val = v->val;
	    if (sameString("fileName", var))
		{
		/* Rewrite the fileName var in place as db/composite/firstFileName so
		 * that it can be looked up in the manifest hash below. */
		fileName = val;
		char path[PATH_LEN];
		char *comma = strchr(fileName, ',');
		if (comma != NULL)
		     *comma = 0;	/* Cut off comma separated list. */
		safef(path, sizeof(path), "%s/%s/%s", db, 
		    composite, fileName);  /* Add database path */
		fileName = val = v->val = cloneString(path);
		}
	    }
	if (fileName != NULL)
	    {
	    if (hashLookup(miHash, fileName))
		{
		struct metaNode *expNode = hashFindVal(expHash, dccAccession);
		if (expNode != NULL)
		    {
		    struct metaNode *fileNode = metaNodeNew(mdb->obj);
		    slAddHead(&expNode->children, fileNode);
		    fileNode->parent = expNode;
		    /* This v shadows the outer one; it copies every var, including the
		     * rewritten fileName, onto the file node. */
		    struct mdbVar *v;
		    for (v=mdb->vars; v != NULL; v = v->next)
			{
			metaNodeAddVar(fileNode, v->var, v->val);
			}
		    }
		}
	    }
	}
#ifdef SOON
#endif /* SOON */
    }

struct hash *suppress = makeSuppress();
struct hash *closeEnoughTags = makeCloseEnoughTags();

/* Hoist and sort the tags, then write each child of the root as a top level stanza. */
metaTreeHoist(metaTree, closeEnoughTags);
metaTreeSortChildrenSortTags(metaTree);
FILE *f = mustOpen(outMetaRa, "w");
struct metaNode *node;
for (node = metaTree->children; node != NULL; node = node->next)
    metaTreeWrite(0, 0, BIGNUM, FALSE, NULL, node, suppress, f);
carefulClose(&f);

/* Write warning about tags in highest parent. */
/* The root itself is never written, so any vars left on it are absent from the output. */
struct mdbVar *v;
for (v = metaTree->vars; v != NULL; v = v->next)
    verbose(1, "Omitting universal %s %s\n", v->var, v->val);
}
Exemple #18
0
boolean isFinChrom(char *chrom)
/* Return TRUE if chrom is listed in finChroms, i.e. is a finished chromosome. */
{
int finIx = stringArrayIx(chrom, finChroms, ArraySize(finChroms));
return (finIx >= 0);
}
Exemple #19
0
void cdwTextForIndex(char *outFile)
/* cdwTextForIndex - Make the text file used for building ixIxx indexes.  Writes one
 * line per cdwFileTags row with a non-NULL file_id: the file_id followed by the
 * space-separated non-NULL values of the chosen fields.  Fields named in priorityFields
 * come first; the other fields follow, limited to those present in the hash returned by
 * hashTextFields. */
{
struct sqlConnection *conn = cdwConnect();
struct hash *textFields = hashTextFields(conn, "cdwFileTags");

/* Query every column of the table; the result also supplies the column names. */
char query[256];
sqlSafef(query, sizeof(query), "select * from cdwFileTags");
struct sqlResult *sr = sqlGetResult(conn, query);
char **columns = NULL;
int columnCount = sqlResultFieldArray(sr, &columns);

/* file_id starts every output line, so it has to exist. */
int idIx = stringArrayIx("file_id", columns, columnCount);
if (idIx < 0)
    errAbort("Can't find file_id in cdwFileTags");

/* outputOrder[] lists the column indexes in the order they will be written. */
int outputOrder[columnCount];
int outputCount = 0;
int i;
struct hash *alreadyChosen = hashNew(0);

/* Priority fields that exist in the table go first. */
for (i=0; i<ArraySize(priorityFields); ++i)
    {
    char *name = priorityFields[i];
    int col = stringArrayIx(name, columns, columnCount);
    if (col < 0)
        continue;
    outputOrder[outputCount++] = col;
    hashAdd(alreadyChosen, name, NULL);
    }

/* Then every remaining text field, in table order. */
for (i=0; i<columnCount; ++i)
    {
    char *name = columns[i];
    if (hashLookup(alreadyChosen, name))
        continue;
    if (hashLookup(textFields, name))
        outputOrder[outputCount++] = i;
    }

/* Stream the rows out. */
FILE *f = mustOpen(outFile, "w");
char **row;
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *id = row[idIx];
    if (id == NULL)
        continue;
    fprintf(f, "%s", id);
    for (i=0; i<outputCount; ++i)
        {
        char *val = row[outputOrder[i]];
        if (val != NULL)
            fprintf(f, " %s", val);
        }
    fprintf(f, "\n");
    }

carefulClose(&f);
}
Exemple #20
0
void edwParseSubmitFile(struct sqlConnection *conn, char *submitLocalPath, char *submitUrl, 
    struct submitFileRow **retSubmitList)
/* Load and parse up this file as fielded table, make sure all required fields are there,
 * and calculate indexes of required fields.   This produces an edwFile list, but with
 * still quite a few fields missing - just what can be filled in from submit filled in. 
 * The submitUrl is just used for error reporting.  If it's local, just make it the
 * same as submitLocalPath. */
{
char *requiredFields[] = {"file_name", "format", "output_type", "experiment", "replicate", 
    "enriched_in", "md5_sum", "size",  "modified", "valid_key"};
struct fieldedTable *table = fieldedTableFromTabFile(submitLocalPath, submitUrl,
	requiredFields, ArraySize(requiredFields));

/* Get offsets of all required fields */
int fileIx = stringArrayIx("file_name", table->fields, table->fieldCount);
int formatIx = stringArrayIx("format", table->fields, table->fieldCount);
int outputIx = stringArrayIx("output_type", table->fields, table->fieldCount);
int experimentIx = stringArrayIx("experiment", table->fields, table->fieldCount);
int replicateIx = stringArrayIx("replicate", table->fields, table->fieldCount);
int enrichedIx = stringArrayIx("enriched_in", table->fields, table->fieldCount);
int md5Ix = stringArrayIx("md5_sum", table->fields, table->fieldCount);
int sizeIx = stringArrayIx("size", table->fields, table->fieldCount);
int modifiedIx = stringArrayIx("modified", table->fields, table->fieldCount);
int validIx = stringArrayIx("valid_key", table->fields, table->fieldCount);

/* See if we're doing replacement and check have all columns needed if so. */
int replacesIx = stringArrayIx(replacesTag, table->fields, table->fieldCount);
int replaceReasonIx = stringArrayIx(replaceReasonTag, table->fields, table->fieldCount);
boolean doReplace = (replacesIx != -1);
if (doReplace)
    if (replaceReasonIx == -1)
        errAbort("Error: got \"%s\" column without \"%s\" column in %s.", 
	    replacesTag, replaceReasonTag, submitUrl);

/* Loop through and make sure all field values are ok */
struct fieldedRow *fr;
for (fr = table->rowList; fr != NULL; fr = fr->next)
    {
    char **row = fr->row;
    char *fileName = row[fileIx];
    allGoodFileNameChars(fileName);
    char *format = row[formatIx];
    if (!isSupportedFormat(format))
	errAbort("Format %s is not supported", format);
    allGoodSymbolChars(row[outputIx]);
    char *experiment = row[experimentIx];
    if (!isExperimentId(experiment))
        errAbort("%s in experiment field does not seem to be an encode experiment", experiment);
    char *replicate = row[replicateIx];
    if (differentString(replicate, "pooled") && differentString(replicate, "n/a") )
	if (!isAllNum(replicate))
	    errAbort("%s is not a good value for the replicate column", replicate);
    char *enriched = row[enrichedIx];
    if (!encode3CheckEnrichedIn(enriched))
        errAbort("Enriched_in %s is not supported", enriched);
    char *md5 = row[md5Ix];
    if (strlen(md5) != 32 || !isAllHexLower(md5))
        errAbort("md5 '%s' is not in all lower case 32 character hexadecimal format.", md5);
    char *size = row[sizeIx];
    if (!isAllNum(size))
        errAbort("Invalid size '%s'", size);
    char *modified = row[modifiedIx];
    if (!isAllNum(modified))
        errAbort("Invalid modification time '%s'", modified);
    char *validIn = row[validIx];
    char *realValid = encode3CalcValidationKey(md5, sqlLongLong(size));
    if (!sameString(validIn, realValid))
        errAbort("The valid_key %s for %s doesn't fit", validIn, fileName);
    freez(&realValid);

    if (doReplace)
	{
	char *replaces = row[replacesIx];
	char *reason = row[replaceReasonIx];
	if (!isEmptyOrNa(replaces))
	    {
	    char *prefix = edwLicensePlateHead(conn);
	    if (!startsWith(prefix, replaces))
		errAbort("%s in replaces column is not an ENCODE file accession", replaces);
	    if (isEmptyOrNa(reason))
		errAbort("Replacing %s without a reason\n", replaces);
	    }
	}
    }

*retSubmitList = submitFileRowFromFieldedTable(conn, table, 
    fileIx, md5Ix, sizeIx, modifiedIx, replacesIx, replaceReasonIx);
}
Exemple #21
0
void ccFirst(char *source, char *dest, char *hostList, char *lockDir)
/* Do first instance of this program.  Copy file to first host,
 * make up lock directory, and then poll lock directory to see
 * if we're done.
 *    source   - file to copy from
 *    dest     - name to copy it to
 *    hostList - handed to readAllWords() to get the names of the hosts
 *    lockDir  - directory created here to hold one lock file per host
 * The host named by the HOST environment variable must be in the host list.
 * As read by the polling loop below a lock file that can't be opened counts as
 * not started, one with no byte to read as a copy in progress, one whose first
 * byte is non-zero as finished, and one whose first byte is zero as an error.
 * Aborts via errAbort() if the set up, the first copy, the lock write or the
 * fork fails. */
{
char *firstHost;
char **hosts;
char *hostBuf;
int hostCount;
int firstLock;
int childPid;
char *thisHost = getenv("HOST");
char ok;
/* NOTE(review): startTime is never read in this function.  The call is kept as
 * it was in case clock1000() is wanted for a side effect - confirm and remove. */
long startTime = clock1000();

if (thisHost == NULL)
    errAbort("HOST environment variable undefined\n");
readAllWords(hostList, &hosts, &hostCount, &hostBuf);
if (hostCount <= 0)
    errAbort("%s is empty.", hostList);
if (stringArrayIx(thisHost, hosts, hostCount) < 0)
    errAbort("Current host (%s) not in host list\n", thisHost);
if (mkdir(lockDir, 0777) < 0)
    errAbort("Couldn't make lock directory %s\n", lockDir);
firstHost = thisHost;
firstLock = makeLock(firstHost, lockDir);
if (firstLock < 0)
    errAbort("Couldn't make lock file %s/%s\n", lockDir, firstHost);
if (cpFile(source, dest) != 0)
    {
    /* dest is a string, so it has to be printed with %s rather than %d. */
    warn("Couldn't copy %s to %s:%s\n", source, firstHost, dest);
    close(firstLock);
    cleanupLocks(lockDir);
    errAbort("Cleaned up locks in %s, aborting copy.", lockDir);
    }

/* Mark our own copy as finished.  If the byte can't be written the lock would
 * look like a copy in progress on every poll below, so give up instead. */
ok = 1;
if (write(firstLock, &ok, 1) != 1)
    {
    warn("Couldn't write to lock file %s/%s\n", lockDir, firstHost);
    close(firstLock);
    cleanupLocks(lockDir);
    errAbort("Cleaned up locks in %s, aborting copy.", lockDir);
    }
close(firstLock);

childPid = fork();
if (childPid < 0)
    {
    /* Without a child there is presumably nobody left to fill in the other
     * hosts' locks, so polling for them could go on forever. */
    warn("Couldn't fork process to copy to remaining hosts\n");
    cleanupLocks(lockDir);
    errAbort("Cleaned up locks in %s, aborting copy.", lockDir);
    }
if (childPid == 0)
    {
    /* Have child process keep copying. */
    ccMore(dest, hostList, 0, lockDir);
    }
else
    {
    int sleepTime = 10;

    /* Have parent process wait until last file done. */
    for (;;)
        {
        int i;
        int startCount = 0;
        int endCount = 0;
        int errCount = 0;
        int finCount;

        /* Tally the state of every host's lock file. */
        for (i=0; i<hostCount; ++i)
            {
            char *ln = lockName(lockDir, hosts[i]);
            int lockFd = open(ln, O_RDONLY);
            if (lockFd >= 0)
                {
                char status;
                if (read(lockFd, &status, 1) < 1)
                    ++startCount;
                else if (status)
                    ++endCount;
                else
                    ++errCount;
                close(lockFd);
                }
            }
        finCount = endCount + errCount;

        /* Progress is reported on every poll, whether or not it changed. */
        printf(" copies in progress %d finished %d errors %d total %d\n",
            startCount, endCount, errCount, hostCount);

        if (finCount >= hostCount)
            {
            if (errCount > 0)
                {
                /* Go through the locks once more to name the hosts that failed.
                 * ??host?? means the lock could not be opened this time around,
                 * ?host? that it had no status byte to read. */
                fprintf(stderr, "Errors copying to hosts:");
                for (i=0; i<hostCount; ++i)
                    {
                    char *ln = lockName(lockDir, hosts[i]);
                    int lockFd = open(ln, O_RDONLY);
                    if (lockFd < 0)
                        {
                        fprintf(stderr, " ??%s??", hosts[i]);
                        }
                    else
                        {
                        char status;
                        if (read(lockFd, &status, 1) < 1)
                            fprintf(stderr, " ?%s?", hosts[i]);
                        else if (!status)
                            fprintf(stderr, " %s", hosts[i]);
                        close(lockFd);
                        }
                    }
                fprintf(stderr, "\n");
                }
            cleanupLocks(lockDir);
            break;
            }
        sleep(sleepTime);
        }
    }
}
Exemple #22
0
static boolean cmpReal(char *pat, char *cmpOp)
/* Return TRUE if pat is not empty and cmpOp is found in cmpOpMenu at an index
 * above zero.  An operator sitting at index 0 of the menu, or one that is not
 * in the menu at all, does not count as a real comparison. */
{
boolean isReal = isNotEmpty(pat);
if (isReal)
    isReal = (stringArrayIx(cmpOp, cmpOpMenu, cmpOpMenuSize) > 0);
return isReal;
}
Exemple #23
0
struct submitFileRow *submitFileRowFromFieldedTable(
    struct sqlConnection *conn, struct fieldedTable *table,
    int fileIx, int md5Ix, int sizeIx, int modifiedIx, int replacesIx, int replaceReasonIx)
/* Turn parsed out table (still all just strings) into list of edwFiles.
 *    conn  - connection used to look up the accessions of files being replaced
 *    table - parsed table, one submitted file per row
 *    fileIx, md5Ix, sizeIx, modifiedIx - column indexes of the fixed fields
 *    replacesIx, replaceReasonIx - column indexes of the replacement fields.
 *            A replacesIx of -1 means no replacement information is read.
 * Returns a list with one submitFileRow per table row, in table order.
 * Every column other than the four fixed ones is cgi-encoded into the file's
 * tags.  When the table has no ucsc_db column the value is made up from the
 * part of the file name before the first '/' and added to the tags too.
 * Aborts if that part is missing or not one of the recognized databases, or if
 * a replaced accession is not found in the warehouse. */
{
struct submitFileRow *sfr, *sfrList = NULL;
struct edwFile *bf;
struct fieldedRow *fr;
struct dyString *tags = dyStringNew(0);
char *ucscDbTag = "ucsc_db";
int ucscDbField = stringArrayIx(ucscDbTag, table->fields, table->fieldCount);

for (fr = table->rowList; fr != NULL; fr = fr->next)
    {
    char **row = fr->row;
    AllocVar(bf);
    bf->submitFileName = cloneString(row[fileIx]);
    safef(bf->md5, sizeof(bf->md5), "%s", row[md5Ix]);
    bf->size = sqlLongLong(row[sizeIx]);
    bf->updateTime = sqlLongLong(row[modifiedIx]);

    /* Add as tags any fields not included in fixed fields. */
    dyStringClear(tags);
    int i;
    for (i=0; i<table->fieldCount; ++i)
        {
        if (i != fileIx && i != md5Ix && i != sizeIx && i != modifiedIx)
            cgiEncodeIntoDy(table->fields[i], row[i], tags);
        }
    if (ucscDbField < 0)
        {
        /* Try to make this field up from file name */
        char *slash = strchr(bf->submitFileName, '/');
        if (slash == NULL)
            errAbort("Can't make up '%s' field from '%s'", ucscDbTag, bf->submitFileName);
        int len = slash - bf->submitFileName;
        char ucscDbVal[len+1];
        memcpy(ucscDbVal, bf->submitFileName, len);
        ucscDbVal[len] = 0;

        /* Do a little check on it */
        if (!sameString("mm9", ucscDbVal) && !sameString("mm10", ucscDbVal)
            && !sameString("dm3", ucscDbVal) && !sameString("ce10", ucscDbVal)
            && !sameString("hg19", ucscDbVal))
            errAbort("Unrecognized ucsc_db %s - please arrange files so that the top " 
                     "level directory in the fileName in the manifest is a UCSC database name "
                     "like 'hg19' or 'mm10.'  Alternatively please include a ucsc_db column.",
                     ucscDbVal);

        /* Add it to tags. */
        cgiEncodeIntoDy(ucscDbTag, ucscDbVal, tags);
        }
    bf->tags = cloneString(tags->string);

    /* Fake other fields. */
    bf->edwFileName  = cloneString("");

    /* Allocate wrapper structure */
    AllocVar(sfr);
    sfr->file = bf;

    /* Fill in fields about replacement maybe.  The manifest check that runs
     * before this lets a row leave the replaces column empty or n/a, so only
     * look up rows that really name an accession.  The others keep the zeroed
     * replacement fields that AllocVar gave them, same as when there is no
     * replaces column at all. */
    if (replacesIx != -1)
        {
        char *replacesAcc = row[replacesIx];
        if (!isEmptyOrNa(replacesAcc))
            {
            char *reason = row[replaceReasonIx];
            int fileId = edwFileIdForLicensePlate(conn, replacesAcc);
            if (fileId == 0)
                errAbort("%s in %s column doesn't exist in warehouse", replacesAcc, replacesTag);
            sfr->replaces = cloneString(replacesAcc);
            sfr->replaceReason = cloneString(reason);
            sfr->replacesFile = fileId;
            }
        }

    slAddHead(&sfrList, sfr);
    }
/* Rows were added at the head, so flip the list back into table order. */
slReverse(&sfrList);
dyStringFree(&tags);
return sfrList;
}
void addSdrfToStormTop(char *sdrfFile, struct tagStorm *storm)
/* Add lines of sdrfFile as children of first top level stanza in storm.
 *    sdrfFile - tab separated file read with fieldedTableFromTabFile()
 *    storm    - must have exactly one top level stanza, else internalErr()
 * Field names are first converted with aeFieldToNormalField().  Columns that
 * end up with the same name are merged into a single tag whose value is built
 * with csvEscapeAndAppend().  A row that differs from the row before it only in
 * the fastq file name columns does not get a stanza of its own; its fastq
 * values are appended, after a comma, to the tags of the previous stanza. */
{
struct fieldedTable *table = fieldedTableFromTabFile(sdrfFile, sdrfFile, NULL, 0 );

/* Convert ArrayExpress field names to our field names.  Term_Source_REF and
 * Term_Accession_Number columns are named after the most recent column that is
 * not one of them, and Unit_ columns after the most recent column that is
 * neither a term nor a unit column. */
int fieldIx;
char *lastNonTerm = NULL;
char *lastNonUnit = NULL;
for (fieldIx=0; fieldIx < table->fieldCount; fieldIx += 1)
    {
    char tagName[256];
    aeFieldToNormalField("sdrf.", table->fields[fieldIx], tagName, sizeof(tagName));
    if (lastNonTerm != NULL && sameString("sdrf.Term_Source_REF", tagName))
        {
        safef(tagName, sizeof(tagName), "%s_Term_Source_REF", lastNonTerm);
        table->fields[fieldIx] = lmCloneString(table->lm, tagName);
        }
    else if (lastNonTerm != NULL && sameString("sdrf.Term_Accession_Number", tagName))
        {
        safef(tagName, sizeof(tagName), "%s_Term_Accession_Number", lastNonTerm);
        table->fields[fieldIx] = lmCloneString(table->lm, tagName);
        }
    else if (lastNonUnit != NULL && startsWith("sdrf.Unit_", tagName))
        {
        /* A unit column becomes the column that following term columns attach to. */
        safef(tagName, sizeof(tagName), "%s_Unit", lastNonUnit);
        lastNonTerm = lmCloneString(table->lm, tagName);
        table->fields[fieldIx] = lastNonTerm;
        }
    else
        {
        lastNonTerm = lastNonUnit = lmCloneString(table->lm, tagName);
        table->fields[fieldIx] = lastNonTerm;
        }
    }

/* Make up fastq field indexes to handle processing of paired reads in fastq, which
 * take two lines of sdrf file. */
char *fieldsWithFastqs[] = 
/* Fields that contain the fastq file names */
    {
    "sdrf.Comment_FASTQ_URI",
    "sdrf.Comment_SUBMITTED_FILE_NAME",
    "sdrf.Scan_Name",
    };
boolean mightReuseStanza = TRUE;
bool *reuseMultiFields;  // If set this field can vary and line still reused
AllocArray(reuseMultiFields, table->fieldCount);
int fastqIx;
for (fastqIx=0; fastqIx<ArraySize(fieldsWithFastqs); ++fastqIx)
    {
    int ix = stringArrayIx(fieldsWithFastqs[fastqIx], table->fields, table->fieldCount);
    if (ix >= 0)
        reuseMultiFields[ix] = TRUE;
    else if (fastqIx == 0)
        {
        mightReuseStanza = FALSE;
        break;	    // Make sure has first one if going to do paired read fastq processing
        }
    }

/* Make up a list and hash of fieldMergers to handle conversion of columns that occur
 * multiple times to a comma-separated list of values in a single column. */
struct fieldMerger
/* Something to help merge multiple columns with same name */
    {
    struct fieldMerger *next;	/* Next in list */
    char *name;	
    struct dyString *val;	/* Comma separated value */
    };
struct hash *fieldHash = hashNew(0);
struct fieldMerger *fmList = NULL;
for (fieldIx = 0; fieldIx < table->fieldCount; ++fieldIx)
    {
    char *fieldName = table->fields[fieldIx];
    if (hashLookup(fieldHash, fieldName) == NULL)
        {
        struct fieldMerger *newFm;
        AllocVar(newFm);
        newFm->name = fieldName;
        newFm->val = dyStringNew(0);
        slAddHead(&fmList, newFm);
        hashAdd(fieldHash, fieldName, newFm);
        }
    }
/* Mergers were added at the head, flip them so they are in first-seen column order. */
slReverse(&fmList);

/* Grab top level stanza and make sure there is only one. */
struct tagStanza *topStanza = storm->forest;
if (topStanza == NULL || topStanza->next != NULL)
    internalErr();

/* Scan through table, making new stanzas for each row and hooking them into topStanza */
struct fieldedRow *fr, *lastFr = NULL;
struct tagStanza *stanza = NULL;
for (fr = table->rowList; fr != NULL; fr = fr->next)
    {
    /* Empty out any existing vals */
    struct fieldMerger *fm;
    for (fm = fmList; fm != NULL; fm = fm->next)
        dyStringClear(fm->val);

    /* Add all non-empty values from this row to our fieldMergers. */
    char **row = fr->row;
    for (fieldIx = 0; fieldIx < table->fieldCount; ++fieldIx)
        {
        char *val = row[fieldIx];
        if (!isEmpty(val))
            {
            fm = hashMustFindVal(fieldHash, table->fields[fieldIx]);
            csvEscapeAndAppend(fm->val, val);
            }
        }

    /* If only the reuseMultiFields are varying, append to those values in previous stanza,
     * otherwise make a new stanza */
    if (mightReuseStanza && lastFr != NULL 
        && sameExceptForSome(lastFr->row, fr->row, table->fieldCount, reuseMultiFields))
        {
        for (fastqIx=0; fastqIx<ArraySize(fieldsWithFastqs); ++fastqIx)
            {
            char *fieldName = fieldsWithFastqs[fastqIx];
            if ((fm = hashFindVal(fieldHash, fieldName)) != NULL)
                {
                char *newVal = fm->val->string;
                char *oldVal = tagMustFindVal(stanza, fieldName);
                /* Room for both values, the comma between them and the terminating zero. */
                int bothSize = strlen(newVal) + strlen(oldVal) + 1 + 1;
                char bothBuf[bothSize];
                safef(bothBuf, bothSize, "%s,%s", oldVal, newVal);
                tagStanzaUpdateTag(storm, stanza, fieldName, bothBuf);
                }
            }
        }
    else
        {
        /* Output all nonempty vals to stanza */
        stanza = tagStanzaNew(storm, topStanza);
        for (fm = fmList; fm != NULL; fm = fm->next)
            if (fm->val->stringSize > 0)
                tagStanzaAppend(storm, stanza, fm->name, fm->val->string);
        }

    lastFr = fr;
    }
/* NOTE(review): presumably tagStanzaNew hooks each new stanza in at the head of
 * the children list, so this puts them back in file order - confirm. */
slReverse(&topStanza->children);

/* Clean up what was only needed while building the stanzas.  The merger values
 * are cleared and reused for every row, and the combined fastq value lives in
 * a stack buffer, so the storm can't be holding on to either of them.
 * NOTE(review): table is left alone since the field names handed to
 * tagStanzaAppend are allocated from table->lm and it isn't visible here
 * whether the storm copies them - confirm before freeing it as well. */
struct fieldMerger *doneFm, *nextFm;
for (doneFm = fmList; doneFm != NULL; doneFm = nextFm)
    {
    nextFm = doneFm->next;
    dyStringFree(&doneFm->val);
    freez(&doneFm);
    }
hashFree(&fieldHash);
freez(&reuseMultiFields);
}