static char *gvfItemName(struct track *tg, void *item)
/* Build the label for a GVF item: the item's name followed by '_' and an
 * abbreviation of its var_origin attribute (ISCA asked for this suffix).
 * Items without a var_origin attribute, or whose origin is one of the
 * "not tested / not reported / inconclusive / not provided" values, get "_unk".
 * An origin that is not in the abbreviation list is appended as is.
 * The returned string is what dyStringCannibalize hands back. */
{
struct bed8Attrs *gvf = item;
struct dyString *label = dyStringNew(0);
int originIx = stringArrayIx("var_origin", gvf->attrTags, gvf->attrCount);

/* No var_origin attribute at all: same suffix as an unknown origin. */
if (originIx < 0)
    {
    dyStringPrintf(label, "%s_unk", gvf->name);
    return dyStringCannibalize(&label);
    }

char *origin = gvf->attrVals[originIx];
int originUnknown = (sameWord(origin, "Not tested")
                  || sameWord(origin, "Not reported")
                  || sameWord(origin, "Tested - inconclusive")
                  || sameWord(origin, "Not Provided"));

if (originUnknown)
    dyStringPrintf(label, "%s_unk", gvf->name);
else if (sameWord(origin, "De novo"))
    dyStringPrintf(label, "%s_dnovo", gvf->name);
else if (sameWord(origin, "Maternal"))
    dyStringPrintf(label, "%s_mat", gvf->name);
else if (sameWord(origin, "Paternal"))
    dyStringPrintf(label, "%s_pat", gvf->name);
else if (sameWord(origin, "Biparental"))
    dyStringPrintf(label, "%s_bip", gvf->name);
else if (sameWord(origin, "Uniparental"))
    dyStringPrintf(label, "%s_unip", gvf->name);
else if (sameWord(origin, "Germline"))
    dyStringPrintf(label, "%s_germ", gvf->name);
else if (sameWord(origin, "Somatic"))
    dyStringPrintf(label, "%s_som", gvf->name);
else
    /* Unrecognized origin: append it verbatim. */
    dyStringPrintf(label, "%s_%s", gvf->name, origin);

return dyStringCannibalize(&label);
}
void edwCorrectFileTags(char *tabFileName) /* edwCorrectFileTags - Use this to correct tags in the edwFile table and corresponding fields * in the edwValidFile table without forcing a validateManifest rerun or a reupload.. */ { struct sqlConnection *conn = edwConnectReadWrite(); char *requiredFields[] = {"accession",}; char *forbiddenFields[] = {"md5_sum", "size", "valid_key", "file_name"}; struct fieldedTable *table = fieldedTableFromTabFile(tabFileName, tabFileName, requiredFields, ArraySize(requiredFields)); checkForbiddenFields(table, forbiddenFields, ArraySize(forbiddenFields)); int accessionIx = stringArrayIx("accession", table->fields, table->fieldCount); struct fieldedRow *fr; for (fr = table->rowList; fr != NULL; fr = fr->next) { char *acc = fr->row[accessionIx]; long long id = edwNeedFileIdForLicensePlate(conn, acc); struct edwFile *ef = edwFileFromId(conn, id); int i; char *tags = ef->tags; for (i=0; i<table->fieldCount; ++i) { if (i != accessionIx) tags = cgiStringNewValForVar(tags, table->fields[i], fr->row[i]); } edwFileResetTags(conn, ef, tags); edwFileFree(&ef); } }
static char *getAttributeVal(const struct bed8Attrs *gvf, char *tag)
/* Look tag up among gvf's attribute tags and return the value stored at the
 * same index, or NULL if the tag is not present.  The result points into gvf;
 * don't free it. */
{
int tagIx = stringArrayIx(tag, gvf->attrTags, gvf->attrCount);
return (tagIx < 0) ? NULL : gvf->attrVals[tagIx];
}
INLINE boolean nameIsTdbField(char *name)
/* Return TRUE if name is one of the tdb->{field} names listed below,
 * e.g. "track" or "shortLabel". */
{
static char *tdbFieldNames[] =
    {
    "track", "table", "shortLabel", "longLabel", "type",
    "priority", "grp", "parent", "subtracks", "visibility"
    };
int fieldIx = stringArrayIx(name, tdbFieldNames, ArraySize(tdbFieldNames));
return (fieldIx >= 0);
}
static char *findType(struct hash *cvHash,char **requested,int requestCount,
                      char **queryBy, char **org,boolean silent)
/* Return the type that was requested (typeOpt) or else the type associated with
 * the requested terms, passed through normalizeType.
 *   cvHash       - hash whose values are per-term hashes (one "ra" each)
 *   requested    - array of requested values, or NULL when nothing was requested
 *   requestCount - number of entries in requested
 *   queryBy      - in/out: name of the field the requested values are matched
 *                  against; switched to CV_TARGET when a CV_TERM query only
 *                  resolves as a target
 *   org          - out: set to a lower-cased copy of the matching entry's
 *                  ORGANISM value when the type is first discovered from a match
 *   silent       - when TRUE, a missing type is not reported with errAbort
 * Aborts if two requested values resolve to conflicting types (the control type
 * is compatible with any other type). */
{
struct hashCookie hc = hashFirst(cvHash);
struct hashEl *hEl;
char *type = typeOpt;

/* Scan even when a type was given on the command line, so that the requested
 * terms are validated against it.
 * FIXME (carried over): this should be using mdbCv APIs to get hashes. */
if (requested != NULL)
    {
    while ((hEl = hashNext(&hc)) != NULL)
        {
        struct hash *ra = (struct hash *)hEl->val;
        char *thisType = hashMustFindVal(ra, CV_TYPE);

        /* typeOfTerm entries describe types themselves; skip them. */
        if (sameWord(thisType,CV_TOT))
            continue;

        char *val = hashFindVal(ra, *queryBy);
        if (val == NULL)
            continue;

        int ix = stringArrayIx(val,requested,requestCount);
        if (ix == -1)
            continue;

        char *thisOrg = hashFindVal(ra, ORGANISM);
        if (type == NULL)
            {
            /* First match decides both the type and the organism. */
            if (thisOrg != NULL)
                *org = strLower(cloneString(thisOrg));
            type = thisType;
            continue;
            }

        if (!differentWord(type,thisType))
            continue;

        /* Types differ: a control may be mixed with one other type, anything
         * else is a conflict. */
        if (sameWord(CV_TERM_CONTROL,type))
            type = thisType;
        else if (differentWord(CV_TERM_CONTROL,thisType))
            errAbort("Error: Requested %s of type '%s'.  But '%s' has type '%s'\n",
                     *queryBy,type,requested[ix],thisType);
        }
    }

/* Special case of term becoming target: retry silently, matching on target. */
if (type == NULL && sameWord(*queryBy,CV_TERM))
    {
    char *queryByTarget = CV_TARGET;
    type = findType(cvHash,requested,requestCount,&queryByTarget,org,TRUE);
    if (type != NULL)
        *queryBy = queryByTarget;
    }

/* Still no type?  Abort unless the caller asked for silence. */
if (type == NULL && !silent)
    errAbort("Error: Required %s=%s ['%s', '%s', '%s', '%s' or '%s'] argument not found\n",
             *queryBy,(requested != NULL) ? *requested : "?",
             CV_TYPE, CV_TERM, CV_TAG, CV_TARGET, CV_LABEL);

return normalizeType(type);
}
boolean objNeedsCore(char *module)
/* Return TRUE if module is one of the module names listed below, i.e. one that
 * needs core defined. */
{
static char *needCore[] =
    {
    "project", "sample", "assay", "barcode", "cell_line", "contact",
    "death", "donor", "enrichment", "imaging", "preservation", "protocol",
    "publication", "rna", "seq", "single_cell", "well"
    };
int moduleIx = stringArrayIx(module, needCore, ArraySize(needCore));
return (moduleIx >= 0);
}
char *mapType(char *chromName, boolean isOrdered)
/* Return map type for info file: "PLACED" when chromName is in the wellMapped
 * list, otherwise "ORDERED" or "RANDOM" according to isOrdered.  The result is
 * a string constant. */
{
int placedIx = stringArrayIx(chromName, wellMapped, ArraySize(wellMapped));
if (placedIx >= 0)
    return "PLACED";
return isOrdered ? "ORDERED" : "RANDOM";
}
int romanToArabicChrom(char *roman, struct lineFile *lf)
/* Convert chromosome from roman numeral to a regular number: "I" gives 1,
 * "II" gives 2, ... "XVI" gives 16.
 *   roman - roman numeral to convert, one of I..XVI
 *   lf    - file being parsed; only its lineIx and fileName are used, for the
 *           error message
 * Aborts with the line number and file name if roman is not recognized. */
{
static char *chromNames[16] =
    {
    "I", "II", "III", "IV", "V", "VI", "VII", "VIII",
    "IX", "X", "XI", "XII", "XIII", "XIV", "XV", "XVI"
    };
int chromIx = stringArrayIx(roman, chromNames, ArraySize(chromNames));
if (chromIx < 0)
    errAbort("Unrecognized chromosome line %d of %s", lf->lineIx, lf->fileName);
/* chromIx is the zero-based position in chromNames; the chromosome number is
 * one more than that. */
return chromIx + 1;
}
void checkForbiddenFields(struct fieldedTable *table, char **forbiddenFields, int forbiddenCount)
/* Make sure table doesn't include forbidden fields.  Aborts, naming the field
 * and the table, on the first of the forbiddenCount entries in forbiddenFields
 * that appears among the table's field names. */
{
int ix = 0;
while (ix < forbiddenCount)
    {
    char *field = forbiddenFields[ix++];
    if (stringArrayIx(field, table->fields, table->fieldCount) < 0)
        continue;
    errAbort("Forbidden field %s in %s.", field, table->name);
    }
}
const struct vcfGenotype *vcfRecordFindGenotype(struct vcfRecord *record, char *sampleId)
/* Find the genotype and associated info for the individual, or return NULL.
 * NULL is returned when sampleId is NULL, when the file has no genotype
 * columns, or when sampleId is not among the file's genotype IDs.
 * This calls vcfParseGenotypes on the record before looking the sample up. */
{
struct vcfFile *vcff = record->file;
int haveSamples = (sampleId != NULL && vcff->genotypeCount != 0);
if (!haveSamples)
    return NULL;

vcfParseGenotypes(record);

/* Genotypes are stored in the same order as the file's genotype IDs. */
int sampleIx = stringArrayIx(sampleId, vcff->genotypeIds, vcff->genotypeCount);
return (sampleIx < 0) ? NULL : &(record->genotypes[sampleIx]);
}
static void genePredOptions(struct trackDb *track, char *type, struct sqlConnection *conn) /* Put up sequence type options for gene prediction tracks. */ { char *predType = cartUsualString(cart, hgtaGeneSeqType, genePredMenu[0]); char *dupType = cloneString(type); char *typeWords[3]; int typeWordCount, typeIx; /* Type field has 1-3 words which are in order: * genePred pepTable mrnaTable */ typeWordCount = chopLine(dupType, typeWords); /* TypeIx will be 0 (genomic) 1 (protein) 2(mrna). */ typeIx = stringArrayIx(predType, genePredMenu, typeWordCount); if (typeIx < 0) predType = genePredMenu[0]; htmlOpen("Select sequence type for %s", track->shortLabel); hPrintf("<FORM ACTION=\"%s\" METHOD=GET>\n", getScriptName()); cartSaveSession(cart); if (isRefGeneTrack(track->table)) { /* RefGene covers all 3 types, but in it's own way. */ for (typeIx = 0; typeIx < 3; ++typeIx) { genePredTypeButton(genePredMenu[typeIx], predType); hPrintf(" %s<BR>\n", genePredMenu[typeIx]); } } else { /* Otherwise we always have genomic, and we have * peptide/mrna only if there are corresponding table * in the type field. */ for (typeIx = 0; typeIx < typeWordCount; ++typeIx) { if (typeIx == 0 || sqlTableExists(conn, typeWords[typeIx])) { genePredTypeButton(genePredMenu[typeIx], predType); hPrintf(" %s<BR>\n", genePredMenu[typeIx]); } } } cgiMakeButton(hgtaDoGenePredSequence, "submit"); hPrintf(" "); cgiMakeButton(hgtaDoMainPage, "cancel"); hPrintf("</FORM>\n"); cgiDown(0.9); htmlClose(); freez(&dupType); }
struct gfOutput *gfOutputAny(char *format,
    int goodPpt, boolean qIsProt, boolean tIsProt,
    boolean noHead, char *databaseName,
    int databaseSeqCount, double databaseLetters,
    double minIdentity,
    FILE *f)
/* Initialize output in a variety of formats in file or memory.
 * Parameters:
 *    format - one of 'psl', 'pslx', 'sim4', 'blast', 'wublast', 'blast8',
 *             'blast9', 'xml', 'axt' or 'maf'; NULL means 'psl'
 *    goodPpt - minimum identity of alignments to output in parts per thousand
 *    qIsProt - true if query side is a protein.
 *    tIsProt - true if target (database) side is a protein.
 *    noHead - if true suppress header in psl/pslx output.
 *    databaseName - name of database.  Passed on for sim4 and blast-family output
 *    databaseSeqCount - number of sequences in database - only for blast
 *    databaseLetters - number of bases/aas in database - only blast
 *    minIdentity - minimum identity - only blast
 *    FILE *f - file.  Not passed on for sim4.
 * Aborts on an unrecognized format. */
{
/* Formats that are all handled by the blast writer. */
static char *blastTypes[] = {"blast", "wublast", "blast8", "blast9", "xml"};

if (format == NULL)
    format = "psl";

if (sameWord(format, "psl"))
    return gfOutputPsl(goodPpt, qIsProt, tIsProt, f, FALSE, noHead);
if (sameWord(format, "pslx"))
    return gfOutputPsl(goodPpt, qIsProt, tIsProt, f, TRUE, noHead);
if (sameWord(format, "sim4"))
    return gfOutputSim4(goodPpt, qIsProt, tIsProt, databaseName);
if (stringArrayIx(format, blastTypes, ArraySize(blastTypes)) >= 0)
    return gfOutputBlast(goodPpt, qIsProt, tIsProt,
            databaseName, databaseSeqCount, databaseLetters, format, minIdentity, f);
if (sameWord(format, "axt"))
    return gfOutputAxt(goodPpt, qIsProt, tIsProt, f);
if (sameWord(format, "maf"))
    return gfOutputMaf(goodPpt, qIsProt, tIsProt, f);

errAbort("Unrecognized output format '%s'", format);
return NULL;
}
boolean isSupportedFormat(char *format)
/* Return TRUE if this is one of our supported formats: either one of the names
 * listed below, or a name (after dropping any "bed_" prefix) accepted by
 * edwIsSupportedBigBedFormat. */
{
/* First deal with non bigBed */
static char *knownFormats[] =
    {
    "unknown", "fastq", "bam", "bed", "gtf",
    "bigWig", "bigBed",
    "bedLogR", "bedRrbs", "bedMethyl", "broadPeak", "narrowPeak",
    "bed_bedLogR", "bed_bedRrbs", "bed_bedMethyl", "bed_broadPeak", "bed_narrowPeak",
    "bedRnaElements", "openChromCombinedPeaks", "peptideMapping", "shortFrags",
    "rcc", "idat", "fasta", "customTrack",
    };
if (stringArrayIx(format, knownFormats, ArraySize(knownFormats)) >= 0)
    return TRUE;

/* If starts with bed_ then skip over prefix.  It will be caught by bigBed */
char *bigBedName = startsWith("bed_", format) ? format + 4 : format;
return edwIsSupportedBigBedFormat(bigBedName);
}
static Color gvfColor(struct track *tg, void *item, struct hvGfx *hvg) /* Color item by var_type attribute, according to Deanna Church's document * SvRepresentation2.doc attached to redmine #34. */ { struct bed8Attrs *gvf = item; Color dbVarUnknown = hvGfxFindColorIx(hvg, 0xb2, 0xb2, 0xb2); int ix = stringArrayIx("var_type", gvf->attrTags, gvf->attrCount); if (ix < 0) return dbVarUnknown; char *varType = gvf->attrVals[ix]; if (sameString(varType, "CNV") || sameString(varType, "copy_number_variation")) return MG_BLACK; else if (strstrNoCase(varType, "Gain")) return hvGfxFindColorIx(hvg, 0x00, 0x00, 0xff); else if (strstrNoCase(varType, "Loss")) return hvGfxFindColorIx(hvg, 0xff, 0x00, 0x00); else if (strstrNoCase(varType, "Insertion")) return hvGfxFindColorIx(hvg, 0xff, 0xcc, 0x00); else if (strstrNoCase(varType, "Complex")) return hvGfxFindColorIx(hvg, 0x99, 0xcc, 0xff); else if (strstrNoCase(varType, "Unknown")) return dbVarUnknown; else if (strstrNoCase(varType, "Other")) return hvGfxFindColorIx(hvg, 0xcc, 0x99, 0xff); else if (strstrNoCase(varType, "Inversion")) return hvGfxFindColorIx(hvg, 0x99, 0x33, 0xff); // Needs pattern else if (strstrNoCase(varType, "LOH")) return hvGfxFindColorIx(hvg, 0x00, 0x00, 0xff); // Needs pattern else if (strstrNoCase(varType, "Everted")) return hvGfxFindColorIx(hvg, 0x66, 0x66, 0x66); // Needs pattern else if (strstrNoCase(varType, "Transchr")) return hvGfxFindColorIx(hvg, 0xb2, 0xb2, 0xb2); // Plus black vert. bar at broken end else if (strstrNoCase(varType, "UPD")) return hvGfxFindColorIx(hvg, 0x00, 0xff, 0xff); // Needs pattern return dbVarUnknown; }
void doMiddle()
/* Print the body of the page: the definition of the resolved type followed by
 * an HTML table of the controlled-vocabulary terms that match the request.
 * The request comes from the file-level option variables (termOpt, tagOpt,
 * targetOpt, labelOpt, typeOpt, organismOptLower) and the "deprecated" CGI
 * variable.  Warns if nothing was printed and no type definition was found. */
{
struct hash *cvHash = raReadAll((char *)cvFile(), CV_TERM);
struct hashCookie hc = hashFirst(cvHash);
struct hashEl *hEl;
struct slList *termList = NULL;
struct hash *ra;
int totalPrinted = 0;
/* Deprecated terms are hidden unless a "deprecated" CGI variable is present. */
boolean excludeDeprecated = (cgiOptionalString("deprecated") == NULL);

// Prepare an array of selected terms (if any).
// Precedence of the request options is: tag, then target, then label, with
// term as the default.
int requestCount = 0;
char **requested = NULL;
char *requestVal = termOpt;
char *queryBy = CV_TERM;
if (tagOpt)
    {
    requestVal = tagOpt;
    queryBy = CV_TAG;
    }
else if (targetOpt)
    {
    requestVal = targetOpt;
    queryBy = CV_TERM;  // request target is special: lookup term, convert to target, display target
    }
else if (labelOpt)
    {
    requestVal = labelOpt;
    queryBy = CV_LABEL;
    }
if (requestVal)
    {
    /* Strip double quotes, then split the comma separated request in place:
     * first call counts the pieces, second call fills the array. */
    (void)stripChar(requestVal,'\"');
    requestCount = chopCommas(requestVal,NULL);
    requested = needMem(requestCount * sizeof(char *));
    chopByChar(requestVal,',',requested,requestCount);
    }

char *org = NULL;  // if the org is specified in the type (eg. cell line)
                   // then use that for the org, otherwise use the command line option,
                   // otherwise use human.
/* findType may change queryBy and may set org. */
char *type = findType(cvHash,requested,requestCount,&queryBy, &org, FALSE);
if (org == NULL)
    org = organismOptLower;
if (org == NULL)
    org = ORG_HUMAN;

// Special logic for requesting antibody by target
if (targetOpt && requestCount > 0 && sameWord(queryBy,CV_TERM) && sameWord(type,CV_TERM_ANTIBODY))
    {
    // Several antibodies may have same target.
    // requested target={antibody} and found antibody
    // Must now convert each of the requested terms to its target before displaying all targets
    char **targets = convertAntibodiesToTargets(cvHash,requested,requestCount);
    if (targets != NULL)
        {
        /* The converted array replaces the original request array. */
        freeMem(requested);
        requested = targets;
        queryBy = CV_TARGET;
        }
    }
//warn("Query by: %s = '%s' type:%s",queryBy,requestVal?requestVal:"all",type);

// Get just the terms that match type and requested, then sort them
if (differentWord(type,CV_TOT) || typeOpt != NULL ) // If type resolves to typeOfTerm and
    {                                               // typeOfTerm was not requested,
    while ((hEl = hashNext(&hc)) != NULL)           // then just show definition
        {
        ra = (struct hash *)hEl->val;
        char *thisType = (char *)cvTermNormalized(hashMustFindVal(ra,CV_TYPE));
        /* Keep rows of the resolved type; when specific values were requested,
         * rows of the control type are kept as well. */
        if (differentWord(thisType,type) && (requested == NULL
        ||  differentWord(thisType,CV_TERM_CONTROL)))
            continue;
        // Skip all rows that do not match queryBy param if specified
        if (requested)
            {
            char *val = hashFindVal(ra, queryBy);
            if (val == NULL)
                {
                // Special case for input that has no target
                if (sameString(queryBy, CV_TARGET))
                    val = hashMustFindVal(ra, CV_TERM);
                else
                    continue;
                }
            if (-1 == stringArrayIx(val,requested,requestCount))
                continue;
            }
        else if (excludeDeprecated)
            {
            /* Deprecated rows are only filtered when no specific values were
             * requested. */
            if (hashFindVal(ra, "deprecated") != NULL)
                continue;
            }
        slAddTail(&termList, ra);
        }
    }
slSort(&termList, termCmp);

/* Third argument tells doTypeDefinition whether the term list is empty. */
boolean described = doTypeDefinition(type,FALSE,(slCount(termList) == 0));
/* Tables of more than 5 rows get the sortable class and its javascript. */
boolean sortable = (slCount(termList) > 5);
if (sortable)
    {
    webIncludeResourceFile("HGStyle.css");
    jsIncludeFile("jquery.js",NULL);
    jsIncludeFile("utils.js",NULL);
    printf("<TABLE class='sortable' border=1 CELLSPACING=0 style='border: 2px outset #006600; "
           "background-color:%s;'>\n",COLOR_BG_DEFAULT);
    }
else
    printf("<TABLE BORDER=1 BGCOLOR=%s CELLSPACING=0 CELLPADDING=2>\n",COLOR_BG_DEFAULT);
if (slCount(termList) > 0)
    {
    doTypeHeader(type, org,sortable);

    // Print out the terms; only rows doTypeRow reports as printed are counted
    while ((ra = slPopHead(&termList)) != NULL)
        {
        if (doTypeRow( ra, org ))
            totalPrinted++;
        }
    }
/* NOTE(review): the TBODY close is written even when doTypeHeader was not
 * called; presumably the TBODY is opened there - confirm. */
puts("</TBODY></TABLE><BR>");
if (sortable)
    jsInline("{$(document).ready(function() "
             "{sortTable.initialize($('table.sortable')[0],true,true);});}\n");
if (totalPrinted == 0)
    {
    /* Nothing printed and no definition shown: the type was not recognized. */
    if (!described)
        warn("Error: Unrecognised type (%s)\n", type);
    }
else if (totalPrinted > 1)
    printf("Total = %d\n", totalPrinted);
}
boolean cdwCheckEnrichedIn(char *enriched)
/* Return TRUE if enriched is one of the values in the edwSupportedEnrichedIn
 * list, i.e. an allowed value. */
{
int enrichedIx = stringArrayIx(enriched, edwSupportedEnrichedIn, edwSupportedEnrichedInCount);
return (enrichedIx >= 0);
}
void encode2Meta(char *database, char *manifestIn, char *outMetaRa)
/* encode2Meta - Create meta files.
 *   database   - must be one of the names in metaDbs; only that database is
 *                processed
 *   manifestIn - manifest file; only files named in it get file-level nodes
 *   outMetaRa  - output file the meta tree is written to
 * Builds a tree of composite -> experiment -> file nodes from the database's
 * metadata objects and the encodeExp table, hoists and sorts tags, and writes
 * the children of the root to outMetaRa.  Aborts on an unrecognized database or
 * on a metadata object whose dccAccession has no composite. */
{
/* dbIx is only used to validate database; the loop below finds it again. */
int dbIx = stringArrayIx(database, metaDbs, ArraySize(metaDbs));
if (dbIx < 0)
    errAbort("Unrecognized database %s", database);

/* Create a three level meta.ra format file based on hgFixed.encodeExp
 * and database.metaDb tables. The levels are composite, experiment, file */
struct metaNode *metaTree = metaTreeNew("encode2");

/* Load up the manifest, and index its entries by file name. */
struct encode2Manifest *mi, *miList = encode2ManifestShortLoadAll(manifestIn);
struct hash *miHash = hashNew(18);
for (mi = miList; mi != NULL; mi = mi->next)
    hashAdd(miHash, mi->fileName, mi);
verbose(1, "%d files in %s\n", miHash->elCount, manifestIn);

/* Load up encodeExp info. */
struct sqlConnection *expConn = sqlConnect(expDb);
struct encodeExp *expList = encodeExpLoadByQuery(expConn, "NOSQLINJ select * from encodeExp");
sqlDisconnect(&expConn);
verbose(1, "%d experiments in encodeExp\n", slCount(expList));

/* Composite name -> composite node. */
struct hash *compositeHash = hashNew(0);

/* Go through each organism database in turn.  All but the requested one are
 * skipped, so the body runs for a single value of i. */
int i;
for (i=0; i<ArraySize(metaDbs); ++i)
    {
    char *db = metaDbs[i];
    if (!sameString(database, db))
        continue;

    verbose(1, "exploring %s\n", db);
    struct mdbObj *mdb, *mdbList = getMdbList(db);
    verbose(1, "%d meta objects in %s\n", slCount(mdbList), db);

    /* Get info on all composites: one node per object of objType "composite",
     * carrying all of that object's vars plus an "assembly" var. */
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
        {
        char *objType = mdbVarLookup(mdb->vars, "objType");
        if (objType != NULL && sameString(objType, "composite"))
            {
            char compositeName[256];
            safef(compositeName, sizeof(compositeName), "%s", mdb->obj);
            struct metaNode *compositeNode = metaNodeNew(compositeName);
            slAddHead(&metaTree->children, compositeNode);
            compositeNode->parent = metaTree;
            struct mdbVar *v;
            for (v=mdb->vars; v != NULL; v = v->next)
                {
                metaNodeAddVar(compositeNode, v->var, v->val);
                }
            metaNodeAddVar(compositeNode, "assembly", db);
            hashAdd(compositeHash, mdb->obj, compositeNode);
            }
        }

    /* Make up one more for experiments with no composite. */
    char *noCompositeName = "wgEncodeZz";
    struct metaNode *noCompositeNode = metaNodeNew(noCompositeName);
    slAddHead(&metaTree->children, noCompositeNode);
    noCompositeNode->parent = metaTree;
    hashAdd(compositeHash, noCompositeName, noCompositeNode);

    /* Now go through objects trying to tie experiments to composites.
     * expToComposite maps dccAccession -> composite name; the first mapping
     * seen for an accession wins, later conflicting ones are only logged. */
    struct hash *expToComposite = hashNew(16);
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
        {
        char *composite = mdbVarLookup(mdb->vars, "composite");
        /* NOTE(review): composite is passed to originalData without a NULL
         * check here; presumably originalData copes with NULL - confirm. */
        if (originalData(composite))
            {
            char *dccAccession = mdbVarLookup(mdb->vars, "dccAccession");
            if (dccAccession != NULL)
                {
                char *oldComposite = hashFindVal(expToComposite, dccAccession);
                if (oldComposite != NULL)
                    {
                    if (!sameString(oldComposite, composite))
                        verbose(2, "%s maps to %s ignoring mapping to %s",
                            dccAccession, oldComposite, composite);
                    }
                else
                    {
                    hashAdd(expToComposite, dccAccession, composite);
                    }
                }
            }
        }

    /* Now get info on all experiments in this organism.  organisms[] is
     * indexed with the same i as metaDbs[] - presumably parallel arrays.
     * expHash maps experiment node name -> experiment node. */
    struct hash *expHash = hashNew(0);
    struct encodeExp *exp;
    for (exp = expList; exp != NULL; exp = exp->next)
        {
        if (sameString(exp->organism, organisms[i]))
            {
            if (exp->accession != NULL)
                {
                /* Experiments with no known composite hang off wgEncodeZz. */
                char *composite = hashFindVal(expToComposite, exp->accession);
                struct metaNode *compositeNode;
                if (composite != NULL)
                    {
                    compositeNode = hashMustFindVal(compositeHash, composite);
                    }
                else
                    {
                    compositeNode = noCompositeNode;
                    }
                struct metaNode *expNode = wrapNodeAroundExp(exp);
                hashAdd(expHash, expNode->name, expNode);
                slAddHead(&compositeNode->children, expNode);
                expNode->parent = compositeNode;
                }
            }
        }

    /* Finally attach file-level nodes: every non-composite object that has a
     * dccAccession and whose rewritten fileName appears in the manifest. */
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
        {
        char *fileName = NULL, *dccAccession = NULL;
        char *objType = mdbVarLookup(mdb->vars, "objType");
        if (objType != NULL && sameString(objType, "composite"))
            continue;
        dccAccession = mdbVarLookup(mdb->vars, "dccAccession");
        if (dccAccession == NULL)
            continue;
        char *composite = hashFindVal(expToComposite, dccAccession);
        if (composite == NULL)
            errAbort("Can't find composite for %s", mdb->obj);
        struct mdbVar *v;
        for (v = mdb->vars; v != NULL; v = v->next)
            {
            char *var = v->var, *val = v->val;
            if (sameString("fileName", var))
                {
                /* The stored var itself is rewritten to db/composite/firstFile,
                 * so the node created below picks up the full path. */
                fileName = val;
                char path[PATH_LEN];
                char *comma = strchr(fileName, ',');
                if (comma != NULL)
                     *comma = 0;  /* Cut off comma separated list. */
                safef(path, sizeof(path), "%s/%s/%s", db,
                    composite, fileName);  /* Add database path */
                fileName = val = v->val = cloneString(path);
                }
            }
        if (fileName != NULL)
            {
            if (hashLookup(miHash, fileName))
                {
                /* Objects whose experiment is not in expHash are silently skipped. */
                struct metaNode *expNode = hashFindVal(expHash, dccAccession);
                if (expNode != NULL)
                    {
                    struct metaNode *fileNode = metaNodeNew(mdb->obj);
                    slAddHead(&expNode->children, fileNode);
                    fileNode->parent = expNode;
                    struct mdbVar *v;
                    for (v=mdb->vars; v != NULL; v = v->next)
                        {
                        metaNodeAddVar(fileNode, v->var, v->val);
                        }
                    }
                }
            }
        }
#ifdef SOON
#endif /* SOON */
    }

struct hash *suppress = makeSuppress();
struct hash *closeEnoughTags = makeCloseEnoughTags();

metaTreeHoist(metaTree, closeEnoughTags);
metaTreeSortChildrenSortTags(metaTree);

/* Only the root's children are written; the root node itself is not. */
FILE *f = mustOpen(outMetaRa, "w");
struct metaNode *node;
for (node = metaTree->children; node != NULL; node = node->next)
    metaTreeWrite(0, 0, BIGNUM, FALSE, NULL, node, suppress, f);
carefulClose(&f);

/* Write warning about tags in highest parent: vars left on the root are
 * reported at verbosity 1 rather than written to the output. */
struct mdbVar *v;
for (v = metaTree->vars; v != NULL; v = v->next)
    verbose(1, "Omitting universal %s %s\n", v->var, v->val);
}
boolean isFinChrom(char *chrom)
/* Return TRUE if chrom is in the finChroms list, i.e. is a finished chromosome. */
{
int chromIx = stringArrayIx(chrom, finChroms, ArraySize(finChroms));
return (chromIx >= 0);
}
void cdwTextForIndex(char *outFile) /* cdwTextForIndex - Make text file used for building ixIxx indexes. */ { struct sqlConnection *conn = cdwConnect(); struct hash *textHash = hashTextFields(conn, "cdwFileTags"); /* Start up query of all fields of fileTags table and get array of all fields from result */ char query[256]; sqlSafef(query, sizeof(query), "select * from cdwFileTags"); struct sqlResult *sr = sqlGetResult(conn, query); char **allFields = NULL; int fieldCount = sqlResultFieldArray(sr, &allFields); /* Accession is special, make sure it's there */ int idIx = stringArrayIx("file_id", allFields, fieldCount); if (idIx < 0) errAbort("Can't find file_id in cdwFileTags"); /* Make up an array that tells us the order of fields we'll output, starting with priority fields */ /* Get all priority fields first */ int order[fieldCount]; int fieldsUsed = 0; int i; struct hash *usedHash = hashNew(0); for (i=0; i<ArraySize(priorityFields); ++i) { char *field = priorityFields[i]; int pos = stringArrayIx(field, allFields, fieldCount); if (pos >= 0) { order[fieldsUsed++] = pos; hashAdd(usedHash, field, NULL); } } /* Get other fields now */ for (i=0; i<fieldCount; ++i) { char *field = allFields[i]; if (!hashLookup(usedHash, field) && hashLookup(textHash, field)) order[fieldsUsed++] = i; } /* Now loop through sql result and write output */ FILE *f = mustOpen(outFile, "w"); char **row; while ((row = sqlNextRow(sr)) != NULL) { char *id = row[idIx]; if (id != NULL) { fprintf(f, "%s", id); for (i=0; i<fieldsUsed; ++i) { char *val = row[order[i]]; if (val != NULL) fprintf(f, " %s", row[order[i]]); } fprintf(f, "\n"); } } carefulClose(&f); }
void edwParseSubmitFile(struct sqlConnection *conn, char *submitLocalPath, char *submitUrl, struct submitFileRow **retSubmitList) /* Load and parse up this file as fielded table, make sure all required fields are there, * and calculate indexes of required fields. This produces an edwFile list, but with * still quite a few fields missing - just what can be filled in from submit filled in. * The submitUrl is just used for error reporting. If it's local, just make it the * same as submitLocalPath. */ { char *requiredFields[] = {"file_name", "format", "output_type", "experiment", "replicate", "enriched_in", "md5_sum", "size", "modified", "valid_key"}; struct fieldedTable *table = fieldedTableFromTabFile(submitLocalPath, submitUrl, requiredFields, ArraySize(requiredFields)); /* Get offsets of all required fields */ int fileIx = stringArrayIx("file_name", table->fields, table->fieldCount); int formatIx = stringArrayIx("format", table->fields, table->fieldCount); int outputIx = stringArrayIx("output_type", table->fields, table->fieldCount); int experimentIx = stringArrayIx("experiment", table->fields, table->fieldCount); int replicateIx = stringArrayIx("replicate", table->fields, table->fieldCount); int enrichedIx = stringArrayIx("enriched_in", table->fields, table->fieldCount); int md5Ix = stringArrayIx("md5_sum", table->fields, table->fieldCount); int sizeIx = stringArrayIx("size", table->fields, table->fieldCount); int modifiedIx = stringArrayIx("modified", table->fields, table->fieldCount); int validIx = stringArrayIx("valid_key", table->fields, table->fieldCount); /* See if we're doing replacement and check have all columns needed if so. 
*/ int replacesIx = stringArrayIx(replacesTag, table->fields, table->fieldCount); int replaceReasonIx = stringArrayIx(replaceReasonTag, table->fields, table->fieldCount); boolean doReplace = (replacesIx != -1); if (doReplace) if (replaceReasonIx == -1) errAbort("Error: got \"%s\" column without \"%s\" column in %s.", replacesTag, replaceReasonTag, submitUrl); /* Loop through and make sure all field values are ok */ struct fieldedRow *fr; for (fr = table->rowList; fr != NULL; fr = fr->next) { char **row = fr->row; char *fileName = row[fileIx]; allGoodFileNameChars(fileName); char *format = row[formatIx]; if (!isSupportedFormat(format)) errAbort("Format %s is not supported", format); allGoodSymbolChars(row[outputIx]); char *experiment = row[experimentIx]; if (!isExperimentId(experiment)) errAbort("%s in experiment field does not seem to be an encode experiment", experiment); char *replicate = row[replicateIx]; if (differentString(replicate, "pooled") && differentString(replicate, "n/a") ) if (!isAllNum(replicate)) errAbort("%s is not a good value for the replicate column", replicate); char *enriched = row[enrichedIx]; if (!encode3CheckEnrichedIn(enriched)) errAbort("Enriched_in %s is not supported", enriched); char *md5 = row[md5Ix]; if (strlen(md5) != 32 || !isAllHexLower(md5)) errAbort("md5 '%s' is not in all lower case 32 character hexadecimal format.", md5); char *size = row[sizeIx]; if (!isAllNum(size)) errAbort("Invalid size '%s'", size); char *modified = row[modifiedIx]; if (!isAllNum(modified)) errAbort("Invalid modification time '%s'", modified); char *validIn = row[validIx]; char *realValid = encode3CalcValidationKey(md5, sqlLongLong(size)); if (!sameString(validIn, realValid)) errAbort("The valid_key %s for %s doesn't fit", validIn, fileName); freez(&realValid); if (doReplace) { char *replaces = row[replacesIx]; char *reason = row[replaceReasonIx]; if (!isEmptyOrNa(replaces)) { char *prefix = edwLicensePlateHead(conn); if (!startsWith(prefix, replaces)) 
errAbort("%s in replaces column is not an ENCODE file accession", replaces); if (isEmptyOrNa(reason)) errAbort("Replacing %s without a reason\n", replaces); } } } *retSubmitList = submitFileRowFromFieldedTable(conn, table, fileIx, md5Ix, sizeIx, modifiedIx, replacesIx, replaceReasonIx); }
void ccFirst(char *source, char *dest, char *hostList, char *lockDir)
/* Do first instance of this program.  Copy file to first host,
 * make up lock directory, and then poll lock directory to see
 * if we're done.
 *   source   - file to copy
 *   dest     - where to copy it to
 *   hostList - file listing the hosts; the current host ($HOST) must be in it
 *   lockDir  - directory to create and fill with one lock file per host
 * A host's lock file is empty while its copy is in progress, holds a non-zero
 * byte once the copy succeeded and a zero byte if it failed.  A forked child
 * carries on copying via ccMore while the parent polls the lock files every
 * 10 seconds, printing progress, until every host has finished; it then lists
 * the hosts with errors on stderr and removes the locks.
 * Aborts on setup failures (no HOST, empty host list, host not listed, lock
 * directory or lock file can't be made, first copy fails, status can't be
 * written, fork fails). */
{
char *firstHost;
char **hosts;
char *hostBuf;
int hostCount;
int firstLock;
int childPid;
char *thisHost = getenv("HOST");
char ok;

/* NOTE(review): the original stored this in an unused variable; the call is
 * kept in case clock1000 records the time of its first call - confirm. */
(void)clock1000();

if (thisHost == NULL)
    errAbort("HOST environment variable undefined\n");
readAllWords(hostList, &hosts, &hostCount, &hostBuf);
if (hostCount <= 0)
    errAbort("%s is empty.", hostList);
if (stringArrayIx(thisHost, hosts, hostCount) < 0)
    errAbort("Current host (%s) not in host list\n", thisHost);
if (mkdir(lockDir, 0777) < 0)
    errAbort("Couldn't make lock directory %s\n", lockDir);
firstHost = thisHost;
firstLock = makeLock(firstHost, lockDir);
if (firstLock < 0)
    errAbort("Couldn't make lock file %s/%s\n", lockDir, firstHost);
if (cpFile(source, dest) != 0)
    {
    /* dest is a string; printing it with %d was undefined behavior. */
    warn("Couldn't copy %s to %s:%s\n", source, firstHost, dest);
    close(firstLock);
    cleanupLocks(lockDir);
    errAbort("Cleaned up locks in %s, aborting copy.", lockDir);
    }

/* Mark the first host as successfully finished.  If the byte can't be written
 * the lock would look "in progress" forever, so give up instead. */
ok = 1;
if (write(firstLock, &ok, 1) != 1)
    {
    close(firstLock);
    cleanupLocks(lockDir);
    errAbort("Couldn't write status to lock file %s/%s\n", lockDir, firstHost);
    }
close(firstLock);

childPid = fork();
if (childPid < 0)
    {
    /* Without a child nobody would do the remaining copies and the polling
     * loop below would never finish. */
    cleanupLocks(lockDir);
    errAbort("Couldn't fork to continue copying, cleaned up locks in %s\n", lockDir);
    }
if (childPid == 0)
    {
    /* Have child process keep copying. */
    ccMore(dest, hostList, 0, lockDir);
    }
else
    {
    int sleepTime = 10;
    int lastStart = 0, lastErr = 0, lastEnd = 0;

    /* Have parent process wait until last file done. */
    for (;;)
        {
        int lockFd;
        int i;
        int startCount = 0;
        int endCount = 0;
        int errCount = 0;
        int finCount;

        /* Classify every host by the state of its lock file; hosts with no
         * lock file yet are simply not counted. */
        for (i=0; i<hostCount; ++i)
            {
            char *ln = lockName(lockDir, hosts[i]);
            lockFd = open(ln, O_RDONLY);
            if (lockFd >= 0)
                {
                char status;
                if (read(lockFd, &status, 1) < 1)
                    ++startCount;
                else if (status)
                    ++endCount;
                else
                    ++errCount;
                close(lockFd);
                }
            }
        finCount = endCount + errCount;

        /* Progress is reported on every poll; the change check is disabled. */
        // if (lastStart != startCount || lastEnd != endCount || lastErr != errCount)
            {
            printf(" copies in progress %d finished %d errors %d total %d\n",
                startCount, endCount, errCount, hostCount);
            lastStart = startCount;
            lastEnd = endCount;
            lastErr = errCount;
            }

        if (finCount >= hostCount)
            {
            if (errCount > 0)
                {
                /* Re-read the locks to name the failed hosts.  "??host??"
                 * marks a missing lock file, "?host?" an empty one. */
                fprintf(stderr, "Errors copying to hosts:");
                for (i=0; i<hostCount; ++i)
                    {
                    char *ln = lockName(lockDir, hosts[i]);
                    lockFd = open(ln, O_RDONLY);
                    if (lockFd < 0)
                        {
                        fprintf(stderr, " ??%s??", hosts[i]);
                        }
                    else
                        {
                        char status;
                        if (read(lockFd, &status, 1) < 1)
                            fprintf(stderr, " ?%s?", hosts[i]);
                        else if (!status)
                            fprintf(stderr, " %s", hosts[i]);
                        close(lockFd);
                        }
                    }
                fprintf(stderr, "\n");
                }
            cleanupLocks(lockDir);
            break;
            }
        sleep(sleepTime);
        }
    }
}
static boolean cmpReal(char *pat, char *cmpOp)
/* Return TRUE if we have a real cmpOp: pat must be non-empty and cmpOp must be
 * in cmpOpMenu at a position after the first entry (index 0 does not count as
 * a real comparison). */
{
if (!isNotEmpty(pat))
    return 0;
return stringArrayIx(cmpOp, cmpOpMenu, cmpOpMenuSize) > 0;
}
struct submitFileRow *submitFileRowFromFieldedTable( struct sqlConnection *conn, struct fieldedTable *table, int fileIx, int md5Ix, int sizeIx, int modifiedIx, int replacesIx, int replaceReasonIx) /* Turn parsed out table (still all just strings) into list of edwFiles. */ { struct submitFileRow *sfr, *sfrList = NULL; struct edwFile *bf; struct fieldedRow *fr; struct dyString *tags = dyStringNew(0); char *ucscDbTag = "ucsc_db"; int ucscDbField = stringArrayIx(ucscDbTag, table->fields, table->fieldCount); for (fr = table->rowList; fr != NULL; fr = fr->next) { char **row = fr->row; AllocVar(bf); bf->submitFileName = cloneString(row[fileIx]); safef(bf->md5, sizeof(bf->md5), "%s", row[md5Ix]); bf->size = sqlLongLong(row[sizeIx]); bf->updateTime = sqlLongLong(row[modifiedIx]); /* Add as tags any fields not included in fixed fields. */ dyStringClear(tags); int i; for (i=0; i<table->fieldCount; ++i) { if (i != fileIx && i != md5Ix && i != sizeIx && i != modifiedIx) { cgiEncodeIntoDy(table->fields[i], row[i], tags); } } if (ucscDbField < 0) { /* Try to make this field up from file name */ char *slash = strchr(bf->submitFileName, '/'); if (slash == NULL) errAbort("Can't make up '%s' field from '%s'", ucscDbTag, bf->submitFileName); int len = slash - bf->submitFileName; char ucscDbVal[len+1]; memcpy(ucscDbVal, bf->submitFileName, len); ucscDbVal[len] = 0; /* Do a little check on it */ if (!sameString("mm9", ucscDbVal) && !sameString("mm10", ucscDbVal) && !sameString("dm3", ucscDbVal) && !sameString("ce10", ucscDbVal) && !sameString("hg19", ucscDbVal)) errAbort("Unrecognized ucsc_db %s - please arrange files so that the top " "level directory in the fileName in the manifest is a UCSC database name " "like 'hg19' or 'mm10.' Alternatively please include a ucsc_db column.", ucscDbVal); /* Add it to tags. */ cgiEncodeIntoDy(ucscDbTag, ucscDbVal, tags); } bf->tags = cloneString(tags->string); /* Fake other fields. 
*/ bf->edwFileName = cloneString(""); /* Allocate wrapper structure */ AllocVar(sfr); sfr->file = bf; /* fill in fields about replacement maybe */ if (replacesIx != -1) { char *replacesAcc = row[replacesIx]; char *reason = row[replaceReasonIx]; int fileId = edwFileIdForLicensePlate(conn, replacesAcc); if (fileId == 0) errAbort("%s in %s column doesn't exist in warehouse", replacesAcc, replacesTag); sfr->replaces = cloneString(replacesAcc); sfr->replaceReason = cloneString(reason); sfr->replacesFile = fileId; } slAddHead(&sfrList, sfr); } slReverse(&sfrList); dyStringFree(&tags); return sfrList; }
void addSdrfToStormTop(char *sdrfFile, struct tagStorm *storm) /* Add lines of sdrfFile as children of first top level stanza in storm. */ { struct fieldedTable *table = fieldedTableFromTabFile(sdrfFile, sdrfFile, NULL, 0 ); /* Convert ArrayExpress field names to our field names */ int fieldIx; char *lastNonTerm = NULL; char *lastNonUnit = NULL; for (fieldIx=0; fieldIx < table->fieldCount; fieldIx += 1) { char tagName[256]; aeFieldToNormalField("sdrf.", table->fields[fieldIx], tagName, sizeof(tagName)); if (lastNonTerm != NULL && sameString("sdrf.Term_Source_REF", tagName)) { safef(tagName, sizeof(tagName), "%s_Term_Source_REF", lastNonTerm); table->fields[fieldIx] = lmCloneString(table->lm, tagName); } else if (lastNonTerm != NULL && sameString("sdrf.Term_Accession_Number", tagName)) { safef(tagName, sizeof(tagName), "%s_Term_Accession_Number", lastNonTerm); table->fields[fieldIx] = lmCloneString(table->lm, tagName); } else if (lastNonUnit != NULL && startsWith("sdrf.Unit_", tagName)) { safef(tagName, sizeof(tagName), "%s_Unit", lastNonUnit); lastNonTerm = lmCloneString(table->lm, tagName); table->fields[fieldIx] = lastNonTerm; } else { lastNonTerm = lastNonUnit = lmCloneString(table->lm, tagName); table->fields[fieldIx] = lastNonTerm; } } /* Make up fastq field indexes to handle processing of paired reads in fastq, which * take two lines of sdrf file. 
*/ char *fieldsWithFastqs[] = /* Fields that contain the fastq file names */ { "sdrf.Comment_FASTQ_URI", "sdrf.Comment_SUBMITTED_FILE_NAME", "sdrf.Scan_Name", }; boolean mightReuseStanza = TRUE; bool *reuseMultiFields; // If set this field can vary and line still reused AllocArray(reuseMultiFields, table->fieldCount); int i; for (i=0; i<ArraySize(fieldsWithFastqs); ++i) { char *field = fieldsWithFastqs[i]; int ix = stringArrayIx(field, table->fields, table->fieldCount); if (ix >=0) reuseMultiFields[ix] = TRUE; else if (i == 0) { mightReuseStanza = FALSE; break; // Make sure has first one if going to do paired read fastq processing } } /* Make up a list and hash of fieldMergers to handle conversion of columns that occur * multiple times to a comma-separated list of values in a single column. */ struct fieldMerger /* Something to help merge multiple columns with same name */ { struct fieldMerger *next; /* Next in list */ char *name; struct dyString *val; /* Comma separated value */ }; struct hash *fieldHash = hashNew(0); struct fieldMerger *fmList = NULL; for (fieldIx = 0; fieldIx < table->fieldCount; ++fieldIx) { char *fieldName = table->fields[fieldIx]; if (hashLookup(fieldHash, fieldName) == NULL) { struct fieldMerger *fm; AllocVar(fm); fm->name = fieldName; fm->val = dyStringNew(0); slAddTail(&fmList, fm); hashAdd(fieldHash, fieldName, fm); } } /* Grab top level stanza and make sure there is only one. */ struct tagStanza *topStanza = storm->forest; if (topStanza == NULL || topStanza->next != NULL) internalErr(); /* Scan through table, making new stanzas for each row and hooking them into topStanza */ struct fieldedRow *fr, *lastFr = NULL; struct tagStanza *stanza = NULL; for (fr = table->rowList; fr != NULL; fr = fr->next) { /* Empty out any existing vals */ struct fieldMerger *fm; for (fm = fmList; fm != NULL; fm = fm->next) dyStringClear(fm->val); /* Add all non-empty values from this row to our fieldMergers. 
*/ char **row = fr->row; for (fieldIx = 0; fieldIx < table->fieldCount; ++fieldIx) { char *fieldName = table->fields[fieldIx]; fm = hashMustFindVal(fieldHash, fieldName); char *val = row[fieldIx]; if (!isEmpty(val)) csvEscapeAndAppend(fm->val, val); } /* If only the reuseMultiFields are varying, append to those values in previous stanza, * otherwise make a new stanza */ if (mightReuseStanza && lastFr != NULL && sameExceptForSome(lastFr->row, fr->row, table->fieldCount, reuseMultiFields)) { int i; for (i=0; i<ArraySize(fieldsWithFastqs); ++i) { char *fieldName = fieldsWithFastqs[i]; if ((fm = hashFindVal(fieldHash, fieldName)) != NULL) { char *newVal = fm->val->string; char *oldVal = tagMustFindVal(stanza, fieldName); int bothSize = strlen(newVal) + strlen(oldVal) + 1 + 1; char bothBuf[bothSize]; safef(bothBuf, bothSize, "%s,%s", oldVal, newVal); tagStanzaUpdateTag(storm, stanza, fieldName, bothBuf); } } } else { /* Output all nonempty vals to stanza */ stanza = tagStanzaNew(storm, topStanza); for (fm = fmList; fm != NULL; fm = fm->next) if (fm->val->stringSize > 0) tagStanzaAppend(storm, stanza, fm->name, fm->val->string); } lastFr = fr; } slReverse(&topStanza->children); }