void eapToHub(char *outDir) /* eapToHub - Convert some analysis results to a hub for easy viz.. */ { struct sqlConnection *conn = eapConnect(); struct eapGraph *eg = eapGraphNew(conn); makeDirsOnPath(outDir); char path[PATH_LEN]; safef(path, sizeof(path), "%s/%s", outDir, "hub.txt"); writeHubTxt(conn, path); safef(path, sizeof(path), "%s/%s", outDir, "genomes.txt"); writeGenomesTxt(conn, path); int i; struct edwExperiment *eeList = getUwDnaseExps(conn); for (i=0; i<ArraySize(assemblyArray); ++i) { char *a = assemblyArray[i]; safef(path, sizeof(path), "%s/%s", outDir, a); makeDir(path); safef(path, sizeof(path), "%s/%s/%s", outDir, a, "trackDb.txt"); struct fullExperiment *expList = getGoodExperiments(conn, eeList, a); slSort(&expList, fullExperimentCmp); writeTrackDbTxt(conn, eg, expList, a, path); safef(path, sizeof(path), "%s/%s/%s", outDir, a, "table.html"); writeTableHtml(conn, eg, expList, a, path); safef(path, sizeof(path), "%s/%s/%s", outDir, a, "fastq.tab"); writeFastqTab(conn, eg, expList, a, path); } }
void edwMakePlateFileNameAndPath(int edwFileId, char *submitFileName, char licensePlate[edwMaxPlateSize], char edwFile[PATH_LEN], char serverPath[PATH_LEN]) /* Convert file id to local file name, and full file path. Make any directories needed * along serverPath. */ { /* Preserve suffix. Give ourselves up to two suffixes. */ int nameSize = strlen(submitFileName); char *suffix = lastMatchCharExcept(submitFileName, submitFileName + nameSize, '.', '/'); if (suffix != NULL) { char *secondSuffix = lastMatchCharExcept(submitFileName, suffix, '.', '/'); if (secondSuffix != NULL) suffix = secondSuffix; } suffix = emptyForNull(suffix); /* Figure out edw file name, starting with license plate. */ edwMakeLicensePlate(edwLicensePlatePrefix, edwFileId, licensePlate, edwMaxPlateSize); /* Figure out directory and make any components not already there. */ char edwDir[PATH_LEN]; edwDirForTime(edwNow(), edwDir); char uploadDir[PATH_LEN]; safef(uploadDir, sizeof(uploadDir), "%s%s", edwRootDir, edwDir); makeDirsOnPath(uploadDir); /* Figure out full file names */ safef(edwFile, PATH_LEN, "%s%s%s", edwDir, licensePlate, suffix); safef(serverPath, PATH_LEN, "%s%s", edwRootDir, edwFile); }
char *docIdSubmit(struct sqlConnection *conn, char *docIdTable, struct docIdSub *docIdSub,
    char *docIdDir, char *type)
/* Insert a row describing docIdSub into docIdTable, fetch the new row's id with
 * last_insert_id(), and hard-link docIdSub->submitPath to the path that
 * docIdGetPath() computes for that id.  Aborts if the id can't be read, if
 * submitPath does not exist, or if the link fails.
 * Returns the id as a string obtained from sqlQuickString. */
{
verbose(2, "Submitting------\n");
verbose(2, "status %d\n", docIdSub->status);
verbose(2, "assembly %s\n", docIdSub->assembly);
verbose(2, "submitDate %s\n", docIdSub->submitDate);
verbose(2, "md5sum %s\n", docIdSub->md5sum);
verbose(2, "valReport %s\n", docIdSub->valReport);
verbose(2, "valVersion %s\n", docIdSub->valVersion);
verbose(2, "metaData %s\n", docIdSub->metaData);
verbose(2, "submitPath %s\n", docIdSub->submitPath);
verbose(2, "submitter %s\n", docIdSub->submitter);
verbose(2, "type %s\n", type);

/* NOTE(review): this is 1 MB of stack; presumably sized so a large metaData
 * value fits in the statement - confirm before shrinking. */
char query[1024 * 1024];

fillNull(&docIdSub->valReport);
fillNull(&docIdSub->md5sum);

sqlSafef(query, sizeof query,
    "insert into %s (status, assembly, submitDate, md5sum, valReport, valVersion, metaData, submitPath, submitter) values (\"%d\",\"%s\",\"%s\",\"%s\", \"%s\", \"%s\", \"%s\",\"%s\",\"%s\")\n",
    docIdTable, docIdSub->status, docIdSub->assembly, docIdSub->submitDate,
    docIdSub->md5sum, docIdSub->valReport, docIdSub->valVersion, docIdSub->metaData,
    docIdSub->submitPath, docIdSub->submitter);

/* An insert may produce no result row, so guard against handing NULL to %s. */
char *response = sqlQuickString(conn, query);
printf("submitted got response %s\n", response != NULL ? response : "(null)");

/* sqlQuickString already hands back a string owned by the caller, so it is used
 * directly; cloning it again only leaked the first copy. */
sqlSafef(query, sizeof query, "select last_insert_id()");
char *docId = sqlQuickString(conn, query);
if (docId == NULL)
    errAbort("can't get id of row inserted into %s", docIdTable);
printf("submitted got docId %s\n", docId);

if (!fileExists(docIdSub->submitPath))
    errAbort("cannot open %s\n", docIdSub->submitPath);

char *linkToFile = docIdGetPath(docId, docIdDir, type, docIdSub->submitPath);
printf("linking %s to file %s\n", docIdSub->submitPath, linkToFile);

/* Temporarily cut the path at its last slash so only the directory part is created. */
char *slash = strrchr(linkToFile, '/');
if (slash == NULL)
    errAbort("can't find slash in path %s\n", linkToFile);
*slash = 0;
makeDirsOnPath(linkToFile);
*slash = '/';

if (link(docIdSub->submitPath, linkToFile) < 0)
    errnoAbort("can't link %s to file %s\n", docIdSub->submitPath, linkToFile);

return docId;
}
char *edwTempDir()
/* Returns pointer to edwTempDir.  This is shared, so please don't modify.
 * The path is edwRootDir followed by "tmp/"; it is built and the directory is
 * created on the first call, later calls return the same static buffer. */
{
static char path[PATH_LEN];
if (path[0] == 0)
    {
    /* Note code elsewhere depends on tmp dir being inside of edwRootDir - also good
     * to have it there so move to a permanent file is quick and unlikely to fail. */

    /* Format into one byte less than the buffer so the trailing slash appended
     * below is guaranteed to fit; previously a result that exactly filled the
     * buffer let strcat write one byte past the end. */
    safef(path, sizeof(path) - 1, "%s%s", edwRootDir, "tmp");
    makeDirsOnPath(path);
    strcat(path, "/");
    }
return path;
}
void rewriteLevel(struct raLevel *level, char *outDir, struct lm *lm) /* Rewrite files in level. */ { struct raFile *file; if (level->fileList != NULL) makeDirsOnPath(outDir); for (file = level->fileList; file != NULL; file = file->next) { char outName[FILENAME_LEN], outExtension[FILEEXT_LEN]; splitPath(file->name, NULL, outName, outExtension); char outPath[PATH_LEN]; safef(outPath, sizeof(outPath), "%s/%s%s", outDir, outName, outExtension); rewriteFile(level, file, outPath, lm); } }
void edwToEap1(char *dir) /* edwToEap1 - Help transforme edw format analysis tables to eap formatted ones.. */ { makeDirsOnPath(dir); struct sqlConnection *conn = edwConnect(); struct edwAnalysisJob *jobList = edwAnalysisJobLoadByQuery(conn, "select * from edwAnalysisJob order by id"); char jobFile[PATH_LEN]; safef(jobFile, PATH_LEN, "%s/%s", dir, "eapJob.tab"); transformJobTable(conn, jobList, jobFile); struct edwAnalysisSoftware *swList = edwAnalysisSoftwareLoadByQuery(conn, "select * from edwAnalysisSoftware order by id"); struct hash *swHash = hashSwList(swList); char softwareFile[PATH_LEN], swVersionFile[PATH_LEN]; safef(softwareFile, PATH_LEN, "%s/%s", dir, "eapSoftware.tab"); safef(swVersionFile, PATH_LEN, "%s/%s", dir, "eapSwVersion.tab"); transformSoftwareTable(conn, swList, softwareFile, swVersionFile); struct edwAnalysisStep *stepList = edwAnalysisStepLoadByQuery(conn, "select * from edwAnalysisStep order by id"); struct hash *stepHash = hashStepList(stepList); verbose(1, "stepHash has %d els\n", stepHash->elCount); char stepFile[PATH_LEN], stepVersionFile[PATH_LEN], stepSoftwareFile[PATH_LEN]; safef(stepFile, PATH_LEN, "%s/%s", dir, "eapStep.tab"); safef(stepVersionFile, PATH_LEN, "%s/%s", dir, "eapStepVersion.tab"); safef(stepSoftwareFile, PATH_LEN, "%s/%s", dir, "eapStepSoftware.tab"); transformStepTable(conn, stepList, swHash, stepFile, stepVersionFile, stepSoftwareFile); char stepVersionSwVersionFile[PATH_LEN]; safef(stepVersionSwVersionFile, PATH_LEN, "%s/%s", dir, "eapStepSwVersion.tab"); versionVsVersion(conn, stepList, stepHash, swList, swHash, stepVersionSwVersionFile); struct edwAnalysisRun *runList = edwAnalysisRunLoadByQuery(conn, "select * from edwAnalysisRun order by id"); char analysisFile[PATH_LEN], inputFile[PATH_LEN], outputFile[PATH_LEN]; safef(analysisFile, PATH_LEN, "%s/%s", dir, "eapAnalysis.tab"); safef(inputFile, PATH_LEN, "%s/%s", dir, "eapInput.tab"); safef(outputFile, PATH_LEN, "%s/%s", dir, "eapOutput.tab"); 
transformRun(conn, runList, stepHash, analysisFile, inputFile, outputFile); if (optionExists("load")) { loadEapDb(dir); } }
char *edwTempDirForToday(char dir[PATH_LEN])
/* Fills in dir with temp dir of the day, and returns a pointer to it.
 * The result is edwRootDir + the directory for the current time + "tmp/". */
{
/* Day directory seen on the previous call that created directories. */
static char lastDayDir[PATH_LEN] = "";

char dayDir[PATH_LEN];
edwDirForTime(edwNow(), dayDir);
safef(dir, PATH_LEN, "%s%stmp/", edwRootDir, dayDir);

/* makeDirsOnPath is time consuming, so skip it when the day directory is the
 * same one handled last time. */
if (sameString(dayDir, lastDayDir))
    return dir;

strcpy(lastDayDir, dayDir);

/* Strip the trailing slash while creating the directory, then put it back. */
int slashIx = strlen(dir) - 1;
dir[slashIx] = 0;
makeDirsOnPath(dir);
dir[slashIx] = '/';
return dir;
}
void edwMakeFileNameAndPath(int edwFileId, char *submitFileName, char edwFile[PATH_LEN], char serverPath[PATH_LEN]) /* Convert file id to local file name, and full file path. Make any directories needed * along serverPath. */ { /* Preserve suffix. Give ourselves up to two suffixes. */ char *suffix = edwFindDoubleFileSuffix(submitFileName); /* Figure out edw file name, starting with baseName. */ char baseName[32]; edwMakeBabyName(edwFileId, baseName, sizeof(baseName)); /* Figure out directory and make any components not already there. */ char edwDir[PATH_LEN]; edwDirForTime(edwNow(), edwDir); char uploadDir[PATH_LEN]; safef(uploadDir, sizeof(uploadDir), "%s%s", edwRootDir, edwDir); makeDirsOnPath(uploadDir); /* Figure out full file names */ safef(edwFile, PATH_LEN, "%s%s%s", edwDir, baseName, suffix); safef(serverPath, PATH_LEN, "%s%s", edwRootDir, edwFile); }
void verticalSplitSqlTable(char *oldTab, char *oldAs, char *splitSpec, char *outDir) /* verticalSplitSqlTable - Split a database table into two new related tables that share a field. */ { struct asObject *as = asParseFile(oldAs); if (as->next != NULL) errAbort("%d records in %s, only 1 allowed\n", slCount(as), oldAs); uglyf("Read %s from %s\n", as->name, oldAs); /* Read fields from splitSpec, and make sure there are no extra. */ struct hash *ra = raReadSingle(splitSpec); char *table1 = mustFindInSplitSpec("table1", ra, splitSpec); char *fields1 = mustFindInSplitSpec("fields1", ra, splitSpec); char *description1 = mustFindInSplitSpec("description1", ra, splitSpec); char *table2 = mustFindInSplitSpec("table2", ra, splitSpec); char *fields2 = mustFindInSplitSpec("fields2", ra, splitSpec); char *description2 = mustFindInSplitSpec("description2", ra, splitSpec); char *sharedKey = mustFindInSplitSpec("sharedKey", ra, splitSpec); if (ra->elCount > 7) errAbort("Extra fields in %s", splitSpec); /* Convert this=that strings to lists of pairs. */ struct slPair *fieldList1 = slPairFromString(fields1); struct slPair *fieldList2 = slPairFromString(fields2); /* Do some more checks */ if (sameString(table1, table2)) errAbort("Error: table1 and table2 are the same (%s) in %s", table1, splitSpec); checkSharedKeyInList(sharedKey, splitSpec, fields1, fieldList1); checkSharedKeyInList(sharedKey, splitSpec, fields2, fieldList2); struct asColumn *keyCol = asColumnFind(as, sharedKey); if (keyCol == NULL) errAbort("The sharedKey '%s' is not in %s", sharedKey, oldAs); /* Make sure that all fields in splitSpec are actually in the oldAs file. 
*/ checkFieldsInAs(fieldList1, splitSpec, as, oldAs); checkFieldsInAs(fieldList2, splitSpec, as, oldAs); /* Make sure that all old table fields are covered */ if (!partialOk) { struct hash *covered = hashNew(0); struct slPair *field; for (field = fieldList1; field != NULL; field = field->next) hashAdd(covered, field->val, NULL); for (field = fieldList2; field != NULL; field = field->next) hashAdd(covered, field->val, NULL); struct asColumn *col; for (col = as->columnList; col != NULL; col = col->next) { if (!hashLookup(covered, col->name)) errAbort("Field %s in %s not output, use -partialOk flag if this is intentional", col->name, oldAs); } } /* Ok, input is checked, start on output.. */ if (lastChar(outDir) == '/') trimLastChar(outDir); makeDirsOnPath(outDir); /* Output .as files. */ outputPartialAs(as, table1, fieldList1, description1, outDir); outputPartialAs(as, table2, fieldList2, description2, outDir); /* Output first split file - a straight up subset of columns. */ char path[PATH_LEN]; safef(path, sizeof(path), "%s/%s.tab", outDir, table1); outputPartialTab(oldTab, as, fieldList1, path); /* Output second split file */ char errPath[PATH_LEN]; safef(path, sizeof(path), "%s/%s.tab", outDir, table2); safef(errPath, sizeof(path), "%s/mergeErrs.txt", outDir); outputUniqueOnSharedKey(oldTab, as, keyCol, fieldList2, path, errPath); }
void regCompanionEnhProCellSpecificPairs(char *enhBed, char *cellDescriptions, char *geneLevels, char *pairsIn, char *outDir) /* regCompanionEnhProCellSpecificPairs - Select enh/pro pairs that are seen in a given cell * lines. */ { /* Load up cell descriptions into cell array */ struct expRecord *cell, *cellList = expRecordLoadAll(cellDescriptions); int cellCount = slCount(cellList); struct expRecord **cellArray; AllocArray(cellArray, cellCount); int i; for (i=0, cell = cellList; i < cellCount; ++i, cell = cell->next) cellArray[i] = cell; verbose(2, "Got %d cells in %s\n", cellCount, cellDescriptions); /* Load up enhBed into a hash keyed by name */ struct bed *enh, *enhList; int fieldCount; bedLoadAllReturnFieldCount(enhBed, &enhList, &fieldCount); if (fieldCount != 15) errAbort("Expecting bed 15 format in %s", enhBed); struct hash *enhHash = hashNew(16); for (enh = enhList; enh != NULL; enh = enh->next) { if (enh->expCount != cellCount) errAbort("Inconsistent input: %d cells in %s, but %d in %s\n", cellCount, cellDescriptions, enh->expCount, enhBed); hashAddUnique(enhHash, enh->name, enh); } verbose(2, "Got %d enhancers in %s\n", enhHash->elCount, enhBed); /* Get a hash with key of gene name and value an array of expression values. */ struct hash *geneHash = hashGeneLevels(geneLevels, cellCount); verbose(2, "Got %d genes in %s\n", geneHash->elCount, geneLevels); /* Open inPairs.bed, just to make sure it's there before we do any output. */ struct lineFile *lf = lineFileOpen(pairsIn, TRUE); /* Remove trailing slash from output dir if any */ if (lastChar(outDir) == '/') { int len = strlen(outDir); outDir[len-1] = 0; } /* Make output directory and open all output files. 
*/ makeDirsOnPath(outDir); FILE *outFiles[cellCount]; for (i=0, cell = cellList; i < cellCount; ++i, cell = cell->next) { char path[PATH_LEN]; safef(path, sizeof(path), "%s/%s.bed", outDir, cell->description); outFiles[i] = mustOpen(path, "w"); } /* Stream through input file and copy to appropriate outputs. */ char *words[bedKnownFields*2]; // Make a little bigger than any known bed int wordCount, wordsRequired = 0; char *separator = "->"; int separatorSize = strlen(separator); int pairCount = 0; while ((wordCount = lineFileChop(lf, words)) != 0) { /* Make sure all lines have same # of fields, and at least 4. */ if (wordsRequired == 0) { wordsRequired = wordCount; lineFileExpectAtLeast(lf, 4, wordCount); } else lineFileExpectWords(lf, wordsRequired, wordCount); ++pairCount; /* Parse out name field. */ char *name = words[3]; char *sepPos = stringIn(separator, name); if (sepPos == NULL) errAbort("Expecting %s in %s line %d of %s", separator, name, lf->lineIx, lf->fileName); char *enhName = cloneStringZ(name, sepPos-name); char *geneName = sepPos + separatorSize; /* Look up enhancer and gene. */ enh = hashMustFindVal(enhHash, enhName); double *geneLevels = hashMustFindVal(geneHash, geneName); freez(&enhName); /* Output ones over minimum levels. */ for (i=0; i < cellCount; ++i) { double enhLevel = enh->expScores[i]; double geneLevel = geneLevels[i]; if (enhLevel >= minAct && geneLevel >= minExp) { int j; FILE *f = outFiles[i]; fprintf(f, "%s", words[0]); for (j=1; j<wordCount; ++j) fprintf(f, "\t%s", words[j]); fprintf(f, "\n"); } } } verbose(2, "Got %d pairs in %s\n", pairCount, pairsIn); /* Clean up. */ lineFileClose(&lf); for (i=0; i<cellCount; ++i) carefulClose(&outFiles[i]); }
void cdwFakeManifestFromSubmit(char *submitIdString, char *outDir) /* cdwFakeManifestFromSubmit - Create a fake submission based on a real one that is in the warehouse. */ { struct sqlConnection *conn = cdwConnect(); char query[512]; sqlSafef(query, sizeof(query), "select * from cdwSubmit where id=%s", submitIdString); struct cdwSubmit *submit = cdwSubmitLoadByQuery(conn, query); if (submit == NULL) errAbort("Can't find submission %s", submitIdString); uglyf("%d files in query\n", submit->newFiles); sqlSafef(query, sizeof(query), "select * from cdwFile where submitId=%s", submitIdString); struct cdwFile *ef, *efList = cdwFileLoadByQuery(conn, query); FILE *maniF = NULL, *valiF = NULL; for (ef = efList; ef != NULL; ef = ef->next) { struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id); if (vf != NULL) { /* First time through create out directory and open output files. */ if (maniF == NULL) { char *fakeVersion = "##validateManifest version 1.7"; makeDirsOnPath(outDir); setCurrentDir(outDir); maniF = mustOpen("manifest.txt", "w"); printSharedHeader(maniF); fprintf(maniF, "\n"); fprintf(maniF, "%s\n", fakeVersion); valiF = mustOpen("validated.txt", "w"); printSharedHeader(valiF); fprintf(valiF, "\tmd5_sum\tsize\tmodified\tvalid_key\n"); fprintf(valiF, "%s\n", fakeVersion); } /* Figure out file names */ char cdwPath[PATH_LEN], rootName[FILENAME_LEN], ext[FILEEXT_LEN]; safef(cdwPath, sizeof(cdwPath), "%s%s", cdwRootDir, ef->cdwFileName); splitPath(ef->cdwFileName, NULL, rootName, ext); char localPath[PATH_LEN]; safef(localPath, sizeof(localPath), "%s%s", rootName, ext); /* Create sym-linked file and write to manifest */ symlink(cdwPath, localPath); fprint2(maniF, valiF, "%s", localPath); /* Write other columns shared between manifest and validated */ fprint2(maniF, valiF, "\t%s", vf->format); fprint2(maniF, valiF, "\t%s", naForEmpty(vf->outputType)); fprint2(maniF, valiF, "\t%s", naForEmpty(vf->experiment)); fprint2(maniF, valiF, "\t%s", 
naForEmpty(vf->enrichedIn)); fprint2(maniF, valiF, "\t%s", naForEmpty(vf->ucscDb)); fprint2(maniF, valiF, "\t%s", naForEmpty(vf->replicate)); fprint2(maniF, valiF, "\t%s", naForEmpty(vf->part)); fprint2(maniF, valiF, "\t%s", naForEmpty(vf->pairedEnd)); fprintf(maniF, "\n"); /* Print out remaining fields in validated.txt */ fprintf(valiF, "\t%s\t%lld\t%lld\n", ef->md5, ef->size, ef->updateTime); } } carefulClose(&maniF); carefulClose(&valiF); }
void hcaStormToBundles(char *inTags, char *dataUrl, char *schemaFile, char *outDir) /* hcaStormToBundles - Convert a HCA formatted tagStorm to a directory full of bundles.. */ { /* Check that have full path name for dataFileDir */ if (sameString("urls", dataUrl)) gUrls = TRUE; else if (!stringIn("://", dataUrl)) errAbort("data file directory must be a url."); /* Load up schema and put it in hash */ struct tagSchema *schemaList = tagSchemaFromFile(schemaFile); struct hash *schemaHash = tagSchemaHash(schemaList); /* Load up tagStorm get leaf list */ struct tagStorm *storm = tagStormFromFile(inTags); struct tagStanzaRef *refList = tagStormListLeaves(storm); verbose(1, "Got %d leaf nodes in %s\n", slCount(refList), inTags); /* Add in assay.sample_id as just a dupe of sample.id */ dupeValToNewTag(storm, storm->forest, "sample.id", "assay.sample_id"); dupeValToNewTag(storm, storm->forest, "project.id", "sample.project_id"); addMissingUuids(storm, "assay.seq.ena_experiment", "assay.id", FALSE); addMissingUuids(storm, "assay.seq.sra_experiment", "assay.id", FALSE); /* Do some figuring based on all fields available of what objects to make */ struct slName *allFields = tagStormFieldList(storm); verbose(1, "Got %d fields in %s\n", slCount(allFields), inTags); struct slName *topLevelList = ttjUniqToDotList(allFields, NULL, 0); verbose(1, "Got %d top level objects\n", slCount(topLevelList)); /* Make list of objects */ struct slName *topEl; struct ttjSubObj *objList = NULL; for (topEl = topLevelList; topEl != NULL; topEl = topEl->next) { verbose(1, " %s\n", topEl->name); struct ttjSubObj *obj = ttjMakeSubObj(allFields, topEl->name, topEl->name); slAddHead(&objList, obj); } /* Loop through stanzas making bundles */ struct tagStanzaRef *ref; int bundleIx = 0; makeDirsOnPath(outDir); for (ref = refList; ref != NULL; ref = ref->next) { /* Fetch stanza and comma-separated list of files. 
*/ struct tagStanza *stanza = ref->stanza; char *fileCsv = tagFindVal(stanza, "assay.seq.files"); if (fileCsv == NULL) errAbort("Stanza without a files tag. Stanza starts line %d of %s", stanza->startLineIx, inTags); /* Make subdirectory for bundle */ ++bundleIx; char bundleDir[PATH_LEN]; safef(bundleDir, sizeof(bundleDir), "%s/bundle%d", outDir, bundleIx); makeDir(bundleDir); /* Make symbolic link of all files */ char localUrl[PATH_LEN*2]; if (gUrls) { struct slName *fileList = tagMustFindValList(stanza, "assay.seq.files"); splitPath(fileList->name, localUrl, NULL, NULL); dataUrl = localUrl; slFreeList(&fileList); } makeBundleJson(storm, bundleDir, stanza, objList, dataUrl, schemaHash); } verbose(1, "wrote json files into %s/bundle* dirs\n", outDir); }