void makeTmpSai(struct sqlConnection *conn, struct cdwValidFile *vf, char *genoFile, char **retSampleFile, char **retSaiFile) /* Given a fastq file, make a subsample of it 100k reads long and align it with * bwa producing a sai file of given name. */ { /* Get fastq record */ long long fileId = vf->fileId; struct cdwFastqFile *fqf = cdwFastqFileFromFileId(conn, fileId); if (fqf == NULL) errAbort("No cdwFastqFile record for file id %lld", fileId); /* Create downsampled fastq in temp directory - downsampled more than default even. */ char sampleFastqName[PATH_LEN]; cdwMakeTempFastqSample(fqf->sampleFileName, FASTQ_SAMPLE_SIZE, sampleFastqName); verbose(1, "downsampled %s into %s\n", vf->licensePlate, sampleFastqName); /* Do alignment */ char cmd[3*PATH_LEN]; char *saiName = cloneString(rTempName(cdwTempDir(), "cdwPairSample", ".sai")); safef(cmd, sizeof(cmd), "bwa aln -t 3 %s %s > %s", genoFile, sampleFastqName, saiName); mustSystem(cmd); /* Save return variables, clean up, and go home. */ *retSampleFile = cloneString(sampleFastqName); *retSaiFile = saiName; cdwFastqFileFree(&fqf); }
void faCat(char *inFile, char *outFile, char *liftFile) /* faCat - Filter out fa records that don't match expression. */ { char *tempFile = rTempName("/tmp", "lift", ".lft"); struct lineFile *inLf = lineFileOpen(inFile, TRUE); FILE *outFh = NULL; FILE *tempFh = mustOpen(tempFile, "w"); DNA *seq; int seqSize; char *seqHeader; long int offset = 0; char *gap = NULL; int i, fileIndex = 1; char nameNew[512]; char outFileName[512]; char liftFileName[512]; char fastaHeader[512]; safef(nameNew,sizeof(nameNew), "%s.%d",name, fileIndex); safef(fastaHeader,sizeof(fastaHeader),">%s\n",nameNew); safef(outFileName, sizeof(outFileName), "%s.%d.fa",outFile, fileIndex); safef(liftFileName, sizeof(liftFileName), "%s.%d.lft",liftFile, fileIndex++); outFh = mustOpen(outFileName, "w"); gap = needMem(gapSize+1); for (i = 0 ; i < gapSize ; i++) { gap[i] = 'N'; } gap[i] = '\0'; mustWrite(outFh, fastaHeader, strlen(fastaHeader)); while (faMixedSpeedReadNext(inLf, &seq, &seqSize, &seqHeader)) { // if (vOption ^ recMatches(seq, seqSize, seqHeader)) // faWriteNext(outFh, seqHeader, seq, seqSize); /* output lift record: offset oldName oldSize newName newSize */ fprintf(tempFh, "%ld\t%s\t%d\t%s\t%d\n",offset, nameNew, 0, seqHeader, seqSize); offset += (seqSize + gapSize); writeSeqWithBreaks(outFh, seq, seqSize, 50); writeSeqWithBreaks(outFh, gap, gapSize, 50); if (offset > maxOutputSize) { carefulClose(&tempFh); carefulClose(&outFh); fixNewLength(tempFile, liftFileName, offset); tempFh = mustOpen(tempFile, "w"); safef(nameNew,sizeof(nameNew), "%s.%d",name, fileIndex); safef(fastaHeader,sizeof(fastaHeader),">%s\n",nameNew); safef(liftFileName, sizeof(liftFileName), "%s.%d.lft",liftFile, fileIndex); safef(outFileName, sizeof(outFileName), "%s.%d.fa",outFile, fileIndex++); outFh = mustOpen(outFileName, "w"); mustWrite(outFh, fastaHeader, strlen(fastaHeader)); offset = 0; } } carefulClose(&tempFh); fixNewLength(tempFile, liftFileName, offset); lineFileClose(&inLf); carefulClose(&outFh); 
unlink(tempFile); }
void fastqRepeatQa(struct sqlConnection *conn, struct cdwFile *ef, struct cdwValidFile *vf) /* Do repeat QA if possible on fastq file. */ { /* First see if total repeat content is already in our table, in which case we are done. */ long long fileId = ef->id; char query[512]; sqlSafef(query, sizeof(query), "select count(*) from cdwQaRepeat where fileId=%lld and repeatClass='total'" , fileId); if (sqlQuickNum(conn, query) != 0) return; /* We've done this already */ /* Get sample file name from fastq table. */ struct cdwFastqFile *fqf = cdwFastqFileForFileId(conn, fileId); if (fqf == NULL) errAbort("No edqFastqRecord for %s", vf->licensePlate); char *fastqPath = fqf->sampleFileName; char bwaIndex[PATH_LEN]; safef(bwaIndex, sizeof(bwaIndex), "%s%s/repeatMasker/repeatMasker.fa", cdwValDataDir, vf->ucscDb); char cmd[3*PATH_LEN]; char *saiName = cloneString(rTempName(cdwTempDir(), "cdwQaRepeat", ".sai")); safef(cmd, sizeof(cmd), "bwa aln %s %s > %s", bwaIndex, fastqPath, saiName); mustSystem(cmd); char *samName = cloneString(rTempName(cdwTempDir(), "cdwQaRepeat", ".sam")); safef(cmd, sizeof(cmd), "bwa samse %s %s %s > %s", bwaIndex, saiName, fastqPath, samName); mustSystem(cmd); remove(saiName); char *raName = cloneString(rTempName(cdwTempDir(), "cdwQaRepeat", ".ra")); safef(cmd, sizeof(cmd), "edwSamRepeatAnalysis %s %s", samName, raName); mustSystem(cmd); verbose(2, "mustSystem(%s)\n", cmd); remove(samName); raIntoCdwRepeatQa(raName, conn, fileId); remove(raName); #ifdef SOON #endif /* SOON */ freez(&saiName); freez(&samName); freez(&raName); cdwFastqFileFree(&fqf); }
char *veryTempName(char *dir, char *base, char *suffix)
/* Return a cloned temp file name in dir.  The base is prefixed with a counter
 * that increases on every call ("<count>_<base>") before being handed to
 * rTempName(), so successive calls ask for different names. */
{
static int callCount = 0;
char numberedBase[128];

callCount += 1;
safef(numberedBase, sizeof(numberedBase), "%d_%s", callCount, base);

char *tempName = rTempName(dir, numberedBase, suffix);
return cloneString(tempName);
}
static void _makeTempName(struct tempName *tn, char *base, char *suffix)
/* Figure out a temp name under __trashDir and store it in tn.  The same name is
 * recorded as both the CGI-side path (tn->forCgi) and the HTML-side path
 * (tn->forHtml).  The copies are size-bounded rather than plain strcpy so an
 * overlong name cannot write past the end of the destination. */
{
char *tname = rTempName(__trashDir, base, suffix);

/* NOTE(review): sizeof assumes forCgi/forHtml are char arrays embedded in
 * struct tempName (not pointers) - confirm against the struct declaration. */
safecpy(tn->forCgi, sizeof(tn->forCgi), tname);
safecpy(tn->forHtml, sizeof(tn->forHtml), tname);
}
void edwAlignFastqMakeBed(struct edwFile *ef, struct edwAssembly *assembly, char *fastqPath, struct edwValidFile *vf, FILE *bedF, double *retMapRatio, double *retDepth, double *retSampleCoverage) /* Take a sample fastq and run bwa on it, and then convert that file to a bed. * bedF and all the ret parameters can be NULL. */ { /* Hmm, tried doing this with Mark's pipeline code, but somehow it would be flaky the * second time it was run in same app. Resorting therefore to temp files. */ char genoFile[PATH_LEN]; safef(genoFile, sizeof(genoFile), "%s%s/bwaData/%s.fa", edwValDataDir, assembly->ucscDb, assembly->ucscDb); char cmd[3*PATH_LEN]; char *saiName = cloneString(rTempName(edwTempDir(), "edwSample1", ".sai")); safef(cmd, sizeof(cmd), "bwa aln -t 3 %s %s > %s", genoFile, fastqPath, saiName); mustSystem(cmd); char *samName = cloneString(rTempName(edwTempDir(), "ewdSample1", ".sam")); safef(cmd, sizeof(cmd), "bwa samse %s %s %s > %s", genoFile, saiName, fastqPath, samName); mustSystem(cmd); remove(saiName); /* Scan sam file to calculate vf->mapRatio, vf->sampleCoverage and vf->depth. * and also to produce little bed file for enrichment step. */ struct genomeRangeTree *grt = genomeRangeTreeNew(); long long hitCount=0, missCount=0, totalBasesInHits=0; scanSam(samName, bedF, grt, &hitCount, &missCount, &totalBasesInHits); verbose(1, "hitCount=%lld, missCount=%lld, totalBasesInHits=%lld, grt=%p\n", hitCount, missCount, totalBasesInHits, grt); if (retMapRatio) *retMapRatio = (double)hitCount/(hitCount+missCount); if (retDepth) *retDepth = (double)totalBasesInHits/assembly->baseCount * (double)vf->itemCount/vf->sampleCount; long long basesHitBySample = genomeRangeTreeSumRanges(grt); if (retSampleCoverage) *retSampleCoverage = (double)basesHitBySample/assembly->baseCount; genomeRangeTreeFree(&grt); remove(samName); }
int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, optionSpecs); saveId = optionExists("saveId"); inputList = optionVal("inputList",inputList); tempDir = optionVal("tempDir",tempDir); if ((argc < 2 && !inputList) || (argc > 1 && inputList)) usage(); if (tempDir[0]!=0 && lastChar(tempDir) != '/') tempDir = addSuffix(tempDir,"/"); if (argc-1 <= MAXFILES && !inputList) { chainMergeSort(argc-1, argv+1, stdout, 0); } else { char inp0[PATH_LEN]; safecpy(inp0, sizeof(inp0), rTempName(tempDir, "inputList0-", ".tmp")); if (!inputList) { FILE *f = mustOpen(inp0,"w"); int i=0; for (i=1; i<argc; ++i) { fprintf(f, "%s\n", argv[i]); } carefulClose(&f); inputList = inp0; } hierSort(inputList); if (sameString(inputList,inp0)) remove(inp0); } return 0; }
void gitReports() /* Generate code-review reports from git repo */ { int totalChangedLines = 0; int totalChangedFiles = 0; int userChangedLines = 0; int userChangedFiles = 0; tempMakeDiffName = cloneString(rTempName("/tmp", "makeDiff", ".tmp")); /* read the commits */ struct commit *commits = getCommits(), *c = NULL; /* make the user list */ for(c = commits; c; c = c->next) { if (!hashLookup(userHash, c->author)) { hashStore(userHash, c->author); struct slName *name = newSlName(c->author); slAddHead(&users, name); } } slNameSort(&users); /* create prefix dir */ char path[256]; safef(path, sizeof(path), "%s/%s", outDir, outPrefix); makeMyDir(path); /* create file dir */ safef(path, sizeof(path), "%s/%s/%s", outDir, outPrefix, "file"); makeMyDir(path); /* create user dir */ safef(path, sizeof(path), "%s/%s/%s", outDir, outPrefix, "user"); makeMyDir(path); char usersPath[1024]; safef(usersPath, sizeof(usersPath), "%s/%s/%s/index.html", outDir, outPrefix, "user"); FILE *h = mustOpen(usersPath, "w"); fprintf(h, "<html>\n<head>\n<title>Changes By User</title>\n</head>\n</body>\n"); fprintf(h, "<h1>Changes By User</h1>\n"); fprintf(h, "<h2>%s to %s (%s to %s) %s</h2>\n", startTag, endTag, startDate, endDate, title); fprintf(h, "<ul>\n"); struct slName*u; for(u = users; u; u = u->next) { printf("user: %s\n", u->name); /* create user/name dir */ safef(path, sizeof(path), "%s/%s/%s/%s", outDir, outPrefix, "user", u->name); makeMyDir(path); /* create user/name/context dir */ safef(path, sizeof(path), "%s/%s/%s/%s/%s", outDir, outPrefix, "user", u->name, "context"); makeMyDir(path); /* create user/name/full dir */ safef(path, sizeof(path), "%s/%s/%s/%s/%s", outDir, outPrefix, "user", u->name, "full"); makeMyDir(path); userChangedLines = 0; userChangedFiles = 0; /* make user's reports */ doUserCommits(u->name, commits, &userChangedLines, &userChangedFiles); doUserFiles(u->name, commits); char relPath[1024]; safef(relPath, sizeof(relPath), "%s/index.html", u->name); fprintf(h, 
"<li> <A href=\"%s\">%s</A> - changed lines: %d, files: %d</li>\n", relPath, u->name, userChangedLines, userChangedFiles); totalChangedLines += userChangedLines; totalChangedFiles += userChangedFiles; } fprintf(h, "</ul>\n"); if (u) { fprintf(h, "switch to <A href=\"index.html\">commits view</A>, <A href=\"../index.html\">user index</A>"); } else { fprintf(h, "<ul>\n"); fprintf(h, "<li> lines changed: %d</li>\n", totalChangedLines); fprintf(h, "<li> files changed: %d</li>\n", totalChangedFiles); fprintf(h, "</ul>\n"); } fprintf(h, "</body>\n</html>\n"); fclose(h); // make index of all files view doUserFiles(NULL, commits); // make main index page doMainIndex(); // tidying up unlink(tempMakeDiffName); freez(&tempMakeDiffName); }
void hierSort(char *inputList) /* Do a hierarchical merge sort so we don't run out of system file handles */ { int level = 0; char thisName[PATH_LEN]; char nextName[PATH_LEN]; char sortName[PATH_LEN]; char tmpNameBuf[PATH_LEN]; struct lineFile *thisLf = NULL; FILE *nextF = NULL; int sortCount = 0; FILE *sortF = NULL; int fileCount = 0; char *files[MAXFILES]; boolean more = FALSE; int block=0; char *line=NULL; safef(nextName, sizeof(nextName), "%s", inputList); do { block=0; safef(thisName, sizeof(thisName), "%s", nextName); safef(tmpNameBuf, sizeof(tmpNameBuf), "inputList%d-", level+1); safecpy(nextName, sizeof(nextName), rTempName(tempDir, tmpNameBuf, ".tmp")); thisLf = lineFileOpen(thisName,TRUE); if (!thisLf) errAbort("error lineFileOpen(%s) returned NULL\n",thisName); more = lineFileNext(thisLf, &line, NULL); while (more) { int i=0; fileCount = 0; while (more && fileCount < MAXFILES) { files[fileCount++]=cloneString(line); more = lineFileNext(thisLf, &line, NULL); } if (!more && block==0) { /* last level */ sortF = stdout; } else { if (!nextF) nextF = mustOpen(nextName,"w"); safef(tmpNameBuf, sizeof(tmpNameBuf), "sort%d-", sortCount++); safecpy(sortName, sizeof(sortName), rTempName(tempDir, tmpNameBuf, ".tmp")); fprintf(nextF, "%s\n", sortName); sortF = mustOpen(sortName,"w"); } chainMergeSort(fileCount, files, sortF, level); if (sortF != stdout) carefulClose(&sortF); for(i=0;i<fileCount;++i) freez(&files[i]); verboseDot(); verbose(2,"block=%d\n",block); ++block; } lineFileClose(&thisLf); if (nextF) carefulClose(&nextF); if (level > 0) { remove(thisName); } verbose(1,"\n"); verbose(2,"level=%d, block=%d\n",level,block); ++level; } while (block > 1); }
static void loadDatabase(char *database, char *track, int bedSize, struct bedStub *bedList) /* Load database from bedList. */ { struct sqlConnection *conn; struct dyString *dy = newDyString(1024); char *tab = (char *)NULL; int loadOptions = (optionExists("onServer") ? SQL_TAB_FILE_ON_SERVER : 0); if ( ! noLoad ) conn = sqlConnect(database); if ((char *)NULL != tmpDir) tab = cloneString(rTempName(tmpDir,"loadBed",".tab")); else tab = cloneString("bed.tab"); if (bedDetail && sqlTable == NULL) errAbort("bedDetail format requires sqlTable option"); if (bedDetail && !strictTab) errAbort("bedDetail format must be tab separated"); if (bedDetail && !noBin) noBin = TRUE; /* First make table definition. */ if (sqlTable != NULL && !oldTable) { /* Read from file. */ char *sql, *s; readInGulp(sqlTable, &sql, NULL); /* Chop off end-of-statement semicolon if need be. */ s = strchr(sql, ';'); if (s != NULL) *s = 0; if ( !noLoad ) { if (renameSqlTable) { char *pos = stringIn("CREATE TABLE ", sql); if (pos == NULL) errAbort("Can't find CREATE TABLE in %s\n", sqlTable); char *oldSql = cloneString(sql); nextWord(&pos); nextWord(&pos); char *tableName = nextWord(&pos); sql = replaceChars(oldSql, tableName, track); } verbose(1, "Creating table definition for %s\n", track); sqlRemakeTable(conn, track, sql); if (!noBin) addBinToEmptyTable(conn, track); adjustSqlTableColumns(conn, track, bedSize); } freez(&sql); } else if (!oldTable) { int minLength; if (noLoad) minLength=6; else if (maxChromNameLength) minLength = maxChromNameLength; else minLength = hGetMinIndexLength(database); verbose(2, "INDEX chrom length: %d\n", minLength); /* Create definition statement. 
*/ verbose(1, "Creating table definition for %s\n", track); dyStringPrintf(dy, "CREATE TABLE %s (\n", track); if (!noBin) dyStringAppend(dy, " bin smallint unsigned not null,\n"); dyStringAppend(dy, " chrom varchar(255) not null,\n"); dyStringAppend(dy, " chromStart int unsigned not null,\n"); dyStringAppend(dy, " chromEnd int unsigned not null,\n"); if (bedSize >= 4) maybeBedGraph(4, dy, " name varchar(255) not null,\n"); if (bedSize >= 5) { if (allowNegativeScores) maybeBedGraph(5, dy, " score int not null,\n"); else maybeBedGraph(5, dy, " score int unsigned not null,\n"); } if (bedSize >= 6) maybeBedGraph(6, dy, " strand char(1) not null,\n"); if (bedSize >= 7) maybeBedGraph(7, dy, " thickStart int unsigned not null,\n"); if (bedSize >= 8) maybeBedGraph(8, dy, " thickEnd int unsigned not null,\n"); /* As of 2004-11-22 the reserved field is used as itemRgb in code */ if (bedSize >= 9) maybeBedGraph(9, dy, " reserved int unsigned not null,\n"); if (bedSize >= 10) maybeBedGraph(10, dy, " blockCount int unsigned not null,\n"); if (bedSize >= 11) maybeBedGraph(11, dy, " blockSizes longblob not null,\n"); if (bedSize >= 12) maybeBedGraph(12, dy, " chromStarts longblob not null,\n"); if (bedSize >= 13) maybeBedGraph(13, dy, " expCount int unsigned not null,\n"); if (bedSize >= 14) maybeBedGraph(14, dy, " expIds longblob not null,\n"); if (bedSize >= 15) maybeBedGraph(15, dy, " expScores longblob not null,\n"); dyStringAppend(dy, "#Indices\n"); if (nameIx && (bedSize >= 4) && (0 == bedGraph)) dyStringAppend(dy, " INDEX(name(16)),\n"); if (noBin) { dyStringPrintf(dy, " INDEX(chrom(%d),chromStart)\n", minLength); } else { dyStringPrintf(dy, " INDEX(chrom(%d),bin)\n", minLength); } dyStringAppend(dy, ")\n"); if (noLoad) verbose(2,"%s", dy->string); else sqlRemakeTable(conn, track, dy->string); } verbose(1, "Saving %s\n", tab); writeBedTab(tab, bedList, bedSize); if ( ! 
noLoad ) { verbose(1, "Loading %s\n", database); if (customTrackLoader) sqlLoadTabFile(conn, tab, track, loadOptions|SQL_TAB_FILE_WARN_ON_WARN); else sqlLoadTabFile(conn, tab, track, loadOptions); if (! noHistory) { char comment[256]; /* add a comment to the history table and finish up connection */ safef(comment, sizeof(comment), "Add %d element(s) from bed list to %s table", slCount(bedList), track); hgHistoryComment(conn, comment); } if(fillInScoreColumn != NULL) { char query[500]; char buf[500]; struct sqlResult *sr; safef(query, sizeof(query), "select sum(score) from %s", track); if(sqlQuickQuery(conn, query, buf, sizeof(buf))) { unsigned sum = sqlUnsigned(buf); if (!sum) { safef(query, sizeof(query), "select min(%s), max(%s) from %s", fillInScoreColumn, fillInScoreColumn, track); if ((sr = sqlGetResult(conn, query)) != NULL) { char **row = sqlNextRow(sr); if(row != NULL) { float min = sqlFloat(row[0]); float max = sqlFloat(row[1]); if ( !(max == -1 && min == -1)) // if score is -1 then ignore, as if it werent present { if (max == min || sameString(row[0],row[1])) // this will lead to 'inf' score value in SQL update causing an error errAbort("Could not set score in table %s max(%s)=min(%s)=%s\n", track, fillInScoreColumn, fillInScoreColumn, row[0]); sqlFreeResult(&sr); // Calculate a, b s/t f(x) = ax + b maps min-max => minScore-1000 float a = (1000-minScore) / (max - min); float b = 1000 - ((1000-minScore) * max) / (max - min); safef(query, sizeof(query), "update %s set score = round((%f * %s) + %f)", track, a, fillInScoreColumn, b); int changed = sqlUpdateRows(conn, query, NULL); verbose(2, "update query: %s; changed: %d\n", query, changed); } else { sqlFreeResult(&sr); verbose(2, "score not updated; all values for column %s are -1\n", fillInScoreColumn); } } } } } } sqlDisconnect(&conn); /* if temp dir specified, unlink file to make it disappear */ if ((char *)NULL != tmpDir) unlink(tab); } else verbose(1, "No load option selected, see file: %s\n", tab); } 
/* static void loadDatabase() */
void pairedEndQa(struct sqlConnection *conn, struct cdwFile *ef, struct cdwValidFile *vf) /* Look for other end, do a pairwise alignment, and save results in database. */ { verbose(2, "pairedEndQa on %u %s %s\n", ef->id, ef->cdwFileName, ef->submitFileName); /* Get other end, return if not found. */ struct cdwValidFile *otherVf = cdwOppositePairedEnd(conn, ef, vf); if (otherVf == NULL) return; if (otherVf->fileId > vf->fileId) return; struct cdwValidFile *vf1, *vf2; struct cdwQaPairedEndFastq *pair = cdwQaPairedEndFastqFromVfs(conn, vf, otherVf, &vf1, &vf2); if (pair != NULL) { cdwValidFileFree(&otherVf); return; } /* Get target assembly and figure out path for BWA index. */ struct cdwAssembly *assembly = cdwAssemblyForUcscDb(conn, vf->ucscDb); assert(assembly != NULL); char genoFile[PATH_LEN]; safef(genoFile, sizeof(genoFile), "%s%s/bwaData/%s.fa", cdwValDataDir, assembly->ucscDb, assembly->ucscDb); verbose(1, "aligning subsamples on %u vs. %u paired reads\n", vf1->fileId, vf2->fileId); /* Make alignments of subsamples. */ char *sample1 = NULL, *sample2 = NULL, *sai1 = NULL, *sai2 = NULL; makeTmpSai(conn, vf1, genoFile, &sample1, &sai1); makeTmpSai(conn, vf2, genoFile, &sample2, &sai2); /* Make paired end alignment */ char *tmpSam = cloneString(rTempName(cdwTempDir(), "cdwPairSample", ".sam")); char command[6*PATH_LEN]; safef(command, sizeof(command), "bwa sampe -n 1 -N 1 -f %s %s %s %s %s %s" , tmpSam, genoFile, sai1, sai2, sample1, sample2); mustSystem(command); /* Make ra file with pairing statistics */ char *tmpRa = cloneString(rTempName(cdwTempDir(), "cdwPairSample", ".ra")); safef(command, sizeof(command), "edwSamPairedEndStats -maxInsert=%d %s %s", maxInsert, tmpSam, tmpRa); mustSystem(command); /* Read RA file into variables. */ struct cdwQaPairedEndFastq *pe = cdwQaPairedEndFastqOneFromRa(tmpRa); /* Update database with record. 
*/ struct sqlConnection *freshConn = cdwConnectReadWrite(); char query[256]; sqlSafef(query, sizeof(query), "insert into cdwQaPairedEndFastq " "(fileId1,fileId2,concordance,distanceMean,distanceStd,distanceMin,distanceMax,recordComplete) " " values (%u,%u,%g,%g,%g,%g,%g,1)" , vf1->fileId, vf2->fileId, pe->concordance, pe->distanceMean , pe->distanceStd, pe->distanceMin, pe->distanceMax); sqlUpdate(conn, query); sqlDisconnect(&freshConn); /* Clean up and go home. */ cdwValidFileFree(&otherVf); remove(sample1); remove(sample2); remove(sai1); remove(sai2); remove(tmpSam); remove(tmpRa); #ifdef SOON #endif /* SOON */ freez(&sample1); freez(&sample2); freez(&sai1); freez(&sai2); freez(&tmpSam); freez(&tmpRa); cdwQaPairedEndFastqFree(&pe); cdwValidFileFree(&otherVf); }