void pairedEndQa(struct sqlConnection *conn, struct cdwFile *ef, struct cdwValidFile *vf) /* Look for other end, do a pairwise alignment, and save results in database. */ { verbose(2, "pairedEndQa on %u %s %s\n", ef->id, ef->cdwFileName, ef->submitFileName); /* Get other end, return if not found. */ struct cdwValidFile *otherVf = cdwOppositePairedEnd(conn, ef, vf); if (otherVf == NULL) return; if (otherVf->fileId > vf->fileId) return; struct cdwValidFile *vf1, *vf2; struct cdwQaPairedEndFastq *pair = cdwQaPairedEndFastqFromVfs(conn, vf, otherVf, &vf1, &vf2); if (pair != NULL) { cdwValidFileFree(&otherVf); return; } /* Get target assembly and figure out path for BWA index. */ struct cdwAssembly *assembly = cdwAssemblyForUcscDb(conn, vf->ucscDb); assert(assembly != NULL); char genoFile[PATH_LEN]; safef(genoFile, sizeof(genoFile), "%s%s/bwaData/%s.fa", cdwValDataDir, assembly->ucscDb, assembly->ucscDb); verbose(1, "aligning subsamples on %u vs. %u paired reads\n", vf1->fileId, vf2->fileId); /* Make alignments of subsamples. */ char *sample1 = NULL, *sample2 = NULL, *sai1 = NULL, *sai2 = NULL; makeTmpSai(conn, vf1, genoFile, &sample1, &sai1); makeTmpSai(conn, vf2, genoFile, &sample2, &sai2); /* Make paired end alignment */ char *tmpSam = cloneString(rTempName(cdwTempDir(), "cdwPairSample", ".sam")); char command[6*PATH_LEN]; safef(command, sizeof(command), "bwa sampe -n 1 -N 1 -f %s %s %s %s %s %s" , tmpSam, genoFile, sai1, sai2, sample1, sample2); mustSystem(command); /* Make ra file with pairing statistics */ char *tmpRa = cloneString(rTempName(cdwTempDir(), "cdwPairSample", ".ra")); safef(command, sizeof(command), "edwSamPairedEndStats -maxInsert=%d %s %s", maxInsert, tmpSam, tmpRa); mustSystem(command); /* Read RA file into variables. */ struct cdwQaPairedEndFastq *pe = cdwQaPairedEndFastqOneFromRa(tmpRa); /* Update database with record. 
*/ struct sqlConnection *freshConn = cdwConnectReadWrite(); char query[256]; sqlSafef(query, sizeof(query), "insert into cdwQaPairedEndFastq " "(fileId1,fileId2,concordance,distanceMean,distanceStd,distanceMin,distanceMax,recordComplete) " " values (%u,%u,%g,%g,%g,%g,%g,1)" , vf1->fileId, vf2->fileId, pe->concordance, pe->distanceMean , pe->distanceStd, pe->distanceMin, pe->distanceMax); sqlUpdate(conn, query); sqlDisconnect(&freshConn); /* Clean up and go home. */ cdwValidFileFree(&otherVf); remove(sample1); remove(sample2); remove(sai1); remove(sai2); remove(tmpSam); remove(tmpRa); #ifdef SOON #endif /* SOON */ freez(&sample1); freez(&sample2); freez(&sai1); freez(&sai2); freez(&tmpSam); freez(&tmpRa); cdwQaPairedEndFastqFree(&pe); cdwValidFileFree(&otherVf); }
void doClusterMotifDetails(struct sqlConnection *conn, struct trackDb *tdb, struct factorSource *cluster) /* Display details about TF binding motif(s) in cluster */ { char *motifTable = trackDbSetting(tdb, "motifTable"); // localizations char *motifPwmTable = trackDbSetting(tdb, "motifPwmTable"); // PWM used to draw sequence logo char *motifMapTable = trackDbSetting(tdb, "motifMapTable"); // map target to motif struct slName *motifNames = NULL, *mn; // list of canonical motifs for the factor struct dnaMotif *motif = NULL; struct bed6FloatScore *hit = NULL, *maxHit = NULL; char **row; char query[256]; if (motifTable != NULL && sqlTableExists(conn, motifTable)) { struct sqlResult *sr; int rowOffset; char where[256]; if (motifMapTable == NULL || !sqlTableExists(conn, motifMapTable)) { // Assume cluster name is motif name if there is no map table motifNames = slNameNew(cluster->name); } else { sqlSafef(query, sizeof(query), "select motif from %s where target = '%s'", motifMapTable, cluster->name); char *ret = sqlQuickString(conn, query); if (ret == NULL) { // missing target from table -- no canonical motif webNewEmptySection(); return; } motifNames = slNameListFromString(ret, ','); } for (mn = motifNames; mn != NULL; mn = mn->next) { sqlSafefFrag(where, sizeof(where), "name='%s' order by score desc limit 1", mn->name); sr = hRangeQuery(conn, motifTable, cluster->chrom, cluster->chromStart, cluster->chromEnd, where, &rowOffset); if ((row = sqlNextRow(sr)) != NULL) { hit = bed6FloatScoreLoad(row + rowOffset); if (maxHit == NULL || maxHit->score < hit->score) maxHit = hit; } sqlFreeResult(&sr); } } if (maxHit == NULL) { // Maintain table layout webNewEmptySection(); return; } hit = maxHit; webNewSection("Canonical Motif in Cluster"); char posLink[1024]; safef(posLink, sizeof(posLink),"<a href=\"%s&db=%s&position=%s%%3A%d-%d\">%s:%d-%d</a>", hgTracksPathAndSettings(), database, cluster->chrom, hit->chromStart+1, hit->chromEnd, cluster->chrom, hit->chromStart+1, 
hit->chromEnd); printf("<b>Motif Name:</b> %s<br>\n", hit->name); printf("<b>Motif Score"); printf(":</b> %.2f<br>\n", hit->score); printf("<b>Motif Position:</b> %s<br>\n", posLink); printf("<b>Motif Strand:</b> %c<br>\n", (int)hit->strand[0]); struct dnaSeq *seq = hDnaFromSeq(database, seqName, hit->chromStart, hit->chromEnd, dnaLower); if (seq == NULL) return; if (hit->strand[0] == '-') reverseComplement(seq->dna, seq->size); if (motifPwmTable != NULL && sqlTableExists(conn, motifPwmTable)) { motif = loadDnaMotif(hit->name, motifPwmTable); if (motif == NULL) return; motifLogoAndMatrix(&seq, 1, motif); } }
matrix * NMFgetSampleAccuracy(struct hash *config) /*Read all the folds and calculate training and testing accuracies from best models*/ { char * trainingDir = hashMustFindVal(config, "trainingDir"); char * validationDir = hashMustFindVal(config, "validationDir"); char * modelDir = hashMustFindVal(config, "modelDir"); int fold, folds = foldsCountFromDataDir(config); int split, splits = splitsCountFromDataDir(config); matrix * accuracies = NULL; char filename[256]; FILE * fp; for(split = 1; split <= splits; split++) { for(fold = 1; fold <= folds; fold++) { //cat togetehr the training and validation KH values and record which were used to train safef(filename, sizeof(filename), "%s/split%02d/fold%02d/metadata.tab", trainingDir, split, fold); fp = fopen(filename, "r"); if(fp == NULL) errAbort("Couldn't open file %s\n", filename); matrix * trMetadata = f_fill_matrix(fp, 1); fclose(fp); safef(filename, sizeof(filename), "%s/split%02d/fold%02d/metadata.tab", validationDir, split, fold); fp = fopen(filename, "r"); if(fp == NULL) errAbort("Couldn't open file %s\n", filename); matrix * valMetadata = f_fill_matrix(fp, 1); fclose(fp); struct slInt * trainingList = list_indices(trMetadata->cols); matrix * metadata = append_matrices(trMetadata, valMetadata, 1); safef(filename, sizeof(filename), "%s/split%02d/fold%02d/NMFpredictor.training.results", modelDir, split, fold); fp = fopen(filename , "r"); if(!fp) errAbort("Couldn't open training results file %s", filename); matrix * trainingPred = f_fill_matrix(fp, 1); fclose(fp); safef(filename, sizeof(filename), "%s/split%02d/fold%02d/NMFpredictor.validation.results", modelDir, split, fold); fp = fopen(filename , "r"); if(!fp) errAbort("Couldn't open validation results file %s", filename); matrix * valPred = f_fill_matrix(fp, 1); fclose(fp); //calc the accuracy by sample matrix * predictions = append_matrices(trainingPred, valPred, 1); matrix * accuraciesInFold = NMFpopulateAccuracyMatrix(predictions, metadata, trainingList); //add 
the accuracies to the running totals if(split == 1 && fold == 1) accuracies = copy_matrix(accuraciesInFold); else add_matrices_by_colLabel(accuracies, accuraciesInFold); //clean up free_matrix(trainingPred); free_matrix(valPred); free_matrix(predictions); free_matrix(trMetadata); free_matrix(valMetadata); free_matrix(metadata); free_matrix(accuraciesInFold); } } //normalize accuracies over number of splits and folds int i; for(i = 0; i < accuracies->cols; i++) { if(accuracies->graph[0][i] != NULL_FLAG) accuracies->graph[0][i] = (accuracies->graph[0][i] / ((folds-1) * splits)); if(accuracies->graph[1][i] != NULL_FLAG) accuracies->graph[1][i] = (accuracies->graph[1][i] / (1 * splits)); } return accuracies; }
void sqlExecProgProfile(char *profile, char *prog, char **progArgs, int userArgc, char *userArgv[]) /* * Exec one of the sql programs using user and password defined in localDb.XXX variables from ~/.hg.conf * progArgs is NULL-terminate array of program-specific arguments to add, * which maybe NULL. userArgv are arguments passed in from the command line. * The program is execvp-ed, this function does not return. */ { int i, j = 0, nargc=cntArgv(progArgs)+userArgc+6, defaultFileNo, returnStatus; pid_t child_id; char **nargv, defaultFileName[256], defaultFileArg[256], *homeDir; // install cleanup signal handlers sqlProgInitSigHandlers(); /* Assemble defaults file */ if ((homeDir = getenv("HOME")) == NULL) errAbort("sqlExecProgProfile: HOME is not defined in environment; cannot create temporary password file"); nukeOldCnfs(homeDir); // look for special parameter -profile=name for (i = 0; i < userArgc; i++) if (startsWith("-profile=", userArgv[i])) profile=cloneString(userArgv[i]+strlen("-profile=")); safef(defaultFileName, sizeof(defaultFileName), "%s/.hgsql.cnf-XXXXXX", homeDir); defaultFileNo=sqlMakeDefaultsFile(defaultFileName, profile, "client"); safef(defaultFileArg, sizeof(defaultFileArg), "--defaults-file=%s", defaultFileName); AllocArray(nargv, nargc); nargv[j++] = prog; nargv[j++] = defaultFileArg; /* --defaults-file must come before other options */ if (progArgs != NULL) { for (i = 0; progArgs[i] != NULL; i++) nargv[j++] = progArgs[i]; } for (i = 0; i < userArgc; i++) if (!startsWith("-profile=", userArgv[i])) nargv[j++] = userArgv[i]; nargv[j++] = NULL; // flush before forking so we can't accidentally get two copies of the output fflush(stdout); fflush(stderr); child_id = fork(); killChildPid = child_id; if (child_id == 0) { execvp(nargv[0], nargv); _exit(42); /* Why 42? Why not? 
Need something user defined that mysql isn't going to return */ } else { /* Wait until the child process completes, then delete the temp file */ wait(&returnStatus); unlink (defaultFileName); if (WIFEXITED(returnStatus)) { int childExitStatus = WEXITSTATUS(returnStatus); if (childExitStatus == 42) errAbort("sqlExecProgProfile: exec failed"); else // Propagate child's exit status: _exit(childExitStatus); } else errAbort("sqlExecProgProfile: child process exited with abnormal status %d", returnStatus); } }
static void parseDbXrefs() /* Parse the db_xref entries for various features to build a single dbx entry * in the kvt and to obtain the locus and mim ids for the kvt */ { static char* LOCUS_ID = "LocusID:"; static char* GENE_ID = "GeneID:"; static char* MIM_ID = "MIM:"; struct slName* head = NULL, *xref, *prevXref; struct keyVal* dbXrefKv = NULL; struct keyVal* locusLinkIdKv = NULL; struct keyVal* geneIdKv = NULL; struct keyVal* omimIdKv = NULL; if (dbXrefBuf == NULL) dbXrefBuf = dyStringNew(256); dyStringClear(dbXrefBuf); if (omimIdBuf == NULL) omimIdBuf = dyStringNew(256); dyStringClear(omimIdBuf); locusLinkId[0] = '\0'; /* split into a list and sort so we can remove dups */ if (gbCdsDbxField->val->stringSize > 0) head = slCat(head, parseDbXrefStr(gbCdsDbxField->val->string)); if (gbGeneDbxField->val->stringSize > 0) head = slCat(head, parseDbXrefStr(gbGeneDbxField->val->string)); slNameSort(&head); xref = head; prevXref = NULL; while (xref != NULL) { /* skip if dup of previous */ if ((prevXref == NULL) || !sameString(prevXref->name, xref->name)) { if (dbXrefBuf->stringSize > 0) dyStringAppendC(dbXrefBuf, ' '); dyStringAppend(dbXrefBuf, xref->name); updateKvt(&dbXrefKv, "dbx", dbXrefBuf->string); /* find number in db_xref like LocusID:27 or GeneID:27 */ if (startsWith(LOCUS_ID, xref->name)) { safef(locusLinkId, sizeof(locusLinkId), "%s", xref->name+strlen(LOCUS_ID)); updateKvt(&locusLinkIdKv, "loc", locusLinkId); } else if (startsWith(GENE_ID, xref->name)) { safef(geneId, sizeof(geneId), "%s", xref->name+strlen(GENE_ID)); updateKvt(&geneIdKv, "gni", geneId); } else if (startsWith(MIM_ID, xref->name)) { if (omimIdBuf->stringSize > 0) dyStringAppendC(omimIdBuf, ' '); dyStringAppend(omimIdBuf, xref->name+strlen(MIM_ID)); updateKvt(&omimIdKv, "mim", omimIdBuf->string); } } prevXref = xref; xref = xref->next; } slFreeList(&head); }
void testOutSequence(struct htmlPage *tablePage, struct htmlForm *mainForm, char *org, char *db, char *group, char *track, char *table, int expectedRows) /* Get as sequence and make sure count agrees with expected. */ /* mainForm not used */ { struct htmlPage *outPage; int attempts = 0; struct htmlFormVar *typeVar; if (tablePage->forms == NULL) errAbort("testOutSequence: Missing form (tablePage)"); htmlPageSetVar(tablePage, NULL, hgtaOutputType, "sequence"); outPage = quickSubmit(tablePage, org, db, group, track, table, "seqUi1", hgtaDoTopSubmit, "submit"); while (outPage == NULL && attempts < MAX_ATTEMPTS) { printf("testOutSequence: trying again to get seqUi1\n"); outPage = quickSubmit(tablePage, org, db, group, track, table, "seqUi1", hgtaDoTopSubmit, "submit"); attempts++; } if (outPage == NULL) { qaStatusSoftError(tablesTestList->status, "Error in testOutSequence - couldn't get outPage"); return; } if (outPage->forms == NULL) { qaStatusSoftError(tablesTestList->status, "Error in testOutSequence - missing form"); htmlPageFree(&outPage); return; } /* Since some genomic sequence things are huge, this will * only test in case where it's a gene prediction. 
*/ typeVar = htmlFormVarGet(outPage->forms, hgtaGeneSeqType); if (typeVar != NULL) { struct htmlPage *seqPage; static char *types[] = {"protein", "mRNA"}; int i; for (i=0; i<ArraySize(types); ++i) { char *type = types[i]; if (slNameInList(typeVar->values, type)) { struct htmlPage *page; char testName[128]; htmlPageSetVar(outPage, NULL, hgtaGeneSeqType, type); safef(testName, sizeof(testName), "%sSeq", type); page = quickSubmit(outPage, org, db, group, track, table, testName, hgtaDoGenePredSequence, "submit"); checkFaOutput(page, expectedRows, TRUE); htmlPageFree(&page); } } htmlPageSetVar(outPage, NULL, hgtaGeneSeqType, "genomic"); serialSubmit(&outPage, org, db, group, track, table, "seqUi2", hgtaDoGenePredSequence, "submit"); // check that outPage != NULL /* On genomic page uncheck intron if it's there, then get results * and count them. */ if (htmlFormVarGet(outPage->forms, "hgSeq.intron") != NULL) htmlPageSetVar(outPage, NULL, "hgSeq.intron", NULL); seqPage = quickSubmit(outPage, org, db, group, track, table, "genomicSeq", hgtaDoGenomicDna, "submit"); // check that seqPage != NULL checkFaOutput(seqPage, expectedRows, FALSE); htmlPageFree(&seqPage); } htmlPageFree(&outPage); }
void doubleCellPrint(struct column *col, struct subjInfo *si,
                     struct sqlConnection *conn)
/* Print a double-valued cell as a right-aligned table cell.
 * "." is passed through unchanged; for the visit, CD4 block and anti-GP120
 * columns the codes -1, -2 and -3 are shown as "N/A", "N/D" and a blank;
 * everything else is formatted as a number (3 decimals for the CD4 block and
 * anti-GP120 columns, 1 decimal otherwise). */
{
char *cell = col->cellVal(col, si, conn);
char text[256];

if (sameString(cell, "."))
    {
    /* known bad data value */
    safef(text, sizeof(text), "%s", cell);
    }
else if (sameWord(col->name, "LastPVisit") || sameWord(col->name, "LastTrVisit"))
    {
    /* visit columns accept several spellings of the -3 code */
    int isBlank = sameWord(cell, "-3.000") || sameWord(cell, "-3.0") || sameWord(cell, "-3");
    if (sameWord(cell, "-1"))
        safef(text, sizeof(text), "N/A");
    else if (sameWord(cell, "-2"))
        safef(text, sizeof(text), "N/D");
    else if (isBlank)
        safef(text, sizeof(text), " ");
    else
        safef(text, sizeof(text), "%.1f", sqlDouble(cell));
    }
else if (sameWord(col->name, "LastTrCD4Blk")
      || sameWord(col->name, "LastPCD4Blk")
      || sameWord(col->name, "LastPAntiGP120")
      || sameWord(col->name, "LastTrAntiGP120"))
    {
    /* these columns only recognize the "-3.000" spelling of the -3 code */
    if (sameWord(cell, "-1"))
        safef(text, sizeof(text), "N/A");
    else if (sameWord(cell, "-2"))
        safef(text, sizeof(text), "N/D");
    else if (sameWord(cell, "-3.000"))
        safef(text, sizeof(text), " ");
    else
        safef(text, sizeof(text), "%.3f", sqlDouble(cell));
    }
else
    {
    safef(text, sizeof(text), "%.1f", sqlDouble(cell));
    }

/* the cell value is released before printing the formatted copy */
freeMem(cell);
hPrintf("<TD align=right>");
hPrintf("%s", text);
hPrintf("</TD>");
}
static void doBlat(struct sqlConnection *conn, int taxon, char *db) /* place probe seq from non-BAC with blat that have no alignments yet */ { int rc = 0; char *blatSpec=NULL; char cmdLine[256]; char path1[256]; char path2[256]; struct dyString *dy = dyStringNew(0); /* (non-BACs needing alignment) */ dyStringClear(dy); dyStringPrintf(dy, "select concat(\"vgPrb_\",e.id), e.seq" " from vgPrb e, vgPrbAli a" " where e.id = a.vgPrb" " and a.db = '%s'" " and a.status = 'new'" " and e.taxon = %d" " and e.type <> 'bac'" " and e.seq <> ''" " order by e.id" , db, taxon); //restore: rc = sqlSaveQuery(conn, dy->string, "blat.fa", TRUE); verbose(1,"rc = %d = count of sequences for blat, to get psls for taxon %d\n",rc,taxon); if (rc == 0) { unlink("blat.fa"); system("rm -f blatNearBest.psl; touch blatNearBest.psl"); /* make empty file */ return; } /* make .ooc and blat on kolossus */ safef(path1,sizeof(path1),"/gbdb/%s/%s.2bit",db,db); safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db); //restore: verbose(1,"copy: [%s] to [%s]\n",path1,path2); copyFile(path1,path2); safef(cmdLine,sizeof(cmdLine), "ssh kolossus 'cd %s; blat -makeOoc=11.ooc -tileSize=11" " -repMatch=1024 %s.2bit /dev/null /dev/null'", getCurrentDir(),db); //restore: system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date"); safef(cmdLine,sizeof(cmdLine), "ssh kolossus 'cd %s; blat %s.2bit blat.fa -ooc=11.ooc -noHead blat.psl'", getCurrentDir(),db); //restore: system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date"); /* using blat even with -fastMap was way too slow - took over a day, * so instead I will make a procedure to write a fake psl for the BACs * which you will see called below */ safef(path2,sizeof(path2),"%s.2bit",db); verbose(1,"rm %s\n",path2); unlink(path2); safef(path2,sizeof(path2),"11.ooc"); verbose(1,"rm %s\n",path2); unlink(path2); /* skip psl header and sort on query name */ safef(cmdLine,sizeof(cmdLine), "sort -k 10,10 blat.psl > 
blatS.psl"); verbose(1,"cmdLine=[%s]\n",cmdLine); system(cmdLine); /* keep near best within 5% of the best */ safef(cmdLine,sizeof(cmdLine), "pslCDnaFilter -globalNearBest=0.005 -minId=0.96 -minNonRepSize=20 -minCover=0.50" " blatS.psl blatNearBest.psl"); verbose(1,"cmdLine=[%s]\n",cmdLine); system(cmdLine); unlink("blat.fa"); unlink("blat.psl"); unlink("blatS.psl"); freez(&blatSpec); dyStringFree(&dy); }
static void doPslMapAli(struct sqlConnection *conn, int taxon, char *db, int fromTaxon, char *fromDb) { char cmd[256]; struct dyString *dy = dyStringNew(0); char path[256]; char dnaPath[256]; char toDb[12]; safef(toDb,sizeof(toDb),"%s", db); toDb[0]=toupper(toDb[0]); safef(dnaPath,sizeof(dnaPath),"/cluster/data/%s/nib", db); if (!fileExists(dnaPath)) { safef(dnaPath,sizeof(dnaPath),"/cluster/data/%s/%s.2bit", db, db); if (!fileExists(dnaPath)) errAbort("unable to locate nib dir or .2bit for %s: %s", db, dnaPath); } safef(path,sizeof(path),"/gbdb/%s/liftOver/%sTo%s.over.chain.gz", fromDb, fromDb, toDb); if (!fileExists(path)) errAbort("unable to locate chain file %s",path); /* get non-bac $db.vgProbes not yet aligned */ getPslMapAli(conn, db, fromTaxon, fromDb, FALSE); /* get bac $db.vgProbes not yet aligned */ getPslMapAli(conn, db, fromTaxon, fromDb, TRUE); /* get .fa for pslRecalcMatch use */ getPslMapFa(conn, db, fromTaxon); /* non-bac */ safef(cmd,sizeof(cmd), "zcat %s | pslMap -chainMapFile -swapMap nonBac.psl stdin stdout " "| sort -k 14,14 -k 16,16n > unscoredNB.psl" ,path); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd), "pslRecalcMatch unscoredNB.psl %s" " pslMap.fa nonBac.psl" ,dnaPath); verbose(1,"%s\n",cmd); system(cmd); /* bac */ safef(cmd,sizeof(cmd), "zcat %s | pslMap -chainMapFile -swapMap bac.psl stdin stdout " "| sort -k 14,14 -k 16,16n > unscoredB.psl" ,path); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd), "pslRecalcMatch unscoredB.psl %s" " pslMap.fa bacTemp.psl" ,dnaPath); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd), "pslCDnaFilter -globalNearBest=0.00001 -minCover=0.05" " bacTemp.psl bac.psl"); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd),"cat bac.psl nonBac.psl > vgPrbPslMap.psl"); verbose(1,"%s\n",cmd); system(cmd); dyStringFree(&dy); }
static struct tissueSampleVals *getTissueSampleVals(char *geneId, boolean doLogTransform,
                                                    char *version, double *maxValRet)
/* Get sample data for the gene.  Optionally log10 it.  Return maximum value seen.
 * The result is a list with one element per tissue from gtexGetTissues(version),
 * each carrying its sample values and box-and-whisker statistics.  The maximum
 * (log-transformed when requested) is stored in *maxValRet if it is not NULL.
 * NOTE(review): the query result, the hgFixed connection, the lookup hash and
 * the loaded sample rows are not released in this function -- confirm whether
 * that is intentional. */
{
struct hash *byTissue = hashNew(0);
struct tissueSampleVals *tsv;
struct slDouble *val;
double maxVal = 0;
char tableName[256];
char query[256];
char **row;

/* Sample table name carries a version-specific suffix. */
char *sampleDataTable = "gtexSampleData";
safef(tableName, sizeof(tableName), "%s%s", sampleDataTable, gtexVersionSuffixFromVersion(version));

struct sqlConnection *conn = hAllocConn("hgFixed");
assert(sqlTableExists(conn, tableName));
sqlSafef(query, sizeof(query), "select * from %s where geneId='%s'", tableName, geneId);
struct sqlResult *sr = sqlGetResult(conn, query);

/* Group the sample scores by tissue, tracking the largest score. */
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct gtexSampleData *sample = gtexSampleDataLoad(row);
    struct hashEl *found = hashLookup(byTissue, sample->tissue);
    if (found != NULL)
        tsv = (struct tissueSampleVals *)found->val;
    else
        {
        AllocVar(tsv);
        hashAdd(byTissue, sample->tissue, tsv);
        }
    maxVal = max(maxVal, sample->score);
    val = slDoubleNew(sample->score);
    slAddHead(&tsv->valList, val);
    }

/*  Fill in tissue descriptions, fill values array and calculate stats for plotting
        Then make a list, suitable for sorting by tissue or score
    NOTE: Most of this not needed for R implementation */
if (doLogTransform)
    maxVal = log10(maxVal+1.0);

struct tissueSampleVals *resultList = NULL;
struct gtexTissue *tissue, *tissueList = gtexGetTissues(version);
for (tissue = tissueList; tissue != NULL; tissue = tissue->next)
    {
    tsv = hashFindVal(byTissue, tissue->name);
    if (tsv == NULL)
        {
        /* no non-zero values for this tissue/gene */
        AllocVar(tsv);
        val = slDoubleNew(0.0);
        slAddHead(&tsv->valList, val);
        }
    tsv->name = tissue->name;
    tsv->description = tissue->description;
    tsv->color = tissue->color;

    /* Move the list of values into a flat array, transforming on the way. */
    int total = slCount(tsv->valList);
    int ix;
    tsv->count = total;
    AllocArray(tsv->vals, total);
    for (ix = 0; ix < total; ix++)
        {
        val = slPopHead(&tsv->valList);
        tsv->vals[ix] = doLogTransform ? log10(val->val+1.0) : val->val;
        }
    doubleBoxWhiskerCalc(tsv->count, tsv->vals,
                         &tsv->min, &tsv->q1, &tsv->median, &tsv->q3, &tsv->max);
    slAddHead(&resultList, tsv);
    }

if (maxValRet != NULL)
    *maxValRet = maxVal;
return resultList;
}
struct hash *makeMotifBed(char *gffDir, char *outBed) /* Make bed file from GFFs. Return hash of transcription factors. */ { static char *consLevelPath[3] = {"3", "2", "0"}; static char *consLevelBed[3] = {"2", "1", "0"}; static char *pLevelPath[3] = {"p001b", "p005b", "nobind"}; static char *pLevelBed[3] = {"good", "weak", "none"}; int cIx, pIx; FILE *f = mustOpen(outBed, "w"); struct hash *tfHash = newHash(0); struct hash *yrcHash = newHash(18); struct yrc *yrcList = NULL, *yrc; for (cIx=0; cIx<3; ++cIx) { for (pIx=0; pIx<3; ++pIx) { struct lineFile *lf; char *row[10]; char fileName[PATH_LEN]; char hashKey[256]; safef(fileName, sizeof(fileName), "%s/IGR_v24.%s.%s.GFF", gffDir, consLevelPath[cIx], pLevelPath[pIx]); lf = lineFileOpen(fileName, TRUE); while (lineFileRow(lf, row)) { char *name = row[9]; char *e; int chromIx, chromStart, chromEnd; if (!sameWord(row[8], "Site")) errAbort("Expecting 'Site' line %d of %s", lf->lineIx, lf->fileName); e = strchr(name, ';'); if (e == NULL) errAbort("Expecting semicolon line %d of %s", lf->lineIx, lf->fileName); *e = 0; chromIx = romanToArabicChrom(row[0], lf); chromStart = lineFileNeedNum(lf, row, 3); chromEnd = lineFileNeedNum(lf, row, 4); safef(hashKey, sizeof(hashKey), "%s.%d.%d", name, chromIx, chromStart); if ((yrc = hashFindVal(yrcHash, hashKey)) == NULL) { AllocVar(yrc); yrc->chromIx= chromIx; yrc->chromStart = chromStart; yrc->chromEnd = chromEnd; yrc->name = hashStoreName(tfHash, name); yrc->pLevel = pIx; yrc->consLevel = cIx; hashAdd(yrcHash, hashKey, yrc); slAddHead(&yrcList, yrc); } else { if (pIx < yrc->pLevel) yrc->pLevel = pIx; if (cIx < yrc->consLevel) yrc->consLevel = cIx; } } lineFileClose(&lf); } } for (yrc = yrcList; yrc != NULL; yrc = yrc->next) { fprintf(f, "chr%d\t", yrc->chromIx+1); fprintf(f, "%d\t", yrc->chromStart); fprintf(f, "%d\t", yrc->chromEnd); fprintf(f, "%s\t", yrc->name); fprintf(f, "%d\t", (int)(1000/(yrc->pLevel + yrc->consLevel + 1))); fprintf(f, "%s\t", pLevelBed[yrc->pLevel]); 
fprintf(f, "%s\n", consLevelBed[yrc->consLevel]); } carefulClose(&f); hashFree(&yrcHash); return tfHash; }
void getInvariants(char *db, struct snpExceptions *exceptionList, struct slName *chromList, char *fileBase) /* write list of invariants to output file */ { struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr = NULL; struct snpExceptions *el = NULL; struct slName *chrom = NULL; char **row = NULL; char query[1024]; unsigned long int invariantCount; char thisFile[64]; FILE *outFile; int colCount, i; char idString[3]; for (el=exceptionList; el!=NULL; el=el->next) { if (el->exceptionId<10) safef(idString,sizeof(idString), "0%d", el->exceptionId); else safef(idString,sizeof(idString), "%d", el->exceptionId); invariantCount = 0; if (startsWith("select",el->query)) { safef(thisFile, sizeof(thisFile), "%s.%s.bed", fileBase, idString); outFile = mustOpen(thisFile, "w"); fprintf(outFile, "# exceptionId:\t%d\n# query:\t%s;\n", el->exceptionId, el->query); for (chrom=chromList; chrom!=NULL; chrom=chrom->next) { fflush(outFile); /* to keep an eye on output progress */ sqlSafef(query, sizeof(query), "%-s and chrom='%s'", el->query, chrom->name); sr = sqlGetResult(conn, query); colCount = sqlCountColumns(sr); while ((row = sqlNextRow(sr))!=NULL) { invariantCount++; fprintf(outFile, "%s", row[0]); for (i=1; i<colCount; i++) fprintf(outFile, "\t%s", row[i]); fprintf(outFile, "\n"); } } } else if (startsWith("group",el->query)) { struct slName *nameList = NULL; struct slName *name = NULL; safef(thisFile, sizeof(thisFile), "%s.%s.bed", fileBase, idString); outFile = mustOpen(thisFile, "w"); fprintf(outFile, "# exceptionId:\t%d\n# query:\t%s;\n", el->exceptionId, el->query); nameList = getGroupList(db, el->query); for (name=nameList; name!=NULL; name=name->next) { sqlSafef(query, sizeof(query), "select chrom,chromStart,chromEnd,name,%d as score,class,locType,observed " "from snp where name='%s'", el->exceptionId, name->name); sr = sqlGetResult(conn, query); colCount = sqlCountColumns(sr); while ((row = sqlNextRow(sr))!=NULL) { invariantCount++; fprintf(outFile, "%s", 
row[0]); for (i=1; i<colCount; i++) fprintf(outFile, "\t%s", row[i]); fprintf(outFile, "\n"); } } } else { printf("Invariant %d has no query string\n", el->exceptionId); continue; } carefulClose(&outFile); printf("Invariant %d has %lu exceptions, written to this file: %s\n", el->exceptionId, invariantCount, thisFile); fflush(stdout); sqlSafef(query, sizeof(query), "update snpExceptions set num=%lu where exceptionId=%d", invariantCount, el->exceptionId); sr=sqlGetResult(conn, query); /* there's probably a better way to do this */ } }
static void doDownload(struct sqlConnection *conn)
/* Try to force user's browser to download by giving special response headers.
 * The image id is read from the cart; the full-size image is fetched from its
 * URL and streamed to stdout as an attachment.  A problem page is shown when
 * the id is invalid, the URL cannot be opened, or the HTTP header cannot be
 * skipped. */
{
int imageId = cartUsualInt(cart, hgpId, 0);
char url[1024];
char *p = NULL;
char dir[256];
char name[128];
char extension[64];
int w = 0, h = 0;
int sd = -1;

/* an image whose size cannot be looked up is treated as invalid */
if (!visiGeneImageSize(conn, imageId, &w, &h))
    imageId = 0;

if (imageId == 0)
    {
    problemPage("invalid imageId","");
    }
else
    {
    p=visiGeneFullSizePath(conn, imageId);
    splitPath(p, dir, name, extension);
    safef(url,sizeof(url),"%s%s%s", dir, name, extension);
    sd = netUrlOpen(url);
    if (sd < 0)
        {
        problemPage("Couldn't open", url);
        }
    else
        {
        char *newUrl = NULL;
        int newSd = 0;
        /* url needed for err msgs and redirect url*/
        if (netSkipHttpHeaderLinesHandlingRedirect(sd, url, &newSd, &newUrl))
            {
            char buf[32*1024];
            int readSize;
            /* a non-NULL newUrl means a redirect happened: read from the new
             * descriptor.  newUrl itself is released once, below. */
            if (newUrl)
                sd = newSd;
            printf("Content-Type: application/octet-stream\n");
            printf("Content-Disposition: attachment; filename=%s%s\n", name, extension);
            printf("\n");
            while ((readSize = read(sd, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, readSize, stdout);
            close(sd);
            sd = -1;
            fflush(stdout);
            fclose(stdout);
            }
        else
            {
            problemPage("Skip http header problem", url);
            }
        freeMem(newUrl);
        }
    }
}
void doImage(struct sqlConnection *conn) /* Put up image page. */ { int imageId = cartUsualInt(cart, hgpId, 0); char *sidUrl = cartSidUrlString(cart); char buf[1024]; char url[1024]; char *p = NULL; char dir[256]; char name[128]; char extension[64]; int w = 0, h = 0; htmlSetBgColor(0xE0E0E0); htmStart(stdout, "do image"); puts( "<script type=\"text/JavaScript\">" "document.getElementsByTagName('html')[0].style.height=\"100%\";" "document.getElementsByTagName('body')[0].style.height=\"100%\";" "</script>" ); if (!visiGeneImageSize(conn, imageId, &w, &h)) imageId = 0; if (imageId != 0) { printf("<B>"); smallCaption(conn, imageId); printf(".</B> Click image to zoom in, drag or arrow keys to move. " "Caption is below.<BR>\n"); p=visiGeneFullSizePath(conn, imageId); splitPath(p, dir, name, extension); #ifdef DEBUG safef(buf,sizeof(buf),"../bigImageTest.html?url=%s%s/%s&w=%d&h=%d", dir,name,name,w,h); #else safef(buf,sizeof(buf),"../bigImage.html?url=%s%s/%s&w=%d&h=%d", dir,name,name,w,h); #endif printf("<IFRAME name=\"bigImg\" width=\"100%%\" height=\"90%%\" SRC=\"%s\"></IFRAME><BR>\n", buf); fullCaption(conn, imageId); safef(buf,sizeof(buf),"%s%s%s", dir, name, extension); safef(url,sizeof(url),"%s?%s=go&%s&%s=%d", hgVisiGeneCgiName(), hgpDoDownload, sidUrl, hgpId, imageId); printf("<B>Full-size image:</B> %d x %d <A HREF='%s'> download </A> ", w, h, url); /* Currently this is dangerous for users with less than 1 GB RAM to use on large images, because their machines can thrash themselves into a coma. X-windows (i.e. used by FireFox) will allocate 5 bytes per pixel. If the image size in pixels times 5 exceeds real ram size, then Linux thrashes incessantly. But you can hit ctrl-alt-F1 to get a text only screen, then kill the bad processes (FF) and then you can restore desktop with ctrl-alt-F7. Hiram says that's a feature credited to SCO-Unix. 
On my 1GB machines at work/home, I never encountered any problem what-so-ever, even with the largest visiGene AllenBrain - about 19000x9000 pix. printf(" <A HREF='%s'> view </A>\n", buf); */ printf("\n"); } htmlEnd(); }
void dbTrash(char *db)
/* dbTrash - drop tables from a database older than specified N hours.
 *
 * db - name of the database to scan (connected to with sqlConnect).
 *
 * Outline, as visible in this function:
 *   1. Split the rows of CT_META_INFO into an "expired" and a "not expired"
 *      hash according to lastUse versus NOW() - ageHours.
 *   2. List the tables that really exist, either with 'show table status'
 *      (when tableStatus is set) or with the cheaper 'show tables', and
 *      collect the ones to drop in expiredTableNames.
 *   3. Names left over in expiredHash are metaInfo rows without a table.
 *   4. If drop is set, drop the tables and log a history comment; otherwise
 *      only report (at verbose level 2) what would have been dropped.
 *
 * Behaviour is steered by file-level settings read here: ageHours,
 * extFileCheck, tableStatus, historyToo, delLostTable, drop and dropTime.
 * NOTE(review): STATUS_INIT and SCAN_STATUS are macros defined outside this
 * block; presumably STATUS_INIT runs 'query' into 'sr' and fills in the
 * *Ix column indices, and SCAN_STATUS sets timeIxUsed and 'timep' for the
 * current row - confirm against the macro definitions. */
{
char query[256];
struct sqlResult *sr;
char **row;
int updateTimeIx;
int createTimeIx;
int dataLengthIx;
int indexLengthIx;
int nameIx;
int timeIxUsed;
unsigned long long totalSize = 0;   /* data + index bytes of tables to drop */
// expiredTableNames: table exists and is in metaInfo and subject to age limits
struct slName *expiredTableNames = NULL;
struct slName *lostTables = NULL;   // tables existing but not in metaInfo
unsigned long long lostTableCount = 0;  /* only filled in by the 'show tables' path */
struct hash *expiredHash = newHash(10); // as determined by metaInfo
struct hash *notExpiredHash = newHash(10);
struct sqlConnection *conn = sqlConnect(db);

if (extFileCheck)
    checkExtFile(conn);

time_t ageSeconds = (time_t)(ageHours * 3600);  /* age in seconds */

/* metaInfo rows last used before the cutoff: name -> lastUse (unix time) */
sqlSafef(query,sizeof(query),"select name,UNIX_TIMESTAMP(lastUse) from %s WHERE "
    "lastUse < DATE_SUB(NOW(), INTERVAL %ld SECOND);", CT_META_INFO,ageSeconds);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    hashAddInt(expiredHash, row[0], sqlSigned(row[1]));
sqlFreeResult(&sr);

/* metaInfo rows used at or after the cutoff: name -> lastUse (unix time) */
sqlSafef(query,sizeof(query),"select name,UNIX_TIMESTAMP(lastUse) from %s WHERE "
    "lastUse >= DATE_SUB(NOW(), INTERVAL %ld SECOND);",CT_META_INFO,ageSeconds);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    hashAddInt(notExpiredHash, row[0], sqlSigned(row[1]));
sqlFreeResult(&sr);

if (tableStatus)  // show table status is very expensive, use only when asked
    {
    /* run through the table status business to get table size information */
    sqlSafef(query,sizeof(query),"show table status");
    STATUS_INIT;
    while ((row = sqlNextRow(sr)) != NULL)
        {
        /* if not doing history too, and this is the history table, next row */
        if ((!historyToo) && (sameWord(row[nameIx],"history")))
            continue;
        /* also skip the metaInfo table */
        if ((!historyToo) && (sameWord(row[nameIx],CT_META_INFO)))
            continue;
        /* don't delete the extFile table */
        if (sameWord(row[nameIx],CT_EXTFILE))
            continue;

        SCAN_STATUS;

        if (hashLookup(expiredHash,row[nameIx]))
            {
            /* Table exists and metaInfo says it is expired: schedule drop. */
            slNameAddHead(&expiredTableNames, row[nameIx]);
            verbose(3,"%s %ld drop %s\n",row[timeIxUsed], (unsigned long)timep, row[nameIx]);
            /* If sizes are non-NULL, add them up */
            if ( ((char *)NULL != row[dataLengthIx]) && ((char *)NULL != row[indexLengthIx]) )
                totalSize += sqlLongLong(row[dataLengthIx]) + sqlLongLong(row[indexLengthIx]);
            /* Remove so that what is left in expiredHash afterwards is
             * exactly the metaInfo rows that have no table. */
            hashRemove(expiredHash, row[nameIx]);
            }
        else
            {
            if (hashLookup(notExpiredHash,row[nameIx]))
                verbose(3,"%s %ld OK %s\n",row[timeIxUsed], (unsigned long)timep, row[nameIx]);
            else
                {
                /* table exists, but not in metaInfo, is it old enough ? */
                if (timep < dropTime)
                    {
                    slNameAddHead(&expiredTableNames, row[nameIx]);
                    verbose(2,"%s %ld dropt %s lost table\n", row[timeIxUsed], (unsigned long)timep, row[nameIx]);
                    /* If sizes are non-NULL, add them up */
                    if ( ((char *)NULL != row[dataLengthIx]) && ((char *)NULL != row[indexLengthIx]) )
                        totalSize += sqlLongLong(row[dataLengthIx]) + sqlLongLong(row[indexLengthIx]);
                    }
                else
                    verbose(3,"%s %ld OKt %s\n",row[timeIxUsed], (unsigned long)timep, row[nameIx]);
                }
            }
        }
    sqlFreeResult(&sr);
    }
else
    {
    // simple 'show tables' is more efficient than 'show table status'
    sqlSafef(query,sizeof(query),"show tables");
    sr = sqlGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
        {
        if (hashLookup(expiredHash,row[0]))
            {
            slNameAddHead(&expiredTableNames, row[0]);
            /* No status row here, so report the lastUse time from metaInfo. */
            time_t lastUse = (time_t)hashIntVal(expiredHash,row[0]);
            struct tm *lastUseTm = localtime(&lastUse);
            verbose(3,"%4d-%02d-%02d %02d:%02d:%02d %ld drop %s\n",
                lastUseTm->tm_year+1900, lastUseTm->tm_mon+1, lastUseTm->tm_mday,
                lastUseTm->tm_hour, lastUseTm->tm_min, lastUseTm->tm_sec,
                (unsigned long)lastUse,row[0]);
            hashRemove(expiredHash, row[0]);
            }
        else if (hashLookup(notExpiredHash,row[0]))
            {
            time_t lastUse = (time_t)hashIntVal(notExpiredHash,row[0]);
            struct tm *lastUseTm = localtime(&lastUse);
            verbose(3,"%4d-%02d-%02d %02d:%02d:%02d %ld OK %s\n",
                lastUseTm->tm_year+1900, lastUseTm->tm_mon+1, lastUseTm->tm_mday,
                lastUseTm->tm_hour, lastUseTm->tm_min, lastUseTm->tm_sec,
                (unsigned long)lastUse,row[0]);
            }
        else
            {
            /* Exists but unknown to metaInfo: remember as a lost table. */
            struct slName *el = slNameNew(row[0]);
            slAddHead(&lostTables, el);
            }
        }
    sqlFreeResult(&sr);
    /* Counted before the history/metaInfo/extFile names are skipped below,
     * so those names are included in the count if present in the list. */
    lostTableCount = slCount(lostTables);
    // If tables exist, but not in metaInfo, check their age to expire them.
    // It turns out even this show table status is slow too, so, only
    // run thru it if asked to eliminate lost tables.  It is better to
    // do this operation with the stand-alone perl script on the customTrash
    // database machine.
    if (delLostTable && lostTables)
        {
        struct slName *el;
        for (el = lostTables; el != NULL; el = el->next)
            {
            if (sameWord(el->name,"history"))
                continue;
            if (sameWord(el->name,CT_META_INFO))
                continue;
            if (sameWord(el->name,CT_EXTFILE))
                continue;
            boolean oneTableOnly = FALSE;   // protect against multiple tables
            /* get table time information to see if it is expired */
            sqlSafef(query,sizeof(query),"show table status like '%s'", el->name);
            STATUS_INIT;
            while ((row = sqlNextRow(sr)) != NULL)
                {
                if (oneTableOnly)
                    errAbort("ERROR: query: '%s' returned more than one table "
                                "name\n", query);
                else
                    oneTableOnly = TRUE;
                if (differentWord(row[nameIx], el->name))
                    errAbort("ERROR: query: '%s' did not return table name '%s' != '%s'\n", query, el->name, row[nameIx]);

                SCAN_STATUS;

                if (timep < dropTime)
                    {
                    slNameAddHead(&expiredTableNames, row[nameIx]);
                    verbose(2,"%s %ld dropt %s lost table\n",
                        row[timeIxUsed], (unsigned long)timep, row[nameIx]);
                    }
                else
                    verbose(3,"%s %ld OKt %s\n",
                        row[timeIxUsed], (unsigned long)timep, row[nameIx]);
                }
            sqlFreeResult(&sr);
            }
        }
    }

/* perhaps the table was already dropped, but not from the metaInfo */
struct hashEl *elList = hashElListHash(expiredHash);
struct hashEl *el;
for (el = elList; el != NULL; el = el->next)
    {
    verbose(2,"%s exists in %s only\n", el->name, CT_META_INFO);
    if (drop)
        ctTouchLastUse(conn, el->name, FALSE); /* removes metaInfo row */
    }

if (drop)
    {
    char comment[256];
    if (expiredTableNames)
        {
        struct slName *el;
        int droppedCount = 0;
        /* customTrash DB user permissions do not have permissions to
         * drop tables.  Must use standard special user that has all
         * permissions.  If we are not using the standard user at this
         * point, then switch to it.
         * NOTE(review): the reconnect below uses the same sqlConnect(db)
         * call as the initial connection - confirm it really selects a
         * different user. */
        if (sameWord(db,CUSTOM_TRASH))
            {
            sqlDisconnect(&conn);
            conn = sqlConnect(db);
            }
        for (el = expiredTableNames; el != NULL; el = el->next)
            {
            verbose(2,"# drop %s\n", el->name);
            sqlDropTable(conn, el->name);
            ctTouchLastUse(conn, el->name, FALSE); /* removes metaInfo row */
            ++droppedCount;
            }
        /* add a comment to the history table and finish up connection */
        if (tableStatus)
            safef(comment, sizeof(comment), "Dropped %d tables with "
                "total size %llu, %llu lost tables",
                    droppedCount, totalSize, lostTableCount);
        else
            safef(comment, sizeof(comment),
                "Dropped %d tables, no size info, %llu lost tables",
                    droppedCount, lostTableCount);
        verbose(2,"# %s\n", comment);
        hgHistoryComment(conn, "%s", comment);
        }
    else
        {
        /* Nothing dropped: report only, no history entry is written. */
        safef(comment, sizeof(comment),
            "Dropped no tables, none expired, %llu lost tables",
                lostTableCount);
        verbose(2,"# %s\n", comment);
        }
    }
else
    {
    /* Dry run: same accounting as above, but nothing is dropped or logged
     * to the history table. */
    char comment[256];
    if (expiredTableNames)
        {
        int droppedCount = slCount(expiredTableNames);
        if (tableStatus)
            safef(comment, sizeof(comment), "Would have dropped %d tables with "
                "total size %llu, %llu lost tables",
                    droppedCount, totalSize, lostTableCount);
        else
            safef(comment, sizeof(comment),
                "Would have dropped %d tables, no size info, %llu lost tables",
                    droppedCount, lostTableCount);
        verbose(2,"# %s\n", comment);
        }
    else
        {
        safef(comment, sizeof(comment),
            "Would have dropped no tables, none expired, %llu lost tables",
                lostTableCount);
        verbose(2,"# %s\n", comment);
        }
    }
sqlDisconnect(&conn);
}
static void doSeqAndExtFile(struct sqlConnection *conn, char *db, char *table) { int rc = 0; char cmd[256]; char path[256]; char bedPath[256]; char gbdbPath[256]; char *fname=NULL; struct dyString *dy = dyStringNew(0); dyStringClear(dy); dyStringPrintf(dy, "select distinct concat('vgPrb_',e.id), e.seq" " from vgPrb e join %s.%s v" " left join %s.seq s on s.acc = v.qName" " where concat('vgPrb_',e.id) = v.qName" " and s.acc is NULL" " order by e.id" , db, table, db); rc = sqlSaveQuery(conn, dy->string, "vgPrbExt.fa", TRUE); verbose(1,"rc = %d = count of sequences for vgPrbExt.fa, to use with %s track %s\n",rc,db,table); if (rc > 0) /* can set any desired minimum */ { safef(bedPath,sizeof(bedPath),"/cluster/data/%s/bed/visiGene/",db); if (!fileExists(bedPath)) { safef(cmd,sizeof(cmd),"mkdir %s",bedPath); verbose(1,"%s\n",cmd); system(cmd); } safef(gbdbPath,sizeof(gbdbPath),"/gbdb/%s/visiGene/",db); if (!fileExists(gbdbPath)) { safef(cmd,sizeof(cmd),"mkdir %s",gbdbPath); verbose(1,"%s\n",cmd); system(cmd); } while(1) { int i=0; safef(path,sizeof(path),"%svgPrbExt_AAAAAA.fa",bedPath); char *c = rStringIn("AAAAAA",path); srand( (unsigned)time( NULL ) ); for(i=0;i<6;++i) { *c++ += (int) 26 * (rand() / (RAND_MAX + 1.0)); } if (!fileExists(path)) break; } safef(cmd,sizeof(cmd),"cp vgPrbExt.fa %s",path); verbose(1,"%s\n",cmd); system(cmd); fname = rStringIn("/", path); ++fname; safef(cmd,sizeof(cmd),"ln -s %s %s%s",path,gbdbPath,fname); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd),"hgLoadSeq %s %s%s", db, gbdbPath,fname); verbose(1,"%s\n",cmd); system(cmd); } dyStringFree(&dy); }
void *netConnectHttpsThread(void *threadParam) /* use a thread to run socket back to user */ { /* child */ struct netConnectHttpsParams *params = threadParam; pthread_detach(params->thread); // this thread will never join back with it's progenitor int fd=0; char hostnameProto[256]; BIO *sbio; SSL_CTX *ctx; SSL *ssl; openSslInit(); ctx = SSL_CTX_new(SSLv23_client_method()); fd_set readfds; fd_set writefds; int err; struct timeval tv; /* TODO checking certificates char *certFile = NULL; char *certPath = NULL; if (certFile || certPath) { SSL_CTX_load_verify_locations(ctx,certFile,certPath); #if (OPENSSL_VERSION_NUMBER < 0x0090600fL) SSL_CTX_set_verify_depth(ctx,1); #endif } // verify paths and mode. */ sbio = BIO_new_ssl_connect(ctx); BIO_get_ssl(sbio, &ssl); if(!ssl) { xerr("Can't locate SSL pointer"); goto cleanup; } /* Don't want any retries since we are non-blocking bio now */ //SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY); safef(hostnameProto,sizeof(hostnameProto),"%s:%d",params->hostName,params->port); BIO_set_conn_hostname(sbio, hostnameProto); BIO_set_nbio(sbio, 1); /* non-blocking mode */ while (1) { if (BIO_do_connect(sbio) == 1) { break; /* Connected */ } if (! 
BIO_should_retry(sbio)) { xerr("BIO_do_connect() failed"); char s[256]; safef(s, sizeof s, "SSL error: %s", ERR_reason_error_string(ERR_get_error())); xerr(s); goto cleanup; } fd = BIO_get_fd(sbio, NULL); if (fd == -1) { xerr("unable to get BIO descriptor"); goto cleanup; } FD_ZERO(&readfds); FD_ZERO(&writefds); if (BIO_should_read(sbio)) { FD_SET(fd, &readfds); } else if (BIO_should_write(sbio)) { FD_SET(fd, &writefds); } else { /* BIO_should_io_special() */ FD_SET(fd, &readfds); FD_SET(fd, &writefds); } tv.tv_sec = (long) (DEFAULTCONNECTTIMEOUTMSEC/1000); // timeout default 10 seconds tv.tv_usec = (long) (((DEFAULTCONNECTTIMEOUTMSEC/1000)-tv.tv_sec)*1000000); err = select(fd + 1, &readfds, &writefds, NULL, &tv); if (err < 0) { xerr("select() error"); goto cleanup; } if (err == 0) { char s[256]; safef(s, sizeof s, "connection timeout to %s", params->hostName); xerr(s); goto cleanup; } } /* TODO checking certificates if (certFile || certPath) if (!check_cert(ssl, host)) return -1; */ /* we need to wait on both the user's socket and the BIO SSL socket * to see if we need to ferry data from one to the other */ char sbuf[32768]; // socket buffer sv[1] to user char bbuf[32768]; // bio buffer int srd = 0; int swt = 0; int brd = 0; int bwt = 0; while (1) { // Do NOT move this outside the while loop. 
/* Get underlying file descriptor, needed for select call */ fd = BIO_get_fd(sbio, NULL); if (fd == -1) { xerr("BIO doesn't seem to be initialized in https, unable to get descriptor."); goto cleanup; } FD_ZERO(&readfds); FD_ZERO(&writefds); if (brd == 0) FD_SET(fd, &readfds); if (swt < srd) FD_SET(fd, &writefds); if (srd == 0) FD_SET(params->sv[1], &readfds); tv.tv_sec = (long) (DEFAULTCONNECTTIMEOUTMSEC/1000); // timeout default 10 seconds tv.tv_usec = (long) (((DEFAULTCONNECTTIMEOUTMSEC/1000)-tv.tv_sec)*1000000); err = select(max(fd,params->sv[1]) + 1, &readfds, &writefds, NULL, &tv); /* Evaluate select() return code */ if (err < 0) { xerr("error during select()"); goto cleanup; } else if (err == 0) { /* Timed out - just quit */ xerr("https timeout expired"); goto cleanup; } else { if (FD_ISSET(params->sv[1], &readfds)) { swt = 0; srd = read(params->sv[1], sbuf, 32768); if (srd == -1) { if (errno != 104) // udcCache often closes causing "Connection reset by peer" xerrno("error reading https socket"); goto cleanup; } if (srd == 0) break; // user closed socket, we are done } if (FD_ISSET(fd, &writefds)) { int swtx = BIO_write(sbio, sbuf+swt, srd-swt); if (swtx <= 0) { if (!BIO_should_write(sbio)) { ERR_print_errors_fp(stderr); xerr("Error writing SSL connection"); goto cleanup; } } else { swt += swtx; if (swt >= srd) { swt = 0; srd = 0; } } } if (FD_ISSET(fd, &readfds)) { bwt = 0; brd = BIO_read(sbio, bbuf, 32768); if (brd <= 0) { if (BIO_should_read(sbio)) { brd = 0; continue; } else { if (brd == 0) break; ERR_print_errors_fp(stderr); xerr("Error reading SSL connection"); goto cleanup; } } // write the https data received immediately back on socket to user, and it's ok if it blocks. 
while(bwt < brd) { int bwtx = write(params->sv[1], bbuf+bwt, brd-bwt); if (bwtx == -1) { if ((errno != 104) // udcCache often closes causing "Connection reset by peer" && (errno != 32)) // udcCache often closes causing "Broken pipe" xerrno("error writing https data back to user socket"); goto cleanup; } bwt += bwtx; } brd = 0; bwt = 0; } } } cleanup: BIO_free_all(sbio); close(params->sv[1]); /* we are done with it */ return NULL; }
static void doPrimers(struct sqlConnection *conn, int taxon, char *db) /* get probe seq from primers */ { int rc = 0; struct dyString *dy = dyStringNew(0); char cmdLine[256]; char path1[256]; char path2[256]; dyStringClear(dy); dyStringAppend(dy, "select e.id, p.fPrimer, p.rPrimer from probe p, vgPrbMap m, vgPrb e, gene g"); dyStringPrintf(dy, " where p.id = m.probe and m.vgPrb = e.id and g.id = p.gene and g.taxon = %d",taxon); dyStringAppend(dy, " and e.state = 'new' and e.type='primersMrna'"); rc = sqlSaveQuery(conn, dy->string, "primers.query", FALSE); verbose(1,"rc = %d = count of primers for mrna search for taxon %d\n",rc,taxon); if (rc > 0) /* something to do */ { dyStringClear(dy); dyStringPrintf(dy, "select qName from %s.all_mrna",db); rc = 0; rc = sqlSaveQuery(conn, dy->string, "accFile.txt", FALSE); safef(cmdLine,sizeof(cmdLine),"getRna %s accFile.txt mrna.fa",db); system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date"); verbose(1,"rc = %d = count of mrna for %s\n",rc,db); system("date"); system("isPcr mrna.fa primers.query isPcr.fa -out=fa"); system("date"); system("ls -l"); processIsPcr(conn,taxon,db); unlink("mrna.fa"); unlink("accFile.txt"); unlink("isPcr.fa"); } unlink("primers.query"); /* find any remaining type primersMrna that couldn't be resolved and demote * them to type primersGenome */ dyStringClear(dy); dyStringAppend(dy, "update vgPrb set type='primersGenome'"); dyStringPrintf(dy, " where taxon = %d",taxon); dyStringAppend(dy, " and state = 'new' and type='primersMrna'"); sqlUpdate(conn, dy->string); /* get primers for those probes that did not find mrna isPcr matches * and then do them against the genome instead */ dyStringClear(dy); dyStringAppend(dy, "select e.id, p.fPrimer, p.rPrimer from probe p, vgPrbMap m, vgPrb e, gene g"); dyStringPrintf(dy, " where p.id = m.probe and m.vgPrb = e.id and g.id = p.gene and g.taxon = %d",taxon); dyStringAppend(dy, " and e.state = 'new' and e.type='primersGenome'"); rc = 0; 
rc = sqlSaveQuery(conn, dy->string, "primers.query", FALSE); verbose(1,"rc = %d = count of primers for genome search for taxon %d\n",rc,taxon); if (rc > 0) /* something to do */ { safef(path1,sizeof(path1),"/gbdb/%s/%s.2bit",db,db); safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db); verbose(1,"copy: [%s] to [%s]\n",path1,path2); copyFile(path1,path2); safef(cmdLine,sizeof(cmdLine), "ssh kolossus 'cd %s; isPcr %s.2bit primers.query isPcr.fa -out=fa'", getCurrentDir(),db); system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date"); safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db); verbose(1,"rm %s\n",path2); unlink(path2); system("ls -l"); processIsPcr(conn,taxon,db); unlink("mrna.fa"); unlink("accFile.txt"); unlink("isPcr.fa"); } unlink("primers.query"); /* find any remaining type primersGenome that couldn't be resolved and demote * them to type refSeq */ dyStringClear(dy); dyStringAppend(dy, "update vgPrb set type='refSeq'"); dyStringPrintf(dy, " where taxon = %d",taxon); dyStringAppend(dy, " and state = 'new' and type='primersGenome'"); sqlUpdate(conn, dy->string); dyStringFree(&dy); }
void testOneTable(struct htmlPage *trackPage, char *org, char *db, char *group, char *track, char *table) /* Test stuff on one table if we haven't already tested this table. */ { /* Why declared here and not globally? */ static struct hash *uniqHash = NULL; char fullName[256]; if (uniqHash == NULL) uniqHash = newHash(0); safef(fullName, sizeof(fullName), "%s.%s", db, table); if (!hashLookup(uniqHash, fullName)) { struct htmlPage *tablePage; struct htmlForm *mainForm; hashAdd(uniqHash, fullName, NULL); verbose(1, "Testing %s %s %s %s %s\n", naForNull(org), db, group, track, table); tablePage = quickSubmit(trackPage, org, db, group, track, table, "selectTable", hgtaTable, table); if (!isObsolete(table) && tablePage != NULL) { if ((mainForm = htmlFormGet(tablePage, "mainForm")) == NULL) { qaStatusSoftError(tablesTestList->status, "Couldn't get main form on tablePage for %s %s %s %s", db, group, track, table); } else { testSchema(tablePage, mainForm, org, db, group, track, table); testSummaryStats(tablePage, mainForm, org, db, group, track, table); if (outTypeAvailable(mainForm, "bed")) { if (outTypeAvailable(mainForm, "primaryTable")) { int rowCount; rowCount = testAllFields(tablePage, mainForm, org, db, group, track, table); testOneField(tablePage, mainForm, org, db, group, track, table, rowCount); testOutSequence(tablePage, mainForm, org, db, group, track, table, rowCount); testOutBed(tablePage, mainForm, org, db, group, track, table, rowCount); testOutHyperlink(tablePage, mainForm, org, db, group, track, table, rowCount); testOutGff(tablePage, mainForm, org, db, group, track, table); if (rowCount > 0) testOutCustomTrack(tablePage, mainForm, org, db, group, track, table); } } else if (outTypeAvailable(mainForm, "primaryTable")) { /* If BED type is not available then the region will be ignored, and * we'll end up scanning whole table. Make sure table is not huge * before proceeding. 
*/ if (tableSize(db, table) < 500000) { int rowCount; rowCount = testAllFields(tablePage, mainForm, org, db, group, track, table); testOneField(tablePage, mainForm, org, db, group, track, table, rowCount); } } } htmlPageFree(&tablePage); } carefulCheckHeap(); } }
static struct genePos *tameAssociationFilter(
        struct slName *termList, boolean orLogic,
        struct column *col, struct sqlConnection *conn, struct genePos *list)
/* Handle filtering when there are no wildcards present.
 *
 * termList - search terms; each one is substituted into col->invQueryOne
 * orLogic  - TRUE: keep genes matching any term; FALSE: genes must match
 *            every term
 * col      - supplies invQueryOne and protKey
 * conn     - connection the per-term queries run on
 * list     - gene list to filter
 *
 * Returns the result of weedUnlessInHash(list, passHash), i.e. the list
 * restricted to the genes whose key passed the term tests. */
{
struct sqlResult *sr;
char **row;
struct slName *term;
struct hash *passHash = newHash(17);    /* keys accepted so far */
struct hash *protHash = NULL;           /* protein -> gene name, if protKey */
struct hash *prevHash = NULL;           /* AND logic: keys passing all earlier terms */
struct genePos *gp;

/* Make up protein-keyed hash if need be. */
if (col->protKey)
    {
    protHash = newHash(17);
    for (gp = list; gp != NULL; gp = gp->next)
        hashAdd(protHash, gp->protein, gp->name);
    }

for (term = termList; term != NULL; term = term->next)
    {
    char query[1024];
    /* NOTE(review): term->name goes into the SQL text through the format
     * in col->invQueryOne; no escaping is visible here - confirm the
     * terms are sanitized by the caller. */
    safef(query, sizeof(query), col->invQueryOne, term->name);
    sr = sqlGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
        {
        char *key = row[0];
        /* Query rows are keyed by protein when protKey is set; translate
         * to the gene name, or NULL if the protein isn't in the list. */
        if (protHash != NULL)
            key = hashFindVal(protHash, key);
        if (key != NULL)
            {
            /* With AND logic a key only passes if it also passed every
             * earlier term. */
            if (prevHash == NULL || hashLookup(prevHash, key) != NULL)
                hashStore(passHash, key);
            }
        }
    if (!orLogic)
        {
        /* Roll this term's survivors over as the filter for the next
         * term; after the last term passHash is the final answer. */
        hashFree(&prevHash);
        if (term->next != NULL)
            {
            prevHash = passHash;
            passHash = newHash(17);
            }
        }
    sqlFreeResult(&sr);
    }

list = weedUnlessInHash(list, passHash);
hashFree(&prevHash);
hashFree(&protHash);
hashFree(&passHash);
return list;
}
void checkExp(char *bedFileName, char *tNibDir, char *nibList) { struct lineFile *bf = lineFileOpen(bedFileName , TRUE), *af = NULL; char *row[PSEUDOGENELINK_NUM_COLS] ; struct pseudoGeneLink *ps; char *tmpName[512], cmd[512]; struct axt *axtList = NULL, *axt, *mAxt = NULL; struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seqList = NULL; struct nibInfo *qNib = NULL, *tNib = NULL; FILE *op; int ret; if (nibHash == NULL) nibHash = hashNew(0); while (lineFileNextRow(bf, row, ArraySize(row))) { struct misMatch *misMatchList = NULL; struct binKeeper *bk = NULL; struct binElement *el, *elist = NULL; struct psl *mPsl = NULL, *rPsl = NULL, *pPsl = NULL, *psl ; struct misMatch *mf = NULL; ps = pseudoGeneLinkLoad(row); tmpName[0] = cloneString(ps->name); chopByChar(tmpName[0], '.', tmpName, sizeof(tmpName)); verbose(2,"name %s %s:%d-%d\n", ps->name, ps->chrom, ps->chromStart,ps->chromEnd); /* get expressed retro from hash */ bk = hashFindVal(mrnaHash, ps->chrom); elist = binKeeperFindSorted(bk, ps->chromStart, ps->chromEnd ) ; for (el = elist; el != NULL ; el = el->next) { rPsl = el->val; verbose(2,"retroGene %s %s:%d-%d\n",rPsl->qName, ps->chrom, ps->chromStart,ps->chromEnd); } /* find mrnas that overlap parent gene */ bk = hashFindVal(mrnaHash, ps->gChrom); elist = binKeeperFindSorted(bk, ps->gStart , ps->gEnd ) ; for (el = elist; el != NULL ; el = el->next) { pPsl = el->val; verbose(2,"parent %s %s:%d %d,%d\n", pPsl->qName, pPsl->tName,pPsl->tStart, pPsl->match, pPsl->misMatch); } /* find self chain */ bk = hashFindVal(chainHash, ps->chrom); elist = binKeeperFind(bk, ps->chromStart , ps->chromEnd ) ; slSort(&elist, chainCmpScoreDesc); for (el = elist; el != NULL ; el = el->next) { struct chain *chain = el->val, *subChain, *retChainToFree, *retChainToFree2; int qs = chain->qStart; int qe = chain->qEnd; int id = chain->id; if (chain->qStrand == '-') { qs = chain->qSize - chain->qEnd; qe = chain->qSize - chain->qStart; } if (!sameString(chain->qName , ps->gChrom) || 
!positiveRangeIntersection(qs, qe, ps->gStart, ps->gEnd)) { verbose(2," wrong chain %s:%d-%d %s:%d-%d parent %s:%d-%d\n", chain->qName, qs, qe, chain->tName,chain->tStart,chain->tEnd, ps->gChrom,ps->gStart,ps->gEnd); continue; } verbose(2,"chain id %d %4.0f",chain->id, chain->score); chainSubsetOnT(chain, ps->chromStart+7, ps->chromEnd-7, &subChain, &retChainToFree); if (subChain != NULL) chain = subChain; chainSubsetOnQ(chain, ps->gStart, ps->gEnd, &subChain, &retChainToFree2); if (subChain != NULL) chain = subChain; if (chain->qStrand == '-') { qs = chain->qSize - chain->qEnd; qe = chain->qSize - chain->qStart; } verbose(2," %s:%d-%d %s:%d-%d ", chain->qName, qs, qe, chain->tName,chain->tStart,chain->tEnd); if (subChain != NULL) verbose(2,"subChain %s:%d-%d %s:%d-%d\n", subChain->qName, subChain->qStart, subChain->qEnd, subChain->tName,subChain->tStart,subChain->tEnd); qNib = nibInfoFromCache(nibHash, tNibDir, chain->qName); tNib = nibInfoFromCache(nibHash, tNibDir, chain->tName); tSeq = nibInfoLoadStrand(tNib, chain->tStart, chain->tEnd, '+'); qSeq = nibInfoLoadStrand(qNib, chain->qStart, chain->qEnd, chain->qStrand); axtList = chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart, maxGap, BIGNUM); verbose(2,"axt count %d misMatch cnt %d\n",slCount(axtList), slCount(misMatchList)); for (axt = axtList; axt != NULL ; axt = axt->next) { addMisMatch(&misMatchList, axt, chain->qSize); } verbose(2,"%d in mismatch list %s id %d \n",slCount(misMatchList), chain->qName, id); chainFree(&retChainToFree); chainFree(&retChainToFree2); break; } /* create axt of each expressed retroGene to parent gene */ /* get alignment for each mrna overlapping retroGene */ bk = hashFindVal(mrnaHash, ps->chrom); elist = binKeeperFindSorted(bk, ps->chromStart , ps->chromEnd ) ; { char queryName[512]; char axtName[512]; char pslName[512]; safef(queryName, sizeof(queryName), "/tmp/query.%s.fa", ps->chrom); safef(axtName, sizeof(axtName), "/tmp/tmp.%s.axt", ps->chrom); safef(pslName, 
sizeof(pslName), "/tmp/tmp.%s.psl", ps->chrom); op = fopen(pslName,"w"); for (el = elist ; el != NULL ; el = el->next) { psl = el->val; pslOutput(psl, op, '\t','\n'); qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0); if (qSeq != NULL) slAddHead(&seqList, qSeq); else errAbort("seq %s not found \n", psl->qName); } fclose(op); faWriteAll(queryName, seqList); safef(cmd,sizeof(cmd),"pslPretty -long -axt %s %s %s %s",pslName , nibList, queryName, axtName); ret = system(cmd); if (ret != 0) errAbort("ret is %d %s\n",ret,cmd); verbose(2, "ret is %d %s\n",ret,cmd); af = lineFileOpen(axtName, TRUE); while ((axt = axtRead(af)) != NULL) slAddHead(&mAxt, axt); lineFileClose(&af); } slReverse(&mAxt); /* for each parent/retro pair, count bases matching retro and parent better */ for (el = elist; el != NULL ; el = el->next) { int i, scoreRetro=0, scoreParent=0, scoreNeither=0; struct dyString *parentMatch = newDyString(16*1024); struct dyString *retroMatch = newDyString(16*1024); mPsl = el->val; if (mAxt != NULL) { verbose(2,"mrna %s %s:%d %d,%d axt %s\n", mPsl->qName, mPsl->tName,mPsl->tStart, mPsl->match, mPsl->misMatch, mAxt->qName); assert(sameString(mPsl->qName, mAxt->qName)); for (i = 0 ; i< (mPsl->tEnd-mPsl->tStart) ; i++) { int j = mAxt->tStart - mPsl->tStart; verbose(5, "listLen = %d\n",slCount(&misMatchList)); if ((mf = matchFound(&misMatchList, (mPsl->tStart)+i)) != NULL) { if (toupper(mf->retroBase) == toupper(mAxt->qSym[j+i])) { verbose (3,"match retro[%d] %d %c == %c parent %c %d\n", i,mf->retroLoc, mf->retroBase, mAxt->qSym[j+i], mf->parentBase, mf->parentLoc); dyStringPrintf(retroMatch, "%d,", mf->retroLoc); scoreRetro++; } else if (toupper(mf->parentBase) == toupper(mAxt->qSym[j+i])) { verbose (3,"match parent[%d] %d %c == %c retro %c %d\n", i,mf->parentLoc, mf->parentBase, mAxt->qSym[j+i], mf->retroBase, mf->retroLoc); dyStringPrintf(parentMatch, "%d,", mf->parentLoc); scoreParent++; } else { verbose (3,"match neither[%d] %d %c != %c retro %c %d\n", 
i,mf->parentLoc, mf->parentBase, mAxt->tSym[j+i], mf->retroBase, mf->retroLoc); scoreNeither++; } } } verbose(2,"final score %s parent %d retro %d neither %d\n", mPsl->qName, scoreParent, scoreRetro, scoreNeither); fprintf(outFile,"%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%s\t%s\n", ps->chrom, ps->chromStart, ps->chromEnd, ps->name, ps->score, mPsl->tName, mPsl->tStart, mPsl->tEnd, mPsl->qName, scoreParent, scoreRetro, scoreNeither, parentMatch->string, retroMatch->string); mAxt = mAxt->next; } dyStringFree(&parentMatch); dyStringFree(&retroMatch); } } }
static void associationCalcDistances(struct order *ord, struct sqlConnection *conn, /* connection to main database. */ struct genePos **pGeneList, struct hash *geneHash, int maxCount) /* Fill in distance fields in geneList. */ { struct sqlResult *sr; char **row; struct hash *curTerms = newHash(8); struct hash *protHash = NULL; struct hash *lookupHash = geneHash; char query[512]; struct genePos *gp; char *geneId = curGeneId->name; if (ord->protKey) { /* Build up hash of genes keyed by protein names. (The geneHash * passed in is keyed by the mrna name. */ protHash = newHash(17); for (gp = *pGeneList; gp != NULL; gp = gp->next) { char *id = (ord->protKey ? (kgVersion == KG_III ? lookupProtein(conn, gp->name) : gp->protein) : gp->name); hashAdd(protHash, id, gp); } /* Also switch current gene id and lookup hash to protein. */ geneId = curGeneId->protein; lookupHash = protHash; } /* Build up hash full of all go IDs associated with gene. */ if (geneId != NULL) { safef(query, sizeof(query), ord->queryOne, geneId); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { hashAdd(curTerms, row[0], NULL); } sqlFreeResult(&sr); } /* Stream through association table counting matches. */ sr = sqlGetResult(conn, ord->queryAll); while ((row = sqlNextRow(sr)) != NULL) { if (hashLookup(curTerms, row[1])) { struct hashEl *hel = hashLookup(lookupHash, row[0]); while (hel != NULL) { gp = hel->val; gp->count += 1; hel = hashLookupNext(hel); } } } sqlFreeResult(&sr); /* Go through list translating non-zero counts to distances. */ for (gp = *pGeneList; gp != NULL; gp = gp->next) { if (gp->count > 0) { gp->distance = 1.0/gp->count; gp->count = 0; } if (sameString(gp->name, curGeneId->name)) /* Force self to top of list. */ gp->distance = 0; } hashFree(&protHash); hashFree(&curTerms); }
void bamLoadItemsCore(struct track *tg, boolean isPaired)
/* Load BAM data into tg->items item list, unless zoomed out so far
 * that the data would just end up in dense mode and be super-slow.
 * Anything that errAborts while loading is caught here and turned into
 * a warning drawn in place of the track. */
{
/* protect against temporary network error */
struct errCatch *catcher = errCatchNew();
if (errCatchStart(catcher))
    {
    struct hash *pairHash = isPaired ? hashNew(18) : NULL;
    int minAliQual = atoi(cartOrTdbString(cart, tg->tdb, BAM_MIN_ALI_QUAL, BAM_MIN_ALI_QUAL_DEFAULT));
    char *colorMode = cartOrTdbString(cart, tg->tdb, BAM_COLOR_MODE, BAM_COLOR_MODE_DEFAULT);
    char *grayMode = cartOrTdbString(cart, tg->tdb, BAM_GRAY_MODE, BAM_GRAY_MODE_DEFAULT);
    char *userTag = cartOrTdbString(cart, tg->tdb, BAM_COLOR_TAG, BAM_COLOR_TAG_DEFAULT);
    int aliQualShadeMin = 0, aliQualShadeMax = 99;
    int baseQualShadeMin = 0, baseQualShadeMax = 40;
    parseIntRangeSetting(tg->tdb, "aliQualRange", &aliQualShadeMin, &aliQualShadeMax);
    parseIntRangeSetting(tg->tdb, "baseQualRange", &baseQualShadeMin, &baseQualShadeMax);
    struct bamTrackData btd = {tg, pairHash, minAliQual, colorMode, grayMode, userTag,
                               aliQualShadeMin, aliQualShadeMax, baseQualShadeMin, baseQualShadeMax};

    /* Where is the BAM file?  bigDataUrl setting first, else the table. */
    char *fileName = trackDbSetting(tg->tdb, "bigDataUrl");
    if (fileName == NULL)
        {
        if (tg->customPt)
            errAbort("bamLoadItemsCore: can't find bigDataUrl for custom track %s", tg->track);
        else
            {
            struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb);
            fileName = bamFileNameFromTable(conn, tg->table, chromName);
            hFreeConn(&conn);
            }
        }
    char *resolvedName = hReplaceGbdb(fileName);

    char region[512];
    safef(region, sizeof(region), "%s:%d-%d", chromName, winStart, winEnd);
    if (isPaired)
        {
        /* Fetch a wider region so that mates just outside the window are
         * found, then keep only pairs that overlap the window. */
        char *rangeSetting = trackDbSettingClosestToHomeOrDefault(tg->tdb, "pairSearchRange", "20000");
        int pairSearchRange = atoi(rangeSetting);
        if (pairSearchRange > 0)
            safef(region, sizeof(region), "%s:%d-%d", chromName,
                  max(0, winStart-pairSearchRange), winEnd+pairSearchRange);
        bamFetch(resolvedName, region, addBamPaired, &btd, NULL);

        struct hashCookie cookie = hashFirst(btd.pairHash);
        struct hashEl *hel;
        for (hel = hashNext(&cookie); hel != NULL; hel = hashNext(&cookie))
            {
            struct linkedFeatures *lf = hel->val;
            if (lf->start < winEnd && lf->end > winStart)
                slAddHead(&(tg->items), lfsFromLf(lf));
            }
        }
    else
        bamFetch(resolvedName, region, addBam, &btd, NULL);
    freez(&resolvedName);

    if (tg->visibility != tvDense)
        {
        slReverse(&(tg->items));
        /* The sort order follows the way the items are going to be coloured. */
        if (isPaired)
            slSort(&(tg->items), linkedFeaturesSeriesCmp);
        else if (sameString(colorMode, BAM_COLOR_MODE_STRAND))
            slSort(&(tg->items), linkedFeaturesCmpOri);
        else if (sameString(colorMode, BAM_COLOR_MODE_GRAY) &&
                 sameString(grayMode, BAM_GRAY_MODE_ALI_QUAL))
            slSort(&(tg->items), linkedFeaturesCmpScore);
        else
            slSort(&(tg->items), linkedFeaturesCmpStart);

        if (slCount(tg->items) > MAX_ITEMS_FOR_MAPBOX)
            {
            // flag drawItems to make a mapBox for the whole track
            tg->customInt = 1;
            tg->mapItem = dontMapItem;
            }
        }
    }
errCatchEnd(catcher);
if (catcher->gotError)
    {
    /* Keep the message and switch the track over to the warning display. */
    tg->networkErrMsg = cloneString(catcher->message->string);
    tg->drawItems = bigDrawWarning;
    tg->totalHeight = bigWarnTotalHeight;
    }
errCatchFree(&catcher);
}
struct annoStreamer *annoStreamDbNew(char *db, char *table, struct annoAssembly *aa, struct asObject *asObj, int maxOutRows) /* Create an annoStreamer (subclass) object from a database table described by asObj. */ { struct sqlConnection *conn = hAllocConn(db); if (!sqlTableExists(conn, table)) errAbort("annoStreamDbNew: table '%s' doesn't exist in database '%s'", table, db); struct annoStreamDb *self = NULL; AllocVar(self); struct annoStreamer *streamer = &(self->streamer); int dbtLen = strlen(db) + strlen(table) + 2; char dbTable[dbtLen]; safef(dbTable, dbtLen, "%s.%s", db, table); annoStreamerInit(streamer, aa, asObj, dbTable); streamer->rowType = arWords; streamer->setRegion = asdSetRegion; streamer->nextRow = asdNextRow; streamer->close = asdClose; self->conn = conn; self->table = cloneString(table); char *asFirstColumnName = streamer->asObj->columnList->name; if (sqlFieldIndex(self->conn, self->table, "bin") == 0) { self->hasBin = 1; self->minFinestBin = binFromRange(0, 1); } if (self->hasBin && !sameString(asFirstColumnName, "bin")) self->omitBin = 1; if (!asdInitBed3Fields(self)) errAbort("annoStreamDbNew: can't figure out which fields of %s.%s to use as " "{chrom, chromStart, chromEnd}.", db, table); self->makeBaselineQuery = asdMakeBaselineQuery; // When a table has an index on endField, sometimes the query optimizer uses it // and that ruins the sorting. Fortunately most tables don't anymore. self->endFieldIndexName = sqlTableIndexOnField(self->conn, self->table, self->endField); self->notSorted = FALSE; // Special case: genbank-updated tables are not sorted because new mappings are // tacked on at the end. if (isIncrementallyUpdated(table)) self->notSorted = TRUE; self->mergeBins = FALSE; self->maxOutRows = maxOutRows; self->useMaxOutRows = (maxOutRows > 0); self->needQuery = TRUE; self->chromList = annoAssemblySeqNames(aa); if (slCount(self->chromList) > 1000) { // Assembly has many sequences (e.g. 
scaffold-based assembly) -- // don't break up into per-sequence queries. Take our chances // with mysql being unhappy about the sqlResult being open too long. self->doQuery = asdDoQuerySimple; self->nextRowRaw = nextRowFromSqlResult; } else { // All-chromosome assembly -- if table is large, perform a series of // chunked queries. self->doQuery = asdDoQueryChunking; self->nextRowRaw = nextRowFromBuffer; } return (struct annoStreamer *)self; }
static void procGbEntry(struct lineFile *lf, struct hash *estAuthorHash) /* process one entry in the genbank file . readGbInfo should be called * first */ { char *words[16]; char date[64]; int wordCount; DNA *dna = NULL; int dnaSize; char sizeString[16]; char accVer[64]; int faSize; char *locus = gbLocusField->val->string; char *accession = gbAccessionField->val->string; int version = 0; char *gi = NULL; char *verChar = gbVersionField->val->string; char *s; char *org = gbOrganismField->val->string; char *synOrg = NULL; struct keyVal *seqKey, *sizeKey, *commentKey; boolean isEst = FALSE; char verNum[8]; char *com = gbCommentField->val->string; if (locus == NULL || accession == NULL) errAbort("No LOCUS or no ACCESSION line near %d of %s", lf->lineIx, lf->fileName); lmCleanup(&kvtMem); /* Chop off all but first word of accession. */ s = skipLeadingSpaces(accession); if (s != NULL) s = skipLeadingNonSpaces(s); if (s != NULL) *s = 0; /* Get version field (defaults to zero) */ if (verChar != NULL) { char *parts[2]; char *accVer; int partCount; partCount = chopByWhite(verChar, parts, ArraySize(parts)); /* Version is number after dot. */ accVer = parts[0]; if ((accVer = strchr(accVer, '.')) != NULL) version = atoi(accVer+1); if (partCount >= 2 && startsWith("GI:", parts[1])) gi = parts[1]+3; } gbfFlatten(kvt); /* Get additional keys. */ if (com != NULL) { if (startsWith("REVIEWED", com)) kvtAdd(kvt, "cur", "yes"); } safef(verNum, sizeof(verNum), "%d", version); kvtAdd(kvt, "ver", verNum); if (gi != NULL) kvtAdd(kvt, "ngi", gi); wordCount = chopLine(locus, words); if (wordCount >= 6) { kvtAdd(kvt, "mol", words[3]); kvtAdd(kvt, "cat", words[wordCount-2]); ncbiDateToSqlDate(words[wordCount-1], date, sizeof(date), accession); kvtAdd(kvt, "dat", date); } else if (wordCount == 5 && sameString(words[2], "bp") && isdigit(words[1][0])) { /* Check carefully. Probably it's just missing the molecule type... 
*/ if (!isNcbiDate(words[4])) { errAbort("Strange LOCUS line in %s accession %s", lf->fileName, accession); } kvtAdd(kvt, "cat", words[3]); ncbiDateToSqlDate(words[4], date, sizeof(date), accession); kvtAdd(kvt, "dat", date); } else if (wordCount == 5 && sameString(words[2], "bp") && isdigit(words[1][0])) { kvtAdd(kvt, "mol", words[3]); } else { errAbort("Short LOCUS line in %s accession %s", lf->fileName, accession); } if (((wordCount >= 5) && sameString(words[4], "EST")) || ((wordCount >= 6) && sameString(words[5], "EST"))) { /* Try and figure out if it's a 3' or 5' EST */ char *dir = getEstDir(gbDefinitionField->val->string, com); if (dir != NULL) kvtAdd(kvt, "dir", dir); isEst = TRUE; } /* Handle other fields */ parseDbXrefs(); parseGene(); parseSourceOrganism(); parseMiscDiffs(); parseWarnings(); if (startsWith("synthetic construct", gbOrganismField->val->string)) { synOrg = findSyntheticTarget(); if (synOrg != NULL) hackSynClone(); } if (keepGbEntry(isEst)) { /* Handle sequence part of read. */ dna = gbfReadSequence(lf, &dnaSize); } /* just discard if no sequence */ if (dna != NULL) { seqKey = kvtAdd(kvt, "seq", dna); safef(sizeString, sizeof(sizeString), "%d", dnaSize); sizeKey = kvtAdd(kvt, "siz", sizeString); if (isEst) { char *author = gbAuthorsField->val->string; if (author != NULL) { struct authorExample *ae; struct hashEl *hel; if ((hel = hashLookup(estAuthorHash, author)) == NULL) { AllocVar(ae); hel = hashAdd(estAuthorHash, author, ae); ae->name = hel->name; ae->count = 1; strncpy(ae->accession, accession, sizeof(ae->accession)); slAddHead(&estAuthorList, ae); } else { ae = hel->val; ae->count += 1; } } } seqKey->val = NULL; /* Don't write out sequence here. */ commentKey = kvtGet(kvt, "com"); if (commentKey != NULL) commentKey->val = NULL; /* Don't write out comment either. 
*/ setupOutputFiles(accession, org); if (faFile != NULL) { /* save fasta offset, size in ra */ safef(accVer, sizeof(accVer), "%s.%d", accession, version); gbFaWriteSeq(faFile, accVer, NULL, dna, -1); faSize = faFile->off - faFile->recOff; safef(faOffStr, sizeof(faOffStr), "%lld", (long long)faFile->recOff); kvtAdd(kvt, "fao", faOffStr); safef(faSizeStr, sizeof(faSizeStr), "%d", faSize); kvtAdd(kvt, "fas", faSizeStr); } if (gPepFa != NULL) { /* must write before writing kvt */ writePepSeq(); } kvtWriteAll(kvt, raFile, NULL); if (gbIdxFile != NULL) { /* use synthetic target if it was determined */ struct keyVal *molkv = kvtGet(kvt, "mol"); enum molType molType = (molkv->val != NULL) ? gbParseMolType(molkv->val) : mol_mRNA; gbProcessedWriteIdxRec(gbIdxFile, accession, version, kvtLookup(kvt, "dat"), ((synOrg != NULL) ? synOrg : org), molType); } } else gbfSkipSequence(lf); }
void addSdrfToStormTop(char *sdrfFile, struct tagStorm *storm) /* Add lines of sdrfFile as children of first top level stanza in storm. */ { struct fieldedTable *table = fieldedTableFromTabFile(sdrfFile, sdrfFile, NULL, 0 ); /* Convert ArrayExpress field names to our field names */ int fieldIx; char *lastNonTerm = NULL; char *lastNonUnit = NULL; for (fieldIx=0; fieldIx < table->fieldCount; fieldIx += 1) { char tagName[256]; aeFieldToNormalField("sdrf.", table->fields[fieldIx], tagName, sizeof(tagName)); if (lastNonTerm != NULL && sameString("sdrf.Term_Source_REF", tagName)) { safef(tagName, sizeof(tagName), "%s_Term_Source_REF", lastNonTerm); table->fields[fieldIx] = lmCloneString(table->lm, tagName); } else if (lastNonTerm != NULL && sameString("sdrf.Term_Accession_Number", tagName)) { safef(tagName, sizeof(tagName), "%s_Term_Accession_Number", lastNonTerm); table->fields[fieldIx] = lmCloneString(table->lm, tagName); } else if (lastNonUnit != NULL && startsWith("sdrf.Unit_", tagName)) { safef(tagName, sizeof(tagName), "%s_Unit", lastNonUnit); lastNonTerm = lmCloneString(table->lm, tagName); table->fields[fieldIx] = lastNonTerm; } else { lastNonTerm = lastNonUnit = lmCloneString(table->lm, tagName); table->fields[fieldIx] = lastNonTerm; } } /* Make up fastq field indexes to handle processing of paired reads in fastq, which * take two lines of sdrf file. 
*/ char *fieldsWithFastqs[] = /* Fields that contain the fastq file names */ { "sdrf.Comment_FASTQ_URI", "sdrf.Comment_SUBMITTED_FILE_NAME", "sdrf.Scan_Name", }; boolean mightReuseStanza = TRUE; bool *reuseMultiFields; // If set this field can vary and line still reused AllocArray(reuseMultiFields, table->fieldCount); int i; for (i=0; i<ArraySize(fieldsWithFastqs); ++i) { char *field = fieldsWithFastqs[i]; int ix = stringArrayIx(field, table->fields, table->fieldCount); if (ix >=0) reuseMultiFields[ix] = TRUE; else if (i == 0) { mightReuseStanza = FALSE; break; // Make sure has first one if going to do paired read fastq processing } } /* Make up a list and hash of fieldMergers to handle conversion of columns that occur * multiple times to a comma-separated list of values in a single column. */ struct fieldMerger /* Something to help merge multiple columns with same name */ { struct fieldMerger *next; /* Next in list */ char *name; struct dyString *val; /* Comma separated value */ }; struct hash *fieldHash = hashNew(0); struct fieldMerger *fmList = NULL; for (fieldIx = 0; fieldIx < table->fieldCount; ++fieldIx) { char *fieldName = table->fields[fieldIx]; if (hashLookup(fieldHash, fieldName) == NULL) { struct fieldMerger *fm; AllocVar(fm); fm->name = fieldName; fm->val = dyStringNew(0); slAddTail(&fmList, fm); hashAdd(fieldHash, fieldName, fm); } } /* Grab top level stanza and make sure there is only one. */ struct tagStanza *topStanza = storm->forest; if (topStanza == NULL || topStanza->next != NULL) internalErr(); /* Scan through table, making new stanzas for each row and hooking them into topStanza */ struct fieldedRow *fr, *lastFr = NULL; struct tagStanza *stanza = NULL; for (fr = table->rowList; fr != NULL; fr = fr->next) { /* Empty out any existing vals */ struct fieldMerger *fm; for (fm = fmList; fm != NULL; fm = fm->next) dyStringClear(fm->val); /* Add all non-empty values from this row to our fieldMergers. 
*/ char **row = fr->row; for (fieldIx = 0; fieldIx < table->fieldCount; ++fieldIx) { char *fieldName = table->fields[fieldIx]; fm = hashMustFindVal(fieldHash, fieldName); char *val = row[fieldIx]; if (!isEmpty(val)) csvEscapeAndAppend(fm->val, val); } /* If only the reuseMultiFields are varying, append to those values in previous stanza, * otherwise make a new stanza */ if (mightReuseStanza && lastFr != NULL && sameExceptForSome(lastFr->row, fr->row, table->fieldCount, reuseMultiFields)) { int i; for (i=0; i<ArraySize(fieldsWithFastqs); ++i) { char *fieldName = fieldsWithFastqs[i]; if ((fm = hashFindVal(fieldHash, fieldName)) != NULL) { char *newVal = fm->val->string; char *oldVal = tagMustFindVal(stanza, fieldName); int bothSize = strlen(newVal) + strlen(oldVal) + 1 + 1; char bothBuf[bothSize]; safef(bothBuf, bothSize, "%s,%s", oldVal, newVal); tagStanzaUpdateTag(storm, stanza, fieldName, bothBuf); } } } else { /* Output all nonempty vals to stanza */ stanza = tagStanzaNew(storm, topStanza); for (fm = fmList; fm != NULL; fm = fm->next) if (fm->val->stringSize > 0) tagStanzaAppend(storm, stanza, fm->name, fm->val->string); } lastFr = fr; } slReverse(&topStanza->children); }
void doFactorSource(struct sqlConnection *conn, struct trackDb *tdb, char *item, int start, int end) /* Display detailed info about a cluster of TFBS peaks from other tracks. */ { char extraWhere[256]; safef(extraWhere, sizeof extraWhere, "name='%s'", item); int rowOffset; struct sqlResult *sr = hRangeQuery(conn, tdb->table, seqName, start, end, extraWhere, &rowOffset); char **row = sqlNextRow(sr); struct factorSource *cluster = NULL; if (row != NULL) cluster = factorSourceLoad(row + rowOffset); sqlFreeResult(&sr); if (cluster == NULL) errAbort("Error loading cluster from track %s", tdb->track); char *sourceTable = trackDbRequiredSetting(tdb, "sourceTable"); char *factorLink = cluster->name; char *vocab = trackDbSetting(tdb, "controlledVocabulary"); if (vocab != NULL) { char *file = cloneFirstWord(vocab); factorLink = wgEncodeVocabLink(file, "term", factorLink, factorLink, factorLink, ""); } printf("<B>Factor:</B> %s<BR>\n", factorLink); printf("<B>Cluster Score (out of 1000):</B> %d<BR>\n", cluster->score); printPos(cluster->chrom, cluster->chromStart, cluster->chromEnd, NULL, TRUE, item); /* Get list of tracks we'll look through for input. */ char *inputTrackTable = trackDbRequiredSetting(tdb, "inputTrackTable"); char query[256]; sqlSafef(query, sizeof(query), "select tableName from %s where factor='%s' order by source", inputTrackTable, cluster->name); /* Next do the lists of hits and misses. We have the hits from the non-zero signals in * cluster->expScores. We need to figure out the sources actually assayed though * some other way. We'll do this by one of two techniques. */ char *inputTableFieldDisplay = trackDbSetting(tdb, "inputTableFieldDisplay"); if (inputTableFieldDisplay != NULL) { struct slName *fieldList = stringToSlNames(inputTableFieldDisplay); char *vocab = trackDbSetting(tdb, "controlledVocabulary"); /* In a new section put up list of hits. 
*/ webNewSection("Assays for %s in Cluster", cluster->name); webPrintLinkTableStart(); printClusterTableHeader(fieldList, TRUE, FALSE, TRUE); printFactorSourceTableHits(cluster, conn, sourceTable, inputTrackTable, fieldList, FALSE, vocab); webPrintLinkTableEnd(); webNewSectionHeaderStart(); char sectionTitle[128]; safef(sectionTitle, sizeof(sectionTitle),"Assays for %s Without Hits in Cluster", cluster->name); jsBeginCollapsibleSectionOldStyle(cart, tdb->track, "cellNoHits", sectionTitle, FALSE); webNewSectionHeaderEnd(); webPrintLinkTableStart(); printClusterTableHeader(fieldList, TRUE, FALSE, FALSE); printFactorSourceTableHits(cluster, conn, sourceTable, inputTrackTable, fieldList, TRUE, vocab); webPrintLinkTableEnd(); jsEndCollapsibleSection(); } else { errAbort("Missing required trackDb setting %s for track %s", "inputTableFieldDisplay", tdb->track); } webNewSectionHeaderStart(); jsBeginCollapsibleSectionOldStyle(cart, tdb->track, "cellSources", "Cell Abbreviations", FALSE); webNewSectionHeaderEnd(); hPrintFactorSourceAbbrevTable(conn, tdb); jsEndCollapsibleSection(); doClusterMotifDetails(conn, tdb, cluster); }
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName)
/* hgLoadChromGraph - Load up chromosome graph.
 * Reads the graph from fileName (translating ids through the -idTable table
 * when that option is given), optionally converts values to -log10 with
 * -minusLog10, and writes a sorted tab file for track.  When doLoad is set the
 * tab file is loaded into a freshly made table, the binary .cgb file is
 * written, and the track's row in metaChromGraph is replaced.
 * errAborts if fileName yields no data. */
{
double minVal,maxVal;
struct chromGraph *el, *list;
FILE *f;
char *tempDir = ".";
char path[PATH_LEN], gbdbPath[PATH_LEN];
char *idTable = optionVal("idTable", NULL);
char *pathPrefix = NULL;

if (idTable == NULL)
    list = chromGraphLoadAll(fileName);
else
    list = chromGraphListWithTable(fileName, db, idTable);
if (list == NULL)
    errAbort("%s is empty", fileName);

/* Apply the optional -log10 transform to every element, the first one
 * included, before any value is used for the min/max. */
if (optionExists("minusLog10"))
    {
    for (el = list; el != NULL; el = el->next)
        {
        if (el->val == 1)
            el->val = 0;
        else if (el->val > 0)
            el->val = -1 * log(el->val)/log(10);
        }
    }

/* Figure out min/max values */
minVal = maxVal = list->val;
for (el = list->next; el != NULL; el = el->next)
    {
    if (el->val < minVal)
        minVal = el->val;
    if (el->val > maxVal)
        maxVal = el->val;
    }

/* Sort and write out temp file. */
slSort(&list, chromGraphCmp);
f = hgCreateTabFile(tempDir, track);
for (el = list; el != NULL; el = el->next)
    chromGraphTabOut(el, f);

if (doLoad)
    {
    struct dyString *dy = dyStringNew(0);
    struct sqlConnection *conn;

    /* Set up connection to database and create main table. */
    conn = hAllocConn(db);
    dyStringPrintf(dy, createString, track, hGetMinIndexLength(db));
    sqlRemakeTable(conn, track, dy->string);

    /* Load main table and clean up file handle. */
    hgLoadTabFile(conn, tempDir, track, &f);
    hgRemoveTabFile(tempDir, track);

    /* If need be create meta table.  If need be delete old row. */
    if (!sqlTableExists(conn, "metaChromGraph"))
        sqlUpdate(conn, metaCreateString);
    else
        {
        dyStringClear(dy);
        dyStringPrintf(dy, "delete from metaChromGraph where name = '%s'", track);
        sqlUpdate(conn, dy->string);
        }

    /* Make chrom graph file */
    safef(path, sizeof(path), "%s.cgb", track);
    chromGraphToBin(list, path);
    safef(path, sizeof(path), "/gbdb/%s/chromGraph", db);
    pathPrefix = optionVal("pathPrefix", path);
    safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track);

    /* Create new line in meta table */
    dyStringClear(dy);
    dyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');",
                   track, minVal, maxVal, gbdbPath);
    sqlUpdate(conn, dy->string);

    dyStringFree(&dy);
    hFreeConn(&conn);
    }
else
    {
    /* Nothing else closes the tab file on this path; close it here so the
     * data is flushed. */
    carefulClose(&f);
    }
}
matrix * SVMgetSampleAccuracy(struct hash *config) /*Read all the folds and calculate training and testing accuracies from best models*/ { char * trainingDir = hashMustFindVal(config, "trainingDir"); char * validationDir = hashMustFindVal(config, "validationDir"); char * modelDir = hashMustFindVal(config, "modelDir"); int fold, folds = foldsCountFromDataDir(config); int split, splits = splitsCountFromDataDir(config); matrix * accuracies = NULL; char filename[1024]; for(split = 1; split <= splits; split++) { for(fold = 1; fold <= folds; fold++) { //cat togetehr the training and validation KH values and record which were used to train safef(filename, sizeof(filename), "%s/split%02d/fold%02d/data.svm", trainingDir, split, fold); matrix * trMetadata = SVMtoMetadataMatrix(filename); safef(filename, sizeof(filename), "%s/split%02d/fold%02d/data.svm", validationDir, split, fold); matrix * valMetadata = SVMtoMetadataMatrix(filename); struct slInt * trainingList = list_indices(trMetadata->cols); matrix * metadata = append_matrices(trMetadata, valMetadata, 1); //cat together the guesses from SVM safef(filename, sizeof(filename), "%s/split%02d/fold%02d/svm.training.results", modelDir, split, fold); matrix * trainingPred = SVMpopulatePredictionsMatrix(filename); safef(trainingPred->rowLabels[0],MAX_LABEL, "Prediction"); copy_matrix_labels(trainingPred, trMetadata, 2,2); trainingPred->labels=1; safef(filename, sizeof(filename), "%s/split%02d/fold%02d/svm.validation.results", modelDir, split, fold); matrix * testingPred = SVMpopulatePredictionsMatrix(filename); safef(testingPred->rowLabels[0], MAX_LABEL, "Prediction"); copy_matrix_labels(testingPred, valMetadata, 2,2); testingPred->labels=1; matrix * predictions = append_matrices(trainingPred, testingPred, 1); //get accuracies matrix * accuraciesInFold = SVMpopulateAccuracyMatrix(predictions, metadata, trainingList); //add the accuracies to the running totals if(split == 1 && fold == 1) accuracies = 
copy_matrix(accuraciesInFold); else add_matrices_by_colLabel(accuracies, accuraciesInFold); //clean up free_matrix(trMetadata); free_matrix(valMetadata); free_matrix(metadata); free_matrix(trainingPred); free_matrix(testingPred); free_matrix(predictions); free_matrix(accuraciesInFold); slFreeList(&trainingList); } } //normalize accuracies over number of splits and folds int i; for(i = 0; i < accuracies->cols; i++) { if(accuracies->graph[0][i] != NULL_FLAG) accuracies->graph[0][i] = (accuracies->graph[0][i] / ((folds-1) * splits)); if(accuracies->graph[1][i] != NULL_FLAG) accuracies->graph[1][i] = (accuracies->graph[1][i] / (1 * splits)); } return accuracies; }
void crossCheck(struct sqlConnection *conn, char *motifWeights, char *geneToModule,
                char *geneToMotif, char *moduleToMotif)
/* Do sanity check after loading.  The four arguments after conn are table
 * names.  Checks that:
 *   - every motif in moduleToMotif and geneToMotif exists in motifWeights,
 *   - modules in geneToModule and moduleToMotif match up both ways,
 *   - every gene in geneToMotif has a module, and that module has the motif.
 * Each problem is reported with warn(); the total is printed at the end. */
{
struct hash *motifHash = newHash(0);
struct hash *geneToModuleHash = newHash(16);
struct hash *moduleToGeneHash = newHash(16);
struct hash *moduleToMotifHash = newHash(16);
struct hash *motifToModuleHash = newHash(16);
struct hash *modMotHash = newHash(18);   /* Keyed by "module motif" */
char query[512], modMot[64];
struct sqlResult *sr;
char **row;
int reusedMotifCount = 0;
int fatalErrorCount = 0;

/* Load up motif hash */
sqlSafef(query, sizeof(query), "select name from %s", motifWeights);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    hashAdd(motifHash, row[0], NULL);
    }
sqlFreeResult(&sr);

/* Load up moduleToMotif table and note how many times
 * a motif is used more than once just for curiosity
 * (this is not an error condition). */
sqlSafef(query, sizeof(query), "select module,motif from %s", moduleToMotif);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    /* Cloned because the strings are kept as hash values past this row. */
    char *module = cloneString(row[0]);
    char *motif = cloneString(row[1]);
    if (!hashLookup(motifHash, motif))
        {
        warn("Motif %s is in %s but not %s", motif, moduleToMotif, motifWeights);
        ++fatalErrorCount;
        }
    hashAdd(moduleToMotifHash, module, motif);
    if (hashLookup(motifToModuleHash, motif))
        ++reusedMotifCount;
    hashAdd(motifToModuleHash, motif, module);
    safef(modMot, sizeof(modMot), "%s %s", module, motif);
    hashAdd(modMotHash, modMot, NULL);
    }
sqlFreeResult(&sr);
verbose(1, "%d motifs reuses in modules\n", reusedMotifCount);

verbose(1, "Cross-checking tables\n");

/* Load up geneToModule table, and make sure that all modules actually
 * exist in moduleToMotif table. */
sqlSafef(query, sizeof(query), "select gene,module from %s", geneToModule);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *gene = cloneString(row[0]);
    char *module = cloneString(row[1]);
    if (!hashLookup(moduleToMotifHash, module))
        {
        warn("Module %s is in %s but not %s", module, geneToModule, moduleToMotif);
        ++fatalErrorCount;
        }
    hashAdd(geneToModuleHash, gene, module);
    hashAdd(moduleToGeneHash, module, gene);
    }
sqlFreeResult(&sr);

/* Scan again through moduleToMotif table and make sure that
 * all modules are present in geneToModule. */
sqlSafef(query, sizeof(query), "select module from %s", moduleToMotif);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *module = row[0];
    if (!hashLookup(moduleToGeneHash, module))
        {
        warn("Module %s is in %s but not %s", module, moduleToMotif, geneToModule);
        ++fatalErrorCount;
        }
    }
sqlFreeResult(&sr);

/* Scan through geneToMotif table checking things. */
sqlSafef(query, sizeof(query), "select gene,name from %s", geneToMotif);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    char *gene = row[0];
    char *motif = row[1];
    char *module = hashFindVal(geneToModuleHash, gene);
    if (module == NULL)
        {
        warn("Gene %s is in %s but not %s", gene, geneToMotif, geneToModule);
        ++fatalErrorCount;
        }
    else
        {
        /* Only look up the module/motif pair when there is a module; a NULL
         * must not be formatted with %s, and the missing module has already
         * been counted above. */
        safef(modMot, sizeof(modMot), "%s %s", module, motif);
        if (hashLookup(modMotHash, modMot) == NULL)
            {
            warn("Gene %s has motif %s, but that motif isn't in %s", gene, motif, module);
            ++fatalErrorCount;
            }
        }
    if (!hashLookup(motifHash, motif))
        {
        warn("Motif %s is in %s but not %s", motif, geneToMotif, motifWeights);
        ++fatalErrorCount;
        }
    }
sqlFreeResult(&sr);

verbose(1, "%d errors\n", fatalErrorCount);
}