static void doPslMapAli(struct sqlConnection *conn, int taxon, char *db, int fromTaxon, char *fromDb) { char cmd[256]; struct dyString *dy = dyStringNew(0); char path[256]; char dnaPath[256]; char toDb[12]; safef(toDb,sizeof(toDb),"%s", db); toDb[0]=toupper(toDb[0]); safef(dnaPath,sizeof(dnaPath),"/cluster/data/%s/nib", db); if (!fileExists(dnaPath)) { safef(dnaPath,sizeof(dnaPath),"/cluster/data/%s/%s.2bit", db, db); if (!fileExists(dnaPath)) errAbort("unable to locate nib dir or .2bit for %s: %s", db, dnaPath); } safef(path,sizeof(path),"/gbdb/%s/liftOver/%sTo%s.over.chain.gz", fromDb, fromDb, toDb); if (!fileExists(path)) errAbort("unable to locate chain file %s",path); /* get non-bac $db.vgProbes not yet aligned */ getPslMapAli(conn, db, fromTaxon, fromDb, FALSE); /* get bac $db.vgProbes not yet aligned */ getPslMapAli(conn, db, fromTaxon, fromDb, TRUE); /* get .fa for pslRecalcMatch use */ getPslMapFa(conn, db, fromTaxon); /* non-bac */ safef(cmd,sizeof(cmd), "zcat %s | pslMap -chainMapFile -swapMap nonBac.psl stdin stdout " "| sort -k 14,14 -k 16,16n > unscoredNB.psl" ,path); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd), "pslRecalcMatch unscoredNB.psl %s" " pslMap.fa nonBac.psl" ,dnaPath); verbose(1,"%s\n",cmd); system(cmd); /* bac */ safef(cmd,sizeof(cmd), "zcat %s | pslMap -chainMapFile -swapMap bac.psl stdin stdout " "| sort -k 14,14 -k 16,16n > unscoredB.psl" ,path); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd), "pslRecalcMatch unscoredB.psl %s" " pslMap.fa bacTemp.psl" ,dnaPath); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd), "pslCDnaFilter -globalNearBest=0.00001 -minCover=0.05" " bacTemp.psl bac.psl"); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd),"cat bac.psl nonBac.psl > vgPrbPslMap.psl"); verbose(1,"%s\n",cmd); system(cmd); dyStringFree(&dy); }
struct fullExperiment *getFullExperimentList(struct sqlConnection *conn, struct edwExperiment *eeList, char *assembly, struct hash **retHash) /* Given list of edwExperiments, return list of ones replicated with full file sets on * both replicates. If optional retHash is non-NULL then return a hash full of same * experiments keyed by experiment accession */ { /* Build up a list of fullExperiments and a hash keyed by name. */ struct hash *hash = hashNew(14); struct fullExperiment *fullList = NULL; struct edwExperiment *ee; for (ee = eeList; ee != NULL; ee = ee->next) { struct fullExperiment *full = hashFindVal(hash, ee->accession); if (full == NULL) { AllocVar(full); full->name = cloneString(ee->accession); full->exp = ee; slAddHead(&fullList, full); hashAdd(hash, full->name, full); } } uglyf("Got %d in eeList, %d in fullList, %d in hash\n", slCount(eeList), slCount(fullList), hash->elCount); /* Build up SQL query to efficiently fetch all good files and valid files from our experiment */ struct dyString *q = dyStringNew(16*1024); sqlDyStringPrintf(q, "select edwValidFile.*,edwFile.*,eapOutput.* " " from edwValidFile,edwFile,eapOutput " " where edwValidFile.fileId = edwFile.id and edwFile.id = eapOutput.fileId " " and edwFile.deprecated='' and edwFile.errorMessage='' " " and edwValidFile.ucscDb != 'centro.hg19' " " and edwValidFile.ucscDb like '%%%s' and edwValidFile.experiment in (" , assembly); for (ee = eeList; ee != NULL; ee = ee->next) { dyStringPrintf(q, "'%s'", ee->accession); if (ee->next != NULL) dyStringAppendC(q, ','); } dyStringAppendC(q, ')'); /* Loop through this making up vFiles that ultimately are attached to replicates. 
*/ int vCount = 0; struct sqlResult *sr = sqlGetResult(conn, q->string); char **row; while ((row = sqlNextRow(sr)) != NULL) { ++vCount; struct edwValidFile *valid = edwValidFileLoad(row); fixOutputType(valid); struct edwFile *file = edwFileLoad(row + EDWVALIDFILE_NUM_COLS); struct eapOutput *eapOutput = eapOutputLoad(row + EDWVALIDFILE_NUM_COLS + EDWFILE_NUM_COLS); struct vFile *vf = vFileNew(file, valid, eapOutput); struct fullExperiment *full = hashMustFindVal(hash, valid->experiment); struct replicate *rep = findOrMakeReplicate(valid->replicate, &full->repList); char *format = valid->format; if (sameString(format, "bam")) slAddHead(&rep->bamList, vf); else if (sameString(format, "bigWig")) slAddHead(&rep->bigWigList, vf); else if (sameString(format, "narrowPeak") && !sameString(valid->outputType, "replicated_narrowPeak")) slAddHead(&rep->narrowList, vf); else if (sameString(format, "broadPeak") && !sameString(valid->outputType, "replicated_broadPeak")) slAddHead(&rep->broadList, vf); } sqlFreeResult(&sr); uglyf("Got %d vFiles\n", vCount); dyStringFree(&q); /* Free hash or return it, and return list. */ if (retHash == NULL) hashFree(&hash); else *retHash = hash; return fullList; }
static struct bigBedInterval *bigBedIntervalsMatchingName(struct bbiFile *bbi,
    struct fileOffsetSize *fosList, BbFirstWordMatch matcher, int fieldIx,
    void *target, struct lm *lm)
/* Return list of intervals inside of sectors of bbiFile defined by fosList where the name
 * matches target somehow.  matcher is called as (*matcher)(rest, fieldIx, target) on the
 * text that follows chrom/start/end in each record; records for which it returns
 * non-zero are kept.  The intervals and their 'rest' strings are allocated out of lm. */
{
struct bigBedInterval *interval, *intervalList = NULL;
struct fileOffsetSize *fos;
boolean isSwapped = bbi->isSwapped;
for (fos = fosList; fos != NULL; fos = fos->next)
    {
    /* Read in raw data; abort rather than parse a short read. */
    udcSeek(bbi->udc, fos->offset);
    char *rawData = needLargeMem(fos->size);
    udcMustRead(bbi->udc, rawData, fos->size);

    /* Optionally uncompress data, and set data pointer to uncompressed version. */
    char *uncompressedData = NULL;
    char *data = NULL;
    int dataSize = 0;
    if (bbi->uncompressBufSize > 0)
        {
        data = uncompressedData = needLargeMem(bbi->uncompressBufSize);
        dataSize = zUncompress(rawData, fos->size, uncompressedData, bbi->uncompressBufSize);
        }
    else
        {
        data = rawData;
        dataSize = fos->size;
        }

    /* Set up for "memRead" routines to more or less treat memory block like file */
    char *blockPt = data, *blockEnd = data + dataSize;
    struct dyString *dy = dyStringNew(32); /* Keep bits outside of chrom/start/end here */

    while (blockPt < blockEnd)
        {
        /* Read next record into local variables. */
        bits32 chromIx = memReadBits32(&blockPt, isSwapped);
        bits32 s = memReadBits32(&blockPt, isSwapped);
        bits32 e = memReadBits32(&blockPt, isSwapped);

        /* Copy the zero-terminated rest of the record.  Stop only at the
         * terminator (or end of block), not at bytes with the high bit set,
         * so the result doesn't depend on the signedness of char. */
        dyStringClear(dy);
        while (blockPt < blockEnd)
            {
            char c = *blockPt++;
            if (c == 0)
                break;
            dyStringAppendC(dy, c);
            }

        if ((*matcher)(dy->string, fieldIx, target))
            {
            lmAllocVar(lm, interval);
            interval->start = s;
            interval->end = e;
            /* Allocate from lm like the interval itself so it is freed with it. */
            interval->rest = lmCloneString(lm, dy->string);
            interval->chromId = chromIx;
            slAddHead(&intervalList, interval);
            }
        }

    /* Clean up temporary buffers. */
    dyStringFree(&dy);
    freez(&uncompressedData);
    freez(&rawData);
    }
slReverse(&intervalList);
return intervalList;
}
char *filterClause(char *db, char *table, char *chrom, char *extraClause) /* Get filter clause (something to put after 'where') * for table */ { struct sqlConnection *conn = NULL; char varPrefix[128]; int varPrefixSize, fieldNameSize; struct hashEl *varList, *var; struct dyString *dy = NULL; boolean needAnd = FALSE; char oldDb[128]; char dbTableBuf[256]; char explicitDb[128]; char splitTable[256]; char explicitDbTable[512]; /* Return just extraClause (which may be NULL) if no filter on us. */ if (! (anyFilter() && filteredOrLinked(db, table))) return cloneString(extraClause); safef(oldDb, sizeof(oldDb), "%s", db); dbOverrideFromTable(dbTableBuf, &db, &table); if (!sameString(oldDb, db)) safef(explicitDb, sizeof(explicitDb), "%s.", db); else explicitDb[0] = 0; /* Cope with split table and/or custom tracks. */ if (isCustomTrack(table)) { conn = hAllocConn(CUSTOM_TRASH); struct customTrack *ct = ctLookupName(table); safef(explicitDbTable, sizeof(explicitDbTable), "%s", ct->dbTableName); } else { conn = hAllocConn(db); safef(splitTable, sizeof(splitTable), "%s_%s", chrom, table); if (!sqlTableExists(conn, splitTable)) safef(splitTable, sizeof(splitTable), "%s", table); safef(explicitDbTable, sizeof(explicitDbTable), "%s%s", explicitDb, splitTable); } /* Get list of filter variables for this table. */ safef(varPrefix, sizeof(varPrefix), "%s%s.%s.", hgtaFilterVarPrefix, db, table); varPrefixSize = strlen(varPrefix); varList = cartFindPrefix(cart, varPrefix); if (varList == NULL) { hFreeConn(&conn); return cloneString(extraClause); } /* Create filter clause string, stepping through vars. */ dy = dyStringNew(0); for (var = varList; var != NULL; var = var->next) { /* Parse variable name into field and type. 
*/ char field[64], *s, *type; s = var->name + varPrefixSize; type = strchr(s, '.'); if (type == NULL) internalErr(); fieldNameSize = type - s; if (fieldNameSize >= sizeof(field)) internalErr(); memcpy(field, s, fieldNameSize); field[fieldNameSize] = 0; sqlCkId(field); type += 1; /* rawLogic and rawQuery are handled below; * filterMaxOutputVar is not really a filter variable and is handled * in wiggle.c. */ if (startsWith("raw", type) || sameString(filterMaxOutputVar, type)) continue; /* Any other variables that are missing a name: * <varPrefix>..<type> * are illegal */ if (fieldNameSize < 1) { warn("Missing name in cart variable: %s\n", var->name); continue; } if (sameString(type, filterDdVar)) { char *patVar = filterPatternVarName(db, table, field); struct slName *patList = cartOptionalSlNameList(cart, patVar); normalizePatList(&patList); if (slCount(patList) > 0) { char *ddVal = cartString(cart, var->name); boolean neg = sameString(ddVal, ddOpMenu[1]); char *fieldType = getSqlType(conn, explicitDbTable, field); boolean needOr = FALSE; if (needAnd) dyStringAppend(dy, " and "); needAnd = TRUE; if (neg) dyStringAppend(dy, "not "); boolean composite = (slCount(patList) > 1); if (composite || neg) dyStringAppendC(dy, '('); struct slName *pat; for (pat = patList; pat != NULL; pat = pat->next) { char *sqlPat = sqlLikeFromWild(pat->name); if (needOr) dyStringAppend(dy, " OR "); needOr = TRUE; if (isSqlSetType(fieldType)) { sqlDyStringPrintfFrag(dy, "FIND_IN_SET('%s', %s.%s)>0 ", sqlPat, explicitDbTable , field); } else { sqlDyStringPrintfFrag(dy, "%s.%s ", explicitDbTable, field); if (sqlWildcardIn(sqlPat)) dyStringAppend(dy, "like "); else dyStringAppend(dy, "= "); sqlDyStringPrintf(dy, "'%s'", sqlPat); } freez(&sqlPat); } if (composite || neg) dyStringAppendC(dy, ')'); } } else if (sameString(type, filterCmpVar)) { char *patVar = filterPatternVarName(db, table, field); char *pat = trimSpaces(cartOptionalString(cart, patVar)); char *cmpVal = cartString(cart, var->name); 
if (cmpReal(pat, cmpVal)) { if (needAnd) dyStringAppend(dy, " and "); needAnd = TRUE; if (sameString(cmpVal, "in range")) { char *words[2]; int wordCount; char *dupe = cloneString(pat); wordCount = chopString(dupe, ", \t\n", words, ArraySize(words)); if (wordCount < 2) /* Fake short input */ words[1] = "2000000000"; if (strchr(pat, '.')) /* Assume floating point */ { double a = atof(words[0]), b = atof(words[1]); sqlDyStringPrintfFrag(dy, "%s.%s >= %f && %s.%s <= %f", explicitDbTable, field, a, explicitDbTable, field, b); } else { int a = atoi(words[0]), b = atoi(words[1]); sqlDyStringPrintfFrag(dy, "%s.%s >= %d && %s.%s <= %d", explicitDbTable, field, a, explicitDbTable, field, b); } freez(&dupe); } else { // cmpVal has been checked already above in cmpReal for legal values. sqlDyStringPrintfFrag(dy, "%s.%s %-s ", explicitDbTable, field, cmpVal); if (strchr(pat, '.')) /* Assume floating point. */ dyStringPrintf(dy, "%f", atof(pat)); else dyStringPrintf(dy, "%d", atoi(pat)); } } } } /* Handle rawQuery if any */ { char *varName; char *logic, *query; varName = filterFieldVarName(db, table, "", filterRawLogicVar); logic = cartUsualString(cart, varName, logOpMenu[0]); varName = filterFieldVarName(db, table, "", filterRawQueryVar); query = trimSpaces(cartOptionalString(cart, varName)); if (query != NULL && query[0] != 0) { if (needAnd) dyStringPrintf(dy, " %s ", logic); sqlSanityCheckWhere(query, dy); } } /* Clean up and return */ hFreeConn(&conn); hashElFreeList(&varList); if (dy->stringSize == 0) { dyStringFree(&dy); return cloneString(extraClause); } else { if (isNotEmpty(extraClause)) dyStringPrintf(dy, " and %s", extraClause); return dyStringCannibalize(&dy); } }
void verifyGreatAssemblies() { // First read in the assembly name and description information into name lists struct slName* supportedAssemblies = NULL; struct lineFile *lf = lineFileOpen(greatData, TRUE); int fieldCount = 1; char* row[fieldCount]; int wordCount; while ((wordCount = lineFileChopTab(lf, row)) != 0) { if (wordCount != fieldCount) errAbort("The %s file is not properly formatted.\n", greatData); slNameAddHead(&supportedAssemblies, row[0]); } lineFileClose(&lf); boolean invalidAssembly = TRUE; struct slName* currAssembly; for (currAssembly = supportedAssemblies; currAssembly != NULL; currAssembly = currAssembly->next) { if (!hDbIsActive(currAssembly->name)) { errAbort("Assembly %s in supported assembly file is not an active assembly.\n", currAssembly->name); } if (sameOk(database, currAssembly->name)) { invalidAssembly = FALSE; break; } } if (invalidAssembly) { slReverse(&supportedAssemblies); currAssembly = supportedAssemblies; struct dyString* dy = dyStringNew(0); addAssemblyToSupportedList(dy, currAssembly->name); currAssembly = currAssembly->next; while (currAssembly != NULL) { dyStringAppend(dy, ", "); if (currAssembly->next == NULL) dyStringAppend(dy, "and "); addAssemblyToSupportedList(dy, currAssembly->name); currAssembly = currAssembly->next; } hPrintf("<script type='text/javascript'>\n"); hPrintf("function logSpecies() {\n"); hPrintf("try {\n"); hPrintf("var r = new XMLHttpRequest();\n"); hPrintf("r.open('GET', 'http://great.stanford.edu/public/cgi-bin/logSpecies.php?species=%s');\n", database); hPrintf("r.send(null);\n"); hPrintf("} catch (err) { }\n"); hPrintf("}\n"); hPrintf("window.onload = logSpecies;\n"); hPrintf("</script>\n"); errAbort("GREAT only supports the %s assemblies." "\nPlease go back and ensure that one of those assemblies is chosen.", dyStringContents(dy)); htmlClose(); dyStringFree(&dy); } slNameFreeList(&supportedAssemblies); }
boolean lineFileParseHttpHeader(struct lineFile *lf, char **hdr,
                                boolean *chunked, int *contentLength)
/* Extract HTTP response header from lf into hdr, tell if it's
 * "Transfer-Encoding: chunked" or if it has a contentLength.
 *   hdr           - receives a freshly cloned copy of the header lines read so
 *                   far (possibly empty); always set, caller frees.
 *   chunked       - optional; set to TRUE if a chunked transfer encoding line
 *                   is seen, else FALSE.
 *   contentLength - optional; set to the Content-Length value, else -1.
 * Returns TRUE only if the status line is HTTP/<version> 200.  If the first
 * line is not an HTTP status line it is pushed back onto lf. */
{
struct dyString *header = newDyString(1024);
char *line;
int lineSize;
boolean ok = FALSE;

if (chunked != NULL)
    *chunked = FALSE;
if (contentLength != NULL)
    *contentLength = -1;
dyStringClear(header);
if (lineFileNext(lf, &line, &lineSize))
    {
    if (startsWith("HTTP/", line))
        {
        char *version, *code;
        /* Save the status line before nextWord chops it up in place. */
        dyStringAppendN(header, line, lineSize-1);
        dyStringAppendC(header, '\n');
        version = nextWord(&line);
        code = nextWord(&line);
        if (code == NULL)
            {
            warn("%s: Expecting HTTP/<version> <code> header line, got this: %s\n",
                 lf->fileName, header->string);
            }
        else if (!sameString(code, "200"))
            {
            /* line is NULL when nothing follows the status code. */
            warn("%s: Errored HTTP response header: %s %s %s\n",
                 lf->fileName, version, code, (line != NULL) ? line : "");
            }
        else
            {
            ok = TRUE;
            while (lineFileNext(lf, &line, &lineSize))
                {
                /* blank line means end of HTTP header */
                if ((line[0] == '\r' && line[1] == 0) || line[0] == 0)
                    break;
                if (strstr(line, "Transfer-Encoding: chunked") && chunked != NULL)
                    *chunked = TRUE;
                dyStringAppendN(header, line, lineSize-1);
                dyStringAppendC(header, '\n');
                if (strstr(line, "Content-Length:"))
                    {
                    char *value;
                    nextWord(&line);            /* skip the field name */
                    value = nextWord(&line);
                    /* No value word (e.g. "Content-Length:" alone): leave -1. */
                    if (value != NULL && contentLength != NULL)
                        *contentLength = atoi(value);
                    }
                }
            }
        }
    else
        {
        /* put the line back, don't put it in header/hdr */
        lineFileReuse(lf);
        /* Report the offending line itself; header is still empty here. */
        warn("%s: Expecting HTTP/<version> <code> header line, got this: %s\n",
             lf->fileName, line);
        }
    }

*hdr = cloneString(header->string);
dyStringFree(&header);
return ok;
} /* lineFileParseHttpHeader */
struct bigBedInterval *bigBedIntervalQuery(struct bbiFile *bbi, char *chrom,
        bits32 start, bits32 end, int maxItems, struct lm *lm)
/* Get data for interval.  Return list allocated out of lm.  Set maxItems to maximum
 * number of items to return, or to 0 for all items.  Items are returned in file
 * order; each carries start, end, chromId and, when non-empty, the text that
 * follows the coordinates in 'rest'. */
{
struct bigBedInterval *el, *list = NULL;
int itemCount = 0;
bbiAttachUnzoomedCir(bbi);
bits32 chromId;
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bbi, bbi->unzoomedCir,
        chrom, start, end, &chromId);
struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bbi->udc;
boolean isSwapped = bbi->isSwapped;
struct dyString *dy = dyStringNew(32);

/* Set up for uncompression optionally. */
char *uncompressBuf = NULL;
if (bbi->uncompressBufSize > 0)
    uncompressBuf = needLargeMem(bbi->uncompressBufSize);

for (block = blockList; block != NULL; )
    {
    /* Find contiguous blocks and read them into mergedBuf. */
    fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
    bits64 mergedOffset = block->offset;
    bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
    udcSeek(udc, mergedOffset);
    char *mergedBuf = needLargeMem(mergedSize);
    udcMustRead(udc, mergedBuf, mergedSize);
    char *blockBuf = mergedBuf;

    /* Loop through individual blocks within merged section. */
    for (; block != afterGap; block = block->next)
        {
        /* Uncompress if necessary. */
        char *blockPt, *blockEnd;
        if (uncompressBuf)
            {
            blockPt = uncompressBuf;
            int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bbi->uncompressBufSize);
            blockEnd = blockPt + uncSize;
            }
        else
            {
            blockPt = blockBuf;
            blockEnd = blockPt + block->size;
            }

        while (blockPt < blockEnd)
            {
            /* Read next record into local variables. */
            bits32 chr = memReadBits32(&blockPt, isSwapped);
            bits32 s = memReadBits32(&blockPt, isSwapped);
            bits32 e = memReadBits32(&blockPt, isSwapped);

            /* Copy the zero-terminated rest of the record.  Stop only at the
             * terminator (or end of block), not at bytes with the high bit
             * set, so the result doesn't depend on the signedness of char. */
            dyStringClear(dy);
            while (blockPt < blockEnd)
                {
                char c = *blockPt++;
                if (c == 0)
                    break;
                dyStringAppendC(dy, c);
                }

            /* If we're actually in range then copy it into a new element and add to list. */
            if (chr == chromId && rangeIntersection(s, e, start, end) > 0)
                {
                ++itemCount;
                if (maxItems > 0 && itemCount > maxItems)
                    break;

                lmAllocVar(lm, el);
                el->start = s;
                el->end = e;
                if (dy->stringSize > 0)
                    el->rest = lmCloneString(lm, dy->string);
                el->chromId = chromId;
                slAddHead(&list, el);
                }
            }
        if (maxItems > 0 && itemCount > maxItems)
            break;
        blockBuf += block->size;
        }

    /* Free before testing the limit so an early exit doesn't leak the buffer. */
    freez(&mergedBuf);
    if (maxItems > 0 && itemCount > maxItems)
        break;
    }
freeMem(uncompressBuf);
dyStringFree(&dy);
slFreeList(&blockList);
slReverse(&list);
return list;
}
static void processMrnaFa(struct sqlConnection *conn, int taxon, char *type, char *db) /* process isPcr results */ { struct dyString *dy = dyStringNew(0); struct lineFile *lf = lineFileOpen("mrna.fa", TRUE); int lineSize; char *line; char *name; char *dna; boolean more = lineFileNext(lf, &line, &lineSize); while(more) { if (line[0] != '>') errAbort("unexpected error out of phase\n"); name = cloneString(line+1); verbose(2,"name=%s\n",name); dyStringClear(dy); while((more=lineFileNext(lf, &line, &lineSize))) { if (line[0] == '>') { break; } dyStringAppend(dy,line); } dna = cloneString(dy->string); while(1) { int oldProbe = 0; dyStringClear(dy); dyStringPrintf(dy, "select id from vgPrb " "where taxon=%d and type='%s' and tName='%s' and state='new'",taxon,type,name); oldProbe = sqlQuickNum(conn,dy->string); if (oldProbe==0) break; /* no more records match */ /* record exists and hasn't already been updated */ int vgPrb = findVgPrbBySeq(conn,dna,taxon); if (vgPrb == 0) { dyStringClear(dy); dyStringAppend(dy, "update vgPrb set"); dyStringAppend(dy, " seq = '"); dyStringAppend(dy, dna); dyStringAppend(dy, "',\n"); dyStringPrintf(dy, " db = '%s',\n", db); dyStringAppend(dy, " state = 'seq'\n"); dyStringPrintf(dy, " where id=%d\n", oldProbe); dyStringPrintf(dy, " and state='%s'\n", "new"); verbose(2, "%s\n", dy->string); sqlUpdate(conn, dy->string); } else /* probe seq already exists */ { /* just re-map the probe table recs to it */ dyStringClear(dy); dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,oldProbe); sqlUpdate(conn, dy->string); /* and delete it from vgPrb */ dyStringClear(dy); dyStringPrintf(dy, "delete from vgPrb where id=%d",oldProbe); sqlUpdate(conn, dy->string); } } freez(&name); freez(&dna); } lineFileClose(&lf); dyStringFree(&dy); }
static void doAccessionsSeq(struct sqlConnection *conn, int taxon, char *db) /* get probe seq from Accessions */ { int rc = 0; struct dyString *dy = dyStringNew(0); /* get refSeq accessions and rna */ setTName(conn, taxon, db, "refSeq", "refSeq"); rc = getAccMrnas(conn, taxon, db, "refSeq", "refSeqAli"); verbose(1,"rc = %d = count of refSeq mrna for %s\n",rc,db); advanceType(conn,taxon,"refSeq","genRef"); /* get refSeq-in-gene.genbank accessions and rna */ setTName(conn, taxon, db, "genRef", "genbank"); rc = getAccMrnas(conn, taxon, db, "genRef", "refSeqAli"); verbose(1,"rc = %d = count of genRef mrna for %s\n",rc,db); advanceType(conn,taxon,"genRef","genbank"); /* get genbank accessions and rna */ setTName(conn, taxon, db, "genbank", "genbank"); rc = getAccMrnas(conn, taxon, db, "genbank", "all_mrna"); verbose(1,"rc = %d = count of genbank mrna for %s\n",rc,db); advanceType(conn,taxon,"genbank","flatRef"); /* get gene.name -> refFlat to refSeq accessions and rna */ setTNameMapped(conn, taxon, db, "flatRef", "name", "refFlat", "geneName", "name"); rc = getAccMrnas(conn, taxon, db, "flatRef", "refSeqAli"); verbose(1,"rc = %d = count of flatRef mrna for %s\n",rc,db); advanceType(conn,taxon,"flatRef","flatAll"); /* get gene.name -> refFlat to all_mrna accessions */ setTNameMapped(conn, taxon, db, "flatAll", "name", "refFlat", "geneName", "name"); rc = getAccMrnas(conn, taxon, db, "flatAll", "all_mrna"); verbose(1,"rc = %d = count of flatAll mrna for %s\n",rc,db); advanceType(conn,taxon,"flatAll","linkRef"); /* get gene.name -> refLink to refSeq accessions and rna */ setTNameMapped(conn, taxon, db, "linkRef", "name", "refLink", "name", "mrnaAcc"); rc = getAccMrnas(conn, taxon, db, "linkRef", "refSeqAli"); verbose(1,"rc = %d = count of linkRef mrna for %s\n",rc,db); advanceType(conn,taxon,"linkRef","linkAll"); /* get gene.name -> refLink to all_mrna accessions */ setTNameMapped(conn, taxon, db, "linkAll", "name", "refLink", "name", "mrnaAcc"); rc = getAccMrnas(conn, 
taxon, db, "linkAll", "all_mrna"); verbose(1,"rc = %d = count of linkAll mrna for %s\n",rc,db); advanceType(conn,taxon,"linkAll","kgAlRef"); /* get gene.name -> kgAlias to refSeq accessions and rna */ setTNameMapped(conn, taxon, db, "kgAlRef", "name", "kgAlias", "alias", "kgId"); rc = getAccMrnas(conn, taxon, db, "kgAlRef", "refSeqAli"); verbose(1,"rc = %d = count of kgAlRef mrna for %s\n",rc,db); advanceType(conn,taxon,"kgAlRef","kgAlAll"); /* get gene.name -> kgAlias to all_mrna accessions */ setTNameMapped(conn, taxon, db, "kgAlAll", "name", "kgAlias", "alias", "kgId"); rc = getAccMrnas(conn, taxon, db, "kgAlAll", "all_mrna"); verbose(1,"rc = %d = count of kgAlAll mrna for %s\n",rc,db); advanceType(conn,taxon,"kgAlAll","gene"); dyStringFree(&dy); }
static int doBacs(struct sqlConnection *conn, int taxon, char *db) /* fetch available sequence for bacEndPairs */ { struct dyString *dy = dyStringNew(0); struct dnaSeq *chromSeq = NULL; struct bac *bacs = bacRead(conn, taxon, db); struct bac *bac = NULL; char *chrom = cloneString(""); int count = 0; verbose(1,"bac list read done.\n"); for(bac=bacs;bac;bac=bac->next) { if (differentWord(chrom,bac->chrom)) { verbose(1,"switching to chrom %s\n",bac->chrom); dnaSeqFree(&chromSeq); chromSeq = hLoadChrom(bac->chrom,db); freez(&chrom); chrom = cloneString(bac->chrom); } char *dna = checkAndFetchBacDna(chromSeq, bac); if (sameString(bac->strand,"-")) { reverseComplement(dna,strlen(dna)); } dyStringClear(dy); dyStringPrintf(dy, "select count(*) from vgPrb where id=%d and state='new'",bac->probe); if (sqlQuickNum(conn,dy->string)>0) { /* record exists and hasn't already been updated */ int vgPrb = findVgPrbBySeq(conn,dna,taxon); if (vgPrb == 0) { dyStringClear(dy); dyStringAppend(dy, "update vgPrb set"); dyStringAppend(dy, " seq='"); dyStringAppend(dy, dna); dyStringAppend(dy, "',\n"); dyStringPrintf(dy, " tName='%s',\n", bac->chrom); dyStringPrintf(dy, " tStart=%d,\n", bac->chromStart); dyStringPrintf(dy, " tEnd=%d,\n", bac->chromEnd); dyStringPrintf(dy, " tStrand='%s',\n", bac->strand); dyStringPrintf(dy, " db='%s',\n", db); dyStringPrintf(dy, " state='%s'\n", "seq"); dyStringPrintf(dy, " where id=%d\n", bac->probe); dyStringPrintf(dy, " and state='%s'\n", "new"); //verbose(2, "%s\n", dy->string); // the sql string could be quite large sqlUpdate(conn, dy->string); } else /* probe seq already exists */ { /* just re-map the probe table recs to it */ dyStringClear(dy); dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,bac->probe); sqlUpdate(conn, dy->string); /* and delete it from vgPrb */ dyStringClear(dy); dyStringPrintf(dy, "delete from vgPrb where id=%d",bac->probe); sqlUpdate(conn, dy->string); } ++count; verbose(2,"%d finished bac for probe id %d 
size %d\n", count, bac->probe, bac->chromEnd - bac->chromStart); } freez(&dna); } freez(&chrom); dnaSeqFree(&chromSeq); bacFreeList(&bacs); dyStringFree(&dy); return count; }
static void doPrimers(struct sqlConnection *conn, int taxon, char *db) /* get probe seq from primers */ { int rc = 0; struct dyString *dy = dyStringNew(0); char cmdLine[256]; char path1[256]; char path2[256]; dyStringClear(dy); dyStringAppend(dy, "select e.id, p.fPrimer, p.rPrimer from probe p, vgPrbMap m, vgPrb e, gene g"); dyStringPrintf(dy, " where p.id = m.probe and m.vgPrb = e.id and g.id = p.gene and g.taxon = %d",taxon); dyStringAppend(dy, " and e.state = 'new' and e.type='primersMrna'"); rc = sqlSaveQuery(conn, dy->string, "primers.query", FALSE); verbose(1,"rc = %d = count of primers for mrna search for taxon %d\n",rc,taxon); if (rc > 0) /* something to do */ { dyStringClear(dy); dyStringPrintf(dy, "select qName from %s.all_mrna",db); rc = 0; rc = sqlSaveQuery(conn, dy->string, "accFile.txt", FALSE); safef(cmdLine,sizeof(cmdLine),"getRna %s accFile.txt mrna.fa",db); system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date"); verbose(1,"rc = %d = count of mrna for %s\n",rc,db); system("date"); system("isPcr mrna.fa primers.query isPcr.fa -out=fa"); system("date"); system("ls -l"); processIsPcr(conn,taxon,db); unlink("mrna.fa"); unlink("accFile.txt"); unlink("isPcr.fa"); } unlink("primers.query"); /* find any remaining type primersMrna that couldn't be resolved and demote * them to type primersGenome */ dyStringClear(dy); dyStringAppend(dy, "update vgPrb set type='primersGenome'"); dyStringPrintf(dy, " where taxon = %d",taxon); dyStringAppend(dy, " and state = 'new' and type='primersMrna'"); sqlUpdate(conn, dy->string); /* get primers for those probes that did not find mrna isPcr matches * and then do them against the genome instead */ dyStringClear(dy); dyStringAppend(dy, "select e.id, p.fPrimer, p.rPrimer from probe p, vgPrbMap m, vgPrb e, gene g"); dyStringPrintf(dy, " where p.id = m.probe and m.vgPrb = e.id and g.id = p.gene and g.taxon = %d",taxon); dyStringAppend(dy, " and e.state = 'new' and e.type='primersGenome'"); rc = 0; 
rc = sqlSaveQuery(conn, dy->string, "primers.query", FALSE); verbose(1,"rc = %d = count of primers for genome search for taxon %d\n",rc,taxon); if (rc > 0) /* something to do */ { safef(path1,sizeof(path1),"/gbdb/%s/%s.2bit",db,db); safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db); verbose(1,"copy: [%s] to [%s]\n",path1,path2); copyFile(path1,path2); safef(cmdLine,sizeof(cmdLine), "ssh kolossus 'cd %s; isPcr %s.2bit primers.query isPcr.fa -out=fa'", getCurrentDir(),db); system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date"); safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db); verbose(1,"rm %s\n",path2); unlink(path2); system("ls -l"); processIsPcr(conn,taxon,db); unlink("mrna.fa"); unlink("accFile.txt"); unlink("isPcr.fa"); } unlink("primers.query"); /* find any remaining type primersGenome that couldn't be resolved and demote * them to type refSeq */ dyStringClear(dy); dyStringAppend(dy, "update vgPrb set type='refSeq'"); dyStringPrintf(dy, " where taxon = %d",taxon); dyStringAppend(dy, " and state = 'new' and type='primersGenome'"); sqlUpdate(conn, dy->string); dyStringFree(&dy); }
static void processIsPcr(struct sqlConnection *conn, int taxon, char *db) /* process isPcr results */ { /* >NM_010919:371+1088 2 718bp CGCGGATCCAAGGACATCTTGGACCTTCCG CCCAAGCTTGCATGTGCTGCAGCGACTGCG */ struct dyString *dy = dyStringNew(0); struct lineFile *lf = lineFileOpen("isPcr.fa", TRUE); int lineSize; char *line; char *name; char *dna; char *word, *end; char *tName; int tStart; int tEnd; char *tStrand; int probeid=0; /* really a vgPrb id */ boolean more = lineFileNext(lf, &line, &lineSize); while(more) { if (line[0] != '>') errAbort("unexpected error out of phase\n"); name = cloneString(line); verbose(1,"name=%s\n",name); dyStringClear(dy); while((more=lineFileNext(lf, &line, &lineSize))) { if (line[0] == '>') { break; } dyStringAppend(dy,line); } dna = cloneString(dy->string); word = name+1; end = strchr(word,':'); tName = cloneStringZ(word,end-word); word = end+1; end = strchr(word,'+'); tStrand = "+"; if (!end) { end = strchr(word,'-'); tStrand = "-"; } tStart = atoi(word); word = end+1; end = strchr(word,' '); tEnd = atoi(word); word = end+1; end = strchr(word,' '); probeid = atoi(word); dyStringClear(dy); dyStringPrintf(dy, "select count(*) from vgPrb where id=%d and state='new'",probeid); if (sqlQuickNum(conn,dy->string)>0) { /* record exists and hasn't already been updated */ int vgPrb = findVgPrbBySeq(conn,dna,taxon); if (vgPrb == 0) { dyStringClear(dy); dyStringAppend(dy, "update vgPrb set"); dyStringAppend(dy, " seq='"); dyStringAppend(dy, dna); dyStringAppend(dy, "',\n"); dyStringPrintf(dy, " tName='%s',\n", tName); dyStringPrintf(dy, " tStart=%d,\n", tStart); dyStringPrintf(dy, " tEnd=%d,\n", tEnd); dyStringPrintf(dy, " tStrand='%s',\n", tStrand); dyStringPrintf(dy, " db='%s',\n", db); dyStringPrintf(dy, " state='%s'\n", "seq"); dyStringPrintf(dy, " where id=%d\n", probeid); dyStringPrintf(dy, " and state='%s'\n", "new"); verbose(2, "%s\n", dy->string); sqlUpdate(conn, dy->string); } else /* probe seq already exists */ { /* just re-map the probe 
table recs to it */ dyStringClear(dy); dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,probeid); sqlUpdate(conn, dy->string); /* and delete it from vgPrb */ dyStringClear(dy); dyStringPrintf(dy, "delete from vgPrb where id=%d",probeid); sqlUpdate(conn, dy->string); } } freez(&tName); freez(&name); freez(&dna); } lineFileClose(&lf); dyStringFree(&dy); }
static void populateMissingVgPrb(struct sqlConnection *conn) /* populate vgPrb where missing, usually after new records added to visiGene */ { struct sqlResult *sr; char **row; struct dyString *dy = dyStringNew(0); struct sqlConnection *conn2 = sqlConnect(database); struct sqlConnection *conn3 = sqlConnect(database); int probeCount=0, vgPrbCount=0; dyStringAppend(dy, "select p.id,p.gene,antibody,probeType,fPrimer,rPrimer,p.seq,bac,g.taxon" " from probe p join gene g" " left join vgPrbMap m on m.probe = p.id" " where g.id = p.gene" " and m.probe is NULL"); sr = sqlGetResult(conn, dy->string); while ((row = sqlNextRow(sr)) != NULL) { int id = sqlUnsigned(row[0]); /* int gene = sqlUnsigned(row[1]); */ /* int antibody = sqlUnsigned(row[2]); */ /* int probeType = sqlUnsigned(row[3]); */ char *fPrimer = row[4]; char *rPrimer = row[5]; char *seq = row[6]; int bac = sqlUnsigned(row[7]); int taxon = sqlUnsigned(row[8]); char *peType = "none"; int peProbe = id; char *peSeq = seq; char *tName = ""; int tStart = 0; int tEnd = 0; char *tStrand = " "; /* char *peGene = ""; int bacInfo = 0; int seqid = 0; int pslid = 0; */ char *state = "new"; char *db = ""; int vgPrb = 0; if (isNotEmpty(seq)) { peType = "probe"; state = "seq"; } else if (isNotEmpty(fPrimer) && isNotEmpty(rPrimer)) { peType = "primersMrna"; } else if (isNotEmpty(fPrimer) && isEmpty(rPrimer)) { /* only have fPrimer, it's probably a comment, not dna seq */ peType = "refSeq"; /* use accession or gene */ } else if (bac > 0) { peType = "bac"; /* use bacEndPairs */ } else { peType = "refSeq"; /* use accession or gene */ } if (!sameString(peSeq,"")) { vgPrb = findVgPrbBySeq(conn3,peSeq,taxon); } if (vgPrb == 0) { dyStringClear(dy); dyStringAppend(dy, "insert into vgPrb set"); dyStringPrintf(dy, " id=default,\n"); dyStringPrintf(dy, " type='%s',\n", peType); dyStringAppend(dy, " seq='"); dyStringAppend(dy, peSeq); dyStringAppend(dy, "',\n"); dyStringPrintf(dy, " tName='%s',\n", tName); dyStringPrintf(dy, " tStart=%d,\n", 
tStart); dyStringPrintf(dy, " tEnd=%d,\n", tEnd); dyStringPrintf(dy, " tStrand='%s',\n", tStrand); dyStringPrintf(dy, " db='%s',\n", db); dyStringPrintf(dy, " taxon='%d',\n", taxon); dyStringPrintf(dy, " state='%s'\n", state); verbose(2, "%s\n", dy->string); sqlUpdate(conn2, dy->string); vgPrb = sqlLastAutoId(conn2); vgPrbCount++; } dyStringClear(dy); dyStringAppend(dy, "insert into vgPrbMap set"); dyStringPrintf(dy, " probe=%d,\n", peProbe); dyStringPrintf(dy, " vgPrb=%d \n", vgPrb); verbose(2, "%s\n", dy->string); sqlUpdate(conn2, dy->string); probeCount++; } verbose(1, "# new probe records found = %d, # new vgPrb records added = %d\n", probeCount, vgPrbCount); dyStringFree(&dy); sqlFreeResult(&sr); sqlDisconnect(&conn3); sqlDisconnect(&conn2); }
static void doSeqAndExtFile(struct sqlConnection *conn, char *db, char *table) { int rc = 0; char cmd[256]; char path[256]; char bedPath[256]; char gbdbPath[256]; char *fname=NULL; struct dyString *dy = dyStringNew(0); dyStringClear(dy); dyStringPrintf(dy, "select distinct concat('vgPrb_',e.id), e.seq" " from vgPrb e join %s.%s v" " left join %s.seq s on s.acc = v.qName" " where concat('vgPrb_',e.id) = v.qName" " and s.acc is NULL" " order by e.id" , db, table, db); rc = sqlSaveQuery(conn, dy->string, "vgPrbExt.fa", TRUE); verbose(1,"rc = %d = count of sequences for vgPrbExt.fa, to use with %s track %s\n",rc,db,table); if (rc > 0) /* can set any desired minimum */ { safef(bedPath,sizeof(bedPath),"/cluster/data/%s/bed/visiGene/",db); if (!fileExists(bedPath)) { safef(cmd,sizeof(cmd),"mkdir %s",bedPath); verbose(1,"%s\n",cmd); system(cmd); } safef(gbdbPath,sizeof(gbdbPath),"/gbdb/%s/visiGene/",db); if (!fileExists(gbdbPath)) { safef(cmd,sizeof(cmd),"mkdir %s",gbdbPath); verbose(1,"%s\n",cmd); system(cmd); } while(1) { int i=0; safef(path,sizeof(path),"%svgPrbExt_AAAAAA.fa",bedPath); char *c = rStringIn("AAAAAA",path); srand( (unsigned)time( NULL ) ); for(i=0;i<6;++i) { *c++ += (int) 26 * (rand() / (RAND_MAX + 1.0)); } if (!fileExists(path)) break; } safef(cmd,sizeof(cmd),"cp vgPrbExt.fa %s",path); verbose(1,"%s\n",cmd); system(cmd); fname = rStringIn("/", path); ++fname; safef(cmd,sizeof(cmd),"ln -s %s %s%s",path,gbdbPath,fname); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd),"hgLoadSeq %s %s%s", db, gbdbPath,fname); verbose(1,"%s\n",cmd); system(cmd); } dyStringFree(&dy); }
/* write here first, then move to hg/lib/genePred.c */ void printExons(char *snpName, struct genePred *gene, char *chrom, int start, int end, struct dnaSeq *seq) { int iExon = 0; // which exon int startExon = 0; int endExon = 0; struct dyString *dy = NULL; char *seqBuffer; int size; char *ptr = seq->dna; // actual nucleotide positions int exonStart = 0; int exonEnd = 0; // arg checking if (start > end) { fprintf(stderr, "error with %s (start exceeds end)\n", gene->name); return; } startExon = findExonPos(gene, start); endExon = findExonPos(gene, end); // more checking if (startExon == -1 || endExon == -1) { fprintf(stderr, "error with %s (startExon = %d; endExon = %d)\n", gene->name, startExon, endExon); return; } // simple case if (startExon == endExon) { // printf("simple case; all in one exon\n"); size = end - start + 1; seqBuffer = needMem(size); strncpy(seqBuffer, &ptr[start], size); printf("> %s %s:%d-%d (%s)\n", gene->name, chrom, start, end, snpName); printLines(stdout, seqBuffer, 50); freeMem(seqBuffer); // *seqBuffer = 0; return; } // printf("not simple case; flank in multiple exons\n"); // printf("startExon = %d; endExon = %d\n", startExon, endExon); // append to dyString dy = newDyString(512); // remainder of first exon exonEnd = gene->exonEnds[startExon-1]; size = exonEnd - start + 1; seqBuffer = needMem(size); strncpy(seqBuffer, &ptr[start], size); dyStringPrintf(dy, "%s", seqBuffer); freeMem(seqBuffer); // middle exons for (iExon = startExon + 1; iExon < endExon; iExon++) { exonStart = gene->exonStarts[iExon-1]; exonEnd = gene->exonEnds[iExon-1]; size = exonEnd - exonStart + 1; seqBuffer = needMem(size); strncpy(seqBuffer, &ptr[exonStart], size); dyStringPrintf(dy, "%s", seqBuffer); freeMem(seqBuffer); } // start of last exon exonStart = gene->exonStarts[endExon-1]; size = end - exonStart + 1; seqBuffer = needMem(size); strncpy(seqBuffer, &ptr[exonStart], size); dyStringPrintf(dy, "%s", seqBuffer); freeMem(seqBuffer); printf("> %s %s:%d-%d (%s)\n", 
gene->name, chrom, start, end, snpName); printLines(stdout, dy->string, 50); dyStringFree(&dy); }
static void asdDoQueryChunking(struct annoStreamDb *self, char *minChrom, uint minEnd)
/* Build and run one chunk-sized SQL query for table items and buffer the rows
 * via bufferRowsFromSqlQuery (nothing is returned).
 * For a region query the position range of the region is used; for a whole
 * genome query the work is split into chrom-by-chrom queries.  minChrom/minEnd,
 * when given, let the query skip items that end at or before that position.
 * Sets self->eof when there is nothing (more) to fetch. */
{
struct annoStreamer *sSelf = &(self->streamer);
boolean hasWhere = FALSE;
struct dyString *query = self->makeBaselineQuery(self, &hasWhere);
if (sSelf->chrom != NULL && self->rowBuf.size > 0 && !self->doNextChunk)
    {
    // We're doing a region query, we already got some rows, and don't need another chunk:
    resetRowBuf(&self->rowBuf);
    self->eof = TRUE;
    }
if (self->useMaxOutRows)
    {
    self->maxOutRows -= self->rowBuf.size;
    if (self->maxOutRows <= 0)
        self->eof = TRUE;
    }
if (self->eof)
    {
    // Don't leak the baseline query on the early exit.
    dyStringFree(&query);
    return;
    }
int queryMaxItems = ASD_CHUNK_SIZE;
if (self->useMaxOutRows && self->maxOutRows < queryMaxItems)
    queryMaxItems = self->maxOutRows;
if (self->hasBin)
    {
    // Results will be in bin order, but we can restore chromStart order by
    // accumulating initial coarse-bin items and merge-sorting them with
    // subsequent finest-bin items which will be in chromStart order.
    if (self->doNextChunk && self->mergeBins && !self->gotFinestBin)
        errAbort("annoStreamDb %s: can't continue merge in chunking query; "
                 "increase ASD_CHUNK_SIZE", sSelf->name);
    self->mergeBins = TRUE;
    if (self->qLm == NULL)
        self->qLm = lmInit(0);
    }
if (self->endFieldIndexName != NULL)
    // Don't let mysql use a (chrom, chromEnd) index because that messes up
    // sorting by chromStart.
    sqlDyStringPrintf(query, " IGNORE INDEX (%s) ", self->endFieldIndexName);
if (sSelf->chrom != NULL)
    {
    uint start = sSelf->regionStart;
    if (minChrom)
        {
        if (differentString(minChrom, sSelf->chrom))
            errAbort("annoStreamDb %s: nextRow minChrom='%s' but region chrom='%s'",
                     sSelf->name, minChrom, sSelf->chrom);
        if (start < minEnd)
            start = minEnd;
        }
    if (self->doNextChunk && start < self->nextChunkStart)
        start = self->nextChunkStart;
    sqlDyStringAppend(query, hasWhere ? " and " : " where ");
    sqlDyStringPrintf(query, "%s = '%s' and ", self->chromField, sSelf->chrom);
    if (self->hasBin)
        {
        if (self->doNextChunk && self->gotFinestBin)
            // It would be way more elegant to make a hAddBinTopLevelOnly but this will do:
            dyStringPrintf(query, "bin > %d and ", self->minFinestBin);
        hAddBinToQuery(start, sSelf->regionEnd, query);
        }
    if (self->doNextChunk)
        sqlDyStringPrintf(query, "%s >= %u and ", self->startField, self->nextChunkStart);
    sqlDyStringPrintf(query, "%s < %u and %s > %u ", self->startField, sSelf->regionEnd,
                      self->endField, start);
    if (self->notSorted)
        sqlDyStringPrintf(query, "order by %s ", self->startField);
    sqlDyStringPrintf(query, "limit %d", queryMaxItems);
    bufferRowsFromSqlQuery(self, query->string, queryMaxItems);
    if (self->rowBuf.size == 0)
        self->eof = TRUE;
    }
else
    {
    // Genome-wide query: break it into chrom-by-chrom queries.
    if (self->queryChrom == NULL)
        self->queryChrom = self->chromList;
    else if (!self->doNextChunk)
        {
        self->queryChrom = self->queryChrom->next;
        resetMergeState(self);
        }
    if (minChrom != NULL)
        {
        // Skip chroms that precede minChrom
        while (self->queryChrom != NULL && strcmp(self->queryChrom->name, minChrom) < 0)
            {
            self->queryChrom = self->queryChrom->next;
            self->doNextChunk = FALSE;
            resetMergeState(self);
            }
        if (self->hasBin)
            {
            self->mergeBins = TRUE;
            if (self->qLm == NULL)
                self->qLm = lmInit(0);
            }
        }
    if (self->queryChrom == NULL)
        self->eof = TRUE;
    else
        {
        char *chrom = self->queryChrom->name;
        int start = 0;
        if (minChrom != NULL && sameString(chrom, minChrom))
            start = minEnd;
        if (self->doNextChunk && start < self->nextChunkStart)
            start = self->nextChunkStart;
        uint end = annoAssemblySeqSize(self->streamer.assembly, self->queryChrom->name);
        sqlDyStringAppend(query, hasWhere ? " and " : " where ");
        sqlDyStringPrintf(query, "%s = '%s' ", self->chromField, chrom);
        if (start > 0 || self->doNextChunk)
            {
            dyStringAppend(query, "and ");
            if (self->hasBin)
                {
                if (self->doNextChunk && self->gotFinestBin)
                    // It would be way more elegant to make a hAddBinTopLevelOnly but this will do:
                    dyStringPrintf(query, "bin > %d and ", self->minFinestBin);
                hAddBinToQuery(start, end, query);
                }
            if (self->doNextChunk)
                sqlDyStringPrintf(query, "%s >= %u and ", self->startField,
                                  self->nextChunkStart);
            // region end is chromSize, so no need to constrain startField here:
            sqlDyStringPrintf(query, "%s > %u ", self->endField, start);
            }
        if (self->notSorted)
            sqlDyStringPrintf(query, "order by %s ", self->startField);
        dyStringPrintf(query, "limit %d", queryMaxItems);
        bufferRowsFromSqlQuery(self, query->string, queryMaxItems);
        // If there happens to be no items on chrom, try again with the next chrom:
        if (! self->eof && self->rowBuf.size == 0)
            asdDoQueryChunking(self, minChrom, minEnd);
        }
    }
dyStringFree(&query);
}
void syncOneRecord(struct sqlConnection *conn, char *type, struct jsonWrite *json, char *table, long long id) /* Send over one record and save UUID result to row of table defined by id in idField. */ { /* Construct dyString for URL */ struct dyString *dyUrl = dyStringNew(0); dyStringPrintf(dyUrl, "http://%s:%s@%s/%s/", gUserId, gPassword, gHost, type); verbose(2, "%s\n", dyUrl->string); /* Construct dyString for http header */ struct dyString *dyHeader = dyStringNew(0); dyStringPrintf(dyHeader, "Content-length: %d\r\n", json->dy->stringSize); dyStringPrintf(dyHeader, "Content-type: text/javascript\r\n"); /* Send header and then JSON */ int sd = netOpenHttpExt(dyUrl->string, "POST", dyHeader->string); mustWriteFd(sd, json->dy->string, json->dy->stringSize); /* Grab response */ struct dyString *dyText = netSlurpFile(sd); close(sd); uglyf("%s\n", dyText->string); /* Turn it into htmlPage structure - this will also parse out http header */ struct htmlPage *response = htmlPageParse(dyUrl->string, dyText->string); uglyf("status %s %d\n", response->status->version, response->status->status); /* If we got bad status abort with hopefully very informative message. */ int status = response->status->status; if (status != 200 && status != 201) // HTTP codes { errAbort("ERROR - Metadatabase returns %d to our post request\n" "POSTED JSON: %s\n" "URL: %s\n" "FULL RESPONSE: %s\n", status, json->dy->string, dyUrl->string, dyText->string); } /* Parse uuid out of json response. 
It should look something like { "status": "success", "@graph": [ { "description": "The macs2 peak calling software from Tao Liu.", "name": "macs2", "title": "macs2", "url": "https://github.com/taoliu/MACS/", "uuid": "9bda84fd-9872-49e3-9aa0-b71adbf9f31d", "schema_version": "1", "source_url": "https://github.com/taoliu/MACS/", "references": [], "@id": "/software/9bda84fd-9872-49e3-9aa0-b71adbf9f31d/", "@type": ["software", "item"], "aliases": [] } ], "@type": ["result"] } */ struct jsonElement *jsonRoot = jsonParse(response->htmlText); struct jsonElement *graph = jsonMustFindNamedField(jsonRoot, "", "@graph"); struct slRef *ref = jsonListVal(graph, "@graph"); assert(slCount(ref) == 1); struct jsonElement *graphEl = ref->val; char *uuid = jsonStringField(graphEl, "uuid"); uglyf("Got uuid %s\n", uuid); /* Save uuid to table */ char query[256]; sqlSafef(query, sizeof(query), "update %s set metaUuid='%s' where id=%lld", table, uuid, id); sqlUpdate(conn, query); /* Clean up */ dyStringFree(&dyUrl); dyStringFree(&dyHeader); dyStringFree(&dyText); response->fullText = NULL; // avoid double free of this htmlPageFree(&response); }
struct tagStorm *idfToStormTop(char *fileName) /* Convert an idf.txt format file to a tagStorm with a single top-level stanza */ { /* Create a tag storm with one as yet empty stanza */ struct tagStorm *storm = tagStormNew(fileName); struct tagStanza *stanza = tagStanzaNew(storm, NULL); /* Some stuff to help turn File_Data1, File_Data2, etc to a comma separated list */ char *additionalFilePrefix = "idf.Comment_AdditionalFile_Data"; struct dyString *additionalFileDy = dyStringNew(0); /* There can be multiple secondary accession tags, so handle these too */ char *secondaryAccessionTag = "idf.Comment_SecondaryAccession"; struct dyString *secondaryAccessionDy = dyStringNew(0); /* Parse lines from idf file into stanza */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; struct dyString *dyVal = dyStringNew(0); while (lineFileNextReal(lf, &line)) { /* Erase trailing tab... */ eraseTrailingSpaces(line); /* Parse line into tab-separated array and make sure it's a reasonable size */ char *row[256]; int rowSize = chopTabs(line, row); if (rowSize == ArraySize(row)) errAbort("Line %d of %s has too many fields", lf->lineIx, lf->fileName); if (rowSize < 2) continue; /* Convert first element to tagName */ char tagName[256]; aeFieldToNormalField("idf.", trimSpaces(row[0]), tagName, sizeof(tagName)); /* Special case where we already are a comma separated list */ if (sameString(tagName, "idf.Publication_Author_List")) { tagStanzaAppend(storm, stanza, tagName, row[1]); } else if (startsWith(additionalFilePrefix, tagName)) { csvEscapeAndAppend(additionalFileDy, row[1]); } else if (sameString(secondaryAccessionTag, tagName)) { csvEscapeAndAppend(secondaryAccessionDy, row[1]); } else { /* Convert rest of elements to possibly comma separated values */ dyStringClear(dyVal); int i; for (i=1; i<rowSize; ++i) csvEscapeAndAppend(dyVal, row[i]); tagStanzaAppend(storm, stanza, tagName, dyVal->string); } } if (additionalFileDy->stringSize != 0) tagStanzaAppend(storm, stanza, 
additionalFilePrefix, additionalFileDy->string); if (secondaryAccessionDy->stringSize != 0) tagStanzaAppend(storm, stanza, secondaryAccessionTag, secondaryAccessionDy->string); dyStringFree(&secondaryAccessionDy); dyStringFree(&additionalFileDy); dyStringFree(&dyVal); lineFileClose(&lf); return storm; }
void vcfTabOut(char *db, char *table, struct sqlConnection *conn, char *fields, FILE *f,
               boolean isTabix)
/* Print out selected fields from VCF.  If fields is NULL, then print out all fields.
 * fields is a comma separated list of column names; it is chopped in place.
 * If f is NULL output goes to stdout.  isTabix selects the tabix opener over
 * the plain VCF opener.  Output stops once bigFileMaxOutput() rows are written. */
{
struct hTableInfo *hti = NULL;
hti = getHti(db, table, conn);
struct hash *idHash = NULL;
char *idField = getIdField(db, curTrack, table, hti);
int idFieldNum = 0;
int i;

/* if we know what field to use for the identifiers, get the hash of names */
if (idField != NULL)
    idHash = identifierHash(db, table);

if (f == NULL)
    f = stdout;

/* Get list of all fields in VCF and turn it into a hash of column indexes keyed by
 * column name. */
struct hash *fieldHash = hashNew(0);
struct slName *bb, *bbList = vcfGetFields();
for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
    {
    /* if we know the field for identifiers, save it away */
    if ((idField != NULL) && sameString(idField, bb->name))
        idFieldNum = i;
    hashAddInt(fieldHash, bb->name, i);
    }

/* Convert comma separated list of fields to array.  A NULL list means every
 * field, in file order (previously NULL was passed straight to chopByChar). */
int fieldCount;
char **fieldArray;
if (fields == NULL)
    {
    fieldCount = slCount(bbList);
    AllocArray(fieldArray, fieldCount);
    for (bb = bbList, i=0; bb != NULL; bb = bb->next, ++i)
        fieldArray[i] = bb->name;
    }
else
    {
    fieldCount = chopByChar(fields, ',', NULL, 0);
    AllocArray(fieldArray, fieldCount);
    chopByChar(fields, ',', fieldArray, fieldCount);
    }

/* Create an array of column indexes corresponding to the selected field list. */
int *columnArray;
AllocArray(columnArray, fieldCount);
for (i=0; i<fieldCount; ++i)
    {
    columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
    }

// If we are outputting a subset of fields, invalidate the VCF header.
boolean allFields = (fieldCount == VCFDATALINE_NUM_COLS);
if (!allFields)
    fprintf(f, "# Only selected columns are included below; output is not valid VCF.\n");

struct asObject *as = vcfAsObj();
struct asFilter *filter = NULL;
if (anyFilter())
    filter = asFilterFromCart(cart, db, table, as);

/* Loop through outputting each region */
struct region *region, *regionList = getRegions();
int maxOut = bigFileMaxOutput();
struct trackDb *tdb = hashFindVal(fullTableToTdbHash, table);
// Include the header, absolutely necessary for VCF parsing.
boolean printedHeader = FALSE;
// Temporary storage for row-ification:
struct dyString *dyAlt = newDyString(1024);
struct dyString *dyFilter = newDyString(1024);
struct dyString *dyInfo = newDyString(1024);
struct dyString *dyGt = newDyString(1024);
struct vcfRecord *rec;
for (region = regionList; region != NULL && (maxOut > 0); region = region->next)
    {
    char *fileName = vcfFileName(tdb, conn, table, region->chrom);
    struct vcfFile *vcff;
    if (isTabix)
        vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end,
                                   100, maxOut);
    else
        vcff = vcfFileMayOpen(fileName, region->chrom, region->start, region->end,
                              100, maxOut, TRUE);
    if (vcff == NULL)
        noWarnAbort();
    // If we are outputting all fields, but this VCF has no genotype info, omit the
    // genotype columns from output:
    if (allFields && vcff->genotypeCount == 0)
        fieldCount = VCFDATALINE_NUM_COLS - 2;
    if (!printedHeader)
        {
        fprintf(f, "%s", vcff->headerString);
        if (filter)
            fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
        if (!allFields)
            {
            fprintf(f, "#%s", fieldArray[0]);
            for (i=1; i<fieldCount; ++i)
                fprintf(f, "\t%s", fieldArray[i]);
            fprintf(f, "\n");
            }
        printedHeader = TRUE;
        }
    char *row[VCFDATALINE_NUM_COLS];
    char numBuf[VCF_NUM_BUF_SIZE];
    for (rec = vcff->records;  rec != NULL && (maxOut > 0);  rec = rec->next)
        {
        vcfRecordToRow(rec, region->chrom, numBuf, dyAlt, dyFilter, dyInfo, dyGt, row);
        if (asFilterOnRow(filter, row))
            {
            /* if we're looking for identifiers, check if this matches */
            if ((idHash != NULL) && (hashLookup(idHash, row[idFieldNum]) == NULL))
                continue;
            // All fields output: after asFilter'ing, preserve original VCF chrom
            if (allFields && !sameString(rec->chrom, region->chrom))
                row[0] = rec->chrom;
            fprintf(f, "%s", row[columnArray[0]]);
            for (i=1; i<fieldCount; ++i)
                {
                fprintf(f, "\t%s", row[columnArray[i]]);
                }
            fprintf(f, "\n");
            maxOut --;
            }
        }
    vcfFileFree(&vcff);
    freeMem(fileName);
    }

if (maxOut == 0)
    warn("Reached output limit of %d data values, please make region smaller,\n\tor set a higher output line limit with the filter settings.", bigFileMaxOutput());
/* Clean up and exit. */
dyStringFree(&dyAlt);  dyStringFree(&dyFilter);  dyStringFree(&dyInfo);  dyStringFree(&dyGt);
hashFree(&fieldHash);
freeMem(fieldArray);
freeMem(columnArray);
}
void initStep(struct sqlConnection *conn, struct stepInit *init) /* Create step based on initializer */ { /* Do a little validation on while counting up inputs and outputs */ int inCount = commaSepCount(init->inputTypes); int matchCount = commaSepCount(init->inputFormats); if (inCount != matchCount) errAbort("inputTypes has %d elements but inputFormats has %d in step %s", inCount, matchCount, init->name); int outCount = commaSepCount(init->outputTypes); matchCount = commaSepCount(init->outputFormats); if (outCount != matchCount) errAbort("outputTypes has %d elements but outputFormats has %d in step %s", outCount, matchCount, init->name); matchCount = commaSepCount(init->outputNamesInTempDir); if (outCount != matchCount) errAbort("outputTypes has %d elements but outputNamesInTempDir has %d in step %s", outCount, matchCount, init->name); struct dyString *query = dyStringNew(0); dyStringPrintf(query, "select count(*) from eapStep where name='%s'", init->name); int existingCount = sqlQuickNum(conn, query->string); if (existingCount > 0) { warn("%s already exists in eapStep", init->name); dyStringFree(&query); return; } /* Parse out software part and make sure that all pieces are there. */ char **softwareArray; int softwareCount; sqlStringDynamicArray(init->software, &softwareArray, &softwareCount); unsigned softwareIds[softwareCount]; int i; for (i=0; i<softwareCount; ++i) { char *name = softwareArray[i]; dyStringClear(query); dyStringPrintf(query, "select id from eapSoftware where name='%s'", name); unsigned softwareId = sqlQuickNum(conn, query->string); if (softwareId == 0) errAbort("Software %s doesn't exist by that name in eapSoftware", name); softwareIds[i] = softwareId; } /* Make step record. 
*/ dyStringClear(query); dyStringAppend(query, "insert eapStep (name,cpusRequested," " inCount,inputTypes,inputFormats," " outCount,outputNamesInTempDir,outputTypes,outputFormats)" " values ("); dyStringPrintf(query, "'%s',", init->name); dyStringPrintf(query, "%d,", init->cpusRequested); dyStringPrintf(query, "%d,", inCount); dyStringPrintf(query, "'%s',", init->inputTypes); dyStringPrintf(query, "'%s',", init->inputFormats); dyStringPrintf(query, "%d,", outCount); dyStringPrintf(query, "'%s',", init->outputNamesInTempDir); dyStringPrintf(query, "'%s',", init->outputTypes); dyStringPrintf(query, "'%s'", init->outputFormats); dyStringPrintf(query, ")"); sqlUpdate(conn, query->string); /* Make software/step associations. */ for (i=0; i<softwareCount; ++i) { dyStringClear(query); dyStringPrintf(query, "insert eapStepSoftware (step,software) values ('%s','%s')", init->name, softwareArray[i]); sqlUpdate(conn, query->string); } /* Force step version stuff to be made right away */ eapCurrentStepVersion(conn, init->name); /* Clean up. */ dyStringFree(&query); freez(&softwareArray[0]); freez(&softwareArray); }
struct mafAli *hgMafFrag( char *database, /* Database, must already have hSetDb to this */ char *track, /* Name of MAF track */ char *chrom, /* Chromosome (in database genome) */ int start, int end, /* start/end in chromosome */ char strand, /* Chromosome strand. */ char *outName, /* Optional name to use in first component */ struct slName *orderList /* Optional order of organisms. */ ) /* mafFrag- Extract maf sequences for a region from database. * This creates a somewhat unusual MAF that extends from start * to end whether or not there are actually alignments. Where * there are no alignments (or alignments missing a species) * a . character fills in. The score is always zero, and * the sources just indicate the species. You can mafFree this * as normal. */ { int chromSize = hChromSize(database, chrom); struct sqlConnection *conn = hAllocConn(database); struct dnaSeq *native = hChromSeq(database, chrom, start, end); struct mafAli *maf, *mafList = mafLoadInRegion(conn, track, chrom, start, end); char masterSrc[128]; struct hash *orgHash = newHash(10); struct oneOrg *orgList = NULL, *org, *nativeOrg = NULL; int curPos = start, symCount = 0; struct slName *name; int order = 0; /* Check that the mafs are really copacetic, the particular * subtype we think is in the database that this (relatively) * simple code can handle. */ safef(masterSrc, sizeof(masterSrc), "%s.%s", database, chrom); mafCheckFirstComponentSrc(mafList, masterSrc); mafCheckFirstComponentStrand(mafList, '+'); slSort(&mafList, mafCmp); /* Prebuild organisms if possible from input orderList. 
*/ for (name = orderList; name != NULL; name = name->next) { AllocVar(org); slAddHead(&orgList, org); hashAddSaveName(orgHash, name->name, org, &org->name); org->dy = dyStringNew(native->size*1.5); org->order = order++; if (nativeOrg == NULL) nativeOrg = org; } if (orderList == NULL) { AllocVar(org); slAddHead(&orgList, org); hashAddSaveName(orgHash, database, org, &org->name); org->dy = dyStringNew(native->size*1.5); if (nativeOrg == NULL) nativeOrg = org; } /* Go through all mafs in window, mostly building up * org->dy strings. */ for (maf = mafList; maf != NULL; maf = maf->next) { struct mafComp *mc, *mcMaster = maf->components; struct mafAli *subMaf = NULL; order = 0; if (curPos < mcMaster->start) { fillInMissing(nativeOrg, orgList, native, start, curPos, mcMaster->start); symCount += mcMaster->start - curPos; } if (curPos < mcMaster->start + mcMaster->size) /* Prevent worst * backtracking */ { if (mafNeedSubset(maf, masterSrc, curPos, end)) { subMaf = mafSubset(maf, masterSrc, curPos, end); if (subMaf == NULL) continue; } else subMaf = maf; for (mc = subMaf->components; mc != NULL; mc = mc->next, ++order) { /* Extract name up to dot into 'orgName' */ char buf[128], *e, *orgName; if ((mc->size == 0) || (mc->srcSize == 0)) /* skip over components without sequence */ continue; mc->leftStatus = mc->rightStatus = 0; /* squash annotation */ e = strchr(mc->src, '.'); if (e == NULL) orgName = mc->src; else { int len = e - mc->src; if (len >= sizeof(buf)) errAbort("organism/database name %s too long", mc->src); memcpy(buf, mc->src, len); buf[len] = 0; orgName = buf; } /* Look up dyString corresponding to org, and create a * new one if necessary. 
*/ org = hashFindVal(orgHash, orgName); if (org == NULL) { if (orderList != NULL) errAbort("%s is not in orderList", orgName); AllocVar(org); slAddHead(&orgList, org); hashAddSaveName(orgHash, orgName, org, &org->name); org->dy = dyStringNew(native->size*1.5); dyStringAppendMultiC(org->dy, '.', symCount); if (nativeOrg == NULL) nativeOrg = org; } if (orderList == NULL && order > org->order) org->order = order; org->hit = TRUE; /* Fill it up with alignment. */ dyStringAppendN(org->dy, mc->text, subMaf->textSize); } for (org = orgList; org != NULL; org = org->next) { if (!org->hit) dyStringAppendMultiC(org->dy, '.', subMaf->textSize); org->hit = FALSE; } symCount += subMaf->textSize; curPos = mcMaster->start + mcMaster->size; if (subMaf != maf) mafAliFree(&subMaf); } } if (curPos < end) { fillInMissing(nativeOrg, orgList, native, start, curPos, end); symCount += end - curPos; } mafAliFreeList(&mafList); slSort(&orgList, oneOrgCmp); if (strand == '-') { for (org = orgList; org != NULL; org = org->next) reverseComplement(org->dy->string, org->dy->stringSize); } /* Construct our maf */ AllocVar(maf); maf->textSize = symCount; for (org = orgList; org != NULL; org = org->next) { struct mafComp *mc; AllocVar(mc); if (org == orgList) { if (outName != NULL) { mc->src = cloneString(outName); mc->srcSize = native->size; mc->strand = '+'; mc->start = 0; mc->size = native->size; } else { mc->src = cloneString(masterSrc); mc->srcSize = chromSize; mc->strand = strand; if (strand == '-') reverseIntRange(&start, &end, chromSize); mc->start = start; mc->size = end-start; } } else { int size = countAlpha(org->dy->string); mc->src = cloneString(org->name); mc->srcSize = size; mc->strand = '+'; mc->start = 0; mc->size = size; } mc->text = cloneString(org->dy->string); dyStringFree(&org->dy); slAddHead(&maf->components, mc); } slReverse(&maf->components); slFreeList(&orgList); freeHash(&orgHash); hFreeConn(&conn); return maf; }
struct axt *pslToAxt(struct psl *psl, struct hash *qHash, char *tNibDir, struct dlList *fileCache) { static char *tName = NULL, *qName = NULL; static struct dnaSeq *tSeq = NULL; struct dyString *q = newDyString(16*1024); struct dyString *t = newDyString(16*1024); int blockIx; int qs, ts ; int lastQ = 0, lastT = 0, size; int qOffset = 0; int tOffset = 0; struct axt *axt = NULL; boolean qIsNib = FALSE; boolean tIsNib = FALSE; int cnt = 0; //struct dnaSeq *tSeq = NULL; struct nibInfo *tNib = NULL; struct dnaSeq *qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0); // hGenBankGetMrna(psl->qName, NULL); /* freeDnaSeq(&qSeq); freez(&qName); assert(mrnaList != NULL); for (mrna = mrnaList; mrna != NULL ; mrna = mrna->next) { assert(mrna != NULL); cnt++; if (sameString(mrna->name, psl->qName)) { qSeq = cloneDnaSeq(mrna); assert(qSeq != NULL); break; } } */ if (qSeq == NULL) { warn("mrna sequence data not found %s, searched %d sequences\n",psl->qName,cnt); dyStringFree(&q); dyStringFree(&t); dnaSeqFree(&tSeq); dnaSeqFree(&qSeq); return NULL; } if (qSeq->size != psl->qSize) { warn("sequence %s aligned is different size %d from mrna.fa file %d \n",psl->qName,psl->qSize,qSeq->size); dyStringFree(&q); dyStringFree(&t); dnaSeqFree(&tSeq); dnaSeqFree(&qSeq); return NULL; } qName = cloneString(psl->qName); if (qIsNib && psl->strand[0] == '-') qOffset = psl->qSize - psl->qEnd; else qOffset = 0; verbose(5,"qString len = %d qOffset = %d\n",qSeq->size,qOffset); if (tName == NULL || !sameString(tName, psl->tName) || tIsNib) { freeDnaSeq(&tSeq); freez(&tName); tName = cloneString(psl->tName); tNib = nibInfoFromCache(nibHash, tNibDir, tName); assert(tNib !=NULL); tSeq = nibInfoLoadStrand(tNib, psl->tStart, psl->tEnd, '+'); assert(tSeq !=NULL); tOffset = psl->tStart; //readCachedSeqPart(tName, psl->tStart, psl->tEnd-psl->tStart, // tHash, fileCache, &tSeq, &tOffset, &tIsNib); } verbose(4,"strand t %s \n",psl->strand); if (tSeq != NULL) verbose(5,"tString len = %d tOffset = 
%d\n",tSeq->size,tOffset); else errAbort("tSeq is NULL\n"); if (psl->strand[0] == '-') reverseComplement(qSeq->dna, qSeq->size); //if (strlen(psl->strand) > 1 ) // if (psl->strand[1] == '-') // reverseComplement(tSeq->dna, tSeq->size); for (blockIx=0; blockIx < psl->blockCount; ++blockIx) { qs = psl->qStarts[blockIx] - qOffset; ts = psl->tStarts[blockIx] - tOffset; if (blockIx != 0) { int qGap, tGap, minGap; qGap = qs - lastQ; tGap = ts - lastT; minGap = min(qGap, tGap); if (minGap > 0) { writeGap(q, qGap, qSeq->dna + lastQ, t, tGap, tSeq->dna + lastT); } else if (qGap > 0) { writeInsert(q, t, qSeq->dna + lastQ, qGap); } else if (tGap > 0) { writeInsert(t, q, tSeq->dna + lastT, tGap); } } size = psl->blockSizes[blockIx]; assert(qSeq != NULL); dyStringAppendN(q, qSeq->dna + qs, size); lastQ = qs + size; dyStringAppendN(t, tSeq->dna + ts, size); lastT = ts + size; } if (strlen(q->string) != strlen(t->string)) warn("Symbol count(t) %d != %d inconsistent at t %s:%d and qName %s\n%s\n%s\n", (int)strlen(t->string), (int)strlen(q->string), psl->tName, psl->tStart, psl->qName, t->string, q->string); if (psl->strand[0] == '-') { reverseComplement(q->string, q->stringSize); reverseComplement(t->string, t->stringSize); } axt = axtCreate(q->string, t->string, min(q->stringSize,t->stringSize), psl); dyStringFree(&q); dyStringFree(&t); //dnaSeqFree(&tSeq); dnaSeqFree(&qSeq); if (qIsNib) freez(&qName); //if (tIsNib) // freez(&tName); return axt; }
struct rqlStatement *rqlStatementParse(struct lineFile *lf) /* Parse an RQL statement out of text */ { struct tokenizer *tkz = tokenizerOnLineFile(lf); tkz->uncommentShell = TRUE; tkz->uncommentC = TRUE; tkz->leaveQuotes = TRUE; struct rqlStatement *rql; AllocVar(rql); rql->command = cloneString(tokenizerMustHaveNext(tkz)); if (sameString(rql->command, "select")) { struct dyString *buf = dyStringNew(0); struct slName *list = NULL; char *tok = rqlParseFieldSpec(tkz, buf); /* Look for count(*) as special case. */ boolean countOnly = FALSE; if (sameString(tok, "count")) { char *paren = tokenizerNext(tkz); if (paren[0] == '(') { while ((paren = tokenizerMustHaveNext(tkz)) != NULL) { if (paren[0] == ')') break; } countOnly = TRUE; freez(&rql->command); rql->command = cloneString("count"); } else { tokenizerReuse(tkz); } } if (!countOnly) { list = slNameNew(tok); for (;;) { /* Parse out comma-separated field list. */ char *comma = tokenizerNext(tkz); if (comma == NULL || comma[0] != ',') { tokenizerReuse(tkz); break; } slNameAddHead(&list, rqlParseFieldSpec(tkz, buf)); } slReverse(&list); rql->fieldList = list; } dyStringFree(&buf); } else if (sameString(rql->command, "count")) { /* No parameters to count. */ } else errAbort("Unknown RQL command '%s line %d of %s\n", rql->command, lf->lineIx, lf->fileName); char *from = tokenizerNext(tkz); if (from != NULL) { if (sameString(from, "from")) { for (;;) { struct dyString *buf = dyStringNew(0); char *table = rqlParseFieldSpec(tkz, buf); slNameAddTail(&rql->tableList, table); char *comma = tokenizerNext(tkz); if (comma == NULL) break; if (comma[0] != ',') { tokenizerReuse(tkz); break; } dyStringFree(&buf); } } else { errAbort("missing 'from' clause in %s\n", rql->command); } } /* Parse where clause. 
*/ char *where = tokenizerNext(tkz); if (where != NULL) { if (!sameString(where, "where")) { tokenizerReuse(tkz); } else { rql->whereClause = rqlParseExpression(tkz); rqlParseVarsUsed(rql->whereClause, &rql->whereVarList); } } /* Parse limit clause. */ char *limit = tokenizerNext(tkz); rql->limit = -1; if (limit != NULL) { if (!sameString(limit, "limit")) errAbort("Unknown clause '%s' line %d of %s", limit, lf->lineIx, lf->fileName); char *count = tokenizerMustHaveNext(tkz); if (!isdigit(count[0])) errAbort("Expecting number after limit, got %s line %d of %s", count, lf->lineIx, lf->fileName); rql->limit = atoi(count); } /* Check that are at end of statement. */ char *extra = tokenizerNext(tkz); if (extra != NULL) errAbort("Extra stuff starting with '%s' past end of statement line %d of %s", extra, lf->lineIx, lf->fileName); return rql; }
void checkExp(char *bedFileName, char *tNibDir, char *nibList) { struct lineFile *bf = lineFileOpen(bedFileName , TRUE), *af = NULL; char *row[PSEUDOGENELINK_NUM_COLS] ; struct pseudoGeneLink *ps; char *tmpName[512], cmd[512]; struct axt *axtList = NULL, *axt, *mAxt = NULL; struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seqList = NULL; struct nibInfo *qNib = NULL, *tNib = NULL; FILE *op; int ret; if (nibHash == NULL) nibHash = hashNew(0); while (lineFileNextRow(bf, row, ArraySize(row))) { struct misMatch *misMatchList = NULL; struct binKeeper *bk = NULL; struct binElement *el, *elist = NULL; struct psl *mPsl = NULL, *rPsl = NULL, *pPsl = NULL, *psl ; struct misMatch *mf = NULL; ps = pseudoGeneLinkLoad(row); tmpName[0] = cloneString(ps->name); chopByChar(tmpName[0], '.', tmpName, sizeof(tmpName)); verbose(2,"name %s %s:%d-%d\n", ps->name, ps->chrom, ps->chromStart,ps->chromEnd); /* get expressed retro from hash */ bk = hashFindVal(mrnaHash, ps->chrom); elist = binKeeperFindSorted(bk, ps->chromStart, ps->chromEnd ) ; for (el = elist; el != NULL ; el = el->next) { rPsl = el->val; verbose(2,"retroGene %s %s:%d-%d\n",rPsl->qName, ps->chrom, ps->chromStart,ps->chromEnd); } /* find mrnas that overlap parent gene */ bk = hashFindVal(mrnaHash, ps->gChrom); elist = binKeeperFindSorted(bk, ps->gStart , ps->gEnd ) ; for (el = elist; el != NULL ; el = el->next) { pPsl = el->val; verbose(2,"parent %s %s:%d %d,%d\n", pPsl->qName, pPsl->tName,pPsl->tStart, pPsl->match, pPsl->misMatch); } /* find self chain */ bk = hashFindVal(chainHash, ps->chrom); elist = binKeeperFind(bk, ps->chromStart , ps->chromEnd ) ; slSort(&elist, chainCmpScoreDesc); for (el = elist; el != NULL ; el = el->next) { struct chain *chain = el->val, *subChain, *retChainToFree, *retChainToFree2; int qs = chain->qStart; int qe = chain->qEnd; int id = chain->id; if (chain->qStrand == '-') { qs = chain->qSize - chain->qEnd; qe = chain->qSize - chain->qStart; } if (!sameString(chain->qName , ps->gChrom) || 
!positiveRangeIntersection(qs, qe, ps->gStart, ps->gEnd)) { verbose(2," wrong chain %s:%d-%d %s:%d-%d parent %s:%d-%d\n", chain->qName, qs, qe, chain->tName,chain->tStart,chain->tEnd, ps->gChrom,ps->gStart,ps->gEnd); continue; } verbose(2,"chain id %d %4.0f",chain->id, chain->score); chainSubsetOnT(chain, ps->chromStart+7, ps->chromEnd-7, &subChain, &retChainToFree); if (subChain != NULL) chain = subChain; chainSubsetOnQ(chain, ps->gStart, ps->gEnd, &subChain, &retChainToFree2); if (subChain != NULL) chain = subChain; if (chain->qStrand == '-') { qs = chain->qSize - chain->qEnd; qe = chain->qSize - chain->qStart; } verbose(2," %s:%d-%d %s:%d-%d ", chain->qName, qs, qe, chain->tName,chain->tStart,chain->tEnd); if (subChain != NULL) verbose(2,"subChain %s:%d-%d %s:%d-%d\n", subChain->qName, subChain->qStart, subChain->qEnd, subChain->tName,subChain->tStart,subChain->tEnd); qNib = nibInfoFromCache(nibHash, tNibDir, chain->qName); tNib = nibInfoFromCache(nibHash, tNibDir, chain->tName); tSeq = nibInfoLoadStrand(tNib, chain->tStart, chain->tEnd, '+'); qSeq = nibInfoLoadStrand(qNib, chain->qStart, chain->qEnd, chain->qStrand); axtList = chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart, maxGap, BIGNUM); verbose(2,"axt count %d misMatch cnt %d\n",slCount(axtList), slCount(misMatchList)); for (axt = axtList; axt != NULL ; axt = axt->next) { addMisMatch(&misMatchList, axt, chain->qSize); } verbose(2,"%d in mismatch list %s id %d \n",slCount(misMatchList), chain->qName, id); chainFree(&retChainToFree); chainFree(&retChainToFree2); break; } /* create axt of each expressed retroGene to parent gene */ /* get alignment for each mrna overlapping retroGene */ bk = hashFindVal(mrnaHash, ps->chrom); elist = binKeeperFindSorted(bk, ps->chromStart , ps->chromEnd ) ; { char queryName[512]; char axtName[512]; char pslName[512]; safef(queryName, sizeof(queryName), "/tmp/query.%s.fa", ps->chrom); safef(axtName, sizeof(axtName), "/tmp/tmp.%s.axt", ps->chrom); safef(pslName, 
sizeof(pslName), "/tmp/tmp.%s.psl", ps->chrom); op = fopen(pslName,"w"); for (el = elist ; el != NULL ; el = el->next) { psl = el->val; pslOutput(psl, op, '\t','\n'); qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0); if (qSeq != NULL) slAddHead(&seqList, qSeq); else errAbort("seq %s not found \n", psl->qName); } fclose(op); faWriteAll(queryName, seqList); safef(cmd,sizeof(cmd),"pslPretty -long -axt %s %s %s %s",pslName , nibList, queryName, axtName); ret = system(cmd); if (ret != 0) errAbort("ret is %d %s\n",ret,cmd); verbose(2, "ret is %d %s\n",ret,cmd); af = lineFileOpen(axtName, TRUE); while ((axt = axtRead(af)) != NULL) slAddHead(&mAxt, axt); lineFileClose(&af); } slReverse(&mAxt); /* for each parent/retro pair, count bases matching retro and parent better */ for (el = elist; el != NULL ; el = el->next) { int i, scoreRetro=0, scoreParent=0, scoreNeither=0; struct dyString *parentMatch = newDyString(16*1024); struct dyString *retroMatch = newDyString(16*1024); mPsl = el->val; if (mAxt != NULL) { verbose(2,"mrna %s %s:%d %d,%d axt %s\n", mPsl->qName, mPsl->tName,mPsl->tStart, mPsl->match, mPsl->misMatch, mAxt->qName); assert(sameString(mPsl->qName, mAxt->qName)); for (i = 0 ; i< (mPsl->tEnd-mPsl->tStart) ; i++) { int j = mAxt->tStart - mPsl->tStart; verbose(5, "listLen = %d\n",slCount(&misMatchList)); if ((mf = matchFound(&misMatchList, (mPsl->tStart)+i)) != NULL) { if (toupper(mf->retroBase) == toupper(mAxt->qSym[j+i])) { verbose (3,"match retro[%d] %d %c == %c parent %c %d\n", i,mf->retroLoc, mf->retroBase, mAxt->qSym[j+i], mf->parentBase, mf->parentLoc); dyStringPrintf(retroMatch, "%d,", mf->retroLoc); scoreRetro++; } else if (toupper(mf->parentBase) == toupper(mAxt->qSym[j+i])) { verbose (3,"match parent[%d] %d %c == %c retro %c %d\n", i,mf->parentLoc, mf->parentBase, mAxt->qSym[j+i], mf->retroBase, mf->retroLoc); dyStringPrintf(parentMatch, "%d,", mf->parentLoc); scoreParent++; } else { verbose (3,"match neither[%d] %d %c != %c retro %c %d\n", 
i,mf->parentLoc, mf->parentBase, mAxt->tSym[j+i], mf->retroBase, mf->retroLoc); scoreNeither++; } } } verbose(2,"final score %s parent %d retro %d neither %d\n", mPsl->qName, scoreParent, scoreRetro, scoreNeither); fprintf(outFile,"%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%s\t%s\n", ps->chrom, ps->chromStart, ps->chromEnd, ps->name, ps->score, mPsl->tName, mPsl->tStart, mPsl->tEnd, mPsl->qName, scoreParent, scoreRetro, scoreNeither, parentMatch->string, retroMatch->string); mAxt = mAxt->next; } dyStringFree(&parentMatch); dyStringFree(&retroMatch); } } }
void configPageSetTrackVis(int vis) /* Do config page after setting track visibility. If vis is -2, then visibility * is unchanged. If -1 then set visibility to default, otherwise it should * be tvHide, tvDense, etc. */ { struct dyString *title = dyStringNew(0); char *groupTarget = NULL; struct track *trackList = NULL; struct track *ideoTrack = NULL; struct group *groupList = NULL; withPriorityOverride = cartUsualBoolean(cart, configPriorityOverride, FALSE); /* Get track list and group them. */ ctList = customTracksParseCart(database, cart, &browserLines, &ctFileName); trackList = getTrackList(&groupList, vis); /* The ideogram for some reason is considered a track. * We don't really want to process it as one though, so * we see if it's there, and if necessary remove it. */ ideoTrack = chromIdeoTrack(trackList); if (ideoTrack != NULL) removeTrackFromGroup(ideoTrack); /* Fetch group to change on if any from CGI, * and remove var so it doesn't get used again. */ groupTarget = cloneString(cartUsualString(cart, configGroupTarget, "")); cartRemove(cart, configGroupTarget); if (sameString(groupTarget, "none")) freez(&groupTarget); dyStringPrintf(title, "Configure Image"); hPrintf("<FORM ACTION=\"%s\" NAME=\"mainForm\" METHOD=%s>\n", hgTracksName(), cartUsualString(cart, "formMethod", "POST")); webStartWrapperDetailedNoArgs(cart, database, "", title->string, FALSE, FALSE, FALSE, FALSE); cartSaveSession(cart); hPrintf("<INPUT TYPE=HIDDEN NAME=\"hgTracksConfigPage\" VALUE=\"\">"); /* do not want all the submit buttons named the same thing, this one is: */ cgiMakeButton("topSubmit", "submit"); // 3 column table hPrintf("<TABLE style=\"border:0px; \">\n"); hPrintf("<TR><TD>image width:"); hPrintf("<TD style=\"text-align: right\">"); hIntVar("pix", tl.picWidth, 4); hPrintf("<TD>pixels</TR>"); hPrintf("<TR><TD>label area width:"); hPrintf("<TD style=\"text-align: right\">"); hIntVar("hgt.labelWidth", leftLabelWidthChars, 2); hPrintf("<TD>characters<TD></TR>"); 
hPrintf("<TR><TD>text size:"); hPrintf("<TD style=\"text-align: right\">"); textSizeDropDown(); hPrintf("<TD>"); if (trackLayoutInclFontExtras()) { char *defaultStyle = cartUsualString(cart, "fontType", "medium"); cartMakeRadioButton(cart, "fontType", "medium", defaultStyle); hPrintf(" medium "); cartMakeRadioButton(cart, "fontType", "fixed", defaultStyle); hPrintf(" fixed "); cartMakeRadioButton(cart, "fontType", "bold", defaultStyle); hPrintf(" bold "); hPrintf(" "); } hPrintf("<TR><BR>"); hTableStart(); if (ideoTrack != NULL) { hPrintf("<TR><TD>"); hCheckBox("ideogram", cartUsualBoolean(cart, "ideogram", TRUE)); hPrintf("</TD><TD>"); hPrintf("Display chromosome ideogram above main graphic"); hPrintf("</TD></TR>\n"); } hPrintf("<TR><TD>"); hCheckBox("guidelines", cartUsualBoolean(cart, "guidelines", TRUE)); hPrintf("</TD><TD>"); hPrintf("Show light blue vertical guidelines"); hPrintf("</TD></TR>\n"); hPrintf("<TR><TD>"); hCheckBox("leftLabels", cartUsualBoolean(cart, "leftLabels", TRUE)); hPrintf("</TD><TD>"); hPrintf("Display labels to the left of items in tracks"); hPrintf("</TD></TR>\n"); hPrintf("<TR><TD>"); hCheckBox("centerLabels", cartUsualBoolean(cart, "centerLabels", TRUE)); hPrintf("</TD><TD>"); hPrintf("Display description above each track"); hPrintf("</TD></TR>\n"); hPrintf("<TR><TD>"); hCheckBox("trackControlsOnMain", cartUsualBoolean(cart, "trackControlsOnMain", TRUE)); hPrintf("</TD><TD>"); hPrintf("Show track controls under main graphic"); hPrintf("</TD></TR>\n"); hPrintf("<TR><TD>"); hCheckBox("nextItemArrows", cartUsualBoolean(cart, "nextItemArrows", FALSE)); hPrintf("</TD><TD>"); hPrintf("Next/previous item navigation"); hPrintf("</TD></TR>\n"); hPrintf("<TR><TD>"); hCheckBox("nextExonArrows", cartUsualBoolean(cart, "nextExonArrows", TRUE)); hPrintf("</TD><TD>"); hPrintf("Next/previous exon navigation"); hPrintf("</TD></TR>\n"); #ifdef PRIORITY_CHANGES_IN_CONFIG_UI hPrintf("<TR><TD>"); char 
*javascript="onClick=\"document.mainForm.hgTracksConfigPage.value='configure';document.mainForm.submit();\""; hCheckBoxJS(configPriorityOverride, cartUsualBoolean(cart, configPriorityOverride , FALSE), javascript); hPrintf("</TD><TD>"); hPrintf("Enable track re-ordering"); hPrintf("</TD></TR>\n"); #endif///def PRIORITY_CHANGES_IN_CONFIG_UI hPrintf("<TR><TD>"); hCheckBox("enableAdvancedJavascript", advancedJavascriptFeaturesEnabled(cart)); hPrintf("</TD><TD>"); hPrintf("Enable advanced javascript features"); hPrintf("</TD></TR>\n"); hTableEnd(); char *freeze = hFreezeFromDb(database); char buf[128]; if (stringIn(database, freeze)) safef(buf, sizeof buf, "Configure Tracks on %s %s: %s %s", organization, browserName, organism, freeze); else safef(buf, sizeof buf, "Configure Tracks on %s %s: %s %s (%s)", organization, browserName, organism, freeze, database); webNewSection(buf); hPrintf("Tracks: "); if(isSearchTracksSupported(database,cart)) { cgiMakeButtonWithMsg(TRACK_SEARCH, TRACK_SEARCH_BUTTON,TRACK_SEARCH_HINT); hPrintf(" "); } cgiMakeButtonWithMsg(configHideAll, "hide all","Hide all tracks in this genome assembly"); hPrintf(" "); cgiMakeButtonWithMsg(configShowAll, "show all","Show all tracks in this genome assembly"); hPrintf(" "); cgiMakeButtonWithMsg(configDefaultAll, "default","Display only default tracks"); hPrintf(" Groups: "); hButtonWithOnClick("hgt.collapseGroups", "collapse all", "Collapse all track groups", "return setAllTrackGroupVisibility(false)"); hPrintf(" "); hButtonWithOnClick("hgt.expandGroups", "expand all", "Expand all track groups", "return setAllTrackGroupVisibility(true)"); hPrintf("<P STYLE=\"margin-top:5;\">Control track and group visibility more selectively below.<P>"); trackConfig(trackList, groupList, groupTarget, vis); dyStringFree(&title); freez(&groupTarget); webEndSectionTables(); hPrintf("</FORM>"); }
static void printFactorSourceTableHits(struct factorSource *cluster, struct sqlConnection *conn,
	char *sourceTable, char *inputTrackTable,
	struct slName *fieldList, boolean invert, char *vocab)
/* Put out lines in an html table that show assayed sources that have hits in this
 * cluster, or if invert is set, that have misses.  One query joins sourceTable
 * and inputTrackTable for the cluster's factor; each returned source id is then
 * looked up in cluster->expNums.  When vocab is given, its first word is passed
 * to factorSourceVocabLink to build a link for every field value.  Source tables
 * whose name starts with "encode3" or "encode4" get an encodeproject.org file
 * link in the last cell when the table name contains "ENCFF". */
{
char *vocabFile = NULL;
if (vocab)
    {
    vocabFile = cloneFirstWord(vocab);
    }

/* Make the monster SQL query to get all assays*/
struct dyString *query = dyStringNew(0);
sqlDyStringPrintf(query, "select %s.id,%s.name,%s.tableName", sourceTable, sourceTable,
	inputTrackTable);
struct slName *field;
for (field = fieldList; field != NULL; field = field->next)
    sqlDyStringPrintf(query, ",%s.%s", inputTrackTable, field->name);
sqlDyStringPrintf(query, " from %s,%s ", inputTrackTable, sourceTable);
sqlDyStringPrintf(query, " where %s.source = %s.description", inputTrackTable, sourceTable);
sqlDyStringPrintf(query, " and factor='%s' order by %s.source", cluster->name, inputTrackTable);

boolean encodeStanford = FALSE;
if (startsWith("encode3", sourceTable) || startsWith("encode4", sourceTable))
    encodeStanford = TRUE;

int displayNo = 0;
int fieldCount = slCount(fieldList);
struct sqlResult *sr = sqlGetResult(conn, query->string);
char **row;
while ((row = sqlNextRow(sr)) != NULL)
    {
    int sourceId = sqlUnsigned(row[0]);
    boolean hit = FALSE;
    int i;
    double signal = 0.0;
    for (i=0; i<cluster->expCount; i++)
	{
	if (cluster->expNums[i] == sourceId)
	    {
	    hit = TRUE;
	    signal = cluster->expScores[i];
	    break;
	    }
	}
    if (hit ^ invert)
	{
	printf("</TR><TR>\n");
	webPrintIntCell(++displayNo);
	if (!invert)
	    webPrintDoubleCell(signal);
	webPrintLinkCell(row[1]);

	/* The requested metadata fields follow the three fixed columns. */
	int fieldIx = 0;
	int offset = 3;
	struct slName *curField = fieldList;
	for (fieldIx=0; fieldIx<fieldCount && curField != NULL; ++fieldIx, curField = curField->next)
	    {
	    char *fieldVal = row[fieldIx+offset];
	    if (vocab)
		{
		char *link = cloneString(factorSourceVocabLink(vocabFile, curField->name, fieldVal));
		webPrintLinkCell(link);
		/* The clone is only needed while the cell is printed. */
		freez(&link);
		}
	    else
		webPrintLinkCell(fieldVal);
	    }

	char *table = row[2];
	if (encodeStanford)
	    {
	    char *file = stringIn("ENCFF", table);
	    if (!file)
		webPrintLinkCell(table);
	    else
		{
		webPrintLinkCellStart();
		/* Note the space separating the target and href attributes. */
		printf("<A target='_blank' "
			"href='https://www.encodeproject.org/files/%s'>%s</A>", file, file);
		webPrintLinkCellEnd();
		}
	    }
	else
	    printMetadataForTable(table);
	}
    }
sqlFreeResult(&sr);
freez(&vocabFile);
dyStringFree(&query);
}
void submitRefToFiles(struct sqlConnection *conn, struct sqlConnection *conn2, struct sqlConnection *connSp, char *ref, char *fileRoot, char *inJax) /* Create a .ra and a .tab file for given reference. */ { /* Initially the tab file will have some duplicate lines, so * write to temp file, and then filter. */ char raName[PATH_LEN], tabName[PATH_LEN], capName[PATH_LEN]; FILE *ra = NULL, *tab = NULL, *cap = NULL; struct dyString *query = dyStringNew(0); struct sqlResult *sr; char **row; char *pubMed; struct slName *list, *el; boolean gotAny = FALSE; struct hash *uniqImageHash = newHash(0); struct hash *captionHash = newHash(0); int imageWidth = 0, imageHeight = 0; char path[PATH_LEN]; struct dyString *caption = dyStringNew(0); struct dyString *copyright = dyStringNew(0); struct dyString *probeNotes = dyStringNew(0); boolean lookedForCopyright = FALSE; safef(raName, sizeof(raName), "%s.ra", fileRoot); safef(tabName, sizeof(tabName), "%s.tab", fileRoot); safef(capName, sizeof(capName), "%s.txt", fileRoot); tab = mustOpen(tabName, "w"); cap = mustOpen(capName, "w"); sqlDyStringPrintf(query, "select authors,journal,title,year from BIB_Refs where "); sqlDyStringPrintf(query, "_Refs_key = '%s'", ref); sr = sqlGetResultVerbose(conn, query->string); row = sqlNextRow(sr); if (row == NULL) errAbort("Can't find _Refs_key %s in BIB_Refs", ref); /* Make ra file with stuff common to whole submission set. */ ra = mustOpen(raName, "w"); fprintf(ra, "submissionSource MGI\n"); fprintf(ra, "acknowledgement Thanks to the Gene Expression Database group at " "Mouse Genome Informatics (MGI) for collecting, annotating and sharing " "this image. The MGI images were last updated in VisiGene on March 28, 2006. 
" "Additional and more up to date annotations and images may be available " "directly at <A HREF='http://www.informatics.jax.org' target='_blank'>MGI.</A>\n"); fprintf(ra, "submitSet jax%s\n", ref); fprintf(ra, "taxon 10090\n"); /* Mus musculus taxon */ fprintf(ra, "fullDir http://hgwdev.gi.ucsc.edu/visiGene/full/inSitu/Mouse/jax\n"); fprintf(ra, "thumbDir http://hgwdev.gi.ucsc.edu/visiGene/200/inSitu/Mouse/jax\n"); fprintf(ra, "setUrl http://www.informatics.jax.org/\n"); fprintf(ra, "itemUrl http://www.informatics.jax.org/searches/image.cgi?%%s\n"); fprintf(ra, "abUrl http://www.informatics.jax.org/searches/antibody.cgi?%%s\n"); fprintf(ra, "journal %s\n", row[1]); fprintf(ra, "publication %s\n", row[2]); fprintf(ra, "year %s\n", row[3]); /* The contributor (author) list is in format Kent WJ; Haussler DH; format in * Jackson. We convert it to Kent W.J.,Haussler D.H., format for visiGene. */ fprintf(ra, "contributor "); list = charSepToSlNames(row[0], ';'); for (el = list; el != NULL; el = el->next) { char *lastName = skipLeadingSpaces(el->name); char *initials = strrchr(lastName, ' '); if (initials == NULL) initials = ""; else *initials++ = 0; fprintf(ra, "%s", lastName); if (initials[0] != 0) { char c; fprintf(ra, " "); while ((c = *initials++) != 0) fprintf(ra, "%c.", c); } fprintf(ra, ","); } fprintf(ra, "\n"); slNameFreeList(&list); sqlFreeResult(&sr); /* Add in link to PubMed record on publication. 
*/ dyStringClear(query); sqlDyStringPrintf(query, "select ACC_Accession.accID from ACC_Accession,ACC_LogicalDB " "where ACC_Accession._Object_key = %s " "and ACC_Accession._LogicalDB_key = ACC_LogicalDB._LogicalDB_key " "and ACC_LogicalDB.name = 'PubMed'", ref); pubMed = sqlQuickStringVerbose(conn, query->string); if (pubMed != NULL) fprintf(ra, "pubUrl https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=%s\n", pubMed); freez(&pubMed); dyStringClear(query); sqlDyStringPrintf(query, "select distinct MRK_Marker.symbol as gene," "GXD_Specimen.sex as sex," "GXD_Specimen.age as age," "GXD_Specimen.ageMin as ageMin," "GXD_Specimen.ageMax as ageMax," "IMG_ImagePane.paneLabel as paneLabel," "ACC_Accession.numericPart as fileKey," "IMG_Image._Image_key as imageKey," "GXD_Assay._ProbePrep_key as probePrepKey," "GXD_Assay._AntibodyPrep_key as antibodyPrepKey," "GXD_Assay._ReporterGene_key as reporterGeneKey," "GXD_FixationMethod.fixation as fixation," "GXD_EmbeddingMethod.embeddingMethod as embedding," "GXD_Assay._Assay_key as assayKey," "GXD_Specimen.hybridization as sliceType," "GXD_Specimen._Genotype_key as genotypeKey," "IMG_ImagePane._ImagePane_key as imagePaneKey\n" "from MRK_Marker," "GXD_Assay," "GXD_Specimen," "GXD_InSituResult," "GXD_InSituResultImage," "GXD_FixationMethod," "GXD_EmbeddingMethod," "IMG_ImagePane," "IMG_Image," "ACC_Accession\n" "where MRK_Marker._Marker_key = GXD_Assay._Marker_key " "and GXD_Assay._Assay_key = GXD_Specimen._Assay_key " "and GXD_Specimen._Specimen_key = GXD_InSituResult._Specimen_key " "and GXD_InSituResult._Result_key = GXD_InSituResultImage._Result_key " "and GXD_InSituResultImage._ImagePane_key = IMG_ImagePane._ImagePane_key " "and GXD_FixationMethod._Fixation_key = GXD_Specimen._Fixation_key " "and GXD_EmbeddingMethod._Embedding_key = GXD_Specimen._Embedding_key " "and IMG_ImagePane._Image_key = IMG_Image._Image_key " "and IMG_Image._Image_key = ACC_Accession._Object_key " "and 
ACC_Accession.prefixPart = 'PIX:' " "and GXD_Assay._ImagePane_key is NULL " ); sqlDyStringPrintf(query, "and GXD_Assay._Refs_key = '%s'", ref); sr = sqlGetResultVerbose(conn, query->string); fprintf(tab, "#"); fprintf(tab, "gene\t"); fprintf(tab, "probeColor\t"); fprintf(tab, "sex\t"); fprintf(tab, "age\t"); fprintf(tab, "ageMin\t"); fprintf(tab, "ageMax\t"); fprintf(tab, "paneLabel\t"); fprintf(tab, "fileName\t"); fprintf(tab, "submitId\t"); fprintf(tab, "fPrimer\t"); fprintf(tab, "rPrimer\t"); fprintf(tab, "abName\t"); fprintf(tab, "abTaxon\t"); fprintf(tab, "abSubmitId\t"); fprintf(tab, "fixation\t"); fprintf(tab, "embedding\t"); fprintf(tab, "bodyPart\t"); fprintf(tab, "sliceType\t"); fprintf(tab, "genotype\t"); fprintf(tab, "strain\t"); fprintf(tab, "priority\t"); fprintf(tab, "captionId\t"); fprintf(tab, "imageWidth\t"); fprintf(tab, "imageHeight\n"); while ((row = sqlNextRow(sr)) != NULL) { char *gene = row[0]; char *sex = row[1]; char *age = row[2]; char *ageMin = row[3]; char *ageMax = row[4]; char *paneLabel = row[5]; char *fileKey = row[6]; char *imageKey = row[7]; char *probePrepKey = row[8]; char *antibodyPrepKey = row[9]; char *reporterGeneKey = row[10]; char *fixation = row[11]; char *embedding = row[12]; char *assayKey = row[13]; char *sliceType = row[14]; char *genotypeKey = row[15]; char *imagePaneKey = row[16]; double calcAge = -1; char *probeColor = ""; char *bodyPart = ""; char *abName = NULL; char *rPrimer = NULL, *fPrimer = NULL; char *genotype = NULL; char *strain = NULL; char *priority = NULL; char abTaxon[32]; char *captionId = ""; char *abSubmitId = NULL; verbose(3, " "); dumpRow(row, 16); if (age == NULL) continue; if (!lookedForCopyright) { struct sqlResult *sr = NULL; char **row; lookedForCopyright = TRUE; dyStringClear(query); sqlDyStringPrintf(query, "select note from MGI_NoteChunk,MGI_Note,MGI_NoteType,ACC_MGIType " "where MGI_Note._Object_key = %s " "and ACC_MGIType.name = 'Image' " "and ACC_MGIType._MGIType_key = 
MGI_Note._MGIType_key " "and MGI_NoteType.noteType='Copyright' " "and MGI_Note._NoteType_key = MGI_NoteType._NoteType_key " "and MGI_Note._Note_key = MGI_NoteChunk._Note_key " "order by sequenceNum" , imageKey); sr = sqlGetResultVerbose(conn2, query->string); while ((row = sqlNextRow(sr)) != NULL) dyStringAppend(copyright, row[0]); sqlFreeResult(&sr); verbose(2,"imageKey=%s\n",imageKey); if (copyright->stringSize != 0) { fprintf(ra, "copyright %s\n", copyright->string); } } /* Massage sex */ { if (sameString(sex, "Male")) sex = "male"; else if (sameString(sex, "Female")) sex = "female"; else sex = ""; } /* Massage age */ { char *embryoPat = "embryonic day "; char *newbornPat = "postnatal newborn"; char *dayPat = "postnatal day "; char *weekPat = "postnatal week "; char *adultPat = "postnatal adult"; double calcMinAge = atof(ageMin); double calcMaxAge = atof(ageMax); double mouseBirthAge = 21.0; //double mouseAdultAge = 63.0; /* Relative to conception, not birth */ if (age[0] == 0) { warn("age null, ageMin %s, ageMax %s\n", ageMin, ageMax); calcAge = (calcMinAge + calcMaxAge) * 0.5; } else if (startsWith(embryoPat, age)) calcAge = atof(age+strlen(embryoPat)); else if (sameString(newbornPat, age)) calcAge = mouseBirthAge; else if (startsWith(dayPat, age)) calcAge = atof(age+strlen(dayPat)) + mouseBirthAge; else if (startsWith(weekPat, age)) calcAge = 7.0 * atof(age+strlen(weekPat)) + mouseBirthAge; else if (sameString(adultPat, age) && calcMaxAge - calcMinAge > 1000 && calcMinAge < 365) calcAge = 365; /* Most adult mice are relatively young */ else { warn("Calculating age from %s", age); calcAge = (calcMinAge + calcMaxAge) * 0.5; } if (calcAge < calcMinAge) calcAge = calcMinAge; if (calcAge > calcMaxAge) calcAge = calcMaxAge; } /* Massage probeColor */ { if (!isStrNull(reporterGeneKey)) { /* Fixme: make sure that reporterGene's end up in probeType table. 
*/ char *name = NULL; dyStringClear(query); sqlDyStringPrintf(query, "select term from VOC_Term where _Term_key = %s", reporterGeneKey); name = sqlQuickStringVerbose(conn2, query->string); if (name == NULL) warn("Can't find _ReporterGene_key %s in VOC_Term", reporterGeneKey); else if (sameString(name, "GFP")) probeColor = "green"; else if (sameString(name, "lacZ")) probeColor = "blue"; else warn("Don't know color of reporter gene %s", name); freez(&name); } if (!isStrNull(probePrepKey)) { char *name = NULL; dyStringClear(query); sqlDyStringPrintf(query, "select GXD_VisualizationMethod.visualization " "from GXD_VisualizationMethod,GXD_ProbePrep " "where GXD_ProbePrep._ProbePrep_key = %s " "and GXD_ProbePrep._Visualization_key = GXD_VisualizationMethod._Visualization_key" , probePrepKey); name = sqlQuickStringVerbose(conn2, query->string); if (name == NULL) warn("Can't find visualization from _ProbePrep_key %s", probePrepKey); probeColor = colorFromLabel(name, gene); freez(&name); if (probeColor[0] == 0) { dyStringClear(query); sqlDyStringPrintf(query, "select GXD_Label.label from GXD_Label,GXD_ProbePrep " "where GXD_ProbePrep._ProbePrep_key = %s " "and GXD_ProbePrep._Label_key = GXD_Label._Label_key" , probePrepKey); name = sqlQuickStringVerbose(conn2, query->string); if (name == NULL) warn("Can't find label from _ProbePrep_key %s", probePrepKey); probeColor = colorFromLabel(name, gene); } freez(&name); } if (!isStrNull(antibodyPrepKey) && probeColor[0] == 0 ) { char *name = NULL; dyStringClear(query); sqlDyStringPrintf(query, "select GXD_Label.label from GXD_Label,GXD_AntibodyPrep " "where GXD_AntibodyPrep._AntibodyPrep_key = %s " "and GXD_AntibodyPrep._Label_key = GXD_Label._Label_key" , antibodyPrepKey); name = sqlQuickStringVerbose(conn2, query->string); if (name == NULL) warn("Can't find label from _AntibodyPrep_key %s", antibodyPrepKey); probeColor = colorFromLabel(name, gene); freez(&name); } } /* Get abName, abTaxon, abSubmitId */ abTaxon[0] = 0; if 
(!isStrNull(antibodyPrepKey)) { struct sqlResult *sr = NULL; int orgKey = 0; char **row; dyStringClear(query); sqlDyStringPrintf(query, "select antibodyName,_Organism_key,GXD_Antibody._Antibody_key " "from GXD_AntibodyPrep,GXD_Antibody " "where GXD_AntibodyPrep._AntibodyPrep_key = %s " "and GXD_AntibodyPrep._Antibody_key = GXD_Antibody._Antibody_key" , antibodyPrepKey); sr = sqlGetResultVerbose(conn2, query->string); row = sqlNextRow(sr); if (row != NULL) { abName = cloneString(row[0]); orgKey = atoi(row[1]); abSubmitId = cloneString(row[2]); } sqlFreeResult(&sr); if (orgKey > 0) { char *latinName = NULL, *commonName = NULL; int spTaxon = 0; dyStringClear(query); sqlDyStringPrintf(query, "select latinName from MGI_Organism " "where _Organism_key = %d", orgKey); latinName = sqlQuickStringVerbose(conn2, query->string); if (latinName != NULL && !sameString(latinName, "Not Specified") && !sameString(latinName, "Not Applicable")) { char *e = strchr(latinName, '/'); if (e != NULL) *e = 0; /* Chop off / and after. 
*/ spTaxon = spBinomialToTaxon(connSp, latinName); } else { dyStringClear(query); sqlDyStringPrintf(query, "select commonName from MGI_Organism " "where _Organism_key = %d", orgKey); commonName = sqlQuickStringVerbose(conn2, query->string); if (commonName != NULL && !sameString(commonName, "Not Applicable") && !sameString(commonName, "Not Specified")) { spTaxon = spCommonToTaxon(connSp, commonName); } } if (spTaxon != 0) safef(abTaxon, sizeof(abTaxon), "%d", spTaxon); freez(&latinName); freez(&commonName); } } if (abName == NULL) abName = cloneString(""); if (abSubmitId == NULL) abSubmitId = cloneString(""); /* Get rPrimer, lPrimer */ if (!isStrNull(probePrepKey)) { struct sqlResult *sr = NULL; char **row; dyStringClear(query); sqlDyStringPrintf(query, "select primer1sequence,primer2sequence " "from PRB_Probe,GXD_ProbePrep " "where PRB_Probe._Probe_key = GXD_ProbePrep._Probe_key " "and GXD_ProbePrep._ProbePrep_key = %s" , probePrepKey); sr = sqlGetResultVerbose(conn2, query->string); row = sqlNextRow(sr); if (row != NULL) { fPrimer = cloneString(row[0]); rPrimer = cloneString(row[1]); } sqlFreeResult(&sr); } /* Note Jackson database actually stores the primers very * erratically. In all the cases I can find for in situs * the primers are actually stored in free text in the PRB_Notes * e.g. ... primers CGCGGATCCAGGGGAAACAGAAGGGCTGCG and CCCAAGCTTAGACTGTACAGGCTGAGCC ... 
*/ if (fPrimer == NULL || fPrimer[0]==0) { struct sqlResult *sr = NULL; char **row; dyStringClear(query); sqlDyStringPrintf(query, "select PRB_Notes.note from GXD_ProbePrep, PRB_Notes" " where GXD_ProbePrep._ProbePrep_key = %s" " and GXD_ProbePrep._Probe_key = PRB_Notes._Probe_key" " order by PRB_Notes.sequenceNum" , probePrepKey); sr = sqlGetResultVerbose(conn2, query->string); dyStringClear(probeNotes); while ((row = sqlNextRow(sr)) != NULL) dyStringAppend(probeNotes, row[0]); sqlFreeResult(&sr); if (probeNotes->stringSize > 0) { char f[256]; char r[256]; int i = 0; char *s = strstr(probeNotes->string," primers "); if (s) { s += strlen(" primers "); i = 0; while (strchr("ACGT",*s) && (i<sizeof(f))) f[i++] = *s++; f[i]=0; if (strstr(s," and ")==s) { s += strlen(" and "); i = 0; while (strchr("ACGT",*s) && (i<sizeof(r))) r[i++] = *s++; r[i]=0; if (strlen(f) >= 10 && strlen(r) >= 10) { fPrimer = cloneString(f); rPrimer = cloneString(r); } else { verbose(1, "bad primer parse:_ProbePrep_key=%s fPrimer=[%s], rPrimer=[%s]\n", probePrepKey,f,r); } } } } } if (fPrimer == NULL) fPrimer = cloneString(""); if (rPrimer == NULL) rPrimer = cloneString(""); fixation = blankOutUnknown(fixation); embedding = blankOutUnknown(embedding); /* Massage body part and slice type. We only handle whole mounts. */ if (sameString(sliceType, "whole mount")) { bodyPart = "whole"; priority = "100"; } else { sliceType = ""; priority = "1000"; } genotypeAndStrainFromKey(genotypeKey, conn2, &genotype, &strain); if (isStrNull(paneLabel)) paneLabel = cloneString(""); /* trying to suppress nulls in output */ stripChar(paneLabel, '"'); /* Get rid of a difficult quote to process. */ /* Fetch image dimensions from file. */ imageWidth=0; imageHeight=0; safef(path, sizeof(path), "%s/%s.jpg", inJax, fileKey); if (fileExists(path)) jpegSize(path,&imageWidth,&imageHeight); /* will errAbort if no valid .jpeg exists */ else warn("Picture Missing! %s ",path); /* Deal caption if any. 
Most of the work only happens the * first time see the image. */ if (!hashLookup(uniqImageHash, imageKey)) { struct sqlResult *sr = NULL; char **row; hashAdd(uniqImageHash, imageKey, NULL); dyStringClear(caption); dyStringClear(query); sqlDyStringPrintf(query, "select note from MGI_NoteChunk,MGI_Note,MGI_NoteType,ACC_MGIType " "where MGI_Note._Object_key = %s " "and ACC_MGIType.name = 'Image' " "and ACC_MGIType._MGIType_key = MGI_Note._MGIType_key " "and MGI_NoteType.noteType='Caption' " "and MGI_Note._NoteType_key = MGI_NoteType._NoteType_key " "and MGI_Note._Note_key = MGI_NoteChunk._Note_key " "order by sequenceNum" , imageKey); sr = sqlGetResultVerbose(conn2, query->string); while ((row = sqlNextRow(sr)) != NULL) dyStringAppend(caption, row[0]); sqlFreeResult(&sr); if (caption->stringSize > 0) { subChar(caption->string, '\t', ' '); subChar(caption->string, '\n', ' '); fprintf(cap, "%s\t%s\n", imageKey, caption->string); hashAdd(captionHash, imageKey, imageKey); } } if (hashLookup(captionHash, imageKey)) captionId = imageKey; else captionId = ""; fprintf(tab, "%s\t", gene); fprintf(tab, "%s\t", probeColor); fprintf(tab, "%s\t", sex); fprintf(tab, "%3.2f\t", calcAge); fprintf(tab, "%s\t", ageMin); fprintf(tab, "%s\t", ageMax); fprintf(tab, "%s\t", paneLabel); /* may have to change NULL to empty string or "0" ? 
*/ fprintf(tab, "%s.jpg\t", fileKey); fprintf(tab, "%s\t", imageKey); fprintf(tab, "%s\t", fPrimer); fprintf(tab, "%s\t", rPrimer); fprintf(tab, "%s\t", abName); fprintf(tab, "%s\t", abTaxon); fprintf(tab, "%s\t", abSubmitId); fprintf(tab, "%s\t", fixation); fprintf(tab, "%s\t", embedding); fprintf(tab, "%s\t", bodyPart); fprintf(tab, "%s\t", sliceType); fprintf(tab, "%s\t", genotype); fprintf(tab, "%s\t", strain); fprintf(tab, "%s\t", priority); fprintf(tab, "%s\t", captionId); fprintf(tab, "%d\t", imageWidth); fprintf(tab, "%d\n", imageHeight); printExpression(tab, conn2, imagePaneKey, assayKey); gotAny = TRUE; freez(&genotype); freez(&abName); freez(&abSubmitId); freez(&rPrimer); freez(&fPrimer); } sqlFreeResult(&sr); carefulClose(&ra); carefulClose(&tab); carefulClose(&cap); if (!gotAny) { remove(raName); remove(capName); remove(tabName); } dyStringFree(&probeNotes); dyStringFree(©right); dyStringFree(&caption); dyStringFree(&query); hashFree(&uniqImageHash); hashFree(&captionHash); }
char *gsS3Upload(char *s3UploadUrl, char *inputFileName, off_t contentLength, char *base64Md5, char *hexMd5, char *contentType, boolean progress, char *fileName)
/* Upload the contents of inputFileName to s3UploadUrl with an HTTP PUT.
 *   s3UploadUrl   - URL the PUT request is opened against.
 *   inputFileName - local file whose bytes are sent as the request body.
 *   contentLength - value sent in the Content-Length header; also the
 *                   denominator of the progress percentage.
 *   base64Md5     - value sent in the Content-MD5 header.
 *   hexMd5        - not used by this function.
 *   contentType   - value sent in the Content-Type header.
 *   progress      - when TRUE, a fresh HTML progress page is printed to stdout
 *                   each time the integer percentage changes.
 *   fileName      - name shown on the progress page.
 * Returns whatever parseResponse() returns for the socket.
 * Aborts (errAbort/errnoAbort) if the socket cannot be opened, if a write
 * fails for a reason other than a broken pipe, or if the response code is
 * not "200 OK". */
{
// S3 UPLOAD to Amazon Storage
struct dyString *reqExtra = newDyString(256);
dyStringPrintf(reqExtra, "Content-Length: %lld\r\n", (long long)contentLength);
dyStringPrintf(reqExtra, "Content-MD5: %s\r\n", base64Md5);
dyStringPrintf(reqExtra, "Content-Type: %s\r\n", contentType);

int sd = netOpenHttpExt(s3UploadUrl, "PUT", reqExtra->string);
if (sd < 0)
    errAbort("failed to open socket for [%s]", s3UploadUrl);

unsigned char buffer[S3UPBUFSIZE];
int bufRead = 0;
FILE *f = mustOpen(inputFileName,"rb");
off_t totalUploaded = 0;
int lastPctUploaded = -1;
/* Set only when write() itself reports EPIPE.  errno is meaningful only
 * right after a failing call, so it must not be re-read later as a
 * "did the pipe break" indicator. */
boolean brokenPipe = FALSE;

// upload the file contents
while ((bufRead = fread(buffer, 1, S3UPBUFSIZE, f)) > 0)
    {
    int bufWrite = 0;
    /* write() may accept fewer bytes than requested; keep going until the
     * whole chunk has been sent. */
    while (bufWrite < bufRead)
        {
        int socketWrite = write(sd, buffer + bufWrite, bufRead - bufWrite);
        if (socketWrite == -1)
            {
            if (errno == EPIPE)  // broken pipe often happens when the connection shuts down or has errors.
                {
                brokenPipe = TRUE;
                warn("broken pipe, S3 server closed the ssh connection.");
                break;
                }
            errnoAbort("error writing to socket for GenomeSpace upload");
            }
        bufWrite += socketWrite;
        }
    if (brokenPipe)
        break;

    totalUploaded += bufRead;
    int pctUploaded = 100.0*totalUploaded/contentLength;
    if (progress && (pctUploaded != lastPctUploaded))
        {
        char nicenumber[1024]="";
        sprintWithGreekByte(nicenumber, sizeof(nicenumber), contentLength);

        // Various global flags must be reset to draw a fresh html output page.
        webHeadAlreadyOutputed = FALSE;
        webInTextMode = FALSE;
        includedResourceFiles = NULL;
        htmlWarnBoxSetUpAlready=FALSE;

        htmlOpen("Uploading Output to GenomeSpace");
        printf("Name: %s<br>\n", fileName);
        printf("Size: %s<br>\n", nicenumber);
        printf("Progress: %0d%%<br>\n", pctUploaded);
        printf("<br>\n");
        printf("<FORM ACTION=\"/cgi-bin/hgTables\" METHOD=GET>\n"
            "<INPUT TYPE=SUBMIT NAME=\"%s\" VALUE=\"Back\" >"
            "<INPUT TYPE=SUBMIT NAME=\"Refresh\" VALUE=\"Refresh\" onclick='window.location=window.location;return false;' >"
            "</FORM>\n"
            , hgtaDoMainPage);
        /* The emitted page reloads itself every 5 seconds. */
        puts("<script type=\"text/javascript\">");
        puts("<!--");
        puts("setTimeout(\"location = location;\",5000);");
        puts("-->");
        puts("</script>");
        htmlClose();
        fflush(stdout);
        lastPctUploaded = pctUploaded;
        }
    }

carefulClose(&f);

char *responseCode = NULL;
char *s3UploadResponse = parseResponse(sd, &responseCode);
/* NOTE(review): responseCode starts out NULL; guard in case parseResponse
 * leaves it unset (e.g. after a broken pipe) rather than passing NULL on to
 * sameString. */
if (responseCode == NULL)
    errAbort("Amazon S3 Response: %s", "(none)");
if (!sameString(responseCode, "200 OK"))
    errAbort("Amazon S3 Response: %s", responseCode);

dyStringFree(&reqExtra);

return s3UploadResponse;
}
void testDbFilters(struct htmlPage *dbPage, char *org, char *db, char *accColumn, struct slName *geneList) /* Test filter that returns just geneList. */ { struct slName *gene; int rowCount; char accFilter[256]; /* Start out with filter page. */ struct htmlPage *page = quickSubmit(dbPage, NULL, org, db, accColumn, NULL, "accOneFilterPage", advFilterVarName, "on"); verbose(1, "testFilters %s %s\n", org, db); if (page == NULL) return; /* Set up to filter exactly one gene. */ safef(accFilter, sizeof(accFilter), "near.as.%s.wild", accColumn); { htmlPageSetVar(page, NULL, accFilter, geneList->name); htmlPageSetVar(page, NULL, searchVarName, geneList->name); serialSubmit(&page, NULL, org, db, accColumn, geneList->name, "accOneFilterSubmit", "Submit", "on"); if (page == NULL) return; /* Make sure really got one gene. */ rowCount = nearCountUniqAccRows(page); if (rowCount != 1) { qaStatusSoftError(nearTestList->status, "Acc exact filter returned %d items", rowCount); } } /* Set up filter for all genes in list. */ { struct dyString *dy = newDyString(0); int geneCount = slCount(geneList); for (gene = geneList; gene != NULL; gene = gene->next) dyStringPrintf(dy, "%s ", gene->name); htmlPageSetVar(page, NULL, accFilter, dy->string); htmlPageSetVar(page, NULL, countVarName, "all"); /* despite 3 genes requested, must see all if many dupes */ serialSubmit(&page, NULL, org, db, accColumn, dy->string, "accMultiFilterSubmit", "Submit", "on"); dyStringFree(&dy); if (page == NULL) return; rowCount = nearCountUniqAccRows(page); if (rowCount != geneCount) { qaStatusSoftError(nearTestList->status, "Acc multi filter expecting %d, got %d items", geneCount, rowCount); } } /* Set up filter for wildcard in list. 
*/ { struct dyString *dy = newDyString(0); char len = strlen(geneList->name); dyStringAppendN(dy, geneList->name, len-1); dyStringAppendC(dy, '*'); htmlPageSetVar(page, NULL, accFilter, dy->string); serialSubmit(&page, NULL, org, db, accColumn, dy->string, "accWildFilterSubmit", "Submit", "on"); dyStringFree(&dy); if (page == NULL) return; rowCount = nearCountRows(page); if (rowCount < 1) { qaStatusSoftError(nearTestList->status, "Acc wild filter no match"); } } /* Clear out advanced filters. */ { htmlPageFree(&page); page = quickSubmit(dbPage, NULL, org, db, NULL, NULL, "advFilterClear", advFilterClearVarName, "on"); } htmlPageFree(&page); }
static void doBlat(struct sqlConnection *conn, int taxon, char *db) /* place probe seq from non-BAC with blat that have no alignments yet */ { int rc = 0; char *blatSpec=NULL; char cmdLine[256]; char path1[256]; char path2[256]; struct dyString *dy = dyStringNew(0); /* (non-BACs needing alignment) */ dyStringClear(dy); dyStringPrintf(dy, "select concat(\"vgPrb_\",e.id), e.seq" " from vgPrb e, vgPrbAli a" " where e.id = a.vgPrb" " and a.db = '%s'" " and a.status = 'new'" " and e.taxon = %d" " and e.type <> 'bac'" " and e.seq <> ''" " order by e.id" , db, taxon); //restore: rc = sqlSaveQuery(conn, dy->string, "blat.fa", TRUE); verbose(1,"rc = %d = count of sequences for blat, to get psls for taxon %d\n",rc,taxon); if (rc == 0) { unlink("blat.fa"); system("rm -f blatNearBest.psl; touch blatNearBest.psl"); /* make empty file */ return; } /* make .ooc and blat on kolossus */ safef(path1,sizeof(path1),"/gbdb/%s/%s.2bit",db,db); safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db); //restore: verbose(1,"copy: [%s] to [%s]\n",path1,path2); copyFile(path1,path2); safef(cmdLine,sizeof(cmdLine), "ssh kolossus 'cd %s; blat -makeOoc=11.ooc -tileSize=11" " -repMatch=1024 %s.2bit /dev/null /dev/null'", getCurrentDir(),db); //restore: system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date"); safef(cmdLine,sizeof(cmdLine), "ssh kolossus 'cd %s; blat %s.2bit blat.fa -ooc=11.ooc -noHead blat.psl'", getCurrentDir(),db); //restore: system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date"); /* using blat even with -fastMap was way too slow - took over a day, * so instead I will make a procedure to write a fake psl for the BACs * which you will see called below */ safef(path2,sizeof(path2),"%s.2bit",db); verbose(1,"rm %s\n",path2); unlink(path2); safef(path2,sizeof(path2),"11.ooc"); verbose(1,"rm %s\n",path2); unlink(path2); /* skip psl header and sort on query name */ safef(cmdLine,sizeof(cmdLine), "sort -k 10,10 blat.psl > 
blatS.psl"); verbose(1,"cmdLine=[%s]\n",cmdLine); system(cmdLine); /* keep near best within 5% of the best */ safef(cmdLine,sizeof(cmdLine), "pslCDnaFilter -globalNearBest=0.005 -minId=0.96 -minNonRepSize=20 -minCover=0.50" " blatS.psl blatNearBest.psl"); verbose(1,"cmdLine=[%s]\n",cmdLine); system(cmdLine); unlink("blat.fa"); unlink("blat.psl"); unlink("blatS.psl"); freez(&blatSpec); dyStringFree(&dy); }