struct dyString *lineFileSlurpHttpBody(struct lineFile *lf, boolean chunked, int contentLength)
/* Return a dyString that contains the http response body in lf.  Handle
 * chunk-encoding and content-length.
 *   lf            - line file the body is read from.
 *   chunked       - if TRUE the body is decoded as "Transfer-Encoding: chunked".
 *   contentLength - consulted only when not chunked: if >= 0 lines are read until
 *                   at least that many bytes have been consumed, otherwise lines
 *                   are read until lineFileNext reports no more input.
 * Every line read is appended to the result followed by a single '\n'.
 * The result is allocated here with newDyString. */
{
struct dyString *body = newDyString(64*1024);
char *line;
int lineSize;

dyStringClear(body);
if (chunked)
    {
    /* Handle "Transfer-Encoding: chunked" body */
    /* Procedure from RFC2068 section 19.4.6 */
    char *csword;
    unsigned chunkSize = 0;
    unsigned size;
    do
        {
        /* Read line that has chunk size (in hex) as first word. */
        if (lineFileNext(lf, &line, NULL))
            csword = nextWord(&line);
        else
            break;
        /* nextWord may hand back NULL (other callers in this file check for it),
         * which must not reach sscanf.  Treat it as a parse error. */
        if (csword == NULL || sscanf(csword, "%x", &chunkSize) < 1)
            {
            warn("%s: chunked transfer-encoding chunk size parse error.\n",
                 lf->fileName);
            break;
            }
        /* If chunk size is 0, read in a blank line & then we're done. */
        if (chunkSize == 0)
            {
            lineFileNext(lf, &line, NULL);
            if (line == NULL || (line[0] != '\r' && line[0] != 0))
                warn("%s: chunked transfer-encoding: expected blank line, got %s\n",
                     lf->fileName, (line != NULL ? line : "(null)"));
            break;
            }
        /* Read (and save) lines until we have read in chunk. */
        for (size = 0; size < chunkSize; size += lineSize)
            {
            if (! lineFileNext(lf, &line, &lineSize))
                break;
            dyStringAppendN(body, line, lineSize-1);
            dyStringAppendC(body, '\n');
            }
        /* Read blank line - or extra CRLF inserted in the middle of the
         * current line, in which case we need to trim it. */
        if (size > chunkSize)
            {
            body->stringSize -= (size - chunkSize);
            body->string[body->stringSize] = 0;
            }
        else if (size == chunkSize)
            {
            lineFileNext(lf, &line, NULL);
            if (line == NULL || (line[0] != '\r' && line[0] != 0))
                warn("%s: chunked transfer-encoding: expected blank line, got %s\n",
                     lf->fileName, (line != NULL ? line : "(null)"));
            }
        } while (chunkSize > 0);
    /* Try to read in next line.  If it's an HTTP header, put it back. */
    /* If there is a next line but it's not an HTTP header, it's a footer. */
    if (lineFileNext(lf, &line, NULL))
        {
        if (startsWith("HTTP/", line))
            lineFileReuse(lf);
        else
            {
            /* Got a footer -- keep reading until blank line */
            warn("%s: chunked transfer-encoding: got footer %s, discarding it.\n",
                 lf->fileName, line);
            while (lineFileNext(lf, &line, NULL))
                {
                if ((line[0] == '\r' && line[1] == 0) || line[0] == 0)
                    break;
                warn("discarding footer line: %s\n", line);
                }
            }
        }
    }
else if (contentLength >= 0)
    {
    /* Read in known length */
    int size;
    for (size = 0; size < contentLength; size += lineSize)
        {
        if (! lineFileNext(lf, &line, &lineSize))
            break;
        dyStringAppendN(body, line, lineSize-1);
        dyStringAppendC(body, '\n');
        }
    }
else
    {
    /* Read in to end of file (assume it's not a persistent connection) */
    while (lineFileNext(lf, &line, &lineSize))
        {
        dyStringAppendN(body, line, lineSize-1);
        dyStringAppendC(body, '\n');
        }
    }
return(body);
} /* lineFileSlurpHttpBody */
void initStep(struct sqlConnection *conn, struct stepInit *init) /* Create step based on initializer */ { /* Do a little validation on while counting up inputs and outputs */ int inCount = commaSepCount(init->inputTypes); int matchCount = commaSepCount(init->inputFormats); if (inCount != matchCount) errAbort("inputTypes has %d elements but inputFormats has %d in step %s", inCount, matchCount, init->name); int outCount = commaSepCount(init->outputTypes); matchCount = commaSepCount(init->outputFormats); if (outCount != matchCount) errAbort("outputTypes has %d elements but outputFormats has %d in step %s", outCount, matchCount, init->name); matchCount = commaSepCount(init->outputNamesInTempDir); if (outCount != matchCount) errAbort("outputTypes has %d elements but outputNamesInTempDir has %d in step %s", outCount, matchCount, init->name); struct dyString *query = dyStringNew(0); dyStringPrintf(query, "select count(*) from eapStep where name='%s'", init->name); int existingCount = sqlQuickNum(conn, query->string); if (existingCount > 0) { warn("%s already exists in eapStep", init->name); dyStringFree(&query); return; } /* Parse out software part and make sure that all pieces are there. */ char **softwareArray; int softwareCount; sqlStringDynamicArray(init->software, &softwareArray, &softwareCount); unsigned softwareIds[softwareCount]; int i; for (i=0; i<softwareCount; ++i) { char *name = softwareArray[i]; dyStringClear(query); dyStringPrintf(query, "select id from eapSoftware where name='%s'", name); unsigned softwareId = sqlQuickNum(conn, query->string); if (softwareId == 0) errAbort("Software %s doesn't exist by that name in eapSoftware", name); softwareIds[i] = softwareId; } /* Make step record. 
*/ dyStringClear(query); dyStringAppend(query, "insert eapStep (name,cpusRequested," " inCount,inputTypes,inputFormats," " outCount,outputNamesInTempDir,outputTypes,outputFormats)" " values ("); dyStringPrintf(query, "'%s',", init->name); dyStringPrintf(query, "%d,", init->cpusRequested); dyStringPrintf(query, "%d,", inCount); dyStringPrintf(query, "'%s',", init->inputTypes); dyStringPrintf(query, "'%s',", init->inputFormats); dyStringPrintf(query, "%d,", outCount); dyStringPrintf(query, "'%s',", init->outputNamesInTempDir); dyStringPrintf(query, "'%s',", init->outputTypes); dyStringPrintf(query, "'%s'", init->outputFormats); dyStringPrintf(query, ")"); sqlUpdate(conn, query->string); /* Make software/step associations. */ for (i=0; i<softwareCount; ++i) { dyStringClear(query); dyStringPrintf(query, "insert eapStepSoftware (step,software) values ('%s','%s')", init->name, softwareArray[i]); sqlUpdate(conn, query->string); } /* Force step version stuff to be made right away */ eapCurrentStepVersion(conn, init->name); /* Clean up. */ dyStringFree(&query); freez(&softwareArray[0]); freez(&softwareArray); }
void bioImageLoad(char *setRaFile, char *itemTabFile) /* bioImageLoad - Load data into bioImage database. */ { struct hash *raHash = raReadSingle(setRaFile); struct hash *rowHash; struct lineFile *lf = lineFileOpen(itemTabFile, TRUE); char *line, *words[256]; struct sqlConnection *conn = sqlConnect(database); int rowSize; int submissionSetId; struct hash *fullDirHash = newHash(0); struct hash *screenDirHash = newHash(0); struct hash *thumbDirHash = newHash(0); struct hash *treatmentHash = newHash(0); struct hash *bodyPartHash = newHash(0); struct hash *sliceTypeHash = newHash(0); struct hash *imageTypeHash = newHash(0); struct hash *sectionSetHash = newHash(0); struct dyString *dy = dyStringNew(0); /* Read first line of tab file, and from it get all the field names. */ if (!lineFileNext(lf, &line, NULL)) errAbort("%s appears to be empty", lf->fileName); if (line[0] != '#') errAbort("First line of %s needs to start with #, and then contain field names", lf->fileName); rowHash = hashRowOffsets(line+1); rowSize = rowHash->elCount; if (rowSize >= ArraySize(words)) errAbort("Too many fields in %s", lf->fileName); /* Check that have all required fields */ { char *fieldName; int i; for (i=0; i<ArraySize(requiredSetFields); ++i) { fieldName = requiredSetFields[i]; if (!hashLookup(raHash, fieldName)) errAbort("Field %s is not in %s", fieldName, setRaFile); } for (i=0; i<ArraySize(requiredItemFields); ++i) { fieldName = requiredItemFields[i]; if (!hashLookup(rowHash, fieldName)) errAbort("Field %s is not in %s", fieldName, itemTabFile); } for (i=0; i<ArraySize(requiredFields); ++i) { fieldName = requiredFields[i]; if (!hashLookup(rowHash, fieldName) && !hashLookup(raHash, fieldName)) errAbort("Field %s is not in %s or %s", fieldName, setRaFile, itemTabFile); } } /* Create/find submission record. */ submissionSetId = saveSubmissionSet(conn, raHash); /* Process rest of tab file. 
*/ while (lineFileNextRowTab(lf, words, rowSize)) { int fullDir = cachedId(conn, "location", "name", fullDirHash, "fullDir", raHash, rowHash, words); int screenDir = cachedId(conn, "location", "name", screenDirHash, "screenDir", raHash, rowHash, words); int thumbDir = cachedId(conn, "location", "name", thumbDirHash, "thumbDir", raHash, rowHash, words); int bodyPart = cachedId(conn, "bodyPart", "name", bodyPartHash, "bodyPart", raHash, rowHash, words); int sliceType = cachedId(conn, "sliceType", "name", sliceTypeHash, "sliceType", raHash, rowHash, words); int imageType = cachedId(conn, "imageType", "name", imageTypeHash, "imageType", raHash, rowHash, words); int treatment = cachedId(conn, "treatment", "conditions", treatmentHash, "treatment", raHash, rowHash, words); char *fileName = getVal("fileName", raHash, rowHash, words, NULL); char *submitId = getVal("submitId", raHash, rowHash, words, NULL); char *taxon = getVal("taxon", raHash, rowHash, words, NULL); char *isEmbryo = getVal("isEmbryo", raHash, rowHash, words, NULL); char *age = getVal("age", raHash, rowHash, words, NULL); char *sectionSet = getVal("sectionSet", raHash, rowHash, words, ""); char *sectionIx = getVal("sectionIx", raHash, rowHash, words, "0"); char *gene = getVal("gene", raHash, rowHash, words, ""); char *locusLink = getVal("locusLink", raHash, rowHash, words, ""); char *refSeq = getVal("refSeq", raHash, rowHash, words, ""); char *genbank = getVal("genbank", raHash, rowHash, words, ""); char *priority = getVal("priority", raHash, rowHash, words, "200"); int sectionId = 0; int oldId; // char *xzy = getVal("xzy", raHash, rowHash, words, xzy); if (sectionSet[0] != 0 && !sameString(sectionSet, "0")) { struct hashEl *hel = hashLookup(sectionSetHash, sectionSet); if (hel != NULL) sectionId = ptToInt(hel->val); else { sqlUpdate(conn, "insert into sectionSet values(default)"); sectionId = sqlLastAutoId(conn); hashAdd(sectionSetHash, sectionSet, intToPt(sectionId)); } } dyStringClear(dy); 
dyStringAppend(dy, "select id from image "); dyStringPrintf(dy, "where fileName = '%s' ", fileName); dyStringPrintf(dy, "and fullLocation = %d", fullDir); oldId = sqlQuickNum(conn, dy->string); if (oldId != 0) { if (replace) { dyStringClear(dy); dyStringPrintf(dy, "delete from image where id = %d", oldId); sqlUpdate(conn, dy->string); } else errAbort("%s is already in database line %d of %s", fileName, lf->lineIx, lf->fileName); } dyStringClear(dy); dyStringAppend(dy, "insert into image set\n"); dyStringPrintf(dy, " id = default,\n"); dyStringPrintf(dy, " fileName = '%s',\n", fileName); dyStringPrintf(dy, " fullLocation = %d,\n", fullDir); dyStringPrintf(dy, " screenLocation = %d,\n", screenDir); dyStringPrintf(dy, " thumbLocation = %d,\n", thumbDir); dyStringPrintf(dy, " submissionSet = %d,\n", submissionSetId); dyStringPrintf(dy, " sectionSet = %d,\n", sectionId); dyStringPrintf(dy, " sectionIx = %s,\n", sectionIx); dyStringPrintf(dy, " submitId = '%s',\n", submitId); dyStringPrintf(dy, " gene = '%s',\n", gene); dyStringPrintf(dy, " locusLink = '%s',\n", locusLink); dyStringPrintf(dy, " refSeq = '%s',\n", refSeq); dyStringPrintf(dy, " genbank = '%s',\n", genbank); dyStringPrintf(dy, " priority = %s,\n", priority); dyStringPrintf(dy, " taxon = %s,\n", taxon); dyStringPrintf(dy, " isEmbryo = %s,\n", isEmbryo); dyStringPrintf(dy, " age = %s,\n", age); dyStringPrintf(dy, " bodyPart = %d,\n", bodyPart); dyStringPrintf(dy, " sliceType = %d,\n", sliceType); dyStringPrintf(dy, " imageType = %d,\n", imageType); dyStringPrintf(dy, " treatment = %d\n", treatment); sqlUpdate(conn, dy->string); } }
boolean xpParseNext(struct xp *xp, char *tag) /* Skip through file until get given tag. Then parse out the * tag and all of it's children (calling atStartTag/atEndTag). * You can call this repeatedly to process all of a given tag * in file. */ { char c; int i, attCount = 0; struct dyString *text = NULL; boolean isClosed; boolean inside = (tag == NULL); struct xpStack *initialStack = xp->stack; for (;;) { /* Load up text until next tag. */ for (;;) { if ((c = xpGetChar(xp)) == 0) return FALSE; if (c == '<') break; if (c == '&') xpLookup(xp, xp->endTag, text); else { if (c == '\n') ++xp->lineIx; if (text != NULL) dyStringAppendC(text, c); } } /* Get next character to figure out what type of tag. */ c = xpGetChar(xp); if (c == 0) xpError(xp, "End of file inside tag"); else if (c == '?' || c == '!') xpEatComment(xp, c); else if (c == '/') /* Closing tag. */ { struct xpStack *stack = xp->stack; if (stack >= xp->stackBufEnd) xpError(xp, "Extra end tag"); xpParseEndTag(xp, stack->tag->string); if (inside) xp->atEndTag(xp->userData, stack->tag->string, stack->text->string); xp->stack += 1; if (xp->stack == initialStack) return TRUE; } else /* Start tag. */ { /* Push new frame on stack and check for overflow and unallocated strings. */ struct xpStack *stack = --xp->stack; if (stack < xp->stackBuf) xpError(xp, "Stack overflow"); if (stack->tag == NULL) stack->tag = newDyString(32); else dyStringClear(stack->tag); if (stack->text == NULL) stack->text = newDyString(256); else dyStringClear(stack->text); text = stack->text; /* Parse the start tag. */ xpUngetChar(xp); xpParseStartTag(xp, ArraySize(xp->attDyBuf), stack->tag, &attCount, xp->attDyBuf, &isClosed); if (!inside && sameString(stack->tag->string, tag)) { inside = TRUE; initialStack = xp->stack + 1; } /* Call user start function, and if closed tag, end function too. */ if (inside) { /* Unpack attributes into simple array of strings. 
*/ for (i=0; i<attCount; ++i) xp->attBuf[i] = xp->attDyBuf[i]->string; xp->attBuf[attCount] = NULL; xp->atStartTag(xp->userData, stack->tag->string, xp->attBuf); } if (isClosed) { if (inside) xp->atEndTag(xp->userData, stack->tag->string, stack->text->string); xp->stack += 1; if (xp->stack == initialStack) return TRUE; } } } }
struct tagStorm *idfToStormTop(char *fileName) /* Convert an idf.txt format file to a tagStorm with a single top-level stanza */ { /* Create a tag storm with one as yet empty stanza */ struct tagStorm *storm = tagStormNew(fileName); struct tagStanza *stanza = tagStanzaNew(storm, NULL); /* Some stuff to help turn File_Data1, File_Data2, etc to a comma separated list */ char *additionalFilePrefix = "idf.Comment_AdditionalFile_Data"; struct dyString *additionalFileDy = dyStringNew(0); /* There can be multiple secondary accession tags, so handle these too */ char *secondaryAccessionTag = "idf.Comment_SecondaryAccession"; struct dyString *secondaryAccessionDy = dyStringNew(0); /* Parse lines from idf file into stanza */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; struct dyString *dyVal = dyStringNew(0); while (lineFileNextReal(lf, &line)) { /* Erase trailing tab... */ eraseTrailingSpaces(line); /* Parse line into tab-separated array and make sure it's a reasonable size */ char *row[256]; int rowSize = chopTabs(line, row); if (rowSize == ArraySize(row)) errAbort("Line %d of %s has too many fields", lf->lineIx, lf->fileName); if (rowSize < 2) continue; /* Convert first element to tagName */ char tagName[256]; aeFieldToNormalField("idf.", trimSpaces(row[0]), tagName, sizeof(tagName)); /* Special case where we already are a comma separated list */ if (sameString(tagName, "idf.Publication_Author_List")) { tagStanzaAppend(storm, stanza, tagName, row[1]); } else if (startsWith(additionalFilePrefix, tagName)) { csvEscapeAndAppend(additionalFileDy, row[1]); } else if (sameString(secondaryAccessionTag, tagName)) { csvEscapeAndAppend(secondaryAccessionDy, row[1]); } else { /* Convert rest of elements to possibly comma separated values */ dyStringClear(dyVal); int i; for (i=1; i<rowSize; ++i) csvEscapeAndAppend(dyVal, row[i]); tagStanzaAppend(storm, stanza, tagName, dyVal->string); } } if (additionalFileDy->stringSize != 0) tagStanzaAppend(storm, stanza, 
additionalFilePrefix, additionalFileDy->string); if (secondaryAccessionDy->stringSize != 0) tagStanzaAppend(storm, stanza, secondaryAccessionTag, secondaryAccessionDy->string); dyStringFree(&secondaryAccessionDy); dyStringFree(&additionalFileDy); dyStringFree(&dyVal); lineFileClose(&lf); return storm; }
static int doBacs(struct sqlConnection *conn, int taxon, char *db) /* fetch available sequence for bacEndPairs */ { struct dyString *dy = dyStringNew(0); struct dnaSeq *chromSeq = NULL; struct bac *bacs = bacRead(conn, taxon, db); struct bac *bac = NULL; char *chrom = cloneString(""); int count = 0; verbose(1,"bac list read done.\n"); for(bac=bacs;bac;bac=bac->next) { if (differentWord(chrom,bac->chrom)) { verbose(1,"switching to chrom %s\n",bac->chrom); dnaSeqFree(&chromSeq); chromSeq = hLoadChrom(bac->chrom,db); freez(&chrom); chrom = cloneString(bac->chrom); } char *dna = checkAndFetchBacDna(chromSeq, bac); if (sameString(bac->strand,"-")) { reverseComplement(dna,strlen(dna)); } dyStringClear(dy); dyStringPrintf(dy, "select count(*) from vgPrb where id=%d and state='new'",bac->probe); if (sqlQuickNum(conn,dy->string)>0) { /* record exists and hasn't already been updated */ int vgPrb = findVgPrbBySeq(conn,dna,taxon); if (vgPrb == 0) { dyStringClear(dy); dyStringAppend(dy, "update vgPrb set"); dyStringAppend(dy, " seq='"); dyStringAppend(dy, dna); dyStringAppend(dy, "',\n"); dyStringPrintf(dy, " tName='%s',\n", bac->chrom); dyStringPrintf(dy, " tStart=%d,\n", bac->chromStart); dyStringPrintf(dy, " tEnd=%d,\n", bac->chromEnd); dyStringPrintf(dy, " tStrand='%s',\n", bac->strand); dyStringPrintf(dy, " db='%s',\n", db); dyStringPrintf(dy, " state='%s'\n", "seq"); dyStringPrintf(dy, " where id=%d\n", bac->probe); dyStringPrintf(dy, " and state='%s'\n", "new"); //verbose(2, "%s\n", dy->string); // the sql string could be quite large sqlUpdate(conn, dy->string); } else /* probe seq already exists */ { /* just re-map the probe table recs to it */ dyStringClear(dy); dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,bac->probe); sqlUpdate(conn, dy->string); /* and delete it from vgPrb */ dyStringClear(dy); dyStringPrintf(dy, "delete from vgPrb where id=%d",bac->probe); sqlUpdate(conn, dy->string); } ++count; verbose(2,"%d finished bac for probe id %d 
size %d\n", count, bac->probe, bac->chromEnd - bac->chromStart); } freez(&dna); } freez(&chrom); dnaSeqFree(&chromSeq); bacFreeList(&bacs); dyStringFree(&dy); return count; }
static void processMrnaFa(struct sqlConnection *conn, int taxon, char *type, char *db) /* process isPcr results */ { struct dyString *dy = dyStringNew(0); struct lineFile *lf = lineFileOpen("mrna.fa", TRUE); int lineSize; char *line; char *name; char *dna; boolean more = lineFileNext(lf, &line, &lineSize); while(more) { if (line[0] != '>') errAbort("unexpected error out of phase\n"); name = cloneString(line+1); verbose(2,"name=%s\n",name); dyStringClear(dy); while((more=lineFileNext(lf, &line, &lineSize))) { if (line[0] == '>') { break; } dyStringAppend(dy,line); } dna = cloneString(dy->string); while(1) { int oldProbe = 0; dyStringClear(dy); dyStringPrintf(dy, "select id from vgPrb " "where taxon=%d and type='%s' and tName='%s' and state='new'",taxon,type,name); oldProbe = sqlQuickNum(conn,dy->string); if (oldProbe==0) break; /* no more records match */ /* record exists and hasn't already been updated */ int vgPrb = findVgPrbBySeq(conn,dna,taxon); if (vgPrb == 0) { dyStringClear(dy); dyStringAppend(dy, "update vgPrb set"); dyStringAppend(dy, " seq = '"); dyStringAppend(dy, dna); dyStringAppend(dy, "',\n"); dyStringPrintf(dy, " db = '%s',\n", db); dyStringAppend(dy, " state = 'seq'\n"); dyStringPrintf(dy, " where id=%d\n", oldProbe); dyStringPrintf(dy, " and state='%s'\n", "new"); verbose(2, "%s\n", dy->string); sqlUpdate(conn, dy->string); } else /* probe seq already exists */ { /* just re-map the probe table recs to it */ dyStringClear(dy); dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,oldProbe); sqlUpdate(conn, dy->string); /* and delete it from vgPrb */ dyStringClear(dy); dyStringPrintf(dy, "delete from vgPrb where id=%d",oldProbe); sqlUpdate(conn, dy->string); } } freez(&name); freez(&dna); } lineFileClose(&lf); dyStringFree(&dy); }
void paraFlow(char *fileName, int pfArgc, char **pfArgv) /* parse and dump. */ { struct pfCompile *pfc; struct pfParse *program, *module; char baseDir[256], baseName[128], baseSuffix[64]; char defFile[PATH_LEN]; char *parseFile = "out.parse"; char *typeFile = "out.typed"; char *boundFile = "out.bound"; char *scopeFile = "out.scope"; char *foldedFile = "out.folded"; char *cFile = "out.c"; FILE *parseF = mustOpen(parseFile, "w"); FILE *typeF = mustOpen(typeFile, "w"); FILE *scopeF = mustOpen(scopeFile, "w"); FILE *boundF = mustOpen(boundFile, "w"); FILE *foldedF = mustOpen(foldedFile, "w"); if (endPhase < 0) return; verbose(2, "Phase 0 - initialization\n"); pfc = pfCompileNew(); getPaths(pfc); splitPath(fileName, baseDir, baseName, baseSuffix); pfc->baseDir = cloneString(baseDir); safef(defFile, sizeof(defFile), "%s%s.pfh", baseDir, baseName); if (endPhase < 1) return ; verbose(2, "Phase 1 - tokenizing\n"); pfTokenizeInto(pfc, baseDir, baseName); if (endPhase < 2) return; verbose(2, "Phase 2 - parsing\n"); program = pfParseInto(pfc); dumpParseTree(pfc, program, parseF); carefulClose(&parseF); if (endPhase < 3) return; verbose(2, "Phase 3 - binding names\n"); pfBindVars(pfc, program); dumpParseTree(pfc, program, boundF); carefulClose(&boundF); if (endPhase < 4) return; verbose(2, "Phase 4 - type checking\n"); pfTypeCheck(pfc, &program); dumpParseTree(pfc, program, typeF); carefulClose(&typeF); if (endPhase < 5) return; verbose(2, "Phase 5 - polymorphic, para, and flow checks\n"); checkPolymorphic(pfc, pfc->scopeRefList); checkParaFlow(pfc, program); printScopeInfo(scopeF, 0, program); carefulClose(&scopeF); if (endPhase < 6) return; verbose(2, "Phase 6 - constant folding\n"); pfConstFold(pfc, program); dumpParseTree(pfc, program, foldedF); if (optionExists("asm")) { struct dyString *gccFiles; if (endPhase < 7) return; verbose(2, "Phase 7 - nothing\n"); if (endPhase < 8) return; verbose(2, "Phase 8 - Code generation\n"); pfc->backEnd = backEndFind("mac-pentium"); 
gccFiles = asmCoder(pfc, program, baseDir, baseName); if (endPhase < 9) return; verbose(2, "Phase 9 - Assembling pentium code\n"); { char *libName = hashMustFindVal(pfc->cfgHash,"runAsmLib"); struct dyString *dy = dyStringNew(0); int err; dyStringPrintf(dy, "gcc "); dyStringPrintf(dy, "-I %s ", pfc->cIncludeDir); dyStringPrintf(dy, "-o %s%s ", baseDir, baseName); dyStringAppend(dy, gccFiles->string); dyStringPrintf(dy, "%s ", libName); dyStringPrintf(dy, " %s ", pfc->runtimeLib); dyStringPrintf(dy, "%s ", pfc->jkwebLib); verbose(2, "%s\n", dy->string); err = system(dy->string); if (err != 0) errAbort("Couldn't assemble: %s", dy->string); dyStringFree(&dy); } dyStringFree(&gccFiles); } else { verbose(2, "Phase 7 - nothing\n"); if (endPhase < 8) return; verbose(2, "Phase 8 - C code generation\n"); pfCodeC(pfc, program, baseDir, cFile); verbose(2, "%d modules, %d tokens, %d parseNodes\n", pfc->moduleHash->elCount, pfc->tkz->tokenCount, pfParseCount(program)); if (endPhase < 9) return; verbose(2, "Phase 9 - compiling C code\n"); /* Now run gcc on it. 
*/ { struct dyString *dy = dyStringNew(0); int err; for (module = program->children; module != NULL; module = module->next) { if (module->name[0] != '<' && module->type != pptModuleRef) { struct pfModule *mod = hashMustFindVal(pfc->moduleHash, module->name); char *cName = replaceSuffix(mod->fileName, ".pf", ".c"); char *oName = replaceSuffix(mod->fileName, ".pf", ".o"); dyStringClear(dy); dyStringAppend(dy, "gcc "); dyStringAppend(dy, "-O "); dyStringPrintf(dy, "-I %s ", pfc->cIncludeDir); dyStringAppend(dy, "-c "); dyStringAppend(dy, "-o "); dyStringPrintf(dy, "%s ", oName); dyStringPrintf(dy, "%s ", cName); verbose(2, "%s\n", dy->string); err = system(dy->string); if (err != 0) errAbort("Couldn't compile %s.c", module->name); freeMem(oName); freeMem(cName); } } dyStringClear(dy); dyStringAppend(dy, "gcc "); dyStringAppend(dy, "-O "); dyStringPrintf(dy, "-I %s ", pfc->cIncludeDir); dyStringPrintf(dy, "-o %s%s ", baseDir, baseName); dyStringPrintf(dy, "%s ", cFile); for (module = program->children; module != NULL; module = module->next) { if (module->name[0] != '<') { struct pfModule *mod = hashMustFindVal(pfc->moduleHash, module->name); char *suffix = (module->type == pptModuleRef ? ".pfh" : ".pf"); char *oName = replaceSuffix(mod->fileName, suffix, ".o"); dyStringPrintf(dy, "%s ", oName); freeMem(oName); } } dyStringPrintf(dy, " %s ", pfc->runtimeLib); dyStringPrintf(dy, "%s ", pfc->jkwebLib); dyStringAppend(dy, "-lpthread -lm"); verbose(2, "%s\n", dy->string); err = system(dy->string); if (err != 0) errnoAbort("problem compiling:\n", dy->string); dyStringFree(&dy); } } if (endPhase < 10) return; verbose(2, "Phase 10 - execution\n"); /* Now go run program itself. 
*/ { struct dyString *dy = dyStringNew(0); int err; int i; if (baseDir[0] == 0) dyStringPrintf(dy, "./%s", baseName); else dyStringPrintf(dy, "%s%s", baseDir, baseName); for (i=0; i<pfArgc; ++i) { dyStringAppendC(dy, ' '); dyStringAppend(dy, pfArgv[i]); } err = system(dy->string); if (err != 0) errAbort("problem running %s", baseName); dyStringFree(&dy); } }
struct submitFileRow *submitFileRowFromFieldedTable( struct sqlConnection *conn, struct fieldedTable *table, int fileIx, int md5Ix, int sizeIx, int modifiedIx, int replacesIx, int replaceReasonIx) /* Turn parsed out table (still all just strings) into list of edwFiles. */ { struct submitFileRow *sfr, *sfrList = NULL; struct edwFile *bf; struct fieldedRow *fr; struct dyString *tags = dyStringNew(0); char *ucscDbTag = "ucsc_db"; int ucscDbField = stringArrayIx(ucscDbTag, table->fields, table->fieldCount); for (fr = table->rowList; fr != NULL; fr = fr->next) { char **row = fr->row; AllocVar(bf); bf->submitFileName = cloneString(row[fileIx]); safef(bf->md5, sizeof(bf->md5), "%s", row[md5Ix]); bf->size = sqlLongLong(row[sizeIx]); bf->updateTime = sqlLongLong(row[modifiedIx]); /* Add as tags any fields not included in fixed fields. */ dyStringClear(tags); int i; for (i=0; i<table->fieldCount; ++i) { if (i != fileIx && i != md5Ix && i != sizeIx && i != modifiedIx) { cgiEncodeIntoDy(table->fields[i], row[i], tags); } } if (ucscDbField < 0) { /* Try to make this field up from file name */ char *slash = strchr(bf->submitFileName, '/'); if (slash == NULL) errAbort("Can't make up '%s' field from '%s'", ucscDbTag, bf->submitFileName); int len = slash - bf->submitFileName; char ucscDbVal[len+1]; memcpy(ucscDbVal, bf->submitFileName, len); ucscDbVal[len] = 0; /* Do a little check on it */ if (!sameString("mm9", ucscDbVal) && !sameString("mm10", ucscDbVal) && !sameString("dm3", ucscDbVal) && !sameString("ce10", ucscDbVal) && !sameString("hg19", ucscDbVal)) errAbort("Unrecognized ucsc_db %s - please arrange files so that the top " "level directory in the fileName in the manifest is a UCSC database name " "like 'hg19' or 'mm10.' Alternatively please include a ucsc_db column.", ucscDbVal); /* Add it to tags. */ cgiEncodeIntoDy(ucscDbTag, ucscDbVal, tags); } bf->tags = cloneString(tags->string); /* Fake other fields. 
*/ bf->edwFileName = cloneString(""); /* Allocate wrapper structure */ AllocVar(sfr); sfr->file = bf; /* fill in fields about replacement maybe */ if (replacesIx != -1) { char *replacesAcc = row[replacesIx]; char *reason = row[replaceReasonIx]; int fileId = edwFileIdForLicensePlate(conn, replacesAcc); if (fileId == 0) errAbort("%s in %s column doesn't exist in warehouse", replacesAcc, replacesTag); sfr->replaces = cloneString(replacesAcc); sfr->replaceReason = cloneString(reason); sfr->replacesFile = fileId; } slAddHead(&sfrList, sfr); } slReverse(&sfrList); dyStringFree(&tags); return sfrList; }
static struct slName *getProbeList(struct sqlConnection *conn, int id) /* Get list of probes with hyperlinks to probe info page. */ { struct slName *returnList = NULL; char query[256]; char *sidUrl = cartSidUrlString(cart); struct dyString *dy = dyStringNew(0); struct slInt *probeList = NULL, *probe; int submissionSource = 0; /* Make up a list of all probes in this image. */ safef(query, sizeof(query), "select probe from imageProbe where image=%d", id); probeList = sqlQuickNumList(conn, query); safef(query, sizeof(query), "select submissionSet.submissionSource from image, submissionSet" " where image.submissionSet = submissionSet.id and image.id=%d", id); submissionSource = sqlQuickNum(conn, query); for (probe = probeList; probe != NULL; probe = probe->next) { char *type; /* Create hyperlink to probe page around gene name. */ dyStringClear(dy); dyStringPrintf(dy, "<A HREF=\"%s?%s&%s=%d&%s=%d\" target=_parent>", hgVisiGeneCgiName(), sidUrl, hgpDoProbe, probe->val, hgpSs, submissionSource); safef(query, sizeof(query), "select probeType.name from probeType,probe where probe.id = %d " "and probe.probeType = probeType.id", probe->val); type = sqlQuickString(conn, query); dyStringPrintf(dy, "%s", naForEmpty(type)); if (sameWord(type, "antibody")) { char *abName; safef(query, sizeof(query), "select antibody.name from probe,antibody " "where probe.id = %d and probe.antibody = antibody.id" , probe->val); abName = sqlQuickString(conn, query); if (abName != NULL) { dyStringPrintf(dy, " %s", abName); freeMem(abName); } } else if (sameWord(type, "RNA")) { safef(query, sizeof(query), "select length(seq) from probe where id=%d", probe->val); if (sqlQuickNum(conn, query) > 0) dyStringPrintf(dy, " sequenced"); else { safef(query, sizeof(query), "select length(fPrimer) from probe where id=%d", probe->val); if (sqlQuickNum(conn, query) > 0) dyStringPrintf(dy, " from primers"); } } else if (sameWord(type, "BAC")) { char *name; safef(query, sizeof(query), "select bac.name from 
probe,bac " "where probe.id = %d and probe.bac = bac.id" , probe->val); name = sqlQuickString(conn, query); if (name != NULL) { dyStringPrintf(dy, " %s", name); freeMem(name); } } dyStringPrintf(dy, "</A>"); freez(&type); /* Add to return list. */ slNameAddTail(&returnList, dy->string); } slFreeList(&probeList); slReverse(&returnList); return returnList; }
char *getKnownGeneUrl(struct sqlConnection *conn, int geneId) /* Given gene ID, try and find known gene on browser in same * species. */ { char query[256]; int taxon; char *url = NULL; char *genomeDb = NULL; /* Figure out taxon. */ safef(query, sizeof(query), "select taxon from gene where id = %d", geneId); taxon = sqlQuickNum(conn, query); genomeDb = hDbForTaxon(conn, taxon); if (genomeDb != NULL) { /* Make sure known genes track exists - we may need * to tweak this at some point for model organisms. */ safef(query, sizeof(query), "%s.knownToVisiGene", genomeDb); if (!sqlTableExists(conn, query)) genomeDb = NULL; } /* If no db for that organism revert to human. */ if (genomeDb == NULL) genomeDb = hDefaultDb(); safef(query, sizeof(query), "%s.knownToVisiGene", genomeDb); if (sqlTableExists(conn, query)) { struct dyString *dy = dyStringNew(0); char *knownGene = NULL; if (sqlCountColumnsInTable(conn, query) == 3) { dyStringPrintf(dy, "select name from %s.knownToVisiGene where geneId = %d", genomeDb, geneId); } else { struct slName *imageList, *image; safef(query, sizeof(query), "select imageProbe.image from probe,imageProbe " "where probe.gene=%d and imageProbe.probe=probe.id", geneId); imageList = sqlQuickList(conn, query); if (imageList != NULL) { dyStringPrintf(dy, "select name from %s.knownToVisiGene ", genomeDb); dyStringAppend(dy, "where value in("); for (image = imageList; image != NULL; image = image->next) { dyStringPrintf(dy, "'%s'", image->name); if (image->next != NULL) dyStringAppendC(dy, ','); } dyStringAppend(dy, ")"); slFreeList(&imageList); } } if (dy->stringSize > 0) { knownGene = sqlQuickString(conn, dy->string); if (knownGene != NULL) { dyStringClear(dy); dyStringPrintf(dy, "../cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_chrom=none", genomeDb, knownGene); url = dyStringCannibalize(&dy); } } dyStringFree(&dy); } freez(&genomeDb); return url; }
static struct slName *geneProbeList(struct sqlConnection *conn, int id) /* Get list of gene names with hyperlinks to probe info page. */ { struct slName *returnList = NULL; char query[256], **row; struct sqlResult *sr; struct dyString *dy = dyStringNew(0); struct probeAndColor *pcList = NULL, *pc; int probeCount = 0; /* Make up a list of all probes in this image. */ safef(query, sizeof(query), "select probe,probeColor from imageProbe where image=%d", id); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { AllocVar(pc); pc->probe = sqlUnsigned(row[0]); pc->probeColor = sqlUnsigned(row[1]); slAddHead(&pcList, pc); ++probeCount; } slReverse(&pcList); for (pc = pcList; pc != NULL; pc = pc->next) { int geneId; char *geneName; int probe = pc->probe; char *geneUrl = NULL; /* Get gene ID and name. */ safef(query, sizeof(query), "select gene from probe where id = %d", probe); geneId = sqlQuickNum(conn, query); geneName = vgGeneNameFromId(conn, geneId); /* Get url for known genes page if any. */ geneUrl = getKnownGeneUrl(conn, geneId); /* Print gene name, surrounded by hyperlink to known genes * page if possible. */ dyStringClear(dy); if (geneUrl != NULL) dyStringPrintf(dy, "<A HREF=\"%s\" target=_parent>", geneUrl); dyStringPrintf(dy, "%s", geneName); if (geneUrl != NULL) dyStringAppend(dy, "</A>"); freez(&geneName); /* Add color if there's more than one probe for this image. */ if (probeCount > 1) { char *color; safef(query, sizeof(query), "select probeColor.name from probeColor " "where probeColor.id = %d" , pc->probeColor); color = sqlQuickString(conn, query); if (color != NULL) dyStringPrintf(dy, " (%s)", color); freez(&color); } /* Add to return list. */ slNameAddTail(&returnList, dy->string); } slFreeList(&pcList); slReverse(&returnList); return returnList; }
boolean lineFileParseHttpHeader(struct lineFile *lf, char **hdr,
                                boolean *chunked, int *contentLength)
/* Extract HTTP response header from lf into hdr, tell if it's
 * "Transfer-Encoding: chunked" or if it has a contentLength.
 *   lf            - input positioned at the start of the response.
 *   hdr           - always set to a newly cloned string with the header lines
 *                   that were read (possibly empty); caller frees.
 *   chunked       - optional, may be NULL.  Set to TRUE if a
 *                   "Transfer-Encoding: chunked" line is seen, else FALSE.
 *   contentLength - optional, may be NULL.  Set to the Content-Length value
 *                   if one is seen, else -1.
 * Returns TRUE only when the status line carries code 200.  In every other
 * case a warning is issued (except on plain end of input) and FALSE is
 * returned.  A first line that does not start with "HTTP/" is pushed back
 * onto lf and is not included in hdr. */
{
struct dyString *header = newDyString(1024);
char *line;
int lineSize;
boolean ok = FALSE;

if (chunked != NULL)
    *chunked = FALSE;
if (contentLength != NULL)
    *contentLength = -1;
dyStringClear(header);
if (lineFileNext(lf, &line, &lineSize))
    {
    if (startsWith("HTTP/", line))
        {
        char *version, *code;
        dyStringAppendN(header, line, lineSize-1);
        dyStringAppendC(header, '\n');
        /* nextWord() chops up line in place; the intact copy is in header. */
        version = nextWord(&line);
        code = nextWord(&line);
        if (code == NULL)
            {
            warn("%s: Expecting HTTP/<version> <code> header line, got this: %s\n",
                 lf->fileName, header->string);
            }
        else if (!sameString(code, "200"))
            {
            /* line is what is left after the code and may be NULL
             * when there is no reason phrase. */
            warn("%s: Errored HTTP response header: %s %s %s\n",
                 lf->fileName, version, code, (line != NULL ? line : ""));
            }
        else
            {
            while (lineFileNext(lf, &line, &lineSize))
                {
                /* blank line means end of HTTP header */
                if ((line[0] == '\r' && line[1] == 0) || line[0] == 0)
                    break;
                if (strstr(line, "Transfer-Encoding: chunked") && chunked != NULL)
                    *chunked = TRUE;
                dyStringAppendN(header, line, lineSize-1);
                dyStringAppendC(header, '\n');
                if (strstr(line, "Content-Length:") && contentLength != NULL)
                    {
                    char *value;
                    nextWord(&line);            /* skip the field name */
                    value = nextWord(&line);
                    /* A line with no value word leaves contentLength at -1
                     * rather than handing NULL to atoi(). */
                    if (value != NULL)
                        *contentLength = atoi(value);
                    }
                }
            ok = TRUE;
            }
        }
    else
        {
        /* Report the offending line itself; header is still empty here
         * because the line is deliberately not put in header/hdr. */
        warn("%s: Expecting HTTP/<version> <code> header line, got this: %s\n",
             lf->fileName, line);
        /* put the line back, don't put it in header/hdr */
        lineFileReuse(lf);
        }
    }
*hdr = cloneString(header->string);
dyStringFree(&header);
return ok;
} /* lineFileParseHttpHeader */
boolean lineFileNextFull(struct lineFile *lf, char **retFull, int *retFullSize, char **retRaw, int *retRawSize) // Fetch next line from file joining up any that are continued by ending '\' // If requested, and was joined, the unjoined raw lines are also returned // NOTE: comment lines can't be continued! ("# comment \ \n more comment" is 2 lines.) { // May have requested reusing the last full line. if (lf->fullLineReuse) { lf->fullLineReuse = FALSE; assert(lf->fullLine != NULL); *retFull = dyStringContents(lf->fullLine); if (retFullSize) *retFullSize = dyStringLen(lf->fullLine); if (retRaw != NULL) { assert(lf->rawLines != NULL); *retRaw = dyStringContents(lf->rawLines); if (retRawSize) *retRawSize = dyStringLen(lf->rawLines); } return TRUE; } // Empty pointers *retFull = NULL; if (retRaw != NULL) *retRaw = NULL; // Prepare lf buffers if (lf->fullLine == NULL) { lf->fullLine = dyStringNew(1024); lf->rawLines = dyStringNew(1024); // Better to always create it than test every time } else { dyStringClear(lf->fullLine); dyStringClear(lf->rawLines); } char *line; while (lineFileNext(lf, &line, NULL)) { char *start = skipLeadingSpaces(line); // Will the next line continue this one? char *end = start; if (*start == '#') // Comment lines can't be continued! end = start + strlen(start); else { while (*end != '\0') // walking forward for efficiency (avoid strlens()) { for (;*end != '\0' && *end != '\\'; end++) ; // Tight loop to find '\' if (*end == '\0') break; // This could be a continuation char *slash = end; if (*(++end) == '\\') // escaped continue; end = skipLeadingSpaces(end); if (*end == '\0') // Just whitespace after '\', so true continuation mark { if (retRaw != NULL) // Only if actually requested. { dyStringAppendN(lf->rawLines,line,(end - line)); dyStringAppendC(lf->rawLines,'\n'); // New lines delimit raw lines. 
} end = slash; // Don't need to zero, because of appending by length break; } } } // Stitch together full lines if (dyStringLen(lf->fullLine) == 0) dyStringAppendN(lf->fullLine,line,(end - line)); // includes first line's whitespace else if (start < end) // don't include continued line's leading spaces dyStringAppendN(lf->fullLine,start,(end - start)); if (*end == '\\') continue; // Got a full line now! *retFull = dyStringContents(lf->fullLine); if (retFullSize) *retFullSize = dyStringLen(lf->fullLine); if (retRaw != NULL && dyStringLen(lf->rawLines) > 0) // Only if actually requested & continued { // This is the final line which doesn't have a continuation char dyStringAppendN(lf->rawLines,line,(end - line)); *retRaw = dyStringContents(lf->rawLines); if (retRawSize) *retRawSize = dyStringLen(lf->rawLines); } return TRUE; } return FALSE; }
static void populateMissingVgPrb(struct sqlConnection *conn) /* populate vgPrb where missing, usually after new records added to visiGene */ { struct sqlResult *sr; char **row; struct dyString *dy = dyStringNew(0); struct sqlConnection *conn2 = sqlConnect(database); struct sqlConnection *conn3 = sqlConnect(database); int probeCount=0, vgPrbCount=0; dyStringAppend(dy, "select p.id,p.gene,antibody,probeType,fPrimer,rPrimer,p.seq,bac,g.taxon" " from probe p join gene g" " left join vgPrbMap m on m.probe = p.id" " where g.id = p.gene" " and m.probe is NULL"); sr = sqlGetResult(conn, dy->string); while ((row = sqlNextRow(sr)) != NULL) { int id = sqlUnsigned(row[0]); /* int gene = sqlUnsigned(row[1]); */ /* int antibody = sqlUnsigned(row[2]); */ /* int probeType = sqlUnsigned(row[3]); */ char *fPrimer = row[4]; char *rPrimer = row[5]; char *seq = row[6]; int bac = sqlUnsigned(row[7]); int taxon = sqlUnsigned(row[8]); char *peType = "none"; int peProbe = id; char *peSeq = seq; char *tName = ""; int tStart = 0; int tEnd = 0; char *tStrand = " "; /* char *peGene = ""; int bacInfo = 0; int seqid = 0; int pslid = 0; */ char *state = "new"; char *db = ""; int vgPrb = 0; if (isNotEmpty(seq)) { peType = "probe"; state = "seq"; } else if (isNotEmpty(fPrimer) && isNotEmpty(rPrimer)) { peType = "primersMrna"; } else if (isNotEmpty(fPrimer) && isEmpty(rPrimer)) { /* only have fPrimer, it's probably a comment, not dna seq */ peType = "refSeq"; /* use accession or gene */ } else if (bac > 0) { peType = "bac"; /* use bacEndPairs */ } else { peType = "refSeq"; /* use accession or gene */ } if (!sameString(peSeq,"")) { vgPrb = findVgPrbBySeq(conn3,peSeq,taxon); } if (vgPrb == 0) { dyStringClear(dy); dyStringAppend(dy, "insert into vgPrb set"); dyStringPrintf(dy, " id=default,\n"); dyStringPrintf(dy, " type='%s',\n", peType); dyStringAppend(dy, " seq='"); dyStringAppend(dy, peSeq); dyStringAppend(dy, "',\n"); dyStringPrintf(dy, " tName='%s',\n", tName); dyStringPrintf(dy, " tStart=%d,\n", 
tStart); dyStringPrintf(dy, " tEnd=%d,\n", tEnd); dyStringPrintf(dy, " tStrand='%s',\n", tStrand); dyStringPrintf(dy, " db='%s',\n", db); dyStringPrintf(dy, " taxon='%d',\n", taxon); dyStringPrintf(dy, " state='%s'\n", state); verbose(2, "%s\n", dy->string); sqlUpdate(conn2, dy->string); vgPrb = sqlLastAutoId(conn2); vgPrbCount++; } dyStringClear(dy); dyStringAppend(dy, "insert into vgPrbMap set"); dyStringPrintf(dy, " probe=%d,\n", peProbe); dyStringPrintf(dy, " vgPrb=%d \n", vgPrb); verbose(2, "%s\n", dy->string); sqlUpdate(conn2, dy->string); probeCount++; } verbose(1, "# new probe records found = %d, # new vgPrb records added = %d\n", probeCount, vgPrbCount); dyStringFree(&dy); sqlFreeResult(&sr); sqlDisconnect(&conn3); sqlDisconnect(&conn2); }
static void readPartHeaderMB(struct mimeBuf *b, struct mimePart *p, char *altHeader) /* Reads the header lines of the mimePart, saves the header settings in a hash. */ { struct dyString *fullLine = dyStringNew(0); char *key=NULL, *val=NULL; struct lineFile *lf = NULL; char *line = NULL; char *lineAhead = NULL; int size = 0; p->hdr = newHash(3); //debug //fprintf(stderr,"headers dumpMB: "); //dumpMB(b); //debug if (altHeader) { lf = lineFileOnString("MIME Header", TRUE, altHeader); } /* read ahead one line, skipping any leading blanks lines */ do { if (altHeader) lineFileNext(lf, &lineAhead, &size); else lineAhead = getLineMB(b); } while (sameString(lineAhead,"")); do { /* accumulate a full header line - some emailers split into mpl lines */ dyStringClear(fullLine); do { line = lineAhead; if (altHeader) lineFileNext(lf, &lineAhead, &size); else lineAhead = getLineMB(b); dyStringAppend(fullLine,line); if (!altHeader) freez(&line); } while (isspace(lineAhead[0])); line = fullLine->string; //fprintf(stderr,"found a line! [%s]\n",line); //debug key = line; val = strchr(line,':'); if (!val) errAbort("readPartHeaderMB error - header-line colon not found, line=[%s]",line); *val = 0; val++; key=trimSpaces(key); // since the hash is case-sensitive, convert to lower case for ease of matching tolowers(key); val=trimSpaces(val); hashAdd(p->hdr,key,cloneString(val)); //debug //fprintf(stderr,"MIME header: key=[%s], val=[%s]\n",key,val); //fflush(stderr); } while (!sameString(lineAhead,"")); if (altHeader) { if (nlType == nlt_undet) nlType = lf->nlType; lineFileClose(&lf); } else { freez(&lineAhead); } dyStringFree(&fullLine); }
static void processIsPcr(struct sqlConnection *conn, int taxon, char *db) /* process isPcr results */ { /* >NM_010919:371+1088 2 718bp CGCGGATCCAAGGACATCTTGGACCTTCCG CCCAAGCTTGCATGTGCTGCAGCGACTGCG */ struct dyString *dy = dyStringNew(0); struct lineFile *lf = lineFileOpen("isPcr.fa", TRUE); int lineSize; char *line; char *name; char *dna; char *word, *end; char *tName; int tStart; int tEnd; char *tStrand; int probeid=0; /* really a vgPrb id */ boolean more = lineFileNext(lf, &line, &lineSize); while(more) { if (line[0] != '>') errAbort("unexpected error out of phase\n"); name = cloneString(line); verbose(1,"name=%s\n",name); dyStringClear(dy); while((more=lineFileNext(lf, &line, &lineSize))) { if (line[0] == '>') { break; } dyStringAppend(dy,line); } dna = cloneString(dy->string); word = name+1; end = strchr(word,':'); tName = cloneStringZ(word,end-word); word = end+1; end = strchr(word,'+'); tStrand = "+"; if (!end) { end = strchr(word,'-'); tStrand = "-"; } tStart = atoi(word); word = end+1; end = strchr(word,' '); tEnd = atoi(word); word = end+1; end = strchr(word,' '); probeid = atoi(word); dyStringClear(dy); dyStringPrintf(dy, "select count(*) from vgPrb where id=%d and state='new'",probeid); if (sqlQuickNum(conn,dy->string)>0) { /* record exists and hasn't already been updated */ int vgPrb = findVgPrbBySeq(conn,dna,taxon); if (vgPrb == 0) { dyStringClear(dy); dyStringAppend(dy, "update vgPrb set"); dyStringAppend(dy, " seq='"); dyStringAppend(dy, dna); dyStringAppend(dy, "',\n"); dyStringPrintf(dy, " tName='%s',\n", tName); dyStringPrintf(dy, " tStart=%d,\n", tStart); dyStringPrintf(dy, " tEnd=%d,\n", tEnd); dyStringPrintf(dy, " tStrand='%s',\n", tStrand); dyStringPrintf(dy, " db='%s',\n", db); dyStringPrintf(dy, " state='%s'\n", "seq"); dyStringPrintf(dy, " where id=%d\n", probeid); dyStringPrintf(dy, " and state='%s'\n", "new"); verbose(2, "%s\n", dy->string); sqlUpdate(conn, dy->string); } else /* probe seq already exists */ { /* just re-map the probe 
table recs to it */ dyStringClear(dy); dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,probeid); sqlUpdate(conn, dy->string); /* and delete it from vgPrb */ dyStringClear(dy); dyStringPrintf(dy, "delete from vgPrb where id=%d",probeid); sqlUpdate(conn, dy->string); } } freez(&tName); freez(&name); freez(&dna); } lineFileClose(&lf); dyStringFree(&dy); }
void condenseValues() /* combine values for single snp */ { char query[512]; struct sqlConnection *conn = hAllocConn(); struct sqlResult *sr; char **row; FILE *f; struct dyString *ssList = newDyString(255); struct dyString *buildList = newDyString(255); char *currentSnpString = NULL; int currentSnpNum = 0; int count = 0; char firstBuild[32]; char lastBuild[32]; f = hgCreateTabFile(".", "SNPSubSNPLinkCondense"); sqlSafef(query, sizeof(query), "select snp_id, subsnp_id, build_id from SNPSubSNPLink"); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { if (currentSnpString == NULL) { currentSnpString = cloneString(row[0]); currentSnpNum = sqlUnsigned(row[0]); dyStringPrintf(ssList, "%s", row[1]); dyStringPrintf(buildList, "%s", row[2]); safef(firstBuild, sizeof(firstBuild), row[2]); safef(lastBuild, sizeof(firstBuild), row[2]); } else if (!sameString(row[0], currentSnpString)) { fprintf(f, "%s\t%s\t%s\t%s\t%s\t%d\n", currentSnpString, ssList->string, buildList->string, firstBuild, lastBuild, count); if (currentSnpNum > sqlUnsigned(row[0])) errAbort("snps out of order: %d before %s\n", currentSnpNum, row[0]); currentSnpString = cloneString(row[0]); currentSnpNum = sqlUnsigned(row[0]); dyStringClear(ssList); dyStringPrintf(ssList, "%s", row[1]); dyStringClear(buildList); dyStringPrintf(buildList, "%s", row[2]); safef(firstBuild, sizeof(firstBuild), row[2]); safef(lastBuild, sizeof(lastBuild), row[2]); count = 1; } else { count++; dyStringAppend(ssList, ","); dyStringAppend(ssList, row[1]); dyStringAppend(buildList, ","); dyStringAppend(buildList, row[2]); safef(lastBuild, sizeof(lastBuild), row[2]); } } fprintf(f, "%s\t%s\t%s\t%s\t%s\t%d\n", currentSnpString, ssList->string, buildList->string, firstBuild, lastBuild, count); sqlFreeResult(&sr); hFreeConn(&conn); carefulClose(&f); }
static void doPrimers(struct sqlConnection *conn, int taxon, char *db) /* get probe seq from primers */ { int rc = 0; struct dyString *dy = dyStringNew(0); char cmdLine[256]; char path1[256]; char path2[256]; dyStringClear(dy); dyStringAppend(dy, "select e.id, p.fPrimer, p.rPrimer from probe p, vgPrbMap m, vgPrb e, gene g"); dyStringPrintf(dy, " where p.id = m.probe and m.vgPrb = e.id and g.id = p.gene and g.taxon = %d",taxon); dyStringAppend(dy, " and e.state = 'new' and e.type='primersMrna'"); rc = sqlSaveQuery(conn, dy->string, "primers.query", FALSE); verbose(1,"rc = %d = count of primers for mrna search for taxon %d\n",rc,taxon); if (rc > 0) /* something to do */ { dyStringClear(dy); dyStringPrintf(dy, "select qName from %s.all_mrna",db); rc = 0; rc = sqlSaveQuery(conn, dy->string, "accFile.txt", FALSE); safef(cmdLine,sizeof(cmdLine),"getRna %s accFile.txt mrna.fa",db); system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date"); verbose(1,"rc = %d = count of mrna for %s\n",rc,db); system("date"); system("isPcr mrna.fa primers.query isPcr.fa -out=fa"); system("date"); system("ls -l"); processIsPcr(conn,taxon,db); unlink("mrna.fa"); unlink("accFile.txt"); unlink("isPcr.fa"); } unlink("primers.query"); /* find any remaining type primersMrna that couldn't be resolved and demote * them to type primersGenome */ dyStringClear(dy); dyStringAppend(dy, "update vgPrb set type='primersGenome'"); dyStringPrintf(dy, " where taxon = %d",taxon); dyStringAppend(dy, " and state = 'new' and type='primersMrna'"); sqlUpdate(conn, dy->string); /* get primers for those probes that did not find mrna isPcr matches * and then do them against the genome instead */ dyStringClear(dy); dyStringAppend(dy, "select e.id, p.fPrimer, p.rPrimer from probe p, vgPrbMap m, vgPrb e, gene g"); dyStringPrintf(dy, " where p.id = m.probe and m.vgPrb = e.id and g.id = p.gene and g.taxon = %d",taxon); dyStringAppend(dy, " and e.state = 'new' and e.type='primersGenome'"); rc = 0; 
rc = sqlSaveQuery(conn, dy->string, "primers.query", FALSE); verbose(1,"rc = %d = count of primers for genome search for taxon %d\n",rc,taxon); if (rc > 0) /* something to do */ { safef(path1,sizeof(path1),"/gbdb/%s/%s.2bit",db,db); safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db); verbose(1,"copy: [%s] to [%s]\n",path1,path2); copyFile(path1,path2); safef(cmdLine,sizeof(cmdLine), "ssh kolossus 'cd %s; isPcr %s.2bit primers.query isPcr.fa -out=fa'", getCurrentDir(),db); system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date"); safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db); verbose(1,"rm %s\n",path2); unlink(path2); system("ls -l"); processIsPcr(conn,taxon,db); unlink("mrna.fa"); unlink("accFile.txt"); unlink("isPcr.fa"); } unlink("primers.query"); /* find any remaining type primersGenome that couldn't be resolved and demote * them to type refSeq */ dyStringClear(dy); dyStringAppend(dy, "update vgPrb set type='refSeq'"); dyStringPrintf(dy, " where taxon = %d",taxon); dyStringAppend(dy, " and state = 'new' and type='primersGenome'"); sqlUpdate(conn, dy->string); dyStringFree(&dy); }
void writeSections(struct bbiChromUsage *usageList, struct lineFile *lf,
        int itemsPerSlot, struct bbiBoundsArray *bounds, int sectionCount, FILE *f,
        int resTryCount, int resScales[], int resSizes[],
        boolean doCompress, bits32 *retMaxSectionSize)
/* Read through lf, chunking it into sections that get written to f.  Save info
 * about sections in bounds.
 *   usageList    - chromosomes in the order they occur in lf.
 *   lf           - sorted bedGraph input (chrom, start, end, value).
 *   itemsPerSlot - maximum number of items per section.
 *   bounds       - array of sectionCount entries, filled in with file offset
 *                  and range of each section written.
 *   f            - output file.
 *   resTryCount, resScales, resSizes - for each of resTryCount zoom
 *                  resolutions, resSizes[i] is incremented once per bin of
 *                  width resScales[i] that the data touches.
 *   doCompress   - if TRUE each section is compressed with zCompress.
 *   retMaxSectionSize - returns the largest uncompressed section size.
 * A section is written whenever the slot is full, the chromosome changes or
 * the input ends, but only if it holds at least one item. */
{
int maxSectionSize = 0;
struct bbiChromUsage *usage = usageList;
int itemIx = 0, sectionIx = 0;
bits32 reserved32 = 0;
UBYTE reserved8 = 0;
struct sectionItem items[itemsPerSlot];
struct sectionItem *lastB = NULL;
bits32 resEnds[resTryCount];
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    resEnds[resTry] = 0;
struct dyString *stream = dyStringNew(0);

/* remove initial browser and track lines */
lineFileRemoveInitialCustomTrackLines(lf);

for (;;)
    {
    /* Get next line of input if any. */
    char *row[5];
    int rowSize = lineFileChopNext(lf, row, ArraySize(row));

    /* Figure out whether need to output section. */
    boolean sameChrom = FALSE;
    if (rowSize > 0)
        sameChrom = sameString(row[0], usage->name);
    if (itemIx >= itemsPerSlot || rowSize == 0 || !sameChrom)
        {
        /* With no items collected there is no section to write.  Without this
         * test an empty input, or a first line on an unexpected chromosome,
         * would read items[-1] and uninitialized items[0]. */
        if (itemIx > 0)
            {
            /* Figure out section position. */
            bits32 chromId = usage->id;
            bits32 sectionStart = items[0].start;
            bits32 sectionEnd = items[itemIx-1].end;

            /* Save section info for indexing. */
            assert(sectionIx < sectionCount);
            struct bbiBoundsArray *section = &bounds[sectionIx++];
            section->offset = ftell(f);
            section->range.chromIx = chromId;
            section->range.start = sectionStart;
            section->range.end = sectionEnd;

            /* Output section header to stream. */
            dyStringClear(stream);
            UBYTE type = bwgTypeBedGraph;
            bits16 itemCount = itemIx;
            dyStringWriteOne(stream, chromId);              // chromId
            dyStringWriteOne(stream, sectionStart);         // start
            dyStringWriteOne(stream, sectionEnd);           // end
            dyStringWriteOne(stream, reserved32);           // itemStep
            dyStringWriteOne(stream, reserved32);           // itemSpan
            dyStringWriteOne(stream, type);                 // type
            dyStringWriteOne(stream, reserved8);            // reserved
            dyStringWriteOne(stream, itemCount);            // itemCount

            /* Output each item in section to stream. */
            int i;
            for (i=0; i<itemIx; ++i)
                {
                struct sectionItem *item = &items[i];
                dyStringWriteOne(stream, item->start);
                dyStringWriteOne(stream, item->end);
                dyStringWriteOne(stream, item->val);
                }

            /* Save stream to file, compressing if need be. */
            if (stream->stringSize > maxSectionSize)
                maxSectionSize = stream->stringSize;
            if (doCompress)
                {
                size_t maxCompSize = zCompBufSize(stream->stringSize);
                char compBuf[maxCompSize];
                int compSize = zCompress(stream->string, stream->stringSize, compBuf, maxCompSize);
                mustWrite(f, compBuf, compSize);
                }
            else
                mustWrite(f, stream->string, stream->stringSize);
            }

        /* If at end of input we are done. */
        if (rowSize == 0)
            break;

        /* Set up for next section. */
        itemIx = 0;

        if (!sameChrom)
            {
            /* Move on to the next chromosome, which must be the one just read. */
            usage = usage->next;
            assert(usage != NULL);
            if (!sameString(row[0], usage->name))
                errAbort("read %s, expecting %s on line %d in file %s\n",
                    row[0], usage->name, lf->lineIx, lf->fileName);
            lastB = NULL;
            for (resTry = 0; resTry < resTryCount; ++resTry)
                resEnds[resTry] = 0;
            }
        }

    /* Parse out input. */
    lineFileExpectWords(lf, 4, rowSize);
    bits32 start = lineFileNeedNum(lf, row, 1);
    bits32 end = lineFileNeedNum(lf, row, 2);
    float val = lineFileNeedDouble(lf, row, 3);

    /* Verify that inputs meets our assumption - that it is a sorted bedGraph file. */
    if (start > end)
        errAbort("Start (%u) after end (%u) line %d of %s", start, end, lf->lineIx, lf->fileName);
    if (lastB != NULL)
        {
        if (lastB->start > start)
            errAbort("BedGraph not sorted on start line %d of %s", lf->lineIx, lf->fileName);
        if (lastB->end > start)
            errAbort("Overlapping regions in bedGraph line %d of %s", lf->lineIx, lf->fileName);
        }

    /* Do zoom counting. */
    for (resTry = 0; resTry < resTryCount; ++resTry)
        {
        bits32 resEnd = resEnds[resTry];
        if (start >= resEnd)
            {
            resSizes[resTry] += 1;
            resEnds[resTry] = resEnd = start + resScales[resTry];
            }
        while (end > resEnd)
            {
            resSizes[resTry] += 1;
            resEnds[resTry] = resEnd = resEnd + resScales[resTry];
            }
        }

    /* Save values in output array. */
    struct sectionItem *b = &items[itemIx];
    b->start = start;
    b->end = end;
    b->val = val;
    lastB = b;
    itemIx += 1;
    }
dyStringFree(&stream);
assert(sectionIx == sectionCount);

*retMaxSectionSize = maxSectionSize;
}
void xpParseStartTag(struct xp *xp, int maxAttCount, /* Maximum attribute count. */ struct dyString *retName, /* Returns tag name */ int *retAttCount, /* Returns attribute count. */ struct dyString **retAttributes, /* Name, value, name, value... */ boolean *retClosed) /* If true then is self-closing (ends in />) */ /* Call this after the first '<' in a tag has been read. It'll * parse out until the '>' tag. */ { char c, quotC; int attCount = 0; struct dyString *dy; int lineStart; dyStringClear(retName); /* Skip white space after '<' and before tag name. */ for (;;) { if ((c = xpGetChar(xp)) == 0) xpUnexpectedEof(xp); if (isspace(c)) { if (c == '\n') ++xp->lineIx; } else break; } /* Read in tag name. */ for (;;) { dyStringAppendC(retName, c); if ((c = xpGetChar(xp)) == 0) xpUnexpectedEof(xp); if (c == '>' || c == '/' || isspace(c)) break; } if (c == '\n') ++xp->lineIx; /* Parse attributes. */ if (c != '>' && c != '/') { for (;;) { /* Skip leading white space. */ for (;;) { if ((c = xpGetChar(xp)) == 0) xpUnexpectedEof(xp); if (isspace(c)) { if (c == '\n') ++xp->lineIx; } else break; } if (c == '>' || c == '/') break; /* Allocate space in attribute table. */ if (attCount >= maxAttCount - 2) xpError(xp, "Attribute stack overflow"); dy = retAttributes[attCount]; if (dy == NULL) dy = retAttributes[attCount] = newDyString(64); else dyStringClear(dy); ++attCount; /* Read until not a label character. */ for (;;) { dyStringAppendC(dy, c); if ((c = xpGetChar(xp)) == 0) xpUnexpectedEof(xp); if (isspace(c)) { if (c == '\n') ++xp->lineIx; break; } if (c == '=') break; if (c == '/' || c == '>') xpError(xp, "Expecting '=' after attribute name"); } /* Skip white space until '=' */ if (c != '=') { for (;;) { if ((c = xpGetChar(xp)) == 0) xpUnexpectedEof(xp); if (isspace(c)) { if (c == '\n') ++xp->lineIx; } else break; } if (c != '=') xpError(xp, "Expecting '=' after attribute name"); } /* Skip space until quote. 
*/ for (;;) { if ((c = xpGetChar(xp)) == 0) xpUnexpectedEof(xp); else if (isspace(c)) { if (c == '\n') ++xp->lineIx; } else break; } if (c != '\'' && c != '"') xpError(xp, "Expecting quoted string after ="); /* Allocate space in attribute table. */ if (attCount >= maxAttCount - 2) xpError(xp, "Attribute stack overflow"); dy = retAttributes[attCount]; if (dy == NULL) dy = retAttributes[attCount] = newDyString(64); else dyStringClear(dy); ++attCount; /* Read until next quote. */ quotC = c; lineStart = xp->lineIx; for (;;) { if ((c = xpGetChar(xp)) == 0) xpError(xp, "End of file inside literal string that started at line %d", lineStart); if (c == quotC) break; if (c == '&') xpLookup(xp, xp->endTag, dy); else { if (c == '\n') ++xp->lineIx; dyStringAppendC(dy, c); } } } } if (c == '/') { *retClosed = TRUE; c = xpGetChar(xp); if (c != '>') xpError(xp, "Expecting '>' after '/'"); } else *retClosed = FALSE; *retAttCount = attCount; }
int checkTableCoords(char *db) /* Check several invariants (see comments in check*() above), * summarize errors, return nonzero if there are errors. */ { struct sqlConnection *conn = hAllocConn(db); struct slName *tableList = NULL, *curTable = NULL; struct slName *allChroms = NULL; boolean gotError = FALSE; allChroms = hAllChromNames(db); if (theTable == NULL) tableList = getTableNames(conn); else if (sqlTableExists(conn, theTable)) tableList = newSlName(theTable); else errAbort("Error: specified table \"%s\" does not exist in database %s.", theTable, db); for (curTable = tableList; curTable != NULL; curTable = curTable->next) { struct hTableInfo *hti = NULL; struct slName *chromList = NULL, *chromPtr = NULL; char *table = curTable->name; char tableChrom[32], trackName[128], tableChromPrefix[33]; hParseTableName(db, table, trackName, tableChrom); hti = hFindTableInfo(db, tableChrom, trackName); if (hti != NULL && hti->isPos) { /* watch out for presence of both split and non-split tables; * hti for non-split will be replaced with hti of split. */ if (splitAndNonSplitExist(conn, table, tableChrom)) continue; safef(tableChromPrefix, sizeof(tableChromPrefix), "%s_", tableChrom); if (hti->isSplit) chromList = newSlName(tableChrom); else chromList = allChroms; /* invariant: chrom must be described in chromInfo. */ /* items with bad chrom will be invisible to hGetBedRange(), so * catch them here by SQL query. 
*/ /* The SQL query is too huge for scaffold-based db's, check count: */ if (hChromCount(db) <= MAX_SEQS_SUPPORTED) { if (isNotEmpty(hti->chromField)) { struct dyString *bigQuery = newDyString(1024); dyStringClear(bigQuery); sqlDyStringPrintf(bigQuery, "select count(*) from %s where ", table); for (chromPtr=chromList; chromPtr != NULL; chromPtr=chromPtr->next) { sqlDyStringPrintf(bigQuery, "%s != '%s' ", hti->chromField, chromPtr->name); if (chromPtr->next != NULL) dyStringAppend(bigQuery, "AND "); } gotError |= reportErrors(BAD_CHROM, table, sqlQuickNum(conn, bigQuery->string)); dyStringFree(&bigQuery); } for (chromPtr=chromList; chromPtr != NULL; chromPtr=chromPtr->next) { char *chrom = chromPtr->name; struct bed *bedList = hGetBedRange(db, table, chrom, 0, 0, NULL); if (hti->isSplit && isNotEmpty(hti->chromField)) gotError |= checkSplitTableOnlyChrom(bedList, table, hti, tableChrom); gotError |= checkStartEnd(bedList, table, hti, testChromSize(chrom)); if (hti->hasCDS) gotError |= checkCDSStartEnd(bedList, table, hti); if (hti->hasBlocks && !ignoreBlocks) gotError |= checkBlocks(bedList, table, hti); bedFreeList(&bedList); } } } } return gotError; }
void addSdrfToStormTop(char *sdrfFile, struct tagStorm *storm) /* Add lines of sdrfFile as children of first top level stanza in storm. */ { struct fieldedTable *table = fieldedTableFromTabFile(sdrfFile, sdrfFile, NULL, 0 ); /* Convert ArrayExpress field names to our field names */ int fieldIx; char *lastNonTerm = NULL; char *lastNonUnit = NULL; for (fieldIx=0; fieldIx < table->fieldCount; fieldIx += 1) { char tagName[256]; aeFieldToNormalField("sdrf.", table->fields[fieldIx], tagName, sizeof(tagName)); if (lastNonTerm != NULL && sameString("sdrf.Term_Source_REF", tagName)) { safef(tagName, sizeof(tagName), "%s_Term_Source_REF", lastNonTerm); table->fields[fieldIx] = lmCloneString(table->lm, tagName); } else if (lastNonTerm != NULL && sameString("sdrf.Term_Accession_Number", tagName)) { safef(tagName, sizeof(tagName), "%s_Term_Accession_Number", lastNonTerm); table->fields[fieldIx] = lmCloneString(table->lm, tagName); } else if (lastNonUnit != NULL && startsWith("sdrf.Unit_", tagName)) { safef(tagName, sizeof(tagName), "%s_Unit", lastNonUnit); lastNonTerm = lmCloneString(table->lm, tagName); table->fields[fieldIx] = lastNonTerm; } else { lastNonTerm = lastNonUnit = lmCloneString(table->lm, tagName); table->fields[fieldIx] = lastNonTerm; } } /* Make up fastq field indexes to handle processing of paired reads in fastq, which * take two lines of sdrf file. 
*/ char *fieldsWithFastqs[] = /* Fields that contain the fastq file names */ { "sdrf.Comment_FASTQ_URI", "sdrf.Comment_SUBMITTED_FILE_NAME", "sdrf.Scan_Name", }; boolean mightReuseStanza = TRUE; bool *reuseMultiFields; // If set this field can vary and line still reused AllocArray(reuseMultiFields, table->fieldCount); int i; for (i=0; i<ArraySize(fieldsWithFastqs); ++i) { char *field = fieldsWithFastqs[i]; int ix = stringArrayIx(field, table->fields, table->fieldCount); if (ix >=0) reuseMultiFields[ix] = TRUE; else if (i == 0) { mightReuseStanza = FALSE; break; // Make sure has first one if going to do paired read fastq processing } } /* Make up a list and hash of fieldMergers to handle conversion of columns that occur * multiple times to a comma-separated list of values in a single column. */ struct fieldMerger /* Something to help merge multiple columns with same name */ { struct fieldMerger *next; /* Next in list */ char *name; struct dyString *val; /* Comma separated value */ }; struct hash *fieldHash = hashNew(0); struct fieldMerger *fmList = NULL; for (fieldIx = 0; fieldIx < table->fieldCount; ++fieldIx) { char *fieldName = table->fields[fieldIx]; if (hashLookup(fieldHash, fieldName) == NULL) { struct fieldMerger *fm; AllocVar(fm); fm->name = fieldName; fm->val = dyStringNew(0); slAddTail(&fmList, fm); hashAdd(fieldHash, fieldName, fm); } } /* Grab top level stanza and make sure there is only one. */ struct tagStanza *topStanza = storm->forest; if (topStanza == NULL || topStanza->next != NULL) internalErr(); /* Scan through table, making new stanzas for each row and hooking them into topStanza */ struct fieldedRow *fr, *lastFr = NULL; struct tagStanza *stanza = NULL; for (fr = table->rowList; fr != NULL; fr = fr->next) { /* Empty out any existing vals */ struct fieldMerger *fm; for (fm = fmList; fm != NULL; fm = fm->next) dyStringClear(fm->val); /* Add all non-empty values from this row to our fieldMergers. 
*/ char **row = fr->row; for (fieldIx = 0; fieldIx < table->fieldCount; ++fieldIx) { char *fieldName = table->fields[fieldIx]; fm = hashMustFindVal(fieldHash, fieldName); char *val = row[fieldIx]; if (!isEmpty(val)) csvEscapeAndAppend(fm->val, val); } /* If only the reuseMultiFields are varying, append to those values in previous stanza, * otherwise make a new stanza */ if (mightReuseStanza && lastFr != NULL && sameExceptForSome(lastFr->row, fr->row, table->fieldCount, reuseMultiFields)) { int i; for (i=0; i<ArraySize(fieldsWithFastqs); ++i) { char *fieldName = fieldsWithFastqs[i]; if ((fm = hashFindVal(fieldHash, fieldName)) != NULL) { char *newVal = fm->val->string; char *oldVal = tagMustFindVal(stanza, fieldName); int bothSize = strlen(newVal) + strlen(oldVal) + 1 + 1; char bothBuf[bothSize]; safef(bothBuf, bothSize, "%s,%s", oldVal, newVal); tagStanzaUpdateTag(storm, stanza, fieldName, bothBuf); } } } else { /* Output all nonempty vals to stanza */ stanza = tagStanzaNew(storm, topStanza); for (fm = fmList; fm != NULL; fm = fm->next) if (fm->val->stringSize > 0) tagStanzaAppend(storm, stanza, fm->name, fm->val->string); } lastFr = fr; } slReverse(&topStanza->children); }
void loadGeneToMotif(struct sqlConnection *conn, char *fileName, char *table, struct hash *geneToModuleHash, struct hash *moduleAndMotifHash, struct hash *motifHash, struct hash *positionsHash, char *regionTable) /* Load file which is a big matrix with genes for rows and motifs for * columns. There is a semicolon-separated list of numbers in the matrix * where a gene has the motif, and an empty (tab separated) field * where there is no motif. The numbers are relative to the * region associated with the gene in the positionsHash. * Only load bits of this where motif actually occurs in module associated * with gene. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; FILE *f = hgCreateTabFile(tmpDir, table); char *motifNames[32*1024], *row[32*1024]; int motifCount, rowSize, i; char *gene, *module; int geneCount = 0, total = 0; struct dyString *dy = dyStringNew(512); struct genomePos *motifPosList = NULL, *motifPosForGene; struct genomePos *regionPosList = NULL, *regionPos; /* Read first line, which is labels. */ if (!lineFileNextReal(lf, &line)) errAbort("Empty file %s", fileName); subChar(line, ' ', '_'); motifCount = chopLine(line, motifNames); if (motifCount >= ArraySize(motifNames)) errAbort("Too many motifs line 1 of %s", fileName); lineFileExpectAtLeast(lf, 2, motifCount); motifNames[0] = NULL; for (i=1; i<motifCount; ++i) { char name[64]; motifNames[i] = cloneString(fixMotifName(motifNames[i],name,sizeof(name))); if (!hashLookup(motifHash, motifNames[i])) errAbort("Motif %s is in %s but not modules_motifs.gxm", motifNames[i], fileName); } /* Read subsequent lines. 
*/ while ((rowSize = lineFileChopTab(lf, row)) != 0) { lineFileExpectWords(lf, motifCount, rowSize); gene = row[0]; module = hashFindVal(geneToModuleHash, gene); if (module == NULL) { warn("WARNING: Gene %s in line %d of %s but not module_assignments.tab", gene, lf->lineIx, lf->fileName); continue; } regionPos = NULL; for (i=1; i<rowSize; ++i) { if (row[i][0] != 0) { if (hashLookup2(moduleAndMotifHash, module, motifNames[i])) { regionPos = hashFindVal(positionsHash, gene); if (regionPos == NULL) { warn("WARNING: %s in %s but not gene_positions.tab", gene, fileName); i = rowSize; continue; } motifPosForGene = convertMotifPos(row[i], regionPos, hashMustFindVal(motifHash, motifNames[i]), lf); motifPosList = slCat(motifPosForGene, motifPosList); ++total; } } } if (regionPos != NULL) { slAddHead(®ionPosList, regionPos); } ++geneCount; } lineFileClose(&lf); /* Output sorted table of all motif hits. */ { struct genomePos *pos; slSort(&motifPosList, genomePosCmp); for (pos = motifPosList; pos != NULL; pos = pos->next) { int start = pos->start; int end = pos->end; if (start < 0) start = 0; fprintf(f, "%d\t", binFromRange(start, end)); fprintf(f, "%s\t", pos->chrom); fprintf(f, "%d\t%d\t", start, end); fprintf(f, "%s\t", pos->motif); fprintf(f, "%d\t", pos->score); fprintf(f, "%c\t", pos->strand); fprintf(f, "%s\n", pos->name); } sqlDyStringPrintf(dy, "CREATE TABLE %s (\n" " bin smallInt unsigned not null,\n" " chrom varChar(255) not null,\n" " chromStart int not null,\n" " chromEnd int not null,\n" " name varchar(255) not null,\n" " score int not null,\n" " strand char(1) not null,\n" " gene varchar(255) not null,\n" " #Indices\n" " INDEX(gene(12)),\n" " INDEX(name(16)),\n" " INDEX(chrom(8),bin)\n" ")\n", table); sqlRemakeTable(conn, table, dy->string); verbose(1, "%d genes, %d motifs, %d motifs in genes\n", geneCount, motifCount-1, total); hgLoadTabFile(conn, tmpDir, table, &f); // hgRemoveTabFile(tmpDir, table); verbose(1, "Loaded %s table\n", table); 
slFreeList(&motifPosList); } /* Now output sorted table of upstream regions. */ { FILE *f = hgCreateTabFile(tmpDir, regionTable); struct genomePos *pos; dyStringClear(dy); sqlDyStringPrintf(dy, "CREATE TABLE %s (\n" " bin smallInt unsigned not null,\n" " chrom varChar(255) not null,\n" " chromStart int not null,\n" " chromEnd int not null,\n" " name varchar(255) not null,\n" " score int not null,\n" " strand char(1) not null,\n" " #Indices\n" " INDEX(name(16)),\n" " INDEX(chrom(8),bin)\n" ")\n", regionTable); sqlRemakeTable(conn, regionTable, dy->string); slSort(®ionPosList, genomePosCmp); for (pos = regionPosList; pos != NULL; pos = pos->next) { int start = pos->start; int end = pos->end; if (start < 0) start = 0; fprintf(f, "%d\t", binFromRange(start, end)); fprintf(f, "%s\t", pos->chrom); fprintf(f, "%d\t%d\t", start, end); fprintf(f, "%s\t", pos->name); fprintf(f, "%d\t", pos->score); fprintf(f, "%c\n", pos->strand); } hgLoadTabFile(conn, tmpDir, regionTable, &f); // hgRemoveTabFile(tmpDir, regionTable); } }
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName)
/* hgLoadChromGraph - Load up chromosome graph.
 *   Reads the graph from fileName (translating ids through the table named
 * by the -idTable option if given), optionally converts values to
 * -log10 (-minusLog10 option), and writes a sorted tab file named after
 * track in the current directory.  If doLoad is set, the tab file is loaded
 * into table 'track' in db, a binary <track>.cgb file is written, and a row
 * describing the track is put in metaChromGraph. */
{
double minVal,maxVal;
struct chromGraph *el, *list;
FILE *f;
char *tempDir = ".";
char path[PATH_LEN], gbdbPath[PATH_LEN];
char *idTable = optionVal("idTable", NULL);
char *pathPrefix = NULL;

if (idTable == NULL)
    list = chromGraphLoadAll(fileName);
else
    list = chromGraphListWithTable(fileName, db, idTable);
if (list == NULL)
    errAbort("%s is empty", fileName);

/* Apply the optional -log10 transform to every element, including the
 * first one, before looking at the range.  Values that are zero or negative
 * are left as they are. */
if (optionExists("minusLog10"))
    {
    for (el = list; el != NULL; el = el->next)
	{
	if (el->val == 1)
	    el->val = 0;	/* Avoids ending up with negative zero. */
	else if (el->val > 0)
	    el->val = -1 * log(el->val)/log(10);
	}
    }

/* Figure out min/max values */
minVal = maxVal = list->val;
for (el = list->next; el != NULL; el = el->next)
    {
    if (el->val < minVal)
	minVal = el->val;
    if (el->val > maxVal)
	maxVal = el->val;
    }

/* Sort and write out temp file. */
slSort(&list, chromGraphCmp);
f = hgCreateTabFile(tempDir, track);
for (el = list; el != NULL; el = el->next)
    chromGraphTabOut(el, f);

if (doLoad)
    {
    struct dyString *dy = dyStringNew(0);
    struct sqlConnection *conn;

    /* Set up connection to database and create main table. */
    conn = hAllocConn(db);
    dyStringPrintf(dy, createString, track, hGetMinIndexLength(db));
    sqlRemakeTable(conn, track, dy->string);

    /* Load main table and clean up file handle. */
    hgLoadTabFile(conn, tempDir, track, &f);
    hgRemoveTabFile(tempDir, track);

    /* If need be create meta table.  If need be delete old row. */
    if (!sqlTableExists(conn, "metaChromGraph"))
	sqlUpdate(conn, metaCreateString);
    else
	{
	dyStringClear(dy);
	dyStringPrintf(dy, "delete from metaChromGraph where name = '%s'",
		track);
	sqlUpdate(conn, dy->string);
	}

    /* Make chrom graph file */
    safef(path, sizeof(path), "%s.cgb", track);
    chromGraphToBin(list, path);

    /* The path recorded in the meta table defaults to /gbdb/<db>/chromGraph
     * unless overridden with the -pathPrefix option. */
    safef(path, sizeof(path), "/gbdb/%s/chromGraph", db);
    pathPrefix = optionVal("pathPrefix", path);
    safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track);

    /* Create new line in meta table */
    dyStringClear(dy);
    dyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');",
	    track, minVal, maxVal, gbdbPath);
    sqlUpdate(conn, dy->string);
    }
}
static void doBlat(struct sqlConnection *conn, int taxon, char *db) /* place probe seq from non-BAC with blat that have no alignments yet */ { int rc = 0; char *blatSpec=NULL; char cmdLine[256]; char path1[256]; char path2[256]; struct dyString *dy = dyStringNew(0); /* (non-BACs needing alignment) */ dyStringClear(dy); dyStringPrintf(dy, "select concat(\"vgPrb_\",e.id), e.seq" " from vgPrb e, vgPrbAli a" " where e.id = a.vgPrb" " and a.db = '%s'" " and a.status = 'new'" " and e.taxon = %d" " and e.type <> 'bac'" " and e.seq <> ''" " order by e.id" , db, taxon); //restore: rc = sqlSaveQuery(conn, dy->string, "blat.fa", TRUE); verbose(1,"rc = %d = count of sequences for blat, to get psls for taxon %d\n",rc,taxon); if (rc == 0) { unlink("blat.fa"); system("rm -f blatNearBest.psl; touch blatNearBest.psl"); /* make empty file */ return; } /* make .ooc and blat on kolossus */ safef(path1,sizeof(path1),"/gbdb/%s/%s.2bit",db,db); safef(path2,sizeof(path2),"%s/%s.2bit",getCurrentDir(),db); //restore: verbose(1,"copy: [%s] to [%s]\n",path1,path2); copyFile(path1,path2); safef(cmdLine,sizeof(cmdLine), "ssh kolossus 'cd %s; blat -makeOoc=11.ooc -tileSize=11" " -repMatch=1024 %s.2bit /dev/null /dev/null'", getCurrentDir(),db); //restore: system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date"); safef(cmdLine,sizeof(cmdLine), "ssh kolossus 'cd %s; blat %s.2bit blat.fa -ooc=11.ooc -noHead blat.psl'", getCurrentDir(),db); //restore: system("date"); verbose(1,"cmdLine: [%s]\n",cmdLine); system(cmdLine); system("date"); /* using blat even with -fastMap was way too slow - took over a day, * so instead I will make a procedure to write a fake psl for the BACs * which you will see called below */ safef(path2,sizeof(path2),"%s.2bit",db); verbose(1,"rm %s\n",path2); unlink(path2); safef(path2,sizeof(path2),"11.ooc"); verbose(1,"rm %s\n",path2); unlink(path2); /* skip psl header and sort on query name */ safef(cmdLine,sizeof(cmdLine), "sort -k 10,10 blat.psl > 
blatS.psl"); verbose(1,"cmdLine=[%s]\n",cmdLine); system(cmdLine); /* keep near best within 5% of the best */ safef(cmdLine,sizeof(cmdLine), "pslCDnaFilter -globalNearBest=0.005 -minId=0.96 -minNonRepSize=20 -minCover=0.50" " blatS.psl blatNearBest.psl"); verbose(1,"cmdLine=[%s]\n",cmdLine); system(cmdLine); unlink("blat.fa"); unlink("blat.psl"); unlink("blatS.psl"); freez(&blatSpec); dyStringFree(&dy); }
static void parseDbXrefs() /* Parse the db_xref entries for various features to build a single dbx entry * in the kvt and to obtain the locus and mim ids for the kvt */ { static char* LOCUS_ID = "LocusID:"; static char* GENE_ID = "GeneID:"; static char* MIM_ID = "MIM:"; struct slName* head = NULL, *xref, *prevXref; struct keyVal* dbXrefKv = NULL; struct keyVal* locusLinkIdKv = NULL; struct keyVal* geneIdKv = NULL; struct keyVal* omimIdKv = NULL; if (dbXrefBuf == NULL) dbXrefBuf = dyStringNew(256); dyStringClear(dbXrefBuf); if (omimIdBuf == NULL) omimIdBuf = dyStringNew(256); dyStringClear(omimIdBuf); locusLinkId[0] = '\0'; /* split into a list and sort so we can remove dups */ if (gbCdsDbxField->val->stringSize > 0) head = slCat(head, parseDbXrefStr(gbCdsDbxField->val->string)); if (gbGeneDbxField->val->stringSize > 0) head = slCat(head, parseDbXrefStr(gbGeneDbxField->val->string)); slNameSort(&head); xref = head; prevXref = NULL; while (xref != NULL) { /* skip if dup of previous */ if ((prevXref == NULL) || !sameString(prevXref->name, xref->name)) { if (dbXrefBuf->stringSize > 0) dyStringAppendC(dbXrefBuf, ' '); dyStringAppend(dbXrefBuf, xref->name); updateKvt(&dbXrefKv, "dbx", dbXrefBuf->string); /* find number in db_xref like LocusID:27 or GeneID:27 */ if (startsWith(LOCUS_ID, xref->name)) { safef(locusLinkId, sizeof(locusLinkId), "%s", xref->name+strlen(LOCUS_ID)); updateKvt(&locusLinkIdKv, "loc", locusLinkId); } else if (startsWith(GENE_ID, xref->name)) { safef(geneId, sizeof(geneId), "%s", xref->name+strlen(GENE_ID)); updateKvt(&geneIdKv, "gni", geneId); } else if (startsWith(MIM_ID, xref->name)) { if (omimIdBuf->stringSize > 0) dyStringAppendC(omimIdBuf, ' '); dyStringAppend(omimIdBuf, xref->name+strlen(MIM_ID)); updateKvt(&omimIdKv, "mim", omimIdBuf->string); } } prevXref = xref; xref = xref->next; } slFreeList(&head); }
static void doSeqAndExtFile(struct sqlConnection *conn, char *db, char *table) { int rc = 0; char cmd[256]; char path[256]; char bedPath[256]; char gbdbPath[256]; char *fname=NULL; struct dyString *dy = dyStringNew(0); dyStringClear(dy); dyStringPrintf(dy, "select distinct concat('vgPrb_',e.id), e.seq" " from vgPrb e join %s.%s v" " left join %s.seq s on s.acc = v.qName" " where concat('vgPrb_',e.id) = v.qName" " and s.acc is NULL" " order by e.id" , db, table, db); rc = sqlSaveQuery(conn, dy->string, "vgPrbExt.fa", TRUE); verbose(1,"rc = %d = count of sequences for vgPrbExt.fa, to use with %s track %s\n",rc,db,table); if (rc > 0) /* can set any desired minimum */ { safef(bedPath,sizeof(bedPath),"/cluster/data/%s/bed/visiGene/",db); if (!fileExists(bedPath)) { safef(cmd,sizeof(cmd),"mkdir %s",bedPath); verbose(1,"%s\n",cmd); system(cmd); } safef(gbdbPath,sizeof(gbdbPath),"/gbdb/%s/visiGene/",db); if (!fileExists(gbdbPath)) { safef(cmd,sizeof(cmd),"mkdir %s",gbdbPath); verbose(1,"%s\n",cmd); system(cmd); } while(1) { int i=0; safef(path,sizeof(path),"%svgPrbExt_AAAAAA.fa",bedPath); char *c = rStringIn("AAAAAA",path); srand( (unsigned)time( NULL ) ); for(i=0;i<6;++i) { *c++ += (int) 26 * (rand() / (RAND_MAX + 1.0)); } if (!fileExists(path)) break; } safef(cmd,sizeof(cmd),"cp vgPrbExt.fa %s",path); verbose(1,"%s\n",cmd); system(cmd); fname = rStringIn("/", path); ++fname; safef(cmd,sizeof(cmd),"ln -s %s %s%s",path,gbdbPath,fname); verbose(1,"%s\n",cmd); system(cmd); safef(cmd,sizeof(cmd),"hgLoadSeq %s %s%s", db, gbdbPath,fname); verbose(1,"%s\n",cmd); system(cmd); } dyStringFree(&dy); }
void gensatImageDownload(char *gensatXml, char *outDir, char *outLog) /* gensatImageDownload - Download images from gensat guided by xml file.. */ { struct xap *xap; struct gsGensatImage *image; char *ftpUri = "ftp://ftp.ncbi.nih.gov/pub/gensat"; char *jpgCgiUri = "http://www.ncbi.nlm.nih.gov/projects/gensat/gensat_img.cgi?action=image&mode=full&fmt=jpeg&id="; char finalJpg[PATH_LEN]; char finalDir[PATH_LEN]; char wgetSource[PATH_LEN]; struct hash *dirHash = newHash(16); struct dyString *mkdir = dyStringNew(0); int imageIx = 0; fLog = mustOpen(outLog, "a"); fprintf(fLog, "starting gensatImageDownload from %s to %s\n", gensatXml, outDir); xap = xapListOpen(gensatXml, "GensatImageSet", gsStartHandler, gsEndHandler); while ((image = xapListNext(xap, "GensatImage")) != NULL) { int id = image->gsGensatImageId->text; char *imageFile = image->gsGensatImageImageInfo->gsGensatImageImageInfoFullImg ->gsGensatImageInfo->gsGensatImageInfoFilename->text; /* Mangle file name a little */ subChar(imageFile, '(', '_'); stripChar(imageFile, ')'); /* Figure out name of jpeg file in outDir. */ verbose(1, "image %d, id %d\n", ++imageIx, id); safef(finalJpg, sizeof(finalJpg), "%s/%s", outDir, imageFile); stripString(finalJpg, ".full"); /* Image magick can't handle two suffixes */ chopSuffix(finalJpg); strcat(finalJpg, ".jpg"); /* Create directory that it goes in if necessary */ splitPath(finalJpg, finalDir, NULL, NULL); if (!hashLookup(dirHash, finalDir)) { hashAdd(dirHash, finalDir, NULL); dyStringClear(mkdir); dyStringPrintf(mkdir, "mkdir -p %s", finalDir); if (system(mkdir->string) != 0) errAbort("Couldn't %s", mkdir->string); } /* Download it - either directly via ftp, or indirectly via cgi. 
*/ if (fileExists(finalJpg)) { verbose(1, "already have %s\n", imageFile); fprintf(fLog, "%s already downloaded\n", finalJpg); } else { if (endsWith(imageFile, ".jpg")) { safef(wgetSource, sizeof(wgetSource), "%s/%s", ftpUri, imageFile); if (safeGetOne(wgetSource, finalJpg)) fprintf(fLog, "Got via ftp %s\n", finalJpg); } else { safef(wgetSource, sizeof(wgetSource), "%s%d", jpgCgiUri, id); if (safeGetOne(wgetSource, finalJpg)) fprintf(fLog, "Got via cgi %s\n", finalJpg); } } } carefulClose(&fLog); }
char *scanSettingsForCT(char *userName, char *sessionName, char *contents, int *pLiveCount, int *pExpiredCount) /* Parse the CGI-encoded session contents into {var,val} pairs and search * for custom tracks. If found, refresh the custom track. Parsing code * taken from cartParseOverHash. * If any nonexistent custom track files are found, return a SQL update * command that will remove those from this session. We can't just do * the update here because that messes up the caller's query. */ { int contentLength = strlen(contents); struct dyString *newContents = dyStringNew(contentLength+1); struct dyString *oneSetting = dyStringNew(contentLength / 4); char *updateIfAny = NULL; char *contentsToChop = cloneString(contents); char *namePt = contentsToChop; verbose(3, "Scanning %s %s\n", userName, sessionName); while (isNotEmpty(namePt)) { char *dataPt = strchr(namePt, '='); char *nextNamePt; if (dataPt == NULL) errAbort("Mangled session content string %s", namePt); *dataPt++ = 0; nextNamePt = strchr(dataPt, '&'); if (nextNamePt != NULL) *nextNamePt++ = 0; dyStringClear(oneSetting); dyStringPrintf(oneSetting, "%s=%s%s", namePt, dataPt, (nextNamePt ? "&" : "")); if (startsWith(CT_FILE_VAR_PREFIX, namePt)) { boolean thisGotLiveCT = FALSE, thisGotExpiredCT = FALSE; cgiDecode(dataPt, dataPt, strlen(dataPt)); verbose(3, "Found variable %s = %s\n", namePt, dataPt); /* If the file does not exist, omit this setting from newContents so * it doesn't get copied from session to session. If it does exist, * leave it up to customFactoryTestExistence to parse the file for * possible customTrash table references, some of which may exist * and some not. */ if (! 
fileExists(dataPt)) { verbose(3, "Removing %s from %s %s\n", oneSetting->string, userName, sessionName); thisGotExpiredCT = TRUE; } else { char *db = namePt + strlen(CT_FILE_VAR_PREFIX); dyStringAppend(newContents, oneSetting->string); customFactoryTestExistence(db, dataPt, &thisGotLiveCT, &thisGotExpiredCT); } if (thisGotLiveCT && pLiveCount != NULL) (*pLiveCount)++; if (thisGotExpiredCT && pExpiredCount != NULL) (*pExpiredCount)++; if (thisGotExpiredCT) { if (verboseLevel() >= 3) verbose(3, "Found expired custom track in %s %s: %s\n", userName, sessionName, dataPt); else verbose(2, "Found expired custom track: %s\n", dataPt); } if (thisGotLiveCT) verbose(4, "Found live custom track: %s\n", dataPt); } else dyStringAppend(newContents, oneSetting->string); namePt = nextNamePt; } if (newContents->stringSize != contentLength) { struct dyString *update = dyStringNew(contentLength*2); if (newContents->stringSize > contentLength) errAbort("Uh, why is newContents (%d) longer than original (%d)??", newContents->stringSize, contentLength); dyStringPrintf(update, "UPDATE %s set contents='", savedSessionTable); dyStringAppendN(update, newContents->string, newContents->stringSize); dyStringPrintf(update, "', lastUse=now(), useCount=useCount+1 " "where userName=\"%s\" and sessionName=\"%s\";", userName, sessionName); verbose(3, "Removing one or more dead CT file settings from %s %s " "(original length %d, now %d)\n", userName, sessionName, contentLength, newContents->stringSize); updateIfAny = dyStringCannibalize(&update); } dyStringFree(&oneSetting); dyStringFree(&newContents); freeMem(contentsToChop); return updateIfAny; }