boolean cleanTable(char *table) /* clean a specific table */ { struct sqlResult *sr; char **row; char query[256]; int *ids; int totalRows = 0; boolean squealed = FALSE; time_t cleanStart = time(NULL); verbose(1, "-------------------\n"); verbose(1, "Cleaning table %s\n", table); verbose(1, "%s\n", ctime(&cleanStart)); totalRows = sqlTableSize(conn, table); verbose(1,"totalRows=%d\n", totalRows); if (totalRows==0) { verbose(1,"table %s is empty!", table); return FALSE; } AllocArray(ids, totalRows); // This is a super-fast query because it only needs to read the index which is cached in memory. sqlSafef(query,sizeof(query), "select id from %s" , table); sr = sqlGetResult(conn, query); int i = 0; while ((row = sqlNextRow(sr)) != NULL) { ids[i++] = sqlUnsigned(row[0]); if (i >= totalRows) break; } sqlFreeResult(&sr); totalRows = i; // in case they differed. int purgeRangeStart = -1; int purgeRangeEnd = -1; if (optionExists("purgeStart")) // manual purge range specified { purgeStart = optionInt("purgeStart", -1); purgeEnd = optionInt("purgeEnd", -1); if (purgeStart < 1 || purgeStart > 720) errAbort("Invalid purgeStart"); if (purgeEnd < 0) purgeEnd = 0; if (purgeStart < purgeEnd) errAbort("purgeStart should be greater than purgeEnd (in days ago)"); purgeRangeStart = binaryIdSearch(ids, totalRows, table, purgeStart); purgeRangeEnd = binaryIdSearch(ids, totalRows, table, purgeEnd); verbose(1, "manual purge range: purgeStart %d purgeEnd %d rangeStart %d rangeEnd %d rangeSize=%d ids[rs]=%d\n", purgeStart, purgeEnd, purgeRangeStart, purgeRangeEnd, purgeRangeEnd-purgeRangeStart, ids[purgeRangeStart]); if (!optionExists("dryRun")) cleanTableSection(table, ids[purgeRangeStart], ids[purgeRangeEnd]); } else // figure out purge-ranges automatically { int firstUseAge = 0; if (sameString(table, sessionDbTableName)) firstUseAge = 14; if (sameString(table, userDbTableName)) firstUseAge = 365; int day = sqlQuickNum(conn, NOSQLINJ "select dayofweek(now())"); // These old records take a long time to go through, 5k sessionDb to 55k userDb old recs to look at, // and typically produce only a few hundred deletions. // they are growing slowly and expire rarely, so we don't need to scan them // frequently and aggressively. So ONLY scan them once per week by doing 1/7 per day. // Also don't need to worry much about the // borders of the split-over-7-days divisions shifting much because the set is so nearly static. YAWN. int firstUseIndex = binaryIdSearch(ids, totalRows, table, firstUseAge); int oldRangeSize = (firstUseIndex - 0) / 7; int oldRangeStart = oldRangeSize * (day-1); int oldRangeEnd = oldRangeStart + oldRangeSize; verbose(1, "old cleaner: firstUseAge=%d firstUseIndex = %d day %d: rangeStart %d rangeEnd %d rangeSize=%d ids[oldRangeStart]=%d\n", firstUseAge, firstUseIndex, day, oldRangeStart, oldRangeEnd, oldRangeEnd-oldRangeStart, ids[oldRangeStart]); //int oldRangeStart = 0; //int oldRangeEnd = firstUseIndex; //verbose(1, "old cleaner: firstUseAge=%d firstUseIndex = %d rangeStart %d rangeEnd %d rangeSize=%d ids[firstUseIndex]=%d\n", //firstUseAge, firstUseIndex, oldRangeStart, oldRangeEnd, oldRangeEnd-oldRangeStart, ids[firstUseIndex]); // newly old can be expected to have some delete action // these records have newly crossed the threshold into being old enough to have possibly expired. int newOldRangeStart = firstUseIndex; int newOldRangeEnd = binaryIdSearch(ids, totalRows, table, firstUseAge - 1); verbose(1, "newOld cleaner: firstUseAge=%d rangeStart %d rangeEnd %d rangeSize=%d ids[newOldRangeStart]=%d\n", firstUseAge, newOldRangeStart, newOldRangeEnd, newOldRangeEnd-newOldRangeStart, ids[newOldRangeStart]); // this is the main delete action of cleaning out new robots (20k to 50k or more) int robo1RangeStart = binaryIdSearch(ids, totalRows, table, 2); int robo1RangeEnd = binaryIdSearch(ids, totalRows, table, 1); verbose(1, "robot cleaner1: twoDayIndex = %d oneDayIndex %d rangeSize=%d ids[rs]=%d\n", robo1RangeStart, robo1RangeEnd, robo1RangeEnd-robo1RangeStart, ids[robo1RangeStart]); int robo2RangeStart = -1; int robo2RangeEnd = -1; if (sameString(table, userDbTableName)) { // secondary robot cleaning only for userDb., produces a somewhat lesser, perhaps 3 to 5k deletions robo2RangeStart = binaryIdSearch(ids, totalRows, table, 7); robo2RangeEnd = binaryIdSearch(ids, totalRows, table, 6); verbose(1, "robot cleaner2: sevenDayIndex = %d sixDayIndex %d rangeSize=%d ids[rs]=%d\n", robo2RangeStart, robo2RangeEnd, robo2RangeEnd-robo2RangeStart, ids[robo2RangeStart]); } /* cannot clean until we have all the ranges determined since deleting messes up binSearch */ if (!optionExists("dryRun")) { verbose(1, "old cleaner:\n"); cleanTableSection(table, ids[oldRangeStart], ids[oldRangeEnd]); } if (!optionExists("dryRun")) { verbose(1, "newOld cleaner:\n"); cleanTableSection(table, ids[newOldRangeStart], ids[newOldRangeEnd]); } if (!optionExists("dryRun")) { verbose(1, "robot cleaner1:\n"); cleanTableSection(table, ids[robo1RangeStart], ids[robo1RangeEnd]); } if (sameString(table, userDbTableName)) { if (!optionExists("dryRun")) { verbose(1, "robot cleaner2:\n"); cleanTableSection(table, ids[robo2RangeStart], ids[robo2RangeEnd]); } } } /* int found = binaryIdSearch(ids, totalRows, table, 1); if ((found >= 0) && (found < totalRows)) verbose(1, "1 days ago found = %d, id == ids[found] = %d \n", found, ids[found]); found = binaryIdSearch(ids, totalRows, table, 2); if ((found >= 0) && (found < totalRows)) verbose(1, "2 days ago found = %d, id == ids[found] = %d \n", found, ids[found]); found = binaryIdSearch(ids, totalRows, table, 30); if ((found >= 0) && (found < totalRows)) verbose(1, "30 days ago found = %d, id == ids[found] = %d \n", found, ids[found]); */ /* if (daysAgoFirstUse < 14) { hitEnd = TRUE; break; } */ /* if (daysAgoFirstUse < 365) { hitEnd = TRUE; break; } */ // may need to pass back this data from the cleanTableSection call TODO //verbose(1, "%s: #rows count=%d delCount=%d\n\n", table, count, delCount); time_t cleanEnd = time(NULL); int minutes = difftime(cleanEnd, cleanStart) / 60; verbose(1, "%s\n", ctime(&cleanEnd)); verbose(1, "%d minutes total\n\n", minutes); squealed = checkMaxTableSizeExceeded(table); return squealed; }
void bioImageLoad(char *setRaFile, char *itemTabFile) /* bioImageLoad - Load data into bioImage database. */ { struct hash *raHash = raReadSingle(setRaFile); struct hash *rowHash; struct lineFile *lf = lineFileOpen(itemTabFile, TRUE); char *line, *words[256]; struct sqlConnection *conn = sqlConnect(database); int rowSize; int submissionSetId; struct hash *fullDirHash = newHash(0); struct hash *screenDirHash = newHash(0); struct hash *thumbDirHash = newHash(0); struct hash *treatmentHash = newHash(0); struct hash *bodyPartHash = newHash(0); struct hash *sliceTypeHash = newHash(0); struct hash *imageTypeHash = newHash(0); struct hash *sectionSetHash = newHash(0); struct dyString *dy = dyStringNew(0); /* Read first line of tab file, and from it get all the field names. */ if (!lineFileNext(lf, &line, NULL)) errAbort("%s appears to be empty", lf->fileName); if (line[0] != '#') errAbort("First line of %s needs to start with #, and then contain field names", lf->fileName); rowHash = hashRowOffsets(line+1); rowSize = rowHash->elCount; if (rowSize >= ArraySize(words)) errAbort("Too many fields in %s", lf->fileName); /* Check that have all required fields */ { char *fieldName; int i; for (i=0; i<ArraySize(requiredSetFields); ++i) { fieldName = requiredSetFields[i]; if (!hashLookup(raHash, fieldName)) errAbort("Field %s is not in %s", fieldName, setRaFile); } for (i=0; i<ArraySize(requiredItemFields); ++i) { fieldName = requiredItemFields[i]; if (!hashLookup(rowHash, fieldName)) errAbort("Field %s is not in %s", fieldName, itemTabFile); } for (i=0; i<ArraySize(requiredFields); ++i) { fieldName = requiredFields[i]; if (!hashLookup(rowHash, fieldName) && !hashLookup(raHash, fieldName)) errAbort("Field %s is not in %s or %s", fieldName, setRaFile, itemTabFile); } } /* Create/find submission record. */ submissionSetId = saveSubmissionSet(conn, raHash); /* Process rest of tab file. */ while (lineFileNextRowTab(lf, words, rowSize)) { int fullDir = cachedId(conn, "location", "name", fullDirHash, "fullDir", raHash, rowHash, words); int screenDir = cachedId(conn, "location", "name", screenDirHash, "screenDir", raHash, rowHash, words); int thumbDir = cachedId(conn, "location", "name", thumbDirHash, "thumbDir", raHash, rowHash, words); int bodyPart = cachedId(conn, "bodyPart", "name", bodyPartHash, "bodyPart", raHash, rowHash, words); int sliceType = cachedId(conn, "sliceType", "name", sliceTypeHash, "sliceType", raHash, rowHash, words); int imageType = cachedId(conn, "imageType", "name", imageTypeHash, "imageType", raHash, rowHash, words); int treatment = cachedId(conn, "treatment", "conditions", treatmentHash, "treatment", raHash, rowHash, words); char *fileName = getVal("fileName", raHash, rowHash, words, NULL); char *submitId = getVal("submitId", raHash, rowHash, words, NULL); char *taxon = getVal("taxon", raHash, rowHash, words, NULL); char *isEmbryo = getVal("isEmbryo", raHash, rowHash, words, NULL); char *age = getVal("age", raHash, rowHash, words, NULL); char *sectionSet = getVal("sectionSet", raHash, rowHash, words, ""); char *sectionIx = getVal("sectionIx", raHash, rowHash, words, "0"); char *gene = getVal("gene", raHash, rowHash, words, ""); char *locusLink = getVal("locusLink", raHash, rowHash, words, ""); char *refSeq = getVal("refSeq", raHash, rowHash, words, ""); char *genbank = getVal("genbank", raHash, rowHash, words, ""); char *priority = getVal("priority", raHash, rowHash, words, "200"); int sectionId = 0; int oldId; // char *xzy = getVal("xzy", raHash, rowHash, words, xzy); if (sectionSet[0] != 0 && !sameString(sectionSet, "0")) { struct hashEl *hel = hashLookup(sectionSetHash, sectionSet); if (hel != NULL) sectionId = ptToInt(hel->val); else { sqlUpdate(conn, "insert into sectionSet values(default)"); sectionId = sqlLastAutoId(conn); hashAdd(sectionSetHash, sectionSet, intToPt(sectionId)); } } dyStringClear(dy); dyStringAppend(dy, "select id from image "); dyStringPrintf(dy, "where fileName = '%s' ", fileName); dyStringPrintf(dy, "and fullLocation = %d", fullDir); oldId = sqlQuickNum(conn, dy->string); if (oldId != 0) { if (replace) { dyStringClear(dy); dyStringPrintf(dy, "delete from image where id = %d", oldId); sqlUpdate(conn, dy->string); } else errAbort("%s is already in database line %d of %s", fileName, lf->lineIx, lf->fileName); } dyStringClear(dy); dyStringAppend(dy, "insert into image set\n"); dyStringPrintf(dy, " id = default,\n"); dyStringPrintf(dy, " fileName = '%s',\n", fileName); dyStringPrintf(dy, " fullLocation = %d,\n", fullDir); dyStringPrintf(dy, " screenLocation = %d,\n", screenDir); dyStringPrintf(dy, " thumbLocation = %d,\n", thumbDir); dyStringPrintf(dy, " submissionSet = %d,\n", submissionSetId); dyStringPrintf(dy, " sectionSet = %d,\n", sectionId); dyStringPrintf(dy, " sectionIx = %s,\n", sectionIx); dyStringPrintf(dy, " submitId = '%s',\n", submitId); dyStringPrintf(dy, " gene = '%s',\n", gene); dyStringPrintf(dy, " locusLink = '%s',\n", locusLink); dyStringPrintf(dy, " refSeq = '%s',\n", refSeq); dyStringPrintf(dy, " genbank = '%s',\n", genbank); dyStringPrintf(dy, " priority = %s,\n", priority); dyStringPrintf(dy, " taxon = %s,\n", taxon); dyStringPrintf(dy, " isEmbryo = %s,\n", isEmbryo); dyStringPrintf(dy, " age = %s,\n", age); dyStringPrintf(dy, " bodyPart = %d,\n", bodyPart); dyStringPrintf(dy, " sliceType = %d,\n", sliceType); dyStringPrintf(dy, " imageType = %d,\n", imageType); dyStringPrintf(dy, " treatment = %d\n", treatment); sqlUpdate(conn, dy->string); } }
void tryToDeprecate(struct sqlConnection *conn) /* CGI variables are set - if possible deprecate, otherwise put up error message. */ { pushWarnHandler(localWarn); fileList = cgiString("fileList"); reason = cloneString(trimSpaces(cgiString("reason"))); if (isEmpty(reason)) { warn("Please enter a reason for deprecation."); getFileListAndReason(conn); } else { /* Go through list of accessions and make sure they are all well formed and correspond to files that exist. */ boolean ok = TRUE; struct slName *accList = slNameListOfUniqueWords(cloneString(fileList), FALSE); struct slName *acc; struct slInt *idList = NULL, *idEl; for (acc = accList; acc != NULL; acc = acc->next) { char *licensePlate = acc->name; if (!startsWith(edwLicensePlatePrefix, licensePlate)) { ok = FALSE; warn("%s is not an accession, doesn't start with %s", licensePlate, edwLicensePlatePrefix); break; } char query[256]; sqlSafef(query, sizeof(query), "select fileId from edwValidFile where licensePlate='%s'", licensePlate); int id = sqlQuickNum(conn, query); if (id == 0) { ok = FALSE; warn("%s - no such accession. ", licensePlate); break; } /* check to see is it ok tor deprecate this file */ if (!okToDeprecateThisFile(conn, id, userEmail)) { ok = FALSE; warn("You can not deprecate %s which was originally uploaded by %s.\n", licensePlate, edwFindOwnerNameFromFileId(conn, id)); warn("Please click the check box below to override this rule."); break; } idEl = slIntNew(id); slAddTail(&idList, idEl); } if (accList == NULL) { warn("Please enter some file accessions"); ok = FALSE; } /* If a problem then put up page to try again, otherwise do deprecation. */ if (!ok) getFileListAndReason(conn); else { deprecateFileList(conn, idList, reason); printf("Deprecated %d files<BR>\n", slCount(idList)); cgiMakeButton("submit", "Deprecate More Files"); printf(" "); edwPrintLogOutButton(); } } }
static void processMrnaFa(struct sqlConnection *conn, int taxon, char *type, char *db) /* process isPcr results */ { struct dyString *dy = dyStringNew(0); struct lineFile *lf = lineFileOpen("mrna.fa", TRUE); int lineSize; char *line; char *name; char *dna; boolean more = lineFileNext(lf, &line, &lineSize); while(more) { if (line[0] != '>') errAbort("unexpected error out of phase\n"); name = cloneString(line+1); verbose(2,"name=%s\n",name); dyStringClear(dy); while((more=lineFileNext(lf, &line, &lineSize))) { if (line[0] == '>') { break; } dyStringAppend(dy,line); } dna = cloneString(dy->string); while(1) { int oldProbe = 0; dyStringClear(dy); dyStringPrintf(dy, "select id from vgPrb " "where taxon=%d and type='%s' and tName='%s' and state='new'",taxon,type,name); oldProbe = sqlQuickNum(conn,dy->string); if (oldProbe==0) break; /* no more records match */ /* record exists and hasn't already been updated */ int vgPrb = findVgPrbBySeq(conn,dna,taxon); if (vgPrb == 0) { dyStringClear(dy); dyStringAppend(dy, "update vgPrb set"); dyStringAppend(dy, " seq = '"); dyStringAppend(dy, dna); dyStringAppend(dy, "',\n"); dyStringPrintf(dy, " db = '%s',\n", db); dyStringAppend(dy, " state = 'seq'\n"); dyStringPrintf(dy, " where id=%d\n", oldProbe); dyStringPrintf(dy, " and state='%s'\n", "new"); verbose(2, "%s\n", dy->string); sqlUpdate(conn, dy->string); } else /* probe seq already exists */ { /* just re-map the probe table recs to it */ dyStringClear(dy); dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,oldProbe); sqlUpdate(conn, dy->string); /* and delete it from vgPrb */ dyStringClear(dy); dyStringPrintf(dy, "delete from vgPrb where id=%d",oldProbe); sqlUpdate(conn, dy->string); } } freez(&name); freez(&dna); } lineFileClose(&lf); dyStringFree(&dy); }
void initStep(struct sqlConnection *conn, struct stepInit *init) /* Create step based on initializer */ { /* Do a little validation on while counting up inputs and outputs */ int inCount = commaSepCount(init->inputTypes); int matchCount = commaSepCount(init->inputFormats); if (inCount != matchCount) errAbort("inputTypes has %d elements but inputFormats has %d in step %s", inCount, matchCount, init->name); int outCount = commaSepCount(init->outputTypes); matchCount = commaSepCount(init->outputFormats); if (outCount != matchCount) errAbort("outputTypes has %d elements but outputFormats has %d in step %s", outCount, matchCount, init->name); matchCount = commaSepCount(init->outputNamesInTempDir); if (outCount != matchCount) errAbort("outputTypes has %d elements but outputNamesInTempDir has %d in step %s", outCount, matchCount, init->name); struct dyString *query = dyStringNew(0); dyStringPrintf(query, "select count(*) from eapStep where name='%s'", init->name); int existingCount = sqlQuickNum(conn, query->string); if (existingCount > 0) { warn("%s already exists in eapStep", init->name); dyStringFree(&query); return; } /* Parse out software part and make sure that all pieces are there. */ char **softwareArray; int softwareCount; sqlStringDynamicArray(init->software, &softwareArray, &softwareCount); unsigned softwareIds[softwareCount]; int i; for (i=0; i<softwareCount; ++i) { char *name = softwareArray[i]; dyStringClear(query); dyStringPrintf(query, "select id from eapSoftware where name='%s'", name); unsigned softwareId = sqlQuickNum(conn, query->string); if (softwareId == 0) errAbort("Software %s doesn't exist by that name in eapSoftware", name); softwareIds[i] = softwareId; } /* Make step record. */ dyStringClear(query); dyStringAppend(query, "insert eapStep (name,cpusRequested," " inCount,inputTypes,inputFormats," " outCount,outputNamesInTempDir,outputTypes,outputFormats)" " values ("); dyStringPrintf(query, "'%s',", init->name); dyStringPrintf(query, "%d,", init->cpusRequested); dyStringPrintf(query, "%d,", inCount); dyStringPrintf(query, "'%s',", init->inputTypes); dyStringPrintf(query, "'%s',", init->inputFormats); dyStringPrintf(query, "%d,", outCount); dyStringPrintf(query, "'%s',", init->outputNamesInTempDir); dyStringPrintf(query, "'%s',", init->outputTypes); dyStringPrintf(query, "'%s'", init->outputFormats); dyStringPrintf(query, ")"); sqlUpdate(conn, query->string); /* Make software/step associations. */ for (i=0; i<softwareCount; ++i) { dyStringClear(query); dyStringPrintf(query, "insert eapStepSoftware (step,software) values ('%s','%s')", init->name, softwareArray[i]); sqlUpdate(conn, query->string); } /* Force step version stuff to be made right away */ eapCurrentStepVersion(conn, init->name); /* Clean up. */ dyStringFree(&query); freez(&softwareArray[0]); freez(&softwareArray); }
void txGeneAlias(char *genomeDb, char *uniProtDb, char *xrefFile, char *evFile, char *oldToNew, char *aliasFile, char *protAliasFile) /* txGeneAlias - Make kgAlias and kgProtAlias tables.. */ { /* Read and hash oldToNew */ struct hash *newToOldHash = loadNewToOldHash(oldToNew); /* Load evidence into hash */ struct hash *evHash = newHash(18); struct txRnaAccs *ev, *evList = txRnaAccsLoadAll(evFile); for (ev = evList; ev != NULL; ev = ev->next) hashAdd(evHash, ev->name, ev); /* Open connections to our databases */ struct sqlConnection *gConn = sqlConnect(genomeDb); struct sqlConnection *uConn = sqlConnect(uniProtDb); struct sqlResult *sr; char **row; char query[256]; /* Open files. */ struct lineFile *lf = lineFileOpen(xrefFile, TRUE); FILE *fAlias = mustOpen(aliasFile, "w"); FILE *fProt = mustOpen(protAliasFile, "w"); /* Stream through xref file, which has much of the info we need, * and which contains a line for each gene. */ char *words[KGXREF_NUM_COLS]; while (lineFileRowTab(lf, words)) { /* Load the xref, and output most of it's fields as aliases. */ struct kgXref *x = kgXrefLoad(words); char *id = x->kgID; outAlias(fAlias, id, x->kgID); outAlias(fAlias, id, x->mRNA); outAlias(fAlias, id, x->spID); outAlias(fAlias, id, x->spDisplayID); outAlias(fAlias, id, x->geneSymbol); outAlias(fAlias, id, x->refseq); outAlias(fAlias, id, x->protAcc); char *old = hashFindVal(newToOldHash, id); if (old != NULL) outAlias(fAlias, id, old); /* If we've got a uniProt ID, use that to get more info from uniProt. */ char *acc = x->spID; if ((acc[0] != 0) && (acc = spLookupPrimaryAccMaybe(uConn, acc)) != NULL) { /* Get current accession and output a bunch of easy protein aliases. */ outProt(fProt, id, acc, acc); outProt(fProt, id, acc, x->spDisplayID); outProt(fProt, id, acc, x->geneSymbol); outProt(fProt, id, acc, x->protAcc); if (old != NULL) outProt(fProt, id, acc, old); /* Throw in old swissProt accessions. */ sqlSafef(query, sizeof(query), "select val from otherAcc where acc = '%s'", acc); sr = sqlGetResult(uConn, query); while ((row = sqlNextRow(sr)) != NULL) { outAlias(fAlias, id, row[0]); outProt(fProt, id, acc, row[0]); } /* Throw in gene names that SwissProt knows about */ struct slName *gene, *geneList = spGenes(uConn, acc); for (gene = geneList; gene != NULL; gene = gene->next) { outAlias(fAlias, id, gene->name); outProt(fProt, id, acc, gene->name); } slFreeList(&geneList); } /* Throw in gene names from genbank. */ /* At some point we may want to restrict this to the primary transcript in a cluster. */ ev = hashFindVal(evHash, id); if (ev != NULL) { int i; for (i=0; i<ev->accCount; ++i) { sqlSafef(query, sizeof(query), "select geneName from gbCdnaInfo where acc='%s'", acc); int nameId = sqlQuickNum(gConn, query); if (nameId != 0) { char name[64]; sqlSafef(query, sizeof(query), "select name from geneName where id=%d", nameId); if (sqlQuickQuery(gConn, query, name, sizeof(name))) outAlias(fAlias, id, name); } } } kgXrefFree(&x); } carefulClose(&fAlias); carefulClose(&fProt); }
void cartSimNoInsert(char *host, char *user, char *password, char *database, char *milliDelayString, char *iterationString) /* cartSimNoInsert - simulates N users accessing cart at regular intervals * where cart data is read and then written back unchanged */ { int milliDelay = sqlUnsigned(milliDelayString); int iterations = sqlUnsigned(iterationString); /* Figure out size of tables. */ struct sqlConnection *conn = sqlConnectRemote(host, user, password, database); int userDbSize = sqlQuickNum(conn, "NOSQLINJ select count(*) from userDb"); int sessionDbSize = sqlQuickNum(conn, "NOSQLINJ select count(*) from sessionDb"); int sampleSize = min(userDbSize, sessionDbSize); int maxSampleSize = 8*1024; sampleSize = min(sampleSize, maxSampleSize); verbose(2, "# userDb has %d rows, sessionDb has %d rows, sampling %d\n" , userDbSize, sessionDbSize, sampleSize); /* Get sample of user id's. */ int *userIds = getSomeInts(conn, "userDb", "id", sampleSize); int *sessionIds = getSomeInts(conn, "sessionDb", "id", sampleSize); /* Get userCount random indexes. */ int *randomIxArray, ix; AllocArray(randomIxArray, userCount); verbose(2, "random user ix:\n"); for (ix=0; ix<userCount; ++ix) { randomIxArray[ix] = rand() % sampleSize; verbose(2, "%d ", randomIxArray[ix]); } verbose(2, "\n"); sqlDisconnect(&conn); int iteration = 0; int querySize = 1024*1024*16; char *query = needLargeMem(querySize); for (;;) { for (ix = 0; ix < userCount; ++ix) { int randomIx = randomIxArray[ix]; long startTime = clock1000(); struct sqlConnection *conn = sqlConnectRemote(host, user, password, database); long connectTime = clock1000(); sqlSafef(query, querySize, "select contents from userDb where id=%d", userIds[randomIx]); char *userContents = sqlQuickString(conn, query); long userReadTime = clock1000(); sqlSafef(query, querySize, "select contents from sessionDb where id=%d", sessionIds[randomIx]); char *sessionContents = sqlQuickString(conn, query); long sessionReadTime = clock1000(); sqlSafef(query, querySize, "update userDb set contents='%s' where id=%d", userContents, userIds[randomIx]); if (!readOnly) sqlUpdate(conn, query); long userWriteTime = clock1000(); sqlSafef(query, querySize, "update sessionDb set contents='%s' where id=%d", sessionContents, sessionIds[randomIx]); if (!readOnly) sqlUpdate(conn, query); long sessionWriteTime = clock1000(); sqlDisconnect(&conn); long disconnectTime = clock1000(); printf("%ld total, %ld size, %ld connect, %ld userRead, %ld sessionRead, %ld userWrite, %ld sessionWrite\n", disconnectTime - startTime, (long) strlen(userContents) + strlen(sessionContents), connectTime - startTime, userReadTime - connectTime, sessionReadTime - userReadTime, userWriteTime - sessionReadTime, sessionWriteTime - userReadTime); freez(&userContents); freez(&sessionContents); sleep1000(milliDelay); if (++iteration >= iterations) return; } } }
int edwFileFetch(struct sqlConnection *conn, struct edwFile *ef, int fd, char *submitFileName, unsigned submitId, unsigned submitDirId, unsigned hostId) /* Fetch file and if successful update a bunch of the fields in ef with the result. * Returns fileId. */ { ef->id = makeNewEmptyFileRecord(conn, submitId, submitDirId, ef->submitFileName, ef->size); /* Update edwSubmit with file in transit info */ char query[256]; sqlSafef(query, sizeof(query), "update edwSubmit set fileIdInTransit=%lld where id=%u", (long long)ef->id, submitId); sqlUpdate(conn, query); sqlSafef(query, sizeof(query), "select paraFetchStreams from edwHost where id=%u", hostId); int paraFetchStreams = sqlQuickNum(conn, query); struct paraFetchInterruptContext interruptContext = {.conn=conn, .submitId=submitId}; /* Wrap getting the file, the actual data transfer, with an error catcher that * will remove partly uploaded files. Perhaps some day we'll attempt to rescue * ones that are just truncated by downloading the rest, but not now. */ struct errCatch *errCatch = errCatchNew(); char tempName[PATH_LEN] = ""; char edwFile[PATH_LEN] = "", edwPath[PATH_LEN]; if (errCatchStart(errCatch)) { /* Now make temp file name and open temp file in an atomic operation */ char *tempDir = edwTempDir(); safef(tempName, PATH_LEN, "%sedwSubmitXXXXXX", tempDir); int localFd = mustMkstemp(tempName); /* Update file name in database with temp file name so web app can track us. */ char query[PATH_LEN+128]; sqlSafef(query, sizeof(query), "update edwFile set edwFileName='%s' where id=%lld", tempName + strlen(edwRootDir), (long long)ef->id); sqlUpdate(conn, query); /* Do actual upload tracking how long it takes. */ ef->startUploadTime = edwNow(); mustCloseFd(&localFd); if (!parallelFetchInterruptable(submitFileName, tempName, paraFetchStreams, 4, FALSE, FALSE, paraFetchInterruptFunction, &interruptContext)) { if (interruptContext.isInterrupted) errAbort("Submission stopped by user."); else errAbort("parallel fetch of %s failed", submitFileName); } ef->endUploadTime = edwNow(); /* Rename file both in file system and (via ef) database. */ edwMakeFileNameAndPath(ef->id, submitFileName, edwFile, edwPath); mustRename(tempName, edwPath); if (endsWith(edwPath, ".gz") && !encode3IsGzipped(edwPath)) errAbort("%s has .gz suffix, but is not gzipped", submitFileName); ef->edwFileName = cloneString(edwFile); } errCatchEnd(errCatch); if (errCatch->gotError) { /* Attempt to remove any partial file. */ if (tempName[0] != 0) { verbose(1, "Removing partial %s\n", tempName); parallelFetchRemovePartial(tempName); remove(tempName); } handleSubmitError(conn, submitId, errCatch->message->string); // Throws further assert(FALSE); // We never get here } errCatchFree(&errCatch); /* Now we got the file. We'll go ahead and save the file name and stuff. */ sqlSafef(query, sizeof(query), "update edwFile set" " edwFileName='%s', startUploadTime=%lld, endUploadTime=%lld" " where id = %d" , ef->edwFileName, ef->startUploadTime, ef->endUploadTime, ef->id); sqlUpdate(conn, query); /* Wrap the validations in an error catcher that will save error to file table in database */ errCatch = errCatchNew(); boolean success = FALSE; if (errCatchStart(errCatch)) { /* Check MD5 sum here. */ unsigned char md5bin[16]; md5ForFile(edwPath, md5bin); char md5[33]; hexBinaryString(md5bin, sizeof(md5bin), md5, sizeof(md5)); if (!sameWord(md5, ef->md5)) errAbort("%s has md5 mismatch: %s != %s. File may be corrupted in upload, or file may have " "been changed since validateManifest was run. Please check that md5 of file " "before upload is really %s. If it is then try submitting again, otherwise " "rerun validateManifest and then try submitting again. \n", ef->submitFileName, ef->md5, md5, ef->md5); /* Finish updating a bunch more of edwFile record. Note there is a requirement in * the validFile section that ef->updateTime be updated last. A nonzero ef->updateTime * is used as a sign of record complete. */ struct dyString *dy = dyStringNew(0); /* Includes tag so query may be long */ sqlDyStringPrintf(dy, "update edwFile set md5='%s',size=%lld,updateTime=%lld", md5, ef->size, ef->updateTime); dyStringAppend(dy, ", tags='"); dyStringAppend(dy, ef->tags); dyStringPrintf(dy, "' where id=%d", ef->id); sqlUpdate(conn, dy->string); dyStringFree(&dy); /* Update edwSubmit so file no longer shown as in transit */ sqlSafef(query, sizeof(query), "update edwSubmit set fileIdInTransit=0 where id=%u", submitId); sqlUpdate(conn, query); success = TRUE; } errCatchEnd(errCatch); if (errCatch->gotError) { handleFileError(conn, submitId, ef->id, errCatch->message->string); } return ef->id; }
int checkTableCoords(char *db) /* Check several invariants (see comments in check*() above), * summarize errors, return nonzero if there are errors. */ { struct sqlConnection *conn = hAllocConn(db); struct slName *tableList = NULL, *curTable = NULL; struct slName *allChroms = NULL; boolean gotError = FALSE; allChroms = hAllChromNames(db); if (theTable == NULL) tableList = getTableNames(conn); else if (sqlTableExists(conn, theTable)) tableList = newSlName(theTable); else errAbort("Error: specified table \"%s\" does not exist in database %s.", theTable, db); for (curTable = tableList; curTable != NULL; curTable = curTable->next) { struct hTableInfo *hti = NULL; struct slName *chromList = NULL, *chromPtr = NULL; char *table = curTable->name; char tableChrom[32], trackName[128], tableChromPrefix[33]; hParseTableName(db, table, trackName, tableChrom); hti = hFindTableInfo(db, tableChrom, trackName); if (hti != NULL && hti->isPos) { /* watch out for presence of both split and non-split tables; * hti for non-split will be replaced with hti of split. */ if (splitAndNonSplitExist(conn, table, tableChrom)) continue; safef(tableChromPrefix, sizeof(tableChromPrefix), "%s_", tableChrom); if (hti->isSplit) chromList = newSlName(tableChrom); else chromList = allChroms; /* invariant: chrom must be described in chromInfo. */ /* items with bad chrom will be invisible to hGetBedRange(), so * catch them here by SQL query. */ /* The SQL query is too huge for scaffold-based db's, check count: */ if (hChromCount(db) <= MAX_SEQS_SUPPORTED) { if (isNotEmpty(hti->chromField)) { struct dyString *bigQuery = newDyString(1024); dyStringClear(bigQuery); sqlDyStringPrintf(bigQuery, "select count(*) from %s where ", table); for (chromPtr=chromList; chromPtr != NULL; chromPtr=chromPtr->next) { sqlDyStringPrintf(bigQuery, "%s != '%s' ", hti->chromField, chromPtr->name); if (chromPtr->next != NULL) dyStringAppend(bigQuery, "AND "); } gotError |= reportErrors(BAD_CHROM, table, sqlQuickNum(conn, bigQuery->string)); dyStringFree(&bigQuery); } for (chromPtr=chromList; chromPtr != NULL; chromPtr=chromPtr->next) { char *chrom = chromPtr->name; struct bed *bedList = hGetBedRange(db, table, chrom, 0, 0, NULL); if (hti->isSplit && isNotEmpty(hti->chromField)) gotError |= checkSplitTableOnlyChrom(bedList, table, hti, tableChrom); gotError |= checkStartEnd(bedList, table, hti, testChromSize(chrom)); if (hti->hasCDS) gotError |= checkCDSStartEnd(bedList, table, hti); if (hti->hasBlocks && !ignoreBlocks) gotError |= checkBlocks(bedList, table, hti); bedFreeList(&bedList); } } } } return gotError; }
char *getKnownGeneUrl(struct sqlConnection *conn, int geneId) /* Given gene ID, try and find known gene on browser in same * species. */ { char query[256]; int taxon; char *url = NULL; char *genomeDb = NULL; /* Figure out taxon. */ safef(query, sizeof(query), "select taxon from gene where id = %d", geneId); taxon = sqlQuickNum(conn, query); genomeDb = hDbForTaxon(conn, taxon); if (genomeDb != NULL) { /* Make sure known genes track exists - we may need * to tweak this at some point for model organisms. */ safef(query, sizeof(query), "%s.knownToVisiGene", genomeDb); if (!sqlTableExists(conn, query)) genomeDb = NULL; } /* If no db for that organism revert to human. */ if (genomeDb == NULL) genomeDb = hDefaultDb(); safef(query, sizeof(query), "%s.knownToVisiGene", genomeDb); if (sqlTableExists(conn, query)) { struct dyString *dy = dyStringNew(0); char *knownGene = NULL; if (sqlCountColumnsInTable(conn, query) == 3) { dyStringPrintf(dy, "select name from %s.knownToVisiGene where geneId = %d", genomeDb, geneId); } else { struct slName *imageList, *image; safef(query, sizeof(query), "select imageProbe.image from probe,imageProbe " "where probe.gene=%d and imageProbe.probe=probe.id", geneId); imageList = sqlQuickList(conn, query); if (imageList != NULL) { dyStringPrintf(dy, "select name from %s.knownToVisiGene ", genomeDb); dyStringAppend(dy, "where value in("); for (image = imageList; image != NULL; image = image->next) { dyStringPrintf(dy, "'%s'", image->name); if (image->next != NULL) dyStringAppendC(dy, ','); } dyStringAppend(dy, ")"); slFreeList(&imageList); } } if (dy->stringSize > 0) { knownGene = sqlQuickString(conn, dy->string); if (knownGene != NULL) { dyStringClear(dy); dyStringPrintf(dy, "../cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_chrom=none", genomeDb, knownGene); url = dyStringCannibalize(&dy); } } dyStringFree(&dy); } freez(&genomeDb); return url; }
char *visiGeneHypertextGenotype(struct sqlConnection *conn, int id) /* Return genotype of organism if any in nifty hypertext format. */ { int genotypeId; struct slName *geneIdList, *geneId; char query[256]; struct dyString *html; /* Look up genotype ID. */ safef(query, sizeof(query), "select specimen.genotype from image,specimen " "where image.id=%d and image.specimen = specimen.id", id); genotypeId = sqlQuickNum(conn, query); if (genotypeId == 0) return NULL; /* Get list of genes involved. */ safef(query, sizeof(query), "select distinct allele.gene from genotypeAllele,allele " "where genotypeAllele.genotype=%d " "and genotypeAllele.allele = allele.id" , genotypeId); geneIdList = sqlQuickList(conn, query); if (geneIdList == NULL) return cloneString("wild type"); /* Loop through each gene adding information to html. */ html = dyStringNew(0); for (geneId = geneIdList; geneId != NULL; geneId = geneId->next) { char *geneName; struct slName *alleleList, *allele; int alleleCount; boolean needsSlash = FALSE; /* Get gene name. */ safef(query, sizeof(query), "select name from gene where id=%s", geneId->name); geneName = sqlQuickString(conn, query); if (geneName == NULL) internalErr(); /* Process each allele of gene. */ safef(query, sizeof(query), "select allele.name from genotypeAllele,allele " "where genotypeAllele.genotype=%d " "and genotypeAllele.allele = allele.id " "and allele.gene=%s" , genotypeId, geneId->name); alleleList = sqlQuickList(conn, query); alleleCount = slCount(alleleList); for (allele = alleleList; allele != NULL; allele = allele->next) { char *simplifiedAllele = getSimplifiedAllele(geneName, allele->name); int repCount = 1, rep; if (alleleCount == 1) repCount = 2; for (rep = 0; rep < repCount; ++rep) { if (needsSlash) dyStringAppendC(html, '/'); else needsSlash = TRUE; dyStringAppend(html, geneName); dyStringPrintf(html, "<SUP>%s</SUP>", simplifiedAllele); } freeMem(simplifiedAllele); } if (geneId->next != NULL) dyStringAppendC(html, ' '); slFreeList(&alleleList); freeMem(geneName); } slFreeList(&geneIdList); return dyStringCannibalize(&html); }
static struct slName *getProbeList(struct sqlConnection *conn, int id) /* Get list of probes with hyperlinks to probe info page. */ { struct slName *returnList = NULL; char query[256]; char *sidUrl = cartSidUrlString(cart); struct dyString *dy = dyStringNew(0); struct slInt *probeList = NULL, *probe; int submissionSource = 0; /* Make up a list of all probes in this image. */ safef(query, sizeof(query), "select probe from imageProbe where image=%d", id); probeList = sqlQuickNumList(conn, query); safef(query, sizeof(query), "select submissionSet.submissionSource from image, submissionSet" " where image.submissionSet = submissionSet.id and image.id=%d", id); submissionSource = sqlQuickNum(conn, query); for (probe = probeList; probe != NULL; probe = probe->next) { char *type; /* Create hyperlink to probe page around gene name. */ dyStringClear(dy); dyStringPrintf(dy, "<A HREF=\"%s?%s&%s=%d&%s=%d\" target=_parent>", hgVisiGeneCgiName(), sidUrl, hgpDoProbe, probe->val, hgpSs, submissionSource); safef(query, sizeof(query), "select probeType.name from probeType,probe where probe.id = %d " "and probe.probeType = probeType.id", probe->val); type = sqlQuickString(conn, query); dyStringPrintf(dy, "%s", naForEmpty(type)); if (sameWord(type, "antibody")) { char *abName; safef(query, sizeof(query), "select antibody.name from probe,antibody " "where probe.id = %d and probe.antibody = antibody.id" , probe->val); abName = sqlQuickString(conn, query); if (abName != NULL) { dyStringPrintf(dy, " %s", abName); freeMem(abName); } } else if (sameWord(type, "RNA")) { safef(query, sizeof(query), "select length(seq) from probe where id=%d", probe->val); if (sqlQuickNum(conn, query) > 0) dyStringPrintf(dy, " sequenced"); else { safef(query, sizeof(query), "select length(fPrimer) from probe where id=%d", probe->val); if (sqlQuickNum(conn, query) > 0) dyStringPrintf(dy, " from primers"); } } else if (sameWord(type, "BAC")) { char *name; safef(query, sizeof(query), "select bac.name from probe,bac " "where probe.id = %d and probe.bac = bac.id" , probe->val); name = sqlQuickString(conn, query); if (name != NULL) { dyStringPrintf(dy, " %s", name); freeMem(name); } } dyStringPrintf(dy, "</A>"); freez(&type); /* Add to return list. */ slNameAddTail(&returnList, dy->string); } slFreeList(&probeList); slReverse(&returnList); return returnList; }
static struct slName *geneProbeList(struct sqlConnection *conn, int id) /* Get list of gene names with hyperlinks to probe info page. */ { struct slName *returnList = NULL; char query[256], **row; struct sqlResult *sr; struct dyString *dy = dyStringNew(0); struct probeAndColor *pcList = NULL, *pc; int probeCount = 0; /* Make up a list of all probes in this image. */ safef(query, sizeof(query), "select probe,probeColor from imageProbe where image=%d", id); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { AllocVar(pc); pc->probe = sqlUnsigned(row[0]); pc->probeColor = sqlUnsigned(row[1]); slAddHead(&pcList, pc); ++probeCount; } slReverse(&pcList); for (pc = pcList; pc != NULL; pc = pc->next) { int geneId; char *geneName; int probe = pc->probe; char *geneUrl = NULL; /* Get gene ID and name. */ safef(query, sizeof(query), "select gene from probe where id = %d", probe); geneId = sqlQuickNum(conn, query); geneName = vgGeneNameFromId(conn, geneId); /* Get url for known genes page if any. */ geneUrl = getKnownGeneUrl(conn, geneId); /* Print gene name, surrounded by hyperlink to known genes * page if possible. */ dyStringClear(dy); if (geneUrl != NULL) dyStringPrintf(dy, "<A HREF=\"%s\" target=_parent>", geneUrl); dyStringPrintf(dy, "%s", geneName); if (geneUrl != NULL) dyStringAppend(dy, "</A>"); freez(&geneName); /* Add color if there's more than one probe for this image. */ if (probeCount > 1) { char *color; safef(query, sizeof(query), "select probeColor.name from probeColor " "where probeColor.id = %d" , pc->probeColor); color = sqlQuickString(conn, query); if (color != NULL) dyStringPrintf(dy, " (%s)", color); freez(&color); } /* Add to return list. */ slNameAddTail(&returnList, dy->string); } slFreeList(&pcList); slReverse(&returnList); return returnList; }
static void displayMappingInfo(struct sqlConnection *conn, struct mappingInfo *mi) /* display information from a transMap table */ { struct ucscRetroInfo *pg = mi->pg; double wt[12]; /* weights on score function*/ char query[512]; char *name; char alignTbl[128]; char scoreSql[128]; struct psl *psl; float coverFactor = 0; float maxOverlap = 0; if (mi->suffix == NULL) { safef(alignTbl, sizeof(alignTbl), "%s%sAli", mi->tblPre, mi->geneSet); sqlSafef(scoreSql, sizeof(scoreSql), "select max(score) from %s%sInfo", mi->tblPre, mi->geneSet); } else { safef(alignTbl, sizeof(alignTbl), "%s%sAli%s", mi->tblPre, mi->geneSet, mi->suffix); sqlSafef(scoreSql, sizeof(scoreSql), "select max(score) from %s%sInfo%s", mi->tblPre, mi->geneSet, mi->suffix); } printf("<TABLE class=\"transMap\">\n"); printf("<H3>Retrogene Statistics:</H3>\n"); printf("<THEAD>\n"); printf("<TR><TH>Feature<TH>Value </TR>\n"); printf("</THEAD><TBODY>\n"); if (sameString(pg->type, "singleExon")) printf("<TR><TH>Type of Parent<TD>%s</tr>\n",pg->type); else printf("<TR><TH>Expression of Retrogene<TD>%s</TR>\n",pg->type); printf("<TR><TH>Score <TD>%d (range from 0 - %d)</TR>\n", pg->score, sqlQuickNum(conn, scoreSql) ); printf("<TR><TH>Parent Gene Alignment Coverage (Bases Matching Parent) <TD>%d %% (%d bp) </TR>\n", pg->coverage, pg->matches); printf("<TR><TH>Introns Processed Out <TD>%d out of %d (%d exons covered)\n", pg->processedIntrons, (pg->parentSpliceCount/2), pg->exonCover); printf("<TR><TH>Possible Introns or Gaps in Retrogene<TD>%d,%d\n", pg->intronCount, pg->alignGapCount); printf("<TR><TH>Conserved Splice Sites<TD>%d</TR>\n", pg->conservedSpliceSites); printf("<TR><TH>Parent Splice Sites<TD>%d</TR>\n", pg->parentSpliceCount); psl = getAlignments(conn, alignTbl, mi->pg->name); if (psl != NULL) { maxOverlap = (float)pg->maxOverlap/(float)(psl->match+psl->misMatch+psl->repMatch) ; coverFactor = ((float)(psl->qSize-psl->qEnd)/(float)psl->qSize); } else { maxOverlap = 0; } wt[0] = 0; wt[1] = 0.85; wt[2] = 0.2; wt[3] = 0.3; wt[4] = 0.8; wt[5] = 1; wt[6] = 1 ; wt[7] = 0.5; wt[8] = 0.5; wt[9] = 1; wt[10] = 1; #ifdef debug char table[512]; struct psl *pslList = getParentAligns(conn, mi, &table); if (psl != NULL) { printf("<TR><TH>Blocks in retro:gap%%/intronsSpliced <TD>\n"); printBlocks(psl, MAXBLOCKGAP, pslList); printf("</td></TR>\n"); } if (pslList != NULL) { printf("<TR><TH>Exons in parent:gap%% <TD>\n"); printBlocks(pslList, MAXBLOCKGAP, NULL); printf("</td></TR>\n"); pslFreeList(&pslList); } #endif printf("<TR><TH>Length of PolyA Tail<TD>%d As out of %d bp </TR><TR><TH>%% A's from Parent PolyA tail (Position)<TD>%5.1f %%\n",pg->polyA,pg->polyAlen, (float)pg->polyA*100/(float)pg->polyAlen); if (pg->polyAstart < 0) printf(" (%d bp before end of retrogene)<br>\n",-(pg->polyAstart)); else printf(" (%d bp past end of retrogene)<br>\n",pg->polyAstart); printf("<tr><th>mRNA Expression Evidence<td>"); if (!sameString(pg->overName, "none")) printf("%s (overlap: %d bp)\n", pg->overName, pg->maxOverlap); else printf("No overlapping"); printf("<TR><TH>BESTORF Score (>50 is good)<TD>%4.0f</td></TR>\n",pg->posConf); #ifdef score printf("<TR><TH>score function<TD>1:xon %d %4.1f conSS %d 2: ax %4.1f 3: pA %4.1f 4: net + %4.1f max (%d, %d) 5: procIntrons %d %4.1f 6:in.cnt %d -%4.1f 7:overlap - %4.1f 8:cov %d*(qe %d- qsz %d)/%d=%4.1f 9:tRep - %4.1f 10:oldintron %d %4.1f </td></TR>\n", pg->exonCover, wt[1]*(log(pg->exonCover+1)/log(2))*200 , pg->conservedSpliceSites, wt[2]*(((log(pg->axtScore>0?pg->axtScore:1)/log(2))*170)-1000), wt[3]*(log(pg->polyAlen+2)*200) , wt[4]*overlapOrtholog*10 , pg->overlapMouse, pg->overlapDog, pg->processedIntrons, wt[5]*(((log(pg->processedIntrons > 0 ? pg->processedIntrons : 1))/log(2))*600) , pg->intronCount, wt[6]*pow(pg->intronCount,0.5)*750 , wt[7]*(maxOverlap*300), pg->coverage, pg->qEnd, pg->qSize , pg->qSize, wt[8]*((pg->coverage/100.0)*(1.0-coverFactor)*300.0), wt[9]*(pg->tReps*10), pg->alignGapCount, wt[10]*pg->alignGapCount); printf("<TR><TH>score function<TD>%4.1f+ %4.1f+ %4.1f+ %4.1f+ %4.1f - %4.1f - %4.1f+ %4.1f - %4.1f - %4.1f</td></TR>\n", wt[1]*(log(pg->exonCover+1)/log(2))*200 , wt[2]*(((log(pg->axtScore>0?pg->axtScore:1)/log(2))*170)-1000), wt[3]*(log(pg->polyAlen+2)*200) , wt[4]*overlapOrtholog*10 , wt[5]*(((log(pg->processedIntrons > 0 ? pg->processedIntrons : 1))/log(2))*600) , (float)wt[6]*pow(pg->intronCount,0.5)*750 , (float)wt[7]*(maxOverlap*300), wt[8]*((pg->coverage/100.0)*(1.0-coverFactor)*300.0), wt[9]*(pg->tReps*10), wt[10]*pg->alignGapCount); if (pg->kaku > 0 && pg->kaku < 1000000) printf("<TR><TH>KA/KU mutation rate in non-syn sites vs utr with repect to parent gene<TD>%4.2f</TR>\n", pg->kaku); #endif #ifdef xxx sqlSafef(query, sizeof(query), "select * from refGene where chrom = '%d' and txEnd > %d and txStart %d and name = '%s'", pg->chrom, pg->gStart, pg->gEnd , pg->overName ); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) overlappingGene = genePredLoad(row); if (overlappingGene != NULL) { printf ("CDS exons %d ",genePredcountCdsExons(overlappingGene)); } #endif printf("</tr>\n"); if ( differentString("none",pg->overName) && sqlFieldIndex(conn, "refGene", "exonFrames") != -1) { sqlSafef(query, sizeof(query), "select concat(exonFrames,'(',cdsStart,')') from refGene where name = '%s' and chrom = '%s'" , pg->overName, pg->chrom); if (sqlQuickString(conn, query) != NULL) printf("<TR><TH>Frame of retro %s (start)<TD>%s</TR>\n", pg->overName, sqlQuickString(conn, query)); } name = cloneString(pg->name); chopSuffix(name); sqlSafef(query, sizeof(query), "select concat(exonFrames,'(',cdsStart,')') from rbRetroParent where name like '%s%%' and chrom = '%s'" , name, pg->chrom); if (hTableExists(database, "rbRetroParent")) { if ( sqlQuickString(conn, query) != NULL) printf("<TR><TH>Frames of mapped parent %s (start)<TD>%s</TR>\n", name, sqlQuickString(conn, query)); } printf("</TBODY></TABLE>\n"); }
void edwScriptSubmitStatus() /* edwScriptSubmitStatus - Programatically check status of submission.. */ { /* Pause a second - prevent inadvertent harsh denial of service from scripts. */ sleep(2); edwScriptRegistryFromCgi(); /* Get submission from url. */ struct sqlConnection *conn = edwConnect(); char query[512]; char *url = cgiString("url"); struct edwSubmit *sub = edwMostRecentSubmission(conn, url); char *status = NULL; if (sub == NULL) { int posInQueue = edwSubmitPositionInQueue(conn, url, NULL); if (posInQueue == -1) errAbort("%s has not been submitted", url); else status = "pending"; } else { time_t endUploadTime = sub->endUploadTime; if (!isEmpty(sub->errorMessage)) { status = "error"; } else if (endUploadTime == 0) { status = "uploading"; } else { safef(query, sizeof(query), "select count(*) from edwFile where submitId=%u and errorMessage != ''", sub->id); int errCount = sqlQuickNum(conn, query); int newValid = edwSubmitCountNewValid(sub, conn); if (newValid + errCount < sub->newFiles) status = "validating"; else if (errCount > 0) status = "error"; else status = "success"; } } /* Construct JSON result */ struct dyString *dy = dyStringNew(0); dyStringPrintf(dy, "{\n"); dyStringPrintf(dy, " \"status\": \"%s\"", status); if (sameString(status, "error")) { dyStringPrintf(dy, ",\n"); dyStringPrintf(dy, " \"errors\": [\n"); int errCount = 0; if (!isEmpty(sub->errorMessage)) { addErrFile(dy, errCount, sub->url, sub->errorMessage); ++errCount; } safef(query, sizeof(query), "select * from edwFile where submitId=%u and errorMessage != ''", sub->id); struct edwFile *file, *fileList = edwFileLoadByQuery(conn, query); for (file = fileList; file != NULL; file = file->next) { addErrFile(dy, errCount, file->submitFileName, file->errorMessage); ++errCount; } dyStringPrintf(dy, "\n ]\n"); dyStringPrintf(dy, "}\n"); } else { dyStringPrintf(dy, "\n}\n"); } /* Write out HTTP response */ printf("Content-Length: %d\r\n", dy->stringSize); puts("Content-Type: application/json; charset=UTF-8\r"); puts("\r"); printf("%s", dy->string); }
static void processIsPcr(struct sqlConnection *conn, int taxon, char *db) /* process isPcr results */ { /* >NM_010919:371+1088 2 718bp CGCGGATCCAAGGACATCTTGGACCTTCCG CCCAAGCTTGCATGTGCTGCAGCGACTGCG */ struct dyString *dy = dyStringNew(0); struct lineFile *lf = lineFileOpen("isPcr.fa", TRUE); int lineSize; char *line; char *name; char *dna; char *word, *end; char *tName; int tStart; int tEnd; char *tStrand; int probeid=0; /* really a vgPrb id */ boolean more = lineFileNext(lf, &line, &lineSize); while(more) { if (line[0] != '>') errAbort("unexpected error out of phase\n"); name = cloneString(line); verbose(1,"name=%s\n",name); dyStringClear(dy); while((more=lineFileNext(lf, &line, &lineSize))) { if (line[0] == '>') { break; } dyStringAppend(dy,line); } dna = cloneString(dy->string); word = name+1; end = strchr(word,':'); tName = cloneStringZ(word,end-word); word = end+1; end = strchr(word,'+'); tStrand = "+"; if (!end) { end = strchr(word,'-'); tStrand = "-"; } tStart = atoi(word); word = end+1; end = strchr(word,' '); tEnd = atoi(word); word = end+1; end = strchr(word,' '); probeid = atoi(word); dyStringClear(dy); dyStringPrintf(dy, "select count(*) from vgPrb where id=%d and state='new'",probeid); if (sqlQuickNum(conn,dy->string)>0) { /* record exists and hasn't already been updated */ int vgPrb = findVgPrbBySeq(conn,dna,taxon); if (vgPrb == 0) { dyStringClear(dy); dyStringAppend(dy, "update vgPrb set"); dyStringAppend(dy, " seq='"); dyStringAppend(dy, dna); dyStringAppend(dy, "',\n"); dyStringPrintf(dy, " tName='%s',\n", tName); dyStringPrintf(dy, " tStart=%d,\n", tStart); dyStringPrintf(dy, " tEnd=%d,\n", tEnd); dyStringPrintf(dy, " tStrand='%s',\n", tStrand); dyStringPrintf(dy, " db='%s',\n", db); dyStringPrintf(dy, " state='%s'\n", "seq"); dyStringPrintf(dy, " where id=%d\n", probeid); dyStringPrintf(dy, " and state='%s'\n", "new"); verbose(2, "%s\n", dy->string); sqlUpdate(conn, dy->string); } else /* probe seq already exists */ { /* just re-map the probe table recs to it */ dyStringClear(dy); dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,probeid); sqlUpdate(conn, dy->string); /* and delete it from vgPrb */ dyStringClear(dy); dyStringPrintf(dy, "delete from vgPrb where id=%d",probeid); sqlUpdate(conn, dy->string); } } freez(&tName); freez(&name); freez(&dna); } lineFileClose(&lf); dyStringFree(&dy); }
void cartSimulate(char *host, char *user, char *password, char *database) /* Simulate action of various UCSC Genome Browser CGIs on cart. */ { /* Figure out size of tables. */ struct sqlConnection *conn = sqlConnectRemote(host, user, password, database); int userDbSize = sqlQuickNum(conn, "NOSQLINJ select count(*) from userDb"); if (userDbSize == 0) errAbort("%s.%s table is empty", database, userTable); int maxSampleSize = 1024*1024; int sampleSize = min(userDbSize, maxSampleSize); verbose(2, "# userDb has %d rows, sampling %d\n" , userDbSize, sampleSize); /* Get sample of user id's. */ int *userIds = getSomeInts(conn, "userDb", "id", sampleSize); /* Get userCount random indexes. */ int *randomIxArray, ix; AllocArray(randomIxArray, userCount); verbose(2, "random user ix:\n"); for (ix=0; ix<userCount; ++ix) { randomIxArray[ix] = rand() % sampleSize; verbose(2, "%d ", randomIxArray[ix]); } verbose(2, "\n"); sqlDisconnect(&conn); int iteration = 0; for (;;) { for (ix = 0; ix < userCount; ++ix) { int randomIx = rand()%sampleSize; boolean doNew = randomBitFromProb(newRatio); long startTime = clock1000(); struct sqlConnection *conn = sqlConnectRemote(host, user, password, database); long connectTime = clock1000(); struct dyString *contents = fakeCart(randomFakeSize()); char *userContents = NULL; int userId = userIds[randomIx]; if (doNew) userId = userIds[randomIx] = dummyInsert(conn, userTable); int userUseCount = dummyQuery(conn, userTable, userId, &userContents); long userReadTime = clock1000(); sleep1000(cgiDelay); long cgiSleepTime = clock1000(); updateOne(conn, userTable, contents->string, userId, userUseCount); long userWriteTime = clock1000(); sqlDisconnect(&conn); long disconnectTime = clock1000(); printf("%ld total, %ld oldSize, %ld newSize, %ld connect, %ld userRead, %ld userWrite, %ld disconnect\n", disconnectTime - startTime - (cgiSleepTime - userReadTime), (long) strlen(userContents), (long)contents->stringSize, connectTime - startTime, userReadTime - connectTime, userWriteTime - cgiSleepTime, disconnectTime - userWriteTime ); dyStringFree(&contents); freez(&userContents); sleep1000(hitDelay); if (++iteration >= iterations) return; } } errAbort("cartSimulate(%s %s %s %s) not implemented", host, user, password, database); }
static int doBacs(struct sqlConnection *conn, int taxon, char *db) /* fetch available sequence for bacEndPairs */ { struct dyString *dy = dyStringNew(0); struct dnaSeq *chromSeq = NULL; struct bac *bacs = bacRead(conn, taxon, db); struct bac *bac = NULL; char *chrom = cloneString(""); int count = 0; verbose(1,"bac list read done.\n"); for(bac=bacs;bac;bac=bac->next) { if (differentWord(chrom,bac->chrom)) { verbose(1,"switching to chrom %s\n",bac->chrom); dnaSeqFree(&chromSeq); chromSeq = hLoadChrom(bac->chrom,db); freez(&chrom); chrom = cloneString(bac->chrom); } char *dna = checkAndFetchBacDna(chromSeq, bac); if (sameString(bac->strand,"-")) { reverseComplement(dna,strlen(dna)); } dyStringClear(dy); dyStringPrintf(dy, "select count(*) from vgPrb where id=%d and state='new'",bac->probe); if (sqlQuickNum(conn,dy->string)>0) { /* record exists and hasn't already been updated */ int vgPrb = findVgPrbBySeq(conn,dna,taxon); if (vgPrb == 0) { dyStringClear(dy); dyStringAppend(dy, "update vgPrb set"); dyStringAppend(dy, " seq='"); dyStringAppend(dy, dna); dyStringAppend(dy, "',\n"); dyStringPrintf(dy, " tName='%s',\n", bac->chrom); dyStringPrintf(dy, " tStart=%d,\n", bac->chromStart); dyStringPrintf(dy, " tEnd=%d,\n", bac->chromEnd); dyStringPrintf(dy, " tStrand='%s',\n", bac->strand); dyStringPrintf(dy, " db='%s',\n", db); dyStringPrintf(dy, " state='%s'\n", "seq"); dyStringPrintf(dy, " where id=%d\n", bac->probe); dyStringPrintf(dy, " and state='%s'\n", "new"); //verbose(2, "%s\n", dy->string); // the sql string could be quite large sqlUpdate(conn, dy->string); } else /* probe seq already exists */ { /* just re-map the probe table recs to it */ dyStringClear(dy); dyStringPrintf(dy, "update vgPrbMap set vgPrb=%d where vgPrb=%d",vgPrb,bac->probe); sqlUpdate(conn, dy->string); /* and delete it from vgPrb */ dyStringClear(dy); dyStringPrintf(dy, "delete from vgPrb where id=%d",bac->probe); sqlUpdate(conn, dy->string); } ++count; verbose(2,"%d finished bac for probe id %d size %d\n", count, bac->probe, bac->chromEnd - bac->chromStart); } freez(&dna); } freez(&chrom); dnaSeqFree(&chromSeq); bacFreeList(&bacs); dyStringFree(&dy); return count; }
void vgRemoveSubmission(char *database, char *submissionSetId) /* vgRemoveSubmission - Remove submissionSet and associated images.. */ { struct sqlConnection *conn = sqlConnect(database); int submitId = atoi(submissionSetId); char *submitName; struct dyString *query = dyStringNew(0); struct slInt *imageList = NULL, *imageProbeList = NULL; int imageFileCount, contributorCount; /* As a sanity check get the name of submission set and print it */ sqlDyStringPrintf(query, "select name from submissionSet where id=%d", submitId); submitName = sqlQuickString(conn, query->string); if (submitName == NULL) errAbort("No submissionSetId %s in %s", submissionSetId, database); verbose(1, "Removing submissionSet named %s\n", submitName); /* Figure out how many submissionContributors we'll delete. */ dyStringClear(query); sqlDyStringPrintf(query, "select count(*) from submissionContributor where submissionSet=%d", submitId); contributorCount = sqlQuickNum(conn, query->string); /* Actually delete submissionContributors. */ dyStringClear(query); sqlDyStringPrintf(query, "delete from submissionContributor where submissionSet=%d", submitId); maybeUpdate(conn, query->string); verbose(1, "Deleted %d submissionContributors\n", contributorCount); /* Get list of images we'll delete. */ dyStringClear(query); sqlDyStringPrintf(query, "select id from image where submissionSet=%d", submitId); imageList = sqlQuickNumList(conn, query->string); /* Get list of imageProbes. */ if (imageList != NULL) { dyStringClear(query); sqlDyStringPrintf(query, "select id from imageProbe where image "); intInClause(query, imageList); imageProbeList = sqlQuickNumList(conn, query->string); } /* Delete expressionLevel's tied to imageProbes. */ if (imageProbeList != NULL) { int oldExpLevel = sqlQuickNum(conn, NOSQLINJ "select count(*) from expressionLevel"); int newExpLevel; dyStringClear(query); sqlDyStringPrintf(query, "delete from expressionLevel where imageProbe "); intInClause(query, imageProbeList); maybeUpdate(conn, query->string); newExpLevel = sqlQuickNum(conn, NOSQLINJ "select count(*) from expressionLevel"); verbose(1, "Deleted %d expressionLevels\n", oldExpLevel - newExpLevel); } /* Delete image probes. */ if (imageProbeList != NULL) { dyStringClear(query); sqlDyStringPrintf(query, "delete from imageProbe where image "); intInClause(query, imageList); maybeUpdate(conn, query->string); } verbose(1, "Deleted %d image probes.\n", slCount(imageProbeList)); /* Delete images. */ dyStringClear(query); sqlDyStringPrintf(query, "delete from image where submissionSet=%d", submitId); maybeUpdate(conn, query->string); verbose(1, "Deleted %d images.\n", slCount(imageList)); /* Delete imageFiles. */ dyStringClear(query); sqlDyStringPrintf(query, "select count(*) from imageFile where submissionSet=%d", submitId); imageFileCount = sqlQuickNum(conn, query->string); dyStringClear(query); sqlDyStringPrintf(query, "delete from imageFile where submissionSet=%d", submitId); maybeUpdate(conn, query->string); verbose(1, "Deleted %d imageFile records.\n", imageFileCount); /* Delete submissionSet record. */ dyStringClear(query); sqlDyStringPrintf(query, "delete from submissionSet where id=%d", submitId); maybeUpdate(conn, query->string); dyStringFree(&query); slFreeList(&imageList); sqlDisconnect(&conn); }
void showCdsEvidence(char *geneName, struct trackDb *tdb, char *evTable) /* Print out stuff from cdsEvidence table. */ { struct sqlConnection *conn = hAllocConn(database); double bestScore = 0; if (sqlTableExists(conn, evTable)) { webNewSection("CDS Prediction Information"); char query[512]; sqlSafef(query, sizeof(query), "select count(*) from %s where name='%s'", evTable, geneName); if (sqlQuickNum(conn, query) > 0) { sqlSafef(query, sizeof(query), "select * from %s where name='%s' order by score desc", evTable, geneName); struct sqlResult *sr = sqlGetResult(conn, query); char **row; webPrintLinkTableStart(); webPrintLabelCell("ORF<BR>size"); webPrintLabelCell("start in<BR>transcript"); webPrintLabelCell("end in<BR>transcript"); webPrintLabelCell("source"); webPrintLabelCell("accession"); webPrintLabelCell("ad-hoc<BR>score"); webPrintLabelCell("start<BR>codon"); webPrintLabelCell("end<BR>codon"); webPrintLabelCell("piece<BR>count"); webPrintLabelCell("piece list"); webPrintLabelCell("frame"); webPrintLinkTableNewRow(); while ((row = sqlNextRow(sr)) != NULL) { struct cdsEvidence *ev = cdsEvidenceLoad(row); webPrintIntCell(ev->end - ev->start); int i; webPrintIntCell(ev->start+1); webPrintIntCell(ev->end); webPrintLinkCell(ev->source); webPrintLinkCell(ev->accession); webPrintLinkCellRightStart(); printf("%3.2f", ev->score); bestScore = max(ev->score, bestScore); webPrintLinkCellEnd(); webPrintLinkCell(ev->startComplete ? "yes" : "no"); webPrintLinkCell(ev->endComplete ? "yes" : "no"); webPrintIntCell(ev->cdsCount); webPrintLinkCellRightStart(); for (i=0; i<ev->cdsCount; ++i) { int start = ev->cdsStarts[i]; int end = start + ev->cdsSizes[i]; printf("%d-%d ", start+1, end); } webPrintLinkCellEnd(); webPrintLinkCellRightStart(); for (i=0; i<ev->cdsCount; ++i) { if (i>0) printf(","); printf("%d", ev->cdsStarts[i]%3 + 1); } webPrintLinkCellEnd(); webPrintLinkTableNewRow(); } sqlFreeResult(&sr); webPrintLinkTableEnd(); printf("This table shows CDS predictions for this transcript from a number of " "sources including alignments against UniProtKB proteins, alignments against Genbank " "mRNAs with CDS regions annotated by the sequence submitter, and " "Victor Solovyev's bestorf program. Each prediction is assigned an ad-hoc score " "score is based on several factors including the quality of " "any associated alignments, the quality of the source, and the length of the " "prediction. For RefSeq transcripts with annotated CDSs the ad-hoc score " "is over a million unless there are severe problems mapping the mRNA to the " "genome. In other cases the score generally ranges from 0 to 50,000. " "The highest scoring prediction in this table is used to define the CDS " "boundaries for this transcript.<P>If no score is 2000 or more, the transcript " "is considered non-coding. In cases where the CDS is subject to " "nonsense-mediated decay the CDS is removed. The CDS is also removed " "from transcripts when evidence points to it being in an artifact of an " "incompletely processed transcript. Specifically if the CDS is entirely " "enclosed in the 3' UTR or an intron of a refSeq or other high quality " "transcript, the CDS is removed."); } else { printf("no significant CDS prediction found, likely %s is noncoding", geneName); } } hFreeConn(&conn); }