static char *settingsLabel(struct userSettings *us, char *varName)
/* Build a display label from varName: skip the first strlen(us->savePrefix)
 * characters, copy the remainder with cloneString, and turn every '_' in the
 * copy into a space.  Returns the copy. */
{
size_t prefixLen = strlen(us->savePrefix);
char *label = cloneString(varName + prefixLen);
subChar(label, '_', ' ');
return label;
}
void uToT(struct dnaSeq *seqList)
/* For every element of the list starting at seqList, replace each 'u' in
 * its dna string with 't'.  The strings are modified in place. */
{
struct dnaSeq *cur = seqList;
while (cur != NULL)
    {
    subChar(cur->dna, 'u', 't');
    cur = cur->next;
    }
}
struct hash *readDescriptionFile(char *fileName) /* Return two column file keyed by first column with * values in second column. Strip any tabs from values. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; struct hash *hash = newHash(16); while (lineFileNextReal(lf, &line)) { char *key = nextWord(&line); char *val = skipLeadingSpaces(line); if (val != NULL && val[0] != 0) { char *desc = hashFindVal(hash, key); subChar(val, '\t', ' '); stripChar(val, '\r'); /* if gene exists in hash */ if (desc != NULL) { struct dyString *dy = dyStringNew(1024); dyStringPrintf(dy, "%s ", desc); dyStringAppend(dy, val); val = cloneString(dy->string); dyStringFree(&dy); } /* add to gene and description to hash */ hashAdd(hash, key, cloneString(val)); } } lineFileClose(&lf); return hash; }
void gsToUcsc(char *gsName, char *ucscName)
/* Convert from
 *    AC020585.5~1.2 Fragment 2 of 29 (AC020585.5:1..1195)
 * to
 *    AC020585.5_1_2
 * gsName is the input and is not modified.  The result is written to
 * ucscName; no size is passed in, so the caller must supply a buffer large
 * enough for the converted name plus its terminating zero.
 * Aborts via errAbort if gsName contains no '~'. */
{
char *s, *e, *d;
int size;

/* Copy in accession and version (everything before the tilde). */
d = ucscName;
s = gsName;
e = strchr(s, '~');
if (e == NULL)
    errAbort("Expecting ~ in %s", gsName);  /* message used to say '.' although '~' is what is searched for */
size = e - s;
memcpy(d, s, size);
d += size;

/* Skip over tilde and replace it with _ */
s = e+1;
*d++ = '_';

/* Copy the word after the tilde, up to the first white space or end of string. */
e = skipToSpaces(s);
if (e == NULL)
    e = s + strlen(s);
size = e - s;
memcpy(d, s, size);
d[size] = 0;

/* Only the part after the tilde has its dots converted; the dot inside the
 * accession.version prefix is kept. */
subChar(d, '.', '_');
return;
}
static void printTabularHeaderRow(const struct vcfInfoDef *def)
/* Print an HTML table header row whose columns come from def->description.
 * The part of the description that matches COL_DESC_REGEX is copied, '_' is
 * turned into ' ', and the copy is split on '|' into one <TH> per piece.
 * Call this only when looksTabular returns TRUE; a description that does not
 * match the regex is reported with errAbort. */
{
regmatch_t matches[PATH_LEN];
if (!regexMatchSubstr(def->description, COL_DESC_REGEX, matches, ArraySize(matches)))
    {
    errAbort("printTabularHeaderRow: code bug, if looksTabular returns true then "
             "regex should work here");
    return;
    }

puts("<TR>");

/* Work on a private copy of the matched text because it gets edited below. */
int matchLen = matches[0].rm_eo - matches[0].rm_so;
char headerText[matchLen+1];
safencpy(headerText, sizeof(headerText), def->description + matches[0].rm_so, matchLen);

/* Spaces instead of underscores let the words wrap inside header cells. */
subChar(headerText, '_', ' ');

char *columns[PATH_LEN];
int columnCount = chopByChar(headerText, '|', columns, ArraySize(columns));
int col = 0;
while (col < columnCount)
    {
    printf("<TH class='withThinBorder'>%s</TH>", columns[col]);
    col++;
    }
puts("</TR>");
}
void setupTable(struct sqlConnection *conn) /* Set up the autoTest table for testing. Call dropTable() later to remove table. */ { struct lineFile *lf = lineFileOpen("output/newTest.sql", TRUE); char *update = NULL; char *line = NULL; struct dyString *ds = newDyString(256); if(sqlTableExists(conn, testTableName)) errAbort("dbLinkTest.c::setupTable() - Table %s.%s already exists. Can't create another.", sqlGetDatabase(conn), testTableName); while(lineFileNextReal(lf, &line)) { char *tmp = strstr(line, "not null"); if(tmp != NULL) { *tmp = ','; tmp++; *tmp = '\0'; } subChar(line, '\t', ' '); dyStringPrintf(ds, "%s", line); } update = replaceChars(ds->string, "PRIMARY KEY", "UNIQUE"); sqlUpdate(conn, update); freez(&update); dyStringFree(&ds); lineFileClose(&lf); }
char *semiUniqName(char *base)
/* Figure out a name likely to be unique.  The name has the form
 *    base_host_pid_time
 * where host is the result of getHost() cut at its first '.', with '-' and
 * ':' turned into '_', and pid and the low 20 bits of time() are printed in
 * hex.  Returns a static buffer, so best to clone the result unless using it
 * immediately. */
{
int pid = getpid();
int num = time(NULL)&0xFFFFF;
char host[512];
/* Bounded copy: the previous strcpy would write past host[] if getHost()
 * ever returned 512 or more characters.
 * NOTE(review): safecpy is assumed to report an error instead of
 * overflowing, like safef below - confirm. */
safecpy(host, sizeof(host), getHost());
char *s = strchr(host, '.');
if (s != NULL)
    *s = 0;
subChar(host, '-', '_');
subChar(host, ':', '_');
static char name[PATH_LEN];
safef(name, sizeof(name), "%s_%s_%x_%x", base, host, pid, num);
return name;
}
char *escapeAltFixTerm(char *term)
/* Prepare term for an SQL search of alt/fix sequence names, which may
 * contain '.' and '_' characters.  term itself is not modified; the return
 * value is whatever sqlLikeFromWild produces for the adjusted copy. */
{
size_t bufSize = strlen(term) + 1;
char wildTerm[bufSize];
safecpy(wildTerm, bufSize, term);

/* A '.' becomes the single-character wildcard so that "GL383518.1" can match
 * "chr1_GL383518v1_alt". */
subChar(wildTerm, '.', '?');

/* sqlLikeFromWild escapes '_' (an important character in alt/fix names) and
 * supports the wildcards. */
return sqlLikeFromWild(wildTerm);
}
// Score how far `ing` is from `desired`.
// When getSyllables() gives the same value for both strings, the score is the
// sum of subChar(ing[i], desired[i]) over every index of `ing`.
// Otherwise the fixed value 257 is returned.
// NOTE(review): desired[i] is read for every index of `ing`; this assumes
// `desired` is at least as long as `ing` (or that String indexing tolerates
// out-of-range positions) - confirm against the String type in use.
int umatch(String ing, String desired){
  // Both the accumulator and the index start at zero. They used to be left
  // uninitialised, so the sum and the loop bounds were indeterminate.
  int j = 0;
  if(abs(getSyllables(ing) - getSyllables(desired))<1){
    for(int i = 0; i<ing.length(); i++){
      j += subChar(ing[i],desired[i]);
    }
  }
  else{
    j = 257;
  }
  return j;
}
struct frag *newFrag(char *fullFinfName, struct lineFile *lf)
/* Allocate a frag whose name is the part of fullFinfName after the '~',
 * with every '.' replaced by '_'.
 * Aborts, reporting lf's current line number and file name, unless
 * fullFinfName has a '~' that is followed by a digit and the text after it
 * fits into frag->name. */
{
struct frag *frag;
char *tilde;

AllocVar(frag);
tilde = strchr(fullFinfName, '~');
/* strlen(tilde) counts the '~' itself, so "<= sizeof" leaves exactly enough
 * room for the text after it plus the terminating zero. */
if (!(tilde != NULL && strlen(tilde) <= sizeof(frag->name) && isdigit(tilde[1])))
    errAbort("Badly formated name %s line %d of %s",
             fullFinfName, lf->lineIx, lf->fileName);
strcpy(frag->name, tilde+1);
subChar(frag->name, '.', '_');
return frag;
}
void getText(struct kgXref *kg, struct hash *refSeqHash, struct sqlConnection *conn, struct sqlConnection *spConn, struct sqlConnection *goConn, FILE *f) /* Get loads of text and write it out. */ { char query[512]; struct hash *uniqHash = hashNew(0); char *spAcc = spFindAcc(spConn, kg->spID); subChar(kg->description, '\n', ' '); fprintf(f, "%s\t%s\t%s\t%s", kg->kgID, kg->geneSymbol, kg->kgID, kg->description); hashAdd(uniqHash, kg->geneSymbol, NULL); hashAdd(uniqHash, kg->kgID, NULL); addSimple(kg->kgID, "kgAlias", "kgID", "alias", conn, uniqHash, f); addSimple(kg->kgID, "kgProtAlias", "kgID", "alias", conn, uniqHash, f); if (refSeqHash != NULL) { char *s = hashFindVal(refSeqHash, kg->refseq); if (s == NULL && strchr(kg->refseq, '.') != NULL) { char *accOnly = cloneString(kg->refseq); chopSuffix(accOnly); s = hashFindVal(refSeqHash, accOnly); freeMem(accOnly); } if (s == NULL) s = ""; fprintf(f, "\t%s", s); } sqlSafef(query, sizeof(query), "select commentVal.val from comment,commentVal " "where comment.acc='%s' and comment.commentVal=commentVal.id" , spAcc); addText(query, spConn, f); sqlSafef(query, sizeof(query), "select term.name from goaPart,term " "where goaPart.dbObjectId='%s' " "and goaPart.goId=term.acc" , spAcc); addText(query, goConn, f); fprintf(f, "\n"); hashFree(&uniqHash); freeMem(spAcc); }
char *encodeRnaName(struct track *tg, void *item)
/* Return RNA gene name for display.
 * item is treated as a struct encodeRna; tg is not used.
 * The name is taken from el->name after skipChr, '_' becomes ' ',
 * abbr() is applied with " pseudogene", and the first "-related" (with
 * everything after it) is replaced by "-like".
 * The result lives in a static buffer that is overwritten by the next call. */
{
struct encodeRna *el = item;
char *full = el->name;
static char abbrev[SMALLBUF];
char *e;

/* Bounded copy: the previous strcpy would write past abbrev[] for a name of
 * SMALLBUF or more characters.
 * NOTE(review): safecpy is assumed to report an error rather than truncate
 * or overflow - confirm that is acceptable for over-long names here. */
safecpy(abbrev, sizeof(abbrev), skipChr(full));
subChar(abbrev, '_', ' ');
abbr(abbrev, " pseudogene");
/* "-like" is shorter than "-related", so this overwrite stays in bounds. */
if ((e = strstr(abbrev, "-related")) != NULL)
    strcpy(e, "-like");
return abbrev;
}
void addText(char *query, struct sqlConnection *conn, FILE *f)
/* Write a tab to f, then run query on conn and write the first column of
 * every result row followed by a space.  Newlines inside a value are turned
 * into spaces before it is written. */
{
struct sqlResult *sr;
char **row;

fprintf(f, "\t");
sr = sqlGetResult(conn, query);
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    subChar(row[0], '\n', ' ');
    fprintf(f, "%s ", row[0]);
    }
sqlFreeResult(&sr);
}
struct hash *getRefSeqSummary(struct sqlConnection *conn) /* Return hash keyed by refSeq NM_ id, with description values. */ { struct hash *hash = hashNew(16); char query[256]; sqlSafef(query, sizeof(query), "select mrnaAcc,summary from %s", summaryTable); struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { subChar(row[1], '\n', ' '); hashAdd(hash, row[0], cloneString(row[1])); } sqlFreeResult(&sr); verbose(1, "%d %s elements\n", hash->elCount, summaryTable); return hash; }
struct hash *loadModuleToMotif(struct sqlConnection *conn, char *fileName, char *table) /* Load up file which has a line per module. The first word is the module * number, the rest of the tab-separated fields are motif names. * Return hash keyed by module&motif. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line, *module, *motif; FILE *f = hgCreateTabFile(tmpDir, table); struct dyString *dy = dyStringNew(512); int motifCount = 0, moduleCount = 0; struct hash *hash = newHash(18); while (lineFileNextReal(lf, &line)) { ++moduleCount; subChar(line, ' ', '_'); module = nextWord(&line); while ((motif = nextWord(&line)) != NULL) { ++motifCount; fprintf(f, "%s\t%s\n", module, motif); hashAdd2(hash, module, motif, NULL); } } dyStringPrintf(dy, "CREATE TABLE %s (\n" " module int not null,\n" " motif varchar(255) not null,\n" " #Indices\n" " INDEX(module),\n" " INDEX(motif(16))\n" ")\n", table); sqlRemakeTable(conn, table, dy->string); verbose(1, "%d modules, %d motifs in modules\n", moduleCount, motifCount); hgLoadTabFile(conn, tmpDir, table, &f); hgRemoveTabFile(tmpDir, table); verbose(1, "Loaded %s table\n", table); lineFileClose(&lf); return hash; }
char *findVal(struct lineFile *lf, char *group, char *key)
/* Look up key in group, a string of "name value" pairs, and return its
 * value, or the empty string "" if key is not present.
 * A value is either a quoted string (single or double quotes) or a single
 * word; a ';' ending a word value, or following a value, is dropped.
 * Tabs in the returned value are replaced by spaces.
 * group is copied into a static buffer which is parsed in place, so the
 * result points into that buffer and is overwritten by the next call.
 * lf is used only for the line number and file name in error messages. */
{
static char buf[512];
char *pos, *name, *value;

if (strlen(group) >= sizeof(buf))
    errAbort("Line too long line %d of %s", lf->lineIx, lf->fileName);
strcpy(buf, group);
pos = buf;

while ((name = nextWord(&pos)) != NULL)
    {
    pos = skipLeadingSpaces(pos);
    if (pos == NULL || pos[0] == 0)
        errAbort("Unmatched key/val pair in group line %d of %s",
                 lf->lineIx, lf->fileName);
    value = pos;
    if (pos[0] == '\'' || pos[0] == '"')
        {
        /* Quoted value: unquote in place, pos is moved past it. */
        if (!parseQuotedString(value, value, &pos))
            errAbort("Unmatched quote line %d of %s", lf->lineIx, lf->fileName);
        }
    else
        {
        /* Bare word value, possibly with a ';' glued to its end. */
        int last;
        value = nextWord(&pos);
        last = strlen(value) - 1;
        if (value[last] == ';')
            value[last] = 0;
        }
    pos = skipLeadingSpaces(pos);
    if (pos != NULL && pos[0] == ';')
        pos += 1;
    if (sameString(key, name))
        {
        subChar(value, '\t', ' ');
        return value;
        }
    }
return "";
}
static struct qaStatus *qaStatusOnPage(struct errCatch *errCatch, struct htmlPage *page, long startTime, struct htmlPage **retPage) /* Assuming you have fetched page with the given error catcher, * starting the fetch at the given startTime, then create a * qaStatus that describes how the fetch went. If *retPage is non-null * then return the page there, otherwise free it. */ { char *errMessage = NULL; struct qaStatus *qs; AllocVar(qs); if (errCatch->gotError || page == NULL) { errMessage = errCatch->message->string; qs->hardError = TRUE; } else { if (page->status->status != 200) { dyStringPrintf(errCatch->message, "HTTP status code %d\n", page->status->status); errMessage = errCatch->message->string; qs->hardError = TRUE; htmlPageFree(&page); } else { errMessage = qaScanForErrorMessage(page->fullText); } } qs->errMessage = cloneString(errMessage); if (qs->errMessage != NULL) subChar(qs->errMessage, '\n', ' '); qs->milliTime = clock1000() - startTime; if (retPage != NULL) *retPage = page; else htmlPageFree(&page); return qs; }
int lineToExp(char *line, char *fileName) /* Convert line to an expression record file. * Return number of expression records. */ { FILE *f = mustOpen(fileName, "w"); struct hash *hash = newHash(10); /* Integer valued hash */ char *word; int wordCount = 0; struct expCounter *ec; char *spaced; char name[128]; while ((word = nextTabWord(&line)) != NULL) { if ((ec = hashFindVal(hash, word)) == NULL) { AllocVar(ec); hashAddSaveName(hash, word, ec, &ec->name); } spaced = cloneString(word); subChar(spaced, '_', ' '); ec->count += 1; if (ec->count > 1) safef(name, sizeof(name), "%s %d", spaced, ec->count); else safef(name, sizeof(name), "%s", spaced); fprintf(f, "%d\t", wordCount); fprintf(f, "%s\t", name); fprintf(f, "%s\t", name); fprintf(f, "%s\t", "http://www.affymetrix.com/analysis/index.affx"); fprintf(f, "%s\t", "http://expression.gnf.org"); fprintf(f, "%s\t", "http://www.gnf.org"); fprintf(f, "3\t"); fprintf(f, "%s,%s,%s,\n", chip, "n/a", spaced); ++wordCount; } carefulClose(&f); return wordCount; }
int lineToExp(char *line, FILE *f) /* Convert line to an expression record file. * Return number of expression records. */ { struct hash *hash = newHash(10); /* Integer valued hash */ char *word; int wordCount = 0; struct expCounter *ec; char *spaced; char name[128]; while ((word = nextTabWord(&line)) != NULL) { if ((ec = hashFindVal(hash, word)) == NULL) { AllocVar(ec); hashAddSaveName(hash, word, ec, &ec->name); } spaced = cloneString(word); subChar(spaced, '_', ' '); ec->count += 1; if (ec->count > 1) safef(name, sizeof(name), "%s %d", spaced, ec->count); else safef(name, sizeof(name), "%s", spaced); fprintf(f, "%d\t", wordCount); fprintf(f, "%s\t", name); fprintf(f, "%s\t", name); fprintf(f, "%s\t", expUrl); fprintf(f, "%s\t", expRef); fprintf(f, "%s\t", expCredit); fprintf(f, "3\t"); fprintf(f, "%s,%s,%s,\n", chip, "n/a", spaced); ++wordCount; } return wordCount; }
boolean doGetBedOrCt(struct sqlConnection *conn, boolean doCt, boolean doCtFile, boolean redirectToGb)
/* Actually output bed or custom track. Return TRUE unless no results.
 *   conn         - connection passed on to the table/wiggle readers.
 *   doCt         - collect the results into a new custom track (ctNew) and
 *                  save it with the cart instead of printing them.
 *   doCtFile     - only used here to switch on custom track headers.
 *   redirectToGb - with doCt and results: emit an HTML page that redirects
 *                  to the genome browser.
 * The table is curTable in the global database; regions come from
 * getRegions().  For wiggle, bedGraph and bigWig tables the output is data
 * points when the cart's wig output type equals outWigData, otherwise bed
 * items.  When no region produced anything, NO_RESULTS is printed. */
{
char *db = cloneString(database);
char *table = curTable;
struct hTableInfo *hti = getHti(db, table, conn);
struct featureBits *fbList = NULL, *fbPtr;
struct customTrack *ctNew = NULL;
/* Headers are wanted if the user asked for them or a custom track is made. */
boolean doCtHdr = (cartUsualBoolean(cart, hgtaPrintCustomTrackHeaders, FALSE) || doCt || doCtFile);
char *ctWigOutType = cartCgiUsualString(cart, hgtaCtWigOutType, outWigData);
char *fbQual = fbOptionsToQualifier();
char fbTQ[128];
int fields = hTableInfoBedFieldCount(hti);
boolean gotResults = FALSE;
struct region *region, *regionList = getRegions();
boolean isBedGr = isBedGraph(curTable);
boolean isBgWg = isBigWigTable(curTable);
boolean needSubtrackMerge = anySubtrackMerge(database, curTable);
boolean doDataPoints = FALSE;
boolean isWig = isWiggle(database, table);
struct wigAsciiData *wigDataList = NULL;
struct dataVector *dataVectorList = NULL;
boolean doRgb = bedItemRgb(hTrackDbForTrack(db, curTable));

/* Plain text output is opened unless GREAT output or a custom track is being made. */
if (!cartUsualBoolean(cart, hgtaDoGreatOutput, FALSE) && !doCt)
    {
    textOpen();
    }

/* GREAT output starts with a '#'. */
if (cartUsualBoolean(cart, hgtaDoGreatOutput, FALSE))
    fputs("#", stdout);

if ((isWig || isBedGr || isBgWg) && sameString(outWigData, ctWigOutType))
    doDataPoints = TRUE;

for (region = regionList; region != NULL; region = region->next)
    {
    struct bed *bedList = NULL, *bed;
    struct lm *lm = lmInit(64*1024);
    struct dataVector *dv = NULL;

    /* Step 1: fetch this region's data in the form matching the table type. */
    if (isWig && doDataPoints)
        {
        if (needSubtrackMerge)
            {
            dv = wiggleDataVector(curTrack, curTable, conn, region);
            if (dv != NULL)
                slAddHead(&dataVectorList, dv);
            }
        else
            {
            /* NOTE(review): count is incremented below but never read. */
            int count = 0;
            struct wigAsciiData *wigData = NULL;
            struct wigAsciiData *asciiData;
            struct wigAsciiData *next;

            wigData = getWiggleAsData(conn, curTable, region);
            /* Keep only the elements that hold data; next is saved first
             * because slAddHead relinks the element.
             * NOTE(review): elements with a zero count are dropped here
             * without being freed - confirm whether that is intended. */
            for (asciiData = wigData; asciiData; asciiData = next)
                {
                next = asciiData->next;
                if (asciiData->count)
                    {
                    slAddHead(&wigDataList, asciiData);
                    ++count;
                    }
                }
            slReverse(&wigDataList);
            }
        }
    else if (isBedGr && doDataPoints)
        {
        dv = bedGraphDataVector(curTable, conn, region);
        if (dv != NULL)
            slAddHead(&dataVectorList, dv);
        }
    else if (isBgWg && doDataPoints)
        {
        dv = bigWigDataVector(curTable, conn, region);
        if (dv != NULL)
            slAddHead(&dataVectorList, dv);
        }
    else if (isWig || isBgWg)
        {
        /* Wiggle-like table but bed output requested: convert to beds. */
        dv = wiggleDataVector(curTrack, curTable, conn, region);
        bedList = dataVectorToBedList(dv);
        dataVectorFree(&dv);
        }
    else if (isBedGr)
        {
        bedList = getBedGraphAsBed(conn, curTable, region);
        }
    else
        {
        bedList = cookedBedList(conn, curTable, region, lm, &fields);
        }

    /* this is a one-time only initial creation of the custom track
     * structure to receive the results. gotResults turns it off after
     * the first time.
     */
    if (doCtHdr && !gotResults && ((bedList != NULL) || (wigDataList != NULL) || (dataVectorList != NULL)))
        {
        ctNew = beginCustomTrack(table, fields, doCt, (isWig || isBedGr || isBgWg), doDataPoints);
        }

    /* Step 2: data points are only collected here and written after the
     * loop; bed items are printed or added to the custom track right away. */
    if (doDataPoints && (wigDataList || dataVectorList))
        gotResults = TRUE;
    else
        {
        if ((fbQual == NULL) || (fbQual[0] == 0))
            {
            /* No feature-bits qualifier: output the beds themselves. */
            for (bed = bedList; bed != NULL; bed = bed->next)
                {
                if (bed->name != NULL)
                    {
                    subChar(bed->name, ' ', '_');
                    }
                if (doCt)
                    {
                    struct bed *dupe = cloneBed(bed); /* Out of local memory. */
                    slAddHead(&ctNew->bedList, dupe);
                    }
                else
                    {
                    if (doRgb)
                        bedTabOutNitemRgb(bed, fields, stdout);
                    else
                        bedTabOutN(bed, fields, stdout);
                    }
                gotResults = TRUE;
                }
            }
        else
            {
            /* Feature-bits qualifier present: derive features from the beds. */
            safef(fbTQ, sizeof(fbTQ), "%s:%s", hti->rootName, fbQual);
            fbList = fbFromBed(db, fbTQ, hti, bedList, 0, 0, FALSE, FALSE);
            /* Feature-bits output has 3, 4 or 6 columns only. */
            if (fields >= 6)
                fields = 6;
            else if (fields >= 4)
                fields = 4;
            else
                fields = 3;
            if (doCt && ctNew)
                {
                ctNew->fieldCount = fields;
                /* The new type string is limited to the length of the
                 * existing one. */
                safef(ctNew->tdb->type, strlen(ctNew->tdb->type)+1, "bed %d", fields);
                }
            for (fbPtr=fbList; fbPtr != NULL; fbPtr=fbPtr->next)
                {
                /* Names are cut at the first space. */
                if (fbPtr->name != NULL)
                    {
                    char *ptr = strchr(fbPtr->name, ' ');
                    if (ptr != NULL)
                        *ptr = 0;
                    }
                if (doCt)
                    {
                    struct bed *fbBed = fbToBedOne(fbPtr);
                    slAddHead(&ctNew->bedList, fbBed );
                    }
                else
                    {
                    if (fields >= 6)
                        hPrintf("%s\t%d\t%d\t%s\t%d\t%c\n", fbPtr->chrom, fbPtr->start, fbPtr->end, fbPtr->name, 0, fbPtr->strand);
                    else if (fields >= 4)
                        hPrintf("%s\t%d\t%d\t%s\n", fbPtr->chrom, fbPtr->start, fbPtr->end, fbPtr->name);
                    else
                        hPrintf("%s\t%d\t%d\n", fbPtr->chrom, fbPtr->start, fbPtr->end);
                    }
                gotResults = TRUE;
                }
            featureBitsFreeList(&fbList);
            }
        }
    bedList = NULL;
    lmCleanup(&lm);
    }

/* Step 3: finish up according to the kind of output. */
if (!gotResults)
    {
    hPrintf(NO_RESULTS);
    }
else if (doCt)
    {
    int wigDataSize = 0;
    /* Load existing custom tracks and add this new one: */
    struct customTrack *ctList = getCustomTracks();
    removeNamedCustom(&ctList, ctNew->tdb->table);
    if (doDataPoints)
        {
        if (needSubtrackMerge || isBedGr || isBgWg)
            {
            /* The list was built with slAddHead, so put it back in order. */
            slReverse(&dataVectorList);
            wigDataSize = dataVectorWriteWigAscii(dataVectorList, ctNew->wigAscii, 0, NULL);
            // TODO: see if can make prettier wig output here that
            // doesn't necessarily have one value per base
            }
        else
            {
            struct wiggleDataStream *wds = NULL;
            /* create an otherwise empty wds so we can print out the list */
            wds = wiggleDataStreamNew();
            wds->ascii = wigDataList;
            wigDataSize = wds->asciiOut(wds, db, ctNew->wigAscii, TRUE, FALSE);
#if defined(DEBUG) /* dbg */
            /* allow file readability for debug */
            chmod(ctNew->wigAscii, 0666);
#endif
            wiggleDataStreamFree(&wds);
            }
        }
    else
        slReverse(&ctNew->bedList);

    slAddHead(&ctList, ctNew);
    /* Save the custom tracks out to file (overwrite the old file): */
    customTracksSaveCart(db, cart, ctList);
    /* Put up redirect-to-browser page. */
    if (redirectToGb)
        {
        char browserUrl[256];
        char headerText[512];
        int redirDelay = 3;
        safef(browserUrl, sizeof(browserUrl), "%s?%s&db=%s", hgTracksName(), cartSidUrlString(cart), database);
        safef(headerText, sizeof(headerText), "<META HTTP-EQUIV=\"REFRESH\" CONTENT=\"%d;URL=%s\">", redirDelay, browserUrl);
        webStartHeader(cart, database, headerText, "Table Browser: %s %s: %s", hOrganism(database), freezeName, "get custom track");
        if (doDataPoints)
            {
            hPrintf("There are %d data points in custom track. ", wigDataSize);
            }
        else
            {
            hPrintf("There are %d items in custom track. ", slCount(ctNew->bedList));
            }
        hPrintf("You will be automatically redirected to the genome browser in\n"
                "%d seconds, or you can \n"
                "<A HREF=\"%s\">click here to continue</A>.\n", redirDelay, browserUrl);
        }
    }
else if (doDataPoints)
    {
    /* Data points without a custom track: write them to "stdout". */
    if (needSubtrackMerge || isBedGr || isBgWg)
        {
        slReverse(&dataVectorList);
        dataVectorWriteWigAscii(dataVectorList, "stdout", 0, NULL);
        }
    else
        {
        /* create an otherwise empty wds so we can print out the list */
        struct wiggleDataStream *wds = NULL;
        wds = wiggleDataStreamNew();
        wds->ascii = wigDataList;
        wds->asciiOut(wds, db, "stdout", TRUE, FALSE);
        wiggleDataStreamFree(&wds);
        }
    }
return gotResults;
}
void doGvf(struct trackDb *tdb, char *item)
/* Show details for variants represented as GVF, stored in a bed8Attrs table.
 *   tdb  - track whose table is queried; its "url" setting is consulted
 *          when deciding whether to print the Name attribute.
 *   item - name of the variant; looked up together with the global seqName
 *          and the cart variable "o" (chromStart).
 * Aborts if no matching row exists.  Prints the position and then one HTML
 * line per attribute other than ID.  Attribute tags and values of the loaded
 * record are modified in place while printing. */
{
struct sqlConnection *conn = hAllocConn(database);
int start = cartInt(cart, "o");
char query[1024];
sqlSafef(query, sizeof(query), "select * from %s where name = '%s' and chrom = '%s' and chromStart = %d", tdb->table, item, seqName, start);
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;
if ((row = sqlNextRow(sr)) == NULL)
    errAbort("doGvfDetails: can't find item '%s' in %s at %s:%d", item, database, seqName, start);
/* Skip the leading columns reported by hOffsetPastBin before loading. */
int rowOffset = hOffsetPastBin(database, seqName, tdb->table);
struct bed8Attrs *ba = bed8AttrsLoad(row+rowOffset);
bedPrintPos((struct bed *)ba, 3, tdb);
int i = 0;
// Note: this loop modifies ba->attrVals[i], assuming we won't use them again:
for (i = 0; i < ba->attrCount; i++)
    {
    // The ID is the bed8Attrs name and has already been displayed:
    if (sameString(ba->attrTags[i], "ID"))
        continue;
    // The value is CGI-decoded in place (same buffer for input and output).
    cgiDecode(ba->attrVals[i], ba->attrVals[i], strlen(ba->attrVals[i]));
    char *tag = ba->attrTags[i];
    // User-defined keywords used in dbVar's GVF:
    if (sameString(tag, "var_type")) // This one isn't anymore, but I add it back (hg18.txt).
        tag = "Variant type";
    else if (sameString(tag, "clinical_int"))
        tag = "Clinical interpretation";
    else if (sameString(tag, "var_origin"))
        tag = "Variant origin";
    else if (islower(tag[0]))
        // Uppercase for nice display, assuming user doesn't care which keywords are
        // user-defined vs. GVF standard:
        // (tag still points at ba->attrTags[i] here, so the stored tag changes.)
        tag[0] = toupper(tag[0]);
    // GVF standard Start_range and End_range tags (1-based coords):
    if (sameString(tag, "Start_range") || sameString(tag, "End_range"))
        {
        // Split a copy on commas; expected form is two fields, each an
        // integer or ".".
        // NOTE(review): copy is not freed in this function.
        char *copy = cloneString(ba->attrVals[i]);
        char *words[3];
        int wordCount = chopCommas(copy, words);
        if (wordCount == 2 && (sameString(".", words[0]) || isInteger(words[0])) && (sameString(".", words[1]) || isInteger(words[1])))
            {
            boolean isStartRange = sameString(tag, "Start_range");
            char *rangeStart = words[0], *rangeEnd = words[1];
            // "." is shown as "unknown":
            if (sameString(".", rangeStart))
                rangeStart = "unknown";
            if (sameString(".", rangeEnd))
                rangeEnd = "unknown";
            if (isStartRange)
                printf("<B>Start range</B>: outer start %s, inner start %s<BR>\n", rangeStart, rangeEnd);
            else
                printf("<B>End range</B>: inner end %s, outer end %s<BR>\n", rangeStart, rangeEnd);
            }
        else
            // not formatted as expected, just print as-is:
            printf("<B>%s</B>: %s<BR>\n", tag, htmlEncode(ba->attrVals[i]));
        }
    // Parent sounds like mom or dad (as in var_origin)... tweak it too:
    else if (sameString(tag, "Parent"))
        {
        // NOTE(review): the value is placed in the HREF without encoding;
        // only the link text goes through htmlEncode.
        printf("<B>Variant region:</B> "
               "<A HREF=\"http://www.ncbi.nlm.nih.gov/dbvar/variants/%s/\" "
               "TARGET=_BLANK>%s</A><BR>\n", ba->attrVals[i], htmlEncode(ba->attrVals[i]));
        }
    else if (sameString(tag, "Name"))
        {
        char *url = trackDbSetting(tdb, "url");
        // Show the Name only if it hasn't already appeared in the URL:
        if (url == NULL || !stringIn("$$", url))
            printf("<B>%s</B>: %s<BR>\n", tag, htmlEncode(ba->attrVals[i]));
        }
    else if (sameWord(tag, "Phenotype_id") && startsWith("HPO:HP:", ba->attrVals[i]))
        {
        // Link to the ontology term; the "HPO:" prefix is skipped in the URL.
        subChar(tag, '_', ' ');
        printf("<B>%s</B>: <A HREF=\"http://www.berkeleybop.org/obo/%s\" "
               "TARGET=_BLANK>%s</A><BR>\n", tag, ba->attrVals[i]+strlen("HPO:"), htmlEncode(ba->attrVals[i]));
        }
    else
        {
        // Any other tag: show it with underscores turned into spaces.
        // NOTE(review): tag may be one of the string literals assigned above;
        // those contain no '_', which presumably means subChar never writes
        // to them - confirm against subChar's implementation.
        subChar(tag, '_', ' ');
        printf("<B>%s</B>: %s<BR>\n", tag, htmlEncode(ba->attrVals[i]));
        }
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
/* printTrackHtml is done in genericClickHandlerPlus. */
}
void hgTrackDb(char *org, char *database, char *trackDbName, char *sqlFile, char *hgRoot, boolean strict)
/* hgTrackDb - Create trackDb table from text files.
 *   org, database, hgRoot, strict - passed to buildTrackDb to obtain the
 *                 track descriptions; strict also enables the warning about
 *                 missing html below.
 *   trackDbName - name of the table to (re)create; "<trackDbName>.tab" is
 *                 used as the intermediate tab file.
 *   sqlFile     - file holding the table definition.
 * Works in two phases: write all tracks (without html) to the tab file,
 * then remake the table, bulk load the tab file and update the html and
 * settings fields track by track. */
{
struct trackDb *td;
char tab[PATH_LEN];
safef(tab, sizeof(tab), "%s.tab", trackDbName);

struct trackDb *tdbList = buildTrackDb(org, database, hgRoot, strict);
tdbList = flatten(tdbList);
slSort(&tdbList, trackDbCmp);
verbose(1, "Loaded %d track descriptions total\n", slCount(tdbList));

/* Write to tab-separated file; hold off on html, since it must be encoded */
    {
    verbose(2, "Starting write of tabs to %s\n", tab);
    FILE *f = mustOpen(tab, "w");
    for (td = tdbList; td != NULL; td = td->next)
        {
        hVarSubstTrackDb(td, database);
        /* Temporarily blank the html so it is not written to the tab file;
         * it is restored right after trackDbTabOut. */
        char *hold = td->html;
        td->html = "";
        subChar(td->type, '\t', ' '); /* Tabs confuse things. */
        subChar(td->shortLabel, '\t', ' '); /* Tabs confuse things. */
        subChar(td->longLabel, '\t', ' '); /* Tabs confuse things. */
        trackDbTabOut(td, f);
        td->html = hold;
        }
    carefulClose(&f);
    verbose(2, "Wrote tab representation to %s\n", tab);
    }

/* Update database */
    {
    char *create, *end;
    char query[256];
    struct sqlConnection *conn = sqlConnect(database);

    /* Load in table definition. */
    readInGulp(sqlFile, &create, NULL);
    create = trimSpaces(create);
    create = substituteTrackName(create, trackDbName);
    /* Drop one trailing ';' from the statement.
     * NOTE(review): if create were empty, end would point before the
     * buffer - presumably sqlFile is never empty; confirm. */
    end = create + strlen(create)-1;
    if (*end == ';') *end = 0;
    sqlRemakeTable(conn, trackDbName, create);

    /* Load in regular fields. */
    sqlSafef(query, sizeof(query), "load data local infile '%s' into table %s", tab, trackDbName);
    verbose(2, "sending mysql \"%s\"\n", query);
    sqlUpdate(conn, query);
    verbose(2, "done tab file load");

    /* Load in html and settings fields. */
    for (td = tdbList; td != NULL; td = td->next)
        {
        if (isEmpty(td->html))
            {
            /* In strict mode warn about missing html, except for tracks
             * with a local "parent" or "superTrack" setting and for
             * cytoBandIdeo. */
            if (strict && !trackDbLocalSetting(td, "parent") && !trackDbLocalSetting(td, "superTrack") && !sameString(td->track,"cytoBandIdeo"))
                {
                fprintf(stderr, "Warning: html missing for %s %s %s '%s'\n",org, database, td->track, td->shortLabel);
                }
            }
        else
            {
            updateBigTextField(conn, trackDbName, "tableName", td->track, "html", td->html);
            }
        if (td->settingsHash != NULL)
            {
            char *settings = settingsFromHash(td->settingsHash);
            updateBigTextField(conn, trackDbName, "tableName", td->track, "settings", settings);
            if (showSettings)
                {
                /* Echo the type and the settings on one line, with each
                 * newline in the settings shown as "; ". */
                verbose(1, "%s: type='%s';", td->track, td->type);
                if (isNotEmpty(settings))
                    {
                    char *oneLine = replaceChars(settings, "\n", "; ");
                    eraseTrailingSpaces(oneLine);
                    verbose(1, " %s", oneLine);
                    freeMem(oneLine);
                    }
                verbose(1, "\n");
                }
            freeMem(settings);
            }
        }

    sqlDisconnect(&conn);
    verbose(1, "Loaded database %s\n", database);
    }
}
void txGeneXref(char *genomeDb, char *uniProtDb, char *genePredFile, char *infoFile, char *pickFile, char *evFile, char *outFile)
/* txGeneXref - Make kgXref type table for genes..
 *   genomeDb, uniProtDb - databases queried for gene symbols/descriptions.
 *   genePredFile        - source for the gene-to-protein hash.
 *   infoFile            - txInfo records; one output line is written per record.
 *   pickFile            - cdsPick rows, hashed by name.
 *   evFile              - txRnaAccs evidence records, hashed by name.
 *   outFile             - tab-separated output with the columns kgID, mRNA,
 *                         spID, spDisplayID, geneSymbol, refseq, protAcc,
 *                         description.
 * Gene symbol and description are taken from the first source that has
 * them: refLink (by refseq), then uniProt, then the evidence accessions in
 * gbCdnaInfo, and finally the mRNA accession itself. */
{
/* Load picks into hash. We don't use cdsPicksLoadAll because empty fields
 * cause that autoSql-generated routine problems. */
struct hash *pickHash = newHash(18);
struct hash *geneToProtHash = makeGeneToProtHash(genePredFile);
struct cdsPick *pick;
struct lineFile *lf = lineFileOpen(pickFile, TRUE);
char *row[CDSPICK_NUM_COLS];
while (lineFileRowTab(lf, row))
    {
    pick = cdsPickLoad(row);
    removePickVersions(pick);
    hashAdd(pickHash, pick->name, pick);
    }

/* Load evidence into hash */
struct hash *evHash = newHash(18);
struct txRnaAccs *ev, *evList = txRnaAccsLoadAll(evFile);
for (ev = evList; ev != NULL; ev = ev->next)
    hashAdd(evHash, ev->name, ev);

/* Open connections to our databases */
struct sqlConnection *gConn = sqlConnect(genomeDb);
struct sqlConnection *uConn = sqlConnect(uniProtDb);

/* Read in info file, and loop through it to make out file. */
struct txInfo *info, *infoList = txInfoLoadAll(infoFile);
FILE *f = mustOpen(outFile, "w");
for (info = infoList; info != NULL; info = info->next)
    {
    char *kgID = info->name;
    char *mRNA = "";
    char *spID = "";
    char *spDisplayID = "";
    char *geneSymbol = NULL;
    char *refseq = "";
    char *protAcc = "";
    char *description = NULL;
    char query[256];
    char *proteinId = hashMustFindVal(geneToProtHash, info->name);
    boolean isAb = sameString(info->category, "antibodyParts");
    pick = hashFindVal(pickHash, info->name);
    ev = hashFindVal(evHash, info->name);
    if (pick != NULL)
        {
        /* Fill in the relatively straightforward fields. */
        refseq = pick->refSeq;
        if (info->orfSize > 0)
            {
            protAcc = pick->refProt;
            spID = proteinId;
            /* If the protein id is just the refProt accession, use the
             * pick's uniProt id instead. */
            if (sameString(protAcc, spID))
                spID = pick->uniProt;
            if (spID[0] != 0)
                spDisplayID = spAnyAccToId(uConn, spID);
            }

        /* Fill in gene symbol and description from refseq if possible. */
        if (refseq[0] != 0)
            {
            struct sqlResult *sr;
            /* NOTE(review): this query is built with safef; confirm
             * whether sqlSafef is expected here. */
            safef(query, sizeof(query), "select name,product from refLink where mrnaAcc='%s'", refseq);
            sr = sqlGetResult(gConn, query);
            /* This row shadows the outer row[] used for the pick file. */
            char **row = sqlNextRow(sr);
            if (row != NULL)
                {
                geneSymbol = cloneString(row[0]);
                if (!sameWord("unknown protein", row[1]))
                    description = cloneString(row[1]);
                }
            sqlFreeResult(&sr);
            }

        /* If need be try uniProt for gene symbol and description. */
        if (spID[0] != 0 && (geneSymbol == NULL || description == NULL))
            {
            char *acc = spLookupPrimaryAcc(uConn, spID);
            if (description == NULL)
                description = spDescription(uConn, acc);
            if (geneSymbol == NULL)
                {
                /* Use the first gene name listed, if any. */
                struct slName *nameList = spGenes(uConn, acc);
                if (nameList != NULL)
                    geneSymbol = cloneString(nameList->name);
                slFreeList(&nameList);
                }
            }
        }

    /* If it's an antibody fragment use that as name. */
    if (isAb)
        {
        geneSymbol = cloneString("abParts");
        description = cloneString("Parts of antibodies, mostly variable regions.");
        isAb = TRUE;
        }

    /* Only antibody fragments may lack evidence; anything else is fatal. */
    if (ev == NULL)
        {
        mRNA = cloneString("");
        if (!isAb)
            {
            errAbort("%s is %s but not %s\n", info->name, infoFile, evFile);
            }
        }
    else
        {
        /* mRNA is the primary evidence accession with its suffix chopped. */
        mRNA = cloneString(ev->primary);
        chopSuffix(mRNA);
        }

    /* Still no joy? Try genbank RNA records. */
    if (geneSymbol == NULL || description == NULL)
        {
        if (ev != NULL)
            {
            int i;
            for (i=0; i<ev->accCount; ++i)
                {
                /* The suffix is chopped in place in the evidence record. */
                char *acc = ev->accs[i];
                chopSuffix(acc);
                if (geneSymbol == NULL)
                    {
                    safef(query, sizeof(query), "select geneName.name from gbCdnaInfo,geneName "
                          "where geneName.id=gbCdnaInfo.geneName and gbCdnaInfo.acc = '%s'", acc);
                    geneSymbol = sqlQuickString(gConn, query);
                    /* "n/a" counts as not found. */
                    if (geneSymbol != NULL)
                        {
                        if (sameString(geneSymbol, "n/a"))
                            geneSymbol = NULL;
                        }
                    }
                if (description == NULL)
                    {
                    safef(query, sizeof(query), "select description.name from gbCdnaInfo,description "
                          "where description.id=gbCdnaInfo.description "
                          "and gbCdnaInfo.acc = '%s'", acc);
                    description = sqlQuickString(gConn, query);
                    /* "n/a" counts as not found. */
                    if (description != NULL)
                        {
                        if (sameString(description, "n/a"))
                            description = NULL;
                        }
                    }
                }
            }
        }

    /* Last resort: the mRNA accession.
     * NOTE(review): when both fall back, geneSymbol and description point
     * at the same buffer, so the edits to geneSymbol below also change
     * description. */
    if (geneSymbol == NULL)
        geneSymbol = mRNA;
    if (description == NULL)
        description = mRNA;

    /* Get rid of some characters that will cause havoc downstream. */
    stripChar(geneSymbol, '\'');
    subChar(geneSymbol, '<', '[');
    subChar(geneSymbol, '>', ']');

    /* Abbreviate geneSymbol if too long */
    /* (37 characters are kept and "..." is written after them). */
    if (strlen(geneSymbol) > 40)
        strcpy(geneSymbol+37, "...");

    fprintf(f, "%s\t", kgID);
    fprintf(f, "%s\t", mRNA);
    fprintf(f, "%s\t", spID);
    fprintf(f, "%s\t", spDisplayID);
    fprintf(f, "%s\t", geneSymbol);
    fprintf(f, "%s\t", refseq);
    fprintf(f, "%s\t", protAcc);
    fprintf(f, "%s\n", description);
    }
carefulClose(&f);
}
void undosPath(char *path)
/* Replace every backslash in path with a forward slash, in place. */
{
const char dosSeparator = '\\';
const char unixSeparator = '/';
subChar(path, dosSeparator, unixSeparator);
}
void printExpression(FILE *f, struct sqlConnection *conn, char *imagePaneKey, char *assayKey) /* Print associated expression info on assay/pane as indented lines. */ { struct dyString *query = dyStringNew(0); struct sqlResult *sr; char **row; sqlDyStringPrintf(query, "select GXD_Structure.printName,GXD_InSituResult._Strength_key,GXD_Pattern.pattern " "from GXD_Structure,GXD_InSituResult,GXD_InSituResultImage," "GXD_ISResultStructure,GXD_Pattern,GXD_Specimen " "where GXD_InSituResultImage._ImagePane_key = %s " "and GXD_InSituResultImage._Result_key = GXD_ISResultStructure._Result_key " "and GXD_InSituResultImage._Result_key = GXD_InSituResult._Result_key " "and GXD_ISResultStructure._Structure_key = GXD_Structure._Structure_key " "and GXD_Pattern._Pattern_key = GXD_InSituResult._Pattern_key " "and GXD_Specimen._Specimen_key = GXD_InSituResult._Specimen_key " "and GXD_Specimen._Assay_key = %s " , imagePaneKey, assayKey); sr = sqlGetResultVerbose(conn, query->string); while ((row = sqlNextRow(sr)) != NULL) { char *bodyPart = skipLeadingSpaces(row[0]); if (bodyPart[0] != 0) { int expression = sqlSigned(row[1]); char *pattern = skipLeadingSpaces(row[2]); float level; int lastChar = strlen(bodyPart)-1; /* Strip trailing # if any from body part. */ if (bodyPart[lastChar] == '#') bodyPart[lastChar] = 0; subChar(bodyPart,';','-'); /* treat expression 2 "present" as full 1.0 so will appear as (+) * treat -1 and -2 (not applic and not spec) as 1.0 + so won't disappear * 0 shouldn't happen but we'll catch it anyway */ switch(expression) { case 3: case 4: case 5: case 6: case 7: level = (float)(expression - 2) / 6.0; break; case 1: level = 0.0; break; case -2: case -1: case 0: case 2: case 8: default: level = 1.0; } fprintf(f, "\texpression\t%s\t%f\t\t\t%s\n", bodyPart, level, pattern); } } sqlFreeResult(&sr); }
// Rewrite sym->cname, replacing operator and punctuation characters with
// spelled-out tokens (for example '>' becomes "_GREATER_" and '.' becomes
// "_DOT_").  Symbols flagged FLAG_EXTERN are left untouched.  After the scan,
// a name that starts with '_' followed by another '_' or an upper-case letter
// gets the prefix "chpl__".
//
// Each replacement goes through subChar(), and scanning continues from the
// pointer that subChar() returns.
static void legalizeName(Symbol* sym) {
  if (sym->hasFlag(FLAG_EXTERN))
    return;

  for (const char* ch = sym->cname; *ch != '\0'; ch++) {
    switch (*ch) {
    case '>': ch = subChar(sym, ch, "_GREATER_"); break;
    case '<': ch = subChar(sym, ch, "_LESS_"); break;
    case '=':
      {
        /* To help generated code readability, we'd like to convert =
           into "ASSIGN" and == into "EQUALS".  Unfortunately, because
           of the character-at-a-time approach taken here combined
           with the fact that subChar() returns a completely new
           string on every call, the way I implemented this is a bit
           ugly (in part because I didn't want to spend the time to
           reimplement this whole function) -BLC */

        static const char* equalsStr = "_EQUALS_";
        static int equalsLen = strlen(equalsStr);

        if (*(ch+1) == '=') {
          // If we're in the == case, replace the first = with EQUALS
          ch = subChar(sym, ch, equalsStr);
        } else {
          // Compare offsets rather than computing ch-equalsLen first:
          // forming a pointer before the start of cname is undefined
          // behaviour when fewer than equalsLen characters precede ch.
          if ((ch - sym->cname >= equalsLen) &&
              strncmp(ch-equalsLen, equalsStr, equalsLen) == 0) {
            // Otherwise, if the thing preceding this '=' is the
            // string _EQUALS_, we must have been the second '=' and
            // we should just replace ourselves with an underscore to
            // make things legal.
            ch = subChar(sym, ch, "_");
          } else {
            // Otherwise, this must have simply been a standalone '='
            ch = subChar(sym, ch, "_ASSIGN_");
          }
        }
        break;
      }
    case '*': ch = subChar(sym, ch, "_ASTERISK_"); break;
    case '/': ch = subChar(sym, ch, "_SLASH_"); break;
    case '%': ch = subChar(sym, ch, "_PERCENT_"); break;
    case '+': ch = subChar(sym, ch, "_PLUS_"); break;
    case '-': ch = subChar(sym, ch, "_HYPHEN_"); break;
    case '^': ch = subChar(sym, ch, "_CARET_"); break;
    case '&': ch = subChar(sym, ch, "_AMPERSAND_"); break;
    case '|': ch = subChar(sym, ch, "_BAR_"); break;
    case '!': ch = subChar(sym, ch, "_EXCLAMATION_"); break;
    case '#': ch = subChar(sym, ch, "_POUND_"); break;
    case '?': ch = subChar(sym, ch, "_QUESTION_"); break;
    case '$': ch = subChar(sym, ch, "_DOLLAR_"); break;
    case '~': ch = subChar(sym, ch, "_TILDE_"); break;
    case '.': ch = subChar(sym, ch, "_DOT_"); break;
    case ' ': ch = subChar(sym, ch, "_SPACE_"); break;
    default: break;
    }
  }

  // Add chpl__ to operator names: anything that now starts with '_' followed
  // by '_' or an upper-case letter.  The cname[1] read is safe because it is
  // only evaluated when cname[0] is '_', so the string is non-empty.
  if ((sym->cname[0] == '_' &&
      (sym->cname[1] == '_' || (sym->cname[1] >= 'A' && sym->cname[1] <= 'Z')))) {
    sym->cname = astr("chpl__", sym->cname);
  }
}
static void mafOrAxtClick2(struct sqlConnection *conn, struct sqlConnection *conn2, struct trackDb *tdb, char *axtOtherDb, char *fileName) /* Display details for MAF or AXT tracks. */ { hgBotDelay(); if (winEnd - winStart > 30000) { printf("Zoom so that window is 30,000 bases or less to see alignments and conservation statistics\n"); } else { struct mafAli *mafList = NULL, *maf, *subList = NULL; int aliIx = 0, realCount = 0; char dbChrom[64]; char option[128]; char *capTrack; struct consWiggle *consWig, *consWiggles; struct hash *speciesOffHash = NULL; char *speciesOrder = NULL; char *speciesTarget = trackDbSetting(tdb, SPECIES_TARGET_VAR); char buffer[1024]; int useTarg = FALSE; int useIrowChains = FALSE; safef(option, sizeof(option), "%s.%s", tdb->track, MAF_CHAIN_VAR); if (cartCgiUsualBoolean(cart, option, FALSE) && trackDbSetting(tdb, "irows") != NULL) useIrowChains = TRUE; safef(buffer, sizeof(buffer), "%s.vis",tdb->track); if (useIrowChains) { if (!cartVarExists(cart, buffer) && (speciesTarget != NULL)) useTarg = TRUE; else { char *val; val = cartUsualString(cart, buffer, "useCheck"); useTarg = sameString("useTarg",val); } } if (sameString(tdb->type, "bigMaf")) { char *fileName = trackDbSetting(tdb, "bigDataUrl"); struct bbiFile *bbi = bigBedFileOpen(fileName); mafList = bigMafLoadInRegion(bbi, seqName, winStart, winEnd); } else mafList = mafOrAxtLoadInRegion2(conn,conn2, tdb, seqName, winStart, winEnd, axtOtherDb, fileName); safef(dbChrom, sizeof(dbChrom), "%s.%s", hubConnectSkipHubPrefix(database), seqName); safef(option, sizeof(option), "%s.speciesOrder", tdb->track); speciesOrder = cartUsualString(cart, option, NULL); if (speciesOrder == NULL) speciesOrder = trackDbSetting(tdb, "speciesOrder"); for (maf = mafList; maf != NULL; maf = maf->next) { int mcCount = 0; struct mafComp *mc; struct mafAli *subset; struct mafComp *nextMc; /* remove empty components and configured off components * from MAF, and ignore * the entire MAF if all components are empty * 
(solely for gap annotation) */ if (!useTarg) { for (mc = maf->components->next; mc != NULL; mc = nextMc) { char buf[64]; char *organism; mafSrcDb(mc->src, buf, sizeof buf); organism = hOrganism(buf); if (!organism) organism = buf; nextMc = mc->next; safef(option, sizeof(option), "%s.%s", tdb->track, buf); if (!cartUsualBoolean(cart, option, TRUE)) { if (speciesOffHash == NULL) speciesOffHash = newHash(4); hashStoreName(speciesOffHash, organism); } if (!cartUsualBoolean(cart, option, TRUE)) slRemoveEl(&maf->components, mc); else mcCount++; } } if (mcCount == 0) continue; if (speciesOrder) { int speciesCt; char *species[2048]; struct mafComp **newOrder, *mcThis; int i; mcCount = 0; speciesCt = chopLine(cloneString(speciesOrder), species); newOrder = needMem((speciesCt + 1) * sizeof (struct mafComp *)); newOrder[mcCount++] = maf->components; for (i = 0; i < speciesCt; i++) { if ((mcThis = mafMayFindCompSpecies(maf, species[i], '.')) == NULL) continue; newOrder[mcCount++] = mcThis; } maf->components = NULL; for (i = 0; i < mcCount; i++) { newOrder[i]->next = 0; slAddHead(&maf->components, newOrder[i]); } slReverse(&maf->components); } subset = mafSubsetE(maf, dbChrom, winStart, winEnd, TRUE); if (subset != NULL) { /* Reformat MAF if needed so that sequence from current * database is the first component and on the * plus strand. 
*/ mafMoveComponentToTop(subset, dbChrom); if (subset->components->strand == '-') mafFlipStrand(subset); subset->score = mafScoreMultiz(subset); slAddHead(&subList, subset); ++realCount; } } slReverse(&subList); mafAliFreeList(&mafList); if (subList != NULL) { char *showVarName = "hgc.showMultiBase"; char *showVarVal = cartUsualString(cart, showVarName, "all"); boolean onlyDiff = sameWord(showVarVal, "diff"); #ifdef ADDEXONCAPITAL char *codeVarName = "hgc.multiCapCoding"; char *codeVarVal = cartUsualString(cart, codeVarName, "coding"); boolean onlyCds = sameWord(codeVarVal, "coding"); #endif /* add links for conservation score statistics */ consWiggles = wigMafWiggles(database, tdb); int wigCount = slCount(consWiggles); if (wigCount == 1) { conservationStatsLink(tdb, "Conservation score statistics", consWiggles->table); } else if (wigCount > 1) { /* multiple wiggles. List all that have been turned on with * checkboxes */ /* Scan for cart variables -- do any exist, are any turned on ? */ boolean wigSet = FALSE; boolean wigOn = FALSE; for (consWig = consWiggles; consWig != NULL; consWig = consWig->next) { char *wigVarSuffix = NULL; (void)wigMafWiggleVar(tdb->track, consWig, &wigVarSuffix); if (cartVarExistsAnyLevel(cart, tdb, FALSE, wigVarSuffix)) { wigSet = TRUE; if (cartBooleanClosestToHome(cart, tdb, FALSE, wigVarSuffix)) wigOn = TRUE; } } /* If there are no cart vars, turn on the first (default) wig */ if (!wigSet) { char *prefix = tdb->track; // use when setting things to the cart if (tdbIsContainerChild(tdb)) prefix = tdbGetContainer(tdb)->track; cartSetBoolean(cart, wigMafWiggleVar(prefix, consWiggles, NULL), TRUE); wigOn = TRUE; } if (wigOn) { boolean first = TRUE; for (consWig = consWiggles; consWig != NULL; consWig = consWig->next) { if (first) { printf("Conservation score statistics:"); first = FALSE; } char *wigVarSuffix = NULL; (void)wigMafWiggleVar(tdb->track, consWig, &wigVarSuffix); if (cartUsualBooleanClosestToHome(cart, tdb, FALSE, 
wigVarSuffix,FALSE)) { printf(" "); subChar(consWig->uiLabel, '_', ' '); conservationStatsLink(tdb, consWig->uiLabel, consWig->table); } } } } puts("</P>\n"); /* no alignment to display when in visibilities where only wiggle is shown */ char *vis = cartOptionalString(cart, tdb->track); if (vis) { enum trackVisibility tv = hTvFromStringNoAbort(vis); if (tv == tvSquish || tv == tvDense) return; } #ifdef ADDEXONCAPITAL puts("<FORM ACTION=\"../cgi-bin/hgc\" NAME=\"gpForm\" METHOD=\"GET\">"); cartSaveSession(cart); cgiContinueHiddenVar("g"); cgiContinueHiddenVar("c"); cgiContinueHiddenVar("i"); printf("Capitalize "); cgiMakeDropListFull(codeVarName, codeAll, codeAll, ArraySize(codeAll), codeVarVal, autoSubmit); printf("exons based on "); capTrack = genePredDropDown(cart, trackHash, "gpForm", "hgc.multiCapTrack"); #endif printf("show "); cgiMakeDropListFull(showVarName, showAll, showAll, ArraySize(showAll), showVarVal, autoSubmit); printf("bases"); printf("<BR>\n"); printf("</FORM>\n"); #ifdef REVERSESTRAND /* notify if bases are complemented (hgTracks is on reverse strand) */ if (cartCgiUsualBoolean(cart, COMPLEMENT_BASES_VAR, FALSE)) puts("<EM>Alignment displayed on reverse strand</EM><BR>"); #endif puts("Place cursor over species for alignment detail. 
Click on 'B' to link to browser "); puts("for aligned species, click on 'D' to get DNA for aligned species.<BR>"); printf("<TT><PRE>"); /* notify if species removed from alignment */ if (speciesOffHash) { char *species; struct hashCookie hc = hashFirst(speciesOffHash); puts("<B>Components not displayed:</B> "); while ((species = hashNextName(&hc)) != NULL) printf("%s ", species); puts("<BR>"); } for (maf = subList; maf != NULL; maf = maf->next) { mafLowerCase(maf); #ifdef ADDEXONCAPITAL if (capTrack != NULL) capMafOnTrack(maf, capTrack, onlyCds); #endif printf("<B>Alignment block %d of %d in window, %d - %d, %d bps </B>\n", ++aliIx,realCount,maf->components->start + 1, maf->components->start + maf->components->size, maf->components->size); mafPrettyOut(stdout, maf, 70,onlyDiff, aliIx); } mafAliFreeList(&subList); } else { printf("No multiple alignment in browser window"); } printf("</PRE></TT>"); } }
void blatSeq(char *userSeq, char *organism) /* Blat sequence user pasted in. */ { FILE *f; struct dnaSeq *seqList = NULL, *seq; struct tempName pslTn, faTn; int maxSingleSize, maxTotalSize, maxSeqCount; int minSingleSize = minMatchShown; char *genome, *db; char *type = cgiString("type"); char *seqLetters = cloneString(userSeq); struct serverTable *serve; int conn; int oneSize, totalSize = 0, seqCount = 0; boolean isTx = FALSE; boolean isTxTx = FALSE; boolean txTxBoth = FALSE; struct gfOutput *gvo; boolean qIsProt = FALSE; enum gfType qType, tType; struct hash *tFileCache = gfFileCacheNew(); boolean feelingLucky = cgiBoolean("Lucky"); getDbAndGenome(cart, &db, &genome, oldVars); if(!feelingLucky) cartWebStart(cart, db, "%s BLAT Results", trackHubSkipHubName(organism)); /* Load user sequence and figure out if it is DNA or protein. */ if (sameWord(type, "DNA")) { seqList = faSeqListFromMemText(seqLetters, TRUE); uToT(seqList); isTx = FALSE; } else if (sameWord(type, "translated RNA") || sameWord(type, "translated DNA")) { seqList = faSeqListFromMemText(seqLetters, TRUE); uToT(seqList); isTx = TRUE; isTxTx = TRUE; txTxBoth = sameWord(type, "translated DNA"); } else if (sameWord(type, "protein")) { seqList = faSeqListFromMemText(seqLetters, FALSE); isTx = TRUE; qIsProt = TRUE; } else { seqList = faSeqListFromMemTextRaw(seqLetters); isTx = !seqIsDna(seqList); if (!isTx) { for (seq = seqList; seq != NULL; seq = seq->next) { seq->size = dnaFilteredSize(seq->dna); dnaFilter(seq->dna, seq->dna); toLowerN(seq->dna, seq->size); subChar(seq->dna, 'u', 't'); } } else { for (seq = seqList; seq != NULL; seq = seq->next) { seq->size = aaFilteredSize(seq->dna); aaFilter(seq->dna, seq->dna); toUpperN(seq->dna, seq->size); } qIsProt = TRUE; } } if (seqList != NULL && seqList->name[0] == 0) { freeMem(seqList->name); seqList->name = cloneString("YourSeq"); } trimUniq(seqList); /* If feeling lucky only do the first on. 
*/ if(feelingLucky && seqList != NULL) { seqList->next = NULL; } /* Figure out size allowed. */ maxSingleSize = (isTx ? 10000 : 75000); maxTotalSize = maxSingleSize * 2.5; #ifdef LOWELAB maxSeqCount = 200; #else maxSeqCount = 25; #endif /* Create temporary file to store sequence. */ trashDirFile(&faTn, "hgSs", "hgSs", ".fa"); faWriteAll(faTn.forCgi, seqList); /* Create a temporary .psl file with the alignments against genome. */ trashDirFile(&pslTn, "hgSs", "hgSs", ".pslx"); f = mustOpen(pslTn.forCgi, "w"); gvo = gfOutputPsl(0, qIsProt, FALSE, f, FALSE, TRUE); serve = findServer(db, isTx); /* Write header for extended (possibly protein) psl file. */ if (isTx) { if (isTxTx) { qType = gftDnaX; tType = gftDnaX; } else { qType = gftProt; tType = gftDnaX; } } else { qType = gftDna; tType = gftDna; } pslxWriteHead(f, qType, tType); if (qType == gftProt) { minSingleSize = 14; } else if (qType == gftDnaX) { minSingleSize = 36; } /* Loop through each sequence. */ for (seq = seqList; seq != NULL; seq = seq->next) { printf(" "); fflush(stdout); /* prevent apache cgi timeout by outputting something */ oneSize = realSeqSize(seq, !isTx); if ((seqCount&1) == 0) // Call bot delay every 2nd time starting with first time hgBotDelay(); if (++seqCount > maxSeqCount) { warn("More than 25 input sequences, stopping at %s.", seq->name); break; } if (oneSize > maxSingleSize) { warn("Sequence %s is %d letters long (max is %d), skipping", seq->name, oneSize, maxSingleSize); continue; } if (oneSize < minSingleSize) { warn("Warning: Sequence %s is only %d letters long (%d is the recommended minimum)", seq->name, oneSize, minSingleSize); // we could use "continue;" here to actually enforce skipping, // but let's give the short sequence a chance, it might work. // minimum possible length = tileSize+stepSize, so mpl=16 for dna stepSize=5, mpl=10 for protein. 
if (qIsProt && oneSize < 1) // protein does not tolerate oneSize==0 continue; } totalSize += oneSize; if (totalSize > maxTotalSize) { warn("Sequence %s would take us over the %d letter limit, stopping here.", seq->name, maxTotalSize); break; } conn = gfConnect(serve->host, serve->port); if (isTx) { gvo->reportTargetStrand = TRUE; if (isTxTx) { gfAlignTransTrans(&conn, serve->nibDir, seq, FALSE, 5, tFileCache, gvo, !txTxBoth); if (txTxBoth) { reverseComplement(seq->dna, seq->size); conn = gfConnect(serve->host, serve->port); gfAlignTransTrans(&conn, serve->nibDir, seq, TRUE, 5, tFileCache, gvo, FALSE); } } else { gfAlignTrans(&conn, serve->nibDir, seq, 5, tFileCache, gvo); } } else { gfAlignStrand(&conn, serve->nibDir, seq, FALSE, minMatchShown, tFileCache, gvo); reverseComplement(seq->dna, seq->size); conn = gfConnect(serve->host, serve->port); gfAlignStrand(&conn, serve->nibDir, seq, TRUE, minMatchShown, tFileCache, gvo); } gfOutputQuery(gvo, f); } carefulClose(&f); showAliPlaces(pslTn.forCgi, faTn.forCgi, serve->db, qType, tType, organism, feelingLucky); if(!feelingLucky) cartWebEnd(); gfFileCacheFree(&tFileCache); }
void loadGeneToMotif(struct sqlConnection *conn, char *fileName, char *table, struct hash *geneToModuleHash, struct hash *moduleAndMotifHash, struct hash *motifHash, struct hash *positionsHash, char *regionTable) /* Load file which is a big matrix with genes for rows and motifs for * columns. There is a semicolon-separated list of numbers in the matrix * where a gene has the motif, and an empty (tab separated) field * where there is no motif. The numbers are relative to the * region associated with the gene in the positionsHash. * Only load bits of this where motif actually occurs in module associated * with gene. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; FILE *f = hgCreateTabFile(tmpDir, table); char *motifNames[32*1024], *row[32*1024]; int motifCount, rowSize, i; char *gene, *module; int geneCount = 0, total = 0; struct dyString *dy = dyStringNew(512); struct genomePos *motifPosList = NULL, *motifPosForGene; struct genomePos *regionPosList = NULL, *regionPos; /* Read first line, which is labels. */ if (!lineFileNextReal(lf, &line)) errAbort("Empty file %s", fileName); subChar(line, ' ', '_'); motifCount = chopLine(line, motifNames); if (motifCount >= ArraySize(motifNames)) errAbort("Too many motifs line 1 of %s", fileName); lineFileExpectAtLeast(lf, 2, motifCount); motifNames[0] = NULL; for (i=1; i<motifCount; ++i) { char name[64]; motifNames[i] = cloneString(fixMotifName(motifNames[i],name,sizeof(name))); if (!hashLookup(motifHash, motifNames[i])) errAbort("Motif %s is in %s but not modules_motifs.gxm", motifNames[i], fileName); } /* Read subsequent lines. 
*/ while ((rowSize = lineFileChopTab(lf, row)) != 0) { lineFileExpectWords(lf, motifCount, rowSize); gene = row[0]; module = hashFindVal(geneToModuleHash, gene); if (module == NULL) { warn("WARNING: Gene %s in line %d of %s but not module_assignments.tab", gene, lf->lineIx, lf->fileName); continue; } regionPos = NULL; for (i=1; i<rowSize; ++i) { if (row[i][0] != 0) { if (hashLookup2(moduleAndMotifHash, module, motifNames[i])) { regionPos = hashFindVal(positionsHash, gene); if (regionPos == NULL) { warn("WARNING: %s in %s but not gene_positions.tab", gene, fileName); i = rowSize; continue; } motifPosForGene = convertMotifPos(row[i], regionPos, hashMustFindVal(motifHash, motifNames[i]), lf); motifPosList = slCat(motifPosForGene, motifPosList); ++total; } } } if (regionPos != NULL) { slAddHead(®ionPosList, regionPos); } ++geneCount; } lineFileClose(&lf); /* Output sorted table of all motif hits. */ { struct genomePos *pos; slSort(&motifPosList, genomePosCmp); for (pos = motifPosList; pos != NULL; pos = pos->next) { int start = pos->start; int end = pos->end; if (start < 0) start = 0; fprintf(f, "%d\t", binFromRange(start, end)); fprintf(f, "%s\t", pos->chrom); fprintf(f, "%d\t%d\t", start, end); fprintf(f, "%s\t", pos->motif); fprintf(f, "%d\t", pos->score); fprintf(f, "%c\t", pos->strand); fprintf(f, "%s\n", pos->name); } dyStringPrintf(dy, "CREATE TABLE %s (\n" " bin smallInt unsigned not null,\n" " chrom varChar(255) not null,\n" " chromStart int not null,\n" " chromEnd int not null,\n" " name varchar(255) not null,\n" " score int not null,\n" " strand char(1) not null,\n" " gene varchar(255) not null,\n" " #Indices\n" " INDEX(gene(12)),\n" " INDEX(name(16)),\n" " INDEX(chrom(8),bin)\n" ")\n", table); sqlRemakeTable(conn, table, dy->string); verbose(1, "%d genes, %d motifs, %d motifs in genes\n", geneCount, motifCount-1, total); hgLoadTabFile(conn, tmpDir, table, &f); // hgRemoveTabFile(tmpDir, table); verbose(1, "Loaded %s table\n", table); 
slFreeList(&motifPosList); } /* Now output sorted table of upstream regions. */ { FILE *f = hgCreateTabFile(tmpDir, regionTable); struct genomePos *pos; dyStringClear(dy); dyStringPrintf(dy, "CREATE TABLE %s (\n" " bin smallInt unsigned not null,\n" " chrom varChar(255) not null,\n" " chromStart int not null,\n" " chromEnd int not null,\n" " name varchar(255) not null,\n" " score int not null,\n" " strand char(1) not null,\n" " #Indices\n" " INDEX(name(16)),\n" " INDEX(chrom(8),bin)\n" ")\n", regionTable); sqlRemakeTable(conn, regionTable, dy->string); slSort(®ionPosList, genomePosCmp); for (pos = regionPosList; pos != NULL; pos = pos->next) { int start = pos->start; int end = pos->end; if (start < 0) start = 0; fprintf(f, "%d\t", binFromRange(start, end)); fprintf(f, "%s\t", pos->chrom); fprintf(f, "%d\t%d\t", start, end); fprintf(f, "%s\t", pos->name); fprintf(f, "%d\t", pos->score); fprintf(f, "%c\n", pos->strand); } hgLoadTabFile(conn, tmpDir, regionTable, &f); // hgRemoveTabFile(tmpDir, regionTable); } }
void submitRefToFiles(struct sqlConnection *conn, struct sqlConnection *conn2, struct sqlConnection *connSp, char *ref, char *fileRoot, char *inJax) /* Create a .ra and a .tab file for given reference. */ { /* Initially the tab file will have some duplicate lines, so * write to temp file, and then filter. */ char raName[PATH_LEN], tabName[PATH_LEN], capName[PATH_LEN]; FILE *ra = NULL, *tab = NULL, *cap = NULL; struct dyString *query = dyStringNew(0); struct sqlResult *sr; char **row; char *pubMed; struct slName *list, *el; boolean gotAny = FALSE; struct hash *uniqImageHash = newHash(0); struct hash *captionHash = newHash(0); int imageWidth = 0, imageHeight = 0; char path[PATH_LEN]; struct dyString *caption = dyStringNew(0); struct dyString *copyright = dyStringNew(0); struct dyString *probeNotes = dyStringNew(0); boolean lookedForCopyright = FALSE; safef(raName, sizeof(raName), "%s.ra", fileRoot); safef(tabName, sizeof(tabName), "%s.tab", fileRoot); safef(capName, sizeof(capName), "%s.txt", fileRoot); tab = mustOpen(tabName, "w"); cap = mustOpen(capName, "w"); sqlDyStringPrintf(query, "select authors,journal,title,year from BIB_Refs where "); sqlDyStringPrintf(query, "_Refs_key = '%s'", ref); sr = sqlGetResultVerbose(conn, query->string); row = sqlNextRow(sr); if (row == NULL) errAbort("Can't find _Refs_key %s in BIB_Refs", ref); /* Make ra file with stuff common to whole submission set. */ ra = mustOpen(raName, "w"); fprintf(ra, "submissionSource MGI\n"); fprintf(ra, "acknowledgement Thanks to the Gene Expression Database group at " "Mouse Genome Informatics (MGI) for collecting, annotating and sharing " "this image. The MGI images were last updated in VisiGene on March 28, 2006. 
" "Additional and more up to date annotations and images may be available " "directly at <A HREF='http://www.informatics.jax.org' target='_blank'>MGI.</A>\n"); fprintf(ra, "submitSet jax%s\n", ref); fprintf(ra, "taxon 10090\n"); /* Mus musculus taxon */ fprintf(ra, "fullDir http://hgwdev.gi.ucsc.edu/visiGene/full/inSitu/Mouse/jax\n"); fprintf(ra, "thumbDir http://hgwdev.gi.ucsc.edu/visiGene/200/inSitu/Mouse/jax\n"); fprintf(ra, "setUrl http://www.informatics.jax.org/\n"); fprintf(ra, "itemUrl http://www.informatics.jax.org/searches/image.cgi?%%s\n"); fprintf(ra, "abUrl http://www.informatics.jax.org/searches/antibody.cgi?%%s\n"); fprintf(ra, "journal %s\n", row[1]); fprintf(ra, "publication %s\n", row[2]); fprintf(ra, "year %s\n", row[3]); /* The contributor (author) list is in format Kent WJ; Haussler DH; format in * Jackson. We convert it to Kent W.J.,Haussler D.H., format for visiGene. */ fprintf(ra, "contributor "); list = charSepToSlNames(row[0], ';'); for (el = list; el != NULL; el = el->next) { char *lastName = skipLeadingSpaces(el->name); char *initials = strrchr(lastName, ' '); if (initials == NULL) initials = ""; else *initials++ = 0; fprintf(ra, "%s", lastName); if (initials[0] != 0) { char c; fprintf(ra, " "); while ((c = *initials++) != 0) fprintf(ra, "%c.", c); } fprintf(ra, ","); } fprintf(ra, "\n"); slNameFreeList(&list); sqlFreeResult(&sr); /* Add in link to PubMed record on publication. 
*/ dyStringClear(query); sqlDyStringPrintf(query, "select ACC_Accession.accID from ACC_Accession,ACC_LogicalDB " "where ACC_Accession._Object_key = %s " "and ACC_Accession._LogicalDB_key = ACC_LogicalDB._LogicalDB_key " "and ACC_LogicalDB.name = 'PubMed'", ref); pubMed = sqlQuickStringVerbose(conn, query->string); if (pubMed != NULL) fprintf(ra, "pubUrl https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=%s\n", pubMed); freez(&pubMed); dyStringClear(query); sqlDyStringPrintf(query, "select distinct MRK_Marker.symbol as gene," "GXD_Specimen.sex as sex," "GXD_Specimen.age as age," "GXD_Specimen.ageMin as ageMin," "GXD_Specimen.ageMax as ageMax," "IMG_ImagePane.paneLabel as paneLabel," "ACC_Accession.numericPart as fileKey," "IMG_Image._Image_key as imageKey," "GXD_Assay._ProbePrep_key as probePrepKey," "GXD_Assay._AntibodyPrep_key as antibodyPrepKey," "GXD_Assay._ReporterGene_key as reporterGeneKey," "GXD_FixationMethod.fixation as fixation," "GXD_EmbeddingMethod.embeddingMethod as embedding," "GXD_Assay._Assay_key as assayKey," "GXD_Specimen.hybridization as sliceType," "GXD_Specimen._Genotype_key as genotypeKey," "IMG_ImagePane._ImagePane_key as imagePaneKey\n" "from MRK_Marker," "GXD_Assay," "GXD_Specimen," "GXD_InSituResult," "GXD_InSituResultImage," "GXD_FixationMethod," "GXD_EmbeddingMethod," "IMG_ImagePane," "IMG_Image," "ACC_Accession\n" "where MRK_Marker._Marker_key = GXD_Assay._Marker_key " "and GXD_Assay._Assay_key = GXD_Specimen._Assay_key " "and GXD_Specimen._Specimen_key = GXD_InSituResult._Specimen_key " "and GXD_InSituResult._Result_key = GXD_InSituResultImage._Result_key " "and GXD_InSituResultImage._ImagePane_key = IMG_ImagePane._ImagePane_key " "and GXD_FixationMethod._Fixation_key = GXD_Specimen._Fixation_key " "and GXD_EmbeddingMethod._Embedding_key = GXD_Specimen._Embedding_key " "and IMG_ImagePane._Image_key = IMG_Image._Image_key " "and IMG_Image._Image_key = ACC_Accession._Object_key " "and 
ACC_Accession.prefixPart = 'PIX:' " "and GXD_Assay._ImagePane_key is NULL " ); sqlDyStringPrintf(query, "and GXD_Assay._Refs_key = '%s'", ref); sr = sqlGetResultVerbose(conn, query->string); fprintf(tab, "#"); fprintf(tab, "gene\t"); fprintf(tab, "probeColor\t"); fprintf(tab, "sex\t"); fprintf(tab, "age\t"); fprintf(tab, "ageMin\t"); fprintf(tab, "ageMax\t"); fprintf(tab, "paneLabel\t"); fprintf(tab, "fileName\t"); fprintf(tab, "submitId\t"); fprintf(tab, "fPrimer\t"); fprintf(tab, "rPrimer\t"); fprintf(tab, "abName\t"); fprintf(tab, "abTaxon\t"); fprintf(tab, "abSubmitId\t"); fprintf(tab, "fixation\t"); fprintf(tab, "embedding\t"); fprintf(tab, "bodyPart\t"); fprintf(tab, "sliceType\t"); fprintf(tab, "genotype\t"); fprintf(tab, "strain\t"); fprintf(tab, "priority\t"); fprintf(tab, "captionId\t"); fprintf(tab, "imageWidth\t"); fprintf(tab, "imageHeight\n"); while ((row = sqlNextRow(sr)) != NULL) { char *gene = row[0]; char *sex = row[1]; char *age = row[2]; char *ageMin = row[3]; char *ageMax = row[4]; char *paneLabel = row[5]; char *fileKey = row[6]; char *imageKey = row[7]; char *probePrepKey = row[8]; char *antibodyPrepKey = row[9]; char *reporterGeneKey = row[10]; char *fixation = row[11]; char *embedding = row[12]; char *assayKey = row[13]; char *sliceType = row[14]; char *genotypeKey = row[15]; char *imagePaneKey = row[16]; double calcAge = -1; char *probeColor = ""; char *bodyPart = ""; char *abName = NULL; char *rPrimer = NULL, *fPrimer = NULL; char *genotype = NULL; char *strain = NULL; char *priority = NULL; char abTaxon[32]; char *captionId = ""; char *abSubmitId = NULL; verbose(3, " "); dumpRow(row, 16); if (age == NULL) continue; if (!lookedForCopyright) { struct sqlResult *sr = NULL; char **row; lookedForCopyright = TRUE; dyStringClear(query); sqlDyStringPrintf(query, "select note from MGI_NoteChunk,MGI_Note,MGI_NoteType,ACC_MGIType " "where MGI_Note._Object_key = %s " "and ACC_MGIType.name = 'Image' " "and ACC_MGIType._MGIType_key = 
MGI_Note._MGIType_key " "and MGI_NoteType.noteType='Copyright' " "and MGI_Note._NoteType_key = MGI_NoteType._NoteType_key " "and MGI_Note._Note_key = MGI_NoteChunk._Note_key " "order by sequenceNum" , imageKey); sr = sqlGetResultVerbose(conn2, query->string); while ((row = sqlNextRow(sr)) != NULL) dyStringAppend(copyright, row[0]); sqlFreeResult(&sr); verbose(2,"imageKey=%s\n",imageKey); if (copyright->stringSize != 0) { fprintf(ra, "copyright %s\n", copyright->string); } } /* Massage sex */ { if (sameString(sex, "Male")) sex = "male"; else if (sameString(sex, "Female")) sex = "female"; else sex = ""; } /* Massage age */ { char *embryoPat = "embryonic day "; char *newbornPat = "postnatal newborn"; char *dayPat = "postnatal day "; char *weekPat = "postnatal week "; char *adultPat = "postnatal adult"; double calcMinAge = atof(ageMin); double calcMaxAge = atof(ageMax); double mouseBirthAge = 21.0; //double mouseAdultAge = 63.0; /* Relative to conception, not birth */ if (age[0] == 0) { warn("age null, ageMin %s, ageMax %s\n", ageMin, ageMax); calcAge = (calcMinAge + calcMaxAge) * 0.5; } else if (startsWith(embryoPat, age)) calcAge = atof(age+strlen(embryoPat)); else if (sameString(newbornPat, age)) calcAge = mouseBirthAge; else if (startsWith(dayPat, age)) calcAge = atof(age+strlen(dayPat)) + mouseBirthAge; else if (startsWith(weekPat, age)) calcAge = 7.0 * atof(age+strlen(weekPat)) + mouseBirthAge; else if (sameString(adultPat, age) && calcMaxAge - calcMinAge > 1000 && calcMinAge < 365) calcAge = 365; /* Most adult mice are relatively young */ else { warn("Calculating age from %s", age); calcAge = (calcMinAge + calcMaxAge) * 0.5; } if (calcAge < calcMinAge) calcAge = calcMinAge; if (calcAge > calcMaxAge) calcAge = calcMaxAge; } /* Massage probeColor */ { if (!isStrNull(reporterGeneKey)) { /* Fixme: make sure that reporterGene's end up in probeType table. 
*/ char *name = NULL; dyStringClear(query); sqlDyStringPrintf(query, "select term from VOC_Term where _Term_key = %s", reporterGeneKey); name = sqlQuickStringVerbose(conn2, query->string); if (name == NULL) warn("Can't find _ReporterGene_key %s in VOC_Term", reporterGeneKey); else if (sameString(name, "GFP")) probeColor = "green"; else if (sameString(name, "lacZ")) probeColor = "blue"; else warn("Don't know color of reporter gene %s", name); freez(&name); } if (!isStrNull(probePrepKey)) { char *name = NULL; dyStringClear(query); sqlDyStringPrintf(query, "select GXD_VisualizationMethod.visualization " "from GXD_VisualizationMethod,GXD_ProbePrep " "where GXD_ProbePrep._ProbePrep_key = %s " "and GXD_ProbePrep._Visualization_key = GXD_VisualizationMethod._Visualization_key" , probePrepKey); name = sqlQuickStringVerbose(conn2, query->string); if (name == NULL) warn("Can't find visualization from _ProbePrep_key %s", probePrepKey); probeColor = colorFromLabel(name, gene); freez(&name); if (probeColor[0] == 0) { dyStringClear(query); sqlDyStringPrintf(query, "select GXD_Label.label from GXD_Label,GXD_ProbePrep " "where GXD_ProbePrep._ProbePrep_key = %s " "and GXD_ProbePrep._Label_key = GXD_Label._Label_key" , probePrepKey); name = sqlQuickStringVerbose(conn2, query->string); if (name == NULL) warn("Can't find label from _ProbePrep_key %s", probePrepKey); probeColor = colorFromLabel(name, gene); } freez(&name); } if (!isStrNull(antibodyPrepKey) && probeColor[0] == 0 ) { char *name = NULL; dyStringClear(query); sqlDyStringPrintf(query, "select GXD_Label.label from GXD_Label,GXD_AntibodyPrep " "where GXD_AntibodyPrep._AntibodyPrep_key = %s " "and GXD_AntibodyPrep._Label_key = GXD_Label._Label_key" , antibodyPrepKey); name = sqlQuickStringVerbose(conn2, query->string); if (name == NULL) warn("Can't find label from _AntibodyPrep_key %s", antibodyPrepKey); probeColor = colorFromLabel(name, gene); freez(&name); } } /* Get abName, abTaxon, abSubmitId */ abTaxon[0] = 0; if 
(!isStrNull(antibodyPrepKey)) { struct sqlResult *sr = NULL; int orgKey = 0; char **row; dyStringClear(query); sqlDyStringPrintf(query, "select antibodyName,_Organism_key,GXD_Antibody._Antibody_key " "from GXD_AntibodyPrep,GXD_Antibody " "where GXD_AntibodyPrep._AntibodyPrep_key = %s " "and GXD_AntibodyPrep._Antibody_key = GXD_Antibody._Antibody_key" , antibodyPrepKey); sr = sqlGetResultVerbose(conn2, query->string); row = sqlNextRow(sr); if (row != NULL) { abName = cloneString(row[0]); orgKey = atoi(row[1]); abSubmitId = cloneString(row[2]); } sqlFreeResult(&sr); if (orgKey > 0) { char *latinName = NULL, *commonName = NULL; int spTaxon = 0; dyStringClear(query); sqlDyStringPrintf(query, "select latinName from MGI_Organism " "where _Organism_key = %d", orgKey); latinName = sqlQuickStringVerbose(conn2, query->string); if (latinName != NULL && !sameString(latinName, "Not Specified") && !sameString(latinName, "Not Applicable")) { char *e = strchr(latinName, '/'); if (e != NULL) *e = 0; /* Chop off / and after. 
*/ spTaxon = spBinomialToTaxon(connSp, latinName); } else { dyStringClear(query); sqlDyStringPrintf(query, "select commonName from MGI_Organism " "where _Organism_key = %d", orgKey); commonName = sqlQuickStringVerbose(conn2, query->string); if (commonName != NULL && !sameString(commonName, "Not Applicable") && !sameString(commonName, "Not Specified")) { spTaxon = spCommonToTaxon(connSp, commonName); } } if (spTaxon != 0) safef(abTaxon, sizeof(abTaxon), "%d", spTaxon); freez(&latinName); freez(&commonName); } } if (abName == NULL) abName = cloneString(""); if (abSubmitId == NULL) abSubmitId = cloneString(""); /* Get rPrimer, lPrimer */ if (!isStrNull(probePrepKey)) { struct sqlResult *sr = NULL; char **row; dyStringClear(query); sqlDyStringPrintf(query, "select primer1sequence,primer2sequence " "from PRB_Probe,GXD_ProbePrep " "where PRB_Probe._Probe_key = GXD_ProbePrep._Probe_key " "and GXD_ProbePrep._ProbePrep_key = %s" , probePrepKey); sr = sqlGetResultVerbose(conn2, query->string); row = sqlNextRow(sr); if (row != NULL) { fPrimer = cloneString(row[0]); rPrimer = cloneString(row[1]); } sqlFreeResult(&sr); } /* Note Jackson database actually stores the primers very * erratically. In all the cases I can find for in situs * the primers are actually stored in free text in the PRB_Notes * e.g. ... primers CGCGGATCCAGGGGAAACAGAAGGGCTGCG and CCCAAGCTTAGACTGTACAGGCTGAGCC ... 
*/ if (fPrimer == NULL || fPrimer[0]==0) { struct sqlResult *sr = NULL; char **row; dyStringClear(query); sqlDyStringPrintf(query, "select PRB_Notes.note from GXD_ProbePrep, PRB_Notes" " where GXD_ProbePrep._ProbePrep_key = %s" " and GXD_ProbePrep._Probe_key = PRB_Notes._Probe_key" " order by PRB_Notes.sequenceNum" , probePrepKey); sr = sqlGetResultVerbose(conn2, query->string); dyStringClear(probeNotes); while ((row = sqlNextRow(sr)) != NULL) dyStringAppend(probeNotes, row[0]); sqlFreeResult(&sr); if (probeNotes->stringSize > 0) { char f[256]; char r[256]; int i = 0; char *s = strstr(probeNotes->string," primers "); if (s) { s += strlen(" primers "); i = 0; while (strchr("ACGT",*s) && (i<sizeof(f))) f[i++] = *s++; f[i]=0; if (strstr(s," and ")==s) { s += strlen(" and "); i = 0; while (strchr("ACGT",*s) && (i<sizeof(r))) r[i++] = *s++; r[i]=0; if (strlen(f) >= 10 && strlen(r) >= 10) { fPrimer = cloneString(f); rPrimer = cloneString(r); } else { verbose(1, "bad primer parse:_ProbePrep_key=%s fPrimer=[%s], rPrimer=[%s]\n", probePrepKey,f,r); } } } } } if (fPrimer == NULL) fPrimer = cloneString(""); if (rPrimer == NULL) rPrimer = cloneString(""); fixation = blankOutUnknown(fixation); embedding = blankOutUnknown(embedding); /* Massage body part and slice type. We only handle whole mounts. */ if (sameString(sliceType, "whole mount")) { bodyPart = "whole"; priority = "100"; } else { sliceType = ""; priority = "1000"; } genotypeAndStrainFromKey(genotypeKey, conn2, &genotype, &strain); if (isStrNull(paneLabel)) paneLabel = cloneString(""); /* trying to suppress nulls in output */ stripChar(paneLabel, '"'); /* Get rid of a difficult quote to process. */ /* Fetch image dimensions from file. */ imageWidth=0; imageHeight=0; safef(path, sizeof(path), "%s/%s.jpg", inJax, fileKey); if (fileExists(path)) jpegSize(path,&imageWidth,&imageHeight); /* will errAbort if no valid .jpeg exists */ else warn("Picture Missing! %s ",path); /* Deal caption if any. 
Most of the work only happens the * first time see the image. */ if (!hashLookup(uniqImageHash, imageKey)) { struct sqlResult *sr = NULL; char **row; hashAdd(uniqImageHash, imageKey, NULL); dyStringClear(caption); dyStringClear(query); sqlDyStringPrintf(query, "select note from MGI_NoteChunk,MGI_Note,MGI_NoteType,ACC_MGIType " "where MGI_Note._Object_key = %s " "and ACC_MGIType.name = 'Image' " "and ACC_MGIType._MGIType_key = MGI_Note._MGIType_key " "and MGI_NoteType.noteType='Caption' " "and MGI_Note._NoteType_key = MGI_NoteType._NoteType_key " "and MGI_Note._Note_key = MGI_NoteChunk._Note_key " "order by sequenceNum" , imageKey); sr = sqlGetResultVerbose(conn2, query->string); while ((row = sqlNextRow(sr)) != NULL) dyStringAppend(caption, row[0]); sqlFreeResult(&sr); if (caption->stringSize > 0) { subChar(caption->string, '\t', ' '); subChar(caption->string, '\n', ' '); fprintf(cap, "%s\t%s\n", imageKey, caption->string); hashAdd(captionHash, imageKey, imageKey); } } if (hashLookup(captionHash, imageKey)) captionId = imageKey; else captionId = ""; fprintf(tab, "%s\t", gene); fprintf(tab, "%s\t", probeColor); fprintf(tab, "%s\t", sex); fprintf(tab, "%3.2f\t", calcAge); fprintf(tab, "%s\t", ageMin); fprintf(tab, "%s\t", ageMax); fprintf(tab, "%s\t", paneLabel); /* may have to change NULL to empty string or "0" ? 
*/ fprintf(tab, "%s.jpg\t", fileKey); fprintf(tab, "%s\t", imageKey); fprintf(tab, "%s\t", fPrimer); fprintf(tab, "%s\t", rPrimer); fprintf(tab, "%s\t", abName); fprintf(tab, "%s\t", abTaxon); fprintf(tab, "%s\t", abSubmitId); fprintf(tab, "%s\t", fixation); fprintf(tab, "%s\t", embedding); fprintf(tab, "%s\t", bodyPart); fprintf(tab, "%s\t", sliceType); fprintf(tab, "%s\t", genotype); fprintf(tab, "%s\t", strain); fprintf(tab, "%s\t", priority); fprintf(tab, "%s\t", captionId); fprintf(tab, "%d\t", imageWidth); fprintf(tab, "%d\n", imageHeight); printExpression(tab, conn2, imagePaneKey, assayKey); gotAny = TRUE; freez(&genotype); freez(&abName); freez(&abSubmitId); freez(&rPrimer); freez(&fPrimer); } sqlFreeResult(&sr); carefulClose(&ra); carefulClose(&tab); carefulClose(&cap); if (!gotAny) { remove(raName); remove(capName); remove(tabName); } dyStringFree(&probeNotes); dyStringFree(©right); dyStringFree(&caption); dyStringFree(&query); hashFree(&uniqImageHash); hashFree(&captionHash); }