struct slName *getTablesForField(struct sqlConnection *conn, char *splitPrefix, char *table, char *splitSuffix)
/* Return a list of table names for a field's table.  If a split prefix or
 * suffix is supplied, the list is built from the rows of a
 * "show tables like" query on prefix+table+suffix, kept in row order.
 * If that produced nothing (or neither prefix nor suffix was supplied),
 * fall back to 'table' itself when sqlTableExists() accepts it.
 * Returns NULL when nothing was found. */
{
    struct slName *tableList = NULL;

    if (splitPrefix != NULL || splitSuffix != NULL)
    {
        char likeQuery[256];
        char **row;
        struct sqlResult *sr;

        safef(likeQuery, sizeof(likeQuery), "show tables like '%s%s%s'",
              emptyForNull(splitPrefix), table, emptyForNull(splitSuffix));
        sr = sqlGetResult(conn, likeQuery);
        for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
        {
            struct slName *tableName = slNameNew(row[0]);
            slAddHead(&tableList, tableName);
        }
        sqlFreeResult(&sr);
        /* Rows were pushed on the head, so flip back to query order. */
        slReverse(&tableList);
    }

    if (tableList == NULL && sqlTableExists(conn, table))
        tableList = slNameNew(table);
    return tableList;
}
struct composite *makeCompositeList(struct encode2Manifest *manList, struct hash *metaHash) /* Return a list of composites with everything on manList */ { struct composite *comp, *compList = NULL; struct hash *compHash = hashNew(0); char compName[256]; struct encode2Manifest *man; for (man = manList; man != NULL; man = man->next) { char *realComp = tagVal(man, metaHash, "composite"); if (realComp != NULL) safef(compName, sizeof(compName), "%s", realComp); else { char *lab = emptyForNull(tagVal(man, metaHash, "lab")); char *dataType = emptyForNull(tagVal(man, metaHash, "dataType")); safef(compName, sizeof(compName), "comp%s%s", lab, dataType); } comp = hashFindVal(compHash, compName); if (comp == NULL) { AllocVar(comp); comp->name = cloneString(compName); hashAdd(compHash, compName, comp); slAddTail(&compList, comp); } struct slRef *manRef = slRefNew(man); slAddTail(&comp->manRefList, manRef); } hashFree(&compHash); return compList; }
void writeSeriesList(char *fileName, struct series *list)
/* Write list to fileName, one tab-separated line per series:
 * id, name, dataType, grantee.  The id is a running counter starting at 1;
 * NULL dataType/grantee are written as empty fields. */
{
    FILE *f = mustOpen(fileName, "w");
    struct series *el = list;
    int id;

    for (id = 1; el != NULL; ++id, el = el->next)
    {
        fprintf(f, "%d\t%s\t%s\t%s\n", id, el->name,
                emptyForNull(el->dataType), emptyForNull(el->grantee));
    }
    carefulClose(&f);
}
void printLeafTrackList(FILE *f, char *indent, struct view *view, struct composite *comp, struct slName *varList, struct taggedFile *tfList, char *type) /* Print list of low level tracks under view */ { struct taggedFile *tf; for (tf = tfList; tf != NULL; tf = tf->next) { if (sameString(tf->manifest->outputType, view->name)) { fprintf(f, "%strack t%d\n", indent, ++trackId); fprintf(f, "%sparent %s\n", indent, view->trackName); fprintf(f, "%stype %s\n", indent, type); fprintf(f, "%ssubGroups view=%s", indent, view->name); struct slName *var; for (var = varList; var != NULL; var = var->next) { char *val = metaTagValFindVal(tf->tagList, var->name); if (val != NULL) fprintf(f, " %s=%s", var->name, val); } fprintf(f, "\n"); fprintf(f, "%sshortLabel", indent); for (var = varList; var != NULL; var = var->next) { char *val = metaTagValFindVal(tf->tagList, var->name); if (val != NULL) fprintf(f, " %s", val); } fprintf(f, "\n"); char *lab = emptyForNull(metaTagValFindVal(tf->tagList, "lab")); char *dataType = emptyForNull(metaTagValFindVal(tf->tagList, "dataType")); fprintf(f, "%slongLabel %s %s", indent, lab, dataType); boolean gotOne = FALSE; for (var = varList; var != NULL; var = var->next) { char *val = metaTagValFindVal(tf->tagList, var->name); if (val != NULL) { if (gotOne) fprintf(f, ","); else gotOne = TRUE; fprintf(f, " %s %s", var->name, val); } } fprintf(f, "\n"); fprintf(f, "%sbigDataUrl %s\n", indent, tf->manifest->fileName); fprintf(f, "\n"); } } }
void edwMakePlateFileNameAndPath(int edwFileId, char *submitFileName, char licensePlate[edwMaxPlateSize], char edwFile[PATH_LEN], char serverPath[PATH_LEN]) /* Convert file id to local file name, and full file path. Make any directories needed * along serverPath. */ { /* Preserve suffix. Give ourselves up to two suffixes. */ int nameSize = strlen(submitFileName); char *suffix = lastMatchCharExcept(submitFileName, submitFileName + nameSize, '.', '/'); if (suffix != NULL) { char *secondSuffix = lastMatchCharExcept(submitFileName, suffix, '.', '/'); if (secondSuffix != NULL) suffix = secondSuffix; } suffix = emptyForNull(suffix); /* Figure out edw file name, starting with license plate. */ edwMakeLicensePlate(edwLicensePlatePrefix, edwFileId, licensePlate, edwMaxPlateSize); /* Figure out directory and make any components not already there. */ char edwDir[PATH_LEN]; edwDirForTime(edwNow(), edwDir); char uploadDir[PATH_LEN]; safef(uploadDir, sizeof(uploadDir), "%s%s", edwRootDir, edwDir); makeDirsOnPath(uploadDir); /* Figure out full file names */ safef(edwFile, PATH_LEN, "%s%s%s", edwDir, licensePlate, suffix); safef(serverPath, PATH_LEN, "%s%s", edwRootDir, edwFile); }
struct dyString *readAndReplaceTableName(char *fileName, char *table)
/* Read a table-definition file into a string.  The first real line (as
 * returned by lineFileNextReal) must start with the words "create table"
 * followed by a table name; that name is replaced with 'table'.  Whatever
 * follows on that line, and the rest of the file, is copied verbatim with
 * a newline after each line.  Aborts if the header is not as expected. */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    struct dyString *sql = dyStringNew(0);
    char *line;

    if (!lineFileNextReal(lf, &line))
        errAbort("No real lines in %s\n", fileName);

    /* Consume and validate "create", "table", <oldName>. */
    char *createWord = nextWord(&line);
    if (!sameWord(createWord, "create"))
        errAbort("Expecting first word in file to be CREATE. Got %s", createWord);

    char *tableWord = nextWord(&line);
    if (tableWord == NULL || !sameWord(tableWord, "table"))
        errAbort("Expecting second word in file to be table. Got %s", emptyForNull(tableWord));

    char *oldName = nextWord(&line);
    if (oldName == NULL)
        errAbort("Expecting table name on same line as CREATE TABLE");

    /* Emit the header with the new name plus whatever trailed the old name. */
    sqlDyStringPrintf(sql, "CREATE TABLE %s ", table);
    if (line != NULL)
        dyStringAppend(sql, line);
    dyStringAppendC(sql, '\n');

    /* Everything else is passed through unchanged. */
    for (;;)
    {
        if (!lineFileNext(lf, &line, NULL))
            break;
        dyStringAppend(sql, line);
        dyStringAppendC(sql, '\n');
    }

    lineFileClose(&lf);
    return sql;
}
void transformJobTable(struct sqlConnection *conn, struct edwAnalysisJob *jobList, char *outName)
/* Convert each edwAnalysisJob on jobList to an eapJob and write it to
 * outName with eapJobTabOut().  commandLine and stderr are escaped for tab
 * files (NULL stderr becomes empty), cpusRequested is raised to at least 1,
 * and an empty parasolId is replaced by the job's pid in decimal.
 * conn is unused. */
{
    FILE *f = mustOpen(outName, "w");
    struct edwAnalysisJob *job;

    for (job = jobList; job != NULL; job = job->next)
    {
        struct eapJob j = {0};
        char pidBuf[16];    /* holds a decimal int; must outlive eapJobTabOut below */

        /* Check the precondition before the pointer is handed to the escaper. */
        assert(job->commandLine);

        j.id = job->id;
        j.commandLine = sqlEscapeTabFileString(job->commandLine);
        j.startTime = job->startTime;
        j.endTime = job->endTime;
        j.stderr = sqlEscapeTabFileString(emptyForNull(job->stderr));
        j.returnCode = job->returnCode;
        j.cpusRequested = max(1, job->cpusRequested);

        if (isEmpty(job->parasolId))
        {
            safef(pidBuf, sizeof(pidBuf), "%d", job->pid);
            j.parasolId = pidBuf;
        }
        else
            j.parasolId = job->parasolId;

        eapJobTabOut(&j, f);

        /* Only the escaped copies belong to us; parasolId is borrowed. */
        freez(&j.commandLine);
        freez(&j.stderr);
    }
    carefulClose(&f);
}
static void showTableDataRows(struct fieldedTable *table, int pageSize, int maxLenField, struct hash *tagOutputWrappers, void *wrapperContext) /* Render data rows into HTML */ { int count = 0; struct fieldedRow *row; boolean isNum[table->fieldCount]; int i; for (i=0; i<table->fieldCount; ++i) isNum[i] = fieldedTableColumnIsNumeric(table, i); for (row = table->rowList; row != NULL; row = row->next) { if (++count > pageSize) break; printf("<TR>\n"); int fieldIx = 0; for (fieldIx=0; fieldIx<table->fieldCount; ++fieldIx) { char shortVal[maxLenField+1]; char *longVal = emptyForNull(row->row[fieldIx]); char *val = longVal; int valLen = strlen(val); if (maxLenField > 0 && maxLenField < valLen) { if (valLen > maxLenField) { memcpy(shortVal, val, maxLenField-3); shortVal[maxLenField-3] = 0; strcat(shortVal, "..."); val = shortVal; } } if (isNum[fieldIx]) webPrintLinkCellRightStart(); else webPrintLinkCellStart(); boolean printed = FALSE; if (tagOutputWrappers != NULL && !isEmpty(val)) { char *field = table->fields[fieldIx]; webTableOutputWrapperType *printer = hashFindVal(tagOutputWrappers, field); if (printer != NULL) { printer(table, row, field, longVal, val, wrapperContext); printed = TRUE; } } if (!printed) printf("%s", val); webPrintLinkCellEnd(); } printf("</TR>\n"); } }
struct raField *raFieldNew(char *name, char *val, struct lm *lm)
/* Allocate a raField in local memory pool lm holding copies of name and
 * val.  Leading spaces are skipped in val, and a NULL result is stored as
 * the empty string. */
{
    struct raField *newField;
    char *trimmedVal = emptyForNull(skipLeadingSpaces(val));

    lmAllocVar(lm, newField);
    newField->name = lmCloneString(lm, name);
    newField->val = lmCloneString(lm, trimmedVal);
    return newField;
}
static struct hash *getCoveredTables(struct joiner *joiner, char *db, struct sqlConnection *conn) /* Get list of tables covered in database. */ { struct hash *hash = hashNew(0); struct joinerIgnore *ig; struct slName *spec; struct joinerSet *js; struct joinerField *jf; /* First put in all the ignored tables. */ for (ig = joiner->tablesIgnored; ig != NULL; ig = ig->next) { if (slNameInList(ig->dbList, db)) { for (spec = ig->tableList; spec != NULL; spec = spec->next) { verbose(3,"ignoreTable: '%s'\n", spec->name); addTablesLike(hash, conn, spec->name); } } } /* Now put in tables that are in one of the identifiers. */ for (js = joiner->jsList; js != NULL; js = js->next) { for (jf = js->fieldList; jf != NULL; jf = jf->next) { if (slNameInList(jf->dbList, db)) { char spec[512]; safef(spec, sizeof(spec), "%s%s%s", emptyForNull(jf->splitPrefix), jf->table, emptyForNull(jf->splitSuffix)); addTablesLike(hash, conn, spec); verbose(4,"ident: '%s', table: '%s'\n", js->name, spec); } } } return hash; }
static void writeInfo(FILE *infoFh, struct gffGroup *group) /* write a row for a GTF group from the info file */ { // scan lineList for group and protein ids struct gffLine *ll; char *geneId = NULL, *proteinId = NULL, *geneName = NULL, *transcriptName = NULL; for (ll = group->lineList; ll != NULL; ll = ll->next) { saveName(&geneId, ll->geneId); saveName(&proteinId, ll->proteinId); saveName(&geneName, ll->geneName); saveName(&transcriptName, ll->transcriptName); } fprintf(infoFh, "%s\t%s\t%s\t%s\t%d\t%d\t%c\t%s\t%s\t%s\n", group->name, emptyForNull(geneId), group->source, group->seq, group->start, group->end, group->strand, emptyForNull(proteinId), emptyForNull(geneName), emptyForNull(transcriptName)); }
static struct rqlEval rqlEvalArrayIx(struct rqlParse *p, void *record, RqlEvalLookup lookup,
    struct lm *lm)
/* Evaluate an indexed-array parse node.  The first child is evaluated as
 * the array (its string value is used) and the second as the index (its
 * integer value is used).  The result is a string: the word that
 * lmCloneSomeWord() picks for that index, or "" when it returns NULL. */
{
    struct rqlParse *arrayNode = p->children;
    struct rqlParse *indexNode = arrayNode->next;
    struct rqlEval arrayVal = rqlLocalEval(arrayNode, record, lookup, lm);
    struct rqlEval indexVal = rqlLocalEval(indexNode, record, lookup, lm);
    struct rqlEval result;

    result.type = rqlTypeString;
    result.val.s = emptyForNull(lmCloneSomeWord(lm, arrayVal.val.s, indexVal.val.i));
    return result;
}
void output(int depth, struct rqlStatement *rql, struct tagStorm *tags, struct tagStanza *stanza) /* Output stanza according to clOut */ { char *format = clOut; if (sameString(format, "ra")) { if (stanza->children == NULL) { struct slName *field; for (field = rql->fieldList; field != NULL; field = field->next) { char *val = tagFindVal(stanza, field->name); if (val != NULL) printf("%s\t%s\n", field->name, val); } printf("\n"); } } else if (sameString(format, "tab")) { if (stanza->children == NULL) { struct slName *field; char *connector = ""; for (field = rql->fieldList; field != NULL; field = field->next) { char *val = emptyForNull(tagFindVal(stanza, field->name)); printf("%s%s", connector, val); connector = "\t"; } printf("\n"); } } else if (sameString(format, "tags")) { struct slName *field; for (field = rql->fieldList; field != NULL; field = field->next) { char *val = tagFindLocalVal(stanza, field->name); if (val != NULL) { repeatCharOut(stdout, '\t', depth); printf("%s\t%s\n", field->name, val); } } printf("\n"); } else errAbort("Unrecognized format %s", format); }
void getUrl(struct sqlConnection *conn)
/* Put up the form that asks for a manifest URL: a text box pre-filled with
 * any "url" CGI value, a submit button, an "update" checkbox (unchecked),
 * the submitting user's email and a log-out button.  The user is first
 * checked with edwMustGetUserFromEmail(). */
{
    edwMustGetUserFromEmail(conn, userEmail);

    printf("Please enter a URL for a validated manifest file:<BR>");
    printf("URL ");
    {
        /* Re-show whatever URL came in with the request, if any. */
        char *previousUrl = emptyForNull(cgiOptionalString("url"));
        cgiMakeTextVar("url", previousUrl, 80);
    }
    cgiMakeButton("submitUrl", "submit");
    printf("<BR>\n");

    cgiMakeCheckBox("update", FALSE);
    printf(" Update information associated with files that have already been uploaded.");

    printf("<BR>Submission by %s", userEmail);
    edwPrintLogOutButton();
}
static void refLinkUpdate(struct sqlConnection *conn, struct gbStatus* status) /* Update the refLink table for the current entry */ { int geneId; char *gen = emptyForNull(raFieldCurVal("gen")); char *pro = emptyForNull(raFieldCurVal("pro")); gen = sqlEscapeString2(alloca(2*strlen(gen)+1), gen); pro = sqlEscapeString2(alloca(2*strlen(pro)+1), pro); /* can either have locus id (old locus link db) or gene id, or both, * in which case the geneId is used */ geneId = (raGeneId != 0) ? raGeneId : raLocusLinkId; if (status->stateChg & GB_NEW) sqlUpdaterAddRow(refLinkUpd, "%s\t%s\t%s\t%s\t%u\t%u\t%u\t%u", gen, pro, raAcc, raProtAcc, raFieldCurId("gen"), raFieldCurId("pro"), geneId, raOmimId); else if (status->stateChg & GB_META_CHG) sqlUpdaterModRow(refLinkUpd, 1, "name='%s', product='%s', protAcc='%s', " "geneName=%u, prodName=%u, locusLinkId=%u, " "omimId=%u where mrnaAcc='%s'", gen, pro, raProtAcc, raFieldCurId("gen"), raFieldCurId("pro"), geneId, raOmimId, raAcc); }
struct customTrack *chromGraphParser(char *genomeDb, struct customPp *cpp,
    char *formatType, char *markerType, char *columnLabels,
    char *name, char *description, struct hash *settings,
    boolean report)
/* Parse out a chromGraph file (not including any track lines) and return
 * one customTrack per labeled file produced by parseToLabeledFiles(), in
 * that order.  Returns NULL when the input has no preview lines.
 * formatType, markerType and columnLabels may each be the corresponding
 * "guess" constant, in which case they are worked out from a ten line
 * preview; the three matching keys are removed from settings.
 * name/description default to the default trackDb's short/long labels. */
{
    /* Remember whether the caller fixed the value range. */
    char *minVal = hashFindVal(settings, "minVal");
    char *maxVal = hashFindVal(settings, "maxVal");

    /* An empty track has nothing to parse. */
    struct slName *preview = getPreview(cpp, 10);
    if (preview == NULL)
        return NULL;

    struct sqlConnection *conn = hAllocConn(genomeDb);
    int colCount;

    /* Format: scan the preview if the caller asked us to guess. */
    if (sameString(formatType, cgfFormatGuess)
        && !figureOutFormat(preview, &formatType, &colCount))
    {
        errAbort("Can't figure out format for chromGraph track %s", emptyForNull(name));
    }
    hashMayRemove(settings, "formatType");

    /* With the format known, count columns and pick the line chopper. */
    colCount = countColumns(preview, formatType);
    Chopper chopper = getChopper(formatType);

    /* Marker type: scan the preview's marker column if guessing. */
    if (sameString(markerType, cgfMarkerGuess))
    {
        markerType = guessMarkerType(preview, chopper, conn, colCount);
        if (markerType == NULL)
            errAbort("Can't figure out marker column type for chromGraph track %s",
                     emptyForNull(name));
    }
    hashMayRemove(settings, "markerType");

    /* Column labels: a first row that looks like data means no labels. */
    if (sameString(columnLabels, cgfColLabelGuess))
    {
        columnLabels = firstRowConsistentWithData(preview->name, chopper, colCount)
                       ? cgfColLabelNumbered : cgfColLabelFirstRow;
    }
    hashMayRemove(settings, "columnLabels");
    returnPreview(cpp, &preview);
    boolean labelsInData = sameString(columnLabels, cgfColLabelFirstRow);

    /* Load data into list of labeled temp files. */
    struct labeledFile *fileList = parseToLabeledFiles(cpp, colCount, formatType,
                                                       markerType, labelsInData, conn, report);
    saveLabeledFileList(fileList);

    /* One custom track per labeled file. */
    boolean appendFileLabel = (colCount > 1 || labelsInData);
    struct customTrack *trackList = NULL;
    struct labeledFile *lf;
    for (lf = fileList; lf != NULL; lf = lf->next)
    {
        struct customTrack *track;
        struct trackDb *tdb;
        char shortLabel[128];
        char longLabel[512];

        AllocVar(track);
        tdb = customTrackTdbDefault();
        track->tdb = tdb;

        /* Fall back to the default labels when the caller gave none. */
        if (name == NULL)
            name = tdb->shortLabel;
        if (description == NULL)
            description = tdb->longLabel;

        if (appendFileLabel)
        {
            safef(shortLabel, sizeof(shortLabel), "%s %s", name, lf->label);
            safef(longLabel, sizeof(longLabel), "%s %s", description, lf->label);
        }
        else
        {
            safef(shortLabel, sizeof(shortLabel), "%s", name);
            safef(longLabel, sizeof(longLabel), "%s", description);
        }
        tdb->shortLabel = cloneString(shortLabel);
        tdb->longLabel = cloneString(longLabel);
        tdb->type = "chromGraph";
        tdb->track = customTrackTableFromLabel(tdb->shortLabel);
        tdb->table = cloneString(tdb->track);
        track->dbTableName = NULL;

        /* Settings: caller's settings plus file, type and value range. */
        struct dyString *raText = dyStringNew(0);
        dyStringAppend(raText, hashToRaString(settings));
        dyStringPrintf(raText, "binaryFile %s\n", lf->fileName);
        dyStringPrintf(raText, "type %s\n", tdb->type);
        dyStringPrintf(raText, "tdbType %s\n", tdb->type); /* Needed if outside factory */
        if (minVal == NULL)
            dyStringPrintf(raText, "minVal %g\n", lf->minVal);
        if (maxVal == NULL)
            dyStringPrintf(raText, "maxVal %g\n", lf->maxVal);
        tdb->settings = dyStringCannibalize(&raText);

        slAddHead(&trackList, track);
    }

    hFreeConn(&conn);
    slReverse(&trackList);
    return trackList;
}
void writeMdbListAsResults(struct mdbObj *mdbList, char *fileName) /* Write selected fields in list in tab-separated result format to file. */ { FILE *f = mustOpen(fileName, "w"); struct mdbObj *mdb; int id = 0; for (mdb = mdbList; mdb != NULL; mdb = mdb->next) { char *experiment = NULL; char *replicate = NULL; char *objType = NULL; char *fileName = NULL; char *md5sum = NULL; char *tableName = NULL; char *view = NULL; char *dateSubmitted = NULL; char *dateResubmitted = NULL; char *dateUnrestricted = NULL; struct mdbVar *v; for (v = mdb->vars; v != NULL; v = v->next) { char *var = v->var, *val = v->val; if (sameString(var, "expId")) experiment = val; else if (sameString(var, "replicate")) replicate = val; else if (sameString(var, "objType")) objType = val; else if (sameString(var, "fileName")) { fileName = val; /* Do surgery on file name, cutting off second comma separated index file. */ char *comma = strchr(fileName, ','); if (comma != NULL) *comma = 0; } else if (sameString(var, "md5sum")) { md5sum = val; char *comma = strchr(fileName, ','); if (comma != NULL) *comma = 0; } else if (sameString(var, "tableName")) tableName = val; else if (sameString(var, "view")) view = val; else if (sameString(var, "dateSubmitted")) dateSubmitted = val; else if (sameString(var, "dateResubmitted")) dateResubmitted = val; else if (sameString(var, "dateUnrestricted")) dateUnrestricted = val; } if (objType != NULL && !sameString(objType, "composite")) { if (experiment != NULL) { fprintf(f, "%d\t", ++id); fprintf(f, "%s\t", emptyForNull(experiment)); fprintf(f, "%s\t", emptyForNull(replicate)); fprintf(f, "%s\t", emptyForNull(view)); fprintf(f, "%s\t", emptyForNull(objType)); fprintf(f, "%s\t", emptyForNull(fileName)); fprintf(f, "%s\t", emptyForNull(md5sum)); fprintf(f, "%s\t", emptyForNull(tableName)); fprintf(f, "%s\t", emptyForNull(dateSubmitted)); fprintf(f, "%s\t", emptyForNull(dateResubmitted)); fprintf(f, "%s\n", emptyForNull(dateUnrestricted)); } else { if 
(!startsWith("wgEncodeUmassWengTfbsValid", mdb->obj)) // These validation files not associated with encode experiment warn("No experiment for %s", mdb->obj); } } } carefulClose(&f); }
static void synonymPrint(struct section *section, struct sqlConnection *conn, char *id) /* Print out SwissProt comments - looking up typeId/commentVal. */ { char *protAcc = getSwissProtAcc(conn, spConn, id); char *spDisplayId; char *refSeqAcc = ""; char *mrnaAcc = ""; char *oldDisplayId; char condStr[255]; char *kgProteinID; char *parAcc; /* parent accession of a variant splice protein */ char *chp; if (isRgdGene(conn)) { rgdGene2SynonymPrint(section,conn, id); return; } if (sqlTablesExist(conn, "kgAlias")) printAlias(id, conn); if (sameWord(genome, "Zebrafish")) { char *xrefTable = "ensXRefZfish"; char *geneIdCol = "ensGeneId"; /* get Gene Symbol and RefSeq accession from Zebrafish-specific */ /* cross-reference table */ printGeneSymbol(id, xrefTable, geneIdCol, conn); refSeqAcc = getRefSeqAcc(id, xrefTable, geneIdCol, conn); hPrintf("<B>ENSEMBL ID:</B> %s", id); } else { char query[256]; char *toRefTable = genomeOptionalSetting("knownToRef"); if (toRefTable != NULL && sqlTableExists(conn, toRefTable)) { safef(query, sizeof(query), "select value from %s where name='%s'", toRefTable, id); refSeqAcc = emptyForNull(sqlQuickString(conn, query)); } if (sqlTableExists(conn, "kgXref")) { safef(query, sizeof(query), "select mRNA from kgXref where kgID='%s'", id); mrnaAcc = emptyForNull(sqlQuickString(conn, query)); } if (sameWord(genome, "C. 
elegans")) hPrintf("<B>WormBase ID:</B> %s<BR>", id); else hPrintf("<B>UCSC ID:</B> %s<BR>", id); } if (refSeqAcc[0] != 0) { hPrintf("<B>RefSeq Accession: </B> <A HREF=\""); printOurRefseqUrl(stdout, refSeqAcc); hPrintf("\">%s</A><BR>\n", refSeqAcc); } else if (mrnaAcc[0] != 0) { safef(condStr, sizeof(condStr), "acc = '%s'", mrnaAcc); if (sqlGetField(database, "gbCdnaInfo", "acc", condStr) != NULL) { hPrintf("<B>Representative RNA: </B> <A HREF=\""); printOurMrnaUrl(stdout, mrnaAcc); hPrintf("\">%s</A><BR>\n", mrnaAcc); } else /* do not show URL link if it is not found in gbCdnaInfo */ { hPrintf("<B>Representative RNA: %s </B>", mrnaAcc); } } if (protAcc != NULL) { kgProteinID = cloneString(""); if (hTableExists(sqlGetDatabase(conn), "knownGene") && (isNotEmpty(cartOptionalString(cart, hggChrom)) && differentWord(cartOptionalString(cart, hggChrom),"none"))) { safef(condStr, sizeof(condStr), "name = '%s' and chrom = '%s' and txStart=%s and txEnd=%s", id, cartOptionalString(cart, hggChrom), cartOptionalString(cart, hggStart), cartOptionalString(cart, hggEnd)); kgProteinID = sqlGetField(database, "knownGene", "proteinID", condStr); } hPrintf("<B>Protein: "); if (strstr(kgProteinID, "-") != NULL) { parAcc = cloneString(kgProteinID); chp = strstr(parAcc, "-"); *chp = '\0'; /* show variant splice protein and the UniProt link here */ hPrintf("<A HREF=\"http://www.uniprot.org/uniprot%s\" " "TARGET=_blank>%s</A></B>, splice isoform of ", kgProteinID, kgProteinID); hPrintf("<A HREF=\"http://www.uniprot.org/uniprot/%s\" " "TARGET=_blank>%s</A></B>\n", parAcc, parAcc); } else { hPrintf("<A HREF=\"http://www.uniprot.org/uniprot/%s\" " "TARGET=_blank>%s</A></B>\n", protAcc, protAcc); } /* show SWISS-PROT display ID if it is different than the accession ID */ /* but, if display name is like: Q03399 | Q03399_HUMAN, then don't show display name */ spDisplayId = spAnyAccToId(spConn, protAcc); if (spDisplayId == NULL) { errAbort("<br>%s seems to no longer be a valid protein ID in our 
latest UniProtKB DB.", protAcc); } if (strstr(spDisplayId, protAcc) == NULL) { hPrintf(" (aka %s", spDisplayId); /* show once if the new and old displayId are the same */ oldDisplayId = oldSpDisplayId(spDisplayId); if (oldDisplayId != NULL) { if (!sameWord(spDisplayId, oldDisplayId) && !sameWord(protAcc, oldDisplayId)) { hPrintf(" or %s", oldDisplayId); } } hPrintf(")<BR>\n"); } } printCcds(id, conn); }
static void bigBedClick(char *fileName, struct trackDb *tdb, char *item, int start, int end, int bedSize) /* Handle click in generic bigBed track. */ { boolean showUrl = FALSE; char *chrom = cartString(cart, "c"); /* Open BigWig file and get interval list. */ struct bbiFile *bbi = bigBedFileOpen(fileName); struct lm *lm = lmInit(0); int ivStart = start, ivEnd = end; if (start == end) { // item is an insertion; expand the search range from 0 bases to 2 so we catch it: ivStart = max(0, start-1); ivEnd++; } struct bigBedInterval *bbList = bigBedIntervalQuery(bbi, chrom, ivStart, ivEnd, 0, lm); /* Get bedSize if it's not already defined. */ if (bedSize == 0) { bedSize = bbi->definedFieldCount; showUrl = TRUE; } char *scoreFilter = cartOrTdbString(cart, tdb, "scoreFilter", NULL); int minScore = 0; if (scoreFilter) minScore = atoi(scoreFilter); /* Find particular item in list - matching start, and item if possible. */ boolean found = FALSE; boolean firstTime = TRUE; struct bigBedInterval *bb; for (bb = bbList; bb != NULL; bb = bb->next) { if (!(bb->start == start && bb->end == end)) continue; if (bedSize > 3) { char *name = cloneFirstWordByTab(bb->rest); boolean match = sameString(name, item); freez(&name); if (!match) continue; } found = TRUE; if (firstTime) printf("<BR>\n"); int seq1Seq2Fields = 0; // check for seq1 and seq2 in columns 7+8 (eg, pairedTagAlign) boolean seq1Seq2 = sameOk(trackDbSetting(tdb, BASE_COLOR_USE_SEQUENCE), "seq1Seq2"); if (seq1Seq2 && bedSize == 6) seq1Seq2Fields = 2; char *fields[bedSize+seq1Seq2Fields]; char startBuf[16], endBuf[16]; char *rest = cloneString(bb->rest); int bbFieldCount = bigBedIntervalToRow(bb, chrom, startBuf, endBuf, fields, bedSize+seq1Seq2Fields); if (bbFieldCount != bedSize+seq1Seq2Fields) { errAbort("Disagreement between trackDb field count (%d) and %s fieldCount (%d)", bedSize, fileName, bbFieldCount); } struct bed *bed = bedLoadN(fields, bedSize); if (bedSize >= 6 && scoreFilter && bed->score < minScore) continue; if 
(showUrl && (bedSize >= 4)) printCustomUrl(tdb, item, TRUE); bedPrintPos(bed, bedSize, tdb); // display seq1 and seq2 if (seq1Seq2 && bedSize+seq1Seq2Fields == 8) printf("<table><tr><th>Sequence 1</th><th>Sequence 2</th></tr>" "<tr><td> %s </td><td> %s </td></tr></table>", fields[6], fields[7]); else if (isNotEmpty(rest)) { char *restFields[256]; int restCount = chopTabs(rest, restFields); int restBedFields = bedSize - 3; if (restCount > restBedFields) { if (0 == extraFieldsPrint(tdb,NULL,restFields + restBedFields,restCount - restBedFields)) { int i; char label[20]; safef(label, sizeof(label), "nonBedFieldsLabel"); printf("<B>%s </B>", trackDbSettingOrDefault(tdb, label, "Non-BED fields:")); for (i = restBedFields; i < restCount; i++) printf("%s%s", (i > 0 ? "\t" : ""), restFields[i]); printf("<BR>\n"); } } } if (isCustomTrack(tdb->track)) { time_t timep = bbiUpdateTime(bbi); printBbiUpdateTime(&timep); } } if (!found) { printf("No item %s starting at %d\n", emptyForNull(item), start); } lmCleanup(&lm); bbiFileClose(&bbi); }
void processRefSeq(char *database, char *faFile, char *raFile, char *pslFile, char *loc2refFile, char *pepFile, char *mim2locFile) /* hgRefSeqMrna - Load refSeq mRNA alignments and other info into * refSeqGene table. */ { struct lineFile *lf; struct hash *raHash, *rsiHash = newHash(0); struct hash *loc2mimHash = newHash(0); struct refSeqInfo *rsiList = NULL, *rsi; char *s, *line, *row[5]; int wordCount, dotMod = 0; int noLocCount = 0; int rsiCount = 0; int noProtCount = 0; struct psl *psl; struct sqlConnection *conn = hgStartUpdate(database); struct hash *productHash = loadNameTable(conn, "productName", 16); struct hash *geneHash = loadNameTable(conn, "geneName", 16); char *kgName = "refGene"; FILE *kgTab = hgCreateTabFile(".", kgName); FILE *productTab = hgCreateTabFile(".", "productName"); FILE *geneTab = hgCreateTabFile(".", "geneName"); FILE *refLinkTab = hgCreateTabFile(".", "refLink"); FILE *refPepTab = hgCreateTabFile(".", "refPep"); FILE *refMrnaTab = hgCreateTabFile(".", "refMrna"); struct exon *exonList = NULL, *exon; char *answer; char cond_str[200]; /* Make refLink and other tables table if they don't exist already. */ sqlMaybeMakeTable(conn, "refLink", refLinkTableDef); sqlUpdate(conn, "NOSQLINJ delete from refLink"); sqlMaybeMakeTable(conn, "refGene", refGeneTableDef); sqlUpdate(conn, "NOSQLINJ delete from refGene"); sqlMaybeMakeTable(conn, "refPep", refPepTableDef); sqlUpdate(conn, "NOSQLINJ delete from refPep"); sqlMaybeMakeTable(conn, "refMrna", refMrnaTableDef); sqlUpdate(conn, "NOSQLINJ delete from refMrna"); /* Scan through locus link to omim ID file and put in hash. */ { char *row[2]; printf("Scanning %s\n", mim2locFile); lf = lineFileOpen(mim2locFile, TRUE); while (lineFileRow(lf, row)) { hashAdd(loc2mimHash, row[1], intToPt(atoi(row[0]))); } lineFileClose(&lf); } /* Scan through .ra file and make up start of refSeqInfo * objects in hash and list. 
*/ printf("Scanning %s\n", raFile); lf = lineFileOpen(raFile, TRUE); while ((raHash = hashNextRa(lf)) != NULL) { if (clDots > 0 && ++dotMod == clDots ) { dotMod = 0; dotOut(); } AllocVar(rsi); slAddHead(&rsiList, rsi); if ((s = hashFindVal(raHash, "acc")) == NULL) errAbort("No acc near line %d of %s", lf->lineIx, lf->fileName); rsi->mrnaAcc = cloneString(s); if ((s = hashFindVal(raHash, "siz")) == NULL) errAbort("No siz near line %d of %s", lf->lineIx, lf->fileName); rsi->size = atoi(s); if ((s = hashFindVal(raHash, "gen")) != NULL) rsi->geneName = cloneString(s); //!!!else //!!! warn("No gene name for %s", rsi->mrnaAcc); if ((s = hashFindVal(raHash, "cds")) != NULL) parseCds(s, 0, rsi->size, &rsi->cdsStart, &rsi->cdsEnd); else rsi->cdsEnd = rsi->size; if ((s = hashFindVal(raHash, "ngi")) != NULL) rsi->ngi = atoi(s); rsi->geneNameId = putInNameTable(geneHash, geneTab, rsi->geneName); s = hashFindVal(raHash, "pro"); if (s != NULL) rsi->productName = cloneString(s); rsi->productNameId = putInNameTable(productHash, productTab, s); hashAdd(rsiHash, rsi->mrnaAcc, rsi); freeHashAndVals(&raHash); } lineFileClose(&lf); if (clDots) printf("\n"); /* Scan through loc2ref filling in some gaps in rsi. */ printf("Scanning %s\n", loc2refFile); lf = lineFileOpen(loc2refFile, TRUE); while (lineFileNext(lf, &line, NULL)) { char *mrnaAcc; if (line[0] == '#') continue; wordCount = chopTabs(line, row); if (wordCount < 5) errAbort("Expecting at least 5 tab-separated words line %d of %s", lf->lineIx, lf->fileName); mrnaAcc = row[1]; mrnaAcc = accWithoutSuffix(mrnaAcc); if (mrnaAcc[2] != '_') warn("%s is and odd name %d of %s", mrnaAcc, lf->lineIx, lf->fileName); if ((rsi = hashFindVal(rsiHash, mrnaAcc)) != NULL) { rsi->locusLinkId = lineFileNeedNum(lf, row, 0); rsi->omimId = ptToInt(hashFindVal(loc2mimHash, row[0])); rsi->proteinAcc = cloneString(accWithoutSuffix(row[4])); } } lineFileClose(&lf); /* Report how many seem to be missing from loc2ref file. * Write out knownInfo file. 
*/ printf("Writing %s\n", "refLink.tab"); for (rsi = rsiList; rsi != NULL; rsi = rsi->next) { ++rsiCount; if (rsi->locusLinkId == 0) ++noLocCount; if (rsi->proteinAcc == NULL) ++noProtCount; fprintf(refLinkTab, "%s\t%s\t%s\t%s\t%u\t%u\t%u\t%u\n", emptyForNull(rsi->geneName), emptyForNull(rsi->productName), emptyForNull(rsi->mrnaAcc), emptyForNull(rsi->proteinAcc), rsi->geneNameId, rsi->productNameId, rsi->locusLinkId, rsi->omimId); } if (noLocCount) printf("Missing locusLinkIds for %d of %d\n", noLocCount, rsiCount); if (noProtCount) printf("Missing protein accessions for %d of %d\n", noProtCount, rsiCount); /* Process alignments and write them out as genes. */ lf = pslFileOpen(pslFile); dotMod = 0; while ((psl = pslNext(lf)) != NULL) { if (hashFindVal(rsiHash, psl->qName) != NULL) { if (clDots > 0 && ++dotMod == clDots ) { dotMod = 0; dotOut(); } sqlSafefFrag(cond_str, sizeof cond_str, "extAC='%s'", psl->qName); answer = sqlGetField(proteinDB, "spXref2", "displayID", cond_str); if (answer == NULL) { fprintf(stderr, "%s NOT FOUND.\n", psl->qName); fflush(stderr); } if (answer != NULL) { struct genePred *gp = NULL; exonList = pslToExonList(psl); fprintf(kgTab, "%s\t%s\t%c\t%d\t%d\t", psl->qName, psl->tName, psl->strand[0], psl->tStart, psl->tEnd); rsi = hashMustFindVal(rsiHash, psl->qName); gp = genePredFromPsl(psl, rsi->cdsStart, rsi->cdsEnd, genePredStdInsertMergeSize); if (!gp) errAbort("Cannot convert psl (%s) to genePred.\n", psl->qName); fprintf(kgTab, "%d\t%d\t", gp->cdsStart, gp->cdsEnd); fprintf(kgTab, "%d\t", slCount(exonList)); fflush(kgTab); for (exon = exonList; exon != NULL; exon = exon->next) fprintf(kgTab, "%d,", exon->start); fprintf(kgTab, "\t"); for (exon = exonList; exon != NULL; exon = exon->next) fprintf(kgTab, "%d,", exon->end); fprintf(kgTab, "\n"); slFreeList(&exonList); } } else { fprintf(stderr, "%s found in psl, but not in .fa or .ra data files.\n", psl->qName); fflush(stderr); } } if (clDots) printf("\n"); if (!clTest) { 
writeSeqTable(pepFile, refPepTab, FALSE, TRUE); writeSeqTable(faFile, refMrnaTab, FALSE, FALSE); } carefulClose(&kgTab); carefulClose(&productTab); carefulClose(&geneTab); carefulClose(&refLinkTab); carefulClose(&refPepTab); carefulClose(&refMrnaTab); if (!clTest) { printf("Loading database with %s\n", kgName); fflush(stdout); hgLoadTabFile(conn, ".", kgName, NULL); printf("Loading database with %s\n", "productName"); fflush(stdout); hgLoadTabFile(conn, ".", "productName", NULL); printf("Loading database with %s\n", "geneName"); fflush(stdout); hgLoadTabFile(conn, ".", "geneName", NULL); printf("Loading database with %s\n", "refLink"); fflush(stdout); hgLoadTabFile(conn, ".", "refLink", NULL); printf("Loading database with %s\n", "refPep"); fflush(stdout); hgLoadTabFile(conn, ".", "refPep", NULL); printf("Loading database with %s\n", "refMrna"); fflush(stdout); hgLoadTabFile(conn, ".", "refMrna", NULL); } }
void txGeneFromBed(char *inBed, char *inPicks, char *ucscFa, char *uniProtFa, char *refPepFa, char *outKg) /* txGeneFromBed - Convert from bed to knownGenes format table (genePred + uniProt ID). */ { /* Load protein sequence into hashes */ struct hash *uniProtHash = faReadAllIntoHash(uniProtFa, dnaUpper); struct hash *ucscProtHash = faReadAllIntoHash(ucscFa, dnaUpper); struct hash *refProtHash =faReadAllIntoHash(refPepFa, dnaUpper); /* Load picks into hash. We don't use cdsPicksLoadAll because empty fields * cause that autoSql-generated routine problems. */ struct hash *pickHash = newHash(18); struct cdsPick *pick; struct lineFile *lf = lineFileOpen(inPicks, TRUE); char *row[CDSPICK_NUM_COLS]; while (lineFileRowTab(lf, row)) { pick = cdsPickLoad(row); hashAdd(pickHash, pick->name, pick); } /* Load in bed */ struct bed *bed, *bedList = bedLoadNAll(inBed, 12); /* Do reformatting and write output. */ FILE *f = mustOpen(outKg, "w"); for (bed = bedList; bed != NULL; bed = bed->next) { char *protAcc = NULL; if (bed->thickStart < bed->thickEnd) { pick = hashMustFindVal(pickHash, bed->name); struct dnaSeq *spSeq = NULL, *uniSeq = NULL, *refPep = NULL, *ucscSeq; ucscSeq = hashMustFindVal(ucscProtHash, bed->name); if (pick->swissProt[0]) spSeq = hashMustFindVal(uniProtHash, pick->swissProt); if (pick->uniProt[0]) uniSeq = hashMustFindVal(uniProtHash, pick->uniProt); if (pick->refProt[0]) refPep = hashMustFindVal(refProtHash, pick->refProt); /* First we look for an exact match between the ucsc protein and * something from swissProt/uniProt. 
*/ if (spSeq != NULL && sameString(ucscSeq->dna, spSeq->dna)) protAcc = pick->swissProt; if (protAcc == NULL && uniSeq != NULL && sameString(ucscSeq->dna, uniSeq->dna)) protAcc = pick->uniProt; if (protAcc == NULL && refPep != NULL && sameString(ucscSeq->dna, refPep->dna)) { protAcc = cloneString(pick->refProt); chopSuffix(protAcc); } if (protAcc == NULL) { if (pick->uniProt[0]) protAcc = pick->uniProt; else { protAcc = cloneString(pick->refProt); chopSuffix(protAcc); } } } outputKg(bed, emptyForNull(protAcc), f); } carefulClose(&f); }