static char *findType(struct hash *cvHash,char **requested,int requestCount,
                      char **queryBy, char **org,boolean silent)
/* Work out the cv type for a query.  The starting point is the file-level typeOpt.
 * When requested[] is supplied, every stanza in cvHash whose CV_TYPE is not CV_TOT
 * is examined: if its *queryBy value appears in requested[], its type either becomes
 * the answer (and *org receives a lower-cased clone of the stanza's ORGANISM, when it
 * has one) or is checked against the type already held.  A held CV_TERM_CONTROL type
 * gives way to the stanza's type; any other mismatch aborts unless the stanza's type
 * is CV_TERM_CONTROL.  If no type results while querying by CV_TERM, the search is
 * repeated silently by CV_TARGET and *queryBy is switched on success.  Without a type
 * and without silent, aborts.  Returns normalizeType() of whatever was found. */
{
struct hashCookie cookie = hashFirst(cvHash);
struct hashEl *el;
char *type = typeOpt;

/* Entered even when a type is already known, so that the terms can confirm it. */
if (requested != NULL)
    {
    /* FIXME: This should be using mdbCv APIs to get hashes. */
    while ((el = hashNext(&cookie)) != NULL)
        {
        struct hash *stanza = (struct hash *)el->val;
        char *stanzaType = hashMustFindVal(stanza, CV_TYPE);
        if (sameWord(stanzaType, CV_TOT))       /* TOT = typeOfTerm */
            continue;

        char *val = hashFindVal(stanza, *queryBy);
        if (val == NULL)
            continue;

        int ix = stringArrayIx(val,requested,requestCount);
        if (ix == -1)                           /* not one of the requested terms */
            continue;

        char *stanzaOrg = hashFindVal(stanza, ORGANISM);
        if (type == NULL)
            {
            if (stanzaOrg != NULL)
                *org = strLower(cloneString(stanzaOrg));
            type = stanzaType;
            continue;
            }
        if (!differentWord(type,stanzaType))
            continue;

        /* Types disagree: only CV_TERM_CONTROL on either side is tolerated. */
        if (sameWord(CV_TERM_CONTROL,type))
            type = stanzaType;
        else if (differentWord(CV_TERM_CONTROL,stanzaType))
            errAbort("Error: Requested %s of type '%s'. But '%s' has type '%s'\n",
                     *queryBy,type,requested[ix],stanzaType);
        }
    }

/* Special case of term becoming target */
if (type == NULL && sameWord(*queryBy,CV_TERM))
    {
    char *queryByTarget = CV_TARGET;
    type = findType(cvHash,requested,requestCount,&queryByTarget,org,TRUE); /* silent here */
    if (type != NULL)
        *queryBy = queryByTarget;
    }

/* Still no type?  abort */
if (type == NULL && !silent)
    errAbort("Error: Required %s=%s ['%s', '%s', '%s', '%s' or '%s'] argument not found\n",
             *queryBy,(requested != NULL) ? *requested : "?",
             CV_TYPE, CV_TERM, CV_TAG, CV_TARGET, CV_LABEL);

return normalizeType(type);
}
enum wiggleAggregateFunctionEnum wigFetchAggregateFunctionWithCart(struct cart *theCart, struct trackDb *tdb, char *name, char **optString) /****** windowingFunction - Whiskers by default **************************/ { char *Default = wiggleAggregateFunctionEnumToString(wiggleAggregateTransparent); boolean parentLevel = isNameAtParentLevel(tdb,name); char *aggregateFunction = NULL; enum wiggleAggregateFunctionEnum ret; aggregateFunction = cloneString(cartOptionalStringClosestToHome(theCart, tdb, parentLevel, AGGREGATE)); /* If windowingFunction is a string, it came from the cart, otherwise * see if it is specified in the trackDb option, finally * return the default. */ if (!aggregateFunction) { char * tdbDefault = trackDbSettingClosestToHomeOrDefault(tdb, AGGREGATE, Default); freeMem(aggregateFunction); if (differentWord(Default,tdbDefault)) aggregateFunction = cloneString(tdbDefault); else { struct hashEl *hel; /* no aggregateFunction from trackDb, maybe it is in tdb->settings * (custom tracks keep settings here) */ aggregateFunction = cloneString(Default); if ((tdb->settings != (char *)NULL) && (tdb->settingsHash != (struct hash *)NULL)) { if ((hel =hashLookup(tdb->settingsHash, AGGREGATE)) !=NULL) if (differentWord(Default,(char *)hel->val)) { freeMem(aggregateFunction); aggregateFunction = cloneString((char *)hel->val); } } } } if (optString) *optString = cloneString(aggregateFunction); ret = wiggleAggregateFunctionStringToEnum(aggregateFunction); freeMem(aggregateFunction); return(ret); } /* enum wiggleWindowingEnum wigFetchWindowingFunctionWithCart() */
static char *wigCheckBinaryOption(struct trackDb *tdb, char *Default, char *notDefault, char *tdbString, char *secondTdbString) /* A common operation for binary options (two values possible) * check for trackDb.ra, then tdb->settings values * return one of the two possibilities if found * (the tdbString and secondTdbString are a result of * early naming conventions changing over time resulting in * two possible names for the same thing ...) */ { char *tdbDefault = trackDbSettingClosestToHomeOrDefault(tdb, tdbString, "NONE"); char *ret; ret = Default; /* the answer, unless found to be otherwise */ if (sameWord("NONE",tdbDefault) && (secondTdbString != (char *)NULL)) tdbDefault = trackDbSettingClosestToHomeOrDefault(tdb, secondTdbString, "NONE"); if (differentWord("NONE",tdbDefault)) { if (differentWord(Default,tdbDefault)) ret = notDefault; } else { struct hashEl *hel; /* no setting from trackDb, maybe it is in tdb->settings * (custom tracks keep settings here) */ if ((tdb->settings != (char *)NULL) && (tdb->settingsHash != (struct hash *)NULL)) { if ((hel = hashLookup(tdb->settingsHash, tdbString)) != NULL) { if (differentWord(Default,(char *)hel->val)) ret = notDefault; } else if (secondTdbString != (char *)NULL) { if ((hel = hashLookup(tdb->settingsHash, secondTdbString)) != NULL) { if (differentWord(Default,(char *)hel->val)) ret = notDefault; } } } } return(cloneString(ret)); }
static void intersectDataVector(char *table, struct dataVector *dataVector1, struct region *region, struct sqlConnection *conn) /* Perform intersection (if specified) on dataVector. */ { /* If table is type wig (not bedGraph), then intersection has already been * performed on each input (other selected subtracks must be the same type * as table). * Otherwise, handle intersection here. */ if (anyIntersection() && !isWiggle(database, table) && !isBigWigTable(table)) { char *track2 = cartString(cart, hgtaIntersectTrack); char *table2 = cartString(cart, hgtaIntersectTable); if (table2 && differentWord(table2, table)) { struct trackDb *tdb2 = findTrack(track2, fullTrackList); struct trackTable *tt2 = trackTableNew(tdb2, table2, conn); struct dataVector *dataVector2 = dataVectorFetchOneRegion(tt2, region, conn); char *op = cartString(cart, hgtaIntersectOp); boolean dv2IsWiggle = (isWiggle(database, table2) || isBigWigTable(table2) || isBedGraph(table2)); dataVectorIntersect(dataVector1, dataVector2, dv2IsWiggle, sameString(op, "none")); dataVectorFree(&dataVector2); } } }
static char *getDbForGenome(char *genome, struct cart *cart)
/* Pick a database for genome.
 *   param genome - the genome a database is wanted for
 *   param cart   - consulted first, under dbCgiName, for a candidate database
 *   return       - the cart's database (or hDefaultDb() when the cart has none or
 *                  it does not exist) if its genome matches; otherwise the default
 *                  database for genome. */
{
char *candidate = cartUsualString(cart, dbCgiName, NULL);

if (candidate == NULL || !hDbExists(candidate))
    candidate = hDefaultDb();

/* If genomes don't match, then get the default db for that genome */
return differentWord(genome, hGenome(candidate))
        ? hDefaultDbForGenome(genome)
        : candidate;
}
enum wiggleSmoothingEnum wigFetchSmoothingWindowWithCart(struct cart *theCart, struct trackDb *tdb, char *name, char **optString) /****** smoothingWindow - OFF by default **************************/ { char * Default = wiggleSmoothingEnumToString(wiggleSmoothingOff); boolean parentLevel = isNameAtParentLevel(tdb,name); char * smoothingWindow = NULL; enum wiggleSmoothingEnum ret; smoothingWindow = cloneString(cartOptionalStringClosestToHome(theCart, tdb, parentLevel, SMOOTHINGWINDOW)); if (!smoothingWindow) /* if nothing from the Cart, check trackDb/settings */ { char * tdbDefault = trackDbSettingClosestToHomeOrDefault(tdb, SMOOTHINGWINDOW, Default); if (differentWord(Default,tdbDefault)) smoothingWindow = cloneString(tdbDefault); else { struct hashEl *hel; /* no smoothingWindow from trackDb, maybe it is in tdb->settings * (custom tracks keep settings here) */ smoothingWindow = cloneString(Default); if ((tdb->settings != (char *)NULL) && (tdb->settingsHash != (struct hash *)NULL)) { if ((hel = hashLookup(tdb->settingsHash, SMOOTHINGWINDOW)) != NULL) if (differentWord(Default,(char *)hel->val)) { freeMem(smoothingWindow); smoothingWindow = cloneString((char *)hel->val); } } } } if (optString) *optString = cloneString(smoothingWindow); ret = wiggleSmoothingStringToEnum(smoothingWindow); freeMem(smoothingWindow); return(ret); } /* enum wiggleSmoothingEnum wigFetchSmoothingWindowWithCart() */
char *showTableField(struct trackDb *track, char *varName, boolean useJoiner)
/* Print the "table:" label and a <SELECT> named varName listing the tables for
 * track (or all tables of the selected database when track is NULL), followed by
 * an ENCODE restriction-date link when one applies.  Returns the name of the
 * table that ends up selected. */
{
struct slName *name;
struct slName *nameList = (track == NULL)
        ? tablesForDb(findSelDb())
        : cartTrackDbTablesForTrack(database, track, useJoiner);

/* Get currently selected table.  If it isn't in our list
 * then revert to first in list. */
char *selTable = cartUsualString(cart, varName, nameList->name);
if (!slNameInListUseCase(nameList, selTable))
    selTable = nameList->name;

/* Print out label and drop-down list. */
hPrintf("<B>table: </B>");
hPrintf("<SELECT NAME=\"%s\" %s>\n", varName, onChangeTable());

struct trackDb *selTdb = NULL;
for (name = nameList; name != NULL; name = name->next)
    {
    char *table = name->name;
    struct trackDb *tdb = NULL;
    if (track != NULL)
        tdb = findTdbForTable(database,track,table, ctLookupName);

    hPrintf("<OPTION VALUE=\"%s\"", table);

    /* Disable options for related tables that are noGenome -- if a non-positional
     * table is selected then we output its entire contents. */
    boolean disable = cartTrackDbIsNoGenome(database, table) &&
                      (track == NULL || differentString(track->table, table));
    if (disable)
        hPrintf(" DISABLED"NO_GENOME_CLASS);
    else if (sameString(selTable, table))
        {
        hPrintf(" SELECTED");
        selTdb = tdb;
        }

    /* Show the short label too, unless it would just repeat the current track's. */
    boolean withLabel = (tdb != NULL) &&
        ((curTrack == NULL) || differentWord(tdb->shortLabel, curTrack->shortLabel));
    if (withLabel)
        hPrintf(">%s (%s)\n", tdb->shortLabel, table);
    else
        hPrintf(">%s\n", table);
    }
hPrintf("</SELECT>\n");

if (!trackHubDatabase(database))
    {
    char *restrictDate = encodeRestrictionDateDisplay(database,selTdb);
    if (restrictDate)
        {
        hPrintf("<A HREF=\'%s\' TARGET=BLANK>restricted until:</A> %s",
                ENCODE_DATA_RELEASE_POLICY, restrictDate);
        freeMem(restrictDate);
        }
    }
return selTable;
}
void chainMethods(struct track *tg, struct trackDb *tdb, int wordCount, char *words[]) /* Fill in custom parts of alignment chains. */ { struct cartOptions *chainCart; AllocVar(chainCart); boolean normScoreAvailable = chainDbNormScoreAvailable(tdb); /* what does the cart say about coloring option */ chainCart->chainColor = chainFetchColorOption(cart, tdb, FALSE); chainCart->scoreFilter = cartUsualIntClosestToHome(cart, tdb, FALSE, SCORE_FILTER, 0); linkedFeaturesMethods(tg); tg->itemColor = lfChromColor; /* default coloring option */ /* if normScore column is available, then allow coloring */ if (normScoreAvailable) { switch (chainCart->chainColor) { case (chainColorScoreColors): tg->itemColor = chainScoreColor; tg->colorShades = shadesOfGray; break; case (chainColorNoColors): setNoColor(tg); break; default: case (chainColorChromColors): break; } } else { char *optionStr; /* this old option was broken before */ optionStr = cartUsualStringClosestToHome(cart, tdb, FALSE, "color", "on"); if (differentWord("on",optionStr)) { setNoColor(tg); chainCart->chainColor = chainColorNoColors; } else chainCart->chainColor = chainColorChromColors; } tg->loadItems = chainLoadItems; tg->drawItems = chainDraw; tg->mapItemName = lfMapNameFromExtra; tg->subType = lfSubChain; tg->extraUiData = (void *) chainCart; }
int findChromIx(char *name)
/* Return the index in chromNames[] of the first entry that differentWord()
 * reports as equal to name.  Aborts with a message when there is none. */
{
int ix = 0;
while (ix < chromCount)
    {
    if (differentWord(chromNames[ix], name) == 0)
        return ix;
    ++ix;
    }
errAbort("Unknown chromosome %s", name);
return -1;
}
static void gapSanityCheck(struct agpGap *gapList) { int prevEnd = 0; int prevStart = 0; char *prevChr = NULL; char *prevType = NULL; struct agpGap *gap; for (gap = gapList; gap; gap = gap->next) { int chrSize = hashIntVal(cInfoHash, gap->chrom); if (gap->chromStart < 0) verbose(1, "WARNING: gap chromStart < 0 at %s:%d-%d\n", gap->chrom, gap->chromStart, gap->chromEnd); if (gap->chromEnd > chrSize) verbose(1, "WARNING: gap chromEnd > chromSize(%d) " "at %s:%d-%d\n", chrSize, gap->chrom, gap->chromStart, gap->chromEnd); if (gap->chromEnd == chrSize && differentString(gap->type, "telomere")) verbose(1, "WARNING: gap at end of chromosome not telomere " "at %s:%d-%d, type: %s\n", gap->chrom, gap->chromStart, gap->chromEnd, gap->type); if (gap->chromStart >= gap->chromEnd) verbose(1, "WARNING: gap chromStart >= chromEnd at %s:%d-%d\n", gap->chrom, gap->chromStart, gap->chromEnd); if (prevEnd > 0) { if (sameWord(prevChr, gap->chrom) && (prevEnd >= gap->chromStart)) verbose(1,"WARNING: overlapping gap at " "%s:%d-%d(%s) and %s:%d-%d(%s)\n", gap->chrom, prevStart, prevEnd, prevType, gap->chrom, gap->chromStart, gap->chromEnd, gap->type); } else { prevStart = gap->chromStart; prevEnd = gap->chromEnd; prevType = gap->type; } if (isNotEmpty(prevChr)) { if (differentWord(prevChr, gap->chrom)) { freeMem(prevChr); prevChr = cloneString(gap->chrom); } } else prevChr = cloneString(gap->chrom); prevStart = gap->chromStart; prevEnd = gap->chromEnd; } }
struct gffGene *gffFindGeneIgnoreCase(struct gff *gff, char *geneName)
/* Return the first gene on gff->genes whose name equals geneName when compared
 * with differentWord() (not case sensitive), or NULL if there is none. */
{
struct gffGene *gene = gff->genes;
while (gene != NULL && differentWord(geneName, gene->name) != 0)
    gene = gene->next;
return gene;
}
struct hash *hashPsls(char *pslFileName) { struct psl *pslList = NULL, *psl = NULL, *pslSubList = NULL, *pslNext = NULL; struct hash *pslHash = newHash(15); char *last = NULL; char key[128]; char *tmp = NULL; pslList = pslLoadAll(pslFileName); /* Fix psl names */ for(psl = pslList; psl != NULL; psl = psl->next) { tmp = strrchr(psl->qName, ';'); *tmp = '\0'; tmp = strstr(psl->qName,prefix); assert(tmp); /* checks if there are 2 occurrences of ":" in probe name as in full name */ /* if probe name is shortened to fit in the seq table, there is only 1 ":"*/ /* e.g. full: consensus:HG-U133A:212933_x_at; short:HG-U133A:212933_x_at;*/ if (countChars(psl->qName, *prefix) == 2) { tmp = strstr(tmp+1,prefix); assert(tmp); } tmp = tmp + strlen(prefix); safef(psl->qName, strlen(psl->qName), "%s", tmp); } /* Sort based on query name. */ slSort(&pslList, pslCmpQuery); /* For each psl, if it is has the same query name add it to the sublist. Otherwise store the sublist in the hash and start another. */ for(psl = pslList; psl != NULL; psl = pslNext) { pslNext = psl->next; if(last != NULL && differentWord(last, psl->qName)) { hashAddUnique(pslHash, last, pslSubList); pslSubList = NULL; } slAddTail(&pslSubList, psl); last = psl->qName; } /* Add the last sublist */ hashAddUnique(pslHash, last, pslSubList); return pslHash; }
static void wikiTrackMapItem(struct track *tg, struct hvGfx *hvg, void *item, char *itemName, char *mapItemName, int start, int end, int x, int y, int width, int height) /* create a special map box item with different i=hgcClickName and * pop-up statusLine with the item name */ { char *userName; /* already been determined to be enabled by getting here, need to verify * userName vs editors and vs owner */ (void) wikiTrackEnabled(database, &userName); char *hgcClickName = tg->mapItemName(tg, item); char *statusLine = tg->itemName(tg, item); boolean editor = isWikiEditor(userName); struct wikiTrack *wikiItem = NULL; boolean enableHgcClick = FALSE; /* allow hgc click (i.e. delete privs) if the following are true 1. this is the item 0 "add new item" 2. logged into the wiki 3. user is an editor or user is the owner */ if (differentWord("0", hgcClickName)) wikiItem = findWikiItemId(hgcClickName); else enableHgcClick = TRUE; /* item 0 "add new item" must go to hgc */ if (wikiItem) { if (isNotEmpty(userName) && sameWord(userName, wikiItem->owner)) enableHgcClick = TRUE; /* owner has delete privls */ if (editor) enableHgcClick = TRUE; /* editors have delete privls */ } if (enableHgcClick) { mapBoxHgcOrHgGene(hvg, start, end, x, y, width, height, tg->track, hgcClickName, statusLine, NULL, FALSE, NULL ); } else { /* go directly to the wiki description */ char *directUrl = wikiUrl(wikiItem); mapBoxHgcOrHgGene(hvg, start, end, x, y, width, height, tg->track, hgcClickName, statusLine, directUrl, FALSE, NULL); freeMem(directUrl); } }
void wigFetchYLineMarkValueWithCart(struct cart *theCart,struct trackDb *tdb, char *name, double *tDbYMark ) /* y= marker line value * User requested value is defined in the cart * A Default value can be defined as * yLineMark declaration from trackDb *****************************************************************************/ { boolean parentLevel = isNameAtParentLevel(tdb,name); char *yLineMarkValue = NULL; /* string from cart */ double yLineValue; /* value from cart or trackDb */ char * tdbDefault = cloneString( trackDbSettingClosestToHomeOrDefault(tdb, YLINEMARK, "NONE") ); if (sameWord("NONE",tdbDefault)) { struct hashEl *hel; /* no yLineMark from trackDb, maybe it is in tdb->settings * (custom tracks keep settings here) */ if ((tdb->settings != (char *)NULL) && (tdb->settingsHash != (struct hash *)NULL)) { if ((hel = hashLookup(tdb->settingsHash, YLINEMARK)) != NULL) { freeMem(tdbDefault); tdbDefault = cloneString((char *)hel->val); } } } /* If nothing else, it is zero */ yLineValue = 0.0; /* Let's see if a value is available in the cart */ yLineMarkValue = cartOptionalStringClosestToHome(theCart, tdb, parentLevel, YLINEMARK); /* if yLineMarkValue is non-Null, it is the requested value */ if (yLineMarkValue) yLineValue = atof(yLineMarkValue); else /* See if a default line is specified in the trackDb.ra file */ if (differentWord("NONE",tdbDefault)) yLineValue = atof(tdbDefault); /* If possible to return */ if (tDbYMark) *tDbYMark = yLineValue; freeMem(tdbDefault); } /* void wigFetchYLineMarkValueWithCart() */
static int linkCmpPriority(const void *va, const void *vb)
/* Sort comparator for arrays of struct link pointers: orders by priority, and
 * for equal priorities hands back differentWord(b->shortLabel, a->shortLabel). */
{
const struct link *a = *((struct link **)va);
const struct link *b = *((struct link **)vb);
float dif = a->priority - b->priority;

if (dif < 0)
    return -1;
if (dif > 0)
    return 1;
/* Priorities tie.  The argument order (b before a) is deliberate and must be
 * kept as is. */
return differentWord(b->shortLabel, a->shortLabel);
}
/****** itemRgb - not on by default **************************/
boolean bedItemRgb(struct trackDb *tdb)
/* Return TRUE when tdb has an OPT_ITEM_RGB setting whose value is anything
 * other than "Off" (ignoring case); FALSE when tdb is NULL, the setting is
 * missing, or it is "Off". */
{
char *Default = "Off";  /* anything different than this will turn it on */

if (tdb == NULL)
    return FALSE;

char *setting = trackDbSettingClosestToHome(tdb, OPT_ITEM_RGB);
if (setting == NULL)
    return FALSE;

return differentWord(Default,setting) ? TRUE : FALSE;
}       /* boolean bedItemRgb(struct trackDb *tdb) */
static void initialPlacement(struct chrGapList *bounding, struct bed *placed) /* simply place the items from the 'placed' list into the gaps * defined by the 'bounding' list, the locations of the 'placed' * items are what they say they are in their bed structure. */ { struct bed *bedEl; int unplacedCount = 0; for (bedEl = placed; bedEl != NULL; bedEl = bedEl->next) { struct chrGapList *cl; boolean placedOK = FALSE; for (cl = bounding; cl != NULL; cl = cl->next) { struct gap *gl; if (differentWord(cl->chrom, bedEl->chrom)) continue; for (gl = cl->gList; gl != NULL; gl = gl->next) { if ( ((gl->downstream != NULL) && (gl->upstream != NULL)) && ((bedEl->chromEnd <= gl->downstream->chromStart) && (bedEl->chromStart >= gl->upstream->chromEnd)) ) { placeItem(bedEl, gl); placedOK = TRUE; verbose(5,"item: %s:%d-%d, gap: %s:%d-%d\n", bedEl->chrom, bedEl->chromStart, bedEl->chromEnd, bedEl->chrom, gl->upstream->chromEnd, gl->downstream->chromStart); break; } } } if (! placedOK) { verbose(3,"can not place item: %s:%d-%d\n", bedEl->chrom, bedEl->chromStart, bedEl->chromEnd); ++unplacedCount; } } if (unplacedCount) verbose(2,"Could not place %d items\n", unplacedCount); }
static void linksPrint(struct section *section, struct sqlConnection *conn, char *geneId) /* Print the links section. */ { int maxPerRow = 6, itemPos = 0; int rowIx = 0; struct link *link, *linkList = section->items; webPrintLinkTableStart(); printGenomicSeqLink(conn, geneId, curGeneChrom, curGeneStart, curGeneEnd); printMrnaSeqLink(conn,geneId); printProteinSeqLink(conn,geneId); hPrintf("</TR>\n<TR>"); ++rowIx; for (link = linkList; link != NULL; link = link->next) { char *url = linkGetUrl(link, conn, geneId); if (url != NULL) { boolean fakeOut = link->useHgsid && differentWord(link->name,"wikiTrack"); char *target = (fakeOut ? "" : " TARGET=_blank"); if (++itemPos > maxPerRow) { hPrintf("</TR>\n<TR>"); itemPos = 1; ++rowIx; } if (fakeOut) webPrintLinkCellStart(); else webPrintLinkOutCellStart(); hPrintf("<A HREF=\"%s\"%s class=\"toc\">", url, target); hPrintf("%s", link->shortLabel); hPrintf("</A>"); webPrintLinkCellEnd(); freez(&url); } } webFinishPartialLinkOutTable(rowIx, itemPos, maxPerRow); webPrintLinkTableEnd(); }
void readPrimerInfo(struct lineFile *sf) /* Read in primer info from all.primers file */ { int wordCount; char *words[5]; char *dist1, *dist[2]; struct sts *sts; stsHash = newHash(16); while (lineFileChopCharNext(sf, '\t', words, 5)) { verbose(2, "# line %d words1-4: '%s' '%s' '%s' '%s'\n", sf->lineIx, words[1], words[2], words[3], words[4]); if (words[1] && words[2] && words[3] && words[4]) { AllocVar(sts); sts->dbstsId = cloneString(words[0]); sts->leftPrimer = cloneString(words[1]); sts->rightPrimer = cloneString(words[2]); sts->size = cloneString(words[3]); sts->ucscId = cloneString(words[4]); sts->found = FALSE; dist1 = cloneString(words[3]); if (sts->leftPrimer && dist1 && differentWord("-", dist1)) { wordCount = chopByChar(dist1, '-', dist, ArraySize(dist)); sts->minSize = sqlUnsigned(dist[0]); if (wordCount == 1) sts->maxSize = sqlUnsigned(dist[0]); else sts->maxSize = sqlUnsigned(dist[1]); if (sts->maxSize == 0) sts->maxSize = 1000; sts->next = NULL; sts->place = NULL; sts->epcr = NULL; hashAdd(stsHash, sts->dbstsId, sts); } slAddHead(&stsList, sts); } } }
static void synonymPrint(struct section *section, struct sqlConnection *conn, char *id) /* Print out SwissProt comments - looking up typeId/commentVal. */ { char *protAcc = getSwissProtAcc(conn, spConn, id); char *spDisplayId; char *refSeqAcc = ""; char *mrnaAcc = ""; char *oldDisplayId; char condStr[255]; char *kgProteinID; char *parAcc; /* parent accession of a variant splice protein */ char *chp; if (isRgdGene(conn)) { rgdGene2SynonymPrint(section,conn, id); return; } if (sqlTablesExist(conn, "kgAlias")) printAlias(id, conn); if (sameWord(genome, "Zebrafish")) { char *xrefTable = "ensXRefZfish"; char *geneIdCol = "ensGeneId"; /* get Gene Symbol and RefSeq accession from Zebrafish-specific */ /* cross-reference table */ printGeneSymbol(id, xrefTable, geneIdCol, conn); refSeqAcc = getRefSeqAcc(id, xrefTable, geneIdCol, conn); hPrintf("<B>ENSEMBL ID:</B> %s", id); } else { char query[256]; char *toRefTable = genomeOptionalSetting("knownToRef"); if (toRefTable != NULL && sqlTableExists(conn, toRefTable)) { safef(query, sizeof(query), "select value from %s where name='%s'", toRefTable, id); refSeqAcc = emptyForNull(sqlQuickString(conn, query)); } if (sqlTableExists(conn, "kgXref")) { safef(query, sizeof(query), "select mRNA from kgXref where kgID='%s'", id); mrnaAcc = emptyForNull(sqlQuickString(conn, query)); } if (sameWord(genome, "C. 
elegans")) hPrintf("<B>WormBase ID:</B> %s<BR>", id); else hPrintf("<B>UCSC ID:</B> %s<BR>", id); } if (refSeqAcc[0] != 0) { hPrintf("<B>RefSeq Accession: </B> <A HREF=\""); printOurRefseqUrl(stdout, refSeqAcc); hPrintf("\">%s</A><BR>\n", refSeqAcc); } else if (mrnaAcc[0] != 0) { safef(condStr, sizeof(condStr), "acc = '%s'", mrnaAcc); if (sqlGetField(database, "gbCdnaInfo", "acc", condStr) != NULL) { hPrintf("<B>Representative RNA: </B> <A HREF=\""); printOurMrnaUrl(stdout, mrnaAcc); hPrintf("\">%s</A><BR>\n", mrnaAcc); } else /* do not show URL link if it is not found in gbCdnaInfo */ { hPrintf("<B>Representative RNA: %s </B>", mrnaAcc); } } if (protAcc != NULL) { kgProteinID = cloneString(""); if (hTableExists(sqlGetDatabase(conn), "knownGene") && (isNotEmpty(cartOptionalString(cart, hggChrom)) && differentWord(cartOptionalString(cart, hggChrom),"none"))) { safef(condStr, sizeof(condStr), "name = '%s' and chrom = '%s' and txStart=%s and txEnd=%s", id, cartOptionalString(cart, hggChrom), cartOptionalString(cart, hggStart), cartOptionalString(cart, hggEnd)); kgProteinID = sqlGetField(database, "knownGene", "proteinID", condStr); } hPrintf("<B>Protein: "); if (strstr(kgProteinID, "-") != NULL) { parAcc = cloneString(kgProteinID); chp = strstr(parAcc, "-"); *chp = '\0'; /* show variant splice protein and the UniProt link here */ hPrintf("<A HREF=\"http://www.uniprot.org/uniprot%s\" " "TARGET=_blank>%s</A></B>, splice isoform of ", kgProteinID, kgProteinID); hPrintf("<A HREF=\"http://www.uniprot.org/uniprot/%s\" " "TARGET=_blank>%s</A></B>\n", parAcc, parAcc); } else { hPrintf("<A HREF=\"http://www.uniprot.org/uniprot/%s\" " "TARGET=_blank>%s</A></B>\n", protAcc, protAcc); } /* show SWISS-PROT display ID if it is different than the accession ID */ /* but, if display name is like: Q03399 | Q03399_HUMAN, then don't show display name */ spDisplayId = spAnyAccToId(spConn, protAcc); if (spDisplayId == NULL) { errAbort("<br>%s seems to no longer be a valid protein ID in our 
latest UniProtKB DB.", protAcc); } if (strstr(spDisplayId, protAcc) == NULL) { hPrintf(" (aka %s", spDisplayId); /* show once if the new and old displayId are the same */ oldDisplayId = oldSpDisplayId(spDisplayId); if (oldDisplayId != NULL) { if (!sameWord(spDisplayId, oldDisplayId) && !sameWord(protAcc, oldDisplayId)) { hPrintf(" or %s", oldDisplayId); } } hPrintf(")<BR>\n"); } } printCcds(id, conn); }
void analyse(int start, int stop) { struct hash *hash; char line[512]; int lineCount = 0; char *words[32]; int wordCount; struct cdnaInfo *cdnaList = NULL; struct cdnaInfo *ci = NULL; int cdnaCount; int maxCdnaCount = stop - start; cdnaCount = 1; if (start > 1) { for (;;) { if (!fgets(line, sizeof(line), inFile)) errAbort("Not %d cDNAs in file, only %d\n", start, cdnaCount); ++lineCount; if (line[0] == '#') /* Skip comments. */ continue; wordCount = chopString(line, whiteSpaceChopper, words, ArraySize(words)); if (wordCount <= 0) /* Skip empty lines. */ continue; if (!differentWord(words[1], "alignments")) { ++cdnaCount; if (cdnaCount >= start) break; } } } cdnaCount = 0; hash = newHash(14); /* Hash table with 16k entries. */ for (;;) { if (!fgets(line, sizeof(line), inFile)) break; ++lineCount; if (line[0] == '#') /* Skip comments. */ continue; wordCount = chopString(line, whiteSpaceChopper, words, ArraySize(words)); if (wordCount <= 0) /* Skip empty lines. */ continue; if (wordCount < 4) /* Everyone else has at least four words. */ { errAbort("Short line %d:\n", lineCount); } if (sameWord(words[1], "Blasting")) { char *cdnaName = words[2]; if ((ci = lookupInfo(hash, cdnaName)) == NULL) { struct hashEl *hel; ci = needMem(sizeof(*ci)); hel = hashAdd(hash, cdnaName, ci); ci->next = cdnaList; cdnaList = ci; ci->ix = atoi(words[0]); ci->name = hel->name; } } else if (sameWord(words[2], "hits")) { /* Newer style - includes cDNA matching range. */ if (ci == NULL) continue; hitLine(ci, lineCount, words[0], words[1], words[3], words[4], words[5], words[9]); } else if (sameWord(words[1], "hits")) /* Older style - no cDNA matching range. 
*/ { if (ci == NULL) continue; hitLine(ci, lineCount, words[0], NULL, words[2], words[3], words[4], words[8]); } else if (sameWord(words[1], "alignments")) { struct dnaSeq *cdnaSeq; struct wormCdnaInfo info; if (ci == NULL) continue; if (differentWord(ci->name, words[3])) errAbort("Line %d - %s is not %s", lineCount, words[3], ci->name); if (!ci->finished) { if (!anyCdnaSeq(ci->name, &cdnaSeq, &info)) { warn("Can't find cDNA %s", ci->name); ci->isDupe = TRUE; } else { ci->baseCount = cdnaSeq->size; ci->baseCrc = dnaCrc(cdnaSeq->dna, cdnaSeq->size); slReverse(&ci->roughAli); ci->roughScore = bestRoughScore(ci->roughAli); filterDupeCdna(ci, cdnaSeq); ci->isBackwards = (info.orientation == '-'); refineAlis(ci, cdnaSeq); ci->fineScore = bestFineScore(ci->fineAli); ci->isEmbryonic = info.isEmbryonic; ci->finished = TRUE; freeDnaSeq(&cdnaSeq); ++cdnaCount; if (cdnaCount >= maxCdnaCount) break; } } } else { errAbort("Can't deal with line %d\n", lineCount); } } slReverse(&cdnaList); doGoodBad(cdnaList); doUnusual(cdnaList); //makeCdnaToGene(cdnaList); /* Clean up. */ /* These two are slow and not really necessary. */ #ifdef FASTIDIOUS slFreeList(&cdnaList); freeHash(&hash); #endif uglyf("Done analyse\n"); }
void bedItemOverlapCount(struct hash *chromHash, char *infile, char *outfile){ unsigned maxChromSize = 0; unitSize *counts = (unitSize *)NULL; FILE *f = mustOpen(outfile, "w"); struct hashCookie hc = hashFirst(chromHash); struct hashEl *hel; while( (hel = hashNext(&hc)) != NULL) { unsigned num = (unsigned) ptToInt(hel->val); maxChromSize = max(num, maxChromSize); } verbose(2,"#\tmaxChromSize: %u\n", maxChromSize); if (maxChromSize < 1) errAbort("maxChromSize is zero ?"); /* Allocate just once for the largest chrom and reuse this array */ counts = needHugeMem(sizeof(unitSize) * maxChromSize); /* Reset the array to be zero to be reused */ memset((void *)counts, 0, sizeof(unitSize)*(size_t)maxChromSize); unsigned chromSize = 0; char *prevChrom = (char *)NULL; boolean outputToDo = FALSE; struct hash *seenHash = newHash(5); struct lineFile *bf = lineFileOpen(infile , TRUE); struct bed *bed = (struct bed *)NULL; char *row[12]; int numFields = doBed12 ? 12 : 3; while (lineFileNextRow(bf,row, numFields)) { int i; bed = bedLoadN(row, numFields); verbose(3,"#\t%s\t%d\t%d\n",bed->chrom,bed->chromStart, bed->chromEnd); if (prevChrom && differentWord(bed->chrom,prevChrom)) // End a chr { verbose(2,"#\tchrom %s done, size %d\n", prevChrom, chromSize); if (outputToDo) outputCounts(counts, prevChrom, chromSize, f); outputToDo = FALSE; memset((void *)counts, 0, sizeof(unitSize)*(size_t)maxChromSize); /* zero counts */ freez(&prevChrom); // prevChrom is now NULL so it will be caught by next if! } if ((char *)NULL == prevChrom) // begin a chr { if (hashLookup(seenHash, bed->chrom)) errAbort("ERROR:input file not sorted. 
%s seen before on line %d\n", bed->chrom, bf->lineIx); hashAdd(seenHash, bed->chrom, NULL); prevChrom = cloneString(bed->chrom); chromSize = hashIntVal(chromHash, prevChrom); verbose(2,"#\tchrom %s starting, size %d\n", prevChrom,chromSize); } if (bed->chromEnd > chromSize) { // check for circular chrM if (doBed12 || bed->chromStart>=chromSize || differentWord(bed->chrom,"chrM")) { warn("ERROR: %s\t%d\t%d", bed->chrom, bed->chromStart, bed->chromEnd); errAbort("chromEnd > chromSize ? %d > %d", bed->chromEnd,chromSize); } for (i = bed->chromStart; i < chromSize; ++i) INCWOVERFLOW(counts,i); for (i = 0; i < (bed->chromEnd - chromSize); ++i) INCWOVERFLOW(counts,i); } else if (doBed12) { int *starts = bed->chromStarts; int *sizes = bed->blockSizes; int *endStarts = &bed->chromStarts[bed->blockCount]; for(; starts < endStarts; starts++, sizes++) { unsigned int end = *starts + *sizes + bed->chromStart; for (i = *starts + bed->chromStart; i < end; ++i) INCWOVERFLOW(counts,i); } } else { for (i = bed->chromStart; i < bed->chromEnd; ++i) INCWOVERFLOW(counts, i); } outputToDo = TRUE; bedFree(&bed); // plug the memory leak } lineFileClose(&bf); // Note, next file could be on same chr! if (outputToDo) outputCounts(counts, prevChrom, chromSize, f); if (doOutBounds) fprintf(stderr, "min %lu max %lu\n", (unsigned long)overMin, (unsigned long)overMax); verbose(2,"#\tchrom %s done, size %d\n", prevChrom, chromSize); carefulClose(&f); freeMem(counts); freez(&prevChrom); // hashFreeWithVals(&chromHash, freez); freeHash(&seenHash); }
void hitLine(struct cdnaInfo *ci, int lineCount, char *cdnaName, char *cdnaRange, char *mitoChrom, char *chromName, char *genomeRange, char *scoreString) /* Process line with hit info. */ { char *s; struct roughAli *ra; if (ci == NULL || differentWord(cdnaName, ci->name)) { errAbort("Got hit line %d without corresponding blasting for cDNA %s", lineCount, cdnaName); } /* Allocate structure to hold data for line. */ ra = needMem(sizeof(*ra)); /* Grab range of bases covered in cDNA */ if (cdnaRange != NULL) { s = cdnaRange; ra->cStart = atoi(s); s = strchr(s, '-')+1; ra->cEnd = atoi(s)+1; if (ra->cStart >= ra->cEnd) { int temp; warn("%s line %d cdna %s: cStart %d > cEnd %d. Swapping.", cdnaName, lineCount, cdnaName, ra->cStart, ra->cEnd); addRedoHash(ci, "swappedCdna"); temp = ra->cStart; ra->cStart = ra->cEnd; ra->cEnd = ra->cStart; } } /* Grab range of bases covered in genome. */ s = genomeRange; ra->gStart = atoi(s); s = strchr(s, '-')+1; ra->gEnd = atoi(s)+1; if (ra->gStart >= ra->gEnd) { int temp; warn("%s line %d cdna %s: gStart %d > gEnd %d. Swapping.", cdnaName, lineCount, cdnaName, ra->gStart, ra->gEnd); addRedoHash(ci, "swappedGenome"); temp = ra->gStart; ra->gStart = ra->gEnd; ra->gEnd = ra->gStart; } /* Figure out chromosome it's on. */ if (!differentWord(mitoChrom, "Mitochondrial")) ra->chromIx = 6; else { int i; for (i=0; i<chromCount; ++i) { if (!differentWord(chromName, chromNames[i])) { ra->chromIx = i; break; } } } /* Get alignment score. */ ra->score = atoi(scoreString); /* Hang it on list. */ ra->next = ci->roughAli; ci->roughAli = ra; }
boolean viewableOutput(char *output)
/* Returns TRUE if this is a viewable output type.  Every output type is
 * treated as viewable except "reads"; the comparison is done by
 * differentWord(), whose result is returned unchanged. */
{
boolean notReads = differentWord(output, "reads");
return notReads;
}
boolean gffReadGenes(struct gff *gff) /* Read all the gene (as opposed to base) info in file. */ { int wordCount; struct gffSegLine seg; char curGroup[128]; struct gffGene *gene = NULL; GffIntron *intron = NULL; GffExon *exon = NULL; boolean warnedUnknown = FALSE; boolean isNewGene; curGroup[0] = 0; /* Start off with no group */ /* Line scanning loop. */ for (;;) { /* Get next line and parse it into segLine data structure. */ if (!_gffGetLine(gff)) break; /* End of file. */ if (gff->buf[0] == '#') continue; /* Ignore sharp containing lines. */ wordCount = gffSegLineScan(gff, &seg); if (wordCount < 9) continue; /* Ignore blank lines and short ones. */ /* Make sure that start is less than or equal end. */ if (seg.start > seg.end) { warn("start greater than end line %d of %s.\n", gff->lineNumber, gff->fileName); return FALSE; } /* Get the gene we're working on. First see if * it's the same as last time around. */ isNewGene = FALSE; if (strcmp(seg.group, curGroup) != 0) { strcpy(curGroup, seg.group); if ((gene = gffFindGene(gff, seg.group)) == NULL) { /* It's a new gene! */ if (!checkWordCount(gff, wordCount)) return FALSE; isNewGene = TRUE; gene = gffNeedMem(gff, sizeof(*gene)); strcpy(gene->name, seg.group); slAddTail(&gff->genes, gene); gene->strand = seg.strand[0]; gene->frame = atoi(seg.frame); if (differentWord(seg.feature, "CDS") == 0) { gene->start = seg.start-1; gene->end = seg.end-1; } } } /* Look at what sort of feature it is, and decide what to do. */ if (differentWord(seg.feature, "CDS")==0) { /* CDS (coding segments) have been processed already * for the most part. Here just make sure they aren't * duplicated. 
*/ if (!checkWordCount(gff, wordCount)) return FALSE; if (!isNewGene) { if (gene->start != 0 || gene->end != 0) { warn("Warning duplicate CDS for %s\n", seg.group); warn("Line %d of %s\n", gff->lineNumber, gff->fileName); } } } else if (differentWord(seg.feature, "SE") == 0 || differentWord(seg.feature, "IE") == 0 || differentWord(seg.feature, "FE") == 0 || differentWord(seg.feature, "E") == 0 || differentWord(seg.feature, "exon") == 0) { /* It's some sort of exon. We'll deal with the complications * of it being possibly on the minus strand later, so can * tread initial, final, single, and regular exons the same * here. */ if (!checkWordCount(gff, wordCount)) return FALSE; exon = gffNeedMem(gff, sizeof(*exon)); exon->start = seg.start-1; exon->end = seg.end-1; exon->frame = atoi(seg.frame); gffSegmentInsertSort(&gene->exons, exon); } else if (differentWord(seg.feature, "I") == 0 || differentWord(seg.feature, "intron") == 0) { /* It's an intron. */ if (!checkWordCount(gff, wordCount)) return FALSE; intron = gffNeedMem(gff, sizeof(*intron)); intron->start = seg.start-1; intron->end = seg.end-1; intron->frame = atoi(seg.frame); gffSegmentInsertSort(&gene->introns, intron); } else if (strcmp(seg.feature, "IG") == 0) { /* I don't know what it is, but we can ignore it. */ } else { if (!warnedUnknown) { warn("Unknown feature %s line %d of %s, ignoring\n", seg.feature, gff->lineNumber, gff->fileName); warnedUnknown = TRUE; } } } /* Fix up gene length from exons if needed. */ for (gene = gff->genes; gene != NULL; gene = gene->next) { if (gene->start >= gene->end) { offsetsFromExons(gene); } } return TRUE; }
static void gapToLift(char *db, char *outFile) /* gapToLift - create lift file from gap table(s). */ { FILE *out = mustOpen(outFile, "w"); struct sqlConnection *conn = sqlConnect(db); struct chromInfo *cInfoList = loadChromInfo(conn); struct agpGap *gapList = loadAllGaps(conn, db, cInfoList); struct agpGap *gap; int start = 0; int end = 0; char *prevChr = NULL; int liftCount = 0; int chrSize = 0; static struct hash *chrDone = NULL; chrDone = newHash(0); if (isNotEmpty(bedFileName)) { bedFile = mustOpen(bedFileName, "w"); verbose(2,"#\tbed output requested to %s\n", bedFileName); } for (gap = gapList; gap; gap = gap->next) { verbose(3,"#\t%s\t%d\t%d\t%s\n", gap->chrom, gap->chromStart, gap->chromEnd, gap->bridge); if (prevChr && sameWord(prevChr, gap->chrom)) { /* continuing same segment, check for gap break, * or gap at end of chrom */ if (sameWord("no",gap->bridge) || (gap->chromEnd == chrSize)) { end = gap->chromStart; liftCount = liftOutLine(out, gap->chrom, start, end, liftCount, chrSize); start = gap->chromEnd; end = start; } else end = gap->chromEnd; } else /* new chrom encountered */ { /* output last segment of previous chrom when necessary */ if (prevChr && differentWord(prevChr, gap->chrom)) { if (end < chrSize) liftCount = liftOutLine(out, prevChr, start, chrSize, liftCount, chrSize); } liftCount = 0; chrSize = hashIntVal(cInfoHash, gap->chrom); hashAddInt(chrDone, gap->chrom, 1); if (gap->chromStart > 0) { /* starting first segment at position 0 */ start = 0; end = gap->chromStart; /* does the first gap break it ? Or gap goes to end of chrom. 
*/ if (sameWord("no",gap->bridge) || (gap->chromEnd == chrSize)) { liftCount = liftOutLine(out, gap->chrom, start, end, liftCount, chrSize); start = gap->chromEnd; end = start; } } else /* first gap is actually the beginning of the chrom */ { /* thus, first segment starts after this first gap */ start = gap->chromEnd; end = start; } } prevChr = gap->chrom; /* remember prev chrom to detect next chrom */ } /* potentially a last one */ if (end < chrSize) liftCount = liftOutLine(out, prevChr, start, chrSize, liftCount, chrSize); /* check that all chroms have been used */ struct hashCookie cookie = hashFirst(cInfoHash); struct hashEl *hel; while ((hel = hashNext(&cookie)) != NULL) { if (NULL == hashLookup(chrDone, hel->name)) { chrSize = hashIntVal(cInfoHash, hel->name); verbose(2, "#\tno gaps on chrom: %s, size: %d\n", hel->name, chrSize); liftCount = liftOutLine(out, hel->name, 0, chrSize, 0, chrSize); } } carefulClose(&out); sqlDisconnect(&conn); }
void getDbGenomeClade(struct cart *cart, char **retDb, char **retGenome, char **retClade, struct hash *oldVars) /* Examine CGI and cart variables to determine which db, genome, or clade * has been selected, and then adjust as necessary so that all three are * consistent. Detect changes and reset db-specific cart variables. * Save db, genome and clade in the cart so it will be consistent hereafter. * The order of preference here is as follows: * If we got a request that explicitly names the db, that takes * highest priority, and we synch the organism to that db. * If we get a cgi request for a specific organism then we use that * organism to choose the DB. If just clade, go from there. * In the cart only, we use the same order of preference. * If someone requests an Genome we try to give them the same db as * was in their cart, unless the Genome doesn't match. */ { boolean gotClade = hGotClade(); *retDb = cgiOptionalString(dbCgiName); *retGenome = cgiOptionalString(orgCgiName); *retClade = cgiOptionalString(cladeCgiName); /* phoneHome business */ phoneHome(); /* Was the database passed in as a cgi param? * If so, it takes precedence and determines the genome. */ if (*retDb && hDbExists(*retDb)) { *retGenome = hGenome(*retDb); } /* If no db was passed in as a cgi param then was the organism (a.k.a. genome) * passed in as a cgi param? * If so, the we use the proper database for that genome. */ else if (*retGenome && !sameWord(*retGenome, "0")) { *retDb = getDbForGenome(*retGenome, cart); *retGenome = hGenome(*retDb); } else if (*retClade && gotClade) { *retGenome = hDefaultGenomeForClade(*retClade); *retDb = getDbForGenome(*retGenome, cart); } /* If no cgi params passed in then we need to inspect the session */ else { *retDb = cartOptionalString(cart, dbCgiName); *retGenome = cartOptionalString(cart, orgCgiName); *retClade = cartOptionalString(cart, cladeCgiName); /* If there was a db found in the session that determines everything. 
*/ if (*retDb && hDbExists(*retDb)) { *retGenome = hGenome(*retDb); } else if (*retGenome && !sameWord(*retGenome, "0")) { *retDb = hDefaultDbForGenome(*retGenome); } else if (*retClade && gotClade) { *retGenome = hDefaultGenomeForClade(*retClade); *retDb = getDbForGenome(*retGenome, cart); } /* If no organism in the session then get the default db and organism. */ else { *retDb = hDefaultDb(); *retGenome = hGenome(*retDb); } } *retDb = cloneString(*retDb); *retGenome = cloneString(*retGenome); *retClade = hClade(*retGenome); /* Detect change of database and reset db-specific cart variables: */ if (oldVars) { char *oldDb = hashFindVal(oldVars, "db"); char *oldOrg = hashFindVal(oldVars, "org"); char *oldClade = hashFindVal(oldVars, "clade"); if ((!IS_CART_VAR_EMPTY(oldDb) && differentWord(oldDb, *retDb)) || (!IS_CART_VAR_EMPTY(oldOrg) && differentWord(oldOrg, *retGenome)) || (!IS_CART_VAR_EMPTY(oldClade) && differentWord(oldClade, *retClade))) { /* Change position to default -- unless it was passed in via CGI: */ if (cgiOptionalString("position") == NULL) cartSetString(cart, "position", hDefaultPos(*retDb)); /* hgNear search term -- unless it was passed in via CGI: */ if (cgiOptionalString("near_search") == NULL) cartRemove(cart, "near_search"); /* hgBlat results (hgUserPsl track): */ cartRemove(cart, "ss"); /* hgTables correlate: */ cartRemove(cart, "hgta_correlateTrack"); cartRemove(cart, "hgta_correlateTable"); cartRemove(cart, "hgta_correlateGroup"); cartRemove(cart, "hgta_correlateOp"); cartRemove(cart, "hgta_nextCorrelateTrack"); cartRemove(cart, "hgta_nextCorrelateTable"); cartRemove(cart, "hgta_nextCorrelateGroup"); cartRemove(cart, "hgta_nextCorrelateOp"); cartRemove(cart, "hgta_corrWinSize"); cartRemove(cart, "hgta_corrMaxLimitCount"); } } /* Save db, genome (as org) and clade in cart. */ cartSetString(cart, "db", *retDb); cartSetString(cart, "org", *retGenome); if (gotClade) cartSetString(cart, "clade", *retClade); }
struct trackHubSettingSpec *trackHubSettingsForVersion(char *specHost, char *version) /* Return list of settings with support level. Version can be version string or spec url */ { struct htmlPage *page = NULL; if (version == NULL) { version = trackHubVersionDefault(specHost, &page); if (version == NULL) errAbort("Can't get default spec from host %s", specHost); } /* Retrieve specs from file url. * Settings are the first text word within any <code> tag having class="level-" attribute. * The level represents the level of support for the setting (e.g. base, full, deprecated) * The support level ('level-*') is the class value of the * <code> tag. * E.g. <code class="level-full">boxedConfig on</code> produces: * setting=boxedConfig, class=full */ if (page == NULL) page = trackHubVersionSpecMustGet(specHost, version); if (page == NULL) errAbort("Can't get settings spec for version %s from host %s", version, specHost); verbose(5, "Found %d tags\n", slCount(page->tags)); struct trackHubSettingSpec *spec, *savedSpec; struct hash *specHash = hashNew(0); struct htmlTag *tag; struct htmlAttribute *attr; char buf[256]; for (tag = page->tags; tag != NULL; tag = tag->next) { if (differentWord(tag->name, "code")) continue; attr = tag->attributes; if (attr == NULL || differentString(attr->name, "class") || !startsWith("level-", attr->val)) continue; AllocVar(spec); int len = min(tag->next->start - tag->end, sizeof buf - 1); memcpy(buf, tag->end, len); buf[len] = 0; verbose(6, "Found spec: %s\n", buf); spec->name = cloneString(firstWordInLine(buf)); if (spec->name == NULL || strlen(spec->name) == 0) { warn("ERROR: format problem with trackDbHub.html -- contact UCSC."); continue; } spec->level = cloneString(chopPrefixAt(attr->val, '-')); verbose(6, "spec: name=%s, level=%s\n", spec->name, spec->level); savedSpec = (struct trackHubSettingSpec *)hashFindVal(specHash, spec->name); if (savedSpec != NULL) verbose(6, "found spec %s level %s in hash\n", savedSpec->name, savedSpec->level); 
if (savedSpec == NULL) { hashAdd(specHash, spec->name, spec); verbose(6, "added spec %s at level %s\n", spec->name, spec->level); } else if (trackHubSettingLevelCmp(spec, savedSpec) > 0) { hashReplace(specHash, spec->name, spec); verbose(6, "replaced spec %s at level %s, was %s\n", spec->name, spec->level, savedSpec->level); } } struct hashEl *el, *list = hashElListHash(specHash); int settingsCt = slCount(list); verbose(5, "Found %d settings's\n", slCount(list)); if (settingsCt == 0) errAbort("Can't find hub setting info for version %s (host %s)." " Use -version to indicate a different version number or url.", version, specHost); slSort(&list, hashElCmp); struct trackHubSettingSpec *specs = NULL; int baseCt = 0; int requiredCt = 0; int deprecatedCt = 0; for (el = list; el != NULL; el = el->next) { if (sameString(((struct trackHubSettingSpec *)el->val)->level, "base")) baseCt++; else if (sameString(((struct trackHubSettingSpec *)el->val)->level, "required")) requiredCt++; else if (sameString(((struct trackHubSettingSpec *)el->val)->level, "deprecated")) deprecatedCt++; slAddHead(&specs, el->val); } slReverse(&specs); verbose(3, "Found %d supported settings for this version (%d required, %d base, %d deprecated)\n", slCount(specs), requiredCt, baseCt, deprecatedCt); return specs; }
int rbTreeCmpWord(void *a, void *b)
/* Comparison callback that lets an rbTree be keyed on case-insensitive
 * strings: both items are treated as strings and the result of
 * differentWord() on them is returned as is. */
{
char *wordA = a;
char *wordB = b;
int cmp = differentWord(wordA, wordB);
return cmp;
}
/* The single externally visible routine. * Future improvements will need to add a couple more arguments to * satisify the needs of the command line version and its options. * Currently, this is used only in customTrack input parsing. */ void wigAsciiToBinary( char *wigAscii, char *wigFile, char *wibFile, double *upperLimit, double *lowerLimit, struct wigEncodeOptions *options) /* given the three file names, read the ascii wigAscii file and produce * the wigFile and wibFile outputs */ { struct lineFile *lf; /* for line file utilities */ char *line = (char *) NULL; /* to receive data input line */ char *words[10]; /* to split data input line */ int wordCount = 0; /* result of split */ int validLines = 0; /* counting only lines with data */ double dataValue = 0.0; /* from data input */ boolean bedData = FALSE; /* in bed format data */ boolean variableStep = FALSE; /* in variableStep data */ boolean fixedStep = FALSE; /* in fixedStep data */ char *prevChromName = (char *)NULL; /* to watch for chrom name changes */ int trackCount = 0; /* We abort if we see more than one track. 
*/ if ((wigAscii == (char *)NULL) || (wigFile == (char *)NULL) || (wibFile == (char *)NULL)) errAbort("wigAsciiToBinary: missing data file names, ascii: %s, wig: %s, wib: %s", wigAscii, wigFile, wibFile); /* need to be careful here and initialize all the global variables */ freez(&wibFileName); /* send this name to the global */ wibFileName = cloneString(wibFile); /* variable for use in output_row() */ lineCount = 0; /* to count all lines */ add_offset = 0; /* to allow "lifting" of the data */ validLines = 0; /* to count only lines with data */ rowCount = 0; /* to count rows output */ bincount = 0; /* to count up to binsize */ binsize = 1024; /* # of data points per table row */ dataSpan = 1; /* default bases spanned per data point */ chromStart = 0; /* for table row data */ previousOffset = 0; /* for data missing detection */ fileOffset = 0; /* current location within binary data file */ fileOffsetBegin = 0;/* location in binary data file where this bin starts*/ freez(&data_values); freez(&validData); data_values = (double *) needMem( (size_t) (binsize * sizeof(double))); validData = (unsigned char *) needMem( (size_t) (binsize * sizeof(unsigned char))); if (options != NULL) { if (options->lift != 0) add_offset = options->lift; if (options->noOverlap) noOverlap = TRUE; if (options->flagOverlapSpanData) flagOverlapSpanData = TRUE; if (options->wibSizeLimit > 0) wibSizeLimit = options->wibSizeLimit; } /* limits for the complete set of data, they must change from these initial defaults during processing */ overallLowerLimit = wigEncodeStartingLowerLimit; overallUpperLimit = wigEncodeStartingUpperLimit; binout = mustOpen(wibFile,"w"); /* binary data file */ wigout = mustOpen(wigFile,"w"); /* table row definition file */ #if defined(DEBUG) /* dbg */ chmod(wibFile, 0666); chmod(wigFile, 0666); #endif lf = lineFileOpen(wigAscii, TRUE); /* input file */ while (lineFileNext(lf, &line, NULL)) { boolean readingFrameSlipped; ++lineCount; if ((wibSizeLimit > 0) && (wibSize >= 
wibSizeLimit)) errAbort("data size limit of %lld data values has been exceeded. This data can be efficiently displayed with the <A HREF='/goldenPath/help/bigWig.html' TARGET=_blank>bigWig file format</A> in a custom track, or in a <A HREF='/goldenPath/help/hgTrackHubHelp.html' TARGET=_blank>Track Hub</A> for multiple large datasets.", wibSizeLimit); line = skipLeadingSpaces(line); /* ignore blank or comment lines */ if ((line == (char *)NULL) || (line[0] == '\0') || (line[0] == '#')) continue; /* !!! go to next line of input */ wordCount = chopByWhite(line, words, ArraySize(words)); if (sameWord("track",words[0])) { /* Allow (and ignore) one track line, but no more. */ ++trackCount; if (trackCount > 1) errAbort("Multiple tracks seen, second at line %d of %s, can only handle one.", lf->lineIx, lf->fileName); continue; } else if (sameWord("browser", words[0])) { continue; /* ignore browser lines if present */ } else if (sameWord("variableStep",words[0])) { int i; boolean foundChrom = FALSE; /* safest thing to do if we were processing anything is to * output that previous block and start anew * Future improvement could get fancy here and decide if it * is really necessary to start over, although the concept * of a line between data points on one item may use this * block behavior later to define line segments, so don't * get too quick to be fancy here. This line behavior * implies that feature names will need to be specified to * identify the line segments that belong together. 
*/ if (variableStep || bedData || fixedStep) { output_row(); validLines = 0; /* to cause reset for first offset */ } dataSpan = 1; /* default bases spanned per data point */ for(i = 1; i < wordCount; ++i) { if (startsWith("chrom",words[i])) { setChromName(words[i]); foundChrom = TRUE; } else if (startsWith("span",words[i])) setDataSpan(words[i]); else errAbort("illegal specification on variableStep at line %lu: %s", lineCount, words[i]); } if (!foundChrom) errAbort("missing chrom=<name> specification on variableStep declaration at line %lu", lineCount); variableStep = TRUE; bedData = FALSE; fixedStep = FALSE; freez(&prevChromName); prevChromName = cloneString(chromName); continue; /* !!! go to next input line */ } else if (sameWord("fixedStep",words[0])) { boolean foundChrom = FALSE; boolean foundStart = FALSE; int i; /* same comment as above */ if (variableStep || bedData || fixedStep) { output_row(); validLines = 0; /* to cause reset for first offset */ } stepSize = 1; /* default step size */ dataSpan = 0; /* this will match step size if not set*/ for(i = 1; i < wordCount; ++i) { if (startsWith("chrom",words[i])) { setChromName(words[i]); foundChrom = TRUE; } else if (startsWith("start",words[i])) { setFixedStart(words[i]); foundStart = TRUE; } else if (startsWith("step",words[i])) setStepSize(words[i]); else if (startsWith("span",words[i])) setDataSpan(words[i]); else errAbort("illegal specification on variableStep at line %lu: %s", lineCount, words[i]); } if (dataSpan == 0) dataSpan = stepSize; if (!foundChrom) errAbort("missing chrom=<name> specification on fixedStep declaration at line %lu", lineCount); if (!foundStart) errAbort("missing start=<position> specification on fixedStep declaration at line %lu", lineCount); if (noOverlap && validLines && prevChromName) { if (sameWord(prevChromName,chromName) && (fixedStart < chromStart)) errAbort("specified fixedStep chromStart %llu is less than expected next chromStart %llu", fixedStart, chromStart); } 
variableStep = FALSE; bedData = FALSE; fixedStep = TRUE; freez(&prevChromName); prevChromName = cloneString(chromName); continue; /* !!! go to next input line */ } else if (wordCount == 4) { /* while in bedData, we do not necessarily need to start a new * batch unless the chrom name is changing, since dataSpan * is always 1 for bedData. As above, this may change in * the future if each bed line specification is talking * about a different feature. */ if (variableStep || fixedStep || (bedData && ((prevChromName != (char *)NULL) && differentWord(prevChromName,words[0])))) { output_row(); validLines = 0; /* to cause reset for first offset */ } dataSpan = 1; /* default bases spanned per data point */ variableStep = FALSE; bedData = TRUE; fixedStep = FALSE; freez(&chromName); chromName=cloneString(words[0]); freez(&featureName); featureName=cloneString(words[0]); bedChromStart = sqlLongLong(words[1]); bedChromEnd = sqlLongLong(words[2]); bedDataValue = sqlDouble(words[3]); /* the bed format coordinate system is zero relative, half-open, * hence, no adjustment of bedChromStart is needed here, unlike the * fixed and variable step formats which will subtract one from the * incoming coordinate. */ if (bedChromStart >= bedChromEnd) errAbort("Found chromStart >= chromEnd at line %lu (%llu > %llu)", lineCount, bedChromStart, bedChromEnd); if (bedChromEnd > (bedChromStart + 10000000)) errAbort("Limit of 10,000,000 length specification for bed format at line %lu, found: %llu)", lineCount, bedChromEnd-bedChromStart); if ((validLines > 0) && (bedChromStart < previousOffset)) errAbort("chrom positions not in numerical order at line %lu. 
previous: %llu > %llu <-current (bed)", lineCount, previousOffset, bedChromStart); freez(&prevChromName); prevChromName = cloneString(chromName); } /* We must be in one of these data formats at this point */ if (!(variableStep || fixedStep || bedData)) errAbort("at the line beginning: %s, variableStep or fixedStep data declaration not found or BED data 4 column format not recognized.", words[0]); if (variableStep && (wordCount != 2)) errAbort("Expecting two words for variableStep data at line %lu, found %d", lineCount, wordCount); if (fixedStep && (wordCount != 1)) errAbort("Expecting one word for fixedStep data at line %lu, found %d", lineCount, wordCount); if (bedData && (wordCount != 4)) errAbort("Expecting four words for bed format data at line %lu, found %d", lineCount, wordCount); ++validLines; /* counting good lines of data input */ /* Offset is the incoming specified position for this value, * fixedStart has already been converted to zero * relative half open */ if (variableStep) { Offset = sqlLongLong(words[0]); Offset = BASE_0(Offset); /* zero relative half open */ dataValue = sqlDouble(words[1]); } else if (fixedStep) { Offset = fixedStart + (stepSize * (validLines - 1)); dataValue = sqlDouble(words[0]); } else if (bedData) { Offset = bedChromStart; dataValue = bedDataValue; } if (dataValue > overallUpperLimit) overallUpperLimit = dataValue; if (dataValue < overallLowerLimit) overallLowerLimit = dataValue; /* see if this is the first time through, establish chromStart */ if (validLines == 1) { chromStart = Offset; verbose(2, "first offset: %llu\n", chromStart); } else if ((validLines > 1) && (Offset <= previousOffset)) errAbort("chrom positions not in numerical order at line %lu. 
previous: %llu > %llu " "<-current (offset)", lineCount, BASE_1(previousOffset), BASE_1(Offset)); /* if we are working on a zoom level and the data is not exactly * spaced according to the span, then we need to put each value * in its own row in order to keep positioning correct for these * data values. The number of skipped bases has to be an even * multiple of dataSpan */ readingFrameSlipped = FALSE; if ((validLines > 1) && (dataSpan > 1)) { unsigned long long prevEnd = previousOffset + dataSpan; int skippedBases; int spansSkipped; skippedBases = Offset - previousOffset; if (flagOverlapSpanData && (prevEnd > Offset)) errAbort("ERROR: data points overlapping at input line %lu.\n" "previous data point position: %s:%llu-%llu overlaps current: %s:%llu-%llu", lineCount, chromName, BASE_1(previousOffset), prevEnd, chromName, BASE_1(Offset),Offset+dataSpan); spansSkipped = skippedBases / dataSpan; if ((spansSkipped * dataSpan) != skippedBases) readingFrameSlipped = TRUE; } if (readingFrameSlipped) { verbose(2, "data not spanning %llu bases, prev: %llu, this: %llu, at line: %lu\n", dataSpan, previousOffset, Offset, lineCount); output_row(); chromStart = Offset; /* a full reset here */ } /* Check to see if data is being skipped */ else if ( (validLines > 1) && (Offset > (previousOffset + dataSpan)) ) { unsigned long long off; unsigned long long fillSize; /* number of bytes */ verbose(2, "missing data offsets: %llu - %llu\n", BASE_1(previousOffset),BASE_0(Offset)); /* If we are just going to fill the rest of this bin with * no data, then may as well stop here. No need to fill * it with nothing. 
*/ fillSize = (Offset - (previousOffset + dataSpan)) / dataSpan; verbose(2, "filling NO_DATA for %llu bytes\n", fillSize); if (fillSize + bincount >= binsize) { verbose(2, "completing a bin due to NO_DATA for %llu bytes, only %llu - %llu = %llu to go\n", fillSize, binsize, bincount, binsize - bincount); verbose(2, "Offset: %llu, previousOffset: %llu\n", Offset, previousOffset); output_row(); chromStart = Offset; /* a full reset here */ } else { fillSize = 0; /* fill missing data with NO_DATA indication */ for (off = previousOffset + dataSpan; off < Offset; off += dataSpan) { ++fillSize; ++fileOffset; ++bincount; /* count scores in this bin */ if (bincount >= binsize) break; } verbose(2, "filled NO_DATA for %llu bytes\n", fillSize); /* If that finished off this bin, output it * This most likely should not happen here. The * check above: if (fillSize + bincount >= binsize) * should have caught this case already. */ if (bincount >= binsize) { output_row(); chromStart = Offset; /* a full reset here */ } } } /* With perhaps the missing data taken care of, back to the * real data. */ if (bedData) { unsigned long long bedSize = bedChromEnd - bedChromStart; for ( ; bedSize > 0; --bedSize ) { setDataValue(bedDataValue); Offset += 1; } Offset -= 1; /* loop above increments this one too much. * This Offset is supposed to be the last * valid chrom position written, not the * next to be written */ } else { setDataValue(dataValue); } previousOffset = Offset; /* remember position for gap calculations */ } /* reading file input loop end */ /* Done with input file, any data points left in this bin ? */ if (bincount) output_row(); lineFileClose(&lf); fclose(binout); fclose(wigout); freez(&chromName); freez(&featureName); freez(&data_values); freez(&validData); freez(&wibFileName); /* return limits if pointers are given */ if (upperLimit) *upperLimit = overallUpperLimit; if (lowerLimit) *lowerLimit = overallLowerLimit; if (wibSizeLimit > 0) options->wibSizeLimit = wibSize; }