void doTransRegCodeProbe(struct trackDb *tdb, char *item, char *codeTable, char *motifTable, char *tfToConditionTable, char *conditionTable) /* Display detailed info on a ChIP-chip probe from transRegCode experiments. */ { char query[256]; struct sqlResult *sr; char **row; int rowOffset = hOffsetPastBin(database, seqName, tdb->table); struct sqlConnection *conn = hAllocConn(database); struct transRegCodeProbe *probe = NULL; cartWebStart(cart, database, "ChIP-chip Probe Info"); sqlSafef(query, sizeof(query), "select * from %s where name = '%s'", tdb->table, item); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) probe = transRegCodeProbeLoad(row+rowOffset); sqlFreeResult(&sr); if (probe != NULL) { struct tfData *tfList = NULL, *tf; struct hash *tfHash = newHash(0); struct transRegCode *trc; int i; /* Print basic info. */ printf("<B>Name:</B> %s<BR>\n", probe->name); printPosOnChrom(probe->chrom, probe->chromStart, probe->chromEnd, NULL, TRUE, probe->name); /* Make up list of all transcriptionFactors. */ for (i=0; i<probe->tfCount; ++i) { /* Parse out factor and condition. */ char *tfName = probe->tfList[i]; char *condition = strchr(tfName, '_'); struct tfCond *cond; if (condition != NULL) *condition++ = 0; else condition = "n/a"; tf = hashFindVal(tfHash, tfName); if (tf == NULL) { AllocVar(tf); hashAddSaveName(tfHash, tfName, tf, &tf->name); slAddHead(&tfList, tf); } AllocVar(cond); cond->name = cloneString(condition); cond->binding = probe->bindVals[i]; slAddHead(&tf->conditionList, cond); } slSort(&tfList, tfDataCmpName); /* Fold in motif hits in region. 
*/ if (sqlTableExists(conn, codeTable)) { sr = hRangeQuery(conn, codeTable, probe->chrom, probe->chromStart, probe->chromEnd, "chipEvidence != 'none'", &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { trc = transRegCodeLoad(row+rowOffset); tf = hashFindVal(tfHash, trc->name); if (tf != NULL) slAddTail(&tf->trcList, trc); } sqlFreeResult(&sr); } if (tfList == NULL) printf("No significant immunoprecipitation."); else { tfBindLevelSection(tfList, conn, motifTable, tfToConditionTable); } transRegCodeProbeFree(&probe); growthConditionSection(conn, conditionTable); } printf("\n<HR>\n"); printTrackHtml(tdb); hFreeConn(&conn); }
void addIfNew(char *name)
/* Add name to nameHash (with a NULL value) unless it is already there.
 * The name is handed to hashAdd directly: the hash stores its own copy of
 * the key, so cloning it first only leaked one string per insertion. */
{
if (hashLookup(nameHash, name) == NULL)
    hashAdd(nameHash, name, NULL);
}
void xmfaToMaf(char *in, char *out) /* xmfaToMaf - Convert from xmfa to maf format. */ { int c; FILE *input = mustOpen(in, "r"); FILE *output = mustOpen(out, "w"); char* commentLine; struct dnaSeq* sequence; struct mafAli *ali; struct sqlConnection* conn = hAllocConn(); mafWriteStart(output, "mlagan"); AllocVar(ali); while(myFaReadMixedNext(input, TRUE, "default name", TRUE, &commentLine, &sequence)) { char srcName[128]; c = fgetc(input); if(c == '=' || c == '>') { /* add the current sequence and process the block if we've see an '='*/ char org[32]; char chrom[32]; int start; int stop; char strand; struct mafComp *comp; double score; char buffer[1024]; ungetc(c, input); AllocVar(comp); /* parse the comment line */ sscanf(commentLine, ">%s %[^:]:%d-%d %c", org, chrom, &start, &stop, &strand); /* build the name */ safef(srcName, sizeof(srcName), "%s.%s", optionVal(org, org), chrom); comp->src = cloneString(srcName); sqlSafef(buffer, 1024, "SELECT size FROM %s.chromInfo WHERE chrom = \"%s\"", optionVal(org, org), chrom); assert(sqlQuickQuery(conn, buffer, buffer, 1024) != 0); comp->srcSize = atoi(buffer); comp->strand = strand; start = start - 1; comp->start = start; comp->size = ungappedSize(sequence); if(strand == '-') comp->start = comp->srcSize - (comp->start + comp->size); comp->text = sequence->dna; sequence->dna = 0; slAddHead(&ali->components, comp); freeDnaSeq(&sequence); if(c == '=') { fscanf(input, "= score=%lf\n", &score); ali->score = score; slReverse(&ali->components); mafWrite(output, ali); mafAliFree(&ali); AllocVar(ali); } } } mafWriteEnd(output); }
static char *makeResultName(char *tableName, char *path)
/* Format the trash path of the autoupgrade result file for tableName into
 * path (written with a limit of AUTOUPGRPATHSIZE bytes) and return a cloned
 * copy of it. */
{
char *resultName;

safef(path, AUTOUPGRPATHSIZE, "../trash/AUTO_UPGRADE_RESULT_%s", tableName);
resultName = cloneString(path);
return resultName;
}
static void saveAxtBundle(char *chromName, int chromSize, int chromOffset, struct ffAli *ali, struct dnaSeq *tSeq, struct hash *t3Hash, struct dnaSeq *qSeq, boolean qIsRc, boolean tIsRc, enum ffStringency stringency, int minMatch, struct gfOutput *out)
/* Save alignment to axtBundle.
 * The ffAli block list is cut into runs by ffNextBreak(); each run becomes
 * one axt whose query and target symbol strings are built block by block,
 * with '-' padding in the gaps between blocks.  All axts for this call are
 * collected into a new axtBundle that is added to the head of the bundle
 * list kept in out->data.
 *   chromName, chromSize - target name and size recorded in the axt/bundle.
 *   chromOffset          - added to every target coordinate.
 *   t3Hash               - if non-NULL, the trans3 list for tSeq->name is
 *                          looked up here and passed to the trans3 helpers.
 *   qIsRc, tIsRc         - select '-' rather than '+' for the strands.
 * stringency and minMatch are not referenced in this function. */
{
struct axtData *ad = out->data;
struct ffAli *sAli, *eAli, *ff, *rt, *eFf = NULL;
struct axt *axt;
struct dyString *q = newDyString(1024), *t = newDyString(1024);
struct axtBundle *gab;
struct trans3 *t3List = NULL;

if (t3Hash != NULL)
    t3List = hashMustFindVal(t3Hash, tSeq->name);
AllocVar(gab);
gab->tSize = chromSize;
gab->qSize = qSeq->size;
/* sAli..eAli (exclusive) is the run of blocks that becomes the next axt. */
for (sAli = ali; sAli != NULL; sAli = eAli)
    {
    /* NOTE(review): the literal 8 is presumably a gap-size threshold for
     * breaking the run; ffNextBreak is not visible here - confirm. */
    eAli = ffNextBreak(sAli, 8, tSeq, t3List);
    dyStringClear(q);
    dyStringClear(t);
    for (ff = sAli; ff != eAli; ff = ff->right)
        {
        /* Copy the aligned block itself from query (n) and target (h). */
        dyStringAppendN(q, ff->nStart, ff->nEnd - ff->nStart);
        dyStringAppendN(t, ff->hStart, ff->hEnd - ff->hStart);
        rt = ff->right;
        if (rt != eAli)
            {
            /* Fill the gap between this block and the next one in the run.
             * The target gap is measured in trans3GenoPos() coordinates. */
            int nGap = rt->nStart - ff->nEnd;
            int nhStart = trans3GenoPos(rt->hStart, tSeq, t3List, FALSE) + chromOffset;
            int ohEnd = trans3GenoPos(ff->hEnd, tSeq, t3List, TRUE) + chromOffset;
            int hGap = nhStart - ohEnd;
            int gap = Blatmax(nGap, hGap);
            if (nGap < 0 || hGap < 0)
                {
                errAbort("Negative gap size in %s vs %s", tSeq->name, qSeq->name);
                }
            /* Only the side whose gap equals the larger of the two is copied
             * from its sequence; the other side gets that many dashes. */
            if (nGap == gap)
                {
                dyStringAppendN(q, ff->nEnd, gap);
                dyStringAppendMultiC(t, '-', gap);
                }
            else
                {
                dyStringAppendN(t, ff->hEnd, gap);
                dyStringAppendMultiC(q, '-', gap);
                }
            }
        eFf = ff; /* Keep track of last block in bunch */
        }
    /* Both symbol strings must have come out the same length. */
    assert(t->stringSize == q->stringSize);
    AllocVar(axt);
    axt->qName = cloneString(qSeq->name);
    /* Query coordinates are offsets from the start of qSeq->dna. */
    axt->qStart = sAli->nStart - qSeq->dna;
    axt->qEnd = eFf->nEnd - qSeq->dna;
    axt->qStrand = (qIsRc ? '-' : '+');
    axt->tName = cloneString(chromName);
    axt->tStart = trans3GenoPos(sAli->hStart, tSeq, t3List, FALSE) + chromOffset;
    axt->tEnd = trans3GenoPos(eFf->hEnd, tSeq, t3List, TRUE) + chromOffset;
    axt->tStrand = (tIsRc ? '-' : '+');
    axt->symCount = t->stringSize;
    axt->qSym = cloneString(q->string);
    axt->tSym = cloneString(t->string);
    axt->frame = trans3Frame(sAli->hStart, t3List);
    /* Scoring scheme follows the output's qIsProt flag. */
    if (out->qIsProt)
        axt->score = axtScoreProteinDefault(axt);
    else
        axt->score = axtScoreDnaDefault(axt);
    slAddHead(&gab->axtList, axt);
    }
/* Axts were pushed on the head; restore the order in which they were built. */
slReverse(&gab->axtList);
dyStringFree(&q);
dyStringFree(&t);
slAddHead(&ad->bundleList, gab);
}
char *scanSettingsForCT(char *userName, char *sessionName, char *contents, int *pLiveCount, int *pExpiredCount) /* Parse the CGI-encoded session contents into {var,val} pairs and search * for custom tracks. If found, refresh the custom track. Parsing code * taken from cartParseOverHash. * If any nonexistent custom track files are found, return a SQL update * command that will remove those from this session. We can't just do * the update here because that messes up the caller's query. */ { int contentLength = strlen(contents); struct dyString *newContents = dyStringNew(contentLength+1); struct dyString *oneSetting = dyStringNew(contentLength / 4); char *updateIfAny = NULL; char *contentsToChop = cloneString(contents); char *namePt = contentsToChop; verbose(3, "Scanning %s %s\n", userName, sessionName); while (isNotEmpty(namePt)) { char *dataPt = strchr(namePt, '='); char *nextNamePt; if (dataPt == NULL) errAbort("Mangled session content string %s", namePt); *dataPt++ = 0; nextNamePt = strchr(dataPt, '&'); if (nextNamePt != NULL) *nextNamePt++ = 0; dyStringClear(oneSetting); dyStringPrintf(oneSetting, "%s=%s%s", namePt, dataPt, (nextNamePt ? "&" : "")); if (startsWith(CT_FILE_VAR_PREFIX, namePt)) { boolean thisGotLiveCT = FALSE, thisGotExpiredCT = FALSE; cgiDecode(dataPt, dataPt, strlen(dataPt)); verbose(3, "Found variable %s = %s\n", namePt, dataPt); /* If the file does not exist, omit this setting from newContents so * it doesn't get copied from session to session. If it does exist, * leave it up to customFactoryTestExistence to parse the file for * possible customTrash table references, some of which may exist * and some not. */ if (! 
fileExists(dataPt)) { verbose(3, "Removing %s from %s %s\n", oneSetting->string, userName, sessionName); thisGotExpiredCT = TRUE; } else { char *db = namePt + strlen(CT_FILE_VAR_PREFIX); dyStringAppend(newContents, oneSetting->string); customFactoryTestExistence(db, dataPt, &thisGotLiveCT, &thisGotExpiredCT); } if (thisGotLiveCT && pLiveCount != NULL) (*pLiveCount)++; if (thisGotExpiredCT && pExpiredCount != NULL) (*pExpiredCount)++; if (thisGotExpiredCT) { if (verboseLevel() >= 3) verbose(3, "Found expired custom track in %s %s: %s\n", userName, sessionName, dataPt); else verbose(2, "Found expired custom track: %s\n", dataPt); } if (thisGotLiveCT) verbose(4, "Found live custom track: %s\n", dataPt); } else dyStringAppend(newContents, oneSetting->string); namePt = nextNamePt; } if (newContents->stringSize != contentLength) { struct dyString *update = dyStringNew(contentLength*2); if (newContents->stringSize > contentLength) errAbort("Uh, why is newContents (%d) longer than original (%d)??", newContents->stringSize, contentLength); dyStringPrintf(update, "UPDATE %s set contents='", savedSessionTable); dyStringAppendN(update, newContents->string, newContents->stringSize); dyStringPrintf(update, "', lastUse=now(), useCount=useCount+1 " "where userName=\"%s\" and sessionName=\"%s\";", userName, sessionName); verbose(3, "Removing one or more dead CT file settings from %s %s " "(original length %d, now %d)\n", userName, sessionName, contentLength, newContents->stringSize); updateIfAny = dyStringCannibalize(&update); } dyStringFree(&oneSetting); dyStringFree(&newContents); freeMem(contentsToChop); return updateIfAny; }
char *sqlStringComma(char **pS)
/* Parse the (optionally quoted) string at *pS with sqlGetOptQuoteString,
 * which receives pS so it can advance it, and return a cloned copy of the
 * result. */
{
char *parsed = sqlGetOptQuoteString(pS);
return cloneString(parsed);
}
void txGeneFromBed(char *inBed, char *inPicks, char *ucscFa, char *uniProtFa, char *refPepFa, char *outKg) /* txGeneFromBed - Convert from bed to knownGenes format table (genePred + uniProt ID). */ { /* Load protein sequence into hashes */ struct hash *uniProtHash = faReadAllIntoHash(uniProtFa, dnaUpper); struct hash *ucscProtHash = faReadAllIntoHash(ucscFa, dnaUpper); struct hash *refProtHash =faReadAllIntoHash(refPepFa, dnaUpper); /* Load picks into hash. We don't use cdsPicksLoadAll because empty fields * cause that autoSql-generated routine problems. */ struct hash *pickHash = newHash(18); struct cdsPick *pick; struct lineFile *lf = lineFileOpen(inPicks, TRUE); char *row[CDSPICK_NUM_COLS]; while (lineFileRowTab(lf, row)) { pick = cdsPickLoad(row); hashAdd(pickHash, pick->name, pick); } /* Load in bed */ struct bed *bed, *bedList = bedLoadNAll(inBed, 12); /* Do reformatting and write output. */ FILE *f = mustOpen(outKg, "w"); for (bed = bedList; bed != NULL; bed = bed->next) { char *protAcc = NULL; if (bed->thickStart < bed->thickEnd) { pick = hashMustFindVal(pickHash, bed->name); struct dnaSeq *spSeq = NULL, *uniSeq = NULL, *refPep = NULL, *ucscSeq; ucscSeq = hashMustFindVal(ucscProtHash, bed->name); if (pick->swissProt[0]) spSeq = hashMustFindVal(uniProtHash, pick->swissProt); if (pick->uniProt[0]) uniSeq = hashMustFindVal(uniProtHash, pick->uniProt); if (pick->refProt[0]) refPep = hashMustFindVal(refProtHash, pick->refProt); /* First we look for an exact match between the ucsc protein and * something from swissProt/uniProt. 
*/ if (spSeq != NULL && sameString(ucscSeq->dna, spSeq->dna)) protAcc = pick->swissProt; if (protAcc == NULL && uniSeq != NULL && sameString(ucscSeq->dna, uniSeq->dna)) protAcc = pick->uniProt; if (protAcc == NULL && refPep != NULL && sameString(ucscSeq->dna, refPep->dna)) { protAcc = cloneString(pick->refProt); chopSuffix(protAcc); } if (protAcc == NULL) { if (pick->uniProt[0]) protAcc = pick->uniProt; else { protAcc = cloneString(pick->refProt); chopSuffix(protAcc); } } } outputKg(bed, emptyForNull(protAcc), f); } carefulClose(&f); }
static struct chrGapList *createGaps(struct bed *bounds) { struct bed *bedEl = NULL; char *prevChr = NULL; struct chrGapList *gaps = NULL; struct gap *prevGap = NULL; struct bed *prevBedEl = NULL; struct chrGapList *curChrList = NULL; int boundingChrCount = 0; int overlappedBounding = 0; for (bedEl = bounds; bedEl != NULL; bedEl = bedEl->next) { /* the first bedEl does not yet start a new gap, must have a second */ if ((NULL == prevChr) || differentWord(prevChr,bedEl->chrom)) { struct chrGapList *cEl; AllocVar(cEl); cEl->chrom = cloneString(bedEl->chrom); cEl->gList = NULL; if (prevChr) { if (NULL == prevGap) { verbose(2,"WARNING: only one element on %s ! No gap defined.\n", prevChr); slPopHead(&gaps); --boundingChrCount; } freeMem(prevChr); } prevChr = cloneString(bedEl->chrom); prevGap = NULL; prevBedEl = bedEl; /* bounding element before first gap */ verbose(4,"new chrom on bounding gap creation %s, adding %#lx\n", prevChr, (unsigned long) cEl); slAddHead(&gaps,cEl); ++boundingChrCount; curChrList = cEl; } else { struct gap *gEl; AllocVar(gEl); gEl->prev = prevGap; /* first one is NULL */ gEl->upstream = prevBedEl; gEl->isUpstreamBound = TRUE; /* bounding element */ gEl->downstream = bedEl; gEl->isDownstreamBound = TRUE; /* bounding element */ gEl->next = NULL; /* not there yet */ if (prevGap == NULL) /* first one is NULL */ { curChrList->gList = gEl; /* starting the list */ } else { prevGap->next = gEl; } prevGap = gEl; /* gapSize is between downstream and upstream */ gEl->gapSize = bedEl->chromStart - prevBedEl->chromEnd; verbose(5,"gap: %s:%d-%d size %d (%d)\n", bedEl->chrom, gEl->upstream->chromEnd, gEl->downstream->chromStart, gEl->gapSize, gEl->downstream->chromStart - gEl->upstream->chromEnd); if (gEl->gapSize < 0) { ++overlappedBounding; if (verboseLevel()>3) { warn("WARNING: overlapping bounding elements at\n\t" "%s:%d-%d <-> %s:%d-%d", prevBedEl->chrom, prevBedEl->chromStart, prevBedEl->chromEnd, bedEl->chrom, bedEl->chromStart, bedEl->chromEnd); } 
gEl->gapSize = 0; } prevBedEl = bedEl; } } if (prevChr) { /* potentially the last one is a single item on a chrom */ if (NULL == prevGap) { verbose(2,"WARNING: only one element on %s ! No gap defined.\n", prevChr); slPopHead(&gaps); --boundingChrCount; } freeMem(prevChr); } slReverse(&gaps); verbose(3,"bounding chrom count: %d (=? %d), overlapped items: %d\n", boundingChrCount, slCount(gaps), overlappedBounding); return(gaps); }
struct knownMore *knownMoreLoad(char **row)
/* Build a knownMore from a row fetched with select * from knownMore.
 * String columns are cloned and numeric columns parsed with sqlUnsigned,
 * so the result does not point into row.
 * Dispose of this with knownMoreFree(). */
{
struct knownMore *km;

AllocVar(km);

/* Identifiers. */
km->name          = cloneString(row[0]);
km->transId       = cloneString(row[1]);
km->geneId        = cloneString(row[2]);

/* gb* columns. */
km->gbGeneName    = sqlUnsigned(row[3]);
km->gbProductName = sqlUnsigned(row[4]);
km->gbProteinAcc  = cloneString(row[5]);
km->gbNgi         = cloneString(row[6]);
km->gbPgi         = cloneString(row[7]);

/* omim* and hugo* columns. */
km->omimId        = sqlUnsigned(row[8]);
km->omimName      = cloneString(row[9]);
km->hugoId        = sqlUnsigned(row[10]);
km->hugoSymbol    = cloneString(row[11]);
km->hugoName      = cloneString(row[12]);
km->hugoMap       = cloneString(row[13]);

/* Remaining cross references. */
km->pmId1         = sqlUnsigned(row[14]);
km->pmId2         = sqlUnsigned(row[15]);
km->refSeqAcc     = cloneString(row[16]);
km->aliases       = cloneString(row[17]);
km->locusLinkId   = sqlUnsigned(row[18]);
km->gdbId         = cloneString(row[19]);

return km;
}
static void randomPlacement(char *bounding, char *placed) { struct bed *boundingElements = bedLoadAll(bounding); struct bed *placeItems = bedLoadAll(placed); struct bed *nearestNeighbors = NULL; int boundingCount = slCount(boundingElements); int placedCount = slCount(placeItems); int neighborCount = 0; struct chrGapList *boundingGaps = NULL; struct chrGapList *duplicateGapList = NULL; struct chrGapList *neighborGaps = NULL; struct statistic *statsList = NULL; struct statistic *statEl = NULL; if (neighbor) { nearestNeighbors = bedLoadAll(neighbor); slSort(&nearestNeighbors, bedCmp); /* order by chrom,chromStart */ neighborCount = slCount(nearestNeighbors); verbose(2, "neighbor element count: %d\n", neighborCount); neighborGaps = createGaps(nearestNeighbors); } slSort(&boundingElements, bedCmp); /* order by chrom,chromStart */ slSort(&placeItems, bedCmp); /* order by chrom,chromStart */ verbose(2, "bounding element count: %d\n", boundingCount); verbose(2, "placed item count: %d\n", placedCount); boundingGaps = createGaps(boundingElements); if (TRUE) /* display initial placement stats only */ { char *neighborName = NULL; if (neighbor) { neighborName = cloneString(neighbor); duplicateGapList = cloneGapList(neighborGaps); } else { neighborName = cloneString(bounding); duplicateGapList = cloneGapList(boundingGaps); } verbose(2,"stats before initial placement: =================\n"); statEl = gapStats(duplicateGapList, (char *)NULL, (char *)NULL, (char *)NULL); printf("statistics on gaps before any placements:\n\t(%s)\n", neighborName); statsPrint(statEl); slAddHead(&statsList,statEl); initialPlacement(duplicateGapList,placeItems); verbose(2,"stats after initial placement: =================\n"); statEl = gapStats(duplicateGapList, zeroBedOutFile, shoulderBedOutFile, distOut); printf("statistics after initial placement of placed items:\n\t(%s)\n", placed); statsPrint(statEl); slAddHead(&statsList,statEl); freeChrList(&duplicateGapList, FALSE); slReverse(&statsList); 
freeMem(neighborName); } if (trials > 0) { int trial; srand48((long int)seed); /* for default seed=0, same set of randoms */ slSort(&placeItems, bedCmpSize); /* order by size of elements */ slReverse(&placeItems); /* largest ones first */ measurePlaced(placeItems); /* show placed item characteristics */ for (trial = 0; trial < trials; ++trial) { struct bed *randomPlacedBedList; duplicateGapList = cloneGapList(boundingGaps); randomPlacedBedList = randomTrial(duplicateGapList,placeItems); if (neighbor) { struct chrGapList *duplicateNeighborList; slSort(&randomPlacedBedList,bedCmp);/*order by chrom,chromStart*/ duplicateNeighborList = cloneGapList(neighborGaps); initialPlacement(duplicateNeighborList,randomPlacedBedList); statEl = gapStats(duplicateNeighborList, (char *)NULL, (char *)NULL, (char *)NULL); freeChrList(&duplicateNeighborList, FALSE); } else statEl = gapStats(duplicateGapList, (char *)NULL, (char *)NULL, (char *)NULL); slAddHead(&statsList,statEl); /* this gap list has temporary bed elements that were * created by the randomTrial(), they need to be freed as * the list is released, hence the TRUE signal. * It isn't a true freeBedList operation because the chrom * names are left intact in the original copy of the bed * list. (The names were being shared.) */ if ((trial == (trials - 1)) && (bedOutFile != NULL)) { bedListOutput(duplicateGapList, bedOutFile); } freeChrList(&duplicateGapList, TRUE); } slReverse(&statsList); statsPrint(statsList); } if (neighbor) { bedFreeList(&nearestNeighbors); freeChrList(&neighborGaps, FALSE); } bedFreeList(&boundingElements); bedFreeList(&placeItems); freeChrList(&boundingGaps, FALSE); }
void encode2Meta(char *database, char *manifestIn, char *outMetaRa)
/* encode2Meta - Create meta files..
 * Builds a tree of metaNodes (composite -> experiment -> file) for the one
 * database named by the caller, hoists shared tags, sorts the tree and
 * writes it to outMetaRa.
 *   database   - must be one of metaDbs; anything else aborts.
 *   manifestIn - manifest file; only mdb objects whose rewritten fileName
 *                appears in it get a file node.
 *   outMetaRa  - output file. */
{
int dbIx = stringArrayIx(database, metaDbs, ArraySize(metaDbs));
if (dbIx < 0)
    errAbort("Unrecognized database %s", database);

/* Create a three level meta.ra format file based on hgFixed.encodeExp
 * and database.metaDb tables. The levels are composite, experiment, file */
struct metaNode *metaTree = metaTreeNew("encode2");

/* Load up the manifest, hashed by file name. */
struct encode2Manifest *mi, *miList = encode2ManifestShortLoadAll(manifestIn);
struct hash *miHash = hashNew(18);
for (mi = miList; mi != NULL; mi = mi->next)
    hashAdd(miHash, mi->fileName, mi);
verbose(1, "%d files in %s\n", miHash->elCount, manifestIn);

/* Load up encodeExp info. */
struct sqlConnection *expConn = sqlConnect(expDb);
struct encodeExp *expList = encodeExpLoadByQuery(expConn, "NOSQLINJ select * from encodeExp");
sqlDisconnect(&expConn);
verbose(1, "%d experiments in encodeExp\n", slCount(expList));

/* Composite name -> composite metaNode. */
struct hash *compositeHash = hashNew(0);

/* Go through each organism database in turn.  Only the entry equal to
 * database is processed; the index i is still needed for organisms[i]. */
int i;
for (i=0; i<ArraySize(metaDbs); ++i)
    {
    char *db = metaDbs[i];
    if (!sameString(database, db))
        continue;

    verbose(1, "exploring %s\n", db);
    struct mdbObj *mdb, *mdbList = getMdbList(db);
    verbose(1, "%d meta objects in %s\n", slCount(mdbList), db);

    /* Get info on all composites: each becomes a child of the tree root
     * carrying all of the mdb object's vars plus an "assembly" var. */
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
        {
        char *objType = mdbVarLookup(mdb->vars, "objType");
        if (objType != NULL && sameString(objType, "composite"))
            {
            char compositeName[256];
            safef(compositeName, sizeof(compositeName), "%s", mdb->obj);
            struct metaNode *compositeNode = metaNodeNew(compositeName);
            slAddHead(&metaTree->children, compositeNode);
            compositeNode->parent = metaTree;
            struct mdbVar *v;
            for (v=mdb->vars; v != NULL; v = v->next)
                {
                metaNodeAddVar(compositeNode, v->var, v->val);
                }
            metaNodeAddVar(compositeNode, "assembly", db);
            hashAdd(compositeHash, mdb->obj, compositeNode);
            }
        }

    /* Make up one more for experiments with no composite. */
    char *noCompositeName = "wgEncodeZz";
    struct metaNode *noCompositeNode = metaNodeNew(noCompositeName);
    slAddHead(&metaTree->children, noCompositeNode);
    noCompositeNode->parent = metaTree;
    hashAdd(compositeHash, noCompositeName, noCompositeNode);

    /* Now go through objects trying to tie experiments to composites.
     * The first composite seen for a dccAccession wins; a later, different
     * one is only reported at verbose level 2. */
    struct hash *expToComposite = hashNew(16);
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
        {
        char *composite = mdbVarLookup(mdb->vars, "composite");
        if (originalData(composite))
            {
            char *dccAccession = mdbVarLookup(mdb->vars, "dccAccession");
            if (dccAccession != NULL)
                {
                char *oldComposite = hashFindVal(expToComposite, dccAccession);
                if (oldComposite != NULL)
                    {
                    if (!sameString(oldComposite, composite))
                        verbose(2, "%s maps to %s ignoring mapping to %s", dccAccession, oldComposite, composite);
                    }
                else
                    {
                    hashAdd(expToComposite, dccAccession, composite);
                    }
                }
            }
        }

    /* Now get info on all experiments in this organism.  Each experiment
     * with an accession is hung under its composite, or under the
     * "wgEncodeZz" node when no composite is known for it. */
    struct hash *expHash = hashNew(0);
    struct encodeExp *exp;
    for (exp = expList; exp != NULL; exp = exp->next)
        {
        if (sameString(exp->organism, organisms[i]))
            {
            if (exp->accession != NULL)
                {
                char *composite = hashFindVal(expToComposite, exp->accession);
                struct metaNode *compositeNode;
                if (composite != NULL)
                    {
                    compositeNode = hashMustFindVal(compositeHash, composite);
                    }
                else
                    {
                    compositeNode = noCompositeNode;
                    }
                struct metaNode *expNode = wrapNodeAroundExp(exp);
                hashAdd(expHash, expNode->name, expNode);
                slAddHead(&compositeNode->children, expNode);
                expNode->parent = compositeNode;
                }
            }
        }

    /* Finally the file level: every non-composite mdb object that has a
     * dccAccession.  Aborts if the accession was never tied to a composite. */
    for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
        {
        char *fileName = NULL, *dccAccession = NULL;
        char *objType = mdbVarLookup(mdb->vars, "objType");
        if (objType != NULL && sameString(objType, "composite"))
            continue;
        dccAccession = mdbVarLookup(mdb->vars, "dccAccession");
        if (dccAccession == NULL)
            continue;
        char *composite = hashFindVal(expToComposite, dccAccession);
        if (composite == NULL)
            errAbort("Can't find composite for %s", mdb->obj);
        /* Rewrite the fileName var in place to db/composite/firstFileName.
         * Note this changes the mdb object's own var value. */
        struct mdbVar *v;
        for (v = mdb->vars; v != NULL; v = v->next)
            {
            char *var = v->var, *val = v->val;
            if (sameString("fileName", var))
                {
                fileName = val;
                char path[PATH_LEN];
                char *comma = strchr(fileName, ',');
                if (comma != NULL)
                    *comma = 0;  /* Cut off comma separated list. */
                safef(path, sizeof(path), "%s/%s/%s", db, composite, fileName);  /* Add database path */
                fileName = val = v->val = cloneString(path);
                }
            }
        /* A file node is made only when the file is in the manifest and
         * its experiment was loaded above; others are silently skipped. */
        if (fileName != NULL)
            {
            if (hashLookup(miHash, fileName))
                {
                struct metaNode *expNode = hashFindVal(expHash, dccAccession);
                if (expNode != NULL)
                    {
                    struct metaNode *fileNode = metaNodeNew(mdb->obj);
                    slAddHead(&expNode->children, fileNode);
                    fileNode->parent = expNode;
                    struct mdbVar *v;
                    for (v=mdb->vars; v != NULL; v = v->next)
                        {
                        metaNodeAddVar(fileNode, v->var, v->val);
                        }
                    }
                }
            }
        }
#ifdef SOON
#endif /* SOON */
    }

/* Hoist tags up the tree, sort, and write out every child of the root. */
struct hash *suppress = makeSuppress();
struct hash *closeEnoughTags = makeCloseEnoughTags();

metaTreeHoist(metaTree, closeEnoughTags);
metaTreeSortChildrenSortTags(metaTree);
FILE *f = mustOpen(outMetaRa, "w");
struct metaNode *node;
for (node = metaTree->children; node != NULL; node = node->next)
    metaTreeWrite(0, 0, BIGNUM, FALSE, NULL, node, suppress, f);
carefulClose(&f);

/* Write warning about tags in highest parent.  The root itself is not
 * written above, so vars that ended up on it are absent from the output. */
struct mdbVar *v;
for (v = metaTree->vars; v != NULL; v = v->next)
    verbose(1, "Omitting universal %s %s\n", v->var, v->val);
}
boolean myFaReadMixedNext(FILE *f, boolean preserveCase, char *defaultName, boolean mustStartWithComment, char **retCommentLine, struct dnaSeq **retSeq)
/* Read next sequence from .fa file.  Return sequence in retSeq.
 * If retCommentLine is non-null return a cloned copy of the '>' line in
 * retCommentLine (at most what fits in the 1024 byte line buffer).
 * The whole thing returns FALSE at end of file.
 *   preserveCase         - TRUE keeps letters as read and stores 'N' for
 *                          characters ntChars does not know; FALSE stores
 *                          the ntChars translation, or 'n'.
 *   defaultName          - name used when no '>' line is consumed.
 *   mustStartWithComment - TRUE skips lines until one starts with '>';
 *                          FALSE treats a first non-'>' line as sequence.
 * A sequence ends at end of file or at the next '>' or '=' character, which
 * is pushed back for the caller.  White space and digits are skipped. */
{
char lineBuf[1024];
int lineSize;
char *words[1];
int c;
off_t offset = ftello(f);
size_t dnaSize = 0;
DNA *dna, *sequence;
char *name = defaultName;

if (name == NULL)
    name = "";
dnaUtilOpen();
if (retCommentLine != NULL)
    *retCommentLine = NULL;
*retSeq = NULL;

/* Skip first lines until it starts with '>' */
for (;;)
    {
    if (fgets(lineBuf, sizeof(lineBuf), f) == NULL)
        {
        if (ferror(f))
            errnoAbort("read of fasta file failed");
        return FALSE;
        }
    lineSize = strlen(lineBuf);
    if (lineBuf[0] == '>')
        {
        if (retCommentLine != NULL)
            *retCommentLine = cloneString(lineBuf);
        /* A header longer than lineBuf leaves its tail in the stream;
         * discard it so it is not counted and stored as sequence. */
        if (lineSize > 0 && lineBuf[lineSize-1] != '\n')
            {
            while ((c = fgetc(f)) != EOF && c != '\n')
                ;
            }
        offset = ftello(f);     /* sequence data starts here */
        chopByWhite(lineBuf, words, ArraySize(words));
        name = words[0]+1;      /* first word, without the '>' */
        break;
        }
    else if (!mustStartWithComment)
        {
        /* No header: rewind so this line is read as sequence. */
        if (fseeko(f, offset, SEEK_SET) < 0)
            errnoAbort("fseek on fasta file failed");
        break;
        }
    else
        offset += lineSize;
    }

/* Count up DNA. */
for (;;)
    {
    c = fgetc(f);
    if (c == EOF || c == '>' || c == '=')
        break;
    if (!isspace(c) && !isdigit(c))
        {
        ++dnaSize;
        }
    }

/* Allocate DNA and fill it up from file. */
dna = sequence = needHugeMem(dnaSize+1);
if (fseeko(f, offset, SEEK_SET) < 0)
    errnoAbort("fseek on fasta file failed");
for (;;)
    {
    c = fgetc(f);
    if (c == EOF || c == '>' || c == '=')
        break;
    if (!isspace(c) && !isdigit(c))
        {
        // check for non-DNA char
        if (ntChars[c] == 0)
            {
            *dna++ = preserveCase ? 'N' : 'n';
            }
        else
            {
            *dna++ = preserveCase ? c : ntChars[c];
            }
        }
    }
/* Leave the record delimiter for the caller (or the next call) to see. */
if (c == '>' || c == '=')
    ungetc(c, f);
*dna = 0;

/* name may point into lineBuf, so the dnaSeq is made before returning. */
*retSeq = newDnaSeq(sequence, dnaSize, name);
if (ferror(f))
    errnoAbort("read of fasta file failed");
return TRUE;
}
void addBacEndInfo(char *spFile) /* Add BAC end info from Shiaw-Pyng's file to clones in cloneHash. */ { struct lineFile *lf = lineFileOpen(spFile, TRUE); char *line; int lineSize, wordCount; int spCount = 0; char *words[16]; while (lineFileNext(lf, &line, &lineSize)) { char *s, *e, c; struct clone *clone; struct endInfo *end; char *firstWord; char *contig; if (line[0] == '#') continue; wordCount = chopLine(line, words); if (wordCount == 0) continue; firstWord = words[0]; s = strchr(firstWord, '.'); if (s == NULL) errAbort("Expecting dot line %d of %s\n", lf->lineIx, lf->fileName); *s++ = 0; if ((clone = hashFindVal(cloneHash, firstWord)) == NULL) { warn("%s in %s but not .finf files", firstWord, spFile); continue; } if (!startsWith("Contig", s)) errAbort("Expecting .Contig line %d of %s\n", lf->lineIx, lf->fileName); s += 6; contig = s; if (wordCount == 1) { /* Older style - just one word. */ e = strrchr(contig, '.'); if (e == NULL) errAbort("Expecting last dot line %d of %s\n", lf->lineIx, lf->fileName); *e++ = 0; AllocVar(end); subChar(s, '.', '_'); end->contig = cloneString(contig); end->text = cloneString(e); c = lastChar(end->text); if (!(c == 'L' || c == 'R')) c = '?'; end->lr = c; slAddHead(&clone->spList, end); ++spCount; } else if (wordCount == 15) { /* Newer style - 15 words. */ if (!sameWord(words[11], "total_repeats")) { AllocVar(end); end->contig = cloneString(contig); end->text = cloneString(words[2]); c = words[3][0]; if (!(c == 'L' || c == 'R')) c = '?'; end->lr = c; slAddHead(&clone->spList, end); ++spCount; } } else { lineFileExpectWords(lf, 15, wordCount); } } lineFileClose(&lf); printf("Info on %d ends in %s\n", spCount, spFile); }
struct bbiChromUsage *bbiChromUsageFromBedFile(struct lineFile *lf, struct hash *chromSizesHash,
        struct bbExIndexMaker *eim, int *retMinDiff, double *retAveSize, bits64 *retBedCount,
        boolean tabSep)
/* Make one pass over the bed file, returning a list with one bbiChromUsage
 * per chromosome in file order.  Also reports through the ret pointers the
 * smallest distance between the starts of consecutive items on the same
 * chromosome (BIGNUM if there is none), the average item size, and the item
 * count.  If eim is non-NULL its maximum field sizes are updated as well,
 * and enough fields are required to cover its highest index field.
 * Aborts on unsorted input, end before start, a chromosome missing from
 * chromSizesHash, or an end past the chromosome size. */
{
int fieldsNeeded = 3;
if (eim != NULL)
    fieldsNeeded = bbExIndexMakerMaxIndexField(eim) + 1;
char *row[fieldsNeeded];
struct bbiChromUsage *curUsage = NULL, *usageList = NULL;
int prevStart = -1;             /* -1 means no earlier item on this chrom */
bits32 nextId = 0;
bits64 baseTotal = 0, itemTotal = 0;
int smallestDiff = BIGNUM;

lineFileRemoveInitialCustomTrackLines(lf);

for (;;)
    {
    int fieldCount;
    if (tabSep)
        fieldCount = lineFileChopCharNext(lf, '\t', row, fieldsNeeded);
    else
        fieldCount = lineFileChopNext(lf, row, fieldsNeeded);
    if (fieldCount == 0)
        break;
    lineFileExpectAtLeast(lf, fieldsNeeded, fieldCount);

    char *chrom = row[0];
    int start = lineFileNeedNum(lf, row, 1);
    int end = lineFileNeedNum(lf, row, 2);
    if (eim != NULL)
        bbExIndexMakerUpdateMaxFieldSize(eim, row);
    if (start > end)
        errAbort("end (%d) before start (%d) line %d of %s", end, start, lf->lineIx, lf->fileName);

    ++itemTotal;
    baseTotal += (end - start);

    /* Start a new usage record whenever the chromosome name changes. */
    if (curUsage == NULL || differentString(curUsage->name, chrom))
        {
        /* make sure chrom names are sorted in ASCII order */
        if (curUsage != NULL && strcmp(curUsage->name, chrom) > 0)
            errAbort("%s is not case-sensitive sorted at line %d. "
                     "Please use \"sort -k1,1 -k2,2n\" with LC_COLLATE=C, or bedSort and try again.",
                     lf->fileName, lf->lineIx);

        struct hashEl *sizeEl = hashLookup(chromSizesHash, chrom);
        if (sizeEl == NULL)
            errAbort("%s is not found in chromosome sizes file", chrom);
        int chromSize = ptToInt(sizeEl->val);

        AllocVar(curUsage);
        curUsage->name = cloneString(chrom);
        curUsage->id = nextId++;
        curUsage->size = chromSize;
        slAddHead(&usageList, curUsage);
        prevStart = -1;
        }

    if (end > curUsage->size)
        errAbort("End coordinate %d bigger than %s size of %d line %d of %s",
                 end, curUsage->name, curUsage->size, lf->lineIx, lf->fileName);
    curUsage->itemCount += 1;

    if (prevStart >= 0)
        {
        /* smallestDiff never goes negative, so a negative diff is always
         * below it; checking the sign first is equivalent. */
        int diff = start - prevStart;
        if (diff < 0)
            errAbort("%s is not sorted at line %d. Please use \"sort -k1,1 -k2,2n\" or bedSort and try again.",
                     lf->fileName, lf->lineIx);
        if (diff < smallestDiff)
            smallestDiff = diff;
        }
    prevStart = start;
    }
slReverse(&usageList);

*retMinDiff = smallestDiff;
if (itemTotal > 0)
    *retAveSize = (double)baseTotal/itemTotal;
else
    *retAveSize = 0;
*retBedCount = itemTotal;
return usageList;
}
void readFinfFiles(char *gsDir) /* Read in .finf files and save info in cloneHash/cloneList. */ { struct lineFile *lf; struct clone *clone = NULL; struct endInfo *end; char fileName[512]; int i; char *words[7]; char lastClone[64]; char cloneName[64]; int gsInfoCount = 0; struct frag *frag; boolean isFin; char *s, *e; strcpy(lastClone, ""); for (i=0; i<ArraySize(gsFiles); ++i) { isFin = (i <= 0); sprintf(fileName, "%s/%s", gsDir, gsFiles[i]); printf("Reading info from %s\n", fileName); lf = lineFileOpen(fileName, TRUE); while (lineFileRow(lf, words)) { if (!sameString(words[1], lastClone)) { struct clone *oldClone; strcpy(lastClone, words[1]); strcpy(cloneName, words[1]); AllocVar(clone); s = strchr(cloneName, '.'); if (s == NULL) errAbort("Bad clone name format line %d of %s\n", lf->lineIx, lf->fileName); if (strlen(s) >= sizeof(clone->version)) errAbort("Bad clone name format line %d of %s\n", lf->lineIx, lf->fileName); strcpy(clone->version, s); chopSuffix(cloneName); clone->size = atoi(words[3]); if ((oldClone = hashFindVal(cloneHash, cloneName)) != NULL) { if (isFin && clone->size == oldClone->size && sameString(clone->version, oldClone->version)) warn("Apparently benign duplication of %s line %d of %s", cloneName, lf->lineIx, lf->fileName); else warn("%s duplicated line %d of %s (size %d oldSize %d)", cloneName, lf->lineIx, lf->fileName, clone->size, oldClone->size); } hashAddSaveName(cloneHash, cloneName, clone, &clone->name); clone->isFin = isFin; slAddHead(&cloneList, clone); } frag = newFrag(words[0], lf); slAddTail(&clone->fragList, frag); ++clone->fragCount; if (!clone->isFin && !sameString(words[6], "?") && !sameString(words[6], "i") && !sameString(words[6], "w")) { char *s = strchr(words[0], '~'); char c; if (s == NULL) errAbort("Expecting ~ in fragment name line %d of %s\n", lf->lineIx, lf->fileName); ++s; AllocVar(end); end->contig = cloneString(s); subChar(s, '.', '_'); end->text = cloneString(words[6]); c = lastChar(end->text); if (!(c == 'L' || c 
== 'R')) c = '?'; end->lr = c; slAddHead(&clone->gsList, end); ++gsInfoCount; } } lineFileClose(&lf); } printf("Found %d ends in %d clones\n", gsInfoCount, slCount(cloneList)); }
void readOneOut(char *rmskFile) /* Read .out file rmskFile, check each line, and print OK lines to .tab. */ { struct lineFile *lf; char *line, *words[24]; int lineSize, wordCount; /* Open .out file and process header. */ lf = lineFileOpen(rmskFile, TRUE); if (!lineFileNext(lf, &line, &lineSize)) errAbort("Empty %s", lf->fileName); if (!startsWith(" SW perc perc", line)) { if (!startsWith(" SW perc perc", line)) errAbort("%s doesn't seem to be a RepeatMasker .out file, first " "line seen:\n%s", lf->fileName, line); } lineFileNext(lf, &line, &lineSize); lineFileNext(lf, &line, &lineSize); /* Process line oriented records of .out file. */ while (lineFileNext(lf, &line, &lineSize)) { static struct rmskOut r; char *s; wordCount = chopLine(line, words); if (wordCount < 14) errAbort("Expecting 14 or 15 words line %d of %s", lf->lineIx, lf->fileName); r.swScore = atoi(words[0]); r.milliDiv = makeMilli(words[1], lf); r.milliDel = makeMilli(words[2], lf); r.milliIns = makeMilli(words[3], lf); r.genoName = words[4]; r.genoStart = atoi(words[5])-1; r.genoEnd = atoi(words[6]); r.genoLeft = parenSignInt(words[7], lf); r.strand[0] = (words[8][0] == '+' ? 
'+' : '-'); r.repName = words[9]; r.repClass = words[10]; char *repClassTest = cloneString(r.repClass); stripChar(repClassTest, '('); stripChar(repClassTest, ')'); int nonDigitCount = countLeadingNondigits(repClassTest); int wordOffset = 0; // this repClass is only digits, (or only (digits) with surrounding parens) // this is the sign of an empty field here // due to custom library in use that has no class/family indication if (0 == nonDigitCount) { wordOffset = 1; r.repClass = cloneString("Unspecified"); r.repFamily = cloneString("Unspecified"); } else { s = strchr(r.repClass, '/'); if (s == NULL) r.repFamily = r.repClass; else { *s++ = 0; r.repFamily = s; } } r.repStart = parenSignInt(words[11-wordOffset], lf); r.repEnd = atoi(words[12-wordOffset]); r.repLeft = parenSignInt(words[13-wordOffset], lf); r.id[0] = ((wordCount > (14-wordOffset)) ? words[14-wordOffset][0] : ' '); if (checkRepeat(&r, lf)) { FILE *f = getFileForChrom(r.genoName); if (!noBin) fprintf(f, "%u\t", hFindBin(r.genoStart, r.genoEnd)); rmskOutTabOut(&r, f); } } }
void endHandler(struct xap *xap, char *name) /* Called at end of a tag */ { struct table *table = xap->stack->object; struct table *parentTable = xap->stack[1].object; struct field *field; struct fieldRef *fieldRef; struct assocRef *assocRef; char *text = skipLeadingSpaces(xap->stack->text->string); char *primaryKeyVal = NULL; struct assoc *assoc; static struct dyString *uniq = NULL; if (table->promoted) /* Simple case - copy text to parent table. */ { for (fieldRef = table->parentKeys; fieldRef != NULL; fieldRef = fieldRef->next) { field = fieldRef->field; if (field->table == parentTable) { struct dyString **parentContent = contentStack + table->fieldCount; struct dyString *dy = parentContent[field->tablePos]; if (!field->isString && text[0] == 0) text = "0"; dyStringAppend(dy, text); break; } } } else { if (text[0] != 0) { field = hashFindVal(table->fieldHash, textField); if (field == NULL) errAbort("No text for %s expected in dtd", table->name); dyStringAppendEscapedForTabFile(contentStack[field->tablePos], text); } /* Construct uniq string from fields, etc. 
*/ if (uniq == NULL) uniq = dyStringNew(0); else dyStringClear(uniq); for (field = table->fieldList; field != NULL; field = field->next) { if (!(field->isPrimaryKey && field->isMadeUpKey)) { struct dyString *dy = contentStack[field->tablePos]; if (dy->stringSize == 0 && !field->isString) dyStringAppendC(dy, '0'); dyStringAppendN(uniq, dy->string, dy->stringSize); dyStringAppendC(uniq, '\t'); } } for (assoc = table->assocList; assoc != NULL; assoc = assoc->next) { dyStringPrintf(uniq, "%p\t%s\t", assoc->f, assoc->childKey); } primaryKeyVal = hashFindVal(table->uniqHash, uniq->string); if (primaryKeyVal == NULL) { struct dyString *priDy = contentStack[table->primaryKey->tablePos]; if (table->madeUpPrimary) { table->lastId += 1; dyStringPrintf(priDy, "%d", table->lastId); } primaryKeyVal = priDy->string; for (field = table->fieldList; field != NULL; field = field->next) { struct dyString *dy = contentStack[field->tablePos]; fprintf(table->tabFile, "%s", dy->string); if (field->next != NULL) fprintf(table->tabFile, "\t"); } fprintf(table->tabFile, "\n"); hashAdd(table->uniqHash, uniq->string, cloneString(primaryKeyVal)); } for (fieldRef = table->parentKeys; fieldRef != NULL; fieldRef = fieldRef->next) { field = fieldRef->field; if (field->table == parentTable) { struct dyString **parentContent = contentStack + table->fieldCount; struct dyString *dy = parentContent[field->tablePos]; dyStringAppend(dy, primaryKeyVal); break; } } for (assocRef = table->parentAssocs; assocRef != NULL; assocRef = assocRef->next) { if (assocRef->parent == parentTable) { assoc = assocNew(assocRef->assoc->tabFile, primaryKeyVal); slAddHead(&parentTable->assocList, assoc); } } slReverse(&table->assocList); for (assoc = table->assocList; assoc != NULL; assoc = assoc->next) fprintf(assoc->f, "%s\t%s\n", primaryKeyVal, assoc->childKey); assocFreeList(&table->assocList); } contentStack += table->fieldCount; }
char *getKnownGeneUrl(struct sqlConnection *conn, int geneId) /* Given gene ID, try and find known gene on browser in same * species. */ { char query[256]; char tableName[256]; int taxon; char *url = NULL; char *genomeDb = NULL; /* Figure out taxon. */ sqlSafef(query, sizeof(query), "select taxon from gene where id = %d", geneId); taxon = sqlQuickNum(conn, query); genomeDb = hDbForTaxon(conn, taxon); if (genomeDb != NULL) { /* Make sure known genes track exists - we may need * to tweak this at some point for model organisms. */ safef(tableName, sizeof(tableName), "%s.knownToVisiGene", genomeDb); if (!sqlTableExists(conn, tableName)) genomeDb = NULL; } /* If no db for that organism revert to human. */ if (genomeDb == NULL) genomeDb = hDefaultDb(); safef(tableName, sizeof(tableName), "%s.knownToVisiGene", genomeDb); if (sqlTableExists(conn, tableName)) { struct dyString *dy = dyStringNew(0); char *knownGene = NULL; if (sqlCountColumnsInTable(conn, tableName) == 3) { sqlDyStringPrintf(dy, "select name from %s.knownToVisiGene where geneId = %d", genomeDb, geneId); } else { struct slName *imageList, *image; sqlSafef(query, sizeof(query), "select imageProbe.image from probe,imageProbe " "where probe.gene=%d and imageProbe.probe=probe.id", geneId); imageList = sqlQuickList(conn, query); if (imageList != NULL) { sqlDyStringPrintf(dy, "select name from %s.knownToVisiGene ", genomeDb); dyStringAppend(dy, "where value in("); for (image = imageList; image != NULL; image = image->next) { sqlDyStringPrintf(dy, "'%s'", image->name); if (image->next != NULL) dyStringAppendC(dy, ','); } dyStringAppend(dy, ")"); slFreeList(&imageList); } } if (dy->stringSize > 0) { knownGene = sqlQuickString(conn, dy->string); if (knownGene != NULL) { char temp[1024]; safef(temp, sizeof temp, "../cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_chrom=none", genomeDb, knownGene); url = cloneString(temp); } } dyStringFree(&dy); } freez(&genomeDb); return url; }
void refreshNamedSessionCustomTracks(char *centralDbName) /* refreshNamedSessionCustomTracks -- cron robot for keeping alive custom * tracks that are referenced by saved sessions. */ { struct sqlConnection *conn = hConnectCentral(); struct slPair *updateList = NULL, *update; char *actualDbName = sqlGetDatabase(conn); int liveCount=0, expiredCount=0; setUdcCacheDir(); /* programs that use udc must call this to initialize cache dir location */ if (!sameString(centralDbName, actualDbName)) errAbort("Central database specified in hg.conf file is %s but %s " "was specified on the command line.", actualDbName, centralDbName); else verbose(2, "Got connection to %s\n", centralDbName); long long threshold = 0; int atime = optionInt("atime", 0); if (atime > 0) { time_t now = time(NULL); threshold = now - ((long long)atime * 24 * 60 * 60); } if (sqlTableExists(conn, savedSessionTable)) { struct sessionInfo *sessionList = NULL, *si; struct sqlResult *sr = NULL; char **row = NULL; char query[512]; safef(query, sizeof(query), "select userName,sessionName,UNIX_TIMESTAMP(lastUse),contents from %s " "order by userName,sessionName", savedSessionTable); sr = sqlGetResult(conn, query); // Slurp results into memory instead of processing row by row, // reducing the chance of lost connection. 
while ((row = sqlNextRow(sr)) != NULL) { if (atime > 0) { long long lastUse = atoll(row[2]); if (lastUse < threshold) { verbose(2, "User %s session %s is older than %d days, skipping.\n", row[0], row[1], atime); continue; } } AllocVar(si); safecpy(si->userName, sizeof(si->userName), row[0]); safecpy(si->sessionName, sizeof(si->sessionName), row[1]); si->contents = cloneString(row[3]); slAddHead(&sessionList, si); } sqlFreeResult(&sr); for (si = sessionList; si != NULL; si = si->next) { char *updateIfAny = scanSettingsForCT(si->userName, si->sessionName, si->contents, &liveCount, &expiredCount); if (updateIfAny) { AllocVar(update); update->name = updateIfAny; slAddHead(&updateList, update); } } } /* Now that we're done reading from savedSessionTable, we can modify it: */ if (optionExists("hardcore")) { for (update = updateList; update != NULL; update = update->next) sqlUpdate(conn, update->name); } hDisconnectCentral(&conn); verbose(1, "Found %d live and %d expired custom tracks in %s.\n", liveCount, expiredCount, centralDbName); }
char *visiGeneHypertextGenotype(struct sqlConnection *conn, int id) /* Return genotype of organism if any in nifty hypertext format. */ { int genotypeId; struct slName *geneIdList, *geneId; char query[256]; struct dyString *html; /* Look up genotype ID. */ sqlSafef(query, sizeof(query), "select specimen.genotype from image,specimen " "where image.id=%d and image.specimen = specimen.id", id); genotypeId = sqlQuickNum(conn, query); if (genotypeId == 0) return NULL; /* Get list of genes involved. */ sqlSafef(query, sizeof(query), "select distinct allele.gene from genotypeAllele,allele " "where genotypeAllele.genotype=%d " "and genotypeAllele.allele = allele.id" , genotypeId); geneIdList = sqlQuickList(conn, query); if (geneIdList == NULL) return cloneString("wild type"); /* Loop through each gene adding information to html. */ html = dyStringNew(0); for (geneId = geneIdList; geneId != NULL; geneId = geneId->next) { char *geneName; struct slName *alleleList, *allele; int alleleCount; boolean needsSlash = FALSE; /* Get gene name. */ sqlSafef(query, sizeof(query), "select name from gene where id='%s'", geneId->name); geneName = sqlQuickString(conn, query); if (geneName == NULL) internalErr(); /* Process each allele of gene. 
*/ sqlSafef(query, sizeof(query), "select allele.name from genotypeAllele,allele " "where genotypeAllele.genotype=%d " "and genotypeAllele.allele = allele.id " "and allele.gene=%s" , genotypeId, geneId->name); alleleList = sqlQuickList(conn, query); alleleCount = slCount(alleleList); for (allele = alleleList; allele != NULL; allele = allele->next) { char *simplifiedAllele = getSimplifiedAllele(geneName, allele->name); int repCount = 1, rep; if (alleleCount == 1) repCount = 2; for (rep = 0; rep < repCount; ++rep) { if (needsSlash) dyStringAppendC(html, '/'); else needsSlash = TRUE; dyStringAppend(html, geneName); dyStringPrintf(html, "<SUP>%s</SUP>", simplifiedAllele); } freeMem(simplifiedAllele); } if (geneId->next != NULL) dyStringAppendC(html, ' '); slFreeList(&alleleList); freeMem(geneName); } slFreeList(&geneIdList); return dyStringCannibalize(&html); }
void liftOverMerge(char *oldFile, char *newFile) /* liftOverMerge - Merge regions in BED5 generated by liftOver -multiple */ { struct bed *bedList = NULL, *bed = NULL, *otherBed = NULL, *nextBed = NULL; struct bedList *bedListHeaders = NULL, *bedListHeader = NULL; FILE *f = mustOpen(newFile, "w"); bedList = bedLoadNAll(oldFile, 5); /* break down bed list into a list of lists, one per "region", where region * is the name field in the bed */ for (bed = bedList; bed != NULL; bed = nextBed) { verbose(3, "%s:%d-%d %s %d\n", bed->chrom, bed->chromStart, bed->chromEnd, bed->name, bed->score); if (bedListHeader == NULL || differentString(bed->name, bedListHeader->name)) { verbose(2, "region %s\n", bed->name); AllocVar(bedListHeader); bedListHeader->name = cloneString(bed->name); slAddHead(&bedListHeaders, bedListHeader); } nextBed = bed->next; slAddHead(&bedListHeader->bed, bed); } slReverse(&bedListHeaders); for (bedListHeader = bedListHeaders; bedListHeader != NULL; bedListHeader = bedListHeader->next) { int ix = 1; verbose(3, "region %s\n", bedListHeader->name); slReverse(&bedListHeader->bed); /* traverse list of bed lists, merging overlapping entries * for each region */ for (bed = bedListHeader->bed; bed != NULL; bed = bed->next) { for (otherBed = bed->next; otherBed != NULL; otherBed = nextBed) { nextBed = otherBed->next; if (sameString(bed->chrom, otherBed->chrom) && (max(bed->chromStart, otherBed->chromStart) <= min(bed->chromEnd, otherBed->chromEnd) + mergeGap)) { /* these regions overlap (or are within the merge gap), * so create one that is a merge, and drop the other */ verbose(2,"merging %s:%d-%d, %s:%d-%d (overlap=%d)", otherBed->chrom, otherBed->chromStart, otherBed->chromEnd, bed->chrom, bed->chromStart, bed->chromEnd, min(bed->chromEnd, otherBed->chromEnd) - max(bed->chromStart, otherBed->chromStart)); bed->chromStart = min(otherBed->chromStart, bed->chromStart); bed->chromEnd = max(otherBed->chromEnd, bed->chromEnd); verbose(2," to %s:%d-%d\n", 
bed->chrom, bed->chromStart, bed->chromEnd); slRemoveEl(&bedListHeader->bed, otherBed); } } } for (otherBed = bedListHeader->bed; otherBed != NULL; otherBed = otherBed->next) { otherBed->score = ix++; bedOutputN(otherBed, 5, f, '\t', '\n'); } } }
int main(int argc, char *argv[])
/* Read a protein flat file made of records that start with an "ID " line and end
 * with a "//" line.  The protein ID of each record is looked up in the knownGene
 * table (column proteinID) of the given database; when a gene is found, every name
 * on the record's "GN " lines is written to the output as "<knownGene name>\t<alias>".
 *
 * argv[1] - database, argv[2] - protein file, argv[3] - output file.
 * Exits with status 1 when a record does not start with an ID line. */
{
struct sqlConnection *conn;
FILE *inf;
FILE *o1;
char cond_str[256];
char *database;
char *proteinFileName;
char *outputFileName;
char *answer;
char *alias;
char *id;
char *chp0, *chp1, *chp2, *chp;
char *kgID;
char line[2000];
size_t len;

if (argc != 4) usage();

database = cloneString(argv[1]);
proteinFileName = cloneString(argv[2]);
outputFileName = cloneString(argv[3]);

conn = hAllocConn(database);

o1 = mustOpen(outputFileName, "w");
if ((inf = mustOpen(proteinFileName, "r")) == NULL)
    {
    fprintf(stderr, "Can't open file %s.\n", proteinFileName);
    exit(8);
    }

while (fgets(line, sizeof(line), inf) != NULL)
    {
    /* A record has to open with an ID line. */
    if (strstr(line, "ID ") != line)
	{
	fprintf(stderr, "expected ID line, but got: %s\n", line);
	exit(1);
	}
    /* The ID is the first word after the prefix.  Tolerate extra padding before it
     * and end it at the first space or at the end of the line, so a line with
     * nothing after the ID cannot cause a NULL dereference. */
    id = line + strlen("ID ");
    while (*id == ' ')
	id++;
    id[strcspn(id, " \r\n")] = '\0';

    sqlSafefFrag(cond_str, sizeof cond_str, "proteinID = '%s'", id);
    answer = sqlGetField(database, "knownGene", "name", cond_str);
    /* Keep a private copy for the rest of the record; released below. */
    kgID = (answer != NULL) ? strdup(answer) : NULL;

    if (fgets(line, sizeof(line), inf) == NULL)
	{
	free(kgID);
	break;
	}
    do
	{
	/* "//" signal end of a record */
	if ((line[0] == '/') && (line[1] == '/')) break;

	/* work on GN (Gene Name) line only: the tag has to start the line, a
	 * "GN " somewhere inside another line (e.g. "FOREIGN PROTEIN") does not count */
	if (strstr(line, "GN ") == line)
	    {
	    /* Drop the line terminator, then one trailing '.' if present. */
	    len = strlen(line);
	    while (len > 0 && (line[len-1] == '\n' || line[len-1] == '\r'))
		line[--len] = '\0';
	    if (len > 0 && line[len-1] == '.')
		line[--len] = '\0';

	    /* Names start at column 5; a shorter line has none. */
	    chp0 = (len >= 5) ? line + 5 : NULL;
	    while (chp0 != NULL)
		{
		while (*chp0 == ' ') chp0++;
		/* Names are separated by " OR " or " AND "; take whichever comes first. */
		chp1 = strstr(chp0, " OR ");
		chp2 = strstr(chp0, " AND ");
		if (chp1 != NULL && chp2 != NULL)
		    chp = (chp1 < chp2) ? chp1 : chp2;
		else
		    chp = (chp1 != NULL) ? chp1 : chp2;

		alias = chp0;
		if (chp == NULL)
		    chp0 = NULL;
		else
		    {
		    *chp = '\0';
		    /* For " AND " this lands on its trailing space, which the
		     * space-skipping at the top of the loop steps over. */
		    chp0 = chp + 4;
		    }

		if (kgID != NULL)
		    {
		    // clean up "(XXXX" or "XXXX)"
		    /* alias is already cut off from the rest of the line, so trimming
		     * it in place cannot disturb the names that follow. */
		    if (*alias == '(') alias++;
		    chp = strchr(alias, ')');
		    if (chp != NULL) *chp = '\0';
		    fprintf(o1, "%s\t%s\n", kgID, alias);
		    }
		}
	    }
	} while (fgets(line, sizeof(line), inf) != NULL);
    free(kgID);
    }
fclose(inf);
fclose(o1);
hFreeConn(&conn);
return(0);
}