static void simpleFillInSequence(char *seqDir, struct agpFrag *agpList,
    DNA *dna, int dnaSize)
/* Fill in DNA array with sequences from simple clones.
 * For each fragment in agpList the clone name is agp->frag after
 * chopSuffixAt(name, '_'); the clone is read from <seqDir>/<clone>.fa, which
 * must contain exactly one record.  The fragStart..fragEnd slice of that
 * record (reverse-complemented in place when the strand starts with '-') is
 * copied into dna at offset chromStart.
 * Aborts via errAbort() when a name or path does not fit its buffer, when the
 * fasta file does not hold exactly one record, or when the slice would not
 * fit inside the dnaSize bases of dna. */
{
struct agpFrag *agp;
char underline = '_';

for (agp = agpList; agp != NULL; agp = agp->next)
    {
    char clone[128];
    char path[512];
    struct dnaSeq *seq;
    int size;
    int written;

    /* Names come from the AGP input, so never copy them unbounded. */
    written = snprintf(clone, sizeof(clone), "%s", agp->frag);
    if (written < 0 || (size_t)written >= sizeof(clone))
        errAbort("Fragment name too long: %s", agp->frag);
    chopSuffixAt(clone, underline);

    written = snprintf(path, sizeof(path), "%s/%s.fa", seqDir, clone);
    if (written < 0 || (size_t)written >= sizeof(path))
        errAbort("Path too long for clone %s in directory %s", clone, seqDir);

    seq = faReadAllDna(path);
    if (slCount(seq) != 1)
        errAbort("Can only handle exactly one clone in %s.", path);

    size = agp->fragEnd - agp->fragStart;
    /* Refuse to write outside the destination array. */
    if (size < 0 || (long long)agp->chromStart + (long long)size > (long long)dnaSize)
        errAbort("Fragment %s does not fit in destination of %d bases", agp->frag, dnaSize);

    if (agp->strand[0] == '-')
        reverseComplement(seq->dna + agp->fragStart, size);
    memcpy(dna + agp->chromStart, seq->dna + agp->fragStart, size);
    freeDnaSeq(&seq);
    }
}
void writeSizeListToBedFile(FILE *File, struct sizeList *sList)
/* Write each element of sList to File as one tab-separated line holding
 * chrom, chromStart, chromEnd and name.
 * When a name ends in "+" or "-" it is first passed through
 * chopSuffixAt(name, '_'); that edit is made in place, so the caller's list
 * elements are modified. */
{
struct sizeList *sl;

for (sl = sList; sl != NULL; sl = sl->next)
    {
    if (endsWith(sl->name, "+") || endsWith(sl->name, "-"))
        chopSuffixAt(sl->name, '_');
    fprintf(File, "%s\t%d\t%d\t%s\n",
            sl->chrom, sl->chromStart, sl->chromEnd, sl->name);
    }
}
static void saveProtFastaPath(struct brokenRefPepTbl* brpTbl, struct brokenRefPep* brp, char *mrnaAcc, char *mrnaFa) /* save protein fasta file path; mangles mrnaFa string */ { char protFa[PATH_LEN]; safef(brp->mrnaAcc, sizeof(brp->mrnaAcc), "%s", mrnaAcc); chopSuffixAt(mrnaFa, '/'); safef(protFa, sizeof(protFa), "%s/pep.fa", mrnaFa); // ignore result from hashStore() (void) hashStore(brpTbl->protFaHash, protFa); safef(brp->newFaPath, sizeof(brp->newFaPath), "%s", protFa); brpTbl->numToRepair++; }
static boolean drawGtexRBoxplot(char *geneName, struct tissueSampleVals *tsvList,
                                boolean doLogTransform, char *version,
                                struct tempName *pngTn)
/* Draw a box-and-whiskers plot from GTEx sample data by running an R script.
 * First writes a tab-separated data frame (header "sample tissue rpkm", then
 * one row per sample value) to a trash file.  Each tissue description is cut
 * at '(' in place before it is written, so tsvList is modified.
 * If pngTn is NULL the data frame is still written and FALSE is returned.
 * Otherwise pngTn is filled in with a trash PNG name, the R script is run
 * through system(), and TRUE is returned only when system() returns 0. */
{
/* Data frame file: one row per sample, columns sample / tissue / rpkm */
struct tempName dfTn;
trashDirFile(&dfTn, "hgc", "gtexGene", ".df.txt");
FILE *out = fopen(dfTn.forCgi, "w");
if (out == NULL)
    errAbort("can't create temp file %s", dfTn.forCgi);
fprintf(out, "sample\ttissue\trpkm\n");

int nextSampleId = 1;
struct tissueSampleVals *tissue = tsvList;
while (tissue != NULL)
    {
    int valCount = tissue->count;
    /* trailing parenthesized phrases are not worth the label length */
    chopSuffixAt(tissue->description, '(');
    int ix = 0;
    while (ix < valCount)
        {
        fprintf(out, "%d\t%s\t%0.3f\n", nextSampleId, tissue->description, tissue->vals[ix]);
        nextSampleId++;
        ix++;
        }
    tissue = tissue->next;
    }
fclose(out);

/* Without a place to put the PNG there is nothing to plot */
if (!pngTn)
    return FALSE;
trashDirFile(pngTn, "hgc", "gtexGene", ".png");

/* Run R in quiet mode, without reading/saving environment or workspace.
 * NOTE: use "order=score" to order bargraph by median RPKM, descending.
 * NOTE(review): geneName and version are placed unquoted on a shell command
 * line -- confirm callers only pass trusted values. */
char cmd[256];
const char *logArg = doLogTransform ? "log=TRUE" : "log=FALSE";
safef(cmd, sizeof(cmd),
      "Rscript --vanilla --slave hgcData/gtexBoxplot.R %s %s %s %s %s %s",
      geneName, dfTn.forCgi, pngTn->forHtml, logArg, "order=alpha", version);

int status = system(cmd);
return (status == 0) ? TRUE : FALSE;
}
struct ctgInfo *ctgInfoLoadOne(struct lineFile *lf)
/* Load one line of ctgInfo from a whitespace-separated file.
 * Returns NULL when lineFileRow() delivers no row.  Otherwise returns a newly
 * allocated ctgInfo: ctgName is a copy of column 0, acc is a second copy of
 * column 0 with its '_' suffix chopped, start and end are parsed from
 * columns 3 and 4.
 * NOTE(review): the earlier comment said "Dispose of this with ctgInfo()";
 * presumably a ctgInfoFree()-style routine was meant -- confirm. */
{
char *row[5];
struct ctgInfo *ctg = NULL;

if (!lineFileRow(lf, row))
    return ctg;

AllocVar(ctg);
ctg->ctgName = cloneString(row[0]);

/* accession is the contig name minus its suffix */
ctg->acc = cloneString(row[0]);
chopSuffixAt(ctg->acc, '_');

ctg->start = sqlUnsigned(row[3]);
ctg->end = sqlUnsigned(row[4]);
return ctg;
}
static void agpSangerUnfinished(char *agpFile, char *contigFasta, char *agpOut) /* Fix agp to match unfinished contigs in fasta */ { struct lineFile *lf = lineFileOpen(agpFile, TRUE); char *line, *words[16]; int lineSize, wordCount; unsigned lastPos = 0; struct agpFrag *agp; struct agpGap *gap; FILE *f; char *lastObj = NULL; f = mustOpen(agpOut, "w"); char *newChrom = NULL; struct hash *hash = hashFasta(contigFasta); verbose(2,"#\tprocessing AGP file: %s\n", agpFile); while (lineFileNext(lf, &line, &lineSize)) { if (line[0] == 0 || line[0] == '#' || line[0] == '\n') continue; //verbose(2,"#\tline: %d\n", lf->lineIx); wordCount = chopLine(line, words); if (wordCount < 5) errAbort("Bad line %d of %s: need at least 5 words, got %d\n", lf->lineIx, lf->fileName, wordCount); if (!lastObj || !sameString(words[0],lastObj)) { freez(&newChrom); newChrom = cloneString(words[0]); lastPos = 0; } if (words[4][0] != 'N') { lineFileExpectAtLeast(lf, 9, wordCount); agp = agpFragLoad(words); /* agp is 1-based but agp loaders do not adjust for 0-based: */ agp->chromStart -= 1; agp->fragStart -= 1; if (agp->chromEnd - agp->chromStart != agp->fragEnd - agp->fragStart) errAbort("Sizes don't match in %s and %s line %d of %s\n", agp->chrom, agp->frag, lf->lineIx, lf->fileName); char *root = cloneString(agp->frag); chopSuffixAt(root, '.'); struct hashEl *e, *elist = hashLookup(hash, root); for (e = elist; e; e = hashLookupNext(e)) { struct unfinishedContig *u = e->val; if ((u->fragStart <= agp->fragStart) && (u->fragEnd >= agp->fragEnd)) { agp->frag = cloneString(u->frag); agp->fragEnd -= u->fragStart; agp->fragStart -= u->fragStart; } } freeMem(root); } else { lineFileExpectAtLeast(lf, 8, wordCount); gap = agpGapLoad(words); /* to be consistent with agpFrag */ gap->chromStart -= 1; agp = (struct agpFrag*)gap; } if (agp->chromStart != lastPos) errAbort("Start doesn't match previous end line %d of %s\n" "agp->chromStart: %u\n" "agp->chromEnd: %u\n" "lastPos: %u\n" ,lf->lineIx, lf->fileName 
,agp->chromStart ,agp->chromEnd ,lastPos ); lastPos = agp->chromEnd; freez(&lastObj); lastObj = cloneString(words[0]); /* not agp->chrom which may be modified already */ if (words[4][0] != 'N') { /* agpFragOutput assumes 0-based-half-open, but writes 1-based for agp */ agpFragOutput(agp, f, '\t', '\n'); agpFragFree(&agp); } else { /* restore back to 1-based for agp * because agpGapOutput doesn't compensate */ gap->chromStart += 1; agpGapOutput(gap, f, '\t', '\n'); agpGapFree(&gap); } } carefulClose(&f); }
void configMultiRegionPage() /* Do multi-region config page after setting track visibility. If vis is -2, then visibility * is unchanged. If -1 then set visibility to default, otherwise it should * be tvHide, tvDense, etc. */ { char *groupTarget; struct track *trackList; struct track *ideoTrack; struct group *groupList; int vis = -2; configInitTrackList(vis, &groupTarget, &trackList, &ideoTrack, &groupList); hPrintf("<FORM ACTION=\"%s\" NAME=\"mainForm\" METHOD=%s>\n", hgTracksName(), cartUsualString(cart, "formMethod", "POST")); webStartWrapperDetailedNoArgs(cart, database, "", "", FALSE, FALSE, FALSE, FALSE); cartSaveSession(cart); hPrintf("<BR>\n"); hTableStart(); virtModeType = cartUsualString(cart, "virtModeType", virtModeType); hPrintf("<TR><TD>"); cgiMakeRadioButton("virtModeType", "default", sameWord("default", virtModeType)); hPrintf("</TD><TD>"); hPrintf("Exit multi-region mode"); hPrintf("</TD></TR>\n"); struct sqlConnection *conn = NULL; if (!trackHubDatabase(database)) // no db conn for assembly hubs conn = hAllocConn(database); // Do we have a gene table for exonMostly? findBestEMGeneTable(trackList); if (emGeneTable) { hPrintf("<TR><TD>"); cgiMakeRadioButton("virtModeType", "exonMostly", sameWord("exonMostly", virtModeType)); hPrintf("</TD><TD>"); hPrintf("Show exons using %s. Use padding of: ", emGeneTrack->shortLabel); hIntVar("emPadding", cartUsualInt(cart, "emPadding", emPadding), 3); hPrintf(" bases."); hPrintf("</TD></TR>\n"); } if (emGeneTable) { hPrintf("<TR><TD>"); cgiMakeRadioButton("virtModeType", "geneMostly", sameWord("geneMostly", virtModeType)); hPrintf("</TD><TD>"); hPrintf("Show genes using %s. 
Use padding of: ", emGeneTrack->shortLabel); hIntVar("gmPadding", cartUsualInt(cart, "gmPadding", gmPadding), 3); hPrintf(" bases."); hPrintf("</TD></TR>\n"); } /* obsolete if (conn && sqlTableExists(conn,"knownCanonical")) { hPrintf("<TR><TD>"); cgiMakeRadioButton("virtModeType", "kcGenes", sameWord("kcGenes", virtModeType)); hPrintf("</TD><TD>"); hPrintf("Show gene regions genome-wide."); hPrintf("</TD></TR>\n"); } */ hPrintf("<TR><TD>"); cgiMakeRadioButton("virtModeType", "customUrl", sameWord("customUrl", virtModeType)); hPrintf("</TD><TD>"); hPrintf("Enter Custom regions as BED, or a URL to them:<br>"); multiRegionsBedUrl = cartUsualString(cart, "multiRegionsBedUrl", multiRegionsBedUrl); struct dyString *dyMultiRegionsBedInput = dyStringNew(256); if (strstr(multiRegionsBedUrl,"://")) { dyStringAppend(dyMultiRegionsBedInput, multiRegionsBedUrl); } else { if (fileExists(multiRegionsBedUrl)) { struct lineFile *lf = lineFileMayOpen(multiRegionsBedUrl, TRUE); char *line; int lineSize; while (lineFileNext(lf, &line, &lineSize)) { dyStringPrintf(dyMultiRegionsBedInput, "%s\n", line); } lineFileClose(&lf); } } hPrintf("<TEXTAREA NAME='multiRegionsBedInput' ID='multiRegionsBedInput' rows='4' cols='58' style='white-space: pre;'>%s</TEXTAREA>", dyMultiRegionsBedInput->string); hPrintf("</TD></TR>\n"); /* The AllChroms option will be released in future if (emGeneTable && sqlTableExists(conn, emGeneTable)) { hPrintf("<TR><TD>"); cgiMakeRadioButton("virtModeType", "singleTrans", sameWord("singleTrans", virtModeType)); hPrintf("</TD><TD>"); hPrintf("Show only one transcript using an ID from %s : ", emGeneTrack->shortLabel); char *trans = cartUsualString(cart, "singleTransId", singleTransId); char sql[1024]; sqlSafef(sql, sizeof sql, "select name from %s where name='%s'", emGeneTable, trans); char *result = sqlQuickString(conn, sql); if (!result) { sqlSafef(sql, sizeof sql, "select name from %s limit 1", emGeneTable); trans = sqlQuickString(conn, sql); } 
hTextVar("singleTransId", trans, 20); hPrintf("</TD></TR>\n"); } */ if (conn) { boolean altLocExists = sqlTableExists(conn, "altLocations"); boolean fixLocExists = sqlTableExists(conn, "fixLocations"); if (altLocExists || fixLocExists) { hPrintf("<TR><TD>"); cgiMakeRadioButton("virtModeType", "singleAltHaplo", sameWord("singleAltHaplo", virtModeType)); hPrintf("</TD><TD>"); hPrintf("Show one alternate haplotype"); if (fixLocExists) hPrintf(" or fix patch"); hPrintf(", placed on its chromosome, using ID: "); char *haplo = cartUsualString(cart, "singleAltHaploId", singleAltHaploId); char *foundHaplo = NULL; char sql[1024]; if (altLocExists) { sqlSafef(sql, sizeof sql, "select name from altLocations where name rlike '^%s(:[0-9-]+)?'", haplo); foundHaplo = sqlQuickString(conn, sql); } if (!foundHaplo && fixLocExists) { sqlSafef(sql, sizeof sql, "select name from fixLocations where name rlike '^%s(:[0-9-]+)?'", haplo); foundHaplo = sqlQuickString(conn, sql); } if (!foundHaplo) { if (altLocExists) sqlSafef(sql, sizeof sql, "select name from altLocations limit 1"); else sqlSafef(sql, sizeof sql, "select name from fixLocations limit 1"); haplo = sqlQuickString(conn, sql); chopSuffixAt(haplo, ':'); } hTextVar("singleAltHaploId", haplo, 60); hPrintf("</TD></TR>\n"); } } /* disable demo for now if (sameString(database,"hg19") || sameString(database, "hg38")) { hPrintf("<TR><TD>"); cgiMakeRadioButton("virtModeType", "demo1", sameWord("demo1", virtModeType)); hPrintf("</TD><TD>"); hPrintf("demo1 two windows on two chroms (default pos on chr21, and same loc on chr22)"); hPrintf("</TD></TR>\n"); } */ /* Disabled for now hPrintf("<TR><TD>"); cgiMakeRadioButton("virtModeType", "demo2", sameWord("demo2", virtModeType)); hPrintf("</TD><TD>"); hPrintf("demo2 multiple "); hIntVar("demo2NumWindows", cartUsualInt(cart, "demo2NumWindows", demo2NumWindows), 3); hPrintf(" windows on one chrom chr21 def posn, window size "); hIntVar("demo2WindowSize", cartUsualInt(cart, "demo2WindowSize", 
demo2WindowSize), 3); hPrintf(" and step size "); hIntVar("demo2StepSize", cartUsualInt(cart, "demo2StepSize", demo2StepSize), 3); hPrintf(" exon-like"); hPrintf("</TD></TR>\n"); */ /* The AllChroms option will be released in future if (conn) // requires chromInfo from database. { // TODO allow it to use assembly hubs via trackHubAllChromInfo() ? hPrintf("<TR><TD>"); cgiMakeRadioButton("virtModeType", "allChroms", sameWord("allChroms", virtModeType)); hPrintf("</TD><TD>"); hPrintf("<br>Show all chromosomes.<br><span style='color:red'>Warning:</span> Turn off all tracks except bigBed, bigWig, and very sparse tracks.<br>Press Hide All to hide all tracks."); hPrintf("</TD></TR>\n"); } */ /* Disabled for now hPrintf("<TR><TD>"); cgiMakeRadioButton("virtModeType", "demo4", sameWord("demo4", virtModeType)); hPrintf("</TD><TD>"); hPrintf("demo4 multiple (311) windows showing exons from TITIN gene uc031rqd.1."); hPrintf("</TD></TR>\n"); */ /* Disabled for now hPrintf("<TR><TD>"); cgiMakeRadioButton("virtModeType", "demo5", sameWord("demo5", virtModeType)); hPrintf("</TD><TD>"); hPrintf("demo5 alt locus on hg38. 
Shows alt chrom surrounded by regions of same size from reference genome."); hPrintf("</TD></TR>\n"); */ /* Disabled for now hPrintf("<TR><TD>"); cgiMakeRadioButton("virtModeType", "demo6", sameWord("demo6", virtModeType)); hPrintf("</TD><TD>"); hPrintf("demo6 shows zoomed in exon-exon junction from SOD1 gene, between exon1 and exon2."); hPrintf("</TD></TR>\n"); */ hTableEnd(); hPrintf("<BR>\n"); hPrintf("<TABLE style=\"border:0px; \">\n"); hPrintf("<TR><TD>"); hCheckBox("emAltHighlight", cartUsualBoolean(cart, "emAltHighlight", FALSE)); hPrintf("</TD><TD>"); hPrintf("Highlight alternating regions in multi-region view"); hPrintf("</TD></TR>\n"); hPrintf("</TABLE>\n"); hPrintf("<BR>\n"); hPrintf("<TABLE style=\"border:0px;width:650px \">\n"); hPrintf("<TR><TD>"); cgiMakeButton("topSubmit", "submit"); hPrintf("</TD><TD align=right>"); hPrintf("<A HREF=\"../goldenPath/help/multiRegionHelp.html\" target=_blank>Help</A>\n"); hPrintf("</TD></TR>\n"); hPrintf("</TABLE>\n"); hFreeConn(&conn); cgiDown(0.9); freez(&groupTarget); webEndSectionTables(); hPrintf("</FORM>"); }
static void agpMergeChromScaf(char *agpFile, char *agpOut, boolean filtering)
/* Create a combined agp file from the chrom.agp and scaffold.agp,
 * merging in only scaffolds from scaffold.agp
 * that are not already in chroms.
 * With filtering FALSE, agpOut is opened for writing, every line is written,
 * and each fragment name (with its '.' suffix chopped) is remembered in a
 * static hash that survives across calls.  With filtering TRUE, agpOut is
 * opened for append and lines whose object name (column 1) is in that hash
 * are validated but not written.
 * Aborts via errAbort() on short lines, on chrom/frag size mismatch, and when
 * a line does not start where the previous line of the same object ended.
 * NOTE(review): lf is not closed before returning. */
{
struct lineFile *lf = lineFileOpen(agpFile, TRUE);
char *line, *words[16];
int lineSize, wordCount;
unsigned lastPos = 0;
char *lastObj = NULL;
FILE *f = mustOpen(agpOut, filtering ? "a" : "w");
static struct hash *hash = NULL;   /* fragment roots seen in unfiltered passes */

if (!hash)
    hash = hashNew(0);
verbose(2,"#\tprocessing AGP file: %s\n", agpFile);
while (lineFileNext(lf, &line, &lineSize))
    {
    struct agpFrag *agp = NULL;
    struct agpGap *gap = NULL;
    boolean skipping = FALSE;
    boolean isGap;

    if (line[0] == 0 || line[0] == '#' || line[0] == '\n')
        continue;
    wordCount = chopLine(line, words);
    if (wordCount < 5)
        errAbort("Bad line %d of %s: need at least 5 words, got %d\n",
                 lf->lineIx, lf->fileName, wordCount);

    /* A new object restarts the running end position. */
    if (!lastObj || !sameString(words[0],lastObj))
        lastPos = 0;

    if (filtering)
        {
        if (hashLookup(hash, words[0]))
            skipping = TRUE;
        }

    isGap = (words[4][0] == 'N');
    if (!isGap)
        {
        lineFileExpectAtLeast(lf, 9, wordCount);
        agp = agpFragLoad(words);
        /* agp is 1-based but agp loaders do not adjust for 0-based: */
        agp->chromStart -= 1;
        agp->fragStart -= 1;
        if (agp->chromEnd - agp->chromStart != agp->fragEnd - agp->fragStart)
            errAbort("Sizes don't match in %s and %s line %d of %s\n",
                     agp->chrom, agp->frag, lf->lineIx, lf->fileName);
        if (!filtering)
            {
            char *root = cloneString(agp->frag);
            chopSuffixAt(root, '.');
            hashStore(hash, root);
            freeMem(root);
            }
        }
    else
        {
        lineFileExpectAtLeast(lf, 8, wordCount);
        gap = agpGapLoad(words);
        /* to be consistent with agpFrag */
        gap->chromStart -= 1;
        /* lets the position check below read chromStart/chromEnd through agp */
        agp = (struct agpFrag*)gap;
        }

    if (agp->chromStart != lastPos)
        errAbort("Start doesn't match previous end line %d of %s\n"
                 "agp->chromStart: %u\n"
                 "agp->chromEnd: %u\n"
                 "lastPos: %u\n"
                 ,lf->lineIx, lf->fileName
                 ,agp->chromStart
                 ,agp->chromEnd
                 ,lastPos
                 );
    lastPos = agp->chromEnd;
    freez(&lastObj);
    lastObj = cloneString(words[0]); /* not agp->chrom which may be modified already */

    if (!isGap)
        {
        /* agpFragOutput assumes 0-based-half-open, but writes 1-based for agp */
        if (!skipping)
            agpFragOutput(agp, f, '\t', '\n');
        agpFragFree(&agp);
        }
    else
        {
        /* restore back to 1-based for agp
         * because agpGapOutput doesn't compensate */
        gap->chromStart += 1;
        if (!skipping)
            agpGapOutput(gap, f, '\t', '\n');
        agpGapFree(&gap);
        }
    }
freez(&lastObj);
carefulClose(&f);
}
static void clusterClone(int argc, char *argv[]) { int i; for (i=1; i < argc; ++i) { struct lineFile *lf; struct psl *psl; unsigned tSize; char *prevAccPart = (char *)NULL; char *prevAccName = (char *)NULL; char *prevTargetName = (char *)NULL; struct hashEl *el; struct hash *chrHash = newHash(0); struct hash *coordHash = newHash(0); struct coordEl *coord; struct coordEl **coordListPt = (struct coordEl **) NULL; unsigned querySize = 0; int partCount = 0; int partsConsidered = 0; verbose(2,"#\tprocess: %s\n", argv[i]); lf=pslFileOpen(argv[i]); while ((struct psl *)NULL != (psl = pslNext(lf)) ) { char *accName = (char *)NULL; char *targetName = (char *)NULL; int chrCount = 0; double percentCoverage; accName = cloneString(psl->qName); if ((char *)NULL == prevAccPart) { prevAccPart = cloneString(psl->qName); /* first time */ querySize = psl->qSize; ++partsConsidered; } chopSuffixAt(accName,'_'); if ((char *)NULL == prevAccName) prevAccName = cloneString(accName); /* first time */ if ((char *)NULL == prevTargetName) prevTargetName = cloneString(psl->tName); /* first time */ /* encountered a new accession name, process the one we * were working on */ if (differentWord(accName, prevAccName)) { if (partCount > 0) processResult(chrHash, coordHash, prevAccName, querySize, partsConsidered); else verbose(1,"# ERROR %s %s - no coordinates found in %d parts considered\n", prevTargetName, prevAccName, partsConsidered); freeMem(prevAccName); prevAccName = cloneString(accName); freeHash(&chrHash); freeHash(&coordHash); chrHash = newHash(0); coordHash = newHash(0); querySize = 0; partCount = 0; partsConsidered = 0; } tSize = psl->tEnd - psl->tStart; percentCoverage = 100.0*((double)(tSize+1)/(psl->qSize + 1)); if (differentWord(psl->qName, prevAccPart)) { ++partsConsidered; querySize += psl->qSize; freeMem(prevAccPart); prevAccPart = cloneString(psl->qName); } targetName = cloneString(psl->tName); if (differentWord(targetName, prevTargetName)) { freeMem(prevTargetName); 
prevTargetName = cloneString(targetName); } /* keep a hash of chrom names encountered */ el = hashLookup(chrHash, targetName); if (el == NULL) { if (percentCoverage > minCover) { hashAddInt(chrHash, targetName, 1); chrCount = 1; } else { hashAddInt(chrHash, targetName, 0); chrCount = 0; } } else { if (percentCoverage > minCover) { chrCount = ptToInt(el->val) + 1; el->val=intToPt(chrCount); } } AllocVar(coord); coord->start = psl->tStart; coord->end = psl->tEnd; coord->qSize = psl->qSize; coord->strand = sameWord(psl->strand,"+") ? 1 : 0; /* when coverage is sufficient */ if (percentCoverage > minCover) { ++partCount; coord->name = cloneString(psl->qName); /* for each chrom name, accumulate a list of coordinates */ el = hashLookup(coordHash, targetName); if (el == NULL) { AllocVar(coordListPt); hashAdd(coordHash, targetName, coordListPt); } else { coordListPt = el->val; } slAddHead(coordListPt,coord); verbose(2,"# %s\t%u\t%u\t%u\t%.4f\t%d %s:%d-%d %s\n", psl->qName, psl->qSize, tSize, tSize - psl->qSize, percentCoverage, chrCount, psl->tName, psl->tStart, psl->tEnd, psl->strand); } else { verbose(3,"# %s\t%u\t%u\t%u\t%.4f\t%d %s:%d-%d %s\n", psl->qName, psl->qSize, tSize, tSize - psl->qSize, percentCoverage, chrCount, psl->tName, psl->tStart, psl->tEnd, psl->strand); } freeMem(accName); freeMem(targetName); pslFree(&psl); } if (partCount > 0) processResult(chrHash, coordHash, prevAccName, querySize, partsConsidered); else verbose(1,"# ERROR %s %s - no coordinates found\n", prevTargetName, prevAccName); freeMem(prevAccName); freeHash(&chrHash); freeHash(&coordHash); lineFileClose(&lf); } } /* static void clusterClone() */