void pslSort(char *command, char *outFile, char *tempDir, char *inDirs[], int inDirCount) /* Do the two step sort. */ { int i; struct slName *fileList = NULL, *name; char *inDir; struct slName *dirDir, *dirFile; char fileName[512]; int fileCount; int totalFilesProcessed = 0; int filesPerMidFile; int midFileCount = 0; FILE *f; struct lineFile *lf; boolean doReflect = FALSE; boolean suppressSelf = FALSE; boolean firstOnly = endsWith(command, "1"); boolean secondOnly = endsWith(command, "2"); if (startsWith("dirs", command)) ; else if (startsWith("g2g", command)) { doReflect = TRUE; suppressSelf = TRUE; } else usage(); if (!secondOnly) { makeDir(tempDir); /* Figure out how many files to process. */ for (i=0; i<inDirCount; ++i) { inDir = inDirs[i]; dirDir = listDir(inDir, "*.psl"); if (slCount(dirDir) == 0) dirDir = listDir(inDir, "*.psl.gz"); if (slCount(dirDir) == 0) errAbort("No psl files in %s\n", inDir); verbose(1, "%s with %d files\n", inDir, slCount(dirDir)); for (dirFile = dirDir; dirFile != NULL; dirFile = dirFile->next) { sprintf(fileName, "%s/%s", inDir, dirFile->name); name = newSlName(fileName); slAddHead(&fileList, name); } slFreeList(&dirDir); } verbose(1, "%d files in %d dirs\n", slCount(fileList), inDirCount); slReverse(&fileList); fileCount = slCount(fileList); filesPerMidFile = round(sqrt(fileCount)); // if (filesPerMidFile > 20) // filesPerMidFile = 20; /* bandaide! Should keep track of mem usage. */ verbose(1, "Got %d files %d files per mid file\n", fileCount, filesPerMidFile); /* Read in files a group at a time, sort, and write merged, sorted * output of one group. 
*/ name = fileList; while (totalFilesProcessed < fileCount) { int filesInMidFile = 0; struct psl *pslList = NULL, *psl; int lfileCount = 0; struct lm *lm = lmInit(256*1024); for (filesInMidFile = 0; filesInMidFile < filesPerMidFile && name != NULL; ++filesInMidFile, ++totalFilesProcessed, name = name->next) { boolean reflectMe = FALSE; if (doReflect) { reflectMe = !selfFile(name->name); } verbose(2, "Reading %s (%d of %d)\n", name->name, totalFilesProcessed+1, fileCount); lf = pslFileOpen(name->name); while ((psl = nextLmPsl(lf, lm)) != NULL) { if (psl->qStart == psl->tStart && psl->strand[0] == '+' && suppressSelf && sameString(psl->qName, psl->tName)) { continue; } ++lfileCount; slAddHead(&pslList, psl); if (reflectMe) { psl = mirrorLmPsl(psl, lm); slAddHead(&pslList, psl); } } lineFileClose(&lf); } slSort(&pslList, pslCmpQuery); makeMidName(tempDir, midFileCount, fileName); verbose(1, "Writing %s\n", fileName); f = mustOpen(fileName, "w"); if (!nohead) pslWriteHead(f); for (psl = pslList; psl != NULL; psl = psl->next) { pslTabOut(psl, f); } fclose(f); pslList = NULL; lmCleanup(&lm); verbose(2, "lfileCount %d\n", lfileCount); ++midFileCount; } } if (!firstOnly) pslSort2(outFile, tempDir); }
/* Sort the roughAli list whose head pointer is stored at pAli, by passing it
 * to slSort with cmpRoughAli as the comparison function. */
void sortRoughAlis(struct roughAli **pAli)
{
    slSort(pAli, cmpRoughAli);
}
void txGeneCanonical(char *codingCluster, char *infoFile, char *noncodingGraph, char *genesBed, char *nearCoding, char *outCanonical, char *outIsoforms, char *outClusters) /* txGeneCanonical - Pick a canonical version of each gene - that is the form * to use when just interested in a single splicing varient. Produces final * transcript clusters as well. */ { /* Read in input into lists in memory. */ struct txCluster *coding, *codingList = txClusterLoadAll(codingCluster); struct txGraph *graph, *graphList = txGraphLoadAll(noncodingGraph); struct bed *bed, *nextBed, *bedList = bedLoadNAll(genesBed, 12); struct txInfo *info, *infoList = txInfoLoadAll(infoFile); struct bed *nearList = bedLoadNAll(nearCoding, 12); /* Make hash of all beds. */ struct hash *bedHash = hashNew(18); for (bed = bedList; bed != NULL; bed = bed->next) hashAdd(bedHash, bed->name, bed); /* Make has of all info. */ struct hash *infoHash = hashNew(18); for (info = infoList; info != NULL; info = info->next) hashAdd(infoHash, info->name, info); /* Make a binKeeper structure that we'll populate with coding genes. */ struct hash *sizeHash = minChromSizeFromBeds(bedList); struct hash *keeperHash = minChromSizeKeeperHash(sizeHash); /* Make list of coding genes and toss them into binKeeper. * This will eat up bed list, but bedHash is ok. */ struct gene *gene, *geneList = NULL; for (coding = codingList; coding != NULL; coding = coding->next) { gene = geneFromCluster(coding, bedHash, infoHash); slAddHead(&geneList, gene); struct binKeeper *bk = hashMustFindVal(keeperHash, gene->chrom); binKeeperAdd(bk, gene->start, gene->end, gene); } /* Go through near-coding genes and add them to the coding gene * they most overlap. */ for (bed = nearList; bed != NULL; bed = nextBed) { nextBed = bed->next; gene = mostOverlappingGene(keeperHash, bed); if (gene == NULL) errAbort("%s is near coding, but doesn't overlap any coding!?", bed->name); geneAddBed(gene, bed); } /* Add non-coding genes. 
*/ for (graph = graphList; graph != NULL; graph = graph->next) { gene = geneFromGraph(graph, bedHash); slAddHead(&geneList, gene); } /* Sort so it all looks nicer. */ slSort(&geneList, geneCmp); /* Open up output files. */ FILE *fCan = mustOpen(outCanonical, "w"); FILE *fIso = mustOpen(outIsoforms, "w"); FILE *fClus = mustOpen(outClusters, "w"); /* Loop through, making up gene name, and writing output. */ int geneId = 0; for (gene = geneList; gene != NULL; gene = gene->next) { /* Make up name. */ char name[16]; safef(name, sizeof(name), "g%05d", ++geneId); /* Reverse transcript list just to make it look better. */ slReverse(&gene->txList); /* Write out canonical file output */ bed = hashMustFindVal(bedHash, gene->niceTx->name); fprintf(fCan, "%s\t%d\t%d\t%d\t%s\t%s\n", bed->chrom, bed->chromStart, bed->chromEnd, geneId, gene->niceTx->name, gene->niceTx->name); /* Write out isoforms output. */ for (bed = gene->txList; bed != NULL; bed = bed->next) fprintf(fIso, "%d\t%s\n", geneId, bed->name); /* Write out cluster output, starting with bed 6 standard fields. */ fprintf(fClus, "%s\t%d\t%d\t%s\t%d\t%c\t", gene->chrom, gene->start, gene->end, name, 0, gene->strand); /* Write out thick-start/thick end. */ if (gene->isCoding) { int thickStart = gene->end, thickEnd = gene->start; for (bed = gene->txList; bed != NULL; bed = bed->next) { if (bed->thickStart < bed->thickEnd) { thickStart = min(thickStart, bed->thickStart); thickEnd = max(thickEnd, bed->thickEnd); } } fprintf(fClus, "%d\t%d\t", thickStart, thickEnd); } else { fprintf(fClus, "%d\t%d\t", gene->start, gene->start); } /* We got no rgb value, just write out zero. */ fprintf(fClus, "0\t"); /* Get exons from exonTree. 
*/ struct range *exon, *exonList = rangeTreeList(gene->exonTree); fprintf(fClus, "%d\t", slCount(exonList)); for (exon = exonList; exon != NULL; exon = exon->next) fprintf(fClus, "%d,", exon->start - gene->start); fprintf(fClus, "\t"); for (exon = exonList; exon != NULL; exon = exon->next) fprintf(fClus, "%d,", exon->end - exon->start); fprintf(fClus, "\t"); /* Write out associated transcripts. */ fprintf(fClus, "%d\t", slCount(gene->txList)); for (bed = gene->txList; bed != NULL; bed = bed->next) fprintf(fClus, "%s,", bed->name); fprintf(fClus, "\t"); /* Write out nice value */ fprintf(fClus, "%s\t", gene->niceTx->name); /* Write out coding/noncoding value. */ fprintf(fClus, "%d\n", gene->isCoding); } /* Close up files. */ carefulClose(&fCan); carefulClose(&fIso); carefulClose(&fClus); }
struct trackDb *showTrackField(struct grp *selGroup, char *trackVar, char *trackScript,
                               boolean disableNoGenome)
/* Show track control.  Returns selected track.
 *
 * For the "allTables" group a database drop-down is printed instead and NULL
 * is returned.  Otherwise one OPTION is printed per track in the group (or
 * per track in fullTrackList for "allTracks", which is also sorted by short
 * label).  When disableNoGenome is set and the selected track turns out to be
 * disabled, the first enabled track is returned instead. */
{
    struct trackDb *chosen = NULL;

    if (trackScript == NULL)
        trackScript = "";

    if (sameString(selGroup->name, "allTables")) {
        char *currentDb = findSelDb();
        struct slName *dbNames = getDbListForGenome();
        struct slName *dbEl;

        hPrintf("<B>database:</B>\n");
        hPrintf("<SELECT NAME=%s %s>\n", trackVar, trackScript);
        dbEl = dbNames;
        while (dbEl != NULL) {
            char *selectedAttr = "";
            if (sameString(dbEl->name, currentDb))
                selectedAttr = " SELECTED";
            hPrintf(" <OPTION VALUE=%s%s>%s\n", dbEl->name, selectedAttr, dbEl->name);
            dbEl = dbEl->next;
        }
        hPrintf("</SELECT>\n");
    } else {
        boolean everyTrack = sameString(selGroup->name, "allTracks");
        boolean chosenIsDisabled = FALSE;
        struct trackDb *firstUsable = NULL;
        struct trackDb *tdb;

        hPrintf("<B>track:</B>\n");
        hPrintf("<SELECT NAME=\"%s\" %s>\n", trackVar, trackScript);

        /* The selection is looked up before any re-sorting of the list. */
        chosen = findSelectedTrack(fullTrackList, everyTrack ? NULL : selGroup, trackVar);
        if (everyTrack)
            slSort(&fullTrackList, trackDbCmpShortLabel);

        for (tdb = fullTrackList; tdb != NULL; tdb = tdb->next) {
            /* Skip tracks outside the selected group. */
            if (!everyTrack && !sameString(selGroup->name, tdb->grp))
                continue;

            hPrintf(" <OPTION VALUE=\"%s\"", tdb->track);
            if (cartTrackDbIsNoGenome(database, tdb->table))
                hPrintf(NO_GENOME_CLASS);

            if (disableNoGenome && isNoGenomeDisabled(database, tdb->table)) {
                hPrintf(" DISABLED");
                if (tdb == chosen)
                    chosenIsDisabled = TRUE;
            } else if (firstUsable == NULL) {
                /* Remember the fallback in case the selection is disabled. */
                firstUsable = tdb;
            }

            if (tdb == chosen && !chosenIsDisabled)
                hPrintf(" SELECTED");
            hPrintf(">%s</OPTION>", tdb->shortLabel);
        }

        if (chosenIsDisabled)
            chosen = firstUsable;
        hPrintf("</SELECT>\n");
    }

    hPrintf("\n");
    return chosen;
}
static void showLinkedTables(struct joiner *joiner, struct dbTable *inList, char *varPrefix, char *buttonName, char *buttonText) /* Print section with list of linked tables and check boxes to turn them * on. */ { struct dbTable *outList = NULL, *out, *in; char dtName[256]; struct hash *uniqHash = newHash(0); struct hash *inHash = newHash(8); /* Build up list of tables we link to in outList. */ for (in = inList; in != NULL; in = in->next) { struct sqlConnection *conn = NULL; if (!trackHubDatabase(database)) conn = hAllocConn(in->db); struct joinerPair *jpList, *jp; /* Keep track of tables in inList. */ safef(dtName, sizeof(dtName), "%s.%s", inList->db, inList->table); hashAdd(inHash, dtName, NULL); /* First table in input is not allowed in output. */ if (in == inList) hashAdd(uniqHash, dtName, NULL); /* Scan through joining information and add tables, * avoiding duplicate additions. */ jpList = joinerRelate(joiner, in->db, in->table); for (jp = jpList; jp != NULL; jp = jp->next) { safef(dtName, sizeof(dtName), "%s.%s", jp->b->database, jp->b->table); if (!hashLookup(uniqHash, dtName) && !cartTrackDbIsAccessDenied(jp->b->database, jp->b->table)) { hashAdd(uniqHash, dtName, NULL); out = dbTableNew(jp->b->database, jp->b->table); slAddHead(&outList, out); } } joinerPairFreeList(&jpList); hFreeConn(&conn); } slSort(&outList, dbTableCmp); /* Print html. 
*/ if (outList != NULL) { webNewSection("Linked Tables"); hTableStart(); for (out = outList; out != NULL; out = out->next) { struct sqlConnection *conn = hAllocConn(out->db); struct asObject *asObj = asForTable(conn, out->table); char *var = dbTableVar(varPrefix, out->db, out->table); hPrintf("<TR>"); hPrintf("<TD>"); cgiMakeCheckBox(var, varOn(var)); hPrintf("</TD>"); hPrintf("<TD>%s</TD>", out->db); hPrintf("<TD>%s</TD>", out->table); hPrintf("<TD>"); if (asObj != NULL) hPrintf("%s", asObj->comment); else hPrintf(" "); hPrintf("</TD>"); hPrintf("</TR>"); hFreeConn(&conn); } hTableEnd(); hPrintf("<BR>"); cgiMakeButton(buttonName, buttonText); } }
int main(int argc, char *argv[]) { struct hash *bacHash; char line[1024]; int lineCount; char *words[256]; int wordCount; int fileIx; char *fileName; FILE *f; if (argc < 2) usage(); bacHash = newHash(16); for (fileIx = 1; fileIx < argc; ++fileIx) { fileName = argv[fileIx]; uglyf("Processing %s\n", fileName); f = mustOpen(fileName, "r"); lineCount = 0; while (fgets(line, sizeof(line), f)) { ++lineCount; wordCount = chopLine(line, words); if (wordCount == ArraySize(words)) errAbort("Too many words line %d of %s\n", lineCount, fileName); if (wordCount != 0) { char *bacName; int cIx; struct contigTrack *ctList = NULL, *ct; struct bacTrack *bt; struct hashEl *hel; /* Check line syntax and parse it. */ if (!sameString(words[1], "glues")) errAbort("Bad format line %d of %s\n", lineCount, fileName); bacName = words[2]; for (cIx = 4; cIx < wordCount; cIx += 5) { char *parts[3]; int partCount; AllocVar(ct); ct->ix = atoi(words[cIx]); ct->strand = words[cIx+1][0]; ct->dir = words[cIx+2][0]; partCount = chopString(words[cIx+3], "(-)", parts, ArraySize(parts)); if (partCount != 2) errAbort("Bad format line %d of %s\n", lineCount, fileName); ct->start = atoi(parts[0]); ct->end = atoi(parts[1]); ct->cookedScore = atof(words[cIx+4]); slAddHead(&ctList, ct); } slSort(&ctList, cmpContigTrack); /* Lookup bacTrack and make it if new. */ hel = hashLookup(bacHash, bacName); if (hel == NULL) { AllocVar(bt); hel = hashAdd(bacHash, bacName, bt); bt->name = hel->name; slAddHead(&bacList, bt); } else { bt = hel->val; } /* Process pairs into bacTrack. */ addPairs(bt, ctList); slFreeList(&ctList); } } fclose(f); } slSort(&bacList, cmpBacTrack); printStats(); return 0; }
/* Start at the calculated median point, scan through the * coordinates and adjust the start and end of the clustered region * to include the appropriate section. */ static int extendLimits(struct coordEl **coordListPt, unsigned median, unsigned querySize, unsigned *startExtended, unsigned *endExtended, char *ctgName, int partsConsidered) { struct coordEl *coord; unsigned halfLength = querySize / 2; boolean firstCoordinate = TRUE; int partsUsed = 0; int partsNotUsed = 0; char *cloneName = (char *)NULL; boolean tooManyParts = FALSE; if (halfLength > median) *startExtended = 0; else *startExtended = median - halfLength; *endExtended = median + halfLength; verbose(2,"# starting limits: %u - %u\n", *startExtended, *endExtended); /* sort the list descending by end coordinates */ slSort(coordListPt,endDescending); if (coordListPt) coord = *coordListPt; else coord = NULL; /* Walk through this list extending the start. * Same discussion as below, although reverse the sense of start * and end. Here the list is sorted in descending order by end * coordinate. Those end coordinates are compared with the * extending start coordinate to move it out. */ verbose(2,"# after end sort\n"); firstCoordinate = TRUE; while (coord != NULL) { if (firstCoordinate) { if (*endExtended > coord->end) { *endExtended = coord->end; verbose(2,"# end brought in to: %u\n", *endExtended); } firstCoordinate = FALSE; } verbose(2,"# %s %u - %u %u %u %c\n", coord->name, coord->start, coord->end, *startExtended, coord->qSize, (coord->strand == 1) ? 
'+' : '-'); if (coord->end < *startExtended) { unsigned gap = *startExtended - coord->end; if (gap > maxGap) { verbose(2,"# more than max Gap encountered: %u\n", gap); break; /* exit this while loop */ } *startExtended = coord->start; verbose(2,"# start extended to: %u\n", *startExtended); } else if (coord->start < *startExtended) { *startExtended = coord->start; verbose(2,"# start extended to: %u\n", *startExtended); } coord = coord->next; } /* sort the list by start coordinates */ slSort(coordListPt,startCompare); if (coordListPt) coord = *coordListPt; else coord = NULL; /* Walk through this list extending the end. The list is in order * by start coordinates. Going down that list checking the * extended end with these start coordinates, eventually we reach a * point where the start coordinates are past the end leaving a * gap. As long as the gap is within the specified maxGap limit, * then it is OK to jump to that next piece. The new end becomes * the end of this new piece. * And secondly, even if the starts aren't past the extending end, * the piece under examination may have a new end that is longer, * in which case the extending end moves to that point. */ verbose(2,"# extending end\n"); /* The first coordinate check will ensure that the extended start * coordinate is not less than the smallest start coordinate. * Thus, only the actual part coverage will determine the maximum * limits and we won't go beyond the parts. */ firstCoordinate = TRUE; while (coord != NULL) { if (firstCoordinate) { if (*startExtended < coord->start) { *startExtended = coord->start; verbose(2,"# start brought in to: %u\n", *startExtended); } firstCoordinate = FALSE; } verbose(2,"# %s %u %u - %u %u %c\n", coord->name, *endExtended, coord->start, coord->end, coord->qSize, (coord->strand == 1) ? 
'+' : '-'); if (coord->start > *endExtended) { unsigned gap = coord->start - *endExtended; if (gap > maxGap) { verbose(2,"# more than max Gap encountered: %u\n", gap); break; /* exit this while loop */ } *endExtended = coord->end; verbose(2,"# end extended to: %u\n", *endExtended); } else if (coord->end > *endExtended) { *endExtended = coord->end; verbose(2,"# end extended to: %u\n", *endExtended); } coord = coord->next; } /* Let's count the number of parts included in the decided range */ if (coordListPt) coord = *coordListPt; else coord = NULL; partsUsed = 0; partsNotUsed = 0; while (coord != NULL) { if ( (coord->start >= *startExtended) && (coord->end <= *endExtended)) ++partsUsed; else ++partsNotUsed; if (coord->next == NULL) cloneName = cloneString(coord->name); coord = coord->next; } if (partsUsed < 1) { verbose(1,"# ERROR %s %s - no parts found in the answer, %d considered\n", ctgName, cloneName, partsNotUsed); } else if (partsUsed > partsConsidered) { tooManyParts = TRUE; verbose(1,"# ERROR %s %s too many parts used: %d > %d considered\n", ctgName, cloneName, partsUsed, partsConsidered); } else if ((partsUsed + partsNotUsed) < 1) { verbose(1,"# ERROR %s %s - no parts found in the answer, used or unused\n", ctgName, cloneName); } else { verbose(2,"# %s %s total parts considered %d, parts used %d, parts unused %d, fraction %% %7.2f\n", ctgName, cloneName, partsUsed+partsNotUsed, partsUsed, partsNotUsed, 100.0 * (double) partsUsed / (double) (partsUsed+partsNotUsed) ); } /* If agp output, we need to do it here */ if (agp) { if (coordListPt) coord = *coordListPt; else coord = NULL; int cloneCount = 0; while (coord != NULL) { if ( (coord->start >= *startExtended) && (coord->end <= *endExtended)) { ++cloneCount; /* +1 to the start for 1 relative coordinates in the AGP file */ /* The status D will be fixed later */ /* These ones with tooManyParts need to be fixed elsewhere */ /* If allowDuplicates is requested, let them be used */ if(tooManyParts && (! 
allowDuplicates) ) { verbose(1,"#AGP %s\t%u\t%u\t%d\tD\t%s\t%u\t%u\t%c\n", ctgName, coord->start+1, coord->end, cloneCount, coord->name, coord->start - *startExtended + 1, coord->end - *startExtended, (coord->strand == 1) ? '+' : '-'); } else { printf("%s\t%u\t%u\t%d\tD\t%s\t%u\t%u\t%c\n", ctgName, coord->start+1, coord->end, cloneCount, coord->name, coord->start - *startExtended + 1, coord->end - *startExtended, (coord->strand == 1) ? '+' : '-'); } } coord = coord->next; } } freeMem(cloneName); return (partsUsed); } /* static int extendLimits() */
static void parseBedGraphSection(struct lineFile *lf, boolean clipDontDie,
        struct hash *chromSizeHash, struct lm *lm, int itemsPerSlot,
        struct bwgSection **pSectionList)
/* Parse out bedGraph section until we get to something that is not in bedGraph format.
 *
 * lf            - input; reading stops at end of file or at a step-type
 *                 line, which is pushed back for the caller.
 * clipDontDie   - if TRUE, items ending past the chromosome size are warned
 *                 about and dropped; otherwise they abort.
 * chromSizeHash - chromosome sizes, or NULL to skip the size check.
 * lm            - pool the items and sections are allocated from.
 * itemsPerSlot  - maximum number of items per section; must be positive.
 * pSectionList  - sections are added to the head of this list. */
{
    /* A non-positive slot size would never advance the sectioning loop below. */
    if (itemsPerSlot < 1)
        errAbort("parseBedGraphSection: itemsPerSlot must be positive, got %d",
                 itemsPerSlot);

    /* Set up hash and list to store chromosomes. */
    struct hash *chromHash = hashNew(0);
    struct bedGraphChrom *chrom, *chromList = NULL;

    /* Collect lines in items on appropriate chromosomes. */
    struct bwgBedGraphItem *item;
    char *line;
    while (lineFileNextReal(lf, &line)) {
        /* Check for end of section. */
        if (stepTypeLine(line)) {
            lineFileReuse(lf);
            break;
        }

        /* Parse out our line and make sure it has exactly 4 columns. */
        char *words[5];
        int wordCount = chopLine(line, words);
        lineFileExpectWords(lf, 4, wordCount);

        /* Get chromosome. */
        char *chromName = words[0];
        chrom = hashFindVal(chromHash, chromName);
        if (chrom == NULL) {
            lmAllocVar(chromHash->lm, chrom);
            hashAddSaveName(chromHash, chromName, chrom, &chrom->name);
            chrom->size = (chromSizeHash ? hashIntVal(chromSizeHash, chromName) : BIGNUM);
            slAddHead(&chromList, chrom);
        }

        /* Convert to item and add to chromosome list. */
        lmAllocVar(lm, item);
        item->start = lineFileNeedNum(lf, words, 1);
        item->end = lineFileNeedNum(lf, words, 2);
        item->val = lineFileNeedDouble(lf, words, 3);

        /* Do sanity checking on coordinates. */
        if (item->start > item->end)
            errAbort("bedGraph error: start (%u) after end (%u) line %d of %s.",
                     item->start, item->end, lf->lineIx, lf->fileName);
        if (item->end > chrom->size) {
            warn("bedGraph error line %d of %s: chromosome %s has size %u but item ends at %u",
                 lf->lineIx, lf->fileName, chrom->name, chrom->size, item->end);
            if (!clipDontDie)
                noWarnAbort();
        } else {
            slAddHead(&chrom->itemList, item);
        }
    }
    slSort(&chromList, bedGraphChromCmpName);

    for (chrom = chromList; chrom != NULL; chrom = chrom->next) {
        slSort(&chrom->itemList, bwgBedGraphItemCmp);

        /* Break up into sections of no more than items-per-slot size. */
        struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList;
        for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem) {
            /* Find end item of this section, and start item for next section.
             * Terminate list at end item. */
            int sectionSize = 0;
            int i;
            endItem = startItem;
            for (i = 0; i < itemsPerSlot; ++i) {
                if (nextStartItem == NULL)
                    break;
                endItem = nextStartItem;
                nextStartItem = nextStartItem->next;
                ++sectionSize;
            }
            endItem->next = NULL;

            /* Fill in section and add it to section list.  The chromosome
             * name is cloned because the hash that owns it is freed below. */
            struct bwgSection *section;
            lmAllocVar(lm, section);
            section->chrom = cloneString(chrom->name);
            section->start = startItem->start;
            section->end = endItem->end;
            section->type = bwgTypeBedGraph;
            section->items.bedGraphList = startItem;
            section->itemCount = sectionSize;
            slAddHead(pSectionList, section);
        }
    }

    /* Free up hash, no longer needed.  Frees chromList as a side effect since
     * chromList is in hash's memory. */
    hashFree(&chromHash);
    chromList = NULL;
}
struct bwgSection *bwgParseWig(
        char *fileName,             /* Name of ascii wig file. */
        boolean clipDontDie,        /* Skip items outside chromosome rather than aborting. */
        struct hash *chromSizeHash, /* If non-NULL items checked to be inside chromosome. */
        int maxSectionSize,         /* Biggest size of a section.  100 - 100,000 is usual range. */
        struct lm *lm)              /* Memory pool to allocate from. */
/* Parse out ascii wig file - allocating memory in lm.
 *
 * Lines containing "chrom=" start a stepped section; any other line must be
 * a four-column bedGraph line, which starts a bedGraph section.  Returns the
 * sections sorted with bwgSectionCmp, and aborts if two neighbouring sections
 * on the same chromosome overlap. */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    char *line;
    struct bwgSection *sectionList = NULL;

    /* remove initial browser and track lines */
    lineFileRemoveInitialCustomTrackLines(lf);

    while (lineFileNextReal(lf, &line)) {
        verbose(2, "processing %s\n", line);
        if (stringIn("chrom=", line))
            parseSteppedSection(lf, clipDontDie, chromSizeHash, line, lm,
                                maxSectionSize, &sectionList);
        else {
            /* Check for bed...  Chop a copy so the original line can be
             * pushed back intact for the bedGraph parser. */
            char *dupe = cloneString(line);
            char *words[5];
            int wordCount = chopLine(dupe, words);
            if (wordCount != 4)
                errAbort("Unrecognized line %d of %s:\n%s\n", lf->lineIx, lf->fileName, line);

            /* Parse out a bed graph line just to check numerical format. */
            char *chrom = words[0];
            int start = lineFileNeedNum(lf, words, 1);
            int end = lineFileNeedNum(lf, words, 2);
            double val = lineFileNeedDouble(lf, words, 3);
            verbose(2, "bedGraph %s:%d-%d@%g\n", chrom, start, end, val);

            /* words[] point into dupe and are not used past this point. */
            freeMem(dupe);

            /* Push back line and call bed parser. */
            lineFileReuse(lf);
            parseBedGraphSection(lf, clipDontDie, chromSizeHash, lm,
                                 maxSectionSize, &sectionList);
        }
    }
    slSort(&sectionList, bwgSectionCmp);

    /* Check for overlap between neighbours in the sorted list. */
    struct bwgSection *section, *nextSection;
    for (section = sectionList; section != NULL; section = nextSection) {
        nextSection = section->next;
        if (nextSection != NULL) {
            if (sameString(section->chrom, nextSection->chrom)) {
                if (section->end > nextSection->start) {
                    errAbort("There's more than one value for %s base %d (in coordinates that start with 1).\n",
                             section->chrom, nextSection->start+1);
                }
            }
        }
    }

    return sectionList;
}
void doTransRegCodeProbe(struct trackDb *tdb, char *item, char *codeTable, char *motifTable, char *tfToConditionTable, char *conditionTable) /* Display detailed info on a ChIP-chip probe from transRegCode experiments. */ { char query[256]; struct sqlResult *sr; char **row; int rowOffset = hOffsetPastBin(database, seqName, tdb->table); struct sqlConnection *conn = hAllocConn(database); struct transRegCodeProbe *probe = NULL; cartWebStart(cart, database, "ChIP-chip Probe Info"); sqlSafef(query, sizeof(query), "select * from %s where name = '%s'", tdb->table, item); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) probe = transRegCodeProbeLoad(row+rowOffset); sqlFreeResult(&sr); if (probe != NULL) { struct tfData *tfList = NULL, *tf; struct hash *tfHash = newHash(0); struct transRegCode *trc; int i; /* Print basic info. */ printf("<B>Name:</B> %s<BR>\n", probe->name); printPosOnChrom(probe->chrom, probe->chromStart, probe->chromEnd, NULL, TRUE, probe->name); /* Make up list of all transcriptionFactors. */ for (i=0; i<probe->tfCount; ++i) { /* Parse out factor and condition. */ char *tfName = probe->tfList[i]; char *condition = strchr(tfName, '_'); struct tfCond *cond; if (condition != NULL) *condition++ = 0; else condition = "n/a"; tf = hashFindVal(tfHash, tfName); if (tf == NULL) { AllocVar(tf); hashAddSaveName(tfHash, tfName, tf, &tf->name); slAddHead(&tfList, tf); } AllocVar(cond); cond->name = cloneString(condition); cond->binding = probe->bindVals[i]; slAddHead(&tf->conditionList, cond); } slSort(&tfList, tfDataCmpName); /* Fold in motif hits in region. 
*/ if (sqlTableExists(conn, codeTable)) { sr = hRangeQuery(conn, codeTable, probe->chrom, probe->chromStart, probe->chromEnd, "chipEvidence != 'none'", &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { trc = transRegCodeLoad(row+rowOffset); tf = hashFindVal(tfHash, trc->name); if (tf != NULL) slAddTail(&tf->trcList, trc); } sqlFreeResult(&sr); } if (tfList == NULL) printf("No significant immunoprecipitation."); else { tfBindLevelSection(tfList, conn, motifTable, tfToConditionTable); } transRegCodeProbeFree(&probe); growthConditionSection(conn, conditionTable); } printf("\n<HR>\n"); printTrackHtml(tdb); hFreeConn(&conn); }
static void parseVariableStepSection(struct lineFile *lf, boolean clipDontDie,
        struct lm *lm, int itemsPerSlot, char *chrom, int chromSize,
        bits32 span, struct bwgSection **pSectionList)
/* Read the two column (position, value) data in section until get to end.
 *
 * lf           - input; reading stops at end of file or at the next section
 *                start, which is pushed back for the caller.
 * clipDontDie  - if TRUE, items reaching past chromSize are warned about and
 *                dropped; otherwise they abort.
 * lm           - pool the packed arrays and sections are allocated from.
 * itemsPerSlot - maximum number of items per section; must be positive.
 * chrom        - chromosome name; stored in each section without copying.
 * chromSize    - chromosome size used for the bounds check.
 * span         - bases covered by each item.
 * pSectionList - sections are added to the head of this list. */
{
    /* A non-positive slot size would never advance the sectioning loop and
     * would index packed[-1]. */
    if (itemsPerSlot < 1)
        errAbort("parseVariableStepSection: itemsPerSlot must be positive, got %d",
                 itemsPerSlot);

    /* Scratch pool for the list form of the items; freed before returning. */
    struct lm *lmLocal = lmInit(0);

    /* Stream through section until get to end of file or next section,
     * adding values from the two columns to list. */
    char *words[2];
    char *line;
    struct bwgVariableStepItem *item, *itemList = NULL;
    int originalSectionSize = 0;
    while (lineFileNextReal(lf, &line)) {
        if (steppedSectionEnd(line, 2)) {
            lineFileReuse(lf);
            break;
        }
        /* Both columns must be present, otherwise words[1] is unset. */
        int wordCount = chopLine(line, words);
        if (wordCount < 2)
            errAbort("line %d of %s: expecting 2 words, got %d",
                     lf->lineIx, lf->fileName, wordCount);
        lmAllocVar(lmLocal, item);
        int start = lineFileNeedNum(lf, words, 0);
        if (start <= 0) {
            errAbort("line %d of %s: zero or negative chromosome coordinate not allowed",
                     lf->lineIx, lf->fileName);
        }
        /* Input positions are 1-based; items are stored 0-based. */
        item->start = start - 1;
        item->val = lineFileNeedDouble(lf, words, 1);
        if (item->start + span > chromSize) {
            warn("line %d of %s: chromosome %s has %u bases, but item ends at %u",
                 lf->lineIx, lf->fileName, chrom, chromSize, item->start + span);
            if (!clipDontDie)
                noWarnAbort();
        } else {
            slAddHead(&itemList, item);
            ++originalSectionSize;
        }
    }
    slSort(&itemList, bwgVariableStepItemCmp);

    /* Break up into sections of no more than items-per-slot size. */
    int sizeLeft = originalSectionSize;
    for (item = itemList; item != NULL; ) {
        /* Figure out size of this section */
        int sectionSize = sizeLeft;
        if (sectionSize > itemsPerSlot)
            sectionSize = itemsPerSlot;
        sizeLeft -= sectionSize;

        /* Convert from list to array representation. */
        struct bwgVariableStepPacked *packed, *p;
        p = lmAllocArray(lm, packed, sectionSize);
        int i;
        for (i = 0; i < sectionSize; ++i) {
            p->start = item->start;
            p->val = item->val;
            item = item->next;
            ++p;
        }

        /* Fill in section and add it to list. */
        struct bwgSection *section;
        lmAllocVar(lm, section);
        section->chrom = chrom;
        section->start = packed[0].start;
        section->end = packed[sectionSize-1].start + span;
        section->type = bwgTypeVariableStep;
        section->items.variableStepPacked = packed;
        section->itemSpan = span;
        section->itemCount = sectionSize;
        slAddHead(pSectionList, section);
    }
    lmCleanup(&lmLocal);
}
static void tfBindLevelSection(struct tfData *tfList, struct sqlConnection *conn, char *motifTable, char *tfToConditionTable) /* Print info on individual transcription factors that bind * with e-val between minVal and maxVal. */ { struct tfData *tf; struct transRegCode *trc; webNewSection("Transcription Factors Showing IP Over this Probe "); hTableStart(); printf("<TR>"); colLabel("Transcription", 1); colLabel("Growth Condition", 3); colLabel("Motif Information", 3); printf("</TR>\n"); printf("<TR>"); colLabel("Factor", 1); colLabel("Good IP (P<0.001)", 1); colLabel("Weak IP (P<0.005)", 1); colLabel("No IP (P>0.005)", 1); colLabel("Hits", 1); colLabel("Scores", 1); colLabel("Conservation (2 max)", 1); printf("</TR>\n"); for (tf = tfList; tf != NULL; tf = tf->next) { struct hash *boundHash = newHash(8); slSort(&tf->conditionList, tfCondCmpName); printf("<TR>"); /* Print transcription name. */ printf("<TD>"); sacCerHgGeneLinkName(conn, tf->name); printf("</TD>"); /* Print stong and weak growth conditions. */ ipPrintInRange(tf->conditionList, 0.0, 0.002, boundHash); ipPrintInRange(tf->conditionList, 0.002, 0.006, boundHash); /* Grab list of all conditions tested from database and * print out ones not in strong or weak as none. */ { char query[256], **row; struct sqlResult *sr; boolean isFirst = TRUE; boolean gotAny = FALSE; sqlSafef(query, sizeof(query), "select growthCondition from %s where name='%s'", tfToConditionTable, tf->name); sr = sqlGetResult(conn, query); printf("<TD>"); while ((row = sqlNextRow(sr)) != NULL) { if (!hashLookup(boundHash, row[0])) { if (isFirst) isFirst = FALSE; else printf(", "); printf("%s", row[0]); gotAny = TRUE; } } sqlFreeResult(&sr); if (!gotAny) printf(" "); printf("</TD>"); } /* Print motif info. */ if (tf->trcList == NULL) printf("<TD>0</TD><TD>n/a</TD><TD>n/a</TD>\n"); else { printf("<TD>%d</TD>", slCount(tf->trcList)); /* Print scores. 
*/ printf("<TD>"); for (trc = tf->trcList; trc != NULL; trc = trc->next) { double score; if (trc != tf->trcList) printf(", "); score = motifScoreHere( trc->chrom, trc->chromStart, trc->chromEnd, trc->name, motifTable); transRegCodeAnchor(trc); printf("%3.1f</A>", score); } printf("</TD><TD>"); for (trc = tf->trcList; trc != NULL; trc = trc->next) { if (trc != tf->trcList) printf(", "); printf("%d", trc->consSpecies); } printf("</TD>"); } printf("</TR>\n"); hashFree(&boundHash); } hTableEnd(); }
struct nameOff *scanIntronFile(char *preIntronQ, char *startIntronQ,
	char *endIntronQ, char *postIntronQ, boolean invert)
/* Scan introns.txt in wormCdnaDir() and return a list, sorted by cmpCounts,
 * of the introns whose flanking/boundary sequence matches the four query
 * patterns (an empty pattern matches anything).  If invert is TRUE the
 * introns that do NOT match are returned instead.
 *
 * Columns used from each non-blank line (whitespace separated):
 *   0 cdna count, 1 chrom, 2 start, 3 end, 5 name,
 *   6 sequence before intron, 7 start of intron, 8 end of intron,
 *   9 sequence after intron, 10.. names of the supporting cdnas.
 * preIntronQ and endIntronQ are matched against the tail of their field,
 * startIntronQ and postIntronQ against the head.
 *
 * Aborts if a query is too long, if a line has fewer than 11 words, if the
 * file path does not fit its buffer, or if a selected intron has fewer than
 * two bases in its start/end field.  Every selected intron is also passed to
 * addIntronToHistogram.  The caller owns the returned list. */
{
char intronFileName[600];
int nameLen;
FILE *f;
char lineBuf[4*1024];
char *words[4*128];
int wordCount;
int lineCount = 0;
int preLenQ = strlen(preIntronQ);
int startLenQ = strlen(startIntronQ);
int endLenQ = strlen(endIntronQ);
int postLenQ = strlen(postIntronQ);
char *preIntronF, *startIntronF, *endIntronF, *postIntronF;
int preLenF, startLenF, endLenF, postLenF;
int preIx = 6, startIx = 7, endIx =8, postIx = 9;
struct nameOff *list = NULL, *el;
boolean addIt;
int i;

if (preLenQ > 25 || postLenQ > 25 || startLenQ > 40 || endLenQ > 40)
    {
    errAbort("Can only handle queries up to 25 bases on either side of the intron "
	     "and 40 bases inside the intron.");
    }

/* Build the path with a bounded write; a long cdna directory used to
 * overflow intronFileName. */
nameLen = snprintf(intronFileName, sizeof(intronFileName), "%s%s",
	wormCdnaDir(), "introns.txt");
if (nameLen < 0 || nameLen >= (int)sizeof(intronFileName))
    errAbort("Path to introns.txt is too long (limit %d characters)",
	(int)sizeof(intronFileName) - 1);
f = mustOpen(intronFileName, "r");

while (fgets(lineBuf, sizeof(lineBuf), f) != NULL)
    {
    ++lineCount;
    wordCount = chopByWhite(lineBuf, words, ArraySize(words));
    if (wordCount == ArraySize(words))
	{
	warn("May have truncated end of line %d of %s", lineCount, intronFileName);
	}
    if (wordCount == 0)
	continue;
    if (wordCount < 11)
	errAbort("Unexpected short line %d of %s", lineCount, intronFileName);
    preIntronF = words[preIx];
    startIntronF = words[startIx];
    endIntronF = words[endIx];
    postIntronF = words[postIx];
    preLenF = strlen(preIntronF);
    startLenF = strlen(startIntronF);
    endLenF = strlen(endIntronF);
    postLenF = strlen(postIntronF);

    /* NOTE(review): the tail matches below step back preLenQ/endLenQ bytes
     * from the end of the field, so they assume the field is at least as
     * long as the query; confirm introns.txt guarantees that. */
    addIt = FALSE;
    if (   (  preLenQ == 0 || patMatch(preIntronQ, preIntronF+preLenF-preLenQ+countSpecial(preIntronQ), preLenQ))
	&& (startLenQ == 0 || patMatch(startIntronQ, startIntronF, startLenQ))
	&& (  endLenQ == 0 || patMatch(endIntronQ, endIntronF+endLenF-endLenQ+countSpecial(endIntronQ), endLenQ))
	&& ( postLenQ == 0 || patMatch(postIntronQ, postIntronF, postLenQ)) )
	{
	addIt = TRUE;
	}
    if (invert)
	addIt = !addIt;
    if (addIt)
	{
	/* The two-base copies below would read outside the field if the
	 * intron boundary fields were shorter than two characters. */
	if (startLenF < 2 || endLenF < 2)
	    errAbort("Intron ends shorter than 2 bases line %d of %s",
		lineCount, intronFileName);
	addIntronToHistogram(preIntronF+preLenF, startIntronF, endIntronF+endLenF, postIntronF);
	AllocVar(el);
	el->chrom = cloneString(words[1]);
	el->name = cloneString(words[5]);
	el->start = atoi(words[2]);
	el->end = atoi(words[3]);
	el->cdnaCount = atoi(words[0]);
	memcpy(el->startI, startIntronF, 2);		/* first two intron bases */
	memcpy(el->endI, endIntronF + endLenF - 2, 2);	/* last two intron bases */
	assert(wordCount == el->cdnaCount + 10);
	for (i=10; i<wordCount; ++i)
	    {
	    struct slName *name = newSlName(words[i]);
	    slAddHead(&el->cdnaNames, name);
	    }
	slReverse(&el->cdnaNames);
	assert(slCount(el->cdnaNames) == el->cdnaCount);
	slAddHead(&list, el);
	}
    }
fclose(f);
slSort(&list, cmpCounts);
return list;
}
void doFrame(struct sqlConnection *conn, boolean forceImageToList)
/* Emit the top-level HTML frameset page: a controls frame on top and,
 * below it, a thumbnail list frame next to an image frame.
 * Before printing, run the search in the cart's list spec, reduce it to one
 * match per image file, weigh and sort the matches, save them to a trash
 * file and store that file name and the current image id in the cart.
 * If forceImageToList is set the current image becomes the first match
 * (or 0 when there are no matches). */
{
int curImage = cartUsualInt(cart, hgpId, 0);
char *sessionUrl = cartSidUrlString(cart);
char *searchSpec = cartUsualString(cart, hgpListSpec, "");
struct tempName trashName;
char *savedMatches = NULL;
struct visiMatch *hits = visiSearch(conn, searchSpec);

#ifdef SOON
if (!cartUsualBoolean(cart, hgpIncludeMutants, FALSE))
    hits = removeMutants(conn, hits);
#endif /* SOON */

/* Rank the search results. */
hits = onePerImageFile(conn, hits);
weighMatches(conn, hits);
slSort(&hits, visiMatchCmpWeight);

if (forceImageToList)
    curImage = (hits != NULL) ? hits->imageId : 0;

/* Persist the match list and remember it, plus the image, in the cart. */
trashDirFile(&trashName, "vg", "visiMatch", ".tab");
savedMatches = trashName.forCgi;
saveMatchFile(savedMatches, hits);
cartSetString(cart, hgpMatchFile, savedMatches);
cartSetInt(cart, hgpId, curImage);

/* Document head. */
puts("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Frameset//EN\" \"http://www.w3.org/TR/html4/frameset.dtd\">");
printf("<HTML>\n");
printf("<HEAD>\n");
printf("<TITLE>\n");
printf("%s ", hgVisiGeneShortName());
printf("%s",titleMessage);
printf("</TITLE>\n");
printf("</HEAD>\n");

/* Outer frameset: control bar above everything else. */
printf(" <frameset rows=\"27,*\">\n");
printf(" <frame name=\"controls\" src=\"%s?%s=go&%s&%s=%d\" noresize marginwidth=\"0\" marginheight=\"0\" frameborder=\"0\">\n",
    hgVisiGeneCgiName(), hgpDoControls, sessionUrl, hgpId, curImage);

/* Inner frameset: thumbnail list on the left, image on the right. */
printf(" <frameset cols=\"230,*\"> \n");
printf(" <frame src=\"%s?%s=go&%s&%s=%d\" noresize frameborder=\"0\" name=\"list\">\n",
    hgVisiGeneCgiName(), hgpDoThumbnails, sessionUrl, hgpId, curImage);
printf(" <frame src=\"%s?%s=go&%s&%s=%d\" name=\"image\" noresize frameborder=\"0\">\n",
    hgVisiGeneCgiName(), hgpDoImage, sessionUrl, hgpId, curImage);
printf(" </frameset>\n");

/* Fallback for browsers without frame support. */
printf(" <noframes>\n");
printf(" <body>\n");
printf(" <p>This web page uses frames, but your browser doesn't support them.</p>\n");
printf(" </body>\n");
printf(" </noframes>\n");
printf("</frameset>\n");
printf("</HTML>\n");
}
void hgLoadChromGraph(boolean doLoad, char *db, char *track, char *fileName)
/* hgLoadChromGraph - Load up chromosome graph.
 * Reads the graph from fileName (translating ids through the -idTable
 * option's table when it is given), optionally converts values to
 * -log10 (-minusLog10 option), sorts with chromGraphCmp and writes a tab
 * file for track in the current directory.
 * When doLoad is TRUE the tab file is loaded into table track in db and
 * removed, track.cgb is written, and the track's row in metaChromGraph is
 * replaced with the value range and the .cgb path (under -pathPrefix, or
 * /gbdb/<db>/chromGraph by default).  When doLoad is FALSE only the tab
 * file is produced.  Aborts if the input is empty. */
{
double minVal,maxVal;
struct chromGraph *el, *list;
FILE *f;
char *tempDir = ".";
char path[PATH_LEN], gbdbPath[PATH_LEN];
char *idTable = optionVal("idTable", NULL);
char *pathPrefix = NULL;
boolean minusLog10 = optionExists("minusLog10");

if (idTable == NULL)
    list = chromGraphLoadAll(fileName);
else
    list = chromGraphListWithTable(fileName, db, idTable);
if (list == NULL)
    errAbort("%s is empty", fileName);

/* Convert values first, over the WHOLE list.  This used to happen inside
 * the min/max loop, which starts at the second element, so the first value
 * was stored untransformed and also seeded minVal/maxVal in the wrong
 * scale. */
if (minusLog10)
    {
    for (el = list; el != NULL; el = el->next)
	{
	if (el->val == 1)
	    el->val = 0;
	else if (el->val > 0)
	    el->val = -1 * log(el->val)/log(10);
	}
    }

/* Figure out min/max values */
minVal = maxVal = list->val;
for (el = list->next; el != NULL; el = el->next)
    {
    if (el->val < minVal)
	minVal = el->val;
    if (el->val > maxVal)
	maxVal = el->val;
    }

/* Sort and write out temp file. */
slSort(&list, chromGraphCmp);
f = hgCreateTabFile(tempDir, track);
for (el = list; el != NULL; el = el->next)
    chromGraphTabOut(el, f);

if (doLoad)
    {
    struct dyString *dy = dyStringNew(0);
    struct sqlConnection *conn;

    /* Set up connection to database and create main table. */
    conn = hAllocConn(db);
    sqlDyStringPrintf(dy, createString, track, hGetMinIndexLength(db));
    sqlRemakeTable(conn, track, dy->string);

    /* Load main table and clean up file handle. */
    hgLoadTabFile(conn, tempDir, track, &f);
    hgRemoveTabFile(tempDir, track);

    /* If need be create meta table.  If need be delete old row. */
    if (!sqlTableExists(conn, "metaChromGraph"))
	sqlUpdate(conn, metaCreateString);
    else
	{
	dyStringClear(dy);
	sqlDyStringPrintf(dy, "delete from metaChromGraph where name = '%s'", track);
	sqlUpdate(conn, dy->string);
	}

    /* Make chrom graph file */
    safef(path, sizeof(path), "%s.cgb", track);
    chromGraphToBin(list, path);
    safef(path, sizeof(path), "/gbdb/%s/chromGraph", db);
    pathPrefix = optionVal("pathPrefix", path);
    safef(gbdbPath, sizeof(gbdbPath), "%s/%s.cgb", pathPrefix, track);

    /* Create new line in meta table */
    dyStringClear(dy);
    sqlDyStringPrintf(dy, "insert into metaChromGraph values('%s',%f,%f,'%s');",
	track, minVal, maxVal, gbdbPath);
    sqlUpdate(conn, dy->string);
    /* NOTE(review): dy and conn are not released here; harmless if the
     * program exits right after, but worth freeing if this is ever called
     * more than once per process. */
    }
else
    {
    /* Nothing else will close the tab file when we are not loading it;
     * close it here so the data is flushed and write errors are seen. */
    if (fclose(f) != 0)
	errAbort("Error closing tab file for %s", track);
    f = NULL;
    }
}
void checkExp(char *bedFileName, char *tNibDir, char *nibList) { struct lineFile *bf = lineFileOpen(bedFileName , TRUE), *af = NULL; char *row[PSEUDOGENELINK_NUM_COLS] ; struct pseudoGeneLink *ps; char *tmpName[512], cmd[512]; struct axt *axtList = NULL, *axt, *mAxt = NULL; struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seqList = NULL; struct nibInfo *qNib = NULL, *tNib = NULL; FILE *op; int ret; if (nibHash == NULL) nibHash = hashNew(0); while (lineFileNextRow(bf, row, ArraySize(row))) { struct misMatch *misMatchList = NULL; struct binKeeper *bk = NULL; struct binElement *el, *elist = NULL; struct psl *mPsl = NULL, *rPsl = NULL, *pPsl = NULL, *psl ; struct misMatch *mf = NULL; ps = pseudoGeneLinkLoad(row); tmpName[0] = cloneString(ps->name); chopByChar(tmpName[0], '.', tmpName, sizeof(tmpName)); verbose(2,"name %s %s:%d-%d\n", ps->name, ps->chrom, ps->chromStart,ps->chromEnd); /* get expressed retro from hash */ bk = hashFindVal(mrnaHash, ps->chrom); elist = binKeeperFindSorted(bk, ps->chromStart, ps->chromEnd ) ; for (el = elist; el != NULL ; el = el->next) { rPsl = el->val; verbose(2,"retroGene %s %s:%d-%d\n",rPsl->qName, ps->chrom, ps->chromStart,ps->chromEnd); } /* find mrnas that overlap parent gene */ bk = hashFindVal(mrnaHash, ps->gChrom); elist = binKeeperFindSorted(bk, ps->gStart , ps->gEnd ) ; for (el = elist; el != NULL ; el = el->next) { pPsl = el->val; verbose(2,"parent %s %s:%d %d,%d\n", pPsl->qName, pPsl->tName,pPsl->tStart, pPsl->match, pPsl->misMatch); } /* find self chain */ bk = hashFindVal(chainHash, ps->chrom); elist = binKeeperFind(bk, ps->chromStart , ps->chromEnd ) ; slSort(&elist, chainCmpScoreDesc); for (el = elist; el != NULL ; el = el->next) { struct chain *chain = el->val, *subChain, *retChainToFree, *retChainToFree2; int qs = chain->qStart; int qe = chain->qEnd; int id = chain->id; if (chain->qStrand == '-') { qs = chain->qSize - chain->qEnd; qe = chain->qSize - chain->qStart; } if (!sameString(chain->qName , ps->gChrom) || 
!positiveRangeIntersection(qs, qe, ps->gStart, ps->gEnd)) { verbose(2," wrong chain %s:%d-%d %s:%d-%d parent %s:%d-%d\n", chain->qName, qs, qe, chain->tName,chain->tStart,chain->tEnd, ps->gChrom,ps->gStart,ps->gEnd); continue; } verbose(2,"chain id %d %4.0f",chain->id, chain->score); chainSubsetOnT(chain, ps->chromStart+7, ps->chromEnd-7, &subChain, &retChainToFree); if (subChain != NULL) chain = subChain; chainSubsetOnQ(chain, ps->gStart, ps->gEnd, &subChain, &retChainToFree2); if (subChain != NULL) chain = subChain; if (chain->qStrand == '-') { qs = chain->qSize - chain->qEnd; qe = chain->qSize - chain->qStart; } verbose(2," %s:%d-%d %s:%d-%d ", chain->qName, qs, qe, chain->tName,chain->tStart,chain->tEnd); if (subChain != NULL) verbose(2,"subChain %s:%d-%d %s:%d-%d\n", subChain->qName, subChain->qStart, subChain->qEnd, subChain->tName,subChain->tStart,subChain->tEnd); qNib = nibInfoFromCache(nibHash, tNibDir, chain->qName); tNib = nibInfoFromCache(nibHash, tNibDir, chain->tName); tSeq = nibInfoLoadStrand(tNib, chain->tStart, chain->tEnd, '+'); qSeq = nibInfoLoadStrand(qNib, chain->qStart, chain->qEnd, chain->qStrand); axtList = chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart, maxGap, BIGNUM); verbose(2,"axt count %d misMatch cnt %d\n",slCount(axtList), slCount(misMatchList)); for (axt = axtList; axt != NULL ; axt = axt->next) { addMisMatch(&misMatchList, axt, chain->qSize); } verbose(2,"%d in mismatch list %s id %d \n",slCount(misMatchList), chain->qName, id); chainFree(&retChainToFree); chainFree(&retChainToFree2); break; } /* create axt of each expressed retroGene to parent gene */ /* get alignment for each mrna overlapping retroGene */ bk = hashFindVal(mrnaHash, ps->chrom); elist = binKeeperFindSorted(bk, ps->chromStart , ps->chromEnd ) ; { char queryName[512]; char axtName[512]; char pslName[512]; safef(queryName, sizeof(queryName), "/tmp/query.%s.fa", ps->chrom); safef(axtName, sizeof(axtName), "/tmp/tmp.%s.axt", ps->chrom); safef(pslName, 
sizeof(pslName), "/tmp/tmp.%s.psl", ps->chrom); op = fopen(pslName,"w"); for (el = elist ; el != NULL ; el = el->next) { psl = el->val; pslOutput(psl, op, '\t','\n'); qSeq = twoBitReadSeqFrag(twoBitFile, psl->qName, 0, 0); if (qSeq != NULL) slAddHead(&seqList, qSeq); else errAbort("seq %s not found \n", psl->qName); } fclose(op); faWriteAll(queryName, seqList); safef(cmd,sizeof(cmd),"pslPretty -long -axt %s %s %s %s",pslName , nibList, queryName, axtName); ret = system(cmd); if (ret != 0) errAbort("ret is %d %s\n",ret,cmd); verbose(2, "ret is %d %s\n",ret,cmd); af = lineFileOpen(axtName, TRUE); while ((axt = axtRead(af)) != NULL) slAddHead(&mAxt, axt); lineFileClose(&af); } slReverse(&mAxt); /* for each parent/retro pair, count bases matching retro and parent better */ for (el = elist; el != NULL ; el = el->next) { int i, scoreRetro=0, scoreParent=0, scoreNeither=0; struct dyString *parentMatch = newDyString(16*1024); struct dyString *retroMatch = newDyString(16*1024); mPsl = el->val; if (mAxt != NULL) { verbose(2,"mrna %s %s:%d %d,%d axt %s\n", mPsl->qName, mPsl->tName,mPsl->tStart, mPsl->match, mPsl->misMatch, mAxt->qName); assert(sameString(mPsl->qName, mAxt->qName)); for (i = 0 ; i< (mPsl->tEnd-mPsl->tStart) ; i++) { int j = mAxt->tStart - mPsl->tStart; verbose(5, "listLen = %d\n",slCount(&misMatchList)); if ((mf = matchFound(&misMatchList, (mPsl->tStart)+i)) != NULL) { if (toupper(mf->retroBase) == toupper(mAxt->qSym[j+i])) { verbose (3,"match retro[%d] %d %c == %c parent %c %d\n", i,mf->retroLoc, mf->retroBase, mAxt->qSym[j+i], mf->parentBase, mf->parentLoc); dyStringPrintf(retroMatch, "%d,", mf->retroLoc); scoreRetro++; } else if (toupper(mf->parentBase) == toupper(mAxt->qSym[j+i])) { verbose (3,"match parent[%d] %d %c == %c retro %c %d\n", i,mf->parentLoc, mf->parentBase, mAxt->qSym[j+i], mf->retroBase, mf->retroLoc); dyStringPrintf(parentMatch, "%d,", mf->parentLoc); scoreParent++; } else { verbose (3,"match neither[%d] %d %c != %c retro %c %d\n", 
i,mf->parentLoc, mf->parentBase, mAxt->tSym[j+i], mf->retroBase, mf->retroLoc); scoreNeither++; } } } verbose(2,"final score %s parent %d retro %d neither %d\n", mPsl->qName, scoreParent, scoreRetro, scoreNeither); fprintf(outFile,"%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%s\t%s\n", ps->chrom, ps->chromStart, ps->chromEnd, ps->name, ps->score, mPsl->tName, mPsl->tStart, mPsl->tEnd, mPsl->qName, scoreParent, scoreRetro, scoreNeither, parentMatch->string, retroMatch->string); mAxt = mAxt->next; } dyStringFree(&parentMatch); dyStringFree(&retroMatch); } } }
void writeMergers(struct cdnaAliList *calList, char *cdnaName, char *bacNames[]) /* Write out any mergers indicated by this cdna. This destroys calList. */ { struct cdnaAliList *startBac, *endBac, *cal, *prevCal, *nextCal; int bacCount; int bacIx; { if (sameString(cdnaName, "R08304_AND_R08305")) { uglyf("Got you %s\n", cdnaName); } } slSort(&calList, cmpCal); for (startBac = calList; startBac != NULL; startBac = endBac) { /* Scan until find a cal that isn't pointing into the same BAC. */ bacCount = 1; bacIx = startBac->bacIx; prevCal = startBac; for (cal = startBac->next; cal != NULL; cal = cal->next) { if (cal->bacIx != bacIx) { prevCal->next = NULL; break; } ++bacCount; prevCal = cal; } endBac = cal; if (bacCount > 1) { while (startBac != NULL) { struct cdnaAliList *clumpList = NULL, *leftoverList = NULL; for (cal = startBac; cal != NULL; cal = nextCal) { nextCal = cal->next; if (noMajorOverlap(cal, clumpList)) { slAddHead(&clumpList, cal); } else { slAddHead(&leftoverList, cal); } } slReverse(&clumpList); slReverse(&leftoverList); if (slCount(clumpList) > 1) { char lastStrand = 0; boolean switchedStrand = FALSE; if (!allSameContig(clumpList)) { fprintf(mergerOut, "%s glues %s contigs", cdnaName, bacNames[bacIx]); lastStrand = clumpList->strand; for (cal = clumpList; cal != NULL; cal = cal->next) { if (cal->strand != lastStrand) switchedStrand = TRUE; fprintf(mergerOut, " %d %c %c' (%d-%d) %3.1f%%", cal->seqIx, cal->strand, cal->dir, cal->start, cal->end, 100.0*cal->cookedScore); } fprintf(mergerOut, "\n"); } } freeCalList(&clumpList); startBac = leftoverList; } } else { freeCalList(&startBac); } } }
void axtChain(char *axtIn, char *tNibDir, char *qNibDir, char *chainOut) /* axtChain - Chain together axt alignments.. */ { struct hash *pairHash = newHash(0); /* Hash keyed by qSeq<strand>tSeq */ struct seqPair *spList = NULL, *sp; FILE *f = mustOpen(chainOut, "w"); char *qName = "", *tName = ""; struct dnaSeq *qSeq = NULL, *tSeq = NULL; char qStrand = 0, tStrand = 0; struct chain *chainList = NULL, *chain; FILE *details = NULL; struct dnaSeq *seq = NULL; struct hash *qFaHash = newHash(0); struct hash *tFaHash = newHash(0); FILE *faF; boolean qIsTwoBit = twoBitIsFile(qNibDir); boolean tIsTwoBit = twoBitIsFile(tNibDir); axtScoreSchemeDnaWrite(scoreScheme, f, "axtChain"); if (detailsName != NULL) details = mustOpen(detailsName, "w"); /* Read input file and divide alignments into various parts. */ if (optionExists("psl")) spList = readPslBlocks(axtIn, pairHash, f); else spList = readAxtBlocks(axtIn, pairHash, f); if (optionExists("faQ")) { faF = mustOpen(qNibDir, "r"); verbose(1, "reading query fasta sequence from '%s'\n", qNibDir); while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq)) hashAdd(qFaHash, seq->name, seq); fclose(faF); } if (optionExists("faT")) { faF = mustOpen(tNibDir, "r"); verbose(1, "reading target fasta sequence from '%s'\n", tNibDir); while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq)) hashAdd(tFaHash, seq->name, seq); fclose(faF); } for (sp = spList; sp != NULL; sp = sp->next) { slReverse(&sp->blockList); removeExactOverlaps(&sp->blockList); verbose(1, "%d blocks after duplicate removal\n", slCount(sp->blockList)); if (optionExists("faQ")) { assert (qFaHash != NULL); loadFaSeq(qFaHash, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand, qNibDir); } else { loadIfNewSeq(qNibDir, qIsTwoBit, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand); } if (optionExists("faT")) { assert (tFaHash != NULL); loadFaSeq(tFaHash, sp->tName, '+', &tName, &tSeq, &tStrand, tNibDir); } else { loadIfNewSeq(tNibDir, tIsTwoBit, sp->tName, '+', &tName, &tSeq, 
&tStrand); } chainPair(sp, qSeq, tSeq, &chainList, details); } slSort(&chainList, chainCmpScore); for (chain = chainList; chain != NULL; chain = chain->next) { assert(chain->qStart == chain->blockList->qStart && chain->tStart == chain->blockList->tStart); chainWrite(chain, f); } carefulClose(&f); }
static struct jsonWrite *rTdbToJw(struct trackDb *tdb, struct hash *fieldHash, struct hash *excludeTypesHash, int depth, int maxDepth) /* Recursively build and return a new jsonWrite object with JSON for tdb and its children, * or NULL if tdb or all children have been filtered out by excludeTypesHash. * If excludeTypesHash is non-NULL, omit any tracks/views/subtracks with type in excludeTypesHash. * If fieldHash is non-NULL, include only the field names indexed in fieldHash. */ { if (maxDepth >= 0 && depth > maxDepth) return NULL; boolean doSubtracks = (tdb->subtracks && fieldOk("subtracks", fieldHash)); // If excludeTypesHash is given and tdb is a leaf track/subtrack, look up the first word // of tdb->type in excludeTypesHash; if found, return NULL. if (excludeTypesHash && !doSubtracks) { char typeCopy[PATH_LEN]; safecpy(typeCopy, sizeof(typeCopy), tdb->type); if (hashLookup(excludeTypesHash, firstWordInLine(typeCopy))) return NULL; } boolean gotSomething = !doSubtracks; struct jsonWrite *jwNew = jsonWriteNew(); jsonWriteObjectStart(jwNew, NULL); writeTdbSimple(jwNew, tdb, fieldHash); if (tdb->parent && fieldOk("parent", fieldHash)) { // We can't link to an object in JSON and better not recurse here or else infinite loop. if (tdbIsSuperTrackChild(tdb)) { // Supertracks have been omitted from fullTrackList, so add the supertrack object's // non-parent/child info here. jsonWriteObjectStart(jwNew, "parent"); writeTdbSimple(jwNew, tdb->parent, fieldHash); jsonWriteObjectEnd(jwNew); } else // Just the name so we don't have infinite loops. 
jsonWriteString(jwNew, "parent", tdb->parent->track); } if (doSubtracks) { jsonWriteListStart(jwNew, "subtracks"); slSort(&tdb->subtracks, trackDbViewCmp); struct trackDb *subTdb; for (subTdb = tdb->subtracks; subTdb != NULL; subTdb = subTdb->next) { struct jsonWrite *jwSub = rTdbToJw(subTdb, fieldHash, excludeTypesHash, depth+1, maxDepth); if (jwSub) { gotSomething = TRUE; jsonWriteAppend(jwNew, NULL, jwSub); jsonWriteFree(&jwSub); } } jsonWriteListEnd(jwNew); } jsonWriteObjectEnd(jwNew); if (! gotSomething) // All children were excluded; clean up and null out jwNew. jsonWriteFree(&jwNew); return jwNew; }
void bamLoadItemsCore(struct track *tg, boolean isPaired)
/* Load BAM data into tg->items item list, unless zoomed out so far
 * that the data would just end up in dense mode and be super-slow.
 * With isPaired, reads are collected through addBamPaired and a pair hash
 * and each hashed feature overlapping the window is added as a series;
 * otherwise reads are collected through addBam.
 * Any errAbort raised while loading is caught: its message is stored in
 * tg->networkErrMsg and the track is switched to the warning draw method
 * instead of aborting the page. */
{
/* protect against temporary network error */
struct errCatch *errCatch = errCatchNew();
if (errCatchStart(errCatch))
    {
    /* Gather display settings from cart/trackDb into one struct that is
     * handed to the fetch callback. */
    struct hash *pairHash = isPaired ? hashNew(18) : NULL;
    int minAliQual = atoi(cartOrTdbString(cart, tg->tdb, BAM_MIN_ALI_QUAL, BAM_MIN_ALI_QUAL_DEFAULT));
    char *colorMode = cartOrTdbString(cart, tg->tdb, BAM_COLOR_MODE, BAM_COLOR_MODE_DEFAULT);
    char *grayMode = cartOrTdbString(cart, tg->tdb, BAM_GRAY_MODE, BAM_GRAY_MODE_DEFAULT);
    char *userTag = cartOrTdbString(cart, tg->tdb, BAM_COLOR_TAG, BAM_COLOR_TAG_DEFAULT);
    int aliQualShadeMin = 0, aliQualShadeMax = 99, baseQualShadeMin = 0, baseQualShadeMax = 40;
    parseIntRangeSetting(tg->tdb, "aliQualRange", &aliQualShadeMin, &aliQualShadeMax);
    parseIntRangeSetting(tg->tdb, "baseQualRange", &baseQualShadeMin, &baseQualShadeMax);
    /* Positional initializer: the order must match struct bamTrackData. */
    struct bamTrackData btd = {tg, pairHash, minAliQual, colorMode, grayMode, userTag,
			       aliQualShadeMin, aliQualShadeMax, baseQualShadeMin, baseQualShadeMax};

    /* Locate the BAM file: bigDataUrl setting first, otherwise (for
     * non-custom tracks only) a file name looked up through the track's table. */
    char *fileName = trackDbSetting(tg->tdb, "bigDataUrl");
    if (fileName == NULL)
	{
	if (tg->customPt)
	    {
	    errAbort("bamLoadItemsCore: can't find bigDataUrl for custom track %s", tg->track);
	    }
	else
	    {
	    struct sqlConnection *conn = hAllocConnTrack(database, tg->tdb);
	    fileName = bamFileNameFromTable(conn, tg->table, chromName);
	    hFreeConn(&conn);
	    }
	}

    char *fileName2 = hReplaceGbdb(fileName);

    /* Region to fetch, as "chrom:start-end" for the current window. */
    char posForBam[512];
    safef(posForBam, sizeof(posForBam), "%s:%d-%d", chromName, winStart, winEnd);
    char *cacheDir = cfgOption("cramRef");
    char *refUrl = trackDbSetting(tg->tdb, "refUrl");
    if (!isPaired)
	bamFetchPlus(fileName2, posForBam, addBam, &btd, NULL, refUrl, cacheDir);
    else
	{
	/* Widen the fetch region by pairSearchRange on both sides (clamped
	 * at 0 on the left) when the setting is positive. */
	char *setting = trackDbSettingClosestToHomeOrDefault(tg->tdb, "pairSearchRange", "20000");
	int pairSearchRange = atoi(setting);
	if (pairSearchRange > 0)
	    safef(posForBam, sizeof(posForBam), "%s:%d-%d", chromName,
		  max(0, winStart-pairSearchRange), winEnd+pairSearchRange);
	bamFetchPlus(fileName2, posForBam, addBamPaired, &btd, NULL, refUrl, cacheDir);
	/* Whatever is still in the pair hash after the fetch is added as its
	 * own series, but only if it overlaps the visible window. */
	struct hashEl *hel;
	struct hashCookie cookie = hashFirst(btd.pairHash);
	while ((hel = hashNext(&cookie)) != NULL)
	    {
	    struct linkedFeatures *lf = hel->val;
	    if (lf->start < winEnd && lf->end > winStart)
		slAddHead(&(tg->items), lfsFromLf(lf));
	    }
	}
    /* NOTE(review): fileName2 is released only on this success path, and
     * pairHash is not freed in this function; an errAbort above skips the
     * freez - confirm whether that matters for this CGI. */
    freez(&fileName2);

    /* Outside dense mode, put the items in display order. */
    if (tg->visibility != tvDense)
	{
	slReverse(&(tg->items));
	if (isPaired)
	    slSort(&(tg->items), linkedFeaturesSeriesCmp);
	else if (sameString(colorMode, BAM_COLOR_MODE_STRAND))
	    slSort(&(tg->items), linkedFeaturesCmpOri);
	else if (sameString(colorMode, BAM_COLOR_MODE_GRAY) &&
		 sameString(grayMode, BAM_GRAY_MODE_ALI_QUAL))
	    slSort(&(tg->items), linkedFeaturesCmpScore);
	else
	    slSort(&(tg->items), linkedFeaturesCmpStart);
	if (slCount(tg->items) > MAX_ITEMS_FOR_MAPBOX)
	    {
	    // flag drawItems to make a mapBox for the whole track
	    tg->customInt = 1;
	    tg->mapItem = dontMapItem;
	    }
	}
    }
errCatchEnd(errCatch);
/* On a caught error, show a warning in place of the track. */
if (errCatch->gotError)
    {
    tg->networkErrMsg = cloneString(errCatch->message->string);
    tg->drawItems = bigDrawWarning;
    tg->totalHeight = bigWarnTotalHeight;
    }
errCatchFree(&errCatch);
}
struct mafAli *hgMafFrag(
	char *database,     /* Database, must already have hSetDb to this */
	char *track, 	    /* Name of MAF track */
	char *chrom, 	    /* Chromosome (in database genome) */
	int start, int end, /* start/end in chromosome */
	char strand, 	    /* Chromosome strand. */
	char *outName, 	    /* Optional name to use in first component */
	struct slName *orderList /* Optional order of organisms. */
	)
/* mafFrag- Extract maf sequences for a region from database.
 * This creates a somewhat unusual MAF that extends from start
 * to end whether or not there are actually alignments.  Where
 * there are no alignments (or alignments missing a species)
 * a . character fills in.   The score is always zero, and
 * the sources just indicate the species.  You can mafFree this
 * as normal.
 * If orderList is given, a component whose organism is not on it causes an
 * errAbort; without it organisms are created as they are encountered. */
{
int chromSize = hChromSize(database, chrom);
struct sqlConnection *conn = hAllocConn(database);
/* NOTE(review): native is never freed in this function. */
struct dnaSeq *native = hChromSeq(database, chrom, start, end);
struct mafAli *maf, *mafList = mafLoadInRegion(conn, track, chrom, start, end);
char masterSrc[128];
struct hash *orgHash = newHash(10);
struct oneOrg *orgList = NULL, *org, *nativeOrg = NULL;
/* curPos: next master position still to be emitted.
 * symCount: number of alignment columns written so far to each org->dy. */
int curPos = start, symCount = 0;
struct slName *name;
int order = 0;

/* Check that the mafs are really copacetic, the particular
 * subtype we think is in the database that this (relatively)
 * simple code can handle. */
safef(masterSrc, sizeof(masterSrc), "%s.%s", database, chrom);
mafCheckFirstComponentSrc(mafList, masterSrc);
mafCheckFirstComponentStrand(mafList, '+');
slSort(&mafList, mafCmp);

/* Prebuild organisms if possible from input orderList.  The first name on
 * the list becomes nativeOrg. */
for (name = orderList; name != NULL; name = name->next)
    {
    AllocVar(org);
    slAddHead(&orgList, org);
    hashAddSaveName(orgHash, name->name, org, &org->name);
    org->dy = dyStringNew(native->size*1.5);
    org->order = order++;
    if (nativeOrg == NULL)
	nativeOrg = org;
    }
/* Without an order list, start with just the native database organism. */
if (orderList == NULL)
    {
    AllocVar(org);
    slAddHead(&orgList, org);
    hashAddSaveName(orgHash, database, org, &org->name);
    org->dy = dyStringNew(native->size*1.5);
    if (nativeOrg == NULL)
	nativeOrg = org;
    }

/* Go through all mafs in window, mostly building up
 * org->dy strings. */
for (maf = mafList; maf != NULL; maf = maf->next)
    {
    struct mafComp *mc, *mcMaster = maf->components;
    struct mafAli *subMaf = NULL;
    order = 0;
    /* Fill any gap between what has been emitted and this alignment. */
    if (curPos < mcMaster->start)
	{
	fillInMissing(nativeOrg, orgList, native, start,
		curPos, mcMaster->start);
	symCount += mcMaster->start - curPos;
	}
    if (curPos < mcMaster->start + mcMaster->size) /* Prevent worst
						     * backtracking */
	{
	/* Use only the part of the alignment from curPos to end. */
	if (mafNeedSubset(maf, masterSrc, curPos, end))
	    {
	    subMaf = mafSubset(maf, masterSrc, curPos, end);
	    if (subMaf == NULL)
		continue;
	    }
	else
	    subMaf = maf;
	for (mc = subMaf->components; mc != NULL; mc = mc->next, ++order)
	    {
	    /* Extract name up to dot into 'orgName' */
	    char buf[128], *e, *orgName;

	    if ((mc->size == 0) || (mc->srcSize == 0)) /* skip over components without sequence */
		continue;

	    mc->leftStatus = mc->rightStatus = 0; /* squash annotation */

	    e = strchr(mc->src, '.');
	    if (e == NULL)
		orgName = mc->src;
	    else
		{
		int len = e - mc->src;
		if (len >= sizeof(buf))
		    errAbort("organism/database name %s too long", mc->src);
		memcpy(buf, mc->src, len);
		buf[len] = 0;
		orgName = buf;
		}

	    /* Look up dyString corresponding to org, and create a
	     * new one if necessary. */
	    org = hashFindVal(orgHash, orgName);
	    if (org == NULL)
		{
		if (orderList != NULL)
		   errAbort("%s is not in orderList", orgName);
		AllocVar(org);
		slAddHead(&orgList, org);
		hashAddSaveName(orgHash, orgName, org, &org->name);
		org->dy = dyStringNew(native->size*1.5);
		/* A late-appearing organism is padded with dots for all the
		 * columns emitted before it showed up. */
		dyStringAppendMultiC(org->dy, '.', symCount);
		if (nativeOrg == NULL)
		    nativeOrg = org;
		}
	    /* Without an order list an organism's order is the largest
	     * component index at which it has been seen. */
	    if (orderList == NULL && order > org->order)
		org->order = order;
	    org->hit = TRUE;

	    /* Fill it up with alignment. */
	    dyStringAppendN(org->dy, mc->text, subMaf->textSize);
	    }
	/* Organisms absent from this alignment get dots; reset hit flags. */
	for (org = orgList; org != NULL; org = org->next)
	    {
	    if (!org->hit)
		dyStringAppendMultiC(org->dy, '.', subMaf->textSize);
	    org->hit = FALSE;
	    }
	symCount += subMaf->textSize;
	curPos = mcMaster->start + mcMaster->size;
	if (subMaf != maf)
	    mafAliFree(&subMaf);
	}
    }
/* Fill from the last alignment to the end of the requested region. */
if (curPos < end)
    {
    fillInMissing(nativeOrg, orgList, native, start, curPos, end);
    symCount += end - curPos;
    }
mafAliFreeList(&mafList);

slSort(&orgList, oneOrgCmp);
if (strand == '-')
    {
    for (org = orgList; org != NULL; org = org->next)
	reverseComplement(org->dy->string, org->dy->stringSize);
    }

/* Construct our maf */
AllocVar(maf);
maf->textSize = symCount;
for (org = orgList; org != NULL; org = org->next)
    {
    struct mafComp *mc;
    AllocVar(mc);
    /* The first organism after sorting is written as the master component. */
    if (org == orgList)
	{
	if (outName != NULL)
	    {
	    /* Caller-supplied name: coordinates are relative to the fragment. */
	    mc->src = cloneString(outName);
	    mc->srcSize = native->size;
	    mc->strand = '+';
	    mc->start = 0;
	    mc->size = native->size;
	    }
	else
	    {
	    /* Real chromosome coordinates, flipped for the minus strand. */
	    mc->src = cloneString(masterSrc);
	    mc->srcSize = chromSize;
	    mc->strand = strand;
	    if (strand == '-')
		reverseIntRange(&start, &end, chromSize);
	    mc->start = start;
	    mc->size = end-start;
	    }
	}
    else
	{
	/* Other organisms: size comes from countAlpha on the text; start is 0. */
	int size = countAlpha(org->dy->string);
	mc->src = cloneString(org->name);
	mc->srcSize = size;
	mc->strand = '+';
	mc->start = 0;
	mc->size = size;
	}
    mc->text = cloneString(org->dy->string);
    dyStringFree(&org->dy);
    slAddHead(&maf->components, mc);
    }
slReverse(&maf->components);

slFreeList(&orgList);
freeHash(&orgHash);
hFreeConn(&conn);
return maf;
}
void hgFindSpec(char *org, char *database, char *hgFindSpecName, char *sqlFile,
	char *hgRoot, boolean strict)
/* hgFindSpec - Create hgFindSpec table from text files.
 * Search specs are gathered with layerOn from three directories, most
 * specific first: hgRoot/org/database, hgRoot/org, hgRoot.  They are sorted
 * with hgFindSpecCmp, written to <hgFindSpecName>.tab in the current
 * directory, and loaded into table hgFindSpecName of database, which is
 * re-created from the definition in sqlFile.  Specs that have a settings
 * hash then get their searchSettings field filled in separately. */
{
struct hash *uniqHash = newHash(8);
struct hash *htmlHash = newHash(8);
struct hgFindSpec *hfsList = NULL, *hfs;
char rootDir[512], orgDir[512], asmDir[512];
char tab[512];

snprintf(tab, sizeof(tab), "%s.tab", hgFindSpecName);

/* Create track list from hgRoot and hgRoot/org and hgRoot/org/assembly
 * ra format database.  The paths are written with a size bound so that an
 * over-long hgRoot can no longer run past the end of the buffers.
 * NOTE(review): as with tab above, an over-long path is cut short rather
 * than reported. */
snprintf(rootDir, sizeof(rootDir), "%s", hgRoot);
snprintf(orgDir, sizeof(orgDir), "%s/%s", hgRoot, org);
snprintf(asmDir, sizeof(asmDir), "%s/%s/%s", hgRoot, org, database);
layerOn(strict, database, asmDir, uniqHash, htmlHash, FALSE, &hfsList);
layerOn(strict, database, orgDir, uniqHash, htmlHash, FALSE, &hfsList);
layerOn(strict, database, rootDir, uniqHash, htmlHash, TRUE, &hfsList);
slSort(&hfsList, hgFindSpecCmp);
if (verboseLevel() > 0)
    printf("Loaded %d search specs total\n", slCount(hfsList));

/* Write to tab-separated file. */
    {
    FILE *f = mustOpen(tab, "w");
    for (hfs = hfsList; hfs != NULL; hfs = hfs->next)
	hgFindSpecTabOut(hfs, f);
    carefulClose(&f);
    }

/* Update database */
    {
    char *create;
    size_t createLen;
    /* Big enough for the statement text plus the longest tab path and
     * table name that fit in tab[]. */
    char query[2*sizeof(tab) + 128];
    struct sqlConnection *conn = sqlConnect(database);

    /* Load in table definition. */
    readInGulp(sqlFile, &create, NULL);
    create = trimSpaces(create);
    create = subTrackName(create, hgFindSpecName);
    /* Drop one trailing semicolon.  The length check keeps an empty
     * definition from being indexed at position -1. */
    createLen = strlen(create);
    if (createLen > 0 && create[createLen-1] == ';')
	create[createLen-1] = 0;
    sqlRemakeTable(conn, hgFindSpecName, create);

    /* Load in regular fields. */
    sqlSafef(query, sizeof query, "load data local infile '%s' into table %s", tab, hgFindSpecName);
    sqlUpdate(conn, query);

    /* Load in settings fields. */
    for (hfs = hfsList; hfs != NULL; hfs = hfs->next)
	{
	if (hfs->settingsHash != NULL)
	    {
	    char *settings = settingsFromHash(hfs->settingsHash);
	    updateBigTextField(conn, hgFindSpecName, "searchName", hfs->searchName,
		"searchSettings", settings);
	    freeMem(settings);
	    }
	}

    sqlDisconnect(&conn);
    if (verboseLevel() > 0)
	printf("Loaded database %s\n", database);
    }
}
/* Sort the components of a block for which a tree must still be constructed.
 * The comparison function takes no context argument, so the root genome is
 * handed to orderTreelessCmp through the file-level variable
 * sortTreelessRootGenome, which is set only for the duration of the sort. */
static void orderTreeless(struct malnBlk *blk, struct Genome *treelessRootGenome) {
    /* publish the sort context */
    sortTreelessRootGenome = treelessRootGenome;

    slSort(&blk->comps, orderTreelessCmp);

    /* clear it again so no stale genome is left behind */
    sortTreelessRootGenome = NULL;
}
struct g2cFile *loadG2cFile(char *fileName) { char lineBuf[1024*8]; int lineLen; char *words[256*8]; int wordCount; FILE *f; int lineCount = 0; struct g2cFile *gf = alloc(sizeof(*gf)); int hitCount = 0; int cdnaCount = 0; int geneCount = 0; gf->name = fileName; f = mustOpen(fileName, "r"); gf->cdnaHash = newHash(14); while (fgets(lineBuf, sizeof(lineBuf), f) != NULL) { ++lineCount; lineLen = strlen(lineBuf); if (lineLen >= sizeof(lineBuf) - 1) { errAbort("%s\nLine %d of %s too long, can only handle %d chars\n", lineBuf, lineCount, fileName, sizeof(lineBuf)-1); } wordCount = chopString(lineBuf, whiteSpaceChopper, words, ArraySize(words)); if (wordCount > 0) { struct gene *gene = alloc(sizeof(*gene)); char *geneName = words[0]; int i; /* Create new gene struct and put it on list. */ gene->name = cloneString(geneName); slAddHead(&gf->geneList, gene); ++geneCount; /* Put all cdna hits on gene. */ for (i=1; i<wordCount; ++i) { struct cdnaHit *hit; struct cdnaVal *cdnaVal; struct hashEl *hel; char *cdnaName = words[i]; /* Get cdna, or if it's the first time we've seen it * make up a data structure for it and hang it on * hash list and cdna list. */ if ((hel = hashLookup(gf->cdnaHash, cdnaName)) == NULL) { cdnaVal = alloc(sizeof(*cdnaVal)); hel = hashAdd(gf->cdnaHash, cdnaName, cdnaVal); cdnaVal->name = hel->name; slAddHead(&gf->cdnaList, cdnaVal); ++cdnaCount; } else { cdnaVal = hel->val; } ++cdnaVal->useCount; /* Make up new cdna hit and hang it on the gene. */ hit = alloc(sizeof(*hit)); hit->hel = hel; hit->name = hel->name; slAddHead(&gene->hitList, hit); ++hitCount; } slReverse(&gene->hitList); } } slReverse(&gf->geneList); slSort(&gf->geneList, cmpName); slSort(&gf->cdnaList, cmpName); fclose(f); reportHashStats(gf->cdnaHash); printf("Loaded %s. %d genes %d cdnas %d hits\n", fileName, geneCount, cdnaCount, hitCount); return gf; }
void metaSortTags(struct meta *meta)
/* Do canonical sort so that the first tag stays first but the
 * rest are alphabetical.  The sort (with metaTagValCmp) is applied to the
 * list hanging off the first tag's next pointer, so the first tag itself
 * never moves.  A meta without any tags is left untouched. */
{
/* Nothing to sort, and no first tag whose next pointer could be taken. */
if (meta->tagList == NULL)
    return;
slSort(&meta->tagList->next, metaTagValCmp);
}
void update(struct g2cFile *old, struct g2cFile *up) { struct gene *oldGene, *upGene; struct cdnaHit *oldHit, *upHit; struct hash *geneHash; struct hashEl *hel; int sameHitCount = 0; int newHitCount = 0; int newGeneCount = 0; int updatedGeneCount = 0; int altCount = 0; struct geneFamily smallFamily; struct geneFamily *family; printf("Updating %s with %s\n", old->name, up->name); /* Hash the existing gene names for faster lookup. */ geneHash = newHash(12); for (oldGene = old->geneList; oldGene != NULL; oldGene = oldGene->next) hashAdd(geneHash, oldGene->name, oldGene); for (upGene = up->geneList; upGene != NULL; upGene = upGene->next) { boolean changedGene = FALSE; if (isAltSplicedName(upGene->name)) { family = getAltFamily(geneHash, upGene->name); ++altCount; } else { hel = hashLookup(geneHash, upGene->name); if (hel != NULL) { smallFamily.gene = hel->val; smallFamily.next = NULL; family = &smallFamily; } else family = NULL; } /* Set corresponding gene in old file to NULL until we * need to find it. */ oldGene = NULL; for (upHit = upGene->hitList; upHit != NULL; upHit = upHit->next) { if ((oldHit = findHitInFamily(family, upHit->name)) != NULL) ++sameHitCount; else { if (oldGene == NULL) { /* We haven't found corresponding gene yet. First * look for it in the family. */ struct geneFamily *member; for (member = family; member != NULL; member = member->next) { if (strcmp(member->gene->name, upGene->name) == 0) { oldGene = member->gene; break; } } /* The corresponding gene doesn't exist yet. We * have to make it up and hang it on the genelist * for the file, the hash list, and the family list. 
*/ if (oldGene == NULL) { oldGene = alloc(sizeof(*oldGene)); oldGene->name = upGene->name; slAddHead(&old->geneList, oldGene); hashAdd(geneHash, oldGene->name, oldGene); member = alloc(sizeof(*member)); member->gene = oldGene; slAddHead(&family, member); ++newGeneCount; } } oldHit = alloc(sizeof(*oldHit)); oldHit->name = upHit->name; oldHit->hel = hel; slAddHead(&oldGene->hitList, oldHit); ++newHitCount; changedGene = TRUE; } } if (changedGene) ++updatedGeneCount; } slSort(&old->geneList, cmpName); printf("Updated %d genes (including %d alt spliced ones) with %d cdna hits (%d hits unchanged) %d new genes\n", updatedGeneCount, altCount, newHitCount, sameHitCount, newGeneCount); }
void sortFineAlis(struct fineAli **pAli)
/* Sort the fineAli list whose head pointer is at pAli by handing it to
 * slSort with cmpFineAli as the comparison function. */
{
slSort(pAli, cmpFineAli);
}
void doMiddle() { struct hash *cvHash = raReadAll((char *)cvFile(), CV_TERM); struct hashCookie hc = hashFirst(cvHash); struct hashEl *hEl; struct slList *termList = NULL; struct hash *ra; int totalPrinted = 0; boolean excludeDeprecated = (cgiOptionalString("deprecated") == NULL); // Prepare an array of selected terms (if any) int requestCount = 0; char **requested = NULL; char *requestVal = termOpt; char *queryBy = CV_TERM; if (tagOpt) { requestVal = tagOpt; queryBy = CV_TAG; } else if (targetOpt) { requestVal = targetOpt; queryBy = CV_TERM; // request target is special: lookup term, convert to target, display target } else if (labelOpt) { requestVal = labelOpt; queryBy = CV_LABEL; } if (requestVal) { (void)stripChar(requestVal,'\"'); requestCount = chopCommas(requestVal,NULL); requested = needMem(requestCount * sizeof(char *)); chopByChar(requestVal,',',requested,requestCount); } char *org = NULL; // if the org is specified in the type (eg. cell line) // then use that for the org, otherwise use the command line option, // otherwise use human. char *type = findType(cvHash,requested,requestCount,&queryBy, &org, FALSE); if (org == NULL) org = organismOptLower; if (org == NULL) org = ORG_HUMAN; // Special logic for requesting antibody by target if (targetOpt && requestCount > 0 && sameWord(queryBy,CV_TERM) && sameWord(type,CV_TERM_ANTIBODY)) { // Several antibodies may have same target. 
// requested target={antibody} and found antibody // Must now convert each of the requested terms to its target before displaying all targets char **targets = convertAntibodiesToTargets(cvHash,requested,requestCount); if (targets != NULL) { freeMem(requested); requested = targets; queryBy = CV_TARGET; } } //warn("Query by: %s = '%s' type:%s",queryBy,requestVal?requestVal:"all",type); // Get just the terms that match type and requested, then sort them if (differentWord(type,CV_TOT) || typeOpt != NULL ) // If type resolves to typeOfTerm and { // typeOfTerm was not requested, while ((hEl = hashNext(&hc)) != NULL) // then just show definition { ra = (struct hash *)hEl->val; char *thisType = (char *)cvTermNormalized(hashMustFindVal(ra,CV_TYPE)); if (differentWord(thisType,type) && (requested == NULL || differentWord(thisType,CV_TERM_CONTROL))) continue; // Skip all rows that do not match queryBy param if specified if (requested) { char *val = hashFindVal(ra, queryBy); if (val == NULL) { // Special case for input that has no target if (sameString(queryBy, CV_TARGET)) val = hashMustFindVal(ra, CV_TERM); else continue; } if (-1 == stringArrayIx(val,requested,requestCount)) continue; } else if (excludeDeprecated) { if (hashFindVal(ra, "deprecated") != NULL) continue; } slAddTail(&termList, ra); } } slSort(&termList, termCmp); boolean described = doTypeDefinition(type,FALSE,(slCount(termList) == 0)); boolean sortable = (slCount(termList) > 5); if (sortable) { webIncludeResourceFile("HGStyle.css"); jsIncludeFile("jquery.js",NULL); jsIncludeFile("utils.js",NULL); printf("<TABLE class='sortable' border=1 CELLSPACING=0 style='border: 2px outset #006600; " "background-color:%s;'>\n",COLOR_BG_DEFAULT); } else printf("<TABLE BORDER=1 BGCOLOR=%s CELLSPACING=0 CELLPADDING=2>\n",COLOR_BG_DEFAULT); if (slCount(termList) > 0) { doTypeHeader(type, org,sortable); // Print out the terms while ((ra = slPopHead(&termList)) != NULL) { if (doTypeRow( ra, org )) totalPrinted++; } } 
puts("</TBODY></TABLE><BR>"); if (sortable) jsInline("{$(document).ready(function() " "{sortTable.initialize($('table.sortable')[0],true,true);});}\n"); if (totalPrinted == 0) { if (!described) warn("Error: Unrecognised type (%s)\n", type); } else if (totalPrinted > 1) printf("Total = %d\n", totalPrinted); }
void hgTrackDb(char *org, char *database, char *trackDbName, char *sqlFile, char *hgRoot,
               boolean strict)
/* hgTrackDb - Create trackDb table from text files.
 * Builds and flattens the track list for org/database from hgRoot, sorts it
 * with trackDbCmp, writes it to <trackDbName>.tab in the current directory,
 * recreates table trackDbName from the definition in sqlFile, bulk-loads the
 * tab file, and then fills in the html and settings columns one track at a
 * time.  With strict set, a warning is written to stderr for tracks that
 * lack html, unless they have a parent or superTrack setting or are
 * cytoBandIdeo. */
{
struct trackDb *td;
char tab[PATH_LEN];
safef(tab, sizeof(tab), "%s.tab", trackDbName);

struct trackDb *tdbList = buildTrackDb(org, database, hgRoot, strict);
tdbList = flatten(tdbList);
slSort(&tdbList, trackDbCmp);
verbose(1, "Loaded %d track descriptions total\n", slCount(tdbList));

/* Write to tab-separated file; hold off on html, since it must be encoded */
    {
    verbose(2, "Starting write of tabs to %s\n", tab);
    FILE *f = mustOpen(tab, "w");
    for (td = tdbList; td != NULL; td = td->next)
        {
        hVarSubstTrackDb(td, database);
        /* Blank out html for the tab file only; it is restored below and
         * loaded separately through updateBigTextField. */
        char *hold = td->html;
        td->html = "";
        subChar(td->type, '\t', ' ');       /* Tabs confuse things. */
        subChar(td->shortLabel, '\t', ' '); /* Tabs confuse things. */
        subChar(td->longLabel, '\t', ' ');  /* Tabs confuse things. */
        trackDbTabOut(td, f);
        td->html = hold;
        }
    carefulClose(&f);
    verbose(2, "Wrote tab representation to %s\n", tab);
    }

/* Update database */
    {
    char *create;
    size_t createLen;
    /* The statement embeds the tab path (up to PATH_LEN) and the table
     * name, so size the buffer from PATH_LEN rather than a fixed 256. */
    char query[2*PATH_LEN + 128];
    struct sqlConnection *conn = sqlConnect(database);

    /* Load in table definition. */
    readInGulp(sqlFile, &create, NULL);
    create = trimSpaces(create);
    create = substituteTrackName(create, trackDbName);
    /* Drop a trailing semicolon.  Check the length first so an empty
     * definition does not make us read the byte before the buffer. */
    createLen = strlen(create);
    if (createLen > 0 && create[createLen-1] == ';')
        create[createLen-1] = 0;
    sqlRemakeTable(conn, trackDbName, create);

    /* Load in regular fields. */
    sqlSafef(query, sizeof(query), "load data local infile '%s' into table %s",
             tab, trackDbName);
    verbose(2, "sending mysql \"%s\"\n", query);
    sqlUpdate(conn, query);
    verbose(2, "done tab file load\n");

    /* Load in html and settings fields. */
    for (td = tdbList; td != NULL; td = td->next)
        {
        if (isEmpty(td->html))
            {
            if (strict && !trackDbLocalSetting(td, "parent")
                && !trackDbLocalSetting(td, "superTrack")
                && !sameString(td->track,"cytoBandIdeo"))
                {
                fprintf(stderr, "Warning: html missing for %s %s %s '%s'\n",org, database, td->track, td->shortLabel);
                }
            }
        else
            {
            updateBigTextField(conn, trackDbName, "tableName", td->track, "html", td->html);
            }
        if (td->settingsHash != NULL)
            {
            char *settings = settingsFromHash(td->settingsHash);
            updateBigTextField(conn, trackDbName, "tableName", td->track,
                               "settings", settings);
            if (showSettings)
                {
                verbose(1, "%s: type='%s';", td->track, td->type);
                if (isNotEmpty(settings))
                    {
                    /* Show the settings on a single line. */
                    char *oneLine = replaceChars(settings, "\n", "; ");
                    eraseTrailingSpaces(oneLine);
                    verbose(1, " %s", oneLine);
                    freeMem(oneLine);
                    }
                verbose(1, "\n");
                }
            freeMem(settings);
            }
        }

    sqlDisconnect(&conn);
    verbose(1, "Loaded database %s\n", database);
    }
}
int main(int argc, char *argv[]) { char *outName; char xaFileName[512]; char region[64]; FILE *xaFile, *out; struct xaAli *xaList = NULL, *xa; char *sortBy; char *subtitle; int (*cmp)(const void *va, const void *vb); if (argc != 3) { usage(); } sortBy = argv[1]; outName = argv[2]; if (sameWord(sortBy, "score")) { cmp = cmpXaScore; subtitle = "(sorted by alignment score)"; } else if (sameWord(sortBy, "briggsae")) { cmp = cmpXaQuery; subtitle = "(sorted by <I>C. briggsae</I> region)"; } else if (sameWord(sortBy, "elegans")) { cmp = cmpXaTarget; subtitle = "(sorted by <I>C. elegans</I> region)"; } else usage(); /* Read in alignment file. */ sprintf(xaFileName, "%s%s/all%s", wormXenoDir(), "cbriggsae", xaAlignSuffix()); printf("Scanning %s\n", xaFileName); xaFile = xaOpenVerify(xaFileName); while ((xa = xaReadNext(xaFile, FALSE)) != NULL) { xa->milliScore = round(0.001 * xa->milliScore * (xa->tEnd - xa->tStart)); freeMem(xa->qSym); freeMem(xa->tSym); freeMem(xa->hSym); slAddHead(&xaList, xa); } /* Sort by score. */ printf("Sorting..."); slSort(&xaList, cmp); printf(" best score %d\n", xaList->milliScore); /* Write out .html */ printf("Writing %s\n", outName); out = mustOpen(outName, "w"); htmStart(out, "C. briggsae/C. elegans Homologies"); fprintf(out, "<H2>Regions with Sequenced <I>C. briggsae</I> Homologs</H2>\n"); fprintf(out, "<H3>%s</H3>\n", subtitle); fprintf(out, "<TT><PRE><B>"); fprintf(out, "Score <I>C. elegans Region</I> <I>C. briggsae</I> Region </B>\n"); fprintf(out, "--------------------------------------------------------\n"); for (xa = xaList; xa != NULL; xa = xa->next) { fprintf(out, "%6d ", xa->milliScore); sprintf(region, "%s:%d-%d", xa->target, xa->tStart, xa->tEnd); fprintf(out, "<A HREF=\"../cgi-bin/tracks.exe?where=%s\">%21s</A> %s:%d-%d %c", region, region, xa->query, xa->qStart, xa->qEnd, xa->qStrand); fprintf(out, "\n"); } htmEnd(out); return 0; }