void wigAsciiAverageOverBed(char *inWig, char *inBed, char *out)
/* Load the wiggle file inWig into a range tree and the bed file inBed into a
 * list, then write one tab-separated line per bed record to out:
 * chrom, chromStart, chromEnd, name and the value averageWigForBed()
 * computes for that record. Load counts are reported at verbosity 1. */
{
struct rbTree *sections = wigIntoRangeTree(inWig);
verbose(1, "Read %d sections from %s\n", sections->n, inWig);

struct bed *items = bedLoadAll(inBed);
verbose(1, "Read %d items from %s\n", slCount(items), inBed);

FILE *outFile = mustOpen(out, "w");
struct bed *item = items;
while (item != NULL)
    {
    fprintf(outFile, "%s\t%d\t%d\t%s\t%f\n",
            item->chrom, item->chromStart, item->chromEnd, item->name,
            averageWigForBed(sections, item));
    item = item->next;
    }
carefulClose(&outFile);
}
void doBeds(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom, char *netTable, char *bedFileName, char *bedTableName, char *outBedName, char *selectedFileName, int *foundCount, int *notFoundCount) /* Map over beds. */ { FILE *bedOut = NULL; FILE *selectedOut = NULL; struct bed *bed=NULL, *bedList = NULL, *orthoBed=NULL; /* Load beds. */ warn("Loading beds."); if(bedFileName) bedList=bedLoadAll(bedFileName); else bedList=loadBedFromTable(conn, bedTableName, chrom, 0, BIGNUM); /* Convert beds. */ warn("Converting beds."); assert(outBedName); bedOut = mustOpen(outBedName, "w"); if (selectedFileName != NULL) selectedOut = mustOpen(selectedFileName, "w"); for(bed = bedList; bed != NULL; bed = bed->next) { if(differentString(bed->chrom, chrom)) continue; occassionalDot(); orthoBed = orthoBedFromBed(conn, db, orthoDb, netTable, bed); if(orthoBed != NULL && orthoBed->blockCount > 0) { (*foundCount)++; bedTabOutN(orthoBed, 12, bedOut); if (selectedOut != NULL) bedTabOutN(bed, 12, selectedOut); } else (*notFoundCount)++; bedFree(&orthoBed); } bedFreeList(&bedList); carefulClose(&selectedOut); carefulClose(&bedOut); }
void borfMatcher(char *bedIn, char *borfIn, char *bedOutFile, char *genePredOutFile) /* Top level function to open files and call other functions. */ { struct borf *borf = NULL, *borfList = NULL; struct bed *bed = NULL, *bedList = NULL; struct genePred *gp = NULL; float threshold = optionFloat("minScore", 50); FILE *bedOut = mustOpen(bedOutFile, "w"); FILE *genePredOut = mustOpen(genePredOutFile, "w"); boolean keepSmall = optionExists("keepSmall"); boolean keepNmd = optionExists("keepNmd"); borfList = borfLoadAll(borfIn); bedList = bedLoadAll(bedIn); dotForUserInit(slCount(bedList)/10); for(bed = bedList, borf = borfList; bed != NULL && borf != NULL; bed = bed->next, borf = borf->next) { dotForUser(); if(!stringIn(bed->name, borf->name)) errAbort("Trying to match up %s bed with %s borf - bad idea!", bed->name, borf->name); /* Have to adjust cds end. Borf puts stop codon outside of cds, we put it inside. */ borf->cdsEnd = min(borf->cdsEnd+3, borf->size); if((borf->score > threshold || (keepSmall && borf->cdsSize > 0)) && sameString(borf->strand, "+")) { setThickStartStop(bed, borf); if(keepNmd || !nmdTarget(bed)) { gp = bedToGenePred(bed); bedTabOutN(bed, 12, bedOut); genePredTabOut(gp, genePredOut); genePredFree(&gp); } } } warn("Done."); carefulClose(&bedOut); carefulClose(&genePredOut); }
static void processSeqsFromBed(struct twoBitFile *tbf, char *bedFileName, FILE *outFile) /* Get sequences defined by beds. Exclude introns. */ { struct bed *bed, *bedList = bedLoadAll(bedFileName); for (bed = bedList; bed != NULL; bed = bed->next) { struct dnaSeq *seq = twoBitAndBedToSeq(tbf, bed); char* seqName = NULL; if (clBedPos) { char buf[1024]; safef(buf, 1024, "%s:%d-%d", bed->chrom, bed->chromStart, bed->chromEnd); seqName = buf; } else seqName = seq->name; if (noMask) toUpperN(seq->dna, seq->size); faWriteNext(outFile, seqName, seq->dna, seq->size); dnaSeqFree(&seq); } }
void pickCassettePcrPrimers(char *db, char *bedFileName, char *primerFaName, char *primerBedName) /* pickCassettePcrPrimers - Takes a bedFile with three exons and for each bed calls primer3 to pick primers that will detect the inclusion or exclusion of the exon.. */ { struct bed *bed=NULL, *bedList = NULL; FILE *primerFa = NULL; FILE *primerBed = NULL; struct cassetteSeq *cseq = NULL; int targetExon = optionInt("targetExon", 1); hSetDb(db); bed = bedList = bedLoadAll(bedFileName); primerFa = mustOpen(primerFaName, "w"); primerBed = mustOpen(primerBedName, "w"); for(bed=bedList; bed != NULL; bed = bed->next) { cseq = cassetteSeqFromBed(bed, targetExon); callPrimer3(cseq, primerFa, primerBed); cassetteSeqFree(&cseq); } bedFreeList(&bedList); carefulClose(&primerFa); carefulClose(&primerBed); }
void calculateBinomialP(char* regdomFn, char* antigapFn, int totalRegions, int hitRegions)
/* Calculate binomial p-value of enrichment based on regulatory domains and
 * regions hit, and print it to stdout in "%e" format.
 *   regdomFn     - regulatory domain file read by readInitializedRegdomFile()
 *   antigapFn    - bed file of non-gap regions; these must not overlap
 *   totalRegions - number of regions tested
 *   hitRegions   - number of those regions that hit a regulatory domain
 * The annotation weight is annotated non-gap bases / total non-gap bases.
 * No check is made here for an antigap file totalling zero bases. */
{
struct regdom* regdoms = readInitializedRegdomFile(regdomFn);

// This will hold the union of all regulatory domains for quick search
struct genomeRangeTree *ranges = getRangeTreeOfRegdoms(regdoms);

// NOTE: Each of these regions must be non-overlapping.
struct bed* antigaps = bedLoadAll(antigapFn);

long totalNonGapBases = getTotalNonGapBases(antigaps);
long annotatedNonGapBases = getAnnotatedNonGapBases(ranges, antigaps);

double annotationWeight = (double)annotatedNonGapBases/(double)totalNonGapBases;

double binomP = getBinomPval(totalRegions, hitRegions, annotationWeight);

printf("%e\n", binomP);

/* Pass the address of the list head so the free routine can release it. */
regdomFreeList(&regdoms);
bedFreeList(&antigaps);
genomeRangeTreeFree(&ranges);
}
struct genePred *gpFromBedFile(char *file) /* Load entries from a bed file, convert them to genePreds and return them. */ { struct bed *bedList = NULL, *bed = NULL; struct genePred *gpList = NULL, *gp = NULL; bedList = bedLoadAll(file); for(bed = bedList; bed != NULL; bed = bed->next) { gp = bedToGenePred(bed); /* pslxFileOpen gaks if strand is not + or -. bedToGenePred returns * the bed strand, which might be empty (for #fields < 6) or ".". * If so, fake out the strand to + in order to get readable PSL. */ if (! (sameString(gp->strand, "+") || sameString(gp->strand, "-"))) { gp->strand[0] = '+'; gp->strand[1] = '\0'; } slAddHead(&gpList, gp); } slReverse(&gpList); bedFreeList(&bedList); return gpList; }
void consForBed() /* Open and read the bed file. Load consFile into an double array for easy access and process. */ { char *bedFileName = NULL; char *chrom = NULL; struct bed *bedList = NULL, *bed = NULL; char *consFileName = NULL; int *consProb = NULL; char *consBedName = NULL; FILE *consBedOut = NULL; char *summaryBedName = NULL; FILE *summaryBedOut = NULL; /* Get the output file names. */ consBedName = optionVal("bedConsOut", NULL); if(consBedName == NULL) errAbort("Must specify an output file for bed conservation."); summaryBedName = optionVal("summary", NULL); /* What chromosome are we on? */ chrom = optionVal("chrom", NULL); if(chrom == NULL) errAbort("Must specify a chromosome."); /* read in the beds. */ warn("Reading in beds."); bedFileName = optionVal("bedFile", NULL); if(bedFileName != NULL) bedList = bedLoadAll(bedFileName); else errAbort("Must specify a bedFile.\n"); /* Read in the conservation scores. */ consFileName = optionVal("consFile", NULL); if(consFileName != NULL) consProb = readInConservationVals(consFileName); else errAbort("Must specify a conservation file."); /* Open output files */ consBedOut = mustOpen(consBedName,"w"); if(summaryBedName != NULL) summaryBedOut = mustOpen(summaryBedName, "w"); /* Process each individual bed. */ warn("Writing out conservation for beds."); for(bed = bedList; bed != NULL; bed = bed->next) { if(differentString(chrom, bed->chrom)) continue; outputBedConservation(bed, consProb, consBedOut, summaryBedOut); } warn("Cleaning up"); carefulClose(&consBedOut); carefulClose(&summaryBedOut); freez(&consProb); warn("Done."); }
//============================== MAIN ========================================= int main(int argc, char *argv[]) { Flower *flower; /* * Arguments/options */ char * st_logLevelString = NULL; char * cactusDiskDatabaseString = NULL; char * flowerName = "0"; char * outputFile = NULL; char * species = NULL; char * geneFile = NULL; /////////////////////////////////////////////////////////////////////////// // (0) Parse the inputs handed by genomeCactus.py / setup stuff. /////////////////////////////////////////////////////////////////////////// while(1) { static struct option long_options[] = { { "genePslFile", required_argument, 0, 'g' }, { "species", required_argument, 0, 's' }, { "st_logLevel", required_argument, 0, 'a' }, { "cactusDisk", required_argument, 0, 'c' }, { "outputFile", required_argument, 0, 'o' }, { "help", no_argument, 0, 'h' }, { 0, 0, 0, 0 } }; int option_index = 0; int key = getopt_long(argc, argv, "s:g:o:a:c:h", long_options, &option_index); if(key == -1) { break; } switch(key) { case 'a': st_logLevelString = stString_copy(optarg); break; case 'c': cactusDiskDatabaseString = stString_copy(optarg); break; case 'o': outputFile = stString_copy(optarg); break; case 's': species = stString_copy(optarg); break; case 'g': geneFile = stString_copy(optarg); break; case 'h': usage(); return 0; default: usage(); return 1; } } /////////////////////////////////////////////////////////////////////////// // (0) Check the inputs. 
/////////////////////////////////////////////////////////////////////////// assert(cactusDiskDatabaseString != NULL); assert(outputFile != NULL); assert(species != NULL); assert(geneFile != NULL); ////////////////////////////////////////////// //Set up st_logging ////////////////////////////////////////////// st_setLogLevelFromString(st_logLevelString); ////////////////////////////////////////////// //Log (some of) the inputs ////////////////////////////////////////////// st_logInfo("Flower disk name : %s\n", cactusDiskDatabaseString); st_logInfo("Output file : %s\n", outputFile); st_logInfo("Species: %s\n", species); st_logInfo("GenePslFile: %s\n", geneFile); ////////////////////////////////////////////// //Load the database ////////////////////////////////////////////// stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString); CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0); st_logInfo("Set up the flower disk\n"); /////////////////////////////////////////////////////////////////////////// // Parse the basic reconstruction problem /////////////////////////////////////////////////////////////////////////// flower = cactusDisk_getFlower(cactusDisk, cactusMisc_stringToName(flowerName)); st_logInfo("Parsed the top level flower of the cactus tree to check\n"); /////////////////////////////////////////////////////////////////////////// // Recursive check the flowers. /////////////////////////////////////////////////////////////////////////// int64_t startTime = time(NULL); FILE *fileHandle = fopen(outputFile, "w"); struct bed *gene = bedLoadAll(geneFile); mapGenes(flower, fileHandle, gene, species); fclose(fileHandle); st_logInfo("Map genes in %" PRIi64 " seconds/\n", time(NULL) - startTime); /////////////////////////////////////////////////////////////////////////// // Clean up. /////////////////////////////////////////////////////////////////////////// cactusDisk_destruct(cactusDisk); return 0; }
void hgExperiment(char *database, char *table, char *expFile, char *posFile, char *dataFile) /* Main function */ { struct lineFile *lf; int *data = NULL; int *scores; FILE *f = NULL; char expTable[32]; char *words[3]; int wordCt; struct bed *bedList, *bed; int expCount; struct hash *expHash, *dataHash; struct hashEl *hel; /* Open experiment file and use it to create experiment table. Use optional fields if present, otherwise defaults */ safef(expTable, ArraySize(expTable), "%sExps", table); expHash = makeExpsTable(database, expTable, expFile, &expCount); /* Read in positions file */ bedList = bedLoadAll(posFile); slSort(&bedList, bedCmp); /* Read data file into a hash of arrays of data values, keyed by name */ dataHash = newHash(0); lf = lineFileOpen(dataFile, TRUE); while ((wordCt = lineFileChopNext(lf, words, ArraySize(words)))) { /* format: <region-name> <experiment-name> <data-value> */ char *name, *exp; int expId; int value; if (wordCt != 3) errAbort("Expecting 3 words in data file, got %d line %d of %s", wordCt, lf->lineIx, lf->fileName); name = words[0]; hel = hashLookup(dataHash, name); if (!hel) { AllocArray(data, expCount); hel = hashAdd(dataHash, name, data); } data = (int *)hel->val; exp = words[1]; expId = hashIntVal(expHash, exp); if (expId < 0 || expId > expCount-1) errAbort("Invalid experiment ID %d for %s, line %d of %s", expId, exp, lf->lineIx, lf->fileName); //value = atoi(words[2]); value = round(atof(words[2])); if (data[expId] != 0) errAbort("Extra experiment data value %d for %s %s, line %d of %s", value, name, exp, lf->lineIx, lf->fileName); data[expId] = value; } lineFileClose(&lf); /* Fill in BED15 fields - add experiment values, and setup block (only 1)*/ for (bed = bedList; bed != NULL; bed = bed->next) { int i; bed->thickStart = bed->chromStart; bed->thickEnd = bed->chromEnd; bed->blockCount = 1; AllocArray(bed->blockSizes, 1); bed->blockSizes[0] = bed->chromEnd - bed->chromStart; AllocArray(bed->chromStarts, 1); bed->chromStarts[0] = 
0; bed->expCount = expCount; AllocArray(bed->expIds, expCount); for (i = 0; i < expCount; i++) bed->expIds[i] = i; AllocArray(bed->expScores, expCount); scores = hashMustFindVal(dataHash, bed->name); for (i = 0; i < expCount; i++) bed->expScores[i] = scores[i]; /* set score for bed to the average of the scores in all experiments */ calculateAverage(bed); } /* from affyPslAndAtlsoToBed ? convertIntensitiesToRatios(bedList); */ /* Write BED data file */ f = hgCreateTabFile(tabDir, table); for (bed = bedList; bed != NULL; bed = bed->next) bedTabOutN(bed, 15, f); /* Cleanup */ carefulClose(&f); freeHash(&expHash); freeHash(&dataHash); bedFreeList(&bedList); }
static void randomPlacement(char *bounding, char *placed) { struct bed *boundingElements = bedLoadAll(bounding); struct bed *placeItems = bedLoadAll(placed); struct bed *nearestNeighbors = NULL; int boundingCount = slCount(boundingElements); int placedCount = slCount(placeItems); int neighborCount = 0; struct chrGapList *boundingGaps = NULL; struct chrGapList *duplicateGapList = NULL; struct chrGapList *neighborGaps = NULL; struct statistic *statsList = NULL; struct statistic *statEl = NULL; if (neighbor) { nearestNeighbors = bedLoadAll(neighbor); slSort(&nearestNeighbors, bedCmp); /* order by chrom,chromStart */ neighborCount = slCount(nearestNeighbors); verbose(2, "neighbor element count: %d\n", neighborCount); neighborGaps = createGaps(nearestNeighbors); } slSort(&boundingElements, bedCmp); /* order by chrom,chromStart */ slSort(&placeItems, bedCmp); /* order by chrom,chromStart */ verbose(2, "bounding element count: %d\n", boundingCount); verbose(2, "placed item count: %d\n", placedCount); boundingGaps = createGaps(boundingElements); if (TRUE) /* display initial placement stats only */ { char *neighborName = NULL; if (neighbor) { neighborName = cloneString(neighbor); duplicateGapList = cloneGapList(neighborGaps); } else { neighborName = cloneString(bounding); duplicateGapList = cloneGapList(boundingGaps); } verbose(2,"stats before initial placement: =================\n"); statEl = gapStats(duplicateGapList, (char *)NULL, (char *)NULL, (char *)NULL); printf("statistics on gaps before any placements:\n\t(%s)\n", neighborName); statsPrint(statEl); slAddHead(&statsList,statEl); initialPlacement(duplicateGapList,placeItems); verbose(2,"stats after initial placement: =================\n"); statEl = gapStats(duplicateGapList, zeroBedOutFile, shoulderBedOutFile, distOut); printf("statistics after initial placement of placed items:\n\t(%s)\n", placed); statsPrint(statEl); slAddHead(&statsList,statEl); freeChrList(&duplicateGapList, FALSE); slReverse(&statsList); 
freeMem(neighborName); } if (trials > 0) { int trial; srand48((long int)seed); /* for default seed=0, same set of randoms */ slSort(&placeItems, bedCmpSize); /* order by size of elements */ slReverse(&placeItems); /* largest ones first */ measurePlaced(placeItems); /* show placed item characteristics */ for (trial = 0; trial < trials; ++trial) { struct bed *randomPlacedBedList; duplicateGapList = cloneGapList(boundingGaps); randomPlacedBedList = randomTrial(duplicateGapList,placeItems); if (neighbor) { struct chrGapList *duplicateNeighborList; slSort(&randomPlacedBedList,bedCmp);/*order by chrom,chromStart*/ duplicateNeighborList = cloneGapList(neighborGaps); initialPlacement(duplicateNeighborList,randomPlacedBedList); statEl = gapStats(duplicateNeighborList, (char *)NULL, (char *)NULL, (char *)NULL); freeChrList(&duplicateNeighborList, FALSE); } else statEl = gapStats(duplicateGapList, (char *)NULL, (char *)NULL, (char *)NULL); slAddHead(&statsList,statEl); /* this gap list has temporary bed elements that were * created by the randomTrial(), they need to be freed as * the list is released, hence the TRUE signal. * It isn't a true freeBedList operation because the chrom * names are left intact in the original copy of the bed * list. (The names were being shared.) */ if ((trial == (trials - 1)) && (bedOutFile != NULL)) { bedListOutput(duplicateGapList, bedOutFile); } freeChrList(&duplicateGapList, TRUE); } slReverse(&statsList); statsPrint(statsList); } if (neighbor) { bedFreeList(&nearestNeighbors); freeChrList(&neighborGaps, FALSE); } bedFreeList(&boundingElements); bedFreeList(&placeItems); freeChrList(&boundingGaps, FALSE); }