void wigAsciiAverageOverBed(char *inWig, char *inBed, char *out)
/* Load the wiggle file inWig into a range tree and the bed records from inBed,
 * then write one tab-separated line per bed record to out:
 * chrom, chromStart, chromEnd, name and the value that averageWigForBed()
 * computes for that record against the range tree. */
{
    struct rbTree *sections = wigIntoRangeTree(inWig);
    verbose(1, "Read %d sections from %s\n", sections->n, inWig);

    struct bed *allBeds = bedLoadAll(inBed);
    verbose(1, "Read %d items from %s\n", slCount(allBeds), inBed);

    FILE *outFile = mustOpen(out, "w");
    struct bed *item = allBeds;
    while (item != NULL)
    {
        /* One output row per bed record, in input order. */
        fprintf(outFile, "%s\t%d\t%d\t%s\t%f\n",
                item->chrom, item->chromStart, item->chromEnd, item->name,
                averageWigForBed(sections, item));
        item = item->next;
    }
    carefulClose(&outFile);
}
Ejemplo n.º 2
0
void doBeds(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom, 
	    char *netTable, char *bedFileName, char *bedTableName,
	    char *outBedName, char *selectedFileName, int *foundCount, int *notFoundCount)	
/* Map beds on chrom through orthoBedFromBed().  Beds are read from bedFileName
 * when it is given, otherwise from bedTableName.  Every bed that yields an
 * ortho bed with at least one block is written to outBedName (and the source
 * bed to selectedFileName, when given) and counted in *foundCount; every other
 * bed on chrom is counted in *notFoundCount.  Beds on other chromosomes are
 * skipped without being counted. */
{
struct bed *sourceBeds = NULL, *cur = NULL;
FILE *orthoOut = NULL, *selectedOut = NULL;

/* Load beds. */
warn("Loading beds.");
sourceBeds = (bedFileName != NULL)
    ? bedLoadAll(bedFileName)
    : loadBedFromTable(conn, bedTableName, chrom, 0, BIGNUM);

/* Convert beds. */
warn("Converting beds.");
assert(outBedName);
orthoOut = mustOpen(outBedName, "w");
if (selectedFileName != NULL)
    selectedOut = mustOpen(selectedFileName, "w");

for (cur = sourceBeds; cur != NULL; cur = cur->next)
    {
    struct bed *mapped = NULL;

    if (differentString(cur->chrom, chrom))
	continue;
    occassionalDot();
    mapped = orthoBedFromBed(conn, db, orthoDb, netTable, cur);
    if (mapped == NULL || mapped->blockCount <= 0)
	{
	*notFoundCount += 1;
	}
    else
	{
	*foundCount += 1;
	bedTabOutN(mapped, 12, orthoOut);
	if (selectedOut != NULL)
	    bedTabOutN(cur, 12, selectedOut);
	}
    bedFree(&mapped);
    }

bedFreeList(&sourceBeds);
carefulClose(&selectedOut);
carefulClose(&orthoOut);
}
Ejemplo n.º 3
0
void borfMatcher(char *bedIn, char *borfIn, char *bedOutFile, char *genePredOutFile)
/* Top level function to open files and call other functions.
 * Loads the beds from bedIn and the borfs from borfIn and walks the two lists
 * in parallel, pairing records by position.  Aborts when
 * stringIn(bed->name, borf->name) fails for a pair.  For each '+' strand borf
 * that scores above the "minScore" option (default 50), or that has a non-empty
 * cds when "keepSmall" is set, the bed's thick start/stop are set from the borf
 * and, unless it is an nmdTarget() and "keepNmd" is not set, the bed is written
 * to bedOutFile and its genePred to genePredOutFile. */
{
struct borf *borf = NULL, *borfList = NULL;
struct bed *bed = NULL, *bedList = NULL;
struct genePred *gp = NULL;
float threshold = optionFloat("minScore", 50);
FILE *bedOut = mustOpen(bedOutFile, "w");
FILE *genePredOut = mustOpen(genePredOutFile, "w");
boolean keepSmall = optionExists("keepSmall");
boolean keepNmd = optionExists("keepNmd");
int dotEvery = 0;

borfList = borfLoadAll(borfIn);
bedList = bedLoadAll(bedIn);

/* slCount()/10 is 0 when there are fewer than 10 beds; clamp so the progress
 * interval handed to dotForUserInit() is always positive. */
dotEvery = slCount(bedList)/10;
if (dotEvery < 1)
    dotEvery = 1;
dotForUserInit(dotEvery);

for(bed = bedList, borf = borfList; bed != NULL && borf != NULL; bed = bed->next, borf = borf->next)
    {
    dotForUser();
    if(!stringIn(bed->name, borf->name))
	errAbort("Trying to match up %s bed with %s borf - bad idea!", bed->name, borf->name);
    /* Have to adjust cds end. Borf puts stop codon outside of cds, 
       we put it inside. */
    borf->cdsEnd = min(borf->cdsEnd+3, borf->size);
    if((borf->score > threshold || (keepSmall && borf->cdsSize > 0)) && sameString(borf->strand, "+"))
	{
	setThickStartStop(bed, borf);
	if(keepNmd || !nmdTarget(bed))
	    {
	    gp = bedToGenePred(bed);
	    bedTabOutN(bed, 12, bedOut);
	    genePredTabOut(gp, genePredOut);
	    genePredFree(&gp);
	    }
	}
    }

/* The loop stops at the end of the shorter list; say so rather than silently
 * ignoring the unpaired records. */
if (bed != NULL || borf != NULL)
    warn("Bed file %s and borf file %s have different numbers of records; unpaired records were ignored.",
	 bedIn, borfIn);

warn("Done.");
carefulClose(&bedOut);
carefulClose(&genePredOut);
}
Ejemplo n.º 4
0
static void processSeqsFromBed(struct twoBitFile *tbf, char *bedFileName, FILE *outFile)
/* Get sequences defined by beds.  Exclude introns.
 * For every bed in bedFileName the sequence returned by twoBitAndBedToSeq() is
 * written to outFile with faWriteNext().  The record is named
 * "chrom:chromStart-chromEnd" when clBedPos is set, otherwise with the
 * sequence's own name.  When noMask is set the sequence is upper-cased first. */
{
struct bed *bed, *bedList = bedLoadAll(bedFileName);
for (bed = bedList; bed != NULL; bed = bed->next)
    {
    struct dnaSeq *seq = twoBitAndBedToSeq(tbf, bed);
    /* Lives at loop scope on purpose: seqName may point into it and is used
     * by faWriteNext() below, so it must outlive the if-block that fills it. */
    char posName[1024];
    char *seqName = seq->name;
    if (clBedPos) 
        {
        safef(posName, sizeof(posName), "%s:%d-%d", bed->chrom, bed->chromStart, bed->chromEnd);
        seqName = posName;
        }
    if (noMask)
        toUpperN(seq->dna, seq->size);
    faWriteNext(outFile, seqName, seq->dna, seq->size);
    dnaSeqFree(&seq);
    }
/* NOTE(review): bedList is not released in this function - confirm whether
 * the caller relies on process exit for that. */
}
void pickCassettePcrPrimers(char *db, char *bedFileName, char *primerFaName, char *primerBedName)
/* pickCassettePcrPrimers - For each bed in bedFileName build a cassetteSeq
 * around the exon selected by the "targetExon" option (default 1) and hand it
 * to callPrimer3() together with the primerFaName and primerBedName output
 * files. */
{
int targetExon = optionInt("targetExon", 1);
struct bed *allBeds = NULL, *cur = NULL;
FILE *faOut = NULL, *bedOut = NULL;

hSetDb(db);
allBeds = bedLoadAll(bedFileName);

faOut = mustOpen(primerFaName, "w");
bedOut = mustOpen(primerBedName, "w");

cur = allBeds;
while (cur != NULL)
    {
    /* Each cassetteSeq only lives for one primer3 call. */
    struct cassetteSeq *cs = cassetteSeqFromBed(cur, targetExon);
    callPrimer3(cs, faOut, bedOut);
    cassetteSeqFree(&cs);
    cur = cur->next;
    }

bedFreeList(&allBeds);
carefulClose(&faOut);
carefulClose(&bedOut);
}
Ejemplo n.º 6
0
void calculateBinomialP(char* regdomFn, char* antigapFn, int totalRegions, int hitRegions)
/* Print (with "%e") the value getBinomPval() returns for totalRegions and
 * hitRegions, using as weight the ratio of annotated non-gap bases to total
 * non-gap bases.  Regulatory domains are read from regdomFn and the non-gap
 * regions from antigapFn. */
{
	struct regdom *domains = readInitializedRegdomFile(regdomFn);

	// Range tree built from all the regulatory domains
	struct genomeRangeTree *domainRanges = getRangeTreeOfRegdoms(domains);

	// NOTE: Each of these regions must be non-overlapping.
	struct bed *nonGapRegions = bedLoadAll(antigapFn);

	long totalBases = getTotalNonGapBases(nonGapRegions);
	long coveredBases = getAnnotatedNonGapBases(domainRanges, nonGapRegions);
	double weight = (double)coveredBases / (double)totalBases;

	double pValue = getBinomPval(totalRegions, hitRegions, weight);
	printf("%e\n", pValue);

	regdomFreeList(&domains);
	bedFreeList(&nonGapRegions);
	genomeRangeTreeFree(&domainRanges);
}
Ejemplo n.º 7
0
struct genePred *gpFromBedFile(char *file) 
/* Read every bed in file, convert each one with bedToGenePred() and return
   the resulting genePreds as a list in the same order as the beds. */
{
struct bed *beds = bedLoadAll(file);
struct bed *cur = NULL;
struct genePred *converted = NULL;

for (cur = beds; cur != NULL; cur = cur->next)
    {
    struct genePred *gp = bedToGenePred(cur);
    /* pslxFileOpen gaks if strand is not + or -.  bedToGenePred returns
     * the bed strand, which might be empty (for #fields < 6) or ".".
     * In that case force the strand to "+" so the PSL stays readable. */
    if (!sameString(gp->strand, "+") && !sameString(gp->strand, "-"))
	{
	gp->strand[0] = '+';
	gp->strand[1] = '\0';
	}
    slAddHead(&converted, gp);
    }

/* Head insertion built the list backwards; restore input order. */
slReverse(&converted);
bedFreeList(&beds);
return converted;
}
Ejemplo n.º 8
0
void consForBed() 
/* Driven entirely by command line options: load the beds named by "bedFile"
   and the conservation values named by "consFile", then call
   outputBedConservation() for every bed on the chromosome given by "chrom".
   Output goes to the "bedConsOut" file and, when given, the "summary" file.
   Aborts when bedConsOut, chrom, bedFile or consFile is missing. */
{
struct bed *beds = NULL, *cur = NULL;
int *consProb = NULL;
FILE *consBedOut = NULL, *summaryBedOut = NULL;
char *consBedName = NULL, *summaryBedName = NULL;
char *chrom = NULL, *bedFileName = NULL, *consFileName = NULL;

/* Output file names: the conservation bed is required, the summary is not. */
consBedName = optionVal("bedConsOut", NULL);
if (consBedName == NULL)
    errAbort("Must specify an output file for bed conservation.");
summaryBedName = optionVal("summary", NULL);

/* Chromosome to restrict the beds to. */
chrom = optionVal("chrom", NULL);
if (chrom == NULL)
    errAbort("Must specify a chromosome.");

/* Beds. */
warn("Reading in beds.");
bedFileName = optionVal("bedFile", NULL);
if (bedFileName == NULL)
    errAbort("Must specify a bedFile.\n");
else
    beds = bedLoadAll(bedFileName);

/* Conservation scores. */
consFileName = optionVal("consFile", NULL);
if (consFileName == NULL)
    errAbort("Must specify a conservation file.");
else
    consProb = readInConservationVals(consFileName);

/* Outputs. */
consBedOut = mustOpen(consBedName,"w");
if (summaryBedName != NULL)
    summaryBedOut = mustOpen(summaryBedName, "w");

/* Only beds on the requested chromosome are processed. */
warn("Writing out conservation for beds.");
for (cur = beds; cur != NULL; cur = cur->next)
    {
    if (!differentString(chrom, cur->chrom))
	outputBedConservation(cur, consProb, consBedOut, summaryBedOut);
    }

warn("Cleaning up");
carefulClose(&consBedOut);
carefulClose(&summaryBedOut);
freez(&consProb);
warn("Done.");
}
//============================== MAIN =========================================
int main(int argc, char *argv[]) {
   /*
    * Parses the command line, opens the cactus disk, fetches flower "0" and calls
    * mapGenes() on it with the beds loaded from the gene file, writing to the output file.
    * Returns 0 on success or after --help; returns 1 on an unknown option, a missing
    * required option, or when the output file cannot be opened.
    */
   Flower *flower;

   /*
    * Arguments/options
    */
   char * st_logLevelString = NULL;
   char * cactusDiskDatabaseString = NULL;
   char * flowerName = "0";
   char * outputFile = NULL;
   char * species = NULL;
   char * geneFile = NULL;

   ///////////////////////////////////////////////////////////////////////////
   // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
   ///////////////////////////////////////////////////////////////////////////

   while(1) {
      static struct option long_options[] = {
         { "genePslFile", required_argument, 0, 'g' },
         { "species", required_argument, 0, 's' },
         { "st_logLevel", required_argument, 0, 'a' },
         { "cactusDisk", required_argument, 0, 'c' },
         { "outputFile", required_argument, 0, 'o' },
         { "help", no_argument, 0, 'h' },
         { 0, 0, 0, 0 }
      };

      int option_index = 0;

      int key = getopt_long(argc, argv, "s:g:o:a:c:h", long_options, &option_index);

      if(key == -1) {
         break;
      }

      switch(key) {
         case 'a':
            st_logLevelString = stString_copy(optarg);
            break;
         case 'c':
            cactusDiskDatabaseString = stString_copy(optarg);
            break;
         case 'o':
            outputFile = stString_copy(optarg);
            break;
         case 's':
            species = stString_copy(optarg);
            break;
         case 'g':
            geneFile = stString_copy(optarg);
            break;
         case 'h':
            usage();
            return 0;
         default:
            usage();
            return 1;
      }
   }

   ///////////////////////////////////////////////////////////////////////////
   // (1) Check the inputs.
   ///////////////////////////////////////////////////////////////////////////

   // Checked at run time rather than with assert(), so the validation is still
   // performed when the program is built with NDEBUG.
   if(cactusDiskDatabaseString == NULL || outputFile == NULL || species == NULL || geneFile == NULL) {
      usage();
      return 1;
   }

   //////////////////////////////////////////////
   //Set up st_logging
   //////////////////////////////////////////////

   st_setLogLevelFromString(st_logLevelString);

   //////////////////////////////////////////////
   //Log (some of) the inputs
   //////////////////////////////////////////////

   st_logInfo("Flower disk name : %s\n", cactusDiskDatabaseString);
   st_logInfo("Output file : %s\n", outputFile);
   st_logInfo("Species: %s\n", species);
   st_logInfo("GenePslFile: %s\n", geneFile);

   //////////////////////////////////////////////
   //Load the database
   //////////////////////////////////////////////

   stKVDatabaseConf *kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskDatabaseString);
   CactusDisk *cactusDisk = cactusDisk_construct(kvDatabaseConf, 0);
   st_logInfo("Set up the flower disk\n");

   ///////////////////////////////////////////////////////////////////////////
   // Parse the basic reconstruction problem
   ///////////////////////////////////////////////////////////////////////////
   flower = cactusDisk_getFlower(cactusDisk, cactusMisc_stringToName(flowerName));
   st_logInfo("Parsed the top level flower of the cactus tree to check\n");

   ///////////////////////////////////////////////////////////////////////////
   // Map the genes onto the flower.
   ///////////////////////////////////////////////////////////////////////////

   int64_t startTime = time(NULL);
   FILE *fileHandle = fopen(outputFile, "w");
   if(fileHandle == NULL) {
      // fopen() returns NULL on failure; do not hand a NULL stream to
      // mapGenes() or fclose().
      fprintf(stderr, "Unable to open output file %s for writing\n", outputFile);
      cactusDisk_destruct(cactusDisk);
      return 1;
   }
   struct bed *gene = bedLoadAll(geneFile);
   mapGenes(flower, fileHandle, gene, species);
   fclose(fileHandle);
   // Cast so the argument type matches the PRIi64 conversion whatever time_t is.
   st_logInfo("Map genes in %" PRIi64 " seconds/\n", (int64_t) (time(NULL) - startTime));

   ///////////////////////////////////////////////////////////////////////////
   // Clean up.
   ///////////////////////////////////////////////////////////////////////////

   cactusDisk_destruct(cactusDisk);

   return 0;
}
void hgExperiment(char *database, char *table, 
                        char *expFile, char *posFile, char *dataFile)
/* Create the "<table>Exps" experiment table from expFile, then merge the
 * positions in posFile with the per-experiment values in dataFile and write
 * the result as 15-field bed rows to the tab file created for table.
 * Aborts on a data line that does not have 3 words, on an experiment id
 * outside 0..expCount-1, and on a (region, experiment) pair whose slot already
 * holds a non-zero value. */
{
char expTable[32];
int expCount;
struct hash *expHash = NULL, *dataHash = NULL;
struct bed *bedList = NULL, *item = NULL;
struct lineFile *lf = NULL;
char *words[3];
int wordCt;
FILE *tabFile = NULL;

/* Open experiment file and use it to create experiment table.
   Use optional fields if present, otherwise defaults */
safef(expTable, ArraySize(expTable), "%sExps", table);
expHash = makeExpsTable(database, expTable, expFile, &expCount);

/* Positions, sorted with bedCmp */
bedList = bedLoadAll(posFile);
slSort(&bedList, bedCmp);

/* Data file -> hash of int arrays (one slot per experiment), keyed by
 * region name.  Line format: <region-name> <experiment-name> <data-value> */
dataHash = newHash(0);
lf = lineFileOpen(dataFile, TRUE);
for (;;)
    {
    char *regionName = NULL, *expName = NULL;
    struct hashEl *entry = NULL;
    int *values = NULL;
    int expId, value;

    wordCt = lineFileChopNext(lf, words, ArraySize(words));
    if (wordCt == 0)
        break;
    if (wordCt != 3)
        errAbort("Expecting 3 words in data file, got %d line %d of %s", 
		wordCt, lf->lineIx, lf->fileName);

    /* Find the value array for this region, creating it on first sight */
    regionName = words[0];
    entry = hashLookup(dataHash, regionName);
    if (entry == NULL)
        {
        AllocArray(values, expCount);
        entry = hashAdd(dataHash, regionName, values);
        }
    values = (int *)entry->val;

    expName = words[1];
    expId = hashIntVal(expHash, expName);
    if (expId < 0 || expId >= expCount)
        errAbort("Invalid experiment ID %d for %s, line %d of %s",
                 expId, expName, lf->lineIx, lf->fileName);

    /* Values are stored as ints, rounded from the text in the file */
    value = round(atof(words[2]));
    if (values[expId] != 0)
        errAbort("Extra experiment data value %d for %s %s, line %d of %s",
                         value, regionName, expName, lf->lineIx, lf->fileName);
    values[expId] = value;
    }
lineFileClose(&lf);

/* Fill in BED15 fields - add experiment values, and setup block (only 1)*/
item = bedList;
while (item != NULL)
    {
    int *regionScores = NULL;
    int ix;

    /* Single block spanning the whole item */
    item->thickStart = item->chromStart;
    item->thickEnd = item->chromEnd;
    item->blockCount = 1;
    AllocArray(item->blockSizes, 1);
    item->blockSizes[0] = item->chromEnd - item->chromStart;
    AllocArray(item->chromStarts, 1);
    item->chromStarts[0] = 0;

    /* Experiment ids are simply 0..expCount-1 */
    item->expCount = expCount;
    AllocArray(item->expIds, expCount);
    for (ix = 0; ix < expCount; ++ix)
        item->expIds[ix] = ix;

    /* Scores come from the data hash entry with the item's name */
    AllocArray(item->expScores, expCount);
    regionScores = hashMustFindVal(dataHash, item->name);
    for (ix = 0; ix < expCount; ++ix)
        item->expScores[ix] = regionScores[ix];

    /* set score for bed to the average of the scores in all experiments */
    calculateAverage(item);
    item = item->next;
    }

/* Write BED data file */
tabFile = hgCreateTabFile(tabDir, table);
for (item = bedList; item != NULL; item = item->next)
    bedTabOutN(item, 15, tabFile);

/* Cleanup */
carefulClose(&tabFile);
freeHash(&expHash);
freeHash(&dataHash);
bedFreeList(&bedList);
}
static void randomPlacement(char *bounding, char *placed)
/* Load the bounding elements and the items to place (both bed files), print
 * gap statistics before and after the initial placement of the items and,
 * when trials > 0, run that many random placement trials and print their
 * statistics.
 * Besides its two arguments this function reads neighbor, trials, seed,
 * zeroBedOutFile, shoulderBedOutFile, distOut and bedOutFile, none of which
 * are declared here (presumably file-scope command line options - confirm). */
{
struct bed *boundingElements = bedLoadAll(bounding);
struct bed *placeItems = bedLoadAll(placed);
struct bed *nearestNeighbors = NULL;
int boundingCount = slCount(boundingElements);
int placedCount = slCount(placeItems);
int neighborCount = 0;
struct chrGapList *boundingGaps = NULL;
struct chrGapList *duplicateGapList = NULL;
struct chrGapList *neighborGaps = NULL;
struct statistic *statsList = NULL;
struct statistic *statEl = NULL;

/* Optional neighbor file: when given, its gaps (not the bounding gaps) are
 * the ones statistics are measured against below. */
if (neighbor)
    {
    nearestNeighbors = bedLoadAll(neighbor);
    slSort(&nearestNeighbors, bedCmp);	/* order by chrom,chromStart */
    neighborCount = slCount(nearestNeighbors);
    verbose(2, "neighbor element count: %d\n", neighborCount);
    neighborGaps = createGaps(nearestNeighbors);
    }
slSort(&boundingElements, bedCmp);	/* order by chrom,chromStart */
slSort(&placeItems, bedCmp);		/* order by chrom,chromStart */

verbose(2, "bounding element count: %d\n", boundingCount);
verbose(2, "placed item count: %d\n", placedCount);

boundingGaps = createGaps(boundingElements);

if (TRUE)	/*	display initial placement stats only	*/
    {
    char *neighborName = NULL;

    /* Work on a clone so neighborGaps / boundingGaps stay available for the
     * trials further down. */
    if (neighbor)
	{
	neighborName = cloneString(neighbor);
	duplicateGapList = cloneGapList(neighborGaps);
	}
    else
	{
	neighborName = cloneString(bounding);
	duplicateGapList = cloneGapList(boundingGaps);
	}

    /* First statistic: the gaps as loaded, no output files requested. */
    verbose(2,"stats before initial placement:  =================\n");
    statEl = gapStats(duplicateGapList, (char *)NULL, (char *)NULL, (char *)NULL);
    printf("statistics on gaps before any placements:\n\t(%s)\n", neighborName);
    statsPrint(statEl);
    slAddHead(&statsList,statEl);

    initialPlacement(duplicateGapList,placeItems);

    /* Second statistic: after placing the items; this is the only gapStats()
     * call that is given the three output file names. */
    verbose(2,"stats after initial placement:  =================\n");
    statEl = gapStats(duplicateGapList, zeroBedOutFile, shoulderBedOutFile,
	distOut);
    printf("statistics after initial placement of placed items:\n\t(%s)\n",
		placed);
    statsPrint(statEl);
    slAddHead(&statsList,statEl);

    /* FALSE here versus TRUE in the trial loop: see the comment in the loop
     * about which gap lists own temporary bed elements. */
    freeChrList(&duplicateGapList, FALSE);
    slReverse(&statsList);
    freeMem(neighborName);
    }

if (trials > 0)
    {
    int trial;

    srand48((long int)seed);	/* for default seed=0, same set of randoms */

    slSort(&placeItems, bedCmpSize);	/* order by size of elements */
    slReverse(&placeItems);		/* largest ones first	*/
    measurePlaced(placeItems);		/* show placed item characteristics */
    for (trial = 0; trial < trials; ++trial)
	{
	struct bed *randomPlacedBedList;
	/* Every trial starts from a fresh copy of the bounding gaps. */
	duplicateGapList = cloneGapList(boundingGaps);
	randomPlacedBedList = randomTrial(duplicateGapList,placeItems);
	if (neighbor)
	    {
	    /* With a neighbor file the randomly placed items are measured
	     * against a fresh copy of the neighbor gaps instead. */
	    struct chrGapList *duplicateNeighborList;
	    slSort(&randomPlacedBedList,bedCmp);/*order by chrom,chromStart*/
	    duplicateNeighborList = cloneGapList(neighborGaps);
	    initialPlacement(duplicateNeighborList,randomPlacedBedList);
	    statEl = gapStats(duplicateNeighborList, (char *)NULL, (char *)NULL, (char *)NULL);
	    freeChrList(&duplicateNeighborList, FALSE);
	    }
	else
	    statEl = gapStats(duplicateGapList, (char *)NULL, (char *)NULL, (char *)NULL);

	slAddHead(&statsList,statEl);
	/*	this gap list has temporary bed elements that were
	 *	created by the randomTrial(), they need to be freed as
	 *	the list is released, hence the TRUE signal.
	 *	It isn't a true freeBedList operation because the chrom
	 *	names are left intact in the original copy of the bed
	 *	list.  (The names were being shared.)
	 */
	/* Only the placement of the final trial is written to bedOutFile. */
	if ((trial == (trials - 1)) && (bedOutFile != NULL))
	    {
	    bedListOutput(duplicateGapList, bedOutFile);
	    }
	freeChrList(&duplicateGapList, TRUE);
	}
    /* NOTE(review): statsList was already reversed once after the initial
     * placement, and trial stats were then added at the head, so after this
     * second reversal the two initial entries come first (in after/before
     * order) followed by the trials in run order - confirm that is intended. */
    slReverse(&statsList);
    statsPrint(statsList);
    }
/* NOTE(review): statsList itself is not released anywhere in this function. */
if (neighbor)
    {
    bedFreeList(&nearestNeighbors);
    freeChrList(&neighborGaps, FALSE);
    }
bedFreeList(&boundingElements);
bedFreeList(&placeItems);
freeChrList(&boundingGaps, FALSE);
}