void bulkChr2XRegression(char *spliceFile, char *spliceSelectionFile) 
/* Driver routine: load all splice beds and the list of splices of interest,
 * reset the summary files, run doAnalysisForBed() on every bed that
 * findBedsFromGp() returns for a splice of interest, and report the tallies. */
{
struct bed *allBeds = NULL;
struct genomeBit *wanted = NULL, *bit = NULL;
FILE *out = NULL;

warn("Loading beds from %s", spliceFile);
allBeds = loadBedFileWithHeader(spliceFile);
warn("Loading splices of interest from %s", spliceSelectionFile);
wanted = loadGpList(spliceSelectionFile);
warn("Loaded %d splices, and %d splices of interest.", slCount(allBeds), slCount(wanted));
warn("Analyzing splices of interest.");

/* Start the html summary over with just its table header.
 * NOTE(review): the opening tag written here is <head>, while the closing
 * tags appended at the end of this function are </table></body></html>. */
out = mustOpen("maxScores.html", "w");
fprintf(out, "<head><body><table>\n");
fprintf(out, "<tr><th>Position</th><th>MaxDiff Levels</th><th>MaxDiff</th><th>Var Diff</th><th>MaxDiff/Var</th><th>Percent Diff</th><th>Cass Var/Stable Var</th><th>Plot</th></tr>\n");
carefulClose(&out);

/* Opening for write and closing straight away empties the other two files. */
out = mustOpen("allScores.tab", "w");
carefulClose(&out);

out = mustOpen("cassettes.sample", "w");
carefulClose(&out);

for(bit = wanted; bit != NULL; bit = bit->next)
    {
    struct bed *hits = findBedsFromGp(bit, allBeds);
    struct bed *hit = NULL;
    if(hits == NULL)
        {
        bedsNotFound++;
        warn("Couldn't find bed for genome bit %s:%d-%d", bit->chrom, bit->chromStart, bit->chromEnd);
        continue;
        }
    for(hit = hits; hit != NULL; hit = hit->next)
        doAnalysisForBed(hit);
    bedFreeList(&hits);
    }

warn("");
warn("%d genome bits had multiple beds, %d had no bed, %d analyzed", multipleBedForGp, bedsNotFound, bedsAnalyzed);
warn("Cleaning up.");

/* Terminate the html table that was started above. */
out = mustOpen("maxScores.html", "a");
fprintf(out, "</table></body></html>\n");
carefulClose(&out);
bedFreeList(&allBeds);
}
Example #2
0
static struct bed* regionsLoad(char* sectionsBed)
/* Return a bed3 list of regions for times when -regions is used.
 * If the filename has a comma then a number, take just that (1-based) entry
 * of the file: sectionsBed is cut at the comma in place and the value returned
 * by chopPrefixAt() is parsed with sqlUnsigned().  When the number is 0 or
 * larger than the list, the whole list is returned. */
{
    struct bed* list = NULL;
    unsigned ix = 0;
    if (strchr(sectionsBed, ',')) {
        char* number_part = chopPrefixAt(sectionsBed, ',');
        if (number_part)
            ix = sqlUnsigned(number_part);
    }
    list = readAtLeastBed3(sectionsBed);
    if (list && (ix > 0)) {
        struct bed* single = slElementFromIx(list, ix - 1);
        if (single) {
            struct bed* rem;
            /* Pop and free every element in front of the wanted one. */
            while ((rem = slPopHead(&list)) != single)
                bedFree(&rem);
            /* Once 'single' has been popped, 'list' holds whatever followed it.
             * Free the tail through 'list' rather than through single->next,
             * so the tail is released even if slPopHead() detaches the popped
             * element by clearing its next pointer. */
            bedFreeList(&list);
            single->next = NULL;
            list = single;
        }
    }
    return list;
}
Example #3
0
void metaBigClose(struct metaBig** pMb)
/* Close the underlying file, release everything the metaBig owns and finally
 * hand pMb to freez().  Does nothing when pMb or *pMb is NULL. */
{
    struct metaBig* mb;
    /* Guard before touching *pMb so closing an unopened handle is harmless. */
    if (pMb == NULL || *pMb == NULL)
        return;
    mb = *pMb;
    hashFree(&mb->chromSizeHash);
    if (mb->rgList)
        hashFree(&mb->rgList);
    if (mb->sections)
        bedFreeList(&mb->sections);
    if (mb->originalFileName)
        freeMem(mb->originalFileName);
    if (mb->fileName)
        freeMem(mb->fileName);
    if (mb->baseFileName)
        freeMem(mb->baseFileName);
    if (mb->remoteSiteAndDir)
        freeMem(mb->remoteSiteAndDir);
#ifdef USE_HTSLIB
    if (mb->idx)
        hts_idx_destroy(mb->idx);
#endif
    /* Close the handle that matches the file type; the BAM branch only
     * exists when built with htslib. */
    if (mb->type == isaBigBed)
        bigBedFileClose(&mb->big.bbi);
#ifdef USE_HTSLIB
    else if (mb->type == isaBam)
        sam_close(mb->big.bam);
#endif
    else
        bigWigFileClose(&mb->big.bbi);
#ifdef USE_HTSLIB
    if (mb->header)
        bam_hdr_destroy(mb->header);
#endif
    freez(pMb);
}
Example #4
0
long metaBigNumItems(struct metaBig* mb, boolean verbose)
/* Count the items in a bigBed or BAM; a bigWig always gives 0.
 * A bigBed is answered by bigBedItemCount().  For anything else every
 * chromosome section is fetched as bed6 and the list lengths are added up,
 * which means walking the whole file.  With verbose set, the count for each
 * section is printed as it is found. */
{
    long total = 0;
    struct bed* chromList;
    struct bed* cur;

    if (mb->type == isaBigWig)
        return 0;
    if (mb->type == isaBigBed)
        return (long)bigBedItemCount(mb->big.bbi);

    chromList = sectionsFromChromSizes(mb->chromSizeHash);
    cur = chromList;
    while (cur != NULL) {
        /* each section gets its own local-memory pool, dropped right after counting */
        struct lm* pool = lmInit(0);
        int count = slCount(metaBigBed6Fetch(mb, cur->chrom, cur->chromStart, cur->chromEnd, pool));
        if (verbose)
            printf("Number of items in %s of %s: %d\n", cur->chrom, mb->fileName, count);
        total += count;
        lmCleanup(&pool);
        cur = cur->next;
    }
    bedFreeList(&chromList);
    return total;
}
int countCassetteExons(struct altGraphX *agList, float minConfidence, FILE *outfile, FILE *bedOutFile)
/* Walk every edge of type ggCassette in every graph of agList.  Each bed
   returned for such an edge is named "<ag->name>.<counter>", logged, and, when
   its confidence is at least minConfidence, the edge size is at least the
   "minSize" cgi setting and bedPassFilters() accepts it, passed to
   writeCassetteExon() together with outfile and bedOutFile.
   Returns the number of beds written that way.
   Side files: confidences.log, sizes.log and confidences.html are rewritten. */
{
struct altGraphX *ag = NULL;
int cassetteCount = 0;
int i =0;
int mod3 = 0;
int counter =0;
boolean outputted = FALSE;
float estPrior = cgiOptionalDouble("estPrior", 10);
FILE *log = mustOpen("confidences.log", "w");
FILE *html = mustOpen("confidences.html", "w");
FILE *sizes = mustOpen("sizes.log", "w");
int minSize = cgiOptionalInt("minSize", 0);
startHtml(html);
for(ag = agList; ag != NULL; ag = ag->next)
    {
    outputted = FALSE;
    for(i=0;i<ag->edgeCount; i++)
	{
	if(ag->edgeTypes[i] == ggCassette)
	    {
	    float conf = altGraphCassetteConfForEdge(ag, i, estPrior);
	    struct bed *bed, *bedList = altGraphGetExonCassette(ag, i);
	    char buff[256];
	    /* Distance between the positions of the edge's two vertices. */
	    int size = ag->vPositions[ag->edgeEnds[i]] - ag->vPositions[ag->edgeStarts[i]];
	    boolean filtersOk = FALSE;
	    /* Give unnamed graphs an empty name so the "%s.%d" below is safe. */
	    if(ag->name == NULL)
		ag->name = cloneString("");

	    slSort(&bedList, bedCmpMaxScore);
	    for(bed=bedList; bed != NULL; bed = bed->next)
		{
		snprintf(buff, sizeof(buff), "%s.%d", ag->name, counter);
		bed->name = cloneString(buff);
		fprintf(log, "%f\n", conf);
		/* NOTE(review): reads three block sizes, so each bed is assumed
		 * to carry at least three blocks. */
		fprintf(sizes, "%d\n%d\n%d\n", bed->blockSizes[0], bed->blockSizes[1], bed->blockSizes[2]);
		filtersOk = bedPassFilters(bed, ag, i);
		if(conf >= minConfidence && size >= minSize && filtersOk) 
		    {
		    writeCassetteExon(bed, ag, i, &outputted, bedOutFile, outfile, html, conf);
		    cassetteCount++;
		    if((size % 3) == 0)
			mod3++;
		    }
		counter++;
		}
	    bedFreeList(&bedList);
	    }
	}
    }
endHtml(html);
carefulClose(&html);
carefulClose(&log);
/* sizes.log was opened above and must be closed like the other two files. */
carefulClose(&sizes);
warn("%d cassettes are mod 3", mod3);
return cassetteCount;
}
Example #6
0
void doParDetails(struct trackDb *tdb, char *name)
/* Details page for one PAR item: a link table whose first row pairs the
 * clicked item with its homolog, followed by any remaining PAR items. */
{
// the PAR table is small, so all of it is loaded; an odd item count is an error
struct bed *parList = loadParTable(tdb);
struct bed *selected = NULL;
struct bed *partner = NULL;

if ((slCount(parList) % 2) != 0)
    errAbort("par items not paired in %s", tdb->table);

// both lookups are handed &parList along with the item they search for
selected = getClickedPar(name, &parList);
partner = getHomologousPar(selected, &parList);
slSort(&parList, parCmp);

cartWebStart(cart, database, "Pseudoautosomal regions");
webPrintLinkTableStart();

// column labels
webPrintLabelCell("");
webPrintLabelCell("Selected PAR");
webPrintLabelCell("Homologous PAR");

// row for the selected pair, then whatever is still in the list
webPrintLinkTableNewRow();
printHomPairRow(selected, partner);
if (parList != NULL)
    printOtherPars(selected, parList);

webPrintLinkTableEnd();
printTrackHtml(tdb);
webEnd();

bedFreeList(&parList);
bedFree(&selected);
bedFree(&partner);
}
Example #7
0
void hgPhMouse(char *database, char *track, int fileCount, char *fileNames[])
/* hgPhMouse - Load phMouse track.
 * Every input file is parsed into beds (chrom, start, end from words 0-2,
 * score from word 6, name and strand from word 3, which must end in "|+" or
 * "|-"), sorted with bedCmp, and appended to phMouse.tab with a leading bin
 * column.  The tab file is then handed to loadDatabase() and removed. */
{
int i;
char *fileName;
char *tabName = "phMouse.tab";
FILE *f = mustOpen(tabName, "w");
struct lineFile *lf;
char *words[32], *s, c;
int wordCount;
int oneSize, totalSize = 0;

for (i=0; i<fileCount; ++i)
    {
    struct bed *bedList = NULL, *bed;
    fileName = fileNames[i];
    lf = lineFileOpen(fileName, TRUE);
    printf("Reading %s ", fileName);
    fflush(stdout);
    while ((wordCount = lineFileChop(lf, words)) > 0)
        {
	if (wordCount < 7)
	   errAbort("Expecting at least 7 words line %d of %s", 
	   	lf->lineIx, fileName);
	AllocVar(bed);
	bed->chrom = cloneString(words[0]);
	bed->chromStart = lineFileNeedNum(lf, words, 1);
	bed->chromEnd = lineFileNeedNum(lf, words, 2);
	bed->score = lineFileNeedNum(lf, words, 6);
	/* The strand is the character after the last '|' of the name.  A name
	 * without any '|' is rejected here instead of dereferencing NULL. */
	s = strrchr(words[3], '|');
	if (s == NULL || (s[1] != '+' && s[1] != '-'))
	    errAbort("Misformed strandless trace name line %d of %s",
	    	lf->lineIx, lf->fileName);
	c = s[1];
	s[0] = 0;	/* cut the "|strand" suffix off the name */
	bed->name = cloneString(words[3]);
	bed->strand[0] = c;
	slAddHead(&bedList, bed);
	}
    /* Done with this input; release the reader before moving on. */
    lineFileClose(&lf);
    oneSize = slCount(bedList);
    printf("%d alignments ", oneSize);
    totalSize += oneSize;
    fflush(stdout);
    slSort(&bedList, bedCmp);
    printf("sorted ");
    fflush(stdout);
    for (bed = bedList; bed != NULL; bed = bed->next)
        {
	int bin = hFindBin(bed->chromStart, bed->chromEnd);
	fprintf(f, "%d\t", bin);
	bedTabOutN(bed, 6, f);
	}
    printf("tabbed out\n");
    bedFreeList(&bedList);
    }
carefulClose(&f);
printf("Loading %d items into %s.%s\n", totalSize, database, track);
loadDatabase(database, track, tabName);
remove(tabName);
}
Example #8
0
static struct bed* subset_beds(char* sectionString, struct bed** pRegions, struct hash* chromHash)
/* Used when a regions bed file is given AND the filename also names
 * subsections: keep (as clones) the regions that are enclosed by one of the
 * ranges parsed from sectionString.  A region that overlaps such a range
 * without being enclosed by it is an error, and so is an empty result.
 * The list in *pRegions is freed; the returned list replaces it. */
{
    struct bed* nameRanges = parseSectionString(sectionString, chromHash);
    struct bed* kept = NULL;
    struct bed* regionList = *pRegions;
    struct bed* cur;

    slSort(&nameRanges, bedCmp);
    /* one pass of this loop handles one chromosome's run of sorted ranges */
    for (cur = nameRanges; cur != NULL; cur = cur->next) {
        struct rbTree* chromTree = rangeTreeNew();
        struct bed* reg;

        /* add every range of the run; cur stops on the run's last element */
        for (; cur->next != NULL && sameString(cur->chrom, cur->next->chrom); cur = cur->next)
            rangeTreeAdd(chromTree, cur->chromStart, cur->chromEnd);
        rangeTreeAdd(chromTree, cur->chromStart, cur->chromEnd);

        for (reg = regionList; reg != NULL; reg = reg->next) {
            if (!sameString(reg->chrom, cur->chrom))
                continue;
            if (!rangeTreeOverlaps(chromTree, reg->chromStart, reg->chromEnd))
                continue;
            if (rangeTreeFindEnclosing(chromTree, reg->chromStart, reg->chromEnd)) {
                struct bed* copy = cloneBed(reg);
                slAddHead(&kept, copy);
            } else {
                errAbort("range specified in file overlaps but is not contained by range specified on command-line");
            }
        }
        rangeTreeFree(&chromTree);
    }
    if (kept == NULL) {
        errAbort("no ranges specified in file were contained in ranges specified on command-line");
    }
    slReverse(&kept);
    bedFreeList(&nameRanges);
    bedFreeList(pRegions);
    return kept;
}
Example #9
0
File: sax.c Project: hjanime/bwtool
void wigsax_bed4(FILE *out, struct metaBig *mb, struct bed *region, int alpha, int window, double mean, double std, boolean wig_out)
/* Interval mode output in bed4 style: one bed per base whose name holds the
 * SAX symbol for that base.  When wig_out is set, the data value of the base
 * is appended to each line as an extra column. */
{
    struct bed *result = NULL;
    struct bed *cur;
    struct slDouble *values = NULL;
    struct slDouble *value;
    struct perBaseWig *loaded = perBaseWigLoadContinue(mb, region->chrom, region->chromStart, region->chromEnd);
    struct perBaseWig *wig;
    /* Only a single alphabet size is used for now, so the range is one wide. */
    int firstAlpha = alpha;
    int lastAlpha = alpha;

    for (wig = loaded; wig != NULL; wig = wig->next)
    {
	int nBases = wig->chromEnd - wig->chromStart;
	struct bed *perBase = make_initial_bed_list(wig, lastAlpha - firstAlpha + 2);
	int a, k;

	/* write the symbol for alphabet a into slot (a - firstAlpha) of each name */
	for (a = firstAlpha; a <= lastAlpha; a++)
	{
	    char *symbols = sax_from_array_force_window(wig->data, nBases, a, window, mean, std);
	    cur = perBase;
	    for (k = 0; (k < nBases) && (cur != NULL); k++)
	    {
		cur->name[a - firstAlpha] = symbols[k];
		cur = cur->next;
	    }
	    freeMem(symbols);
	}
	if (wig_out)
	{
	    for (k = 0; k < nBases; k++)
	    {
		struct slDouble *one = newSlDouble(wig->data[k]);
		slAddHead(&values, one);
	    }
	}
	/* move the per-base beds onto the (still reversed) result list */
	while ((cur = slPopHead(&perBase)) != NULL)
	    slAddHead(&result, cur);
    }
    slReverse(&result);
    slReverse(&values);
    perBaseWigFreeList(&loaded);

    value = values;
    for (cur = result; cur != NULL; cur = cur->next)
    {
	char lineEnd = (wig_out) ? '\t' : '\n';
	bedOutputN(cur, 4, out, '\t', lineEnd);
	if (!wig_out)
	    continue;
	/* beds and values were built in step, so running out here is a bug */
	if (value == NULL)
	    errAbort("data inconsistency. programmer error\n");
	fprintf(out, "%0.4f\n", value->val);
	value = value->next;
    }
    bedFreeList(&result);
    slFreeList(&values);
}
Example #10
0
void perBaseWigFree(struct perBaseWig** pRegion)
/* Free-up a perBaseWig: its subsections list, name, chrom and data array,
 * then the struct itself via freez(pRegion).  Does nothing when pRegion or
 * *pRegion is NULL. */
{
    struct perBaseWig* pbw;
    /* pRegion must be checked before it is dereferenced. */
    if (pRegion == NULL || *pRegion == NULL)
        return;
    pbw = *pRegion;
    if (pbw->subsections)
        bedFreeList(&pbw->subsections);
    if (pbw->name)
        freeMem(pbw->name);
    freeMem(pbw->chrom);
    freez(&pbw->data);
    freez(pRegion);
}
void findBeds(struct cutter *cutters, struct dnaSeq *seqs, char *outputFile)
/* Run matchEnzymes() on every sequence and write each non-empty result
 * list to outputFile with spitBedList(). */
{
FILE *out = mustOpen(outputFile, "w");
struct dnaSeq *cur = seqs;
while (cur != NULL)
    {
    struct bed *hits = matchEnzymes(cutters, cur, 0);
    cur = cur->next;
    if (hits == NULL)
	continue;
    spitBedList(hits, out);
    bedFreeList(&hits);
    }
carefulClose(&out);
}
void findCounts(struct cutter *cutters, struct dnaSeq *seqs, char *outputFile)
/* Tally enzyme matches over all sequences: each non-empty matchEnzymes()
 * result is fed to addCountsToHash(), and the hash is written to outputFile
 * at the end.
 * NOTE(review): the count hash is not freed in this function. */
{
struct hash *tally = initCutterCountHash(cutters);
struct dnaSeq *cur = seqs;
while (cur != NULL)
    {
    struct bed *hits = matchEnzymes(cutters, cur, 0);
    cur = cur->next;
    if (hits == NULL)
	continue;
    addCountsToHash(tally, hits);
    bedFreeList(&hits);
    }
writeHashToFile(tally, outputFile);
}
Example #13
0
void affyPslAndAtlasToBedOld(char *pslFile, char *atlasFile, char *bedOut, char *expRecOut)
/** Main function that does all the work for old-style*/
{
struct hash *bedHash = NULL;
struct affyAtlas *aaList=NULL, *aa=NULL;
struct expRecord *erList=NULL, *er=NULL;
struct bed *bedList=NULL, *bed=NULL;
int expCount = 0;
FILE *erOut = NULL, *bOut=NULL;
warn("loading atlas file");
aaList = affyAtlasLoadAll(atlasFile);
expCount = countExperiments(aaList);
warn("creating list of beds from alignments");
bedList = createBedsFromPsls(pslFile, expCount);
warn("creating hash from list of beds");
bedHash = createBedHash(bedList);
warn("appending experiments to beds in hash");
appendExperiments(bedHash, aaList, &erList);
warn("Running sanity Checks");
checkAllBeds(&bedList, expCount);
warn("%d beds were missing experiments." , missingExpsCount);
warn("%d beds had no experiments.", noExpCount);
warn("Calculating average intensities");
convertIntensitiesToRatios(bedList);
calculateAverages(bedList);

warn("writing expRecords out");
erOut = mustOpen(expRecOut, "w");
for(er = erList; er != NULL; er = er->next)
    expRecordTabOut(er, erOut);
carefulClose(&erOut);

warn("writing beds out");
bOut = mustOpen(bedOut, "w");
for(bed = bedList; bed != NULL; bed = bed->next)
    bedTabOutN(bed, 15, bOut);
carefulClose(&bOut);

warn("cleaning up..");
freeHash(&bedHash);
bedFreeList(&bedList);

warn("Done.");
}
Example #14
0
void doBeds(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom, 
	    char *netTable, char *bedFileName, char *bedTableName,
	    char *outBedName, char *selectedFileName, int *foundCount, int *notFoundCount)	
/* Map beds on chrom over to orthoDb.  Beds come from bedFileName when it is
 * given, otherwise from bedTableName.  Each bed that maps (non-NULL result
 * with at least one block) is written to outBedName and counted in
 * *foundCount; its source bed is also written to selectedFileName when that
 * is given.  Every other bed on chrom is counted in *notFoundCount. */
{
struct bed *srcList = NULL, *src = NULL;
FILE *mappedOut = NULL;
FILE *sourceOut = NULL;

warn("Loading beds.");
srcList = bedFileName ? bedLoadAll(bedFileName)
		      : loadBedFromTable(conn, bedTableName, chrom, 0, BIGNUM);

warn("Converting beds.");
assert(outBedName);
mappedOut = mustOpen(outBedName, "w");
if (selectedFileName != NULL)
    sourceOut = mustOpen(selectedFileName, "w");

for(src = srcList; src != NULL; src = src->next)
    {
    struct bed *mapped = NULL;
    /* beds on other chromosomes are skipped without being counted */
    if(differentString(src->chrom, chrom))
	continue;
    occassionalDot();
    mapped = orthoBedFromBed(conn, db, orthoDb, netTable, src);
    if(mapped == NULL || mapped->blockCount <= 0)
	{
	(*notFoundCount)++;
	}
    else
	{
	(*foundCount)++;
	bedTabOutN(mapped, 12, mappedOut);
	if (sourceOut != NULL)
	    bedTabOutN(src, 12, sourceOut);
	}
    bedFree(&mapped);
    }
bedFreeList(&srcList);
carefulClose(&sourceOut);
carefulClose(&mappedOut);
}
Example #15
0
int main(int argc, char *argv[])
/* Entry point: argv[1] is the query sequence file, argv[2] the target
 * sequence file and argv[3] the bed6 output.  Every query is matched against
 * every target and all hits are written in one go. */
{
struct dnaSeq *targetList = NULL, *queryList = NULL;
struct dnaSeq *tgt = NULL, *qry = NULL;
struct bed *hits = NULL;

if (argc != 4)
    usage();
targetList = dnaLoadAll(argv[2]);
queryList = dnaLoadAll(argv[1]);

tgt = targetList;
while (tgt != NULL)
    {
    qry = queryList;
    while (qry != NULL)
	{
	struct bed *found = oligoMatch(tgt, qry);
	hits = slCat(hits, found);
	qry = qry->next;
	}
    tgt = tgt->next;
    }

outputBed6(hits, argv[3]);
bedFreeList(&hits);
dnaSeqFreeList(&targetList);
dnaSeqFreeList(&queryList);
return 0;
}
Example #16
0
struct genePred *convertBedsToGps(char *bedFile)
/* Load beds (6 fields) from a file and convert to bare bones
 * genePredictions: chrom, name and strand are copied, and both tx and cds
 * bounds are set to the bed's chromStart/chromEnd.  Returns the list in file
 * order, or NULL when the file holds no beds. */
{
struct genePred *gpList = NULL, *gp =NULL;
struct bed *bedList=NULL, *bed=NULL;
bedList = bedLoadNAll(bedFile, 6);
/* The previous test compared bedList->strand itself with NULL.  strand is
 * used as an inline character array elsewhere, so that comparison could
 * never fire, and it formed the address from a NULL list on an empty file.
 * Test the first strand character instead, and only when a bed exists. */
if(bedList != NULL && bedList->strand[0] == '\0')
    errAbort("Beds must have strand information.");
for(bed=bedList; bed!=NULL; bed=bed->next)
    {
    AllocVar(gp);
    gp->chrom = cloneString(bed->chrom);
    gp->txStart = gp->cdsStart = bed->chromStart;
    gp->txEnd = gp->cdsEnd = bed->chromEnd;
    gp->name = cloneString(bed->name);
    safef(gp->strand, sizeof(gp->strand), "%s", bed->strand);
    slAddHead(&gpList, gp);
    }
bedFreeList(&bedList);
/* slAddHead built the list back to front. */
slReverse(&gpList);
return gpList;
}
int hgSeqItemsInRange(char *db, char *table, char *chrom, int chromStart,
                      int chromEnd, char *sqlConstraints)
/* Fetch the items of table that fall in chrom:chromStart-chromEnd (and pass
   sqlConstraints, which is handed on unchanged), print their sequence via
   hgSeqBed(), and return the item count that hgSeqBed() reports. */
{
    char tableRoot[256];
    char chromFromName[32];
    struct hTableInfo *info;
    struct bed *items;
    int printed;

    hParseTableName(db, table, tableRoot, chromFromName);
    info = hFindTableInfo(db, chrom, tableRoot);
    if (info == NULL)
    {
        webAbort("Error", "Could not find table info for table %s (%s)",
                 tableRoot, table);
    }

    items = hGetBedRange(db, table, chrom, chromStart, chromEnd,
                         sqlConstraints);
    printed = hgSeqBed(db, info, items);
    bedFreeList(&items);
    return printed;
}
void pickCassettePcrPrimers(char *db, char *bedFileName, char *primerFaName, char *primerBedName)
/* pickCassettePcrPrimers - for each bed in bedFileName build a cassetteSeq
 * around the "targetExon" option (default 1) and pass it to callPrimer3(),
 * which is given the primer fasta and primer bed output files. */
{
int exonIx = optionInt("targetExon", 1);
struct bed *beds = NULL, *cur = NULL;
FILE *faOut = NULL;
FILE *bedOut = NULL;

hSetDb(db);
beds = bedLoadAll(bedFileName);

faOut = mustOpen(primerFaName, "w");
bedOut = mustOpen(primerBedName, "w");
cur = beds;
while(cur != NULL)
    {
    struct cassetteSeq *cassette = cassetteSeqFromBed(cur, exonIx);
    callPrimer3(cassette, faOut, bedOut);
    cassetteSeqFree(&cassette);
    cur = cur->next;
    }
bedFreeList(&beds);
carefulClose(&faOut);
carefulClose(&bedOut);
}
void createIntronBeds(char *agxFile, char *bedFile)
/* Make intron beds for evaluation: load every altGraphX from agxFile and
 * write the beds that bedIntronsFromAgx() returns for each graph to bedFile
 * as bed12. */
{
struct altGraphX *ag=NULL, *agList = NULL;
struct bed *bed=NULL, *bedList=NULL;
FILE *bedOut = NULL;
warn("Rading AltGraphX list.");
agList = altGraphXLoadAll(agxFile);
warn("Converting to intron beds.");
bedOut = mustOpen(bedFile, "w");
for(ag = agList; ag != NULL; ag = ag->next)
    {
    occassionalDot();
    bedList = bedIntronsFromAgx(ag);
    for(bed=bedList; bed != NULL; bed=bed->next)
	{
	bedTabOutN(bed, 12, bedOut);
	}
    bedFreeList(&bedList);
    }
/* The output file was previously left open; close it so the data is
 * flushed and the handle released before returning. */
carefulClose(&bedOut);
altGraphXFreeList(&agList);
}
Example #20
0
void calculateBinomialP(char* regdomFn, char* antigapFn, int totalRegions, int hitRegions)
/* Print the binomial p-value for hitRegions out of totalRegions, using as
 * the per-region weight the fraction of non-gap bases that is covered by
 * the regulatory domains read from regdomFn. */
{
	struct regdom* domainList = readInitializedRegdomFile(regdomFn);
	// union of all regulatory domains, kept as a range tree for quick search
	struct genomeRangeTree* domainTree = getRangeTreeOfRegdoms(domainList);
	// NOTE: the regions in the antigap file must not overlap each other.
	struct bed* nonGapList = bedLoadAll(antigapFn);

	long nonGapTotal = getTotalNonGapBases(nonGapList);
	long nonGapCovered = getAnnotatedNonGapBases(domainTree, nonGapList);
	double weight = (double)nonGapCovered/(double)nonGapTotal;

	printf("%e\n", getBinomPval(totalRegions, hitRegions, weight));

	regdomFreeList(&domainList);
	bedFreeList(&nonGapList);
	genomeRangeTreeFree(&domainTree);
}
Example #21
0
void intronSizes(char *database, char *table)
/* intronSizes - print the introns of every gene prediction in table to
 * stdout as bed6 lines.  Rows are limited to chromName when it is set and,
 * with the "withUtr" cgi flag, to rows where txStart != cdsStart.
 * NOTE(review): the query dyString is not freed in this function. */
{
    struct dyString *sql = newDyString(1024);
    struct bed *introns = NULL, *intron = NULL;
    struct sqlConnection *conn;
    struct sqlResult *result;
    char **fields;
    int binOffset;

    hSetDb(database);
    binOffset = hOffsetPastBin(NULL, table);
    conn = hAllocConn(database);

    /* build the select, adding the optional restrictions */
    sqlDyStringPrintf(sql, "select * from %s", table);
    if (chromName != NULL)
        dyStringPrintf(sql, " where chrom = '%s'", chromName);
    if (cgiBoolean("withUtr"))
    {
        /* first restriction needs "where", a second one is joined with "and" */
        char *joiner = (chromName != NULL) ? "and" : "where";
        dyStringPrintf(sql, " %s txStart != cdsStart", joiner);
    }

    result = sqlGetResult(conn, sql->string);
    for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
    {
        struct genePred *gene = genePredLoad(fields+binOffset);
        genePredIntrons(gene, &introns);
        slReverse(&introns);
        intron = introns;
        while (intron != NULL)
        {
            bedTabOutN(intron,6, stdout);
            intron = intron->next;
        }
        bedFreeList(&introns);
        genePredFree(&gene);
    }
    sqlFreeResult(&result);
    hFreeConn(&conn);
}
Example #22
0
struct genePred *gpFromBedFile(char *file) 
/* Read all beds in file, turn each into a genePred with bedToGenePred()
   and return the genePreds in file order. */
{
struct genePred *result = NULL;
struct bed *beds = bedLoadAll(file);
struct bed *cur = beds;
while(cur != NULL)
    {
    struct genePred *gene = bedToGenePred(cur);
    /* pslxFileOpen gaks if strand is not + or -.  The strand that comes
     * back from bedToGenePred may be empty (fewer than 6 fields) or ".";
     * in that case pretend it is + so the PSL stays readable. */
    if (!sameString(gene->strand, "+") && !sameString(gene->strand, "-"))
	{
	gene->strand[0] = '+';
	gene->strand[1] = '\0';
	}
    slAddHead(&result, gene);
    cur = cur->next;
    }
slReverse(&result);
bedFreeList(&beds);
return result;
}
static void randomPlacement(char *bounding, char *placed)
/* Load the bounding and placed bed files, print gap statistics before and
 * after the initial placement of the placed items, and then, when trials > 0,
 * run that many random placement trials and print the collected statistics.
 * When neighbor is set, a third bed file is loaded from it and the statistics
 * are taken on the gaps of that list instead of the bounding list.
 * neighbor, trials, seed, bedOutFile, zeroBedOutFile, shoulderBedOutFile and
 * distOut are not declared here - presumably file-level option variables. */
{
struct bed *boundingElements = bedLoadAll(bounding);
struct bed *placeItems = bedLoadAll(placed);
struct bed *nearestNeighbors = NULL;
int boundingCount = slCount(boundingElements);
int placedCount = slCount(placeItems);
int neighborCount = 0;
struct chrGapList *boundingGaps = NULL;
struct chrGapList *duplicateGapList = NULL;
struct chrGapList *neighborGaps = NULL;
struct statistic *statsList = NULL;
struct statistic *statEl = NULL;

if (neighbor)
    {
    nearestNeighbors = bedLoadAll(neighbor);
    slSort(&nearestNeighbors, bedCmp);	/* order by chrom,chromStart */
    neighborCount = slCount(nearestNeighbors);
    verbose(2, "neighbor element count: %d\n", neighborCount);
    neighborGaps = createGaps(nearestNeighbors);
    }
slSort(&boundingElements, bedCmp);	/* order by chrom,chromStart */
slSort(&placeItems, bedCmp);		/* order by chrom,chromStart */

verbose(2, "bounding element count: %d\n", boundingCount);
verbose(2, "placed item count: %d\n", placedCount);

boundingGaps = createGaps(boundingElements);

if (TRUE)	/*	display initial placement stats only	*/
    {
    char *neighborName = NULL;

    /* work on a clone so the original gap lists stay untouched */
    if (neighbor)
	{
	neighborName = cloneString(neighbor);
	duplicateGapList = cloneGapList(neighborGaps);
	}
    else
	{
	neighborName = cloneString(bounding);
	duplicateGapList = cloneGapList(boundingGaps);
	}

    verbose(2,"stats before initial placement:  =================\n");
    statEl = gapStats(duplicateGapList, (char *)NULL, (char *)NULL, (char *)NULL);
    printf("statistics on gaps before any placements:\n\t(%s)\n", neighborName);
    statsPrint(statEl);
    slAddHead(&statsList,statEl);

    initialPlacement(duplicateGapList,placeItems);

    verbose(2,"stats after initial placement:  =================\n");
    /* only this gapStats call is given the three output file names */
    statEl = gapStats(duplicateGapList, zeroBedOutFile, shoulderBedOutFile,
	distOut);
    printf("statistics after initial placement of placed items:\n\t(%s)\n",
		placed);
    statsPrint(statEl);
    slAddHead(&statsList,statEl);

    freeChrList(&duplicateGapList, FALSE);
    slReverse(&statsList);
    freeMem(neighborName);
    }

if (trials > 0)
    {
    int trial;

    srand48((long int)seed);	/* for default seed=0, same set of randoms */

    slSort(&placeItems, bedCmpSize);	/* order by size of elements */
    slReverse(&placeItems);		/* largest ones first	*/
    measurePlaced(placeItems);		/* show placed item characteristics */
    for (trial = 0; trial < trials; ++trial)
	{
	struct bed *randomPlacedBedList;
	/* every trial starts from a fresh clone of the bounding gaps */
	duplicateGapList = cloneGapList(boundingGaps);
	randomPlacedBedList = randomTrial(duplicateGapList,placeItems);
	if (neighbor)
	    {
	    /* place the randomly positioned items into a clone of the
	     * neighbor gaps and take the statistics there */
	    struct chrGapList *duplicateNeighborList;
	    slSort(&randomPlacedBedList,bedCmp);/*order by chrom,chromStart*/
	    duplicateNeighborList = cloneGapList(neighborGaps);
	    initialPlacement(duplicateNeighborList,randomPlacedBedList);
	    statEl = gapStats(duplicateNeighborList, (char *)NULL, (char *)NULL, (char *)NULL);
	    freeChrList(&duplicateNeighborList, FALSE);
	    }
	else
	    statEl = gapStats(duplicateGapList, (char *)NULL, (char *)NULL, (char *)NULL);

	slAddHead(&statsList,statEl);
	/*	this gap list has temporary bed elements that were
	 *	created by the randomTrial(), they need to be freed as
	 *	the list is released, hence the TRUE signal.
	 *	It isn't a true freeBedList operation because the chrom
	 *	names are left intact in the original copy of the bed
	 *	list.  (The names were being shared.)
	 */
	/* the placement of the last trial is written out when requested */
	if ((trial == (trials - 1)) && (bedOutFile != NULL))
	    {
	    bedListOutput(duplicateGapList, bedOutFile);
	    }
	freeChrList(&duplicateGapList, TRUE);
	}
    slReverse(&statsList);
    statsPrint(statsList);
    }
/* NOTE(review): statsList itself is not freed in this function. */
if (neighbor)
    {
    bedFreeList(&nearestNeighbors);
    freeChrList(&neighborGaps, FALSE);
    }
bedFreeList(&boundingElements);
bedFreeList(&placeItems);
freeChrList(&boundingGaps, FALSE);
}
void doExpRatio(struct trackDb *tdb, char *item, struct customTrack *ct)
/* Generic expression ratio details using microarrayGroups.ra file */
/* and not the expRecord tables. */
/* tdb supplies the required expScale/expStep settings and the default color
 * scheme; item is the experiment name (the "i2" cgi variable is used when
 * item is NULL); ct is the custom track, or NULL for a native track. */
{
char *expScale = trackDbRequiredSetting(tdb, "expScale");
char *expStep = trackDbRequiredSetting(tdb, "expStep");
double maxScore = atof(expScale);
double stepSize = atof(expStep);
struct bed *bedList;
char *itemName = cgiUsualString("i2","none");
char *expName = (item == NULL) ? itemName : item;
char *tdbSetting = trackDbSettingOrDefault(tdb, "expColor", "redGreen");
char *colorVal = NULL;
enum expColorType colorScheme;
char colorVarName[256];
/* the cart variable "<track>.color" overrides the trackDb expColor setting */
safef(colorVarName, sizeof(colorVarName), "%s.color", tdb->track);
colorVal = cartUsualString(cart, colorVarName, tdbSetting);
colorScheme = getExpColorType(colorVal);

if (sameWord(tdb->grp, "cancerGenomics"))
    {
    /* set global flag */
    isCancerGenomicsTrack = TRUE;
    }

/* Three data sources: native table, database-backed custom track, or the
 * custom track's in-memory bed list filtered to the current window. */
if (!ct)
    {
    genericHeader(tdb, itemName);
    bedList = loadMsBed(tdb, tdb->table, seqName, winStart, winEnd);
    }
else if (ct->dbTrack)
    {
    genericHeader(tdb, itemName);
    printCustomUrl(tdb, itemName, TRUE);
    bedList = ctLoadMultScoresBedDb(ct, seqName, winStart, winEnd);
    }
else
    bedList = bedFilterListInRange(ct->bedList, NULL, seqName, winStart, winEnd);
if (bedList == NULL)
    printf("<b>No Expression Data in this Range.</b>\n");
else if (expName && sameString(expName, "zoomInMore"))
    printf("<b>Too much data to display in detail in this range.</b>\n");
else
    {
    struct microarrayGroups *groupings = NULL;
    struct maGrouping *combineGroup;
    struct hash *erHash = newHash(6);
    int i;
    /* groupings stays NULL for custom tracks, whose grouping comes from ct */
    if (!ct)
	{
	groupings = maGetTrackGroupings(database, tdb);
	combineGroup = maCombineGroupingFromCart(groupings, cart, tdb->track);
	}
    else
	combineGroup = maGetGroupingFromCt(ct);
    maBedClumpGivenGrouping(bedList, combineGroup);
    for (i = 0; i < combineGroup->numGroups; i++)
	{
	/* make stupid exprecord hash.perhaps eventually this won't be needed */
	/* keyed by the group index written as a decimal string */
	char id[16];
	struct expRecord *er = basicExpRecord(combineGroup->names[i], i, 2);
	safef(id, sizeof(id), "%d", i);
	hashAdd(erHash, id, er);
	}
    puts("<h2></h2><p>\n");
    msBedPrintTable(bedList, erHash, itemName, expName, -1*maxScore, maxScore,
	stepSize, 2, msBedDefaultPrintHeader, msBedExpressionPrintRow,
	printExprssnColorKey, getColorForExprBed, colorScheme);
    hashTraverseEls(erHash, erHashElFree);
    hashFree(&erHash);
    microarrayGroupsFree(&groupings);
    }
puts("<h2></h2><p>\n");
bedFreeList(&bedList);
}
void hgExperiment(char *database, char *table, 
                        char *expFile, char *posFile, char *dataFile)
/* Main function: build the "<table>Exps" experiment table from expFile, read
 * item positions from posFile and per-experiment values from dataFile, fill
 * in the BED15 fields of every position and write them to a tab file created
 * by hgCreateTabFile() for table.  tabDir is not declared here - presumably a
 * file-level variable. */
{
struct lineFile *lf;
int *data = NULL;
int *scores;
FILE *f = NULL;
char expTable[32];
char *words[3];
int wordCt;
struct bed *bedList, *bed;
int expCount;
struct hash *expHash, *dataHash;
struct hashEl *hel;

/* Open experiment file and use it to create experiment table.
   Use optional fields if present, otherwise defaults */
safef(expTable, ArraySize(expTable), "%sExps", table);
expHash = makeExpsTable(database, expTable, expFile, &expCount);

/* Read in positions file */
bedList = bedLoadAll(posFile);
slSort(&bedList, bedCmp);

/* Read data file into a hash of arrays of data values, keyed by name */
dataHash = newHash(0);
lf = lineFileOpen(dataFile, TRUE);
while ((wordCt = lineFileChopNext(lf, words, ArraySize(words))))
    {
    /* format: <region-name> <experiment-name> <data-value> */
    char *name, *exp;
    int expId;
    int value;
    if (wordCt != 3)
        errAbort("Expecting 3 words in data file, got %d line %d of %s", 
		wordCt, lf->lineIx, lf->fileName);
    name = words[0];
    /* first time a region name is seen, give it an array with one slot per
     * experiment */
    hel = hashLookup(dataHash, name);
    if (!hel)
        {
        AllocArray(data, expCount);
        hel = hashAdd(dataHash, name, data);
        }
    data = (int *)hel->val;
    exp = words[1];
    expId = hashIntVal(expHash, exp);
    if (expId < 0 || expId > expCount-1)
        errAbort("Invalid experiment ID %d for %s, line %d of %s",
                 expId, exp, lf->lineIx, lf->fileName);
    //value = atoi(words[2]);
    value = round(atof(words[2]));
    /* a non-zero slot means this region/experiment pair already has a value;
     * NOTE(review): a stored value of 0 cannot be told apart from an unset
     * slot by this test */
    if (data[expId] != 0)
        errAbort("Extra experiment data value %d for %s %s, line %d of %s",
                         value, name, exp, lf->lineIx, lf->fileName);
    data[expId] = value;
    }
lineFileClose(&lf);

/* Fill in BED15 fields - add experiment values, and setup block (only 1)*/
for (bed = bedList; bed != NULL; bed = bed->next)
    {
    int i;
    bed->thickStart = bed->chromStart;
    bed->thickEnd = bed->chromEnd;
    bed->blockCount = 1;
    AllocArray(bed->blockSizes, 1);
    bed->blockSizes[0] = bed->chromEnd - bed->chromStart;
    AllocArray(bed->chromStarts, 1);
    bed->chromStarts[0] = 0;
    bed->expCount = expCount;
    AllocArray(bed->expIds, expCount);
    for (i = 0; i < expCount; i++)
        bed->expIds[i] = i;
    AllocArray(bed->expScores, expCount);
    /* every position must have an entry in the data file */
    scores = hashMustFindVal(dataHash, bed->name);
    for (i = 0; i < expCount; i++)
        bed->expScores[i] = scores[i];
    /* set score for bed to the average of the scores in all experiments */
    calculateAverage(bed);
    }

/* from affyPslAndAtlsoToBed ?
   convertIntensitiesToRatios(bedList);
   */

/* Write BED data file */
f = hgCreateTabFile(tabDir, table);
for (bed = bedList; bed != NULL; bed = bed->next)
    bedTabOutN(bed, 15, f);

/* Cleanup */
/* NOTE(review): the per-name data arrays stored in dataHash are not freed
 * individually here. */
carefulClose(&f);
freeHash(&expHash);
freeHash(&dataHash);
bedFreeList(&bedList);
}
Example #26
0
void bwtool_split(struct hash *options, char *regions, char *size_s, char *bigfile, char *tmp_dir, char *outputfile)
/* bwtool_split - main for the splitting program.
 * Opens bigfile (restricted to regions), walks every perBaseWig item loaded
 * for each section and merges consecutive items into chunks.  One
 * chrom<TAB>start<TAB>end line is written to outputfile per chunk.
 *   options    - option hash; "min_gap" (default "1") is read from it
 *   size_s     - chunk size as an unsigned decimal string
 *   tmp_dir    - passed through to metaBigOpenWithTmpDir
 * A chunk is closed when the chromosome changes, or when adding the next
 * item plus the gap in front of it would exceed the chunk size and that gap
 * is at least min_gap. */
{
    struct metaBig *mb = metaBigOpenWithTmpDir(bigfile, tmp_dir, regions);
    FILE *output = mustOpen(outputfile, "w");
    struct bed *section;
    struct bed *splitList = NULL;
    int size = 0;
    unsigned min_gap = sqlUnsigned((char *)hashOptionalVal(options, "min_gap", "1"));
    unsigned chunk_size = sqlUnsigned(size_s);
    char chrom[256] = "";   /* chromosome of the chunk being built; "" until data is seen */
    int start = -1, end = 0;
    int gap = 0;
    for (section = mb->sections; section != NULL; section = section->next)
    {
        struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart,
                                                              section->chromEnd);
        struct perBaseWig *pbw;
        for (pbw = pbwList; pbw != NULL; pbw = pbw->next)
        {
            int length = pbw->chromEnd - pbw->chromStart;
            /* Distance from the end of the current chunk to this item. */
            if (end > 0)
                gap = pbw->chromStart - end;
            if (!sameString(chrom, pbw->chrom))
            {
                /* New chromosome: emit the chunk in progress (if any) and start over. */
                if (!sameString(chrom, ""))
                    slAddHead(&splitList, newBed(chrom, start, end));
                /* NOTE(review): unbounded copy; assumes chromosome names are
                 * shorter than sizeof(chrom) -- confirm or switch to a
                 * bounds-checked copy. */
                strcpy(chrom, pbw->chrom);
                start = pbw->chromStart;
                end = pbw->chromEnd;
                /* NOTE(review): size is carried over from the previous
                 * chromosome when it still fits, although a new chunk starts
                 * here -- confirm this is intended. */
                if (size + length > chunk_size)
                    size = length;
                else
                    size += length;
            }
            else
            {
                if ((size + length + gap > chunk_size) && (gap >= min_gap))
                {
                    /* Chunk is full and the gap is wide enough to cut at. */
                    slAddHead(&splitList, newBed(chrom, start, end));
                    start = pbw->chromStart;
                    end = pbw->chromEnd;
                    size = length;
                }
                else
                {
                    /* Extend the current chunk across the gap. */
                    size += length + gap;
                    end = pbw->chromEnd;
                }
            }
        }
        perBaseWigFreeList(&pbwList);
    }
    /* Emit the chunk still in progress.  chrom is only empty when no data was
     * seen at all; skip the add then, otherwise a bogus "\t-1\t0" record
     * would be written. */
    if (!sameString(chrom, ""))
        slAddHead(&splitList, newBed(chrom, start, end));
    slReverse(&splitList);
    for (section = splitList; section != NULL; section = section->next)
    {
        fprintf(output, "%s\t%d\t%d\n", section->chrom, section->chromStart, section->chromEnd);
    }
    carefulClose(&output);
    metaBigClose(&mb);
    bedFreeList(&splitList);
}
Example #27
0
void bwtool_find_max(struct hash *options, char *favorites, char *regions, double fill,
		     char *bigfile, char *tmp_dir, char *outputfile)
/* Find max points in a range.
 * For every region read from the bed file regions, scan the data loaded from
 * bigfile and collect the offsets (relative to the region start) of all bases
 * holding the maximum value.  Regions with data are written to outputfile as
 * bed12 lines whose blocks are 1-base wide and mark those offsets.
 *   options - "median-base": report a single block at the offset returned by
 *             median_base_calc instead of one block per tied position;
 *             "with-max": append the maximum value as an extra column.
 * favorites and fill are not referenced by this function.
 * Regions for which no value was collected produce no output line. */
{
    boolean med_base = (hashFindVal(options, "median-base") != NULL) ? TRUE : FALSE;
    boolean with_max = (hashFindVal(options, "with-max") != NULL) ? TRUE : FALSE;
    struct metaBig *mb = metaBigOpen_check(bigfile, tmp_dir, NULL);
    FILE *out = mustOpen(outputfile, "w");
    struct bed6 *sections6 = readBed6Soft(regions);
    struct bed *sections = bed12FromBed6(&sections6);
    struct bed *section;
    for (section = sections; section != NULL; section = section->next)
    {
        struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart,
                                                              section->chromEnd);
        struct perBaseWig *pbw;
        struct slInt *ii;
        int i, size;
        double max = -DBL_MAX;
        struct slInt *list = NULL;   /* offsets of every base equal to max */
        for (pbw = pbwList; pbw != NULL; pbw = pbw->next)
        {
            /* Convert indexes within this item to offsets within the region. */
            int pbw_off = pbw->chromStart - section->chromStart;
            for (i = 0; i < pbw->len; i++)
            {
                if (pbw->data[i] > max)
                {
                    /* New maximum: forget the positions of the old one. */
                    slFreeList(&list);
                    struct slInt *new_int = slIntNew(i + pbw_off);
                    slAddHead(&list, new_int);
                    max = pbw->data[i];
                }
                else if (pbw->data[i] == max)
                {
                    /* Tie with the current maximum: remember this position too. */
                    struct slInt *new_int = slIntNew(i + pbw_off);
                    slAddHead(&list, new_int);
                }
            }
        }
        slReverse(&list);
        if (list)
        {
            size = slCount(list);
            /* AllocArray takes an element count, not a byte count; multiplying
             * by sizeof(int) here would over-allocate by that factor. */
            if (med_base)
            {
                section->blockCount = 1;
                AllocArray(section->blockSizes, 1);
                AllocArray(section->chromStarts, 1);
                section->blockSizes[0] = 1;
                section->chromStarts[0] = median_base_calc(&list);
            }
            else
            {
                section->blockCount = size;
                AllocArray(section->blockSizes, size);
                AllocArray(section->chromStarts, size);
                for (i = 0, ii = list; (i < size) && (ii != NULL); i++, ii = ii->next)
                {
                    section->blockSizes[i] = 1;
                    section->chromStarts[i] = ii->val;
                }
            }
            if (!with_max)
                bedTabOutN(section, 12, out);
            else
            {
                /* Tab-terminated bed12 followed by the maximum as a 13th column. */
                bedOutputN(section, 12, out, '\t', '\t');
                fprintf(out, "%f\n", max);
            }
            slFreeList(&list);
        }
        /* NOTE(review): pbwList is a list (walked via ->next above); if
         * perBaseWigFree releases only one element, the list variant should
         * be used here -- confirm against its definition. */
        perBaseWigFree(&pbwList);
    }
    metaBigClose(&mb);
    bedFreeList(&sections);
    carefulClose(&out);
}
Example #28
0
static void writeProbeSetBeds(struct bed *bedList, struct slName *expNames,
			      FILE *bedOut, FILE *toDiffOut)
/* Finalize the score of every bed built for one probeset and write it out.
 * The accumulated score is divided by the number of experiments whose
 * expScores entry is not -10000 (presumably the "no value" marker set by
 * initBedScores -- TODO confirm), log-transformed and scaled by 100.  Beds
 * with no such experiment or a non-positive sum get score 0.  Each bed is
 * written as BED15 to bedOut and, when toDiffOut is not NULL, also handed to
 * outputToDiffRecord.  A NULL bedList is a no-op. */
{
struct bed *b = NULL;
int i = 0;
for(b = bedList; b != NULL; b = b->next)
    {
    int avgCount = 0;
    for(i = 0; i < b->expCount; i++)
	if(b->expScores[i] != -10000)
	    avgCount++;
    if(avgCount != 0 && b->score > 0)
	b->score = log(b->score / avgCount) * 100;
    else
	b->score = 0;
    bedTabOutN(b, 15, bedOut);
    if(toDiffOut != NULL)
	outputToDiffRecord(b, expNames, toDiffOut);
    }
}

void outputBedsFromPsls(struct hash *pslHash,char *bedOutName, char *expRecordOutName, 
			char *affyFileName, char *expFileName)
/** For each set of entries in affyFile find matching psl and create a bed.
 * affyFileName is read as tab-separated rows of four fields: probeset name,
 * experiment name (looked up in the hash built from expFileName), a value
 * summed into the bed score, and a value stored in expScores.  A value of
 * "NaN" in either of the last two fields is skipped.  Rows of one probeset
 * must be consecutive; a change of name in the first field starts a new
 * probeset.
 * pslHash maps probeset name to a list of psl; one bed is built per psl.
 * Beds are written as BED15 to bedOutName (and to the file named by the
 * "toDiffFile" option, if set).  One experiment record per experiment name
 * is written to expRecordOutName. */
{
struct bed *bed = NULL, *b=NULL;
struct psl *pslList = NULL, *psl = NULL;
struct hash *expHash = NULL;
int numExps = 0;
int expCount = 0;
int i =0;
int wordCount = 0;
char *probeSet = NULL;
char *row[4];
char key[128];
struct slName *expNames = NULL, *name = NULL;
FILE *bedOut = NULL;
FILE *expRecordOut = NULL;
char *toDiffFileName = optionVal("toDiffFile", NULL);
FILE *toDiffOut = NULL;
struct lineFile *lf = NULL;
fillInExpHash(expFileName, &expHash, &expNames, &expCount);
lf = lineFileOpen(affyFileName, TRUE);
bedOut = mustOpen(bedOutName, "w");
if(toDiffFileName != NULL)
    toDiffOut = mustOpen(toDiffFileName, "w");

/* Loop through either adding experiments to beds or if new
   probeset create bed from psl and start over.
   The third argument is the capacity of row in elements; passing
   sizeof(row) (a byte count) would let long lines overflow the array. */
while((wordCount = lineFileChopNextTab(lf, row, sizeof(row) / sizeof(row[0]))) != 0)
    {
    /* Do we have to make a new bed? */
    if(probeSet == NULL || differentWord(probeSet, row[0]))
	{
	occassionalDot();
	numExps = 0;
	/* Print out the beds of the probeset just finished (none on the first row). */
	writeProbeSetBeds(bed, expNames, bedOut, toDiffOut);
	bedFreeList(&bed);
	/* Lookup key in pslHash to find list of psl. */
	safef(key, sizeof(key), "%s", row[0]);
	pslList = hashFindVal(pslHash, key);
	/* Can have multiple psls. */
	for(psl = pslList; psl != NULL; psl = psl->next)
	    {
	    b = bedFromPsl(psl);
	    AllocArray(b->expIds, expCount );
	    AllocArray(b->expScores, expCount);
	    b->expCount = expCount;
	    initBedScores(b, expCount);
	    slAddHead(&bed, b);
	    }
	}
    if(bed != NULL)
	{
	int exp = 0;
	if(numExps > expCount)
	    {
	    errAbort("Supposed to be %d experiments but probeset %s has at least %d",
		     expCount, bed->name, numExps);
	    }
	/* All four fields are read below; refuse short rows instead of
	   reading stale or uninitialized row entries. */
	if(wordCount < 4)
	    errAbort("Expecting 4 fields but got %d for probeset %s in %s",
		     wordCount, row[0], affyFileName);
	/* The experiment index is the same for every bed of this row; make
	   sure it fits the expScores arrays allocated above. */
	exp = hashIntVal(expHash, row[1]);
	if(exp < 0 || exp >= expCount)
	    errAbort("Invalid experiment index %d for %s, probeset %s in %s",
		     exp, row[1], row[0], affyFileName);
	for(b = bed; b != NULL; b = b->next)
	    {
	    if(differentWord(row[3], "NaN"))
	       b->expScores[exp] = atof(row[3]);
	    if(differentWord(row[2], "NaN"))
	       b->score += atof(row[2]);
	    }
	numExps++;
	}
    freez(&probeSet);
    probeSet = cloneString(row[0]);
    }
/* Beds are only written when the next probeset starts, so the last
   probeset of the file is still pending here. */
writeProbeSetBeds(bed, expNames, bedOut, toDiffOut);
bedFreeList(&bed);
freez(&probeSet);

expRecordOut = mustOpen(expRecordOutName, "w");
i = 0;
for(name = expNames; name != NULL; name = name->next)
    {
    /* Commas and spaces in experiment names are replaced with underscores. */
    subChar(name->name, ',', '_');	    
    subChar(name->name, ' ', '_');
    fprintf(expRecordOut, "%d\t%s\tuclaExp\tuclaExp\tuclaExp\tuclaExp\t1\t%s,\n", i++, name->name, name->name);
    }
hashFree(&expHash);
slFreeList(&expNames);
carefulClose(&expRecordOut);
carefulClose(&bedOut);
if(toDiffOut != NULL)
    carefulClose(&toDiffOut);
lineFileClose(&lf);
}
Example #29
0
int checkTableCoords(char *db)
/* Run the coordinate checks on theTable, or on every table returned by
 * getTableNames() when theTable is NULL.  Only tables whose table info is
 * found and marked positional are examined.  Returns nonzero if any check
 * reported an error. */
{
struct sqlConnection *conn = hAllocConn(db);
struct slName *tables = NULL, *tableEl = NULL;
struct slName *allChroms = NULL;
boolean gotError = FALSE;

allChroms = hAllChromNames(db);
if (theTable != NULL)
    {
    if (sqlTableExists(conn, theTable))
	tables = newSlName(theTable);
    else
	errAbort("Error: specified table \"%s\" does not exist in database %s.",
		 theTable, db);
    }
else
    tables = getTableNames(conn);

for (tableEl = tables;  tableEl != NULL;  tableEl = tableEl->next)
    {
    char *table = tableEl->name;
    char tableChrom[32], trackName[128], tableChromPrefix[33];
    struct hTableInfo *hti = NULL;
    struct slName *chroms = NULL, *chromEl = NULL;

    hParseTableName(db, table, trackName, tableChrom);
    hti = hFindTableInfo(db, tableChrom, trackName);
    /* Nothing to check for tables without positional info. */
    if (hti == NULL || !hti->isPos)
	continue;
    /* When both split and non-split versions of a table exist, the hti of
     * the non-split one is replaced with that of the split one; skip. */
    if (splitAndNonSplitExist(conn, table, tableChrom))
	continue;
    safef(tableChromPrefix, sizeof(tableChromPrefix), "%s_", tableChrom);
    /* A split table covers a single chromosome, others cover all of them. */
    if (hti->isSplit)
	chroms = newSlName(tableChrom);
    else
	chroms = allChroms;
    /* The per-chromosome SQL below gets too large for scaffold-based
     * databases, so those are not checked. */
    if (hChromCount(db) > MAX_SEQS_SUPPORTED)
	continue;

    /* Invariant: chrom must be described in chromInfo.  Items with a bad
     * chrom are invisible to hGetBedRange(), so count them with one query
     * that excludes every expected chromosome name. */
    if (isNotEmpty(hti->chromField))
	{
	struct dyString *query = newDyString(1024);
	int badChromCount = 0;
	dyStringClear(query);
	sqlDyStringPrintf(query, "select count(*) from %s where ",
		       table);
	chromEl = chroms;
	while (chromEl != NULL)
	    {
	    sqlDyStringPrintf(query, "%s != '%s' ",
			   hti->chromField, chromEl->name);
	    if (chromEl->next != NULL)
		dyStringAppend(query, "AND ");
	    chromEl = chromEl->next;
	    }
	badChromCount = sqlQuickNum(conn, query->string);
	gotError |= reportErrors(BAD_CHROM, table, badChromCount);
	dyStringFree(&query);
	}

    /* Load the items of each chromosome and run the remaining checks. */
    for (chromEl = chroms; chromEl != NULL; chromEl = chromEl->next)
	{
	char *chrom = chromEl->name;
	struct bed *beds = hGetBedRange(db, table, chrom, 0, 0, NULL);
	if (hti->isSplit && isNotEmpty(hti->chromField))
	    gotError |= checkSplitTableOnlyChrom(beds, table, hti,
						 tableChrom);
	gotError |= checkStartEnd(beds, table, hti,
				  testChromSize(chrom));
	if (hti->hasCDS)
	    gotError |= checkCDSStartEnd(beds, table, hti);
	if (hti->hasBlocks && !ignoreBlocks)
	    gotError |= checkBlocks(beds, table, hti);
	bedFreeList(&beds);
	}
    }
return gotError;
}