Exemplo n.º 1
0
void bedFirstCodingExonSize(char *inBed, char *overBed, char *underBed, char *outSize)
/* bedFirstCodingExonSize - Report the size of the first coding exon of every
 * coding item in inBed.
 *
 * A "name<TAB>size" line is written to outSize for each item whose thick
 * region is non-empty (thickStart < thickEnd); other items are skipped.
 * When overBed is non-NULL, items whose size is at least the file-level
 * 'threshold' are also written there with 12 fields; when underBed is
 * non-NULL the remaining coding items are written there instead. */
{
FILE *sizeOut = mustOpen(outSize, "w");
FILE *overOut = (overBed != NULL) ? mustOpen(overBed, "w") : NULL;
FILE *underOut = (underBed != NULL) ? mustOpen(underBed, "w") : NULL;
struct bed *item = bedLoadNAll(inBed, 12);

while (item != NULL)
    {
    if (item->thickStart < item->thickEnd)
        {
        int cdsSize = bedFirstCdsSize(item);
        /* Pick the optional output matching this item's size class; it may
         * be NULL when the caller did not ask for that file. */
        FILE *dest = (cdsSize >= threshold) ? overOut : underOut;

        fprintf(sizeOut, "%s\t%d\n", item->name, cdsSize);
        if (dest != NULL)
            bedTabOutN(item, 12, dest);
        }
    item = item->next;
    }
carefulClose(&sizeOut);
carefulClose(&overOut);
carefulClose(&underOut);
}
Exemplo n.º 2
0
void hgPhMouse(char *database, char *track, int fileCount, char *fileNames[])
/* hgPhMouse - Load phMouse track.
 *
 * Every input file is chopped into words.  Words 0-2 supply chrom, start and
 * end, word 6 the score, and word 3 a name of the form "name|strand" where
 * strand must be '+' or '-'.  The records of each file are sorted with bedCmp
 * and appended, each prefixed by its hFindBin() value, to the scratch file
 * phMouse.tab.  That file is finally passed to loadDatabase() and removed.
 * Malformed lines abort via errAbort(). */
{
int i;
char *tabName = "phMouse.tab";
FILE *f = mustOpen(tabName, "w");
char *words[32];
int wordCount;
int totalSize = 0;

for (i=0; i<fileCount; ++i)
    {
    struct bed *bedList = NULL, *bed;
    char *fileName = fileNames[i];
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    int oneSize;

    printf("Reading %s ", fileName);
    fflush(stdout);
    while ((wordCount = lineFileChop(lf, words)) > 0)
        {
        char *s, c;
        if (wordCount < 7)
            errAbort("Expecting at least 7 words line %d of %s",
                lf->lineIx, fileName);
        AllocVar(bed);
        bed->chrom = cloneString(words[0]);
        bed->chromStart = lineFileNeedNum(lf, words, 1);
        bed->chromEnd = lineFileNeedNum(lf, words, 2);
        bed->score = lineFileNeedNum(lf, words, 6);
        /* The strand is the character following the last '|' of the name.
         * A name without any '|' carries no strand, so report it instead of
         * dereferencing the NULL returned by strrchr(). */
        s = strrchr(words[3], '|');
        if (s == NULL)
            errAbort("Misformed strandless trace name line %d of %s",
                lf->lineIx, lf->fileName);
        c = s[1];
        s[0] = 0;       /* Cut "|strand" off so only the name is cloned. */
        if (c != '+' && c != '-')
            errAbort("Misformed strandless trace name line %d of %s",
                lf->lineIx, lf->fileName);
        bed->name = cloneString(words[3]);
        bed->strand[0] = c;
        slAddHead(&bedList, bed);
        }
    /* Done with this input; release it so a long file list does not pile
     * up open line files. */
    lineFileClose(&lf);
    oneSize = slCount(bedList);
    printf("%d alignments ", oneSize);
    totalSize += oneSize;
    fflush(stdout);
    slSort(&bedList, bedCmp);
    printf("sorted ");
    fflush(stdout);
    for (bed = bedList; bed != NULL; bed = bed->next)
        {
        int bin = hFindBin(bed->chromStart, bed->chromEnd);
        fprintf(f, "%d\t", bin);
        bedTabOutN(bed, 6, f);
        }
    printf("tabbed out\n");
    bedFreeList(&bedList);
    }
carefulClose(&f);
printf("Loading %d items into %s.%s\n", totalSize, database, track);
loadDatabase(database, track, tabName);
remove(tabName);
}
void txCdsToGene(char *txBed, char *txFa, char *txCds, char *outGtf, char *outFa)
/* txCdsToGene - Convert transcript bed and best cdsEvidence to genePred and
 * protein sequence.
 *
 * For every 12-field row of txBed the matching sequence (from txFa) and,
 * if any, cdsEvidence (from txCds) are looked up by the bed name.  When
 * evidence exists the protein is written to outFa, and a bed whose evidence
 * has more than one CDS piece is replaced by the result of
 * breakUpBedAtCdsBreaks().  The bed then gets its thick bounds set and is
 * written as GTF to outGtf.  The optional file-level outputs fTweaked and
 * fBed are used when non-NULL.  A bed with no sequence aborts the run. */
{
struct hash *seqByName = faReadAllIntoHash(txFa, dnaLower);
verbose(2, "Read %d transcript sequences from %s\n", seqByName->elCount, txFa);
struct hash *cdsByName = cdsEvidenceReadAllIntoHash(txCds);
verbose(2, "Read %d cdsEvidence from %s\n", cdsByName->elCount, txCds);
struct lineFile *bedIn = lineFileOpen(txBed, TRUE);
FILE *gtfOut = mustOpen(outGtf, "w");
FILE *faOut = mustOpen(outFa, "w");
char *fields[12];

while (lineFileRow(bedIn, fields))
    {
    struct bed *tx = bedLoad12(fields);
    verbose(2, "processing %s\n", tx->name);
    struct cdsEvidence *cds = hashFindVal(cdsByName, tx->name);
    struct dnaSeq *seq = hashFindVal(seqByName, tx->name);
    char *cdsSource = NULL;

    if (seq == NULL)
        errAbort("%s is in %s but not %s", tx->name, txBed, txFa);
    if (cds != NULL)
        {
        outputProtein(cds, seq, faOut);
        if (cds->cdsCount > 1)
            {
            /* Swap the loaded bed for its broken-up replacement. */
            struct bed *split = breakUpBedAtCdsBreaks(cds, tx);
            if (fTweaked)
                fprintf(fTweaked, "%s\n", split->name);
            bedFree(&tx);
            tx = split;
            }
        /* Use the accession as the CDS source unless it is the "."
         * placeholder, in which case fall back on the source field. */
        if (sameString(cds->accession, "."))
            cdsSource = cds->source;
        else
            cdsSource = cds->accession;
        }

    /* Thick bounds are set even when cds is NULL; the bed goes to fBed only
     * if that optional output is open. */
    cdsEvidenceSetBedThick(cds, tx);
    if (fBed)
        bedTabOutN(tx, 12, fBed);

    /* The bed name has the form chrom.geneId.txId.accession: split the
     * accession off at the last dot, then drop one more suffix. */
    char *geneName = cloneString(tx->name);
    char *lastDot = strrchr(geneName, '.');
    assert(lastDot != NULL);
    *lastDot = 0;
    char *accession = lastDot + 1;
    chopSuffix(geneName);

    bedToGtf(tx, accession, cdsSource, geneName, gtfOut);

    /* Per-row cleanup. */
    freez(&geneName);
    bedFree(&tx);
    }
lineFileClose(&bedIn);
carefulClose(&faOut);
carefulClose(&gtfOut);
}
Exemplo n.º 4
0
void writeBedList(struct bed *bedList, FILE *f)
/* Walk bedList from head to tail, writing each element to f through
 * bedTabOutN() with 12 fields.  A NULL list writes nothing. */
{
struct bed *cur = bedList;
while (cur != NULL)
    {
    bedTabOutN(cur, 12, f);
    cur = cur->next;
    }
}
Exemplo n.º 5
0
static void writeBeds(struct bed4 *beds, FILE *fh)
/* Write every bed4 on the list to fh, 4 fields each.  Each element is cast
 * to struct bed so that bedTabOutN() can be reused. */
{
struct bed4 *cur = beds;
while (cur != NULL)
    {
    bedTabOutN((struct bed*)cur, 4, fh);
    cur = cur->next;
    }
}
void outputBed(struct cassetteSeq *cseq, FILE *primerBed) 
/* Output a bed linked features track to see where primers are.
 *
 * A copy of cseq->bed is reshaped into a two-block item: the first block
 * starts at the genomic position of the primer that comes first for the
 * bed's strand, the second at the position of the other primer.  The result
 * is checked with checkBedMatchesSeqs() and written to primerBed with 12
 * fields.  Nothing is written when either primer is missing. */
{
struct bed *cbed = cseq->bed;
struct bed *bed = NULL;
int leftStart = 0, rightStart = 0;

/* Bail out before cloning: the original cloned first, so this early return
 * leaked the copy. */
if (cseq->leftPrimer == NULL || cseq->rightPrimer == NULL)
    return;
bed = cloneBed(cbed);

leftStart = calcGenomePos(cbed, cseq->leftPrimer, cseq->seq);
rightStart = calcGenomePos(cbed, cseq->rightPrimer, cseq->seq);
/* NOTE(review): in both branches the second block reuses the length of the
 * primer that sized the first block.  This looks like a copy/paste slip (the
 * second block sits at the other primer's position) but is kept as-is
 * pending confirmation, because it changes the emitted coordinates. */
if (sameString(bed->strand, "+"))
    {
    bed->chromStart = bed->thickStart = leftStart;
    bed->chromStarts[0] = 0;
    bed->blockSizes[0] = strlen(cseq->leftPrimer);
    bed->chromStarts[1] = rightStart - leftStart;
    bed->blockSizes[1] = strlen(cseq->leftPrimer);
    bed->chromEnd = bed->thickEnd = bed->chromStarts[1] + bed->chromStart + bed->blockSizes[1];
    }
else
    {
    bed->chromStart = bed->thickStart = rightStart;
    bed->chromStarts[0] = 0;
    bed->blockSizes[0] = strlen(cseq->rightPrimer);
    bed->chromStarts[1] = leftStart - rightStart;
    bed->blockSizes[1] = strlen(cseq->rightPrimer);
    bed->chromEnd = bed->thickEnd = bed->chromStarts[1] + bed->chromStart + bed->blockSizes[1];
    }
bed->blockCount = 2;
checkBedMatchesSeqs(cseq, bed);
bedTabOutN(bed, 12, primerBed);
bedFree(&bed);
}
void spitBedList(struct bed *bedList, FILE *output)
/* Write each bed of the list, in list order, to output using 6 fields. */
{
struct bed *cur = bedList;
while (cur != NULL)
    {
    bedTabOutN(cur, 6, output);
    cur = cur->next;
    }
}
Exemplo n.º 8
0
void pslToBed(char *pslFile, char *bedFile, struct hash *cdsHash, bool doPosName)
/* pslToBed -- transform a psl format file to a bed format file.
 *
 * Each psl read from pslFile is converted with bedFromPsl() and written to
 * bedFile with 12 fields.  With doPosName set, the bed name is replaced by
 * "qName:qStart-qEnd".  When cdsHash is given, the thick region comes from
 * the entry stored under the psl's qName, or is collapsed to chromStart when
 * there is no such entry. */
{
struct lineFile *in = pslFileOpen(pslFile);
FILE *out = mustOpen(bedFile, "w");
struct psl *aln;

for (;;)
    {
    struct bed *item;

    aln = pslNext(in);
    if (aln == NULL)
        break;
    item = bedFromPsl(aln);
    if (doPosName)
        {
        /* Swap the converted name for a position-based one. */
        char *posName = needMem(512);
        safef(posName, 512, "%s:%d-%d", aln->qName, aln->qStart, aln->qEnd);
        freeMem(item->name);
        item->name = posName;
        }

    if (cdsHash)
        {
        struct cds *cds = hashFindVal(cdsHash, aln->qName);
        if (cds != NULL)
            setThick(aln, item, cds);
        else
            item->thickStart = item->thickEnd = item->chromStart;
        }
    bedTabOutN(item, 12, out);
    bedFree(&item);
    pslFree(&aln);
    }
carefulClose(&out);
lineFileClose(&in);
}
Exemplo n.º 9
0
void gffToBed(char *inGff, char *outBed)
/* gffToBed - Convert a gff file (gff1 or gff2) to bed.  Not tested with gff3.
 *
 * The gff lines are grouped, the groups are separated by chromosome, and
 * each group is turned into a genePred (GTF or plain GFF flavour, depending
 * on gff->isGtf).  Groups that yield a genePred are written to outBed with
 * 12 fields; the others are skipped. */
{
struct gffFile *gff = gffRead(inGff);
FILE *out = mustOpen(outBed, "w");
char *exonFeature = bestExonFeature(gff);
gffGroupLines(gff);
separateGroupsByChromosome(gff);

struct gffGroup *grp = gff->groupList;
while (grp != NULL)
    {
    struct genePred *gp = gff->isGtf
        ? genePredFromGroupedGtf(gff, grp, grp->name, FALSE, FALSE)
        : genePredFromGroupedGff(gff, grp, grp->name, exonFeature, FALSE, FALSE);
    if (gp != NULL)
        {
        /* The transcript is expected to begin at its first exon. */
        assert(gp->txStart == gp->exonStarts[0]);
        struct bed *item = bedFromGenePred(gp);
        bedTabOutN(item, 12, out);
        bedFree(&item);
        }
    grp = grp->next;
    }
carefulClose(&out);
}
Exemplo n.º 10
0
void outputBed6(struct bed *bedList, char *output)
/* Create (or truncate) the file named by output and write every bed of
 * bedList to it with 6 fields. */
{
FILE *out = mustOpen(output, "w");
struct bed *cur = bedList;
while (cur != NULL)
    {
    bedTabOutN(cur, 6, out);
    cur = cur->next;
    }
carefulClose(&out);
}
Exemplo n.º 11
0
void writeCluster(struct bedNamedScore *clusterList, FILE *out)
/* Takes a list of bed lines and writes out a single blocked bed line into the out file.
 *
 * Clusters with fewer than clMinCluster members, and empty lists, are
 * ignored.  The list is reversed in place before use; the element that was
 * at the head on entry supplies chromEnd and the head after reversal supplies
 * chrom, chromStart and strand.  Every member becomes a 1-base block, the
 * name is the member count, and the score is the mean member score times 10. */
{
int size = slCount(clusterList);
/* The size == 0 test matters when clMinCluster <= 0: without it an empty list
 * would declare zero-length arrays and dereference a NULL head below. */
if (size == 0 || size < clMinCluster)
    return;

int blockStarts[size];
int blockSizes[size];
double score = 0;
struct bedNamedScore *last = clusterList;

slReverse(&clusterList);

// create our output bed object and assign values to all the fields we care about
struct bed outBed;

// outBed only lives for this call, so borrow the chrom string rather than
// cloning it; the clone was never freed and leaked once per cluster
outBed.chrom = clusterList->chrom;
outBed.chromStart = clusterList->chromStart;
outBed.chromEnd = last->chromEnd;

// the name of each record is merely the size of the cluster, mostly for viewing on the browser
char sizeBuf[16];
safef(sizeBuf, sizeof(sizeBuf), "%d", size);
outBed.name = sizeBuf;

outBed.strand[0] = clusterList->strand;
outBed.strand[1] = '\0';
outBed.blockCount = size;

// get the blockStarts and also calculate the final score, which is just the average of the scores * 10
// because the input values are decimal numbers from 0.0000-100.0000 and our bed output is an int 0-1000
int i = 0;
struct bedNamedScore *cur;
for (cur = clusterList; cur != NULL; cur = cur->next)
    {
    blockSizes[i] = 1;
    blockStarts[i] = cur->chromStart - outBed.chromStart;
    score += cur->score;
    i++;
    }
outBed.blockSizes = blockSizes;
outBed.chromStarts = blockStarts;
outBed.score = (int)(score * 10 / size);

// zero out unused fields
outBed.thickStart = outBed.chromStart;
outBed.thickEnd = outBed.chromStart;
outBed.itemRgb = 0;

// finally print our struct out as a bed12
bedTabOutN(&outBed, 12, out);
}
Exemplo n.º 12
0
void doBeds(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom, 
	    char *netTable, char *bedFileName, char *bedTableName,
	    char *outBedName, char *selectedFileName, int *foundCount, int *notFoundCount)	
/* Map over beds.
 *
 * Beds come from bedFileName when it is given, otherwise from bedTableName
 * restricted to chrom.  Each bed on chrom is passed to orthoBedFromBed().
 * A non-NULL result with at least one block is written to outBedName (and
 * the source bed to selectedFileName, if given) and counted in *foundCount;
 * anything else is counted in *notFoundCount. */
{
FILE *orthoOut = NULL;
FILE *selOut = NULL;
struct bed *src = NULL, *srcList = NULL, *mapped = NULL;

/* Load beds. */
warn("Loading beds.");
srcList = bedFileName
    ? bedLoadAll(bedFileName)
    : loadBedFromTable(conn, bedTableName, chrom, 0, BIGNUM);

/* Convert beds. */
warn("Converting beds.");
assert(outBedName);
orthoOut = mustOpen(outBedName, "w");
if (selectedFileName != NULL)
    selOut = mustOpen(selectedFileName, "w");
for (src = srcList; src != NULL; src = src->next)
    {
    /* Only items on the requested chromosome are mapped. */
    if (!differentString(src->chrom, chrom))
        {
        occassionalDot();
        mapped = orthoBedFromBed(conn, db, orthoDb, netTable, src);
        if (mapped == NULL || mapped->blockCount <= 0)
            (*notFoundCount)++;
        else
            {
            (*foundCount)++;
            bedTabOutN(mapped, 12, orthoOut);
            if (selOut != NULL)
                bedTabOutN(src, 12, selOut);
            }
        bedFree(&mapped);
        }
    }
bedFreeList(&srcList);
carefulClose(&selOut);
carefulClose(&orthoOut);
}
void doStrand(struct bed *start, struct bed *end, FILE *f)
/* Merge the beds in the half-open list range [start, end) into one bed and
 * write it to f with 12 fields.  All of them are assumed to be on the same
 * strand; chrom, name and strand of the merged bed are taken from start. */
{
struct rbTree *blocks = rangeTreeNew();
struct bed *cur = start;
struct bed *merged;

while (cur != end)
    {
    bedIntoRangeTree(cur, blocks);
    cur = cur->next;
    }
merged = bedFromRangeTree(blocks, start->chrom, start->name, start->strand);
bedTabOutN(merged, 12, f);
bedFree(&merged);
rangeTreeFree(&blocks);
}
void doAnalysisForBed(struct bed *bed) 
/* Run the external sample-extraction and R analysis pipeline for one bed.
 *
 * The bed is written to <root>.bed, where <root> comes from
 * getFileNameForBed().  samplesForCoordinates then writes <root>.data, which
 * is copied to tmp.data, and finally the R script is run.  Each external
 * command's exit status is checked: on failure a warning is issued and the
 * function returns without counting the bed.  On success the file-level
 * counter bedsAnalyzed is incremented. */
{
char *hgdbTestTable = cgiUsualString("hgdbTestTable","affyTrans_hg12");
char *hgdbTestName = "sugnet";
FILE *tmpFile = NULL;
char commandBuffer[4096];
char *fileNameRoot = getFileNameForBed(bed);
char bedFile[512];
char dataFile[512];
int retVal = 0;

/* Print out bed. */
safef(bedFile, sizeof(bedFile), "%s.bed", fileNameRoot);
tmpFile = mustOpen(bedFile, "w");
bedTabOutN(bed, 12, tmpFile);
carefulClose(&tmpFile);

/* Get samples for bed. */
safef(dataFile, sizeof(dataFile), "%s.data", fileNameRoot);
safef(commandBuffer, sizeof(commandBuffer), "samplesForCoordinates bedFile=%s hgdbTestName=%s hgdbTestTable=%s > %s", 
      bedFile, hgdbTestName, hgdbTestTable, dataFile);
retVal = system(commandBuffer);
if(retVal != 0)
    {
    warn("%s failed running command:\n%s", fileNameRoot, commandBuffer);
    return;
    }

/* Stage the data under the fixed name tmp.data.  If the copy fails, stop
 * here: carrying on would run R without the data just produced. */
safef(commandBuffer, sizeof(commandBuffer), "cp %s tmp.data", dataFile);
retVal = system(commandBuffer);
if(retVal != 0)
    {
    warn("%s failed running command:\n%s", fileNameRoot, commandBuffer);
    return;
    }

/* Run R analysis on data file. */
warn("Running R for %s", fileNameRoot);
fflush(stderr);
safef(commandBuffer, sizeof(commandBuffer), "R --vanilla < /cluster/home/sugnet/sugnet/R/maReg/R/runAnalysis.R");
retVal = system(commandBuffer);
/* Announce completion only after R has actually been run; the original
 * printed this before calling system(). */
warn("Done with R");
fflush(stderr);
if(retVal != 0)
    {
    warn("%s failed running command:\n%s", fileNameRoot, commandBuffer);
    return;
    }

bedsAnalyzed++;
}
Exemplo n.º 15
0
void affyPslAndAtlasToBedOld(char *pslFile, char *atlasFile, char *bedOut, char *expRecOut)
/** Main function that does all the work for old-style input.
 *
 * Loads the atlas file, builds beds from the psl alignments, attaches the
 * atlas experiments to them through a hash, runs the sanity checks, converts
 * intensities to ratios and averages them.  The experiment records are then
 * written to expRecOut and the beds, 15 fields each, to bedOut.  Progress is
 * reported through warn(). */
{
struct hash *bedsByName = NULL;
struct affyAtlas *atlasList = NULL;
struct expRecord *recList = NULL, *rec = NULL;
struct bed *beds = NULL, *cur = NULL;
int expCount = 0;
FILE *recFile = NULL, *bedFile = NULL;

warn("loading atlas file");
atlasList = affyAtlasLoadAll(atlasFile);
expCount = countExperiments(atlasList);

warn("creating list of beds from alignments");
beds = createBedsFromPsls(pslFile, expCount);

warn("creating hash from list of beds");
bedsByName = createBedHash(beds);

warn("appending experiments to beds in hash");
appendExperiments(bedsByName, atlasList, &recList);

warn("Running sanity Checks");
checkAllBeds(&beds, expCount);
warn("%d beds were missing experiments." , missingExpsCount);
warn("%d beds had no experiments.", noExpCount);

warn("Calculating average intensities");
convertIntensitiesToRatios(beds);
calculateAverages(beds);

warn("writing expRecords out");
recFile = mustOpen(expRecOut, "w");
rec = recList;
while (rec != NULL)
    {
    expRecordTabOut(rec, recFile);
    rec = rec->next;
    }
carefulClose(&recFile);

warn("writing beds out");
bedFile = mustOpen(bedOut, "w");
cur = beds;
while (cur != NULL)
    {
    bedTabOutN(cur, 15, bedFile);
    cur = cur->next;
    }
carefulClose(&bedFile);

warn("cleaning up..");
freeHash(&bedsByName);
bedFreeList(&beds);

warn("Done.");
}
Exemplo n.º 16
0
void bwtool_find_thresh(struct hash *options, char *favorites, char *regions, double fill,
			char *thresh_type, char *thresh_s, char *bigfile, char *tmp_dir, char *outputfile)
/* The other kind of finding, based on thresholding.
 *
 * For every section of the opened file the per-base data are scanned, and
 * each maximal run of consecutive bases accepted by fit_thresh() is written
 * to outputfile as a 3-field bed.  The comparison is chosen from thresh_type
 * and the "inverse" option; thresh_s is parsed as the threshold value.
 * The favorites and fill parameters are not used by this function. */
{
    boolean inverse = (hashFindVal(options, "inverse") != NULL);
    enum bw_op_type op = get_bw_op_type(thresh_type, inverse);
    struct metaBig *mb = metaBigOpen_check(bigfile, tmp_dir, regions);
    double thresh = sqlDouble(thresh_s);
    FILE *out = mustOpen(outputfile, "w");
    struct bed out_bed;
    struct bed *section = mb->sections;

    while (section != NULL)
    {
	struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart,
							      section->chromEnd);
	if (pbwList)
	{
	    struct perBaseWig *pbw;
	    /* The chrom of the first piece is used for every run in this section. */
	    out_bed.chrom = pbwList->chrom;
	    for (pbw = pbwList; pbw != NULL; pbw = pbw->next)
	    {
		int len = pbw->chromEnd - pbw->chromStart;
		int i = 0;
		out_bed.chromStart = out_bed.chromEnd = 0;
		while (i < len)
		{
		    /* Skip bases that fail the test ... */
		    for (; i < len; i++)
			if (fit_thresh(pbw->data[i], thresh, op))
			    break;
		    out_bed.chromStart = i + pbw->chromStart;
		    /* ... then consume the run of bases that pass it. */
		    for (; i < len; i++)
			if (!fit_thresh(pbw->data[i], thresh, op))
			    break;
		    out_bed.chromEnd = i + pbw->chromStart;
		    /* An empty run only happens when the data ended while skipping. */
		    if (out_bed.chromEnd > out_bed.chromStart)
			bedTabOutN(&out_bed, 3, out);
		}
	    }
	    perBaseWigFree(&pbwList);
	}
	section = section->next;
    }
    metaBigClose(&mb);
    carefulClose(&out);
}
Exemplo n.º 17
0
void doPsls(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom, 
	    char *netTable, char *pslFileName, char *pslTableName,
	    char *outBedName, char *selectedFileName, 
            int *foundCount, int *notFoundCount)
/* Map over psls.
 *
 * Psls come from pslFileName when it is given, otherwise from pslTableName
 * restricted to chrom.  Each psl whose tName equals chrom is passed to
 * orthoBedFromPsl().  A non-NULL result with at least one block is written
 * to outBedName (and the source psl to selectedFileName, if given) and
 * counted in *foundCount; anything else is counted in *notFoundCount. */
{
FILE *orthoOut = NULL;
FILE *selOut = NULL;
struct bed *mapped = NULL;
struct psl *aln = NULL, *alnList = NULL;

/* Load psls. */
warn("Loading psls.");
alnList = pslFileName
    ? pslLoadAll(pslFileName)
    : loadPslFromTable(conn, pslTableName, chrom, 0, BIGNUM);

/* Convert psls. */
warn("Converting psls.");
assert(outBedName);
orthoOut = mustOpen(outBedName, "w");
if (selectedFileName != NULL)
    selOut = mustOpen(selectedFileName, "w");
for (aln = alnList; aln != NULL; aln = aln->next)
    {
    /* Only alignments to the requested chromosome are mapped. */
    if (!differentString(aln->tName, chrom))
        {
        occassionalDot();
        mapped = orthoBedFromPsl(conn, db, orthoDb, netTable, aln);
        if (mapped == NULL || mapped->blockCount <= 0)
            (*notFoundCount)++;
        else
            {
            (*foundCount)++;
            bedTabOutN(mapped, 12, orthoOut);
            if (selOut != NULL)
                pslTabOut(aln, selOut);
            }
        bedFree(&mapped);
        }
    }
carefulClose(&selOut);
carefulClose(&orthoOut);
}
Exemplo n.º 18
0
void bedViewOut(struct altSpliceSite *as, FILE *out)
/* Write a two-block, 12-field bed describing the splice site 'as' to out.
 *
 * The item runs from as->chromStart to the largest value in as->altStarts.
 * It has one 1-base block at each end, takes its thick region from the
 * second alt entry (altBpStarts[1] .. altBpEnds[1]), its name from agName
 * and its score from spliceTypes[1]. */
{
struct bed *view = NULL;

AllocVar(view);
view->chrom = cloneString(as->chrom);
view->name = cloneString(as->agName);
safef(view->strand, sizeof(view->strand), "%s", as->strand);
view->score = as->spliceTypes[1];

view->chromStart = as->chromStart;
view->chromEnd = maxInArray(as->altStarts, as->altCount);
view->thickStart = as->altBpStarts[1];
view->thickEnd = as->altBpEnds[1];

/* One single-base block at each end of the item. */
view->blockCount = 2;
AllocArray(view->chromStarts, 2);
AllocArray(view->blockSizes, 2);
view->blockSizes[0] = 1;
view->blockSizes[1] = 1;
view->chromStarts[0] = 0;
view->chromStarts[1] = view->chromEnd - view->chromStart - 1;

bedTabOutN(view, 12, out);
bedFree(&view);
}
Exemplo n.º 19
0
void borfMatcher(char *bedIn, char *borfIn, char *bedOutFile, char *genePredOutFile)
/* Top level function to open files and call other functions.
 *
 * The bed and borf lists are walked in parallel; each pair must match by
 * name (the bed name has to occur inside the borf name) or the run aborts.
 * A pair is kept when the borf is on the "+" strand and either scores above
 * the "minScore" option (default 50) or, with "keepSmall", has a non-empty
 * CDS.  Kept beds get their thick bounds from the borf and, unless they are
 * NMD targets (or "keepNmd" is set), are written as a 12-field bed and as a
 * genePred.  The walk stops at the end of the shorter list. */
{
struct borf *orf = NULL, *orfList = NULL;
struct bed *item = NULL, *itemList = NULL;
struct genePred *gp = NULL;
float threshold = optionFloat("minScore", 50);
FILE *bedOut = mustOpen(bedOutFile, "w");
FILE *genePredOut = mustOpen(genePredOutFile, "w");
boolean keepSmall = optionExists("keepSmall");
boolean keepNmd = optionExists("keepNmd");

orfList = borfLoadAll(borfIn);
itemList = bedLoadAll(bedIn);
dotForUserInit(slCount(itemList)/10);
item = itemList;
orf = orfList;
while (item != NULL && orf != NULL)
    {
    boolean goodEnough;

    dotForUser();
    if (!stringIn(item->name, orf->name))
        errAbort("Trying to match up %s bed with %s borf - bad idea!", item->name, orf->name);
    /* Have to adjust cds end. Borf puts stop codon outside of cds,
       we put it inside. */
    orf->cdsEnd = min(orf->cdsEnd+3, orf->size);
    goodEnough = (orf->score > threshold || (keepSmall && orf->cdsSize > 0));
    if (goodEnough && sameString(orf->strand, "+"))
        {
        setThickStartStop(item, orf);
        if (keepNmd || !nmdTarget(item))
            {
            gp = bedToGenePred(item);
            bedTabOutN(item, 12, bedOut);
            genePredTabOut(gp, genePredOut);
            genePredFree(&gp);
            }
        }
    item = item->next;
    orf = orf->next;
    }
warn("Done.");
carefulClose(&bedOut);
carefulClose(&genePredOut);
}
void bedMergeOverlappingBlocks(char *inBed, char *outBed)
/* bedMergeOverlappingBlocks - Fix faulty BED 12 files with illegal
 * overlapping blocks.  Also reports a summary of the changes.
 *
 * Every non-empty line of inBed must have at least 12 fields.  Each line is
 * loaded, passed to fixBed() and written to outBed with the same number of
 * fields it came in with.  When the "report" option names a file, fixBed()
 * logs to it and a final total is appended. */
{
int fixedTotal = 0;
FILE *report = NULL;
FILE *out = mustOpen(outBed, "w");
char *reportName = optionVal("report", NULL);
struct lineFile *in = lineFileOpen(inBed, TRUE);
char *line, *row[12];
boolean useItemRgb = FALSE;

if (reportName)
    report = mustOpen(reportName, "w");
while (lineFileNext(in, &line, NULL))
    {
    struct bed *item;
    int fieldCount = chopByWhite(line, row, ArraySize(row));

    /* Blank lines do show up in the input; just skip them. */
    if (fieldCount == 0)
        continue;
    if (fieldCount < 12)
        errAbort("file %s doesn't appear to be in blocked-bed format. At least 12 fields required, got %d", inBed, fieldCount);
    /* NOTE(review): this flag is never reset, so once one line sets it all
     * following lines are written in itemRgb form too -- confirm intended. */
    if (bedParseRgb(row[8]))
        useItemRgb = TRUE;
    item = bedLoadN(row, fieldCount);
    fixedTotal += fixBed(item, in->lineIx, report);
    if (!useItemRgb)
        bedTabOutN(item, fieldCount, out);
    else
        bedTabOutNitemRgb(item, fieldCount, out);
    }
lineFileClose(&in);
if (report)
    {
    fprintf(report, "Fixed %d bad beds in all.\n", fixedTotal);
    carefulClose(&report);
    }
carefulClose(&out);
}
void createIntronBeds(char *agxFile, char *bedFile)
/* Make intron beds for evaluation.
 *
 * Loads every altGraphX record from agxFile, converts each one to a list of
 * intron beds with bedIntronsFromAgx() and writes those beds, 12 fields
 * each, to bedFile. */
{
struct altGraphX *ag=NULL, *agList = NULL;
struct bed *bed=NULL, *bedList=NULL;
FILE *bedOut = NULL;
warn("Rading AltGraphX list.");
agList = altGraphXLoadAll(agxFile);
warn("Converting to intron beds.");
bedOut = mustOpen(bedFile, "w");
for(ag = agList; ag != NULL; ag = ag->next)
    {
    occassionalDot();
    bedList = bedIntronsFromAgx(ag);
    for(bed=bedList; bed != NULL; bed=bed->next)
	{
	bedTabOutN(bed, 12, bedOut);
	}
    bedFreeList(&bedList);
    }
altGraphXFreeList(&agList);
/* The output file was previously left open; close it so the data is flushed
 * and the handle released before returning. */
carefulClose(&bedOut);
}
Exemplo n.º 22
0
void intronSizes(char *database, char *table)
/* intronSizes - Output list of intron sizes.
 *
 * Selects every row of table (optionally limited to the file-level
 * chromName, and with "withUtr" to rows where txStart != cdsStart), loads
 * each row as a genePred, collects its introns as beds and prints them to
 * stdout with 6 fields. */
{
    struct dyString *sql = newDyString(1024);
    struct sqlConnection *conn;
    struct sqlResult *sr;
    char **row;
    int rowOffset;
    struct bed *introns = NULL;

    hSetDb(database);
    rowOffset = hOffsetPastBin(NULL, table);
    conn = hAllocConn(database);

    sqlDyStringPrintf(sql, "select * from %s", table);
    /* NOTE(review): chromName is spliced into the query with the plain
     * dyStringPrintf -- confirm it can never hold untrusted text. */
    if (chromName != NULL)
        dyStringPrintf(sql, " where chrom = '%s'", chromName);
    if (cgiBoolean("withUtr"))
    {
        /* Start a where-clause, or extend the chrom one added above. */
        char *joiner = (chromName == NULL) ? "where" : "and";
        dyStringPrintf(sql, " %s txStart != cdsStart", joiner);
    }

    sr = sqlGetResult(conn, sql->string);
    for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
        struct genePred *gp = genePredLoad(row+rowOffset);
        struct bed *cur;

        genePredIntrons(gp, &introns);
        slReverse(&introns);
        for (cur = introns; cur != NULL; cur = cur->next)
            bedTabOutN(cur, 6, stdout);
        bedFreeList(&introns);
        genePredFree(&gp);
    }
    sqlFreeResult(&sr);
    hFreeConn(&conn);
}
void writeCassetteExon(struct bed *bedList, struct altGraphX *ag, int eIx, boolean *outputted, 
		       FILE *bedOutFile, FILE *outfile, FILE *html, float conf )
/* Write out the information for a cassette exon.
 *
 * bedList:    its head element is written to bedOutFile (12 fields) when
 *             that file is non-NULL.
 * ag, eIx:    graph and index of the edge describing the exon.
 * outputted:  flag recording whether ag has already been dumped to stdout;
 *             it is set to TRUE after the first dump.  May be NULL, in which
 *             case the graph is not dumped.
 * outfile:    optional fasta output; the exon sequence is written only when
 *             it is shorter than 200 bases.
 * html, conf: passed through to writeBrowserLink(). */
{
if(bedOutFile != NULL)
    bedTabOutN(bedList,12, bedOutFile);
writeBrowserLink(html, ag, conf, eIx);
/* Test the flag itself, not the pointer.  The original checked
 * '!outputted', which is only true for a NULL pointer and then
 * dereferenced that NULL on the next line. */
if(outputted != NULL && !*outputted)
    {
    altGraphXTabOut(ag, stdout);
    *outputted = TRUE;
    }
if(outfile != NULL)
    {
    struct dnaSeq *seq = hChromSeq(ag->tName, ag->vPositions[ag->edgeStarts[eIx]], ag->vPositions[ag->edgeEnds[eIx]]);
    /* NOTE(review): the sequence is reverse-complemented for "+" strand
     * graphs; confirm this is not meant to be "-". */
    if(sameString(ag->strand , "+")) 
	reverseComplement(seq->dna, seq->size);
    if(seq->size < 200)
	faWriteNext(outfile, seq->name, seq->dna, seq->size);
    freeDnaSeq(&seq);
    }
}
void txCdsBadBed(char *database, 
	char *altSpliceBed, char *outBed)
/* txCdsBadBed - Create a bed file with regions that don't really have CDS, 
 * but that might look like it.
 *
 * Four kinds of items are written to outBed, each named with the shared
 * file-level counter 'id':
 *   - retained/bleeding introns from altSpliceBed longer than 400 bases,
 *   - refGene 3' UTRs longer than 400 bases, plus roughly one in twenty
 *     refGene transcripts with the strand flipped ("anti"),
 *   - every vegaPseudoGene transcript,
 *   - retroMrnaInfo rows with score > 600.
 * Aborts if any of the three tables is missing from database. */
{
/* Open up database and make sure all the tables we want are there. */
char *refTrack = "refGene";
char *vegaPseudo = "vegaPseudoGene";
char *retroPseudo = "retroMrnaInfo";
struct sqlConnection *conn = sqlConnect(database);
if (!sqlTableExists(conn, refTrack))
    errAbort("table %s doesn't exist in %s", refTrack, database);
if (!sqlTableExists(conn, vegaPseudo))
    errAbort("table %s doesn't exist in %s", vegaPseudo, database);
if (!sqlTableExists(conn, retroPseudo))
    errAbort("table %s doesn't exist in %s", retroPseudo, database);

/* Read in alt file and output larger retained and bleeding introns. */
struct bed *bed, *intronyList = loadRetainedAndBleeding(altSpliceBed);
FILE *f = mustOpen(outBed, "w");
for (bed = intronyList; bed != NULL; bed = bed->next)
    {
    int size = bed->chromEnd - bed->chromStart;
    if (size > 400)
	{
	/* Written by hand as a single-block bed12 spanning the whole item. */
	fprintf(f, "%s\t%d\t%d\t", bed->chrom, bed->chromStart, bed->chromEnd);
	fprintf(f, "%s%d\t", bed->name, ++id);
	fprintf(f, "%d\t%s\t", bed->score, bed->strand);
	fprintf(f, "0\t0\t0\t1\t");
	fprintf(f, "%d,\t%d,\n", bed->chromEnd - bed->chromStart, 0);
	}
    }

/* Read in refGene, and write out larger 3' UTRs, and occassional antisense copies.  */
char query[512];
safef(query, sizeof(query), "select * from %s", refTrack);
int rowOffset = 0;
if (sqlFieldIndex(conn, refTrack, "bin") == 0)
    rowOffset = 1;
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct genePred *gp = genePredLoad(row + rowOffset);
    int start, end;
    /* The 3' UTR lies after the CDS on '+' and before it otherwise. */
    if (gp->strand[0] == '+')
        {
	start = gp->cdsEnd;
	end = gp->txEnd;
	}
    else
        {
	start = gp->txStart;
	end = gp->cdsStart;
	}
    if (end - start > 400)
        {
	gpPartOutAsBed(gp, start, end, f, "utr", ++id, 400);
	}
    if (rand()%20 == 0)
        {
	gp->strand[0] = (gp->strand[0] == '+' ? '-' : '+');
	gpPartOutAsBed(gp, gp->txStart, gp->txEnd, f, "anti", ++id, 0);
	}
    }
sqlFreeResult(&sr);

/* Write out vega pseudo-genes. */
safef(query, sizeof(query), "select * from %s", vegaPseudo);
rowOffset = 0;
if (sqlFieldIndex(conn, vegaPseudo, "bin") == 0)
    rowOffset = 1;
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct genePred *gp = genePredLoad(row + rowOffset);
    gpPartOutAsBed(gp, gp->txStart, gp->txEnd, f, "vega", ++id, 0);
    }
/* Release this result before the handle is reused for the next query; it
 * was previously overwritten without being freed. */
sqlFreeResult(&sr);

/* Write out retroGenes. */
safef(query, sizeof(query), "select * from %s where score > 600", retroPseudo);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    /* Named retroBed so it no longer shadows the outer 'bed'. */
    struct bed *retroBed = bedLoad12(row);
    char name[128];
    safef(name, sizeof(name), "retro_%d_%s", ++id, retroBed->name);
    /* NOTE(review): this points the bed at a stack buffer, dropping the
     * loaded name; the bed must not be freed with its name in this state. */
    retroBed->name = name;
    bedTabOutN(retroBed, 12, f);
    }
sqlFreeResult(&sr);

carefulClose(&f);
}
Exemplo n.º 25
0
static void outputProbeSetBeds(struct bed *bedList, struct slName *expNames,
			FILE *bedOut, FILE *toDiffOut)
/* Finalize and write the beds accumulated for one probe set.  The summed
 * score of each bed is replaced by log(sum / n) * 100, where n is the number
 * of expScores not equal to -10000, or by 0 when that cannot be computed.
 * Each bed is written with 15 fields to bedOut and, when toDiffOut is
 * non-NULL, also passed to outputToDiffRecord(). */
{
struct bed *b = NULL;
int i = 0;
for(b = bedList; b != NULL; b = b->next)
    {
    int avgCount = 0;
    for(i = 0; i < b->expCount; i++)
	if(b->expScores[i] != -10000)
	    avgCount++;
    if(avgCount != 0 && b->score > 0)
	b->score = log(b->score / avgCount) * 100;
    else
	b->score = 0;
    bedTabOutN(b, 15, bedOut);
    if(toDiffOut != NULL)
	outputToDiffRecord(b, expNames, toDiffOut);
    }
}

void outputBedsFromPsls(struct hash *pslHash,char *bedOutName, char *expRecordOutName, 
			char *affyFileName, char *expFileName)
/** For each set of entries in affyFile find matching psl and create a bed.
 *
 * affyFileName is read as 4-column tab-separated rows: probe set name,
 * experiment name, a value summed into the bed score, and a value stored as
 * the experiment score ("NaN" entries are skipped).  Consecutive rows with
 * the same probe set name feed the beds built from the psls stored in
 * pslHash under that name.  When the name changes, and once more at end of
 * input, the finished beds are written to bedOutName and, if the
 * "toDiffFile" option is set, to that file too.  Finally one experiment
 * record per name read from expFileName is written to expRecordOutName. */
{
struct bed *bed = NULL, *b=NULL;
struct psl *pslList = NULL, *psl = NULL;
struct hash *expHash = NULL;
int numExps = 0;
int expCount = 0;
int i =0;
char *probeSet = NULL;
char *row[4];
char key[128];
struct slName *expNames = NULL, *name = NULL;
FILE *bedOut = NULL;
FILE *expRecordOut = NULL;
char *toDiffFileName = optionVal("toDiffFile", NULL);
FILE *toDiffOut = NULL;
struct lineFile *lf = NULL;
fillInExpHash(expFileName, &expHash, &expNames, &expCount);
lf = lineFileOpen(affyFileName, TRUE);
bedOut = mustOpen(bedOutName, "w");
if(toDiffFileName != NULL)
    toDiffOut = mustOpen(toDiffFileName, "w");

/* Loop through either adding experiments to beds or if new
   probeset create bed from psl and start over.  The word limit must be the
   number of elements in row, not its size in bytes. */
while(lineFileChopNextTab(lf, row, sizeof(row)/sizeof(row[0])))
    {
    /* Do we have to make a new bed? */
    if(probeSet == NULL || differentWord(probeSet, row[0]))
	{
	occassionalDot();
	numExps = 0;
	/* If we have probeset print out the current beds. */
	if(probeSet != NULL)
	    outputProbeSetBeds(bed, expNames, bedOut, toDiffOut);
	bedFreeList(&bed);
	/* Lookup key in pslHash to find list of psl. */
	safef(key, sizeof(key), "%s", row[0]);
	pslList = hashFindVal(pslHash, key);
	/* Can have multiple psls. */
	for(psl = pslList; psl != NULL; psl = psl->next)
	    {
	    b = bedFromPsl(psl);
	    AllocArray(b->expIds, expCount );
	    AllocArray(b->expScores, expCount);
	    b->expCount = expCount;
	    initBedScores(b, expCount);
	    slAddHead(&bed, b);
	    }
	}
    if(bed != NULL)
	{
	/* More rows than experiments for one probe set is an input error. */
	if(numExps > expCount)
	    {
	    errAbort("Supposed to be %d experiments but probeset %s has at least %d",
		     expCount, bed->name, numExps);
	    }
	for(b = bed; b != NULL; b = b->next)
	    {
	    int expIx = hashIntVal(expHash, row[1]);
	    if(differentWord(row[3], "NaN"))
	       b->expScores[expIx] = atof(row[3]);
	    if(differentWord(row[2], "NaN"))
	       b->score += atof(row[2]);
	    }
	numExps++;
	}
    freez(&probeSet);
    probeSet = cloneString(row[0]);
    }

/* The loop above only writes a probe set when the next one begins, so the
 * final probe set is still pending here.  Write it out, then release it. */
if(probeSet != NULL)
    outputProbeSetBeds(bed, expNames, bedOut, toDiffOut);
bedFreeList(&bed);
freez(&probeSet);

expRecordOut = mustOpen(expRecordOutName, "w");
i = 0;
for(name = expNames; name != NULL; name = name->next)
    {
    subChar(name->name, ',', '_');	    
    subChar(name->name, ' ', '_');
    fprintf(expRecordOut, "%d\t%s\tuclaExp\tuclaExp\tuclaExp\tuclaExp\t1\t%s,\n", i++, name->name, name->name);
    }
hashFree(&expHash);
slFreeList(&expNames);
carefulClose(&expRecordOut);
carefulClose(&toDiffOut);
carefulClose(&bedOut);
lineFileClose(&lf);
}
Exemplo n.º 26
0
void affyPslAndAtlasToBedNew(char *pslFile, char *atlasFile, char *bedOut, 
	char *expRecOut)
/** Main function that does all the work for new-style input.
 *
 * The first real line of atlasFile names the experiments and is handed to
 * lineToExp() together with expRecOut.  Every following line holds an affy
 * id and one value per experiment; values below 20 are raised to 20 and,
 * when findPositiveMedian() returns a non-negative median, all values are
 * replaced by safeLog2(value / median).  With the file-level shortOut flag
 * the rows go straight to bedOut through shortDataOut(); otherwise they are
 * kept in a hash, and every psl of pslFile whose parsed name is in the hash
 * is written to bedOut as a 15-field bed carrying those scores. */
{
struct lineFile *lf = lineFileOpen(atlasFile, TRUE);
char *line;
int expCount;
char **row;
struct hash *dataById = newHash(17);
FILE *out = NULL;
int dataCount = 0, pslCount = 0, bedCount = 0;
int minExpVal = 20;

/* Open Atlas file and use first line to create experiment table. */
if (!lineFileNextReal(lf, &line))
    errAbort("%s is empty", lf->fileName);
if (startsWith("Affy", line))
    line += 4;
if (line[0] != '\t')
    errAbort("%s doesn't seem to be a new format atlas file", lf->fileName);
expCount = lineToExp(line+1, expRecOut);
if (expCount <= 0)
    errAbort("No experiments in %s it seems", lf->fileName);
warn("%d experiments\n", expCount);

out = mustOpen(bedOut, "w");

/* Build up a hash keyed by affyID with an array of data
 * for value.  Do output in short case. */
AllocArray(row, expCount);
while (lineFileNextReal(lf, &line))
    {
    char *affyId = nextWord(&line);
    int wordCount = chopByWhite(line, row, expCount);
    double *data, median;
    int i;

    if (wordCount != expCount)
        errAbort("Expecting %d data points, got %d line %d of %s", 
		expCount, wordCount, lf->lineIx, lf->fileName);
    if (hashLookup(dataById, affyId))
        {
        /* Skipped duplicates are not counted in dataCount. */
        warn("Duplicate %s, skipping all but first.", affyId);
        continue;
        }
    AllocArray(data, expCount);
    for (i=0; i<expCount; ++i)
        {
        /* Clamp small values up to the floor. */
        double raw = atof(row[i]);
        data[i] = (raw < minExpVal) ? minExpVal : raw;
        }
    median = findPositiveMedian(data, expCount, minExpVal);
    if (median >= 0)
        {
        double invMedian = 1.0/median;
        for (i=0; i<expCount; ++i)
            data[i] = safeLog2(invMedian*data[i]);
        if (shortOut)
            shortDataOut(out, affyId, expCount, data);
        else
            hashAdd(dataById, affyId, data);
        }
    ++dataCount;
    }
lineFileClose(&lf);
warn("%d rows of expression data\n", dataCount);

/* Stream through psl file, converting it to bed with expression data. */
if (!shortOut)
    {
    struct psl *psl;

    lf = pslFileOpen(pslFile);
    while ((psl = pslNext(lf)) != NULL)
        {
        char *name;
        double *data;

        ++pslCount;
        /* get probe id from sequence name */
        name = parseNameFromHgc(psl->qName);
        data = hashFindVal(dataById, name);
        if (data != NULL)
            {
            struct bed *bed = bedFromPsl(psl);
            int i;

            bed->expCount = expCount;
            AllocArray(bed->expIds, expCount);
            AllocArray(bed->expScores, expCount);
            for (i=0; i<expCount; ++i)
                {
                bed->expIds[i] = i;
                bed->expScores[i] = data[i];
                }
            bedTabOutN(bed, 15, out);
            ++bedCount;
            bedFree(&bed);
            }
        pslFree(&psl);
        }
    warn("%d records in %s", pslCount, pslFile);
    warn("%d records written to %s", bedCount, bedOut);
    }
lineFileClose(&lf);
carefulClose(&out);
}
void pickIntrons()
/** Top level routine, actually picks the introns. */
{
char *htmlFileName=NULL, *htmlFrameFileName=NULL;
char *bedFileName=NULL, *orthoBedFileName=NULL;
FILE *htmlOut=NULL, *htmlFrameOut=NULL;
FILE *bedOut=NULL, *orthoBedOut=NULL;
char *orthoEvalFile = NULL;
char *db = NULL;
struct orthoEval *ev=NULL, *evList = NULL;
struct intronEv *iv=NULL, *ivList = NULL;
int maxPicks = optionInt("numPicks", 100);
int i=0;
boolean isRefSeq=FALSE, isMgcBad=FALSE;
struct hash *posHash = newHash(12), *agxHash = newHash(12);
struct bed *bed = NULL;
char buff[256];

htmlFileName = optionVal("htmlFile", NULL);
htmlFrameFileName = optionVal("htmlFrameFile", "frame.html");
orthoEvalFile = optionVal("orthoEvalFile", NULL);
db = optionVal("db", NULL);
bedFileName = optionVal("bedOutFile", NULL);
orthoBedFileName = optionVal("orthoBedOut", NULL);
if(htmlFileName == NULL || orthoEvalFile == NULL || db == NULL || 
   bedFileName == NULL || orthoBedFileName == NULL )
    errAbort("Missing parameters. Use -help for usage.");

warn("Loading orthoEvals.");
evList = orthoEvalLoadAll(orthoEvalFile);
warn("Creating intron records");
for(ev = evList; ev != NULL; ev = ev->next)
    {
    for(i=0; i<ev->numIntrons; i++)
	{
	occassionalDot();
	iv = intronIvForEv(ev, i);
	slAddHead(&ivList, iv);
	}
    }
warn("\nDone");
warn("Sorting");
slSort(&ivList, intronEvalCmp);
warn("Done.");
htmlOut = mustOpen(htmlFileName, "w");
bedOut = mustOpen(bedFileName, "w");
htmlFrameOut = mustOpen(htmlFrameFileName, "w");
orthoBedOut = mustOpen(orthoBedFileName, "w");
i=0;
fprintf(htmlOut, "<html><body><table border=1><tr><th>Num</th><th>Mouse Acc.</th><th>Score</th><th>TS Pick</th></tr>\n");
warn("Filtering");
safef(buff, sizeof(buff), "tmp");
for(iv = ivList; iv != NULL && maxPicks > 0; iv = iv->next)
    {
    if(isUniqueCoordAndAgx(db, iv, posHash, agxHash) && iv->support == 0 && !isOverlappedByRefSeq(db, iv) &&
       ! isOverlappedByEst(db, iv) && ! isOverlappedByMRna(db, iv))
	{
	boolean twinScan = (coordOverlappedByTable(db, iv->chrom, iv->e1S, iv->e1E, "mgcTSExpPcr") &&
			    coordOverlappedByTable(db, iv->chrom, iv->e2S, iv->e2E, "mgcTSExpPcr"));
	bed = bedForIv(iv);
	if(sameString(buff, "tmp"))
	    safef(buff, sizeof(buff), "%s:%d-%d", bed->chrom, bed->chromStart-50, bed->chromEnd+50);
//	isMgcBad = isOverlappedByMgcBad(iv);
	fprintf(htmlOut, "<tr><td>%d</td><td><a target=\"browser\" "
		"href=\"http://mgc.cse.ucsc.edu/cgi-bin/hgTracks?db=hg15&position=%s:%d-%d\"> "
		"%s </a></td><td>%d</td><td>%s</td></tr>\n", 
		++i,bed->chrom, bed->chromStart-50, bed->chromEnd+50, bed->name, bed->score, 
		twinScan ? "yes" : "no");

	bedTabOutN(bed, 12, bedOut);
	bedTabOutN(iv->ev->orthoBed, 12, orthoBedOut);
	bedFree(&bed);
	maxPicks--;
	}
    }
writeOutFrames(htmlFrameOut, htmlFileName, db, bedFileName, buff);
fprintf(htmlOut, "</table></body></html>\n");
carefulClose(&bedOut);
carefulClose(&htmlOut);
carefulClose(&htmlFrameOut);
carefulClose(&orthoBedOut);
warn("Done.");
hashFree(&posHash);
hashFree(&agxHash);
}
Exemplo n.º 28
0
boolean doGetBedOrCt(struct sqlConnection *conn, boolean doCt,
                     boolean doCtFile, boolean redirectToGb)
/* Actually output bed or custom track. Return TRUE unless no results.
 *
 * conn         - connection handed to the per-region data fetchers.
 * doCt         - when TRUE, results are collected into a new custom track
 *                (ctNew) that is saved to the cart instead of being printed.
 * doCtFile     - only consulted here to force the custom track header on.
 * redirectToGb - with doCt, also emit a page that refreshes to the browser.
 *
 * Reads the globals database, curTable, curTrack and cart. */
{
char *db = cloneString(database);
char *table = curTable;
struct hTableInfo *hti = getHti(db, table, conn);
struct featureBits *fbList = NULL, *fbPtr;
struct customTrack *ctNew = NULL;
/* A custom track header is wanted when the cart asks for it or when any
 * custom track output mode is selected. */
boolean doCtHdr = (cartUsualBoolean(cart, hgtaPrintCustomTrackHeaders, FALSE)
	|| doCt || doCtFile);
char *ctWigOutType = cartCgiUsualString(cart, hgtaCtWigOutType, outWigData);
char *fbQual = fbOptionsToQualifier();
char fbTQ[128];
int fields = hTableInfoBedFieldCount(hti);
boolean gotResults = FALSE;
struct region *region, *regionList = getRegions();
boolean isBedGr = isBedGraph(curTable);
boolean isBgWg = isBigWigTable(curTable);
boolean needSubtrackMerge = anySubtrackMerge(database, curTable);
boolean doDataPoints = FALSE;
boolean isWig = isWiggle(database, table);
struct wigAsciiData *wigDataList = NULL;
struct dataVector *dataVectorList = NULL;
boolean doRgb = bedItemRgb(hTrackDbForTrack(db, curTable));

/* Start plain-text output unless GREAT output or a custom track was asked for. */
if (!cartUsualBoolean(cart, hgtaDoGreatOutput, FALSE) && !doCt)
    {
    textOpen();
    }

/* GREAT output starts with a '#'. */
if (cartUsualBoolean(cart, hgtaDoGreatOutput, FALSE))
    fputs("#", stdout);

/* Data-point output applies only to wiggle-like tables and only when the
 * selected wig output type equals outWigData. */
if ((isWig || isBedGr || isBgWg) && sameString(outWigData, ctWigOutType))
    doDataPoints = TRUE;

/* Process each region in turn.  Data points accumulate in wigDataList or
 * dataVectorList across regions; bed items are emitted (or copied into
 * ctNew) inside the loop. */
for (region = regionList; region != NULL; region = region->next)
    {
    struct bed *bedList = NULL, *bed;
    struct lm *lm = lmInit(64*1024);
    struct dataVector *dv = NULL;

    if (isWig && doDataPoints)
        {
        if (needSubtrackMerge)
            {
            dv = wiggleDataVector(curTrack, curTable, conn, region);
            if (dv != NULL)
                slAddHead(&dataVectorList, dv);
            }
        else
            {
            /* NOTE(review): count is incremented below but never read. */
            int count = 0;
            struct wigAsciiData *wigData = NULL;
            struct wigAsciiData *asciiData;
            struct wigAsciiData *next;

            wigData = getWiggleAsData(conn, curTable, region);
            /* Keep only the non-empty blocks; next is saved first because
             * slAddHead rewrites asciiData->next. */
            for (asciiData = wigData; asciiData; asciiData = next)
                {
                next = asciiData->next;
                if (asciiData->count)
                    {
                    slAddHead(&wigDataList, asciiData);
                    ++count;
                    }
                }
            /* NOTE(review): this reverses the whole accumulated list once
             * per region, not just this region's additions - confirm the
             * resulting order is intended when there are several regions. */
            slReverse(&wigDataList);
            }
        }
    else if (isBedGr && doDataPoints)
        {
        dv = bedGraphDataVector(curTable, conn, region);
        if (dv != NULL)
            slAddHead(&dataVectorList, dv);
        }
    else if (isBgWg && doDataPoints)
        {
        dv = bigWigDataVector(curTable, conn, region);
        if (dv != NULL)
            slAddHead(&dataVectorList, dv);
        }
    else if (isWig || isBgWg)
        {
        /* Wiggle-like table but bed output wanted: convert the vector. */
        dv = wiggleDataVector(curTrack, curTable, conn, region);
        bedList = dataVectorToBedList(dv);
        dataVectorFree(&dv);
        }
    else if (isBedGr)
        {
        bedList = getBedGraphAsBed(conn, curTable, region);
        }
    else
        {
        /* Ordinary table; fields is passed by address to the fetcher. */
        bedList = cookedBedList(conn, curTable, region, lm, &fields);
        }

    /*  this is a one-time only initial creation of the custom track
     *  structure to receive the results.  gotResults turns it off after
     *  the first time.
     */
    if (doCtHdr && !gotResults &&
	((bedList != NULL) || (wigDataList != NULL) ||
         (dataVectorList != NULL)))
        {
        ctNew = beginCustomTrack(table, fields,
                                 doCt, (isWig || isBedGr || isBgWg), doDataPoints);
        }

    /* Data points are written after the loop; here we only note that
     * something was found. */
    if (doDataPoints && (wigDataList || dataVectorList))
        gotResults = TRUE;
    else
        {
        if ((fbQual == NULL) || (fbQual[0] == 0))
            {
            /* No feature-bits qualifier: output the bed items as they are. */
            for (bed = bedList;  bed != NULL;  bed = bed->next)
                {
                if (bed->name != NULL)
                    {
                    /* Replace spaces in the name with underscores. */
                    subChar(bed->name, ' ', '_');
                    }
                if (doCt)
                    {
                    struct bed *dupe = cloneBed(bed); /* Out of local memory. */
                    slAddHead(&ctNew->bedList, dupe);
                    }
                else
                    {
                    if (doRgb)
                        bedTabOutNitemRgb(bed, fields, stdout);
                    else
                        bedTabOutN(bed, fields, stdout);
                    }

                gotResults = TRUE;
                }
            }
        else
            {
            /* Feature-bits qualifier present: convert the beds to feature
             * bits and output those instead. */
            safef(fbTQ, sizeof(fbTQ), "%s:%s", hti->rootName, fbQual);
            fbList = fbFromBed(db, fbTQ, hti, bedList, 0, 0, FALSE, FALSE);
            /* Feature-bits output is limited to 3, 4 or 6 fields. */
            if (fields >= 6)
                fields = 6;
            else if (fields >= 4)
                fields = 4;
            else
                fields = 3;
            if (doCt && ctNew)
                {
                ctNew->fieldCount = fields;
                /* Rewrites the type string in place, bounded by its
                 * current length. */
                safef(ctNew->tdb->type, strlen(ctNew->tdb->type)+1,
                      "bed %d", fields);
                }
            for (fbPtr=fbList;  fbPtr != NULL;  fbPtr=fbPtr->next)
                {
                if (fbPtr->name != NULL)
                    {
                    /* Truncate the name at its first space. */
                    char *ptr = strchr(fbPtr->name, ' ');
                    if (ptr != NULL)
                        *ptr = 0;
                    }
                if (doCt)
                    {
                    struct bed *fbBed = fbToBedOne(fbPtr);
                    slAddHead(&ctNew->bedList, fbBed );
                    }
                else
                    {
                    /* The score column is always written as 0. */
                    if (fields >= 6)
                        hPrintf("%s\t%d\t%d\t%s\t%d\t%c\n",
                                fbPtr->chrom, fbPtr->start, fbPtr->end, fbPtr->name,
                                0, fbPtr->strand);
                    else if (fields >= 4)
                        hPrintf("%s\t%d\t%d\t%s\n",
                                fbPtr->chrom, fbPtr->start, fbPtr->end, fbPtr->name);
                    else
                        hPrintf("%s\t%d\t%d\n",
                                fbPtr->chrom, fbPtr->start, fbPtr->end);
                    }
                gotResults = TRUE;
                }
            featureBitsFreeList(&fbList);
            }
        }
    bedList = NULL;
    lmCleanup(&lm);
    }
if (!gotResults)
    {
    hPrintf(NO_RESULTS);
    }
else if (doCt)
    {
    int wigDataSize = 0;
    /* Load existing custom tracks and add this new one: */
    struct customTrack *ctList = getCustomTracks();
    removeNamedCustom(&ctList, ctNew->tdb->table);
    if (doDataPoints)
        {
        if (needSubtrackMerge || isBedGr || isBgWg)
            {
            /* Vectors were head-inserted per region; restore region order. */
            slReverse(&dataVectorList);
            wigDataSize = dataVectorWriteWigAscii(dataVectorList, ctNew->wigAscii, 0, NULL);
            // TODO: see if can make prettier wig output here that
            // doesn't necessarily have one value per base
            }
        else
            {
            struct wiggleDataStream *wds = NULL;
            /* create an otherwise empty wds so we can print out the list */
            wds = wiggleDataStreamNew();
            wds->ascii = wigDataList;
            wigDataSize = wds->asciiOut(wds, db, ctNew->wigAscii, TRUE, FALSE);
#if defined(DEBUG)    /*      dbg     */
            /* allow file readability for debug */
            chmod(ctNew->wigAscii, 0666);
#endif
            wiggleDataStreamFree(&wds);
            }
        }
    else
        /* Bed items were head-inserted; put them back in output order. */
        slReverse(&ctNew->bedList);

    slAddHead(&ctList, ctNew);
    /* Save the custom tracks out to file (overwrite the old file): */
    customTracksSaveCart(db, cart, ctList);
    /*  Put up redirect-to-browser page. */
    if (redirectToGb)
        {
        char browserUrl[256];
        char headerText[512];
        int redirDelay = 3;
        safef(browserUrl, sizeof(browserUrl),
              "%s?%s&db=%s", hgTracksName(), cartSidUrlString(cart), database);
        safef(headerText, sizeof(headerText),
              "<META HTTP-EQUIV=\"REFRESH\" CONTENT=\"%d;URL=%s\">",
              redirDelay, browserUrl);
        webStartHeader(cart, database, headerText,
                       "Table Browser: %s %s: %s", hOrganism(database),
                       freezeName, "get custom track");
        if (doDataPoints)
            {
            hPrintf("There are %d data points in custom track. ", wigDataSize);
            }
        else
            {
            hPrintf("There are %d items in custom track. ",
                    slCount(ctNew->bedList));
            }
        hPrintf("You will be automatically redirected to the genome browser in\n"
                "%d seconds, or you can \n"
                "<A HREF=\"%s\">click here to continue</A>.\n",
                redirDelay, browserUrl);
        }
    }
else if (doDataPoints)
    {
    /* Not a custom track: write the accumulated data points to "stdout". */
    if (needSubtrackMerge || isBedGr || isBgWg)
        {
        slReverse(&dataVectorList);
        dataVectorWriteWigAscii(dataVectorList, "stdout", 0, NULL);
        }
    else
        {
        /*	create an otherwise empty wds so we can print out the list */
        struct wiggleDataStream *wds = NULL;
        wds = wiggleDataStreamNew();
        wds->ascii = wigDataList;
        wds->asciiOut(wds, db, "stdout", TRUE, FALSE);
        wiggleDataStreamFree(&wds);
        }
    }
return gotResults;
}
Exemplo n.º 29
0
void bwtool_find_max(struct hash *options, char *favorites, char *regions, double fill,
		     char *bigfile, char *tmp_dir, char *outputfile)
/* find max points in a range
 *
 * For every region read from the regions file, load the per-base signal of
 * bigfile and collect every offset (relative to the region start) whose value
 * equals the largest value seen in that region.  The region is then written
 * to outputfile as a 12-field bed whose blocks are 1-base blocks at those
 * offsets.  Regions in which no value compares greater than -DBL_MAX produce
 * no output line.
 *
 * Options consulted:
 *   median-base - emit a single block at the offset returned by
 *                 median_base_calc() instead of one block per maximum.
 *   with-max    - append the maximum value as an extra tab-separated column.
 *
 * favorites and fill are accepted but not used by this function. */
{
    boolean med_base = (hashFindVal(options, "median-base") != NULL) ? TRUE : FALSE;
    boolean with_max = (hashFindVal(options, "with-max") != NULL) ? TRUE : FALSE;
    struct metaBig *mb = metaBigOpen_check(bigfile, tmp_dir, NULL);
    FILE *out = mustOpen(outputfile, "w");
    struct bed6 *sections6 = readBed6Soft(regions);
    struct bed *sections = bed12FromBed6(&sections6);
    struct bed *section;
    for (section = sections; section != NULL; section = section->next)
    {
        struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart,
                                                              section->chromEnd);
        struct perBaseWig *pbw;
        struct slInt *ii;
        int i, size;
        double max = -DBL_MAX;
        struct slInt *list = NULL;    /* offsets of all positions equal to max */
        for (pbw = pbwList; pbw != NULL; pbw = pbw->next)
        {
            /* Offset of this chunk relative to the start of the region. */
            int pbw_off = pbw->chromStart - section->chromStart;
            for (i = 0; i < pbw->len; i++)
            {
                if (pbw->data[i] > max)
                {
                    /* New maximum: forget positions of the old one. */
                    slFreeList(&list);
                    struct slInt *new_int = slIntNew(i + pbw_off);
                    slAddHead(&list, new_int);
                    max = pbw->data[i];
                }
                else if (pbw->data[i] == max)
                {
                    /* Tie with the current maximum: remember it too. */
                    struct slInt *new_int = slIntNew(i + pbw_off);
                    slAddHead(&list, new_int);
                }
            }
        }
        /* Positions were head-inserted; put them back in ascending order. */
        slReverse(&list);
        if (list)
        {
            size = slCount(list);
            /* NOTE(review): any block arrays already attached to section by
             * bed12FromBed6() are overwritten below without being freed -
             * confirm whether that helper allocates them. */
            if (med_base)
            {
                section->blockCount = 1;
                /* AllocArray takes an element count, not a byte size. */
                AllocArray(section->blockSizes, 1);
                AllocArray(section->chromStarts, 1);
                section->blockSizes[0] = 1;
                section->chromStarts[0] = median_base_calc(&list);
            }
            else
            {
                section->blockCount = size;
                AllocArray(section->blockSizes, size);
                AllocArray(section->chromStarts, size);
                for (i = 0, ii = list; (i < size) && (ii != NULL); i++, ii = ii->next)
                {
                    section->blockSizes[i] = 1;
                    section->chromStarts[i] = ii->val;
                }
            }
            if (!with_max)
                bedTabOutN(section, 12, out);
            else
            {
                /* Both trailing character arguments are tabs so that the
                 * maximum can follow on the same line. */
                bedOutputN(section, 12, out, '\t', '\t');
                fprintf(out, "%f\n", max);
            }
            slFreeList(&list);
        }
        perBaseWigFree(&pbwList);
    }
    metaBigClose(&mb);
    bedFreeList(&sections);
    carefulClose(&out);
}
void hgExperiment(char *database, char *table, 
                        char *expFile, char *posFile, char *dataFile)
/* Main function */
{
struct lineFile *lf;
int *data = NULL;
int *scores;
FILE *f = NULL;
char expTable[32];
char *words[3];
int wordCt;
struct bed *bedList, *bed;
int expCount;
struct hash *expHash, *dataHash;
struct hashEl *hel;

/* Open experiment file and use it to create experiment table.
   Use optional fields if present, otherwise defaults */
safef(expTable, ArraySize(expTable), "%sExps", table);
expHash = makeExpsTable(database, expTable, expFile, &expCount);

/* Read in positions file */
bedList = bedLoadAll(posFile);
slSort(&bedList, bedCmp);

/* Read data file into a hash of arrays of data values, keyed by name */
dataHash = newHash(0);
lf = lineFileOpen(dataFile, TRUE);
while ((wordCt = lineFileChopNext(lf, words, ArraySize(words))))
    {
    /* format: <region-name> <experiment-name> <data-value> */
    char *name, *exp;
    int expId;
    int value;
    if (wordCt != 3)
        errAbort("Expecting 3 words in data file, got %d line %d of %s", 
		wordCt, lf->lineIx, lf->fileName);
    name = words[0];
    hel = hashLookup(dataHash, name);
    if (!hel)
        {
        AllocArray(data, expCount);
        hel = hashAdd(dataHash, name, data);
        }
    data = (int *)hel->val;
    exp = words[1];
    expId = hashIntVal(expHash, exp);
    if (expId < 0 || expId > expCount-1)
        errAbort("Invalid experiment ID %d for %s, line %d of %s",
                 expId, exp, lf->lineIx, lf->fileName);
    //value = atoi(words[2]);
    value = round(atof(words[2]));
    if (data[expId] != 0)
        errAbort("Extra experiment data value %d for %s %s, line %d of %s",
                         value, name, exp, lf->lineIx, lf->fileName);
    data[expId] = value;
    }
lineFileClose(&lf);

/* Fill in BED15 fields - add experiment values, and setup block (only 1)*/
for (bed = bedList; bed != NULL; bed = bed->next)
    {
    int i;
    bed->thickStart = bed->chromStart;
    bed->thickEnd = bed->chromEnd;
    bed->blockCount = 1;
    AllocArray(bed->blockSizes, 1);
    bed->blockSizes[0] = bed->chromEnd - bed->chromStart;
    AllocArray(bed->chromStarts, 1);
    bed->chromStarts[0] = 0;
    bed->expCount = expCount;
    AllocArray(bed->expIds, expCount);
    for (i = 0; i < expCount; i++)
        bed->expIds[i] = i;
    AllocArray(bed->expScores, expCount);
    scores = hashMustFindVal(dataHash, bed->name);
    for (i = 0; i < expCount; i++)
        bed->expScores[i] = scores[i];
    /* set score for bed to the average of the scores in all experiments */
    calculateAverage(bed);
    }

/* from affyPslAndAtlsoToBed ?
   convertIntensitiesToRatios(bedList);
   */

/* Write BED data file */
f = hgCreateTabFile(tabDir, table);
for (bed = bedList; bed != NULL; bed = bed->next)
    bedTabOutN(bed, 15, f);

/* Cleanup */
carefulClose(&f);
freeHash(&expHash);
freeHash(&dataHash);
bedFreeList(&bedList);
}