void bedFirstCodingExonSize(char *inBed, char *overBed, char *underBed, char *outSize) /* bedFirstCodingExonSize - Figure out size of first coding exon. */ { FILE *fSize = mustOpen(outSize, "w"); FILE *fOver = NULL, *fUnder = NULL; if (overBed) fOver = mustOpen(overBed, "w"); if (underBed) fUnder = mustOpen(underBed, "w"); struct bed *bed, *bedList = bedLoadNAll(inBed, 12); for (bed = bedList; bed != NULL; bed = bed->next) { if (bed->thickStart < bed->thickEnd) { int firstCdsSize = bedFirstCdsSize(bed); fprintf(fSize, "%s\t%d\n", bed->name, firstCdsSize); if (firstCdsSize >= threshold) { if (fOver != NULL) bedTabOutN(bed, 12, fOver); } else { if (fUnder != NULL) bedTabOutN(bed, 12, fUnder); } } } carefulClose(&fSize); carefulClose(&fOver); carefulClose(&fUnder); }
void hgPhMouse(char *database, char *track, int fileCount, char *fileNames[]) /* hgPhMouse - Load phMouse track. */ { int i; char *fileName; char *tabName = "phMouse.tab"; FILE *f = mustOpen(tabName, "w"); struct lineFile *lf; char *words[32], *s, c; int wordCount; int oneSize, totalSize = 0; for (i=0; i<fileCount; ++i) { struct bed *bedList = NULL, *bed; fileName = fileNames[i]; lf = lineFileOpen(fileName, TRUE); printf("Reading %s ", fileName); fflush(stdout); while ((wordCount = lineFileChop(lf, words)) > 0) { if (wordCount < 7) errAbort("Expecting at least 7 words line %d of %s", lf->lineIx, fileName); AllocVar(bed); bed->chrom = cloneString(words[0]); bed->chromStart = lineFileNeedNum(lf, words, 1); bed->chromEnd = lineFileNeedNum(lf, words, 2); bed->score = lineFileNeedNum(lf, words, 6); s = strrchr(words[3], '|'); c = s[1]; s[0] = 0; if (c != '+' && c != '-') errAbort("Misformed strandless trace name line %d of %s", lf->lineIx, lf->fileName); bed->name = cloneString(words[3]); bed->strand[0] = c; slAddHead(&bedList, bed); } oneSize = slCount(bedList); printf("%d alignments ", oneSize); totalSize += oneSize; fflush(stdout); slSort(&bedList, bedCmp); printf("sorted "); fflush(stdout); for (bed = bedList; bed != NULL; bed = bed->next) { int bin = hFindBin(bed->chromStart, bed->chromEnd); fprintf(f, "%d\t", bin); bedTabOutN(bed, 6, f); } printf("tabbed out\n"); bedFreeList(&bedList); } carefulClose(&f); printf("Loading %d items into %s.%s\n", totalSize, database, track); loadDatabase(database, track, tabName); remove(tabName); }
void txCdsToGene(char *txBed, char *txFa, char *txCds, char *outGtf, char *outFa) /* txCdsToGene - Convert transcript bed and best cdsEvidence to genePred and * protein sequence. */ { struct hash *txSeqHash = faReadAllIntoHash(txFa, dnaLower); verbose(2, "Read %d transcript sequences from %s\n", txSeqHash->elCount, txFa); struct hash *cdsHash = cdsEvidenceReadAllIntoHash(txCds); verbose(2, "Read %d cdsEvidence from %s\n", cdsHash->elCount, txCds); struct lineFile *lf = lineFileOpen(txBed, TRUE); FILE *fGtf = mustOpen(outGtf, "w"); FILE *fFa = mustOpen(outFa, "w"); char *row[12]; while (lineFileRow(lf, row)) { struct bed *bed = bedLoad12(row); verbose(2, "processing %s\n", bed->name); struct cdsEvidence *cds = hashFindVal(cdsHash, bed->name); struct dnaSeq *txSeq = hashFindVal(txSeqHash, bed->name); char *cdsSource = NULL; if (txSeq == NULL) errAbort("%s is in %s but not %s", bed->name, txBed, txFa); if (cds != NULL) { outputProtein(cds, txSeq, fFa); if (cds->cdsCount > 1) { struct bed *newBed = breakUpBedAtCdsBreaks(cds, bed); if (fTweaked) fprintf(fTweaked, "%s\n", newBed->name); bedFree(&bed); bed = newBed; } cdsSource = cds->accession; if (sameString(cds->accession, ".")) cdsSource = cds->source; } /* Set bed CDS bounds and optionally output bed. */ cdsEvidenceSetBedThick(cds, bed); if (fBed) bedTabOutN(bed, 12, fBed); /* Parse out bed name, which is in format chrom.geneId.txId.accession */ char *geneName = cloneString(bed->name); char *accession = strrchr(geneName, '.'); assert(accession != NULL); *accession++ = 0; chopSuffix(geneName); /* Output as GTF */ bedToGtf(bed, accession, cdsSource, geneName, fGtf); /* Clean up for next iteration of loop. */ freez(&geneName); bedFree(&bed); } lineFileClose(&lf); carefulClose(&fFa); carefulClose(&fGtf); }
void writeBedList(struct bed *bedList, FILE *f)
/* Write every bed in the list to f, 12 columns each, in list order. */
{
struct bed *cur = bedList;
while (cur != NULL)
    {
    bedTabOutN(cur, 12, f);
    cur = cur->next;
    }
}
static void writeBeds(struct bed4 *beds, FILE *fh)
/* Write each bed4 in the list to fh as a 4-column bed line.  The records are
 * passed to bedTabOutN through a cast to struct bed. */
{
struct bed4 *cur = beds;
while (cur != NULL)
    {
    bedTabOutN((struct bed*)cur, 4, fh);
    cur = cur->next;
    }
}
void outputBed(struct cassetteSeq *cseq, FILE *primerBed) /* Output a bed linked features track to see where primers are. */ { struct bed *bed = NULL; struct bed *cbed = cseq->bed; int leftStart=0, rightStart =0; bed = cloneBed(cbed); if(cseq->leftPrimer == NULL || cseq->rightPrimer == NULL) return; leftStart = calcGenomePos(cbed, cseq->leftPrimer, cseq->seq); rightStart = calcGenomePos(cbed, cseq->rightPrimer, cseq->seq); if(sameString(bed->strand, "+")) { bed->chromStart = bed->thickStart = leftStart; bed->chromStarts[0] = 0; bed->blockSizes[0] = strlen(cseq->leftPrimer); bed->chromStarts[1] = rightStart - leftStart; bed->blockSizes[1] = strlen(cseq->leftPrimer); bed->chromEnd = bed->thickEnd = bed->chromStarts[1] + bed->chromStart + bed->blockSizes[1]; } else { bed->chromStart = bed->thickStart = rightStart; bed->chromStarts[0] = 0; bed->blockSizes[0] = strlen(cseq->rightPrimer); bed->chromStarts[1] = leftStart - rightStart; bed->blockSizes[1] = strlen(cseq->rightPrimer); bed->chromEnd = bed->thickEnd = bed->chromStarts[1] + bed->chromStart + bed->blockSizes[1]; } bed->blockCount = 2; checkBedMatchesSeqs(cseq, bed); bedTabOutN(bed, 12, primerBed); bedFree(&bed); }
void spitBedList(struct bed *bedList, FILE *output)
/* Write the beds to output one after another, 6 columns each. */
{
struct bed *cur = bedList;
while (cur != NULL)
    {
    bedTabOutN(cur, 6, output);
    cur = cur->next;
    }
}
void pslToBed(char *pslFile, char *bedFile, struct hash *cdsHash, bool doPosName) /* pslToBed -- tranform a psl format file to a bed format file */ { struct lineFile *pslLf = pslFileOpen(pslFile); FILE *bedFh = mustOpen(bedFile, "w"); struct psl *psl; while ((psl = pslNext(pslLf)) != NULL) { struct bed *bed = bedFromPsl(psl); if (doPosName) { char *newName = needMem(512); safef(newName, 512, "%s:%d-%d", psl->qName, psl->qStart, psl->qEnd); freeMem(bed->name); bed->name = newName; } if (cdsHash) { struct cds *cds = hashFindVal(cdsHash, psl->qName); if (cds == NULL) bed->thickStart = bed->thickEnd = bed->chromStart; else setThick(psl, bed, cds); } bedTabOutN(bed, 12, bedFh); bedFree(&bed); pslFree(&psl); } carefulClose(&bedFh); lineFileClose(&pslLf); }
void gffToBed(char *inGff, char *outBed) /* gffToBed - Convert a gff file (gff1 or gff2) to bed. Not tested with gff3 */ { struct gffFile *gff = gffRead(inGff); FILE *f = mustOpen(outBed, "w"); char *exonFeature = bestExonFeature(gff); gffGroupLines(gff); separateGroupsByChromosome(gff); struct gffGroup *group; for (group = gff->groupList; group != NULL; group = group->next) { struct genePred *gp; if (gff->isGtf) gp = genePredFromGroupedGtf(gff, group, group->name, FALSE, FALSE); else gp = genePredFromGroupedGff(gff, group, group->name, exonFeature, FALSE, FALSE); if (gp != NULL) { assert(gp->txStart == gp->exonStarts[0]); struct bed *bed = bedFromGenePred(gp); bedTabOutN(bed, 12, f); bedFree(&bed); } } carefulClose(&f); }
void outputBed6(struct bed *bedList, char *output)
/* Create (or truncate) the file named output and write every bed of the list
 * to it as a 6-column bed line. */
{
FILE *out = mustOpen(output, "w");
struct bed *cur = bedList;

while (cur != NULL)
    {
    bedTabOutN(cur, 6, out);
    cur = cur->next;
    }
carefulClose(&out);
}
void writeCluster(struct bedNamedScore *clusterList, FILE *out) /* Takes a list of bed lines and writes out a single blocked bed line into the out file */ { int size = slCount(clusterList); if (size < clMinCluster) return; int blockStarts[size]; int blockSizes[size]; double score = 0; struct bedNamedScore *last = clusterList; slReverse(&clusterList); // create our output bed object and assign values to all the fields we care about struct bed outBed; outBed.chrom = cloneString(clusterList->chrom); outBed.chromStart = clusterList->chromStart; outBed.chromEnd = last->chromEnd; // the name of each record is merely the size of the cluster, mostly for viewing on the browser char sizeBuf[8]; safef(sizeBuf, 8, "%d", size); outBed.name = sizeBuf; outBed.strand[0] = clusterList->strand; outBed.strand[1] = '\0'; outBed.blockCount = size; int i; for (i = 0; i < size; i++) blockSizes[i] = 1; outBed.blockSizes = blockSizes; // get the blockStarts and also calculate the final score, which is just the average of the scores * 10 // because the input values are decimal numbers from 0.0000-100.0000 and our bed output is an int 0-1000 i = 0; struct bedNamedScore *cur; for (cur = clusterList; cur != NULL; cur = cur->next) { blockStarts[i] = cur->chromStart - outBed.chromStart; score += cur->score; i++; } outBed.chromStarts = blockStarts; outBed.score = (int)(score * 10 / size); // zero out unused fields outBed.thickStart = outBed.chromStart; outBed.thickEnd = outBed.chromStart; outBed.itemRgb = 0; // finally print our struct out as a bed12 bedTabOutN(&outBed, 12, out); }
void doBeds(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom, char *netTable, char *bedFileName, char *bedTableName, char *outBedName, char *selectedFileName, int *foundCount, int *notFoundCount) /* Map over beds. */ { FILE *bedOut = NULL; FILE *selectedOut = NULL; struct bed *bed=NULL, *bedList = NULL, *orthoBed=NULL; /* Load beds. */ warn("Loading beds."); if(bedFileName) bedList=bedLoadAll(bedFileName); else bedList=loadBedFromTable(conn, bedTableName, chrom, 0, BIGNUM); /* Convert beds. */ warn("Converting beds."); assert(outBedName); bedOut = mustOpen(outBedName, "w"); if (selectedFileName != NULL) selectedOut = mustOpen(selectedFileName, "w"); for(bed = bedList; bed != NULL; bed = bed->next) { if(differentString(bed->chrom, chrom)) continue; occassionalDot(); orthoBed = orthoBedFromBed(conn, db, orthoDb, netTable, bed); if(orthoBed != NULL && orthoBed->blockCount > 0) { (*foundCount)++; bedTabOutN(orthoBed, 12, bedOut); if (selectedOut != NULL) bedTabOutN(bed, 12, selectedOut); } else (*notFoundCount)++; bedFree(&orthoBed); } bedFreeList(&bedList); carefulClose(&selectedOut); carefulClose(&bedOut); }
void doStrand(struct bed *start, struct bed *end, FILE *f)
/* Assuming all beds from start up to (not including) end are on the same
 * strand, merge all their blocks into a single bed and write it to f as a
 * 12-column line.  The merged bed takes chrom, name and strand from start. */
{
struct rbTree *blocks = rangeTreeNew();
struct bed *cur = start;

while (cur != end)
    {
    bedIntoRangeTree(cur, blocks);
    cur = cur->next;
    }

struct bed *merged = bedFromRangeTree(blocks, start->chrom, start->name, start->strand);
bedTabOutN(merged, 12, f);
bedFree(&merged);
rangeTreeFree(&blocks);
}
void doAnalysisForBed(struct bed *bed) { char *hgdbTestTable = cgiUsualString("hgdbTestTable","affyTrans_hg12"); char *hgdbTestName = "sugnet"; FILE *tmpFile = NULL; char commandBuffer[4096]; char *fileNameRoot = getFileNameForBed(bed); char bedFile[512]; char dataFile[512]; int retVal = 0; /* Print out bed. */ safef(bedFile, sizeof(bedFile), "%s.bed", fileNameRoot); tmpFile = mustOpen(bedFile, "w"); bedTabOutN(bed, 12, tmpFile); carefulClose(&tmpFile); /* Get samples for bed. */ safef(dataFile, sizeof(dataFile), "%s.data", fileNameRoot); safef(commandBuffer, sizeof(commandBuffer), "samplesForCoordinates bedFile=%s hgdbTestName=%s hgdbTestTable=%s > %s", bedFile, hgdbTestName, hgdbTestTable, dataFile); retVal = system(commandBuffer); if(retVal != 0) { warn("%s failed running command:\n%s", fileNameRoot, commandBuffer); return; } safef(commandBuffer, sizeof(commandBuffer), "cp %s tmp.data", dataFile); retVal = system(commandBuffer); /* Run R analysis on data file. */ warn("Running R for %s", fileNameRoot); fflush(stderr); safef(commandBuffer, sizeof(commandBuffer), "R --vanilla < /cluster/home/sugnet/sugnet/R/maReg/R/runAnalysis.R"); warn("Done with R"); fflush(stderr); retVal = system(commandBuffer); if(retVal != 0) { warn("%s failed running command:\n%s", fileNameRoot, commandBuffer); return; } bedsAnalyzed++; }
void affyPslAndAtlasToBedOld(char *pslFile, char *atlasFile, char *bedOut, char *expRecOut) /** Main function that does all the work for old-style*/ { struct hash *bedHash = NULL; struct affyAtlas *aaList=NULL, *aa=NULL; struct expRecord *erList=NULL, *er=NULL; struct bed *bedList=NULL, *bed=NULL; int expCount = 0; FILE *erOut = NULL, *bOut=NULL; warn("loading atlas file"); aaList = affyAtlasLoadAll(atlasFile); expCount = countExperiments(aaList); warn("creating list of beds from alignments"); bedList = createBedsFromPsls(pslFile, expCount); warn("creating hash from list of beds"); bedHash = createBedHash(bedList); warn("appending experiments to beds in hash"); appendExperiments(bedHash, aaList, &erList); warn("Running sanity Checks"); checkAllBeds(&bedList, expCount); warn("%d beds were missing experiments." , missingExpsCount); warn("%d beds had no experiments.", noExpCount); warn("Calculating average intensities"); convertIntensitiesToRatios(bedList); calculateAverages(bedList); warn("writing expRecords out"); erOut = mustOpen(expRecOut, "w"); for(er = erList; er != NULL; er = er->next) expRecordTabOut(er, erOut); carefulClose(&erOut); warn("writing beds out"); bOut = mustOpen(bedOut, "w"); for(bed = bedList; bed != NULL; bed = bed->next) bedTabOutN(bed, 15, bOut); carefulClose(&bOut); warn("cleaning up.."); freeHash(&bedHash); bedFreeList(&bedList); warn("Done."); }
void bwtool_find_thresh(struct hash *options, char *favorites, char *regions, double fill, char *thresh_type, char *thresh_s, char *bigfile, char *tmp_dir, char *outputfile) /* the other kind of finding, based on thresholding. */ { boolean inverse = (hashFindVal(options, "inverse") != NULL) ? TRUE : FALSE; enum bw_op_type op= get_bw_op_type(thresh_type, inverse); struct metaBig *mb = metaBigOpen_check(bigfile, tmp_dir, regions); double thresh = sqlDouble(thresh_s); FILE *out = mustOpen(outputfile, "w"); struct bed out_bed; struct bed *section; for (section = mb->sections; section != NULL; section = section->next) { struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart, section->chromEnd); struct perBaseWig *pbw; int i, len; if (pbwList) { out_bed.chrom = pbwList->chrom; for (pbw = pbwList; pbw != NULL; pbw = pbw->next) { i = 0; len = pbw->chromEnd - pbw->chromStart; out_bed.chromStart = out_bed.chromEnd = 0; while (i < len) { while ((i < len) && (!fit_thresh(pbw->data[i], thresh, op))) i++; out_bed.chromStart = i + pbw->chromStart; while ((i < len) && (fit_thresh(pbw->data[i], thresh, op))) i++; out_bed.chromEnd = i + pbw->chromStart; if (out_bed.chromEnd > out_bed.chromStart) bedTabOutN(&out_bed, 3, out); } } perBaseWigFree(&pbwList); } } metaBigClose(&mb); carefulClose(&out); }
void doPsls(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom, char *netTable, char *pslFileName, char *pslTableName, char *outBedName, char *selectedFileName, int *foundCount, int *notFoundCount) /* Map over psls. */ { FILE *bedOut = NULL; FILE *selectedOut = NULL; struct bed *bed = NULL; struct psl *psl=NULL, *pslList = NULL; /* Load psls. */ warn("Loading psls."); if(pslFileName) pslList=pslLoadAll(pslFileName); else pslList=loadPslFromTable(conn, pslTableName, chrom, 0, BIGNUM); /* Convert psls. */ warn("Converting psls."); assert(outBedName); bedOut = mustOpen(outBedName, "w"); if (selectedFileName != NULL) selectedOut = mustOpen(selectedFileName, "w"); for(psl = pslList; psl != NULL; psl = psl->next) { if(differentString(psl->tName, chrom)) continue; occassionalDot(); bed = orthoBedFromPsl(conn, db, orthoDb, netTable, psl); if(bed != NULL && bed->blockCount > 0) { (*foundCount)++; bedTabOutN(bed, 12, bedOut); if (selectedOut != NULL) pslTabOut(psl, selectedOut); } else (*notFoundCount)++; bedFree(&bed); } carefulClose(&selectedOut); carefulClose(&bedOut); }
void bedViewOut(struct altSpliceSite *as, FILE *out)
/* Write a 12-column, two-block bed that pictures an alt-splice site.
 * The bed runs from as->chromStart to the largest value in as->altStarts,
 * with a 1-base block at each end.  The thick region is the second
 * altBpStarts/altBpEnds pair, the score is the second splice type, and chrom,
 * name and strand are copied from the site. */
{
struct bed *view = NULL;
AllocVar(view);

/* Identity. */
view->chrom = cloneString(as->chrom);
view->name = cloneString(as->agName);
view->score = as->spliceTypes[1];
safef(view->strand, sizeof(view->strand), "%s", as->strand);

/* Extent and thick region. */
view->chromStart = as->chromStart;
view->chromEnd = maxInArray(as->altStarts, as->altCount);
view->thickStart = as->altBpStarts[1];
view->thickEnd = as->altBpEnds[1];

/* Two single-base blocks, one at each end of the bed. */
view->blockCount = 2;
AllocArray(view->chromStarts, 2);
AllocArray(view->blockSizes, 2);
view->chromStarts[0] = 0;
view->chromStarts[1] = view->chromEnd - view->chromStart - 1;
view->blockSizes[0] = 1;
view->blockSizes[1] = 1;

bedTabOutN(view, 12, out);
bedFree(&view);
}
void borfMatcher(char *bedIn, char *borfIn, char *bedOutFile, char *genePredOutFile) /* Top level function to open files and call other functions. */ { struct borf *borf = NULL, *borfList = NULL; struct bed *bed = NULL, *bedList = NULL; struct genePred *gp = NULL; float threshold = optionFloat("minScore", 50); FILE *bedOut = mustOpen(bedOutFile, "w"); FILE *genePredOut = mustOpen(genePredOutFile, "w"); boolean keepSmall = optionExists("keepSmall"); boolean keepNmd = optionExists("keepNmd"); borfList = borfLoadAll(borfIn); bedList = bedLoadAll(bedIn); dotForUserInit(slCount(bedList)/10); for(bed = bedList, borf = borfList; bed != NULL && borf != NULL; bed = bed->next, borf = borf->next) { dotForUser(); if(!stringIn(bed->name, borf->name)) errAbort("Trying to match up %s bed with %s borf - bad idea!", bed->name, borf->name); /* Have to adjust cds end. Borf puts stop codon outside of cds, we put it inside. */ borf->cdsEnd = min(borf->cdsEnd+3, borf->size); if((borf->score > threshold || (keepSmall && borf->cdsSize > 0)) && sameString(borf->strand, "+")) { setThickStartStop(bed, borf); if(keepNmd || !nmdTarget(bed)) { gp = bedToGenePred(bed); bedTabOutN(bed, 12, bedOut); genePredTabOut(gp, genePredOut); genePredFree(&gp); } } } warn("Done."); carefulClose(&bedOut); carefulClose(&genePredOut); }
void bedMergeOverlappingBlocks(char *inBed, char *outBed) /* bedMergeOverlappingBlocks - Fix faulty BED 12 files with illegal overlapping blocks. Also reports a summary of the changes.. */ { int badBeds = 0; FILE *log = NULL; FILE *newBedFile = mustOpen(outBed, "w"); char *logName = optionVal("report", NULL); struct lineFile *lf = lineFileOpen(inBed, TRUE); char *line, *row[12]; boolean isItemRgb = FALSE; if (logName) log = mustOpen(logName, "w"); while (lineFileNext(lf, &line, NULL)) { struct bed *bed; int numFields = chopByWhite(line, row, ArraySize(row)); /* strange it's reading empty lines... whatever */ if (numFields == 0) continue; if (numFields < 12) errAbort("file %s doesn't appear to be in blocked-bed format. At least 12 fields required, got %d", inBed, numFields); if (bedParseRgb(row[8])) isItemRgb = TRUE; bed = bedLoadN(row, numFields); badBeds += fixBed(bed, lf->lineIx, log); if (isItemRgb) bedTabOutNitemRgb(bed, numFields, newBedFile); else bedTabOutN(bed, numFields, newBedFile); } lineFileClose(&lf); if (log) { fprintf(log, "Fixed %d bad beds in all.\n", badBeds); carefulClose(&log); } carefulClose(&newBedFile); }
void createIntronBeds(char *agxFile, char *bedFile) /* Make intron beds for evaluation. */ { struct altGraphX *ag=NULL, *agList = NULL; struct bed *bed=NULL, *bedList=NULL; FILE *bedOut = NULL; int count; warn("Rading AltGraphX list."); agList = altGraphXLoadAll(agxFile); warn("Converting to intron beds."); bedOut = mustOpen(bedFile, "w"); for(ag = agList; ag != NULL; ag = ag->next) { occassionalDot(); bedList = bedIntronsFromAgx(ag); for(bed=bedList; bed != NULL; bed=bed->next) { bedTabOutN(bed, 12, bedOut); } bedFreeList(&bedList); } altGraphXFreeList(&agList); }
void intronSizes(char *database, char *table) /* intronSizes - Output list of intron sizes.. */ { struct dyString *query = newDyString(1024); struct sqlConnection *conn; struct sqlResult *sr; char **row; struct genePred *gp; int rowOffset; struct bed *bedList = NULL, *bed = NULL; hSetDb(database); rowOffset = hOffsetPastBin(NULL, table); conn = hAllocConn(database); sqlDyStringPrintf(query, "select * from %s", table); if (chromName != NULL) dyStringPrintf(query, " where chrom = '%s'", chromName); if (cgiBoolean("withUtr")) { dyStringPrintf(query, " %s txStart != cdsStart", (chromName == NULL ? "where" : "and")); } sr = sqlGetResult(conn, query->string); while ((row = sqlNextRow(sr)) != NULL) { gp = genePredLoad(row+rowOffset); genePredIntrons(gp, &bedList); slReverse(&bedList); for (bed = bedList ; bed != NULL ; bed=bed->next) bedTabOutN(bed,6, stdout); bedFreeList(&bedList); genePredFree(&gp); } sqlFreeResult(&sr); hFreeConn(&conn); }
void writeCassetteExon(struct bed *bedList, struct altGraphX *ag, int eIx, boolean *outputted, FILE *bedOutFile, FILE *outfile, FILE *html, float conf )
/* Write out the information for a cassette exon.
 *   bedList    - only the first bed is written, as 12 columns, to bedOutFile
 *   ag, eIx    - graph and index of the exon's edge within it
 *   outputted  - in/out flag: the graph is written to stdout the first time
 *                this is called with the flag clear, and the flag is then set
 *   bedOutFile - optional bed output
 *   outfile    - optional fasta output; gets the exon sequence when it is
 *                shorter than 200 bases
 *   html       - receives a browser link via writeBrowserLink()
 *   conf       - passed through to writeBrowserLink() */
{
int i = eIx;
if(bedOutFile != NULL)
    bedTabOutN(bedList,12, bedOutFile);
writeBrowserLink(html, ag, conf, i);
/* Test the flag, not the pointer: "!outputted" was false for every valid
 * pointer, so the graph was never written and the flag never set. */
if(outputted != NULL && !*outputted)
    {
    altGraphXTabOut(ag, stdout);
    *outputted = TRUE;
    }
if(outfile != NULL)
    {
    struct dnaSeq *seq = hChromSeq(ag->tName, ag->vPositions[ag->edgeStarts[i]], ag->vPositions[ag->edgeEnds[i]]);
    /* NOTE(review): this reverse-complements on the "+" strand, which looks
     * inverted; left unchanged - confirm the intended strand. */
    if(sameString(ag->strand , "+"))
        reverseComplement(seq->dna, seq->size);
    if(seq->size < 200)
        faWriteNext(outfile, seq->name, seq->dna, seq->size);
    freeDnaSeq(&seq);
    }
}
void txCdsBadBed(char *database, char *altSpliceBed, char *outBed) /* txCdsBadBed - Create a bed file with regions that don't really have CDS, * but that might look like it.. */ { /* Open up database and make sure all the tables we want are there. */ char *refTrack = "refGene"; char *vegaPseudo = "vegaPseudoGene"; char *retroPseudo = "retroMrnaInfo"; struct sqlConnection *conn = sqlConnect(database); if (!sqlTableExists(conn, refTrack)) errAbort("table %s doesn't exist in %s", refTrack, database); if (!sqlTableExists(conn, vegaPseudo)) errAbort("table %s doesn't exist in %s", vegaPseudo, database); if (!sqlTableExists(conn, retroPseudo)) errAbort("table %s doesn't exist in %s", retroPseudo, database); /* Read in alt file and output larger retained and bleeding introns. */ struct bed *bed, *intronyList = loadRetainedAndBleeding(altSpliceBed); FILE *f = mustOpen(outBed, "w"); for (bed = intronyList; bed != NULL; bed = bed->next) { int size = bed->chromEnd - bed->chromStart; if (size > 400) { fprintf(f, "%s\t%d\t%d\t", bed->chrom, bed->chromStart, bed->chromEnd); fprintf(f, "%s%d\t", bed->name, ++id); fprintf(f, "%d\t%s\t", bed->score, bed->strand); fprintf(f, "0\t0\t0\t1\t"); fprintf(f, "%d,\t%d,\n", bed->chromEnd - bed->chromStart, 0); } } /* Read in refGene, and write out larger 3' UTRs, and occassional antisense copies. */ char query[512]; safef(query, sizeof(query), "select * from %s", refTrack); int rowOffset = 0; if (sqlFieldIndex(conn, refTrack, "bin") == 0) rowOffset = 1; struct sqlResult *sr = sqlGetResult(conn, query); char **row; while ((row = sqlNextRow(sr)) != NULL) { struct genePred *gp = genePredLoad(row + rowOffset); int start, end; if (gp->strand[0] == '+') { start = gp->cdsEnd; end = gp->txEnd; } else { start = gp->txStart; end = gp->cdsStart; } if (end - start > 400) { gpPartOutAsBed(gp, start, end, f, "utr", ++id, 400); } if (rand()%20 == 0) { gp->strand[0] = (gp->strand[0] == '+' ? 
'-' : '+'); gpPartOutAsBed(gp, gp->txStart, gp->txEnd, f, "anti", ++id, 0); } } sqlFreeResult(&sr); /* Write out vega pseudo-genes. */ safef(query, sizeof(query), "select * from %s", vegaPseudo); rowOffset = 0; if (sqlFieldIndex(conn, vegaPseudo, "bin") == 0) rowOffset = 1; sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct genePred *gp = genePredLoad(row + rowOffset); gpPartOutAsBed(gp, gp->txStart, gp->txEnd, f, "vega", ++id, 0); } /* Write out retroGenes. */ safef(query, sizeof(query), "select * from %s where score > 600", retroPseudo); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct bed *bed = bedLoad12(row); char name[128]; safef(name, sizeof(name), "retro_%d_%s", ++id, bed->name); bed->name = name; bedTabOutN(bed, 12, f); } carefulClose(&f); }
void outputBedsFromPsls(struct hash *pslHash,char *bedOutName, char *expRecordOutName, char *affyFileName, char *expFileName) /** For each set of entries in affyFile find matching psl and create a bed. */ { struct bed *bed = NULL, *b=NULL; struct psl *pslList = NULL, *psl = NULL; struct hash *expHash = NULL; int numExps = 0; int expCount = 0; int i =0; char *probeSet = NULL; char *row[4]; char key[128]; struct slName *expNames = NULL, *name = NULL; FILE *bedOut = NULL; FILE *expRecordOut = NULL; char *toDiffFileName = optionVal("toDiffFile", NULL); FILE *toDiffOut = NULL; struct lineFile *lf = NULL; fillInExpHash(expFileName, &expHash, &expNames, &expCount); lf = lineFileOpen(affyFileName, TRUE); bedOut = mustOpen(bedOutName, "w"); if(toDiffFileName != NULL) toDiffOut = mustOpen(toDiffFileName, "w"); /* Loop through either adding experiments to beds or if new probeset create bed from psl and start over. */ while(lineFileChopNextTab(lf, row, sizeof(row))) { /* Do we have to make a new bed? */ if(probeSet == NULL || differentWord(probeSet, row[0])) { occassionalDot(); numExps = 0; /* If we have probeset print out the current beds. */ if(probeSet != NULL) { for(b = bed; b != NULL; b = b->next) { int avgCount = 0; for(i = 0; i < b->expCount; i++) if(b->expScores[i] != -10000) avgCount++; if(avgCount != 0 && b->score > 0) b->score = log(b->score / avgCount) * 100; else b->score = 0; bedTabOutN(b, 15, bedOut); if(toDiffOut != NULL) outputToDiffRecord(b, expNames, toDiffOut); } } bedFreeList(&bed); /* Lookup key in pslHash to find list of psl. */ safef(key, sizeof(key), "%s", row[0]); pslList = hashFindVal(pslHash, key); /* Can have multiple psls. */ for(psl = pslList; psl != NULL; psl = psl->next) { b = bedFromPsl(psl); AllocArray(b->expIds, expCount ); AllocArray(b->expScores, expCount); b->expCount = expCount; initBedScores(b, expCount); slAddHead(&bed, b); } } if(bed != NULL) { /* Allocate larger arrays if necessary. 
*/ if(numExps > expCount) { errAbort("Supposed to be %d experiments but probeset %s has at least %d", expCount, bed->name, numExps); } for(b = bed; b != NULL; b = b->next) { int exp = hashIntVal(expHash, row[1]); if(differentWord(row[3], "NaN")) b->expScores[exp] = atof(row[3]); if(differentWord(row[2], "NaN")) b->score += atof(row[2]); } numExps++; } freez(&probeSet); probeSet = cloneString(row[0]); } expRecordOut = mustOpen(expRecordOutName, "w"); i = 0; for(name = expNames; name != NULL; name = name->next) { subChar(name->name, ',', '_'); subChar(name->name, ' ', '_'); fprintf(expRecordOut, "%d\t%s\tuclaExp\tuclaExp\tuclaExp\tuclaExp\t1\t%s,\n", i++, name->name, name->name); } hashFree(&expHash); slFreeList(&expNames); carefulClose(&expRecordOut); carefulClose(&bedOut); lineFileClose(&lf); }
void affyPslAndAtlasToBedNew(char *pslFile, char *atlasFile, char *bedOut, char *expRecOut) /** Main function that does all the work for new-style*/ { struct lineFile *lf = lineFileOpen(atlasFile, TRUE); char *line, *name; int i, wordCount, expCount; char **row; double *data, median; double invMedian, ratio, logRatio; char *affyId; struct hash *hash = newHash(17); struct psl *psl; struct bed *bed; FILE *f = NULL; int dataCount = 0, pslCount = 0, bedCount = 0; int minExpVal = 20; /* Open Atlas file and use first line to create experiment table. */ if (!lineFileNextReal(lf, &line)) errAbort("%s is empty", lf->fileName); if (startsWith("Affy", line)) line += 4; if (line[0] != '\t') errAbort("%s doesn't seem to be a new format atlas file", lf->fileName); expCount = lineToExp(line+1, expRecOut); if (expCount <= 0) errAbort("No experiments in %s it seems", lf->fileName); warn("%d experiments\n", expCount); f = mustOpen(bedOut, "w"); /* Build up a hash keyed by affyID with an int array of data * for value. Do output in short case. */ AllocArray(row, expCount); while (lineFileNextReal(lf, &line)) { affyId = nextWord(&line); wordCount = chopByWhite(line, row, expCount); if (wordCount != expCount) errAbort("Expecting %d data points, got %d line %d of %s", expCount, wordCount, lf->lineIx, lf->fileName); if (hashLookup(hash, affyId)) { warn("Duplicate %s, skipping all but first.", affyId); continue; } AllocArray(data, expCount); for (i=0; i<expCount; ++i) { data[i] = atof(row[i]); if (data[i] < minExpVal) data[i] = minExpVal; } median = findPositiveMedian(data, expCount, minExpVal); if (median >= 0) { invMedian = 1.0/median; for (i=0; i<expCount; ++i) { double val = data[i]; val = safeLog2(invMedian*val); data[i] = val; } if (shortOut) shortDataOut(f, affyId, expCount, data); else hashAdd(hash, affyId, data); } data = NULL; ++dataCount; } lineFileClose(&lf); warn("%d rows of expression data\n", dataCount); /* Stream through psl file, converting it to bed with expression data. 
*/ if (!shortOut) { lf = pslFileOpen(pslFile); while ((psl = pslNext(lf)) != NULL) { ++pslCount; /* get probe id from sequence name */ name=parseNameFromHgc(psl->qName); data = hashFindVal(hash, name); if (data != NULL) { struct bed *bed = bedFromPsl(psl); bed->expCount = expCount; AllocArray(bed->expIds, expCount); AllocArray(bed->expScores, expCount); for (i=0; i<expCount; ++i) { bed->expScores[i] = data[i]; bed->expIds[i] = i; } bedTabOutN(bed, 15, f); ++bedCount; bedFree(&bed); } pslFree(&psl); } warn("%d records in %s", pslCount, pslFile); warn("%d records written to %s", bedCount, bedOut); } lineFileClose(&lf); carefulClose(&f); }
void pickIntrons() /** Top level routine, actually picks the introns. */ { char *htmlFileName=NULL, *htmlFrameFileName=NULL; char *bedFileName=NULL, *orthoBedFileName=NULL; FILE *htmlOut=NULL, *htmlFrameOut=NULL; FILE *bedOut=NULL, *orthoBedOut=NULL; char *orthoEvalFile = NULL; char *db = NULL; struct orthoEval *ev=NULL, *evList = NULL; struct intronEv *iv=NULL, *ivList = NULL; int maxPicks = optionInt("numPicks", 100); int i=0; boolean isRefSeq=FALSE, isMgcBad=FALSE; struct hash *posHash = newHash(12), *agxHash = newHash(12); struct bed *bed = NULL; char buff[256]; htmlFileName = optionVal("htmlFile", NULL); htmlFrameFileName = optionVal("htmlFrameFile", "frame.html"); orthoEvalFile = optionVal("orthoEvalFile", NULL); db = optionVal("db", NULL); bedFileName = optionVal("bedOutFile", NULL); orthoBedFileName = optionVal("orthoBedOut", NULL); if(htmlFileName == NULL || orthoEvalFile == NULL || db == NULL || bedFileName == NULL || orthoBedFileName == NULL ) errAbort("Missing parameters. Use -help for usage."); warn("Loading orthoEvals."); evList = orthoEvalLoadAll(orthoEvalFile); warn("Creating intron records"); for(ev = evList; ev != NULL; ev = ev->next) { for(i=0; i<ev->numIntrons; i++) { occassionalDot(); iv = intronIvForEv(ev, i); slAddHead(&ivList, iv); } } warn("\nDone"); warn("Sorting"); slSort(&ivList, intronEvalCmp); warn("Done."); htmlOut = mustOpen(htmlFileName, "w"); bedOut = mustOpen(bedFileName, "w"); htmlFrameOut = mustOpen(htmlFrameFileName, "w"); orthoBedOut = mustOpen(orthoBedFileName, "w"); i=0; fprintf(htmlOut, "<html><body><table border=1><tr><th>Num</th><th>Mouse Acc.</th><th>Score</th><th>TS Pick</th></tr>\n"); warn("Filtering"); safef(buff, sizeof(buff), "tmp"); for(iv = ivList; iv != NULL && maxPicks > 0; iv = iv->next) { if(isUniqueCoordAndAgx(db, iv, posHash, agxHash) && iv->support == 0 && !isOverlappedByRefSeq(db, iv) && ! isOverlappedByEst(db, iv) && ! 
isOverlappedByMRna(db, iv)) { boolean twinScan = (coordOverlappedByTable(db, iv->chrom, iv->e1S, iv->e1E, "mgcTSExpPcr") && coordOverlappedByTable(db, iv->chrom, iv->e2S, iv->e2E, "mgcTSExpPcr")); bed = bedForIv(iv); if(sameString(buff, "tmp")) safef(buff, sizeof(buff), "%s:%d-%d", bed->chrom, bed->chromStart-50, bed->chromEnd+50); // isMgcBad = isOverlappedByMgcBad(iv); fprintf(htmlOut, "<tr><td>%d</td><td><a target=\"browser\" " "href=\"http://mgc.cse.ucsc.edu/cgi-bin/hgTracks?db=hg15&position=%s:%d-%d\"> " "%s </a></td><td>%d</td><td>%s</td></tr>\n", ++i,bed->chrom, bed->chromStart-50, bed->chromEnd+50, bed->name, bed->score, twinScan ? "yes" : "no"); bedTabOutN(bed, 12, bedOut); bedTabOutN(iv->ev->orthoBed, 12, orthoBedOut); bedFree(&bed); maxPicks--; } } writeOutFrames(htmlFrameOut, htmlFileName, db, bedFileName, buff); fprintf(htmlOut, "</table></body></html>\n"); carefulClose(&bedOut); carefulClose(&htmlOut); carefulClose(&htmlFrameOut); carefulClose(&orthoBedOut); warn("Done."); hashFree(&posHash); hashFree(&agxHash); }
boolean doGetBedOrCt(struct sqlConnection *conn, boolean doCt, boolean doCtFile, boolean redirectToGb)
/* Actually output bed or custom track.  Return TRUE unless no results.
 *   conn         - database connection used for all table reads.
 *   doCt         - collect results into a new custom track (saved to the cart)
 *                  instead of printing them to stdout.
 *   doCtFile     - only used here to force creation of the custom track header.
 *   redirectToGb - after saving a custom track, emit a page that redirects
 *                  to the genome browser.
 * Works region by region over getRegions().  Depending on the table type and the
 * requested wiggle output type, each region yields wiggle ascii data, a data
 * vector, or a bed list; bed lists are optionally passed through the feature
 * bits qualifier from fbOptionsToQualifier(). */
{
char *db = cloneString(database);
char *table = curTable;
struct hTableInfo *hti = getHti(db, table, conn);
struct featureBits *fbList = NULL, *fbPtr;
struct customTrack *ctNew = NULL;
boolean doCtHdr = (cartUsualBoolean(cart, hgtaPrintCustomTrackHeaders, FALSE)
	|| doCt || doCtFile);
char *ctWigOutType = cartCgiUsualString(cart, hgtaCtWigOutType, outWigData);
char *fbQual = fbOptionsToQualifier();
char fbTQ[128];
int fields = hTableInfoBedFieldCount(hti);
boolean gotResults = FALSE;
struct region *region, *regionList = getRegions();
boolean isBedGr = isBedGraph(curTable);
boolean isBgWg = isBigWigTable(curTable);
boolean needSubtrackMerge = anySubtrackMerge(database, curTable);
boolean doDataPoints = FALSE;
boolean isWig = isWiggle(database, table);
struct wigAsciiData *wigDataList = NULL;
struct dataVector *dataVectorList = NULL;
boolean doRgb = bedItemRgb(hTrackDbForTrack(db, curTable));

if (!cartUsualBoolean(cart, hgtaDoGreatOutput, FALSE) && !doCt)
    {
    textOpen();
    }

if (cartUsualBoolean(cart, hgtaDoGreatOutput, FALSE))
    fputs("#", stdout);

/* Data-point output applies only to wiggle-like tables when the cart asks for it. */
if ((isWig || isBedGr || isBgWg) && sameString(outWigData, ctWigOutType))
    doDataPoints = TRUE;

for (region = regionList; region != NULL; region = region->next)
    {
    struct bed *bedList = NULL, *bed;
    struct lm *lm = lmInit(64*1024);
    struct dataVector *dv = NULL;

    if (isWig && doDataPoints)
	{
	if (needSubtrackMerge)
	    {
	    dv = wiggleDataVector(curTrack, curTable, conn, region);
	    if (dv != NULL)
		slAddHead(&dataVectorList, dv);
	    }
	else
	    {
	    struct wigAsciiData *wigData = NULL;
	    struct wigAsciiData *asciiData;
	    struct wigAsciiData *next;

	    wigData = getWiggleAsData(conn, curTable, region);
	    /* Keep only non-empty items.  They are prepended here and the whole
	     * list is reversed exactly once where it is consumed after this loop,
	     * the same way dataVectorList is handled.  Reversing after every
	     * region would scramble the order once there are several regions. */
	    for (asciiData = wigData; asciiData; asciiData = next)
		{
		next = asciiData->next;
		if (asciiData->count)
		    {
		    slAddHead(&wigDataList, asciiData);
		    }
		/* NOTE(review): items with count == 0 are dropped without being freed. */
		}
	    }
	}
    else if (isBedGr && doDataPoints)
	{
	dv = bedGraphDataVector(curTable, conn, region);
	if (dv != NULL)
	    slAddHead(&dataVectorList, dv);
	}
    else if (isBgWg && doDataPoints)
	{
	dv = bigWigDataVector(curTable, conn, region);
	if (dv != NULL)
	    slAddHead(&dataVectorList, dv);
	}
    else if (isWig || isBgWg)
	{
	dv = wiggleDataVector(curTrack, curTable, conn, region);
	bedList = dataVectorToBedList(dv);
	dataVectorFree(&dv);
	}
    else if (isBedGr)
	{
	bedList = getBedGraphAsBed(conn, curTable, region);
	}
    else
	{
	bedList = cookedBedList(conn, curTable, region, lm, &fields);
	}

    /*	this is a one-time only initial creation of the custom track
     *	structure to receive the results.  gotResults turns it off after
     *	the first time.
     */
    if (doCtHdr && !gotResults &&
	((bedList != NULL) || (wigDataList != NULL) ||
	 (dataVectorList != NULL)))
	{
	ctNew = beginCustomTrack(table, fields,
		doCt, (isWig || isBedGr || isBgWg), doDataPoints);
	}

    if (doDataPoints && (wigDataList || dataVectorList))
	gotResults = TRUE;
    else
	{
	if ((fbQual == NULL) || (fbQual[0] == 0))
	    {
	    /* No feature-bits qualifier: emit the bed items as they are. */
	    for (bed = bedList;  bed != NULL;  bed = bed->next)
		{
		if (bed->name != NULL)
		    {
		    subChar(bed->name, ' ', '_');
		    }
		if (doCt)
		    {
		    struct bed *dupe = cloneBed(bed); /* Out of local memory. */
		    slAddHead(&ctNew->bedList, dupe);
		    }
		else
		    {
		    if (doRgb)
			bedTabOutNitemRgb(bed, fields, stdout);
		    else
			bedTabOutN(bed, fields, stdout);
		    }

		gotResults = TRUE;
		}
	    }
	else
	    {
	    /* Qualifier present: convert through feature bits; the output is
	     * limited to 3, 4 or 6 bed fields. */
	    safef(fbTQ, sizeof(fbTQ), "%s:%s", hti->rootName, fbQual);
	    fbList = fbFromBed(db, fbTQ, hti, bedList, 0, 0, FALSE, FALSE);
	    if (fields >= 6)
		fields = 6;
	    else if (fields >= 4)
		fields = 4;
	    else
		fields = 3;
	    if (doCt && ctNew)
		{
		ctNew->fieldCount = fields;
		/* Overwrites the existing type string in place, bounded by its current length. */
		safef(ctNew->tdb->type, strlen(ctNew->tdb->type)+1,
		      "bed %d", fields);
		}
	    for (fbPtr=fbList;  fbPtr != NULL;  fbPtr=fbPtr->next)
		{
		if (fbPtr->name != NULL)
		    {
		    /* Truncate the name at its first space. */
		    char *ptr = strchr(fbPtr->name, ' ');
		    if (ptr != NULL)
			*ptr = 0;
		    }
		if (doCt)
		    {
		    struct bed *fbBed = fbToBedOne(fbPtr);
		    slAddHead(&ctNew->bedList, fbBed );
		    }
		else
		    {
		    if (fields >= 6)
			hPrintf("%s\t%d\t%d\t%s\t%d\t%c\n",
				fbPtr->chrom, fbPtr->start, fbPtr->end,
				fbPtr->name, 0, fbPtr->strand);
		    else if (fields >= 4)
			hPrintf("%s\t%d\t%d\t%s\n",
				fbPtr->chrom, fbPtr->start, fbPtr->end,
				fbPtr->name);
		    else
			hPrintf("%s\t%d\t%d\n",
				fbPtr->chrom, fbPtr->start, fbPtr->end);
		    }
		gotResults = TRUE;
		}
	    featureBitsFreeList(&fbList);
	    }
	}
    bedList = NULL;
    lmCleanup(&lm);
    }

if (!gotResults)
    {
    hPrintf(NO_RESULTS);
    }
else if (doCt)
    {
    int wigDataSize = 0;
    /* Load existing custom tracks and add this new one: */
    struct customTrack *ctList = getCustomTracks();
    removeNamedCustom(&ctList, ctNew->tdb->table);
    if (doDataPoints)
	{
	if (needSubtrackMerge || isBedGr || isBgWg)
	    {
	    slReverse(&dataVectorList);
	    wigDataSize = dataVectorWriteWigAscii(dataVectorList, ctNew->wigAscii,
						  0, NULL);
	    // TODO: see if can make prettier wig output here that
	    // doesn't necessarily have one value per base
	    }
	else
	    {
	    struct wiggleDataStream *wds = NULL;
	    /* Undo the prepend order accumulated over all regions. */
	    slReverse(&wigDataList);
	    /* create an otherwise empty wds so we can print out the list */
	    wds = wiggleDataStreamNew();
	    wds->ascii = wigDataList;
	    wigDataSize = wds->asciiOut(wds, db, ctNew->wigAscii, TRUE, FALSE);
#if defined(DEBUG)	/*	dbg	*/
	    /* allow file readability for debug */
	    chmod(ctNew->wigAscii, 0666);
#endif
	    wiggleDataStreamFree(&wds);
	    }
	}
    else
	slReverse(&ctNew->bedList);

    slAddHead(&ctList, ctNew);
    /* Save the custom tracks out to file (overwrite the old file): */
    customTracksSaveCart(db, cart, ctList);

    /*  Put up redirect-to-browser page. */
    if (redirectToGb)
	{
	char browserUrl[256];
	char headerText[512];
	int redirDelay = 3;
	safef(browserUrl, sizeof(browserUrl),
	      "%s?%s&db=%s", hgTracksName(), cartSidUrlString(cart), database);
	safef(headerText, sizeof(headerText),
	      "<META HTTP-EQUIV=\"REFRESH\" CONTENT=\"%d;URL=%s\">",
	      redirDelay, browserUrl);
	webStartHeader(cart, database, headerText,
		       "Table Browser: %s %s: %s", hOrganism(database),
		       freezeName, "get custom track");
	if (doDataPoints)
	    {
	    hPrintf("There are %d data points in custom track. ", wigDataSize);
	    }
	else
	    {
	    hPrintf("There are %d items in custom track. ",
		    slCount(ctNew->bedList));
	    }
	hPrintf("You will be automatically redirected to the genome browser in\n"
		"%d seconds, or you can \n"
		"<A HREF=\"%s\">click here to continue</A>.\n",
		redirDelay, browserUrl);
	}
    }
else if (doDataPoints)
    {
    if (needSubtrackMerge || isBedGr || isBgWg)
	{
	slReverse(&dataVectorList);
	dataVectorWriteWigAscii(dataVectorList, "stdout", 0, NULL);
	}
    else
	{
	/*	create an otherwise empty wds so we can print out the list */
	struct wiggleDataStream *wds = NULL;
	/* Undo the prepend order accumulated over all regions. */
	slReverse(&wigDataList);
	wds = wiggleDataStreamNew();
	wds->ascii = wigDataList;
	wds->asciiOut(wds, db, "stdout", TRUE, FALSE);
	wiggleDataStreamFree(&wds);
	}
    }
/* NOTE(review): db (from cloneString) is not released before returning. */
return gotResults;
}
void bwtool_find_max(struct hash *options, char *favorites, char *regions, double fill, char *bigfile, char *tmp_dir, char *outputfile) /* find max points in a range */ { boolean med_base = (hashFindVal(options, "median-base") != NULL) ? TRUE : FALSE; boolean with_max = (hashFindVal(options, "with-max") != NULL) ? TRUE : FALSE; struct metaBig *mb = metaBigOpen_check(bigfile, tmp_dir, NULL); FILE *out = mustOpen(outputfile, "w"); struct bed6 *sections6 = readBed6Soft(regions); struct bed *sections = bed12FromBed6(§ions6); struct bed *section; for (section = sections; section != NULL; section = section->next) { struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart, section->chromEnd); struct perBaseWig *pbw; struct slInt *ii; int i, size; double max = -DBL_MAX; struct slInt *list = NULL; for (pbw = pbwList; pbw != NULL; pbw = pbw->next) { int pbw_off = pbw->chromStart - section->chromStart; for (i = 0; i < pbw->len; i++) { if (pbw->data[i] > max) { slFreeList(&list); struct slInt *new_int = slIntNew(i + pbw_off); slAddHead(&list, new_int); max = pbw->data[i]; } else if (pbw->data[i] == max) { struct slInt *new_int = slIntNew(i + pbw_off); slAddHead(&list, new_int); } } } slReverse(&list); if (list) { size = slCount(list); if (med_base) { section->blockCount = 1; AllocArray(section->blockSizes, sizeof(int)); AllocArray(section->chromStarts, sizeof(int)); section->blockSizes[0] = 1; section->chromStarts[0] = median_base_calc(&list); } else { section->blockCount = size; AllocArray(section->blockSizes, sizeof(int) * size); AllocArray(section->chromStarts, sizeof(int) * size); for (i = 0, ii = list; (i < size) && (ii != NULL); i++, ii = ii->next) { section->blockSizes[i] = 1; section->chromStarts[i] = ii->val; } } if (!with_max) bedTabOutN(section, 12, out); else { bedOutputN(section, 12, out, '\t', '\t'); fprintf(out, "%f\n", max); } slFreeList(&list); } perBaseWigFree(&pbwList); } metaBigClose(&mb); bedFreeList(§ions); 
carefulClose(&out); }
void hgExperiment(char *database, char *table, char *expFile, char *posFile, char *dataFile) /* Main function */ { struct lineFile *lf; int *data = NULL; int *scores; FILE *f = NULL; char expTable[32]; char *words[3]; int wordCt; struct bed *bedList, *bed; int expCount; struct hash *expHash, *dataHash; struct hashEl *hel; /* Open experiment file and use it to create experiment table. Use optional fields if present, otherwise defaults */ safef(expTable, ArraySize(expTable), "%sExps", table); expHash = makeExpsTable(database, expTable, expFile, &expCount); /* Read in positions file */ bedList = bedLoadAll(posFile); slSort(&bedList, bedCmp); /* Read data file into a hash of arrays of data values, keyed by name */ dataHash = newHash(0); lf = lineFileOpen(dataFile, TRUE); while ((wordCt = lineFileChopNext(lf, words, ArraySize(words)))) { /* format: <region-name> <experiment-name> <data-value> */ char *name, *exp; int expId; int value; if (wordCt != 3) errAbort("Expecting 3 words in data file, got %d line %d of %s", wordCt, lf->lineIx, lf->fileName); name = words[0]; hel = hashLookup(dataHash, name); if (!hel) { AllocArray(data, expCount); hel = hashAdd(dataHash, name, data); } data = (int *)hel->val; exp = words[1]; expId = hashIntVal(expHash, exp); if (expId < 0 || expId > expCount-1) errAbort("Invalid experiment ID %d for %s, line %d of %s", expId, exp, lf->lineIx, lf->fileName); //value = atoi(words[2]); value = round(atof(words[2])); if (data[expId] != 0) errAbort("Extra experiment data value %d for %s %s, line %d of %s", value, name, exp, lf->lineIx, lf->fileName); data[expId] = value; } lineFileClose(&lf); /* Fill in BED15 fields - add experiment values, and setup block (only 1)*/ for (bed = bedList; bed != NULL; bed = bed->next) { int i; bed->thickStart = bed->chromStart; bed->thickEnd = bed->chromEnd; bed->blockCount = 1; AllocArray(bed->blockSizes, 1); bed->blockSizes[0] = bed->chromEnd - bed->chromStart; AllocArray(bed->chromStarts, 1); bed->chromStarts[0] = 
0; bed->expCount = expCount; AllocArray(bed->expIds, expCount); for (i = 0; i < expCount; i++) bed->expIds[i] = i; AllocArray(bed->expScores, expCount); scores = hashMustFindVal(dataHash, bed->name); for (i = 0; i < expCount; i++) bed->expScores[i] = scores[i]; /* set score for bed to the average of the scores in all experiments */ calculateAverage(bed); } /* from affyPslAndAtlsoToBed ? convertIntensitiesToRatios(bedList); */ /* Write BED data file */ f = hgCreateTabFile(tabDir, table); for (bed = bedList; bed != NULL; bed = bed->next) bedTabOutN(bed, 15, f); /* Cleanup */ carefulClose(&f); freeHash(&expHash); freeHash(&dataHash); bedFreeList(&bedList); }