Exemplo n.º 1
0
void doParDetails(struct trackDb *tdb, char *name)
/* show details of a PAR item. */
{
// load entire PAR table (t's tiny) and partition
struct bed *pars = loadParTable(tdb);
if (slCount(pars) & 1)
    errAbort("par items not paired in %s", tdb->table);

struct bed *clickedPar = getClickedPar(name, &pars);
struct bed *homPar = getHomologousPar(clickedPar, &pars);
slSort(&pars, parCmp);

cartWebStart(cart, database, "Pseudoautosomal regions");
webPrintLinkTableStart();

// header
webPrintLabelCell("");
webPrintLabelCell("Selected PAR");
webPrintLabelCell("Homologous PAR");

// selected
webPrintLinkTableNewRow();
printHomPairRow(clickedPar, homPar);
if (pars != NULL)
    printOtherPars(clickedPar, pars);

webPrintLinkTableEnd();
printTrackHtml(tdb);
webEnd();

bedFreeList(&pars);
bedFree(&clickedPar);
bedFree(&homPar);
}
void txCdsToGene(char *txBed, char *txFa, char *txCds, char *outGtf, char *outFa)
/* txCdsToGene - Convert transcript bed and best cdsEvidence to genePred and 
 * protein sequence. */
{
struct hash *txSeqHash = faReadAllIntoHash(txFa, dnaLower);
verbose(2, "Read %d transcript sequences from %s\n", txSeqHash->elCount, txFa);
struct hash *cdsHash = cdsEvidenceReadAllIntoHash(txCds);
verbose(2, "Read %d cdsEvidence from %s\n", cdsHash->elCount, txCds);
struct lineFile *lf = lineFileOpen(txBed, TRUE);
FILE *fGtf = mustOpen(outGtf, "w");
FILE *fFa = mustOpen(outFa, "w");
char *row[12];
while (lineFileRow(lf, row))
    {
    struct bed *bed = bedLoad12(row);
    verbose(2, "processing %s\n", bed->name);
    struct cdsEvidence *cds = hashFindVal(cdsHash, bed->name);
    struct dnaSeq *txSeq = hashFindVal(txSeqHash, bed->name);
    char *cdsSource = NULL;
    if (txSeq == NULL)
        errAbort("%s is in %s but not %s", bed->name, txBed, txFa);
    if (cds != NULL)
	{
        outputProtein(cds, txSeq, fFa);
	if (cds->cdsCount > 1)
	    {
	    struct bed *newBed = breakUpBedAtCdsBreaks(cds, bed);
	    if (fTweaked)
	        fprintf(fTweaked, "%s\n", newBed->name);
	    bedFree(&bed);
	    bed = newBed;
	    }
	cdsSource = cds->accession;
	if (sameString(cds->accession, "."))
	    cdsSource = cds->source;
	}

    /* Set bed CDS bounds and optionally output bed. */
    cdsEvidenceSetBedThick(cds, bed);
    if (fBed)
        bedTabOutN(bed, 12, fBed);

    /* Parse out bed name, which is in format chrom.geneId.txId.accession */
    char *geneName = cloneString(bed->name);
    char *accession = strrchr(geneName, '.');
    assert(accession != NULL);
    *accession++ = 0;
    chopSuffix(geneName);

    /* Output as GTF */
    bedToGtf(bed, accession, cdsSource, geneName, fGtf);

    /* Clean up for next iteration of loop. */
    freez(&geneName);
    bedFree(&bed);
    }
lineFileClose(&lf);
carefulClose(&fFa);
carefulClose(&fGtf);
}
Exemplo n.º 3
0
char *altGraphId(struct sqlConnection *conn, struct genePred *gp)
/* Return altGraphId that overlaps most with genePred. */
{
int rowOffset;
struct sqlResult *sr;
char **row;
struct bed *bestBed = NULL;
int intersect, bestIntersect = 0;
char extra[64];
char *ret = NULL;

safef(extra, sizeof(extra), "strand='%s'", gp->strand);
sr = hRangeQuery(conn, "agxBed", gp->chrom, gp->txStart, gp->txEnd,
	extra, &rowOffset);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct bed *bed = bedLoad12(row+rowOffset);
    intersect = gpBedBasesShared(gp, bed);
    if (intersect > bestIntersect)
	{
	bestIntersect = intersect;
	bestBed = bed;
	}
    else
        bedFree(&bed);
    }
if (bestBed != NULL)
    {
    ret = cloneString(bestBed->name);
    bedFree(&bestBed);
    }
return ret;
}
Exemplo n.º 4
0
static void mafFrags(char *database, char *track, char *bedFile, char *mafFile)
/* mafFrags - Collect MAFs from regions specified in a 6 column bed file. */
{
struct slName *orgList = NULL;
struct lineFile *lf = lineFileOpen(bedFile, TRUE);
FILE *f = mustOpen(mafFile, "w");

if (optionExists("orgs"))
    {
    char *orgFile = optionVal("orgs", NULL);
    char *buf;
    readInGulp(orgFile, &buf, NULL);
    orgList = stringToSlNames(buf);

    /* Ensure that org list starts with database. */
    struct slName *me = slNameFind(orgList, database);
    if (me == NULL)
        errAbort("Need to have reference database '%s' in %s", database, orgFile);
    if (me != orgList)
        {
	slRemoveEl(&orgList, me);
	slAddHead(&orgList, me);
	}
    }
mafWriteStart(f, "zero");

if (bed12)
    {
    char *row[12];
    while (lineFileRow(lf, row))
	{
	struct bed *bed = bedLoadN(row, ArraySize(row));
	struct mafAli *maf = mafFromBed12(database, track, bed, orgList);
	if (meFirst)
	    moveMeToFirst(maf, bed->name);
	mafWrite(f, maf);
	mafAliFree(&maf);
	bedFree(&bed);
	}
    }
else
    {
    char *row[6];
    while (lineFileRow(lf, row))
	{
	struct bed *bed = bedLoadN(row, ArraySize(row));
        processBed6(database, track, f, bed, orgList);
	bedFree(&bed);
	}
    }
mafWriteEnd(f);
carefulClose(&f);
}
Exemplo n.º 5
0
void pslToBed(char *pslFile, char *bedFile, struct hash *cdsHash, bool doPosName)
/* pslToBed -- tranform a psl format file to a bed format file */
{
struct lineFile *pslLf = pslFileOpen(pslFile);
FILE *bedFh = mustOpen(bedFile, "w");
struct psl *psl;

while ((psl = pslNext(pslLf)) != NULL)
    {
    struct bed *bed = bedFromPsl(psl);
    if (doPosName)
        {
        char *newName = needMem(512);
        safef(newName, 512, "%s:%d-%d", psl->qName, psl->qStart, psl->qEnd);
        freeMem(bed->name);
        bed->name = newName;
        }

    if (cdsHash)
	{
	struct cds *cds = hashFindVal(cdsHash, psl->qName);
	if (cds == NULL)
	    bed->thickStart = bed->thickEnd = bed->chromStart;
	else
	    setThick(psl, bed, cds);
	}
    bedTabOutN(bed, 12, bedFh);
    bedFree(&bed);
    pslFree(&psl);
    }
carefulClose(&bedFh);
lineFileClose(&pslLf);
}
Exemplo n.º 6
0
void gffToBed(char *inGff, char *outBed)
/* gffToBed - Convert a gff file (gff1 or gff2) to bed.  Not tested with gff3 */
{
struct gffFile *gff = gffRead(inGff);
FILE *f = mustOpen(outBed, "w");
char *exonFeature = bestExonFeature(gff);
gffGroupLines(gff);
separateGroupsByChromosome(gff);
struct gffGroup *group;
for (group = gff->groupList; group != NULL; group = group->next)
    {
    struct genePred *gp;
    if (gff->isGtf)
        gp = genePredFromGroupedGtf(gff, group, group->name, FALSE, FALSE);
    else
        gp = genePredFromGroupedGff(gff, group, group->name, exonFeature, FALSE, FALSE);
    if (gp != NULL)
	{
	assert(gp->txStart == gp->exonStarts[0]);
	struct bed *bed = bedFromGenePred(gp);
	bedTabOutN(bed, 12, f);
	bedFree(&bed);
	}
    }
carefulClose(&f);
}
static struct chromAnn* chromAnnBedReaderRead(struct chromAnnReader *car)
/* read next BED and convert to a chromAnn */
{
struct rowReader *rr = car->data;
if (!rowReaderNext(rr))
    return NULL;
rowReaderExpectAtLeast(rr, 3);

char **rawCols = (car->opts & chromAnnSaveLines) ? rowReaderCloneColumns(rr) : NULL;
struct bed *bed = bedLoadN(rr->row, rr->numCols);
struct chromAnn *ca = chromAnnNew(bed->chrom, bed->strand[0], bed->name, rawCols,
                                  strVectorWrite, strVectorFree);

if ((bed->blockCount == 0) || (car->opts & chromAnnRange))
    {
    if (car->opts & chromAnnCds)
        {
        if (bed->thickStart < bed->thickEnd)
            chromAnnBlkNew(ca, bed->thickStart, bed->thickEnd);
        }
    else
        chromAnnBlkNew(ca, bed->chromStart, bed->chromEnd);
    }
else
    addBedBlocks(ca, car->opts, bed);

chromAnnFinish(ca);
bedFree(&bed);
return ca;
}
void outputBed(struct cassetteSeq *cseq, FILE *primerBed) 
/* Output a bed linked features track to see where primers are. */
{
struct bed *bed = NULL;
struct bed *cbed = cseq->bed;
int leftStart=0, rightStart =0;
bed = cloneBed(cbed);

if(cseq->leftPrimer == NULL || cseq->rightPrimer == NULL)
    return;
leftStart = calcGenomePos(cbed, cseq->leftPrimer, cseq->seq);
rightStart = calcGenomePos(cbed, cseq->rightPrimer, cseq->seq);
if(sameString(bed->strand, "+")) 
    {
    bed->chromStart = bed->thickStart = leftStart;
    bed->chromStarts[0] = 0;
    bed->blockSizes[0] = strlen(cseq->leftPrimer);
    bed->chromStarts[1] = rightStart - leftStart;
    bed->blockSizes[1] = strlen(cseq->leftPrimer);
    bed->chromEnd = bed->thickEnd = bed->chromStarts[1] + bed->chromStart + bed->blockSizes[1];
    }
else
    {
    bed->chromStart = bed->thickStart = rightStart;
    bed->chromStarts[0] = 0;
    bed->blockSizes[0] = strlen(cseq->rightPrimer);
    bed->chromStarts[1] = leftStart - rightStart;
    bed->blockSizes[1] = strlen(cseq->rightPrimer);
    bed->chromEnd = bed->thickEnd = bed->chromStarts[1] + bed->chromStart + bed->blockSizes[1];
    }
bed->blockCount = 2;
checkBedMatchesSeqs(cseq, bed);
bedTabOutN(bed, 12, primerBed);
bedFree(&bed);
}
static void showMrnaFromGenePred(struct sqlConnection *conn, 
	char *geneId, char *geneName)
/* Get mRNA sequence for gene from gene prediction. */
{
char *table = genomeSetting("knownGene");
struct sqlResult *sr;
char **row;
char query[256];
boolean hasBin = hIsBinned(sqlGetDatabase(conn), table);

hPrintf("<TT><PRE>");
safef(query, sizeof(query), 
    "select * from %s where name='%s'"
    " and chrom='%s' and txStart=%d and txEnd=%d", 
    table, geneId, curGeneChrom, curGeneStart, curGeneEnd);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    struct genePred *gene = genePredLoad(row+hasBin);
    struct bed *bed = bedFromGenePred(gene);
    struct dnaSeq *seq = hSeqForBed(sqlGetDatabase(conn), bed);
    hPrintf(">%s (%s predicted mRNA)\n", geneId, geneName);
    writeSeqWithBreaks(stdout, seq->dna, seq->size, 50);
    dnaSeqFree(&seq);
    bedFree(&bed);
    genePredFree(&gene);
    }
else
    errAbort("Couldn't find %s at %s:%d-%d", geneId, 
    	curGeneChrom, curGeneStart, curGeneEnd);
sqlFreeResult(&sr);
hPrintf("</TT></PRE>");
}
Exemplo n.º 10
0
static struct bed* regionsLoad(char* sectionsBed)
/* return a bed3 list of regions for times when -regions is used. */
/* If the filename has a comma then a number, then take just that line */
{
    struct bed* list = NULL;
    unsigned ix = 0;
    if (strchr(sectionsBed, ',')) {
        char* number_part = chopPrefixAt(sectionsBed, ',');
        if (number_part)
            ix = sqlUnsigned(number_part);
    }
    list = readAtLeastBed3(sectionsBed);
    if (list && (ix > 0)) {
        struct bed* single = slElementFromIx(list, ix - 1);
        if (single) {
            struct bed* rem;
            while ((rem = slPopHead(&list)) != single)
                bedFree(&rem);
            rem = single->next;
            bedFreeList(&rem);
            single->next = NULL;
            list = single;
        }
    }
    return list;
}
Exemplo n.º 11
0
void checkInputOpenFiles(struct inInfo *array, int count)
/* Make sure all of the input is there and of right format before going forward. Since
 * this is going to take a while we want to fail fast. */
{
int i;
for (i=0; i<count; ++i)
    {
    struct inInfo *in = &array[i];
    switch (in->type)
        {
	case itBigWig:
	    {
	    /* Just open and close, it will abort if any problem. */
	    in->bbi = bigWigFileOpen(in->fileName);
	    break;
	    }
	case itPromoterBed:
	case itUnstrandedBed:
	case itBlockedBed:
	    {
	    struct lineFile *lf = in->lf = lineFileOpen(in->fileName, TRUE);
	    char *line;
	    lineFileNeedNext(lf, &line, NULL);
	    char *dupe = cloneString(line);
	    char *row[256];
	    int wordCount = chopLine(dupe, row);
	    struct bed *bed = NULL;
	    switch (in->type)
	        {
		case itPromoterBed:
		    lineFileExpectAtLeast(lf, 6, wordCount);
		    bed = bedLoadN(row, 6);
		    char strand = bed->strand[0];
		    if (strand != '+' && strand != '-')
		        errAbort("%s must be stranded, got %s in that field", lf->fileName, row[6]);
		    break;
		case itUnstrandedBed:
		    lineFileExpectAtLeast(lf, 4, wordCount);
		    bed = bedLoadN(row, 4);
		    break;
		case itBlockedBed:
		    lineFileExpectAtLeast(lf, 4, wordCount);
		    bed = bedLoadN(row, 12);
		    break;
		default:
		    internalErr();
		    break;
		}
	    bedFree(&bed);
	    freez(&dupe);
	    lineFileReuse(lf);
	    break;
	    }
	default:
	    internalErr();
	    break;
	}
    }
}
Exemplo n.º 12
0
void loadGappedBed(struct track *tg)
/* Convert bed info in window to linked feature. */
{
struct sqlResult *sr;
char **row;
int rowOffset;
struct bed *bed;
struct linkedFeatures *lfList = NULL, *lf;
struct trackDb *tdb = tg->tdb;
int scoreMin = atoi(trackDbSettingClosestToHomeOrDefault(tdb, "scoreMin", "0"));
int scoreMax = atoi(trackDbSettingClosestToHomeOrDefault(tdb, "scoreMax", "1000"));
boolean useItemRgb = FALSE;

useItemRgb = bedItemRgb(tdb);

if (tg->isBigBed)
    { // avoid opening an unneeded db connection for bigBed; required not to use mysql for parallel fetch tracks
    bigBedAddLinkedFeaturesFrom(tg, chromName, winStart, winEnd,
          scoreMin, scoreMax, useItemRgb, 12, &lfList);
    }
else
    {
    /* Use tg->tdb->track because subtracks inherit composite track's tdb
     * by default, and the variable is named after the composite track. */
    struct sqlConnection *conn = hAllocConn(database);
    char *scoreFilterClause = getScoreFilterClause(cart, tg->tdb,NULL);
    if (scoreFilterClause != NULL)
	{
	sr = hRangeQuery(conn, tg->table, chromName, winStart, winEnd,scoreFilterClause, &rowOffset);
	freeMem(scoreFilterClause);
	}
    else
	{
	sr = hRangeQuery(conn, tg->table, chromName, winStart, winEnd, NULL, &rowOffset);
	}
    while ((row = sqlNextRow(sr)) != NULL)
	{
	bed = bedLoad12(row+rowOffset);
	adjustBedScoreGrayLevel(tdb, bed, scoreMin, scoreMax);
	lf = lfFromBedExtra(bed, scoreMin, scoreMax);
	if (useItemRgb)
	    {
	    lf->extra = (void *)USE_ITEM_RGB;       /* signal for coloring */
	    lf->filterColor=bed->itemRgb;
	    }
	slAddHead(&lfList, lf);
	bedFree(&bed);
	}
    sqlFreeResult(&sr);
    hFreeConn(&conn);
    }
slReverse(&lfList);
if(tg->extraUiData)
    filterBed(tg, &lfList);
slSort(&lfList, linkedFeaturesCmp);
tg->items = lfList;
}
void slRefAndBedFree(struct slRef **pRef)
/* designed to be called by slRefListAndBedFree. */
{
struct slRef *ref;
if ((ref = *pRef) == NULL) 
    return;
bedFree((struct bed **)&(ref->val));
freez(pRef);
}
void bedPlusFree(struct bedPlus **pBp)
/* freedom. */
{
struct bedPlus *bp;
if ((bp = *pBp) == NULL)
    return;
bedFree((struct bed **)&(bp->bed));
freeMem((char *)bp->rest);
freez(pBp);
}
Exemplo n.º 15
0
struct bed *orthoBedFromPsl(struct sqlConnection *conn, char *db, char *orthoDb,
			    char *netTable, struct psl *psl)
/** Produce a bed on the orthologous genome from the original psl. */
{
struct bed *bed = NULL, *orthoBed = NULL;
int i;
bed = bedFromPsl(psl);
orthoBed = orthoBedFromBed(conn, db, orthoDb, netTable, bed);
bedFree(&bed);
return orthoBed;
}
void cassetteSeqFree(struct cassetteSeq **pCseq)
/* Free a cassetteSeq */
{
struct cassetteSeq *cseq = *pCseq;
dnaSeqFree(&cseq->seq);
bedFree(&cseq->bed);
freez(&cseq->name);
freez(&cseq->rightPrimer);
freez(&cseq->leftPrimer);
freez(&cseq);
pCseq = NULL;
}
Exemplo n.º 17
0
/* convert one line read from a bed file to a PSL */
void cnvBedRec(char *line, struct hash *chromSizes, FILE *pslFh)
{
char *row[12];
int numCols = chopByWhite(line, row, ArraySize(row));
if (numCols < 4)
    errAbort("bed must have at least 4 columns");
struct bed *bed = bedLoadN(row, numCols);
struct psl* psl = bedToPsl(bed, chromSizes);
pslTabOut(psl, pslFh);
pslFree(&psl);
bedFree(&bed);
}
Exemplo n.º 18
0
/* convert one line read from a bed file to a genePred */
void cnvBedRec(char *line, FILE *gpFh)
{
char *row[12];
int numCols = chopByWhite(line, row, ArraySize(row));
if (numCols < 4)
    errAbort("bed must have at least 4 columns");
struct bed *bed = bedLoadN(row, numCols);
struct genePred* gp = bedToGenePred(bed);
genePredTabOut(gp, gpFh);
genePredFree(&gp);
bedFree(&bed);
}
void doStrand(struct bed *start, struct bed *end, FILE *f)
/* Assuming all beds from start up to end are on same strand,
 * make a merged bed with all their blocks and output it. */
{
struct rbTree *rangeTree = rangeTreeNew();
struct bed *bed;
for (bed = start; bed != end; bed = bed->next)
    bedIntoRangeTree(bed, rangeTree);
bed = bedFromRangeTree(rangeTree, start->chrom, start->name, start->strand);
bedTabOutN(bed, 12, f);
bedFree(&bed);
rangeTreeFree(&rangeTree);
}
Exemplo n.º 20
0
struct linkedFeatures *bedMungToLinkedFeatures(struct bed **pBed, struct trackDb *tdb,
	int fieldCount, int scoreMin, int scoreMax, boolean useItemRgb)
/* Convert bed to a linkedFeature, destroying bed in the process. */
{
struct bed *bed = *pBed;
if (fieldCount < 12)
    bed8To12(bed);
adjustBedScoreGrayLevel(tdb, bed, scoreMin, scoreMax);
struct linkedFeatures *lf = lfFromBedExtra(bed, scoreMin, scoreMax);
if (useItemRgb)
    {
    lf->extra = (void *)USE_ITEM_RGB;       /* signal for coloring */
    lf->filterColor=bed->itemRgb;
    }
bedFree(pBed);
return lf;
}
Exemplo n.º 21
0
void doPsls(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom, 
	    char *netTable, char *pslFileName, char *pslTableName,
	    char *outBedName, char *selectedFileName, 
            int *foundCount, int *notFoundCount)
/* Map over psls. */
{
FILE *bedOut = NULL;
FILE *selectedOut = NULL;
struct bed *bed = NULL;
struct psl *psl=NULL, *pslList = NULL;
/* Load psls. */
warn("Loading psls.");
if(pslFileName)
    pslList=pslLoadAll(pslFileName);
else
    pslList=loadPslFromTable(conn, pslTableName, chrom, 0, BIGNUM);
/* Convert psls. */
warn("Converting psls.");
assert(outBedName);
bedOut = mustOpen(outBedName, "w");
if (selectedFileName != NULL)
    selectedOut = mustOpen(selectedFileName, "w");
for(psl = pslList; psl != NULL; psl = psl->next)
    {
    if(differentString(psl->tName, chrom))
	continue;
    occassionalDot();
    bed = orthoBedFromPsl(conn, db, orthoDb, netTable, psl);
    if(bed != NULL && bed->blockCount > 0)
	{
	(*foundCount)++;
	bedTabOutN(bed, 12, bedOut);
        if (selectedOut != NULL)
            pslTabOut(psl, selectedOut);
	}
    else
	(*notFoundCount)++;
    bedFree(&bed);
    }
carefulClose(&selectedOut);
carefulClose(&bedOut);
}
Exemplo n.º 22
0
static void subset_with_sections(struct metaBig* mb, struct bed** p_list)
/* mainly for chopgenome */
{
    struct genomeRangeTree* grt = genomeRangeTreeNew();
    struct bed* sec;
    struct bed* list;
    struct bed* newlist = NULL;
    struct bed* head;
    for (sec = mb->sections; sec != NULL; sec = sec->next)
        genomeRangeTreeAdd(grt, sec->chrom, sec->chromStart, sec->chromEnd);
    list = *p_list;
    while ((head = slPopHead(&list)) != NULL) {
        if (genomeRangeTreeOverlaps(grt, head->chrom, head->chromStart, head->chromEnd) && genomeRangeTreeFindEnclosing(grt, head->chrom, head->chromStart, head->chromEnd))
            slAddHead(&newlist, head);
        else
            bedFree(&head);
    }
    slReverse(&newlist);
    *p_list = newlist;
}
Exemplo n.º 23
0
void doBeds(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom, 
	    char *netTable, char *bedFileName, char *bedTableName,
	    char *outBedName, char *selectedFileName, int *foundCount, int *notFoundCount)	
/* Map over beds. */
{
FILE *bedOut = NULL;
FILE *selectedOut = NULL;
struct bed *bed=NULL, *bedList = NULL, *orthoBed=NULL;
/* Load beds. */
warn("Loading beds.");
if(bedFileName)
    bedList=bedLoadAll(bedFileName);
else
    bedList=loadBedFromTable(conn, bedTableName, chrom, 0, BIGNUM);
/* Convert beds. */
warn("Converting beds.");
assert(outBedName);
bedOut = mustOpen(outBedName, "w");
if (selectedFileName != NULL)
    selectedOut = mustOpen(selectedFileName, "w");
for(bed = bedList; bed != NULL; bed = bed->next)
    {
    if(differentString(bed->chrom, chrom))
	continue;
    occassionalDot();
    orthoBed = orthoBedFromBed(conn, db, orthoDb, netTable, bed);
    if(orthoBed != NULL && orthoBed->blockCount > 0)
	{
	(*foundCount)++;
	bedTabOutN(orthoBed, 12, bedOut);
        if (selectedOut != NULL)
            bedTabOutN(bed, 12, selectedOut);
	}
    else
	(*notFoundCount)++;
    bedFree(&orthoBed);
    }
bedFreeList(&bedList);
carefulClose(&selectedOut);
carefulClose(&bedOut);
}
Exemplo n.º 24
0
void bedViewOut(struct altSpliceSite *as, FILE *out)
{
struct bed *bed = NULL;
AllocVar(bed);
bed->chrom = cloneString(as->chrom);
bed->chromStart  = as->chromStart;
bed->chromEnd  = maxInArray(as->altStarts, as->altCount);
AllocArray(bed->chromStarts, 2);
AllocArray(bed->blockSizes, 2);
bed->thickStart = as->altBpStarts[1];
bed->thickEnd = as->altBpEnds[1];
bed->blockCount = 2;
bed->chromStarts[0] = 0;
bed->chromStarts[1] = bed->chromEnd - bed->chromStart -1; 
bed->blockSizes[0] = bed->blockSizes[1] = 1;
bed->name = cloneString(as->agName);
bed->score = as->spliceTypes[1];
safef(bed->strand, sizeof(bed->strand), "%s", as->strand);
bedTabOutN(bed, 12, out);
bedFree(&bed);
}
Exemplo n.º 25
0
static void addFilteredBedsOnRegion(struct bbiFile *bbi, struct region *region,
	char *table, struct asFilter *filter, struct lm *bedLm, struct bed **pBedList)
/* Add relevant beds in reverse order to pBedList */
{
struct lm *bbLm = lmInit(0);
struct bigBedInterval *ivList = NULL, *iv;
ivList = bigBedIntervalQuery(bbi, region->chrom, region->start, region->end, 0, bbLm);
char *row[bbi->fieldCount];
char startBuf[16], endBuf[16];
for (iv = ivList; iv != NULL; iv = iv->next)
    {
    bigBedIntervalToRow(iv, region->chrom, startBuf, endBuf, row, bbi->fieldCount);
    if (asFilterOnRow(filter, row))
        {
	struct bed *bed = bedLoadN(row, bbi->definedFieldCount);
	struct bed *lmBed = lmCloneBed(bed, bedLm);
	slAddHead(pBedList, lmBed);
	bedFree(&bed);
	}
    }

lmCleanup(&bbLm);
}
Exemplo n.º 26
0
void affyPslAndAtlasToBedNew(char *pslFile, char *atlasFile, char *bedOut, 
	char *expRecOut)
/** Main function that does all the work for new-style*/
{
struct lineFile *lf = lineFileOpen(atlasFile, TRUE);
char *line, *name;
int i, wordCount, expCount;
char **row;
double *data, median;
double invMedian, ratio, logRatio;
char *affyId;
struct hash *hash = newHash(17);
struct psl *psl;
struct bed *bed;
FILE *f = NULL;
int dataCount = 0, pslCount = 0, bedCount = 0;
int minExpVal = 20;

/* Open Atlas file and use first line to create experiment table. */
if (!lineFileNextReal(lf, &line))
    errAbort("%s is empty", lf->fileName);
if (startsWith("Affy", line))
    line += 4;
if (line[0] != '\t')
    errAbort("%s doesn't seem to be a new format atlas file", lf->fileName);
expCount = lineToExp(line+1, expRecOut);
if (expCount <= 0)
    errAbort("No experiments in %s it seems", lf->fileName);
warn("%d experiments\n", expCount);

f = mustOpen(bedOut, "w");

/* Build up a hash keyed by affyID with an int array of data
 * for value.  Do output in short case. */
AllocArray(row, expCount);
while (lineFileNextReal(lf, &line))
    {
    affyId = nextWord(&line);

    wordCount = chopByWhite(line, row, expCount);
    if (wordCount != expCount)
        errAbort("Expecting %d data points, got %d line %d of %s", 
		expCount, wordCount, lf->lineIx, lf->fileName);
    if (hashLookup(hash, affyId))
	{
        warn("Duplicate %s, skipping all but first.", affyId);
	continue;
	}
    AllocArray(data, expCount);
    for (i=0; i<expCount; ++i)
	{
        data[i] = atof(row[i]);
        if (data[i] < minExpVal)
	    data[i] = minExpVal;
	}
    median = findPositiveMedian(data, expCount, minExpVal);
    if (median >= 0)
	{
	invMedian = 1.0/median;
	for (i=0; i<expCount; ++i)
	    {
	    double val = data[i];
	    val = safeLog2(invMedian*val);
	    data[i] = val;
	    }
	if (shortOut)
	    shortDataOut(f, affyId, expCount, data);
	else
	    hashAdd(hash, affyId, data);
        }
    data = NULL;
    ++dataCount;
    }
lineFileClose(&lf);
warn("%d rows of expression data\n", dataCount);

/* Stream through psl file, converting it to bed with expression data. */
if (!shortOut)
    {
    lf = pslFileOpen(pslFile);
    while ((psl = pslNext(lf)) != NULL)
	{
	++pslCount;
        /* get probe id from sequence name */
        name=parseNameFromHgc(psl->qName);
	data = hashFindVal(hash, name);
        if (data != NULL)
	    {
            struct bed *bed = bedFromPsl(psl);
	    bed->expCount = expCount;
	    AllocArray(bed->expIds, expCount);
	    AllocArray(bed->expScores, expCount);
	    for (i=0; i<expCount; ++i)
		{
		bed->expScores[i] = data[i];
		bed->expIds[i] = i;
		}
	    bedTabOutN(bed, 15, f);
	    ++bedCount;

	    bedFree(&bed);
	    }
	pslFree(&psl);
	}
    warn("%d records in %s", pslCount, pslFile);
    warn("%d records written to %s", bedCount, bedOut);
    }
lineFileClose(&lf);
carefulClose(&f);
}
Exemplo n.º 27
0
void txInfoAssemble(char *txBedFile, char *cdsEvFile, char *txCdsPredictFile, char *altSpliceFile,
	char *exceptionFile, char *sizePolyAFile, char *pslFile, char *flipFile, char *outFile)
/* txInfoAssemble - Assemble information from various sources into txInfo table.. */
{
/* Build up hash of evidence keyed by transcript name. */
struct hash *cdsEvHash = hashNew(18);
struct cdsEvidence *cdsEv, *cdsEvList = cdsEvidenceLoadAll(cdsEvFile);
for (cdsEv = cdsEvList; cdsEv != NULL; cdsEv = cdsEv->next)
    hashAddUnique(cdsEvHash, cdsEv->name, cdsEv);
verbose(2, "Loaded %d elements from %s\n", cdsEvHash->elCount, cdsEvFile);

/* Build up hash of bestorf structures keyed by transcript name */
struct hash *predictHash = hashNew(18);
struct cdsEvidence *predict, *predictList = cdsEvidenceLoadAll(txCdsPredictFile);
for (predict = predictList; predict != NULL; predict = predict->next)
     hashAddUnique(predictHash, predict->name, predict);
verbose(2, "Loaded %d predicts from %s\n", predictHash->elCount, txCdsPredictFile);

/* Build up structure for random access of retained introns */
struct bed *altSpliceList = bedLoadNAll(altSpliceFile, 6);
verbose(2, "Loaded %d alts from %s\n", slCount(altSpliceList), altSpliceFile);
struct hash *altSpliceHash = bedsIntoHashOfKeepers(altSpliceList);

/* Read in exception info. */
struct hash *selenocysteineHash, *altStartHash;
genbankExceptionsHash(exceptionFile, &selenocysteineHash, &altStartHash);

/* Read in polyA sizes */
struct hash *sizePolyAHash = hashNameIntFile(sizePolyAFile);
verbose(2, "Loaded %d from %s\n", sizePolyAHash->elCount, sizePolyAFile);

/* Read in psls */
struct hash *pslHash = hashNew(20);
struct psl *psl, *pslList = pslLoadAll(pslFile);
for (psl = pslList; psl != NULL; psl = psl->next)
    hashAdd(pslHash, psl->qName, psl);
verbose(2, "Loaded %d from %s\n", pslHash->elCount, pslFile);

/* Read in accessions that we flipped for better splice sites. */
struct hash *flipHash = hashWordsInFile(flipFile, 0);

/* Open primary gene input and output. */
struct lineFile *lf = lineFileOpen(txBedFile, TRUE);
FILE *f = mustOpen(outFile, "w");

/* Main loop - process each gene */
char *row[12];
while (lineFileRow(lf, row))
    {
    struct bed *bed = bedLoad12(row);
    verbose(3, "Processing %s\n", bed->name);

    /* Initialize info to zero */
    struct txInfo info;
    ZeroVar(&info);

    /* Figure out name, sourceAcc, and isRefSeq from bed->name */
    info.name = bed->name;
    info.category = "n/a";
    if (isRfam(bed->name) || stringIn("tRNA", bed->name) != NULL)
	{
	info.sourceAcc = cloneString(bed->name);
	}
    else 
	{
	info.sourceAcc = txAccFromTempName(bed->name);
	}
    info.isRefSeq = startsWith("NM_", info.sourceAcc);

    if (startsWith("antibody.", info.sourceAcc) 
	|| startsWith("CCDS", info.sourceAcc) || isRfam(info.sourceAcc)
	|| stringIn("tRNA", info.sourceAcc) != NULL)
        {
	/* Fake up some things for antibody frag and CCDS that don't have alignments. */
	info.sourceSize = bedTotalBlockSize(bed);
	info.aliCoverage = 1.0;
	info.aliIdRatio = 1.0;
	info. genoMapCount = 1;
	}
    else
	{
	/* Loop through all psl's associated with our RNA.  Figure out
	 * our overlap with each, and pick best one. */
	struct hashEl *hel, *firstPslHel = hashLookup(pslHash, info.sourceAcc);
	if (firstPslHel == NULL)
	    errAbort("%s is not in %s", info.sourceAcc, pslFile);
	int mapCount = 0;
	struct psl *psl, *bestPsl = NULL;
	int coverage, bestCoverage = 0;
	boolean isFlipped = (hashLookup(flipHash, info.sourceAcc) != NULL);
	for (hel = firstPslHel; hel != NULL; hel = hashLookupNext(hel))
	    {
	    psl = hel->val;
	    mapCount += 1;
	    coverage = pslBedOverlap(psl, bed);
	    if (coverage > bestCoverage)
		{
		bestCoverage = coverage;
		bestPsl = psl;
		}
	    /* If we flipped it, try it on the opposite strand too. */
	    if (isFlipped)
		{
		psl->strand[0] = (psl->strand[0] == '+' ? '-' : '+');
		coverage = pslBedOverlap(psl, bed);
		if (coverage > bestCoverage)
		    {
		    bestCoverage = coverage;
		    bestPsl = psl;
		    }
		psl->strand[0] = (psl->strand[0] == '+' ? '-' : '+');
		}
	    }
	if (bestPsl == NULL)
	    errAbort("%s has no overlapping alignments with %s in %s", 
		    bed->name, info.sourceAcc, pslFile);

	/* Figure out and save alignment statistics. */
	int polyA = hashIntValDefault(sizePolyAHash, bed->name, 0);
	info.sourceSize = bestPsl->qSize - polyA;
	info.aliCoverage = (double)bestCoverage / info.sourceSize;
	info.aliIdRatio = (double)(bestPsl->match + bestPsl->repMatch)/
			    (bestPsl->match + bestPsl->misMatch + bestPsl->repMatch);
	info. genoMapCount = mapCount;
	}


    /* Get orf size and start/end complete from cdsEv. */
    if (bed->thickStart < bed->thickEnd)
	{
	cdsEv = hashFindVal(cdsEvHash, bed->name);
	if (cdsEv != NULL)
	    {
	    info.orfSize = cdsEv->end - cdsEv->start;
	    info.startComplete = cdsEv->startComplete;
	    info.endComplete = cdsEv->endComplete;
	    }
	}

    /* Get score from prediction. */
    predict = hashFindVal(predictHash, bed->name);
    if (predict != NULL)
        info.cdsScore = predict->score;

    /* Figure out nonsense-mediated-decay from bed itself. */
    info.nonsenseMediatedDecay = isNonsenseMediatedDecayTarget(bed);

    /* Figure out if retained intron from bed and alt-splice keeper hash */
    info.retainedIntron = hasRetainedIntron(bed, altSpliceHash);
    info.strangeSplice = countStrangeSplices(bed, altSpliceHash);
    info.atacIntrons = countAtacIntrons(bed, altSpliceHash);
    info.bleedIntoIntron = addIntronBleed(bed, altSpliceHash);

    /* Look up selenocysteine info. */
    info.selenocysteine = (hashLookup(selenocysteineHash, bed->name) != NULL);

    /* Loop through bed looking for small gaps indicative of frame shift/stop */
    int i, lastBlock = bed->blockCount-1;
    int exonCount = 1;
    for (i=0; i < lastBlock; ++i)
        {
	int gapStart = bed->chromStarts[i] + bed->blockSizes[i];
	int gapEnd = bed->chromStarts[i+1];
	int gapSize = gapEnd - gapStart;
	switch (gapSize)
	    {
	    case 1:
	    case 2:
	        info.genomicFrameShift = TRUE;
		break;
	    case 3:
	        info.genomicStop = TRUE;
		break;
	    default:
	        exonCount += 1;
		break;
	    }
	}
    info.exonCount = exonCount;

    /* Write info, free bed. */
    txInfoTabOut(&info, f);
    bedFree(&bed);
    }

/* Clean up and go home. */
carefulClose(&f);
}
Exemplo n.º 28
0
struct bed *pathToBed(struct path *path, struct splice *splice,
		      int source, int sink, boolean spoofEnds)
/* Construct a bed for the path. If spoofEnds is TRUE,
   ensure that there is at least a 1bp exon at splice
   sites. */
{
struct bed *bed = NULL;
int vertIx = 0;
int *verts = path->vertices;
int *vPos = splice->vPositions;
unsigned char *vTypes = splice->vTypes;
int i = 0;
struct dyString *buff = newDyString(256);

AllocVar(bed);
bed->chrom = cloneString(splice->tName);
bed->chromStart = BIGNUM;
bed->chromEnd = 0;
safef(bed->strand, sizeof(bed->strand), "%s", splice->strand);
bed->score = splice->type;
AllocArray(bed->chromStarts, path->vCount);
AllocArray(bed->blockSizes, path->vCount);

/* If necessary tack on a fake exon. */
if(spoofEnds && verts[vertIx] != source && verts[vertIx+1] <= splice->vCount &&
   pathEdgeType(vTypes, verts[vertIx], verts[vertIx+1]) != ggExon)
    {
    bed->blockSizes[bed->blockCount] = 1;
    bed->chromStarts[bed->blockCount] = vPos[verts[vertIx]] - 1;
    bed->chromStart = bed->thickStart = min(bed->chromStart, vPos[verts[vertIx]] - 1 );
    bed->chromEnd = bed->thickEnd = max(bed->chromEnd, vPos[verts[vertIx+1]]);
    bed->blockCount++;
    }

/* For each edge that is an exon count up the base pairs. */
for(vertIx = 0; vertIx < path->vCount - 1; vertIx++)
    {
    if(verts[vertIx] != source && verts[vertIx] <= splice->vCount)
	{
	/* If exon add up the base pairs. */
	if(pathEdgeType(vTypes, verts[vertIx], verts[vertIx+1]) == ggExon)
	    {
	    bed->blockSizes[bed->blockCount] = vPos[verts[vertIx+1]] - vPos[verts[vertIx]];
	    bed->chromStarts[bed->blockCount] = vPos[verts[vertIx]];
	    bed->chromStart = bed->thickStart = min(bed->chromStart, vPos[verts[vertIx]]);
	    bed->chromEnd = bed->thickEnd = max(bed->chromEnd, vPos[verts[vertIx+1]]);
	    bed->blockCount++;
	    }
	}
    }

/* if spoofing ends tack on a 1bp exon as necessary. */
vertIx = path->vCount - 2;
if(spoofEnds && verts[vertIx] != source && verts[vertIx+1] <= splice->vCount &&
   pathEdgeType(vTypes, verts[vertIx], verts[vertIx+1]) != ggExon)
    {
    bed->blockSizes[bed->blockCount] = 1;
    bed->chromStarts[bed->blockCount] = vPos[verts[vertIx+1]];
    bed->chromStart = bed->thickStart = min(bed->chromStart, vPos[verts[vertIx+1]]);
    bed->chromEnd = bed->thickEnd = max(bed->chromEnd, vPos[verts[vertIx+1]]+1);
    bed->blockCount++;
    }

/* Fix up the name and adjust the chromStarts. */
dyStringPrintf(buff, "%s.%d.", splice->name, slIxFromElement(splice->paths, path));
for(i = 0; i < path->vCount; i++)
    {
    if(path->vertices[i] != sink && path->vertices[i] <= splice->vCount)
	dyStringPrintf(buff, "%d,", path->vertices[i]);
    }
if(splice->type == alt5Prime || splice->type == alt3Prime ||
   splice->type == altRetInt || splice->type == altCassette)
    {
    int pathIx = slIxFromElement(splice->paths, path);
    if(pathIx == 0)
	dyStringPrintf(buff, "-Ex");
    else if(pathIx == 1)
	dyStringPrintf(buff, "-Inc");
    }
bed->name = cloneString(buff->string);
for(i = 0; i < bed->blockCount; i++)
    bed->chromStarts[i] -= bed->chromStart;

/* If we don't have any blocks, quit now. */
if(bed->blockCount == 0)
    bedFree(&bed);
dyStringFree(&buff);
return bed;
}
void txCdsPick(char *inBed, char *inTce, char *refToPepTab, char *outTce, char *outPick)
/* txCdsPick - Pick best CDS if any for transcript given evidence.. */
{
struct hash *pepToRefHash, *refToPepHash;
hashRefToPep(refToPepTab, &refToPepHash, &pepToRefHash);
struct hash *txCdsInfoHash = loadAndWeighTce(inTce, refToPepHash, pepToRefHash);
verbose(2, "Read info on %d transcripts from %s\n", 
	txCdsInfoHash->elCount, inTce);
struct lineFile *lf = lineFileOpen(inBed, TRUE);
FILE *fTce = mustOpen(outTce, "w");
FILE *fPick = mustOpen(outPick, "w");
char *row[12];
while (lineFileRow(lf, row))
    {
    struct bed *bed = bedLoad12(row);
    struct txCdsInfo *tx = hashFindVal(txCdsInfoHash, bed->name);
    struct cdsPick pick;
    ZeroVar(&pick);
    pick.name = bed->name;
    pick.refSeq = pick.refProt = pick.swissProt = pick.uniProt = pick.ccds = "";
    if (tx != NULL && tx->cdsList->score >= weightedThreshold)
        {
	struct cdsEvidence *cds, *bestCds = tx->cdsList;
	int bestSize = bestCds->end - bestCds->start;
	int minSize = bestSize*0.50;
	cdsEvidenceTabOut(bestCds, fTce);
	pick.start = bestCds->start;
	pick.end = bestCds->end;
	pick.source = bestCds->source;
	pick.score = bestCds->score;
	pick.startComplete = bestCds->startComplete;
	pick.endComplete = bestCds->endComplete;
	for (cds = tx->cdsList; cds != NULL; cds = cds->next)
	    {
	    char *source = cds->source;
	    if (rangeIntersection(bestCds->start, bestCds->end, cds->start, cds->end)
	    	>= minSize)
		{
		if (startsWith("RefPep", source))
		    {
		    if (pick.refProt[0] == 0)
			{
			pick.refProt = cds->accession;
			if (pick.refSeq[0] == 0)
			    pick.refSeq = hashMustFindVal(pepToRefHash, cds->accession);
			}
		    }
		else if (startsWith("RefSeq", source))
		    {
		    if (pick.refSeq[0] == 0)
		        pick.refSeq = cds->accession;
		    }
		else if (sameString("swissProt", source))
		    {
		    if (pick.swissProt[0] == 0)
			{
			pick.swissProt = cds->accession;
			if (pick.uniProt[0] == 0)
			    pick.uniProt = cds->accession;
			}
		    }
		else if (sameString("trembl", source))
		    {
		    if (pick.uniProt[0] == 0)
			pick.uniProt = cds->accession;
		    }
		else if (sameString("txCdsPredict", source))
		    {
		    }
		else if (sameString("genbankCds", source))
		    {
		    }
		else if (sameString("ccds", source))
		    {
		    if (pick.ccds[0] == 0)
		        pick.ccds = cds->accession;
		    }
		else
		    errAbort("Unknown source %s", source);
		}
	    }

	if (exceptionsOut)
	    transferExceptions(bestCds->accession, bestCds->source, pepToRefHash,
	    	bed->name, exceptionsOut);
	}
    else
        {
	pick.source = "noncoding";
	}
    cdsPickTabOut(&pick, fPick);
    bedFree(&bed);
    }
carefulClose(&fPick);
carefulClose(&fTce);
}
Exemplo n.º 30
0
/*	bedGraphLoadItems - an ordinary bed load, but we are interested
 *		in only the chrom, start, end, and the graphColumn
 */
static void bedGraphLoadItems(struct track *tg)
{
struct sqlConnection *conn;
struct sqlResult *sr = (struct sqlResult *) NULL;
char **row = (char **)NULL;
int rowOffset = 0;
struct bedGraphItem *bgList = NULL;
int itemsLoaded = 0;
int colCount = 0;
struct wigCartOptions *wigCart = (struct wigCartOptions *) tg->wigCartData;
int graphColumn = 5;
char *tableName;

if(sameString(tg->table, "affyTranscription"))
    wigCart->colorTrack = "affyTransfrags";
graphColumn = wigCart->graphColumn;


#ifndef GBROWSE
if (isCustomTrack(tg->table) && tg->customPt)
    {
    struct customTrack *ct = (struct customTrack *) tg->customPt;
    tableName = ct->dbTableName;
    conn = hAllocConn(CUSTOM_TRASH);
    }
else 
#endif /* GBROWSE */
    {
    tableName = tg->table;
    conn = hAllocConnTrack(database, tg->tdb);
    }

sr = hRangeQuery(conn, tableName, chromName, winStart, winEnd, NULL,
	&rowOffset);

colCount = sqlCountColumns(sr) - rowOffset;
/*	Must have at least four good columns	*/
if (colCount < 4)
    errAbort("bedGraphLoadItems: table %s only has %d data columns, must be at least 4", tableName, colCount);

if (colCount < graphColumn)
    errAbort("bedGraphLoadItems: table %s only has %d data columns, specified graph column %d does not exist",
	tableName, colCount, graphColumn);

/*	before loop, determine actual row[graphColumn] index */
graphColumn += (rowOffset - 1);

while ((row = sqlNextRow(sr)) != NULL)
    {
    struct bedGraphItem *bg;
    struct bed *bed;

    ++itemsLoaded;

    /*	load chrom, start, end	*/
    bed = bedLoadN(row+rowOffset, 3);

    AllocVar(bg);
    bg->start = bed->chromStart;
    bg->end = bed->chromEnd;
    if ((colCount > 4) && ((graphColumn + rowOffset) != 4))
	bg->name = cloneString(row[3+rowOffset]);
    else
	{
	char name[128];
	safef(name,ArraySize(name),"%s.%d", bed->chrom, itemsLoaded);
	bg->name = cloneString(name);
	}
    bg->dataValue = sqlFloat(row[graphColumn]);
    /* filled in by DrawItems	*/
    bg->graphUpperLimit = wigEncodeStartingUpperLimit;
    bg->graphLowerLimit = wigEncodeStartingLowerLimit;
    slAddHead(&bgList, bg);
    bedFree(&bed);
    }

sqlFreeResult(&sr);
hFreeConn(&conn);

slReverse(&bgList);
tg->items = bgList;
}	/*	bedGraphLoadItems()	*/