Esempio n. 1
0
static void chkGenePredRows(struct gbSelect* select,
                             struct sqlConnection* conn,
                             char* table, boolean isRefFlat, 
                             struct metaDataTbls* metaDataTbls,
                             unsigned typeFlags)
/* Check every row of a genePred or refFlat table.  Each row is loaded as a
 * genePred and handed to chkGenePred() together with its zero-based row
 * index.  For refFlat tables row[0] is passed as the gene name, otherwise
 * the gene name is NULL. */
{
char sql[512];
char **cols;
unsigned rowIdx;
struct sqlResult *result;

/* number of leading columns to skip to reach the genePred columns: one when
 * the table is refFlat, plus one more when the table has a bin column */
int skip = 0;
if (isRefFlat)
    skip = 1;
if (sqlFieldIndex(conn, table, "bin") >= 0)
    skip += 1;

sqlSafef(sql, sizeof(sql), "SELECT * FROM %s", table);
result = sqlGetResult(conn, sql);
for (rowIdx = 0; (cols = sqlNextRow(result)) != NULL; rowIdx++)
    {
    struct genePred *gp = genePredLoad(cols + skip);
    char *nameCol = isRefFlat ? cols[0] : NULL;
    chkGenePred(gp, nameCol, rowIdx, select->release->genome->database, table,
                metaDataTbls, typeFlags);
    genePredFree(&gp);
    }
sqlFreeResult(&result);
}
void findGenePredOverlap(struct hash *chromHash, char **row, FILE *outFh)
/* Load a genePred from row, gather the geneLoc entries found by
 * findOverlapingExons() for each of its exons (clipped to the CDS when
 * gCdsOnly is set), and write one tab-separated line per entry to outFh. */
{
struct genePred *gp = genePredLoad(row);
struct binKeeper *bins = getChromBins(chromHash, gp->chrom, gp->strand);
struct geneLoc *hits = NULL;
struct geneLoc *hit;
int i;

for (i = 0; i < gp->exonCount; i++)
    {
    int start = gp->exonStarts[i];
    int end = gp->exonEnds[i];
    if (gCdsOnly)
        {
        start = max(start, gp->cdsStart);
        end = min(end, gp->cdsEnd);
        }
    /* nothing to look up when the (possibly clipped) exon is empty */
    if (start >= end)
        continue;
    findOverlapingExons(&hits, bins, start, end);
    }

hit = hits;
while (hit != NULL)
    {
    fprintf(outFh, "%s\t%s\t%s\t%d\t%d\t%s\t%d\t%d\t%d\n",
            hit->chrom, hit->strand,
            gp->name, gp->txStart, gp->txEnd,
            hit->name, hit->start, hit->end, hit->numOverlap);
    hit = hit->next;
    }
geneLocUnlink(&hits);
genePredFree(&gp);
}
void liftGenePredExt(char *destFile, struct hash *liftHash, int sourceCount, char *sources[])
/* Lift each of the sourceCount genePred files named in sources and write the
 * records for which liftGenePredObj() succeeds to destFile. */
{
char *cols[GENEPREDX_NUM_COLS];
FILE *out = mustOpen(destFile, "w");
int i;

for (i = 0; i < sourceCount; i++)
    {
    char *srcName = sources[i];
    struct lineFile *in;
    int numCols;

    verbose(1, "Lifting %s\n", srcName);
    in = lineFileOpen(srcName, TRUE);
    for (;;)
        {
        struct genePred *gene;
        numCols = lineFileChopNextTab(in, cols, ArraySize(cols));
        if (numCols == 0)
            break;
        gene = genePredExtLoad(cols, numCols);
        if (liftGenePredObj(liftHash, gene, in))
            genePredTabOut(gene, out);
        genePredFree(&gene);
        }
    lineFileClose(&in);
    if (dots)
        verbose(1, "\n");
    }

carefulClose(&out);
}
static void checkGenePred(char *fileTbl)
/* Check a genePred file or table.  fileTbl is read as a file when it exists;
 * otherwise it is treated as a table in database gDb.  Each record is passed
 * to checkAGenePred() with a record number starting at 1. */
{
struct sqlConnection *conn = NULL;
struct genePredReader *reader;
struct genePred *gene;
int recNum = 0;

if (fileExists(fileTbl))
    reader = genePredReaderFile(fileTbl, NULL);
else
    {
    if (gDb == NULL)
        errAbort("file %s doesn't exist, must specify -db=db if this is a table", fileTbl);
    conn = hAllocConn(gDb);
    reader = genePredReaderQuery(conn, fileTbl, NULL);
    }

for (gene = genePredReaderNext(reader); gene != NULL; gene = genePredReaderNext(reader))
    {
    recNum++;
    checkAGenePred(fileTbl, recNum, gene);
    genePredFree(&gene);
    }
genePredReaderFree(&reader);
hFreeConn(&conn);
}
static void showMrnaFromGenePred(struct sqlConnection *conn, 
	char *geneId, char *geneName)
/* Get mRNA sequence for gene from gene prediction.
 * Looks up geneId in the table named by the "knownGene" genome setting at
 * the current gene position (curGeneChrom, curGeneStart, curGeneEnd), builds
 * the sequence via a bed made from the genePred, and prints it inside a
 * <TT><PRE> block with a FASTA-style header.  Aborts if no row matches. */
{
char *table = genomeSetting("knownGene");
struct sqlResult *sr;
char **row;
char query[256];
boolean hasBin = hIsBinned(sqlGetDatabase(conn), table);

hPrintf("<TT><PRE>");
/* Use sqlSafef rather than plain safef so the string values placed into
 * the query are SQL-escaped instead of being pasted in verbatim. */
sqlSafef(query, sizeof(query), 
    "select * from %s where name='%s'"
    " and chrom='%s' and txStart=%d and txEnd=%d", 
    table, geneId, curGeneChrom, curGeneStart, curGeneEnd);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    /* hasBin is used as a 0/1 offset to step over the bin column */
    struct genePred *gene = genePredLoad(row+hasBin);
    struct bed *bed = bedFromGenePred(gene);
    struct dnaSeq *seq = hSeqForBed(sqlGetDatabase(conn), bed);
    hPrintf(">%s (%s predicted mRNA)\n", geneId, geneName);
    writeSeqWithBreaks(stdout, seq->dna, seq->size, 50);
    dnaSeqFree(&seq);
    bedFree(&bed);
    genePredFree(&gene);
    }
else
    errAbort("Couldn't find %s at %s:%d-%d", geneId, 
    	curGeneChrom, curGeneStart, curGeneEnd);
sqlFreeResult(&sr);
hPrintf("</TT></PRE>");
}
static struct chromAnn* chromAnnGenePredReaderRead(struct chromAnnReader *car)
/* Read the next genePred row from the reader's rowReader and build a
 * chromAnn from it; returns NULL when there are no more rows.  With
 * chromAnnRange a single block is added: the CDS range when chromAnnCds is
 * also set (no block at all if the record has no CDS), else the transcript
 * range.  Without chromAnnRange, blocks come from addGenePredBlocks(). */
{
struct rowReader *reader = car->data;
char **savedCols = NULL;
struct genePred *gene;
struct chromAnn *ann;

if (!rowReaderNext(reader))
    return NULL;
rowReaderExpectAtLeast(reader, GENEPRED_NUM_COLS);

/* columns are cloned only when the caller asked to keep the input lines */
if (car->opts & chromAnnSaveLines)
    savedCols = rowReaderCloneColumns(reader);
gene = genePredLoad(reader->row);
ann = chromAnnNew(gene->chrom, gene->strand[0], gene->name, savedCols,
                  strVectorWrite, strVectorFree);

if (!(car->opts & chromAnnRange))
    addGenePredBlocks(ann, car->opts, gene);
else if (!(car->opts & chromAnnCds))
    chromAnnBlkNew(ann, gene->txStart, gene->txEnd);
else if (gene->cdsStart < gene->cdsEnd)
    chromAnnBlkNew(ann, gene->cdsStart, gene->cdsEnd);

chromAnnFinish(ann);
genePredFree(&gene);
return ann;
}
Esempio n. 7
0
void convertPsl(struct psl *psl, struct genbankCds *cds, FILE *genePredFh)
/* Convert a psl plus its cds to a genePred and write it to genePredFh.
 * Nothing is written when pslToGenePred() returns NULL. */
{
    struct genePred *gp = pslToGenePred(psl, cds);
    if (gp == NULL)
        return;
    genePredTabOut(gp, genePredFh);
    genePredFree(&gp);
}
Esempio n. 8
0
void geneFreeList(struct genePred **gList)
/* Free every genePred on the list *gList and set *gList to NULL. */
{
struct genePred *cur = *gList;

while (cur != NULL)
    {
    /* remember the successor before the current element is released */
    struct genePred *rest = cur->next;
    genePredFree(&cur);
    cur = rest;
    }
*gList = NULL;
}
Esempio n. 9
0
static void gbGeneTblWriteGeneFlat(struct gbGeneTbl *ggt, struct gbStatus* status,
                                   struct psl* psl, struct sqlConnection *conn)
/* Write a flat genePred row: the gene name from status (empty string when
 * it is NULL), a tab, then the genePred built from psl and status->cds. */
{
char *name = status->geneName;
struct genePred *gene = genePredFromPsl3(psl, &status->cds, 0, genePredPslCdsMod3,
                                         genePredStdInsertMergeSize,
                                         genePredStdInsertMergeSize);
FILE *out = gbGeneTblGetFlatTabFh(ggt, conn);

if (name == NULL)
    name = "";
fprintf(out, "%s\t", name);
genePredTabOut(gene, out);
genePredFree(&gene);
}
Esempio n. 10
0
void fillInGene(struct chain *chain, struct genePred *gene, struct genePred **pGene)
/** Fill in syntenic gene structure with initial information for gene.
 * On success *pGene receives a newly allocated genePred with chrom, name,
 * strand, txStart/txEnd and cdsStart/cdsEnd set from the query side of chain,
 * and exon arrays sized for gene->exonCount (exonCount itself is left 0).
 * *pGene is set to NULL when either the transcript range or the CDS range
 * has no sub-chain.  In the CDS case a line describing gene is appended to
 * the file named by the cdsErrorFile option, when that option is given. */
{
struct genePred *synGene = NULL;
int qs, qe;
struct chain *subChain=NULL, *toFree=NULL;

chainSubSetForRegion(chain, gene->txStart, gene->txEnd , &subChain, &toFree);    
if(subChain == NULL)
    {
    *pGene= NULL;
    return;
    }
/* Allocate only after the transcript range is known to map, so that the
 * early return above has nothing to free. */
AllocVar(synGene);
qChainRangePlusStrand(subChain, &qs, &qe);
synGene->chrom = cloneString(subChain->qName);
synGene->name = cloneString(gene->name);
synGene->txStart = qs;
synGene->txEnd = qe;
AllocArray(synGene->exonStarts, gene->exonCount);
AllocArray(synGene->exonEnds, gene->exonCount);
/* Strand is kept when the chain's query strand is '+', flipped otherwise. */
if(chain->qStrand == '+')
    strncpy(synGene->strand,  gene->strand, sizeof(synGene->strand));
else
    {
    if(gene->strand[0] == '+')
	strncpy(synGene->strand,  "-", sizeof(synGene->strand));
    else if(gene->strand[0] == '-')
	strncpy(synGene->strand,  "+", sizeof(synGene->strand));
    else
	errAbort("Don't recognize strand %s from gene %s", gene->strand, gene->name);
    }
chainFree(&toFree);
chainSubSetForRegion(chain, gene->cdsStart, gene->cdsEnd , &subChain, &toFree);    
if(subChain == NULL )
    {
    if(optionExists("cdsErrorFile"))
        {
        char *errFileName = optionVal("cdsErrorFile",NULL);
        FILE *cdsErrorFp = fopen(errFileName, "a");
        /* fopen can fail; report it instead of passing NULL to fprintf */
        if(cdsErrorFp == NULL)
            errAbort("Couldn't open cdsErrorFile %s for appending", errFileName);
        fprintf( cdsErrorFp, "%s\t%s\t%u\t%u\t%u\t%u\t%s\t%d\n", gene->name, gene->chrom, gene->txStart, 
		 gene->txEnd, gene->cdsStart, gene->cdsEnd, gene->strand, gene->exonCount );
        fclose(cdsErrorFp);
        }
    *pGene = NULL;
    genePredFree(&synGene); 
    return;
    }
qChainRangePlusStrand(subChain, &qs, &qe);
synGene->cdsStart = qs;
synGene->cdsEnd = qe;
chainFree(&toFree);
*pGene = synGene;
}
Esempio n. 11
0
/* Convert one line read from a bed file to a genePred and write it to gpFh.
 * The line is split on white space into at most 12 columns; fewer than 4
 * columns is an error. */
void cnvBedRec(char *line, FILE *gpFh)
{
char *words[12];
struct bed *bedRec;
struct genePred *gene;
int wordCount = chopByWhite(line, words, ArraySize(words));

if (wordCount < 4)
    errAbort("bed must have at least 4 columns");
bedRec = bedLoadN(words, wordCount);
gene = bedToGenePred(bedRec);
genePredTabOut(gene, gpFh);
genePredFree(&gene);
bedFree(&bedRec);
}
Esempio n. 12
0
static void gtfGroupToGenePred(struct gffFile *gtf, struct gffGroup *group, FILE *gpFh,
                               FILE *infoFh)
/* convert one gtf group to a genePred
 * The genePred is written to gpFh.  When the conversion finishes without a
 * caught error and infoFh is not NULL, writeInfo() is called for the group.
 * With clAllErrors set, problems are printed to stderr and counted in
 * badGroupCount; otherwise they go to errAbort. */
{
/* request all optional genePred fields only when clGenePredExt is set */
unsigned optFields = (clGenePredExt ? genePredAllFlds : 0);
struct errCatch *errCatch = errCatchNew();

if (errCatchStart(errCatch))
    {
    struct genePred *gp = genePredFromGroupedGtf(gtf, group, group->name, optFields, clGxfOptions);
    if (gp == NULL)
        {
        /* a NULL genePred is reported as "no exons" unless such groups
         * are being ignored */
        if (!clIgnoreGroupsWithoutExons)
            {
            char *msg = "no exons defined for group %s, feature %s (perhaps try -ignoreGroupsWithoutExons)";
            if (clAllErrors)
                {
                fprintf(stderr, msg, group->name, group->lineList->feature);
                fputc('\n', stderr);
                badGroupCount++;
                }
            else
                errAbort(msg, group->name, group->lineList->feature);
            }
        }
    else
        {
        genePredTabOut(gp, gpFh);
        genePredFree(&gp);
        }
    }
errCatchEnd(errCatch);
/* NOTE(review): presumably errAbort calls made inside the errCatchStart
 * block above end up here as gotError with the text in message -- confirm
 * against the errCatch implementation */
if (errCatch->gotError)
    {
    // drop trailing newline in caught message
    if (endsWith(errCatch->message->string, "\n"))
        dyStringResize(errCatch->message, dyStringLen(errCatch->message)-1);
    if (clAllErrors)
        {
        fprintf(stderr, "%s\n", errCatch->message->string);
        badGroupCount++;
        }
    else
        errAbort("%s", errCatch->message->string);
    }
else
    {
    /* no error was caught: record the group in the info file if one was given */
    if (infoFh != NULL)
        writeInfo(infoFh, group);
    }
errCatchFree(&errCatch); 
}
Esempio n. 13
0
static void printCcdsHgGeneUrl(struct sqlConnection *conn, char *ccdsId, char* kgId)
/* Print a URL to hgGene for a ccds.  The ccdsGene row for ccdsId on seqName
 * supplies the strand and range used to find the matching knownGene row for
 * kgId; the knownGene txStart/txEnd go into the URL.  Aborts unless exactly
 * one row is found in each table. */
{
char cond[128];
struct genePredReader *reader;
struct genePred *ccds = NULL;
struct genePred *kg = NULL;

/* the ccds genePred gives the location */
sqlSafefFrag(cond, sizeof(cond), "chrom = '%s' and name = '%s'", seqName, ccdsId);
reader = genePredReaderQuery(conn, "ccdsGene", cond);
ccds = genePredReaderAll(reader);
genePredReaderFree(&reader);
if (ccds == NULL)
    errAbort("%s not found in ccdsGene table for chrom %s", ccdsId, seqName);
if (ccds->next != NULL)
    errAbort("multiple %s rows found in ccdsGene table for chrom %s", ccdsId, seqName);

/* the KG genePred gives the exact location needed for the link */
sqlSafefFrag(cond, sizeof(cond), "name = '%s' and strand = '%s'", kgId,
      ccds->strand);
reader = genePredReaderRangeQuery(conn, "knownGene", seqName,
                                  ccds->txStart, ccds->txEnd, cond);
kg = genePredReaderAll(reader);
genePredReaderFree(&reader);
if (kg == NULL)
    errAbort("%s not found in knownGene table for chrom %s", kgId, seqName);
if (kg->next != NULL)
    errAbort("multiple %s rows found in knownGene table for chrom %s", kgId, seqName);

printf("../cgi-bin/hgGene?%s&%s=%s&%s=%s&%s=%s&%s=%d&%s=%d",
       cartSidUrlString(cart),
       "db", database,
       "hgg_gene", kgId,
       "hgg_chrom", seqName,
       "hgg_start", kg->txStart,
       "hgg_end", kg->txEnd);
genePredFree(&ccds);
genePredFree(&kg);
}
Esempio n. 14
0
void doGenePreds(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom, 
	    char *netTable, char *geneFileName, char *geneTableName,
	    char *outBedName, char *selectedFileName, int *foundCount, int *notFoundCount)	
/* Map over genePreds.
 * Genes are loaded from geneFileName when given, otherwise from
 * geneTableName.  Each gene on chrom is mapped with orthoBedFromGene(); a
 * mapped gene with at least one exon is written to outBedName and counted in
 * *foundCount (the source gene is also written to selectedFileName when that
 * is given), anything else is counted in *notFoundCount.  When the
 * cdsErrorFile option is present that file is first (re)created with a
 * header line. */
{
FILE *bedOut = NULL;
FILE *selectedOut = NULL;
struct genePred *gene = NULL, *geneList = NULL;

/* Start the CDS error file with its header.  Opened with mustOpen, like the
 * other output files here, so a failed open is not passed on to fprintf. */
if(optionExists("cdsErrorFile"))
    {
    FILE *cdsErrorFp = mustOpen(optionVal("cdsErrorFile", NULL), "w");
    fprintf( cdsErrorFp, "#name\tchrom\ttxStart\ttxEnd\tcdsStart\tcdsEnd\tstrand\texonCount\n" );
    carefulClose(&cdsErrorFp);
    }

warn("Loading Gene Predictions.");
assert(outBedName);
if(geneFileName)
    geneList=genePredLoadAll(geneFileName);
else
    geneList=loadGeneFromTable(conn, geneTableName, chrom, 0, BIGNUM);
/* Convert genePreds. */
warn("Converting genes.");
bedOut = mustOpen(outBedName, "w");
if (selectedFileName != NULL)
    selectedOut = mustOpen(selectedFileName, "w");
for(gene = geneList; gene != NULL; gene = gene->next)
    {
    struct genePred *synGene = NULL;
    /* only genes on the requested chromosome are mapped */
    if(differentString(gene->chrom, chrom))
	continue;
    synGene = orthoBedFromGene(conn, db, orthoDb, netTable, gene);
    occassionalDot();
    if(synGene != NULL && synGene->exonCount > 0)
	{
	(*foundCount)++;
	genePredTabOut(synGene, bedOut);
        if (selectedOut != NULL)
            genePredTabOut(gene, selectedOut);
	}
    else
	(*notFoundCount)++;
    genePredFree(&synGene);
    }
/* NOTE(review): geneList is not released before returning -- confirm
 * whether the caller relies on process exit for that. */
carefulClose(&selectedOut);
carefulClose(&bedOut);
}
static void getGeneAnns(struct sqlConnection *conn, struct hash *refSeqVerInfoTbl, char *outFile)
/* Read every genePred from the refGene table and pass each one to
 * processGenePred() together with the file handle opened on outFile. */
{
struct genePredReader *reader = genePredReaderQuery(conn, "refGene", NULL);
FILE *out = mustOpen(outFile, "w");
struct genePred *gene;

for (gene = genePredReaderNext(reader); gene != NULL; gene = genePredReaderNext(reader))
    {
    processGenePred(out, refSeqVerInfoTbl, gene);
    genePredFree(&gene);
    }
carefulClose(&out);
genePredReaderFree(&reader);
}
static void genePredHisto(char *what, char *gpFile, char *outFile)
/* get data for generating histograms from a genePred file. */
{
struct genePredReader *gpr = genePredReaderFile(gpFile, NULL);
histoFuncType histoFunc = getHistoFunc(what);
struct genePred *gp;
FILE *outFh = mustOpen(outFile, "w");

while ((gp = genePredReaderNext(gpr)) != NULL)
    {
    histoFunc(gp, outFh);
    genePredFree(&gp);
    }
carefulClose(&outFh);
genePredReaderFree(&gpr);
}
Esempio n. 17
0
static void gbGeneTblWriteGene(struct gbGeneTbl *ggt, struct gbStatus* status,
                               struct psl* psl, struct sqlConnection *conn)
/* Write a genePred row built from psl and status->cds.  When the table has
 * extended columns all optional fields are requested and name2 is replaced
 * by the gene name from status.  When the table has a bin column the bin
 * for txStart..txEnd is written first. */
{
struct genePred *gene;
FILE *out;

if (ggt->hasExtCols)
    gene = genePredFromPsl3(psl, &status->cds, genePredAllFlds, genePredPslCdsMod3,
                            genePredStdInsertMergeSize, genePredStdInsertMergeSize);
else
    gene = genePredFromPsl3(psl, &status->cds, 0, genePredPslCdsMod3,
                            genePredStdInsertMergeSize, genePredStdInsertMergeSize);
out = gbGeneTblGetTabFh(ggt, conn);

if (ggt->hasExtCols)
    {
    /* swap in the gene name as name2 */
    freeMem(gene->name2);
    gene->name2 = cloneString(status->geneName);
    }
if (ggt->hasBin)
    fprintf(out, "%u\t", hFindBin(gene->txStart, gene->txEnd));
genePredTabOut(gene, out);
genePredFree(&gene);
}
Esempio n. 18
0
static void capAliTextOnTrack(struct mafAli *maf,
                              char *db, char *chrom,
                              char *track, boolean onlyCds)
/* Capitalize exons in alignment.  Genes from track overlapping the range of
 * the first maf component are loaded; each exon (clipped to the CDS when
 * onlyCds is set, and to the component's range) is converted to alignment
 * columns with findAliRange() and upper-cased in every component that has
 * text. */
{
int rowOffset;
struct sqlConnection *conn = sqlConnect(db);
struct mafComp *selfMc = maf->components;
struct mafComp *mc;
int start = selfMc->start;
int end = start + selfMc->size;
struct sqlResult *sr = hRangeQuery(conn, track, chrom, start, end,
                                   NULL, &rowOffset);
char **row;

for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    struct genePred *gene = genePredLoad(row+rowOffset);
    int exonIx;
    for (exonIx = 0; exonIx < gene->exonCount; ++exonIx)
        {
        int s = gene->exonStarts[exonIx];
        int e = gene->exonEnds[exonIx];
        int aliStart, aliEnd;
        if (onlyCds)
            {
            if (s < gene->cdsStart)
                s = gene->cdsStart;
            if (e > gene->cdsEnd)
                e = gene->cdsEnd;
            }
        if (s < start)
            s = start;
        if (e > end)
            e = end;
        /* translate component-relative coordinates to alignment columns */
        if (!findAliRange(selfMc->text, maf->textSize, s-start, e-start,
                          &aliStart, &aliEnd))
            continue;
        for (mc = maf->components; mc != NULL; mc = mc->next)
            {
            if (mc->text)
                toUpperN(mc->text + aliStart, aliEnd - aliStart);
            }
        }
    genePredFree(&gene);
    }
sqlFreeResult(&sr);
sqlDisconnect(&conn);
}
Esempio n. 19
0
void borfMatcher(char *bedIn, char *borfIn, char *bedOutFile, char *genePredOutFile)
/* Top level function: load the bed and borf files, walk the two lists in
 * parallel, and for each pair that passes the score/strand test (and the
 * NMD test unless keepNmd is given) write the bed with its thick start/stop
 * set plus the equivalent genePred. */
{
struct borf *borf = NULL, *borfList = NULL;
struct bed *bed = NULL, *bedList = NULL;
float threshold = optionFloat("minScore", 50);
FILE *bedOut = mustOpen(bedOutFile, "w");
FILE *genePredOut = mustOpen(genePredOutFile, "w");
boolean keepSmall = optionExists("keepSmall");
boolean keepNmd = optionExists("keepNmd");

borfList = borfLoadAll(borfIn);
bedList = bedLoadAll(bedIn);
dotForUserInit(slCount(bedList)/10);

bed = bedList;
borf = borfList;
while (bed != NULL && borf != NULL)
    {
    boolean scoreOk;
    dotForUser();
    if(!stringIn(bed->name, borf->name))
        errAbort("Trying to match up %s bed with %s borf - bad idea!", bed->name, borf->name);
    /* Borf leaves the stop codon outside of the cds while we keep it
       inside, so extend the cds end by one codon, capped at the size. */
    borf->cdsEnd = min(borf->cdsEnd+3, borf->size);
    scoreOk = (borf->score > threshold || (keepSmall && borf->cdsSize > 0));
    if (scoreOk && sameString(borf->strand, "+"))
        {
        setThickStartStop(bed, borf);
        if (keepNmd || !nmdTarget(bed))
            {
            struct genePred *gene = bedToGenePred(bed);
            bedTabOutN(bed, 12, bedOut);
            genePredTabOut(gene, genePredOut);
            genePredFree(&gene);
            }
        }
    bed = bed->next;
    borf = borf->next;
    }
warn("Done.");
carefulClose(&bedOut);
carefulClose(&genePredOut);
}
void addGenePred(struct hash *chromHash, char **row)
/* Load a genePred from row and add each of its exons (clipped to the CDS
 * when gCdsOnly is set) to the binKeeper obtained from getChromBins() for
 * its chrom and strand.  All exons share one geneLoc made with the hash's
 * lm. */
{
struct genePred *gp = genePredLoad(row);
struct binKeeper *bins = getChromBins(chromHash, gp->chrom, gp->strand);
struct geneLoc *loc = geneLocNew(chromHash->lm, gp->name, gp->chrom,
                                 gp->strand, gp->txStart, gp->txEnd);
int i;

for (i = 0; i < gp->exonCount; i++)
    {
    int start = gp->exonStarts[i];
    int end = gp->exonEnds[i];
    if (gCdsOnly)
        {
        start = max(start, gp->cdsStart);
        end = min(end, gp->cdsEnd);
        }
    /* exons that are empty after clipping are not added */
    if (start >= end)
        continue;
    binKeeperAdd(bins, start, end, loc);
    }
genePredFree(&gp);
}
Esempio n. 21
0
void intronSizes(char *database, char *table)
/* intronSizes - Output list of intron sizes.
 * Reads genePred rows from table (limited to chromName when that is set, and
 * to rows with txStart != cdsStart when the withUtr option is on), gets the
 * introns of each gene with genePredIntrons() and writes them to stdout as
 * 6-column bed lines. */
{
    struct dyString *query = newDyString(1024);
    struct sqlConnection *conn;
    struct sqlResult *sr;
    char **row;
    struct genePred *gp;
    int rowOffset;
    struct bed *bedList = NULL, *bed = NULL;

    hSetDb(database);
    rowOffset = hOffsetPastBin(NULL, table);
    conn = hAllocConn(database);
    sqlDyStringPrintf(query, "select * from %s", table);
    /* Every clause is appended with sqlDyStringPrintf, like the base of the
     * query, so the chrom value is escaped rather than pasted in verbatim. */
    if (chromName != NULL)
        sqlDyStringPrintf(query, " where chrom = '%s'", chromName);
    if (cgiBoolean("withUtr"))
    {
        /* first condition needs "where", a second one needs "and" */
        if (chromName == NULL)
            sqlDyStringPrintf(query, " where txStart != cdsStart");
        else
            sqlDyStringPrintf(query, " and txStart != cdsStart");
    }
    sr = sqlGetResult(conn, query->string);
    while ((row = sqlNextRow(sr)) != NULL)
    {
        gp = genePredLoad(row+rowOffset);
        genePredIntrons(gp, &bedList);
        slReverse(&bedList);
        for (bed = bedList ; bed != NULL ; bed=bed->next)
            bedTabOutN(bed,6, stdout);
        bedFreeList(&bedList);
        genePredFree(&gp);
    }
    sqlFreeResult(&sr);
    hFreeConn(&conn);
    /* the query string was allocated here and is no longer needed */
    dyStringFree(&query);
}
Esempio n. 22
0
void intronEnds(char *database, char *table)
/* intronEnds - Gather stats on intron ends.
 * Reads genePred rows from table (limited to chromName when that is set, and
 * to rows with txStart != cdsStart when the withUtr option is on).  For each
 * intron the first two and last two bases are taken from the transcript's
 * DNA, reverse-complemented for '-' strand genes, and tallied as gt/ag,
 * gc/ag, at/ac or ct/ac.  Counts and percentages of all introns are printed
 * to stdout. */
{
struct dyString *query = newDyString(1024);
struct sqlConnection *conn;
struct sqlResult *sr;
char **row;
struct genePred *gp;
int total = 0;
int gtag = 0;
int gcag = 0;
int atac = 0;
int ctac = 0;
int denom;
DNA ends[4];
int exonIx, txStart;
struct dnaSeq *seq;
int rowOffset;
char strand;

rowOffset = hOffsetPastBin(database, NULL, table);
conn = hAllocConn(database);
sqlDyStringPrintf(query, "select * from %s", table);
/* Every clause is appended with sqlDyStringPrintf, like the base of the
 * query, so the chrom value is escaped rather than pasted in verbatim. */
if (chromName != NULL)
    sqlDyStringPrintf(query, " where chrom = '%s'", chromName);
if (cgiBoolean("withUtr"))
    {
    /* first condition needs "where", a second one needs "and" */
    if (chromName == NULL)
        sqlDyStringPrintf(query, " where txStart != cdsStart");
    else
        sqlDyStringPrintf(query, " and txStart != cdsStart");
    }
sr = sqlGetResult(conn, query->string);
while ((row = sqlNextRow(sr)) != NULL)
    {
    gp = genePredLoad(row+rowOffset);
    strand = gp->strand[0];
    txStart = gp->txStart;
    seq = hDnaFromSeq(database, gp->chrom, txStart, gp->txEnd, dnaLower);
    /* intron exonIx-1 lies between exon exonIx-1 and exon exonIx */
    for (exonIx=1; exonIx < gp->exonCount; ++exonIx)
        {
        ++total;
        /* seq starts at txStart, so genome positions are offset by it */
        memcpy(ends, seq->dna + gp->exonEnds[exonIx-1] - txStart, 2);
        memcpy(ends+2, seq->dna + gp->exonStarts[exonIx] - txStart - 2, 2);
        if (strand == '-')
            reverseComplement(ends, 4);
        if (ends[0] == 'g' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'g')
           ++gtag;
        if (ends[0] == 'g' && ends[1] == 'c' && ends[2] == 'a' && ends[3] == 'g')
           ++gcag;
        if (ends[0] == 'a' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'c')
           ++atac;
        if (ends[0] == 'c' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'c')
           ++ctac;
        }
    freeDnaSeq(&seq);
    genePredFree(&gp);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
/* the query string was allocated here and is no longer needed */
dyStringFree(&query);
/* With no introns every count is 0; divide by 1 so the percentages print
 * as 0.00 instead of dividing by zero. */
denom = (total > 0) ? total : 1;
printf("gt/ag %d (%4.2f)\n", gtag, 100.0*gtag/denom);
printf("gc/ag %d (%4.2f)\n", gcag, 100.0*gcag/denom);
printf("at/ac %d (%4.2f)\n", atac, 100.0*atac/denom);
printf("ct/ac %d (%4.2f)\n", ctac, 100.0*ctac/denom);
printf("Total %d\n", total);
}
Esempio n. 23
0
struct genePred *orthoBedFromGene(struct sqlConnection *conn, char *db, char *orthoDb,
			    char *netTable, struct genePred *gene)
/** Produce a genePred on the orthologous genome from the original gene.
 * Returns NULL when no chain is found for the gene's exons, when
 * fillInGene() cannot map the gene, or when no exon ends up in the result.
 * Otherwise returns a newly allocated genePred whose txStart/txEnd span its
 * exons and whose cdsStart/cdsEnd have been moved to lie within exons. */
{
struct genePred *synGene= NULL;
int i;
int *blockSizes;
struct chain *chain = NULL;
AllocArray(blockSizes, gene->exonCount);
for(i=0; i<gene->exonCount; i++)
    blockSizes[i] = gene->exonEnds[i] - gene->exonStarts[i];

chain = chainForBlocks(conn, db, netTable, 
		       gene->chrom, gene->txStart, gene->txEnd,
		       (int *)gene->exonStarts, blockSizes, gene->exonCount);
/* blockSizes is only an argument to the lookup above; release it here so
 * none of the return paths below leak it.
 * NOTE(review): assumes chainForBlocks does not keep the pointer -- confirm. */
freez(&blockSizes);
if(chain == NULL)
    return NULL;
fillInGene(chain, gene, &synGene);
if(synGene == NULL)
    return NULL;
/* Exons are added in reverse order when the chain's query strand is not '+'. */
if(chain->qStrand == '+')
    {
    for(i=0; i<gene->exonCount; i++)
	{
	addExonToGene(chain, gene, synGene, i);
	}
    }
else
    {
    for(i=gene->exonCount-1; i>=0; i--)
	{
	addExonToGene(chain, gene, synGene, i);
	}
    }

/* Make sure the txStart/End and cdsStart/End are at reasonable places. */
if(synGene->exonCount > 0) 
    {
    synGene->txStart = synGene->exonStarts[0];
    synGene->txEnd = synGene->exonEnds[synGene->exonCount - 1];
    }

/* Adjust cdsStart to be in an exon */
for(i = 0; i < synGene->exonCount; i++) 
    {
    if(synGene->cdsStart >= synGene->exonStarts[i] &&
       synGene->cdsStart < synGene->exonEnds[i])
        break;  /* found in exon */
    if(synGene->cdsStart < synGene->exonStarts[i])
        {
        /* move to next exon */
        synGene->cdsStart = synGene->exonStarts[i];
        break;
        }
    }
if(i == synGene->exonCount)
    synGene->cdsStart = synGene->txEnd;  /* didn't find start */

/* Adjust cdsEnd to be in an exon */
for(i = synGene->exonCount-1; i >= 0; i--) 
    {
    if(synGene->cdsEnd > synGene->exonStarts[i] &&
       synGene->cdsEnd <= synGene->exonEnds[i])
        break;  /* found in exon */
    if(synGene->cdsEnd >= synGene->exonEnds[i])
        {
        /* move to previous exon */
        synGene->cdsEnd = synGene->exonEnds[i];
        break;
        }
    }
if(i == -1)
    synGene->cdsEnd = synGene->txStart;  /* didn't find end */

if (synGene->cdsStart >= synGene->cdsEnd)
    synGene->cdsStart = synGene->cdsEnd = synGene->txEnd; /* no CDS left */

/* A result without exons is discarded; genePredFree leaves synGene NULL. */
if(synGene->exonCount == 0)
    genePredFree(&synGene);

return synGene;
}
Esempio n. 24
0
struct annoRow *annoGratorGpVarIntegrate(struct annoGrator *gSelf,
					 struct annoStreamRows *primaryData,
					 boolean *retRJFilterFailed, struct lm *callerLm)
// integrate a variant and a genePred, generate as many rows as
// needed to capture all the changes
// Returns the list of output rows, or NULL when there are none.  When
// retRJFilterFailed is not NULL it is set to TRUE if nothing was produced
// and self->gpVarOverlapRule is agoMustOverlap.
{
struct annoGratorGpVar *self = (struct annoGratorGpVar *)gSelf;
// Start each call with a fresh self->lm; it is used below for the rows from
// annoGratorIntegrate, the reference allele and the saved exon strings.
lmCleanup(&(self->lm));
self->lm = lmInit(0);
// Temporarily tweak primaryRow's start and end to find upstream/downstream overlap:
struct annoRow *primaryRow = primaryData->rowList;
int pStart = primaryRow->start, pEnd = primaryRow->end;
// widen by GPRANGE on both sides, without letting start go below 0
if (primaryRow->start <= GPRANGE)
    primaryRow->start = 0;
else
    primaryRow->start -= GPRANGE;
primaryRow->end += GPRANGE;
struct annoRow *rows = annoGratorIntegrate(gSelf, primaryData, retRJFilterFailed, self->lm);
// restore the original coordinates before anything else reads them
primaryRow->start = pStart;
primaryRow->end = pEnd;

// variantFromRow is set up on first use
if (self->variantFromRow == NULL)
    setVariantFromRow(self, primaryData);
// (re)load the cached sequence when there is none yet or the chrom has changed
if (self->curChromSeq == NULL || differentString(self->curChromSeq->name, primaryRow->chrom))
    {
    dnaSeqFree(&self->curChromSeq);
    struct twoBitFile *tbf = self->grator.streamer.assembly->tbf;
    // NOTE(review): start=0,end=0 presumably requests the whole sequence -- confirm
    self->curChromSeq = twoBitReadSeqFragLower(tbf, primaryRow->chrom, 0, 0);
    }
// TODO Performance improvement: instead of creating the transcript sequence for each
// variant that intersects the transcript, cache transcript sequence; possibly
// an slPair with a concatenation of {chrom, txStart, txEnd, cdsStart, cdsEnd,
// exonStarts, exonEnds} as the name, and sequence as the val.  When something in
// the list is no longer in the list of rows from the internal annoGratorIntegrate call,
// drop it.
// BETTER YET: make a callback for gpFx to get CDS sequence only when it needs it.
char *refAllele = getGenomicSequence(self->curChromSeq->dna, primaryRow->start, primaryRow->end,
				     self->lm);
struct variant *variant = self->variantFromRow(self, primaryRow, refAllele);

if (rows == NULL)
    {
    // No genePreds means that the primary variant is intergenic.
    if (self->funcFilter != NULL && self->funcFilter->intergenic)
	return aggvIntergenicRow(self, variant, retRJFilterFailed, callerLm);
    else if (retRJFilterFailed && self->gpVarOverlapRule == agoMustOverlap)
	*retRJFilterFailed = TRUE;
    return NULL;
    }
// a filter failure reported by annoGratorIntegrate above ends processing here
if (retRJFilterFailed && *retRJFilterFailed)
    return NULL;

struct annoRow *outRows = NULL;

// rows carry the extended genePred columns when the source's autoSql
// has an exonFrames column
int hasFrames = (asColumnFindIx(gSelf->mySource->asObj->columnList, "exonFrames") >= 0);

for(; rows; rows = rows->next)
    {
    char **inWords = rows->data;

    // work around genePredLoad's trashing its input
    // (columns 8 and 9 are copied first and put back after loading)
    char *saveExonStarts = lmCloneString(self->lm, inWords[8]);
    char *saveExonEnds = lmCloneString(self->lm, inWords[9]);
    struct genePred *gp = hasFrames ? genePredExtLoad(inWords, GENEPREDX_NUM_COLS) :
				      genePredLoad(inWords);
    inWords[8] = saveExonStarts;
    inWords[9] = saveExonEnds;

    struct annoRow *outRow = aggvGenRows(self, variant, gp, rows, callerLm);
    if (outRow != NULL)
	{
	// each batch is reversed and prepended; the final slReverse after the
	// loop undoes both
	slReverse(&outRow);
	outRows = slCat(outRow, outRows);
	}
    genePredFree(&gp);
    }
slReverse(&outRows);
// If all rows failed the filter, and we must overlap, set *retRJFilterFailed.
if (outRows == NULL && retRJFilterFailed && self->gpVarOverlapRule == agoMustOverlap)
    *retRJFilterFailed = TRUE;
return outRows;
}