void findGenePredOverlap(struct hash *chromHash, char **row, FILE *outFh)
/* Load the genePred described by row, collect the gene locations that
 * findOverlapingExons() reports for each of its exons in the bins for the
 * gene's chrom and strand, and write one tab-separated line per collected
 * location to outFh.  Columns written: location chrom, location strand,
 * this gene's name, txStart and txEnd, then the location's name, start,
 * end and numOverlap. */
{
struct genePred *query = genePredLoad(row);
struct binKeeper *bins = getChromBins(chromHash, query->chrom, query->strand);
struct geneLoc *hits = NULL;
struct geneLoc *hit;
int exonIx;

/* Gather overlaps one exon at a time. */
for (exonIx = 0; exonIx < query->exonCount; exonIx++)
    {
    int start = query->exonStarts[exonIx];
    int end = query->exonEnds[exonIx];
    if (gCdsOnly)
        {
        /* Clip the exon to the coding region. */
        if (start < query->cdsStart)
            start = query->cdsStart;
        if (end > query->cdsEnd)
            end = query->cdsEnd;
        }
    if (start >= end)
        continue;  /* nothing left of this exon to search with */
    findOverlapingExons(&hits, bins, start, end);
    }

/* Report everything that was collected. */
hit = hits;
while (hit != NULL)
    {
    fprintf(outFh, "%s\t%s\t%s\t%d\t%d\t%s\t%d\t%d\t%d\n",
            hit->chrom, hit->strand,
            query->name, query->txStart, query->txEnd,
            hit->name, hit->start, hit->end, hit->numOverlap);
    hit = hit->next;
    }

geneLocUnlink(&hits);
genePredFree(&query);
}
Ejemplo n.º 2
0
static void wrapHgGeneLink(struct sqlConnection *conn, char *name,
	char *label, char *geneTable)
/* Wrap label with link to hgGene if possible.
 *   conn      - connection used for the lookup.
 *   name      - value matched against the name column of geneTable.
 *   label     - text to print; wrapped in the link when a row is found.
 *   geneTable - table that is queried and loaded as genePred.
 * When no row matches, label is printed on its own. */
{
char query[256];
struct sqlResult *sr;
char **row;
/* The number of columns to skip must describe the table that is actually
 * queried below, so look it up for geneTable rather than a fixed table name. */
int rowOffset = hOffsetPastBin(database, seqName, geneTable);
sqlSafef(query, sizeof(query),
    "select * from %s where name = '%s'", geneTable, name);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    /* NOTE(review): gp is not released anywhere in this function. */
    struct genePred *gp = genePredLoad(row+rowOffset);
    printf("<A HREF=\"../cgi-bin/hgGene?db=%s", database);
    printf("&hgg_gene=%s", gp->name);
    printf("&hgg_chrom=%s", gp->chrom);
    printf("&hgg_start=%d", gp->txStart);
    printf("&hgg_end=%d", gp->txEnd);
    printf("\">");
    printf("%s", label);
    printf("</A>");
    }
else
    printf("%s", label);
sqlFreeResult(&sr);
}
Ejemplo n.º 3
0
static void chkGenePredRows(struct gbSelect* select,
                             struct sqlConnection* conn,
                             char* table, boolean isRefFlat, 
                             struct metaDataTbls* metaDataTbls,
                             unsigned typeFlags)
/* Run chkGenePred() on every row of a genePred or refFlat table.
 * For a refFlat table the first column of the row is passed as the gene
 * name; otherwise the gene name passed is NULL. */
{
char sql[512];
struct sqlResult *result;
char **cols;
unsigned rowIx;
int skip = 0;  /* leading columns that come before the genePred fields */

if (isRefFlat)
    skip++;
if (sqlFieldIndex(conn, table, "bin") >= 0)
    skip++;

sqlSafef(sql, sizeof(sql), "SELECT * FROM %s", table);
result = sqlGetResult(conn, sql);
for (rowIx = 0; (cols = sqlNextRow(result)) != NULL; rowIx++)
    {
    struct genePred *pred = genePredLoad(cols + skip);
    char *geneName = NULL;
    if (isRefFlat)
        geneName = cols[0];
    chkGenePred(pred, geneName, rowIx, select->release->genome->database,
                table, metaDataTbls, typeFlags);
    genePredFree(&pred);
    }
sqlFreeResult(&result);
}
void liftGenePred(char *destFile, struct hash *liftHash, int sourceCount, char *sources[])
/* Lift each of the sourceCount genePred files named in sources.  Every
 * record for which liftGenePredObj() returns true is written to destFile
 * with genePredTabOut(). */
{
FILE *out = mustOpen(destFile, "w");
char *cols[GENEPRED_NUM_COLS];
int srcIx = 0;

while (srcIx < sourceCount)
    {
    struct lineFile *in;

    verbose(1, "Lifting %s\n", sources[srcIx]);
    in = lineFileOpen(sources[srcIx], TRUE);
    while (lineFileChopNextTab(in, cols, ArraySize(cols)))
        {
        struct genePred *pred = genePredLoad(cols);
        if (liftGenePredObj(liftHash, pred, in))
            genePredTabOut(pred, out);
        genePredFree(&pred);
        }
    lineFileClose(&in);
    /* Emit a newline after each source when the dots flag is set. */
    if (dots)
        verbose(1, "\n");
    ++srcIx;
    }

carefulClose(&out);
}
Ejemplo n.º 5
0
void freen(char *chrom)
/* Test something: load the knownGene rows for chrom from hg19, add every
 * exon to a range tree, and report the number of items in the resulting
 * list, calling uglyTime() after each stage.  Nothing allocated here is
 * released. */
{
struct sqlConnection *conn;
struct sqlResult *sr;
struct rbTree *exonRanges;
struct range *rangeList;
char **row;
char query[512];

uglyTime(NULL);
conn = sqlConnect("hg19");
uglyTime("connect");

sqlSafef(query, sizeof(query), "select * from knownGene where chrom='%s'", chrom);
sr = sqlGetResult(conn, query);
uglyTime("get result");

exonRanges = rangeTreeNew();
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    struct genePred *gp = genePredLoad(row);
    int exonTotal = gp->exonCount;
    int exonIx = 0;
    while (exonIx < exonTotal)
        {
        rangeTreeAdd(exonRanges, gp->exonStarts[exonIx], gp->exonEnds[exonIx]);
        ++exonIx;
        }
    }
uglyTime("Add rows");

rangeList = rangeTreeList(exonRanges);
uglyTime("Did list");
uglyf("%d items in chrom %s\n", slCount(rangeList), chrom);
}
static void showMrnaFromGenePred(struct sqlConnection *conn, 
	char *geneId, char *geneName)
/* Get mRNA sequence for gene from gene prediction.
 *   conn     - connection used for the lookup; its database also supplies
 *              the sequence.
 *   geneId   - name of the gene, matched together with the current gene's
 *              chrom, txStart and txEnd.
 *   geneName - only used in the header line that is printed.
 * Prints a '>' header line followed by the sequence in lines of 50,
 * wrapped in TT/PRE tags.  Aborts if the gene is not found. */
{
char *table = genomeSetting("knownGene");
struct sqlResult *sr;
char **row;
char query[256];
boolean hasBin = hIsBinned(sqlGetDatabase(conn), table);

hPrintf("<TT><PRE>");
safef(query, sizeof(query), 
    "select * from %s where name='%s'"
    " and chrom='%s' and txStart=%d and txEnd=%d", 
    table, geneId, curGeneChrom, curGeneStart, curGeneEnd);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    /* Skip the leading bin column when the table has one. */
    struct genePred *gene = genePredLoad(row+hasBin);
    struct bed *bed = bedFromGenePred(gene);
    struct dnaSeq *seq = hSeqForBed(sqlGetDatabase(conn), bed);
    hPrintf(">%s (%s predicted mRNA)\n", geneId, geneName);
    writeSeqWithBreaks(stdout, seq->dna, seq->size, 50);
    dnaSeqFree(&seq);
    bedFree(&bed);
    genePredFree(&gene);
    }
else
    errAbort("Couldn't find %s at %s:%d-%d", geneId, 
    	curGeneChrom, curGeneStart, curGeneEnd);
sqlFreeResult(&sr);
/* Close the elements in the reverse of the order they were opened so the
 * emitted markup is properly nested. */
hPrintf("</PRE></TT>");
}
static struct chromAnn* chromAnnGenePredReaderRead(struct chromAnnReader *car)
/* Read the next genePred row from the reader's rowReader and build a
 * chromAnn from it.  Returns NULL when there are no more rows.  The raw
 * columns are kept in the chromAnn only when chromAnnSaveLines is set.
 * With chromAnnRange a single block is added: the CDS range when
 * chromAnnCds is also set (no block at all if the CDS is empty),
 * otherwise the transcript range.  Without chromAnnRange the blocks come
 * from addGenePredBlocks(). */
{
struct rowReader *reader = car->data;
char **savedCols = NULL;
struct genePred *pred;
struct chromAnn *ann;

if (!rowReaderNext(reader))
    return NULL;
rowReaderExpectAtLeast(reader, GENEPRED_NUM_COLS);

/* Columns are cloned before the row is handed to genePredLoad(). */
if (car->opts & chromAnnSaveLines)
    savedCols = rowReaderCloneColumns(reader);
pred = genePredLoad(reader->row);
ann = chromAnnNew(pred->chrom, pred->strand[0], pred->name, savedCols,
                  strVectorWrite, strVectorFree);

if (!(car->opts & chromAnnRange))
    addGenePredBlocks(ann, car->opts, pred);
else if (!(car->opts & chromAnnCds))
    chromAnnBlkNew(ann, pred->txStart, pred->txEnd);
else if (pred->cdsStart < pred->cdsEnd)
    chromAnnBlkNew(ann, pred->cdsStart, pred->cdsEnd);

chromAnnFinish(ann);
genePredFree(&pred);
return ann;
}
Ejemplo n.º 8
0
void readGenes(char *fileName, 
	struct hash **retHash, struct chromGenes **retList)
/* Read the genePred rows in fileName and group them by chromosome.
 * *retHash receives a hash from chromosome name to its chromGenes record and
 * *retList the same records as a list sorted with chromGenesCmpName.
 * Each gene is added at the head of its chromosome's geneList.  A summary
 * line with the gene and chromosome counts is printed to stdout. */
{
struct lineFile *in = lineFileOpen(fileName, TRUE);
struct hash *byChrom = newHash(8);
struct chromGenes *chromList = NULL;
char *row[10];
int geneCount = 0;

while (lineFileRow(in, row))
    {
    struct genePred *gene = genePredLoad(row);
    struct chromGenes *owner = hashFindVal(byChrom, gene->chrom);
    if (owner == NULL)
        {
        /* First gene seen on this chromosome: create its record. */
        AllocVar(owner);
        hashAddSaveName(byChrom, gene->chrom, owner, &owner->name);
        slAddHead(&chromList, owner);
        }
    slAddHead(&owner->geneList, gene);
    geneCount++;
    }
printf("Read %d genes in %d chromosomes in %s\n", geneCount, 
	slCount(chromList), fileName);
lineFileClose(&in);
slSort(&chromList, chromGenesCmpName);
*retHash = byChrom;
*retList = chromList;
}
void printBands(char *database, struct refLink *rl, FILE *f)
/* Print name of genes and bands it occurs on.
 *   database - database holding the refGene table.
 *   rl       - refLink record; rl->mrnaAcc selects the refGene rows and
 *              rl->name is echoed in the output.
 *   f        - output file.
 * For each matching refGene row the band at the midpoint of the transcript
 * is looked up ("n/a" when hChromBand() fails).  If at least one row
 * matched, one line is written: name, mrnaAcc, row count, comma-terminated
 * band list.  If the query text does not fit the buffer, a message is
 * written to stderr and nothing is output. */
{
struct sqlConnection *conn = hAllocConn(database);
struct sqlResult *sr;
char **row;
struct genePred *gp;
char query[512];
int queryLen;
int count = 0;
struct dyString *bands = newDyString(0);
char band[64];

/* Bounded formatting: never write past the end of query. */
queryLen = snprintf(query, sizeof(query),
    "select * from refGene where name = '%s'", rl->mrnaAcc);
if (queryLen < 0 || queryLen >= (int)sizeof(query))
    {
    fprintf(stderr, "printBands: query for %s does not fit in %d bytes\n",
            rl->mrnaAcc, (int)sizeof(query));
    dyStringFree(&bands);
    hFreeConn(&conn);
    return;
    }
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    ++count;
    /* NOTE(review): gp is not released anywhere in this function. */
    gp = genePredLoad(row);
    if (hChromBand(database, gp->chrom, (gp->txStart + gp->txEnd)/2, band))
        dyStringPrintf(bands, "%s,", band);
    else
        dyStringPrintf(bands, "n/a,");
    }
if (count > 0)
    fprintf(f, "%s\t%s\t%d\t%s\n", rl->name, rl->mrnaAcc, count, bands->string);

dyStringFree(&bands);
sqlFreeResult(&sr);
hFreeConn(&conn);
}
Ejemplo n.º 10
0
static struct genePred *getCurGenePred(struct sqlConnection *conn)
/* Return the current gene as a genePred, looked up by curGeneId,
 * curGeneChrom, curGeneStart and curGeneEnd in the table found for the
 * "knownGene" genome setting.  Also sets curAlignId: a clone of the row's
 * alignId field when the table has an alignId column, otherwise the
 * returned gene's own name pointer.  Aborts if the track or the gene is
 * not found. */
{
char *trackName = genomeSetting("knownGene");
char splitTable[HDB_MAX_TABLE_STRING];
boolean binned;
char sql[256];
struct sqlResult *result;
char **fields;
struct genePred *found = NULL;
bool alignIdPresent;

if (!hFindSplitTable(sqlGetDatabase(conn), curGeneChrom, trackName, splitTable, sizeof splitTable, &binned))
    errAbort("track %s not found", trackName);
alignIdPresent = sqlColumnExists(conn, splitTable, "alignId");

sqlSafef(sql, sizeof(sql),
	"select * from %s where name = '%s' "
	"and chrom = '%s' and txStart=%d and txEnd=%d"
	, splitTable, curGeneId, curGeneChrom, curGeneStart, curGeneEnd);
result = sqlGetResult(conn, sql);
fields = sqlNextRow(result);
if (fields != NULL)
    {
    /* Skip the leading bin column when the table has one. */
    found = genePredLoad(fields + binned);

#define  ALIGNIDFIELD      11  // Gencode Id
    if (alignIdPresent)
	curAlignId = cloneString(fields[ALIGNIDFIELD]);
    else
	curAlignId = found->name;
    }
sqlFreeResult(&result);

if (found == NULL)
    errAbort("getCurGenePred: Can't find %s", sql);
return found;
}
Ejemplo n.º 11
0
struct psl *getParentAligns(struct sqlConnection *conn, struct mappingInfo *mi, char **table)
/* Return the alignments of the parent gene of the retrogene described by mi,
 * within the range pg->gChrom:pg->gStart-pg->gEnd.
 *   conn  - connection used for the augustusX lookups.
 *   mi    - mapping info; mi->pg, mi->geneSet, mi->seqId and mi->gbAcc are read.
 *   table - *table is set to a cloned name of the table that was consulted.
 * For gene sets starting with "August": augustusXAli is used when it exists,
 * otherwise the first matching augustusX gene prediction is converted to a
 * psl.  For other gene sets all_mrna is searched, then refSeqAli if that
 * found nothing.  Returns NULL when nothing is found; prints a message when
 * all_mrna does not exist. */
{
struct ucscRetroInfo *pg = mi->pg;
struct psl *pslList = NULL;
char query[512];
if (startsWith("August",mi->geneSet))
    {
    if (hTableExists(database, "augustusXAli"))
        {
        *table = cloneString( "augustusXAli");
        pslList = loadPslRangeT(*table, mi->seqId, pg->gChrom, pg->gStart, pg->gEnd);
        }
    else if (hTableExists(database, "augustusX"))
        {
        struct sqlResult *sr;
        char **row;
        struct genePred *gp = NULL;
        *table = cloneString( "augustusX");
        sqlSafef(query, sizeof(query), "select * from augustusX where chrom = '%s' and txEnd > %d and txStart < %d and name like '%s%%'", 
                pg->gChrom, pg->gStart, pg->gEnd , mi->seqId );
        sr = sqlGetResult(conn, query);
        /* row+1 skips the first column of the augustusX row. */
        if ((row = sqlNextRow(sr)) != NULL)
            gp = genePredLoad(row+1);
        /* Release the result whether or not a row was found, and before
         * conn is used for the next query. */
        sqlFreeResult(&sr);
        if (gp != NULL)
            {
            int targetSize;
            sqlSafef(query, sizeof(query), 
                    "select size from chromInfo where chrom = '%s' " , gp->chrom); 
            targetSize = sqlNeedQuickNum(conn, query) ;
            pslList = pslFromGenePred(gp, targetSize);
            }
        }

    }
else if (hTableExists(database, "all_mrna"))
    {
    char parent[255];
    char *dotPtr ;
    *table = cloneString( "all_mrna");
    safef(parent, sizeof(parent), "%s",pg->name);
    /* strip off version and unique suffix when looking for parent gene*/
    /* NOTE(review): parent is computed here but the lookups below use
     * mi->gbAcc. */
    dotPtr = rStringIn(".",parent) ;
    if (dotPtr != NULL)
        *dotPtr = '\0';
    pslList = loadPslRangeT(*table, mi->gbAcc, pg->gChrom, pg->gStart, pg->gEnd);
    if (pslList == NULL)
        {
        /* Nothing in all_mrna: fall back to refSeqAli. */
        *table = cloneString( "refSeqAli");
        pslList = loadPslRangeT(*table, mi->gbAcc, pg->gChrom, pg->gStart, pg->gEnd);
        }
    }
else
    printf("no all_mrna table found<br>\n");
return pslList;
}
Ejemplo n.º 12
0
struct genePred *readGenes(char *chrom)
/* Load every row of geneTable on chrom as a genePred and return them as a
 * list in the order the query produced them. */
{
struct sqlConnection *conn = hAllocConn();
char sql[512];
struct sqlResult *result;
char **fields;
struct genePred *genes = NULL;

sqlSafef(sql, sizeof(sql), "select * from %s where chrom='%s' ", geneTable, chrom);
result = sqlGetResult(conn, sql);
for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
    {
    struct genePred *gene = genePredLoad(fields);
    slAddHead(&genes, gene);
    }
sqlFreeResult(&result);
hFreeConn(&conn);

/* Genes were pushed on the head, so flip the list back to query order. */
slReverse(&genes);
return genes;
}
Ejemplo n.º 13
0
struct genePred *loadGeneFromTable(struct sqlConnection *conn, char *table,
			     char *chrom, int chromStart, int chromEnd)
/** Load the genes that hRangeQuery() returns from table for
 * chrom:chromStart-chromEnd, as a list in query order. */
{
int skipCols = -100;  /* overwritten by hRangeQuery() */
struct sqlResult *result = hRangeQuery(conn, table, chrom, chromStart, chromEnd,
                                       NULL, &skipCols);
struct genePred *genes = NULL;
char **fields;

for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
    {
    struct genePred *loaded = genePredLoad(fields + skipCols);
    slSafeAddHead(&genes, loaded);
    }
sqlFreeResult(&result);

/* Entries were added at the head; restore query order. */
slReverse(&genes);
return genes;
}
Ejemplo n.º 14
0
static void capAliTextOnTrack(struct mafAli *maf,
                              char *db, char *chrom,
                              char *track, boolean onlyCds)
/* Upper-case the alignment columns covered by exons of the genes that
 * track has in the range of the alignment's first component.
 *   maf     - alignment; the text of every component that has text is modified.
 *   db      - database to connect to.
 *   chrom   - chromosome passed to the range query.
 *   track   - gene prediction table to query.
 *   onlyCds - when true, exons are first clipped to the gene's CDS. */
{
int binOffset;
struct sqlConnection *conn = sqlConnect(db);
struct mafComp *refComp = maf->components;
int winStart = refComp->start;
int winEnd = winStart + refComp->size;
struct sqlResult *result = hRangeQuery(conn, track, chrom, winStart, winEnd,
                                       NULL, &binOffset);
char **fields;

for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
    {
    struct genePred *gene = genePredLoad(fields + binOffset);
    int exonIx;
    for (exonIx = 0; exonIx < gene->exonCount; exonIx++)
        {
        struct mafComp *comp;
        int s = gene->exonStarts[exonIx];
        int e = gene->exonEnds[exonIx];
        if (onlyCds)
            {
            s = (s < gene->cdsStart) ? gene->cdsStart : s;
            e = (e > gene->cdsEnd) ? gene->cdsEnd : e;
            }
        /* Keep only the part inside the alignment window. */
        s = (s < winStart) ? winStart : s;
        e = (e > winEnd) ? winEnd : e;
        /* On success findAliRange() replaces s and e with positions in the
         * alignment text. */
        if (!findAliRange(refComp->text, maf->textSize, s-winStart, e-winStart, &s, &e))
            continue;
        for (comp = maf->components; comp != NULL; comp = comp->next)
            {
            if (comp->text != NULL)
                toUpperN(comp->text + s, e-s);
            }
        }
    genePredFree(&gene);
    }
sqlFreeResult(&result);
sqlDisconnect(&conn);
}
void geneStarts(char *chromosome, int start, int end)
/* geneStarts - print start of genes in database.
 * Lists every genieKnown gene in database hg3 on chromosome whose txStart
 * lies in [start, end), one "name on chrom:txStart-txEnd" line per gene.
 * If the query text does not fit the buffer, a message is written to
 * stderr and nothing is listed. */
{
    struct sqlConnection *conn = sqlConnect("hg3");
    struct sqlResult *sr;
    char **row;
    char query[256];
    int queryLen;
    struct genePred *gp;

    /* Bounded formatting: chromosome comes from the caller and must not be
     * able to overrun the fixed-size buffer. */
    queryLen = snprintf(query, sizeof(query),
            "select * from genieKnown where chrom = '%s' and txStart >= %d and txStart < %d",
            chromosome, start, end);
    if (queryLen < 0 || queryLen >= (int)sizeof(query))
    {
        fprintf(stderr, "geneStarts: query for chromosome '%s' does not fit in %d bytes\n",
                chromosome, (int)sizeof(query));
        sqlDisconnect(&conn);
        return;
    }
    sr = sqlGetResult(conn, query);
    while ((row = sqlNextRow(sr)) != NULL)
    {
        /* NOTE(review): gp is not released anywhere in this function. */
        gp = genePredLoad(row);
        printf("%s on %s:%d-%d\n", gp->name, gp->chrom, gp->txStart, gp->txEnd);
    }
    sqlFreeResult(&sr);
    sqlDisconnect(&conn);
}
struct genePred *loadGenePred(char *database, char *chrom, char *track, struct binKeeper *bk)
/* Load the gene predictions that track has on chrom.  Each one is added
 * to bk under its txStart..txEnd range and also returned in a list, in
 * query order. */
{
struct sqlConnection *conn = hAllocConn(database);
int binOffset;
struct sqlResult *result = hChromQuery(conn, track, chrom, NULL, &binOffset);
struct genePred *genes = NULL;
char **fields;

for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
    {
    struct genePred *gene = genePredLoad(fields + binOffset);
    binKeeperAdd(bk, gene->txStart, gene->txEnd, gene);
    slAddHead(&genes, gene);
    }
sqlFreeResult(&result);
hFreeConn(&conn);

/* Entries were added at the head; restore query order. */
slReverse(&genes);
return genes;
}
Ejemplo n.º 17
0
void txCdsGoodBed(char *database, char *outBed, char *outCds)
/* txCdsGoodBed - Create positive example training set for SVM. This is based on
 * the refSeq reviewed genes, but we fragment a certain percentage of them so as 
 * not to end up with a SVM that *requires* a complete transcript.
 *   database - database holding refGene and refSeqStatus; aborts if either
 *              table is missing.
 *   outBed   - file that receives the bed records.
 *   outCds   - file that receives the matching cds records.
 * Both output files are closed with carefulClose() before returning. */
{
struct sqlConnection *conn = sqlConnect(database);
char *refTrack = "refGene";
char *statusTable = "refSeqStatus";
if (!sqlTableExists(conn, refTrack))
    errAbort("table %s doesn't exist in %s", refTrack, database);
if (!sqlTableExists(conn, statusTable))
    errAbort("table %s doesn't exist in %s", statusTable, database);
FILE *fBed = mustOpen(outBed, "w");
FILE *fCds = mustOpen(outCds, "w");
char *query =
   "NOSQLINJ select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds "
   "from refGene r,refSeqStatus s where r.name=s.mrnaAcc and s.status='Reviewed'";
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;
double randScale = 1.0/RAND_MAX;
int id = 0;
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct genePred *gp = genePredLoad(row);
    int start = gp->txStart, end = gp->txEnd;
    char *type = "refReviewed";
    /* With probability frag, output only part of the gene. */
    if (rand()*randScale < frag)
        {
	double midRatio = rand()*randScale;
	if (midRatio > 0.5)
	     gpFragLimits(gp, 0, midRatio, &start, &end);
	else
	     gpFragLimits(gp, midRatio, 1.0, &start, &end);
	type = "refFrag";
	}
    /* The bed and cds records for one gene share the same id. */
    gpPartOutAsBed(gp, start, end, fBed, type, ++id, 0);
    gpPartOutAsCds(gp, start, end, fCds, type, id);
    }
/* NOTE(review): sr, conn and the loaded genePreds are not released here. */
carefulClose(&fBed);
carefulClose(&fCds);
}
void addGenePred(struct hash *chromHash, char **row)
/* Load the genePred in row and add each of its exons to the binKeeper that
 * getChromBins() returns for the gene's chrom and strand.  Every exon entry
 * refers to one shared geneLoc created from chromHash->lm.  When gCdsOnly
 * is set, exons are clipped to the CDS and exons left empty are skipped. */
{
struct genePred *pred = genePredLoad(row);
struct binKeeper *bins = getChromBins(chromHash, pred->chrom, pred->strand);
struct geneLoc *loc = geneLocNew(chromHash->lm, pred->name, pred->chrom,
                                 pred->strand, pred->txStart, pred->txEnd);
int exonIx = 0;

while (exonIx < pred->exonCount)
    {
    int start = pred->exonStarts[exonIx];
    int end = pred->exonEnds[exonIx];
    if (gCdsOnly)
        {
        /* Clip the exon to the coding region. */
        if (start < pred->cdsStart)
            start = pred->cdsStart;
        if (end > pred->cdsEnd)
            end = pred->cdsEnd;
        }
    if (start < end)
        binKeeperAdd(bins, start, end, loc);
    ++exonIx;
    }
genePredFree(&pred);
}
Ejemplo n.º 19
0
struct genePred *getCurGenePred(struct sqlConnection *conn)
/* Return the current gene as a genePred, looked up by curGeneId,
 * curGeneChrom, curGeneStart and curGeneEnd in the table that
 * hFindSplitTable() gives for the "knownGene" genome setting.  Aborts if no
 * row matches. */
{
char *trackName = genomeSetting("knownGene");
char splitTable[64];
boolean binned;
char sql[256];
struct sqlResult *result;
char **fields;
struct genePred *found = NULL;

hFindSplitTable(sqlGetDatabase(conn), curGeneChrom, trackName, splitTable, &binned);
sqlSafef(sql, sizeof(sql),
	"select * from %s where name = '%s' "
	"and chrom = '%s' and txStart=%d and txEnd=%d"
	, splitTable, curGeneId, curGeneChrom, curGeneStart, curGeneEnd);
result = sqlGetResult(conn, sql);
fields = sqlNextRow(result);
if (fields != NULL)
    {
    /* Skip the leading bin column when the table has one. */
    found = genePredLoad(fields + binned);
    }
sqlFreeResult(&result);

if (found == NULL)
    errAbort("getCurGenePred: Can't find %s", sql);
return found;
}
Ejemplo n.º 20
0
void intronSizes(char *database, char *table)
/* intronSizes - Output list of intron sizes.
 * For every gene in table (restricted to chromName when that is set, and to
 * rows with txStart != cdsStart when the "withUtr" cgi variable is true)
 * the introns produced by genePredIntrons() are written to stdout as
 * 6-field bed lines. */
{
    struct dyString *sql = newDyString(1024);
    struct sqlConnection *conn;
    struct sqlResult *result;
    struct bed *intronList = NULL;
    struct bed *intron = NULL;
    char **fields;
    char *joiner = "where";  /* keyword that introduces the next condition */
    int binOffset;

    hSetDb(database);
    binOffset = hOffsetPastBin(NULL, table);
    conn = hAllocConn(database);

    sqlDyStringPrintf(sql, "select * from %s", table);
    if (chromName != NULL)
    {
        dyStringPrintf(sql, " where chrom = '%s'", chromName);
        joiner = "and";
    }
    if (cgiBoolean("withUtr"))
        dyStringPrintf(sql, " %s txStart != cdsStart", joiner);

    result = sqlGetResult(conn, sql->string);
    for (fields = sqlNextRow(result); fields != NULL; fields = sqlNextRow(result))
    {
        struct genePred *gene = genePredLoad(fields + binOffset);
        genePredIntrons(gene, &intronList);
        slReverse(&intronList);
        intron = intronList;
        while (intron != NULL)
        {
            bedTabOutN(intron, 6, stdout);
            intron = intron->next;
        }
        bedFreeList(&intronList);
        genePredFree(&gene);
    }
    sqlFreeResult(&result);
    hFreeConn(&conn);
}
void txCdsBadBed(char *database, 
	char *altSpliceBed, char *outBed)
/* txCdsBadBed - Create a bed file with regions that don't really have CDS, 
 * but that might look like it..
 *   database     - database holding refGene, vegaPseudoGene and
 *                  retroMrnaInfo; aborts if any of them is missing.
 *   altSpliceBed - file read with loadRetainedAndBleeding().
 *   outBed       - output bed file.
 * Four kinds of records are written, each numbered from the shared id
 * counter: introny regions over 400 bases, refGene UTR-side regions over
 * 400 bases plus an antisense copy for roughly one gene in twenty, all
 * vegaPseudoGene genes, and retroMrnaInfo rows scoring above 600. */
{
/* Open up database and make sure all the tables we want are there. */
char *refTrack = "refGene";
char *vegaPseudo = "vegaPseudoGene";
char *retroPseudo = "retroMrnaInfo";
struct sqlConnection *conn = sqlConnect(database);
if (!sqlTableExists(conn, refTrack))
    errAbort("table %s doesn't exist in %s", refTrack, database);
if (!sqlTableExists(conn, vegaPseudo))
    errAbort("table %s doesn't exist in %s", vegaPseudo, database);
if (!sqlTableExists(conn, retroPseudo))
    errAbort("table %s doesn't exist in %s", retroPseudo, database);

/* Read in alt file and output larger retained and bleeding introns. */
struct bed *bed, *intronyList = loadRetainedAndBleeding(altSpliceBed);
FILE *f = mustOpen(outBed, "w");
for (bed = intronyList; bed != NULL; bed = bed->next)
    {
    int size = bed->chromEnd - bed->chromStart;
    if (size > 400)
	{
	fprintf(f, "%s\t%d\t%d\t", bed->chrom, bed->chromStart, bed->chromEnd);
	fprintf(f, "%s%d\t", bed->name, ++id);
	fprintf(f, "%d\t%s\t", bed->score, bed->strand);
	fprintf(f, "0\t0\t0\t1\t");
	fprintf(f, "%d,\t%d,\n", bed->chromEnd - bed->chromStart, 0);
	}
    }

/* Read in refGene, and write out larger 3' UTRs, and occassional antisense copies.  */
char query[512];
safef(query, sizeof(query), "select * from %s", refTrack);
int rowOffset = 0;
if (sqlFieldIndex(conn, refTrack, "bin") == 0)
    rowOffset = 1;
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct genePred *gp = genePredLoad(row + rowOffset);
    int start, end;
    /* Pick the region between the CDS and the transcript end on '+',
     * or between the transcript start and the CDS otherwise. */
    if (gp->strand[0] == '+')
        {
	start = gp->cdsEnd;
	end = gp->txEnd;
	}
    else
        {
	start = gp->txStart;
	end = gp->cdsStart;
	}
    if (end - start > 400)
        {
	gpPartOutAsBed(gp, start, end, f, "utr", ++id, 400);
	}
    if (rand()%20 == 0)
        {
	/* Flip the strand in place and output the whole transcript. */
	gp->strand[0] = (gp->strand[0] == '+' ? '-' : '+');
	gpPartOutAsBed(gp, gp->txStart, gp->txEnd, f, "anti", ++id, 0);
	}
    }
sqlFreeResult(&sr);

/* Write out vega pseudo-genes. */
safef(query, sizeof(query), "select * from %s", vegaPseudo);
rowOffset = 0;
if (sqlFieldIndex(conn, vegaPseudo, "bin") == 0)
    rowOffset = 1;
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct genePred *gp = genePredLoad(row + rowOffset);
    gpPartOutAsBed(gp, gp->txStart, gp->txEnd, f, "vega", ++id, 0);
    }
/* Release this result before sr is reused for the next query. */
sqlFreeResult(&sr);

/* Write out retroGenes. */
safef(query, sizeof(query), "select * from %s where score > 600", retroPseudo);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    struct bed *bed = bedLoad12(row);
    char name[128];
    safef(name, sizeof(name), "retro_%d_%s", ++id, bed->name);
    /* The record's name now points at the local buffer, which is only
     * valid for the output call below. */
    bed->name = name;
    bedTabOutN(bed, 12, f);
    }
sqlFreeResult(&sr);

carefulClose(&f);
}
Ejemplo n.º 22
0
void intronEnds(char *database, char *table)
/* intronEnds - Gather stats on intron ends..
 * For every intron of every gene in table (restricted to chromName when
 * that is set, and to rows with txStart != cdsStart when the "withUtr" cgi
 * variable is true) the first two and last two bases of the intron are
 * read, reverse-complemented on the '-' strand, and counted as gt/ag,
 * gc/ag, at/ac or ct/ac.  Counts and percentages of the total are printed
 * to stdout; percentages are reported as 0 when no intron was seen. */
{
struct dyString *query = newDyString(1024);
struct sqlConnection *conn;
struct sqlResult *sr;
char **row;
struct genePred *gp;
int total = 0;
int gtag = 0;
int gcag = 0;
int atac = 0;
int ctac = 0;
double gtagPct = 0.0, gcagPct = 0.0, atacPct = 0.0, ctacPct = 0.0;
DNA ends[4];
int exonIx, txStart;
struct dnaSeq *seq;
int rowOffset;
char strand;

rowOffset = hOffsetPastBin(database, NULL, table);
conn = hAllocConn(database);
sqlDyStringPrintf(query, "select * from %s", table);
if (chromName != NULL)
    dyStringPrintf(query, " where chrom = '%s'", chromName);
if (cgiBoolean("withUtr"))
    {
    dyStringPrintf(query, " %s txStart != cdsStart", 
        (chromName == NULL ? "where" : "and"));
    }
sr = sqlGetResult(conn, query->string);
while ((row = sqlNextRow(sr)) != NULL)
    {
    gp = genePredLoad(row+rowOffset);
    strand = gp->strand[0];
    txStart = gp->txStart;
    /* Sequence of the whole transcript; positions below are relative to
     * txStart. */
    seq = hDnaFromSeq(database, gp->chrom, txStart, gp->txEnd, dnaLower);
    for (exonIx=1; exonIx < gp->exonCount; ++exonIx)
        {
	++total;
	/* First two bases after the previous exon, last two before this one. */
	memcpy(ends, seq->dna + gp->exonEnds[exonIx-1] - txStart, 2);
	memcpy(ends+2, seq->dna + gp->exonStarts[exonIx] - txStart - 2, 2);
	if (strand == '-')
	    reverseComplement(ends, 4);
	if (ends[0] == 'g' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'g')
	   ++gtag;
	if (ends[0] == 'g' && ends[1] == 'c' && ends[2] == 'a' && ends[3] == 'g')
	   ++gcag;
	if (ends[0] == 'a' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'c')
	   ++atac;
	if (ends[0] == 'c' && ends[1] == 't' && ends[2] == 'a' && ends[3] == 'c')
	   ++ctac;
	}
    freeDnaSeq(&seq);
    genePredFree(&gp);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);

/* Only divide when at least one intron was counted. */
if (total > 0)
    {
    gtagPct = 100.0*gtag/total;
    gcagPct = 100.0*gcag/total;
    atacPct = 100.0*atac/total;
    ctacPct = 100.0*ctac/total;
    }
printf("gt/ag %d (%4.2f)\n", gtag, gtagPct);
printf("gc/ag %d (%4.2f)\n", gcag, gcagPct);
printf("at/ac %d (%4.2f)\n", atac, atacPct);
printf("ct/ac %d (%4.2f)\n", ctac, ctacPct);
printf("Total %d\n", total);
}
Ejemplo n.º 23
0
void knownToVisiGene(char *database)
/* knownToVisiGene - Create knownToVisiGene table by riffling through various other knownTo tables.
 * Steps, in order:
 *   1. Hash the non-private images in visiDb by probe, gene name,
 *      locusLink, refSeq and genbank values.
 *   2. Load knownGene from database, keeping the first row seen per name.
 *   3. Build hashes from known gene to locusLink, refSeq, gene
 *      symbol/alias and (when the probe tables exist) probes.
 *   4. For each known gene pick an image and write name, image id and
 *      gene id to a tab file.
 *   5. Create outTable and load the tab file into it. */
{
char *tempDir = ".";
FILE *f = hgCreateTabFile(tempDir, outTable);
struct sqlConnection *hConn = sqlConnect(database);
struct sqlConnection *iConn = sqlConnect(visiDb);
struct sqlResult *sr;
char **row;
struct hash *geneImageHash = newHash(18);
struct hash *locusLinkImageHash = newHash(18);
struct hash *refSeqImageHash = newHash(18);
struct hash *genbankImageHash = newHash(18);
struct hash *probeImageHash = newHash(18);
struct hash *knownToLocusLinkHash = newHash(18);
struct hash *knownToRefSeqHash = newHash(18);
struct hash *knownToGeneHash = newHash(18);
struct hash *favorHugoHash = newHash(18);
struct hash *knownToProbeHash = newHash(18);
struct hash *knownToAllProbeHash = newHash(18);
struct genePred *knownList = NULL, *known;
struct hash *dupeHash = newHash(17);


/* The probes database defaults to database unless the probesDb option is given. */
probesDb  = optionVal("probesDb", database);
struct sqlConnection *probesConn = sqlConnect(probesDb);
vgProbes = sqlTableExists(probesConn,"vgProbes");
vgAllProbes = sqlTableExists(probesConn,"vgAllProbes");

/* Go through and make up hashes of images keyed by various fields. */
sr = sqlGetResult(iConn,
        NOSQLINJ "select image.id,imageFile.priority,gene.name,gene.locusLink,gene.refSeq,gene.genbank"
	",probe.id,submissionSet.privateUser,vgPrbMap.vgPrb,gene.id"
	" from image,imageFile,imageProbe,probe,gene,submissionSet,vgPrbMap"
	" where image.imageFile = imageFile.id"
	" and image.id = imageProbe.image"
	" and imageProbe.probe = probe.id"
	" and probe.gene = gene.id"
	" and image.submissionSet=submissionSet.id"
	" and vgPrbMap.probe = probe.id");

while ((row = sqlNextRow(sr)) != NULL)
    {
    int id = sqlUnsigned(row[0]);
    float priority = atof(row[1]);
    int privateUser = sqlSigned(row[7]);
    char vgPrb_Id[256];
    safef(vgPrb_Id, sizeof(vgPrb_Id), "vgPrb_%s",row[8]);
    int geneId = sqlUnsigned(row[9]);
    /* Only images whose submission set has privateUser 0 are used. */
    if (privateUser == 0)
	{
	addPrioritizedImage(probeImageHash, id, priority, geneId, vgPrb_Id);
	addPrioritizedImage(geneImageHash, id, priority, geneId, row[2]);
	addPrioritizedImage(locusLinkImageHash, id, priority, geneId, row[3]);
	addPrioritizedImage(refSeqImageHash, id, priority, geneId, row[4]);
	addPrioritizedImage(genbankImageHash, id, priority, geneId, row[5]);
	}
    }
verbose(2, "Made hashes of image: geneImageHash %d, locusLinkImageHash %d, refSeqImageHash %d"
           ", genbankImageHash %d probeImageHash %d\n", 
            geneImageHash->elCount, locusLinkImageHash->elCount, refSeqImageHash->elCount, 
	    genbankImageHash->elCount, probeImageHash->elCount);
sqlFreeResult(&sr);

/* Build up list of known genes. */
sr = sqlGetResult(hConn, NOSQLINJ "select * from knownGene");
while ((row = sqlNextRow(sr)) != NULL)
    {
    /* This declaration shadows the function-level known for the loop body.
     * A gene whose name was already seen is loaded but not added to the
     * list, and is not freed. */
    struct genePred *known = genePredLoad(row);
    if (!hashLookup(dupeHash, known->name))
        {
	hashAdd(dupeHash, known->name, NULL);
	slAddHead(&knownList, known);
	}
    }
slReverse(&knownList);
sqlFreeResult(&sr);
verbose(2, "Got %d known genes\n", slCount(knownList));

/* Build up hashes from knownGene to other things. */
if (vgProbes)
    bestProbeOverlap(probesConn, "vgProbes", knownList, knownToProbeHash);
if (vgAllProbes)
    bestProbeOverlap(probesConn, "vgAllProbes", knownList, knownToAllProbeHash);

foldIntoHash(hConn, "knownToLocusLink", "name", "value", knownToLocusLinkHash, NULL, FALSE);
foldIntoHash(hConn, "knownToRefSeq", "name", "value", knownToRefSeqHash, NULL, FALSE);
foldIntoHash(hConn, "kgXref", "kgID", "geneSymbol", knownToGeneHash, favorHugoHash, FALSE);
foldIntoHash(hConn, "kgAlias", "kgID", "alias", knownToGeneHash, favorHugoHash, TRUE);
foldIntoHash(hConn, "kgProtAlias", "kgID", "alias", knownToGeneHash, favorHugoHash, TRUE);

verbose(2, "knownToLocusLink %d, knownToRefSeq %d, knownToGene %d knownToProbe %d knownToAllProbe %d\n", 
   knownToLocusLinkHash->elCount, knownToRefSeqHash->elCount, knownToGeneHash->elCount,
   knownToProbeHash->elCount, knownToAllProbeHash->elCount);

/* Try and find an image for each gene. */
for (known = knownList; known != NULL; known = known->next)
    {
    char *name = known->name;
    struct prioritizedImage *best = NULL;
    /* Sources are tried in a fixed order; the first one that yields an
     * image wins: locusLink, refSeq, genbank (keyed directly by the gene
     * name), gene symbol/alias, vgProbes, vgAllProbes. */
    {
    best = bestImage(name, knownToLocusLinkHash, locusLinkImageHash);
    if (!best)
	best = bestImage(name, knownToRefSeqHash, refSeqImageHash);
    if (!best)
	{
	best = hashFindVal(genbankImageHash, name);
	}
    if (!best)
	best = bestImage(name, knownToGeneHash, geneImageHash);
    if (vgProbes && !best)
	best = bestImage(name, knownToProbeHash, probeImageHash);
    if (vgAllProbes && !best)
	best = bestImage(name, knownToAllProbeHash, probeImageHash);
    }	    
    if (best)
        {
	fprintf(f, "%s\t%d\t%d\n", name, best->imageId, best->geneId);
	}
    }

/* Create the output table and load the tab file into it, then remove the file. */
createTable(hConn, outTable);
hgLoadTabFile(hConn, tempDir, outTable, &f);
hgRemoveTabFile(tempDir, outTable);
}
Ejemplo n.º 24
0
int main(int argc, char *argv[])
/* CGI entry point.  Reads the required 'db' and 'cmd' CGI variables (and the
 * optional 'jsonp' one), builds the response for the requested command in a
 * dyString and hands it to apiOut().  Commands handled here:
 *   defaultPos            - default position for the database
 *   metaDb                - list of [label,value] pairs for a metaDb variable
 *   hgt_mdbVal<N>         - HTML input control for a metaDb variable
 *   tableMetadata         - HTML metadata table for a track
 *   codonToPos/exonToPos  - position of a codon/exon number within a gene
 * Any other command, or a missing/invalid parameter, ends in errAbort(). */
{
long enteredMainTime = clock1000();
struct dyString *output = newDyString(10000);

setUdcCacheDir();
cgiSpoof(&argc, argv);
pushWarnHandler(htmlVaBadRequestAbort);
pushAbortHandler(htmlVaBadRequestAbort);

char *database = cgiString("db");
char *cmd = cgiString("cmd");
char *jsonp = cgiOptionalString("jsonp");
if (!hDbExists(database))
    errAbort("Invalid database '%s'", database);

if (!strcmp(cmd, "defaultPos"))
    {
    dyStringPrintf(output, "{\"pos\": \"%s\"}", hDefaultPos(database));
    }
else if (!strcmp(cmd, "metaDb"))
    {
    // Return list of values for given metaDb var
    // e.g. http://genome.ucsc.edu/hgApi?db=hg18&cmd=metaDb&var=cell

    struct sqlConnection *conn = hAllocConn(database);
    boolean metaDbExists = sqlTableExists(conn, "metaDb");
    if (metaDbExists)
        {
        char *var = cgiOptionalString("var");
        if (!var)
            errAbort("Missing var parameter");
        boolean fileSearch = (cgiOptionalInt("fileSearch",0) == 1);
        struct slPair *pairs = mdbValLabelSearch(conn, var, MDB_VAL_STD_TRUNCATION, FALSE,
                                                 !fileSearch, fileSearch);
        struct slPair *pair;
        dyStringPrintf(output, "[\n");
        for (pair = pairs; pair != NULL; pair = pair->next)
            {
            // Comma-separate every entry except the first.
            if (pair != pairs)
                dyStringPrintf(output, ",\n");
            dyStringPrintf(output, "['%s','%s']", javaScriptLiteralEncode(mdbPairLabel(pair)),
                           javaScriptLiteralEncode(mdbPairVal(pair)));
            }
        dyStringPrintf(output, "\n]\n");
        // Everything needed has been copied into output; release the list the same
        // way the hgt_mdbVal branch below does.
        slPairFreeList(&pairs);
        }
    else
        errAbort("Assembly does not support metaDb");
    // Hand the connection back, as the codonToPos/exonToPos branch does.
    hFreeConn(&conn);
    }
// TODO: move to lib since hgTracks and hgApi share
#define METADATA_VALUE_PREFIX    "hgt_mdbVal"
else if (startsWith(METADATA_VALUE_PREFIX, cmd))
    {
    // Returns metaDb value control: drop down or free text, with or without help link.
    // e.g. http://genome.ucsc.edu/hgApi?db=hg18&cmd=hgt_mdbVal3&var=cell

    // TODO: Move guts to lib, so that hgTracks::searchTracks.c and hgApi.c can share

    struct sqlConnection *conn = hAllocConn(database);
    boolean metaDbExists = sqlTableExists(conn, "metaDb");
    if (metaDbExists)
        {
        char *var = cgiOptionalString("var");
        if (!var)
            errAbort("Missing var parameter");

        // The digits following the prefix are a 1 based index; atoi() yields 0 when
        // they are missing or not numeric, which is rejected here.
        int ix = atoi(cmd+strlen(METADATA_VALUE_PREFIX));
        if (ix == 0)
            errAbort("Unsupported 'cmd' parameter");

        enum cvSearchable searchBy = cvSearchMethod(var);
        char name[128];
        safef(name,sizeof name,"%s%i",METADATA_VALUE_PREFIX,ix);
        if (searchBy == cvSearchBySingleSelect || searchBy == cvSearchByMultiSelect)
            {
            boolean fileSearch = (cgiOptionalInt("fileSearch",0) == 1);
            struct slPair *pairs = mdbValLabelSearch(conn, var, MDB_VAL_STD_TRUNCATION, FALSE,
                                                     !fileSearch, fileSearch);
            if (slCount(pairs) > 0)
                {
                char *dropDownHtml =
                                cgiMakeSelectDropList((searchBy == cvSearchByMultiSelect),
                                                      name, pairs, NULL, ANYLABEL, "mdbVal",
                                                      "style='min-width: 200px; font-size: .9em;' "
                                                      "onchange='findTracksMdbValChanged(this);'");
                if (dropDownHtml)
                    {
                    dyStringAppend(output,dropDownHtml);
                    freeMem(dropDownHtml);
                    }
                slPairFreeList(&pairs);
                }
            }
        else if (searchBy == cvSearchByFreeText)
            {
            dyStringPrintf(output,"<input type='text' name='%s' value='' class='mdbVal freeText' "
                           "onchange='findTracksMdbValChanged(this);' style='max-width:310px; "
                           "width:310px; font-size:.9em;'>", name);
            }
        else if (searchBy == cvSearchByWildList)
            {
            dyStringPrintf(output,"<input type='text' name='%s' value='' class='mdbVal wildList' "
                           "title='enter comma separated list of values' "
                           "onchange='findTracksMdbValChanged(this);' style='max-width:310px; "
                           "width:310px; font-size:.9em;'>", name);
            }
        else if (searchBy == cvSearchByDateRange || searchBy == cvSearchByIntegerRange)
            {
            // TO BE IMPLEMENTED
            }
        else
            errAbort("Metadata variable not searchable");

        dyStringPrintf(output,"<span id='helpLink%i'>&nbsp;</span>",ix);
        }
    else
        errAbort("Assembly does not support metaDb");
    // Hand the connection back, as the codonToPos/exonToPos branch does.
    hFreeConn(&conn);
    }
else if (!strcmp(cmd, "tableMetadata"))
    { // returns an html table with metadata for a given track
    char *trackName = cgiOptionalString("track");
    boolean showLonglabel = (NULL != cgiOptionalString("showLonglabel"));
    boolean showShortLabel = (NULL != cgiOptionalString("showShortLabel"));
    if (trackName != NULL)
        {
        // hTrackDbForTrackAndAncestors avoids overhead of getting whole track list!
        struct trackDb *tdb = hTrackDbForTrackAndAncestors(database, trackName);
        if (tdb != NULL)
            {
            char * html = metadataAsHtmlTable(database,tdb,showLonglabel,showShortLabel);
            if (html)
                {
                dyStringAppend(output,html);
                freeMem(html);
                }
            else
                dyStringPrintf(output,"No metadata found for track %s.",trackName);
            }
        else
            dyStringPrintf(output,"Track %s not found",trackName);
        }
    else
        dyStringAppend(output,"No track variable found");
    }
else if (sameString(cmd, "codonToPos") || sameString(cmd, "exonToPos"))
    {
    // Look the gene up by name in the given table and translate a codon or exon
    // number ('num') into a chrom:start-end position.
    char query[256];
    struct sqlResult *sr;
    char **row;
    struct genePred *gp;
    char *name = cgiString("name");
    char *table = cgiString("table");
    int num = cgiInt("num");
    struct sqlConnection *conn = hAllocConn(database);
    sqlSafef(query, sizeof(query), "select name, chrom, strand, txStart, txEnd, cdsStart, cdsEnd, exonCount, exonStarts, exonEnds from %s where name = '%s'", table, name);
    sr = sqlGetResult(conn, query);
    if ((row = sqlNextRow(sr)) != NULL)
        {
        gp = genePredLoad(row);
        boolean found;
        int start, end;
        if (sameString(cmd, "codonToPos"))
            found = codonToPos(gp, num, &start, &end);
        else
            found = exonToPos(gp, num, &start, &end);
        if (found)
            // start is reported 1 based (start + 1), end as is.
            dyStringPrintf(output, "{\"pos\": \"%s:%d-%d\"}", gp->chrom, start + 1, end);
        else
            dyStringPrintf(output, "{\"error\": \"%d is an invalid %s for this gene\"}", num, sameString(cmd, "codonToPos") ? "codon" : "exon");
        }
    else
        // NOTE(review): name comes straight from the CGI request and is written into
        // the JSON text unescaped -- confirm whether it needs encoding here.
        dyStringPrintf(output, "{\"error\": \"Couldn't find item: %s\"}", name);
    sqlFreeResult(&sr);
    hFreeConn(&conn);
    }
else
    {
    warn("unknown cmd: %s",cmd);
    errAbort("Unsupported 'cmd' parameter");
    }

apiOut(dyStringContents(output), jsonp);
cgiExitTime("hgApi", enteredMainTime);
return 0;
}
Ejemplo n.º 25
0
static void displayMappingInfo(struct sqlConnection *conn, struct mappingInfo *mi)
/* Print the "Retrogene Statistics" HTML table for the retrogene in mi->pg.
 * The alignment and score table names are built from mi->tblPre, mi->geneSet
 * and, when present, mi->suffix.  conn is used for the max-score lookup, the
 * alignment fetch and the optional refGene / rbRetroParent exon-frame rows.
 * All output goes to stdout. */
{
struct ucscRetroInfo *pg = mi->pg;
double  wt[12];     /* weights on score function*/
char query[512];
char *name;
char alignTbl[128];
char scoreSql[128];
struct psl *psl;
float coverFactor = 0;
float maxOverlap = 0;
float polyAPercent = 0;
if (mi->suffix == NULL)
    {
    safef(alignTbl, sizeof(alignTbl), "%s%sAli", mi->tblPre, mi->geneSet);
    sqlSafef(scoreSql, sizeof(scoreSql), "select max(score) from %s%sInfo", mi->tblPre, mi->geneSet);
    }
else
    {
    safef(alignTbl, sizeof(alignTbl), "%s%sAli%s", mi->tblPre, mi->geneSet, mi->suffix);
    sqlSafef(scoreSql, sizeof(scoreSql), "select max(score) from %s%sInfo%s", mi->tblPre, mi->geneSet, mi->suffix);
    }
printf("<TABLE class=\"transMap\">\n");
printf("<H3>Retrogene Statistics:</H3>\n");
printf("<THEAD>\n");
printf("<TR><TH>Feature<TH>Value </TR>\n");
printf("</THEAD><TBODY>\n");
if (sameString(pg->type, "singleExon"))
    printf("<TR><TH>Type of Parent<TD>%s</tr>\n",pg->type);
else
    printf("<TR><TH>Expression of Retrogene<TD>%s</TR>\n",pg->type);
printf("<TR><TH>Score <TD>%d (range from 0 - %d)</TR>\n",
        pg->score,
        sqlQuickNum(conn, scoreSql) );
printf("<TR><TH>Parent Gene Alignment Coverage (Bases&nbsp;Matching Parent) <TD>%d %% &nbsp;(%d bp) </TR>\n", pg->coverage, pg->matches);
printf("<TR><TH>Introns Processed Out <TD>%d out of %d (%d exons covered)\n", pg->processedIntrons, (pg->parentSpliceCount/2), pg->exonCover);
printf("<TR><TH>Possible Introns or Gaps in Retrogene<TD>%d,%d\n", pg->intronCount, pg->alignGapCount);
printf("<TR><TH>Conserved Splice Sites<TD>%d</TR>\n",  pg->conservedSpliceSites);
printf("<TR><TH>Parent Splice Sites<TD>%d</TR>\n",  pg->parentSpliceCount);
/* maxOverlap, coverFactor and wt[] are only consumed by the "#ifdef score" section below. */
/* NOTE(review): psl is never released in this function -- confirm who owns the
 * list returned by getAlignments(). */
psl = getAlignments(conn, alignTbl, mi->pg->name);
if (psl != NULL)
    {
    maxOverlap = (float)pg->maxOverlap/(float)(psl->match+psl->misMatch+psl->repMatch)  ;
    coverFactor = ((float)(psl->qSize-psl->qEnd)/(float)psl->qSize);
    }
else
    {
    maxOverlap = 0;
    }
wt[0] = 0; wt[1] = 0.85; wt[2] = 0.2; wt[3] = 0.3; wt[4] = 0.8;
wt[5] = 1; wt[6] = 1  ; wt[7] = 0.5; wt[8] = 0.5; wt[9] = 1; wt[10] = 1;
#ifdef debug
char table[512];
struct psl *pslList = getParentAligns(conn, mi, &table);
if (psl != NULL)
    {
    printf("<TR><TH>Blocks in retro:gap%%/intronsSpliced <TD>\n");
    printBlocks(psl, MAXBLOCKGAP, pslList);
    printf("</td></TR>\n");
    }
if (pslList != NULL)
    {
    printf("<TR><TH>Exons in parent:gap%% <TD>\n");
    printBlocks(pslList, MAXBLOCKGAP, NULL);
    printf("</td></TR>\n");
    pslFreeList(&pslList);
    }
#endif
/* Avoid dividing by a zero tail length, which would print inf/nan; report 0%% instead. */
if (pg->polyAlen != 0)
    polyAPercent = (float)pg->polyA*100/(float)pg->polyAlen;
printf("<TR><TH>Length of PolyA Tail<TD>%d As&nbsp;out&nbsp;of&nbsp;%d&nbsp;bp </TR><TR><TH>%% A's from Parent PolyA tail (Position)<TD>%5.1f&nbsp;%%\n",pg->polyA,pg->polyAlen, polyAPercent);
if (pg->polyAstart < 0)
    printf("&nbsp;(%d&nbsp;bp&nbsp;before&nbsp;end&nbsp;of&nbsp;retrogene)<br>\n",-(pg->polyAstart));
else
    printf("&nbsp;(%d&nbsp;bp&nbsp;past&nbsp;end&nbsp;of&nbsp;retrogene)<br>\n",pg->polyAstart);

printf("<tr><th>mRNA Expression Evidence<td>");
if (!sameString(pg->overName, "none"))
    printf("%s&nbsp;(overlap:&nbsp;&nbsp;%d&nbsp;bp)\n", pg->overName, pg->maxOverlap);
else
    printf("No&nbsp;overlapping");
printf("<TR><TH>BESTORF Score (>50 is good)<TD>%4.0f</td></TR>\n",pg->posConf);
#ifdef score
printf("<TR><TH>score function<TD>1:xon %d %4.1f conSS %d 2: ax %4.1f 3: pA %4.1f 4: net + %4.1f max (%d, %d) 5: procIntrons %d %4.1f 6:in.cnt %d -%4.1f 7:overlap - %4.1f  8:cov %d*(qe %d- qsz %d)/%d=%4.1f 9:tRep - %4.1f 10:oldintron %d %4.1f </td></TR>\n",
                pg->exonCover,
                wt[1]*(log(pg->exonCover+1)/log(2))*200 ,
                pg->conservedSpliceSites,
                wt[2]*(((log(pg->axtScore>0?pg->axtScore:1)/log(2))*170)-1000),
                wt[3]*(log(pg->polyAlen+2)*200) ,
                wt[4]*overlapOrtholog*10 , pg->overlapMouse, pg->overlapDog,
                pg->processedIntrons,
                wt[5]*(((log(pg->processedIntrons > 0 ? pg->processedIntrons : 1))/log(2))*600) ,
                pg->intronCount,
                wt[6]*pow(pg->intronCount,0.5)*750 ,
                wt[7]*(maxOverlap*300),
                pg->coverage, pg->qEnd, pg->qSize , pg->qSize,
                wt[8]*((pg->coverage/100.0)*(1.0-coverFactor)*300.0),
                wt[9]*(pg->tReps*10),
                pg->alignGapCount,
                wt[10]*pg->alignGapCount);
printf("<TR><TH>score function<TD>%4.1f+ %4.1f+ %4.1f+ %4.1f+ %4.1f - %4.1f - %4.1f+ %4.1f - %4.1f - %4.1f</td></TR>\n",
                wt[1]*(log(pg->exonCover+1)/log(2))*200 ,
                wt[2]*(((log(pg->axtScore>0?pg->axtScore:1)/log(2))*170)-1000),
                wt[3]*(log(pg->polyAlen+2)*200) ,
                wt[4]*overlapOrtholog*10 ,
                wt[5]*(((log(pg->processedIntrons > 0 ? pg->processedIntrons : 1))/log(2))*600) ,
                (float)wt[6]*pow(pg->intronCount,0.5)*750 ,
                (float)wt[7]*(maxOverlap*300),
                wt[8]*((pg->coverage/100.0)*(1.0-coverFactor)*300.0),
                wt[9]*(pg->tReps*10),
                wt[10]*pg->alignGapCount);
if (pg->kaku > 0 && pg->kaku < 1000000)
    printf("<TR><TH>KA/KU mutation rate in non-syn sites vs utr with repect to parent gene<TD>%4.2f</TR>\n",  pg->kaku);
#endif
#ifdef xxx
/* NOTE(review): this section cannot be enabled as written -- sr, row and
 * overlappingGene are not declared in this function, and the query text has no
 * comparison operator after txStart. */
sqlSafef(query, sizeof(query), "select * from refGene where chrom = '%d' and txEnd > %d and txStart %d and name = '%s'",
        pg->chrom, pg->gStart, pg->gEnd , pg->overName );
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    overlappingGene = genePredLoad(row);
if (overlappingGene != NULL)
    {
    printf ("CDS exons %d ",genePredcountCdsExons(overlappingGene));
    }

#endif
printf("</tr>\n");
if ( differentString("none",pg->overName) &&
    sqlFieldIndex(conn, "refGene", "exonFrames") != -1)
    {
    sqlSafef(query, sizeof(query),
            "select concat(exonFrames,'(',cdsStart,')') from refGene where name = '%s' and chrom = '%s'" ,
            pg->overName, pg->chrom);
    /* Run the query once and release the returned string when done. */
    char *overFrames = sqlQuickString(conn, query);
    if (overFrames != NULL)
        {
        printf("<TR><TH>Frame of retro %s (start)<TD>%s</TR>\n",
            pg->overName, overFrames);
        freeMem(overFrames);
        }
    }

/* Parent name is the retrogene name with its suffix chopped off. */
name = cloneString(pg->name);
chopSuffix(name);
if (hTableExists(database, "rbRetroParent"))
    {
    /* Only build and run the query when the table is actually there. */
    sqlSafef(query, sizeof(query),
            "select concat(exonFrames,'(',cdsStart,')') from rbRetroParent where name like '%s%%' and chrom = '%s'" ,
            name, pg->chrom);
    char *parentFrames = sqlQuickString(conn, query);
    if (parentFrames != NULL)
        {
        printf("<TR><TH>Frames of mapped parent %s (start)<TD>%s</TR>\n",
            name, parentFrames);
        freeMem(parentFrames);
        }
    }
freeMem(name);
printf("</TBODY></TABLE>\n");
}
Ejemplo n.º 26
0
struct annoRow *annoGratorGpVarIntegrate(struct annoGrator *gSelf,
					 struct annoStreamRows *primaryData,
					 boolean *retRJFilterFailed, struct lm *callerLm)
// Combine the primary variant row with every genePred row found near it and
// return the list of output rows produced for those combinations (NULL if none).
// Output rows are built with callerLm; scratch data uses self->lm, which is
// re-created on every call.  *retRJFilterFailed, when the pointer is given, is set
// to TRUE if nothing was produced and self->gpVarOverlapRule is agoMustOverlap.
{
struct annoGratorGpVar *self = (struct annoGratorGpVar *)gSelf;

// Throw away the previous call's scratch pool and start a new one.
lmCleanup(&(self->lm));
self->lm = lmInit(0);

// Stretch the primary row by GPRANGE on both sides (start clamped at 0) so the
// internal integrate call also picks up upstream/downstream genePreds, then put
// the real coordinates back.
struct annoRow *primaryRow = primaryData->rowList;
int savedStart = primaryRow->start;
int savedEnd = primaryRow->end;
primaryRow->start = (primaryRow->start > GPRANGE) ? primaryRow->start - GPRANGE : 0;
primaryRow->end += GPRANGE;
struct annoRow *gpRows = annoGratorIntegrate(gSelf, primaryData, retRJFilterFailed, self->lm);
primaryRow->start = savedStart;
primaryRow->end = savedEnd;

// Lazily pick the row-to-variant converter.
if (self->variantFromRow == NULL)
    setVariantFromRow(self, primaryData);

// Load the chromosome sequence unless the one already held is for this chrom.
if (self->curChromSeq == NULL || differentString(self->curChromSeq->name, primaryRow->chrom))
    {
    dnaSeqFree(&self->curChromSeq);
    struct twoBitFile *tbf = self->grator.streamer.assembly->tbf;
    self->curChromSeq = twoBitReadSeqFragLower(tbf, primaryRow->chrom, 0, 0);
    }

// TODO Performance improvement: rather than building the transcript sequence for
// every variant that intersects the transcript, cache the transcript sequence;
// possibly as an slPair whose name is a concatenation of {chrom, txStart, txEnd,
// cdsStart, cdsEnd, exonStarts, exonEnds} and whose val is the sequence.  Drop a
// cached item once it no longer appears in the rows returned by the internal
// annoGratorIntegrate call.
// BETTER YET: make a callback for gpFx to get CDS sequence only when it needs it.
char *refAllele = getGenomicSequence(self->curChromSeq->dna,
                                     primaryRow->start, primaryRow->end, self->lm);
struct variant *variant = self->variantFromRow(self, primaryRow, refAllele);

if (gpRows == NULL)
    {
    // With no genePreds at all the primary variant is intergenic.
    if (self->funcFilter != NULL && self->funcFilter->intergenic)
        return aggvIntergenicRow(self, variant, retRJFilterFailed, callerLm);
    if (retRJFilterFailed && self->gpVarOverlapRule == agoMustOverlap)
        *retRJFilterFailed = TRUE;
    return NULL;
    }
if (retRJFilterFailed && *retRJFilterFailed)
    return NULL;

// Extended genePred loading is used when the source's column list has exonFrames.
int gpHasFrames = (asColumnFindIx(gSelf->mySource->asObj->columnList, "exonFrames") >= 0);

struct annoRow *outRows = NULL;
struct annoRow *gpRow;
for (gpRow = gpRows;  gpRow != NULL;  gpRow = gpRow->next)
    {
    char **words = gpRow->data;

    // The genePred loaders trash their input, so keep copies of the exon
    // start/end columns and restore them once loading is done.
    char *exonStartsCopy = lmCloneString(self->lm, words[8]);
    char *exonEndsCopy = lmCloneString(self->lm, words[9]);
    struct genePred *gp;
    if (gpHasFrames)
        gp = genePredExtLoad(words, GENEPREDX_NUM_COLS);
    else
        gp = genePredLoad(words);
    words[8] = exonStartsCopy;
    words[9] = exonEndsCopy;

    struct annoRow *newRows = aggvGenRows(self, variant, gp, gpRow, callerLm);
    if (newRows != NULL)
        {
        // Prepend in reversed order; the single reversal after the loop
        // straightens everything out again.
        slReverse(&newRows);
        outRows = slCat(newRows, outRows);
        }
    genePredFree(&gp);
    }
slReverse(&outRows);

// Every row was filtered out while overlap is mandatory: report the failure.
if (outRows == NULL && retRJFilterFailed && self->gpVarOverlapRule == agoMustOverlap)
    *retRJFilterFailed = TRUE;
return outRows;
}