Пример #1
0
static void fakePslFromGenePred(char *db, char *fileTbl, char *pslOut, char *cdsOut)
/* Read genePreds from fileTbl and pass each one to cnvGenePred together with
 * the opened pslOut and cdsOut files.  fileTbl is read as a file when a file
 * of that name exists, otherwise it is queried as a table in db. */
{
struct sqlConnection *conn = NULL;
struct genePredReader *gpr;
struct genePred *gp;
FILE *pslFh = mustOpen(pslOut, "w");
FILE *cdsFh = mustOpen(cdsOut, "w");
struct hash *chromHash = getChromHash(db);

if (fileExists(fileTbl))
    {
    gpr = genePredReaderFile(fileTbl, NULL);
    }
else
    {
    /* The connection must outlive the reader: rows are pulled from the
     * query result lazily by genePredReaderNext below. */
    conn = hAllocConn(db);
    gpr = genePredReaderQuery(conn, fileTbl, NULL);
    }

/* NOTE(review): gp is not freed here; confirm whether cnvGenePred takes
 * ownership of it before adding a genePredFree. */
while ((gp = genePredReaderNext(gpr)) != NULL)
    {
    cnvGenePred(chromHash, gp, pslFh, cdsFh);
    }

genePredReaderFree(&gpr);
hFreeConn(&conn);   /* conn is still NULL when the input was a file */
carefulClose(&pslFh);
carefulClose(&cdsFh);
}
Пример #2
0
int palOutPredsInBeds(struct sqlConnection *conn, struct cart *cart,
    struct bed *beds, char *table )
/* For each bed, collect the genePreds in table that have the same name and
 * chrom and whose txStart/txEnd range intersects the bed, then hand the
 * combined list to palOutPredList.  Returns palOutPredList's result, or 0
 * when no genePred matched. */
{
struct genePred *matches = NULL;
struct bed *bed;

for (bed = beds; bed != NULL; bed = bed->next)
    {
    char where[10 * 1024];
    struct genePredReader *reader;
    struct genePred *pred;

    sqlSafefFrag(where, sizeof where,
        "name = '%s' and chrom='%s' and txEnd > %d and txStart <= %d",
        bed->name, bed->chrom, bed->chromStart, bed->chromEnd);

    reader = genePredReaderQuery(conn, table, where);
    for (;;)
        {
        pred = genePredReaderNext(reader);
        if (pred == NULL)
            break;
        slAddHead(&matches, pred);
        }
    genePredReaderFree(&reader);
    }

/* Nothing matched: nothing to output. */
if (matches == NULL)
    return 0;

/* slAddHead built the list back to front; restore query order. */
slReverse(&matches);
int outCount = palOutPredList(conn, cart, matches);
genePredFreeList(&matches);
return outCount;
}
Пример #3
0
struct genePred *getPredsFromBeds(char *file, char *table, char *db)
/* Read a bed4 file and, for each line, load from table in db the genePreds
 * whose name, chrom, txStart and txEnd equal the bed's name, chrom, start
 * and end.  Returns the matches in file/query order, or NULL if none were
 * found.  Aborts if a line does not have exactly four fields. */
{
struct sqlConnection *conn = hAllocConn(db);
struct lineFile *lf = lineFileOpen(file, TRUE);
char *words[5000];
int wordsRead;
struct genePred *list = NULL;

while( (wordsRead = lineFileChopNext(lf, words, sizeof(words)/sizeof(char *)) ))
    {
    if (wordsRead != 4)
        errAbort("file '%s' must be bed4. Line %d has %d fields", 
            file, lf->lineIx, wordsRead);

    char where[10 * 1024];
    sqlSafefFrag(where, sizeof where, 
        "name = '%s' and chrom='%s' and txStart = %d and txEnd = %d",
        words[3], words[0], sqlUnsigned(words[1]), sqlUnsigned(words[2]));

    struct genePredReader *reader = genePredReaderQuery( conn, table, where);
    struct genePred *pred;
    while ((pred = genePredReaderNext(reader)) != NULL)
        slAddHead(&list, pred);

    genePredReaderFree(&reader);
    }

/* Release the input file; it was previously left open. */
lineFileClose(&lf);
hFreeConn(&conn);

/* slAddHead accumulated the list in reverse. */
if (list != NULL)
    slReverse(&list);

return list;
}
static void checkGenePred(char *fileTbl)
/* Run checkAGenePred on every record of a genePred file or table.  fileTbl
 * is read as a file if one exists by that name; otherwise it is treated as
 * a table in gDb, and it is an error if gDb is not set. */
{
struct sqlConnection *conn = NULL;
struct genePredReader *reader;
struct genePred *pred;
int recNum = 0;

if (fileExists(fileTbl))
    reader = genePredReaderFile(fileTbl, NULL);
else
    {
    if (gDb == NULL)
        errAbort("file %s doesn't exist, must specify -db=db if this is a table", fileTbl);
    conn = hAllocConn(gDb);
    reader = genePredReaderQuery(conn, fileTbl, NULL);
    }

/* Records are numbered from 1 for checkAGenePred. */
for (pred = genePredReaderNext(reader); pred != NULL; pred = genePredReaderNext(reader))
    {
    recNum++;
    checkAGenePred(fileTbl, recNum, pred);
    genePredFree(&pred);
    }

genePredReaderFree(&reader);
hFreeConn(&conn);
}
Пример #5
0
struct genePred *genePredReaderLoadQuery(struct sqlConnection* conn,
                                         char* table, char* where)
/* Convenience wrapper: open a reader on table (restricted by where), load
 * every genePred it yields, free the reader and return the loaded list. */
{
struct genePredReader *reader;
struct genePred *loaded;

reader = genePredReaderQuery(conn, table, where);
loaded = genePredReaderAll(reader);
genePredReaderFree(&reader);

return loaded;
}
static void getGeneAnns(struct sqlConnection *conn, struct hash *refSeqVerInfoTbl, char *outFile)
/* Read every genePred in the refGene table and pass each one, along with
 * refSeqVerInfoTbl, to processGenePred, which is given outFile opened for
 * writing. */
{
struct genePredReader *reader = genePredReaderQuery(conn, "refGene", NULL);
FILE *out = mustOpen(outFile, "w");
struct genePred *pred;

for (;;)
    {
    pred = genePredReaderNext(reader);
    if (pred == NULL)
        break;
    processGenePred(out, refSeqVerInfoTbl, pred);
    genePredFree(&pred);
    }

carefulClose(&out);
genePredReaderFree(&reader);
}
Пример #7
0
struct genePred *getPredsForName(char *name, char *geneTable, char *db)
/* Load the genePreds called name from geneTable in db, restricted to
 * onlyChrom when that is set.  Returns the list in query order.  Aborts if
 * the table cannot be found or if no genePred matches. */
{
struct sqlConnection *conn = hAllocConn(db);
struct genePred *list = NULL;
char splitTable[HDB_MAX_TABLE_STRING];
struct genePred *gene;
boolean hasBin;
struct genePredReader *reader;

boolean found =  hFindSplitTable(db, NULL, geneTable,
        splitTable, &hasBin);

if (!found)
    errAbort("can't find table %s\n", geneTable);

/* Build the where fragment with sqlSafefFrag rather than plain safef so
 * that name and onlyChrom are escaped before being spliced into SQL. */
char extra[2048];
if (onlyChrom != NULL)
    sqlSafefFrag(extra, sizeof extra, "name='%s' and chrom='%s'", name, onlyChrom);
else
    sqlSafefFrag(extra, sizeof extra, "name='%s'", name);

reader = genePredReaderQuery( conn, splitTable, extra);

while ((gene  = genePredReaderNext(reader)) != NULL)
    {
    verbose(2, "got gene %s\n",gene->name);
    slAddHead(&list, gene);
    }

if (list == NULL)
    errAbort("no genePred for gene %s in %s\n",name, geneTable);

/* slAddHead accumulated the list in reverse. */
slReverse(&list);

genePredReaderFree(&reader);
hFreeConn(&conn);

return list;
}
Пример #8
0
static struct genePred *queryPreds(char *dbName, char *geneTable)
/* Load all genePreds from geneTable in dbName, restricted to onlyChrom when
 * that is set.  Returns the loaded list (NULL if the query yields nothing). */
{
struct sqlConnection *conn = hAllocConn(dbName);
struct genePred *list = NULL;
char buf[2048];
char *extra = NULL;
struct genePredReader *reader;

if (onlyChrom != NULL)
    {
    /* sqlSafefFrag escapes onlyChrom before it is spliced into SQL. */
    sqlSafefFrag(buf, sizeof buf, "chrom='%s'", onlyChrom);
    extra = buf;
    }

reader = genePredReaderQuery( conn, geneTable, extra);

list = genePredReaderAll(reader);

/* The reader was previously leaked; release it before the connection. */
genePredReaderFree(&reader);
hFreeConn(&conn);

return list;
}
Пример #9
0
static void printCcdsHgGeneUrl(struct sqlConnection *conn, char *ccdsId, char* kgId)
/* Print an hgGene URL for kgId.  The ccdsGene row for ccdsId on seqName
 * supplies the strand and range used to find the knownGene row, whose
 * txStart/txEnd go into the URL.  Aborts unless exactly one row is found in
 * each table. */
{
char whereClause[128];
struct genePredReader *reader;
struct genePred *ccds = NULL, *kg = NULL;

/* Locate the CCDS genePred on the current sequence. */
sqlSafefFrag(whereClause, sizeof(whereClause), "chrom = '%s' and name = '%s'", seqName, ccdsId);
reader = genePredReaderQuery(conn, "ccdsGene", whereClause);
ccds = genePredReaderAll(reader);
genePredReaderFree(&reader);
if (ccds != NULL)
    {
    if (ccds->next != NULL)
        errAbort("multiple %s rows found in ccdsGene table for chrom %s", ccdsId, seqName);
    }
else
    errAbort("%s not found in ccdsGene table for chrom %s", ccdsId, seqName);

/* Locate the known gene on the same strand within the CCDS range; its
 * exact coordinates are needed for the link. */
sqlSafefFrag(whereClause, sizeof(whereClause), "name = '%s' and strand = '%s'", kgId,
      ccds->strand);
reader = genePredReaderRangeQuery(conn, "knownGene", seqName,
                                  ccds->txStart, ccds->txEnd, whereClause);
kg = genePredReaderAll(reader);
genePredReaderFree(&reader);
if (kg != NULL)
    {
    if (kg->next != NULL)
        errAbort("multiple %s rows found in knownGene table for chrom %s", kgId, seqName);
    }
else
    errAbort("%s not found in knownGene table for chrom %s", kgId, seqName);

printf("../cgi-bin/hgGene?%s&%s=%s&%s=%s&%s=%s&%s=%d&%s=%d",
       cartSidUrlString(cart),
       "db", database,
       "hgg_gene", kgId,
       "hgg_chrom", seqName,
       "hgg_start", kg->txStart,
       "hgg_end", kg->txEnd);

genePredFree(&ccds);
genePredFree(&kg);
}
Пример #10
0
void kgGetCds(char *db, char *spDb, char *geneTable, FILE *outf)
/* For every genePred in geneTable, write each CDS exon range as
 * "start-end;" into successive cdsBloc entries and call processAlign for
 * genes that have at least one CDS exon.  Genes without any CDS exon are
 * reported on stderr. */
{
struct sqlConnection *conn = NULL;
struct genePred *gp;
int cdsCnt;
struct genePredReader *gpr;
int iExon, exonStart, exonEnd;

/* NOTE(review): when db is NULL the query below runs with a NULL
 * connection; confirm callers always pass a database. */
if (db != NULL)
    conn = sqlConnect(db);

gpr = genePredReaderQuery(conn, geneTable, NULL);
while ((gp = genePredReaderNext(gpr)) != NULL)
    {
    cdsCnt = 0;
    for (iExon = 0; iExon < gp->exonCount; iExon++)
        {
        if (genePredCdsExon(gp, iExon, &exonStart, &exonEnd))
            {
            /* NOTE(review): unbounded write and unchecked index; cdsBloc
             * is declared outside this block, so its dimensions could not
             * be verified here. */
            sprintf(cdsBloc[cdsCnt], "%d-%d;", exonStart, exonEnd);
            cdsCnt++;
            }
        }
    if (cdsCnt > 0) 
        {
        processAlign(db, spDb, gp->name, cdsCnt, outf);
        }
    else
        {
        fprintf(stderr, "%s does not have cds.\n", gp->name);
        }
    /* Each genePred from the reader is owned by us; it was previously
     * leaked on every iteration. */
    genePredFree(&gp);
    }
/* The reader was previously leaked as well. */
genePredReaderFree(&gpr);
sqlDisconnect(&conn);
}
struct genePred *getOverlappingGeneDb(struct genePred **list, char *table, char *chrom, int cStart, int cEnd, char *name, int *retOverlap, char *db)
/* Find the gene in *list on chrom whose exons overlap cStart-cEnd the most
 * and return a newly allocated copy of it, or NULL if nothing overlaps.
 * If *list is empty it is first loaded from table in db (falling back to
 * altTable when table does not exist) and cached in geneListHash under db,
 * so the table is only read once.  A gene with more than 20 bases of exon
 * overlap whose name equals name replaces the current best regardless of
 * size; otherwise a gene replaces the best only when its overlap is
 * strictly larger.  *retOverlap is set only when a match is found. */
{
struct genePred *el = NULL, *bestMatch = NULL, *gp = NULL;
int overlap = 0 , bestOverlap = 0, i;

if (list == NULL)
    return NULL;

if (*list == NULL)
    {
    struct genePred *gpList = NULL;
    struct sqlConnection *conn = sqlConnect(db);
    struct genePredReader *gpr = NULL;
    if (!hTableExistsDb(db,table))
        table = altTable;
    if (!hTableExistsDb(db,table))
        {
        verbose(2,"no table %s in %s\n",table, db);
        /* Do not leak the connection on the early exit. */
        sqlDisconnect(&conn);
        return NULL;
        }
    gpr = genePredReaderQuery(conn, table, NULL);
    verbose(1,"Loading Predictions from %s in %s\n",table, db);
    gpList = genePredReaderAll(gpr);
    /* Everything has been read; the reader was previously leaked. */
    genePredReaderFree(&gpr);
    if (gpList != NULL)
        {
        hashAdd(geneListHash, db, gpList);
        *list = gpList;
        }
    sqlDisconnect(&conn);
    }

for (el = *list; el != NULL; el = el->next)
    {
    if (chrom != NULL && el->chrom != NULL)
        {
        overlap = 0;
        if ( sameString(chrom, el->chrom))
            {
            /* Total overlap is summed over all exons of the gene. */
            for (i = 0 ; i<(el->exonCount); i++)
                {
                overlap += positiveRangeIntersection(cStart,cEnd, el->exonStarts[i], el->exonEnds[i]) ;
                }
            if (overlap > 20 && sameString(name, el->name))
                {
                bestMatch = el;
                bestOverlap = overlap;
                *retOverlap = bestOverlap;
                }
            if (overlap > bestOverlap)
                {
                bestMatch = el;
                bestOverlap = overlap;
                *retOverlap = bestOverlap;
                }
            }
        }
    }

if (bestMatch != NULL)
    {
    /* Return a copy so the caller's result is independent of the cached
     * list. */
    AllocVar(gp);
    gp->name = cloneString(bestMatch->name);
    gp->chrom = cloneString(bestMatch->chrom);
    gp->strand[1] = bestMatch->strand[1];
    gp->strand[0] = bestMatch->strand[0];
    gp->txStart = bestMatch->txStart;
    gp->txEnd = bestMatch->txEnd;
    gp->cdsStart = bestMatch->cdsStart;
    gp->cdsEnd = bestMatch->cdsEnd;
    gp->exonCount = bestMatch->exonCount;
    AllocArray(gp->exonStarts, bestMatch->exonCount);
    AllocArray(gp->exonEnds, bestMatch->exonCount);
    for (i=0; i<bestMatch->exonCount; ++i)
        {
        gp->exonStarts[i] = bestMatch->exonStarts[i] ;
        gp->exonEnds[i] = bestMatch->exonEnds[i] ;
        }
    gp->optFields = bestMatch->optFields;
    gp->id = bestMatch->id;

    /* Optional fields are copied only when the source record has them. */
    if (bestMatch->optFields & genePredName2Fld)
        gp->name2 = cloneString(bestMatch->name2);
    else
        gp->name2 = NULL;
    if (bestMatch->optFields & genePredCdsStatFld)
        {
        gp->cdsStartStat = bestMatch->cdsStartStat;
        gp->cdsEndStat = bestMatch->cdsEndStat;
        }
    if (bestMatch->optFields & genePredExonFramesFld)
        {
        AllocArray(gp->exonFrames, bestMatch->exonCount);
        for (i = 0; i < bestMatch->exonCount; i++)
            gp->exonFrames[i] = bestMatch->exonFrames[i];
        }
    }

return gp;
}