void mafOrtholog(char *database, char *track, char *genePredFile, char *outFile)
/* mafOrtholog - find orthologs in other species based on maf alignment and
 * reference genePred.
 *   database     - handed to moveMeToFirst() when the meFirst flag is set
 *   track        - maf track name handed to mafLoadInRegion()
 *   genePredFile - file of reference gene predictions; read completely
 *   outFile      - output file, opened for writing
 * For each gene the maf alignments over its CDS (thickOnly) or its whole
 * transcript are loaded and passed to printOrthologs(). */
{
struct slName *orgList = NULL;
FILE *f = mustOpen(outFile, "w");
struct genePredReader *gpr = genePredReaderFile(genePredFile, NULL);
struct genePred *gpList = genePredReaderAll(gpr), *gp = NULL;
struct sqlConnection *conn = hAllocConn();

/* Every record is in gpList now, so the reader can be released. */
genePredReaderFree(&gpr);

if (optionExists("nibDir"))
    nibDir = optionVal("nibDir", NULL);
if (optionExists("orgs"))
    {
    char *orgFile = optionVal("orgs", NULL);
    char *buf;
    readInGulp(orgFile, &buf, NULL);
    /* NOTE(review): orgList is filled here but never read in this function;
     * confirm whether a file-level list was meant to be set instead. */
    orgList = stringToSlNames(buf);
    }

for (gp = gpList ; gp != NULL ; gp=gp->next)
    {
    struct mafAli *maf = NULL;
    /* thickOnly restricts the alignment to the coding region. */
    if (thickOnly)
        maf = mafLoadInRegion(conn, track,
        gp->chrom, gp->cdsStart, gp->cdsEnd);
    else
        maf = mafLoadInRegion(conn, track,
        gp->chrom, gp->txStart, gp->txEnd);
    if (meFirst)
        moveMeToFirst(maf, database);
    printOrthologs(f, maf, gp);
    mafAliFree(&maf);
    }
carefulClose(&f);
/* Hand the database connection back now that all regions are processed. */
hFreeConn(&conn);
}
/* Example #2 */
struct genePred *genePredReaderLoadFile(char* gpFile, char* chrom)
/* Open a reader on gpFile (chrom is passed straight through to
 * genePredReaderFile), pull every genePred out of it, release the
 * reader and return the list that was read. */
{
struct genePredReader *reader = genePredReaderFile(gpFile, chrom);
struct genePred *loaded = genePredReaderAll(reader);

genePredReaderFree(&reader);
return loaded;
}
/* Example #3 */
struct genePred *genePredReaderLoadQuery(struct sqlConnection* conn,
                                         char* table, char* where)
/* Run a genePred query on table over conn (where is passed through to
 * genePredReaderQuery), load all resulting rows, release the reader and
 * return the loaded list. */
{
struct genePredReader *reader = genePredReaderQuery(conn, table, where);
struct genePred *loaded = genePredReaderAll(reader);

genePredReaderFree(&reader);
return loaded;
}
/* Example #4 */
struct genePred *genePredReaderLoadRangeQuery(struct sqlConnection* conn,
                                              char* table, char* chrom,
                                              int start, int end, 
                                              char* extraWhere)
/* Run a range query on table over conn for chrom:start-end (extraWhere is
 * passed through to genePredReaderRangeQuery), load all resulting rows,
 * release the reader and return the loaded list. */
{
struct genePredReader *reader;
struct genePred *loaded;

reader = genePredReaderRangeQuery(conn, table, chrom, start, end, extraWhere);
loaded = genePredReaderAll(reader);
genePredReaderFree(&reader);
return loaded;
}
/* Example #5 */
static void printCcdsHgGeneUrl(struct sqlConnection *conn, char *ccdsId, char* kgId)
/* Print (to stdout) an hgGene URL for the known gene kgId that goes with
 * the CCDS entry ccdsId.  Both records are looked up on seqName and each
 * lookup must yield exactly one row, otherwise errAbort is called. */
{
char whereBuf[128];
struct genePredReader *reader;
struct genePred *ccdsRec = NULL;
struct genePred *kgRec = NULL;

/* Step 1: the ccdsGene row supplies the region and strand to search. */
sqlSafefFrag(whereBuf, sizeof(whereBuf), "chrom = '%s' and name = '%s'", seqName, ccdsId);
reader = genePredReaderQuery(conn, "ccdsGene", whereBuf);
ccdsRec = genePredReaderAll(reader);
genePredReaderFree(&reader);
if (ccdsRec == NULL)
    errAbort("%s not found in ccdsGene table for chrom %s", ccdsId, seqName);
else
    {
    if (ccdsRec->next != NULL)
        errAbort("multiple %s rows found in ccdsGene table for chrom %s", ccdsId, seqName);
    }

/* Step 2: the knownGene row supplies the exact start/end used in the link. */
sqlSafefFrag(whereBuf, sizeof(whereBuf), "name = '%s' and strand = '%s'", kgId,
      ccdsRec->strand);
reader = genePredReaderRangeQuery(conn, "knownGene", seqName,
                                  ccdsRec->txStart, ccdsRec->txEnd, whereBuf);
kgRec = genePredReaderAll(reader);
genePredReaderFree(&reader);
if (kgRec == NULL)
    errAbort("%s not found in knownGene table for chrom %s", kgId, seqName);
else
    {
    if (kgRec->next != NULL)
        errAbort("multiple %s rows found in knownGene table for chrom %s", kgId, seqName);
    }

/* Step 3: emit the link, then release both records. */
printf("../cgi-bin/hgGene?%s&%s=%s&%s=%s&%s=%s&%s=%d&%s=%d",
       cartSidUrlString(cart),
       "db", database,
       "hgg_gene", kgId,
       "hgg_chrom", seqName,
       "hgg_start", kgRec->txStart,
       "hgg_end", kgRec->txEnd);
genePredFree(&ccdsRec);
genePredFree(&kgRec);
}
/* Example #6 */
/* Load all gene predictions from geneTable in database dbName.  When
 * onlyChrom is set, only rows on that chromosome are requested.  Returns
 * the list produced by genePredReaderAll(). */
static struct genePred *queryPreds(char *dbName, char *geneTable)
{
struct sqlConnection *conn = hAllocConn(dbName);
struct genePred *list = NULL;
char buf[2048];
char *extra = NULL;
struct genePredReader *reader;

if (onlyChrom != NULL)
    {
    /* Extra where-clause restricting the query to one chromosome. */
    safef(buf, sizeof buf, "chrom='%s'", onlyChrom);
    extra = buf;
    }

reader = genePredReaderQuery( conn, geneTable, extra);

list = genePredReaderAll(reader);

/* Release the reader before giving back the connection it was reading
 * from; previously the reader was never freed. */
genePredReaderFree(&reader);
hFreeConn(&conn);

return list;
}
struct genePred *getOverlappingGeneDb(struct genePred **list, char *table, char *chrom, int cStart, int cEnd, char *name, int *retOverlap, char *db)
{
/* read all genes from a table find the gene with the biggest overlap. 
   Cache the list of genes to so we only read it once */

struct genePred *el = NULL, *bestMatch = NULL, *gp = NULL;
int overlap = 0 , bestOverlap = 0, i;
int *eFrames;

if (list == NULL)
    return NULL;

if (*list == NULL)
    {
    struct genePred *gpList = NULL;
    struct sqlConnection *conn = sqlConnect(db);
    struct genePredReader *gpr = NULL;
    if (!hTableExistsDb(db,table))
        table = altTable;
    if (!hTableExistsDb(db,table))
        {
        verbose(2,"no table %s in %s\n",table, db);
        return NULL;
        }
    gpr = genePredReaderQuery(conn, table, NULL);
    verbose(1,"Loading Predictions from %s in %s\n",table, db);
    gpList = genePredReaderAll(gpr);
    if (gpList != NULL)
        {
        hashAdd(geneListHash, db, gpList);
        *list = gpList;
        }
    sqlDisconnect(&conn);
    }
for (el = *list; el != NULL; el = el->next)
    {
    if (chrom != NULL && el->chrom != NULL)
        {
        overlap = 0;
        if ( sameString(chrom, el->chrom))
            {
            for (i = 0 ; i<(el->exonCount); i++)
                {
                overlap += positiveRangeIntersection(cStart,cEnd, el->exonStarts[i], el->exonEnds[i]) ;
                }
            if (overlap > 20 && sameString(name, el->name))
                {
                bestMatch = el;
                bestOverlap = overlap;
                *retOverlap = bestOverlap;
                }
            if (overlap > bestOverlap)
                {
                bestMatch = el;
                bestOverlap = overlap;
                *retOverlap = bestOverlap;
                }
            }
        }
    }
if (bestMatch != NULL)
    {
    /* Allocate genePred and fill in values. */
    AllocVar(gp);
    gp->name = cloneString(bestMatch->name);
    gp->chrom = cloneString(bestMatch->chrom);
    gp->strand[1] = bestMatch->strand[1];
    gp->strand[0] = bestMatch->strand[0];
    gp->txStart = bestMatch->txStart;
    gp->txEnd = bestMatch->txEnd;
    gp->cdsStart = bestMatch->cdsStart;
    gp->cdsEnd = bestMatch->cdsEnd;
    gp->exonCount = bestMatch->exonCount;
    AllocArray(gp->exonStarts, bestMatch->exonCount);
    AllocArray(gp->exonEnds, bestMatch->exonCount);
    for (i=0; i<bestMatch->exonCount; ++i)
        {
        gp->exonStarts[i] = bestMatch->exonStarts[i] ;
        gp->exonEnds[i] = bestMatch->exonEnds[i] ;
        }
    gp->optFields = bestMatch->optFields;
    gp->id = bestMatch->id;

    if (bestMatch->optFields & genePredName2Fld)
        gp->name2 = cloneString(bestMatch->name2);
    else
        gp->name2 = NULL;
    if (bestMatch->optFields & genePredCdsStatFld)
        {
        gp->cdsStartStat = bestMatch->cdsStartStat;
        gp->cdsEndStat = bestMatch->cdsEndStat;
        }
    if (bestMatch->optFields & genePredExonFramesFld)
        {
        gp->exonFrames = AllocArray(eFrames, bestMatch->exonCount);
        for (i = 0; i < bestMatch->exonCount; i++)
            gp->exonFrames[i] = bestMatch->exonFrames[i];
        }
    eFrames = gp->exonFrames;
    }

return gp;
}