Example #1
0
void liftAcross(char *liftAcross, char *srcFile, char *dstOut)
/* liftAcross - convert one coordinate system to another, no overlapping items.
 * Every genePred loaded from srcFile is looked up by chrom in the hash built
 * by readLift(liftAcross).  Items with a hash entry are run through
 * liftGenePred and each resulting record is written to dstOut; items
 * without one are written to dstOut unchanged. */
{
struct hash *liftSpecs = readLift(liftAcross);
struct genePred *allGenes = genePredExtLoadAll(srcFile);
struct genePred *gene;
FILE *out = mustOpen(dstOut, "w");
int itemCount = 0;

if (bedOut)
    bedRegionOutput(liftSpecs);

for (gene = allGenes; gene != NULL; gene = gene->next)
    {
    struct liftSpec *spec = hashFindVal(liftSpecs, gene->chrom);
    ++itemCount;
    if (spec == NULL)
        {
        /* no lift entry for this chrom: pass the item through as-is */
        genePredTabOut(gene, out);
        continue;
        }
    struct genePred *lifted = liftGenePred(gene, spec);
    struct genePred *piece = lifted;
    while (piece != NULL)
        {
        genePredTabOut(piece, out);
        piece = piece->next;
        }
    genePredFreeList(&lifted);
    }
/* liftSpecs and allGenes are left allocated to disappear at exit;
 * note that out is not closed here either. */
verbose(2,"#\tgene pred item count: %d\n", itemCount);
}
Example #2
0
void doGenePreds(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom, 
	    char *netTable, char *geneFileName, char *geneTableName,
	    char *outBedName, char *selectedFileName, int *foundCount, int *notFoundCount)	
/* Map over genePreds.
 * Genes are loaded from geneFileName when it is non-NULL, otherwise from
 * geneTableName through conn.  Genes whose chrom differs from chrom are
 * skipped.  Each remaining gene is passed to orthoBedFromGene; a result with
 * at least one exon is written to outBedName (and the source gene to
 * selectedFileName, when that is non-NULL) and *foundCount is incremented,
 * otherwise *notFoundCount is incremented.
 * If the cdsErrorFile option is set, that file is (re)created containing
 * only its header line. */
{
FILE *bedOut = NULL;
FILE *selectedOut = NULL;
struct genePred *gene = NULL, *geneList = NULL;

/* Init output files.  The error file is opened with mustOpen, like the other
 * outputs below, rather than an unchecked fopen whose NULL return would be
 * handed straight to fprintf. */
if (optionExists("cdsErrorFile"))
    {
    FILE *cdsErrorFp = mustOpen(optionVal("cdsErrorFile", NULL), "w");
    fprintf(cdsErrorFp, "#name\tchrom\ttxStart\ttxEnd\tcdsStart\tcdsEnd\tstrand\texonCount\n");
    carefulClose(&cdsErrorFp);
    }

warn("Loading Gene Predictions.");
assert(outBedName);
if (geneFileName)
    geneList = genePredLoadAll(geneFileName);
else
    geneList = loadGeneFromTable(conn, geneTableName, chrom, 0, BIGNUM);

/* Convert genePreds. */
warn("Converting genes.");
bedOut = mustOpen(outBedName, "w");
if (selectedFileName != NULL)
    selectedOut = mustOpen(selectedFileName, "w");
for (gene = geneList; gene != NULL; gene = gene->next)
    {
    struct genePred *synGene = NULL;
    if (differentString(gene->chrom, chrom))
        continue;
    synGene = orthoBedFromGene(conn, db, orthoDb, netTable, gene);
    occassionalDot();
    if (synGene != NULL && synGene->exonCount > 0)
        {
        (*foundCount)++;
        genePredTabOut(synGene, bedOut);
        if (selectedOut != NULL)
            genePredTabOut(gene, selectedOut);
        }
    else
        (*notFoundCount)++;
    genePredFree(&synGene);
    }
carefulClose(&selectedOut);
carefulClose(&bedOut);
}
void liftGenePredExt(char *destFile, struct hash *liftHash, int sourceCount, char *sources[])
/* Lift genePred files.  Each of the sourceCount files named in sources is
 * read as tab-separated rows; every row is loaded as a genePred, handed to
 * liftGenePredObj, and written to destFile when that call returns true. */
{
FILE *out = mustOpen(destFile, "w");
int srcIx = 0;

while (srcIx < sourceCount)
    {
    char *srcName = sources[srcIx++];
    char *fields[GENEPREDX_NUM_COLS];
    struct lineFile *in;
    int fieldCount;

    verbose(1, "Lifting %s\n", srcName);
    in = lineFileOpen(srcName, TRUE);
    for (;;)
        {
        struct genePred *rec;
        fieldCount = lineFileChopNextTab(in, fields, ArraySize(fields));
        if (fieldCount == 0)
            break;      /* zero columns ends the read loop for this file */
        rec = genePredExtLoad(fields, fieldCount);
        if (liftGenePredObj(liftHash, rec, in))
            genePredTabOut(rec, out);
        genePredFree(&rec);
        }
    lineFileClose(&in);
    if (dots)
        verbose(1, "\n");
    }

carefulClose(&out);
}
Example #4
0
void gffIntoDatabase(char *database, char *fileName, char *table, int offset)
/* Load a gff file into database.  Each gff group that converts to a
 * genePred is shifted by offset; the resulting list is sorted with
 * genePredCmp, written to "genePred.tab" and handed to loadIntoDatabase.
 * NOTE(review): the table argument is never consulted - the name passed to
 * loadIntoDatabase is the literal "sanger22".  Confirm this is intended. */
{
char *tabFile = "genePred.tab";
struct gffFile *gff = gffFileNew("");
struct genePred *genes = NULL, *gene;
struct gffGroup *grp;
FILE *tabOut;

/* Read and group the gff, then convert group by group. */
gffFileAdd(gff, fileName, 0);
gffGroupLines(gff);
grp = gff->groupList;
while (grp != NULL)
    {
    gene = genePredFromGroupedGff(gff, grp, grp->name, "exon",
                                  genePredCdsStatFld|genePredExonFramesFld,
                                  genePredGxfDefaults);
    if (gene != NULL)
        {
        genePredOffset(gene, offset);
        slAddHead(&genes, gene);
        }
    grp = grp->next;
    }
slSort(&genes, genePredCmp);

/* Dump the sorted list as a tab-delimited file. */
tabOut = mustOpen(tabFile, "w");
gene = genes;
while (gene != NULL)
    {
    genePredTabOut(gene, tabOut);
    gene = gene->next;
    }
carefulClose(&tabOut);

/* Hand the tab file to the database loader. */
loadIntoDatabase(database, createGenePred, "sanger22", tabFile);
}
Example #5
0
void convertPsl(struct psl *psl, struct genbankCds *cds, FILE *genePredFh)
/* Build a genePred from a psl and its cds and write it to genePredFh.
 * Nothing is written when pslToGenePred returns NULL. */
{
    struct genePred *gp = pslToGenePred(psl, cds);

    if (gp == NULL)
        return;
    genePredTabOut(gp, genePredFh);
    genePredFree(&gp);
}
Example #6
0
/* Convert one line read from a bed file to a genePred and write it to gpFh.
 * The line is chopped on white space into a 12-slot array; fewer than 4
 * columns is a fatal error. */
void cnvBedRec(char *line, FILE *gpFh)
{
char *fields[12];
int fieldCount = chopByWhite(line, fields, ArraySize(fields));
struct bed *bedRec;
struct genePred *gene;

if (fieldCount < 4)
    errAbort("bed must have at least 4 columns");
bedRec = bedLoadN(fields, fieldCount);
gene = bedToGenePred(bedRec);
genePredTabOut(gene, gpFh);
genePredFree(&gene);
bedFree(&bedRec);
}
Example #7
0
static void processGenePred(FILE *fh, struct hash *refSeqVerInfoTbl, struct genePred *gp)
/* If gp->name has an entry in refSeqVerInfoTbl, write gp to fh with its name
 * replaced by the result of addVer for that entry's version.  The original
 * name pointer is put back before returning. */
{
struct refSeqVerInfo *verInfo = hashFindVal(refSeqVerInfoTbl, gp->name);
char versioned[GENBANK_ACC_BUFSZ];
char *origName;

if (verInfo == NULL)
    return;     /* not selected: nothing to write */

origName = gp->name;
gp->name = addVer(origName, verInfo->ver, versioned, sizeof(versioned));
genePredTabOut(gp, fh);
gp->name = origName;
}
Example #8
0
static void gbGeneTblWriteGeneFlat(struct gbGeneTbl *ggt, struct gbStatus* status,
                                   struct psl* psl, struct sqlConnection *conn)
/* Write a genePred flat row: the gene name (empty when status->geneName is
 * NULL) and a tab, followed by the genePred built from psl and status->cds. */
{
struct genePred *gene;
FILE *tabFh;
char *geneName;

gene = genePredFromPsl3(psl, &status->cds, 0, genePredPslCdsMod3,
                        genePredStdInsertMergeSize, genePredStdInsertMergeSize);
tabFh = gbGeneTblGetFlatTabFh(ggt, conn);

geneName = status->geneName;
if (geneName == NULL)
    geneName = "";
fprintf(tabFh, "%s\t", geneName);
genePredTabOut(gene, tabFh);
genePredFree(&gene);
}
Example #9
0
static void gtfGroupToGenePred(struct gffFile *gtf, struct gffGroup *group, FILE *gpFh,
                               FILE *infoFh)
/* Convert one gtf group to a genePred and write it to gpFh.
 * The conversion runs under an errCatch.  A group that yields no genePred is
 * reported unless clIgnoreGroupsWithoutExons is set.  With clAllErrors set,
 * problems are printed to stderr and counted in badGroupCount; otherwise they
 * are raised with errAbort.  When no error was caught and infoFh is non-NULL,
 * writeInfo is called for the group. */
{
struct errCatch *catcher = errCatchNew();
unsigned optFields = 0;

if (clGenePredExt)
    optFields = genePredAllFlds;

if (errCatchStart(catcher))
    {
    struct genePred *gp = genePredFromGroupedGtf(gtf, group, group->name, optFields, clGxfOptions);
    if (gp != NULL)
        {
        genePredTabOut(gp, gpFh);
        genePredFree(&gp);
        }
    else if (!clIgnoreGroupsWithoutExons)
        {
        char *msg = "no exons defined for group %s, feature %s (perhaps try -ignoreGroupsWithoutExons)";
        if (!clAllErrors)
            errAbort(msg, group->name, group->lineList->feature);
        else
            {
            /* report and keep going; this path does not set gotError */
            fprintf(stderr, msg, group->name, group->lineList->feature);
            fputc('\n', stderr);
            badGroupCount++;
            }
        }
    }
errCatchEnd(catcher);

if (!catcher->gotError)
    {
    if (infoFh != NULL)
        writeInfo(infoFh, group);
    }
else
    {
    /* drop trailing newline in caught message */
    if (endsWith(catcher->message->string, "\n"))
        dyStringResize(catcher->message, dyStringLen(catcher->message)-1);
    if (!clAllErrors)
        errAbort("%s", catcher->message->string);
    else
        {
        fprintf(stderr, "%s\n", catcher->message->string);
        badGroupCount++;
        }
    }
errCatchFree(&catcher);
}
Example #10
0
static void createCcdsGene(struct sqlConnection *conn, char *ccdsGeneFile,
                           struct genomeInfo *genome, struct hash* ignoreTbl,
                           struct hash *gotCcds)
/* Create the ccdsGene tab file from the ccds database.  Each genePred built
 * from the loaded locations is written to ccdsGeneFile, preceded by a bin
 * column when loadDb is set. */
{
struct ccdsLocationsJoin *locs = loadLocations(conn, genome, ignoreTbl, gotCcds);
struct genePred *geneList = buildCcdsGene(&locs);
struct genePred *gene = geneList;
FILE *tabFh = mustOpen(ccdsGeneFile, "w");

while (gene != NULL)
    {
    if (loadDb)
        fprintf(tabFh, "%d\t", binFromRange(gene->txStart, gene->txEnd));
    genePredTabOut(gene, tabFh);
    gene = gene->next;
    }
carefulClose(&tabFh);
genePredFreeList(&geneList);
}
Example #11
0
static void gbGeneTblWriteGene(struct gbGeneTbl *ggt, struct gbStatus* status,
                               struct psl* psl, struct sqlConnection *conn)
/* Write a genePred row built from psl and status->cds to the table's tab
 * file.  When ggt->hasExtCols is set the extended fields are generated and
 * name2 is replaced by the gene name; when ggt->hasBin is set a bin column
 * is written first. */
{
struct genePred* gp
    = genePredFromPsl3(psl, &status->cds, 
                       (ggt->hasExtCols ? genePredAllFlds : 0), genePredPslCdsMod3,
                       genePredStdInsertMergeSize, genePredStdInsertMergeSize);
FILE *fh = gbGeneTblGetTabFh(ggt, conn);
if (ggt->hasExtCols)
    {
    /* Add gene name.  status->geneName may be NULL; store an empty string
     * in that case so name2 is always a valid string when the row is
     * written. */
    freeMem(gp->name2);
    gp->name2 = cloneString((status->geneName == NULL) ? "" : status->geneName);
    }
if (ggt->hasBin)
    fprintf(fh, "%u\t", hFindBin(gp->txStart, gp->txEnd));
genePredTabOut(gp, fh);
genePredFree(&gp);
}
Example #12
0
void copyGene(char *db, struct genePred *gene, FILE *tabFh)
/* Copy one gene to the tab file.
 * With gGenePredExt set, the gene must carry the score, name2, cdsStat and
 * exonFrames optional fields or the program aborts.  The gene is written when
 * gNoValidate is set or checkGene accepts it; a bin column is written first
 * when gBin is set.  Without gGenePredExt the optional fields are hidden for
 * the duration of the write and restored afterwards. */
{
unsigned holdOptFields = gene->optFields;
unsigned requiredFields = (genePredScoreFld|genePredName2Fld|genePredCdsStatFld|genePredExonFramesFld);

/* Test the gene's own field mask against the required set.  (Masking the
 * required set with itself, as was done before, can never fail.) */
if (gGenePredExt && ((holdOptFields & requiredFields) != requiredFields))
    errAbort("genePred %s doesn't have fields required for -genePredExt", gene->name);

if (gNoValidate || checkGene(db, gene))
    {
    if (!gGenePredExt)
        gene->optFields = 0;  /* omit optional fields */

    if (gBin)
        fprintf(tabFh, "%u\t", hFindBin(gene->txStart, gene->txEnd));
    genePredTabOut(gene, tabFh);

    gene->optFields = holdOptFields; /* restore optional fields */
    }
}
Example #13
0
void borfMatcher(char *bedIn, char *borfIn, char *bedOutFile, char *genePredOutFile)
/* Top level function to open files and call other functions.
 * Beds from bedIn and borfs from borfIn are walked in parallel, so the two
 * files must list corresponding records in the same order; a bed whose name
 * does not occur in the paired borf name aborts the run.  For plus-strand
 * borfs that score above the minScore option (default 50), or that have a
 * non-empty cds when keepSmall is set, the bed's thick start/stop is set from
 * the borf.  The bed is then written to bedOutFile and its genePred to
 * genePredOutFile, unless it is an nmd target and keepNmd is not set. */
{
struct borf *borf = NULL, *borfList = NULL;
struct bed *bed = NULL, *bedList = NULL;
float threshold = optionFloat("minScore", 50);
FILE *bedOut = mustOpen(bedOutFile, "w");
FILE *genePredOut = mustOpen(genePredOutFile, "w");
boolean keepSmall = optionExists("keepSmall");
boolean keepNmd = optionExists("keepNmd");
int dotInterval;

borfList = borfLoadAll(borfIn);
bedList = bedLoadAll(bedIn);

/* With fewer than 10 beds the integer division yields 0; clamp to 1.
 * NOTE(review): assumes dotForUserInit needs a positive interval - confirm. */
dotInterval = slCount(bedList)/10;
if (dotInterval < 1)
    dotInterval = 1;
dotForUserInit(dotInterval);

for (bed = bedList, borf = borfList; bed != NULL && borf != NULL; bed = bed->next, borf = borf->next)
    {
    dotForUser();
    if (!stringIn(bed->name, borf->name))
        errAbort("Trying to match up %s bed with %s borf - bad idea!", bed->name, borf->name);
    /* Have to adjust cds end. Borf puts stop codon outside of cds, 
       we put it inside. */
    borf->cdsEnd = min(borf->cdsEnd+3, borf->size);
    if ((borf->score > threshold || (keepSmall && borf->cdsSize > 0)) && sameString(borf->strand, "+"))
        {
        setThickStartStop(bed, borf);
        if (keepNmd || !nmdTarget(bed))
            {
            struct genePred *gp = bedToGenePred(bed);
            bedTabOutN(bed, 12, bedOut);
            genePredTabOut(gp, genePredOut);
            genePredFree(&gp);
            }
        }
    }

/* The loop stops at the end of the shorter list; say so instead of silently
 * ignoring the leftover records of the longer one. */
if (bed != NULL || borf != NULL)
    warn("Bed and borf inputs have different record counts; unmatched trailing records were skipped.");

warn("Done.");
carefulClose(&bedOut);
carefulClose(&genePredOut);
}