Ejemplo n.º 1
0
static void gtfToGenePred(char *gtfFile, char *gpFile, char *infoFile)
/* gtfToGenePred - convert a GTF file to a genePred.
 *   gtfFile  - input path handed to gffRead(); the function errAborts when
 *              the parsed file is not flagged as GTF.
 *   gpFile   - output path, opened for writing.
 *   infoFile - optional second output path; when non-NULL it is opened for
 *              writing and infoHeader is written to it first.
 * Groups accepted by inclGroup() are converted one at a time, but only when
 * doSimple (defined outside this function) is false. */
{
struct gffFile *gtf = gffRead(gtfFile);
FILE *gpFh, *infoFh = NULL;
struct gffGroup *group;

if (!gtf->isGtf)
    errAbort("%s doesn't appear to be a GTF file (GFF not supported by this program)", gtfFile);
gffGroupLines(gtf);
gpFh = mustOpen(gpFile, "w");
if (infoFile != NULL)
    {
    infoFh = mustOpen(infoFile, "w");
    fputs(infoHeader, infoFh);
    }

/* NOTE(review): when doSimple is set nothing beyond the optional header is
 * written by this function - confirm the simple path is handled elsewhere. */
if (!doSimple)
    {
    for (group = gtf->groupList; group != NULL; group = group->next)
        {
        if (inclGroup(group))
            gtfGroupToGenePred(gtf, group, gpFh, infoFh);
        }
    }

carefulClose(&gpFh);
/* Close the optional info output the same way as the genePred output;
 * previously this handle was opened but never closed. */
if (infoFh != NULL)
    carefulClose(&infoFh);
gffFileFree(&gtf);
}
Ejemplo n.º 2
0
void gffToBed(char *inGff, char *outBed)
/* gffToBed - Convert a gff file (gff1 or gff2) to bed.  Not tested with gff3.
 *   inGff  - input path handed to gffRead().
 *   outBed - output path, opened for writing; one 12-field bed record is
 *            written per group that yields a gene prediction.
 * NOTE(review): neither the gene predictions nor the parsed gff file are
 * released in this function. */
{
struct gffFile *gff = gffRead(inGff);
FILE *out = mustOpen(outBed, "w");
/* The exon feature name is chosen before the lines are grouped. */
char *exonName = bestExonFeature(gff);
struct gffGroup *grp;

gffGroupLines(gff);
separateGroupsByChromosome(gff);

for (grp = gff->groupList; grp != NULL; grp = grp->next)
    {
    /* GTF input and plain GFF input go through different converters. */
    struct genePred *pred = gff->isGtf
        ? genePredFromGroupedGtf(gff, grp, grp->name, FALSE, FALSE)
        : genePredFromGroupedGff(gff, grp, grp->name, exonName, FALSE, FALSE);
    struct bed *rec;

    /* Groups that produce no gene prediction are skipped. */
    if (pred == NULL)
        continue;

    assert(pred->txStart == pred->exonStarts[0]);
    rec = bedFromGenePred(pred);
    bedTabOutN(rec, 12, out);
    bedFree(&rec);
    }

carefulClose(&out);
}
Ejemplo n.º 3
0
void ldGencodeIntron(char *database, char *table,
                        int gtfCount, char *gtfNames[])
/* Load Gencode intron status table from GTF files with
 * intron_id and intron_status keywords.
 *   database - name passed to hSetDb() and sqlConnect().
 *   table    - name of the table to create and of the tab file written
 *              in the current directory.
 *   gtfCount - number of entries in gtfNames.
 *   gtfNames - paths of the GTF files to read.
 * Every line whose feature is "intron" becomes one record; records are
 * sorted with bedCmp, written to the tab file, then loaded into the table. */
{
struct gffFile *gff;
struct gffLine *gffLine;
struct gencodeIntron *intron, *intronList = NULL;
struct sqlConnection *conn;
FILE *f;
int i;
int introns = 0;

for (i=0; i<gtfCount; i++)
    {
    verbose(1, "Reading %s\n", gtfNames[i]);
    gff = gffRead(gtfNames[i]);
    for (gffLine = gff->lineList; gffLine != NULL; gffLine = gffLine->next)
        {
        if (!sameWord(gffLine->feature, "intron"))
            continue;

        /* The record stores the gff line's pointers directly rather than
         * copies, so the parsed file is not freed in this function. */
        AllocVar(intron);
        intron->chrom = gffLine->seq;
        intron->chromStart = gffLine->start;
        intron->chromEnd = gffLine->end;
        intron->name = gffLine->intronId;
        intron->strand[0] = gffLine->strand;
        intron->strand[1] = 0;
        intron->status = gffLine->intronStatus;
        intron->transcript = gffLine->group;
        intron->geneId = gffLine->geneId;
        slAddHead(&intronList, intron);
        verbose(2, "%s %s\n", intron->chrom, intron->name);
        introns++;
        }
    }

/* NOTE(review): sorting gencodeIntron records with bedCmp presumably relies
 * on the two structs sharing their leading fields - confirm. */
slSort(&intronList, bedCmp);
f = hgCreateTabFile(".", table);
for (intron = intronList; intron != NULL; intron = intron->next)
    gencodeIntronTabOut(intron, f);
carefulClose(&f);

verbose(1, "%d introns in %d files\n", introns, gtfCount);
hSetDb(database);
conn = sqlConnect(database);
gencodeIntronTableCreate(conn, table, hGetMinIndexLength());
/* f has already been closed above; its address is still what the loader
 * takes as its final argument. */
hgLoadTabFile(conn, ".", table, &f);
sqlDisconnect(&conn);
}