static void gtfToGenePred(char *gtfFile, char *gpFile, char *infoFile) /* gtfToGenePred - convert a GTF file to a genePred.. */ { struct gffFile *gtf = gffRead(gtfFile); FILE *gpFh, *infoFh = NULL; struct gffGroup *group; if (!gtf->isGtf) errAbort("%s doesn't appear to be a GTF file (GFF not supported by this program)", gtfFile); gffGroupLines(gtf); gpFh = mustOpen(gpFile, "w"); if (infoFile != NULL) { infoFh = mustOpen(infoFile, "w"); fputs(infoHeader, infoFh); } if (!doSimple) for (group = gtf->groupList; group != NULL; group = group->next) if (inclGroup(group)) gtfGroupToGenePred(gtf, group, gpFh, infoFh); carefulClose(&gpFh); gffFileFree(>f); }
void gffToBed(char *inGff, char *outBed) /* gffToBed - Convert a gff file (gff1 or gff2) to bed. Not tested with gff3 */ { struct gffFile *gff = gffRead(inGff); FILE *f = mustOpen(outBed, "w"); char *exonFeature = bestExonFeature(gff); gffGroupLines(gff); separateGroupsByChromosome(gff); struct gffGroup *group; for (group = gff->groupList; group != NULL; group = group->next) { struct genePred *gp; if (gff->isGtf) gp = genePredFromGroupedGtf(gff, group, group->name, FALSE, FALSE); else gp = genePredFromGroupedGff(gff, group, group->name, exonFeature, FALSE, FALSE); if (gp != NULL) { assert(gp->txStart == gp->exonStarts[0]); struct bed *bed = bedFromGenePred(gp); bedTabOutN(bed, 12, f); bedFree(&bed); } } carefulClose(&f); }
void ldGencodeIntron(char *database, char *table, int gtfCount, char *gtfNames[]) /* Load Gencode intron status table from GTF files with * intron_id and intron_status keywords */ { struct gffFile *gff, *gffList = NULL; struct gffLine *gffLine; struct gencodeIntron *intron, *intronList = NULL; struct sqlConnection *conn; FILE *f; int i; int introns = 0; for (i=0; i<gtfCount; i++) { verbose(1, "Reading %s\n", gtfNames[i]); gff = gffRead(gtfNames[i]); for (gffLine = gff->lineList; gffLine != NULL; gffLine = gffLine->next) { if (sameWord(gffLine->feature, "intron")) { AllocVar(intron); intron->chrom = gffLine->seq; intron->chromStart = gffLine->start; intron->chromEnd = gffLine->end; intron->name = gffLine->intronId; intron->strand[0] = gffLine->strand; intron->strand[1] = 0; intron->status = gffLine->intronStatus; intron->transcript = gffLine->group; intron->geneId = gffLine->geneId; slAddHead(&intronList, intron); verbose(2, "%s %s\n", intron->chrom, intron->name); introns++; } } } slSort(&intronList, bedCmp); f = hgCreateTabFile(".", table); for (intron = intronList; intron != NULL; intron = intron->next) gencodeIntronTabOut(intron, f); carefulClose(&f); verbose(1, "%d introns in %d files\n", introns, gtfCount); hSetDb(database); conn = sqlConnect(database); gencodeIntronTableCreate(conn, table, hGetMinIndexLength()); hgLoadTabFile(conn, ".", table, &f); sqlDisconnect(&conn); }