void mafOrtholog(char *database, char *track, char *genePredFile, char *outFile) /* mafOrtholog - find orthlogs in other species based on maf alignment and reference genePred */ { struct slName *orgList = NULL; FILE *f = mustOpen(outFile, "w"); struct genePredReader *gpr = genePredReaderFile(genePredFile, NULL); struct genePred *gpList = genePredReaderAll(gpr), *gp = NULL; struct sqlConnection *conn = hAllocConn(); if (optionExists("nibDir")) nibDir = optionVal("nibDir", NULL); if (optionExists("orgs")) { char *orgFile = optionVal("orgs", NULL); char *buf; readInGulp(orgFile, &buf, NULL); orgList = stringToSlNames(buf); } for (gp = gpList ; gp != NULL ; gp=gp->next) { struct mafAli *maf = NULL; if (thickOnly) maf = mafLoadInRegion(conn, track, gp->chrom, gp->cdsStart, gp->cdsEnd); else maf = mafLoadInRegion(conn, track, gp->chrom, gp->txStart, gp->txEnd); if (meFirst) moveMeToFirst(maf, database); printOrthologs(f, maf, gp); mafAliFree(&maf); } carefulClose(&f); }
struct genePred *genePredReaderLoadFile(char* gpFile, char* chrom)
/* Convenience wrapper: open a reader on gpFile (chrom is handed straight to
 * genePredReaderFile), collect every record the reader yields, dispose of
 * the reader, and return the resulting list. */
{
struct genePredReader *reader;
struct genePred *records;

reader = genePredReaderFile(gpFile, chrom);
records = genePredReaderAll(reader);
genePredReaderFree(&reader);
return records;
}
struct genePred *genePredReaderLoadQuery(struct sqlConnection* conn, char* table, char* where)
/* Convenience wrapper: start a query reader on table over conn (where is
 * forwarded unchanged to genePredReaderQuery), collect every record it
 * yields, dispose of the reader, and return the resulting list. */
{
struct genePredReader *reader;
struct genePred *records;

reader = genePredReaderQuery(conn, table, where);
records = genePredReaderAll(reader);
genePredReaderFree(&reader);
return records;
}
struct genePred *genePredReaderLoadRangeQuery(struct sqlConnection* conn, char* table, char* chrom, int start, int end, char* extraWhere)
/* Convenience wrapper: start a range-query reader on table over conn for
 * chrom/start/end (extraWhere is forwarded unchanged to
 * genePredReaderRangeQuery), collect every record it yields, dispose of the
 * reader, and return the resulting list. */
{
struct genePredReader *reader;
struct genePred *records;

reader = genePredReaderRangeQuery(conn, table, chrom, start, end, extraWhere);
records = genePredReaderAll(reader);
genePredReaderFree(&reader);
return records;
}
static void printCcdsHgGeneUrl(struct sqlConnection *conn, char *ccdsId, char* kgId) /* output a URL to hgGene for a ccds */ { char where[128]; struct genePredReader *gpr; struct genePred *ccdsGene = NULL, *kgGene = NULL; /* get ccds genePred to get location */ sqlSafefFrag(where, sizeof(where), "chrom = '%s' and name = '%s'", seqName, ccdsId); gpr = genePredReaderQuery(conn, "ccdsGene", where); ccdsGene = genePredReaderAll(gpr); genePredReaderFree(&gpr); if (ccdsGene == NULL) errAbort("%s not found in ccdsGene table for chrom %s", ccdsId, seqName); else if (ccdsGene->next != NULL) errAbort("multiple %s rows found in ccdsGene table for chrom %s", ccdsId, seqName); /* get KG genePred, as need exact location for link */ sqlSafefFrag(where, sizeof(where), "name = '%s' and strand = '%s'", kgId, ccdsGene->strand); gpr = genePredReaderRangeQuery(conn, "knownGene", seqName, ccdsGene->txStart, ccdsGene->txEnd, where); kgGene = genePredReaderAll(gpr); genePredReaderFree(&gpr); if (kgGene == NULL) errAbort("%s not found in knownGene table for chrom %s", kgId, seqName); else if (kgGene->next != NULL) errAbort("multiple %s rows found in knownGene table for chrom %s", kgId, seqName); printf("../cgi-bin/hgGene?%s&%s=%s&%s=%s&%s=%s&%s=%d&%s=%d", cartSidUrlString(cart), "db", database, "hgg_gene", kgId, "hgg_chrom", seqName, "hgg_start", kgGene->txStart, "hgg_end", kgGene->txEnd); genePredFree(&ccdsGene); genePredFree(&kgGene); }
/* Load the genePreds stored in geneTable of database dbName and return them
 * as a list.  When the global onlyChrom is non-NULL the query is restricted
 * with a "chrom='...'" condition built from it.  The reader is released
 * before the connection is handed back, so no query state is left attached
 * to a connection that has been freed. */
static struct genePred *queryPreds(char *dbName, char *geneTable)
{
struct sqlConnection *conn = hAllocConn(dbName);
struct genePred *list = NULL;
char buf[2048];
char *extra = NULL;
struct genePredReader *reader;

if (onlyChrom != NULL)
    {
    safef(buf, sizeof buf, "chrom='%s'", onlyChrom);
    extra = buf;
    }

reader = genePredReaderQuery(conn, geneTable, extra);
list = genePredReaderAll(reader);
/* Free the reader while its connection is still ours. */
genePredReaderFree(&reader);
hFreeConn(&conn);
return list;
}
struct genePred *getOverlappingGeneDb(struct genePred **list, char *table, char *chrom, int cStart, int cEnd, char *name, int *retOverlap, char *db) { /* read all genes from a table find the gene with the biggest overlap. Cache the list of genes to so we only read it once */ struct genePred *el = NULL, *bestMatch = NULL, *gp = NULL; int overlap = 0 , bestOverlap = 0, i; int *eFrames; if (list == NULL) return NULL; if (*list == NULL) { struct genePred *gpList = NULL; struct sqlConnection *conn = sqlConnect(db); struct genePredReader *gpr = NULL; if (!hTableExistsDb(db,table)) table = altTable; if (!hTableExistsDb(db,table)) { verbose(2,"no table %s in %s\n",table, db); return NULL; } gpr = genePredReaderQuery(conn, table, NULL); verbose(1,"Loading Predictions from %s in %s\n",table, db); gpList = genePredReaderAll(gpr); if (gpList != NULL) { hashAdd(geneListHash, db, gpList); *list = gpList; } sqlDisconnect(&conn); } for (el = *list; el != NULL; el = el->next) { if (chrom != NULL && el->chrom != NULL) { overlap = 0; if ( sameString(chrom, el->chrom)) { for (i = 0 ; i<(el->exonCount); i++) { overlap += positiveRangeIntersection(cStart,cEnd, el->exonStarts[i], el->exonEnds[i]) ; } if (overlap > 20 && sameString(name, el->name)) { bestMatch = el; bestOverlap = overlap; *retOverlap = bestOverlap; } if (overlap > bestOverlap) { bestMatch = el; bestOverlap = overlap; *retOverlap = bestOverlap; } } } } if (bestMatch != NULL) { /* Allocate genePred and fill in values. 
*/ AllocVar(gp); gp->name = cloneString(bestMatch->name); gp->chrom = cloneString(bestMatch->chrom); gp->strand[1] = bestMatch->strand[1]; gp->strand[0] = bestMatch->strand[0]; gp->txStart = bestMatch->txStart; gp->txEnd = bestMatch->txEnd; gp->cdsStart = bestMatch->cdsStart; gp->cdsEnd = bestMatch->cdsEnd; gp->exonCount = bestMatch->exonCount; AllocArray(gp->exonStarts, bestMatch->exonCount); AllocArray(gp->exonEnds, bestMatch->exonCount); for (i=0; i<bestMatch->exonCount; ++i) { gp->exonStarts[i] = bestMatch->exonStarts[i] ; gp->exonEnds[i] = bestMatch->exonEnds[i] ; } gp->optFields = bestMatch->optFields; gp->id = bestMatch->id; if (bestMatch->optFields & genePredName2Fld) gp->name2 = cloneString(bestMatch->name2); else gp->name2 = NULL; if (bestMatch->optFields & genePredCdsStatFld) { gp->cdsStartStat = bestMatch->cdsStartStat; gp->cdsEndStat = bestMatch->cdsEndStat; } if (bestMatch->optFields & genePredExonFramesFld) { gp->exonFrames = AllocArray(eFrames, bestMatch->exonCount); for (i = 0; i < bestMatch->exonCount; i++) gp->exonFrames[i] = bestMatch->exonFrames[i]; } eFrames = gp->exonFrames; } return gp; }