static void fakePslFromGenePred(char *db, char *fileTbl, char *pslOut, char *cdsOut)
/* Read every genePred from fileTbl and hand each one to cnvGenePred together
 * with the two output files.  fileTbl is read as a file when it exists on
 * disk; otherwise it is queried as a table in db.  pslOut and cdsOut are
 * opened for writing (mustOpen) and closed before returning. */
{
struct sqlConnection *conn = NULL;
struct genePredReader *gpr;
struct genePred *gp;
FILE *pslFh = mustOpen(pslOut, "w");
FILE *cdsFh = mustOpen(cdsOut, "w");
struct hash *chromHash = getChromHash(db);

if (fileExists(fileTbl))
    {
    gpr = genePredReaderFile(fileTbl, NULL);
    }
else
    {
    conn = hAllocConn(db);
    gpr = genePredReaderQuery(conn, fileTbl, NULL);
    }

/* NOTE(review): records are not freed here; confirm whether cnvGenePred
 * takes ownership before adding a genePredFree. */
while ((gp = genePredReaderNext(gpr)) != NULL)
    {
    cnvGenePred(chromHash, gp, pslFh, cdsFh);
    }
genePredReaderFree(&gpr);

/* The connection must outlive the reader: rows are fetched lazily, so it is
 * only handed back once the reader has been freed. */
if (conn != NULL)
    hFreeConn(&conn);
carefulClose(&pslFh);
carefulClose(&cdsFh);
}
int palOutPredsInBeds(struct sqlConnection *conn, struct cart *cart,
	struct bed *beds, char *table )
/* For each bed, collect the genePreds in table with the same name and chrom
 * whose txStart..txEnd range satisfies txEnd > chromStart and
 * txStart <= chromEnd, then pass the combined list to palOutPredList.
 * Returns whatever palOutPredList returns, or 0 when nothing matched. */
{
struct genePred *matches = NULL;
struct bed *bed;

for (bed = beds; bed != NULL; bed = bed->next)
    {
    char where[10 * 1024];
    struct genePredReader *reader;
    struct genePred *pred;

    sqlSafefFrag(where, sizeof where,
	"name = '%s' and chrom='%s' and txEnd > %d and txStart <= %d",
	bed->name, bed->chrom, bed->chromStart, bed->chromEnd);

    reader = genePredReaderQuery( conn, table, where);
    for (pred = genePredReaderNext(reader); pred != NULL;
	    pred = genePredReaderNext(reader))
	{
	slAddHead(&matches, pred);
	}
    genePredReaderFree(&reader);
    }

if (matches == NULL)
    return 0;

/* Records were pushed on the head, so flip the list back into read order. */
slReverse(&matches);
int outCount = palOutPredList( conn, cart, matches);
genePredFreeList(&matches);
return outCount;
}
struct genePred *getPredsFromBeds(char *file, char *table, char *db) { struct sqlConnection *conn = hAllocConn(db); struct lineFile *lf = lineFileOpen(file, TRUE); char *words[5000]; int wordsRead; struct genePred *list = NULL; while( (wordsRead = lineFileChopNext(lf, words, sizeof(words)/sizeof(char *)) )) { if (wordsRead != 4) errAbort("file '%s' must be bed4. Line %d has %d fields", file, lf->lineIx, wordsRead); char where[10 * 1024]; sqlSafefFrag(where, sizeof where, "name = '%s' and chrom='%s' and txStart = %d and txEnd = %d", words[3], words[0], sqlUnsigned(words[1]), sqlUnsigned(words[2])); //printf("table %s where %s\n",table,where); struct genePredReader *reader = genePredReaderQuery( conn, table, where); struct genePred *pred; while ((pred = genePredReaderNext(reader)) != NULL) slAddHead(&list, pred); genePredReaderFree(&reader); } hFreeConn(&conn); if (list != NULL) slReverse(&list); return list; }
static void checkGenePred(char *fileTbl)
/* Run checkAGenePred on every record of fileTbl, numbering records from 1.
 * fileTbl is read as a file when it exists on disk; otherwise it is queried
 * as a table in gDb, and it is an error if gDb is not set. */
{
struct sqlConnection *conn = NULL;
struct genePredReader *reader;
struct genePred *pred;
int recNum = 0;

if (fileExists(fileTbl))
    {
    reader = genePredReaderFile(fileTbl, NULL);
    }
else if (gDb == NULL)
    {
    errAbort("file %s doesn't exist, must specify -db=db if this is a table", fileTbl);
    }
else
    {
    conn = hAllocConn(gDb);
    reader = genePredReaderQuery(conn, fileTbl, NULL);
    }

for (pred = genePredReaderNext(reader); pred != NULL;
	pred = genePredReaderNext(reader))
    {
    recNum += 1;
    checkAGenePred(fileTbl, recNum, pred);
    genePredFree(&pred);
    }

genePredReaderFree(&reader);
hFreeConn(&conn);
}
struct genePred *genePredReaderLoadQuery(struct sqlConnection* conn, char* table, char* where)
/* Open a reader on table (restricted by where), load everything it yields
 * with genePredReaderAll, free the reader and return the loaded list. */
{
struct genePredReader *reader;
struct genePred *loaded;

reader = genePredReaderQuery(conn, table, where);
loaded = genePredReaderAll(reader);
genePredReaderFree(&reader);

return loaded;
}
static void getGeneAnns(struct sqlConnection *conn, struct hash *refSeqVerInfoTbl, char *outFile)
/* Read every genePred from the refGene table and pass each one, together with
 * refSeqVerInfoTbl and the opened outFile, to processGenePred. */
{
struct genePredReader *reader = genePredReaderQuery(conn, "refGene", NULL);
FILE *out = mustOpen(outFile, "w");
struct genePred *pred;

for (pred = genePredReaderNext(reader); pred != NULL;
	pred = genePredReaderNext(reader))
    {
    processGenePred(out, refSeqVerInfoTbl, pred);
    genePredFree(&pred);
    }

carefulClose(&out);
genePredReaderFree(&reader);
}
/* load one or more genePreds from the database */ struct genePred *getPredsForName(char *name, char *geneTable, char *db) { struct sqlConnection *conn = hAllocConn(db); struct genePred *list = NULL; char splitTable[HDB_MAX_TABLE_STRING]; struct genePred *gene; boolean hasBin; struct genePredReader *reader; boolean found = hFindSplitTable(db, NULL, geneTable, splitTable, &hasBin); if (!found) errAbort("can't find table %s\n", geneTable); char extra[2048]; if (onlyChrom != NULL) safef(extra, sizeof extra, "name='%s' and chrom='%s'", name, onlyChrom); else safef(extra, sizeof extra, "name='%s'", name); reader = genePredReaderQuery( conn, splitTable, extra); while ((gene = genePredReaderNext(reader)) != NULL) { verbose(2, "got gene %s\n",gene->name); slAddHead(&list, gene); } if (list == NULL) errAbort("no genePred for gene %s in %s\n",name, geneTable); slReverse(&list); genePredReaderFree(&reader); hFreeConn(&conn); return list; }
static struct genePred *queryPreds(char *dbName, char *geneTable)
/* Load all genePreds from geneTable in dbName, restricted to onlyChrom when
 * that is set.  Returns the list produced by genePredReaderAll. */
{
struct sqlConnection *conn = hAllocConn(dbName);
struct genePred *list = NULL;
char buf[2048];
char *extra = NULL;
struct genePredReader *reader;

if (onlyChrom != NULL)
    {
    /* sqlSafefFrag escapes the chromosome name instead of pasting it
     * verbatim into the SQL. */
    sqlSafefFrag(buf, sizeof buf, "chrom='%s'", onlyChrom);
    extra = buf;
    }

reader = genePredReaderQuery( conn, geneTable, extra);
list = genePredReaderAll(reader);

/* Free the reader before giving the connection back; it was previously
 * leaked along with its query result. */
genePredReaderFree(&reader);
hFreeConn(&conn);

return list;
}
static void printCcdsHgGeneUrl(struct sqlConnection *conn, char *ccdsId, char* kgId) /* output a URL to hgGene for a ccds */ { char where[128]; struct genePredReader *gpr; struct genePred *ccdsGene = NULL, *kgGene = NULL; /* get ccds genePred to get location */ sqlSafefFrag(where, sizeof(where), "chrom = '%s' and name = '%s'", seqName, ccdsId); gpr = genePredReaderQuery(conn, "ccdsGene", where); ccdsGene = genePredReaderAll(gpr); genePredReaderFree(&gpr); if (ccdsGene == NULL) errAbort("%s not found in ccdsGene table for chrom %s", ccdsId, seqName); else if (ccdsGene->next != NULL) errAbort("multiple %s rows found in ccdsGene table for chrom %s", ccdsId, seqName); /* get KG genePred, as need exact location for link */ sqlSafefFrag(where, sizeof(where), "name = '%s' and strand = '%s'", kgId, ccdsGene->strand); gpr = genePredReaderRangeQuery(conn, "knownGene", seqName, ccdsGene->txStart, ccdsGene->txEnd, where); kgGene = genePredReaderAll(gpr); genePredReaderFree(&gpr); if (kgGene == NULL) errAbort("%s not found in knownGene table for chrom %s", kgId, seqName); else if (kgGene->next != NULL) errAbort("multiple %s rows found in knownGene table for chrom %s", kgId, seqName); printf("../cgi-bin/hgGene?%s&%s=%s&%s=%s&%s=%s&%s=%d&%s=%d", cartSidUrlString(cart), "db", database, "hgg_gene", kgId, "hgg_chrom", seqName, "hgg_start", kgGene->txStart, "hgg_end", kgGene->txEnd); genePredFree(&ccdsGene); genePredFree(&kgGene); }
void kgGetCds(char *db, char *spDb, char *geneTable, FILE *outf) /* get CDS info */ { struct sqlConnection *conn = NULL; struct genePred *gp; int cdsCnt; struct genePredReader *gpr; int iExon, exonStart, exonEnd; if (db != NULL) conn = sqlConnect(db); gpr = genePredReaderQuery(conn, geneTable, NULL); while ((gp = genePredReaderNext(gpr)) != NULL) { cdsCnt = 0; for (iExon = 0; iExon < gp->exonCount; iExon++) { if (genePredCdsExon(gp, iExon, &exonStart, &exonEnd)) { sprintf(cdsBloc[cdsCnt], "%d-%d;", exonStart, exonEnd); cdsCnt++; } } if (cdsCnt > 0) { processAlign(db, spDb, gp->name, cdsCnt, outf); } else { fprintf(stderr, "%s does not have cds.\n", gp->name); } } sqlDisconnect(&conn); }
struct genePred *getOverlappingGeneDb(struct genePred **list, char *table, char *chrom, int cStart, int cEnd, char *name, int *retOverlap, char *db) { /* read all genes from a table find the gene with the biggest overlap. Cache the list of genes to so we only read it once */ struct genePred *el = NULL, *bestMatch = NULL, *gp = NULL; int overlap = 0 , bestOverlap = 0, i; int *eFrames; if (list == NULL) return NULL; if (*list == NULL) { struct genePred *gpList = NULL; struct sqlConnection *conn = sqlConnect(db); struct genePredReader *gpr = NULL; if (!hTableExistsDb(db,table)) table = altTable; if (!hTableExistsDb(db,table)) { verbose(2,"no table %s in %s\n",table, db); return NULL; } gpr = genePredReaderQuery(conn, table, NULL); verbose(1,"Loading Predictions from %s in %s\n",table, db); gpList = genePredReaderAll(gpr); if (gpList != NULL) { hashAdd(geneListHash, db, gpList); *list = gpList; } sqlDisconnect(&conn); } for (el = *list; el != NULL; el = el->next) { if (chrom != NULL && el->chrom != NULL) { overlap = 0; if ( sameString(chrom, el->chrom)) { for (i = 0 ; i<(el->exonCount); i++) { overlap += positiveRangeIntersection(cStart,cEnd, el->exonStarts[i], el->exonEnds[i]) ; } if (overlap > 20 && sameString(name, el->name)) { bestMatch = el; bestOverlap = overlap; *retOverlap = bestOverlap; } if (overlap > bestOverlap) { bestMatch = el; bestOverlap = overlap; *retOverlap = bestOverlap; } } } } if (bestMatch != NULL) { /* Allocate genePred and fill in values. 
*/ AllocVar(gp); gp->name = cloneString(bestMatch->name); gp->chrom = cloneString(bestMatch->chrom); gp->strand[1] = bestMatch->strand[1]; gp->strand[0] = bestMatch->strand[0]; gp->txStart = bestMatch->txStart; gp->txEnd = bestMatch->txEnd; gp->cdsStart = bestMatch->cdsStart; gp->cdsEnd = bestMatch->cdsEnd; gp->exonCount = bestMatch->exonCount; AllocArray(gp->exonStarts, bestMatch->exonCount); AllocArray(gp->exonEnds, bestMatch->exonCount); for (i=0; i<bestMatch->exonCount; ++i) { gp->exonStarts[i] = bestMatch->exonStarts[i] ; gp->exonEnds[i] = bestMatch->exonEnds[i] ; } gp->optFields = bestMatch->optFields; gp->id = bestMatch->id; if (bestMatch->optFields & genePredName2Fld) gp->name2 = cloneString(bestMatch->name2); else gp->name2 = NULL; if (bestMatch->optFields & genePredCdsStatFld) { gp->cdsStartStat = bestMatch->cdsStartStat; gp->cdsEndStat = bestMatch->cdsEndStat; } if (bestMatch->optFields & genePredExonFramesFld) { gp->exonFrames = AllocArray(eFrames, bestMatch->exonCount); for (i = 0; i < bestMatch->exonCount; i++) gp->exonFrames[i] = bestMatch->exonFrames[i]; } eFrames = gp->exonFrames; } return gp; }