int nextGapPos(char *chrom, int desiredPos, struct sqlConnection *conn) { /* Find next gap on the chrom and return midpoint */ struct sqlResult *sr; char **row; int pos = -1; int start, end; struct hTableInfo *hti = hFindTableInfo(db, chrom, "gap"); struct dyString *query = newDyString(1024); if (hti == NULL) errAbort("table %s.gap doesn't exist", db); dyStringPrintf(query, "select chromStart,chromEnd from "); if (hti->isSplit) dyStringPrintf(query, "%s_gap where ", chrom); else dyStringPrintf(query, "gap where %s='%s' AND ", hti->chromField, chrom); dyStringPrintf(query, "(chromStart >= %d and chromEnd-chromStart > %d)\ order by chromStart limit 1", desiredPos, minGap); sr = sqlGetResult(conn, query->string); freeDyString(&query); if ((row = sqlNextRow(sr)) != NULL) { start = sqlSigned(row[0]); end = sqlSigned(row[1]); pos = start + (end - start)/2; } sqlFreeResult(&sr); return pos; }
int nextRepeatPos(char *chrom, int desiredPos, struct sqlConnection *conn) /* Find next 0% diverged repeat on the chrom and return midpoint */ { struct sqlResult *sr; char **row; int pos = -1; int start, end; struct hTableInfo *hti = hFindTableInfo(db, chrom, "rmsk"); struct dyString *query = newDyString(1024); if (hti == NULL) errAbort("table %s.rmsk doesn't exist", db); dyStringPrintf(query, "select genoStart,genoEnd from "); if (hti->isSplit) dyStringPrintf(query, "%s_rmsk where ", chrom); else dyStringPrintf(query, "rmsk where %s='%s' AND ", hti->chromField, chrom); dyStringPrintf(query, "(genoStart >= %d AND \ milliDiv=0 AND \ repClass<>'Simple_repeat' AND repClass<>'Low_complexity' AND \ genoEnd-genoStart>%d) order by genoStart limit 1", desiredPos, minRepeat); sr = sqlGetResult(conn, query->string); freeDyString(&query); if ((row = sqlNextRow(sr)) != NULL) { start = sqlSigned(row[0]); end = sqlSigned(row[1]); pos = start + (end - start)/2; } sqlFreeResult(&sr); return pos; }
static char *otherOrgPositionFromDb(struct otherOrg *otherOrg, char *id) /* Get position of id from other organism database, if possible. */ { struct hTableInfo *hti = hFindTableInfo(otherOrg->db, NULL, otherOrg->geneTable); if (hti == NULL) return NULL; // table not found struct sqlConnection *conn = hAllocConn(otherOrg->db); char query[512]; safef(query, sizeof(query), "select concat(%s, ':', %s+1, '-', %s) from %s " "where %s = '%s'", hti->chromField, hti->startField, hti->endField, otherOrg->geneTable, hti->nameField, id); char *pos = sqlQuickString(conn, query); if (pos != NULL) { char posPlus[2048]; safef(posPlus, sizeof(posPlus), "%s&%s=%s&hgFind.matches=%s", pos, otherOrg->geneTable, hTrackOpenVis(sqlGetDatabase(conn), otherOrg->geneTable), id); hFreeConn(&conn); freez(&pos); return cloneString(posPlus); } else { hFreeConn(&conn); return NULL; } }
void printBiggestGap(char *database, struct sqlConnection *conn, struct slName *chromList, struct hash *chromHash, char *track) /* Look up track in database, figure out which type it is, call * appropriate biggest gap finder, and then print result. */ { struct trackDb *tdb = hTrackInfo(conn, track); struct hTableInfo *hti = hFindTableInfo(database, chromList->name, tdb->table); char *typeWord = cloneFirstWord(tdb->type); boolean isBig = FALSE, isBigBed = FALSE; struct bbiFile *bbi = NULL; if (sameString(typeWord, "bigBed")) { isBig = TRUE; isBigBed = TRUE; bbi = bigBedFileOpen( bbiNameFromSettingOrTable(tdb, conn, tdb->table) ); } else if (sameString(typeWord, "bigWig")) { isBig = TRUE; bbi = bigWigFileOpen( bbiNameFromSettingOrTable(tdb, conn, tdb->table) ); } char *biggestChrom = NULL; int biggestSize = 0, biggestStart = 0, biggestEnd = 0; struct slName *chrom; for (chrom = chromList; chrom != NULL; chrom = chrom->next) { if (!allParts && strchr(chrom->name, '_')) // Generally skip weird chroms continue; if (female && sameString(chrom->name, "chrY")) continue; int chromSize = hashIntVal(chromHash, chrom->name); struct rbTree *rt = rangeTreeNew(); int start = 0, end = 0, size = 0; if (isBig) bigCoverageIntoTree(tdb, bbi, chrom->name, chromSize, rt, isBigBed); else tableCoverageIntoTree(hti, tdb, conn, chrom->name, chromSize, rt); if (rt->n > 0) // Want to keep completely uncovered chromosome uncovered addGaps(conn, chrom->name, rt); biggestGapFromRangeTree(rt, chromSize, &start, &end, &size); if (size > biggestSize) { biggestSize = size; biggestStart = start; biggestEnd = end; biggestChrom = chrom->name; } rangeTreeFree(&rt); } printf("%s\t%s:%d-%d\t", track, biggestChrom, biggestStart+1, biggestEnd); if (noComma) printf("%d", biggestSize); else printLongWithCommas(stdout, biggestSize); putchar('\n'); freez(&typeWord); bbiFileClose(&bbi); }
void hgDeleteChrom(char *db, int chromCount, char *chromNames[]) /* hgDeleteChrom - output SQL commands to delete chrom(s) from db. */ { struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr = NULL; char **row = NULL; int i; for (i=0; i < chromCount; i++) { char *chrom = hgOfficialChromName(db, chromNames[i]); if (chrom == NULL) errAbort("Error: \"%s\" is not a chromosome in %s.", chromNames[i], db); printf("delete from %s.chromInfo where chrom = \"%s\";\n", db, chrom); sr = sqlGetResult(conn, "NOSQLINJ SHOW TABLES"); while((row = sqlNextRow(sr)) != NULL) { char *table = row[0]; char tChrom[32]; char rootName[128]; struct hTableInfo *hti = NULL; if (sscanf(table, "chr%32[^_]_random_%128s", tChrom, rootName) == 2 || sscanf(table, "chr%32[^_]_%128s", tChrom, rootName) == 2) hti = hFindTableInfo(db, chrom, rootName); else hti = hFindTableInfo(db, chrom, table);; if (hti->isPos) { if (hti->isSplit) { char tableCmp[256]; safef(tableCmp, sizeof(tableCmp), "%s_%s", chrom, hti->rootName); if (sameWord(table, tableCmp)) printf("drop table %s.%s;\n", db, table); } else printf("delete from %s.%s where %s = \"%s\";\n", db, table, hti->chromField, chrom); } } sqlFreeResult(&sr); } hFreeConn(&conn); }
static void positionalTblCheck(char *db, char *table)
/* positionalTblCheck - run checkTblOrder on every table name returned by
 * hSplitTableNames for db.table, using the chrom and start field names from
 * the table info.  Aborts if the table info is missing or the table is not
 * positional. */
{
struct hTableInfo *info = hFindTableInfo(db, NULL, table);
if (info == NULL)
    errAbort("cant find table %s.%s or %s.*_%s", db, table, db, table);
if (!info->isPos)
    errAbort("%s.%s does not appear to be a positional table", db, table);

struct slName *splitNames = hSplitTableNames(db, table);
struct sqlConnection *conn = hAllocConn(db);
struct slName *cur = splitNames;
while (cur != NULL)
    {
    checkTblOrder(conn, cur->name, info->chromField, info->startField);
    cur = cur->next;
    }
hFreeConn(&conn);
}
void hgSelect(char *db, char *table, char *outFile) /* select from genome tables, handling split tables and bin column */ { struct hTableInfo *tblInfo; /* get table info upfront so don't have to wait long find for error */ tblInfo = hFindTableInfo(db, NULL, table); if (tblInfo == NULL) errAbort("Error: no table: %s or *_%s", table, table); struct sqlConnection *conn = hAllocConn(db); FILE* outFh = mustOpen(outFile, "w"); if (tblInfo->isSplit) selectFromSplitTable(db, table, tblInfo, conn, outFh); else selectFromTable(table, tblInfo, conn, outFh); carefulClose(&outFh); }
void hgSeqOptions(struct cart *cart, char *db, char *table)
/* Print out HTML FORM entries for gene region and sequence display options.
 * A NULL or empty table name is passed on as a NULL table info; otherwise
 * the table info is looked up and a missing one is reported via webAbort. */
{
struct hTableInfo *hti = NULL;

if (table != NULL && table[0] != 0)
    {
    char chrom[32];
    char rootName[256];
    hParseTableName(db, table, rootName, chrom);
    hti = hFindTableInfo(db, chrom, rootName);
    if (hti == NULL)
	webAbort("Error", "Could not find table info for table %s (%s)",
		 rootName, table);
    }
hgSeqOptionsHtiCart(hti, cart);
}
char *makeQuery(char *table, boolean sortByChromStart)
/* Make a query to get chrom,chromStart,chromEnd (or whatever the appropriate
 * field names are for table), ordered by chrom and possibly by start.
 * Returns a cloneString() copy of the query text.  Aborts if no table info
 * is found or the table is not positional. */
{
char query[512];
char *db = hGetDb();
/* Pass the database explicitly, with no particular chrom, matching the
 * (db, chrom, table) form used by the other hFindTableInfo callers. */
struct hTableInfo *hti = hFindTableInfo(db, NULL, table);

if (hti == NULL)
    errAbort("Can't find table info for %s.%s", db, table);
if (! hti->isPos)
    errAbort("Table must be positional, but looks like %s.%s isn't",
	     db, table);

if (sortByChromStart)
    safef(query, sizeof(query), "select %s,%s,%s from %s order by %s,%s",
	  hti->chromField, hti->startField, hti->endField, table,
	  hti->chromField, hti->startField);
else
    safef(query, sizeof(query), "select %s,%s,%s from %s order by %s",
	  hti->chromField, hti->startField, hti->endField, table,
	  hti->chromField);
return(cloneString(query));
}
int hgSeqItemsInRange(char *db, char *table, char *chrom, int chromStart,
		      int chromEnd, char *sqlConstraints)
/* Print out dna sequence of all items (that match sqlConstraints, if nonNULL)
 * in the given range in table.  Return number of items.
 * Reports a missing table info via webAbort. */
{
char rootName[256];
char parsedChrom[32];
hParseTableName(db, table, rootName, parsedChrom);

/* The table info is looked up with the caller's chrom, not the parsed one. */
struct hTableInfo *hti = hFindTableInfo(db, chrom, rootName);
if (hti == NULL)
    webAbort("Error", "Could not find table info for table %s (%s)",
	     rootName, table);

struct bed *items = hGetBedRange(db, table, chrom, chromStart, chromEnd,
				 sqlConstraints);
int count = hgSeqBed(db, hti, items);
bedFreeList(&items);
return count;
}
struct bed *getRegionAsBed( char *db, char *table, /* Database and table. */ struct region *region, /* Region to get data for. */ char *filter, /* Filter to add to SQL where clause if any. */ struct hash *idHash, /* Restrict to id's in this hash if non-NULL. */ struct lm *lm, /* Where to allocate memory. */ int *retFieldCount) /* Number of fields. */ /* Return a bed list of all items in the given range in table. * Cleanup result via lmCleanup(&lm) rather than bedFreeList. */ { char *fields = NULL; struct sqlResult *sr; struct hTableInfo *hti; struct bed *bedList=NULL, *bed; char **row; int fieldCount; boolean isPsl, isGenePred, isBedWithBlocks; boolean pslKnowIfProtein = FALSE, pslIsProtein = FALSE; struct sqlConnection *conn = NULL; char *dbTable = NULL; if (isCustomTrack(table)) { struct customTrack *ct = ctLookupName(table); dbTable = ct->dbTableName; conn = hAllocConn(CUSTOM_TRASH); hti = hFindTableInfo(CUSTOM_TRASH, region->chrom, dbTable); } else { dbTable = table; struct trackDb *tdb; if(sameWord(db, database)) tdb = tdbForTrack(db, table, &fullTrackList); else tdb = hTrackDbForTrack(db, table); conn = (tdb ? hAllocConnTrack(db, tdb) : hAllocConn(db)); hti = hFindTableInfo(db, region->chrom, table); } if (hti == NULL) errAbort("Could not find table info for table %s.%s", db,table); if (isWiggle(db, table)) { bedList = getWiggleAsBed(db, table, region, filter, idHash, lm, conn); fieldCount = 4; } else { bedSqlFieldsExceptForChrom(hti, &fieldCount, &fields); isPsl = htiIsPsl(hti); isGenePred = sameString("exonEnds", hti->endsSizesField); isBedWithBlocks = ( (sameString("chromStarts", hti->startsField) || sameString("blockStarts", hti->startsField)) && sameString("blockSizes", hti->endsSizesField)); /* All beds have at least chrom,start,end. We omit the chrom * from the query since we already know it. 
*/ sr = regionQuery(conn, dbTable, fields, region, TRUE, filter); while (sr != NULL && (row = sqlNextRow(sr)) != NULL) { /* If have a name field apply hash filter. */ if (fieldCount >= 4 && idHash != NULL) if (!hashLookup(idHash, row[2])) continue; bed = bedFromRow(region->chrom, row, fieldCount, isPsl, isGenePred, isBedWithBlocks, &pslKnowIfProtein, &pslIsProtein, lm); slAddHead(&bedList, bed); } freez(&fields); sqlFreeResult(&sr); slReverse(&bedList); } hFreeConn(&conn); if (retFieldCount) *retFieldCount = fieldCount; return(bedList); }
static void chkPslTable(struct gbSelect* select, struct sqlConnection* conn, char* rootTable, char* chrom, struct metaDataTbls* metaDataTbls, unsigned typeFlags) /* Validate a PSL of a mrna/est to genome alignment against the metadata. If * not a chromosome-specific table, chrom should be null. Chromosome-specific * tables are not required to exist (for testing purposes). Also count the * number of alignments of a mrna. */ { struct hTableInfo* tableInfo; char table[64]; unsigned iRow = 0; unsigned rowOffset; char accWhere[64]; char query[512]; struct sqlResult *sr; char **row; /* need to specify an explicit chrom table, as there is an mrna table which is * not psl, so using mrna as a root name with a chrom that doesn't exist * returns the mrna instead of null */ if (chrom != NULL) safef(table, sizeof(table), "%s_%s", chrom, rootTable); else safef(table, sizeof(table), "%s", rootTable); gbVerbEnter(3, "chkPslTable %s", table); tableInfo = hFindTableInfo(select->release->genome->database, chrom, table); if (tableInfo == NULL) { /* If all table, require it */ if (chrom == NULL) { if (testMode) fprintf(stderr, "Warning: no psl table %s.%s\n", select->release->genome->database, table); else gbError("no psl table %s.%s", select->release->genome->database, table); } } else { rowOffset = (tableInfo->hasBin) ? 1 : 0; // FIXME: might be better as sqlDyString accWhere[0] = '\0'; if (select->accPrefix != NULL) sqlSafefFrag(accWhere, sizeof(accWhere), " WHERE qName LIKE '%s%%'", select->accPrefix); sqlSafef(query, sizeof(query), "SELECT * FROM %s%-s", table, accWhere); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { struct psl* psl = pslLoad(row+rowOffset); chkPsl(psl, iRow, select->release->genome->database, table, metaDataTbls, typeFlags); pslFree(&psl); iRow++; } sqlFreeResult(&sr); } gbVerbLeave(3, "chkPslTable %s", table); }
int checkTableCoords(char *db) /* Check several invariants (see comments in check*() above), * summarize errors, return nonzero if there are errors. */ { struct sqlConnection *conn = hAllocConn(db); struct slName *tableList = NULL, *curTable = NULL; struct slName *allChroms = NULL; boolean gotError = FALSE; allChroms = hAllChromNames(db); if (theTable == NULL) tableList = getTableNames(conn); else if (sqlTableExists(conn, theTable)) tableList = newSlName(theTable); else errAbort("Error: specified table \"%s\" does not exist in database %s.", theTable, db); for (curTable = tableList; curTable != NULL; curTable = curTable->next) { struct hTableInfo *hti = NULL; struct slName *chromList = NULL, *chromPtr = NULL; char *table = curTable->name; char tableChrom[32], trackName[128], tableChromPrefix[33]; hParseTableName(db, table, trackName, tableChrom); hti = hFindTableInfo(db, tableChrom, trackName); if (hti != NULL && hti->isPos) { /* watch out for presence of both split and non-split tables; * hti for non-split will be replaced with hti of split. */ if (splitAndNonSplitExist(conn, table, tableChrom)) continue; safef(tableChromPrefix, sizeof(tableChromPrefix), "%s_", tableChrom); if (hti->isSplit) chromList = newSlName(tableChrom); else chromList = allChroms; /* invariant: chrom must be described in chromInfo. */ /* items with bad chrom will be invisible to hGetBedRange(), so * catch them here by SQL query. 
*/ /* The SQL query is too huge for scaffold-based db's, check count: */ if (hChromCount(db) <= MAX_SEQS_SUPPORTED) { if (isNotEmpty(hti->chromField)) { struct dyString *bigQuery = newDyString(1024); dyStringClear(bigQuery); sqlDyStringPrintf(bigQuery, "select count(*) from %s where ", table); for (chromPtr=chromList; chromPtr != NULL; chromPtr=chromPtr->next) { sqlDyStringPrintf(bigQuery, "%s != '%s' ", hti->chromField, chromPtr->name); if (chromPtr->next != NULL) dyStringAppend(bigQuery, "AND "); } gotError |= reportErrors(BAD_CHROM, table, sqlQuickNum(conn, bigQuery->string)); dyStringFree(&bigQuery); } for (chromPtr=chromList; chromPtr != NULL; chromPtr=chromPtr->next) { char *chrom = chromPtr->name; struct bed *bedList = hGetBedRange(db, table, chrom, 0, 0, NULL); if (hti->isSplit && isNotEmpty(hti->chromField)) gotError |= checkSplitTableOnlyChrom(bedList, table, hti, tableChrom); gotError |= checkStartEnd(bedList, table, hti, testChromSize(chrom)); if (hti->hasCDS) gotError |= checkCDSStartEnd(bedList, table, hti); if (hti->hasBlocks && !ignoreBlocks) gotError |= checkBlocks(bedList, table, hti); bedFreeList(&bedList); } } } } return gotError; }