int nextGapPos(char *chrom, int desiredPos, struct sqlConnection *conn)
/* Return the midpoint of the first gap on chrom that starts at or after
 * desiredPos and is longer than minGap.  Returns -1 when the query
 * produces no row.  Aborts if the gap table cannot be found in db. */
{
struct hTableInfo *gapInfo = hFindTableInfo(db, chrom, "gap");
struct dyString *sql = newDyString(1024);
struct sqlResult *result;
char **fields;
int midpoint = -1;

if (gapInfo == NULL)
    errAbort("table %s.gap doesn't exist", db);

/* Split tables are addressed as <chrom>_gap; a single table is filtered
 * on its chrom field instead. */
dyStringPrintf(sql, "select chromStart,chromEnd from ");
if (!gapInfo->isSplit)
    dyStringPrintf(sql, "gap where %s='%s' AND ", gapInfo->chromField, chrom);
else
    dyStringPrintf(sql, "%s_gap where ", chrom);
dyStringPrintf(sql,
    "(chromStart >= %d and chromEnd-chromStart > %d)"
    "    order by chromStart limit 1",
    desiredPos, minGap);

result = sqlGetResult(conn, sql->string);
freeDyString(&sql);

fields = sqlNextRow(result);
if (fields != NULL)
    {
    int gapStart = sqlSigned(fields[0]);
    int gapEnd = sqlSigned(fields[1]);
    midpoint = gapStart + (gapEnd - gapStart)/2;
    }
sqlFreeResult(&result);
return midpoint;
}
int nextRepeatPos(char *chrom, int desiredPos, struct sqlConnection *conn)
/* Return the midpoint of the first rmsk item on chrom that starts at or
 * after desiredPos, has milliDiv=0, is neither Simple_repeat nor
 * Low_complexity, and is longer than minRepeat.  Returns -1 when the query
 * produces no row.  Aborts if the rmsk table cannot be found in db. */
{
struct hTableInfo *rmskInfo = hFindTableInfo(db, chrom, "rmsk");
struct dyString *sql = newDyString(1024);
struct sqlResult *result;
char **fields;
int midpoint = -1;

if (rmskInfo == NULL)
    errAbort("table %s.rmsk doesn't exist", db);

/* Split tables are addressed as <chrom>_rmsk; a single table is filtered
 * on its chrom field instead. */
dyStringPrintf(sql, "select genoStart,genoEnd from ");
if (!rmskInfo->isSplit)
    dyStringPrintf(sql, "rmsk where %s='%s' AND ", rmskInfo->chromField, chrom);
else
    dyStringPrintf(sql, "%s_rmsk where ", chrom);
dyStringPrintf(sql,
    "(genoStart >= %d AND "
    "    milliDiv=0 AND "
    "    repClass<>'Simple_repeat' AND repClass<>'Low_complexity' AND "
    "    genoEnd-genoStart>%d) order by genoStart limit 1",
    desiredPos, minRepeat);

result = sqlGetResult(conn, sql->string);
freeDyString(&sql);

fields = sqlNextRow(result);
if (fields != NULL)
    {
    int repStart = sqlSigned(fields[0]);
    int repEnd = sqlSigned(fields[1]);
    midpoint = repStart + (repEnd - repStart)/2;
    }
sqlFreeResult(&result);
return midpoint;
}
static char *otherOrgPositionFromDb(struct otherOrg *otherOrg, char *id)
/* Look up id in the gene table of the other organism's database.
 * Returns a cloned string of the form
 *   "<chrom>:<start+1>-<end>&<geneTable>=<vis>&hgFind.matches=<id>"
 * or NULL when either the table info or the id is not found. */
{
struct hTableInfo *tableInfo = hFindTableInfo(otherOrg->db, NULL,
                                              otherOrg->geneTable);
if (tableInfo == NULL)
    return NULL;  // table not found

char *result = NULL;
struct sqlConnection *conn = hAllocConn(otherOrg->db);
char sql[512];
safef(sql, sizeof(sql),
      "select concat(%s, ':', %s+1, '-', %s) from %s "
      "where %s = '%s'",
      tableInfo->chromField, tableInfo->startField, tableInfo->endField,
      otherOrg->geneTable, tableInfo->nameField, id);
char *range = sqlQuickString(conn, sql);
if (range != NULL)
    {
    /* Append the track visibility setting and the matched id to the range. */
    char withParams[2048];
    safef(withParams, sizeof(withParams), "%s&%s=%s&hgFind.matches=%s",
          range,
          otherOrg->geneTable,
          hTrackOpenVis(sqlGetDatabase(conn), otherOrg->geneTable),
          id);
    result = cloneString(withParams);
    freez(&range);
    }
hFreeConn(&conn);
return result;
}
示例#4
0
void printBiggestGap(char *database, struct sqlConnection *conn,
        struct slName *chromList, struct hash *chromHash, char *track)
/* Look up track in database, figure out which type it is, call
 * appropriate biggest gap finder, and then print result.
 *
 * Output is one line: track, tab, chrom:start-end (start is printed
 * one-based), tab, gap size (with commas unless noComma is set).
 * Chromosomes whose name contains '_' are skipped unless allParts is set,
 * and chrY is skipped when female is set.  If no gap is found on any
 * chromosome the chrom column is printed as "(null)". */
{
struct trackDb *tdb = hTrackInfo(conn, track);
/* Do not dereference an empty chromList; a NULL chrom is passed instead. */
char *firstChrom = (chromList != NULL) ? chromList->name : NULL;
struct hTableInfo *hti = hFindTableInfo(database, firstChrom, tdb->table);
char *typeWord = cloneFirstWord(tdb->type);
boolean isBig = FALSE, isBigBed = FALSE;
struct bbiFile *bbi = NULL;
if (sameString(typeWord, "bigBed"))
    {
    isBig = TRUE;
    isBigBed = TRUE;
    bbi = bigBedFileOpen( bbiNameFromSettingOrTable(tdb, conn, tdb->table) );
    }
else if (sameString(typeWord, "bigWig"))
    {
    isBig = TRUE;
    bbi = bigWigFileOpen( bbiNameFromSettingOrTable(tdb, conn, tdb->table) );
    }
char *biggestChrom = NULL;
int biggestSize = 0, biggestStart = 0, biggestEnd = 0;

struct slName *chrom;
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    if (!allParts && strchr(chrom->name, '_'))  // Generally skip weird chroms
        continue;
    if (female && sameString(chrom->name, "chrY"))
        continue;
    int chromSize = hashIntVal(chromHash, chrom->name);
    struct rbTree *rt = rangeTreeNew();
    int start = 0, end = 0, size = 0;
    if (isBig)
        bigCoverageIntoTree(tdb, bbi, chrom->name, chromSize, rt, isBigBed);
    else
        tableCoverageIntoTree(hti, tdb, conn, chrom->name, chromSize, rt);
    if (rt->n > 0)      // Want to keep completely uncovered chromosome uncovered
        addGaps(conn, chrom->name, rt);
    biggestGapFromRangeTree(rt, chromSize, &start, &end, &size);
    if (size > biggestSize)
        {
        biggestSize = size;
        biggestStart = start;
        biggestEnd = end;
        biggestChrom = chrom->name;
        }
    rangeTreeFree(&rt);
    }
/* Passing a NULL pointer to %s is undefined behavior, so substitute an
 * explicit placeholder when no chromosome produced a gap. */
printf("%s\t%s:%d-%d\t", track,
       (biggestChrom != NULL) ? biggestChrom : "(null)",
       biggestStart+1, biggestEnd);
if (noComma)
    printf("%d", biggestSize);
else
    printLongWithCommas(stdout, biggestSize);
putchar('\n');
freez(&typeWord);
bbiFileClose(&bbi);
}
示例#5
0
void hgDeleteChrom(char *db, int chromCount, char *chromNames[])
/* hgDeleteChrom - output SQL commands to delete chrom(s) from db.
 *
 * For each name in chromNames (resolved through hgOfficialChromName; aborts
 * if unknown) this prints a delete for the chromInfo row and then, for every
 * table listed by SHOW TABLES that is positional, either a "drop table" (for
 * the split table belonging to this chrom) or a "delete from ... where"
 * statement.  Tables for which no table info can be found are skipped.
 * Nothing is executed; the SQL is only written to stdout. */
{
struct sqlConnection *conn = hAllocConn(db);
struct sqlResult *sr = NULL;
char **row = NULL;
int i;
for (i=0;  i < chromCount;  i++)
    {
    char *chrom = hgOfficialChromName(db, chromNames[i]);
    if (chrom == NULL)
        errAbort("Error: \"%s\" is not a chromosome in %s.",
                 chromNames[i], db);
    printf("delete from %s.chromInfo where chrom = \"%s\";\n", db, chrom);
    sr = sqlGetResult(conn, "NOSQLINJ SHOW TABLES");
    while((row = sqlNextRow(sr)) != NULL)
        {
        char *table = row[0];
        /* sscanf field widths do not count the terminating NUL, so each
         * buffer is one byte larger than the width used in the format. */
        char tChrom[32 + 1];
        char rootName[128 + 1];
        struct hTableInfo *hti = NULL;
        if (sscanf(table, "chr%32[^_]_random_%128s", tChrom, rootName) == 2 ||
            sscanf(table, "chr%32[^_]_%128s", tChrom, rootName) == 2)
            hti = hFindTableInfo(db, chrom, rootName);
        else
            hti = hFindTableInfo(db, chrom, table);
        /* No table info (lookup failed) or not positional: nothing to emit. */
        if (hti == NULL || !hti->isPos)
            continue;
        if (hti->isSplit)
            {
            /* Only drop the split table that belongs to this chrom. */
            char tableCmp[256];
            safef(tableCmp, sizeof(tableCmp), "%s_%s", chrom, hti->rootName);
            if (sameWord(table, tableCmp))
                printf("drop table %s.%s;\n", db, table);
            }
        else
            printf("delete from %s.%s where %s = \"%s\";\n",
                   db, table, hti->chromField, chrom);
        }
    sqlFreeResult(&sr);
    }
hFreeConn(&conn);
}
示例#6
0
static void positionalTblCheck(char *db, char *table)
/* positionalTblCheck - check that positional tables are sorted.
 * Aborts if the table cannot be found or is not positional; otherwise runs
 * checkTblOrder on every name returned by hSplitTableNames. */
{
struct hTableInfo *hti = hFindTableInfo(db, NULL, table);
if (hti == NULL)
    errAbort("cant find table %s.%s or %s.*_%s", db, table, db, table);
if (!hti->isPos)
    errAbort("%s.%s does not appear to be a positional table", db, table);

struct slName *names = hSplitTableNames(db, table);
struct sqlConnection *conn = hAllocConn(db);
struct slName *cur = names;
while (cur != NULL)
    {
    checkTblOrder(conn, cur->name, hti->chromField, hti->startField);
    cur = cur->next;
    }
hFreeConn(&conn);
}
void hgSelect(char *db, char *table, char *outFile)
/* Select from genome tables into outFile, handling split tables and the
 * bin column.  Aborts if neither table nor a split *_table is found. */
{
/* Resolve the table info first so a missing table is reported before any
 * connection or output file is opened. */
struct hTableInfo *hti = hFindTableInfo(db, NULL, table);
if (hti == NULL)
    errAbort("Error: no table: %s or *_%s", table, table);

struct sqlConnection *conn = hAllocConn(db);
FILE *out = mustOpen(outFile, "w");
if (!hti->isSplit)
    selectFromTable(table, hti, conn, out);
else
    selectFromSplitTable(db, table, hti, conn, out);

carefulClose(&out);
/* NOTE(review): conn is not released in this function - confirm whether
 * that is intentional. */
}
void hgSeqOptions(struct cart *cart, char *db, char *table)
/* Print out HTML FORM entries for gene region and sequence display options.
 * A NULL or empty table name is passed on as NULL table info; otherwise the
 * table info is looked up and a failure to find it aborts the page. */
{
    struct hTableInfo *tableInfo = NULL;
    char chromName[32];
    char trackRoot[256];

    if (table != NULL && table[0] != 0)
    {
        /* Split the table name into root name and chrom before the lookup. */
        hParseTableName(db, table, trackRoot, chromName);
        tableInfo = hFindTableInfo(db, chromName, trackRoot);
        if (tableInfo == NULL)
            webAbort("Error", "Could not find table info for table %s (%s)",
                     trackRoot, table);
    }
    hgSeqOptionsHtiCart(tableInfo, cart);
}
char *makeQuery(char *table, boolean sortByChromStart)
/* Make a query to get chrom,chromStart,chromEnd (or whatever the appropriate 
 * field names are for table), ordered by chrom and possibly by start.
 * The database is the one returned by hGetDb().  Aborts if the table info
 * cannot be found or the table is not positional.  The result is a cloned
 * string. */
{
char query[512];
char *db = hGetDb();
/* Use the (db, chrom, table) form, matching the other call sites; a NULL
 * chrom is what the other whole-table lookups pass. */
struct hTableInfo *hti = hFindTableInfo(db, NULL, table);
if (hti == NULL)
    errAbort("Can't find table info for %s.%s", db, table);
if (! hti->isPos)
    errAbort("Table must be positional, but looks like %s.%s isn't",
             db, table);
if (sortByChromStart)
    safef(query, sizeof(query), "select %s,%s,%s from %s order by %s,%s",
          hti->chromField, hti->startField, hti->endField, table,
          hti->chromField, hti->startField);
else
    safef(query, sizeof(query), "select %s,%s,%s from %s order by %s",
          hti->chromField, hti->startField, hti->endField, table,
          hti->chromField);
return(cloneString(query));
}
int hgSeqItemsInRange(char *db, char *table, char *chrom, int chromStart,
                      int chromEnd, char *sqlConstraints)
/* Print out dna sequence of all items (that match sqlConstraints, if nonNULL)
   in the given range in table.  Return number of items.  Aborts the page
   if no table info can be found for the table's root name. */
{
    char trackRoot[256];
    char tableChrom[32];

    /* Only the root name from the parse is used; the lookup is keyed on the
     * caller-supplied chrom. */
    hParseTableName(db, table, trackRoot, tableChrom);
    struct hTableInfo *tableInfo = hFindTableInfo(db, chrom, trackRoot);
    if (tableInfo == NULL)
        webAbort("Error", "Could not find table info for table %s (%s)",
                 trackRoot, table);

    struct bed *items = hGetBedRange(db, table, chrom, chromStart, chromEnd,
                                     sqlConstraints);
    int count = hgSeqBed(db, tableInfo, items);
    bedFreeList(&items);
    return count;
}
示例#11
0
struct bed *getRegionAsBed(
        char *db, char *table,  /* Database and table. */
        struct region *region,  /* Region to get data for. */
        char *filter,           /* Filter to add to SQL where clause if any. */
        struct hash *idHash,    /* Restrict to id's in this hash if non-NULL. */
        struct lm *lm,          /* Where to allocate memory. */
        int *retFieldCount)     /* Number of fields. */
/* Return a bed list of all items in the given range in table.
 * Custom tracks are read from CUSTOM_TRASH; wiggle tables are converted via
 * getWiggleAsBed and reported as 4 fields.  Aborts if no table info is found.
 * Cleanup result via lmCleanup(&lm) rather than bedFreeList.  */
{
struct hTableInfo *tableInfo;
struct sqlConnection *conn = NULL;
struct bed *beds = NULL;
char *sqlTable = NULL;
int fieldCount;

/* Pick the connection and the physical table name to query. */
if (!isCustomTrack(table))
    {
    struct trackDb *tdb;
    sqlTable = table;
    if (sameWord(db, database))
        tdb = tdbForTrack(db, table, &fullTrackList);
    else
        tdb = hTrackDbForTrack(db, table);
    if (tdb)
        conn = hAllocConnTrack(db, tdb);
    else
        conn = hAllocConn(db);
    tableInfo = hFindTableInfo(db, region->chrom, table);
    }
else
    {
    struct customTrack *ct = ctLookupName(table);
    sqlTable = ct->dbTableName;
    conn = hAllocConn(CUSTOM_TRASH);
    tableInfo = hFindTableInfo(CUSTOM_TRASH, region->chrom, sqlTable);
    }
if (tableInfo == NULL)
    errAbort("Could not find table info for table %s.%s", db,table);

if (isWiggle(db, table))
    {
    beds = getWiggleAsBed(db, table, region, filter, idHash, lm, conn);
    fieldCount = 4;
    }
else
    {
    char *fieldList = NULL;
    boolean knowIfProtein = FALSE, isProtein = FALSE;
    struct sqlResult *sr;
    char **row;

    bedSqlFieldsExceptForChrom(tableInfo, &fieldCount, &fieldList);
    boolean isPsl = htiIsPsl(tableInfo);
    boolean isGenePred = sameString("exonEnds", tableInfo->endsSizesField);
    boolean hasBlockStarts =
        (sameString("chromStarts", tableInfo->startsField) ||
         sameString("blockStarts", tableInfo->startsField));
    boolean isBedWithBlocks =
        (hasBlockStarts && sameString("blockSizes", tableInfo->endsSizesField));

    /* All beds have at least chrom,start,end.  The chrom is left out of the
     * query because it is already known from the region. */
    sr = regionQuery(conn, sqlTable, fieldList, region, TRUE, filter);
    while (sr != NULL && (row = sqlNextRow(sr)) != NULL)
        {
        /* With a name field present (third column here, since chrom is
         * omitted), drop rows whose name is not in idHash. */
        if (fieldCount >= 4 && idHash != NULL && !hashLookup(idHash, row[2]))
            continue;
        struct bed *item = bedFromRow(region->chrom, row, fieldCount, isPsl,
                                      isGenePred, isBedWithBlocks,
                                      &knowIfProtein, &isProtein, lm);
        slAddHead(&beds, item);
        }
    freez(&fieldList);
    sqlFreeResult(&sr);
    /* Items were pushed on the head, so restore query order. */
    slReverse(&beds);
    }
hFreeConn(&conn);
if (retFieldCount)
    *retFieldCount = fieldCount;
return(beds);
}
示例#12
0
static void chkPslTable(struct gbSelect* select, struct sqlConnection* conn,
                        char* rootTable, char* chrom,
                        struct metaDataTbls* metaDataTbls,
                        unsigned typeFlags)
/* Validate a PSL of a mrna/est to genome alignment against the metadata.  If
 * not a chromosome-specific table, chrom should be null.  Chromosome-specific
 * tables are not required to exist (for testing purposes).  Also count the
 * number of alignments of a mrna. */
{
char table[64];
char accWhere[64];
char query[512];

/* An explicit chrom table name is required: there is an mrna table which is
 * not psl, so using mrna as a root name with a chrom that doesn't exist
 * returns the mrna table instead of null. */
if (chrom == NULL)
    safef(table, sizeof(table), "%s", rootTable);
else
    safef(table, sizeof(table), "%s_%s", chrom, rootTable);

gbVerbEnter(3, "chkPslTable %s", table);

char *dbName = select->release->genome->database;
struct hTableInfo *info = hFindTableInfo(dbName, chrom, table);
if (info != NULL)
    {
    /* Skip the leading bin column when the table has one. */
    unsigned binCols = (info->hasBin) ? 1 : 0;
    unsigned rowIdx;
    struct sqlResult *sr;
    char **row;

    // FIXME: might be better as sqlDyString
    accWhere[0] = '\0';
    if (select->accPrefix != NULL)
        sqlSafefFrag(accWhere, sizeof(accWhere), " WHERE qName LIKE '%s%%'",
              select->accPrefix);
    sqlSafef(query, sizeof(query), "SELECT * FROM %s%-s", table, accWhere);
    sr = sqlGetResult(conn, query);
    for (rowIdx = 0; (row = sqlNextRow(sr)) != NULL; rowIdx++)
        {
        struct psl *psl = pslLoad(row + binCols);
        chkPsl(psl, rowIdx, dbName, table, metaDataTbls, typeFlags);
        pslFree(&psl);
        }
    sqlFreeResult(&sr);
    }
else if (chrom == NULL)
    {
    /* Only the all-chromosome table is required to exist. */
    if (testMode)
        fprintf(stderr, "Warning: no psl table %s.%s\n", dbName, table);
    else
        gbError("no psl table %s.%s", dbName, table);
    }
gbVerbLeave(3, "chkPslTable %s", table);
}
示例#13
0
int checkTableCoords(char *db)
/* Check several invariants (see comments in check*() above), 
 * summarize errors, return nonzero if there are errors.
 * Checks theTable when it is set (aborting if it does not exist), otherwise
 * every table returned by getTableNames.  Non-positional tables are skipped. */
{
struct sqlConnection *conn = hAllocConn(db);
struct slName *everyChrom = hAllChromNames(db);
struct slName *tables = NULL, *tableEl = NULL;
boolean sawError = FALSE;

if (theTable == NULL)
    tables = getTableNames(conn);
else if (!sqlTableExists(conn, theTable))
    errAbort("Error: specified table \"%s\" does not exist in database %s.",
             theTable, db);
else
    tables = newSlName(theTable);

for (tableEl = tables;  tableEl != NULL;  tableEl = tableEl->next)
    {
    char *table = tableEl->name;
    char tableChrom[32], trackName[128], tableChromPrefix[33];
    struct slName *chroms = NULL, *chromEl = NULL;
    struct hTableInfo *hti = NULL;

    hParseTableName(db, table, trackName, tableChrom);
    hti = hFindTableInfo(db, tableChrom, trackName);
    if (hti == NULL || !hti->isPos)
        continue;
    /* watch out for presence of both split and non-split tables; 
     * hti for non-split will be replaced with hti of split. */
    if (splitAndNonSplitExist(conn, table, tableChrom))
        continue;
    safef(tableChromPrefix, sizeof(tableChromPrefix), "%s_", tableChrom);
    /* A split table covers only its own chrom; otherwise check them all. */
    chroms = hti->isSplit ? newSlName(tableChrom) : everyChrom;

    /* invariant: chrom must be described in chromInfo. */
    /* items with bad chrom will be invisible to hGetBedRange(), so 
     * catch them here by SQL query. */
    /* The SQL query is too huge for scaffold-based db's, check count: */
    if (hChromCount(db) > MAX_SEQS_SUPPORTED)
        continue;

    if (isNotEmpty(hti->chromField))
        {
        /* Count rows whose chrom field matches none of the expected chroms. */
        struct dyString *bigQuery = newDyString(1024);
        dyStringClear(bigQuery);
        sqlDyStringPrintf(bigQuery, "select count(*) from %s where ",
                          table);
        for (chromEl = chroms;  chromEl != NULL;  chromEl = chromEl->next)
            {
            sqlDyStringPrintf(bigQuery, "%s != '%s' ",
                              hti->chromField, chromEl->name);
            if (chromEl->next != NULL)
                dyStringAppend(bigQuery, "AND ");
            }
        sawError |= reportErrors(BAD_CHROM, table,
                                 sqlQuickNum(conn, bigQuery->string));
        dyStringFree(&bigQuery);
        }

    for (chromEl = chroms;  chromEl != NULL;  chromEl = chromEl->next)
        {
        char *chrom = chromEl->name;
        struct bed *items = hGetBedRange(db, table, chrom, 0, 0, NULL);
        if (hti->isSplit && isNotEmpty(hti->chromField))
            sawError |= checkSplitTableOnlyChrom(items, table, hti,
                                                 tableChrom);
        sawError |= checkStartEnd(items, table, hti,
                                  testChromSize(chrom));
        if (hti->hasCDS)
            sawError |= checkCDSStartEnd(items, table, hti);
        if (hti->hasBlocks && !ignoreBlocks)
            sawError |= checkBlocks(items, table, hti);
        bedFreeList(&items);
        }
    }
return sawError;
}