Ejemplo n.º 1
0
void gapFileToTable(struct sqlConnection *conn, char *gapFileName,
		    char *gapTableName)
/* Build a single gap table from a single gap file.
 *
 * Reads gapFileName as whitespace-chopped lines and keeps only the lines
 * whose fifth word starts with 'N' or 'U'.  Each kept line is written to
 * "<gapTableName>.tab" with a leading bin column and with chromStart
 * lowered by one.  Unless the global noLoad is set, gapTableName is then
 * recreated on conn (unsplit or split schema, depending on the global
 * unsplit) and loaded from the tab file, which is removed afterwards.
 *
 * Side effects: updates the global maxChromNameSize with the longest chrom
 * name seen on a gap line.  Aborts on short lines, on chrom names longer
 * than 254 characters, and on errors writing or closing the tab file. */
{
struct lineFile *lf = lineFileOpen(gapFileName, TRUE);
char tabFileName[256];
FILE *tabFile = NULL;
char *words[16];
int wordCount;

safef(tabFileName, sizeof(tabFileName), "%s.tab", gapTableName);
tabFile = mustOpen(tabFileName, "w");
while ((wordCount = lineFileChop(lf, words)) > 0)
    {
    if (wordCount < 5)
	errAbort("Short line %d of %s", lf->lineIx, lf->fileName);
    if (words[4][0] == 'N' || words[4][0] == 'U')
	{
	/* AGP gap lines carry at least 8 columns; the loader below is
	 * presumed to read all of them, so reject short lines instead of
	 * letting it pick up stale words[] entries from an earlier line. */
	if (wordCount < 8)
	    errAbort("Short gap line %d of %s: expecting at least 8 words, got %d",
		    lf->lineIx, lf->fileName, wordCount);
	int len = strlen(words[0]);
	if (len > maxChromNameSize)
	    {
	    maxChromNameSize = len;
	    if (maxChromNameSize > 254)
		errAbort("ERROR: chrom name size is over 254(%d) characters: "
			"'%s'", maxChromNameSize, words[0]);
	    }
	struct agpGap gap;
	agpGapStaticLoad(words, &gap);
	/* Lower the start by one before computing the bin and writing. */
	gap.chromStart -= 1;
	fprintf(tabFile, "%u\t", hFindBin(gap.chromStart, gap.chromEnd));
	agpGapTabOut(&gap, tabFile);
	}
    }
lineFileClose(&lf);

/* A failed write (e.g. full disk) must not go unnoticed: the tab file is
 * about to be loaded into the database. */
if (ferror(tabFile))
    errAbort("Error writing %s", tabFileName);
if (fclose(tabFile) != 0)
    errAbort("Error closing %s", tabFileName);
tabFile = NULL;

if (! noLoad)
    {
    struct dyString *ds = newDyString(2048);
    char query[1024];

    /* The unsplit schema is sized by the longest chrom name seen. */
    if (unsplit)
	sqlDyStringPrintf(ds,  createGapUnsplit, gapTableName,
		maxChromNameSize, maxChromNameSize);
    else
	sqlDyStringPrintf(ds, createGapSplit, gapTableName);
    sqlRemakeTable(conn, gapTableName, ds->string);
    sqlSafef(query, sizeof(query), "LOAD data local infile '%s' into table %s", 
	  tabFileName, gapTableName);
    sqlUpdate(conn, query);
    remove(tabFileName);
    freeDyString(&ds);
    }
}
Ejemplo n.º 2
0
void getSeqGapsUnsplit(struct sqlConnection *conn, struct hash *chromHash)
/* Gather sequence gap ranges for all chromosomes with one query against an
 * unsplit gap table.  Rows arrive ordered by chrom; every run of rows that
 * shares a chrom name is collected into its own range tree, and that tree
 * is passed to setNGap() along with the chrom name and chromHash.  Nothing
 * is returned.  A single query is used because an unsplit table is
 * probably a scaffold assembly, where one query per scaffold is something
 * we *really* want to avoid. */
{
struct rbTreeNode **nodeStack = lmAlloc(qLm, 256 * sizeof(nodeStack[0]));
struct rbTree *gapTree = rbTreeNewDetailed(simpleRangeCmp, qLm, nodeStack);
int binOffset = hOffsetPastBin(sqlGetDatabase(conn), NULL, "gap");
char *curChrom = NULL;
char **row;
struct sqlResult *sr =
    sqlGetResult(conn, "NOSQLINJ select * from gap order by chrom");

for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    struct agpGap gap;
    struct simpleRange *range;

    agpGapStaticLoad(row + binOffset, &gap);

    /* Chrom changed: hand off the finished tree and start a new one. */
    if (curChrom != NULL && !sameString(curChrom, gap.chrom))
        {
        setNGap(curChrom, chromHash, gapTree);
        freeMem(curChrom);
        curChrom = NULL;
        nodeStack = lmAlloc(qLm, 256 * sizeof(nodeStack[0]));
        gapTree = rbTreeNewDetailed(simpleRangeCmp, qLm, nodeStack);
        }
    if (curChrom == NULL)
        curChrom = cloneString(gap.chrom);

    lmAllocVar(gapTree->lm, range);
    range->start = gap.chromStart;
    range->end = gap.chromEnd;
    rbTreeAdd(gapTree, range);
    }

/* Hand off the tree of the final chrom, if any rows were seen. */
if (curChrom != NULL)
    {
    setNGap(curChrom, chromHash, gapTree);
    freeMem(curChrom);
    }
sqlFreeResult(&sr);
}
Ejemplo n.º 3
0
struct rbTree *getSeqGaps(struct sqlConnection *conn, char *chrom)
/* Query the gap table for chrom and return a newly created range tree
 * holding one simpleRange per gap row (start = chromStart,
 * end = chromEnd).  The ranges are allocated from the tree's own local
 * memory (tree->lm). */
{
struct rbTree *gapTree = rbTreeNew(simpleRangeCmp);
int binOffset;
struct sqlResult *sr = hChromQuery(conn, "gap", chrom, NULL, &binOffset);
char **row;

for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    struct agpGap gap;
    struct simpleRange *span;

    agpGapStaticLoad(row + binOffset, &gap);
    lmAllocVar(gapTree->lm, span);
    span->start = gap.chromStart;
    span->end = gap.chromEnd;
    rbTreeAdd(gapTree, span);
    }
sqlFreeResult(&sr);
return gapTree;
}
Ejemplo n.º 4
0
int countBases(struct sqlConnection *conn, char *chrom, int chromSize,
    char *database)
/* Count bases in chromosome, generally not including gaps.
 *
 * When the global countGaps is set, gaps are counted as bases and
 * chromSize is returned unchanged.  Otherwise the total gap size for chrom
 * is subtracted from chromSize.  If the global clChrom is "all", the gap
 * totals for every chrom are loaded once into the global gapHash (via
 * loadAllGaps) and looked up from there, so the gap table is not re-read
 * for each chrom; otherwise the gap table is queried for this chrom. */
{
static boolean allGapsCached = FALSE;
int gapTotal = 0;

if (countGaps)
    return chromSize;

if (!sameWord(clChrom, "all"))
    {
    /* Single-chrom run: add up the gaps of just this chrom. */
    int binOffset;
    char **row;
    struct sqlResult *sr = hChromQuery(conn, "gap", chrom, NULL, &binOffset);

    for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
        {
        struct agpGap gap;
        int gapSize;

        agpGapStaticLoad(row + binOffset, &gap);
        gapSize = gap.chromEnd - gap.chromStart;
        gapTotal += gapSize;
        }
    sqlFreeResult(&sr);
    }
else
    {
    /* All-chrom run: fill the per-chrom gap hash on first use only. */
    if (!allGapsCached)
        {
        gapHash = loadAllGaps(conn, database);
        allGapsCached = TRUE;
        }
    gapTotal = hashIntValDefault(gapHash, chrom, 0);
    }

return chromSize - gapTotal;
}
Ejemplo n.º 5
0
long long gapsInRegion(struct sqlConnection *conn, struct region *regionList,
                       int limit)
/* Return the number of gap bases inside the regions of regionList.
 *	At most limit regions are examined; limit=0 means no limit.
 *	Each gap is clipped to the bounds of its region before being counted.
 *	Returns 0 when the table named by chromTable(conn, "gap") does not
 *	exist.
 */
{
    long long total = 0;
    char *splitTable = chromTable(conn, "gap");
    int examined = 0;
    struct region *region;

    if (!sqlTableExists(conn, splitTable))
    {
        freez(&splitTable);
        return total;
    }

    region = regionList;
    while (region != NULL)
    {
        int rowOffset;
        char **row;
        struct agpGap gap;
        struct sqlResult *sr;

        /* Stop once the requested number of regions has been handled. */
        if (limit && (examined >= limit))
            break;

        sr = hRangeQuery(conn, "gap", region->chrom, region->start,
                         region->end, NULL, &rowOffset);
        for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
        {
            agpGapStaticLoad(row + rowOffset, &gap);
            /* Clip the gap to the region before measuring it. */
            if (gap.chromStart < region->start)
                gap.chromStart = region->start;
            if (gap.chromEnd > region->end)
                gap.chromEnd = region->end;
            total += gap.chromEnd - gap.chromStart;
        }
        sqlFreeResult(&sr);

        region = region->next;
        ++examined;
    }

    freez(&splitTable);
    return total;
}
Ejemplo n.º 6
0
static struct hash *loadAllGaps(struct sqlConnection *conn, char *db)
/*	working on all chroms, fetch all per-chrom gap counts at once
 *	returns hash by chrom name to gap counts for that chrom
 *
 *	When a table named "gap" exists in db, it is read in one query
 *	ordered by chrom and the sizes are summed per run of equal chrom
 *	names.  Otherwise the gap table is queried once per entry of the
 *	global chromInfoList.  The overall gap count and size are reported
 *	through verbose() at level 2.
 */
{ 
struct chromInfo *cInfo;
struct sqlResult *sr;
char **row;
struct hash *ret;
int totalGapSize = 0;
int gapCount = 0;

ret = newHash(0);

/*	If not split, read in whole gulp, create per-chrom hash of sizes */
if (hTableExists(db, "gap"))
    {
    char *prevChrom = NULL;
    int totalGapsThisChrom = 0;
    
    sr = sqlGetResult(conn,
	NOSQLINJ "select chrom,chromStart,chromEnd from gap order by chrom");
    while ((row = sqlNextRow(sr)) != NULL)
	{
	int gapSize = sqlUnsigned(row[2]) - sqlUnsigned(row[1]);
	++gapCount;
	totalGapSize += gapSize;
	if (prevChrom && sameWord(prevChrom,row[0]))
	    totalGapsThisChrom += gapSize;
	else
	    {
	    /*	new chrom: store the finished one, then start over	*/
	    if (prevChrom)
		{
		hashAddInt(ret, prevChrom, totalGapsThisChrom);
		freeMem(prevChrom);
		}
	    prevChrom = cloneString(row[0]);
	    totalGapsThisChrom = gapSize;
	    }
	}
    /*	and the last one; the name copy is freed whether or not it is
     *	stored, so it does not leak when its total is not positive	*/
    if (prevChrom)
	{
	if (totalGapsThisChrom > 0)
	    hashAddInt(ret, prevChrom, totalGapsThisChrom);
	freeMem(prevChrom);
	prevChrom = NULL;
	}
    sqlFreeResult(&sr);
    }
else
    {
    /*	for each chrom name, fetch the gap count	*/
    for (cInfo = chromInfoList; cInfo != NULL; cInfo = cInfo->next)
	{
	int rowOffset;
	int totalGapsThisChrom = 0;
	sr = hChromQuery(conn, "gap", cInfo->chrom, NULL, &rowOffset);
	while ((row = sqlNextRow(sr)) != NULL)
	    {
	    int gapSize;
	    struct agpGap gap;
	    ++gapCount;
	    agpGapStaticLoad(row+rowOffset, &gap);
	    gapSize = gap.chromEnd - gap.chromStart;
	    totalGapsThisChrom += gapSize;
	    totalGapSize += gapSize;
	    }
	sqlFreeResult(&sr);
	hashAddInt(ret, cInfo->chrom, totalGapsThisChrom);
	}
    }
verbose(2,"#\tloaded %d gaps covering %d bases\n", gapCount, totalGapSize);
return ret;
}
void splitAgp(char *agpName, char *goldFileName, char *gapFileName)
/* Split up agp file into gold and gap files.
 *
 * Every line of agpName is chopped into words.  Lines whose fifth word
 * starts with 'N' or 'U' are treated as gaps and written to gapFileName;
 * all other lines are treated as fragments and written to goldFileName.
 * Each output row is prefixed with a bin column computed from the
 * zero-based start and the end of the line.
 *
 * Side effects: updates the globals maxChromNameSize and maxFragNameSize
 * with the longest names seen.  Aborts on lines with too few words and on
 * chrom or fragment names longer than 254 characters. */
{
    struct lineFile *lf;
    char *words[16];
    int wordCount;
    FILE *goldTab, *gapTab;

    /* Scan through .agp file splitting it into gold
     * and gap components. */
    goldTab = mustOpen(goldFileName, "w");
    gapTab = mustOpen(gapFileName, "w");
    lf = lineFileOpen(agpName, TRUE);
    while ((wordCount = lineFileChop(lf, words)) > 0)
    {
        int start, end;
        if (wordCount < 5)
            errAbort("Short line %d of %s", lf->lineIx, lf->fileName);
        int len = strlen(words[0]);
        if (len > maxChromNameSize)
        {
            maxChromNameSize = len;
            if (maxChromNameSize > 254)
                errAbort("ERROR: chrom name size is over 254(%d) characters: "
                         "'%s'", maxChromNameSize, words[0]);
        }

        start = sqlUnsigned(words[1])-1;
        end = sqlUnsigned(words[2]);
        if (words[4][0] == 'N' || words[4][0] == 'U')
        {
            /* AGP gap lines carry at least 8 columns; the loader below is
             * presumed to read all of them, so reject short lines rather
             * than let it read stale words[] entries from an earlier line. */
            if (wordCount < 8)
                errAbort("Short gap line %d of %s: expecting at least 8 words, got %d",
                         lf->lineIx, lf->fileName, wordCount);
            struct agpGap gap;
            agpGapStaticLoad(words, &gap);
            gap.chromStart -= 1;
            fprintf(gapTab, "%u\t", hFindBin(start, end));
            agpGapTabOut(&gap, gapTab);
            verbose(3,"#GAP\t%s:%d-%d\n", gap.chrom, gap.chromStart, gap.chromEnd);
        }
        else
        {
            /* words[5] is read directly below and AGP component lines
             * carry 9 columns, so make sure they are all present before
             * touching them. */
            if (wordCount < 9)
                errAbort("Short fragment line %d of %s: expecting at least 9 words, got %d",
                         lf->lineIx, lf->fileName, wordCount);
            struct agpFrag gold;
            agpFragStaticLoad(words, &gold);
            agpFragValidate(&gold);
            len = strlen(words[5]);
            if (len > maxFragNameSize)
            {
                maxFragNameSize = len;
                if (maxFragNameSize > 254)
                    errAbort("ERROR: fragment name size is over 254(%d) "
                             "characters: '%s'", maxFragNameSize, words[5]);
            }
            // file is 1-based. agpFragLoad() now assumes 0-based.
            // and agpFragTabOut() will assume 1-based, but we will load
            // the generated file straight into the database, so
            // subtract 2:
            gold.chromStart -= 2;
            gold.fragStart  -= 2;
            fprintf(goldTab, "%u\t", hFindBin(start, end));
            agpFragTabOut(&gold, goldTab);
        }
    }
    lineFileClose(&lf);
    carefulClose(&goldTab);
    carefulClose(&gapTab);

}