struct hash *loadGenePositions(char *database, struct sqlConnection *conn, char *fileName)
/* Read in 7 column file and convert to hash of gene positions.
 *
 * Columns, in order: name, chromosome, geneStart, geneEnd, strand,
 * upSize, downSize.  The returned hash is keyed by the name column and
 * its values are struct genomePos records.  For '+' strand rows the
 * stored range is [geneStart - upSize, geneStart + downSize); for '-'
 * strand rows it is [geneEnd - downSize, geneEnd + upSize).
 *
 * Aborts (errAbort) on an unrecognized chromosome or a strand that is
 * neither '+' nor '-'.  The conn parameter is not used by this function;
 * it is kept so existing callers continue to compile. */
{
struct hash *hash = newHash(16);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[7];
int count = 0;
struct genomePos *posList = NULL, *pos;

while (lineFileRow(lf, row))
    {
    int geneStart,geneEnd,upSize,downSize;

    AllocVar(pos);
    /* The hash owns the name string; pos->name points at the hash's copy. */
    hashAddSaveName(hash, row[0], pos, &pos->name);
    /* Records are also chained through posList; the list head is local,
     * the elements stay reachable through the hash. */
    slAddHead(&posList, pos);
    pos->chrom = hgOfficialChromName(database, row[1]);
    if (pos->chrom == NULL)
        errAbort("Unrecognized chromosome %s line %d of %s",
		row[1], lf->lineIx, lf->fileName);
    geneStart = lineFileNeedNum(lf, row, 2);
    geneEnd = lineFileNeedNum(lf, row, 3);
    pos->strand = row[4][0];
    if (pos->strand != '+' && pos->strand != '-')
        errAbort("Unrecognized strand %s line %d of %s",
		row[4], lf->lineIx, lf->fileName);
    upSize = lineFileNeedNum(lf, row, 5);
    downSize = lineFileNeedNum(lf, row, 6);
    if (pos->strand == '+')
        {
	/* Window is anchored on the gene start for forward-strand genes. */
	pos->start = geneStart - upSize;
	pos->end = geneStart + downSize;
	}
    else
        {
	/* Reverse strand: anchor on the gene end and mirror up/down. */
	pos->start = geneEnd - downSize;
	pos->end = geneEnd + upSize;
	}
    ++count;
    }
/* Release the input file; previously it was left open for the life of
 * the process. */
lineFileClose(&lf);
verbose(1, "%d genes in %s\n", count, fileName);
return hash;
}
Esempio n. 2
0
void hgDeleteChrom(char *db, int chromCount, char *chromNames[])
/* hgDeleteChrom - output SQL commands to delete chrom(s) from db.
 *
 * For each of the chromCount names in chromNames, prints to stdout:
 *   - a delete statement for the chromInfo row,
 *   - a "drop table" for each split table belonging to that chrom,
 *   - a "delete from" for each non-split positional table.
 * Nothing is executed against the database other than SHOW TABLES;
 * the caller is expected to review/run the printed SQL.
 * Aborts (errAbort) if a name is not a chromosome in db. */
{
struct sqlConnection *conn = hAllocConn(db);
struct sqlResult *sr = NULL;
char **row = NULL;
int i;
for (i=0;  i < chromCount;  i++)
    {
    char *chrom = hgOfficialChromName(db, chromNames[i]);
    if (chrom == NULL)
	errAbort("Error: \"%s\" is not a chromosome in %s.",
		 chromNames[i], db);
    printf("delete from %s.chromInfo where chrom = \"%s\";\n", db, chrom);
    sr = sqlGetResult(conn, "NOSQLINJ SHOW TABLES");
    while((row = sqlNextRow(sr)) != NULL)
	{
	char *table = row[0];
	/* sscanf field widths do not count the terminating NUL, so each
	 * buffer must be one byte larger than the width in the format. */
	char tChrom[32+1];
	char rootName[128+1];
	struct hTableInfo *hti = NULL;
	/* Split tables are named chrN_root or chrN_random_root; strip the
	 * chrom prefix to get the root name used for the table lookup. */
	if (sscanf(table, "chr%32[^_]_random_%128s", tChrom, rootName) == 2 ||
	    sscanf(table, "chr%32[^_]_%128s", tChrom, rootName) == 2)
	    hti = hFindTableInfo(db, chrom, rootName);
	else
	    hti = hFindTableInfo(db, chrom, table);
	/* No table info available: nothing can be said about this table,
	 * so skip it rather than dereference a NULL pointer. */
	if (hti == NULL)
	    continue;
	if (hti->isPos)
	    {
	    if (hti->isSplit)
		{
		/* Only drop the split table that belongs to this chrom. */
		char tableCmp[256];
		safef(tableCmp, sizeof(tableCmp), "%s_%s", chrom, hti->rootName);
		if (sameWord(table, tableCmp))
		    printf("drop table %s.%s;\n", db, table);
		}
	    else
		printf("delete from %s.%s where %s = \"%s\";\n",
		       db, table, hti->chromField, chrom);
	    }
	}
    sqlFreeResult(&sr);
    }
hFreeConn(&conn);
}
Esempio n. 3
0
int main(int argc, char *argv[])
/* Validate the command line, then run snpMaskGenes.
 *
 * Expects exactly four arguments after the program name: the database,
 * a chromosome name, and two further arguments that are passed through
 * to snpMaskGenes unchanged.  Sets the file-level 'database' and
 * 'chromName' variables as a side effect. */
{
if (5 != argc)
    {
    usage();
    }

/* The database must exist and must contain an snp table. */
database = argv[1];
if (!hDbExists(database))
    {
    errAbort("%s does not exist\n", database);
    }
hSetDb(database);
if (!hTableExistsDb(database, "snp"))
    {
    errAbort("no snp table in %s\n", database);
    }

/* The chromosome must be one known to the database. */
chromName = argv[2];
if (NULL == hgOfficialChromName(chromName))
    {
    errAbort("no such chromosome %s in %s\n", chromName, database);
    }

/* TODO from original author: check that the nib file exists,
 * or use hNibForChrom from hdb.c */
snpMaskGenes(argv[3], argv[4]);
return 0;
}
Esempio n. 4
0
static struct bed4 *parseRegionInput(char *db, char *inputString, int maxRegions, int maxErrs,
                                     struct dyString *dyWarn)
/* Scan the user region definition, turn into a bed list.
 *
 * Each non-blank input line must be either
 *   chrom:start-end [name]          (position range, optional name) or
 *   chrom start end [name]          (3- or 4-column BED).
 * Problems are appended to dyWarn and the offending line is skipped.
 * If maxErrs > 0, parsing stops once more than maxErrs lines have failed.
 * If maxRegions > 0, parsing stops once more than maxRegions regions
 * have been accepted.  Returns the accepted regions in input order
 * (possibly NULL). */
{
int regionCount = 0;
int errCount = 0;
struct bed4 *bedList = NULL;
struct lineFile *lf = lineFileOnString("userData", TRUE, inputString);
char *line = NULL;
while (lineFileNextReal(lf, &line))
    {
    char *chromName = NULL;
    int chromStart = 0;
    int chromEnd = 0;
    // Points into 'copy' below; it is cloned only once the region has been
    // accepted, so rejected lines and early exits do not leak a heap string.
    char *regionName = NULL;
    // Chop a copy of line so we can display line if there's an error.
    char copy[strlen(line)+1];
    safecpy(copy, sizeof(copy), line);
    char *words[5];
    int wordCount = chopByWhite(copy, words, ArraySize(words));
    boolean badFormat = FALSE;
    boolean gotError = FALSE;
    /*	might be something of the form: chrom:start-end optionalRegionName */
    if (((1 == wordCount) || (2 == wordCount)) &&
	    hgParseChromRange(NULL, words[0], &chromName,
		&chromStart, &chromEnd))
	{
	if (2 == wordCount)
	    regionName = words[1];
	}
    else if (!((3 == wordCount) || (4 == wordCount)))
	{
	dyStringPrintf(dyWarn, "line %d: '%s': "
                       "unrecognized format.  Please enter 3- or 4-column BED or "
                       "a chr:start-end position range optionally followed by a name.\n",
                       lf->lineIx, line);
        badFormat = TRUE;
        gotError = TRUE;
	}
    else
	{
	chromName = words[0];
        // Make sure chromStart and chromEnd are numbers
        if (!isNumericString(words[1]))
            {
            dyStringPrintf(dyWarn, "line %d: '%s': chromStart must be a number but is '%s'\n",
                           lf->lineIx, line, words[1]);
            gotError = TRUE;
            }
        if (!isNumericString(words[2]))
            {
            dyStringPrintf(dyWarn, "line %d: '%s': chromEnd must be a number but is '%s'\n",
                           lf->lineIx, line, words[2]);
            gotError = TRUE;
            }
        if (! gotError)
            {
            chromStart = atoi(words[1]);
            chromEnd = atoi(words[2]);
            if (wordCount > 3)
                regionName = words[3];
            }
	}
    char *officialChromName = chromName ? hgOfficialChromName(db, chromName) : NULL;
    // Chrom and coordinate checks only make sense if the line parsed at all.
    if (! badFormat)
        {
        if (NULL == officialChromName)
            {
            dyStringPrintf(dyWarn,
                           "line %d: '%s': chrom name '%s' not recognized in this assembly\n",
                           lf->lineIx, line, chromName ? chromName : words[0]);
            gotError = TRUE;
            }
        else if (illegalCoordinate(db, officialChromName, chromStart, chromEnd, line, lf->lineIx,
                                   dyWarn))
            {
            gotError = TRUE;
            }
        }
    if (gotError)
        {
        errCount++;
        if (errCount > maxErrs && maxErrs > 0)
            {
            dyStringPrintf(dyWarn, "Exceeded maximum number of errors (%d), quitting\n", maxErrs);
            break;
            }
        else
            continue;
        }
    ++regionCount;
    if (regionCount > maxRegions && maxRegions > 0)
	{
	dyStringPrintf(dyWarn,
                       "line %d: limit of %d region definitions exceeded, skipping the rest\n",
                       lf->lineIx, maxRegions);
	break;
	}
    // Region accepted: hand bed4New a heap copy of the name, as before.
    struct bed4 *bedEl = bed4New(officialChromName, chromStart, chromEnd,
                                 regionName ? cloneString(regionName) : NULL);
    slAddHead(&bedList, bedEl);
    }
lineFileClose(&lf);
// Keep regions in same order as user entered them:
slReverse(&bedList);
return (bedList);
}
Esempio n. 5
0
static struct bed *parseRegionInput(char *inputString)
/* Scan the user region definition, turn into a bed list.
 *
 * Each input line must be either
 *   chrom:start-end [name]          (position range, optional name) or
 *   chrom start end [name]          (3- or 4-column BED).
 * Aborts (errAbort) on a malformed line, an unrecognized chrom, or
 * coordinates rejected by illegalCoordinate.  At most 1000 regions are
 * accepted; a warning is issued and the rest of the input is ignored
 * once that limit is passed.  Returns the regions in input order. */
{
int itemCount = 0;
struct bed *bedList = NULL;
struct bed *bedEl;
int wordCount;
char *words[5];
struct lineFile *lf;

lf = lineFileOnString("userData", TRUE, inputString);
while (0 != (wordCount = lineFileChopNext(lf, words, ArraySize(words))))
    {
    char *chromName = NULL;
    int chromStart = 0;
    int chromEnd = 0;
    char *regionName = NULL;	/* stays NULL when the line has no name */
    /*	might be something of the form: chrom:start-end optionalRegionName */
    if (((1 == wordCount) || (2 == wordCount)) &&
	    hgParseChromRange(NULL, words[0], &chromName,
		&chromStart, &chromEnd))
	{
	if (2 == wordCount)
	    regionName = cloneString(words[1]);
	}
    else if (!((3 == wordCount) || (4 == wordCount)))
	{
	/* Rebuild the offending line from its words for the error message. */
	int i;
	struct dyString *errMessage = dyStringNew(0);
	for (i = 0; i < wordCount; ++i)
	    dyStringPrintf(errMessage, "%s ", words[i]);
	errAbort("line %d: '%s'<BR>\n"
	"illegal bed size, expected 3 or 4 fields, found %d\n",
		    lf->lineIx, dyStringCannibalize(&errMessage), wordCount);
	}
    else
	{
	chromName = hgOfficialChromName(database, words[0]);
	chromStart = sqlSigned(words[1]);
	chromEnd = sqlSigned(words[2]);
	if (wordCount > 3)
	    regionName = cloneString(words[3]);
	}
    ++itemCount;
    if (itemCount > 1000)
	{
	warn("limit 1000 region definitions reached at line %d<BR>\n",
		lf->lineIx);
	break;
	}
    AllocVar(bedEl);
    bedEl->chrom = chromName;
    if (NULL == bedEl->chrom)
	errAbort("at line %d, chrom name '%s' %s %s not recognized in this assembly %d",
	    lf->lineIx, words[0], words[1], words[2], wordCount);
    bedEl->chromStart = chromStart;
    bedEl->chromEnd = chromEnd;
    if (illegalCoordinate(bedEl->chrom, bedEl->chromStart, bedEl->chromEnd))
	errAbort("illegal input at line %d: %s %d %d",
		lf->lineIx, bedEl->chrom, bedEl->chromStart, bedEl->chromEnd);
    /* regionName is set for both the "chrom:start-end name" and the
     * 4-column BED forms and is NULL otherwise.  Previously only the
     * 4-column form kept its name; the 2-word form's name was cloned
     * and then dropped. */
    bedEl->name = regionName;
/* if we wanted to give artifical names to each item */
#ifdef NOT
	{
	char name[128];
	safef(name, ArraySize(name), "item_%04d", itemCount);
	bedEl->name = cloneString(name);
	}
#endif
    slAddHead(&bedList, bedEl);
    }
lineFileClose(&lf);
//    slSort(&bedList, bedCmp);	/* this would do chrom,chromStart order */
slReverse(&bedList);	/* with no sort, it is in order as user entered */
return (bedList);
}