struct hash *loadGenePositions(char *database, struct sqlConnection *conn, char *fileName) /* Read in 7 column file and convert to hash of gene * positions. */ { struct hash *hash = newHash(16); struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[7]; int count = 0; struct genomePos *posList = NULL, *pos; while (lineFileRow(lf, row)) { int geneStart,geneEnd,upSize,downSize; AllocVar(pos); hashAddSaveName(hash, row[0], pos, &pos->name); slAddHead(&posList, pos); pos->chrom = hgOfficialChromName(database, row[1]); if (pos->chrom == NULL) errAbort("Unrecognized chromosome %s line %d of %s", row[1], lf->lineIx, lf->fileName); geneStart = lineFileNeedNum(lf, row, 2); geneEnd = lineFileNeedNum(lf, row, 3); pos->strand = row[4][0]; if (pos->strand != '+' && pos->strand != '-') errAbort("Unrecognized strand %s line %d of %s", row[4], lf->lineIx, lf->fileName); upSize = lineFileNeedNum(lf, row, 5); downSize = lineFileNeedNum(lf, row, 6); if (pos->strand == '+') { pos->start = geneStart - upSize; pos->end = geneStart + downSize; } else { pos->start = geneEnd - downSize; pos->end = geneEnd + upSize; } ++count; } verbose(1, "%d genes in %s\n", count, fileName); return hash; }
void hgDeleteChrom(char *db, int chromCount, char *chromNames[]) /* hgDeleteChrom - output SQL commands to delete chrom(s) from db. */ { struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr = NULL; char **row = NULL; int i; for (i=0; i < chromCount; i++) { char *chrom = hgOfficialChromName(db, chromNames[i]); if (chrom == NULL) errAbort("Error: \"%s\" is not a chromosome in %s.", chromNames[i], db); printf("delete from %s.chromInfo where chrom = \"%s\";\n", db, chrom); sr = sqlGetResult(conn, "NOSQLINJ SHOW TABLES"); while((row = sqlNextRow(sr)) != NULL) { char *table = row[0]; char tChrom[32]; char rootName[128]; struct hTableInfo *hti = NULL; if (sscanf(table, "chr%32[^_]_random_%128s", tChrom, rootName) == 2 || sscanf(table, "chr%32[^_]_%128s", tChrom, rootName) == 2) hti = hFindTableInfo(db, chrom, rootName); else hti = hFindTableInfo(db, chrom, table);; if (hti->isPos) { if (hti->isSplit) { char tableCmp[256]; safef(tableCmp, sizeof(tableCmp), "%s_%s", chrom, hti->rootName); if (sameWord(table, tableCmp)) printf("drop table %s.%s;\n", db, table); } else printf("delete from %s.%s where %s = \"%s\";\n", db, table, hti->chromField, chrom); } } sqlFreeResult(&sr); } hFreeConn(&conn); }
int main(int argc, char *argv[])
/* Validate the command line, then hand off to snpMaskGenes.
 * Expects exactly four arguments: argv[1] is stored in database and argv[2] in
 * chromName; argv[3] and argv[4] are passed through to snpMaskGenes.
 * Aborts if the database does not exist, has no snp table, or does not
 * recognize the chromosome name. */
{
if (5 != argc)
    {
    usage();
    }

/* Database must exist before it is made current. */
database = argv[1];
if (!hDbExists(database))
    {
    errAbort("%s does not exist\n", database);
    }
hSetDb(database);

if (!hTableExistsDb(database, "snp"))
    {
    errAbort("no snp table in %s\n", database);
    }

chromName = argv[2];
if (NULL == hgOfficialChromName(chromName))
    {
    errAbort("no such chromosome %s in %s\n", chromName, database);
    }

/* TODO (from original): check that the nib file exists,
 * or use hNibForChrom from hdb.c */
snpMaskGenes(argv[3], argv[4]);
return 0;
}
static struct bed4 *parseRegionInput(char *db, char *inputString, int maxRegions, int maxErrs, struct dyString *dyWarn) /* scan the user region definition, turn into a bed list */ { int regionCount = 0; int errCount = 0; struct bed4 *bedList = NULL; struct lineFile *lf = lineFileOnString("userData", TRUE, inputString); char *line = NULL; while (lineFileNextReal(lf, &line)) { char *chromName = NULL; int chromStart = 0; int chromEnd = 0; char *regionName = NULL; // Chop a copy of line so we can display line if there's an error. char copy[strlen(line)+1]; safecpy(copy, sizeof(copy), line); char *words[5]; int wordCount = chopByWhite(copy, words, ArraySize(words)); boolean badFormat = FALSE; boolean gotError = FALSE; /* might be something of the form: chrom:start-end optionalRegionName */ if (((1 == wordCount) || (2 == wordCount)) && hgParseChromRange(NULL, words[0], &chromName, &chromStart, &chromEnd)) { if (2 == wordCount) regionName = cloneString(words[1]); } else if (!((3 == wordCount) || (4 == wordCount))) { dyStringPrintf(dyWarn, "line %d: '%s': " "unrecognized format. Please enter 3- or 4-column BED or " "a chr:start-end position range optionally followed by a name.\n", lf->lineIx, line); badFormat = TRUE; gotError = TRUE; } else { chromName = words[0]; // Make sure chromStart and chromEnd are numbers if (!isNumericString(words[1])) { dyStringPrintf(dyWarn, "line %d: '%s': chromStart must be a number but is '%s'\n", lf->lineIx, line, words[1]); gotError = TRUE; } if (!isNumericString(words[2])) { dyStringPrintf(dyWarn, "line %d: '%s': chromEnd must be a number but is '%s'\n", lf->lineIx, line, words[2]); gotError = TRUE; } if (! gotError) { chromStart = atoi(words[1]); chromEnd = atoi(words[2]); if (wordCount > 3) regionName = cloneString(words[3]); } } char *officialChromName = chromName ? hgOfficialChromName(db, chromName) : NULL; if (! 
badFormat) { if (NULL == officialChromName) { dyStringPrintf(dyWarn, "line %d: '%s': chrom name '%s' not recognized in this assembly\n", lf->lineIx, line, chromName ? chromName : words[0]); gotError = TRUE; } else if (illegalCoordinate(db, officialChromName, chromStart, chromEnd, line, lf->lineIx, dyWarn)) { gotError = TRUE; } } if (gotError) { errCount++; if (errCount > maxErrs && maxErrs > 0) { dyStringPrintf(dyWarn, "Exceeded maximum number of errors (%d), quitting\n", maxErrs); break; } else continue; } ++regionCount; if (regionCount > maxRegions && maxRegions > 0) { dyStringPrintf(dyWarn, "line %d: limit of %d region definitions exceeded, skipping the rest\n", lf->lineIx, maxRegions); break; } struct bed4 *bedEl = bed4New(officialChromName, chromStart, chromEnd, regionName); slAddHead(&bedList, bedEl); } lineFileClose(&lf); // Keep regions in same order as user entered them: slReverse(&bedList); return (bedList); }
static struct bed *parseRegionInput(char *inputString) /* scan the user region definition, turn into a bed list */ { int itemCount = 0; struct bed *bedList = NULL; struct bed *bedEl; int wordCount; char *words[5]; struct lineFile *lf; lf = lineFileOnString("userData", TRUE, inputString); while (0 != (wordCount = lineFileChopNext(lf, words, ArraySize(words)))) { char *chromName = NULL; int chromStart = 0; int chromEnd = 0; char *regionName = NULL; /* might be something of the form: chrom:start-end optionalRegionName */ if (((1 == wordCount) || (2 == wordCount)) && hgParseChromRange(NULL, words[0], &chromName, &chromStart, &chromEnd)) { if (2 == wordCount) regionName = cloneString(words[1]); } else if (!((3 == wordCount) || (4 == wordCount))) { int i; struct dyString *errMessage = dyStringNew(0); for (i = 0; i < wordCount; ++i) dyStringPrintf(errMessage, "%s ", words[i]); errAbort("line %d: '%s'<BR>\n" "illegal bed size, expected 3 or 4 fields, found %d\n", lf->lineIx, dyStringCannibalize(&errMessage), wordCount); } else { chromName = hgOfficialChromName(database, words[0]); chromStart = sqlSigned(words[1]); chromEnd = sqlSigned(words[2]); if (wordCount > 3) regionName = cloneString(words[3]); } ++itemCount; if (itemCount > 1000) { warn("limit 1000 region definitions reached at line %d<BR>\n", lf->lineIx); break; } AllocVar(bedEl); bedEl->chrom = chromName; if (NULL == bedEl->chrom) errAbort("at line %d, chrom name '%s' %s %s not recognized in this assembly %d", lf->lineIx, words[0], words[1], words[2], wordCount); bedEl->chromStart = chromStart; bedEl->chromEnd = chromEnd; if (illegalCoordinate(bedEl->chrom, bedEl->chromStart, bedEl->chromEnd)) errAbort("illegal input at line %d: %s %d %d", lf->lineIx, bedEl->chrom, bedEl->chromStart, bedEl->chromEnd); if (wordCount > 3) bedEl->name = regionName; else bedEl->name = NULL; /* if we wanted to give artifical names to each item */ #ifdef NOT { char name[128]; safef(name, ArraySize(name), "item_%04d", itemCount); 
bedEl->name = cloneString(name); } #endif slAddHead(&bedList, bedEl); } lineFileClose(&lf); // slSort(&bedList, bedCmp); /* this would do chrom,chromStart order */ slReverse(&bedList); /* with no sort, it is in order as user entered */ return (bedList); }