void processExcludes(char *exclude) /* Combine alwaysExclude and command like -exclude arg (if given), and * process into a list. If it contains "genbank", add genbankExclude. */ { struct dyString *allExcludes = newDyString(512); char *patterns[128]; int numPats = 0, i = 0; dyStringAppend(allExcludes, alwaysExclude); if (exclude != NULL) dyStringPrintf(allExcludes, ",%s", exclude); numPats = chopCommas(allExcludes->string, patterns); for (i=0; i < numPats; i++) { if (sameWord(patterns[i], "genbank")) { char *gbPatterns[128]; int gbNumPats = 0, j = 0; char *gbExclude = cloneString(genbankExclude); gbNumPats = chopCommas(gbExclude, gbPatterns); for (j=0; j < gbNumPats; j++) { struct slName *pat = newSlName(gbPatterns[j]); slAddHead(&excludePatterns, pat); } freeMem(gbExclude); } else { struct slName *pat = newSlName(patterns[i]); slAddHead(&excludePatterns, pat); } } dyStringFree(&allExcludes); }
struct slName *getFileList(char *listFile, char *bulkDir)
/* Get list of files to work on from listFile.  Every word in listFile is
 * split with splitPath(); the name part is turned into bulkDir/<name>.psl
 * and that path is added to the result if the file exists.  The result
 * keeps the order of listFile.  Words whose .psl path would not fit in the
 * path buffer are skipped. */
{
    char **faFiles = NULL;
    char *faBuf = NULL;
    int faCount = 0;
    int i;
    char path[512], dir[256], name[128], extension[64];
    struct slName *list = NULL, *el;

    readAllWords(listFile, &faFiles, &faCount, &faBuf);
    for (i = 0; i < faCount; ++i)
        {
        int pathLen;

        splitPath(faFiles[i], dir, name, extension);
        /* Bounded formatting: bulkDir has no length limit, path does. */
        pathLen = snprintf(path, sizeof(path), "%s/%s.%s", bulkDir, name, "psl");
        if (pathLen < 0 || pathLen >= (int)sizeof(path))
            continue;   /* a truncated path cannot name the intended file */
        if (fileExists(path))
            {
            el = newSlName(path);
            slAddHead(&list, el);
            }
        }
    /* The word array and the backing buffer are both owned by the caller of
     * readAllWords (see its contract in the library's common.h); the list
     * elements were built from path, so nothing refers to them any more. */
    freeMem(faFiles);
    freeMem(faBuf);
    slReverse(&list);
    return list;
}
void jkUniq(char *fileName) /* Remove dupe lines from file. */ { struct slName *lineList = NULL, *lineEl; struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; int lineSize; struct hash *hash = newHash(0); FILE *f; while (lineFileNext(lf, &line, &lineSize)) { if (!hashLookup(hash, line)) { hashAdd(hash, line, NULL); lineEl = newSlName(line); slAddHead(&lineList, lineEl); } } slReverse(&lineList); lineFileClose(&lf); f = mustOpen(fileName, "w"); for (lineEl = lineList; lineEl != NULL; lineEl = lineEl->next) { fputs(lineEl->name, f); fputc('\n', f); } fclose(f); slFreeList(&lineList); freeHash(&hash); }
int main(int argc, char *argv[]) { char *inName, *name; int chunkSize = 4048*1024; FILE *in; int accSize = 0; int newAccSize; int oneSize; char line[512]; int lineCount; char *words[16]; int wordCount; struct slName *bacs = NULL, *bn; char *dirName; char *outDir; if (argc != 4) usage(); inName = argv[1]; dirName = argv[2]; outDir = argv[3]; in = mustOpen(inName, "r"); while (fgets(line, sizeof(line), in)) { char *sizeString; ++lineCount; wordCount = chopLine(line, words); if (wordCount == 0) continue; if (wordCount != 9) errAbort("Line %d of %s doesn't look like an ls -l line", lineCount, inName); sizeString = words[4]; if (!isdigit(sizeString[0])) errAbort("Line %d of %s doesn't look like an ls - l line", lineCount, inName); name = words[8]; oneSize = atoi(sizeString); newAccSize = accSize + oneSize; if (newAccSize > chunkSize) { finishJob(&bacs, accSize); accSize = oneSize; if (oneSize > chunkSize) warn("Size %d of %s exceed chunk size %d", oneSize, name, chunkSize); } else { accSize = newAccSize; } bn = newSlName(name); slAddHead(&bacs, bn); } if (bacs != NULL) finishJob(&bacs, accSize); printf("%d total jobs\n", jobCount); writeInLists(outDir, dirName); //writeJobs("job", "in", startMachine, stopMachine, "cc"); }
struct slName *listDirRegEx(char *dir, char *regEx, int flags)
/* Return an alphabetized list of all files that match
 * the regular expression pattern in directory.
 * See REGCOMP(3) for flags (e.g. REG_ICASE); REG_NOSUB is always added.
 * "." and ".." are never listed.  Aborts if regEx does not compile.
 * Returns NULL if the directory cannot be opened or nothing matches. */
{
    struct slName *list = NULL, *name;
    struct dirent *de;
    DIR *d;
    regex_t re;
    int err = regcomp(&re, regEx, flags | REG_NOSUB);

    if (err)
        errAbort("regcomp failed; err: %d", err);

    if ((d = opendir(dir)) == NULL)
        {
        /* The pattern was compiled successfully, so release it on this
         * early exit as well. */
        regfree(&re);
        return NULL;
        }

    while ((de = readdir(d)) != NULL)
        {
        char *fileName = de->d_name;
        if (differentString(fileName, ".") && differentString(fileName, ".."))
            {
            /* regexec() returns 0 on a match. */
            if (!regexec(&re, fileName, 0, NULL, 0))
                {
                name = newSlName(fileName);
                slAddHead(&list, name);
                }
            }
        }
    closedir(d);
    regfree(&re);
    slNameSort(&list);
    return list;
}
struct slName *listDir(char *dir, char *pattern)
/* List the entries of directory dir whose names match the wildcard pattern,
 * sorted with slNameSort.  A NULL pattern matches every entry.  "." and ".."
 * are always left out.  Returns NULL if dir cannot be opened. */
{
    struct slName *matches = NULL;
    struct dirent *entry;
    DIR *handle = opendir(dir);

    if (handle == NULL)
        return NULL;

    for (entry = readdir(handle); entry != NULL; entry = readdir(handle))
        {
        char *entryName = entry->d_name;
        struct slName *el;

        if (!differentString(entryName, ".") || !differentString(entryName, ".."))
            continue;
        if (pattern != NULL && !wildMatch(pattern, entryName))
            continue;
        el = newSlName(entryName);
        slAddHead(&matches, el);
        }
    closedir(handle);
    slNameSort(&matches);
    return matches;
}
static int releaseSanity(struct gbRelease* release, char *database) /* Run sanity checks on a release */ { unsigned orgCats; int checkedSetCnt = 0; /* check if native, and/or xeno should be included */ orgCats = getLoadOrgCats(database, release->srcDb, GB_MRNA); if (orgCats != 0) { if (checkRelease(release, database, GB_MRNA, orgCats, NULL)) checkedSetCnt++; } orgCats = getLoadOrgCats(database, release->srcDb, GB_EST); if (orgCats != 0) { struct slName* prefixes, *prefix; boolean checkedSome = FALSE; if (gOptions.accPrefixRestrict != NULL) prefixes = newSlName(gOptions.accPrefixRestrict); else prefixes = gbReleaseGetAccPrefixes(release, GB_PROCESSED, GB_EST); for (prefix = prefixes; prefix != NULL; prefix = prefix->next) { if (checkRelease(release, database, GB_EST, orgCats, prefix->name)) checkedSome = TRUE; } slFreeList(&prefixes); if (checkedSome) checkedSetCnt++; } return checkedSetCnt; }
void pushPosString(struct speciesInfo *si)
/* Flush si's pending position string, append it as a new element at the end
 * of si->posStrings, then free si->posString and set it to NULL. */
{
    struct slName *entry;

    flushPosString(si);
    entry = newSlName(si->posString);
    slAddTail(&si->posStrings, entry);
    freez(&si->posString);
}
static struct slName* parseDbXrefStr(char* xrefStr)
/* Split a white-space separated string of db_xref values into a list with
 * one element per value.  Each value is added at the head, so the list is
 * in reverse order of the input.  Returns NULL if there are no values. */
{
    struct slName* xrefs = NULL;
    char *word;

    for (word = nextWord(&xrefStr); word != NULL; word = nextWord(&xrefStr))
        slSafeAddHead(&xrefs, newSlName(word));
    return xrefs;
}
struct slName *getTableNames(struct sqlConnection *conn) /* Return a list of names of tables that have not been excluded by * command line options. */ { char *query = hoursOld ? "NOSQLINJ show table status" : "NOSQLINJ show tables"; struct sqlResult *sr = sqlGetResult(conn, query); struct slName *tableList = NULL; char **row = NULL; int startTime = clock1(); int ageThresh = hoursOld * 3600; while((row = sqlNextRow(sr)) != NULL) { struct slName *tableName = NULL; struct slName *pat = NULL; boolean gotMatch = FALSE; if (hoursOld) { if (row[11] != NULL) { int tableUpdateTime = sqlDateToUnixTime(row[11]); int ageInSeconds = startTime - tableUpdateTime; if (ageInSeconds > ageThresh) continue; } else { verbose(2, "Got NULL update time for table %s.%s with hoursOld=%d\n", sqlGetDatabase(conn), row[0], hoursOld); } } for (pat = excludePatterns; pat != NULL; pat=pat->next) { if (wildMatch(pat->name, row[0])) { gotMatch = TRUE; break; } } if (gotMatch) continue; if (verboseLevel() >= 3 || justList) fprintf(stderr, "Adding %s\n", row[0]); tableName = newSlName(row[0]); slAddHead(&tableList, tableName); } sqlFreeResult(&sr); if (justList) exit(0); slReverse(&tableList); return tableList; }
static void parseMultiOption(struct hash *hash, char *name, char* val, struct optionSpec *spec)
/* Record one more occurrence of an option that may be given several times.
 * Only string options are supported: the values are kept as an slName list
 * stored in hash under the option name, in the order they were seen.  Any
 * other option type aborts. */
{
    if ((spec->flags & OPTION_TYPE_MASK) == OPTION_STRING)
        {
        struct slName *valList = hashFindVal(hash, name);
        struct slName *el = newSlName(val);

        if (valList == NULL)
            {
            /* First occurrence: the new element becomes the stored list. */
            hashAdd(hash, name, el);
            }
        else
            {
            /* Later occurrence: the list is non-empty, so appending leaves
             * the head (the pointer stored in the hash) unchanged. */
            slAddTail(&valList, el);
            }
        }
    else
        {
        errAbort("UNIMPLEMENTED: multiple instances of a non-string option is not currently implemented");
        }
}
struct slName *getWhiteListFromFile()
/* Read the file named by the global justThese and return one list element
 * per line delivered by lineFileNextReal().  Elements are added at the head,
 * so the list is in reverse file order. */
{
    struct lineFile *lf = lineFileOpen(justThese, TRUE);
    struct slName *names = NULL;
    char *line;

    while (lineFileNextReal(lf, &line))
        {
        struct slName *el = newSlName(line);
        slAddHead(&names, el);
        }
    lineFileClose(&lf);
    return names;
}
struct slName *readAllLines(char *fileName)
/* Read all lines of file into a list. (Removes trailing carriage return.)
 * The list is in file order; an empty file gives NULL. */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    struct slName *list = NULL, *el;
    char *line;

    while (lineFileNext(lf, &line, NULL))
        {
        el = newSlName(line);
        slAddHead(&list, el);
        }
    /* Every line was copied into the list, so the reader can be released;
     * the original never closed it. */
    lineFileClose(&lf);
    slReverse(&list);
    return list;
}
struct slName* loadAccList(char *fname)
/* Read file fname and return one list element per line delivered by
 * lineFileNextReal(), with each line passed through trimSpaces() first.
 * Elements are added at the head, so the list is in reverse file order. */
{
    struct lineFile* accFh = lineFileOpen(fname, TRUE);
    struct slName* accList = NULL;
    char *line;

    while (lineFileNextReal(accFh, &line))
        {
        char *trimmed = trimSpaces(line);
        slSafeAddHead(&accList, newSlName(trimmed));
        }
    lineFileClose(&accFh);
    return accList;
}
static void readMappingResults(char **argv) { FILE *fp; char buf[500], chr[50], id[500]; int beg, i, j, len; char *ch; struct slName *ali; struct hashEl *el; struct rbTree *tr; aliHash = newHash(8); for (i = 4; i <= 5; i++) { fp = mustOpen(argv[i], "r"); while (fgets(buf, 500, fp)) { if (ncbi && i >= 6 && i <= 7) { if (sscanf(buf, "%[^\t]\t%*c %s %d %d", id, chr, &beg, &len) != 4) errAbort("error: %s", buf); if ((ch = strchr(id, ' '))) *ch = '\0'; if (i >= 6 && i <= 7) { j = strlen(id); sprintf(id+j, "/%d", i-5); } } else { if (sscanf(buf, "%[^\t]\t%*c %s %d %d", id, chr, &beg, &len) != 4){ errAbort("error: %s", buf); } if ((ch = strchr(id, ' '))) *ch = '\0'; } ali = newSlName(id); el = hashLookup(aliHash, chr); if (el == NULL) { tr = rangeTreeNew(); hashAdd(aliHash, chr, tr); } else tr = (struct rbTree *)(el->val); rangeTreeAddValHead(tr, beg, beg + len - 1, &ali); } fclose(fp); } }
void randomEst(char *database, int count, char *output) /* randomEst - Select random ESTs from database. */ { struct sqlConnection *conn = sqlConnect(database); struct sqlResult *sr; char **row; int i, elIx, okCount = 0; struct slName *list = NULL, *el; FILE *f = NULL; char **array = NULL; struct dnaSeq *seq; struct hash *uniqHash = newHash(0); hSetDb(database); printf("Scanning database\n"); sr = sqlGetResult(conn, "select acc,type,direction from mrna"); while ((row = sqlNextRow(sr)) != NULL) { if (sameString(row[1], "EST") && sameString(row[2], "3")) { el = newSlName(row[0]); slAddHead(&list, el); ++okCount; } } sqlFreeResult(&sr); printf("Got %d 3' ESTs\n", okCount); AllocArray(array, okCount); for (i=0, el = list; el != NULL; el = el->next, ++i) array[i] = el->name; printf("Selecting %d to put into %s\n", count, output); f = mustOpen(output, "w"); for (i=0; i<count; ++i) { char *name; elIx = rand()%okCount; name = array[elIx]; if (!hashLookup(uniqHash, name)) { hashAdd(uniqHash, name, NULL); seq = hRnaSeq(name); faWriteNext(f, seq->name, seq->dna, seq->size); freeDnaSeq(&seq); } } }
void fillInExpHash(char *expFileName, struct hash **expHash, struct slName **expNames, int *expCount)
/** Read all of the names from the expFileName and store them in a hash.
 * On return:
 *   *expHash  - new hash mapping each line to its 0-based position among the
 *               lines delivered by lineFileNextReal()
 *   *expNames - the same lines as a list; each line is pushed on the head of
 *               the list passed in and the whole list is then reversed, so
 *               pass an empty list to get file order
 *   *expCount - number of lines read */
{
    struct lineFile *lf = lineFileOpen(expFileName, TRUE);
    char *line = NULL;
    struct slName *name = NULL;

    *expCount = 0;
    *expHash = newHash(5);
    while (lineFileNextReal(lf, &line))
        {
        hashAddInt(*expHash, line, (*expCount)++);
        name = newSlName(line);
        slAddHead(expNames, name);
        }
    slReverse(expNames);
    /* The original never closed the reader. */
    lineFileClose(&lf);
}
void checkExtFile(struct sqlConnection *conn) /* check extFile table for files that have been removed */ { char query[256]; struct sqlResult *sr; char **row; char buffer[4 * 1024]; char *name = buffer; struct slName *list = NULL; if (! sqlTableExists(conn, CT_EXTFILE)) { verbose(2,"WARNING: -extFile option specified, extFile table does not exist\n"); verbose(2,"at this time (Jan 2009), the extFile table is unused.\n"); return; } sqlSafef(query,sizeof(query),"select id,path from %s",CT_EXTFILE); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { if (topDir != NULL) safef(buffer, sizeof buffer, "%s/%s",topDir, row[1]); else name = row[1]; if (!fileExists(name)) { struct slName *new = newSlName(row[0]); slAddHead(&list, new); } } sqlFreeResult(&sr); struct slName *one; for(one = list; one; one = one->next) { sqlSafef(query,sizeof(query),"delete from %s where id='%s'", CT_EXTFILE, one->name); if (extDel) sqlUpdate(conn, query); verbose(2,"%s\n",query); } slFreeList(&list); }
struct slName *findIsoligamers(struct cutter *enz, struct cutter *enzList)
/* Return the names of the enzymes in enzList, other than enz itself (by
 * name), for which sameStickyEnd(enz, other) holds.  Names keep the order
 * of enzList.  Returns NULL if either argument is NULL or nothing matches. */
{
    struct slName *matches = NULL;
    struct cutter *other;

    if (enz == NULL || enzList == NULL)
        return NULL;

    other = enzList;
    while (other != NULL)
        {
        if (!sameString(other->name, enz->name))
            {
            if (sameStickyEnd(enz, other))
                {
                struct slName *el = newSlName(other->name);
                slAddHead(&matches, el);
                }
            }
        other = other->next;
        }
    slReverse(&matches);
    return matches;
}
static void buildChromSizes(char *db) /* build table of chromosome sizes and list of chromosomes */ { struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr; char **row; gChromSizes = hashNew(8); sr = sqlGetResult(conn, NOSQLINJ "SELECT chrom,size FROM chromInfo"); while ((row = sqlNextRow(sr)) != NULL) { unsigned sz = gbParseUnsigned(NULL, row[1]); hashAddInt(gChromSizes, row[0], sz); slSafeAddHead(&gChroms, newSlName(row[0])); } sqlFreeResult(&sr); hFreeConn(&conn); }
struct slName *getOrderList(char *file)
/* Read file as one-column rows and return the first word of each row as a
 * list, in file order. */
{
    struct lineFile *lf = lineFileOpen(file, TRUE);
    struct slName *names = NULL;
    char *fields[1];

    while (lineFileRow(lf, fields))
        {
        struct slName *el = newSlName(fields[0]);
        slAddHead(&names, el);
        }
    slReverse(&names);
    lineFileClose(&lf);
    return names;
}
struct slName *getChromList(char *db) /* Get list of all chromosomes. */ { struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr = NULL; char **row = NULL; struct slName *list = NULL; struct slName *el = NULL; char *query = NOSQLINJ "select chrom from chromInfo"; sr = sqlGetResult(conn, query); while ((row=sqlNextRow(sr))!=NULL) { el = newSlName(row[0]); slAddHead(&list, el); } slReverse(&list); sqlFreeResult(&sr); hFreeConn(&conn); return list; }
void bedCons(char *database, char *refAliTrack, char *bedTrack) /* bedCons - Look at conservation of a BED track vs. a refence * (nonredundant) alignment track. */ { struct slName *chromList, *chrom; struct stats *stats = NULL; struct hash *otherHash = makeOtherHash(database, "mouseChrom"); if (optionExists("chrom")) chromList = newSlName(optionVal("chrom", NULL)); else chromList = hAllChromNames(database); AllocVar(stats); for (chrom = chromList; chrom != NULL; chrom = chrom->next) { uglyf("%s\n", chrom->name); oneChrom(database, chrom->name, refAliTrack, bedTrack, otherHash, stats); } printStats(stats); }
struct slName *listDir(char *dir, char *pattern) /* Return an alphabetized list of all files that match * the wildcard pattern in directory. */ { long hFile; struct _finddata_t fileInfo; struct slName *list = NULL, *name; boolean otherDir = FALSE; char *currentDir; if (dir == NULL || sameString(".", dir) || sameString("", dir)) dir = ""; else { currentDir = getCurrentDir(); setCurrentDir(dir); otherDir = TRUE; } if (pattern == NULL) pattern = *; if( (hFile = _findfirst( pattern, &fileInfo)) == -1L ) return NULL; do { if (!sameString(".", fileInfo.name) && !sameString("..", fileInfo.name)) { name = newSlName(fileInfo.name); slAddHead(&list, name); } } while( _findnext( hFile, &fileInfo) == 0 ); _findclose( hFile ); if (otherDir) setCurrentDir(currentDir); slNameSort(&list); return list; }
struct slName *getGroupList(char *db, char *group)
/* Run "select name from snp <group>" on db and return the names as a list
 * in query order.  group is appended to the statement through sqlSafef's
 * %s conversion. */
{
    struct sqlConnection *conn = hAllocConn(db);
    struct slName *names = NULL;
    struct sqlResult *sr;
    char **row;
    char query[256];

    sqlSafef(query, sizeof(query), "select name from snp %s", group);
    sr = sqlGetResult(conn, query);
    for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
        {
        struct slName *el = newSlName(row[0]);
        slAddHead(&names, el);
        }
    slReverse(&names);
    sqlFreeResult(&sr);
    hFreeConn(&conn);
    return names;
}
struct slName *wormGeneToOrfNames(char *name) /* Returns list of cosmid.N type ORF names that are known by abc-12 type name. */ { struct slName *synList = NULL; char synFileName[512]; FILE *synFile; char lineBuf[128]; int nameLen = strlen(name); /* genes are supposed to be lower case. */ tolowers(name); /* Open synonym file and loop through each line of it */ sprintf(synFileName, "%ssyn", wormFeaturesDir()); if ((synFile = fopen(synFileName, "r")) == NULL) errAbort("Can't find synonym file '%s'. (errno: %d)\n", synFileName, errno); while (fgets(lineBuf, ArraySize(lineBuf), synFile)) { /* If first part of line matches chop up line. */ if (strncmp(name, lineBuf, nameLen) == 0) { char *syns[32]; int count; count = chopString(lineBuf, whiteSpaceChopper, syns, ArraySize(syns)); /* Looks like we got a synonym. Add all the aliases. */ if (strcmp(name, syns[0]) == 0) { int i; for (i=1; i<count; ++i) slAddTail(&synList, newSlName(syns[i])); break; } } } fclose(synFile); return synList; }
void findCutters(char *gcgFile, char *genome, char *outputFile) /* findCutters - Find REBASE restriction enzymes using their GCG file. */ { struct cutter *cutters = readGcg(gcgFile); struct dnaSeq *seqs = dnaLoadAll(genome); struct slName *whiteList = NULL; if (justThis) whiteList = newSlName(justThis); if (justThese) { struct slName *listFromJustThese = getWhiteListFromFile(); whiteList = slCat(whiteList, listFromJustThese); } if (justThese || justThis) cullCutters(&cutters, TRUE, whiteList, 0); if (countsOnly) findCounts(cutters, seqs, outputFile); else findBeds(cutters, seqs, outputFile); cutterFreeList(&cutters); freeDnaSeqList(&seqs); slNameFreeList(&whiteList); }
static void getReleasePartitions(struct gbSelect** selectList, struct gbRelease* release, unsigned state, unsigned types, unsigned orgCats, char *limitAccPrefix) /* Get partitions for a release and add to list */ { if (types & GB_MRNA) getTypePartitions(selectList, release, GB_MRNA, orgCats, NULL); if ((types & GB_EST) && (release->srcDb != GB_REFSEQ)) { struct slName* prefixes, *prefix; if (limitAccPrefix != NULL) prefixes = newSlName(limitAccPrefix); else prefixes = gbReleaseGetAccPrefixes(release, state, GB_EST); for (prefix = prefixes; prefix != NULL; prefix = prefix->next) getTypePartitions(selectList, release, GB_EST, orgCats, prefix->name); slFreeList(&prefixes); } }
void expToRna(char *database, char *rnaTable, char *expTable, char *outName) /* expToRna - Make a little two column table that associates * rnaClusters with expression info. */ { struct slName *chromList = NULL, *chromEl; char *chrom = optionVal("chrom", NULL); FILE *f = mustOpen(outName, "w"); if (chrom != NULL) chromList = newSlName(chrom); else chromList = hAllChromNames(database); for (chromEl = chromList; chromEl != NULL; chromEl = chromEl->next) { chrom = chromEl->name; uglyf("%s\n", chrom); doOneChrom(database, chrom, rnaTable, expTable, f); } printf("%d dupe, %d uniq, %d miss, %d total, %d hits\n", dupeCount, uniqCount, missCount, dupeCount + uniqCount + missCount, hitCount); }
static char * getConservationTrackName( struct sqlConnection *conn)
/* Find the table behind the track whose shortLabel is 'Conservation'.  The
 * trackDb tables from hTrackDbList() are searched in order; the search stops
 * at the first one that lists a tableName which exists on conn.  Returns a
 * cloned copy of that table name, or NULL if none is found. */
{
    struct slName *dbList = hTrackDbList();
    struct slName *dbl;
    char *ret = NULL;

    for (dbl = dbList; dbl != NULL; dbl = dbl->next)
        {
        char query[512];
        struct sqlResult *sr;
        char **row;
        struct slName *tableList = NULL;
        struct slName *l;
        char *found = NULL;

        sqlSafef(query, sizeof query,
                 "select tableName from %s where shortLabel='Conservation'", dbl->name);
        sr = sqlGetResult(conn, query);
        while ((row = sqlNextRow(sr)) != NULL)
            {
            struct slName *name = newSlName(row[0]);
            slAddHead(&tableList, name);
            }
        sqlFreeResult(&sr);

        /* As in the original, the last existing table in the list wins.
         * Only the winner is cloned, after the scan; the original cloned
         * every existing table and dropped all but the last copy. */
        for (l = tableList; l != NULL; l = l->next)
            {
            if (sqlTableExists(conn, l->name))
                found = l->name;
            }
        if (found != NULL)
            ret = cloneString(found);   /* must happen before the list is freed */
        slFreeList(&tableList);

        if (ret != NULL)
            break;
        }
    slFreeList(&dbList);
    return ret;
}