void uniqSize(char *ooDir, char *agpFile, char *glFile, char *altFile) /* Figure out unique parts of genome from all the * gold.22 files in ooDir */ { struct fileInfo *chromDirs, *chromEl; struct fileInfo *contigDirs, *contigEl; char subDir[512]; struct chromInfo *ciList = NULL, *ci, *ciTotal; chromDirs = listDirX(ooDir, "*", FALSE); for (chromEl = chromDirs; chromEl != NULL; chromEl = chromEl->next) { char *chromName = chromEl->name; int dirNameLen = strlen(chromName); if (dirNameLen > 0 && dirNameLen <= 2 && chromEl->isDir) { struct chromInfo *ctgList = NULL, *ctg; sprintf(subDir, "%s/%s", ooDir, chromName); contigDirs = listDirX(subDir, "NT*", FALSE); for (contigEl = contigDirs; contigEl != NULL; contigEl = contigEl->next) { if (contigEl->isDir) { int nSize, uSize; char *contigName = contigEl->name; char fileName[512]; sprintf(fileName, "%s/%s/%s", subDir, contigName, agpFile); if (!fileExists(fileName) && altFile != NULL) sprintf(fileName, "%s/%s/%s", subDir, contigName, altFile); if (fileExists(fileName)) { ctg = oneContigInfo(fileName); slAddHead(&ctgList, ctg); getSizes(fileName, &uSize, &nSize); sprintf(fileName, "%s/%s/%s", subDir, contigName, glFile); if (fileExists(fileName)) addStretchInfo(fileName, ctg); } else { warn("No %s in %s/%s", agpFile, subDir, contigName); } } } slFreeList(&contigDirs); ci = combineChromInfo(ctgList, chromName); slAddHead(&ciList, ci); } } slReverse(&ciList); printHeader(stdout); for (ci = ciList; ci != NULL; ci = ci->next) printChromInfo(ci, stdout); ciTotal = combineChromInfo(ciList, "total"); printChromInfo(ciTotal, stdout); }
void checkOurDir(char *ourDir, struct contig *contigList, struct hash *hash) /* Check that our directories look ok. */ { struct us { struct us *next; /* Next in list */ char *contig; /* NT_XXXXXX or NG_XXXXXX */ char *chrom; /* 1, 2, 3, etc. */ }; struct hash *ourHash = newHash(0); struct us *usList = NULL, *us; struct fileInfo *chromList = NULL, *chromFi, *ctgList = NULL, *ctgFi; char chromDir[512], ctgDir[512]; struct contig *contig; int problemCount = 0; /* Build up a hash that says where each contig is. */ chromList = listDirX(ourDir, "*", FALSE); for (chromFi = chromList; chromFi != NULL; chromFi = chromFi->next) { if (chromFi->isDir && strlen(chromFi->name) <= 2) { sprintf(chromDir, "%s/%s", ourDir, chromFi->name); ctgList = listDirX(chromDir, "N?_*", FALSE); for (ctgFi = ctgList; ctgFi != NULL; ctgFi = ctgFi->next) { if (ctgFi->isDir) { AllocVar(us); slAddHead(&usList, us); us->contig = ctgFi->name; us->chrom = chromFi->name; hashAdd(ourHash, us->contig, us); } } } } printf("We have %d contigs\n", slCount(usList)); /* Check each contig. */ for (contig = contigList; contig != NULL; contig = contig->next) { if ((us = hashFindVal(ourHash, contig->name)) == NULL) { ++problemCount; printf("%s is not in %s\n", contig->name, ourDir); } else { sprintf(ctgDir, "%s/%s/%s", ourDir, us->chrom, us->contig); problemCount += checkOurContig(ctgDir, contig); } } freeHash(&ourHash); }
void ooChains(char *ffaDir, char *ooDir) /* ooChains - make chains (partially ordered clone fragments) for oo dir. */ { struct hash *cloneHash = newHash(16); struct clone *cloneList, *clone; char fileName[512]; int i; struct fileInfo *chromDir = NULL, *ctgDir = NULL, *chrom, *ctg; /* Read in input from ffaDir. */ sprintf(fileName, "%s/%s", ffaDir, infoFile); printf("Reading %s\n", fileName); cloneList = readCloneList(fileName, cloneHash); for (i=0; i<ArraySize(finfFiles); ++i) { sprintf(fileName, "%s/%s", ffaDir, finfFiles[i]); printf("Reading %s\n", fileName); readFinf(fileName, cloneHash); } printf("Making chains\n"); for (clone = cloneList; clone != NULL; clone = clone->next) { slReverse(&clone->fragList); makeChains(clone); } /* Make output in each of contig dirs. */ chromDir = listDirX(ooDir, "*", FALSE); for (chrom = chromDir; chrom != NULL; chrom = chrom->next) { char *chromName = chrom->name; if (chrom->isDir && strlen(chromName) <= 2 && chromName[0] != '.') { printf("Processing %s\n", chromName); sprintf(fileName, "%s/%s", ooDir, chromName); ctgDir = listDirX(fileName, "ctg*", TRUE); for (ctg = ctgDir; ctg != NULL; ctg = ctg->next) { printf("."); fflush(stdout); if (ctg->isDir) writeRelevantChains(ctg->name, cloneHash); } printf("\n"); slFreeList(&ctgDir); } } printf("Got chains in %d of %d files. %d total links (including phase2)\n", clonesWithChainsCount, slCount(cloneList), linkCount); }
void doRewrite(char *outDir, char *inDir, char *trackFile) /* Do some sort of rewrite on entire system. */ { /* Make list and hash of root dir */ struct lm *rootLm = lmInit(0); char rootName[PATH_LEN]; safef(rootName, sizeof(rootName), "%s/%s", inDir, trackFile); struct raLevel *rootLevel = raLevelRead(rootName, rootLm); /* Make subdirectory list. */ struct fileInfo *org, *orgList = listDirX(inDir, "*", FALSE); for (org = orgList; org != NULL; org = org->next) { if (org->isDir) { struct lm *orgLm = lmInit(0); char inOrgDir[PATH_LEN], outOrgDir[PATH_LEN]; safef(inOrgDir, sizeof(inOrgDir), "%s/%s", inDir, org->name); safef(outOrgDir, sizeof(outOrgDir), "%s/%s", outDir, org->name); char inOrgFile[PATH_LEN]; safef(inOrgFile, sizeof(inOrgFile), "%s/%s", inOrgDir, trackFile); struct raLevel *orgLevel = raLevelRead(inOrgFile, orgLm); orgLevel->parent = rootLevel; rewriteLevel(orgLevel, outOrgDir, orgLm); struct fileInfo *db, *dbList = listDirX(inOrgDir, "*", FALSE); for (db = dbList; db != NULL; db = db->next) { if (db->isDir) { struct lm *dbLm = lmInit(0); char inDbDir[PATH_LEN], outDbDir[PATH_LEN]; safef(inDbDir, sizeof(inDbDir), "%s/%s", inOrgDir, db->name); safef(outDbDir, sizeof(outDbDir), "%s/%s", outOrgDir, db->name); char inDbFile[PATH_LEN]; safef(inDbFile, sizeof(inDbFile), "%s/%s", inDbDir, trackFile); struct raLevel *dbLevel = raLevelRead(inDbFile, dbLm); dbLevel->parent = orgLevel; rewriteLevel(dbLevel, outDbDir, dbLm); hashFree(&dbLevel->trackHash); lmCleanup(&dbLm); } } hashFree(&orgLevel->trackHash); lmCleanup(&orgLm); } } hashFree(&rootLevel->trackHash); lmCleanup(&rootLm); }
void chimpHiQualDiffs(char *axtDir, char *qacName, char *bedName)
/* chimpHiQualDiffs - Create list of chimp high quality differences.
 *
 * Reads the quality scores in qacName into a hash, then runs
 * axtHiQualDiffs on every *.axt file in axtDir (falling back to *.axt.gz
 * when no plain .axt file is present), writing into bedName. */
{
struct hash *qualHash = qacReadToHash(qacName);
struct fileInfo *axtFiles = listDirX(axtDir, "*.axt", TRUE);
struct fileInfo *axt;
FILE *bed = mustOpen(bedName, "w");

/* No plain files: look for gzipped ones instead. */
if (axtFiles == NULL)
    {
    axtFiles = listDirX(axtDir, "*.axt.gz", TRUE);
    if (axtFiles == NULL)
        printf("No axt files were found in the '%s' directory.\n",axtDir);
    }

axt = axtFiles;
while (axt != NULL)
    {
    axtHiQualDiffs(axt->name, qualHash, bed);
    axt = axt->next;
    }
carefulClose(&bed);
}
void catDir(int dirCount, char *dirs[])
/* catDir - concatenate files in directory - for those times when too
 * many files for cat to handle.
 *
 * Each entry of every listed directory is either descended into (when it
 * is a directory and the recurse option is set) or passed to catFile,
 * provided it satisfies the optional wildCard and suffix filters. */
{
int dirIx;

for (dirIx = 0; dirIx < dirCount; ++dirIx)
    {
    struct fileInfo *entries = listDirX(dirs[dirIx], NULL, TRUE);
    struct fileInfo *entry;

    for (entry = entries; entry != NULL; entry = entry->next)
        {
        char *path = entry->name;

        if (entry->isDir && recurse)
            {
            catDir(1, &path);
            continue;
            }
        /* Both filters are optional; NULL means "accept everything". */
        if (wildCard != NULL && !wildMatch(wildCard, path))
            continue;
        if (suffix != NULL && !endsWith(path, suffix))
            continue;
        catFile(path);
        }
    slFreeList(&entries);
    }
}
struct clonePos *readClonesFromOoDir(char *ooDir, struct hash *cloneHash) /* Read in clones from ooDir. */ { struct clonePos *cloneList = NULL; struct fileInfo *chrFiList = NULL, *chrFi; struct fileInfo *glFiList = NULL, *glFi; char pathName[512]; struct hash *chromDirHash = newHash(4); char *chromLst = optionVal("chromLst", NULL); if (chromLst != NULL) { struct lineFile *clf = lineFileOpen(chromLst, TRUE); char *row[1]; while (lineFileRow(clf, row)) { hashAdd(chromDirHash, row[0], NULL); verbose(3,"%s\n",row[0]); } lineFileClose(&clf); } verbose(2,"ooDir: %s\n",ooDir); chrFiList = listDirX(ooDir, "*", FALSE); for (chrFi = chrFiList; chrFi != NULL; chrFi = chrFi->next) { verbose(2,"%s\n",chrFi->name); if ( ((chrFi->isDir && strlen(chrFi->name) <= 2)) || hashLookup(chromDirHash, chrFi->name) ) { sprintf(pathName, "%s/%s", ooDir, chrFi->name); verbose(2,"%s\n",pathName); glFiList = listDirX(pathName, "*.gl", TRUE); for (glFi = glFiList; glFi != NULL; glFi = glFi->next) addCloneInfo(glFi->name, cloneHash, &cloneList); slFreeList(&glFiList); } } slFreeList(&chrFiList); slReverse(&cloneList); slSort(&cloneList, cmpClonePos); if (slCount(cloneList) < 0) errAbort("No .gl files in %s\n", ooDir); printf("Got %d clones\n", slCount(cloneList)); hashFree(&chromDirHash); return cloneList; }
void ooSplitFins(char *finTrans, char *ooDir) /* ooSplitFins - Create splitFin files (list of split finished clones). */ { struct hash *splitCloneHash = newHash(8); struct clone *cloneList, *clone; char fileName[512]; int i; struct fileInfo *chromDir = NULL, *ctgDir = NULL, *chrom, *ctg; int splitCount = 0; /* Read in finished clones and put ones with more than * one fragment in hash. */ cloneList = readTrans(finTrans); for (clone = cloneList; clone != NULL; clone = clone->next) if (slCount(clone->fragList) > 1) { hashAdd(splitCloneHash, clone->name, clone); ++splitCount; } printf("Found %d split clones in %s\n", splitCount, finTrans); /* Scan over all contigs in ooDir. */ chromDir = listDirX(ooDir, "*", FALSE); for (chrom = chromDir; chrom != NULL; chrom = chrom->next) { char *chromName = chrom->name; if (chrom->isDir && strlen(chromName) <= 2 && chromName[0] != '.') { printf("Processing %s\n", chromName); sprintf(fileName, "%s/%s", ooDir, chromName); ctgDir = listDirX(fileName, "ctg*", TRUE); for (ctg = ctgDir; ctg != NULL; ctg = ctg->next) { fflush(stdout); if (ctg->isDir) writeRelevantSplits(ctg->name, splitCloneHash); } slFreeList(&ctgDir); } } }
void trimFosmids(char *sangDir, char *outFile)
/* trimFosmids - Process Fosmid end reads to remove low quality bases and
 * put in one big file.
 *
 * Walks sangDir two directory levels deep and visits every *.fasta file
 * found at the third level.  As written, each file name is only echoed
 * through uglyf; the quality-filtering call is disabled, so the reported
 * total size stays zero and nothing is written to outFile. */
{
struct hash *hash = newHash(20);
struct fileInfo *topList, *top;
FILE *f = mustOpen(outFile, "w");
unsigned long totalSize = 0;

topList = listDirX(sangDir, "*", TRUE);
for (top = topList; top != NULL; top = top->next)
    {
    struct fileInfo *midList, *mid;

    if (!top->isDir)
        continue;
    printf("%s", top->name);
    fflush(stdout);
    midList = listDirX(top->name, "*", TRUE);
    for (mid = midList; mid != NULL; mid = mid->next)
        {
        struct fileInfo *fastaList, *fasta;

        if (!mid->isDir)
            continue;
        printf(".");
        fflush(stdout);
        fastaList = listDirX(mid->name, "*.fasta", TRUE);
        for (fasta = fastaList; fasta != NULL; fasta = fasta->next)
            {
            uglyf("%s\n", fasta->name);
            /* Disabled in the original:
             *   totalSize += filterByQual(faEl->name, f, 19, 15, hash); */
            }
        slFreeList(&fastaList);
        }
    printf("\n");
    slFreeList(&midList);
    }
printf("Total size %lu bytes\n", totalSize);
fclose(f);
}
void crunchDirDir(char *dirDir, FILE *f) /* Crunch dir of dirs . */ { struct fileInfo *dirList = listDirX(dirDir, NULL, FALSE), *dir; for (dir = dirList; dir != NULL; dir = dir->next) { struct fileInfo *fileList, *file; char path[PATH_LEN]; struct namePos *posList = NULL, *pos; int minPos = -1; if (!dir->isDir) { warn("%s isn't a dir, skipping", dir->name); continue; } if (sameString(dir->name, "CVS")) continue; /* Skip CVS directories in test suite. */ safef(path, sizeof(path), "%s/%s", dirDir, dir->name); fileList = listDirX(path, NULL, FALSE); for (file = fileList; file != NULL; file = file->next) { if (file->isDir) continue; AllocVar(pos); pos->name = file->name; safef(path, sizeof(path), "%s/%s/%s", dirDir, dir->name, file->name); pos->pos = firstLinePos(path); slAddHead(&posList, pos); } slSort(&posList, namePosCmp); for (pos = posList; pos != NULL; pos = pos->next) { safef(path, sizeof(path), "%s/%s/%s", dirDir, dir->name, pos->name); minPos = crunchOne(path, f, dir->name, minPos+1); } slFreeList(&posList); slFreeList(&fileList); } slFreeList(&dirList); }
static void rPathsInDirAndSubdirs(char *dir, char *wildcard, struct slName **pList)
/* Recursively add directory contents that match wildcard (* for all) to
 * list.  Directories themselves are descended into rather than added;
 * every other matching entry is pushed onto the head of *pList. */
{
struct fileInfo *entries = listDirX(dir, wildcard, TRUE);
struct fileInfo *entry = entries;

while (entry != NULL)
    {
    if (!entry->isDir)
        slNameAddHead(pList, entry->name);
    else
        rPathsInDirAndSubdirs(entry->name, wildcard, pList);
    entry = entry->next;
    }
slFreeList(&entries);
}
struct hash *makeImageHash(char *sourceImageDir)
/* Look in each subdir of sourceImageDir for .jpg files, but only in
 * subdirs, and only one level deep.
 * The hash key is the gene name, which is the part of the file name before
 * the first "_"; the hash value is the path of the file relative to
 * sourceImageDir.  Files without an underscore are not added. */
{
struct hash *imageHash = newHash(0);
struct fileInfo *subDirs = listDirX(sourceImageDir, "*", FALSE);
struct fileInfo *sub;

for (sub = subDirs; sub != NULL; sub = sub->next)
    {
    char subPath[256];
    struct fileInfo *jpgs, *jpg;

    if (!sub->isDir)
        continue;
    safef(subPath,sizeof(subPath),"%s/%s",sourceImageDir,sub->name);
    jpgs = listDirX(subPath, "*.jpg", FALSE);
    for (jpg = jpgs; jpg != NULL; jpg = jpg->next)
        {
        char relPath[256];
        char *sep;

        /* The relative path is formatted before the underscore test, as
         * in the original. */
        safef(relPath,sizeof(relPath),"%s/%s",sub->name,jpg->name);
        sep = strchr(jpg->name,'_');
        if (sep != NULL)
            {
            char *key = cloneStringZ(jpg->name,sep-jpg->name);
            char *val = cloneString(relPath);
            hashAdd(imageHash, key, val);
            verbose(2, "imageHash key=%s value=%s\n", key, val);
            }
        }
    slFreeList(&jpgs);
    }
slFreeList(&subDirs);
return imageHash;
}
void crunchDir(char *dir, FILE *f)
/* Crunch list of files in dir.  Each plain file is passed to crunchOne
 * together with its name stripped of the suffix and a position of 0. */
{
struct fileInfo *entries = listDirX(dir, NULL, FALSE);
struct fileInfo *entry;

for (entry = entries; entry != NULL; entry = entry->next)
    {
    if (!entry->isDir)
        {
        char path[PATH_LEN];
        /* Build the path first: chopSuffix edits entry->name in place. */
        safef(path, sizeof(path), "%s/%s", dir, entry->name);
        chopSuffix(entry->name);
        crunchOne(path, f, entry->name, 0);
        }
    }
slFreeList(&entries);
}
void afsCombine(char *inDir, char *outFile) /* afsCombine - Combine output from multiple runs of aliFragScore. */ { struct hash *hash = newHash(16); struct scoredFrag *fragList = NULL, *frag; struct fileInfo *fi, *fiList = listDirX(inDir, "*", TRUE); FILE *f; for (fi = fiList; fi != NULL; fi = fi->next) foldIn(fi->name, hash, &fragList); slReverse(&fragList); f = mustOpen(outFile, "w"); for (frag = fragList; frag != NULL; frag = frag->next) { fprintf(f, "%s\t%d\t%d\t%f\t%f\n", frag->frag, frag->perfectCount, frag->posCount, frag->posTotal, frag->total); } carefulClose(&f); }
void hgCtgPos(char *database, char *ooDir) /* hgCtgPos - Store contig positions ( from lift files ) in database.. */ { struct ctgPos *ctgList = NULL; char liftFileName[512]; struct fileInfo *fiList, *fi; static char *liftNames[2] = {"lift/ordered.lft", "lift/random.lft"}; int i; struct hash *chromDirHash = newHash(4); char *chromLst = optionVal("chromLst", NULL); if (chromLst != NULL) { struct lineFile *clf = lineFileOpen(chromLst, TRUE); char *row[1]; while (lineFileRow(clf, row)) { hashAdd(chromDirHash, row[0], NULL); } lineFileClose(&clf); } fiList = listDirX(ooDir, "*", FALSE); for (fi = fiList; fi != NULL; fi = fi->next) { if (fi->isDir && ((strlen(fi->name) <= 2) || startsWith("NA_", fi->name) || hashLookup(chromDirHash, fi->name) )) { for (i=0; i<ArraySize(liftNames); ++i) { sprintf(liftFileName, "%s/%s/%s", ooDir, fi->name, liftNames[i]); if (fileExists(liftFileName)) { addCtgFile(liftFileName, &ctgList); } } } } slSort(&ctgList, cmpCtgPos); printf("Got %d contigs total\n", slCount(ctgList)); saveCtgPos(ctgList, database); hashFree(&chromDirHash); }
void hgLoadGap(char *database, char *ooDir, char *oneChrom) /* hgLoadGap - Put chromosome .gap files into browser database.. */ { struct sqlConnection *conn = sqlConnect(database); /* target prefix is used in zoo browser */ if (oneChrom != NULL) { if (startsWith("chr", oneChrom)) oneChrom += 3; else if (startsWith("target", oneChrom)) oneChrom += 6; } if (unsplit) gapFileToTable(conn, ooDir, "gap"); else { struct fileInfo *chrFiList, *chrFi; char pathName[512]; boolean gotAny = FALSE; chrFiList = listDirX(ooDir, "*", FALSE); for (chrFi = chrFiList; chrFi != NULL; chrFi = chrFi->next) { if (chrFi->isDir && ((strlen(chrFi->name) <= 2) || startsWith("NA_", chrFi->name))) { if (oneChrom == NULL || sameWord(chrFi->name, oneChrom)) { safef(pathName, sizeof(pathName), "%s/%s", ooDir, chrFi->name); makeGap(conn, pathName); gotAny = TRUE; verbose(2, "done %s\n", chrFi->name); } } } slFreeList(&chrFiList); if (!gotAny) errAbort("No .gap files found"); } sqlDisconnect(&conn); }
struct fileInfo *readDirs(int dirCount, char *dirs[])
/* Return extended listing of all dirs.
 *
 * Lists the *.fa files of each directory, reports the per-directory count
 * as it goes, and returns the concatenation of all listings after printing
 * the overall file count and byte total. */
{
struct fileInfo *combined = NULL, *fi;
double byteTotal = 0;
int dirIx = 0;

while (dirIx < dirCount)
    {
    struct fileInfo *listing;

    printf("Listing %s...", dirs[dirIx]);
    fflush(stdout);
    listing = listDirX(dirs[dirIx], "*.fa", TRUE);
    printf("got %d files\n", slCount(listing));
    combined = slCat(combined, listing);
    ++dirIx;
    }

for (fi = combined; fi != NULL; fi = fi->next)
    byteTotal += fi->size;
printf("Total %d files %e bytes\n", slCount(combined), byteTotal);
return combined;
}
void tpfDirToTabFile(char *tpfDir, char *fileName)
/* Read TPF directory and make tab-separated file.
 *
 * Each Chr* subdirectory of tpfDir contributes its tpf.txt through
 * addTpfToTabFile.  The chromosome name passed along is "chr" followed by
 * whatever comes after the first three characters of the directory name.
 * Aborts when tpfDir has no Chr* entries. */
{
FILE *f = mustOpen(fileName, "w");
char tpfFile[512];
char ourChrom[16];
struct fileInfo *chrom, *dir = listDirX(tpfDir, "Chr*", FALSE);

if (dir == NULL)
    errAbort("No Chr files in %s", tpfDir);
for (chrom = dir; chrom != NULL; chrom = chrom->next)
    {
    if (chrom->isDir)
        {
        /* Bounded formatting: ourChrom is only 16 bytes, so a long
         * directory name must not be copied in unchecked. */
        safef(tpfFile, sizeof(tpfFile), "%s/%s/%s", tpfDir, chrom->name, "tpf.txt");
        safef(ourChrom, sizeof(ourChrom), "chr%s", chrom->name+3);
        addTpfToTabFile(ourChrom, tpfFile, f);
        }
    }
slFreeList(&dir);
carefulClose(&f);
}
void makeGl(struct sqlConnection *conn, char *chromDir, struct hash *cloneVerHash) /* Read in .gl files in chromDir and use them to create the * gl tables for the corresponding chromosome(s). */ { struct dyString *ds = newDyString(2048); struct fileInfo *fiList, *fi; char dir[256], chrom[128], ext[64]; char *glFileName; char glTable[128]; char *tab = "gl.tab"; fiList = listDirX(chromDir, "*.gl", TRUE); for (fi = fiList; fi != NULL; fi = fi->next) { glFileName = fi->name; printf("Processing %s\n", glFileName); splitPath(glFileName, dir, chrom, ext); sprintf(glTable, "%s_gl", chrom); if ( (! noLoad) && sqlTableExists(conn, glTable)) { dyStringClear(ds); dyStringPrintf(ds, "DROP table %s", glTable); sqlUpdate(conn, ds->string); } dyStringClear(ds); dyStringPrintf(ds, createGl, glTable, maxFragNameSize); verbose(2, "%s", ds->string); if (! noLoad) sqlMaybeMakeTable(conn, glTable, ds->string); dyStringClear(ds); addGlBin(glFileName, tab); dyStringPrintf(ds, "LOAD data local infile '%s' into table %s", tab, glTable); if (! noLoad) sqlUpdate(conn, ds->string); } freeDyString(&ds); }
void doPieceJob(FILE *con, FILE *sh, char *ooDir, char *chrom, char *cdna,
    char *conDir, char *pslDir, char *outDir, char *logDir, char *errDir, char *inDir)
/* Do mrna or EST alignment jobs on contigs.
 *
 * Creates pslDir/chrom, then for every ctg* entry of ooDir/chrom that
 * contains <contig>.fa writes one job stanza to the condor file 'con' and
 * an input list via makeSimpleIn.  A single comment line goes to the
 * shell script 'sh'.  conDir is not used by this function. */
{
struct fileInfo *fileList, *fel;
char chromDir[512];
char pslSubDir[512];
char contigDir[512];
char faName[512];
char jobName[512];

printf("Piece job on %s %s\n", chrom, cdna);
fprintf(sh, "#Stitching %s pieces for chromosome %s\n", cdna, chrom);

/* Create a directory for result from each contig. */
safef(pslSubDir, sizeof(pslSubDir), "%s/%s", pslDir, chrom);
makeDir(pslSubDir);

/* List each contig and make a job for it. */
safef(chromDir, sizeof(chromDir), "%s/%s", ooDir, chrom);
fileList = listDirX(chromDir, "ctg*", FALSE);
for (fel = fileList; fel != NULL; fel = fel->next)
    {
    char *contig = fel->name;
    /* All path building is bounded: the inputs come from the command
     * line and directory listings and can exceed the 512-byte buffers. */
    safef(contigDir, sizeof(contigDir), "%s/%s/%s", ooDir, chrom, contig);
    safef(faName, sizeof(faName), "%s/%s.fa", contigDir, contig);
    safef(jobName, sizeof(jobName), "%s_%s", chrom, contig);
    if (fileExists(faName))
        {
        fprintf(con, "log = %s/%s.%s\n", logDir, jobName, cdna);
        fprintf(con, "error = %s/%s.%s\n", errDir, jobName, cdna);
        fprintf(con, "output = %s/%s.%s\n", outDir, jobName, cdna);
        makeSimpleIn(inDir, jobName, faName);
        fprintf(con, "arguments = %s/%s %s/%s mrna /var/tmp/hg/h/10.ooc %s/%s.%s.psl\n",
                inDir, jobName, inDir, cdna, pslSubDir, contig, cdna);
        fprintf(con, "queue 1\n\n");
        }
    }
slFreeList(&fileList);
}
struct hash *loadChroms(char *dir) /* Load zipped chromosome files into memory. */ { FILE *f; char fastaScan[16]; safef(fastaScan, sizeof(fastaScan), "*.%s", faExtn); struct fileInfo *chromEl, *chromList = listDirX(dir, fastaScan, TRUE); struct hash *chromHash = newHash(0); struct dnaSeq *seq; char chrom[128]; char *faName; int count = 0; verbose(2, "# scanning '%s/%s'\n", dir, fastaScan); for (chromEl = chromList; chromEl != NULL; chromEl = chromEl->next) { char *fileName = chromEl->name; splitPath(fileName, NULL, chrom, NULL); chopSuffix(chrom); if (startsWith("chr0", chrom)) /* Convert chr01 to chr1, etc. */ stripChar(chrom, '0'); if (sameString(chrom, "chrmt")) strcpy(chrom, "chr17"); f = fopen(fileName, "r"); AllocVar(seq); seq->name = cloneString(chrom); if (!faFastReadNext(f, &seq->dna, &seq->size, &faName)) errAbort("Couldn't load sequence from %s", fileName); seq->dna = cloneMem(seq->dna, seq->size+1); toUpperN(seq->dna, seq->size); hashAdd(chromHash, chrom, seq); verbose(3, "# loadChrom %s '%s'\n", fileName, chrom); fclose(f); f = NULL; count++; } if (0 == count) errAbort("not fasta files found in '%s/%s'\n", dir, fastaScan); return chromHash; }
void vgPatchJax(char *database, char *dir) /* vgPatchJax - Patch Jackson labs part of visiGene database. */ { struct sqlConnection *conn = sqlConnect(database); struct fileInfo *raList, *ra; struct dyString *query = dyStringNew(0); raList = listDirX(dir, "*.ra", TRUE); for (ra = raList; ra != NULL; ra = ra->next) { struct hash *hash = raReadSingle(ra->name); char *submitSet = hashMustFindVal(hash, "submitSet"); char *year = hashMustFindVal(hash, "year"); dyStringClear(query); dyStringPrintf(query, "update submissionSet set year=%s " "where name = '%s'" , year, submitSet); sqlUpdate(conn, query->string); } sqlDisconnect(&conn); }
void makeGap(struct sqlConnection *conn, char *chromDir)
/* Read in .gap files in chromDir and use them to create the
 * gap table for the corresponding chromosome(s).  The table name is the
 * file's base name with every '.' turned into '_', followed by "_gap". */
{
struct fileInfo *gapFiles = listDirX(chromDir, "*.gap", TRUE);
struct fileInfo *gapFile;

for (gapFile = gapFiles; gapFile != NULL; gapFile = gapFile->next)
    {
    char dirPart[256], basePart[128], extPart[64];
    char tableName[128];
    char *path = gapFile->name;
    char *c;

    verbose(1, "Processing %s\n", path);

    /* Split the full path name of the .gap file and turn its base name
     * into a table name. */
    splitPath(path, dirPart, basePart, extPart);
    for (c = basePart; *c != 0; ++c)
        {
        if (*c == '.')
            *c = '_';
        }
    safef(tableName, sizeof(tableName), "%s_gap", basePart);
    gapFileToTable(conn, path, tableName);
    }
}
void nibbParseImageDir(char *sourceDir, char *goodTab, char *badTab) /* nibbParseImageDir - Look through nibb image directory and allowing for * typos and the like create a table that maps a file name to clone name, * developmental stage, and view of body part. */ { struct fileInfo *l1List, *l1, *l2List, *l2, *l3List, *l3; struct hash *stageHash = hashNew(0); struct hash *viewHash = hashNew(0); struct hash *otherHash = hashNew(0); struct hash *probeHash = hashNew(0); struct hash *fixHash = hashFixers(); struct imageInfo *imageList = NULL, *image; FILE *good = mustOpen(goodTab, "w"); FILE *bad = mustOpen(badTab, "w"); int goodCount = 0, badCount = 0; int jpgCount = 0, jpgDir = 0; l1List = listDirX(sourceDir, "XL*", FALSE); for (l1 = l1List; l1 != NULL; l1 = l1->next) { char l1Path[PATH_LEN]; safef(l1Path, sizeof(l1Path), "%s/%s", sourceDir, l1->name); l2List = listDirX(l1Path, "XL*", FALSE); for (l2 = l2List; l2 != NULL; l2 = l2->next) { char l2Path[PATH_LEN]; char cloneName[64], *permanentCloneName; char *cloneDir = l2->name; char *cloneEnd; int cloneNameSize = 0; if (stringIx(cloneDir, skipDir) >= 0) continue; /* Figure out clone name, whish is directory component up to * first underbar. */ cloneEnd = strchr(cloneDir, '_'); if (cloneEnd != NULL) cloneNameSize = cloneEnd - cloneDir; else errAbort("Strangely formatted image dir %s, no underbar", cloneDir); if (cloneNameSize >= sizeof(cloneName)) errAbort("Clone name too long in dir %s", cloneDir); if (cloneNameSize < 8 || cloneNameSize > 12) errAbort("Clone name wrong size %s", cloneDir); memcpy(cloneName, cloneDir, cloneNameSize); cloneName[cloneNameSize] = 0; /* Check format is XL###L##. We already checked the XL. */ if (!isdigit(cloneName[2]) || !isdigit(cloneName[3]) || !isdigit(cloneName[4]) || isdigit(cloneName[5]) || !isdigit(cloneName[6]) || !isdigit(cloneName[7])) errAbort("Strangely formatted clone name %s", cloneDir); permanentCloneName = hashStoreName(probeHash, cloneName); /* Get all files in dir. 
*/ safef(l2Path, sizeof(l2Path), "%s/%s/%s", sourceDir, l1->name, l2->name); l3List = listDirX(l2Path, "*.jpg", FALSE); for (l3 = l3List; l3 != NULL; l3 = l3->next) { char *fileName = l3->name; if (stringIx(l3->name, skipFile) >= 0) continue; image = getImageInfo(fixHash, permanentCloneName, l1->name, cloneDir, fileName, stageHash, viewHash, otherHash, probeHash); slAddHead(&imageList, image); ++jpgCount; } ++jpgDir; } } slReverse(&imageList); verbose(1, "%d jpg images in %d directories\n", jpgCount, jpgDir); #ifdef OLD verbose(1, "%d probes, %d stages, %d views, %d other\n", probeHash->elCount, stageHash->elCount, viewHash->elCount, otherHash->elCount); printHash("stages", stageHash); printHash("views", viewHash); printHash("other", otherHash); #endif /* OLD */ for (image = imageList; image != NULL; image = image->next) { if (image->clone != NULL && image->stage != NULL && image->view != NULL) { imageInfoOut(image, good); ++goodCount; } else { imageInfoOut(image, bad); ++badCount; } } verbose(1, "%d (%4.1f%%) parsed ok, %d (%4.2f%%) didn't\n", goodCount, 100.0 * goodCount/(goodCount + badCount), badCount, 100.0 * badCount/(goodCount + badCount)); carefulClose(&good); carefulClose(&bad); }
void hgGoldGapGl(char *database, char *gsDir, char *ooSubDir, boolean doGl, char *oneChrom)
/* hgGoldGapGl - Put chromosome .agp and .gl files into browser database.
 *
 * Works on gsDir/ooSubDir.  A subdirectory counts as a chromosome when its
 * name is at most two characters long, starts with "NA_", or is listed in
 * the file given by the chromLst option.  For each one (or only oneChrom
 * when that is not NULL) makeGoldAndGap runs, plus makeGl when doGl is
 * set.  With the noLoad option no database connection is opened.  Aborts
 * when no directory was processed. */
{
struct fileInfo *chrFiList, *chrFi;
struct sqlConnection *conn = NULL;
char ooDir[512];
char pathName[512];
struct hash *cloneVerHash = newHash(0);
boolean gotAny = FALSE;
struct hash *chromDirHash = newHash(4);
char *chromLst = optionVal("chromLst", NULL);

if (! noLoad)
    conn = sqlConnect(database);

verbose(2,"#\tcomplete gold, gap and .gl files produced\n");

if (chromLst != NULL)
    {
    struct lineFile *clf = lineFileOpen(chromLst, TRUE);
    char *row[1];
    while (lineFileRow(clf, row))
        {
        hashAdd(chromDirHash, row[0], NULL);
        }
    lineFileClose(&clf);
    }

safef(ooDir, sizeof(ooDir), "%s/%s", gsDir, ooSubDir);

/* Strip the prefix so oneChrom compares against bare directory names.
 * The target prefix is used in zoo browser.  Each prefix is skipped by
 * its own length; the original skipped 3 characters for both, turning
 * "target1" into "get1". */
if (oneChrom != NULL)
    {
    if (startsWith("chr", oneChrom))
        oneChrom += strlen("chr");
    else if (startsWith("target", oneChrom))
        oneChrom += strlen("target");
    }

if (doGl)
    {
    safef(pathName, sizeof(pathName), "%s/ffa/sequence.inf", gsDir);
    makeCloneVerHash(pathName, cloneVerHash);
    }

chrFiList = listDirX(ooDir, "*", FALSE);
for (chrFi = chrFiList; chrFi != NULL; chrFi = chrFi->next)
    {
    if (chrFi->isDir &&
        ((strlen(chrFi->name) <= 2) || startsWith("NA_", chrFi->name)
         || (NULL != hashLookup(chromDirHash, chrFi->name))))
        {
        if (oneChrom == NULL || sameWord(chrFi->name, oneChrom))
            {
            safef(pathName, sizeof(pathName), "%s/%s", ooDir, chrFi->name);
            makeGoldAndGap(conn, pathName);
            if (doGl)
                makeGl(conn, pathName, cloneVerHash);
            gotAny = TRUE;
            uglyf("done %s\n", chrFi->name);
            }
        }
    }
slFreeList(&chrFiList);
if (! noLoad)
    sqlDisconnect(&conn);
hashFree(&chromDirHash);
if (!gotAny)
    errAbort("No contig agp and gold files found");
}
void cdnaOnOoJobs(char *ooDir, char *conDir, int cdnaCount, char *cdnaTypes[]) /* cdnaOnOoJobs - make condor submission file for EST and mRNA alignments on draft assembly. */ { char chromDir[512]; char chromFile[512]; char conFile[512]; char shFile[512]; char conPslDir[512]; char conOutDir[512]; char conErrDir[512]; char conLogDir[512]; char conInDir[512]; struct fileInfo *cfaList, *cfa; struct fileInfo *chromList, *chromEl; static char lastChromName[64] = "9X8Y"; /* Something uniq. */ boolean lastDoFull = FALSE; FILE *con, *sh; int i; /* Set up basic directory structure in output dir. */ makeDir(conDir); sprintf(conPslDir, "%s/psl", conDir); makeDir(conPslDir); sprintf(conOutDir, "%s/out", conDir); makeDir(conOutDir); sprintf(conLogDir, "%s/log", conDir); makeDir(conLogDir); sprintf(conErrDir, "%s/err", conDir); makeDir(conErrDir); sprintf(conInDir, "%s/in", conDir); makeDir(conInDir); /* Create list files for mrna and est. */ for (i=0; i<cdnaCount; i++) { char fileName[512]; sprintf(fileName, "/var/tmp/hg/h/mrna/%s.fa", cdnaTypes[i]); makeSimpleIn(conInDir, cdnaTypes[i], fileName); } /* Create condor submission file and write header. */ sprintf(conFile, "%s/all.con", conDir); con = mustOpen(conFile, "w"); fprintf(con, "#File created by cdnaOnOoJobs %s %s\n\n", ooDir, conDir); fprintf(con, "universe = vanilla\n" "notification = error\n" "requirements = memory > 250\n" "executable = /cse/guests/kent/bin/i386/psLayout\n" "initialdir = %s\n" "\n" , conDir); /* Create shell script to finish job. */ sprintf(shFile, "%s/finish.sh", conDir); sh = mustOpen(shFile, "w"); /* Loop through each chromosome directory. */ chromList = listDirX(ooDir, "*", FALSE); for (chromEl = chromList; chromEl != NULL; chromEl = chromEl->next) { int len = strlen(chromEl->name); if (chromEl->isDir && len <= 2) { sprintf(chromDir, "%s/%s", ooDir, chromEl->name); cfaList = listDirX(chromDir, "chr*.fa", FALSE); slSort(&cfaList, cmpFileInfoUnderbar); /* Get list of assembled chromosomes in dir. 
*/ for (cfa = cfaList; cfa != NULL; cfa = cfa->next) { /* See if is _random version of previous chromosome, in which * case we follow the lead of last time. */ printf("%s size %d\n", cfa->name, cfa->size); sprintf(chromFile, "%s/%s", chromDir, cfa->name); if (sameChrom(lastChromName, cfa->name)) { if (lastDoFull) { for (i=0; i<cdnaCount; ++i) doFullJob(con, chromFile, lastChromName, conDir, conPslDir, conOutDir, conLogDir, conErrDir, conInDir, cdnaTypes[i]); } } else { strcpy(lastChromName, cfa->name); chopSuffix(lastChromName); if (cfa->size < 60000000) { lastDoFull = TRUE; for (i=0; i<cdnaCount; ++i) doFullJob(con, chromFile, lastChromName, conDir, conPslDir, conOutDir, conLogDir, conErrDir, conInDir, cdnaTypes[i]); } else { lastDoFull = FALSE; for (i=0; i<cdnaCount; ++i) doPieceJob(con, sh, ooDir, lastChromName+3, cdnaTypes[i], conDir, conPslDir, conOutDir, conLogDir, conErrDir, conInDir); } } } } } }
struct fileInfo* getGbFiles(struct gbUpdate* update, unsigned types) /* generate list of genbank files for an update and type */ { char relDir[PATH_LEN]; struct fileInfo* files = NULL; /* relative path, including directory */ char* updateDot = strchr(update->name, '.'); /* figure out path to input file */ strcpy(relDir, "download/"); strcat(relDir, update->release->name); if (update->release->srcDb == GB_GENBANK) { /* genbank */ if (update->isFull) { if (types & GB_MRNA) files = slCat(files, listDirX(relDir, "gbpri*.seq.gz", TRUE)); if (types & GB_EST) files = slCat(files, listDirX(relDir, "gbest*.seq.gz", TRUE)); } else { char dailyDir[PATH_LEN]; char dailyFile[PATH_LEN]; strcpy(dailyDir, relDir); strcat(dailyDir, "/daily-nc"); strcpy(dailyFile, "nc"); strcat(dailyFile, updateDot+1); strcat(dailyFile, ".flat.gz"); files = slCat(files, listDirX(dailyDir, dailyFile, TRUE)); } } else { /* refseq */ if (update->isFull) { char fullDir[PATH_LEN]; strcpy(fullDir, relDir); strcat(fullDir, "/cumulative"); files = slCat(files, listDirX(fullDir, "rscu.gbff.Z", TRUE)); } else { /* need to reverse year and month-day parts in * rsnc.1231.2001.gbff.Z */ char dailyDir[PATH_LEN]; char dailyFile[PATH_LEN]; int len; strcpy(dailyDir, relDir); strcat(dailyDir, "/daily"); strcpy(dailyFile, "rsnc."); strcat(dailyFile, updateDot+6); len = strlen(dailyFile); strncpy(dailyFile+len, updateDot, 5); /* include dot */ dailyFile[len+5] = '\0'; /* strncpy didn't null term */ strcat(dailyFile, ".gbff.Z"); files = slCat(files, listDirX(dailyDir, dailyFile, TRUE)); } } if (files == NULL) errAbort("no input files found for release %s update %s", update->release->name, update->name); return files; }
void makeGoldAndGap(struct sqlConnection *conn, char *chromDir) /* Read in .agp files in chromDir and use them to create the * gold and gap tables for the corresponding chromosome(s). */ { struct dyString *ds = newDyString(2048); struct fileInfo *fiList, *fi; char dir[256], chrom[128], ext[64]; char goldName[128], gapName[128]; char *agpName; char *ptr; char goldFileName[128]; char gapFileName[128]; if (! noLoad) { safef(goldFileName, ArraySize(goldFileName), "%s", goldTabName); safef(gapFileName, ArraySize(gapFileName), "%s", gapTabName); } fiList = listDirX(chromDir, "*.agp", TRUE); for (fi = fiList; fi != NULL; fi = fi->next) { /* Get full path name of .agp file and process it * into table names. */ agpName = fi->name; printf("Processing %s\n", agpName); splitPath(agpName, dir, chrom, ext); while ((ptr = strchr(chrom, '.')) != NULL) *ptr = '_'; sprintf(goldName, "%s_gold", chrom); sprintf(gapName, "%s_gap", chrom); if (noLoad) { safef(goldFileName, ArraySize(goldFileName), "%s_gold.tab", chrom); safef(gapFileName, ArraySize(gapFileName), "%s_gap.tab", chrom); } /* Create gold & gap tab separated files. */ splitAgp(fi->name, goldFileName, gapFileName); /* Create gold table and load it up. */ dyStringClear(ds); dyStringPrintf(ds, createGold, goldName); dyStringPrintf(ds, goldSplitIndex, maxFragNameSize); verbose(2, "%s", ds->string); if (! noLoad) sqlRemakeTable(conn, goldName, ds->string); dyStringClear(ds); dyStringPrintf(ds, "LOAD data local infile '%s' into table %s", goldFileName, goldName); if (! noLoad) { sqlUpdate(conn, ds->string); remove(goldFileName); } /* Create gap table and load it up. */ dyStringClear(ds); dyStringPrintf(ds, createGap, gapName); dyStringAppend(ds, gapSplitIndex); verbose(2, "%s", ds->string); if (! noLoad) { sqlRemakeTable(conn, gapName, ds->string); sqlMaybeMakeTable(conn, gapName, ds->string); } dyStringClear(ds); dyStringPrintf(ds, "LOAD data local infile '%s' into table %s", gapFileName, gapName); if (! 
noLoad) { sqlUpdate(conn, ds->string); remove(gapFileName); } } freeDyString(&ds); }