void uniqSize(char *ooDir, char *agpFile, char *glFile, char *altFile)
/* Figure out unique parts of genome from the per-contig files under ooDir
 * and print one line per chromosome, followed by a "total" line, to stdout.
 *    ooDir   - root directory.  Only subdirectories with one- or
 *              two-character names are treated as chromosomes, and only
 *              NT* subdirectories of those as contigs.
 *    agpFile - file name looked for first inside each contig directory.
 *    glFile  - file name handed to addStretchInfo() when present there.
 *    altFile - tried when agpFile is missing.  May be NULL. */
{
struct fileInfo *chromDirs, *chromEl;
struct fileInfo *contigDirs, *contigEl;
char subDir[512];
struct chromInfo *ciList = NULL, *ci, *ciTotal;

chromDirs = listDirX(ooDir, "*", FALSE);
for (chromEl = chromDirs; chromEl != NULL; chromEl = chromEl->next)
    {
    char *chromName = chromEl->name;
    int dirNameLen = strlen(chromName);
    struct chromInfo *ctgList = NULL, *ctg;

    if (dirNameLen <= 0 || dirNameLen > 2 || !chromEl->isDir)
        continue;

    /* Bounded formatting: an over-long path gets truncated (and then is
     * simply not found) instead of overrunning the buffer. */
    snprintf(subDir, sizeof(subDir), "%s/%s", ooDir, chromName);
    contigDirs = listDirX(subDir, "NT*", FALSE);
    for (contigEl = contigDirs; contigEl != NULL; contigEl = contigEl->next)
        {
        int nSize, uSize;
        char *contigName = contigEl->name;
        char fileName[512];

        if (!contigEl->isDir)
            continue;
        snprintf(fileName, sizeof(fileName), "%s/%s/%s",
                 subDir, contigName, agpFile);
        if (!fileExists(fileName) && altFile != NULL)
            snprintf(fileName, sizeof(fileName), "%s/%s/%s",
                     subDir, contigName, altFile);
        if (!fileExists(fileName))
            {
            warn("No %s in %s/%s", agpFile, subDir, contigName);
            continue;
            }
        ctg = oneContigInfo(fileName);
        slAddHead(&ctgList, ctg);
        /* The sizes are not used afterwards; the call is kept as it was. */
        getSizes(fileName, &uSize, &nSize);
        snprintf(fileName, sizeof(fileName), "%s/%s/%s",
                 subDir, contigName, glFile);
        if (fileExists(fileName))
            addStretchInfo(fileName, ctg);
        }
    slFreeList(&contigDirs);
    ci = combineChromInfo(ctgList, chromName);
    slAddHead(&ciList, ci);
    }
slReverse(&ciList);

printHeader(stdout);
for (ci = ciList; ci != NULL; ci = ci->next)
    printChromInfo(ci, stdout);
ciTotal = combineChromInfo(ciList, "total");
printChromInfo(ciTotal, stdout);

/* chromName points into chromDirs and was handed to combineChromInfo(),
 * which may still reference it, so free the listing only after all
 * output is done. */
slFreeList(&chromDirs);
}
void checkOurDir(char *ourDir, struct contig *contigList, struct hash *hash)
/* Check that our directories look ok.  Contig directories (N?_*) are
 * collected from the one- or two-character subdirectories of ourDir.
 * Each contig in contigList that has no directory is reported on stdout;
 * the others are passed to checkOurContig() with their directory.
 * The hash parameter is not used here. */
{
struct us
    {
    struct us *next;    /* Next in list */
    char *contig;       /* NT_XXXXXX or NG_XXXXXX */
    char *chrom;        /* 1, 2, 3, etc. */
    };
struct hash *placeHash = newHash(0);
struct us *placeList = NULL, *place;
struct fileInfo *chromDirs = NULL, *chromEl, *contigDirs = NULL, *contigEl;
char chromPath[512], contigPath[512];
struct contig *wanted;
int problemCount = 0;

/* First pass: remember which chromosome directory holds each contig. */
chromDirs = listDirX(ourDir, "*", FALSE);
for (chromEl = chromDirs; chromEl != NULL; chromEl = chromEl->next)
    {
    if (!chromEl->isDir || strlen(chromEl->name) > 2)
        continue;
    sprintf(chromPath, "%s/%s", ourDir, chromEl->name);
    contigDirs = listDirX(chromPath, "N?_*", FALSE);
    for (contigEl = contigDirs; contigEl != NULL; contigEl = contigEl->next)
        {
        if (!contigEl->isDir)
            continue;
        AllocVar(place);
        /* Names point into the directory listings, which stay allocated. */
        place->contig = contigEl->name;
        place->chrom = chromEl->name;
        slAddHead(&placeList, place);
        hashAdd(placeHash, place->contig, place);
        }
    }
printf("We have %d contigs\n", slCount(placeList));

/* Second pass: look up every expected contig. */
for (wanted = contigList; wanted != NULL; wanted = wanted->next)
    {
    place = hashFindVal(placeHash, wanted->name);
    if (place != NULL)
        {
        sprintf(contigPath, "%s/%s/%s", ourDir, place->chrom, place->contig);
        problemCount += checkOurContig(contigPath, wanted);
        }
    else
        {
        ++problemCount;
        printf("%s is not in %s\n", wanted->name, ourDir);
        }
    }
freeHash(&placeHash);
}
void ooChains(char *ffaDir, char *ooDir)
/* ooChains - make chains (partially ordered clone fragments) for oo dir.
 * Clone and fragment information is read from ffaDir; chains are then
 * written for every ctg* directory found below the one- or two-character
 * (non-dot) subdirectories of ooDir. */
{
struct hash *cloneHash = newHash(16);
struct clone *cloneList, *clone;
char path[512];
int finfIx;
struct fileInfo *chromList = NULL, *contigList = NULL, *chromEl, *contigEl;

/* Load the clone list, then fold in each finf file. */
sprintf(path, "%s/%s", ffaDir, infoFile);
printf("Reading %s\n", path);
cloneList = readCloneList(path, cloneHash);
for (finfIx = 0; finfIx < ArraySize(finfFiles); ++finfIx)
    {
    sprintf(path, "%s/%s", ffaDir, finfFiles[finfIx]);
    printf("Reading %s\n", path);
    readFinf(path, cloneHash);
    }

printf("Making chains\n");
clone = cloneList;
while (clone != NULL)
    {
    slReverse(&clone->fragList);
    makeChains(clone);
    clone = clone->next;
    }

/* Write output into each contig directory. */
chromList = listDirX(ooDir, "*", FALSE);
for (chromEl = chromList; chromEl != NULL; chromEl = chromEl->next)
    {
    char *chromName = chromEl->name;
    if (!chromEl->isDir || strlen(chromName) > 2 || chromName[0] == '.')
        continue;
    printf("Processing %s\n", chromName);
    sprintf(path, "%s/%s", ooDir, chromName);
    contigList = listDirX(path, "ctg*", TRUE);
    for (contigEl = contigList; contigEl != NULL; contigEl = contigEl->next)
        {
        /* One progress dot per listed entry, directory or not. */
        printf(".");
        fflush(stdout);
        if (contigEl->isDir)
            writeRelevantChains(contigEl->name, cloneHash);
        }
    printf("\n");
    slFreeList(&contigList);
    }
printf("Got chains in %d of %d files.  %d total links (including phase2)\n",
        clonesWithChainsCount, slCount(cloneList), linkCount);
}
void doRewrite(char *outDir, char *inDir, char *trackFile)
/* Do some sort of rewrite on entire system.
 * The tree below inDir is walked three levels deep: the root itself, each
 * subdirectory (organism), and each subdirectory of those (database).
 * trackFile is read at every level with raLevelRead(); organism and
 * database levels get their parent pointer set and are passed to
 * rewriteLevel() with the matching path under outDir. */
{
/* Make list and hash of root dir */
struct lm *rootLm = lmInit(0);
char rootName[PATH_LEN];
safef(rootName, sizeof(rootName), "%s/%s", inDir, trackFile);
struct raLevel *rootLevel = raLevelRead(rootName, rootLm);

/* Make subdirectory list. */
struct fileInfo *org, *orgList = listDirX(inDir, "*", FALSE);
for (org = orgList; org != NULL; org = org->next)
    {
    if (org->isDir)
        {
        struct lm *orgLm = lmInit(0);
        char inOrgDir[PATH_LEN], outOrgDir[PATH_LEN];
        safef(inOrgDir, sizeof(inOrgDir), "%s/%s", inDir, org->name);
        safef(outOrgDir, sizeof(outOrgDir), "%s/%s", outDir, org->name);
        char inOrgFile[PATH_LEN];
        safef(inOrgFile, sizeof(inOrgFile), "%s/%s", inOrgDir, trackFile);
        struct raLevel *orgLevel = raLevelRead(inOrgFile, orgLm);
        orgLevel->parent = rootLevel;
        rewriteLevel(orgLevel, outOrgDir, orgLm);
        struct fileInfo *db, *dbList = listDirX(inOrgDir, "*", FALSE);
        for (db = dbList; db != NULL; db = db->next)
            {
            if (db->isDir)
                {
                struct lm *dbLm = lmInit(0);
                char inDbDir[PATH_LEN], outDbDir[PATH_LEN];
                safef(inDbDir, sizeof(inDbDir), "%s/%s", inOrgDir, db->name);
                safef(outDbDir, sizeof(outDbDir), "%s/%s", outOrgDir, db->name);
                char inDbFile[PATH_LEN];
                safef(inDbFile, sizeof(inDbFile), "%s/%s", inDbDir, trackFile);
                struct raLevel *dbLevel = raLevelRead(inDbFile, dbLm);
                dbLevel->parent = orgLevel;
                rewriteLevel(dbLevel, outDbDir, dbLm);
                hashFree(&dbLevel->trackHash);
                lmCleanup(&dbLm);
                }
            }
        /* Directory names were copied into the path buffers above, so the
         * listing can go once the loop is finished. */
        slFreeList(&dbList);
        hashFree(&orgLevel->trackHash);
        lmCleanup(&orgLm);
        }
    }
slFreeList(&orgList);
hashFree(&rootLevel->trackHash);
lmCleanup(&rootLm);
}
Example #5
0
void chimpHiQualDiffs(char *axtDir, char *qacName, char *bedName)
/* chimpHiQualDiffs - Create list of chimp high quality differences.
 * Every *.axt file in axtDir (or, when there is none, every *.axt.gz
 * file) is passed to axtHiQualDiffs() together with the hash read from
 * qacName; output goes to bedName. */
{
struct hash *qacHash = qacReadToHash(qacName);
struct fileInfo *axtEl, *axtList = listDirX(axtDir, "*.axt", TRUE);
FILE *f = mustOpen(bedName, "w");

if (axtList == NULL)
    {
    /* Nothing uncompressed; fall back to the gzipped files. */
    axtList = listDirX(axtDir, "*.axt.gz", TRUE);
    if (axtList == NULL)
        printf("No axt files were found in the '%s' directory.\n",axtDir);
    }

axtEl = axtList;
while (axtEl != NULL)
    {
    axtHiQualDiffs(axtEl->name, qacHash, f);
    axtEl = axtEl->next;
    }
carefulClose(&f);
}
void catDir(int dirCount, char *dirs[])
/* catDir - concatenate files in directory - for those times when too
 * many files for cat to handle.  Each entry of each listed directory is
 * either descended into (directories, when recurse is set) or handed to
 * catFile() if it passes the optional wildCard and suffix filters. */
{
int dirIx;

for (dirIx = 0; dirIx < dirCount; ++dirIx)
    {
    struct fileInfo *entryList = listDirX(dirs[dirIx], NULL, TRUE);
    struct fileInfo *entry;

    for (entry = entryList; entry != NULL; entry = entry->next)
        {
        char *path = entry->name;

        if (entry->isDir && recurse)
            {
            catDir(1, &path);
            continue;
            }
        /* A NULL filter accepts everything. */
        if (wildCard != NULL && !wildMatch(wildCard, path))
            continue;
        if (suffix != NULL && !endsWith(path, suffix))
            continue;
        catFile(path);
        }
    slFreeList(&entryList);
    }
}
Example #7
0
struct clonePos *readClonesFromOoDir(char *ooDir, struct hash *cloneHash)
/* Read in clones from ooDir.
 * Every *.gl file found in the chromosome subdirectories of ooDir is fed
 * to addCloneInfo().  A subdirectory counts as a chromosome when its name
 * is at most two characters long, or when the name is listed in the file
 * given by the "chromLst" option.  Returns the clone list sorted with
 * cmpClonePos; aborts if no clone was read at all. */
{
    struct clonePos *cloneList = NULL;
    struct fileInfo *chrFiList = NULL, *chrFi;
    struct fileInfo *glFiList = NULL, *glFi;
    char pathName[512];
    struct hash *chromDirHash = newHash(4);
    char *chromLst = optionVal("chromLst", NULL);

    if (chromLst != NULL)
    {
        struct lineFile *clf = lineFileOpen(chromLst, TRUE);
        char *row[1];
        while (lineFileRow(clf, row))
        {
            hashAdd(chromDirHash, row[0], NULL);
            verbose(3,"%s\n",row[0]);
        }
        lineFileClose(&clf);
    }

    verbose(2,"ooDir: %s\n",ooDir);
    chrFiList = listDirX(ooDir, "*", FALSE);
    for (chrFi = chrFiList; chrFi != NULL; chrFi = chrFi->next)
    {
        verbose(2,"%s\n",chrFi->name);
        if ( ((chrFi->isDir && strlen(chrFi->name) <= 2))
                || hashLookup(chromDirHash, chrFi->name) )
        {
            safef(pathName, sizeof(pathName), "%s/%s", ooDir, chrFi->name);
            verbose(2,"%s\n",pathName);
            glFiList = listDirX(pathName, "*.gl", TRUE);
            for (glFi = glFiList; glFi != NULL; glFi = glFi->next)
                addCloneInfo(glFi->name, cloneHash, &cloneList);
            slFreeList(&glFiList);
        }
    }
    slFreeList(&chrFiList);
    slReverse(&cloneList);
    slSort(&cloneList, cmpClonePos);
    /* A count can never be negative, so the old "< 0" test was dead code;
     * an empty list is what signals that nothing was found. */
    if (cloneList == NULL)
        errAbort("No .gl files in %s\n", ooDir);
    printf("Got %d clones\n", slCount(cloneList));
    hashFree(&chromDirHash);
    return cloneList;
}
void ooSplitFins(char *finTrans, char *ooDir)
/* ooSplitFins - Create splitFin files (list of split finished clones).
 * Clones from finTrans that have more than one fragment are hashed by
 * name, then writeRelevantSplits() is called for every ctg* directory
 * below the one- or two-character (non-dot) subdirectories of ooDir. */
{
struct hash *splitHash = newHash(8);
struct clone *allClones, *cl;
char path[512];
struct fileInfo *chromList = NULL, *contigList = NULL, *chromEl, *contigEl;
int splitTotal = 0;

/* Collect the finished clones that are split into several fragments. */
allClones = readTrans(finTrans);
for (cl = allClones; cl != NULL; cl = cl->next)
    {
    if (slCount(cl->fragList) <= 1)
        continue;
    hashAdd(splitHash, cl->name, cl);
    ++splitTotal;
    }
printf("Found %d split clones in %s\n", splitTotal, finTrans);

/* Visit all contigs in ooDir. */
chromList = listDirX(ooDir, "*", FALSE);
for (chromEl = chromList; chromEl != NULL; chromEl = chromEl->next)
    {
    char *chromName = chromEl->name;
    if (!chromEl->isDir || strlen(chromName) > 2 || chromName[0] == '.')
        continue;
    printf("Processing %s\n", chromName);
    sprintf(path, "%s/%s", ooDir, chromName);
    contigList = listDirX(path, "ctg*", TRUE);
    for (contigEl = contigList; contigEl != NULL; contigEl = contigEl->next)
        {
        fflush(stdout);
        if (contigEl->isDir)
            writeRelevantSplits(contigEl->name, splitHash);
        }
    slFreeList(&contigList);
    }
}
Example #9
0
void trimFosmids(char *sangDir, char *outFile)
/* trimFosmids - Process Fosmid end reads to remove low quality bases and put in one big file.
 * As written this only walks sangDir two directory levels deep and
 * reports each *.fasta file found; the filtering call itself is
 * commented out, so totalSize stays 0 and nothing is written to outFile. */
{
struct hash *hash = newHash(20);
struct fileInfo *topList, *top;
struct fileInfo *midList, *mid;
struct fileInfo *fastaList, *fasta;
FILE *f = mustOpen(outFile, "w");
unsigned long totalSize = 0;

topList = listDirX(sangDir, "*", TRUE);
for (top = topList; top != NULL; top = top->next)
    {
    if (!top->isDir)
        continue;
    /* Progress: directory name, then one dot per subdirectory. */
    printf("%s", top->name);
    fflush(stdout);
    midList = listDirX(top->name, "*", TRUE);
    for (mid = midList; mid != NULL; mid = mid->next)
        {
        if (!mid->isDir)
            continue;
        printf(".");
        fflush(stdout);
        fastaList = listDirX(mid->name, "*.fasta", TRUE);
        for (fasta = fastaList; fasta != NULL; fasta = fasta->next)
            {
            uglyf("%s\n", fasta->name);
            // totalSize += filterByQual(fasta->name, f, 19, 15, hash);
            }
        slFreeList(&fastaList);
        }
    printf("\n");
    slFreeList(&midList);
    }
printf("Total size %lu bytes\n", totalSize);
fclose(f);
}
Example #10
0
void crunchDirDir(char *dirDir, FILE *f)
/* Crunch dir of dirs.  For each subdirectory of dirDir (other than CVS)
 * the plain files are ordered with namePosCmp on their firstLinePos()
 * value and then passed one by one to crunchOne(), each call receiving
 * the previous call's return value plus one (0 for the first). */
{
struct fileInfo *subDirList = listDirX(dirDir, NULL, FALSE), *subDir;

for (subDir = subDirList; subDir != NULL; subDir = subDir->next)
    {
    struct fileInfo *entryList, *entry;
    char path[PATH_LEN];
    struct namePos *ordered = NULL, *item;
    int lastPos = -1;

    if (!subDir->isDir)
        {
        warn("%s isn't a dir, skipping", subDir->name);
        continue;
        }
    if (sameString(subDir->name, "CVS"))
        continue;       /* CVS bookkeeping directories are not test data. */

    safef(path, sizeof(path), "%s/%s", dirDir, subDir->name);
    entryList = listDirX(path, NULL, FALSE);
    for (entry = entryList; entry != NULL; entry = entry->next)
        {
        if (!entry->isDir)
            {
            AllocVar(item);
            /* Name is borrowed from entryList, which outlives ordered. */
            item->name = entry->name;
            safef(path, sizeof(path), "%s/%s/%s",
                  dirDir, subDir->name, entry->name);
            item->pos = firstLinePos(path);
            slAddHead(&ordered, item);
            }
        }
    slSort(&ordered, namePosCmp);
    for (item = ordered; item != NULL; item = item->next)
        {
        safef(path, sizeof(path), "%s/%s/%s",
              dirDir, subDir->name, item->name);
        lastPos = crunchOne(path, f, subDir->name, lastPos+1);
        }
    slFreeList(&ordered);
    slFreeList(&entryList);
    }
slFreeList(&subDirList);
}
Example #11
0
static void rPathsInDirAndSubdirs(char *dir, char *wildcard, struct slName **pList)
/* Recursively add directory contents that match wildcard (* for all) to list.
 * Directories among the matches are descended into rather than added;
 * everything else is pushed onto the head of *pList. */
{
struct fileInfo *entryList = listDirX(dir, wildcard, TRUE);
struct fileInfo *entry = entryList;

while (entry != NULL)
    {
    if (!entry->isDir)
        slNameAddHead(pList, entry->name);
    else
        rPathsInDirAndSubdirs(entry->name, wildcard, pList);
    entry = entry->next;
    }
slFreeList(&entryList);
}
Example #12
0
struct hash *makeImageHash(char *sourceImageDir)
/* Build a hash of the .jpg files found exactly one level below
 * sourceImageDir (files directly in sourceImageDir are ignored).
 * The key is the gene name, i.e. the file name up to its first "_";
 * the value is the file's path relative to sourceImageDir.
 * Files without an underscore in their name are left out. */
{
struct hash *imageHash = newHash(0);
struct fileInfo *subDirs = listDirX(sourceImageDir, "*", FALSE);
struct fileInfo *subDir;

for (subDir = subDirs; subDir != NULL; subDir = subDir->next)
    {
    char dirPath[256];
    struct fileInfo *jpgs = NULL, *jpg;

    if (!subDir->isDir)
        continue;
    safef(dirPath,sizeof(dirPath),"%s/%s",sourceImageDir,subDir->name);
    jpgs = listDirX(dirPath, "*.jpg", FALSE);
    for (jpg = jpgs; jpg != NULL; jpg = jpg->next)
        {
        char relPath[256];
        char *sep = NULL;
        char *key, *val;

        safef(relPath,sizeof(relPath),"%s/%s",subDir->name,jpg->name);
        sep = strchr(jpg->name,'_');
        if (sep == NULL)
            continue;
        key = cloneStringZ(jpg->name,sep-jpg->name);
        val = cloneString(relPath);
        hashAdd(imageHash, key, val);
        verbose(2, "imageHash key=%s value=%s\n", key, val);
        }
    slFreeList(&jpgs);
    }
slFreeList(&subDirs);
return imageHash;
}
Example #13
0
void crunchDir(char *dir, FILE *f)
/* Crunch list of files in dir.  Each plain file is passed to crunchOne()
 * with its full path and its name after chopSuffix(), at position 0. */
{
struct fileInfo *entryList = listDirX(dir, NULL, FALSE);
struct fileInfo *entry = entryList;

while (entry != NULL)
    {
    if (!entry->isDir)
        {
        char fullPath[PATH_LEN];
        /* Build the path first: chopSuffix() edits entry->name in place. */
        safef(fullPath, sizeof(fullPath), "%s/%s", dir, entry->name);
        chopSuffix(entry->name);
        crunchOne(fullPath, f, entry->name, 0);
        }
    entry = entry->next;
    }
slFreeList(&entryList);
}
void afsCombine(char *inDir, char *outFile)
/* afsCombine - Combine output from multiple runs of aliFragScore.
 * Every entry listed in inDir is folded in with foldIn(); the combined
 * fragments are then written to outFile, one tab-separated line each. */
{
struct hash *fragHash = newHash(16);
struct scoredFrag *combined = NULL, *sf;
struct fileInfo *inList = listDirX(inDir, "*", TRUE);
struct fileInfo *in = inList;
FILE *out;

while (in != NULL)
    {
    foldIn(in->name, fragHash, &combined);
    in = in->next;
    }
slReverse(&combined);

out = mustOpen(outFile, "w");
for (sf = combined; sf != NULL; sf = sf->next)
    fprintf(out, "%s\t%d\t%d\t%f\t%f\n", sf->frag, sf->perfectCount,
            sf->posCount, sf->posTotal, sf->total);
carefulClose(&out);
}
Example #15
0
void hgCtgPos(char *database, char *ooDir)
/* hgCtgPos - Store contig positions ( from lift files ) in database..
 * The ordered and random lift files are looked up in each chromosome
 * directory of ooDir.  A directory counts as a chromosome when its name
 * is at most two characters, starts with "NA_", or is listed in the file
 * named by the "chromLst" option. */
{
static char *liftNames[2] = {"lift/ordered.lft", "lift/random.lft"};
struct ctgPos *contigs = NULL;
char liftPath[512];
struct fileInfo *dirList, *dirEl;
int liftIx;
struct hash *extraChroms = newHash(4);
char *chromLst = optionVal("chromLst", NULL);

/* Optional list of additional chromosome directory names. */
if (chromLst != NULL)
    {
    struct lineFile *lf = lineFileOpen(chromLst, TRUE);
    char *row[1];
    while (lineFileRow(lf, row))
        hashAdd(extraChroms, row[0], NULL);
    lineFileClose(&lf);
    }

dirList = listDirX(ooDir, "*", FALSE);
for (dirEl = dirList; dirEl != NULL; dirEl = dirEl->next)
    {
    char *name = dirEl->name;
    if (!dirEl->isDir)
        continue;
    if (strlen(name) > 2 && !startsWith("NA_", name)
        && !hashLookup(extraChroms, name))
        continue;
    for (liftIx = 0; liftIx < ArraySize(liftNames); ++liftIx)
        {
        sprintf(liftPath, "%s/%s/%s", ooDir, name, liftNames[liftIx]);
        if (fileExists(liftPath))
            addCtgFile(liftPath, &contigs);
        }
    }
slSort(&contigs, cmpCtgPos);
printf("Got %d contigs total\n", slCount(contigs));
saveCtgPos(contigs, database);
hashFree(&extraChroms);
}
Example #16
0
void hgLoadGap(char *database, char *ooDir, char *oneChrom)
/* hgLoadGap - Put chromosome .gap files into browser database..
 * In unsplit mode ooDir is passed straight to gapFileToTable() for table
 * "gap".  Otherwise makeGap() runs on each chromosome directory of ooDir
 * (name of at most two characters or starting with "NA_"), restricted to
 * oneChrom when that is not NULL.  Aborts if no directory qualified. */
{
struct sqlConnection *conn = sqlConnect(database);

/* Drop a leading "chr", or "target" (the prefix the zoo browser uses). */
if (oneChrom != NULL && startsWith("chr", oneChrom))
    oneChrom += 3;
else if (oneChrom != NULL && startsWith("target", oneChrom))
    oneChrom += 6;

if (!unsplit)
    {
    struct fileInfo *dirList = listDirX(ooDir, "*", FALSE), *dirEl;
    char chromPath[512];
    boolean loaded = FALSE;

    for (dirEl = dirList; dirEl != NULL; dirEl = dirEl->next)
        {
        char *name = dirEl->name;
        if (!dirEl->isDir)
            continue;
        if (strlen(name) > 2 && !startsWith("NA_", name))
            continue;
        if (oneChrom != NULL && !sameWord(name, oneChrom))
            continue;
        safef(chromPath, sizeof(chromPath), "%s/%s", ooDir, name);
        makeGap(conn, chromPath);
        loaded = TRUE;
        verbose(2, "done %s\n", name);
        }
    slFreeList(&dirList);
    if (!loaded)
        errAbort("No .gap files found");
    }
else
    gapFileToTable(conn, ooDir, "gap");
sqlDisconnect(&conn);
}
struct fileInfo *readDirs(int dirCount, char *dirs[])
/* Return extended listing of all dirs: the *.fa entries of every
 * directory in dirs, concatenated in order.  Progress and the summed
 * size of all listed files are printed to stdout. */
{
struct fileInfo *combined = NULL, *listing, *el;
double sizeSum = 0;
int dirIx = 0;

while (dirIx < dirCount)
    {
    char *dirName = dirs[dirIx++];
    printf("Listing %s...", dirName);
    fflush(stdout);
    listing = listDirX(dirName, "*.fa", TRUE);
    printf("got %d files\n", slCount(listing));
    combined = slCat(combined, listing);
    }

for (el = combined; el != NULL; el = el->next)
    sizeSum += el->size;
printf("Total %d files %e bytes\n", slCount(combined), sizeSum);
return combined;
}
void tpfDirToTabFile(char *tpfDir, char *fileName)
/* Read TPF directory and make tab-separated file.
 *    tpfDir   - directory holding Chr* subdirectories, each expected to
 *               contain a tpf.txt file.
 *    fileName - output file, created or overwritten.
 * For a directory ChrXXX the chromosome name passed to addTpfToTabFile()
 * is chrXXX.  Aborts if there is no Chr* entry or a name does not fit. */
{
FILE *f = mustOpen(fileName, "w");
char tpfFile[512];
char ourChrom[16];
struct fileInfo *chrom, *dir = listDirX(tpfDir, "Chr*", FALSE);

if (dir == NULL)
    errAbort("No Chr files in %s", tpfDir);
for (chrom = dir; chrom != NULL; chrom = chrom->next)
    {
    int len;

    if (!chrom->isDir)
        continue;

    /* Directory names come from disk; refuse ones that would have
     * overrun the fixed-size buffers with plain sprintf. */
    len = snprintf(tpfFile, sizeof(tpfFile), "%s/%s/%s",
                   tpfDir, chrom->name, "tpf.txt");
    if (len < 0 || len >= (int)sizeof(tpfFile))
        errAbort("Path too long: %s/%s", tpfDir, chrom->name);

    /* The "Chr*" pattern guarantees at least three leading characters. */
    len = snprintf(ourChrom, sizeof(ourChrom), "chr%s", chrom->name+3);
    if (len < 0 || len >= (int)sizeof(ourChrom))
        errAbort("Chromosome directory name too long: %s", chrom->name);

    addTpfToTabFile(ourChrom, tpfFile, f);
    }
slFreeList(&dir);
carefulClose(&f);
}
void makeGl(struct sqlConnection *conn, char *chromDir,
            struct hash *cloneVerHash)
/* Read in .gl files in chromDir and use them to create the
 * gl tables for the corresponding chromosome(s).
 *    conn         - database connection; only used when noLoad is false.
 *    chromDir     - directory searched for *.gl files.
 *    cloneVerHash - not used by this function.
 * For each file the table is named <file base name>_gl.  An existing
 * table is dropped, the table is (re)created from createGl, the file is
 * converted with addGlBin() into gl.tab and that is loaded. */
{
    struct dyString *ds = newDyString(2048);
    struct fileInfo *fiList, *fi;
    char dir[256], chrom[128], ext[64];
    char *glFileName;
    char glTable[128];
    char *tab = "gl.tab";

    fiList = listDirX(chromDir, "*.gl", TRUE);
    for (fi = fiList; fi != NULL; fi = fi->next)
    {
        glFileName = fi->name;
        printf("Processing %s\n", glFileName);

        splitPath(glFileName, dir, chrom, ext);
        /* chrom and glTable are the same size, so the "_gl" suffix could
         * overrun glTable with sprintf; use bounded formatting, as
         * makeGap does for its table name. */
        safef(glTable, sizeof(glTable), "%s_gl", chrom);
        if ( (! noLoad) && sqlTableExists(conn, glTable))
        {
            dyStringClear(ds);
            dyStringPrintf(ds, "DROP table %s", glTable);
            sqlUpdate(conn, ds->string);
        }
        dyStringClear(ds);
        dyStringPrintf(ds, createGl, glTable, maxFragNameSize);
        verbose(2, "%s", ds->string);
        if (! noLoad)
            sqlMaybeMakeTable(conn, glTable, ds->string);
        dyStringClear(ds);
        addGlBin(glFileName, tab);
        dyStringPrintf(ds, "LOAD data local infile '%s' into table %s",
                       tab, glTable);
        if (! noLoad)
            sqlUpdate(conn, ds->string);
    }
    slFreeList(&fiList);
    freeDyString(&ds);
}
void doPieceJob(FILE *con, FILE *sh, char *ooDir, char *chrom, char *cdna,
    char *conDir, char *pslDir, char *outDir, char *logDir, char *errDir, char *inDir)
/* Do mrna or EST alignment jobs on contigs.  One job stanza is written
 * to con for every ctg* entry of ooDir/chrom that has a <contig>.fa file
 * inside it; a header comment line goes to sh.  Results are directed to
 * a per-chromosome subdirectory of pslDir, which is created here.
 * conDir is not used by this function. */
{
struct fileInfo *contigList, *contigEl;
char chromPath[512];
char pslChromDir[512];
char contigPath[512];
char fastaPath[512];
char jobId[512];

printf("Piece job on %s %s\n", chrom, cdna);
fprintf(sh, "#Stitching %s pieces for chromosome %s\n", cdna, chrom);

/* Results for this chromosome's contigs go into their own directory. */
sprintf(pslChromDir, "%s/%s", pslDir, chrom);
makeDir(pslChromDir);

/* One job per contig that has a sequence file. */
sprintf(chromPath, "%s/%s", ooDir, chrom);
contigList = listDirX(chromPath, "ctg*", FALSE);
for (contigEl = contigList; contigEl != NULL; contigEl = contigEl->next)
    {
    char *contigName = contigEl->name;

    sprintf(contigPath, "%s/%s/%s", ooDir, chrom, contigName);
    sprintf(fastaPath, "%s/%s.fa", contigPath, contigName);
    sprintf(jobId, "%s_%s", chrom, contigName);
    if (!fileExists(fastaPath))
        continue;

    fprintf(con, "log = %s/%s.%s\n", logDir, jobId, cdna);
    fprintf(con, "error = %s/%s.%s\n", errDir, jobId, cdna);
    fprintf(con, "output = %s/%s.%s\n", outDir, jobId, cdna);
    makeSimpleIn(inDir, jobId, fastaPath);
    fprintf(con,
        "arguments = %s/%s %s/%s mrna /var/tmp/hg/h/10.ooc %s/%s.%s.psl\n",
        inDir, jobId, inDir, cdna, pslChromDir, contigName, cdna);
    fprintf(con, "queue 1\n\n");
    }
}
Example #21
0
struct hash *loadChroms(char *dir)
/* Load chromosome fasta files into memory.
 * Every file in dir matching *.<faExtn> is opened and its first sequence
 * read.  Returns a hash keyed by chromosome name (file base name with
 * the suffix chopped; names starting with "chr0" have '0' stripped, and
 * "chrmt" is stored as "chr17") whose values are struct dnaSeq with
 * upper-cased DNA.  Aborts if a file cannot be opened or read, or if no
 * file matched. */
{
FILE *f;
char fastaScan[16];
safef(fastaScan, sizeof(fastaScan), "*.%s", faExtn);
struct fileInfo *chromEl, *chromList = listDirX(dir, fastaScan, TRUE);
struct hash *chromHash = newHash(0);
struct dnaSeq *seq;
char chrom[128];
char *faName;
int count = 0;

verbose(2, "#    scanning '%s/%s'\n", dir, fastaScan);
for (chromEl = chromList; chromEl != NULL; chromEl = chromEl->next)
    {
    char *fileName = chromEl->name;
    splitPath(fileName, NULL, chrom, NULL);
    chopSuffix(chrom);
    if (startsWith("chr0", chrom)) /* Convert chr01 to chr1, etc. */
        stripChar(chrom, '0');
    if (sameString(chrom, "chrmt"))
        strcpy(chrom, "chr17");
    f = fopen(fileName, "r");
    /* A listed file may still be unreadable; do not hand NULL to the
     * fasta reader. */
    if (f == NULL)
        errAbort("Couldn't open %s", fileName);
    AllocVar(seq);
    seq->name = cloneString(chrom);
    if (!faFastReadNext(f, &seq->dna, &seq->size, &faName))
        errAbort("Couldn't load sequence from %s", fileName);
    /* Take a private copy of the sequence before the next read. */
    seq->dna = cloneMem(seq->dna, seq->size+1);
    toUpperN(seq->dna, seq->size);
    hashAdd(chromHash, chrom, seq);
    verbose(3, "#    loadChrom %s '%s'\n", fileName, chrom);
    fclose(f);
    f = NULL;
    count++;
    }
slFreeList(&chromList);
if (0 == count)
    errAbort("not fasta files found in '%s/%s'\n", dir, fastaScan);
return chromHash;
}
void vgPatchJax(char *database, char *dir)
/* vgPatchJax - Patch Jackson labs part of visiGene database.
 * For every *.ra file in dir, the "submitSet" and "year" fields are read
 * (both are required) and the year of the submissionSet row with that
 * name is updated in database. */
{
struct sqlConnection *conn = sqlConnect(database);
struct fileInfo *raList, *ra;
struct dyString *query = dyStringNew(0);

raList = listDirX(dir, "*.ra", TRUE);
for (ra = raList; ra != NULL; ra = ra->next)
    {
    struct hash *hash = raReadSingle(ra->name);
    char *submitSet = hashMustFindVal(hash, "submitSet");
    char *year = hashMustFindVal(hash, "year");
    dyStringClear(query);
    /* NOTE(review): both values are pasted into the SQL text as read
     * from the .ra file, without escaping.  Acceptable only while those
     * files are trusted input. */
    dyStringPrintf(query,
        "update submissionSet set year=%s "
        "where name = '%s'"
        , year, submitSet);
    sqlUpdate(conn, query->string);
    /* submitSet and year are not used past this point. */
    hashFree(&hash);
    }

slFreeList(&raList);
dyStringFree(&query);
sqlDisconnect(&conn);
}
Example #23
0
void makeGap(struct sqlConnection *conn, char *chromDir)
/* Read in .gap files in chromDir and use them to create the
 * gap table for the corresponding chromosome(s).  The table for a file
 * is named after the file's base name, with every '.' turned into '_'
 * and "_gap" appended. */
{
struct fileInfo *gapFiles = listDirX(chromDir, "*.gap", TRUE);
struct fileInfo *gapFile;

for (gapFile = gapFiles; gapFile != NULL; gapFile = gapFile->next)
    {
    char dirPart[256], basePart[128], extPart[64];
    char tableName[128];
    char *path = gapFile->name;
    char *c;

    verbose(1, "Processing %s\n", path);
    splitPath(path, dirPart, basePart, extPart);
    /* Dots cannot stay in the table name. */
    for (c = basePart; *c != 0; ++c)
        {
        if (*c == '.')
            *c = '_';
        }
    safef(tableName, sizeof(tableName), "%s_gap", basePart);
    gapFileToTable(conn, path, tableName);
    }
}
Example #24
0
void nibbParseImageDir(char *sourceDir, char *goodTab, char *badTab)
/* nibbParseImageDir - Look through nibb image directory and allowing for
 * typos and the like create a table that maps a file name to clone name,
 * developmental stage, and view of body part.
 *    sourceDir - scanned two levels deep for XL* entries, then for *.jpg
 *                files inside those.
 *    goodTab   - output for images that have clone, stage and view set.
 *    badTab    - output for all other images.
 * Aborts on a second-level directory whose name does not start with a
 * clone name of the form XL###L## followed by an underbar. */
{
struct fileInfo *l1List, *l1, *l2List, *l2, *l3List, *l3;
struct hash *stageHash = hashNew(0);
struct hash *viewHash = hashNew(0);
struct hash *otherHash = hashNew(0);
struct hash *probeHash = hashNew(0);
struct hash *fixHash = hashFixers();
struct imageInfo *imageList = NULL, *image;
FILE *good = mustOpen(goodTab, "w");
FILE *bad = mustOpen(badTab, "w");
int goodCount = 0, badCount = 0;
int jpgCount = 0, jpgDir = 0;
int totalCount;
double goodPercent = 0.0, badPercent = 0.0;

/* NOTE(review): the directory listings are not freed here.  File and
 * directory names from them are passed to getImageInfo(), which may keep
 * the pointers - confirm before adding slFreeList calls. */
l1List = listDirX(sourceDir, "XL*", FALSE);
for (l1 = l1List; l1 != NULL; l1 = l1->next)
    {
    char l1Path[PATH_LEN];
    safef(l1Path, sizeof(l1Path), "%s/%s", sourceDir, l1->name);
    l2List = listDirX(l1Path, "XL*", FALSE);
    for (l2 = l2List; l2 != NULL; l2 = l2->next)
        {
        char l2Path[PATH_LEN];
        char cloneName[64], *permanentCloneName;
        char *cloneDir = l2->name;
        char *cloneEnd;
        int cloneNameSize = 0;

        if (stringIx(cloneDir, skipDir) >= 0)
            continue;

        /* Figure out clone name, which is directory component up to
         * first underbar. */
        cloneEnd = strchr(cloneDir, '_');
        if (cloneEnd != NULL)
            cloneNameSize = cloneEnd - cloneDir;
        else
            errAbort("Strangely formatted image dir %s, no underbar", cloneDir);
        if (cloneNameSize >= (int)sizeof(cloneName))
            errAbort("Clone name too long in dir %s", cloneDir);
        if (cloneNameSize < 8 || cloneNameSize > 12)
            errAbort("Clone name wrong size %s", cloneDir);
        memcpy(cloneName, cloneDir, cloneNameSize);
        cloneName[cloneNameSize] = 0;
        /* Check format is XL###L##.  We already checked the XL.
         * The casts keep isdigit() defined for bytes above 127. */
        if (!isdigit((unsigned char)cloneName[2])
                 || !isdigit((unsigned char)cloneName[3])
                 || !isdigit((unsigned char)cloneName[4])
                 || isdigit((unsigned char)cloneName[5])
                 || !isdigit((unsigned char)cloneName[6])
                 || !isdigit((unsigned char)cloneName[7]))
            errAbort("Strangely formatted clone name %s", cloneDir);

        permanentCloneName = hashStoreName(probeHash, cloneName);

        /* Get all files in dir. */
        safef(l2Path, sizeof(l2Path),
                "%s/%s/%s", sourceDir, l1->name, l2->name);
        l3List = listDirX(l2Path, "*.jpg", FALSE);
        for (l3 = l3List; l3 != NULL; l3 = l3->next)
            {
            char *fileName = l3->name;

            if (stringIx(l3->name, skipFile) >= 0)
                continue;
            image = getImageInfo(fixHash, permanentCloneName,
                l1->name, cloneDir, fileName,
                stageHash, viewHash, otherHash, probeHash);
            slAddHead(&imageList, image);
            ++jpgCount;
            }
        ++jpgDir;
        }
    }
slReverse(&imageList);

verbose(1, "%d jpg images in %d directories\n", jpgCount, jpgDir);

#ifdef OLD
verbose(1, "%d probes, %d stages, %d views, %d other\n",
        probeHash->elCount, stageHash->elCount,
        viewHash->elCount, otherHash->elCount);
printHash("stages", stageHash);
printHash("views", viewHash);
printHash("other", otherHash);
#endif /* OLD */

for (image = imageList; image != NULL; image = image->next)
    {
    if (image->clone != NULL && image->stage != NULL && image->view != NULL)
        {
        imageInfoOut(image, good);
        ++goodCount;
        }
    else
        {
        imageInfoOut(image, bad);
        ++badCount;
        }
    }

/* With no images at all the percentages used to come out as 0/0 (nan);
 * report 0.0 instead. */
totalCount = goodCount + badCount;
if (totalCount > 0)
    {
    goodPercent = 100.0 * goodCount/totalCount;
    badPercent = 100.0 * badCount/totalCount;
    }
verbose(1, "%d (%4.1f%%) parsed ok, %d (%4.2f%%) didn't\n",
        goodCount, goodPercent, badCount, badPercent);
carefulClose(&good);
carefulClose(&bad);
}
void hgGoldGapGl(char *database, char *gsDir, char *ooSubDir, boolean doGl, char *oneChrom)
/* hgGoldGapGl - Put chromosome .agp and .gl files into browser database..
 *    database - database to connect to (no connection is made when
 *               noLoad is set).
 *    gsDir    - root directory; gsDir/ooSubDir holds the chromosome
 *               directories and gsDir/ffa/sequence.inf is read if doGl.
 *    ooSubDir - subdirectory of gsDir to scan.
 *    doGl     - also run makeGl() on each chromosome directory.
 *    oneChrom - if not NULL, restrict to this chromosome; a leading
 *               "chr" or "target" is ignored.
 * A directory counts as a chromosome when its name is at most two
 * characters, starts with "NA_", or is listed in the file named by the
 * "chromLst" option.  Aborts if no directory was processed. */
{
    struct fileInfo *chrFiList, *chrFi;
    struct sqlConnection *conn = NULL;
    char ooDir[512];
    char pathName[512];
    struct hash *cloneVerHash = newHash(0);
    boolean gotAny = FALSE;
    struct hash *chromDirHash = newHash(4);
    char *chromLst = optionVal("chromLst", NULL);

    if (! noLoad)
        conn = sqlConnect(database);

    verbose(2,"#\tcomplete gold, gap and .gl files produced\n");

    if (chromLst != NULL)
    {
        struct lineFile *clf = lineFileOpen(chromLst, TRUE);
        char *row[1];
        while (lineFileRow(clf, row))
        {
            hashAdd(chromDirHash, row[0], NULL);
        }
        lineFileClose(&clf);
    }

    safef(ooDir, sizeof(ooDir), "%s/%s", gsDir, ooSubDir);
    /* target prefix is used in zoo browser.  Each prefix has to be
     * skipped by its own length: "target" is six characters, not three. */
    if (oneChrom != NULL)
    {
        if (startsWith("chr", oneChrom))
            oneChrom += 3;
        else if (startsWith("target", oneChrom))
            oneChrom += 6;
    }

    if (doGl)
    {
        safef(pathName, sizeof(pathName), "%s/ffa/sequence.inf", gsDir);
        makeCloneVerHash(pathName, cloneVerHash);
    }

    chrFiList = listDirX(ooDir, "*", FALSE);
    for (chrFi = chrFiList; chrFi != NULL; chrFi = chrFi->next)
    {
        if (chrFi->isDir &&
                ((strlen(chrFi->name) <= 2) || startsWith("NA_", chrFi->name) ||
                 (NULL != hashLookup(chromDirHash, chrFi->name))))
        {
            if (oneChrom == NULL || sameWord(chrFi->name, oneChrom))
            {
                safef(pathName, sizeof(pathName), "%s/%s", ooDir, chrFi->name);
                makeGoldAndGap(conn, pathName);
                if (doGl)
                    makeGl(conn, pathName, cloneVerHash);
                gotAny = TRUE;
                uglyf("done %s\n", chrFi->name);
            }
        }
    }
    slFreeList(&chrFiList);
    if (! noLoad)
        sqlDisconnect(&conn);
    hashFree(&chromDirHash);
    if (!gotAny)
        errAbort("No contig agp and gold files found");
}
void cdnaOnOoJobs(char *ooDir, char *conDir, int cdnaCount, char *cdnaTypes[])
/* cdnaOnOoJobs - make condor submission file for EST and mRNA alignments on draft assembly.
 *    ooDir     - assembly root; chr*.fa files are looked up in its one-
 *                or two-character subdirectories.
 *    conDir    - output directory.  Created here together with its psl,
 *                out, log, err and in subdirectories; receives all.con
 *                and finish.sh.
 *    cdnaCount - number of entries in cdnaTypes.
 *    cdnaTypes - cDNA set names; each maps to /var/tmp/hg/h/mrna/<name>.fa.
 * A chromosome file whose listed size is below 60000000 gets whole-file
 * jobs (doFullJob), larger ones get per-contig jobs (doPieceJob).  A file
 * that sameChrom() matches to the previously handled chromosome follows
 * that chromosome's choice, with nothing emitted in the per-contig case. */
{
char chromDir[512];
char chromFile[512];
char conFile[512];
char shFile[512];
char conPslDir[512];
char conOutDir[512];
char conErrDir[512];
char conLogDir[512];
char conInDir[512];
struct fileInfo *cfaList, *cfa;
struct fileInfo *chromList, *chromEl;
static char lastChromName[64] = "9X8Y";	/* Something uniq. */
boolean lastDoFull = FALSE;
FILE *con, *sh;
int i;

/* Set up basic directory structure in output dir. */
makeDir(conDir);
snprintf(conPslDir, sizeof(conPslDir), "%s/psl", conDir);
makeDir(conPslDir);
snprintf(conOutDir, sizeof(conOutDir), "%s/out", conDir);
makeDir(conOutDir);
snprintf(conLogDir, sizeof(conLogDir), "%s/log", conDir);
makeDir(conLogDir);
snprintf(conErrDir, sizeof(conErrDir), "%s/err", conDir);
makeDir(conErrDir);
snprintf(conInDir, sizeof(conInDir), "%s/in", conDir);
makeDir(conInDir);

/* Create list files for mrna and est. */
for (i=0; i<cdnaCount; i++)
    {
    char fileName[512];
    snprintf(fileName, sizeof(fileName), "/var/tmp/hg/h/mrna/%s.fa", cdnaTypes[i]);
    makeSimpleIn(conInDir, cdnaTypes[i], fileName);
    }

/* Create condor submission file and write header. */
snprintf(conFile, sizeof(conFile), "%s/all.con", conDir);
con = mustOpen(conFile, "w");
fprintf(con, "#File created by cdnaOnOoJobs %s %s\n\n", ooDir, conDir);
fprintf(con,
"universe        = vanilla\n"
"notification    = error\n"
"requirements    = memory > 250\n"
"executable      = /cse/guests/kent/bin/i386/psLayout\n"
"initialdir      = %s\n"
"\n"
 , conDir);

/* Create shell script to finish job. */
snprintf(shFile, sizeof(shFile), "%s/finish.sh", conDir);
sh = mustOpen(shFile, "w");

/* Loop through each chromosome directory. */
chromList = listDirX(ooDir, "*", FALSE);
for (chromEl = chromList; chromEl != NULL; chromEl = chromEl->next)
    {
    int len = strlen(chromEl->name);
    if (chromEl->isDir && len <= 2)
        {
        snprintf(chromDir, sizeof(chromDir), "%s/%s", ooDir, chromEl->name);
        cfaList = listDirX(chromDir, "chr*.fa", FALSE);
        slSort(&cfaList, cmpFileInfoUnderbar);

        /* Get list of assembled chromosomes in dir. */
        for (cfa = cfaList; cfa != NULL; cfa = cfa->next)
            {
            /* See if is _random version of previous chromosome, in which
             * case we follow the lead of last time. */
            /* The cast makes the argument match the format whatever
             * integer type the size field has. */
            printf("%s size %lld\n", cfa->name, (long long)cfa->size);
            snprintf(chromFile, sizeof(chromFile), "%s/%s", chromDir, cfa->name);
            if (sameChrom(lastChromName, cfa->name))
                {
                if (lastDoFull)
                    {
                    for (i=0; i<cdnaCount; ++i)
                        doFullJob(con, chromFile, lastChromName, conDir, conPslDir,
                                conOutDir, conLogDir, conErrDir, conInDir, cdnaTypes[i]);
                    }
                }
            else
                {
                /* File names come from disk; never write past the buffer. */
                snprintf(lastChromName, sizeof(lastChromName), "%s", cfa->name);
                chopSuffix(lastChromName);
                if (cfa->size < 60000000)
                    {
                    lastDoFull = TRUE;
                    for (i=0; i<cdnaCount; ++i)
                        doFullJob(con, chromFile, lastChromName, conDir, conPslDir, conOutDir,
                            conLogDir, conErrDir, conInDir, cdnaTypes[i]);
                    }
                else
                    {
                    lastDoFull = FALSE;
                    for (i=0; i<cdnaCount; ++i)
                        doPieceJob(con, sh, ooDir, lastChromName+3, cdnaTypes[i],
                            conDir, conPslDir, conOutDir, conLogDir, conErrDir, conInDir);
                    }
                }
            }
        slFreeList(&cfaList);
        }
    }
slFreeList(&chromList);

/* Close both outputs so everything written is flushed to disk. */
carefulClose(&sh);
carefulClose(&con);
}
Example #27
0
static void gbFilesBuildPath(char *buf, size_t bufSize, const char *a,
                             const char *b, const char *c)
/* Concatenate a, b and c into buf.  Aborts instead of overflowing buf. */
{
int n = snprintf(buf, bufSize, "%s%s%s", a, b, c);
if (n < 0 || (size_t)n >= bufSize)
    errAbort("path too long: %s%s%s", a, b, c);
}

struct fileInfo* getGbFiles(struct gbUpdate* update, unsigned types)
/* Generate list of genbank files for an update and type.
 *
 * The search directory is "download/<release name>" (or a subdirectory of
 * it) and the file pattern depends on the source database and on whether
 * the update is a full or a daily one:
 *   - GenBank full:   gbpri*.seq.gz (if types has GB_MRNA) and/or
 *                     gbest*.seq.gz (if types has GB_EST)
 *   - GenBank daily:  daily-nc/nc<text after first '.' in update name>.flat.gz
 *   - other full:     cumulative/rscu.gbff.Z
 *   - other daily:    daily/rsnc.<MMDD>.<YYYY>.gbff.Z
 * `types` is only consulted for full GenBank updates.
 *
 * Returns the concatenated lists produced by listDirX.  Aborts via errAbort
 * if no file is found, if a daily update name lacks the expected '.'
 * separated suffix, or if a constructed path would not fit in PATH_LEN. */
{
char relDir[PATH_LEN];
struct fileInfo* files = NULL; /* relative path, including directory */
/* Only daily updates need the dotted suffix; it may legitimately be
 * absent for full updates, so it is validated where it is used. */
char* updateDot = strchr(update->name, '.');

/* figure out path to input file */
gbFilesBuildPath(relDir, sizeof(relDir), "download/", update->release->name, "");
if (update->release->srcDb == GB_GENBANK)
    {
    /* genbank */
    if (update->isFull)
        {
        if (types & GB_MRNA)
            files = slCat(files, listDirX(relDir, "gbpri*.seq.gz", TRUE));
        if (types & GB_EST)
            files = slCat(files, listDirX(relDir, "gbest*.seq.gz", TRUE));
        }
    else
        {
        char dailyDir[PATH_LEN];
        char dailyFile[PATH_LEN];
        if (updateDot == NULL)
            errAbort("daily update name \"%s\" has no '.' separator",
                     update->name);
        gbFilesBuildPath(dailyDir, sizeof(dailyDir), relDir, "/daily-nc", "");
        gbFilesBuildPath(dailyFile, sizeof(dailyFile), "nc", updateDot+1,
                         ".flat.gz");
        files = slCat(files, listDirX(dailyDir, dailyFile, TRUE));
        }
    }
else
    {
    /* refseq */
    if (update->isFull)
        {
        char fullDir[PATH_LEN];
        gbFilesBuildPath(fullDir, sizeof(fullDir), relDir, "/cumulative", "");
        files = slCat(files, listDirX(fullDir, "rscu.gbff.Z", TRUE));
        }
    else
        {
        /* need to reverse year and month-day parts in
         * rsnc.1231.2001.gbff.Z.  The index arithmetic assumes the update
         * name ends in ".YYYY.MMDD": the first 5 chars at the dot are
         * ".YYYY" and everything from offset 6 on is "MMDD". */
        char dailyDir[PATH_LEN];
        char dailyFile[PATH_LEN];
        int n;
        /* Require at least 6 chars so updateDot+6 stays inside the string. */
        if (updateDot == NULL || strlen(updateDot) < 6)
            errAbort("daily update name \"%s\" does not end in .YYYY.MMDD",
                     update->name);
        gbFilesBuildPath(dailyDir, sizeof(dailyDir), relDir, "/daily", "");
        /* %.5s copies ".YYYY" (dot included) and always terminates */
        n = snprintf(dailyFile, sizeof(dailyFile), "rsnc.%s%.5s.gbff.Z",
                     updateDot+6, updateDot);
        if (n < 0 || (size_t)n >= sizeof(dailyFile))
            errAbort("daily file name too long for update %s", update->name);
        files = slCat(files, listDirX(dailyDir, dailyFile, TRUE));
        }
    }
if (files == NULL)
    errAbort("no input files found for release %s update %s",
             update->release->name, update->name);
return files;
}
void makeGoldAndGap(struct sqlConnection *conn, char *chromDir)
/* Read in .agp files in chromDir and use them to create the
 * gold and gap tables for the corresponding chromosome(s).
 *
 * For every *.agp file found, the base file name (with any '.' replaced
 * by '_') becomes the table prefix: <prefix>_gold and <prefix>_gap.
 * When the global noLoad is false the tab files named by goldTabName /
 * gapTabName are written, loaded into freshly recreated tables and then
 * removed.  When noLoad is true only <prefix>_gold.tab and <prefix>_gap.tab
 * are written and the database is not touched. */
{
    struct dyString *ds = newDyString(2048);
    struct fileInfo *fiList, *fi;
    /* NOTE(review): dir is sized generously because splitPath's expected
     * directory buffer size is not visible here - confirm against its
     * declaration. */
    char dir[512], chrom[128], ext[64];
    char goldName[128], gapName[128];
    char *agpName;
    char *ptr;
    char goldFileName[128];
    char gapFileName[128];

    /* When loading, every chromosome reuses the same scratch tab files. */
    if (! noLoad)
    {
        safef(goldFileName, ArraySize(goldFileName), "%s", goldTabName);
        safef(gapFileName, ArraySize(gapFileName), "%s", gapTabName);
    }
    fiList = listDirX(chromDir, "*.agp", TRUE);
    for (fi = fiList; fi != NULL; fi = fi->next)
    {

        /* Get full path name of .agp file and process it
         * into table names. */
        agpName = fi->name;
        printf("Processing %s\n", agpName);
        splitPath(agpName, dir, chrom, ext);
        while ((ptr = strchr(chrom, '.')) != NULL)
            *ptr = '_';
        /* chrom may be as long as the destination buffers, so use the
         * bounded formatter rather than sprintf when adding the suffix. */
        safef(goldName, ArraySize(goldName), "%s_gold", chrom);
        safef(gapName, ArraySize(gapName), "%s_gap", chrom);

        /* Without loading, keep one pair of tab files per chromosome. */
        if (noLoad)
        {
            safef(goldFileName, ArraySize(goldFileName), "%s_gold.tab", chrom);
            safef(gapFileName, ArraySize(gapFileName), "%s_gap.tab", chrom);
        }

        /* Create gold & gap tab separated files. */
        splitAgp(agpName, goldFileName, gapFileName);

        /* Create gold table and load it up. */
        dyStringClear(ds);
        dyStringPrintf(ds, createGold, goldName);
        dyStringPrintf(ds, goldSplitIndex, maxFragNameSize);
        verbose(2, "%s", ds->string);
        if (! noLoad)
        {
            sqlRemakeTable(conn, goldName, ds->string);
            dyStringClear(ds);
            dyStringPrintf(ds, "LOAD data local infile '%s' into table %s",
                           goldFileName, goldName);
            sqlUpdate(conn, ds->string);
            remove(goldFileName);
        }

        /* Create gap table and load it up. */
        dyStringClear(ds);
        dyStringPrintf(ds, createGap, gapName);
        dyStringAppend(ds, gapSplitIndex);
        verbose(2, "%s", ds->string);
        if (! noLoad)
        {
            /* The table has just been recreated, so no additional
             * "make if missing" step is needed (matches the gold path). */
            sqlRemakeTable(conn, gapName, ds->string);
            dyStringClear(ds);
            dyStringPrintf(ds, "LOAD data local infile '%s' into table %s",
                           gapFileName, gapName);
            sqlUpdate(conn, ds->string);
            remove(gapFileName);
        }
    }
    freeDyString(&ds);
}