Пример #1
0
void makeConfigFromFileList(char *input, char *output)
/* makeConfigFromFileList - Create config file for hgBedsToBedExps from list of files.
 * Each name read from input has the prefix and suffix shared by the whole
 * list removed; camelParseTwo() splits what is left into a cell and a factor.
 * One tab-separated line is written to output per name:
 *   factor, cell, cell abbreviation, "file", scoreCol-1, normalization
 *   factor, file name. */
{
FILE *f = mustOpen(output, "w");
struct slName *fileList = readAllLines(input);
int prefixSize = commonPrefixSize(fileList);
int suffixSize = commonSuffixSize(fileList);
struct slName *el = fileList;

while (el != NULL)
    {
    char *fileName = el->name;
    int nameSize = strlen(fileName);
    int middleSize = nameSize - prefixSize - suffixSize;
    /* The part of the name that differs from file to file. */
    char *middle = cloneStringZ(fileName + prefixSize, middleSize);
    char *cell, *factor;

    camelParseTwo(middle, &cell, &factor);
    fprintf(f, "%s\t%s\t", factor, cell);
    fprintf(f, "%s\t", cellAbbreviation(cell));
    fprintf(f, "file\t%d\t", scoreCol-1);
    fprintf(f, "%g\t", calcNormScoreFactor(fileName, scoreCol-1));
    fprintf(f, "%s\n", fileName);
    el = el->next;
    }
carefulClose(&f);
}
Пример #2
0
static void regionPairsList(char *regionsFile) {
	FILE *fp;
	char buf[500], str1[500], str2[500], orient[10];
	int i;
	int od[4];

	fp = mustOpen(regionsFile, "r");
	while (fgets(buf, 500, fp)) {
		if (sscanf(buf, "%[^ ] %s %d %d %d %d", str1, str2, &(od[0]), &(od[1]), &(od[2]), &(od[3])) != 6)
			errAbort("error: %s", buf);
		for (i = 0; i < 4; i++) {
			if (od[i] >= MINPAIR) {
				mapOrient(i, orient);
				if (sameString(orient, "[- -]"))
					printf("%s\t%s\t[+ +]\t(%d)\n", str2, str1, od[i]);
				else
					printf("%s\t%s\t%s\t(%d)\n", str1, str2, orient, od[i]);
			}
		}
	}
	fclose(fp);
} 
Пример #3
0
void mafSplitPos(char *database, char *size, char *outFile)
/* Pick best positions for split close to size.
 * Use middle of a gap as preferred site.
 * If no gaps are in range, use recent repeats (0% diverged).
 *
 * database - database to query; also stored in the file-level db.
 * size     - split interval as a decimal string, in Mbp.
 * outFile  - file handed to chromSplits() for output.
 * When the file-level chrom is NULL every chromosome returned by
 * hChromSizeHash() is processed, otherwise only that chromosome.  Note that
 * the file-level chrom is overwritten while iterating. */
{
int splitSize = 0;
int chromSize = 0;
struct hash *chromHash;
struct hashCookie hc;
struct hashEl *hel;
struct sqlConnection *conn = NULL;
FILE *f;

db = database;

verbose(1, "Finding split positions for %s at ~%s Mbp intervals\n", 
                database, size);
splitSize = sqlSigned(size) * 1000000;
if (chrom == NULL)
    {
    chromHash = hChromSizeHash(database);
    }
else
    {
    chromHash = hashNew(6);
    hashAddInt(chromHash, chrom, hChromSize(database, chrom));
    }
/* Connect exactly once; a second sqlConnect() would overwrite the pointer
 * and leave the first connection open until exit. */
conn = sqlConnect(database);
f = mustOpen(outFile, "w");
hc = hashFirst(chromHash);
while ((hel = hashNext(&hc)) != NULL)
    {
    chrom = hel->name;
    chromSize = ptToInt(hel->val);
    chromSplits(chrom, chromSize, splitSize, conn, f);
    }
sqlDisconnect(&conn);
carefulClose(&f);
}
void writeRa(char *fileName)
/* Write the .ra file holding the settings shared by all NIBB images. */
{
FILE *ra = mustOpen(fileName, "w");

/* Submission set and image locations. */
fputs("submitSet nibbXenopusLaevis3\n", ra);
fputs("fullDir ../visiGene/full/inSitu/XenopusLaevis/nibb\n", ra);
fputs("thumbDir ../visiGene/200/inSitu/XenopusLaevis/nibb\n", ra);
fputs("priority 1200\n", ra);

/* Specimen description and source. */
fputs("sliceType whole mount\n", ra);
fputs("submissionSource National Institute of Basic Biology (NIBB) XDB\n", ra);
fputs("taxon 8355\n", ra);
fputs("genotype wild type\n", ra);
fputs("acknowledgement Thanks to Naoto Ueno and colleagues at NIBB for helping make these images available in VisiGene\n", ra);

/* Contributor and link fields.  No publication, journal or journalUrl
 * line is written here. */
fputs("contributor Ueno N., Kitayama A., Terasaka C., Nomoto K., Shibamoto K., Nishide H.\n", ra);
fputs("year 2005\n", ra);
fputs("setUrl http://xenopus.nibb.ac.jp\n", ra);
/* Needs fprintf: the doubled percent emits a literal %s in the output. */
fprintf(ra, "itemUrl http://xenopus.nibb.ac.jp/cgi-bin/search?query=%%s&name=clone\n");
fputs("probeColor purple\n", ra);
carefulClose(&ra);
}
Пример #5
0
int main(int argc, char *argv[])
/* For every chromosome that has a <chrom>_snpTmp table: read its fasta into
 * chromFastaHash and run processSnps(), logging to snpMoltype.errors.  Once
 * all chromosomes are processed, recreate and load each chromosome's table. */
{
struct slName *chromList = NULL, *el = NULL;
char tmpTable[64];

if (argc != 2)
    usage();

snpDb = argv[1];
hSetDb(snpDb);
chromList = hAllChromNamesDb(snpDb);

errorFileHandle = mustOpen("snpMoltype.errors", "w");

multiFastaHash = readFasta("chrMulti");

/* Pass 1: process the SNPs of each chromosome that has a temp table. */
el = chromList;
while (el != NULL)
    {
    char *chromName = el->name;
    safef(tmpTable, ArraySize(tmpTable), "%s_snpTmp", chromName);
    if (hTableExists(tmpTable))
        {
        verbose(1, "chrom = %s\n", chromName);
        chromFastaHash = readFasta(chromName);
        processSnps(chromName);
        }
    el = el->next;
    }

carefulClose(&errorFileHandle);

/* Pass 2: rebuild and load the table for those same chromosomes. */
el = chromList;
while (el != NULL)
    {
    char *chromName = el->name;
    safef(tmpTable, ArraySize(tmpTable), "%s_snpTmp", chromName);
    if (hTableExists(tmpTable))
        {
        recreateDatabaseTable(chromName);
        verbose(1, "loading chrom = %s\n", chromName);
        loadDatabase(chromName);
        }
    el = el->next;
    }

return 0;
}
void sangPairs(char *sangDir, char *outFile)
/* sangPairs - Process Sanger Paired reads to remove low quality bases and put in one big file.
 * Walks two directory levels below sangDir, passes every *.fasta file found
 * there to filterByQual() (writing to outFile) and reports the summed return
 * values as the total size.  Progress goes to stdout: the name of each
 * top-level directory followed by one dot per sub-directory. */
{
struct hash *hash = newHash(20);
struct fileInfo *dirList, *dirEl;
struct fileInfo *subList, *subEl;
struct fileInfo *faList, *faEl;
FILE *f = mustOpen(outFile, "w");
unsigned long totalSize = 0;

dirList = listDirX(sangDir, "*", TRUE);
for (dirEl = dirList; dirEl != NULL; dirEl = dirEl->next)
    {
    if (dirEl->isDir)
	{
	printf("%s", dirEl->name);
	fflush(stdout);
	subList = listDirX(dirEl->name, "*", TRUE);
	for (subEl = subList; subEl != NULL; subEl = subEl->next)
	    {
	    if (subEl->isDir)
	        {
		printf(".");
		fflush(stdout);
		faList = listDirX(subEl->name, "*.fasta", TRUE);
		for (faEl = faList; faEl != NULL; faEl = faEl->next)
		    {
		    totalSize += filterByQual(faEl->name, f, 19, 15, hash);
		    }
		slFreeList(&faList);
		}
	    }
	printf("\n");
	slFreeList(&subList);
	}
    }
/* Release the top-level listing the same way as the nested ones. */
slFreeList(&dirList);
printf("Total size %lu bytes\n", totalSize);
fclose(f);
}
void encode2Md5UpdateManifest(char *md5File, char *rootDir, char *oldManifest, char *newManifest)
/* encode2Md5UpdateManifest - Rewrite oldManifest as newManifest.  Every
 * record is written out; a record whose fileName appears in md5File is first
 * passed to updateSumAndAll() together with the new md5 and rootDir. */
{
struct encode2Manifest *miList = encode2ManifestLoadAll(oldManifest);
struct hash *md5Hash = md5FileHash(md5File);
struct encode2Manifest *mi;
int updateCount = 0;
FILE *f;

verbose(2, "Got %d items in miList, %d in md5Hash\n", slCount(miList), md5Hash->elCount);
f = mustOpen(newManifest, "w");
mi = miList;
while (mi != NULL)
    {
    char *newMd5 = hashFindVal(md5Hash, mi->fileName);
    if (newMd5 != NULL)
        {
        updateCount += 1;
        updateSumAndAll(mi, newMd5, rootDir);
        }
    /* Unmatched records are copied through unchanged. */
    encode2ManifestTabOut(mi, f);
    mi = mi->next;
    }
verbose(1, "Found %d of %d in patch.\n", updateCount, md5Hash->elCount);
carefulClose(&f);
}
Пример #8
0
void outputGcStatsWiggle(struct dnaSeq *seqList, struct hash *noGapHash, unsigned int windowLength, char *outFilename)
/* Write a fixedStep wiggle of windowed GC counts to outFilename.
 * For each sequence in seqList, the regions stored under its name in
 * noGapHash are scanned: one fixedStep header is written per region, then
 * reportGcCount() of the windowLength bases starting at each position for
 * which the whole window lies inside the region.  A region shorter than
 * windowLength gets a header and no data lines. */
{
	struct dnaSeq *currSeq = NULL;
	struct bed *currRegion = NULL;
	unsigned int i = 0;
	unsigned int lastStart = 0;
	char *currWindow = NULL;
	FILE *fout = mustOpen(outFilename, "w");

	for(currSeq = seqList; currSeq != NULL; currSeq = currSeq->next)
	{
		for(currRegion = hashFindVal(noGapHash, currSeq->name); currRegion != NULL; currRegion = currRegion->next)
		{
			fprintf(fout, "fixedStep chrom=%s start=%u step=1\n", currRegion->chrom, currRegion->chromStart+1);
			/* windowLength is unsigned, so chromEnd - windowLength would wrap
			 * to a huge value when the window is longer than chromEnd and the
			 * loop would read far beyond the sequence. */
			if(currRegion->chromEnd < windowLength)
				continue;
			lastStart = currRegion->chromEnd - windowLength;
			for(i = currRegion->chromStart; i <= lastStart; i++)
			{
				currWindow = &(currSeq->dna[i]);
				fprintf(fout, "%u\n", reportGcCount(currWindow, windowLength));
			}
		}
	}
	carefulClose(&fout);
}
Пример #9
0
boolean flyCdnaSeq(char *name, struct dnaSeq **retDna, struct wormCdnaInfo *retInfo)
/* Get a single fly cDNA sequence. Optionally (if retInfo is non-null) get additional
 * info about the sequence.
 * Returns FALSE when name is not in the index, when seeking to its offset
 * fails, or when the record cannot be read; TRUE otherwise.  The index and
 * fasta handles are opened on the first call and kept in static variables. */
{
long offset;
/* Start at NULL: faReadNext() only fills this in when retInfo was given. */
char *faComment = NULL;
char **pFaComment = (retInfo == NULL ? NULL : &faComment);
static struct snof *cdnaSnof = NULL;
static FILE *cdnaFa;

if (cdnaSnof == NULL)
	cdnaSnof = snofMustOpen("c:/biodata/fly/cDna/allcdna");
if (cdnaFa == NULL)
	cdnaFa = mustOpen("c:/biodata/fly/cDna/allcdna.fa", "rb");
if (!snofFindOffset(cdnaSnof, name, &offset))
    return FALSE;
if (fseek(cdnaFa, offset, SEEK_SET) != 0)
    return FALSE;
if (!faReadNext(cdnaFa, name, TRUE, pFaComment, retDna))
    return FALSE;
/* Only parse the comment when the caller asked for the extra info;
 * otherwise no comment was requested from faReadNext(). */
if (retInfo != NULL)
    flyFaCommentIntoInfo(faComment, retInfo);
return TRUE;
}
Пример #10
0
void hgNearTest(char *url, char *log)
/* hgNearTest - Test hgNear web page.
 * Fetches url, runs testOrg() for clOrg (or, when clOrg is NULL, for every
 * value of the main form's "org" variable), prints a summary to stdout and
 * writes the seed, the full report and the summary to the log file. */
{
struct htmlPage *rootPage = htmlPageGet(url);
struct htmlForm *mainForm = NULL;
struct htmlFormVar *orgVar = NULL;
FILE *logFile = mustOpen(log, "w");

htmlPageValidateOrAbort(rootPage);
htmlPageSetVar(rootPage, NULL, orderVarName, "geneDistance");
htmlPageSetVar(rootPage, NULL, countVarName, "25");

mainForm = htmlFormGet(rootPage, "mainForm");
if (mainForm == NULL)
    errAbort("Couldn't get main form");
orgVar = htmlFormVarGet(mainForm, "org");
if (orgVar == NULL)
    errAbort("Couldn't get org var");

if (clOrg == NULL)
    {
    /* No organism given on the command line: test every one offered. */
    struct slName *org = orgVar->values;
    while (org != NULL)
        {
        testOrg(rootPage, mainForm, org->name, clDb);
        org = org->next;
        }
    }
else
    testOrg(rootPage, mainForm, clOrg, clDb);

htmlPageFree(&rootPage);

slReverse(&nearTestList);

/* Summary goes to stdout; the log gets seed, details, then summary. */
reportSummary(nearTestList, stdout);
fprintf(logFile,"seed=%d\n",seed);
reportAll(nearTestList, logFile);
fprintf(logFile, "---------------------------------------------\n");
reportSummary(nearTestList, logFile);
slFreeList(&nearTestList);
carefulClose(&logFile);
}
Пример #11
0
void doTrimHeader(char *inputFileName)
/* Copy inputFileName to trimHeader.out.  Lines that do not start with '>'
 * are copied as they are; for lines that do, only the first '.'-separated
 * field reported by chopString() is written. */
{
FILE *outputFileHandle = mustOpen("trimHeader.out", "w");
struct lineFile *lf = lineFileOpen(inputFileName, TRUE);
char *line;
int lineSize;
char *contigId[2];

while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] != '>')
        {
	fprintf(outputFileHandle, "%s\n", line);
	continue;
	}
    /* The size passed must be that of the array being filled; a larger
     * count lets chopString() write past the end of contigId on headers
     * with more than two fields. */
    chopString(line, ".", contigId, ArraySize(contigId));
    fprintf(outputFileHandle, "%s\n", contigId[0]);
    }

carefulClose(&outputFileHandle);
lineFileClose(&lf);
}
Пример #12
0
static void mkH1n1StructData(char *gene, char *idPairFile, char *highlightId,
                             struct tempName *imageFile, struct tempName *chimeraScript)
/* Generate 3D structure files by running dynamic_highlight.pl for gene, and
 * fill in imageFile and chimeraScript with the names derived from the temp
 * prefix.  Difference highlighting is requested only when idPairFile or
 * highlightId is given; when both are non-NULL, idPairFile is the one used.
 * Aborts if either shell command returns non-zero. */
{
char idFile[PATH_LEN], idArg[PATH_LEN], logFile[PATH_LEN], cmd[2*PATH_LEN];
struct tempName prefix;

trashDirFile(&prefix, "hgct", gene, "tmp");

idArg[0] = '\0';
if (idPairFile != NULL || highlightId != NULL)
    {
    safef(idFile, sizeof(idFile), "%s.ids", prefix.forCgi);
    if (idPairFile == NULL)
        {
        /* Single id: the id file holds just that one line. */
        FILE *ids = mustOpen(idFile, "w");
        fprintf(ids, "%s\n", highlightId);
        carefulClose(&ids);
        }
    else
        {
        /* Pair file: the ids are its first column. */
        safef(cmd, sizeof(cmd), "cut -f 1 %s >%s", idPairFile, idFile);
        if (system(cmd) != 0)
            errAbort("extracting protein ids failed: %s", cmd);
        }
    safef(idArg, sizeof(idArg), "--ids %s", idFile);
    }

/* The script's combined stdout/stderr is captured in <prefix>.log. */
safef(logFile, sizeof(logFile), "%s.log", prefix.forCgi);
safef(cmd, sizeof(cmd), "%s/dynamic_highlight.pl --rasmol --chimera --protein %s --consensus 0602 %s --base %s >%s 2>&1",
      getH1n1StructDir(), gene, idArg, prefix.forCgi, logFile);
if (system(cmd) != 0)
    errAbort("creation of 3D structure highlight files failed: %s", cmd);

/* Output names are the prefix plus a fixed suffix. */
tempNameFromPrefix(imageFile, &prefix, "_highlight.jpg");
tempNameFromPrefix(chimeraScript, &prefix, "_highlight.cmd");
}
static void checkExtRecord(struct seqFields *seq,
                           char *extPath)
/* Check the external file record for a sequence (slow). Assumes
 * that bounds have been sanity check for a file.
 * Reads seq->file_size bytes at seq->file_offset of extPath and reports,
 * through gbError(), a failed seek, a record that does not start with '>',
 * a fasta name different from "<acc>.<version>", or a size mismatch. */
{
/* read range into buffer */
FILE *fh = mustOpen(extPath, "r");
char *faBuf;
char accVer[GB_ACC_BUFSZ];
struct dnaSeq *dnaSeq;
if (fseeko(fh, seq->file_offset, SEEK_SET) < 0)
    {
    gbError("%s: can't seek %s", seq->acc, extPath);
    carefulClose(&fh);
    /* Nothing more can be checked; carrying on would read from the
     * handle that was just closed. */
    return;
    }
faBuf = needMem(seq->file_size+1);
mustRead(fh, faBuf, seq->file_size);
faBuf[seq->file_size] = '\0';
carefulClose(&fh);

/* verify contents */
if (faBuf[0] != '>')
    {
    gbError("%s: gbExtFile offset %lld doesn't start a fasta record: %s",
            seq->acc, (long long)seq->file_offset, extPath);
    free(faBuf);
    return;
    }
dnaSeq = faFromMemText(faBuf);
safef(accVer, sizeof(accVer), "%s.%d", seq->acc, seq->version);

if (!sameString(dnaSeq->name, accVer))
    gbError("%s: name in fasta header \"%s\" doesn't match expected \"%s\": %s",
            seq->acc, dnaSeq->name, accVer, extPath);
if (dnaSeq->size != seq->size)
    gbError("%s: size of fasta sequence (%d) doesn't match expected (%d): %s",
            seq->acc, dnaSeq->size, seq->size, extPath);
freeDnaSeq(&dnaSeq);
}
void makeSineSineFixed(char *fileName, int innerRes, int outerRes, int outerCount, int chromCount)
/* Write a fixedStep test set to fileName: for chr1..chr<chromCount>, a
 * header followed by innerRes*outerRes*outerCount values of
 * 100*sin(inner)*sin(outer).  The inner angle advances by TWOPI/innerRes per
 * step and the outer angle by TWOPI/(innerRes*outerRes). */
{
FILE *f = mustOpen(fileName, "w");
int totalSteps = innerRes * outerRes * outerCount;
double innerStep = TWOPI/innerRes;
double outerStep = TWOPI/(innerRes*outerRes);
int chromIx = 1;

while (chromIx <= chromCount)
    {
    /* Both angles restart at zero for every chromosome. */
    double inner = 0, outer = 0;
    int stepsLeft;

    fprintf(f, "fixedStep chrom=chr%d start=1 step=1 span=1\n", chromIx);
    for (stepsLeft = totalSteps; stepsLeft > 0; --stepsLeft)
        {
        fprintf(f, "%f\n", 100.0*sin(inner)*sin(outer));
        inner += innerStep;
        outer += outerStep;
        }
    ++chromIx;
    }
carefulClose(&f);
}
void affyTransLiftedToSample(int grouping, char *affyTransIn)
/* Load the records of affyTransIn, turn each into a sample, sort the samples
 * with sampleCoordCmp, group them with groupByPosition() and write the
 * grouped list to "<affyTransIn>.sample".  Prints one '.' to stderr as a
 * progress mark. */
{
struct affyTransLifted *atlList = NULL, *atl = NULL;
struct sample *sampList = NULL, *groupedList = NULL, *samp = NULL;
FILE *out = NULL;
/* Room for the input name plus ".sample" and the terminator. */
char buff[10+strlen(affyTransIn)];
char *fileNameCopy = cloneString(affyTransIn);

chopSuffix(fileNameCopy);
fprintf(stderr, ".");
fflush(stderr);

/* Build the sample list; records that yield no sample are skipped. */
atlList = affyTransLiftedLoadAll(affyTransIn);
atl = atlList;
while (atl != NULL)
    {
    samp = sampleFromAffyTransLifted(atl, fileNameCopy);
    if (samp != NULL)
        slAddHead(&sampList, samp);
    atl = atl->next;
    }

/* Sort, then group. */
slSort(&sampList, sampleCoordCmp);
groupedList = groupByPosition(grouping, sampList);

/* Save the grouped samples. */
snprintf(buff, sizeof(buff), "%s.sample", affyTransIn);
out = mustOpen(buff, "w");
samp = groupedList;
while (samp != NULL)
    {
    sampleTabOut(samp, out);
    samp = samp->next;
    }

/* Clean up. */
freez(&fileNameCopy);
carefulClose(&out);
sampleFreeList(&sampList);
sampleFreeList(&groupedList);
affyTransLiftedFreeList(&atlList);
}
Пример #16
0
static void	createReadsHash(char **argv) {
	/* Fill the global readsHash from the two read files argv[2] and argv[3].
	 * Records are four lines long: a header "@id ...", the bases, a line that
	 * is ignored, and the qualities.  The hash key is the id, with "/1"
	 * (argv[2]) or "/2" (argv[3]) appended when the global ncbi flag is set;
	 * the value is "<bases> <qualities>".
	 * A file that ends right after a header line stops being read without
	 * error; one that ends later inside a record aborts. */
	int i;
	FILE *fp;
	/* id, dna and qua are as large as the line buffer so an unbounded %s
	 * scan of one line can never overflow them; they start empty so a line
	 * that fails to parse does not leave them uninitialized. */
	char buf[500], id[500] = "", dna[500] = "", qua[500] = "";
	char fub[sizeof(id) + 16];		/* id plus "/<n>" */
	char rec[sizeof(dna) + sizeof(qua)];	/* dna, one space, qua */
	char *str;
	readsHash = newHash(16);
	for (i = 2; i <= 3; i++) {
		fp = mustOpen(argv[i], "r");
		while (fgets(buf, sizeof(buf), fp)) {
			if (buf[0] == '\0')
				break;
			sscanf(buf, "@%s %*s", id);
			if (ncbi)
				snprintf(fub, sizeof(fub), "%s/%d", id, i - 1);
			else
				snprintf(fub, sizeof(fub), "%s", id);
			/* Header with nothing after it: stop reading this file. */
			if (!fgets(buf, sizeof(buf), fp))
				break;
			sscanf(buf, "%s", dna);
			if (!fgets(buf, sizeof(buf), fp))
				errAbort("error: %s", argv[i]);
			if (!fgets(buf, sizeof(buf), fp))
				errAbort("error: %s", argv[i]);
			sscanf(buf, "%s", qua);
			/* Built in its own buffer: bases plus qualities can be
			 * nearly twice as long as one input line. */
			snprintf(rec, sizeof(rec), "%s %s", dna, qua);
			str = cloneString(rec);
			hashAdd(readsHash, fub, str);
		}
		fclose(fp);
	}
}
void eisenInput(char *database, char *outFile)
/* eisenInput - Create input for Eisen-style cluster program.
 * Loads the refLink records and the expression records of expRecordTable in
 * hgFixed (hashed by their id printed as a decimal string), then calls
 * oneChromInput() for every chromosome of database, writing to outFile. */
{
struct slName *chromList = NULL, *chromEl;
FILE *f = mustOpen(outFile, "w");
char *chrom;
struct hash *refLinkHash = hashNew(0);
struct refLink *refLinkList;
struct hash *erHash = hashNew(0);
struct expRecord *erList = NULL, *er;


/* Load info good for all chromosomes. */
refLinkList = loadRefLink(database, refLinkHash);
erList = loadExpRecord(expRecordTable, "hgFixed");
for (er = erList; er != NULL; er = er->next)
    {
    char sid[16];
    snprintf(sid, sizeof(sid), "%u", er->id);
    hashAdd(erHash, sid, er);
    }

/* Do it chromosome by chromosome. */
chromList = hAllChromNames(database);
for (chromEl = chromList; chromEl != NULL; chromEl = chromEl->next)
    {
    chrom = chromEl->name;
    uglyf("%s\n", chrom);
    oneChromInput(database, chrom, hChromSize(database, chrom), "rnaCluster", expTrack, 
    	refLinkHash, erHash, f);
    }

/* Cleanup time! */
expRecordFreeList(&erList);
freeHash(&erHash);
refLinkFreeList(&refLinkList);
freeHash(&refLinkHash);
/* Close the output so buffered data is flushed and the handle released. */
carefulClose(&f);
}
int main(int argc, char *argv[])
/* c2gcheck - load the Sanger and Jim gene-to-cdna files, check each one and
 * then one against the other, merge them with update() and save the merged
 * Sanger list to the update file.  Takes exactly four arguments: Sanger
 * file, Jim file, update file, error file. */
{
struct g2cFile *sanger, *jim;
char *sangerPath, *jimPath, *updatePath, *errPath;

if (argc != 5)
    errAbort("c2gcheck - compares two gene-to-cdna files, notes differences\n"
             "and writes out a third merged file.\n"
             "Usage:\n"
             "    c2gcheck Sanger Jim Update errs\n");

memPool = lmInit(1<<16);
pushWarnHandler(reportWarning);

sangerPath = argv[1];
jimPath = argv[2];
updatePath = argv[3];
errPath = argv[4];

errFile = mustOpen(errPath, "w");

sanger = loadG2cFile(sangerPath);
jim = loadG2cFile(jimPath);

/* Each file on its own first, then in both directions against the other. */
checkOneFile(sanger, sangerPath);
checkOneFile(jim, jimPath);
checkTwoFiles(sanger, jim, "Jim unique");
checkTwoFiles(jim, sanger, "Sanger unique");

update(sanger, jim);
saveG2cFile(sanger, updatePath);

lmCleanup(&memPool);
return 0;
}
Пример #19
0
void mafToProtein(char *dbName, char *mafTable, char *frameTable, 
    char *org,  char *speciesList, char *outName)
/* mafToProtein - output protein alignments using maf and frames.
 * The genes to output come from the file named by the global geneList, else
 * from the single global geneName, else from queryNames().  outGene() is
 * called for each one, writing to outName; when the global delay is
 * non-zero the function sleeps that many seconds after each gene. */
{
struct slName *geneNames = NULL, *gene;
struct slName *speciesNames = readList(speciesList);
FILE *f = mustOpen(outName, "w");

hSetDb(dbName);

newTableType = hHasField(frameTable, "isExonStart");

if (inExons && !newTableType)
    errAbort("must have new mafFrames type to output in exons");

if (geneList != NULL)
    geneNames = readList(geneList);
else if (geneName != NULL)
    {
    /* Build a one-element list holding just geneName. */
    int len = strlen(geneName);
    geneNames = needMem(sizeof(*geneNames)+len);
    strcpy(geneNames->name, geneName);
    }
else
    geneNames = queryNames(dbName, frameTable, org);

/* Walk with a separate cursor so the head of the list is not lost. */
for(gene = geneNames; gene != NULL; gene = gene->next)
    {
    verbose(2, "outting  gene %s \n",gene->name);
    outGene(f, gene->name, dbName, mafTable, 
	frameTable, org, speciesNames);
    if (delay)
	{
	verbose(2, "delaying %d seconds\n",delay);
	sleep(delay);
	}
    }
/* Close the output so buffered data is flushed and the handle released. */
carefulClose(&f);
}
Пример #20
0
void twoBitMask(char *inName, char *maskName, char *outName)
/* twoBitMask - apply masking to a .2bit file, creating a new .2bit file.
 * The mask format is chosen from the global type when it is set and ends in
 * "bed" or "out", otherwise from the ".bed" / ".out" ending of maskName;
 * anything else aborts. */
{
    struct hash *tbHash = hashNew(20);
    struct hash *bitmapHash = hashNew(20);
    struct twoBit *twoBitList = NULL;
    struct twoBit *tb = NULL;
    FILE *out = NULL;
    int isWholeFile = twoBitIsFile(inName);

    /* Only whole .2bit files are accepted as input. */
    if (!isWholeFile)
    {
        if (!twoBitIsSpec(inName))
            errAbort("Input %s does not look like a proper .2bit file.", inName);
        else
            errAbort("Sorry, this works only on whole .2bit files, not specs.");
    }

    twoBitList = slurpInput(inName, tbHash, bitmapHash);

    /* Read the mask into bitmapHash and store it in the twoBits. */
    if ((type && endsWith(type, "bed")) || endsWith(maskName, ".bed"))
    {
        maskWithBed(maskName, tbHash, bitmapHash);
    }
    else if ((type && endsWith(type, "out")) || endsWith(maskName, ".out"))
    {
        maskWithOut(maskName, tbHash, bitmapHash);
    }
    else
    {
        errAbort("Sorry, maskFile must end in \".bed\" or \".out\".");
    }

    /* Write the header, then every sequence, to the new .2bit file. */
    out = mustOpen(outName, "wb");
    twoBitWriteHeader(twoBitList, out);
    tb = twoBitList;
    while (tb != NULL)
    {
        twoBitWriteOne(tb, out);
        tb = tb->next;
    }
    carefulClose(&out);

    /* twoBitList and the hashes are deliberately not freed here. */
}
Пример #21
0
void countCosmids(char *listFileName, FILE *out)
/* Read each cosmid in list file and find out how big it is.
 * Every line of listFileName names a file below
 * C:/biodata/cbriggsae/finish; its sequence size is added to the global
 * cosmidTotalSize and cosmidCount is incremented.  The rounded average is
 * stored in cosmidAverageSize (0 when no cosmid has been counted) and a
 * one-line summary is written to out. */
{
FILE *listFile = mustOpen(listFileName, "r");
char line[512], *s;
struct dnaSeq *seq;
/* Large enough for the directory, a '/', and any line that fits in line[]. */
char path[sizeof(line) + 64];

while (fgets(line, sizeof(line), listFile))
    {
    s = trimSpaces(line);
    snprintf(path, sizeof(path), "%s/%s", "C:/biodata/cbriggsae/finish", s);
    seq = faReadDna(path);
    ++cosmidCount;
    cosmidTotalSize += seq->size;
    freeDnaSeq(&seq);
    }
fclose(listFile);
/* Avoid dividing by zero when nothing has been counted. */
if (cosmidCount != 0)
    cosmidAverageSize = round((double)cosmidTotalSize/cosmidCount);
else
    cosmidAverageSize = 0;
fprintf(out, "%d cosmids, average length %d\n", cosmidCount, cosmidAverageSize);
}
void readAllWordsOrFa(char *fileName, char ***retFiles, int *retFileCount, 
   char **retBuf)
/* Peek at the first character of fileName.  If it is '>' the file is taken
 * to be a .fa file and is returned as a one-element list holding a copy of
 * its own name (that copy is also what *retBuf points to).  Otherwise the
 * file is read with readAllWords() as a list of file names. */
{
FILE *f = mustOpen(fileName, "r");
int firstChar = fgetc(f);
char **single = NULL;

fclose(f);
if (firstChar != '>')
    {
    readAllWords(fileName, retFiles, retFileCount, retBuf);
    return;
    }

/* Fasta file: the list is just this file. */
AllocArray(single, 1);
single[0] = cloneString(fileName);
*retFiles = single;
*retBuf = single[0];
*retFileCount = 1;
}
Пример #23
0
void correctEst(char *oldFa, char *pslFile, char *nibDir, char *outFa)
/* correctEst - Correct ESTs by passing them through genome.
 * Every record of oldFa whose name is found in the hash built from pslFile
 * is handed to correctOne() together with its psl; all other records are
 * copied to outFa unchanged. */
{
struct hash *pslHash = hashPsls(pslFile);
struct lineFile *lf = lineFileOpen(oldFa, FALSE);
FILE *f = mustOpen(outFa, "w");
static struct dnaSeq est;
struct psl *psl;
struct hash *nibHash = newHash(8);

while (faSpeedReadNext(lf, &est.dna, &est.size, &est.name))
    {
    if ((psl = hashFindVal(pslHash, est.name)) != NULL)
        {
	correctOne(&est, psl, nibDir, nibHash, f);
	}
    else
        {
	faWriteNext(f, est.name, est.dna, est.size);
	}
    }
/* Release both files; closing the output also flushes what was written. */
lineFileClose(&lf);
carefulClose(&f);
}
Пример #24
0
void hgKgGetText(char *database, char *outFile)
/* hgKgGetText - Get text from known genes into a file.
 * Connects to database, "uniProt" and "go", loads the known gene list and
 * calls getText() for each gene, writing to outFile.  When summaryTable
 * exists its contents are loaded with getRefSeqSummary() and passed along;
 * otherwise a warning is issued and NULL is passed instead. */
{
FILE *f = mustOpen(outFile, "w");
struct sqlConnection *conn = sqlConnect(database);
struct sqlConnection *spConn = sqlConnect("uniProt");
struct sqlConnection *goConn = sqlConnect("go");
struct kgXref *kgList = NULL, *kg;
struct hash *refSeqHash = NULL;

gotRefSeqSummary = sqlTableExists(conn, summaryTable);
if (gotRefSeqSummary)
    refSeqHash = getRefSeqSummary(conn);
else
    warn("No %s table in %s, proceeding without...", summaryTable, database);
kgList = getKgList(conn);
verbose(1, "Read in %d known genes from %s\n", slCount(kgList), database);

for (kg = kgList; kg != NULL; kg = kg->next)
    getText(kg, refSeqHash, conn, spConn, goConn, f);
carefulClose(&f);
/* Give back the three database connections opened above. */
sqlDisconnect(&goConn);
sqlDisconnect(&spConn);
sqlDisconnect(&conn);
}
Пример #25
0
boolean findLineInFile(char *fileName, char *start,
    char *lineBuf, int lineBufSize)
/* Scan the named file line by line for the first line that begins with
 * start immediately followed by a space.  Returns TRUE with that line left
 * in lineBuf, or FALSE if the end of the file is reached first (lineBuf
 * then holds whatever was read last). */
{
int startLen = strlen(start);
FILE *f = mustOpen(fileName, "r");
boolean foundIt = FALSE;

/* Stop reading as soon as a match is seen so lineBuf keeps that line. */
while (!foundIt && fgets(lineBuf, lineBufSize, f) != NULL)
    {
    if (strncmp(start, lineBuf, startLen) != 0)
        continue;
    if (lineBuf[startLen] == ' ')
        foundIt = TRUE;
    }
fclose(f);
return foundIt;
}
Пример #26
0
void seqFromPsl(char *inPsl, char *inTwoBit, char *outFa)
/* seqFromPsl - Extract masked sequence from database corresponding to psl file.
 * For each alignment in inPsl the target range tStart..tEnd is read from
 * inTwoBit, reverse-complemented when the strand starts with '-', passed
 * through lowerToN() when the global hardMask is set, and written to outFa
 * under the header "<qName> (<tName>:<tStart+1>-<tEnd>)". */
{
struct twoBitFile *tbf = twoBitOpen(inTwoBit);
struct lineFile *lf = pslFileOpen(inPsl);
FILE *f = mustOpen(outFa, "w");
struct psl *psl;

while ((psl = pslNext(lf)) != NULL)
    {
    char faHead[512];
    struct dnaSeq *seq = twoBitReadSeqFrag(tbf, psl->tName,
    	psl->tStart, psl->tEnd);
    if (psl->strand[0] == '-')
        reverseComplement(seq->dna, seq->size);
    safef(faHead, sizeof(faHead), "%s (%s:%d-%d)", 
    	psl->qName, psl->tName, psl->tStart+1, psl->tEnd);
    if (hardMask)
        lowerToN(seq->dna, seq->size);
    faWriteNext(f, faHead, seq->dna, seq->size);
    /* Free per-alignment data so memory does not grow with the input. */
    freeDnaSeq(&seq);
    pslFree(&psl);
    }
carefulClose(&f);
lineFileClose(&lf);
twoBitClose(&tbf);
}
Пример #27
0
void blatFlekFilter(char *outName, int inCount, char *inNames[])
/* blatFilter - filter blat alignments somewhat.
 * Reads every psl of the inCount files in inNames.  An alignment whose
 * target span is below three times (qEnd + qStart) is written to outName as
 * it is; any other is written through writePslFrags(). */
{
int i;
FILE *f = mustOpen(outName, "w");

for (i=0; i<inCount; ++i)
    {
    char *inName = inNames[i];
    struct lineFile *lf = pslFileOpen(inName);
    struct psl *psl;
    while ((psl = pslNext(lf)) != NULL)
        {
	dotOut();
	/* NOTE(review): qEnd + qStart looks like it may have been meant as
	 * the query span qEnd - qStart; left unchanged, confirm intent. */
	if (psl->tEnd - psl->tStart < (psl->qEnd + psl->qStart) * 3)
	    pslTabOut(psl, f);
	else
	    writePslFrags(psl, f);
	pslFree(&psl);
	}
    /* Done with this input; do not keep one open file per argument. */
    lineFileClose(&lf);
    }
printf("\n");
/* Close the output so buffered data is flushed and the handle released. */
carefulClose(&f);
}
void readFile(char *pslFile)
/* Implements the readFile task: read up to gMaxRows psl records from
 * pslFile (restricted by gChrom), echoing each to gOutput when that is set,
 * then hand the number read to checkNumRows(). */
{
struct pslReader *reader = pslReaderFile(pslFile, gChrom);
FILE *outFh = NULL;
int numRows;

if (gOutput != NULL)
    outFh = mustOpen(gOutput, "w");

for (numRows = 0; numRows < gMaxRows; numRows++)
    {
    struct psl *psl = pslReaderNext(reader);
    if (psl == NULL)
        break;
    if (outFh != NULL)
        pslTabOut(psl, outFh);
    pslFree(&psl);
    }

carefulClose(&outFh);
pslReaderFree(&reader);
checkNumRows(pslFile, numRows);
}
Пример #29
0
void loadChroms()
/* Create the global chromHash.  For every chrom in chromInfo, open
 * "<chrom>_snp125hg17ortho.tab" for writing and store the open file handle
 * in the hash under the chromosome name. */
{
struct sqlConnection *conn = hAllocConn();
struct sqlResult *sr = NULL;
char **row = NULL;
char query[512];
char tabName[64];

chromHash = newHash(0);
sqlSafef(query, sizeof(query), "select chrom from chromInfo");
sr = sqlGetResult(conn, query);
for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    char *chromName = row[0];
    FILE *tabFile;

    safef(tabName, sizeof(tabName), "%s_snp125hg17ortho.tab", chromName);
    tabFile = mustOpen(tabName, "w");
    verbose(1, "chrom = %s\n", chromName);
    /* The handle stays open; it is the value stored in the hash. */
    hashAdd(chromHash, cloneString(chromName), tabFile);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
}
void makeMixem(char *fileName)
/* Write a fixed test file that mixes a variableStep section, two fixedStep
 * sections and four tab-separated four-column lines. */
{
/* The file contents, one entry per output line, in output order. */
static const char *const lines[] =
    {
    "variableStep chrom=chr1\n",
    "100\t1.0\n",
    "200\t2.0\n",
    "fixedStep chrom=chr1 start=1000 step=2\n",
    "1.0\n",
    "2.0\n",
    "3.0\n",
    "4.0\n",
    "chr1\t10000\t10100\t100\n",
    "chr1\t20000\t20100\t200\n",
    "chr2\t10000\t10100\t100\n",
    "chr3\t10000\t10100\t100\n",
    "fixedStep chrom=chr11 start=1000 step=2\n",
    "11.0\n",
    "12.0\n",
    "13.0\n",
    "14.0\n",
    };
int lineCount = sizeof(lines) / sizeof(lines[0]);
int i;
FILE *f = mustOpen(fileName, "w");

for (i = 0; i < lineCount; ++i)
    fputs(lines[i], f);
carefulClose(&f);
}