struct contig *contigsFromAgp(char *fileName, struct hash *hash)
/* Build up a list of contigs looking at agp file.
 * Uses the first three words of each line: contig name, start and end, with
 * start converted from 1-based to 0-based.  A contig not yet in hash is
 * allocated, added to hash under its name and put on the returned list
 * (kept in order of first appearance).  Every line must start exactly where
 * the contig's previous line ended; contig->size ends up as the last end
 * seen.  Aborts on short lines, on end before start, and on a start that
 * does not continue the contig. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[3];
int wordCount;
struct contig *contigList = NULL, *contig;
while ((wordCount = lineFileChop(lf, row)) != 0)
    {
    char *name;
    int s, e;
    /* Without this check a line with fewer than three words would hand
     * unset row[] slots to lineFileNeedNum. */
    if (wordCount < 3)
        errAbort("Expecting at least 3 words line %d of %s", lf->lineIx, lf->fileName);
    name = row[0];
    s = lineFileNeedNum(lf, row, 1) - 1;
    e = lineFileNeedNum(lf, row, 2);
    /* A negative size means the end lies before the start; the message used
     * to state the opposite. */
    if (e - s < 0)
        errAbort("End before start line %d of %s", lf->lineIx, lf->fileName);
    if ((contig = hashFindVal(hash, name)) == NULL)
        {
        AllocVar(contig);
        hashAddSaveName(hash, name, contig, &contig->name);
        slAddHead(&contigList, contig);
        }
    /* contig->size holds the end of the previous line for this contig. */
    if (s != contig->size)
        errAbort("Start doesn't match previous end line %d of %s", lf->lineIx, lf->fileName);
    contig->size = e;
    }
lineFileClose(&lf);
slReverse(&contigList);
return contigList;
}
Beispiel #2
0
double calcNormScoreFactor(char *fileName, int scoreCol)
/* Figure out what to multiply things by to get a nice browser score (0-1000).
 * Scans column scoreCol (zero based) of fileName and returns 1000 divided by
 * mean plus one standard deviation, with that sum capped at the largest
 * value seen. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[scoreCol+1];
double total = 0, totalSquares = 0;
double biggest = 0;
int count = 0;
int got;
for (;;)
    {
    got = lineFileChop(lf, row);
    if (got == 0)
        break;
    lineFileExpectAtLeast(lf, scoreCol+1, got);
    double val = sqlDouble(row[scoreCol]);
    /* The first value seeds the maximum; later ones only raise it. */
    if (count == 0 || val > biggest)
        biggest = val;
    total += val;
    totalSquares += val*val;
    count += 1;
    }
lineFileClose(&lf);
double spread = calcStdFromSums(total, totalSquares, count);
double top = total/count + spread;
if (top > biggest)
    top = biggest;
return 1000.0/top;
}
Beispiel #3
0
struct liftSpec *readLifts(char *fileName)
/* Load lift file fileName into a list of liftSpecs kept in file order.
 * Every line must have exactly five words; used are word 1 (offset), word 2
 * (old name), word 4 (new name) and word 5 (size).  Prints how many lifts
 * were read and aborts if there were none. */
{
struct liftSpec *liftList = NULL, *lift;
char *row[16];
int rowSize;
struct lineFile *lf = lineFileOpen(fileName, TRUE);

for (;;)
    {
    char *offsetText;
    rowSize = lineFileChop(lf, row);
    if (rowSize == 0)
        break;
    lineFileExpectWords(lf, 5, rowSize);
    offsetText = row[0];
    /* The offset may be negative, the size may not. */
    if (!(isdigit(offsetText[0]) || (offsetText[0] == '-' && isdigit(offsetText[1]))))
        errAbort("Expecting number in first field line %d of %s", lf->lineIx, lf->fileName);
    if (!isdigit(row[4][0]))
        errAbort("Expecting number in fifth field line %d of %s", lf->lineIx, lf->fileName);
    AllocVar(lift);
    lift->oldName = cloneString(row[1]);
    lift->offset = atoi(offsetText);
    lift->newName = cloneString(row[3]);
    lift->size = atoi(row[4]);
    slAddHead(&liftList, lift);
    }
slReverse(&liftList);
lineFileClose(&lf);
printf("Got %d lifts in %s\n", slCount(liftList), fileName);
if (liftList == NULL)
    errAbort("Empty liftSpec file %s", fileName);
return liftList;
}
Beispiel #4
0
void hgPhMouse(char *database, char *track, int fileCount, char *fileNames[])
/* hgPhMouse - Load phMouse track.
 * Each of the fileCount files in fileNames is read into a list of beds:
 * words 1-3 give chrom, start and end, word 4 is "name|strand" and word 7 the
 * score.  The beds of one file are sorted with bedCmp and written, each
 * preceded by its bin, to phMouse.tab.  After all files are done the tab
 * file is loaded into database.track and removed.  Progress goes to stdout. */
{
int i;
char *fileName;
char *tabName = "phMouse.tab";
FILE *f = mustOpen(tabName, "w");
struct lineFile *lf;
char *words[32], *s, c;
int wordCount;
int oneSize, totalSize = 0;

for (i=0; i<fileCount; ++i)
    {
    struct bed *bedList = NULL, *bed;
    fileName = fileNames[i];
    lf = lineFileOpen(fileName, TRUE);
    printf("Reading %s ", fileName);
    fflush(stdout);
    while ((wordCount = lineFileChop(lf, words)) > 0)
        {
        if (wordCount < 7)
           errAbort("Expecting at least 7 words line %d of %s", 
                lf->lineIx, fileName);
        AllocVar(bed);
        bed->chrom = cloneString(words[0]);
        bed->chromStart = lineFileNeedNum(lf, words, 1);
        bed->chromEnd = lineFileNeedNum(lf, words, 2);
        bed->score = lineFileNeedNum(lf, words, 6);
        /* The strand is the character after the last '|'.  A name without
         * any '|' makes strrchr return NULL, which used to be dereferenced. */
        s = strrchr(words[3], '|');
        if (s == NULL || (s[1] != '+' && s[1] != '-'))
            errAbort("Misformed strandless trace name line %d of %s",
                lf->lineIx, lf->fileName);
        c = s[1];
        s[0] = 0;   /* cut "|strand" off the name */
        bed->name = cloneString(words[3]);
        bed->strand[0] = c;
        slAddHead(&bedList, bed);
        }
    /* Done with this input; it used to stay open for the rest of the run. */
    lineFileClose(&lf);
    oneSize = slCount(bedList);
    printf("%d alignments ", oneSize);
    totalSize += oneSize;
    fflush(stdout);
    slSort(&bedList, bedCmp);
    printf("sorted ");
    fflush(stdout);
    for (bed = bedList; bed != NULL; bed = bed->next)
        {
        int bin = hFindBin(bed->chromStart, bed->chromEnd);
        fprintf(f, "%d\t", bin);
        bedTabOutN(bed, 6, f);
        }
    printf("tabbed out\n");
    bedFreeList(&bedList);
    }
carefulClose(&f);
printf("Loading %d items into %s.%s\n", totalSize, database, track);
loadDatabase(database, track, tabName);
remove(tabName);
}
void clumpEst3(char *inName, char *outName)
/* clumpEst3 - Clump together 3' ESTs.
 * Reads est3 records from inName and writes clumps to outName.  A record
 * starts a new clump when it is the first one, is on a different strand or
 * chromosome than the current clump, or starts more than 2000 bases past the
 * clump's end.  Otherwise it extends the clump (end and estCount) if it
 * starts less than 100 bases past the clump's end or has an estCount above
 * one; if neither holds it is dropped.
 * NOTE(review): the distance tests presumably rely on inName being sorted
 * by position - confirm with whatever produces the file. */
{
struct lineFile *lf = lineFileOpen(inName, TRUE);
struct est3 *cum = NULL, *cur;
FILE *f = mustOpen(outName, "w");
char *words[8];
int wordCount;

while ((wordCount = lineFileChop(lf, words)) > 0)
    {
    cur = est3Load(words);
    if (cum == NULL || cur->strand[0] != cum->strand[0] 
       || cur->chromStart - cum->chromEnd > 2000
       || !sameString(cum->chrom, cur->chrom) )
       {
       /* cum is NULL for the very first record; writeEst3 is called with it
        * anyway, as before - presumably it ignores NULL (TODO confirm). */
       writeEst3(f, cum);
       est3Free(&cum);
       cum = cur;
       }
    else
       {
       if (cur->chromStart - cum->chromEnd < 100 || cur->estCount > 1)
           {
           cum->chromEnd = cur->chromEnd;
           cum->estCount += cur->estCount;
           }
       est3Free(&cur);
       }
    }
writeEst3(f, cum);
/* Release the last clump and close both files.  The output used to be left
 * open, so flushing it depended on process exit. */
est3Free(&cum);
lineFileClose(&lf);
carefulClose(&f);
}
Beispiel #6
0
struct groupSizeInfo *readSizes(char *fileName, struct hash *gsiHash)
/* Parse fileName, whose lines must have exactly three words:
 *     groupName guessedMin guessedMax
 * Each line becomes a groupSizeInfo that is added to gsiHash with
 * hashAddUnique under its group name and returned in a list in file order.
 * The name field points at the hash's copy of the key. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct groupSizeInfo *sizeList = NULL;
char *row[8];
int rowSize;

for (;;)
    {
    struct groupSizeInfo *info;
    struct hashEl *entry;
    rowSize = lineFileChop(lf, row);
    if (rowSize == 0)
        break;
    lineFileExpectWords(lf, 3, rowSize);
    AllocVar(info);
    entry = hashAddUnique(gsiHash, row[0], info);
    info->name = entry->name;
    info->guessedMin = atoi(row[1]);
    info->guessedMax = atoi(row[2]);
    slAddHead(&sizeList, info);
    }

lineFileClose(&lf);
slReverse(&sizeList);
return sizeList;
}
Beispiel #7
0
void readLoc2ref(char *fileName, struct hash **retPgiHash, struct hash **retLocHash)
/* Read loc2ref file.  Create hashes of rsInfo's indexed by pgi and locusLinkId.
 * Only lines that chop into four words are used: word 1 is the locusLinkId,
 * word 2 the mRNA accession, word 3 the pgi.  A line whose locusLinkId is
 * already in the hash is skipped when its accession starts with "XM_".
 * Both hashes are newly created and handed back through retPgiHash and
 * retLocHash.  Prints how many entries were added out of how many lines. */ 
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[4];
int wordCount;
struct rsInfo *rs;
struct hash *locHash = newHash(0);
struct hash *pgiHash = newHash(0);
int lineCount = 0, count = 0;

while ((wordCount = lineFileChop(lf, words)) > 0)
    {
    ++lineCount;
    if (wordCount == 4)
        {
        if (hashLookup(locHash, words[0]) != NULL && startsWith("XM_", words[1]))
            continue;
        AllocVar(rs);
        hashAddSaveName(locHash, words[0], rs, &rs->locusLinkId);
        rs->mrnaAcc = cloneString(words[1]);
        hashAddSaveName(pgiHash, words[2], rs, &rs->pgi);
        /* Count what was actually added; this counter was never advanced
         * before, so the summary below always reported 0. */
        ++count;
        }
    }
lineFileClose(&lf);
*retLocHash = locHash;
*retPgiHash = pgiHash;
printf("Added %d locusLink ids from %s in %d lines\n", count, fileName, lineCount);
}
Beispiel #8
0
void colTransform(char *column, char *input, char *addFactor, char *mulFactor, char *output)
/* colTransform - Add and/or multiply column by constant.
 * column is the 1-based number of the column to change, given as text, as
 * are addFactor and mulFactor.  Every line of input is written to output
 * with its words tab-separated and the chosen column replaced by
 * value*mulFactor + addFactor printed with %g.  Aborts if column is 0 or a
 * line does not reach the column. */
{
int col = sqlUnsigned(column) - 1;
double add = sqlDouble(addFactor);
double mul = sqlDouble(mulFactor);
struct lineFile *lf;
FILE *f;
char *words[512];
int wordCount;
/* Column "0" would turn into index -1 and read before the start of words[]. */
if (col < 0)
    errAbort("Column must be 1 or greater, got %s", column);
lf = lineFileOpen(input, TRUE);
f = mustOpen(output, "w");
while ((wordCount = lineFileChop(lf, words)) > 0)
    {
    /* words[col] exists only if there are at least col+1 words; the check
     * used to ask for just col. */
    lineFileExpectAtLeast(lf, col+1, wordCount);
    double x = lineFileNeedDouble(lf, words, col);
    int i;
    for (i=0; i<wordCount; ++i)
        {
        if (i != 0)
            fputc('\t', f);
        if (i == col)
            fprintf(f, "%g", x*mul+add);
        else
            fputs(words[i], f);
        }
    fputc('\n', f);
    }
lineFileClose(&lf);
carefulClose(&f);
}
Beispiel #9
0
void readBad(struct hash *badHash, char *fileName, int cloneWord)
/* Store in badHash the accession found in word cloneWord (zero based) of
 * every line of fileName.  The accession has its suffix removed with
 * chopSuffix and must pass checkAccFormat.  Prints how many were read. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[8];
int needed = cloneWord + 1;
int stored = 0;
int rowSize;

for (;;)
    {
    char *accession;
    rowSize = lineFileChop(lf, row);
    if (rowSize == 0)
        break;
    if (rowSize < needed)
        errAbort("Expecting at least %d words line %d of %s", needed,
                lf->lineIx, lf->fileName);
    accession = row[cloneWord];
    chopSuffix(accession);
    if (!checkAccFormat(accession))
        errAbort("Badly formatted accession line %d of %s", lf->lineIx, lf->fileName);
    hashStore(badHash, accession);
    stored += 1;
    }
lineFileClose(&lf);
printf("Got %d clones to avoid from %s\n", stored, fileName);
}
Beispiel #10
0
struct chromInfo *readChromSizes(char *fileName)
/* Create list of chromInfos based on a two column file <chrom><size>.
 * The list is in file order.  Each element gets a copy of the name, the
 * size, and genomeOffset set to the sum of the sizes before it.  Aborts on
 * a line without a size, on a repeated name, and when the running total
 * goes over 2^32-1 bases. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[2];
int wordCount;
struct chromInfo *list = NULL, *el;
bits64 maxTotal = (1LL << 32) - 1;
bits64 total = 0;
int chromCount = 0;
struct hash *uniqHash = hashNew(16);

while ((wordCount = lineFileChop(lf, row)) != 0)
    {
    char *name = row[0];
    /* A one-word line would leave row[1] unset for lineFileNeedNum below. */
    if (wordCount < 2)
        errAbort("Expecting 2 words line %d of %s", lf->lineIx, lf->fileName);
    if (hashLookup(uniqHash, name))
       errAbort("Duplicate chromosome or contig name %s line %d of %s",
        name, lf->lineIx, lf->fileName);
    hashAdd(uniqHash, name, NULL);
    AllocVar(el);
    el->name = cloneString(name);
    el->size = lineFileNeedNum(lf, row, 1);
    el->genomeOffset = total;
    total += el->size;
    /* maxTotal and total are unsigned 64 bit, hence %llu rather than %lld. */
    if (total > maxTotal)
        errAbort("Too many bases line %d of %s.  Max is %llu,  total so far is %llu",
                lf->lineIx, lf->fileName, maxTotal, total);
    slAddHead(&list, el);
    ++chromCount;
    }
hashFree(&uniqHash);
lineFileClose(&lf);
slReverse(&list);
verbose(1, "Read %d chroms totalling %llu bases in %s\n", chromCount, total, fileName);
return list;
}
void ave(char *fileName)
/* ave - Compute average and basic stats.
 * Collects the numbers found in column col (1-based, a file-level setting)
 * of fileName, sorts them with cmpDouble and passes them to showStats.
 * Rows that do not reach the column, or where it does not start with a
 * digit or '-', are ignored.  Aborts if no numbers are found. */
{
int count = 0;
size_t alloc = 1024;
double *array;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[128], *word;
int wordCount;
int wordIx = col-1;

AllocArray(array, alloc);
while ((wordCount = lineFileChop(lf, words)) > 0)
    {
    /* Only the first wordCount slots of words[] are set, so a row shorter
     * than the column (or a column of 0) must not be indexed.  Such rows
     * are skipped, like rows with non-numeric data. */
    if (wordIx < 0 || wordIx >= wordCount)
        continue;
    if (count >= alloc)
        {
        alloc <<= 1;
        ExpandArray(array, count, alloc);
        }
    word = words[wordIx];
    if (word[0] == '-' || isdigit(word[0]))
        {
        array[count++] = atof(word);
        }
    }
lineFileClose(&lf);
if (count == 0)
    errAbort("No numerical data column %d of %s", col, fileName);
qsort(array, count, sizeof(array[0]), cmpDouble);
showStats(array, count);
}
struct finf *finfReadNext(struct lineFile *lf)
/* Read in next finf from file, or NULL at EOF.
 * A line must have exactly seven words.  Word 1 is converted with gsToUcsc
 * to give the name, words 3 and 4 are start (made 0-based) and end, word 6
 * is either '?' or "chainId,linkIx/linkCount", and word 7 is copied into
 * endInfo, truncated if it does not fit.
 * NOTE(review): gsToUcsc writes into a 32 byte buffer; whether it bounds
 * its output cannot be seen from here - confirm. */
{
char ucscName[32];
char *parts[4], *words[16];
int partCount, wordCount;
struct finf *finf;

wordCount = lineFileChop(lf, words);
if (wordCount <= 0)
    return NULL;
lineFileExpectWords(lf, 7, wordCount);
AllocVar(finf);
gsToUcsc(words[0], ucscName);
finf->name = cloneString(ucscName);
finf->start = atoi(words[2])-1;
finf->end = atoi(words[3]);
if (words[5][0] != '?')
    {
    partCount = chopString(words[5], ",/", parts, ArraySize(parts));
    if (partCount != 3)
        errAbort("Misformed field 6 line %d of %s\n", lf->lineIx, lf->fileName);
    finf->chainId = atoi(parts[0]);
    finf->linkIx = atoi(parts[1]);
    finf->linkCount = atoi(parts[2]);
    }
/* strncpy does not terminate the destination when the source fills it, so
 * copy one byte less than the buffer holds and terminate explicitly. */
strncpy(finf->endInfo, words[6], sizeof(finf->endInfo) - 1);
finf->endInfo[sizeof(finf->endInfo) - 1] = 0;
return finf;
}
Beispiel #13
0
void bedIntersect(char *aFile, char *bFile, char *outFile)
/* bedIntersect - Intersect two bed files.
 * bFile is loaded with readBed into a hash of binKeepers keyed by chromosome.
 * Each record of aFile (tab-chopped when strictTab is set) is looked up
 * there, and every overlapping b record whose coverage from getCov reaches
 * minCoverage is written with outputBed.  With aHitAny only the first such
 * b record is written per a record, and the rejected ones are reported via
 * verbose.  A zero-length a record is an error unless allowStartEqualEnd is
 * set, in which case it is looked up with one base of padding on each side. */
{
struct lineFile *lf = lineFileOpen(aFile, TRUE);
struct hash *bHash = readBed(bFile);
FILE *f = mustOpen(outFile, "w");
char *row[40];
int wordCount;

while ((wordCount = (strictTab ? lineFileChopTab(lf, row) : lineFileChop(lf, row))) != 0)
    {
    /* row[1] and row[2] are only set when the line has three fields. */
    if (wordCount < 3)
        lineFileAbort(lf, "Expecting at least 3 fields, got %d", wordCount);
    char *chrom = row[0];
    int start = lineFileNeedNum(lf, row, 1);
    int end = lineFileNeedNum(lf, row, 2);
    if (start > end)
        errAbort("start after end line %d of %s", lf->lineIx, lf->fileName);
    if (start == end && !allowStartEqualEnd)
        lineFileAbort(lf, "start==end (if this is legit, use -allowStartEqualEnd)");
    struct binKeeper *bk = hashFindVal(bHash, chrom);
    if (bk != NULL)
        {
        struct binElement *hitList = NULL, *hit;
        if (allowStartEqualEnd && start == end)
            hitList = binKeeperFind(bk, start-1, end+1);
        else
            hitList = binKeeperFind(bk, start, end);
        if (aHitAny)
            {
            for (hit = hitList; hit != NULL; hit = hit->next)
                {
                float cov = getCov(start, end, hit->val);
                if (cov >= minCoverage)
                    {
                    outputBed(f, row, wordCount, start, end, hit->val);
                    break;
                    }
                else
                    {
                    struct bed5 *b = hit->val;
                    verbose(1, "filter out %s %d %d %d %d overlap %d %d %d %.3f\n",
                            chrom, start, end, b->start, b->end,
                            positiveRangeIntersection(start, end, b->start, b->end),
                            end-start, b->end-b->start, cov);
                    }
                }
            }
        else
            {
            for (hit = hitList; hit != NULL; hit = hit->next)
                {
                if (getCov(start, end, hit->val) >= minCoverage)
                    outputBed(f, row, wordCount, start, end, hit->val);
                }
            }
        slFreeList(&hitList);
        }
    }
/* Close input and output; both used to be left open, so the output was only
 * flushed at process exit. */
lineFileClose(&lf);
carefulClose(&f);
}
Beispiel #14
0
void setupHugeGaps(char *insertFile)
/* Setup things to lookup gaps.
 * Fills the file-level hugeHash with one chromGaps per chromosome named in
 * insertFile.  Lines use word 1 (chromosome), word 3 (contig, "-" means skip
 * the line) and word 4 (gap size).  The gap offset is the contig's
 * chromStart in table ctgPos of database hg4; the contig must exist there
 * and be on the same chromosome.  Each chromosome's gap list is finally
 * sorted with cmpHugeGap. */
{
struct lineFile *lf;
char *words[8];
int wordCount;
struct chromGaps *chromList = NULL, *cg;
struct hugeGap *gap;
char *chrom;
char query[512];
struct sqlResult *sr;
char **row;
struct ctgPos ctgPos;
int start, size;
struct hashEl *hel;
struct sqlConnection *conn = sqlConnect("hg4");

hugeHash = newHash(6);
lf = lineFileOpen(insertFile, TRUE);
while ((wordCount = lineFileChop(lf, words)) != 0)
     {
     /* words[2] is needed to decide whether the line is skipped ... */
     if (wordCount < 3)
         errAbort("Expecting at least 3 words line %d of %s", lf->lineIx, lf->fileName);
     chrom = words[0];
     if (sameString(words[2], "-"))
         continue;
     /* ... and words[3] only for the lines that are kept. */
     if (wordCount < 4)
         errAbort("Expecting at least 4 words line %d of %s", lf->lineIx, lf->fileName);
     if ((cg = hashFindVal(hugeHash, chrom)) == NULL)
         {
         AllocVar(cg);
         slAddHead(&chromList, cg);
         hel = hashAdd(hugeHash, chrom, cg);
         cg->chrom = hel->name;
         }
     size = atoi(words[3]);
     sqlSafef(query, sizeof query, "select * from ctgPos where contig = '%s'", words[2]);
     sr = sqlGetResult(conn, query);
     if ((row = sqlNextRow(sr)) == NULL)
        errAbort("Couldn't find %s from %s in database", words[2], lf->fileName);
     ctgPosStaticLoad(row, &ctgPos);
     if (!sameString(chrom, ctgPos.chrom))
         errAbort("%s is in %s in database and %s in %s", ctgPos.contig, ctgPos.chrom,
                chrom, lf->fileName);
     start = ctgPos.chromStart;
     uglyf("%s %s (%d size %d) %s \n", chrom, words[1], start, size, words[2]);
     sqlFreeResult(&sr);

     AllocVar(gap);
     slAddHead(&cg->gapList, gap);
     gap->offset = start;
     gap->size = size;
     }
lineFileClose(&lf);
sqlDisconnect(&conn);
for (cg = chromList; cg != NULL; cg = cg->next)
    {
    slSort(&cg->gapList, cmpHugeGap);
    }
}
Beispiel #15
0
void gapFileToTable(struct sqlConnection *conn, char *gapFileName,
                    char *gapTableName)
/* Build a single gap table from a single gap file.
 * Gap lines of gapFileName (word 5 starting with N or U) are written, with
 * start made 0-based and preceded by their bin, to <gapTableName>.tab.  The
 * file-level maxChromNameSize is raised to the longest chromosome name seen
 * (at most 254).  Unless noLoad is set the table is then recreated, from
 * createGapUnsplit or createGapSplit depending on unsplit, and loaded from
 * the tab file, which is removed afterwards. */
{
struct lineFile *lf = lineFileOpen(gapFileName, TRUE);
char tabFileName[256];
FILE *tabFile = NULL;
char *words[16];
int wordCount;

safef(tabFileName, sizeof(tabFileName), "%s.tab", gapTableName);
tabFile = mustOpen(tabFileName, "w");
while ((wordCount = lineFileChop(lf, words)) > 0)
    {
    if (wordCount < 5)
        errAbort("Short line %d of %s", lf->lineIx, lf->fileName);
    if (words[4][0] == 'N' || words[4][0] == 'U')
        {
        /* agpGapStaticLoad reads every column of a gap row; AGP gap rows
         * have eight, so five words are not enough to load one safely. */
        if (wordCount < 8)
            errAbort("Short line %d of %s", lf->lineIx, lf->fileName);
        int len = strlen(words[0]);
        if (len > maxChromNameSize)
            {
            maxChromNameSize = len;
            if (maxChromNameSize > 254)
                errAbort("ERROR: chrom name size is over 254(%d) characters: "
                        "'%s'", maxChromNameSize, words[0]);
            }
        struct agpGap gap;
        agpGapStaticLoad(words, &gap);
        gap.chromStart -= 1;
        fprintf(tabFile, "%u\t", hFindBin(gap.chromStart, gap.chromEnd));
        agpGapTabOut(&gap, tabFile);
        }
    }
lineFileClose(&lf);
/* A failed close means buffered rows did not reach the disk; do not go on
 * to load a truncated file. */
if (fclose(tabFile) != 0)
    errAbort("Error closing %s", tabFileName);

if (! noLoad)
    {
    struct dyString *ds = newDyString(2048);
    if (unsplit)
        sqlDyStringPrintf(ds,  createGapUnsplit, gapTableName,
                maxChromNameSize, maxChromNameSize);
    else
        sqlDyStringPrintf(ds, createGapSplit, gapTableName);
    char query[1024];
    sqlRemakeTable(conn, gapTableName, ds->string);
    sqlSafef(query, sizeof(query), "LOAD data local infile '%s' into table %s", 
          tabFileName, gapTableName);
    sqlUpdate(conn, query);
    remove(tabFileName);
    freeDyString(&ds);
    }
}
Beispiel #16
0
struct vcfRecord *vcfNextRecord(struct vcfFile *vcff)
/* Chop the next line of vcff's lineFile and build a vcfRecord from it with
 * vcfRecordFromRow.  Returns NULL at end of file.  The line must have exactly
 * 8 words, or 9 plus one per genotype when the file has genotypes.
 * Note: the record is only returned, not stored in vcff->records! */
{
char *row[VCF_MAX_COLUMNS];
int rowSize = lineFileChop(vcff->lf, row);
if (rowSize <= 0)
    return NULL;
int needed = (vcff->genotypeCount > 0) ? 9 + vcff->genotypeCount : 8;
lineFileExpectWords(vcff->lf, needed, rowSize);
return vcfRecordFromRow(vcff, row);
}
Beispiel #17
0
struct scaffold *readScaffoldsFromAgp(char *fileName)
/* Load agp file fileName and return its scaffolds in order of first
 * appearance, each with its fragments in file order.  Gap lines (type
 * starting with N or U) need 8 words and are skipped; fragment lines need 9.
 * Fragment starts are made 0-based.  A fragment must be on the + strand and
 * span the same number of bases in scaffold and fragment coordinates.  A
 * scaffold's size is the largest fragment end seen for it. */
{
struct hash *byName = newHash(17);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct scaffold *scaffolds = NULL, *sc;
char *fields[9];
int fieldCount;

while ((fieldCount = lineFileChop(lf, fields)) > 0)
    {
    struct agpFrag *piece;
    int span;
    char type;
    if (fieldCount < 8)
        lineFileShort(lf);
    type = fields[4][0];
    if (type == 'N' || type == 'U')
        continue;
    if (fieldCount < 9)
        lineFileShort(lf);
    piece = agpFragLoad(fields);
    piece->chromStart -= 1;
    piece->fragStart -= 1;
    span = piece->fragEnd - piece->fragStart;
    if (piece->chromEnd - piece->chromStart != span)
        errAbort("scaffold/contig size mismatch line %d of %s", lf->lineIx, lf->fileName);
    if (piece->strand[0] != '+')
        errAbort("Strand not + line %d of %s", lf->lineIx, lf->fileName);
    sc = hashFindVal(byName, piece->chrom);
    if (sc == NULL)
        {
        AllocVar(sc);
        hashAdd(byName, piece->chrom, sc);
        slAddHead(&scaffolds, sc);
        }
    slAddHead(&sc->list, piece);
    if (sc->size < piece->chromEnd)
        sc->size = piece->chromEnd;
    }
/* Both levels were built by adding at the head, so flip them to file order. */
slReverse(&scaffolds);
for (sc = scaffolds; sc != NULL; sc = sc->next)
    slReverse(&sc->list);
printf("Got %d scaffolds in %s\n", slCount(scaffolds), lf->fileName);
lineFileClose(&lf);
hashFree(&byName);
return scaffolds;
}
Beispiel #18
0
int firstLinePos(char *fileName)
/* Return position of first line: the number in the first column of the
 * first line of fileName, or 0 if the file has no lines.  Aborts if that
 * line does not have exactly two words. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[3];
int retVal = 0;
int wordCount = lineFileChop(lf, words);
if (wordCount > 0)
    {
    if (wordCount != 2)
        errAbort("%s is not a two column file", fileName);
    retVal = lineFileNeedNum(lf, words, 0);
    }
/* Close on every path; an empty file used to leave the lineFile open. */
lineFileClose(&lf);
return retVal;
}
Beispiel #19
0
struct rmskOut2 *rmskOut2ReadNext(struct lineFile *lf)
/* Read next record from repeat masker file.  Return NULL at EOF.
 * A line must have exactly 15 words.  Divergence, deletion and insertion
 * (words 2-4) are stored times ten, rounded.  genoStart and repStart are
 * made 0-based.  Strand "C" is stored as '-', "+" as '+', anything else
 * aborts.  Word 11 is split by parseClassAndFamily into class and family.
 * NOTE(review): word 15 is required but not stored anywhere - confirm
 * whether the record is meant to carry it.
 * NOTE(review): RepeatMasker's .out layout puts the parenthesized "left"
 * value in word 12 for C hits and in word 14 for + hits; the mapping below
 * looks like the reverse - confirm against the files this reads. */
{
char *words[32];
int wordCount;
struct rmskOut2 *ret;
char *class, *family;
int isComplement;

if ((wordCount = lineFileChop(lf, words)) == 0)
    return NULL;
if (wordCount != 15 )
    errAbort("Expecting 15 words - line %d of %s", lf->lineIx, lf->fileName);

AllocVar(ret);
ret->swScore = lineFileNeedNum(lf, words, 0);
ret->milliDiv = round(10.0*atof(words[1]));
ret->milliDel = round(10.0*atof(words[2]));
ret->milliIns = round(10.0*atof(words[3]));
ret->genoName = cloneString(words[4]);
ret->genoStart = lineFileNeedNum(lf, words, 5)-1;
ret->genoEnd = lineFileNeedNum(lf, words, 6);
ret->genoLeft = -negParenNum2(lf, words[7]);
/* The strand decides both the stored strand character and which of words
 * 12 and 14 is taken as start and which as left, so test it once. */
isComplement = sameString(words[8], "C");
if (isComplement)
    ret->strand[0] = '-';
else if (sameString(words[8], "+"))
    ret->strand[0] = '+';
else
    errAbort("Unexpected strand char line %d of %s", lf->lineIx, lf->fileName);
ret->repName = cloneString(words[9]);
parseClassAndFamily(words[10], &class, &family);
ret->repClass = cloneString(class);
ret->repFamily = cloneString(family);
if (isComplement)
    {
    ret->repStart = negParenNum2(lf, words[11])-1;
    ret->repEnd = sqlSigned(words[12]);
    ret->repLeft = -negParenNum2(lf, words[13]);
    }
else
    {
    ret->repLeft = -negParenNum2(lf, words[11]);
    ret->repEnd = sqlSigned(words[12]);
    ret->repStart = negParenNum2(lf, words[13])-1;
    }
return ret;
}
Beispiel #20
0
struct hash *hashTwoColumnFile(char *fileName)
/* Build a hash from fileName, which must have exactly two words per line:
 * the first is the key, the second the value.  Values are strings cloned
 * into the hash's own lm memory. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *pairs = hashNew(16);
char *cols[3];
int colCount;
for (;;)
    {
    colCount = lineFileChop(lf, cols);
    if (colCount == 0)
        break;
    lineFileExpectWords(lf, 2, colCount);
    hashAdd(pairs, cols[0], lmCloneString(pairs->lm, cols[1]));
    }
lineFileClose(&lf);
return pairs;
}
Beispiel #21
0
void doContig(char *dir, char *chrom, char *contig)
/* Sniff contig for dupes.
 * Reads the file dir/goldName and, for every non-gap line (word 5 not
 * starting with N or U), takes the fragment name from word 6 and the clone
 * name as the fragment name with its suffix chopped.  A fragment already in
 * the file-level fragHash, or a clone already in cloneHash for a different
 * contig, is reported on stdout and counted in errCount.  Fragments and
 * clones not seen before are added to the hashes. */
{
char fileName[512];
char *words[16];
int wordCount;
int nameLen;
struct lineFile *lf;

/* Bounded formatting: dir comes from the caller and must not overrun
 * fileName, which plain sprintf would allow. */
nameLen = snprintf(fileName, sizeof(fileName), "%s/%s", dir, goldName);
if (nameLen < 0 || nameLen >= (int)sizeof(fileName))
    errAbort("File name too long: %s/%s", dir, goldName);
lf = lineFileOpen(fileName, TRUE);
while ((wordCount = lineFileChop(lf, words)) != 0)
    {
    if (wordCount < 8)
        errAbort("Short line %d of %s", lf->lineIx, lf->fileName);
    if (words[4][0] != 'N' && words[4][0] != 'U')
        {
        char *frag = words[5];
        char cloneName[256];
        struct cloneLoc *cl;
        /* The fragment name comes straight from the file; refuse names
         * that would overflow cloneName instead of copying them blindly. */
        if (strlen(frag) >= sizeof(cloneName))
            errAbort("Fragment name too long line %d of %s", lf->lineIx, lf->fileName);
        strcpy(cloneName, frag);
        chopSuffix(cloneName);
        cl = hashFindVal(fragHash, frag);
        if (cl != NULL)
            {
            printf("%s duplicated in %s/%s and %s/%s\n", frag, cl->chrom, cl->contig, chrom, contig);
            ++errCount;
            }
        else 
            {
            cl = hashFindVal(cloneHash, cloneName);
            if (cl != NULL && !sameString(contig, cl->contig))
                {
                printf("%s duplicated in %s/%s and %s/%s\n", cloneName, cl->chrom, cl->contig, chrom, contig);
                ++errCount;
                }
            if (cl == NULL)
                {
                cl = cloneLocNew(cloneName, contig, chrom);
                hashAdd(cloneHash, cloneName, cl);
                }
            hashAdd(fragHash, frag, cl);
            }
        }
    }
lineFileClose(&lf);
}
Beispiel #22
0
struct rgi *readRgi(char *inName)
/* Load every line of inName, which must have exactly four words, with
 * rgiLoad and return the records as a list in file order.  Each record is
 * also echoed through uglyf. */
{
struct lineFile *lf = lineFileOpen(inName, TRUE);
struct rgi *records = NULL;
char *row[8];
int rowSize;

for (;;)
    {
    struct rgi *one;
    rowSize = lineFileChop(lf, row);
    if (rowSize == 0)
        break;
    lineFileExpectWords(lf, 4, rowSize);
    one = rgiLoad(row);
    slAddHead(&records, one);
    uglyf("%s %s: min %d, max %d\n", one->a, one->b, one->minDistance, one->maxDistance);
    }
lineFileClose(&lf);
slReverse(&records);
return records;
}
Beispiel #23
0
Datei: vcf.c Projekt: bh0085/kent
static void vcfParseData(struct vcfFile *vcff, int maxRecords)
/* Read data rows from vcff->lf into vcff->records, kept in file order.  The
 * header must already be parsed and the lineFile positioned at the first
 * data row.  A non-negative maxRecords limits how many rows are stored (the
 * row that hits the limit is read and dropped); a negative value means no
 * limit.  The lineFile is closed when done.  Does nothing if vcff is NULL. */
{
if (vcff == NULL)
    return;
int columnsNeeded = (vcff->genotypeCount > 0) ? 9 + vcff->genotypeCount : 8;
int stored = 0;
char *row[VCF_MAX_COLUMNS];
int rowSize;
for (;;)
    {
    rowSize = lineFileChop(vcff->lf, row);
    if (rowSize <= 0)
        break;
    if (maxRecords >= 0 && stored >= maxRecords)
        break;
    lineFileExpectWords(vcff->lf, columnsNeeded, rowSize);
    struct vcfRecord *rec;
    AllocVar(rec);
    rec->file = vcff;
    rec->chrom = vcfFilePooledStr(vcff, row[0]);
    rec->chromStart = lineFileNeedNum(vcff->lf, row, 1) - 1;
    // Default to a one base item; parseRefAndAlt and parseInfoColumn may change chromEnd.
    rec->chromEnd = rec->chromStart+1;
    rec->name = vcfFilePooledStr(vcff, row[2]);
    parseRefAndAlt(vcff, rec, row[3], row[4]);
    rec->qual = vcfFilePooledStr(vcff, row[5]);
    parseFilterColumn(vcff, rec, row[6]);
    parseInfoColumn(vcff, rec, row[7]);
    if (vcff->genotypeCount > 0)
        {
        int gt;
        rec->format = vcfFilePooledStr(vcff, row[8]);
        rec->genotypeUnparsedStrings = vcfFileAlloc(vcff,
                                                    vcff->genotypeCount * sizeof(char *));
        // Genotype columns are only copied here, not parsed.
        for (gt = 0;  gt < vcff->genotypeCount;  gt++)
            rec->genotypeUnparsedStrings[gt] = vcfFileCloneStr(vcff, row[9+gt]);
        }
    slAddHead(&(vcff->records), rec);
    stored++;
    }
slReverse(&(vcff->records));
lineFileClose(&(vcff->lf));
}
void aveNoQuartiles(char *fileName)
/* aveNoQuartiles - Compute only min,max,mean,stdDev no quartiles.
 * Works on column col (1-based, a file-level setting) of fileName in one
 * pass without storing the values.  Rows that do not reach the column, or
 * where it does not start with a digit or '-', are ignored.  Prints one
 * table row when tableOut is set, otherwise one labelled line per
 * statistic.  Aborts if no numbers are found. */
{
bits64 count = 0;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[128], *word;
int wordCount;
int wordIx = col-1;
double sumData = 0.0, sumSquares = 0.0;
double minVal = DBL_MAX, maxVal = -DBL_MAX;

while ((wordCount = lineFileChop(lf, words)) > 0)
    {
    /* Only the first wordCount slots of words[] are set, so a row shorter
     * than the column (or a column of 0) must not be indexed.  Such rows
     * are skipped, like rows with non-numeric data. */
    if (wordIx < 0 || wordIx >= wordCount)
        continue;
    word = words[wordIx];
    if (word[0] == '-' || isdigit(word[0]))
        {
        double val = sqlDouble(word);
        if (minVal > val) minVal = val;
        if (maxVal < val) maxVal = val;
        sumData += val;
        sumSquares += val * val;
        ++count;
        }
    }
lineFileClose(&lf);
if (count == 0)
    errAbort("No numerical data column %d of %s", col, fileName);
double average = sumData/count;
double stdDev = calcStdFromSums(sumData, sumSquares, count);
if (tableOut)
    {
    printf("# min max mean N sum stddev\n");
    printf("%g %g %g %llu %g %g\n",
        minVal, maxVal, average, count, sumData, stdDev);
    }
else
    {
    printf("average %f\n", average);
    printf("min %f\n", minVal);
    printf("max %f\n", maxVal);
    printf("count %llu\n", count);
    printf("total %f\n", sumData);
    printf("standard deviation %f\n", stdDev);
    }
}
struct hash *readAgp(char *fileName)
/* Return a new hash filled from AGP file fileName: the sixth word of every
 * nine-word line whose fifth word is not "N" is passed to addCloneToHash.
 * All other lines are ignored.  Reports the line count through uglyf. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *cloneHash = newHash(0);
char *fields[16];
int fieldCount;

for (;;)
    {
    fieldCount = lineFileChop(lf, fields);
    if (fieldCount <= 0)
        break;
    if (fieldCount != 9 || sameString(fields[4], "N"))
        continue;
    addCloneToHash(cloneHash, fields[5]);
    }
uglyf("Read %d lines in %s\n", lf->lineIx, fileName);
lineFileClose(&lf);
return cloneHash;
}
struct liftSpec *readLifts(char *fileName)
/* Parse lift file fileName into a list of liftSpecs in file order.
 * Words per line: offset, old name, old size, new name, new size and an
 * optional strand, which must start with + or - and is taken as + when
 * absent.  Aborts on lines with fewer than five words, on non-numeric
 * offset or new size, and if the file holds no lifts at all. */
{
    struct liftSpec *lifts = NULL;
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    char *row[16];
    int rowSize;

    for (;;)
    {
        struct liftSpec *lift;
        char *offsetText;
        rowSize = lineFileChop(lf, row);
        if (rowSize == 0)
            break;
        if (rowSize < 5)
            errAbort("Need at least 5 words line %d of %s", lf->lineIx, lf->fileName);
        offsetText = row[0];
        /* The offset may be negative, the new size may not. */
        if (!(isdigit(offsetText[0]) || (offsetText[0] == '-' && isdigit(offsetText[1]))))
            errAbort("Expecting number in first field line %d of %s", lf->lineIx, lf->fileName);
        if (!isdigit(row[4][0]))
            errAbort("Expecting number in fifth field line %d of %s", lf->lineIx, lf->fileName);
        AllocVar(lift);
        lift->offset = atol(offsetText);
        lift->oldName = cloneString(row[1]);
        lift->oldSize = atoi(row[2]);
        lift->newName = cloneString(row[3]);
        lift->newSize = atoi(row[4]);
        if (rowSize < 6)
            lift->strand = '+';
        else
        {
            char strandChar = row[5][0];
            if (strandChar != '+' && strandChar != '-')
                errAbort("Expecting + or - field 6, line %d of %s", lf->lineIx, lf->fileName);
            lift->strand = strandChar;
        }
        slAddHead(&lifts, lift);
    }
    slReverse(&lifts);
    lineFileClose(&lf);
    if (lifts == NULL)
        errAbort("Empty liftSpec file %s", fileName);
    return lifts;
}
Beispiel #27
0
void tempLower(char *inName, char *insertsFile, char *outName)
/* tempLower - Remove centromeres etc. from oo.18 cytobands. */
{
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = mustOpen(outName, "w");
int wordCount, i;
int start, end, offset;
char *chrom;
char *words[128];
int count = 0, liftCount = 0;
struct chromGaps *cg;

setupHugeGaps(insertsFile);
while ((wordCount = lineFileChop(lf, words)) != 0)
    {
    chrom = words[0];
    start = atoi(words[1]);
    end = atoi(words[2]);

    cg = hashFindVal(hugeHash, chrom);
    if (cg != NULL)
        {
	offset = gapOffset(cg, start);
	if (offset != 0)
	    {
	    start += offset;
	    end += offset;
	    liftCount += 1;
	    }
	}

    fprintf(f, "%s\t%d\t%d", chrom, start, end);
    for (i=3; i<wordCount; ++i)
        fprintf(f, "\t%s", words[i]);
    fprintf(f, "\n");
    ++count;
    }
printf("Lifted %d of %d lines of %s to %s\n", liftCount, count, inName, outName);
fclose(f);
lineFileClose(&lf);
}
struct clone *readTrans(char *fileName)
/* Read trans file fileName into a list of clones, each with its fragments.
 * Lines must have three words: fragment name, ffa name, and a field that
 * splits on the characters "(:)" into two parts, the second being
 * start..end.  fragToCloneName gives the clone name for a fragment; a new
 * clone is started whenever that name differs from the previous line's.
 * Fragment start is made 0-based.  Clones are returned in file order.
 * NOTE(review): if the first line's clone name came back empty no clone
 * would exist when its fragment is added - presumably that cannot happen,
 * confirm against fragToCloneName. */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    struct clone *clones = NULL, *current = NULL;
    char thisName[128], prevName[128];
    char *row[8], *pieces[4], *range[3];
    int rowSize;

    prevName[0] = 0;
    for (;;)
    {
        struct frag *piece;
        rowSize = lineFileChop(lf, row);
        if (rowSize == 0)
            break;
        lineFileExpectWords(lf, 3, rowSize);
        if (chopString(row[2], "(:)", pieces, ArraySize(pieces)) != 2)
            errAbort("Badly formatted third field line %d of %s",
                     lf->lineIx, lf->fileName);
        if (chopString(pieces[1], ".", range, ArraySize(range)) != 2)
            errAbort("Badly formatted third field line %d of %s (expecting start..end)",
                     lf->lineIx, lf->fileName);
        fragToCloneName(row[0], thisName);
        if (!sameString(thisName, prevName))
        {
            AllocVar(current);
            current->name = cloneString(thisName);
            slAddHead(&clones, current);
        }
        AllocVar(piece);
        piece->name = cloneString(row[0]);
        piece->ffaName = cloneString(row[1]);
        piece->start = lineFileNeedNum(lf, range, 0) - 1;
        piece->end = lineFileNeedNum(lf, range, 1);
        slAddTail(&current->fragList, piece);
        strcpy(prevName, thisName);
    }
    lineFileClose(&lf);
    slReverse(&clones);
    return clones;
}
Beispiel #29
0
struct chain *chainReadChainLine(struct lineFile *lf)
/* Read the next line, which must start with the word "chain", and return a
 * newly allocated chain filled in from it; NULL at end of file.  The link
 * lines that follow are not read.  Words after "chain" are: score, tName,
 * tSize, tStrand (ignored), tStart, tEnd, qName, qSize, qStrand, qStart,
 * qEnd and an optional id; without one chainIdNext supplies the id.
 * Aborts unless 0 <= start < end <= size holds for both sequences. */
{
char *fields[13];
int fieldCount;
struct chain *chain;

fieldCount = lineFileChop(lf, fields);
if (fieldCount == 0)
    return NULL;
if (fieldCount < 12)
    errAbort("Expecting at least 12 words line %d of %s", 
        lf->lineIx, lf->fileName);
if (!sameString(fields[0], "chain"))
    errAbort("Expecting 'chain' line %d of %s", lf->lineIx, lf->fileName);
AllocVar(chain);
chain->score = atof(fields[1]);
chain->tName = cloneString(fields[2]);
chain->tSize = lineFileNeedNum(lf, fields, 3);
if (fieldCount < 13)
    chainIdNext(chain);
else
    chain->id = lineFileNeedNum(lf, fields, 12);

/* fields[4], the target strand, is not stored; it is taken to be + */
chain->tStart = lineFileNeedNum(lf, fields, 5);
chain->tEnd = lineFileNeedNum(lf, fields, 6);
chain->qName = cloneString(fields[7]);
chain->qSize = lineFileNeedNum(lf, fields, 8);
chain->qStrand = fields[9][0];
chain->qStart = lineFileNeedNum(lf, fields, 10);
chain->qEnd = lineFileNeedNum(lf, fields, 11);
if (chain->qEnd <= chain->qStart || chain->tEnd <= chain->tStart)
    errAbort("End before start line %d of %s", lf->lineIx, lf->fileName);
if (chain->qStart < 0 || chain->tStart < 0)
    errAbort("Start before zero line %d of %s", lf->lineIx, lf->fileName);
if (chain->qSize < chain->qEnd || chain->tSize < chain->tEnd)
    errAbort("Past end of sequence line %d of %s", lf->lineIx, lf->fileName);
return chain;
}
Beispiel #30
0
struct agpFrag *readAgpFile(char *agpName)
/* Read agps from file.
 * Returns the fragment lines of agpName as a list of agpFrags in file
 * order.  Gap lines (word 5 starting with N or U) are skipped; every other
 * line must have exactly nine words and is loaded with agpFragLoad. */
{
struct lineFile *lf = lineFileOpen(agpName, TRUE);
int wordCount;
char *words[16];
struct agpFrag *list = NULL, *el;

while ((wordCount = lineFileChop(lf, words)) != 0)
    {
    /* words[4] is only set when the line has five words. */
    lineFileExpectAtLeast(lf, 5, wordCount);
    /* U marks a gap of unknown size in AGP, just as N marks a sized gap;
     * U lines used to fall through to the nine-word fragment check. */
    if (words[4][0] != 'N' && words[4][0] != 'U')
        {
        lineFileExpectWords(lf, 9, wordCount);
        el = agpFragLoad(words);
        slAddHead(&list, el);
        }
    }
lineFileClose(&lf);
slReverse(&list);
return list;
}