boolean faSizeNext(struct lineFile *lf, char retLine[512], int *retSize)
/* Get > line and size for next fa record.  Return FALSE at end of file.
 * The header line (including its leading '>') is copied into retLine,
 * cut to at most 511 characters and always zero-terminated.  *retSize
 * receives the sum of alphaCount() over the lines up to the next '>'
 * line; that line is pushed back with lineFileReuse for the next call. */
{
char *line;
int size = 0;

/* Skip ahead to the next header line. */
for (;;)
    {
    if (!lineFileNext(lf, &line, NULL))
        return FALSE;
    if (line[0] == '>')
        break;
    }
/* strncpy leaves the destination unterminated when the source is at least
 * as long as the limit, so copy one less than the buffer and terminate
 * by hand. */
strncpy(retLine, line, 511);
retLine[511] = 0;

/* Loop around counting DNA-looking characters until next record. */
for (;;)
    {
    if (!lineFileNext(lf, &line, NULL))
        break;
    if (line[0] == '>')
        {
        lineFileReuse(lf);
        break;
        }
    size += alphaCount(line);
    }
*retSize = size;
return TRUE;
}
Exemplo n.º 2
0
void lineRange(char *fileName, int start, int count)
/* lineRange - Get a range of lines from file.
 * Writes up to count lines of fileName to stdout, beginning with line
 * number start (1-based).  Aborts if start or count is not positive or if
 * the file ends while skipping to the start line; fewer than count lines
 * are written without complaint if the file ends early after that. */
{
struct lineFile *lf;
char *line;
int lineSize;
int i;

/* The message promises "positive", so reject negatives as well as zero. */
if (count <= 0 || start <= 0)
    errAbort("Expecting positive number for start, count in command line");
lf = lineFileOpen(fileName, FALSE);

/* Skip over first lines. */
for (i=1; i<start; ++i)
    {
    if (!lineFileNext(lf, &line, &lineSize))
        errAbort("%s doesn't have %d lines", fileName, start);
    }
/* Print count lines. */
for (i=0; i<count; ++i)
    {
    if (!lineFileNext(lf, &line, &lineSize))
        break;
    mustWrite(stdout, line, lineSize);
    }
lineFileClose(&lf);
}
Exemplo n.º 3
0
boolean read_fastq_auto(struct fastq_auto *fq, struct lineFile *lf, boolean just_seq_qual)
/* Fill in fastq struct from open lineFile.  Return FALSE if at EOF.
 * Set just_seq_qual=TRUE to skip loading everything except the sequence
 * and quality information.
 * Aborts on a missing or malformed header, on EOF before the '+' line,
 * and when the sequence and quality lengths differ.  Quality characters
 * are shifted down by 64 before returning.
 * NOTE(review): fq->seq and fq->qual are filled with strcat and no bound
 * is visible here - confirm the buffers are large enough for any read. */
{
    char *line;
    int len = 0;
    boolean neof;
    int i;
    int qual_size;

    if (!lineFileNext(lf, &line, &len))
        return FALSE;

    /* should be header */
    if ((len <= 0) || (line[0] != '@'))
        errAbort("Expecting header. Problem on line %d\n", lf->lineIx);
    if (!just_seq_qual)
    {
        char *words[7];
        int numWords = chopByChar(line, ':', words, 6);
        /* words[0] through words[4] are read below; refuse short headers
         * rather than parsing uninitialised pointers. */
        if (numWords < 5)
            errAbort("Expecting at least 5 ':' separated fields in header. Problem on line %d\n",
                     lf->lineIx);
        strcpy(fq->machine, words[0] + 1);
        fq->flow_cell = sqlSigned(words[1]);
        fq->tile = sqlSigned(words[2]);
        fq->tile_x = sqlSigned(words[3]);
        /* The fifth field is split again at '#' and then at '/'. */
        words[5] = chopPrefixAt(words[4], '#');
        words[6] = chopPrefixAt(words[5], '/');
        fq->tile_y = sqlSigned(words[4]);
        fq->multiplex_index = sqlSigned(words[5]);
        fq->pair_num = sqlSigned(words[6]);
    }

    /* read the sequence */
    fq->seq[0] = '\0';
    while ((neof = lineFileNext(lf, &line, &len)) && (len > 0) && (line[0] != '+'))
        strcat(fq->seq, line);
    if (!neof)
        errAbort("incomplete fastq file.  early EOF");
    fq->seq_size = strlen(fq->seq);

    /* at the point of the quality header.  who cares, read the quality */
    fq->qual[0] = '\0';
    while ((neof = lineFileNext(lf, &line, &len)) && (len > 0) && (line[0] != '@'))
        strcat(fq->qual, line);
    /* Only push a line back when one was actually read; at EOF line and
     * len still describe the previous line. */
    if (neof && (len > 0) && (line[0] == '@'))
        lineFileReuse(lf);
    qual_size = strlen(fq->qual);
    if (qual_size != fq->seq_size)
        errAbort("something wrong line %d.  sequence size (%d) should match quality size (%d)\n",
                 lf->lineIx, fq->seq_size, qual_size);

    /* convert Illumina 1.3+ quals to Sanger */
    for (i = 0; i < qual_size; i++)
        fq->qual[i] -= 64;
    return TRUE;
}
Exemplo n.º 4
0
void viewWaba(char *wabName)
/* Show human readable waba alignment. */
{
struct lineFile *lf = lineFileOpen(wabName, TRUE);
int lineSize;
char *line;
char *qSym;
char *tSym;
char *hSym;
int symCount;
int wordCount, partCount;
char *words[16], *parts[4];
int qStart, qEnd, tStart, tEnd;
char strand;

while (lineFileNext(lf, &line, &lineSize))
    {
    printf("%s\n", line);
    wordCount = chopLine(line, words);
    if (wordCount != 10)
        errAbort("Funny info line %d of %s\n", lf->lineIx, lf->fileName);
    partCount = chopString(words[6], ":-", parts, ArraySize(parts));
    if (partCount != 3)
        errAbort("Bad query range line %d of %s\n", lf->lineIx, lf->fileName);
    qStart = atoi(parts[1]);
    qEnd = atoi(parts[2]);
    strand = words[7][0];
    partCount = chopString(words[8], ":-", parts, ArraySize(parts));
    if (partCount != 3)
        errAbort("Bad target range line %d of %s\n", lf->lineIx, lf->fileName);
    tStart = atoi(parts[1]);
    tEnd = atoi(parts[2]);

    if (!lineFileNext(lf, &line, &lineSize))
        errAbort("Unexpected EOF.");
    symCount = strlen(line);
    qSym = cloneString(line);
    if (!lineFileNext(lf, &line, &lineSize))
        errAbort("Unexpected EOF.");
    tSym = cloneString(line);
    if (!lineFileNext(lf, &line, &lineSize))
        errAbort("Unexpected EOF.");
    hSym = cloneString(line);
    if (strand == '+')
	xenShowAli(qSym, tSym, hSym, symCount, stdout, qStart, tStart, '+', '+', 60);
    else
	xenShowAli(qSym, tSym, hSym, symCount, stdout, qEnd, tStart, '-', '+', 60);
    freeMem(hSym);
    freeMem(tSym);
    freeMem(qSym);
    }
lineFileClose(&lf);
}
Exemplo n.º 5
0
void oneGenieFile(char *fileName, struct hash *uniq, FILE *f)
/* Process one genie peptide prediction file into known and alt tab files.
 * Each '>' record becomes one output line: the record name, a tab, then
 * the record's remaining lines joined together.  A leading abbr prefix is
 * stripped from the name.  Names already in uniq (case insensitive) are
 * warned about and the whole record is dropped. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
int lineSize;
boolean wroteRecord = FALSE;   /* TRUE once any header has been seen */
boolean skip = FALSE;          /* TRUE while inside a duplicate record */

/* Do cursory sanity check, then push the first line back. */
if (!lineFileNext(lf, &line, &lineSize))
    errAbort("%s is empty", fileName);
if (line[0] != '>')
    errAbort("%s is badly formatted, doesn't begin with '>'", fileName);
lineFileReuse(lf);

while (lineFileNext(lf, &line, &lineSize))
    {
    char *name;

    if (line[0] != '>')
        {
        /* Body line: copy it without its last byte unless skipping. */
        if (!skip)
            mustWrite(f, line, lineSize-1);
        continue;
        }

    /* Header line: finish the previous output line first. */
    if (wroteRecord)
        fputc('\n', f);
    wroteRecord = TRUE;

    name = firstWordInLine(line+1);
    if (abbr != NULL && startsWith(abbr, name))
        name += strlen(abbr);

    skip = (hashLookupUpperCase(uniq, name) != NULL);
    if (skip)
        {
        warn("Duplicate (case insensitive) '%s' line %d of %s. Ignoring all but first.", name, lf->lineIx, lf->fileName);
        }
    else
        {
        /* The hash is keyed on the upper-cased name. */
        char *key = cloneString(name);
        touppers(key);
        hashAdd(uniq, key, NULL);
        freeMem(key);
        fprintf(f, "%s\t", name);
        }
    }
fputc('\n', f);
lineFileClose(&lf);
}
Exemplo n.º 6
0
void genericOne(char *fileName, struct hash *uniq, FILE *f)
/* Process one peptide prediction file into tab delimited output f, using
 * uniq hash to make sure no dupes.
 * Each '>' record becomes one output line: name, tab, then the record's
 * remaining lines joined together.  A leading abbr prefix is stripped
 * from the name and suffix, when set, is appended.  A duplicate name
 * (case insensitive) aborts. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
int lineSize;
boolean wroteRecord = FALSE;   /* TRUE once any header has been seen */
char nameBuf[128];

/* Do cursory sanity check, then push the first line back. */
if (!lineFileNext(lf, &line, &lineSize))
    errAbort("%s is empty", fileName);
if (line[0] != '>')
    errAbort("%s is badly formatted, doesn't begin with '>'", fileName);
lineFileReuse(lf);

while (lineFileNext(lf, &line, &lineSize))
    {
    char *name, *key;

    if (line[0] != '>')
        {
        /* Body line: copy it without its last byte. */
        mustWrite(f, line, lineSize-1);
        continue;
        }

    /* Header line: finish the previous output line first. */
    if (wroteRecord)
        fputc('\n', f);
    wroteRecord = TRUE;

    name = firstWordInLine(line+1);
    if (abbr != NULL && startsWith(abbr, name))
        name += strlen(abbr);
    if (suffix != NULL)
        {
        safef(nameBuf, sizeof(nameBuf), "%s%s", name, suffix);
        name = nameBuf;
        }
    if (hashLookupUpperCase(uniq, name) != NULL)
        errAbort("Duplicate (case insensitive) '%s' line %d of %s", name, lf->lineIx, lf->fileName);

    /* The hash is keyed on the upper-cased name. */
    key = cloneString(name);
    touppers(key);
    hashAdd(uniq, key, NULL);
    freeMem(key);
    fprintf(f, "%s\t", name);
    }
fputc('\n', f);
lineFileClose(&lf);
}
Exemplo n.º 7
0
void fastq_stats_read(char *filename, struct fastq_stats *fqs)
/* Read a fastq_stats structure from a file.
 * Expected layout, one item per line: total_count, all_b, skipped_count,
 * kept_count, longest_read; then one tab-separated row each for
 * original_lengths and trimmed_lengths; then FASTQ_SANGER_MAX_QUAL+1 rows
 * for before_quals and the same number for after_quals.  Every row must
 * have exactly longest_read columns.  Aborts on any mismatch. */
{
    struct lineFile *lf = lineFileOpen(filename, TRUE);
    char *line;
    char *words[FASTQ_MAX_LEN];
    int numWords = 0;
    int i, j;

    /* Scalar header section; each message names the line that was missing. */
    if (!lineFileNext(lf, &line, NULL))
        errAbort("bad file, total count section");
    fqs->total_count = sqlUnsignedLong(line);
    if (!lineFileNext(lf, &line, NULL))
        errAbort("bad file, all_b count section");
    fqs->all_b = sqlUnsignedLong(line);
    if (!lineFileNext(lf, &line, NULL))
        errAbort("bad file, skipped count section");
    fqs->skipped_count = sqlUnsignedLong(line);
    if (!lineFileNext(lf, &line, NULL))
        errAbort("bad file, kept count section");
    fqs->kept_count = sqlUnsignedLong(line);
    if (!lineFileNext(lf, &line, NULL))
        errAbort("bad file, longest read section");
    fqs->longest_read = sqlUnsigned(line);

    /* Length histograms: one row each. */
    numWords = lineFileChopTab(lf, words);
    if (numWords != fqs->longest_read)
        errAbort("bad file: original lengths section should have %u cols, has %d", fqs->longest_read, numWords);
    for (i = 0; i < numWords; i++)
        fqs->original_lengths[i] = sqlUnsignedLong(words[i]);
    numWords = lineFileChopTab(lf, words);
    if (numWords != fqs->longest_read)
        errAbort("bad file: trimmed lengths section should have %u cols, has %d", fqs->longest_read, numWords);
    for (i = 0; i < numWords; i++)
        fqs->trimmed_lengths[i] = sqlUnsignedLong(words[i]);

    /* Quality tables: file row i, column j is stored at [j][i]. */
    for (i = 0; i <= FASTQ_SANGER_MAX_QUAL; i++)
    {
        numWords = lineFileChopTab(lf, words);
        if (numWords != fqs->longest_read)
            errAbort("bad file:  before_quals section line %d should have %u cols, has %d", i+1, fqs->longest_read, numWords);
        for (j = 0; j < numWords; j++)
            fqs->before_quals[j][i] = sqlUnsignedLong(words[j]);
    }
    for (i = 0; i <= FASTQ_SANGER_MAX_QUAL; i++)
    {
        numWords = lineFileChopTab(lf, words);
        if (numWords != fqs->longest_read)
            errAbort("bad file:  after_quals section line %d should have %u cols, has %d", i+1, fqs->longest_read, numWords);
        for (j = 0; j < numWords; j++)
            fqs->after_quals[j][i] = sqlUnsignedLong(words[j]);
    }
    lineFileClose(&lf);
}
Exemplo n.º 8
0
struct hash *raNextRecord(struct lineFile *lf)
/* Return a hash containing next record.
 * Returns NULL at end of file.  freeHash this
 * when done.  Note this will free the hash
 * keys and values as well, so you'll have to
 * cloneMem them if you want them for later.
 * A record is a run of "key value" lines ended by a blank line or end of
 * file.  Lines starting with '#' are skipped, except that a line starting
 * with "#EOF" inside a record ends reading: NULL is returned and anything
 * gathered so far is discarded.  A key with no value is stored with an
 * empty string. */
{
struct hash *hash = NULL;
char *line, *key, *val;

/* Skip leading empty lines and comments. */
for (;;)
   {
   if (!lineFileNext(lf, &line, NULL))
       return NULL;
   line = skipLeadingSpaces(line);
   if (line[0] != 0 && line[0] != '#')
       break;
   }
lineFileReuse(lf);

for (;;)
   {
   if (!lineFileNext(lf, &line, NULL))
       break;
   line = skipLeadingSpaces(line);
   if (line[0] == 0)
       break;
   if (line[0] == '#')
       {
       if (startsWith("#EOF", line))
           {
           /* Don't leak a partially filled record. */
           freeHash(&hash);
           return NULL;
           }
       continue;
       }
   if (hash == NULL)
       hash = newHash(7);
   key = nextWord(&line);
   val = trimSpaces(line);
   /* No value after the key: store "" rather than NULL.  The original
    * reset line here, which had no effect on what was stored. */
   if (val == NULL)
       val = "";
   val = lmCloneString(hash->lm, val);
   hashAdd(hash, key, val);
   }
return hash;
}
Exemplo n.º 9
0
void headRest(char *countString, char *inName)
/* headRest - Return all *but* the first N lines of a file.
 * countString is N as decimal text; usage() is called if it does not
 * start with a digit.  The remaining lines of inName go to stdout. */
{
int count;
int i;
char *line;
struct lineFile *lf;

/* ctype functions need an unsigned char value. */
if (!isdigit((unsigned char)countString[0]))
    usage();
count = atoi(countString);
lf = lineFileOpen(inName, TRUE);

/* Skip the first count lines; stop early if the file is shorter. */
for (i=0; i<count; ++i)
    {
    if (!lineFileNext(lf, &line, NULL))
        break;
    }
while (lineFileNext(lf, &line, NULL))
    puts(line);
lineFileClose(&lf);
}
Exemplo n.º 10
0
void oneEnsFile(char *ensFile, struct hash *uniq, struct hash *pToT, FILE *f)
/* Process one ensemble peptide prediction file into tab delimited
 * output f, using uniq hash to make sure no dupes.
 * For each '>' record the translation name comes from findEnsTrans and is
 * looked up in pToT to get the transcript name, which is written followed
 * by a tab and the record's remaining lines joined together.  Aborts on
 * a duplicate translation or one missing from pToT. */
{
struct lineFile *lf = lineFileOpen(ensFile, TRUE);
char *line;
int lineSize;
boolean wroteRecord = FALSE;   /* TRUE once any header has been seen */

/* Do cursory sanity check, then push the first line back. */
if (!lineFileNext(lf, &line, &lineSize))
    errAbort("%s is empty", ensFile);
if (line[0] != '>')
    errAbort("%s is badly formatted, doesn't begin with '>'", ensFile);
lineFileReuse(lf);

while (lineFileNext(lf, &line, &lineSize))
    {
    char *translation, *transcript, *key;

    if (line[0] != '>')
        {
        /* Body line: copy it without its last byte. */
        mustWrite(f, line, lineSize-1);
        continue;
        }

    /* Header line: finish the previous output line first. */
    if (wroteRecord)
        fputc('\n', f);
    wroteRecord = TRUE;

    translation = findEnsTrans(lf, line);
    if (hashLookupUpperCase(uniq, translation) != NULL)
        errAbort("Duplicate (case insensitive) '%s' line %d of %s", translation, lf->lineIx, lf->fileName);

    /* The uniq hash is keyed on the upper-cased name. */
    key = cloneString(translation);
    touppers(key);
    hashAdd(uniq, key, NULL);
    freeMem(key);

    transcript = hashFindVal(pToT, translation);
    if (transcript == NULL)
        errAbort("Can't find transcript for %s", translation);
    fprintf(f, "%s\t", transcript);
    }
fputc('\n', f);
lineFileClose(&lf);
}
Exemplo n.º 11
0
struct raft *readRaftFile(char *fileName)
/* Read in a raft file.
 * The first line must start with "ooGreedy version".  After that a line
 * of 9 or more words whose second word is "raft" starts a new raft, lines
 * of 4 or 6 words are fragments of the current raft, and a blank line
 * ends the current raft.  Returns the rafts in file order.  Aborts on
 * any other line shape and on a fragment with no raft open. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct raft *raftList = NULL, *raft = NULL;
struct raftFrag *rf;
int lineSize, wordCount;
char *line, *words[16];

if (!lineFileNext(lf, &line, &lineSize))
    errAbort("%s is empty", fileName);
if (!startsWith("ooGreedy version", line))
    errAbort("%s isn't a raft file", fileName);

while (lineFileNext(lf, &line, &lineSize))
    {
    wordCount = chopLine(line, words);
    if (wordCount == 0)
        {
        /* Blank line closes the current raft. */
        raft = NULL;
        continue;
        }
    if (wordCount >= 9 && sameString(words[1], "raft"))
        {
        AllocVar(raft);
        raft->name = cloneString(words[0]);
        raft->baseCount = atoi(words[2]);
        raft->fragCount = atoi(words[4]);
        raft->defaultPos = atoi(words[6]);
        slAddHead(&raftList, raft);
        }
    else if (wordCount == 4 || wordCount == 6)
        {
        /* A fragment before any raft line, or after a blank line, used
         * to dereference an unset or NULL raft pointer. */
        if (raft == NULL)
            errAbort("Fragment outside of raft line %d of %s", lf->lineIx, lf->fileName);
        AllocVar(rf);
        rf->name = cloneString(words[2]);
        rf->pos = atoi(words[0]);
        rf->strand = words[1][0];
        rf->size = atoi(words[3]);
        rf->raft = raft;
        slAddTail(&raft->rfList, rf);
        }
    else
        errAbort("Bad line %d of %s", lf->lineIx, lf->fileName);
    }
lineFileClose(&lf);
slReverse(&raftList);
return raftList;
}
int addFa(FILE *f, char *ctgFaName)
/* Append contents of FA file. Return the number of bases written.
 * Only alphabetic characters from non-header lines are sent to f.  A
 * warning is issued for every '>' line after the first. */
{
struct lineFile *lf = lineFileOpen(ctgFaName, TRUE);
char *line;
int lineSize;
int headersSeen = 0;
int basesWritten = 0;

while (lineFileNext(lf, &line, &lineSize))
    {
    char *pt, *lineEnd;

    if (line[0] == '>')
        {
        if (++headersSeen > 1)
           warn("More than one record in %s\n", ctgFaName);
        continue;
        }

    /* Walk the lineSize bytes of the line, keeping only letters. */
    lineEnd = line + lineSize;
    for (pt = line; pt < lineEnd; ++pt)
        {
        char c = *pt;
        if (isalpha(c))
            {
            charOut(f, c);
            ++basesWritten;
            }
        }
    }
lineFileClose(&lf);
return(basesWritten);
}
void faSomeRecords(char *faIn, char *listName, char *faOut)
/* faSomeRecords - Extract multiple fa records.
 * listName is loaded with hashLines.  A record of faIn is copied to faOut
 * when the first word after its '>' is in that list, or when it is not in
 * the list if gExclude is set.  A header with no word after the '>' is
 * never written itself, though its sequence lines are when gExclude is
 * set. */
{
struct hash *hash = hashLines(listName);
char *line, *word;
struct lineFile *lf = lineFileOpen(faIn, TRUE);
FILE *f = mustOpen(faOut, "w");
boolean passMe = FALSE;

while (lineFileNext(lf, &line, NULL))
    {
    if (line[0] == '>')
        {
        /* nextWord cuts the line up, so keep an intact copy to print. */
        char *d = cloneString(line);
        passMe = gExclude;
        line += 1;
        word = nextWord(&line);
        if (word != NULL)
            {
            if (hashLookup(hash, word))
                passMe = !gExclude;
            if (passMe)
                fprintf(f, "%s\n", d);
            }
        freeMem(d);
        }
    else if (passMe)
        {
        fprintf(f, "%s\n", line);
        }
    }
/* Release everything opened above; none of it was closed before. */
carefulClose(&f);
lineFileClose(&lf);
freeHash(&hash);
}
boolean isValidKey(char *key)
/* Verify if a key matches our database key.
 * Returns TRUE when key is exactly equal to some line of keyFile, and
 * FALSE for a NULL or empty key or when no line matches. */
{
struct lineFile *file = NULL;
char *line = NULL;
int len = 0;
boolean found = FALSE;

if (NULL == key || key[0] == 0)
    return FALSE;

file = lineFileOpen(keyFile, TRUE);
while (lineFileNext(file, &line, &len))
    {
    if (0 == strcmp(key, line))
        {
        found = TRUE;
        break;
        }
    }
/* Close on both outcomes; the file used to be left open. */
lineFileClose(&file);
return found;
}
Exemplo n.º 15
0
struct mapPos *readInfoFile(char *mapName)
/* Read maps from file.
 * The first line is read and discarded.  Every later line that does not
 * start with '#' must have three words: clone name, position and phase.
 * Returns the list in file order. */
{
struct lineFile *lf = lineFileOpen(mapName, TRUE);
struct mapPos *posList = NULL;
char *line;
int lineSize;

/* Consume the first line. */
lineFileNeedNext(lf, &line, &lineSize);

for (;;)
    {
    char *fields[16];
    int fieldCount;
    struct mapPos *pos;

    if (!lineFileNext(lf, &line, &lineSize))
        break;
    if (line[0] == '#')
        continue;
    fieldCount = chopLine(line, fields);
    lineFileExpectWords(lf, 3, fieldCount);

    AllocVar(pos);
    pos->cloneName = cloneString(fields[0]);
    pos->pos = atoi(fields[1]);
    pos->phase = atoi(fields[2]);
    slAddHead(&posList, pos);
    }
lineFileClose(&lf);
/* Items were pushed on the head, so flip back to file order. */
slReverse(&posList);
return posList;
}
void jkUniq(char *fileName)
/* Remove dupe lines from file.
 * Reads the whole file, keeps the first occurrence of each distinct line
 * in the original order, then rewrites fileName in place. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *seen = newHash(0);
struct slName *keepList = NULL, *keep;
char *line;
int lineSize;
FILE *out;

/* Pass 1: collect the lines seen for the first time. */
while (lineFileNext(lf, &line, &lineSize))
    {
    if (hashLookup(seen, line))
        continue;
    hashAdd(seen, line, NULL);
    keep = newSlName(line);
    slAddHead(&keepList, keep);
    }
slReverse(&keepList);
lineFileClose(&lf);

/* Pass 2: overwrite the file with the survivors. */
out = mustOpen(fileName, "w");
for (keep = keepList; keep != NULL; keep = keep->next)
    fprintf(out, "%s\n", keep->name);
fclose(out);

slFreeList(&keepList);
freeHash(&seen);
}
Exemplo n.º 17
0
void rt1dFind(char *tabFile, char *treeFile, char *chrom, bits32 start, bits32 end)
/* rt1dFind - find items in 1-D range tree.
 * Asks the tree in treeFile for the blocks of tabFile that may overlap
 * chrom:start-end, reads each block line by line and prints to stdout the
 * lines on chrom that really do intersect the range.  Each line must have
 * exactly three words: chrom, start, end. */
{
struct lineFile *lf = lineFileOpen(tabFile, TRUE);
struct crTreeFile *crf = crTreeFileOpen(treeFile);
struct fileOffsetSize *block, *blockList = crTreeFindOverlappingBlocks(crf, chrom, start, end);
verbose(2, "Got %d overlapping blocks\n", slCount(blockList));
for (block = blockList; block != NULL; block = block->next)
    {
    verbose(2, "block->offset %llu, block->size %llu\n", block->offset, block->size);
    lineFileSeek(lf, block->offset, SEEK_SET);
    bits64 sizeUsed = 0;
    while (sizeUsed < block->size)
        {
        char *line;
        int size;
        if (!lineFileNext(lf, &line, &size))
            errAbort("Couldn't read %s\n", lf->fileName);
        /* chopLine cuts up its input, so parse a copy and keep line
         * intact for printing. */
        char *parsedLine = cloneString(line);
        char *row[3];
        if (chopLine(parsedLine, row) != ArraySize(row))
            errAbort("Badly formatted line of %s\n%s", lf->fileName, line);
        char *bedChrom = row[0];
        bits32 bedStart = sqlUnsigned(row[1]);
        bits32 bedEnd = sqlUnsigned(row[2]);
        if (sameString(bedChrom, chrom) && rangeIntersection(bedStart, bedEnd, start, end) > 0)
            fprintf(stdout, "%s\n", line);
        freeMem(parsedLine);
        sizeUsed += size;
        }
    }
/* Release the block list and the tab file as well as the tree. */
slFreeList(&blockList);
crTreeFileClose(&crf);
lineFileClose(&lf);
}
Exemplo n.º 18
0
void readHugoMultiTable(char *fileName, struct hugoMulti **retList,
        struct hash **retIdHash, struct hash **retSymbolHash)
/* Read in file into list and hashes.  Make hash keyed on omim ID
 * and on OMIM symbol.
 * Empty lines and lines starting with '#' are skipped; every other line
 * must have 12 tab-separated fields.  Items with an empty omimId or
 * symbol are left out of the corresponding hash.  The list is returned
 * in file order. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *byId = newHash(0);
struct hash *bySymbol = newHash(0);
struct hugoMulti *itemList = NULL;
char *line;
int lineSize;

while (lineFileNext(lf, &line, &lineSize))
    {
    char *fields[16];
    int fieldCount;
    struct hugoMulti *item;

    if (line[0] == 0 || line[0] == '#')
        continue;
    fieldCount = chopTabs(line, fields);
    lineFileExpectWords(lf, 12, fieldCount);

    item = hugoMultiLoad(fields);
    slAddHead(&itemList, item);
    if (item->omimId[0] != 0)
        hashAdd(byId, item->omimId, item);
    if (item->symbol[0] != 0)
        hashAdd(bySymbol, item->symbol, item);
    }
lineFileClose(&lf);
/* Items were pushed on the head, so flip back to file order. */
slReverse(&itemList);

*retList = itemList;
*retIdHash = byId;
*retSymbolHash = bySymbol;
}
Exemplo n.º 19
0
char *quotedPrintableDecode(char *input)
/* Use Quoted-Printable standard to decode a string.  Return decoded
 * string, allocated here with needMem; per the original note the result
 * is meant to be freeMem'd.
 * The input is processed line by line: quotedPCollapse rewrites each line
 * in place and its return value says whether a newline follows the line
 * in the output. */
{
size_t inputLen = strlen(input);
char *decoded = (char *)needMem(inputLen+1);
size_t outPos = 0;
char *line = NULL;
int lineSize = 0;

/* The line file works on a private copy of the input. */
struct lineFile *lf = lineFileOnString("", TRUE, cloneString(input));

while (lineFileNext(lf, &line, &lineSize))
    {
    boolean keepNewline = quotedPCollapse(line);
    size_t collapsedLen = strlen(line);

    memcpy(decoded + outPos, line, collapsedLen);
    outPos += collapsedLen;
    if (keepNewline)
        decoded[outPos++] = '\n';
    }

lineFileClose(&lf);  /* frees cloned string */

decoded[outPos] = 0;  /* terminate text string */
return decoded;
}
Exemplo n.º 20
0
static void h1n1DownloadPdb(char *item, char *pdbUrl, struct tempName *tmpPdb)
/* Uncompress PDB to trash.
 * Opens pdbUrl over HTTP, switching to the redirected connection if there
 * is one, fills tmpPdb through trashDirFile and copies the decompressed
 * lines into tmpPdb->forCgi.  Aborts if the HTTP headers cannot be read. */
{
int fd = netOpenHttpExt(pdbUrl, "GET", NULL);
int redirFd = 0;
char *redirUrl = NULL;
char *line;
FILE *out;
struct lineFile *in;

if (!netSkipHttpHeaderLinesHandlingRedirect(fd, pdbUrl, &redirFd, &redirUrl))
    errAbort("Unable to access predicted 3D structure file: %s", pdbUrl);
if (redirUrl != NULL)
    {
    /* Read from the redirected connection instead. */
    close(fd);
    fd = redirFd;
    freez(&redirUrl);
    }

trashDirFile(tmpPdb, "hgct", item, ".pdb");
out = mustOpen(tmpPdb->forCgi, "w");
in = lineFileDecompressFd(pdbUrl, TRUE, fd);
while (lineFileNext(in, &line, NULL))
    fprintf(out, "%s\n", line);
lineFileClose(&in);
carefulClose(&out);
}
Exemplo n.º 21
0
void wordUse(char *fileName)
/* wordUse - Make dictionary of all words and count usages, report top ten.
 * Words come from nextWord applied to each line.  After sorting with
 * wordCountCmpCount the first ten entries are printed to stdout as
 * word, tab, count. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line, *word;
struct hash *hash = newHash(18);
struct wordCount *wcList = NULL, *wc;
int i;
while (lineFileNext(lf, &line, NULL))
    {
    while ((word = nextWord(&line)) != NULL)
        {
        wc = hashFindVal(hash, word);
        if (wc == NULL)
            {
            AllocVar(wc);
            /* wc->word is set to the hash's saved copy of the name, so it
             * does not point into the line buffer. */
            hashAddSaveName(hash, word, wc, &wc->word);
            slAddHead(&wcList, wc);
            }
        wc->count += 1;
        }
    }
/* Done with the input; it used to be left open. */
lineFileClose(&lf);
slSort(&wcList, wordCountCmpCount);
for (i=0, wc=wcList; i<10 && wc != NULL; wc = wc->next, ++i)
    printf("%s\t%d\n", wc->word, wc->count);
}
struct bed *loadBedFileWithHeader(char *fileName)
/* Read in a bed file into a bed list, dealing with header for custom track
 * if necessary.
 * Lines are skipped until one with more than 10 tabs turns up; that line
 * and all following ones are loaded with bedLoad12.  The list is returned
 * in file order. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct bed *bedList = NULL;
char *fields[12];
char *line;
int lineSize;

/* Skip the headers. */
for (;;)
    {
    if (!lineFileNext(lf, &line, &lineSize))
        break;
    if (countChars(line, '\t') > 10)
        {
        /* First data line: push it back for lineFileRow below. */
        lineFileReuse(lf);
        break;
        }
    }

/* Load in the records. */
while (lineFileRow(lf, fields))
    {
    struct bed *item = bedLoad12(fields);
    slAddHead(&bedList, item);
    }
lineFileClose(&lf);
slReverse(&bedList);
return bedList;
}
Exemplo n.º 23
0
struct raRecord *raNext(struct lineFile *lf, struct hash *stringHash)
/* Return next ra record.  Returns NULL at end of file.
 * Reads "key value" lines until a blank line or end of file; lines whose
 * first non-space character is '#' are skipped.  Keys and values are
 * stored through hashStoreName on stringHash.  Fields keep file order.
 * NULL is also returned when a blank line comes before any field. */
{
struct raRecord *record = NULL;
char *line;

while (lineFileNext(lf, &line, NULL))
    {
    struct nameVal *field;
    char *key, *val;

    line = skipLeadingSpaces(line);
    if (line[0] == '#')
        continue;
    if (line[0] == 0)
        break;

    /* Allocate the record on its first field. */
    if (record == NULL)
        AllocVar(record);

    key = nextWord(&line);
    AllocVar(field);
    field->name = hashStoreName(stringHash, key);
    val = skipLeadingSpaces(line);
    if (val == NULL)
        errAbort("Expecting line/value pair line %d of %s"
                , lf->lineIx, lf->fileName);
    field->val = hashStoreName(stringHash, val);
    slAddHead(&record->fieldList, field);
    }

/* Fields were pushed on the head, so flip back to file order. */
if (record != NULL)
    slReverse(&record->fieldList);
return record;
}
void getOffset(char *directoryName, char *chromName, char *outputFileName)
{
FILE *outputFileHandle = mustOpen(outputFileName, "w");
struct lineFile *lf = NULL;
char *line;
off_t offset;
char *row[9], *rsId[2];
char inputFileName[64];

safef(inputFileName, sizeof(inputFileName), "%s/%s.fa", directoryName, chromName);
lf = lineFileOpen(inputFileName, TRUE);
while (lineFileNext(lf, &line, NULL))
    {
    if (line[0] == '>')
        {
	chopString(line, "|", row, ArraySize(row));
        chopString(row[2], " ", rsId, ArraySize(rsId));
	offset = lineFileTell(lf);
	fprintf(outputFileHandle, "%s\t%s\t%ld\n", rsId[0], chromName, offset);
	}
    }

carefulClose(&outputFileHandle);
lineFileClose(&lf);
}
void fixGdup(char *inName, char *outName)
/* fixGdup - Reformat genomic dups table a little.
 * Lines starting with '#' and lines with no fields are skipped; every
 * other line must have 15 tab-separated fields.  Output columns are input
 * fields 0-2, then fields 6 and 7 joined with ':', then fields 4-8, then
 * fields 10 through the last. */
{
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = mustOpen(outName, "w");
int wordCount, lineSize;
char *words[32], *line;
int i;

while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] == '#')
        continue;
    wordCount = chopTabs(line, words);
    if (wordCount == 0)
        continue;
    lineFileExpectWords(lf, 15, wordCount);
    for (i=0; i<3; ++i)
        fprintf(f, "%s\t", words[i]);
    fprintf(f, "%s:%s\t", words[6], words[7]);
    for (i=4; i<9; ++i)
        fprintf(f, "%s\t", words[i]);
    for (i=10; i<wordCount; ++i)
        {
        fprintf(f, "%s", words[i]);
        /* The last field ends the line, the others are tab separated. */
        if (i == wordCount-1)
            fprintf(f, "\n");
        else
            fprintf(f, "\t");
        }
    }
/* Both files used to be left open. */
carefulClose(&f);
lineFileClose(&lf);
}
Exemplo n.º 26
0
void addCtgFile(char *liftFileName, struct ctgPos **pCtgList)
/* Create ctgPos's out of liftSpecs in liftFile. */
{
struct lineFile *lf = lineFileOpen(liftFileName, TRUE);
int lineSize, wordCount;
char *line, *words[16];
struct liftSpec lift;
struct ctgPos *ctg;

printf("Processing %s\n", liftFileName);
while (lineFileNext(lf, &line, &lineSize))
    {
    wordCount = chopLine(line, words);
    if (wordCount == 0)
        continue;
    if (wordCount != 5)
        errAbort("Expecting 5 words line %d of %s", lf->lineIx, lf->fileName);
    liftSpecStaticLoad(words, &lift);
    AllocVar(ctg);
    ctg->contig = cloneString(skipPastSlash(lift.oldName));
    ctg->size = lift.oldSize;
    ctg->chrom = cloneString(lift.newName);
    ctg->chromStart = lift.offset;
    ctg->chromEnd = lift.offset + lift.oldSize;
    slAddHead(pCtgList, ctg);
    }
lineFileClose(&lf);
}
void doCleanSeq(char *inputFileName, char *outputFileName)
{
FILE *outputFileHandle = NULL;
struct lineFile *lf;
char *line;
char *row[9], *rsId[2];
struct hashEl *hel = NULL;
boolean skipping = FALSE;

outputFileHandle = mustOpen(outputFileName, "w");
lf = lineFileOpen(inputFileName, TRUE);

while (lineFileNext(lf, &line, NULL))
    {
    if (line[0] == '>')
        {
	skipping = FALSE;
        chopString(line, "|", row, ArraySize(row));
        chopString(row[2], " ", rsId, ArraySize(rsId));
        hel = hashLookup(snpHash, rsId[0]);
	if (hel)
	    skipping = TRUE;
	else
	    {
	    hashAdd(snpHash, cloneString(rsId[0]), NULL);
            fprintf(outputFileHandle, ">%s\n", rsId[0]);
	    }
	}
    else if (!skipping)
        fprintf(outputFileHandle, "%s\n", line);
    }
carefulClose(&outputFileHandle);
lineFileClose(&lf);
}
void netLineFilter(struct lineFile *lf, FILE *f)
/* Do filter one line at a time.
 * Lines whose text after the leading spaces starts with "fill" or "gap"
 * are parsed into a cnFill and written back, at the same indentation,
 * only if filterOne accepts them.  All other lines are copied to f
 * unchanged. */
{
struct hash *nameHash = newHash(0);
char *line;

while (lineFileNext(lf, &line, NULL))
    {
    int indent = countLeadingChars(line, ' ');
    char *body = line + indent;
    struct cnFill *fill;

    if (!startsWith("fill", body) && !startsWith("gap", body))
        {
        fprintf(f, "%s\n", line);
        continue;
        }

    fill = cnFillFromLine(nameHash, lf, body);
    if (filterOne(fill))
        cnFillWrite(fill, f, indent);
    cnFillFree(&fill);
    }

hashFree(&nameHash);
}
Exemplo n.º 29
0
struct dyString *readAndReplaceTableName(char *fileName, char *table)
/* Read file into string.  The line returned by lineFileNextReal must
 * begin with the words "create table" followed by a table name.  That
 * name is replaced with table and the rest of the file is copied
 * verbatim, one newline after each line.  Aborts when the file has no
 * real lines or the first one does not have that shape. */
{
    struct lineFile *lf = lineFileOpen(fileName, TRUE);
    struct dyString *sql = dyStringNew(0);
    char *line, *token;

    if (!lineFileNextReal(lf, &line))
        errAbort("No real lines in %s\n", fileName);

    /* First word: CREATE */
    token = nextWord(&line);
    if (!sameWord(token, "create"))
        errAbort("Expecting first word in file to be CREATE. Got %s", token);

    /* Second word: TABLE */
    token = nextWord(&line);
    if (token == NULL || !sameWord(token, "table"))
        errAbort("Expecting second word in file to be table. Got %s", emptyForNull(token));

    /* Third word: the old table name, which is only checked for presence. */
    token = nextWord(&line);
    if (token == NULL)
        errAbort("Expecting table name on same line as CREATE TABLE");

    /* Start the statement over with the new name, then whatever followed
     * the old name on that line. */
    sqlDyStringPrintf(sql, "CREATE TABLE %s ", table);
    if (line != NULL)
        dyStringAppend(sql, line);
    dyStringAppendC(sql, '\n');

    /* Copy the remaining lines as they are. */
    for (;;)
    {
        if (!lineFileNext(lf, &line, NULL))
            break;
        dyStringAppend(sql, line);
        dyStringAppendC(sql, '\n');
    }
    lineFileClose(&lf);
    return sql;
}
void wigSetSpanOrStep(char *input, char *output)
/* wigSetSpanOrStep - Set span or step variables in an ascii format wiggle file.
 * Lines starting with the word "fixedStep" or "variableStep" are passed
 * to handleStepLine; all other lines are copied to output as they are. */
{
struct lineFile *lf = lineFileOpen(input, TRUE);
FILE *f = mustOpen(output, "w");
char *line;
int lineSize;
char *fixedPat = "fixedStep";
int fixedPatSize = strlen(fixedPat);
char *varPat = "variableStep";
int varPatSize = strlen(varPat);
while (lineFileNext(lf, &line, &lineSize))
    {
    if (startsWithWord(fixedPat, line))
        handleStepLine(lf, line, fixedPatSize, FALSE, f);
    else if (startsWithWord(varPat, line))
        handleStepLine(lf, line, varPatSize, TRUE, f);
    else
        {
        /* Write the line without its last byte, then a newline. */
        mustWrite(f, line, lineSize-1);
        fputc('\n', f);
        }
    }
carefulClose(&f);
/* The input used to be left open. */
lineFileClose(&lf);
}