Esempio n. 1
0
struct cnFill *cnFillRead(struct chainNet *net, struct lineFile *lf)
/* Read a list of fills, plus any nested children, from lf.  The count of
 * leading spaces on the first line read sets the level of this list.  A more
 * deeply indented line is pushed back and handed to a recursive call, whose
 * result becomes the children of the most recently read fill.  A less
 * indented line is pushed back for the caller and ends the list.  The list
 * is returned in file order. */
{
struct cnFill *list = NULL;
struct cnFill *last = NULL;     /* Most recently parsed fill at this level. */
int level = 0;
char *line;

while (lineFileNextReal(lf, &line))
    {
    int indent = countLeadingChars(line, ' ');
    if (last == NULL)
        level = indent;         /* Nothing parsed yet: this line defines the level. */
    if (indent == level)
        {
        last = cnFillFromLine(net->nameHash, lf, line);
        slAddHead(&list, last);
        continue;
        }
    /* Line belongs to some other level, so somebody else has to read it. */
    lineFileReuse(lf);
    if (indent < level)
        break;
    last->children = cnFillRead(net, lf);
    }
slReverse(&list);
return list;
}
Esempio n. 2
0
File: ra.c Progetto: bh0085/kent
boolean raSkipLeadingEmptyLines(struct lineFile *lf, struct dyString *dy)
/* Advance lf past blank lines and '#' comment lines, leaving it positioned so
 * that the next read returns the first line with real content.  Returns FALSE
 * if the file ends first.  Together with raNextTagVal this lets you build your
 * own raNextRecord.  When dy is non-NULL it is cleared first, and every skipped
 * line is appended to it followed by a newline. */
{
char *line;
if (dy != NULL)
    dyStringClear(dy);
while (lineFileNext(lf, &line, NULL))
    {
    char *content = skipLeadingSpaces(line);
    boolean skippable = (content[0] == 0 || content[0] == '#');
    if (!skippable)
        {
        /* Hand the first real line back for the caller's next read. */
        lineFileReuse(lf);
        return TRUE;
        }
    if (dy != NULL)
        {
        dyStringAppend(dy, line);
        dyStringAppendC(dy, '\n');
        }
    }
return FALSE;
}
Esempio n. 3
0
File: ra.c Progetto: bh0085/kent
struct slPair *raNextStanzaLinesAndUntouched(struct lineFile *lf)
// Return list of lines starting from current position, up through last line of next stanza.
// May return a few blank/comment lines at end with no real stanza.
// Will join continuation lines, allocating memory as needed.
// returns pairs with name=joined line and if joined,
// val will contain raw lines '\'s and linefeeds, else val will be NULL.
{
struct slPair *pairs = NULL;
boolean stanzaStarted = FALSE;
char *line, *raw;
int lineLen,rawLen;
while (lineFileNextFull(lf, &line, &lineLen, &raw, &rawLen)) // Joins continuation lines
    {
    char *clippedText = skipLeadingSpaces(line);

    if (stanzaStarted && clippedText[0] == 0)
        {
        lineFileReuse(lf);
        break;
        }
    if (!stanzaStarted && clippedText[0] != 0 && clippedText[0] != '#')
        stanzaStarted = TRUE; // Comments don't start stanzas and may be followed by blanks

    slPairAdd(&pairs, line,(raw != NULL?cloneString(raw):NULL));
    }
slReverse(&pairs);
return pairs;
}
struct bed *loadBedFileWithHeader(char *fileName)
/* Load fileName into a list of beds via bedLoad12.  Leading lines are treated
 * as header (such as custom track lines) and discarded until one holding more
 * than 10 tabs is seen; that line and everything after it is parsed as bed
 * rows.  Returns the list in file order. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct bed *list = NULL;
char *row[12];
char *line;
int lineSize;

/* Consume header lines; push back the first line that looks like a record. */
for (;;)
    {
    if (!lineFileNext(lf, &line, &lineSize))
        break;
    if (countChars(line, '\t') <= 10)
        continue;
    lineFileReuse(lf);
    break;
    }

/* Everything left is bed data. */
while (lineFileRow(lf, row))
    {
    struct bed *bed = bedLoad12(row);
    slAddHead(&list, bed);
    }
lineFileClose(&lf);
slReverse(&list);
return list;
}
Esempio n. 5
0
static void parseColumnHeaderRow(struct vcfFile *vcff, char *line)
/* Make sure column names are as we expect, and store genotype sample IDs if any are given.
 * line is the "#CHROM POS ..." header row; it is chopped in place.  Problems are reported
 * through vcfFileErr.  If line does not start with '#' it is pushed back on vcff->lf.
 * When sample columns follow FORMAT, vcff->genotypeCount and vcff->genotypeIds are set. */
{
if (line[0] != '#')
    {
    vcfFileErr(vcff, "Expected to find # followed by column names (\"#CHROM POS ...\"), "
               "not \"%s\"", line);
    lineFileReuse(vcff->lf);
    return;
    }
char *words[VCF_MAX_COLUMNS];
int wordCount = chopLine(line+1, words);
if (wordCount >= VCF_MAX_COLUMNS)
    vcfFileErr(vcff, "header contains at least %d columns; "
               "VCF_MAX_COLUMNS may need to be increased in vcf.c!", VCF_MAX_COLUMNS);
if (wordCount < 8)
    {
    /* The expectColumnName helpers index words[] without being told wordCount,
     * so stop here rather than let them read slots chopLine never filled. */
    vcfFileErr(vcff, "Expected at least 8 column names "
               "(\"#CHROM POS ID REF ALT QUAL FILTER INFO\"), but found only %d", wordCount);
    return;
    }
expectColumnName(vcff, "CHROM", words, 0);
expectColumnName(vcff, "POS", words, 1);
expectColumnName(vcff, "ID", words, 2);
expectColumnName(vcff, "REF", words, 3);
expectColumnName(vcff, "ALT", words, 4);
expectColumnName2(vcff, "QUAL", "PROB", words, 5);
expectColumnName(vcff, "FILTER", words, 6);
expectColumnName(vcff, "INFO", words, 7);
if (wordCount > 8)
    {
    expectColumnName(vcff, "FORMAT", words, 8);
    if (wordCount < 10)
        vcfFileErr(vcff, "FORMAT column is given, but no sample IDs for genotype columns...?");
    /* Everything after the FORMAT column (index 8) is a sample ID. */
    vcff->genotypeCount = (wordCount - 9);
    vcff->genotypeIds = vcfFileAlloc(vcff, vcff->genotypeCount * sizeof(char *));
    int i;
    for (i = 9;  i < wordCount;  i++)
        vcff->genotypeIds[i-9] = vcfFileCloneStr(vcff, words[i]);
    }
}
Esempio n. 6
0
static boolean nextBlockLine(struct blastFile *bf, struct blastQuery *bq, char **retLine)
/* Fetch the next line of the current block into *retLine.  Returns FALSE
 * with *retLine NULL when bfNextLine has nothing more.  Also returns FALSE,
 * after pushing the line back, when the line is an end of block type line.
 * Round lines are parsed into bq as they go by. */
{
char *line = bfNextLine(bf);
*retLine = line;
if (line == NULL)
    return FALSE;
if (isRoundLine(line))
    parseRoundLine(line, bq);

/* A block ends at a '>' line, a "Query=" line, a "  Database:" line, or a
 * program banner from the newer blast output format, such as:
 *     TBLASTN 2.2.15 [Oct-15-2006]
 * Looking for just "BLAST" anywhere in the line is hoped to cover blastp,
 * blastn, psi-blast, etc. as well. */
boolean endOfBlock = (line[0] == '>'
                      || startsWith("Query=", line)
                      || startsWith("  Database:", line)
                      || stringIn("BLAST", line) != NULL);
if (!endOfBlock)
    return TRUE;
lineFileReuse(bf->lf);
return FALSE;
}
int findBedSize(char *fileName, struct lineFile **retLf)
/* Read first real line of file and return how many words are in it.  Words
 * are split on tabs when strictTab is set, otherwise by chopLine.
 * Input file could be stdin, in which case we really don't want to open,
 * read, and close it here.  So if retLf is non-NULL, return the open
 * linefile (having told it to reuse the line we just read); otherwise the
 * linefile is closed.
 * If the file has no real line: returns 0 when ignoreEmpty is set (the
 * linefile is closed and *retLf is left untouched), otherwise aborts. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[64], *line;
int wordCount;

if (!lineFileNextReal(lf, &line))
    {
    /* No line came back, so there is nothing valid in 'line' to parse below. */
    if (ignoreEmpty)
        {
        lineFileClose(&lf);
        return(0);
        }
    errAbort("%s appears to be empty", fileName);
    }
/* Chop a private copy so the line handed back through retLf stays intact. */
line = cloneString(line);
if (strictTab)
    wordCount = chopTabs(line, words);
else
    wordCount = chopLine(line, words);
if (wordCount == 0)
    errAbort("%s appears to be empty", fileName);
if (retLf != NULL)
    {
    lineFileReuse(lf);
    *retLf = lf;
    }
else
    lineFileClose(&lf);
freeMem(line);
return wordCount;
}
boolean faSizeNext(struct lineFile *lf, char retLine[512], int *retSize)
/* Get > line and size for next fa record.  Return FALSE at end of file.
 * Lines before the first '>' line are skipped.  retLine receives the '>' line,
 * truncated to 511 characters if longer and always zero terminated.  *retSize
 * receives the sum of alphaCount over the lines up to the next '>' line, which
 * is pushed back for the following call. */
{
char *line;
int size = 0;

/* Fetch first record . */
for (;;)
    {
    if (!lineFileNext(lf, &line, NULL))
        return FALSE;
    if (line[0] == '>')
        break;
    }
/* strncpy does not terminate the destination when the source fills it. */
strncpy(retLine, line, 512);
retLine[511] = 0;

/* Loop around counting DNA-looking characters until next record. */
for (;;)
    {
    if (!lineFileNext(lf, &line, NULL))
        break;
    if (line[0] == '>')
        {
        lineFileReuse(lf);
        break;
        }
    size += alphaCount(line);
    }
*retSize = size;
return TRUE;
}
Esempio n. 9
0
void checkInputOpenFiles(struct inInfo *array, int count)
/* Make sure all of the input is there and of right format before going forward. Since
 * this is going to take a while we want to fail fast.  Every input is opened and left
 * open: bigWigs in in->bbi, bed files in in->lf with their first line pushed back so
 * that it will be read again. */
{
int i;
for (i=0; i<count; ++i)
    {
    struct inInfo *in = &array[i];
    switch (in->type)
        {
        case itBigWig:
            {
            /* Just open, it will abort if any problem.  The handle stays in in->bbi. */
            in->bbi = bigWigFileOpen(in->fileName);
            break;
            }
        case itPromoterBed:
        case itUnstrandedBed:
        case itBlockedBed:
            {
            /* Trial-parse a copy of the first line, then push the line itself back. */
            struct lineFile *lf = in->lf = lineFileOpen(in->fileName, TRUE);
            char *line;
            lineFileNeedNext(lf, &line, NULL);
            char *dupe = cloneString(line);
            char *row[256];
            int wordCount = chopLine(dupe, row);
            struct bed *bed = NULL;
            switch (in->type)
                {
                case itPromoterBed:
                    {
                    lineFileExpectAtLeast(lf, 6, wordCount);
                    bed = bedLoadN(row, 6);
                    char strand = bed->strand[0];
                    /* Strand is the sixth field, row[5]; row[6] may not even exist. */
                    if (strand != '+' && strand != '-')
                        errAbort("%s must be stranded, got %s in that field", lf->fileName, row[5]);
                    break;
                    }
                case itUnstrandedBed:
                    lineFileExpectAtLeast(lf, 4, wordCount);
                    bed = bedLoadN(row, 4);
                    break;
                case itBlockedBed:
                    /* bedLoadN is asked for 12 fields, so insist all 12 are there. */
                    lineFileExpectAtLeast(lf, 12, wordCount);
                    bed = bedLoadN(row, 12);
                    break;
                default:
                    internalErr();
                    break;
                }
            bedFree(&bed);
            freez(&dupe);
            lineFileReuse(lf);
            break;
            }
        default:
            internalErr();
            break;
        }
    }
}
Esempio n. 10
0
static void reuseRow(struct annoStreamTab *self)
// Called when a row falls after the region.  Undoes the damage lineFileChopNext did
// to the line, tells the lineFile to hand that line out again, and marks the
// streamer as at EOF - we are all done until & unless the region changes.
{
struct lineFile *lf = self->lf;
int colCount = self->streamer.numCols;
unChop(self->asWords, colCount);
lineFileReuse(lf);
self->eof = TRUE;
}
Esempio n. 11
0
File: ra.c Progetto: bh0085/kent
boolean raNextTagVal(struct lineFile *lf, char **retTag, char **retVal, struct dyString *dyRecord)
// Read the next tag/value line.  Returns FALSE at end of file, at a blank line, or at a
// "#EOF" comment; otherwise fills in *retTag and *retVal and returns TRUE.  Other '#'
// comment lines are skipped.  When dyRecord is non-NULL the text parsed is appended to
// it.  Continuation lines in the RA file are joined to produce tag and val, but dyRecord
// gets the unedited multiple lines containing the continuation chars.
// NOTE: retTag & retVal, if returned, point to static mem which will be overwritten on next call!
{
char *line, *raw = NULL;
int lineLen, rawLen;
boolean keepText = (dyRecord != NULL);

*retTag = NULL;
*retVal = NULL;

// Only ask for the raw text when there is somewhere to put it.
char **pRaw = keepText ? &raw : NULL;
int *pRawLen = keepText ? &rawLen : NULL;

while (lineFileNextFull(lf, &line, &lineLen, pRaw, pRawLen)) // Joins continuation lines
    {
    char *text = skipLeadingSpaces(line);
    char first = *text;
    if (first == 0)
        {
        // Blank line ends the record.  Push it back so that its leading
        // space is not lost from dyRecord.
        if (keepText)
            lineFileReuse(lf);
        return FALSE;
        }

    // Record whatever was read from the file, comments included.
    if (keepText)
        {
        if (raw == NULL)
            dyStringAppendN(dyRecord, line, lineLen);
        else
            dyStringAppendN(dyRecord, raw, rawLen);
        dyStringAppendC(dyRecord, '\n');
        }

    if (first == '#')
        {
        if (startsWith("#EOF", text))
            return FALSE;
        continue;   // ordinary comment
        }

    *retTag = nextWord(&line);
    *retVal = trimSpaces(line);
    return TRUE;
    }
return FALSE;
}
struct wigSection *wigSectionRead(struct lineFile *lf)
/* Parse out the next fixedStep section of a wig.  Returns NULL when no real
 * line is left; aborts if the section does not open with a "fixedStep " line.
 * Each data line contributes its value 'step' times, so chromEnd is start plus
 * the number of values stored.  The section gets its own copies of the chrom
 * name and of the values. */
{
    /* Scratch buffer kept across calls; it is only ever enlarged. */
    static double *vals = NULL;
    static int valAlloc = 0;

    char *pattern = "fixedStep ";
    int patSize = 10;
    char *line;

    /* Section header. */
    if (!lineFileNextReal(lf, &line))
        return NULL;
    if (!startsWith(pattern, line))
        errAbort("Expecting fixedStep line %d of %s", lf->lineIx, lf->fileName);
    struct hash *varHash = hashVarLine(line + patSize, lf->lineIx);
    int step = sqlUnsigned(requiredVal(lf, varHash, "step"));
    int start = sqlUnsigned(requiredVal(lf, varHash, "start"));
    char *chrom = cloneString(requiredVal(lf, varHash, "chrom"));
    hashFree(&varHash);

    /* Data lines, up to the next section header or the end of input. */
    int valCount = 0;
    while (lineFileNextReal(lf, &line))
    {
        if (startsWith(pattern, line))
        {
            lineFileReuse(lf);
            break;
        }
        int rep;
        for (rep = step; rep > 0; --rep)
        {
            if (valCount >= valAlloc)
            {
                int newAlloc = valAlloc + 1024;
                ExpandArray(vals, valAlloc, newAlloc);
                valAlloc = newAlloc;
            }
            vals[valCount] = lineFileNeedDouble(lf, &line, 0);
            valCount += 1;
        }
    }

    /* Package the result. */
    struct wigSection *section;
    AllocVar(section);
    section->chrom = chrom;
    section->chromStart = start;
    section->chromEnd = start + valCount;
    section->vals = CloneArray(vals, valCount);
    return section;
}
Esempio n. 13
0
boolean read_fastq_auto(struct fastq_auto *fq, struct lineFile *lf, boolean just_seq_qual)
/* fill in fastq struct from open lineFile.  return FALSE if at EOF */
/* set just_seq_qual=TRUE to skip loading everything except the sequence */
/* and quality information. */
/* Aborts on a missing or malformed '@' header, on EOF before the '+' line, */
/* and when sequence and quality lengths differ.  Qualities have 64 subtracted. */
/* NOTE(review): fq->machine, fq->seq and fq->qual are filled with strcpy/strcat; */
/* their capacities are not visible here, so nothing bounds long input - confirm. */
/* NOTE(review): a quality line that itself starts with '@' is taken for the next */
/* header and then trips the size check. */
{
    char *line;
    int len = 0;
    boolean neof = lineFileNext(lf, &line, &len);
    if (neof)
    {
        int i;
        int qual_size;
        /* should be header */
        if ((len <= 0) || (line[0] != '@'))
            errAbort("Expecting header. Problem on line %d\n", lf->lineIx);
        if (!just_seq_qual)
        {
            char *words[7];
            int numWords;
            numWords = chopByChar(line, ':', words, 6);
            /* words[0] through words[4] are read below; make sure they were all found. */
            if (numWords < 5)
                errAbort("Malformed header. Problem on line %d\n", lf->lineIx);
            strcpy(fq->machine, words[0] + 1);    /* + 1 skips the leading '@' */
            fq->flow_cell = sqlSigned(words[1]);
            fq->tile = sqlSigned(words[2]);
            fq->tile_x = sqlSigned(words[3]);
            /* last field is split again on '#' and then on '/' */
            words[5] = chopPrefixAt(words[4], '#');
            words[6] = chopPrefixAt(words[5], '/');
            fq->tile_y = sqlSigned(words[4]);
            fq->multiplex_index = sqlSigned(words[5]);
            fq->pair_num = sqlSigned(words[6]);
        }
        /* read the sequence */
        fq->seq[0] = '\0';
        while ((neof = lineFileNext(lf, &line, &len)) && (len > 0) && (line[0] != '+'))
            strcat(fq->seq, line);
        if (!neof)
            errAbort("incomplete fastq file.  early EOF");
        fq->seq_size = strlen(fq->seq);
        /* at the point of the quality header.  who cares, read the quality */
        fq->qual[0] = '\0';
        while ((neof = lineFileNext(lf, &line, &len)) && (len > 0) && (line[0] != '@'))
            strcat(fq->qual, line);
        /* Only a line that was really read can be the next header to push back. */
        if (neof && (len > 0) && (line[0] == '@'))
            lineFileReuse(lf);
        qual_size = strlen(fq->qual);
        if (qual_size != fq->seq_size)
            errAbort("something wrong line %d.  sequence size (%d) should match quality size (%d)\n",
                     lf->lineIx, fq->seq_size, qual_size);
        /* convert Illumina 1.3+ quals to Sanger */
        for (i = 0; i < qual_size; i++)
            fq->qual[i] -= 64;
    }
    else
        return FALSE;
    return TRUE;
}
Esempio n. 14
0
static boolean findBlockSeqPair(struct blastFile *bf, struct blastQuery *bq)
/* Scan forward for the next "Query:" line, which starts a Query:/Sbjct: pair.
 * Returns TRUE with that line pushed back for the caller.  Returns FALSE when
 * nextBlockLine reports the end of the block, or when a " Score" line turns up
 * first (that line is pushed back as well). */
{
char *line;
while (nextBlockLine(bf, bq, &line))
    {
    boolean atScore = startsWith(" Score", line);
    if (atScore || startsWith("Query:", line))
        {
        lineFileReuse(bf->lf);
        return !atScore;
        }
    }
return FALSE;
}
Esempio n. 15
0
void oneGenieFile(char *fileName, struct hash *uniq, FILE *f)
/* Process one genie peptide prediction file, writing to f one line per record:
 * the name, a tab, then the record's lines run together.  The name is the first
 * word after '>', minus the abbr prefix when abbr is set and matches.  Names are
 * tracked upper-cased in uniq; a record whose name was already seen gets a
 * warning and its lines are left out. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
int lineSize;
boolean sawHeader = FALSE;
boolean skipping = FALSE;

/* Do cursory sanity check, then rewind to the first line. */
if (!lineFileNext(lf, &line, &lineSize))
    errAbort("%s is empty", fileName);
if (line[0] != '>')
    errAbort("%s is badly formatted, doesn't begin with '>'", fileName);
lineFileReuse(lf);

while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] != '>')
        {
        /* Sequence line: copy it out unless this record is being dropped. */
        if (!skipping)
            mustWrite(f, line, lineSize-1);
        continue;
        }

    /* Header line: end the previous output line, if there was one. */
    if (sawHeader)
        fputc('\n', f);
    else
        sawHeader = TRUE;

    char *name = firstWordInLine(line+1);
    if (abbr != NULL && startsWith(abbr, name))
        name += strlen(abbr);

    skipping = (hashLookupUpperCase(uniq, name) != NULL);
    if (skipping)
        warn("Duplicate (case insensitive) '%s' line %d of %s. Ignoring all but first.", name, lf->lineIx, lf->fileName);
    else
        {
        char *upper = cloneString(name);
        touppers(upper);
        hashAdd(uniq, upper, NULL);
        freeMem(upper);
        fprintf(f, "%s\t", name);
        }
    }
fputc('\n', f);
lineFileClose(&lf);
}
Esempio n. 16
0
void genericOne(char *fileName, struct hash *uniq, FILE *f)
/* Process one peptide prediction file into tab delimited output f, one line
 * per record: name, tab, then the record's lines run together.  The name is
 * the first word after '>', minus the abbr prefix when abbr is set and matches,
 * plus suffix when suffix is set.  Names are tracked upper-cased in uniq and a
 * repeat aborts. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line;
int lineSize;
boolean sawHeader = FALSE;
char nameBuf[128];

/* Do cursory sanity check, then rewind to the first line. */
if (!lineFileNext(lf, &line, &lineSize))
    errAbort("%s is empty", fileName);
if (line[0] != '>')
    errAbort("%s is badly formatted, doesn't begin with '>'", fileName);
lineFileReuse(lf);

while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] != '>')
        {
        mustWrite(f, line, lineSize-1);
        continue;
        }

    /* Header line: end the previous output line, if there was one. */
    if (sawHeader)
        fputc('\n', f);
    else
        sawHeader = TRUE;

    char *name = firstWordInLine(line+1);
    if (abbr != NULL && startsWith(abbr, name))
        name += strlen(abbr);
    if (suffix != NULL)
        {
        safef(nameBuf, sizeof(nameBuf), "%s%s", name, suffix);
        name = nameBuf;
        }
    if (hashLookupUpperCase(uniq, name) != NULL)
        errAbort("Duplicate (case insensitive) '%s' line %d of %s", name, lf->lineIx, lf->fileName);

    /* Remember the upper-cased name for later duplicate checks. */
    char *upper = cloneString(name);
    touppers(upper);
    hashAdd(uniq, upper, NULL);
    freeMem(upper);
    fprintf(f, "%s\t", name);
    }
fputc('\n', f);
lineFileClose(&lf);
}
Esempio n. 17
0
struct hash *raNextRecord(struct lineFile *lf)
/* Return a hash containing next record.
 * Returns NULL at end of file, or when a "#EOF" comment is met inside the
 * record.  freeHash this when done.  Note this will free the hash
 * keys and values as well, so you'll have to
 * cloneMem them if you want them for later.
 * A tag with nothing after it is stored with "" as its value. */
{
struct hash *hash = NULL;
char *line, *key, *val;

/* Skip leading empty lines and comments. */
for (;;)
   {
   if (!lineFileNext(lf, &line, NULL))
       return NULL;
   line = skipLeadingSpaces(line);
   if (line[0] != 0 )
       {
       if (line[0] == '#')
           continue;
       else
           break;
       }
   }
lineFileReuse(lf);

/* Read tag/value lines until a blank line or the end of the file. */
for (;;)
   {
   if (!lineFileNext(lf, &line, NULL))
       break;
   line = skipLeadingSpaces(line);
   if (line[0] == 0)
       break;
   if (line[0] == '#')
       {
       if (startsWith("#EOF", line))
           {
           /* Don't leak whatever part of the record was already collected. */
           freeHash(&hash);
           return NULL;
           }
       else
           continue;
       }
   if (hash == NULL)
       hash = newHash(7);
   key = nextWord(&line);
   /* line can come back NULL from nextWord when nothing follows the tag, so
    * substitute "" before the value is trimmed and cloned. */
   if (line == NULL)
       val = "";
   else
       val = trimSpaces(line);
   val = lmCloneString(hash->lm, val);
   hashAdd(hash, key, val);
   }
return hash;
}
Esempio n. 18
0
boolean emblLineGroup(struct lineFile *lf, char type[16], struct dyString *val)
/* Read next line of embl file.  Read line after that too if it
 * starts with the same type field. Return FALSE at EOF.
 * type receives the first word of the line (at most 15 characters, else abort)
 * and val the rest of it.  Lines that follow with the same type word are added
 * to val after a newline; the first line of another type is pushed back.
 * Blank lines ahead of the group are skipped. */
{
char *line, *word;
int typeLen = 0;

dyStringClear(val);
while (lineFileNext(lf, &line, NULL))
    {
    line = skipLeadingSpaces(line);

    /* Parse out first word into type. */
    if (isspace(line[0]))
        errAbort("embl line that doesn't start with type line %d of %s",
                lf->lineIx, lf->fileName);
    if (typeLen == 0)
        {
        word = nextWord(&line);
        /* No word on this line (it is blank), so there is no type to take
         * from it; try the next line rather than hand NULL to strlen. */
        if (word == NULL)
            continue;
        typeLen = strlen(word);
        if (typeLen >= 16)
            errAbort("Type word at start of line too long for embl file line %d of %s",
                lf->lineIx, lf->fileName);
        strcpy(type, word);
        }
    else if (!startsWith(type, line) || !isspace(line[typeLen]))
        {
        /* Different type: this line belongs to the next group. */
        lineFileReuse(lf);
        break;
        }
    else
        {
        dyStringAppendC(val, '\n');
        word = nextWord(&line);     /* Step over the repeated type word. */
        }

    if (line != NULL)
        {
        /* Usually have two spaces after type. */
        if (isspace(line[0]))
           ++line;
        if (isspace(line[0]))
           ++line;

        /* Append what's rest of line to return value. */
        dyStringAppend(val, line);
        }
    }
return typeLen > 0;
}
Esempio n. 19
0
static boolean bfSkipBlankLines(struct blastFile *bf)
/* skip blank lines, return FALSE on EOF */
{
char *line = NULL;
while ((line = bfNextLine(bf)) != NULL)
    {
    if (skipLeadingSpaces(line)[0] != '\0')
        {
	lineFileReuse(bf->lf);
        return TRUE;
        }
    }
return FALSE; /* EOF */
}
Esempio n. 20
0
struct blastQuery *blastFileNextQuery(struct blastFile *bf)
/* Read all alignments associated with next query.  Return NULL at EOF.
 * Parses the "Query=" and "Database:" lines, skips ahead to the first '>' line
 * (or to a "No hits found" line), then collects the gapped alignments into
 * bq->gapped in file order. */
{
char *line;
struct blastQuery *bq;
struct blastGappedAli *bga;

verbose(TRACE_LEVEL, "blastFileNextQuery\n");

/* find and parse Query= */
line = bfSearchForLine(bf, "Query=");
if (line == NULL)
    return NULL;
/* Allocate only once a query is known to exist, so EOF leaks nothing. */
AllocVar(bq);
parseQueryLines(bf, line, bq);

/* find and parse Database: */
line = bfSearchForLine(bf, "Database:");
if (line == NULL)
    bfUnexpectedEof(bf);
parseDatabaseLines(bf, line, bq);

/* Seek to beginning of first gapped alignment. */
for (;;)
    {
    line = bfNeedNextLine(bf);
    if (line[0] == '>')
        {
        lineFileReuse(bf->lf);
        break;
        }
    else if (isRoundLine(line))
        parseRoundLine(line, bq);
    else if (stringIn("No hits found", line) != NULL)
        break;
    }

/* Read in gapped alignments. */
while ((bga = blastFileNextGapped(bf, bq)) != NULL)
    {
    slAddHead(&bq->gapped, bga);
    }
slReverse(&bq->gapped);
if (verboseLevel() >= DUMP_LEVEL)
    {
    verbose(DUMP_LEVEL, "blastFileNextQuery result:\n");
    blastQueryPrint(bq, stderr);
    }
return bq;
}
Esempio n. 21
0
boolean seekLocus(struct lineFile *lf)
/* Advance to the next line that starts with "LOCUS" and push it back so the
 * caller reads it next.  Returns FALSE if the file ends first. */
{
char *line;
while (lineFileNext(lf, &line, NULL))
    {
    if (!startsWith("LOCUS", line))
        continue;
    lineFileReuse(lf);
    return TRUE;
    }
return FALSE;
}
Esempio n. 22
0
void lineFileRemoveInitialCustomTrackLines(struct lineFile *lf)
/* remove initial browser and track lines */
{
char *line;
while (lineFileNextReal(lf, &line))
    {
    if (!(startsWith("browser", line) || startsWith("track", line) ))
        {
        verbose(2, "found line not browser or track: %s\n", line);
        lineFileReuse(lf);
        break;
        }
    verbose(2, "skipping %s\n", line);
    }
}
Esempio n. 23
0
void oneEnsFile(char *ensFile, struct hash *uniq, struct hash *pToT, FILE *f)
/* Process one ensemble peptide prediction file into tab delimited output f,
 * one line per record: transcript, tab, then the record's lines run together.
 * The translation name comes from findEnsTrans on the '>' line and is mapped
 * to its transcript through pToT.  Names are tracked upper-cased in uniq; a
 * repeat, or a translation missing from pToT, aborts. */
{
struct lineFile *lf = lineFileOpen(ensFile, TRUE);
char *line;
int lineSize;
boolean sawHeader = FALSE;

/* Do cursory sanity check, then rewind to the first line. */
if (!lineFileNext(lf, &line, &lineSize))
    errAbort("%s is empty", ensFile);
if (line[0] != '>')
    errAbort("%s is badly formatted, doesn't begin with '>'", ensFile);
lineFileReuse(lf);

while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] != '>')
        {
        mustWrite(f, line, lineSize-1);
        continue;
        }

    /* Header line: end the previous output line, if there was one. */
    if (sawHeader)
        fputc('\n', f);
    else
        sawHeader = TRUE;

    char *translation = findEnsTrans(lf, line);
    if (hashLookupUpperCase(uniq, translation) != NULL)
        errAbort("Duplicate (case insensitive) '%s' line %d of %s", translation, lf->lineIx, lf->fileName);

    /* Remember the upper-cased name for later duplicate checks. */
    char *upper = cloneString(translation);
    touppers(upper);
    hashAdd(uniq, upper, NULL);
    freeMem(upper);

    char *transcript = hashFindVal(pToT, translation);
    if (transcript == NULL)
        errAbort("Can't find transcript for %s", translation);
    fprintf(f, "%s\t", transcript);
    }
fputc('\n', f);
lineFileClose(&lf);
}
Esempio n. 24
0
void makeMotifs(char *inFile, struct hash *tfHash, char *outFile)
/* Parse the motifs in inFile and save them to outFile in dnaMotif format.
 * Each motif begins after a "Probability matrix for" line, has a "#" line of
 * columns followed by the #A, #C, #T and #G rows, and takes its name from the
 * second word of the following "Source:" line.  The name must already be a key
 * of tfHash; the motif is stored as that entry's value. */
{
struct lineFile *lf = lineFileOpen(inFile, TRUE);
FILE *f = mustOpen(outFile, "w");

while (lineFileSkipTo(lf, "Probability matrix for"))
    {
    char *line;
    char *words[256], *name;
    int wordCount, colCount;
    struct dnaMotif *motif;
    struct hashEl *hel;

    /* Column line: a lone "#" and then one word per motif column. */
    lineFileNeedNext(lf, &line, NULL);
    wordCount = chopLine(line, words);
    if (wordCount >= ArraySize(words))
        errAbort("Line %d of %s is too long\n", lf->lineIx, lf->fileName);
    if (!sameString(words[0], "#"))
        badFormat(lf);
    colCount = wordCount-1;

    AllocVar(motif);
    motif->columnCount = colCount;
    readBaseProbs(lf, words, "#A", &motif->aProb, colCount);
    readBaseProbs(lf, words, "#C", &motif->cProb, colCount);
    readBaseProbs(lf, words, "#T", &motif->tProb, colCount);
    readBaseProbs(lf, words, "#G", &motif->gProb, colCount);

    /* Re-read the Source: line to pull the name out of it. */
    if (!lineFileSkipTo(lf, "Source:"))
        lineFileUnexpectedEnd(lf);
    lineFileReuse(lf);
    lineFileNeedNext(lf, &line, NULL);
    nextWord(&line);            /* first word is not needed */
    name = nextWord(&line);
    if (name == NULL)
        errAbort("Short Source: line %d of %s", lf->lineIx, lf->fileName);
    motif->name = cloneString(name);

    hel = hashLookup(tfHash, motif->name);
    if (hel == NULL)
        errAbort("%s in %s but not GFFs", motif->name, lf->fileName);
    hel->val = motif;
    dnaMotifTabOut(motif, f);
    }
carefulClose(&f);
lineFileClose(&lf);
}
Esempio n. 25
0
struct dyString *suckSameLines(struct lineFile *lf, char *line)
/* Suck up lines concatenating as long as they begin with the same
 * first two characters as initial line. */
{
struct dyString *dy = dyStringNew(0);
char c1 = line[0], c2 = line[1];
dyStringAppend(dy, line+3);
while (lineFileNext(lf, &line, NULL))
    {
    if (line[0] != c1 || line[1] != c2)
        {
	lineFileReuse(lf);
	break;
	}
    dyStringAppend(dy, line+2);
    }
return dy;
}
Esempio n. 26
0
struct raRecord *raRecordReadOne(struct lineFile *lf, char *key, struct lm *lm)
/* Read next record from file. Returns NULL at end of file. */
{
struct raField *field, *fieldList = NULL;
char *line;
char *keyVal = NULL;
boolean override = FALSE;
struct slPair *settingsByView = NULL;
struct hash *subGroups = NULL;
char *view = NULL;
struct hash *viewHash = NULL;

/* Skip over blank initial lines. */
for (;;)
    {
    if (!lineFileNext(lf, &line, NULL))
        return NULL;
    line = skipLeadingSpaces(line);
    if (line != NULL && (line[0] != 0 && line[0] != '#'))
         {
	 lineFileReuse(lf);
	 break;
	 }
    }

/* Keep going until get a blank line. */
for (;;)
    {
    if (!lineFileNext(lf, &line, NULL))
        break;
    line = skipLeadingSpaces(line);
    if (line[0] == '#')
        continue;
    field = raFieldFromLine(line, lm);
    if (field == NULL)
        break;
    if (sameString(field->name, key))
	{
	keyVal = lmCloneFirstWord(lm, field->val);
	if (endsWith(field->val, "override") && !sameString("override", field->val))
	    override = TRUE;
	}
    else if (sameString(field->name, "settingsByView"))
Esempio n. 27
0
struct rbTree *rangeTreeForBedChrom(struct lineFile *lf, char *chrom)
/* Read lines from bed file as long as they match chrom.  Return a rangeTree that
 * corresponds to the coverage.  The first line for another chrom is pushed back.
 * Aborts, through lineFileExpectAtLeast, on a line with fewer than three fields. */
{
struct rbTree *tree = rangeTreeNew();
char *line;
while (lineFileNextReal(lf, &line))
    {
    if (!startsWithWord(chrom, line))
        {
        lineFileReuse(lf);
        break;
        }
    char *row[3];
    int wordCount = chopLine(line, row);
    /* row[1] and row[2] are only filled in when the line really has them. */
    lineFileExpectAtLeast(lf, 3, wordCount);
    unsigned start = sqlUnsigned(row[1]);
    unsigned end = sqlUnsigned(row[2]);
    rangeTreeAddToCoverageDepth(tree, start, end);
    }
return tree;
}
Esempio n. 28
0
struct pslReader *pslReaderFile(char* pslFile, char* chrom)
/* Create a new pslReader to read from a file.  If chrom is not null,
 * only this chromsome is read.   Checks for psl header and pslx columns. */
{
char *line;
char *words[PSLX_NUM_COLS];
int wordCount, i;
struct pslReader* pr;
AllocVar(pr);
pr->table = cloneString(pslFile);
if (chrom != NULL)
    pr->chrom = cloneString(chrom);

pr->lf = lineFileOpen(pslFile, TRUE);

/* check for header and get number of columns */
if (lineFileNext(pr->lf, &line, NULL))
    {
    if (startsWith("psLayout version", line))
        {
        /* have header, skip it */
	for (i=0; i < 5; ++i)
	    {
	    if (!lineFileNext(pr->lf, &line, NULL))
		errAbort("%s header truncated", pslFile);
	    }
        }
    /* determine if this is a pslx */
    line = cloneString(line); /* don't corrupt input line */
    wordCount = chopLine(line, words);
    if ((wordCount < PSL_NUM_COLS) || (wordCount > PSLX_NUM_COLS)
        || (words[8][0] != '+' && words[8][0] != '-'))
        errAbort("%s is not a psl file", pslFile);
    pr->isPslx = (wordCount == PSLX_NUM_COLS);
    freez(&line);
    lineFileReuse(pr->lf);
    }

return pr;
}
Esempio n. 29
0
void convertVariableStepSection(struct lineFile *lf, struct hash *vars, struct bgOut *out)
/* Read through a variableStep section and write each item to out.  vars must
 * hold "chrom" and may hold "span" (default 1).  Every data line must be
 * exactly two words: a position, written out one lower as the start, and a
 * value.  The section ends at end of input or at the first line beginning with
 * a letter, which is pushed back. */
{
char *chrom = requiredVar(vars, "chrom", lf);
int span = sqlUnsigned(optionalVar(vars, "span", "1"));
char *line;
for (;;)
    {
    if (!lineFileNextReal(lf, &line))
        break;
    char *data = skipLeadingSpaces(line);
    if (isalpha(data[0]))
        {
        /* Start of the next section. */
        lineFileReuse(lf);
        break;
        }
    char *words[3];
    if (chopLine(data, words) != 2)
        errAbort("Expecting exactly two numbers line %d of %s", lf->lineIx, lf->fileName);
    int start = lineFileNeedNum(lf, words, 0) - 1;
    double val = lineFileNeedDouble(lf, words, 1);
    bgOutWrite(out, chrom, start, start+span, val);
    }
}
Esempio n. 30
0
void convertFixedStepSection(struct lineFile *lf, struct hash *vars, struct bgOut *out)
/* Read through a fixedStep section and write each item to out.  vars must hold
 * "chrom" and "start"; "span" defaults to 1 and "step" defaults to the span.
 * Each data line is one value, written for [start, start+span) with start then
 * moved on by step.  The section ends at end of input or at the first line
 * beginning with a letter, which is pushed back. */
{
char *chrom = requiredVar(vars, "chrom", lf);
int start = sqlUnsigned(requiredVar(vars, "start", lf)) - 1;
char *spanString = optionalVar(vars, "span", "1");
int span = sqlUnsigned(spanString);
int step = sqlUnsigned(optionalVar(vars, "step", spanString));
char *line;
for (;;)
    {
    if (!lineFileNextReal(lf, &line))
        break;
    char *data = skipLeadingSpaces(line);
    if (isalpha(data[0]))
        {
        /* Start of the next section. */
        lineFileReuse(lf);
        break;
        }
    eraseTrailingSpaces(data);
    double val = sqlDouble(data);
    bgOutWrite(out, chrom, start, start+span, val);
    start += step;
    }
}