示例#1
0
int makeMilli(char *s, struct lineFile *lf)
/* Convert ascii floating point to parts per thousand representation,
 * computed as round(10.0 * value).
 *   s  - text of the field to convert.
 *   lf - file being parsed; supplies the line number and file name for the
 *        warning below and is handed to badFormat() on malformed input.
 * A field that starts with '-' is converted as if it were "0.0"; any such
 * field other than the literal "-0.0" also produces a warning. */
{
/* Cope with -0.0  and -0.2 etc.*/
if (s[0] == '-')
    {
    if (!sameString(s, "-0.0"))
        warn("Strange perc. field %s line %d of %s", s, lf->lineIx, lf->fileName);
    s = "0.0";
    }
/* The <ctype.h> functions require a value representable as unsigned char
 * (or EOF); a plain char holding a byte >= 0x80 may be negative, so cast. */
if (!isdigit((unsigned char)s[0]))
    badFormat(lf,1);
return round(10.0*atof(s));
}
示例#2
0
// checkFormat: Checks the format of the si tokens held in s and calls
// badFormat() (which is expected to print an error and exit) when the
// format is wrong. Returns 0 when the input is accepted.
//
// Accepted shapes, as enforced below:
//   - exactly one token, which must be a number; or
//   - scanning from the LAST token backwards: a run of operators, followed
//     by numbers only, with exactly one more number than operators.
//
// isNumber() and isOp() are defined elsewhere; this function treats a
// return value of 0 from them as "matches" (see the si == 1 case).
int checkFormat(char s[MAXS][MAXLEN]) {
  int state;
  int ops = 0, nums = 0;

  // No tokens at all is never valid.
  if(si == 0) {
    badFormat();
  }

  // A single token is valid only if it is a number.
  if(si == 1) {
    if(isNumber(s[0]) == 0) {
      return 0;
    }
    else {
      badFormat();
    }
  }

  // Walk the tokens from last to first, starting with the operators.
  state = OPSTATE;

  for(int i = si - 1; i >= 0; i--) {
    // A token that is neither a number nor an operator is rejected.
    if(isNumber(s[i]) != 0 && isOp(s[i]) != 0) {
      badFormat();
    }
    switch(state) {
      case OPSTATE:
        if(isOp(s[i]) == 0) {
          ops++;
        }
        else {
          // First non-operator: from here on only numbers are allowed,
          // so ops no longer changes.
          nums++;
          state = NUMSTATE;
        }
        break;
      case NUMSTATE:
        if(isNumber(s[i]) == 0) {
          nums++;
        }
        else {
          badFormat();
        }
        break;
      default:
        badFormat();
    }
  }

  // Each operator needs one extra operand. Comparing against ops + 1
  // directly (instead of a value recorded only on leaving OPSTATE) also
  // rejects input made up solely of operators, where nums stays 0.
  if(nums != ops + 1) {
    badFormat();
  }

  return 0;
}
示例#3
0
void makeMotifs(char *inFile, struct hash *tfHash, char *outFile)
/* Parse input motifs and save them to outFile in dnaMotif format.
 *   inFile  - text file scanned for "Probability matrix for" sections.
 *   tfHash  - hash keyed by motif name; every motif found must already have
 *             an entry, whose value is replaced with the parsed motif.
 *   outFile - file the motifs are written to via dnaMotifTabOut().
 * For each section: the line after the marker must start with the word "#"
 * and its remaining words give the column count; readBaseProbs() is then
 * called for the "#A", "#C", "#T" and "#G" rows in that order; finally the
 * second word of the following "Source:" line becomes the motif name.
 * Aborts on overly long lines, a short Source: line, or an unknown name. */
{
struct lineFile *lf = lineFileOpen(inFile, TRUE);
FILE *f = mustOpen(outFile, "w");
struct hashEl *hel;

for (;;)
    {
    char *line;
    char *words[256], *word;
    int wordCount;
    struct dnaMotif *motif;

    /* No further matrix header means we are done. */
    if (!lineFileSkipTo(lf, "Probability matrix for"))
        break;
    lineFileNeedNext(lf, &line, NULL);
    wordCount = chopLine(line, words);
    if (wordCount >= ArraySize(words))
        errAbort("Line %d of %s is too long\n", lf->lineIx, lf->fileName);
    /* words[] is an uninitialized local, so with zero words on the line
     * words[0] must not be read; treat a blank header line as bad format. */
    if (wordCount < 1 || !sameString(words[0], "#"))
        badFormat(lf);
    AllocVar(motif);
    motif->columnCount = wordCount-1;	/* first word is the "#" marker */
    readBaseProbs(lf, words, "#A", &motif->aProb, motif->columnCount);
    readBaseProbs(lf, words, "#C", &motif->cProb, motif->columnCount);
    readBaseProbs(lf, words, "#T", &motif->tProb, motif->columnCount);
    readBaseProbs(lf, words, "#G", &motif->gProb, motif->columnCount);

    /* Find the Source: line, push it back and re-read it so that its text
     * is available in line. */
    if (!lineFileSkipTo(lf, "Source:"))
        lineFileUnexpectedEnd(lf);
    lineFileReuse(lf);
    lineFileNeedNext(lf, &line, NULL);
    word = nextWord(&line);	/* first word is discarded */
    word = nextWord(&line);	/* second word is the motif name */
    if (word == NULL)
        errAbort("Short Source: line %d of %s", lf->lineIx, lf->fileName);
    motif->name = cloneString(word);

    /* The motif is kept alive as the hash value, so it is not freed here. */
    hel = hashLookup(tfHash, motif->name);
    if (hel == NULL)
        errAbort("%s in %s but not GFFs", motif->name, lf->fileName);
    hel->val = motif;
    dnaMotifTabOut(motif, f);
    }
carefulClose(&f);
lineFileClose(&lf);
}
void wigAsciiToBinary( int argc, char *argv[] )
/* Translate each ascii data file named in argv[1] .. argv[argc-1] into a
 * binary data file (binout) and a table row definition file (wigout).
 *
 * Besides its locals this routine reads and writes many variables that are
 * declared outside of it (not visible here): name, chrom, wibFile,
 * featureName, chromName, binfile, binout, wigout, data_values, validData,
 * binsize, bincount, fileOffset, fileOffsetBegin, rowCount, Offset,
 * chromStart, dataSpan, trimVals, minVal, maxVal, overallLowerLimit and
 * overallUpperLimit.
 *
 * Each non-skipped input line is split on white space and must have:
 *   1 word  - value;  Offset is advanced by one
 *   2 words - position value;  Offset = position - 1
 *   3 words - chrom position value;  Offset = position - 1
 * Any other word count is handed to badFormat().
 *
 * Values are collected in data_values[]/validData[]; output_row() is called
 * when bincount reaches binsize, when the chrom changes, when the spacing
 * does not match dataSpan, when a gap would fill the rest of the bin, and
 * once more at end of file if bincount is non-zero.
 * NOTE(review): output_row() is defined elsewhere; the code here relies on
 * it to emit the current bin and presumably reset bincount - confirm. */
{
int i = 0;				/* general purpose int counter	*/
struct lineFile *lf;			/* for line file utilities	*/
char * fileName;			/* the basename of the input file */
char *line = (char *) NULL;		/* to receive data input line	*/
char *words[4];				/* to split data input line	*/
int wordCount = 0;			/* result of split	*/
int validLines = 0;			/* counting only lines with data */
unsigned long long previousOffset = 0;	/* for data missing detection */
double dataValue = 0.0;				/* from data input	*/
char *wigfile = (char *) NULL;	/*	file name of wiggle database file */
boolean firstInChrom;		/* Is this the first line in chromosome? */

/*	for each input data file	*/
for (i = 1; i < argc; ++i)
    {
    verbose(2, "translating file: %s\n", argv[i]);

    fileName = basename(argv[i]);
    if (name)		/*	Is the name of this feature specified ?	*/
	{
	safef( featureName, sizeof(featureName) - 1, "%s", name);
	}
    if (chrom)		/*	Is the chrom name specified ? */
	{
	chromName = cloneString(chrom);
	if (! name)	/*	that names the feature too if not already */
	    safef( featureName, sizeof(featureName) - 1, "%s", chrom);
	}
    /*	Name mangling to determine output file name */
    if (wibFile)	/*	when specified, simply use it	*/
	{
	binfile = addSuffix(wibFile, ".wib");
	wigfile = addSuffix(wibFile, ".wig");
	} else {	/*	not specified, construct from input names */
	if (startsWith("chr",fileName))
	    {
	    /* Work on a copy so the suffix can be chopped off the name. */
	    char *tmpString;
	    tmpString = cloneString(fileName);
	    chopSuffix(tmpString);
	    binfile = addSuffix(tmpString, ".wib");
	    wigfile = addSuffix(tmpString, ".wig");
	    if (! chrom)	/*	if not already taken care of	*/
		chromName = cloneString(tmpString);
	    if (! name && ! chrom)	/*	if not already done	*/
		safef(featureName, sizeof(featureName) - 1, "%s", tmpString);
	    freeMem(tmpString);
	    } else {
	    errAbort("Can not determine output file name, no -wibFile specified\n");
	    }
	}

    /* Reset all per-file state before reading this input file. */
    verbose(2, "output files: %s, %s\n", binfile, wigfile);
    validLines = 0;	/* to count only lines with data */
    rowCount = 0;	/* to count rows output */
    bincount = 0;	/* to count up to binsize	*/
    fileOffset = 0;	/* current location within binary data file	*/
    fileOffsetBegin = 0;/* location in binary data file where this bin starts*/
    firstInChrom = TRUE;
    /* Buffers from a previous input file are released and reallocated. */
    freeMem(data_values);
    freeMem(validData);
    data_values =  needMem( (size_t) (binsize * sizeof(double)));
    validData = needMem( (size_t) (binsize * sizeof(unsigned char)));
    /* Limits start at opposite extremes; they are not updated in this
     * function, only reported at the end. */
    overallLowerLimit = 1.0e+300;	/* for the complete set of data */
    overallUpperLimit = -1.0e+300;	/* for the complete set of data */
    binout = mustOpen(binfile,"w");	/*	binary data file	*/
    wigout = mustOpen(wigfile,"w");	/*	table row definition file */
    lf = lineFileOpen(argv[i], TRUE);	/*	input file	*/
    while (lineFileNextReal(lf, &line))
	{
	boolean readingFrameSlipped;
	char *valEnd;
	char *val;
	++validLines;
	wordCount = chopByWhite(line, words, ArraySize(words));
	if (wordCount == 1)
	    {
	    /* value only: position is implied, one past the previous one */
	    Offset += 1;
	    val = words[0];
	    }
	else if (wordCount == 2)
	    {
	    /* position and value: stored Offset is position minus one */
	    Offset = atoll(words[0]) - 1;
	    val = words[1];
	    }
	else if (wordCount == 3)
	    {
	    /* chrom, position and value */
	    char *newChrom = words[0];
	    boolean sameChrom = (chromName == NULL || sameString(chromName, newChrom));
	    Offset = atoll(words[1]) - 1;
	    val = words[2];
	    if (!sameChrom)
		{
		/* Chrom changed: emit what was collected and start over. */
		output_row();
		firstInChrom = TRUE;
		freez(&chromName);
		}
	    if (chromName == NULL)
		chromName = cloneString(newChrom);
	    }
	else
	    {
	    /* NOTE(review): val is NULL past this point, so the code below
	     * relies on badFormat() not returning - confirm. */
	    val = NULL;
	    badFormat(lf);
	    }
	/* NOTE(review): Offset is declared elsewhere; this test can only fire
	 * if it is a signed type, yet it is printed with %llu - confirm. */
	if (Offset < 0)
	    errAbort("Illegal offset %llu at line %d of %s", Offset+1, lf->lineIx,
	    	lf->fileName);
	dataValue = strtod(val, &valEnd);
	/* Optional trimming: raise to at least minVal, then cap at maxVal. */
	if(trimVals)
	    {
	    dataValue = max(minVal, dataValue);
	    dataValue = min(maxVal, dataValue);
	    }
	/* Reject an empty field or one with characters left after the number. */
	if ((*val == '\0') || (*valEnd != '\0'))
	    errAbort("Not a valid float at line %d: %s\n", lf->lineIx, val);
	/* see if this is the first time through, establish chromStart 	*/
	if (firstInChrom) {
	    chromStart = Offset;
	    verbose(2, "first offset: %llu\n", chromStart);
	}
	/* positions must be strictly increasing within a chrom */
	else if (!firstInChrom && (Offset <= previousOffset))
	    errAbort("ERROR: chrom positions not in order. line %d of %s\n"
	             "previous: %llu >= %llu <-current", 
		     lf->lineIx, lf->fileName, previousOffset+1, Offset+1);
	/* if we are working on a zoom level and the data is not exactly
	 * spaced according to the span, then we need to put each value
	 * in its own row in order to keep positioning correct for these
	 * data values.  The number of skipped bases has to be an even
	 * multiple of dataSpan
	 */
	readingFrameSlipped = FALSE;
	if (!firstInChrom && (dataSpan > 1))
	    {
	    int skippedBases;
	    int spansSkipped;
	    skippedBases = Offset - previousOffset;
	    spansSkipped = skippedBases / dataSpan;
	    /* integer division: a remainder means the spacing is off */
	    if ((spansSkipped * dataSpan) != skippedBases)
		readingFrameSlipped = TRUE;
	    }
	if (readingFrameSlipped)
	    {
	    verbose(2, "data not spanning %llu bases, prev: %llu, this: %llu, at line: %d\n", dataSpan, previousOffset, Offset, lf->lineIx);
	    output_row();
	    chromStart = Offset;	/*	a full reset here	*/
	    }
	/*	Check to see if data is being skipped	*/
	else if ( (!firstInChrom) && (Offset > (previousOffset + dataSpan)) )
	    {
	    unsigned long long off;
	    unsigned long long fillSize;	/* number of bytes */
	    verbose(2, "missing data offsets: %llu - %llu\n",
		    previousOffset+1,Offset-1);
	    /*	If we are just going to fill the rest of this bin with
	     *  no data, then may as well stop here.  No need to fill
	     *  it with nothing.
	     */
	    fillSize = (Offset - (previousOffset + dataSpan)) / dataSpan;
	    verbose(2, "filling NO_DATA for %llu bytes at bincount: %llu\n", fillSize, bincount);
	    if (fillSize + bincount >= binsize)
		{
		verbose(2, "completing a bin due to  NO_DATA for %llu bytes, only %llu - %llu = %llu to go\n", fillSize, binsize, bincount, binsize - bincount);
		verbose(2, "Offset: %llu, previousOffset: %llu\n",
			Offset, previousOffset);
		output_row();
		chromStart = Offset;	/*	a full reset here	*/
	    } else {
		fillSize = 0;
		/*	fill missing data with NO_DATA indication	*/
		/* Only the counters advance here; nothing is stored into
		 * data_values[] or validData[] for the skipped slots. */
		for (off = previousOffset + dataSpan; off < Offset;
			off += dataSpan)
		    {
		    ++fillSize;
		    ++fileOffset;
		    ++bincount;	/*	count scores in this bin */
		    if (bincount >= binsize) break;
		    }
		verbose(2, "filled NO_DATA for %llu bytes at bincount: %llu\n", fillSize, bincount);
		/*	If that finished off this bin, output it
		 *	This most likely should not happen here.  The
		 *	check above: if (fillSize + bincount >= binsize) 
		 *	should have caught this case already.
		 */
		    if (bincount >= binsize)
			{
			output_row();
			chromStart = Offset;	/* a full reset here */
			}
	        }
	    }
	/*	With perhaps the missing data taken care of, back to the
	 *	real data.
	 */
	data_values[bincount] = dataValue;
	validData[bincount] = TRUE;
	++fileOffset;
	++bincount;	/*	count scores in this bin */
	/*	Is it time to output a row definition ? */
	if (bincount >= binsize)
	    {
	    output_row();
	    }
	previousOffset = Offset;
	firstInChrom = FALSE;
        }	/*	reading file input loop end	*/
    /*	Done with input file, any data points left in this bin ?	*/
    if (bincount)
	{
	output_row();
	}
    verbose(2, "fini: %s, read %d lines, table rows: %llu, data bytes: %lld\n",
	    argv[i], lf->lineIx, rowCount, fileOffset);
    verbose(1, "data limits: [%g:%g], range: %g\n", 
	overallLowerLimit, overallUpperLimit,
	overallUpperLimit - overallLowerLimit);
    /* Release per-file resources; pointers are cleared for the next file. */
    lineFileClose(&lf);
    fclose(binout);
    fclose(wigout);
    freeMem(binfile);
    freeMem(wigfile);
    freeMem(chromName);
    binfile = (char *) NULL;
    wigfile = (char *) NULL;
    chromName = (char *) NULL;
    }
return;
}