int makeMilli(char *s, struct lineFile *lf) /* Convert ascii floating point to parts per thousand representation. */ { /* Cope with -0.0 and -0.2 etc.*/ if (s[0] == '-') { if (!sameString(s, "-0.0")) warn("Strange perc. field %s line %d of %s", s, lf->lineIx, lf->fileName); s = "0.0"; } if (!isdigit(s[0])) badFormat(lf,1); return round(10.0*atof(s)); }
// checkFormat: Checks the format of the input and prints an // error and exits if one is found. int checkFormat(char s[MAXS][MAXLEN]) { int state; int ops = 0, nums = 0; int exp_nums = 0; if(si == 0) { badFormat(); } if(si == 1) { if(isNumber(s[0]) == 0) { return 0; } else { badFormat(); } } // Start by counting the number of operators state = OPSTATE; for(int i = si - 1; i >= 0; i--) { if(isNumber(s[i]) != 0 && isOp(s[i]) != 0) { badFormat(); } switch(state) { case OPSTATE: if(isOp(s[i]) == 0) { ops++; state = OPSTATE; } else { exp_nums = ops + 1; nums++; state = NUMSTATE; } break; case NUMSTATE: if(isNumber(s[i]) == 0) { nums++; state = NUMSTATE; } else { badFormat(); } break; default: badFormat(); } } if(nums != exp_nums) { badFormat(); } return 0; }
void makeMotifs(char *inFile, struct hash *tfHash, char *outFile) /* Parse input motifs and save them to outFile in dnaMotif format. */ { struct lineFile *lf = lineFileOpen(inFile, TRUE); FILE *f = mustOpen(outFile, "w"); struct hashEl *hel; for (;;) { char *line; char *words[256], *word; int wordCount; struct dnaMotif *motif; if (!lineFileSkipTo(lf, "Probability matrix for")) break; lineFileNeedNext(lf, &line, NULL); wordCount = chopLine(line, words); if (wordCount >= ArraySize(words)) errAbort("Line %d of %s is too long\n", lf->lineIx, lf->fileName); if (!sameString(words[0], "#")) badFormat(lf); AllocVar(motif); motif->columnCount = wordCount-1; readBaseProbs(lf, words, "#A", &motif->aProb, motif->columnCount); readBaseProbs(lf, words, "#C", &motif->cProb, motif->columnCount); readBaseProbs(lf, words, "#T", &motif->tProb, motif->columnCount); readBaseProbs(lf, words, "#G", &motif->gProb, motif->columnCount); if (!lineFileSkipTo(lf, "Source:")) lineFileUnexpectedEnd(lf); lineFileReuse(lf); lineFileNeedNext(lf, &line, NULL); word = nextWord(&line); word = nextWord(&line); if (word == NULL) errAbort("Short Source: line %d of %s", lf->lineIx, lf->fileName); motif->name = cloneString(word); hel = hashLookup(tfHash, motif->name); if (hel == NULL) errAbort("%s in %s but not GFFs", motif->name, lf->fileName); hel->val = motif; dnaMotifTabOut(motif, f); } carefulClose(&f); lineFileClose(&lf); }
void wigAsciiToBinary( int argc, char *argv[] ) { int i = 0; /* general purpose int counter */ struct lineFile *lf; /* for line file utilities */ char * fileName; /* the basename of the input file */ char *line = (char *) NULL; /* to receive data input line */ char *words[4]; /* to split data input line */ int wordCount = 0; /* result of split */ int validLines = 0; /* counting only lines with data */ unsigned long long previousOffset = 0; /* for data missing detection */ double dataValue = 0.0; /* from data input */ char *wigfile = (char *) NULL; /* file name of wiggle database file */ boolean firstInChrom; /* Is this the first line in chromosome? */ /* for each input data file */ for (i = 1; i < argc; ++i) { verbose(2, "translating file: %s\n", argv[i]); fileName = basename(argv[i]); if (name) /* Is the name of this feature specified ? */ { safef( featureName, sizeof(featureName) - 1, "%s", name); } if (chrom) /* Is the chrom name specified ? */ { chromName = cloneString(chrom); if (! name) /* that names the feature too if not already */ safef( featureName, sizeof(featureName) - 1, "%s", chrom); } /* Name mangling to determine output file name */ if (wibFile) /* when specified, simply use it */ { binfile = addSuffix(wibFile, ".wib"); wigfile = addSuffix(wibFile, ".wig"); } else { /* not specified, construct from input names */ if (startsWith("chr",fileName)) { char *tmpString; tmpString = cloneString(fileName); chopSuffix(tmpString); binfile = addSuffix(tmpString, ".wib"); wigfile = addSuffix(tmpString, ".wig"); if (! chrom) /* if not already taken care of */ chromName = cloneString(tmpString); if (! name && ! chrom) /* if not already done */ safef(featureName, sizeof(featureName) - 1, "%s", tmpString); freeMem(tmpString); } else { errAbort("Can not determine output file name, no -wibFile specified\n"); } } verbose(2, "output files: %s, %s\n", binfile, wigfile); validLines = 0; /* to count only lines with data */ rowCount = 0; /* to count rows output */ bincount = 0; /* to count up to binsize */ fileOffset = 0; /* current location within binary data file */ fileOffsetBegin = 0;/* location in binary data file where this bin starts*/ firstInChrom = TRUE; freeMem(data_values); freeMem(validData); data_values = needMem( (size_t) (binsize * sizeof(double))); validData = needMem( (size_t) (binsize * sizeof(unsigned char))); overallLowerLimit = 1.0e+300; /* for the complete set of data */ overallUpperLimit = -1.0e+300; /* for the complete set of data */ binout = mustOpen(binfile,"w"); /* binary data file */ wigout = mustOpen(wigfile,"w"); /* table row definition file */ lf = lineFileOpen(argv[i], TRUE); /* input file */ while (lineFileNextReal(lf, &line)) { boolean readingFrameSlipped; char *valEnd; char *val; ++validLines; wordCount = chopByWhite(line, words, ArraySize(words)); if (wordCount == 1) { Offset += 1; val = words[0]; } else if (wordCount == 2) { Offset = atoll(words[0]) - 1; val = words[1]; } else if (wordCount == 3) { char *newChrom = words[0]; boolean sameChrom = (chromName == NULL || sameString(chromName, newChrom)); Offset = atoll(words[1]) - 1; val = words[2]; if (!sameChrom) { output_row(); firstInChrom = TRUE; freez(&chromName); } if (chromName == NULL) chromName = cloneString(newChrom); } else { val = NULL; badFormat(lf); } if (Offset < 0) errAbort("Illegal offset %llu at line %d of %s", Offset+1, lf->lineIx, lf->fileName); dataValue = strtod(val, &valEnd); if(trimVals) { dataValue = max(minVal, dataValue); dataValue = min(maxVal, dataValue); } if ((*val == '\0') || (*valEnd != '\0')) errAbort("Not a valid float at line %d: %s\n", lf->lineIx, val); /* see if this is the first time through, establish chromStart */ if (firstInChrom) { chromStart = Offset; verbose(2, "first offset: %llu\n", chromStart); } else if (!firstInChrom && (Offset <= previousOffset)) errAbort("ERROR: chrom positions not in order. line %d of %s\n" "previous: %llu >= %llu <-current", lf->lineIx, lf->fileName, previousOffset+1, Offset+1); /* if we are working on a zoom level and the data is not exactly * spaced according to the span, then we need to put each value * in its own row in order to keep positioning correct for these * data values. The number of skipped bases has to be an even * multiple of dataSpan */ readingFrameSlipped = FALSE; if (!firstInChrom && (dataSpan > 1)) { int skippedBases; int spansSkipped; skippedBases = Offset - previousOffset; spansSkipped = skippedBases / dataSpan; if ((spansSkipped * dataSpan) != skippedBases) readingFrameSlipped = TRUE; } if (readingFrameSlipped) { verbose(2, "data not spanning %llu bases, prev: %llu, this: %llu, at line: %d\n", dataSpan, previousOffset, Offset, lf->lineIx); output_row(); chromStart = Offset; /* a full reset here */ } /* Check to see if data is being skipped */ else if ( (!firstInChrom) && (Offset > (previousOffset + dataSpan)) ) { unsigned long long off; unsigned long long fillSize; /* number of bytes */ verbose(2, "missing data offsets: %llu - %llu\n", previousOffset+1,Offset-1); /* If we are just going to fill the rest of this bin with * no data, then may as well stop here. No need to fill * it with nothing. */ fillSize = (Offset - (previousOffset + dataSpan)) / dataSpan; verbose(2, "filling NO_DATA for %llu bytes at bincount: %llu\n", fillSize, bincount); if (fillSize + bincount >= binsize) { verbose(2, "completing a bin due to NO_DATA for %llu bytes, only %llu - %llu = %llu to go\n", fillSize, binsize, bincount, binsize - bincount); verbose(2, "Offset: %llu, previousOffset: %llu\n", Offset, previousOffset); output_row(); chromStart = Offset; /* a full reset here */ } else { fillSize = 0; /* fill missing data with NO_DATA indication */ for (off = previousOffset + dataSpan; off < Offset; off += dataSpan) { ++fillSize; ++fileOffset; ++bincount; /* count scores in this bin */ if (bincount >= binsize) break; } verbose(2, "filled NO_DATA for %llu bytes at bincount: %llu\n", fillSize, bincount); /* If that finished off this bin, output it * This most likely should not happen here. The * check above: if (fillSize + bincount >= binsize) * should have caught this case already. */ if (bincount >= binsize) { output_row(); chromStart = Offset; /* a full reset here */ } } } /* With perhaps the missing data taken care of, back to the * real data. */ data_values[bincount] = dataValue; validData[bincount] = TRUE; ++fileOffset; ++bincount; /* count scores in this bin */ /* Is it time to output a row definition ? */ if (bincount >= binsize) { output_row(); } previousOffset = Offset; firstInChrom = FALSE; } /* reading file input loop end */ /* Done with input file, any data points left in this bin ? */ if (bincount) { output_row(); } verbose(2, "fini: %s, read %d lines, table rows: %llu, data bytes: %lld\n", argv[i], lf->lineIx, rowCount, fileOffset); verbose(1, "data limits: [%g:%g], range: %g\n", overallLowerLimit, overallUpperLimit, overallUpperLimit - overallLowerLimit); lineFileClose(&lf); fclose(binout); fclose(wigout); freeMem(binfile); freeMem(wigfile); freeMem(chromName); binfile = (char *) NULL; wigfile = (char *) NULL; chromName = (char *) NULL; } return; }