boolean read_fastq_auto(struct fastq_auto *fq, struct lineFile *lf, boolean just_seq_qual)
/* Fill in the fastq struct with the next record from the open lineFile.
 * Returns FALSE if already at EOF, TRUE once a record has been loaded.
 * Set just_seq_qual=TRUE to skip loading everything except the sequence
 * and quality information.
 * Calls errAbort() when the first line is not a '@' header, when the header
 * has too few ':'-separated fields, when EOF arrives before the quality
 * section, or when sequence and quality lengths differ. */
{
    char *line;
    int len = 0;
    int i;
    int qual_size;
    boolean neof = lineFileNext(lf, &line, &len);

    if (!neof)
        return FALSE;

    /* should be header */
    if ((len <= 0) || (line[0] != '@'))
        errAbort("Expecting header. Problem on line %d\n", lf->lineIx);
    if (!just_seq_qual)
    {
        /* header is parsed as
         * @machine:flow_cell:tile:tile_x:tile_y#multiplex_index/pair_num */
        char *words[7];
        int numWords = chopByChar(line, ':', words, 6);

        /* words[1]..words[4] are only filled in when five fields were found;
         * without this check they would be read uninitialised */
        if (numWords < 5)
            errAbort("Expecting at least 5 ':'-separated fields in header but found %d. Problem on line %d\n",
                     numWords, lf->lineIx);
        /* NOTE(review): this copy and the strcat() calls below are unbounded;
         * the capacity of the fastq_auto buffers is not visible here - confirm
         * that they are large enough for the longest expected record. */
        strcpy(fq->machine, words[0] + 1);   /* +1 skips the leading '@' */
        fq->flow_cell = sqlSigned(words[1]);
        fq->tile = sqlSigned(words[2]);
        fq->tile_x = sqlSigned(words[3]);
        /* split the last field further at '#' and then at '/' */
        words[5] = chopPrefixAt(words[4], '#');
        words[6] = chopPrefixAt(words[5], '/');
        fq->tile_y = sqlSigned(words[4]);
        fq->multiplex_index = sqlSigned(words[5]);
        fq->pair_num = sqlSigned(words[6]);
    }

    /* read the sequence: every line up to an empty line or the '+' line */
    fq->seq[0] = '\0';
    while ((neof = lineFileNext(lf, &line, &len)) && (len > 0) && (line[0] != '+'))
        strcat(fq->seq, line);
    if (!neof)
        errAbort("incomplete fastq file. early EOF");
    fq->seq_size = strlen(fq->seq);

    /* at the point of the quality header. who cares, read the quality.
     * NOTE(review): a line starting with '@' is taken as the next record's
     * header, so a quality line that itself begins with '@' ends the
     * quality section early and trips the size check below. */
    fq->qual[0] = '\0';
    while ((neof = lineFileNext(lf, &line, &len)) && (len > 0) && (line[0] != '@'))
        strcat(fq->qual, line);
    /* push the next header back, but only if a line was really read:
     * after EOF line/len no longer describe a fresh line */
    if (neof && (len > 0) && (line[0] == '@'))
        lineFileReuse(lf);
    qual_size = strlen(fq->qual);
    if (qual_size != fq->seq_size)
        errAbort("something wrong line %d.  sequence size (%d) should match quality size (%d)\n",
                 lf->lineIx, fq->seq_size, qual_size);

    /* convert Illumina 1.3+ quals to Sanger */
    for (i = 0; i < qual_size; i++)
        fq->qual[i] -= 64;
    return TRUE;
}
static struct bed* regionsLoad(char* sectionsBed) /* return a bed3 list of regions for times when -regions is used. */ /* If the filename has a comma then a number, then take just that line */ { struct bed* list = NULL; unsigned ix = 0; if (strchr(sectionsBed, ',')) { char* number_part = chopPrefixAt(sectionsBed, ','); if (number_part) ix = sqlUnsigned(number_part); } list = readAtLeastBed3(sectionsBed); if (list && (ix > 0)) { struct bed* single = slElementFromIx(list, ix - 1); if (single) { struct bed* rem; while ((rem = slPopHead(&list)) != single) bedFree(&rem); rem = single->next; bedFreeList(&rem); single->next = NULL; list = single; } } return list; }
void bwtool_find_extrema(struct hash *options, char *favorites, char *regions, unsigned decimals, double fill, char *bigfile, char *tmp_dir, char *outputfile) /* find local extrema */ { unsigned min_sep = sqlUnsigned((char *)hashOptionalVal(options, "min-sep", "0")); char *other_bigfile = (char *)hashOptionalVal(options, "against", NULL); enum ex_removal rem = get_removal(options); struct metaBig *main_big = metaBigOpen_check(bigfile, tmp_dir, regions); struct metaBig *other_big = NULL; struct extrema *main_list; struct extrema *other_list = NULL; struct extrema *ex; unsigned shift = 0; FILE *out; if (other_bigfile) { char *num; if (rem == no_removal) errAbort("must specify either -maxima or -minima with -against"); if (!strchr(other_bigfile, ',')) errAbort("must specify shift limit in -against option"); num = chopPrefixAt(other_bigfile, ','); shift = sqlUnsigned(num); other_big = metaBigOpen_check(other_bigfile, tmp_dir, regions); } if (!main_big || (!other_big && other_bigfile)) errAbort("could not open bigWig file"); main_list = extrema_find(main_big, min_sep, rem); slReverse(&main_list); if (other_bigfile) { other_list = extrema_find(other_big, min_sep, rem); extrema_find_shifts(main_list, other_list, shift); } metaBigClose(&main_big); if (other_big) metaBigClose(&other_big); out = mustOpen(outputfile, "w"); if (other_bigfile) for (ex = main_list; ex != NULL; ex = ex->next) fprintf(out, "%s\t%d\t%d\t%d\t1000\t%c\n", ex->chrom, ex->chromStart, ex->chromStart+1, (int)ex->val, ex->min_or_max); else { slSort(&main_list, extrema_bed_cmp); for (ex = main_list; ex != NULL; ex = ex->next) fprintf(out, "%s\t%d\t%d\t%0.*f\t1000\t%c\n", ex->chrom, ex->chromStart, ex->chromStart+1, decimals, ex->val, ex->min_or_max); } carefulClose(&out); extrema_free_list(&main_list); }
int pickApartSeqName(char **pName)
/* Change /path/chr:start-end into chr and return start.
 * The name is split in place at ':' into at most three fields.  With three
 * fields the first one is skipped; *pName is then pointed at the chromosome
 * field.  If a range field follows, it is cut at '-' and its first part is
 * parsed with sqlUnsigned() and returned.
 * Returns 0 when pName or *pName is NULL, when the name is empty (in which
 * case *pName is left untouched), or when there is no range field. */
{
    char *name;
    char *words[3];
    int numWords;
    int skip = 0;
    int start = 0;

    if (!pName || ((name = *pName) == NULL))
        return 0;

    /* pass the number of slots, not the byte size of the array, so the
     * splitter can never write past words[] */
    numWords = chopByChar(name, ':', words, (int)(sizeof(words) / sizeof(words[0])));
    if (numWords < 1)
        return 0;       /* nothing was split off; words[] holds no valid pointer */

    if (numWords == 3)
        skip = 1;
    *pName = words[skip];
    if (numWords > 1)
    {
        char *range = words[1 + skip];
        chopPrefixAt(range, '-');
        start = sqlUnsigned(range);
    }
    return start;
}
struct trackHubSettingSpec *trackHubSettingsForVersion(char *specHost, char *version) /* Return list of settings with support level. Version can be version string or spec url */ { struct htmlPage *page = NULL; if (version == NULL) { version = trackHubVersionDefault(specHost, &page); if (version == NULL) errAbort("Can't get default spec from host %s", specHost); } /* Retrieve specs from file url. * Settings are the first text word within any <code> tag having class="level-" attribute. * The level represents the level of support for the setting (e.g. base, full, deprecated) * The support level ('level-*') is the class value of the * <code> tag. * E.g. <code class="level-full">boxedConfig on</code> produces: * setting=boxedConfig, class=full */ if (page == NULL) page = trackHubVersionSpecMustGet(specHost, version); if (page == NULL) errAbort("Can't get settings spec for version %s from host %s", version, specHost); verbose(5, "Found %d tags\n", slCount(page->tags)); struct trackHubSettingSpec *spec, *savedSpec; struct hash *specHash = hashNew(0); struct htmlTag *tag; struct htmlAttribute *attr; char buf[256]; for (tag = page->tags; tag != NULL; tag = tag->next) { if (differentWord(tag->name, "code")) continue; attr = tag->attributes; if (attr == NULL || differentString(attr->name, "class") || !startsWith("level-", attr->val)) continue; AllocVar(spec); int len = min(tag->next->start - tag->end, sizeof buf - 1); memcpy(buf, tag->end, len); buf[len] = 0; verbose(6, "Found spec: %s\n", buf); spec->name = cloneString(firstWordInLine(buf)); if (spec->name == NULL || strlen(spec->name) == 0) { warn("ERROR: format problem with trackDbHub.html -- contact UCSC."); continue; } spec->level = cloneString(chopPrefixAt(attr->val, '-')); verbose(6, "spec: name=%s, level=%s\n", spec->name, spec->level); savedSpec = (struct trackHubSettingSpec *)hashFindVal(specHash, spec->name); if (savedSpec != NULL) verbose(6, "found spec %s level %s in hash\n", savedSpec->name, savedSpec->level); 
if (savedSpec == NULL) { hashAdd(specHash, spec->name, spec); verbose(6, "added spec %s at level %s\n", spec->name, spec->level); } else if (trackHubSettingLevelCmp(spec, savedSpec) > 0) { hashReplace(specHash, spec->name, spec); verbose(6, "replaced spec %s at level %s, was %s\n", spec->name, spec->level, savedSpec->level); } } struct hashEl *el, *list = hashElListHash(specHash); int settingsCt = slCount(list); verbose(5, "Found %d settings's\n", slCount(list)); if (settingsCt == 0) errAbort("Can't find hub setting info for version %s (host %s)." " Use -version to indicate a different version number or url.", version, specHost); slSort(&list, hashElCmp); struct trackHubSettingSpec *specs = NULL; int baseCt = 0; int requiredCt = 0; int deprecatedCt = 0; for (el = list; el != NULL; el = el->next) { if (sameString(((struct trackHubSettingSpec *)el->val)->level, "base")) baseCt++; else if (sameString(((struct trackHubSettingSpec *)el->val)->level, "required")) requiredCt++; else if (sameString(((struct trackHubSettingSpec *)el->val)->level, "deprecated")) deprecatedCt++; slAddHead(&specs, el->val); } slReverse(&specs); verbose(3, "Found %d supported settings for this version (%d required, %d base, %d deprecated)\n", slCount(specs), requiredCt, baseCt, deprecatedCt); return specs; }
static void wigZoom( int argc, char *argv[] ) { int i = 0; /* loop counter */ int lineCount = 0; /* lines from input file */ int validLines = 0; /* lines with actual data */ struct lineFile *lf; /* for line file utilities */ unsigned long long beginWindow = 0; /* from data input */ unsigned long long Offset = 0; /* from data input */ unsigned long long previousOffset = 0; /* for data missing detection */ char *line = (char *) NULL; /* to receive data input line */ char *words[2]; /* to split data input line */ int dataCount = 0; dataBlock = (struct dataPoint *) needMem( (size_t) (dataSpan * sizeof(struct dataPoint))); /* for each input data file */ for (i = 1; i < argc; ++i) { verbose(2, "translating file: %s\n", argv[i]); lineCount = 0; validLines = 0; dataCount = 0; lf = lineFileOpen(argv[i], TRUE); /* input file */ beginWindow = 1; /* input coords are 1 relative */ while (lineFileNext(lf, &line, NULL)) { int wordCount; char *val; char *valEnd; double dataValue; ++lineCount; chopPrefixAt(line, '#'); /* ignore any comments starting with # */ if (strlen(line) < 3) /* anything left on this line */ continue; /* no, go to next line */ ++validLines; wordCount = chopByWhite(line, words, 2); if (wordCount < 2) errAbort("ERROR: Expecting at least two words at line %d, found %d", lineCount, wordCount); Offset = atoll(words[0]); if (Offset < previousOffset) errAbort("ERROR: chrom positions not in order. 
previous: %llu is > current: %llu", previousOffset, Offset); val = words[1]; dataValue = strtod(val, &valEnd); if ((*val == '\0') || (*valEnd != '\0')) errAbort("Not a valid float at line %d: %s\n", lineCount, words[1]); if (Offset < 1) errAbort("Illegal offset: %llu at line %d, dataValue: %g", Offset, lineCount, dataValue); if (Offset > (beginWindow + dataSpan)) { processBlock(beginWindow, dataBlock, dataCount); while ((beginWindow + dataSpan) < Offset) beginWindow += dataSpan; dataCount = 0; } dataBlock[dataCount].offset = Offset; dataBlock[dataCount++].value = dataValue; previousOffset = Offset; } } }
/* convolve() - perform the task on the input data * I would like to rearrange this business here, and instead of * reading in the data and leaving it in the hash for all other * routines to work with, it would be best to get it immediately * into an array. That makes the work of the other routines much * easier. */ static void convolve(int argc, char *argv[]) { int i; struct lineFile *lf; /* for line file utilities */ for (i = 1; i < argc; ++i) { int lineCount = 0; /* counting input lines */ char *line = (char *)NULL; /* to receive data input line */ char *words[128]; /* to split data input line */ int wordCount = 0; /* result of split */ struct hash *histo0; /* first histogram */ struct hash *histo1; /* second histogram */ int medianBin0 = 0; /* bin at median for histo0 */ double medianLog_2 = -500.0; /* log at median */ int bin = 0; /* 0 to N-1 for N bins */ int convolutions = 0; /* loop counter for # of convolutions */ histo0 = newHash(0); lf = lineFileOpen(argv[i], TRUE); /* input file */ verbose(1, "Processing %s\n", argv[1]); while (lineFileNext(lf, &line, NULL)) { int j; /* loop counter over words */ int inputValuesCount = 0; struct histoGram *hg; /* an allocated hash element */ ++lineCount; chopPrefixAt(line, '#'); /* ignore any comments starting with # */ if (strlen(line) < 3) /* anything left on this line ? 
*/ continue; /* no, go to next line */ wordCount = chopByWhite(line, words, 128); if (wordCount < 1) warn("Expecting at least a word at line %d, file: %s, found %d words", lineCount, argv[i], wordCount); if (wordCount == 128) warn("May have more than 128 values at line %d, file: %s", lineCount, argv[i]); verbose(2, "Input data read from file: %s\n", argv[i]); for (j = 0; j < wordCount; ++j) { char binName[128]; double dataValue; double probInput; double log_2; dataValue = strtod(words[j], NULL); ++inputValuesCount; if (logs) { log_2 = dataValue; probInput = pow(2.0,log_2); } else { if (dataValue > 0.0) { log_2 = log2(dataValue); probInput = dataValue; } else { log_2 = -500.0; /* arbitrary limit */ probInput = pow(2.0,log_2); } } if (log_2 > medianLog_2) { medianLog_2 = log_2; medianBin0 = bin; } verbose(2, "bin %d: %g %0.5g\n", inputValuesCount-1, probInput, log_2); AllocVar(hg); /* the histogram element */ hg->bin = bin; hg->prob = probInput; hg->log_2 = log_2; snprintf(binName, sizeof(binName), "%d", hg->bin); hashAdd(histo0, binName, hg); ++bin; } /* for each word on an input line */ } /* for each line in a file */ /* file read complete, echo input */ if (verboseLevel() >= 2) printHistogram(histo0, medianBin0); /* perform convolutions to specified count * the iteration does histo0 with itself to produce histo1 * Then histo0 is freed and histo1 copied to it for the * next loop. */ for (convolutions = 0; convolutions < convolve_count; ++convolutions) { int medianBin; histo1 = newHash(0); medianBin = iteration(histo0, histo1); if (verboseLevel() >= 2) printHistogram(histo1, medianBin); freeHashAndVals(&histo0); histo0 = histo1; } } /* for each input file */ } /* convolve() */