Ejemplo n.º 1
0
boolean read_fastq_auto(struct fastq_auto *fq, struct lineFile *lf, boolean just_seq_qual)
/* Fill in fq with the next record read from the open lineFile.
 * Returns FALSE if the file is already at EOF, TRUE once a record is loaded.
 * Set just_seq_qual=TRUE to skip parsing the header fields and load only
 * the sequence and quality information.
 * Aborts (errAbort) on a malformed header, on EOF before the quality
 * section, or when the sequence and quality lengths disagree.
 * NOTE(review): machine, seq and qual are filled with unbounded
 * strcpy/strcat; their capacity is not visible from here -- confirm that
 * input lines cannot exceed it. */
{
    char *line;
    int len = 0;
    int i;
    int qual_size = 0;
    boolean neof = lineFileNext(lf, &line, &len);
    if (!neof)
        return FALSE;
    /* should be header */
    if ((len <= 0) || (line[0] != '@'))
        errAbort("Expecting header. Problem on line %d\n", lf->lineIx);
    if (!just_seq_qual)
    {
        char *words[7];
        int numWords = chopByChar(line, ':', words, 6);
        /* words[0] through words[4] are all read below; a header with */
        /* fewer fields would leave some of them uninitialized. */
        if (numWords < 5)
            errAbort("Expecting at least 5 ':'-separated fields in header, found %d. Problem on line %d\n",
                     numWords, lf->lineIx);
        strcpy(fq->machine, words[0] + 1);
        fq->flow_cell = sqlSigned(words[1]);
        fq->tile = sqlSigned(words[2]);
        fq->tile_x = sqlSigned(words[3]);
        /* the fifth field is split further at '#' and then at '/' */
        words[5] = chopPrefixAt(words[4], '#');
        words[6] = chopPrefixAt(words[5], '/');
        fq->tile_y = sqlSigned(words[4]);
        fq->multiplex_index = sqlSigned(words[5]);
        fq->pair_num = sqlSigned(words[6]);
    }
    /* read the sequence: every line up to the one starting with '+' */
    fq->seq[0] = '\0';
    while ((neof = lineFileNext(lf, &line, &len)) && (len > 0) && (line[0] != '+'))
        strcat(fq->seq, line);
    if (!neof)
        errAbort("incomplete fastq file.  early EOF");
    fq->seq_size = strlen(fq->seq);
    /* at the point of the quality header.  who cares, read the quality */
    fq->qual[0] = '\0';
    while ((neof = lineFileNext(lf, &line, &len)) && (len > 0))
    {
        int line_size;
        /* '@' is itself a quality character (the one that becomes 0 in */
        /* the conversion below), so a line starting with '@' is taken as */
        /* the next record's header only once the quality string is as */
        /* long as the sequence.  The header is pushed back for the next call. */
        if ((line[0] == '@') && (qual_size >= fq->seq_size))
        {
            lineFileReuse(lf);
            break;
        }
        line_size = (int)strlen(line);
        /* refuse to append more quality than there is sequence */
        if (qual_size + line_size > fq->seq_size)
            errAbort("something wrong line %d.  sequence size (%d) should match quality size (%d)\n",
                     lf->lineIx, fq->seq_size, qual_size + line_size);
        strcat(fq->qual, line);
        qual_size += line_size;
    }
    if (qual_size != fq->seq_size)
        errAbort("something wrong line %d.  sequence size (%d) should match quality size (%d)\n",
                 lf->lineIx, fq->seq_size, qual_size);
    /* convert Illumina 1.3+ quals to Sanger */
    for (i = 0; i < qual_size; i++)
        fq->qual[i] -= 64;
    return TRUE;
}
Ejemplo n.º 2
0
static struct bed* regionsLoad(char* sectionsBed)
/* Return a bed3 list of regions for times when -regions is used.
 * If the filename has a comma then a number, then take just that line
 * (1-based).  The ",number" suffix is cut off sectionsBed in place.
 * If the number is 0, or no element exists at that position, the whole
 * list is returned unchanged. */
{
    struct bed* list = NULL;
    unsigned ix = 0;
    if (strchr(sectionsBed, ',')) {
        char* number_part = chopPrefixAt(sectionsBed, ',');
        if (number_part)
            ix = sqlUnsigned(number_part);
    }
    list = readAtLeastBed3(sectionsBed);
    if (list && (ix > 0)) {
        struct bed* single = slElementFromIx(list, ix - 1);
        if (single) {
            struct bed* rem;
            /* discard every element in front of the requested one */
            while ((rem = slPopHead(&list)) != single)
                bedFree(&rem);
            /* list now holds whatever followed single.  Free the tail */
            /* through list rather than through single->next, so it is */
            /* released even if popping detached single from its successors. */
            bedFreeList(&list);
            single->next = NULL;
            list = single;
        }
    }
    return list;
}
Ejemplo n.º 3
0
void bwtool_find_extrema(struct hash *options, char *favorites, char *regions, unsigned decimals, double fill, char *bigfile, char *tmp_dir, char *outputfile)
/* find local extrema */
{
    unsigned min_sep = sqlUnsigned((char *)hashOptionalVal(options, "min-sep", "0"));
    char *other_bigfile = (char *)hashOptionalVal(options, "against", NULL);
    enum ex_removal rem = get_removal(options);
    struct metaBig *main_big = metaBigOpen_check(bigfile, tmp_dir, regions);
    struct metaBig *other_big = NULL;
    struct extrema *main_list;
    struct extrema *other_list = NULL;
    struct extrema *ex;
    unsigned shift = 0;
    FILE *out;
    if (other_bigfile)
    {
	char *num;
	if (rem == no_removal)
	    errAbort("must specify either -maxima or -minima with -against");
	if (!strchr(other_bigfile, ','))
	    errAbort("must specify shift limit in -against option");
	num = chopPrefixAt(other_bigfile, ',');
	shift = sqlUnsigned(num);
	other_big = metaBigOpen_check(other_bigfile, tmp_dir, regions);
    }
    if (!main_big || (!other_big && other_bigfile))
	errAbort("could not open bigWig file");
    main_list = extrema_find(main_big, min_sep, rem);
    slReverse(&main_list);
    if (other_bigfile)
    {
	other_list = extrema_find(other_big, min_sep, rem);
	extrema_find_shifts(main_list, other_list, shift);
    }
    metaBigClose(&main_big);
    if (other_big)
	metaBigClose(&other_big);
    out = mustOpen(outputfile, "w");
    if (other_bigfile)
	for (ex = main_list; ex != NULL; ex = ex->next)
	    fprintf(out, "%s\t%d\t%d\t%d\t1000\t%c\n", ex->chrom, ex->chromStart, ex->chromStart+1, (int)ex->val, ex->min_or_max);
    else
    {
	slSort(&main_list, extrema_bed_cmp);
	for (ex = main_list; ex != NULL; ex = ex->next)
	    fprintf(out, "%s\t%d\t%d\t%0.*f\t1000\t%c\n", ex->chrom, ex->chromStart, ex->chromStart+1, decimals, ex->val, ex->min_or_max);
    }
    carefulClose(&out);
    extrema_free_list(&main_list);
}
int pickApartSeqName(char **pName)
/* Change /path/chr:start-end into chr and return start.
 * The name is split in place at ':'.  With three fields the first is
 * skipped; *pName is pointed at the chrom field.  With fewer than two
 * fields there is no range and 0 is returned.  Returns 0 and leaves
 * *pName untouched if pName or *pName is NULL, or if no field is found. */
{
char *name;
char *words[3];
int numWords = 0;
int skip = 0;
int start = 0;
if (!pName || ((name = *pName) == NULL))
    return 0;
/* The capacity is an element count; sizeof(words) alone is a byte count
 * and would let chopByChar write past the end of the array. */
numWords = chopByChar(name, ':', words, sizeof(words) / sizeof(words[0]));
if (numWords < 1)
    return 0;		/* nothing was stored in words[] */
if (numWords == 3)
    skip = 1;
*pName = words[0 + skip];
if (numWords > 1)
    {
    /* only touch the range slot when it was actually filled in */
    char *range = words[1 + skip];
    chopPrefixAt(range, '-');
    start = sqlUnsigned(range);
    }
return start;
}
Ejemplo n.º 5
0
struct trackHubSettingSpec *trackHubSettingsForVersion(char *specHost, char *version)
/* Return list of settings with support level. Version can be version string or spec url */
{
struct htmlPage *page = NULL;
if (version == NULL)
    {
    version = trackHubVersionDefault(specHost, &page);
    if (version == NULL)
        errAbort("Can't get default spec from host %s", specHost);
    }

/* Retrieve specs from file url. 
 * Settings are the first text word within any <code> tag having class="level-" attribute.
 * The level represents the level of support for the setting (e.g. base, full, deprecated)
 * The support level ('level-*') is the class value of the * <code> tag.
 * E.g.  <code class="level-full">boxedConfig on</code> produces:
 *      setting=boxedConfig, class=full */

if (page == NULL)
    page = trackHubVersionSpecMustGet(specHost, version);
if (page == NULL)
    errAbort("Can't get settings spec for version %s from host %s", version, specHost);
verbose(5, "Found %d tags\n", slCount(page->tags));

struct trackHubSettingSpec *spec, *savedSpec;
struct hash *specHash = hashNew(0);
struct htmlTag *tag;
struct htmlAttribute *attr;
char buf[256];
for (tag = page->tags; tag != NULL; tag = tag->next)
    {
    if (differentWord(tag->name, "code"))
        continue;
    attr = tag->attributes;
    if (attr == NULL || differentString(attr->name, "class") || !startsWith("level-", attr->val))
                        continue;
    AllocVar(spec);
    int len = min(tag->next->start - tag->end, sizeof buf - 1);
    memcpy(buf, tag->end, len);
    buf[len] = 0;
    verbose(6, "Found spec: %s\n", buf);
    spec->name = cloneString(firstWordInLine(buf));
    if (spec->name == NULL || strlen(spec->name) == 0)
        {
        warn("ERROR: format problem with trackDbHub.html -- contact UCSC.");
        continue;
        }
    spec->level = cloneString(chopPrefixAt(attr->val, '-'));
    verbose(6, "spec: name=%s, level=%s\n", spec->name, spec->level);
    savedSpec = (struct trackHubSettingSpec *)hashFindVal(specHash, spec->name);
    if (savedSpec != NULL)
        verbose(6, "found spec %s level %s in hash\n", savedSpec->name, savedSpec->level);
    if (savedSpec == NULL)
        {
        hashAdd(specHash, spec->name, spec);
        verbose(6, "added spec %s at level %s\n", spec->name, spec->level);
        }
    else if (trackHubSettingLevelCmp(spec, savedSpec) > 0)
        {
        hashReplace(specHash, spec->name, spec);
        verbose(6, "replaced spec %s at level %s, was %s\n", 
            spec->name, spec->level, savedSpec->level);
        }
    }
struct hashEl *el, *list = hashElListHash(specHash);

int settingsCt = slCount(list);
verbose(5, "Found %d settings's\n", slCount(list));
if (settingsCt == 0)
    errAbort("Can't find hub setting info for version %s (host %s)."
              " Use -version to indicate a different version number or url.", version, specHost);

slSort(&list, hashElCmp);
struct trackHubSettingSpec *specs = NULL;
int baseCt = 0;
int requiredCt = 0;
int deprecatedCt = 0;
for (el = list; el != NULL; el = el->next)
    {
    if (sameString(((struct trackHubSettingSpec *)el->val)->level, "base"))
        baseCt++;
    else if (sameString(((struct trackHubSettingSpec *)el->val)->level, "required"))
        requiredCt++;
    else if (sameString(((struct trackHubSettingSpec *)el->val)->level, "deprecated"))
        deprecatedCt++;
    slAddHead(&specs, el->val);
    }
slReverse(&specs);
verbose(3, 
        "Found %d supported settings for this version (%d required, %d base, %d deprecated)\n",
                slCount(specs), requiredCt, baseCt, deprecatedCt);
return specs;
}
Ejemplo n.º 6
0
static void wigZoom( int argc, char *argv[] )
/*	Read each file named in argv[1] .. argv[argc-1] as lines of
 *	"offset value" (text after '#' is ignored), collect the points into
 *	windows of dataSpan positions starting at offset 1, and pass each
 *	window to processBlock().  Offsets must be >= 1 and non-decreasing
 *	within a file.  Aborts (errAbort) on malformed input.
 */
{
int i = 0;				/*	loop counter	*/
int lineCount = 0;			/*	lines from input file */
struct lineFile *lf;			/* for line file utilities	*/
unsigned long long beginWindow = 0;	/* from data input	*/
unsigned long long Offset = 0;		/* from data input	*/
unsigned long long previousOffset = 0;	/* for data missing detection */
char *line = (char *) NULL;		/* to receive data input line	*/
char *words[2];				/* to split data input line	*/
int dataCount = 0;			/* points held in dataBlock	*/

/*	room for one window: dataSpan points	*/
dataBlock = (struct dataPoint *)
	needMem( (size_t) (dataSpan * sizeof(struct dataPoint)));

/*	for each input data file	*/
for (i = 1; i < argc; ++i)
    {
    verbose(2, "translating file: %s\n", argv[i]);
    lineCount = 0;
    dataCount = 0;
    previousOffset = 0;			/* ordering is checked per file */
    lf = lineFileOpen(argv[i], TRUE);	/*	input file	*/
    beginWindow = 1;			/* input coords are 1 relative */
    while (lineFileNext(lf, &line, NULL))
	{
	int wordCount;
	char *val;
	char *valEnd;
	double dataValue;

	++lineCount;
	chopPrefixAt(line, '#'); /* ignore any comments starting with # */
	if (strlen(line) < 3)	/*	anything left on this line */
	    continue;		/*	no, go to next line	*/

	wordCount = chopByWhite(line, words, 2);
	if (wordCount < 2)
	    errAbort("ERROR: Expecting at least two words at line %d, found %d",
		lineCount, wordCount);
	Offset = atoll(words[0]);
	if (Offset < previousOffset)
	    errAbort("ERROR: chrom positions not in order. previous: %llu is > current: %llu", previousOffset, Offset);
	val = words[1];
	dataValue = strtod(val, &valEnd);
	if ((*val == '\0') || (*valEnd != '\0'))
	    errAbort("Not a valid float at line %d: %s\n", lineCount, words[1]);
	if (Offset < 1)
	    errAbort("Illegal offset: %llu at line %d, dataValue: %g", Offset, 
		    lineCount, dataValue);
	/*	A window covers [beginWindow, beginWindow + dataSpan), which
	 *	is exactly the dataSpan positions dataBlock has room for.
	 */
	if (Offset >= (beginWindow + dataSpan))
	    {
	    processBlock(beginWindow, dataBlock, dataCount);
	    while ((beginWindow + dataSpan) <= Offset)
		beginWindow += dataSpan;
	    dataCount = 0;
	    }
	/*	repeated offsets could still exceed the buffer	*/
	if (dataCount >= dataSpan)
	    errAbort("ERROR: too many data points in window beginning at %llu, line %d",
		beginWindow, lineCount);
	dataBlock[dataCount].offset = Offset;
	dataBlock[dataCount++].value = dataValue;
	previousOffset = Offset;
	}
    /*	the last, partially filled window of this file	*/
    if (dataCount > 0)
	processBlock(beginWindow, dataBlock, dataCount);
    lineFileClose(&lf);
    }
}
Ejemplo n.º 7
0
/*	convolve() - perform the task on the input data
 *	Each file named in argv[1] .. argv[argc-1] is read into a histogram
 *	hash (one bin per whitespace separated value, keyed by bin number),
 *	which is then run through iteration() convolve_count times.
 *	Histograms are printed only at verbose level 2 or above.
 *
 *	I would like to rearrange this business here, and instead of
 *	reading in the data and leaving it in the hash for all other
 *	routines to work with, it would be best to get it immediately
 *	into an array.  That makes the work of the other routines much
 *	easier.
 */
static void convolve(int argc, char *argv[])
{
int i;
struct lineFile *lf;			/* for line file utilities	*/

for (i = 1; i < argc; ++i)
    {
    int lineCount = 0;			/* counting input lines	*/
    char *line = (char *)NULL;		/* to receive data input line	*/
    char *words[128];			/* to split data input line	*/
    int wordCount = 0;			/* result of split	*/
    struct hash *histo0;	/*	first histogram	*/
    struct hash *histo1;	/*	second histogram	*/
    int medianBin0 = 0;		/*	bin with the largest log_2 so far */
    double medianLog_2 = -500.0;	/*	largest log_2 so far	*/
    int bin = 0;		/*	0 to N-1 for N bins	*/
    int convolutions = 0;	/*	loop counter for # of convolutions */

    histo0 = newHash(0);

    lf = lineFileOpen(argv[i], TRUE);	/*	input file	*/
    verbose(1, "Processing %s\n", argv[i]);	/* the file being read */
    while (lineFileNext(lf, &line, NULL))
        {
        int j;			/*	loop counter over words	*/
        int inputValuesCount = 0;
        struct histoGram *hg;	/*	an allocated hash element	*/

        ++lineCount;
        chopPrefixAt(line, '#'); /* ignore any comments starting with # */
        if (strlen(line) < 3)	/*	anything left on this line ? */
            continue;		/*	no, go to next line	*/
        wordCount = chopByWhite(line, words, 128);
        if (wordCount < 1)
            warn("Expecting at least a word at line %d, file: %s, found %d words",
                lineCount, argv[i], wordCount);
        if (wordCount == 128)
            warn("May have more than 128 values at line %d, file: %s", lineCount, argv[i]);

        verbose(2, "Input data read from file: %s\n", argv[i]);
        for (j = 0; j < wordCount; ++j)
            {
            char binName[128];
            double dataValue;
            double probInput;
            double log_2;
            dataValue = strtod(words[j], NULL);
            ++inputValuesCount;
            if (logs)
                {
                /*	input is already a log2 value	*/
                log_2 = dataValue;
                probInput = pow(2.0,log_2);
                }
            else if (dataValue > 0.0)
                {
                log_2 = log2(dataValue);
                probInput = dataValue;
                }
            else
                {
                log_2 = -500.0;	/*	arbitrary limit	*/
                probInput = pow(2.0,log_2);
                }
            if (log_2 > medianLog_2)
                {
                medianLog_2 = log_2;
                medianBin0 = bin;
                }
            verbose(2, "bin %d: %g %0.5g\n",
                    inputValuesCount-1, probInput, log_2);

            AllocVar(hg);	/*	the histogram element	*/
            hg->bin = bin;
            hg->prob = probInput;
            hg->log_2 = log_2;
            snprintf(binName, sizeof(binName), "%d", hg->bin);
            hashAdd(histo0, binName, hg);

            ++bin;
            }	/*	for each word on an input line	*/
        }	/*	for each line in a file	*/
    lineFileClose(&lf);	/*	done reading this file	*/

    /*	file read complete, echo input	*/
    if (verboseLevel() >= 2)
        printHistogram(histo0, medianBin0);

    /*	perform convolutions to specified count
     *	the iteration does histo0 with itself to produce histo1
     *	Then histo0 is freed and histo1 copied to it for the
     *	next loop.
     */
    for (convolutions = 0; convolutions < convolve_count; ++convolutions)
        {
        int medianBin;
        histo1 = newHash(0);
        medianBin = iteration(histo0, histo1);
        if (verboseLevel() >= 2)
            printHistogram(histo1, medianBin);
        freeHashAndVals(&histo0);
        histo0 = histo1;
        }

    /*	release the last histogram before moving to the next file	*/
    freeHashAndVals(&histo0);
    }		/*	for each input file	*/
}	/*	convolve()	*/