Exemple #1
0
void makeConfigFromTabList(char *input, char *output, boolean useTarget)
/* makeConfigFromTabList - Create config file for hgBedsToBedExps from list of
 * file/cell/ab rows, or file/cell/ab/target rows when useTarget is set.
 *   input     - table with one row per data file
 *   output    - config file to create
 *   useTarget - TRUE when each row has a fourth (target) column; the target then
 *               leads the output line and cell+factor is written as one field. */
{
struct lineFile *lf = lineFileOpen(input, TRUE);
char *row[4];
/* lineFileRow(lf, row) would size its request from the row array and so insist on
 * four words per line, rejecting the three-column form.  Ask only for the
 * columns that this mode actually reads. */
int colCount = (useTarget ? 4 : 3);
FILE *f = mustOpen(output, "w");

while (lineFileNextRow(lf, row, colCount))
    {
    char *fileName = row[0];
    char *cell = row[1];
    char *factor = row[2];
    verbose(3, "%s\n", fileName);
    if (useTarget)
        {
        // 4 column input file -- output target cell+treatment+factor
        // NOTE(review): this ends with a tab and the next fprintf starts with one,
        // so an empty field is written in this mode -- confirm that is intended.
        fprintf(f, "%s\t%s+%s\t", row[3], cell, factor);
        }
    else
        {
        // antibody cell+treatment
        fprintf(f, "%s\t%s", factor, cell);
        }
    fprintf(f, "\t%s\t", cellAbbreviation(cell));
    /* scoreCol is adjusted by one before being written and before being passed on. */
    fprintf(f, "file\t%d\t", scoreCol-1);
    fprintf(f, "%g\t", calcNormScoreFactor(fileName, scoreCol-1));
    fprintf(f, "%s\n", fileName);
    }
lineFileClose(&lf);
carefulClose(&f);
}
Exemple #2
0
static void metaOutByType(char *type, FILE *f, char *midString)
/* Hand midString to the metadata writer that matches type; abort if type is unknown. */
{
if (sameString(type, "uw01"))
    uw01MetaOut(f, midString);
else if (sameString(type, "uw02"))
    uw02MetaOut(f, midString);
else if (sameString(type, "ans01"))
    ans01MetaOut(f, midString);
else if (sameString(type, "ans02"))
    ans02MetaOut(f, midString);
else if (sameString(type, "enh01"))
    enh01MetaOut(f, midString);
else if (sameString(type, "awgDnase01"))
    awgDnase01MetaOut(f, midString);
else
    errAbort("Unknown type '%s' in first command line parameter.", type);
}

void regClusterMakeTableOfTables(char *type, char *input, char *output)
/* regClusterMakeTableOfTables - Make up a table of tables for regCluster program.
 * Writes one line to output per name read from input: the name, the literal
 * fields 0, 1 and 2, the score column index, the normalization factor, and then
 * whatever the type-specific metadata writer adds. */
{
FILE *f = mustOpen(output, "w");
struct slName *inList = readAllLines(input);
struct slName *in;

/* The names typically share a leading and/or trailing part.  Measure both so that
 * only the varying middle of each name is passed to the metadata writer. */
int prefixSize = commonPrefixSize(inList);
int suffixSize = commonSuffixSize(inList);
uglyf("regClusterMakeTableOfTables(type=%s, input=%s, output=%s)\n", type, input, output);
int scoreIx = scoreColIx - 1;

in = inList;
while (in != NULL)
    {
    char *fileName = in->name;
    verbose(2, "Processing %s\n", fileName);
    fprintf(f, "%s\t0\t1\t2\t%d\t", fileName, scoreIx);
    fprintf(f, "%g", calcNormScoreFactor(fileName, scoreIx));

    /* Copy out the part of the name between the shared prefix and shared suffix. */
    int nameSize = strlen(fileName);
    int midSize = nameSize - prefixSize - suffixSize;
    char *midString = cloneStringZ(fileName + prefixSize, midSize);

    metaOutByType(type, f, midString);
    freez(&midString);
    fprintf(f, "\n");

    in = in->next;
    }
carefulClose(&f);
}
Exemple #3
0
void makeConfigFromFileList(char *input, char *output)
/* makeConfigFromFileList - Create config file for hgBedsToBedExps from list of files.
 *   input  - file whose lines are read as data file names
 *   output - config file to create
 * The part of each name between the prefix and suffix shared by all names is
 * split by camelParseTwo into cell and factor. */
{
FILE *f = mustOpen(output, "w");
struct slName *in, *inList = readAllLines(input);
int commonPrefix = commonPrefixSize(inList);
int commonSuffix = commonSuffixSize(inList);
for (in = inList; in != NULL; in = in->next)
    {
    char *s = in->name;
    int len = strlen(s);
    /* Isolate the varying middle part of the name. */
    char *midString = cloneStringZ(s+commonPrefix, len - commonPrefix - commonSuffix);
    char *factor, *cell;
    camelParseTwo(midString, &cell, &factor);
    fprintf(f, "%s\t%s\t", factor, cell);
    fprintf(f, "%s\t", cellAbbreviation(cell));
    /* scoreCol is adjusted by one before being written and before being passed on. */
    fprintf(f, "file\t%d\t", scoreCol-1);
    fprintf(f, "%g\t", calcNormScoreFactor(in->name, scoreCol-1));
    fprintf(f, "%s\n", in->name);
    /* Release the per-name copy.  This is done last so that cell and factor are
     * no longer needed, whether or not they point into midString. */
    freez(&midString);
    }
carefulClose(&f);
}
Exemple #4
0
void makeConfigFromEncodeList(char *input, char *output)
/* makeConfigFromEncodeList - Create config file for hgBedsToBedExps from
 * tab-separated file of format
 *         <relDate> <fileName> <fileSize> <submitDate> <metadata>
 * The metadata is a semicolon-separated list of name=value pairs and must
 * contain both a "cell" and an "antibody" tag.  Aborts if the metadata is empty,
 * if a metadata item has no '=', or if either of those tags is missing. */
{
FILE *f = mustOpen(output, "w");
struct lineFile *lf = lineFileOpen(input, TRUE);
char *line;

while (lineFileNextReal(lf, &line))
    {
    /* Parse out line into major components. */
    char *releaseDate = nextWord(&line);
    char *fileName = nextWord(&line);
    char *fileSize = nextWord(&line);
    char *submitDate = nextWord(&line);
    char *metadata = trimSpaces(line);
    if (isEmpty(metadata))
        errAbort("line %d of %s is truncated", lf->lineIx, lf->fileName);

    verbose(2, "releaseDate=%s; fileName=%s; fileSize=%s; submitDate=%s; %s\n",
        releaseDate, fileName, fileSize, submitDate, metadata);

    /* Loop through metadata looking for cell and antibody.  Metadata
     * is in format this=that; that=two words; that=whatever
     * The values found are pointers into the current line, not copies. */
    char *cell = NULL, *antibody = NULL;
    for (;;)
        {
        /* Find terminating semicolon if any, replace it with zero, and
         * note position for next time around loop. */
        metadata = skipLeadingSpaces(metadata);
        if (isEmpty(metadata))
            break;
        char *semi = strchr(metadata, ';');
        if (semi != NULL)
            *semi++ = 0;

        /* Parse out name/value pair. */
        char *name = metadata;
        char *value = strchr(metadata, '=');
        if (value == NULL)
            errAbort("Missing '=' in metadata after tag %s in line %d of %s",
                name, lf->lineIx, lf->fileName);
        *value++ = 0;
        name = trimSpaces(name);
        value = trimSpaces(value);

        /* Look for our tags. */
        if (sameString(name, "cell"))
            cell = value;
        else if (sameString(name, "antibody"))
            antibody = value;

        /* semi is NULL after the last item; the isEmpty check above ends the loop. */
        metadata = semi;
        }
    if (cell == NULL)
        errAbort("No cell in metadata line %d of %s", lf->lineIx, lf->fileName);
    if (antibody == NULL)
        errAbort("No antibody in metadata line %d of %s", lf->lineIx, lf->fileName);

    fprintf(f, "%s\t%s\t", antibody, cell);
    fprintf(f, "%s\t", cellAbbreviation(cell));
    /* scoreCol is adjusted by one before being written and before being passed on. */
    fprintf(f, "file\t%d\t", scoreCol-1);
    fprintf(f, "%g", calcNormScoreFactor(fileName, scoreCol-1));
    fprintf(f, "\t%s\n", fileName);
    }
/* Release the input reader as well as the output file. */
lineFileClose(&lf);
carefulClose(&f);
}