void appendNewExperiment(char *file, struct hash *bedHash, struct hash *pslHash, struct expRecord **erList, int expNum)
/* Load every stanMad record from file, push a new experiment record for
 * expNum onto the head of erList, and for each record whose
 * "clid-prow-pcol" key is present in bedHash store expNum and the log2 of
 * the record's rat2n value in slot expNum of that bed.  A record without a
 * bed is skipped, unless its clone id is non-zero and present in pslHash,
 * in which case the program aborts.
 * NOTE(review): slot expNum is written without a visible bounds check;
 * presumably the bed arrays are sized by the caller -- confirm. */
{
char key[256];
int lineNum = 0;
struct stanMad *rec = NULL;
struct stanMad *recList = stanMadLoadAll(file);
struct expRecord *expRec = createExpRec(file, expNum);

slAddHead(erList, expRec);
for (rec = recList; rec != NULL; rec = rec->next)
    {
    struct bed *hit = NULL;
    struct psl *aligned = NULL;

    ++lineNum;

    /* Beds are keyed by clone id plus plate row and column. */
    snprintf(key, sizeof(key), "%d-%s-%d", rec->clid, rec->prow, rec->pcol);
    hit = hashFindVal(bedHash, key);
    if (hit != NULL)
        {
        hit->expIds[expNum] = expNum;
        hit->expScores[expNum] = safeLog2(rec->rat2n);
        continue;
        }

    /* No bed for this record: a zero clone id is silently ignored. */
    if (rec->clid == 0)
        continue;

    /* Alignments are keyed by the clone id alone. */
    snprintf(key, sizeof(key), "%d", rec->clid);
    aligned = hashFindVal(pslHash, key);
    if (aligned != NULL)
        errAbort("Counldn't find hash entry at line %d in %s for %s, %d, %d.\n", lineNum, file, key, rec->clid, rec->spot);
    }

stanMadFreeList(&recList);
}
/* Example #2 */
/* 0 */
void convertIntensitiesToRatios(struct bed *bedList)
/* For every bed in bedList, take the median of its scores that are not
 * missingVal, then overwrite each such score with the log2 of its ratio
 * to that median.  Each new score is also printed, one per line, to
 * scores.  When DEBUG is defined the per-bed values and medians are
 * additionally written to values.debug and medians.debug. */
{
struct idVal *sampleList = NULL, *sample = NULL;
struct bed *cur = NULL;
int ix = 0;
#ifdef DEBUG
FILE *valuesOut = mustOpen("values.debug", "w");
FILE *mediansOut = mustOpen("medians.debug", "w");
#endif

for (cur = bedList; cur != NULL; cur = cur->next)
    {
    float median = 0;

#ifdef DEBUG
    fprintf(mediansOut, "%s", cur->name);
    fprintf(valuesOut, "%s", cur->name);
#endif

    /* Gather the usable scores into a list so the median can be taken. */
    for (ix = 0; ix < cur->expCount; ix++)
        {
        if (cur->expScores[ix] == missingVal)
            continue;
        AllocVar(sample);
        sample->val = cur->expScores[ix];
        slAddHead(&sampleList, sample);
        }
    median = statMedian(&sampleList);

#ifdef DEBUG
    for (sample = sampleList; sample != NULL; sample = sample->next)
        fprintf(valuesOut, "\t%f", sample->val);
    fprintf(valuesOut, "\n");
    fprintf(mediansOut, "\t%f\n", median);
#endif
    statFreeList(&sampleList);

    /* Second pass: rewrite each usable score relative to the median. */
    for (ix = 0; ix < cur->expCount; ix++)
        {
        float relative = 0;
        float logRelative = 0;

        if (cur->expScores[ix] == missingVal)
            continue;
        relative = cur->expScores[ix] / median;
        logRelative = safeLog2(relative);
        cur->expScores[ix] = logRelative;
        fprintf(scores, "%f\n", logRelative);
        }
    }

#ifdef DEBUG
carefulClose(&mediansOut);
carefulClose(&valuesOut);
#endif
}
/* Example #3 */
/* 0 */
void affyPslAndAtlasToBedNew(char *pslFile, char *atlasFile, char *bedOut, 
	char *expRecOut)
/** Main function that does all the work for new-style atlas files.
 *
 * pslFile   - alignments; only read when shortOut is not set.
 * atlasFile - expression table.  The first real line is the header: an
 *             optional "Affy" prefix, a tab, then the text handed to
 *             lineToExp together with expRecOut.  Every later line is an
 *             id followed by one value per experiment.
 * bedOut    - output file, opened for writing.
 * expRecOut - passed through to lineToExp.
 *
 * Each value below minExpVal is raised to minExpVal, then every value is
 * replaced by safeLog2(value / median), where median comes from
 * findPositiveMedian for that row.  Rows without a positive median are
 * dropped.  With shortOut set the rows go straight to bedOut through
 * shortDataOut; otherwise they are kept in a hash keyed by id and emitted
 * as 15-column beds for each psl whose parsed query name has a row.
 *
 * Aborts on an empty atlas file, a header that is not new-format, a
 * header with no experiments, or a row with the wrong number of values. */
{
struct lineFile *lf = lineFileOpen(atlasFile, TRUE);
char *line, *name;
int i, wordCount, expCount;
char **row;
double *data, median;
double invMedian;
char *affyId;
struct hash *hash = newHash(17);
struct psl *psl;
FILE *f = NULL;
int dataCount = 0, pslCount = 0, bedCount = 0;
int minExpVal = 20;

/* Open Atlas file and use first line to create experiment table. */
if (!lineFileNextReal(lf, &line))
    errAbort("%s is empty", lf->fileName);
if (startsWith("Affy", line))
    line += 4;
if (line[0] != '\t')
    errAbort("%s doesn't seem to be a new format atlas file", lf->fileName);
expCount = lineToExp(line+1, expRecOut);
if (expCount <= 0)
    errAbort("No experiments in %s it seems", lf->fileName);
warn("%d experiments\n", expCount);

f = mustOpen(bedOut, "w");

/* Build up a hash keyed by affyID with an array of doubles (one per
 * experiment) for value.  In the short case write the row out right
 * away instead of storing it.
 * NOTE(review): data is not released for rows that are dropped or
 * written in short mode, and row and hash are never released -- looks
 * like a leak; confirm whether that is acceptable for this tool. */
AllocArray(row, expCount);
while (lineFileNextReal(lf, &line))
    {
    affyId = nextWord(&line);

    /* nextWord presumably leaves line NULL when nothing follows the id;
     * treat that as zero data points so the count check below reports
     * it instead of handing NULL to chopByWhite. */
    wordCount = (line == NULL) ? 0 : chopByWhite(line, row, expCount);
    if (wordCount != expCount)
        errAbort("Expecting %d data points, got %d line %d of %s", 
		expCount, wordCount, lf->lineIx, lf->fileName);
    if (hashLookup(hash, affyId))
	{
        warn("Duplicate %s, skipping all but first.", affyId);
	continue;
	}
    AllocArray(data, expCount);
    for (i=0; i<expCount; ++i)
	{
        data[i] = atof(row[i]);
        if (data[i] < minExpVal)
	    data[i] = minExpVal;
	}
    median = findPositiveMedian(data, expCount, minExpVal);
    /* Strictly positive only: a zero median would make the reciprocal
     * below a division by zero. */
    if (median > 0)
	{
	invMedian = 1.0/median;
	for (i=0; i<expCount; ++i)
	    data[i] = safeLog2(invMedian*data[i]);
	if (shortOut)
	    shortDataOut(f, affyId, expCount, data);
	else
	    hashAdd(hash, affyId, data);
        }
    data = NULL;
    ++dataCount;	/* Counts dropped rows too, as before. */
    }
lineFileClose(&lf);
warn("%d rows of expression data\n", dataCount);

/* Stream through psl file, converting it to bed with expression data. */
if (!shortOut)
    {
    lf = pslFileOpen(pslFile);
    while ((psl = pslNext(lf)) != NULL)
	{
	++pslCount;
        /* get probe id from sequence name */
        name=parseNameFromHgc(psl->qName);
	data = hashFindVal(hash, name);
        if (data != NULL)
	    {
            struct bed *bed = bedFromPsl(psl);
	    bed->expCount = expCount;
	    AllocArray(bed->expIds, expCount);
	    AllocArray(bed->expScores, expCount);
	    for (i=0; i<expCount; ++i)
		{
		bed->expScores[i] = data[i];
		bed->expIds[i] = i;
		}
	    bedTabOutN(bed, 15, f);
	    ++bedCount;

	    bedFree(&bed);
	    }
	pslFree(&psl);
	}
    warn("%d records in %s", pslCount, pslFile);
    warn("%d records written to %s", bedCount, bedOut);
    }
lineFileClose(&lf);
carefulClose(&f);
}