Example #1
0
static int
get_capture_config(const tchar *config_file, struct capture_config *config,
		   int add_flags, const tchar *fs_source_path)
{
	int ret;
	tchar *tmp_config_file = NULL;

	memset(config, 0, sizeof(*config));

	/* For WIMBoot capture, check for default capture configuration file
	 * unless one was explicitly specified.  */
	if (!config_file && (add_flags & WIMLIB_ADD_FLAG_WIMBOOT)) {

		/* XXX: Handle loading file correctly when in NTFS volume.  */

		size_t len = tstrlen(fs_source_path) +
			     tstrlen(wimboot_cfgfile);
		tmp_config_file = MALLOC((len + 1) * sizeof(tchar));
		struct stat st;

		tsprintf(tmp_config_file, T("%"TS"%"TS),
			 fs_source_path, wimboot_cfgfile);
		if (!tstat(tmp_config_file, &st)) {
			config_file = tmp_config_file;
			add_flags &= ~WIMLIB_ADD_FLAG_WINCONFIG;
		} else {
			WARNING("\"%"TS"\" does not exist.\n"
				"          Using default capture configuration!",
				tmp_config_file);
		}
	}

	if (add_flags & WIMLIB_ADD_FLAG_WINCONFIG) {
		/* Use Windows default.  */
		if (config_file)
			return WIMLIB_ERR_INVALID_PARAM;
		ret = read_capture_config(T("wincfg"), wincfg,
					  sizeof(wincfg) - 1, config);
	} else if (config_file) {
		/* Use the specified configuration file.  */
		ret = read_capture_config(config_file, NULL, 0, config);
	} else {
		/* ... Or don't use any configuration file at all.  No files
		 * will be excluded from capture, all files will be compressed,
		 * etc.  */
		ret = 0;
	}
	FREE(tmp_config_file);
	return ret;
}
Example #2
0
// ----------------------------------------------------------------------------
// fefind
// Builds an output file name from fname (with everything from its last '.'
// removed) plus ext, and probes for a name that is not yet taken.
// Candidates are tried in this order: "<base><ext>", "<base>-1<ext>", ...,
// "<base>-15<ext>".  A candidate counts as free when tstat() on it returns a
// negative value.
//   fname   - source file name the base is derived from
//   ext     - extension to append
//   newname - caller-supplied output buffer (written with a limit of MAXS;
//             presumably at least MAXS+1 characters - confirm with callers)
// Returns newname holding the first free candidate, or NULL when all
// candidates exist; in that case newname is reset to "<base><ext>".
// ----------------------------------------------------------------------------
TCHAR *fefind(TCHAR *fname, TCHAR *ext, TCHAR *newname)
{
  TCHAR base[MAXS+1], suffix[MAXS+1];
  TCHAR *dot;
  struct _stat info;
  int attempt;

  // Cut the name at its last '.' to get the base.
  xstrncpy(base, fname, MAXS);
  dot = strrchr(base, T('.'));
  if (dot != NULL)
    *dot = T('\0');

  // First candidate carries no number.
  xstrncpy(newname, base, MAXS);
  xstrncat(newname, ext, MAXS);

  for (attempt = 0; attempt <= 15; attempt++) {
    if (tstat(newname, &info) < 0)
      return newname; // stat failed: treat the name as free

    // Name is taken; build the next numbered candidate.
    snprintf(suffix, MAXS, T("-%d%s"), attempt + 1, ext);
    xstrncpy(newname, base, MAXS);
    xstrncat(newname, suffix, MAXS);
  }

  // Every candidate exists: hand back the plain name and signal failure.
  xstrncpy(newname, base, MAXS);
  xstrncat(newname, ext, MAXS);
  return NULL;
} /* fefind */
/*
 * Analyse a single pathway described by one text line.
 *
 * line       Input of the form "<name> <url> <gene> <gene> ...".  The buffer
 *            is also reused as scratch space (gene parsing, reading score
 *            files, building the output file name), so its contents are
 *            destroyed.  It is read into with a limit of 1000 characters.
 * sub        Array of pp->nSub subjects.  score[g] is filled in for every
 *            gene found; id and cc are additionally filled in when scores
 *            come from per-gene score files.
 * model      Passed through to fillModelWithVars(), do_onetailed_LRT() and
 *            runTestFile().
 * pp         Run parameters (score source, output files, which tests to run).
 * writeFile  Non-zero to write a per-pathway output file and to add a row to
 *            pp->summaryOutputFile when that is open.
 *
 * Uses `rest` and `totScore`, which are not declared in this function
 * (presumably file-scope buffers - confirm their sizes cover the line length
 * and pp->nSub respectively).
 *
 * Returns 0 if the line does not contain name, URL and at least one gene.
 * Otherwise returns the SLP of the last test that ran (t-test, then
 * likelihood-ratio test, then each test file in order), or 0 if no test is
 * enabled.  Malformed score data is reported through dcerror() and
 * terminates the program.
 */
float runOnePathway(char *line, pathwaySubject **sub, lrModel *model,paParams *pp, int writeFile)
{
	char pathwayName[1000], pathwayURL[1000], gene[MAX_LOCI][50], scoreFileName[1000], thisGene[50];
	FILE *fs, *fo;
	int nGene, missing[MAX_LOCI], s, g, n[2], i, cc, t, nValidGenes;
	float sigma_x[2], sigma_x2[2], mean[2], var[2], SLP, geneSLP, SE, tval, s2, score;
	double p, pathway_p;
	fo = 0;
	SLP = 0; // defined return value even when no test is enabled
	// Field widths match the local buffer sizes declared above.
	if (sscanf(line, "%999s %999s %[^\n]", pathwayName, pathwayURL, rest) != 3)
		return 0;
	// Peel one gene name at a time off the front of `rest`, stopping before
	// the gene table would overflow.
	for (nGene = 0; nGene < MAX_LOCI && (strcpy(line, rest), *rest = '\0', sscanf(line, "%49s %[^\n]", gene[nGene], rest) >= 1); ++nGene)
		;
	if (nGene == MAX_LOCI && *rest != '\0')
	{
		// Text is left over, so the pathway lists more genes than fit.
		dcerror(1, "Too many genes in pathway %s, maximum is %d\n", pathwayName, MAX_LOCI); exit(1);
	}
	for (s = 0; s < pp->nSub; ++s)
		totScore[s] = 0;
	nValidGenes = 0;
	for (g = 0; g < nGene; ++g)
	{
		if (pp->scoreTableFile)
		{
			// Scores come from one indexed table: seek to the gene's row.
			std::map<std::string, FILEOFFSET>::const_iterator geneIter = pp->geneIndex.find(gene[g]);
			if (geneIter == pp->geneIndex.end())
				missing[g] = 1;
			else
			{
				fseek(pp->scoreTableFile, geneIter->second, SEEK_SET);
				// The read must not live inside assert(): it would vanish
				// in NDEBUG builds and leave thisGene uninitialised.
				if (fscanf(pp->scoreTableFile, "%49s", thisGene) != 1)
				{
					dcerror(1, "Could not read a gene name from %s at position %lld while looking for %s\n",
						pp->scoreTableFileName, (long long)geneIter->second, gene[g]);
					exit(1);
				}
				if (strcmp(thisGene, gene[g]))
				{
					dcerror(1, "Problem finding %s in %s. Tried to seek to position %lld but got this: %s\n",
						gene[g], pp->scoreTableFileName, (long long)geneIter->second, thisGene);
					exit(1);
				}
				missing[g] = 0;
				++nValidGenes;
				for (s = 0; s < pp->nSub; ++s)
				{
					if (fscanf(pp->scoreTableFile, "%f", &sub[s]->score[g]) != 1)
					{
						dcerror(1, "Not enough entries in %s for gene %s\n", pp->scoreTableFileName, gene[g]); exit(1);
					}
					totScore[s] += sub[s]->score[g];
				}
			}
		}
		else
		{
			// Scores come from one file per gene: "<id> <cc> <score>" per subject.
			snprintf(scoreFileName, sizeof(scoreFileName), "%s%s%s", pp->scoreFilePrefix, gene[g], pp->scoreFileSuffix);
			fs = fopen(scoreFileName, "r");
			if (fs == 0)
				missing[g] = 1;
			else
			{
				missing[g] = 0;
				++nValidGenes;
				for (s = 0; s < pp->nSub; ++s)
				{
					if (!fgets(line, 1000, fs))
					{
						dcerror(1, "Not enough lines in scores file %s\n", scoreFileName); exit(1);
					}
					if (sscanf(line, "%s %d %f", sub[s]->id, &sub[s]->cc, &sub[s]->score[g]) != 3)
					{
						dcerror(1, "Not enough entries on this line in scores file %s:\n%s\n", scoreFileName, line); exit(1);
					}
					totScore[s] += sub[s]->score[g];
				}
				fclose(fs);
			}

		}
	}
	// Use average score not total score.  With no valid gene the totals are
	// all zero and are left that way rather than divided by zero.
	if (nValidGenes > 0)
	{
		for (s = 0; s < pp->nSub; ++s)
			totScore[s] /= nValidGenes;
	}
	if (writeFile)
	{
		if (pp->summaryOutputFile != 0)
			fprintf(pp->summaryOutputFile, "%s\t%s\t", pathwayName, pathwayURL);
		sprintf(line, "%s%s%s", pp->outputFilePrefix, pathwayName, pp->outputFileSuffix);
		fo = fopen(line, "w");
		if (fo == 0)
		{
			dcerror(2, "Could not open output file %s\n", line);
		}
		else
			fprintf(fo, "%s\n%s\n\n", pathwayName, pathwayURL);
	}

	if (pp->do_ttest)
	{
		// Pooled-variance t-test on the averaged pathway score; index 0/1
		// is the subject's cc value (reported as Controls/Cases below).
		for (i = 0; i < 2; ++i)
			sigma_x[i] = sigma_x2[i] = n[i] = 0;
		for (s = 0; s < pp->nSub; ++s)
		{
			cc = sub[s]->cc;
			++n[cc];
			score = totScore[s];
			sigma_x[cc] += score;
			sigma_x2[cc] += score * score;
		}
		for (i = 0; i < 2; ++i)
		{
			var[i] = (sigma_x2[i] - sigma_x[i] * sigma_x[i] / n[i]) / (n[i] - 1);
			mean[i] = sigma_x[i] / n[i];
		}

		s2 = ((n[0] - 1)*var[0] + (n[1] - 1)*var[1]) / (n[0] + n[1] - 2);
		SE = sqrt(s2*(1 / (float)n[0] + 1 / (float)n[1]));
		if (SE == 0)
			tval = 0;
		else
			tval = (mean[1] - mean[0]) / SE;
		pathway_p = tstat(tval, n[0] + n[1] - 2.0) / 2; // one-tailed
		SLP = log10(2 * pathway_p)*(mean[0] >= mean[1] ? 1 : -1);
		// Keep the summary row aligned even if the per-pathway file could
		// not be opened, matching how the later tests are reported.
		if (writeFile && pp->summaryOutputFile != 0)
			fprintf(pp->summaryOutputFile, "%f\t", SLP);
		if (fo != NULL)
		{
			fprintf(fo, "             Controls  Cases     \n"
				"N            %9d %9d\n"
				"Mean score   %9.3f %9.3f\n"
				"t (%d df) = %6.3f\n"
				"p = %10.8f\n"
				"SLP = %8.2f (signed log10(p), positive if cases score higher than controls)\n",
				n[0], n[1], mean[0], mean[1], n[0] + n[1] - 2, tval, 2 * pathway_p, SLP);
			if (SLP > pp->geneLevelOutputThreshold)
			{
				// Repeat the same test per gene.  Statistics go into
				// scratch variables / geneSLP so the pathway-level SLP
				// (the return value) is not clobbered.
				float gMean[2], gVar[2], gSigmaX[2], gSigmaX2[2];
				int gN[2];
				fprintf(fo, "\n\nSLPs for individual genes above threshold:\n");
				for (g = 0; g < nGene; ++g)
				{
					if (missing[g] == 1)
						continue;
					for (i = 0; i < 2; ++i)
						gSigmaX[i] = gSigmaX2[i] = gN[i] = 0;
					for (s = 0; s < pp->nSub; ++s)
					{
						cc = sub[s]->cc;
						++gN[cc];
						score = sub[s]->score[g];
						gSigmaX[cc] += score;
						gSigmaX2[cc] += score * score;
					}
					for (i = 0; i < 2; ++i)
					{
						gVar[i] = (gSigmaX2[i] - gSigmaX[i] * gSigmaX[i] / gN[i]) / (gN[i] - 1);
						gMean[i] = gSigmaX[i] / gN[i];
					}

					s2 = ((gN[0] - 1)*gVar[0] + (gN[1] - 1)*gVar[1]) / (gN[0] + gN[1] - 2);
					SE = sqrt(s2*(1 / (float)gN[0] + 1 / (float)gN[1]));
					if (SE == 0)
						tval = 0;
					else
						tval = (gMean[1] - gMean[0]) / SE;
					p = tstat(tval, gN[0] + gN[1] - 2.0) / 2; // one-tailed
					geneSLP = log10(2 * p)*(gMean[0] >= gMean[1] ? 1 : -1);

					if (geneSLP > pp->geneLevelOutputThreshold)
						fprintf(fo, "%s %8.2f\n", gene[g], geneSLP);
				}
				fprintf(fo, "\n\nList of genes for which no score file was found:\n");
				for (g = 0; g < nGene; ++g)
					if (missing[g] == 1)
						fprintf(fo, "%s\n", gene[g]);
			}

		}
		if (mean[0] > mean[1])
			pathway_p = 1.0 - pathway_p;
	}
	if(pp->do_lrtest||pp->numTestFiles>0)
		fillModelWithVars(model,pp->nSub,pp,pp->scoreCol);
	if (pp->do_lrtest)
	{
		SLP = do_onetailed_LRT(fo, model, pp);
		if (writeFile &&pp->summaryOutputFile != 0)
			fprintf(pp->summaryOutputFile, "%f\t", SLP);
	}
	if (pp->numTestFiles > 0)
	{
		for (t = 0; t < pp->numTestFiles; ++t)
		{
			SLP = runTestFile(fo, pp->testFiles[t].fn, model, pp);
			if (writeFile &&pp->summaryOutputFile != 0)
				fprintf(pp->summaryOutputFile, "%f\t", SLP);
		}
	}
	if (fo)
		fclose(fo);
	if (writeFile && pp->summaryOutputFile != 0)
		fprintf(pp->summaryOutputFile, "\n");
	return SLP;
}
/*
 * Pooled-variance t-test comparing per-subject scores between the two
 * cohorts, indexed by each subject's cc value (0 is reported as Controls,
 * 1 as Cases).
 *
 * fo        Report stream, or NULL to suppress all output.
 * score     Score for each of the nsub subjects.
 * sub       Subjects; only the cc field is read here.
 * nsub      Number of entries in score and sub.
 * pi        Supplies n_loci_to_use and loci_to_use[].
 * spi       use_cc_freqs[i] selects, per cohort, whether statistics are
 *           derived from supplied allele frequencies instead of from the
 *           individual scores.
 * cc_freq, cc_count, max_cc, weight, missing, rarer
 *           Used only for a cohort with use_cc_freqs set: genotype counts
 *           are reconstructed from the frequency assuming Hardy-Weinberg
 *           equilibrium, each locus contributes weight[l] per copy of the
 *           rarer allele and missing[l] per untyped subject, and per-locus
 *           variances are summed.  max_cc[i] is taken as the cohort size.
 *
 * When fo is non-NULL a table of N, mean, SD, t, p and SLP is written; with
 * ALLOWUNEQUALVARIANCES defined a z-test not assuming equal variances is
 * appended and SLP is recomputed from it.
 *
 * Returns the one-tailed p value from the t-test.  SLP is a local unless
 * USEMLP is defined, in which case it must be declared elsewhere.
 */
double do_score_onetailed_ttest(FILE *fo, float *score, subject **sub, int nsub, par_info *pi, sa_par_info *spi, float cc_freq[2][MAX_LOCI], float cc_count[2][MAX_LOCI], int max_cc[2], float *weight, float *missing, int *rarer)
{
	int s, i, n[2], cc, pl, l;
	float sigma_x[2], sigma_x2[2], mean[2], var[2], tval, SE, s2, rfreq, fscore, z, total_score;
	double p, pz;
#ifndef USEMLP
	float SLP;
#endif
	for (i = 0; i<2; ++i)
		sigma_x[i] = sigma_x2[i] = n[i] = 0;
	for (s = 0; s<nsub; ++s)
	{
		cc = sub[s]->cc;
		++n[cc];
		sigma_x[cc] += score[s];
		sigma_x2[cc] += score[s] * score[s];
	}
	for (i = 0; i<2; ++i)
	{
		if (spi->use_cc_freqs[i])
		{
			total_score = 0;
			n[i] = max_cc[i];
			var[i] = 0;
			for (pl = 0; pl<pi->n_loci_to_use; ++pl)
			{
				float tempvar = 0;
				float ngen[3]; // number of typed subjects who are AA,AB,BB
				l = pi->loci_to_use[pl];
				// sums are restarted for every locus; only var and
				// total_score accumulate across loci
				sigma_x2[i] = 0;
				sigma_x[i] = 0;
				if (rarer[l] == 2)
					rfreq = cc_freq[i][l];
				// assume supplied frequency is of alt allele, i.e. allele 2
				// but score will be added to by rarer allele, even if this is allele 1
				else
					rfreq = 1 - cc_freq[i][l];
				// as actual counts missing, assume HWE
				ngen[0] = (1 - rfreq)*(1 - rfreq)*cc_count[i][l];
				ngen[1] = 2 * rfreq*(1 - rfreq)*cc_count[i][l];
				ngen[2] = rfreq*rfreq*cc_count[i][l];
				fscore = weight[l]; // average score for each subject for this locus
				sigma_x[i] += ngen[1] * fscore; // AB
				sigma_x2[i] += ngen[1] * fscore*fscore;
				sigma_x[i] += ngen[2] * 2 * fscore; // BB
				sigma_x2[i] += ngen[2] * 4 * fscore*fscore;
				fscore = missing[l]; // as if missing scores were also independent
				sigma_x[i] += (max_cc[i] - cc_count[i][l])*fscore;
				sigma_x2[i] += (max_cc[i] - cc_count[i][l])*fscore*fscore;
				tempvar = (sigma_x2[i] - sigma_x[i] * sigma_x[i] / n[i]) / (n[i] - 1);
				var[i] += tempvar; // add variances due to each marker
				total_score += sigma_x[i];
			}
			mean[i] = total_score / n[i];
		}
		else
		{
			var[i] = (sigma_x2[i] - sigma_x[i] * sigma_x[i] / n[i]) / (n[i] - 1);
			mean[i] = sigma_x[i] / n[i];
		}
	}
	s2 = ((n[0] - 1)*var[0] + (n[1] - 1)*var[1]) / (n[0] + n[1] - 2);
	SE = sqrt(s2*(1 / (float)n[0] + 1 / (float)n[1]));
	if (SE == 0)
		tval = 0;
	else
		tval = (mean[1] - mean[0]) / SE;
	p = tstat(tval, n[0] + n[1] - 2.0) / 2; // one-tailed
	SLP = log10(2 * p)*(mean[0] >= mean[1] ? 1 : -1);
	if (fo != NULL)
		fprintf(fo, "             Controls  Cases     \n"
			"N            %9d %9d\n"
			"Mean score   %9.3f %9.3f\n"
			"SD           %9.3f %9.3f\n"
			"t (%d df) = %6.3f\n"
			"p = %10.8f\n"
			"SLP = %8.2f (signed log10(p), positive if cases score higher than controls)\n",
			n[0], n[1], mean[0], mean[1], sqrt(var[0]), sqrt(var[1]), n[0] + n[1] - 2, tval, 2 * p, SLP);
	// I am writing SD because it will allow me to combine statistics later
	// p was halved above; flip it when the difference is in the other direction
	if (mean[0]>mean[1])
		p = 1.0 - p;
#ifdef ALLOWUNEQUALVARIANCES
	SE = sqrt(var[0] / n[0] + var[1] / n[1]);
	if (SE == 0)
		z = 0;
	else
		z = (mean[1] - mean[0]) / SE;
	pz = one_tailed_p_norm(z);
	// fo may be NULL (see the guarded report above), so guard here too
	if (fo != NULL)
		fprintf(fo, "Comparison of means not assuming equal variances, standardised normal deviate z = %5.3f\np = %10.8f (one-tailed)\n",
			z, pz);
	if (mean[0] >= mean[1])
		SLP = log10(2 * (1 - pz));
	else
		SLP = -log10(2 * pz);
	if (fo != NULL)
		fprintf(fo, "SLP = %8.2f (signed log10(2p), positive if cases score higher than controls)\n",
			SLP);

#else
	// only needed by the unequal-variance section
	(void)z;
	(void)pz;
#endif
	return p; // return one-tailed p
}