static int get_capture_config(const tchar *config_file, struct capture_config *config, int add_flags, const tchar *fs_source_path) { int ret; tchar *tmp_config_file = NULL; memset(config, 0, sizeof(*config)); /* For WIMBoot capture, check for default capture configuration file * unless one was explicitly specified. */ if (!config_file && (add_flags & WIMLIB_ADD_FLAG_WIMBOOT)) { /* XXX: Handle loading file correctly when in NTFS volume. */ size_t len = tstrlen(fs_source_path) + tstrlen(wimboot_cfgfile); tmp_config_file = MALLOC((len + 1) * sizeof(tchar)); struct stat st; tsprintf(tmp_config_file, T("%"TS"%"TS), fs_source_path, wimboot_cfgfile); if (!tstat(tmp_config_file, &st)) { config_file = tmp_config_file; add_flags &= ~WIMLIB_ADD_FLAG_WINCONFIG; } else { WARNING("\"%"TS"\" does not exist.\n" " Using default capture configuration!", tmp_config_file); } } if (add_flags & WIMLIB_ADD_FLAG_WINCONFIG) { /* Use Windows default. */ if (config_file) return WIMLIB_ERR_INVALID_PARAM; ret = read_capture_config(T("wincfg"), wincfg, sizeof(wincfg) - 1, config); } else if (config_file) { /* Use the specified configuration file. */ ret = read_capture_config(config_file, NULL, 0, config); } else { /* ... Or don't use any configuration file at all. No files * will be excluded from capture, all files will be compressed, * etc. */ ret = 0; } FREE(tmp_config_file); return ret; }
// ---------------------------------------------------------------------------- // fefind // Tries to find first free file name with specified extension (adding numbers // to the name until 15). // ---------------------------------------------------------------------------- TCHAR *fefind(TCHAR *fname, TCHAR *ext, TCHAR *newname) { TCHAR name[MAXS+1], newext[MAXS+1], *s; struct _stat fst; int ii; xstrncpy(name, fname, MAXS); if ((s = strrchr(name, T('.'))) != NULL) *s = T('\0'); xstrncpy(newname, name, MAXS); xstrncat(newname, ext, MAXS); for (ii = 0; ii <= 15; ii++) { // append 1..15 to output file name if (tstat(newname, &fst) < 0) break; // file not found snprintf(newext, MAXS, T("-%d%s"), ii+1, ext); xstrncpy(newname, name, MAXS); xstrncat(newname, newext, MAXS); } if (ii > 15) { xstrncpy(newname, name, MAXS); xstrncat(newname, ext, MAXS); return NULL; } return newname; } /* fefind */
float runOnePathway(char *line, pathwaySubject **sub, lrModel *model,paParams *pp, int writeFile) { char pathwayName[1000], pathwayURL[1000], gene[MAX_LOCI][50], scoreFileName[1000], outputFileName[1000], thisGene[50]; FILE *fs, *fo; int nGene, missing[MAX_LOCI], s, g, n[2], i, cc, c,t,nValidGenes; float sigma_x[2], sigma_x2[2], mean[2], var[2], SLP, SE, tval, s2, score; double p, pathway_p; fo = 0; if (sscanf(line, "%s %s %[^\n]", pathwayName, pathwayURL, rest) != 3) return 0; for (nGene = 0; strcpy(line, rest), *rest = '\0', sscanf(line, "%s %[^\n]", gene[nGene], rest) >= 1; ++nGene) ; for (s = 0; s < pp->nSub; ++s) totScore[s] = 0; nValidGenes = 0; for (g = 0; g < nGene; ++g) { if (pp->scoreTableFile) { std::map<std::string, FILEOFFSET>::const_iterator geneIter = pp->geneIndex.find(gene[g]); if (geneIter == pp->geneIndex.end()) missing[g] = 1; else { fseek(pp->scoreTableFile, geneIter->second, SEEK_SET); assert(fscanf(pp->scoreTableFile, "%s", thisGene) == 1); if (strcmp(thisGene, gene[g])) { #if 0 dcerror(1, "Problem finding %s in %s. Tried to seek to position %" PRId64 " but got this: %s\n", pp->scoreTableFileName, gene[g], geneIter->second, thisGene); #else dcerror(1, "Problem finding %s in %s. Tried to seek to position %uld but got this: %s\n", pp->scoreTableFileName, gene[g], (unsigned long)geneIter->second, thisGene); #endif exit(1); } missing[g] = 0; ++nValidGenes; for (s = 0; s < pp->nSub; ++s) { if (fscanf(pp->scoreTableFile, "%f", &sub[s]->score[g]) != 1) { dcerror(1, "Not enough entries in %s for gene %s\n", pp->scoreTableFileName, gene[g]); exit(1); } totScore[s] += sub[s]->score[g]; } } } else { sprintf(scoreFileName, "%s%s%s", pp->scoreFilePrefix, gene[g], pp->scoreFileSuffix); fs = fopen(scoreFileName, "r"); if (fs == 0) missing[g] = 1; else { missing[g] = 0; ++nValidGenes; for (s = 0; s < pp->nSub; ++s) { if (!fgets(line, 1000, fs)) { dcerror(1, "Not enough lines in scores file %s\n", scoreFileName); exit(1); } if (sscanf(line, "%s %d %f", sub[s]->id, &sub[s]->cc, &sub[s]->score[g]) != 3) { dcerror(1, "Not enough entries on this line in scores file %s:\n%s\n", scoreFileName, line); exit(1); } totScore[s] += sub[s]->score[g]; } fclose(fs); } } } for (s = 0; s < pp->nSub; ++s) totScore[s] /= nValidGenes; // use average score not total score if (writeFile) { if (pp->summaryOutputFile != 0) fprintf(pp->summaryOutputFile, "%s\t%s\t", pathwayName, pathwayURL); sprintf(line, "%s%s%s", pp->outputFilePrefix, pathwayName, pp->outputFileSuffix); fo = fopen(line, "w"); if (fo == 0) { dcerror(2, "Could not open output file %s\n", line); } else fprintf(fo, "%s\n%s\n\n", pathwayName, pathwayURL); } if (pp->do_ttest) { for (i = 0; i < 2; ++i) sigma_x[i] = sigma_x2[i] = n[i] = 0; for (s = 0; s < pp->nSub; ++s) { cc = sub[s]->cc; ++n[cc]; score = totScore[s]; sigma_x[cc] += score; sigma_x2[cc] += score * score; } for (i = 0; i < 2; ++i) { var[i] = (sigma_x2[i] - sigma_x[i] * sigma_x[i] / n[i]) / (n[i] - 1); mean[i] = sigma_x[i] / n[i]; } s2 = ((n[0] - 1)*var[0] + (n[1] - 1)*var[1]) / (n[0] + n[1] - 2); SE = sqrt(s2*(1 / (float)n[0] + 1 / (float)n[1])); if (SE == 0) tval = 0; else tval = (mean[1] - mean[0]) / SE; pathway_p = tstat(tval, n[0] + n[1] - 2.0) / 2; // one-tailed SLP = log10(2 * pathway_p)*(mean[0] >= mean[1] ? 1 : -1); if (fo != NULL) { fprintf(fo, " Controls Cases \n" "N %9d %9d\n" "Mean score %9.3f %9.3f\n" "t (%d df) = %6.3f\n" "p = %10.8f\n" "SLP = %8.2f (signed log10(p), positive if cases score higher than controls)\n", n[0], n[1], mean[0], mean[1], n[0] + n[1] - 2, tval, 2 * pathway_p, SLP); if (pp->summaryOutputFile != 0) fprintf(pp->summaryOutputFile, "%f\t", SLP); if (SLP > pp->geneLevelOutputThreshold) { fprintf(fo, "\n\nSLPs for individual genes above threshold:\n"); for (g = 0; g < nGene; ++g) { if (missing[g] == 1) continue; for (i = 0; i < 2; ++i) sigma_x[i] = sigma_x2[i] = n[i] = 0; for (s = 0; s < pp->nSub; ++s) { cc = sub[s]->cc; ++n[cc]; score = sub[s]->score[g]; sigma_x[cc] += score; sigma_x2[cc] += score * score; } for (i = 0; i < 2; ++i) { var[i] = (sigma_x2[i] - sigma_x[i] * sigma_x[i] / n[i]) / (n[i] - 1); mean[i] = sigma_x[i] / n[i]; } s2 = ((n[0] - 1)*var[0] + (n[1] - 1)*var[1]) / (n[0] + n[1] - 2); SE = sqrt(s2*(1 / (float)n[0] + 1 / (float)n[1])); if (SE == 0) tval = 0; else tval = (mean[1] - mean[0]) / SE; p = tstat(tval, n[0] + n[1] - 2.0) / 2; // one-tailed SLP = log10(2 * p)*(mean[0] >= mean[1] ? 1 : -1); if (SLP > pp->geneLevelOutputThreshold) fprintf(fo, "%s %8.2f\n", gene[g], SLP); } fprintf(fo, "\n\nList of genes for which no score file was found:\n"); for (g = 0; g < nGene; ++g) if (missing[g] == 1) fprintf(fo, "%s\n", gene[g]); } } if (mean[0] > mean[1]) pathway_p = 1.0 - pathway_p; } if(pp->do_lrtest||pp->numTestFiles>0) fillModelWithVars(model,pp->nSub,pp,pp->scoreCol); if (pp->do_lrtest) { SLP = do_onetailed_LRT(fo, model, pp); if (writeFile &&pp->summaryOutputFile != 0) fprintf(pp->summaryOutputFile, "%f\t", SLP); } if (pp->numTestFiles > 0) { for (t = 0; t < pp->numTestFiles; ++t) { SLP = runTestFile(fo, pp->testFiles[t].fn, model, pp); if (writeFile &&pp->summaryOutputFile != 0) fprintf(pp->summaryOutputFile, "%f\t", SLP); } } if (fo) fclose(fo); if (writeFile && pp->summaryOutputFile != 0) fprintf(pp->summaryOutputFile, "\n"); return SLP; }
double do_score_onetailed_ttest(FILE *fo, float *score, subject **sub, int nsub, par_info *pi, sa_par_info *spi, float cc_freq[2][MAX_LOCI], float cc_count[2][MAX_LOCI], int max_cc[2], float *weight, float *missing, int *rarer) { int s, i, n[2], cc, pl, l, j; float sigma_x[2], sigma_x2[2], mean[2], var[2], tval, SE, s2, rfreq, fscore, z, total_score; double p, pz; #ifndef USEMLP float SLP; #endif for (i = 0; i<2; ++i) sigma_x[i] = sigma_x2[i] = n[i] = 0; for (s = 0; s<nsub; ++s) { cc = sub[s]->cc; ++n[cc]; sigma_x[cc] += score[s]; sigma_x2[cc] += score[s] * score[s]; } for (i = 0; i<2; ++i) { if (spi->use_cc_freqs[i]) { total_score = 0; n[i] = max_cc[i]; var[i] = 0; for (pl = 0; pl<pi->n_loci_to_use; ++pl) { float tempvar = 0; float ngen[3]; // number of typed subjects who are AA,AB,BB l = pi->loci_to_use[pl]; sigma_x2[i] = 0; sigma_x[i] = 0; if (rarer[l] == 2) rfreq = cc_freq[i][l]; // assume supplied frequency is of alt allele, i.e. allele 2 // but score will be added to by rarer allele, even if this is allele 1 else rfreq = 1 - cc_freq[i][l]; // as actual counts missing, assume HWE ngen[0] = (1 - rfreq)*(1 - rfreq)*cc_count[i][l]; ngen[1] = 2 * rfreq*(1 - rfreq)*cc_count[i][l]; ngen[2] = rfreq*rfreq*cc_count[i][l]; fscore = weight[l]; // average score for each subject for this locus sigma_x[i] += ngen[1] * fscore; // AB sigma_x2[i] += ngen[1] * fscore*fscore; sigma_x[i] += ngen[2] * 2 * fscore; // BB sigma_x2[i] += ngen[2] * 4 * fscore*fscore; fscore = missing[l]; // as if missing scores were also independent sigma_x[i] += (max_cc[i] - cc_count[i][l])*fscore; sigma_x2[i] += (max_cc[i] - cc_count[i][l])*fscore*fscore; tempvar = (sigma_x2[i] - sigma_x[i] * sigma_x[i] / n[i]) / (n[i] - 1); var[i] += tempvar; // add variances due to each marker total_score += sigma_x[i]; } mean[i] = total_score / n[i]; } else { var[i] = (sigma_x2[i] - sigma_x[i] * sigma_x[i] / n[i]) / (n[i] - 1); mean[i] = sigma_x[i] / n[i]; } } s2 = ((n[0] - 1)*var[0] + (n[1] - 1)*var[1]) / (n[0] + n[1] - 2); SE = sqrt(s2*(1 / (float)n[0] + 1 / (float)n[1])); if (SE == 0) tval = 0; else tval = (mean[1] - mean[0]) / SE; p = tstat(tval, n[0] + n[1] - 2.0) / 2; // one-tailed SLP = log10(2 * p)*(mean[0] >= mean[1] ? 1 : -1); if (fo != NULL) fprintf(fo, " Controls Cases \n" "N %9d %9d\n" "Mean score %9.3f %9.3f\n" "SD %9.3f %9.3f\n" "t (%d df) = %6.3f\n" "p = %10.8f\n" "SLP = %8.2f (signed log10(p), positive if cases score higher than controls)\n", n[0], n[1], mean[0], mean[1], sqrt(var[0]), sqrt(var[1]), n[0] + n[1] - 2, tval, 2 * p, SLP); // I am writing SD because it will allow me to combine statistics later if (mean[0]>mean[1]) p = 1.0 - p; #ifdef ALLOWUNEQUALVARIANCES SE = sqrt(var[0] / n[0] + var[1] / n[1]); if (SE == 0) z = 0; else z = (mean[1] - mean[0]) / SE; pz = one_tailed_p_norm(z); fprintf(fo, "Comparison of means not assuming equal variances, standardised normal deviate z = %5.3f\np = %10.8f (one-tailed)\n", z, pz); if (mean[0] >= mean[1]) SLP = log10(2 * (1 - pz)); else SLP = -log10(2 * pz); fprintf(fo, "SLP = %8.2f (signed log10(2p), positive if cases score higher than controls)\n", SLP); #endif return p; // return one-tailed p }