int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; ESL_RANDOMNESS *r = NULL; int be_verbose; go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK || esl_opt_VerifyConfig(go) != eslOK) esl_fatal("%s", go->errbuf); if (esl_opt_GetBoolean(go, "-h") == TRUE) { esl_usage(stdout, argv[0], usage); puts("\n where options are:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */ exit(0); } if (esl_opt_ArgNumber(go) != 0) { printf("Incorrect number of command line arguments.\n"); esl_usage(stdout, argv[0], usage); exit(1); } be_verbose = esl_opt_GetBoolean(go, "-v"); if (esl_opt_GetBoolean(go, "-r")) { r = esl_randomness_CreateTimeseeded(); if (be_verbose) printf("seed = %ld\n", esl_randomness_GetSeed(r)); } else r = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); utest_LinearRegression(r, TRUE, be_verbose); utest_LinearRegression(r, FALSE, be_verbose); esl_getopts_Destroy(go); esl_randomness_Destroy(r); exit(0); }
/* Function:  esl_dst_XAverageId()
 * Synopsis:  Calculate avg identity for digital MSA
 * Incept:    SRE, Fri May 18 15:19:14 2007 [Janelia]
 *
 * Purpose:   Calculates the average pairwise fractional identity in
 *            a digital multiple sequence alignment <ax>, consisting of <N>
 *            aligned digital sequences of identical length.
 *
 *            If an exhaustive calculation would require more than
 *            <max_comparisons> pairwise comparisons, then instead of
 *            looking at all pairs, calculate the average over a
 *            stochastic sample of <max_comparisons> random pairs
 *            (drawn with a time-seeded generator, so the sampled
 *            result is not reproducible from run to run).
 *            This allows the routine to work efficiently even on very
 *            deep MSAs.
 *
 *            The exhaustive path is taken only when all of
 *            <N> <= <max_comparisons>, <N> <= sqrt(2*<max_comparisons>)
 *            and <N>(<N>-1)/2 <= <max_comparisons> hold; the first two
 *            tests exist so the integer product is never formed for
 *            an <N> large enough to overflow.
 *
 *            Each fractional pairwise identity is calculated using
 *            <esl_dst_XPairId()>.
 *
 *            If <N> <= 1 there are no pairs to compare, and the
 *            average is defined as 1.0.
 *
 * Args:      abc             - digital alphabet, passed to <esl_dst_XPairId()>
 *            ax              - array of <N> aligned digital sequences
 *            N               - number of sequences in <ax>
 *            max_comparisons - maximum number of pairs to evaluate
 *            ret_id          - RETURN: average fractional identity
 *
 * Returns:   <eslOK> on success, and <*ret_id> contains the average
 *            fractional identity.
 *
 * Throws:    Any non-<eslOK> status from <esl_dst_XPairId()> is passed
 *            through unchanged (the original header lists <eslEINVAL>
 *            for aligned pairs of unequal length). In that case
 *            <*ret_id> is 0.
 *
 * Note:      NOTE(review): the original header also listed <eslEMEM> on
 *            allocation failure, but the result of
 *            <esl_randomness_CreateTimeseeded()> is not checked here;
 *            confirm whether that needs handling.
 */
int
esl_dst_XAverageId(const ESL_ALPHABET *abc, ESL_DSQ **ax, int N, int max_comparisons, double *ret_id)
{
  int    status;
  double id;
  double sum = 0.;		/* running total of pairwise identities; must start at 0 */
  int    i,j,n;

  if (N <= 1) { *ret_id = 1.; return eslOK; }
  *ret_id = 0.;

  /* Is N small enough that we can average over all pairwise comparisons?
   * Watch out for numerical overflow in this: Pfam N's easily overflow
   * when squared, so the cheap tests come first and short-circuit.
   */
  if (N <= max_comparisons &&
      N <= sqrt(2. * max_comparisons) &&
      (N * (N-1) / 2) <= max_comparisons)
    {
      for (i = 0; i < N; i++)
	for (j = i+1; j < N; j++)
	  {
	    if ((status = esl_dst_XPairId(abc, ax[i], ax[j], &id, NULL, NULL)) != eslOK) return status;
	    sum += id;
	  }
      sum /= (double) (N * (N-1) / 2);
    }

  /* If nseq is large, calculate average over a stochastic sample. */
  else
    {
      ESL_RANDOMNESS *r = esl_randomness_CreateTimeseeded();

      for (n = 0; n < max_comparisons; n++)
	{
	  do { i = esl_rnd_Roll(r, N); j = esl_rnd_Roll(r, N); } while (j == i); /* make sure j != i */

	  if ((status = esl_dst_XPairId(abc, ax[i], ax[j], &id, NULL, NULL)) != eslOK)
	    {
	      esl_randomness_Destroy(r); /* don't leak the generator on the error path */
	      return status;
	    }
	  sum += id;
	}
      sum /= (double) max_comparisons;
      esl_randomness_Destroy(r);
    }

  *ret_id = sum;
  return eslOK;
}
/* Function: esl_dst_CAverageId() * Synopsis: Calculate avg identity for multiple alignment * Incept: SRE, Fri May 18 15:02:38 2007 [Janelia] * * Purpose: Calculates the average pairwise fractional identity in * a multiple sequence alignment <as>, consisting of <N> * aligned character sequences of identical length. * * If an exhaustive calculation would require more than * <max_comparisons> pairwise comparisons, then instead of * looking at all pairs, calculate the average over a * stochastic sample of <max_comparisons> random pairs. * This allows the routine to work efficiently even on very * deep MSAs. * * Each fractional pairwise identity (range $[0..$ pid $..1]$ * is calculated using <esl_dsq_CPairId()>. * * Returns: <eslOK> on success, and <*ret_id> contains the average * fractional identity. * * Throws: <eslEMEM> on allocation failure. * <eslEINVAL> if any of the aligned sequence pairs aren't * of the same length. * In either case, <*ret_id> is set to 0. */ int esl_dst_CAverageId(char **as, int N, int max_comparisons, double *ret_id) { int status; double id; double sum; int i,j,n; if (N <= 1) { *ret_id = 1.; return eslOK; } *ret_id = 0.; /* Is nseq small enough that we can average over all pairwise comparisons? */ if ((N * (N-1) / 2) <= max_comparisons) { for (i = 0; i < N; i++) for (j = i+1; j < N; j++) { if ((status = esl_dst_CPairId(as[i], as[j], &id, NULL, NULL)) != eslOK) return status; sum += id; } id /= (double) (N * (N-1) / 2); } /* If nseq is large, calculate average over a stochastic sample. */ else { ESL_RANDOMNESS *r = esl_randomness_CreateTimeseeded(); for (n = 0; n < max_comparisons; n++) { do { i = esl_rnd_Roll(r, N); j = esl_rnd_Roll(r, N); } while (j == i); /* make sure j != i */ if ((status = esl_dst_CPairId(as[i], as[j], &id, NULL, NULL)) != eslOK) return status; sum += id; } id /= (double) max_comparisons; esl_randomness_Destroy(r); } *ret_id = id; return eslOK; }
/* Test driver. Reads --bitfile, -b, -n, -v and -s from the command
 * line, creates a generator (time-seeded when -r is set, otherwise
 * from the -s seed), runs utest_random() and utest_choose(), and,
 * when --bitfile was given, calls save_bitfile() with that name.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go       = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *rng      = NULL;
  char           *bitpath  = esl_opt_GetString (go, "--bitfile");
  int             nbins    = esl_opt_GetInteger(go, "-b");
  int             nsamples = esl_opt_GetInteger(go, "-n");
  int             verbose  = esl_opt_GetBoolean(go, "-v");
  int             seed     = esl_opt_GetInteger(go, "-s");

  rng = esl_opt_GetBoolean(go, "-r") ? esl_randomness_CreateTimeseeded()
                                     : esl_randomness_Create(seed);

  /* utest_random() takes the seed and makes its own generator;
   * utest_choose() uses the one created above.
   */
  utest_random(seed, nsamples, nbins, verbose);
  utest_choose(rng,  nsamples, nbins, verbose);

  if (bitpath) save_bitfile(bitpath, rng, nsamples);

  esl_randomness_Destroy(rng);
  esl_getopts_Destroy(go);
  return 0;
}
/* Example driver for esl_stats_LinearRegression().
 *
 * Simulates n=100 points on the line y = a + b*x (a = -3, b = 1),
 * with x starting at -20 in steps of 1 and y drawn by
 * esl_rnd_Gaussian() around the line with sigma = 1 at every point,
 * then fits the data and prints the fitted parameters next to the
 * true ones.
 *
 * Allocation and generator-creation failures are reported through
 * esl_fatal() rather than being dereferenced.
 */
int
main(void)
{
  ESL_RANDOMNESS *r        = esl_randomness_CreateTimeseeded();
  double          a        = -3.;
  double          b        = 1.;
  double          xori     = -20.;
  double          xstep    = 1.0;
  double          setsigma = 1.0;	/* sigma on all points */
  int             n        = 100;
  double         *x        = malloc(sizeof(double) * n);
  double         *y        = malloc(sizeof(double) * n);
  double         *sigma    = malloc(sizeof(double) * n);
  int             i;
  double          ae, be, siga, sigb, cov_ab, cc, Q;

  /* Fail loudly before any of these pointers is used. */
  if (r == NULL)                               esl_fatal("randomness create failed");
  if (x == NULL || y == NULL || sigma == NULL) esl_fatal("malloc failed");

  /* Simulate some linear data, with Gaussian noise added to y_i */
  for (i = 0; i < n; i++)
    {
      sigma[i] = setsigma;
      x[i]     = xori + i*xstep;
      y[i]     = esl_rnd_Gaussian(r, a + b*x[i], sigma[i]);
    }

  if (esl_stats_LinearRegression(x, y, sigma, n, &ae, &be, &siga, &sigb, &cov_ab, &cc, &Q) != eslOK)
    esl_fatal("linear regression failed");

  printf("estimated intercept a = %8.4f [true = %8.4f]\n", ae, a);
  printf("estimated slope b = %8.4f [true = %8.4f]\n", be, b);
  printf("estimated sigma on a = %8.4f\n", siga);
  printf("estimated sigma on b = %8.4f\n", sigb);
  printf("estimated cov(a,b) = %8.4f\n", cov_ab);
  printf("correlation coeff = %8.4f\n", cc);
  printf("P-value = %8.4f\n", Q);

  free(x);
  free(y);
  free(sigma);
  esl_randomness_Destroy(r);
  exit(0);
}
/* The esl_random() unit test: * a binned frequency test. */ static void utest_random(long seed, int n, int nbins, int be_verbose) { ESL_RANDOMNESS *r = NULL; int *counts = NULL; double X2p = 0.; int i; double X2, exp, diff; if ((counts = malloc(sizeof(int) * nbins)) == NULL) esl_fatal("malloc failed"); esl_vec_ISet(counts, nbins, 0); /* This contrived call sequence exercises CreateTimeseeded() and * Init(), while leaving us a reproducible chain. Because it's * reproducible, we know this test succeeds, despite being * statistical in nature. */ if ((r = esl_randomness_CreateTimeseeded()) == NULL) esl_fatal("randomness create failed"); if (esl_randomness_Init(r, seed) != eslOK) esl_fatal("randomness init failed"); for (i = 0; i < n; i++) counts[esl_rnd_Roll(r, nbins)]++; /* X^2 value: \sum (o_i - e_i)^2 / e_i */ for (X2 = 0., i = 0; i < nbins; i++) { exp = (double) n / (double) nbins; diff = (double) counts[i] - exp; X2 += diff*diff/exp; } if (esl_stats_ChiSquaredTest(nbins, X2, &X2p) != eslOK) esl_fatal("chi squared eval failed"); if (be_verbose) printf("random(): \t%g\n", X2p); if (X2p < 0.01) esl_fatal("chi squared test failed"); esl_randomness_Destroy(r); free(counts); return; }
int main(int argc, char **argv) { int status; ESL_RANDOMNESS *r = esl_randomness_CreateTimeseeded(); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); ESL_SCOREMATRIX *S = NULL; ESL_DSQ *x = NULL; /* iid query */ ESL_DSQ *y = NULL; /* iid target */ double lambda; double bg[20]; /* iid background probabilities */ int L; /* query length */ int M; /* target length */ int nseq; /* number of target seqs to simulate */ int i; int gop; int gex; char *mxfile = "PMX"; int raw_sc; /* Configuration */ L = 400; /* query length */ M = 400; /* target length */ nseq = 50000; gop = -11; gex = -1; lambda = 0.3207; ESL_ALLOC(x, sizeof(ESL_DSQ) * (L+2)); ESL_ALLOC(y, sizeof(ESL_DSQ) * (M+2)); /* Input an amino acid score matrix from a file. */ if (mxfile != NULL) { ESL_FILEPARSER *efp = NULL; if ( esl_fileparser_Open(mxfile, &efp) != eslOK) esl_fatal("failed to open score file %s", mxfile); if ( esl_sco_Read(efp, abc, &S) != eslOK) esl_fatal("failed to read matrix from %s", mxfile); esl_fileparser_Close(efp); } else { /* default = BLOSUM62 */ S = esl_scorematrix_Create(abc); esl_scorematrix_SetBLOSUM62(S); } esl_composition_BL62(bg); esl_rsq_xIID(r, bg, 20, L, x); for (i = 0; i < nseq; i++) { esl_rsq_xIID(r, bg, 20, M, y); esl_swat_Score(x, L, y, M, S, gop, gex, &raw_sc); printf("%d\n", raw_sc); } free(x); free(y); esl_scorematrix_Destroy(S); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); exit(0); ERROR: exit(status); }
/* Benchmark construction driver.
 *
 * Takes three command line arguments: an output basename, a Stockholm
 * alignment file and a FASTA sequence database. Opens five output
 * files (<basename>.msa, .fa, .pos, .neg, .tbl), then for each
 * alignment read: removes fragments, splits it into a training
 * alignment and a stack of test domains, and -- when at least two
 * test domains result -- synthesizes positives, writes the training
 * alignment, and appends one summary row to the .tbl file. After the
 * last alignment it synthesizes the negatives (-N of them).
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go        = NULL;	/* command line configuration       */
  struct cfg_s  cfg;			/* application configuration        */
  char         *outbase   = NULL;	/* base of the output file names    */
  char         *alipath   = NULL;	/* alignment file name              */
  char         *dbpath    = NULL;	/* name of seq db file              */
  char          outname[256];		/* name of an output file           */
  int           alifmt;			/* format code for alignment file   */
  int           dbfmt;			/* format code for seq db file      */
  ESL_MSAFILE  *afp       = NULL;	/* open alignment file              */
  ESL_MSA      *origmsa   = NULL;	/* one multiple sequence alignment  */
  ESL_MSA      *msa       = NULL;	/* MSA after frags are removed      */
  ESL_MSA      *trainmsa  = NULL;	/* training set, aligned            */
  ESL_STACK    *teststack = NULL;	/* test set: stack of ESL_SQ ptrs   */
  int           status;			/* easel return code                */
  int           nfrags;			/* # of fragments removed           */
  int           ntestdom;		/* # of test domains                */
  int           ntest;			/* # of test sequences created      */
  int           nali;			/* # of alignments written to table */
  double        avgid;

  /* Parse command line */
  go = esl_getopts_Create(options);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK)
    cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf);
  if (esl_opt_VerifyConfig(go) != eslOK)
    cmdline_failure(argv[0], "Error in app configuration: %s\n", go->errbuf);
  if (esl_opt_GetBoolean(go, "-h"))
    cmdline_help(argv[0], go);
  if (esl_opt_ArgNumber(go) != 3)
    cmdline_failure(argv[0], "Incorrect number of command line arguments\n");

  outbase = esl_opt_GetArg(go, 1);
  alipath = esl_opt_GetArg(go, 2);
  dbpath  = esl_opt_GetArg(go, 3);
  alifmt  = eslMSAFILE_STOCKHOLM;
  dbfmt   = eslSQFILE_FASTA;

  /* Set up the configuration structure shared amongst functions here */
  cfg.r = esl_opt_IsDefault(go, "--seed")
            ? esl_randomness_CreateTimeseeded()
            : esl_randomness_Create(esl_opt_GetInteger(go, "--seed"));
  cfg.abc       = NULL;		/* until we open the MSA file, below */
  cfg.fragfrac  = esl_opt_GetReal(go, "-F");
  cfg.idthresh1 = esl_opt_GetReal(go, "-1");
  cfg.idthresh2 = esl_opt_GetReal(go, "-2");
  cfg.test_lens = NULL;
  cfg.ntest     = 0;

  /* Open the output files. Each name is built in <outname>; a result
   * of 256 or more from snprintf() means the name did not fit.
   */
  if (snprintf(outname, 256, "%s.msa", outbase) >= 256)
    esl_fatal("Failed to construct output MSA file name");
  cfg.out_msafp = fopen(outname, "w");
  if (cfg.out_msafp == NULL)
    esl_fatal("Failed to open MSA output file %s\n", outname);

  if (snprintf(outname, 256, "%s.fa", outbase) >= 256)
    esl_fatal("Failed to construct output FASTA file name");
  cfg.out_seqfp = fopen(outname, "w");
  if (cfg.out_seqfp == NULL)
    esl_fatal("Failed to open FASTA output file %s\n", outname);

  if (snprintf(outname, 256, "%s.pos", outbase) >= 256)
    esl_fatal("Failed to construct pos test set summary file name");
  cfg.possummfp = fopen(outname, "w");
  if (cfg.possummfp == NULL)
    esl_fatal("Failed to open pos test set summary file %s\n", outname);

  if (snprintf(outname, 256, "%s.neg", outbase) >= 256)
    esl_fatal("Failed to construct neg test set summary file name");
  cfg.negsummfp = fopen(outname, "w");
  if (cfg.negsummfp == NULL)
    esl_fatal("Failed to open neg test set summary file %s\n", outname);

  if (snprintf(outname, 256, "%s.tbl", outbase) >= 256)
    esl_fatal("Failed to construct benchmark table file name");
  cfg.tblfp = fopen(outname, "w");
  if (cfg.tblfp == NULL)
    esl_fatal("Failed to open benchmark table file %s\n", outname);

  /* Open the MSA file; determine alphabet */
  status = esl_msafile_Open(alipath, alifmt, NULL, &afp);
  if (status != eslOK)
    {
      if      (status == eslENOTFOUND) esl_fatal("Alignment file %s doesn't exist or is not readable\n", alipath);
      else if (status == eslEFORMAT)   esl_fatal("Couldn't determine format of alignment %s\n", alipath);
      else                             esl_fatal("Alignment file open failed with error %d\n", status);
    }

  if      (esl_opt_GetBoolean(go, "--amino")) cfg.abc = esl_alphabet_Create(eslAMINO);
  else if (esl_opt_GetBoolean(go, "--dna"))   cfg.abc = esl_alphabet_Create(eslDNA);
  else if (esl_opt_GetBoolean(go, "--rna"))   cfg.abc = esl_alphabet_Create(eslRNA);
  else
    {				/* no alphabet option given: guess from the file */
      int type;

      status = esl_msafile_GuessAlphabet(afp, &type);
      if (status != eslOK)
	{
	  if      (status == eslEAMBIGUOUS) esl_fatal("Failed to guess the bio alphabet used in %s.\nUse --dna, --rna, or --amino option to specify it.", alipath);
	  else if (status == eslEFORMAT)    esl_fatal("Alignment file parse failed: %s\n", afp->errbuf);
	  else if (status == eslENODATA)    esl_fatal("Alignment file %s is empty\n", alipath);
	  else                              esl_fatal("Failed to read alignment file %s\n", alipath);
	}
      cfg.abc = esl_alphabet_Create(type);
    }
  esl_msafile_SetDigital(afp, cfg.abc);

  /* Residue frequencies: SW34 composition for amino acids, uniform otherwise */
  if (cfg.abc->type == eslAMINO) esl_composition_SW34(cfg.fq);
  else                           esl_vec_DSet(cfg.fq, cfg.abc->K, 1.0 / (double) cfg.abc->K);

  /* Open and process the dbfile; make sure it's in the same alphabet */
  process_dbfile(&cfg, dbpath, dbfmt);

  /* Read and process MSAs one at a time */
  nali = 0;
  for (;;)
    {
      status = esl_msa_Read(afp, &origmsa);
      if (status != eslOK) break;

      remove_fragments(&cfg, origmsa, &msa, &nfrags);
      separate_sets   (&cfg, msa, &trainmsa, &teststack);
      ntestdom = esl_stack_ObjectCount(teststack);

      if (ntestdom >= 2)
	{
	  esl_stack_Shuffle(cfg.r, teststack);
	  synthesize_positives(go, &cfg, msa->name, teststack, &ntest);

	  esl_msa_MinimGaps(trainmsa, NULL, NULL);
	  esl_msa_Write(cfg.out_msafp, trainmsa, eslMSAFILE_STOCKHOLM);

	  /* 10000 is max_comparisons, before sampling kicks in */
	  esl_dst_XAverageId(cfg.abc, trainmsa->ax, trainmsa->nseq, 10000, &avgid);
	  fprintf(cfg.tblfp, "%-20s %3.0f%% %6d %6d %6d %6d %6d %6d\n",
		  msa->name, 100.*avgid, (int) trainmsa->alen, msa->nseq,
		  nfrags, trainmsa->nseq, ntestdom, ntest);
	  nali++;
	}

      esl_msa_Destroy(trainmsa);
      esl_msa_Destroy(origmsa);
      esl_msa_Destroy(msa);
    }

  /* The loop ends on the first non-OK read status; anything but EOF is an error */
  if      (status == eslEFORMAT)
    esl_fatal("Alignment file parse error, line %d of file %s:\n%s\nOffending line is:\n%s\n",
	      afp->linenumber, afp->fname, afp->errbuf, afp->buf);
  else if (status != eslEOF)
    esl_fatal("Alignment file read failed with error code %d\n", status);
  else if (nali == 0)
    esl_fatal("No alignments found in file %s\n", alipath);

  if (nali > 0)
    synthesize_negatives(go, &cfg, esl_opt_GetInteger(go, "-N"));

  fclose(cfg.out_msafp);
  fclose(cfg.out_seqfp);
  fclose(cfg.possummfp);
  fclose(cfg.negsummfp);
  fclose(cfg.tblfp);
  esl_randomness_Destroy(cfg.r);
  esl_alphabet_Destroy(cfg.abc);
  esl_msafile_Close(afp);
  esl_getopts_Destroy(go);
  return 0;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; /* command line configuration */ struct cfg_s cfg; /* application configuration */ char *basename= NULL; /* base of the output file names */ char *alifile = NULL; /* alignment file name */ char *dbfile = NULL; /* name of seq db file */ char outfile[256]; /* name of an output file */ int alifmt; /* format code for alifile */ int dbfmt; /* format code for dbfile */ ESLX_MSAFILE *afp = NULL; /* open alignment file */ ESL_MSA *origmsa = NULL; /* one multiple sequence alignment */ ESL_MSA *msa = NULL; /* MSA after frags are removed */ ESL_MSA *trainmsa= NULL; /* training set, aligned */ ESL_STACK *teststack=NULL; /* test set: stack of ESL_SQ ptrs */ int status; /* easel return code */ int nfrags; /* # of fragments removed */ int ntestdom; /* # of test domains */ int ntest; /* # of test sequences created */ int nali; /* number of alignments read */ double avgid; /* Parse command line */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) cmdline_failure(argv[0], "Error in app configuration: %s\n", go->errbuf); if (esl_opt_GetBoolean(go, "-h")) cmdline_help(argv[0], go); if (esl_opt_ArgNumber(go) != 3) cmdline_failure(argv[0], "Incorrect number of command line arguments\n"); basename = esl_opt_GetArg(go, 1); alifile = esl_opt_GetArg(go, 2); dbfile = esl_opt_GetArg(go, 3); alifmt = eslMSAFILE_STOCKHOLM; dbfmt = eslSQFILE_FASTA; /* Set up the configuration structure shared amongst functions here */ if (esl_opt_IsDefault(go, "--seed")) cfg.r = esl_randomness_CreateTimeseeded(); else cfg.r = esl_randomness_Create(esl_opt_GetInteger(go, "--seed")); cfg.abc = NULL; /* until we open the MSA file, below */ cfg.fragfrac = esl_opt_GetReal(go, "-F"); cfg.idthresh1 = esl_opt_GetReal(go, "-1"); cfg.idthresh2 = esl_opt_GetReal(go, "-2"); cfg.test_lens = NULL; cfg.ntest = 0; 
cfg.max_ntest = (esl_opt_IsOn(go, "--maxtest") ? esl_opt_GetInteger(go, "--maxtest") : 0); cfg.max_ntrain = (esl_opt_IsOn(go, "--maxtrain") ? esl_opt_GetInteger(go, "--maxtrain") : 0); /* Open the output files */ if (snprintf(outfile, 256, "%s.msa", basename) >= 256) esl_fatal("Failed to construct output MSA file name"); if ((cfg.out_msafp = fopen(outfile, "w")) == NULL) esl_fatal("Failed to open MSA output file %s\n", outfile); if (snprintf(outfile, 256, "%s.fa", basename) >= 256) esl_fatal("Failed to construct output FASTA file name"); if ((cfg.out_seqfp = fopen(outfile, "w")) == NULL) esl_fatal("Failed to open FASTA output file %s\n", outfile); if (snprintf(outfile, 256, "%s.pos", basename) >= 256) esl_fatal("Failed to construct pos test set summary file name"); if ((cfg.possummfp = fopen(outfile, "w")) == NULL) esl_fatal("Failed to open pos test set summary file %s\n", outfile); if (snprintf(outfile, 256, "%s.neg", basename) >= 256) esl_fatal("Failed to construct neg test set summary file name"); if ((cfg.negsummfp = fopen(outfile, "w")) == NULL) esl_fatal("Failed to open neg test set summary file %s\n", outfile); if (snprintf(outfile, 256, "%s.tbl", basename) >= 256) esl_fatal("Failed to construct benchmark table file name"); if ((cfg.tblfp = fopen(outfile, "w")) == NULL) esl_fatal("Failed to open benchmark table file %s\n", outfile); if (esl_opt_GetBoolean(go, "--pid")) { if (snprintf(outfile, 256, "%s.pid", basename) >= 256) esl_fatal("Failed to construct %%id table file name"); if ((cfg.pidfp = fopen(outfile, "w")) == NULL) esl_fatal("Failed to open %%id table file %s\n", outfile); } else cfg.pidfp = NULL; /* Open the MSA file, digital mode; determine alphabet */ if (esl_opt_GetBoolean(go, "--amino")) cfg.abc = esl_alphabet_Create(eslAMINO); else if (esl_opt_GetBoolean(go, "--dna")) cfg.abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--rna")) cfg.abc = esl_alphabet_Create(eslRNA); status = eslx_msafile_Open(&(cfg.abc), alifile, NULL, 
alifmt, NULL, &afp); if (status != eslOK) eslx_msafile_OpenFailure(afp, status); if (cfg.abc->type == eslAMINO) esl_composition_SW34(cfg.fq); else esl_vec_DSet(cfg.fq, cfg.abc->K, 1.0 / (double) cfg.abc->K); /* Open and process the dbfile; make sure it's in the same alphabet */ process_dbfile(&cfg, dbfile, dbfmt); /* Read and process MSAs one at a time */ nali = 0; while ((status = eslx_msafile_Read(afp, &origmsa)) != eslEOF) { if (status != eslOK) eslx_msafile_ReadFailure(afp, status); esl_msa_ConvertDegen2X(origmsa); esl_msa_Hash(origmsa); remove_fragments(&cfg, origmsa, &msa, &nfrags); separate_sets (&cfg, msa, &trainmsa, &teststack); if ( esl_stack_ObjectCount(teststack) >= 2) { /* randomize test domain order, and apply size limit if any */ esl_stack_Shuffle(cfg.r, teststack); if (cfg.max_ntest) pstack_select_topn(&teststack, cfg.max_ntest); ntestdom = esl_stack_ObjectCount(teststack); /* randomize training set alignment order, and apply size limit if any */ esl_msashuffle_PermuteSequenceOrder(cfg.r, trainmsa); if (cfg.max_ntrain) msa_select_topn(&trainmsa, cfg.max_ntrain); esl_msa_MinimGaps(trainmsa, NULL, NULL, FALSE); if (esl_opt_GetBoolean(go, "--pid")) write_pids(cfg.pidfp, origmsa, trainmsa, teststack); synthesize_positives(go, &cfg, msa->name, teststack, &ntest); eslx_msafile_Write(cfg.out_msafp, trainmsa, eslMSAFILE_STOCKHOLM); esl_dst_XAverageId(cfg.abc, trainmsa->ax, trainmsa->nseq, 10000, &avgid); /* 10000 is max_comparisons, before sampling kicks in */ fprintf(cfg.tblfp, "%-20s %3.0f%% %6d %6d %6d %6d %6d %6d\n", msa->name, 100.*avgid, (int) trainmsa->alen, msa->nseq, nfrags, trainmsa->nseq, ntestdom, ntest); nali++; } esl_msa_Destroy(trainmsa); esl_msa_Destroy(origmsa); esl_msa_Destroy(msa); } if (nali == 0) esl_fatal("No alignments found in file %s\n", alifile); synthesize_negatives(go, &cfg, esl_opt_GetInteger(go, "-N")); fclose(cfg.out_msafp); fclose(cfg.out_seqfp); fclose(cfg.possummfp); fclose(cfg.negsummfp); fclose(cfg.tblfp); if (cfg.pidfp) 
fclose(cfg.pidfp); esl_randomness_Destroy(cfg.r); esl_alphabet_Destroy(cfg.abc); eslx_msafile_Close(afp); esl_getopts_Destroy(go); return 0; }