static int serial_loop(WORKER_INFO *info, struct cfg_s *cfg) { P7_BUILDER *bld = NULL; ESL_MSA *msa = NULL; ESL_MSA *postmsa = NULL; ESL_MSA **postmsa_ptr = (cfg->postmsafile != NULL) ? &postmsa : NULL; P7_HMM *hmm = NULL; char errmsg[eslERRBUFSIZE]; int status; double entropy; cfg->nali = 0; while ((status = esl_msa_Read(cfg->afp, &msa)) == eslOK) { cfg->nali++; if ((status = set_msa_name(cfg, errmsg, msa)) != eslOK) p7_Fail("%s\n", errmsg); /* cfg->nnamed gets incremented in this call */ /* bg new-HMM trarr gm om */ if ((status = p7_Builder(info->bld, msa, info->bg, &hmm, NULL, NULL, NULL, postmsa_ptr)) != eslOK) p7_Fail("build failed: %s", bld->errbuf); entropy = p7_MeanMatchRelativeEntropy(hmm, info->bg); if ((status = output_result(cfg, errmsg, cfg->nali, msa, hmm, postmsa, entropy)) != eslOK) p7_Fail(errmsg); p7_hmm_Destroy(hmm); esl_msa_Destroy(msa); esl_msa_Destroy(postmsa); } return status; }
int main(int argc, char **argv) { ESL_MSAFILE *afp; ESL_MSA *msa; ESL_DMATRIX *P; int status; int i,j; double min, avg, max; esl_msafile_Open(argv[1], eslMSAFILE_UNKNOWN, NULL, &afp); esl_msa_Read(afp, &msa); esl_dst_CPairIdMx(msa->aseq, msa->nseq, &P); min = 1.0; max = 0.0; avg = 0.0; for (i = 0; i < msa->nseq; i++) for (j = i+1; j < msa->nseq; j++) { avg += P->mx[i][j]; if (P->mx[i][j] < min) min = P->mx[i][j]; if (P->mx[i][j] > max) max = P->mx[i][j]; } avg /= (double) (msa->nseq * (msa->nseq-1) / 2); printf("Average pairwise %% id: %.1f%%\n", avg * 100.); printf("Minimum pairwise %% id: %.1f%%\n", min * 100.); printf("Maximum pairwise %% id: %.1f%%\n", max * 100.); esl_dmatrix_Destroy(P); esl_msa_Destroy(msa); esl_msafile_Close(afp); return 0; }
/* msa_shuffling() * SRE, Tue Jan 22 08:39:51 2008 [Market Street Cafe, Leesburg] * * Shuffling multiple sequence alignments */ static int msa_shuffling(ESL_GETOPTS *go, ESL_RANDOMNESS *r, FILE *ofp, int outfmt) { char *msafile = esl_opt_GetArg(go, 1); int infmt = eslMSAFILE_UNKNOWN; ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; ESL_MSA *shuf = NULL; int N = esl_opt_GetInteger(go, "-N"); int i; int status, mstatus; status = esl_msafile_Open(msafile, infmt, NULL, &afp); if (status == eslENOTFOUND) esl_fatal("Alignment file %s isn't readable\n", msafile); else if (status == eslEFORMAT) esl_fatal("Couldn't determine format of %s\n", msafile); else if (status != eslOK) esl_fatal("Alignment file open failed (error %d)\n", status); while ((mstatus = esl_msa_Read(afp, &msa)) != eslEOF) { if (status == eslEFORMAT) esl_fatal("Alignment file parse error:\n%s\n", afp->errbuf); else if (status == eslEINVAL) esl_fatal("Alignment file parse error:\n%s\n", afp->errbuf); else if (status != eslOK) esl_fatal("Alignment file read failed with error code %d\n", status); shuf = esl_msa_Clone(msa); for (i = 0; i < N; i++) { if (esl_opt_GetBoolean(go, "--boot")) esl_msashuffle_Bootstrap(r, msa, shuf); else esl_msashuffle_Shuffle (r, msa, shuf); /* Set the name of the shuffled alignment */ if (msa->name != NULL) { if (esl_opt_GetBoolean(go, "--boot")) { if (N > 1) esl_msa_FormatName(shuf, "%s-sample-%d", msa->name, i); else esl_msa_FormatName(shuf, "%s-sample", msa->name); } else { if (N > 1) esl_msa_FormatName(shuf, "%s-shuffle-%d", msa->name, i); else esl_msa_FormatName(shuf, "%s-shuffle", msa->name); } } else { if (esl_opt_GetBoolean(go, "--boot")) { if (N > 1) esl_msa_FormatName(shuf, "sample-%d", i); else esl_msa_FormatName(shuf, "sample"); } else { if (N > 1) esl_msa_FormatName(shuf, "shuffle-%d", i); else esl_msa_FormatName(shuf, "shuffle"); } } esl_msa_Write(ofp, shuf, outfmt); } esl_msa_Destroy(shuf); esl_msa_Destroy(msa); } return eslOK; }
int main(int argc, char **argv) { char *filename = argv[1]; int fmt = eslMSAFILE_UNKNOWN; int type = eslUNKNOWN; ESL_ALPHABET *abc = NULL; ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; double maxid = 0.62; /* cluster at 62% identity: the BLOSUM62 rule */ int *assignment = NULL; int *nin = NULL; int nclusters; int c, i; int status; /* Open; guess alphabet; set to digital mode */ status = esl_msafile_Open(filename, fmt, NULL, &afp); if (status == eslENOTFOUND) esl_fatal("Alignment file %s isn't readable", filename); else if (status == eslEFORMAT) esl_fatal("Couldn't determine format of %s", filename); else if (status != eslOK) esl_fatal("Alignment file open failed (error code %d)", status); status = esl_msafile_GuessAlphabet(afp, &type); if (status == eslEAMBIGUOUS) esl_fatal("Couldn't guess alphabet from first alignment in %s", filename); else if (status == eslEFORMAT) esl_fatal("Alignment file parse error, line %d of file %s:\n%s\nBad line is: %s\n", afp->linenumber, afp->fname, afp->errbuf, afp->buf); else if (status == eslENODATA) esl_fatal("Alignment file %s contains no data?", filename); else if (status != eslOK) esl_fatal("Failed to guess alphabet (error code %d)\n", status); abc = esl_alphabet_Create(type); esl_msafile_SetDigital(afp, abc); /* read one alignment */ status = esl_msa_Read(afp, &msa); if (status == eslEFORMAT) esl_fatal("alignment file %s: %s\n", afp->fname, afp->errbuf); else if (status != eslOK) esl_fatal("Alignment file read failed with error code %d\n", status); /* do the clustering */ esl_msacluster_SingleLinkage(msa, maxid, &assignment, &nin, &nclusters); printf("%d clusters at threshold of %f fractional identity\n", nclusters, maxid); for (c = 0; c < nclusters; c++) { printf("cluster %d:\n", c); for (i = 0; i < msa->nseq; i++) if (assignment[i] == c) printf(" %s\n", msa->sqname[i]); printf("(%d sequences)\n\n", nin[c]); } esl_msa_Destroy(msa); esl_msafile_Close(afp); free(assignment); free(nin); return 0; }
int main(int argc, char **argv) { ESL_MSAFILE *afp; ESL_MSA *msa; int i; esl_msafile_Open(argv[1], eslMSAFILE_UNKNOWN, NULL, &afp); esl_msa_Read(afp, &msa); esl_msafile_Close(afp); esl_msaweight_GSC(msa); for (i = 0; i < msa->nseq; i++) printf("%20s %f\n", msa->sqname[i], msa->wgt[i]); return 0; }
static int thread_loop(ESL_THREADS *obj, ESL_WORK_QUEUE *queue, struct cfg_s *cfg) { int status = eslOK; int sstatus = eslOK; int processed = 0; WORK_ITEM *item; void *newItem; int next = 1; PENDING_ITEM *top = NULL; PENDING_ITEM *empty = NULL; PENDING_ITEM *tmp = NULL; char errmsg[eslERRBUFSIZE]; esl_workqueue_Reset(queue); esl_threads_WaitForStart(obj); status = esl_workqueue_ReaderUpdate(queue, NULL, &newItem); if (status != eslOK) esl_fatal("Work queue reader failed"); /* Main loop: */ item = (WORK_ITEM *) newItem; while (sstatus == eslOK) { sstatus = esl_msa_Read(cfg->afp, &item->msa); if (sstatus == eslOK) { item->nali = ++cfg->nali; if (set_msa_name(cfg, errmsg, item->msa) != eslOK) p7_Fail("%s\n", errmsg); } if (sstatus == eslEOF && processed < cfg->nali) sstatus = eslOK; if (sstatus == eslOK) { status = esl_workqueue_ReaderUpdate(queue, item, &newItem); if (status != eslOK) esl_fatal("Work queue reader failed"); /* process any results */ item = (WORK_ITEM *) newItem; if (item->processed == TRUE) { ++processed; /* try to keep the input output order the same */ if (item->nali == next) { sstatus = output_result(cfg, errmsg, item->nali, item->msa, item->hmm, item->postmsa, item->entropy); if (sstatus != eslOK) p7_Fail(errmsg); p7_hmm_Destroy(item->hmm); esl_msa_Destroy(item->msa); esl_msa_Destroy(item->postmsa); ++next; /* output any pending msa as long as the order * remains the same as read in. */ while (top != NULL && top->nali == next) { sstatus = output_result(cfg, errmsg, top->nali, top->msa, top->hmm, top->postmsa, top->entropy); if (sstatus != eslOK) p7_Fail(errmsg); p7_hmm_Destroy(top->hmm); esl_msa_Destroy(top->msa); esl_msa_Destroy(top->postmsa); tmp = top; top = tmp->next; tmp->next = empty; empty = tmp; ++next; } } else { /* queue up the msa so the sequence order is the same in * the .sto and .hmm */ if (empty != NULL) { tmp = empty; empty = tmp->next; } else { ESL_ALLOC(tmp, sizeof(PENDING_ITEM)); } tmp->nali = item->nali; tmp->hmm = item->hmm; tmp->msa = item->msa; tmp->postmsa = item->postmsa; tmp->entropy = item->entropy; /* add the msa to the pending list */ if (top == NULL || tmp->nali < top->nali) { tmp->next = top; top = tmp; } else { PENDING_ITEM *ptr = top; while (ptr->next != NULL && tmp->nali > ptr->next->nali) { ptr = ptr->next; } tmp->next = ptr->next; ptr->next = tmp; } } item->nali = 0; item->processed = FALSE; item->hmm = NULL; item->msa = NULL; item->postmsa = NULL; item->entropy = 0.0; } } } if (top != NULL) esl_fatal("Top is not empty\n"); while (empty != NULL) { tmp = empty; empty = tmp->next; free(tmp); } status = esl_workqueue_ReaderUpdate(queue, item, NULL); if (status != eslOK) esl_fatal("Work queue reader failed"); if (sstatus == eslEOF) { /* wait for all the threads to complete */ esl_threads_WaitForFinish(obj); esl_workqueue_Complete(queue); } return sstatus; ERROR: return eslEMEM; }
/* mpi_master() * The MPI version of hmmbuild. * Follows standard pattern for a master/worker load-balanced MPI program (J1/78-79). * * A master can only return if it's successful. * Errors in an MPI master come in two classes: recoverable and nonrecoverable. * * Recoverable errors include all worker-side errors, and any * master-side error that do not affect MPI communication. Error * messages from recoverable messages are delayed until we've cleanly * shut down the workers. * * Unrecoverable errors are master-side errors that may affect MPI * communication, meaning we cannot count on being able to reach the * workers and shut them down. Unrecoverable errors result in immediate * p7_Fail()'s, which will cause MPI to shut down the worker processes * uncleanly. */ static void mpi_master(const ESL_GETOPTS *go, struct cfg_s *cfg) { int xstatus = eslOK; /* changes from OK on recoverable error */ int status; int have_work = TRUE; /* TRUE while alignments remain */ int nproc_working = 0; /* number of worker processes working, up to nproc-1 */ int wi; /* rank of next worker to get an alignment to work on */ char *buf = NULL; /* input/output buffer, for packed MPI messages */ int bn = 0; ESL_MSA *msa = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; ESL_MSA **msalist = NULL; ESL_MSA *postmsa = NULL; int *msaidx = NULL; char errmsg[eslERRBUFSIZE]; MPI_Status mpistatus; int n; int pos; double entropy; /* Master initialization: including, figure out the alphabet type. * If any failure occurs, delay printing error message until we've shut down workers. */ if (xstatus == eslOK) { if ((status = init_master_cfg(go, cfg, errmsg)) != eslOK) xstatus = status; } if (xstatus == eslOK) { bn = 4096; if ((buf = malloc(sizeof(char) * bn)) == NULL) { sprintf(errmsg, "allocation failed"); xstatus = eslEMEM; } } if (xstatus == eslOK) { if ((msalist = malloc(sizeof(ESL_MSA *) * cfg->nproc)) == NULL) { sprintf(errmsg, "allocation failed"); xstatus = eslEMEM; } } if (xstatus == eslOK) { if ((msaidx = malloc(sizeof(int) * cfg->nproc)) == NULL) { sprintf(errmsg, "allocation failed"); xstatus = eslEMEM; } } MPI_Bcast(&xstatus, 1, MPI_INT, 0, MPI_COMM_WORLD); if (xstatus != eslOK) { MPI_Finalize(); p7_Fail(errmsg); } ESL_DPRINTF1(("MPI master is initialized\n")); bg = p7_bg_Create(cfg->abc); for (wi = 0; wi < cfg->nproc; wi++) { msalist[wi] = NULL; msaidx[wi] = 0; } /* Worker initialization: * Because we've already successfully initialized the master before we start * initializing the workers, we don't expect worker initialization to fail; * so we just receive a quick OK/error code reply from each worker to be sure, * and don't worry about an informative message. */ MPI_Bcast(&(cfg->abc->type), 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Reduce(&xstatus, &status, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); if (status != eslOK) { MPI_Finalize(); p7_Fail("One or more MPI worker processes failed to initialize."); } ESL_DPRINTF1(("%d workers are initialized\n", cfg->nproc-1)); /* Main loop: combining load workers, send/receive, clear workers loops; * also, catch error states and die later, after clean shutdown of workers. * * When a recoverable error occurs, have_work = FALSE, xstatus != * eslOK, and errmsg is set to an informative message. No more * errmsg's can be received after the first one. We wait for all the * workers to clear their work units, then send them shutdown signals, * then finally print our errmsg and exit. * * Unrecoverable errors just crash us out with p7_Fail(). */ wi = 1; while (have_work || nproc_working) { if (have_work) { if ((status = esl_msa_Read(cfg->afp, &msa)) == eslOK) { cfg->nali++; ESL_DPRINTF1(("MPI master read MSA %s\n", msa->name == NULL? "" : msa->name)); } else { have_work = FALSE; if (status == eslEFORMAT) { xstatus = eslEFORMAT; snprintf(errmsg, eslERRBUFSIZE, "Alignment file parse error:\n%s\n", cfg->afp->errbuf); } else if (status == eslEINVAL) { xstatus = eslEFORMAT; snprintf(errmsg, eslERRBUFSIZE, "Alignment file parse error:\n%s\n", cfg->afp->errbuf); } else if (status != eslEOF) { xstatus = status; snprintf(errmsg, eslERRBUFSIZE, "Alignment file read unexpectedly failed with code %d\n", status); } ESL_DPRINTF1(("MPI master has run out of MSAs (having read %d)\n", cfg->nali)); } } if ((have_work && nproc_working == cfg->nproc-1) || (!have_work && nproc_working > 0)) { if (MPI_Probe(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &mpistatus) != 0) { MPI_Finalize(); p7_Fail("mpi probe failed"); } if (MPI_Get_count(&mpistatus, MPI_PACKED, &n) != 0) { MPI_Finalize(); p7_Fail("mpi get count failed"); } wi = mpistatus.MPI_SOURCE; ESL_DPRINTF1(("MPI master sees a result of %d bytes from worker %d\n", n, wi)); if (n > bn) { if ((buf = realloc(buf, sizeof(char) * n)) == NULL) p7_Fail("reallocation failed"); bn = n; } if (MPI_Recv(buf, bn, MPI_PACKED, wi, 0, MPI_COMM_WORLD, &mpistatus) != 0) { MPI_Finalize(); p7_Fail("mpi recv failed"); } ESL_DPRINTF1(("MPI master has received the buffer\n")); /* If we're in a recoverable error state, we're only clearing worker results; * just receive them, don't unpack them or print them. * But if our xstatus is OK, go ahead and process the result buffer. */ if (xstatus == eslOK) { pos = 0; if (MPI_Unpack(buf, bn, &pos, &xstatus, 1, MPI_INT, MPI_COMM_WORLD) != 0) { MPI_Finalize(); p7_Fail("mpi unpack failed");} if (xstatus == eslOK) /* worker reported success. Get the HMM. */ { ESL_DPRINTF1(("MPI master sees that the result buffer contains an HMM\n")); if (p7_hmm_MPIUnpack(buf, bn, &pos, MPI_COMM_WORLD, &(cfg->abc), &hmm) != eslOK) { MPI_Finalize(); p7_Fail("HMM unpack failed"); } ESL_DPRINTF1(("MPI master has unpacked the HMM\n")); if (cfg->postmsafile != NULL) { if (esl_msa_MPIUnpack(cfg->abc, buf, bn, &pos, MPI_COMM_WORLD, &postmsa) != eslOK) { MPI_Finalize(); p7_Fail("postmsa unpack failed");} } entropy = p7_MeanMatchRelativeEntropy(hmm, bg); if ((status = output_result(cfg, errmsg, msaidx[wi], msalist[wi], hmm, postmsa, entropy)) != eslOK) xstatus = status; esl_msa_Destroy(postmsa); postmsa = NULL; p7_hmm_Destroy(hmm); hmm = NULL; } else /* worker reported an error. Get the errmsg. */ { if (MPI_Unpack(buf, bn, &pos, errmsg, eslERRBUFSIZE, MPI_CHAR, MPI_COMM_WORLD) != 0) { MPI_Finalize(); p7_Fail("mpi unpack of errmsg failed"); } ESL_DPRINTF1(("MPI master sees that the result buffer contains an error message\n")); } } esl_msa_Destroy(msalist[wi]); msalist[wi] = NULL; msaidx[wi] = 0; nproc_working--; } if (have_work) { ESL_DPRINTF1(("MPI master is sending MSA %s to worker %d\n", msa->name == NULL ? "":msa->name, wi)); if (esl_msa_MPISend(msa, wi, 0, MPI_COMM_WORLD, &buf, &bn) != eslOK) p7_Fail("MPI msa send failed"); msalist[wi] = msa; msaidx[wi] = cfg->nali; /* 1..N for N alignments in the MSA database */ msa = NULL; wi++; nproc_working++; } } /* On success or recoverable errors: * Shut down workers cleanly. */ ESL_DPRINTF1(("MPI master is done. Shutting down all the workers cleanly\n")); for (wi = 1; wi < cfg->nproc; wi++) if (esl_msa_MPISend(NULL, wi, 0, MPI_COMM_WORLD, &buf, &bn) != eslOK) p7_Fail("MPI msa send failed"); free(buf); free(msaidx); free(msalist); p7_bg_Destroy(bg); if (xstatus != eslOK) { MPI_Finalize(); p7_Fail(errmsg); } else return; }
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; /* command line configuration */ struct cfg_s cfg; /* application configuration */ char *basename= NULL; /* base of the output file names */ char *alifile = NULL; /* alignment file name */ char *dbfile = NULL; /* name of seq db file */ char outfile[256]; /* name of an output file */ int alifmt; /* format code for alifile */ int dbfmt; /* format code for dbfile */ ESL_MSAFILE *afp = NULL; /* open alignment file */ ESL_MSA *origmsa = NULL; /* one multiple sequence alignment */ ESL_MSA *msa = NULL; /* MSA after frags are removed */ ESL_MSA *trainmsa= NULL; /* training set, aligned */ ESL_STACK *teststack=NULL; /* test set: stack of ESL_SQ ptrs */ int status; /* easel return code */ int nfrags; /* # of fragments removed */ int ntestdom; /* # of test domains */ int ntest; /* # of test sequences created */ int nali; /* number of alignments read */ double avgid; /* Parse command line */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) cmdline_failure(argv[0], "Failed to parse command line: %s\n", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) cmdline_failure(argv[0], "Error in app configuration: %s\n", go->errbuf); if (esl_opt_GetBoolean(go, "-h")) cmdline_help(argv[0], go); if (esl_opt_ArgNumber(go) != 3) cmdline_failure(argv[0], "Incorrect number of command line arguments\n"); basename = esl_opt_GetArg(go, 1); alifile = esl_opt_GetArg(go, 2); dbfile = esl_opt_GetArg(go, 3); alifmt = eslMSAFILE_STOCKHOLM; dbfmt = eslSQFILE_FASTA; /* Set up the configuration structure shared amongst functions here */ if (esl_opt_IsDefault(go, "--seed")) cfg.r = esl_randomness_CreateTimeseeded(); else cfg.r = esl_randomness_Create(esl_opt_GetInteger(go, "--seed")); cfg.abc = NULL; /* until we open the MSA file, below */ cfg.fragfrac = esl_opt_GetReal(go, "-F"); cfg.idthresh1 = esl_opt_GetReal(go, "-1"); cfg.idthresh2 = esl_opt_GetReal(go, "-2"); cfg.test_lens = NULL; cfg.ntest = 0; /* Open the output files */ if (snprintf(outfile, 256, "%s.msa", basename) >= 256) esl_fatal("Failed to construct output MSA file name"); if ((cfg.out_msafp = fopen(outfile, "w")) == NULL) esl_fatal("Failed to open MSA output file %s\n", outfile); if (snprintf(outfile, 256, "%s.fa", basename) >= 256) esl_fatal("Failed to construct output FASTA file name"); if ((cfg.out_seqfp = fopen(outfile, "w")) == NULL) esl_fatal("Failed to open FASTA output file %s\n", outfile); if (snprintf(outfile, 256, "%s.pos", basename) >= 256) esl_fatal("Failed to construct pos test set summary file name"); if ((cfg.possummfp = fopen(outfile, "w")) == NULL) esl_fatal("Failed to open pos test set summary file %s\n", outfile); if (snprintf(outfile, 256, "%s.neg", basename) >= 256) esl_fatal("Failed to construct neg test set summary file name"); if ((cfg.negsummfp = fopen(outfile, "w")) == NULL) esl_fatal("Failed to open neg test set summary file %s\n", outfile); if (snprintf(outfile, 256, "%s.tbl", basename) >= 256) esl_fatal("Failed to construct benchmark table file name"); if ((cfg.tblfp = fopen(outfile, "w")) == NULL) esl_fatal("Failed to open benchmark table file %s\n", outfile); /* Open the MSA file; determine alphabet */ status = esl_msafile_Open(alifile, alifmt, NULL, &afp); if (status == eslENOTFOUND) esl_fatal("Alignment file %s doesn't exist or is not readable\n", alifile); else if (status == eslEFORMAT) esl_fatal("Couldn't determine format of alignment %s\n", alifile); else if (status != eslOK) esl_fatal("Alignment file open failed with error %d\n", status); if (esl_opt_GetBoolean(go, "--amino")) cfg.abc = esl_alphabet_Create(eslAMINO); else if (esl_opt_GetBoolean(go, "--dna")) cfg.abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--rna")) cfg.abc = esl_alphabet_Create(eslRNA); else { int type; status = esl_msafile_GuessAlphabet(afp, &type); if (status == eslEAMBIGUOUS) esl_fatal("Failed to guess the bio alphabet used in %s.\nUse --dna, --rna, or --amino option to specify it.", alifile); else if (status == eslEFORMAT) esl_fatal("Alignment file parse failed: %s\n", afp->errbuf); else if (status == eslENODATA) esl_fatal("Alignment file %s is empty\n", alifile); else if (status != eslOK) esl_fatal("Failed to read alignment file %s\n", alifile); cfg.abc = esl_alphabet_Create(type); } esl_msafile_SetDigital(afp, cfg.abc); if (cfg.abc->type == eslAMINO) esl_composition_SW34(cfg.fq); else esl_vec_DSet(cfg.fq, cfg.abc->K, 1.0 / (double) cfg.abc->K); /* Open and process the dbfile; make sure it's in the same alphabet */ process_dbfile(&cfg, dbfile, dbfmt); /* Read and process MSAs one at a time */ nali = 0; while ((status = esl_msa_Read(afp, &origmsa)) == eslOK) { remove_fragments(&cfg, origmsa, &msa, &nfrags); separate_sets (&cfg, msa, &trainmsa, &teststack); ntestdom = esl_stack_ObjectCount(teststack); if (ntestdom >= 2) { esl_stack_Shuffle(cfg.r, teststack); synthesize_positives(go, &cfg, msa->name, teststack, &ntest); esl_msa_MinimGaps(trainmsa, NULL, NULL); esl_msa_Write(cfg.out_msafp, trainmsa, eslMSAFILE_STOCKHOLM); esl_dst_XAverageId(cfg.abc, trainmsa->ax, trainmsa->nseq, 10000, &avgid); /* 10000 is max_comparisons, before sampling kicks in */ fprintf(cfg.tblfp, "%-20s %3.0f%% %6d %6d %6d %6d %6d %6d\n", msa->name, 100.*avgid, (int) trainmsa->alen, msa->nseq, nfrags, trainmsa->nseq, ntestdom, ntest); nali++; } esl_msa_Destroy(trainmsa); esl_msa_Destroy(origmsa); esl_msa_Destroy(msa); } if (status == eslEFORMAT) esl_fatal("Alignment file parse error, line %d of file %s:\n%s\nOffending line is:\n%s\n", afp->linenumber, afp->fname, afp->errbuf, afp->buf); else if (status != eslEOF) esl_fatal("Alignment file read failed with error code %d\n", status); else if (nali == 0) esl_fatal("No alignments found in file %s\n", alifile); if (nali > 0) synthesize_negatives(go, &cfg, esl_opt_GetInteger(go, "-N")); fclose(cfg.out_msafp); fclose(cfg.out_seqfp); fclose(cfg.possummfp); fclose(cfg.negsummfp); fclose(cfg.tblfp); esl_randomness_Destroy(cfg.r); esl_alphabet_Destroy(cfg.abc); esl_msafile_Close(afp); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go; char *msafile; ESL_MSAFILE *afp; ESL_MSA *msa; int do_gsc; int do_pb; int do_blosum; int maxN; double maxid; int nsmall, nbig; int i; /* Process command line */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal("%s", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) esl_fatal("%s", go->errbuf); if (esl_opt_GetBoolean(go, "-h") == TRUE){ puts(usage); puts("\n where options are:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */ return 0; } do_blosum = esl_opt_GetBoolean(go, "--blosum"); do_gsc = esl_opt_GetBoolean(go, "--gsc"); do_pb = esl_opt_GetBoolean(go, "--pb"); maxid = esl_opt_GetReal (go, "--id"); maxN = esl_opt_GetInteger(go, "--maxN"); if (esl_opt_ArgNumber(go) != 1) { puts("Incorrect number of command line arguments."); puts(usage); return 1; } if ((msafile = esl_opt_GetArg(go, 1)) == NULL) esl_fatal("%s", go->errbuf); esl_getopts_Destroy(go); /* Weight one or more alignments from input file */ esl_msafile_Open(msafile, eslMSAFILE_UNKNOWN, NULL, &afp); while (esl_msa_Read(afp, &msa) == eslOK) { if (maxN > 0 && msa->nseq > maxN) { esl_msa_Destroy(msa); continue; } if (do_gsc) esl_msaweight_GSC(msa); else if (do_pb) esl_msaweight_PB(msa); else if (do_blosum) esl_msaweight_BLOSUM(msa, maxid); for (nsmall = 0, nbig = 0, i = 0; i < msa->nseq; i++) { if (msa->wgt[i] < 0.2) nsmall++; if (msa->wgt[i] > 5.0) nbig++; } printf("%-20s %5d %5d %8.4f %8.4f %5d %5d\n", msa->name, msa->nseq, msa->alen, esl_vec_DMin(msa->wgt, msa->nseq), esl_vec_DMax(msa->wgt, msa->nseq), nsmall, nbig); esl_msa_Destroy(msa); } esl_msafile_Close(afp); return eslOK; }
int main(int argc, char **argv) { ESL_STOPWATCH *w; ESL_GETOPTS *go; char *msafile; ESL_MSAFILE *afp; ESL_MSA *msa; int do_gsc; int do_pb; int do_blosum; int maxN; double maxid; double cpu; /* Process command line */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal("failed to parse cmd line: %s", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) esl_fatal("failed to parse cmd line: %s", go->errbuf); if (esl_opt_GetBoolean(go, "-h") == TRUE) { puts(usage); puts("\n where options are:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */ return 0; } do_blosum = esl_opt_GetBoolean(go, "--blosum"); do_gsc = esl_opt_GetBoolean(go, "--gsc"); do_pb = esl_opt_GetBoolean(go, "--pb"); maxid = esl_opt_GetReal (go, "--id"); maxN = esl_opt_GetInteger(go, "--maxN"); if (esl_opt_ArgNumber(go) != 1) { puts("Incorrect number of command line arguments."); puts(usage); return 1; } if ((msafile = esl_opt_GetArg(go, 1)) == NULL) esl_fatal("failed to parse cmd line: %s", go->errbuf); esl_getopts_Destroy(go); w = esl_stopwatch_Create(); /* Weight one or more alignments from input file */ esl_msafile_Open(msafile, eslMSAFILE_UNKNOWN, NULL, &afp); while (esl_msa_Read(afp, &msa) == eslOK) { if (maxN > 0 && msa->nseq > maxN) { esl_msa_Destroy(msa); continue; } esl_stopwatch_Start(w); if (do_gsc) esl_msaweight_GSC(msa); else if (do_pb) esl_msaweight_PB(msa); else if (do_blosum) esl_msaweight_BLOSUM(msa, maxid); esl_stopwatch_Stop(w); cpu = w->user; printf("%-20s %6d %6d %.3f\n", msa->name, msa->alen, msa->nseq, cpu); esl_msa_Destroy(msa); } esl_msafile_Close(afp); esl_stopwatch_Destroy(w); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go; char *msafile; ESL_MSAFILE *afp; ESL_MSA *msa; float *sqd; int status; int nbad; int nali = 0; int nbadali = 0; int nwgt = 0; int nbadwgt = 0; int i; int be_quiet; int do_gsc; int do_pb; int do_blosum; double maxid; double tol; int maxN; /* Process command line */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal("failed to parse cmd line: %s\n", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) esl_fatal("failed to parse cmd line: %s\n", go->errbuf); if (esl_opt_GetBoolean(go, "-h") == TRUE) { puts(usage); puts("\n where options are:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */ return 0; } be_quiet = esl_opt_GetBoolean(go, "-q"); do_blosum = esl_opt_GetBoolean(go, "--blosum"); do_gsc = esl_opt_GetBoolean(go, "--gsc"); do_pb = esl_opt_GetBoolean(go, "--pb"); maxid = esl_opt_GetReal (go, "--id"); tol = esl_opt_GetReal (go, "--tol"); maxN = esl_opt_GetInteger(go, "--maxN"); if (esl_opt_ArgNumber(go) != 1) { puts("Incorrect number of command line arguments."); puts(usage); return 1; } msafile = esl_opt_GetArg(go, 1); esl_getopts_Destroy(go); /* Weight one or more alignments from input file */ esl_msafile_Open(msafile, eslMSAFILE_UNKNOWN, NULL, &afp); while (esl_msa_Read(afp, &msa) == eslOK) { if (maxN > 0 && msa->nseq > maxN) { esl_msa_Destroy(msa); continue; } nali++; nwgt += msa->nseq; ESL_ALLOC(sqd, sizeof(float) * msa->nseq); if (do_gsc) { esl_msaweight_GSC(msa); GSCWeights(msa->aseq, msa->nseq, msa->alen, sqd); } else if (do_pb) { esl_msaweight_PB(msa); PositionBasedWeights(msa->aseq, msa->nseq, msa->alen, sqd); } else if (do_blosum) { esl_msaweight_BLOSUM(msa, maxid); BlosumWeights(msa->aseq, msa->nseq, msa->alen, maxid, sqd); /* workaround SQUID bug: BLOSUM weights weren't renormalized to sum to nseq. */ esl_vec_FNorm (sqd, msa->nseq); esl_vec_FScale(sqd, msa->nseq, (float) msa->nseq); } if (! be_quiet) { for (i = 0; i < msa->nseq; i++) fprintf(stdout, "%-20s %.3f %.3f\n", msa->sqname[i], msa->wgt[i], sqd[i]); } nbad = 0; for (i = 0; i < msa->nseq; i++) if (esl_DCompare((double) sqd[i], msa->wgt[i], tol) != eslOK) nbad++; if (nbad > 0) nbadali++; nbadwgt += nbad; if (nbad > 0) printf("%-20s :: alignment shows %d weights that differ (out of %d) \n", msa->name, nbad, msa->nseq); esl_msa_Destroy(msa); free(sqd); } esl_msafile_Close(afp); if (nbadali == 0) printf("OK: all weights identical between squid and Easel in %d alignment(s)\n", nali); else { printf("%d of %d weights mismatched at (> %f fractional difference)\n", nbadwgt, nwgt, tol); printf("involving %d of %d total alignments\n", nbadali, nali); } return eslOK; ERROR: return status; }