/* process_commandline()
 *
 * Builds an <ESL_GETOPTS> object from <options>, feeds it the environment and
 * the command line, and pulls out the two positional arguments: <qfile> is
 * argument 1 and <fmfile> is argument 2. The "-h" help page is printed here.
 *
 * Returns <eslOK> on success, with <*ret_go> set to the options object and
 * <*ret_qfile> / <*ret_fmfile> set to the strings returned by esl_opt_GetArg().
 *
 * This function only returns on success:
 *   - "-h" prints the help page and calls exit(0);
 *   - a user error prints a diagnostic and brief usage on stdout, then exit(1);
 *   - the ERROR label calls exit(status); it is presumably reached from
 *     ESL_XEXCEPTION_SYS when one of the checked writes fails.
 */
static int
process_commandline(int argc, char **argv, ESL_GETOPTS **ret_go, char **ret_fmfile, char **ret_qfile)
{
  ESL_GETOPTS *opts = esl_getopts_Create(options);
  int          status;

  /* Environment first, then the command line, then cross-option checks. */
  if (esl_opt_ProcessEnvironment(opts) != eslOK)
    {
      if (printf("Failed to process environment: %s\n", opts->errbuf) < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  if (esl_opt_ProcessCmdline(opts, argc, argv) != eslOK)
    {
      if (printf("Failed to parse command line: %s\n", opts->errbuf) < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  if (esl_opt_VerifyConfig(opts) != eslOK)
    {
      if (printf("Failed to parse command line: %s\n", opts->errbuf) < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  /* Help page. DisplayHelp arguments: option group, indentation 2, text width 80. */
  if (esl_opt_GetBoolean(opts, "-h") == TRUE)
    {
      esl_banner(stdout, argv[0], banner);
      esl_usage (stdout, argv[0], usage);

      if (puts("\nBasic options:") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, opts, 1, 2, 80);

      if (puts("\nSpecial options:") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      esl_opt_DisplayHelp(stdout, opts, 2, 2, 80);

      exit(0);
    }

  /* Exactly two positional arguments are required. */
  if (esl_opt_ArgNumber(opts) != 2)
    {
      if (puts("Incorrect number of command line arguments.") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  *ret_qfile = esl_opt_GetArg(opts, 1);
  if (*ret_qfile == NULL)
    {
      if (puts("Failed to get <qfile> argument on command line") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  *ret_fmfile = esl_opt_GetArg(opts, 2);
  if (*ret_fmfile == NULL)
    {
      if (puts("Failed to get <fmfile> argument on command line") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  /* Only one of the two inputs can be "-" (stdin). */
  if (esl_strcmp(*ret_fmfile, "-") == 0 && esl_strcmp(*ret_qfile, "-") == 0)
    {
      if (puts("Either <fmfile> or <qfile> may be '-' (to read from stdin), but not both.") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  *ret_go = opts;
  return eslOK;

 FAILURE:
  /* Everything routed here is a user error: show brief usage and leave politely. */
  esl_usage(stdout, argv[0], usage);
  puts("\nwhere basic options are:");
  esl_opt_DisplayHelp(stdout, opts, 1, 2, 80);
  printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
  esl_getopts_Destroy(opts);
  exit(1);

 ERROR:
  if (opts) esl_getopts_Destroy(opts);
  exit(status);
}
/* process_commandline()
 *
 * Processes the environment and the command line into a newly created
 * <ESL_GETOPTS> object, and fetches the two positional arguments
 * (<hmmdb> is argument 1, <seqfile> is argument 2). The help page (-h)
 * is formatted here.
 *
 * Returns <eslOK> on success, with <*ret_go>, <*ret_hmmfile> and
 * <*ret_seqfile> filled in.
 *
 * Only returns on success: "-h" ends in exit(0), a user error ends in
 * exit(1) after brief usage is shown, and the ERROR label ends in
 * exit(status) (presumably reached from ESL_XEXCEPTION_SYS on a failed write).
 */
static int
process_commandline(int argc, char **argv, ESL_GETOPTS **ret_go, char **ret_hmmfile, char **ret_seqfile)
{
  /* Help page layout: section title, and the option group listed beneath it. */
  static const struct {
    const char *title;
    int         group;
  } help_sections[] = {
    { "\nBasic options:",                                            1 },
    { "\nOptions controlling output:",                               2 },
    { "\nOptions controlling reporting thresholds:",                 4 },
    { "\nOptions controlling inclusion (significance) thresholds:",  5 },
    { "\nOptions for model-specific thresholding:",                  6 },
    { "\nOptions controlling acceleration heuristics:",              7 },
    { "\nOther expert options:",                                    12 },
  };
  const size_t  nsections = sizeof(help_sections) / sizeof(help_sections[0]);
  size_t        s;
  ESL_GETOPTS  *opts = esl_getopts_Create(options);
  int           status;

  if (esl_opt_ProcessEnvironment(opts) != eslOK)
    {
      if (printf("Failed to process environment: %s\n", opts->errbuf) < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  if (esl_opt_ProcessCmdline(opts, argc, argv) != eslOK)
    {
      if (printf("Failed to parse command line: %s\n", opts->errbuf) < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  if (esl_opt_VerifyConfig(opts) != eslOK)
    {
      if (printf("Failed to parse command line: %s\n", opts->errbuf) < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  /* Help page. DisplayHelp arguments: option group, indentation 2, text width 80. */
  if (esl_opt_GetBoolean(opts, "-h") == TRUE)
    {
      p7_banner(stdout, argv[0], banner);
      esl_usage(stdout, argv[0], usage);

      for (s = 0; s < nsections; s++)
        {
          if (puts(help_sections[s].title) < 0)
            ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
          esl_opt_DisplayHelp(stdout, opts, help_sections[s].group, 2, 80);
        }
      exit(0);
    }

  if (esl_opt_ArgNumber(opts) != 2)
    {
      if (puts("Incorrect number of command line arguments.") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  *ret_hmmfile = esl_opt_GetArg(opts, 1);
  if (*ret_hmmfile == NULL)
    {
      if (puts("Failed to get <hmmdb> argument on command line") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  *ret_seqfile = esl_opt_GetArg(opts, 2);
  if (*ret_seqfile == NULL)
    {
      if (puts("Failed to get <seqfile> argument on command line") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  /* The HMM database may not be "-" (stdin); see the message for why. */
  if (strcmp(*ret_hmmfile, "-") == 0)
    {
      if (puts("nhmmscan cannot read <hmm database> from stdin stream, because it must have hmmpress'ed auxfiles") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  *ret_go = opts;
  return eslOK;

 FAILURE:
  /* Everything routed here is a user error: show brief usage and leave politely. */
  esl_usage(stdout, argv[0], usage);
  if (puts("\nwhere most common options are:") < 0)
    ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  esl_opt_DisplayHelp(stdout, opts, 1, 2, 80);
  if (printf("\nTo see more help on available options, do %s -h\n\n", argv[0]) < 0)
    ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  esl_getopts_Destroy(opts);
  exit(1);

 ERROR:
  if (opts) esl_getopts_Destroy(opts);
  exit(status);
}
/* output_header()
 *
 * Writes the '#'-prefixed run header to <ofp>: application name and banner,
 * copyright/license lines, the input file names, any output-related options
 * that were used, and the FM-index parameters read from <meta>.
 *
 * Args:     meta   - FM-index metadata; alph_type, freq_cnt_b and freq_SA are read
 *           ofp    - open stream to write the header to
 *           go     - processed command line options
 *           fmfile - name of the FM database file (echoed only)
 *           qfile  - name of the query file (echoed only)
 *
 * Returns:  <eslOK> on success.
 *
 * Throws:   whatever esl_FileTail() returns on its failure;
 *           <eslEWRITE> if any write to <ofp> fails.
 */
static int
output_header(FM_METADATA *meta, FILE *ofp, const ESL_GETOPTS *go, char *fmfile, char *qfile)
{
  /* Default label, so an unrecognized alphabet type never leaves <alph>
   * uninitialized when it is handed to fprintf("%s") below. */
  const char *alph    = "unknown";
  char       *appname = NULL;
  int         status;

  if      (meta->alph_type == fm_DNA)   alph = "dna";
  else if (meta->alph_type == fm_AMINO) alph = "amino";

  if ((status = esl_FileTail(go->argv[0], FALSE, &appname)) != eslOK) goto ERROR;

  /* Every write goes through ESL_XEXCEPTION_SYS (not ESL_EXCEPTION_SYS), so
   * that a failure reaches the ERROR label and <appname> is released. */
  if (fprintf(ofp, "# %s :: %s\n", appname, banner) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# %s\n", EASEL_COPYRIGHT)       < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# %s\n", EASEL_LICENSE)         < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");

  if (fprintf(ofp, "# input binary-formatted HMMER database: %s\n", fmfile) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# input file of query sequences: %s\n", qfile)          < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");

  if (esl_opt_IsUsed(go, "--out"))
    {
      char *outfile = esl_opt_GetString(go, "--out");

      if (fprintf(ofp, "# output file containing list of hits: %s\n",
                  (esl_strcmp(outfile, "-") == 0 ? "stdout" : outfile)) < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
    }

  if (esl_opt_IsUsed(go, "--count_only"))
    {
      if (fprintf(ofp, "# output only counts, not hit locations\n") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
    }

  if (fprintf(ofp, "# alphabet : %s\n", alph)                            < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# bin_length : %d\n", meta->freq_cnt_b)              < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# suffix array sample rate: %d\n", meta->freq_SA)    < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
  if (fprintf(ofp, "# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");

  free(appname);
  return eslOK;

 ERROR:
  free(appname);
  return status;
}
/* process_commandline()
 *
 * Processes the environment and the command line into a newly created
 * <ESL_GETOPTS> object and fetches the positional arguments: <msafile>
 * (argument 1), and <postmsafile> (argument 2), the latter only when
 * --alirange or --modelrange is in use. The help page (-h) is formatted here.
 *
 * Returns <eslOK> on success, with <*ret_go> and <*ret_alifile> set;
 * <*ret_postalifile> is assigned only in the --alirange / --modelrange case.
 *
 * Only returns on success: "-h" ends in exit(0), a user error ends in
 * exit(1) after brief usage is shown, and the ERROR label ends in
 * exit(status) (presumably reached from ESL_XEXCEPTION_SYS on a failed write).
 */
static int
process_commandline(int argc, char **argv, ESL_GETOPTS **ret_go, char **ret_alifile, char **ret_postalifile)
{
  /* Help page layout: section title, and the option group listed beneath it. */
  static const struct {
    const char *title;
    int         group;
  } help_sections[] = {
    { "\nBasic options:",                                          1 },
    { "\nMask range options (format: --xxx 10-20,30-40 ) :",       5 },
    { "\nOptions for selecting alphabet rather than guessing it:", 2 },
    { "\nAlternative model construction strategies:",              3 },
    { "\nAlternative relative sequence weighting strategies:",     4 },
    { "\nOther options:",                                          8 },
  };
  const size_t  nsections = sizeof(help_sections) / sizeof(help_sections[0]);
  size_t        s;
  ESL_GETOPTS  *opts = esl_getopts_Create(options);
  int           status;

  if (esl_opt_ProcessEnvironment(opts) != eslOK)
    {
      if (printf("Failed to process environment:\n%s\n", opts->errbuf) < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  if (esl_opt_ProcessCmdline(opts, argc, argv) != eslOK)
    {
      if (printf("Failed to parse command line:\n%s\n", opts->errbuf) < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  if (esl_opt_VerifyConfig(opts) != eslOK)
    {
      if (printf("Failed to parse command line:\n%s\n", opts->errbuf) < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  /* Help page. DisplayHelp arguments: option group, indentation 2, text width 80. */
  if (esl_opt_GetBoolean(opts, "-h") == TRUE)
    {
      p7_banner(stdout, argv[0], banner);
      esl_usage(stdout, argv[0], usage);

      for (s = 0; s < nsections; s++)
        {
          if (puts(help_sections[s].title) < 0)
            ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
          esl_opt_DisplayHelp(stdout, opts, help_sections[s].group, 2, 80);
        }
      exit(0);
    }

  /* At most two positional arguments; the first is always required. */
  if (esl_opt_ArgNumber(opts) > 2)
    {
      if (puts("Incorrect number of command line arguments.") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  *ret_alifile = esl_opt_GetArg(opts, 1);
  if (*ret_alifile == NULL)
    {
      if (puts("Failed to get <msafile> argument on command line") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  /* The second argument is fetched only for the two masking modes. */
  if (esl_opt_IsUsed(opts, "--alirange") || esl_opt_IsUsed(opts, "--modelrange"))
    {
      *ret_postalifile = esl_opt_GetArg(opts, 2);
      if (*ret_postalifile == NULL)
        {
          if (puts("Failed to get <postmsafile> argument on command line") < 0)
            ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
          goto FAILURE;
        }
    }

  /* Reading the alignment from stdin requires an explicit --informat. */
  if (strcmp(*ret_alifile, "-") == 0 && ! esl_opt_IsOn(opts, "--informat"))
    {
      if (puts("Must specify --informat to read <alifile> from stdin ('-')") < 0)
        ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
      goto FAILURE;
    }

  *ret_go = opts;
  return eslOK;

 FAILURE:
  /* Everything routed here is a user error: show brief usage and leave politely. */
  esl_usage(stdout, argv[0], usage);
  puts("\nwhere basic options are:");
  esl_opt_DisplayHelp(stdout, opts, 1, 2, 80);
  printf("\nTo see more help on other available options, do:\n %s -h\n\n", argv[0]);
  esl_getopts_Destroy(opts);
  exit(1);

 ERROR:
  if (opts) esl_getopts_Destroy(opts);
  exit(status);
}
int main(int argc, char **argv) { int i,j; ESL_GETOPTS *go = NULL; /* command line processing */ ESL_STOPWATCH *w = esl_stopwatch_Create(); int status; ESL_MSA *msa = NULL; FILE *ofp = NULL; /* output file (default is stdout) */ ESL_ALPHABET *abc = NULL; /* digital alphabet */ char *alifile; /* name of the alignment file we're building HMMs from */ ESLX_MSAFILE *afp = NULL; /* open alifile */ int infmt = eslMSAFILE_UNKNOWN; /* autodetect alignment format by default. */ int outfmt = eslMSAFILE_STOCKHOLM; char *postmsafile; /* optional file to resave annotated, modified MSAs to */ FILE *postmsafp = NULL; /* open <postmsafile>, or NULL */ int mask_range_cnt = 0; uint32_t mask_starts[100]; // over-the-top allocation. uint32_t mask_ends[100]; char *rangestr; char *range; int *map = NULL; /* map[i]=j, means model position i comes from column j of the alignment; 1..alen */ int keep_mm; /* Set processor specific flags */ impl_Init(); alifile = NULL; postmsafile = NULL; /* Parse the command line */ process_commandline(argc, argv, &go, &alifile, &postmsafile); keep_mm = esl_opt_IsUsed(go, "--apendmask"); /* Initialize what we can in the config structure (without knowing the alphabet yet). 
* Fields controlled by masters are set up in usual_master() or mpi_master() * Fields used by workers are set up in mpi_worker() */ ofp = NULL; infmt = eslMSAFILE_UNKNOWN; afp = NULL; abc = NULL; if (esl_opt_IsOn(go, "--informat")) { infmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--informat")); if (infmt == eslMSAFILE_UNKNOWN) p7_Fail("%s is not a recognized input sequence file format\n", esl_opt_GetString(go, "--informat")); } /* Determine output alignment file format */ outfmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--outformat")); if (outfmt == eslMSAFILE_UNKNOWN) p7_Fail(argv[0], "%s is not a recognized output MSA file format\n", esl_opt_GetString(go, "--outformat")); /* Parse the ranges */ if (esl_opt_IsUsed(go, "--alirange")) { esl_strdup(esl_opt_GetString(go, "--alirange"), -1, &rangestr) ; } else if (esl_opt_IsUsed(go, "--modelrange")) { esl_strdup(esl_opt_GetString(go, "--modelrange"), -1, &rangestr) ; } else if (esl_opt_IsUsed(go, "--model2ali")) { esl_strdup(esl_opt_GetString(go, "--model2ali"), -1, &rangestr) ; } else if (esl_opt_IsUsed(go, "--ali2model")) { esl_strdup(esl_opt_GetString(go, "--ali2model"), -1, &rangestr) ; } else { if (puts("Must specify mask range with --modelrange, --alirange, --model2ali, or --ali2model\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto ERROR; } while ( (status = esl_strtok(&rangestr, ",", &range) ) == eslOK) { status = esl_regexp_ParseCoordString(range, mask_starts + mask_range_cnt, mask_ends + mask_range_cnt ); if (status == eslESYNTAX) esl_fatal("range flags take coords <from>..<to>; %s not recognized", range); if (status == eslFAIL) esl_fatal("Failed to find <from> or <to> coord in %s", range); mask_range_cnt++; } /* Start timing. */ esl_stopwatch_Start(w); /* Open files, set alphabet. 
* afp - open alignment file for input * abc - alphabet expected or guessed in ali file * postmsafp - open MSA output file * ofp - optional open output file, or stdout */ if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); else abc = NULL; status = eslx_msafile_Open(&abc, alifile, NULL, infmt, NULL, &afp); if (status != eslOK) eslx_msafile_OpenFailure(afp, status); if (esl_opt_IsUsed(go, "--alirange") || esl_opt_IsUsed(go, "--modelrange") ) { postmsafp = fopen(postmsafile, "w"); if (postmsafp == NULL) p7_Fail("Failed to MSA output file %s for writing", postmsafile); } if (esl_opt_IsUsed(go, "-o")) { ofp = fopen(esl_opt_GetString(go, "-o"), "w"); if (ofp == NULL) p7_Fail("Failed to open -o output file %s\n", esl_opt_GetString(go, "-o")); } else ofp = stdout; /* Looks like the i/o is set up successfully... * Initial output to the user */ output_header(go, ofp, alifile, postmsafile); /* cheery output header */ /* read the alignment */ if ((status = eslx_msafile_Read(afp, &msa)) != eslOK) eslx_msafile_ReadFailure(afp, status); if (esl_opt_IsUsed(go, "--alirange") || esl_opt_IsUsed(go, "--modelrange") ) { /* add/modify mmline for the mask */ if (msa->mm == NULL) { ESL_ALLOC(msa->mm, msa->alen); keep_mm = FALSE; } if (!keep_mm) for (i=0; i<msa->alen; i++) msa->mm[i] = '.'; } // convert model coordinates to alignment coordinates, if necessary if (esl_opt_IsUsed(go, "--modelrange") || esl_opt_IsUsed(go, "--model2ali") || esl_opt_IsUsed(go, "--ali2model") ) { float symfrac = esl_opt_GetReal(go, "--symfrac"); int do_hand = esl_opt_IsOn(go, "--hand"); int L; //same as p7_builder relative_weights if (esl_opt_IsOn(go, "--wnone") ) { esl_vec_DSet(msa->wgt, msa->nseq, 1.); } else if (esl_opt_IsOn(go, "--wgiven") ) ; else if (esl_opt_IsOn(go, "--wpb") ) status = esl_msaweight_PB(msa); else if 
(esl_opt_IsOn(go, "--wgsc") ) status = esl_msaweight_GSC(msa); else if (esl_opt_IsOn(go, "--wblosum")) status = esl_msaweight_BLOSUM(msa, esl_opt_GetReal(go, "--wid")); if ((status = esl_msa_MarkFragments(msa, esl_opt_GetReal(go, "--fragthresh"))) != eslOK) goto ERROR; //build a map of model mask coordinates to alignment coords ESL_ALLOC(map, sizeof(int) * (msa->alen+1)); L = p7_Alimask_MakeModel2AliMap(msa, do_hand, symfrac, map ); if ( esl_opt_IsUsed(go, "--model2ali") ) { //print mapping printf ("model coordinates alignment coordinates\n"); for (i=0; i<mask_range_cnt; i++) printf ("%8d..%-8d -> %8d..%-8d\n", mask_starts[i], mask_ends[i], map[mask_starts[i]-1], map[mask_ends[i]-1]); /* If I wanted to, I could print all the map values independently: printf("\n\n-----------\n"); printf("Map\n"); printf("---\n"); for (i=0; i<L; i++) printf("%d -> %d\n", i+1, map[i]); */ } else if ( esl_opt_IsUsed(go, "--ali2model") ) { //print mapping (requires scanning the inverted map int alistart = 0; int aliend = 0; printf ("alignment coordinates model coordinates\n"); for (i=0; i<mask_range_cnt; i++) { //find j for ali positions while (map[alistart] < mask_starts[i] ) alistart++; aliend = alistart; while (map[aliend] < mask_ends[i] ) aliend++; printf (" %8d..%-8d -> %8d..%-8d\n", map[alistart], map[aliend], alistart+1, aliend+1); } } else { //convert the mask coords based on map for (i=0; i<mask_range_cnt; i++) { mask_starts[i] = map[mask_starts[i]-1]; //-1 because mmline is offset by one relative to the 1-base alignment mask_ends[i] = map[mask_ends[i]-1]; } } } if (esl_opt_IsUsed(go, "--alirange") || esl_opt_IsUsed(go, "--modelrange") ) { //overwrite '.' 
with 'm' everywhere the range says to do it for (i=0; i<mask_range_cnt; i++) for (j=mask_starts[i]; j<=mask_ends[i]; j++) msa->mm[j-1] = 'm'; if ((status = eslx_msafile_Write(postmsafp, msa, outfmt)) != eslOK) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); } esl_stopwatch_Stop(w); if (esl_opt_IsOn(go, "-o")) fclose(ofp); if (postmsafp) fclose(postmsafp); if (afp) eslx_msafile_Close(afp); if (abc) esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); esl_stopwatch_Destroy(w); return 0; ERROR: return eslFAIL; }
/* Function: esl_msafile_a2m_Write() * Synopsis: Write an A2M (UCSC SAM) dotless format alignment to a stream. * * Purpose: Write alignment <msa> in dotless UCSC A2M format to a * stream <fp>. * * The <msa> should have a valid reference line <msa->rf>, * with alphanumeric characters marking consensus (match) * columns, and non-alphanumeric characters marking * nonconsensus (insert) columns. If it does not, * then as a fallback, the first sequence in the alignment is * considered to be the consensus. * * In "dotless" A2M format, gap characters (.) in insert * columns are omitted; therefore sequences can be of * different lengths, but each sequence has the same number * of consensus columns (residue or -). * * A2M format cannot represent missing data symbols * (Easel's ~). Any missing data symbols are converted to * gaps. * * A2M format cannot represent pyrrolysine residues in * amino acid sequences, because it treats 'O' symbols * specially, as indicating a position at which a * free-insertion module (FIM) should be created. Any 'O' * in the <msa> is written instead as an unknown * residue ('X', in protein sequences). * * Args: fp - open output stream * msa - MSA to write * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation failure. * <eslEWRITE> on any system write error, such as filled disk. 
*/ int esl_msafile_a2m_Write(FILE *fp, const ESL_MSA *msa) { char *buf = NULL; int cpl = 60; int bpos; int pos; int is_consensus; int is_residue; int do_dotless = TRUE; /* just changing this to FALSE makes it write dots too */ int i; int sym; int status; ESL_ALLOC(buf, sizeof(char) * (cpl+1)); for (i = 0; i < msa->nseq; i++) { /* Construct the name/description line */ if (fprintf(fp, ">%s", msa->sqname[i]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed"); if (msa->sqacc != NULL && msa->sqacc[i] != NULL) { if (fprintf(fp, " %s", msa->sqacc[i]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed"); } if (msa->sqdesc != NULL && msa->sqdesc[i] != NULL) { if (fprintf(fp, " %s", msa->sqdesc[i]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed"); } if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed"); #ifdef eslAUGMENT_ALPHABET if (msa->abc) { pos = 0; while (pos < msa->alen) { for (bpos = 0; pos < msa->alen && bpos < cpl; pos++) { sym = msa->abc->sym[msa->ax[i][pos+1]]; /* note off-by-one in digitized aseq: 1..alen */ is_residue = esl_abc_XIsResidue(msa->abc, msa->ax[i][pos+1]); if (msa->rf) is_consensus = (isalnum(msa->rf[pos]) ? TRUE : FALSE); else is_consensus = (esl_abc_XIsResidue(msa->abc, msa->ax[0][pos+1]) ? TRUE : FALSE); if (sym == 'O') sym = esl_abc_XGetUnknown(msa->abc); /* watch out: O means "insert a FIM" in a2m format, not pyrrolysine */ if (is_consensus) { buf[bpos++] = (is_residue ? toupper(sym) : '-'); } else if (is_residue) { buf[bpos++] = tolower(sym); } else if (! do_dotless) { buf[bpos++] = '.'; } } buf[bpos] = '\0'; if (bpos) { if (fprintf(fp, "%s\n", buf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed");} } } #endif if (! msa->abc) { pos = 0; while (pos < msa->alen) { for (bpos = 0; pos < msa->alen && bpos < cpl; pos++) { sym = msa->aseq[i][pos]; is_residue = isalpha(msa->aseq[i][pos]); if (msa->rf) is_consensus = (isalnum(msa->rf[pos]) ? 
TRUE : FALSE); else is_consensus = (isalnum(msa->aseq[0][pos]) ? TRUE : FALSE); if (sym == 'O') sym = 'X'; if (is_consensus) { buf[bpos++] = ( is_residue ? toupper(sym) : '-'); } else if (is_residue) { buf[bpos++] = tolower(sym); } else if (! do_dotless) { buf[bpos++] = '.'; } } buf[bpos] = '\0'; if (bpos) { if (fprintf(fp, "%s\n", buf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "a2m msa file write failed"); } } } } /* end, loop over sequences in the MSA */ free(buf); return eslOK; ERROR: if (buf) free(buf); return status; }
/* Function: esl_msafile_psiblast_Write() * Synopsis: Write an MSA to a stream in PSI-BLAST format * * Purpose: Write alignment <msa> in NCBI PSI-BLAST format to * stream <fp>. * * The <msa> should have a valid reference line <msa->rf>, * with alphanumeric characters marking consensus (match) * columns, and non-alphanumeric characters marking * nonconsensus (insert) columns. If it does not have RF * annotation, then the first sequence in the <msa> * defines the "consensus". * * PSI-BLAST format allows only one symbol ('-') for gaps, * and cannot represent missing data symbols (Easel's * '~'). Any missing data symbols are converted to gaps. * * Args: fp - open output stream * msa - MSA to write * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation failure. * <eslEWRITE> on any system write failure, such as filled disk. */ int esl_msafile_psiblast_Write(FILE *fp, const ESL_MSA *msa) { char *buf = NULL; int cpl = 60; int acpl; int i; int sym; int64_t pos, bpos; int maxnamewidth = esl_str_GetMaxWidth(msa->sqname, msa->nseq); int is_consensus; int is_residue; int status; ESL_ALLOC(buf, sizeof(char) * (cpl+1)); for (pos = 0; pos < msa->alen; pos += cpl) { for (i = 0; i < msa->nseq; i++) { acpl = (msa->alen - pos > cpl)? cpl : msa->alen - pos; #ifdef eslAUGMENT_ALPHABET if (msa->abc) { for (bpos = 0; bpos < acpl; bpos++) { sym = msa->abc->sym[msa->ax[i][pos + bpos + 1]]; is_residue = esl_abc_XIsResidue(msa->abc, msa->ax[i][pos+bpos+1]); if (msa->rf) is_consensus = (isalnum(msa->rf[pos + bpos]) ? TRUE : FALSE); else is_consensus = (esl_abc_XIsResidue(msa->abc, msa->ax[0][pos+bpos+1]) ? TRUE : FALSE); if (is_consensus) { buf[bpos] = (is_residue ? toupper(sym) : '-'); } else { buf[bpos] = (is_residue ? tolower(sym) : '-'); } } } #endif if (! msa->abc) { for (bpos = 0; bpos < acpl; bpos++) { sym = msa->aseq[i][pos + bpos]; is_residue = isalnum(sym); if (msa->rf) is_consensus = (isalnum(msa->rf[pos + bpos]) ? 
TRUE : FALSE); else is_consensus = (isalnum(msa->aseq[0][pos+bpos]) ? TRUE : FALSE); if (is_consensus) { buf[bpos] = (is_residue ? toupper(sym) : '-'); } else { buf[bpos] = (is_residue ? tolower(sym) : '-'); } } } buf[acpl] = '\0'; if (fprintf(fp, "%-*s %s\n", maxnamewidth, msa->sqname[i], buf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "psiblast msa write failed"); } /* end loop over sequences */ if (pos + cpl < msa->alen) { if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "psiblast msa write failed"); } } free(buf); return eslOK; ERROR: if (buf) free(buf); return status; }
/* Function:  p7_h2io_WriteASCII()
 * Synopsis:  Write an H3 HMM in HMMER2 compatible format
 *
 * Purpose:   Write HMM <hmm> to stream <fp> in HMMER2 ASCII save
 *            file format.
 *
 *            HMMER2 saved the null model and the search configuration
 *            (local vs. glocal, for example) as part of its HMM file;
 *            H3 only saves the core HMM. The HMMER2 file is created
 *            for HMMER2's default ``ls mode'' (glocal) with default
 *            null model transitions and default special state
 *            transitions (NECJ).
 *
 *            Optional statistical calibration and alignment checksum
 *            are not written, because for these H3 and H2 differ too
 *            much.
 *
 * Args:      fp  - stream to write save file format to
 *            hmm - HMM to save
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation error.
 *
 *            <eslEINVAL> if <hmm> can't be converted; for example, if
 *            it is not in a protein or nucleic acid alphabet (H2
 *            requires biosequence in its save files). This is checked
 *            before anything is written, so <fp> is left untouched.
 *
 *            <eslEWRITE> if any write fails; for example, if the
 *            disk fills up.
 *
 *            Any non-<eslOK> status returned by the printprob() or
 *            h2_multiline() helpers is passed through unchanged.
 */
int
p7_h2io_WriteASCII(FILE *fp, P7_HMM *hmm)
{
  P7_BG      *bg       = NULL;  /* H2 saves null model in HMM file      */
  const char *alphname = NULL;  /* H2 name for the alphabet             */
  int         k;                /* counter for nodes                    */
  int         x;                /* counter for symbols                  */
  int         ts;               /* counter for state transitions        */
  float       pmove, ploop;     /* default H2 null model transitions    */
  int         status;

  if ((bg = p7_bg_Create(hmm->abc)) == NULL) { status = eslEMEM; goto ERROR; }

  /* Validate the alphabet up front, so that an unconvertible HMM does not
   * leave a half-written record on the stream. H2 has one nucleic alphabet. */
  if      (hmm->abc->type == eslAMINO)                           alphname = "Amino";
  else if (hmm->abc->type == eslDNA || hmm->abc->type == eslRNA) alphname = "Nucleic";
  else    ESL_XEXCEPTION(eslEINVAL, "Only protein, DNA, RNA HMMs can be saved in H2 format");

  /* magic header */
  if (fprintf(fp, "HMMER2.0 [converted from %s]\n", HMMER_VERSION) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

  if (             fprintf(fp, "NAME %s\n", hmm->name) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if (hmm->acc  && fprintf(fp, "ACC %s\n",  hmm->acc)  < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if (hmm->desc && fprintf(fp, "DESC %s\n", hmm->desc) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if (             fprintf(fp, "LENG %d\n", hmm->M)    < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if (             fprintf(fp, "ALPH %s\n", alphname)  < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

  if (fprintf(fp, "RF %s\n",  (hmm->flags & p7H_RF)  ? "yes" : "no") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if (fprintf(fp, "CS %s\n",  (hmm->flags & p7H_CS)  ? "yes" : "no") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if (fprintf(fp, "MAP %s\n", (hmm->flags & p7H_MAP) ? "yes" : "no") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  /* H3 consensus line has no counterpart in H2 */

  if (hmm->comlog != NULL) { if ( (status = h2_multiline(fp, "COM ", hmm->comlog)) != eslOK) goto ERROR; }
  if (hmm->nseq   != -1)   { if (fprintf(fp, "NSEQ %d\n", hmm->nseq)  < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
  if (hmm->ctime  != NULL) { if (fprintf(fp, "DATE %s\n", hmm->ctime) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
  /* Checksum is not written; H2 and H3 use different checksum algorithms */

  if (hmm->flags & p7H_GA) { if (fprintf(fp, "GA %.1f %.1f\n", hmm->cutoff[p7_GA1], hmm->cutoff[p7_GA2]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
  if (hmm->flags & p7H_TC) { if (fprintf(fp, "TC %.1f %.1f\n", hmm->cutoff[p7_TC1], hmm->cutoff[p7_TC2]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
  if (hmm->flags & p7H_NC) { if (fprintf(fp, "NC %.1f %.1f\n", hmm->cutoff[p7_NC1], hmm->cutoff[p7_NC2]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }

  /* in H3, the HMM does not include NECJ; these are part of the profile.
   * for emulating H2 output, assume default LS config
   */
  if (fputs("XT ", fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  pmove = ( (hmm->abc->type == eslAMINO) ?   1./351. :    1./1001.);
  ploop = ( (hmm->abc->type == eslAMINO) ? 350./351. : 1000./1001.);
  {
    /* special state transitions, in H2 file order: NB NN EC EJ CT CC JB JJ */
    const float xt[8] = { pmove, ploop, 0.5, 0.5, pmove, ploop, pmove, ploop };

    for (ts = 0; ts < 8; ts++)
      if ( (status = printprob(fp, 6, xt[ts], 1.0)) != eslOK) goto ERROR;
  }
  if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

  /* Save the default H2 null model transitions, not H3's null model transitions */
  if (fprintf(fp, "NULT ") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if ( (status = printprob(fp, 6, ploop, 1.0)) != eslOK) goto ERROR;  /* 1-p1 */
  if ( (status = printprob(fp, 6, pmove, 1.0)) != eslOK) goto ERROR;  /* p1   */
  if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

  /* but null emissions really are the H3 null model emissions */
  if (fputs("NULE ", fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  for (x = 0; x < hmm->abc->K; x++)
    { if ( (status = printprob(fp, 6, bg->f[x], 1./(float)hmm->abc->K)) != eslOK) goto ERROR; }
  if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

  /* Don't save stats; H3 local alignment stats are different from H2 calibration */

  /* The main model section: column headers first */
  if (fprintf(fp, "HMM ") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  for (x = 0; x < hmm->abc->K; x++)
    { if (fprintf(fp, " %c ", hmm->abc->sym[x]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
  if (fprintf(fp, "\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if (fprintf(fp, " %6s %6s %6s %6s %6s %6s %6s %6s %6s\n",
              "m->m", "m->i", "m->d", "i->m", "i->i", "d->m", "d->d", "b->m", "m->e") < 0)
    ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

  /* Print HMM parameters (main section of the save file).
   * The first line is derived from the node-0 M->D transition only. */
  if (fprintf(fp, " ") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if ( (status = printprob(fp, 6, 1.-hmm->t[0][p7H_MD], 1.0)) != eslOK) goto ERROR;
  if (fprintf(fp, " %6s", "*") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
  if ( (status = printprob(fp, 6, hmm->t[0][p7H_MD], 1.0)) != eslOK) goto ERROR;
  if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

  for (k = 1; k <= hmm->M; k++)
    {
      /* Line 1: k, match emissions, map */
      if (fprintf(fp, " %5d ", k) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
      for (x = 0; x < hmm->abc->K; x++)
        if ( (status = printprob(fp, 6, hmm->mat[k][x], bg->f[x])) != eslOK) goto ERROR;
      if (hmm->flags & p7H_MAP)
        { if (fprintf(fp, " %5d", hmm->map[k]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
      if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

      /* Line 2: RF and insert emissions (written as 0.0 for the last node) */
      if (fprintf(fp, " %5c ", hmm->flags & p7H_RF ? hmm->rf[k] : '-') < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
      for (x = 0; x < hmm->abc->K; x++)
        if ( (status = printprob(fp, 6, ((k < hmm->M) ? hmm->ins[k][x] : 0.0), bg->f[x])) != eslOK) goto ERROR;
      if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

      /* Line 3: CS and transition probs (written as 0.0 for the last node) */
      if (fprintf(fp, " %5c ", hmm->flags & p7H_CS ? hmm->cs[k] : '-') < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
      for (ts = 0; ts < 7; ts++)
        if ( (status = printprob(fp, 6, ((k < hmm->M) ? hmm->t[k][ts] : 0.0), 1.0)) != eslOK) goto ERROR;
      /* b->m: only node 1 gets an entry probability; m->e: only node M exits */
      if ( (status = printprob(fp, 6, ((k==1)     ? hmm->t[0][p7H_MM] : 0.0), 1.0)) != eslOK) goto ERROR;
      if ( (status = printprob(fp, 6, ((k<hmm->M) ? 0.0               : 1.0), 1.0)) != eslOK) goto ERROR;
      if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
    }

  if (fputs("//\n", fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");

  p7_bg_Destroy(bg);
  return eslOK;

 ERROR:
  p7_bg_Destroy(bg);
  return status;
}