int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *msafile = esl_opt_GetArg(go, 1); ESL_ALPHABET *abc = NULL; int infmt = eslMSAFILE_UNKNOWN; ESLX_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; FILE *ofp = stdout; int nali = 0; int namewidth; double pid; int nid, n; int i,j; int status; /* allow user to assert the input MSA alphabet */ if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); /* allow user to assert the input MSA format */ if (esl_opt_IsOn(go, "--informat") && (infmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--informat"))) == eslMSAFILE_UNKNOWN) esl_fatal("%s is not a valid MSA file format for --informat", esl_opt_GetString(go, "--informat")); /* digital open */ if ( ( status = eslx_msafile_Open(&abc, msafile, NULL, infmt, NULL, &afp)) != eslOK) eslx_msafile_OpenFailure(afp, status); while ((status = eslx_msafile_Read(afp, &msa)) == eslOK) { nali++; namewidth = esl_str_GetMaxWidth(msa->sqname, msa->nseq); for (i = 0; i < msa->nseq; i++) for (j = i+1; j < msa->nseq; j++) { esl_dst_XPairId(abc, msa->ax[i], msa->ax[j], &pid, &nid, &n); fprintf(ofp, "%-*s %-*s %6.2f %6d %6d\n", namewidth, msa->sqname[i], namewidth, msa->sqname[j], pid*100.0, nid, n); } esl_msa_Destroy(msa); } if (nali == 0 || status != eslEOF) eslx_msafile_ReadFailure(afp, status); eslx_msafile_Close(afp); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
/* Function: esl_msafile_psiblast_Write() * Synopsis: Write an MSA to a stream in PSI-BLAST format * * Purpose: Write alignment <msa> in NCBI PSI-BLAST format to * stream <fp>. * * The <msa> should have a valid reference line <msa->rf>, * with alphanumeric characters marking consensus (match) * columns, and non-alphanumeric characters marking * nonconsensus (insert) columns. If it does not have RF * annotation, then the first sequence in the <msa> * defines the "consensus". * * PSI-BLAST format allows only one symbol ('-') for gaps, * and cannot represent missing data symbols (Easel's * '~'). Any missing data symbols are converted to gaps. * * Args: fp - open output stream * msa - MSA to write * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation failure. * <eslEWRITE> on any system write failure, such as filled disk. */ int esl_msafile_psiblast_Write(FILE *fp, const ESL_MSA *msa) { char *buf = NULL; int cpl = 60; int acpl; int i; int sym; int64_t pos, bpos; int maxnamewidth = esl_str_GetMaxWidth(msa->sqname, msa->nseq); int is_consensus; int is_residue; int status; ESL_ALLOC(buf, sizeof(char) * (cpl+1)); for (pos = 0; pos < msa->alen; pos += cpl) { for (i = 0; i < msa->nseq; i++) { acpl = (msa->alen - pos > cpl)? cpl : msa->alen - pos; #ifdef eslAUGMENT_ALPHABET if (msa->abc) { for (bpos = 0; bpos < acpl; bpos++) { sym = msa->abc->sym[msa->ax[i][pos + bpos + 1]]; is_residue = esl_abc_XIsResidue(msa->abc, msa->ax[i][pos+bpos+1]); if (msa->rf) is_consensus = (isalnum(msa->rf[pos + bpos]) ? TRUE : FALSE); else is_consensus = (esl_abc_XIsResidue(msa->abc, msa->ax[0][pos+bpos+1]) ? TRUE : FALSE); if (is_consensus) { buf[bpos] = (is_residue ? toupper(sym) : '-'); } else { buf[bpos] = (is_residue ? tolower(sym) : '-'); } } } #endif if (! msa->abc) { for (bpos = 0; bpos < acpl; bpos++) { sym = msa->aseq[i][pos + bpos]; is_residue = isalnum(sym); if (msa->rf) is_consensus = (isalnum(msa->rf[pos + bpos]) ? TRUE : FALSE); else is_consensus = (isalnum(msa->aseq[0][pos+bpos]) ? TRUE : FALSE); if (is_consensus) { buf[bpos] = (is_residue ? toupper(sym) : '-'); } else { buf[bpos] = (is_residue ? tolower(sym) : '-'); } } } buf[acpl] = '\0'; if (fprintf(fp, "%-*s %s\n", maxnamewidth, msa->sqname[i], buf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "psiblast msa write failed"); } /* end loop over sequences */ if (pos + cpl < msa->alen) { if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "psiblast msa write failed"); } } free(buf); return eslOK; ERROR: if (buf) free(buf); return status; }