Exemplo n.º 1
0
/* The "basic" utest is a minimal driver for making a small DNA profile and a small DNA sequence,
 * then running Viterbi and Forward. It's useful for dumping DP matrices and profiles for debugging.
 */
static void
utest_basic(ESL_GETOPTS *go)
{
  /* Fixed inputs: the alignment the model is built from, and the target it is scored against. */
  char           *targ    = "GAATTC";
  char           *query   = "# STOCKHOLM 1.0\n\nseq1 GAATTC\nseq2 GAATTC\n//\n";
  int             fmt     = eslMSAFILE_STOCKHOLM;
  int             L       = strlen(targ);
  ESL_ALPHABET   *abc     = NULL;
  P7_PRIOR       *pri     = NULL;
  ESL_MSA        *msa     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  ESL_DSQ        *dsq     = NULL;
  P7_GMX         *gx      = NULL;
  P7_TRACE       *tr      = NULL;
  float           vit_sc, trace_sc, fwd_sc;
  int             verbose;

  /* Alphabet, prior, and the digitized seed alignment. */
  abc = esl_alphabet_Create(eslDNA);
  if (abc == NULL) esl_fatal("failed to create alphabet");

  pri = p7_prior_CreateNucleic();
  if (pri == NULL) esl_fatal("failed to create prior");

  msa = esl_msa_CreateFromString(query, fmt);
  if (msa == NULL) esl_fatal("failed to create MSA");

  if (esl_msa_Digitize(abc, msa, NULL) != eslOK)
    esl_fatal("failed to digitize MSA");

  /* Model construction and parameterization. */
  if (p7_Fastmodelmaker(msa, 0.5, NULL, &hmm, NULL) != eslOK)
    esl_fatal("failed to create GAATTC model");
  if (p7_ParameterEstimation(hmm, pri) != eslOK)
    esl_fatal("failed to parameterize GAATTC model");
  if (p7_hmm_SetConsensus(hmm, NULL) != eslOK)
    esl_fatal("failed to make consensus");

  /* Null model and the configured, validated search profile. */
  bg = p7_bg_Create(abc);
  if (bg == NULL) esl_fatal("failed to create DNA null model");

  gm = p7_profile_Create(hmm->M, abc);
  if (gm == NULL) esl_fatal("failed to create GAATTC profile");

  if (p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL) != eslOK)
    esl_fatal("failed to config profile");
  if (p7_profile_Validate(gm, NULL, 0.0001) != eslOK)
    esl_fatal("whoops, profile is bad!");

  /* Digital target sequence, DP matrix, and an empty trace. */
  if (esl_abc_CreateDsq(abc, targ, &dsq) != eslOK)
    esl_fatal("failed to create GAATTC digital sequence");

  gx = p7_gmx_Create(gm->M, L);
  if (gx == NULL) esl_fatal("failed to create DP matrix");

  tr = p7_trace_Create();
  if (tr == NULL) esl_fatal("trace creation failed");

  /* Viterbi fill; with -v, print the score and dump the matrix. */
  p7_GViterbi(dsq, L, gm, gx, &vit_sc);
  verbose = esl_opt_GetBoolean(go, "-v");
  if (verbose)
    {
      printf("Viterbi score: %.4f\n", vit_sc);
      p7_gmx_Dump(stdout, gx, p7_DEFAULT);
    }

  /* Trace back through the Viterbi matrix and rescore that trace;
   * the two scores are required to agree within 1e-5.
   */
  p7_GTrace(dsq, L, gm, gx, tr);
  p7_trace_Score(tr, dsq, gm, &trace_sc);
  if (verbose) p7_trace_Dump(stdout, tr, gm, dsq);

  if (esl_FCompare(vit_sc, trace_sc, 1e-5) != eslOK)
    esl_fatal("trace score and Viterbi score don't agree.");

  /* Forward fill reuses the same matrix. */
  p7_GForward(dsq, L, gm, gx, &fwd_sc);
  if (verbose)
    {
      printf("Forward score: %.4f\n", fwd_sc);
      p7_gmx_Dump(stdout, gx, p7_DEFAULT);
    }

  /* Release everything created above. */
  p7_trace_Destroy(tr);
  p7_gmx_Destroy(gx);
  free(dsq);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_msa_Destroy(msa);
  p7_prior_Destroy(pri);
  esl_alphabet_Destroy(abc);
}
Exemplo n.º 2
0
/* utest_BLOSUM()
 * Runs esl_msaweight_BLOSUM() on <msa> at threshold <maxid> and requires the
 * resulting msa->wgt[] to match <expect> (msa->nseq values) within 0.001.
 * If <abc> is non-NULL, the alignment is then digitized, the same check is
 * repeated, and the alignment is converted back to text before returning.
 *
 * Any failure is fatal (esl_fatal()); otherwise returns eslOK.
 */
static int
utest_BLOSUM(ESL_ALPHABET *abc, ESL_MSA *msa, double maxid, double *expect)
{
  char *msg = "BLOSUM weights unit test failure";
  int   rc;

  /* First pass: the alignment exactly as the caller passed it in. */
  rc = esl_msaweight_BLOSUM(msa, maxid);
  if (rc != eslOK) esl_fatal(msg);

  rc = esl_vec_DCompare(msa->wgt, expect, msa->nseq, 0.001);
  if (rc != eslOK) esl_fatal(msg);

  /* No alphabet supplied: nothing more to test. */
  if (abc == NULL) return eslOK;

  /* Second pass: same expectations after digitizing with <abc>. */
  rc = esl_msa_Digitize(abc, msa, NULL);
  if (rc != eslOK) esl_fatal(msg);

  rc = esl_msaweight_BLOSUM(msa, maxid);
  if (rc != eslOK) esl_fatal(msg);

  rc = esl_vec_DCompare(msa->wgt, expect, msa->nseq, 0.001);
  if (rc != eslOK) esl_fatal(msg);

  /* Put the alignment back into text mode for the caller. */
  rc = esl_msa_Textize(msa);
  if (rc != eslOK) esl_fatal(msg);

  return eslOK;
}
Exemplo n.º 3
0
/* Test driver: builds a 12-sequence alignment from a string and runs
 * utest_SingleLinkage() at three identity thresholds, first on the alignment
 * as created and then again after esl_msa_Digitize().
 */
int
main(int argc, char **argv)
{
  /* One row per utest_SingleLinkage() call: the threshold and the two
   * expected integers that are handed to it.
   */
  static const struct { double maxid; int exp_a; int exp_b; } cases[] = {
    { 1.0, 11, 10 },    /* at 100% id, only seq0/seq1 cluster */
    { 0.5,  6,  5 },    /* at 50% id, seq0-seq6 cluster       */
    { 0.0,  1,  0 },    /* at 0% id, everything clusters      */
  };
  const int     ncases = (int) (sizeof(cases) / sizeof(cases[0]));
  ESL_GETOPTS  *go     = NULL;
  ESL_ALPHABET *abc    = NULL;
  ESL_MSA      *msa    = NULL;
  int           pass;
  int           c;

  go  = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  abc = esl_alphabet_Create(eslAMINO);
  msa = esl_msa_CreateFromString(
          "# STOCKHOLM 1.0\n"
          "\n"
          "seq0  AAAAAAAAAA\n"
          "seq1  AAAAAAAAAA\n"
          "seq2  AAAAAAAAAC\n"
          "seq3  AAAAAAAADD\n"
          "seq4  AAAAAAAEEE\n"
          "seq5  AAAAAAFFFF\n"
          "seq6  AAAAAGGGGG\n"
          "seq7  AAAAHHHHHH\n"
          "seq8  AAAIIIIIII\n"
          "seq9  AAKKKKKKKK\n"
          "seq10 ALLLLLLLLL\n"
          "seq11 MMMMMMMMMM\n"
          "//",
          eslMSAFILE_STOCKHOLM);

  /* Pass 0 uses the alignment as created; pass 1 repeats the same tests
   * after digitizing it.
   */
  for (pass = 0; pass < 2; pass++)
    {
      if (pass == 1) esl_msa_Digitize(abc, msa, NULL);

      for (c = 0; c < ncases; c++)
        utest_SingleLinkage(go, msa, cases[c].maxid, cases[c].exp_a, cases[c].exp_b);
    }

  esl_msa_Destroy(msa);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
Exemplo n.º 4
0
/* Builds a profile HMM from a multiple alignment.
 *
 * On success the newly built HMM is returned and the temporary objects
 * (alphabet, null model, builder, converted alignment) are handed to
 * destoryAllIfYouCan().
 *
 * On any failure -- including cancellation reported by p7_Builder() -- the
 * error text is stored in <ti>, every object created so far (including a
 * partially built HMM) is handed to destoryAllIfYouCan(), and NULL is
 * returned. Exceptions raised by throwUHMMER3Exception() are caught here and
 * never reach the caller.
 *
 * NOTE(review): the previous comment on this function stated that hmm->abc
 * is not destroyed here and must be deleted by the caller. What
 * destoryAllIfYouCan() does with <abc> is not visible from this function --
 * confirm before relying on either behaviour.
 */
P7_HMM * UHMM3Build::build( const MAlignment & malignment, const UHMM3BuildSettings & settings ,TaskStateInfo & ti ) {
    // These live outside the try block on purpose: the catch handlers must see
    // whatever has been allocated so far. They must NOT be re-declared inside
    // the try block, otherwise the handlers would only ever see NULLs and leak.
    ESL_ALPHABET*   abc = NULL;
    P7_BG*          bg  = NULL;
    P7_BUILDER*     bld = NULL;
    ESL_MSA*        msa = NULL;
    P7_HMM*         hmm = NULL;
    QByteArray      errStr;
    
    ti.progress = 0;
    try {
        int alType = UHMM3Utilities::convertAlphabetType( malignment.getAlphabet() );
        if( UHMM3Utilities::BAD_ALPHABET == alType ) {
            errStr = tr( "UGENE cannot determine alphabet of alignment" ).toLatin1();
            throwUHMMER3Exception( errStr.data() );
        }
        abc = esl_alphabet_Create( alType );
        if( NULL == abc ) {
            errStr = tr( "Run out of memory (creating alphabet failed)" ).toLatin1();
            throwUHMMER3Exception( errStr.data() );
        }
        
        bg = p7_bg_Create( abc );
        if( NULL == bg ) {
            errStr = tr( "Run out of memory (creating null model failed)" ).toLatin1();
            throwUHMMER3Exception( errStr.data() );
        }
        bld = p7_builder_Create( &settings, abc );
        if( NULL == bld ) {
            errStr = tr( "Run out of memory (creating builder failed)" ).toLatin1();
            throwUHMMER3Exception( errStr.data() );
        }
        
        msa = UHMM3Utilities::convertMSA( malignment );
        if( NULL == msa ) {
            errStr = tr( "Run out of memory (creating multiple alignment failed)" ).toLatin1();
            throwUHMMER3Exception( errStr.data() );
        }
        int ret = esl_msa_Digitize( abc, msa, NULL );
        if( eslOK != ret ) {
            errStr = tr( "Run out of memory (digitizing of alignment failed)" ).toLatin1();
            throwUHMMER3Exception( errStr.data() );
        }
        ret = p7_Builder( bld, msa, bg, &hmm, NULL, NULL, NULL, NULL, ti );
        if ( eslOK != ret ) {
            // Cancellation gets its own message; everything else is a generic failure.
            if( eslCANCELED == ret ) {
                errStr = tr( HMMER3_CANCELED_ERROR ).toLatin1();
            } else {
                errStr = tr( "Model building failed" ).toLatin1();
            }
            assert( !errStr.isEmpty() );
            throwUHMMER3Exception( errStr.data() );
        }
        
        // Success: release the temporaries but keep the HMM (hence the NULL).
        destoryAllIfYouCan( abc, bg, bld, msa, NULL );
    } catch( const UHMMER3Exception& ex ) {
        ti.setError( ex.msg );
        destoryAllIfYouCan( abc, bg, bld, msa, hmm );
        return NULL;
    } catch(...) {
        ti.setError( tr( HMMER3_UNKNOWN_ERROR ) );
        destoryAllIfYouCan( abc, bg, bld, msa, hmm );
        return NULL;
    }
    
    return hmm;
}
Exemplo n.º 5
0
/* utest_fragments()
 * Exercises the model-building code that deals with fragments:
 * traces containing B->X->{MDI}k and {MDI}k->X->E transitions,
 * and the reconstruction of an MSA from those traces with
 * p7_tracealign_MSA(). This code was buggy when first written;
 * the bugs were first detected by Elena, Nov 2009.
 *
 * Any failure is fatal.
 */
static void
utest_fragments(void)
{
  /* The alignment that tests fragment/missing data transitions.
   * When built with Handmodelmaker (using the RF line):
   *   seq1 forces B->X->Mk and Mk->X->E missing data transitions;
   *   seq2 forces B->X->Ik and Ik->X->E missing data transitions;
   *   seq3 forces B->X->Dk and Dk->X->E missing data transitions.
   *
   * The first two cases can arise from fragment definition in
   * model construction, or in an input file.
   *
   * The X->Dk and Dk->X cases should never happen, but are not
   * prohibited. They can only arise in an input file, because
   * esl_msa_MarkFragments() converts everything before/after the
   * first/last residue to ~, and won't leave a gap character in
   * between.
   *
   * Nothing is being tested by seq4 and seq5; they're just there.
   */
  static const char *stockholm[] = {
    "# STOCKHOLM 1.0\n",
    "#=GC RF xxxxx.xxxxxxxxxxxx.xxx\n",
    "seq1    ~~~~~~GHIKLMNPQRST~~~~\n",
    "seq2    ~~~~~aGHIKLMNPQRSTa~~~\n",
    "seq3    ~~~~~~~HIKLMNPQRS~~~~~\n",
    "seq4    ACDEF.GHIKLMNPQRST.VWY\n",
    "seq5    ACDEF.GHIKLMNPQRST.VWY\n",
    "//\n",
  };
  const int     nlines       = (int) (sizeof(stockholm) / sizeof(stockholm[0]));
  char         *failmsg      = "failure in build.c::utest_fragments() unit test";
  char          msafile[16]  = "p7tmpXXXXXX"; /* tmpfile name template */
  FILE         *ofp          = NULL;
  ESL_ALPHABET *abc          = esl_alphabet_Create(eslAMINO);
  ESL_MSAFILE  *afp          = NULL;
  ESL_MSA      *msa          = NULL;   /* text-mode original       */
  ESL_MSA      *dmsa         = NULL;   /* digital copy of <msa>    */
  ESL_MSA      *postmsa      = NULL;   /* rebuilt from the traces  */
  P7_HMM       *hmm          = NULL;
  P7_TRACE    **trarr        = NULL;
  int           status;
  int           k;
  int           idx;

  /* Write the alignment to a named temporary file. */
  status = esl_tmpfile_named(msafile, &ofp);
  if (status != eslOK) esl_fatal(failmsg);
  for (k = 0; k < nlines; k++)
    fputs(stockholm[k], ofp);
  fclose(ofp);

  /* Read the original back as text, for comparison to postmsa;
   * clone it and digitize the clone for model construction.
   */
  status = esl_msafile_Open(NULL, msafile, NULL, eslMSAFILE_UNKNOWN, NULL, &afp);
  if (status != eslOK) esl_fatal(failmsg);

  status = esl_msafile_Read(afp, &msa);
  if (status != eslOK) esl_fatal(failmsg);

  dmsa = esl_msa_Clone(msa);
  if (dmsa == NULL) esl_fatal(failmsg);

  status = esl_msa_Digitize(abc, dmsa, NULL);
  if (status != eslOK) esl_fatal(failmsg);

  /* Build the model and validate each trace it implies. */
  status = p7_Handmodelmaker(dmsa, NULL, &hmm, &trarr);
  if (status != eslOK) esl_fatal(failmsg);

  for (idx = 0; idx < dmsa->nseq; idx++)
    {
      status = p7_trace_Validate(trarr[idx], abc, dmsa->ax[idx], NULL);
      if (status != eslOK) esl_fatal(failmsg);
    }

  /* The example is contrived so that the traces give back exactly the
   * same (text) alignment as the input; no tracedoctoring. This is not
   * a trivial test: sequence 2, for example, has a B->X->I transition
   * that can be problematic to handle.
   */
  status = p7_tracealign_MSA(dmsa, trarr, hmm->M, p7_DEFAULT, &postmsa);
  if (status != eslOK) esl_fatal(failmsg);

  for (idx = 0; idx < msa->nseq; idx++)
    {
      if (strcmp(msa->aseq[idx], postmsa->aseq[idx]) != 0)
        esl_fatal(failmsg);
    }

  /* Clean up, including the temporary file. */
  p7_trace_DestroyArray(trarr, msa->nseq);
  p7_hmm_Destroy(hmm);
  esl_msa_Destroy(msa);
  esl_msa_Destroy(dmsa);
  esl_msa_Destroy(postmsa);
  esl_msafile_Close(afp);
  esl_alphabet_Destroy(abc);
  remove(msafile);
}
Exemplo n.º 6
0
/* isaMSA()
 * Tries to parse <input> as a multiple sequence alignment and reports what it
 * found as a reference to a new Perl hash.
 *
 * Parameters:
 *   input   - NUL-terminated text to parse
 *   is_msa  - not used by this function
 *   ali_hmm - passed through to constructHMM()
 *   dna_ok  - when 1, DNA and RNA alphabets are accepted in addition to amino
 *   frag    - passed through to constructHMM()
 *
 * Hash keys that may be set:
 *   "type"     - "UNK" initially; "MSA", "SS" or "MS?" depending on the outcome
 *   "error"    - a status code (open failure) or a message (all other failures)
 *   "guess"    - the file format recorded by the parser
 *   "position" - parser line number, set when reading fails
 *   "alpha"    - the guessed alphabet type
 *   "count"    - number of sequences
 *   "hmmpgmd"  - the input itself (single-sequence aligned FASTA) or the HMM
 *                built from the alignment, as text
 *
 * Returns: a new reference (newRV_noinc) to the hash. All Easel/HMMER objects
 * created here are released before returning.
 */
SV* isaMSA (const char *input, int is_msa, int ali_hmm, int dna_ok, int frag){
  ESLX_MSAFILE *mfp         = NULL;
  ESL_MSA      *msa         = NULL;
  ESL_ALPHABET *open_abc    = NULL;  /* alphabet handed back by the open call, if any */
  ESL_ALPHABET *abc         = NULL;  /* alphabet created from the guessed type        */
  P7_HMM       *ret_hmm     = NULL;
  char         *ascii_hmm   = NULL;
  const char   *msg         = NULL;
  int           alpha       = 0;     /* 0 is what the "could not determine" test below checks for */
  int           digitized   = 0;
  int           status;
  char          errbuf[eslERRBUFSIZE];
  HV* hash = newHV();

  errbuf[0] = '\0';                  /* so it is always a valid string when reported */
  hv_store(hash, "type", strlen("type"), newSVpv("UNK", 3), 0);

  /* The open call receives its own alphabet slot. Previously it shared <abc>
   * with the esl_alphabet_Create() call further down, so anything the open
   * call stored there was overwritten and never destroyed.
   */
  status = eslx_msafile_OpenMem( &open_abc, input, -1, NULL, NULL, &mfp);
  if (status != eslOK){
    hv_store(hash, "error", strlen("error"), newSViv(status), 0);
    goto cleanup;
  }

  status = eslx_msafile_Read(mfp, &msa);
  if (status != eslOK){
    hv_store(hash, "error", strlen("error"), newSVpv(mfp->errmsg, strlen(mfp->errmsg)), 0);
    hv_store(hash, "guess", strlen("guess"), newSViv(mfp->format), 0);
    hv_store(hash, "type", strlen("type"), newSVpv("MSA", 3), 0);
    hv_store(hash, "position", strlen("position"), newSViv(mfp->linenumber), 0);
    goto cleanup;
  }

  /* Back to text mode, then guess the alphabet from the alignment itself. */
  esl_msa_Textize(msa);
  esl_msa_GuessAlphabet(msa, &alpha);
  hv_store(hash, "alpha", strlen("alpha"), newSViv(alpha), 0);

  if (!(alpha == eslAMINO || (dna_ok == 1 && (alpha == eslDNA || alpha == eslRNA)))){
    /* Unknown or disallowed alphabet. */
    if(alpha == 0 ){
      hv_store(hash, "error", strlen("error"), newSVpv("Could not determine alphabet", 28), 0);
    }else{
      hv_store(hash, "error", strlen("error"), newSVpv("Bad alphabet", 12), 0);
    }
    if (msa->nseq > 1) {
      if( mfp->format == eslMSAFILE_AFA){
        hv_store(hash, "type", strlen("type"), newSVpv("MS?", 3), 0);
      }else{
        hv_store(hash, "type", strlen("type"), newSVpv("MSA", 3), 0);
      }
    }
    goto cleanup;
  }

  /* Digitize with a fresh alphabet of the guessed type. Both steps can fail;
   * remember the outcome instead of handing a half-prepared alignment on.
   */
  abc = esl_alphabet_Create(alpha);
  if (abc != NULL && esl_msa_Digitize( abc, msa, errbuf) == eslOK) digitized = 1;

  hv_store(hash, "count", strlen("count"), newSViv(msa->nseq), 0);

  if(msa->nseq == 1 && mfp->format == eslMSAFILE_AFA){
    /* A lone sequence in aligned FASTA is reported as a single sequence. */
    hv_store(hash, "type", strlen("type"), newSVpv("SS", 2), 0);
    hv_store(hash, "hmmpgmd", strlen("hmmpgmd"), newSVpv(input, strlen(input)), 0);
    goto cleanup;
  }

  // We have been told it is an MSA or it is any other format other than AFA
  hv_store(hash, "type", strlen("type"), newSVpv("MSA", 3), 0);

  if (!digitized) {
    msg = (errbuf[0] != '\0') ? errbuf : "Could not digitize alignment";
    hv_store(hash, "error", strlen("error"), newSVpv(msg, strlen(msg)), 0);
    hv_store(hash, "guess", strlen("guess"), newSViv(mfp->format), 0);
    goto cleanup;
  }

  status = constructHMM( msa, abc, ali_hmm, frag, &ret_hmm, errbuf);
  if (status != eslOK) {
    hv_store(hash, "error", strlen("error"), newSVpv(errbuf, strlen(errbuf)), 0);
    hv_store(hash, "guess", strlen("guess"), newSViv(mfp->format), 0);
    goto cleanup;
  }

  /* Only take strlen() of the serialized model if serialization worked. */
  status = p7_hmmfile_WriteToString(&ascii_hmm, -1, ret_hmm);
  if (status != eslOK || ascii_hmm == NULL) {
    msg = "Could not serialize HMM";
    hv_store(hash, "error", strlen("error"), newSVpv(msg, strlen(msg)), 0);
  } else {
    hv_store(hash, "hmmpgmd", strlen("hmmpgmd"), newSVpv(ascii_hmm, strlen(ascii_hmm)), 0);
  }

cleanup:
  if (mfp != NULL) eslx_msafile_Close(mfp);
  if (msa != NULL) esl_msa_Destroy(msa);
  if (ret_hmm != NULL) p7_hmm_Destroy(ret_hmm);
  if (abc != NULL) esl_alphabet_Destroy(abc);
  if (open_abc != NULL) esl_alphabet_Destroy(open_abc);
  free( ascii_hmm );

  return newRV_noinc((SV*) hash);
}