Exemple #1
0
/*
 * Align every sequence of a sequence file against a covariance model on
 * both strands with TrCYK_DnC() and print the better-scoring alignment.
 *
 * Usage: <program> <cmfile> <seqfile>
 *
 * For each non-empty sequence the forward strand is scored first, the
 * sequence is then reverse-complemented in place and scored again. The
 * strand with the strictly higher score is reported; ties go to the
 * reverse strand.
 *
 * All fatal conditions are reported through cm_Die(), which is expected
 * not to return (the code following each call relies on that).
 *
 * Returns EXIT_SUCCESS once the whole sequence file has been processed.
 */
int
main(int argc, char **argv)
{
    char          *cmfile   = NULL;
    char          *seqfile  = NULL;
    ESL_ALPHABET  *abc      = NULL;
    ESL_SQFILE    *sqfp     = NULL;
    CM_FILE       *cmfp     = NULL;
    CM_t          *cm       = NULL;
    ESL_SQ        *seq      = NULL;
    Parsetree_t   *tr       = NULL;
    Fancyali_t    *fali     = NULL;   /* alignment of the forward strand */
    Fancyali_t    *rev_fali = NULL;   /* alignment of the reverse strand */
    float          sc, rev_sc;
    int            format   = eslSQFILE_UNKNOWN;
    int            do_local = TRUE;
    int            optind;
    int            status;
    char           errbuf[eslERRBUFSIZE];

    /* Should process options, but for now assume none and set optind */
    optind = 1;

    if ( argc - optind != 2 ) cm_Die("Incorrect number of arguments\n");
    cmfile  = argv[optind++];
    seqfile = argv[optind++];

    /* Load the first CM of the model file, then release the file handle. */
    if ((status = cm_file_Open(cmfile, NULL, FALSE, &cmfp, errbuf)) != eslOK)
        cm_Die("Failed to open covariance model save file\n");
    if ((status = cm_file_Read(cmfp, TRUE, &abc, &cm)) != eslOK)
        cm_Die("Failed to read a CM from cm file\n");
    if (cm == NULL)
        cm_Die("CM file empty?\n");
    cm_file_Close(cmfp);

    if ( esl_sqfile_Open(seqfile, format, NULL, &sqfp) != eslOK )
        cm_Die("Failed to open sequence database file\n");

    if (do_local) cm->config_opts |= CM_CONFIG_LOCAL;

    /* errbuf is filled in by the library; never hand it over as a format string. */
    if ((status = cm_Configure(cm, errbuf, -1)) != eslOK) cm_Die("%s", errbuf);

    seq = esl_sq_Create();
    while ( esl_sqio_Read(sqfp, seq) == eslOK )
    {
        if (seq->n == 0) continue;

        int i0 = 1;
        int j0 = seq->n;

        if (seq->dsq == NULL)
            esl_sq_Digitize(abc, seq);

        /* Forward strand. */
        sc = TrCYK_DnC(cm, seq->dsq, seq->n, 0, i0, j0, PLI_PASS_5P_AND_3P_ANY, TRUE, &tr); /* TRUE: reproduce v1.0 behavior */
        fali = CreateFancyAli(cm->abc, tr, cm, cm->cmcons, seq->dsq, FALSE, NULL);
        FreeParsetree(tr);

        /* Reverse strand: seq is reverse-complemented in place. */
        revcomp(abc, seq, seq);
        rev_sc = TrCYK_DnC(cm, seq->dsq, seq->n, 0, i0, j0, PLI_PASS_5P_AND_3P_ANY, TRUE, &tr); /* TRUE: reproduce v1.0 behavior */
        rev_fali = CreateFancyAli(cm->abc, tr, cm, cm->cmcons, seq->dsq, FALSE, NULL);
        FreeParsetree(tr);

        if (sc > rev_sc)
        {
            printf("sequence: %s\n", seq->name);
            printf("score:    %.2f\n", sc);
            PrintFancyAli(stdout, fali, 0, FALSE, FALSE, 60);
        }
        else
        {
            printf("sequence: %s (reversed)\n", seq->name);
            printf("score:    %.2f\n", rev_sc);
            /* Print the reverse-strand alignment that rev_sc belongs to. */
            PrintFancyAli(stdout, rev_fali, seq->n, TRUE, FALSE, 60);
        }

        FreeFancyAli(fali);
        FreeFancyAli(rev_fali);

        /* Start the next record from a pristine sequence object. */
        esl_sq_Destroy(seq);
        seq = esl_sq_Create();
    }
    esl_sq_Destroy(seq);

    FreeCM(cm);
    esl_sqfile_Close(sqfp);

    return EXIT_SUCCESS;
}
Exemple #2
0
/*
 * Read the first sequence of a DNA/RNA FASTA file, echo it to stdout,
 * then write its six-frame translation followed by its translated open
 * reading frames, all in FASTA format.
 *
 * Usage: <program> <fasta file>
 *
 * Diagnostics go to stderr so they cannot be mixed into the FASTA output,
 * and every failure terminates the program with EXIT_FAILURE.
 *
 * Returns 0 on success.
 */
int main(int argc, char **argv)
{
  ESL_SQFILE        *sqfp = NULL;
  ESL_SQ            *sq   = NULL;   /* sequence as read from the file        */
  ESL_SQ            *dsq  = NULL;   /* digitized copy used for ORF finding   */
  ESL_SQ           **prot = NULL;   /* ORF translations from esl_trans_orf() */
  ESL_SQ            *prot6[6];      /* one translation per reading frame     */
  ESL_ALPHABET      *abc  = NULL;
  int                c    = 0;      /* number of entries in prot             */
  int                x;

  if(argc != 2)
  {
    fprintf(stderr, "You need to pass an argument for a filepath to a dna/rna fasta file\n");
    exit(EXIT_FAILURE);
  }

  abc = esl_alphabet_Create(eslDNA);
  if(abc == NULL)
  {
    fprintf(stderr, "could not allocate DNA alphabet\n");
    exit(EXIT_FAILURE);
  }

  if(eslOK != esl_sqfile_Open(argv[1], eslSQFILE_FASTA, NULL, &sqfp))
  {
    fprintf(stderr, "Invalid filepath: %s\n", argv[1]);
    exit(EXIT_FAILURE);
  }

  sq = esl_sq_Create();
  if(sq == NULL)
  {
    fprintf(stderr, "could not allocate new sequence\n");
    exit(EXIT_FAILURE);
  }

  if(esl_sqio_Read(sqfp, sq) != eslOK)
  {
    fprintf(stderr, "Not a valid fasta file %s\n", argv[1]);
    exit(EXIT_FAILURE);
  }

  /* Keep sq untouched; digitize a copy for the ORF search. */
  dsq = esl_sq_Create();
  if(dsq == NULL)
  {
    fprintf(stderr, "could not allocate digital sequence\n");
    exit(EXIT_FAILURE);
  }

  if(esl_sq_Copy(sq, dsq) != eslOK)
  {
    fprintf(stderr, "could not copy sequence\n");
    exit(EXIT_FAILURE);
  }

  if(esl_sq_Digitize(abc, dsq) != eslOK)
  {
    fprintf(stderr, "could not digitize sequence\n");
    exit(EXIT_FAILURE);
  }

  esl_sqio_Write(stdout, sq, eslSQFILE_FASTA, 0);

  if(esl_trans_6frame(sq, prot6) != eslOK)
  {
    fprintf(stderr, "could not generate six frame translation\n");
    exit(EXIT_FAILURE);
  }

  for(x = 0; x < 6; x++)
  {
    esl_sqio_Write(stdout, prot6[x], eslSQFILE_FASTA, 0);
  }

  /* NOTE(review): the last argument (10) is presumably a minimum ORF length - confirm against esl_trans_orf(). */
  if(esl_trans_orf(dsq, &prot, &c, 10) != eslOK)
  {
    fprintf(stderr, "could not translate open reading frames\n");
    exit(EXIT_FAILURE);
  }

  for(x = 0; x < c; x++)
  {
    esl_sqio_Write(stdout, prot[x], eslSQFILE_FASTA, 0);
  }

  return 0;
}
void run_hmmer_pipeline(const char* seq) {
  int index, i, status;
  ESL_SQ* sq = esl_sq_CreateFrom(NULL, seq, NULL, NULL, NULL);
  P7_OPROFILE *om = NULL;
  P7_PROFILE *gm = NULL;
  float usc, vfsc, fwdsc;   /* filter scores                           */
  float filtersc;           /* HMM null filter score                   */
  float nullsc;             /* null model score                        */
  float seqbias;
  float seq_score;          /* the corrected per-seq bit score */
  double P;
  WRAPPER_RESULT* result;

  num_results = 0;
  if(sq->n == 0) {
    esl_sq_Destroy(sq);
    return;
  }

  esl_sq_Digitize(abc, sq);  

  int n = 0;
  float oasc;

  for(index = 0;index < num_models;index++) {
    om = models[index];

    p7_omx_Reuse(oxf);
    p7_omx_Reuse(oxb);

    p7_omx_GrowTo(oxf, om->M, sq->n, sq->n);
    p7_omx_GrowTo(oxb, om->M, sq->n, sq->n);

    p7_oprofile_ReconfigLength(om, sq->n);

    p7_bg_SetFilter(bg, om->M, om->compo);
    p7_bg_SetLength(bg, sq->n);

    //Calibrate null model
    p7_bg_NullOne(bg, sq->dsq, sq->n, &nullsc);

    //MSV Filter
    p7_MSVFilter(sq->dsq, sq->n, om, oxf, &usc);
    seq_score = (usc - nullsc) / eslCONST_LOG2;
    P = esl_gumbel_surv(seq_score,  om->evparam[p7_MMU],  om->evparam[p7_MLAMBDA]);
    if (P > f1) continue;

    //Bias filter (model compo)
    p7_bg_FilterScore(bg, sq->dsq, sq->n, &filtersc);
    seq_score = (usc - filtersc) / eslCONST_LOG2;
    P = esl_gumbel_surv(seq_score,  om->evparam[p7_MMU],  om->evparam[p7_MLAMBDA]);
    if (P > f1) continue;

    //Viterbi filter (Only do if P value from Bias is high)
    if(P > f2) {
      p7_ViterbiFilter(sq->dsq, sq->n, om, oxf, &vfsc);
      seq_score = (vfsc - filtersc) / eslCONST_LOG2;
      P = esl_gumbel_surv(seq_score,  om->evparam[p7_VMU],  om->evparam[p7_VLAMBDA]);
      if (P > f2) continue;
    }

    //Get the real probability (forward)
    p7_Forward(sq->dsq, sq->n, om, oxf, &fwdsc);
    seq_score = (fwdsc - filtersc) / eslCONST_LOG2;
    P = esl_exp_surv(seq_score,  om->evparam[p7_FTAU],  om->evparam[p7_FLAMBDA]);
    if(hmmer_error) {
      fprintf(stderr, "HMM: %s, seq: %s", om->name, seq);
      hmmer_error = 0;
      continue;
    }
    if (P > f3) continue;

    //Real hit, go in to posterior decoding and alignment
    p7_omx_Reuse(oxb);
    p7_trace_Reuse(tr);

    p7_Backward(sq->dsq, sq->n, om, oxf, oxb, NULL);

    status = p7_Decoding(om, oxf, oxb, oxb);

    if(status == eslOK) {
      //And then trace the result
      p7_OptimalAccuracy(om, oxb, oxf, &oasc);
      p7_OATrace(om, oxb, oxf, tr);
    } else if(status == eslERANGE) {
      fprintf(stderr, "Decoding overflow on model %s\n", om->name);
      gm = gmodels[index];
      if(gxf == NULL) {
	gxf = p7_gmx_Create(gm->M, sq->n);
	gxb = p7_gmx_Create(gm->M, sq->n);
      } else {
	p7_gmx_GrowTo(gxf, gm->M, sq->n);
	p7_gmx_GrowTo(gxb, gm->M, sq->n);
      }

      p7_ReconfigLength(gm, sq->n);

      p7_GForward (sq->dsq, sq->n, gm, gxf, &fwdsc);
      p7_GBackward(sq->dsq, sq->n, gm, gxb, NULL);

      p7_GDecoding(gm, gxf, gxb, gxb);
      p7_GOptimalAccuracy(gm, gxb, gxf, &oasc);
      p7_GOATrace        (gm, gxb, gxf, tr);

      p7_gmx_Reuse(gxf);
      p7_gmx_Reuse(gxb);
    }

    if(hmmer_error) {
      fprintf(stderr, "HMM: %s, seq: %s", om->name, seq);
      hmmer_error = 0;
      continue;
    }

    result = wrapper_results[num_results];
    reuse_result(result, tr->N + om->M, om->name); //We're way overallocating here, but it's hard to know at this point how much space we'll need for the alignment (plus leading and trailing gaps)
    trace_into(tr, result, sq, abc, om->M);
    result->bits = seq_score;
    num_results++;
  }

  esl_sq_Destroy(sq);
}