示例#1
0
void 
MSAFileWrite(FILE *fp, MSA *msa, int outfmt, int do_oneline)
{
  switch (outfmt) {
  case MSAFILE_A2M:       WriteA2M(fp, msa);     break;
  case MSAFILE_CLUSTAL:   WriteClustal(fp, msa); break;
  case MSAFILE_MSF:       WriteMSF(fp, msa);     break;
  case MSAFILE_PHYLIP:    WritePhylip(fp, msa);  break;
  case MSAFILE_SELEX:     WriteSELEX(fp, msa);   break;
  case MSAFILE_STOCKHOLM:
    if (do_oneline) WriteStockholmOneBlock(fp, msa);
    else            WriteStockholm(fp, msa);
    break;
  default:
    Die("can't write. no such alignment format %d\n", outfmt);
  }
}
示例#2
0
文件: seq.c 项目: BioInfoTools/MACSE
/**
 * @brief Write alignment to file.
 *
 * @param[in] mseq
 * The mseq_t struct containing the aligned sequences
 * @param[in] pcAlnOutfile
 * The name of the output file
 * @param[in] outfmt
 * The alignment output format (defined in squid.h)
 * @param[in] iWrap
 * length of line for Clustal/Fasta format
 *
 * @return Non-zero on error
 *
 * @note We create a temporary squid MSA struct in here because we never
 * use it within clustal. We might be better of using the old clustal
 * output routines instead.
 *
 */
int
WriteAlignment(mseq_t *mseq, const char *pcAlnOutfile, int outfmt, int iWrap, bool bResno)
{
    int i; /* aux */
    MSA *msa; /* squid's alignment structure */
    FILE *pfOut = NULL;
    int key; /* MSA struct internal index for sequence */
    int alen; /* alignment length */
    bool use_stdout;

    assert(mseq!=NULL);

    if (MSAFILE_UNKNOWN == outfmt) {
        Log(&rLog, LOG_ERROR, "Unknown output format chosen");
        return -1;
    }

    if (NULL == pcAlnOutfile) {
        pfOut = stdout;
        use_stdout = TRUE;
    } else {
        use_stdout = FALSE;
        if (NULL == (pfOut = fopen(pcAlnOutfile, "w"))) {
            Log(&rLog, LOG_ERROR, "Could not open file %s for writing", pcAlnOutfile);
            return -1;
        }
    }


    /* derive alignment length from first seq */
    alen = strlen(mseq->seq[0]);

    msa  = MSAAlloc(mseq->nseqs, alen);

    /* basic structure borrowed code from squid-1.9g/a2m.c:ReadA2M()
     * we actually create a copy of mseq. keeping the pointers becomes
     * messy when calling MSAFree()
     */
    for (i=0; i<mseq->nseqs; i++) {
        char *this_name = NULL; /* mseq sequence name */
        char *this_seq = NULL; /* mseq sequence */
        SQINFO *this_sqinfo = NULL; /* mseq sequence name */
        int iI;

        /* mseq->tree_order encodes to order in which sequences are listed in the guide-tree,
           if the user wants the sequence output in the input-order then mseq->tree_order==NULL,
           otherwise mseq->tree_order!=NULL, containing the indices of the sequences, FS, r274 ->  */
        iI = (NULL == mseq->tree_order) ? i : mseq->tree_order[i];

        this_name = mseq->sqinfo[iI].name; /* mseq sequence name */
        this_seq = mseq->seq[iI]; /* mseq sequence */
        this_sqinfo = &mseq->sqinfo[iI]; /* mseq sequence name */

        key = GKIStoreKey(msa->index, this_name);
        msa->sqname[key] = sre_strdup(this_name, strlen(this_name));

        /* setting msa->sqlen[idx] and msa->aseq[idx] */
        msa->sqlen[key] = sre_strcat(&(msa->aseq[key]), msa->sqlen[key],
                                     this_seq, strlen(this_seq));

        if (this_sqinfo->flags & SQINFO_DESC) {
            /* FIXME never get here ... */
            MSASetSeqDescription(msa, key, this_sqinfo->desc);
        }
        /* FIXME extend this by copying more stuff according to flags.
         * See MSAFileRead() in msa.c and used functions there
         *
         * Problem is that we never parse MSA information as we use squid'sSeqFile
         */

        msa->nseq++;

    } /* 0 <= i < mseq->nseqs */


    /* FIXME Would like to, but can't use MSAVerifyParse(msa) here, as it
     * will die on error. Need to implement our own version
     */
#if 0
    MSAVerifyParse(msa);
#endif

    /* The below is copy of MSAFileWrite() which originally only writes to stdout.
     */

    /* Be sloppy and make a2m and fasta the same. same for vienna (which is
       the same). same same. can can. boleh boleh */
    if (outfmt==SQFILE_FASTA)
        outfmt = MSAFILE_A2M;
    if (outfmt==SQFILE_VIENNA)
        outfmt = MSAFILE_VIENNA;

    switch (outfmt) {
    case MSAFILE_A2M:
        /*WriteA2M(pfOut, msa, 0);*/
        WriteA2M(pfOut, msa, iWrap);
        break;
    case MSAFILE_VIENNA:
        /*WriteA2M(pfOut, msa, 1);*/
        WriteA2M(pfOut, msa, INT_MAX);
        break;
    case MSAFILE_CLUSTAL:
        WriteClustal(pfOut, msa, iWrap, TRUE==bResno ? 1 : 0, mseq->seqtype);
        break;
    case MSAFILE_MSF:
        WriteMSF(pfOut, msa);
        break;
    case MSAFILE_PHYLIP:
        WritePhylip(pfOut, msa);
        break;
    case MSAFILE_SELEX:
        WriteSELEX(pfOut, msa);
        break;
    case MSAFILE_STOCKHOLM:
        WriteStockholm(pfOut, msa);
        break;
    default:
        Log(&rLog, LOG_FATAL, "internal error: %s",
            "invalid output format should have been detected before");
    }

    if (use_stdout == FALSE) {
        (void) fclose(pfOut);
        Log(&rLog, LOG_INFO,
            "Alignment written to %s", pcAlnOutfile);
    }
    MSAFree(msa);

    return 0;
}
示例#3
0
int main(int argc, char **argv) 
{
    const char      *hmmfile;	/* file to read HMMs from                  */
    FILE            *fp;	/* output file handle                      */
    HMMFILE         *hmmfp;	/* opened hmmfile for reading              */
    struct plan7_s  *hmm;	/* HMM to generate from                    */
    int              L;		/* length of a sequence                    */
    int              i;		/* counter over sequences                  */

    char            *ofile;	/* output sequence file                    */
    int              nseq;	/* number of seqs to sample                */
    int              seed;	/* random number generator seed            */
    int              be_quiet;	/* TRUE to silence header/footer           */
    int              do_alignment; /* TRUE to output in aligned format     */ 
    int              do_consensus; /* TRUE to do a single consensus seq    */

    AjBool ajselex;
    AjBool ajcons;
    AjPFile inf=NULL;
    AjPFile outf=NULL;
    AjPStr  instr=NULL;
    AjPStr  outstr=NULL;
  

#ifdef MEMDEBUG
    unsigned long histid1, histid2, orig_size, current_size;
    orig_size = malloc_inuse(&histid1);
    fprintf(stderr, "[... memory debugging is ON ...]\n");
#endif

    /*********************************************** 
     * Parse command line
     ***********************************************/

    nseq         = 10;

    be_quiet     = FALSE;
    do_alignment = FALSE;  
    do_consensus = FALSE;
    ofile        = NULL;

    embInitPV("ohmmemit",argc,argv,"HMMER",VERSION);

    ajselex = ajAcdGetBoolean("selex");
    ajcons  = ajAcdGetBoolean("consensus");
    nseq    = ajAcdGetInt("number");
    seed    = ajAcdGetInt("seed");
    inf     = ajAcdGetInfile("infile");
    outf    = ajAcdGetOutfile("outfile");
  
    if(!seed)
	seed = time ((time_t *) NULL);

    if(ajselex)
	do_alignment=TRUE;
    else
	do_alignment=FALSE;
  
    if(ajcons)
	do_consensus=TRUE;
    else
	do_consensus=FALSE;

    instr  = ajStrNewC((char *)ajFileGetNameC(inf));
    outstr = ajStrNewC((char *)ajFileGetNameC(outf));

    hmmfile = ajStrGetPtr(instr);

    sre_srandom(seed);

    if (do_alignment && do_consensus)
	ajFatal("Sorry, -selex and -consensus are incompatible.\n"); 
    if (nseq != 10 && do_consensus)
	ajWarn("-consensus overrides -number (# of sampled seqs)");

    /*********************************************** 
     * Open HMM file (might be in HMMERDB or current directory).
     * Read a single HMM from it.
     ***********************************************/

    if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL)
	ajFatal("Failed to open HMM file %s\n", hmmfile);
    if (!HMMFileRead(hmmfp, &hmm)) 
	ajFatal("Failed to read any HMMs from %s\n", hmmfile);
    HMMFileClose(hmmfp);
    if (hmm == NULL) 
	ajFatal("HMM file %s corrupt or in incorrect format? Parse failed",
		hmmfile);

    /* Configure the HMM to shut off N,J,C emission: so we
     * do a simple single pass through the model.
     */
    Plan7NakedConfig(hmm);
    Plan7Renormalize(hmm);

    /*********************************************** 
     * Open the output file, or stdout
     ***********************************************/ 

    fp = ajFileGetFileptr(outf);
  
 
    /*********************************************** 
     * Show the options banner
     ***********************************************/
    be_quiet=TRUE;
    if (! be_quiet) 
    {
	printf("HMM file:             %s\n", hmmfile);
	if (! do_consensus)
	{
	    printf("Number of seqs:       %d\n", nseq);
	    printf("Random seed:          %d\n", seed);
	}
	printf("- - - - - - - - - - - - - - - - - - - - - - - - - "
	       "- - - - - - -\n\n");
    }

    /*********************************************** 
     * Do the work.
     * If we're generating an alignment, we have to collect
     * all our traces, then output. If we're generating unaligned
     * sequences, we can emit one at a time.
     ***********************************************/

    if (do_consensus) 
    {
	char    *seq;
	SQINFO   sqinfo;	/* info about sequence (name/desc)        */

	EmitConsensusSequence(hmm, &seq, NULL, &L, NULL);
	strcpy(sqinfo.name, "consensus");
	sqinfo.len = L;
	sqinfo.flags = SQINFO_NAME | SQINFO_LEN;

	WriteSeq(fp, kPearson, seq, &sqinfo);
	free(seq);
    }
    else if (do_alignment)
    {
	struct p7trace_s **tr;
	char           **dsq;
	SQINFO          *sqinfo;
	char           **aseq;
	AINFO            ainfo;
	float           *wgt;

	dsq    = MallocOrDie(sizeof(char *)             * nseq);
	tr     = MallocOrDie(sizeof(struct p7trace_s *) * nseq);
	sqinfo = MallocOrDie(sizeof(SQINFO)             * nseq);
	wgt    = MallocOrDie(sizeof(float)              * nseq);
	FSet(wgt, nseq, 1.0);

	for (i = 0; i < nseq; i++)
	{
	    EmitSequence(hmm, &(dsq[i]), &L, &(tr[i]));
	    sprintf(sqinfo[i].name, "seq%d", i+1);
	    sqinfo[i].len   = L;
	    sqinfo[i].flags = SQINFO_NAME | SQINFO_LEN;
	}

	P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, FALSE, 
			   &aseq, &ainfo);

	/* Output the alignment */
	WriteSELEX(fp, aseq, &ainfo, 50);
	if (ofile != NULL && !be_quiet)
	    printf("Alignment saved in file %s\n", ofile);

	/* Free memory
	 */
	for (i = 0; i < nseq; i++) 
	{
	    P7FreeTrace(tr[i]);
	    free(dsq[i]);
	}
	FreeAlignment(aseq, &ainfo);
	free(sqinfo);
	free(dsq);
	free(wgt);
	free(tr);
    }
    else				/* unaligned sequence output */
    {
	struct p7trace_s *tr;
	char             *dsq;
	char             *seq;
	SQINFO            sqinfo;

	for (i = 0; i < nseq; i++)
	{
	    EmitSequence(hmm, &dsq, &L, &tr);
	    sprintf(sqinfo.name, "seq%d", i+1);
	    sqinfo.len   = L;
	    sqinfo.flags = SQINFO_NAME | SQINFO_LEN;

	    seq = DedigitizeSequence(dsq, L);

	    WriteSeq(fp, kPearson, seq, &sqinfo);
	  
	    P7FreeTrace(tr);
	    free(dsq);
	    free(seq);
	}
    }

    ajFileClose(&outf);
  
    FreePlan7(hmm);
    SqdClean();

#ifdef MEMDEBUG
    current_size = malloc_inuse(&histid2);
    if (current_size != orig_size)
	malloc_list(2, histid1, histid2);
    else
	fprintf(stderr, "[No memory leaks.]\n");
#endif


    ajStrDel(&instr);
    ajStrDel(&outstr);
    ajFileClose(&inf);
    ajFileClose(&outf);

    embExit();
    return 0;
}
示例#4
0
int main(int argc, char **argv) 
{
  const char      *hmmfile;	/* file to read HMMs from                  */
  HMMFILE         *hmmfp;       /* opened hmmfile for reading              */
  const char      *seqfile;     /* file to read target sequence from       */ 
  char           **rseq;        /* raw, unaligned sequences                */ 
  SQINFO          *sqinfo;      /* info associated with sequences          */
  char           **dsq;         /* digitized raw sequences                 */
  int              nseq;        /* number of sequences                     */  
  char           **aseq;        /* aligned sequences                       */
  AINFO            ainfo;       /* alignment information                   */
  float           *wgt;         /* per-sequence weights                    */
  int              i;
  struct plan7_s    *hmm;       /* HMM to align to                         */ 
  struct p7trace_s **tr;        /* traces for aligned sequences            */

  int   be_quiet;		/* TRUE to suppress verbose banner          */
  int   matchonly;		/* TRUE to show only match state syms       */
  const char *outfile;          /* optional alignment output file           */
  FILE *ofp;                    /* handle on alignment output file          */
  AjPFile ajwithali;          /* name of additional alignment file to align */
  AjPFile ajmapali;           /* name of additional alignment file to map   */
  AjBool ajmatch=ajFalse;
  AjPFile outf=NULL;
  AjPStr  outfname=NULL;
  AjPFile inf=NULL;
  AjPStr  infname=NULL;
  AjPSeqset seqset=NULL;
  AjPStr  ajseqfile=NULL;
  char*  mapali=NULL;
  char*  withali=NULL;
  
#ifdef MEMDEBUG
  unsigned long histid1, histid2, orig_size, current_size;
  orig_size = malloc_inuse(&histid1);
  fprintf(stderr, "[... memory debugging is ON ...]\n");
#endif

  /*********************************************** 
   * Parse command line
   ***********************************************/
  
  matchonly = FALSE;
  outfile   = NULL;
  be_quiet  = FALSE;
  withali   = NULL;
  mapali    = NULL;

  embInitPV("ohmmalign",argc,argv,"HMMER",VERSION);

  ajmatch = ajAcdGetBoolean("matchonly");
  if(ajmatch)
      matchonly=TRUE;
  else
      matchonly=FALSE;



  ajmapali = ajAcdGetInfile("mapalifile");
  if (ajmapali)
      mapali = ajCharNewS(ajFileGetNameS(ajmapali));
  ajFileClose(&ajmapali);
  ajwithali = ajAcdGetInfile("withalifile");
  if (ajwithali)
      withali = ajCharNewS(ajFileGetNameS(ajwithali));
  ajFileClose(&ajwithali);

  be_quiet=TRUE;



  outf = ajAcdGetOutfile("outfile");
  outfname = ajStrNewC((char *)ajFileGetNameC(outf));
  if(*ajStrGetPtr(outfname)>31)
      ajFileClose(&outf);
  outfile = ajStrGetPtr(outfname);

  inf = ajAcdGetInfile("hmmfile");
  infname = ajStrNewC((char *)ajFileGetNameC(inf));
  ajFileClose(&inf);
  hmmfile = ajStrGetPtr(infname);

  
  seqset = ajAcdGetSeqset("sequences");
  ajseqfile = ajStrNewC(ajStrGetPtr(seqset->Filename));
  seqfile = ajStrGetPtr(ajseqfile);
  

 /*********************************************** 
  * Open HMM file (might be in HMMERDB or current directory).
  * Read a single HMM from it.
  * 
  * Currently hmmalign disallows the J state and
  * only allows one domain per sequence. To preserve
  * the S/W entry information, the J state is explicitly
  * disallowed, rather than calling a Plan7*Config() function.
  * this is a workaround in 2.1 for the 2.0.x "yo!" bug.
  ***********************************************/

  if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL)
    ajFatal("Failed to open HMM file %s\n", hmmfile);
  if (!HMMFileRead(hmmfp, &hmm)) 
    ajFatal("Failed to read any HMMs from %s\n", hmmfile);
  HMMFileClose(hmmfp);
  if (hmm == NULL) 
    ajFatal("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile);
  hmm->xt[XTE][MOVE] = 1.;	      /* only 1 domain/sequence ("global" alignment) */
  hmm->xt[XTE][LOOP] = 0.;
  P7Logoddsify(hmm, TRUE);
				/* do we have the map we might need? */
  if (mapali != NULL && ! (hmm->flags & PLAN7_MAP))
    ajFatal("HMMER: HMM file %s has no map; you can't use --mapali.", hmmfile);

  /*********************************************** 
   * Open sequence file in current directory.
   * Read all seqs from it.
   ***********************************************/
/*
  if (! SeqfileFormat(seqfile, &format, NULL))
    switch (squid_errno) {
    case SQERR_NOFILE: 
      ajFatal("Sequence file %s could not be opened for reading", seqfile);
    case SQERR_FORMAT: 
    default:           
      ajFatal("Failed to determine format of sequence file %s", seqfile);
    }
  if (! ReadMultipleRseqs(seqfile, format, &rseq, &sqinfo, &nseq))
    ajFatal("Failed to read any sequences from file %s", seqfile);
*/

  emboss_rseqs(seqset,&rseq,&sqinfo,&nseq);

  /*********************************************** 
   * Show the banner
   ***********************************************/

  be_quiet=TRUE;
  if (! be_quiet) 
    {
/*      Banner(stdout, banner); */
      printf(   "HMM file:             %s\n", hmmfile);
      printf(   "Sequence file:        %s\n", seqfile);
      printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n");
    }

  /*********************************************** 
   * Do the work
   ***********************************************/

  /* Allocations and initializations.
   */
  dsq = MallocOrDie(sizeof(char *) * nseq);
  tr  = MallocOrDie(sizeof(struct p7trace_s *) * nseq);

  /* Align each sequence to the model, collect traces
   */
  for (i = 0; i < nseq; i++)
    {
      dsq[i] = DigitizeSequence(rseq[i], sqinfo[i].len);

      if (P7ViterbiSize(sqinfo[i].len, hmm->M) <= RAMLIMIT)
	(void) P7Viterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i]));
      else
	(void) P7SmallViterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i]));
    }

  /* Include an aligned alignment, if desired.
   */
  if (mapali != NULL)
    include_alignment(mapali, hmm, TRUE, &rseq, &dsq, &sqinfo, &tr, &nseq);
  if (withali != NULL) 
    include_alignment(withali, hmm, FALSE, &rseq, &dsq, &sqinfo, &tr, &nseq);

  /* Turn traces into a multiple alignment
   */ 
  wgt = MallocOrDie(sizeof(float) * nseq);
  FSet(wgt, nseq, 1.0);
  P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, matchonly,
		     &aseq, &ainfo);

  /*********************************************** 
   * Output the alignment
   ***********************************************/

  if (outfile != NULL && (ofp = fopen(outfile, "w")) != NULL)
    {
      WriteSELEX(ofp, aseq, &ainfo, 50);
      printf("Alignment saved in file %s\n", outfile);
      fclose(ofp);
    }
  else
    WriteSELEX(stdout, aseq, &ainfo, 50);

  /*********************************************** 
   * Cleanup and exit
   ***********************************************/
  
  for (i = 0; i < nseq; i++) 
    {
      P7FreeTrace(tr[i]);
      FreeSequence(rseq[i], &(sqinfo[i]));
      free(dsq[i]);
    }
  FreeAlignment(aseq, &ainfo);
  FreePlan7(hmm);
  free(sqinfo);
  free(rseq);
  free(dsq);
  free(wgt);
  free(tr);

  SqdClean();

  ajStrDel(&outfname);
  ajStrDel(&infname);
  ajStrDel(&ajseqfile);
  

#ifdef MEMDEBUG
  current_size = malloc_inuse(&histid2);
  if (current_size != orig_size) malloc_list(2, histid1, histid2);
  else fprintf(stderr, "[No memory leaks.]\n");
#endif

  ajSeqsetDel(&seqset);
  ajFileClose(&ajwithali);
  ajFileClose(&ajmapali);

  embExit();
  
  return 0;
}