/* Driver: fills two random integer matrices A (M x P) and B (P x N),
 * multiplies them once with seqMatMult() and once with matmultS(),
 * prints the wall-clock time of each, and reports whether the two
 * results agree according to CheckResults().
 *
 * Command line: M N P (all positive integers, in that order).
 * Returns 0 after a completed run, 1 on a usage error.
 */
int main(int argc, char* argv[])
{
  double before, after;

  /* All three dimensions are mandatory; without this check argv[1..3]
   * would be read past the end of the argument vector.
   */
  if (argc < 4) {
    fprintf(stderr, "Usage: %s M N P\n", (argc > 0) ? argv[0] : "matmult");
    return 1;
  }

  int M = atoi(argv[1]);
  int N = atoi(argv[2]);
  int P = atoi(argv[3]);

  /* atoi() yields 0 for non-numeric text; reject that and negative sizes
   * before they reach the allocator.
   */
  if (M <= 0 || N <= 0 || P <= 0) {
    fprintf(stderr, "M, N and P must be positive integers\n");
    return 1;
  }

  int **A = Allocate2DArray< int >(M, P);
  int **B = Allocate2DArray< int >(P, N);
  int **C = Allocate2DArray< int >(M, N);
  int **C4 = Allocate2DArray< int >(M, N);

  int i, j;

  /* Entries are small integers in 0..9. */
  for (i = 0; i < M; i++) {
    for (j = 0; j < P; j++) {
      A[i][j] = ((int)(rand()%100) / 10);
    }
  }

  for (i = 0; i < P; i++) {
    for (j = 0; j < N; j++) {
      B[i][j] = ((int)(rand()%100) / 10);
    }
  }

  printf("Execute Standard matmult\n\n");
  before = omp_get_wtime();
  seqMatMult(M, N, P, A, B, C);
  after = omp_get_wtime();
  printf("Standard matrix function done in %10f secs\n\n\n",(after - before));

  //printf("The number of cores is %d\n",omp_get_num_procs());
  omp_set_num_threads(8);
  /* Widen before multiplying so the product is not computed (and
   * possibly overflowed) in int.
   */
  GRAIN = (long)M * N * 2;
  before = omp_get_wtime();
  matmultS(M, N, P, A, B, C4);
  after = omp_get_wtime();
  printf("Strassen matrix function done in %10f secs\n\n\n",(after - before));

  /* A non-zero result from CheckResults() is reported as a mismatch. */
  if (CheckResults(M, N, C, C4))
    printf("Error in matmultS\n\n");
  else
    printf("OKAY\n\n");

  Free2DArray(A);
  Free2DArray(B);
  Free2DArray(C);
  Free2DArray(C4);

  return 0;
}
/* Function: PAMPrior()
 *
 * Purpose:  Build an ad hoc "Dirichlet mixture" prior for match
 *           emissions from a PAM scoring matrix, overwriting the
 *           match emission section of an existing prior.
 *
 *           The incoming prior must use the Dirichlet strategy and
 *           must have exactly one insert emission component; the
 *           alphabet must be amino acid. Any violation is fatal
 *           (reported through Die()). The original notes say the
 *           transition prior may still be a mixture, so the -p and
 *           -P options of hmmb can coexist, although the error
 *           messages produced here may confuse a user when they
 *           conflict.
 *
 * Args:     pamfile - name of the PAM matrix file. Tried as given,
 *                     then via the BLASTMAT environment variable,
 *                     then as "aa"/pamfile via BLASTMAT.
 *           pri     - prior to modify in place: strategy becomes
 *                     PRI_PAM, mnum becomes 20, and m[][] / mq[]
 *                     are rewritten.
 *           wt      - total weight each row of pseudocounts is
 *                     scaled to.
 *
 * Return:   (void)
 */
void
PAMPrior(char *pamfile, struct p7prior_s *pri, float wt)
{
  FILE  *matfp;
  char  *aa_path;               /* BLAST looks in aa/ subdirectory of BLASTMAT */
  int  **pam;
  float  scale;
  int    row, col;
  int    row_letter, col_letter;

  /* Preconditions; each failure is fatal. */
  if (Alphabet_type != hmmAMINO)
    Die("PAM prior is only valid for protein sequences");
  if (pri->strategy != PRI_DCHLET)
    Die("PAM prior may only be applied over an existing Dirichlet prior");
  if (pri->inum != 1)
    Die("PAM prior requires that the insert emissions be a single Dirichlet");
  if (MAXDCHLET < 20)
    Die("Whoa, code is misconfigured; MAXDCHLET must be >= 20 for PAM prior");

  aa_path = FileConcat("aa", pamfile);

  /* Try the three candidate locations in turn, stopping at the first hit. */
  matfp = fopen(pamfile, "r");
  if (matfp == NULL)
    matfp = EnvFileOpen(pamfile, "BLASTMAT", NULL);
  if (matfp == NULL)
    matfp = EnvFileOpen(aa_path, "BLASTMAT", NULL);
  if (matfp == NULL)
    Die("Failed to open PAM scoring matrix file %s", pamfile);

  if (! ParsePAMFile(matfp, &pam, &scale))
    Die("Failed to parse PAM scoring matrix file %s", pamfile);
  fclose(matfp);
  free(aa_path);

  pri->strategy = PRI_PAM;
  pri->mnum     = 20;

  /* For every residue xi: turn the PAM scores back into conditional
   * probabilities P(xj | xi), then normalise the row and scale it so
   * that it sums to wt. The matrix is indexed by letter offset from 'A'.
   */
  for (row = 0; row < Alphabet_size; row++)
    {
      row_letter = Alphabet[row] - 'A';
      for (col = 0; col < Alphabet_size; col++)
        {
          col_letter = Alphabet[col] - 'A';
          pri->m[row][col] = aafq[col] * exp((float) pam[row_letter][col_letter] * scale);
        }

      pri->mq[row] = 1. / Alphabet_size;
      FNorm(pri->m[row], Alphabet_size);
      FScale(pri->m[row], Alphabet_size, wt);
    }

  Free2DArray((void **)pam,27);
}
예제 #3
0
파일: pr53487.c 프로젝트: 0day-ci/gcc
/* Cluster: skeleton of a clustering loop over an N x N matrix.
 *
 * NOTE(review): this looks like a reduced compiler test case rather than
 * working code (the listing attributes it to pr53487.c in a gcc tree) --
 * confirm before "fixing" anything here. As written:
 *   - mx, coord, min, i and Np are read without ever being assigned in
 *     this function;
 *   - dmx is never used;
 *   - the function is declared int but has no return statement.
 */
int Cluster(float **dmx, int N, struct phylo_s *tree)
{
  float **mx;
  int *coord;
  int i;
  int Np;
  int row, col;
  float min;
  /* The loop condition tests col, but the update step decrements Np;
   * col is only changed by the inner loop below.
   */
  for (col = 0; col < N; Np--)
    {
      /* Scan the upper triangle (col > row) of the leading Np x Np part
       * of mx, remembering in i the row of an entry smaller than min.
       * min itself is never updated.
       */
      for (row = 0; row < Np; row++)
	for (col = row+1; col < Np; col++)
	  if (mx[row][col] < min)
	    i = row;
      tree[Np-2].left = coord[i];
    }
  Free2DArray((void **) mx, N);
}
예제 #4
0
/* rk8: advance the state vector y by one step of size h, in place.
 *
 * Presumably an explicit Runge-Kutta step (judging by the name); the
 * code runs 11 stages using the coefficient tables c[] and a[][]
 * defined elsewhere in this file.
 *
 *   y          - in/out: state vector of length n
 *   deriv_func - callback invoked as deriv_func(time, state, n, out, data);
 *                it is handed one stage row to fill on each call
 *   t          - time at the start of the step
 *   n          - number of components in y
 *   h          - step size
 *   data       - opaque pointer forwarded to deriv_func unchanged
 *
 * NOTE(review): the stage table is allocated with 2*11 rows but released
 * with a row count of 11 -- confirm against Alloc2DArray/Free2DArray.
 */
void rk8(real* y, void (*deriv_func)(real, real*, int, real*, void*), real t, const int n, const real h, void* data) {
    const int stages = 11;
    int stage, comp, prev;

    real  *work   = (real* )Alloc1DArray( sizeof(real), n );
    real **slopes = (real**)Alloc2DArray( sizeof(real), stages*2, n );

    /* The first stage is evaluated at the unmodified input state. */
    memcpy( work, y, sizeof(real)*n );

    for (stage = 0; stage < stages; stage++) {
        deriv_func( t+h*c[stage], work, n, slopes[stage], data );

        /* Rebuild the working state from y plus the weighted sum of
         * every slope computed so far (stages 0..stage inclusive).
         */
        for (comp = 0; comp < n; comp++) {
            work[comp] = y[comp];
            for (prev = 0; prev <= stage; prev++) {
                work[comp] += a[stage][prev]*h*slopes[prev][comp];
            }
        }
    }

    /* The state built after the last stage is the result of the step. */
    memcpy( y, work, sizeof(real)*n );

    Free1DArray( (void*)work );
    Free2DArray( (void**)slopes, stages);
}
예제 #5
0
main (int argc, char ** argv ) 
{
  char     *seqfile;            /* name of sequence file     */
  SQINFO    sqinfo;             /* extra info about sequence */
  SQFILE   *dbfp;		/* open sequence file        */
  int       fmt,ofmt=106;	/* format of seqfile         */
                                /* 106 is PHYLIP format in SQUID */
  char     *seq;		/* sequence                  */
  int       type;		/* kAmino, kDNA, kRNA, or kOtherSeq */
  sequence  * seqs, * cds_seqs;
  sequence  tmp_seqs[2], tmp_cds_seqs[2];
  char  *optname;
  char  *optarg, *t;
  int    optind;
  int    be_quiet;
  int    seqct = 0,cdsct = 0;
  int    min_aln_len      = 0;
  int    do_oneline       = 0;
  char   * output_filename = 0, *submat_file = 0;
  int    showaln = 1;
  int    showheader=1;
  FILE  *ofd, *fd;
  alignment   *cds_aln;
  alignment * opt_alignment = NULL;  /* place for pairwise alignment */

  int    len,i,j, k, jk,ik,aln_count, rc;
  pairwise_distances pwMLdist, pwNGdist;
  int firsttime = 1;
  
  struct timeval tp;

  pwMLdist.N    = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS);
  pwMLdist.dN   = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS);
  pwMLdist.S    = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS);
  pwMLdist.dS   = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS);
  pwMLdist.dNdS = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS);
  pwMLdist.SEdS = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS);
  pwMLdist.SEdN = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS);
  pwMLdist.t    = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS);
  pwMLdist.kappa= make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS);

  pwNGdist.dN   = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS);
  pwNGdist.dS   = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS);
  pwNGdist.dNdS = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS);
/*
  pwMLdist.N    = pwMLdist.dN   = pwMLdist.S    = 0;
  pwMLdist.dS   = pwMLdist.dNdS = pwMLdist.SEdS = 0;
  pwMLdist.SEdN = pwMLdist.t    = pwMLdist.kappa= 0;
      
  pwNGdist.dN   = pwNGdist.dS   = pwNGdist.dNdS = 0;
*/

  Alntype = default_aln_type;
  
  /* Command line Parse */
  fmt       = SQFILE_UNKNOWN;	/* default: autodetect format  */
  be_quiet  = FALSE;
  type      =  kOtherSeq;

  /* for our purposes this is only pairwise alignments, but
   * would rather do it correctly in case we move to MSA case 
   */
  
  while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage, 
		&optind, &optname, &optarg))
    {
      if      (strcmp(optname, "--matrix") == 0)  submat_file = optarg; 
      else if (strcmp(optname, "--quiet")   == 0)  be_quiet  = TRUE; 
      else if (strcmp(optname, "--gapopen") == 0)  {
	Gapopen = atoi(optarg); 
	if( Gapopen < 0 ) Gapopen *= -1;
	
      } else if (strcmp(optname, "--gapext")  == 0)  {
	Gapext = atoi(optarg); 
	if( Gapext < 0 ) Gapext *= -1;

      } else if (strcmp(optname, "--informat") == 0) {
	fmt = String2SeqfileFormat(optarg);
	if (fmt == SQFILE_UNKNOWN) 
	  Die("unrecognized sequence file format \"%s\"", optarg);
      } else if (strcmp(optname, "--outformat") == 0) {
	ofmt = String2SeqfileFormat(optarg);
	if (ofmt == SQFILE_UNKNOWN) 
	  Die("unrecognized sequence file format \"%s\"", optarg);
      }  else if( strcmp(optname, "--global") == 0 ) {
	Alntype = global;
      } else if (strcmp(optname, "-h") == 0) {
	puts(usage);
	puts(experts);
        exit(EXIT_SUCCESS);
      } else if ( strcmp(optname, "-v") == 0 ) {
	Verbose = 1;
      } else if ( strcmp(optname, "--gapchar") == 0 ) {
	GapChar = optarg[0];
      }  else if(  strcmp(optname, "--output") == 0 ) {
	output_filename = optarg;	  
      } else if( strcmp(optname, "--showtable" ) == 0  ) {
	showaln = 0;
      } else if( strcmp(optname, "--noheader" ) == 0 ) {
	showheader = 0;
      }      
    }

  if (argc - optind < 1) Die("%s\n", usage);

  if( ! submat_file ) { 
    if( (t = getenv("SUBOPTDIR")) != 0 || 
	(t = getenv("SUBOPT_DIR")) != 0 ) {
      submat_file = calloc(strlen(t) + 24, sizeof(char));
      sprintf(submat_file, "%s/%s",t,Default_submat);
    } else { 
      submat_file = calloc(strlen((void *)Default_submat) + 24, sizeof(char));
      sprintf(submat_file, "../%s",Default_submat);
    }
  }
  /* open matrix */
  fd = fopen(submat_file, "r");
  
  if( ! ParsePAMFile(fd,&ScoringMatrix, &MatrixScale) ) {
    fprintf(stderr, "Cannot parse or open matrix file %s\n",submat_file);
    free(submat_file);
    exit(EXIT_SUCCESS);
  }
  

  if( output_filename && strlen(output_filename) != 1 &&
      output_filename[0] != '-') {      
    ofd = fopen(output_filename,"w");
    if( ! ofd ) {
      fprintf(stderr, "could not open file %s",output_filename);
      goto end;
    }
  } else 
    ofd = stdout;

  while( optind < argc ) {
    seqfile = argv[optind++];
    
    /* Try to work around inability to autodetect from a pipe or .gz:
     * assume FASTA format
     */
    if (fmt == SQFILE_UNKNOWN &&
	(Strparse("^.*\\.gz$", seqfile, 0) || strcmp(seqfile, "-") == 0))
      fmt = SQFILE_FASTA;
    
    if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL)
      Die("Failed to open sequence file %s for reading", seqfile);
    
    while (ReadSeq(dbfp, dbfp->format, &seq, &sqinfo))
    { 
      FreeSequence(NULL, &sqinfo);
      seqct++;
    }
    

    cds_seqs = (sequence *)calloc(seqct, sizeof(sequence));
    seqs     = (sequence *)calloc(seqct, sizeof(sequence));
    SeqfileRewind(dbfp);
    seqct=0;

    while (ReadSeq(dbfp, dbfp->format, &seq, &sqinfo))
    {
      sqinfo.type = Seqtype(seq);
      if( sqinfo.type == kDNA || sqinfo.type == kRNA ) {

	seqs[seqct].seqstr = Translate(seq,stdcode1);
	/* Let's remove the last codon if it is a stop codon */	
	len = strlen(seqs[seqct].seqstr);
	if( Verbose ) 
	  fprintf(stderr,"seqct is %d length is %d\n",seqct,
		  len);

	if( seqs[seqct].seqstr[len-1] == '*' ) {
	  seqs[seqct].seqstr[len-1] = '\0';
	  seq[strlen(seq) - 3] = '\0';
	}
	cds_seqs[cdsct].seqstr = seq;
	seqs[seqct].seqname = calloc(strlen(sqinfo.name)+1,sizeof(char));
	cds_seqs[cdsct].seqname = calloc(strlen(sqinfo.name)+1,sizeof(char));
	strcpy(seqs[seqct].seqname,sqinfo.name );
	strcpy(cds_seqs[cdsct].seqname,sqinfo.name);	
	cds_seqs[cdsct].length = sqinfo.len;
	cds_seqs[cdsct].alphabet = ( sqinfo.type == kDNA ) ? dna : rna;
	seqs[seqct].length = strlen(seqs[seqct].seqstr);
	
	seqs[seqct].alphabet = protein;
	cdsct++; seqct++;
      } else {
	fprintf(stderr,"Expect CDS sequences (DNA or RNA) not Protein\n");
	goto end;
      }    
      FreeSequence(NULL, &sqinfo);
      if( Verbose && seqct > 3 ) 
	break;
    }
    
    if( seqct < 2 ) {
      fprintf(stderr,"Must have provided a valid file with at least 2 sequences in it");
      goto end;
    }
    
    for( i=0; i  < seqct; i++ ) {
      for(k=i+1; k < seqct; k++ ) {	
	if( (opt_alignment = (alignment *)calloc(1,sizeof(alignment *))) == NULL) {
	  fprintf(stderr,"Could not allocate memory\n");
	  goto end;
	}

	opt_alignment->msa = NULL;
	rc = optimal_align(&seqs[i],&seqs[k],opt_alignment);
  
	if( rc != 1 ) {
	  fprintf(stderr,"Could not make an optimal alignment\n");
	  goto end;
	} else {
	  tmp_cds_seqs[0] = cds_seqs[i];
	  tmp_cds_seqs[1] = cds_seqs[k];
	  rc = mrtrans(opt_alignment, tmp_cds_seqs, &cds_aln,0);
	  if( rc != 0  ) { 
	    fprintf(stderr, "Could not map the coding sequence to the protein alignemnt for aln %d: %d\n",i,rc);
	    goto end;
	  }
	  if( showaln ) {
	    if( ofmt >= 100 ) {
	      MSAFileWrite(ofd,cds_aln->msa, ofmt,do_oneline);
	    } else { 
	      for(j=0; j < cds_aln->msa->nseq; j++ ) {	
		WriteSeq(ofd, ofmt, 
			 cds_aln->msa->aseq[j],
			 &(cds_aln->sqinfo[j]) );
	      }
	    }	    
	  } else {
	    if( showheader && firsttime ) {
	      fprintf(ofd,"SEQ1\tSEQ2\tSCORE\tdN\tdS\tOMEGA\tN\tS\tkappa\tt\tLENGTH\n");
	      firsttime = 0;
	    }
	    if( do_kaks_yn00(cds_aln->msa, &pwMLdist,&pwNGdist) < 0 ) {
	      fprintf(stderr, "warning: problem with align for %s %s\n",
		      cds_aln->msa->sqname[0], cds_aln->msa->sqname[1]);
	      continue;
	    }

	    for(ik = 0; ik < NUM_PW_SEQS; ik++ ) {	  
	      for( jk = ik+1; jk < NUM_PW_SEQS; jk++ ) {
		fprintf(ofd,"%s\t%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%d\n",
			cds_aln->sqinfo[ik].name,
			cds_aln->sqinfo[jk].name,
			opt_alignment->score,
			pwMLdist.dN[ik][jk],pwMLdist.dS[ik][jk], 
			pwMLdist.dNdS[ik][jk],
			pwMLdist.N[ik][jk],
			pwMLdist.S[ik][jk],
			pwMLdist.kappa[ik][jk],
			pwMLdist.t[ik][jk],
			opt_alignment->msa->alen);
	      }
	    }  
	  }
	}
	cleanup_alignment(cds_aln);
	cleanup_alignment(opt_alignment); 
      }
    }
  }
  if( ofd && ofd != stdout )
    fclose(ofd);

  end:
  free(submat_file);
  Free2DArray((void **)ScoringMatrix,27);
  for(i =0; i< seqct; i++ ) {
    free(seqs[i].seqstr);
    free(seqs[i].seqname);    
    seqs[i].seqstr = seqs[i].seqname = 0;
  }
  for(i = 0; i < cdsct; i++) {
    free(cds_seqs[i].seqstr);
    free(cds_seqs[i].seqname);    
    cds_seqs[i].seqstr = cds_seqs[i].seqname = 0;
  }
  
  cleanup_matrix((void **)pwMLdist.N,NUM_PW_SEQS);
  cleanup_matrix((void **)pwMLdist.dN,NUM_PW_SEQS);
  cleanup_matrix((void **)pwMLdist.S,NUM_PW_SEQS);
  
  cleanup_matrix((void **)pwMLdist.dS,NUM_PW_SEQS);

  cleanup_matrix((void **)pwMLdist.SEdS,NUM_PW_SEQS);
  cleanup_matrix((void **)pwMLdist.SEdN,NUM_PW_SEQS);
  cleanup_matrix((void **)pwMLdist.t,NUM_PW_SEQS);
  cleanup_matrix((void **)pwMLdist.dNdS,NUM_PW_SEQS);
  cleanup_matrix((void **)pwMLdist.kappa,NUM_PW_SEQS);

  cleanup_matrix((void **)pwNGdist.dN,NUM_PW_SEQS);
  cleanup_matrix((void **)pwNGdist.dS,NUM_PW_SEQS);
  cleanup_matrix((void **)pwNGdist.dNdS,NUM_PW_SEQS);


  free(pwNGdist.dNdS);
  free(pwNGdist.dN);
  free(pwNGdist.dS);

  free(pwMLdist.dNdS);
  free(pwMLdist.dN);
  free(pwMLdist.dS);
  free(pwMLdist.N);
  free(pwMLdist.S);
  free(pwMLdist.SEdS);
  free(pwMLdist.SEdN);
  free(pwMLdist.t);
  free(pwMLdist.kappa);
  
  return 0;
}
예제 #6
0
/* Function: ViterbiAlignAlignment()
 * 
 * Purpose:  Align a multiple sequence alignment to an HMM without
 *           altering the multiple alignment. Every column of the
 *           alignment is treated as a unit: one master path through
 *           the model is found, and a per-sequence traceback is
 *           derived from it.
 *           
 * Args:     shmm   - HMM in integer log-odds score form
 *           aseq   - alignment, [0..nseq-1][0..alen-1]
 *           alen   - length of aligned sequences
 *           nseq   - number of aligned sequences
 *           ret_tr - RETURN: array of tracebacks. rpos field is
 *                    relative to aseq, not raw seq, similar to
 *                    Maxmodelmaker(); use DealignTrace() if you
 *                    want relative to raw sequence.
 *           ret_sc - RETURN: sum of log odds scores.
 *           
 * Return:   (void)
 *           ret_tr is alloced here. Individuals must be free'd by FreeTrace(),
 *           then tr itself free'd by free().
 *           All working matrices allocated here are released before
 *           returning.
 */
void
ViterbiAlignAlignment(struct shmm_s *shmm, char **aseq, int alen, int nseq,
		      struct trace_s ***ret_tr, float *ret_sc)
{
  struct fvit_s **mx;           /* the viterbi calculation grid       */
  int    score;	                /* tmp variable for scores            */
  int    i;			/* counter for sequence position: 0,1..L */
  int    k;			/* counter for model position: 0,1..M */
  int    idx;			/* index for sequences                */
  struct fvit_s *thisrow;       /* ptr to current row of mx           */
  struct fvit_s *nextrow;       /* ptr to next row of mx              */
  int  **matocc;                /* [0..alen+1][0..nseq-1], 1 for MATCH*/
  struct trace_s **tr;          /* array of tracebacks to return      */
  int   *tpos;                  /* index for position in indiv traces */
  int    lastsub;		/* last state type in master trace    */

  /* A crucial extra component of this alignment algorithm:
   * at each matrix cell, we have to remember: for the best
   * path into the INSERT subcell, what state is each sequence in?
   * This is non-trivial because some gaps are assigned to
   * no states. When we calculate the score from an insert column,
   * where there are gaps we have to look up the previous state.
   *
   * Fortunately, we don't need to keep a full matrix of these,
   * or we'd be in serious memory problems. Use a rolling pointer
   * trick, keep two active rows "current" and "next".
   */
  char **cur_state;             /* [0..M+1][0..nseq-1]; MATCH, INSERT, or DELETE */ 
  char **nxt_state;             /* same, except keeps states for next row        */
  char **swap;                  /* used for swapping cur, nxt                    */

  /********************************************
   * Initial setup and allocations
   ********************************************/
				/* allocate the calculation matrix,
				   which is 0..alen+1 rows by 0..M+1 cols */
  mx        = (struct fvit_s **) MallocOrDie (sizeof(struct fvit_s *) * (alen+2));
  matocc    = (int  **)          MallocOrDie (sizeof(int *)           * (alen+2));
  cur_state = (char **)          MallocOrDie (sizeof(char *)          * (shmm->M+2));
  nxt_state = (char **)          MallocOrDie (sizeof(char *)          * (shmm->M+2));
  for (i = 0; i <= alen+1; i++)
    {
      mx[i]    = (struct fvit_s *) MallocOrDie (sizeof(struct fvit_s) * (shmm->M+2));
      matocc[i]= (int *)           MallocOrDie (sizeof(int)           * nseq);
    }
  for (k = 0; k <= shmm->M+1; k++)
    {
      cur_state[k] = (char *) MallocOrDie (sizeof(char) * nseq);
      nxt_state[k] = (char *) MallocOrDie (sizeof(char) * nseq);
    }

  /********************************************
   * Initialization
   ********************************************/
				/* initialize the first cell 0,0 */
  mx[0][0].score_m = 0;
  mx[0][0].score_d = -99999999;
  mx[0][0].score_i = -99999999;

				/* nxt_state becomes cur_state on the first
				   swap inside the recursion below */
  for (k = 0; k <= shmm->M+1; k++)
    for (idx = 0; idx < nseq; idx++)
      nxt_state[k][idx] = MATCH;

				/* initialize the top row */
  for (k = 1; k <= shmm->M+1; k++)
    {
      mx[0][k].score_m = -99999999;
      mx[0][k].score_i = -99999999;
    }

  /* Precalculate matocc (match occupancy). 
   * 1 if symbol in column for this seq, 0 if not. 
   * 1..alen, from 0..alen-1 alignments
   */
  for (idx = 0; idx < nseq; idx++)
    {
      matocc[0][idx] = matocc[alen+1][idx] = 1; /* dummies for BEGIN, END */
      for (i = 1; i <= alen; i++)
	matocc[i][idx] = isgap(aseq[idx][i-1]) ? 0 : 1;
    }

  /********************************************
   * Recursion: fill in the mx matrix
   ********************************************/
				/* Alignment is 0..alen-1, we index it 
				   here as 1..alen because of Viterbi matrix. */
  for (i = 0; i <= alen; i++)
    {
				/* get ptrs into current and next row. */
      thisrow = mx[i];
      nextrow = mx[i+1];
				/* initialize in the next row */
      nextrow[0].score_m = -99999999;
      nextrow[0].score_d = -99999999;

      swap = cur_state; cur_state = nxt_state; nxt_state = swap;

      for (k = 0; k <= shmm->M; k++)
	{ /* begin inner loop... this is where all the time is spent. */

				/* add in emission scores to the current cell. */
	  if (i > 0)
	    for (idx = 0; idx < nseq; idx++)
	      if (matocc[i][idx])
		{
		  thisrow[k].score_m += shmm->m_emit[aseq[idx][i-1] - 'A'][k];
		  thisrow[k].score_i += shmm->i_emit[aseq[idx][i-1] - 'A'][k];
		}
				/* initialize with transitions out of delete state */
				/* to delete */
	  thisrow[k+1].score_d = thisrow[k].score_d + shmm->t[9*k + Tdd] * nseq;
	  thisrow[k+1].tback_d = DELETE;
				/* to insert */
	  nextrow[k].score_i = thisrow[k].score_d;
	  nextrow[k].tback_i = DELETE;
	  for (idx = 0; idx < nseq; idx++) 
	    if (matocc[i+1][idx])
	      {
		nextrow[k].score_i += shmm->t[9*k + Tdi];
		nxt_state[k][idx]  = INSERT;
	      }
	    else
	      nxt_state[k][idx] = DELETE;
				/* to match */
	  nextrow[k+1].score_m = thisrow[k].score_d;
	  nextrow[k+1].tback_m = DELETE;
	  for (idx = 0; idx < nseq; idx++)
	    if (matocc[i+1][idx])
	      nextrow[k+1].score_m += shmm-> t[9*k + Tdm];
	    else
	      nextrow[k+1].score_m += shmm-> t[9*k + Tdd];

	  
				/* deal with transitions out of insert state */
				/* to delete state. */
	  score = thisrow[k].score_i;
	  for (idx = 0; idx < nseq; idx++)
	    switch (cur_state[k][idx]) {
	    case MATCH:  score += shmm->t[9*k + Tmd]; break;
	    case DELETE: score += shmm->t[9*k + Tdd]; break;
	    case INSERT: score += shmm->t[9*k + Tid]; break;
	    }
	  if (score > thisrow[k+1].score_d) 
	    {
	      thisrow[k+1].score_d = score;
	      thisrow[k+1].tback_d = INSERT;
	    }
				/* to insert state */
	  score = thisrow[k].score_i;
	  for (idx = 0; idx < nseq; idx++)
	    {
	      if (matocc[i+1][idx])
		switch (cur_state[k][idx]) {
		case MATCH:  score += shmm->t[9*k + Tmi]; break;
		case DELETE: score += shmm->t[9*k + Tdi]; break;
		case INSERT: score += shmm->t[9*k + Tii]; break;
		}
	    }
	  if (score > nextrow[k].score_i) 
	    {
	      nextrow[k].score_i = score;
	      nextrow[k].tback_i = INSERT;
	      for (idx = 0; idx < nseq; idx++)
		if (matocc[i+1][idx])
		  nxt_state[k][idx] = INSERT;
		else
		  nxt_state[k][idx] = cur_state[k][idx];
	    }
				/* to match state */
	  score = thisrow[k].score_i;
	  for (idx = 0; idx < nseq; idx++)
	    if (matocc[i+1][idx])
	      switch (cur_state[k][idx]) {
	      case MATCH:  score += shmm->t[9*k + Tmm]; break;
	      case DELETE: score += shmm->t[9*k + Tdm]; break;
	      case INSERT: score += shmm->t[9*k + Tim]; break;
	      }
	    else
	      switch (cur_state[k][idx]) {
	      case MATCH:  score += shmm->t[9*k + Tmd]; break;
	      case DELETE: score += shmm->t[9*k + Tdd]; break;
	      case INSERT: score += shmm->t[9*k + Tid]; break;
	      }
	  if (score > nextrow[k+1].score_m) 
	    {
	      nextrow[k+1].score_m = score;
	      nextrow[k+1].tback_m = INSERT;
	    }

	  /* Transitions out of match state.
	   */
				/* to delete */
	  score = thisrow[k].score_m;
	  for (idx = 0; idx < nseq; idx++)
	    if (matocc[i][idx])
	      score += shmm->t[9*k + Tmd];
	    else
	      score += shmm->t[9*k + Tdd];
	  if (score > thisrow[k+1].score_d)
	    {
	      thisrow[k+1].score_d = score;
	      thisrow[k+1].tback_d = MATCH;
	    }
				/* to insert */
	  score = thisrow[k].score_m;
	  for (idx = 0; idx < nseq; idx++)
	    if (matocc[i+1][idx])
	      {
		if (matocc[i][idx])
		  score += shmm->t[9*k + Tmi];
		else
		  score += shmm->t[9*k + Tdi];
	      }
	  if (score > nextrow[k].score_i)
	    {
	      nextrow[k].score_i = score;
	      nextrow[k].tback_i = MATCH;
	      for (idx = 0; idx < nseq; idx++)
		if (matocc[i+1][idx])
		  nxt_state[k][idx] = INSERT;
		else if (matocc[i][idx])
		  nxt_state[k][idx] = MATCH;
		else
		  nxt_state[k][idx] = DELETE;
	    }
				/* to match */
	  score = thisrow[k].score_m;
	  for (idx = 0; idx < nseq; idx++)
	    if (matocc[i][idx])
	      {
		if (matocc[i+1][idx])
		  score += shmm->t[9*k + Tmm];
		else
		  score += shmm->t[9*k + Tmd];
	      }
	    else
	      {
		if (matocc[i+1][idx])
		  score += shmm->t[9*k + Tdm];
		else
		  score += shmm->t[9*k + Tdd];
	      }
	  if (score > nextrow[k+1].score_m)
	    {
	      nextrow[k+1].score_m = score;
	      nextrow[k+1].tback_m = MATCH;
	    }

	} /* end loop over model positions k */

    } /* end loop over alignment positions i */

/*  PrintFragViterbiMatrix(mx, alen, shmm->M); */

  /* Fill stage finished.
   * mx now contains final score in mx[alen+1][M+1].
   * Trace back from there to get master alignment.
   */
  tr   = (struct trace_s **) MallocOrDie (sizeof(struct trace_s *) * nseq);
  tpos = (int *)             MallocOrDie (sizeof(int)              * nseq);
  for (idx = 0; idx < nseq; idx++)
    {
      AllocTrace(alen + shmm->M + 3, &(tr[idx]));
				/* every trace starts (in reverse) at the
				   END position, node M+1 */
      tr[idx]->nodeidx[0]   = shmm->M+1;
      tr[idx]->statetype[0] = MATCH;
      tr[idx]->rpos[0]      = -1;
      tpos[idx]        = 1;
    }
  i      = alen+1;
  k      = shmm->M+1;
  lastsub= MATCH;
	       
  while (i != 0 || k != 0)
    {
				/* step back one cell according to the
				   subcell we are currently in */
      switch (lastsub) {
      case MATCH:  lastsub = mx[i][k].tback_m; i--; k--; break;
      case DELETE: lastsub = mx[i][k].tback_d;      k--; break;
      case INSERT: lastsub = mx[i][k].tback_i; i--;      break;
      default: Die("trace failed!");
      }

				/* record the new subcell in each
				   sequence's own trace */
      switch (lastsub) {
      case MATCH:
	for (idx = 0; idx < nseq; idx++)
	  if (matocc[i][idx]) 
	    {
	      tr[idx]->nodeidx[tpos[idx]]   = k;
	      tr[idx]->statetype[tpos[idx]] = MATCH;
	      tr[idx]->rpos[tpos[idx]]      = i-1;
	      tpos[idx]++;
	    }
	  else
	    {
				/* a gap in a match column is a delete
				   for that sequence */
	      tr[idx]->nodeidx[tpos[idx]]   = k;
	      tr[idx]->statetype[tpos[idx]] = DELETE;
	      tr[idx]->rpos[tpos[idx]]      = -1;
	      tpos[idx]++;
	    }
	break;
      case INSERT:
				/* a gap in an insert column adds nothing
				   to that sequence's trace */
	for (idx = 0; idx < nseq; idx++)
	  if (matocc[i][idx])
	    {
	      tr[idx]->nodeidx[tpos[idx]]   = k;
	      tr[idx]->statetype[tpos[idx]] = INSERT;
	      tr[idx]->rpos[tpos[idx]]      = i-1;
	      tpos[idx]++;
	    }
	break;
      case DELETE:
	for (idx = 0; idx < nseq; idx++)
	  {
	    tr[idx]->nodeidx[tpos[idx]]   = k;
	    tr[idx]->statetype[tpos[idx]] = DELETE;
	    tr[idx]->rpos[tpos[idx]]      = -1;
	    tpos[idx]++;
	  }
	break;
      default: Die("trace failed!");
      }	/* end switch across new subcell in traceback */
    } /* end traceback */

				/* traces were built END-to-BEGIN */
  for (idx = 0; idx < nseq; idx++)
    ReverseTrace(tr[idx], tpos[idx]);

  *ret_tr = tr;
  *ret_sc = (float) mx[alen+1][shmm->M+1].score_m / INTSCALE;

				/* explicit casts, as at the other
				   Free2DArray() call sites: int **, char **
				   and struct fvit_s ** do not convert to
				   void ** implicitly */
  Free2DArray((void **) matocc,    alen+2);
  Free2DArray((void **) cur_state, shmm->M+2);
  Free2DArray((void **) nxt_state, shmm->M+2);
  Free2DArray((void **) mx,        alen+2);
  free(tpos);
}
예제 #7
0
/* Function: StrDPShuffle()
 * Date:     SRE, Fri Oct 29 09:15:17 1999 [St. Louis]
 *
 * Purpose:  Returns a shuffled version of s2, in s1.
 *           (s1 and s2 may be identical; i.e. a string
 *           may be shuffled in place.) The shuffle is a  
 *           "doublet-preserving" (DP) shuffle. Both
 *           mono- and di-symbol composition are preserved.
 *           The result is written in upper case regardless of
 *           the case of s2.
 *           
 *           Done by searching for a random Eulerian 
 *           walk on a directed multigraph. 
 *           Reference: S.F. Altschul and B.W. Erickson, Mol. Biol.
 *           Evol. 2:526-538, 1985. Quoted bits in my comments
 *           are from Altschul's outline of the algorithm.
 *
 *           Strings shorter than two characters have no doublets;
 *           they are copied (upper-cased) without shuffling.
 *
 * Args:     s1   - RETURN: the string after it's been shuffled
 *                    (space for s1 allocated by caller; needs
 *                    strlen(s2)+1 bytes)
 *           s2   - the string to be shuffled
 *
 * Returns:  0 if string can't be shuffled (it's not all [a-zA-Z]
 *             alphabetic); s1 is left untouched in that case.
 *           1 on success. 
 */
int
StrDPShuffle(char *s1, char *s2)
{
  int    len;
  int    pos;	/* a position in s1 or s2 */
  int    x,y;   /* indices of two characters */
  char **E;     /* edge lists: E[0] is the edge list from vertex A */
  int   *nE;    /* lengths of edge lists */
  int   *iE;    /* positions in edge lists */
  int    n;	/* tmp: remaining length of an edge list to be shuffled */
  char   sf;    /* last character in s2 */
  char   Z[26]; /* connectivity in last edge graph Z */ 
  int    keep_connecting; /* flag used in Z connectivity algorithm */
  int    is_eulerian;		/* flag used for when we've got a good Z */
  
  /* First, verify that the string is entirely alphabetic.
   * Every symbol must map onto one of the 26 vertices A..Z; the cast
   * keeps toupper() away from negative char values.
   */
  len = strlen(s2);
  for (pos = 0; pos < len; pos++)
    {
      int c = toupper((unsigned char) s2[pos]);
      if (c < 'A' || c > 'Z') return 0;
    }

  /* Nothing to shuffle in a string of length 0 or 1, and the general
   * code below would size its edge lists as len-1 <= 0.
   */
  if (len < 2)
    {
      if (len == 1) s1[0] = toupper((unsigned char) s2[0]);
      s1[len] = '\0';
      return 1;
    }

  /* "(1) Construct the doublet graph G and edge ordering E
   *      corresponding to S."
   * 
   * Note that these also imply the graph G; and note,
   * for any list x with nE[x] = 0, vertex x is not part
   * of G.
   */
  E  = MallocOrDie(sizeof(char *) * 26);
  nE = MallocOrDie(sizeof(int)    * 26);
  for (x = 0; x < 26; x++)
    {
      E[x]  = MallocOrDie(sizeof(char) * (len-1));
      nE[x] = 0; 
    }

  x = toupper((unsigned char) s2[0]) - 'A';
  for (pos = 1; pos < len; pos++)
    {
      y = toupper((unsigned char) s2[pos]) - 'A';
      E[x][nE[x]] = y;
      nE[x]++;
      x = y;
    }
  
  /* Now we have to find a random Eulerian edge ordering.
   */
  sf = toupper((unsigned char) s2[len-1]) - 'A'; 
  is_eulerian = 0;
  while (! is_eulerian)
    {
      /* "(2) For each vertex s in G except s_f, randomly select
       *      one edge from the s edge list of E(S) to be the
       *      last edge of the s list in a new edge ordering."
       *
       * select random edges and move them to the end of each 
       * edge list.
       */
      for (x = 0; x < 26; x++)
	{
	  if (nE[x] == 0 || x == sf) continue;
	  
	  pos           = CHOOSE(nE[x]);
	  y             = E[x][pos];		
	  E[x][pos]     = E[x][nE[x]-1];
	  E[x][nE[x]-1] = y;
	}

      /* "(3) From this last set of edges, construct the last-edge
       *      graph Z and determine whether or not all of its
       *      vertices are connected to s_f."
       * 
       * a probably stupid algorithm for looking at the
       * connectivity in Z: iteratively sweep through the
       * edges in Z, and build up an array (confusing called Z[x])
       * whose elements are 1 if x is connected to sf, else 0.
       */
      for (x = 0; x < 26; x++) Z[x] = 0;
      Z[(int) sf] = keep_connecting = 1;

      while (keep_connecting) {
	keep_connecting = 0;
	for (x = 0; x < 26; x++)
	  {
	    /* A vertex with no outgoing edges has no last edge;
	     * skipping it avoids reading E[x][-1].
	     */
	    if (nE[x] == 0) continue;

	    y = E[x][nE[x]-1];            /* xy is an edge in Z */
	    if (Z[x] == 0 && Z[y] == 1)   /* x is connected to sf in Z */
	      {
		Z[x] = 1;
		keep_connecting = 1;
	      }
	  }
      }

      /* if any vertex in Z is tagged with a 0, it's
       * not connected to sf, and we won't have a Eulerian
       * walk.
       */
      is_eulerian = 1;
      for (x = 0; x < 26; x++)
	{
	  if (nE[x] == 0 || x == sf) continue;
	  if (Z[x] == 0) {
	    is_eulerian = 0;
	    break;
	  }
	}

      /* "(4) If any vertex is not connected in Z to s_f, the
       *      new edge ordering will not be Eulerian, so return to
       *      (2). If all vertices are connected in Z to s_f, 
       *      the new edge ordering will be Eulerian, so
       *      continue to (5)."
       *      
       * e.g. note infinite loop while is_eulerian is FALSE.
       */
    }

  /* "(5) For each vertex s in G, randomly permute the remaining
   *      edges of the s edge list of E(S) to generate the s
   *      edge list of the new edge ordering E(S')."
   *      
   * Essentially a StrShuffle() on the remaining nE[x]-1 elements
   * of each edge list; unfortunately our edge lists are arrays,
   * not strings, so we can't just call out to StrShuffle().
   */
  for (x = 0; x < 26; x++)
    for (n = nE[x] - 1; n > 1; n--)
      {
	pos       = CHOOSE(n);
	y         = E[x][pos];
	E[x][pos] = E[x][n-1];
	E[x][n-1] = y;
      }

  /* "(6) Construct sequence S', a random DP permutation of
   *      S, from E(S') as follows. Start at the s_1 edge list.
   *      At each s_i edge list, add s_i to S', delete the
   *      first edge s_i,s_j of the edge list, and move to
   *      the s_j edge list. Continue this process until
   *      all edge lists are exhausted."
   */ 
  iE = MallocOrDie(sizeof(int) * 26);
  for (x = 0; x < 26; x++) iE[x] = 0; 

  pos = 0; 
  x = toupper((unsigned char) s2[0]) - 'A';
  while (1) 
    {
      s1[pos++] = 'A' + x;	/* add s_i to S' */
      
      y = E[x][iE[x]];
      iE[x]++;			/* "delete" s_i,s_j from edge list */
  
      x = y;			/* move to s_j edge list. */

      if (iE[x] == nE[x])
	break;			/* the edge list is exhausted. */
    }
  s1[pos++] = 'A' + sf;
  s1[pos]   = '\0';  

  /* Reality checks.
   */
  if (x   != sf)  Die("hey, you didn't end on s_f.");
  if (pos != len) Die("hey, pos (%d) != len (%d).", pos, len);
  
  /* Free and return.
   */
  Free2DArray((void **) E, 26);
  free(nE);
  free(iE);
  return 1;
}
/* Example #8
 * 0
 * File: msa.c   Project: hyphaltip/subopt-kaks
 */
/* Function: MSAFree()
 * Date:     SRE, Tue May 18 11:20:16 1999 [St. Louis]
 *
 * Purpose:  Free a multiple sequence alignment structure, including
 *           every array hanging off of it and the structure itself.
 *
 * Args:     msa - the alignment. A NULL pointer is accepted and
 *                 ignored, mirroring free(NULL).
 *
 * Returns:  (void)
 */
void
MSAFree(MSA *msa)
{
  if (msa == NULL) return;	/* nothing to release */

				/* arrays with one row per sequence */
  Free2DArray((void **) msa->aseq,   msa->nseq);
  Free2DArray((void **) msa->sqname, msa->nseq);
  Free2DArray((void **) msa->sqacc,  msa->nseq);
  Free2DArray((void **) msa->sqdesc, msa->nseq);
  Free2DArray((void **) msa->ss,     msa->nseq);
  Free2DArray((void **) msa->sa,     msa->nseq);

				/* flat arrays and strings; free(NULL) is
				 * a no-op, so unset fields need no guard */
  free(msa->sqlen);
  free(msa->wgt);

  free(msa->name);
  free(msa->desc);
  free(msa->acc);
  free(msa->au);
  free(msa->ss_cons);
  free(msa->sa_cons);
  free(msa->rf);
  free(msa->sslen);
  free(msa->salen);

				/* comments and tagged markup; the gs and gr
				 * tables are 3D: (tag count) x nseq */
  Free2DArray((void **) msa->comment, msa->ncomment);
  Free2DArray((void **) msa->gf_tag,  msa->ngf);
  Free2DArray((void **) msa->gf,      msa->ngf);
  Free2DArray((void **) msa->gs_tag,  msa->ngs);
  Free3DArray((void ***)msa->gs,      msa->ngs, msa->nseq);
  Free2DArray((void **) msa->gc_tag,  msa->ngc);
  Free2DArray((void **) msa->gc,      msa->ngc);
  Free2DArray((void **) msa->gr_tag,  msa->ngr);
  Free3DArray((void ***)msa->gr,      msa->ngr, msa->nseq);

				/* key indices */
  GKIFree(msa->index);
  GKIFree(msa->gs_idx);
  GKIFree(msa->gc_idx);
  GKIFree(msa->gr_idx);

  free(msa);
}
/* Function: WriteMSF()
 * Date:     SRE, Mon May 31 11:25:18 1999 [St. Louis]
 *
 * Purpose:  Write an alignment in MSF format to an open file.
 *           Output consists of the file type line, any free text
 *           comments, the checksum header line, a names/weights
 *           section, and the aligned sequences in blocks of 50
 *           columns (five groups of ten).
 *
 * Args:     fp    - file that's open for writing.
 *           msa   - alignment to write. 
 *
 *                   Note that msa->type is needed. If it is
 *                   kOtherSeq on entry, it is guessed with
 *                   GuessAlignmentSeqtype() and the guess is
 *                   stored back into msa->type. If the type is
 *                   still kOtherSeq, or is not one of kRNA, kDNA,
 *                   kAmino, Die() is called.
 *
 * Returns:  (void)
 */
void
WriteMSF(FILE *fp, MSA *msa)
{
  time_t     now;		/* current time as a time_t */
  struct tm *tm_now;		/* broken-down local time   */
  char   date[64];		/* today's date in GCG's format "October 3, 1996 15:57" */
  char **gcg_aseq;              /* aligned sequences with gaps converted to GCG format */
  char **gcg_sqname;		/* sequence names with GCG-valid character sets */
  int    idx;			/* counter for sequences         */
  char  *s;                     /* pointer into sqname or seq    */
  int    len;			/* tmp variable for name lengths */
  int    namelen;		/* maximum name length used      */
  int    pos;			/* position counter              */
  char   buffer[51];		/* buffer for writing seq        */
  int    i;			/* another position counter */

  /*****************************************************************
   * Make copies of sequence names and sequences.
   *   GCG recommends that name characters should only contain
   *   alphanumeric characters, -, or _
   *   Some GCG and GCG-compatible software is sensitive to this.
   *   We silently convert all other characters to '_'.
   *   
   *   For sequences, GCG allows only ~ and . for gaps.
   *   Otherwise, everything is interpreted as a residue;
   *   so squid's IUPAC-restricted chars are fine. ~ means
   *   an external gap. . means an internal gap.
   *****************************************************************/ 
   
				/* make copies that we can edit */
  gcg_aseq   = MallocOrDie(sizeof(char *) * msa->nseq);
  gcg_sqname = MallocOrDie(sizeof(char *) * msa->nseq);
  for (idx = 0; idx < msa->nseq; idx++)
    {
      gcg_aseq[idx]   = sre_strdup(msa->aseq[idx],   msa->alen);
      gcg_sqname[idx] = sre_strdup(msa->sqname[idx], -1);
    }
				/* alter names as needed; the cast to
				 * unsigned char keeps isalnum() defined
				 * for bytes >= 0x80 when char is signed */
  for (idx = 0; idx < msa->nseq; idx++)
    for (s = gcg_sqname[idx]; *s != '\0'; s++)
      if (! isalnum((unsigned char) *s) && *s != '-' && *s != '_')
	*s = '_';
				/* alter gap chars in seq  */
  for (idx = 0; idx < msa->nseq; idx++)
    {
				/* leading run of gaps is external */
      for (s = gcg_aseq[idx]; *s != '\0' && isgap(*s); s++)
	*s = '~';
				/* provisionally mark the rest internal */
      for (; *s != '\0'; s++)
	if (isgap(*s)) *s = '.';
				/* then re-mark the trailing run external */
      for (pos = msa->alen-1; pos > 0 && isgap(gcg_aseq[idx][pos]); pos--)
	gcg_aseq[idx][pos] = '~';
    }
				/* calculate max namelen used */
  namelen = 0;
  for (idx = 0; idx < msa->nseq; idx++)
    if ((len = strlen(msa->sqname[idx])) > namelen) 
      namelen = len;

  /*****************************************************
   * Write the MSF header
   *****************************************************/
				/* required file type line */
  if (msa->type == kOtherSeq)
    msa->type = GuessAlignmentSeqtype(msa->aseq, msa->nseq);

  if      (msa->type == kRNA)   fprintf(fp, "!!NA_MULTIPLE_ALIGNMENT 1.0\n");
  else if (msa->type == kDNA)   fprintf(fp, "!!NA_MULTIPLE_ALIGNMENT 1.0\n");
  else if (msa->type == kAmino) fprintf(fp, "!!AA_MULTIPLE_ALIGNMENT 1.0\n");
  else if (msa->type == kOtherSeq) 
    Die("WriteMSF(): couldn't guess whether that alignment is RNA or protein.\n"); 
  else    
    Die("Invalid sequence type %d in WriteMSF()\n", msa->type); 

				/* free text comments */
  if (msa->ncomment > 0)
    {
      for (idx = 0; idx < msa->ncomment; idx++)
	fprintf(fp, "%s\n", msa->comment[idx]);
      fprintf(fp, "\n");
    }
				/* required checksum line */
  now    = time(NULL);
  tm_now = localtime(&now);	/* may be NULL; must not reach strftime() */
  if (tm_now == NULL || strftime(date, sizeof(date), "%B %d, %Y %H:%M", tm_now) == 0)
    Die("What time is it on earth? strftime() failed in WriteMSF().\n");
				/* Type is N for both nucleic acid types
				 * (RNA and DNA), matching the file type
				 * line written above; P for protein */
  fprintf(fp, " %s  MSF: %d  Type: %c  %s  Check: %d  ..\n", 
	  msa->name != NULL ? msa->name : "squid.msf",
	  msa->alen,
	  (msa->type == kRNA || msa->type == kDNA) ? 'N' : 'P',
	  date,
	  GCGMultchecksum(gcg_aseq, msa->nseq));
  fprintf(fp, "\n");

  /*****************************************************
   * Names/weights section
   *****************************************************/

  for (idx = 0; idx < msa->nseq; idx++)
    {
      fprintf(fp, " Name: %-*.*s  Len:  %5d  Check: %4d  Weight: %.2f\n",
	      namelen, namelen,
	      gcg_sqname[idx],
	      msa->alen,
	      GCGchecksum(gcg_aseq[idx], msa->alen),
	      msa->wgt[idx]);
    }
  fprintf(fp, "\n");
  fprintf(fp, "//\n");

  /*****************************************************
   * Write the sequences
   *****************************************************/

  for (pos = 0; pos < msa->alen; pos += 50)
    {
      fprintf(fp, "\n");	/* Blank line between sequence blocks */

				/* Coordinate line: start coord at the left,
				 * and for blocks wider than 10 columns the
				 * end coord right-justified over the last
				 * column (one spacer per group of ten) */
      len = (pos + 50) > msa->alen ? msa->alen - pos : 50;
      if (len > 10)
	fprintf(fp, "%*s  %-6d%*s%6d\n", namelen, "", 
		pos+1,
		len + ((len-1)/10) - 12, "",
		pos + len);
      else
	fprintf(fp, "%*s  %-6d\n", namelen, "", pos+1);

      for (idx = 0; idx < msa->nseq; idx++)
	{
	  fprintf(fp, "%-*s ", namelen, gcg_sqname[idx]);
				/* get next line's worth of 50 from seq;
				 * strncpy() does not terminate a full
				 * 50-char copy, so terminate explicitly */
	  strncpy(buffer, gcg_aseq[idx] + pos, 50);
	  buffer[50] = '\0';
				/* draw the sequence line, with a space
				 * before every group of ten */
	  for (i = 0; i < len; i++)
	    {
	      if (! (i % 10)) fputc(' ', fp);
	      fputc(buffer[i], fp);
	    }
	  fputc('\n', fp);
	}
    }

  Free2DArray((void **) gcg_aseq,   msa->nseq);
  Free2DArray((void **) gcg_sqname, msa->nseq);
  return;
}
/* Example #10
 * 0
 */
/* Function: Cluster()
 * 
 * Purpose:  Cluster analysis on a distance matrix. Constructs a
 *           phylogenetic tree which contains the topology
 *           and info for each node: branch lengths, how many
 *           sequences are included under the node, and which
 *           sequences are included under the node.
 *
 *           Works on a private copy of dmx. On each round the
 *           closest pair (i,j) in the current N'xN' matrix is
 *           joined into tree node N'-2, the pair is moved to
 *           the last two rows/columns, merged into row/column
 *           N'-2 according to <mode>, and N' shrinks by one.
 *           Leaves are numbered 0..N-1; internal node k of the
 *           tree array is referred to as N+k.
 *           
 * Args:     dmx     - the NxN distance matrix ( >= 0.0, larger means more diverged)
 *           N       - size of mx (number of sequences); the merge
 *                     loop only runs when N >= 2
 *           mode    - CLUSTER_MEAN, CLUSTER_MAX, or CLUSTER_MIN
 *                     (any other value is treated as CLUSTER_MEAN)
 *           ret_tree- RETURN: the tree 
 *
 * Return:   1 on success; allocation failures are reported via Die().
 *           The caller is responsible for freeing the tree's memory,
 *           by calling FreePhylo(tree, N).
 */
int
Cluster(float **dmx, int N, enum clust_strategy mode, struct phylo_s **ret_tree)
{
  struct phylo_s *tree;         /* (0..N-2) phylogenetic tree          */
  float    **mx;                /* copy of difference matrix           */
  int       *coord;             /* (0..N-1), indices for matrix coords */
  int        i=0, j=0;		/* coords of minimum difference        */
  int        idx;		/* counter over seqs                   */
  int        Np;                /* N', a working copy of N             */
  int        row, col;          /* loop variables                      */
  float      min;		/* best minimum score found            */
  float     *trow;              /* tmp pointer for swapping rows       */
  float      tcol;              /* tmp storage for swapping cols       */
  float     *diff=NULL;		/* (0..N-2) difference scores at nodes */
  int        swapfoo;		/* for SWAP() macro                    */

  /**************************
   * Initializations.
   **************************/
  /* We destroy the matrix we work on, so make a copy of dmx.
   */
  if ((mx = (float **) malloc (sizeof(float *) * N)) == NULL)
    Die("malloc failed");
  for (i = 0; i < N; i++)
    {
      if ((mx[i] = (float *) malloc (sizeof(float) * N)) == NULL)
	Die("malloc failed");
      for (j = 0; j < N; j++)
	mx[i][j] = dmx[i][j];
    }
				/* coord array alloc, (0..N-1) */
  if ((coord = (int *)    malloc  (N *    sizeof(int)))    == NULL ||
      (diff  = (float *) malloc ((N-1) * sizeof(float))) == NULL)
    Die("malloc failed");
				/* init the coord array to 0..N-1 */
  for (col = 0; col < N; col++)  coord[col] = col;
  for (i = 0; i < N-1; i++)      diff[i] = 0.0;

				/* tree array alloc, (0..N-2) */
  if ((tree = AllocPhylo(N)) == NULL)  Die("AllocPhylo() failed");

  /*********************************
   * Process the difference matrix
   *********************************/
  
				/* N-prime, for an NxN down to a 2x2 diffmx */
  for (Np = N; Np >= 2; Np--)
    {
      /* Find a minimum on the N'xN' matrix (upper triangle).
       * The search is seeded with the first cell, (0,1), which
       * always exists because N' >= 2. Seeding from real data
       * rather than a fixed "large" constant guarantees that
       * i,j always name a pair inside the current matrix, no
       * matter how large the distances are; for ties the first
       * cell in scan order still wins.
       */
      i   = 0;
      j   = 1;
      min = mx[0][1];
      for (row = 0; row < Np; row++)
	for (col = row+1; col < Np; col++)
	  if (mx[row][col] < min)
	    {
	      min = mx[row][col];
	      i   = row;
	      j   = col;
	    }

      /* We're clustering row i with col j. write necessary
       * data into a node on the tree
       */
				/* topology info; a coord >= N names an
				 * earlier internal node, whose parent
				 * is the node being created now */
      tree[Np-2].left  = coord[i];
      tree[Np-2].right = coord[j];
      if (coord[i] >= N) tree[coord[i]-N].parent = N + Np - 2;
      if (coord[j] >= N) tree[coord[j]-N].parent = N + Np - 2;

				/* keep score info */
      diff[Np-2] = tree[Np-2].diff = min;

				/* way-simple branch length estimation:
				 * joining distance, less the joining
				 * distance already recorded at a child
				 * that is itself an internal node */
      tree[Np-2].lblen = tree[Np-2].rblen = min;
      if (coord[i] >= N) tree[Np-2].lblen -= diff[coord[i]-N];
      if (coord[j] >= N) tree[Np-2].rblen -= diff[coord[j]-N];

				/* number seqs included at node */
      if (coord[i] < N) 
	{			/* left child is a single sequence */
	  tree[Np-2].incnum ++;
	  tree[Np-2].is_in[coord[i]] = 1;
	}
      else 
	{			/* left child is a node: inherit its set */
	  tree[Np-2].incnum += tree[coord[i]-N].incnum;
	  for (idx = 0; idx < N; idx++)
	    tree[Np-2].is_in[idx] |= tree[coord[i]-N].is_in[idx];
	}
      
      if (coord[j] < N) 
	{			/* right child is a single sequence */
	  tree[Np-2].incnum ++;
	  tree[Np-2].is_in[coord[j]] = 1;
	}
      else 
	{			/* right child is a node: inherit its set */
	  tree[Np-2].incnum += tree[coord[j]-N].incnum;
	  for (idx = 0; idx < N; idx++)
	    tree[Np-2].is_in[idx] |= tree[coord[j]-N].is_in[idx];
	}


      /* Now build a new matrix, by merging row i with row j and
       * column i with column j; see Fitch and Margoliash
       */
				/* Row and column swapping. */
				/* watch out for swapping i, j away: */
      if (i == Np-1 || j == Np-2)
	SWAP(i,j);

      if (i != Np-2)
	{
				/* swap row i, row N'-2 */
	  trow = mx[Np-2]; mx[Np-2] = mx[i]; mx[i] = trow;
				/* swap col i, col N'-2 */
	  for (row = 0; row < Np; row++) 
	    {
	      tcol = mx[row][Np-2];
	      mx[row][Np-2] = mx[row][i];
	      mx[row][i] = tcol;
	    }
				/* swap coord i, coord N'-2 */
	  SWAP(coord[i], coord[Np-2]);
	}

      if (j != Np-1)
	{
				/* swap row j, row N'-1 */
	  trow = mx[Np-1]; mx[Np-1] = mx[j]; mx[j] = trow;
				/* swap col j, col N'-1 */
	  for (row = 0; row < Np; row++) 
	    {
	      tcol = mx[row][Np-1];
	      mx[row][Np-1] = mx[row][j];
	      mx[row][j] = tcol;
	    }
				/* swap coord j, coord N'-1 */
	  SWAP(coord[j], coord[Np-1]);
	}

				/* average i and j together; they're now
				   at Np-2 and Np-1 though */
      i = Np-2;
      j = Np-1;
				/* merge by saving avg of cols of row i and row j */
      for (col = 0; col < Np; col++)
	{
	  switch (mode) {
	  case CLUSTER_MEAN:  mx[i][col] =(mx[i][col]+ mx[j][col]) / 2.0; break;
	  case CLUSTER_MIN:   mx[i][col] = MIN(mx[i][col], mx[j][col]);   break;
	  case CLUSTER_MAX:   mx[i][col] = MAX(mx[i][col], mx[j][col]);   break;
	  default:            mx[i][col] =(mx[i][col]+ mx[j][col]) / 2.0; break; 
	  }
	}
				/* copy those rows to columns */
      for (col = 0; col < Np; col++)
	mx[col][i] = mx[i][col];
				/* store the node index in coords */
      coord[Np-2] = Np+N-2;
    }

  /**************************
   * Garbage collection and return
   **************************/
  Free2DArray((void **) mx, N);
  free(coord);
  free(diff);
  *ret_tree = tree;
  return 1;
}