예제 #1
0
/**
 * Driver for the ImprovedBreakpointReversalSort algorithm
 * listed in the text.
 *
 * Starts from a hard-coded permutation, repeatedly asks chooseRho() for a
 * reversal, applies it with permute(), and prints the permutation before and
 * after each step. The loop ends once b() returns a value that is not
 * positive (presumably the remaining breakpoint count -- confirm against b()).
 */
int main()
{
  // Shorter sample input, kept around for quick experiments:
  //SyntenyBlock pi[] = { 0, 8, 2, 7, 6, 5, 1, 4, 3, 9 };
  //int length = 10;
  SyntenyBlock pi[] = { 0,19,18,9,21,23,12,11,13,4,10,7,3,25,20,6,1,24,17,5,14,8,15,2,16,22,26 };
  int length = 27;
  Strip strips[length];
  int remaining;

  // b() is re-evaluated after every reversal; it also receives `strips`,
  // which chooseRho() consumes on the next pass.
  for(remaining = b(pi, length, strips);
      remaining > 0;
      remaining = b(pi, length, strips))
  {
    // No explicit test for a decreasing strip is made here. According to
    // the original author's note, chooseRho is expected to fall back to a
    // permutation that reverses an increasing strip when none exists.
    ReversalPermutation rho = chooseRho(pi, length, strips, remaining + 1);

    // Report which elements bound the reversal, then the state before it.
    printf("Permuting: reversing from %d to %d\n", pi[rho.minIndex], pi[rho.maxIndex]);
    fputs("\tBefore: ", stdout);
    printPi(pi, length);

    permute(pi, rho);

    // State after the reversal, followed by a line break.
    fputs("\n\tAfter:  ", stdout);
    printPi(pi, length);
    puts("");
  }

  return 0;
}
예제 #2
0
int main(int argc, const char *argv[])
{
    int N,
		    M,
		    T,
		    maxIters,
		    seed,
		    i,
		    j,
		    iter,
        str_len;

    char **alphabet;

    double logProb,
           newLogProb;

  	double *pi,
           *piBar, 
           **A,
           **Abar,
           **B,
           **Bbar;

    struct stepStruct *step;

    FILE *in,
         *out;
    
    char s[80];

	int wantTraining = 1;
    
    if(argc != 10)
    {
        fprintf(stderr, "\nUsage: %s N M T maxIters filename alphabet modelfile seed\n\n", argv[0]);
        fprintf(stderr, "where N == number of states of the HMM\n");
        fprintf(stderr, "      M == number of observation symbols\n");
        fprintf(stderr, "      T == number of observations in the training set\n");
        fprintf(stderr, "      maxIters == max iterations of re-estimation algorithm\n");
        fprintf(stderr, "      filename == name of input file\n");
        fprintf(stderr, "      alphabet == name of file defining the alphabet\n");
        fprintf(stderr, "      modelfile == name of model output file\n");
        fprintf(stderr, "      seed == seed value for pseudo-random number generator (PRNG)\n\n");
		fprintf(stderr, "      wantTraining == to train enter 1, otherwise 0 \n\n");
        fprintf(stderr, "For example:\n\n      %s 2 10 10000 500 datafile alphabet modelfile 1241\n\n", argv[0]);
        fprintf(stderr, "will create a HMM with 2 states and 10 observation symbols,\n");
        fprintf(stderr, "will read in the first 10000 observations from `datafile',\n");
        fprintf(stderr, "will use the observation symbols defined in file `alphabet', and\n");
        fprintf(stderr, "will write the model (pi, A, B) to `modelfile', and\n");
        fprintf(stderr, "will seed the PRNG with 1241 and train the HMM with a maximum of 500 iterations.\n\n");
        exit(0);
    }

    N = atoi(argv[1]);
    M = atoi(argv[2]);
    T = atoi(argv[3]);
    maxIters = atoi(argv[4]);
    seed = atoi(argv[8]);
	wantTraining = atoi(argv[9]);

    pi = (double *)malloc(N * sizeof(double));
    piBar = (double *)malloc(N * sizeof(double));

    A = (double **)malloc(N * sizeof(double*));
    Abar =static_cast<double **>(malloc(N * sizeof(double*)));
    for (i=0; i<N; ++i)
    {
      A[i] = static_cast<double *>(malloc(N * sizeof(double)));
      Abar[i] = static_cast<double *>(malloc(N * sizeof(double)));
    }

    B = static_cast<double **>(malloc(N * sizeof(double*)));
    Bbar = static_cast<double **>(malloc(N * sizeof(double*)));
    for (i=0; i<N; ++i)
    {
      B[i] = static_cast<double *>(malloc(M * sizeof(double)));
      Bbar[i] = static_cast<double *>(malloc(M * sizeof(double)));
    }
    
   
    ////////////////////////
    // read the data file //
    ////////////////////////

    // allocate memory
    printf("allocating %d bytes of memory... ", (T + 1) * sizeof(struct stepStruct));
    fflush(stdout);
    if((step = static_cast<stepStruct *>(calloc(T + 1, sizeof(struct stepStruct)))) == NULL)
    {
        fprintf(stderr, "\nUnable to allocate alpha\n\n");
        exit(0);
    }
    for (i=0; i<T+1; ++i)
    {
      step[i].alpha = static_cast<double *>(malloc(N * sizeof(double)));
      step[i].beta = static_cast<double *>(malloc(N * sizeof(double)));
      step[i].gamma = static_cast<double *>(malloc(N * sizeof(double)));
      step[i].diGamma = static_cast<double **>(malloc(N * sizeof(double*)));
      for (j=0; j<N; ++j)
      {
        step[i].diGamma[j] = static_cast<double *>(malloc(N * sizeof(double)));
      }
    }
    printf("done\n");

    // read in the observations from file
    printf("GetObservations... ");
    fflush(stdout);
    in = fopen(argv[5], "r"); // argv[5] = filename
    if(in == NULL)
    {
        fprintf(stderr, "\nError opening file %s\n\n", argv[5]);
        exit(0);
    }
    i = 0;
    fgets(s,80,in); // get rid of the first line
    while (i < T)
    {
      fgets(s,80,in);
      step[i].obs = atoi(s);
      ++i;
    }
    fclose(in);
    printf("done\n");

    // read in the alphabet from file
    printf("GetAlphabet... ");
    fflush(stdout);
    alphabet = static_cast<char **>(malloc(M * sizeof (char*)));
    in = fopen(argv[6], "r"); // argv[6] = alphabet
    if(in == NULL)
    {
        fprintf(stderr, "\nError opening file %s\n\n", argv[6]);
        exit(0);
    }
    i = 0;
    fgets(s,80,in); // get rid of the first line
    while (i < M)
    {
      fgets(s,80,in);
	    str_len = strlen(s);
      alphabet[i] = static_cast<char *>(malloc(str_len * sizeof(char)));
      strncpy(alphabet[i], s, str_len-1);
      alphabet[i][str_len-1] = '\0';
      ++i;
    }
    fclose(in);
    printf("done\n");


    /////////////////////////
    // hidden markov model //
    /////////////////////////

    srand(seed);

    // initialize pi[], A[][] and B[][]
    initMatrices(pi, A, B, N, M, seed);

    // print pi[], A[][] and B[][] transpose
    printf("\nN = %d, M = %d, T = %d\n", N, M, T);
    printf("initial pi =\n");
    printPi(pi, N);
    printf("initial A =\n");
    printA(A, N);
    printf("initial B^T =\n");
    printBT(B, N, M, alphabet);

    // initialization
    iter = 0;
    logProb = -1.0;
    newLogProb = 0.0;

	if (wantTraining) {

		// main loop
		while((iter < maxIters) && (newLogProb > logProb))
		{
			printf("\nbegin iteration = %d\n", iter);

			logProb = newLogProb;

			// alpha (or forward) pass
			printf("alpha pass... ");
			fflush(stdout);
			alphaPass(step, pi, A, B, N, T);
			printf("done\n");

			// beta (or backwards) pass
			printf("beta pass... ");
			fflush(stdout);
			betaPass(step, pi, A, B, N, T);
			printf("done\n");

			// compute gamma's and diGamma's
			printf("compute gamma's and diGamma's... ");
			fflush(stdout);
			computeGammas(step, pi, A, B, N, T);
			printf("done\n");

			// find piBar, reestimate of pi
			printf("reestimate pi... ");
			fflush(stdout);
			reestimatePi(step, piBar, N);
			printf("done\n");

			// find Abar, reestimate of A
			printf("reestimate A... ");
			fflush(stdout);
			reestimateA(step, Abar, N, T);
			printf("done\n");

			// find Bbar, reestimate of B
			printf("reestimate B... ");
			fflush(stdout);
			reestimateB(step, Bbar, N, M, T);
			printf("done\n");

	#ifdef PRINT_REESTIMATES
			printf("piBar =\n");
			printPi(piBar, N);
			printf("Abar =\n");
			printA(Abar, N);
			printf("Bbar^T = \n");
			printBT(Bbar, N, M, alphabet);
	#endif // PRINT_REESTIMATES

			// assign pi, A and B corresponding "bar" values
			for(i = 0; i < N; ++i)
			{
				pi[i] = piBar[i];

				for(j = 0; j < N; ++j)
				{
					A[i][j] = Abar[i][j];
				}

				for(j = 0; j < M; ++j)
				{
					B[i][j] = Bbar[i][j];
				}

			}// next i

			// compute log [P(observations | lambda)], where lambda = (A,B,pi)
			newLogProb = 0.0;
			for(i = 0; i < T; ++i)
			{
				newLogProb += log(step[i].c);
			}
			newLogProb = -newLogProb;

			// a little trick so that no initial logProb is required
			if(iter == 0)
			{
				logProb = newLogProb - 1.0;
			}

			printf("completed iteration = %d, log [P(observation | lambda)] = %f\n",
					iter, newLogProb);

			++iter;

		}// end while
    
		out = fopen(argv[7], "w"); // argv[7] = modelfile
		writeModel(pi, A, B, N, M, T, alphabet, out);
		fclose(out);
    
		printf("\nT = %d, N = %d, M = %d, iterations = %d\n\n", T, N, M, iter);
		printf("final pi =\n");
		printPi(pi, N);
		printf("\nfinal A =\n");
		printA(A, N);
		printf("\nfinal B^T =\n");
		printBT(B, N, M, alphabet);
		printf("\nlog [P(observations | lambda)] = %f\n\n", newLogProb);

	} // end of training
	else { //want to do testing


		out = fopen(argv[7], "r"); // argv[7] = modelfile
		readModelFile(pi, A, B, N, M, T, alphabet, out);
		
		// alpha (or forward) pass
		printf("alpha pass... ");
		fflush(stdout);
		alphaPass(step, pi, A, B, N, T);
		printf("done\n");
		printf("logProb %f\n", computeLogProb(step, T)/T);
		
	//	FILE * newFile = fopen("testing.txt", "a");
		//writeModel(pi, A, B, N, M, T, alphabet, newFile);

		//fclose(newFile);

		fclose(out);
	} // end of testing
}// end hmm