/* Allocate a zero-initialised array of `count` elements of `size` bytes each.
 * Prints an error and terminates the program if the allocation fails, so the
 * returned pointer is never NULL. Callers must pass count >= 1. */
static void *checked_calloc(size_t count, size_t size)
{
	void *p = calloc(count, size);

	if (p == NULL)
	{
		fprintf(stderr, "\nOut of memory while allocating %lu elements.\n", (unsigned long)count);
		exit(EXIT_FAILURE);
	}
	return p;
}

/* Allocate a zero-initialised rows x cols matrix of doubles, laid out as an
 * array of independently allocated rows. Release it with free_matrix(). */
static double **alloc_matrix(int rows, int cols)
{
	int r;
	double **m = (double **)checked_calloc((size_t)rows, sizeof(double *));

	for (r = 0; r < rows; r++)
		m[r] = (double *)checked_calloc((size_t)cols, sizeof(double));
	return m;
}

/* Free a matrix created by alloc_matrix() with the same row count. */
static void free_matrix(double **m, int rows)
{
	int r;

	for (r = 0; r < rows; r++)
		free(m[r]);
	free(m);
}

/* Print `prompt`, then read one integer from stdin.
 * Terminates the program if the input is not an integer >= 1. */
static int read_positive_int(const char *prompt)
{
	int value = 0;

	printf("%s", prompt);
	if (scanf("%d", &value) != 1 || value < 1)
	{
		fprintf(stderr, "\nInvalid input: expected a positive integer.\n");
		exit(EXIT_FAILURE);
	}
	return value;
}

/*
 * Interactive driver: reads N, M and T from stdin, allocates the working
 * arrays, runs one alpha/beta/gamma/re-estimation pass followed by further
 * passes until maxIters iterations have elapsed, and prints PI, A, B and the
 * key. A is a fixed table and is only passed to the helpers; this function
 * re-estimates B and PI (via reestimate_B_PI), never A.
 *
 * Returns 0 on completion; exits with EXIT_FAILURE on invalid input or when
 * memory cannot be allocated.
 */
int main()
{
	/* A below is a fixed 26x26 table that is handed to the helpers together
	 * with N, so N must not exceed this dimension. */
	enum { MAX_STATES = 26 };

	int i,T,N,M;
	int maxIters = 800;
	int iters = 0;
	/* Negative infinity via IEEE 754 division; it is only printed once below
	 * and is overwritten on the first loop iteration. */
	double oldLogProb = -(1.0/0.0);

	N = read_positive_int("\nPlease enter a value for N : ");
	if (N > MAX_STATES)
	{
		fprintf(stderr, "\nInvalid input: N must be at most %d.\n", (int)MAX_STATES);
		exit(EXIT_FAILURE);
	}

	M = read_positive_int("\nPlease enter a value for M : ");

	T = read_positive_int("\nPlease enter a value for T : ");


	char *O = (char *)checked_calloc((size_t)T, sizeof(char));
	double *PI = (double *)checked_calloc((size_t)N, sizeof(double));

	double A[MAX_STATES][MAX_STATES] = {
		{0.00320,0.02153,0.04721,0.04476,0.00113,0.01417,0.02455,0.00459,0.04017,0.00195,0.01100,0.09790,0.03810,0.18396,0.00281,0.02016,0.00058,0.11674,0.10182,0.14237,0.01213,0.01929,0.01127,0.00346,0.03671,0.00096},
		{0.11677,0.00882,0.00294,0.00257,0.28906,0.00196,0.00135,0.00257,0.06274,0.00539,0.00086,0.11089,0.00368,0.00110,0.11898,0.00196,0.00061,0.06163,0.01507,0.00821,0.10758,0.00270,0.00306,0.00061,0.08087,0.00074},
		{0.12775,0.00182,0.01820,0.00276,0.14564,0.00195,0.00151,0.15442,0.06808,0.00050,0.04700,0.03633,0.00144,0.00113,0.21447,0.00295,0.00125,0.03915,0.00734,0.09192,0.02893,0.00088,0.00169,0.00031,0.00866,0.00044},
		{0.11143,0.03926,0.02247,0.02257,0.17153,0.02531,0.01887,0.02800,0.11168,0.00538,0.00203,0.02105,0.03089,0.01643,0.07080,0.01856,0.00218,0.03256,0.06167,0.10326,0.03479,0.00771,0.02906,0.00025,0.01709,0.00046},
		{0.07743,0.01878,0.05059,0.08977,0.03883,0.02537,0.01513,0.01586,0.02962,0.00308,0.00602,0.04128,0.03728,0.10133,0.02457,0.02857,0.00321,0.14522,0.10970,0.06244,0.00743,0.01720,0.02802,0.01121,0.01318,0.00055},
		{0.09529,0.01090,0.02072,0.01045,0.08011,0.06725,0.00831,0.01768,0.11789,0.00366,0.00223,0.02635,0.01527,0.00911,0.19711,0.01393,0.00116,0.08109,0.02206,0.15442,0.03403,0.00259,0.01206,0.00045,0.00447,0.00071},
		{0.11690,0.01263,0.01426,0.00916,0.17240,0.01477,0.01527,0.11487,0.08513,0.00305,0.00204,0.02363,0.01466,0.03248,0.09888,0.01171,0.00112,0.09430,0.03371,0.08004,0.03574,0.00234,0.01609,0.00051,0.00438,0.00051},
		{0.15953,0.00460,0.00702,0.00310,0.47986,0.00337,0.00246,0.00667,0.11739,0.00095,0.00067,0.00417,0.00564,0.00722,0.09826,0.00460,0.00040,0.01921,0.01163,0.04139,0.01171,0.00127,0.00833,0.00020,0.00425,0.00024},
		{0.03055,0.00961,0.07411,0.04589,0.04204,0.01470,0.02892,0.00143,0.00087,0.00034,0.00511,0.06298,0.02881,0.25476,0.07959,0.01029,0.00082,0.03654,0.11632,0.11750,0.00126,0.02976,0.00138,0.00242,0.00020,0.00672},
		{0.15358,0.00524,0.00785,0.00611,0.13962,0.00436,0.00524,0.00960,0.04538,0.00698,0.00436,0.00611,0.00785,0.00524,0.30716,0.00611,0.00436,0.04538,0.00873,0.00611,0.28010,0.00436,0.00785,0.00436,0.00436,0.00436},
		{0.07177,0.01196,0.01892,0.00918,0.33324,0.01530,0.00640,0.03310,0.14604,0.00668,0.00417,0.02865,0.01363,0.04896,0.05285,0.01113,0.00195,0.01363,0.09513,0.05202,0.00807,0.00306,0.02587,0.00139,0.01446,0.00139},
		{0.12834,0.01892,0.01501,0.06400,0.16603,0.01644,0.00701,0.00801,0.12104,0.00133,0.00581,0.14211,0.01601,0.00486,0.07449,0.01549,0.00076,0.01020,0.04380,0.03479,0.02540,0.00705,0.01034,0.00024,0.06710,0.00038},
		{0.18651,0.03753,0.00859,0.00347,0.25011,0.00565,0.00188,0.00678,0.11718,0.00143,0.00121,0.00445,0.04491,0.00317,0.11831,0.06232,0.00045,0.04039,0.02999,0.02720,0.03617,0.00083,0.00784,0.00038,0.01063,0.00045},
		{0.07549,0.01200,0.05159,0.14048,0.08897,0.01677,0.10857,0.01370,0.06055,0.00385,0.00907,0.01148,0.01511,0.02065,0.06210,0.01120,0.00078,0.00757,0.07067,0.17194,0.01225,0.00732,0.01633,0.00036,0.01350,0.00058},
		{0.02177,0.01789,0.02276,0.02172,0.00739,0.10623,0.01114,0.00997,0.01218,0.00167,0.00917,0.04448,0.06516,0.18545,0.03081,0.03009,0.00048,0.14231,0.04094,0.05886,0.09193,0.02281,0.03972,0.00133,0.00603,0.00048},
		{0.13793,0.00381,0.00273,0.00254,0.16429,0.00449,0.00195,0.02987,0.06121,0.00146,0.00156,0.10045,0.01415,0.00098,0.13120,0.05476,0.00059,0.18167,0.02323,0.03954,0.04110,0.00059,0.00547,0.00049,0.00361,0.00049},
		{0.01071,0.01071,0.01071,0.01071,0.01285,0.01071,0.01071,0.01285,0.01071,0.01071,0.01071,0.01071,0.01071,0.01071,0.01071,0.01285,0.01071,0.01071,0.01071,0.01071,0.94861,0.01071,0.01071,0.01071,0.01071,0.01071},
		{0.10267,0.01015,0.02458,0.03757,0.21735,0.01142,0.01809,0.01295,0.09406,0.00268,0.01517,0.01803,0.03055,0.02846,0.10203,0.01446,0.00034,0.02009,0.08455,0.08012,0.02074,0.01086,0.01378,0.00015,0.03185,0.00049},
		{0.10195,0.02292,0.03705,0.01425,0.10867,0.02126,0.00828,0.05316,0.09497,0.00358,0.00864,0.01735,0.02410,0.01292,0.07702,0.03774,0.00160,0.01126,0.07181,0.19948,0.03470,0.00265,0.03048,0.00015,0.00693,0.00021},
		{0.07104,0.01045,0.01241,0.00547,0.11119,0.00964,0.00362,0.30486,0.11296,0.00157,0.00192,0.01560,0.01139,0.00554,0.11451,0.00848,0.00041,0.04002,0.04227,0.05523,0.01959,0.00135,0.02155,0.00011,0.02024,0.00085},
		{0.03310,0.03913,0.04880,0.02923,0.04857,0.00750,0.03642,0.00147,0.02753,0.00186,0.00247,0.09365,0.03047,0.15614,0.00340,0.04176,0.00046,0.13959,0.12845,0.12977,0.00046,0.00240,0.00124,0.00093,0.00263,0.00062},
		{0.08327,0.00180,0.00120,0.00260,0.61242,0.00160,0.00120,0.00180,0.21845,0.00180,0.00120,0.00160,0.00399,0.00120,0.05531,0.00240,0.00100,0.00240,0.00439,0.00339,0.00339,0.00180,0.00359,0.00100,0.00699,0.00100},
		{0.20303,0.00723,0.00587,0.00644,0.15741,0.00565,0.00282,0.15842,0.20246,0.00282,0.00226,0.00734,0.00949,0.03771,0.12048,0.00621,0.00079,0.01400,0.02179,0.01705,0.00215,0.00113,0.00689,0.00056,0.01107,0.00068},
		{0.14056,0.02711,0.09036,0.01004,0.09839,0.01406,0.00703,0.02510,0.08735,0.00502,0.00904,0.01707,0.01606,0.00703,0.03414,0.26807,0.00502,0.01205,0.01506,0.16064,0.00803,0.00602,0.02008,0.00602,0.01004,0.00502},
		{0.10465,0.04215,0.04298,0.02598,0.10141,0.03149,0.01425,0.04227,0.06334,0.00778,0.00431,0.02634,0.04346,0.02191,0.09794,0.03161,0.00180,0.02419,0.10022,0.11566,0.00682,0.00467,0.05280,0.00060,0.00311,0.00072},
		{0.21689,0.01142,0.02055,0.01826,0.42694,0.01370,0.01370,0.02283,0.09361,0.01370,0.01370,0.03425,0.01826,0.01142,0.05708,0.01370,0.01142,0.01826,0.02283,0.02511,0.03425,0.01598,0.01826,0.01142,0.02740,0.05251}};
	
	double **B = alloc_matrix(N, M);
	
	/* These are the scaling factor, alpha and beta pass matrices*/
	double *c = (double *)checked_calloc((size_t)T, sizeof(double));

	double **a = alloc_matrix(T, N);

	double **b = alloc_matrix(T, N);

	/* These are the gamma and di-gamma matrices*/

	double **gamma = alloc_matrix(T, N);

	/* NOTE(review): each gamma2[t] is N x M, exactly as in the original code.
	 * If computegammapass indexes it as N x N this is only safe when M >= N;
	 * confirm against that helper. */
	double ***gamma2  = (double ***)checked_calloc((size_t)T, sizeof(double **));
	for(i=0;i<T;i++)
		gamma2[i] = alloc_matrix(N, M);
	
	printf("\nValue of LogProb : %f", oldLogProb);

	readCipher(O, T);
	initialize_PI(PI, N);	
	initialize_B(B, N, M);	
	print_A_B_PI(A, B, PI, N, M);	

	computeAlphaPass(O, PI, A, B, c, a, N, T );
	
	betaPass(c, A, B, O, a, b, N, T );
	
	computegammapass(c, A, B, O, a, b, gamma, gamma2, N, T );

	reestimate_B_PI(PI, gamma, gamma2, B, O, N, M, T);

	printf("\n\nPlease find below the values of PI, A and B matrices after the initial pass.\n---------------------------------------------------------------------------------\n");
	print_A_B_PI(A, B, PI, N, M);	

	double newLogProb = 0.0;	
	newLogProb = computeLogP(c, T);
	
	printf("\nValue of Log Prob after initial pass is : %f\n",newLogProb);

	iters += 1;

	/* The convergence test (newLogProb > oldLogProb) is deliberately not part
	 * of the loop condition: per the original author's note, a local maximum
	 * is reached after the 3rd iteration, so the loop always runs until
	 * maxIters. */
	while (iters < maxIters)
	{
		oldLogProb = newLogProb;

		computeAlphaPass(O, PI, A, B, c, a, N, T );
	
		betaPass(c, A, B, O, a, b, N, T );
	
		computegammapass(c, A, B, O, a, b, gamma, gamma2, N, T );

		reestimate_B_PI(PI, gamma, gamma2, B, O, N, M, T);

		newLogProb = computeLogP(c, T);

		iters++;
	}
	
	printf("\nNo of iterations elapsed : %d.\nCurrent LogProb : %f\t Previous LogProb : %f\n",iters-1,newLogProb,oldLogProb);

	printf("\n\nPlease find below the values of PI, A and B matrices after the final iteration.\n---------------------------------------------------------------------------------\n");
	print_A_B_PI(A, B, PI, N, M);	

	printKey(B, N, M);

	printf("\n\nThe program has run to successful completion.\n\n");

	/* Release every array allocated above. */
	for(i=0;i<T;i++)
		free_matrix(gamma2[i], N);
	free(gamma2);
	free_matrix(gamma, T);
	free_matrix(b, T);
	free_matrix(a, T);
	free(c);
	free_matrix(B, N);
	free(PI);
	free(O);

	return 0;
}
/* Example #2
   0 */
int main(int argc, const char *argv[])
{
    int N,
		    M,
		    T,
		    maxIters,
		    seed,
		    i,
		    j,
		    iter,
        str_len;

    char **alphabet;

    double logProb,
           newLogProb;

  	double *pi,
           *piBar, 
           **A,
           **Abar,
           **B,
           **Bbar;

    struct stepStruct *step;

    FILE *in,
         *out;
    
    char s[80];

	int wantTraining = 1;
    
    if(argc != 10)
    {
        fprintf(stderr, "\nUsage: %s N M T maxIters filename alphabet modelfile seed\n\n", argv[0]);
        fprintf(stderr, "where N == number of states of the HMM\n");
        fprintf(stderr, "      M == number of observation symbols\n");
        fprintf(stderr, "      T == number of observations in the training set\n");
        fprintf(stderr, "      maxIters == max iterations of re-estimation algorithm\n");
        fprintf(stderr, "      filename == name of input file\n");
        fprintf(stderr, "      alphabet == name of file defining the alphabet\n");
        fprintf(stderr, "      modelfile == name of model output file\n");
        fprintf(stderr, "      seed == seed value for pseudo-random number generator (PRNG)\n\n");
		fprintf(stderr, "      wantTraining == to train enter 1, otherwise 0 \n\n");
        fprintf(stderr, "For example:\n\n      %s 2 10 10000 500 datafile alphabet modelfile 1241\n\n", argv[0]);
        fprintf(stderr, "will create a HMM with 2 states and 10 observation symbols,\n");
        fprintf(stderr, "will read in the first 10000 observations from `datafile',\n");
        fprintf(stderr, "will use the observation symbols defined in file `alphabet', and\n");
        fprintf(stderr, "will write the model (pi, A, B) to `modelfile', and\n");
        fprintf(stderr, "will seed the PRNG with 1241 and train the HMM with a maximum of 500 iterations.\n\n");
        exit(0);
    }

    N = atoi(argv[1]);
    M = atoi(argv[2]);
    T = atoi(argv[3]);
    maxIters = atoi(argv[4]);
    seed = atoi(argv[8]);
	wantTraining = atoi(argv[9]);

    pi = (double *)malloc(N * sizeof(double));
    piBar = (double *)malloc(N * sizeof(double));

    A = (double **)malloc(N * sizeof(double*));
    Abar =static_cast<double **>(malloc(N * sizeof(double*)));
    for (i=0; i<N; ++i)
    {
      A[i] = static_cast<double *>(malloc(N * sizeof(double)));
      Abar[i] = static_cast<double *>(malloc(N * sizeof(double)));
    }

    B = static_cast<double **>(malloc(N * sizeof(double*)));
    Bbar = static_cast<double **>(malloc(N * sizeof(double*)));
    for (i=0; i<N; ++i)
    {
      B[i] = static_cast<double *>(malloc(M * sizeof(double)));
      Bbar[i] = static_cast<double *>(malloc(M * sizeof(double)));
    }
    
   
    ////////////////////////
    // read the data file //
    ////////////////////////

    // allocate memory
    printf("allocating %d bytes of memory... ", (T + 1) * sizeof(struct stepStruct));
    fflush(stdout);
    if((step = static_cast<stepStruct *>(calloc(T + 1, sizeof(struct stepStruct)))) == NULL)
    {
        fprintf(stderr, "\nUnable to allocate alpha\n\n");
        exit(0);
    }
    for (i=0; i<T+1; ++i)
    {
      step[i].alpha = static_cast<double *>(malloc(N * sizeof(double)));
      step[i].beta = static_cast<double *>(malloc(N * sizeof(double)));
      step[i].gamma = static_cast<double *>(malloc(N * sizeof(double)));
      step[i].diGamma = static_cast<double **>(malloc(N * sizeof(double*)));
      for (j=0; j<N; ++j)
      {
        step[i].diGamma[j] = static_cast<double *>(malloc(N * sizeof(double)));
      }
    }
    printf("done\n");

    // read in the observations from file
    printf("GetObservations... ");
    fflush(stdout);
    in = fopen(argv[5], "r"); // argv[5] = filename
    if(in == NULL)
    {
        fprintf(stderr, "\nError opening file %s\n\n", argv[5]);
        exit(0);
    }
    i = 0;
    fgets(s,80,in); // get rid of the first line
    while (i < T)
    {
      fgets(s,80,in);
      step[i].obs = atoi(s);
      ++i;
    }
    fclose(in);
    printf("done\n");

    // read in the alphabet from file
    printf("GetAlphabet... ");
    fflush(stdout);
    alphabet = static_cast<char **>(malloc(M * sizeof (char*)));
    in = fopen(argv[6], "r"); // argv[6] = alphabet
    if(in == NULL)
    {
        fprintf(stderr, "\nError opening file %s\n\n", argv[6]);
        exit(0);
    }
    i = 0;
    fgets(s,80,in); // get rid of the first line
    while (i < M)
    {
      fgets(s,80,in);
	    str_len = strlen(s);
      alphabet[i] = static_cast<char *>(malloc(str_len * sizeof(char)));
      strncpy(alphabet[i], s, str_len-1);
      alphabet[i][str_len-1] = '\0';
      ++i;
    }
    fclose(in);
    printf("done\n");


    /////////////////////////
    // hidden markov model //
    /////////////////////////

    srand(seed);

    // initialize pi[], A[][] and B[][]
    initMatrices(pi, A, B, N, M, seed);

    // print pi[], A[][] and B[][] transpose
    printf("\nN = %d, M = %d, T = %d\n", N, M, T);
    printf("initial pi =\n");
    printPi(pi, N);
    printf("initial A =\n");
    printA(A, N);
    printf("initial B^T =\n");
    printBT(B, N, M, alphabet);

    // initialization
    iter = 0;
    logProb = -1.0;
    newLogProb = 0.0;

	if (wantTraining) {

		// main loop
		while((iter < maxIters) && (newLogProb > logProb))
		{
			printf("\nbegin iteration = %d\n", iter);

			logProb = newLogProb;

			// alpha (or forward) pass
			printf("alpha pass... ");
			fflush(stdout);
			alphaPass(step, pi, A, B, N, T);
			printf("done\n");

			// beta (or backwards) pass
			printf("beta pass... ");
			fflush(stdout);
			betaPass(step, pi, A, B, N, T);
			printf("done\n");

			// compute gamma's and diGamma's
			printf("compute gamma's and diGamma's... ");
			fflush(stdout);
			computeGammas(step, pi, A, B, N, T);
			printf("done\n");

			// find piBar, reestimate of pi
			printf("reestimate pi... ");
			fflush(stdout);
			reestimatePi(step, piBar, N);
			printf("done\n");

			// find Abar, reestimate of A
			printf("reestimate A... ");
			fflush(stdout);
			reestimateA(step, Abar, N, T);
			printf("done\n");

			// find Bbar, reestimate of B
			printf("reestimate B... ");
			fflush(stdout);
			reestimateB(step, Bbar, N, M, T);
			printf("done\n");

	#ifdef PRINT_REESTIMATES
			printf("piBar =\n");
			printPi(piBar, N);
			printf("Abar =\n");
			printA(Abar, N);
			printf("Bbar^T = \n");
			printBT(Bbar, N, M, alphabet);
	#endif // PRINT_REESTIMATES

			// assign pi, A and B corresponding "bar" values
			for(i = 0; i < N; ++i)
			{
				pi[i] = piBar[i];

				for(j = 0; j < N; ++j)
				{
					A[i][j] = Abar[i][j];
				}

				for(j = 0; j < M; ++j)
				{
					B[i][j] = Bbar[i][j];
				}

			}// next i

			// compute log [P(observations | lambda)], where lambda = (A,B,pi)
			newLogProb = 0.0;
			for(i = 0; i < T; ++i)
			{
				newLogProb += log(step[i].c);
			}
			newLogProb = -newLogProb;

			// a little trick so that no initial logProb is required
			if(iter == 0)
			{
				logProb = newLogProb - 1.0;
			}

			printf("completed iteration = %d, log [P(observation | lambda)] = %f\n",
					iter, newLogProb);

			++iter;

		}// end while
    
		out = fopen(argv[7], "w"); // argv[7] = modelfile
		writeModel(pi, A, B, N, M, T, alphabet, out);
		fclose(out);
    
		printf("\nT = %d, N = %d, M = %d, iterations = %d\n\n", T, N, M, iter);
		printf("final pi =\n");
		printPi(pi, N);
		printf("\nfinal A =\n");
		printA(A, N);
		printf("\nfinal B^T =\n");
		printBT(B, N, M, alphabet);
		printf("\nlog [P(observations | lambda)] = %f\n\n", newLogProb);

	} // end of training
	else { //want to do testing


		out = fopen(argv[7], "r"); // argv[7] = modelfile
		readModelFile(pi, A, B, N, M, T, alphabet, out);
		
		// alpha (or forward) pass
		printf("alpha pass... ");
		fflush(stdout);
		alphaPass(step, pi, A, B, N, T);
		printf("done\n");
		printf("logProb %f\n", computeLogProb(step, T)/T);
		
	//	FILE * newFile = fopen("testing.txt", "a");
		//writeModel(pi, A, B, N, M, T, alphabet, newFile);

		//fclose(newFile);

		fclose(out);
	} // end of testing
}// end hmm