/* Print `prompt`, read one integer from stdin and return it.
 * Exits with a diagnostic if the input is not an integer or is not positive,
 * so callers never size an allocation from an unset or non-positive value. */
static int hmm_read_positive_int(const char *prompt)
{
    int value = 0;

    printf("%s", prompt);
    if (scanf("%d", &value) != 1 || value <= 0) {
        fprintf(stderr, "\nInvalid input: expected a positive integer.\n");
        exit(EXIT_FAILURE);
    }
    return value;
}

/* calloc() wrapper: returns zeroed storage for `count` elements of `size`
 * bytes, or prints a diagnostic and exits when the allocation fails. */
static void *hmm_calloc_or_die(size_t count, size_t size)
{
    void *p = calloc(count, size);

    if (p == NULL) {
        fprintf(stderr, "\nOut of memory while allocating %lu elements.\n",
                (unsigned long)count);
        exit(EXIT_FAILURE);
    }
    return p;
}

/*
 * Interactive driver for the HMM re-estimation run.
 *
 * Reads N, M and T from stdin, allocates the observation buffer, PI, B and
 * the per-step work matrices, then runs: alpha pass, beta pass, gamma pass
 * and re-estimation of B and PI.  One initial pass is reported separately,
 * after which the same sequence is repeated until `maxIters` passes have
 * been made in total.  A is a fixed table and is not handed to
 * reestimate_B_PI, so this driver never rewrites it.
 *
 * Returns 0 on completion; exits with EXIT_FAILURE on invalid input or
 * allocation failure.
 */
int main()
{
    int i, j, T, N, M;
    int maxIters = 800;
    int iters = 0;
    /* Negative infinity (IEEE-754 division by zero) as the starting value. */
    double oldLogProb = -(1.0/0.0);

    N = hmm_read_positive_int("\nPlease enter a value for N : ");
    M = hmm_read_positive_int("\nPlease enter a value for M : ");
    T = hmm_read_positive_int("\nPlease enter a value for T : ");

    double A[26][26] = {
        {0.00320,0.02153,0.04721,0.04476,0.00113,0.01417,0.02455,0.00459,0.04017,0.00195,0.01100,0.09790,0.03810,0.18396,0.00281,0.02016,0.00058,0.11674,0.10182,0.14237,0.01213,0.01929,0.01127,0.00346,0.03671,0.00096},
        {0.11677,0.00882,0.00294,0.00257,0.28906,0.00196,0.00135,0.00257,0.06274,0.00539,0.00086,0.11089,0.00368,0.00110,0.11898,0.00196,0.00061,0.06163,0.01507,0.00821,0.10758,0.00270,0.00306,0.00061,0.08087,0.00074},
        {0.12775,0.00182,0.01820,0.00276,0.14564,0.00195,0.00151,0.15442,0.06808,0.00050,0.04700,0.03633,0.00144,0.00113,0.21447,0.00295,0.00125,0.03915,0.00734,0.09192,0.02893,0.00088,0.00169,0.00031,0.00866,0.00044},
        {0.11143,0.03926,0.02247,0.02257,0.17153,0.02531,0.01887,0.02800,0.11168,0.00538,0.00203,0.02105,0.03089,0.01643,0.07080,0.01856,0.00218,0.03256,0.06167,0.10326,0.03479,0.00771,0.02906,0.00025,0.01709,0.00046},
        {0.07743,0.01878,0.05059,0.08977,0.03883,0.02537,0.01513,0.01586,0.02962,0.00308,0.00602,0.04128,0.03728,0.10133,0.02457,0.02857,0.00321,0.14522,0.10970,0.06244,0.00743,0.01720,0.02802,0.01121,0.01318,0.00055},
        {0.09529,0.01090,0.02072,0.01045,0.08011,0.06725,0.00831,0.01768,0.11789,0.00366,0.00223,0.02635,0.01527,0.00911,0.19711,0.01393,0.00116,0.08109,0.02206,0.15442,0.03403,0.00259,0.01206,0.00045,0.00447,0.00071},
        {0.11690,0.01263,0.01426,0.00916,0.17240,0.01477,0.01527,0.11487,0.08513,0.00305,0.00204,0.02363,0.01466,0.03248,0.09888,0.01171,0.00112,0.09430,0.03371,0.08004,0.03574,0.00234,0.01609,0.00051,0.00438,0.00051},
        {0.15953,0.00460,0.00702,0.00310,0.47986,0.00337,0.00246,0.00667,0.11739,0.00095,0.00067,0.00417,0.00564,0.00722,0.09826,0.00460,0.00040,0.01921,0.01163,0.04139,0.01171,0.00127,0.00833,0.00020,0.00425,0.00024},
        {0.03055,0.00961,0.07411,0.04589,0.04204,0.01470,0.02892,0.00143,0.00087,0.00034,0.00511,0.06298,0.02881,0.25476,0.07959,0.01029,0.00082,0.03654,0.11632,0.11750,0.00126,0.02976,0.00138,0.00242,0.00020,0.00672},
        {0.15358,0.00524,0.00785,0.00611,0.13962,0.00436,0.00524,0.00960,0.04538,0.00698,0.00436,0.00611,0.00785,0.00524,0.30716,0.00611,0.00436,0.04538,0.00873,0.00611,0.28010,0.00436,0.00785,0.00436,0.00436,0.00436},
        {0.07177,0.01196,0.01892,0.00918,0.33324,0.01530,0.00640,0.03310,0.14604,0.00668,0.00417,0.02865,0.01363,0.04896,0.05285,0.01113,0.00195,0.01363,0.09513,0.05202,0.00807,0.00306,0.02587,0.00139,0.01446,0.00139},
        {0.12834,0.01892,0.01501,0.06400,0.16603,0.01644,0.00701,0.00801,0.12104,0.00133,0.00581,0.14211,0.01601,0.00486,0.07449,0.01549,0.00076,0.01020,0.04380,0.03479,0.02540,0.00705,0.01034,0.00024,0.06710,0.00038},
        {0.18651,0.03753,0.00859,0.00347,0.25011,0.00565,0.00188,0.00678,0.11718,0.00143,0.00121,0.00445,0.04491,0.00317,0.11831,0.06232,0.00045,0.04039,0.02999,0.02720,0.03617,0.00083,0.00784,0.00038,0.01063,0.00045},
        {0.07549,0.01200,0.05159,0.14048,0.08897,0.01677,0.10857,0.01370,0.06055,0.00385,0.00907,0.01148,0.01511,0.02065,0.06210,0.01120,0.00078,0.00757,0.07067,0.17194,0.01225,0.00732,0.01633,0.00036,0.01350,0.00058},
        {0.02177,0.01789,0.02276,0.02172,0.00739,0.10623,0.01114,0.00997,0.01218,0.00167,0.00917,0.04448,0.06516,0.18545,0.03081,0.03009,0.00048,0.14231,0.04094,0.05886,0.09193,0.02281,0.03972,0.00133,0.00603,0.00048},
        {0.13793,0.00381,0.00273,0.00254,0.16429,0.00449,0.00195,0.02987,0.06121,0.00146,0.00156,0.10045,0.01415,0.00098,0.13120,0.05476,0.00059,0.18167,0.02323,0.03954,0.04110,0.00059,0.00547,0.00049,0.00361,0.00049},
        {0.01071,0.01071,0.01071,0.01071,0.01285,0.01071,0.01071,0.01285,0.01071,0.01071,0.01071,0.01071,0.01071,0.01071,0.01071,0.01285,0.01071,0.01071,0.01071,0.01071,0.94861,0.01071,0.01071,0.01071,0.01071,0.01071},
        {0.10267,0.01015,0.02458,0.03757,0.21735,0.01142,0.01809,0.01295,0.09406,0.00268,0.01517,0.01803,0.03055,0.02846,0.10203,0.01446,0.00034,0.02009,0.08455,0.08012,0.02074,0.01086,0.01378,0.00015,0.03185,0.00049},
        {0.10195,0.02292,0.03705,0.01425,0.10867,0.02126,0.00828,0.05316,0.09497,0.00358,0.00864,0.01735,0.02410,0.01292,0.07702,0.03774,0.00160,0.01126,0.07181,0.19948,0.03470,0.00265,0.03048,0.00015,0.00693,0.00021},
        {0.07104,0.01045,0.01241,0.00547,0.11119,0.00964,0.00362,0.30486,0.11296,0.00157,0.00192,0.01560,0.01139,0.00554,0.11451,0.00848,0.00041,0.04002,0.04227,0.05523,0.01959,0.00135,0.02155,0.00011,0.02024,0.00085},
        {0.03310,0.03913,0.04880,0.02923,0.04857,0.00750,0.03642,0.00147,0.02753,0.00186,0.00247,0.09365,0.03047,0.15614,0.00340,0.04176,0.00046,0.13959,0.12845,0.12977,0.00046,0.00240,0.00124,0.00093,0.00263,0.00062},
        {0.08327,0.00180,0.00120,0.00260,0.61242,0.00160,0.00120,0.00180,0.21845,0.00180,0.00120,0.00160,0.00399,0.00120,0.05531,0.00240,0.00100,0.00240,0.00439,0.00339,0.00339,0.00180,0.00359,0.00100,0.00699,0.00100},
        {0.20303,0.00723,0.00587,0.00644,0.15741,0.00565,0.00282,0.15842,0.20246,0.00282,0.00226,0.00734,0.00949,0.03771,0.12048,0.00621,0.00079,0.01400,0.02179,0.01705,0.00215,0.00113,0.00689,0.00056,0.01107,0.00068},
        {0.14056,0.02711,0.09036,0.01004,0.09839,0.01406,0.00703,0.02510,0.08735,0.00502,0.00904,0.01707,0.01606,0.00703,0.03414,0.26807,0.00502,0.01205,0.01506,0.16064,0.00803,0.00602,0.02008,0.00602,0.01004,0.00502},
        {0.10465,0.04215,0.04298,0.02598,0.10141,0.03149,0.01425,0.04227,0.06334,0.00778,0.00431,0.02634,0.04346,0.02191,0.09794,0.03161,0.00180,0.02419,0.10022,0.11566,0.00682,0.00467,0.05280,0.00060,0.00311,0.00072},
        {0.21689,0.01142,0.02055,0.01826,0.42694,0.01370,0.01370,0.02283,0.09361,0.01370,0.01370,0.03425,0.01826,0.01142,0.05708,0.01370,0.01142,0.01826,0.02283,0.02511,0.03425,0.01598,0.01826,0.01142,0.02740,0.05251}
    };

    /* A has a fixed number of rows and is passed to the pass functions
     * together with N.  Assuming those functions index A by state (their
     * bodies are not in this block), a larger N would read past the table,
     * so it is rejected here. */
    if ((size_t)N > sizeof A / sizeof A[0]) {
        fprintf(stderr, "\nN must not exceed %lu (size of the built-in A matrix).\n",
                (unsigned long)(sizeof A / sizeof A[0]));
        exit(EXIT_FAILURE);
    }

    char *O = (char *)hmm_calloc_or_die((size_t)T, sizeof(char));
    double *PI = (double *)hmm_calloc_or_die((size_t)N, sizeof(double));

    double **B = (double **)hmm_calloc_or_die((size_t)N, sizeof(double *));
    for (i = 0; i < N; i++)
        B[i] = (double *)hmm_calloc_or_die((size_t)M, sizeof(double));

    /* These are the scaling factor, alpha and beta pass matrices */
    double *c = (double *)hmm_calloc_or_die((size_t)T, sizeof(double));

    double **a = (double **)hmm_calloc_or_die((size_t)T, sizeof(double *));
    for (i = 0; i < T; i++)
        a[i] = (double *)hmm_calloc_or_die((size_t)N, sizeof(double));

    double **b = (double **)hmm_calloc_or_die((size_t)T, sizeof(double *));
    for (i = 0; i < T; i++)
        b[i] = (double *)hmm_calloc_or_die((size_t)N, sizeof(double));

    /* These are the gamma and di-gamma matrices */
    double **gamma = (double **)hmm_calloc_or_die((size_t)T, sizeof(double *));
    for (i = 0; i < T; i++)
        gamma[i] = (double *)hmm_calloc_or_die((size_t)N, sizeof(double));

    double ***gamma2 = (double ***)hmm_calloc_or_die((size_t)T, sizeof(double **));
    for (i = 0; i < T; i++) {
        gamma2[i] = (double **)hmm_calloc_or_die((size_t)N, sizeof(double *));
        for (j = 0; j < N; j++) {
            /* NOTE(review): the innermost dimension is sized by M, as in the
             * original.  If computegammapass indexes it by state (N), this is
             * too small whenever M < N -- confirm against that function. */
            gamma2[i][j] = (double *)hmm_calloc_or_die((size_t)M, sizeof(double));
        }
    }

    printf("\nValue of LogProb : %f", oldLogProb);

    readCipher(O, T);
    initialize_PI(PI, N);
    initialize_B(B, N, M);
    print_A_B_PI(A, B, PI, N, M);

    /* Initial pass, reported on its own before the main loop. */
    computeAlphaPass(O, PI, A, B, c, a, N, T );
    betaPass(c, A, B, O, a, b, N, T );
    computegammapass(c, A, B, O, a, b, gamma, gamma2, N, T );
    reestimate_B_PI(PI, gamma, gamma2, B, O, N, M, T);

    printf("\n\nPlease find below the values of PI, A and B matrices after the initial pass.\n---------------------------------------------------------------------------------\n");
    print_A_B_PI(A, B, PI, N, M);

    double newLogProb = computeLogP(c, T);
    printf("\nValue of Log Prob after initial pass is : %f\n",newLogProb);
    iters += 1;

    /* The original author disabled the convergence test
     * (newLogProb > oldLogProb), noting that a local maximum is reached
     * after the 3rd iteration; the loop therefore always runs until
     * maxIters passes have been made. */
    while (iters < maxIters) {
        oldLogProb = newLogProb;
        computeAlphaPass(O, PI, A, B, c, a, N, T );
        betaPass(c, A, B, O, a, b, N, T );
        computegammapass(c, A, B, O, a, b, gamma, gamma2, N, T );
        reestimate_B_PI(PI, gamma, gamma2, B, O, N, M, T);
        newLogProb = computeLogP(c, T);
        iters++;
    }

    /* iters counts the initial pass too, hence the "- 1". */
    printf("\nNo of iterations elapsed : %d.\nCurrent LogProb : %f\t Previous LogProb : %f\n",iters-1,newLogProb,oldLogProb);
    printf("\n\nPlease find below the values of PI, A and B matrices after the final iteration.\n---------------------------------------------------------------------------------\n");
    print_A_B_PI(A, B, PI, N, M);
    printKey(B, N, M);

    /* Release everything allocated above. */
    for (i = 0; i < T; i++) {
        for (j = 0; j < N; j++)
            free(gamma2[i][j]);
        free(gamma2[i]);
        free(gamma[i]);
        free(b[i]);
        free(a[i]);
    }
    free(gamma2);
    free(gamma);
    free(b);
    free(a);
    free(c);
    for (i = 0; i < N; i++)
        free(B[i]);
    free(B);
    free(PI);
    free(O);

    printf("\n\nThe program has run to successful completion.\n\n");
    return 0;
}
int main(int argc, const char *argv[]) { int N, M, T, maxIters, seed, i, j, iter, str_len; char **alphabet; double logProb, newLogProb; double *pi, *piBar, **A, **Abar, **B, **Bbar; struct stepStruct *step; FILE *in, *out; char s[80]; int wantTraining = 1; if(argc != 10) { fprintf(stderr, "\nUsage: %s N M T maxIters filename alphabet modelfile seed\n\n", argv[0]); fprintf(stderr, "where N == number of states of the HMM\n"); fprintf(stderr, " M == number of observation symbols\n"); fprintf(stderr, " T == number of observations in the training set\n"); fprintf(stderr, " maxIters == max iterations of re-estimation algorithm\n"); fprintf(stderr, " filename == name of input file\n"); fprintf(stderr, " alphabet == name of file defining the alphabet\n"); fprintf(stderr, " modelfile == name of model output file\n"); fprintf(stderr, " seed == seed value for pseudo-random number generator (PRNG)\n\n"); fprintf(stderr, " wantTraining == to train enter 1, otherwise 0 \n\n"); fprintf(stderr, "For example:\n\n %s 2 10 10000 500 datafile alphabet modelfile 1241\n\n", argv[0]); fprintf(stderr, "will create a HMM with 2 states and 10 observation symbols,\n"); fprintf(stderr, "will read in the first 10000 observations from `datafile',\n"); fprintf(stderr, "will use the observation symbols defined in file `alphabet', and\n"); fprintf(stderr, "will write the model (pi, A, B) to `modelfile', and\n"); fprintf(stderr, "will seed the PRNG with 1241 and train the HMM with a maximum of 500 iterations.\n\n"); exit(0); } N = atoi(argv[1]); M = atoi(argv[2]); T = atoi(argv[3]); maxIters = atoi(argv[4]); seed = atoi(argv[8]); wantTraining = atoi(argv[9]); pi = (double *)malloc(N * sizeof(double)); piBar = (double *)malloc(N * sizeof(double)); A = (double **)malloc(N * sizeof(double*)); Abar =static_cast<double **>(malloc(N * sizeof(double*))); for (i=0; i<N; ++i) { A[i] = static_cast<double *>(malloc(N * sizeof(double))); Abar[i] = static_cast<double *>(malloc(N * sizeof(double))); } B = 
static_cast<double **>(malloc(N * sizeof(double*))); Bbar = static_cast<double **>(malloc(N * sizeof(double*))); for (i=0; i<N; ++i) { B[i] = static_cast<double *>(malloc(M * sizeof(double))); Bbar[i] = static_cast<double *>(malloc(M * sizeof(double))); } //////////////////////// // read the data file // //////////////////////// // allocate memory printf("allocating %d bytes of memory... ", (T + 1) * sizeof(struct stepStruct)); fflush(stdout); if((step = static_cast<stepStruct *>(calloc(T + 1, sizeof(struct stepStruct)))) == NULL) { fprintf(stderr, "\nUnable to allocate alpha\n\n"); exit(0); } for (i=0; i<T+1; ++i) { step[i].alpha = static_cast<double *>(malloc(N * sizeof(double))); step[i].beta = static_cast<double *>(malloc(N * sizeof(double))); step[i].gamma = static_cast<double *>(malloc(N * sizeof(double))); step[i].diGamma = static_cast<double **>(malloc(N * sizeof(double*))); for (j=0; j<N; ++j) { step[i].diGamma[j] = static_cast<double *>(malloc(N * sizeof(double))); } } printf("done\n"); // read in the observations from file printf("GetObservations... "); fflush(stdout); in = fopen(argv[5], "r"); // argv[5] = filename if(in == NULL) { fprintf(stderr, "\nError opening file %s\n\n", argv[5]); exit(0); } i = 0; fgets(s,80,in); // get rid of the first line while (i < T) { fgets(s,80,in); step[i].obs = atoi(s); ++i; } fclose(in); printf("done\n"); // read in the alphabet from file printf("GetAlphabet... 
"); fflush(stdout); alphabet = static_cast<char **>(malloc(M * sizeof (char*))); in = fopen(argv[6], "r"); // argv[6] = alphabet if(in == NULL) { fprintf(stderr, "\nError opening file %s\n\n", argv[6]); exit(0); } i = 0; fgets(s,80,in); // get rid of the first line while (i < M) { fgets(s,80,in); str_len = strlen(s); alphabet[i] = static_cast<char *>(malloc(str_len * sizeof(char))); strncpy(alphabet[i], s, str_len-1); alphabet[i][str_len-1] = '\0'; ++i; } fclose(in); printf("done\n"); ///////////////////////// // hidden markov model // ///////////////////////// srand(seed); // initialize pi[], A[][] and B[][] initMatrices(pi, A, B, N, M, seed); // print pi[], A[][] and B[][] transpose printf("\nN = %d, M = %d, T = %d\n", N, M, T); printf("initial pi =\n"); printPi(pi, N); printf("initial A =\n"); printA(A, N); printf("initial B^T =\n"); printBT(B, N, M, alphabet); // initialization iter = 0; logProb = -1.0; newLogProb = 0.0; if (wantTraining) { // main loop while((iter < maxIters) && (newLogProb > logProb)) { printf("\nbegin iteration = %d\n", iter); logProb = newLogProb; // alpha (or forward) pass printf("alpha pass... "); fflush(stdout); alphaPass(step, pi, A, B, N, T); printf("done\n"); // beta (or backwards) pass printf("beta pass... "); fflush(stdout); betaPass(step, pi, A, B, N, T); printf("done\n"); // compute gamma's and diGamma's printf("compute gamma's and diGamma's... "); fflush(stdout); computeGammas(step, pi, A, B, N, T); printf("done\n"); // find piBar, reestimate of pi printf("reestimate pi... "); fflush(stdout); reestimatePi(step, piBar, N); printf("done\n"); // find Abar, reestimate of A printf("reestimate A... "); fflush(stdout); reestimateA(step, Abar, N, T); printf("done\n"); // find Bbar, reestimate of B printf("reestimate B... 
"); fflush(stdout); reestimateB(step, Bbar, N, M, T); printf("done\n"); #ifdef PRINT_REESTIMATES printf("piBar =\n"); printPi(piBar, N); printf("Abar =\n"); printA(Abar, N); printf("Bbar^T = \n"); printBT(Bbar, N, M, alphabet); #endif // PRINT_REESTIMATES // assign pi, A and B corresponding "bar" values for(i = 0; i < N; ++i) { pi[i] = piBar[i]; for(j = 0; j < N; ++j) { A[i][j] = Abar[i][j]; } for(j = 0; j < M; ++j) { B[i][j] = Bbar[i][j]; } }// next i // compute log [P(observations | lambda)], where lambda = (A,B,pi) newLogProb = 0.0; for(i = 0; i < T; ++i) { newLogProb += log(step[i].c); } newLogProb = -newLogProb; // a little trick so that no initial logProb is required if(iter == 0) { logProb = newLogProb - 1.0; } printf("completed iteration = %d, log [P(observation | lambda)] = %f\n", iter, newLogProb); ++iter; }// end while out = fopen(argv[7], "w"); // argv[7] = modelfile writeModel(pi, A, B, N, M, T, alphabet, out); fclose(out); printf("\nT = %d, N = %d, M = %d, iterations = %d\n\n", T, N, M, iter); printf("final pi =\n"); printPi(pi, N); printf("\nfinal A =\n"); printA(A, N); printf("\nfinal B^T =\n"); printBT(B, N, M, alphabet); printf("\nlog [P(observations | lambda)] = %f\n\n", newLogProb); } // end of training else { //want to do testing out = fopen(argv[7], "r"); // argv[7] = modelfile readModelFile(pi, A, B, N, M, T, alphabet, out); // alpha (or forward) pass printf("alpha pass... "); fflush(stdout); alphaPass(step, pi, A, B, N, T); printf("done\n"); printf("logProb %f\n", computeLogProb(step, T)/T); // FILE * newFile = fopen("testing.txt", "a"); //writeModel(pi, A, B, N, M, T, alphabet, newFile); //fclose(newFile); fclose(out); } // end of testing }// end hmm