/** * Implementation of the ImprovedBreakpointReversalSort algorithm * listed in the text */ int main() { //SyntenyBlock pi[] = { 0, 8, 2, 7, 6, 5, 1, 4, 3, 9 }; //int length = 10; SyntenyBlock pi[] = { 0,19,18,9,21,23,12,11,13,4,10,7,3,25,20,6,1,24,17,5,14,8,15,2,16,22,26 }; int length = 27; Strip strips[length]; int breakpoints = b(pi, length, strips); while(breakpoints > 0) { // I do not check whether there is a decreasing strip here. // If there is not, the chooseRho function will return the // appropriate permutation to reverse an increasing strip ReversalPermutation rho = chooseRho(pi, length, strips, breakpoints + 1); printf("Permuting: reversing from %d to %d\n", pi[rho.minIndex], pi[rho.maxIndex]); fputs("\tBefore: ", stdout); printPi(pi, length); permute(pi, rho); fputs("\n\tAfter: ", stdout); printPi(pi, length); puts(""); breakpoints = b(pi, length, strips); } return 0; }
int main(int argc, const char *argv[]) { int N, M, T, maxIters, seed, i, j, iter, str_len; char **alphabet; double logProb, newLogProb; double *pi, *piBar, **A, **Abar, **B, **Bbar; struct stepStruct *step; FILE *in, *out; char s[80]; int wantTraining = 1; if(argc != 10) { fprintf(stderr, "\nUsage: %s N M T maxIters filename alphabet modelfile seed\n\n", argv[0]); fprintf(stderr, "where N == number of states of the HMM\n"); fprintf(stderr, " M == number of observation symbols\n"); fprintf(stderr, " T == number of observations in the training set\n"); fprintf(stderr, " maxIters == max iterations of re-estimation algorithm\n"); fprintf(stderr, " filename == name of input file\n"); fprintf(stderr, " alphabet == name of file defining the alphabet\n"); fprintf(stderr, " modelfile == name of model output file\n"); fprintf(stderr, " seed == seed value for pseudo-random number generator (PRNG)\n\n"); fprintf(stderr, " wantTraining == to train enter 1, otherwise 0 \n\n"); fprintf(stderr, "For example:\n\n %s 2 10 10000 500 datafile alphabet modelfile 1241\n\n", argv[0]); fprintf(stderr, "will create a HMM with 2 states and 10 observation symbols,\n"); fprintf(stderr, "will read in the first 10000 observations from `datafile',\n"); fprintf(stderr, "will use the observation symbols defined in file `alphabet', and\n"); fprintf(stderr, "will write the model (pi, A, B) to `modelfile', and\n"); fprintf(stderr, "will seed the PRNG with 1241 and train the HMM with a maximum of 500 iterations.\n\n"); exit(0); } N = atoi(argv[1]); M = atoi(argv[2]); T = atoi(argv[3]); maxIters = atoi(argv[4]); seed = atoi(argv[8]); wantTraining = atoi(argv[9]); pi = (double *)malloc(N * sizeof(double)); piBar = (double *)malloc(N * sizeof(double)); A = (double **)malloc(N * sizeof(double*)); Abar =static_cast<double **>(malloc(N * sizeof(double*))); for (i=0; i<N; ++i) { A[i] = static_cast<double *>(malloc(N * sizeof(double))); Abar[i] = static_cast<double *>(malloc(N * sizeof(double))); } B = static_cast<double **>(malloc(N * sizeof(double*))); Bbar = static_cast<double **>(malloc(N * sizeof(double*))); for (i=0; i<N; ++i) { B[i] = static_cast<double *>(malloc(M * sizeof(double))); Bbar[i] = static_cast<double *>(malloc(M * sizeof(double))); } //////////////////////// // read the data file // //////////////////////// // allocate memory printf("allocating %d bytes of memory... ", (T + 1) * sizeof(struct stepStruct)); fflush(stdout); if((step = static_cast<stepStruct *>(calloc(T + 1, sizeof(struct stepStruct)))) == NULL) { fprintf(stderr, "\nUnable to allocate alpha\n\n"); exit(0); } for (i=0; i<T+1; ++i) { step[i].alpha = static_cast<double *>(malloc(N * sizeof(double))); step[i].beta = static_cast<double *>(malloc(N * sizeof(double))); step[i].gamma = static_cast<double *>(malloc(N * sizeof(double))); step[i].diGamma = static_cast<double **>(malloc(N * sizeof(double*))); for (j=0; j<N; ++j) { step[i].diGamma[j] = static_cast<double *>(malloc(N * sizeof(double))); } } printf("done\n"); // read in the observations from file printf("GetObservations... "); fflush(stdout); in = fopen(argv[5], "r"); // argv[5] = filename if(in == NULL) { fprintf(stderr, "\nError opening file %s\n\n", argv[5]); exit(0); } i = 0; fgets(s,80,in); // get rid of the first line while (i < T) { fgets(s,80,in); step[i].obs = atoi(s); ++i; } fclose(in); printf("done\n"); // read in the alphabet from file printf("GetAlphabet... "); fflush(stdout); alphabet = static_cast<char **>(malloc(M * sizeof (char*))); in = fopen(argv[6], "r"); // argv[6] = alphabet if(in == NULL) { fprintf(stderr, "\nError opening file %s\n\n", argv[6]); exit(0); } i = 0; fgets(s,80,in); // get rid of the first line while (i < M) { fgets(s,80,in); str_len = strlen(s); alphabet[i] = static_cast<char *>(malloc(str_len * sizeof(char))); strncpy(alphabet[i], s, str_len-1); alphabet[i][str_len-1] = '\0'; ++i; } fclose(in); printf("done\n"); ///////////////////////// // hidden markov model // ///////////////////////// srand(seed); // initialize pi[], A[][] and B[][] initMatrices(pi, A, B, N, M, seed); // print pi[], A[][] and B[][] transpose printf("\nN = %d, M = %d, T = %d\n", N, M, T); printf("initial pi =\n"); printPi(pi, N); printf("initial A =\n"); printA(A, N); printf("initial B^T =\n"); printBT(B, N, M, alphabet); // initialization iter = 0; logProb = -1.0; newLogProb = 0.0; if (wantTraining) { // main loop while((iter < maxIters) && (newLogProb > logProb)) { printf("\nbegin iteration = %d\n", iter); logProb = newLogProb; // alpha (or forward) pass printf("alpha pass... "); fflush(stdout); alphaPass(step, pi, A, B, N, T); printf("done\n"); // beta (or backwards) pass printf("beta pass... "); fflush(stdout); betaPass(step, pi, A, B, N, T); printf("done\n"); // compute gamma's and diGamma's printf("compute gamma's and diGamma's... "); fflush(stdout); computeGammas(step, pi, A, B, N, T); printf("done\n"); // find piBar, reestimate of pi printf("reestimate pi... "); fflush(stdout); reestimatePi(step, piBar, N); printf("done\n"); // find Abar, reestimate of A printf("reestimate A... "); fflush(stdout); reestimateA(step, Abar, N, T); printf("done\n"); // find Bbar, reestimate of B printf("reestimate B... "); fflush(stdout); reestimateB(step, Bbar, N, M, T); printf("done\n"); #ifdef PRINT_REESTIMATES printf("piBar =\n"); printPi(piBar, N); printf("Abar =\n"); printA(Abar, N); printf("Bbar^T = \n"); printBT(Bbar, N, M, alphabet); #endif // PRINT_REESTIMATES // assign pi, A and B corresponding "bar" values for(i = 0; i < N; ++i) { pi[i] = piBar[i]; for(j = 0; j < N; ++j) { A[i][j] = Abar[i][j]; } for(j = 0; j < M; ++j) { B[i][j] = Bbar[i][j]; } }// next i // compute log [P(observations | lambda)], where lambda = (A,B,pi) newLogProb = 0.0; for(i = 0; i < T; ++i) { newLogProb += log(step[i].c); } newLogProb = -newLogProb; // a little trick so that no initial logProb is required if(iter == 0) { logProb = newLogProb - 1.0; } printf("completed iteration = %d, log [P(observation | lambda)] = %f\n", iter, newLogProb); ++iter; }// end while out = fopen(argv[7], "w"); // argv[7] = modelfile writeModel(pi, A, B, N, M, T, alphabet, out); fclose(out); printf("\nT = %d, N = %d, M = %d, iterations = %d\n\n", T, N, M, iter); printf("final pi =\n"); printPi(pi, N); printf("\nfinal A =\n"); printA(A, N); printf("\nfinal B^T =\n"); printBT(B, N, M, alphabet); printf("\nlog [P(observations | lambda)] = %f\n\n", newLogProb); } // end of training else { //want to do testing out = fopen(argv[7], "r"); // argv[7] = modelfile readModelFile(pi, A, B, N, M, T, alphabet, out); // alpha (or forward) pass printf("alpha pass... "); fflush(stdout); alphaPass(step, pi, A, B, N, T); printf("done\n"); printf("logProb %f\n", computeLogProb(step, T)/T); // FILE * newFile = fopen("testing.txt", "a"); //writeModel(pi, A, B, N, M, T, alphabet, newFile); //fclose(newFile); fclose(out); } // end of testing }// end hmm