/* ========================================================================= */
/**
 * Writes one "HMM" element for the model with index `number` of `f`.
 *
 * The element carries the attributes "name" (only if the model has one),
 * "type", "prior" (only if >= 0) and "transitionClasses" (only if > 1),
 * followed by the alphabet, the optional label alphabet and background
 * distributions, all states and all outgoing transitions.
 *
 * Failures while writing the name/type/transitionClasses attributes, the
 * label alphabet or the background distributions are logged but do not abort
 * the function. All other failures abort it.
 *
 * @param writer  XML text writer that receives the output
 * @param f       file structure holding the models and their common type
 * @param number  index of the model inside f->model
 * @return 0 on success, -1 on failure. On failure the "HMM" element that was
 *         started is not closed by this function.
 */
static int writeHMM(xmlTextWriterPtr writer, ghmm_xmlfile* f, int number) {
#define CUR_PROC "writeHMM"
  int rc = 0, i, N = 0;
  int w_cos = 0;
  double w_prior = -1.0;
  char *w_name = NULL;
  char *w_type = NULL;

  /* start HMM */
  if (0 > xmlTextWriterStartElement(writer, BAD_CAST "HMM")) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (HMM)");
    goto STOP;
  }

  /* collect the HMM attributes applicable to the model type */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    w_name  = f->model.d[number]->name;
    w_type  = strModeltype(f->model.d[number]->model_type);
    w_prior = f->model.d[number]->prior;
    N       = f->model.d[number]->N;
    w_cos   = 1;
    break;

  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    w_name  = f->model.ds[number]->name;
    w_type  = strModeltype(f->model.ds[number]->model_type);
    w_prior = f->model.ds[number]->prior;
    N       = f->model.ds[number]->N;
    w_cos   = 0;
    break;

  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /* No attribute source is wired up for pair HMMs. Continuing would use
       an undefined name, type and state count, so fail explicitly. */
    GHMM_LOG(LERROR, "writing of discrete pair HMMs is not supported");
    goto STOP;

  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    w_name = f->model.c[number]->name;
    /* fall back to the file-wide type if the model has no type of its own */
    if (f->model.c[number]->model_type) {
      w_type = strModeltype(f->model.c[number]->model_type);
    }
    else {
      w_type = strModeltype(f->modelType);
    }
    w_prior = f->model.c[number]->prior;
    N       = f->model.c[number]->N;
    w_cos   = f->model.c[number]->cos;
    break;

  default:
    GHMM_LOG(LERROR, "invalid modelType");
    goto STOP;
  }

  /* The libxml2 writer functions return the number of bytes written on
     success and a negative value on error, so only "< 0" means failure. */
  if (w_name) {
    if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "name", BAD_CAST w_name)) {
      GHMM_LOG(LERROR, "writing HMM name failed");
    }
  }

  if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST w_type)) {
    GHMM_LOG(LERROR, "writing HMM type failed");
  }

  if (w_prior >= 0.0) {
    WRITE_DOUBLE_ATTRIBUTE(writer, "prior", w_prior);
  }

  if (w_cos > 1) {
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "transitionClasses",
                                              "%d", w_cos)) {
      GHMM_LOG(LERROR, "failed to write no of transitionClasses");
    }
  }

  /* write alphabet if applicable */
  switch (f->modelType & (GHMM_kDiscreteHMM + GHMM_kTransitionClasses
                          + GHMM_kPairHMM)) {
  case GHMM_kDiscreteHMM:
    rc = writeAlphabet(writer, f->model.d[number]->alphabet, kAlphabet);
    break;

  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    /* no alphabet is written for models with transition classes */
    break;

  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /* no alphabets are written for pair HMMs */
    break;

  default:
    break;
  }

  if (rc) {
    GHMM_LOG_PRINTF(LERROR, LOC, "writing alphabet for HMM %d (type %s) failed",
                    number, strModeltype(f->modelType));
    goto STOP;
  }

  /* write label alphabet if applicable */
  if (((f->modelType & PTR_TYPE_MASK) == GHMM_kDiscreteHMM)
      && (f->modelType & GHMM_kLabeledStates)) {
    if (writeAlphabet(writer, f->model.d[number]->label_alphabet, kLabelAlphabet)) {
      GHMM_LOG(LERROR, "writing of label alphabet failed");
    }
  }

  /* write background distributions if applicable */
  if (((f->modelType & PTR_TYPE_MASK) == GHMM_kDiscreteHMM)
      && (f->modelType & GHMM_kBackgroundDistributions)) {
    if (writeBackground(writer, f->model.d[number]->bp)) {
      GHMM_LOG(LERROR, "writing of background distributions failed");
    }
  }

  /* write all states */
  for (i = 0; i < N; i++) {
    if (writeState(writer, f, number, i)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "writing of state %d in HMM %d failed", i, number);
      goto STOP;
    }
  }

  /* write all outgoing transitions */
  for (i = 0; i < N; i++) {
    if (writeTransition(writer, f, number, i)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "writing transitions of state %d in HMM %d failed",
                      i, number);
      goto STOP;
    }
  }

  /* end HMM */
  if (0 > xmlTextWriterEndElement(writer)) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (HMM)");
    goto STOP;
  }

  return 0;
STOP:
  return -1;
#undef CUR_PROC
}
/**
 * Compresses the input text and writes the compressed data to a file.
 *
 * The input is split into `parts` consecutive pieces of (almost) equal
 * length; the last piece also receives the remainder. The output starts with
 * the two bytes of MAGIC (high byte first), one byte holding the number of
 * parts and the alphabet. For every part it then contains the part length
 * (four writeByte calls, most significant byte first), the FSM tree and the
 * encoded data.
 *
 * Any failure (unreadable input, invalid number of parts, output file that
 * cannot be opened or closed) terminates the process.
 *
 * @param[in] filename   name and path of the file to compress.
 * @param[in] compressed name and path of the compressed output file. If NULL,
 *                       the output name is `filename` with ".ctx" appended.
 * @param[in] algorithm  the algorithm that will be used to build the suffix
 *                       tree: UKKONEN selects Ukkonen's construction, any
 *                       other value uses buildSTree().
 * @param[in] parts      number of parts the input is split into. Must be in
 *                       the range 1..255 because it is stored in one byte.
 * @param[in] see        passed through to encode().
 */
static void zip(char *filename, char *compressed, BOOL algorithm, int parts, BOOL see) {
  Uchar *origText, *prevText = NULL;
  Uint origTextLen, partTextLen, currentTextLen, chunkLen;
  FILE *compressed_file;
  int i, part;
  fsmTree_t stree = NULL, prevTree = NULL;
  BOOL alloc = False;
#ifdef WIN32
  HANDLE hndl;
#endif

  /* parts is used as a divisor and is written to the output as a single
     byte, so reject values that would divide by zero or get truncated. */
  if (parts < 1 || parts > 255) {
    fprintf(stderr, "Invalid number of parts %d (must be between 1 and 255)\n", parts);
    exit(EXIT_FAILURE);
  }

#ifdef WIN32
  origText = (Uchar *) file2String(filename, &origTextLen, &hndl);
#else
  origText = (Uchar *) file2String(filename, &origTextLen);
#endif

  if (origText == NULL) {
    fprintf(stderr,"Cannot open file %s\n", filename);
    exit(EXIT_FAILURE);
  }

  if (!compressed) {
    /* room for the name, the 4 characters of ".ctx" and the terminator */
    CALLOC(compressed, Uchar, strlen(filename) + 5);
    strcpy(compressed, filename);
    strcat(compressed, ".ctx");
    alloc = True;
  }

  compressed_file = fopen(compressed, "wb");
  if (!compressed_file) {
    printf( "Could not open output file");
    exit(1);
  }
  if (alloc) {
    FREE(compressed);
  }

  buildAlpha(origText, origTextLen);
  printf ("Alphasize: %ld\n", alphasize);
  printf("Algorithm %d\n", algorithm);

  setMaxCount();

  /* write magic number */
  putc(MAGIC >> 8, compressed_file);
  putc(MAGIC, compressed_file);
  /* write # of parts */
  putc(parts, compressed_file);

  initialize_output_bitstream();
  initialize_arithmetic_encoder();

  writeAlphabet(compressed_file);

  /* length of every part except the last one (integer division truncates) */
  chunkLen = origTextLen / (Uint) parts;

  currentTextLen = 0;
  for (part = 1; part <= parts; part++) {
    printf("---------- part %d ---------------\n", part);
    if (part != parts) {
      partTextLen = chunkLen;
    }
    else {
      /* the last part takes whatever is left over */
      partTextLen = origTextLen - chunkLen * (Uint) (parts - 1);
    }

    if (part > 1) {
      /* remember the previous part's buffers so they can be released */
      prevText = text;
      prevTree = stree;
    }

    textlen = partTextLen;
    CALLOC(text, Uchar, textlen);
    reversestring(origText + currentTextLen, textlen, text);

    if (algorithm == UKKONEN) {
      suffixTree_t tree = initSuffixTree();
      buildSuffixTree(tree);
      printf("Tree built\n");
      pruneSuffixTree(tree);
      stree = fsmSuffixTree(tree);
    }
    else {
      stree = buildSTree();
      printf("Tree built\n");
    }

    if (part > 1) {
      /* The previous part is fully encoded; release its text and tree,
         which would otherwise leak once per additional part. */
      FREE(prevText);
      freeFsmTree(prevTree);
      prevTree = NULL;
    }

    DEBUGCODE(printf("gamma hits: %d gamma Misses: %d\n", getHits(), getMisses()));
    printf("height: %ld\n", getHeight(stree));

    /* write textlen */
    for (i = 3; i >= 0; i--) {
      writeByte(textlen >> (8 * i), compressed_file);
    }
    printf ("Textlen: %ld\n", textlen);

    writeFsmTree(stree, compressed_file);
    printf("FSM...\n");
    makeFsm(stree);
    DEBUGCODE(printFsmTree(stree));

    printf("Encoding...\n");
    encode(stree, compressed_file, origText + currentTextLen, partTextLen, see);

    currentTextLen += partTextLen;
  }

  /* release the buffers of the last part */
  FREE(text);
  freeFsmTree(stree);

  flush_arithmetic_encoder(compressed_file);
  flush_output_bitstream(compressed_file);

#ifdef WIN32
  freetextspace(origText, hndl);
#else
  freetextspace(origText, origTextLen);
#endif

  /* fclose flushes buffered output, so a failure here means lost data */
  if (fclose(compressed_file) != 0) {
    fprintf(stderr, "Error closing output file\n");
    exit(EXIT_FAILURE);
  }
}