Exemple #1
0
/* ========================================================================= */
static int writeHMM(xmlTextWriterPtr writer, ghmm_xmlfile* f, int number) {
#define CUR_PROC "writeHMM"
  int rc=0, i, N;
  int w_cos;
  double w_prior;
  char *w_name;
  char * w_type;

  /* start HMM */
  if (0 > xmlTextWriterStartElement(writer, BAD_CAST "HMM")) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (HMM)");
    goto STOP;;
  }

  /* write HMM attributes applicable */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    w_name  = f->model.d[number]->name;
    w_type  = strModeltype(f->model.d[number]->model_type);
    w_prior = f->model.d[number]->prior;
    N       = f->model.d[number]->N;
    w_cos   = 1;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    w_name  = f->model.ds[number]->name;
    w_type  = strModeltype(f->model.ds[number]->model_type);
    w_prior = f->model.ds[number]->prior;
    N       = f->model.ds[number]->N;
    w_cos   = 0;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /*
    w_name  = f->model.dp[number]->name;
    w_type  = strModeltype(f->model.dp[number]->model_type);
    w_prior = f->model.dp[number]->prior;
    N       = f->model.dp[number]->N;
    w_cos   = 0;
    */
    break;
  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    w_name  = f->model.c[number]->name;
    if (f->model.c[number]->model_type)
      w_type  = strModeltype(f->model.c[number]->model_type);
    else
      w_type  = strModeltype(f->modelType);
    w_prior = f->model.c[number]->prior;
    N       = f->model.c[number]->N;
    w_cos   = f->model.c[number]->cos;
    break;
  default:
    GHMM_LOG(LERROR, "invalid modelType");
    goto STOP;}

  if (w_name) {
    if (xmlTextWriterWriteAttribute(writer, BAD_CAST "name", w_name))
      GHMM_LOG(LERROR, "writing HMM name failed");
  }
  if (xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST w_type))
    GHMM_LOG(LERROR, "writing HMM type failed");

  if (w_prior >= 0.0) {
    WRITE_DOUBLE_ATTRIBUTE(writer, "prior", w_prior);
  }

  if (w_cos > 1)
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "transitionClasses",
                                              "%d", w_cos))
      GHMM_LOG(LERROR, "failed to write no of transitionClasses");
   

  /* write alphabet if applicable */
  switch (f->modelType & (GHMM_kDiscreteHMM + GHMM_kTransitionClasses
                          + GHMM_kPairHMM)) {
  case GHMM_kDiscreteHMM:
    rc = writeAlphabet(writer, f->model.d[number]->alphabet, kAlphabet);
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    /*rc = writeAlphabet(writer, f->model.ds[number]->alphabet, kAlphabet);*/
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /*rc = writeAlphabet(writer, f->model.dp[number]->alphabets[0], kAlphabet);
    if (rc) {
      GHMM_LOG(LERROR, "writing first alphabet of discrete pair HMM failed");
      goto STOP;
    }
    rc = writeAlphabet(writer, f->model.dp[number]->alphabets[1], kAlphabet);*/
    break;
  }

  if (rc) {
      GHMM_LOG_PRINTF(LERROR, LOC, "writing alphabet for HMM %d (type %s) failed",
                      number, strModeltype(f->modelType));
      goto STOP;
  }

  /* write label alphabet if applicable */
  if ((f->modelType & PTR_TYPE_MASK) == GHMM_kDiscreteHMM
      && f->modelType & GHMM_kLabeledStates) {
    if (writeAlphabet(writer, f->model.d[number]->label_alphabet, kLabelAlphabet))
      GHMM_LOG(LERROR, "writing of label alphabet failed");
  }

  /* write background distributions if applicable */
  if ((f->modelType & PTR_TYPE_MASK) == GHMM_kDiscreteHMM
      && f->modelType & GHMM_kBackgroundDistributions) {
    if (writeBackground(writer, f->model.d[number]->bp))
      GHMM_LOG(LERROR, "writing of background distributions failed");
  }

  /* write all states */
  for (i=0; i<N; i++)
    if (writeState(writer, f, number, i)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "writing of state %d in HMM %d failed", i, number);
      goto STOP;
    }

  /* write all outgoing transitions */
  for (i=0; i<N; i++)
    if (writeTransition(writer, f, number, i)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "writing transitions of state %d in HMM %d failed",
                     i, number);
      goto STOP;
    }

  /*end HMM*/
  if (0 > xmlTextWriterEndElement(writer)) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (HMM)");
    goto STOP;
  }

  return 0;
STOP:
  return -1;
#undef CUR_PROC
}
Exemple #2
0
/**
 * Compresses the input text and writes the compressed data to a file.
 * @param[in] filename name and path of the file to compress.
 * @param[in] compressed name and path of the compressed output file.
 * @param[in] algorithm the algorithm that will be used to build the suffix tree (Ukkonnen or Kurtz).
 * @param[in] see if see will be used.
 */
static void zip(char *filename, char *compressed, BOOL algorithm, int parts, BOOL see) {
  Uchar *origText, *prevText = NULL;
  Uint origTextLen, partTextLen, currentTextLen;
  FILE *compressed_file;
  int i, part;
  fsmTree_t stree = NULL, prevTree = NULL;
  BOOL alloc = False;

#ifdef WIN32
  HANDLE hndl;
  origText = (Uchar *) file2String(filename, &origTextLen, &hndl);
#else
  origText = (Uchar *) file2String(filename, &origTextLen);
#endif

  if(origText == NULL) {
    fprintf(stderr,"Cannot open file %s\n", filename);
    exit(EXIT_FAILURE);
  }
  /*if(textLen > MAXTEXTLEN)
    {
    fprintf(stderr,"Sorry, textlen = %lu is larger than maximal textlen = %lu\n",
    (Showuint) textLen,(Showuint) MAXTEXTLEN);
    exit(EXIT_FAILURE);
    }*/

  if (!compressed) {
    CALLOC(compressed, Uchar, strlen(filename) + 5);
    strcpy(compressed, filename);
    strcat(compressed, ".ctx");
    alloc = True;
  }

  compressed_file = fopen(compressed, "wb");
  if (!compressed_file) {
    printf( "Could not open output file");
    exit(1);
  }
  if (alloc) FREE(compressed);

  buildAlpha(origText, origTextLen);
  printf ("Alphasize: %ld\n", alphasize);
  printf("Algorithm %d\n", algorithm);

  setMaxCount();

  /* write magic number */
  putc(MAGIC >> 8, compressed_file);
  putc(MAGIC, compressed_file);
  /* write # of parts */
  putc(parts, compressed_file);

  initialize_output_bitstream();
  initialize_arithmetic_encoder();

  writeAlphabet(compressed_file);

  currentTextLen = 0;
  for (part = 1; part <= parts; part++) {
    printf("---------- part %d ---------------\n", part);
    if (part != parts) {
      partTextLen = floor(origTextLen / parts);
    }
    else {
      partTextLen = origTextLen - (floor(origTextLen / parts) * (parts - 1));
    }
 
    if (part > 1) {
      prevText = text;
      prevTree = stree;
    }

    textlen = partTextLen;
    CALLOC(text, Uchar, textlen);
    reversestring(origText + currentTextLen, textlen, text);
    
    if (algorithm == UKKONEN) {
      suffixTree_t tree = initSuffixTree();
      buildSuffixTree(tree);
      printf("Tree built\n");
      pruneSuffixTree(tree);
      stree = fsmSuffixTree(tree);   
    }
    else {
      stree = buildSTree();
      printf("Tree built\n");
    }

    /*if (part > 1) {
      copyStatistics(prevTree, stree, prevText);
      FREE(prevText);
      freeFsmTree(prevTree);
    }*/

    DEBUGCODE(printf("gamma hits: %d gamma Misses: %d\n", getHits(), getMisses()));
    printf("height: %ld\n", getHeight(stree));

    /* write textlen */
    for (i=3; i>=0; i--) {
      writeByte(textlen >> (8 * i), compressed_file);
    }
    printf ("Textlen: %ld\n", textlen);
    writeFsmTree(stree, compressed_file);
    printf("FSM...\n");
    makeFsm(stree);
    DEBUGCODE(printFsmTree(stree));
    printf("Encoding...\n");

    encode(stree, compressed_file, origText + currentTextLen, partTextLen, see);
    
    currentTextLen += partTextLen;
  }

  FREE(text);
  freeFsmTree(stree);

  flush_arithmetic_encoder(compressed_file);
  flush_output_bitstream(compressed_file);

#ifdef WIN32
  freetextspace(origText, hndl);
#else
  freetextspace(origText, origTextLen);
#endif

  fclose(compressed_file);
}