/**
 *  Build a HiddenVariableOrderMarkov object from an ASCII file.
 *
 *  The file is read in four successive stages ('#' starts a comment that runs
 *  to the end of the line; lines holding no token are skipped):
 *    1. one line holding a single keyword, SEQ_word[SEQW_HIDDEN_MARKOV_CHAIN] or
 *       SEQ_word[SEQW_EQUILIBRIUM_HIDDEN_MARKOV_CHAIN], which selects the process type,
 *    2. the variable-order Markov chain, delegated to VariableOrderMarkovChain::parsing(),
 *    3. one line "<n> <keyword>" where 1 <= n <= NB_OUTPUT_PROCESS and the keyword is
 *       STAT_word[STATW_OUTPUT_PROCESS] (n == 1) or STAT_word[STATW_OUTPUT_PROCESSES],
 *    4. for each observation process, a header line
 *       "<STAT_word[STATW_OUTPUT_PROCESS]> <index> : <type keyword>" followed by the
 *       process definition, delegated to the parsing() function of the matching class.
 *
 *  \param[out] error           error object: reset on entry, then filled with every
 *                              problem detected while reading,
 *  \param[in]  path            path of the file to read,
 *  \param[in]  length          sequence length, rejected when < 2 or > MAX_LENGTH and
 *                              otherwise forwarded to the HiddenVariableOrderMarkov constructor,
 *  \param[in]  cumul_threshold threshold forwarded to DiscreteParametricProcess::parsing().
 *
 *  \return newly allocated HiddenVariableOrderMarkov object (to be released by the caller),
 *          or NULL when the file cannot be opened or any check fails.
 */
HiddenVariableOrderMarkov* HiddenVariableOrderMarkov::ascii_read(StatError &error ,
                                                                 const string path , int length ,
                                                                 double cumul_threshold)

{
  RWLocaleSnapshot locale("en");
  RWCString buffer , token;
  size_t position;
  process_type type = DEFAULT_TYPE;
  bool status , lstatus , free_slot;
  int i;
  int line , nb_output_process , index;
  observation_process obs_type;
  long value;
  const VariableOrderMarkovChain *imarkov;
  CategoricalProcess *categorical_process , **categorical_observation;
  DiscreteParametricProcess *discrete_parametric_process , **discrete_parametric_observation;
  ContinuousParametricProcess *continuous_parametric_process , **continuous_parametric_observation;
  HiddenVariableOrderMarkov *hmarkov;
  ifstream in_file(path.c_str());


  hmarkov = NULL;
  error.init();

  if (!in_file) {
    error.update(STAT_error[STATR_FILE_NAME]);
  }

  else {
    status = true;
    line = 0;

    if (length < 2) {
      status = false;
      error.update(SEQ_error[SEQR_SHORT_SEQUENCE_LENGTH]);
    }
    if (length > MAX_LENGTH) {
      status = false;
      error.update(SEQ_error[SEQR_LONG_SEQUENCE_LENGTH]);
    }

    // stage 1: first line holding at least one token = process type keyword

    while (buffer.readLine(in_file , false)) {
      line++;

#     ifdef DEBUG
      cout << line << "  " << buffer << endl;
#     endif

      position = buffer.first('#');
      if (position != RW_NPOS) {
        buffer.remove(position);
      }
      i = 0;

      RWCTokenizer next(buffer);

      while (!((token = next()).isNull())) {

        // test (EQUILIBRIUM_)HIDDEN_MARKOV_CHAIN keyword

        if (i == 0) {
          if (token == SEQ_word[SEQW_HIDDEN_MARKOV_CHAIN]) {
            type = ORDINARY;
          }
          else if (token == SEQ_word[SEQW_EQUILIBRIUM_HIDDEN_MARKOV_CHAIN]) {
            type = EQUILIBRIUM;
          }
          else {
            status = false;
            ostringstream correction_message;
            correction_message << SEQ_word[SEQW_HIDDEN_MARKOV_CHAIN] << " or "
                               << SEQ_word[SEQW_EQUILIBRIUM_HIDDEN_MARKOV_CHAIN];
            error.correction_update(STAT_parsing[STATP_KEYWORD] ,
                                    (correction_message.str()).c_str() , line);
          }
        }

        i++;
      }

      if (i > 0) {
        if (i != 1) {
          status = false;
          error.update(STAT_parsing[STATP_FORMAT] , line);
        }
        break;
      }
    }

    // without a recognized keyword the rest of the file is not read

    if (type != DEFAULT_TYPE) {

      // stage 2: analysis of the format and reading of the variable-order Markov chain

      imarkov = VariableOrderMarkovChain::parsing(error , in_file , line , type);

      // analysis of the format and reading of the observation distributions

      if (imarkov) {
        nb_output_process = I_DEFAULT;

        categorical_observation = NULL;
        discrete_parametric_observation = NULL;
        continuous_parametric_observation = NULL;

        // stage 3: line giving the number of observation processes

        while (buffer.readLine(in_file , false)) {
          line++;

#         ifdef DEBUG
          cout << line << "  " << buffer << endl;
#         endif

          position = buffer.first('#');
          if (position != RW_NPOS) {
            buffer.remove(position);
          }
          i = 0;

          RWCTokenizer next(buffer);

          while (!((token = next()).isNull())) {
            switch (i) {

            // test number of observation processes

            case 0 : {
              lstatus = locale.stringToNum(token , &value);
              if (lstatus) {
                if ((value < 1) || (value > NB_OUTPUT_PROCESS)) {
                  lstatus = false;
                }
                else {
                  nb_output_process = value;
                }
              }

              if (!lstatus) {
                status = false;
                error.update(STAT_parsing[STATP_NB_OUTPUT_PROCESS] , line , i + 1);
              }
              break;
            }

            // test OUTPUT_PROCESS(ES) keyword

            case 1 : {
              if (token != STAT_word[nb_output_process == 1 ? STATW_OUTPUT_PROCESS : STATW_OUTPUT_PROCESSES]) {
                status = false;
                error.correction_update(STAT_parsing[STATP_KEYWORD] ,
                                        STAT_word[nb_output_process == 1 ? STATW_OUTPUT_PROCESS : STATW_OUTPUT_PROCESSES] , line , i + 1);
              }
              break;
            }
            }

            i++;
          }

          if (i > 0) {
            if (i != 2) {
              status = false;
              error.update(STAT_parsing[STATP_FORMAT] , line);
            }
            break;
          }
        }

        if (nb_output_process == I_DEFAULT) {
          status = false;
          error.update(STAT_parsing[STATP_FORMAT] , line);
        }

        else {

          // one slot per observation process in each array; for a given process,
          // only the array matching its type ends up holding a non-null pointer

          categorical_observation = new CategoricalProcess*[nb_output_process];
          discrete_parametric_observation = new DiscreteParametricProcess*[nb_output_process];
          continuous_parametric_observation = new ContinuousParametricProcess*[nb_output_process];

          for (i = 0;i < nb_output_process;i++) {
            categorical_observation[i] = NULL;
            discrete_parametric_observation[i] = NULL;
            continuous_parametric_observation[i] = NULL;
          }

          index = 0;

          // stage 4: observation process headers, each followed by its definition;
          // this loop runs until the end of the file

          while (buffer.readLine(in_file , false)) {
            line++;

#           ifdef DEBUG
            cout << line << "  " << buffer << endl;
#           endif

            position = buffer.first('#');
            if (position != RW_NPOS) {
              buffer.remove(position);
            }
            i = 0;

            // reset for every line so that a header with fewer than 4 tokens can neither
            // read an uninitialized value nor reuse the type of the previous process

            obs_type = DEFAULT_PROCESS;

            RWCTokenizer next(buffer);

            while (!((token = next()).isNull())) {
              switch (i) {

              // test OUTPUT_PROCESS keyword

              case 0 : {
                if (token == STAT_word[STATW_OUTPUT_PROCESS]) {
                  index++;
                }
                else {
                  status = false;
                  error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_OUTPUT_PROCESS] , line , i + 1);
                }
                break;
              }

              // test observation process index

              case 1 : {
                lstatus = locale.stringToNum(token , &value);
                if ((lstatus) && ((value != index) || (value > nb_output_process))) {
                  lstatus = false;
                }

                if (!lstatus) {
                  status = false;
                  error.update(STAT_parsing[STATP_OUTPUT_PROCESS_INDEX] , line , i + 1);
                }
                break;
              }

              // test separator

              case 2 : {
                if (token != ":") {
                  status = false;
                  error.update(STAT_parsing[STATP_SEPARATOR] , line , i + 1);
                }
                break;
              }

              // test CATEGORICAL/DISCRETE_PARAMETRIC/CONTINUOUS_PARAMETRIC keyword

              case 3 : {
                if ((token == STAT_word[STATW_CATEGORICAL]) ||
                    (token == STAT_word[STATW_NONPARAMETRIC])) {
                  obs_type = CATEGORICAL_PROCESS;
                }
                else if ((token == STAT_word[STATW_DISCRETE_PARAMETRIC]) ||
                         (token == STAT_word[STATW_PARAMETRIC])) {
                  obs_type = DISCRETE_PARAMETRIC;
                }
                else if (token == STAT_word[STATW_CONTINUOUS_PARAMETRIC]) {
                  obs_type = CONTINUOUS_PARAMETRIC;
                }
                else {
                  obs_type = DEFAULT_PROCESS;
                  status = false;
                  ostringstream correction_message;
                  correction_message << STAT_word[STATW_CATEGORICAL] << " or "
                                     << STAT_word[STATW_DISCRETE_PARAMETRIC] << " or "
                                     << STAT_word[STATW_CONTINUOUS_PARAMETRIC];
                  error.correction_update(STAT_parsing[STATP_KEYWORD] , (correction_message.str()).c_str() , line , i + 1);
                }
                break;
              }
              }

              i++;
            }

            if (i > 0) {
              if (i != 4) {
                status = false;
                error.update(STAT_parsing[STATP_FORMAT] , line);
              }

              // A parsed process may only be stored when index designates an empty slot:
              // index is still 0 while no OUTPUT_PROCESS keyword has been matched, exceeds
              // nb_output_process on surplus blocks, and is left on an already filled slot
              // when the keyword of the current header is wrong. Each of these situations
              // has already been reported above; the process definition is nevertheless
              // parsed so that its lines are consumed, then it is discarded.

              free_slot = ((index >= 1) && (index <= nb_output_process) &&
                           (!categorical_observation[index - 1]) &&
                           (!discrete_parametric_observation[index - 1]) &&
                           (!continuous_parametric_observation[index - 1]));

              switch (obs_type) {

              case CATEGORICAL_PROCESS : {
                categorical_process = CategoricalProcess::parsing(error , in_file , line ,
                                                                  ((Chain*)imarkov)->nb_state ,
                                                                  HIDDEN_MARKOV , true);
                if (!categorical_process) {
                  status = false;
                }
                else if (free_slot) {
                  categorical_observation[index - 1] = categorical_process;
                }
                else {
                  status = false;
                  delete categorical_process;
                }
                break;
              }

              case DISCRETE_PARAMETRIC : {
                discrete_parametric_process = DiscreteParametricProcess::parsing(error , in_file , line ,
                                                                                 ((Chain*)imarkov)->nb_state ,
                                                                                 HIDDEN_MARKOV ,
                                                                                 cumul_threshold);
                if (!discrete_parametric_process) {
                  status = false;
                }
                else if (free_slot) {
                  discrete_parametric_observation[index - 1] = discrete_parametric_process;
                }
                else {
                  status = false;
                  delete discrete_parametric_process;
                }
                break;
              }

              case CONTINUOUS_PARAMETRIC : {
                continuous_parametric_process = ContinuousParametricProcess::parsing(error , in_file , line ,
                                                                                     ((Chain*)imarkov)->nb_state ,
                                                                                     HIDDEN_MARKOV ,
                                                                                     ZERO_INFLATED_GAMMA);
                if (!continuous_parametric_process) {
                  status = false;
                }
                else if (free_slot) {
                  continuous_parametric_observation[index - 1] = continuous_parametric_process;
                }
                else {
                  status = false;
                  delete continuous_parametric_process;
                }
                break;
              }
              }
            }
          }

          // the number of headers found must match the announced number of processes

          if (index != nb_output_process) {
            status = false;
            error.update(STAT_parsing[STATP_FORMAT] , line);
          }

          if (status) {
            hmarkov = new HiddenVariableOrderMarkov(imarkov , nb_output_process ,
                                                    categorical_observation ,
                                                    discrete_parametric_observation ,
                                                    continuous_parametric_observation , length);

#           ifdef DEBUG
            hmarkov->ascii_write(cout);
#           endif

          }

          // the parsed processes are released here whether or not hmarkov was built

          for (i = 0;i < nb_output_process;i++) {
            delete categorical_observation[i];
            delete discrete_parametric_observation[i];
            delete continuous_parametric_observation[i];
          }
          delete [] categorical_observation;
          delete [] discrete_parametric_observation;
          delete [] continuous_parametric_observation;
        }

        // released on every path, including when the line giving the number of
        // observation processes is missing or invalid

        delete imarkov;
      }
    }
  }

  return hmarkov;
}
// Example no. 2
// 0
/**
 *  Build a Renewal object from an ASCII file.
 *
 *  The file must hold one discrete parametric distribution (the inter-event
 *  distribution, read by DiscreteParametric::parsing()). Every remaining line must
 *  be blank once its '#' comment, if any, has been removed.
 *
 *  \param[out] error           error object: reset on entry, then filled with every
 *                              problem detected while reading,
 *  \param[in]  path            path of the file to read,
 *  \param[in]  type            process type forwarded to the Renewal constructor,
 *  \param[in]  time            observation time, rejected when lower than
 *                              MAX(inter_event->offset , 2) or greater than MAX_TIME,
 *  \param[in]  cumul_threshold threshold forwarded to DiscreteParametric::parsing().
 *
 *  \return newly allocated Renewal object (to be released by the caller), or NULL
 *          when the file cannot be opened or any check fails.
 */
Renewal* Renewal::ascii_read(StatError &error , const string path ,
                             process_type type , int time , double cumul_threshold)

{
    // NOTE(review): locale is not referenced below; it is kept in case its
    // construction matters - confirm before removing
    RWLocaleSnapshot locale("en");
    RWCString buffer;
    size_t position;
    bool status;
    int line;
    const char *remainder;
    DiscreteParametric *inter_event;
    Renewal *renew;
    ifstream in_file(path.c_str());


    renew = NULL;
    error.init();

    if (!in_file) {
        error.update(STAT_error[STATR_FILE_NAME]);
    }

    else {
        status = true;
        line = 0;

        inter_event = DiscreteParametric::parsing(error , in_file , line ,
                      NEGATIVE_BINOMIAL , cumul_threshold , 1);

        if (!inter_event) {
            status = false;
        }

        else {
            if (time < MAX(inter_event->offset , 2)) {
                status = false;
                error.update(SEQ_error[SEQR_SHORT_OBSERVATION_TIME]);
            }
            if (time > MAX_TIME) {
                status = false;
                error.update(SEQ_error[SEQR_LONG_OBSERVATION_TIME]);
            }
        }

        // nothing but comments and blank lines may follow the distribution

        while (buffer.readLine(in_file , false)) {
            line++;

#     ifdef DEBUG
            cout << line << " " << buffer << endl;
#     endif

            position = buffer.first('#');
            if (position != RW_NPOS) {
                buffer.remove(position);
            }

            // readLine() is called with skipWhite = false, so indentation is kept:
            // a line reduced to spaces and tabs (e.g. an indented comment) is blank
            // and must not be reported as a format error

            remainder = buffer.data();
            while ((*remainder == ' ') || (*remainder == '\t')) {
                remainder++;
            }

            if (*remainder != '\0') {
                status = false;
                error.update(STAT_parsing[STATP_FORMAT] , line);
            }
        }

        if (status) {
            // NOTE(review): time is passed for two consecutive arguments of a UNIFORM
            // distribution, presumably its two bounds (degenerate distribution
            // concentrated on time) - confirm against the DiscreteParametric constructor
            DiscreteParametric dtime(UNIFORM , time , time , D_DEFAULT , D_DEFAULT);
            renew = new Renewal(type , dtime , *inter_event);
        }

        // deleting a null pointer is a no-op, so this also covers a failed parsing
        delete inter_event;
    }

    return renew;
}
// Example no. 3
// 0
/**
 *  Build a Compound object from an ASCII file.
 *
 *  Expected layout ('#' starts a comment that runs to the end of the line; lines
 *  holding no token are skipped), each keyword standing alone on its line:
 *    1. STAT_word[STATW_COMPOUND],
 *    2. STAT_word[STATW_SUM] followed by the sum distribution,
 *    3. STAT_word[STATW_ELEMENTARY] followed by the elementary distribution,
 *  both distributions being read by DiscreteParametric::parsing(). Every remaining
 *  line must be blank once its comment, if any, has been removed.
 *
 *  \param[out] error           error object: reset on entry, then filled with every
 *                              problem detected while reading,
 *  \param[in]  path            path of the file to read,
 *  \param[in]  cumul_threshold threshold forwarded to the parsing of the elementary
 *                              distribution and to the Compound constructor.
 *
 *  \return newly allocated Compound object (to be released by the caller), or NULL
 *          when the file cannot be opened or any check fails.
 */
Compound* Compound::ascii_read(StatError &error , const string path , double cumul_threshold)

{
  RWCString buffer , token;

  size_t position;
  bool status;
  int i;
  int line , read_line;
  DiscreteParametric *sum_dist , *dist;
  Compound *compound;
  ifstream in_file(path.c_str());


  compound = NULL;
  error.init();

  if (!in_file) {
    error.update(STAT_error[STATR_FILE_NAME]);
  }

  else {
    status = true;
    line = 0;

    // number of keyword lines processed so far (0: compound, 1: sum, 2: elementary)
    read_line = 0;

    sum_dist = NULL;
    dist = NULL;

    while (buffer.readLine(in_file , false)) {
      line++;

#     ifdef DEBUG
      cout << line << "  " << buffer << endl;
#     endif

      position = buffer.first('#');
      if (position != RW_NPOS) {
        buffer.remove(position);
      }
      i = 0;

      RWCTokenizer next(buffer);

      while (!((token = next()).isNull())) {

        // test COMPOUND_DISTRIBUTION/SUM_DISTRIBUTION/ELEMENTARY_DISTRIBUTION keywords

        if (i == 0) {
          switch (read_line) {

          case 0 : {
            if (token != STAT_word[STATW_COMPOUND]) {
              status = false;
              error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_COMPOUND] , line);
            }
            break;
          }

          case 1 : {
            if (token != STAT_word[STATW_SUM]) {
              status = false;
              error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_SUM] , line);
            }
            break;
          }

          case 2 : {
            if (token != STAT_word[STATW_ELEMENTARY]) {
              status = false;
              error.correction_update(STAT_parsing[STATP_KEYWORD] , STAT_word[STATW_ELEMENTARY] , line);
            }
            break;
          }
          }
        }

        i++;
      }

      if (i > 0) {
        if (i != 1) {
          status = false;
          error.update(STAT_parsing[STATP_FORMAT] , line);
        }

        // the distribution is read even when the keyword line was wrong, so that
        // its lines are consumed before the next keyword is looked for

        switch (read_line) {

        case 1 : {
          // NOTE(review): the sum distribution is parsed with the CUMUL_THRESHOLD
          // constant and not with the cumul_threshold argument - confirm this is intended
          sum_dist = DiscreteParametric::parsing(error , in_file , line ,
                                                 NEGATIVE_BINOMIAL , CUMUL_THRESHOLD);
          if (!sum_dist) {
            status = false;
          }
          break;
        }

        case 2 : {
          dist = DiscreteParametric::parsing(error , in_file , line ,
                                             NEGATIVE_BINOMIAL , cumul_threshold);
          if (!dist) {
            status = false;
          }
          break;
        }
        }

        read_line++;
        if (read_line == 3) {
          break;
        }
      }
    }

    // the end of the file was reached before the three keyword lines were found

    if (read_line != 3) {
      status = false;
      error.update(STAT_parsing[STATP_FORMAT] , line);
    }

    // nothing but comments and blank lines may follow the elementary distribution

    while (buffer.readLine(in_file , false)) {
      line++;

#     ifdef DEBUG
      cout << line << "  " << buffer << endl;
#     endif

      position = buffer.first('#');
      if (position != RW_NPOS) {
        buffer.remove(position);
      }

      // readLine() is called with skipWhite = false, so indentation is kept: the line
      // is tokenized, as in the loop above, so that a line reduced to whitespace
      // (e.g. an indented comment) is not reported as a format error

      RWCTokenizer next(buffer);

      if (!((token = next()).isNull())) {
        status = false;
        error.update(STAT_parsing[STATP_FORMAT] , line);
      }
    }

    // status can only still be true here if both distributions were parsed

    if (status) {
      compound = new Compound(*sum_dist , *dist , cumul_threshold);
    }

    delete sum_dist;
    delete dist;
  }

  return compound;
}