Exemplo n.º 1
0
// This loads a graph in the format of PIC 2011.
int LoadGraphUAI(ifstream &file_graph, 
                 FactorGraph *factor_graph) {
  string line = "";

  // Read header.
  while (line == "") {
    getline(file_graph, line);
    if (file_graph.eof()) return -1;
    TrimComments("#", &line);
    Trim("\t ", &line);
  }
  if (line != "MARKOV") {
    cout << "Wrong header: " << line << endl;
    return -1;
  }

  int num_factor_log_potentials = 0;

  // Read number of multi-variables.
  getline(file_graph, line);
  TrimComments("#", &line);
  int num_multi_variables = atoi(line.c_str());
  vector<MultiVariable*> multi_variables(num_multi_variables);

  // Read cardinality of each multi-variable.
  getline(file_graph, line);
  TrimComments("#", &line);
  vector<string> fields;
  StringSplit(line, "\t ", &fields);
  assert(fields.size() == num_multi_variables);
  for (int i = 0; i < num_multi_variables; ++i) {
    int num_states = atoi(fields[i].c_str());
    MultiVariable* multi_variable = 
      factor_graph->CreateMultiVariable(num_states);
    multi_variables[i] = multi_variable;
  }

  // Read number of factors (includes unary factors).
  getline(file_graph, line);
  TrimComments("#", &line);
  int num_factors = atoi(line.c_str());

  // Read factors (just the structure).
  vector<Factor*> factors(num_factors);
  vector<MultiVariable*> unary_factors(num_factors);
  for (int i = 0; i < num_factors; ++i) {
    getline(file_graph, line);
    TrimComments("#", &line);
    fields.clear();
    StringSplit(line, "\t ", &fields);

    // Read linked multi-variables.
    int num_links = atoi(fields[0].c_str());
    int offset = 1;
    assert(num_links == fields.size() - offset);
    if (num_links == 1) { 
      // Unary factor; in our formalism this is just a multi-variable.
      int k = atoi(fields[offset].c_str());
      unary_factors[i] = multi_variables[k];
    } else {
      vector<MultiVariable*> multi_variables_local(num_links);
      for (int j = 0; j < num_links; ++j) {
        int k = atoi(fields[offset + j].c_str());
        multi_variables_local[j] = multi_variables[k];
      }
      // For now, set an empty vector of additional log potentials.
      vector<double> additional_log_potentials;
      Factor *factor = 
        factor_graph->CreateFactorDense(multi_variables_local,
                                        additional_log_potentials);
      factors[i] = factor;
    }
  }

  // Read factors (the log-potentials).
  // IMPORTANT: the scores in the UAI files are potentials (not log-potentials!)
  for (int i = 0; i < num_factors; ++i) {
    Factor *factor = factors[i];
    line = "";
    while (line == "") {
      getline(file_graph, line);
      TrimComments("#", &line);
      Trim(" \t", &line);
    }
    int num_configurations = atoi(line.c_str());
    if (factor == NULL) { 
      // Unary factor; in our formalism this is just a multi-variable.
      assert(unary_factors[i] != NULL);
      MultiVariable *multi_variable = unary_factors[i];
      int index = 0;
      assert(num_configurations == multi_variable->GetNumStates());
      while (index < num_configurations) {
        getline(file_graph, line);
        TrimComments("#", &line);
        Trim(" \t", &line);
        fields.clear();
        StringSplit(line, "\t ", &fields);
        for (int j = 0; j < fields.size(); ++j) {
          double log_potential = LOG_STABLE(atof(fields[j].c_str()));
          multi_variable->SetLogPotential(index, log_potential);
          assert(index < num_configurations);
          ++index;
        }
      }
    } else {
      int num_links = static_cast<FactorDense*>(factor)->
        GetNumMultiVariables();
      int index = 0;
      assert(num_configurations == 
             static_cast<FactorDense*>(factor)->GetNumConfigurations());

      //int r = factor_graph->GetNumVariables() + num_factor_log_potentials;
      num_factor_log_potentials += num_configurations;
      //static_cast<FactorMultiDense*>(factor)->SetFirstGlobalIndex(r);
      vector<double> additional_log_potentials(num_configurations);
      while (index < num_configurations) {
        getline(file_graph, line);
        TrimComments("#", &line);
        Trim(" \t", &line);
        fields.clear();
        StringSplit(line, "\t ", &fields);
        for (int j = 0; j < fields.size(); ++j) {
          double log_potential = LOG_STABLE(atof(fields[j].c_str()));
          additional_log_potentials[index] = log_potential;
          assert(index < num_configurations);
          ++index;
        }
      }
      factor->SetAdditionalLogPotentials(additional_log_potentials);
    }
  }

  cout << "Read " << num_multi_variables << " multi-variables and "
       << num_factors << " factors." << endl;

  return 0;
}
Exemplo n.º 2
0
int LoadGraph(ifstream &file_graph, 
              FactorGraph *factor_graph) {
  string line;

  // Read number of variables.
  getline(file_graph, line);
  //cout << line << endl;
  if (file_graph.eof()) return -1;
  TrimComments("#", &line);
  int num_variables = atoi(line.c_str());

  // Read number of factors.
  getline(file_graph, line);
  //cout << line << endl;
  TrimComments("#", &line);
  int num_factors = atoi(line.c_str());

  // Read variable log-potentials.
  vector<BinaryVariable*> variables(num_variables);
  for (int i = 0; i < num_variables; ++i) {
    getline(file_graph, line);
    TrimComments("#", &line);
    double log_potential = atof(line.c_str());
    BinaryVariable* variable = factor_graph->CreateBinaryVariable();
    variable->SetLogPotential(log_potential);
    variables[i] = variable;
  }

  // Read factors.
  int num_messages = 0;
  int num_factor_log_potentials = 0;
  for (int i = 0; i < num_factors; ++i) {
    getline(file_graph, line);
    TrimComments("#", &line);
    vector<string> fields;
    StringSplit(line, "\t ", &fields);

    // Read linked variables.
    int offset = 1;
    int num_links = atoi(fields[1].c_str());
    vector<BinaryVariable*> binary_variables(num_links);
    vector<bool> negated(num_links, false);
    ++offset;

    if (fields[0] == "PAIR" && num_links != 2) {
      cout << "Error: PAIR factor must be attached to 2 variables." << endl;
      return -1;
    }
    for (int j = 0; j < num_links; ++j) {
      int k = atoi(fields[offset+j].c_str());
      if (k < 0) {
        negated[j] = true;
        k = -k;
      }
      --k;
      binary_variables[j] = variables[k];
    }

    // Read factor type.
    Factor *factor;
    if (fields[0] == "XOR") {
      factor = factor_graph->CreateFactorXOR(binary_variables, negated);
    } else if (fields[0] == "XOROUT") {
      factor = factor_graph->CreateFactorXOROUT(binary_variables, negated);
    } else if (fields[0] == "ATMOSTONE") {
      factor = factor_graph->CreateFactorAtMostOne(binary_variables,
                                                   negated);
    } else if (fields[0] == "OR") {
      factor = factor_graph->CreateFactorOR(binary_variables, negated);
    } else if (fields[0] == "OROUT") {
      factor = factor_graph->CreateFactorOROUT(binary_variables, negated);
    } else if (fields[0] == "ANDOUT") {
      factor = factor_graph->CreateFactorANDOUT(binary_variables, negated);
    } else if (fields[0] == "BUDGET") {
      // Read the budget value.
      int budget = atoi(fields[offset+num_links].c_str());      
      factor = factor_graph->CreateFactorBUDGET(binary_variables, negated, budget);      
    } else if (fields[0] == "PAIR") {
      // If it is a soft factor, read the factor log-potential.
      double log_potential = atof(fields[offset+num_links].c_str());
      //int r = num_variables + num_factor_log_potentials;
      ++num_factor_log_potentials;
      //static_cast<FactorPAIR*>(factor)->SetGlobalIndex(r);
      //static_cast<FactorPAIR*>(factor)->SetFactorLogPotential(log_potential);
      factor = factor_graph->CreateFactorPAIR(binary_variables, log_potential);
    } else if (fields[0] == "DENSE") {
      // Read the number of multi-variables.
      int num_multi_variables = atoi(fields[offset+num_links].c_str());
      // Read the number of states for each multi-variable.
      vector<MultiVariable*> multi_variables(num_multi_variables);
      int num_configurations = 1;
      int total_states = 0;
      for (int k = 0; k < num_multi_variables; ++k) {
        int num_states = atoi(fields[offset+num_links+1+k].c_str());
        num_configurations *= num_states;
        vector<BinaryVariable*> states(binary_variables.begin() + total_states,
                                       binary_variables.begin() + total_states +
                                         num_states);
        total_states += num_states;
        multi_variables[k] = factor_graph->CreateMultiVariable(states);
      }

      // Read the additional log-potentials.
      vector<double> additional_scores;
      for (int index = 0; index < num_configurations; ++index) {
        // Read the factor log-potential for this configuration.
        double log_potential = atof(fields[offset+num_links+1+num_multi_variables+index].c_str());
        additional_scores.push_back(log_potential);
      }

      // Create the factor and declare it.
      factor = new FactorDense;
      factor_graph->DeclareFactor(factor, binary_variables, true);
      static_cast<FactorDense*>(factor)->Initialize(multi_variables);
      factor->SetAdditionalLogPotentials(additional_scores);
      num_factor_log_potentials += additional_scores.size();
      cout << "Read dense factor." << endl;
    } else if (fields[0] == "SEQUENCE" ||
               fields[0] == "SEQUENCE_BUDGET") {
      bool has_budget = false;
      if (fields[0] == "SEQUENCE_BUDGET") has_budget = true;
      // Read the sequence length.
      int length = atoi(fields[offset+num_links].c_str());
      // If budget, read the budget.
      int budget = -1;
      if (has_budget) {
        ++offset; // TODO: Make sure this is fine.
        budget = atoi(fields[offset+num_links].c_str());
      }

      // Read the number of states for each position in the sequence.
      vector<int> num_states(length);
      int total_states = 0;
      for (int k = 0; k < length; ++k) {
        num_states[k] = atoi(fields[offset+num_links+1+k].c_str());
        total_states += num_states[k];
      }

      // Read the additional log-potentials.
      vector<double> additional_scores;
      int index = 0;
      for (int i = 0; i <= length; ++i) {
        // If i == 0, the previous state is the start symbol.
        int num_previous_states = (i > 0)? num_states[i - 1] : 1;
        // If i == length-1, the previous state is the final symbol.
        int num_current_states = (i < length)? num_states[i] : 1;
        for (int j = 0; j < num_previous_states; ++j) {
          for (int k = 0; k < num_current_states; ++k) {
            double log_potential = atof(fields[offset+num_links+1+length+index].c_str());
            additional_scores.push_back(log_potential);
            ++index;
          }
        }
      }
      if (fields.size() != offset+num_links+1+length+index) {
        cout << fields.size() << " "
             << offset+num_links+1+length+index;
        assert(false);
      }

      // Create the factor and declare it.
      if (has_budget) {
        factor = new FactorSequenceBudget;
        factor_graph->DeclareFactor(factor, binary_variables, true);
        static_cast<FactorSequenceBudget*>(factor)->
          Initialize(num_states, budget);
        factor->SetAdditionalLogPotentials(additional_scores);
        num_factor_log_potentials += additional_scores.size();
        cout << "Read sequence budget factor." << endl;
      } else {
        factor = new FactorSequence;
        factor_graph->DeclareFactor(factor, binary_variables, true);
        static_cast<FactorSequence*>(factor)->Initialize(num_states);
        factor->SetAdditionalLogPotentials(additional_scores);
        num_factor_log_potentials += additional_scores.size();
        cout << "Read sequence factor." << endl;
      }        
    } else if (fields[0] == "GENERAL_TREE" ||
               fields[0] == "GENERAL_TREE_COUNTS") {
      // Read the number of nodes in the tree.
      int length = atoi(fields[offset+num_links].c_str());

      // Read the number of states for each node in the tree.
      vector<int> num_states(length);
      int total_states = 0;
      for (int k = 0; k < length; ++k) {
        num_states[k] = atoi(fields[offset+num_links+1+k].c_str());
        total_states += num_states[k];
      }

      // Read the parent node for each node in the tree.
      vector<int> parents(length);
      for (int k = 0; k < length; ++k) {
        parents[k] = atoi(fields[offset+num_links+1+length+k].c_str());
      }

      // Read the additional log-potentials.
      vector<double> additional_scores;
      int index = 0;
      for (int i = 1; i < length; ++i) {
        int p = parents[i];
        int num_previous_states = num_states[p];
        int num_current_states = num_states[i];
        for (int k = 0; k < num_previous_states; ++k) {
          for (int j = 0; j < num_current_states; ++j) {
            double log_potential = atof(fields[offset+num_links+1+length+length+index].c_str());
            additional_scores.push_back(log_potential);
            ++index;
          }
        }
      }
      if (fields.size() != offset+num_links+1+length+length+index) {
        cout << fields.size() << " "
             << offset+num_links+1+length+length+index << endl;
        assert(false);
      }

      // Create the factor and declare it.
      if (fields[0] == "GENERAL_TREE") {
        factor = new FactorGeneralTree;
        factor_graph->DeclareFactor(factor, binary_variables, true);
        static_cast<FactorGeneralTree*>(factor)->Initialize(parents, num_states);
      } else {
        factor = new FactorGeneralTreeCounts;
        factor_graph->DeclareFactor(factor, binary_variables, true);
        static_cast<FactorGeneralTreeCounts*>(factor)->Initialize(parents, num_states);
      }

      factor->SetAdditionalLogPotentials(additional_scores);
      num_factor_log_potentials += additional_scores.size();
      if (fields[0] == "GENERAL_TREE") {
        cout << "Read general tree factor." << endl;
      } else {
        cout << "Read general tree counts factor." << endl;
      }
    } else if (fields[0] == "ARBORESCENCE") {
      // Read the sentence length.
      int sentence_length = atoi(fields[offset+num_links].c_str());
      // Read the arcs.
      vector<Arc*> arcs(binary_variables.size());
      for (int r = 0; r < binary_variables.size(); ++r) {
        //cout << fields.size() << " " << offset+num_links+2*r+1 << endl;
        int h = atoi(fields[offset+num_links+1+2*r].c_str());
        int m = atoi(fields[offset+num_links+1+2*r+1].c_str());
        Arc *arc = new Arc(h, m);
        arcs[r] = arc;
      }
      factor = new FactorTree;
      factor_graph->DeclareFactor(factor, binary_variables, true);
      static_cast<FactorTree*>(factor)->Initialize(sentence_length, arcs);
      for (int r = 0; r < arcs.size(); ++r) {
        delete arcs[r];
      }
      cout << "Read tree factor." << endl;
    } else if (fields[0] == "HEAD_AUTOMATON") {
      // Read the length of the automaton.
      int length = binary_variables.size() + 1;
      vector<vector<int> > index_siblings(length, vector<int>(length+1, -1));
      int total = 0;
      vector<Sibling*> siblings;
      vector<double> additional_scores;
      for (int m = 0; m < length; ++m) {
        for (int s = m+1; s <= length; ++s) {
          // Create a fake sibling.
          Sibling *sibling = new Sibling(0, m, s);
          siblings.push_back(sibling);
          // Read the sibling log-potential.
          double log_potential = atof(fields[offset+num_links+total].c_str());
          additional_scores.push_back(log_potential);
          ++total;
        }
      }
      factor = new FactorHeadAutomaton;
      factor_graph->DeclareFactor(factor, binary_variables, true);
      static_cast<FactorHeadAutomaton*>(factor)->Initialize(length, siblings);
      for (int r = 0; r < siblings.size(); ++r) {
        delete siblings[r];
      }
      factor->SetAdditionalLogPotentials(additional_scores);
      num_factor_log_potentials += additional_scores.size();
      cout << "Read head automaton factor." << endl;
    } else if (fields[0] == "SEQUENCE_COMPRESSOR") {
      // Read the length of the automaton.
      int length = binary_variables.size();
      vector<vector<int> > index_siblings(length, vector<int>(length+1, -1));
      int total = 0;
      vector<Sibling*> siblings;
      vector<double> additional_scores;
      for (int m = 0; m < length; ++m) {
        for (int s = m+1; s <= length; ++s) {
          // Create a fake sibling.
          Sibling *sibling = new Sibling(0, m, s);
          siblings.push_back(sibling);
          // Read the sibling log-potential.
          double log_potential = atof(fields[offset+num_links+total].c_str());
          additional_scores.push_back(log_potential);
          ++total;
        }
      }
      factor = new FactorSequenceCompressor;
      factor_graph->DeclareFactor(factor, binary_variables, true);
      static_cast<FactorSequenceCompressor*>(factor)->Initialize(length, siblings);
      for (int r = 0; r < siblings.size(); ++r) {
        delete siblings[r];
      }
      factor->SetAdditionalLogPotentials(additional_scores);
      num_factor_log_potentials += additional_scores.size();
      cout << "Read sequence compressor factor." << endl;
    } else if (fields[0] == "GRANDPARENT_HEAD_AUTOMATON") {
      // Read the number of grandparents.
      int num_grandparents = atoi(fields[offset+num_links].c_str());
      // Read the length of the automaton.
      int length = binary_variables.size() + 1 - num_grandparents;
      vector<vector<int> > index_siblings(length, vector<int>(length+1, -1));
      int total = 0;
      vector<Grandparent*> grandparents;
      vector<double> additional_scores;
      for (int g = 0; g < num_grandparents; ++g) {
        for (int m = 1; m < length; ++m) {
          // Create a fake grandparent.
          Grandparent *grandparent = new Grandparent(g, 0, m);
          grandparents.push_back(grandparent);
          // Read the sibling log-potential.
          double log_potential = atof(fields[offset+num_links+1+total].c_str());
          additional_scores.push_back(log_potential);
          ++total;
        }
      }
      vector<Sibling*> siblings;
      for (int m = 0; m < length; ++m) {
        for (int s = m+1; s <= length; ++s) {
          // Create a fake sibling.
          Sibling *sibling = new Sibling(0, m, s);
          siblings.push_back(sibling);
          // Read the sibling log-potential.
          double log_potential = atof(fields[offset+num_links+1+total].c_str());
          additional_scores.push_back(log_potential);
          ++total;
        }
      }
      factor = new FactorGrandparentHeadAutomaton;
      factor_graph->DeclareFactor(factor, binary_variables, true);
      static_cast<FactorGrandparentHeadAutomaton*>(factor)->
        Initialize(length, num_grandparents, siblings, grandparents);
      for (int r = 0; r < grandparents.size(); ++r) {
        delete grandparents[r];
      }
      for (int r = 0; r < siblings.size(); ++r) {
        delete siblings[r];
      }
      factor->SetAdditionalLogPotentials(additional_scores);
      num_factor_log_potentials += additional_scores.size();
      cout << "Read grandparent head automaton factor." << endl;
    } else {
      cout << "Unknown factor type: " << fields[0] << endl;
      return -1;
    }
  }

  // Read blank line.
  getline(file_graph, line);

  cout << "Read " << num_variables << " variables and "
       << num_factors << " factors." << endl;

  //ofstream file_out("test.fg", ios_base::out);
  //factor_graph->Print(file_out);
  //file_out.flush();
  //file_out.clear();
  //file_out.close();

  //factor_graph->Initialize(variables, factors, num_messages);

  return 0;
}