Example #1
void Pipe::TrainEpoch(int epoch) {
  Instance *instance;
  Parts *parts = CreateParts();
  Features *features = CreateFeatures();
  vector<double> scores;
  vector<double> gold_outputs;
  vector<double> predicted_outputs;
  double total_cost = 0.0;
  double total_loss = 0.0;
  double eta;
  int num_instances = instances_.size();
  double lambda = 1.0/(options_->GetRegularizationConstant() *
                       (static_cast<double>(num_instances)));
  timeval start, end;
  gettimeofday(&start, NULL);
  int time_decoding = 0;
  int time_scores = 0;
  int num_mistakes = 0;

  LOG(INFO) << " Iteration #" << epoch + 1;

  dictionary_->StopGrowth();

  for (int i = 0; i < num_instances; i++) {
    // Global step counter across epochs (used by the learning rate schedules).
    int t = num_instances * epoch + i;
    instance = instances_[i];
    MakeParts(instance, parts, &gold_outputs);
    MakeFeatures(instance, parts, features);

    // If using only supported features, we must remove the unsupported ones.
    // Otherwise they would corrupt the computation of the squared norm
    // of the feature difference vector in MIRA.
    if (options_->only_supported_features()) {
      RemoveUnsupportedFeatures(instance, parts, features);
    }

    timeval start_scores, end_scores;
    gettimeofday(&start_scores, NULL);
    ComputeScores(instance, parts, features, &scores);
    gettimeofday(&end_scores, NULL);
    time_scores += diff_ms(end_scores, start_scores);

    if (options_->GetTrainingAlgorithm() == "perceptron" ||
        options_->GetTrainingAlgorithm() == "mira") {
      timeval start_decoding, end_decoding;
      gettimeofday(&start_decoding, NULL);
      decoder_->Decode(instance, parts, scores, &predicted_outputs);
      gettimeofday(&end_decoding, NULL);
      time_decoding += diff_ms(end_decoding, start_decoding);

      if (options_->GetTrainingAlgorithm() == "perceptron") {
        for (int r = 0; r < parts->size(); ++r) {
          if (!NEARLY_EQ_TOL(gold_outputs[r], predicted_outputs[r], 1e-6)) {
            ++num_mistakes;
          }
        }
        eta = 1.0;
      } else {
        CHECK(false) << "Plain mira is not implemented yet.";
      }

      MakeGradientStep(parts, features, eta, t, gold_outputs,
                       predicted_outputs);

    } else if (options_->GetTrainingAlgorithm() == "svm_mira" ||
               options_->GetTrainingAlgorithm() == "crf_mira" ||
               options_->GetTrainingAlgorithm() == "svm_sgd" ||
               options_->GetTrainingAlgorithm() == "crf_sgd") {
      double loss;
      timeval start_decoding, end_decoding;
      gettimeofday(&start_decoding, NULL);
      if (options_->GetTrainingAlgorithm() == "svm_mira" ||
          options_->GetTrainingAlgorithm() == "svm_sgd") {
        // Do cost-augmented inference.
        double cost;
        decoder_->DecodeCostAugmented(instance, parts, scores, gold_outputs,
                                      &predicted_outputs, &cost, &loss);
        total_cost += cost;
      } else {
        // Do marginal inference.
        double entropy;
        decoder_->DecodeMarginals(instance, parts, scores, gold_outputs,
                                  &predicted_outputs, &entropy, &loss);
        CHECK_GE(entropy, 0.0);
      }
      gettimeofday(&end_decoding, NULL);
      time_decoding += diff_ms(end_decoding, start_decoding);

      if (loss < 0.0) {
        if (!NEARLY_EQ_TOL(loss, 0.0, 1e-9)) {
          LOG(INFO) << "Warning: negative loss set to zero: " << loss;
        }
        loss = 0.0;
      }
      total_loss += loss;

      // Compute difference between predicted and gold feature vectors.
      FeatureVector difference;
      MakeFeatureDifference(parts, features, gold_outputs, predicted_outputs,
                            &difference);

      // Get the stepsize.
      if (options_->GetTrainingAlgorithm() == "svm_mira" ||
          options_->GetTrainingAlgorithm() == "crf_mira") {
        double squared_norm = difference.GetSquaredNorm();
        double threshold = 1e-9;
        if (loss < threshold || squared_norm < threshold) {
          eta = 0.0;
        } else {
          eta = loss / squared_norm;
          if (eta > options_->GetRegularizationConstant()) {
            eta = options_->GetRegularizationConstant();
          }
        }
      } else {
        if (options_->GetLearningRateSchedule() == "fixed") {
          eta = options_->GetInitialLearningRate();
        } else if (options_->GetLearningRateSchedule() == "invsqrt") {
          eta = options_->GetInitialLearningRate() /
            sqrt(static_cast<double>(t+1));
        } else if (options_->GetLearningRateSchedule() == "inv") {
          eta = options_->GetInitialLearningRate() /
            static_cast<double>(t+1);
        } else if (options_->GetLearningRateSchedule() == "lecun") {
          eta = options_->GetInitialLearningRate() /
            (1.0 + (static_cast<double>(t) / static_cast<double>(num_instances)));
        } else {
          CHECK(false) << "Unknown learning rate schedule: "
                       << options_->GetLearningRateSchedule();
        }

        // Scale the parameter vector (only for SGD). This applies the L2
        // regularizer as weight decay: w <- (1 - eta * lambda) * w.
        double decay = 1 - eta * lambda;
        CHECK_GT(decay, 0.0);
        parameters_->Scale(decay);
      }

      MakeGradientStep(parts, features, eta, t, gold_outputs,
                       predicted_outputs);
    } else {
      CHECK(false) << "Unknown algorithm: " << options_->GetTrainingAlgorithm();
    }
  }

  // Compute the regularization value (halved squared L2 norm of the weights).
  // Since lambda = 1/(C*N), this equals ||w||^2 / (2*C).
  double regularization_value =
      lambda * static_cast<double>(num_instances) *
      parameters_->GetSquaredNorm() / 2.0;

  delete parts;
  delete features;

  gettimeofday(&end, NULL);
  LOG(INFO) << "Time: " << diff_ms(end,start);
  LOG(INFO) << "Time to score: " << time_scores;
  LOG(INFO) << "Time to decode: " << time_decoding;
  LOG(INFO) << "Number of Features: " << parameters_->Size();
  if (options_->GetTrainingAlgorithm() == "perceptron" ||
      options_->GetTrainingAlgorithm() == "mira") {
    LOG(INFO) << "Number of mistakes: " << num_mistakes;
  }
  LOG(INFO) << "Total Cost: " << total_cost << "\t"
            << "Total Loss: " << total_loss << "\t"
            << "Total Reg: " << regularization_value << "\t"
            << "Total Loss+Reg: " << total_loss + regularization_value << endl;
}
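
The step-size logic above is the heart of each update. For the MIRA variants, eta is the passive-aggressive closed-form step loss / ||difference||^2, clipped at the regularization constant C; for the SGD variants, eta follows a decaying schedule and the weights are additionally shrunk by (1 - eta * lambda). Below is a distilled, self-contained sketch of those two computations; the helper names MiraStepSize and SgdStepSize are illustrative, not part of the original code.

#include <algorithm>
#include <cmath>
#include <string>

// MIRA passive-aggressive step: eta = min(C, loss / ||phi(gold) - phi(predicted)||^2),
// guarding against a near-zero loss or norm, as in the svm_mira/crf_mira branch.
double MiraStepSize(double loss, double squared_norm, double C) {
  const double threshold = 1e-9;
  if (loss < threshold || squared_norm < threshold) return 0.0;
  return std::min(C, loss / squared_norm);
}

// SGD learning rate schedules, as in the svm_sgd/crf_sgd branch. t is the
// global step counter and num_instances the size of the training set.
double SgdStepSize(const std::string &schedule, double eta0, int t,
                   int num_instances) {
  if (schedule == "fixed") return eta0;
  if (schedule == "invsqrt") return eta0 / std::sqrt(static_cast<double>(t + 1));
  if (schedule == "inv") return eta0 / static_cast<double>(t + 1);
  if (schedule == "lecun") {
    return eta0 / (1.0 + static_cast<double>(t) /
                         static_cast<double>(num_instances));
  }
  return 0.0;  // unknown schedule; the real code aborts with CHECK(false)
}

Example #2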
extern "C" int DLLEXPORT new_layout2d(
    LayoutRect * layout_rects,
    unsigned int num,
    scalar sheet_x,
    scalar sheet_y,
    scalar cut_size,
    Layout ** res)
{
    Rect sheet;
    sheet.Size[0] = sheet_x;
    sheet.Size[1] = sheet_y;
    Parts parts;
    for (unsigned int i = 0; i < num; i++)
    {
        auto rect = &layout_rects[i];
        Part part(rect->size[0], rect->size[1],
                  rect->can_rotate != 0, rect->amount);
        part.Tag = (int)i;
        parts.push_back(part);
    }

    // merge parts with the same relevant characteristics
    std::map<PartKey, std::list<Part*> > unique_parts_map;
    for (auto i = parts.begin(); i != parts.end(); i++) {
        PartKey part_key;
        part_key.rect = i->rect;
        part_key.can_rotate = i->Rotate;
        part_key.normalize();
        unique_parts_map[part_key].push_back(&*i);
    }
    Parts unique_parts;
    for (auto i = unique_parts_map.begin(); i != unique_parts_map.end(); i++) {
        Part part;
        part.rect = i->first.rect;
        part.Rotate = i->first.can_rotate;
        part.parts = i->second;
        // calculate combined amount
        part.Amount = 0;
        std::for_each(part.parts.begin(), part.parts.end(),
                      [&part](Part * el) { part.Amount += el->Amount; });

        unique_parts.push_back(part);
    }

    LayoutBuilder layout_builder;
    // initialize amounts vector
    Amounts remains(unique_parts.size());
    // assign amount offsets to parts
    // and amounts to remains
    auto offset = 0;
    std::for_each(unique_parts.begin(),
                  unique_parts.end(),
                  [&offset, &remains](Part & part) {
                      part.AmountOffset = offset++;
                      remains[part.AmountOffset] = part.Amount;
                  });
    // initialize sizes lookups
    Sizes sizes[2];
    for (auto s = 0; s <= 1; s++)
    {
        for (auto pPart = unique_parts.begin(); pPart != unique_parts.end(); pPart++)
            sizes[s].AddPart(*pPart, s);

        // order from big to small (std::sort requires a strict weak
        // ordering, so use std::greater rather than std::greater_equal)
        std::sort(sizes[s].begin(), sizes[s].end(), std::greater<Size>());
        for (auto pSize = sizes[s].begin(); pSize != sizes[s].end(); pSize++)
        {
            std::sort(pSize->other_sizes.begin(), pSize->other_sizes.end(),
                    std::greater<OtherSize>());
            pSize->other_sizes.SetMin();
        }
    }
    scalar min_size[2] = {0, 0};  // zero-initialize: reading indeterminate values is undefined
    Layout2d optimizer(sizes, min_size, &remains);
    optimizer.put_SawThickness(cut_size);
    int ret = optimizer.new_optimize(sheet, layout_builder) ? 1 : 0;
    if (ret) {
        std::unique_ptr<Layout> layout(new Layout);
        layout_builder.simplify();
        layout_builder.check();
        layout_builder.to_layout(*layout);
        *res = layout.release();
        // report back new amounts
        for (size_t i = 0; i < parts.size(); i++) {
            layout_rects[i].amount = parts[i].Amount;
        }
    }
    return ret;
}
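
A minimal caller sketch, assuming scalar is a floating-point typedef and that LayoutRect exposes exactly the fields used above (size[0], size[1], can_rotate, amount). The header name, sheet dimensions, and kerf width are illustrative, not taken from the library.

// #include "layout2d.h"  // hypothetical header declaring LayoutRect, Layout,
                          // scalar and new_layout2d
#include <cstdio>

int main()
{
    // Two part types to be cut from the sheet.
    LayoutRect rects[2] = {};
    rects[0].size[0] = 600; rects[0].size[1] = 400;
    rects[0].can_rotate = 1;   // may be rotated by 90 degrees
    rects[0].amount = 10;      // pieces requested
    rects[1].size[0] = 300; rects[1].size[1] = 200;
    rects[1].can_rotate = 0;
    rects[1].amount = 4;

    Layout *layout = nullptr;
    // 2440 x 1220 sheet with a saw (kerf) thickness of 4.
    if (new_layout2d(rects, 2, 2440, 1220, 4, &layout)) {
        // Per the "report back new amounts" step, amount now holds the
        // number of pieces actually placed.
        std::printf("placed: %d and %d\n",
                    (int)rects[0].amount, (int)rects[1].amount);
        delete layout;  // the function transfers ownership via layout.release()
    }
    return 0;
}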