Пример #1
0
/**
 * Format 0: text point file -> binary point file.
 *
 * Expected command line:
 *   <prog> 0 txt_file_in binary_file_out size dimension
 *
 * Loads the points from argv[2] with Points::InitializeFromFile, passing the
 * parsed [size] and [dimension], then writes them to argv[3] with
 * Points::SavePoints.
 *
 * @param argc number of command-line arguments; must be exactly 6.
 * @param argv command-line arguments; argv[1] is expected to be "0".
 * @return 0 on success, -1 when the arguments are missing or invalid.
 */
int Format0(int argc, char** argv)
{
  // Check the argument count before touching argv[1]: with argc == 1,
  // argv[1] is the terminating null pointer and must not be dereferenced.
  if (argc != 6) {
    std::cout
        << "-- For type 0, read txt file and output binary file, [size] and [dimension] is essential"
        << std::endl;
    std::cout << "   " << argv[0]
        << " 0 txt_file_in binary_file_out size dimension" << std::endl;
    return -1;
  }
  assert(argv[1][0] == '0' && argv[1][1] == 0);

  const int size = atoi(argv[4]);
  const int dimension = atoi(argv[5]);
  // atoi yields 0 for non-numeric input; reject that (and negative values)
  // instead of handing a meaningless size/dimension to the loader.
  if (size <= 0 || dimension <= 0) {
    std::cout << "-- [size] and [dimension] must be positive integers, got \""
        << argv[4] << "\" and \"" << argv[5] << "\"" << std::endl;
    return -1;
  }

  Points<DefaultDataTypes> points;
  points.InitializeFromFile(argv[2], size, dimension);
  points.SavePoints(argv[3]);

  return 0;
}
Пример #2
0
int main(int argc, char** argv)
{
  typedef typename DefaultDataTypes::Value ValueType;
  typedef typename DefaultDataTypes::Dist DistType;
  typedef typename DefaultDataTypes::Index IndexType;
  typedef typename DefaultDataTypes::Dim DimType;

  if (argc < 2) {
    cout << "Usage: " << argv[0]
        << " config_file_name [config_key=config_value ...]" << endl;
    return -1;
  }
  //load config from config file
  Config config(argv[1]);

  //load config from argv, note that this may override the key_value of the original
  for (int i = 2; i < argc; i++) {
    string k_v(argv[i]);
    size_t pos = k_v.find("=");
    if (pos == string::npos) {
      cout << "Unrecognized arg:" << k_v << endl;
      cout << "Usage: " << argv[0]
          << " config_file_name [config_key=config_value ...]" << endl;
      return -1;
    } else {
      string key = k_v.substr(0, pos);
      string value = k_v.substr(pos + 1);
      config.Add(key, value);
    }
  }

  if (config.Read<bool>(kShowConfigKey)) {
    cout << "==============config content=============" << endl;
    cout << config;
    cout << "=========================================" << endl;
  }

  unsigned int random_seed = config.Read<unsigned int>(kRandomSeedKey);
  srand(random_seed);

  Stopwatch timer("");
  timer.Reset();
  timer.Start();

  Points<DefaultDataTypes> dps;
  string input_data_file_name = config.Read<string>(kDataFileNameKey);
  bool data_format_binary_flag = config.Read<bool>(kDataFormatBinaryKey);
  if (data_format_binary_flag) {
    dps.LoadPoints(input_data_file_name.c_str());
  } else {
    dps.InitializeFromFile(input_data_file_name.c_str(),
        config.Read<IndexType>(kTextDataSizeKey),
        config.Read<DimType>(kTextDataDimKey));
  }

  Points<DefaultDataTypes> qps;
  string input_query_file_name = config.Read<string>(kQueryFileNameKey);
  if (data_format_binary_flag) {
    qps.LoadPoints(input_query_file_name.c_str());
  } else {
    qps.InitializeFromFile(input_query_file_name.c_str(),
        config.Read<IndexType>(kTextQuerySizeKey),
        config.Read<DimType>(kTextQueryDimKey));
  }
  assert(dps.dim_ == qps.dim_);

  cout << "- Reading Data Finished (" << timer.GetTime() << " seconds)" << endl;
  if (config.Read<bool>(kSavePointsKey)) {
    cout << "Saving data points to "
        << config.Read<string>(kSaveDataPointsFileName) << endl;
    dps.SavePoints(config.Read<string>(kSaveDataPointsFileName).c_str());
    cout << "Saving query points to "
        << config.Read<string>(kSaveQueryPointsFileName) << endl;
    qps.SavePoints(config.Read<string>(kSaveQueryPointsFileName).c_str());
  }

  size_t knn = config.Read<size_t>(kNearKey);

  timer.Reset();
  timer.Start();
  // load ground truth
  Groundtruth<DefaultDataTypes> groundtruth;
  groundtruth.Initialize(dps, qps, knn,
      &ComputeEuclideanDistance<ValueType, DistType>);

  string gt_file_name_prefix = config.Read<string>(
      kGroundtruthFileNamePrefixKey);
  const size_t kMaxFileNameLength = 256;
  char gt_file_name[kMaxFileNameLength];
  sprintf(gt_file_name, "%s_d%d_q%d_k%d", gt_file_name_prefix.c_str(),
      (int) dps.size_, (int) qps.size_, (int) knn);
  FILE *gt_file = fopen(gt_file_name, "rb");
  if (gt_file != NULL) {
    std::cout << "-- Groundtruth file exists, " << gt_file_name << std::endl;
    std::cout << "-- Loading Groundtruth ..." << std::endl;
    groundtruth.Load(gt_file);
    fclose(gt_file);
  } else {
    std::cout << "-- Groundtruth file not exists, " << gt_file_name
        << std::endl;
    std::cout << "-- Building Groundtruth ..." << std::endl;
    groundtruth.Build();
    std::cout << "-- Saving Groundtruth to disk..." << std::endl;
    gt_file = fopen(gt_file_name, "wb");
    groundtruth.Save(gt_file);
    fclose(gt_file);
  }
  cout << "- GroundTruth Finished (" << timer.GetTime() << " seconds)" << endl;

  size_t repeat_count = config.Read<size_t>(kRepeatCountKey);

  size_t kg_max_expansion = 0;
  if (config.Read<bool>(kBatchTestKey)) { // Batch test
    vector<size_t> expansions = config.ReadVector<size_t>(kGnnsExpansionsKey);
    vector<size_t> max_expansions = config.ReadVector<size_t>(
        kGnnsMaxExpansionsKey);
    assert(expansions.size() == max_expansions.size());
    vector<size_t>::iterator me_it = max_expansions.begin();
    for (vector<size_t>::iterator e_it = expansions.begin();
        e_it != expansions.end(); ++me_it, ++e_it) {
      assert(me_it != max_expansions.end());
      size_t expansion = *e_it;
      size_t max_expansion = *me_it;
      assert(max_expansion >= expansion);
      if (max_expansion > kg_max_expansion) {
        kg_max_expansion = max_expansion;
      }
    }
  } else {
    kg_max_expansion = config.Read<size_t>(kGnnsMaxExpansionKey);
  }

  timer.Reset();
  timer.Start();
  string kg_file_name = config.Read<string>(kKnnGraphFileName);
  KnnGraph<DefaultDataTypes> kg(kg_file_name.c_str(), kg_max_expansion);
  cout << "KnnGraph Loaded (" << timer.GetTime() << " seconds)" << endl;
  Gnns<DefaultDataTypes> gnns(dps, kg);

  cout << "== Start Test ==" << endl;
  if (config.Read<bool>(kBatchTestKey)) { // Batch test
    vector<size_t> restarts = config.ReadVector<size_t>(kGnnsRestartsKey);
    vector<size_t> expansions = config.ReadVector<size_t>(kGnnsExpansionsKey);
    vector<size_t> max_expansions = config.ReadVector<size_t>(
        kGnnsMaxExpansionsKey);
    assert(expansions.size() == max_expansions.size());
    vector<size_t> greedy_steps = config.ReadVector<size_t>(
        kGnnsGreedyStepsKey);
    for (vector<size_t>::iterator r_it = restarts.begin();
        r_it != restarts.end(); ++r_it) {
      size_t restart = *r_it;
      vector<size_t>::iterator me_it = max_expansions.begin();
      for (vector<size_t>::iterator e_it = expansions.begin();
          e_it != expansions.end(); ++me_it, ++e_it) {
        assert(me_it != max_expansions.end());
        size_t expansion = *e_it;
        size_t max_expansion = *me_it;
        double last_hit_rate = -1;
        for (vector<size_t>::iterator gs_it = greedy_steps.begin();
            gs_it != greedy_steps.end(); ++gs_it) {
          srand(random_seed);
          size_t greedy_step = *gs_it;
          double hit_rate = test(knn, qps, groundtruth, gnns, restart,
              expansion, max_expansion, greedy_step, repeat_count);
          if (abs(last_hit_rate - hit_rate) < 0.003) {
//          if (abs(last_hit_rate - hit_rate) < 0.003 && hit_rate > 0.95) {
            break;
          }
          last_hit_rate = hit_rate;
        }
      }
    }
  } else {
    size_t restart = config.Read<size_t>(kGnnsRestartKey);
    size_t expansion = config.Read<size_t>(kGnnsExpansionKey);
    size_t max_expansion = config.Read<size_t>(kGnnsMaxExpansionKey);
    size_t greedy_step = config.Read<size_t>(kGnnsGreedyStepKey);
    srand(random_seed);
    test(knn, qps, groundtruth, gnns, restart, expansion, max_expansion,
        greedy_step, repeat_count);
  }
  cout << "== Finish Test ==" << endl;

  kg.FreeGraph();
}