Ejemplo n.º 1
0
// Reads two document files and appends their contents to doc_pairs_ as
// aligned pairs: document i of source_file is paired with document i of
// target_file.
//
// Each file is parsed by ReadDocuments(), which is handed the matching
// vocabulary and stemming flag. The source file is fully read before the
// target file is opened.
//
// Returns false if either file cannot be opened or if the two files yield a
// different number of documents; doc_pairs_ is not modified in those cases.
// Returns true otherwise.
bool ParallelCorpus::ReadDocumentPairs(const string& source_file,
                                       const string& target_file) {
  vector<Document> source_docs;
  std::ifstream source_in(source_file.c_str());
  if (!source_in) {
    return false;
  }
  ReadDocuments(&source_in, &source_docs, &source_vocab_, source_stemming_);
  source_in.close();

  vector<Document> target_docs;
  std::ifstream target_in(target_file.c_str());
  if (!target_in) {
    return false;
  }
  ReadDocuments(&target_in, &target_docs, &target_vocab_, target_stemming_);
  target_in.close();

  // The pairing below is purely positional, so the counts must match.
  if (source_docs.size() != target_docs.size()) {
    return false;
  }

  for (size_t i = 0; i < source_docs.size(); ++i) {
    // Append an empty pair first and swap the documents into it in place;
    // filling a local pair and then push_back()-ing it would deep-copy both
    // documents and defeat the purpose of the swaps.
    doc_pairs_.push_back(DocumentPair());
    DocumentPair& doc_pair = doc_pairs_.back();
    doc_pair.first.swap(source_docs[i]);
    doc_pair.second.swap(target_docs[i]);
  }
  return true;
}
Ejemplo n.º 2
0
// Command-line front end that builds an OpenCL source file for a selected
// platform/device and prints the build log.
//
// Options (see the option table below): an input file (required, may be given
// positionally), an output file (parsed, but not used anywhere in this
// function), platform and device indices (both default to 0), compile options
// passed to program.build(), --list to enumerate platforms/devices, and --cpp
// to emit "<input>.hpp" / "<input>.cpp" wrapping the source as a C string
// after a successful build.
//
// Returns EXIT_SUCCESS after a successful build (and successful file
// generation when --cpp is given) or after --list; returns EXIT_FAILURE on
// option errors, unreadable input, invalid platform/device index, a failed
// build, or a failure to write the generated files.
int main(int argc, char *argv[]) {
  namespace po = boost::program_options;
  namespace fs = boost::filesystem;

  po::options_description desc("Allowed options");
  desc.add_options()
      ("input,i", po::value<std::string>(), "input file")
      ("output,o", po::value<std::string>(), "output file")
      ("cpp,x", "produce cpp/header file on successful compilation")
      ("platform,p", po::value<unsigned>(), "set platform")
      ("device,d", po::value<unsigned>(), "set device")
      ("options,c", po::value<std::string>(), "compile options")
      ("help,h", "print this help message")
      ("list,l", "list platforms and devices");

  po::positional_options_description pos;
  pos.add("input", 1);
  pos.add("output", 1);

  po::variables_map vm;
  try {
    po::store(
        po::command_line_parser(argc, argv).options(desc)
            .positional(pos).run(), vm);
    // notify() can throw as well, so it belongs inside the same handler.
    po::notify(vm);
  } catch(const std::exception &e) {
    std::cerr << "options parsing error.\n" << e.what() << "\n"
      << desc << std::endl;
    return EXIT_FAILURE;
  }

  if(vm.count("help")) {
    std::cout << desc << std::endl;
    // NOTE(review): --help has always exited with a failure status here;
    // kept as is, confirm whether that is intentional.
    return EXIT_FAILURE;
  }

  if(vm.count("list")) {
    std::cout << "Accessible platforms:\n";
    std::vector<cl::platform_ref> platforms;
    cl::platform_ref::get_platforms(platforms);
    for(unsigned i=0; i<platforms.size(); ++i) {
      cl::platform_ref p = platforms[i];
      std::cout << i << ") " << p.name() << "\n";

      std::cout << "\tAssociated devices:\n";
      std::vector<cl::device_ref> devices;
      p.get_devices(devices);
      for(unsigned j=0; j<devices.size(); ++j) {
        cl::device_ref d = devices[j];
        std::cout << "\t" << j << ") " << d.name() << "\n";
      }
    }
    return EXIT_SUCCESS;
  }

  std::string input_path;
  std::string options;
  unsigned platform_id = 0;
  unsigned device_id = 0;

  if(vm.count("input")) {
    input_path = vm["input"].as<std::string>();
  } else {
    std::cerr << "must provide input file.\n";
    return EXIT_FAILURE;
  }
  // The "output" option is accepted by the parser but nothing below consumes
  // it, so it is not read here.
  if(vm.count("platform")) {
    platform_id = vm["platform"].as<unsigned>();
  }
  if(vm.count("device")) {
    device_id = vm["device"].as<unsigned>();
  }
  if(vm.count("options")) {
    options = vm["options"].as<std::string>();
  }

  // Load the whole source, normalising every line to end in '\n'.
  std::ifstream in(input_path.c_str());
  if(!in) {
    // Without this check an unreadable file would be built as empty source.
    std::cerr << "cannot open input file: " << input_path << "\n";
    return EXIT_FAILURE;
  }
  std::stringstream ss;
  std::string line;
  while(std::getline(in, line)) {
    ss << line << "\n";
  }
  in.close();
  const std::string source = ss.str();

  std::vector<cl::platform_ref> platforms;
  std::vector<cl::device_ref> devices;
  cl::platform_ref::get_platforms(platforms);

  if(platform_id >= platforms.size()) {
    std::cerr << "invalid platform id\n";
    return EXIT_FAILURE;
  }
  cl::platform_ref platform = platforms[platform_id];
  platform.get_devices(devices);

  if(device_id >= devices.size()) {
    std::cerr << "invalid device id\n";
    return EXIT_FAILURE;
  }
  cl::device_ref device = devices[device_id];

  cl::context_ref context(platform, device);
  cl::program_ref program(context, source);
  std::cout << "context is " << platform.name() << "/"
    << device.name() << "\n";
  std::cout << "OpenCL compiler options: " << options << "\n";

  // Exit status reported to the caller; flipped on build or write failure.
  int status = EXIT_SUCCESS;
  try {
    program.build(options);
    std::cout << "build successful!\n";

    if(vm.count("cpp")) {
      // Base name = text between the last '/' and the last '.' after it.
      // (fs::path::stem() is avoided: older versions of boost don't have it.)
      const std::size_t slash = input_path.find_last_of('/');
      const std::size_t start =
        (slash == std::string::npos) ? 0 : slash + 1;
      const std::size_t dot = input_path.find_last_of('.');
      const std::size_t length =
        (dot == std::string::npos || dot < start)
          ? std::string::npos   // no extension in the file name: take it all
          : dot - start;
      std::string file_base = input_path.substr(start, length);
      std::string file_base_upper = file_base;
      boost::to_upper(file_base_upper);

      // write header
      const std::string header_path = input_path + ".hpp";
      std::ofstream header_out(header_path.c_str());
      std::stringstream filess;
      filess << "#ifndef _" << file_base_upper << "_OPENCL_HPP_" << "\n";
      filess << "#define _" << file_base_upper << "_OPENCL_HPP_" << "\n";
      filess << "\n";
      filess << "extern const char *" << file_base << "_opencl_source;" << "\n";
      filess << "\n";
      filess << "#endif\n\n";
      header_out << filess.str();
      header_out.close();
      if(!header_out) {
        std::cerr << "cannot write " << header_path << "\n";
        status = EXIT_FAILURE;
      }

      // write cpp file
      // Escape backslashes and double quotes so each source line can be
      // embedded in a C string literal.
      boost::regex rx;
      rx.assign(
        "(\\\\)|"
        "(\")");
      const char *format = 
        "(?1\\\\\\\\)"
        "(?2\\\\\")";

      const std::string cpp_path = input_path + ".cpp";
      std::ofstream source_out(cpp_path.c_str());
      // Re-use the text that was actually built instead of re-reading the
      // file from disk.
      std::istringstream source_in(source);
      filess.str("");
      filess << "const char *" << file_base << "_opencl_source = " << "\n";
      while(std::getline(source_in, line)) {
        std::string sanitized_line = 
          boost::regex_replace(line, rx, format, 
            boost::match_default | boost::format_all);
        filess << "\t\"" << sanitized_line << "\\n\"\n";
      }
      filess << ";";
      source_out << filess.str();
      source_out.close();
      if(!source_out) {
        std::cerr << "cannot write " << cpp_path << "\n";
        status = EXIT_FAILURE;
      }
    }
  } catch(const cl::cl_error &) {
    std::cout << "error building program\n";
    status = EXIT_FAILURE;
  }

  // The log is printed for successful and failed builds alike.
  std::cout << "build log:\n" << program.get_build_log(device)
    << "\n";

  return status;
}
Ejemplo n.º 3
0
// Reads two line-aligned text files and appends one DocumentPair per line to
// doc_pairs_; each side of a pair holds the single sentence read from that
// line.
//
// Every line is split on spaces and tabs. Each token is lowercased when
// use_lowercase_ is set, stemmed when the corresponding *_stemming_ flag is
// set, and then added to the corresponding vocabulary via AddWord(); the
// value AddWord() returns is what is stored in the sentence.
//
// Returns false if either file is not readable or if the files contain a
// different number of lines; doc_pairs_ is not modified in those cases.
// The source file is fully processed before the target file is opened, so
// source_vocab_ may already have grown when false is returned.
bool ParallelCorpus::ReadParallelData(const string& source_file,
                                      const string& target_file) {
  typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
  boost::char_separator<char> sep(" \t");
  std::string line;

  vector<Sentence> source_sents;
  std::ifstream source_in(source_file.c_str());
  if (!source_in.good()) {
    return false;
  }
  while (getline(source_in, line)) {
    // Build the sentence directly inside the vector to avoid copying it.
    source_sents.push_back(Sentence());
    Sentence& current_sentence = source_sents.back();
    tokenizer line_tokenizer(line, sep);
    for (tokenizer::iterator it = line_tokenizer.begin();
         it != line_tokenizer.end(); ++it) {
      string token = *it;
      if (use_lowercase_) {
        boost::to_lower(token);
      }
      if (source_stemming_) {
        Stem(token);
      }
      current_sentence.push_back(source_vocab_.AddWord(token));
    }
  }
  source_in.close();

  vector<Sentence> target_sents;
  std::ifstream target_in(target_file.c_str());
  if (!target_in.good()) {
    return false;
  }
  while (getline(target_in, line)) {
    target_sents.push_back(Sentence());
    Sentence& current_sentence = target_sents.back();
    tokenizer line_tokenizer(line, sep);
    for (tokenizer::iterator it = line_tokenizer.begin();
         it != line_tokenizer.end(); ++it) {
      string token = *it;
      if (use_lowercase_) {
        boost::to_lower(token);
      }
      if (target_stemming_) {
        Stem(token);
      }
      current_sentence.push_back(target_vocab_.AddWord(token));
    }
  }
  target_in.close();

  // Pairing is by line number, so both files must have the same line count.
  if (source_sents.size() != target_sents.size()) {
    return false;
  }

  // A pair is stored for every line, including lines where either side is
  // empty (an earlier filter that skipped such lines is disabled).
  for (size_t i = 0; i < source_sents.size(); ++i) {
    // Append the pair first and fill it in place so the finished pair is not
    // copied a second time by push_back().
    doc_pairs_.push_back(DocumentPair());
    DocumentPair& doc_pair = doc_pairs_.back();
    doc_pair.first.push_back(source_sents[i]);
    doc_pair.second.push_back(target_sents[i]);
  }
  return true;
}