Example #1
void WayJoinDriver::joinPointsToWays(QString input, QString out)
{
  // create a job
  Job job;

  job.setVerbose(Log::getInstance().getLevel() <= Log::Debug);
  // set the name
  job.setName("WayJoin2 " + input.toStdString());

  // set the input/output
  LOG_INFO("input: " << input);
  job.setInput(input.toStdString());
  job.setOutput(out.toStdString());

  // Read the map stats (e.g., min/max element IDs) from the input and pass them to the job.
  MapStats stats;
  stats.readDir(input);
  stats.write(job.getConfiguration());
  LOG_INFO("MapStats: " << stats.toString().toStdString());

  // Pass the maximum way size as a parameter to the job.
  job.getConfiguration().setDouble("hoot.max.way.size", _maxWaySize);
  // be nice and don't start the reduce tasks until most of the map tasks are done.
  job.getConfiguration().setDouble("mapred.reduce.slowstart.completed.maps", 0.98);

  // Pass the element status and the node/way/relation ID deltas to the mapper.
  job.getConfiguration().setInt(WayJoin2Mapper::elementStatusKey(), _newStatus.getEnum());
  job.getConfiguration().setLong(WayJoin2Mapper::nodeIdDeltaKey(), _nodeIdDelta);
  job.getConfiguration().setLong(WayJoin2Mapper::relationIdDeltaKey(), _relationIdDelta);
  job.getConfiguration().setLong(WayJoin2Mapper::wayIdDeltaKey(), _wayIdDelta);

  // set up the mapper and reducer classes.
  job.setMapperClass(WayJoin2Mapper::className());
  job.setReducerClass(WayJoin2Reducer::className());
  job.setInputFormatClass(WayJoin2InputFormat::className());
  job.setRecordReaderClass(WayJoin2RecordReader::className());
  job.setRecordWriterClass(PbfRecordWriter::className());

  // Adds all libraries in the configured hoot.hadoop.libpath directories to the job.
  job.addLibraryDirs(ConfigOptions().getHootHadoopLibpath());

  job.addFile(ConfPath::search("hoot.json").toStdString());

  // This library will be used to provide mapper/reducer classes and anything else referenced
  // by the factory.
  job.addPlugin(getenv("HOOT_HOME") + string("/lib/libHootHadoop.so.1"));

  _addDefaultJobSettings(job);

  // run the job.
  job.run();
}
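A minimal usage sketch, assuming a WayJoinDriver constructor that takes the maximum way size (feeding the _maxWaySize member used above); the paths and value are hypothetical:

// Illustrative only: run the WayJoin2 (points-to-ways) job.
WayJoinDriver driver(0.002);  // assumed ctor argument: maximum way size
driver.joinPointsToWays("tmp/input.osm.pbf", "tmp/way-join-2-out");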
Example #2
void WayJoinDriver::joinWaysToNodes(QString input, QString out)
{
  // create a job
  Job job;

  job.setVerbose(Log::getInstance().getLevel() <= Log::Debug);
  // set the name
  job.setName("WayJoin1 " + input.toStdString());

  // be nice and don't start the reduce tasks until most of the map tasks are done.
  job.getConfiguration().setDouble("mapred.reduce.slowstart.completed.maps", 0.98);

  // set the input/output
  job.setInput(input.toStdString());
  job.setOutput(out.toStdString());

  // set up the mapper and reducer classes.
  job.setMapperClass(WayJoin1Mapper::className());
  job.setReducerClass(WayJoin1Reducer::className());
  job.setInputFormatClass(PbfInputFormat::className());
  job.setRecordReaderClass(PbfRecordReader::className());
  job.setRecordWriterClass(CppSeqFileRecordWriter::className());

  // Adds all libraries in the configured hoot.hadoop.libpath directories to the job.
  job.addLibraryDirs(ConfigOptions().getHootHadoopLibpath());

  job.addFile(ConfPath::search("hoot.json").toStdString());

  // This library will be used to provide mapper/reducer classes and anything else referenced
  // by the factory.
  job.addPlugin(getenv("HOOT_HOME") + string("/lib/libHootHadoop.so.1"));

  _addDefaultJobSettings(job);

  // run the job.
  job.run();

  LOG_INFO("Finished job.");
}
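Given the class names, the WayJoin1 job presumably produces the intermediate records that the WayJoin2 job consumes, so the two passes would typically be chained. A hedged sketch of one plausible sequencing (paths hypothetical):

// Illustrative only: chain the two passes through a temporary directory.
WayJoinDriver driver(0.002);  // assumed ctor argument: maximum way size
driver.joinWaysToNodes("tmp/input.osm.pbf", "tmp/way-join-1");
driver.joinPointsToWays("tmp/way-join-1", "tmp/way-join-2");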
Example #3
const cv::Mat& PaintNodesDriver::calculateDensity(const Envelope& e, double pixelSize,
  QString input)
{
  Hdfs fs;
  if (!fs.exists(input.toStdString()))
  {
    throw HootException("Input file does not exist: " + input);
  }

  _width = ceil(e.getWidth() / pixelSize) + 1;
  _height = ceil(e.getHeight() / pixelSize) + 1;
  QString cacheName = QString("%1/density-%2x%3").arg(input).arg(_width).arg(_height);

  // If the density values have already been calculated and cached, reuse them.
  if (fs.exists(cacheName.toStdString()))
  {
    LOG_INFO("Loading cache file " << cacheName);
    LOG_INFO("If you want to recalculate the cache then run this first: ");
    LOG_INFO("  hadoop fs -rmr " << cacheName);
    _loadOutputDir(cacheName);
  }
  else
  {
    // create a job
    Job job;

    // set the name
    job.setName("Paint Nodes " + input.toStdString());

    fs.mkdirs("tmp");
    QString output = "tmp/" + QUuid::createUuid().toString().replace("{", "").replace("}", "") +
        "-PaintNodes";

    // set the input/output
    job.setInput(input.toStdString());
    job.setOutput(output.toStdString());

    // be nice and don't start the reduce tasks until most of the map tasks are done.
    job.getConfiguration().setDouble("mapred.reduce.slowstart.completed.maps", 0.98);

    job.getConfiguration().set("hoot.envelope", MapStats::toString(e));
    job.getConfiguration().setDouble("hoot.pixel.size", pixelSize);

    // set up the mapper and reducer classes.
    job.setMapperClass(PaintNodesMapper::className());
    job.setReducerClass(PaintNodesReducer::className());
    job.setInputFormatClass(PbfInputFormat::className());
    job.setRecordReaderClass(PbfRecordReader::className());
    job.setRecordWriterClass(CppSeqFileRecordWriter::className());

    // Adds all libraries in the configured hoot.hadoop.libpath directories to the job.
    job.addLibraryDirs(conf().getList("hoot.hadoop.libpath",
      "${HOOT_HOME}/lib/;${HOOT_HOME}/local/lib/;${HADOOP_HOME}/c++/Linux-amd64-64/lib/;"
      "${HOOT_HOME}/pretty-pipes/lib/"));

    job.addFile(ConfPath::search("hoot.json").toStdString());

    // This library will be used to provide mapper/reducer classes and anything else referenced
    // by the factory.
    job.addPlugin(getenv("HOOT_HOME") + string("/lib/libHootHadoop.so.1"));

    _addDefaultJobSettings(job);

    // run the job.
    job.run();

    _loadOutputDir(output);

    // if the input is a directory
    if (fs.getFileStatus(input.toStdString()).isDir())
    {
      // store the density info for later use.
      fs.rename(output.toStdString(), cacheName.toStdString());
    }
    else
    {
      fs.deletePath(output.toStdString(), true);
    }
  }

  return _c;
}
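A minimal calling sketch, assuming PaintNodesDriver is default-constructible; the envelope, pixel size, and input path are hypothetical:

// Illustrative only: compute a node-density raster over a whole-world envelope.
geos::geom::Envelope e(-180.0, 180.0, -90.0, 90.0);  // (x1, x2, y1, y2)
PaintNodesDriver driver;
const cv::Mat& density = driver.calculateDensity(e, 0.1, "tmp/input.osm.pbf");
LOG_INFO("density raster: " << density.cols << "x" << density.rows);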
Example #4
void TileOpDriver::apply(QString in, vector<Envelope> envelopes, double buffer,
  QString out)
{
  // create a job
  pp::Job job;

  job.setVerbose(Log::getInstance().getLevel() <= Log::Debug);
  // set the name
  job.setName("TileOpDriver");

  // be nice and don't start the reduce tasks until most of the map tasks are done.
  job.getConfiguration().setDouble("mapred.reduce.slowstart.completed.maps", 0.98);

  // set the input/output
  pp::Hdfs fs;
  job.setInput(fs.getAbsolutePath(in.toStdString()));
  job.setOutput(fs.getAbsolutePath(out.toStdString()));

  if (_op == 0)
  {
    throw HootException("You must specify an operation.");
  }

  // Serialize the tile operation and pass it to the reducer through the job configuration.
  stringstream ss;
  ObjectOutputStream oos(ss);
  oos.writeObject(*_op);
  oos.flush();
  LOG_INFO("Serialized operation size: " << ss.str().size());
  job.getConfiguration().setBytes(TileOpReducer::opKey(), ss.str());

  job.getConfiguration().set(TileOpMapper::envelopesKey(), _toString(envelopes));
  job.getConfiguration().set(TileOpMapper::replacementsKey(),
    fs.getAbsolutePath(in.toStdString()));
  job.getConfiguration().setDouble(TileOpMapper::maxWaySizeKey(), buffer);
  job.getConfiguration().setDouble(TileOpMapper::bufferKey(), buffer);

  // read the max ids from in and write them to the configuration
  MapStats stats;
  stats.readDir(in);
  stats.write(job.getConfiguration());

  // set up the mapper and reducer classes.
  job.setMapperClass(TileOpMapper::className());
  job.setReducerClass(TileOpReducer::className());
  job.setInputFormatClass(PbfInputFormat::className());
  job.setRecordReaderClass(PbfRecordReader::className());
  job.setRecordWriterClass(PbfRecordWriter::className());

  // Adds all libraries in the configured hoot.hadoop.libpath directories to the job.
  job.addLibraryDirs(conf().getList("hoot.hadoop.libpath",
    "${HOOT_HOME}/lib/;${HOOT_HOME}/local/lib/;${HADOOP_HOME}/c++/Linux-amd64-64/lib/;"
    "${HOOT_HOME}/pretty-pipes/lib/"));

  // Debug logging: report what the configuration resolves "${HOOT_HOME}" to.
  LOG_INFO("Hoot home: " << conf().getString("foo", "${HOOT_HOME}"));

  const std::vector<std::string>& dirs = job.getLibraryDirs();
  for (size_t i = 0; i < dirs.size(); i++)
  {
    LOG_INFO("lib dir: " << dirs[i]);
  }

  job.addFile(ConfPath::search("hoot.json").toStdString());

  // if GDAL isn't installed on all nodes, then we'll need to copy over the projection info.
  QString gdalData = QString(getenv("GDAL_DATA"));
  if (!gdalData.isEmpty())
  {
    QDir gdalDir(gdalData);
    if (!gdalDir.exists())
    {
      LOG_WARN("Could not find GDAL_DATA directory: " << gdalData);
    }
    else
    {
      QStringList filters;
      filters << "*.csv";
      QFileInfoList fil = gdalDir.entryInfoList(filters, QDir::Files);
      for (int i = 0; i < fil.size(); i++)
      {
        LOG_INFO("Adding GDAL_DATA file: " << fil[i].absoluteFilePath());
        job.addFile(fil[i].absoluteFilePath().toStdString());
      }
    }
  }

  // This library will be used to provide mapper/reducer classes and anything else referenced
  // by the factory.
  job.addPlugin(getenv("HOOT_HOME") + string("/lib/libHootHadoop.so.1"));

  // serialize all the configuration settings.
  job.getConfiguration().set(settingsConfKey().toStdString(),
                             conf().toString().toUtf8().constData());

  _addDefaultJobSettings(job);

  QStringList fileDeps = conf().getList(fileDepsKey(), QStringList());
  for (int i = 0; i < fileDeps.size(); i++)
  {
    job.addFile(fileDeps[i].toStdString());
  }

  // Conflation runs can go for a _long_ time, so set the task timeout to six hours.
  job.getConfiguration().setInt("mapred.task.timeout", 6 * 3600 * 1000);

  // run the job.
  job.run();
}
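A hedged usage sketch: the example only shows that _op must be non-null before apply() runs, so the setter below is an assumed stand-in for however the operation is actually injected:

// Illustrative only: run a tile operation over one hypothetical tile.
TileOpDriver driver;
// driver.setOperation(op);  // assumed setter for the _op member (not shown above)
vector<Envelope> tiles;
tiles.push_back(Envelope(0.0, 10.0, 0.0, 10.0));  // (x1, x2, y1, y2)
driver.apply("tmp/input.osm.pbf", tiles, 0.001, "tmp/tile-op-out");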