Example #1
void MapStatsMapper::_writeStats(HadoopPipes::MapContext& context, const MapStats& stats)
{
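  // Log the element counts this mapper accumulated.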
  LOG_INFO("node count: " << _nodeCount);
  LOG_INFO("way count: " << _wayCount);
  if (stats.isValid())
  {
    pp::Hdfs fs;
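    // The task's partition number and work output dir come from the job configuration.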
    int partition = context.getJobConf()->getInt("mapred.task.partition");
    string workDir = context.getJobConf()->get("mapred.work.output.dir");

    LOG_INFO("Stats: " << stats.toString());

    QString path = QString("%1/part-%2.stats").
        arg(QString::fromStdString(workDir)).
        arg(partition, 5, 10, QChar('0'));

    LOG_INFO("Writing to: " << path);
    boost::shared_ptr<ostream> osStats(fs.create(path.toStdString()));

    stats.write(*osStats);
  }
  else
  {
    LOG_INFO("Stats are not valid.");
  }
}
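
The stats path is built with Qt's positional-argument formatting: the second arg() call takes a field width of 5, base 10, and '0' as the fill character, so partition 7 becomes part-00007.stats and the part files sort lexicographically. A minimal standalone sketch of just that formatting (the workDir value here is illustrative):

#include <QString>
#include <iostream>

int main()
{
  QString workDir = "/user/hoot/job-output"; // illustrative value
  int partition = 7;
  // arg(value, fieldWidth, base, fillChar) zero-pads the partition number.
  QString path = QString("%1/part-%2.stats")
      .arg(workDir)
      .arg(partition, 5, 10, QChar('0'));
  std::cout << path.toStdString() << std::endl; // /user/hoot/job-output/part-00007.stats
  return 0;
}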
Example #2
void WayJoinDriver::joinPointsToWays(QString input, QString out)
{
  // Create a job.
  Job job;

  job.setVerbose(Log::getInstance().getLevel() <= Log::Debug);
  // Set the job name.
  job.setName("WayJoin2 " + input.toStdString());

  // Set the input and output paths.
  LOG_INFO("input: " << input);
  job.setInput(input.toStdString());
  job.setOutput(out.toStdString());

  // Pass the min/max values as parameters to the job.
  MapStats stats;
  stats.readDir(input);
  stats.write(job.getConfiguration());
  LOG_INFO("MapStats: " << stats.toString().toStdString());

  // Pass the maximum way size as a parameter to the job.
  job.getConfiguration().setDouble("hoot.max.way.size", _maxWaySize);
  // Be nice: don't start the reduce tasks until most of the map tasks are done.
  job.getConfiguration().setDouble("mapred.reduce.slowstart.completed.maps", 0.98);

  job.getConfiguration().setInt(WayJoin2Mapper::elementStatusKey(), _newStatus.getEnum());
  job.getConfiguration().setLong(WayJoin2Mapper::nodeIdDeltaKey(), _nodeIdDelta);
  job.getConfiguration().setLong(WayJoin2Mapper::relationIdDeltaKey(), _relationIdDelta);
  job.getConfiguration().setLong(WayJoin2Mapper::wayIdDeltaKey(), _wayIdDelta);

  // Set up the mapper, reducer, and record reader/writer classes.
  job.setMapperClass(WayJoin2Mapper::className());
  job.setReducerClass(WayJoin2Reducer::className());
  job.setInputFormatClass(WayJoin2InputFormat::className());
  job.setRecordReaderClass(WayJoin2RecordReader::className());
  job.setRecordWriterClass(PbfRecordWriter::className());

  // Add all libraries on the configured Hadoop library path to the job.
  job.addLibraryDirs(ConfigOptions().getHootHadoopLibpath());

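  // Ship the hoot settings file along with the job.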
  job.addFile(ConfPath::search("hoot.json").toStdString());

  // This library will be used to provide mapper/reducer classes and anything else referenced
  // by the factory.
  job.addPlugin(getenv("HOOT_HOME") + string("/lib/libHootHadoop.so.1"));

  _addDefaultJobSettings(job);

  // Run the job.
  job.run();
}
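
Every parameter travels from the driver to the tasks through the string-keyed job configuration; the key names are exposed as static functions on WayJoin2Mapper (elementStatusKey() and friends) so the driver and the mapper cannot drift apart. A conceptual, self-contained sketch of that round trip, with a plain std::map standing in for the real Configuration class and an illustrative key name:

#include <iostream>
#include <map>
#include <string>

// Stand-in for the job Configuration: values travel as strings, keyed by name.
typedef std::map<std::string, std::string> Configuration;

// Key name defined once and shared, mirroring WayJoin2Mapper::wayIdDeltaKey().
// (The actual key string is illustrative.)
static std::string wayIdDeltaKey() { return "wayjoin2.way.id.delta"; }

int main()
{
  Configuration conf;

  // Driver side: serialize the parameter into the configuration.
  conf[wayIdDeltaKey()] = std::to_string(1000000L);

  // Task side: read the parameter back under the same key.
  long wayIdDelta = std::stol(conf.at(wayIdDeltaKey()));
  std::cout << "wayIdDelta = " << wayIdDelta << std::endl;
  return 0;
}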