void MapStatsMapper::_writeStats(HadoopPipes::MapContext& context, const MapStats& stats)
{
  LOG_INFO("node count: " << _nodeCount);
  LOG_INFO("way count: " << _wayCount);

  if (stats.isValid())
  {
    pp::Hdfs fs;
    // determine this task's partition and the job's work output directory.
    int partition = context.getJobConf()->getInt("mapred.task.partition");
    string workDir = context.getJobConf()->get("mapred.work.output.dir");
    LOG_INFO("Stats: " << stats.toString());
    // write the stats to a zero-padded, per-partition file (e.g. part-00000.stats).
    QString path = QString("%1/part-%2.stats").
        arg(QString::fromStdString(workDir)).
        arg(partition, 5, 10, QChar('0'));
    LOG_INFO("Writing to: " << path);
    boost::shared_ptr<ostream> osStats(fs.create(path.toStdString()));
    stats.write(*osStats);
  }
  else
  {
    LOG_INFO("Stats are not valid.");
  }
}
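// ---------------------------------------------------------------------------
// Illustration only (a minimal standalone sketch, not part of the mapper
// above): shows how the QString::arg() chain zero-pads the partition number
// into the per-partition stats file name. The work directory and partition
// values are made-up examples; in the real job they come from
// "mapred.work.output.dir" and "mapred.task.partition".
// ---------------------------------------------------------------------------
#include <QString>
#include <iostream>

void printExampleStatsPath()
{
  QString workDir = "/user/hoot/out/_temporary/_attempt_0001"; // hypothetical value
  int partition = 7;
  QString path = QString("%1/part-%2.stats").
      arg(workDir).
      arg(partition, 5, 10, QChar('0'));
  // prints: /user/hoot/out/_temporary/_attempt_0001/part-00007.stats
  std::cout << path.toStdString() << std::endl;
}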
void WayJoinDriver::joinPointsToWays(QString input, QString out)
{
  // create a job
  Job job;
  job.setVerbose(Log::getInstance().getLevel() <= Log::Debug);
  // set the name
  job.setName("WayJoin2 " + input.toStdString());

  // set the input/output
  LOG_INFO("input: " << input);
  job.setInput(input.toStdString());
  job.setOutput(out.toStdString());

  // Pass the min/max values as parameters to the job.
  MapStats stats;
  stats.readDir(input);
  stats.write(job.getConfiguration());
  LOG_INFO("MapStats: " << stats.toString().toStdString());

  // Pass the maximum way size as a parameter to the job.
  job.getConfiguration().setDouble("hoot.max.way.size", _maxWaySize);

  // be nice and don't start the reduce tasks until most of the map tasks are done.
  job.getConfiguration().setDouble("mapred.reduce.slowstart.completed.maps", 0.98);

  job.getConfiguration().setInt(WayJoin2Mapper::elementStatusKey(), _newStatus.getEnum());
  job.getConfiguration().setLong(WayJoin2Mapper::nodeIdDeltaKey(), _nodeIdDelta);
  job.getConfiguration().setLong(WayJoin2Mapper::relationIdDeltaKey(), _relationIdDelta);
  job.getConfiguration().setLong(WayJoin2Mapper::wayIdDeltaKey(), _wayIdDelta);

  // setup the mapper and reducer classes.
  job.setMapperClass(WayJoin2Mapper::className());
  job.setReducerClass(WayJoin2Reducer::className());
  job.setInputFormatClass(WayJoin2InputFormat::className());
  job.setRecordReaderClass(WayJoin2RecordReader::className());
  job.setRecordWriterClass(PbfRecordWriter::className());

  // Adds all libraries in this directory to the job.
  job.addLibraryDirs(ConfigOptions().getHootHadoopLibpath());

  job.addFile(ConfPath::search("hoot.json").toStdString());

  // This library will be used to provide mapper/reducer classes and anything else referenced
  // by the factory.
  job.addPlugin(getenv("HOOT_HOME") + string("/lib/libHootHadoop.so.1"));

  _addDefaultJobSettings(job);

  // run the job.
  job.run();
}
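// ---------------------------------------------------------------------------
// Hypothetical usage sketch (illustration only): how a caller might invoke the
// join above. The HDFS paths are made-up examples, the header name/path is
// assumed, and constructing/configuring the WayJoinDriver (element status, id
// deltas, max way size) is assumed to happen elsewhere, so the driver is taken
// by reference rather than constructed here.
// ---------------------------------------------------------------------------
#include "WayJoinDriver.h" // header name/path assumed
#include <QString>

void runWayJoinExample(WayJoinDriver& driver)
{
  QString input = "tmp/example-input.pbf";  // hypothetical HDFS input directory
  QString output = "tmp/example-way-join";  // hypothetical HDFS output directory
  driver.joinPointsToWays(input, output);
}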
void TileOpDriver::apply(QString in, vector<Envelope> envelopes, double buffer, QString out)
{
  // create a job
  pp::Job job;
  job.setVerbose(Log::getInstance().getLevel() <= Log::Debug);
  // set the name
  job.setName("TileOpDriver");

  // be nice and don't start the reduce tasks until most of the map tasks are done.
  job.getConfiguration().setDouble("mapred.reduce.slowstart.completed.maps", 0.98);

  // set the input/output
  pp::Hdfs fs;
  job.setInput(fs.getAbsolutePath(in.toStdString()));
  job.setOutput(fs.getAbsolutePath(out.toStdString()));

  if (_op == 0)
  {
    throw HootException("You must specify an operation.");
  }
  stringstream ss;
  ObjectOutputStream oos(ss);
  oos.writeObject(*_op);
  oos.flush();
  LOG_INFO("oos size: " << ss.str().size());
  job.getConfiguration().setBytes(TileOpReducer::opKey(), ss.str());

  job.getConfiguration().set(TileOpMapper::envelopesKey(), _toString(envelopes));
  job.getConfiguration().set(TileOpMapper::replacementsKey(),
    fs.getAbsolutePath(in.toStdString()));
  job.getConfiguration().setDouble(TileOpMapper::maxWaySizeKey(), buffer);
  job.getConfiguration().setDouble(TileOpMapper::bufferKey(), buffer);

  // read the max ids from in and write them to the configuration
  MapStats stats;
  stats.readDir(in);
  stats.write(job.getConfiguration());

  // setup the mapper and reducer classes.
  job.setMapperClass(TileOpMapper::className());
  job.setReducerClass(TileOpReducer::className());
  job.setInputFormatClass(PbfInputFormat::className());
  job.setRecordReaderClass(PbfRecordReader::className());
  job.setRecordWriterClass(PbfRecordWriter::className());

  // Adds all libraries in this directory to the job.
  job.addLibraryDirs(conf().getList("hoot.hadoop.libpath",
    "${HOOT_HOME}/lib/;${HOOT_HOME}/local/lib/;${HADOOP_HOME}/c++/Linux-amd64-64/lib/;"
    "${HOOT_HOME}/pretty-pipes/lib/"));

  LOG_INFO("Hoot home: " << conf().getString("foo", "${HOOT_HOME}"));

  const std::vector<std::string>& dirs = job.getLibraryDirs();
  for (size_t i = 0; i < dirs.size(); i++)
  {
    LOG_INFO("lib dir: " << dirs[i]);
  }

  job.addFile(ConfPath::search("hoot.json").toStdString());

  // if GDAL isn't installed on all nodes, then we'll need to copy over the projection info.
  QString gdalData = QString(getenv("GDAL_DATA"));
  if (gdalData != "")
  {
    QDir gdalDir(gdalData);
    if (gdalDir.exists() == false)
    {
      LOG_WARN("Could not find GDAL_DIR: " << gdalData);
    }
    else
    {
      QStringList filters;
      filters << "*.csv";
      QFileInfoList fil = gdalDir.entryInfoList(filters, QDir::Files);
      for (int i = 0; i < fil.size(); i++)
      {
        LOG_INFO("Adding GDAL_DATA file: " << fil[i].absoluteFilePath());
        job.addFile(fil[i].absoluteFilePath().toStdString());
      }
    }
  }

  // This library will be used to provide mapper/reducer classes and anything else referenced
  // by the factory.
  job.addPlugin(getenv("HOOT_HOME") + string("/lib/libHootHadoop.so.1"));

  // serialize all the configuration settings.
  job.getConfiguration().set(settingsConfKey().toStdString(),
    conf().toString().toUtf8().constData());

  _addDefaultJobSettings(job);

  QStringList fileDeps = conf().getList(fileDepsKey(), QStringList());
  for (int i = 0; i < fileDeps.size(); i++)
  {
    job.addFile(fileDeps[i].toStdString());
  }

  // conflation runs can go for a _long_ time. Setting timeout to 6 hours.
  job.getConfiguration().setInt("mapred.task.timeout", 6 * 3600 * 1000);

  // run the job.
  job.run();
}
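// ---------------------------------------------------------------------------
// Hypothetical usage sketch (illustration only): how the tile operation above
// might be driven. The envelope coordinates, buffer, and HDFS paths are made-up
// values, the header names/paths are assumed, and setting the operation that
// apply() checks via _op is assumed to happen elsewhere, so the driver is taken
// by reference rather than constructed here.
// ---------------------------------------------------------------------------
#include "TileOpDriver.h"        // header name/path assumed
#include <geos/geom/Envelope.h>
#include <QString>
#include <vector>

void runTileOpExample(TileOpDriver& driver)
{
  // two adjacent ~0.1 degree tiles (minx, maxx, miny, maxy) as GEOS envelopes
  std::vector<geos::geom::Envelope> tiles;
  tiles.push_back(geos::geom::Envelope(-77.1, -77.0, 38.8, 38.9));
  tiles.push_back(geos::geom::Envelope(-77.0, -76.9, 38.8, 38.9));
  double buffer = 0.001; // example buffer around each tile, in degrees
  driver.apply("tmp/example-input.pbf", tiles, buffer, "tmp/example-tile-op");
}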