void PaintNodesDriver::_loadOutputDir(const QString& output) { int h = _height; int w = _width; LOG_INFO("w: " << _width << " h: " << _height); _c = cv::Mat(cvSize(_width, _height), CV_32SC1); for (int py = 0; py < h; py++) { int32_t* row1 = _c.ptr<int32_t>(py); for (int px = 0; px < w; px++) { row1[px] = 0; } } Hdfs fs; vector<FileStatus> status = fs.listStatus(output.toStdString()); for (size_t i = 0; i < status.size(); i++) { QString fn = QString::fromStdString(status[i].getPath()); if (fn.endsWith(".csq")) { _loadPartFile(fn); } } }
void WriteOsmSqlStatementsReducer::_writeElementCounts() { boost::shared_ptr<Configuration> config(HadoopPipesUtils::toConfiguration(_context->getJobConf())); //if it doesn't already exist, add a dir under our main output dir to store the aux element count //files in Hdfs fs; const string mainOutputDir = config->get("mapred.output.dir"); const string auxDir = mainOutputDir + "/elementCounts"; if (!fs.exists(auxDir)) { fs.mkdirs(auxDir); } const string outputFile = auxDir + "/elementCount-" + UuidHelper::createUuid().toString().toStdString(); boost::shared_ptr<ostream> out(fs.create(outputFile)); if (!out->good()) { throw Exception("Output stream is not good."); } //write out the element counts for this reduce task to be manually summed up by the driver LOG_VART(_elementCounts["nodes"]); const QString nodeCntStr = "nodes;" + QString::number(_elementCounts["nodes"]) + "\n"; out->write(nodeCntStr.toLatin1().data(), nodeCntStr.toLatin1().size()); LOG_VART(_elementCounts["ways"]); const QString wayCntStr = "ways;" + QString::number(_elementCounts["ways"]) + "\n"; out->write(wayCntStr.toLatin1().data(), wayCntStr.toLatin1().size()); LOG_VART(_elementCounts["relations"]); const QString relationCntStr = "relations;" + QString::number(_elementCounts["relations"]) + "\n"; out->write(relationCntStr.toLatin1().data(), relationCntStr.toLatin1().size()); _elementCounts["nodes"] = 0; _elementCounts["ways"] = 0; _elementCounts["relations"] = 0; }
void testBasics() { string outDir = "test-output/WayJoin2InputFormat"; string pbfIn = outDir + "/SmallSplits.pbf"; string csqIn = outDir + "/sample.csq"; Hdfs fs; if (fs.exists(pbfIn) == false) { fs.copyFromLocal("test-files/io/SmallSplits.pbf", pbfIn); } // create a sequence file that looks like the output of WayJoin1Reducer WayJoin1Reducer::Value v; int64_t wid; boost::shared_ptr<ostream> sample(fs.create(csqIn)); pp::CppSeqFile::Writer writer(*sample); v.nodeId = 1; v.x = -104; v.y = 38; wid = 2; writer.appendFixed<int64_t, WayJoin1Reducer::Value>(wid, v); v.nodeId = 3; v.x = -104; v.y = 38; wid = 4; writer.appendFixed<int64_t, WayJoin1Reducer::Value>(wid, v); writer.close(); sample.reset(); // create an input format WayJoin2InputFormat uut; uut.setPath(pbfIn + "," + csqIn); // verify that the input splits look as expected. CPPUNIT_ASSERT_EQUAL(2, uut.getSplitCount()); CPPUNIT_ASSERT_EQUAL(pbfIn, uut.getSplit(0).getPath()); CPPUNIT_ASSERT_EQUAL((long long)1125, uut.getSplit(0).getStart()); CPPUNIT_ASSERT_EQUAL((long long)13490, uut.getSplit(0).getLength()); CPPUNIT_ASSERT_EQUAL(csqIn, uut.getSplit(1).getPath()); CPPUNIT_ASSERT_EQUAL((long long)0, uut.getSplit(1).getStart()); CPPUNIT_ASSERT_EQUAL((long long)100, uut.getSplit(1).getLength()); }
void PaintNodesDriver::_loadPartFile(const QString& partPath) { Hdfs fs; auto_ptr<istream> is(fs.open(partPath.toStdString())); CppSeqFile::Reader reader(*is); string s; s.resize(1024); PaintNodesReducer::Pixel pixel; int32_t sum; while (reader.nextFixed<PaintNodesReducer::Pixel, int>(pixel, sum)) { assert(pixel.x < _width); assert(pixel.y < _height); _c.ptr<int32_t>(pixel.y)[pixel.x] = sum; } }
/**
 * Creates the file fn and appends 100 DummyBlock2 pairs to it. The v members count upwards
 * from offset: the first element of pair i holds offset + 2i, the second offset + 2i + 1.
 *
 * @param fn path of the sequence file to create
 * @param offset value stored in the very first block
 */
void writeTestData(string fn, int offset)
{
  Hdfs fs;
  auto_ptr<ostream> out(fs.create(fn));
  CppSeqFile::Writer writer(*out);

  const int pairCount = 100;
  DummyBlock2 first, second;
  int next = offset;
  for (int n = 0; n < pairCount; ++n)
  {
    first.v = next;
    second.v = next + 1;
    next += 2;
    writer.append<DummyBlock2, DummyBlock2>(first, second);
  }

  writer.close();
  out.reset();
}
/**
 * Runs the two join steps (joinWaysToNodes followed by joinPointsToWays) using a uniquely
 * named temporary path under "tmp/" for the intermediate result.
 *
 * The temporary path is deleted once both steps succeed. If either step throws, no matter
 * what the exception type is, the temporary path is removed (when it exists) and the original
 * exception is rethrown unchanged.
 *
 * @param in input passed to both join steps
 * @param out output passed to joinPointsToWays
 */
void WayJoinDriver::calculateWayBounds(QString in, QString out)
{
  Hdfs fs;
  QString tmp = "tmp/" + UuidHelper::createUuid().toString().replace("{", "").replace("}", "") +
    "-JoinWaysToPoints.csq";
  const std::string tmpPath = tmp.toStdString();

  try
  {
    joinWaysToNodes(in, tmp);
    joinPointsToWays(in + "," + tmp, out);

    fs.deletePath(tmpPath, true);
  }
  catch (...)
  {
    // clean up for every failure, not just HootException, so that the temporary data never
    // lingers; the exception itself is passed on untouched.
    if (fs.exists(tmpPath))
    {
      fs.deletePath(tmpPath, true);
    }
    throw;
  }
}
void init(const string& outDir, const QString outFile) { if (QFile::exists(outFile)) { QFile::remove(outFile); } Hdfs fs; if (fs.exists(outDir)) { fs.deletePath(outDir, true); } QDir().mkpath(QString::fromStdString(outDir)); fs.copyFromLocal( /*"test-files/DcGisRoads.pbf"*/ "test-files/conflate/unified/AllDataTypesA.osm.pbf", outDir + "/input.osm.pbf"); //init db ServicesDbTestUtils::deleteDataFromOsmApiTestDatabase(); const QString scriptDir = "test-files/servicesdb"; ApiDb::execSqlFile(ServicesDbTestUtils::getOsmApiDbUrl().toString(), scriptDir + "/users.sql"); }
void testAll() { srand(0); OsmMap::resetCounters(); Settings::getInstance().clear(); conf().set(ConfigOptions().getUuidHelperRepeatableKey(), true); conf().set(ConfigOptions().getUnifyOptimizerTimeLimitKey(), -1); string outDir = "test-output/hadoop/HadoopTileWorkerTest/"; Hdfs fs; if (fs.exists(outDir)) { fs.deletePath(outDir, true); } fs.copyFromLocal("test-files/DcTigerRoads.pbf", outDir + "in1.pbf/DcTigerRoads.pbf"); fs.copyFromLocal("test-files/DcGisRoads.pbf", outDir + "in2.pbf/DcGisRoads.pbf"); shared_ptr<TileWorker> worker(new HadoopTileWorker()); TileConflator uut(worker); // ~240m uut.setBuffer(8.0 / 3600.0); uut.setMaxNodesPerBox(5000); uut.setSources(QString::fromStdString(outDir) + "in1.pbf", QString::fromStdString(outDir) + "in2.pbf"); uut.conflate(QString::fromStdString(outDir) + "HadoopTileWorkerTest.pbf"); shared_ptr<OsmMap> map(new OsmMap); PbfReader reader(true); reader.setUseFileStatus(true); std::vector<FileStatus> status = fs.listStatus(outDir + "HadoopTileWorkerTest.pbf", true); for (size_t i = 0; i < status.size(); i++) { const string& path = status[i].getPath(); LOG_INFO(path); if (QString::fromStdString(path).endsWith(".pbf")) { shared_ptr<istream> is(fs.open(path)); reader.parse(is.get(), map); } } QDir().mkpath(QString::fromStdString(outDir)); OsmWriter writer; writer.setIncludeHootInfo(true); writer.write(map, QString::fromStdString(outDir + "/result.osm")); HOOT_FILE_EQUALS("test-files/hadoop/HadoopTileWorkerTest/result.osm", "test-output/hadoop/HadoopTileWorkerTest/result.osm"); }
void testAll() { string outDir = "test-output/hadoop/HadoopTileWorker2Test/"; Hdfs fs; if (fs.exists(outDir)) { fs.deletePath(outDir, true); } fs.copyFromLocal("test-files/DcTigerRoads.pbf", outDir + "in1.pbf/DcTigerRoads.pbf"); fs.copyFromLocal("test-files/DcGisRoads.pbf", outDir + "in2.pbf/DcGisRoads.pbf"); shared_ptr<TileWorker2> worker(new HadoopTileWorker2()); FourPassManager uut(worker); // ~240m uut.setBuffer(8.0 / 3600.0); uut.setMaxNodesPerBox(5000); uut.setSources(QString::fromStdString(outDir) + "in1.pbf", QString::fromStdString(outDir) + "in2.pbf"); Envelope env(-77.039, -77.033, 38.892, 38.896); shared_ptr<OpList> op(new OpList()); op->addOp(shared_ptr<OsmMapOperation>(new MapCropper(env))); op->addOp(shared_ptr<OsmMapOperation>(new MergeNearbyNodes(10))); uut.setOperation(op); uut.apply(QString::fromStdString(outDir) + "HadoopTileWorker2Test.pbf"); shared_ptr<OsmMap> map(new OsmMap); PbfReader reader(true); reader.setUseFileStatus(true); std::vector<FileStatus> status = fs.listStatus(outDir + "HadoopTileWorker2Test.pbf"); for (size_t i = 0; i < status.size(); i++) { const string& path = status[i].getPath(); LOG_INFO(path); if (QString::fromStdString(path).endsWith(".pbf")) { shared_ptr<istream> is(fs.open(path)); reader.parse(is.get(), map); } } QDir().mkpath(QString::fromStdString(outDir)); OsmWriter writer; writer.setIncludeHootInfo(true); writer.write(map, QString::fromStdString(outDir + "/result.osm")); HOOT_FILE_EQUALS("test-files/hadoop/HadoopTileWorker2Test/result.osm", "test-output/hadoop/HadoopTileWorker2Test/result.osm"); }
void testJob() { LOG_INFO("Starting."); string outDir = "test-output/hadoop/PaintNodesDriverTest/"; Hdfs fs; if (fs.exists(outDir)) { fs.deletePath(outDir, true); } fs.copyFromLocal("test-files/io/SmallSplits.pbf", outDir + "input/SmallSplits.pbf"); Envelope e(-180, 180, -90, 90); double pixelSize = 0.1; { PaintNodesDriver uut; const cv::Mat& m = uut.calculateDensity(e, pixelSize, QString::fromStdString(outDir) + "input"); CPPUNIT_ASSERT_EQUAL(3601, m.cols); CPPUNIT_ASSERT_EQUAL(1801, m.rows); //CPPUNIT_ASSERT_EQUAL(9, m.ptr<int>(1288)[750]); //CPPUNIT_ASSERT_EQUAL(27, m.ptr<int>(1288)[751]); long sum = 0; for (int py = 0; py < m.rows; py++) { const int32_t* row1 = m.ptr<int32_t>(py); for (int px = 0; px < m.cols; px++) { sum += row1[px]; if (row1[px] > 0) { LOG_INFO("px: " << px << " py: " << py << " v: " << row1[px]); } } } CPPUNIT_ASSERT_EQUAL(36l, sum); } // does it work when cached? { PaintNodesDriver uut; const cv::Mat& m = uut.calculateDensity(e, pixelSize, QString::fromStdString(outDir) + "input"); CPPUNIT_ASSERT_EQUAL(3601, m.cols); CPPUNIT_ASSERT_EQUAL(1801, m.rows); //CPPUNIT_ASSERT_EQUAL(9, m.ptr<int>(1288)[750]); //CPPUNIT_ASSERT_EQUAL(27, m.ptr<int>(1288)[751]); long sum = 0; for (int py = 0; py < m.rows; py++) { const int32_t* row1 = m.ptr<int32_t>(py); for (int px = 0; px < m.cols; px++) { sum += row1[px]; if (row1[px] > 0) { LOG_INFO("px: " << px << " py: " << py << " v: " << row1[px]); } } } CPPUNIT_ASSERT_EQUAL(36l, sum); } }
void testWriteRead() { Hdfs fs; _foundKey.resize(400); _foundValue.resize(400); for (size_t i = 0; i < _foundKey.size(); i++) { _foundKey[i] = false; _foundValue[i] = false; } string inDir = "test-output/CppSeqFileRecordWriterTest/input/"; string outDir = "test-output/CppSeqFileRecordWriterTest/output/"; if (fs.exists(inDir)) { fs.deletePath(inDir, true); } fs.mkdirs(inDir); writeTestData(inDir + "in1.csq", 0); writeTestData(inDir + "in2.csq", 200); // create a job Job job; // set the name job.setName("CppSeqFileRecordWriterTest"); // set the input/output job.setInput(inDir); job.setOutput(outDir); job.setNumReduceTask(2); job.setInputFormatClass(FileInputFormat::className()); job.setRecordReaderClass(CppSeqFileRecordReader::className()); job.setRecordWriterClass(CppSeqFileRecordWriter::className()); // Adds all libraries in this directory to the job. job.addLibraryDir(getenv("HADOOP_HOME") + string("/c++/Linux-amd64-64/lib/")); job.addLibraryDir(getenv("PRETTY_PIPES_HOME") + string("/lib/")); // This library will be used to provide mapper/reducer classes and anything else referenced // by the factory. job.addPlugin(getenv("PRETTY_PIPES_HOME") + string("/lib/libPrettyPipes.so")); // run the job. job.setJobTracker("local"); job.run(); FileInputFormat fif; Configuration conf; conf.set("mapred.input.dir", outDir); fif.setConfiguration(conf); CPPUNIT_ASSERT_EQUAL(2, fif.getSplitCount()); verifyOutput(fif, 0); verifyOutput(fif, 1); for (size_t i = 0; i < _foundKey.size(); i++) { if (i % 2 == 0) { CPPUNIT_ASSERT_EQUAL(true, (bool)_foundKey[i]); CPPUNIT_ASSERT_EQUAL(false, (bool)_foundValue[i]); } else { CPPUNIT_ASSERT_EQUAL(false, (bool)_foundKey[i]); CPPUNIT_ASSERT_EQUAL(true, (bool)_foundValue[i]); } } }
/**
 * Returns the node density matrix for the given input, envelope and pixel size.
 *
 * The matrix is ceil(width / pixelSize) + 1 columns by ceil(height / pixelSize) + 1 rows.
 * If "<input>/density-<width>x<height>" already exists it is loaded as a cache. Otherwise a
 * job is run that writes to a uniquely named directory under "tmp/", the result is loaded and
 * the directory is either renamed to the cache name (input is a directory) or deleted.
 *
 * @param e envelope that determines the matrix dimensions
 * @param pixelSize size of one pixel, in the units of the envelope
 * @param input path of the job input
 * @return reference to the internal matrix; it is overwritten by the next call
 * @throws HootException if the input does not exist, or if a job has to be run and the
 *  HOOT_HOME environment variable is not set
 */
const cv::Mat& PaintNodesDriver::calculateDensity(const Envelope& e, double pixelSize,
  QString input)
{
  Hdfs fs;
  if (fs.exists(input.toStdString()) == false)
  {
    throw HootException("Input file does not exist.");
  }

  _width = ceil(e.getWidth() / pixelSize) + 1;
  _height = ceil(e.getHeight() / pixelSize) + 1;
  QString cacheName = QString("%1/density-%2x%3").arg(input).arg(_width).arg(_height);

  // if we've already calculated and cached the values.
  if (fs.exists(cacheName.toStdString()))
  {
    LOG_INFO("Loading cache file " << cacheName);
    LOG_INFO("If you want to recalculate the cache then run this first: ");
    LOG_INFO(" hadoop fs -rmr " << cacheName);
    _loadOutputDir(cacheName);
  }
  else
  {
    // getenv returns NULL for an unset variable and building a string from NULL is undefined
    // behaviour; fail cleanly before anything is created for the job.
    const char* hootHome = getenv("HOOT_HOME");
    if (hootHome == NULL)
    {
      throw HootException("The HOOT_HOME environment variable is not set.");
    }

    // create a job
    Job job;

    // set the name
    job.setName("Paint Nodes " + input.toStdString());

    fs.mkdirs("tmp");
    QString output = "tmp/" + QUuid::createUuid().toString().replace("{", "").replace("}", "") +
      "-PaintNodes";

    // set the input/output
    job.setInput(input.toStdString());
    job.setOutput(output.toStdString());

    // be nice and don't start the reduce tasks until most of the map tasks are done.
    job.getConfiguration().setDouble("mapred.reduce.slowstart.completed.maps", 0.98);

    job.getConfiguration().set("hoot.envelope", MapStats::toString(e));
    job.getConfiguration().setDouble("hoot.pixel.size", pixelSize);

    // setup the mapper and reducer classes.
    job.setMapperClass(PaintNodesMapper::className());
    job.setReducerClass(PaintNodesReducer::className());
    job.setInputFormatClass(PbfInputFormat::className());
    job.setRecordReaderClass(PbfRecordReader::className());
    job.setRecordWriterClass(CppSeqFileRecordWriter::className());

    // Adds all libraries in this directory to the job.
    job.addLibraryDirs(conf().getList("hoot.hadoop.libpath",
      "${HOOT_HOME}/lib/;${HOOT_HOME}/local/lib/;${HADOOP_HOME}/c++/Linux-amd64-64/lib/;"
      "${HOOT_HOME}/pretty-pipes/lib/"));

    job.addFile(ConfPath::search("hoot.json").toStdString());

    // This library will be used to provide mapper/reducer classes and anything else referenced
    // by the factory.
    job.addPlugin(string(hootHome) + "/lib/libHootHadoop.so.1");

    _addDefaultJobSettings(job);

    // run the job.
    job.run();

    _loadOutputDir(output);

    // if the input is a directory
    if (fs.getFileStatus(input.toStdString()).isDir())
    {
      // store the density info for later use.
      fs.rename(output.toStdString(), cacheName.toStdString());
    }
    else
    {
      fs.deletePath(output.toStdString(), true);
    }
  }

  return _c;
}
void testBasics() { PbfRecordWriter uut; // makes for a consistent output. uut.setCompressionLevel(0); uut.includeVersion(false); Hdfs fs; if (fs.exists("test-output/PbfRecordWriterTest")) { fs.deletePath("test-output/PbfRecordWriterTest", true); } uut.setReduceContext(2, "test-output/PbfRecordWriterTest"); OsmMapPtr map(new OsmMap()); NodePtr n(new Node(Status::Unknown1, 72, 42.0, 3.14159, 7.1)); n->setTag("hello", "world"); n->setTag("note", "test tag"); map->addNode(n); WayPtr w(new Way(Status::Unknown1, 42, 1.7)); vector<long> nodes; nodes.push_back(1); nodes.push_back(3); nodes.push_back(5); nodes.push_back(7); nodes.push_back(11); w->addNodes(nodes); w->setTag("hello", "world"); w->setTag("highway", "road"); w->setTag("note", "test tag"); map->addWay(w); uut.emitRecord(map); map->clear(); n.reset(new Node(Status::Unknown1, 73, 20, 30, 15)); map->addNode(n); w.reset(new Way(Status::Unknown1, 43, 1.7)); nodes.clear(); nodes.push_back(73); w->addNodes(nodes); map->addWay(w); uut.emitRecord(map); uut.close(); const unsigned char data[] = { 0, 0, 0, 13, 10, 9, 79, 83, 77, 72, 101, 97, 100, 101, 114, 24, 56, 16, 41, 26, 52, 120, 1, 1, 41, 0, 214, 255, 34, 14, 79, 115, 109, 83, 99, 104, 101, 109, 97, 45, 86, 48, 46, 54, 34, 10, 68, 101, 110, 115, 101, 78, 111, 100, 101, 115, 130, 1, 10, 72, 111, 111, 116, 101, 110, 97, 110, 110, 121, 9, 172, 13, 140, 0, 0, 0, 12, 10, 7, 79, 83, 77, 68, 97, 116, 97, 24, 221, 1, 16, 204, 1, 26, 215, 1, 120, 1, 1, 204, 0, 51, 255, 10, 89, 10, 0, 10, 4, 110, 111, 116, 101, 10, 8, 116, 101, 115, 116, 32, 116, 97, 103, 10, 5, 104, 101, 108, 108, 111, 10, 5, 119, 111, 114, 108, 100, 10, 14, 101, 114, 114, 111, 114, 58, 99, 105, 114, 99, 117, 108, 97, 114, 10, 3, 55, 46, 49, 10, 11, 104, 111, 111, 116, 58, 115, 116, 97, 116, 117, 115, 10, 1, 49, 10, 7, 104, 105, 103, 104, 119, 97, 121, 10, 4, 114, 111, 97, 100, 10, 3, 49, 46, 55, 18, 42, 18, 40, 10, 3, 144, 1, 2, 66, 9, 184, 249, 250, 29, 200, 146, 146, 128, 2, 74, 10, 128, 196, 197, 144, 
3, 255, 187, 231, 209, 1, 82, 10, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 18, 67, 26, 23, 8, 42, 18, 5, 1, 3, 9, 5, 7, 26, 5, 2, 4, 10, 11, 8, 66, 5, 2, 4, 4, 4, 8, 26, 40, 8, 43, 18, 2, 5, 7, 26, 2, 11, 8, 66, 2, 146, 1, 74, 24, 8, 128, 136, 222, 190, 1, 16, 128, 136, 222, 190, 1, 24, 128, 140, 141, 158, 2, 32, 128, 140, 141, 158, 2, 109, 93, 53, 122}; size_t dataSize = 310; boost::shared_ptr<istream> strm(fs.open("test-output/PbfRecordWriterTest/part-00002r.pbf")); unsigned char* buffer = new unsigned char[dataSize * 2]; strm->read((char*)buffer, dataSize * 2); CPPUNIT_ASSERT_EQUAL(dataSize, (size_t)strm->gcount()); for (size_t i = 0; i < dataSize; i++) { CPPUNIT_ASSERT_EQUAL((int)(unsigned char)data[i], (int)(unsigned char)buffer[i]); } }