Ejemplo n.º 1
0
/**
 * Loads the node-density raster from a completed job's output directory.
 *
 * @param output HDFS directory containing the job's sequence-file parts
 */
void PaintNodesDriver::_loadOutputDir(const QString& output)
{
  LOG_INFO("w: " << _width << " h: " << _height);
  // Allocate the density matrix zero-initialized in one step, rather than
  // allocating an uninitialized Mat and clearing every pixel with a
  // hand-written double loop as before.
  _c = cv::Mat::zeros(cvSize(_width, _height), CV_32SC1);

  Hdfs fs;

  // Load every sequence-file part ("*.csq") the job wrote to the output dir.
  vector<FileStatus> status = fs.listStatus(output.toStdString());
  for (size_t i = 0; i < status.size(); i++)
  {
    QString fn = QString::fromStdString(status[i].getPath());
    if (fn.endsWith(".csq"))
    {
      _loadPartFile(fn);
    }
  }
}
Ejemplo n.º 2
0
void WriteOsmSqlStatementsReducer::_writeElementCounts()
{
  boost::shared_ptr<Configuration> config(HadoopPipesUtils::toConfiguration(_context->getJobConf()));
  //if it doesn't already exist, add a dir under our main output dir to store the aux element count
  //files in
  Hdfs fs;
  const string mainOutputDir = config->get("mapred.output.dir");
  const string auxDir = mainOutputDir + "/elementCounts";
  if (!fs.exists(auxDir))
  {
    fs.mkdirs(auxDir);
  }
  const string outputFile =
    auxDir + "/elementCount-" + UuidHelper::createUuid().toString().toStdString();
  boost::shared_ptr<ostream> out(fs.create(outputFile));
  if (!out->good())
  {
    throw Exception("Output stream is not good.");
  }

  //write out the element counts for this reduce task to be manually summed up by the driver
  LOG_VART(_elementCounts["nodes"]);
  const QString nodeCntStr = "nodes;" + QString::number(_elementCounts["nodes"]) + "\n";
  out->write(nodeCntStr.toLatin1().data(), nodeCntStr.toLatin1().size());
  LOG_VART(_elementCounts["ways"]);
  const QString wayCntStr = "ways;" + QString::number(_elementCounts["ways"]) + "\n";
  out->write(wayCntStr.toLatin1().data(), wayCntStr.toLatin1().size());
  LOG_VART(_elementCounts["relations"]);
  const QString relationCntStr = "relations;" + QString::number(_elementCounts["relations"]) + "\n";
  out->write(relationCntStr.toLatin1().data(), relationCntStr.toLatin1().size());

  _elementCounts["nodes"] = 0;
  _elementCounts["ways"] = 0;
  _elementCounts["relations"] = 0;
}
Ejemplo n.º 3
0
  void testBasics()
  {
    string outDir = "test-output/WayJoin2InputFormat";
    string pbfIn = outDir + "/SmallSplits.pbf";
    string csqIn = outDir + "/sample.csq";
    Hdfs fs;
    if (fs.exists(pbfIn) == false)
    {
      fs.copyFromLocal("test-files/io/SmallSplits.pbf", pbfIn);
    }

    // create a sequence file that looks like the output of WayJoin1Reducer
    WayJoin1Reducer::Value v;
    int64_t wid;

    boost::shared_ptr<ostream> sample(fs.create(csqIn));

    pp::CppSeqFile::Writer writer(*sample);

    v.nodeId = 1;
    v.x = -104;
    v.y = 38;
    wid = 2;
    writer.appendFixed<int64_t, WayJoin1Reducer::Value>(wid, v);

    v.nodeId = 3;
    v.x = -104;
    v.y = 38;
    wid = 4;
    writer.appendFixed<int64_t, WayJoin1Reducer::Value>(wid, v);

    writer.close();
    sample.reset();

    // create an input format
    WayJoin2InputFormat uut;
    uut.setPath(pbfIn + "," + csqIn);

    // verify that the input splits look as expected.
    CPPUNIT_ASSERT_EQUAL(2, uut.getSplitCount());

    CPPUNIT_ASSERT_EQUAL(pbfIn, uut.getSplit(0).getPath());
    CPPUNIT_ASSERT_EQUAL((long long)1125, uut.getSplit(0).getStart());
    CPPUNIT_ASSERT_EQUAL((long long)13490, uut.getSplit(0).getLength());

    CPPUNIT_ASSERT_EQUAL(csqIn, uut.getSplit(1).getPath());
    CPPUNIT_ASSERT_EQUAL((long long)0, uut.getSplit(1).getStart());
    CPPUNIT_ASSERT_EQUAL((long long)100, uut.getSplit(1).getLength());
  }
Ejemplo n.º 4
0
/**
 * Reads one sequence-file part of (pixel, count) records and stores each
 * count into the density matrix at the pixel's location.
 *
 * @param partPath HDFS path of the part file to read
 */
void PaintNodesDriver::_loadPartFile(const QString& partPath)
{
  Hdfs fs;
  auto_ptr<istream> is(fs.open(partPath.toStdString()));
  CppSeqFile::Reader reader(*is);

  // NOTE: removed a dead local ("string s; s.resize(1024);") that allocated
  // 1KB per call and was never used.
  PaintNodesReducer::Pixel pixel;
  int32_t sum;
  while (reader.nextFixed<PaintNodesReducer::Pixel, int>(pixel, sum))
  {
    // the reducer should never emit a pixel outside the configured raster
    assert(pixel.x < _width);
    assert(pixel.y < _height);
    _c.ptr<int32_t>(pixel.y)[pixel.x] = sum;
  }
}
  void writeTestData(string fn, int offset)
  {
    Hdfs fs;

    auto_ptr<ostream> out(fs.create(fn));
    CppSeqFile::Writer writer(*out);

    int count = 100;
    DummyBlock2 i1, i2;
    int c = offset;
    for (int i = 0; i < count; i++)
    {
      i1.v = c++;
      i2.v = c++;
      writer.append<DummyBlock2, DummyBlock2>(i1, i2);
    }
    writer.close();
    out.reset();
  }
Ejemplo n.º 6
0
/**
 * Calculates way bounds by joining ways to nodes and then points back to
 * ways, using a uniquely-named temporary HDFS file in between.
 *
 * @param in input path(s)
 * @param out output path for the final join results
 */
void WayJoinDriver::calculateWayBounds(QString in, QString out)
{
  Hdfs fs;
  // scratch file for the intermediate join output; UUID keeps concurrent
  // runs from colliding
  QString tmp = "tmp/" + UuidHelper::createUuid().toString().replace("{", "").replace("}", "") +
      "-JoinWaysToPoints.csq";
  try
  {
    joinWaysToNodes(in, tmp);

    joinPointsToWays(in + "," + tmp, out);

    fs.deletePath(tmp.toStdString(), true);
  }
  // Clean up the scratch file on *any* failure, not just HootException --
  // the original leaked tmp when any other exception type was thrown.
  catch (...)
  {
    if (fs.exists(tmp.toStdString()))
    {
      fs.deletePath(tmp.toStdString(), true);
    }
    throw;
  }
}
Ejemplo n.º 7
0
  void init(const string& outDir, const QString outFile)
  {
    if (QFile::exists(outFile))
    {
      QFile::remove(outFile);
    }
    Hdfs fs;
    if (fs.exists(outDir))
    {
      fs.deletePath(outDir, true);
    }
    QDir().mkpath(QString::fromStdString(outDir));
    fs.copyFromLocal(
      /*"test-files/DcGisRoads.pbf"*/
      "test-files/conflate/unified/AllDataTypesA.osm.pbf",
      outDir + "/input.osm.pbf");

    //init db
    ServicesDbTestUtils::deleteDataFromOsmApiTestDatabase();
    const QString scriptDir = "test-files/servicesdb";
    ApiDb::execSqlFile(ServicesDbTestUtils::getOsmApiDbUrl().toString(), scriptDir + "/users.sql");
  }
Ejemplo n.º 8
0
  // End-to-end check of HadoopTileWorker: conflates two road datasets with
  // TileConflator, then compares the merged result against a stored baseline.
  void testAll()
  {
    // Make the run repeatable: fixed RNG seed, fresh element id counters,
    // cleared settings, and repeatable UUIDs -- required so the output can
    // be compared against the baseline file at the end.
    srand(0);
    OsmMap::resetCounters();
    Settings::getInstance().clear();
    conf().set(ConfigOptions().getUuidHelperRepeatableKey(), true);
    conf().set(ConfigOptions().getUnifyOptimizerTimeLimitKey(), -1);

    // Clean the output dir and stage the two inputs on HDFS.
    string outDir = "test-output/hadoop/HadoopTileWorkerTest/";
    Hdfs fs;
    if (fs.exists(outDir))
    {
      fs.deletePath(outDir, true);
    }
    fs.copyFromLocal("test-files/DcTigerRoads.pbf", outDir + "in1.pbf/DcTigerRoads.pbf");
    fs.copyFromLocal("test-files/DcGisRoads.pbf", outDir + "in2.pbf/DcGisRoads.pbf");

    shared_ptr<TileWorker> worker(new HadoopTileWorker());
    TileConflator uut(worker);
    // ~240m
    uut.setBuffer(8.0 / 3600.0);
    uut.setMaxNodesPerBox(5000);

    uut.setSources(QString::fromStdString(outDir) + "in1.pbf",
                   QString::fromStdString(outDir) + "in2.pbf");

    uut.conflate(QString::fromStdString(outDir) + "HadoopTileWorkerTest.pbf");

    // Read every .pbf part file of the conflated output back into one map.
    shared_ptr<OsmMap> map(new OsmMap);
    PbfReader reader(true);
    reader.setUseFileStatus(true);
    std::vector<FileStatus> status = fs.listStatus(outDir + "HadoopTileWorkerTest.pbf", true);
    for (size_t i = 0; i < status.size(); i++)
    {
      const string& path = status[i].getPath();
      LOG_INFO(path);
      if (QString::fromStdString(path).endsWith(".pbf"))
      {
        shared_ptr<istream> is(fs.open(path));
        reader.parse(is.get(), map);
      }
    }

    QDir().mkpath(QString::fromStdString(outDir));

    // Write the combined map locally and diff it against the known-good file.
    OsmWriter writer;
    writer.setIncludeHootInfo(true);
    writer.write(map, QString::fromStdString(outDir + "/result.osm"));

    HOOT_FILE_EQUALS("test-files/hadoop/HadoopTileWorkerTest/result.osm",
                     "test-output/hadoop/HadoopTileWorkerTest/result.osm");
  }
Ejemplo n.º 9
0
  // End-to-end check of HadoopTileWorker2 driven through FourPassManager,
  // with a crop + node-merge operation applied, compared against a baseline.
  void testAll()
  {
    // Clean the output dir and stage the two inputs on HDFS.
    string outDir = "test-output/hadoop/HadoopTileWorker2Test/";
    Hdfs fs;
    if (fs.exists(outDir))
    {
      fs.deletePath(outDir, true);
    }
    fs.copyFromLocal("test-files/DcTigerRoads.pbf", outDir + "in1.pbf/DcTigerRoads.pbf");
    fs.copyFromLocal("test-files/DcGisRoads.pbf", outDir + "in2.pbf/DcGisRoads.pbf");

    shared_ptr<TileWorker2> worker(new HadoopTileWorker2());
    FourPassManager uut(worker);
    // ~240m
    uut.setBuffer(8.0 / 3600.0);
    uut.setMaxNodesPerBox(5000);

    uut.setSources(QString::fromStdString(outDir) + "in1.pbf",
                   QString::fromStdString(outDir) + "in2.pbf");

    // Crop to a small DC envelope, then merge nodes within 10 (units per
    // MergeNearbyNodes -- presumably meters; confirm against that class).
    Envelope env(-77.039, -77.033, 38.892, 38.896);
    shared_ptr<OpList> op(new OpList());
    op->addOp(shared_ptr<OsmMapOperation>(new MapCropper(env)));
    op->addOp(shared_ptr<OsmMapOperation>(new MergeNearbyNodes(10)));

    uut.setOperation(op);
    uut.apply(QString::fromStdString(outDir) + "HadoopTileWorker2Test.pbf");

    // Read every .pbf part file of the output back into one map.
    shared_ptr<OsmMap> map(new OsmMap);
    PbfReader reader(true);
    reader.setUseFileStatus(true);
    std::vector<FileStatus> status = fs.listStatus(outDir + "HadoopTileWorker2Test.pbf");
    for (size_t i = 0; i < status.size(); i++)
    {
      const string& path = status[i].getPath();
      LOG_INFO(path);
      if (QString::fromStdString(path).endsWith(".pbf"))
      {
        shared_ptr<istream> is(fs.open(path));
        reader.parse(is.get(), map);
      }
    }

    QDir().mkpath(QString::fromStdString(outDir));

    // Write the combined map locally and diff it against the known-good file.
    OsmWriter writer;
    writer.setIncludeHootInfo(true);
    writer.write(map, QString::fromStdString(outDir + "/result.osm"));

    HOOT_FILE_EQUALS("test-files/hadoop/HadoopTileWorker2Test/result.osm",
                     "test-output/hadoop/HadoopTileWorker2Test/result.osm");
  }
Ejemplo n.º 10
0
  // Runs the PaintNodes density job over a small .pbf input and verifies the
  // raster dimensions and total node count, both on a fresh run and again on
  // a second run that should be served from the cached output.
  void testJob()
  {
    LOG_INFO("Starting.");
    string outDir = "test-output/hadoop/PaintNodesDriverTest/";

    // Clean the output dir and stage the input on HDFS.
    Hdfs fs;
    if (fs.exists(outDir))
    {
      fs.deletePath(outDir, true);
    }
    fs.copyFromLocal("test-files/io/SmallSplits.pbf", outDir + "input/SmallSplits.pbf");

    // Whole-world envelope at 0.1 degree pixels -> 3601 x 1801 raster.
    Envelope e(-180, 180, -90, 90);
    double pixelSize = 0.1;

    // First run: computes the density from scratch.
    {
      PaintNodesDriver uut;
      const cv::Mat& m = uut.calculateDensity(e, pixelSize, QString::fromStdString(outDir) + "input");

      CPPUNIT_ASSERT_EQUAL(3601, m.cols);
      CPPUNIT_ASSERT_EQUAL(1801, m.rows);
      //CPPUNIT_ASSERT_EQUAL(9, m.ptr<int>(1288)[750]);
      //CPPUNIT_ASSERT_EQUAL(27, m.ptr<int>(1288)[751]);
      // Sum every pixel; the input contains 36 nodes total.
      long sum = 0;
      for (int py = 0; py < m.rows; py++)
      {
        const int32_t* row1 = m.ptr<int32_t>(py);

        for (int px = 0; px < m.cols; px++)
        {
          sum += row1[px];
          if (row1[px] > 0)
          {
            LOG_INFO("px: " << px << " py: " << py << " v: " << row1[px]);
          }
        }
      }
      CPPUNIT_ASSERT_EQUAL(36l, sum);
    }

    // does it work when cached?
    // Second run with a fresh driver repeats the identical checks; the
    // density should now be loaded from the cache written by the first run.
    {
      PaintNodesDriver uut;
      const cv::Mat& m = uut.calculateDensity(e, pixelSize, QString::fromStdString(outDir) + "input");

      CPPUNIT_ASSERT_EQUAL(3601, m.cols);
      CPPUNIT_ASSERT_EQUAL(1801, m.rows);
      //CPPUNIT_ASSERT_EQUAL(9, m.ptr<int>(1288)[750]);
      //CPPUNIT_ASSERT_EQUAL(27, m.ptr<int>(1288)[751]);
      long sum = 0;
      for (int py = 0; py < m.rows; py++)
      {
        const int32_t* row1 = m.ptr<int32_t>(py);

        for (int px = 0; px < m.cols; px++)
        {
          sum += row1[px];
          if (row1[px] > 0)
          {
            LOG_INFO("px: " << px << " py: " << py << " v: " << row1[px]);
          }
        }
      }
      CPPUNIT_ASSERT_EQUAL(36l, sum);
    }
  }
  // Round-trip test for CppSeqFileRecordWriter: writes 400 values across two
  // input sequence files, runs a local 2-reducer job over them, and verifies
  // every value comes back exactly once via verifyOutput().
  void testWriteRead()
  {
    Hdfs fs;

    // Bookkeeping for verifyOutput(): per the assertions at the bottom,
    // even indices are expected to appear as keys and odd indices as values.
    _foundKey.resize(400);
    _foundValue.resize(400);
    for (size_t i = 0; i < _foundKey.size(); i++)
    {
      _foundKey[i] = false;
      _foundValue[i] = false;
    }
    string inDir = "test-output/CppSeqFileRecordWriterTest/input/";
    string outDir = "test-output/CppSeqFileRecordWriterTest/output/";
    if (fs.exists(inDir))
    {
      fs.deletePath(inDir, true);
    }
    fs.mkdirs(inDir);
    // two input files covering value ranges [0,200) and [200,400)
    writeTestData(inDir + "in1.csq", 0);
    writeTestData(inDir + "in2.csq", 200);

    // create a job
    Job job;

    // set the name
    job.setName("CppSeqFileRecordWriterTest");

    // set the input/output
    job.setInput(inDir);
    job.setOutput(outDir);

    job.setNumReduceTask(2);

    job.setInputFormatClass(FileInputFormat::className());
    job.setRecordReaderClass(CppSeqFileRecordReader::className());
    job.setRecordWriterClass(CppSeqFileRecordWriter::className());

    // Adds all libraries in this directory to the job.
    job.addLibraryDir(getenv("HADOOP_HOME") + string("/c++/Linux-amd64-64/lib/"));
    job.addLibraryDir(getenv("PRETTY_PIPES_HOME") + string("/lib/"));

    // This library will be used to provide mapper/reducer classes and anything else referenced
    // by the factory.
    job.addPlugin(getenv("PRETTY_PIPES_HOME") + string("/lib/libPrettyPipes.so"));

    // run the job.
    job.setJobTracker("local");
    job.run();

    // Read the job output back: two reduce tasks should yield two splits.
    FileInputFormat fif;
    Configuration conf;
    conf.set("mapred.input.dir", outDir);
    fif.setConfiguration(conf);

    CPPUNIT_ASSERT_EQUAL(2, fif.getSplitCount());
    verifyOutput(fif, 0);
    verifyOutput(fif, 1);

    // Every even index must have been seen as a key (and only as a key);
    // every odd index as a value (and only as a value).
    for (size_t i = 0; i < _foundKey.size(); i++)
    {
      if (i % 2 == 0)
      {
        CPPUNIT_ASSERT_EQUAL(true, (bool)_foundKey[i]);
        CPPUNIT_ASSERT_EQUAL(false, (bool)_foundValue[i]);
      }
      else
      {
        CPPUNIT_ASSERT_EQUAL(false, (bool)_foundKey[i]);
        CPPUNIT_ASSERT_EQUAL(true, (bool)_foundValue[i]);
      }
    }
  }
Ejemplo n.º 12
0
/**
 * Calculates a node-density raster for the given input over the given
 * envelope, running a MapReduce job unless a cached result already exists.
 *
 * @param e geographic envelope covered by the raster
 * @param pixelSize pixel size in the envelope's units (presumably degrees --
 *   confirm against callers)
 * @param input HDFS file or directory of input data; must exist
 * @return reference to the member density matrix (_c), one int32 count per
 *   pixel
 * @throws HootException if the input path does not exist
 */
const cv::Mat& PaintNodesDriver::calculateDensity(const Envelope& e, double pixelSize,
  QString input)
{
  Hdfs fs;
  if (fs.exists(input.toStdString()) == false)
  {
    throw HootException("Input file does not exist.");
  }

  // +1 so partial pixels at the envelope edge get a full row/column
  _width = ceil(e.getWidth() / pixelSize) + 1;
  _height = ceil(e.getHeight() / pixelSize) + 1;
  // cache location is keyed by input path and raster dimensions
  QString cacheName = QString("%1/density-%2x%3").arg(input).arg(_width).arg(_height);

  // if we've already calculated and cached the values.
  if (fs.exists(cacheName.toStdString()))
  {
    LOG_INFO("Loading cache file " << cacheName);
    LOG_INFO("If you want to recalculate the cache then run this first: ");
    LOG_INFO("  hadoop fs -rmr " << cacheName);
    _loadOutputDir(cacheName);
  }
  else
  {
    // create a job
    Job job;

    // set the name
    job.setName("Paint Nodes " + input.toStdString());

    // job output goes to a uniquely-named temp dir until we know whether to
    // keep it as the cache
    fs.mkdirs("tmp");
    QString output = "tmp/" + QUuid::createUuid().toString().replace("{", "").replace("}", "") +
        "-PaintNodes";

    // set the input/output
    job.setInput(input.toStdString());
    job.setOutput(output.toStdString());

    // be nice and don't start the reduce tasks until most of the map tasks are done.
    job.getConfiguration().setDouble("mapred.reduce.slowstart.completed.maps", 0.98);

    // pass the envelope and pixel size through the job configuration
    job.getConfiguration().set("hoot.envelope", MapStats::toString(e));
    job.getConfiguration().setDouble("hoot.pixel.size", pixelSize);

    // setup the mapper and reducer classes.
    job.setMapperClass(PaintNodesMapper::className());
    job.setReducerClass(PaintNodesReducer::className());
    job.setInputFormatClass(PbfInputFormat::className());
    job.setRecordReaderClass(PbfRecordReader::className());
    job.setRecordWriterClass(CppSeqFileRecordWriter::className());

    // Adds all libraries in this directory to the job.
    job.addLibraryDirs(conf().getList("hoot.hadoop.libpath",
      "${HOOT_HOME}/lib/;${HOOT_HOME}/local/lib/;${HADOOP_HOME}/c++/Linux-amd64-64/lib/;"
      "${HOOT_HOME}/pretty-pipes/lib/"));

    job.addFile(ConfPath::search("hoot.json").toStdString());

    // This library will be used to provide mapper/reducer classes and anything else referenced
    // by the factory.
    job.addPlugin(getenv("HOOT_HOME") + string("/lib/libHootHadoop.so.1"));

    _addDefaultJobSettings(job);

    // run the job.
    job.run();

    // populate _c from the job's output files
    _loadOutputDir(output);

    // if the input is a directory
    if (fs.getFileStatus(input.toStdString()).isDir())
    {
      // store the density info for later use.
      fs.rename(output.toStdString(), cacheName.toStdString());
    }
    else
    {
      fs.deletePath(output.toStdString(), true);
    }
  }

  return _c;
}
Ejemplo n.º 13
0
  void testBasics()
  {
    PbfRecordWriter uut;
    // makes for a consistent output.
    uut.setCompressionLevel(0);
    uut.includeVersion(false);

    Hdfs fs;
    if (fs.exists("test-output/PbfRecordWriterTest"))
    {
      fs.deletePath("test-output/PbfRecordWriterTest", true);
    }

    uut.setReduceContext(2, "test-output/PbfRecordWriterTest");
    OsmMapPtr map(new OsmMap());
    NodePtr n(new Node(Status::Unknown1, 72, 42.0, 3.14159, 7.1));
    n->setTag("hello", "world");
    n->setTag("note", "test tag");
    map->addNode(n);
    WayPtr w(new Way(Status::Unknown1, 42, 1.7));
    vector<long> nodes;
    nodes.push_back(1);
    nodes.push_back(3);
    nodes.push_back(5);
    nodes.push_back(7);
    nodes.push_back(11);
    w->addNodes(nodes);
    w->setTag("hello", "world");
    w->setTag("highway", "road");
    w->setTag("note", "test tag");
    map->addWay(w);

    uut.emitRecord(map);

    map->clear();

    n.reset(new Node(Status::Unknown1, 73, 20, 30, 15));
    map->addNode(n);
    w.reset(new Way(Status::Unknown1, 43, 1.7));
    nodes.clear();
    nodes.push_back(73);
    w->addNodes(nodes);
    map->addWay(w);

    uut.emitRecord(map);

    uut.close();

    const unsigned char data[] = {
        0,   0,   0,  13,  10,   9,  79,  83,  77,  72, 101,  97, 100, 101, 114,  24,  56,  16,
       41,  26,  52, 120,   1,   1,  41,   0, 214, 255,  34,  14,  79, 115, 109,  83,  99, 104,
      101, 109,  97,  45,  86,  48,  46,  54,  34,  10,  68, 101, 110, 115, 101,  78, 111, 100,
      101, 115, 130,   1,  10,  72, 111, 111, 116, 101, 110,  97, 110, 110, 121,   9, 172,  13,
      140,   0,   0,   0,  12,  10,   7,  79,  83,  77,  68,  97, 116,  97,  24, 221,   1,  16,
      204,   1,  26, 215,   1, 120,   1,   1, 204,   0,  51, 255,  10,  89,  10,   0,  10,   4,
      110, 111, 116, 101,  10,   8, 116, 101, 115, 116,  32, 116,  97, 103,  10,   5, 104, 101,
      108, 108, 111,  10,   5, 119, 111, 114, 108, 100,  10,  14, 101, 114, 114, 111, 114,  58,
       99, 105, 114,  99, 117, 108,  97, 114,  10,   3,  55,  46,  49,  10,  11, 104, 111, 111,
      116,  58, 115, 116,  97, 116, 117, 115,  10,   1,  49,  10,   7, 104, 105, 103, 104, 119,
       97, 121,  10,   4, 114, 111,  97, 100,  10,   3,  49,  46,  55,  18,  42,  18,  40,  10,
        3, 144,   1,   2,  66,   9, 184, 249, 250,  29, 200, 146, 146, 128,   2,  74,  10, 128,
      196, 197, 144,   3, 255, 187, 231, 209,   1,  82,  10,   1,   2,   3,   4,   5,   6,   7,
        8,   0,   0,  18,  67,  26,  23,   8,  42,  18,   5,   1,   3,   9,   5,   7,  26,   5,
        2,   4,  10,  11,   8,  66,   5,   2,   4,   4,   4,   8,  26,  40,   8,  43,  18,   2,
        5,   7,  26,   2,  11,   8,  66,   2, 146,   1,  74,  24,   8, 128, 136, 222, 190,   1,
       16, 128, 136, 222, 190,   1,  24, 128, 140, 141, 158,   2,  32, 128, 140, 141, 158,   2,
      109,  93,  53, 122};
    size_t dataSize = 310;

    boost::shared_ptr<istream> strm(fs.open("test-output/PbfRecordWriterTest/part-00002r.pbf"));
    unsigned char* buffer = new unsigned char[dataSize * 2];
    strm->read((char*)buffer, dataSize * 2);
    CPPUNIT_ASSERT_EQUAL(dataSize, (size_t)strm->gcount());

    for (size_t i = 0; i < dataSize; i++)
    {
      CPPUNIT_ASSERT_EQUAL((int)(unsigned char)data[i], (int)(unsigned char)buffer[i]);
    }

  }