Example #1
0
void WriteOsmSqlStatementsReducer::_writeElementCounts()
{
  boost::shared_ptr<Configuration> config(HadoopPipesUtils::toConfiguration(_context->getJobConf()));
  //if it doesn't already exist, add a dir under our main output dir to store the aux element count
  //files in
  Hdfs fs;
  const string mainOutputDir = config->get("mapred.output.dir");
  const string auxDir = mainOutputDir + "/elementCounts";
  if (!fs.exists(auxDir))
  {
    fs.mkdirs(auxDir);
  }
  const string outputFile =
    auxDir + "/elementCount-" + UuidHelper::createUuid().toString().toStdString();
  boost::shared_ptr<ostream> out(fs.create(outputFile));
  if (!out->good())
  {
    throw Exception("Output stream is not good.");
  }

  //write out the element counts for this reduce task to be manually summed up by the driver
  LOG_VART(_elementCounts["nodes"]);
  const QString nodeCntStr = "nodes;" + QString::number(_elementCounts["nodes"]) + "\n";
  out->write(nodeCntStr.toLatin1().data(), nodeCntStr.toLatin1().size());
  LOG_VART(_elementCounts["ways"]);
  const QString wayCntStr = "ways;" + QString::number(_elementCounts["ways"]) + "\n";
  out->write(wayCntStr.toLatin1().data(), wayCntStr.toLatin1().size());
  LOG_VART(_elementCounts["relations"]);
  const QString relationCntStr = "relations;" + QString::number(_elementCounts["relations"]) + "\n";
  out->write(relationCntStr.toLatin1().data(), relationCntStr.toLatin1().size());

  _elementCounts["nodes"] = 0;
  _elementCounts["ways"] = 0;
  _elementCounts["relations"] = 0;
}
Example #2
0
  void testBasics()
  {
    string outDir = "test-output/WayJoin2InputFormat";
    string pbfIn = outDir + "/SmallSplits.pbf";
    string csqIn = outDir + "/sample.csq";
    Hdfs fs;
    if (fs.exists(pbfIn) == false)
    {
      fs.copyFromLocal("test-files/io/SmallSplits.pbf", pbfIn);
    }

    // create a sequence file that looks like the output of WayJoin1Reducer
    WayJoin1Reducer::Value v;
    int64_t wid;

    boost::shared_ptr<ostream> sample(fs.create(csqIn));

    pp::CppSeqFile::Writer writer(*sample);

    v.nodeId = 1;
    v.x = -104;
    v.y = 38;
    wid = 2;
    writer.appendFixed<int64_t, WayJoin1Reducer::Value>(wid, v);

    v.nodeId = 3;
    v.x = -104;
    v.y = 38;
    wid = 4;
    writer.appendFixed<int64_t, WayJoin1Reducer::Value>(wid, v);

    writer.close();
    sample.reset();

    // create an input format
    WayJoin2InputFormat uut;
    uut.setPath(pbfIn + "," + csqIn);

    // verify that the input splits look as expected.
    CPPUNIT_ASSERT_EQUAL(2, uut.getSplitCount());

    CPPUNIT_ASSERT_EQUAL(pbfIn, uut.getSplit(0).getPath());
    CPPUNIT_ASSERT_EQUAL((long long)1125, uut.getSplit(0).getStart());
    CPPUNIT_ASSERT_EQUAL((long long)13490, uut.getSplit(0).getLength());

    CPPUNIT_ASSERT_EQUAL(csqIn, uut.getSplit(1).getPath());
    CPPUNIT_ASSERT_EQUAL((long long)0, uut.getSplit(1).getStart());
    CPPUNIT_ASSERT_EQUAL((long long)100, uut.getSplit(1).getLength());
  }
  /**
   * Writes 100 pairs of sequentially-numbered DummyBlock2 records to a
   * CppSeqFile at the given HDFS path.
   *
   * @param fn HDFS path of the sequence file to create
   * @param offset starting value for the sequential record counter
   */
  void writeTestData(string fn, int offset)
  {
    Hdfs fs;

    //boost::shared_ptr for consistency with the rest of the file; std::auto_ptr
    //is deprecated since C++11 and removed in C++17
    boost::shared_ptr<ostream> out(fs.create(fn));
    CppSeqFile::Writer writer(*out);

    const int count = 100;
    DummyBlock2 first, second;
    int c = offset;
    for (int i = 0; i < count; i++)
    {
      //each appended pair consumes two consecutive counter values
      first.v = c++;
      second.v = c++;
      writer.append<DummyBlock2, DummyBlock2>(first, second);
    }
    writer.close();
    out.reset();
  }