// Emits, for each key, the maximum of all integer values seen for that key.
void reduce(HadoopPipes::ReduceContext& context) {
  int maxValue = INT_MIN;
  while (context.nextValue()) {
    maxValue = std::max(maxValue, HadoopUtils::toInt(context.getInputValue()));
  }
  context.emit(context.getInputKey(), HadoopUtils::toString(maxValue));
}
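Each of these reduce methods is a member of a HadoopPipes::Reducer subclass. As a rough, self-contained sketch of how a reducer like the one above is usually wired into a Pipes binary (the MaxValueMapper/MaxValueReducer class names are invented for illustration; the real job defines its own):

#include <climits>
#include <algorithm>
#include <string>

#include "hadoop/Pipes.hh"
#include "hadoop/TemplateFactory.hh"
#include "hadoop/StringUtils.hh"

// Hypothetical mapper: passes records through; a real job would parse its input here.
class MaxValueMapper : public HadoopPipes::Mapper {
public:
  MaxValueMapper(HadoopPipes::TaskContext&) {}
  void map(HadoopPipes::MapContext& context) {
    context.emit(context.getInputKey(), context.getInputValue());
  }
};

// Hypothetical wrapper around the reduce function shown above.
class MaxValueReducer : public HadoopPipes::Reducer {
public:
  MaxValueReducer(HadoopPipes::TaskContext&) {}
  void reduce(HadoopPipes::ReduceContext& context) {
    int maxValue = INT_MIN;
    while (context.nextValue()) {
      maxValue = std::max(maxValue, HadoopUtils::toInt(context.getInputValue()));
    }
    context.emit(context.getInputKey(), HadoopUtils::toString(maxValue));
  }
};

int main() {
  // Hands control to the Pipes runtime, which invokes map/reduce as tasks arrive.
  return HadoopPipes::runTask(
      HadoopPipes::TemplateFactory<MaxValueMapper, MaxValueReducer>());
}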
Example #2
// Variant of the max-value reducer above. Unqualified max/toInt/toString assume
// using-declarations for std and HadoopUtils; note that the initial value of 1
// only yields the true maximum when all input values are >= 1.
void reduce(HadoopPipes::ReduceContext& context) {
    int maxValue = 1;
    while (context.nextValue()) {
        maxValue = max(maxValue, toInt(context.getInputValue()));
    }
    context.emit(context.getInputKey(), toString(maxValue));
}
Example #3
    // reduce function
    void reduce( HadoopPipes::ReduceContext& context ) {
        int count = 0;

        //--- get all tuples with the same key, and count their numbers ---
        while ( context.nextValue() ) {
            count += HadoopUtils::toInt( context.getInputValue() );
        }

        //--- emit (word, count) ---
        context.emit(context.getInputKey(), HadoopUtils::toString( count ));
    }
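The counting reducer above is the reduce half of a word count; the matching map side just splits each input line and emits a 1 per word. A minimal sketch, assuming the standard HadoopUtils helpers and an enclosing HadoopPipes::Mapper subclass:

    // map function (sketch; the surrounding Mapper class is assumed)
    void map(HadoopPipes::MapContext& context) {
        //--- split the line into words and emit (word, "1") for each one ---
        std::vector<std::string> words =
            HadoopUtils::splitString(context.getInputValue(), " ");
        for (unsigned int i = 0; i < words.size(); ++i) {
            context.emit(words[i], HadoopUtils::toString(1));
        }
    }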
Example #4
 void reduce(HadoopPipes::ReduceContext& context) {
     std::string key = context.getInputKey();
     std::vector<uint32_t> ids;
     while (context.nextValue()) {
         ids.push_back(nise::ParseUint32(context.getInputValue()));
     }
     std::sort(ids.begin(), ids.end());
     ids.resize(std::unique(ids.begin(), ids.end()) - ids.begin());
      std::ostringstream ss(std::ios::binary);
     nise::WriteVector<uint32_t>(ss, ids);
     std::string value(ss.str());
     context.emit(key, value);
 }
Example #5
void WriteOsmSqlStatementsReducer::reduce(HadoopPipes::ReduceContext& context)
{
  if (_context == NULL)
  {
    _context = &context;
  }

  boost::shared_ptr<Configuration> config(HadoopPipesUtils::toConfiguration(_context->getJobConf()));
  //LOG_VARD(config->getInt("mapred.reduce.tasks"));
  _localJobTracker = config->get("mapred.job.tracker") == "local";
  const long writeBufferSize = config->getLong("writeBufferSize");
  LOG_VART(writeBufferSize);
  if (config->hasKey("dbConnUrl"))
  {
    _dbConnStr = QString::fromStdString(config->get("dbConnUrl"));
    LOG_VART(_dbConnStr);
  }
  _retainSqlFile = config->get("retainSqlFile") == "1";
  LOG_VART(_retainSqlFile);

  //key syntax: <element id>;<table header> OR <element id>;<member id>;<table header>
  const QStringList keyParts = QString::fromStdString(context.getInputKey()).split(";");
  const QString tableHeader = keyParts[keyParts.size() - 2] + ";\n";
  LOG_VART(tableHeader);
  while (context.nextValue())
  {
    _updateElementCounts(tableHeader);

    const QString newValue = QString::fromStdString(context.getInputValue());
    LOG_VART(newValue);
    _sqlStatementBuffer[tableHeader] = _sqlStatementBuffer[tableHeader] % newValue;
    LOG_VART(_sqlStatementBuffer[tableHeader]);
    _sqlStatementBufferSize++;

    if (!_localJobTracker)
    {
      _context->incrementCounter(
        _context->getCounter("WriteOsmSqlStatements", "SQL statements"), 1);
    }

    //this flush will cause the same table to be written to the file twice, each time with a
    //different set of sql records...but that's ok...SQL is still valid
    if (_sqlStatementBufferSize >= writeBufferSize)
    {
      _flush();
    }
  }
}
Example #6
void OsmMapReducer::reduce(HadoopPipes::ReduceContext& context)
{
  _context = &context;
  const string& k = context.getInputKey();
  OsmMapIterator it(_context, _map, _reader);
  reduce(k, it, context);
}
Example #7
	void reduce(HadoopPipes::ReduceContext& context) {
		buffer[0] = MaxValueSimple::MAX_FAILURE;
		i = 1;

		while (context.nextValue()) {
			if (i < MaxTemp_Reducer::MAX_VALUES)
				buffer[i++] = atoi(context.getInputValue().c_str());
			if (i >= MaxTemp_Reducer::MAX_VALUES) {
				// buffer is full: fold it into buffer[0] and start a new batch
				buffer[0] = maxVal.maxValue(buffer, MaxTemp_Reducer::MAX_VALUES);
				i = 1;
			}
		}
		// work-around for context/iterable.hasNext(): fold the remaining,
		// partially filled batch (only the first i entries hold valid values)
		if (i > 1)
			buffer[0] = maxVal.maxValue(buffer, i);

		valStr.str("");
		valStr << buffer[0];
		context.emit(context.getInputKey(), valStr.str());
	}
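The MaxValueSimple::maxValue helper used above is not shown in this snippet; presumably it just scans the first n entries of the buffer, along these lines (hypothetical stand-in, not the actual class):

	// Hypothetical stand-in for MaxValueSimple::maxValue; the real implementation is not shown.
	int maxValue(const int* buffer, int n) {
		int best = buffer[0];
		for (int k = 1; k < n; ++k) {
			if (buffer[k] > best)
				best = buffer[k];
		}
		return best;
	}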
Example #8
 SegUsageCalcWriter(HadoopPipes::ReduceContext& context) {
     printf("DBG:--enter func%s\n",__func__); 
     const HadoopPipes::JobConf* job = context.getJobConf();
     int part = job->getInt("mapred.task.partition");
     std::string outDir = job->get("mapred.work.output.dir");
     std::string inputDir = job->get("mapred.input.dir");
     printf("DBG:--inputDir :%s\n",inputDir.c_str());
     const char * fs_name = g_basename(inputDir.c_str());
     const char * uri = inputDir.c_str();
     printf("DBG:--fs_name:%s,uri:%s\n",fs_name,uri);
     m_storage = init_storage_handler(uri);
     HADOOP_ASSERT(m_storage != NULL, "failed to init storage handler ");
 }
Example #9
 void reduce(HadoopPipes::ReduceContext& context) {
   printf("DBG:--enter reduce func ...\n");
   while(context.nextValue()){
         printf("DBG:--key:%s value%s\n",context.getInputKey().c_str(),context.getInputValue().c_str());
   }
   context.emit(context.getInputKey(),context.getInputValue());
   printf("DBG:--exit reduce func ...\n");
 }
Example #10
 void reduce(HadoopPipes::ReduceContext& context) {
   bool first = true;
   nise::Record record;
   std::string key = context.getInputKey();
   while (context.nextValue()) {
     std::stringstream ss(context.getInputValue());
     if (first) {
         nise::Signature::RECORD.check(ss);
         record.readFields(ss);
         if (ss) {
             first = false;
             continue;
         }
     }
     else {
         if (record.sources.size() > nise::MAX_SOURCES) {
             context.incrementCounter(trimmed, 1);
             break;
         }
         nise::Record cur;
         nise::Signature::RECORD.check(ss);
         cur.readFields(ss);
         if (ss && (cur.sources.size() > 0)) {
             record.sources.push_back(cur.sources[0]);
             continue;
         }
     }
     context.incrementCounter(bad, 1);
   }
   if (!first) {
       std::ostringstream ss(std::ios::binary);
       nise::Signature::RECORD.write(ss);
       record.write(ss);
       context.emit(key, ss.str());
   }
 }
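The trimmed and bad names used with incrementCounter above are presumably TaskContext counters held as members of the reducer and created in its constructor. A sketch of that setup, with made-up group/counter strings (the real names are not shown in the snippet):

 // Hypothetical reducer skeleton; only the getCounter/incrementCounter usage is the point here.
 class RecordMergeReducer : public HadoopPipes::Reducer {
 public:
     HadoopPipes::TaskContext::Counter* trimmed;
     HadoopPipes::TaskContext::Counter* bad;

     RecordMergeReducer(HadoopPipes::TaskContext& context) {
         trimmed = context.getCounter("nise", "trimmed");   // group/name are assumptions
         bad = context.getCounter("nise", "bad");
     }

     void reduce(HadoopPipes::ReduceContext& context);  // body as shown above
 };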
Example #11
void SyntopReducer::reduce(HadoopPipes::ReduceContext& context) { // NOLINT
  boost::split(temp_string_components_, context.getInputKey(),
               boost::is_any_of("_"));

  float sum = 0;
  while (context.nextValue()) {
    sum += HadoopUtils::toFloat(context.getInputValue());
  }

  if (boost::starts_with(temp_string_components_[0], "gamma")) {
    context.emit(context.getInputKey(), boost::lexical_cast<string>(sum));
  } else if (boost::starts_with(temp_string_components_[0], "lhood")) {
    // sum += GlobalLikelihoodTerm();
    sum += GlobalWeightTerm(vars_->nu_.get(),
                            vars_->beta_.get(),
                            params_->alpha_trans(),
                            params_->alpha_top(),
                            params_->finite());

    context.emit(context.getInputKey(), boost::lexical_cast<string>(sum));
  } else {
    if (boost::starts_with(temp_string_components_[2], "~")) {
      // cout << "optimizing" << endl;

      Optimize();
      Emit(&output);

      StringMap::const_iterator last = (output).end();
      for (StringMap::const_iterator itr = (output).begin();
           itr != last; itr++) {
        // cout << itr->first << "\t" << itr->second << endl;
        context.emit(itr->first, boost::lexical_cast<string>(itr->second));
      }

      output.clear();

      last = (output).end();
      for (StringMap::const_iterator itr = (output).begin();
           itr != last; itr++) {
        // cout << "output is\t" << itr->first << "\t" << itr->second << endl;
        // context.emit(itr->first, boost::lexical_cast<string>(itr->second));
      }

      index = boost::lexical_cast<int>(temp_string_components_[1]);
      vars_ = new VariationalParameters(*params_);

      display_matrix(vars_->tau_est_top_.get(), "tau_est_top is\n");
      display_vector(vars_->tau_est_bottom_.get(), "tau_est_bottom is\n");

      tau_coordinate_ = -1;
      nu_coordinate_ = -1;
    } else {
      ProcessKey(context.getInputKey(), sum);
      // cout << "processing\t" << context.getInputKey() << "\t" << sum << endl;
    }

    /*
    if (index == -1) {
      index = boost::lexical_cast<double>(temp_string_components_[1]);
      // reduceContext = context;
    } else {
      if (index != boost::lexical_cast<int>(temp_string_components_[1])) {
        Optimize();
        Emit(&output);

        StringMap::const_iterator last = (output).end();
        for (StringMap::const_iterator itr = (output).begin();
             itr != last; itr++) {
          // cout << itr->first << "\t" << itr->second << endl;
          context.emit(itr->first, boost::lexical_cast<string>(itr->second));
        }

        output.clear();

        index = boost::lexical_cast<int>(temp_string_components_[1]);
        vars_ = new VariationalParameters(*params_);

        tau_coordinate_ = -1;
        nu_coordinate_ = -1;
      }
    }
    */
  }
  // }
}