Example no. 1
void MapStatsMapper::_writeStats(HadoopPipes::MapContext& context, const MapStats& stats)
{
  LOG_INFO("node count: " << _nodeCount);
  LOG_INFO("way count: " << _wayCount);
  if (stats.isValid())
  {
    pp::Hdfs fs;
    int partition = context.getJobConf()->getInt("mapred.task.partition");
    string workDir = context.getJobConf()->get("mapred.work.output.dir");

    LOG_INFO("Stats: " << stats.toString());

    QString path = QString("%1/part-%2.stats").
        arg(QString::fromStdString(workDir)).
        arg(partition, 5, 10, QChar('0'));

    LOG_INFO("Writing to: " << path);
    boost::shared_ptr<ostream> osStats(fs.create(path.toStdString()));

    stats.write(*osStats);
  }
  else
  {
    LOG_INFO("Stats are not valid.");
  }
}
Example no. 2
 void map(HadoopPipes::MapContext& context) {
   std::string line = context.getInputValue();
   std::string year = line.substr(15, 4);
   std::string airTemperature = line.substr(87, 5);
   std::string q = line.substr(92, 1);
   // Skip missing readings and keep only trusted quality codes.
   if (airTemperature != "+9999" &&
       (q == "0" || q == "1" || q == "4" || q == "5" || q == "9")) {
     context.emit(year, airTemperature);
   }
 }
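In the classic NCDC max-temperature job this mapper is paired with a reducer that keeps the largest reading per year. A minimal companion sketch, assuming the standard Hadoop Pipes C++ API and an illustrative class name:

   #include <algorithm>
   #include <climits>
   #include "hadoop/Pipes.hh"
   #include "hadoop/StringUtils.hh"

   class MaxTemperatureReducer : public HadoopPipes::Reducer {
   public:
     MaxTemperatureReducer(HadoopPipes::TaskContext& context) {}
     void reduce(HadoopPipes::ReduceContext& context) {
       int maxValue = INT_MIN;
       // Readings for one year arrive grouped under the same key.
       while (context.nextValue()) {
         maxValue = std::max(maxValue,
                             HadoopUtils::toInt(context.getInputValue()));
       }
       context.emit(context.getInputKey(), HadoopUtils::toString(maxValue));
     }
   };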
Example no. 3
        /*
         * 1. compute segment usage
         * 2. emit (segno, seg_usage_text)
         */
        void map(HadoopPipes::MapContext& context) {
            int ret = 0;
            printf("DBG:-- enter func:%s\n", __func__);
            const char *segfile = context.getInputKey().c_str();
            HADOOP_ASSERT(segfile != NULL, "failed to read segfile");
            printf("DBG:-- key len:%zu, segfile:%s\n",
                   context.getInputKey().size(), segfile);
            uint64_t segno = get_segfile_no(segfile);

            GHashTable *ss_hashtable =
                g_hash_table_new_full(g_str_hash, g_str_equal, NULL, NULL);
            ret = load_all_snapshot(m_storage, SNAPSHOT_FILE, ss_hashtable);
            printf("DBG:-- snapshot loaded\n");
            g_assert(ret == 0);

            GList *ss_list = NULL;
            ret = sort_all_snapshot(ss_hashtable, &ss_list);
            printf("DBG:-- snapshot sorted\n");
            g_assert(ss_list != NULL);
            g_assert(ret == 0);

            struct inode *inode = NULL;
            char *up_sname;
            ret = get_refer_inode_between_snapshots(m_storage, segno, ss_list,
                                                    &inode, &up_sname);

            SEG_USAGE_T seg_usage;
            memset(&seg_usage, 0, sizeof(SEG_USAGE_T));
            if (ret == 0) {
                printf("DBG:-- seg is in snapshots\n");
                strncpy(seg_usage.up_sname, up_sname, strlen(up_sname));
                ret = seg_usage_calc(m_storage, m_block_size, segno, inode, &seg_usage);
                printf("up sname is:%s\n", seg_usage.up_sname);
                g_assert(ret == 0);
            }
            if (ret == 1) {
                printf("DBG:-- seg is on snapshot, do nothing\n");
            }
            if (ret == 2) {
                printf("DBG:-- seg is above snapshot, maybe need migrate\n");
                strncpy(seg_usage.up_sname, EMPTY_UP_SNAPSHOT, strlen(EMPTY_UP_SNAPSHOT));
                printf("DBG:-- up sname is:%s\n", seg_usage.up_sname);
                inode = load_latest_inode(m_storage);
                ret = seg_usage_calc(m_storage, m_block_size, segno, inode, &seg_usage);
                g_assert(ret == 0);
            }

            string key = string(segfile, strlen(segfile));
            char segtextbuf[4096];
            uint32_t len = seg_usage2text(&seg_usage, segtextbuf);
            printf("DBG:-- segtextbuf:%s\n", segtextbuf);
            string value = string(segtextbuf, len);
            printf("DBG:-- send segment usage text to reducer\n");
            context.emit(key, value);

            g_free(seg_usage.bitmap);
        }
Example no. 4
    // map function: receives a line, outputs (word,"1")
    // to reducer.
    void map( HadoopPipes::MapContext& context ) {
        //--- get line of text ---
        string line = context.getInputValue();

        //--- split it into words ---
        vector< string > words =
            HadoopUtils::splitString( line, " " );

        //--- emit each word tuple (word, "1" ) ---
        for ( unsigned int i=0; i < words.size(); i++ ) {
            context.emit( words[i], HadoopUtils::toString( 1 ) );
        }
    }
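The matching reduce step sums the "1" counts emitted above. A minimal sketch in the style of the stock Hadoop Pipes word-count example (the class name is illustrative):

    #include "hadoop/Pipes.hh"
    #include "hadoop/StringUtils.hh"

    class WordCountReducer : public HadoopPipes::Reducer {
    public:
        WordCountReducer( HadoopPipes::TaskContext& context ) {}
        void reduce( HadoopPipes::ReduceContext& context ) {
            int sum = 0;
            //--- add up every "1" emitted for this word ---
            while ( context.nextValue() ) {
                sum += HadoopUtils::toInt( context.getInputValue() );
            }
            context.emit( context.getInputKey(),
                          HadoopUtils::toString( sum ) );
        }
    };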
Example no. 5
  //map function: receives a line, outputs (byteOffset, upper(line))
  //byte offset is monotonically rising, so sorting will be achieved
  void map(HadoopPipes::MapContext& context) {
    //get line of text
    string line = context.getInputValue();
    //transform to uppercase
    string::iterator it = line.begin();
    while (it != line.end()) {
      if ('a' <= *it && *it <= 'z') {
        *it += 'A' - 'a';
      }
      ++it;
    }
    //emit the byte-offset key (as documented above) with the uppercased line
    context.emit(context.getInputKey(), line);
  }
Example no. 6
	void map(HadoopPipes::MapContext& context) {
		// line, year, airTemperature and valStr are assumed to be
		// members of the mapper class, reused across map() calls.
		line = context.getInputValue();

		// Skip the header line (it contains "STN---").
		size_t found = line.find("STN---");
		if (found != std::string::npos)
			return;

		year = DataSet::getYear(line);
		airTemperature = DataSet::getMax(line);

		if (airTemperature != DataSet::MISSING) {
			valStr.str("");
			valStr << airTemperature;
			context.emit(year, valStr.str());
		}
	}
Example no. 7
void WayJoin1Mapper::_map(shared_ptr<OsmMap>& m, HadoopPipes::MapContext& context)
{
  LOG_INFO("Starting map");
  string keyStr;
  string valueStr;
  keyStr.resize(sizeof(int64_t));
  int64_t* key = (int64_t*)keyStr.data();

  // Remove all non-roads.
  m->removeWays(TagFilter(Filter::FilterMatches, "highway", ""));

  Debug::printTroubled(m);

  // emit the node's ID as the key and x/y as the value.
  valueStr.resize(sizeof(ValueNode));
  ValueNode* valueNode = (ValueNode*)valueStr.data();
  const OsmMap::NodeMap& nm = m->getNodeMap();
  for (OsmMap::NodeMap::const_iterator it = nm.constBegin(); it != nm.constEnd(); ++it)
  {
    const shared_ptr<const Node>& n = it.value();

    *key = n->getId();
    valueNode->x = n->getX();
    valueNode->y = n->getY();
    context.emit(keyStr, valueStr);
  }

  // emit the way's nodes as the key and the way's id as the value.
  valueStr.resize(sizeof(ValueWay));
  ValueWay* valueWay = (ValueWay*)valueStr.data();
  const WayMap& wm = m->getWays();
  for (WayMap::const_iterator it = wm.begin(); it != wm.end(); ++it)
  {
    const shared_ptr<const Way>& w = it->second;

    valueWay->id = w->getId();

    const std::vector<long>& nids = w->getNodeIds();
    for (size_t i = 0; i < nids.size(); i++)
    {
      *key = nids[i];
      context.emit(keyStr, valueStr);
    }
  }
}
Example no. 8
  SegUsageCalcReader(HadoopPipes::MapContext& context) {
      std::string _filename;
      /* FIXME: hard-coded decoding of the segfile name from the Hadoop
       * InputSplit wire format. */
      // Length prefix of the serialized filename (unused beyond parsing).
      int16_t mysize = *(int16_t*)context.getInputSplit().data();
      _filename = context.getInputSplit().data() + 2;
      printf("GDB:-- filename:%s size:%zu\n", _filename.c_str(), _filename.size());
      uint64_t _offset = *(int64_t*)(context.getInputSplit().data() + 2 + _filename.size());
      uint64_t offset = Swap64(_offset);
      uint64_t _len = *(int64_t*)(context.getInputSplit().data() + 2 + _filename.size() + 8);
      uint64_t len = Swap64(_len);
      printf("GDB:-- seg offset:%llu len:%llu\n",
             (unsigned long long)offset, (unsigned long long)len);
      // Skip the first 5 bytes of the raw path (format-specific prefix).
      std::string filename = _filename.data() + 5;
      printf("GDB:-- filename:%s size:%zu\n", filename.c_str(), filename.size());
      if (TRUE != g_str_has_suffix(filename.c_str(), "seg")) {
          printf("GDB:-- ignore it\n");
          m_bytes_total = m_bytes_read = 0;
          return;
      }
      m_seg_file = g_strdup(g_basename(filename.c_str()));
      printf("GDB:-- seg file:%s\n", m_seg_file);
      m_bytes_total = len;
      m_bytes_read = 0;
  }
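In Hadoop Pipes a custom reader such as this one derives from HadoopPipes::RecordReader and hands records to the framework through next(), reporting consumption via getProgress(). A minimal skeleton of that interface for orientation (a sketch only; the real next() would decode records from the segment between offset and offset+len):

  #include <stdint.h>
  #include <string>
  #include "hadoop/Pipes.hh"

  class SegUsageCalcReaderSkeleton : public HadoopPipes::RecordReader {
      uint64_t m_bytes_total, m_bytes_read;
  public:
      SegUsageCalcReaderSkeleton(HadoopPipes::MapContext& context)
          : m_bytes_total(0), m_bytes_read(0) { /* parse the split as above */ }
      // Produce the next (key, value) record; return false at end of split.
      virtual bool next(std::string& key, std::string& value) {
          return false;  // placeholder body
      }
      // Fraction of the split consumed, used for progress reporting.
      virtual float getProgress() {
          return m_bytes_total ? (float)m_bytes_read / (float)m_bytes_total : 1.0f;
      }
  };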
Example no. 9
void WayJoin2Mapper::map(HadoopPipes::MapContext& context)
{
  _context = &context;
  if (_reader == NULL)
  {
    HadoopPipes::RecordReader* rr = pp::HadoopPipesUtils::getRecordReader(&context);
    _reader = dynamic_cast<WayJoin2RecordReader*>(rr);
    if (_reader == NULL)
    {
      throw InternalErrorException("Expected a record reader of type WayJoin2RecordReader");
    }
  }

  if (_reader->getRecordType() == WayJoin2InputSplit::PbfInputSplitType)
  {
    mapOsmMap(_reader->getMap());
  }
  else
  {
    const string& key = context.getInputKey();
    const string& value = context.getInputValue();

    if (key.size() != sizeof(int64_t))
    {
      throw InternalErrorException("Expected key to be an int64_t");
    }
    if (value.size() != sizeof(WayJoin1Reducer::Value))
    {
      throw InternalErrorException("Expected value to be a WayJoin1Reducer::Value");
    }

    int64_t* k = (int64_t*)key.data();
    WayJoin1Reducer::Value* v = (WayJoin1Reducer::Value*)value.data();
    mapWayPoints(*k, *v);
  }
}
Example no. 10
 void map(HadoopPipes::MapContext& context) {
     std::vector<nise::HashEntry> v;
     {
         std::stringstream ss(context.getInputValue());
         nise::ReadVector<nise::HashEntry>(ss, &v);
         if (!ss) return;
     }
    
     fbi::Hamming hamming;
     if (v.size() > nise::MAX_HASH) {
         return;
     }
     for (unsigned i = 0; i < v.size(); ++i) {
         for (unsigned j = 0; j < i; ++j) {
             if (v[i].second == v[j].second) continue;
             if (hamming(v[i].first.sketch, v[j].first.sketch) < nise::SKETCH_DIST_OFFLINE) {
                 std::string v1(nise::EncodeUint32(v[i].second));
                 std::string v2(nise::EncodeUint32(v[j].second));
                 context.emit(v1, v2);
                 context.emit(v2, v1);
             }
         }
     }
 }
Example no. 11
 void map(HadoopPipes::MapContext& context) {
     string line = context.getInputValue(); 
     string year = line.substr(0, 4); 
     string airTemperature = line.substr(5, 7); 
     context.emit(year, airTemperature);
 }
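Each of these map functions lives in a class deriving from HadoopPipes::Mapper, and a Pipes binary wires the pieces together through a factory in main(). A minimal runner sketch, with MaxTemperatureMapper and MaxTemperatureReducer as illustrative names for classes wrapping the map/reduce functions above:

  #include "hadoop/Pipes.hh"
  #include "hadoop/TemplateFactory.hh"

  int main(int argc, char *argv[]) {
      // Hands control to the Pipes framework, which instantiates the
      // mapper and reducer via the factory and talks to the Java side.
      return HadoopPipes::runTask(
          HadoopPipes::TemplateFactory<MaxTemperatureMapper,
                                       MaxTemperatureReducer>());
  }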