/**
 * Emits (year, airTemperature) for one fixed-width input record.
 *
 * Fields are taken from fixed columns of the input value:
 *   - year:           4 characters starting at column 15
 *   - airTemperature: 5 characters starting at column 87
 *   - q:              1 character at column 92
 *
 * A pair is emitted only when airTemperature is not the sentinel "+9999"
 * and q is one of "0", "1", "4", "5" or "9" (presumably the "missing
 * reading" marker and the accepted quality codes -- confirm against the
 * data format specification).
 *
 * Records too short to contain column 92 are skipped. std::string::substr
 * throws std::out_of_range when the start position is past the end of the
 * string, so without this check a blank or truncated line would raise an
 * exception from inside the mapper.
 *
 * @param context Hadoop Pipes map context supplying the input value and
 *                receiving the emitted pair.
 */
void map(HadoopPipes::MapContext& context) {
  // Highest column read below is index 92, so 93 characters are required.
  const std::string::size_type kMinRecordLength = 93;

  const std::string& line = context.getInputValue();
  if (line.size() < kMinRecordLength) {
    return;
  }

  const std::string year = line.substr(15, 4);
  const std::string airTemperature = line.substr(87, 5);
  const std::string q = line.substr(92, 1);

  if (airTemperature != "+9999" &&
      (q == "0" || q == "1" || q == "4" || q == "5" || q == "9")) {
    context.emit(year, airTemperature);
  }
}
Ejemplo n.º 2
0
        /*
         * Computes the usage of one segment file and sends it to the reducer.
         *
         * 1. load and sort all snapshots found in m_storage
         * 2. look up the inode the segment has to be measured against
         *    (get_refer_inode_between_snapshots) and run seg_usage_calc
         * 3. emit (segfile, seg_usage_text)
         *
         * The input key is used as the segment file name; the input value is
         * only used for its size in a debug message.
         */
        void map(HadoopPipes::MapContext& context) {
            int ret = 0;
            printf("DBG:-- enter func:%s\n",__func__);

            /* Validate the key before it is printed or parsed. */
            const char *segfile = context.getInputKey().data();
            HADOOP_ASSERT(segfile != NULL, "failed read segfile ");

            /*
             * size() yields a size_t; it must not be passed to %d.
             * NOTE(review): the label says "key len" but the size of the
             * input *value* is printed -- confirm which one is intended.
             */
            printf("DBG:-- key len :%lu ,segfile:%s\n",
                   static_cast<unsigned long>(context.getInputValue().size()),segfile);
            uint64_t segno = get_segfile_no(segfile);

            GHashTable *ss_hashtable = g_hash_table_new_full(g_str_hash,g_str_equal,NULL,NULL);
            ret = load_all_snapshot(m_storage,SNAPSHOT_FILE,ss_hashtable);
            printf("DBG:-- snapshot loaded\n");
            g_assert(ret == 0);

            GList *ss_list = NULL;
            ret = sort_all_snapshot(ss_hashtable,&ss_list);
            printf("DBG:--snapshot sorted\n");
            g_assert(ss_list !=NULL);
            g_assert(ret == 0);

            struct inode *inode = NULL;
            char *up_sname = NULL;
            /*
             * Keep the lookup result in its own variable: `ret` is overwritten
             * by seg_usage_calc() inside the branches below and must not be
             * re-interpreted as a lookup status afterwards.
             */
            const int lookup = get_refer_inode_between_snapshots(m_storage,segno,ss_list,&inode,&up_sname);

            SEG_USAGE_T seg_usage;
            memset(&seg_usage,0,sizeof(SEG_USAGE_T));
            /*
             * Copy at most size-1 bytes so the terminating NUL left by the
             * memset above always survives. Assumes up_sname is a fixed-size
             * char array inside SEG_USAGE_T -- TODO confirm.
             */
            const size_t sname_room = sizeof(seg_usage.up_sname) - 1;

            if (lookup == 0) {
                printf("DBG:--seg is in snapshots\n");
                strncpy(seg_usage.up_sname,up_sname,sname_room);
                ret = seg_usage_calc(m_storage,m_block_size,segno,inode,&seg_usage);
                printf("up sname is:%s\n",seg_usage.up_sname);
                g_assert(ret ==0);
            } else if (lookup == 1) {
                printf("DBG:--seg is on snapshot,do nothing\n");
            } else if (lookup == 2) {
                printf("DBG:--seg is above snapshot,maybe need migrate\n");
                strncpy(seg_usage.up_sname,EMPTY_UP_SNAPSHOT,sname_room);
                printf("DBG:--up sname is:%s\n",seg_usage.up_sname);
                inode = load_latest_inode(m_storage);
                ret = seg_usage_calc(m_storage,m_block_size,segno,inode,&seg_usage);
                g_assert(ret ==0);
            }
            /*
             * NOTE(review): for lookup == 1 and for any other status the
             * zero-initialised seg_usage is still serialised and emitted,
             * exactly as before -- confirm the reducer expects that.
             */

            string key = string(segfile,strlen(segfile));
            char segtextbuf[4096];
            uint32_t len = seg_usage2text(&seg_usage,segtextbuf);
            printf("DBG:--segtextbuf :%s ..\n",segtextbuf);
            string value = string(segtextbuf,len);
            printf("DBG:--send segment usage text to reducer ..\n");
            context.emit(key,value);

            g_free(seg_usage.bitmap);

            /*
             * Release the containers built for this record; map() runs once
             * per input record, so leaving them behind leaks on every call.
             * Only the list nodes and the table itself are freed here (the
             * table was created without key/value destroy notifiers).
             * NOTE(review): assumes sort_all_snapshot() hands ownership of the
             * list to the caller -- confirm.
             */
            g_list_free(ss_list);
            g_hash_table_destroy(ss_hashtable);
        }
Ejemplo n.º 3
0
    // Word-count mapper: splits the incoming line on " " and sends one
    // (word, toString(1)) pair to the reducer for every piece returned
    // by the splitter.
    void map( HadoopPipes::MapContext& context ) {
        //--- fetch the text and break it up ---
        const string& text = context.getInputValue();
        const vector< string > tokens = HadoopUtils::splitString( text, " " );

        //--- the emitted count is the same for every token ---
        const string one = HadoopUtils::toString( 1 );

        //--- one tuple per token ---
        for ( vector< string >::const_iterator tok = tokens.begin();
              tok != tokens.end(); ++tok ) {
            context.emit( *tok, one );
        }
    }
Ejemplo n.º 4
0
  // Mapper: converts the ASCII letters 'a'..'z' of the input line to upper
  // case (all other bytes are left untouched) and emits the result as the
  // value under an empty key.
  // NOTE(review): an earlier description of this function said the emitted
  // key is the byte offset of the line (so that sorting by key restores the
  // input order); the code emits "" as the key -- confirm which is intended.
  void map(HadoopPipes::MapContext& context) {
    string text = context.getInputValue();

    // distance between a lower-case letter and its upper-case counterpart
    const int caseGap = 'a' - 'A';
    for (string::size_type pos = 0; pos < text.size(); ++pos) {
      char& ch = text[pos];
      if (ch >= 'a' && ch <= 'z') {
        ch -= caseGap;
      }
    }

    context.emit("", text);
  }
Ejemplo n.º 5
0
	/**
	 * Emits (year, airTemperature) for one input record.
	 *
	 * Lines containing the token "STN---" are skipped (presumably the
	 * column-header row of the data set -- confirm). For every other line
	 * the year and the temperature are extracted with DataSet::getYear and
	 * DataSet::getMax; nothing is emitted when the temperature equals
	 * DataSet::MISSING.
	 *
	 * Uses the members line, year, airTemperature and valStr as scratch
	 * storage.
	 *
	 * @param context Hadoop Pipes map context supplying the input value and
	 *                receiving the emitted pair.
	 */
	void map(HadoopPipes::MapContext& context) {
		line = context.getInputValue();

		// find() looks for the whole token. find_first_of() would match
		// any single 'S', 'T', 'N' or '-' character and therefore drop
		// every data line containing one of them (e.g. a negative value).
		if (line.find("STN---") != std::string::npos)
			return;

		year = DataSet::getYear(line);
		airTemperature = DataSet::getMax(line);

		if (airTemperature != DataSet::MISSING) {
			// reset the stream so the previous record's text is not kept
			valStr.str("");
			valStr << airTemperature;
			context.emit(year, valStr.str());
		}

	}
Ejemplo n.º 6
0
/**
 * Handles one input record.
 *
 * On first use the record reader is fetched from the context and must be a
 * WayJoin2RecordReader; otherwise an InternalErrorException is thrown.
 *
 * Records of type WayJoin2InputSplit::PbfInputSplitType are forwarded to
 * mapOsmMap() with the reader's map. For every other record the raw key and
 * value bytes are reinterpreted as an int64_t and a WayJoin1Reducer::Value
 * respectively (their sizes are verified first) and passed to mapWayPoints().
 *
 * @param context Hadoop Pipes map context for the current record.
 * @throws InternalErrorException on an unexpected reader type or on a key /
 *         value whose size does not match the expected type.
 */
void WayJoin2Mapper::map(HadoopPipes::MapContext& context)
{
  _context = &context;

  // Resolve the typed reader once and cache it for later calls.
  if (_reader == NULL)
  {
    _reader = dynamic_cast<WayJoin2RecordReader*>(
      pp::HadoopPipesUtils::getRecordReader(&context));
    if (_reader == NULL)
    {
      throw InternalErrorException("Expected a record reader of type WayJoin2RecordReader");
    }
  }

  if (_reader->getRecordType() == WayJoin2InputSplit::PbfInputSplitType)
  {
    mapOsmMap(_reader->getMap());
    return;
  }

  const string& rawKey = context.getInputKey();
  const string& rawValue = context.getInputValue();

  // Both buffers are reinterpreted below, so their sizes must match exactly.
  if (rawKey.size() != sizeof(int64_t))
  {
    throw InternalErrorException("Expected key to be a int64_t");
  }
  if (rawValue.size() != sizeof(WayJoin1Reducer::Value))
  {
    throw InternalErrorException("Expected value to be a WayJoin1Reducer::Value");
  }

  int64_t* keyPtr = reinterpret_cast<int64_t*>(const_cast<char*>(rawKey.data()));
  WayJoin1Reducer::Value* valuePtr =
    reinterpret_cast<WayJoin1Reducer::Value*>(const_cast<char*>(rawValue.data()));
  mapWayPoints(*keyPtr, *valuePtr);
}
Ejemplo n.º 7
0
 // Reads a vector of nise::HashEntry from the input value and, for every
 // pair of entries whose `second` fields differ and whose sketches are
 // closer than nise::SKETCH_DIST_OFFLINE (as measured by fbi::Hamming),
 // emits the two encoded `second` values in both directions.
 // Nothing is emitted when the input cannot be read or when it holds more
 // than nise::MAX_HASH entries.
 void map(HadoopPipes::MapContext& context) {
     std::vector<nise::HashEntry> entries;
     {
         std::stringstream in(context.getInputValue());
         nise::ReadVector<nise::HashEntry>(in, &entries);
         if (in.fail()) return;
     }

     fbi::Hamming distance;
     if (entries.size() > nise::MAX_HASH) {
         return;
     }

     // Visit every unordered pair (lower, upper) with lower < upper once.
     for (unsigned upper = 0; upper < entries.size(); ++upper) {
         for (unsigned lower = 0; lower < upper; ++lower) {
             const bool sameId = (entries[upper].second == entries[lower].second);
             if (!sameId &&
                 distance(entries[upper].first.sketch, entries[lower].first.sketch)
                     < nise::SKETCH_DIST_OFFLINE) {
                 std::string upperId(nise::EncodeUint32(entries[upper].second));
                 std::string lowerId(nise::EncodeUint32(entries[lower].second));
                 context.emit(upperId, lowerId);
                 context.emit(lowerId, upperId);
             }
         }
     }
 }
Ejemplo n.º 8
0
 // Emits (year, airTemperature) for one input line, where year is the first
 // 4 characters and airTemperature is up to 7 characters starting at
 // column 5.
 //
 // Lines shorter than 5 characters are skipped: std::string::substr throws
 // std::out_of_range when the start position lies past the end of the
 // string, so a blank or truncated line would otherwise raise an exception
 // from inside the mapper.
 void map(HadoopPipes::MapContext& context) {
     const string::size_type kTemperatureOffset = 5;

     const string& line = context.getInputValue();
     if (line.size() < kTemperatureOffset) {
         return;
     }

     const string year = line.substr(0, 4);
     const string airTemperature = line.substr(kTemperatureOffset, 7);
     context.emit(year, airTemperature);
 }