/**
 * Emits (year, airTemperature) for one fixed-width input record.
 *
 * Field layout read from the record (zero-based offsets):
 *   - year:            4 characters starting at offset 15
 *   - air temperature: 5 characters starting at offset 87
 *   - quality code:    1 character at offset 92
 *
 * A record is emitted only when the temperature field is not "+9999" and the
 * quality code is one of 0, 1, 4, 5 or 9. Records too short to contain the
 * quality code are skipped instead of letting std::string::substr throw
 * std::out_of_range.
 *
 * @param context Pipes map context supplying the input value and receiving
 *                the emitted pair.
 */
void map(HadoopPipes::MapContext& context) {
  // Offset of the last field read (the quality code); a complete record must
  // be at least one character longer than this.
  const std::string::size_type qualityOffset = 92;

  // Bind by reference: the record is only read, so no copy is needed.
  const std::string& line = context.getInputValue();
  if (line.size() <= qualityOffset) {
    return;  // truncated or blank record
  }

  const std::string airTemperature = line.substr(87, 5);
  if (airTemperature == "+9999") {
    return;
  }

  const char quality = line[qualityOffset];
  const bool acceptable = quality == '0' || quality == '1' || quality == '4' ||
                          quality == '5' || quality == '9';
  if (acceptable) {
    context.emit(line.substr(15, 4), airTemperature);
  }
}
/*
 * Map one segment file to its usage record.
 *
 *  1. Derive the segment number from the input key (the segment file name).
 *  2. Load and sort all snapshots, then ask
 *     get_refer_inode_between_snapshots() how the segment relates to them.
 *  3. Run the segment usage calculation against the selected inode.
 *  4. Emit (segfile, seg_usage_text) to the reducer.
 *
 * Handling of the get_refer_inode_between_snapshots() result, as coded here:
 *   0 - segment is in snapshots: usage is computed against the returned inode
 *       and tagged with the returned upper snapshot name.
 *   1 - segment is on a snapshot: nothing is computed.
 *   2 - segment is above the snapshots: usage is computed against the latest
 *       inode and tagged with EMPTY_UP_SNAPSHOT.
 * Any other value computes nothing. In every case the (possibly all-zero)
 * usage record is still converted to text and emitted.
 */
void map(HadoopPipes::MapContext& context) {
    int ret = 0;
    printf("DBG:-- enter func:%s\n", __func__);

    // c_str() guarantees the NUL terminator the C helpers below rely on.
    const char *segfile = context.getInputKey().c_str();
    // Validate before the pointer is first used.
    HADOOP_ASSERT(segfile != NULL, "failed read segfile ");
    // size() is a size_t; cast explicitly so the argument matches the format.
    printf("DBG:-- key len :%lu ,segfile:%s\n",
           static_cast<unsigned long>(context.getInputValue().size()), segfile);
    uint64_t segno = get_segfile_no(segfile);

    GHashTable *ss_hashtable = g_hash_table_new_full(g_str_hash, g_str_equal, NULL, NULL);
    ret = load_all_snapshot(m_storage, SNAPSHOT_FILE, ss_hashtable);
    printf("DBG:-- snapshot loaded\n");
    g_assert(ret == 0);

    GList *ss_list = NULL;
    ret = sort_all_snapshot(ss_hashtable, &ss_list);
    printf("DBG:--snapshot sorted\n");
    g_assert(ss_list != NULL);
    g_assert(ret == 0);

    struct inode *inode = NULL;
    char *up_sname = NULL;
    ret = get_refer_inode_between_snapshots(m_storage, segno, ss_list, &inode, &up_sname);
    // Keep the lookup result separate: ret is reused for seg_usage_calc(), and
    // the dispatch must depend on the lookup outcome only.
    const int refer = ret;

    SEG_USAGE_T seg_usage;
    memset(&seg_usage, 0, sizeof(SEG_USAGE_T));

    switch (refer) {
    case 0:
        printf("DBG:--seg is in snapshots\n");
        g_assert(up_sname != NULL);
        // Bound the copy by the destination; seg_usage was zeroed above, so
        // leaving the last byte untouched keeps the name NUL-terminated.
        strncpy(seg_usage.up_sname, up_sname, sizeof(seg_usage.up_sname) - 1);
        ret = seg_usage_calc(m_storage, m_block_size, segno, inode, &seg_usage);
        printf("up sname is:%s\n", seg_usage.up_sname);
        g_assert(ret == 0);
        break;
    case 1:
        printf("DBG:--seg is on snapshot,do nothing\n");
        break;
    case 2:
        printf("DBG:--seg is above snapshot,maybe need migrate\n");
        strncpy(seg_usage.up_sname, EMPTY_UP_SNAPSHOT, sizeof(seg_usage.up_sname) - 1);
        printf("DBG:--up sname is:%s\n", seg_usage.up_sname);
        inode = load_latest_inode(m_storage);
        ret = seg_usage_calc(m_storage, m_block_size, segno, inode, &seg_usage);
        g_assert(ret == 0);
        break;
    default:
        break;
    }

#if 1
    string key = string(segfile, strlen(segfile));
    char segtextbuf[4096];
    uint32_t len = seg_usage2text(&seg_usage, segtextbuf);
    printf("DBG:--segtextbuf :%s ..\n", segtextbuf);
    string value = string(segtextbuf, len);
    printf("DBG:--send segment usage text to reducer ..\n");
    context.emit(key, value);
#endif

    g_free(seg_usage.bitmap);
    // Release the per-call containers. The table was created without destroy
    // notifiers, so this frees only the list nodes and the table itself, not
    // the snapshot entries they reference.
    // NOTE(review): assumes neither helper retains ss_list/ss_hashtable past
    // this call, and that the entries and inode are owned elsewhere - confirm.
    g_list_free(ss_list);
    g_hash_table_destroy(ss_hashtable);
}
// map function: receives a line, outputs (word,"1") // to reducer. void map( HadoopPipes::MapContext& context ) { //--- get line of text --- string line = context.getInputValue(); //--- split it into words --- vector< string > words = HadoopUtils::splitString( line, " " ); //--- emit each word tuple (word, "1" ) --- for ( unsigned int i=0; i < words.size(); i++ ) { context.emit( words[i], HadoopUtils::toString( 1 ) ); } }
//map function: receives a line, outputs (byteOffset, upper(line)) //byte offset is monotonically rising, so sorting will be achieved void map(HadoopPipes::MapContext& context) { //get line of text string line = context.getInputValue(); //transform to uppercase string::iterator it = line.begin(); while (it != line.end()) { if ('a' <= *it && *it <= 'z') { *it += 'A' - 'a'; } ++it; } //emit context.emit("", line); }
/**
 * Emits (year, airTemperature) for one input record.
 *
 * Records containing the marker text "STN---" are skipped. For all other
 * records the year and temperature come from DataSet::getYear() and
 * DataSet::getMax(); nothing is emitted when the temperature equals
 * DataSet::MISSING.
 *
 * line, year, airTemperature and valStr are not declared in this block
 * (presumably members of the enclosing mapper - confirm).
 *
 * @param context Pipes map context supplying the input value and receiving
 *                the emitted pair.
 */
void map(HadoopPipes::MapContext& context) {
  line = context.getInputValue();

  // Substring search for the marker. find_first_of() would instead match any
  // single one of the characters 'S', 'T', 'N' or '-', and so would drop
  // every record that merely contains one of them (e.g. a '-').
  if (line.find("STN---") != std::string::npos) {
    return;
  }

  year = DataSet::getYear(line);
  airTemperature = DataSet::getMax(line);
  if (airTemperature != DataSet::MISSING) {
    // Reset the reused stream's buffer before formatting the new value.
    valStr.str("");
    valStr << airTemperature;
    context.emit(year, valStr.str());
  }
}
/**
 * Processes the current input record.
 *
 * The record reader is looked up once and cached in _reader; it must be a
 * WayJoin2RecordReader. Records whose type is
 * WayJoin2InputSplit::PbfInputSplitType are handed to mapOsmMap(). All other
 * records are treated as a raw int64_t key and a raw WayJoin1Reducer::Value
 * and handed to mapWayPoints().
 *
 * @param context Pipes map context for the current record; its address is
 *                stored in _context.
 * @throws InternalErrorException if the record reader has an unexpected type,
 *         or if the key/value byte sizes do not match the expected types.
 */
void WayJoin2Mapper::map(HadoopPipes::MapContext& context)
{
  _context = &context;

  // Resolve the typed record reader on first use.
  if (_reader == NULL)
  {
    _reader = dynamic_cast<WayJoin2RecordReader*>(
      pp::HadoopPipesUtils::getRecordReader(&context));
    if (_reader == NULL)
    {
      throw InternalErrorException("Expected a record reader of type WayJoin2RecordReader");
    }
  }

  if (_reader->getRecordType() == WayJoin2InputSplit::PbfInputSplitType)
  {
    mapOsmMap(_reader->getMap());
    return;
  }

  const string& rawKey = context.getInputKey();
  const string& rawValue = context.getInputValue();

  // The byte strings are reinterpreted in place below, so their sizes must
  // match the target types exactly.
  if (rawKey.size() != sizeof(int64_t))
  {
    throw InternalErrorException("Expected key to be a int64_t");
  }
  if (rawValue.size() != sizeof(WayJoin1Reducer::Value))
  {
    throw InternalErrorException("Expected value to be a WayJoin1Reducer::Value");
  }

  int64_t* keyPtr = reinterpret_cast<int64_t*>(const_cast<char*>(rawKey.data()));
  WayJoin1Reducer::Value* valuePtr =
    reinterpret_cast<WayJoin1Reducer::Value*>(const_cast<char*>(rawValue.data()));
  mapWayPoints(*keyPtr, *valuePtr);
}
/**
 * Emits symmetric pairs for hash entries whose sketches are close.
 *
 * The input value is parsed as a vector of nise::HashEntry. Nothing is
 * emitted if parsing leaves the stream in a failed state or if the vector has
 * more than nise::MAX_HASH entries. Otherwise every unordered pair of entries
 * with different `second` fields and a sketch distance (per fbi::Hamming)
 * below nise::SKETCH_DIST_OFFLINE is emitted in both directions, keyed by the
 * nise::EncodeUint32 encoding of each `second` field.
 *
 * @param context Pipes map context supplying the serialized entries and
 *                receiving the emitted pairs.
 */
void map(HadoopPipes::MapContext& context) {
    std::vector<nise::HashEntry> entries;
    {
        std::stringstream input(context.getInputValue());
        nise::ReadVector<nise::HashEntry>(input, &entries);
        if (!input) {
            return;
        }
    }

    fbi::Hamming hamming;
    if (entries.size() > nise::MAX_HASH) {
        return;
    }

    // Visit each unordered pair (hi, lo) with lo < hi exactly once.
    for (unsigned hi = 1; hi < entries.size(); ++hi) {
        nise::HashEntry& a = entries[hi];
        for (unsigned lo = 0; lo < hi; ++lo) {
            nise::HashEntry& b = entries[lo];
            if (a.second != b.second &&
                hamming(a.first.sketch, b.first.sketch) < nise::SKETCH_DIST_OFFLINE) {
                std::string idA(nise::EncodeUint32(a.second));
                std::string idB(nise::EncodeUint32(b.second));
                context.emit(idA, idB);
                context.emit(idB, idA);
            }
        }
    }
}
/**
 * Emits (year, airTemperature) for one input line.
 *
 * The year is the first 4 characters of the line; the temperature is up to 7
 * characters starting at offset 5. Lines with no characters at or beyond
 * offset 5 (for example blank lines) are skipped: shorter lines would make
 * std::string::substr throw std::out_of_range, and a line of exactly 5
 * characters would yield an empty temperature.
 *
 * @param context Pipes map context supplying the input value and receiving
 *                the emitted pair.
 */
void map(HadoopPipes::MapContext& context) {
  // Offset at which the temperature field starts.
  const string::size_type temperatureOffset = 5;

  // Bind by reference: the line is only read, so no copy is needed.
  const string& line = context.getInputValue();
  if (line.size() <= temperatureOffset) {
    return;  // no temperature field present
  }

  const string year = line.substr(0, 4);
  const string airTemperature = line.substr(temperatureOffset, 7);
  context.emit(year, airTemperature);
}