DWORD WINAPI Slave::SPEShufflerEx(LPVOID p) #endif { Slave* self = ((Param5*)p)->serv_instance; int transid = ((Param5*)p)->transid; string path = ((Param5*)p)->path; string localfile = ((Param5*)p)->filename; int bucketnum = ((Param5*)p)->bucketnum; const int key = ((Param5*)p)->key; const int type = ((Param5*)p)->type; string function = ((Param5*)p)->function; string master_ip = ((Param5*)p)->master_ip; int master_port = ((Param5*)p)->master_port; queue<Bucket>* bq = ((Param5*)p)->bq; CMutex* bqlock = ((Param5*)p)->bqlock; CCond* bqcond = ((Param5*)p)->bqcond; int64_t* pendingSize = ((Param5*)p)->pending; delete (Param5*)p; self->createDir(path); // remove old result data files for (int i = 0; i < bucketnum; ++ i) { int size = self->m_strHomeDir.length() + path.length() + localfile.length() + 64; char* tmp = new char[size]; snprintf(tmp, size, "%s.%d", (self->m_strHomeDir + path + "/" + localfile).c_str(), i); LocalFS::erase(tmp); snprintf(tmp, size, "%s.%d.idx", (self->m_strHomeDir + path + "/" + localfile).c_str(), i); LocalFS::erase(tmp); delete [] tmp; } // index file initial offset vector<int64_t> offset; offset.resize(bucketnum); for (vector<int64_t>::iterator i = offset.begin(); i != offset.end(); ++ i) *i = 0; set<int> fileid; while (true) { bqlock->acquire(); while (bq->empty()) bqcond->wait(*bqlock); Bucket b = bq->front(); bq->pop(); *pendingSize -= b.totalsize; bqlock->release(); if (b.totalnum == -1) break; string speip = b.src_ip; int dataport = b.src_dataport; int session = b.session; for (int i = 0; i < b.totalnum; ++ i) { int bucket = 0; if (self->m_DataChn.recv4(speip, dataport, session, bucket) < 0) continue; fileid.insert(bucket); char* tmp = new char[self->m_strHomeDir.length() + path.length() + localfile.length() + 64]; sprintf(tmp, "%s.%d", (self->m_strHomeDir + path + "/" + localfile).c_str(), bucket); fstream datafile(tmp, ios::out | ios::binary | ios::app); sprintf(tmp, "%s.%d.idx", (self->m_strHomeDir + path + "/" + localfile).c_str(), bucket); fstream indexfile(tmp, ios::out | ios::binary | ios::app); delete [] tmp; int64_t start = offset[bucket]; if (0 == start) indexfile.write((char*)&start, 8); int32_t len; char* data = NULL; if (self->m_DataChn.recv(speip, dataport, session, data, len) < 0) continue; datafile.write(data, len); delete [] data; tmp = NULL; if (self->m_DataChn.recv(speip, dataport, session, tmp, len) < 0) continue; int64_t* index = (int64_t*)tmp; for (int j = 0; j < len / 8; ++ j) index[j] += start; offset[bucket] = index[len / 8 - 1]; indexfile.write(tmp, len); delete [] tmp; datafile.close(); indexfile.close(); } // update total received data self->m_SlaveStat.updateIO(speip, b.totalsize, +SlaveStat::SYS_IN); } // sort and reduce if (type == 1) { void* lh = NULL; self->openLibrary(key, function, lh); if (NULL != lh) { MR_COMPARE comp = NULL; MR_REDUCE reduce = NULL; self->getReduceFunc(lh, function, comp, reduce); if (NULL != comp) { char* tmp = new char[self->m_strHomeDir.length() + path.length() + localfile.length() + 64]; for (set<int>::iterator i = fileid.begin(); i != fileid.end(); ++ i) { sprintf(tmp, "%s.%d", (self->m_strHomeDir + path + "/" + localfile).c_str(), *i); self->sort(tmp, comp, reduce); } delete [] tmp; } self->closeLibrary(lh); } } // report sphere output files char* tmp = new char[path.length() + localfile.length() + 64]; vector<string> filelist; for (set<int>::iterator i = fileid.begin(); i != fileid.end(); ++ i) { sprintf(tmp, "%s.%d", (path + "/" + localfile).c_str(), *i); filelist.push_back(tmp); sprintf(tmp, "%s.%d.idx", (path + "/" + localfile).c_str(), *i); filelist.push_back(tmp); } delete [] tmp; self->report(master_ip, master_port, transid, filelist, 1); return NULL; }
DWORD WINAPI Slave::SPEShuffler(LPVOID p) #endif { Slave* self = ((Param5*)p)->serv_instance; int transid = ((Param5*)p)->transid; string client_ip = ((Param5*)p)->client_ip; int client_port = ((Param5*)p)->client_ctrl_port; int client_data_port = ((Param5*)p)->client_data_port; string path = ((Param5*)p)->path; string localfile = ((Param5*)p)->filename; int bucketnum = ((Param5*)p)->bucketnum; CGMP* gmp = ((Param5*)p)->gmp; string function = ((Param5*)p)->function; int bucketid = ((Param5*)p)->bucketid; const int key = ((Param5*)p)->key; const int type = ((Param5*)p)->type; string master_ip = ((Param5*)p)->master_ip; int master_port = ((Param5*)p)->master_port; queue<Bucket>* bq = NULL; CMutex* bqlock = NULL; CCond* bqcond = NULL; int64_t* pendingSize = NULL; pthread_t shufflerex; bool init_success = true; //set up data connection, for keep-alive purpose if (self->m_DataChn.connect(client_ip, client_data_port) < 0) { init_success = false; } else { // read library files for MapReduce, no need for Sphere UDF if (type == 1) self->acceptLibrary(key, client_ip, client_data_port, transid); bq = new queue<Bucket>; bqlock = new CMutex; bqcond = new CCond; pendingSize = new int64_t; *pendingSize = 0; ((Param5*)p)->bq = bq; ((Param5*)p)->bqlock = bqlock; ((Param5*)p)->bqcond = bqcond; ((Param5*)p)->pending = pendingSize; #ifndef WIN32 pthread_create(&shufflerex, NULL, SPEShufflerEx, p); #else DWORD ThreadID; shufflerex = CreateThread(NULL, 0, SPEShufflerEx, p, NULL, &ThreadID); #endif self->m_SectorLog << LogStart(LogLevel::SCREEN) << "SPE Shuffler " << path << " " << localfile << " " << bucketnum << LogEnd(); } while (init_success) { string speip; int speport; SectorMsg msg; int msgid; int r = gmp->recvfrom(speip, speport, msgid, &msg, false); // client releases the task or client has already been shutdown if (((r > 0) && (speip == client_ip) && (speport == client_port)) || ((r < 0) && (!self->m_DataChn.isConnected(client_ip, client_data_port)))) { Bucket b; b.totalnum = -1; b.totalsize = 0; bqlock->acquire(); bq->push(b); bqcond->signal(); bqlock->release(); break; } if (r < 0) continue; if (*pendingSize > 256000000) { // too many incoming results, ask the sender to wait // the receiver buffer size threshold is set to 256MB. This prevents the shuffler from being overflowed // it also helps direct the traffic to less congested shuffler and leads to better load balance msg.setType(-msg.getType()); gmp->sendto(speip, speport, msgid, &msg); } else { Bucket b; b.totalnum = *(int32_t*)(msg.getData() + 8);; b.totalsize = *(int32_t*)(msg.getData() + 12); b.src_ip = speip; b.src_dataport = *(int32_t*)msg.getData(); b.session = *(int32_t*)(msg.getData() + 4); gmp->sendto(speip, speport, msgid, &msg); if (!self->m_DataChn.isConnected(speip, b.src_dataport)) self->m_DataChn.connect(speip, b.src_dataport); bqlock->acquire(); bq->push(b); *pendingSize += b.totalsize; bqcond->signal(); bqlock->release(); } } if (init_success) { #ifndef WIN32 pthread_join(shufflerex, NULL); #else WaitForSingleObject(shufflerex, INFINITE); #endif delete bqlock; delete bqcond; delete pendingSize; SectorMsg msg; msg.setType(1); // success, return result msg.setData(0, (char*)&(bucketid), 4); int progress = 100; msg.setData(4, (char*)&progress, 4); msg.m_iDataLength = SectorMsg::m_iHdrSize + 8; int id = 0; self->m_GMP.sendto(client_ip.c_str(), client_port, id, &msg); self->m_SectorLog << LogStart(LogLevel::LEVEL_3) << "bucket completed 100 " << client_ip << " " << client_port << LogEnd(); } gmp->close(); delete gmp; self->reportSphere(master_ip, master_port, transid); // clear this transaction self->m_TransManager.updateSlave(transid, self->m_iSlaveID); return NULL; }