int Client::list(const string& path, vector<SNode>& attr) { string revised_path = Metadata::revisePath(path); SectorMsg msg; msg.resize(65536); msg.setType(101); msg.setKey(m_iKey); msg.setData(0, revised_path.c_str(), revised_path.length() + 1); Address serv; m_Routing.lookup(revised_path, serv); login(serv.m_strIP, serv.m_iPort); if (m_GMP.rpc(serv.m_strIP.c_str(), serv.m_iPort, &msg, &msg) < 0) return SectorError::E_CONNECTION; if (msg.getType() < 0) return *(int32_t*)(msg.getData()); string filelist = msg.getData(); unsigned int s = 0; while (s < filelist.length()) { int t = filelist.find(';', s); SNode sn; sn.deserialize(filelist.substr(s, t - s).c_str()); attr.insert(attr.end(), sn); s = t + 1; } return attr.size(); }
// Sends an empty type-3 request to the master that m_Routing selects for
// m_iKey and decodes the reply into `sys` with deserializeSysStat().
//
// The first entry of sys.m_vMasterList whose IP is empty is given the IP of
// the master that was queried.
//
// Returns 0 on success, SectorError::E_CONNECTION when the RPC fails, or the
// 32-bit code stored at the start of the reply when the reply type is negative.
int Client::sysinfo(SysStat& sys)
{
    SectorMsg request;
    request.setKey(m_iKey);
    request.setType(3);
    request.m_iDataLength = SectorMsg::m_iHdrSize;   // header only, no payload

    Address master;
    m_Routing.lookup(m_iKey, master);
    login(master.m_strIP, master.m_iPort);

    // the same object receives the reply
    if (m_GMP.rpc(master.m_strIP.c_str(), master.m_iPort, &request, &request) < 0)
        return SectorError::E_CONNECTION;

    if (request.getType() < 0)
        return *(int32_t*)(request.getData());

    deserializeSysStat(sys, request.getData(), request.m_iDataLength);

    // locate the first master entry that carries no IP
    vector<Address>::iterator blank = sys.m_vMasterList.begin();
    while ((blank != sys.m_vMasterList.end()) && (blank->m_strIP.length() != 0))
        ++ blank;

    if (blank != sys.m_vMasterList.end())
        blank->m_strIP = master.m_strIP;

    return 0;
}
// Asks the primary master (request type 203) to start a processing engine on
// the SPE described by `s`, connects to that SPE's data port, sends the output
// description over the data channel and finally calls loadOperator().
//
// Returns -1 when s.m_iStatus is not 0, SectorError::E_CONNECTION when the RPC
// fails or is answered with a negative type, and 0 otherwise. On success
// s.m_iSession holds the value read from the reply and s.m_iStatus is set to 1.
int DCClient::connectSPE(SPE& s)
{
    if (0 != s.m_iStatus)
        return -1;

    SectorMsg req;
    req.setType(203);   // start processing engine
    req.setKey(m_pClient->m_iKey);

    // fields at fixed positions
    req.setData(0, s.m_strIP.c_str(), s.m_strIP.length() + 1);
    req.setData(64, (char*)&(s.m_iPort), 4);
    // bytes 68..71: leave a 4-byte blank spot for data port
    req.setData(72, (char*)&(s.m_iID), 4);
    req.setData(76, (char*)&m_pClient->m_iKey, 4);
    req.setData(80, m_strOperator.c_str(), m_strOperator.length() + 1);

    // fields whose position depends on the operator name length
    int pos = 80 + m_strOperator.length() + 1;
    req.setData(pos, (char*)&m_iRows, 4);
    req.setData(pos + 4, (char*)&m_iParamSize, 4);
    req.setData(pos + 8, m_pcParam, m_iParamSize);
    pos += 4 + 8 + m_iParamSize;
    req.setData(pos, (char*)&m_iProcType, 4);

    Address master;
    m_pClient->m_Routing.getPrimaryMaster(master);

    // a transport failure and an error reply are reported the same way
    if (m_pClient->m_GMP.rpc(master.m_strIP.c_str(), master.m_iPort, &req, &req) < 0)
        return SectorError::E_CONNECTION;
    if (req.getType() < 0)
        return SectorError::E_CONNECTION;

    s.m_iSession = *(int32_t*)req.getData();

    m_pClient->m_DataChn.connect(s.m_strIP, s.m_iDataPort);

    cout << "connect SPE " << s.m_strIP.c_str() << " " << *(int*)(req.getData()) << endl;

    // send output information
    m_pClient->m_DataChn.send(s.m_strIP, s.m_iDataPort, s.m_iSession, (char*)&m_iOutputType, 4);

    if (m_iOutputType > 0) {
        // bucket count, then 80 bytes per bucket, then one 4-byte id per output
        int bnum = m_mBucket.size();
        m_pClient->m_DataChn.send(s.m_strIP, s.m_iDataPort, s.m_iSession, (char*)&bnum, 4);
        m_pClient->m_DataChn.send(s.m_strIP, s.m_iDataPort, s.m_iSession, m_pOutputLoc, bnum * 80);
        m_pClient->m_DataChn.send(s.m_strIP, s.m_iDataPort, s.m_iSession, (char*)m_pOutput->m_piLocID, m_iOutputType * 4);
    } else if (m_iOutputType < 0) {
        // m_pOutputLoc is sent as a NUL-terminated string
        m_pClient->m_DataChn.send(s.m_strIP, s.m_iDataPort, s.m_iSession, m_pOutputLoc, strlen(m_pOutputLoc) + 1);
    }

    loadOperator(s.m_strIP, s.m_iPort, s.m_iDataPort, s.m_iSession);

    s.m_iStatus = 1;

    return 0;
}
int Slave::reportSphere(const string& master_ip, const int& master_port, const int& transid, const vector<Address>* bad) { SectorMsg msg; msg.setType(4); msg.setKey(0); msg.setData(0, (char*)&transid, 4); msg.setData(4, (char*)&m_iSlaveID, 4); int num = (NULL == bad) ? 0 : bad->size(); msg.setData(8, (char*)&num, 4); for (int i = 0; i < num; ++ i) { msg.setData(12 + 68 * i, (*bad)[i].m_strIP.c_str(), (*bad)[i].m_strIP.length() + 1); msg.setData(12 + 68 * i + 64, (char*)&((*bad)[i].m_iPort), 4); } cout << "reportSphere " << master_ip << " " << master_port << " " << transid << endl; if (m_GMP.rpc(master_ip.c_str(), master_port, &msg, &msg) < 0) return -1; if (msg.getType() < 0) return *(int32_t*)msg.getData(); return 1; }
int Client::move(const string& oldpath, const string& newpath) { string src = Metadata::revisePath(oldpath); string dst = Metadata::revisePath(newpath); SectorMsg msg; msg.setType(104); msg.setKey(m_iKey); int32_t size = src.length() + 1; msg.setData(0, (char*)&size, 4); msg.setData(4, src.c_str(), src.length() + 1); size = dst.length() + 1; msg.setData(4 + src.length() + 1, (char*)&size, 4); msg.setData(4 + src.length() + 1 + 4, dst.c_str(), dst.length() + 1); Address serv; m_Routing.lookup(src, serv); login(serv.m_strIP, serv.m_iPort); if (m_GMP.rpc(serv.m_strIP.c_str(), serv.m_iPort, &msg, &msg) < 0) return SectorError::E_CONNECTION; if (msg.getType() < 0) return *(int32_t*)(msg.getData()); return 0; }
int Client::copy(const string& src, const string& dst) { string rsrc = Metadata::revisePath(src); string rdst = Metadata::revisePath(dst); SectorMsg msg; msg.setType(106); msg.setKey(m_iKey); int32_t size = rsrc.length() + 1; msg.setData(0, (char*)&size, 4); msg.setData(4, rsrc.c_str(), rsrc.length() + 1); size = rdst.length() + 1; msg.setData(4 + rsrc.length() + 1, (char*)&size, 4); msg.setData(4 + rsrc.length() + 1 + 4, rdst.c_str(), rdst.length() + 1); Address serv; m_Routing.lookup(rsrc, serv); login(serv.m_strIP, serv.m_iPort); if (m_GMP.rpc(serv.m_strIP.c_str(), serv.m_iPort, &msg, &msg) < 0) return SectorError::E_CONNECTION; if (msg.getType() < 0) return *(int32_t*)(msg.getData()); return 0; }
int Slave::report(const string& master_ip, const int& master_port, const int32_t& transid, const vector<string>& filelist, const int& change) { vector<string> serlist; for (vector<string>::const_iterator i = filelist.begin(); i != filelist.end(); ++ i) { struct stat s; if (-1 == stat((m_strHomeDir + *i).c_str(), &s)) continue; SNode sn; sn.m_strName = *i; sn.m_bIsDir = S_ISDIR(s.st_mode) ? 1 : 0; sn.m_llTimeStamp = s.st_mtime; sn.m_llSize = s.st_size; char buf[1024]; sn.serialize(buf); //update local Address addr; addr.m_strIP = "127.0.0.1"; addr.m_iPort = 0; m_pLocalFile->update(buf, addr, change); serlist.push_back(buf); } if (serlist.empty()) return 0; SectorMsg msg; msg.setType(1); msg.setKey(0); msg.setData(0, (char*)&transid, 4); msg.setData(4, (char*)&m_iSlaveID, 4); msg.setData(8, (char*)&change, 4); int32_t num = serlist.size(); msg.setData(12, (char*)&num, 4); int pos = 16; for (vector<string>::iterator i = serlist.begin(); i != serlist.end(); ++ i) { int32_t bufsize = i->length() + 1; msg.setData(pos, (char*)&bufsize, 4); msg.setData(pos + 4, i->c_str(), bufsize); pos += bufsize + 4; } cout << "report " << master_ip << " " << master_port << " " << num << endl; if (m_GMP.rpc(master_ip.c_str(), master_port, &msg, &msg) < 0) return -1; if (msg.getType() < 0) return *(int32_t*)msg.getData(); return 1; }
int FSClient::reopen() { if (0 == m_strFileName.length()) return -1; // currently re-open only works on read if (m_bWrite) return -1; // close connection to the current slave int32_t cmd = 5; m_pClient->m_DataChn.send(m_strSlaveIP, m_iSlaveDataPort, m_iSession, (char*)&cmd, 4); int response; m_pClient->m_DataChn.recv4(m_strSlaveIP, m_iSlaveDataPort, m_iSession, response); m_pClient->m_DataChn.remove(m_strSlaveIP, m_iSlaveDataPort); SectorMsg msg; msg.setType(112); // open the file msg.setKey(m_pClient->m_iKey); msg.setData(0, (char*)&m_iSession, 4); int32_t port = m_pClient->m_DataChn.getPort(); msg.setData(4, (char*)&port, 4); Address serv; m_pClient->lookup(m_strFileName, serv); if (m_pClient->m_GMP.rpc(serv.m_strIP.c_str(), serv.m_iPort, &msg, &msg) < 0) return SectorError::E_CONNECTION; if (msg.getType() < 0) return *(int32_t*)(msg.getData()); m_strSlaveIP = msg.getData(); m_iSlaveDataPort = *(int*)(msg.getData() + 64); if (m_pClient->m_DataChn.connect(m_strSlaveIP, m_iSlaveDataPort) < 0) return SectorError::E_CONNECTION; memcpy(m_pcKey, m_pClient->m_pcCryptoKey, 16); memcpy(m_pcIV, m_pClient->m_pcCryptoIV, 8); m_pClient->m_DataChn.setCryptoKey(m_strSlaveIP, m_iSlaveDataPort, m_pcKey, m_pcIV); return 0; }
int Client::updateMasters() { SectorMsg msg; msg.setKey(m_iKey); map<uint32_t, Address> al; m_Routing.getListOfMasters(al); for (map<uint32_t, Address>::iterator i = al.begin(); i != al.end(); ++ i) { msg.setType(5); if (m_GMP.rpc(i->second.m_strIP.c_str(), i->second.m_iPort, &msg, &msg) >= 0) { Address addr; addr.m_strIP = i->second.m_strIP; addr.m_iPort = i->second.m_iPort; uint32_t key = i->first; m_Routing.init(); m_Routing.insert(key, addr); int n = *(int32_t*)msg.getData(); int p = 4; for (int m = 0; m < n; ++ m) { key = *(int32_t*)(msg.getData() + p); p += 4; addr.m_strIP = msg.getData() + p; p += addr.m_strIP.length() + 1; addr.m_iPort = *(int32_t*)(msg.getData() + p); p += 4; m_Routing.insert(key, addr); } return n + 1; } } return -1; }
int DCClient::dataInfo(const vector<string>& files, vector<string>& info) { SectorMsg msg; msg.setType(201); msg.setKey(m_pClient->m_iKey); int offset = 0; int32_t size = -1; for (vector<string>::const_iterator i = files.begin(); i != files.end(); ++ i) { string path = Metadata::revisePath(*i); size = path.length() + 1; msg.setData(offset, (char*)&size, 4); msg.setData(offset + 4, path.c_str(), size); offset += 4 + size; } size = -1; msg.setData(offset, (char*)&size, 4); Address serv; m_pClient->m_Routing.getPrimaryMaster(serv); if (m_pClient->m_GMP.rpc(serv.m_strIP.c_str(), serv.m_iPort, &msg, &msg) < 0) return SectorError::E_CONNECTION; if (msg.getType() < 0) return *(int32_t*)(msg.getData()); char* buf = msg.getData(); size = msg.m_iDataLength - SectorMsg::m_iHdrSize; while (size > 0) { info.insert(info.end(), buf); size -= strlen(buf) + 1; buf += strlen(buf) + 1; } return info.size(); }
int Client::stat(const string& path, SNode& attr) { string revised_path = Metadata::revisePath(path); SectorMsg msg; msg.resize(65536); msg.setType(102); msg.setKey(m_iKey); msg.setData(0, revised_path.c_str(), revised_path.length() + 1); Address serv; m_Routing.lookup(revised_path, serv); login(serv.m_strIP, serv.m_iPort); if (m_GMP.rpc(serv.m_strIP.c_str(), serv.m_iPort, &msg, &msg) < 0) return SectorError::E_CONNECTION; if (msg.getType() < 0) return *(int32_t*)(msg.getData()); attr.deserialize(msg.getData()); int n = (msg.m_iDataLength - SectorMsg::m_iHdrSize - 128) / 68; char* al = msg.getData() + 128; for (int i = 0; i < n; ++ i) { Address addr; addr.m_strIP = al + 68 * i; addr.m_iPort = *(int32_t*)(al + 68 * i + 64); attr.m_sLocation.insert(addr); } // check local cache: updated files may not be sent to the master yet m_StatCache.stat(path, attr); return 0; }
int Client::remove(const string& path) { string revised_path = Metadata::revisePath(path); SectorMsg msg; msg.setType(105); msg.setKey(m_iKey); msg.setData(0, revised_path.c_str(), revised_path.length() + 1); Address serv; m_Routing.lookup(revised_path, serv); login(serv.m_strIP, serv.m_iPort); if (m_GMP.rpc(serv.m_strIP.c_str(), serv.m_iPort, &msg, &msg) < 0) return SectorError::E_CONNECTION; if (msg.getType() < 0) return *(int32_t*)(msg.getData()); return 0; }
int FSClient::open(const string& filename, int mode, const string& hint) { m_strFileName = Metadata::revisePath(filename); SectorMsg msg; msg.setType(110); // open the file msg.setKey(m_pClient->m_iKey); int32_t m = mode; msg.setData(0, (char*)&m, 4); int32_t port = m_pClient->m_DataChn.getPort(); msg.setData(4, (char*)&port, 4); msg.setData(8, hint.c_str(), hint.length() + 1); msg.setData(72, m_strFileName.c_str(), m_strFileName.length() + 1); Address serv; m_pClient->m_Routing.lookup(m_strFileName, serv); if (m_pClient->m_GMP.rpc(serv.m_strIP.c_str(), serv.m_iPort, &msg, &msg) < 0) return SectorError::E_CONNECTION; if (msg.getType() < 0) return *(int32_t*)(msg.getData()); m_llSize = *(int64_t*)(msg.getData() + 72); m_llCurReadPos = m_llCurWritePos = 0; m_bRead = mode & 1; m_bWrite = mode & 2; m_bSecure = mode & 16; // check APPEND if (mode & 8) m_llCurWritePos = m_llSize; m_strSlaveIP = msg.getData(); m_iSlaveDataPort = *(int*)(msg.getData() + 64); m_iSession = *(int*)(msg.getData() + 68); cerr << "open file " << filename << " " << m_strSlaveIP << " " << m_iSlaveDataPort << endl; if (m_pClient->m_DataChn.connect(m_strSlaveIP, m_iSlaveDataPort) < 0) return SectorError::E_CONNECTION; string localip; int localport; m_pClient->m_DataChn.getSelfAddr(m_strSlaveIP, m_iSlaveDataPort, localip, localport); if (m_strSlaveIP == localip) { // the file is on the same node, check if the file can be read directly int32_t cmd = 6; m_pClient->m_DataChn.send(m_strSlaveIP, m_iSlaveDataPort, m_iSession, (char*)&cmd, 4); int size = 0; if (m_pClient->m_DataChn.recv(m_strSlaveIP, m_iSlaveDataPort, m_iSession, m_pcLocalPath, size) > 0) { fstream test((m_pcLocalPath + filename).c_str(), ios::binary | ios::in); if (!test.bad() && !test.fail()) m_bLocal = true; } } memcpy(m_pcKey, m_pClient->m_pcCryptoKey, 16); memcpy(m_pcIV, m_pClient->m_pcCryptoIV, 8); m_pClient->m_DataChn.setCryptoKey(m_strSlaveIP, m_iSlaveDataPort, m_pcKey, m_pcIV); if (m_bWrite) m_pClient->m_StatCache.insert(filename); 
return 0; }
int DCClient::prepareOutput(const char* spenodes) { m_pOutputLoc = NULL; m_pOutput->m_llSize = 0; m_pOutput->m_llRecNum = 0; // prepare output stream locations if (m_iOutputType > 0) { SectorMsg msg; msg.setType(204); msg.setKey(m_pClient->m_iKey); for (int i = 0; i < m_iSPENum; ++ i) { msg.setData(0, spenodes + i * 72, strlen(spenodes + i * 72) + 1); msg.setData(64, spenodes + i * 72 + 64, 4); msg.setData(68, (char*)&(m_pOutput->m_iFileNum), 4); msg.setData(72, (char*)&i, 4); int size = m_pOutput->m_strPath.length() + 1; int offset = 76; msg.setData(offset, (char*)&size, 4); msg.setData(offset + 4, m_pOutput->m_strPath.c_str(), m_pOutput->m_strPath.length() + 1); offset += 4 + size; size = m_pOutput->m_strName.length() + 1; msg.setData(offset, (char*)&size, 4); msg.setData(offset + 4, m_pOutput->m_strName.c_str(), m_pOutput->m_strName.length() + 1); offset += 4 + size; msg.setData(offset, (char*)&m_pClient->m_iKey, 4); offset += 4; msg.setData(offset, (char*)&m_iProcType, 4); if (m_iProcType == 1) { offset += 4; size = m_strOperator.length() + 1; msg.setData(offset, (char*)&size, 4); msg.setData(offset + 4, m_strOperator.c_str(), m_strOperator.length() + 1); } cout << "request shuffler " << spenodes + i * 72 << " " << *(int*)(spenodes + i * 72 + 64) << endl; Address serv; m_pClient->m_Routing.getPrimaryMaster(serv); if (m_pClient->m_GMP.rpc(serv.m_strIP.c_str(), serv.m_iPort, &msg, &msg) < 0) continue; if (msg.getType() < 0) { if (*(int32_t*)msg.getData() == SectorError::E_PERMISSION) break; else continue; } BUCKET b; b.m_iID = i; b.m_strIP = spenodes + i * 72; b.m_iPort = *(int32_t*)(spenodes + i * 72 + 64); b.m_iDataPort = *(int32_t*)(spenodes + i * 72 + 68); b.m_iShufflerPort = *(int32_t*)msg.getData(); b.m_iSession = *(int32_t*)(msg.getData() + 4); b.m_iProgress = 0; b.m_LastUpdateTime = CTimer::getTime(); m_mBucket[b.m_iID] = b; // set up data connection, not for data transfter, but for keep-alive m_pClient->m_DataChn.connect(b.m_strIP, b.m_iDataPort); // 
upload library files for MapReduce processing if (m_iProcType == 1) loadOperator(b.m_strIP, b.m_iPort, b.m_iDataPort, b.m_iSession); } if (m_mBucket.empty()) return SectorError::E_NOBUCKET; m_pOutputLoc = new char[m_mBucket.size() * 80]; int l = 0; for (map<int, BUCKET>::iterator b = m_mBucket.begin(); b != m_mBucket.end(); ++ b) { strcpy(m_pOutputLoc + l * 80, b->second.m_strIP.c_str()); *(int32_t*)(m_pOutputLoc + l * 80 + 64) = b->second.m_iPort; *(int32_t*)(m_pOutputLoc + l * 80 + 68) = b->second.m_iDataPort; *(int32_t*)(m_pOutputLoc + l * 80 + 72) = b->second.m_iShufflerPort; *(int32_t*)(m_pOutputLoc + l * 80 + 76) = b->second.m_iSession; ++ l; } // result locations map<int, BUCKET>::iterator b = m_mBucket.begin(); for (int i = 0; i < m_pOutput->m_iFileNum; ++ i) { char* tmp = new char[m_pOutput->m_strPath.length() + m_pOutput->m_strName.length() + 64]; sprintf(tmp, "%s/%s.%d", m_pOutput->m_strPath.c_str(), m_pOutput->m_strName.c_str(), i); m_pOutput->m_vFiles[i] = tmp; delete [] tmp; if (m_pOutput->m_vLocation[i].empty()) { // if user didn't specify output location, simply pick the next bucket location and rotate // this should be the normal case Address loc; loc.m_strIP = b->second.m_strIP; loc.m_iPort = b->second.m_iPort; m_pOutput->m_vLocation[i].insert(loc); m_pOutput->m_piLocID[i] = b->first; } else { // otherwise find if the user-sepcified location is available map<int, BUCKET>::iterator p = m_mBucket.begin(); for (; p != m_mBucket.end(); ++ p) { if ((p->second.m_strIP == m_pOutput->m_vLocation[i].begin()->m_strIP) && (p->second.m_iPort == m_pOutput->m_vLocation[i].begin()->m_iPort)) break; } if (p == m_mBucket.end()) { Address loc; loc.m_strIP = b->second.m_strIP; loc.m_iPort = b->second.m_iPort; m_pOutput->m_vLocation[i].insert(loc); m_pOutput->m_piLocID[i] = b->first; } else { Address loc; loc.m_strIP = p->second.m_strIP; loc.m_iPort = p->second.m_iPort; m_pOutput->m_vLocation[i].insert(loc); m_pOutput->m_piLocID[i] = p->first; } } if (++ b == 
m_mBucket.end()) b = m_mBucket.begin(); } } else if (m_iOutputType < 0) { char* localname = new char[m_pOutput->m_strPath.length() + m_pOutput->m_strName.length() + 64]; sprintf(localname, "%s/%s", m_pOutput->m_strPath.c_str(), m_pOutput->m_strName.c_str()); m_pOutputLoc = new char[strlen(localname) + 1]; memcpy(m_pOutputLoc, localname, strlen(localname) + 1); } return m_pOutput->m_iFileNum; }
int DCClient::run(const SphereStream& input, SphereStream& output, const string& op, const int& rows, const char* param, const int& size, const int& type) { CGuard::enterCS(m_RunLock); CGuard::leaveCS(m_RunLock); m_iProcType = type; m_strOperator = op; m_pcParam = new char[size]; memcpy(m_pcParam, param, size); m_iParamSize = size; m_pInput = (SphereStream*)&input; m_pOutput = &output; m_iRows = rows; m_iOutputType = m_pOutput->m_iFileNum; // when processing files, data will not be moved if (rows == 0) m_bDataMove = false; m_mpDS.clear(); m_mBucket.clear(); m_mSPE.clear(); int result = prepareInput(); if (result < 0) return result; cout << "JOB " << m_pInput->m_iFileNum << " " << m_pInput->m_llSize << " " << m_pInput->m_llRecNum << endl; SectorMsg msg; msg.setType(202); // locate available SPE msg.setKey(m_pClient->m_iKey); msg.m_iDataLength = SectorMsg::m_iHdrSize; Address serv; m_pClient->m_Routing.getPrimaryMaster(serv); if (m_pClient->m_GMP.rpc(serv.m_strIP.c_str(), serv.m_iPort, &msg, &msg) < 0) return SectorError::E_CONNECTION; if (msg.getType() < 0) return *(int32_t*)msg.getData(); m_iSPENum = (msg.m_iDataLength - 4) / 72; if (0 == m_iSPENum) return SectorError::E_RESOURCE; result = prepareSPE(msg.getData()); if (result < 0) return result; result = segmentData(); if (result <= 0) return result; if (m_iOutputType == -1) m_pOutput->init(m_mpDS.size()); result = prepareOutput(msg.getData()); if (result < 0) return result; m_iProgress = 0; m_iAvgRunTime = -1; m_iTotalDS = m_mpDS.size(); m_iTotalSPE = m_mSPE.size(); m_iAvailRes = 0; m_bBucketHealth = true; cout << m_mSPE.size() << " spes found! " << m_mpDS.size() << " data seg total." << endl; // starting... #ifndef WIN32 pthread_t scheduler; pthread_create(&scheduler, NULL, run, this); pthread_detach(scheduler); #else DWORD ThreadID; CreateThread(NULL, 0, run, this, NULL, &ThreadID); #endif m_bOpened = true; return 0; }
DWORD WINAPI DCClient::run(LPVOID param) #endif { DCClient* self = (DCClient*)param; CGuard::enterCS(self->m_RunLock); while (self->m_iProgress < self->m_iTotalDS) { if (0 == self->checkSPE()) break; string ip; int port; int tmp; SectorMsg msg; if (self->m_pClient->m_GMP.recvfrom(ip, port, tmp, &msg, false) < 0) continue; //TODO: due to one GMP limitation, one client can only execute one sphere process at each time //can be solved with individual GMP, or enhance GMP with session int32_t speid = *(int32_t*)(msg.getData()); map<int, SPE>::iterator s = self->m_mSPE.find(speid); if (s == self->m_mSPE.end()) continue; if (s->second.m_iStatus <= 1) continue; int progress = *(int32_t*)(msg.getData() + 4); s->second.m_LastUpdateTime = CTimer::getTime(); if (progress < 0) { cerr << "SPE PROCESSING ERROR " << ip << " " << port << endl; //error, quit this segment on the SPE s->second.m_pDS->m_iStatus = -1; s->second.m_pDS->m_iSPEID = -1; s->second.m_iStatus = 1; s->second.m_pDS->m_pResult->m_iStatus = *(int32_t*)(msg.getData() + 8); int errsize = msg.m_iDataLength - SectorMsg::m_iHdrSize - 12; if (errsize > 0) { s->second.m_pDS->m_pResult->m_pcData = new char[errsize]; strcpy(s->second.m_pDS->m_pResult->m_pcData, msg.getData() + 12); } ++ self->m_iProgress; #ifndef WIN32 pthread_mutex_lock(&self->m_ResLock); ++ self->m_iAvailRes; pthread_cond_signal(&self->m_ResCond); pthread_mutex_unlock(&self->m_ResLock); #else ++ self->m_iAvailRes; SetEvent(self->m_ResCond); #endif if (progress == -2) { // error occured to this SPE s->second.m_iStatus = -1; } continue; } if (progress > s->second.m_iProgress) s->second.m_iProgress = progress; if (progress < 100) continue; self->readResult(&(s->second)); // one SPE completes! 
int64_t t = CTimer::getTime(); if (self->m_iAvgRunTime <= 0) self->m_iAvgRunTime = (t - s->second.m_StartTime) / 1000000; else self->m_iAvgRunTime = (self->m_iAvgRunTime * 7 + (t - s->second.m_StartTime) / 1000000) / 8; } self->m_dRunningProgress = 0; // release all SPEs and close all Shufflers for (map<int, SPE>::iterator i = self->m_mSPE.begin(); i != self->m_mSPE.end(); ++ i) { // an offset of -1 will tell the SPE to release itself int64_t cmd = -1; self->m_pClient->m_DataChn.send(i->second.m_strIP, i->second.m_iDataPort, i->second.m_iSession, (char*)&cmd, 8); } for(map<int, BUCKET>::iterator i = self->m_mBucket.begin(); i != self->m_mBucket.end(); ++ i) { SectorMsg msg; int32_t cmd = -1; msg.setData(0, (char*)&cmd, 4); int id = 0; self->m_pClient->m_GMP.sendto(i->second.m_strIP.c_str(), i->second.m_iShufflerPort, id, &msg); } //TODO: need to detect lost slaves while (self->checkBucket() > 0) { string ip; int port; int tmp; SectorMsg msg; if (self->m_pClient->m_GMP.recvfrom(ip, port, tmp, &msg, false) < 0) continue; int32_t bucketid = *(int32_t*)(msg.getData()); map<int, BUCKET>::iterator b = self->m_mBucket.find(bucketid); if (b == self->m_mBucket.end()) continue; b->second.m_iProgress = 100; #ifndef WIN32 pthread_cond_signal(&self->m_ResCond); #else SetEvent(self->m_ResCond); #endif } // some buckets may be left empty because no value was sent to them. remove these from the output stream self->postProcessOutput(); // set totalSPE = 0, so that read() will return error immediately if (self->m_iProgress < 100) self->m_iTotalSPE = 0; CGuard::leaveCS(self->m_RunLock); #ifndef WIN32 return NULL; #else return 0; #endif }
DWORD WINAPI Slave::SPEShuffler(LPVOID p) #endif { Slave* self = ((Param5*)p)->serv_instance; int transid = ((Param5*)p)->transid; string client_ip = ((Param5*)p)->client_ip; int client_port = ((Param5*)p)->client_ctrl_port; int client_data_port = ((Param5*)p)->client_data_port; string path = ((Param5*)p)->path; string localfile = ((Param5*)p)->filename; int bucketnum = ((Param5*)p)->bucketnum; CGMP* gmp = ((Param5*)p)->gmp; string function = ((Param5*)p)->function; int bucketid = ((Param5*)p)->bucketid; const int key = ((Param5*)p)->key; const int type = ((Param5*)p)->type; string master_ip = ((Param5*)p)->master_ip; int master_port = ((Param5*)p)->master_port; queue<Bucket>* bq = NULL; CMutex* bqlock = NULL; CCond* bqcond = NULL; int64_t* pendingSize = NULL; pthread_t shufflerex; bool init_success = true; //set up data connection, for keep-alive purpose if (self->m_DataChn.connect(client_ip, client_data_port) < 0) { init_success = false; } else { // read library files for MapReduce, no need for Sphere UDF if (type == 1) self->acceptLibrary(key, client_ip, client_data_port, transid); bq = new queue<Bucket>; bqlock = new CMutex; bqcond = new CCond; pendingSize = new int64_t; *pendingSize = 0; ((Param5*)p)->bq = bq; ((Param5*)p)->bqlock = bqlock; ((Param5*)p)->bqcond = bqcond; ((Param5*)p)->pending = pendingSize; #ifndef WIN32 pthread_create(&shufflerex, NULL, SPEShufflerEx, p); #else DWORD ThreadID; shufflerex = CreateThread(NULL, 0, SPEShufflerEx, p, NULL, &ThreadID); #endif self->m_SectorLog << LogStart(LogLevel::SCREEN) << "SPE Shuffler " << path << " " << localfile << " " << bucketnum << LogEnd(); } while (init_success) { string speip; int speport; SectorMsg msg; int msgid; int r = gmp->recvfrom(speip, speport, msgid, &msg, false); // client releases the task or client has already been shutdown if (((r > 0) && (speip == client_ip) && (speport == client_port)) || ((r < 0) && (!self->m_DataChn.isConnected(client_ip, client_data_port)))) { Bucket b; b.totalnum = 
-1; b.totalsize = 0; bqlock->acquire(); bq->push(b); bqcond->signal(); bqlock->release(); break; } if (r < 0) continue; if (*pendingSize > 256000000) { // too many incoming results, ask the sender to wait // the receiver buffer size threshold is set to 256MB. This prevents the shuffler from being overflowed // it also helps direct the traffic to less congested shuffler and leads to better load balance msg.setType(-msg.getType()); gmp->sendto(speip, speport, msgid, &msg); } else { Bucket b; b.totalnum = *(int32_t*)(msg.getData() + 8);; b.totalsize = *(int32_t*)(msg.getData() + 12); b.src_ip = speip; b.src_dataport = *(int32_t*)msg.getData(); b.session = *(int32_t*)(msg.getData() + 4); gmp->sendto(speip, speport, msgid, &msg); if (!self->m_DataChn.isConnected(speip, b.src_dataport)) self->m_DataChn.connect(speip, b.src_dataport); bqlock->acquire(); bq->push(b); *pendingSize += b.totalsize; bqcond->signal(); bqlock->release(); } } if (init_success) { #ifndef WIN32 pthread_join(shufflerex, NULL); #else WaitForSingleObject(shufflerex, INFINITE); #endif delete bqlock; delete bqcond; delete pendingSize; SectorMsg msg; msg.setType(1); // success, return result msg.setData(0, (char*)&(bucketid), 4); int progress = 100; msg.setData(4, (char*)&progress, 4); msg.m_iDataLength = SectorMsg::m_iHdrSize + 8; int id = 0; self->m_GMP.sendto(client_ip.c_str(), client_port, id, &msg); self->m_SectorLog << LogStart(LogLevel::LEVEL_3) << "bucket completed 100 " << client_ip << " " << client_port << LogEnd(); } gmp->close(); delete gmp; self->reportSphere(master_ip, master_port, transid); // clear this transaction self->m_TransManager.updateSlave(transid, self->m_iSlaveID); return NULL; }
int Slave::readSectorFile(const string& filename, const int64_t& offset, const int64_t& size, char* buf) { SectorMsg msg; msg.setType(110); // open the index file msg.setKey(0); int32_t mode = 1; msg.setData(0, (char*)&mode, 4); int32_t port = m_DataChn.getPort(); msg.setData(4, (char*)&port, 4); int32_t len_name = filename.length() + 1; msg.setData(8, (char*)&len_name, 4); msg.setData(12, filename.c_str(), len_name); int32_t len_opt = 0; msg.setData(12 + len_name, (char*)&len_opt, 4); Address addr; m_Routing.lookup(filename, addr); if (m_GMP.rpc(addr.m_strIP.c_str(), addr.m_iPort, &msg, &msg) < 0) return -1; if (msg.getType() < 0) return -1; int32_t session = *(int32_t*)msg.getData(); string srcip = msg.getData() + 24; int32_t srcport = *(int32_t*)(msg.getData() + 64 + 24); // connect to the slave node with the file. if (!m_DataChn.isConnected(srcip, srcport)) { if (m_DataChn.connect(srcip, srcport) < 0) return -1; } int32_t cmd = 1; m_DataChn.send(srcip, srcport, session, (char*)&cmd, 4); char req[16]; *(int64_t*)req = offset; *(int64_t*)(req + 8) = size; if (m_DataChn.send(srcip, srcport, session, req, 16) < 0) return -1; int response = -1; if (m_DataChn.recv4(srcip, srcport, session, response) < 0) return -1; char* tmp = NULL; int recvsize = size; if (m_DataChn.recv(srcip, srcport, session, tmp, recvsize) < 0) return -1; if (recvsize == size) memcpy(buf, tmp, size); delete [] tmp; // file close command: 5 cmd = 5; m_DataChn.send(srcip, srcport, session, (char*)&cmd, 4); m_DataChn.recv4(srcip, srcport, session, response); // update total received data m_SlaveStat.updateIO(srcip, size, +SlaveStat::SYS_IN); if (recvsize != size) return -1; return size; }
void* Slave::SPEShuffler(void* p) { Slave* self = ((Param5*)p)->serv_instance; string client_ip = ((Param5*)p)->client_ip; int client_port = ((Param5*)p)->client_ctrl_port; int client_data_port = ((Param5*)p)->client_data_port; string path = ((Param5*)p)->path; string localfile = ((Param5*)p)->filename; // int bucketnum = ((Param5*)p)->bucketnum; CGMP* gmp = ((Param5*)p)->gmp; string function = ((Param5*)p)->function; //set up data connection, for keep-alive purpose if (self->m_DataChn.connect(client_ip, client_data_port) < 0) return NULL; queue<Bucket>* bq = new queue<Bucket>; pthread_mutex_t* bqlock = new pthread_mutex_t; pthread_mutex_init(bqlock, NULL); pthread_cond_t* bqcond = new pthread_cond_t; pthread_cond_init(bqcond, NULL); int64_t* pendingSize = new int64_t; *pendingSize = 0; ((Param5*)p)->bq = bq; ((Param5*)p)->bqlock = bqlock; ((Param5*)p)->bqcond = bqcond; ((Param5*)p)->pending = pendingSize; pthread_t ex; pthread_create(&ex, NULL, SPEShufflerEx, p); pthread_detach(ex); // cout << "SPE Shuffler " << path << " " << localfile << " " << bucketnum << endl; while (true) { string speip; int speport; SectorMsg msg; int msgid; int r = gmp->recvfrom(speip, speport, msgid, &msg, false); // client releases the task or client has already been shutdown if (((r > 0) && (speip == client_ip) && (speport == client_port)) || ((r < 0) && (!self->m_DataChn.isConnected(client_ip, client_data_port)))) { Bucket b; b.totalnum = -1; b.totalsize = 0; pthread_mutex_lock(bqlock); bq->push(b); pthread_cond_signal(bqcond); pthread_mutex_unlock(bqlock); break; } if (r < 0) continue; if (*pendingSize > 256000000) { // too many incoming results, ask the sender to wait // the receiver buffer size threshold is set to 256MB. 
This prevents the shuffler from being overflowed // it also helps direct the traffic to less congested shuffler and leads to better load balance msg.setType(-msg.getType()); gmp->sendto(speip, speport, msgid, &msg); } else { Bucket b; b.totalnum = *(int32_t*)(msg.getData() + 8);; b.totalsize = *(int32_t*)(msg.getData() + 12); b.src_ip = speip; b.src_dataport = *(int32_t*)msg.getData(); b.session = *(int32_t*)(msg.getData() + 4); gmp->sendto(speip, speport, msgid, &msg); if (!self->m_DataChn.isConnected(speip, b.src_dataport)) self->m_DataChn.connect(speip, b.src_dataport); pthread_mutex_lock(bqlock); bq->push(b); *pendingSize += b.totalsize; pthread_cond_signal(bqcond); pthread_mutex_unlock(bqlock); } } gmp->close(); delete gmp; return NULL; }
void* Slave::copy(void* p) { Slave* self = ((Param3*)p)->serv_instance; int transid = ((Param3*)p)->transid; string src = ((Param3*)p)->src; string dst = ((Param3*)p)->dst; string master_ip = ((Param3*)p)->master_ip; int master_port = ((Param3*)p)->master_port; delete (Param3*)p; if (src.c_str()[0] == '\0') src = "/" + src; if (dst.c_str()[0] == '\0') dst = "/" + dst; SNode tmp; if (self->m_pLocalFile->lookup(src.c_str(), tmp) >= 0) { //if file is local, copy directly //note that in this case, src != dst, therefore this is a regular "cp" command, not a system replication //TODO: check disk space self->createDir(dst.substr(0, dst.rfind('/'))); string rhome = self->reviseSysCmdPath(self->m_strHomeDir); string rsrc = self->reviseSysCmdPath(src); string rdst = self->reviseSysCmdPath(dst); system(("cp " + rhome + rsrc + " " + rhome + rdst).c_str()); // if the file has been modified during the replication, remove this replica int type = (src == dst) ? +FileChangeType::FILE_UPDATE_REPLICA : +FileChangeType::FILE_UPDATE_NEW; struct stat64 s; if (stat64((self->m_strHomeDir + dst).c_str(), &s) < 0) type = +FileChangeType::FILE_UPDATE_NO; if (self->report(master_ip, master_port, transid, dst, type) < 0) system(("rm " + rhome + rdst).c_str()); // clear this transaction self->m_TransManager.updateSlave(transid, self->m_iSlaveID); return NULL; } bool success = true; queue<string> tr; tr.push(src); while (!tr.empty()) { string src_path = tr.front(); tr.pop(); // try list this path SectorMsg msg; msg.setType(101); msg.setKey(0); msg.setData(0, src_path.c_str(), src_path.length() + 1); Address addr; self->m_Routing.lookup(src_path, addr); if (self->m_GMP.rpc(addr.m_strIP.c_str(), addr.m_iPort, &msg, &msg) < 0) { success = false; break; } if (msg.getType() >= 0) { // if this is a directory, put all files and sub-drectories into the queue of files to be copied string filelist = msg.getData(); unsigned int s = 0; while (s < filelist.length()) { int t = filelist.find(';', s); SNode sn; 
sn.deserialize(filelist.substr(s, t - s).c_str()); tr.push(src_path + "/" + sn.m_strName); s = t + 1; } continue; } // open the file and copy it to local msg.setType(110); msg.setKey(0); int32_t mode = SF_MODE::READ; msg.setData(0, (char*)&mode, 4); int64_t reserve = 0; msg.setData(4, (char*)&reserve, 8); int32_t localport = self->m_DataChn.getPort(); msg.setData(12, (char*)&localport, 4); msg.setData(16, "\0", 1); msg.setData(80, src_path.c_str(), src_path.length() + 1); if ((self->m_GMP.rpc(addr.m_strIP.c_str(), addr.m_iPort, &msg, &msg) < 0) || (msg.getType() < 0)) { success = false; break; } int32_t session = *(int32_t*)msg.getData(); int64_t size = *(int64_t*)(msg.getData() + 4); time_t ts = *(int64_t*)(msg.getData() + 12); string ip = msg.getData() + 24; int32_t port = *(int32_t*)(msg.getData() + 64 + 24); if (!self->m_DataChn.isConnected(ip, port)) { if (self->m_DataChn.connect(ip, port) < 0) { success = false; break; } } // download command: 3 int32_t cmd = 3; self->m_DataChn.send(ip, port, session, (char*)&cmd, 4); int64_t offset = 0; self->m_DataChn.send(ip, port, session, (char*)&offset, 8); int response = -1; if ((self->m_DataChn.recv4(ip, port, session, response) < 0) || (-1 == response)) { success = false; break; } string dst_path = dst; if (src != src_path) dst_path += "/" + src_path.substr(src.length() + 1, src_path.length() - src.length() - 1); //copy to .tmp first, then move to real location self->createDir(string(".tmp") + dst_path.substr(0, dst_path.rfind('/'))); fstream ofs; ofs.open((self->m_strHomeDir + ".tmp" + dst_path).c_str(), ios::out | ios::binary | ios::trunc); int64_t unit = 64000000; //send 64MB each time int64_t torecv = size; int64_t recd = 0; while (torecv > 0) { int64_t block = (torecv < unit) ? 
torecv : unit; if (self->m_DataChn.recvfile(ip, port, session, ofs, offset + recd, block) < 0) { success = false; break; } recd += block; torecv -= block; } ofs.close(); // update total received data size self->m_SlaveStat.updateIO(ip, size, +SlaveStat::SYS_IN); cmd = 5; self->m_DataChn.send(ip, port, session, (char*)&cmd, 4); self->m_DataChn.recv4(ip, port, session, cmd); if (src == dst) { //utime: update timestamp according to the original copy, for replica only; files created by "cp" have new timestamp utimbuf ut; ut.actime = ts; ut.modtime = ts; utime((self->m_strHomeDir + ".tmp" + dst_path).c_str(), &ut); } } string rhome = self->reviseSysCmdPath(self->m_strHomeDir); string rfile = self->reviseSysCmdPath(dst); if (success) { // move from temporary dir to the real dir when the copy is completed self->createDir(dst.substr(0, dst.rfind('/'))); system(("mv " + rhome + ".tmp" + rfile + " " + rhome + rfile).c_str()); // if the file has been modified during the replication, remove this replica int32_t type = (src == dst) ? +FileChangeType::FILE_UPDATE_REPLICA : +FileChangeType::FILE_UPDATE_NEW; if (self->report(master_ip, master_port, transid, dst, type) < 0) unlink((rhome + rfile).c_str()); } else { // failed, remove all temporary files system(("rm -rf " + rhome + ".tmp" + rfile).c_str()); self->report(master_ip, master_port, transid, "", +FileChangeType::FILE_UPDATE_NO); } // clear this transaction self->m_TransManager.updateSlave(transid, self->m_iSlaveID); return NULL; }
int Slave::SPEReadData(const string& datafile, const int64_t& offset, int& size, int64_t* index, const int64_t& totalrows, char*& block) { SNode sn; string idxfile = datafile + ".idx"; //read index if (m_pLocalFile->lookup(idxfile.c_str(), sn) >= 0) { fstream idx; idx.open((m_strHomeDir + idxfile).c_str(), ios::in | ios::binary); if (idx.bad() || idx.fail()) return -1; idx.seekg(offset * 8); idx.read((char*)index, (totalrows + 1) * 8); idx.close(); } else if (size) { SectorMsg msg; msg.setType(110); // open the index file msg.setKey(0); int32_t mode = 1; msg.setData(0, (char*)&mode, 4); int32_t port = m_DataChn.getPort(); msg.setData(4, (char*)&port, 4); msg.setData(8, "\0", 1); msg.setData(72, idxfile.c_str(), idxfile.length() + 1); Address addr; m_Routing.lookup(idxfile, addr); if (m_GMP.rpc(addr.m_strIP.c_str(), addr.m_iPort, &msg, &msg) < 0) return -1; if (msg.getType() < 0) return -1; string srcip = msg.getData(); int srcport = *(int*)(msg.getData() + 64); int session = *(int*)(msg.getData() + 68); // cout << "rendezvous connect " << srcip << " " << srcport << endl; if (m_DataChn.connect(srcip, srcport) < 0) return -1; int32_t cmd = 1; m_DataChn.send(srcip, srcport, session, (char*)&cmd, 4); int response = -1; if (m_DataChn.recv4(srcip, srcport, session, response) < 0) return -1; char req[16]; *(int64_t*)req = offset * 8; *(int64_t*)(req + 8) = (totalrows + 1) * 8; if (m_DataChn.send(srcip, srcport, session, req, 16) < 0) return -1; char* tmp = NULL; int size = (totalrows + 1) * 8; if (m_DataChn.recv(srcip, srcport, session, tmp, size) < 0) return -1; if (size > 0) memcpy((char*)index, tmp, size); delete [] tmp; // file close command: 5 cmd = 5; m_DataChn.send(srcip, srcport, session, (char*)&cmd, 4); m_DataChn.recv4(srcip, srcport, session, response); // update total received data m_SlaveStat.updateIO(srcip, (totalrows + 1) * 8, 0); } else { // no index, so rows are bytes size = totalrows; index[0] = 0; goto Lreaddata; } size = index[totalrows] - index[0]; 
Lreaddata: block = new char[size]; // read data file if (m_pLocalFile->lookup(datafile.c_str(), sn) >= 0) { fstream ifs; ifs.open((m_strHomeDir + datafile).c_str(), ios::in | ios::binary); if (ifs.bad() || ifs.fail()) return -1; ifs.seekg(index[0]); ifs.read(block, size); ifs.close(); } else { SectorMsg msg; msg.setType(110); // open the index file msg.setKey(0); int32_t mode = 1; msg.setData(0, (char*)&mode, 4); int32_t port = m_DataChn.getPort(); msg.setData(4, (char*)&port, 4); msg.setData(8, "\0", 1); msg.setData(72, datafile.c_str(), datafile.length() + 1); Address addr; m_Routing.lookup(datafile, addr); if (m_GMP.rpc(addr.m_strIP.c_str(), addr.m_iPort, &msg, &msg) < 0) return -1; if (msg.getType() < 0) return -1; string srcip = msg.getData(); int srcport = *(int*)(msg.getData() + 64); int session = *(int*)(msg.getData() + 68); // cout << "rendezvous connect " << srcip << " " << srcport << endl; if (m_DataChn.connect(srcip, srcport) < 0) return -1; int32_t cmd = 1; m_DataChn.send(srcip, srcport, session, (char*)&cmd, 4); int response = -1; if (m_DataChn.recv4(srcip, srcport, session, response) < 0) return -1; char req[16]; *(int64_t*)req = index[0]; *(int64_t*)(req + 8) = index[totalrows] - index[0]; if (m_DataChn.send(srcip, srcport, session, req, 16) < 0) return -1; char* tmp = NULL; int size = index[totalrows] - index[0]; if (m_DataChn.recv(srcip, srcport, session, tmp, size) < 0) return -1; if (size > 0) memcpy(block, tmp, size); delete [] tmp; // file close command: 5 cmd = 5; m_DataChn.send(srcip, srcport, session, (char*)&cmd, 4); m_DataChn.recv4(srcip, srcport, session, response); // update total received data m_SlaveStat.updateIO(srcip, index[totalrows] - index[0], 0); } return totalrows; }
DWORD WINAPI Slave::copy(LPVOID p) #endif { Slave* self = ((Param3*)p)->serv_instance; int transid = ((Param3*)p)->transid; int dir = ((Param3*)p)->dir; string src = ((Param3*)p)->src; string dst = ((Param3*)p)->dst; string master_ip = ((Param3*)p)->master_ip; int master_port = ((Param3*)p)->master_port; delete (Param3*)p; if (src.c_str()[0] == '\0') src = "/" + src; if (dst.c_str()[0] == '\0') dst = "/" + dst; bool success = true; queue<string> tr; // files to be replicated queue<string> td; // directories to be explored if (dir > 0) td.push(src); else tr.push(src); while (!td.empty()) { // If the file to be replicated is a directory, recursively list all files first string src_path = td.front(); td.pop(); // try list this path SectorMsg msg; msg.setType(101); msg.setKey(0); msg.setData(0, src_path.c_str(), src_path.length() + 1); Address addr; self->m_Routing.lookup(src_path, addr); if (self->m_GMP.rpc(addr.m_strIP.c_str(), addr.m_iPort, &msg, &msg) < 0) { success = false; break; } // the master only returns positive if this is a directory if (msg.getType() >= 0) { // if this is a directory, create it, and put all files and sub-directories into the queue of files to be copied // create a local dir string dst_path = dst; if (src != src_path) dst_path += "/" + src_path.substr(src.length() + 1, src_path.length() - src.length() - 1); //create at .tmp first, then move to real location self->createDir(string(".tmp") + dst_path); string filelist = msg.getData(); unsigned int s = 0; while (s < filelist.length()) { int t = filelist.find(';', s); SNode sn; sn.deserialize(filelist.substr(s, t - s).c_str()); if (sn.m_bIsDir) td.push(src_path + "/" + sn.m_strName); else tr.push(src_path + "/" + sn.m_strName); s = t + 1; } continue; } } while (!tr.empty()) { string src_path = tr.front(); tr.pop(); SNode tmp; if (self->m_pLocalFile->lookup(src_path.c_str(), tmp) >= 0) { //if file is local, copy directly //note that in this case, src != dst, therefore this is a regular "cp" 
command, not a system replication //IMPORTANT!!! //local files must be read directly from local disk, and cannot be read via datachn due to its limitation string dst_path = dst; if (src != src_path) dst_path += "/" + src_path.substr(src.length() + 1, src_path.length() - src.length() - 1); //copy to .tmp first, then move to real location self->createDir(string(".tmp") + dst_path.substr(0, dst_path.rfind('/'))); LocalFS::copy(self->m_strHomeDir + src_path, self->m_strHomeDir + ".tmp" + dst_path); } else { // open the file and copy it to local SectorMsg msg; msg.setType(110); msg.setKey(0); int32_t mode = SF_MODE::READ; msg.setData(0, (char*)&mode, 4); int32_t localport = self->m_DataChn.getPort(); msg.setData(4, (char*)&localport, 4); int32_t len_name = src_path.length() + 1; msg.setData(8, (char*)&len_name, 4); msg.setData(12, src_path.c_str(), len_name); int32_t len_opt = 0; msg.setData(12 + len_name, (char*)&len_opt, 4); Address addr; self->m_Routing.lookup(src_path, addr); if ((self->m_GMP.rpc(addr.m_strIP.c_str(), addr.m_iPort, &msg, &msg) < 0) || (msg.getType() < 0)) { success = false; break; } int32_t session = *(int32_t*)msg.getData(); int64_t size = *(int64_t*)(msg.getData() + 4); time_t ts = *(int64_t*)(msg.getData() + 12); string ip = msg.getData() + 24; int32_t port = *(int32_t*)(msg.getData() + 64 + 24); if (!self->m_DataChn.isConnected(ip, port)) { if (self->m_DataChn.connect(ip, port) < 0) { success = false; break; } } // download command: 3 int32_t cmd = 3; self->m_DataChn.send(ip, port, session, (char*)&cmd, 4); int64_t offset = 0; self->m_DataChn.send(ip, port, session, (char*)&offset, 8); int response = -1; if ((self->m_DataChn.recv4(ip, port, session, response) < 0) || (-1 == response)) { success = false; break; } string dst_path = dst; if (src != src_path) dst_path += "/" + src_path.substr(src.length() + 1, src_path.length() - src.length() - 1); //copy to .tmp first, then move to real location self->createDir(string(".tmp") + dst_path.substr(0, 
dst_path.rfind('/'))); fstream ofs; ofs.open((self->m_strHomeDir + ".tmp" + dst_path).c_str(), ios::out | ios::binary | ios::trunc); int64_t unit = 64000000; //send 64MB each time int64_t torecv = size; int64_t recd = 0; while (torecv > 0) { int64_t block = (torecv < unit) ? torecv : unit; if (self->m_DataChn.recvfile(ip, port, session, ofs, offset + recd, block) < 0) { success = false; break; } recd += block; torecv -= block; } ofs.close(); // update total received data size self->m_SlaveStat.updateIO(ip, size, +SlaveStat::SYS_IN); cmd = 5; self->m_DataChn.send(ip, port, session, (char*)&cmd, 4); self->m_DataChn.recv4(ip, port, session, cmd); if (src == dst) { //utime: update timestamp according to the original copy, for replica only; files created by "cp" have new timestamp utimbuf ut; ut.actime = ts; ut.modtime = ts; utime((self->m_strHomeDir + ".tmp" + dst_path).c_str(), &ut); } } } if (success) { // move from temporary dir to the real dir when the copy is completed self->createDir(dst.substr(0, dst.rfind('/'))); LocalFS::rename(self->m_strHomeDir + ".tmp" + dst, self->m_strHomeDir + dst); // if the file has been modified during the replication, remove this replica int32_t type = (src == dst) ? +FileChangeType::FILE_UPDATE_REPLICA : +FileChangeType::FILE_UPDATE_NEW; if (self->report(master_ip, master_port, transid, dst, type) < 0) LocalFS::erase(self->m_strHomeDir + dst); } else { // failed, remove all temporary files LocalFS::erase(self->m_strHomeDir + ".tmp" + dst); self->report(master_ip, master_port, transid, "", +FileChangeType::FILE_UPDATE_NO); } // clear this transaction self->m_TransManager.updateSlave(transid, self->m_iSlaveID); return NULL; }
int Slave::report(const string& master_ip, const int& master_port, const int32_t& transid, const vector<string>& filelist, const int32_t& change) { vector<string> serlist; if (change != FileChangeType::FILE_UPDATE_NO) { for (vector<string>::const_iterator i = filelist.begin(); i != filelist.end(); ++ i) { struct stat s; if (-1 == stat ((m_strHomeDir + *i).c_str(), &s)) continue; SNode sn; sn.m_strName = *i; sn.m_bIsDir = S_ISDIR(s.st_mode) ? 1 : 0; sn.m_llTimeStamp = s.st_mtime; sn.m_llSize = s.st_size; Address addr; addr.m_strIP = "127.0.0.1"; addr.m_iPort = 0; sn.m_sLocation.insert(addr); if (change == FileChangeType::FILE_UPDATE_WRITE) { // file may be created on write; in this case, create a new meta entry instead of update non-existing one if (m_pLocalFile->update(sn.m_strName, sn.m_llTimeStamp, sn.m_llSize) < 0) m_pLocalFile->create(sn); } else if (change == FileChangeType::FILE_UPDATE_NEW) m_pLocalFile->create(sn); else if (change == FileChangeType::FILE_UPDATE_REPLICA) m_pLocalFile->create(sn); char* buf = NULL; sn.serialize(buf); serlist.push_back(buf); delete [] buf; } } SectorMsg msg; msg.setType(1); msg.setKey(0); msg.setData(0, (char*)&transid, 4); msg.setData(4, (char*)&m_iSlaveID, 4); msg.setData(8, (char*)&change, 4); int32_t num = serlist.size(); msg.setData(12, (char*)&num, 4); int pos = 16; for (vector<string>::iterator i = serlist.begin(); i != serlist.end(); ++ i) { int32_t bufsize = i->length() + 1; msg.setData(pos, (char*)&bufsize, 4); msg.setData(pos + 4, i->c_str(), bufsize); pos += bufsize + 4; } //TODO: if the current master is down, try a different master if (m_GMP.rpc(master_ip.c_str(), master_port, &msg, &msg) < 0) return -1; if (msg.getType() < 0) return *(int32_t*)msg.getData(); return 1; }