示例#1
0
DWORD WINAPI Slave::SPEHandler(LPVOID p)
#endif
{
   Slave* self = ((Param4*)p)->serv_instance;
   const string ip = ((Param4*)p)->client_ip;
   const int ctrlport = ((Param4*)p)->client_ctrl_port;
   const int dataport = ((Param4*)p)->client_data_port;
   const int speid = ((Param4*)p)->speid;
   const int transid = ((Param4*)p)->transid;
   const int key = ((Param4*)p)->key;
   const string function = ((Param4*)p)->function;
   const int rows = ((Param4*)p)->rows;
   const char* param = ((Param4*)p)->param;
   const int psize = ((Param4*)p)->psize;
   const int type = ((Param4*)p)->type;
   const string master_ip = ((Param4*)p)->master_ip;
   const int master_port = ((Param4*)p)->master_port;
   delete (Param4*)p;

   SectorMsg msg;
   bool init_success = true;

   self->m_SectorLog << LogStart(LogLevel::LEVEL_3) << "SPE starts " << ip << " " << dataport << LogEnd();

   if (self->m_DataChn.connect(ip, dataport) < 0)
   {
      self->m_SectorLog << LogStart(LogLevel::LEVEL_2) << "failed to connect to spe client " << ip << ":" << ctrlport << " " << function << LogEnd();
      init_success = false;
   }

   self->m_SectorLog << LogStart(LogLevel::LEVEL_3) << "connected." << LogEnd();

   // read outupt parameters
   int buckets = 0;
   if (self->m_DataChn.recv4(ip, dataport, transid, buckets) < 0)
      init_success = false;

   SPEDestination dest;
   if (buckets > 0)
   {
      if (self->m_DataChn.recv4(ip, dataport, transid, dest.m_iLocNum) < 0)
         init_success = false;
      int len = dest.m_iLocNum * 80;
      if (self->m_DataChn.recv(ip, dataport, transid, dest.m_pcOutputLoc, len) < 0)
         init_success = false;
      len = buckets * 4;
      if (self->m_DataChn.recv(ip, dataport, transid, (char*&)dest.m_piLocID, len) < 0)
         init_success = false;
   }
   else if (buckets < 0)
   {
      int32_t len = 0;
      if (self->m_DataChn.recv(ip, dataport, transid, dest.m_pcOutputLoc, len) < 0)
         init_success = false;
      dest.m_strLocalFile = dest.m_pcOutputLoc;
   }
   dest.init(buckets);


   // initialize processing function
   self->acceptLibrary(key, ip, dataport, transid);
   SPHERE_PROCESS process = NULL;
   MR_MAP map = NULL;
   MR_PARTITION partition = NULL;
   void* lh = NULL;
   self->openLibrary(key, function, lh);
   if (NULL == lh)
   {
      self->m_SectorLog << LogStart(LogLevel::LEVEL_2) << "failed to open SPE library " << ip << ":" << ctrlport << " " << function << LogEnd();
      init_success = false;
   }

   if (type == 0)
   {
      if (self->getSphereFunc(lh, function, process) < 0)
         init_success = false;
   }
   else if (type == 1)
   {
      if (self->getMapFunc(lh, function, map, partition) < 0)
         init_success = false;
   }
   else
   {
      init_success = false;
   }

   timeval t1, t2, t3, t4;
   gettimeofday(&t1, 0);

   msg.setType(1); // success, return result
   msg.setData(0, (char*)&(speid), 4);

   SPEResult result;
   result.init(buckets);

   // processing...
   while (init_success)
   {
      char* dataseg = NULL;
      int size = 0;
      if (self->m_DataChn.recv(ip, dataport, transid, dataseg, size) < 0)
         break;

      // client request to close this SPE
      if (size < 20)
         break;

      // read data segment parameters
      int64_t offset = *(int64_t*)(dataseg);
      int64_t totalrows = *(int64_t*)(dataseg + 8);
      int32_t dsid = *(int32_t*)(dataseg + 16);
      string datafile = dataseg + 20;
      sprintf(dest.m_pcLocalFileID, ".%d", dsid);
      delete [] dataseg;

      self->m_SectorLog << LogStart(LogLevel::LEVEL_3) << "new job " << datafile << " " << offset << " " << totalrows << LogEnd();

      int64_t* index = NULL;
      if ((totalrows > 0) && (rows != 0))
         index = new int64_t[totalrows + 1];
      char* block = NULL;
      int unitrows = (rows != -1) ? rows : totalrows;
      int progress = 0;

      // read data
      if (0 != rows)
      {
         size = 0;
         if (self->SPEReadData(datafile, offset, size, index, totalrows, block) <= 0)
         {
            delete [] index;
            delete [] block;

            progress = SectorError::E_SPEREAD;
            msg.setData(4, (char*)&progress, 4);
            msg.m_iDataLength = SectorMsg::m_iHdrSize + 8;
            int id = 0;
            self->m_GMP.sendto(ip.c_str(), ctrlport, id, &msg);

            continue;
         }
      }
      else
      {
         // store file name in "process" parameter
         block = new char[datafile.length() + 1];
         strcpy(block, datafile.c_str());
         size = datafile.length() + 1;
         totalrows = 0;
      }

      SInput input;
      input.m_pcUnit = NULL;
      input.m_pcParam = (char*)param;
      input.m_iPSize = psize;
      SOutput output;
      output.m_iBufSize = (size < 64000000) ? 64000000 : size;
      output.m_pcResult = new char[output.m_iBufSize];
      output.m_iIndSize = (totalrows < 640000) ? 640000 : totalrows + 2;
      output.m_pllIndex = new int64_t[output.m_iIndSize];
      output.m_piBucketID = new int[output.m_iIndSize];
      SFile file;
      file.m_strHomeDir = self->m_strHomeDir;
      char path[64];
      sprintf(path, "%d", key);
      file.m_strLibDir = self->m_strHomeDir + ".sphere/" + path + "/";
      file.m_strTempDir = self->m_strHomeDir + ".tmp/";
      file.m_iSlaveID = self->m_iSlaveID;
      file.m_pInMemoryObjects = &self->m_InMemoryObjects;

      result.clear();
      gettimeofday(&t3, 0);

      int deliverystatus = 0;
      int processstatus = 0;

      // process data segments
      for (int i = 0; i < totalrows; i += unitrows)
      {
         if (unitrows > totalrows - i)
            unitrows = totalrows - i;

         input.m_pcUnit = block + index[i] - index[0];
         input.m_iRows = unitrows;
         input.m_pllIndex = index + i;
         output.m_iResSize = 0;
         output.m_iRows = 0;
         output.m_strError = "";

         processstatus = self->processData(input, output, file, result, buckets, process, map, partition);
         if (processstatus < 0)
         {
            progress = SectorError::E_SPEPROC;
            break;
         }

         timeval t;
         gettimeofday(&t, NULL);
         unsigned int seed = t.tv_sec * 1000000 + t.tv_usec;
         srand(seed);
         int ds_thresh = 32000000 * ((rand() % 7) + 1);
         if ((result.m_llTotalDataSize >= ds_thresh) && (buckets != 0))
            deliverystatus = self->deliverResult(buckets, result, dest);

         if (deliverystatus < 0)
         {
            progress = SectorError::E_SPEWRITE;
            break;
         }

         gettimeofday(&t4, 0);
         if (t4.tv_sec - t3.tv_sec > 1)
         {
            progress = i * 100 / totalrows;
            msg.setData(4, (char*)&progress, 4);
            msg.m_iDataLength = SectorMsg::m_iHdrSize + 8;
            int id = 0;
            self->m_GMP.sendto(ip.c_str(), ctrlport, id, &msg);

            t3 = t4;
         }
      }

      // process files
      if (0 == unitrows)
      {
         SNode s;
         LocalFS::stat(self->m_strHomeDir + datafile, s);
         int64_t filesize = s.m_llSize;

         input.m_pcUnit = block;
         input.m_iRows = -1;
         input.m_pllIndex = NULL;
         output.m_llOffset = 0;

         for (int i = 0; (i == 0) || (output.m_llOffset > 0); ++ i)
         {
            // re-initialize output everytime UDF is called, except for offset
            output.m_iResSize = 0;
            output.m_iRows = 0;
            output.m_strError = "";

            processstatus = self->processData(input, output, file, result, buckets, process, map, partition);
            if (processstatus < 0)
            {
               progress = SectorError::E_SPEPROC;
               break;
            }

            timeval t;
            gettimeofday(&t, NULL);
            unsigned int seed = t.tv_sec * 1000000 + t.tv_usec;
            srand(seed);
            int ds_thresh = 32000000 * ((rand() % 7) + 1);
            if ((result.m_llTotalDataSize >= ds_thresh) && (buckets != 0))
               deliverystatus = self->deliverResult(buckets, result, dest);

            if (deliverystatus < 0)
            {
               progress = SectorError::E_SPEWRITE;
               break;
            }

            if (output.m_llOffset > 0)
            {
               progress = output.m_llOffset * 100LL / filesize;
               msg.setData(4, (char*)&progress, 4);
               msg.m_iDataLength = SectorMsg::m_iHdrSize + 8;
               int id = 0;
               self->m_GMP.sendto(ip.c_str(), ctrlport, id, &msg);
            }
         }
      }

      // if buckets = 0, send back to clients, otherwise deliver to local or network locations
      if ((buckets != 0) && (progress >= 0))
         deliverystatus = self->deliverResult(buckets, result, dest);

      if (deliverystatus < 0)
         progress = SectorError::E_SPEWRITE;
      else
         progress = 100;

      self->m_SectorLog << LogStart(LogLevel::LEVEL_3) << "SPE completed " << progress << " " << ip << " " << ctrlport << LogEnd();

      msg.setData(4, (char*)&progress, 4);

      if (100 == progress)
      {
         msg.m_iDataLength = SectorMsg::m_iHdrSize + 8;
         int id = 0;
         self->m_GMP.sendto(ip.c_str(), ctrlport, id, &msg);

         self->sendResultToClient(buckets, dest.m_piSArray, dest.m_piRArray, result, ip, dataport, transid);
         dest.reset(buckets);

         // report new files
         vector<string> filelist;
         for (set<string>::iterator i = file.m_sstrFiles.begin(); i != file.m_sstrFiles.end(); ++ i)
            filelist.push_back(*i);
         self->report(master_ip, master_port, transid, filelist, +FileChangeType::FILE_UPDATE_NEW);
         self->reportMO(master_ip, master_port, transid);
      }
      else
      {
         msg.setData(8, (char*)&processstatus, 4);
         msg.m_iDataLength = SectorMsg::m_iHdrSize + 12;
         if (output.m_strError.length() > 0)
            msg.setData(12, output.m_strError.c_str(), output.m_strError.length() + 1);
         else if (deliverystatus < 0)
         {
            string tmp = "System Error: data transfer to buckets failed.";
            msg.setData(12, tmp.c_str(), tmp.length() + 1);
         }

         int id = 0;
         self->m_GMP.sendto(ip.c_str(), ctrlport, id, &msg);
      }

      delete [] index;
      delete [] block;
      delete [] output.m_pcResult;
      delete [] output.m_pllIndex;
      delete [] output.m_piBucketID;
      index = NULL;
      block = NULL;
   }

   gettimeofday(&t2, 0);
   int duration = t2.tv_sec - t1.tv_sec;
   self->m_SectorLog << LogStart(LogLevel::LEVEL_3) << "comp server closed " << ip << " " << ctrlport << " " << duration << LogEnd();

   delete [] param;

   vector<Address> bad;

   if (init_success)
   {
      self->closeLibrary(lh);

      multimap<int64_t, Address> sndspd;
      for (int i = 0; i < dest.m_iLocNum; ++ i)
      {
         Address addr;
         addr.m_strIP = dest.m_pcOutputLoc + i * 80;
         addr.m_iPort = *(int32_t*)(dest.m_pcOutputLoc + i * 80 + 64);
         int dataport = *(int32_t*)(dest.m_pcOutputLoc + i * 80 + 68);
         int64_t spd = self->m_DataChn.getRealSndSpeed(addr.m_strIP, dataport);
         if (spd > 0)
            sndspd.insert(pair<int64_t, Address>(spd, addr));
      }
      vector<Address> bad;
      self->checkBadDest(sndspd, bad);
   }
   else
   {
      // this SPE failed to initialize. send the error to the client
      int progress = SectorError::E_SPEUDF;
      msg.setData(4, (char*)&progress, 4);
      msg.m_iDataLength = SectorMsg::m_iHdrSize + 8;
      int id = 0;
      self->m_GMP.sendto(ip.c_str(), ctrlport, id, &msg);
   }

   self->reportSphere(master_ip, master_port, transid, &bad);

   // clear this transaction
   self->m_TransManager.updateSlave(transid, self->m_iSlaveID);

   return NULL;
}
示例#2
0
DWORD WINAPI Slave::SPEShuffler(LPVOID p)
#endif
{
   Slave* self = ((Param5*)p)->serv_instance;
   int transid = ((Param5*)p)->transid;
   string client_ip = ((Param5*)p)->client_ip;
   int client_port = ((Param5*)p)->client_ctrl_port;
   int client_data_port = ((Param5*)p)->client_data_port;
   string path = ((Param5*)p)->path;
   string localfile = ((Param5*)p)->filename;
   int bucketnum = ((Param5*)p)->bucketnum;
   CGMP* gmp = ((Param5*)p)->gmp;
   string function = ((Param5*)p)->function;
   int bucketid = ((Param5*)p)->bucketid;
   const int key = ((Param5*)p)->key;
   const int type = ((Param5*)p)->type;
   string master_ip = ((Param5*)p)->master_ip;
   int master_port = ((Param5*)p)->master_port;

   queue<Bucket>* bq = NULL;
   CMutex* bqlock = NULL;
   CCond* bqcond = NULL;
   int64_t* pendingSize = NULL;
   pthread_t shufflerex;

   bool init_success = true;

   //set up data connection, for keep-alive purpose
   if (self->m_DataChn.connect(client_ip, client_data_port) < 0)
   {
      init_success = false;
   }
   else
   {
      // read library files for MapReduce, no need for Sphere UDF
      if (type == 1)
         self->acceptLibrary(key, client_ip, client_data_port, transid);

      bq = new queue<Bucket>;
      bqlock = new CMutex;
      bqcond = new CCond;
      pendingSize = new int64_t;
      *pendingSize = 0;

      ((Param5*)p)->bq = bq;
      ((Param5*)p)->bqlock = bqlock;
      ((Param5*)p)->bqcond = bqcond;
      ((Param5*)p)->pending = pendingSize;

#ifndef WIN32
      pthread_create(&shufflerex, NULL, SPEShufflerEx, p);
#else
      DWORD ThreadID;
      shufflerex = CreateThread(NULL, 0, SPEShufflerEx, p, NULL, &ThreadID);
#endif

      self->m_SectorLog << LogStart(LogLevel::SCREEN) << "SPE Shuffler " << path << " " << localfile << " " << bucketnum << LogEnd();
   }

   while (init_success)
   {
      string speip;
      int speport;
      SectorMsg msg;
      int msgid;
      int r = gmp->recvfrom(speip, speport, msgid, &msg, false);

      // client releases the task or client has already been shutdown
      if (((r > 0) && (speip == client_ip) && (speport == client_port))
         || ((r < 0) && (!self->m_DataChn.isConnected(client_ip, client_data_port))))
      {
         Bucket b;
         b.totalnum = -1;
         b.totalsize = 0;
         bqlock->acquire();
         bq->push(b);
         bqcond->signal();
         bqlock->release();

         break;
      }

      if (r < 0)
         continue;

      if (*pendingSize > 256000000)
      {
         // too many incoming results, ask the sender to wait
         // the receiver buffer size threshold is set to 256MB. This prevents the shuffler from being overflowed
         // it also helps direct the traffic to less congested shuffler and leads to better load balance
         msg.setType(-msg.getType());
         gmp->sendto(speip, speport, msgid, &msg);
      }
      else
      {
         Bucket b;
         b.totalnum = *(int32_t*)(msg.getData() + 8);;
         b.totalsize = *(int32_t*)(msg.getData() + 12);
         b.src_ip = speip;
         b.src_dataport = *(int32_t*)msg.getData();
         b.session = *(int32_t*)(msg.getData() + 4);

         gmp->sendto(speip, speport, msgid, &msg);

         if (!self->m_DataChn.isConnected(speip, b.src_dataport))
            self->m_DataChn.connect(speip, b.src_dataport);

         bqlock->acquire();
         bq->push(b);
         *pendingSize += b.totalsize;
         bqcond->signal();
         bqlock->release();
      }
   }

   if (init_success)
   {
#ifndef WIN32
      pthread_join(shufflerex, NULL);
#else
      WaitForSingleObject(shufflerex, INFINITE);
#endif

      delete bqlock;
      delete bqcond;
      delete pendingSize;

      SectorMsg msg;
      msg.setType(1); // success, return result
      msg.setData(0, (char*)&(bucketid), 4);
      int progress = 100;
      msg.setData(4, (char*)&progress, 4);
      msg.m_iDataLength = SectorMsg::m_iHdrSize + 8;
      int id = 0;
      self->m_GMP.sendto(client_ip.c_str(), client_port, id, &msg);

      self->m_SectorLog << LogStart(LogLevel::LEVEL_3) << "bucket completed 100 " << client_ip << " " << client_port << LogEnd();
   }

   gmp->close();
   delete gmp;

   self->reportSphere(master_ip, master_port, transid);

   // clear this transaction
   self->m_TransManager.updateSlave(transid, self->m_iSlaveID);

   return NULL;
}
示例#3
0
void* Slave::SPEShufflerEx(void* p)
{
   Slave* self = ((Param5*)p)->serv_instance;
   int transid = ((Param5*)p)->transid;
   string client_ip = ((Param5*)p)->client_ip;
   int client_port = ((Param5*)p)->client_ctrl_port;
   int client_data_port = ((Param5*)p)->client_data_port;
   string path = ((Param5*)p)->path;
   string localfile = ((Param5*)p)->filename;
   int bucketnum = ((Param5*)p)->bucketnum;
   int bucketid = ((Param5*)p)->bucketid;
   const int key = ((Param5*)p)->key;
   const int type = ((Param5*)p)->type;
   string function = ((Param5*)p)->function;
   queue<Bucket>* bq = ((Param5*)p)->bq;
   pthread_mutex_t* bqlock = ((Param5*)p)->bqlock;
   pthread_cond_t* bqcond = ((Param5*)p)->bqcond;
   int64_t* pendingSize = ((Param5*)p)->pending;
   string master_ip = ((Param5*)p)->master_ip;
   int master_port = ((Param5*)p)->master_port;
   delete (Param5*)p;

   self->createDir(path);

   // remove old result data files
   for (int i = 0; i < bucketnum; ++ i)
   {
      char* tmp = new char[self->m_strHomeDir.length() + path.length() + localfile.length() + 64];
      sprintf(tmp, "%s.%d", (self->m_strHomeDir + path + "/" + localfile).c_str(), i);
      unlink(tmp);
      sprintf(tmp, "%s.%d.idx", (self->m_strHomeDir + path + "/" + localfile).c_str(), i);
      unlink(tmp);
      delete [] tmp;
   }

   // index file initial offset
   vector<int64_t> offset;
   offset.resize(bucketnum);
   for (vector<int64_t>::iterator i = offset.begin(); i != offset.end(); ++ i)
      *i = 0;
   set<int> fileid;

   while (true)
   {
      pthread_mutex_lock(bqlock);
      while (bq->empty())
         pthread_cond_wait(bqcond, bqlock);
      Bucket b = bq->front();
      bq->pop();
      *pendingSize -= b.totalsize;
      pthread_mutex_unlock(bqlock);

      if (b.totalnum == -1)
         break;

      string speip = b.src_ip;
      int dataport = b.src_dataport;
      int session = b.session;

      for (int i = 0; i < b.totalnum; ++ i)
      {
         int bucket = 0;
         if (self->m_DataChn.recv4(speip, dataport, session, bucket) < 0)
            continue;

         fileid.insert(bucket);

         char* tmp = new char[self->m_strHomeDir.length() + path.length() + localfile.length() + 64];
         sprintf(tmp, "%s.%d", (self->m_strHomeDir + path + "/" + localfile).c_str(), bucket);
         fstream datafile(tmp, ios::out | ios::binary | ios::app);
         sprintf(tmp, "%s.%d.idx", (self->m_strHomeDir + path + "/" + localfile).c_str(), bucket);
         fstream indexfile(tmp, ios::out | ios::binary | ios::app);
         delete [] tmp;
         int64_t start = offset[bucket];
         if (0 == start)
            indexfile.write((char*)&start, 8);

         int32_t len;
         char* data = NULL;
         if (self->m_DataChn.recv(speip, dataport, session, data, len) < 0)
            continue;
         datafile.write(data, len);
         delete [] data;

         tmp = NULL;
         if (self->m_DataChn.recv(speip, dataport, session, tmp, len) < 0)
            continue;
         int64_t* index = (int64_t*)tmp;
         for (int j = 0; j < len / 8; ++ j)
            index[j] += start;
         offset[bucket] = index[len / 8 - 1];
         indexfile.write(tmp, len);
         delete [] tmp;

         datafile.close();
         indexfile.close();
      }

      // update total received data
      self->m_SlaveStat.updateIO(speip, b.totalsize, 0);
   }

   pthread_mutex_destroy(bqlock);
   pthread_cond_destroy(bqcond);
   delete bqlock;
   delete bqcond;
   delete pendingSize;

   // sort and reduce
   if (type == 1)
   {
      void* lh = NULL;
      self->openLibrary(key, function, lh);
      //if (NULL == lh)
      //   break;

      MR_COMPARE comp = NULL;
      MR_REDUCE reduce = NULL;
      self->getReduceFunc(lh, function, comp, reduce);

      if (NULL != comp)
      {
         char* tmp = new char[self->m_strHomeDir.length() + path.length() + localfile.length() + 64];
         for (set<int>::iterator i = fileid.begin(); i != fileid.end(); ++ i)
         {
            sprintf(tmp, "%s.%d", (self->m_strHomeDir + path + "/" + localfile).c_str(), *i);
            self->sort(tmp, comp, reduce);
         }
         delete [] tmp;
      }

      self->closeLibrary(lh);
   }


   // report sphere output files
   char* tmp = new char[path.length() + localfile.length() + 64];
   vector<string> filelist;
   for (set<int>::iterator i = fileid.begin(); i != fileid.end(); ++ i)
   {
      sprintf(tmp, "%s.%d", (path + "/" + localfile).c_str(), *i);
      filelist.push_back(tmp);
      sprintf(tmp, "%s.%d.idx", (path + "/" + localfile).c_str(), *i);
      filelist.push_back(tmp);
   }
   delete [] tmp;

   self->report(master_ip, master_port, transid, filelist, 1);

   self->reportSphere(master_ip, master_port, transid);

   // cout << "bucket completed 100 " << client_ip << " " << client_port << endl;
   SectorMsg msg;
   msg.setType(1); // success, return result
   msg.setData(0, (char*)&(bucketid), 4);
   int progress = 100;
   msg.setData(4, (char*)&progress, 4);
   msg.m_iDataLength = SectorMsg::m_iHdrSize + 8;
   int id = 0;
   self->m_GMP.sendto(client_ip.c_str(), client_port, id, &msg);

   //remove this client data channel
   self->m_DataChn.remove(client_ip, client_data_port);

   return NULL;
}