Example #1
void BlobResourceHandle::didRead(int bytesRead)
{
    if (bytesRead < 0) {
        failed(notReadableError);
        return;
    }

    consumeData(m_buffer.data(), bytesRead);
}
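WebKit's BlobResourceHandle::didRead() treats a negative byte count as a read failure and reports notReadableError; otherwise it hands the freshly filled m_buffer to consumeData().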
Example #2
void NetworkDataTaskBlob::readData(const BlobDataItem& item)
{
    ASSERT(item.data().data());

    long long bytesToRead = item.length() - m_currentItemReadSize;
    if (bytesToRead > m_totalRemainingSize)
        bytesToRead = m_totalRemainingSize;
    consumeData(reinterpret_cast<const char*>(item.data().data()->data()) + item.offset() + m_currentItemReadSize, static_cast<int>(bytesToRead));
    m_currentItemReadSize = 0;
}
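NetworkDataTaskBlob::readData() consumes bytes directly out of an in-memory BlobDataItem: the bytes left in the current item are clamped to m_totalRemainingSize, the source pointer is offset by item.offset() plus m_currentItemReadSize, and m_currentItemReadSize is reset once the item has been consumed.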
Example #3
void BlobResourceHandle::readDataAsync(const BlobDataItem& item)
{
    ASSERT(m_async);

    long long bytesToRead = item.length - m_currentItemReadSize;
    if (bytesToRead > m_totalRemainingSize)
        bytesToRead = m_totalRemainingSize;
    consumeData(item.data->data() + item.offset + m_currentItemReadSize, static_cast<int>(bytesToRead));
    m_currentItemReadSize = 0;
}
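An older revision of BlobResourceHandle::readDataAsync(), from before BlobDataItem's fields were wrapped in accessors: length, data, and offset are public members here, rather than the length(), data(), and offset() calls used in the neighboring examples.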
Example #4
void BlobResourceHandle::readDataAsync(const BlobDataItem& item)
{
    ASSERT(isMainThread());
    ASSERT(m_async);
    ASSERT(item.data().data());

    Ref<BlobResourceHandle> protectedThis(*this);

    long long bytesToRead = item.length() - m_currentItemReadSize;
    if (bytesToRead > m_totalRemainingSize)
        bytesToRead = m_totalRemainingSize;
    consumeData(reinterpret_cast<const char*>(item.data().data()->data()) + item.offset() + m_currentItemReadSize, static_cast<int>(bytesToRead));
    m_currentItemReadSize = 0;
}
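The later readDataAsync() adds main-thread and async-mode assertions and takes a Ref<BlobResourceHandle> protector so the handle cannot be destroyed while consumeData() runs.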
Example #5
void NetworkDataTaskBlob::didRead(int bytesRead)
{
    if (m_state == State::Canceling || m_state == State::Completed || (!m_client && !isDownload())) {
        clearStream();
        return;
    }

    if (bytesRead < 0) {
        didFail(Error::NotReadableError);
        return;
    }

    Ref<NetworkDataTaskBlob> protectedThis(*this);
    consumeData(m_buffer.data(), bytesRead);
}
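NetworkDataTaskBlob::didRead() first bails out when the task is canceling, completed, or has lost its client, then applies the same negative-byte-count check as example #1 before consuming the buffer. None of these examples show consumeData() itself; purely as a hypothetical sketch of the contract the callers imply (a pointer plus a byte count, a running m_totalRemainingSize, and a continuation of the read loop), a counterpart might look like this (every name below other than consumeData and m_totalRemainingSize is an assumption):

// Hypothetical sketch only; not the actual WebKit implementation.
void BlobResourceHandle::consumeData(const char* data, int bytesRead)
{
    m_totalRemainingSize -= bytesRead;   // bookkeeping implied by the callers above
    if (bytesRead > 0)
        didReceiveData(data, bytesRead); // assumed hook handing bytes to the client
    if (m_totalRemainingSize > 0)
        readAsync();                     // assumed: schedule the next read
    else
        notifyFinish();                  // assumed: all data delivered
}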
Example #6
File: Server.cpp Project: joao29a/S.O.S
void Server::initServer(int port){
	createSharedMemory();
	pid_t id = fork();

	// Create the FIFO if it does not already exist.
	struct stat st;
	if (stat(FIFO_NAME, &st) != 0)
		mkfifo(FIFO_NAME, 0666);

	if (id > 0){
		// Parent: handle the listening socket, then reap the child.
		initSocket(port);
		wait(nullptr);
	}
	else if (id == 0){
		// Child: read and process incoming data.
		consumeData();
	}
}
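A non-WebKit use of the same name: the parent process handles the listening socket while the forked child calls consumeData() to process whatever arrives via the shared memory and FIFO set up above.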
Example #7
void BlobResourceHandle::didRead(int bytesRead)
{
    consumeData(m_buffer.data(), bytesRead);
}
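A stripped-down variant of example #1 that forwards the buffer unconditionally, without the bytesRead < 0 error check.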
Example #8
void UpnpPortMapping::mapPort(const std::string& gateway, const std::string& client, uint16_t port, PortType type, bool enable)
{
	{
		std::lock_guard<std::mutex> guard(state->stateMutex);

		for (auto it = state->pendingRequests.begin(); it != state->pendingRequests.end(); it++)
		{
			if ((*it)->getHostname() == gateway)
			{
				UpnpMappingState::TodoMapping todo;
				todo.mapping = { gateway, port, type };
				todo.client = client;
				todo.enable = enable;
				todo.blockingStream = (*it).get();
				state->waitingMapping.push_back(todo);
				return;
			}
		}	
	}

	std::string portStr = std::to_string(port);
	std::string portType = type == PortType::Tcp ? "TCP" : "UDP";

	auto request = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
		"<s:Envelope xmlns:s =\"http://schemas.xmlsoap.org/soap/envelope/\" s:encodingStyle=\"http://schemas.xmlsoap.org/soap/encoding/\">\n"
		"<s:Body>\n"
		"<u:AddPortMapping xmlns:u=\"urn:schemas-upnp-org:service:WANIPConnection:1\">\n"
		"<NewRemoteHost></NewRemoteHost>\n"
		"<NewExternalPort>" + portStr + "</NewExternalPort>\n"
		"<NewProtocol>" + portType + "</NewProtocol>\n"
		"<NewInternalPort>" + portStr + "</NewInternalPort>\n"
		"<NewInternalClient>" + client + "</NewInternalClient>\n"
		"<NewEnabled>" + std::string(enable ? "1" : "0") + "</NewEnabled>\n"
		"<NewPortMappingDescription>mtTorrent UPnP " + portStr + " " + portType + "</NewPortMappingDescription>\n"
		"<NewLeaseDuration>0</NewLeaseDuration>\n"
		"</u:AddPortMapping>\n"
		"</s:Body>\n"
		"</s:Envelope>\r\n";

	auto httpHeader = createUpnpHttpHeader(gateway + ":1900", request.length(), "urn:schemas-upnp-org:service:WANIPConnection:1#AddPortMapping");

	auto stream = std::make_shared<TcpAsyncStream>(io);
	state->pendingRequests.push_back(stream);
	auto streamPtr = stream.get();
	auto upnpState = state;

	stream->onConnectCallback = [streamPtr, httpHeader, request]()
	{
		DataBuffer buffer;	
		buffer.assign(httpHeader.begin(), httpHeader.end());
		buffer.insert(buffer.end(), request.begin(), request.end());

		streamPtr->write(buffer);
	};

	stream->onReceiveCallback = [streamPtr, upnpState, gateway, port, type]()
	{
		auto data = streamPtr->getReceivedData();
		auto header = HttpHeaderInfo::readFromBuffer(data);

		if (header.valid && data.size() >= (header.dataStart + header.dataSize))
		{
			streamPtr->consumeData(header.dataStart + header.dataSize);

			std::lock_guard<std::mutex> guard(upnpState->stateMutex);

			if (header.success)
			{
				upnpState->mappedPorts.push_back({ gateway, port, type });
			}
		}
	};

	stream->onCloseCallback = [streamPtr, upnpState, this](int code)
	{
		{
			std::lock_guard<std::mutex> guard(upnpState->stateMutex);

			for (auto it = upnpState->pendingRequests.begin(); it != upnpState->pendingRequests.end(); it++)
			{
				if ((*it).get() == streamPtr)
				{
					upnpState->pendingRequests.erase(it);
					break;
				}
			}
		}

		if (upnpState->active)
		{
			checkPendingMapping(streamPtr);
		}
	};

	stream->connect(gateway, 1900);
}
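Here consumeData() belongs to mtTorrent's TcpAsyncStream: once a complete HTTP response to the SOAP AddPortMapping request has arrived, the receive handler consumes header.dataStart + header.dataSize bytes from the stream's buffer and, on success, records the mapping under the state mutex. The close handler removes the request from pendingRequests and retries any mapping that was queued while this one was in flight.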
Example #9
void UpnpPortMapping::unmapPort(const std::string& gateway, uint16_t port, PortType type)
{
	std::string portStr = std::to_string(port);
	std::string portType = type == PortType::Tcp ? "TCP" : "UDP";

	auto request = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
		"<s:Envelope xmlns:s=\"http://schemas.xmlsoap.org/soap/envelope/\" s:encodingStyle=\"http://schemas.xmlsoap.org/soap/encoding/\">\n"
		"<s:Body>\n"
		"<u:DeletePortMapping xmlns:u=\"urn:schemas-upnp-org:service:WANIPConnection:1\">\n"
		"<NewRemoteHost></NewRemoteHost>\n"
		"<NewProtocol>" + portType + "</NewProtocol>\n"
		"<NewExternalPort>" + portStr + "</NewExternalPort>\n"
		"</u:DeletePortMapping>\n"
		"</s:Body>\n"
		"</s:Envelope>\r\n";

	auto httpHeader = createUpnpHttpHeader(gateway + ":1900", request.length(), "urn:schemas-upnp-org:service:WANIPConnection:1#DeletePortMapping");

	auto stream = std::make_shared<TcpAsyncStream>(io);
	auto streamPtr = stream.get();
	state->pendingRequests.push_back(stream);
	auto upnpState = state;

	stream->onConnectCallback = [streamPtr, upnpState, httpHeader, request]()
	{
		DataBuffer buffer;
		buffer.assign(httpHeader.begin(), httpHeader.end());
		buffer.insert(buffer.end(), request.begin(), request.end());

		std::lock_guard<std::mutex> guard(upnpState->stateMutex);
		streamPtr->write(buffer);
	};

	stream->onReceiveCallback = [streamPtr, upnpState, gateway, port, type]()
	{
		auto data = streamPtr->getReceivedData();
		auto header = HttpHeaderInfo::readFromBuffer(data);

		if (header.valid && data.size() >= (header.dataStart + header.dataSize))
		{
			streamPtr->consumeData(header.dataStart + header.dataSize);

			std::lock_guard<std::mutex> guard(upnpState->stateMutex);

			if (header.success)
			{
				for (auto it = upnpState->mappedPorts.begin(); it != upnpState->mappedPorts.end(); it++)
				{
					if (it->gateway == gateway && it->port == port && it->type == type)
					{
						upnpState->mappedPorts.erase(it);
						break;
					}
				}
			}
		}
	};

	stream->onCloseCallback = [streamPtr, upnpState](int code)
	{
		std::lock_guard<std::mutex> guard(upnpState->stateMutex);

		for (auto it = upnpState->pendingRequests.begin(); it != upnpState->pendingRequests.end(); it++)
		{
			if ((*it).get() == streamPtr)
			{
				upnpState->pendingRequests.erase(it);
				break;
			}
		}
	};

	stream->connect(gateway, 1900);
}
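unmapPort() mirrors mapPort(): the same consume-on-complete pattern, but issuing DeletePortMapping and erasing the matching entry from mappedPorts under the state mutex. Both examples rely on createUpnpHttpHeader(host, contentLength, soapAction), which this page does not show; a minimal sketch of what such a helper plausibly returns (the POST preamble of a SOAP call; the control path "/" and the exact header set are assumptions, not taken from mtTorrent) could be:

#include <string>

// Hypothetical sketch; the real mtTorrent helper may differ.
std::string createUpnpHttpHeader(const std::string& host, size_t contentLength, const std::string& soapAction)
{
	return "POST / HTTP/1.1\r\n"
		"Host: " + host + "\r\n"
		"Content-Type: text/xml; charset=\"utf-8\"\r\n"
		"Content-Length: " + std::to_string(contentLength) + "\r\n"
		"SOAPAction: \"" + soapAction + "\"\r\n"
		"Connection: close\r\n\r\n";
}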
Example #10
int main(int argc, char *argv[])
{
  MatMulArgs matMulArgs;
  matMulArgs.processArgs(argc, argv);

  size_t matrixAHeight = matMulArgs.getMatrixAHeight();
  size_t matrixBWidth = matMulArgs.getMatrixBWidth();
  size_t sharedDim = matMulArgs.getSharedDim();

  size_t blockSize = matMulArgs.getBlockSize();
  size_t numReadThreads = matMulArgs.getNumReadThreads();
  size_t numProdThreads = matMulArgs.getNumMatMulThreads();
  size_t numAccumThreads = (size_t) ceil((double)numProdThreads / 2.0);
  std::string directory = matMulArgs.getDirectory();
  std::string outputDirectory = matMulArgs.getOutputDir();
  bool runSequential = matMulArgs.isRunSequential();
  bool validate = matMulArgs.isValidateResults();

  size_t numGPUs = matMulArgs.getNumGPUs();
  // Note: a variable-length array is a compiler extension, not standard C++.
  int gpuIds[numGPUs];

  matMulArgs.copyGpuIds(gpuIds);


//  CUcontext *contexts = initCuda(numGPUs, gpuIds);

  std::string runtimeFileStr("runtimes");

  int numRetry = 1;

  std::ofstream runtimeFile(runtimeFileStr, std::ios::app);
  double *matrixA = new double[matrixAHeight * sharedDim];
  double *matrixB = new double[matrixBWidth * sharedDim];
  double *matrixC = new double[matrixAHeight * matrixBWidth];

  initMatrix(matrixA, sharedDim, matrixAHeight, true);
  initMatrix(matrixB, matrixBWidth, sharedDim, true);

  for (int numTry = 0; numTry < numRetry; numTry++) {
    SimpleClock clk;
    SimpleClock endToEnd;

    if (runSequential) {
      endToEnd.start();
      initMatMul(numProdThreads);

      cublasXtHandle_t handle;

      cublasXtCreate(&handle);

      cublasXtDeviceSelect(handle, numGPUs, gpuIds);
      cublasXtSetBlockDim(handle, blockSize);

      clk.start();
      computeSequentialMatMul(matrixA, matrixB, matrixC, (size_t) matrixAHeight, (size_t) sharedDim,
                              (size_t) matrixBWidth, handle);
      clk.stopAndIncrement();

      cublasXtDestroy(handle);

      endToEnd.stopAndIncrement();
    }
    else {
      endToEnd.start();
      initMatMul(1);

      LoadMatrixTask *readAMatTask =
          new LoadMatrixTask(matrixA,
                             numReadThreads,
                             MatrixType::MatrixA,
                             blockSize,
                             sharedDim,
                             matrixAHeight,
                             true);

      LoadMatrixTask *readBMatTask =
          new LoadMatrixTask(matrixB,
                             numReadThreads,
                             MatrixType::MatrixB,
                             blockSize,
                             matrixBWidth,
                             sharedDim,
                             true);

      MatrixMulBlkCudaTask *mmulTask = new MatrixMulBlkCudaTask(gpuIds, numGPUs);
      MatMulAccumTask *accumTask = new MatMulAccumTask(numAccumThreads, true);

      MatMulOutputTask *outputTask = new MatMulOutputTask(matrixC, matrixAHeight, blockSize, true);

      size_t blkHeightMatB = readBMatTask->getNumBlocksRows();
      size_t blkWidthMatB = readBMatTask->getNumBlocksCols();

      size_t blkHeightMatA = readAMatTask->getNumBlocksRows();
      size_t blkWidthMatA = readAMatTask->getNumBlocksCols();

      CudaCopyInTask *cudaCopyInATask = new CudaCopyInTask(gpuIds, numGPUs, MatrixType::MatrixA, blkWidthMatB);
      CudaCopyInTask *cudaCopyInBTask = new CudaCopyInTask(gpuIds, numGPUs, MatrixType::MatrixB, blkHeightMatA);

      CudaCopyOutTask *cudaCopyOutCTask = new CudaCopyOutTask(gpuIds, numGPUs, MatrixType::MatrixC);

      MatMulDistributeRule *distributeRuleMatA = new MatMulDistributeRule(MatrixType::MatrixA);
      MatMulDistributeRule *distributeRuleMatB = new MatMulDistributeRule(MatrixType::MatrixB);

      MatMulLoadRule<htgs::m_data_t<double>> *loadRule = new MatMulLoadRule<htgs::m_data_t<double>>(blkWidthMatA, blkHeightMatA, blkWidthMatB, blkHeightMatB);
      MatMulAccumulateRule<double *> *accumulateRule = new MatMulAccumulateRule<double *>(blkWidthMatB, blkHeightMatA, blkWidthMatA);

      MatMulOutputRule *outputRule = new MatMulOutputRule(blkWidthMatB, blkHeightMatA, blkWidthMatA);

      auto distributeBk = new htgs::Bookkeeper<MatrixRequestData>();
      auto matMulBk = new htgs::Bookkeeper<MatrixBlockData<htgs::m_data_t<double>>>();
      auto matAccumBk = new htgs::Bookkeeper<MatrixBlockData<double *>>();

      auto taskGraph = new htgs::TaskGraphConf<MatrixRequestData, MatrixBlockData<double *>>();

      taskGraph->setGraphConsumerTask(distributeBk);
      taskGraph->addRuleEdge(distributeBk, distributeRuleMatA, readAMatTask);
      taskGraph->addRuleEdge(distributeBk, distributeRuleMatB, readBMatTask);


      taskGraph->addEdge(readAMatTask, cudaCopyInATask);
      taskGraph->addEdge(readBMatTask, cudaCopyInBTask);

      taskGraph->addEdge(cudaCopyInATask, matMulBk);
      taskGraph->addEdge(cudaCopyInBTask, matMulBk);

      taskGraph->addRuleEdge(matMulBk, loadRule, mmulTask);

      taskGraph->addEdge(mmulTask, cudaCopyOutCTask);

      taskGraph->addGraphProducerTask(cudaCopyOutCTask);

      taskGraph->addCudaMemoryManagerEdge(matrixTypeToString(MatrixType::MatrixA) + "Copy",
                                          cudaCopyInATask,
                                          new CudaAllocator(blockSize, blockSize),
                                          blkWidthMatB+1,
                                          htgs::MMType::Static,
                                          gpuIds);
      taskGraph->addCudaMemoryManagerEdge(matrixTypeToString(MatrixType::MatrixB) + "Copy",
                                          cudaCopyInBTask,
                                          new CudaAllocator(blockSize, blockSize),
                                          blkHeightMatA+1,
                                          htgs::MMType::Static,
                                          gpuIds);

      taskGraph->addCudaMemoryManagerEdge(matrixTypeToString(MatrixType::MatrixC),
                                          mmulTask,
                                          new CudaAllocator(blockSize, blockSize),
                                          4,
                                          htgs::MMType::Static,
                                          gpuIds);


      auto mainTaskGraph = new htgs::TaskGraphConf<MatrixRequestData, MatrixRequestData>();


      auto execPipeline = new htgs::ExecutionPipeline<MatrixRequestData, MatrixBlockData<double *>>(numGPUs, taskGraph);
      auto decompositionRule = new MatrixDecompositionRule(numGPUs);

      execPipeline->addInputRule(decompositionRule);

      mainTaskGraph->setGraphConsumerTask(execPipeline);
      mainTaskGraph->addEdge(execPipeline, matAccumBk);


      mainTaskGraph->addRuleEdge(matAccumBk, outputRule, outputTask);
      mainTaskGraph->addRuleEdge(matAccumBk, accumulateRule, accumTask);

      mainTaskGraph->addEdge(accumTask, matAccumBk);

      mainTaskGraph->addGraphProducerTask(outputTask);

//      mainTaskGraph->writeDotToFile("pre-execution.dot");

      htgs::TaskGraphRuntime *runtime = new htgs::TaskGraphRuntime(mainTaskGraph);

      clk.start();

      runtime->executeRuntime();

      for (size_t col = 0; col < blkWidthMatA; col++) {
        for (size_t row = 0; row < blkHeightMatA; row++) {

          MatrixRequestData *matA = new MatrixRequestData(row, col, MatrixType::MatrixA);
          mainTaskGraph->produceData(matA);
        }
      }

      for (size_t row = 0; row < blkHeightMatB; row++) {
        for (size_t col = 0; col < blkWidthMatB; col++) {
          MatrixRequestData *matB = new MatrixRequestData(row, col, MatrixType::MatrixB);
          mainTaskGraph->produceData(matB);

        }
      }

      mainTaskGraph->finishedProducingData();

      while (!mainTaskGraph->isOutputTerminated()) {
        auto data = mainTaskGraph->consumeData();
        if (data != nullptr) {
//          std::cout << data->getRow() << ", " << data->getCol() << std::endl;
        }
      }

      runtime->waitForRuntime();


//      taskGraph->writeDotToFile("profile-graph.dot");
//      mainTaskGraph->writeDotToFile("profile-all-threads-graph.dot", DOTGEN_FLAG_SHOW_ALL_THREADING);
      mainTaskGraph->writeDotToFile("matrix-multiplication-cuda-multigpu.dot", DOTGEN_COLOR_COMP_TIME);

      clk.stopAndIncrement();

      delete runtime;
      endToEnd.stopAndIncrement();
    }

    if (validate) {
      double *matrixCTest = new double[matrixAHeight * matrixBWidth];
      initMatMul(numProdThreads);

      cublasXtHandle_t handle;

      cublasXtCreate(&handle);

      cublasXtDeviceSelect(handle, (int)numGPUs, gpuIds);
      cublasXtSetBlockDim(handle, (int)blockSize);

      computeSequentialMatMul(matrixA, matrixB, matrixCTest, (size_t) matrixAHeight, (size_t) sharedDim,
                              (size_t) matrixBWidth, handle);

      cublasXtDestroy(handle);


      int res = validateResults(matrixC, matrixCTest, matrixAHeight, matrixBWidth);
      if (res != 0) {
        std::cout << "Error validating test failed!" << std::endl;
      }
      else
      {
        std::cout << "Test PASSED" << std::endl;
      }

      delete []matrixCTest;

    }


    double numGflops = (2.0 * matrixAHeight * sharedDim * matrixBWidth) * 1.0e-9;
    double gflops = numGflops / clk.getAverageTime(TimeVal::SEC);



    std::cout << (runSequential ? "sequential" : "htgs") << ", " << numProdThreads
              << ", accum-threads: " << numAccumThreads << ", width-b: " << matrixBWidth << ", height-a: " << matrixAHeight
              << ", shared-dim: " << sharedDim
              << ", blockSize: " << blockSize
              << ", time:" << clk.getAverageTime(TimeVal::MILLI)
              << ", end-to-end:" << endToEnd.getAverageTime(TimeVal::MILLI)
              << ", gflops: " << gflops
              << std::endl;

    runtimeFile << "MULTIGPU-MM" << (runSequential ? "sequential" : "htgs") << ", " << numProdThreads
                << ", " << numAccumThreads << ", "
                << matrixBWidth << ", " << matrixAHeight
                << ", " << sharedDim << ", " << blockSize << ", " << clk.getAverageTime(TimeVal::MILLI)
                << ", " << endToEnd.getAverageTime(TimeVal::MILLI)
                << std::endl;

  }

  delete[] matrixA;
  delete[] matrixB;
  delete[] matrixC;
}
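In this HTGS/CUDA matrix-multiplication driver, consumeData() is the consumer side of a task-graph API rather than a byte-stream call: after producing a MatrixRequestData block for every tile of A and B, the driver drains mainTaskGraph->consumeData() in a loop until the output terminates, waits for the runtime, and reports timing and GFLOPS.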