void BlobResourceHandle::didRead(int bytesRead)
{
    if (bytesRead < 0) {
        failed(notReadableError);
        return;
    }

    consumeData(m_buffer.data(), bytesRead);
}

void NetworkDataTaskBlob::readData(const BlobDataItem& item)
{
    ASSERT(item.data().data());

    long long bytesToRead = item.length() - m_currentItemReadSize;
    if (bytesToRead > m_totalRemainingSize)
        bytesToRead = m_totalRemainingSize;
    consumeData(reinterpret_cast<const char*>(item.data().data()->data()) + item.offset() + m_currentItemReadSize, static_cast<int>(bytesToRead));
    m_currentItemReadSize = 0;
}

void BlobResourceHandle::readDataAsync(const BlobDataItem& item)
{
    ASSERT(m_async);

    long long bytesToRead = item.length - m_currentItemReadSize;
    if (bytesToRead > m_totalRemainingSize)
        bytesToRead = m_totalRemainingSize;
    consumeData(item.data->data() + item.offset + m_currentItemReadSize, static_cast<int>(bytesToRead));
    m_currentItemReadSize = 0;
}

void BlobResourceHandle::readDataAsync(const BlobDataItem& item)
{
    ASSERT(isMainThread());
    ASSERT(m_async);
    ASSERT(item.data().data());

    Ref<BlobResourceHandle> protectedThis(*this);

    long long bytesToRead = item.length() - m_currentItemReadSize;
    if (bytesToRead > m_totalRemainingSize)
        bytesToRead = m_totalRemainingSize;
    consumeData(reinterpret_cast<const char*>(item.data().data()->data()) + item.offset() + m_currentItemReadSize, static_cast<int>(bytesToRead));
    m_currentItemReadSize = 0;
}

void NetworkDataTaskBlob::didRead(int bytesRead)
{
    if (m_state == State::Canceling || m_state == State::Completed || (!m_client && !isDownload())) {
        clearStream();
        return;
    }

    if (bytesRead < 0) {
        didFail(Error::NotReadableError);
        return;
    }

    Ref<NetworkDataTaskBlob> protectedThis(*this);
    consumeData(m_buffer.data(), bytesRead);
}

void Server::initServer(int port)
{
    createSharedMemory();

    // Fork: the parent owns the listening socket, the child consumes incoming data.
    int id = fork();

    // Create the FIFO used for inter-process communication if it does not exist yet.
    struct stat st;
    if (stat(FIFO_NAME, &st) != 0)
        mkfifo(FIFO_NAME, 0666);

    if (id > 0) {
        // Parent process.
        initSocket(port);
        wait();
    } else if (id == 0) {
        // Child process.
        consumeData();
    }
}

void BlobResourceHandle::didRead(int bytesRead)
{
    consumeData(m_buffer.data(), bytesRead);
}

void UpnpPortMapping::mapPort(const std::string& gateway, const std::string& client, uint16_t port, PortType type, bool enable)
{
    {
        std::lock_guard<std::mutex> guard(state->stateMutex);

        // If a request to this gateway is already in flight, queue the mapping;
        // the pending stream's close handler will pick it up later.
        for (auto it = state->pendingRequests.begin(); it != state->pendingRequests.end(); it++) {
            if ((*it)->getHostname() == gateway) {
                UpnpMappingState::TodoMapping todo;
                todo.mapping = { gateway, port, type };
                todo.client = client;
                todo.enable = enable;
                todo.blockingStream = (*it).get();
                state->waitingMapping.push_back(todo);
                return;
            }
        }
    }

    std::string portStr = std::to_string(port);
    std::string portType = type == PortType::Tcp ? "TCP" : "UDP";

    // SOAP AddPortMapping request body.
    auto request = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
        "<s:Envelope xmlns:s =\"http://schemas.xmlsoap.org/soap/envelope/\" s:encodingStyle=\"http://schemas.xmlsoap.org/soap/encoding/\">\n"
        "<s:Body>\n"
        "<u:AddPortMapping xmlns:u=\"urn:schemas-upnp-org:service:WANIPConnection:1\">\n"
        "<NewRemoteHost></NewRemoteHost>\n"
        "<NewExternalPort>" + portStr + "</NewExternalPort>\n"
        "<NewProtocol>" + portType + "</NewProtocol>\n"
        "<NewInternalPort>" + portStr + "</NewInternalPort>\n"
        "<NewInternalClient>" + client + "</NewInternalClient>\n"
        "<NewEnabled>" + std::string(enable ? "1" : "0") + "</NewEnabled>\n"
        "<NewPortMappingDescription>mtTorrent UPnP " + portStr + " " + portType + "</NewPortMappingDescription>\n"
        "<NewLeaseDuration>0</NewLeaseDuration>\n"
        "</u:AddPortMapping>\n"
        "</s:Body>\n"
        "</s:Envelope>\r\n";

    auto httpHeader = createUpnpHttpHeader(gateway + ":1900", request.length(), "urn:schemas-upnp-org:service:WANIPConnection:1#AddPortMapping");

    auto stream = std::make_shared<TcpAsyncStream>(io);
    state->pendingRequests.push_back(stream);
    auto streamPtr = stream.get();
    auto upnpState = state;

    stream->onConnectCallback = [streamPtr, httpHeader, request]() {
        // Send the HTTP header followed by the SOAP body.
        DataBuffer buffer;
        buffer.assign(httpHeader.begin(), httpHeader.end());
        buffer.insert(buffer.end(), request.begin(), request.end());
        streamPtr->write(buffer);
    };

    stream->onReceiveCallback = [streamPtr, upnpState, gateway, port, type]() {
        auto data = streamPtr->getReceivedData();
        auto header = HttpHeaderInfo::readFromBuffer(data);

        if (header.valid && data.size() >= (header.dataStart + header.dataSize)) {
            streamPtr->consumeData(header.dataStart + header.dataSize);

            std::lock_guard<std::mutex> guard(upnpState->stateMutex);
            if (header.success) {
                upnpState->mappedPorts.push_back({ gateway, port, type });
            }
        }
    };

    stream->onCloseCallback = [streamPtr, upnpState, this](int code) {
        {
            std::lock_guard<std::mutex> guard(upnpState->stateMutex);

            // Remove this stream from the pending requests.
            for (auto it = upnpState->pendingRequests.begin(); it != upnpState->pendingRequests.end(); it++) {
                if ((*it).get() == streamPtr) {
                    upnpState->pendingRequests.erase(it);
                    break;
                }
            }
        }

        if (upnpState->active) {
            checkPendingMapping(streamPtr);
        }
    };

    stream->connect(gateway, 1900);
}

void UpnpPortMapping::unmapPort(const std::string& gateway, uint16_t port, PortType type)
{
    std::string portStr = std::to_string(port);
    std::string portType = type == PortType::Tcp ? "TCP" : "UDP";

    // SOAP DeletePortMapping request body.
    auto request = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
        "<s:Envelope xmlns:s=\"http://schemas.xmlsoap.org/soap/envelope/\" s:encodingStyle=\"http://schemas.xmlsoap.org/soap/encoding/\">\n"
        "<s:Body>\n"
        "<u:DeletePortMapping xmlns:u=\"urn:schemas-upnp-org:service:WANIPConnection:1\">\n"
        "<NewRemoteHost></NewRemoteHost>\n"
        "<NewProtocol>" + portType + "</NewProtocol>\n"
        "<NewExternalPort>" + portStr + "</NewExternalPort>\n"
        "</u:DeletePortMapping>\n"
        "</s:Body>\n"
        "</s:Envelope>\r\n";

    auto httpHeader = createUpnpHttpHeader(gateway + ":1900", request.length(), "urn:schemas-upnp-org:service:WANIPConnection:1#DeletePortMapping");

    auto stream = std::make_shared<TcpAsyncStream>(io);
    auto streamPtr = stream.get();
    state->pendingRequests.push_back(stream);
    auto upnpState = state;

    stream->onConnectCallback = [streamPtr, upnpState, httpHeader, request]() {
        // Send the HTTP header followed by the SOAP body.
        DataBuffer buffer;
        buffer.assign(httpHeader.begin(), httpHeader.end());
        buffer.insert(buffer.end(), request.begin(), request.end());

        std::lock_guard<std::mutex> guard(upnpState->stateMutex);
        streamPtr->write(buffer);
    };

    stream->onReceiveCallback = [streamPtr, upnpState, gateway, port, type]() {
        auto data = streamPtr->getReceivedData();
        auto header = HttpHeaderInfo::readFromBuffer(data);

        if (header.valid && data.size() >= (header.dataStart + header.dataSize)) {
            streamPtr->consumeData(header.dataStart + header.dataSize);

            std::lock_guard<std::mutex> guard(upnpState->stateMutex);
            if (header.success) {
                // Drop the matching entry from the list of active mappings.
                for (auto it = upnpState->mappedPorts.begin(); it != upnpState->mappedPorts.end(); it++) {
                    if (it->gateway == gateway && it->port == port && it->type == type) {
                        upnpState->mappedPorts.erase(it);
                        break;
                    }
                }
            }
        }
    };

    stream->onCloseCallback = [streamPtr, upnpState](int code) {
        std::lock_guard<std::mutex> guard(upnpState->stateMutex);

        // Remove this stream from the pending requests.
        for (auto it = upnpState->pendingRequests.begin(); it != upnpState->pendingRequests.end(); it++) {
            if ((*it).get() == streamPtr) {
                upnpState->pendingRequests.erase(it);
                break;
            }
        }
    };

    stream->connect(gateway, 1900);
}

int main(int argc, char *argv[])
{
    // Parse command-line arguments.
    MatMulArgs matMulArgs;
    matMulArgs.processArgs(argc, argv);

    size_t matrixAHeight = matMulArgs.getMatrixAHeight();
    size_t matrixBWidth = matMulArgs.getMatrixBWidth();
    size_t sharedDim = matMulArgs.getSharedDim();

    size_t blockSize = matMulArgs.getBlockSize();
    size_t numReadThreads = matMulArgs.getNumReadThreads();
    size_t numProdThreads = matMulArgs.getNumMatMulThreads();
    size_t numAccumThreads = (size_t) ceil((double) numProdThreads / 2.0);
    std::string directory = matMulArgs.getDirectory();
    std::string outputDirectory = matMulArgs.getOutputDir();
    bool runSequential = matMulArgs.isRunSequential();
    bool validate = matMulArgs.isValidateResults();

    size_t numGPUs = matMulArgs.getNumGPUs();
    int gpuIds[numGPUs];
    matMulArgs.copyGpuIds(gpuIds);

//    CUcontext *contexts = initCuda(numGPUs, gpuIds);

    std::string runtimeFileStr("runtimes");

    int numRetry = 1;

    std::ofstream runtimeFile(runtimeFileStr, std::ios::app);

    // Allocate and initialize the input/output matrices.
    double *matrixA = new double[matrixAHeight * sharedDim];
    double *matrixB = new double[matrixBWidth * sharedDim];
    double *matrixC = new double[matrixAHeight * matrixBWidth];

    initMatrix(matrixA, sharedDim, matrixAHeight, true);
    initMatrix(matrixB, matrixBWidth, sharedDim, true);

    for (int numTry = 0; numTry < numRetry; numTry++) {
        SimpleClock clk;
        SimpleClock endToEnd;

        if (runSequential) {
            // Sequential baseline using cublasXt.
            endToEnd.start();
            initMatMul(numProdThreads);

            cublasXtHandle_t handle;
            cublasXtCreate(&handle);
            cublasXtDeviceSelect(handle, numGPUs, gpuIds);
            cublasXtSetBlockDim(handle, blockSize);

            clk.start();
            computeSequentialMatMul(matrixA, matrixB, matrixC, (size_t) matrixAHeight, (size_t) sharedDim, (size_t) matrixBWidth, handle);
            clk.stopAndIncrement();

            cublasXtDestroy(handle);
            endToEnd.stopAndIncrement();
        } else {
            endToEnd.start();
            initMatMul(1);

            LoadMatrixTask *readAMatTask = new LoadMatrixTask(matrixA, numReadThreads, MatrixType::MatrixA, blockSize, sharedDim, matrixAHeight, true);
            LoadMatrixTask *readBMatTask = new LoadMatrixTask(matrixB, numReadThreads, MatrixType::MatrixB, blockSize, matrixBWidth, sharedDim, true);
            MatrixMulBlkCudaTask *mmulTask = new MatrixMulBlkCudaTask(gpuIds, numGPUs);
            MatMulAccumTask *accumTask = new MatMulAccumTask(numAccumThreads, true);
            MatMulOutputTask *outputTask = new MatMulOutputTask(matrixC, matrixAHeight, blockSize, true);

            size_t blkHeightMatB = readBMatTask->getNumBlocksRows();
            size_t blkWidthMatB = readBMatTask->getNumBlocksCols();
            size_t blkHeightMatA = readAMatTask->getNumBlocksRows();
            size_t blkWidthMatA = readAMatTask->getNumBlocksCols();

            CudaCopyInTask *cudaCopyInATask = new CudaCopyInTask(gpuIds, numGPUs, MatrixType::MatrixA, blkWidthMatB);
            CudaCopyInTask *cudaCopyInBTask = new CudaCopyInTask(gpuIds, numGPUs, MatrixType::MatrixB, blkHeightMatA);
            CudaCopyOutTask *cudaCopyOutCTask = new CudaCopyOutTask(gpuIds, numGPUs, MatrixType::MatrixC);

            MatMulDistributeRule *distributeRuleMatA = new MatMulDistributeRule(MatrixType::MatrixA);
            MatMulDistributeRule *distributeRuleMatB = new MatMulDistributeRule(MatrixType::MatrixB);

            MatMulLoadRule<htgs::m_data_t<double>> *loadRule = new MatMulLoadRule<htgs::m_data_t<double>>(blkWidthMatA, blkHeightMatA, blkWidthMatB, blkHeightMatB);
            MatMulAccumulateRule<double *> *accumulateRule = new MatMulAccumulateRule<double *>(blkWidthMatB, blkHeightMatA, blkWidthMatA);
            MatMulOutputRule *outputRule = new MatMulOutputRule(blkWidthMatB, blkHeightMatA, blkWidthMatA);

            auto distributeBk = new htgs::Bookkeeper<MatrixRequestData>();
            auto matMulBk = new htgs::Bookkeeper<MatrixBlockData<htgs::m_data_t<double>>>();
            auto matAccumBk = new htgs::Bookkeeper<MatrixBlockData<double *>>();

            // Build the per-GPU task graph that loads blocks, copies them to the device, and multiplies them.
            auto taskGraph = new htgs::TaskGraphConf<MatrixRequestData, MatrixBlockData<double *>>();

            taskGraph->setGraphConsumerTask(distributeBk);
            taskGraph->addRuleEdge(distributeBk, distributeRuleMatA, readAMatTask);
            taskGraph->addRuleEdge(distributeBk, distributeRuleMatB, readBMatTask);
            taskGraph->addEdge(readAMatTask, cudaCopyInATask);
            taskGraph->addEdge(readBMatTask, cudaCopyInBTask);
            taskGraph->addEdge(cudaCopyInATask, matMulBk);
            taskGraph->addEdge(cudaCopyInBTask, matMulBk);
            taskGraph->addRuleEdge(matMulBk, loadRule, mmulTask);
            taskGraph->addEdge(mmulTask, cudaCopyOutCTask);
            taskGraph->addGraphProducerTask(cudaCopyOutCTask);

            taskGraph->addCudaMemoryManagerEdge(matrixTypeToString(MatrixType::MatrixA) + "Copy", cudaCopyInATask, new CudaAllocator(blockSize, blockSize), blkWidthMatB + 1, htgs::MMType::Static, gpuIds);
            taskGraph->addCudaMemoryManagerEdge(matrixTypeToString(MatrixType::MatrixB) + "Copy", cudaCopyInBTask, new CudaAllocator(blockSize, blockSize), blkHeightMatA + 1, htgs::MMType::Static, gpuIds);
            taskGraph->addCudaMemoryManagerEdge(matrixTypeToString(MatrixType::MatrixC), mmulTask, new CudaAllocator(blockSize, blockSize), 4, htgs::MMType::Static, gpuIds);

            // Replicate the CUDA graph across GPUs with an execution pipeline, then accumulate and output on the host.
            auto mainTaskGraph = new htgs::TaskGraphConf<MatrixRequestData, MatrixRequestData>();

            auto execPipeline = new htgs::ExecutionPipeline<MatrixRequestData, MatrixBlockData<double *>>(numGPUs, taskGraph);
            auto decompositionRule = new MatrixDecompositionRule(numGPUs);

            execPipeline->addInputRule(decompositionRule);

            mainTaskGraph->setGraphConsumerTask(execPipeline);
            mainTaskGraph->addEdge(execPipeline, matAccumBk);
            mainTaskGraph->addRuleEdge(matAccumBk, outputRule, outputTask);
            mainTaskGraph->addRuleEdge(matAccumBk, accumulateRule, accumTask);
            mainTaskGraph->addEdge(accumTask, matAccumBk);
            mainTaskGraph->addGraphProducerTask(outputTask);

//            mainTaskGraph->writeDotToFile("pre-execution.dot");

            htgs::TaskGraphRuntime *runtime = new htgs::TaskGraphRuntime(mainTaskGraph);

            clk.start();

            runtime->executeRuntime();

            // Produce block requests for matrices A and B.
            for (size_t col = 0; col < blkWidthMatA; col++) {
                for (size_t row = 0; row < blkHeightMatA; row++) {
                    MatrixRequestData *matA = new MatrixRequestData(row, col, MatrixType::MatrixA);
                    mainTaskGraph->produceData(matA);
                }
            }

            for (size_t row = 0; row < blkHeightMatB; row++) {
                for (size_t col = 0; col < blkWidthMatB; col++) {
                    MatrixRequestData *matB = new MatrixRequestData(row, col, MatrixType::MatrixB);
                    mainTaskGraph->produceData(matB);
                }
            }

            mainTaskGraph->finishedProducingData();

            // Drain the graph output until all blocks of C have been produced.
            while (!mainTaskGraph->isOutputTerminated()) {
                auto data = mainTaskGraph->consumeData();
                if (data != nullptr) {
//                    std::cout << data->getRow() << ", " << data->getCol() << std::endl;
                }
            }

            runtime->waitForRuntime();

//            taskGraph->writeDotToFile("profile-graph.dot");
//            mainTaskGraph->writeDotToFile("profile-all-threads-graph.dot", DOTGEN_FLAG_SHOW_ALL_THREADING);
            mainTaskGraph->writeDotToFile("matrix-multiplication-cuda-multigpu.dot", DOTGEN_COLOR_COMP_TIME);

            clk.stopAndIncrement();

            delete runtime;
            endToEnd.stopAndIncrement();
        }

        if (validate) {
            // Verify the HTGS result against a sequential cublasXt run.
            double *matrixCTest = new double[matrixAHeight * matrixBWidth];
            initMatMul(numProdThreads);

            cublasXtHandle_t handle;
            cublasXtCreate(&handle);
            cublasXtDeviceSelect(handle, (int) numGPUs, gpuIds);
            cublasXtSetBlockDim(handle, (int) blockSize);

            computeSequentialMatMul(matrixA, matrixB, matrixCTest, (size_t) matrixAHeight, (size_t) sharedDim, (size_t) matrixBWidth, handle);

            cublasXtDestroy(handle);

            int res = validateResults(matrixC, matrixCTest, matrixAHeight, matrixBWidth);
            if (res != 0) {
                std::cout << "Error: validation failed!" << std::endl;
            } else {
                std::cout << "Test PASSED" << std::endl;
            }

            delete[] matrixCTest;
        }

        // 2*M*K*N floating-point operations, reported in GFLOPS.
        double numGflops = (2.0 * matrixAHeight * sharedDim * matrixBWidth) * 1.0e-9;
        double gflops = numGflops / clk.getAverageTime(TimeVal::SEC);

        std::cout << (runSequential ? "sequential" : "htgs") << ", " << numProdThreads
                  << ", accum-threads: " << numAccumThreads << ", width-b: " << matrixBWidth
                  << ", height-a: " << matrixAHeight << ", shared-dim: " << sharedDim
                  << ", blockSize: " << blockSize
                  << ", time:" << clk.getAverageTime(TimeVal::MILLI)
                  << ", end-to-end:" << endToEnd.getAverageTime(TimeVal::MILLI)
                  << ", gflops: " << gflops
                  << std::endl;

        runtimeFile << "MULTIGPU-MM" << (runSequential ? "sequential" : "htgs") << ", " << numProdThreads
                    << ", " << numAccumThreads << ", " << matrixBWidth << ", " << matrixAHeight
                    << ", " << sharedDim << ", " << blockSize
                    << ", " << clk.getAverageTime(TimeVal::MILLI)
                    << ", " << endToEnd.getAverageTime(TimeVal::MILLI) << std::endl;
    }

    delete[] matrixA;
    delete[] matrixB;
    delete[] matrixC;
}