Example 1
    void MPICommunicatorImpl::Initialize(const std::vector<NDArrayViewPtr>& values)
    {
        assert(CPUDEVICE < 0); // just in case somebody decides to change the CPUDEVICE macro.
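        // One GPU data transferer and one intermediate (staging) CPU buffer slot is reserved
        // per value; only GPU-resident values receive real instances in the loop below.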
        DeviceDescriptor lastGpuDevice = DeviceDescriptor::CPUDevice();
        m_gpuDataTransferers.resize(values.size());
        m_intermediateCPUBuffers.resize(values.size());
        for (size_t i = 0; i < values.size(); ++i)
        {
            auto view = values[i];
            auto device = view->Device();

            // Make sure none of the values are sparse - we currently do not support aggregation of sparse matrices
            if (view->GetStorageFormat() != StorageFormat::Dense)
                RuntimeError("Aggregation for sparse matrices is currently not supported!");

            // TODO: device.Type should be called Kind.
            if (device.Type() != DeviceKind::GPU)
            {
                m_intermediateCPUBuffers[i] = Buffer();
                m_gpuDataTransferers[i] = nullptr;
            }
            else
            {
                if (lastGpuDevice.Type() == DeviceKind::CPU)
                    lastGpuDevice = device;
                else if (device.Id() != lastGpuDevice.Id()) // For the time being, assume all devices have the same id.
                    LogicError("Not all values are on the same GPU device id");

                auto requiredSize = GetBufferSize(view);
                m_gpuDataTransferers[i] = std::make_shared<GPUDataTransferer>(device.Id(), true);
                if (m_intermediateCPUBuffers[i].totalSize < requiredSize)
                    m_intermediateCPUBuffers[i] = AllocateIntermediateBuffer(device.Id(), requiredSize);
            }
        }
    }
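
The helpers Buffer, GetBufferSize, and AllocateIntermediateBuffer are used above but not shown in this excerpt. Below is a minimal sketch of what they could look like; the struct layout, the free-function form, and the use of plain heap memory (rather than the pinned host memory a real implementation would likely use for faster GPU transfers) are assumptions for illustration, not the actual CNTK code.

#include <memory>
#include "CNTKLibrary.h"

// Sketch only -- the real CNTK types may differ.
struct Buffer
{
    std::shared_ptr<void> data;   // host-side staging memory for GPU <-> MPI transfers
    size_t totalSize = 0;         // capacity in bytes; 0 until allocated
};

// Bytes occupied by a dense NDArrayView: element count times element size.
static size_t GetBufferSize(const CNTK::NDArrayViewPtr& view)
{
    return view->Shape().TotalSize() * CNTK::DataTypeSize(view->GetDataType());
}

// Allocate a staging buffer of the requested size. Plain heap memory keeps the
// sketch self-contained; pinned (page-locked) memory would be the realistic choice.
static Buffer AllocateIntermediateBuffer(int /*deviceId*/, size_t totalSize)
{
    Buffer buffer;
    buffer.totalSize = totalSize;
    buffer.data = std::shared_ptr<void>(operator new(totalSize), [](void* p) { operator delete(p); });
    return buffer;
}
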
Example 2
/// <summary>
/// Shows how to use Clone() to share function parameters among multiple evaluation threads.
/// </summary>
/// <description>
/// It first creates a new function with parameters, then spawns multiple threads. Each thread uses Clone() to create a new
/// instance of the function and then uses that instance for evaluation.
/// All cloned functions share the same parameters.
/// </description>
void MultiThreadsEvaluationWithClone(const DeviceDescriptor& device, const int threadCount)
{
    using namespace std::placeholders;

    const size_t inputDim = 937;
    const size_t numOutputClasses = 9304;
    const size_t numHiddenLayers = 6;
    const size_t hiddenLayersDim = 2048;

    auto inputVar = InputVariable({inputDim}, DataType::Float, L"features");

    assert(numHiddenLayers >= 1);
    auto classifierRoot = SetupFullyConnectedDNNLayer(inputVar, hiddenLayersDim, device, std::bind(Sigmoid, _1, L""));
    for (size_t i = 1; i < numHiddenLayers; ++i)
    {
        classifierRoot = SetupFullyConnectedDNNLayer(classifierRoot, hiddenLayersDim, device, std::bind(Sigmoid, _1, L""));
    }

    auto outputTimesParam = Parameter(NDArrayView::RandomUniform<float>({numOutputClasses, hiddenLayersDim}, -0.5, 0.5, 1, device));
    auto classifierFunc = Times(outputTimesParam, classifierRoot, L"classifierOutput");

    // Now test the structure
    if (classifierFunc->Parameters().size() != ((numHiddenLayers * 2) + 1))
    {
        throw std::runtime_error("MultiThreadsEvaluationWithClone: Function does not have expected Parameter count");
    }

    OutputFunctionInfo(classifierFunc);
    fprintf(stderr, "MultiThreadsEvaluationWithClone on device=%d\n", device.Id());

    // Run evaluation in parallel
    std::vector<std::thread> threadList(threadCount);
    for (int th = 0; th < threadCount; ++th)
    {
        threadList[th] = std::thread(RunEvaluationClassifier, classifierFunc->Clone(), device);
    }

    for (int th = 0; th < threadCount; ++th)
    {
        threadList[th].join();
        fprintf(stderr, "thread %d joined.\n", th);
        fflush(stderr);
    }
}
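
RunEvaluationClassifier, the per-thread worker handed to std::thread above, is not part of this excerpt. The following is a rough sketch of what such a worker could do with its cloned function; the signature, the single-argument/single-output assumption, and the synthetic batch data are illustrative assumptions rather than the actual sample code.

#include <unordered_map>
#include <vector>
#include "CNTKLibrary.h"

// Sketch only: run one synthetic batch through the cloned function instance.
void RunEvaluationClassifier(CNTK::FunctionPtr evalFunc, const CNTK::DeviceDescriptor& device)
{
    using namespace CNTK;

    // Assumes the function has exactly one argument (the "features" input) and one output.
    Variable inputVar = evalFunc->Arguments()[0];
    Variable outputVar = evalFunc->Output();

    // Build a small constant batch just to exercise the forward pass.
    const size_t batchSize = 2;
    std::vector<float> batchData(inputVar.Shape().TotalSize() * batchSize, 0.5f);
    ValuePtr inputValue = Value::CreateBatch(inputVar.Shape(), batchData, device);

    std::unordered_map<Variable, ValuePtr> arguments = { { inputVar, inputValue } };
    std::unordered_map<Variable, ValuePtr> outputs = { { outputVar, nullptr } };

    // Each thread evaluates its own clone, so concurrent evaluations do not interfere.
    evalFunc->Evaluate(arguments, outputs, device);
}
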
Example 3
/// <summary>
/// Shows how to use LoadModel() and Clone() to share function parameters among multiple evaluation threads.
/// </summary>
/// <description>
/// It first loads a model, then spawns multiple threads. Each thread uses Clone() to create a new
/// instance of the function and then uses that instance for evaluation.
/// All cloned functions share the same parameters.
/// Note: It uses the model trained by Examples\Image\GettingStarted\01_OneHidden.cntk as an example. Instructions
/// for training the model are described in Examples\Image\GettingStarted\README.md.
/// The pre-trained model file 01_OneHidden needs to be in the current directory.
/// </description>
void MultiThreadsEvaluationWithLoadModel(const DeviceDescriptor& device, const int threadCount)
{
    // The model file will be trained and copied to the current runtime directory first.
    auto modelFuncPtr = CNTK::Function::LoadModel(DataType::Float, L"01_OneHidden", device);

    OutputFunctionInfo(modelFuncPtr);
    fprintf(stderr, "MultiThreadsEvaluationWithLoadModel on device=%d\n", device.Id());

    // Run evaluation in parallel.
    std::vector<std::thread> threadList(threadCount);
    for (int th = 0; th < threadCount; ++th)
    {
        threadList[th] = std::thread(RunEvaluationOneHidden, modelFuncPtr->Clone(), device);
    }

    for (int th = 0; th < threadCount; ++th)
    {
        threadList[th].join();
        fprintf(stderr, "thread %d joined.\n", th);
        fflush(stderr);
    }
}
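
RunEvaluationOneHidden is likewise omitted from this excerpt. Unlike the network built in memory in the previous example, a loaded model is usually addressed through variable names; the sketch below shows how a worker could locate its input and output before evaluating. The names "features" and "out.z" are assumptions about the 01_OneHidden model and should be checked against the OutputFunctionInfo output.

#include <string>
#include "CNTKLibrary.h"

// Sketch only: find the loaded model's input and output variables by name.
void RunEvaluationOneHidden(CNTK::FunctionPtr model, const CNTK::DeviceDescriptor& device)
{
    using namespace CNTK;

    // Assumed names -- verify against the actual model before use.
    const std::wstring inputName = L"features";
    const std::wstring outputName = L"out.z";

    Variable inputVar, outputVar;
    for (const auto& arg : model->Arguments())
        if (arg.Name() == inputName)
            inputVar = arg;
    for (const auto& out : model->Outputs())
        if (out.Name() == outputName)
            outputVar = out;

    // From here on, evaluation proceeds as in RunEvaluationClassifier above:
    // build a Value for inputVar, call model->Evaluate(...), and read back outputVar.
    (void)device; // the device would be passed to Evaluate in the full worker
}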