void TestCheckpointing(const DeviceDescriptor& device)
{
    auto featureStreamName = L"features";
    auto labelsStreamName = L"labels";

    size_t inputDim = 784;
    size_t numOutputClasses = 10;
    auto features1 = InputVariable({ inputDim }, false /*isSparse*/, DataType::Float, featureStreamName);
    auto labels1 = InputVariable({ numOutputClasses }, DataType::Float, labelsStreamName);
    auto net1_1 = BuildFFClassifierNet(features1, numOutputClasses, device, 1);

    FunctionPtr net1_2;
    if (device.Type() == DeviceKind::GPU)
    {
        // TODO: instead of cloning here, reset curand generator to make sure that parameters are initialized to the same state.
        for (auto& p : net1_1->Parameters())
        {
            // make sure all parameters are initialized
            assert(p.Value() != nullptr);
        }
        net1_2 = net1_1->Clone();
    }
    else
    {
        net1_2 = BuildFFClassifierNet(features1, numOutputClasses, device, 1);
    }

    auto minibatchSource1 = TextFormatMinibatchSource(L"Train-28x28_cntk_text.txt",
        { { featureStreamName, inputDim }, { labelsStreamName, numOutputClasses } }, 1000, false);

    TestTrainingWithCheckpointing(net1_1, net1_2, labels1, minibatchSource1, device);

    inputDim = 2000;
    numOutputClasses = 5;
    auto features2 = InputVariable({ inputDim }, true /*isSparse*/, DataType::Float, featureStreamName);
    auto labels2 = InputVariable({ numOutputClasses }, DataType::Float, labelsStreamName, { Axis::DefaultBatchAxis() });
    auto net2_1 = BuildLSTMClassifierNet(features2, numOutputClasses, device, 1);

    FunctionPtr net2_2;
    if (device.Type() == DeviceKind::GPU)
    {
        // TODO: instead of cloning here, reset curand generator to make sure that parameters are initialized to the same state.
        for (auto& p : net2_1->Parameters())
        {
            // make sure all parameters are initialized
            assert(p.Value() != nullptr);
        }
        net2_2 = net2_1->Clone();
    }
    else
    {
        net2_2 = BuildLSTMClassifierNet(features2, numOutputClasses, device, 1);
    }

    auto minibatchSource2 = TextFormatMinibatchSource(L"Train.ctf",
        { { featureStreamName, inputDim, true, L"x" }, { labelsStreamName, numOutputClasses, false, L"y" } }, 1000, false);

    TestTrainingWithCheckpointing(net2_1, net2_2, labels2, minibatchSource2, device);
}
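// A minimal sketch of how this test might be driven (hypothetical: the actual
// test harness and the IsGPUAvailable() helper are assumed to live elsewhere
// in the test project, not in this file).
void RunCheckpointingTests()
{
    TestCheckpointing(DeviceDescriptor::CPUDevice());
    if (IsGPUAvailable()) // assumed helper that probes for a usable GPU
        TestCheckpointing(DeviceDescriptor::GPUDevice(0));
}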
void MPICommunicatorImpl::Initialize(const std::vector<NDArrayViewPtr>& values)
{
    assert(CPUDEVICE < 0); // just in case somebody decides to change the CPUDEVICE macro.
    DeviceDescriptor lastGpuDevice = DeviceDescriptor::CPUDevice();
    m_gpuDataTransferers.resize(values.size());
    m_intermediateCPUBuffers.resize(values.size());
    for (size_t i = 0; i < values.size(); ++i)
    {
        auto view = values[i];
        auto device = view->Device();

        // Make sure none of the values are sparse - we currently do not support aggregation of sparse matrices.
        if (view->GetStorageFormat() != StorageFormat::Dense)
            RuntimeError("Aggregation for sparse matrices is currently not supported!");

        // TODO: device.Type should be called Kind.
        if (device.Type() != DeviceKind::GPU)
        {
            m_intermediateCPUBuffers[i] = Buffer();
            m_gpuDataTransferers[i] = nullptr;
        }
        else
        {
            if (lastGpuDevice.Type() == DeviceKind::CPU)
                lastGpuDevice = device;
            else if (device.Id() != lastGpuDevice.Id()) // For the time being, assume all values reside on the same GPU device.
                LogicError("Not all values are on the same GPU device id");

            auto requiredSize = GetBufferSize(view);
            m_gpuDataTransferers[i] = std::make_shared<GPUDataTransferer>(device.Id(), true);
            if (m_intermediateCPUBuffers[i].totalSize < requiredSize)
                m_intermediateCPUBuffers[i] = AllocateIntermediateBuffer(device.Id(), requiredSize);
        }
    }
}
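// A plausible sketch of the GetBufferSize helper used above (an assumption --
// the real helper is defined elsewhere): the number of bytes needed to stage a
// dense view is its element count times the size of one element.
static size_t GetBufferSize(const NDArrayViewPtr& view)
{
    return view->Shape().TotalSize() * DataTypeSize(view->GetDataType());
}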
void TestFunctionsForEquality(const DeviceDescriptor& device)
{
    // TODO: add a GPU version (need to reset the cuda random generator each time a new function is created).
    assert(device.Type() == DeviceKind::CPU);

    auto inputVar = InputVariable({ 2 }, false, DataType::Float, L"features");

    auto f1 = BuildFFClassifierNet(inputVar, 3, device, /*seed*/ 1);
    auto f2 = BuildFFClassifierNet(inputVar, 3, device, /*seed*/ 1);
    if (!AreEqual(f1, f2))
    {
        throw std::runtime_error("TestFunctionsForEquality: two functions built with the same seed value are not identical.");
    }

    auto f3 = BuildFFClassifierNet(inputVar, 3, device, /*seed*/ 2);
    auto f4 = BuildFFClassifierNet(inputVar, 3, device, /*seed*/ 3);
    if (AreEqual(f3, f4))
    {
        throw std::runtime_error("TestFunctionsForEquality: two functions built with different seed values are identical.");
    }
}
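// One plausible way such an equality check could compare two networks (a
// hedged sketch only -- not the actual AreEqual implementation, which is
// defined elsewhere and may also compare graph structure): walk the parameter
// lists in order and compare the underlying values element-wise. Assumes
// Float-typed parameters resident on the CPU, as in the test above.
bool ParametersMatch(const FunctionPtr& a, const FunctionPtr& b)
{
    auto paramsA = a->Parameters();
    auto paramsB = b->Parameters();
    if (paramsA.size() != paramsB.size())
        return false;

    for (size_t i = 0; i < paramsA.size(); ++i)
    {
        auto viewA = paramsA[i].Value();
        auto viewB = paramsB[i].Value();
        if (viewA->Shape() != viewB->Shape())
            return false;

        const float* bufA = viewA->DataBuffer<float>();
        const float* bufB = viewB->DataBuffer<float>();
        for (size_t j = 0; j < viewA->Shape().TotalSize(); ++j)
            if (bufA[j] != bufB[j])
                return false;
    }
    return true;
}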
template <typename ElementType>
void TestTimesAndPlus(size_t inputDim, size_t outputDim, size_t numSamples, const DeviceDescriptor& device,
                      size_t numIterations, bool usePreAllocatedOutputs, bool outputOnSpecifiedDevice,
                      bool testSaveAndReLoad, unsigned int seed = 1)
{
    Parameter timesParam(MakeSharedObject<NDArrayView>((ElementType)0.5, NDShape({ outputDim, inputDim }), device), L"timesParameters");
    Parameter plusParam(MakeSharedObject<NDArrayView>((ElementType)1.2, std::initializer_list<size_t>({ outputDim }), device), L"plusParameters");

    Variable inputVar({ inputDim }, AsDataType<ElementType>(), L"input");
    auto timesAndPlusFunc = Plus(plusParam, Times(timesParam, inputVar));

    if (testSaveAndReLoad)
        SaveAndReloadModel<ElementType>(timesAndPlusFunc, { &inputVar, &timesParam, &plusParam }, device);

    srand(seed);
    for (size_t iterIdx = 0; iterIdx < numIterations; ++iterIdx)
    {
        std::vector<ElementType> inputData(inputDim * numSamples);
        for (size_t i = 0; i < inputData.size(); ++i)
            inputData[i] = ((ElementType)rand()) / RAND_MAX;

        NDShape inputShape = inputVar.Shape().AppendShape({ 1, numSamples });
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData.data(), inputData.size(), DeviceDescriptor::CPUDevice(), true));

        NDShape outputShape = timesAndPlusFunc->Output().Shape().AppendShape({ 1, numSamples });
        std::vector<ElementType> outputData(outputShape.TotalSize());
        ValuePtr outputValue;
        if (usePreAllocatedOutputs)
        {
            auto outputAllocationDevice = outputOnSpecifiedDevice ? device : DeviceDescriptor::CPUDevice();
            if (outputAllocationDevice.Type() == DeviceKind::CPU)
                outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputShape, outputData.data(), outputData.size(), outputAllocationDevice, false));
            else
                outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), outputShape, outputAllocationDevice));
        }

        std::unordered_map<Variable, ValuePtr> outputs = { { timesAndPlusFunc->Output(), outputValue } };
        auto backpropState = timesAndPlusFunc->Forward({ { inputVar, inputValue } }, outputs, device, { timesAndPlusFunc->Output() });

        if (!usePreAllocatedOutputs)
            outputValue = outputs[timesAndPlusFunc->Output()];

        // Perform backprop
        std::vector<ElementType> rootGradientsData(outputShape.TotalSize(), 1);
        ValuePtr rootGradientValue;
        if (device.Type() == DeviceKind::CPU)
            rootGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputShape, rootGradientsData.data(), rootGradientsData.size(), device, true));
        else
        {
            NDArrayViewPtr cpuArrayView = MakeSharedObject<NDArrayView>(outputShape, rootGradientsData.data(), rootGradientsData.size(), DeviceDescriptor::CPUDevice(), true);
            NDArrayViewPtr gpuArrayView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), outputShape, device);
            gpuArrayView->CopyFrom(*cpuArrayView);
            rootGradientValue = MakeSharedObject<Value>(gpuArrayView);
        }

        std::vector<ElementType> plusParameterGradientData(plusParam.Shape().TotalSize());
        std::vector<ElementType> timesParameterGradientData(timesParam.Shape().TotalSize());
        ValuePtr plusParameterGradientValue, timesParameterGradientValue;
        if (usePreAllocatedOutputs)
        {
            auto outputAllocationDevice = outputOnSpecifiedDevice ? device : DeviceDescriptor::CPUDevice();
            if (outputAllocationDevice.Type() == DeviceKind::CPU)
            {
                plusParameterGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(plusParam.Shape(), plusParameterGradientData.data(), plusParameterGradientData.size(), outputAllocationDevice, false));
                timesParameterGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(timesParam.Shape(), timesParameterGradientData.data(), timesParameterGradientData.size(), outputAllocationDevice, false));
            }
            else
            {
                plusParameterGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), plusParam.Shape(), outputAllocationDevice));
                timesParameterGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), timesParam.Shape(), outputAllocationDevice));
            }
        }

        std::unordered_map<Variable, ValuePtr> paramGradients = { { plusParam, plusParameterGradientValue }, { timesParam, timesParameterGradientValue } };
        timesAndPlusFunc->Backward(backpropState, { { timesAndPlusFunc->Output(), rootGradientValue } }, paramGradients);

        if (!usePreAllocatedOutputs)
        {
            plusParameterGradientValue = paramGradients[plusParam];
            timesParameterGradientValue = paramGradients[timesParam];
        }

        // Verify forward prop results
        if (!usePreAllocatedOutputs || (outputOnSpecifiedDevice && (device.Type() != DeviceKind::CPU)))
        {
            // Copy the results back into outputData's backing store for the comparison below.
            NDArrayViewPtr cpuArrayView = MakeSharedObject<NDArrayView>(outputShape, outputData.data(), outputData.size(), DeviceDescriptor::CPUDevice(), false);
            cpuArrayView->CopyFrom(*outputValue->Data());
        }

        std::vector<ElementType> expectedOutputValues(outputShape.TotalSize());
        for (size_t i = 0; i < numSamples; ++i)
        {
            ElementType expectedVal = (ElementType)1.2;
            for (size_t j = 0; j < inputDim; ++j)
                expectedVal += (ElementType)(inputData[i * inputDim + j] * 0.5);

            for (size_t j = 0; j < outputDim; ++j)
                expectedOutputValues[i * outputDim + j] = expectedVal;
        }

        FloatingPointVectorCompare(outputData, expectedOutputValues, "TestTimesAndPlus: Forward prop results do not match expected results");

        // Verify backward prop results
        if (device.Type() != DeviceKind::CPU)
        {
            NDArrayViewPtr cpuArrayView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), plusParam.Shape(), DeviceDescriptor::CPUDevice());
            cpuArrayView->CopyFrom(*plusParameterGradientValue->Data());
            const ElementType* cpuArrayViewBuffer = cpuArrayView->DataBuffer<ElementType>();
            memcpy(plusParameterGradientData.data(), cpuArrayViewBuffer, plusParam.Shape().TotalSize() * sizeof(ElementType));

            cpuArrayView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), timesParam.Shape(), DeviceDescriptor::CPUDevice());
            cpuArrayView->CopyFrom(*timesParameterGradientValue->Data());
            cpuArrayViewBuffer = cpuArrayView->DataBuffer<ElementType>();
            memcpy(timesParameterGradientData.data(), cpuArrayViewBuffer, timesParam.Shape().TotalSize() * sizeof(ElementType));
        }

        for (size_t i = 0; i < outputDim; ++i)
            if (plusParameterGradientData[i] != numSamples)
                throw std::runtime_error("TestTimesAndPlus: Backprop results do not match expected results for Plus params gradients");

        std::vector<ElementType> expectedTimesParamsGradientValues(timesParam.Shape().TotalSize());
        for (size_t i = 0; i < inputDim; ++i)
        {
            ElementType expectedVal = 0;
            for (size_t j = 0; j < numSamples; ++j)
                expectedVal += inputData[j * inputDim + i];

            for (size_t j = 0; j < outputDim; ++j)
                expectedTimesParamsGradientValues[i * outputDim + j] = expectedVal;
        }

        FloatingPointVectorCompare(timesParameterGradientData, expectedTimesParamsGradientValues,
                                   "TestTimesAndPlus: Backprop results do not match expected results for Times params gradients");
    }
}
template <typename ElementType>
void TestNDArrayView(size_t numAxes, const DeviceDescriptor& device)
{
    srand(1);

    size_t maxDimSize = 15;
    NDShape viewShape(numAxes);
    for (size_t i = 0; i < numAxes; ++i)
        viewShape[i] = (rand() % maxDimSize) + 1;

    // Create an NDArrayView over a std::array
    std::array<ElementType, 1> arrayData = { 3 };
    auto arrayDataView = MakeSharedObject<NDArrayView>(NDShape({}), arrayData);
    if (arrayDataView->template DataBuffer<ElementType>() != arrayData.data())
        throw std::runtime_error("The DataBuffer of the NDArrayView does not match the original buffer it was created over");

    std::vector<ElementType> data(viewShape.TotalSize());
    ElementType scale = 19.0;
    ElementType offset = -4.0;
    for (size_t i = 0; i < viewShape.TotalSize(); ++i)
        data[i] = offset + ((((ElementType)rand()) / RAND_MAX) * scale);

    auto cpuDataView = MakeSharedObject<NDArrayView>(viewShape, data);
    if (cpuDataView->template DataBuffer<ElementType>() != data.data())
        throw std::runtime_error("The DataBuffer of the NDArrayView does not match the original buffer it was created over");

    NDArrayViewPtr dataView;
    if (device.Type() == DeviceKind::CPU)
        dataView = cpuDataView;
    else
    {
        dataView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), viewShape, device);
        dataView->CopyFrom(*cpuDataView);
    }

    if (dataView->Device() != device)
        throw std::runtime_error("Device of NDArrayView does not match the 'device' it was created on");

    // Test clone
    auto clonedView = dataView->DeepClone(false);
    ElementType* first = nullptr;
    const ElementType* second = cpuDataView->template DataBuffer<ElementType>();

    NDArrayViewPtr temp1CpuDataView, temp2CpuDataView;
    if (device.Type() == DeviceKind::CPU)
    {
        if (dataView->DataBuffer<ElementType>() != data.data())
            throw std::runtime_error("The DataBuffer of the NDArrayView does not match the original buffer it was created over");

        first = clonedView->WritableDataBuffer<ElementType>();
    }
    else
    {
        temp1CpuDataView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), viewShape, DeviceDescriptor::CPUDevice());
        temp1CpuDataView->CopyFrom(*clonedView);
        first = temp1CpuDataView->WritableDataBuffer<ElementType>();
    }

    for (size_t i = 0; i < viewShape.TotalSize(); ++i)
    {
        if (first[i] != second[i])
            throw std::runtime_error("The contents of the clone do not match expected");
    }

    first[0] += 1;
    if (device.Type() != DeviceKind::CPU)
        clonedView->CopyFrom(*temp1CpuDataView);

    if (device.Type() == DeviceKind::CPU)
    {
        first = clonedView->WritableDataBuffer<ElementType>();
        second = dataView->DataBuffer<ElementType>();
    }
    else
    {
        temp1CpuDataView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), viewShape, DeviceDescriptor::CPUDevice());
        temp1CpuDataView->CopyFrom(*clonedView);
        first = temp1CpuDataView->WritableDataBuffer<ElementType>();

        temp2CpuDataView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), viewShape, DeviceDescriptor::CPUDevice());
        temp2CpuDataView->CopyFrom(*dataView);
        second = temp2CpuDataView->DataBuffer<ElementType>();
    }

    if (first[0] != (second[0] + 1))
        throw std::runtime_error("The clonedView's contents do not match expected");

    // Test alias
    auto aliasView = clonedView->Alias(true);
    const ElementType* aliasViewBuffer = aliasView->DataBuffer<ElementType>();
    const ElementType* clonedDataBuffer = clonedView->DataBuffer<ElementType>();
    if (aliasViewBuffer != clonedDataBuffer)
        throw std::runtime_error("The buffers underlying the alias view and the view it is an alias of are different!");

    clonedView->CopyFrom(*dataView);
    if (aliasViewBuffer != clonedDataBuffer)
        throw std::runtime_error("The buffers underlying the alias view and the view it is an alias of are different!");

    // Test read-only views
    auto errorMsg = "Was incorrectly able to get a writable buffer pointer from a readonly view";

    // Should not be able to get the WritableDataBuffer of a read-only view
    VerifyException([&aliasView]() {
        ElementType* aliasViewBuffer = aliasView->WritableDataBuffer<ElementType>();
        aliasViewBuffer; // touch the variable to suppress an unused-variable warning
    }, errorMsg);

    // Should not be able to copy into a read-only view
    VerifyException([&aliasView, &dataView]() {
        aliasView->CopyFrom(*dataView);
    }, errorMsg);
}
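// A plausible sketch of the VerifyException helper used above (an assumption --
// the actual helper lives in the shared test utilities): run the callable and
// fail the test if it does NOT throw.
template <typename FunctionType>
void VerifyException(const FunctionType& functionToTest, std::string errorMessage)
{
    bool error = false;
    try
    {
        functionToTest();
    }
    catch (const std::exception&)
    {
        error = true;
    }

    if (!error)
        throw std::runtime_error(errorMessage);
}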