template <typename ElementType>
void CreateSequenceTestDense(const DeviceDescriptor& device, bool readOnly)
{
    size_t numAxes = 4;
    size_t maxDimSize = 30;
    NDShape sampleShape = CreateShape(numAxes, maxDimSize);
    auto sampleSize = sampleShape.TotalSize();

    size_t batchCount = 1;
    size_t maxSequenceLen = 60;

    // Test without using seqStartFlag
    auto seqLenList = GenerateSequenceLengths(batchCount, maxSequenceLen);
    auto data = GenerateSequences<ElementType>(seqLenList, sampleShape);
    auto seq = data[0];
    auto testValue = Value::CreateSequence(sampleShape, seq, device, readOnly);
    CheckValue(testValue, sampleShape, data, seqLenList);

    // Test seqStartFlag is true
    seqLenList = GenerateSequenceLengths(batchCount, maxSequenceLen);
    data = GenerateSequences<ElementType>(seqLenList, sampleShape);
    seq = data[0];
    testValue = Value::CreateSequence(sampleShape, seq, true, device, readOnly);
    CheckValue(testValue, sampleShape, data, seqLenList, { true });

    // Test seqStartFlag is false
    seqLenList = GenerateSequenceLengths(batchCount, maxSequenceLen);
    data = GenerateSequences<ElementType>(seqLenList, sampleShape);
    seq = data[0];
    testValue = Value::CreateSequence(sampleShape, seq, false, device, readOnly);
    CheckValue(testValue, sampleShape, data, seqLenList, { false });

    // A data vector whose size is not a multiple of the sample size must be rejected.
    vector<ElementType> wrongSeq(sampleSize * 2 - 1, 0);
    VerifyException([&sampleShape, &wrongSeq, &device, &readOnly]() {
        Value::CreateSequence(sampleShape, wrongSeq, device, readOnly);
    }, "The expected exception has not been caught: The number of data is not a multiple of the sample size.");

    // An empty sequence must be rejected.
    auto emptySeq = vector<ElementType>(0);
    VerifyException([&sampleShape, &emptySeq, &device, &readOnly]() {
        Value::CreateSequence(sampleShape, emptySeq, device, readOnly);
    }, "The expected exception has not been caught: The sequence length is 0");
}
template <typename ElementType>
ValuePtr CreateBatchWithVariableSequence(const NDShape& sampleShape, size_t batchSize, const std::vector<size_t>& sequenceSize,
                                         const std::vector<ElementType>& batchData, const DeviceDescriptor& device, bool readOnly = false)
{
    auto shapeSize = sampleShape.TotalSize();
    if (batchData.size() % shapeSize != 0)
        InvalidArgument("The number of elements (%zu) in the vector containing batch data must be a multiple of the size (%zu) of the sample shape '%S'.",
                        batchData.size(), shapeSize, sampleShape.AsString().c_str());

    if (sequenceSize.size() != batchSize)
        InvalidArgument("The number of sequences (%zu) in the vector containing sequence size must match batch size (%zu).", sequenceSize.size(), batchSize);

    // Wrap each sequence's slice of the flat batchData in its own NDArrayView (on the CPU),
    // then let Value::Create assemble the batch and mask on the target device.
    std::vector<NDArrayViewPtr> sequencesView(batchSize);
    size_t curBatchDataIdx = 0;
    for (size_t i = 0; i < batchSize; i++)
    {
        auto sequenceDataShape = sampleShape.AppendShape({ sequenceSize[i] });
        sequencesView[i] = MakeSharedObject<NDArrayView>(sequenceDataShape, batchData.data() + curBatchDataIdx, shapeSize * sequenceSize[i], DeviceDescriptor::CPUDevice());
        curBatchDataIdx += shapeSize * sequenceSize[i];
    }

    return Value::Create(sampleShape, sequencesView, {}, device, readOnly, /*createNewCopy =*/ true);
}
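// A minimal usage sketch for CreateBatchWithVariableSequence above (a hedged example: the shape,
// lengths, and data values are made up for illustration, and ExampleVariableSequenceBatch is a
// hypothetical helper that is not part of the test suite).
static ValuePtr ExampleVariableSequenceBatch(const DeviceDescriptor& device)
{
    NDShape sampleShape({ 2 });                  // each sample is a vector of 2 elements
    std::vector<size_t> sequenceSize = { 3, 1 }; // two sequences, of lengths 3 and 1
    // Flat data: 3 samples for sequence 0 followed by 1 sample for sequence 1 => (3 + 1) * 2 values.
    std::vector<float> batchData = { 1, 2, 3, 4, 5, 6, 7, 8 };
    return CreateBatchWithVariableSequence(sampleShape, /*batchSize =*/ 2, sequenceSize, batchData, device);
}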
template <typename ElementType>
/*static*/ NDArrayViewPtr Variable::CreateValueFromParameterInitializer(const NDShape& shape, const ParameterInitializer& initConfig, const DeviceDescriptor& device)
{
    auto dataType = AsDataType<ElementType>();
    auto value = MakeSharedObject<NDArrayView>(dataType, shape, device);
    auto valueMatrix = value->template GetWritableMatrix<ElementType>();
    auto initializerType = initConfig[InitializerTypeAttributeName].Value<std::wstring>();
    if (initializerType == Microsoft::MSR::CNTK::ConstantInitializerTypeName)
    {
        auto constantInitValue = initConfig[ValueAttributeName].Value<double>();
        valueMatrix->SetValue((ElementType)constantInitValue);
    }
    else if (initializerType == Microsoft::MSR::CNTK::BilinearInitializerTypeName)
    {
        auto kernelWidth = initConfig[KernelWidthAttributeName].Value<size_t>();
        auto kernelHeight = initConfig[KernelHeightAttributeName].Value<size_t>();
        Microsoft::MSR::CNTK::LearnableParameter<ElementType>::InitBilinear(*valueMatrix, AsTensorShape(shape), kernelWidth, kernelHeight, AsCNTKImplDeviceId(device));
    }
    else
    {
        auto randomSeed = (unsigned long)initConfig[RandomSeedAttributeName].Value<size_t>();
        if (randomSeed == SentinelValueForAutoSelectRandomSeed)
            randomSeed = s_currentRandomSeed++;

        auto scale = initConfig[ScaleAttributeName].Value<double>();
        int outputRank = DefaultParamInitOutputRank, filterRank = DefaultParamInitFilterRank;
        if (initializerType != Microsoft::MSR::CNTK::UniformInitializerTypeName)
        {
            outputRank = initConfig[OutputRankAttributeName].Value<int>();
            filterRank = initConfig[FilterRankAttributeName].Value<int>();

            if (outputRank == SentinelValueForInferParamInitRank)
                outputRank = DefaultParamInitOutputRank;

            if (filterRank == SentinelValueForInferParamInitRank)
                filterRank = DefaultParamInitFilterRank;

            if ((filterRank + outputRank) > shape.Rank())
                InvalidArgument("Sum of filter rank (%d) and output rank (%d) of the parameter initializer cannot exceed the Parameter's rank (%d).", filterRank, outputRank, (int)shape.Rank());
        }

        Microsoft::MSR::CNTK::LearnableParameter<ElementType>::InitRandom(*valueMatrix, AsTensorShape(shape), initializerType, randomSeed, (ElementType)scale,
                                                                          filterRank, outputRank, /*initOnCPUOnly=*/true, AsCNTKImplDeviceId(device));
    }

    return value;
}
void RunEvaluationClassifier(FunctionPtr evalFunc, const DeviceDescriptor& device)
{
    const std::wstring inputNodeName = L"features";

    Variable inputVar;
    if (!GetInputVariableByName(evalFunc, inputNodeName, inputVar))
    {
        fprintf(stderr, "Input variable %S is not available.\n", inputNodeName.c_str());
        throw std::runtime_error("Input variable not found error.");
    }

    // Evaluate the network in several runs
    size_t iterationCount = 4;
    unsigned int randSeed = 2;
    srand(randSeed);
    size_t numSamples = 3;
    std::vector<float> inputData(inputVar.Shape().TotalSize() * numSamples);
    for (size_t t = 0; t < iterationCount; ++t)
    {
        for (size_t i = 0; i < inputData.size(); ++i)
        {
            inputData[i] = ((float)rand()) / RAND_MAX;
        }

        // Create input data shape. Adding sequence length and numSamples as axes.
        // Todo: remove sequence length when only numSamples is supported.
        // Todo: add convenience APIs to simplify data preparation here.
        NDShape inputShape = inputVar.Shape().AppendShape({ 1, numSamples });
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData, true));

        // Define output.
        ValuePtr outputValue;
        auto outputVar = evalFunc->Output();
        std::unordered_map<Variable, ValuePtr> outputs = { { outputVar, outputValue } };

        // Evaluate the model
        evalFunc->Forward({ { inputVar, inputValue } }, outputs, device);

        // Get output value
        outputValue = outputs[outputVar];

        // Todo: remove sequence length when only numSamples is supported.
        // Todo: add convenience APIs to simplify retrieval of output results.
        NDShape outputShape = outputVar.Shape().AppendShape({ 1, numSamples });
        std::vector<float> outputData(outputShape.TotalSize());
        NDArrayViewPtr cpuArrayOutput = MakeSharedObject<NDArrayView>(outputShape, outputData, false);
        cpuArrayOutput->CopyFrom(*outputValue->Data());

        assert(outputData.size() == outputVar.Shape()[0] * numSamples);
        fprintf(stderr, "Evaluation result:\n");
        size_t dataIndex = 0;
        auto outputDim = outputVar.Shape()[0];
        for (size_t i = 0; i < numSamples; i++)
        {
            fprintf(stderr, "Iteration:%lu, Sample %lu:\n", t, i);
            fprintf(stderr, " ");
            dataIndex = i * outputDim;
            // Print at most the first 10 elements of each sample's output.
            for (size_t j = 0; j < std::min((size_t)10, outputDim); j++)
            {
                fprintf(stderr, "%f ", outputData[dataIndex++]);
            }
            if (outputDim > 10)
            {
                fprintf(stderr, "...");
            }
            fprintf(stderr, "\n");
        }
    }
}
void TestReduceSum(size_t sampleRank, const DeviceDescriptor& device)
{
    size_t numSequences = 7;
    size_t maxAllowedSequenceLength = 11;
    size_t maxDimSize = 23;
    NDShape inputShape(sampleRank);
    for (size_t i = 0; i < sampleRank; ++i)
        inputShape[i] = (rand() % maxDimSize) + 1;

    auto sequenceLengths = GenerateSequenceLengths(numSequences, maxAllowedSequenceLength);
    auto sequences = GenerateSequences<float>(sequenceLengths, inputShape);
    ValuePtr sequencesValue = Value::Create(inputShape, sequences, device, true);

    // Test ReduceSum along a static axis
    {
        auto testReduceSum = [&sequences, &sequenceLengths, inputShape, sequencesValue, device, sampleRank](int reductionAxis, bool useNegativeAxisIndex)
        {
            size_t maxActualSequenceLength = sequencesValue->Shape()[inputShape.Rank()];
            size_t numSequences = sequencesValue->Shape()[inputShape.Rank() + 1];

            auto inputVar = InputVariable(inputShape, DataType::Float, L"input");
            FunctionPtr reduceSumFunc;
            bool reduceAll = (reductionAxis < 0);
            if (reduceAll)
                reduceSumFunc = ReduceSum(inputVar);
            else
                reduceSumFunc = ReduceSum(inputVar, Axis(useNegativeAxisIndex ? (reductionAxis - (int)sampleRank) : reductionAxis));

            NDShape outputShape = reduceSumFunc->Output().Shape();
            NDShape outputDataShape = outputShape;
            if (!reduceAll)
                outputDataShape = outputDataShape.AppendShape({ maxActualSequenceLength, numSequences });

            std::vector<float> outputData(outputDataShape.TotalSize());
            ValuePtr outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputDataShape, outputData, false), reduceAll ? nullptr : sequencesValue->Mask()->DeepClone());

            std::unordered_map<Variable, ValuePtr> outputs = { { reduceSumFunc->Output(), outputValue } };
            reduceSumFunc->Forward({ { inputVar, sequencesValue } }, outputs, device);

            std::vector<size_t> inputShapeStrides = GetStrides(inputShape);
            std::vector<size_t> outputShapeStrides = GetStrides(outputShape);

            std::vector<float> expectedPerFrameTotals(outputShape.TotalSize() * maxActualSequenceLength * numSequences, 0.0f);
            float expectedTotal = 0.0f;
            for (size_t i = 0; i < numSequences; ++i)
            {
                size_t currentSequenceLength = sequenceLengths[i];
                for (size_t j = 0; j < currentSequenceLength; ++j)
                {
                    for (size_t k = 0; k < inputShape.TotalSize(); ++k)
                    {
                        auto inputIdx = UnflattenedShape(k, inputShapeStrides);
                        auto outputIdx = inputIdx;
                        if (!reduceAll)
                            outputIdx[reductionAxis] = 0;
                        else
                            outputIdx = {};

                        auto flatOutputIdx = FlattenedIndex(outputIdx, outputShapeStrides);
                        float value = sequences[i][(j * inputShape.TotalSize()) + k];
                        expectedPerFrameTotals[(((i * maxActualSequenceLength) + j) * outputShape.TotalSize()) + flatOutputIdx] += value;
                        expectedTotal += value;
                    }
                }
            }

            if (reduceAll)
                FloatingPointVectorCompare(outputData, std::vector<float>({ expectedTotal }), "testReduceSum: Forward prop results do not match expected results");
            else
                FloatingPointVectorCompare(outputData, expectedPerFrameTotals, "testReduceSum: Forward prop results do not match expected results");
        };

        // Reduce over all axes
        testReduceSum(-1, false);

        int reductionAxis = 0;
        testReduceSum(reductionAxis, true);

        if (reductionAxis < (inputShape.Rank() - 1))
            reductionAxis++;
        testReduceSum(reductionAxis, false);

        if (reductionAxis < (inputShape.Rank() - 1))
            reductionAxis++;
        testReduceSum(reductionAxis, true);
    }

    // Test ReduceSum along a dynamic axis
    {
        auto testReduceSum = [&sequences, &sequenceLengths, inputShape, sequencesValue, device](const Axis& axis)
        {
            if (!axis.IsDynamicAxis())
                RuntimeError("Called the dynamic axis ReduceSum test with a static axis");

            size_t maxActualSequenceLength = sequencesValue->Shape()[inputShape.Rank()];
            size_t numSequences = sequencesValue->Shape()[inputShape.Rank() + 1];

            auto inputVar = InputVariable({ inputShape }, DataType::Float, L"input");
            FunctionPtr reduceSumFunc = ReduceSum(inputVar, axis);

            NDShape maskShape = { ((axis == Axis::DefaultBatchAxis()) ? maxActualSequenceLength : 1), ((axis == Axis::DefaultBatchAxis()) ? 1 : numSequences) };
            NDShape outputShape = reduceSumFunc->Output().Shape();
            auto outputDataShape = outputShape.AppendShape(maskShape);

            std::vector<float> outputData(outputDataShape.TotalSize());
            auto maskPtr = MakeSharedObject<NDMask>(maskShape, device);
            ValuePtr outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputDataShape, outputData, false), maskPtr);

            std::unordered_map<Variable, ValuePtr> outputs = { { reduceSumFunc->Output(), outputValue } };
            reduceSumFunc->Forward({ { inputVar, sequencesValue } }, outputs, device);

            std::vector<float> expectedTotals(outputDataShape.TotalSize(), 0.0f);
            for (size_t i = 0; i < numSequences; ++i)
            {
                size_t currentSequenceLength = sequenceLengths[i];
                for (size_t j = 0; j < currentSequenceLength; ++j)
                {
                    for (size_t k = 0; k < inputShape.TotalSize(); ++k)
                    {
                        float value = sequences[i][(j * inputShape.TotalSize()) + k];
                        if (axis == Axis::DefaultBatchAxis())
                            expectedTotals[(j * inputShape.TotalSize()) + k] += value;
                        else
                            expectedTotals[(i * inputShape.TotalSize()) + k] += value;
                    }
                }
            }

            FloatingPointVectorCompare(outputData, expectedTotals, "testReduceSum: Forward prop results do not match expected results");
        };

        testReduceSum(Axis::DefaultDynamicAxis());
    }
}
void TestSlice(size_t sampleRank, const DeviceDescriptor& device)
{
    size_t numSequences = 7;
    size_t maxAllowedSequenceLength = 11;
    size_t maxDimSize = 23;
    size_t minDimSize = 5;
    NDShape inputShape(sampleRank);
    for (size_t i = 0; i < sampleRank; ++i)
        inputShape[i] = (rand() % maxDimSize) + minDimSize;

    auto sequenceLengths = GenerateSequenceLengths(numSequences, maxAllowedSequenceLength);
    auto sequences = GenerateSequences<float>(sequenceLengths, inputShape);
    ValuePtr sequencesValue = Value::Create(inputShape, sequences, device, true);

    // Test slice along a static axis
    {
        auto testStaticAxisSlice = [&sequences, &sequenceLengths, inputShape, sequencesValue, device, sampleRank](int sliceAxis, int beginOffset, int endOffset, bool useNegativeAxisIndex)
        {
            size_t maxActualSequenceLength = sequencesValue->Shape()[inputShape.Rank()];
            size_t numSequences = sequencesValue->Shape()[inputShape.Rank() + 1];

            auto inputVar = InputVariable(inputShape, DataType::Float, L"input");
            auto sliceFunc = Slice(inputVar, Axis(useNegativeAxisIndex ? (sliceAxis - (int)sampleRank) : sliceAxis), beginOffset, endOffset);

            NDShape outputShape = sliceFunc->Output().Shape();
            auto outputDataShape = outputShape.AppendShape({ maxActualSequenceLength, numSequences });

            std::vector<float> outputData(outputDataShape.TotalSize());
            ValuePtr outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputDataShape, outputData, false), sequencesValue->Mask()->DeepClone());

            std::unordered_map<Variable, ValuePtr> outputs = { { sliceFunc->Output(), outputValue } };
            sliceFunc->Forward({ { inputVar, sequencesValue } }, outputs, device);

            std::vector<size_t> inputShapeStrides = GetStrides(inputShape);
            std::vector<size_t> outputShapeStrides = GetStrides(outputShape);

            size_t sliceStartOffset = (beginOffset >= 0) ? beginOffset : (inputShape[sliceAxis] + beginOffset);
            std::vector<float> expectedOutputValues(outputShape.TotalSize() * maxActualSequenceLength * numSequences);
            for (size_t i = 0; i < numSequences; ++i)
            {
                size_t currentSequenceLength = sequenceLengths[i];
                for (size_t j = 0; j < currentSequenceLength; ++j)
                {
                    for (size_t k = 0; k < outputShape.TotalSize(); ++k)
                    {
                        auto outputIdx = UnflattenedShape(k, outputShapeStrides);
                        auto inputIdx = outputIdx;
                        inputIdx[sliceAxis] += sliceStartOffset;
                        auto flatInputIdx = FlattenedIndex(inputIdx, inputShapeStrides);
                        expectedOutputValues[(((i * maxActualSequenceLength) + j) * outputShape.TotalSize()) + k] = sequences[i][(j * inputShape.TotalSize()) + flatInputIdx];
                    }
                }
            }

            FloatingPointVectorCompare(outputData, expectedOutputValues, "testStaticAxisSlice: Forward prop results do not match expected results");
        };

        int sliceAxis = 0;
        testStaticAxisSlice(sliceAxis, 3, 5, true);

        if (sliceAxis < (inputShape.Rank() - 1))
            sliceAxis++;
        testStaticAxisSlice(sliceAxis, -1, 0, false);

        if (sliceAxis < (inputShape.Rank() - 1))
            sliceAxis++;
        testStaticAxisSlice(sliceAxis, -3, -1, true);
    }

    // Test slice along a dynamic axis
    {
        auto testDynamicAxisSlice = [&sequences, &sequenceLengths, inputShape, sequencesValue, device](const Axis& axis, int beginOffset, int endOffset)
        {
            if (!axis.IsDynamicAxis())
                RuntimeError("Called the dynamic axis slice test with a static axis");

            size_t maxActualSequenceLength = sequencesValue->Shape()[inputShape.Rank()];
            size_t numSequences = sequencesValue->Shape()[inputShape.Rank() + 1];

            int endAndBeginOffsetDiff = endOffset - beginOffset;
            size_t maxSliceLength = (endAndBeginOffsetDiff > 0) ? endAndBeginOffsetDiff : maxActualSequenceLength + endAndBeginOffsetDiff;

            auto inputVar = InputVariable(inputShape, DataType::Float, L"input");
            auto sliceFunc = Slice(inputVar, axis, beginOffset, endOffset);
            sliceFunc = sliceFunc + sliceFunc;

            size_t outputSequenceAxisLength = (axis == Axis::DefaultDynamicAxis()) ? maxSliceLength : maxActualSequenceLength;
            size_t outputBatchAxisLength = (axis == Axis::DefaultBatchAxis()) ? maxSliceLength : numSequences;
            NDShape outputShape = sliceFunc->Output().Shape().AppendShape({ outputSequenceAxisLength, outputBatchAxisLength });
            std::vector<float> outputData(outputShape.TotalSize(), 0);

            // A mask is needed only when the slice length depends on the actual sequence length,
            // i.e. when shorter sequences leave invalid (padded) positions in the output.
            NDMaskPtr mask;
            if (endAndBeginOffsetDiff < 0)
                mask = MakeSharedObject<NDMask>(std::initializer_list<size_t>({ outputSequenceAxisLength, outputBatchAxisLength }), device);

            ValuePtr outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputShape, outputData, false), mask);

            std::unordered_map<Variable, ValuePtr> outputs = { { sliceFunc->Output(), outputValue } };
            sliceFunc->Forward({ { inputVar, sequencesValue } }, outputs, device);

            size_t startSequenceIdx = (axis == Axis::DefaultBatchAxis()) ? ((beginOffset >= 0) ? beginOffset : (numSequences + beginOffset)) : 0;
            size_t endSequenceIdx = (axis == Axis::DefaultBatchAxis()) ? ((endOffset > 0) ? endOffset : (numSequences + endOffset)) : numSequences;

            std::vector<float> expectedOutputValues(inputShape.TotalSize() * outputSequenceAxisLength * outputBatchAxisLength);
            for (size_t i = startSequenceIdx; i < endSequenceIdx; ++i)
            {
                size_t currentSequenceLength = sequenceLengths[i];
                size_t startFrameIdx = (axis == Axis::DefaultDynamicAxis()) ? ((beginOffset >= 0) ? beginOffset : (currentSequenceLength + beginOffset)) : 0;
                size_t endFrameIdx = (axis == Axis::DefaultDynamicAxis()) ? ((endOffset > 0) ? endOffset : (currentSequenceLength + endOffset)) : currentSequenceLength;

                size_t j = startFrameIdx;
                for (; j < endFrameIdx; ++j)
                {
                    for (size_t k = 0; k < inputShape.TotalSize(); ++k)
                        expectedOutputValues[((((i - startSequenceIdx) * outputSequenceAxisLength) + (j - startFrameIdx)) * inputShape.TotalSize()) + k] = 2 * sequences[i][(j * inputShape.TotalSize()) + k];
                }

                // Zero out the invalid portions of the actual output
                for (; j < (outputSequenceAxisLength + startFrameIdx); ++j)
                    for (size_t k = 0; k < inputShape.TotalSize(); ++k)
                        outputData[((((i - startSequenceIdx) * outputSequenceAxisLength) + (j - startFrameIdx)) * inputShape.TotalSize()) + k] = 0;
            }

            FloatingPointVectorCompare(outputData, expectedOutputValues, "testDynamicAxisSlice: Forward prop results do not match expected results");
        };

        testDynamicAxisSlice(Axis::DefaultDynamicAxis(), 0, 1);
        testDynamicAxisSlice(Axis::DefaultDynamicAxis(), 0, 2);
        testDynamicAxisSlice(Axis::DefaultDynamicAxis(), -1, 0);
        testDynamicAxisSlice(Axis::DefaultDynamicAxis(), -2, 0);
        testDynamicAxisSlice(Axis::DefaultDynamicAxis(), 0, -1);
        testDynamicAxisSlice(Axis::DefaultDynamicAxis(), 1, 0);
    }
}
template <typename ElementType>
NDArrayView::NDArrayView(const NDShape& viewShape, const SparseIndexType* colStarts, const SparseIndexType* rowIndices, const ElementType* nonZeroValues, size_t numNonZeroValues, const DeviceDescriptor& device, bool readOnly/* = false*/)
    : NDArrayView(AsDataType<ElementType>(), device, StorageFormat::SparseCSC, viewShape, false, AllocateTensorView<ElementType>(viewShape, StorageFormat::SparseCSC, device))
{
    if ((colStarts == nullptr) || (rowIndices == nullptr) || (nonZeroValues == nullptr) || (numNonZeroValues == 0) || (numNonZeroValues > viewShape.TotalSize()))
        InvalidArgument("Invalid sparse CSC format initial data specified for NDArrayView construction");

    auto sparseMatrix = GetWritableMatrix<ElementType>(1);
    sparseMatrix->SetMatrixFromCSCFormat(colStarts, rowIndices, nonZeroValues, numNonZeroValues, sparseMatrix->GetNumRows(), sparseMatrix->GetNumCols());
    m_isReadOnly = readOnly;
}
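// A hedged sketch of feeding the sparse CSC constructor above (the values are made up, and
// ExampleSparseCSCView is a hypothetical helper). CSC stores the non-zero values column by
// column, the row index of each non-zero value, and, per column, the offset of its first
// non-zero value plus a final entry holding the total count. For the column-major 3 x 2 matrix
//   [ 1 0 ]
//   [ 0 3 ]
//   [ 2 0 ]
// that gives the arrays below.
static NDArrayViewPtr ExampleSparseCSCView(const DeviceDescriptor& device)
{
    const SparseIndexType colStarts[] = { 0, 2, 3 };  // column 0 owns non-zeros [0, 2), column 1 owns [2, 3)
    const SparseIndexType rowIndices[] = { 0, 2, 1 }; // row of each non-zero value
    const float nonZeroValues[] = { 1.0f, 2.0f, 3.0f };
    return MakeSharedObject<NDArrayView>(NDShape({ 3, 2 }), colStarts, rowIndices, nonZeroValues, /*numNonZeroValues =*/ 3, device);
}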
template <typename ElementType>
void TestTimesAndPlus(size_t inputDim, size_t outputDim, size_t numSamples, const DeviceDescriptor& device, size_t numIterations,
                      bool usePreAllocatedOutputs, bool outputOnSpecifiedDevice, bool testSaveAndReLoad, unsigned int seed = 1)
{
    Parameter timesParam(MakeSharedObject<NDArrayView>((ElementType)0.5, NDShape({ outputDim, inputDim }), device), L"timesParameters");
    Parameter plusParam(MakeSharedObject<NDArrayView>((ElementType)1.2, std::initializer_list<size_t>({ outputDim }), device), L"plusParameters");

    Variable inputVar({ inputDim }, AsDataType<ElementType>(), L"input");
    auto timesAndPlusFunc = Plus(plusParam, Times(timesParam, inputVar));

    if (testSaveAndReLoad)
        SaveAndReloadModel<ElementType>(timesAndPlusFunc, { &inputVar, &timesParam, &plusParam }, device);

    srand(seed);
    for (size_t iterIdx = 0; iterIdx < numIterations; ++iterIdx)
    {
        std::vector<ElementType> inputData(inputDim * numSamples);
        for (size_t i = 0; i < inputData.size(); ++i)
            inputData[i] = ((ElementType)rand()) / RAND_MAX;

        NDShape inputShape = inputVar.Shape().AppendShape({ 1, numSamples });
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData.data(), inputData.size(), DeviceDescriptor::CPUDevice(), true));

        NDShape outputShape = timesAndPlusFunc->Output().Shape().AppendShape({ 1, numSamples });
        std::vector<ElementType> outputData(outputShape.TotalSize());
        ValuePtr outputValue;
        if (usePreAllocatedOutputs)
        {
            auto outputAllocationDevice = outputOnSpecifiedDevice ? device : DeviceDescriptor::CPUDevice();
            if (outputAllocationDevice.Type() == DeviceKind::CPU)
                outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputShape, outputData.data(), outputData.size(), outputAllocationDevice, false));
            else
                outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), outputShape, outputAllocationDevice));
        }

        std::unordered_map<Variable, ValuePtr> outputs = { { timesAndPlusFunc->Output(), outputValue } };
        auto backpropState = timesAndPlusFunc->Forward({ { inputVar, inputValue } }, outputs, device, { timesAndPlusFunc->Output() });

        if (!usePreAllocatedOutputs)
            outputValue = outputs[timesAndPlusFunc->Output()];

        // Perform backprop
        std::vector<ElementType> rootGradientsData(outputShape.TotalSize(), 1);
        ValuePtr rootGradientValue;
        if (device.Type() == DeviceKind::CPU)
            rootGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputShape, rootGradientsData.data(), rootGradientsData.size(), device, true));
        else
        {
            NDArrayViewPtr cpuArrayView = MakeSharedObject<NDArrayView>(outputShape, rootGradientsData.data(), rootGradientsData.size(), DeviceDescriptor::CPUDevice(), true);
            NDArrayViewPtr gpuArrayView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), outputShape, device);
            gpuArrayView->CopyFrom(*cpuArrayView);
            rootGradientValue = MakeSharedObject<Value>(gpuArrayView);
        }

        std::vector<ElementType> plusParameterGradientData(plusParam.Shape().TotalSize());
        std::vector<ElementType> timesParameterGradientData(timesParam.Shape().TotalSize());
        ValuePtr plusParameterGradientValue, timesParameterGradientValue;
        if (usePreAllocatedOutputs)
        {
            auto outputAllocationDevice = outputOnSpecifiedDevice ? device : DeviceDescriptor::CPUDevice();
            if (outputAllocationDevice.Type() == DeviceKind::CPU)
            {
                plusParameterGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(plusParam.Shape(), plusParameterGradientData.data(), plusParameterGradientData.size(), outputAllocationDevice, false));
                timesParameterGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(timesParam.Shape(), timesParameterGradientData.data(), timesParameterGradientData.size(), outputAllocationDevice, false));
            }
            else
            {
                plusParameterGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), plusParam.Shape(), outputAllocationDevice));
                timesParameterGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), timesParam.Shape(), outputAllocationDevice));
            }
        }

        std::unordered_map<Variable, ValuePtr> paramGradients = { { plusParam, plusParameterGradientValue }, { timesParam, timesParameterGradientValue } };
        timesAndPlusFunc->Backward(backpropState, { { timesAndPlusFunc->Output(), rootGradientValue } }, paramGradients);

        if (!usePreAllocatedOutputs)
        {
            plusParameterGradientValue = paramGradients[plusParam];
            timesParameterGradientValue = paramGradients[timesParam];
        }

        // Verify forward prop results
        if (!usePreAllocatedOutputs || (outputOnSpecifiedDevice && (device.Type() != DeviceKind::CPU)))
        {
            NDArrayViewPtr cpuArrayView = MakeSharedObject<NDArrayView>(outputShape, outputData.data(), outputData.size(), DeviceDescriptor::CPUDevice(), false);
            cpuArrayView->CopyFrom(*outputValue->Data());
        }

        std::vector<ElementType> expectedOutputValues(outputShape.TotalSize());
        for (size_t i = 0; i < numSamples; ++i)
        {
            ElementType expectedVal = (ElementType)1.2;
            for (size_t j = 0; j < inputDim; ++j)
                expectedVal += (ElementType)(inputData[i * inputDim + j] * 0.5);

            for (size_t j = 0; j < outputDim; ++j)
                expectedOutputValues[i * outputDim + j] = expectedVal;
        }

        FloatingPointVectorCompare(outputData, expectedOutputValues, "TestTimesAndPlus: Forward prop results do not match expected results");

        // Verify backward prop results
        if (device.Type() != DeviceKind::CPU)
        {
            NDArrayViewPtr cpuArrayView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), plusParam.Shape(), DeviceDescriptor::CPUDevice());
            cpuArrayView->CopyFrom(*plusParameterGradientValue->Data());
            const ElementType* cpuArrayViewBuffer = cpuArrayView->DataBuffer<ElementType>();
            memcpy(plusParameterGradientData.data(), cpuArrayViewBuffer, plusParam.Shape().TotalSize() * sizeof(ElementType));

            cpuArrayView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), timesParam.Shape(), DeviceDescriptor::CPUDevice());
            cpuArrayView->CopyFrom(*timesParameterGradientValue->Data());
            cpuArrayViewBuffer = cpuArrayView->DataBuffer<ElementType>();
            memcpy(timesParameterGradientData.data(), cpuArrayViewBuffer, timesParam.Shape().TotalSize() * sizeof(ElementType));
        }

        for (size_t i = 0; i < outputDim; ++i)
            if (plusParameterGradientData[i] != numSamples)
                throw std::runtime_error("TestTimesAndPlus: Backward prop results do not match expected results for Plus params gradients");

        std::vector<ElementType> expectedTimesParamsGradientValues(timesParam.Shape().TotalSize());
        for (size_t i = 0; i < inputDim; ++i)
        {
            ElementType expectedVal = 0;
            for (size_t j = 0; j < numSamples; ++j)
                expectedVal += inputData[j * inputDim + i];

            for (size_t j = 0; j < outputDim; ++j)
                expectedTimesParamsGradientValues[i * outputDim + j] = expectedVal;
        }

        FloatingPointVectorCompare(timesParameterGradientData, expectedTimesParamsGradientValues, "TestTimesAndPlus: Backward prop results do not match expected results for Times params gradients");
    }
}
void TestFeedForwardNetworkCreation(const DeviceDescriptor& device, bool testSaveAndReLoad)
{
    using namespace std::placeholders;

    const size_t inputDim = 937;
    const size_t numOutputClasses = 9304;
    const size_t numHiddenLayers = 6;
    const size_t hiddenLayersDim = 2048;

    Variable inputVar({ inputDim }, DataType::Float, L"features");
    auto classifierOutputFunction = FullyConnectedFeedForwardClassifierNet(inputVar, numOutputClasses, hiddenLayersDim, numHiddenLayers, device, std::bind(Sigmoid, _1, L""), L"classifierOutput");
    Variable classifierOutput = classifierOutputFunction;

    Variable labelsVar({ numOutputClasses }, DataType::Float, L"Labels");
    auto trainingLossFunction = CNTK::CrossEntropyWithSoftmax(classifierOutput, labelsVar, L"LossFunction");
    Variable trainingLoss = trainingLossFunction;
    auto predictionFunction = CNTK::ClassificationError(classifierOutput, labelsVar, L"ClassificationError");
    Variable prediction = predictionFunction;

    auto ffNet = CNTK::Combine({ trainingLoss.Owner(), prediction.Owner(), classifierOutput.Owner() }, L"ClassifierModel");

    // Now test the structure
    if (ffNet->Parameters().size() != ((numHiddenLayers * 2) + 1))
        throw std::runtime_error("TestFeedForwardNetworkCreation: Function does not have expected Parameter count");

    if (ffNet->Arguments().size() != 2)
        throw std::runtime_error("TestFeedForwardNetworkCreation: Function does not have expected Argument count");

    if (ffNet->Outputs().size() != 3)
        throw std::runtime_error("TestFeedForwardNetworkCreation: Function does not have expected Output count");

    if (testSaveAndReLoad)
        SaveAndReloadModel<float>(ffNet, { &inputVar, &labelsVar, &trainingLoss, &prediction, &classifierOutput }, device);

    // Run Forward and backward a few times
    size_t iterationCount = 4;
    unsigned int randSeed = 2;
    srand(randSeed);
    size_t numSamples = 3;
    for (size_t t = 0; t < iterationCount; ++t)
    {
        std::vector<float> inputData(inputDim * numSamples);
        for (size_t i = 0; i < inputData.size(); ++i)
            inputData[i] = ((float)rand()) / RAND_MAX;

        NDShape inputShape = inputVar.Shape().AppendShape({ 1, numSamples });
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData.data(), inputData.size(), DeviceDescriptor::CPUDevice(), true));

        std::vector<float> labelData(numOutputClasses * numSamples, 0);
        for (size_t i = 0; i < numSamples; ++i)
            labelData[(i * numOutputClasses) + (rand() % numOutputClasses)] = 1;

        NDShape labelShape = labelsVar.Shape().AppendShape({ 1, numSamples });
        ValuePtr labelValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(labelShape, labelData.data(), labelData.size(), DeviceDescriptor::CPUDevice(), true));

        ValuePtr outputValue, predictionErrorValue;
        std::unordered_map<Variable, ValuePtr> outputs = { { classifierOutput, outputValue }, { prediction, predictionErrorValue } };
        auto backpropState = ffNet->Forward({ { inputVar, inputValue }, { labelsVar, labelValue } }, outputs, device, { trainingLoss });

        // Perform backprop
        NDShape outputShape = trainingLoss.Shape();
        std::vector<float> rootGradientsData(outputShape.TotalSize(), 1);
        ValuePtr rootGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputShape, rootGradientsData.data(), rootGradientsData.size(), DeviceDescriptor::CPUDevice(), true));
        std::unordered_map<Variable, ValuePtr> paramGradients;
        auto allParams = ffNet->Parameters();
        for (auto iter = allParams.begin(); iter != allParams.end(); ++iter)
            paramGradients[*iter] = nullptr;

        ffNet->Backward(backpropState, { { trainingLoss, rootGradientValue } }, paramGradients);
    }
}
NDMask::NDMask(const NDShape& shape, const DeviceDescriptor& device/* = DeviceDescriptor::DefaultDevice()*/)
    : NDMask(shape, AllocateMatrix(shape, device))
{
    if (shape.NumAxes() > 2)
        LogicError("NDMask instances with more than 2 axes are currently unsupported");
}
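// A hedged sketch of building a mask for a padded batch (it assumes NDMask::InvalidateSection
// with a (sectionOffset, sectionShape) signature, as used by the Value machinery in this
// library; ExamplePaddedMask is a hypothetical helper). The mask covers 2 sequences padded to
// 4 steps each.
static NDMaskPtr ExamplePaddedMask(const DeviceDescriptor& device)
{
    auto mask = MakeSharedObject<NDMask>(NDShape({ 4, 2 }), device);
    // Sequence 1 is only 2 steps long: invalidate its last 2 (padding) positions,
    // i.e. the 2 x 1 section starting at offset (2, 1).
    mask->InvalidateSection({ 2, 1 }, NDShape({ 2, 1 }));
    return mask;
}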
template <typename ElementType>
void TestTensorPlus(size_t numAxesLeftOperand, size_t numAxesRightOperand, const DeviceDescriptor& device, bool useConstantInputsOnly)
{
    srand(1);

    size_t maxDimSize = 15;
    NDShape leftInputShape(numAxesLeftOperand);
    for (size_t i = 0; i < numAxesLeftOperand; ++i)
        leftInputShape[i] = (rand() % maxDimSize) + 1;

    NDShape rightInputShape(numAxesRightOperand);
    for (size_t i = 0; i < std::min(numAxesLeftOperand, numAxesRightOperand); ++i)
        rightInputShape[i] = leftInputShape[i];

    for (size_t i = std::min(numAxesLeftOperand, numAxesRightOperand); i < numAxesRightOperand; ++i)
        rightInputShape[i] = (rand() % maxDimSize) + 1;

    std::vector<ElementType> leftInputData(leftInputShape.TotalSize());
    for (size_t i = 0; i < leftInputData.size(); ++i)
        leftInputData[i] = ((ElementType)rand()) / RAND_MAX;

    auto leftInputValueShape = leftInputShape.AppendShape({ 1, 1 });
    auto leftInputValue = MakeSharedObject<NDArrayView>(leftInputValueShape, leftInputData, true);

    std::vector<ElementType> rightInputData(rightInputShape.TotalSize());
    for (size_t i = 0; i < rightInputData.size(); ++i)
        rightInputData[i] = ((ElementType)rand()) / RAND_MAX;

    auto rightInputValueShape = rightInputShape.AppendShape({ 1, 1 });
    auto rightInputValue = MakeSharedObject<NDArrayView>(rightInputValueShape, rightInputData, true);

    Variable leftInputVar, rightInputVar;
    if (useConstantInputsOnly)
    {
        leftInputValue = leftInputValue->DeepClone(device, false);
        rightInputValue = rightInputValue->DeepClone(device, false);

        leftInputVar = Parameter(leftInputValue, L"leftInput");
        rightInputVar = Parameter(rightInputValue, L"rightInput");
    }
    else
    {
        leftInputVar = InputVariable(leftInputShape, AsDataType<ElementType>(), true, L"leftInput");
        rightInputVar = InputVariable(rightInputShape, AsDataType<ElementType>(), true, L"rightInput");
    }

    auto plusFunc = Plus(leftInputVar, rightInputVar);

    NDShape outputShape = plusFunc->Output().Shape();
    if (!useConstantInputsOnly)
        outputShape = outputShape.AppendShape({ 1, 1 });

    std::vector<ElementType> outputData(outputShape.TotalSize());
    ValuePtr outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputShape, outputData, false));

    std::unordered_map<Variable, ValuePtr> outputs = { { plusFunc->Output(), outputValue } };
    BackPropStatePtr backPropState;
    if (useConstantInputsOnly)
        backPropState = plusFunc->Forward(std::unordered_map<Variable, ValuePtr>({}), outputs, device, { plusFunc->Output() });
    else
        backPropState = plusFunc->Forward({ { leftInputVar, MakeSharedObject<Value>(leftInputValue) }, { rightInputVar, MakeSharedObject<Value>(rightInputValue) } }, outputs, device, { plusFunc->Output() });

    // Perform backprop
    std::vector<ElementType> rootGradientsData(outputShape.TotalSize(), 1);
    ValuePtr rootGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputShape, rootGradientsData, true));

    std::vector<ElementType> leftInputGradientsData(leftInputValueShape.TotalSize());
    ValuePtr leftInputGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(leftInputValueShape, leftInputGradientsData, false));
    std::vector<ElementType> rightInputGradientsData(rightInputValueShape.TotalSize());
    ValuePtr rightInputGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(rightInputValueShape, rightInputGradientsData, false));

    std::unordered_map<Variable, ValuePtr> gradients = { { leftInputVar, leftInputGradientValue }, { rightInputVar, rightInputGradientValue } };
    plusFunc->Backward(backPropState, { { plusFunc->Output(), rootGradientValue } }, gradients);

    // Verify forward prop results
    auto& smallerInput = (numAxesLeftOperand < numAxesRightOperand) ? leftInputData : rightInputData;
    auto& largerInput = (numAxesLeftOperand < numAxesRightOperand) ? rightInputData : leftInputData;
    std::vector<ElementType> expectedOutputValues = largerInput;
    for (size_t i = 0; i < (expectedOutputValues.size() / smallerInput.size()); ++i)
    {
        for (size_t j = 0; j < smallerInput.size(); ++j)
            expectedOutputValues[(i * smallerInput.size()) + j] += smallerInput[j];
    }

    FloatingPointVectorCompare(outputData, expectedOutputValues, "Forward prop results do not match expected results");

    // Verify backward prop results: the broadcast (smaller) operand's gradient accumulates over
    // every position it was broadcast to, while the larger operand's gradient is all ones.
    auto& smallerInputGradients = (numAxesLeftOperand < numAxesRightOperand) ? leftInputGradientsData : rightInputGradientsData;
    auto& largerInputGradients = (numAxesLeftOperand < numAxesRightOperand) ? rightInputGradientsData : leftInputGradientsData;
    std::vector<ElementType> expectedLargerInputGradientValues(largerInputGradients.size(), (ElementType)1);
    std::vector<ElementType> expectedSmallerInputGradientValues(smallerInputGradients.size(), (ElementType)(largerInputGradients.size() / smallerInputGradients.size()));

    FloatingPointVectorCompare(smallerInputGradients, expectedSmallerInputGradientValues, "TestTensorPlus: Backward prop results do not match expected results");
    FloatingPointVectorCompare(largerInputGradients, expectedLargerInputGradientValues, "TestTensorPlus: Backward prop results do not match expected results");
}
template <typename ElementType>
void CheckValue(const ValuePtr testValue, const NDShape& sampleShape, const vector<vector<ElementType>>& expectedData, const vector<size_t>& seqLenList, const vector<bool>& seqStartFlags = {})
{
    size_t sampleSize = sampleShape.TotalSize();

    // Check parameters
    BOOST_TEST(expectedData.size() == seqLenList.size(), "Parameter error: the number of sequences in the expected data and in the sequence length list does not match.");
    for (size_t i = 0; i < expectedData.size(); i++)
    {
        if (expectedData[i].size() != seqLenList[i] * sampleSize)
        {
            ReportFailure("Parameter error: the number of data elements for sequence %" PRIu64 " in the expected data does not match. Expected: %" PRIu64 ", actual: %" PRIu64 ".",
                          i, seqLenList[i] * sampleSize, expectedData[i].size());
        }
    }

    // Check shape
    auto valueRank = testValue->Shape().Rank();
    auto sampleRank = sampleShape.Rank();
    auto shapeIsCorrect = !((valueRank < sampleRank + 1) || (valueRank > sampleRank + 2) || (sampleShape != testValue->Shape().SubShape(0, sampleRank)));
    BOOST_TEST(shapeIsCorrect, "The Value does not have the expected shape.");

    size_t numOfSequences;
    if (valueRank == sampleShape.Rank() + 1)
    {
        // no batch axis, only sequence axis
        numOfSequences = 1;
    }
    else
    {
        assert(valueRank == sampleShape.Rank() + 2);
        numOfSequences = testValue->Shape()[valueRank - 1];
    }

    if (numOfSequences != expectedData.size())
    {
        ReportFailure("The number of sequences in the Value does not match. Expected: %" PRIu64 ", actual: %" PRIu64 ".", expectedData.size(), numOfSequences);
    }

    CheckMask(testValue, seqLenList, seqStartFlags);

    // Get data from Value
    vector<ElementType> outputData(testValue->Shape().TotalSize());
    NDArrayViewPtr arrayOutput = MakeSharedObject<NDArrayView>(testValue->Shape(), outputData, false);
    arrayOutput->CopyFrom(*testValue->Data());

    size_t maxSeqLen = *max_element(seqLenList.begin(), seqLenList.end());
    size_t oIndex = 0;
    for (size_t seq = 0; seq < seqLenList.size(); seq++)
    {
        size_t seqLen = seqLenList[seq];
        for (size_t sIndex = 0; sIndex < seqLen * sampleSize; sIndex++, oIndex++)
        {
            if (expectedData[seq][sIndex] != outputData[oIndex])
            {
                ReportFailure("Data does not match at position %" PRIu64 ", expected: %f, actual: %f\n", oIndex, expectedData[seq][sIndex], outputData[oIndex]);
            }
        }
        // Skip the padded (masked) positions of shorter sequences
        oIndex += (maxSeqLen - seqLen) * sampleSize;
    }
}