/// Evaluates 'evalFunc' several times on synthetic data and prints the values of one
/// named output to stderr.
///
/// The input variable is looked up by the name "features" and the output variable by
/// the name "out.z_output". The function performs 4 forward passes of 3 samples each;
/// every pass feeds the same pattern, element i of the input buffer being (i % 255).
///
/// Throws a string literal (const char*) when either variable cannot be found.
void RunEvaluationOneHidden(FunctionPtr evalFunc, const DeviceDescriptor& device)
{
    const std::wstring inputNodeName = L"features";
    const std::wstring outputNodeName = L"out.z_output";

    Variable inputVar;
    if (!GetInputVariableByName(evalFunc, inputNodeName, inputVar))
    {
        // %ls is the standard conversion for printing a wide string with a narrow printf.
        fprintf(stderr, "Input variable %ls is not available.\n", inputNodeName.c_str());
        throw("Input variable not found error.");
    }

    Variable outputVar;
    if (!GetOutputVaraiableByName(evalFunc, outputNodeName, outputVar))
    {
        fprintf(stderr, "Output variable %ls is not available.\n", outputNodeName.c_str());
        throw("Output variable not found error.");
    }

    // Evaluate the network in several runs
    const size_t iterationCount = 4;
    const size_t numSamples = 3;
    for (size_t t = 0; t < iterationCount; ++t)
    {
        std::vector<float> inputData(inputVar.Shape().TotalSize() * numSamples);
        for (size_t i = 0; i < inputData.size(); ++i)
        {
            inputData[i] = static_cast<float>(i % 255);
        }

        // The data shape is the variable's shape followed by the two extra axes { 1, numSamples }.
        NDShape inputShape = inputVar.Shape().AppendShape({1, numSamples});
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData, true));

        // The output entry starts out null; the map entry is read back after Forward.
        ValuePtr outputValue;
        std::unordered_map<Variable, ValuePtr> outputs = {{outputVar, outputValue}};
        evalFunc->Forward({{inputVar, inputValue}}, outputs, device);
        outputValue = outputs[outputVar];

        // Copy the result into a host-side buffer; outputData is read below, after CopyFrom.
        NDShape outputShape = outputVar.Shape().AppendShape({1, numSamples});
        std::vector<float> outputData(outputShape.TotalSize());
        NDArrayViewPtr cpuArrayOutput = MakeSharedObject<NDArrayView>(outputShape, outputData, false);
        cpuArrayOutput->CopyFrom(*outputValue->Data());

        assert(outputData.size() == outputVar.Shape()[0] * numSamples);
        fprintf(stderr, "Evaluation result:\n");
        size_t dataIndex = 0;
        auto outputDim = outputVar.Shape()[0];
        for (size_t i = 0; i < numSamples; i++)
        {
            // size_t must be printed with %zu; %lu is the wrong width where unsigned long is 32-bit.
            fprintf(stderr, "Iteration:%zu, Sample %zu:\n", t, i);
            fprintf(stderr, "Ouput:");
            for (size_t j = 0; j < outputDim; j++)
            {
                fprintf(stderr, "%f ", outputData[dataIndex++]);
            }
            fprintf(stderr, "\n");
        }
    }
}
// Round-trips generated one-hot sequences through a Value object:
//   1. generates sequences with the lengths given in 'expectedSeqLens',
//   2. packs them into a Value created on 'device',
//   3. copies the Value back out into 'output' via CopyVariableValueTo,
//   4. hands the generated input and the copied output to CheckCopyToOutput.
// NOTE(review): ElementType is declared outside this block (presumably a template
// parameter of this function) - confirm against the full file.
void TestOneHotSequences(const Variable& sampleVariable, std::vector<size_t>& expectedSeqLens, std::vector<std::vector<size_t>>& output, const DeviceDescriptor& device)
{
    // Total number of elements in one sample of the variable.
    const auto sampleSize = sampleVariable.Shape().TotalSize();

    auto generatedSequences = GenerateOneHotSequences(expectedSeqLens, sampleSize);
    auto packedValue = Value::Create<ElementType>(sampleSize, generatedSequences, device);

    packedValue->CopyVariableValueTo(sampleVariable, output);
    CheckCopyToOutput(generatedSequences, output);
}
/// Builds an embedding layer: the product of a randomly initialised
/// { embeddingDim, inputDim } parameter with 'input'.
///
/// 'input' is expected to have rank 1 (checked with assert only, so the check
/// disappears in NDEBUG builds).
FunctionPtr Embedding(const Variable& input, size_t embeddingDim, const DeviceDescriptor& device)
{
    assert(input.Shape().Rank() == 1);
    const size_t inputDim = input.Shape()[0];

    // NOTE(review): -0.05 / 0.05 / 1 are presumably the uniform range bounds and a fixed seed - confirm.
    auto initialWeights = CNTK::NDArrayView::RandomUniform<float>({ embeddingDim, inputDim }, -0.05, 0.05, 1, device);
    auto embeddingParameters = Parameter(initialWeights);

    return Times(embeddingParameters, input);
}
/// Creates a fully connected linear layer: Plus(bias, Times(weights, input)).
///
/// The weights have shape { outputDim, inputDim } and the bias has shape { outputDim };
/// both are initialised through NDArrayView::RandomUniform on 'device'. The resulting
/// Plus function is given the name 'outputName'.
///
/// 'input' is expected to have rank 1 (checked with assert only).
inline FunctionPtr SetupFullyConnectedLinearLayer(Variable input, size_t outputDim, const DeviceDescriptor& device, const std::wstring& outputName = L"")
{
    assert(input.Shape().Rank() == 1);
    const size_t inputDim = input.Shape()[0];

    // Weight matrix and the projection of the input through it.
    // NOTE(review): -0.05 / 0.05 / 1 are presumably the uniform range bounds and a fixed seed - confirm.
    auto weightInit = CNTK::NDArrayView::RandomUniform<float>({outputDim, inputDim}, -0.05, 0.05, 1, device);
    auto weights = CNTK::Parameter(weightInit);
    auto projection = CNTK::Times(weights, input);

    // Bias vector, created after the projection to keep the original construction order.
    auto biasInit = CNTK::NDArrayView::RandomUniform<float>({outputDim}, -0.05, 0.05, 1, device);
    auto bias = CNTK::Parameter(biasInit);

    return CNTK::Plus(bias, projection, outputName);
}
/// <summary> /// The example shows /// - how to load a pretrained model and evaluate several nodes by combining their outputs /// Note: The example uses the model trained by <CNTK>/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10.py /// Please see README.md in <CNTK>/Examples/Image/Classification/ResNet about how to train the model. /// The parameter 'modelFilePath' specifies the path to the model. /// </summary> void EvaluateCombinedOutputs(const wchar_t* modelFilePath, const DeviceDescriptor& device) { printf("\n===== Evaluate combined outputs =====\n"); // Load the model. FunctionPtr modelFunc = Function::Load(modelFilePath, device); // Get node of interest std::wstring intermediateLayerName = L"final_avg_pooling"; FunctionPtr interLayerPrimitiveFunc = modelFunc->FindByName(intermediateLayerName); Variable poolingOutput = interLayerPrimitiveFunc->Output(); // Create a function which combine outputs from the node "final_avg_polling" and the final layer of the model. FunctionPtr evalFunc = Combine( { modelFunc->Output(), poolingOutput }); Variable inputVar = evalFunc->Arguments()[0]; // Prepare input data. // For evaluating an image, you first need to perform some image preprocessing to make sure that the input image has the correct size and layout // that match the model inputs. // Please note that the model used by this example expects the CHW image layout. // inputVar.Shape[0] is image width, inputVar.Shape[1] is image height, and inputVar.Shape[2] is channels. // For simplicity and avoiding external dependencies, we skip the preprocessing step here, and just use some artificially created data as input. 
std::vector<float> inputData(inputVar.Shape().TotalSize()); for (size_t i = 0; i < inputData.size(); ++i) { inputData[i] = static_cast<float>(i % 255); } // Create input value and input data map ValuePtr inputVal = Value::CreateBatch(inputVar.Shape(), inputData, device); std::unordered_map<Variable, ValuePtr> inputDataMap = { { inputVar, inputVal } }; // Create output data map. Using null as Value to indicate using system allocated memory. // Alternatively, create a Value object and add it to the data map. Variable modelOutput = evalFunc->Outputs()[0]; Variable interLayerOutput = evalFunc->Outputs()[1]; std::unordered_map<Variable, ValuePtr> outputDataMap = { { modelOutput, nullptr }, { interLayerOutput, nullptr } }; // Start evaluation on the device evalFunc->Evaluate(inputDataMap, outputDataMap, device); // Get evaluate result as dense outputs for(auto & outputVariableValuePair : outputDataMap) { auto variable = outputVariableValuePair.first; auto value = outputVariableValuePair.second; std::vector<std::vector<float>> outputData; value->CopyVariableValueTo(variable, outputData); PrintOutput<float>(variable.Shape().TotalSize(), outputData); } }
/// Builds one fully connected layer: nonLinearity(Plus(bias, Times(weights, input))).
///
/// The weights have shape { outputDim, inputDim } and are initialised through
/// NDArrayView::RandomUniform; the bias has shape { outputDim } and is initialised
/// to 0.0f. Both are created on 'device'.
///
/// 'input' is expected to have a single axis (checked with assert only).
FunctionPtr FullyConnectedDNNLayer(Variable input, size_t outputDim, const DeviceDescriptor& device, const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity)
{
    assert(input.Shape().NumAxes() == 1);
    const size_t inputDim = input.Shape()[0];

    // NOTE(review): -0.5 / 0.5 / 1 are presumably the uniform range bounds and a fixed seed - confirm.
    auto weightInit = NDArrayView::RandomUniform<float>({ outputDim, inputDim }, -0.5, 0.5, 1, device);
    auto weights = Parameter(weightInit);
    auto projection = Times(weights, input);

    auto bias = Parameter({ outputDim }, 0.0f, device);
    auto affine = Plus(bias, projection);

    return nonLinearity(affine);
}
/// <summary> /// The example shows /// - how to load model. /// - how to prepare input data for a batch of samples. /// - how to prepare input and output data map. /// - how to evaluate a model. /// - how to retrieve evaluation result and retrieve output data in dense format. /// Note: The example uses the model trained by <CNTK>/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10.py /// Please see README.md in <CNTK>/Examples/Image/Classification/ResNet about how to train the model. /// The parameter 'modelFile' specifies the path to the model. /// </summary> void EvaluationBatchUsingDense(const wchar_t* modelFile, const DeviceDescriptor& device) { printf("\n===== Evaluate batch of samples using dense format.\n"); // The number of samples in the batch. size_t sampleCount = 3; // Load the model. // The model is trained by <CNTK>/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10.py // Please see README.md in <CNTK>/Examples/Image/Classification/ResNet about how to train the model. FunctionPtr modelFunc = Function::Load(modelFile, device); // Get input variable. The model has only one single input. Variable inputVar = modelFunc->Arguments()[0]; // The model has only one output. // If the model has more than one output, use modelFunc->Outputs to get the list of output variables. Variable outputVar = modelFunc->Output(); // Prepare input data. // For evaluating an image, you first need to perform some image preprocessing to make sure that the input image has the correct size and layout // that match the model inputs. // Please note that the model used by this example expects the CHW image layout. // inputVar.Shape[0] is image width, inputVar.Shape[1] is image height, and inputVar.Shape[2] is channels. // For simplicity and avoiding external dependencies, we skip the preprocessing step here, and just use some artificially created data as input. 
std::vector<float> inputData(inputVar.Shape().TotalSize() * sampleCount); for (size_t i = 0; i < inputData.size(); ++i) { inputData[i] = static_cast<float>(i % 255); } // Create input value and input data map. ValuePtr inputVal = Value::CreateBatch(inputVar.Shape(), inputData, device); std::unordered_map<Variable, ValuePtr> inputDataMap = { { inputVar, inputVal } }; // Create output data map. Using null as Value to indicate using system allocated memory. // Alternatively, create a Value object and add it to the data map. std::unordered_map<Variable, ValuePtr> outputDataMap = { { outputVar, nullptr } }; // Start evaluation on the device modelFunc->Evaluate(inputDataMap, outputDataMap, device); // Get evaluate result as dense output ValuePtr outputVal = outputDataMap[outputVar]; std::vector<std::vector<float>> outputData; outputVal->CopyVariableValueTo(outputVar, outputData); PrintOutput<float>(outputVar.Shape().TotalSize(), outputData); }
/// <summary> /// The example shows /// - how to load a pretrained model and evaluate an intermediate layer of its network. /// Note: The example uses the model trained by <CNTK>/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10.py /// Please see README.md in <CNTK>/Examples/Image/Classification/ResNet about how to train the model. /// The parameter 'modelFilePath' specifies the path to the model. /// </summary> void EvaluateIntermediateLayer(const wchar_t* modelFilePath, const DeviceDescriptor& device) { printf("\n===== Evaluate intermediate layer =====\n"); // Load the model. FunctionPtr rootFunc = Function::Load(modelFilePath, device); std::wstring intermediateLayerName = L"final_avg_pooling"; FunctionPtr interLayerPrimitiveFunc = rootFunc->FindByName(intermediateLayerName); // The Function returned by FindByName is a primitive function. // For evaluation, it is required to create a composite function from the primitive function. FunctionPtr modelFunc = AsComposite(interLayerPrimitiveFunc); Variable outputVar = modelFunc->Output(); Variable inputVar = modelFunc->Arguments()[0]; // Prepare input data. // For evaluating an image, you first need to perform some image preprocessing to make sure that the input image has the correct size and layout // that match the model inputs. // Please note that the model used by this example expects the CHW image layout. // inputVar.Shape[0] is image width, inputVar.Shape[1] is image height, and inputVar.Shape[2] is channels. // For simplicity and avoiding external dependencies, we skip the preprocessing step here, and just use some artificially created data as input. 
std::vector<float> inputData(inputVar.Shape().TotalSize()); for (size_t i = 0; i < inputData.size(); ++i) { inputData[i] = static_cast<float>(i % 255); } // Create input value and input data map ValuePtr inputVal = Value::CreateBatch(inputVar.Shape(), inputData, device); std::unordered_map<Variable, ValuePtr> inputDataMap = { { inputVar, inputVal } }; // Create output data map. Using null as Value to indicate using system allocated memory. // Alternatively, create a Value object and add it to the data map. std::unordered_map<Variable, ValuePtr> outputDataMap = { { outputVar, nullptr } }; // Start evaluation on the device modelFunc->Evaluate(inputDataMap, outputDataMap, device); // Get evaluate result as dense output ValuePtr outputVal = outputDataMap[outputVar]; std::vector<std::vector<float>> outputData; outputVal->CopyVariableValueTo(outputVar, outputData); PrintOutput<float>(outputVar.Shape().TotalSize(), outputData); }
// Returns the number of unmasked samples held by 'value' for variable 'var'.
//
// The sample capacity is the total size of the axes of the Value's shape that follow
// the first var.Shape().Rank() axes; the Value's masked count is subtracted from it.
// Reports a LogicError when the masked count exceeds that capacity.
size_t Evaluator::GetSampleCount(const Variable& var, const ValuePtr& value)
{
    auto dataShape = value->Shape();
    const size_t maskedCount = value->MaskedCount();

    const size_t variableRank = var.Shape().Rank();
    const size_t sampleCapacity = dataShape.SubShape(variableRank).TotalSize();

    if (sampleCapacity < maskedCount)
        LogicError("Number (%d) of masked values cannot exceed the number (%d) of samples that the Value object's Data NDArrayView can hold.",
                   static_cast<int>(maskedCount), static_cast<int>(sampleCapacity));

    return sampleCapacity - maskedCount;
}
// Evaluates 'evalInstance' once on a single artificially generated sample.
// The dense result is copied into a local buffer and then discarded; nothing is
// printed or returned.
void RunEvaluationOnSingleSample(FunctionPtr evalInstance, const DeviceDescriptor& device)
{
    // The model has a single input and a single output.
    // For a model with several outputs, Outputs() gives the full list.
    Variable inputVar = evalInstance->Arguments()[0];
    Variable outputVar = evalInstance->Output();

    // Input preparation.
    // A real image would first have to be preprocessed to the size and layout the model expects
    // (this model expects CHW: inputVar.Shape[0] is image width, inputVar.Shape[1] is image height,
    // and inputVar.Shape[2] is channels). To stay free of external dependencies the example
    // feeds artificially generated data instead.
    std::vector<float> inputData(inputVar.Shape().TotalSize());
    size_t elementIndex = 0;
    for (float& element : inputData)
    {
        element = static_cast<float>(elementIndex % 255);
        ++elementIndex;
    }

    // Wrap the raw buffer into a batch Value and bind it to the input variable.
    ValuePtr inputVal = Value::CreateBatch(inputVar.Shape(), inputData, device);
    std::unordered_map<Variable, ValuePtr> inputDataMap = { { inputVar, inputVal } };

    // A null Value asks the library to allocate the output storage itself.
    // Alternatively, a pre-created Value object can be placed in the map.
    std::unordered_map<Variable, ValuePtr> outputDataMap = { { outputVar, nullptr } };

    // Run the evaluation on the device.
    evalInstance->Evaluate(inputDataMap, outputDataMap, device);

    // Fetch the result and convert it to dense per-sequence vectors.
    ValuePtr outputVal = outputDataMap[outputVar];
    std::vector<std::vector<float>> outputData;
    outputVal->CopyVariableValueTo(outputVar, outputData);
}
/// Builds one fully connected layer from caller-supplied parameters:
/// nonLinearity(Plus(plusParam, Times(timesParam, input))).
///
/// 'input' is expected to have rank 1 (checked with assert only). The dimensions of
/// the two parameters are not validated here.
inline FunctionPtr FullyConnectedDNNLayerWithSharedParameters(Variable input, const Parameter& timesParam, const Parameter& plusParam, const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity)
{
    assert(input.Shape().Rank() == 1);

    // TODO: timesParam is assumed to have matching output and input dimensions; this is not verified.
    auto projection = Times(timesParam, input);

    // TODO: plusParam is assumed to match the output dimension; this is not verified.
    auto affine = Plus(plusParam, projection);

    return nonLinearity(affine);
}
/// Evaluates 'evalFunc' several times on random data and prints the leading output
/// values of every sample to stderr.
///
/// The input variable is looked up by the name "features"; the output is
/// evalFunc->Output(). The function seeds rand() with 2 and performs 4 forward passes
/// of 3 samples each, with inputs drawn as rand() / RAND_MAX. At most the first 10
/// output values of each sample are printed, followed by "..." when there are more.
///
/// Throws a string literal (const char*) when the input variable cannot be found.
void RunEvaluationClassifier(FunctionPtr evalFunc, const DeviceDescriptor& device)
{
    const std::wstring inputNodeName = L"features";

    Variable inputVar;
    if (!GetInputVariableByName(evalFunc, inputNodeName, inputVar))
    {
        // %ls is the standard conversion for printing a wide string with a narrow printf.
        fprintf(stderr, "Input variable %ls is not available.\n", inputNodeName.c_str());
        throw("Input variable not found error.");
    }

    // Evaluate the network in several runs
    const size_t iterationCount = 4;
    const unsigned int randSeed = 2;
    srand(randSeed);
    const size_t numSamples = 3;
    std::vector<float> inputData(inputVar.Shape().TotalSize() * numSamples);
    for (size_t t = 0; t < iterationCount; ++t)
    {
        for (size_t i = 0; i < inputData.size(); ++i)
        {
            inputData[i] = static_cast<float>(rand()) / RAND_MAX;
        }

        // Create input data shape. Adding sequence length and numSamples as axes.
        // Todo: remove sequence length when only numSamples is supported.
        // Todo: add convenience APIs to simplify data preparation here.
        NDShape inputShape = inputVar.Shape().AppendShape({1, numSamples});
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData, true));

        // Define output. The entry starts out null; the map entry is read back after Forward.
        ValuePtr outputValue;
        auto outputVar = evalFunc->Output();
        std::unordered_map<Variable, ValuePtr> outputs = {{outputVar, outputValue}};

        // Evaluate the model
        evalFunc->Forward({{inputVar, inputValue}}, outputs, device);

        // Get output value
        outputValue = outputs[outputVar];

        // Todo: remove sequence length when only numSamples is supported.
        // Todo: add convenience APIs to simplify retrieval of output results.
        NDShape outputShape = outputVar.Shape().AppendShape({1, numSamples});
        std::vector<float> outputData(outputShape.TotalSize());
        NDArrayViewPtr cpuArrayOutput = MakeSharedObject<NDArrayView>(outputShape, outputData, false);
        cpuArrayOutput->CopyFrom(*outputValue->Data());

        assert(outputData.size() == outputVar.Shape()[0] * numSamples);
        fprintf(stderr, "Evaluation result:\n");
        auto outputDim = outputVar.Shape()[0];
        for (size_t i = 0; i < numSamples; i++)
        {
            // size_t must be printed with %zu; %lu is the wrong width where unsigned long is 32-bit.
            fprintf(stderr, "Iteration:%zu, Sample %zu:\n", t, i);
            fprintf(stderr, " ");

            // Each sample occupies outputDim consecutive values; restart at the sample's
            // first value because only a prefix of every sample is printed.
            size_t dataIndex = i * outputDim;
            for (size_t j = 0; j < std::min((size_t)10, outputDim); j++)
            {
                fprintf(stderr, "%f ", outputData[dataIndex++]);
            }
            if (outputDim > 10)
            {
                fprintf(stderr, "...");
            }
            fprintf(stderr, "\n");
        }
    }
}
void TestFeedForwardNetworkCreation(const DeviceDescriptor& device, bool testSaveAndReLoad) { using namespace std::placeholders; const size_t inputDim = 937; const size_t numOutputClasses = 9304; const size_t numHiddenLayers = 6; const size_t hiddenLayersDim = 2048; Variable inputVar({ inputDim }, DataType::Float, L"features"); auto classifierOutputFunction = FullyConnectedFeedForwardClassifierNet(inputVar, numOutputClasses, hiddenLayersDim, numHiddenLayers, device, std::bind(Sigmoid, _1, L""), L"classifierOutput"); Variable classifierOutput = classifierOutputFunction; Variable labelsVar({ numOutputClasses }, DataType::Float, L"Labels"); auto trainingLossFunction = CNTK::CrossEntropyWithSoftmax(classifierOutput, labelsVar, L"LossFunction"); Variable trainingLoss = trainingLossFunction; auto predictionFunction = CNTK::ClassificationError(classifierOutput, labelsVar, L"ClassificationError"); Variable prediction = predictionFunction; auto ffNet = CNTK::Combine({ trainingLoss.Owner(), prediction.Owner(), classifierOutput.Owner() }, L"ClassifierModel"); // Now test the structure if (ffNet->Parameters().size() != ((numHiddenLayers * 2) + 1)) throw std::runtime_error("TestFeedForwardNetworkCreation: Function does not have expected Parameter count"); if (ffNet->Arguments().size() != 2) throw std::runtime_error("TestFeedForwardNetworkCreation: Function does not have expected Argument count"); if (ffNet->Outputs().size() != 3) throw std::runtime_error("TestFeedForwardNetworkCreation: Function does not have expected Output count"); if (testSaveAndReLoad) SaveAndReloadModel<float>(ffNet, { &inputVar, &labelsVar, &trainingLoss, &prediction, &classifierOutput }, device); // Run Forward and backward a few times size_t iterationCount = 4; unsigned int randSeed = 2; srand(randSeed); size_t numSamples = 3; for (size_t i = 0; i < iterationCount; ++i) { std::vector<float> inputData(inputDim * numSamples); for (size_t i = 0; i < inputData.size(); ++i) inputData[i] = ((float)rand()) / 
RAND_MAX; NDShape inputShape = inputVar.Shape().AppendShape({ 1, numSamples }); ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData.data(), inputData.size(), DeviceDescriptor::CPUDevice(), true)); std::vector<float> labelData(numOutputClasses * numSamples, 0); for (size_t i = 0; i < numSamples; ++i) labelData[(i*numOutputClasses) + (rand() % numOutputClasses)] = 1; NDShape labelShape = labelsVar.Shape().AppendShape({ 1, numSamples }); ValuePtr labelValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(labelShape, labelData.data(), labelData.size(), DeviceDescriptor::CPUDevice(), true)); ValuePtr outputValue, predictionErrorValue; std::unordered_map<Variable, ValuePtr> outputs = { { classifierOutput, outputValue }, { prediction, predictionErrorValue } }; auto backpropState = ffNet->Forward({ { inputVar, inputValue }, { labelsVar, labelValue } }, outputs, device, { trainingLoss }); // Perform backprop NDShape outputShape = trainingLoss.Shape(); std::vector<float> rootGradientsData(outputShape.TotalSize(), 1); ValuePtr rootGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputShape, rootGradientsData.data(), rootGradientsData.size(), DeviceDescriptor::CPUDevice(), true)); std::unordered_map<Variable, ValuePtr> paramGradients; auto allParams = ffNet->Parameters(); for (auto iter = allParams.begin(); iter != allParams.end(); ++iter) paramGradients[*iter] = nullptr; ffNet->Backward(backpropState, { { trainingLoss, rootGradientValue } }, paramGradients); } }
/// <summary> /// The example shows /// - how to prepare input data as sequence using sparse input. /// The example uses the model trained by <CNTK>/Examples/LanguageUnderstanding/ATIS/Python/LanguageUnderstanding.py /// Please see README.md in <CNTK>/Examples/LanguageUnderstanding/ATIS about how to train the model. /// The parameter 'modelFile' specifies the path to the model. /// The vocabularyFile specifies the vacabulary file used by the ATIS model, e.g. <CNTK>/Examples/LanguageUnderstanding/ATIS/BrainScript/query.wl /// The labelFile specifies the label file used by the ATIS model, e.g. <CNTK>/Examples/LanguageUnderstanding/ATIS/BrainScript/slots.wl /// </summary> void EvaluationSingleSequenceUsingSparse(const wchar_t* modelFile, const wchar_t* vocabularyFile, const wchar_t* labelFile, const DeviceDescriptor& device) { printf("\n===== Evaluate single sequence using sparse input.\n"); // Load the model. // The model is trained by <CNTK>/Examples/LanguageUnderstanding/ATIS/Python/LanguageUnderstanding.py // Please see README.md in <CNTK>/Examples/LanguageUnderstanding/ATIS about how to train the model. FunctionPtr modelFunc = Function::Load(modelFile, device); // Read word and slot index files. std::unordered_map<std::string, size_t> vocabToIndex = BuildVocabIndex(vocabularyFile); std::vector<std::string> indexToSlots = BuildSlotIndex(labelFile); // Get input variable. The model has only one single input. Variable inputVar = modelFunc->Arguments()[0]; size_t vocabSize = inputVar.Shape().TotalSize(); const char *inputSentence = "BOS i would like to find a flight from charlotte to las vegas that makes a stop in st. louis EOS"; std::vector<size_t> seqData; std::vector<std::string> inputWords; std::stringstream inputStream; std::string word; // build one-hot index for the input sequence. 
inputStream.str(inputSentence); while (inputStream >> word) { inputWords.push_back(word); } size_t seqLen = inputWords.size(); // For this example, only 1 non-zero value for each sample. size_t numNonZeroValues = seqLen * 1; std::vector<SparseIndexType> colStarts; std::vector<SparseIndexType> rowIndices; std::vector<float> nonZeroValues; size_t count = 0; for (; count < seqLen; count++) { // Get the index of the word auto nonZeroValueIndex = static_cast<SparseIndexType>(vocabToIndex[inputWords[count]]); // Add the sample to the sequence nonZeroValues.push_back(1.0); rowIndices.push_back(nonZeroValueIndex); colStarts.push_back(static_cast<SparseIndexType>(count)); } colStarts.push_back(static_cast<SparseIndexType>(numNonZeroValues)); // Create input value using one-hot vector and input data map ValuePtr inputVal = Value::CreateSequence<float>(vocabSize, seqLen, colStarts.data(), rowIndices.data(), nonZeroValues.data(), numNonZeroValues, device); std::unordered_map<Variable, ValuePtr> inputDataMap = { { inputVar, inputVal } }; // The model has only one output. // If the model has more than one output, use modelFunc->Outputs to get the list of output variables. Variable outputVar = modelFunc->Output(); // Create output data map. Using null as Value to indicate using system allocated memory. // Alternatively, create a Value object and add it to the data map. 
std::unordered_map<Variable, ValuePtr> outputDataMap = { { outputVar, nullptr } }; // Start evaluation on the device modelFunc->Evaluate(inputDataMap, outputDataMap, device); // Get evaluate result as dense output ValuePtr outputVal = outputDataMap[outputVar]; std::vector<std::vector<float>> outputData; outputVal->CopyVariableValueTo(outputVar, outputData); // output the result size_t outputSampleSize = outputVar.Shape().TotalSize(); if (outputData.size() != 1) { throw("Only one sequence of slots is expected as output."); } std::vector<float> slotSeq = outputData[0]; if (slotSeq.size() % outputSampleSize != 0) { throw("The number of elements in the slot sequence is not a multiple of sample size"); } size_t numOfSlotsInOutput = slotSeq.size() / outputSampleSize; if (inputWords.size() != numOfSlotsInOutput) { throw("The number of input words and the number of output slots do not match"); } for (size_t i = 0; i < numOfSlotsInOutput; i++) { float max = slotSeq[i * outputSampleSize]; size_t maxIndex = 0; for (size_t j = 1; j < outputSampleSize; j++) { if (slotSeq[i * outputSampleSize + j] > max) { max = slotSeq[i * outputSampleSize + j]; maxIndex = j; } } printf(" %10s ---- %s\n", inputWords[i].c_str(), indexToSlots[maxIndex].c_str()); } printf("\n"); }
/// <summary> /// The example shows /// - how to load model. /// - how to prepare input data as batch of sequences with variable length. /// how to prepare data using one-hot vector format. /// - how to prepare input and output data map. /// - how to evaluate a model. /// The example uses the model trained by <CNTK>/Examples/LanguageUnderstanding/ATIS/Python/LanguageUnderstanding.py /// Please see README.md in <CNTK>/Examples/LanguageUnderstanding/ATIS about how to train the model. /// The parameter 'modelFile' specifies the path to the model. /// The vocabularyFile specifies the vacabulary file used by the ATIS model, e.g. <CNTK>/Examples/LanguageUnderstanding/ATIS/BrainScript/query.wl /// The labelFile specifies the label file used by the ATIS model, e.g. <CNTK>/Examples/LanguageUnderstanding/ATIS/BrainScript/slots.wl /// </summary> void EvaluationBatchOfSequencesUsingOneHot(const wchar_t* modelFile, const wchar_t* vocabularyFile, const wchar_t* labelFile, const DeviceDescriptor& device) { printf("\n===== Evaluate batch of sequences with variable length using one-hot vector.\n"); // Load the model. // The model is trained by <CNTK>/Examples/LanguageUnderstanding/ATIS/Python/LanguageUnderstanding.py // Please see README.md in <CNTK>/Examples/LanguageUnderstanding/ATIS about how to train the model. FunctionPtr modelFunc = Function::Load(modelFile, device); // Read word and slot index files. std::unordered_map<std::string, size_t> vocabToIndex = BuildVocabIndex(vocabularyFile); std::vector<std::string> indexToSlots = BuildSlotIndex(labelFile); // Get input variable. The model has only one single input. Variable inputVar = modelFunc->Arguments()[0]; size_t vocabSize = inputVar.Shape().TotalSize(); std::vector<const char *> inputSentences = { "BOS i would like to find a flight from charlotte to las vegas that makes a stop in st. louis EOS", "BOS flights from new york to seattle EOS" }; // Prepare input data. 
std::vector<std::vector<std::string>> inputWordsList(inputSentences.size()); // Each sample is represented by an index to the one-hot vector, so the index of the non-zero value of each sample is saved in the inner list. // The outer list represents sequences contained in the batch. std::vector<std::vector<size_t>> inputBatch; // SeqStartFlagBatch is used to indicate whether this sequence is a new sequence (true) or concatenating the previous sequence (false). std::vector<bool> seqStartFlagBatch; std::string word; size_t index; for (size_t seqIndex = 0; seqIndex < inputSentences.size(); seqIndex++) { std::stringstream inputStream; std::vector<size_t> seqData; // build one-hot index for the input sequences. inputStream.str(inputSentences[seqIndex]); while (inputStream >> word) { inputWordsList[seqIndex].push_back(word); index = vocabToIndex.at(word); seqData.push_back(index); } inputBatch.push_back(seqData); seqStartFlagBatch.push_back(true); } // Create input value representing the batch data and input data map ValuePtr inputVal = Value::CreateBatchOfSequences<float>(vocabSize, inputBatch, seqStartFlagBatch, device); std::unordered_map<Variable, ValuePtr> inputDataMap = { { inputVar, inputVal } }; // The model has only one output. // If the model has more than one output, use modelFunc->Outputs to get the list of output variables. Variable outputVar = modelFunc->Output(); // Create output data map. Using null as Value to indicate using system allocated memory. // Alternatively, create a Value object and add it to the data map. 
std::unordered_map<Variable, ValuePtr> outputDataMap = { { outputVar, nullptr } }; // Start evaluation on the device modelFunc->Evaluate(inputDataMap, outputDataMap, device); // Get evaluate result as dense output ValuePtr outputVal = outputDataMap[outputVar]; std::vector<std::vector<float>> outputData; outputVal->CopyVariableValueTo(outputVar, outputData); // output the result size_t outputSampleSize = outputVar.Shape().TotalSize(); if (outputData.size() != inputBatch.size()) { throw("The number of sequence in output does not match that in input."); } printf("The number of sequences in the batch: %d\n", (int)outputData.size()); for (size_t seqno = 0; seqno < outputData.size(); seqno++) { std::vector<float> slotSeq = outputData[seqno]; printf("Sequence %d:\n", (int)seqno); if (slotSeq.size() % outputSampleSize != 0) { throw("The number of elements in the slot sequence is not a multiple of sample size"); } size_t numOfSlotsInOutput = slotSeq.size() / outputSampleSize; if (inputWordsList[seqno].size() != numOfSlotsInOutput) { throw("The number of input words and the number of output slots do not match"); } for (size_t i = 0; i < numOfSlotsInOutput; i++) { float max = slotSeq[i * outputSampleSize]; size_t maxIndex = 0; for (size_t j = 1; j < outputSampleSize; j++) { if (slotSeq[i * outputSampleSize + j] > max) { max = slotSeq[i * outputSampleSize + j]; maxIndex = j; } } printf(" %10s ---- %s\n", inputWordsList[seqno][i].c_str(), indexToSlots[maxIndex].c_str()); } printf("\n"); } }