Example 1
void RunEvaluationOneHidden(FunctionPtr evalFunc, const DeviceDescriptor& device)
{
    const std::wstring inputNodeName = L"features";
    const std::wstring outputNodeName = L"out.z_output";

    Variable inputVar;
    if (!GetInputVariableByName(evalFunc, inputNodeName, inputVar))
    {
        fprintf(stderr, "Input variable %S is not available.\n", inputNodeName.c_str());
        throw("Input variable not found error.");
    }

    Variable outputVar;
    if (!GetOutputVaraiableByName(evalFunc, outputNodeName, outputVar))
    {
        fprintf(stderr, "Output variable %S is not available.\n", outputNodeName.c_str());
        throw("Output variable not found error.");
    }

    // Evaluate the network in several runs 
    size_t iterationCount = 4;   
    size_t numSamples = 3;
    for (size_t t = 0; t < iterationCount; ++t)
    {
        std::vector<float> inputData(inputVar.Shape().TotalSize() * numSamples);
        for (size_t i = 0; i < inputData.size(); ++i)
        {
            inputData[i] = static_cast<float>(i % 255);
        }

        NDShape inputShape = inputVar.Shape().AppendShape({1, numSamples});
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData, true));

        ValuePtr outputValue;
        std::unordered_map<Variable, ValuePtr> outputs = {{outputVar, outputValue}};
        evalFunc->Forward({{inputVar, inputValue}}, outputs, device);

        outputValue = outputs[outputVar];        
        NDShape outputShape = outputVar.Shape().AppendShape({1, numSamples});
        std::vector<float> outputData(outputShape.TotalSize());
        NDArrayViewPtr cpuArrayOutput = MakeSharedObject<NDArrayView>(outputShape, outputData, false);
        cpuArrayOutput->CopyFrom(*outputValue->Data());

        assert(outputData.size() == outputVar.Shape()[0] * numSamples);
        fprintf(stderr, "Evaluation result:\n");
        size_t dataIndex = 0;
        auto outputDim = outputVar.Shape()[0];
        for (size_t i = 0; i < numSamples; i++)
        {
            fprintf(stderr, "Iteration:%lu, Sample %lu:\n", t, i);
            fprintf(stderr, "Ouput:");
            for (size_t j = 0; j < outputDim; j++)
            {
                fprintf(stderr, "%f ", outputData[dataIndex++]);
            }
            fprintf(stderr, "\n");
        }
    }
}
Example 2
template <typename ElementType>
void TestOneHotSequences(const Variable& sampleVariable, std::vector<size_t>& expectedSeqLens, std::vector<std::vector<size_t>>& output, const DeviceDescriptor& device)
{
    auto input = GenerateOneHotSequences(expectedSeqLens, sampleVariable.Shape().TotalSize());
    auto val = Value::Create<ElementType>(sampleVariable.Shape().TotalSize(), input, device);

    val->CopyVariableValueTo(sampleVariable, output);
    CheckCopyToOutput(input, output);
}
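A minimal sketch of invoking this helper (it relies on the GenerateOneHotSequences and CheckCopyToOutput helpers defined alongside it in the test code); the sample shape, sequence lengths, and device below are illustrative assumptions:

// Hypothetical invocation; the shape and sequence lengths are assumptions.
std::vector<size_t> expectedSeqLens = { 5, 8, 3 };
std::vector<std::vector<size_t>> output;
Variable sampleVariable({ 300 }, DataType::Float, L"sample");
TestOneHotSequences<float>(sampleVariable, expectedSeqLens, output, DeviceDescriptor::CPUDevice());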
Example 3
FunctionPtr Embedding(const Variable& input, size_t embeddingDim, const DeviceDescriptor& device)
{
    assert(input.Shape().Rank() == 1);
    size_t inputDim = input.Shape()[0];

    auto embeddingParameters = Parameter(CNTK::NDArrayView::RandomUniform<float>({ embeddingDim, inputDim }, -0.05, 0.05, 1, device));
    return Times(embeddingParameters, input);
}
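A minimal usage sketch; the vocabulary size (2000), embedding dimension (50), variable name, and device are illustrative assumptions:

// Hypothetical usage; the dimensions are assumptions.
Variable oneHotInput({ 2000 }, DataType::Float, L"input");
auto embedded = Embedding(oneHotInput, 50, DeviceDescriptor::CPUDevice());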
Example 4
inline FunctionPtr SetupFullyConnectedLinearLayer(Variable input, size_t outputDim, const DeviceDescriptor& device, const std::wstring& outputName = L"")
{
    assert(input.Shape().Rank() == 1);
    size_t inputDim = input.Shape()[0];

    auto timesParam = CNTK::Parameter(CNTK::NDArrayView::RandomUniform<float>({outputDim, inputDim}, -0.05, 0.05, 1, device));
    auto timesFunction = CNTK::Times(timesParam, input);

    auto plusParam = CNTK::Parameter(CNTK::NDArrayView::RandomUniform<float>({outputDim}, -0.05, 0.05, 1, device));
    return CNTK::Plus(plusParam, timesFunction, outputName);
}
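A minimal sketch of composing this linear layer into a small classifier; the input dimension, output dimension, and the choice of a Sigmoid nonlinearity are illustrative assumptions:

// Hypothetical composition; the dimensions are assumptions.
Variable features({ 784 }, DataType::Float, L"features");
auto linear = SetupFullyConnectedLinearLayer(features, 10, DeviceDescriptor::CPUDevice(), L"classifierOutput");
auto model = CNTK::Sigmoid(linear);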
Example 5
/// <summary>
/// The example shows
/// - how to load a pretrained model and evaluate several nodes by combining their outputs
/// Note: The example uses the model trained by <CNTK>/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10.py
/// Please see README.md in <CNTK>/Examples/Image/Classification/ResNet about how to train the model.
/// The parameter 'modelFilePath' specifies the path to the model.
/// </summary>
void EvaluateCombinedOutputs(const wchar_t* modelFilePath, const DeviceDescriptor& device)
{
    printf("\n===== Evaluate combined outputs =====\n");

    // Load the model.
    FunctionPtr modelFunc = Function::Load(modelFilePath, device);

    // Get node of interest
    std::wstring intermediateLayerName = L"final_avg_pooling";
    FunctionPtr interLayerPrimitiveFunc = modelFunc->FindByName(intermediateLayerName);

    Variable poolingOutput = interLayerPrimitiveFunc->Output();

    // Create a function that combines the outputs of the node "final_avg_pooling" and the final layer of the model.
    FunctionPtr evalFunc = Combine( { modelFunc->Output(), poolingOutput });
    Variable inputVar = evalFunc->Arguments()[0];

    // Prepare input data.
    // For evaluating an image, you first need to perform some image preprocessing to make sure that the input image has the size and layout
    // that the model expects.
    // Please note that the model used by this example expects the CHW image layout.
    // inputVar.Shape()[0] is the image width, inputVar.Shape()[1] is the image height, and inputVar.Shape()[2] is the number of channels.
    // For simplicity and to avoid external dependencies, we skip the preprocessing step here and just use artificially created data as input.
    std::vector<float> inputData(inputVar.Shape().TotalSize());
    for (size_t i = 0; i < inputData.size(); ++i)
    {
        inputData[i] = static_cast<float>(i % 255);
    }

    // Create input value and input data map
    ValuePtr inputVal = Value::CreateBatch(inputVar.Shape(), inputData, device);
    std::unordered_map<Variable, ValuePtr> inputDataMap = { { inputVar, inputVal } };

    // Create the output data map. Using nullptr as the Value tells the system to allocate the memory for the output.
    // Alternatively, create a Value object yourself and add it to the data map.
    Variable modelOutput = evalFunc->Outputs()[0];
    Variable interLayerOutput = evalFunc->Outputs()[1];

    std::unordered_map<Variable, ValuePtr> outputDataMap = { { modelOutput, nullptr }, { interLayerOutput, nullptr } };

    // Start evaluation on the device
    evalFunc->Evaluate(inputDataMap, outputDataMap, device);

    // Get the evaluation results as dense outputs
    for(auto & outputVariableValuePair : outputDataMap)
    {
        auto variable = outputVariableValuePair.first;
        auto value = outputVariableValuePair.second;
        std::vector<std::vector<float>> outputData;
        value->CopyVariableValueTo(variable, outputData);
        PrintOutput<float>(variable.Shape().TotalSize(), outputData);
    }
}
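A hypothetical call site for the function above; the model file name is an assumption (any CNTK-format ResNet model trained as described in the summary would do):

// Hypothetical invocation; the model path is an assumption.
EvaluateCombinedOutputs(L"resnet20.dnn", DeviceDescriptor::CPUDevice());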
Example 6
FunctionPtr FullyConnectedDNNLayer(Variable input, size_t outputDim, const DeviceDescriptor& device, const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity)
{
    assert(input.Shape().NumAxes() == 1);
    size_t inputDim = input.Shape()[0];

    auto timesParam = Parameter(NDArrayView::RandomUniform<float>({ outputDim, inputDim }, -0.5, 0.5, 1, device));
    auto timesFunction = Times(timesParam, input);

    auto plusParam = Parameter({ outputDim }, 0.0f, device);
    auto plusFunction = Plus(plusParam, timesFunction);

    return nonLinearity(plusFunction);
}
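A minimal sketch of stacking two of these layers, passing the nonlinearity as a lambda; the dimensions and device are illustrative assumptions:

// Hypothetical stacking; the dimensions are assumptions.
auto nonLinearity = [](const FunctionPtr& f) { return Sigmoid(f); };
Variable features({ 937 }, DataType::Float, L"features");
auto hidden1 = FullyConnectedDNNLayer(features, 2048, DeviceDescriptor::CPUDevice(), nonLinearity);
auto hidden2 = FullyConnectedDNNLayer(hidden1, 2048, DeviceDescriptor::CPUDevice(), nonLinearity);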
Example 7
/// <summary>
/// The example shows
/// - how to load a model.
/// - how to prepare input data for a batch of samples.
/// - how to prepare input and output data maps.
/// - how to evaluate a model.
/// - how to retrieve the evaluation result and output data in dense format.
/// Note: The example uses the model trained by <CNTK>/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10.py
/// Please see README.md in <CNTK>/Examples/Image/Classification/ResNet about how to train the model.
/// The parameter 'modelFile' specifies the path to the model.
/// </summary>
void EvaluationBatchUsingDense(const wchar_t* modelFile, const DeviceDescriptor& device)
{
    printf("\n===== Evaluate batch of samples using dense format.\n");

    // The number of samples in the batch.
    size_t sampleCount = 3;

    // Load the model.
    // The model is trained by <CNTK>/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10.py
    // Please see README.md in <CNTK>/Examples/Image/Classification/ResNet about how to train the model.
    FunctionPtr modelFunc = Function::Load(modelFile, device);

    // Get the input variable. The model has only one input.
    Variable inputVar = modelFunc->Arguments()[0];

    // The model has only one output.
    // If the model has more than one output, use modelFunc->Outputs to get the list of output variables.
    Variable outputVar = modelFunc->Output();

    // Prepare input data.
    // For evaluating an image, you first need to perform some image preprocessing to make sure that the input image has the size and layout
    // that the model expects.
    // Please note that the model used by this example expects the CHW image layout.
    // inputVar.Shape()[0] is the image width, inputVar.Shape()[1] is the image height, and inputVar.Shape()[2] is the number of channels.
    // For simplicity and to avoid external dependencies, we skip the preprocessing step here and just use artificially created data as input.
    std::vector<float> inputData(inputVar.Shape().TotalSize() * sampleCount);
    for (size_t i = 0; i < inputData.size(); ++i)
    {
        inputData[i] = static_cast<float>(i % 255);
    }

    // Create input value and input data map.
    ValuePtr inputVal = Value::CreateBatch(inputVar.Shape(), inputData, device);
    std::unordered_map<Variable, ValuePtr> inputDataMap = { { inputVar, inputVal } };

    // Create the output data map. Using nullptr as the Value tells the system to allocate the memory for the output.
    // Alternatively, create a Value object yourself and add it to the data map.
    std::unordered_map<Variable, ValuePtr> outputDataMap = { { outputVar, nullptr } };

    // Start evaluation on the device
    modelFunc->Evaluate(inputDataMap, outputDataMap, device);

    // Get the evaluation result as dense output
    ValuePtr outputVal = outputDataMap[outputVar];
    std::vector<std::vector<float>> outputData;
    outputVal->CopyVariableValueTo(outputVar, outputData);

    PrintOutput<float>(outputVar.Shape().TotalSize(), outputData);
}
Example 8
/// <summary>
/// The example shows
/// - how to load a pretrained model and evaluate an intermediate layer of its network.
/// Note: The example uses the model trained by <CNTK>/Examples/Image/Classification/ResNet/Python/TrainResNet_CIFAR10.py
/// Please see README.md in <CNTK>/Examples/Image/Classification/ResNet about how to train the model.
/// The parameter 'modelFilePath' specifies the path to the model.
/// </summary>
void EvaluateIntermediateLayer(const wchar_t* modelFilePath, const DeviceDescriptor& device)
{
    printf("\n===== Evaluate intermediate layer =====\n");

    // Load the model.
    FunctionPtr rootFunc = Function::Load(modelFilePath, device);

    std::wstring intermediateLayerName = L"final_avg_pooling";
    FunctionPtr interLayerPrimitiveFunc = rootFunc->FindByName(intermediateLayerName);

    // The Function returned by FindByName is a primitive function.
    // For evaluation, it is required to create a composite function from the primitive function.
    FunctionPtr modelFunc = AsComposite(interLayerPrimitiveFunc);

    Variable outputVar = modelFunc->Output();
    Variable inputVar = modelFunc->Arguments()[0];

    // Prepare input data.
    // For evaluating an image, you first need to perform some image preprocessing to make sure that the input image has the size and layout
    // that the model expects.
    // Please note that the model used by this example expects the CHW image layout.
    // inputVar.Shape()[0] is the image width, inputVar.Shape()[1] is the image height, and inputVar.Shape()[2] is the number of channels.
    // For simplicity and to avoid external dependencies, we skip the preprocessing step here and just use artificially created data as input.
    std::vector<float> inputData(inputVar.Shape().TotalSize());
    for (size_t i = 0; i < inputData.size(); ++i)
    {
        inputData[i] = static_cast<float>(i % 255);
    }

    // Create input value and input data map
    ValuePtr inputVal = Value::CreateBatch(inputVar.Shape(), inputData, device);
    std::unordered_map<Variable, ValuePtr> inputDataMap = { { inputVar, inputVal } };

    // Create the output data map. Using nullptr as the Value tells the system to allocate the memory for the output.
    // Alternatively, create a Value object yourself and add it to the data map.
    std::unordered_map<Variable, ValuePtr> outputDataMap = { { outputVar, nullptr } };

    // Start evaluation on the device
    modelFunc->Evaluate(inputDataMap, outputDataMap, device);

    // Get the evaluation result as dense output
    ValuePtr outputVal = outputDataMap[outputVar];
    std::vector<std::vector<float>> outputData;
    outputVal->CopyVariableValueTo(outputVar, outputData);

    PrintOutput<float>(outputVar.Shape().TotalSize(), outputData);
}
Example 9
    size_t Evaluator::GetSampleCount(const Variable& var, const ValuePtr& value)
    {
        auto valueDataShape = value->Shape();
        size_t numMaskedSamples = value->MaskedCount();
        size_t numSamplesInDataArrayView = valueDataShape.SubShape(var.Shape().Rank()).TotalSize();
        if (numMaskedSamples > numSamplesInDataArrayView)
            LogicError("Number (%d) of masked values cannot exceed the number (%d) of samples that the Value object's Data NDArrayView can hold.",
            (int)numMaskedSamples, (int)numSamplesInDataArrayView);

        return (numSamplesInDataArrayView - numMaskedSamples);
    }
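To make the shape arithmetic concrete, a small worked sketch under assumed shapes (a variable of shape [85] and a Value whose data view holds 3 samples):

// Worked example; all shapes are assumptions.
Variable var({ 85 }, DataType::Float, L"v");
NDShape valueDataShape({ 85, 1, 3 }); // sample shape appended with [1 x numSamples]
size_t numSamplesInView = valueDataShape.SubShape(var.Shape().Rank()).TotalSize(); // 1 * 3 = 3
// With 1 masked sample, GetSampleCount would return 3 - 1 = 2.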
Example 10
void RunEvaluationOnSingleSample(FunctionPtr evalInstance, const DeviceDescriptor& device)
{
    // Get the input variable. The model has only one input.
    Variable inputVar = evalInstance->Arguments()[0];

    // The model has only one output.
    // If the model has more than one output, use evalInstance->Outputs() to get the list of output variables.
    Variable outputVar = evalInstance->Output();

    // Prepare input data.
    // For evaluating an image, you first need to perform some image preprocessing to make sure that the input image has the size and layout
    // that the model expects.
    // Please note that the model used by this example expects the CHW image layout.
    // inputVar.Shape()[0] is the image width, inputVar.Shape()[1] is the image height, and inputVar.Shape()[2] is the number of channels.
    // For simplicity and to avoid external dependencies, we skip the preprocessing step here and just use artificially created data as input.
    std::vector<float> inputData(inputVar.Shape().TotalSize());
    for (size_t i = 0; i < inputData.size(); ++i)
    {
        inputData[i] = static_cast<float>(i % 255);
    }

    // Create input value and input data map
    ValuePtr inputVal = Value::CreateBatch(inputVar.Shape(), inputData, device);
    std::unordered_map<Variable, ValuePtr> inputDataMap = { { inputVar, inputVal } };

    // Create the output data map. Using nullptr as the Value tells the system to allocate the memory for the output.
    // Alternatively, create a Value object yourself and add it to the data map.
    std::unordered_map<Variable, ValuePtr> outputDataMap = { { outputVar, nullptr } };

    // Start evaluation on the device
    evalInstance->Evaluate(inputDataMap, outputDataMap, device);

    // Get the evaluation result as dense output
    ValuePtr outputVal = outputDataMap[outputVar];
    std::vector<std::vector<float>> outputData;
    outputVal->CopyVariableValueTo(outputVar, outputData);
}
Example 11
inline FunctionPtr FullyConnectedDNNLayerWithSharedParameters(Variable input,
                                                              const Parameter& timesParam,
                                                              const Parameter& plusParam,
                                                              const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity)
{
    assert(input.Shape().Rank() == 1);

    // Todo: assume that timesParam has matched outputDim and inputDim 
    auto timesFunction = Times(timesParam, input);

    // Todo: assume that timesParam has matched outputDim 
    auto plusFunction = Plus(plusParam, timesFunction);

    return nonLinearity(plusFunction);
}
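A minimal sketch of what parameter sharing buys: two inputs run through the same weight and bias parameters, so both branches stay in sync. The dimensions, names, and the ReLU nonlinearity are illustrative assumptions:

// Hypothetical sharing of one weight/bias pair across two inputs; dimensions are assumptions.
auto device = DeviceDescriptor::CPUDevice();
Parameter timesParam(NDArrayView::RandomUniform<float>({ 128, 784 }, -0.05, 0.05, 1, device));
Parameter plusParam(NDArrayView::RandomUniform<float>({ 128 }, -0.05, 0.05, 1, device));
auto relu = [](const FunctionPtr& f) { return ReLU(f); };
Variable inputA({ 784 }, DataType::Float, L"inputA");
Variable inputB({ 784 }, DataType::Float, L"inputB");
auto branchA = FullyConnectedDNNLayerWithSharedParameters(inputA, timesParam, plusParam, relu);
auto branchB = FullyConnectedDNNLayerWithSharedParameters(inputB, timesParam, plusParam, relu);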
Example 12
void RunEvaluationClassifier(FunctionPtr evalFunc, const DeviceDescriptor& device)
{
    const std::wstring inputNodeName = L"features";

    Variable inputVar;
    if (!GetInputVariableByName(evalFunc, inputNodeName, inputVar))
    {
        fprintf(stderr, "Input variable %S is not available.\n", inputNodeName.c_str());
        throw("Input variable not found error.");
    }

    // Evaluate the network in several runs 
    size_t iterationCount = 4;
    unsigned int randSeed = 2;
    srand(randSeed);
    size_t numSamples = 3;
    std::vector<float> inputData(inputVar.Shape().TotalSize() * numSamples);
    for (size_t t = 0; t < iterationCount; ++t)
    {
        for (size_t i = 0; i < inputData.size(); ++i)
        {
            inputData[i] = ((float)rand()) / RAND_MAX;
        }

        // Create input data shape. Adding sequence length and numSamples as axes.
        // Todo: remove sequence length when only numSamples is supported.
        // Todo: add convenience APIs to simplify data preparation here.
        NDShape inputShape = inputVar.Shape().AppendShape({1, numSamples});
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData, true));

        // Define output.
        ValuePtr outputValue;
        auto outputVar = evalFunc->Output();
        std::unordered_map<Variable, ValuePtr> outputs = {{outputVar, outputValue}};

        // Evaluate the model
        evalFunc->Forward({{inputVar, inputValue}}, outputs, device);

        // Get output value
        outputValue = outputs[outputVar];

        // Todo: remove sequence length when only numSamples is supported.
        // Todo: add convenience APIs to simplify retrieval of output results.
        NDShape outputShape = outputVar.Shape().AppendShape({1, numSamples});
        std::vector<float> outputData(outputShape.TotalSize());
        NDArrayViewPtr cpuArrayOutput = MakeSharedObject<NDArrayView>(outputShape, outputData, false);
        cpuArrayOutput->CopyFrom(*outputValue->Data());

        assert(outputData.size() == outputVar.Shape()[0] * numSamples);
        fprintf(stderr, "Evaluation result:\n");
        size_t dataIndex = 0;
        auto outputDim = outputVar.Shape()[0];
        for (size_t i = 0; i < numSamples; i++)
        {
            fprintf(stderr, "Iteration:%lu, Sample %lu:\n", t, i);
            fprintf(stderr, "    ");
            dataIndex = i * outputDim;
            for (size_t j = 0; j < std::min((size_t)10, outputDim); j++)
            {
                fprintf(stderr, "%f ", outputData[dataIndex++]);
            }
            if (outputDim > 10)
            {
                fprintf(stderr, "...");
            }
            fprintf(stderr, "\n");
        }
    }
}
Example 13
void TestFeedForwardNetworkCreation(const DeviceDescriptor& device, bool testSaveAndReLoad)
{
    using namespace std::placeholders;

    const size_t inputDim = 937;
    const size_t numOutputClasses = 9304;
    const size_t numHiddenLayers = 6;
    const size_t hiddenLayersDim = 2048;

    Variable inputVar({ inputDim }, DataType::Float, L"features");
    auto classifierOutputFunction = FullyConnectedFeedForwardClassifierNet(inputVar, numOutputClasses, hiddenLayersDim, numHiddenLayers, device, std::bind(Sigmoid, _1, L""), L"classifierOutput");
    Variable classifierOutput = classifierOutputFunction;

    Variable labelsVar({ numOutputClasses }, DataType::Float, L"Labels");
    auto trainingLossFunction = CNTK::CrossEntropyWithSoftmax(classifierOutput, labelsVar, L"LossFunction");
    Variable trainingLoss = trainingLossFunction;
    auto predictionFunction = CNTK::ClassificationError(classifierOutput, labelsVar, L"ClassificationError");
    Variable prediction = predictionFunction;

    auto ffNet = CNTK::Combine({ trainingLoss.Owner(), prediction.Owner(), classifierOutput.Owner() }, L"ClassifierModel");

    // Now test the structure
    if (ffNet->Parameters().size() != ((numHiddenLayers * 2) + 1))
        throw std::runtime_error("TestFeedForwardNetworkCreation: Function does not have expected Parameter count");

    if (ffNet->Arguments().size() != 2)
        throw std::runtime_error("TestFeedForwardNetworkCreation: Function does not have expected Argument count");

    if (ffNet->Outputs().size() != 3)
        throw std::runtime_error("TestFeedForwardNetworkCreation: Function does not have expected Output count");

    if (testSaveAndReLoad)
        SaveAndReloadModel<float>(ffNet, { &inputVar, &labelsVar, &trainingLoss, &prediction, &classifierOutput }, device);

    // Run Forward and backward a few times
    size_t iterationCount = 4;
    unsigned int randSeed = 2;
    srand(randSeed);
    size_t numSamples = 3;
    for (size_t t = 0; t < iterationCount; ++t)
    {
        std::vector<float> inputData(inputDim * numSamples);
        for (size_t i = 0; i < inputData.size(); ++i)
            inputData[i] = ((float)rand()) / RAND_MAX;

        NDShape inputShape = inputVar.Shape().AppendShape({ 1, numSamples });
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData.data(), inputData.size(), DeviceDescriptor::CPUDevice(), true));

        std::vector<float> labelData(numOutputClasses * numSamples, 0);
        for (size_t i = 0; i < numSamples; ++i)
            labelData[(i*numOutputClasses) + (rand() % numOutputClasses)] = 1;

        NDShape labelShape = labelsVar.Shape().AppendShape({ 1, numSamples });
        ValuePtr labelValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(labelShape, labelData.data(), labelData.size(), DeviceDescriptor::CPUDevice(), true));

        ValuePtr outputValue, predictionErrorValue;
        std::unordered_map<Variable, ValuePtr> outputs = { { classifierOutput, outputValue }, { prediction, predictionErrorValue } };
        auto backpropState = ffNet->Forward({ { inputVar, inputValue }, { labelsVar, labelValue } }, outputs, device, { trainingLoss });

        // Perform backprop
        NDShape outputShape = trainingLoss.Shape();
        std::vector<float> rootGradientsData(outputShape.TotalSize(), 1);
        ValuePtr rootGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(outputShape, rootGradientsData.data(), rootGradientsData.size(), DeviceDescriptor::CPUDevice(), true));
        std::unordered_map<Variable, ValuePtr> paramGradients;
        auto allParams = ffNet->Parameters();
        for (auto iter = allParams.begin(); iter != allParams.end(); ++iter)
            paramGradients[*iter] = nullptr;
        
        ffNet->Backward(backpropState, { { trainingLoss, rootGradientValue } }, paramGradients);
    }
}
Example 14
/// <summary>
/// The example shows
/// - how to prepare input data as sequence using sparse input.
/// The example uses the model trained by <CNTK>/Examples/LanguageUnderstanding/ATIS/Python/LanguageUnderstanding.py
/// Please see README.md in <CNTK>/Examples/LanguageUnderstanding/ATIS about how to train the model.
/// The parameter 'modelFile' specifies the path to the model.
/// The vocabularyFile specifies the vocabulary file used by the ATIS model, e.g. <CNTK>/Examples/LanguageUnderstanding/ATIS/BrainScript/query.wl
/// The labelFile specifies the label file used by the ATIS model, e.g. <CNTK>/Examples/LanguageUnderstanding/ATIS/BrainScript/slots.wl
/// </summary>
void EvaluationSingleSequenceUsingSparse(const wchar_t* modelFile, const wchar_t* vocabularyFile, const wchar_t* labelFile, const DeviceDescriptor& device)
{
    printf("\n===== Evaluate single sequence using sparse input.\n");

    // Load the model.
    // The model is trained by <CNTK>/Examples/LanguageUnderstanding/ATIS/Python/LanguageUnderstanding.py
    // Please see README.md in <CNTK>/Examples/LanguageUnderstanding/ATIS about how to train the model.
    FunctionPtr modelFunc = Function::Load(modelFile, device);

    // Read word and slot index files.
    std::unordered_map<std::string, size_t> vocabToIndex = BuildVocabIndex(vocabularyFile);
    std::vector<std::string> indexToSlots = BuildSlotIndex(labelFile);

    // Get the input variable. The model has only one input.
    Variable inputVar = modelFunc->Arguments()[0];
    size_t vocabSize = inputVar.Shape().TotalSize();

    const char *inputSentence = "BOS i would like to find a flight from charlotte to las vegas that makes a stop in st. louis EOS";
    std::vector<size_t> seqData;
    std::vector<std::string> inputWords;
    std::stringstream inputStream;
    std::string word;

    // build one-hot index for the input sequence.
    inputStream.str(inputSentence);
    while (inputStream >> word)
    {
        inputWords.push_back(word);
    }

    size_t seqLen = inputWords.size();
    // The sequence is stored in CSC (compressed sparse column) format: each sample is a column with exactly one non-zero value (its one-hot index).
    size_t numNonZeroValues = seqLen * 1;
    std::vector<SparseIndexType> colStarts;
    std::vector<SparseIndexType> rowIndices;
    std::vector<float> nonZeroValues;

    size_t count = 0;
    for (; count < seqLen; count++)
    {
        // Get the index of the word
        auto nonZeroValueIndex = static_cast<SparseIndexType>(vocabToIndex[inputWords[count]]);
        // Add the sample to the sequence
        nonZeroValues.push_back(1.0);
        rowIndices.push_back(nonZeroValueIndex);
        colStarts.push_back(static_cast<SparseIndexType>(count));
    }
    colStarts.push_back(static_cast<SparseIndexType>(numNonZeroValues));

    // Create input value using one-hot vector and input data map
    ValuePtr inputVal = Value::CreateSequence<float>(vocabSize, seqLen, colStarts.data(), rowIndices.data(), nonZeroValues.data(), numNonZeroValues, device);
    std::unordered_map<Variable, ValuePtr> inputDataMap = { { inputVar, inputVal } };

    // The model has only one output.
    // If the model has more than one output, use modelFunc->Outputs to get the list of output variables.
    Variable outputVar = modelFunc->Output();

    // Create the output data map. Using nullptr as the Value tells the system to allocate the memory for the output.
    // Alternatively, create a Value object yourself and add it to the data map.
    std::unordered_map<Variable, ValuePtr> outputDataMap = { { outputVar, nullptr } };

    // Start evaluation on the device
    modelFunc->Evaluate(inputDataMap, outputDataMap, device);

    // Get the evaluation result as dense output
    ValuePtr outputVal = outputDataMap[outputVar];
    std::vector<std::vector<float>> outputData;
    outputVal->CopyVariableValueTo(outputVar, outputData);

    // output the result
    size_t outputSampleSize = outputVar.Shape().TotalSize();
    if (outputData.size() != 1)
    {
        throw("Only one sequence of slots is expected as output.");
    }
    std::vector<float> slotSeq = outputData[0];
    if (slotSeq.size() % outputSampleSize != 0)
    {
        throw("The number of elements in the slot sequence is not a multiple of sample size");
    }

    size_t numOfSlotsInOutput = slotSeq.size() / outputSampleSize;
    if (inputWords.size() != numOfSlotsInOutput)
    {
        throw("The number of input words and the number of output slots do not match");
    }
    for (size_t i = 0; i < numOfSlotsInOutput; i++)
    {
        float max = slotSeq[i * outputSampleSize];
        size_t maxIndex = 0;
        for (size_t j = 1; j < outputSampleSize; j++)
        {
            if (slotSeq[i * outputSampleSize + j] > max)
            {
                max = slotSeq[i * outputSampleSize + j];
                maxIndex = j;
            }
        }
        printf("     %10s ---- %s\n", inputWords[i].c_str(), indexToSlots[maxIndex].c_str());
    }
    printf("\n");
}
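A hypothetical call site for the function above; all three paths are assumptions and should point to the trained ATIS model and the .wl files mentioned in the summary:

// Hypothetical invocation; all paths are assumptions.
EvaluationSingleSequenceUsingSparse(L"atis.dnn", L"query.wl", L"slots.wl", DeviceDescriptor::CPUDevice());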
Example 15
/// <summary>
/// The example shows
/// - how to load a model.
/// - how to prepare input data as a batch of sequences with variable lengths,
///   using the one-hot vector format.
/// - how to prepare input and output data maps.
/// - how to evaluate a model.
/// The example uses the model trained by <CNTK>/Examples/LanguageUnderstanding/ATIS/Python/LanguageUnderstanding.py
/// Please see README.md in <CNTK>/Examples/LanguageUnderstanding/ATIS about how to train the model.
/// The parameter 'modelFile' specifies the path to the model.
/// The vocabularyFile specifies the vocabulary file used by the ATIS model, e.g. <CNTK>/Examples/LanguageUnderstanding/ATIS/BrainScript/query.wl
/// The labelFile specifies the label file used by the ATIS model, e.g. <CNTK>/Examples/LanguageUnderstanding/ATIS/BrainScript/slots.wl
/// </summary>
void EvaluationBatchOfSequencesUsingOneHot(const wchar_t* modelFile, const wchar_t* vocabularyFile, const wchar_t* labelFile, const DeviceDescriptor& device)
{
    printf("\n===== Evaluate batch of sequences with variable length using one-hot vector.\n");

    // Load the model.
    // The model is trained by <CNTK>/Examples/LanguageUnderstanding/ATIS/Python/LanguageUnderstanding.py
    // Please see README.md in <CNTK>/Examples/LanguageUnderstanding/ATIS about how to train the model.
    FunctionPtr modelFunc = Function::Load(modelFile, device);

    // Read word and slot index files.
    std::unordered_map<std::string, size_t> vocabToIndex = BuildVocabIndex(vocabularyFile);
    std::vector<std::string> indexToSlots = BuildSlotIndex(labelFile);

    // Get the input variable. The model has only one input.
    Variable inputVar = modelFunc->Arguments()[0];
    size_t vocabSize = inputVar.Shape().TotalSize();

    std::vector<const char *> inputSentences = {
        "BOS i would like to find a flight from charlotte to las vegas that makes a stop in st. louis EOS",
        "BOS flights from new york to seattle EOS"
    };

    // Prepare input data.
    std::vector<std::vector<std::string>> inputWordsList(inputSentences.size());
    // Each sample is represented by the index of the non-zero value in its one-hot vector, so the inner list stores these indices for one sequence.
    // The outer list represents the sequences contained in the batch.
    std::vector<std::vector<size_t>> inputBatch;
    // seqStartFlagBatch indicates, for each sequence, whether it starts a new sequence (true) or continues the previous one (false).
    std::vector<bool> seqStartFlagBatch;
    std::string word;
    size_t index;

    for (size_t seqIndex = 0; seqIndex < inputSentences.size(); seqIndex++)
    {
        std::stringstream inputStream;
        std::vector<size_t> seqData;
        // build one-hot index for the input sequences.
        inputStream.str(inputSentences[seqIndex]);
        while (inputStream >> word)
        {
            inputWordsList[seqIndex].push_back(word);
            index = vocabToIndex.at(word);
            seqData.push_back(index);
        }
        inputBatch.push_back(seqData);
        seqStartFlagBatch.push_back(true);
    }

    // Create input value representing the batch data and input data map
    ValuePtr inputVal = Value::CreateBatchOfSequences<float>(vocabSize, inputBatch, seqStartFlagBatch, device);
    std::unordered_map<Variable, ValuePtr> inputDataMap = { { inputVar, inputVal } };

    // The model has only one output.
    // If the model has more than one output, use modelFunc->Outputs to get the list of output variables.
    Variable outputVar = modelFunc->Output();

    // Create the output data map. Using nullptr as the Value tells the system to allocate the memory for the output.
    // Alternatively, create a Value object yourself and add it to the data map.
    std::unordered_map<Variable, ValuePtr> outputDataMap = { { outputVar, nullptr } };

    // Start evaluation on the device
    modelFunc->Evaluate(inputDataMap, outputDataMap, device);

    // Get the evaluation result as dense output
    ValuePtr outputVal = outputDataMap[outputVar];
    std::vector<std::vector<float>> outputData;
    outputVal->CopyVariableValueTo(outputVar, outputData);

    // output the result
    size_t outputSampleSize = outputVar.Shape().TotalSize();
    if (outputData.size() != inputBatch.size())
    {
        throw("The number of sequence in output does not match that in input.");
    }
    printf("The number of sequences in the batch: %d\n", (int)outputData.size());
    for (size_t seqno = 0; seqno < outputData.size(); seqno++)
    {
        std::vector<float> slotSeq = outputData[seqno];
        printf("Sequence %d:\n", (int)seqno);

        if (slotSeq.size() % outputSampleSize != 0)
        {
            throw("The number of elements in the slot sequence is not a multiple of sample size");
        }

        size_t numOfSlotsInOutput = slotSeq.size() / outputSampleSize;
        if (inputWordsList[seqno].size() != numOfSlotsInOutput)
        {
            throw("The number of input words and the number of output slots do not match");
        }
        for (size_t i = 0; i < numOfSlotsInOutput; i++)
        {
            float max = slotSeq[i * outputSampleSize];
            size_t maxIndex = 0;
            for (size_t j = 1; j < outputSampleSize; j++)
            {
                if (slotSeq[i * outputSampleSize + j] > max)
                {
                    max = slotSeq[i * outputSampleSize + j];
                    maxIndex = j;
                }
            }
            printf("     %10s ---- %s\n", inputWordsList[seqno][i].c_str(), indexToSlots[maxIndex].c_str());
        }
        printf("\n");
    }
}