// Create transformers based on the configuration, i.e.
// deserializers = [
//     [
//         type = "ImageDataDeserializer"
//         module = "ImageReader"
//         input = [
//               features = [
//---->              transforms = [
//                       [type = "Crop"]:[type = "Scale"]...
void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerConfig)
{
    std::string defaultModule = deserializerConfig("module");
    if (!deserializerConfig.Exists("input"))
        return;

    const ConfigParameters& inputs = deserializerConfig("input");
    for (const pair<string, ConfigParameters>& section : inputs)
    {
        ConfigParameters inputBody = section.second;

        // Trying to find transforms in the input section of the config.
        if (inputBody.find("transforms") == inputBody.end())
            continue;

        std::wstring inputName = msra::strfun::utf16(section.first);

        // Read transformers in order and appending them to the transformer pipeline.
        argvector<ConfigParameters> transforms = inputBody("transforms");
        for (size_t j = 0; j < transforms.size(); ++j)
        {
            ConfigParameters p = transforms[j];
            p.Insert("precision", deserializerConfig("precision"));

            TransformerPtr transformer = CreateTransformer(p, defaultModule, std::wstring());
            m_transforms.push_back(Transformation{ transformer, inputName });
        }

        // Let's add a cast transformer by default. It is noop if the type provided by others is float
        // or double, but will do a proper cast if the type is uchar.
        auto cast = CreateTransformer(inputBody, defaultModule, std::wstring(L"Cast"));
        m_transforms.push_back(Transformation{ cast, inputName });
    }
}
// Create deserializers based on the specified configuration. 
// deserializers = [
//        [ type = "ImageDataDeserializer" module = "ImageReader" ...]
//        [ type = "CNTKTextFormatDeserializer" module = "CNTKTextFormatReader" ...]
void CompositeDataReader::CreateDeserializers(const ConfigParameters& readerConfig)
{
    argvector<ConfigValue> deserializerConfigs =
        readerConfig(L"deserializers", ConfigParameters::Array(argvector<ConfigValue>(vector<ConfigValue> {})));

    assert(m_deserializers.empty());
    bool primary = true;  // Currently, the first deserializer becomes primary - it drives chunking.
    for (size_t i = 0; i < deserializerConfigs.size(); ++i)
    {
        // TODO: Should go away in the future. Framing can be done on top of deserializers.
        ConfigParameters p = deserializerConfigs[i];
        p.Insert("frameMode", m_packingMode == PackingMode::sample ? "true" : "false");
        p.Insert("precision", m_precision);

        IDataDeserializerPtr d = CreateDeserializer(p, primary);
        primary = false;
        m_deserializers.push_back(d);
    }
}
Exemple #3
0
void TestSequenceReader(const ConfigParameters& configBase)
{
    // int nonexistant = configBase("nonexistant");  // use to test global exception handler
    ConfigParameters config = configBase("sequenceTest");

    size_t mbSize = config("minibatchSize");
    size_t epochSize = config("epochSize", "0");
    if (epochSize == 0)
    {
        epochSize = requestDataSize;
    }

    for (int fileType = 0; fileType < 2; ++fileType)
    {
        ConfigParameters readerConfig = config(fileType ? "readerSequence" : "readerSentence");
        readerConfig.Insert("traceLevel", config("traceLevel", "0"));

        std::vector<std::wstring> featureNames;
        std::vector<std::wstring> labelNames;
        GetFileConfigNames(readerConfig, featureNames, labelNames);

        DataReader dataReader(readerConfig);

        // get names of features and labels
        std::vector<std::wstring> files;
        files.push_back(readerConfig(L"file"));

        // setup minibatch matrices
        auto featuresMatrix = make_shared<Matrix<ElemType>>();
        auto labelsMatrix   = make_shared<Matrix<ElemType>>();
        MBLayoutPtr pMBLayout = make_shared<MBLayout>();
        StreamMinibatchInputs matrices;
        matrices.AddInput(featureNames[0], featuresMatrix, pMBLayout, TensorShape());
        matrices.AddInput(labelNames[1]  , labelsMatrix  , pMBLayout, TensorShape());

        auto start = std::chrono::system_clock::now();
        int epochs = config("maxEpochs");
        epochs *= 2;
        for (int epoch = 0; epoch < epochs; epoch++)
        {
            dataReader.StartMinibatchLoop(mbSize, epoch, epochSize);
            for (int i = 0; dataReader.GetMinibatch(matrices); i++)
            {
                auto& features = matrices.GetInputMatrix<ElemType>(featureNames[0]);
                auto& labels   = matrices.GetInputMatrix<ElemType>(labelNames[1]);
                fprintf(stderr, "%4d: features dim: %lu x %lu - [%.8g, %.8g, ...] label dim: %d x %d - [%d, %d, ...]\n", i, features.GetNumRows(), features.GetNumCols(), features(0, 0), features(0, 1), labels.GetNumRows(), labels.GetNumCols(), (int) labels(0, 0), (int) labels(0, 1));
            }
        }
        auto end = std::chrono::system_clock::now();
        auto elapsed = end - start;
        fprintf(stderr, "%f seconds elapsed", (float) (std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count()) / 1000);
    }
}