// Create deserializers based on the specified configuration. 
// deserializers = [
//        [ type = "ImageDataDeserializer" module = "ImageReader" ...]
//        [ type = "CNTKTextFormatDeserializer" module = "CNTKTextFormatReader" ...]
bool CompositeDataReader::CreateDeserializers(const ConfigParameters& readerConfig)
{
    argvector<ConfigValue> deserializerConfigs =
        readerConfig(L"deserializers", ConfigParameters::Array(argvector<ConfigValue>(vector<ConfigValue> {})));

    assert(m_deserializers.empty());

    auto traceLevel = readerConfig.Find("traceLevel");
    bool composable = true;

    bool primary = true;  // Currently, the first deserializer becomes primary - it drives chunking.
    for (size_t i = 0; i < deserializerConfigs.size(); ++i)
    {
        // TODO: Should go away in the future. Framing can be done on top of deserializers.
        ConfigParameters p = deserializerConfigs[i];
        p.Insert("frameMode", m_packingMode == PackingMode::sample ? "true" : "false");
        p.Insert("precision", m_precision);
        if (!traceLevel.empty()) 
        {
            p.Insert("traceLevel", traceLevel);
        }

        composable &= p(L"composable", true);
        DataDeserializerPtr d = CreateDeserializer(p, primary);
        primary = false;
        m_deserializers.push_back(d);
    }
    return composable;
}
Exemple #2
0
// TODO: Not safe from the ABI perspective. Will be uglified to make the interface ABI.
// A factory method for creating text deserializers.
//
// Parameters:
//   deserializer       - out parameter; receives a pointer to the newly allocated deserializer.
//                        The object is created with 'new' and handed to the caller as a raw pointer.
//   type               - deserializer type name; only L"CNTKTextFormatDeserializer" is recognized.
//   deserializerConfig - configuration section of the deserializer; its "precision" value
//                        ("float" by default, or "double", compared case-insensitively) selects
//                        the element type of the parser.
//   corpus             - corpus descriptor forwarded to the parser.
//   (unnamed bool)     - ignored by this factory.
//
// Reports an error through InvalidArgument for an unsupported precision or an unknown type.
// Returns true once the deserializer has been created.
extern "C" DATAREADER_API bool CreateDeserializer(IDataDeserializer** deserializer, const std::wstring& type, const ConfigParameters& deserializerConfig, CorpusDescriptorPtr corpus, bool)
{
    string precision = deserializerConfig.Find("precision", "float");
    const bool useFloat = AreEqualIgnoreCase(precision, "float");
    if (!useFloat && !AreEqualIgnoreCase(precision, "double"))
    {
        InvalidArgument("Unsupported precision '%s'", precision.c_str());
    }

    // TODO: Remove type from the parser. Current implementation does not support streams of different types.
    if (type == L"CNTKTextFormatDeserializer")
    {
        // The selection must use the same case-insensitive comparison as the validation above;
        // otherwise a value such as "Float" would pass validation and silently select double.
        if (useFloat)
            *deserializer = new TextParser<float>(corpus, TextConfigHelper(deserializerConfig));
        else // double
            *deserializer = new TextParser<double>(corpus, TextConfigHelper(deserializerConfig));
    }
    else
        InvalidArgument("Unknown deserializer type '%ls'", type.c_str());

    // Deserializer created.
    return true;
}
Exemple #3
0
    // Parses the image reader configuration.
    //
    // The configuration must contain exactly one section with a "width" parameter (the feature
    // stream) and exactly one section with a "labelDim" parameter (the label stream). From them
    // two stream descriptions are created and appended to m_streams: the feature stream with
    // id 0 and the label stream with id 1.
    //
    // Further values read from 'config':
    //   mbFormat      - (feature section) "nchw"/"cudnn" select CHW, "nhwc"/"legacy" select HWC;
    //                   defaults to "nchw". Comparison is case-insensitive.
    //   file          - stored in m_mapPath.
    //   randomize     - "auto" (default) or "none"; stored in m_randomize.
    //   precision     - "float" (default) or "double"; sets the element type of both streams.
    //   numCPUThreads - stored in m_cpuThreadCount, defaults to 0.
    //
    // Invalid values are reported through RuntimeError.
    ImageConfigHelper::ImageConfigHelper(const ConfigParameters& config)
        : m_dataFormat(CHW)
    {
        std::vector<std::string> featureNames = GetSectionsWithParameter(config, "width");
        std::vector<std::string> labelNames = GetSectionsWithParameter(config, "labelDim");

        // REVIEW alexeyk: currently support only one feature and label section.
        if (featureNames.size() != 1 || labelNames.size() != 1)
        {
            RuntimeError(
                "ImageReader currently supports a single feature and label stream. '%d' features , '%d' labels found.",
                static_cast<int>(featureNames.size()),
                static_cast<int>(labelNames.size()));
        }

        ConfigParameters featureSection = config(featureNames[0]);
        size_t w = featureSection("width");
        size_t h = featureSection("height");
        size_t c = featureSection("channels");

        std::string mbFmt = featureSection("mbFormat", "nchw");
        if (AreEqualIgnoreCase(mbFmt, "nhwc") || AreEqualIgnoreCase(mbFmt, "legacy"))
        {
            m_dataFormat = HWC;
        }
        // Reject only values that are neither "nchw" nor its alias "cudnn". The previous
        // condition (!nchw || cudnn) was true for "cudnn" itself, so that alias was never accepted.
        else if (!AreEqualIgnoreCase(mbFmt, "nchw") && !AreEqualIgnoreCase(mbFmt, "cudnn"))
        {
            RuntimeError("ImageReader does not support the sample format '%s', only 'nchw' and 'nhwc' are supported.", mbFmt.c_str());
        }

        // Feature stream: id 0, named after the feature section, laid out according to m_dataFormat.
        auto features = std::make_shared<StreamDescription>();
        features->m_id = 0;
        features->m_name = msra::strfun::utf16(featureSection.ConfigName());
        features->m_sampleLayout = std::make_shared<TensorShape>(ImageDimensions(w, h, c).AsTensorShape(m_dataFormat));
        m_streams.push_back(features);

        ConfigParameters label = config(labelNames[0]);
        size_t labelDimension = label("labelDim");

        // Label stream: id 1, named after the label section, with a layout of size labelDim.
        auto labelSection = std::make_shared<StreamDescription>();
        labelSection->m_id = 1;
        labelSection->m_name = msra::strfun::utf16(label.ConfigName());
        labelSection->m_sampleLayout = std::make_shared<TensorShape>(labelDimension);
        m_streams.push_back(labelSection);

        m_mapPath = config(L"file");

        std::string rand = config(L"randomize", "auto");

        if (AreEqualIgnoreCase(rand, "auto"))
        {
            m_randomize = true;
        }
        else if (AreEqualIgnoreCase(rand, "none"))
        {
            m_randomize = false;
        }
        else
        {
            RuntimeError("'randomize' parameter must be set to 'auto' or 'none'");
        }

        // Identify precision
        string precision = config.Find("precision", "float");
        if (AreEqualIgnoreCase(precision, "float"))
        {
            features->m_elementType = ElementType::tfloat;
            labelSection->m_elementType = ElementType::tfloat;
        }
        else if (AreEqualIgnoreCase(precision, "double"))
        {
            features->m_elementType = ElementType::tdouble;
            labelSection->m_elementType = ElementType::tdouble;
        }
        else
        {
            RuntimeError("Not supported precision '%s'. Expected 'double' or 'float'.", precision.c_str());
        }

        m_cpuThreadCount = config(L"numCPUThreads", 0);
    }
// Builds the text-format reader settings from the given configuration.
//
// Requires a non-empty "input" section in which every sub-section describes one stream and
// specifies "dim" and "format" ("dense" or "sparse"); "alias" is optional and defaults to the
// sub-section name. Aliases must be non-empty and unique across streams. Stream ids are
// assigned sequentially starting from 0 in iteration order of the "input" section.
//
// Also reads "precision" ("float" by default, or "double"), "file", "randomize" ("none",
// "auto" or a numeric window; "auto" when absent), "skipSequenceIds", "maxErrors",
// "traceLevel", "chunkSizeInBytes" and "numChunksToCache".
//
// Invalid configurations are reported through RuntimeError.
TextConfigHelper::TextConfigHelper(const ConfigParameters& config)
{
    if (!config.ExistsCurrent(L"input"))
    {
        RuntimeError("CNTKTextFormatReader configuration does not contain \"input\" section.");
    }

    const ConfigParameters& inputSections = config(L"input");

    if (inputSections.empty())
    {
        RuntimeError("CNTKTextFormatReader configuration contains an empty \"input\" section.");
    }

    // Element type shared by all streams of this reader.
    string precisionName = config.Find("precision", "float");
    if (AreEqualIgnoreCase(precisionName, "float"))
    {
        m_elementType = ElementType::tfloat;
    }
    else if (AreEqualIgnoreCase(precisionName, "double"))
    {
        m_elementType = ElementType::tdouble;
    }
    else
    {
        RuntimeError("Not supported precision '%s'. Expected 'double' or 'float'.", precisionName.c_str());
    }

    StreamId nextStreamId = 0;
    // Remembers which input claimed each alias so that duplicates can be reported.
    map<string, wstring> inputNameByAlias;
    for (const pair<string, ConfigParameters>& entry : inputSections)
    {
        ConfigParameters streamConfig = entry.second;
        wstring streamName = msra::strfun::utf16(entry.first);

        if (!(streamConfig.ExistsCurrent(L"dim") && streamConfig.ExistsCurrent(L"format")))
        {
            RuntimeError("Input section for input '%ls' does not specify all the required parameters, "
                "\"dim\" and \"format\".", streamName.c_str());
        }

        StreamDescriptor stream;
        stream.m_id = nextStreamId++;
        stream.m_name = streamName;
        stream.m_sampleDimension = streamConfig(L"dim");
        string formatName = streamConfig(L"format");

        if (AreEqualIgnoreCase(formatName, "sparse"))
        {
            stream.m_storageType = StorageType::sparse_csc;
            // Sparse inputs are limited by the largest value IndexType can hold.
            if (stream.m_sampleDimension > numeric_limits<IndexType>::max())
            {
                RuntimeError("Sample dimension (%" PRIu64 ") for sparse input '%ls'"
                    " exceeds the maximum allowed value (%" PRIu64 ").\n",
                    stream.m_sampleDimension, streamName.c_str(), (size_t)numeric_limits<IndexType>::max());
            }
        }
        else if (AreEqualIgnoreCase(formatName, "dense"))
        {
            stream.m_storageType = StorageType::dense;
        }
        else
        {
            RuntimeError("'format' parameter must be set either to 'dense' or 'sparse'.");
        }

        // The alias is optional; the section name is used when it is not given.
        if (!streamConfig.ExistsCurrent(L"alias"))
        {
            stream.m_alias = entry.first;
        }
        else
        {
            stream.m_alias = streamConfig(L"alias");
            if (stream.m_alias.empty())
            {
                RuntimeError("Alias value for input '%ls' is empty.", streamName.c_str());
            }
        }

        auto claimed = inputNameByAlias.find(stream.m_alias);
        if (claimed != inputNameByAlias.end())
        {
            RuntimeError("Alias %s is already mapped to input %ls.",
                stream.m_alias.c_str(), claimed->second.c_str());
        }
        else
        {
            inputNameByAlias[stream.m_alias] = stream.m_name;
        }

        stream.m_elementType = m_elementType;
        m_streams.push_back(stream);
    }

    m_filepath = msra::strfun::utf16(config(L"file"));

    if (!config.Exists(L"randomize"))
    {
        m_randomizationWindow = randomizeAuto;
    }
    else
    {
        // "none" and "auto" are keywords; any other value is taken as the window itself.
        wstring randomizeText = config.CanBeString(L"randomize") ? config(L"randomize") : wstring();
        if (_wcsicmp(randomizeText.c_str(), L"none") == 0)
        {
            m_randomizationWindow = randomizeNone;
        }
        else if (_wcsicmp(randomizeText.c_str(), L"auto") == 0)
        {
            m_randomizationWindow = randomizeAuto;
        }
        else
        {
            m_randomizationWindow = config(L"randomize");
        }
    }

    m_skipSequenceIds = config(L"skipSequenceIds", false);
    m_maxErrors = config(L"maxErrors", 0);
    m_traceLevel = config(L"traceLevel", 0);
    m_chunkSizeBytes = config(L"chunkSizeInBytes", 32 * 1024 * 1024); // 32 MB by default
    m_chunkCacheSize = config(L"numChunksToCache", 32); // 32 * 32 MB = 1 GB of memory in total
}
Exemple #5
0
// Builds the text-format reader settings from the given configuration.
//
// Requires a non-empty "input" section in which every sub-section describes one stream and
// specifies "dim" and "format" ("dense" or "sparse"). Optional per-stream parameters are
// "definesMBSize" (false by default) and "alias" (defaults to the sub-section name). Aliases
// must be non-empty and unique across streams. Stream ids are assigned sequentially starting
// from 0 in iteration order of the "input" section.
//
// Also reads "precision" ("float" by default, or "double"), "file", "skipSequenceIds",
// "maxErrors", "traceLevel", "chunkSizeInBytes", "keepDataInMemory", "frameMode" and
// "sampleBasedRandomizationWindow"; the randomization window itself is obtained from
// GetRandomizationWindowFromConfig.
//
// Invalid configurations are reported through RuntimeError.
TextConfigHelper::TextConfigHelper(const ConfigParameters& config)
{
    if (!config.ExistsCurrent(L"input"))
    {
        RuntimeError("CNTKTextFormatReader configuration does not contain \"input\" section.");
    }

    const ConfigParameters& input = config(L"input");

    if (input.empty())
    {
        RuntimeError("CNTKTextFormatReader configuration contains an empty \"input\" section.");
    }

    // Element type shared by all streams of this reader.
    string precision = config.Find("precision", "float");
    if (AreEqualIgnoreCase(precision, "double"))
    {
        m_elementType = DataType::Double;
    }
    else if (AreEqualIgnoreCase(precision, "float"))
    {
        m_elementType = DataType::Float;
    }
    else
    {
        RuntimeError("Not supported precision '%s'. Expected 'double' or 'float'.", precision.c_str());
    }

    StreamId id = 0;
    // Remembers which input claimed each alias so that duplicates can be reported.
    map<string, wstring> aliasToInputMap;
    for (const pair<string, ConfigParameters>& section : input)
    {
        ConfigParameters input2 = section.second;
        wstring name = msra::strfun::utf16(section.first);

        if (!input2.ExistsCurrent(L"dim") || !input2.ExistsCurrent(L"format"))
        {
            RuntimeError("Input section for input '%ls' does not specify all the required parameters, "
                "\"dim\" and \"format\".", name.c_str());
        }

        StreamDescriptor stream;
        stream.m_id = id++;
        stream.m_name = name;
        stream.m_sampleDimension = input2(L"dim");
        stream.m_definesMbSize = input2(L"definesMBSize", false);
        string type = input2(L"format");

        if (AreEqualIgnoreCase(type, "dense"))
        {
            stream.m_storageFormat = StorageFormat::Dense;
        }
        else if (AreEqualIgnoreCase(type, "sparse"))
        {
            stream.m_storageFormat = StorageFormat::SparseCSC;
            // Sparse inputs are limited by the largest value IndexType can hold.
            if (stream.m_sampleDimension > numeric_limits<IndexType>::max())
            {
                RuntimeError("Sample dimension (%" PRIu64 ") for sparse input '%ls'"
                    " exceeds the maximum allowed value (%" PRIu64 ").\n",
                    stream.m_sampleDimension, name.c_str(), (size_t)numeric_limits<IndexType>::max());
            }
        }
        else
        {
            RuntimeError("'format' parameter must be set either to 'dense' or 'sparse'.");
        }

        // alias is optional
        if (input2.ExistsCurrent(L"alias"))
        {
            stream.m_alias = input2(L"alias");
            if (stream.m_alias.empty())
            {
                RuntimeError("Alias value for input '%ls' is empty.", name.c_str());
            }
        }
        else
        {
            stream.m_alias = section.first;
        }

        if (aliasToInputMap.find(stream.m_alias) != aliasToInputMap.end())
        {
            RuntimeError("Alias %s is already mapped to input %ls.",
                stream.m_alias.c_str(), aliasToInputMap[stream.m_alias].c_str());
        }
        else
        {
            aliasToInputMap[stream.m_alias] = stream.m_name;
        }

        stream.m_elementType = m_elementType;
        m_streams.push_back(stream);
    }

    m_filepath = msra::strfun::utf16(config(L"file"));
    m_skipSequenceIds = config(L"skipSequenceIds", false);
    m_maxErrors = config(L"maxErrors", 0);
    m_traceLevel = config(L"traceLevel", 1);
    m_chunkSizeBytes = config(L"chunkSizeInBytes", g_32MB); // 32 MB by default
    m_keepDataInMemory = config(L"keepDataInMemory", false);
    m_frameMode = config(L"frameMode", false);

    m_randomizationWindow = GetRandomizationWindowFromConfig(config);
    m_sampleBasedRandomizationWindow = config(L"sampleBasedRandomizationWindow", false);
    if (!m_sampleBasedRandomizationWindow && m_randomizationWindow == randomizeAuto) 
    {
        // The automatic window is expressed in chunks, so it is derived from the chunk size.
        // A configured chunk size of zero would make the division below undefined, so it is
        // reported as a configuration error instead.
        if (m_chunkSizeBytes == 0)
        {
            RuntimeError("'chunkSizeInBytes' must be greater than zero.");
        }
        else
        {
            m_randomizationWindow = g_4GB / m_chunkSizeBytes; // ~ 4 GB (on disk) worth of chunks
        }
    }
}