// Create transformers based on the configuration, i.e. // deserializers = [ // [ // type = "ImageDataDeserializer" // module = "ImageReader" // input = [ // features = [ //----> transforms = [ // [type = "Crop"]:[type = "Scale"]... void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerConfig) { std::string defaultModule = deserializerConfig("module"); if (!deserializerConfig.Exists("input")) return; const ConfigParameters& inputs = deserializerConfig("input"); for (const pair<string, ConfigParameters>& section : inputs) { ConfigParameters inputBody = section.second; // Trying to find transforms in the input section of the config. if (inputBody.find("transforms") == inputBody.end()) continue; std::wstring inputName = msra::strfun::utf16(section.first); // Read transformers in order and appending them to the transformer pipeline. argvector<ConfigParameters> transforms = inputBody("transforms"); for (size_t j = 0; j < transforms.size(); ++j) { ConfigParameters p = transforms[j]; p.Insert("precision", deserializerConfig("precision")); TransformerPtr transformer = CreateTransformer(p, defaultModule, std::wstring()); m_transforms.push_back(Transformation{ transformer, inputName }); } // Let's add a cast transformer by default. It is noop if the type provided by others is float // or double, but will do a proper cast if the type is uchar. auto cast = CreateTransformer(inputBody, defaultModule, std::wstring(L"Cast")); m_transforms.push_back(Transformation{ cast, inputName }); } }
// Create deserializers based on the specified configuration. // deserializers = [ // [ type = "ImageDataDeserializer" module = "ImageReader" ...] // [ type = "CNTKTextFormatDeserializer" module = "CNTKTextFormatReader" ...] void CompositeDataReader::CreateDeserializers(const ConfigParameters& readerConfig) { argvector<ConfigValue> deserializerConfigs = readerConfig(L"deserializers", ConfigParameters::Array(argvector<ConfigValue>(vector<ConfigValue> {}))); assert(m_deserializers.empty()); bool primary = true; // Currently, the first deserializer becomes primary - it drives chunking. for (size_t i = 0; i < deserializerConfigs.size(); ++i) { // TODO: Should go away in the future. Framing can be done on top of deserializers. ConfigParameters p = deserializerConfigs[i]; p.Insert("frameMode", m_packingMode == PackingMode::sample ? "true" : "false"); p.Insert("precision", m_precision); IDataDeserializerPtr d = CreateDeserializer(p, primary); primary = false; m_deserializers.push_back(d); } }
void TestSequenceReader(const ConfigParameters& configBase) { // int nonexistant = configBase("nonexistant"); // use to test global exception handler ConfigParameters config = configBase("sequenceTest"); size_t mbSize = config("minibatchSize"); size_t epochSize = config("epochSize", "0"); if (epochSize == 0) { epochSize = requestDataSize; } for (int fileType = 0; fileType < 2; ++fileType) { ConfigParameters readerConfig = config(fileType ? "readerSequence" : "readerSentence"); readerConfig.Insert("traceLevel", config("traceLevel", "0")); std::vector<std::wstring> featureNames; std::vector<std::wstring> labelNames; GetFileConfigNames(readerConfig, featureNames, labelNames); DataReader dataReader(readerConfig); // get names of features and labels std::vector<std::wstring> files; files.push_back(readerConfig(L"file")); // setup minibatch matrices auto featuresMatrix = make_shared<Matrix<ElemType>>(); auto labelsMatrix = make_shared<Matrix<ElemType>>(); MBLayoutPtr pMBLayout = make_shared<MBLayout>(); StreamMinibatchInputs matrices; matrices.AddInput(featureNames[0], featuresMatrix, pMBLayout, TensorShape()); matrices.AddInput(labelNames[1] , labelsMatrix , pMBLayout, TensorShape()); auto start = std::chrono::system_clock::now(); int epochs = config("maxEpochs"); epochs *= 2; for (int epoch = 0; epoch < epochs; epoch++) { dataReader.StartMinibatchLoop(mbSize, epoch, epochSize); for (int i = 0; dataReader.GetMinibatch(matrices); i++) { auto& features = matrices.GetInputMatrix<ElemType>(featureNames[0]); auto& labels = matrices.GetInputMatrix<ElemType>(labelNames[1]); fprintf(stderr, "%4d: features dim: %lu x %lu - [%.8g, %.8g, ...] label dim: %d x %d - [%d, %d, ...]\n", i, features.GetNumRows(), features.GetNumCols(), features(0, 0), features(0, 1), labels.GetNumRows(), labels.GetNumCols(), (int) labels(0, 0), (int) labels(0, 1)); } } auto end = std::chrono::system_clock::now(); auto elapsed = end - start; fprintf(stderr, "%f seconds elapsed", (float) (std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count()) / 1000); } }