void TestReader(const ConfigParameters& configBase) { // int nonexistant = configBase("nonexistant"); // use to test global exception handler ConfigParameters config(configBase("mnistTest")); ConfigParameters readerConfig(config("reader")); readerConfig.Insert("traceLevel", config("traceLevel", "0")); size_t mbSize = config("minibatchSize"); size_t epochSize = config("epochSize", "0"); if (epochSize == 0) { epochSize = requestDataSize; } DataReader dataReader(readerConfig); // get names of features and labels std::vector<std::wstring> featureNames; std::vector<std::wstring> labelNames; GetFileConfigNames(readerConfig, featureNames, labelNames); // setup minibatch matrices int deviceId = 0; auto featuresMatrix = make_shared<Matrix<ElemType>>(deviceId); auto labelsMatrix = make_shared<Matrix<ElemType>>(deviceId); StreamMinibatchInputs matrices; matrices.AddInputMatrix(featureNames[0], featuresMatrix); matrices.AddInputMatrix(labelNames[0], labelsMatrix); auto start = std::chrono::system_clock::now(); int epochs = config("maxEpochs"); epochs *= 2; for (int epoch = 0; epoch < epochs; epoch++) { dataReader.StartMinibatchLoop(mbSize, epoch, epochSize); int i = 0; while (dataReader.GetMinibatch(matrices)) { Matrix<ElemType>& features = matrices.GetInputMatrix<ElemType>(featureNames[0]); Matrix<ElemType>& labels = matrices.GetInputMatrix<ElemType>(labelNames[0]); if (labels.GetNumRows() == 0) { fprintf(stderr, "%4d: features dim: %lu x %lu - [%.8g, %.8g, ...]\n", i++, features.GetNumRows(), features.GetNumCols(), features(0, 0), features(0, 1)); } else { fprintf(stderr, "%4d: features dim: %lu x %lu - [%.8g, %.8g, ...] label dim: %lu x %lu - [%d, %d, ...]\n", i++, features.GetNumRows(), features.GetNumCols(), features(0, 0), features(0, 1), labels.GetNumRows(), labels.GetNumCols(), (int) labels(0, 0), (int) labels(0, 1)); } } } auto end = std::chrono::system_clock::now(); auto elapsed = end - start; fprintf(stderr, "%f seconds elapsed", (float) (std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count()) / 1000); }
void DoCreateLabelMap(const ConfigParameters& config) { // this gets the section name we are interested in std::string section = config(L"section"); // get that section (probably a peer config section, which works thanks to heirarchal symbol resolution) ConfigParameters configSection(config(section)); ConfigParameters readerConfig(configSection("reader")); readerConfig.Insert("allowMapCreation", "true"); size_t minibatchSize = config(L"minibatchSize", "2048"); int traceLevel = config(L"traceLevel", "0"); std::vector<std::wstring> featureNames; std::vector<std::wstring> labelNames; GetFileConfigNames(readerConfig, featureNames, labelNames); // setup minibatch matrices auto featuresMatrix = make_shared<Matrix<ElemType>>(CPUDEVICE); auto labelsMatrix = make_shared<Matrix<ElemType>>(CPUDEVICE); StreamMinibatchInputs matrices; matrices.AddInputMatrix(featureNames[0], featuresMatrix); if (labelNames.size() == 0) RuntimeError("CreateLabelMap: no labels found to process"); // now create the reader and loop through the entire dataset to get all the labels auto start = std::chrono::system_clock::now(); for (const std::wstring& labelsName : labelNames) { // take the last label file defined (the other one might be input) matrices.AddInputMatrix(labelsName, labelsMatrix); // get the label mapping file name ConfigParameters labelConfig(readerConfig(labelsName)); std::string labelMappingFile; if (labelConfig.ExistsCurrent(L"labelMappingFile")) labelMappingFile = labelConfig(L"labelMappingFile"); else if (readerConfig.ExistsCurrent(L"labelMappingFile")) labelMappingFile = labelConfig(L"labelMappingFile"); else RuntimeError("CreateLabelMap: No labelMappingFile defined"); if (fexists(labelMappingFile)) { fprintf(stderr, "CreateLabelMap: the label mapping file '%s' already exists, no work to do.\n", labelMappingFile.c_str()); return; } fprintf(stderr, "CreateLabelMap: Creating the mapping file '%s' \n", labelMappingFile.c_str()); DataReader dataReader(readerConfig); dataReader.StartMinibatchLoop(minibatchSize, 0, requestDataSize); int count = 0; while (dataReader.GetMinibatch(matrices)) { Matrix<ElemType>& features = matrices.GetInputMatrix<ElemType>(featureNames[0]); count += features.GetNumCols(); if (traceLevel > 1) fprintf(stderr, "."); // progress meter } dataReader.StartMinibatchLoop(minibatchSize, 1, requestDataSize); // print the results if (traceLevel > 0) fprintf(stderr, "\nread %d labels and produced %s\n", count, labelMappingFile.c_str()); } auto end = std::chrono::system_clock::now(); auto elapsed = end - start; if (traceLevel > 1) fprintf(stderr, "%f seconds elapsed\n", (float) (std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count()) / 1000); }