// Create deserializers based on the specified configuration. // deserializers = [ // [ type = "ImageDataDeserializer" module = "ImageReader" ...] // [ type = "CNTKTextFormatDeserializer" module = "CNTKTextFormatReader" ...] bool CompositeDataReader::CreateDeserializers(const ConfigParameters& readerConfig) { argvector<ConfigValue> deserializerConfigs = readerConfig(L"deserializers", ConfigParameters::Array(argvector<ConfigValue>(vector<ConfigValue> {}))); assert(m_deserializers.empty()); auto traceLevel = readerConfig.Find("traceLevel"); bool composable = true; bool primary = true; // Currently, the first deserializer becomes primary - it drives chunking. for (size_t i = 0; i < deserializerConfigs.size(); ++i) { // TODO: Should go away in the future. Framing can be done on top of deserializers. ConfigParameters p = deserializerConfigs[i]; p.Insert("frameMode", m_packingMode == PackingMode::sample ? "true" : "false"); p.Insert("precision", m_precision); if (!traceLevel.empty()) { p.Insert("traceLevel", traceLevel); } composable &= p(L"composable", true); DataDeserializerPtr d = CreateDeserializer(p, primary); primary = false; m_deserializers.push_back(d); } return composable; }
// Builds a minibatch source backed by the "CompositeDataReader" plugin.
// The configuration is converted to a dictionary, serialized into a config string,
// parsed into ConfigParameters and handed to the reader factory loaded from the plugin.
// The created reader is then wrapped into a ReaderShim<float>.
CompositeMinibatchSource::CompositeMinibatchSource(const MinibatchSourceConfig& configuration)
    : m_epochEndReached(false),
      m_prevMinibatchSize(0),
      m_maxNumSamplesToRead(configuration.maxSamples),
      m_maxNumSweepsToRead(configuration.maxSweeps),
      m_truncationLength(0),
      m_numWorkers(1),
      m_workerRank(0),
      m_restorePosition(0)
{
    m_truncationLength = configuration.truncationLength;

    // Serialize the dictionary form of the configuration into the config-string format.
    auto augmentedConfiguration = Internal::ToDictionary(configuration);
    ConfigParameters config;
    std::wstringstream serialized;
    for (const auto& entry : *(augmentedConfiguration.m_dictionaryData))
    {
        AddConfigString(serialized, entry.first, entry.second, 0);
    }
    config.Parse(msra::strfun::utf8(serialized.str()));

    // Resolve the reader factory exported by the plugin and create the reader.
    using CreateCompositeDataReaderProc = Reader* (*)(const ConfigParameters* parameters);
    auto createReaderProc =
        (CreateCompositeDataReaderProc)Plugin().Load(L"CompositeDataReader", "CreateCompositeDataReader");
    std::shared_ptr<Microsoft::MSR::CNTK::Reader> compositeDataReader(createReaderProc(&config));

    // Publish one stream info per stream exposed by the reader.
    m_compositeDataReaderStreamDescs = compositeDataReader->GetStreamDescriptions();
    for (const auto& streamDesc : m_compositeDataReaderStreamDescs)
    {
        m_streamInfos.insert({ streamDesc->m_name,
                               streamDesc->m_id,
                               AsStorageFormat(streamDesc->m_storageType),
                               AsDataType(streamDesc->m_elementType),
                               AsNDShape(*(streamDesc->m_sampleLayout)) });
    }

    // The shim is released through Destroy() rather than delete.
    m_shim.reset(new ReaderShim<float>(compositeDataReader),
                 [](ReaderShim<float>* x) { x->Destroy(); });
    m_shim->Init(config);
}
void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerConfig) { std::string defaultModule = deserializerConfig("module"); argvector<ConfigParameters> inputs = deserializerConfig("input"); for (size_t i = 0; i < inputs.size(); ++i) { // Trying to find transfomers in a stream section of the config. auto inputSections = TryGetSectionsWithParameter(inputs[i], "transforms"); if (inputSections.size() > 1) { LogicError("Only a single 'transforms' config is allowed per stream."); } // No need to create anything for this stream, skipping. if (inputSections.empty()) { continue; } ConfigParameters input = inputs[i](inputSections.front()); std::wstring inputName = msra::strfun::utf16(input.ConfigName()); // Read tranformers in order and appending them to the transformer pipeline. argvector<ConfigParameters> transforms = input("transforms"); for (size_t j = 0; j < transforms.size(); ++j) { TransformerPtr transformer = CreateTransformer(transforms[j], defaultModule); m_transforms.push_back(Transformation{transformer, inputName}); } } }
// Initializes the writer from its configuration section.
// Reads the general options (verbosity, overflow handling) and then, for every name in
// 'outputNodeNames', reads that output's sub-section:
//   dim            - required, output dimension
//   file / scpFile - required, script file path ('file' takes precedence)
//   Kaldicmd       - optional, a Kaldi feature writer is opened with this command
//   type           - optional, only "Real" is accepted
// Raises a runtime error when a required entry is missing or the type is not "Real".
void HTKMLFWriter<ElemType>::InitFromConfig(const ConfigRecordType& writerConfig)
{
    m_tempArray = nullptr;
    m_tempArraySize = 0;
    m_overflowWarningCount = 0;

    vector<wstring> scriptpaths;
    vector<wstring> filelist;
    size_t numFiles;
    size_t firstfilesonly = SIZE_MAX; // set to a lower value for testing

    m_verbosity = writerConfig(L"verbosity", 2);
    m_overflowValue = writerConfig(L"overflowValue", 50);
    m_maxNumOverflowWarning = writerConfig(L"maxNumOverflowWarning", 10);

    vector<wstring> outputNames = writerConfig(L"outputNodeNames", ConfigRecordType::Array(stringargvector()));
    if (outputNames.size() < 1)
        RuntimeError("writer needs at least one outputNodeName specified in config");

    int counter = 0;
    foreach_index (i, outputNames) // inputNames should map to node names
    {
        ConfigParameters thisOutput = writerConfig(outputNames[i]);

        if (thisOutput.Exists("dim"))
            udims.push_back(thisOutput(L"dim"));
        else
            RuntimeError("HTKMLFWriter::Init: writer need to specify dim of output");

        if (thisOutput.Exists("file"))
            scriptpaths.push_back(thisOutput(L"file"));
        else if (thisOutput.Exists("scpFile"))
            scriptpaths.push_back(thisOutput(L"scpFile"));
        else
            RuntimeError("HTKMLFWriter::Init: writer needs to specify scpFile for output");

        if (thisOutput.Exists("Kaldicmd"))
        {
            kaldicmd.push_back(thisOutput(L"Kaldicmd"));
            kaldi::BaseFloatMatrixWriter wfea;
            feature_writer.push_back(wfea);
            // Address the entries that were just appended. kaldicmd and feature_writer only
            // grow for outputs that specify Kaldicmd, so indexing them with the output
            // position goes out of range as soon as an earlier output has no Kaldicmd.
            feature_writer.back().Open(msra::strfun::utf8(kaldicmd.back()));
        }

        outputNameToIdMap[outputNames[i]] = i;
        outputNameToDimMap[outputNames[i]] = udims[i];

        wstring type = thisOutput(L"type", "Real");
        if (type == L"Real")
        {
            outputNameToTypeMap[outputNames[i]] = OutputTypes::outputReal;
        }
        else
        {
            throw std::runtime_error("HTKMLFWriter::Init: output type for writer output expected to be Real");
        }
        counter++;
    }
void TestSequenceReader(const ConfigParameters& configBase) { // int nonexistant = configBase("nonexistant"); // use to test global exception handler ConfigParameters config = configBase("sequenceTest"); size_t mbSize = config("minibatchSize"); size_t epochSize = config("epochSize", "0"); if (epochSize == 0) { epochSize = requestDataSize; } for (int fileType = 0; fileType < 2; ++fileType) { ConfigParameters readerConfig = config(fileType ? "readerSequence" : "readerSentence"); readerConfig.Insert("traceLevel", config("traceLevel", "0")); std::vector<std::wstring> featureNames; std::vector<std::wstring> labelNames; GetFileConfigNames(readerConfig, featureNames, labelNames); DataReader dataReader(readerConfig); // get names of features and labels std::vector<std::wstring> files; files.push_back(readerConfig(L"file")); // setup minibatch matrices auto featuresMatrix = make_shared<Matrix<ElemType>>(); auto labelsMatrix = make_shared<Matrix<ElemType>>(); MBLayoutPtr pMBLayout = make_shared<MBLayout>(); StreamMinibatchInputs matrices; matrices.AddInput(featureNames[0], featuresMatrix, pMBLayout, TensorShape()); matrices.AddInput(labelNames[1] , labelsMatrix , pMBLayout, TensorShape()); auto start = std::chrono::system_clock::now(); int epochs = config("maxEpochs"); epochs *= 2; for (int epoch = 0; epoch < epochs; epoch++) { dataReader.StartMinibatchLoop(mbSize, epoch, epochSize); for (int i = 0; dataReader.GetMinibatch(matrices); i++) { auto& features = matrices.GetInputMatrix<ElemType>(featureNames[0]); auto& labels = matrices.GetInputMatrix<ElemType>(labelNames[1]); fprintf(stderr, "%4d: features dim: %lu x %lu - [%.8g, %.8g, ...] 
label dim: %d x %d - [%d, %d, ...]\n", i, features.GetNumRows(), features.GetNumCols(), features(0, 0), features(0, 1), labels.GetNumRows(), labels.GetNumCols(), (int) labels(0, 0), (int) labels(0, 1)); } } auto end = std::chrono::system_clock::now(); auto elapsed = end - start; fprintf(stderr, "%f seconds elapsed", (float) (std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count()) / 1000); } }
// Parses the textual network description into a configuration and builds the network
// from it, storing the result in m_net.
// Raises a LogicError if no network could be constructed.
void CNTKEvalBase<ElemType>::CreateNetwork(const std::string& networkDescription)
{
    ConfigParameters parsedDescription;
    parsedDescription.Parse(networkDescription);

    // Filled by GetModelFromConfig; not used further here.
    std::vector<wstring> requestedOutputNodes;
    this->m_net = GetModelFromConfig<ConfigParameters, ElemType>(parsedDescription, L"outputNodeNames", requestedOutputNodes);

    if (this->m_net == nullptr)
        LogicError("Unable to construct network from description");
}
// Sets up the two streams exposed by an image deserializer from its configuration:
// stream 0 is the dense feature (image) stream, taken from the single "input" section
// that has a "transforms" parameter; stream 1 is the sparse label stream, taken from
// the single "input" section that has a "labelDim" parameter.
// Raises a RuntimeError unless exactly one feature and one label section are found.
ImageDeserializerBase::ImageDeserializerBase(CorpusDescriptorPtr corpus, const ConfigParameters& config, bool primary)
    : DataDeserializerBase(primary),
      m_corpus(corpus)
{
    assert(m_corpus);

    ConfigParameters inputs = config("input");

    std::vector<std::string> featureNames = GetSectionsWithParameter("ImageDeserializerBase", inputs, "transforms");
    std::vector<std::string> labelNames = GetSectionsWithParameter("ImageDeserializerBase", inputs, "labelDim");

    const bool singleFeatureStream = featureNames.size() == 1;
    const bool singleLabelStream = labelNames.size() == 1;
    if (!singleFeatureStream || !singleLabelStream)
    {
        RuntimeError(
            "Please specify a single feature and label stream. '%d' features , '%d' labels found.",
            static_cast<int>(featureNames.size()),
            static_cast<int>(labelNames.size()));
    }

    // Any precision other than "float" (ignoring case) selects double.
    string precision = config("precision", "float");
    if (AreEqualIgnoreCase(precision, "float"))
        m_precision = ElementType::tfloat;
    else
        m_precision = ElementType::tdouble;

    m_verbosity = config(L"verbosity", 0);

    // Feature stream.
    ConfigParameters featureSection = inputs(featureNames[0]);
    auto featureStream = std::make_shared<StreamDescription>();
    featureStream->m_id = 0;
    featureStream->m_name = msra::strfun::utf16(featureSection.ConfigName());
    featureStream->m_storageType = StorageType::dense;
    // Due to performance, now we support images of different types.
    featureStream->m_elementType = ElementType::tvariant;
    m_streams.push_back(featureStream);

    // Label stream.
    ConfigParameters labelSection = inputs(labelNames[0]);
    size_t labelDimension = labelSection("labelDim");
    auto labelStream = std::make_shared<StreamDescription>();
    labelStream->m_id = 1;
    labelStream->m_name = msra::strfun::utf16(labelSection.ConfigName());
    labelStream->m_sampleLayout = std::make_shared<TensorShape>(labelDimension);
    labelStream->m_storageType = StorageType::sparse_csc;
    labelStream->m_elementType = m_precision;
    m_streams.push_back(labelStream);

    // The label generator is typed after the element type of the label stream.
    if (labelStream->m_elementType == ElementType::tfloat)
        m_labelGenerator = std::make_shared<TypedLabelGenerator<float>>(labelDimension);
    else
        m_labelGenerator = std::make_shared<TypedLabelGenerator<double>>(labelDimension);

    m_grayscale = config(L"grayscale", false);

    // TODO: multiview should be done on the level of randomizer/transformers - it is responsiblity of the
    // TODO: randomizer to collect how many copies each transform needs and request same sequence several times.
    m_multiViewCrop = config(L"multiViewCrop", false);
}
// ---------------------------------------------------------------------- // simple macro to illustrate how to call a test in a macro environment // test parameters are changed from the macro void singleTest(string testname = "PixelAlive", string rootfilename = "pixelalive.root", string cfgdirectory = "../data/defaultParametersRocPSI46digV2") { ConfigParameters *configParameters = ConfigParameters::Singleton(); configParameters->setDirectory(cfgdirectory); string cfgFile = configParameters->getDirectory() + string("/configParameters.dat"); configParameters->readConfigParameterFile(cfgFile); string rootfile = rootfilename; PixTestParameters *ptp = new PixTestParameters(configParameters->getDirectory() + "/" + configParameters->getTestParameterFileName()); PixSetup *ap = new PixSetup("DEBUG", ptp, configParameters); cout << "pxar: dumping results into " << rootfile << endl; TFile *rfile = TFile::Open(rootfile.c_str(), "RECREATE"); PixTestFactory *factory = PixTestFactory::instance(); PixTest *pt = factory->createTest(testname, ap); if (!pt->getName().compare("PixelAlive")) { pt->setParameter("Ntrig", "10"); pt->doTest(); pt->setParameter("Ntrig", "20"); pt->doTest(); } if (!pt->getName().compare("Ph")) { pt->setParameter("Ntrig", "2"); pt->setParameter("DAC", "Vcal"); pt->setParameter("DacVal", "200"); pt->dumpParameters(); pt->doTest(); pt->setParameter("PIX", "reset"); pt->setParameter("Ntrig", "4"); pt->setParameter("DacVal", "250"); pt->setParameter("PIX", "45,45"); pt->dumpParameters(); pt->doTest(); } delete pt; rfile->Close(); ap->killApi(); }
int wmain(int argc, wchar_t* argv[]) { try { ConfigParameters config; ConfigParameters::ParseCommandLine(argc, argv, config); // get the command param set they want wstring logpath = config("stderr", L""); ConfigArray command = config("command", "train"); // dump config info fprintf(stderr, "command: "); for (int i = 0; i < command.size(); i++) { fprintf(stderr, "%s ", command[i].c_str()); } // run commands std::string type = config("precision", "float"); // accept old precision key for backward compatibility if (config.Exists("type")) type = config("type", "float"); fprintf(stderr, "\nprecision = %s\n", type.c_str()); if (type == "float") DoCommand<float>(config); else if (type == "double") DoCommand<double>(config); else RuntimeError("invalid precision specified: %s", type.c_str()); } catch (std::exception& err) { fprintf(stderr, "EXCEPTION occurred: %s", err.what()); Microsoft::MSR::CNTK::DebugUtil::PrintCallStack(); #ifdef _DEBUG DebugBreak(); #endif return -1; } catch (...) { fprintf(stderr, "Unknown ERROR occurred"); Microsoft::MSR::CNTK::DebugUtil::PrintCallStack(); #ifdef _DEBUG DebugBreak(); #endif return -1; } return 0; }
void CropTransformer::InitFromConfig(const ConfigParameters &config) { floatargvector cropRatio = config(L"cropRatio", "1.0"); m_cropRatioMin = cropRatio[0]; m_cropRatioMax = cropRatio[1]; if (!(0 < m_cropRatioMin && m_cropRatioMin <= 1.0) || !(0 < m_cropRatioMax && m_cropRatioMax <= 1.0) || m_cropRatioMin > m_cropRatioMax) { RuntimeError("Invalid cropRatio value, must be > 0 and <= 1. cropMin must " "<= cropMax"); } m_jitterType = ParseJitterType(config(L"jitterType", "")); if (!config.ExistsCurrent(L"hflip")) { m_hFlip = m_imageConfig->GetCropType() == CropType::Random; } else { m_hFlip = config(L"hflip"); } m_aspectRatioRadius = config(L"aspectRatioRadius", ConfigParameters::Array(doubleargvector(vector<double>{0.0}))); }
void CropTransformer::InitFromConfig(const ConfigParameters &config) { m_cropType = ParseCropType(config(L"cropType", "")); floatargvector cropRatio = config(L"cropRatio", "1.0"); m_cropRatioMin = cropRatio[0]; m_cropRatioMax = cropRatio[1]; if (!(0 < m_cropRatioMin && m_cropRatioMin <= 1.0) || !(0 < m_cropRatioMax && m_cropRatioMax <= 1.0) || m_cropRatioMin > m_cropRatioMax) { RuntimeError("Invalid cropRatio value, must be > 0 and <= 1. cropMin must " "<= cropMax"); } m_jitterType = ParseJitterType(config(L"jitterType", "")); if (!config.ExistsCurrent(L"hflip")) { m_hFlip = m_cropType == CropType::Random; } else { m_hFlip = config(L"hflip"); } }
void DoEdit(const ConfigParameters& config) { // BrainScript editing if (config.Exists(L"BrainScriptNetworkBuilder")) { bool makeMode = config(L"makeMode", true); wstring outputPathname = config(L"outputModelPath"); // in makeMode, if output file exists, we are done if (makeMode && File::Exists(outputPathname)) { LOGPRINTF(stderr, "'%ls' exists, skipping. Specify makeMode=false to force executing the action.\n", outputPathname.c_str()); return; } DEVICEID_TYPE deviceId = DeviceFromConfig(config); let createNetworkFn = GetNetworkFactory<ConfigParameters, ElemType>(config); let net = createNetworkFn(deviceId); net->Save(outputPathname); LOGPRINTF(stderr, "\nModel with %d nodes saved as '%ls'.\n", (int)net->GetTotalNumberOfNodes(), outputPathname.c_str()); return; } // legacy model editing wstring editPath = config(L"editPath"); wstring ndlMacros = config(L"ndlMacros", ""); NDLScript<ElemType> ndlScript; if (!ndlMacros.empty()) { ndlScript.LoadConfigFile(ndlMacros); } MELScript<ElemType> melScript; melScript.LoadConfigFileAndResolveVariables(editPath, config); }
// ---------------------------------------------------------------------- // create PH vs VCal scans for a grid of phscale and phoffset values void phOpt(string rootfile = "phOpt.root", string cfgdirectory = "testROC") { ConfigParameters *configParameters = ConfigParameters::Singleton(); configParameters->setDirectory(cfgdirectory); string cfgFile = configParameters->getDirectory() + string("/configParameters.dat"); configParameters->readConfigParameterFile(cfgFile); PixTestParameters *ptp = new PixTestParameters(configParameters->getDirectory() + "/" + configParameters->getTestParameterFileName()); PixSetup *ap = new PixSetup("DEBUG", ptp, configParameters); cout << "pxar: dumping results into " << rootfile << endl; TFile *rfile = TFile::Open(rootfile.c_str(), "RECREATE"); PixTestFactory *factory = PixTestFactory::instance(); PixTest *pt = factory->createTest("DacScan", ap); pt->setDAC("ctrlreg", 4); pt->setParameter("PHmap", "1"); pt->setParameter("DAC", "Vcal"); pt->setParameter("DACLO", "0"); pt->setParameter("DACHI", "255"); int cycle(0); TH1D *h1(0); for (unsigned int io = 0; io < 26; ++io) { for (unsigned int is = 0; is < 52; ++is) { pt->setDAC("phoffset", io*10); pt->setDAC("phscale", is*5); pt->doTest(); h1 = (TH1D*)rfile->Get(Form("DacScan/ph_Vcal_c11_r20_C0_V%d", cycle)); h1->SetTitle(Form("ph_Vcal_c11_r20_C0_V%d phscale=%d phoffset=%d", cycle, is*5, io*10)); ++cycle; } } rfile->Print(); delete pt; rfile->Close(); ap->killApi(); }
void DoWriteOutput(const ConfigParameters& config) { ConfigParameters readerConfig(config(L"reader")); readerConfig.Insert("randomize", "None"); // we don't want randomization when output results DataReader testDataReader(readerConfig); ConfigArray minibatchSize = config(L"minibatchSize", "2048"); intargvector mbSize = minibatchSize; size_t epochSize = config(L"epochSize", "0"); if (epochSize == 0) { epochSize = requestDataSize; } vector<wstring> outputNodeNamesVector; let net = GetModelFromConfig<ConfigParameters, ElemType>(config, L"outputNodeNames", outputNodeNamesVector); // set tracing flags net->EnableNodeTracing(config(L"traceNodeNamesReal", ConfigParameters::Array(stringargvector())), config(L"traceNodeNamesCategory", ConfigParameters::Array(stringargvector())), config(L"traceNodeNamesSparse", ConfigParameters::Array(stringargvector()))); SimpleOutputWriter<ElemType> writer(net, 1); if (config.Exists("writer")) { ConfigParameters writerConfig(config(L"writer")); bool writerUnittest = writerConfig(L"unittest", "false"); DataWriter testDataWriter(writerConfig); writer.WriteOutput(testDataReader, mbSize[0], testDataWriter, outputNodeNamesVector, epochSize, writerUnittest); } else if (config.Exists("outputPath")) { wstring outputPath = config(L"outputPath"); WriteFormattingOptions formattingOptions(config); bool nodeUnitTest = config(L"nodeUnitTest", "false"); writer.WriteOutput(testDataReader, mbSize[0], outputPath, outputNodeNamesVector, formattingOptions, epochSize, nodeUnitTest); } else InvalidArgument("write command: You must specify either 'writer'or 'outputPath'"); }
// Create transformers based on the configuration, i.e. // deserializers = [ // [ // type = "ImageDataDeserializer" // module = "ImageReader" // input = [ // features = [ //----> transforms = [ // [type = "Crop"]:[type = "Scale"]... void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerConfig) { std::string defaultModule = deserializerConfig("module"); if (!deserializerConfig.Exists("input")) return; const ConfigParameters& inputs = deserializerConfig("input"); for (const pair<string, ConfigParameters>& section : inputs) { ConfigParameters inputBody = section.second; // Trying to find transforms in the input section of the config. if (inputBody.find("transforms") == inputBody.end()) continue; std::wstring inputName = Microsoft::MSR::CNTK::ToFixedWStringFromMultiByte(section.first); // Read transformers in order and appending them to the transformer pipeline. argvector<ConfigParameters> transforms = inputBody("transforms"); for (size_t j = 0; j < transforms.size(); ++j) { ConfigParameters p = transforms[j]; p.Insert("precision", deserializerConfig("precision")); TransformerPtr transformer = CreateTransformer(p, defaultModule, std::wstring()); m_transforms.push_back(Transformation{ transformer, inputName }); } // Let's add a cast transformer by default. It is noop if the type provided by others is float // or double, but will do a proper cast if the type is uchar. auto cast = CreateTransformer(inputBody, defaultModule, std::wstring(L"Cast")); m_transforms.push_back(Transformation{ cast, inputName }); } }
// special temporary function to guard against a now invalid usage of "truncated" which exists in some IPG production setups static void DisableLegacyTruncationSettings(const ConfigParameters& TopLevelConfig, const ConfigParameters& commandConfig) { if (TopLevelConfig.ExistsCurrent(L"Truncated")) { return; } // if any of the action has set a reader/SGD section and has different Truncated value for reader and SGD section ConfigArray actions = commandConfig(L"action"); for (size_t i = 0; i < actions.size(); i++) { if (actions[i] == "train" || actions[i] == "trainRNN") { ConfigParameters sgd = ConfigParameters(commandConfig(L"SGD")); ConfigParameters reader = ConfigParameters(commandConfig(L"reader")); // reader and SGD sections are two must-have sections in train/trainRNN if (reader.ExistsCurrent(L"Truncated") && !sgd.ExistsCurrent(L"Truncated")) { InvalidArgument("DisableLegacyUsage: setting Truncated only in reader section are not allowed. Please move Truncated=true/false to the top level section."); } } } }
void TestBing(const ConfigParameters& config) { if (!config.Exists("train.set")) { std::cout<<"USAGE: cn.exe train.set featureDim networkDescription learnRatesPerMB mbSize epochSize maxEpochs outdir test.set test.set.size"<<endl; exit(0); } size_t vdim = config("featureDim"); size_t udim = 1; vector<wstring> filepaths; filepaths.push_back(config("train.set")); DataReader<ElemType> dataReader(vdim, udim, filepaths, config); ConfigArray layerSizes(config("networkDescription")); SimpleNetworkBuilder<ElemType> netBuilder(layerSizes, TrainingCriterion::SquareError, EvalCriterion::SquareError, L"Sigmoid", true, false, false, &dataReader); ConfigArray learnRatesPerMB(config("learnRatesPerMB")); ConfigArray mbSize(config("mbSize")); size_t epochSize = config("epochSize"); size_t maxEpochs = config("maxEpochs"); float momentumPerMB = 0.9;//0.9f; std::string outDir = config("outdir"); wstring modelPath = wstring(msra::strfun::utf16(outDir)).append(L"\\bingranknet.dnn"); SimpleSGD<ElemType> sgd(learnRatesPerMB, mbSize, epochSize, maxEpochs, modelPath, momentumPerMB); sgd.Train(netBuilder, dataReader, true); std::cout<<std::endl<<std::endl<<std::endl<<std::endl<<"Testing ..... "<<std::endl; // test vector<wstring> testfilepaths; testfilepaths.push_back( config("test.set")); size_t testSize = config("test.set.size"); DataReader<ElemType> testDataReader(vdim, udim, testfilepaths, config); wstring finalNetPath = modelPath.append(L".").append(to_wstring(maxEpochs-1)); SimpleEvaluator<ElemType> eval(netBuilder.LoadNetworkFromFile(finalNetPath, false)); eval.Evaluate(testDataReader, 1024, (finalNetPath.append(L".results.txt")).c_str(),testSize); }
// TODO: Not safe from the ABI perspective. Will be uglified to make the interface ABI. // A factory method for creating text deserializers. extern "C" DATAREADER_API bool CreateDeserializer(IDataDeserializer** deserializer, const std::wstring& type, const ConfigParameters& deserializerConfig, CorpusDescriptorPtr corpus, bool) { string precision = deserializerConfig.Find("precision", "float"); if (!AreEqualIgnoreCase(precision, "float") && !AreEqualIgnoreCase(precision, "double")) { InvalidArgument("Unsupported precision '%s'", precision.c_str()); } // TODO: Remove type from the parser. Current implementation does not support streams of different types. if (type == L"CNTKTextFormatDeserializer") { if (precision == "float") *deserializer = new TextParser<float>(corpus, TextConfigHelper(deserializerConfig)); else // double *deserializer = new TextParser<double>(corpus, TextConfigHelper(deserializerConfig)); } else InvalidArgument("Unknown deserializer type '%ls'", type.c_str()); // Deserializer created. return true; }
// --------------------------------------------------------------------------- // main() for old CNTK config language // --------------------------------------------------------------------------- // called from wmain which is a wrapper that catches & repots Win32 exceptions int wmainOldCNTKConfig(int argc, wchar_t* argv[]) { ConfigParameters config; std::string rawConfigString = ConfigParameters::ParseCommandLine(argc, argv, config); // get the command param set they want bool timestamping = config(L"timestamping", false); if (timestamping) { ProgressTracing::SetTimestampingFlag(); } // get the command param set they want wstring logpath = config(L"stderr", L""); // [1/26/2015 erw, add done file so that it can be used on HPC] wstring DoneFile = config(L"DoneFile", L""); ConfigArray command = config(L"command", "train"); // paralleltrain training shared_ptr<Microsoft::MSR::CNTK::MPIWrapper> mpi; bool paralleltrain = config(L"parallelTrain", "false"); if (paralleltrain) mpi = MPIWrapper::GetInstance(true /*create*/); g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false); TracingGPUMemoryAllocator::SetTraceLevel(config(L"traceGPUMemoryAllocations", 0)); if (logpath != L"") { for (int i = 0; i < command.size(); i++) { logpath += L"_"; logpath += (wstring) command[i]; } logpath += L".log"; if (paralleltrain) { std::wostringstream oss; oss << mpi->CurrentNodeRank(); logpath += L"rank" + oss.str(); } RedirectStdErr(logpath); } PrintBuiltInfo(); // this one goes to log file std::string timestamp = TimeDateStamp(); // dump config info fprintf(stderr, "\n"); LOGPRINTF(stderr, "Running on %s at %s\n", GetHostName().c_str(), timestamp.c_str()); LOGPRINTF(stderr, "Command line: \n"); for (int i = 0; i < argc; i++) fprintf(stderr, "%*s%ls", i > 0 ? 
2 : 0, "", argv[i]); // use 2 spaces for better visual separability fprintf(stderr, "\n\n"); #if 1 //def _DEBUG // This simply merges all the different config parameters specified (eg, via config files or via command line directly), // and prints it. fprintf(stderr, "\n\n"); LOGPRINTF(stderr, ">>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>\n"); LOGPRINTF(stderr, "%s\n", rawConfigString.c_str()); LOGPRINTF(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<\n"); // Same as above, but all variables are resolved. If a parameter is set multiple times (eg, set in config, overridden at command line), // All of these assignments will appear, even though only the last assignment matters. fprintf(stderr, "\n"); LOGPRINTF(stderr, ">>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n"); LOGPRINTF(stderr, "%s\n", config.ResolveVariables(rawConfigString).c_str()); LOGPRINTF(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n"); // This outputs the final value each variable/parameter is assigned to in config (so if a parameter is set multiple times, only the last // value it is set to will appear). 
fprintf(stderr, "\n"); LOGPRINTF(stderr, ">>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n"); config.dumpWithResolvedVariables(); LOGPRINTF(stderr, "<<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n"); #endif LOGPRINTF(stderr, "Commands:"); for (int i = 0; i < command.size(); i++) fprintf(stderr, " %s", command[i].c_str()); fprintf(stderr, "\n"); // run commands std::string type = config(L"precision", "float"); // accept old precision key for backward compatibility if (config.Exists("type")) InvalidArgument("CNTK: Use of 'type' parameter is deprecated, it is called 'precision' now."); LOGPRINTF(stderr, "Precision = \"%s\"\n", type.c_str()); if (type == "float") DoCommands<float>(config, mpi); else if (type == "double") DoCommands<double>(config, mpi); else RuntimeError("CNTK: Invalid precision string: \"%s\", must be \"float\" or \"double\"", type.c_str()); // if completed then write a DoneFile if requested if (!DoneFile.empty()) { FILE* fp = fopenOrDie(DoneFile.c_str(), L"w"); fprintf(fp, "successfully finished at %s on %s\n", TimeDateStamp().c_str(), GetHostName().c_str()); fcloseOrDie(fp); } // TODO: Change back to COMPLETED (no underscores) LOGPRINTF(stderr, "__COMPLETED__\n"); fflush(stderr); MPIWrapper::DeleteInstance(); return EXIT_SUCCESS; }
// Extracts the image reader settings from the configuration.
// Exactly one section with a "width" parameter (features) and exactly one section with
// a "labelDim" parameter (labels) are required. The feature section supplies width,
// height, channels and the minibatch format: "nhwc"/"legacy" select HWC, while
// "nchw"/"cudnn" keep the default CHW. Also read are the map file path ("file"), the
// randomization mode ("auto" or "none"), the precision ("float" or "double") and the
// CPU thread count ("numCPUThreads").
// Raises a RuntimeError on any unsupported value.
ImageConfigHelper::ImageConfigHelper(const ConfigParameters& config)
    : m_dataFormat(CHW)
{
    std::vector<std::string> featureNames = GetSectionsWithParameter(config, "width");
    std::vector<std::string> labelNames = GetSectionsWithParameter(config, "labelDim");

    // REVIEW alexeyk: currently support only one feature and label section.
    if (featureNames.size() != 1 || labelNames.size() != 1)
    {
        RuntimeError(
            "ImageReader currently supports a single feature and label stream. '%d' features , '%d' labels found.",
            static_cast<int>(featureNames.size()),
            static_cast<int>(labelNames.size()));
    }

    ConfigParameters featureSection = config(featureNames[0]);
    size_t w = featureSection("width");
    size_t h = featureSection("height");
    size_t c = featureSection("channels");

    std::string mbFmt = featureSection("mbFormat", "nchw");
    if (AreEqualIgnoreCase(mbFmt, "nhwc") || AreEqualIgnoreCase(mbFmt, "legacy"))
    {
        m_dataFormat = HWC;
    }
    else if (!AreEqualIgnoreCase(mbFmt, "nchw") && !AreEqualIgnoreCase(mbFmt, "cudnn"))
    {
        // Reject only formats that are neither "nchw" nor "cudnn". The previous
        // '!nchw || cudnn' condition also rejected "cudnn", because "cudnn" is not "nchw".
        RuntimeError("ImageReader does not support the sample format '%s', only 'nchw' and 'nhwc' are supported.", mbFmt.c_str());
    }

    auto features = std::make_shared<StreamDescription>();
    features->m_id = 0;
    features->m_name = msra::strfun::utf16(featureSection.ConfigName());
    features->m_sampleLayout = std::make_shared<TensorShape>(ImageDimensions(w, h, c).AsTensorShape(m_dataFormat));
    m_streams.push_back(features);

    ConfigParameters label = config(labelNames[0]);
    size_t labelDimension = label("labelDim");

    auto labelSection = std::make_shared<StreamDescription>();
    labelSection->m_id = 1;
    labelSection->m_name = msra::strfun::utf16(label.ConfigName());
    labelSection->m_sampleLayout = std::make_shared<TensorShape>(labelDimension);
    m_streams.push_back(labelSection);

    m_mapPath = config(L"file");

    std::string rand = config(L"randomize", "auto");
    if (AreEqualIgnoreCase(rand, "auto"))
    {
        m_randomize = true;
    }
    else if (AreEqualIgnoreCase(rand, "none"))
    {
        m_randomize = false;
    }
    else
    {
        RuntimeError("'randomize' parameter must be set to 'auto' or 'none'");
    }

    // Identify precision
    string precision = config.Find("precision", "float");
    if (AreEqualIgnoreCase(precision, "float"))
    {
        features->m_elementType = ElementType::tfloat;
        labelSection->m_elementType = ElementType::tfloat;
    }
    else if (AreEqualIgnoreCase(precision, "double"))
    {
        features->m_elementType = ElementType::tdouble;
        labelSection->m_elementType = ElementType::tdouble;
    }
    else
    {
        RuntimeError("Not supported precision '%s'. Expected 'double' or 'float'.", precision.c_str());
    }

    m_cpuThreadCount = config(L"numCPUThreads", 0);
}
// Initializes the reader from its configuration section.
// The configuration must name exactly two feature sections and one label section. Each
// feature section supplies a "dim" and a "file", which are used to initialize the query
// and document inputs. Also read are "traceLevel", "randomize" ("None", "Auto" or a
// value) and "minibatchMode".
// Raises a RuntimeError when the number of features/labels is wrong or a feature
// section is empty.
void DSSMReader<ElemType>::InitFromConfig(const ConfigRecordType& readerConfig)
{
    std::vector<std::wstring> features;
    std::vector<std::wstring> labels;

    // Determine the names of the features and lables sections in the config file.
    // features - [in,out] a vector of feature name strings
    // labels - [in,out] a vector of label name strings
    // For DSSM dataset, we only need features. No label is necessary. The following "labels" just serves as a place holder
    GetFileConfigNames(readerConfig, features, labels);

    // For DSSM dataset, it must have exactly two features
    // In the config file, we must specify query features first, then document features. The sequence is different here. Pay attention
    if (features.size() != 2 || labels.size() != 1)
    {
        RuntimeError("DSSM requires exactly two features and one label. Their names should match those in NDL definition");
        return;
    }
    m_featuresNameQuery = features[1];
    m_featuresNameDoc = features[0];
    m_labelsName = labels[0];

    // Reset the reading state.
    m_mbStartSample = m_epoch = m_totalSamples = m_epochStartSample = 0;
    m_labelIdMax = m_labelDim = 0;
    m_partialMinibatch = m_endReached = false;
    m_labelType = labelCategory;
    m_readNextSample = 0;
    m_traceLevel = readerConfig(L"traceLevel", 0);

    if (!readerConfig.Exists(L"randomize"))
    {
        m_randomizeRange = randomizeNone;
    }
    else
    {
        // BUGBUG: reading out string and number... ugh
        wstring randomizeString = readerConfig(L"randomize");
        if (randomizeString == L"None")
        {
            m_randomizeRange = randomizeNone;
        }
        else if (randomizeString == L"Auto")
        {
            m_randomizeRange = randomizeAuto;
        }
        else
        {
            m_randomizeRange = readerConfig(L"randomize");
        }
    }

    std::string minibatchMode(readerConfig(L"minibatchMode", "Partial"));
    m_partialMinibatch = EqualCI(minibatchMode, "Partial");

    // Get the config parameters for query feature and doc feature
    ConfigParameters queryFeatureConfig = readerConfig(m_featuresNameQuery, "");
    ConfigParameters docFeatureConfig = readerConfig(m_featuresNameDoc, "");

    if (queryFeatureConfig.size() == 0)
        RuntimeError("features file not found, required in configuration: i.e. 'features=[file=c:\\myfile.txt;start=1;dim=123]'");
    if (docFeatureConfig.size() == 0)
        RuntimeError("features file not found, required in configuration: i.e. 'features=[file=c:\\myfile.txt;start=1;dim=123]'");

    // Read in feature size information
    // This information will be used to handle OOVs
    m_featuresDimQuery = queryFeatureConfig(L"dim");
    m_featuresDimDoc = docFeatureConfig(L"dim");

    std::wstring queryFile = queryFeatureConfig("file");
    std::wstring docFile = docFeatureConfig("file");

    dssm_queryInput.Init(queryFile, m_featuresDimQuery);
    dssm_docInput.Init(docFile, m_featuresDimDoc);

    m_totalSamples = dssm_queryInput.numRows;

    // The read order starts out as the identity permutation and is allocated only once.
    if (read_order == NULL)
    {
        read_order = new int[m_totalSamples];
        for (int sampleIndex = 0; sampleIndex < m_totalSamples; sampleIndex++)
        {
            read_order[sampleIndex] = sampleIndex;
        }
    }

    m_mbSize = 0;
}
// The whole CompositeDataReader is meant as a stopgap to allow deserializers/transformers composition until SGD talkes // directly to the new Reader API. // For more information please see its header file. // This method composes together packers + randomizer + a set of transformers and deserializers. CompositeDataReader::CompositeDataReader(const ConfigParameters& config) : m_truncationLength(0) { wstring action = config(L"action", L""); bool isActionWrite = AreEqualIgnoreCase(action, L"write"); // By default, we use numeric sequence keys (i.e., for cbf, ctf, image and base64 readers). // For MLF and HTK deserializers, we use non-numeric (string) sequence keys. bool useNumericSequenceKeys = true; if (ContainsDeserializer(config, L"HTKFeatureDeserializer") || ContainsDeserializer(config, L"HTKMLFDeserializer")) { useNumericSequenceKeys = false; } useNumericSequenceKeys = config(L"useNumericSequenceKeys", useNumericSequenceKeys); bool useHash = config(L"hashSequenceKeys", false); m_corpus = std::make_shared<CorpusDescriptor>(useNumericSequenceKeys, useHash); // Identifying packing mode. bool frameMode = config(L"frameMode", false); bool truncated = config(L"truncated", false); if (frameMode && truncated) { LogicError("frameMode and truncated BPTT are mutually exclusive."); } if (isActionWrite) // For writing we always use sequence mode. { m_packingMode = PackingMode::sequence; } else if (frameMode) { m_packingMode = PackingMode::sample; } else if (truncated) { m_packingMode = PackingMode::truncated; m_truncationLength = config(L"truncationLength", 0); if (m_truncationLength == 0) { InvalidArgument("Truncation length cannot be 0."); } } else { m_packingMode = PackingMode::sequence; } m_rightSplice = config(L"rightSplice", 0); if (m_rightSplice > m_truncationLength) InvalidArgument("rightSplice should not be greater than truncation length"); m_precision = config("precision", "float"); // Creating deserializers. 
bool composable = CreateDeserializers(config); if (m_deserializers.empty()) InvalidArgument("Could not find deserializers in the reader config."); if (!composable && m_deserializers.size() > 1) InvalidArgument("Currently user defined deserializers do not support composability. Please specify a single deserializer."); DataDeserializerPtr deserializer = m_deserializers.front(); if (m_deserializers.size() > 1) { // Bundling deserializers together. // Option whether we need to check data between different deserializers. bool cleanse = config(L"checkData", true); deserializer = std::make_shared<Bundler>(config, m_corpus, deserializer, m_deserializers, cleanse); } int verbosity = config(L"verbosity", 0); // Pick up the randomizer, always picking up no randomization for the write mode. bool randomize = isActionWrite ? false : config(L"randomize", true); // Get maximum number of allowed errors per worker. size_t maxErrors = config(L"maxErrors", 0); // By default do not use omp threads for deserialization of sequences. // It makes sense to put it to true for cases when deserialization is CPU intensive, // i.e. decompression of images. bool multiThreadedDeserialization = config(L"multiThreadedDeserialization", ContainsDeserializer(config, L"ImageDeserializer")); if (!composable) // Pick up simple interface. { if (randomize) { bool sampleBasedRandomizationWindow = config(L"sampleBasedRandomizationWindow", false); m_sequenceEnumerator = std::make_shared<LTTumblingWindowRandomizer>(deserializer, sampleBasedRandomizationWindow, config(L"randomizationWindow", requestDataSize), GetRandomSeed(config), multiThreadedDeserialization, maxErrors); } else m_sequenceEnumerator = std::make_shared<LTNoRandomizer>(deserializer, multiThreadedDeserialization, maxErrors); } else { if (randomize) { // By default randomizing the whole data set. size_t randomizationWindow = requestDataSize; // Currently in case of images, a single chunk is a single image. 
So no need to randomize, chunks will be randomized anyway. if (ContainsDeserializer(config, L"ImageDeserializer") && m_deserializers.size() == 1) { randomizationWindow = 1; m_packingMode = PackingMode::sample; } randomizationWindow = config(L"randomizationWindow", randomizationWindow); bool sampleBasedRandomizationWindow = config(L"sampleBasedRandomizationWindow", true); if (ContainsDeserializer(config, L"CNTKTextFormatDeserializer") && !config.ExistsCurrent(L"randomizationWindow")) { if (!config.ExistsCurrent(L"sampleBasedRandomizationWindow") || // sampleBasedRandomizationWindow is not specified !sampleBasedRandomizationWindow) // randomization window is in chunks { sampleBasedRandomizationWindow = false; size_t chunkSizeBytes = config(L"chunkSizeInBytes", g_32MB); // 32 MB by default randomizationWindow = g_4GB / chunkSizeBytes; // ~ 4 GB disk space worth of chunks // TODO: decrease randomization window if m_deserializers.size() > 1 ? } else { // config explicitly says to use a sample-based window, but does not specify its size. LogicError("'sampleBasedRandomizationWindow' (== 'true') requires that the 'randomizationWindow' is explicitly specified."); } } bool shouldPrefetch = true; m_sequenceEnumerator = std::make_shared<BlockRandomizer>(verbosity, randomizationWindow, deserializer, shouldPrefetch, multiThreadedDeserialization, maxErrors, sampleBasedRandomizationWindow, GetRandomSeed(config)); } else m_sequenceEnumerator = std::make_shared<NoRandomizer>(deserializer, multiThreadedDeserialization, maxErrors); } // In case when there are transforms, applying them to the data. m_sequenceEnumerator = m_transforms.empty() ? m_sequenceEnumerator : std::make_shared<TransformController>(m_transforms, m_sequenceEnumerator, multiThreadedDeserialization); // TODO: Output stream descriptions - this should come from the network so that we can check // that input matches what the network expects (including tensor shape, etc.). 
std::vector<StreamInformation> outputStreams = m_sequenceEnumerator->GetStreamDescriptions(); // Currently for prefetch we use two alternating buffers, // same is the default. size_t numAlternatingBuffers = 2; // Check whether to use local timeline, by default we use it for better performance. bool localTimeline = config(L"localTimeline", true); switch (m_packingMode) { case PackingMode::sample: m_packer = std::make_shared<FramePacker>( m_sequenceEnumerator, outputStreams, numAlternatingBuffers, localTimeline, m_corpus); break; case PackingMode::sequence: m_packer = std::make_shared<SequencePacker>( m_sequenceEnumerator, outputStreams, numAlternatingBuffers, localTimeline, m_corpus); break; case PackingMode::truncated: { // Currently BPTT does not support sparse format as output. // We always require dense from the packer. for (auto& s : outputStreams) s.m_storageFormat = StorageFormat::Dense; m_packer = std::make_shared<TruncatedBPTTPacker>( m_sequenceEnumerator, outputStreams, numAlternatingBuffers, m_corpus); break; } default: LogicError("Unsupported type of packer '%d'.", (int)m_packingMode); } }
// called from wmain which is a wrapper that catches & repots Win32 exceptions int wmainOldCNTKConfig(int argc, wchar_t* argv[]) { std::string timestamp = TimeDateStamp(); PrintBanner(argc, argv, timestamp); ConfigParameters config; std::string rawConfigString = ConfigParameters::ParseCommandLine(argc, argv, config); // get the command param set they want int traceLevel = config(L"traceLevel", 0); #ifndef CPUONLY ConfigValue val = config("deviceId", "auto"); if (!EqualCI(val, "cpu") && !EqualCI(val, "auto")) { if (static_cast<int>(val) >= 0) // gpu (id >= 0) { CheckSupportForGpu(static_cast<int>(val)); // throws if gpu is not supported } } #endif if (config(L"timestamping", false)) ProgressTracing::SetTimestampingFlag(); if (config(L"forceDeterministicAlgorithms", false)) Globals::ForceDeterministicAlgorithms(); // get the command param set they want wstring logpath = config(L"stderr", L""); wstring doneFile = config(L"doneFile", L""); ConfigArray command = config(L"command", "train"); // parallel training // The top-level 'parallelTrain' is a bool, not to be confused with the parallelTrain block inside SGD. shared_ptr<Microsoft::MSR::CNTK::MPIWrapper> mpi; auto ensureMPIWrapperCleanup = MakeScopeExit(&MPIWrapper::DeleteInstance); // when running under MPI with more than one node, use 'true' as the default value for parallelTrain, // 'false' otherwise. 
bool paralleltrain = config(L"parallelTrain", (MPIWrapper::GetTotalNumberOfMPINodes() > 1)); if (paralleltrain) { mpi = MPIWrapper::GetInstance(true /*create*/); } g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false); TracingGPUMemoryAllocator::SetTraceLevel(config(L"traceGPUMemoryAllocations", 0)); if (logpath != L"") { #if 1 // keep the ability to do it how it was done before 1.8; delete if noone needs it anymore let useOldWay = ProgressTracing::GetTimestampingFlag(); // enable it when running in our server farm if (useOldWay) { for (int i = 0; i < command.size(); i++) // append all 'command' entries { logpath += L"_"; logpath += (wstring)command[i]; } logpath += L".log"; // append .log } if (paralleltrain && useOldWay) { std::wostringstream oss; oss << mpi->CurrentNodeRank(); logpath += L"rank" + oss.str(); } else #endif // for MPI workers except main, append .rankN if (paralleltrain && mpi->CurrentNodeRank() != 0) logpath += msra::strfun::wstrprintf(L".rank%d", mpi->CurrentNodeRank()); RedirectStdErr(logpath); if (traceLevel == 0) PrintBanner(argc, argv, timestamp); // repeat simple banner into log file } // full config info if (traceLevel > 0) { PrintBuiltInfo(); PrintGpuInfo(); } #ifdef _DEBUG if (traceLevel > 0) { // This simply merges all the different config parameters specified (eg, via config files or via command line directly), // and prints it. fprintf(stderr, "\nConfiguration, Raw:\n\n"); LOGPRINTF(stderr, "%s\n", rawConfigString.c_str()); // Same as above, but all variables are resolved. If a parameter is set multiple times (eg, set in config, overridden at command line), // All of these assignments will appear, even though only the last assignment matters. 
fprintf(stderr, "\nConfiguration After Variable Resolution:\n\n"); LOGPRINTF(stderr, "%s\n", config.ResolveVariables(rawConfigString).c_str()); } #endif SetMathLibTraceLevel(traceLevel); // This outputs the final value each variable/parameter is assigned to in config (so if a parameter is set multiple times, only the last // value it is set to will appear). if (traceLevel > 0) { fprintf(stderr, "\nConfiguration After Processing and Variable Resolution:\n\n"); config.dumpWithResolvedVariables(); LOGPRINTF(stderr, "Commands:"); for (int i = 0; i < command.size(); i++) fprintf(stderr, " %s", command[i].c_str()); fprintf(stderr, "\n"); } // run commands std::string type = config(L"precision", "float"); // accept old precision key for backward compatibility if (config.Exists("type")) InvalidArgument("CNTK: Use of 'type' parameter is deprecated, it is called 'precision' now."); if (traceLevel > 0) { LOGPRINTF(stderr, "precision = \"%s\"\n", type.c_str()); } if (type == "float") DoCommands<float>(config, mpi); else if (type == "double") DoCommands<double>(config, mpi); else RuntimeError("CNTK: Invalid precision string: \"%s\", must be \"float\" or \"double\"", type.c_str()); // if completed then write a doneFile if requested if (!doneFile.empty()) { FILE* fp = fopenOrDie(doneFile.c_str(), L"w"); fprintf(fp, "Successfully finished at %s on %s\n", TimeDateStamp().c_str(), GetHostName().c_str()); fcloseOrDie(fp); } if (ProgressTracing::GetTimestampingFlag()) { LOGPRINTF(stderr, "__COMPLETED__\n"); // running in server environment which expects this string } else fprintf(stderr, "COMPLETED.\n"); fflush(stderr); return EXIT_SUCCESS; }
void DoWriteOutput(const ConfigParameters& config) { ConfigParameters readerConfig(config(L"reader")); readerConfig.Insert("traceLevel", config(L"traceLevel", "0")); readerConfig.Insert("randomize", "None"); // we don't want randomization when output results DataReader testDataReader(readerConfig); DEVICEID_TYPE deviceId = DeviceFromConfig(config); ConfigArray minibatchSize = config(L"minibatchSize", "2048"); wstring modelPath = config(L"modelPath"); intargvector mbSize = minibatchSize; size_t epochSize = config(L"epochSize", "0"); if (epochSize == 0) { epochSize = requestDataSize; } ConfigArray outputNodeNames = config(L"outputNodeNames", ""); vector<wstring> outputNodeNamesVector; // Note this is required since the user might specify OutputNodeNames in the config, so don't use CreateFromFile, // instead we build the network ourselves. auto net = make_shared<ComputationNetwork>(deviceId); net->Read<ElemType>(modelPath); if (outputNodeNames.size() > 0) { net->OutputNodes().clear(); for (int i = 0; i < outputNodeNames.size(); ++i) { outputNodeNamesVector.push_back(outputNodeNames[i]); net->OutputNodes().emplace_back(net->GetNodeFromName(outputNodeNames[i])); } } net->CompileNetwork(); SimpleOutputWriter<ElemType> writer(net, 1); if (config.Exists("writer")) { ConfigParameters writerConfig(config(L"writer")); bool bWriterUnittest = writerConfig(L"unittest", "false"); DataWriter testDataWriter(writerConfig); writer.WriteOutput(testDataReader, mbSize[0], testDataWriter, outputNodeNamesVector, epochSize, bWriterUnittest); } else if (config.Exists("outputPath")) { wstring outputPath = config(L"outputPath"); // gather additional formatting options typename decltype(writer)::WriteFormattingOptions formattingOptions; if (config.Exists("format")) { ConfigParameters formatConfig(config(L"format")); if (formatConfig.ExistsCurrent("type")) // do not inherit 'type' from outer block { string type = formatConfig(L"type"); if (type == "real") formattingOptions.isCategoryLabel = 
false; else if (type == "category") formattingOptions.isCategoryLabel = true; else InvalidArgument("write: type must be 'real' or 'category'"); if (formattingOptions.isCategoryLabel) formattingOptions.labelMappingFile = (wstring)formatConfig(L"labelMappingFile", L""); } formattingOptions.transpose = formatConfig(L"transpose", formattingOptions.transpose); formattingOptions.prologue = formatConfig(L"prologue", formattingOptions.prologue); formattingOptions.epilogue = formatConfig(L"epilogue", formattingOptions.epilogue); formattingOptions.sequenceSeparator = formatConfig(L"sequenceSeparator", formattingOptions.sequenceSeparator); formattingOptions.sequencePrologue = formatConfig(L"sequencePrologue", formattingOptions.sequencePrologue); formattingOptions.sequenceEpilogue = formatConfig(L"sequenceEpilogue", formattingOptions.sequenceEpilogue); formattingOptions.elementSeparator = formatConfig(L"elementSeparator", formattingOptions.elementSeparator); formattingOptions.sampleSeparator = formatConfig(L"sampleSeparator", formattingOptions.sampleSeparator); formattingOptions.precisionFormat = formatConfig(L"precisionFormat", formattingOptions.precisionFormat); } writer.WriteOutput(testDataReader, mbSize[0], outputPath, outputNodeNamesVector, formattingOptions, epochSize); } else InvalidArgument("write command: You must specify either 'writer'or 'outputPath'"); }
void TestConfiguration(const ConfigParameters& configBase) { ConfigParameters configMacros = configBase("macroExample"); for (auto iterMacro = configMacros.begin(); iterMacro != configMacros.end(); iterMacro++) { std::map<std::string, ConfigValue> paramsMap; ConfigParameters configCN = iterMacro->second; if (configCN.Exists("parameters")) { ConfigArray params = configCN("parameters"); for (int i = 0; i < params.size(); ++i) paramsMap[params[i]] = ConfigValue("uninitialized"); } ConfigParameters configNodes = configCN("NodeList"); for (auto iter = configNodes.begin(); iter != configNodes.end(); iter++) { std::wstring nodeName; nodeName = msra::strfun::utf16(iter->first); ConfigArray configNode = iter->second; std::string opName = configNode[0]; if (IsParameter(paramsMap, opName)) { ; } if (opName == "InputValue" && configNode.size() >= 2) { size_t rows = 0; if (!IsParameter(paramsMap, configNode[1])) rows = configNode[1]; } else if (opName == "LearnableParameter" && configNode.size() >= 3) { size_t rows = 0; if (!IsParameter(paramsMap, configNode[1])) rows = configNode[1]; size_t cols = 0; if (!IsParameter(paramsMap, configNode[2])) cols = configNode[2]; bool learningRateMultiplier = 0; bool init = false; ConfigArray initData; // look for optional parameters for (int i = 3; i < configNode.size(); ++i) { bool needsGradient = false; ConfigParameters configParam = configNode[i]; if (configParam.Exists("learningRateMultiplier")) // TODO: should this be a test for 'true' rather than Exists()? needsGradient = (float)configParam("learningRateMultiplier") > 0? 
true : false; else if (configParam.Exists("init")) { init = true; initData = configParam["init"]; } } // if initializing, do so now if (init) { bool uniform = true; ElemType initValueScale = 1; size_t inputSize = cols; if (initData.size() > 0) initValueScale = initData[0]; if (initData.size() > 1) uniform = EqualCI(initData[1], "uniform"); } } } // now link up all the nodes configNodes = configCN("Relation"); for (auto iter = configNodes.begin(); iter != configNodes.end(); iter++) { std::wstring nodeName = msra::strfun::utf16(iter->first); ConfigArray configNode = iter->second; int numChildren = (int) configNode.size(); for (int i = 0; i < numChildren; ++i) { std::wstring nodeName = configNode[i]; } } ConfigParameters configRoots = configCN("RootNodes"); ConfigArray configNode = configRoots("FeatureNodes"); for (size_t i = 0; i < configNode.size(); i++) { std::wstring nodeName = configNode[i]; } if (configRoots.Exists("LabelNodes")) { configNode = configRoots("LabelNodes"); for (size_t i = 0; i < configNode.size(); i++) { std::wstring nodeName = configNode[i]; } } if (configRoots.Exists("CriterionNodes")) { configNode = configRoots("CriterionNodes"); for (size_t i = 0; i < configNode.size(); i++) { std::wstring nodeName = configNode[i]; } } if (configRoots.Exists("CriteriaNodes")) // legacy { configNode = configRoots("CriteriaNodes"); for (size_t i = 0; i < configNode.size(); i++) { std::wstring nodeName = configNode[i]; } } if (configRoots.Exists("NodesReqMultiSeqHandling")) { configNode = configRoots("NodesReqMultiSeqHandling"); for (size_t i = 0; i < configNode.size(); i++) { std::wstring nodeName = configNode[i]; } fprintf(stderr, "WARNING: 'NodesReqMultiSeqHandling' flag is defunct\n"); } if (configRoots.Exists("EvalNodes")) { configNode = configRoots("EvalNodes"); for (size_t i = 0; i < configNode.size(); i++) { std::wstring nodeName = configNode[i]; } } if (configRoots.Exists("OutputNodes")) { configNode = configRoots("OutputNodes"); for (size_t i = 0; i < 
configNode.size(); i++) { std::wstring nodeName = configNode[i]; } } } }
int wmainOldCNTKConfig(int argc, wchar_t* argv[]) // called from wmain which is a wrapper that catches & repots Win32 exceptions { ConfigParameters config; std::string rawConfigString = ConfigParameters::ParseCommandLine(argc, argv, config); // get the command param set they want wstring logpath = config(L"stderr", L""); // [1/26/2015 erw, add done file so that it can be used on HPC] wstring DoneFile = config(L"DoneFile", L""); ConfigArray command = config(L"command", "train"); // paralleltrain training g_mpi = nullptr; bool paralleltrain = config(L"parallelTrain", "false"); if (paralleltrain) { g_mpi = new MPIWrapper(); } g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false); TracingGPUMemoryAllocator::SetTraceLevel(config(L"traceGPUMemoryAllocations", 0)); if (logpath != L"") { for (int i = 0; i < command.size(); i++) { logpath += L"_"; logpath += (wstring) command[i]; } logpath += L".log"; if (paralleltrain) { std::wostringstream oss; oss << g_mpi->CurrentNodeRank(); logpath += L"rank" + oss.str(); } RedirectStdErr(logpath); } PrintBuiltInfo(); // this one goes to log file std::string timestamp = TimeDateStamp(); // dump config info fprintf(stderr, "running on %s at %s\n", GetHostName().c_str(), timestamp.c_str()); fprintf(stderr, "command line: \n"); for (int i = 0; i < argc; i++) { fprintf(stderr, "%s ", WCharToString(argv[i]).c_str()); } // This simply merges all the different config parameters specified (eg, via config files or via command line directly), // and prints it. fprintf(stderr, "\n\n>>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>\n"); fprintf(stderr, "%s\n", rawConfigString.c_str()); fprintf(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<\n"); // Same as above, but all variables are resolved. If a parameter is set multiple times (eg, set in config, overriden at command line), // All of these assignments will appear, even though only the last assignment matters. 
fprintf(stderr, "\n>>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n"); fprintf(stderr, "%s\n", config.ResolveVariables(rawConfigString).c_str()); fprintf(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n"); // This outputs the final value each variable/parameter is assigned to in config (so if a parameter is set multiple times, only the last // value it is set to will appear). fprintf(stderr, "\n>>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n"); config.dumpWithResolvedVariables(); fprintf(stderr, "<<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n"); fprintf(stderr, "command: "); for (int i = 0; i < command.size(); i++) { fprintf(stderr, "%s ", command[i].c_str()); } // run commands std::string type = config(L"precision", "float"); // accept old precision key for backward compatibility if (config.Exists("type")) { type = config(L"type", "float"); } fprintf(stderr, "\nprecision = %s\n", type.c_str()); if (type == "float") { DoCommands<float>(config); } else if (type == "double") { DoCommands<double>(config); } else { RuntimeError("invalid precision specified: %s", type.c_str()); } // still here , write a DoneFile if necessary if (!DoneFile.empty()) { FILE* fp = fopenOrDie(DoneFile.c_str(), L"w"); fprintf(fp, "successfully finished at %s on %s\n", TimeDateStamp().c_str(), GetHostName().c_str()); fcloseOrDie(fp); } fprintf(stderr, "COMPLETED\n"), fflush(stderr); delete g_mpi; return EXIT_SUCCESS; }
// Parses the CNTKTextFormatReader configuration.
// Builds one StreamDescriptor per entry of the "input" section (id, name, dimension, storage format,
// alias, element type) and reads the reader-level settings: file, skipSequenceIds, maxErrors, traceLevel,
// chunkSizeInBytes, keepDataInMemory, frameMode, the randomization window and sampleBasedRandomizationWindow.
// Any invalid or missing setting is reported through RuntimeError.
TextConfigHelper::TextConfigHelper(const ConfigParameters& config)
{
    if (!config.ExistsCurrent(L"input"))
    {
        RuntimeError("CNTKTextFormatReader configuration does not contain \"input\" section.");
    }

    const ConfigParameters& input = config(L"input");

    if (input.empty())
    {
        RuntimeError("CNTKTextFormatReader configuration contains an empty \"input\" section.");
    }

    // The element type is shared by all streams.
    string precision = config.Find("precision", "float");
    if (AreEqualIgnoreCase(precision, "double"))
    {
        m_elementType = DataType::Double;
    }
    else if (AreEqualIgnoreCase(precision, "float"))
    {
        m_elementType = DataType::Float;
    }
    else
    {
        RuntimeError("Not supported precision '%s'. Expected 'double' or 'float'.", precision.c_str());
    }

    StreamId id = 0;
    map<string, wstring> aliasToInputMap; // alias -> name of the input that claimed it
    for (const pair<string, ConfigParameters>& section : input)
    {
        ConfigParameters input2 = section.second;
        wstring name = msra::strfun::utf16(section.first);

        if (!input2.ExistsCurrent(L"dim") || !input2.ExistsCurrent(L"format"))
        {
            RuntimeError("Input section for input '%ls' does not specify all the required parameters, "
                "\"dim\" and \"format\".", name.c_str());
        }

        StreamDescriptor stream;
        stream.m_id = id++;
        stream.m_name = name;
        stream.m_sampleDimension = input2(L"dim");
        stream.m_definesMbSize = input2(L"definesMBSize", false);
        string type = input2(L"format");

        if (AreEqualIgnoreCase(type, "dense"))
        {
            stream.m_storageFormat = StorageFormat::Dense;
        }
        else if (AreEqualIgnoreCase(type, "sparse"))
        {
            stream.m_storageFormat = StorageFormat::SparseCSC;
            // Sparse indices are stored as IndexType, so the dimension has to fit into it.
            if (stream.m_sampleDimension > numeric_limits<IndexType>::max())
            {
                RuntimeError("Sample dimension (%" PRIu64 ") for sparse input '%ls'"
                    " exceeds the maximum allowed value (%" PRIu64 ").\n",
                    stream.m_sampleDimension, name.c_str(), (size_t)numeric_limits<IndexType>::max());
            }
        }
        else
        {
            RuntimeError("'format' parameter must be set either to 'dense' or 'sparse'.");
        }

        // alias is optional; it defaults to the name of the input section
        if (input2.ExistsCurrent(L"alias"))
        {
            stream.m_alias = input2(L"alias");
            if (stream.m_alias.empty())
            {
                RuntimeError("Alias value for input '%ls' is empty.", name.c_str());
            }
        }
        else
        {
            stream.m_alias = section.first;
        }

        // Every alias may be used by one input only (single lookup instead of find + operator[]).
        auto existingAlias = aliasToInputMap.find(stream.m_alias);
        if (existingAlias != aliasToInputMap.end())
        {
            RuntimeError("Alias %s is already mapped to input %ls.",
                stream.m_alias.c_str(), existingAlias->second.c_str());
        }
        else
        {
            aliasToInputMap[stream.m_alias] = stream.m_name;
        }

        stream.m_elementType = m_elementType;
        m_streams.push_back(stream);
    }

    m_filepath = msra::strfun::utf16(config(L"file"));
    m_skipSequenceIds = config(L"skipSequenceIds", false);
    m_maxErrors = config(L"maxErrors", 0);
    m_traceLevel = config(L"traceLevel", 1);
    m_chunkSizeBytes = config(L"chunkSizeInBytes", g_32MB); // 32 MB by default
    m_keepDataInMemory = config(L"keepDataInMemory", false);
    m_frameMode = config(L"frameMode", false);

    m_randomizationWindow = GetRandomizationWindowFromConfig(config);
    m_sampleBasedRandomizationWindow = config(L"sampleBasedRandomizationWindow", false);

    // An automatic, chunk-based window is derived from the chunk size.
    if (!m_sampleBasedRandomizationWindow && m_randomizationWindow == randomizeAuto)
    {
        // The chunk size is the divisor below; a configured value of 0 must be rejected
        // explicitly instead of running into an integer division by zero.
        if (m_chunkSizeBytes == 0)
        {
            RuntimeError("'chunkSizeInBytes' must be greater than 0.");
        }

        m_randomizationWindow = g_4GB / m_chunkSizeBytes; // ~ 4 GB (on disk) worth of chunks
    }
}
// Reads the crop settings from 'config': cropSize, sideRatio, areaRatio, aspectRatio, jitterType,
// cropType and hflip. Each ratio is a [min, max] pair taken from elements 0 and 1 of the configured
// vector. Invalid values are reported through RuntimeError.
CropTransformer::CropTransformer(const ConfigParameters& config) : ImageTransformerBase(config)
{
    // Crop size: element 0 is the width, element 1 the height.
    intargvector cropDims = config(L"cropSize", "0");
    m_cropWidth = cropDims[0];
    m_cropHeight = cropDims[1];
    if (m_cropWidth < 0 || m_cropHeight < 0)
    {
        RuntimeError("Invalid cropSize value, must be >= 0");
    }

    // Side ratio: both bounds still at the 0.0 default means the option was not specified.
    floatargvector sideRange = config(L"sideRatio", "0.0");
    m_sideRatioMin = sideRange[0];
    m_sideRatioMax = sideRange[1];
    m_useSideRatio = !(m_sideRatioMin == 0.0 && m_sideRatioMax == 0.0);
    if (m_useSideRatio &&
        (!(m_sideRatioMin > 0 && m_sideRatioMax <= 1.0) || m_sideRatioMin > m_sideRatioMax))
    {
        RuntimeError("Invalid sideRatio value, must be > 0 and <= 1. sideMin must <= sideMax");
    }

    // Area ratio: handled exactly like the side ratio.
    floatargvector areaRange = config(L"areaRatio", "0.0");
    m_areaRatioMin = areaRange[0];
    m_areaRatioMax = areaRange[1];
    m_useAreaRatio = !(m_areaRatioMin == 0.0 && m_areaRatioMax == 0.0);
    if (m_useAreaRatio &&
        (!(m_areaRatioMin > 0 && m_areaRatioMax <= 1.0) || m_areaRatioMin > m_areaRatioMax))
    {
        RuntimeError("Invalid areaRatio value, must be > 0 and <= 1. areaMin must <= areaMax");
    }

    // The two ways of specifying the crop extent are mutually exclusive.
    if (m_useSideRatio && m_useAreaRatio)
    {
        RuntimeError("sideRatio and areaRatio cannot be specified simultaneously");
    }

    // Aspect ratio: always validated, the default is 1.0.
    floatargvector aspectRange = config(L"aspectRatio", "1.0");
    m_aspectRatioMin = aspectRange[0];
    m_aspectRatioMax = aspectRange[1];
    const bool aspectWithinBounds = (m_aspectRatioMin > 0 && m_aspectRatioMax <= 1.0);
    if (!aspectWithinBounds || m_aspectRatioMin > m_aspectRatioMax)
    {
        RuntimeError("Invalid aspectRatio value, must be > 0 and <= 1. aspectMin must <= aspectMax");
    }

    m_jitterType = ParseJitterType(config(L"jitterType", ""));
    m_cropType = ImageConfigHelper::ParseCropType(config(L"cropType", ""));

    // An explicit 'hflip' wins; otherwise flipping is enabled for the random crop types only.
    if (config.ExistsCurrent(L"hflip"))
    {
        m_hFlip = config(L"hflip");
    }
    else
    {
        m_hFlip = (m_cropType == CropType::RandomSide || m_cropType == CropType::RandomArea);
    }

    // for MultiView10 we need to set m_hFlip = false, otherwise we might not get 5 unflipped images
    // (see CropTransformer::Apply)
    if (m_cropType == CropType::MultiView10)
    {
        m_hFlip = false;
    }
}
// Parses the CNTKTextFormatReader configuration.
// Builds one StreamDescriptor per entry of the "input" section (id, name, dimension, storage type,
// alias, element type) and reads the reader-level settings: file, randomize, skipSequenceIds,
// maxErrors, traceLevel, chunkSizeInBytes and numChunksToCache.
// Any invalid or missing setting is reported through RuntimeError.
TextConfigHelper::TextConfigHelper(const ConfigParameters& config)
{
    if (!config.ExistsCurrent(L"input"))
    {
        RuntimeError("CNTKTextFormatReader configuration does not contain \"input\" section.");
    }

    const ConfigParameters& input = config(L"input");

    if (input.empty())
    {
        RuntimeError("CNTKTextFormatReader configuration contains an empty \"input\" section.");
    }

    // The element type is shared by all streams.
    string precision = config.Find("precision", "float");
    if (AreEqualIgnoreCase(precision, "double"))
        m_elementType = ElementType::tdouble;
    else if (AreEqualIgnoreCase(precision, "float"))
        m_elementType = ElementType::tfloat;
    else
        RuntimeError("Not supported precision '%s'. Expected 'double' or 'float'.", precision.c_str());

    StreamId nextStreamId = 0;
    map<string, wstring> aliasOwners; // alias -> name of the input that claimed it
    for (const pair<string, ConfigParameters>& section : input)
    {
        ConfigParameters streamConfig = section.second;
        wstring streamName = msra::strfun::utf16(section.first);

        if (!streamConfig.ExistsCurrent(L"dim") || !streamConfig.ExistsCurrent(L"format"))
        {
            RuntimeError("Input section for input '%ls' does not specify all the required parameters, "
                "\"dim\" and \"format\".", streamName.c_str());
        }

        StreamDescriptor stream;
        stream.m_id = nextStreamId++;
        stream.m_name = streamName;
        stream.m_sampleDimension = streamConfig(L"dim");
        string format = streamConfig(L"format");

        if (AreEqualIgnoreCase(format, "dense"))
        {
            stream.m_storageType = StorageType::dense;
        }
        else if (AreEqualIgnoreCase(format, "sparse"))
        {
            stream.m_storageType = StorageType::sparse_csc;
            // The dimension of a sparse input must not exceed the largest IndexType value.
            if (stream.m_sampleDimension > numeric_limits<IndexType>::max())
            {
                RuntimeError("Sample dimension (%" PRIu64 ") for sparse input '%ls'"
                    " exceeds the maximum allowed value (%" PRIu64 ").\n",
                    stream.m_sampleDimension, streamName.c_str(), (size_t)numeric_limits<IndexType>::max());
            }
        }
        else
        {
            RuntimeError("'format' parameter must be set either to 'dense' or 'sparse'.");
        }

        // alias is optional; without one the name of the input section is used
        if (!streamConfig.ExistsCurrent(L"alias"))
        {
            stream.m_alias = section.first;
        }
        else
        {
            stream.m_alias = streamConfig(L"alias");
            if (stream.m_alias.empty())
            {
                RuntimeError("Alias value for input '%ls' is empty.", streamName.c_str());
            }
        }

        // Every alias may be used by one input only.
        if (aliasOwners.find(stream.m_alias) == aliasOwners.end())
        {
            aliasOwners[stream.m_alias] = stream.m_name;
        }
        else
        {
            RuntimeError("Alias %s is already mapped to input %ls.",
                stream.m_alias.c_str(), aliasOwners[stream.m_alias].c_str());
        }

        stream.m_elementType = m_elementType;
        m_streams.push_back(stream);
    }

    m_filepath = msra::strfun::utf16(config(L"file"));

    // 'randomize' is either one of the keywords "none"/"auto" (case-insensitive) or a number;
    // when it is absent the window is chosen automatically.
    if (!config.Exists(L"randomize"))
    {
        m_randomizationWindow = randomizeAuto;
    }
    else
    {
        wstring randomizeSetting = config.CanBeString(L"randomize") ? config(L"randomize") : wstring();
        if (_wcsicmp(randomizeSetting.c_str(), L"none") == 0)
            m_randomizationWindow = randomizeNone;
        else if (_wcsicmp(randomizeSetting.c_str(), L"auto") == 0)
            m_randomizationWindow = randomizeAuto;
        else
            m_randomizationWindow = config(L"randomize");
    }

    m_skipSequenceIds = config(L"skipSequenceIds", false);
    m_maxErrors = config(L"maxErrors", 0);
    m_traceLevel = config(L"traceLevel", 0);
    m_chunkSizeBytes = config(L"chunkSizeInBytes", 32 * 1024 * 1024); // 32 MB by default
    m_chunkCacheSize = config(L"numChunksToCache", 32); // 32 * 32 MB = 1 GB of memory in total
}