コード例 #1
0
// Create deserializers based on the specified configuration. 
// deserializers = [
//        [ type = "ImageDataDeserializer" module = "ImageReader" ...]
//        [ type = "CNTKTextFormatDeserializer" module = "CNTKTextFormatReader" ...]
bool CompositeDataReader::CreateDeserializers(const ConfigParameters& readerConfig)
{
    argvector<ConfigValue> deserializerConfigs =
        readerConfig(L"deserializers", ConfigParameters::Array(argvector<ConfigValue>(vector<ConfigValue> {})));

    assert(m_deserializers.empty());

    auto traceLevel = readerConfig.Find("traceLevel");
    bool composable = true;

    bool primary = true;  // Currently, the first deserializer becomes primary - it drives chunking.
    for (size_t i = 0; i < deserializerConfigs.size(); ++i)
    {
        // TODO: Should go away in the future. Framing can be done on top of deserializers.
        ConfigParameters p = deserializerConfigs[i];
        p.Insert("frameMode", m_packingMode == PackingMode::sample ? "true" : "false");
        p.Insert("precision", m_precision);
        if (!traceLevel.empty()) 
        {
            p.Insert("traceLevel", traceLevel);
        }

        composable &= p(L"composable", true);
        DataDeserializerPtr d = CreateDeserializer(p, primary);
        primary = false;
        m_deserializers.push_back(d);
    }
    return composable;
}
コード例 #2
0
    // Constructs a minibatch source backed by the CompositeDataReader plugin.
    //
    // The MinibatchSourceConfig is converted to a dictionary, serialized into the textual
    // config format, and parsed into a ConfigParameters object. That object is handed to the
    // "CreateCompositeDataReader" factory loaded from the CompositeDataReader module. The
    // stream descriptions reported by the reader are mirrored into m_streamInfos, and the
    // reader is wrapped into a ReaderShim<float> that is released through its Destroy() method.
    CompositeMinibatchSource::CompositeMinibatchSource(const MinibatchSourceConfig& configuration)
        : m_epochEndReached(false),
          m_prevMinibatchSize(0),
          m_maxNumSamplesToRead(configuration.maxSamples),
          m_maxNumSweepsToRead(configuration.maxSweeps),
          m_truncationLength(configuration.truncationLength),
          m_numWorkers(1),
          m_workerRank(0),
          m_restorePosition(0)
    {
        auto augmentedConfiguration = Internal::ToDictionary(configuration);

        // Serialize the dictionary into config text, then parse it back as ConfigParameters.
        ConfigParameters config;
        std::wstringstream s;
        for (const auto& keyValuePair : *(augmentedConfiguration.m_dictionaryData))
            AddConfigString(s, keyValuePair.first, keyValuePair.second, 0);

        config.Parse(msra::strfun::utf8(s.str()));

        // Resolve the reader factory exported by the CompositeDataReader module.
        using CreateCompositeDataReaderProc = Reader* (*)(const ConfigParameters* parameters);
        CreateCompositeDataReaderProc createReaderProc = (CreateCompositeDataReaderProc)Plugin().Load(L"CompositeDataReader", "CreateCompositeDataReader");
        std::shared_ptr<Microsoft::MSR::CNTK::Reader> compositeDataReader(createReaderProc(&config));

        m_compositeDataReaderStreamDescs = compositeDataReader->GetStreamDescriptions();
        // Iterate by const reference: the elements are only read, so there is no need to copy each one.
        for (const auto& streamDesc : m_compositeDataReaderStreamDescs)
            m_streamInfos.insert({ streamDesc->m_name, streamDesc->m_id, AsStorageFormat(streamDesc->m_storageType), AsDataType(streamDesc->m_elementType), AsNDShape(*(streamDesc->m_sampleLayout)) });

        // The shim must be released via Destroy() rather than delete, hence the custom deleter.
        m_shim = std::shared_ptr<ReaderShim<float>>(new ReaderShim<float>(compositeDataReader), [](ReaderShim<float>* x) { x->Destroy(); });
        m_shim->Init(config);
    }
コード例 #3
0
ファイル: CompositeDataReader.cpp プロジェクト: StetHD/CNTK
void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerConfig)
{
    std::string defaultModule = deserializerConfig("module");
    argvector<ConfigParameters> inputs = deserializerConfig("input");
    for (size_t i = 0; i < inputs.size(); ++i)
    {
        // Trying to find transfomers in a stream section of the config.
        auto inputSections = TryGetSectionsWithParameter(inputs[i], "transforms");
        if (inputSections.size() > 1)
        {
            LogicError("Only a single 'transforms' config is allowed per stream.");
        }

        // No need to create anything for this stream, skipping.
        if (inputSections.empty())
        {
            continue;
        }

        ConfigParameters input = inputs[i](inputSections.front());
        std::wstring inputName = msra::strfun::utf16(input.ConfigName());

        // Read tranformers in order and appending them to the transformer pipeline.
        argvector<ConfigParameters> transforms = input("transforms");
        for (size_t j = 0; j < transforms.size(); ++j)
        {
            TransformerPtr transformer = CreateTransformer(transforms[j], defaultModule);
            m_transforms.push_back(Transformation{transformer, inputName});
        }
    }

}
コード例 #4
0
ファイル: HTKMLFWriter.cpp プロジェクト: 1132520084/CNTK
// Initializes the writer from its configuration record.
// Reads the verbosity and overflow settings, then, for every name listed in "outputNodeNames",
// reads that output's own section ("dim", "file" or "scpFile", optional "Kaldicmd", "type")
// and fills the per-output bookkeeping (udims, scriptpaths, kaldicmd, feature_writer and the
// outputNameTo*Map tables).
// NOTE(review): only the beginning of this function is visible here; whatever follows the
// per-output loop is not covered by this description.
void HTKMLFWriter<ElemType>::InitFromConfig(const ConfigRecordType& writerConfig)
{
    m_tempArray = nullptr;
    m_tempArraySize = 0;
    m_overflowWarningCount = 0;

    vector<wstring> scriptpaths;
    vector<wstring> filelist;
    size_t numFiles;
    size_t firstfilesonly = SIZE_MAX; // set to a lower value for testing

    m_verbosity = writerConfig(L"verbosity", 2);
    m_overflowValue = writerConfig(L"overflowValue", 50);
    m_maxNumOverflowWarning = writerConfig(L"maxNumOverflowWarning", 10);

    // At least one output node name is required.
    vector<wstring> outputNames = writerConfig(L"outputNodeNames", ConfigRecordType::Array(stringargvector()));
    if (outputNames.size() < 1)
        RuntimeError("writer needs at least one outputNodeName specified in config");
    int counter = 0;
    foreach_index (i, outputNames) // inputNames should map to node names
    {
        ConfigParameters thisOutput = writerConfig(outputNames[i]);

        // "dim" is mandatory for every output.
        if (thisOutput.Exists("dim"))
            udims.push_back(thisOutput(L"dim"));
        else
            RuntimeError("HTKMLFWriter::Init: writer need to specify dim of output");
        // The script path comes from "file" if present, otherwise from "scpFile".
        if (thisOutput.Exists("file"))
            scriptpaths.push_back(thisOutput(L"file"));
        else if (thisOutput.Exists("scpFile"))
            scriptpaths.push_back(thisOutput(L"scpFile"));
        else
            RuntimeError("HTKMLFWriter::Init: writer needs to specify scpFile for output");

        // NOTE(review): kaldicmd and feature_writer only grow for outputs that define "Kaldicmd",
        // yet they are indexed with counter / i (presumably both equal to the loop iteration).
        // If an output without "Kaldicmd" precedes one with it, these indices look like they
        // would run past the end of the vectors - confirm whether "Kaldicmd" is effectively mandatory.
        if (thisOutput.Exists("Kaldicmd"))
        {
            kaldicmd.push_back(thisOutput(L"Kaldicmd"));
            kaldi::BaseFloatMatrixWriter wfea;
            feature_writer.push_back(wfea);
            feature_writer[i].Open(msra::strfun::utf8(kaldicmd[counter]));
        }

        outputNameToIdMap[outputNames[i]] = i;
        outputNameToDimMap[outputNames[i]] = udims[i];
        // Only "Real" outputs are supported; any other type is rejected.
        wstring type = thisOutput(L"type", "Real");
        if (type == L"Real")
        {
            outputNameToTypeMap[outputNames[i]] = OutputTypes::outputReal;
        }
        else
        {
            throw std::runtime_error("HTKMLFWriter::Init: output type for writer output expected to be Real");
        }
        counter++;
    }
コード例 #5
0
ファイル: tests.cpp プロジェクト: 1132520084/CNTK
// Test driver for the sequence readers.
// Reads the "sequenceTest" section of configBase and, for each of the two reader sections
// ("readerSentence" first, then "readerSequence"), creates a DataReader, runs 2 * maxEpochs
// epochs of minibatches through it, prints the dimensions and first values of every minibatch
// to stderr, and finally prints the elapsed time.
void TestSequenceReader(const ConfigParameters& configBase)
{
    // int nonexistant = configBase("nonexistant");  // use to test global exception handler
    ConfigParameters config = configBase("sequenceTest");

    size_t mbSize = config("minibatchSize");
    size_t epochSize = config("epochSize", "0");
    if (epochSize == 0)
    {
        // An epoch size of 0 is replaced by requestDataSize.
        epochSize = requestDataSize;
    }

    for (int fileType = 0; fileType < 2; ++fileType)
    {
        ConfigParameters readerConfig = config(fileType ? "readerSequence" : "readerSentence");
        readerConfig.Insert("traceLevel", config("traceLevel", "0"));

        std::vector<std::wstring> featureNames;
        std::vector<std::wstring> labelNames;
        GetFileConfigNames(readerConfig, featureNames, labelNames);

        DataReader dataReader(readerConfig);

        // The list itself is not used below; the lookup is kept so that the "file" entry
        // of the reader section is still read exactly as before.
        std::vector<std::wstring> files;
        files.push_back(readerConfig(L"file"));

        // setup minibatch matrices
        auto featuresMatrix = make_shared<Matrix<ElemType>>();
        auto labelsMatrix   = make_shared<Matrix<ElemType>>();
        MBLayoutPtr pMBLayout = make_shared<MBLayout>();
        StreamMinibatchInputs matrices;
        matrices.AddInput(featureNames[0], featuresMatrix, pMBLayout, TensorShape());
        matrices.AddInput(labelNames[1]  , labelsMatrix  , pMBLayout, TensorShape());

        // steady_clock is monotonic, so the measured interval cannot be distorted by
        // wall-clock adjustments while the test is running.
        auto start = std::chrono::steady_clock::now();
        int epochs = config("maxEpochs");
        epochs *= 2;
        for (int epoch = 0; epoch < epochs; epoch++)
        {
            dataReader.StartMinibatchLoop(mbSize, epoch, epochSize);
            for (int i = 0; dataReader.GetMinibatch(matrices); i++)
            {
                auto& features = matrices.GetInputMatrix<ElemType>(featureNames[0]);
                auto& labels   = matrices.GetInputMatrix<ElemType>(labelNames[1]);
                // Cast each dimension to the exact type its conversion specifier expects;
                // passing a mismatched integer type through varargs is undefined behavior.
                fprintf(stderr, "%4d: features dim: %lu x %lu - [%.8g, %.8g, ...] label dim: %d x %d - [%d, %d, ...]\n", i,
                        static_cast<unsigned long>(features.GetNumRows()), static_cast<unsigned long>(features.GetNumCols()),
                        features(0, 0), features(0, 1),
                        static_cast<int>(labels.GetNumRows()), static_cast<int>(labels.GetNumCols()),
                        (int) labels(0, 0), (int) labels(0, 1));
            }
        }
        auto end = std::chrono::steady_clock::now();
        auto elapsed = end - start;
        fprintf(stderr, "%f seconds elapsed", (float) (std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count()) / 1000);
    }
}
コード例 #6
0
ファイル: CNTKEval.cpp プロジェクト: fly-fisher/CNTK
// Builds the evaluation network from a textual network description.
// The description is parsed as a configuration and handed to GetModelFromConfig, whose
// result is stored in m_net. Raises a logic error if no network was produced.
void CNTKEvalBase<ElemType>::CreateNetwork(const std::string& networkDescription)
{
    ConfigParameters parsedDescription;
    parsedDescription.Parse(networkDescription);

    // Receives the output node names resolved from the "outputNodeNames" entry; not used further here.
    std::vector<wstring> resolvedOutputNames;
    this->m_net = GetModelFromConfig<ConfigParameters, ElemType>(parsedDescription, L"outputNodeNames", resolvedOutputNames);

    if (!(this->m_net == nullptr))
        return;

    LogicError("Unable to construct network from description");
}
コード例 #7
0
    // Sets up the common state of image deserializers from the deserializer configuration.
    // Exactly one feature section (the input section that has "transforms") and one label
    // section (the input section that has "labelDim") must be present. Two stream descriptions
    // are registered in m_streams: id 0 for the dense feature stream and id 1 for the sparse
    // label stream, followed by the matching label generator and the grayscale / multi-view flags.
    ImageDeserializerBase::ImageDeserializerBase(CorpusDescriptorPtr corpus, const ConfigParameters& config, bool primary)
        : DataDeserializerBase(primary),
          m_corpus(corpus)
    {
        assert(m_corpus);

        ConfigParameters inputs = config("input");
        std::vector<std::string> featureSections = GetSectionsWithParameter("ImageDeserializerBase", inputs, "transforms");
        std::vector<std::string> labelSections = GetSectionsWithParameter("ImageDeserializerBase", inputs, "labelDim");

        const bool singleFeature = featureSections.size() == 1;
        const bool singleLabel = labelSections.size() == 1;
        if (!singleFeature || !singleLabel)
        {
            RuntimeError(
            "Please specify a single feature and label stream. '%d' features , '%d' labels found.",
            static_cast<int>(featureSections.size()),
            static_cast<int>(labelSections.size()));
        }

        // Anything other than "float" (compared case-insensitively) selects double precision.
        string precision = config("precision", "float");
        if (AreEqualIgnoreCase(precision, "float"))
            m_precision = ElementType::tfloat;
        else
            m_precision = ElementType::tdouble;
        m_verbosity = config(L"verbosity", 0);

        // Feature stream.
        ConfigParameters featureSection = inputs(featureSections[0]);
        auto featureStream = std::make_shared<StreamDescription>();
        featureStream->m_id = 0;
        featureStream->m_name = msra::strfun::utf16(featureSection.ConfigName());
        featureStream->m_storageType = StorageType::dense;
        // Due to performance, now we support images of different types.
        featureStream->m_elementType = ElementType::tvariant;
        m_streams.push_back(featureStream);

        // Label stream.
        ConfigParameters labelSection = inputs(labelSections[0]);
        size_t labelDimension = labelSection("labelDim");
        auto labelStream = std::make_shared<StreamDescription>();
        labelStream->m_id = 1;
        labelStream->m_name = msra::strfun::utf16(labelSection.ConfigName());
        labelStream->m_sampleLayout = std::make_shared<TensorShape>(labelDimension);
        labelStream->m_storageType = StorageType::sparse_csc;
        labelStream->m_elementType = m_precision;
        m_streams.push_back(labelStream);

        // The label generator is typed after the element type of the label stream.
        if (labelStream->m_elementType == ElementType::tfloat)
            m_labelGenerator = std::make_shared<TypedLabelGenerator<float>>(labelDimension);
        else
            m_labelGenerator = std::make_shared<TypedLabelGenerator<double>>(labelDimension);

        m_grayscale = config(L"grayscale", false);

        // TODO: multiview should be done on the level of randomizer/transformers - it is responsiblity of the
        // TODO: randomizer to collect how many copies each transform needs and request same sequence several times.
        m_multiViewCrop = config(L"multiViewCrop", false);
    }
コード例 #8
0
ファイル: singleTest.C プロジェクト: MDallOsso/pxar
// ----------------------------------------------------------------------
// Simple macro to illustrate how to call a test in a macro environment;
// test parameters are changed from the macro.
//   testname      name of the test to create through PixTestFactory
//   rootfilename  ROOT file (re)created to receive the results
//   cfgdirectory  directory containing configParameters.dat
// For "PixelAlive" and "Ph" the test is run twice with different parameters;
// any other test is created and deleted without being run.
// If the output file cannot be opened or the test cannot be created, a message
// is printed and the macro shuts the API down without running anything.
void singleTest(string testname = "PixelAlive", string rootfilename = "pixelalive.root", string cfgdirectory = "../data/defaultParametersRocPSI46digV2") {
  ConfigParameters *configParameters = ConfigParameters::Singleton();

  configParameters->setDirectory(cfgdirectory);
  string cfgFile = configParameters->getDirectory() + string("/configParameters.dat");
  configParameters->readConfigParameterFile(cfgFile);

  string rootfile = rootfilename;
  
  PixTestParameters *ptp = new PixTestParameters(configParameters->getDirectory() + "/" + configParameters->getTestParameterFileName()); 

  PixSetup *ap = new PixSetup("DEBUG", ptp, configParameters);  

  cout << "pxar: dumping results into " << rootfile << endl;
  TFile *rfile = TFile::Open(rootfile.c_str(), "RECREATE"); 
  if (!rfile) {
    // Without an output file there is nowhere to store results; bail out cleanly.
    cout << "pxar: could not open " << rootfile << " for writing" << endl;
    ap->killApi();
    return;
  }
  
  PixTestFactory *factory = PixTestFactory::instance(); 
  
  PixTest *pt = factory->createTest(testname, ap); 
  if (!pt) {
    // testname comes from the caller, so an unknown name must not be dereferenced.
    cout << "pxar: could not create test " << testname << endl;
    rfile->Close();
    ap->killApi();
    return;
  }

  if (pt->getName() == "PixelAlive") {
    pt->setParameter("Ntrig", "10"); 
    pt->doTest();

    pt->setParameter("Ntrig", "20"); 
    pt->doTest();
  }
  
  if (pt->getName() == "Ph") {
    pt->setParameter("Ntrig", "2"); 
    pt->setParameter("DAC", "Vcal"); 
    pt->setParameter("DacVal", "200"); 
    pt->dumpParameters(); 
    pt->doTest();
    
    pt->setParameter("PIX", "reset"); 
    pt->setParameter("Ntrig", "4"); 
    pt->setParameter("DacVal", "250"); 
    pt->setParameter("PIX", "45,45"); 
    pt->dumpParameters(); 
    pt->doTest();

  }


  delete pt; 

  rfile->Close();

  ap->killApi();
}
コード例 #9
0
ファイル: CNTKCommandEval.cpp プロジェクト: delpart/CNTK
// Entry point of the command-line evaluation tool.
// Parses the command line into a ConfigParameters object, prints the requested commands and
// the precision to stderr, and dispatches to DoCommand<float> or DoCommand<double>.
// The precision comes from "precision" (default "float"); the older "type" key, when present,
// overrides it for backward compatibility.
// Returns 0 on success and -1 if any exception escapes; the exception text (when available)
// and the call stack are printed to stderr first.
int wmain(int argc, wchar_t* argv[])
{
    try
    {
        ConfigParameters config;
        ConfigParameters::ParseCommandLine(argc, argv, config);

        // get the command param set they want
        wstring logpath = config("stderr", L"");
        ConfigArray command = config("command", "train");

        // dump config info
        fprintf(stderr, "command: ");
        for (size_t i = 0; i < command.size(); i++)
        {
            fprintf(stderr, "%s ", command[i].c_str());
        }

        // run commands
        std::string type = config("precision", "float");
        // accept old precision key for backward compatibility
        if (config.Exists("type"))
            type = config("type", "float");
        fprintf(stderr, "\nprecision = %s\n", type.c_str());
        if (type == "float")
            DoCommand<float>(config);
        else if (type == "double")
            DoCommand<double>(config);
        else
            RuntimeError("invalid precision specified: %s", type.c_str());
    }
    catch (const std::exception& err)
    {
        fprintf(stderr, "EXCEPTION occurred: %s", err.what());
        Microsoft::MSR::CNTK::DebugUtil::PrintCallStack();
#ifdef _DEBUG
        DebugBreak();
#endif
        return -1;
    }
    catch (...)
    {
        fprintf(stderr, "Unknown ERROR occurred");
        Microsoft::MSR::CNTK::DebugUtil::PrintCallStack();
#ifdef _DEBUG
        DebugBreak();
#endif
        return -1;
    }
    return 0;
}
コード例 #10
0
void CropTransformer::InitFromConfig(const ConfigParameters &config)
{
    floatargvector cropRatio = config(L"cropRatio", "1.0");
    m_cropRatioMin = cropRatio[0];
    m_cropRatioMax = cropRatio[1];

    if (!(0 < m_cropRatioMin && m_cropRatioMin <= 1.0) ||
        !(0 < m_cropRatioMax && m_cropRatioMax <= 1.0) ||
        m_cropRatioMin > m_cropRatioMax)
    {
        RuntimeError("Invalid cropRatio value, must be > 0 and <= 1. cropMin must "
                     "<= cropMax");
    }

    m_jitterType = ParseJitterType(config(L"jitterType", ""));

    if (!config.ExistsCurrent(L"hflip"))
    {
        m_hFlip = m_imageConfig->GetCropType() == CropType::Random;
    }
    else
    {
        m_hFlip = config(L"hflip");
    }

    m_aspectRatioRadius = config(L"aspectRatioRadius", ConfigParameters::Array(doubleargvector(vector<double>{0.0})));
}
コード例 #11
0
ファイル: ImageTransformers.cpp プロジェクト: 6779660/CNTK
void CropTransformer::InitFromConfig(const ConfigParameters &config)
{
    m_cropType = ParseCropType(config(L"cropType", ""));

    floatargvector cropRatio = config(L"cropRatio", "1.0");
    m_cropRatioMin = cropRatio[0];
    m_cropRatioMax = cropRatio[1];

    if (!(0 < m_cropRatioMin && m_cropRatioMin <= 1.0) ||
        !(0 < m_cropRatioMax && m_cropRatioMax <= 1.0) ||
        m_cropRatioMin > m_cropRatioMax)
    {
        RuntimeError("Invalid cropRatio value, must be > 0 and <= 1. cropMin must "
                     "<= cropMax");
    }

    m_jitterType = ParseJitterType(config(L"jitterType", ""));

    if (!config.ExistsCurrent(L"hflip"))
    {
        m_hFlip = m_cropType == CropType::Random;
    }
    else
    {
        m_hFlip = config(L"hflip");
    }
}
コード例 #12
0
ファイル: TrainActions.cpp プロジェクト: hahatt/CNTK
// Runs the "edit" action.
// Without a "BrainScriptNetworkBuilder" entry this is the legacy path: the optional NDL macro
// file is loaded, then the MEL script named by "editPath" is loaded with variables resolved
// against the config. With "BrainScriptNetworkBuilder" the network is created through the
// network factory and saved to "outputModelPath"; in makeMode (the default) the action is
// skipped when the output file already exists.
void DoEdit(const ConfigParameters& config)
{
    if (!config.Exists(L"BrainScriptNetworkBuilder"))
    {
        // legacy model editing
        wstring editPath = config(L"editPath");
        wstring ndlMacros = config(L"ndlMacros", "");
        NDLScript<ElemType> ndlScript;
        if (!ndlMacros.empty())
            ndlScript.LoadConfigFile(ndlMacros);

        MELScript<ElemType> melScript;
        melScript.LoadConfigFileAndResolveVariables(editPath, config);
        return;
    }

    // BrainScript editing
    bool makeMode = config(L"makeMode", true);
    wstring outputPathname = config(L"outputModelPath");

    // in makeMode, if output file exists, we are done
    const bool alreadyDone = makeMode && File::Exists(outputPathname);
    if (alreadyDone)
    {
        LOGPRINTF(stderr, "'%ls' exists, skipping. Specify makeMode=false to force executing the action.\n", outputPathname.c_str());
        return;
    }

    DEVICEID_TYPE deviceId = DeviceFromConfig(config);
    let createNetworkFn = GetNetworkFactory<ConfigParameters, ElemType>(config);
    let net = createNetworkFn(deviceId);
    net->Save(outputPathname);
    LOGPRINTF(stderr, "\nModel with %d nodes saved as '%ls'.\n", (int)net->GetTotalNumberOfNodes(), outputPathname.c_str());
}
コード例 #13
0
ファイル: phOpt.C プロジェクト: MDallOsso/pxar
// ----------------------------------------------------------------------
// Create PH vs VCal scans for a grid of phscale and phoffset values.
//   rootfile      ROOT file (re)created to receive the results
//   cfgdirectory  directory containing configParameters.dat
// Runs the "DacScan" test for 26 phoffset values (steps of 10) times 52 phscale
// values (steps of 5) and retitles the histogram of each cycle with the values used.
// If the output file cannot be opened or the test cannot be created, a message is
// printed and the macro shuts the API down without scanning. A cycle whose
// histogram cannot be found is reported and skipped.
void phOpt(string rootfile = "phOpt.root", string cfgdirectory = "testROC") {
  ConfigParameters *configParameters = ConfigParameters::Singleton();
  
  configParameters->setDirectory(cfgdirectory);
  string cfgFile = configParameters->getDirectory() + string("/configParameters.dat");
  configParameters->readConfigParameterFile(cfgFile);

  
  PixTestParameters *ptp = new PixTestParameters(configParameters->getDirectory() + "/" + configParameters->getTestParameterFileName()); 

  PixSetup *ap = new PixSetup("DEBUG", ptp, configParameters);  

  cout << "pxar: dumping results into " << rootfile << endl;
  TFile *rfile = TFile::Open(rootfile.c_str(), "RECREATE"); 
  if (!rfile) {
    cout << "pxar: could not open " << rootfile << " for writing" << endl;
    ap->killApi();
    return;
  }
  
  PixTestFactory *factory = PixTestFactory::instance(); 
  
  PixTest *pt = factory->createTest("DacScan", ap); 
  if (!pt) {
    cout << "pxar: could not create test DacScan" << endl;
    rfile->Close();
    ap->killApi();
    return;
  }
  pt->setDAC("ctrlreg", 4); 
  pt->setParameter("PHmap", "1"); 
  pt->setParameter("DAC", "Vcal"); 
  pt->setParameter("DACLO", "0"); 
  pt->setParameter("DACHI", "255"); 

  int cycle(0);
  TH1D *h1(0); 
  for (unsigned int io = 0; io < 26; ++io) {
    for (unsigned int is = 0; is < 52; ++is) {
      pt->setDAC("phoffset", io*10);
      pt->setDAC("phscale", is*5);
      pt->doTest(); 
      h1 = (TH1D*)rfile->Get(Form("DacScan/ph_Vcal_c11_r20_C0_V%d", cycle)); 
      if (h1) {
        // %d expects a signed int, so convert the unsigned grid values explicitly.
        h1->SetTitle(Form("ph_Vcal_c11_r20_C0_V%d phscale=%d phoffset=%d", cycle, (int)(is*5), (int)(io*10)));
      } else {
        // The lookup yields a null pointer when the histogram is missing; do not dereference it.
        cout << "pxar: histogram for cycle " << cycle << " not found" << endl;
      }
      ++cycle;
    }
  }
  rfile->Print();
  delete pt; 

  rfile->Close();

  ap->killApi();

}
コード例 #14
0
ファイル: EvalActions.cpp プロジェクト: junjieqian/CNTK
void DoWriteOutput(const ConfigParameters& config)
{
    ConfigParameters readerConfig(config(L"reader"));
    readerConfig.Insert("randomize", "None"); // we don't want randomization when output results

    DataReader testDataReader(readerConfig);

    ConfigArray minibatchSize = config(L"minibatchSize", "2048");
    intargvector mbSize = minibatchSize;

    size_t epochSize = config(L"epochSize", "0");
    if (epochSize == 0)
    {
        epochSize = requestDataSize;
    }

    vector<wstring> outputNodeNamesVector;

    let net = GetModelFromConfig<ConfigParameters, ElemType>(config, L"outputNodeNames", outputNodeNamesVector);

    // set tracing flags
    net->EnableNodeTracing(config(L"traceNodeNamesReal",     ConfigParameters::Array(stringargvector())),
                           config(L"traceNodeNamesCategory", ConfigParameters::Array(stringargvector())),
                           config(L"traceNodeNamesSparse",   ConfigParameters::Array(stringargvector())));

    SimpleOutputWriter<ElemType> writer(net, 1);

    if (config.Exists("writer"))
    {
        ConfigParameters writerConfig(config(L"writer"));
        bool writerUnittest = writerConfig(L"unittest", "false");
        DataWriter testDataWriter(writerConfig);
        writer.WriteOutput(testDataReader, mbSize[0], testDataWriter, outputNodeNamesVector, epochSize, writerUnittest);
    }
    else if (config.Exists("outputPath"))
    {
        wstring outputPath = config(L"outputPath");
        WriteFormattingOptions formattingOptions(config);
        bool nodeUnitTest = config(L"nodeUnitTest", "false");
        writer.WriteOutput(testDataReader, mbSize[0], outputPath, outputNodeNamesVector, formattingOptions, epochSize, nodeUnitTest);
    }
    else
        InvalidArgument("write command: You must specify either 'writer'or 'outputPath'");
}
コード例 #15
0
// Creates the transformers described in the deserializer configuration, i.e.
// deserializers = [
//     [
//         type = "ImageDataDeserializer"
//         module = "ImageReader"
//         input = [
//               features = [
//---->              transforms = [
//                       [type = "Crop"]:[type = "Scale"]...
// For every input that has a "transforms" entry, the listed transformers are appended to
// m_transforms in order, followed by a "Cast" transformer for that input.
// Does nothing when the deserializer has no "input" section.
void CompositeDataReader::CreateTransforms(const ConfigParameters& deserializerConfig)
{
    std::string defaultModule = deserializerConfig("module");
    if (!deserializerConfig.Exists("input"))
        return;

    const ConfigParameters& inputs = deserializerConfig("input");
    for (const pair<string, ConfigParameters>& entry : inputs)
    {
        ConfigParameters streamConfig = entry.second;

        // Only inputs that carry a "transforms" entry get a transformer pipeline.
        const bool hasTransforms = streamConfig.find("transforms") != streamConfig.end();
        if (hasTransforms)
        {
            std::wstring streamName = Microsoft::MSR::CNTK::ToFixedWStringFromMultiByte(entry.first);

            // Read the transformers in order and append them to the transformer pipeline.
            argvector<ConfigParameters> transformConfigs = streamConfig("transforms");
            for (size_t index = 0; index < transformConfigs.size(); ++index)
            {
                // Each transformer config is given the precision of the deserializer.
                ConfigParameters transformConfig = transformConfigs[index];
                transformConfig.Insert("precision", deserializerConfig("precision"));

                TransformerPtr created = CreateTransformer(transformConfig, defaultModule, std::wstring());
                m_transforms.push_back(Transformation{ created, streamName });
            }

            // Let's add a cast transformer by default. It is noop if the type provided by others is float
            // or double, but will do a proper cast if the type is uchar.
            auto castTransformer = CreateTransformer(streamConfig, defaultModule, std::wstring(L"Cast"));
            m_transforms.push_back(Transformation{ castTransformer, streamName });
        }
    }
}
コード例 #16
0
ファイル: CNTK.cpp プロジェクト: hahatt/CNTK
// special temporary function to guard against a now invalid usage of "truncated" which exists in some IPG production setups
static void DisableLegacyTruncationSettings(const ConfigParameters& TopLevelConfig, const ConfigParameters& commandConfig)
{
    if (TopLevelConfig.ExistsCurrent(L"Truncated"))
    {
        return;
    }

    // if any of the action has set a reader/SGD section and has different Truncated value for reader and SGD section
    ConfigArray actions = commandConfig(L"action");
    for (size_t i = 0; i < actions.size(); i++)
    {
        if (actions[i] == "train" || actions[i] == "trainRNN")
        {
            ConfigParameters sgd = ConfigParameters(commandConfig(L"SGD"));
            ConfigParameters reader = ConfigParameters(commandConfig(L"reader"));
            // reader and SGD sections are two must-have sections in train/trainRNN
            if (reader.ExistsCurrent(L"Truncated") && !sgd.ExistsCurrent(L"Truncated"))
            {
                InvalidArgument("DisableLegacyUsage: setting Truncated only in reader section are not allowed. Please move Truncated=true/false to the top level section.");
            }
        }
    }
}
コード例 #17
0
ファイル: tests.cpp プロジェクト: 6779660/CNTK
void TestBing(const ConfigParameters& config)
{
    if (!config.Exists("train.set"))
    {
        std::cout<<"USAGE: cn.exe train.set featureDim networkDescription learnRatesPerMB mbSize epochSize maxEpochs outdir test.set test.set.size"<<endl;
        exit(0);
    }

    size_t vdim = config("featureDim");
    size_t udim = 1;
    vector<wstring> filepaths;
    filepaths.push_back(config("train.set"));

    DataReader<ElemType> dataReader(vdim, udim, filepaths, config);
    ConfigArray layerSizes(config("networkDescription"));
    SimpleNetworkBuilder<ElemType> netBuilder(layerSizes, TrainingCriterion::SquareError, EvalCriterion::SquareError, L"Sigmoid", true, false, false, &dataReader);


    ConfigArray learnRatesPerMB(config("learnRatesPerMB"));
    ConfigArray mbSize(config("mbSize"));
    size_t epochSize = config("epochSize");
    size_t maxEpochs = config("maxEpochs");
    float momentumPerMB = 0.9;//0.9f;
    std::string outDir = config("outdir");
    wstring modelPath = wstring(msra::strfun::utf16(outDir)).append(L"\\bingranknet.dnn");

    SimpleSGD<ElemType> sgd(learnRatesPerMB, mbSize, epochSize, maxEpochs, modelPath, momentumPerMB);
    sgd.Train(netBuilder, dataReader, true);

    std::cout<<std::endl<<std::endl<<std::endl<<std::endl<<"Testing ..... "<<std::endl;

    // test
    vector<wstring> testfilepaths;
    testfilepaths.push_back( config("test.set"));
    size_t testSize = config("test.set.size");
    DataReader<ElemType> testDataReader(vdim, udim, testfilepaths, config);

    wstring finalNetPath = modelPath.append(L".").append(to_wstring(maxEpochs-1));

    SimpleEvaluator<ElemType> eval(netBuilder.LoadNetworkFromFile(finalNetPath, false));
    eval.Evaluate(testDataReader, 1024, (finalNetPath.append(L".results.txt")).c_str(),testSize);
}
コード例 #18
0
ファイル: Exports.cpp プロジェクト: 1132520084/CNTK
// TODO: Not safe from the ABI perspective. Will be uglified to make the interface ABI.
// A factory method for creating text deserializers.
//   deserializer       receives the newly allocated deserializer
//   type               deserializer type; only L"CNTKTextFormatDeserializer" is supported
//   deserializerConfig configuration; "precision" may be "float" (default) or "double",
//                      compared case-insensitively
//   corpus             corpus descriptor handed to the parser
// Reports an invalid argument for an unsupported precision or an unknown type;
// returns true once the deserializer has been created.
extern "C" DATAREADER_API bool CreateDeserializer(IDataDeserializer** deserializer, const std::wstring& type, const ConfigParameters& deserializerConfig, CorpusDescriptorPtr corpus, bool)
{
    string precision = deserializerConfig.Find("precision", "float");

    // Decide the precision once, case-insensitively, so that validation and dispatch agree.
    // (A case-sensitive comparison at dispatch time would treat e.g. "Float" as double.)
    const bool useFloat = AreEqualIgnoreCase(precision, "float");
    if (!useFloat && !AreEqualIgnoreCase(precision, "double"))
    {
        InvalidArgument("Unsupported precision '%s'", precision.c_str());
    }

    // TODO: Remove type from the parser. Current implementation does not support streams of different types.
    if (type == L"CNTKTextFormatDeserializer")
    {
        if (useFloat)
            *deserializer = new TextParser<float>(corpus, TextConfigHelper(deserializerConfig));
        else // double
            *deserializer = new TextParser<double>(corpus, TextConfigHelper(deserializerConfig));
    }
    else
        InvalidArgument("Unknown deserializer type '%ls'", type.c_str());

    // Deserializer created.
    return true;
}
コード例 #19
0
ファイル: CNTK.cpp プロジェクト: AltasK/CNTK
// ---------------------------------------------------------------------------
// main() for old CNTK config language
// ---------------------------------------------------------------------------
// called from wmain which is a wrapper that catches & reports Win32 exceptions
//
// Parses the command line into a config, applies the global settings found there
// (timestamping, parallel training, node value matrix sharing, GPU allocation tracing),
// optionally redirects stderr to a log file, dumps the configuration, runs the requested
// commands in the configured precision, and finally writes the optional DoneFile.
// Returns EXIT_SUCCESS when it runs to the end.
int wmainOldCNTKConfig(int argc, wchar_t* argv[])
{
    ConfigParameters config;
    std::string rawConfigString = ConfigParameters::ParseCommandLine(argc, argv, config);    // fills 'config'; the returned text is logged below as the raw config
    bool timestamping = config(L"timestamping", false);
    if (timestamping)
    {
        ProgressTracing::SetTimestampingFlag();
    }

    // base path of the log file; empty means stderr is not redirected
    wstring logpath = config(L"stderr", L"");

    //  [1/26/2015 erw, add done file so that it can be used on HPC]
    wstring DoneFile = config(L"DoneFile", L"");
    ConfigArray command = config(L"command", "train");

    // parallel training: the MPI wrapper is only created when parallelTrain is set
    shared_ptr<Microsoft::MSR::CNTK::MPIWrapper> mpi;
    bool paralleltrain = config(L"parallelTrain", "false");
    if (paralleltrain)
        mpi = MPIWrapper::GetInstance(true /*create*/);

    g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false);

    TracingGPUMemoryAllocator::SetTraceLevel(config(L"traceGPUMemoryAllocations", 0));

    // The log file name is the base path followed by "_<command>" for every command and ".log";
    // with parallel training, "rank<N>" is appended after the ".log" suffix.
    if (logpath != L"")
    {
        for (int i = 0; i < command.size(); i++)
        {
            logpath += L"_";
            logpath += (wstring) command[i];
        }
        logpath += L".log";

        if (paralleltrain)
        {
            std::wostringstream oss;
            oss << mpi->CurrentNodeRank();
            logpath += L"rank" + oss.str();
        }
        RedirectStdErr(logpath);
    }

    PrintBuiltInfo(); // this one goes to log file
    std::string timestamp = TimeDateStamp();

    // dump config info
    fprintf(stderr, "\n");
    LOGPRINTF(stderr, "Running on %s at %s\n", GetHostName().c_str(), timestamp.c_str());
    LOGPRINTF(stderr, "Command line: \n");
    for (int i = 0; i < argc; i++)
        fprintf(stderr, "%*s%ls", i > 0 ? 2 : 0, "", argv[i]); // use 2 spaces for better visual separability
    fprintf(stderr, "\n\n");

#if 1 //def _DEBUG
    // This simply merges all the different config parameters specified (eg, via config files or via command line directly),
    // and prints it.
    fprintf(stderr, "\n\n");
    LOGPRINTF(stderr, ">>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>\n");
    LOGPRINTF(stderr, "%s\n", rawConfigString.c_str());
    LOGPRINTF(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED)  <<<<<<<<<<<<<<<<<<<<\n");

    // Same as above, but all variables are resolved.  If a parameter is set multiple times (eg, set in config, overridden at command line),
    // All of these assignments will appear, even though only the last assignment matters.
    fprintf(stderr, "\n");
    LOGPRINTF(stderr, ">>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n");
    LOGPRINTF(stderr, "%s\n", config.ResolveVariables(rawConfigString).c_str());
    LOGPRINTF(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n");

    // This outputs the final value each variable/parameter is assigned to in config (so if a parameter is set multiple times, only the last
    // value it is set to will appear).
    fprintf(stderr, "\n");
    LOGPRINTF(stderr, ">>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n");
    config.dumpWithResolvedVariables();
    LOGPRINTF(stderr, "<<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n");
#endif

    LOGPRINTF(stderr, "Commands:");
    for (int i = 0; i < command.size(); i++)
        fprintf(stderr, " %s", command[i].c_str());
    fprintf(stderr, "\n");

    // run commands
    std::string type = config(L"precision", "float");
    // the old 'type' key is no longer accepted: its presence is reported as an invalid argument
    if (config.Exists("type"))
        InvalidArgument("CNTK: Use of 'type' parameter is deprecated, it is called 'precision' now.");

    LOGPRINTF(stderr, "Precision = \"%s\"\n", type.c_str());

    // dispatch on the precision string; the comparison is exact (case-sensitive)
    if (type == "float")
        DoCommands<float>(config, mpi);
    else if (type == "double")
        DoCommands<double>(config, mpi);
    else
        RuntimeError("CNTK: Invalid precision string: \"%s\", must be \"float\" or \"double\"", type.c_str());

    // if completed then write a DoneFile if requested
    if (!DoneFile.empty())
    {
        FILE* fp = fopenOrDie(DoneFile.c_str(), L"w");
        fprintf(fp, "successfully finished at %s on %s\n", TimeDateStamp().c_str(), GetHostName().c_str());
        fcloseOrDie(fp);
    }
    // TODO: Change back to COMPLETED (no underscores)
    LOGPRINTF(stderr, "__COMPLETED__\n");
    fflush(stderr);

    MPIWrapper::DeleteInstance();
    return EXIT_SUCCESS;
}
コード例 #20
0
ファイル: ImageConfigHelper.cpp プロジェクト: 6779660/CNTK
    // Parses the image reader configuration and builds the stream descriptions.
    //
    // Exactly one feature section and one label section are required; they are the
    // sections that GetSectionsWithParameter() returns for the "width" and "labelDim"
    // parameters respectively. The feature stream is registered with id 0 and the
    // label stream with id 1; both are appended to m_streams.
    //
    // Parameters read:
    //   feature section: width, height, channels, mbFormat (default "nchw")
    //   label section:   labelDim
    //   top level:       file, randomize (default "auto"), precision (default "float"),
    //                    numCPUThreads (default 0)
    //
    // Calls RuntimeError() when the number of feature/label sections is not one each,
    // or when mbFormat, randomize or precision hold an unsupported value.
    ImageConfigHelper::ImageConfigHelper(const ConfigParameters& config)
        : m_dataFormat(CHW)
    {
        std::vector<std::string> featureNames = GetSectionsWithParameter(config, "width");
        std::vector<std::string> labelNames = GetSectionsWithParameter(config, "labelDim");

        // REVIEW alexeyk: currently support only one feature and label section.
        if (featureNames.size() != 1 || labelNames.size() != 1)
        {
            RuntimeError(
                "ImageReader currently supports a single feature and label stream. '%d' features , '%d' labels found.",
                static_cast<int>(featureNames.size()),
                static_cast<int>(labelNames.size()));
        }

        ConfigParameters featureSection = config(featureNames[0]);
        size_t w = featureSection("width");
        size_t h = featureSection("height");
        size_t c = featureSection("channels");

        // Sample layout: "nhwc" (alias "legacy") selects HWC, "nchw" (alias "cudnn") keeps
        // the CHW default set in the initializer list. Everything else is rejected.
        // The rejection test must require that the value is neither "nchw" nor "cudnn";
        // the earlier form (!nchw || cudnn) rejected "cudnn" itself.
        std::string mbFmt = featureSection("mbFormat", "nchw");
        if (AreEqualIgnoreCase(mbFmt, "nhwc") || AreEqualIgnoreCase(mbFmt, "legacy"))
        {
            m_dataFormat = HWC;
        }
        else if (!AreEqualIgnoreCase(mbFmt, "nchw") && !AreEqualIgnoreCase(mbFmt, "cudnn"))
        {
            RuntimeError("ImageReader does not support the sample format '%s', only 'nchw' and 'nhwc' are supported.", mbFmt.c_str());
        }

        // Feature stream (id 0): named after the feature section, shaped from width/height/channels
        // in the selected data format.
        auto features = std::make_shared<StreamDescription>();
        features->m_id = 0;
        features->m_name = msra::strfun::utf16(featureSection.ConfigName());
        features->m_sampleLayout = std::make_shared<TensorShape>(ImageDimensions(w, h, c).AsTensorShape(m_dataFormat));
        m_streams.push_back(features);

        ConfigParameters label = config(labelNames[0]);
        size_t labelDimension = label("labelDim");

        // Label stream (id 1): named after the label section, with a shape built from labelDim.
        auto labelSection = std::make_shared<StreamDescription>();
        labelSection->m_id = 1;
        labelSection->m_name = msra::strfun::utf16(label.ConfigName());
        labelSection->m_sampleLayout = std::make_shared<TensorShape>(labelDimension);
        m_streams.push_back(labelSection);

        m_mapPath = config(L"file");

        // Randomization switch: only "auto" (on) and "none" (off) are accepted, case-insensitively.
        std::string rand = config(L"randomize", "auto");

        if (AreEqualIgnoreCase(rand, "auto"))
        {
            m_randomize = true;
        }
        else if (AreEqualIgnoreCase(rand, "none"))
        {
            m_randomize = false;
        }
        else
        {
            RuntimeError("'randomize' parameter must be set to 'auto' or 'none'");
        }

        // Identify precision; both streams always share the same element type.
        string precision = config.Find("precision", "float");
        if (AreEqualIgnoreCase(precision, "float"))
        {
            features->m_elementType = ElementType::tfloat;
            labelSection->m_elementType = ElementType::tfloat;
        }
        else if (AreEqualIgnoreCase(precision, "double"))
        {
            features->m_elementType = ElementType::tdouble;
            labelSection->m_elementType = ElementType::tdouble;
        }
        else
        {
            RuntimeError("Not supported precision '%s'. Expected 'double' or 'float'.", precision.c_str());
        }

        m_cpuThreadCount = config(L"numCPUThreads", 0);
    }
コード例 #21
0
ファイル: DSSMReader.cpp プロジェクト: 1132520084/CNTK
// Initializes the DSSM reader from its configuration record.
//
// The config must name exactly two feature sections and one label section
// (as reported by GetFileConfigNames); otherwise RuntimeError is raised.
// The second feature name is used as the query input and the first as the
// document input. Reads 'traceLevel', 'randomize', 'minibatchMode' and, from
// each feature section, 'dim' and 'file'; then initializes both inputs and,
// if not yet allocated, the identity read order.
void DSSMReader<ElemType>::InitFromConfig(const ConfigRecordType& readerConfig)
{
    std::vector<std::wstring> featureNames;
    std::vector<std::wstring> labelNames;

    // Collect the names of the feature and label sections of the config.
    // Both vectors are in/out arguments of GetFileConfigNames.
    // The label entry only serves as a place holder for DSSM data.
    GetFileConfigNames(readerConfig, featureNames, labelNames);

    // Exactly two features (and one label) are mandatory.
    if (featureNames.size() != 2 || labelNames.size() != 1)
    {
        RuntimeError("DSSM requires exactly two features and one label. Their names should match those in NDL definition");
        return;
    }

    // Note the swapped order: index 1 is the query section, index 0 the document section.
    m_featuresNameQuery = featureNames[1];
    m_featuresNameDoc = featureNames[0];
    m_labelsName = labelNames[0];

    // Reset all bookkeeping state.
    m_epochStartSample = 0;
    m_totalSamples = 0;
    m_epoch = 0;
    m_mbStartSample = 0;
    m_labelDim = 0;
    m_labelIdMax = 0;
    m_endReached = false;
    m_partialMinibatch = false;
    m_labelType = labelCategory;
    m_readNextSample = 0;
    m_traceLevel = readerConfig(L"traceLevel", 0);

    // 'randomize' is either one of the keywords "None"/"Auto" or a numeric range.
    if (!readerConfig.Exists(L"randomize"))
    {
        m_randomizeRange = randomizeNone;
    }
    else
    {
        // BUGBUG: reading out string and number... ugh
        wstring randomizeSetting = readerConfig(L"randomize");
        if (randomizeSetting == L"Auto")
        {
            m_randomizeRange = randomizeAuto;
        }
        else if (randomizeSetting == L"None")
        {
            m_randomizeRange = randomizeNone;
        }
        else
        {
            m_randomizeRange = readerConfig(L"randomize");
        }
    }

    std::string mbMode(readerConfig(L"minibatchMode", "Partial"));
    m_partialMinibatch = EqualCI(mbMode, "Partial");

    // Fetch the per-input config sections; an empty section means it was not configured.
    ConfigParameters queryConfig = readerConfig(m_featuresNameQuery, "");
    ConfigParameters docConfig   = readerConfig(m_featuresNameDoc, "");

    if (queryConfig.size() == 0)
    {
        RuntimeError("features file not found, required in configuration: i.e. 'features=[file=c:\\myfile.txt;start=1;dim=123]'");
    }
    if (docConfig.size() == 0)
    {
        RuntimeError("features file not found, required in configuration: i.e. 'features=[file=c:\\myfile.txt;start=1;dim=123]'");
    }

    // Feature dimensions; these are used later to handle OOVs.
    m_featuresDimQuery = queryConfig(L"dim");
    m_featuresDimDoc   = docConfig(L"dim");

    std::wstring queryFile = queryConfig("file");
    std::wstring docFile   = docConfig("file");

    dssm_queryInput.Init(queryFile, m_featuresDimQuery);
    dssm_docInput.Init(docFile, m_featuresDimDoc);

    // The number of query rows defines the sample count.
    m_totalSamples = dssm_queryInput.numRows;

    // Allocate the read order once and fill it with the identity permutation.
    if (read_order == nullptr)
    {
        read_order = new int[m_totalSamples];
        int sampleIdx = 0;
        while (sampleIdx < m_totalSamples)
        {
            read_order[sampleIdx] = sampleIdx;
            sampleIdx++;
        }
    }

    m_mbSize = 0;
}
コード例 #22
0
// The whole CompositeDataReader is meant as a stopgap to allow deserializers/transformers composition until SGD talks
// directly to the new Reader API. 
// For more information please see its header file.
// This method composes together packers + randomizer + a set of transformers and deserializers.
//
// Config keys read by this constructor (default used when the key is absent):
//   action (""), useNumericSequenceKeys (true unless an HTK feature/MLF deserializer is configured),
//   hashSequenceKeys (false), frameMode (false), truncated (false), truncationLength (0),
//   rightSplice (0), precision ("float"), checkData (true), verbosity (0), randomize (true),
//   maxErrors (0), multiThreadedDeserialization (true iff an ImageDeserializer is configured),
//   sampleBasedRandomizationWindow, randomizationWindow, chunkSizeInBytes (g_32MB), localTimeline (true).
//
// Errors: LogicError when frameMode and truncated are both set, when a sample-based window is requested
// for the text format deserializer without a window size, or for an unknown packing mode;
// InvalidArgument for a zero truncation length, a rightSplice larger than the truncation length,
// missing deserializers, or several deserializers when one of them is not composable.
CompositeDataReader::CompositeDataReader(const ConfigParameters& config) :
    m_truncationLength(0)
{
    wstring action = config(L"action", L"");
    bool isActionWrite = AreEqualIgnoreCase(action, L"write");

    // By default, we use numeric sequence keys (i.e., for cbf, ctf, image and base64 readers).
    // For MLF and HTK deserializers, we use non-numeric (string) sequence keys.
    bool useNumericSequenceKeys = true;
    if (ContainsDeserializer(config, L"HTKFeatureDeserializer") ||
        ContainsDeserializer(config, L"HTKMLFDeserializer")) 
    {
        useNumericSequenceKeys = false;
    }

    // An explicit 'useNumericSequenceKeys' setting overrides the deserializer-based default above.
    useNumericSequenceKeys = config(L"useNumericSequenceKeys", useNumericSequenceKeys);

    bool useHash = config(L"hashSequenceKeys", false);
    m_corpus = std::make_shared<CorpusDescriptor>(useNumericSequenceKeys, useHash);

    // Identifying packing mode.
    bool frameMode = config(L"frameMode", false);
    bool truncated = config(L"truncated", false);
    if (frameMode && truncated)
    {
        LogicError("frameMode and truncated BPTT are mutually exclusive.");
    }

    // Precedence: write action > frameMode > truncated > plain sequence mode.
    if (isActionWrite) // For writing we always use sequence mode.
    {
        m_packingMode = PackingMode::sequence;
    }
    else if (frameMode)
    {
        m_packingMode = PackingMode::sample;
    }
    else if (truncated)
    {
        m_packingMode = PackingMode::truncated;
        m_truncationLength = config(L"truncationLength", 0);
        if (m_truncationLength == 0)
        {
            InvalidArgument("Truncation length cannot be 0.");
        }
    }
    else
    {
        m_packingMode = PackingMode::sequence;
    }

    // m_truncationLength is only assigned in the truncated branch above and is 0 otherwise,
    // so any rightSplice greater than 0 is rejected outside of truncated mode.
    m_rightSplice = config(L"rightSplice", 0);
    if (m_rightSplice > m_truncationLength)
        InvalidArgument("rightSplice should not be greater than truncation length");

    // m_precision and m_packingMode must be set before CreateDeserializers is called:
    // it forwards both to every deserializer config.
    m_precision = config("precision", "float");

    // Creating deserializers.
    bool composable = CreateDeserializers(config);
    if (m_deserializers.empty())
        InvalidArgument("Could not find deserializers in the reader config.");

    if (!composable && m_deserializers.size() > 1)
        InvalidArgument("Currently user defined deserializers do not support composability. Please specify a single deserializer.");

    // With a single deserializer it is used directly; several are wrapped in a Bundler,
    // with the first one passed as the driving deserializer.
    DataDeserializerPtr deserializer = m_deserializers.front();
    if (m_deserializers.size() > 1)
    {
        // Bundling deserializers together.
        // Option whether we need to check data between different deserializers.
        bool cleanse = config(L"checkData", true);
        deserializer = std::make_shared<Bundler>(config, m_corpus, deserializer, m_deserializers, cleanse);
    }

    int verbosity = config(L"verbosity", 0);

    // Pick up the randomizer, always picking up no randomization for the write mode.
    bool randomize = isActionWrite ? false : config(L"randomize", true);

    // Get maximum number of allowed errors per worker.
    size_t maxErrors = config(L"maxErrors", 0);

    // By default do not use omp threads for deserialization of sequences.
    // It makes sense to put it to true for cases when deserialization is CPU intensive,
    // i.e. decompression of images.
    bool multiThreadedDeserialization = config(L"multiThreadedDeserialization", ContainsDeserializer(config, L"ImageDeserializer"));

    if (!composable) // Pick up simple interface.
    {
        if (randomize)
        {
            // Note: the default for 'sampleBasedRandomizationWindow' is false here,
            // whereas the composable path below defaults it to true.
            bool sampleBasedRandomizationWindow = config(L"sampleBasedRandomizationWindow", false);
            m_sequenceEnumerator = std::make_shared<LTTumblingWindowRandomizer>(deserializer,
                sampleBasedRandomizationWindow, config(L"randomizationWindow", requestDataSize),
                GetRandomSeed(config),
                multiThreadedDeserialization, maxErrors);
        }
        else
            m_sequenceEnumerator = std::make_shared<LTNoRandomizer>(deserializer, multiThreadedDeserialization, maxErrors);
    }
    else
    {
        if (randomize)
        {
            // By default randomizing the whole data set.
            size_t randomizationWindow = requestDataSize;

            // Currently in case of images, a single chunk is a single image. So no need to randomize, chunks will be randomized anyway.
            // Note that this also switches the packing mode to sample mode, overriding the mode selected above.
            if (ContainsDeserializer(config, L"ImageDeserializer") && m_deserializers.size() == 1)
            {
                randomizationWindow = 1;
                m_packingMode = PackingMode::sample;
            }

            randomizationWindow = config(L"randomizationWindow", randomizationWindow);
            bool sampleBasedRandomizationWindow = config(L"sampleBasedRandomizationWindow", true);

            // Text format deserializer without an explicit window size: fall back to a chunk-based
            // window unless the config explicitly asked for a sample-based one.
            if (ContainsDeserializer(config, L"CNTKTextFormatDeserializer") && !config.ExistsCurrent(L"randomizationWindow"))
            {
                if (!config.ExistsCurrent(L"sampleBasedRandomizationWindow") || // sampleBasedRandomizationWindow is not specified
                    !sampleBasedRandomizationWindow) // randomization window is in chunks
                {
                    sampleBasedRandomizationWindow = false;
                    size_t chunkSizeBytes = config(L"chunkSizeInBytes", g_32MB); // 32 MB by default
                    randomizationWindow = g_4GB / chunkSizeBytes; // ~ 4 GB disk space worth of chunks
                                                                  // TODO: decrease randomization window if m_deserializers.size() > 1 ?
                }
                else
                {
                    // config explicitly says to use a sample-based window, but does not specify its size.
                    LogicError("'sampleBasedRandomizationWindow' (== 'true') requires that the 'randomizationWindow' is explicitly specified.");
                }
            }

            bool shouldPrefetch = true;
            m_sequenceEnumerator = std::make_shared<BlockRandomizer>(verbosity, randomizationWindow, deserializer, shouldPrefetch,
                multiThreadedDeserialization, maxErrors, sampleBasedRandomizationWindow, GetRandomSeed(config));
        }
        else
            m_sequenceEnumerator = std::make_shared<NoRandomizer>(deserializer, multiThreadedDeserialization, maxErrors);
    }

    // In case when there are transforms, applying them to the data.
    // NOTE(review): m_transforms is not filled anywhere in this constructor; presumably it is
    // populated elsewhere (e.g. while creating the deserializers) - confirm.
    m_sequenceEnumerator = m_transforms.empty()
        ? m_sequenceEnumerator
        : std::make_shared<TransformController>(m_transforms, m_sequenceEnumerator, multiThreadedDeserialization);

    // TODO: Output stream descriptions - this should come from the network so that we can check 
    // that input matches what the network expects (including tensor shape, etc.).
    std::vector<StreamInformation> outputStreams = m_sequenceEnumerator->GetStreamDescriptions();

    // Currently for prefetch we use two alternating buffers,
    // same is the default.
    size_t numAlternatingBuffers = 2;

    // Check whether to use local timeline, by default we use it for better performance.
    bool localTimeline = config(L"localTimeline", true);

    // Create the packer that matches the final packing mode.
    switch (m_packingMode)
    {
    case PackingMode::sample:
        m_packer = std::make_shared<FramePacker>(
            m_sequenceEnumerator,
            outputStreams,
            numAlternatingBuffers,
            localTimeline,
            m_corpus);
        break;
    case PackingMode::sequence:
        m_packer = std::make_shared<SequencePacker>(
            m_sequenceEnumerator,
            outputStreams,
            numAlternatingBuffers,
            localTimeline,
            m_corpus);
        break;
    case PackingMode::truncated:
    {
        // Currently BPTT does not support sparse format as output.
        // We always require dense from the packer.
        for (auto& s : outputStreams)
            s.m_storageFormat = StorageFormat::Dense;

        // Unlike the other two packers, this one is constructed without the localTimeline flag.
        m_packer = std::make_shared<TruncatedBPTTPacker>(
            m_sequenceEnumerator,
            outputStreams,
            numAlternatingBuffers,
            m_corpus);
        break;
    }
    default:
        LogicError("Unsupported type of packer '%d'.", (int)m_packingMode);
    }
}
コード例 #23
0
ファイル: CNTK.cpp プロジェクト: hahatt/CNTK
// Entry point for runs that use the old CNTK config syntax.
// Called from wmain, which is a wrapper that catches & reports Win32 exceptions.
//
// In order: prints the banner, parses the command line into a ConfigParameters object, applies
// global switches (GPU support check, timestamping, deterministic algorithms, MPI, matrix sharing,
// GPU allocation tracing), optionally redirects stderr to a log file, prints configuration info
// depending on 'traceLevel', runs the commands in float or double precision, and finally writes
// the optional done file and a completion marker.
//
// argc/argv: the wide-character command line.
// Returns EXIT_SUCCESS when the end of the function is reached; failures are reported through
// InvalidArgument/RuntimeError (and whatever the called helpers raise).
int wmainOldCNTKConfig(int argc, wchar_t* argv[])
{
    std::string timestamp = TimeDateStamp();
    PrintBanner(argc, argv, timestamp);

    ConfigParameters config;
    std::string rawConfigString = ConfigParameters::ParseCommandLine(argc, argv, config);    // get the command param set they want

    int traceLevel = config(L"traceLevel", 0);

#ifndef CPUONLY
    // Only an explicit non-negative numeric deviceId is validated; "cpu", "auto" and negative ids are not checked.
    ConfigValue val = config("deviceId", "auto");
    if (!EqualCI(val, "cpu") && !EqualCI(val, "auto"))
    {
        if (static_cast<int>(val) >= 0) // gpu (id >= 0)
        {
            CheckSupportForGpu(static_cast<int>(val)); // throws if gpu is not supported
        }
    }
#endif

    if (config(L"timestamping", false))
        ProgressTracing::SetTimestampingFlag();

    if (config(L"forceDeterministicAlgorithms", false))
        Globals::ForceDeterministicAlgorithms();

    // get the command param set they want
    wstring logpath = config(L"stderr", L"");

    wstring doneFile = config(L"doneFile", L"");
    ConfigArray command = config(L"command", "train");

    // parallel training
    // The top-level 'parallelTrain' is a bool, not to be confused with the parallelTrain block inside SGD.
    shared_ptr<Microsoft::MSR::CNTK::MPIWrapper> mpi;
    // Registers MPIWrapper::DeleteInstance with a scope-exit helper so it runs on every exit path of this function.
    auto ensureMPIWrapperCleanup = MakeScopeExit(&MPIWrapper::DeleteInstance);
    
    // when running under MPI with more than one node, use 'true' as the default value for parallelTrain,
    // 'false' otherwise.
    bool paralleltrain = config(L"parallelTrain", (MPIWrapper::GetTotalNumberOfMPINodes() > 1));

    // 'mpi' stays null unless parallel training is enabled; all uses below are guarded by 'paralleltrain'.
    if (paralleltrain)
    {
       mpi = MPIWrapper::GetInstance(true /*create*/);
    } 

    g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false);

    TracingGPUMemoryAllocator::SetTraceLevel(config(L"traceGPUMemoryAllocations", 0));

    // Redirect stderr to a log file when 'stderr' is configured.
    if (logpath != L"")
    {
#if 1   // keep the ability to do it how it was done before 1.8; delete if noone needs it anymore
        // The old naming scheme is tied to the timestamping flag: <stderr>_<cmd1>_<cmd2>....log
        let useOldWay = ProgressTracing::GetTimestampingFlag(); // enable it when running in our server farm
        if (useOldWay)
        {
            for (int i = 0; i < command.size(); i++) // append all 'command' entries
            {
                logpath += L"_";
                logpath += (wstring)command[i];
            }
            logpath += L".log"; // append .log
        }

        // Old scheme: every rank (including 0) appends "rank<N>" after the ".log" suffix.
        if (paralleltrain && useOldWay)
        {
            std::wostringstream oss;
            oss << mpi->CurrentNodeRank();
            logpath += L"rank" + oss.str();
        }
        else
#endif
        // NOTE: when the '#if 1' block above is compiled in, its trailing 'else' binds to this 'if',
        // so the new-style suffix is only applied when the old scheme was not used.
        // for MPI workers except main, append .rankN
        if (paralleltrain && mpi->CurrentNodeRank() != 0)
            logpath += msra::strfun::wstrprintf(L".rank%d", mpi->CurrentNodeRank());
        RedirectStdErr(logpath);
        if (traceLevel == 0)
            PrintBanner(argc, argv, timestamp); // repeat simple banner into log file
    }

    // full config info
    if (traceLevel > 0)
    {
        PrintBuiltInfo();
        PrintGpuInfo();
    }

#ifdef _DEBUG
    if (traceLevel > 0)
    {
        // This simply merges all the different config parameters specified (eg, via config files or via command line directly),
        // and prints it.
        fprintf(stderr, "\nConfiguration, Raw:\n\n");
        LOGPRINTF(stderr, "%s\n", rawConfigString.c_str());

        // Same as above, but all variables are resolved.  If a parameter is set multiple times (eg, set in config, overridden at command line),
        // All of these assignments will appear, even though only the last assignment matters.
        fprintf(stderr, "\nConfiguration After Variable Resolution:\n\n");
        LOGPRINTF(stderr, "%s\n", config.ResolveVariables(rawConfigString).c_str());
    }
#endif

    SetMathLibTraceLevel(traceLevel);

    // This outputs the final value each variable/parameter is assigned to in config (so if a parameter is set multiple times, only the last
    // value it is set to will appear).
    if (traceLevel > 0)
    {
        fprintf(stderr, "\nConfiguration After Processing and Variable Resolution:\n\n");
        config.dumpWithResolvedVariables();

        LOGPRINTF(stderr, "Commands:");
        for (int i = 0; i < command.size(); i++)
            fprintf(stderr, " %s", command[i].c_str());
        fprintf(stderr, "\n");
    }

    // run commands
    std::string type = config(L"precision", "float");
    // The old 'type' key is no longer accepted: its mere presence is reported as an error
    // that points the user to 'precision'.
    if (config.Exists("type"))
        InvalidArgument("CNTK: Use of 'type' parameter is deprecated, it is called 'precision' now.");

    if (traceLevel > 0)
    {
        LOGPRINTF(stderr, "precision = \"%s\"\n", type.c_str());
    }

    // Dispatch on the element type; the comparison is case-sensitive and only accepts "float" and "double".
    if (type == "float")
        DoCommands<float>(config, mpi);
    else if (type == "double")
        DoCommands<double>(config, mpi);
    else
        RuntimeError("CNTK: Invalid precision string: \"%s\", must be \"float\" or \"double\"", type.c_str());

    // if completed then write a doneFile if requested
    if (!doneFile.empty())
    {
        FILE* fp = fopenOrDie(doneFile.c_str(), L"w");
        fprintf(fp, "Successfully finished at %s on %s\n", TimeDateStamp().c_str(), GetHostName().c_str());
        fcloseOrDie(fp);
    }
    // The completion marker depends on the timestamping flag (the same flag that selects the old log naming).
    if (ProgressTracing::GetTimestampingFlag())
    {
        LOGPRINTF(stderr, "__COMPLETED__\n"); // running in server environment which expects this string
    }
    else
        fprintf(stderr, "COMPLETED.\n");
    fflush(stderr);

    return EXIT_SUCCESS;
}
コード例 #24
0
ファイル: EvalActions.cpp プロジェクト: daib13/CNTK
void DoWriteOutput(const ConfigParameters& config)
{
    ConfigParameters readerConfig(config(L"reader"));
    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
    readerConfig.Insert("randomize", "None"); // we don't want randomization when output results

    DataReader testDataReader(readerConfig);

    DEVICEID_TYPE deviceId = DeviceFromConfig(config);
    ConfigArray minibatchSize = config(L"minibatchSize", "2048");
    wstring modelPath = config(L"modelPath");
    intargvector mbSize = minibatchSize;

    size_t epochSize = config(L"epochSize", "0");
    if (epochSize == 0)
    {
        epochSize = requestDataSize;
    }

    ConfigArray outputNodeNames = config(L"outputNodeNames", "");
    vector<wstring> outputNodeNamesVector;

    // Note this is required since the user might specify OutputNodeNames in the config, so don't use CreateFromFile,
	// instead we build the network ourselves.
    auto net = make_shared<ComputationNetwork>(deviceId);
    net->Read<ElemType>(modelPath);

    if (outputNodeNames.size() > 0)
    {
        net->OutputNodes().clear();
        for (int i = 0; i < outputNodeNames.size(); ++i)
        {
            outputNodeNamesVector.push_back(outputNodeNames[i]);
            net->OutputNodes().emplace_back(net->GetNodeFromName(outputNodeNames[i]));
        }
    }
    net->CompileNetwork();

    SimpleOutputWriter<ElemType> writer(net, 1);

    if (config.Exists("writer"))
    {
        ConfigParameters writerConfig(config(L"writer"));
        bool bWriterUnittest = writerConfig(L"unittest", "false");
        DataWriter testDataWriter(writerConfig);
        writer.WriteOutput(testDataReader, mbSize[0], testDataWriter, outputNodeNamesVector, epochSize, bWriterUnittest);
    }
    else if (config.Exists("outputPath"))
    {
        wstring outputPath = config(L"outputPath");

        // gather additional formatting options
        typename decltype(writer)::WriteFormattingOptions formattingOptions;
        if (config.Exists("format"))
        {
            ConfigParameters formatConfig(config(L"format"));
            if (formatConfig.ExistsCurrent("type")) // do not inherit 'type' from outer block
            {
                string type = formatConfig(L"type");
                if      (type == "real")     formattingOptions.isCategoryLabel = false;
                else if (type == "category") formattingOptions.isCategoryLabel = true;
                else                         InvalidArgument("write: type must be 'real' or 'category'");
                if (formattingOptions.isCategoryLabel)
                    formattingOptions.labelMappingFile = (wstring)formatConfig(L"labelMappingFile", L"");
            }
            formattingOptions.transpose         = formatConfig(L"transpose",         formattingOptions.transpose);
            formattingOptions.prologue          = formatConfig(L"prologue",          formattingOptions.prologue);
            formattingOptions.epilogue          = formatConfig(L"epilogue",          formattingOptions.epilogue);
            formattingOptions.sequenceSeparator = formatConfig(L"sequenceSeparator", formattingOptions.sequenceSeparator);
            formattingOptions.sequencePrologue  = formatConfig(L"sequencePrologue",  formattingOptions.sequencePrologue);
            formattingOptions.sequenceEpilogue  = formatConfig(L"sequenceEpilogue",  formattingOptions.sequenceEpilogue);
            formattingOptions.elementSeparator  = formatConfig(L"elementSeparator",  formattingOptions.elementSeparator);
            formattingOptions.sampleSeparator   = formatConfig(L"sampleSeparator",   formattingOptions.sampleSeparator);
            formattingOptions.precisionFormat   = formatConfig(L"precisionFormat",   formattingOptions.precisionFormat);
        }

        writer.WriteOutput(testDataReader, mbSize[0], outputPath, outputNodeNamesVector, formattingOptions, epochSize);
    }
    else
        InvalidArgument("write command: You must specify either 'writer'or 'outputPath'");
}
コード例 #25
0
ファイル: tests.cpp プロジェクト: 6779660/CNTK
void TestConfiguration(const ConfigParameters& configBase)
{
    ConfigParameters configMacros = configBase("macroExample");
    for (auto iterMacro = configMacros.begin(); iterMacro != configMacros.end(); iterMacro++)
    {
        std::map<std::string, ConfigValue> paramsMap;
        ConfigParameters configCN = iterMacro->second;
        if (configCN.Exists("parameters"))
        {
            ConfigArray params = configCN("parameters");
            for (int i = 0; i < params.size(); ++i)
                paramsMap[params[i]] = ConfigValue("uninitialized");
        }
        ConfigParameters configNodes = configCN("NodeList");
        for (auto iter = configNodes.begin();
             iter != configNodes.end(); iter++)
        {
            std::wstring nodeName;
            nodeName = msra::strfun::utf16(iter->first);
            ConfigArray configNode = iter->second;
            std::string opName = configNode[0];
            if (IsParameter(paramsMap, opName))
            {
                ;
            }
            if (opName == "InputValue" && configNode.size() >= 2)
            {
                size_t rows = 0;
                if (!IsParameter(paramsMap, configNode[1]))
                    rows = configNode[1];
            }
            else if (opName == "LearnableParameter" && configNode.size() >= 3)
            {
                size_t rows = 0;
                if (!IsParameter(paramsMap, configNode[1]))
                    rows = configNode[1];
                size_t cols = 0;
                if (!IsParameter(paramsMap, configNode[2]))
                    cols = configNode[2];
                bool learningRateMultiplier = 0;
                bool init = false;
                ConfigArray initData;

                // look for optional parameters
                for (int i = 3; i < configNode.size(); ++i)
                {
                    bool needsGradient = false;
                    ConfigParameters configParam = configNode[i];
                    if (configParam.Exists("learningRateMultiplier")) // TODO: should this be a test for 'true' rather than Exists()?
                        needsGradient = (float)configParam("learningRateMultiplier") > 0? true : false;
                    else if (configParam.Exists("init"))
                    {
                        init = true;
                        initData = configParam["init"];
                    }
                }
                // if initializing, do so now
                if (init)
                {
                    bool uniform = true;
                    ElemType initValueScale = 1;
                    size_t inputSize = cols;

                    if (initData.size() > 0)
                        initValueScale = initData[0];
                    if (initData.size() > 1)
                        uniform = EqualCI(initData[1], "uniform");
                }
            }
        }

        // now link up all the nodes
        configNodes = configCN("Relation");
        for (auto iter = configNodes.begin(); iter != configNodes.end(); iter++)
        {
            std::wstring nodeName = msra::strfun::utf16(iter->first);
            ConfigArray configNode = iter->second;
            int numChildren = (int) configNode.size();
            for (int i = 0; i < numChildren; ++i)
            {
                std::wstring nodeName = configNode[i];
            }
        }

        ConfigParameters configRoots = configCN("RootNodes");
        ConfigArray configNode = configRoots("FeatureNodes");
        for (size_t i = 0; i < configNode.size(); i++)
        {
            std::wstring nodeName = configNode[i];
        }

        if (configRoots.Exists("LabelNodes"))
        {
            configNode = configRoots("LabelNodes");
            for (size_t i = 0; i < configNode.size(); i++)
            {
                std::wstring nodeName = configNode[i];
            }
        }

        if (configRoots.Exists("CriterionNodes"))
        {
            configNode = configRoots("CriterionNodes");
            for (size_t i = 0; i < configNode.size(); i++)
            {
                std::wstring nodeName = configNode[i];
            }
        }

        if (configRoots.Exists("CriteriaNodes")) // legacy
        {
            configNode = configRoots("CriteriaNodes");
            for (size_t i = 0; i < configNode.size(); i++)
            {
                std::wstring nodeName = configNode[i];
            }
        }

        if (configRoots.Exists("NodesReqMultiSeqHandling"))
        {
            configNode = configRoots("NodesReqMultiSeqHandling");
            for (size_t i = 0; i < configNode.size(); i++)
            {
                std::wstring nodeName = configNode[i];
            }
            fprintf(stderr, "WARNING: 'NodesReqMultiSeqHandling' flag is defunct\n");
        }

        if (configRoots.Exists("EvalNodes"))
        {
            configNode = configRoots("EvalNodes");
            for (size_t i = 0; i < configNode.size(); i++)
            {
                std::wstring nodeName = configNode[i];
            }
        }

        if (configRoots.Exists("OutputNodes"))
        {
            configNode = configRoots("OutputNodes");
            for (size_t i = 0; i < configNode.size(); i++)
            {
                std::wstring nodeName = configNode[i];
            }
        }
    }
}
コード例 #26
0
ファイル: CNTK.cpp プロジェクト: WorldofOpenDev/CNTK
// Runs CNTK with the legacy (old-style) configuration format.
// Called from wmain, which is a wrapper that catches & reports Win32 exceptions.
//
// argc/argv: the raw command line; it is parsed into a ConfigParameters object.
// Returns EXIT_SUCCESS after DoCommands() has completed. An unsupported precision value
// is reported through RuntimeError().
int wmainOldCNTKConfig(int argc, wchar_t* argv[])
{
    ConfigParameters config;
    std::string rawConfigString = ConfigParameters::ParseCommandLine(argc, argv, config);

    // get the command param set they want
    wstring logpath = config(L"stderr", L"");

    //  [1/26/2015 erw, add done file so that it can be used on HPC]
    wstring DoneFile = config(L"DoneFile", L"");
    ConfigArray command = config(L"command", "train");

    // parallel training: the global MPI wrapper only exists when 'parallelTrain' is set
    g_mpi = nullptr;
    bool paralleltrain = config(L"parallelTrain", "false");
    if (paralleltrain)
    {
        g_mpi = new MPIWrapper();
    }

    g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false);

    TracingGPUMemoryAllocator::SetTraceLevel(config(L"traceGPUMemoryAllocations", 0));

    if (logpath != L"")
    {
        // The log file name is "<stderr>_<command1>_<command2>...log".
        for (size_t i = 0; i < command.size(); i++)
        {
            logpath += L"_";
            logpath += (wstring) command[i];
        }
        logpath += L".log";

        // NOTE(review): the rank suffix is appended after the ".log" extension
        // (e.g. "x_train.logrank0"); kept as-is since existing tooling may rely on these names.
        if (paralleltrain)
        {
            std::wostringstream oss;
            oss << g_mpi->CurrentNodeRank();
            logpath += L"rank" + oss.str();
        }
        RedirectStdErr(logpath);
    }

    PrintBuiltInfo(); // this one goes to log file
    std::string timestamp = TimeDateStamp();

    // dump config info
    fprintf(stderr, "running on %s at %s\n", GetHostName().c_str(), timestamp.c_str());
    fprintf(stderr, "command line: \n");
    for (int i = 0; i < argc; i++)
    {
        fprintf(stderr, "%s ", WCharToString(argv[i]).c_str());
    }

    // This simply merges all the different config parameters specified (eg, via config files or via command line directly),
    // and prints it.
    fprintf(stderr, "\n\n>>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>\n");
    fprintf(stderr, "%s\n", rawConfigString.c_str());
    fprintf(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED)  <<<<<<<<<<<<<<<<<<<<\n");

    // Same as above, but all variables are resolved.  If a parameter is set multiple times (eg, set in config, overridden at command line),
    // all of these assignments will appear, even though only the last assignment matters.
    fprintf(stderr, "\n>>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n");
    fprintf(stderr, "%s\n", config.ResolveVariables(rawConfigString).c_str());
    fprintf(stderr, "<<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n");

    // This outputs the final value each variable/parameter is assigned to in config (so if a parameter is set multiple times, only the last
    // value it is set to will appear).
    fprintf(stderr, "\n>>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>\n");
    config.dumpWithResolvedVariables();
    fprintf(stderr, "<<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<\n");

    fprintf(stderr, "command: ");
    for (size_t i = 0; i < command.size(); i++)
    {
        fprintf(stderr, "%s ", command[i].c_str());
    }

    // run commands
    std::string type = config(L"precision", "float");
    // accept old precision key for backward compatibility
    if (config.Exists("type"))
    {
        type = config(L"type", "float");
    }

    fprintf(stderr, "\nprecision = %s\n", type.c_str());
    if (type == "float")
    {
        DoCommands<float>(config);
    }
    else if (type == "double")
    {
        DoCommands<double>(config);
    }
    else
    {
        RuntimeError("invalid precision specified: %s", type.c_str());
    }

    // still here , write a DoneFile if necessary
    if (!DoneFile.empty())
    {
        FILE* fp = fopenOrDie(DoneFile.c_str(), L"w");
        fprintf(fp, "successfully finished at %s on %s\n", TimeDateStamp().c_str(), GetHostName().c_str());
        fcloseOrDie(fp);
    }
    fprintf(stderr, "COMPLETED\n");
    fflush(stderr);

    // Release the MPI wrapper and clear the global so nothing can observe a dangling pointer afterwards.
    delete g_mpi;
    g_mpi = nullptr;
    return EXIT_SUCCESS;
}
コード例 #27
0
ファイル: TextConfigHelper.cpp プロジェクト: vnvizitiu/CNTK
// Builds the CNTKTextFormatReader settings from the given configuration:
//  - validates that a non-empty "input" section is present,
//  - resolves the element precision ("float" by default, or "double"),
//  - creates one StreamDescriptor per entry of the "input" section,
//  - reads the remaining reader options (file, error limit, tracing, chunking, randomization).
// Any invalid setting is reported through RuntimeError().
TextConfigHelper::TextConfigHelper(const ConfigParameters& config)
{
    if (!config.ExistsCurrent(L"input"))
    {
        RuntimeError("CNTKTextFormatReader configuration does not contain \"input\" section.");
    }

    const ConfigParameters& input = config(L"input");

    if (input.empty())
    {
        RuntimeError("CNTKTextFormatReader configuration contains an empty \"input\" section.");
    }

    // The element type is shared by all streams described below.
    string precisionName = config.Find("precision", "float");
    if (AreEqualIgnoreCase(precisionName, "float"))
    {
        m_elementType = DataType::Float;
    }
    else if (AreEqualIgnoreCase(precisionName, "double"))
    {
        m_elementType = DataType::Double;
    }
    else
    {
        RuntimeError("Not supported precision '%s'. Expected 'double' or 'float'.", precisionName.c_str());
    }

    StreamId nextStreamId = 0;
    map<string, wstring> inputNameByAlias; // used to detect two inputs claiming the same alias
    for (const pair<string, ConfigParameters>& entry : input)
    {
        ConfigParameters streamConfig = entry.second;
        wstring inputName = msra::strfun::utf16(entry.first);

        // Both "dim" and "format" are mandatory for every input.
        if (!(streamConfig.ExistsCurrent(L"dim") && streamConfig.ExistsCurrent(L"format")))
        {
            RuntimeError("Input section for input '%ls' does not specify all the required parameters, "
                "\"dim\" and \"format\".", inputName.c_str());
        }

        StreamDescriptor stream;
        stream.m_id = nextStreamId++;
        stream.m_name = inputName;
        stream.m_sampleDimension = streamConfig(L"dim");
        stream.m_definesMbSize = streamConfig(L"definesMBSize", false);
        string formatName = streamConfig(L"format");

        if (AreEqualIgnoreCase(formatName, "dense"))
        {
            stream.m_storageFormat = StorageFormat::Dense;
        }
        else if (AreEqualIgnoreCase(formatName, "sparse"))
        {
            stream.m_storageFormat = StorageFormat::SparseCSC;
            // Sparse indices are stored as IndexType, so the dimension has to fit into it.
            if (stream.m_sampleDimension > numeric_limits<IndexType>::max())
            {
                RuntimeError("Sample dimension (%" PRIu64 ") for sparse input '%ls'"
                    " exceeds the maximum allowed value (%" PRIu64 ").\n",
                    stream.m_sampleDimension, inputName.c_str(), static_cast<size_t>(numeric_limits<IndexType>::max()));
            }
        }
        else
        {
            RuntimeError("'format' parameter must be set either to 'dense' or 'sparse'.");
        }

        // The alias is optional; without one, the section key itself serves as the alias.
        if (!streamConfig.ExistsCurrent(L"alias"))
        {
            stream.m_alias = entry.first;
        }
        else
        {
            stream.m_alias = streamConfig(L"alias");
            if (stream.m_alias.empty())
            {
                RuntimeError("Alias value for input '%ls' is empty.", inputName.c_str());
            }
        }

        // Every alias may be bound to a single input only.
        const auto existing = inputNameByAlias.find(stream.m_alias);
        if (existing != inputNameByAlias.end())
        {
            RuntimeError("Alias %s is already mapped to input %ls.",
                stream.m_alias.c_str(), existing->second.c_str());
        }
        else
        {
            inputNameByAlias[stream.m_alias] = stream.m_name;
        }

        stream.m_elementType = m_elementType;
        m_streams.push_back(stream);
    }

    // Remaining reader-level options.
    m_filepath = msra::strfun::utf16(config(L"file"));
    m_skipSequenceIds = config(L"skipSequenceIds", false);
    m_maxErrors = config(L"maxErrors", 0);
    m_traceLevel = config(L"traceLevel", 1);
    m_chunkSizeBytes = config(L"chunkSizeInBytes", g_32MB); // 32 MB by default
    m_keepDataInMemory = config(L"keepDataInMemory", false);
    m_frameMode = config(L"frameMode", false);

    m_randomizationWindow = GetRandomizationWindowFromConfig(config);
    m_sampleBasedRandomizationWindow = config(L"sampleBasedRandomizationWindow", false);

    // With a chunk-based window left on "auto", fall back to ~ 4 GB (on disk) worth of chunks.
    const bool useDefaultChunkWindow = !m_sampleBasedRandomizationWindow && m_randomizationWindow == randomizeAuto;
    if (useDefaultChunkWindow)
    {
        m_randomizationWindow = g_4GB / m_chunkSizeBytes;
    }
}
コード例 #28
0
ファイル: ImageTransformers.cpp プロジェクト: gzt200361/CNTK
// Reads the cropping options from the configuration: crop size, side/area/aspect ratio ranges,
// jitter type, crop type and horizontal flipping. Each range is given as (min, max).
// Invalid values, or specifying both sideRatio and areaRatio, are reported through RuntimeError().
CropTransformer::CropTransformer(const ConfigParameters& config) : ImageTransformerBase(config)
{
    // Crop size: first entry is the width, second the height.
    intargvector cropDims = config(L"cropSize", "0");
    m_cropWidth = cropDims[0];
    m_cropHeight = cropDims[1];
    if (m_cropWidth < 0 || m_cropHeight < 0)
    {
        RuntimeError("Invalid cropSize value, must be >= 0");
    }

    // Side ratio: a (0, 0) range is the default value and means "not specified".
    floatargvector sideRatioRange = config(L"sideRatio", "0.0");
    m_sideRatioMin = sideRatioRange[0];
    m_sideRatioMax = sideRatioRange[1];
    const bool sideRatioUnset = (m_sideRatioMin == 0.0 && m_sideRatioMax == 0.0);
    m_useSideRatio = !sideRatioUnset;
    if (!sideRatioUnset)
    {
        const bool sideWithinBounds = (m_sideRatioMin > 0 && m_sideRatioMax <= 1.0);
        if (!sideWithinBounds || m_sideRatioMin > m_sideRatioMax)
        {
            RuntimeError("Invalid sideRatio value, must be > 0 and <= 1. sideMin must <= sideMax");
        }
    }

    // Area ratio: same convention as the side ratio.
    floatargvector areaRatioRange = config(L"areaRatio", "0.0");
    m_areaRatioMin = areaRatioRange[0];
    m_areaRatioMax = areaRatioRange[1];
    const bool areaRatioUnset = (m_areaRatioMin == 0.0 && m_areaRatioMax == 0.0);
    m_useAreaRatio = !areaRatioUnset;
    if (!areaRatioUnset)
    {
        const bool areaWithinBounds = (m_areaRatioMin > 0 && m_areaRatioMax <= 1.0);
        if (!areaWithinBounds || m_areaRatioMin > m_areaRatioMax)
        {
            RuntimeError("Invalid areaRatio value, must be > 0 and <= 1. areaMin must <= areaMax");
        }
    }

    // The two ratio modes are mutually exclusive.
    if (m_useSideRatio && m_useAreaRatio)
    {
        RuntimeError("sideRatio and areaRatio cannot be specified simultaneously");
    }

    // Aspect ratio is always validated; it defaults to 1.0.
    floatargvector aspectRatioRange = config(L"aspectRatio", "1.0");
    m_aspectRatioMin = aspectRatioRange[0];
    m_aspectRatioMax = aspectRatioRange[1];
    const bool aspectWithinBounds = (m_aspectRatioMin > 0 && m_aspectRatioMax <= 1.0);
    if (!aspectWithinBounds || m_aspectRatioMin > m_aspectRatioMax)
    {
        RuntimeError("Invalid aspectRatio value, must be > 0 and <= 1. aspectMin must <= aspectMax");
    }

    m_jitterType = ParseJitterType(config(L"jitterType", ""));
    m_cropType = ImageConfigHelper::ParseCropType(config(L"cropType", ""));

    // An explicit "hflip" setting wins; otherwise flipping is enabled only for the random crop types.
    if (config.ExistsCurrent(L"hflip"))
    {
        m_hFlip = config(L"hflip");
    }
    else
    {
        m_hFlip = (m_cropType == CropType::RandomSide || m_cropType == CropType::RandomArea);
    }

    // for MultiView10 we need to set m_hflip = false, otherwise we might not get 5 unflipped image (see CropTransformer::Apply below)
    if (m_cropType == CropType::MultiView10)
    {
        m_hFlip = false;
    }
}
コード例 #29
0
// Builds the CNTKTextFormatReader settings from the given configuration:
//  - validates that a non-empty "input" section is present,
//  - resolves the element precision ("float" by default, or "double"),
//  - creates one StreamDescriptor per entry of the "input" section,
//  - reads the file path, the randomization window and the remaining reader options.
// Any invalid setting is reported through RuntimeError().
TextConfigHelper::TextConfigHelper(const ConfigParameters& config)
{
    if (!config.ExistsCurrent(L"input"))
    {
        RuntimeError("CNTKTextFormatReader configuration does not contain \"input\" section.");
    }

    const ConfigParameters& input = config(L"input");

    if (input.empty())
    {
        RuntimeError("CNTKTextFormatReader configuration contains an empty \"input\" section.");
    }

    // The element type is shared by all streams described below.
    string precision = config.Find("precision", "float");
    if (AreEqualIgnoreCase(precision, "double"))
    {
        m_elementType = ElementType::tdouble;
    }
    else if (AreEqualIgnoreCase(precision, "float"))
    {
        m_elementType = ElementType::tfloat;
    }
    else
    {
        RuntimeError("Not supported precision '%s'. Expected 'double' or 'float'.", precision.c_str());
    }

    StreamId id = 0;
    map<string, wstring> aliasToInputMap; // used to detect two inputs claiming the same alias
    for (const pair<string, ConfigParameters>& section : input)
    {
        // Named 'input2' so that it does not shadow the outer 'input' section being iterated.
        ConfigParameters input2 = section.second;
        wstring name = msra::strfun::utf16(section.first);

        // Both "dim" and "format" are mandatory for every input.
        if (!input2.ExistsCurrent(L"dim") || !input2.ExistsCurrent(L"format"))
        {
            RuntimeError("Input section for input '%ls' does not specify all the required parameters, "
                "\"dim\" and \"format\".", name.c_str());
        }

        StreamDescriptor stream;
        stream.m_id = id++;
        stream.m_name = name;
        stream.m_sampleDimension = input2(L"dim");
        string type = input2(L"format");

        if (AreEqualIgnoreCase(type, "dense"))
        {
            stream.m_storageType = StorageType::dense;
        }
        else if (AreEqualIgnoreCase(type, "sparse"))
        {
            stream.m_storageType = StorageType::sparse_csc;
            // The sample dimension of a sparse input must not exceed the IndexType range.
            if (stream.m_sampleDimension > numeric_limits<IndexType>::max())
            {
                RuntimeError("Sample dimension (%" PRIu64 ") for sparse input '%ls'"
                    " exceeds the maximum allowed value (%" PRIu64 ").\n",
                    stream.m_sampleDimension, name.c_str(), (size_t)numeric_limits<IndexType>::max());
            }
        }
        else
        {
            RuntimeError("'format' parameter must be set either to 'dense' or 'sparse'.");
        }

        // alias is optional; without one, the section key itself serves as the alias
        if (input2.ExistsCurrent(L"alias"))
        {
            stream.m_alias = input2(L"alias");
            if (stream.m_alias.empty())
            {
                RuntimeError("Alias value for input '%ls' is empty.", name.c_str());
            }
        }
        else
        {
            stream.m_alias = section.first;
        }

        // Every alias may be bound to a single input only.
        if (aliasToInputMap.find(stream.m_alias) != aliasToInputMap.end())
        {
            RuntimeError("Alias %s is already mapped to input %ls.",
                stream.m_alias.c_str(), aliasToInputMap[stream.m_alias].c_str());
        }
        else
        {
            aliasToInputMap[stream.m_alias] = stream.m_name;
        }

        stream.m_elementType = m_elementType;
        m_streams.push_back(stream);
    }

    m_filepath = msra::strfun::utf16(config(L"file"));

    // "randomize" may be "none", "auto" (case-insensitive) or a value converted directly from the config.
    // When the key is absent, the window defaults to randomizeAuto.
    if (config.Exists(L"randomize"))
    {
        // If the value cannot be read as a string, the empty string matches neither keyword,
        // so the value is converted directly in the final branch.
        wstring randomizeString = config.CanBeString(L"randomize") ? config(L"randomize") : wstring();
        if (!_wcsicmp(randomizeString.c_str(), L"none"))
        {
            m_randomizationWindow = randomizeNone;
        }
        else if (!_wcsicmp(randomizeString.c_str(), L"auto"))
        {
            m_randomizationWindow = randomizeAuto;
        }
        else
        {
            m_randomizationWindow = config(L"randomize");
        }
    }
    else
    {
        m_randomizationWindow = randomizeAuto;
    }

    m_skipSequenceIds = config(L"skipSequenceIds", false);
    m_maxErrors = config(L"maxErrors", 0);
    m_traceLevel = config(L"traceLevel", 0);
    m_chunkSizeBytes = config(L"chunkSizeInBytes", 32 * 1024 * 1024); // 32 MB by default
    m_chunkCacheSize = config(L"numChunksToCache", 32); // 32 * 32 MB = 1 GB of memory in total
}