Exemplo n.º 1
0
void TestReader(const ConfigParameters& configBase)
{
    // int nonexistant = configBase("nonexistant");  // use to test global exception handler
    ConfigParameters config(configBase("mnistTest"));
    ConfigParameters readerConfig(config("reader"));
    readerConfig.Insert("traceLevel", config("traceLevel", "0"));

    size_t mbSize = config("minibatchSize");
    size_t epochSize = config("epochSize", "0");
    if (epochSize == 0)
    {
        epochSize = requestDataSize;
    }

    DataReader dataReader(readerConfig);

    // get names of features and labels
    std::vector<std::wstring> featureNames;
    std::vector<std::wstring> labelNames;
    GetFileConfigNames(readerConfig, featureNames, labelNames);

    // setup minibatch matrices
    int deviceId = 0;
    auto featuresMatrix = make_shared<Matrix<ElemType>>(deviceId);
    auto labelsMatrix   = make_shared<Matrix<ElemType>>(deviceId);
    StreamMinibatchInputs matrices;
    matrices.AddInputMatrix(featureNames[0], featuresMatrix);
    matrices.AddInputMatrix(labelNames[0],   labelsMatrix);

    auto start = std::chrono::system_clock::now();
    int epochs = config("maxEpochs");
    epochs *= 2;
    for (int epoch = 0; epoch < epochs; epoch++)
    {
        dataReader.StartMinibatchLoop(mbSize, epoch, epochSize);
        int i = 0;
        while (dataReader.GetMinibatch(matrices))
        {
            Matrix<ElemType>& features = matrices.GetInputMatrix<ElemType>(featureNames[0]);
            Matrix<ElemType>& labels   = matrices.GetInputMatrix<ElemType>(labelNames[0]);

            if (labels.GetNumRows() == 0)
            {
                fprintf(stderr, "%4d: features dim: %lu x %lu - [%.8g, %.8g, ...]\n", i++, features.GetNumRows(), features.GetNumCols(), features(0, 0), features(0, 1));
            }
            else
            {
                fprintf(stderr, "%4d: features dim: %lu x %lu - [%.8g, %.8g, ...] label dim: %lu x %lu - [%d, %d, ...]\n", i++, features.GetNumRows(), features.GetNumCols(), features(0, 0), features(0, 1), labels.GetNumRows(), labels.GetNumCols(), (int) labels(0, 0), (int) labels(0, 1));
            }
        }
    }
    auto end = std::chrono::system_clock::now();
    auto elapsed = end - start;
    fprintf(stderr, "%f seconds elapsed", (float) (std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count()) / 1000);
}
Exemplo n.º 2
0
// Drives the reader once over the whole dataset for every label stream so that
// the reader (configured with allowMapCreation=true) can produce the label
// mapping file for that stream.
//
// config: action configuration. "section" names the configuration section whose
//         "reader" subsection is used; "minibatchSize" (default "2048") and
//         "traceLevel" (default "0") are read from config itself.
//
// Calls RuntimeError when no feature stream, no label stream, or no
// labelMappingFile entry is found. Returns early, doing nothing further, as soon
// as a label mapping file is found to exist already.
void DoCreateLabelMap(const ConfigParameters& config)
{
    // this gets the section name we are interested in
    std::string section = config(L"section");
    // get that section (probably a peer config section, which works thanks to hierarchical symbol resolution)
    ConfigParameters configSection(config(section));
    ConfigParameters readerConfig(configSection("reader"));
    readerConfig.Insert("allowMapCreation", "true");
    size_t minibatchSize = config(L"minibatchSize", "2048");
    int traceLevel = config(L"traceLevel", "0");
    std::vector<std::wstring> featureNames;
    std::vector<std::wstring> labelNames;
    GetFileConfigNames(readerConfig, featureNames, labelNames);

    // Validate before the first element of either list is touched.
    if (featureNames.empty())
        RuntimeError("CreateLabelMap: no features found to process");
    if (labelNames.empty())
        RuntimeError("CreateLabelMap: no labels found to process");

    // setup minibatch matrices
    auto featuresMatrix = make_shared<Matrix<ElemType>>(CPUDEVICE);
    auto labelsMatrix   = make_shared<Matrix<ElemType>>(CPUDEVICE);
    StreamMinibatchInputs matrices;
    matrices.AddInputMatrix(featureNames[0], featuresMatrix);

    // now create the reader and loop through the entire dataset to get all the labels
    // (steady_clock is monotonic, so the interval is immune to wall-clock changes)
    const auto start = std::chrono::steady_clock::now();
    for (const std::wstring& labelsName : labelNames)
    {
        // register the shared labels matrix under this label stream's name
        matrices.AddInputMatrix(labelsName, labelsMatrix);

        // get the label mapping file name: the label's own section takes
        // precedence, then the enclosing reader section
        ConfigParameters labelConfig(readerConfig(labelsName));
        std::string labelMappingFile;
        if (labelConfig.ExistsCurrent(L"labelMappingFile"))
            labelMappingFile = labelConfig(L"labelMappingFile");
        else if (readerConfig.ExistsCurrent(L"labelMappingFile"))
            labelMappingFile = readerConfig(L"labelMappingFile"); // read from the section that was just tested
        else
            RuntimeError("CreateLabelMap: No labelMappingFile defined");

        if (fexists(labelMappingFile))
        {
            // NOTE(review): this leaves the whole function, so label streams later
            // in labelNames are not examined - confirm that is intended.
            fprintf(stderr, "CreateLabelMap: the label mapping file '%s' already exists, no work to do.\n", labelMappingFile.c_str());
            return;
        }
        fprintf(stderr, "CreateLabelMap: Creating the mapping file '%s' \n", labelMappingFile.c_str());

        DataReader dataReader(readerConfig);
        dataReader.StartMinibatchLoop(minibatchSize, 0, requestDataSize);
        size_t count = 0; // total number of feature-matrix columns seen
        while (dataReader.GetMinibatch(matrices))
        {
            Matrix<ElemType>& features = matrices.GetInputMatrix<ElemType>(featureNames[0]);
            count += features.GetNumCols();
            if (traceLevel > 1)
                fprintf(stderr, "."); // progress meter
        }
        // NOTE(review): a second epoch is started but never read; presumably this
        // prompts the reader to finish writing the mapping file - confirm against
        // the reader implementation.
        dataReader.StartMinibatchLoop(minibatchSize, 1, requestDataSize);

        // print the results
        if (traceLevel > 0)
            fprintf(stderr, "\nread %zu labels and produced %s\n", count, labelMappingFile.c_str());
    }
    const auto end = std::chrono::steady_clock::now();
    const auto elapsedMs = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
    if (traceLevel > 1)
        fprintf(stderr, "%f seconds elapsed\n", static_cast<float>(elapsedMs) / 1000);
}