C++ (Cpp) MBLayoutPtr Examples

Programming Language: C++ (Cpp)

Class/Type: MBLayoutPtr

Examples at hotexamples.com: 4

C++ (Cpp) MBLayoutPtr - 4 examples found. These are the top rated real world C++ (Cpp) examples of MBLayoutPtr extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

GetAllSequences(2)

GetColumnIndex(2)

InitAsFrameMode(2)

InitAsPackedSequences(2)

AddSequence(1)

GetNumCols(1)

GetNumParallelSequences(1)

GetNumTimeSteps(1)

Init(1)

Example #1

Show file

File: TestHelpers.cpp Project: AllanYiin/CNTK

void DummyNodeTest<ElemType>::SetMinibatch(size_t minibatchSize, SmallVector<size_t> sampleDimensions,
                                               std::vector<ElemType>& data)
{
    // Set given shape and allocate matrices.
    TensorShape shape(sampleDimensions);
    if (shape.GetNumElements() * minibatchSize != data.size())
        LogicError("Data size is incompatible with specified dimensions.");
    MBLayoutPtr mbLayout = make_shared<MBLayout>();
    mbLayout->InitAsFrameMode(minibatchSize);
    this->LinkToMBLayout(mbLayout);
    this->SetDims(shape, true);
    this->CreateValueMatrixIfNull();
    this->Value().Resize(shape.GetNumElements(), minibatchSize);
    this->CreateGradientMatrixIfNull();
    this->Gradient().Resize(shape.GetNumElements(), minibatchSize);
    this->Value().SetValue(minibatchSize, shape.GetNumElements(), this->m_deviceId, data.data());
}

Example #2

Show file

File: SequencePacker.cpp Project: Microsoft/CNTK

MBLayoutPtr SequencePacker::CreateMBLayout(const StreamBatch& batch)
{
    vector<MBLayout::SequenceInfo> infos;
    for (size_t index = 0; index < batch.size(); ++index)
    {
        MBLayout::SequenceInfo info;

        info.seqId = index;
        info.tBegin = 0;
        info.tEnd = batch[index]->m_numberOfSamples;
        infos.push_back(info);
    }

    vector<pair<size_t, size_t>> placement;
    vector<size_t> rowAllocations;

    // Creating the minibatch layout.
    MBLayoutPtr pMBLayout = make_shared<MBLayout>();
    pMBLayout->InitAsPackedSequences(infos, placement, rowAllocations);
    return pMBLayout;
}

Example #3

Show file

File: ComputationNode.cpp Project: Soukiy/CNTK

void ComputationNode<ElemType>::WriteMinibatchWithFormatting(FILE* f, const FrameRange& fr,
                                                             size_t onlyUpToRow, size_t onlyUpToT, bool transpose, bool isCategoryLabel, bool isSparse,
                                                             const vector<string>& labelMapping, const string& sequenceSeparator, 
                                                             const string& sequencePrologue, const string& sequenceEpilogue,
                                                             const string& elementSeparator, const string& sampleSeparator,
                                                             string valueFormatString,
                                                             bool outputGradient) const
{
    // get minibatch matrix -> matData, matRows, matStride
    const Matrix<ElemType>& outputValues = outputGradient ? Gradient() : Value();
    let matRows   = outputValues.GetNumRows();
    let matStride = matRows; // how to get from one column to the next
    unique_ptr<ElemType[]> matDataPtr(outputValues.CopyToArray());
    ElemType* matData = matDataPtr.get();
    let sampleLayout = GetSampleLayout(); // this is currently only used for sparse; dense tensors are linearized

    // process all sequences one by one
    MBLayoutPtr pMBLayout = GetMBLayout();
    if (!pMBLayout) // no MBLayout: We are printing aggregates (or LearnableParameters?)
    {
        pMBLayout = make_shared<MBLayout>();
        pMBLayout->Init(1, outputValues.GetNumCols()); // treat this as if we have one single sequence consisting of the columns
        pMBLayout->AddSequence(0, 0, 0, outputValues.GetNumCols());
    }
    let& sequences = pMBLayout->GetAllSequences();
    let  width     = pMBLayout->GetNumTimeSteps();

    TensorShape tensorShape = GetSampleLayout();
    stringstream str;
    let dims = tensorShape.GetDims();
    for (auto dim : dims)
        str << dim << ' ';
    let shape = str.str(); // BUGBUG: change to string(tensorShape) to make sure we always use the same format

    bool sequencePrologueHasShape = sequencePrologue.find("%x") != sequencePrologue.npos;
    bool sampleSeparatorHasShape  = sampleSeparator.find("%x")  != sampleSeparator.npos;
    bool sequencePrologueHasSeqId = sequencePrologue.find("%d") != sequencePrologue.npos;
    bool sampleSeparatorHasSeqId  = sampleSeparator.find("%d")  != sampleSeparator.npos;

    for (size_t s = 0; s < sequences.size(); s++)
    {
        const auto& seqInfo = sequences[s];
        if (seqInfo.seqId == GAP_SEQUENCE_ID) // nothing in gaps to print
            continue;
        let tBegin = seqInfo.tBegin >= 0     ? seqInfo.tBegin : 0;
        let tEnd   = seqInfo.tEnd   <= width ? seqInfo.tEnd   : width;
        // [tBegin,tEnd) is where the sequence resides.
        // fr is also referencing where a sequence resides.

        // narrow to FrameRange if needed
        auto t0 = fr.IsAllFrames() ? tBegin : fr.m_timeOffset + (ptrdiff_t)fr.timeIdxInSeq;
        auto t1 = fr.IsAllFrames() ? tEnd   : fr.m_timeOffset + (ptrdiff_t)fr.timeIdxInSeq + (ptrdiff_t)fr.m_timeRange;
        if (t0 < tBegin)
            t0 = tBegin;
        if (t1 > tEnd)
            t1 = tEnd;
        // [t0,t1) is the range we want to print
        if (t0 > (ptrdiff_t)t1)
            continue; // skip this sequence

        // get sequence matrix -> seqData, seqRows, seqCols, seqStride
        let  seqData   = matData + pMBLayout->GetColumnIndex(seqInfo, t0 - tBegin) * matStride;
        auto seqRows   = matRows;
        let  seqCols   = t1 - t0;
        let  seqStride = pMBLayout->GetNumParallelSequences() * matStride;

        auto seqProl = sequencePrologue;
        auto sampleSep = sampleSeparator;

        if (sequencePrologueHasShape || sampleSeparatorHasShape)
        {
            auto sh = msra::strfun::_strprintf<char>("%s%ld", shape.c_str(), (unsigned long long)seqInfo.GetNumTimeSteps());
            if (sequencePrologueHasShape)
                seqProl = msra::strfun::ReplaceAll<std::string>(seqProl, "%x", sh);
            if (sampleSeparatorHasShape)
                sampleSep = msra::strfun::ReplaceAll<std::string>(sampleSep, "%x", sh);
        }

        if (sequencePrologueHasSeqId || sampleSeparatorHasSeqId)
        {
            auto sh = msra::strfun::_strprintf<char>("%ld", (unsigned long long)seqInfo.seqId);
            if (sequencePrologueHasSeqId)
                seqProl = msra::strfun::ReplaceAll<std::string>(seqProl, "%d", sh);
            if (sampleSeparatorHasSeqId)
                sampleSep = msra::strfun::ReplaceAll<std::string>(sampleSep, "%d", sh);
        }

        if (s > 0)
            fprintfOrDie(f, "%s", sequenceSeparator.c_str());
        fprintfOrDie(f, "%s", seqProl.c_str());

        // output it according to our format specification
        auto formatChar = valueFormatString.back();
        if (isCategoryLabel) // if is category then find the max value and output its index (possibly mapped to a string)
        {
            if (formatChar == 's') // verify label dimension
            {
                if (outputValues.GetNumRows() != labelMapping.size() &&
                    sampleLayout[0] != labelMapping.size()) // if we match the first dim then use that
                {
                    static size_t warnings = 0;
                    if (warnings++ < 5)
                        fprintf(stderr, "write: Row dimension %d does not match number of entries %d in labelMappingFile, not using mapping\n", (int)seqRows, (int)labelMapping.size());
                    valueFormatString.back() = 'u'; // this is a fallback
                    formatChar = valueFormatString.back();
                }
            }
            // update the matrix in-place from one-hot (or max) to index
            // find the max in each column
            for (size_t j = 0; j < seqCols; j++) // loop over all time steps of the sequence
            {
                double maxLoc = -1;
                double maxVal = 0;
                for (size_t i = 0; i < seqRows; i++) // loop over rows
                {
                    let val = seqData[i + j * seqStride];
                    if (maxLoc < 0 || val >= maxVal)
                    {
                        maxLoc = (double)i;
                        maxVal = val;
                    }
                }
                seqData[0 + j * seqStride] = (ElemType)maxLoc; // overwrite first element in-place
            }
            seqRows = 1; // ignore remaining dimensions
        }
        // function to print a value
        auto print = [&](double dval)
        {
            if (formatChar == 'f') // print as real number
            {
                if (dval == 0) dval = fabs(dval);    // clear the sign of a negative 0, which are produced inconsistently between CPU and GPU
                fprintfOrDie(f, valueFormatString.c_str(), dval);
            }
            else if (formatChar == 'u') // print category as integer index
            {
                fprintfOrDie(f, valueFormatString.c_str(), (unsigned int)dval);
            }
            else if (formatChar == 's') // print category as a label string
            {
                size_t uval = (size_t)dval;
                if (!labelMapping.empty())
                    uval %= labelMapping.size();
                assert(uval < labelMapping.size());
                const char * sval = labelMapping[uval].c_str();
                fprintfOrDie(f, valueFormatString.c_str(), sval);
            }
        };
        // bounds for printing
        let iend    = transpose ?     seqRows : seqCols;     // true dimension of the data to print
        let jend    = transpose ?     seqCols : seqRows;
        let istop   = transpose ? onlyUpToRow : onlyUpToT;   // we stop at these dimensions (for debugging, one often needs only the first few values of those huge matrices)
        let jstop   = transpose ?   onlyUpToT : onlyUpToRow;
        let istride = transpose ?           1 : seqStride;
        let jstride = transpose ?   seqStride : 1;
        if (isSparse)
        {
            // sparse linearizes the entire matrix into a single vector, and prints that one with coordinates
            // TODO: This can be done more nicely. We should keep the block structure.
            size_t numPrinted = 0;
            for (size_t i = 0; i < iend; i++) // loop over elements --we just flatten them all out
            {
                for (size_t j = 0; j < jend; j++) // loop over rows
                {
                    double dval = seqData[i * istride + j * jstride];
                    if (dval == 0) // only print non-0 values
                        continue;
                    if (numPrinted++ > 0)
                        fprintfOrDie(f, "%s", transpose ? sampleSeparator.c_str() : elementSeparator.c_str());
                    if (dval != 1.0 || formatChar != 'f') // hack: we assume that we are either one-hot or never precisely hitting 1.0
                        print(dval);
                    size_t row = transpose ? i : j;
                    size_t col = transpose ? j : i;
                    for (size_t k = 0; k < sampleLayout.size(); k++)
                    {
                        fprintfOrDie(f, "%c%d", k == 0 ? '[' : ',', row % sampleLayout[k]);
                        if (sampleLayout[k] == labelMapping.size()) // annotate index with label if dimensions match (which may misfire once in a while)
                            fprintfOrDie(f, "=%s", labelMapping[row % sampleLayout[k]].c_str());
                        row /= sampleLayout[k];
                    }
                    if (seqInfo.GetNumTimeSteps() > 1)
                        fprintfOrDie(f, ";%d", col);
                    fprintfOrDie(f, "]");
                }
            }
        }
        else
        {
            for (size_t j = 0; j < jend; j++) // loop over output rows     --BUGBUG: row index is 'i'!! Rename these!!
            {
                if (j > 0)
                    fprintfOrDie(f, "%s", sampleSep.c_str());
                if (j == jstop && jstop < jend - 1) // if jstop == jend-1 we may as well just print the value instead of '...'
                {
                    fprintfOrDie(f, "...+%d", (int)(jend - jstop)); // 'nuff said
                    break;
                }
                // inject sample tensor index if we are printing row-wise and it's a tensor
                if (!transpose && sampleLayout.size() > 1 && !isCategoryLabel) // each row is a different sample dimension
                {
                    for (size_t k = 0; k < sampleLayout.size(); k++)
                        fprintfOrDie(f, "%c%d", k == 0 ? '[' : ',', (int)((j / sampleLayout.GetStrides()[k])) % sampleLayout[k]);
                    fprintfOrDie(f, "]\t");
                }
                // print a row of values
                for (size_t i = 0; i < iend; i++) // loop over elements
                {
                    if (i > 0)
                        fprintfOrDie(f, "%s", elementSeparator.c_str());
                    if (i == istop && istop < iend - 1)
                    {
                        fprintfOrDie(f, "...+%d", (int)(iend - istop));
                        break;
                    }
                    double dval = seqData[i * istride + j * jstride];
                    print(dval);
                }
            }
        }
        fprintfOrDie(f, "%s", sequenceEpilogue.c_str());
    } // end loop over sequences
    fflushOrDie(f);
}

Example #4

Show file

File: SequencePacker.cpp Project: JupiterEthan/CNTK

StreamMinibatchPtr SequencePacker::PackStreamMinibatch(const std::vector<SequenceDataPtr>& sequences, size_t streamId)
{
    // Create sequence info for each sequences that we have got from the transformer.

    std::vector<MBLayout::SequenceInfo> inputSequences;
    for (size_t index = 0; index < sequences.size(); ++index)
    {
        MBLayout::SequenceInfo info;

        // In each minibatch sequence ids should be unique.
        // They have to match between different input streams in the same minibatch.
        // We are using sequence index in the set of received sequences.
        // TODO: should we use m_key as sequence id and pass it with data?
        info.seqId = index;

        info.tBegin = 0;
        info.tEnd = sequences[index]->m_numberOfSamples;
        inputSequences.push_back(info);
    }

    std::vector<std::pair<size_t, size_t>> placement;
    std::vector<size_t> rowAllocations;

    // Creating the minibatch layout.
    MBLayoutPtr layout = std::make_shared<MBLayout>();
    layout->InitAsPackedSequences(inputSequences, placement, rowAllocations);

    // Allocating necessary data buffer for the stream.
    size_t sampleSize = GetSampleSize(m_inputStreams[streamId]);
    size_t totalNumberOfSamplesInBytes = layout->GetNumCols() * sampleSize;
    if (m_streamBufferSizes[streamId] < totalNumberOfSamplesInBytes)
    {
        m_streamBuffers[streamId] = AllocateBuffer(layout->GetNumCols(), sampleSize);
        m_streamBufferSizes[streamId] = totalNumberOfSamplesInBytes;
    }

    // Packing the actual data.
    StorageType storageType = m_inputStreams[streamId]->m_storageType;
    size_t elementSize = GetSizeByType(m_inputStreams[streamId]->m_elementType);
    const auto& packedSequences = layout->GetAllSequences();
    char* streamBuffer = m_streamBuffers[streamId].get();
    for (const auto& sequence : packedSequences)
    {
        if (sequence.seqId == GAP_SEQUENCE_ID)
            continue;
        const auto& data = sequences[sequence.seqId];

        // Packing the sequence
        for (size_t sampleIndex = 0; sampleIndex < sequence.GetNumTimeSteps(); ++sampleIndex)
        {
            char* destination = streamBuffer + layout->GetColumnIndex(sequence, sampleIndex) * sampleSize;
            if (storageType == StorageType::dense)
            {
                PackDenseSample(destination, data, sampleIndex, elementSize, sampleSize);
            }
            else // sparse
            {
                assert(storageType == StorageType::sparse_csc);
                PackSparseSample(destination, data, sampleIndex, elementSize, sampleSize);
            }
        }
    }

    // Ok, minibatch is ready, give it out.
    StreamMinibatchPtr result = std::make_shared<StreamMinibatch>();
    result->m_data = m_streamBuffers[streamId].get();
    result->m_layout = layout;
    return result;
}