コード例 #1
0
ファイル: InputAndParamNodes.cpp プロジェクト: ShuaiW/CNTK
// Fills this parameter's value with random numbers.
//  - uniformInit:    if true, draw uniformly from [-0.05 * initValueScale, +0.05 * initValueScale];
//                    otherwise draw from a Gaussian with mean 0 and std-dev 0.2 * initValueScale / sqrt(#columns)
//  - randomSeed:     seed passed on to the matrix random-value routines
//                    (the random seed offset is set via the "randomSeedOffset" parameter in config)
//  - initValueScale: multiplier applied on top of the built-in factors 0.05 / 0.2
//  - initOnCPUOnly:  if true, the value is transferred to the CPU before initializing and to m_deviceId afterwards
void LearnableParameter<ElemType>::InitRandom(const bool uniformInit,
                                                const unsigned long randomSeed,
                                                const ElemType initValueScale,
                                                bool initOnCPUOnly)
{
    // debugging aid:
    // fprintf(stderr, "%d x %d: %d  %ls\n", (int)GetNumRows(), (int)GetNumCols(), (int)randomSeed, NodeName().c_str());

    // stage the value on the CPU if requested
    if (initOnCPUOnly)
        Value().TransferToDeviceIfNotThere(CPUDEVICE, true);

    // pick the object to initialize
#if 1   // this more complex version is needed to repro test cases generated with an older version
    auto& target = GetSampleLayout().GetRank() > 2 ? Value() : ValueAsMatrix();
#else
    auto& target = Value();
#endif

    // TODO: move these hidden extra factors out from here and into NDL, and make them visible in BS
    if (!uniformInit)
    {
        // Gaussian: the standard deviation shrinks with the square root of the column count
        const size_t numInputCols = target.GetNumCols();
        const ElemType stdDev = 0.2f * initValueScale / sqrt(ElemType(numInputCols));
        target.SetGaussianRandomValue(0, stdDev, randomSeed);
    }
    else
    {
        // uniform: symmetric interval around 0
        const ElemType halfRange = 0.05f * initValueScale;
        target.SetUniformRandomValue(-halfRange, halfRange, randomSeed);
    }

    // move the value to its actual device
    if (initOnCPUOnly)
        Value().TransferToDeviceIfNotThere(m_deviceId, true);
}
コード例 #2
0
ファイル: InputAndParamNodes.cpp プロジェクト: ShuaiW/CNTK
// Initializes this parameter from a flat array that holds a [numRows x numCols] matrix.
//  - array:   the matrix elements; must contain at least numRows * numCols values
//  - numRows: row dimension of the matrix stored in 'array'
//  - numCols: column dimension of the matrix stored in 'array'
// If the sample layout still has unknown (0) dimensions, they are inferred from numRows/numCols first.
// Fails with InvalidArgument() if the shape cannot be inferred or if 'array' is too short.
void LearnableParameter<ElemType>::InitFromArray(const std::vector<ElemType>& array, size_t numRows, size_t numCols)
{
    // infer tensor dimensions from input file if not set
    // Note: The mapping of dimensions of the input matrix to tensor dimensions are somewhat confusing.
    //       The file contains a 2D matrix (one row per text line) that is saved into our column-major representation.
    //       That representation is then reshaped into a column-major tensor.
    if (GetSampleLayout().GetNumElements() == 0)    // at least one dimension is 0
    {
        auto dims = GetSampleLayout().GetDims();
        // infer rank
        if (dims.size() == 0)
            dims.push_back(0);
        if (dims.size() == 1 && numCols != 1)
            dims.push_back(0);
        // infer #rows
        if (dims[0] == 0)           // infer row dimension as input matrix row dimension
            dims[0] = numRows;      // (if already set, then mismatch will be caught in VerifyDataSize() below)
        // infer #cols: product of all dimensions but the first must match matrix #cols; if there is a single 0 position, we infer it
        size_t zeroDim = 0;         // 0 means not found (index 0 is the row dimension, which was handled above)
        size_t prod = 1;            // product of all known dimensions except the first; never 0 since zeros are skipped
        for (size_t k = 1; k < dims.size(); k++)
        {
            auto dim = dims[k];
            if (dim != 0)
                prod *= dim;
            else if (zeroDim == 0)
                zeroDim = k;
            else
                InvalidArgument("%ls %ls operation's specified shape [%s] cannot be inferred: Too many unknown dimensions.", NodeName().c_str(), OperationName().c_str(), string(GetSampleLayout()).c_str());
        }
        if (zeroDim != 0)   // we found a zero
        {
            dims[zeroDim] = numCols / prod;
            if (prod * dims[zeroDim] != numCols) // numCols is not divisible by the known dimensions
                InvalidArgument("%ls %ls operation's specified shape [%s] cannot be inferred: Tensor shape cannot hold a [%d x %d] matrix.", NodeName().c_str(), OperationName().c_str(), string(GetSampleLayout()).c_str(), (int)numRows, (int)numCols);
        }
        SetDims(TensorShape(dims), false);
    }

    // SetValue() below reads numRows * numCols elements through a raw pointer, so make sure they exist
    if (array.size() < numRows * numCols)
        InvalidArgument("%ls %ls operation: Initialization array has %d elements, which is too few for a [%d x %d] matrix.", NodeName().c_str(), OperationName().c_str(), (int)array.size(), (int)numRows, (int)numCols);

    // BUGBUG: We should allow to read an arbitrary tensor from a single-column file.
    //         Currently, this would cause a matrix/tensor dimension mismatch. --TODO: Is this comment up-to-date?
    Value().SetValue(numRows, numCols, m_deviceId, const_cast<ElemType*>(array.data()), matrixFlagNormal);
    // TODO: Get rid of that const_cast, as soon as after Ryan's Matrix-lib refactoring separated out SetValue() from external vs. from deep copy
    VerifyDataSize(Value());      // sanity check
}
コード例 #3
0
ファイル: ComputationNode.cpp プロジェクト: Soukiy/CNTK
// Builds the tensor shape that describes the full object held by this node.
// The result starts out as the sample layout. If the node has an MBLayout, two more dimensions are
// appended via AppendInPlace(): the number of parallel sequences at position 'rank', then the number
// of time steps at position 'rank'+1. Passing rank == SIZE_MAX uses the sample layout's own rank instead.
// If we have no MBLayout, the column dimension is absorbed, i.e. the sample layout is returned unchanged.
TensorShape ComputationNodeBase::GetTensorShape(size_t rank) const
{
    TensorShape fullShape = GetSampleLayout(); // TODO: Do we need to expect this tensor to have arbitrary strides? In case it came out of a Slice, Reshape, or Transpose op in-place?

    // no minibatch layout: nothing to append
    if (!HasMBLayout())
        return fullShape;

    // positions of the two minibatch axes
    const size_t sequenceAxis = (rank == SIZE_MAX) ? fullShape.GetRank() : rank;
    const size_t timeAxis     = sequenceAxis + 1;

    fullShape.AppendInPlace(sequenceAxis, GetMBLayout()->GetNumParallelSequences());
    fullShape.AppendInPlace(timeAxis,     GetMBLayout()->GetNumTimeSteps());
    return fullShape;
}
コード例 #4
0
ファイル: ComputationNode.cpp プロジェクト: Soukiy/CNTK
// Returns the tensor rank to use for elementwise operations: the largest rank found among
// the sample layout of this node (the output) and the sample layouts of all of its inputs.
// 'Sample tensor' means only single samples are considered; with an MBLayout, that is the
// sample layout of a single matrix column.
// TODO: Turn rank into a member variable, and call this method once in validation (currently called for every single ForwardProp/BackpropTo()).
size_t ComputationNodeBase::DetermineElementwiseTensorRank() const
{
    // start with our own rank, then let every input raise it
    size_t largestRank = GetSampleLayout().GetRank();
    size_t inputIndex = 0;
    while (inputIndex < GetNumInputs())
    {
        const size_t inputRank = Input(inputIndex)->GetSampleLayout().GetRank();
        largestRank = (inputRank > largestRank) ? inputRank : largestRank;
        ++inputIndex;
    }
    return largestRank;
}
コード例 #5
0
ファイル: ComputationNode.cpp プロジェクト: Soukiy/CNTK
// helper function for validation
// In complex cases of convolution, dimensions are quite difficult for a user to know/derive.
// This is a feature that allows a node to help resizing its input node to the expected value
// iff that input must be a learnable parameter.
void ComputationNodeBase::ValidateInferBinaryInputDims()
{
    // limited inference of children dimensions
    // if dimension not specified we assume two operands' dimensions should be the same
    // NOTE: The assert is set to check if >= 2 since this is called from nodes which have more than two children.
    //      The number of children is formally verified elsewhere, so this will not break consistency.
    assert(m_inputs.size() >= 2);
    for (size_t index = 0; index < 2; index++)
    {
        auto in    = Input(    index);
        auto other = Input(1 - index);
        // borrow any unset dimension on one input from the other input
        in->ValidateInferInputDimsFrom(other->GetSampleLayout());
    }
}
コード例 #6
0
ファイル: ComputationNode.cpp プロジェクト: Soukiy/CNTK
// Writes the content of this node's value (or gradient) matrix to 'f' as formatted text, one sequence at a time.
//  - f:                 output file
//  - fr:                frame range to print; if it is not "all frames", each sequence is narrowed to that range
//  - onlyUpToRow:       number of rows after which output is abbreviated as "...+N"
//  - onlyUpToT:         number of time steps after which output is abbreviated as "...+N"
//  - transpose:         selects which of rows/time steps runs along a printed line (see 'bounds for printing' below)
//  - isCategoryLabel:   if true, each column is reduced to the index of its maximum element before printing
//  - isSparse:          if true, only non-zero elements are printed, each followed by its coordinates
//  - labelMapping:      strings used to print category indices when the format ends in 's'
//  - sequenceSeparator: printed between sequences
//  - sequencePrologue:  printed before each sequence; "%x" is replaced by the shape, "%d" by the sequence id
//  - sequenceEpilogue:  printed after each sequence
//  - elementSeparator:  printed between the elements of one output line
//  - sampleSeparator:   printed between output lines; "%x" and "%d" are replaced as for the prologue
//  - valueFormatString: printf-style format for one value; its last character selects the mode ('f', 'u', or 's')
//  - outputGradient:    if true, Gradient() is printed instead of Value()
// All work is done on an array obtained from CopyToArray(); the category-label reduction overwrites that array only.
void ComputationNode<ElemType>::WriteMinibatchWithFormatting(FILE* f, const FrameRange& fr,
                                                             size_t onlyUpToRow, size_t onlyUpToT, bool transpose, bool isCategoryLabel, bool isSparse,
                                                             const vector<string>& labelMapping, const string& sequenceSeparator, 
                                                             const string& sequencePrologue, const string& sequenceEpilogue,
                                                             const string& elementSeparator, const string& sampleSeparator,
                                                             string valueFormatString,
                                                             bool outputGradient) const
{
    // get minibatch matrix -> matData, matRows, matStride
    const Matrix<ElemType>& outputValues = outputGradient ? Gradient() : Value();
    let matRows   = outputValues.GetNumRows();
    let matStride = matRows; // how to get from one column to the next
    unique_ptr<ElemType[]> matDataPtr(outputValues.CopyToArray());
    ElemType* matData = matDataPtr.get();
    let sampleLayout = GetSampleLayout(); // this is currently only used for sparse; dense tensors are linearized

    // process all sequences one by one
    MBLayoutPtr pMBLayout = GetMBLayout();
    if (!pMBLayout) // no MBLayout: We are printing aggregates (or LearnableParameters?)
    {
        pMBLayout = make_shared<MBLayout>();
        pMBLayout->Init(1, outputValues.GetNumCols()); // treat this as if we have one single sequence consisting of the columns
        pMBLayout->AddSequence(0, 0, 0, outputValues.GetNumCols());
    }
    let& sequences = pMBLayout->GetAllSequences();
    let  width     = pMBLayout->GetNumTimeSteps();

    // string form of the sample shape ("d0 d1 ... "), used for the "%x" substitution
    TensorShape tensorShape = GetSampleLayout();
    stringstream str;
    let dims = tensorShape.GetDims();
    for (auto dim : dims)
        str << dim << ' ';
    let shape = str.str(); // BUGBUG: change to string(tensorShape) to make sure we always use the same format

    // determine up-front which substitutions are needed
    bool sequencePrologueHasShape = sequencePrologue.find("%x") != sequencePrologue.npos;
    bool sampleSeparatorHasShape  = sampleSeparator.find("%x")  != sampleSeparator.npos;
    bool sequencePrologueHasSeqId = sequencePrologue.find("%d") != sequencePrologue.npos;
    bool sampleSeparatorHasSeqId  = sampleSeparator.find("%d")  != sampleSeparator.npos;

    for (size_t s = 0; s < sequences.size(); s++)
    {
        const auto& seqInfo = sequences[s];
        if (seqInfo.seqId == GAP_SEQUENCE_ID) // nothing in gaps to print
            continue;
        let tBegin = seqInfo.tBegin >= 0     ? seqInfo.tBegin : 0;
        let tEnd   = seqInfo.tEnd   <= width ? seqInfo.tEnd   : width;
        // [tBegin,tEnd) is where the sequence resides.
        // fr is also referencing where a sequence resides.

        // narrow to FrameRange if needed
        auto t0 = fr.IsAllFrames() ? tBegin : fr.m_timeOffset + (ptrdiff_t)fr.timeIdxInSeq;
        auto t1 = fr.IsAllFrames() ? tEnd   : fr.m_timeOffset + (ptrdiff_t)fr.timeIdxInSeq + (ptrdiff_t)fr.m_timeRange;
        if (t0 < tBegin)
            t0 = tBegin;
        if (t1 > tEnd)
            t1 = tEnd;
        // [t0,t1) is the range we want to print
        if (t0 > (ptrdiff_t)t1)
            continue; // skip this sequence

        // get sequence matrix -> seqData, seqRows, seqCols, seqStride
        let  seqData   = matData + pMBLayout->GetColumnIndex(seqInfo, t0 - tBegin) * matStride;
        auto seqRows   = matRows;
        let  seqCols   = t1 - t0;
        let  seqStride = pMBLayout->GetNumParallelSequences() * matStride;

        auto seqProl = sequencePrologue;
        auto sampleSep = sampleSeparator;

        // substitute "%x" by the shape followed by the number of time steps of this sequence
        if (sequencePrologueHasShape || sampleSeparatorHasShape)
        {
            // note: the argument is unsigned long long, hence %llu
            auto sh = msra::strfun::_strprintf<char>("%s%llu", shape.c_str(), (unsigned long long)seqInfo.GetNumTimeSteps());
            if (sequencePrologueHasShape)
                seqProl = msra::strfun::ReplaceAll<std::string>(seqProl, "%x", sh);
            if (sampleSeparatorHasShape)
                sampleSep = msra::strfun::ReplaceAll<std::string>(sampleSep, "%x", sh);
        }

        // substitute "%d" by the sequence id
        if (sequencePrologueHasSeqId || sampleSeparatorHasSeqId)
        {
            // note: the argument is unsigned long long, hence %llu
            auto sh = msra::strfun::_strprintf<char>("%llu", (unsigned long long)seqInfo.seqId);
            if (sequencePrologueHasSeqId)
                seqProl = msra::strfun::ReplaceAll<std::string>(seqProl, "%d", sh);
            if (sampleSeparatorHasSeqId)
                sampleSep = msra::strfun::ReplaceAll<std::string>(sampleSep, "%d", sh);
        }

        if (s > 0)
            fprintfOrDie(f, "%s", sequenceSeparator.c_str());
        fprintfOrDie(f, "%s", seqProl.c_str());

        // output it according to our format specification
        // The last character of the format string selects the print mode.
        // An empty format string selects none (back() on an empty string would be undefined behavior).
        auto formatChar = valueFormatString.empty() ? '\0' : valueFormatString.back();
        if (isCategoryLabel) // if is category then find the max value and output its index (possibly mapped to a string)
        {
            if (formatChar == 's') // verify label dimension
            {
                if (outputValues.GetNumRows() != labelMapping.size() &&
                    sampleLayout[0] != labelMapping.size()) // if we match the first dim then use that
                {
                    static size_t warnings = 0;
                    if (warnings++ < 5)
                        fprintf(stderr, "write: Row dimension %d does not match number of entries %d in labelMappingFile, not using mapping\n", (int)seqRows, (int)labelMapping.size());
                    valueFormatString.back() = 'u'; // this is a fallback
                    formatChar = valueFormatString.back();
                }
            }
            // update the matrix in-place from one-hot (or max) to index
            // find the max in each column
            for (size_t j = 0; j < seqCols; j++) // loop over all time steps of the sequence
            {
                double maxLoc = -1;
                double maxVal = 0;
                for (size_t i = 0; i < seqRows; i++) // loop over rows
                {
                    let val = seqData[i + j * seqStride];
                    if (maxLoc < 0 || val >= maxVal) // '>=': on ties, the last maximum wins
                    {
                        maxLoc = (double)i;
                        maxVal = val;
                    }
                }
                seqData[0 + j * seqStride] = (ElemType)maxLoc; // overwrite first element in-place
            }
            seqRows = 1; // ignore remaining dimensions
        }
        // function to print a value
        auto print = [&](double dval)
        {
            if (formatChar == 'f') // print as real number
            {
                if (dval == 0) dval = fabs(dval);    // clear the sign of a negative 0, which are produced inconsistently between CPU and GPU
                fprintfOrDie(f, valueFormatString.c_str(), dval);
            }
            else if (formatChar == 'u') // print category as integer index
            {
                fprintfOrDie(f, valueFormatString.c_str(), (unsigned int)dval);
            }
            else if (formatChar == 's') // print category as a label string
            {
                size_t uval = (size_t)dval;
                if (!labelMapping.empty())
                    uval %= labelMapping.size();
                assert(uval < labelMapping.size());
                const char * sval = labelMapping[uval].c_str();
                fprintfOrDie(f, valueFormatString.c_str(), sval);
            }
        };
        // bounds for printing
        let iend    = transpose ?     seqRows : seqCols;     // true dimension of the data to print
        let jend    = transpose ?     seqCols : seqRows;
        let istop   = transpose ? onlyUpToRow : onlyUpToT;   // we stop at these dimensions (for debugging, one often needs only the first few values of those huge matrices)
        let jstop   = transpose ?   onlyUpToT : onlyUpToRow;
        let istride = transpose ?           1 : seqStride;
        let jstride = transpose ?   seqStride : 1;
        if (isSparse)
        {
            // sparse linearizes the entire matrix into a single vector, and prints that one with coordinates
            // TODO: This can be done more nicely. We should keep the block structure.
            size_t numPrinted = 0;
            for (size_t i = 0; i < iend; i++) // loop over elements --we just flatten them all out
            {
                for (size_t j = 0; j < jend; j++) // loop over rows
                {
                    double dval = seqData[i * istride + j * jstride];
                    if (dval == 0) // only print non-0 values
                        continue;
                    // NOTE(review): this prints the raw sampleSeparator, i.e. without the %x/%d substitutions held in sampleSep -- confirm whether that is intended
                    if (numPrinted++ > 0)
                        fprintfOrDie(f, "%s", transpose ? sampleSeparator.c_str() : elementSeparator.c_str());
                    if (dval != 1.0 || formatChar != 'f') // hack: we assume that we are either one-hot or never precisely hitting 1.0
                        print(dval);
                    size_t row = transpose ? i : j;
                    size_t col = transpose ? j : i;
                    // decompose the linear row index into one index per sample-tensor dimension
                    for (size_t k = 0; k < sampleLayout.size(); k++)
                    {
                        fprintfOrDie(f, "%c%d", k == 0 ? '[' : ',', (int)(row % sampleLayout[k])); // cast: %d expects an int
                        if (sampleLayout[k] == labelMapping.size()) // annotate index with label if dimensions match (which may misfire once in a while)
                            fprintfOrDie(f, "=%s", labelMapping[row % sampleLayout[k]].c_str());
                        row /= sampleLayout[k];
                    }
                    if (seqInfo.GetNumTimeSteps() > 1)
                        fprintfOrDie(f, ";%d", (int)col); // cast: %d expects an int
                    fprintfOrDie(f, "]");
                }
            }
        }
        else
        {
            for (size_t j = 0; j < jend; j++) // loop over output rows     --BUGBUG: row index is 'i'!! Rename these!!
            {
                if (j > 0)
                    fprintfOrDie(f, "%s", sampleSep.c_str());
                if (j == jstop && jstop < jend - 1) // if jstop == jend-1 we may as well just print the value instead of '...'
                {
                    fprintfOrDie(f, "...+%d", (int)(jend - jstop)); // 'nuff said
                    break;
                }
                // inject sample tensor index if we are printing row-wise and it's a tensor
                if (!transpose && sampleLayout.size() > 1 && !isCategoryLabel) // each row is a different sample dimension
                {
                    // the cast must cover the entire expression, since %d expects an int
                    for (size_t k = 0; k < sampleLayout.size(); k++)
                        fprintfOrDie(f, "%c%d", k == 0 ? '[' : ',', (int)((j / sampleLayout.GetStrides()[k]) % sampleLayout[k]));
                    fprintfOrDie(f, "]\t");
                }
                // print a row of values
                for (size_t i = 0; i < iend; i++) // loop over elements
                {
                    if (i > 0)
                        fprintfOrDie(f, "%s", elementSeparator.c_str());
                    if (i == istop && istop < iend - 1)
                    {
                        fprintfOrDie(f, "...+%d", (int)(iend - istop));
                        break;
                    }
                    double dval = seqData[i * istride + j * jstride];
                    print(dval);
                }
            }
        }
        fprintfOrDie(f, "%s", sequenceEpilogue.c_str());
    } // end loop over sequences
    fflushOrDie(f);
}