// Builds the VariableLayout descriptor for a computation node:
//   name      - the node's name
//   type      - Float32 when ElemType has the size of float, Float64 otherwise
//   storage   - Dense/Sparse according to the node's value matrix; Undetermined when the
//               value is not a Matrix<ElemType> or its matrix type is neither DENSE nor SPARSE
//   dimension - number of elements in the node's sample layout
VariableLayout CNTKEvalExtended<ElemType>::ToVariableLayout(const ComputationNodeBasePtr n)
{
    const auto valueMatrix = dynamic_pointer_cast<Matrix<ElemType>>(n->ValuePtr());

    // Start from "unknown" and refine only when the value matrix tells us more.
    auto storageKind = VariableLayout::Undetermined;
    if (valueMatrix)
    {
        const auto matrixType = valueMatrix->GetMatrixType();
        if (matrixType == MatrixType::DENSE)
            storageKind = VariableLayout::Dense;
        else if (matrixType == MatrixType::SPARSE)
            storageKind = VariableLayout::Sparse;
    }

    const auto elementKind = (sizeof(ElemType) == sizeof(float))
                                 ? VariableLayout::Float32
                                 : VariableLayout::Float64;

    return VariableLayout
    {
        /* name */      n->GetName(),
        /* type */      elementKind,
        /* storage */   storageKind,
        /* dimension */ n->GetSampleLayout().GetNumElements()
    };
}
//! Return a simple one-line description of this object. std::string description() const { std::ostringstream out; out << Teuchos::Describable::description(); out << "{type = " << GetMatrixType() << ", size = " << GetNumGlobalElements() << "} "; return out.str(); }
/*virtual*/ const std::unordered_map<StreamInformation, MinibatchData>& CompositeMinibatchSource::GetNextMinibatch(size_t minibatchSizeInSequences, size_t minibatchSizeInSamples, size_t numberOfWorkers, size_t workerRank, const DeviceDescriptor& device /*= DeviceDescriptor::UseDefaultDevice()*/) /*override*/ { auto profGetMinibatch = Microsoft::MSR::CNTK::ScopeProfile(Microsoft::MSR::CNTK::profilerEvtMainGetMinibatch); m_minibatchData.clear(); if (!m_epochEndReached) { if (minibatchSizeInSequences != 0) LogicError("GetNextMinibatch: Specifying minibatch size in #sequences is currently unsupported"); if (minibatchSizeInSamples == 0) InvalidArgument("GetNextMinibatch: Requested minibatch size must be > 0."); if (m_prevMinibatchSize == 0) { EpochConfiguration epochConfig; epochConfig.m_numberOfWorkers = numberOfWorkers; epochConfig.m_workerRank = workerRank; epochConfig.m_minibatchSizeInSamples = minibatchSizeInSamples; epochConfig.m_truncationSize = m_truncationLength; epochConfig.m_allowMinibatchesToCrossSweepBoundaries = true; if (m_maxNumSamplesToRead == MinibatchSource::FullDataSweep) { epochConfig.m_totalEpochSizeInSamples = Microsoft::MSR::CNTK::requestDataSize; } else if (m_maxNumSamplesToRead == MinibatchSource::InfinitelyRepeat) { // Setting big value, but not the max in order to avoid bit overflow. 
epochConfig.m_totalEpochSizeInSamples = std::numeric_limits<size_t>::max() / 2; } else { epochConfig.m_totalEpochSizeInSamples = m_maxNumSamplesToRead; } epochConfig.m_totalEpochSizeInSweeps = m_maxNumSweepsToRead; epochConfig.m_epochIndex = 0; m_matrices.clear(); std::unordered_set<InputStreamDescription> inputs; for (const auto& s : m_streamInfos) { auto inputStreamDescription = GetInputStreamDescription(s, device); inputs.insert(inputStreamDescription); if (s.m_elementType == DataType::Float) { auto iter = std::find_if(m_compositeDataReaderStreamDescs.begin(), m_compositeDataReaderStreamDescs.end(), [s](StreamDescriptionPtr& streamInfo) { return streamInfo->m_id == s.m_id; }); assert(iter != m_compositeDataReaderStreamDescs.end()); m_matrices.AddInput( s.m_name, std::make_shared<Matrix<float>>(0, 0, inputStreamDescription.GetDeviceId(), inputStreamDescription.GetMatrixType(), inputStreamDescription.GetMatrixFormat()), std::make_shared<MBLayout>(), *(*iter)->m_sampleLayout); } else LogicError("GetNextMinibatch: Input of type other than DataType::Float is currently unsupported by the CNTK built-in composite MinibatchSource!"); } m_shim->StartEpoch(epochConfig, inputs); m_prevMinibatchSize = minibatchSizeInSamples; m_workerRank = workerRank; m_numWorkers = numberOfWorkers; } if (minibatchSizeInSamples != m_prevMinibatchSize || m_workerRank != workerRank || m_numWorkers != numberOfWorkers || m_restorePosition != 0) { std::map<std::wstring, int> inputDescriptions; for (const auto& s : m_streamInfos) inputDescriptions[s.m_name] = AsCNTKImplDeviceId(device); ReaderConfiguration newConfig; newConfig.m_numberOfWorkers = numberOfWorkers; newConfig.m_workerRank = workerRank; newConfig.m_minibatchSizeInSamples = minibatchSizeInSamples; newConfig.m_truncationSize = m_truncationLength; newConfig.m_allowMinibatchesToCrossSweepBoundaries = true; if (m_restorePosition != 0) { m_shim->SetCurrentSamplePosition(m_restorePosition); m_restorePosition = 0; } 
m_shim->SetConfiguration(newConfig, inputDescriptions); m_prevMinibatchSize = minibatchSizeInSamples; m_workerRank = workerRank; m_numWorkers = numberOfWorkers; } auto hasData = m_shim->GetMinibatch(m_matrices); m_epochEndReached = m_shim->IsEndOfEpoch(); if (m_epochEndReached && !hasData) return m_minibatchData; bool hasReachedSweepEnd = m_shim->IsEndOfSweep(); for (const auto& s: m_streamInfos) { auto input = m_matrices.GetInput(s.m_name); auto& currentStreamInfo = s; ValuePtr minibatchValuePtr; if (!hasData) { m_minibatchData[currentStreamInfo] = { nullptr, 0, 0 }; continue; } if (s.m_elementType == DataType::Float) { auto matrix = dynamic_pointer_cast<Matrix<float>>(input.matrix); if (!matrix) LogicError("GetNextMinibatch: Invalid matrix type."); minibatchValuePtr = MakeSharedObject<PackedValue>(s.m_sampleLayout, Axis::DefaultInputVariableDynamicAxes(), matrix, input.pMBLayout, /*readOnly =*/ false); size_t numSamples = input.pMBLayout->GetActualNumSamples(); size_t numSequences = input.pMBLayout->GetNumSequences(); m_minibatchData[currentStreamInfo] = { minibatchValuePtr, numSequences, numSamples, hasReachedSweepEnd }; } else LogicError("GetNextMinibatch: Input of type other than DataType::Float is currently unsupported by the CNTK built-in composite MinibatchSource!"); } } return m_minibatchData; }
// Runs one forward pass of the network: loads every input buffer into its input node,
// forward-propagates to the output nodes and copies each output matrix into the
// caller-provided output buffer.
//
// Parameters:
//   inputs   - one buffer per input node, in the iteration order of m_inputNodes. Dense inputs
//              use m_buffer only; sparse inputs additionally use m_indices / m_colIndices.
//   outputs  - one buffer per output node. Each m_buffer must already have enough capacity for
//              the corresponding output, because it is resized here but not expected to grow.
//   resetRNN - when true the input sequence is added with begin index 0; otherwise with
//              SentinelValueIndicatingUnspecifedSequenceBeginIdx.
//
// Raises RuntimeError when evaluation has not been started, when the number of inputs or
// outputs does not match, when an input buffer is malformed, when a node value is not a
// Matrix<ElemType>, when an output has other than one sequence, or when an output buffer is
// too small.
void CNTKEvalExtended<ElemType>::ForwardPassT(const std::vector<ValueBuffer<ElemType, ValueContainer> >& inputs, std::vector<ValueBuffer<ElemType, ValueContainer> >& outputs, bool resetRNN)
{
    if (!m_started)
        RuntimeError("ForwardPass() called before StartForwardEvaluation()");

    if (inputs.size() != (size_t)std::distance(m_inputMatrices.begin(), m_inputMatrices.end()))
        RuntimeError("Expected %d inputs, but got %d.", (int)std::distance(m_inputMatrices.begin(), m_inputMatrices.end()), (int)inputs.size());

    if (outputs.size() != m_outputNodes.size())
        RuntimeError("Expected %d outputs, but got %d.", (int)m_outputNodes.size(), (int)outputs.size());

    size_t i = 0;
    for (auto& inputNode : m_inputNodes)
    {
        // const cast: The matrix class takes this over without copying and could theoretically change the contents,
        // though it doesn't in this case.
        auto& buffer = const_cast<ValueBuffer<ElemType, ValueContainer>&>(inputs[i]);
        auto matrix = dynamic_pointer_cast<Matrix<ElemType>>(inputNode->ValuePtr());
        // The cast yields nullptr when the node value is not a Matrix<ElemType>; fail cleanly
        // instead of dereferencing it.
        if (!matrix)
            RuntimeError("Input %ls: Node value is not a matrix of the expected element type.", m_inputNodes[i]->GetName().c_str());

        auto type = matrix->GetMatrixType();
        size_t numRows = inputNode->GetSampleLayout().GetNumElements();

        if (buffer.m_buffer.data() == nullptr)
            RuntimeError("Input %ls: Buffer is not allocated.", m_inputNodes[i]->GetName().c_str());

        if (type == MatrixType::DENSE)
        {
            // numRows is used as a divisor below, so it must not be zero.
            if (numRows == 0)
                RuntimeError("Input %ls: Sample layout has no elements.", m_inputNodes[i]->GetName().c_str());
            if (buffer.m_buffer.size() % numRows != 0)
                RuntimeError("Input %ls: Expected input data to be a multiple of %" PRIu64 ", but it is %" PRIu64 ".",
                             m_inputNodes[i]->GetName().c_str(),
                             static_cast<uint64_t>(numRows),
                             static_cast<uint64_t>(buffer.m_buffer.size()));
            if (buffer.m_buffer.size() == 0)
                RuntimeError("Input %ls: Expected at least one element.", m_inputNodes[i]->GetName().c_str());
        }
        else if (type == MatrixType::SPARSE)
        {
            if (buffer.m_colIndices.data() == nullptr)
                RuntimeError("Input %ls: Due to sparse input format, expected colIndices array, but was nullptr.", m_inputNodes[i]->GetName().c_str());
            if (buffer.m_indices.data() == nullptr)
                RuntimeError("Input %ls: Due to sparse input format, expected Indices array, but was nullptr.", m_inputNodes[i]->GetName().c_str());
            if (buffer.m_colIndices.size() < 2)
                RuntimeError("Input %ls: Expected at least one element (2 entries in colIndices array).", m_inputNodes[i]->GetName().c_str());
            if (buffer.m_colIndices[0] != 0)
                RuntimeError("Input %ls: First element of column indices must be 0", m_inputNodes[i]->GetName().c_str());
            // The size is printed through PRIu64 with an explicit cast so the format specifier
            // matches the argument width on every platform.
            if (buffer.m_colIndices[buffer.m_colIndices.size() - 1] != buffer.m_indices.size())
                RuntimeError("Input %ls: Last element of column indices must be equal to the size of indices (%" PRIu64 "), but was %d",
                             m_inputNodes[i]->GetName().c_str(),
                             static_cast<uint64_t>(buffer.m_indices.size()),
                             buffer.m_colIndices[buffer.m_colIndices.size() - 1]);
        }

        // Dense: columns = values / rows. Otherwise: one column per colIndices entry minus the
        // leading entry.
        int numCols = static_cast<int>(type == MatrixType::DENSE ? buffer.m_buffer.size() / numRows : buffer.m_colIndices.size() - 1);
        if (numCols < 1)
            RuntimeError("Input: the number of column must be greater than or equal to 1.");

        inputNode->GetMBLayout()->Init(1, numCols);

        // SentinelValueIndicatingUnspecifedSequenceBeginIdx is used to specify the lower bound of look-back step of recurrent nodes
        inputNode->GetMBLayout()->AddSequence(0, 0, resetRNN ? 0 : SentinelValueIndicatingUnspecifedSequenceBeginIdx, numCols);

        if (type == MatrixType::DENSE)
            matrix->SetValue(numRows, numCols, matrix->GetDeviceId(), buffer.m_buffer.data(), matrixFlagNormal);
        else if (type == MatrixType::SPARSE)
        {
            // In the sparse case the m_data layout is identical to CUDA's CSC layout
            // (see http://docs.nvidia.com/cuda/cusparse/#compressed-sparse-column-format-csc).
            matrix->SetMatrixFromCSCFormat(buffer.m_colIndices.data(), buffer.m_indices.data(), buffer.m_buffer.data(),
                                           buffer.m_buffer.size(), numRows, numCols);
        }

        ++i;
    }

    ComputationNetwork::BumpEvalTimeStamp(m_inputNodes);
    this->m_net->ForwardProp(m_outputNodes);

    for (size_t i2 = 0; i2 < m_outputNodes.size(); ++i2)
    {
        auto node = m_outputNodes[i2];

        shared_ptr<Matrix<ElemType>> outputMatrix = dynamic_pointer_cast<Matrix<ElemType>>(node->ValuePtr());
        // Same guard as for the inputs: the cast result is dereferenced below.
        if (!outputMatrix)
            RuntimeError("Output '%ls': Node value is not a matrix of the expected element type.", node->GetName().c_str());

        auto pMBLayout = node->GetMBLayout();
        if (!pMBLayout)
        {
            pMBLayout = make_shared<MBLayout>();
            pMBLayout->InitAsFrameMode(1); // treat this as if we have one single sample
        }

        const auto& seq = pMBLayout->GetAllSequences();
        if (seq.size() != 1)
            RuntimeError("Only 1 output sequence supported by this API");

        ValueContainer<ElemType>& vec = outputs[i2].m_buffer;

        size_t numElements = outputMatrix->GetNumElements();

        if (vec.capacity() < numElements)
        {
            // Bad luck - we can't reallocate memory of an external object at this point.
            RuntimeError("Not enough space in output buffer for output '%ls'.", node->GetName().c_str());
        }

        vec.resize(numElements);
        ElemType* data = const_cast<ElemType*>(vec.data());
        outputMatrix->CopyToArray(data, numElements);
    }
}