Section* BinaryWriter<ElemType>::CreateSection(const ConfigParameters& config, Section* parentSection, size_t p_records, size_t p_windowSize)
{
    // CreateSection - create the section described by one level of the writer configuration,
    // then recurse into every nested configuration that declares a 'sectionType'.
    //
    // config        - configuration for this level; the keys read here are elementSize, wrecords,
    //                 dim, sectionType, wfile, wsize, windowSize, labelType, labelDim and compute
    // parentSection - section the new section is added to; replaced by the file's own section
    //                 when this level opens a new file through 'wfile'
    // p_records     - record count inherited from the enclosing level (overridden by 'wrecords')
    // p_windowSize  - window size in records inherited from the enclosing level
    //                 (overridden by 'windowSize'); zero selects parent mapping
    //
    // returns the section created at this level, or parentSection (possibly null) when this
    // level creates no section of its own (sectionTypeNull / sectionTypeFile)

    // determine the element size, default to ElemType size
    size_t elementSize = sizeof(ElemType);
    if (config.ExistsCurrent(L"elementSize"))
    {
        elementSize = config(L"elementSize");
    }

    // get the number of records we should expect (max)
    // if defined in previous levels the same number will be used
    size_t records = p_records;
    if (config.ExistsCurrent(L"wrecords"))
    {
        records = config(L"wrecords");
    }
    if (records == 0)
    {
        InvalidArgument("Required config variable 'wrecords' missing from BinaryWriter configuration.");
    }

    size_t dim = 1; // default dimension (single item)
    if (config.ExistsCurrent(L"dim"))
    {
        dim = config(L"dim");
    }

    // get the section type (used for caching)
    SectionType sectionType = sectionTypeNull;
    if (config.ExistsCurrent(L"sectionType"))
    {
        SectionType foundType = sectionTypeNull;
        std::wstring type = config(L"sectionType");
        for (int i = 0; i < sectionTypeMax; i++)
        {
            if (EqualCI(type, SectionTypeStrings[i]))
            {
                foundType = static_cast<SectionType>(i);
                break;
            }
        }

        // check to make sure it matched something
        if (foundType == sectionTypeNull)
        {
            InvalidArgument("Invalid value for 'sectionType' in BinaryWriter configuration: %ls", type.c_str());
        }
        sectionType = foundType;
    }

    // calculate number of bytes = dim*elementSize*records
    size_t dataOnlySize = records * elementSize * dim;
    size_t dataSize = dataOnlySize + sectionHeaderMin;

    // filename: use the one defined at this level, if there is none use the parent file
    SectionFile* file = nullptr;
    if (config.ExistsCurrent(L"wfile"))
    {
        std::wstring wfile = config(L"wfile");
        auto secFile = m_secFiles.find(wfile);
        if (secFile != m_secFiles.end())
        {
            // NOTE(review): parentSection is left untouched when an already-open file is reused;
            // a null parentSection would then be dereferenced by AddSection() below - confirm
            // that callers never pass null in this situation.
            file = secFile->second;
        }
        else
        {
            // TODO: sanity check and use records as a clue of how big to make it
            size_t initialSize = config(L"wsize", static_cast<size_t>(256)); // default to 256MB if not provided
            initialSize *= 1024 * 1024;                                      // convert MB to bytes
            if (initialSize < dataSize)
            {
                initialSize = dataSize * 5 / 4; // make the initial size slightly larger than needed for data
            }
            file = new SectionFile(wfile, fileOptionsReadWrite, initialSize);
            m_secFiles[wfile] = file;
            parentSection = file->FileSection();
            parentSection->SetElementCount(records);
            parentSection->SetFileUniqueId(this->m_uniqueID);
        }
    }
    else
    {
        // no file defined at this config level, use parent file
        if (parentSection != nullptr && parentSection->GetSectionFile() != nullptr)
        {
            file = parentSection->GetSectionFile();
        }
        else if (sectionType != sectionTypeNull)
        {
            InvalidArgument("No filename (wfile) defined in BinaryWriter configuration.");
        }
    }

    // determine file position if needed
    size_t filePositionLast = 0;
    size_t filePositionNext = 0;
    if (file != nullptr)
    {
        // get the next available position in the file (always on the end)
        filePositionLast = file->GetFilePositionMax();
        filePositionNext = file->RoundUp(filePositionLast);

        // we have a gap, zero it out to keep the file clean
        if (filePositionLast != filePositionNext)
        {
            const size_t gapSize = filePositionNext - filePositionLast;
            const size_t viewAlignment = file->GetViewAlignment();

            // The write pointer is addressed as (view start + offsetInView), so the view has to
            // begin exactly at filePositionLast rounded DOWN to the view alignment. Computing it
            // directly also avoids the unsigned underflow of the former
            // 'filePositionLast - alignment - 1' expression for positions below the alignment.
            const size_t offsetInView = filePositionLast % viewAlignment;
            const size_t viewStart = filePositionLast - offsetInView;

            // need to get a view to zero out non-used bytes
            void* view = file->GetView(viewStart, viewAlignment);
            char* ptr = static_cast<char*>(view) + offsetInView;
            memset(ptr, 0, gapSize);
            file->ReleaseView(view);
        }
    }

    // get the new section name
    std::string sectionName = config.ConfigName();

    // get the window size, to see if we want to do separate element mapping
    size_t windowSize = p_windowSize;
    if (config.ExistsCurrent(L"windowSize"))
    {
        windowSize = config(L"windowSize");
    }
    MappingType mappingMain = windowSize ? mappingElementWindow : mappingParent;
    MappingType mappingAux = windowSize ? mappingSection : mappingParent;

    // now create the new section
    Section* section = nullptr;
    switch (sectionType)
    {
    case sectionTypeNull:
        // this happens for the original file header, nothing to do
        // also used when multiple files are defined, but none at the base level
        break;
    case sectionTypeFile: // file header
        // shouldn't occur, but same case as above
        break;
    case sectionTypeData: // data section
        section = new Section(file, parentSection, filePositionNext, mappingMain, dataSize);
        section->InitHeader(sectionTypeData, sectionName + ":Data Section", sectionDataFloat, sizeof(ElemType));
        break;
    case sectionTypeLabel: // label data
    {
        // Resolve and validate the label kind BEFORE allocating the section, so that an invalid
        // configuration does not leave a freshly allocated SectionLabel behind.
        size_t labelElementSize = sizeof(LabelIdType);
        SectionData dataType = sectionDataInt;
        LabelKind labelKind = labelCategory; // default
        if (config.Match(L"labelType", L"Regression"))
        {
            labelKind = labelRegression;
            dataType = sectionDataFloat;
            labelElementSize = sizeof(ElemType);
        }
        else if (!config.Match(L"labelType", L"Category"))
        {
            RuntimeError("Invalid type 'labelType' or missing in BinaryWriter configuration.");
        }
        // else: Category - everything is already set to the default values

        // NOTE(review): the byte size is based on sizeof(LabelIdType) even for regression labels,
        // whose header element size is sizeof(ElemType); kept as in the original layout - confirm.
        dataSize = records * sizeof(LabelIdType) + sectionHeaderMin;
        auto sectionLabel = new SectionLabel(file, parentSection, filePositionNext, mappingMain, dataSize);

        // initialize the section header
        sectionLabel->InitHeader(sectionTypeLabel, sectionName + ":Labels", dataType, static_cast<WORD>(labelElementSize));

        // initialize the special label header items
        sectionLabel->SetLabelKind(labelKind);
        sectionLabel->SetLabelDim(config(L"labelDim"));
        section = sectionLabel;
        break;
    }
    case sectionTypeLabelMapping: // label mapping table (array of strings)
        section = new SectionString(file, parentSection, filePositionNext, mappingAux, dataSize);
        section->InitHeader(sectionTypeLabelMapping, sectionName + ":Label Map", sectionDataStrings, 0); // declare variable length strings
        section->SetFlags(flagAuxilarySection);
        section->SetFlags(flagVariableSized);
        break;
    case sectionTypeStats: // data statistics
    {
        // one NumericStatistics record per entry of the 'compute' array
        ConfigArray calcStats = config(L"compute");
        records = calcStats.size();
        elementSize = sizeof(NumericStatistics);
        dataOnlySize = records * elementSize;
        dataSize = dataOnlySize + sectionHeaderMin;
        auto sectionStats = new SectionStats(file, parentSection, filePositionNext, mappingAux, dataSize);
        sectionStats->InitHeader(sectionTypeStats, sectionName + ":Data Statistics", sectionDataStruct, sizeof(NumericStatistics));
        sectionStats->SetFlags(flagAuxilarySection);
        section = sectionStats;
        break;
    }
    case sectionTypeCategoryLabel:
        section = new Section(file, parentSection, filePositionNext, mappingMain, dataSize);
        section->InitHeader(sectionTypeCategoryLabel, sectionName + ":Category Labels", sectionDataFloat, sizeof(ElemType));
        break;
    }

    // set the rest of the header variables necessary
    if (section == nullptr)
    {
        // NULL or file section/already created
        section = parentSection;
    }
    else
    {
        section->SetElementSize(elementSize);
        section->SetElementsPerRecord(dim);
        section->SetElementCount(records * dim);
        section->SetSize(dataSize);
        section->SetSizeAll(dataSize);

        // windowSize is in records, convert to bytes
        size_t dataWindowSize = windowSize ? windowSize * elementSize * dim : dataOnlySize;
        // clamp it down to actual data size
        dataWindowSize = min(dataOnlySize, dataWindowSize);

        // now get the data pointer setup and allocate the view as necessary
        bool auxSection = !!(section->GetFlags() & flagAuxilarySection);
        section->EnsureElements(0, auxSection ? dataOnlySize : dataWindowSize);

        // update the max file position for the next section
        file->SetFilePositionMax(section->GetFilePosition() + dataSize);

        // Add new section to parent
        parentSection->AddSection(section);
    }

    // From here on down we have a fully usable section object
    // now find the subsections and repeat
    std::vector<std::wstring> subsections;
    FindConfigNames(config, "sectionType", subsections);

    // look for any children and create them as well
    for (const std::wstring& subsection : subsections)
    {
        CreateSection(config(subsection), section, records, windowSize);
    }

    // wait until here so everything is mapped and valid in the object
    if (sectionType == sectionTypeStats)
    {
        ConfigArray calcStats = config(L"compute");
        static_cast<SectionStats*>(section)->InitCompute(calcStats);
    }

    // add to section map
    if (sectionType != sectionTypeFile && sectionType != sectionTypeNull)
    {
        std::wstring wsectionName = msra::strfun::utf16(sectionName);
        // can't have identical names in a write configuration
        if (m_sections.find(wsectionName) != m_sections.end())
        {
            RuntimeError("Identical section name appears twice:%s", sectionName.c_str());
        }
        m_sections[wsectionName] = section;
    }

    // validate the header (make sure it's sane)
    if (section && file && !section->ValidateHeader(file->Writing()))
    {
        RuntimeError("Invalid header in file %ls, in header %ls\n", file->GetName().c_str(), section->GetName().c_str());
    }

    // return the now complete section
    return section;
}
// Returns the section type held in the TYPE_MASK bits of the dataPos_ field belonging to
// the entry that directly follows this one in memory.
// NOTE(review): reading (this + 1) presumes this object always has a valid successor
// (e.g. it lives in an array with a trailing entry) - confirm against the owning container.
SectionType type() const
{
    const auto* following = this + 1;
    const auto typeBits = following->dataPos_ & TYPE_MASK;
    return SectionType(typeBits);
}