virtual void init() { HashDistributeMasterBase::init(); // JCSMORE should common up some with indexread IHThorKeyedDistributeArg *helper = (IHThorKeyedDistributeArg *)queryHelper(); StringBuffer scoped; OwnedRoxieString indexFileName(helper->getIndexFileName()); queryThorFileManager().addScope(container.queryJob(), indexFileName, scoped); file.setown(queryThorFileManager().lookup(container.queryJob(), indexFileName)); if (!file) throw MakeActivityException(this, 0, "KeyedDistribute: Failed to find key: %s", scoped.str()); if (0 == file->numParts()) throw MakeActivityException(this, 0, "KeyedDistribute: Can't distribute based on an empty key: %s", scoped.str()); checkFormatCrc(this, file, helper->getFormatCrc(), true); Owned<IFileDescriptor> fileDesc = file->getFileDescriptor(); Owned<IPartDescriptor> tlkDesc = fileDesc->getPart(fileDesc->numParts()-1); if (!tlkDesc->queryProperties().hasProp("@kind") || 0 != stricmp("topLevelKey", tlkDesc->queryProperties().queryProp("@kind"))) throw MakeActivityException(this, 0, "Cannot distribute using a non-distributed key: '%s'", scoped.str()); unsigned location; OwnedIFile iFile; StringBuffer filePath; if (!getBestFilePart(this, *tlkDesc, iFile, location, filePath)) throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", file->queryLogicalName()); OwnedIFileIO iFileIO = iFile->open(IFOread); assertex(iFileIO); tlkMb.append(iFileIO->size()); ::read(iFileIO, 0, (size32_t)iFileIO->size(), tlkMb); queryThorFileManager().noteFileRead(container.queryJob(), file); }
virtual void validateFile(IDistributedFile *file) { IHThorDiskReadBaseArg *helper = (IHThorDiskReadBaseArg *)queryHelper(); bool codeGenGrouped = 0 != (TDXgrouped & helper->getFlags()); bool isGrouped = fileDesc->isGrouped(); if (isGrouped != codeGenGrouped) { Owned<IException> e = MakeActivityWarning(&container, TE_GroupMismatch, "DFS and code generated group info. differs: DFS(%s), CodeGen(%s), using DFS info", isGrouped?"grouped":"ungrouped", codeGenGrouped?"grouped":"ungrouped"); queryJobChannel().fireException(e); } IOutputMetaData *recordSize = helper->queryDiskRecordSize()->querySerializedDiskMeta(); if (recordSize->isFixedSize()) // fixed size { if (0 != fileDesc->queryProperties().getPropInt("@recordSize")) { size32_t rSz = fileDesc->queryProperties().getPropInt("@recordSize"); if (isGrouped) rSz--; // eog byte not to be included in this test. if (rSz != recordSize->getMinRecordSize()) throw MakeThorException(TE_RecordSizeMismatch, "Published record size %d for file %s, does not match coded record size %d", rSz, fileName.get(), recordSize->getMinRecordSize()); } if (!fileDesc->isCompressed() && (TDXcompress & helper->getFlags())) { size32_t rSz = recordSize->getMinRecordSize(); if (isGrouped) rSz++; if (rSz >= MIN_ROWCOMPRESS_RECSIZE) { Owned<IException> e = MakeActivityWarning(&container, TE_CompressionMismatch, "Ignoring compression attribute on file '%s', which is not published as compressed in DFS", fileName.get()); queryJobChannel().fireException(e); } } } if (0 == (TDRnocrccheck & helper->getFlags())) checkFormatCrc(this, file, helper->getFormatCrc(), false); }
virtual void init() { CMasterActivity::init(); OwnedRoxieString indexFileName(helper->getIndexFileName()); Owned<IDistributedFile> dataFile; Owned<IDistributedFile> indexFile = queryThorFileManager().lookup(container.queryJob(), indexFileName, false, 0 != (helper->getJoinFlags() & JFindexoptional), true); unsigned keyReadWidth = (unsigned)container.queryJob().getWorkUnitValueInt("KJKRR", 0); if (!keyReadWidth || keyReadWidth>container.queryJob().querySlaves()) keyReadWidth = container.queryJob().querySlaves(); initMb.clear(); initMb.append(indexFileName.get()); if (helper->diskAccessRequired()) numTags += 2; initMb.append(numTags); unsigned t=0; for (; t<numTags; t++) { tags[t] = container.queryJob().allocateMPTag(); initMb.append(tags[t]); } bool keyHasTlk = false; if (indexFile) { unsigned numParts = 0; localKey = indexFile->queryAttributes().getPropBool("@local"); if (container.queryLocalData() && !localKey) throw MakeActivityException(this, 0, "Keyed Join cannot be LOCAL unless supplied index is local"); checkFormatCrc(this, indexFile, helper->getIndexFormatCrc(), true); Owned<IFileDescriptor> indexFileDesc = indexFile->getFileDescriptor(); IDistributedSuperFile *superIndex = indexFile->querySuperFile(); unsigned superIndexWidth = 0; unsigned numSuperIndexSubs = 0; if (superIndex) { numSuperIndexSubs = superIndex->numSubFiles(true); bool first=true; // consistency check Owned<IDistributedFileIterator> iter = superIndex->getSubFileIterator(true); ForEach(*iter) { IDistributedFile &f = iter->query(); unsigned np = f.numParts()-1; IDistributedFilePart &part = f.queryPart(np); const char *kind = part.queryAttributes().queryProp("@kind"); bool hasTlk = NULL != kind && 0 == stricmp("topLevelKey", kind); // if last part not tlk, then deemed local (might be singlePartKey) if (first) { first = false; keyHasTlk = hasTlk; superIndexWidth = f.numParts(); if (keyHasTlk) --superIndexWidth; } else { if (hasTlk != keyHasTlk) throw MakeActivityException(this, 0, "Local/Single part keys cannot be mixed with distributed(tlk) keys in keyedjoin"); if (keyHasTlk && superIndexWidth != f.numParts()-1) throw MakeActivityException(this, 0, "Super sub keys of different width cannot be mixed with distributed(tlk) keys in keyedjoin"); } } if (keyHasTlk) numParts = superIndexWidth * numSuperIndexSubs; else numParts = superIndex->numParts(); } else { numParts = indexFile->numParts(); if (numParts) { const char *kind = indexFile->queryPart(indexFile->numParts()-1).queryAttributes().queryProp("@kind"); keyHasTlk = NULL != kind && 0 == stricmp("topLevelKey", kind); if (keyHasTlk) --numParts; } } if (numParts) { initMb.append(numParts); initMb.append(superIndexWidth); // 0 if not superIndex initMb.append((superIndex && superIndex->isInterleaved()) ? numSuperIndexSubs : 0); unsigned p=0; UnsignedArray parts; for (; p<numParts; p++) parts.append(p); indexFileDesc->serializeParts(initMb, parts); if (localKey) keyHasTlk = false; // not used initMb.append(keyHasTlk); if (keyHasTlk) { if (numSuperIndexSubs) initMb.append(numSuperIndexSubs); else initMb.append((unsigned)1); Owned<IDistributedFileIterator> iter; IDistributedFile *f; if (superIndex) { iter.setown(superIndex->getSubFileIterator(true)); f = &iter->query(); } else f = indexFile; loop { unsigned location; OwnedIFile iFile; StringBuffer filePath; Owned<IFileDescriptor> fileDesc = f->getFileDescriptor(); Owned<IPartDescriptor> tlkDesc = fileDesc->getPart(fileDesc->numParts()-1); if (!getBestFilePart(this, *tlkDesc, iFile, location, filePath)) throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", f->queryLogicalName()); OwnedIFileIO iFileIO = iFile->open(IFOread); assertex(iFileIO); size32_t tlkSz = (size32_t)iFileIO->size(); initMb.append(tlkSz); ::read(iFileIO, 0, tlkSz, initMb); if (!iter || !iter->next()) break; f = &iter->query(); } } if (helper->diskAccessRequired()) { OwnedRoxieString fetchFilename(helper->getFileName()); if (fetchFilename) { dataFile.setown(queryThorFileManager().lookup(container.queryJob(), fetchFilename, false, 0 != (helper->getFetchFlags() & FFdatafileoptional), true)); if (dataFile) { if (superIndex) throw MakeActivityException(this, 0, "Superkeys and full keyed joins are not supported"); Owned<IFileDescriptor> dataFileDesc = getConfiguredFileDescriptor(*dataFile); void *ekey; size32_t ekeylen; helper->getFileEncryptKey(ekeylen,ekey); bool encrypted = dataFileDesc->queryProperties().getPropBool("@encrypted"); if (0 != ekeylen) { memset(ekey,0,ekeylen); free(ekey); if (!encrypted) { Owned<IException> e = MakeActivityWarning(&container, TE_EncryptionMismatch, "Ignoring encryption key provided as file '%s' was not published as encrypted", helper->getFileName()); container.queryJob().fireException(e); } } else if (encrypted) throw MakeActivityException(this, 0, "File '%s' was published as encrypted but no encryption key provided", fetchFilename.get()); unsigned dataReadWidth = (unsigned)container.queryJob().getWorkUnitValueInt("KJDRR", 0); if (!dataReadWidth || dataReadWidth>container.queryJob().querySlaves()) dataReadWidth = container.queryJob().querySlaves(); Owned<IGroup> grp = container.queryJob().querySlaveGroup().subset((unsigned)0, dataReadWidth); dataFileMapping.setown(getFileSlaveMaps(dataFile->queryLogicalName(), *dataFileDesc, container.queryJob().queryUserDescriptor(), *grp, false, false, NULL)); dataFileMapping->serializeFileOffsetMap(offsetMapMb.clear()); } else indexFile.clear(); } } } else