void prepareKey(IDistributedFile *index) { IDistributedFile *f = index; IDistributedSuperFile *super = f->querySuperFile(); unsigned nparts = f->numParts(); // includes tlks if any, but unused in array performPartLookup.ensure(nparts); bool checkTLKConsistency = (nullptr != super) && !localKey && (0 != (TIRsorted & indexBaseHelper->getFlags())); if (nofilter) { while (nparts--) performPartLookup.append(true); if (!checkTLKConsistency) return; } else { while (nparts--) performPartLookup.append(false); // parts to perform lookup set later } Owned<IDistributedFileIterator> iter; if (super) { iter.setown(super->getSubFileIterator(true)); verifyex(iter->first()); f = &iter->query(); } unsigned width = f->numParts(); if (!localKey) --width; assertex(width); unsigned tlkCrc = 0; bool first = true; unsigned superSubIndex=0; bool fileCrc = false, rowCrc = false; for (;;) { Owned<IDistributedFilePart> part = f->getPart(width); if (checkTLKConsistency) { unsigned _tlkCrc; if (part->getCrc(_tlkCrc)) fileCrc = true; else if (part->queryAttributes().hasProp("@crc")) // NB: key "@crc" is not a crc on the file, but data within. { _tlkCrc = part->queryAttributes().getPropInt("@crc"); rowCrc = true; } else if (part->queryAttributes().hasProp("@tlkCrc")) // backward compat. { _tlkCrc = part->queryAttributes().getPropInt("@tlkCrc"); rowCrc = true; } else { if (rowCrc || fileCrc) { checkTLKConsistency = false; Owned<IException> e = MakeActivityWarning(&container, 0, "Cannot validate that tlks in superfile %s match, some crc attributes are missing", super->queryLogicalName()); queryJobChannel().fireException(e); } } if (rowCrc && fileCrc) { checkTLKConsistency = false; Owned<IException> e = MakeActivityWarning(&container, 0, "Cannot validate that tlks in superfile %s match, due to mixed crc types.", super->queryLogicalName()); queryJobChannel().fireException(e); } if (checkTLKConsistency) { if (first) { tlkCrc = _tlkCrc; first = false; } else if (tlkCrc != _tlkCrc) throw MakeActivityException(this, 0, "Sorted output on super files comprising of non coparitioned sub keys is not supported (TLK's do not match)"); } } if (!nofilter) { Owned<IKeyIndex> keyIndex; unsigned copy; for (copy=0; copy<part->numCopies(); copy++) { RemoteFilename rfn; OwnedIFile ifile = createIFile(part->getFilename(rfn,copy)); if (ifile->exists()) { StringBuffer remotePath; rfn.getRemotePath(remotePath); unsigned crc = 0; part->getCrc(crc); keyIndex.setown(createKeyIndex(remotePath.str(), crc, false, false)); break; } } if (!keyIndex) throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", index->queryLogicalName()); unsigned fixedSize = indexBaseHelper->queryDiskRecordSize()->querySerializedDiskMeta()->getFixedSize(); // used only if fixed Owned <IKeyManager> tlk = createLocalKeyManager(keyIndex, fixedSize, NULL); indexBaseHelper->createSegmentMonitors(tlk); tlk->finishSegmentMonitors(); tlk->reset(); while (tlk->lookup(false)) { if (tlk->queryFpos()) performPartLookup.replace(true, (aindex_t)(super?super->numSubFiles(true)*(tlk->queryFpos()-1)+superSubIndex:tlk->queryFpos()-1)); } } if (!super||!iter->next()) break; superSubIndex++; f = &iter->query(); if (width != f->numParts()-1) throw MakeActivityException(this, 0, "Super key %s, with mixture of sub key width are not supported.", f->queryLogicalName()); } }
virtual void init() { CMasterActivity::init(); OwnedRoxieString indexFileName(helper->getIndexFileName()); Owned<IDistributedFile> dataFile; Owned<IDistributedFile> indexFile = queryThorFileManager().lookup(container.queryJob(), indexFileName, false, 0 != (helper->getJoinFlags() & JFindexoptional), true); unsigned keyReadWidth = (unsigned)container.queryJob().getWorkUnitValueInt("KJKRR", 0); if (!keyReadWidth || keyReadWidth>container.queryJob().querySlaves()) keyReadWidth = container.queryJob().querySlaves(); initMb.clear(); initMb.append(indexFileName.get()); if (helper->diskAccessRequired()) numTags += 2; initMb.append(numTags); unsigned t=0; for (; t<numTags; t++) { tags[t] = container.queryJob().allocateMPTag(); initMb.append(tags[t]); } bool keyHasTlk = false; if (indexFile) { unsigned numParts = 0; localKey = indexFile->queryAttributes().getPropBool("@local"); if (container.queryLocalData() && !localKey) throw MakeActivityException(this, 0, "Keyed Join cannot be LOCAL unless supplied index is local"); checkFormatCrc(this, indexFile, helper->getIndexFormatCrc(), true); Owned<IFileDescriptor> indexFileDesc = indexFile->getFileDescriptor(); IDistributedSuperFile *superIndex = indexFile->querySuperFile(); unsigned superIndexWidth = 0; unsigned numSuperIndexSubs = 0; if (superIndex) { numSuperIndexSubs = superIndex->numSubFiles(true); bool first=true; // consistency check Owned<IDistributedFileIterator> iter = superIndex->getSubFileIterator(true); ForEach(*iter) { IDistributedFile &f = iter->query(); unsigned np = f.numParts()-1; IDistributedFilePart &part = f.queryPart(np); const char *kind = part.queryAttributes().queryProp("@kind"); bool hasTlk = NULL != kind && 0 == stricmp("topLevelKey", kind); // if last part not tlk, then deemed local (might be singlePartKey) if (first) { first = false; keyHasTlk = hasTlk; superIndexWidth = f.numParts(); if (keyHasTlk) --superIndexWidth; } else { if (hasTlk != keyHasTlk) throw MakeActivityException(this, 0, "Local/Single part keys cannot be mixed with distributed(tlk) keys in keyedjoin"); if (keyHasTlk && superIndexWidth != f.numParts()-1) throw MakeActivityException(this, 0, "Super sub keys of different width cannot be mixed with distributed(tlk) keys in keyedjoin"); } } if (keyHasTlk) numParts = superIndexWidth * numSuperIndexSubs; else numParts = superIndex->numParts(); } else { numParts = indexFile->numParts(); if (numParts) { const char *kind = indexFile->queryPart(indexFile->numParts()-1).queryAttributes().queryProp("@kind"); keyHasTlk = NULL != kind && 0 == stricmp("topLevelKey", kind); if (keyHasTlk) --numParts; } } if (numParts) { initMb.append(numParts); initMb.append(superIndexWidth); // 0 if not superIndex initMb.append((superIndex && superIndex->isInterleaved()) ? numSuperIndexSubs : 0); unsigned p=0; UnsignedArray parts; for (; p<numParts; p++) parts.append(p); indexFileDesc->serializeParts(initMb, parts); if (localKey) keyHasTlk = false; // not used initMb.append(keyHasTlk); if (keyHasTlk) { if (numSuperIndexSubs) initMb.append(numSuperIndexSubs); else initMb.append((unsigned)1); Owned<IDistributedFileIterator> iter; IDistributedFile *f; if (superIndex) { iter.setown(superIndex->getSubFileIterator(true)); f = &iter->query(); } else f = indexFile; loop { unsigned location; OwnedIFile iFile; StringBuffer filePath; Owned<IFileDescriptor> fileDesc = f->getFileDescriptor(); Owned<IPartDescriptor> tlkDesc = fileDesc->getPart(fileDesc->numParts()-1); if (!getBestFilePart(this, *tlkDesc, iFile, location, filePath)) throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", f->queryLogicalName()); OwnedIFileIO iFileIO = iFile->open(IFOread); assertex(iFileIO); size32_t tlkSz = (size32_t)iFileIO->size(); initMb.append(tlkSz); ::read(iFileIO, 0, tlkSz, initMb); if (!iter || !iter->next()) break; f = &iter->query(); } } if (helper->diskAccessRequired()) { OwnedRoxieString fetchFilename(helper->getFileName()); if (fetchFilename) { dataFile.setown(queryThorFileManager().lookup(container.queryJob(), fetchFilename, false, 0 != (helper->getFetchFlags() & FFdatafileoptional), true)); if (dataFile) { if (superIndex) throw MakeActivityException(this, 0, "Superkeys and full keyed joins are not supported"); Owned<IFileDescriptor> dataFileDesc = getConfiguredFileDescriptor(*dataFile); void *ekey; size32_t ekeylen; helper->getFileEncryptKey(ekeylen,ekey); bool encrypted = dataFileDesc->queryProperties().getPropBool("@encrypted"); if (0 != ekeylen) { memset(ekey,0,ekeylen); free(ekey); if (!encrypted) { Owned<IException> e = MakeActivityWarning(&container, TE_EncryptionMismatch, "Ignoring encryption key provided as file '%s' was not published as encrypted", helper->getFileName()); container.queryJob().fireException(e); } } else if (encrypted) throw MakeActivityException(this, 0, "File '%s' was published as encrypted but no encryption key provided", fetchFilename.get()); unsigned dataReadWidth = (unsigned)container.queryJob().getWorkUnitValueInt("KJDRR", 0); if (!dataReadWidth || dataReadWidth>container.queryJob().querySlaves()) dataReadWidth = container.queryJob().querySlaves(); Owned<IGroup> grp = container.queryJob().querySlaveGroup().subset((unsigned)0, dataReadWidth); dataFileMapping.setown(getFileSlaveMaps(dataFile->queryLogicalName(), *dataFileDesc, container.queryJob().queryUserDescriptor(), *grp, false, false, NULL)); dataFileMapping->serializeFileOffsetMap(offsetMapMb.clear()); } else indexFile.clear(); } } } else