Пример #1
0
    virtual void init()
    {
        HashDistributeMasterBase::init();

        // JCSMORE should common up some with indexread
        IHThorKeyedDistributeArg *helper = (IHThorKeyedDistributeArg *)queryHelper();

        StringBuffer scoped;
        OwnedRoxieString indexFileName(helper->getIndexFileName());
        queryThorFileManager().addScope(container.queryJob(), indexFileName, scoped);
        file.setown(queryThorFileManager().lookup(container.queryJob(), indexFileName));
        if (!file)
            throw MakeActivityException(this, 0, "KeyedDistribute: Failed to find key: %s", scoped.str());
        if (0 == file->numParts())
            throw MakeActivityException(this, 0, "KeyedDistribute: Can't distribute based on an empty key: %s", scoped.str());

        checkFormatCrc(this, file, helper->getFormatCrc(), true);
        Owned<IFileDescriptor> fileDesc = file->getFileDescriptor();
        Owned<IPartDescriptor> tlkDesc = fileDesc->getPart(fileDesc->numParts()-1);
        if (!tlkDesc->queryProperties().hasProp("@kind") || 0 != stricmp("topLevelKey", tlkDesc->queryProperties().queryProp("@kind")))
            throw MakeActivityException(this, 0, "Cannot distribute using a non-distributed key: '%s'", scoped.str());
        unsigned location;
        OwnedIFile iFile;
        StringBuffer filePath;
        if (!getBestFilePart(this, *tlkDesc, iFile, location, filePath))
            throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", file->queryLogicalName());
        OwnedIFileIO iFileIO = iFile->open(IFOread);
        assertex(iFileIO);

        tlkMb.append(iFileIO->size());
        ::read(iFileIO, 0, (size32_t)iFileIO->size(), tlkMb);

        queryThorFileManager().noteFileRead(container.queryJob(), file);
    }
Пример #2
0
extern bool isCompressedIndex(const char *filename)
{
    OwnedIFile file = createIFile(filename);
    OwnedIFileIO io = file->open(IFOread);
    unsigned __int64 size = file->size();
    if (size)
    {
        KeyHdr hdr;
        if (io->read(0, sizeof(hdr), &hdr) == sizeof(hdr))
        {
            SwapBigEndian(hdr);
            if (size % hdr.nodeSize == 0 && hdr.phyrec == size-1 && hdr.root && hdr.root % hdr.nodeSize == 0 && hdr.ktype & (HTREE_COMPRESSED_KEY|HTREE_QUICK_COMPRESSED_KEY))
            {
                NodeHdr root;
                if (io->read(hdr.root, sizeof(root), &root) == sizeof(root))
                {
                    SwapBigEndian(root);
                    return root.leftSib==0 && root.rightSib==0; 
                }
            }
        }
    }
    return false;
}
Пример #3
0
extern jhtree_decl void validateKeyFile(const char *filename, offset_t nodePos)
{
    OwnedIFile file = createIFile(filename);
    OwnedIFileIO io = file->open(IFOread);
    if (!io)
        throw MakeStringException(1, "Invalid key %s: cannot open file", filename);
    unsigned __int64 size = file->size();
    if (!size)
        throw MakeStringException(2, "Invalid key %s: zero size", filename);
    KeyHdr hdr;
    if (io->read(0, sizeof(hdr), &hdr) != sizeof(hdr))
        throw MakeStringException(4, "Invalid key %s: failed to read key header", filename);
    CKeyHdr keyHdr;
    keyHdr.load(hdr);

    _WINREV(hdr.phyrec);
    _WINREV(hdr.root);
    _WINREV(hdr.nodeSize);
    if (hdr.phyrec != size-1)
        throw MakeStringException(5, "Invalid key %s: phyrec was %" I64F "d, expected %" I64F "d", filename, hdr.phyrec, size-1);
    if (size % hdr.nodeSize)
        throw MakeStringException(3, "Invalid key %s: size %" I64F "d is not a multiple of key node size (%d)", filename, size, hdr.nodeSize);
    if (!hdr.root || hdr.root % hdr.nodeSize !=0)
        throw MakeStringException(6, "Invalid key %s: invalid root pointer %" I64F "x", filename, hdr.root);
    NodeHdr root;
    if (io->read(hdr.root, sizeof(root), &root) != sizeof(root))
        throw MakeStringException(7, "Invalid key %s: failed to read root node", filename);
    _WINREV(root.rightSib);
    _WINREV(root.leftSib);
    if (root.leftSib || root.rightSib)
        throw MakeStringException(8, "Invalid key %s: invalid root node sibling pointers 0x%" I64F "x, 0x%" I64F "x (expected 0,0)", filename, root.leftSib, root.rightSib);

    for (offset_t nodeOffset = (nodePos ? nodePos : hdr.nodeSize); nodeOffset < (nodePos ? nodePos+1 : size); nodeOffset += hdr.nodeSize)
    {
        MemoryAttr ma;
        char *buffer = (char *) ma.allocate(hdr.nodeSize);
        {
            MTIME_SECTION(queryActiveTimer(), "JHTREE read index node");
            io->read(nodeOffset, hdr.nodeSize, buffer);
        }
        CJHTreeNode theNode;
        {
            MTIME_SECTION(queryActiveTimer(), "JHTREE load index node");
            theNode.load(&keyHdr, buffer, nodeOffset, true);
        }
        NodeHdr *nodeHdr = (NodeHdr *) buffer;
        SwapBigEndian(*nodeHdr);
        if (!nodeHdr->isValid(hdr.nodeSize))
            throw MakeStringException(9, "Invalid key %s: invalid node header at position 0x%" I64F "x", filename, nodeOffset);
        if (nodeHdr->leftSib >= size || nodeHdr->rightSib >= size)
            throw MakeStringException(9, "Invalid key %s: out of range sibling pointers 0x%" I64F "x, 0x%" I64F "x at position 0x%" I64F "x", filename, nodeHdr->leftSib, nodeHdr->rightSib, nodeOffset);
        if (nodeHdr->crc32)
        {
            unsigned crc = crc32(buffer + sizeof(NodeHdr), nodeHdr->keyBytes, 0);
            if (crc != nodeHdr->crc32)
                throw MakeStringException(9, "Invalid key %s: crc mismatch at position 0x%" I64F "x", filename, nodeOffset);
        }
        else
        {
            // MORE - if we felt so inclined, we could decode the node and check records were in ascending order
        }
    }
}
Пример #4
0
void CWriteMasterBase::publish()
{
    if (published) return;
    published = true;
    if (!(diskHelperBase->getFlags() & (TDXtemporary|TDXjobtemp)))
        updateActivityResult(container.queryJob().queryWorkUnit(), diskHelperBase->getFlags(), diskHelperBase->getSequence(), fileName, recordsProcessed);

    IPropertyTree &props = fileDesc->queryProperties();
    props.setPropInt64("@recordCount", recordsProcessed);
    if (0 == (diskHelperBase->getFlags() & TDXtemporary) || container.queryJob().queryUseCheckpoints())
    {
        if (0 != (diskHelperBase->getFlags() & TDWexpires))
            setExpiryTime(props, diskHelperBase->getExpiryDays());
        if (TDWupdate & diskHelperBase->getFlags())
        {
            unsigned eclCRC;
            unsigned __int64 totalCRC;
            diskHelperBase->getUpdateCRCs(eclCRC, totalCRC);
            props.setPropInt("@eclCRC", eclCRC);
            props.setPropInt64("@totalCRC", totalCRC);
        }
    }
    container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), diskHelperBase->getTempUsageCount(), TDXtemporary & diskHelperBase->getFlags(), getDiskOutputKind(diskHelperBase->getFlags()), &clusters);
    if (!dlfn.isExternal())
    {
        bool temporary = 0 != (diskHelperBase->getFlags()&TDXtemporary);
        if (!temporary && (queryJob().querySlaves() < fileDesc->numParts()))
        {
            // create empty parts for a fileDesc being published that is larger than this clusters
            size32_t recordSize = 0;
            IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
            if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
            {
                recordSize = diskRowMeta->getMinRecordSize();
                if (0 != (diskHelperBase->getFlags() & TDXgrouped))
                    recordSize += 1;
            }
            unsigned compMethod = COMPRESS_METHOD_LZW;
            // rowdiff used if recordSize > 0, else fallback to compMethod
            if (getOptBool(THOROPT_COMP_FORCELZW, false))
            {
                recordSize = 0; // by default if fixed length (recordSize set), row diff compression is used. This forces compMethod.
                compMethod = COMPRESS_METHOD_LZW;
            }
            else if (getOptBool(THOROPT_COMP_FORCEFLZ, false))
                compMethod = COMPRESS_METHOD_FASTLZ;
            else if (getOptBool(THOROPT_COMP_FORCELZ4, false))
                compMethod = COMPRESS_METHOD_LZ4;
            bool blockCompressed;
            bool compressed = fileDesc->isCompressed(&blockCompressed);
            for (unsigned clusterIdx=0; clusterIdx<fileDesc->numClusters(); clusterIdx++)
            {
                StringBuffer clusterName;
                fileDesc->getClusterGroupName(clusterIdx, clusterName, &queryNamedGroupStore());
                PROGLOG("Creating blank parts for file '%s', cluster '%s'", fileName.get(), clusterName.str());
                unsigned p=0;
                while (p<fileDesc->numParts())
                {
                    if (p == targetOffset)
                        p += queryJob().querySlaves();
                    IPartDescriptor *partDesc = fileDesc->queryPart(p);
                    CDateTime createTime, modifiedTime;
                    for (unsigned c=0; c<partDesc->numCopies(); c++)
                    {
                        RemoteFilename rfn;
                        partDesc->getFilename(c, rfn);
                        StringBuffer path;
                        rfn.getPath(path);
                        try
                        {
                            ensureDirectoryForFile(path.str());
                            OwnedIFile iFile = createIFile(path.str());
                            Owned<IFileIO> iFileIO;
                            if (compressed)
                                iFileIO.setown(createCompressedFileWriter(iFile, recordSize, false, true, NULL, compMethod));
                            else
                                iFileIO.setown(iFile->open(IFOcreate));
                            dbgassertex(iFileIO.get());
                            iFileIO.clear();
                            // ensure copies have matching datestamps, as they would do normally (backupnode expects it)
                            if (partDesc->numCopies() > 1)
                            {
                                if (0 == c)
                                    iFile->getTime(&createTime, &modifiedTime, NULL);
                                else
                                    iFile->setTime(&createTime, &modifiedTime, NULL);
                            }
                        }
                        catch (IException *e)
                        {
                            if (0 == c)
                                throw;
                            Owned<IThorException> e2 = MakeThorException(e);
                            e->Release();
                            e2->setAction(tea_warning);
                            queryJob().fireException(e2);
                        }
                    }
                    partDesc->queryProperties().setPropInt64("@size", 0);
                    p++;
                }
                clusterIdx++;
            }
        }
        queryThorFileManager().publish(container.queryJob(), fileName, *fileDesc, NULL);
    }
}
Пример #5
0
    virtual void init()
    {
        CMasterActivity::init();
        OwnedRoxieString indexFileName(helper->getIndexFileName());
        Owned<IDistributedFile> dataFile;
        Owned<IDistributedFile> indexFile = queryThorFileManager().lookup(container.queryJob(), indexFileName, false, 0 != (helper->getJoinFlags() & JFindexoptional), true);

        unsigned keyReadWidth = (unsigned)container.queryJob().getWorkUnitValueInt("KJKRR", 0);
        if (!keyReadWidth || keyReadWidth>container.queryJob().querySlaves())
            keyReadWidth = container.queryJob().querySlaves();
        

        initMb.clear();
        initMb.append(indexFileName.get());
        if (helper->diskAccessRequired())
            numTags += 2;
        initMb.append(numTags);
        unsigned t=0;
        for (; t<numTags; t++)
        {
            tags[t] = container.queryJob().allocateMPTag();
            initMb.append(tags[t]);
        }
        bool keyHasTlk = false;
        if (indexFile)
        {
            unsigned numParts = 0;
            localKey = indexFile->queryAttributes().getPropBool("@local");

            if (container.queryLocalData() && !localKey)
                throw MakeActivityException(this, 0, "Keyed Join cannot be LOCAL unless supplied index is local");

            checkFormatCrc(this, indexFile, helper->getIndexFormatCrc(), true);
            Owned<IFileDescriptor> indexFileDesc = indexFile->getFileDescriptor();
            IDistributedSuperFile *superIndex = indexFile->querySuperFile();
            unsigned superIndexWidth = 0;
            unsigned numSuperIndexSubs = 0;
            if (superIndex)
            {
                numSuperIndexSubs = superIndex->numSubFiles(true);
                bool first=true;
                // consistency check
                Owned<IDistributedFileIterator> iter = superIndex->getSubFileIterator(true);
                ForEach(*iter)
                {
                    IDistributedFile &f = iter->query();
                    unsigned np = f.numParts()-1;
                    IDistributedFilePart &part = f.queryPart(np);
                    const char *kind = part.queryAttributes().queryProp("@kind");
                    bool hasTlk = NULL != kind && 0 == stricmp("topLevelKey", kind); // if last part not tlk, then deemed local (might be singlePartKey)
                    if (first)
                    {
                        first = false;
                        keyHasTlk = hasTlk;
                        superIndexWidth = f.numParts();
                        if (keyHasTlk)
                            --superIndexWidth;
                    }
                    else
                    {
                        if (hasTlk != keyHasTlk)
                            throw MakeActivityException(this, 0, "Local/Single part keys cannot be mixed with distributed(tlk) keys in keyedjoin");
                        if (keyHasTlk && superIndexWidth != f.numParts()-1)
                            throw MakeActivityException(this, 0, "Super sub keys of different width cannot be mixed with distributed(tlk) keys in keyedjoin");
                    }
                }
                if (keyHasTlk)
                    numParts = superIndexWidth * numSuperIndexSubs;
                else
                    numParts = superIndex->numParts();
            }
            else
            {
                numParts = indexFile->numParts();
                if (numParts)
                {
                    const char *kind = indexFile->queryPart(indexFile->numParts()-1).queryAttributes().queryProp("@kind");
                    keyHasTlk = NULL != kind && 0 == stricmp("topLevelKey", kind);
                    if (keyHasTlk)
                        --numParts;
                }
            }
            if (numParts)
            {
                initMb.append(numParts);
                initMb.append(superIndexWidth); // 0 if not superIndex
                initMb.append((superIndex && superIndex->isInterleaved()) ? numSuperIndexSubs : 0);
                unsigned p=0;
                UnsignedArray parts;
                for (; p<numParts; p++)
                    parts.append(p);
                indexFileDesc->serializeParts(initMb, parts);
                if (localKey)
                    keyHasTlk = false; // not used
                initMb.append(keyHasTlk);
                if (keyHasTlk)
                {
                    if (numSuperIndexSubs)
                        initMb.append(numSuperIndexSubs);
                    else
                        initMb.append((unsigned)1);

                    Owned<IDistributedFileIterator> iter;
                    IDistributedFile *f;
                    if (superIndex)
                    {
                        iter.setown(superIndex->getSubFileIterator(true));
                        f = &iter->query();
                    }
                    else
                        f = indexFile;
                    loop
                    {
                        unsigned location;
                        OwnedIFile iFile;
                        StringBuffer filePath;
                        Owned<IFileDescriptor> fileDesc = f->getFileDescriptor();
                        Owned<IPartDescriptor> tlkDesc = fileDesc->getPart(fileDesc->numParts()-1);
                        if (!getBestFilePart(this, *tlkDesc, iFile, location, filePath))
                            throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", f->queryLogicalName());
                        OwnedIFileIO iFileIO = iFile->open(IFOread);
                        assertex(iFileIO);

                        size32_t tlkSz = (size32_t)iFileIO->size();
                        initMb.append(tlkSz);
                        ::read(iFileIO, 0, tlkSz, initMb);

                        if (!iter || !iter->next())
                            break;
                        f = &iter->query();
                    }
                }
                if (helper->diskAccessRequired())
                {
                    OwnedRoxieString fetchFilename(helper->getFileName());
                    if (fetchFilename)
                    {
                        dataFile.setown(queryThorFileManager().lookup(container.queryJob(), fetchFilename, false, 0 != (helper->getFetchFlags() & FFdatafileoptional), true));
                        if (dataFile)
                        {
                            if (superIndex)
                                throw MakeActivityException(this, 0, "Superkeys and full keyed joins are not supported");
                            Owned<IFileDescriptor> dataFileDesc = getConfiguredFileDescriptor(*dataFile);
                            void *ekey;
                            size32_t ekeylen;
                            helper->getFileEncryptKey(ekeylen,ekey);
                            bool encrypted = dataFileDesc->queryProperties().getPropBool("@encrypted");
                            if (0 != ekeylen)
                            {
                                memset(ekey,0,ekeylen);
                                free(ekey);
                                if (!encrypted)
                                {
                                    Owned<IException> e = MakeActivityWarning(&container, TE_EncryptionMismatch, "Ignoring encryption key provided as file '%s' was not published as encrypted", helper->getFileName());
                                    container.queryJob().fireException(e);
                                }
                            }
                            else if (encrypted)
                                throw MakeActivityException(this, 0, "File '%s' was published as encrypted but no encryption key provided", fetchFilename.get());
                            unsigned dataReadWidth = (unsigned)container.queryJob().getWorkUnitValueInt("KJDRR", 0);
                            if (!dataReadWidth || dataReadWidth>container.queryJob().querySlaves())
                                dataReadWidth = container.queryJob().querySlaves();
                            Owned<IGroup> grp = container.queryJob().querySlaveGroup().subset((unsigned)0, dataReadWidth);
                            dataFileMapping.setown(getFileSlaveMaps(dataFile->queryLogicalName(), *dataFileDesc, container.queryJob().queryUserDescriptor(), *grp, false, false, NULL));
                            dataFileMapping->serializeFileOffsetMap(offsetMapMb.clear());
                        }
                        else
                            indexFile.clear();
                    }
                }
            }
            else