Example #1
0
    IRowStream *load(
        IRowStream *in,
        IRowInterfaces *rowif,
        ICompare *icompare,
        bool alldisk,
        bool &abort,
        bool &isempty,
        const char *tracename,
        bool isstable, unsigned maxcores)
    {
        overflowcount = 0;
        unsigned nrecs;
        serializer.set(rowif->queryRowSerializer());
        rows.setSizing(true,false);
#ifdef _FULL_TRACE
        PROGLOG("CThorRowSortedLoader load start");
#endif
        isempty = true;
        while (!abort) {
            rows.clear();
            bool hasoverflowed = false;
            nrecs = rows.load(*in,true,abort,&hasoverflowed);
            if (nrecs)
                isempty = false;
            if (hasoverflowed)
                overflowcount++;
#ifdef _FULL_TRACE
            PROGLOG("rows loaded overflowed = %s",hasoverflowed?"true":"false");
#endif
            if (nrecs&&icompare)
                rows.sort(*icompare,isstable,maxcores);
            numrows += nrecs;
            totalsize += rows.totalSize();
            if (!hasoverflowed&&!alldisk)
                break;
#ifdef _FULL_TRACE
            PROGLOG("CThorRowSortedLoader spilling %u",(unsigned)rows.totalSize());
#endif
            alldisk = true;
            unsigned idx = newAuxFile();
            Owned<IExtRowWriter> writer = createRowWriter(&auxfiles.item(idx),rowif->queryRowSerializer(),rowif->queryRowAllocator(),false,false,false);
            rows.save(writer);
            writer->flush();
#ifdef _FULL_TRACE
            PROGLOG("CThorRowSortedLoader spilt");
#endif
            if (!hasoverflowed)
                break;
        }
        ForEachItemIn(i,auxfiles) {
#ifdef _FULL_TRACE
            PROGLOG("CThorRowSortedLoader spill file(%d) %s %"I64F"d",i,auxfiles.item(i).queryFilename(),auxfiles.item(i).size());
#endif
            instrms.append(*createSimpleRowStream(&auxfiles.item(i),rowif));
        }
Example #2
0
    offset_t write(IRowStream *input)
    {
        StringBuffer tempname;
        GetTempName(tempname,"srtmrg",false);
        dataFile.setown(createIFile(tempname.str()));
        Owned<IExtRowWriter> output = createRowWriter(dataFile, rowIf);

        bool overflowed = false;
        ActPrintLog(&activity, "Local Overflow Merge start");
        unsigned ret=0;
        loop
        {
            const void *_row = input->nextRow();
            if (!_row)
                break;
            ret++;

            OwnedConstThorRow row = _row;
            offset_t start = output->getPosition();
            output->putRow(row.getLink());
            idx++;
            if (idx==interval)
            {
                idx = 0;
                if (!sampleRows.append(row.getClear()))
                {
                    // JCSMORE used to check if 'isFull()' here, but only to warn
                    // I think this is bad news, if has run out of room here...
                    // should at least warn in workunit I suspect
                    overflowsize = output->getPosition();
                    if (!overflowed)
                    {
                        WARNLOG("Sample buffer full");
                        overflowed = true;
                    }
                }
            }
            writeidxofs(start);
        }
        output->flush();
        offset_t end = output->getPosition();
        output.clear();
        writeidxofs(end);
        if (idxFileIO)
        {
            idxFileStream->flush();
            idxFileStream.clear();
            idxFileIO.clear();
        }
        if (overflowed)
            WARNLOG("Overflowed by %"I64F"d", overflowsize);
        ActPrintLog(&activity, "Local Overflow Merge done: overflow file '%s', size = %"I64F"d", dataFile->queryFilename(), dataFile->size());
        return end;
    }
    void open()
    {
        char drive       [_MAX_DRIVE];
        char dir         [_MAX_DIR];
        char fname       [_MAX_DIR];
        char ext         [_MAX_EXT];
        _splitpath(fileName.str(), drive, dir, fname, ext);

        StringBuffer directory;
        directory.append(drive).append(dir);

        Owned<IFile> cd = createIFile(directory.str());
        cd->createDirectory();

        IHThorSpillArg *helper = (IHThorSpillArg *)queryHelper();
        void *ekey;
        size32_t ekeylen;
        helper->getEncryptKey(ekeylen,ekey);
        Owned<ICompressor> ecomp;
        if (ekeylen!=0)
        {
            ecomp.setown(createAESCompressor256(ekeylen,ekey));
            memset(ekey,0,ekeylen);
            free(ekey);
            compress = true;
        }
        Owned<IFile> file = createIFile(fileName.str());
        Owned<IFileIO> iFileIO;
        bool fixedRecordSize = queryRowMetaData()->isFixedSize();
        size32_t minrecsize = queryRowMetaData()->getMinRecordSize();

        if (fixedRecordSize)
            ActPrintLog("SPILL: created fixed output %s recsize=%u", (0!=ekeylen)?"[encrypted]":compress?"[compressed]":"",minrecsize);
        else
            ActPrintLog("SPILL: created variable output %s, minrecsize=%u", (0!=ekeylen)?"[encrypted]":compress?"[compressed]":"",minrecsize);
        unsigned rwFlags = (DEFAULT_RWFLAGS & ~rw_autoflush); // flushed by close()
        if (compress)
            rwFlags |= rw_compress;
        else
            rwFlags |= rw_crc; // only if !compress
        if (grouped)
            rwFlags |= rw_grouped;
        out.setown(createRowWriter(file, this, rwFlags));
    }
Example #4
0
void CDiskWriteSlaveActivityBase::open()
{
    if (dlfn.isExternal() && !firstNode())
    {
        input.setown(createDataLinkSmartBuffer(this, inputs.item(0), PROCESS_SMART_BUFFER_SIZE, isSmartBufferSpillNeeded(this), grouped, RCUNBOUND, NULL, false, &container.queryJob().queryIDiskUsage()));
        startInput(input);
        if (!rfsQueryParallel)
        {
            ActPrintLog("Blocked, waiting for previous part to complete write");
            CMessageBuffer msg;
            if (!receiveMsg(msg, container.queryJob().queryMyRank()-1, mpTag))
                return;
            rowcount_t prevRows;
            msg.read(prevRows);
            msg.read(tempExternalName); // reuse temp filename, last node will rename
            ActPrintLog("Previous write row count = %"RCPF"d", prevRows);
        }
    }
    else
    {
        input.set(inputs.item(0));
        startInput(input);
    }
    processed = THORDATALINK_STARTED;

    bool extend = 0 != (diskHelperBase->getFlags() & TDWextend);
    if (extend)
        ActPrintLog("Extending file %s", fName.get());
    size32_t exclsz = 0;
    calcFileCrc = true;

    bool external = dlfn.isExternal();
    bool query = dlfn.isQuery();
    if (query && compress)
        UNIMPLEMENTED;

    bool direct = query || (external && !firstNode());
    bool rename = !external || (!query && lastNode());
    Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, exclsz, compress, extend||(external&&!query), ecomp, this, direct, rename, &abortSoon, (external&&!query) ? &tempExternalName : NULL);

    if (compress)
    {
        ActPrintLog("Performing row compression on output file: %s", fName.get());
        calcFileCrc = false;
    }
    Owned<IFileIOStream> stream;
    if (wantRaw())
    {
        outraw.setown(createBufferedIOStream(iFileIO));
        stream.set(outraw);
    }
    else
    {
        stream.setown(createIOStream(iFileIO));
        out.setown(createRowWriter(stream,::queryRowSerializer(input),::queryRowAllocator(input),grouped,calcFileCrc,false)); // flushed by close
    }
    CDfsLogicalFileName dlfn;
    dlfn.set(logicalFilename);
    if (extend || (external && !query))
        stream->seek(0,IFSend);
    ActPrintLog("Created output stream for %s", fName.get());
}
Example #5
0
void CDiskWriteSlaveActivityBase::open()
{
    if (dlfn.isExternal() && !firstNode())
    {
        input.setown(createDataLinkSmartBuffer(this, inputs.item(0), PROCESS_SMART_BUFFER_SIZE, isSmartBufferSpillNeeded(this), grouped, RCUNBOUND, NULL, false, &container.queryJob().queryIDiskUsage()));
        startInput(input);
        if (!rfsQueryParallel)
        {
            ActPrintLog("Blocked, waiting for previous part to complete write");
            CMessageBuffer msg;
            if (!receiveMsg(msg, queryJobChannel().queryMyRank()-1, mpTag))
                return;
            rowcount_t prevRows;
            msg.read(prevRows);
            msg.read(tempExternalName); // reuse temp filename, last node will rename
            ActPrintLog("Previous write row count = %" RCPF "d", prevRows);
        }
    }
    else
    {
        input.set(inputs.item(0));
        startInput(input);
    }
    processed = THORDATALINK_STARTED;

    bool extend = 0 != (diskHelperBase->getFlags() & TDWextend);
    if (extend)
        ActPrintLog("Extending file %s", fName.get());

    /* Fixed length record size is used when outputting compressed stream to determine run-length compression vs default LZW compression.
     * NB: only for FLAT files, not CSV or XML
     */
    size32_t diskRowMinSz = 0;
    IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
    if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
    {
        diskRowMinSz = diskRowMeta->getMinRecordSize();
        if (grouped)
            diskRowMinSz += 1;
    }

    calcFileCrc = true;

    bool external = dlfn.isExternal();
    bool query = dlfn.isQuery();
    if (query && compress)
        UNIMPLEMENTED;

    unsigned twFlags = external ? TW_External : 0;
    if (query || (external && !firstNode()))
        twFlags |= TW_Direct;
    if (!external || (!query && lastNode()))
        twFlags |= TW_RenameToPrimary;
    if (extend||(external&&!query))
        twFlags |= TW_Extend;

    Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, diskRowMinSz, twFlags, compress, ecomp, this, &abortSoon, (external&&!query) ? &tempExternalName : NULL);

    if (compress)
    {
        ActPrintLog("Performing row compression on output file: %s", fName.get());
        // NB: block compressed output has implicit crc of 0, no need to calculate in row  writer.
        calcFileCrc = false;
    }
    Owned<IFileIOStream> stream;
    if (wantRaw())
    {
        outraw.setown(createBufferedIOStream(iFileIO));
        stream.set(outraw);
    }
    else
    {
        stream.setown(createIOStream(iFileIO));
        unsigned rwFlags = 0;
        if (grouped)
            rwFlags |= rw_grouped;
        if (calcFileCrc)
            rwFlags |= rw_crc;
        out.setown(createRowWriter(stream, ::queryRowInterfaces(input), rwFlags));
    }
    if (extend || (external && !query))
        stream->seek(0,IFSend);
    ActPrintLog("Created output stream for %s", fName.get());
}