IRowStream *load( IRowStream *in, IRowInterfaces *rowif, ICompare *icompare, bool alldisk, bool &abort, bool &isempty, const char *tracename, bool isstable, unsigned maxcores) { overflowcount = 0; unsigned nrecs; serializer.set(rowif->queryRowSerializer()); rows.setSizing(true,false); #ifdef _FULL_TRACE PROGLOG("CThorRowSortedLoader load start"); #endif isempty = true; while (!abort) { rows.clear(); bool hasoverflowed = false; nrecs = rows.load(*in,true,abort,&hasoverflowed); if (nrecs) isempty = false; if (hasoverflowed) overflowcount++; #ifdef _FULL_TRACE PROGLOG("rows loaded overflowed = %s",hasoverflowed?"true":"false"); #endif if (nrecs&&icompare) rows.sort(*icompare,isstable,maxcores); numrows += nrecs; totalsize += rows.totalSize(); if (!hasoverflowed&&!alldisk) break; #ifdef _FULL_TRACE PROGLOG("CThorRowSortedLoader spilling %u",(unsigned)rows.totalSize()); #endif alldisk = true; unsigned idx = newAuxFile(); Owned<IExtRowWriter> writer = createRowWriter(&auxfiles.item(idx),rowif->queryRowSerializer(),rowif->queryRowAllocator(),false,false,false); rows.save(writer); writer->flush(); #ifdef _FULL_TRACE PROGLOG("CThorRowSortedLoader spilt"); #endif if (!hasoverflowed) break; } ForEachItemIn(i,auxfiles) { #ifdef _FULL_TRACE PROGLOG("CThorRowSortedLoader spill file(%d) %s %"I64F"d",i,auxfiles.item(i).queryFilename(),auxfiles.item(i).size()); #endif instrms.append(*createSimpleRowStream(&auxfiles.item(i),rowif)); }
offset_t write(IRowStream *input) { StringBuffer tempname; GetTempName(tempname,"srtmrg",false); dataFile.setown(createIFile(tempname.str())); Owned<IExtRowWriter> output = createRowWriter(dataFile, rowIf); bool overflowed = false; ActPrintLog(&activity, "Local Overflow Merge start"); unsigned ret=0; loop { const void *_row = input->nextRow(); if (!_row) break; ret++; OwnedConstThorRow row = _row; offset_t start = output->getPosition(); output->putRow(row.getLink()); idx++; if (idx==interval) { idx = 0; if (!sampleRows.append(row.getClear())) { // JCSMORE used to check if 'isFull()' here, but only to warn // I think this is bad news, if has run out of room here... // should at least warn in workunit I suspect overflowsize = output->getPosition(); if (!overflowed) { WARNLOG("Sample buffer full"); overflowed = true; } } } writeidxofs(start); } output->flush(); offset_t end = output->getPosition(); output.clear(); writeidxofs(end); if (idxFileIO) { idxFileStream->flush(); idxFileStream.clear(); idxFileIO.clear(); } if (overflowed) WARNLOG("Overflowed by %"I64F"d", overflowsize); ActPrintLog(&activity, "Local Overflow Merge done: overflow file '%s', size = %"I64F"d", dataFile->queryFilename(), dataFile->size()); return end; }
void open() { char drive [_MAX_DRIVE]; char dir [_MAX_DIR]; char fname [_MAX_DIR]; char ext [_MAX_EXT]; _splitpath(fileName.str(), drive, dir, fname, ext); StringBuffer directory; directory.append(drive).append(dir); Owned<IFile> cd = createIFile(directory.str()); cd->createDirectory(); IHThorSpillArg *helper = (IHThorSpillArg *)queryHelper(); void *ekey; size32_t ekeylen; helper->getEncryptKey(ekeylen,ekey); Owned<ICompressor> ecomp; if (ekeylen!=0) { ecomp.setown(createAESCompressor256(ekeylen,ekey)); memset(ekey,0,ekeylen); free(ekey); compress = true; } Owned<IFile> file = createIFile(fileName.str()); Owned<IFileIO> iFileIO; bool fixedRecordSize = queryRowMetaData()->isFixedSize(); size32_t minrecsize = queryRowMetaData()->getMinRecordSize(); if (fixedRecordSize) ActPrintLog("SPILL: created fixed output %s recsize=%u", (0!=ekeylen)?"[encrypted]":compress?"[compressed]":"",minrecsize); else ActPrintLog("SPILL: created variable output %s, minrecsize=%u", (0!=ekeylen)?"[encrypted]":compress?"[compressed]":"",minrecsize); unsigned rwFlags = (DEFAULT_RWFLAGS & ~rw_autoflush); // flushed by close() if (compress) rwFlags |= rw_compress; else rwFlags |= rw_crc; // only if !compress if (grouped) rwFlags |= rw_grouped; out.setown(createRowWriter(file, this, rwFlags)); }
void CDiskWriteSlaveActivityBase::open() { if (dlfn.isExternal() && !firstNode()) { input.setown(createDataLinkSmartBuffer(this, inputs.item(0), PROCESS_SMART_BUFFER_SIZE, isSmartBufferSpillNeeded(this), grouped, RCUNBOUND, NULL, false, &container.queryJob().queryIDiskUsage())); startInput(input); if (!rfsQueryParallel) { ActPrintLog("Blocked, waiting for previous part to complete write"); CMessageBuffer msg; if (!receiveMsg(msg, container.queryJob().queryMyRank()-1, mpTag)) return; rowcount_t prevRows; msg.read(prevRows); msg.read(tempExternalName); // reuse temp filename, last node will rename ActPrintLog("Previous write row count = %"RCPF"d", prevRows); } } else { input.set(inputs.item(0)); startInput(input); } processed = THORDATALINK_STARTED; bool extend = 0 != (diskHelperBase->getFlags() & TDWextend); if (extend) ActPrintLog("Extending file %s", fName.get()); size32_t exclsz = 0; calcFileCrc = true; bool external = dlfn.isExternal(); bool query = dlfn.isQuery(); if (query && compress) UNIMPLEMENTED; bool direct = query || (external && !firstNode()); bool rename = !external || (!query && lastNode()); Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, exclsz, compress, extend||(external&&!query), ecomp, this, direct, rename, &abortSoon, (external&&!query) ? &tempExternalName : NULL); if (compress) { ActPrintLog("Performing row compression on output file: %s", fName.get()); calcFileCrc = false; } Owned<IFileIOStream> stream; if (wantRaw()) { outraw.setown(createBufferedIOStream(iFileIO)); stream.set(outraw); } else { stream.setown(createIOStream(iFileIO)); out.setown(createRowWriter(stream,::queryRowSerializer(input),::queryRowAllocator(input),grouped,calcFileCrc,false)); // flushed by close } CDfsLogicalFileName dlfn; dlfn.set(logicalFilename); if (extend || (external && !query)) stream->seek(0,IFSend); ActPrintLog("Created output stream for %s", fName.get()); }
void CDiskWriteSlaveActivityBase::open() { if (dlfn.isExternal() && !firstNode()) { input.setown(createDataLinkSmartBuffer(this, inputs.item(0), PROCESS_SMART_BUFFER_SIZE, isSmartBufferSpillNeeded(this), grouped, RCUNBOUND, NULL, false, &container.queryJob().queryIDiskUsage())); startInput(input); if (!rfsQueryParallel) { ActPrintLog("Blocked, waiting for previous part to complete write"); CMessageBuffer msg; if (!receiveMsg(msg, queryJobChannel().queryMyRank()-1, mpTag)) return; rowcount_t prevRows; msg.read(prevRows); msg.read(tempExternalName); // reuse temp filename, last node will rename ActPrintLog("Previous write row count = %" RCPF "d", prevRows); } } else { input.set(inputs.item(0)); startInput(input); } processed = THORDATALINK_STARTED; bool extend = 0 != (diskHelperBase->getFlags() & TDWextend); if (extend) ActPrintLog("Extending file %s", fName.get()); /* Fixed length record size is used when outputting compressed stream to determine run-length compression vs default LZW compression. * NB: only for FLAT files, not CSV or XML */ size32_t diskRowMinSz = 0; IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta(); if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind())) { diskRowMinSz = diskRowMeta->getMinRecordSize(); if (grouped) diskRowMinSz += 1; } calcFileCrc = true; bool external = dlfn.isExternal(); bool query = dlfn.isQuery(); if (query && compress) UNIMPLEMENTED; unsigned twFlags = external ? TW_External : 0; if (query || (external && !firstNode())) twFlags |= TW_Direct; if (!external || (!query && lastNode())) twFlags |= TW_RenameToPrimary; if (extend||(external&&!query)) twFlags |= TW_Extend; Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, diskRowMinSz, twFlags, compress, ecomp, this, &abortSoon, (external&&!query) ? &tempExternalName : NULL); if (compress) { ActPrintLog("Performing row compression on output file: %s", fName.get()); // NB: block compressed output has implicit crc of 0, no need to calculate in row writer. calcFileCrc = false; } Owned<IFileIOStream> stream; if (wantRaw()) { outraw.setown(createBufferedIOStream(iFileIO)); stream.set(outraw); } else { stream.setown(createIOStream(iFileIO)); unsigned rwFlags = 0; if (grouped) rwFlags |= rw_grouped; if (calcFileCrc) rwFlags |= rw_crc; out.setown(createRowWriter(stream, ::queryRowInterfaces(input), rwFlags)); } if (extend || (external && !query)) stream->seek(0,IFSend); ActPrintLog("Created output stream for %s", fName.get()); }