void process() { ActPrintLog("ReDistributeActivityMaster::process"); HashDistributeMasterBase::process(); IHThorHashDistributeArg *helper = (IHThorHashDistributeArg *)queryHelper(); unsigned n = container.queryJob().querySlaves(); MemoryAttr ma; offset_t *sizes = (offset_t *)ma.allocate(sizeof(offset_t)*n); unsigned i; try { for (i=0;i<n;i++) { if (abortSoon) return; CMessageBuffer mb; #ifdef _TRACE ActPrintLog("ReDistribute process, Receiving on tag %d",statstag); #endif rank_t sender; if (!receiveMsg(mb, RANK_ALL, statstag, &sender)||abortSoon) return; #ifdef _TRACE ActPrintLog("ReDistribute process, Received size from %d",sender); #endif sender--; assertex((unsigned)sender<n); mb.read(sizes[sender]); } ActPrintLog("ReDistributeActivityMaster::process sizes got"); for (i=0;i<n;i++) { CMessageBuffer mb; mb.append(n*sizeof(offset_t),sizes); #ifdef _TRACE ActPrintLog("ReDistribute process, Replying to node %d tag %d",i+1,statstag); #endif if (!container.queryJob().queryJobComm().send(mb, (rank_t)i+1, statstag)) return; } // check if any max skew broken double maxskew = helper->getTargetSkew(); if (maxskew>helper->getSkew()) { offset_t tot = 0; for (i=0;i<n;i++) tot += sizes[i]; offset_t avg = tot/n; for (i=0;i<n;i++) { double r = ((double)sizes[i]-(double)avg)/(double)avg; if ((r>=maxskew)||(-r>maxskew)) { throw MakeActivityException(this, TE_DistributeFailedSkewExceeded, "DISTRIBUTE maximum skew exceeded (node %d has %"I64F"d, average is %"I64F"d)",i+1,sizes[i],avg); } } } } catch (IException *e) { ActPrintLog(e,"ReDistribute"); throw; } ActPrintLog("ReDistributeActivityMaster::process exit"); }
// Emit the key header to a stream as one block of hdr.nodeSize bytes.
// The block starts with a copy of hdr, the remainder is filled with 0xff,
// and the header portion is passed through SwapBigEndian before writing.
// When crc is non-null the bytes written are also tallied into it.
void CKeyHdr::write(IFileIOStream *out, CRC32 *crc)
{
    const unsigned nodeBytes = hdr.nodeSize;
    MemoryAttr nodeStore;
    byte *node = (byte *) nodeStore.allocate(nodeBytes);

    // header first, then pad out to a full node
    byte *padding = node + sizeof(hdr);
    memcpy(node, &hdr, sizeof(hdr));
    memset(padding, 0xff, nodeBytes - sizeof(hdr));
    SwapBigEndian(*(KeyHdr *) node);

    out->write(nodeBytes, node);
    if (!crc)
        return;
    crc->tally(nodeBytes, node);
}
// Emit the key header as a single record of an IWriteSeq.
// The sequence's record size must equal hdr.nodeSize (asserted).  The record
// holds a copy of hdr followed by 0xff fill, with the header portion passed
// through SwapBigEndian.  When crc is non-null the record bytes are tallied
// into it.
void CKeyHdr::write(IWriteSeq *out, CRC32 *crc)
{
    const unsigned recordBytes = hdr.nodeSize;
    assertex(out->getRecordSize()==recordBytes);

    MemoryAttr recordStore;
    byte *record = (byte *) recordStore.allocate(recordBytes);

    // header first, then pad out to a full record
    const size_t headerBytes = sizeof(hdr);
    memcpy(record, &hdr, headerBytes);
    memset(record+headerBytes, 0xff, recordBytes-headerBytes);
    SwapBigEndian(*(KeyHdr *) record);

    out->put(record);
    if (crc != NULL)
        crc->tally(recordBytes, record);
}
// Decompress one block from `in` into `out`.
//
// The block is read from the current position of `in` as:
//   size32_t expanded size, size32_t stored size, then `stored size` bytes.
// `out` is (re)allocated to the expanded size.  When the two sizes are equal
// the payload is copied verbatim; otherwise it is expanded with CLZMA.
//
// Throws (via MakeStringException) if the expander does not produce exactly
// the expected number of bytes.
void LZMALZDecompressToBuffer(MemoryAttr & out, MemoryBuffer & in)
{
    size32_t expsz;
    size32_t cmpsz;
    in.read(expsz).read(cmpsz);
    void *o = out.allocate(expsz);
    if (cmpsz!=expsz) {
        CLZMA lzma;
        size32_t written = lzma.expand(in.readDirect(cmpsz),cmpsz,o,expsz);
        if (written!=expsz)
            // message previously named the FastLZ routine it was copied from
            throw MakeStringException(0, "LZMALZDecompressToBuffer - corrupt data(4) %d %d",written,expsz);
    }
    else
        memcpy(o,in.readDirect(cmpsz),expsz);   // equal sizes: payload copied as-is
}
// Decompress one block located at `src` into `out`.
//
// `src` is read as: size32_t expanded size, size32_t stored size, followed
// immediately by the payload.  `out` is (re)allocated to the expanded size.
// When the two sizes are equal the payload is copied verbatim; otherwise it
// is expanded with CLZMA.
//
// Throws (via MakeStringException) if the expander does not produce exactly
// the expected number of bytes.
void LZMADecompressToAttr(MemoryAttr & out, const void * src)
{
    size32_t *sz = (size32_t *)src;
    size32_t expsz = *(sz++);
    size32_t cmpsz = *(sz++);
    // sz now points just past the two size fields, i.e. at the payload
    void *o = out.allocate(expsz);
    if (cmpsz!=expsz) {
        CLZMA lzma;
        size32_t written = lzma.expand(sz,cmpsz,o,expsz);
        if (written!=expsz)
            // message previously named the FastLZ routine it was copied from
            throw MakeStringException(0, "LZMADecompressToAttr - corrupt data(2) %d %d",written,expsz);
    }
    else
        memcpy(o,sz,expsz);   // equal sizes: payload copied as-is
}
// Sanity-check an index (key) file on disk.
//
// filename - path of the key file to validate.
// nodePos  - if non-zero, only the node at this file offset is checked;
//            if zero, every node from offset nodeSize up to the end of the
//            file is checked.
//
// Checks, in order: the file can be opened and is non-empty; the key header
// can be read; phyrec equals size-1; the file size is a non-zero multiple of
// the node size; the root pointer is non-zero and node aligned; the root node
// has no siblings.  Then, for each node visited: the node can be read in full
// and loaded, its header is valid, its sibling pointers lie inside the file,
// and (when the header carries a non-zero crc32) the CRC of its key bytes
// matches.
//
// Throws via MakeStringException (codes 1-9) on the first failure found.
extern jhtree_decl void validateKeyFile(const char *filename, offset_t nodePos)
{
    OwnedIFile file = createIFile(filename);
    OwnedIFileIO io = file->open(IFOread);
    if (!io)
        throw MakeStringException(1, "Invalid key %s: cannot open file", filename);
    unsigned __int64 size = file->size();
    if (!size)
        throw MakeStringException(2, "Invalid key %s: zero size", filename);
    KeyHdr hdr;
    if (io->read(0, sizeof(hdr), &hdr) != sizeof(hdr))
        throw MakeStringException(4, "Invalid key %s: failed to read key header", filename);
    CKeyHdr keyHdr;
    keyHdr.load(hdr);

    // keyHdr received the header as read from disk; the local copy's fields
    // are passed through _WINREV before being inspected below.
    _WINREV(hdr.phyrec);
    _WINREV(hdr.root);
    _WINREV(hdr.nodeSize);
    if (hdr.phyrec != size-1)
        throw MakeStringException(5, "Invalid key %s: phyrec was %" I64F "d, expected %" I64F "d", filename, hdr.phyrec, size-1);
    // A zero node size (corrupt header) is rejected here too, so that neither
    // this modulo nor the ones below can divide by zero.
    if (!hdr.nodeSize || size % hdr.nodeSize)
        throw MakeStringException(3, "Invalid key %s: size %" I64F "d is not a multiple of key node size (%d)", filename, size, hdr.nodeSize);
    if (!hdr.root || hdr.root % hdr.nodeSize !=0)
        throw MakeStringException(6, "Invalid key %s: invalid root pointer %" I64F "x", filename, hdr.root);
    NodeHdr root;
    if (io->read(hdr.root, sizeof(root), &root) != sizeof(root))
        throw MakeStringException(7, "Invalid key %s: failed to read root node", filename);
    _WINREV(root.rightSib);
    _WINREV(root.leftSib);
    if (root.leftSib || root.rightSib)
        throw MakeStringException(8, "Invalid key %s: invalid root node sibling pointers 0x%" I64F "x, 0x%" I64F "x (expected 0,0)", filename, root.leftSib, root.rightSib);

    // With nodePos set the upper bound is nodePos+1, so the loop body runs
    // once; otherwise it walks every node after the header node.
    for (offset_t nodeOffset = (nodePos ? nodePos : hdr.nodeSize); nodeOffset < (nodePos ? nodePos+1 : size); nodeOffset += hdr.nodeSize)
    {
        MemoryAttr ma;
        char *buffer = (char *) ma.allocate(hdr.nodeSize);
        {
            MTIME_SECTION(queryActiveTimer(), "JHTREE read index node");
            // A short read would leave part of the buffer uninitialised and
            // the checks below would then be run against garbage.
            if (io->read(nodeOffset, hdr.nodeSize, buffer) != hdr.nodeSize)
                throw MakeStringException(9, "Invalid key %s: failed to read node at position 0x%" I64F "x", filename, nodeOffset);
        }
        CJHTreeNode theNode;
        {
            MTIME_SECTION(queryActiveTimer(), "JHTREE load index node");
            theNode.load(&keyHdr, buffer, nodeOffset, true);
        }
        // The raw buffer is swapped in place only after theNode has loaded it.
        NodeHdr *nodeHdr = (NodeHdr *) buffer;
        SwapBigEndian(*nodeHdr);
        if (!nodeHdr->isValid(hdr.nodeSize))
            throw MakeStringException(9, "Invalid key %s: invalid node header at position 0x%" I64F "x", filename, nodeOffset);
        if (nodeHdr->leftSib >= size || nodeHdr->rightSib >= size)
            throw MakeStringException(9, "Invalid key %s: out of range sibling pointers 0x%" I64F "x, 0x%" I64F "x at position 0x%" I64F "x", filename, nodeHdr->leftSib, nodeHdr->rightSib, nodeOffset);
        if (nodeHdr->crc32)
        {
            unsigned crc = crc32(buffer + sizeof(NodeHdr), nodeHdr->keyBytes, 0);
            if (crc != nodeHdr->crc32)
                throw MakeStringException(9, "Invalid key %s: crc mismatch at position 0x%" I64F "x", filename, nodeOffset);
        }
        else
        {
            // MORE - if we felt so inclined, we could decode the node and check records were in ascending order
        }
    }
}
// Set-up for this activity: deserialises the MP tag from `data`, stores the
// helper cast to IHThorDistributionArg, and allocates the aggy array from the
// member MemoryAttr, sized by the minimum record size of the helper's
// internal record.  `slaveData` is not used here.
void init(MemoryBuffer &data, MemoryBuffer &slaveData)
{
    mpTag = container.queryJob().deserializeMPTag(data);
    helper = static_cast<IHThorDistributionArg *>(queryHelper());

    void *tableStore = ma.allocate(helper->queryInternalRecordSize()->getMinRecordSize());
    aggy = (IDistributionTable * *)tableStore;
}
// Append one UTF-8 field to the CSV output.
//
// len  - number of UTF-8 characters (not bytes) in the field.
// data - the UTF-8 bytes.
//
// The pending prefix is written first and is set to the separator afterwards.
// In the old output format the field is always wrapped in quotes, untrimmed.
// Otherwise a non-empty field has leading and trailing whitespace characters
// removed (unless it is entirely whitespace, in which case it is left as is)
// and is quoted only when it contains the separator, terminator or quote
// sequence; inside a quoted field every occurrence of the quote sequence is
// preceded by an extra quote.
void CSVOutputStream::writeUtf8(size32_t len, const char * data)
{
    append(prefix);
    if (oldOutputFormat)
    {
        append(quote).append(rtlUtf8Size(len, data), data).append(quote);
    }
    else if (len)
    {
        // Byte length of each character: stack for short fields, heap otherwise.
        MemoryAttr heapStore;
        size32_t * charBytes;
        if (len>256)
            charBytes = (size32_t *)heapStore.allocate(sizeof(size32_t)*len);
        else
            charBytes = (size32_t *)alloca(sizeof(size32_t)*len);

        // Pass 1: measure every character and note the first/last non-space one.
        unsigned firstKept = (unsigned)-1;
        unsigned lastKept = 0;
        const byte * cursor = (const byte *)data;
        unsigned idx;
        for (idx=0; idx<len; idx++)
        {
            const byte * charStart = cursor;
            UChar ch = readUtf8Character(sizeof(UChar), cursor);
            charBytes[idx] = (size32_t)(cursor-charStart);
            if (u_isspace(ch))
                continue;
            lastKept = idx;
            if (firstKept==(unsigned)-1)
                firstKept = idx;
        }
        const byte * limit = cursor;    // one past the last byte of the field

        // Trim: step data/charBytes past leading spaces, pull limit/len back
        // over trailing ones.
        if (firstKept!=(unsigned)-1)
        {
            for (idx=0; idx<firstKept; idx++)
                data += *(charBytes++);
            len -= firstKept;
            lastKept -= firstKept;
            lastKept++;
            while (lastKept<len)
                limit -= charBytes[--len];
        }

        // Pass 2: exact byte comparison at each character boundary decides
        // whether the field has to be quoted.
        size32_t sepLen = separator.length();
        size32_t termLen = terminator.length();
        size32_t quoteLen = quote.length();
        bool mustQuote = false;
        cursor = (const byte *)data;
        for (idx=0; idx<len; idx++)
        {
            size32_t remaining = (size32_t)(limit-cursor);
            if ((sepLen&&(remaining>=sepLen)&&(memcmp(separator.get(),cursor,sepLen)==0)) ||
                (termLen&&(remaining>=termLen)&&(memcmp(terminator.get(),cursor,termLen)==0)) ||
                ((remaining>=quoteLen)&&(memcmp(quote.get(),cursor,quoteLen)==0)))
            {
                mustQuote = true;
                break;
            }
            cursor += charBytes[idx];
        }

        if (!mustQuote)
            append((size32_t)(limit-(const byte *)data),data);
        else
        {
            // Emit character by character, doubling up embedded quotes.
            append(quote);
            cursor = (const byte *)data;
            for (idx=0; idx<len; idx++)
            {
                size32_t remaining = (size32_t)(limit-cursor);
                if ((remaining>=quoteLen)&&(memcmp(quote.get(),cursor,quoteLen)==0))
                    append(quote);
                append(charBytes[idx],(const char *)cursor);
                cursor += charBytes[idx];
            }
            append(quote);
        }
    }
    prefix = separator;
}
void copyCompress(const char *from, const char *to, size32_t rowsize, bool fast, bool flzstrm, bool stats) { Owned<IFile> srcfile = createIFile(from); Owned<IFileIO> baseio = srcfile->open(IFOread); if (!baseio) { printf("ERROR: could not open '%s' for read\n",from); doexit(3); } Owned<ICompressedFileIO> cmpio = createCompressedFileReader(baseio); Owned<IFileIOStream> flzstrmsrc = cmpio?NULL:createFastLZStreamRead(baseio); bool plaincopy = false; IFileIO *srcio = NULL; if (cmpio) { srcio = cmpio; if (rowsize&&(cmpio->recordSize()==rowsize)) plaincopy = true; else if (!rowsize) { if (fast&&(cmpio->method()==COMPRESS_METHOD_FASTLZ)) plaincopy = true; else if (!fast&&(cmpio->method()==COMPRESS_METHOD_LZW)) plaincopy = true; } } else if (flzstrmsrc) { if (flzstrm) plaincopy = true; } else srcio = baseio; if (plaincopy) { cmpio.clear(); srcio = baseio.get(); } Owned<IFile> dstfile = createIFile(to); StringBuffer fulldst; if (dstfile->isDirectory()==foundYes) { dstfile.clear(); addPathSepChar(fulldst.append(to)).append(pathTail(from)); to = fulldst.str(); dstfile.setown(createIFile(to)); } if (dstfile->exists()) { printf("ERROR: file '%s' already exists\n",to); doexit(4); } unsigned start; unsigned startu; if (stats) { start = msTick(); startu = usTick(); } Owned<IFileIO> dstio; Owned<IFileIOStream> flzstrmdst; if (plaincopy||flzstrm) { dstio.setown(dstfile->open(IFOcreate)); if (dstio&&!plaincopy) flzstrmdst.setown(createFastLZStreamWrite(dstio)); } else dstio.setown(createCompressedFileWriter(dstfile,rowsize,false,true,NULL,fast)); if (!dstio) { printf("ERROR: could not open '%s' for write\n",to); doexit(5); } #ifdef __linux__ // this is not really needed in windows - if it is we will have to // test the file extension - .exe, .bat struct stat info; if (stat(from, &info) == 0) // cannot fail - exception would have been thrown above dstfile->setCreateFlags(info.st_mode&(S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR|S_IWGRP|S_IWOTH|S_IXUSR|S_IXGRP|S_IXOTH)); #endif MemoryAttr mb; void 
* buffer = mb.allocate(BUFFERSIZE); offset_t offset = 0; try { loop { size32_t got = cmpio.get()?cmpio->read(offset, BUFFERSIZE, buffer):srcio->read(offset, BUFFERSIZE, buffer); if (got == 0) break; if (flzstrmdst) flzstrmdst->write(got,buffer); else dstio->write(offset, got, buffer); offset += got; } } catch (IException *e) { // try to delete partial copy dstio.clear(); try { dstfile->remove(); } catch (IException *e2) { StringBuffer s; pexception(s.clear().append("Removing partial copy file: ").append(to).str(),e2); e2->Release(); } throw e; } flzstrmdst.clear(); dstio.clear(); if (stats) printStats(offset,start,startu); CDateTime createTime, modifiedTime; if (srcfile->getTime(&createTime, &modifiedTime, NULL)) dstfile->setTime(&createTime, &modifiedTime, NULL); printf("copied %s to %s%s\n",from,to,plaincopy?"":" compressing"); { // print details dstio.setown(dstfile->open(IFOread)); if (dstio) { Owned<ICompressedFileIO> cmpio = createCompressedFileReader(dstio); Owned<IFileIOStream> flzstrm = cmpio?NULL:createFastLZStreamRead(dstio); if (cmpio||flzstrm) printCompDetails(to,dstio,cmpio,flzstrm); else printf("destination %s not compressed\n",to); } else printf("destination %s could not be read\n",to); } }
int copyExpanded(const char *from, const char *to, bool stats) { Owned<IFile> srcfile = createIFile(from); Owned<IFileIO> srcio = srcfile->open(IFOread); if (!srcio) { printf("ERROR: could not open '%s' for read\n",from); doexit(3); } Owned<ICompressedFileIO> cmpio = createCompressedFileReader(srcio); Owned<IFileIOStream> flzstrm = cmpio?NULL:createFastLZStreamRead(srcio); int ret = 0; if (cmpio||flzstrm) printCompDetails(from,srcio,cmpio,flzstrm); else { ret = 1; printf("%s is not compressed, size= %"I64F"d\n",from,srcio->size()); } if (!to||!*to) return ret; Owned<IFile> dstfile = createIFile(to); StringBuffer fulldst; if (dstfile->isDirectory()==foundYes) { dstfile.clear(); addPathSepChar(fulldst.append(to)).append(pathTail(from)); to = fulldst.str(); dstfile.setown(createIFile(to)); } if (dstfile->exists()) { printf("ERROR: file '%s' already exists\n",to); doexit(4); } unsigned start; unsigned startu; if (stats) { start = msTick(); startu = usTick(); } Owned<IFileIO> dstio = dstfile->open(IFOcreate); if (!dstio) { printf("ERROR: could not open '%s' for write\n",to); doexit(5); } #ifdef __linux__ // this is not really needed in windows - if it is we will have to // test the file extension - .exe, .bat struct stat info; if (stat(from, &info) == 0) // cannot fail - exception would have been thrown above dstfile->setCreateFlags(info.st_mode&(S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR|S_IWGRP|S_IWOTH|S_IXUSR|S_IXGRP|S_IXOTH)); #endif MemoryAttr mb; void * buffer = mb.allocate(BUFFERSIZE); offset_t offset = 0; try { loop { size32_t got = cmpio.get()?cmpio->read(offset,BUFFERSIZE, buffer): (flzstrm?flzstrm->read(BUFFERSIZE, buffer): srcio->read(offset, BUFFERSIZE, buffer)); if (got == 0) break; dstio->write(offset, got, buffer); offset += got; } } catch (IException *e) { // try to delete partial copy dstio.clear(); try { dstfile->remove(); } catch (IException *e2) { StringBuffer s; pexception(s.clear().append("Removing partial copy file: ").append(to).str(),e2); e2->Release(); } 
throw e; } dstio.clear(); if (stats) printStats(offset,start,startu); CDateTime createTime, modifiedTime; if (srcfile->getTime(&createTime, &modifiedTime, NULL)) dstfile->setTime(&createTime, &modifiedTime, NULL); printf("copied %s to %s%s\n",from,to,cmpio.get()?" expanding":""); return 0; }