void c4_FormatB::Remove(int index_, int count_) {
  // Remove rows [index_, index_ + count_): delete their private memo
  // columns, shrink the shared inlined-data column, and shift all
  // following offset entries down by the number of bytes removed.
  _recalc = true;

  t4_i32 off = Offset(index_);
  t4_i32 n = Offset(index_ + count_) - off; // total inlined bytes to drop
  d4_assert(n >= 0);

  // remove the columns, if present
  for (int i = 0; i < count_; ++i)
    delete (c4_Column*)_memos.GetAt(index_ + i);
  _memos.RemoveAt(index_, count_);

  if (n > 0)
    _data.Shrink(off, n);

  _offsets.RemoveAt(index_, count_);

  d4_assert(index_ < _offsets.GetSize());

  // adjust all following entries
  while (index_ < _offsets.GetSize())
    _offsets.ElementAt(index_++) -= n;

  // invariant: last offset equals the total inlined data size
  d4_assert((t4_i32)_offsets.GetAt(index_ - 1) == _data.ColSize());
  d4_assert(index_ <= _memos.GetSize() + 1);
}
void c4_FormatB::Define(int, const t4_byte **ptr_) {
  // Load this handler's persistent state: the inlined data column, the
  // per-row size column, and the memo walk column, then rebuild offsets
  // and re-create one c4_Column per row that has out-of-line memo data.
  d4_assert(_memos.GetSize() == 0);

  if (ptr_ != 0) {
    _data.PullLocation(*ptr_);
    if (_data.ColSize() > 0)
      _sizeCol.PullLocation(*ptr_);
    _memoCol.PullLocation(*ptr_);
  }

  // everything below this point could be delayed until use
  // in that case, watch out that column space use is properly tracked

  InitOffsets(_sizeCol);

  if (_memoCol.ColSize() > 0) {
    c4_Bytes walk;
    _memoCol.FetchBytes(0, _memoCol.ColSize(), walk, true);

    const t4_byte *p = walk.Contents();

    for (int row = 0; p < walk.Contents() + walk.Size(); ++row) {
      // the walk stores skip counts: advance row by the pulled delta
      row += c4_Column::PullValue(p);
      d4_assert(row < _memos.GetSize());

      c4_Column *mc = d4_new c4_Column(_data.Persist());
      d4_assert(mc != 0);
      _memos.SetAt(row, mc);

      mc->PullLocation(p);
    }

    // the walk buffer must have been consumed exactly
    d4_assert(p == walk.Contents() + walk.Size());
  }
}
void c4_FormatB::Unmapped() { _data.ReleaseAllSegments(); _sizeCol.ReleaseAllSegments(); _memoCol.ReleaseAllSegments(); for (int i = 0; i < _memos.GetSize(); ++i) { c4_Column *cp = (c4_Column*)_memos.GetAt(i); if (cp != 0) cp->ReleaseAllSegments(); } }
void c4_FormatB::Insert(int index_, const c4_Bytes& buf_, int count_) {
  // Insert count_ copies of buf_ at row index_: grow the shared data
  // column, replicate the bytes (possibly across segment chunks), and
  // splice matching entries into the offset and memo vectors.
  d4_assert(count_ > 0);

  _recalc = true;

  int m = buf_.Size();
  t4_i32 off = Offset(index_);

  // new rows start with no memo columns
  _memos.InsertAt(index_, 0, count_);

  // insert the appropriate number of bytes
  t4_i32 n = count_ * (t4_i32) m;
  if (n > 0) {
    _data.Grow(off, n);

    // store as many copies as needed, but may have to do it in chunks
    int spos = 0;

    c4_ColIter iter (_data, off, off + n);
    while (iter.Next(m - spos)) {
      memcpy(iter.BufSave(), buf_.Contents() + spos, iter.BufLen());

      spos += iter.BufLen();
      if (spos >= m)
        spos = 0; // wrap to start the next copy of buf_
    }
    d4_assert(spos == 0); // must have copied an exact multiple of the data
  }

  // define offsets of the new entries
  _offsets.InsertAt(index_, 0, count_);
  d4_assert(_offsets.GetSize() <= _memos.GetSize() + 1);

  while (--count_ >= 0) {
    _offsets.SetAt(index_++, off);
    off += m;
  }

  d4_assert(index_ < _offsets.GetSize());

  // adjust all following entries
  while (index_ < _offsets.GetSize())
    _offsets.ElementAt(index_++) += n;

  // invariant: last offset equals the total inlined data size
  d4_assert((t4_i32) _offsets.GetAt(index_ - 1) == _data.ColSize());
  d4_assert(index_ <= _memos.GetSize() + 1);
}
void c4_FormatB::InitOffsets(c4_ColOfInts& sizes_) {
  // Rebuild the cumulative offset vector (_offsets) from the per-row
  // size column, and resize the memo vector to match the row count.
  const int numRows = Owner().NumRows();

  if (sizes_.RowCount() != numRows)
    sizes_.SetRowCount(numRows);

  _memos.SetSize(numRows);
  _offsets.SetSize(numRows + 1);

  if (_data.ColSize() > 0) {
    // running sum of item sizes yields each row's end offset
    t4_i32 runningTotal = 0;

    for (int row = 0; row < numRows; ++row) {
      const int itemSize = sizes_.GetInt(row);
      d4_assert(itemSize >= 0);

      runningTotal += itemSize;
      _offsets.SetAt(row + 1, runningTotal);
    }

    // all sizes together must account for the whole data column
    d4_assert(runningTotal == _data.ColSize());
  }
}
void c4_FormatV::SetupAllSubviews() { d4_assert(!_inited); _inited = true; if (_data.ColSize() > 0) { c4_Bytes temp; _data.FetchBytes(0, _data.ColSize(), temp, true); const t4_byte* ptr = temp.Contents(); for (int r = 0; r < _subSeqs.GetSize(); ++r) { // don't materialize subview if it is empty // duplicates code which is in c4_HandlerSeq::Prepare const t4_byte* p2 = ptr; d4_dbgdef(t4_i32 sias =) c4_Column::PullValue(p2); d4_assert(sias == 0); // not yet if (c4_Column::PullValue(p2) > 0) At(r).Prepare(&ptr, false); else ptr = p2; } d4_assert(ptr == temp.Contents() + temp.Size()); }
c4_HandlerSeq &c4_FormatV::At(int index_) {
  // Return the subview handler sequence for the given row,
  // lazily creating (and ref-counting) it on first access.
  d4_assert(_inited);

  c4_HandlerSeq * &slot = (c4_HandlerSeq * &)_subSeqs.ElementAt(index_);
  if (slot != 0)
    return *slot;

  slot = d4_new c4_HandlerSeq(Owner(), this);
  slot->IncRef();
  return *slot;
}
int c4_FormatB::ItemLenOffCol(int index_, t4_i32 &off_, c4_Column * &col_) { col_ = (c4_Column*)_memos.GetAt(index_); if (col_ != 0) { off_ = 0; return col_->ColSize(); } col_ = &_data; off_ = Offset(index_); return Offset(index_ + 1) - off_; }
d4_inline t4_i32 c4_FormatB::Offset(int index_)const {
  // Byte offset of item index_ within the shared _data column.
  // invariant: last offset equals the total inlined data size
  d4_assert((t4_i32)_offsets.GetAt(_offsets.GetSize() - 1) == _data.ColSize());
  d4_assert(_offsets.GetSize() == _memos.GetSize() + 1);
  // NOTE(review): this assert rejects index_ >= GetSize(), yet the clamp
  // below explicitly handles that same case — one of the two appears
  // redundant or wrong in debug builds; confirm intended behavior.
  d4_assert(index_ < _offsets.GetSize());

  // extend offset vectors for missing empty entries at end
  int n = _offsets.GetSize();
  d4_assert(n > 0);

  if (index_ >= n)
    index_ = n - 1;

  return _offsets.GetAt(index_);
}
void c4_FormatB::SetOne(int index_, const c4_Bytes &xbuf_, bool ignoreMemos_) {
  // Store new contents for item index_, growing or shrinking the target
  // column (memo or inlined data) and keeping the offset table in sync.
  // With ignoreMemos_ set, any existing memo column is bypassed and the
  // bytes go into the inlined data column instead.
  //
  // this fixes bug in 2.4.0 when copying string from higher row
  // TODO: this fix is very conservative, figure out when to copy
  // (can probably look at pointer to see whether it's from us)
  int sz = xbuf_.Size();
  c4_Bytes buf_(xbuf_.Contents(), sz, 0 < sz && sz <= c4_Column::kSegMax);

  c4_Column *cp = &_data;
  t4_i32 start = Offset(index_);
  int len = Offset(index_ + 1) - start;

  if (!ignoreMemos_ && _memos.GetAt(index_) != 0)
    len = ItemLenOffCol(index_, start, cp);

  int m = buf_.Size();
  int n = m - len; // signed size delta for the target column

  if (n > 0)
    cp->Grow(start, n);
  else if (n < 0)
    cp->Shrink(start, - n);
  else if (m == 0)
    return ; // no size change and no contents

  _recalc = true;

  cp->StoreBytes(start, buf_);

  if (n && cp == &_data) {
    // if size has changed
    int k = _offsets.GetSize() - 1;

    // if filling in an empty entry at end: extend offsets first
    if (m > 0 && index_ >= k) {
      _offsets.InsertAt(k, _offsets.GetAt(k), index_ - k + 1);
      k = index_ + 1;
      d4_assert(k == _offsets.GetSize() - 1);
    }

    // adjust following entry offsets
    while (++index_ <= k)
      _offsets.ElementAt(index_) += n;
  }

  // invariant: last offset equals the total inlined data size
  d4_assert((t4_i32)_offsets.GetAt(_offsets.GetSize() - 1) == _data.ColSize());
}
d4_inline bool c4_FormatB::ShouldBeMemo(int length_)const {
  // Decide whether an item of the given length should be stored as an
  // out-of-line memo column rather than inlined in the data column.
  //
  // items over 10000 bytes are always memos
  // items up to 100 bytes are never memos
  //
  // else, memo only if the column would be under 1 Mb
  // (assuming all items had the same size as this one)
  //
  // the effect is that as the number of rows increases,
  // smaller and smaller items get turned into memos
  //
  // note that items which are no memo right now stay
  // as is, and so do memos which have not been modified
  int rows = _memos.GetSize() + 1; // avoids divide by zero
  // Fix: parenthesize the && term — the original relied on implicit
  // precedence inside ||, which trips -Wparentheses and misleads readers
  // (behavior is unchanged; && already bound tighter than ||).
  return length_ > 10000 || (length_ > 100 && length_ > 1000000 / rows);
}
c4_Column *c4_FormatB::GetNthMemoCol(int index_, bool alloc_) {
  // Return the memo column for item index_. When the item is currently
  // inlined and alloc_ is set, migrate its bytes out of _data into a
  // freshly allocated private column and register it in _memos.
  t4_i32 start;
  c4_Column *col;
  int n = ItemLenOffCol(index_, start, col);

  if (col == &_data && alloc_) {
    col = d4_new c4_Column(_data.Persist());
    _memos.SetAt(index_, col);

    // Fix: braces added around the nested if/else — the original
    // brace-less `if (n > 0) if (...) ... else ...` was a dangling-else
    // readability hazard (the else binds to the inner if either way,
    // so behavior is unchanged).
    if (n > 0) {
      if (_data.IsDirty()) {
        // data not yet committed: copy bytes into an in-memory buffer
        c4_Bytes temp;
        _data.FetchBytes(start, n, temp, true);
        col->SetBuffer(n);
        col->StoreBytes(0, temp);
      } else {
        // data is clean on file: just reference the on-disk range
        col->SetLocation(_data.Position() + start, n);
      }
    }
  }

  return col;
}
void c4_FormatB::Commit(c4_SaveContext &ar_) {
  // Write this handler's columns to storage. A "full" pass re-derives
  // the size and memo walk columns row by row, promoting large items to
  // memos and demoting stale memos back to inlined data; otherwise the
  // existing columns are committed as-is.
  int rows = _memos.GetSize();
  d4_assert(rows > 0);

  // a full pass is needed after structural changes, when serializing,
  // or when any row currently has a memo column
  bool full = _recalc || ar_.Serializing();

  if (!full)
    for (int i = 0; i < rows; ++i) {
      c4_Column *col = (c4_Column*)_memos.GetAt(i);
      if (col != 0) {
        full = true;
        break;
      }
    }
  d4_assert(_recalc || _sizeCol.RowCount() == rows);

  if (full) {
    // rebuild the size and memo walk columns from scratch
    _memoCol.SetBuffer(0);
    _sizeCol.SetBuffer(0);
    _sizeCol.SetAccessWidth(0);
    _sizeCol.SetRowCount(rows);

    int skip = 0; // rows since the last memo, stored as a delta

    c4_Column *saved = ar_.SetWalkBuffer(&_memoCol);

    for (int r = 0; r < rows; ++r) {
      ++skip;

      t4_i32 start;
      c4_Column *col;
      int len = ItemLenOffCol(r, start, col);

      bool oldMemo = col != &_data;
      bool newMemo = ShouldBeMemo(len);

      if (!oldMemo && newMemo) {
        // promote: move the inlined bytes into a private memo column
        col = GetNthMemoCol(r, true);
        d4_assert(col != &_data);
        //? start = 0;
      }

      c4_Bytes temp;

      if (newMemo) {
        // it now is a memo, inlined data will be empty
        ar_.StoreValue(skip - 1);
        skip = 0;
        ar_.CommitColumn(*col);
      } else if (!oldMemo) {
        // it was no memo, done if it hasn't become one
        _sizeCol.SetInt(r, len);
        continue;
      } else {
        // it was a memo, but it no longer is
        d4_assert(start == 0);
        if (len > 0) {
          _sizeCol.SetInt(r, len);
          col->FetchBytes(start, len, temp, true);
          delete (c4_Column*)_memos.GetAt(r); // 28-11-2001: fix mem leak
          _memos.SetAt(r, 0); // 02-11-2001: fix for use after commit
        }
      }

      SetOne(r, temp, true); // bypass current memo pointer
    }

    ar_.SetWalkBuffer(saved);
  }

  ar_.CommitColumn(_data);

  if (_data.ColSize() > 0) {
    _sizeCol.FixSize(true);
    ar_.CommitColumn(_sizeCol);
    //_sizeCol.FixSize(false);
  }

  ar_.CommitColumn(_memoCol);

  // need a way to find out when the data has been committed (on 2nd pass)
  // both _sizeCol and _memoCol will be clean again when it has
  // but be careful because dirty flag is only useful if size is nonzero
  if (_recalc && !ar_.Serializing())
    _recalc = _sizeCol.ColSize() > 0 && _sizeCol.IsDirty() ||
              _memoCol.ColSize() > 0 && _memoCol.IsDirty();
}
void c4_FormatB::OldDefine(char type_, c4_Persist &pers_) {
  // Load pre-current-format data for this property. 'M' is the old memo
  // layout (separate size/position vectors), 'B' is raw bytes (with a
  // 1.8.6-vs-2.0 vector-order heuristic below), 'S' is zero-terminated
  // strings packed into one column.
  int rows = Owner().NumRows();

  c4_ColOfInts sizes(_data.Persist());

  if (type_ == 'M') {
    InitOffsets(sizes);

    // old memo layout: one vector of sizes, one of file positions
    c4_ColOfInts szVec(_data.Persist());
    pers_.FetchOldLocation(szVec);
    szVec.SetRowCount(rows);

    c4_ColOfInts posVec(_data.Persist());
    pers_.FetchOldLocation(posVec);
    posVec.SetRowCount(rows);

    for (int r = 0; r < rows; ++r) {
      t4_i32 sz = szVec.GetInt(r);
      if (sz > 0) {
        c4_Column *mc = d4_new c4_Column(_data.Persist());
        d4_assert(mc != 0);
        _memos.SetAt(r, mc);

        mc->SetLocation(posVec.GetInt(r), sz);
      }
    }
  } else {
    pers_.FetchOldLocation(_data);

    if (type_ == 'B') {
      pers_.FetchOldLocation(sizes);
#if !q4_OLD_IS_ALWAYS_V2
      // WARNING - HUGE HACK AHEAD - THIS IS NOT 100% FOOLPROOF!
      //
      // The above is correct for MK versions 2.0 and up, but *NOT*
      // for MK 1.8.6 datafiles, which store sizes first (OUCH!!!).
      // This means that there is not a 100% safe way to auto-convert
      // both 1.8.6 and 2.0 files - since there is no way to detect
      // unambiguously which version a datafile is.  All we can do,
      // is to carefully check both vectors, and *hope* that only one
      // of them is valid as sizes vector.  This problem applies to
      // the 'B' (bytes) property type only, and only pre 2.0 files.
      //
      // To build a version which *always* converts assuming 1.8.6,
      // add flag "-Dq4_OLD_IS_PRE_V2" to the compiler command line.
      // Conversely, "-Dq4_OLD_IS_ALWAYS_V2" forces 2.0 conversion.

      if (rows > 0) {
        t4_i32 s1 = sizes.ColSize();
        t4_i32 s2 = _data.ColSize();
#if !q4_OLD_IS_PRE_V2
        // if the size vector is clearly impossible, swap vectors
        bool fix = c4_ColOfInts::CalcAccessWidth(rows, s1) < 0;

        // if the other vector might be valid as well, check further
        if (!fix && c4_ColOfInts::CalcAccessWidth(rows, s2) >= 0) {
          sizes.SetRowCount(rows);
          t4_i32 total = 0;
          for (int i = 0; i < rows; ++i) {
            t4_i32 w = sizes.GetInt(i);
            if (w < 0 || total > s2) {
              total = - 1; // mark as invalid: sizes cannot describe _data
              break;
            }
            total += w;
          }

          // if the sizes don't add up, swap vectors
          fix = total != s2;
        }

        if (fix)
#endif
        {
          // swap the on-file locations of the two vectors
          t4_i32 p1 = sizes.Position();
          t4_i32 p2 = _data.Position();

          _data.SetLocation(p1, s1);
          sizes.SetLocation(p2, s2);
        }
      }
#endif
      InitOffsets(sizes);
    } else {
      d4_assert(type_ == 'S');

      // old string layout: scan for zero bytes to recover item sizes
      sizes.SetRowCount(rows);

      t4_i32 pos = 0;
      t4_i32 lastEnd = 0;
      int k = 0;

      c4_ColIter iter(_data, 0, _data.ColSize());
      while (iter.Next()) {
        const t4_byte *p = iter.BufLoad();
        for (int j = 0; j < iter.BufLen(); ++j)
          if (!p[j]) {
            sizes.SetInt(k++, pos + j + 1-lastEnd);
            lastEnd = pos + j + 1;
          }

        pos += iter.BufLen();
      }
      d4_assert(pos == _data.ColSize());

      if (lastEnd < pos) { // last entry had no zero byte
        _data.InsertData(pos++, 1, true);
        sizes.SetInt(k, pos - lastEnd);
      }

      InitOffsets(sizes);

      // get rid of entries with just a null byte
      for (int r = 0; r < rows; ++r)
        if (c4_FormatB::ItemSize(r) == 1)
          SetOne(r, c4_Bytes());
    }
  }
}