void c4_FormatB::Remove(int index_, int count_) {
  _recalc = true;

  t4_i32 off = Offset(index_);
  t4_i32 n = Offset(index_ + count_) - off;
  d4_assert(n >= 0);

  // remove the columns, if present
  for (int i = 0; i < count_; ++i)
    delete (c4_Column*)_memos.GetAt(index_ + i);
  _memos.RemoveAt(index_, count_);

  if (n > 0)
    _data.Shrink(off, n);

  _offsets.RemoveAt(index_, count_);

  d4_assert(index_ < _offsets.GetSize());

  // adjust all following entries
  while (index_ < _offsets.GetSize())
    _offsets.ElementAt(index_++) -= n;

  d4_assert((t4_i32)_offsets.GetAt(index_ - 1) == _data.ColSize());
  d4_assert(index_ <= _memos.GetSize() + 1);
}
void c4_FormatB::Define(int, const t4_byte **ptr_) {
  d4_assert(_memos.GetSize() == 0);

  if (ptr_ != 0) {
    _data.PullLocation(*ptr_);
    if (_data.ColSize() > 0)
      _sizeCol.PullLocation(*ptr_);
    _memoCol.PullLocation(*ptr_);
  }

  // everything below this point could be delayed until use
  // in that case, watch out that column space use is properly tracked

  InitOffsets(_sizeCol);

  if (_memoCol.ColSize() > 0) {
    c4_Bytes walk;
    _memoCol.FetchBytes(0, _memoCol.ColSize(), walk, true);

    const t4_byte *p = walk.Contents();

    for (int row = 0; p < walk.Contents() + walk.Size(); ++row) {
      row += c4_Column::PullValue(p);
      d4_assert(row < _memos.GetSize());

      c4_Column *mc = d4_new c4_Column(_data.Persist());
      d4_assert(mc != 0);
      _memos.SetAt(row, mc);

      mc->PullLocation(p);
    }

    d4_assert(p == walk.Contents() + walk.Size());
  }
}
void c4_FormatB::Unmapped() {
  _data.ReleaseAllSegments();
  _sizeCol.ReleaseAllSegments();
  _memoCol.ReleaseAllSegments();

  for (int i = 0; i < _memos.GetSize(); ++i) {
    c4_Column *cp = (c4_Column*)_memos.GetAt(i);
    if (cp != 0)
      cp->ReleaseAllSegments();
  }
}
// Example #4
// 0
// File: format.cpp  Project: SASfit/SASfit
void c4_FormatB::Insert(int index_, const c4_Bytes& buf_, int count_)
{
  d4_assert(count_ > 0);

  _recalc = true;

  int m = buf_.Size();
  t4_i32 off = Offset(index_);

  _memos.InsertAt(index_, 0, count_);

    // insert the appropriate number of bytes
  t4_i32 n = count_ * (t4_i32) m;
  if (n > 0) {
    _data.Grow(off, n);

      // store as many copies as needed, but may have to do it in chunks
    int spos = 0;

    c4_ColIter iter (_data, off, off + n);
    while (iter.Next(m - spos)) {
      memcpy(iter.BufSave(), buf_.Contents() + spos, iter.BufLen());

      spos += iter.BufLen();
      if (spos >= m)
        spos = 0;
    }

    d4_assert(spos == 0); // must have copied an exact multiple of the data
  }

    // define offsets of the new entries
  _offsets.InsertAt(index_, 0, count_);
  d4_assert(_offsets.GetSize() <= _memos.GetSize() + 1);

  while (--count_ >= 0) {
    _offsets.SetAt(index_++, off);
    off += m;
  }

  d4_assert(index_ < _offsets.GetSize());

    // adjust all following entries
  while (index_ < _offsets.GetSize())
    _offsets.ElementAt(index_++) += n;

  d4_assert((t4_i32) _offsets.GetAt(index_ - 1) == _data.ColSize());
  d4_assert(index_ <= _memos.GetSize() + 1);
}
// Example #5
// 0
// File: format.cpp  Project: SASfit/SASfit
// Sizes the memo and offset tables to match the owner's row count and, when
// there is inline data, fills the offset table with the running totals of
// the per-row sizes read from sizes_.
void c4_FormatB::InitOffsets(c4_ColOfInts& sizes_)
{
  const int rowCount = Owner().NumRows();

  if (sizes_.RowCount() != rowCount)
    sizes_.SetRowCount(rowCount);

  _memos.SetSize(rowCount);
  _offsets.SetSize(rowCount + 1); // one extra slot for the end offset

  if (_data.ColSize() <= 0)
    return; // no inline data, no offsets to compute

  t4_i32 running = 0;
  int r = 0;
  while (r < rowCount) {
    const int len = sizes_.GetInt(r);
    d4_assert(len >= 0);
    running += len;
    _offsets.SetAt(++r, running); // slot r+1 holds the end of row r
  }

  d4_assert(running == _data.ColSize());
}
// Example #6
// 0
// File: format.cpp  Project: SASfit/SASfit
// One-time initialisation of all subviews from the stored column data.
//
// Marks the handler as initialised, then walks a copy of the data column.
// For each subview slot two values are read ahead: the first is asserted to
// be zero in debug builds, and if the second is positive the subview is
// created (via At) and prepared from the stream; otherwise the slot is left
// unmaterialised and the stream is just advanced past the two values.
void c4_FormatV::SetupAllSubviews()
{
  d4_assert(!_inited);
  _inited = true;

  if (_data.ColSize() > 0) {
    c4_Bytes temp;
    _data.FetchBytes(0, _data.ColSize(), temp, true);
    const t4_byte* ptr = temp.Contents();

    for (int r = 0; r < _subSeqs.GetSize(); ++r) {
      // don't materialize subview if it is empty
      // duplicates code which is in c4_HandlerSeq::Prepare
      const t4_byte* p2 = ptr; // look ahead without consuming from ptr
      d4_dbgdef(t4_i32 sias =)
        c4_Column::PullValue(p2);
      d4_assert(sias == 0); // not yet

      if (c4_Column::PullValue(p2) > 0)
        At(r).Prepare(&ptr, false); // Prepare re-reads from ptr itself
      else
        ptr = p2; // empty: skip the two values just read
    }

    // the whole buffer must have been consumed
    d4_assert(ptr == temp.Contents() + temp.Size());
  }
}
// Returns the subview sequence at index_, creating it (and taking a
// reference on it) the first time that slot is accessed.
c4_HandlerSeq &c4_FormatV::At(int index_) {
  d4_assert(_inited);

  c4_HandlerSeq * &slot = (c4_HandlerSeq * &)_subSeqs.ElementAt(index_);
  if (slot != 0)
    return *slot; // already materialized

  slot = d4_new c4_HandlerSeq(Owner(), this);
  slot->IncRef();
  return *slot;
}
// Locates the bytes of item index_.
//
// On return col_ is the column holding the item and off_ its start offset
// within that column; the return value is the item's length.  If the row
// has a memo column, that column is used in full (offset 0); otherwise the
// item is a slice of the shared _data column.
int c4_FormatB::ItemLenOffCol(int index_, t4_i32 &off_, c4_Column * &col_) {
  col_ = (c4_Column*)_memos.GetAt(index_);
  if (col_ == 0) {
    // inline item: delimited by two consecutive offsets
    col_ = &_data;
    off_ = Offset(index_);
    return Offset(index_ + 1) - off_;
  }

  // memo item: occupies its own column entirely
  off_ = 0;
  return col_->ColSize();
}
// Returns the start offset of item index_ within _data.  An index at or
// beyond the end of the offset table is clamped to its last entry.
d4_inline t4_i32 c4_FormatB::Offset(int index_)const {
  const int count = _offsets.GetSize();

  d4_assert((t4_i32)_offsets.GetAt(count - 1) == _data.ColSize());
  d4_assert(count == _memos.GetSize() + 1);
  d4_assert(index_ < count);
  d4_assert(count > 0);

  // extend offset vectors for missing empty entries at end
  const int slot = index_ < count ? index_ : count - 1;

  return _offsets.GetAt(slot);
}
// Replaces the contents of item index_ with the bytes in xbuf_.
//
// The target is the row's memo column when one exists and ignoreMemos_ is
// false; otherwise it is the row's slice of the shared _data column.  The
// target is grown or shrunk to the new length, the bytes are stored, and if
// the inline data changed size the following offsets are adjusted.
void c4_FormatB::SetOne(int index_, const c4_Bytes &xbuf_, bool ignoreMemos_) {
  // this fixes bug in 2.4.0 when copying string from higher row
  // TODO: this fix is very conservative, figure out when to copy
  // (can probably look at pointer to see whether it's from us)
  int sz = xbuf_.Size();
  // NOTE(review): the third argument is presumably a "make a private copy"
  // flag, set only for non-empty buffers of at most kSegMax bytes - confirm
  c4_Bytes buf_(xbuf_.Contents(), sz, 0 < sz && sz <= c4_Column::kSegMax);

  // default target: the inline slice [start, start + len) of _data
  c4_Column *cp = &_data;
  t4_i32 start = Offset(index_);
  int len = Offset(index_ + 1) - start;

  // switch to the memo column if there is one and it may be used
  if (!ignoreMemos_ && _memos.GetAt(index_) != 0)
    len = ItemLenOffCol(index_, start, cp);

  int m = buf_.Size(); // new length
  int n = m - len; // size change, may be negative

  if (n > 0)
    cp->Grow(start, n);
  else if (n < 0)
    cp->Shrink(start,  - n);
  else if (m == 0)
    return ;
  // no size change and no contents

  _recalc = true;

  cp->StoreBytes(start, buf_);

  // offsets only describe _data, so they need no update for memo columns
  if (n && cp ==  &_data) {
    // if size has changed
    int k = _offsets.GetSize() - 1;

    // if filling in an empty entry at end: extend offsets first
    if (m > 0 && index_ >= k) {
      // new entries are copies of the current last offset
      _offsets.InsertAt(k, _offsets.GetAt(k), index_ - k + 1);

      k = index_ + 1;
      d4_assert(k == _offsets.GetSize() - 1);
    }

    // adjust following entry offsets
    while (++index_ <= k)
      _offsets.ElementAt(index_) += n;
  }

  // the last offset must always equal the total inline data size
  d4_assert((t4_i32)_offsets.GetAt(_offsets.GetSize() - 1) == _data.ColSize());
}
// Decides whether an item of length_ bytes should be stored as a memo
// (in its own column) rather than inline.
//
//  - items over 10000 bytes are always memos
//  - items up to 100 bytes are never memos
//  - anything in between is a memo only if length_ exceeds 1000000 divided
//    by the row count, i.e. if a column in which every item had this size
//    would go over roughly 1 Mb
//
// The effect is that as the number of rows increases, smaller and smaller
// items get turned into memos.
//
// Note that items which are no memo right now stay as is, and so do memos
// which have not been modified.
d4_inline bool c4_FormatB::ShouldBeMemo(int length_)const {
  if (length_ > 10000)
    return true;

  if (length_ <= 100)
    return false;

  const int rows = _memos.GetSize() + 1; // avoids divide by zero
  return length_ > 1000000 / rows;
}
// Returns the column holding item index_.
//
// If the item is stored inline and alloc_ is true, a new memo column is
// created for the row and registered in _memos.  A non-empty item is then
// either copied into the new column (when _data is dirty) or the new column
// is pointed at the item's existing location inside _data.  Without alloc_,
// an inline item simply yields &_data.
c4_Column *c4_FormatB::GetNthMemoCol(int index_, bool alloc_) {
  t4_i32 start;
  c4_Column *col;
  const int len = ItemLenOffCol(index_, start, col);

  // nothing to create: already a memo, or the caller did not ask for one
  if (!alloc_ || col != &_data)
    return col;

  col = d4_new c4_Column(_data.Persist());
  _memos.SetAt(index_, col);

  if (len > 0) {
    if (_data.IsDirty()) {
      // copy the bytes out of the modified data column
      c4_Bytes temp;
      _data.FetchBytes(start, len, temp, true);
      col->SetBuffer(len);
      col->StoreBytes(0, temp);
    } else {
      // data column is clean: refer to the item's position directly
      col->SetLocation(_data.Position() + start, len);
    }
  }

  return col;
}
// Commits this handler's columns through the save context ar_.
//
// A "full" pass is done when _recalc is set, when ar_ is serializing, or
// when any row currently has a memo column.  The full pass rebuilds the
// size column and the memo walk data, moving each item between inline and
// memo storage according to ShouldBeMemo().  Afterwards the data column,
// the size column (only if there is inline data) and the memo column are
// committed, in that order.
void c4_FormatB::Commit(c4_SaveContext &ar_) {
  int rows = _memos.GetSize();
  d4_assert(rows > 0);

  bool full = _recalc || ar_.Serializing();

  // the presence of any memo column also forces a full pass
  if (!full)
  for (int i = 0; i < rows; ++i) {
    c4_Column *col = (c4_Column*)_memos.GetAt(i);
    if (col != 0) {
      full = true;
      break;
    }
  }
  d4_assert(_recalc || _sizeCol.RowCount() == rows);

  if (full) {
    // start over with empty memo and size columns
    _memoCol.SetBuffer(0);
    _sizeCol.SetBuffer(0);
    _sizeCol.SetAccessWidth(0);
    _sizeCol.SetRowCount(rows);

    // number of rows seen since the last memo entry was written
    int skip = 0;

    // direct the save context's walk buffer at _memoCol for this loop;
    // the previous one is restored afterwards
    c4_Column *saved = ar_.SetWalkBuffer(&_memoCol);

    for (int r = 0; r < rows; ++r) {
      ++skip;

      t4_i32 start;
      c4_Column *col;
      int len = ItemLenOffCol(r, start, col);

      bool oldMemo = col !=  &_data;
      bool newMemo = ShouldBeMemo(len);

      // inline item that must become a memo: allocate its memo column
      if (!oldMemo && newMemo) {
        col = GetNthMemoCol(r, true);
        d4_assert(col !=  &_data);
        //? start = 0;
      }

      // stays empty unless a former memo is moved back inline below
      c4_Bytes temp;

      if (newMemo) {
        // it now is a memo, inlined data will be empty
        // the stored value is the count of non-memo rows skipped since the
        // previous memo entry (Define reads it back as a row increment)
        ar_.StoreValue(skip - 1);
        skip = 0;
        ar_.CommitColumn(*col);
      } else if (!oldMemo) {
        // it was no memo, done if it hasn't become one
        _sizeCol.SetInt(r, len);
        continue;
      } else {
        // it was a memo, but it no longer is
        d4_assert(start == 0);
        // NOTE(review): a zero-length memo column is left in place here,
        // only non-empty ones are deleted - confirm this is intended
        if (len > 0) {
          _sizeCol.SetInt(r, len);
          col->FetchBytes(start, len, temp, true);
          delete (c4_Column*)_memos.GetAt(r); // 28-11-2001: fix mem leak
          _memos.SetAt(r, 0); // 02-11-2001: fix for use after commit
        }
      }

      // write temp into the inline slot: empties it for rows that are now
      // memos, fills it for rows that were moved back inline
      SetOne(r, temp, true); // bypass current memo pointer
    }

    ar_.SetWalkBuffer(saved);
  }

  ar_.CommitColumn(_data);

  // the size column is only committed when there is inline data,
  // mirroring the conditional read in Define
  if (_data.ColSize() > 0) {
    _sizeCol.FixSize(true);
    ar_.CommitColumn(_sizeCol);
    //_sizeCol.FixSize(false);
  }

  ar_.CommitColumn(_memoCol);

  // need a way to find out when the data has been committed (on 2nd pass)
  // both _sizeCol and _memoCol will be clean again when it has
  // but be careful because dirty flag is only useful if size is nonzero
  if (_recalc && !ar_.Serializing())
    _recalc = _sizeCol.ColSize() > 0 && _sizeCol.IsDirty() || _memoCol.ColSize()
      > 0 && _memoCol.IsDirty();
}
// Sets up this handler from an old-format datafile.
//
// type_ selects the legacy layout being read through pers_:
//   'M' - a size vector and a position vector are read; every row with a
//         positive size gets its own memo column at the given position
//   'B' - the data column is read, followed by a sizes vector (with a
//         heuristic swap of the two for pre-2.0 files, see below)
//   'S' - the data column is read and split at zero bytes to derive the
//         per-row sizes; rows consisting of just the zero byte are emptied
void c4_FormatB::OldDefine(char type_, c4_Persist &pers_) {
  int rows = Owner().NumRows();

  c4_ColOfInts sizes(_data.Persist());

  if (type_ == 'M') {
    // sizes has not been loaded here; InitOffsets sets its row count and
    // sizes the memo and offset tables
    InitOffsets(sizes);

    c4_ColOfInts szVec(_data.Persist());
    pers_.FetchOldLocation(szVec);
    szVec.SetRowCount(rows);

    c4_ColOfInts posVec(_data.Persist());
    pers_.FetchOldLocation(posVec);
    posVec.SetRowCount(rows);

    // create a memo column for every non-empty row
    for (int r = 0; r < rows; ++r) {
      t4_i32 sz = szVec.GetInt(r);
      if (sz > 0) {
        c4_Column *mc = d4_new c4_Column(_data.Persist());
        d4_assert(mc != 0);
        _memos.SetAt(r, mc);

        mc->SetLocation(posVec.GetInt(r), sz);
      }
    }
  } else {
    pers_.FetchOldLocation(_data);

    if (type_ == 'B') {
      pers_.FetchOldLocation(sizes);

#if !q4_OLD_IS_ALWAYS_V2

      // WARNING - HUGE HACK AHEAD - THIS IS NOT 100% FULLPROOF!
      //
      // The above is correct for MK versions 2.0 and up, but *NOT*
      // for MK 1.8.6 datafiles, which store sizes first (OUCH!!!).
      // This means that there is not a 100% safe way to auto-convert
      // both 1.8.6 and 2.0 files - since there is no way to detect
      // unambiguously which version a datafile is.  All we can do,
      // is to carefully check both vectors, and *hope* that only one
      // of them is valid as sizes vector.  This problem applies to
      // the 'B' (bytes) property type only, and only pre 2.0 files.
      //
      // To build a version which *always* converts assuming 1.8.6,
      // add flag "-Dq4_OLD_IS_PRE_V2" to the compiler command line.
      // Conversely, "-Dq4_OLD_IS_ALWAYS_V2" forces 2.0 conversion.

      if (rows > 0) {
        t4_i32 s1 = sizes.ColSize();
        t4_i32 s2 = _data.ColSize();

#if !q4_OLD_IS_PRE_V2
        // if the size vector is clearly impossible, swap vectors
        bool fix = c4_ColOfInts::CalcAccessWidth(rows, s1) < 0;

        // if the other vector might be valid as well, check further
        if (!fix && c4_ColOfInts::CalcAccessWidth(rows, s2) >= 0) {
          sizes.SetRowCount(rows);
          // sum the candidate sizes; bail out with -1 as soon as an entry
          // is negative or the running total has exceeded the data size
          t4_i32 total = 0;
          for (int i = 0; i < rows; ++i) {
            t4_i32 w = sizes.GetInt(i);
            if (w < 0 || total > s2) {
              total =  - 1;
              break;
            }
            total += w;
          }

          // if the sizes don't add up, swap vectors
          fix = total != s2;
        }

        // when q4_OLD_IS_PRE_V2 is set, the "if (fix)" is compiled out and
        // the swap below is unconditional
        if (fix)
#endif 
         {
          // exchange the file locations of the data and sizes columns
          t4_i32 p1 = sizes.Position();
          t4_i32 p2 = _data.Position();
          _data.SetLocation(p1, s1);
          sizes.SetLocation(p2, s2);
        }
      }
#endif 
      InitOffsets(sizes);
    } else {
      d4_assert(type_ == 'S');

      sizes.SetRowCount(rows);

      t4_i32 pos = 0; // bytes scanned so far
      t4_i32 lastEnd = 0; // offset just past the previous zero byte
      int k = 0; // next row to receive a size

      // scan the data: every zero byte ends one item, and the item's size
      // includes that zero byte
      c4_ColIter iter(_data, 0, _data.ColSize());
      while (iter.Next()) {
        const t4_byte *p = iter.BufLoad();
        for (int j = 0; j < iter.BufLen(); ++j)
        if (!p[j]) {
          sizes.SetInt(k++, pos + j + 1-lastEnd);
          lastEnd = pos + j + 1;
        }

        pos += iter.BufLen();
      }

      d4_assert(pos == _data.ColSize());

      if (lastEnd < pos) {
        // last entry had no zero byte
        // one byte is inserted at the end of the data and counted in
        // the final item's size
        _data.InsertData(pos++, 1, true);
        sizes.SetInt(k, pos - lastEnd);
      }

      InitOffsets(sizes);

      // get rid of entries with just a null byte
      for (int r = 0; r < rows; ++r)
        if (c4_FormatB::ItemSize(r) == 1)
          SetOne(r, c4_Bytes());
    }
  }
}