// TODO-future: Add in an optional invariant that checks statistical validity, // i.e., that number>30. Consider also adding a generic conf function that // takes a z-constant as input so that we can easily add more/different conf // intervals. Could also add an interface that checks if a specific value is // within some specified confidence interval. // Potential z constants of interest: // conf90 z= 1.645 // conf95 z= 1.960 // conf99 z= 2.576 double Stats::conf95() const { DEBUG_SINVARIANT(checkInvariants()); if (number == 0) return DBL_MAX; // **** Should really be NaN if count==0 DEBUG_SINVARIANT(number > 0); // **** Could be number > 30 for statistical // **** validity. return 1.96*stddev()/sqrt((double)number); }
/// Stores @p val into this field's slot of the row starting at @p row_pos
/// and then marks the field as non-null.
///
/// @param e        extent that is expected to contain the row (checked in
///                 debug builds only)
/// @param row_pos  start of the row; the field lives at row_pos + offset
/// @param val      value to store
void set(Extent &e, uint8_t *row_pos, T val) {
    DEBUG_SINVARIANT(&e != NULL);
    uint8_t *const field_start = row_pos + offset;
    DEBUG_SINVARIANT(e.insideExtentFixed(field_start));
    T *const slot = reinterpret_cast<T *>(field_start);
    *slot = val;
    // Writing a value always clears the null marker.
    setNull(e, row_pos, false);
}
/// Reads this field's value from the row starting at @p row_pos.
///
/// @param e        extent that is expected to contain the row (checked in
///                 debug builds only)
/// @param row_pos  start of the row; the field lives at row_pos + offset
/// @return default_value when the field is nullable and currently null,
///         otherwise the T stored at the field's position
T val(const Extent &e, uint8_t *row_pos) const {
    DEBUG_SINVARIANT(&e != NULL);
    uint8_t *const field_start = row_pos + offset;
    DEBUG_SINVARIANT(e.insideExtentFixed(field_start));
    // isNull() is only consulted for nullable fields.
    if (!nullable || !isNull(e, row_pos)) {
        return *reinterpret_cast<T *>(field_start);
    }
    return default_value;
}
/// Returns a pointer to this field's bytes inside the row starting at
/// @p row_pos.
///
/// @param e        extent that is expected to contain the row (checked in
///                 debug builds only)
/// @param row_pos  start of the row; the field lives at row_pos + offset
/// @return NULL when the field is nullable and currently null, otherwise
///         row_pos + offset
const byte *val(const Extent &e, uint8_t *row_pos) const {
    DEBUG_SINVARIANT(&e != NULL);
    // isNull() is only consulted for nullable fields.
    if (!nullable || !isNull(e, row_pos)) {
        uint8_t *const field_start = row_pos + offset;
        DEBUG_SINVARIANT(e.insideExtentFixed(field_start));
        return field_start;
    }
    return NULL;
}
/// Reads this boolean field from the row starting at @p row_pos.
///
/// The value is the bit selected by bit_mask in the byte at row_pos + offset.
///
/// @param e        extent that is expected to contain the row (checked in
///                 debug builds only)
/// @param row_pos  start of the row
/// @return default_value when the field is nullable and currently null,
///         otherwise whether the masked bit is set
bool val(const Extent &e, uint8_t *row_pos) const {
    DEBUG_SINVARIANT(&e != NULL);
    uint8_t *const flag_byte = row_pos + offset;
    DEBUG_SINVARIANT(e.insideExtentFixed(flag_byte));
    // isNull() is only consulted for nullable fields.
    if (!nullable || !isNull(e, row_pos)) {
        return (*flag_byte & bit_mask) != 0;
    }
    return default_value;
}
/// Standard deviation, computed as the square root of variance().
///
/// @return 0.0 when variance() is zero or negative, otherwise
///         sqrt(variance()).
double StatsBase::stddev() const {
    DEBUG_SINVARIANT(checkInvariants());
    const double var = variance();
    if (var <= 0.0) {
        // Nothing to take the root of; report no spread.
        return 0.0;
    }
    DEBUG_SINVARIANT(var > 0.0);
    return sqrt(var);
}
/// Sets or clears this boolean field's bit in the row starting at
/// @p row_pos, then marks the field as non-null.
///
/// @param e        extent that is expected to contain the row (checked in
///                 debug builds only)
/// @param row_pos  start of the row; the bit lives in the byte at
///                 row_pos + offset and is selected by bit_mask
/// @param val      new value for the bit
void set(const Extent &e, uint8_t *row_pos, bool val) {
    DEBUG_SINVARIANT(&e != NULL);
    uint8_t *const flag_byte = row_pos + offset;
    DEBUG_SINVARIANT(e.insideExtentFixed(flag_byte));
    if (val) {
        *flag_byte |= bit_mask;
    } else {
        *flag_byte &= ~bit_mask;
    }
    // Writing a value always clears the null marker.
    setNull(e, row_pos, false);
}
/// Copies field_size bytes from @p val into this field's slot of the row
/// starting at @p row_pos, or marks the field null when @p val is NULL.
///
/// @param e         extent that is expected to contain the row (checked in
///                  debug builds only)
/// @param row_pos   start of the row; the field lives at row_pos + offset
/// @param val       source bytes, or NULL to set the field to null
/// @param val_size  size of the source; only checked against field_size in
///                  debug builds, the copy always uses field_size
void set(const Extent &e, uint8_t *row_pos, const void *val, uint32_t val_size) {
    DEBUG_SINVARIANT(&e != NULL);
    if (val != NULL) {
        DEBUG_SINVARIANT(val_size == static_cast<uint32_t>(field_size));
        // Keeps val_size "used" when the invariant above compiles away.
        (void)val_size;
        uint8_t *const field_start = row_pos + offset;
        DEBUG_SINVARIANT(e.insideExtentFixed(field_start));
        memmove(field_start, val, field_size);
        setNull(e, row_pos, false);
    } else {
        setNull(e, row_pos, true);
    }
}
void Stats::printTabular(int depth, std::ostream &out) const { DEBUG_SINVARIANT(checkInvariants()); std::string spaces; for(int i = 0; i < depth; i++) { spaces += " "; } out << spaces << "count " << countll() << "\n"; if (count() > 0) { out << spaces << "min " << min() << "\n"; out << spaces << "max " << max() << "\n"; out << spaces << "mean " << mean() << "\n"; out << spaces << "stddev " << stddev() << "\n"; out << spaces << "variance " << variance() << "\n"; out << spaces << "conf95 " << conf95() << "\n"; out << spaces << "total " << total() << "\n"; out << spaces << "total_sq " << total_sq() << "\n"; } // This is kind of a hack, but I had problems when the printout of // Stats changed for an empty list. else { out << spaces << "min " << min() << "\n"; out << spaces << "max " << max() << "\n"; out << spaces << "mean " << 0 << "\n"; out << spaces << "stddev " << 0 << "\n"; out << spaces << "variance " << 0 << "\n"; out << spaces << "conf95 " << 0 << "\n"; out << spaces << "total " << total() << "\n"; out << spaces << "total_sq " << total_sq() << "\n"; } }
/// Arithmetic mean of the recorded samples: sum / number.
///
/// @return 0.0 when no samples have been recorded, otherwise sum / number.
double Stats::mean() const {
    DEBUG_SINVARIANT(checkInvariants());
    if (number == 0) {
        // Avoid dividing by zero for an empty set of samples.
        return 0.0;
    }
    return static_cast<double>(sum) / static_cast<double>(number);
}
/// Returns this object to its empty state: runs StatsBase::reset(), zeroes
/// the sample count and running sums, and resets the min/max trackers to
/// +infinity / -infinity.
void Stats::reset() {
    DEBUG_SINVARIANT(checkInvariants());
    StatsBase::reset();

    // Extremes start at opposite infinities.
    min_value = Double::Inf;
    max_value = -Double::Inf;

    // Running totals.
    sum = 0.0;
    sumsq = 0.0;
    number = 0;
}
std::string Stats::debugString() const { DEBUG_SINVARIANT(checkInvariants()); if (count() == 0) { return "count 0"; } else { return str(boost::format("count %d mean %G stddev %G var %G 95%%conf %G rel95%%conf %G" " min %G max %G") % count() % mean() % stddev() % variance() % conf95() % relconf95() % min() % max()); } };
/// Destroys every stored element and resets the queue to empty
/// (q_front == q_back == 0).
///
/// The live elements occupy [q_front, q_back) when q_back > q_front;
/// otherwise the first loop runs from q_front to q_size, and when
/// q_back < q_front a second loop covers [0, q_back).
void clear() {
    if (!empty()) {
        const size_t upper = (q_back > q_front) ? q_back : q_size;
        size_t idx = q_front;
        while (idx < upper) {
            allocator.destroy(deque + idx);
            ++idx;
        }
        if (q_back < q_front) {
            // Wrapped around: also destroy the part at the start of the buffer.
            idx = 0;
            while (idx < q_back) {
                allocator.destroy(deque + idx);
                ++idx;
            }
        }
    }
    q_back = 0;
    q_front = 0;
    DEBUG_SINVARIANT(empty());
}
void Stats::printRome(int depth, std::ostream &out) const { DEBUG_SINVARIANT(checkInvariants()); std::string spaces; for(int i = 0; i < depth; i++) { spaces += " "; } out << spaces << "{ count " << countll() << " }\n"; if (count() > 0) { out << spaces << "{ min " << min() << " }\n"; out << spaces << "{ max " << max() << " }\n"; out << spaces << "{ mean " << mean() << " }\n"; out << spaces << "{ stddev " << stddev() << " }\n"; out << spaces << "{ variance " << variance() << " }\n"; out << spaces << "{ conf95 " << conf95() << " }\n"; out << spaces << "{ total " << total() << " }\n"; out << spaces << "{ total_sq " << total_sq() << " }\n"; } }
/// compares two iterators for inequality bool operator!=(const iterator &y) const { DEBUG_SINVARIANT(mydeque == y.mydeque); return cur_pos != y.cur_pos; }
/// subtracts diff to the iterator and returns the reference iterator &operator-=(ptrdiff_t diff) { DEBUG_SINVARIANT(inRangeMinus(diff)); cur_pos = static_cast<size_t>(cur_pos + mydeque->q_size - diff) % mydeque->q_size; return *this; }
/// Relative 95% confidence value: conf95() divided by mean().
///
/// No special handling is done for a zero mean; the result is then whatever
/// the floating-point division produces.
double StatsBase::relconf95() const {
    DEBUG_SINVARIANT(checkInvariants());
    const double half_width = conf95();
    const double center = mean();
    return half_width / center;
}
/// Base-class part of a reset: checks invariants (debug builds only) and
/// bumps the counter of how many times reset() has run.
void StatsBase::reset() {
    DEBUG_SINVARIANT(checkInvariants());
    ++reset_count;
}
/// Destructor: checks invariants one last time (debug builds only) and then
/// clears is_assigned.
// NOTE(review): presumably is_assigned is cleared so that later use of a
// destroyed object trips checkInvariants() -- confirm against that function.
StatsBase::~StatsBase() {
    DEBUG_SINVARIANT(checkInvariants());
    is_assigned = false;
}
/** Returns the value of the field in the @c ExtentSeries' current record.

    Convenience form of the (extent, row position) overload: it passes the
    series' current extent and this field's current row position.

    Preconditions:
    - The name of the Field must have been set and the @c ExtentSeries
      must have a current record (in debug builds the series is checked
      for having an extent).

    @return whatever the (extent, row position) overload returns for the
            current record */
const byte *val() const {
    DEBUG_SINVARIANT(dataseries.hasExtent());
    return val(dataseries.getExtentRef(), rowPos());
}
/** Sets the value of the field in the @c ExtentSeries' current record.
    Note that @p val must be the correct size (or NULL).

    Convenience form of the (extent, row position) overload: it passes the
    series' current extent and this field's current row position along
    with @p val and @p val_size.

    Preconditions:
    - The name of the Field must have been set and the associated
      @c ExtentSeries must have a current record (in debug builds the
      series is checked for having an extent).

    @param val source value for the copy
    @param val_size size of the value; defaults to 0.
           NOTE(review): confirm how the overload this forwards to treats
           a val_size of 0 when @p val is not NULL. */
void set(const void *val, uint32_t val_size = 0) {
    DEBUG_SINVARIANT(dataseries.hasExtent());
    set(dataseries.getExtentRef(), rowPos(), val, val_size);
}
/// Variance of the recorded samples, computed as
/// sumsq / number - mean() * mean().
///
/// @return 0.0 when no samples have been recorded, otherwise the value
///         above.  stddev() treats a zero or negative result as 0.
double Stats::variance() const {
    DEBUG_SINVARIANT(checkInvariants());
    if (number == 0) {
        return 0.0;
    }
    const double avg = mean();
    return static_cast<double>(sumsq) / static_cast<double>(number) - avg * avg;
}
/// subtracts diff and returns the iterator iterator operator-(ptrdiff_t diff) { DEBUG_SINVARIANT(inRangeMinus(diff)); return iterator(mydeque, static_cast<size_t>(cur_pos + mydeque->q_size - diff) % mydeque->q_size); }
/// advance iterator forward (or backward if negative) by amount void advance(int32_t amount, const Extent *within_extent) { DEBUG_SINVARIANT(extent == within_extent); row_offset += amount * within_extent->getTypePtr()->fixedrecordsize(); // allow <= so that we can have a SEP_RowOffset pointing to the end of an exent. DEBUG_SINVARIANT(row_offset <= extent->fixeddata.size()); }
/// distance from a to b in rows; will be negative if a is after b. static difference_type distance(const SEP_RowOffset &a, const SEP_RowOffset &b, const Extent *extent) { DEBUG_SINVARIANT(a.extent == b.extent && b.extent == extent); return (b.row_offset - a.row_offset) / extent->getTypePtr()->fixedrecordsize(); }
bool operator >=(const SEP_RowOffset &them) { DEBUG_SINVARIANT(extent == them.extent); return row_offset >= them.row_offset; }