// Checks cardinality/size/length/interval_count of an interval_set over a
// continuous domain type T holding three distinct elements {1,3,5}:
// three singletons have cardinality 3 but total length 0. Adding the open
// interval (1,3) makes the set uncountable (infinite cardinality) with
// total length 2.
void interval_set_distinct_4_bicremental_continuous_types()
{
    typedef IntervalSet<T> IntervalSetT;
    typedef typename IntervalSetT::interval_type IntervalT;
    typedef typename IntervalSet<T>::size_type size_T;
    typedef typename IntervalSet<T>::difference_type diff_T;

    T v1 = make<T>(1);
    T v3 = make<T>(3);
    T v5 = make<T>(5);
    size_T s3 = make<size_T>(3);
    diff_T d0 = make<diff_T>(0);
    diff_T d2 = make<diff_T>(2);

    // {1} {3} {5}: three isolated points.
    IntervalSet<T> is_1_3_5;
    is_1_3_5.add(v1).add(v3).add(v5);

    BOOST_CHECK_EQUAL( cardinality(is_1_3_5), s3 );
    BOOST_CHECK_EQUAL( is_1_3_5.size(), s3 );
    // Singleton intervals contribute no length in a continuous domain.
    BOOST_CHECK_EQUAL( icl::length(is_1_3_5), d0 );
    BOOST_CHECK_EQUAL( interval_count(is_1_3_5), 3 );
    BOOST_CHECK_EQUAL( is_1_3_5.iterative_size(), 3 );
    BOOST_CHECK_EQUAL( iterative_size(is_1_3_5), 3 );

    // Filling the gap (1,3) joins [1,3] into one continuum: cardinality
    // becomes infinite, length becomes 2.
    IntervalSet<T> is_123_5;
    is_123_5 = is_1_3_5;
    is_123_5 += IntervalT::open(v1,v3);

    BOOST_CHECK_EQUAL( cardinality(is_123_5), icl::infinity<size_T>::value() );
    BOOST_CHECK_EQUAL( is_123_5.size(), icl::infinity<size_T>::value() );
    BOOST_CHECK_EQUAL( icl::length(is_123_5), d2 );
}
/// Run the Pipeline skeleton. int run(bool skip_init=false) { int nstages=static_cast<int>(nodes_list.size()); if (!skip_init) { // set the initial value for the barrier if (!barrier) barrier = new BARRIER_T; barrier->barrierSetup(cardinality(barrier)); } if (!prepared) if (prepare()<0) return -1; if (has_input_channel) { /* freeze_and_run is required because in the pipeline * where there are not any manager threads, * which allow to freeze other threads before starting the * computation */ for(int i=0;i<nstages;++i) { nodes_list[i]->set_id(i); if (nodes_list[i]->freeze_and_run(true)<0) { error("ERROR: PIPE, running stage %d\n", i); return -1; } } } else { for(int i=0;i<nstages;++i) { nodes_list[i]->set_id(i); if (nodes_list[i]->run(true)<0) { error("ERROR: PIPE, running stage %d\n", i); return -1; } } } return 0; }
// == operator (EQUALITY) bool set::operator==(const set &s) // Postcondition: checks to see if // set 's' equals the current set. { // self comparison if(this == &s) return true; if(s.cardinality() != cardinality()) return false; set tmp = *this; tmp.insertAll(s); if(tmp.cardinality() == cardinality()) return true; return false; }
// Builds a generator over 'input': computes the region map of the string,
// seeds the RNG from hardware entropy, and prepares uniform distributions
// over region indices (_select) and character positions (_randpos).
// The generator is degenerate when every character position of the input
// is covered by the mapped regions (cardinality equals input length).
// NOTE(review): if region_map yields an empty map, iterative_size() - 1
// underflows in the _select bounds — confirm callers guarantee non-empty
// input/regions.
Generator(std::string const& input)
  : _input(input),
    _dashes(region_map(_input.begin(), _input.end())),
    _rng(std::random_device {}()),
    _select (0, _dashes.iterative_size() - 1),
    _randpos(0, _input.size() - 1),
    _is_degenerate(cardinality(_dashes) == _input.size())
{
}
/*
 * Final function of a HyperLogLog aggregate: turn the accumulated merge
 * state into the estimated distinct count. A NULL transition state (no
 * rows aggregated) yields 0. state[0] holds log2 of the register count;
 * the registers themselves start at state + 2 (slot 1 is skipped —
 * presumably a header/version word; confirm against the transition fn).
 */
Datum
hll_sum_fin(PG_FUNCTION_ARGS)
{
    int64 result = 0;
    dmerge_state *state;

    if (!PG_ARGISNULL(0))
    {
        state = (dmerge_state *) PG_GETARG_POINTER(0);
        result = cardinality(1 << state->state[0], state->state + 2, 1);
    }
    PG_RETURN_INT64(result);
}
// - operator (COMPLIMENT) set& set::operator-=(const set &s) // Postcondition: returns the // compliment of active set - 's' { if(this == &s || cardinality() < 1 || s.cardinality() < 1) return *this; // Remove all intersecting parts removeAll(*this*s); return *this; }
// returns the basis as a vector of commutator words VectorOf<PolyWord> MalcevSet::getPolyWords() const { VectorOf<PolyWord> res( cardinality() ); int cnt = 0; const BasicCommutators& BC = theCollector.commutators(); for(int key = 1; key <= BC.theHirschNumber(); key++) { if( theSet.bound( Generator(key) ) ) { PolyWord pw = theSet.valueOf( Generator(key) ); res[cnt++] = pw; } } return res; }
// Exercises every construction/insertion path that must yield the same
// one-element interval_set {[4,4]}: element +=, interval +=, add, insert;
// then validates cardinality/hull and copy/assign/clear/swap round trips.
void interval_set_ctor_4_bicremental_types()
{
    typedef IntervalSet<T> IntervalSetT;
    typedef typename IntervalSetT::interval_type IntervalT;

    T v4 = make<T>(4);
    IntervalT I4_4I(v4);
    IntervalSet<T> _I4_4I;
    BOOST_CHECK_EQUAL( _I4_4I.empty(), true );

    IntervalSet<T> _I4_4I_1;
    IntervalSet<T> _I4_4I_2;
    IntervalSet<T> _I4_4I_3;
    // Adding the element and adding the singleton interval must agree.
    _I4_4I += v4;
    _I4_4I_1 += I4_4I;
    BOOST_CHECK_EQUAL( _I4_4I, _I4_4I_1 );
    _I4_4I_2.add(v4);
    BOOST_CHECK_EQUAL( _I4_4I, _I4_4I_2 );
    _I4_4I_3.add(I4_4I);
    BOOST_CHECK_EQUAL( _I4_4I, _I4_4I_3 );

    // Re-adding/inserting the same content must be idempotent.
    _I4_4I_1.add(v4).add(I4_4I);
    BOOST_CHECK_EQUAL( _I4_4I, _I4_4I_1 );
    _I4_4I_1.insert(v4).insert(I4_4I);
    BOOST_CHECK_EQUAL( _I4_4I, _I4_4I_1 );
    (_I4_4I_1 += v4) += I4_4I;
    BOOST_CHECK_EQUAL( _I4_4I, _I4_4I_1 );

    // A singleton set: cardinality one, one interval, hull [4,4].
    BOOST_CHECK_EQUAL( cardinality(_I4_4I), unit_element<typename IntervalSet<T>::size_type>::value() );
    BOOST_CHECK_EQUAL( _I4_4I.size(), unit_element<typename IntervalSet<T>::size_type>::value() );
    BOOST_CHECK_EQUAL( interval_count(_I4_4I), 1 );
    BOOST_CHECK_EQUAL( _I4_4I.iterative_size(), 1 );
    BOOST_CHECK_EQUAL( iterative_size(_I4_4I), 1 );
    BOOST_CHECK_EQUAL( hull(_I4_4I).lower(), v4 );
    BOOST_CHECK_EQUAL( hull(_I4_4I).upper(), v4 );

    // Copy construction, assignment, clear and swap.
    IntervalSet<T> _I4_4I_copy(_I4_4I);
    IntervalSet<T> _I4_4I_assigned;
    _I4_4I_assigned = _I4_4I;
    BOOST_CHECK_EQUAL( _I4_4I, _I4_4I_copy );
    BOOST_CHECK_EQUAL( _I4_4I, _I4_4I_assigned );

    _I4_4I_assigned.clear();
    BOOST_CHECK_EQUAL( true, _I4_4I_assigned.empty() );
    _I4_4I_assigned.swap(_I4_4I_copy);
    BOOST_CHECK_EQUAL( true, _I4_4I_copy.empty() );
    BOOST_CHECK_EQUAL( _I4_4I, _I4_4I_assigned );
}
// Postcondition: Determines if the array 'a' is bijective // to the active set. This function also tests for // equality of the array 'a' and the active set. bool set::bijectiveTo(const T a[], size_t N) { // Make sure the number of elements in each matches. bool result = cardinality() == N? true : false; // if we have a match, then search for all the // elements in the array 'a' in the active set. if(result) for(size_t i=0; i<N && result; ++i) result = search(a[i]); // if all the elements of array 'a' are found in // the active set, then search through all the // elements of the active set and see if they // all match with the elements in the array 'a'. if(result) matchALL(a, N); return result; }
// - operator (COMPLIMENT) set set::operator-(const set &s) // Postcondition: returns the // compliment of active set - 's' { set c; if(cardinality() < 1 || this == &s) return c; c = *this; // self assignment if(s.cardinality() < 1) return c; // Remove all intersecting parts c.removeAll(c*s); return c; }
// Subtracting the open gaps (0,2) and (2,4) from the closed interval [0,4]
// over a continuous domain must leave the three isolated points
// {[0] [2] [4]}; operator-=, subtract() and erase() must all agree.
void interval_set_isolate_4_bicremental_continuous_types()
{
    typedef IntervalSet<T> IntervalSetT;
    typedef typename IntervalSetT::interval_type IntervalT;
    typedef typename IntervalSet<T>::size_type size_T;
    typedef typename IntervalSet<T>::difference_type diff_T;

    T v0 = make<T>(0);
    T v2 = make<T>(2);
    T v4 = make<T>(4);
    IntervalT I0_4I = IntervalT::closed(v0,v4);
    IntervalT C0_2D = IntervalT::open(v0,v2);
    IntervalT C2_4D = IntervalT::open(v2,v4);

    //   {[0       4]}
    // - {  (0,2) (2,4)  }
    // = {[0]  [2]  [4]}
    IntervalSet<T> iso_set = IntervalSet<T>(I0_4I);
    IntervalSet<T> gap_set;
    gap_set.add(C0_2D).add(C2_4D);
    BOOST_CHECK_EQUAL( true, true );
    iso_set -= gap_set;

    BOOST_CHECK_EQUAL( cardinality(iso_set), static_cast<size_T>(3) );
    BOOST_CHECK_EQUAL( iso_set.iterative_size(), static_cast<std::size_t>(3) );
    BOOST_CHECK_EQUAL( iterative_size(iso_set), static_cast<std::size_t>(3) );

    // The same isolation computed via subtract(), -=, and erase().
    IntervalSet<T> iso_set2;
    iso_set2.add(I0_4I);
    iso_set2.subtract(C0_2D).subtract(C2_4D);

    IntervalSet<T> iso_set3(I0_4I);
    (iso_set3 -= C0_2D) -= C2_4D;

    IntervalSet<T> iso_set4;
    iso_set4.insert(I0_4I);
    iso_set4.erase(C0_2D).erase(C2_4D);

    BOOST_CHECK_EQUAL( iso_set, iso_set2 );
    BOOST_CHECK_EQUAL( iso_set, iso_set3 );
    BOOST_CHECK_EQUAL( iso_set, iso_set4 );
}
// Copies every cost metric held by this QueryCostInfo into the external
// SQL_QUERY_COST_INFO structure that is handed back across the API
// boundary. Pure field-by-field translation; no computation.
void QueryCostInfo :: translateToExternalFormat(SQL_QUERY_COST_INFO *query_cost_info)
{
  query_cost_info->cpuTime   = cpuTime();
  query_cost_info->ioTime    = ioTime();
  query_cost_info->msgTime   = msgTime();
  query_cost_info->idleTime  = idleTime();
  query_cost_info->totalTime = totalTime();
  query_cost_info->cardinality = cardinality();
  query_cost_info->estimatedTotalMem = totalMem();
  query_cost_info->resourceUsage = resourceUsage();
  query_cost_info->maxCpuUsage = maxCpuUsage();
}
// Checks the size functions of an interval_set over a discrete
// (bicremental) domain type T holding three distinct elements {1,3,5}:
// cardinality, size, interval_count and iterative_size must all be 3.
void interval_set_distinct_4_bicremental_types()
{
    typedef IntervalSet<T> IntervalSetT;
    typedef typename IntervalSetT::interval_type IntervalT;
    typedef typename IntervalSet<T>::size_type size_T;
    typedef typename IntervalSet<T>::difference_type diff_T;

    T v1 = make<T>(1);
    T v3 = make<T>(3);
    T v5 = make<T>(5);
    size_T s3 = make<size_T>(3);

    // {1} {3} {5}: three isolated points, hence three intervals.
    IntervalSet<T> is_1_3_5;
    is_1_3_5.add(v1).add(v3).add(v5);

    BOOST_CHECK_EQUAL( cardinality(is_1_3_5), s3 );
    BOOST_CHECK_EQUAL( is_1_3_5.size(), s3 );
    BOOST_CHECK_EQUAL( interval_count(is_1_3_5), 3 );
    BOOST_CHECK_EQUAL( iterative_size(is_1_3_5), 3 );
    BOOST_CHECK_EQUAL( is_1_3_5.iterative_size(), 3 );
}
// Largest representable value of this numeric/trait type.
// NOTE(review): equates the maximum with cardinality() — presumably the
// value range is 0..cardinality; confirm against the trait's definition.
static value_type max () { return cardinality(); }
// Period of the value sequence; defined as the type's cardinality
// (the sequence repeats after cardinality() steps — TODO confirm
// against the generator/trait this belongs to).
static period_type period () { return cardinality(); }
// Strict (proper) subset test: true when this set is contained in 'b'
// and 'b' holds strictly more elements.
bool Set::operator<(const Set& b) const
{
    return (*this <= b) && (cardinality() < b.cardinality());
}
// Strict weak ordering over derivations: primarily ascending by score;
// when the scores are equivalent (neither compares less), the derivation
// whose 'j' has the larger cardinality orders first.
bool operator()(const derivation_type* x, const derivation_type* y) const
{
    if (x->score < y->score)
        return true;
    if (y->score < x->score)
        return false;
    // Scores tie: break on the cardinality of the index sets.
    return cardinality(x->j) > cardinality(y->j);
}
/*
 * Estimate the distinct count stored in a serialized HyperLogLog value.
 * Payload layout (32-bit words): word 0 holds log2 of the register count,
 * registers start at word 2 (word 1 is skipped — presumably a
 * header/version field; confirm against the serializer).
 */
Datum
hll_count(PG_FUNCTION_ARGS)
{
    bytea *arg = PG_GETARG_BYTEA_P(0);
    uint32_t *data = (uint32_t *) VARDATA(arg);
    int64 result = cardinality(1 << data[0], data + 2, 1);

    PG_RETURN_INT64(result);
}
// Fundamental laws of interval_set for the weakest type class (ordered
// types): behaviour of the empty set under add/subtract/intersect, and
// basic properties (hull, contains, sizes) of one-element sets built from
// the identity and unit elements.
void interval_set_fundamentals_4_ordered_types()
{
    typedef IntervalSet<T> IntervalSetT;
    typedef typename IntervalSetT::interval_type IntervalT;
    typedef typename IntervalSet<T>::size_type size_T;
    typedef typename IntervalSet<T>::difference_type diff_T;

    // ordered types is the largest set of instance types.
    // Because we can not generate values via incrementation for e.g. string,
    // we are able to test operations only for the most basic values
    // identity_element (0, empty, T() ...) and unit_element.
    T v0 = boost::icl::identity_element<T>::value();
    T v1 = unit_element<T>::value();
    IntervalT I0_0I(v0);
    IntervalT I1_1I(v1);
    IntervalT I0_1I(v0, v1, interval_bounds::closed());

    //-------------------------------------------------------------------------
    // empty set
    //-------------------------------------------------------------------------
    BOOST_CHECK_EQUAL(IntervalSet<T>().empty(), true);
    BOOST_CHECK_EQUAL(icl::is_empty(IntervalSet<T>()), true);
    BOOST_CHECK_EQUAL(cardinality(IntervalSet<T>()), boost::icl::identity_element<size_T>::value());
    BOOST_CHECK_EQUAL(IntervalSet<T>().size(), boost::icl::identity_element<size_T>::value());
    BOOST_CHECK_EQUAL(interval_count(IntervalSet<T>()), 0);
    BOOST_CHECK_EQUAL(IntervalSet<T>().iterative_size(), 0);
    BOOST_CHECK_EQUAL(iterative_size(IntervalSet<T>()), 0);
    BOOST_CHECK_EQUAL(IntervalSet<T>(), IntervalSet<T>());

    IntervalT mt_interval = boost::icl::identity_element<IntervalT>::value();
    BOOST_CHECK_EQUAL(mt_interval, IntervalT());
    IntervalSet<T> mt_set = boost::icl::identity_element<IntervalSet<T> >::value();
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());

    // adding emptiness to emptiness yields emptiness ;)
    mt_set.add(mt_interval).add(mt_interval);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    mt_set.insert(mt_interval).insert(mt_interval);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    (mt_set += mt_interval) += mt_interval;
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    BOOST_CHECK_EQUAL(hull(mt_set), boost::icl::identity_element<IntervalT >::value());

    // subtracting emptiness
    mt_set.subtract(mt_interval).subtract(mt_interval);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    mt_set.erase(mt_interval).erase(mt_interval);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    (mt_set -= mt_interval) -= mt_interval;
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());

    // subtracting elements from emptiness
    mt_set.subtract(v0).subtract(v1);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    mt_set.erase(v0).erase(v1);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    (mt_set -= v1) -= v0;
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());

    // subtracting intervals from emptiness
    mt_set.subtract(I0_1I).subtract(I1_1I);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    mt_set.erase(I0_1I).erase(I1_1I);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    (mt_set -= I1_1I) -= I0_1I;
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());

    // intersecting emptiness
    //mt_set.insect(mt_interval).insect(mt_interval);
    //BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    (mt_set &= mt_interval) &= mt_interval;
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    // intersecting emptiness with elements
    (mt_set &= v1) &= v0;
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    // intersecting emptiness with intervals
    (mt_set &= I1_1I) &= I0_1I;
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());

    //-------------------------------------------------------------------------
    // unary set
    //-------------------------------------------------------------------------
    // The element constructor and the interval constructor must agree.
    IntervalSet<T> single_I0_0I_from_element(v0);
    IntervalSet<T> single_I0_0I_from_interval(I0_0I);
    IntervalSet<T> single_I0_0I(single_I0_0I_from_interval);
    BOOST_CHECK_EQUAL(single_I0_0I_from_element, single_I0_0I_from_interval);
    BOOST_CHECK_EQUAL(single_I0_0I_from_element, single_I0_0I);
    BOOST_CHECK_EQUAL(icl::hull(single_I0_0I).lower(), I0_0I.lower());
    BOOST_CHECK_EQUAL(icl::hull(single_I0_0I).upper(), I0_0I.upper());

    IntervalSet<T> single_I1_1I_from_element(v1);
    IntervalSet<T> single_I1_1I_from_interval(I1_1I);
    IntervalSet<T> single_I1_1I(single_I1_1I_from_interval);
    BOOST_CHECK_EQUAL(single_I1_1I_from_element, single_I1_1I_from_interval);
    BOOST_CHECK_EQUAL(single_I1_1I_from_element, single_I1_1I);

    IntervalSet<T> single_I0_1I_from_interval(I0_1I);
    IntervalSet<T> single_I0_1I(single_I0_1I_from_interval);
    BOOST_CHECK_EQUAL(single_I0_1I_from_interval, single_I0_1I);
    // The hull of a one-interval set is that interval.
    BOOST_CHECK_EQUAL(hull(single_I0_1I), I0_1I);
    BOOST_CHECK_EQUAL(hull(single_I0_1I).lower(), I0_1I.lower());
    BOOST_CHECK_EQUAL(hull(single_I0_1I).upper(), I0_1I.upper());

    // contains predicate
    BOOST_CHECK_EQUAL(icl::contains(single_I0_0I, v0), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I0_0I, I0_0I), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I1_1I, v1), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I1_1I, I1_1I), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I0_1I, v0), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I0_1I, I0_1I), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I0_1I, v1), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I0_1I, I1_1I), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I0_1I, single_I0_0I), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I0_1I, single_I1_1I), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I0_1I, single_I0_1I), true);

    BOOST_CHECK_EQUAL(cardinality(single_I0_0I), unit_element<size_T>::value());
    BOOST_CHECK_EQUAL(single_I0_0I.size(), unit_element<size_T>::value());
    BOOST_CHECK_EQUAL(interval_count(single_I0_0I), 1);
    BOOST_CHECK_EQUAL(single_I0_0I.iterative_size(), 1);
    BOOST_CHECK_EQUAL(iterative_size(single_I0_0I), 1);
}
//Alla tal i this måste finnas i b och b måste vara större //för att få ett proper subset bool Set:: operator<(const Set& b) const { if (*this <= b && b.cardinality() > cardinality()) return true; return false; }
//////////////////////////////////////////////////////////////////////////////// // parse - parses a declaration (s.a. TIntermediateRenderer::declare() for syntax // of type) // // Parameters: // name - optional name of the declaration (case sensitive) // type - type description of the declaration // isDef - true if the declarition a default declaration of the renderer // // Returns: // false - type could not be parsed // bool TParameterDeclaration::parse(const char *name, const char *type, bool isDef) { m_isDefault = isDef; m_name = name ? name : ""; m_fullname = type ? type : m_name.c_str(); m_isInline = false; if ( type ) { size_t pos = 0; while ( *type && isspace(*type) ) ++type; m_class = classNum(type, pos); if ( m_class ) type += pos; else m_class = CLASS_UNIFORM; pos = 0; while ( *type && isspace(*type) ) ++type; m_type = typeNum(type, pos); if ( m_type ) type += pos; /* else // if there is no default type (== 0) return false; */ pos = 0; while ( *type && isspace(*type) ) ++type; m_cardinality = cardinality(type, pos); if ( m_cardinality >= 1 ) type += pos; else m_cardinality = 1; pos = 0; while ( *type && isspace(*type) ) ++type; if ( !name || !*name ) { // RI_TOKEN name while ( *type && !isspace(*type) ) { if ( !m_isInline ) { m_name = ""; m_isInline = true; } m_name += *type; ++type; } if ( !m_name.length() ) return false; } // now only white space should follow while ( *type ) { if ( !isspace(*type) ) return false; ++type; } } else { m_class = 0; m_type = 0; m_cardinality = 0; } buildFullname(); return true; }
// we will use greater, so that simple sort will yield estimated score order... bool operator()(const candidate_type* x, const candidate_type* y) const { return (x->score > y->score) || (!(y->score > x->score) && (cardinality(x->j) < cardinality(y->j))); }
/**
  * Convert a string containing some number of fields into binary form,
  * possibly inferring the type of data in the process.
  *
  * Strings are mapped in the order they first appear in the input (so the
  * first string is ALWAYS mapped to 0, etc).
  *
  * ASSUMPTIONS:
  * 1. <line> is NUL-terminated and free of NL and CR characters.
  *
  * Note: 'line' is tokenized destructively (separators overwritten with
  * NULs), and the parsed values land in f->buf (cat for int/string codes,
  * num for floats). The descriptor 'd' receives missing count, constancy,
  * cardinality and type flags.
  */
int
feature_encode( char * const line, struct feature *f, struct mtm_descriptor *d )
{
    int stat_class = MTM_STATCLASS_UNKNOWN;
    bool infer_field_type = false;
    int ft, field_type = MTM_FIELD_TYPE_UNK;

    const char *token;
    char *pc = line;
    bool eol = false;
    unsigned int field_count = 0;
    int missing_value_count = 0;
    union {
        mtm_fp_t  f;
        mtm_int_t i;
    } last_value_read;

    memset( d, 0, sizeof(struct mtm_descriptor) );

    assert( szs_count( f->category_labels ) == 0 );

    if( f->expect_row_labels ) {

        // Scan past the row identifier.
        while( *pc && *pc != CHAR_FIELD_SEP ) pc++;

        // ...and make sure there was more than just a row id on the line!
        if( CHAR_FIELD_SEP != *pc ) {
            // A non-empty line with exactly one field is bad format.
            return MTM_E_FORMAT_MATRIX;
        }

        f->label_length = pc - line;
        *pc++ = 0;

        // The row-label prefix may (partially) determine the field type.
        if( f->interpret_prefix ) {
            stat_class = f->interpret_prefix( line );
            field_type = field_type_from_stat_class( stat_class );
            infer_field_type = (__builtin_popcount( field_type ) != 1 );
            // ...either MTM_FIELD_TYPE_UNK or multiple bits.
        }
    }

    while( field_count < f->length && ! eol ) {

        char *endpt = ""; // ESSENTIAL initialization.

        token = pc;
        while( *pc && *pc != CHAR_FIELD_SEP ) pc++;

        // Empty token => missing value for this field.
        if( token == pc /* we didn't move */ ) {
            ++missing_value_count;
            f->buf.cat[ field_count++ ] = NAN_AS_UINT;
            if( *pc++ )
                continue;
            else
                break; // ...because we're already ON eol.
        }

        // We have a non-empty token.
        // Before NUL-terminating see whether it's also the end-of-line.
        if( *pc == 0 )
            eol = true; // We HAVE last field now; preclude loop reentry!
        else
            *pc++ = 0;

        // The "missing data" marker is common to all rows of the matrix
        // so its detection can precede (and preclude) type-dependent ops.
        if( toktype_is_na_marker(token) ) {
            ++missing_value_count;
            f->buf.cat[ field_count++ ] = NAN_AS_UINT;
            continue;
        }

        // For every line, we will -not- know the field_type at this point
        // for at most one non-missing token. In other words, almost every
        // time we reach this point field_type is known. Use of the goto thus
        // obviates a constitutive conditional...justifying an ugly goto.
retry:
        switch( field_type ) {

        case MTM_FIELD_TYPE_FLT:
            f->buf.num[ field_count++ ]
                = last_value_read.f
                = strtof( token, &endpt );
            break;

        case MTM_FIELD_TYPE_INT:
            f->buf.cat[ field_count++ ]
                = last_value_read.i
                = strtol( token, &endpt, 0 );
            if( ! ( last_value_read.i < NAN_AS_UINT ) ) {
                return MTM_E_LIMITS;
            }
            break;

        case MTM_FIELD_TYPE_STR:
            if( szs_insert( f->category_labels, token, &(last_value_read.i) ) < 0 )
                errx( -1, _BUG, __FILE__, __LINE__ );
            // Because I'm sizing the hashtable to accomodate a cardinality
            // equal to the sample count, szs_insert cannot fail, or rather
            // if it fails something else is badly broken.
            f->buf.cat[ field_count++ ] = last_value_read.i;
            break;

        default:
            // ...because field_type is either _UNKNOWN or set valued.
            // This insures that boolean data represented by {0,1} is parsed
            // as integral data, not strings, and thus has its implicit
            // ordering preserved. This was both original code and a bugfix.
            ft = toktype_infer_narrowest_type( token, NULL );
            assert( __builtin_popcount(ft)==1 /* else infinite loop! */ );
            // If the type was constrained at all (not simply "unknown"),
            // then the inferred type MUST be one of the allowed types...
            if( field_type!=MTM_FIELD_TYPE_UNK && (field_type & ft)==0 ) {
                return MTM_E_FORMAT_FIELD; // It's not an allowed type!
            } else
                field_type = ft;
            goto retry; // yes, goto! See comments above..
        }

        assert( __builtin_popcount( field_type ) == 1 );

        /**
          * REVISE INFERENCE if necessary:
          *
          * If endpt wasn't advanced to the end-of-string the token
          * does not contain the type it was believed to contain.
          * This is either because of an overt error or because the stat
          * class didn't fully constrain token type and the inference from
          * preceding fields was too narrow. Specifically either:
          * 1) a floating point line had an integral first value
          * 2) a string line had a numeric (integral or float) first value
          * As long as type wasn't completely determined by stat class--that
          * is, as long as the field_type was inferred--we can revise our
          * inference in these cases...
          */
        if( *endpt /* token wasn't entirely consumed */ ) {

            // If the type was dictated rather than inferred we've
            // encountered an error; revision is only possible on
            // inferred types.
            if( infer_field_type ) {

                // Case 1: integral inference, float data => promote line.
                if( (field_type == MTM_FIELD_TYPE_INT)
                    && (MTM_FIELD_TYPE_FLT == toktype_infer_narrowest_type( token, NULL ) ) ) {
#ifdef _DEBUG
                    fputs( "promoting integral line to float\n", stderr );
#endif
                    field_type = MTM_FIELD_TYPE_FLT;
                    --field_count;
                    // Convert all earlier values to float...
                    for(int i = 0; i < field_count; i++ ) {
                        if( NAN_AS_UINT != f->buf.cat[ i ] ) {
                            f->buf.num[i] = (float)f->buf.cat[i]; // in place!
                        }
                    }
                    // ...and reparse current token.
                    f->buf.num[ field_count++ ] = strtof( token, &endpt );

                } else
                // Case 2: numeric inference, string data => revise to string.
                if( (field_type != MTM_FIELD_TYPE_STR)
                    && (MTM_FIELD_TYPE_STR == toktype_infer_narrowest_type( token, NULL ) ) ) {
#ifdef _DEBUG
                    fputs( "revising non-string line to string\n", stderr );
#endif
                    assert( szs_count( f->category_labels ) == 0 );
                    field_type = MTM_FIELD_TYPE_STR;
                    // Reparse line up to and including current token.
                    // Above inference of eol still holds, and, importantly,
                    // we can count on NUL-termination of ALL relevant
                    // tokens.
                    pc = line;
                    field_count = 0;
                    do {
                        if( szs_insert( f->category_labels, pc, &(last_value_read.i) ) < 0 )
                            errx( -1, _BUG, __FILE__, __LINE__ );
                        // See comment above re: szs_insert.
                        f->buf.cat[ field_count++ ] = last_value_read.i;
                        if( pc == token) break;
                        pc += strlen(pc)+1;
                    } while( true );
                    pc += strlen(pc)+1;
                    endpt = ""; // to remove the error condition.
                }
            }

            if( *endpt )
                // either because the line wasn't a candidate for
                // revision or because it was revised, but the current
                // token -still- wasn't entirely consumed.
                return MTM_E_FORMAT_FIELD;
        }
    }

    if( field_count != f->length ) {
        return MTM_E_FORMAT_MATRIX;
    }

    ////////////////////////////////////////////////////////////////////////
    // Now fill out the descriptor
    ////////////////////////////////////////////////////////////////////////

    d->missing = missing_value_count;

    // Check for the degeneracy. There is really only one kind of
    // degeneracy, constancy, which can manifest in three different ways:
    // 1. All values are missing, i.e. feature is entirely "NA".
    // 2. Exactly one value is not-missing; obviously one value is constant.
    // 3. Two or more values are non-missing, but they are all equal.
    // The first two are trivial to check for; the 3rd is more expensive...
    if( field_count - missing_value_count < 2 )
        d->constant = 1;
    // ...so we only check for the 3rd if necessary (below).

    switch( field_type ) {
    case MTM_FIELD_TYPE_FLT:
        if( ! d->constant && cardinality( f->buf.cat, field_count, 2, NAN_AS_UINT ) < 2 ) {
            d->constant = 1;
        }
        break;

    case MTM_FIELD_TYPE_STR:
        d->integral = 1;
        d->categorical = 1;
        d->cardinality = szs_count( f->category_labels );
        if( d->cardinality < 2 )
            d->constant = 1;
        szs_clear( f->category_labels );
        break;

    case MTM_FIELD_TYPE_INT:
        // TODO: Note that ordinal is not properly dealt with yet.
        // If cardinality is low-enough integer data is assumed
        // categorical.
        d->integral = 1;
        if( ! d->constant ) {
            d->cardinality = cardinality( f->buf.cat, field_count, f->max_cardinality, NAN_AS_UINT );
            d->categorical = d->cardinality <= f->max_cardinality ? 1 : 0;
        }
        break;

    case MTM_FIELD_TYPE_UNK:
    default:
        if( missing_value_count < field_count )
            errx( -1, _BUG, __FILE__, __LINE__ );
        // ...but if whole line was empty we may, indeed, not know the type!
    }

    // A boolean-classed row may hold at most two distinct values.
    if( MTM_STATCLASS_BOOLEAN == stat_class ) {
        if( d->cardinality > 2 )
            return MTM_E_CARDINALITY;
    }

    return MTM_OK;
}
/** vertex file: see SINGLE format.
 *  distribution file: see above.
 *
 *  Serializes the Bayes graph to two text files:
 *  - vertexFileName: one row per vertex with id, latent flag, level,
 *    variable cardinality and label (GRAPH_SEPARATOR-delimited).
 *  - distFileName: for each non-leaf (latent) node, its marginal
 *    distribution followed by each child's conditional table P(X_i | Z).
 */
void BayesGraphSave::operator()( const Graph& graph,
                                 const std::string vertexFileName,
                                 const std::string distFileName ) const
{
    std::ofstream distFile(distFileName.c_str()), vertexFile(vertexFileName.c_str());
    vertex_iterator vi, vi_end;
    Label2Index label2Index;

    vertexFile << ID << GRAPH_SEPARATOR << LATENT << GRAPH_SEPARATOR
               << LEVEL << GRAPH_SEPARATOR << CARDINALITY << GRAPH_SEPARATOR
               << "label" << "\n"; // writes header

    BOOST_LOG_TRIVIAL(trace) << "saving vertices...\n";
    // First pass: dump vertex rows and build the label -> index map used
    // when writing child-variable references below.
    for ( boost::tie(vi, vi_end) = boost::vertices(graph); vi != vi_end; ++vi ) {
        int vertex = *vi;
        vertexFile << graph[vertex].index << GRAPH_SEPARATOR
                   << !(graph[vertex].is_leaf()) << GRAPH_SEPARATOR
                   << graph[vertex].level << GRAPH_SEPARATOR
                   << graph[vertex].variable.cardinality() << GRAPH_SEPARATOR
                   << graph[vertex].getLabel() << std::endl;
        label2Index[graph[vertex].getLabel()] = graph[vertex].index;
    }
    vertexFile.close();

    BOOST_LOG_TRIVIAL(trace) << "saving joint distribution...\n";
    // Second pass: for each latent (non-leaf) node, write its marginal
    // and the conditional tables of its children.
    for ( boost::tie(vi, vi_end) = boost::vertices(graph); vi != vi_end; ++vi ) {
        const Node& node = graph[*vi];
        if ( !node.is_leaf() ) {
            auto latentVar = node.variable;
            // plJointDistribution distribution = node.jointDistribution;
            // plVariablesConjunction all_variables = distribution.get_variables(); // all variables (latent variable and its children)
            plVariablesConjunction childVars = node.get_children_variables(); // child childVars
            // for (size_t i = 1; i < all_variables.size(); ++i)
            //   childVars ^= all_variables[i]; // initializes child conjunction.
            // plSymbol latentVar = all_variables[0]; // latent variable
            distFile << node.index << GRAPH_SEPARATOR << childVars.size() << std::endl;

            // plComputableObjectList objLists = distribution.get_computable_object_list();
            // plComputableObject probTableZ = objLists.get_distribution_over(latentVar); // distribution table for the latent variable
            auto probTableZ = node.marginalDist;
            int val;
            // All but the last marginal probability are separator-terminated;
            // NOTE(review): first loop uses setprecision(30), the last value
            // setprecision(15) — looks inconsistent; confirm intended.
            for ( val = 0; val < latentVar.cardinality() - 1 ; ++val ) {
                distFile << std::fixed << std::setprecision(30)
                         << probTableZ->compute( plValues().add(latentVar, val) )
                         << GRAPH_SEPARATOR; // P(latentVar = val)
            }
            distFile << std::fixed << std::setprecision(15)
                     << probTableZ->compute( plValues().add(latentVar, val) )
                     << std::endl; // writes last probability value

            for ( size_t i = 0; i < childVars.size(); ++i ) {
                plSymbol varX = childVars[ i ]; // retrieves the child variable
                distFile << label2Index[varX.name()] << std::endl; // writes child variable's id.
                auto distTableXZ = node.cndChildrenDists.at(i);
                // objLists.get_distribution_over(varX); // conditional distribution P(X_i | Z)
                // plDistributionTable& distTableXZ =
                //   static_cast<plDistributionTable&>( compTableXZ ); // casting P(X_i | Z) to derived class
                for ( val = 0; val < latentVar.cardinality(); ++val ) {
                    int childVal;
                    // One row per latent value; last column ends the line.
                    for ( childVal = 0; childVal < varX.cardinality() - 1; ++childVal ) {
                        // for each value x of the child variable
                        distFile << std::fixed << std::setprecision(15)
                                 << distTableXZ->compute( plValues().add(latentVar, val).add(varX, childVal) )
                                 << GRAPH_SEPARATOR; // p(X_i = childVal | Z = val)
                    }
                    distFile << std::fixed << std::setprecision(15)
                             << distTableXZ->compute( plValues().add(latentVar, val).add(varX, childVal) )
                             << std::endl;
                }
            }
            distFile << std::endl; // breaks the line, moves to the next latent variable.
        }
    }
    distFile.close();
}