void interval_set_distinct_4_bicremental_continuous_types()
{
    typedef IntervalSet<T> IntervalSetT;
    typedef typename IntervalSetT::interval_type IntervalT;
    typedef typename IntervalSet<T>::size_type       size_T;
    typedef typename IntervalSet<T>::difference_type diff_T;
    T v1 = make<T>(1);
    T v3 = make<T>(3);
    T v5 = make<T>(5);

    size_T s3 = make<size_T>(3);
    diff_T d0 = make<diff_T>(0);
    diff_T d2 = make<diff_T>(2);

    IntervalSet<T> is_1_3_5;
    is_1_3_5.add(v1).add(v3).add(v5);

    BOOST_CHECK_EQUAL( cardinality(is_1_3_5),      s3 );
    BOOST_CHECK_EQUAL( is_1_3_5.size(),             s3 );
    BOOST_CHECK_EQUAL( icl::length(is_1_3_5),       d0 );
    BOOST_CHECK_EQUAL( interval_count(is_1_3_5),   3 );
    BOOST_CHECK_EQUAL( is_1_3_5.iterative_size(),   3 );
    BOOST_CHECK_EQUAL( iterative_size(is_1_3_5),   3 );

    

    IntervalSet<T> is_123_5;
    is_123_5 = is_1_3_5;
    is_123_5 += IntervalT::open(v1,v3);

    BOOST_CHECK_EQUAL( cardinality(is_123_5),      icl::infinity<size_T>::value() );
    BOOST_CHECK_EQUAL( is_123_5.size(),             icl::infinity<size_T>::value() );
    BOOST_CHECK_EQUAL( icl::length(is_123_5),           d2 );
}
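A minimal standalone sketch of the case exercised above (assuming Boost.ICL is available): on a continuous domain such as double, filling the open gap (1,3) joins the three parts into one interval of length 2, while the element count becomes infinite, which is what the icl::infinity checks in the test verify.

#include <boost/icl/interval_set.hpp>
#include <cassert>

int main() {
    namespace icl = boost::icl;
    icl::interval_set<double> s;
    s.add(1.0);
    s.add(3.0);
    s += icl::interval<double>::open(1.0, 3.0);   // joins into [1.0, 3.0]

    assert(icl::interval_count(s) == 1);          // one contiguous interval
    assert(icl::length(s) == 2.0);                // summed (upper - lower)
    assert(icl::contains(s, 2.0));
    return 0;
}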
Example #2
    /// Run the Pipeline skeleton.
    int run(bool skip_init=false) {
        int nstages=static_cast<int>(nodes_list.size());

        if (!skip_init) {            
            // set the initial value for the barrier 
            if (!barrier)  barrier = new BARRIER_T;
            barrier->barrierSetup(cardinality(barrier));
        }
        if (!prepared) if (prepare()<0) return -1;

        if (has_input_channel) {
            /* freeze_and_run is required because the pipeline has no
             * manager thread that could freeze the other threads
             * before starting the computation.
             */
            for(int i=0;i<nstages;++i) {
                nodes_list[i]->set_id(i);
                if (nodes_list[i]->freeze_and_run(true)<0) {
                    error("ERROR: PIPE, running stage %d\n", i);
                    return -1;
                }
            }
        }  else {
            for(int i=0;i<nstages;++i) {
                nodes_list[i]->set_id(i);
                if (nodes_list[i]->run(true)<0) {
                    error("ERROR: PIPE, running stage %d\n", i);
                    return -1;
                }
            }
        }

        return 0;
    }
// == operator (EQUALITY)
bool set::operator==(const set &s)
// Postcondition: returns true if set 's'
// equals the current set, false otherwise.
{
	// self comparison
	if(this == &s)
		return true;

	if(s.cardinality() != cardinality())
		return false;

	// The cardinalities match, so the sets are equal exactly when
	// inserting every element of 's' adds nothing new to a copy
	// of the current set.
	set tmp = *this;
	tmp.insertAll(s);
	return tmp.cardinality() == cardinality();
}
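The same trick as a sketch on a plain std::set: with equal sizes, the sets are equal exactly when merging 's' into a copy of the current set does not grow it. The free function below is hypothetical, written only to illustrate the idea.

#include <cassert>
#include <set>

// Hypothetical free-function version of the operator== above.
bool equal_by_cardinality(const std::set<int>& a, const std::set<int>& b) {
    if (a.size() != b.size())
        return false;                 // different cardinalities: not equal
    std::set<int> tmp = a;
    tmp.insert(b.begin(), b.end());   // merge b into a copy of a
    return tmp.size() == a.size();    // growth means b had elements not in a
}

int main() {
    std::set<int> x{1, 3, 5}, y{5, 3, 1}, z{1, 3, 4};
    assert(equal_by_cardinality(x, y));
    assert(!equal_by_cardinality(x, z));
    return 0;
}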
Example #4
 Generator(std::string const& input) 
     : _input(input),
       _dashes(region_map(_input.begin(), _input.end())),
       _rng(std::random_device {}()),
       _select (0, _dashes.iterative_size() - 1),
       _randpos(0, _input.size() - 1),
       _is_degenerate(cardinality(_dashes) == _input.size())
 {
 }
Example #5
Datum hll_sum_fin(PG_FUNCTION_ARGS) {
	int64 result = 0;
	dmerge_state *state;

	if (!PG_ARGISNULL(0)) {
		state = (dmerge_state *) PG_GETARG_POINTER(0);
		result = cardinality(1 << state->state[0], state->state + 2, 1);
	}

	PG_RETURN_INT64(result);
}
// -= operator (COMPLEMENT)
set& set::operator-=(const set &s)
// Postcondition: the active set becomes the relative
// complement (set difference) active set - 's'.
{
	if(this == &s || cardinality() < 1 || s.cardinality() < 1)
		return *this;

	// Remove all intersecting parts
	removeAll(*this*s);

	return *this;
}
Example #7
// returns the basis as a vector of commutator words
VectorOf<PolyWord> MalcevSet::getPolyWords() const {

  VectorOf<PolyWord> res( cardinality() );
  int cnt = 0;

  const BasicCommutators& BC = theCollector.commutators();

  for(int key = 1; key <= BC.theHirschNumber(); key++) {
    if( theSet.bound( Generator(key) ) ) {
      PolyWord pw = theSet.valueOf( Generator(key) );
      res[cnt++] = pw;
    }
  }
  return res;
}
void interval_set_ctor_4_bicremental_types()
{
    typedef IntervalSet<T> IntervalSetT;
    typedef typename IntervalSetT::interval_type IntervalT;

    T v4 = make<T>(4);
    IntervalT I4_4I(v4);

    IntervalSet<T> _I4_4I;
    BOOST_CHECK_EQUAL( _I4_4I.empty(), true );
    IntervalSet<T> _I4_4I_1;
    IntervalSet<T> _I4_4I_2;
    IntervalSet<T> _I4_4I_3;
    _I4_4I   += v4;
    _I4_4I_1 += I4_4I;
    BOOST_CHECK_EQUAL( _I4_4I,                    _I4_4I_1 );
    _I4_4I_2.add(v4);
    BOOST_CHECK_EQUAL( _I4_4I,                    _I4_4I_2 );
    _I4_4I_3.add(I4_4I);
    BOOST_CHECK_EQUAL( _I4_4I,                    _I4_4I_3 );
    _I4_4I_1.add(v4).add(I4_4I);
    BOOST_CHECK_EQUAL( _I4_4I,                    _I4_4I_1 );
    _I4_4I_1.insert(v4).insert(I4_4I);
    BOOST_CHECK_EQUAL( _I4_4I,                    _I4_4I_1 );
    (_I4_4I_1 += v4) += I4_4I;
    BOOST_CHECK_EQUAL( _I4_4I,                    _I4_4I_1 );
    
    BOOST_CHECK_EQUAL( cardinality(_I4_4I),      unit_element<typename IntervalSet<T>::size_type>::value()  );
    BOOST_CHECK_EQUAL( _I4_4I.size(),             unit_element<typename IntervalSet<T>::size_type>::value()  );
    BOOST_CHECK_EQUAL( interval_count(_I4_4I),   1  );
    BOOST_CHECK_EQUAL( _I4_4I.iterative_size(),   1  );
    BOOST_CHECK_EQUAL( iterative_size(_I4_4I),   1  );
    BOOST_CHECK_EQUAL( hull(_I4_4I).lower(),      v4 );
    BOOST_CHECK_EQUAL( hull(_I4_4I).upper(),      v4 );

    IntervalSet<T> _I4_4I_copy(_I4_4I);
    IntervalSet<T> _I4_4I_assigned;
    _I4_4I_assigned = _I4_4I;
    BOOST_CHECK_EQUAL( _I4_4I, _I4_4I_copy );
    BOOST_CHECK_EQUAL( _I4_4I, _I4_4I_assigned );
    _I4_4I_assigned.clear();
    BOOST_CHECK_EQUAL( true,   _I4_4I_assigned.empty() );

    _I4_4I_assigned.swap(_I4_4I_copy);
    BOOST_CHECK_EQUAL( true,   _I4_4I_copy.empty() );
    BOOST_CHECK_EQUAL( _I4_4I, _I4_4I_assigned );

}
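A minimal sketch of the same idea on a concrete instance (assuming Boost.ICL): adding an element and adding the singleton interval built from it produce identical one-interval sets.

#include <boost/icl/interval_set.hpp>
#include <cassert>

int main() {
    namespace icl = boost::icl;
    icl::interval_set<int> a, b;
    a += 4;                                  // add the element 4
    b += icl::interval<int>::closed(4, 4);   // add the singleton interval [4,4]

    assert(a == b);
    assert(icl::cardinality(a) == 1);
    assert(icl::interval_count(a) == 1);
    assert(icl::hull(a).lower() == 4 && icl::hull(a).upper() == 4);
    return 0;
}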
// Postcondition: Determines if the array 'a' is bijective
// to the active set. This function also tests for
// equality of the array 'a' and the active set.
bool set::bijectiveTo(const T a[], size_t N) {
	// Make sure the number of elements in each matches.
	bool result = (cardinality() == N);

	// if we have a match, then search for all the
	// elements in the array 'a' in the active set.
	if(result)
		for(size_t i=0; i<N && result; ++i)
			result = search(a[i]);

	// if all the elements of array 'a' are found in
	// the active set, then search through all the
	// elements of the active set and see if they
	// all match with the elements in the array 'a'.
	if(result)
		result = matchALL(a, N);

	return result;
}
// - operator (COMPLEMENT)
set set::operator-(const set &s)
// Postcondition: returns the relative complement
// (set difference) active set - 's'.
{
	set c;

	if(cardinality() < 1 || this == &s)
		return c;

	c = *this;
	// 's' is empty: nothing to remove
	if(s.cardinality() < 1)
		return c;

	// Remove all intersecting parts
	c.removeAll(c*s);

	return c;
}
void interval_set_isolate_4_bicremental_continuous_types()
{
    typedef IntervalSet<T> IntervalSetT;
    typedef typename IntervalSetT::interval_type IntervalT;
    typedef typename IntervalSet<T>::size_type       size_T;
    typedef typename IntervalSet<T>::difference_type diff_T;

    T v0 = make<T>(0);
    T v2 = make<T>(2);
    T v4 = make<T>(4);
    IntervalT I0_4I = IntervalT::closed(v0,v4);
    IntervalT C0_2D = IntervalT::open(v0,v2);
    IntervalT C2_4D = IntervalT::open(v2,v4);
    //   {[0               4]}
    // - {   (0,2)   (2,4)   }
    // = {[0]     [2]     [4]}
    IntervalSet<T> iso_set = IntervalSet<T>(I0_4I);
    IntervalSet<T> gap_set;
    gap_set.add(C0_2D).add(C2_4D);
    BOOST_CHECK_EQUAL( true, true );
    iso_set -= gap_set;
    
    BOOST_CHECK_EQUAL( cardinality(iso_set), static_cast<size_T>(3) );
    BOOST_CHECK_EQUAL( iso_set.iterative_size(), static_cast<std::size_t>(3) );
    BOOST_CHECK_EQUAL( iterative_size(iso_set), static_cast<std::size_t>(3) );

    IntervalSet<T> iso_set2;
    iso_set2.add(I0_4I);
    iso_set2.subtract(C0_2D).subtract(C2_4D);
    
    IntervalSet<T> iso_set3(I0_4I);
    (iso_set3 -= C0_2D) -= C2_4D;

    IntervalSet<T> iso_set4;
    iso_set4.insert(I0_4I);
    iso_set4.erase(C0_2D).erase(C2_4D);
    
    BOOST_CHECK_EQUAL( iso_set, iso_set2 );
    BOOST_CHECK_EQUAL( iso_set, iso_set3 );
    BOOST_CHECK_EQUAL( iso_set, iso_set4 );
}
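The same isolation effect on a concrete instance (assuming Boost.ICL): subtracting the open gaps from [0,4] leaves the three isolated points 0, 2 and 4.

#include <boost/icl/interval_set.hpp>
#include <cassert>

int main() {
    namespace icl = boost::icl;
    icl::interval_set<double> iso(icl::interval<double>::closed(0.0, 4.0));
    iso -= icl::interval<double>::open(0.0, 2.0);
    iso -= icl::interval<double>::open(2.0, 4.0);

    assert(iso.iterative_size() == 3);        // {[0,0], [2,2], [4,4]}
    assert(icl::cardinality(iso) == 3);       // three single points
    assert(icl::contains(iso, 2.0) && !icl::contains(iso, 1.0));
    return 0;
}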
void QueryCostInfo::translateToExternalFormat(SQL_QUERY_COST_INFO *query_cost_info)
{
  query_cost_info->cpuTime           = cpuTime();
  query_cost_info->ioTime            = ioTime();
  query_cost_info->msgTime           = msgTime();
  query_cost_info->idleTime          = idleTime();
  query_cost_info->totalTime         = totalTime();
  query_cost_info->cardinality       = cardinality();
  query_cost_info->estimatedTotalMem = totalMem();
  query_cost_info->resourceUsage     = resourceUsage();
  query_cost_info->maxCpuUsage       = maxCpuUsage();
}
void interval_set_distinct_4_bicremental_types()
{
    typedef IntervalSet<T> IntervalSetT;
    typedef typename IntervalSetT::interval_type IntervalT;
    typedef typename IntervalSet<T>::size_type       size_T;
    typedef typename IntervalSet<T>::difference_type diff_T;
    T v1 = make<T>(1);
    T v3 = make<T>(3);
    T v5 = make<T>(5);

    size_T s3 = make<size_T>(3);
    

    IntervalSet<T> is_1_3_5;
    is_1_3_5.add(v1).add(v3).add(v5);

    BOOST_CHECK_EQUAL( cardinality(is_1_3_5),       s3 );
    BOOST_CHECK_EQUAL( is_1_3_5.size(),             s3 );
    BOOST_CHECK_EQUAL( interval_count(is_1_3_5),   3 );
    BOOST_CHECK_EQUAL( iterative_size(is_1_3_5),   3 );
    BOOST_CHECK_EQUAL( is_1_3_5.iterative_size(),   3 );
}
Example #14
 static value_type max ()
 {
     return cardinality();
 }
Example #15
 static period_type period ()
 {
     return cardinality();
 }
bool Set::operator<(const Set& b) const
{
    return (*this <= b) && (cardinality() < b.cardinality());
}
Example #17
      bool operator()(const derivation_type* x, const derivation_type* y) const
      {
	return (x->score < y->score) || (!(y->score < x->score) && (cardinality(x->j) > cardinality(y->j)));
      }
Example #18
Datum hll_count(PG_FUNCTION_ARGS) {
    bytea *arg = PG_GETARG_BYTEA_P(0);
    uint32_t *data = (uint32_t *) VARDATA(arg);
    int64 result = cardinality(1 << data[0], data + 2, 1);
    PG_RETURN_INT64(result);
}
void interval_set_fundamentals_4_ordered_types()
{
    typedef IntervalSet<T> IntervalSetT;
    typedef typename IntervalSetT::interval_type IntervalT;
    typedef typename IntervalSet<T>::size_type       size_T;
    typedef typename IntervalSet<T>::difference_type diff_T;

    // Ordered types form the largest set of instance types.
    // Because we cannot generate values via incrementation for e.g. string,
    // we can test operations only for the most basic values:
    // identity_element (0, empty, T() ...) and unit_element.

    T v0 = boost::icl::identity_element<T>::value();
    T v1 = unit_element<T>::value();
    IntervalT I0_0I(v0);
    IntervalT I1_1I(v1);
    IntervalT I0_1I(v0, v1, interval_bounds::closed());

    //-------------------------------------------------------------------------
    //empty set
    //-------------------------------------------------------------------------
    BOOST_CHECK_EQUAL(IntervalSet<T>().empty(), true);
    BOOST_CHECK_EQUAL(icl::is_empty(IntervalSet<T>()), true);
    BOOST_CHECK_EQUAL(cardinality(IntervalSet<T>()), boost::icl::identity_element<size_T>::value());
    BOOST_CHECK_EQUAL(IntervalSet<T>().size(), boost::icl::identity_element<size_T>::value());
    BOOST_CHECK_EQUAL(interval_count(IntervalSet<T>()), 0);
    BOOST_CHECK_EQUAL(IntervalSet<T>().iterative_size(), 0);
    BOOST_CHECK_EQUAL(iterative_size(IntervalSet<T>()), 0);
    BOOST_CHECK_EQUAL(IntervalSet<T>(), IntervalSet<T>());

    IntervalT mt_interval = boost::icl::identity_element<IntervalT>::value();
    BOOST_CHECK_EQUAL(mt_interval, IntervalT());
    IntervalSet<T> mt_set = boost::icl::identity_element<IntervalSet<T> >::value();
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());

    //adding emptiness to emptiness yields emptiness ;)
    mt_set.add(mt_interval).add(mt_interval);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    mt_set.insert(mt_interval).insert(mt_interval);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    (mt_set += mt_interval) += mt_interval;
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    BOOST_CHECK_EQUAL(hull(mt_set), boost::icl::identity_element<IntervalT >::value());

    //subtracting emptiness
    mt_set.subtract(mt_interval).subtract(mt_interval);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    mt_set.erase(mt_interval).erase(mt_interval);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    (mt_set -= mt_interval) -= mt_interval;
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());

    //subtracting elements from emptiness
    mt_set.subtract(v0).subtract(v1);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    mt_set.erase(v0).erase(v1);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    (mt_set -= v1) -= v0;
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());

    //subtracting intervals from emptiness
    mt_set.subtract(I0_1I).subtract(I1_1I);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    mt_set.erase(I0_1I).erase(I1_1I);
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    (mt_set -= I1_1I) -= I0_1I;
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());

    //intersecting emptiness
    //mt_set.insect(mt_interval).insect(mt_interval);
    //BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    (mt_set &= mt_interval) &= mt_interval;
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    //intersecting emptiness with elements
    (mt_set &= v1) &= v0;
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());
    //intersecting emptiness with intervals
    (mt_set &= I1_1I) &= I0_1I;
    BOOST_CHECK_EQUAL(mt_set, IntervalSet<T>());

    //-------------------------------------------------------------------------
    //unary set
    //-------------------------------------------------------------------------
    IntervalSet<T> single_I0_0I_from_element(v0);
    IntervalSet<T> single_I0_0I_from_interval(I0_0I);
    IntervalSet<T> single_I0_0I(single_I0_0I_from_interval);

    BOOST_CHECK_EQUAL(single_I0_0I_from_element, single_I0_0I_from_interval);
    BOOST_CHECK_EQUAL(single_I0_0I_from_element, single_I0_0I);
    BOOST_CHECK_EQUAL(icl::hull(single_I0_0I).lower(), I0_0I.lower());
    BOOST_CHECK_EQUAL(icl::hull(single_I0_0I).upper(), I0_0I.upper());

    IntervalSet<T> single_I1_1I_from_element(v1);
    IntervalSet<T> single_I1_1I_from_interval(I1_1I);
    IntervalSet<T> single_I1_1I(single_I1_1I_from_interval);

    BOOST_CHECK_EQUAL(single_I1_1I_from_element, single_I1_1I_from_interval);
    BOOST_CHECK_EQUAL(single_I1_1I_from_element, single_I1_1I);

    IntervalSet<T> single_I0_1I_from_interval(I0_1I);
    IntervalSet<T> single_I0_1I(single_I0_1I_from_interval);

    BOOST_CHECK_EQUAL(single_I0_1I_from_interval, single_I0_1I);
    BOOST_CHECK_EQUAL(hull(single_I0_1I), I0_1I);
    BOOST_CHECK_EQUAL(hull(single_I0_1I).lower(), I0_1I.lower());
    BOOST_CHECK_EQUAL(hull(single_I0_1I).upper(), I0_1I.upper());

    //contains predicate
    BOOST_CHECK_EQUAL(icl::contains(single_I0_0I, v0), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I0_0I, I0_0I), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I1_1I, v1), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I1_1I, I1_1I), true);

    BOOST_CHECK_EQUAL(icl::contains(single_I0_1I, v0), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I0_1I, I0_1I), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I0_1I, v1), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I0_1I, I1_1I), true);

    BOOST_CHECK_EQUAL(icl::contains(single_I0_1I, single_I0_0I), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I0_1I, single_I1_1I), true);
    BOOST_CHECK_EQUAL(icl::contains(single_I0_1I, single_I0_1I), true);

    BOOST_CHECK_EQUAL(cardinality(single_I0_0I), unit_element<size_T>::value());
    BOOST_CHECK_EQUAL(single_I0_0I.size(), unit_element<size_T>::value());
    BOOST_CHECK_EQUAL(interval_count(single_I0_0I), 1);
    BOOST_CHECK_EQUAL(single_I0_0I.iterative_size(), 1);
    BOOST_CHECK_EQUAL(iterative_size(single_I0_0I), 1);
}
//Every element of this must be in b, and b must be larger,
//for this to be a proper subset.
bool Set::operator<(const Set& b) const
{
    return *this <= b && b.cardinality() > cardinality();
}
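A sketch of the same proper-subset test on a plain std::set: A < B exactly when B has strictly more elements and every element of A appears in B (std::includes works here because std::set iterates in sorted order). The free function is hypothetical, written only to illustrate the check.

#include <algorithm>
#include <cassert>
#include <set>

// Hypothetical free-function version of the operator< above.
bool proper_subset(const std::set<int>& a, const std::set<int>& b) {
    return a.size() < b.size() &&
           std::includes(b.begin(), b.end(), a.begin(), a.end());
}

int main() {
    std::set<int> a{1, 2}, b{1, 2, 3};
    assert(proper_subset(a, b));
    assert(!proper_subset(b, a));
    assert(!proper_subset(a, a));
    return 0;
}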
////////////////////////////////////////////////////////////////////////////////
// parse - parses a declaration (see TIntermediateRenderer::declare() for the
//         syntax of type)
//
// Parameters:
//   name  - optional name of the declaration (case sensitive)
//   type  - type description of the declaration
//   isDef - true if the declaration is a default declaration of the renderer
//
// Returns:
//   false - type could not be parsed
//   true  - otherwise
//
bool TParameterDeclaration::parse(const char *name, const char *type, bool isDef) {

	m_isDefault = isDef;

	m_name = name ? name : "";
	m_fullname = type ? type : m_name.c_str();
	m_isInline = false;

	if ( type ) {
		size_t pos = 0;
		while ( *type && isspace(*type) )
			++type;

		m_class = classNum(type, pos);
		if ( m_class )
			type += pos;
		else
			m_class = CLASS_UNIFORM;

		pos = 0;
		while ( *type && isspace(*type) )
			++type;

		m_type = typeNum(type, pos);
		if ( m_type )
			type += pos;

		/*
		else // if there is no default type (== 0)
			return false;
		*/

		pos = 0;
		while ( *type && isspace(*type) )
			++type;

		m_cardinality = cardinality(type, pos);
		if ( m_cardinality >= 1 )
			type += pos;
		else
			m_cardinality = 1;

		pos = 0;
		while ( *type && isspace(*type) )
			++type;

		if ( !name || !*name ) {
			// RI_TOKEN name
			while ( *type && !isspace(*type) ) {
				if ( !m_isInline ) {
					m_name = "";
					m_isInline = true;
				}
				m_name += *type;
				++type;
			}
			if ( !m_name.length() )
				return false;
		}

		// now only white space should follow
		while ( *type ) {
			if ( !isspace(*type) )
				return false;
			++type;
		}

	} else {
		m_class = 0;
		m_type = 0;
		m_cardinality = 0;
	}

	buildFullname();
	return true;
}
      // we will use greater, so that simple sort will yield estimated score order...
      bool operator()(const candidate_type* x, const candidate_type* y) const
      {
	return (x->score > y->score) || (!(y->score > x->score) && (cardinality(x->j) < cardinality(y->j)));
      }
Example #23
/**
  * Convert a string containing some number of fields into binary form,
  * possibly inferring the type of data in the process.
  *
  * Strings are mapped in the order they first appear in the input (so the
  * first string is ALWAYS mapped to 0, etc).
  *
  * ASSUMPTIONS:
  * 1. <line> is NUL-terminated and free of NL and CR characters.
  */
int feature_encode( char * const line,
		struct feature *f,
		struct mtm_descriptor *d ) {

	int stat_class = MTM_STATCLASS_UNKNOWN;
	bool infer_field_type = false;
	int ft, field_type = MTM_FIELD_TYPE_UNK;
	const char *token;
	char *pc = line;

	bool eol = false;
	unsigned int field_count = 0;
	int missing_value_count = 0;

	union {
		mtm_fp_t  f;
		mtm_int_t i;
	} last_value_read;

	memset( d, 0, sizeof(struct mtm_descriptor) );
	assert( szs_count( f->category_labels ) == 0 );

	if( f->expect_row_labels ) {

		// Scan past the row identifier.

		while( *pc && *pc != CHAR_FIELD_SEP ) pc++;

		// ...and make sure there was more than just a row id on the line!

		if( CHAR_FIELD_SEP != *pc ) {
			// A non-empty line with exactly one field is bad format.
			return MTM_E_FORMAT_MATRIX;
		}

		f->label_length = pc - line;
		*pc++ = 0;

		if( f->interpret_prefix ) {
			stat_class
				= f->interpret_prefix( line );
			field_type
				= field_type_from_stat_class( stat_class );
			infer_field_type
				= (__builtin_popcount( field_type ) != 1 );
				// ...either MTM_FIELD_TYPE_UNK or multiple bits.
		}
	}

	while( field_count < f->length && ! eol ) {

		char *endpt = ""; // ESSENTIAL initialization.
		token = pc;

		while( *pc && *pc != CHAR_FIELD_SEP )
			pc++;

		if( token == pc /* we didn't move */ ) {

			++missing_value_count;
			f->buf.cat[ field_count++ ] = NAN_AS_UINT;

			if( *pc++ )
				continue;
			else
				break; // ...because we're already ON eol.
		}

		// We have a non-empty token.
		// Before NUL-terminating see whether it's also the end-of-line.

		if( *pc == 0 )
			eol = true; // We HAVE last field now; preclude loop reentry!
		else
			*pc++ = 0;

		// The "missing data" marker is common to all rows of the matrix
		// so its detection can precede (and preclude) type-dependent ops.

		if( toktype_is_na_marker(token) ) {
			++missing_value_count;
			f->buf.cat[ field_count++ ] = NAN_AS_UINT;
			continue;
		}

		// For every line, we will -not- know the field_type at this point
		// for at most one non-missing token. In other words, almost every
		// time we reach this point field_type is known. Use of the goto thus
		// obviates a constitutive conditional...justifying an ugly goto.
retry:
		switch( field_type ) {

		case MTM_FIELD_TYPE_FLT:
			f->buf.num[ field_count++ ]
				= last_value_read.f
				= strtof( token, &endpt );
			break;

		case MTM_FIELD_TYPE_INT:
			f->buf.cat[ field_count++ ]
				= last_value_read.i
				= strtol( token, &endpt, 0 );
			if( ! ( last_value_read.i < NAN_AS_UINT ) ) {
				return MTM_E_LIMITS;
			}
			break;

		case MTM_FIELD_TYPE_STR:
			if( szs_insert( f->category_labels, token, &(last_value_read.i) ) < 0 )
				errx( -1, _BUG, __FILE__, __LINE__ );
			// Because I'm sizing the hashtable to accommodate a cardinality
			// equal to the sample count, szs_insert cannot fail, or rather
			// if it fails something else is badly broken.
			f->buf.cat[ field_count++ ] = last_value_read.i;
			break;

		default: // ...because field_type is either _UNKNOWN or set valued.
			// This ensures that boolean data represented by {0,1} is parsed
			// as integral data, not strings, and thus has its implicit
			// ordering preserved. This was both original code and a bugfix. 
			ft = toktype_infer_narrowest_type( token, NULL );
			assert( __builtin_popcount(ft)==1 /* else infinite loop! */ );
			// If the type was constrained at all (not simply "unknown"),
			// then the inferred type MUST be one of the allowed types...
			if( field_type!=MTM_FIELD_TYPE_UNK && (field_type & ft)==0 ) {
				return MTM_E_FORMAT_FIELD; // It's not an allowed type!
			} else
				field_type = ft;

			goto retry; // yes, goto! See comments above..
		}

		assert( __builtin_popcount( field_type ) == 1 );

		/**
		  * REVISE INFERENCE if necessary:
		  *
		  * If endpt wasn't advanced to the end-of-string the token
		  * does not contain the type it was believed to contain.
		  * This is either because of an overt error or because the stat
		  * class didn't fully constrain token type and the inference from
		  * preceding fields was too narrow. Specifically either:
		  * 1) a floating point line had an integral first value
		  * 2) a string line had a numeric (integral or float) first value
		  * As long as type wasn't completely determined by stat class--that
		  * is, as long as the field_type was inferred--we can revise our
		  * inference in these cases...
		  */

		if( *endpt /* token wasn't entirely consumed */ ) {

			// If the type was dictated rather than inferred we've
			// encountered an error; revision is only possible on
			// inferred types.

			if( infer_field_type ) {

				if( (field_type == MTM_FIELD_TYPE_INT)
					&& (MTM_FIELD_TYPE_FLT == toktype_infer_narrowest_type( token, NULL ) ) ) {
#ifdef _DEBUG
					fputs( "promoting integral line to float\n", stderr );
#endif
					field_type = MTM_FIELD_TYPE_FLT;
					--field_count;
					// Convert all earlier values to float...
					for(unsigned int i = 0; i < field_count; i++ ) {
						if( NAN_AS_UINT != f->buf.cat[ i ] ) {
							f->buf.num[i] = (float)f->buf.cat[i]; // in place!
						}
					}
					// ...and reparse current token.
					f->buf.num[ field_count++ ]
						= strtof( token, &endpt );
				} else
				if( (field_type != MTM_FIELD_TYPE_STR)
					&& (MTM_FIELD_TYPE_STR == toktype_infer_narrowest_type( token, NULL ) ) ) {
#ifdef _DEBUG
					fputs( "revising non-string line to string\n", stderr );
#endif
					assert( szs_count( f->category_labels ) == 0 );
					field_type = MTM_FIELD_TYPE_STR;

					// Reparse line up to and including current token.
					// Above inference of eol still holds, and, importantly,
					// we can count on NUL-termination of ALL relevant
					// tokens.

					pc = line;
					field_count = 0;
					do {
						if( szs_insert( f->category_labels, pc, &(last_value_read.i) ) < 0 )
							errx( -1, _BUG, __FILE__, __LINE__ );
						// See comment above re: szs_insert.
						f->buf.cat[ field_count++ ] = last_value_read.i;
						if( pc == token) break;
						pc += strlen(pc)+1;
					} while( true );
					pc += strlen(pc)+1;
					endpt = ""; // to remove the error condition.
				}
			}

			if( *endpt ) // either because the line wasn't a candidate for
				// revision or because it was revised, but the current
				// token -still- wasn't entirely consumed.
				return MTM_E_FORMAT_FIELD;
		}
	}

	if( field_count != f->length ) {
		return MTM_E_FORMAT_MATRIX;
	}

	////////////////////////////////////////////////////////////////////////
	// Now fill out the descriptor
	////////////////////////////////////////////////////////////////////////

	d->missing = missing_value_count;

	// Check for the degeneracy. There is really only one kind of
	// degeneracy, constancy, which can manifest in three different ways:
	// 1. All values are missing, i.e. feature is entirely "NA".
	// 2. Exactly one value is not-missing; obviously one value is constant.
	// 3. Two or more values are non-missing, but they are all equal.
	// The first two are trivial to check for; the 3rd is more expensive...

	if( field_count - missing_value_count < 2 )
		d->constant = 1;

	// ...so we only check for the 3rd if necessary (below).

	switch( field_type ) {

	case MTM_FIELD_TYPE_FLT:
		if( ! d->constant &&
				cardinality( f->buf.cat, field_count, 2, NAN_AS_UINT ) < 2 ) {
			d->constant = 1;
		}
		break;

	case MTM_FIELD_TYPE_STR:
		d->integral    = 1;
		d->categorical = 1;
		d->cardinality = szs_count( f->category_labels );
		if( d->cardinality < 2 )
			d->constant = 1;
		szs_clear( f->category_labels );
		break;

	case MTM_FIELD_TYPE_INT:
		// TODO: Note that ordinal is not properly dealt with yet.
		// If the cardinality is low enough, integer data is assumed
		// to be categorical.
		d->integral = 1;
		if( ! d->constant ) {
			d->cardinality
				= cardinality( f->buf.cat, field_count, f->max_cardinality, NAN_AS_UINT );
			d->categorical
				= d->cardinality <= f->max_cardinality
				? 1
				: 0;
		}
		break;

	case MTM_FIELD_TYPE_UNK:
	default:
		if( missing_value_count < field_count )
			errx( -1, _BUG, __FILE__, __LINE__ );
		// ...but if the whole line was empty we may, indeed, not know the type!
	}

	if( MTM_STATCLASS_BOOLEAN == stat_class ) {
		if( d->cardinality > 2 )
			return MTM_E_CARDINALITY;
	}

	return MTM_OK;
}
Example #24
/** vertex file: see SINGLE format.
*  distribution file: see above.
*/
void BayesGraphSave::operator()( const Graph& graph,
                                 const std::string vertexFileName,
                                 const std::string distFileName ) const {

  std::ofstream distFile(distFileName.c_str()), vertexFile(vertexFileName.c_str());
  vertex_iterator vi, vi_end;
  Label2Index label2Index;
  vertexFile << ID << GRAPH_SEPARATOR << LATENT << GRAPH_SEPARATOR
             << LEVEL << GRAPH_SEPARATOR << CARDINALITY << GRAPH_SEPARATOR << "label" << "\n";  // writes header
  BOOST_LOG_TRIVIAL(trace) << "saving vertices...\n";
  for ( boost::tie(vi, vi_end) = boost::vertices(graph); vi != vi_end; ++vi ) {
    int vertex = *vi;
    vertexFile << graph[vertex].index << GRAPH_SEPARATOR
               << !(graph[vertex].is_leaf()) << GRAPH_SEPARATOR
               << graph[vertex].level << GRAPH_SEPARATOR
               << graph[vertex].variable.cardinality() << GRAPH_SEPARATOR
               << graph[vertex].getLabel() << std::endl;
    label2Index[graph[vertex].getLabel()] = graph[vertex].index;
  }
  vertexFile.close();
  BOOST_LOG_TRIVIAL(trace) << "saving joint distribution...\n";

  for ( boost::tie(vi, vi_end) = boost::vertices(graph); vi != vi_end; ++vi ) {
    const Node& node = graph[*vi];
    if ( !node.is_leaf() ) {
      auto latentVar = node.variable;
      // plJointDistribution distribution = node.jointDistribution;
      // plVariablesConjunction all_variables = distribution.get_variables(); // all variables (latent variable and its children)
      plVariablesConjunction childVars = node.get_children_variables(); // child variables
      // for (size_t i = 1; i <  all_variables.size(); ++i)
      //   childVars ^=  all_variables[i]; // initializes child conjunction.
      // plSymbol latentVar =  all_variables[0]; // latent variable
      distFile << node.index << GRAPH_SEPARATOR <<  childVars.size() << std::endl;

      // plComputableObjectList objLists = distribution.get_computable_object_list();
      // plComputableObject probTableZ = objLists.get_distribution_over(latentVar); // distribution table for the latent variable
      auto probTableZ = node.marginalDist; int val;

      for ( val = 0; val < latentVar.cardinality() - 1 ; ++val ) {
        distFile << std::fixed << std::setprecision(30)
                 << probTableZ->compute( plValues().add(latentVar, val) )
                 << GRAPH_SEPARATOR; // P(latentVar = val)
      }

      distFile << std::fixed << std::setprecision(15)
               << probTableZ->compute( plValues().add(latentVar, val) )
               << std::endl; // writes last probability value

      for ( size_t i = 0; i < childVars.size(); ++i ) {

        plSymbol varX = childVars[ i ]; // retrieves the child variable
        distFile << label2Index[varX.name()] << std::endl; // writes child variable's id.
        auto distTableXZ = node.cndChildrenDists.at(i);  //objLists.get_distribution_over(varX); // conditional distribution P(X_i | Z)
        // plDistributionTable& distTableXZ =
        //     static_cast<plDistributionTable&>( compTableXZ ); // casting P(X_i | Z) to derived class

        for ( val = 0; val < latentVar.cardinality(); ++val ) {
          int childVal;
          for ( childVal = 0; childVal < varX.cardinality() - 1; ++childVal ) { // for each value x of the child variable            
            distFile << std::fixed << std::setprecision(15)
                     << distTableXZ->compute( plValues().add(latentVar, val).add(varX, childVal) )
                     << GRAPH_SEPARATOR; // p(X_i = childVal | Z = val)
          }
          distFile << std::fixed << std::setprecision(15)
                   << distTableXZ->compute( plValues().add(latentVar, val).add(varX, childVal) ) << std::endl;
        }
      }
      distFile << std::endl; // breaks the line, moves to the next latent variable.
    }
  }

  distFile.close();
}