예제 #1
0
// Make sure the diagonal entry (k, k) can serve as a pivot: when that entry
// tests as zero, row k is exchanged with the row index that
// find_nonzero (k) returns. Otherwise nothing is changed.
void
GaussJordan::find_pivot (const int k)
{
  const bool diagonal_is_zero = iszero (get_entry (k, k));
  if (!diagonal_is_zero)
    return;

  swap_rows (k, find_nonzero (k));
}
예제 #2
0
파일: column.c 프로젝트: ini-bdds/bdqc
/**
  * Having finished scanning a putatively tabular file, apply heuristics
  * to the column statistics to finally determine the statistical class
  * for each column (quantitative, categorical or (maybe) ordinal).
  *
  * The determination depends primarily on the presence or lack of unanimity
  * in the observed types (integer, float, string) in the column, then
  * devolves to consideration of the many other statistics in the column
  * struct.
  *
  * Empty fields (votes for FTY_EMPTY) are not counted as an observed type:
  * both the type count and the lookup of the unanimous type skip the
  * FTY_EMPTY slot, so empty fields alone never change the outcome.
  *
  * @param c  Column whose statistics have already been accumulated.
  *           On return c->stat_class holds the inferred class; it is
  *           STC_UNK whenever none of the heuristics below applies.
  */
void analyze_column( struct column *c ) {

	// Number of distinct non-empty field types that received votes.
	const int OBSERVED_TYPE_COUNT
		= count_nonzero( c->type_vote+1 /* exclude FTY_EMPTY */, FTY_COUNT-1 );

	c->stat_class = STC_UNK; // ...unless overridden below!

	if( OBSERVED_TYPE_COUNT == 0 /* all must be empty */ ) {

		assert( c->type_vote[ FTY_EMPTY ] > 0 );

	} else
	if( OBSERVED_TYPE_COUNT == 1 /* Unanimity... */ ) {

		// ...doesn't necessarily determine the stat class because...

		// Look for the unanimous type in the SAME slice that was counted
		// above (FTY_EMPTY excluded). Searching from slot 0 could report
		// FTY_EMPTY for a column that merely contains some empty fields,
		// which matches no case and leaves the column STC_UNK.
		// NOTE(review): assumes find_nonzero returns an offset relative to
		// the pointer it is given; the +1 maps it back to an FTY_* value.
		switch( find_nonzero( c->type_vote+1, FTY_COUNT-1 ) + 1 ) {

		case FTY_INTEGER: // ...integers can be used in many ways!
			c->stat_class = _integer_inference( c );
			break;

		case FTY_STRING:

			// Categorical only if the value set was not overflowed, at
			// least one string value repeated, and no long fields were seen.
			if( ( ! c->excess_values )
				&& set_count( & c->value_set ) <  c->type_vote[ FTY_STRING ] 
				&& c->long_field_count == 0 )
				c->stat_class = STC_CAT;
			break;

		case FTY_FLOAT:
			c->stat_class = STC_QUA;
			break;

		default: // Any other type leaves the column STC_UNK.
			break;
		}

	} else {

		/**
		  * If two or more types are observed and STRING is one of them,
		  * then everything hinges on the cardinality of observed strings
		  * (the contents of value_set EXCLUDING any integers it contains).
		  * 1. If exactly one string value is observed AND it's a potential
		  *    missing data indicator, then inference devolves to that for
		  *    numeric types.
		  * 2. If more than one string value is observed all bets are off;
		  *    the column remains STC_UNKnown.
		  */

		const char *sval[2];
		// Room for 2 values is requested so that "exactly one" can be
		// told apart from "more than one".
		const bool UNIQUE_STRING
			= c->type_vote[ FTY_STRING ] > 0
			&& _fetch_string_values( & c->value_set, sval, 2 ) == 1;
		// sval[0] is only read when UNIQUE_STRING is true (short-circuit).
		const bool HAS_CANDIDATE_MISSING_DATA_PLACEHOLDER
			= UNIQUE_STRING
			&& regexec( &_compiled_re_NA, sval[0], 0, NULL, 0 ) == 0;

		if( OBSERVED_TYPE_COUNT == 2 ) {

			if( c->type_vote[ FTY_STRING ] > 0 ) {

				// String plus exactly one numeric type: treat the lone
				// string as a placeholder and classify by the numeric type.
				if( HAS_CANDIDATE_MISSING_DATA_PLACEHOLDER ) {
					if( c->type_vote[ FTY_INTEGER ] > 0 ) {
						c->stat_class = _integer_inference( c );
					} else {
						assert( c->type_vote[ FTY_FLOAT ] > 0 );
						c->stat_class = STC_QUA;
					}
				}

			} else { // no string, just ints and floats

				assert( c->type_vote[ FTY_INTEGER ] > 0 &&
						c->type_vote[ FTY_FLOAT ]   > 0 );

				c->stat_class = STC_QUA;
			}

		} else { // Column contains int(s), float(s) AND string(s).

			if( HAS_CANDIDATE_MISSING_DATA_PLACEHOLDER )
				c->stat_class = STC_QUA;

		} // 3 types observed

	} // > 1 type observed.
}