Beispiel #1
0
template<class ST> void CSparseFeatures<ST>::add_to_dense_vec(float64_t alpha, int32_t num, float64_t* vec, int32_t dim, bool abs_val)
{
	REQUIRE(vec, "add_to_dense_vec(num=%d,dim=%d): vec must not be NULL\n",
		num, dim);
	REQUIRE(dim>=get_num_features(),
		"add_to_dense_vec(num=%d,dim=%d): dim should contain number of features %d\n",
		num, dim, get_num_features());

	SGSparseVector<ST> sv=get_sparse_feature_vector(num);

	// Accumulate alpha * entry (or alpha * |entry|) into the dense target
	// at each non-zero position of the sparse vector.
	if (sv.features)
	{
		if (abs_val)
		{
			for (int32_t k=0; k<sv.num_feat_entries; k++)
				vec[sv.features[k].feat_index]+=
					alpha*CMath::abs(sv.features[k].entry);
		}
		else
		{
			for (int32_t k=0; k<sv.num_feat_entries; k++)
				vec[sv.features[k].feat_index]+=
					alpha*sv.features[k].entry;
		}
	}

	free_sparse_feature_vector(num);
}
Beispiel #2
0
template< class ST > void CMatrixFeatures< ST >::get_feature_vector_col(
		SGVector< ST > out, 
		int32_t num, 
		int32_t col) const
{
	// Reject out-of-range feature vector indices
	if ( num < 0 || num >= get_num_vectors() )
	{
		SG_ERROR("The index of the feature vector to get must be between "
			 "0 and %d (get_num_vectors()-1)\n", get_num_vectors()-1);
	}

	// Dimensions of the requested feature vector
	int32_t n_rows = m_features[num].num_rows;
	int32_t n_cols = m_features[num].num_cols;

	// Reject out-of-range column indices
	if ( col < 0 || col >= n_cols )
	{
		SG_ERROR("The index of the column to get must be between "
			 "0 and %d (#columns of the feature vector)\n", n_cols);
	}

	// The destination vector must have room for a full column
	if ( out.vlen < get_num_features() )
	{
		SG_ERROR("The vector out must have space to hold at least "
			 "%d (get_num_features()) elements\n", get_num_features());
	}

	// Column-major storage: column `col` starts at offset col*n_rows
	int32_t offset = col*n_rows;
	for ( int32_t row = 0 ; row < get_num_features(); ++row )
		out[row] = m_features[num][offset + row];
}
Beispiel #3
0
template<class ST> ST CSparseFeatures<ST>::get_feature(int32_t num, int32_t index)
{
	REQUIRE(index>=0 && index<get_num_features(),
		"get_feature(num=%d,index=%d): index exceeds [0;%d]\n",
		num, index, get_num_features()-1);

	// Fetch the sparse vector, look the requested entry up, then release
	// the vector again before handing the value back.
	SGSparseVector<ST> vec=get_sparse_feature_vector(num);
	ST value=vec.get_feature(index);
	free_sparse_feature_vector(num);

	return value;
}
Beispiel #4
0
template<class ST> void CSparseFeatures<ST>::set_sparse_feature_matrix(SGSparseMatrix<ST> sm)
{
	// Replacing the whole matrix is incompatible with an active subset view
	if (m_subset_stack->has_subsets())
		SG_ERROR("Not allowed with subset\n");

	sparse_feature_matrix=sm;

	// TODO: check should be implemented in sparse matrix class
	// Verify that no stored vector claims more dimensions than the matrix
	for (int32_t idx=0; idx<get_num_vectors(); idx++)
	{
		SGSparseVector<ST> vec=get_sparse_feature_vector(idx);
		REQUIRE(get_num_features() >= vec.get_num_dimensions(),
			"sparse_matrix[%d] check failed (matrix features %d >= vector dimension %d)\n",
			idx, get_num_features(), vec.get_num_dimensions());
	}
}
Beispiel #5
0
template<class ST> SGVector<ST> CSparseFeatures<ST>::get_full_feature_vector(int32_t num)
{
	// Expand the sparse representation into a dense vector of full dimension.
	SGSparseVector<ST> vec=get_sparse_feature_vector(num);
	SGVector<ST> result=vec.get_dense(get_num_features());
	free_sparse_feature_vector(num);

	return result;
}
Beispiel #6
0
template< class ST > void CMatrixFeatures< ST >::set_feature_vector(
		SGMatrix< ST > const vec,
		int32_t num)
{
	// Reject out-of-range feature vector indices
	if ( !(0 <= num && num < get_num_vectors()) )
	{
		SG_ERROR("The index of the feature vector to set must be between "
			 "0 and %d (get_num_vectors()-1)\n", get_num_vectors()-1);
	}

	// Once a feature count is established, every stored vector must match it
	int32_t n_feats = get_num_features();
	if ( n_feats != 0 && n_feats != vec.num_rows )
	{
		SG_ERROR("The feature vector to set must have the same features "
			 "as the rest of the MatrixFeatures, %d "
			 "(get_num_features())\n", n_feats);
	}

	m_features.set_matrix(num, vec);
}
Beispiel #7
0
/** Convert the sparse feature matrix into a dense (num_features x
 * num_vectors) matrix, honoring any active subset. Absent entries are zero.
 */
template<class ST> SGMatrix<ST> CSparseFeatures<ST>::get_full_feature_matrix()
{
	SGMatrix<ST> full(get_num_features(), get_num_vectors());
	full.zero();

	SG_INFO("converting sparse features to full feature matrix of %d x %d"
			" entries\n", sparse_feature_matrix.num_vectors, get_num_features())

	for (int32_t v=0; v<full.num_cols; v++)
	{
		// map the (possibly subset-based) column index to storage index
		int32_t idx=m_subset_stack->subset_idx_conversion(v);
		SGSparseVector<ST> current=sparse_feature_matrix[idx];

		for (int32_t f=0; f<current.num_feat_entries; f++)
		{
			// Widen before multiplying: v*get_num_features() would be
			// evaluated in 32-bit arithmetic and can overflow for large
			// matrices even though offs itself is 64-bit.
			int64_t offs=((int64_t) v)*get_num_features()
					+current.features[f].feat_index;

			full.matrix[offs]=current.features[f].entry;
		}
	}

	return full;
}
Beispiel #8
0
/** Perform feature selection on a deep copy of the given features.
 *
 * Fixes over previous version: the trailing SG_DEBUG("Leaving!") was
 * unreachable because both switch branches returned directly, and the
 * SG_REF'ed clone leaked on the unsupported-algorithm path.
 */
CFeatures* CFeatureSelection<ST>::apply(CFeatures* features)
{
	SG_DEBUG("Entering!\n");

	// remove previously computed feature subsets
	m_subset->remove_all_subsets();

	// sanity checks
	REQUIRE(features, "Features cannot be NULL!\n");
	REQUIRE(features->get_num_vectors()>0,
			"Number of feature vectors has to be positive!\n");
	REQUIRE(m_target_dim>0, "Target dimension (%d) has to be positive! Set "
			"a higher number via set_target_dim().\n", m_target_dim);

	index_t num_features=get_num_features(features);
	REQUIRE(num_features>0, "Invalid number of features (%d)! Most likely "
			"feature selection cannot be performed for %s!\n",
			num_features, features->get_name());
	REQUIRE(num_features>m_target_dim,
			"Number of original features (dimensions of the feature vectors) "
			"(%d) has to be greater that the target dimension (%d)!\n",
			num_features, m_target_dim);

	// this method makes a deep copy of the feature object and performs
	// feature selection on it. This is already SG_REF'ed because of the
	// implementation of clone()
	CFeatures* feats_copy=(CFeatures*)features->clone();

	CFeatures* result=NULL;
	switch (m_algorithm)
	{
		case BACKWARD_ELIMINATION:
			result=apply_backward_elimination(feats_copy);
			break;
		default:
			// release the clone before bailing out so it does not leak
			SG_UNREF(feats_copy);
			SG_ERROR("Specified algorithm not yet supported!\n");
			result=features;
			break;
	}

	SG_DEBUG("Leaving!\n");
	return result;
}
Beispiel #9
0
template<class ST> SGSparseVector<ST> CSparseFeatures<ST>::get_sparse_feature_vector(int32_t num)
{
	REQUIRE(num>=0 && num<get_num_vectors(),
		"get_sparse_feature_vector(num=%d): num exceeds [0;%d]\n",
		num, get_num_vectors()-1);
	// map the (possibly subset-based) index to the underlying storage index
	index_t real_num=m_subset_stack->subset_idx_conversion(num);

	if (sparse_feature_matrix.sparse_matrix)
	{
		// matrix is held in memory: hand out the stored vector directly
		return sparse_feature_matrix[real_num];
	}
	else
	{
		// no in-memory matrix: compute the vector on the fly (using the
		// feature cache, if one is attached)
		SGSparseVector<ST> result;
		if (feature_cache)
		{
			result.features=feature_cache->lock_entry(num);

			if (result.features)
				return result;	// cache hit
			else
			{
				// cache miss: reserve a cache slot for the computed vector
				result.features=feature_cache->set_entry(num);
			}
		}

		//if (!result.features)
		//	result.do_free=true;

		result.features=compute_sparse_feature_vector(num,
			result.num_feat_entries, result.features);


		if (get_num_preprocessors())
		{
			// run the computed vector through all attached preprocessors
			int32_t tmp_len=result.num_feat_entries;
			SGSparseVectorEntry<ST>* tmp_feat_before=result.features;
			// NOTE(review): the apply call below is commented out, so
			// tmp_feat_after stays NULL and the copy-back branch never runs
			SGSparseVectorEntry<ST>* tmp_feat_after = NULL;

			for (int32_t i=0; i<get_num_preprocessors(); i++)
			{
				//tmp_feat_after=((CSparsePreprocessor<ST>*) get_preproc(i))->apply_to_feature_vector(tmp_feat_before, tmp_len);

				if (i!=0)	// delete feature vector, except for the first one, i.e., feat
					SG_FREE(tmp_feat_before);
				tmp_feat_before=tmp_feat_after;
			}

			if (tmp_feat_after)
			{
				// copy the preprocessed entries back into the result buffer
				memcpy(result.features, tmp_feat_after,
						sizeof(SGSparseVectorEntry<ST>)*tmp_len);

				SG_FREE(tmp_feat_after);
				result.num_feat_entries=tmp_len;
			}
			SG_DEBUG("len: %d len2: %d\n", result.num_feat_entries, get_num_features())
		}
		return result ;
	}
}
Beispiel #10
0
// Iteratively remove the lowest/highest-ranked features (per the removal
// policy) until only m_target_dim features remain, returning the reduced
// feature object.
CFeatures* CFeatureSelection<ST>::apply_backward_elimination(CFeatures* features)
{
	SG_DEBUG("Entering!\n");

	// precompute whenever appropriate for performing the rest of the tasks
	precompute();

	// NULL check for features is handled in get_num_features
	index_t num_features=get_num_features(features);
	SG_DEBUG("Initial number of features %d!\n", num_features);

	// the main loop
	while (num_features>m_target_dim)
	{
		// tune the measurement parameters whenever necessary based on current
		// features
		adapt_params(features);

		// compute the measures for each of the current dimensions
		SGVector<float64_t> measures(num_features);
		for (index_t i=0; i<num_features; ++i)
			measures[i]=compute_measures(features, i);

		if (io->get_loglevel()==MSG_DEBUG || io->get_loglevel()==MSG_GCDEBUG)
			measures.display_vector("measures");

		// rank the measures
		SGVector<index_t> argsorted=CMath::argsort(measures);

		if (io->get_loglevel()==MSG_DEBUG || io->get_loglevel()==MSG_GCDEBUG)
			argsorted.display_vector("argsorted");

		// make sure that we don't end up with lesser feats than target dim
		index_t to_remove;
		if (m_policy==N_SMALLEST || m_policy==N_LARGEST)
			to_remove=m_num_remove;
		else
			// percentile policy: m_num_remove is a percentage; the float
			// product is truncated toward zero on assignment to index_t
			to_remove=num_features*m_num_remove*0.01;

		index_t can_remove=num_features-m_target_dim;

		// if policy is to remove N feats corresponding to smallest/largest
		// measures, we just replace N with can_remove. if policy is to remove
		// N% feats, then we change the policy temporarily and remove a fixed
		// can_remove number of feats instead
		index_t orig_remove=m_num_remove;
		EFeatureRemovalPolicy orig_policy=m_policy;

		if (to_remove>can_remove)
		{
			m_num_remove=can_remove;
			SG_DEBUG("Can only remove %d features in this iteration!\n",
					can_remove);

			if (m_policy==PERCENTILE_SMALLEST)
				m_policy=N_SMALLEST;
			else if (m_policy==PERCENTILE_LARGEST)
				m_policy=N_LARGEST;
		}

		// remove appropriate number of features based on the measures and the
		// removal policy. this internally update the subset for selected
		// features as well
		features=remove_feats(features, argsorted);

		// restore original removal policy and numbers if necessary for the
		// sake of consistency
		if (to_remove>can_remove)
		{
			m_policy=orig_policy;
			m_num_remove=orig_remove;
		}

		// update the number of features
		num_features=get_num_features(features);
		SG_DEBUG("Current number of features %d!\n", num_features);
	}

	// sanity check: the accumulated subset must select exactly target_dim feats
	ASSERT(m_subset->get_size()==m_target_dim);

	SG_DEBUG("Leaving!\n");
	return features;
}