Esempio n. 1
0
bool Clustering::perform_single_move(const SingleMove& move) {
	if (move.c1 == move.c2) return false;
	
	// update stats
	Stats delta1 = node_stats[move.i]; delta1.self += 2.0 * move.weight_i_c1;
	Stats delta2 = node_stats[move.i]; delta2.self += 2.0 * move.weight_i_c2;
	clus_stats[move.c1] -= delta1;
	clus_stats[move.c2] += delta2;
	clus_stats.total -= delta1;
	clus_stats.total += delta2;
	// update loss
	sum_local_loss = move.sum_local_after;
	loss = move.loss_after;
	// update cluster assignment
	if(node_clus[move.i] != move.c1) throw std::logic_error("node_clus[move.i] != move.c2");
	node_clus[move.i] = move.c2;
	// update
	update_clus_size(move.c1, -1);
	update_clus_size(move.c2, +1);
	
	// recheck invariants
	if (params.check_invariants) verify_invariants();
	if (params.verbosity >= 6) {
		params.debug_out << "     moved " << move.i << " from " << move.c1;
		if (!node_partition.empty()) params.debug_out << " in " << node_partition[move.c1];
		params.debug_out << " to " << move.c2;
		if (!node_partition.empty()) params.debug_out << " in " << node_partition[move.c2];
		params.debug_out << " loss " << setprecision(12) << move.loss_after << " in " << num_clusters() << " clusters" << endl;
	}
	return true;
}
Esempio n. 2
0
// Consistency check for the incrementally maintained statistics.
//
// Rebuilds the cluster statistics from scratch with init_stats() and compares
// them, the loss and the per-component local loss sums against the stored
// clus_stats, loss and sum_local_loss. Any difference larger than
// validate_epsilon is printed with printf and reported by throwing
// std::logic_error("verify cluster stats failed").
void Clustering::verify_clus_stats() const {
	// error(fmt, ...): print the message, then throw.
	// NOTE(review): only `check` is #undef'd at the end of this function; `error`
	// stays defined for the rest of the translation unit - confirm that is intended.
	#define error(X...) {printf(X); throw std::logic_error("verify cluster stats failed");}
	// check(sub, str, ii): `sub` is spliced after both `clus_stats` and `cs`, so
	// the same member path is compared on the stored and the recomputed stats;
	// `str`/`ii` only label the error message.
	#define check(sub,str,ii) \
		if (abs(clus_stats sub - cs sub) > validate_epsilon) \
			error("At " str "  %f != %f (diff: %g)\n",ii,clus_stats sub, cs sub, clus_stats sub - cs sub)
	// reference statistics, recomputed from the current node_clus assignment
	ClusteringStats cs;
	init_stats(cs, a, &node_clus[0], &node_stats);
	// per-cluster statistics
	for (size_t i = 0 ; i < clus_stats.size() ; ++i) {
		check([i].degree,"[%d].degree",(int)i);
		check([i].self,"[%d].self",(int)i);
		check([i].size,"[%d].size",(int)i);
	}
	// totals over all clusters
	check(.total.degree,"%s.degree","total");
	check(.total.self,"%s.self","total");
	check(.total.size,"%s.size","total");
	// loss, recomputed the same way recalc_internal_data() computes it
	// (loss function value plus the extra_loss_self term)
	Doubles new_sl;
	double new_loss = params.lossfun->loss(cs, num_clusters(), &new_sl) + extra_loss_self * cs.total.self / cs.total.degree;
	if (abs(loss - new_loss) > validate_epsilon) {
		error("Incorrect loss update:  %f != %f\n",loss,new_loss);
	}
	// every component of the local loss sums
	for (int i = 0 ; i < MAX_DOUBLES ; ++i) {
		if (abs(sum_local_loss[i] - new_sl[i]) > validate_epsilon) {
			error("Incorrect local loss [%d] update:  %f != %f\n",i, sum_local_loss[i], new_sl[i]);
		}
	}
	#undef check
}
Esempio n. 3
0
File: DF.C Progetto: FDOS/defrag
/*
 * Locate the physical end of the used area of a drive and report it.
 *
 * disk     drive number (0 is printed as 'A')
 * track    receives the track number computed by physical()
 * head     receives the head (side) number computed by physical()
 * sysonly  non-zero: only locate the first data sector (end of the
 *          system area) and return; zero: locate the last sector of
 *          the highest cluster that searchfat12() reports as in use
 *
 * The messages print the globals MaxTrack and MaxHead.
 * NOTE(review): presumably callers pass &MaxTrack/&MaxHead as track/head -
 * confirm against the call sites.
 *
 * Exits the program if the drive parameters cannot be read, and calls
 * die() when no allocated cluster is found.
 */
void setlast(int disk, int *track, int *head, int sysonly)
{
struct DEVICEPARAMS dp;
int status;
int first_data, fat_len;
long fat_start, end_sector;
unsigned int total_clusters;
unsigned short highest;

    status = disk_getparams(disk,&dp);
    if (status != DISK_OK)
    {
        /* LIB TODO: make a strliberror() function (like strerror()) */
        lib_error("getlast()",status);
        exit(1);
    }

    /* system area only: it ends where the data area begins */
    if (sysonly)
    {
        first_data = data_sector(&dp);
        physical(track,NULL,head,first_data,dp.hidden_sectors,
                 dp.secs_track,dp.num_heads);
        printf("System extends to track %d, side %d (logical sector %d).\n",
               MaxTrack,MaxHead,first_data);
        return;
    }

    /* scan the FAT for the highest cluster in use */
    total_clusters = num_clusters(&dp);
    fat_start = dp.reserved_secs;
    fat_len = dp.secs_fat;
    highest = searchfat12(disk,(unsigned short)total_clusters,
                          fat_start,fat_len,0);
    if (highest == 0)
        die("There seems to be no data on the disk in ","%c:.",disk+'A');

    /* last sector belonging to that cluster */
    first_data = data_sector(&dp);
    end_sector = cluster_to_sector(highest,first_data,dp.secs_cluster);
    end_sector += (dp.secs_cluster-1);

    physical(track,NULL,head,end_sector,dp.hidden_sectors,
             dp.secs_track,dp.num_heads);

    printf("Last allocated cluster, %u, is on ",highest);
    printf("track %d, side %d.\n",MaxTrack,MaxHead);
}
Esempio n. 4
0
// Rebuild everything that is derived from the node -> cluster assignment
// (node_clus): the accumulated statistics, the cluster sizes, the list of
// empty clusters and the loss.
void Clustering::recalc_internal_data() {
	if (params.check_invariants) {
		verify_partition();
	}

	// accumulated statistics per cluster and in total
	init_stats(clus_stats, a, &node_clus[0], &node_stats);

	// count the members of every cluster
	empty_cluss.clear();
	fill(clus_size.begin(), clus_size.end(), 0);
	for (size_t node = 0 ; node != node_clus.size() ; ++node) {
		++clus_size[node_clus[node]];
	}

	// collect the clusters without members; for those, clus_size holds the
	// negated position of the cluster inside empty_cluss
	for (size_t c = 0 ; c != clus_stats.size() ; ++c) {
		if (clus_size[c] != 0) continue;
		clus_size[c] = -static_cast<int>(empty_cluss.size());
		empty_cluss.push_back(c);
	}

	// loss of the current clustering, including the extra_loss_self term
	loss = params.lossfun->loss(clus_stats, num_clusters(), &sum_local_loss)
	     + extra_loss_self * clus_stats.total.self / clus_stats.total.degree;
}
Esempio n. 5
0
// Try to bring the number of clusters down to params.max_num_clusters.
// Three stages are attempted in order: a loss tweak, forced moves on the
// higher level graph, and finally an exhaustive search.
void Clustering::reduce_num_clusters() {
	// Stage 1: tweak the loss function when the cluster count is out of range
	// (was once routed through optimize_higher_level; now called directly)
	if (params.use_loss_tweak) {
		const int current = (int)num_clusters();
		if (current > params.max_num_clusters || current < params.min_num_clusters) {
			reduce_num_clusters_with_extra_loss();
		}
	}

	// Stage 2: keep forcing the best single move on the higher level graph.
	// Since clustering starts out as singletons, each such move merges two clusters.
	for (;;) {
		if ((int)num_clusters() <= params.max_num_clusters) break;
		if (use_exhaustive_for(num_clusters(), params.max_num_clusters)) break;
		const bool moved = optimize_higher_level(true, &Clustering::perform_best_single_forced_move);
		if (!moved) break;
	}

	// Stage 3: with few enough clusters left, search exhaustively
	if ((int)num_clusters() > params.max_num_clusters) {
		if (use_exhaustive_for(num_clusters(), params.max_num_clusters)) {
			optimize_higher_level(true, &Clustering::optimize_exhaustive);
		}
	}
}
Esempio n. 6
0
// Run the full optimization params.num_repeats times, every time starting
// from the clustering that was current on entry, and keep the result with the
// lowest loss.
//
// On return `loss` and `node_clus` hold the best repetition. When
// params.num_repeats is zero or negative nothing is run and the clustering is
// left exactly as it was (instead of being replaced by an empty assignment).
//
// NOTE(review): only `loss` and `node_clus` are restored from the best
// repetition; the other cached data (cluster stats, sizes) is whatever the
// last repetition left behind - confirm callers do not rely on it.
void Clustering::optimize() {
	vector<clus_t> node_clus_initial = node_clus;
	vector<clus_t> best_node_clus;
	double best_loss = 1e100;
	bool have_best = false;
	for (int rep = 0 ; rep < params.num_repeats ; ++rep) {
		if (params.verbosity >= 1) params.debug_out << "Repetition " << rep << endl;
		// reset clustering
		node_clus = node_clus_initial;
		recalc_internal_data();
		trace("initial");
		// lots of optimization
		if (params.optimize_num_clusters_with_outer_loop &&
				(params.min_num_clusters > 1 || params.max_num_clusters < (int)num_nodes())) {
			// the allowed number of clusters is restricted: search for a loss tweak that satisfies it
			reduce_num_clusters_with_extra_loss();
		} else {
			for (int it = 0 ; it < params.num_partitions ; ++it) {
				optimize_all_levels();
				optimize_partition();
			}
			optimize_all_levels();
			// enforce number of clusters?
			if (params.min_num_clusters > (int)num_clusters() || params.max_num_clusters < (int)num_clusters()) {
				reduce_num_clusters();
			}
		}
		// is it an improvement? (the first repetition is always accepted)
		if (!have_best || loss < best_loss) {
			have_best = true;
			best_loss = loss;
			best_node_clus = node_clus;
		}
		trace("done");
	}
	// store, but only if at least one repetition produced a result
	if (!have_best) return;
	this->loss = best_loss;
	this->node_clus.swap(best_node_clus); // best_node_clus is not needed afterwards
}
Esempio n. 7
0
// Search for a value of extra_loss_self for which the optimizer ends up with a
// number of clusters between params.min_num_clusters and
// params.max_num_clusters.
//
// A bisection is performed for params.num_loss_tweak_iterations steps over the
// angle interval (-pi/2, pi/2); the angle is mapped through tan() so that the
// whole real line is covered. Every step restarts from the clustering that was
// current on entry and runs the regular optimization. The best clustering seen
// is the one that misses the allowed range by the fewest clusters, ties broken
// by the loss measured without the extra term.
//
// On return extra_loss_self is reset to 0, node_clus holds the best clustering
// found and all internal data has been recalculated. Always returns true.
bool Clustering::reduce_num_clusters_with_extra_loss() {
	double lower = -M_PI_2;
	double upper = M_PI_2;
	size_t min_allowed_clusters = params.min_num_clusters;
	size_t max_allowed_clusters = params.max_num_clusters;
	
	if (params.verbosity >= 2) {
		params.debug_out << " Use extra loss to get " << params.min_num_clusters << " <= " << num_clusters() << " <= " << params.max_num_clusters << endl;
	}
	
	// now do a binary search
	// mapping using tan(), so we cover the entire range from -infinity to +infinity
	// because of the random optimization, this is technically not valid, but it should work in practice.
	vector<clus_t> init_node_clus = node_clus;
	vector<clus_t> best_node_clus = node_clus;
	double best_loss = loss; // not including the extra loss
	int best_too_few_or_many = max(0,(int)num_clusters() - (int)max_allowed_clusters)
	                         + max(0,(int)min_allowed_clusters - (int)num_clusters());
	
	for (int it = 0 ; it < params.num_loss_tweak_iterations ; ++it) {
		// use tan(-pi/2...pi/2) to search over all reals
		double middle = (lower + upper) * 0.5;
		extra_loss_self = tan(middle);
		// optimize from the initial clustering with this extra loss
		node_clus = init_node_clus;
		recalc_internal_data();
		for (int part = 0 ; part < params.num_partitions ; ++part) {
			optimize_all_levels();
			optimize_partition();
		}
		optimize_all_levels();
		
		// calculate loss without the extra_loss_self change
		double actual_loss = this->loss - extra_loss_self * clus_stats.total.self / clus_stats.total.degree;
		int too_few  = max(0,(int)min_allowed_clusters - (int)num_clusters());
		int too_many = max(0,(int)num_clusters() - (int)max_allowed_clusters);
		int too_few_or_many = too_few + too_many;
		if (params.verbosity >= 2) {
			params.debug_out << " Extra loss " << setprecision(5) << tan(lower) << " < " << extra_loss_self << " < " << tan(upper) << " gives loss " << actual_loss << " in " << num_clusters() << " clusters, " << too_few << "+" << too_many << "=" << too_few_or_many << " off" << endl;
		}
		// closer to the allowed range wins; on a tie the lower actual loss wins
		if (too_few_or_many < best_too_few_or_many || (too_few_or_many == best_too_few_or_many && actual_loss < best_loss)) {
			best_loss = actual_loss;
			best_too_few_or_many = too_few_or_many;
			best_node_clus = node_clus;
			if (params.verbosity >= 2) {
				params.debug_out << " (best so far)" << endl;
			}
		}
		// shrink the search interval
		if (num_clusters() > max_allowed_clusters) {
			if (params.verbosity >= 2) params.debug_out << " (too many clusters: " << num_clusters() << " > " << max_allowed_clusters << "), decrease upper bound" << endl;
			upper = middle; // need more loss contribution to bring down #of clusters
		} else if (num_clusters() < min_allowed_clusters) {
			if (params.verbosity >= 2) params.debug_out << " (too few clusters: " << num_clusters() << " < " << min_allowed_clusters << "), increase lower bound" << endl;
			lower = middle;
		} else {
			if (params.verbosity >= 2) params.debug_out << " (okay nr. of clusters)" << endl;
			// we are in the accepted region, prefer smaller (in absolute sense) loss tweaks
			// (fabs, so the comparison can never fall back to the integer abs)
			if (fabs(upper) > fabs(lower)) {
				upper = middle;
			} else {
				lower = middle;
			}
		}
	}
	
	// store
	extra_loss_self = 0.;
	node_clus = best_node_clus;
	recalc_internal_data();
	return true;
}
Esempio n. 8
0
// Optimize by greedily moving single nodes between clusters.
// Passes are repeated until a pass makes no change, or until a pass that did
// make changes fails to lower the loss.
// Returns true when more than one pass was needed.
bool Clustering::optimize_single_moves() {
	if (params.verbosity >= 4) {
		params.debug_out << "   Initially loss is " << setprecision(12) << loss << " in " << num_clusters() << " clusters" << endl;
	}

	int passes = 0;
	bool moved;
	do {
		const double loss_at_start = loss;
		moved = params.optimize_globally_best_moves
		      ? optimize_best_single_move_pass()
		      : optimize_single_moves_pass();
		++passes;
		if (params.verbosity >= 5) {
			params.debug_out << "    After iteration " << passes << " loss is " << setprecision(12) << loss << " in " << num_clusters() << " clusters" << endl;
		}
		trace(moved ? "single moves" : "no single moves");
		// something went wrong: we made the loss worse, probably due to numerical errors
		if (moved && loss >= loss_at_start) break;
	} while (moved);

	// summary line only at exactly this level; higher levels already logged every pass
	if (params.verbosity == 4) {
		params.debug_out << "   After " << passes << " iterations, loss is " << setprecision(12) << loss << " in " << num_clusters() << " clusters" << endl;
	}
	return passes > 1;
}
Esempio n. 9
0
// Evaluate moving node i out of its current cluster into every candidate
// cluster and return the best move found.
//
// Candidates are the clusters of i's neighbors (restricted to the same
// partition when node_partition is non-empty), optionally one empty cluster
// (params.always_consider_empty) and optionally one random candidate when
// there would otherwise be none (params.consider_random_if_no_moves).
//
// A candidate replaces the current best only when it lowers the loss by more
// than epsilon, or when force_change is set and no other cluster has been
// chosen yet. If nothing qualifies the returned move has c2 == c1, i.e. the
// node stays where it is. Nothing is applied here; the move only records the
// resulting loss values.
SingleMove Clustering::best_single_move_for_node(node_t i, bool force_change) const {
	int c1 = node_clus[i]; // cluster that i is in right now

	// Collect candidate clusters and the summed edge weight from i to each,
	// i.e. neighbors.weight(c) = sum of edges from i to any node in c
	neighbors.clear();
	neighbors.add(c1, -node_stats[i].self); // don't count the self loops for node i twice
	for (int edge = a.cidx(i) ; edge < a.cidx(i+1) ; ++edge) {
		int other = a.ridx(edge);
		// i can only be clustered together with nodes from the same partition
		bool allowed = node_partition.empty() || node_partition[other] == node_partition[i];
		if (allowed) {
			neighbors.add(node_clus[other], a.data(edge));
		}
	}
	// optionally also consider moving i into an empty cluster
	if (params.always_consider_empty && !empty_cluss.empty()) {
		neighbors.add(empty_cluss.back(), 0.);
	}
	// make sure that at least one move to another cluster is considered;
	// this allows different connected components to be merged
	// NOTE(review): the random value skips the node index i but is added as a
	// cluster id - confirm that cluster ids and node ids share one range.
	if (params.consider_random_if_no_moves && neighbors.size() == 1 && num_nodes() > 1) {
		int pick = rand() % (num_nodes() - 1);
		if (pick>=i) pick++;
		neighbors.add(pick, 0.);
		if (params.verbosity >= 7) {
			params.debug_out << "      considering a random move" << endl;
		}
	}

	// Start from the "stay put" move
	SingleMove best_move;
	best_move.i = i;
	best_move.c1 = c1;
	best_move.c2 = best_move.c1;
	best_move.weight_i_c1 = neighbors.weight(c1);
	best_move.weight_i_c2 = best_move.weight_i_c1;
	best_move.loss_after = loss;
	best_move.sum_local_after = sum_local_loss;

	// What leaves c1 is the same for every candidate, so build it once
	Stats delta1 = node_stats[i];
	delta1.self += 2.0 * best_move.weight_i_c1;

	// Loss after moving i to each of the candidate clusters
	vector<clus_t>::const_iterator cand = neighbors.begin();
	for ( ; cand != neighbors.end() ; ++cand) {
		int c2 = *cand;
		if (c2 == c1) continue;
		// what arrives in c2
		double weight_i_c2 = neighbors.weight(c2);
		Stats delta2 = node_stats[i];
		delta2.self += 2.0 * weight_i_c2;
		Stats total_after = clus_stats.total - delta1 + delta2;
		// c1 disappears if i was all of it; c2 appears if it was empty
		int num_cluster_after = num_clusters()
		                      - (node_stats[i].size == clus_stats[c1].size ? 1 : 0)
		                      + (clus_stats[c2].size == 0 ? 1 : 0);
		// swap the old local terms of c1 and c2 for the new ones
		Doubles sum_local_after = sum_local_loss;
		sum_local_after -= params.lossfun->local(clus_stats[c1], clus_stats.total);
		sum_local_after -= params.lossfun->local(clus_stats[c2], clus_stats.total);
		sum_local_after += params.lossfun->local(clus_stats[c1]-delta1, total_after);
		sum_local_after += params.lossfun->local(clus_stats[c2]+delta2, total_after);
		double loss_after = params.lossfun->global(sum_local_after, total_after, num_cluster_after)
		                  + extra_loss_self * total_after.self / total_after.degree;
		// keep it if it is better, or if a change is forced and none was picked yet
		bool improves = loss_after < best_move.loss_after - epsilon;
		bool forced   = force_change && best_move.c2 == c1;
		if (improves || forced) {
			best_move.c2              = c2;
			best_move.weight_i_c2     = weight_i_c2;
			best_move.loss_after      = loss_after;
			best_move.sum_local_after = sum_local_after;
		}
		if (params.verbosity >= 7) {
			params.debug_out << "      consider " << i << " from " << c1 << " to " << c2 << " loss " << setprecision(12) << loss_after << " in " << num_cluster_after << " clusters" << endl;
		}
	}
	return best_move;
}
Esempio n. 10
0
    void doc_manager::populate(void)
    {
        
        add_class_descriptor(ml::k_base);
        
        add_class_descriptors(ml::k_base, {
            ml::k_classification,
            ml::k_regression
        });
        
        add_class_descriptors(ml::k_regression, {
            ml::k_ann,
            ml::k_linreg,
            ml::k_logreg
        });
        
        add_class_descriptors(ml::k_classification, {
            ml::k_svm,
            ml::k_adaboost,
            ml::k_anbc,
            ml::k_dtw,
            ml::k_hmmc,
            ml::k_softmax,
            ml::k_randforest,
            ml::k_mindist,
            ml::k_knn,
            ml::k_gmm,
            ml::k_dtree
        });
        
        add_class_descriptors(ml::k_feature_extraction, {
            ml::k_peak,
            ml::k_minmax,
            ml::k_zerox
        });
        
        descriptors[ml::k_ann].desc("Artificial Neural Network").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/MLP");
        descriptors[ml::k_linreg].desc("Linear Regression").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/LinearRegression");
        descriptors[ml::k_logreg].desc("Logistic Regression").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/LogisticRegression");
        descriptors[ml::k_peak].desc("Peak Detection").url("").num_outlets(1);
        descriptors[ml::k_minmax].desc("Minimum / Maximum Detection").url("").num_outlets(1);
        descriptors[ml::k_zerox].desc("Zero Crossings Detection").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/ZeroCrossingCounter");
        descriptors[ml::k_svm].desc("Support Vector Machine").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/SVM");
        descriptors[ml::k_adaboost].desc("Adaptive Boosting").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/AdaBoost");
        descriptors[ml::k_anbc].desc("Adaptive Naive Bayes Classifier").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/ANBC");
        descriptors[ml::k_dtw].desc("Dynamic Time Warping").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/DTW");
        descriptors[ml::k_hmmc].desc("Continuous Hidden Markov Model").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/HMM");
        descriptors[ml::k_softmax].desc("Softmax Classifier").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/Softmax");
        descriptors[ml::k_randforest].desc("Random Forests").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/RandomForests");
        descriptors[ml::k_mindist].desc("Minimum Distance").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/MinDist");
        descriptors[ml::k_knn].desc("K Nearest Neighbour").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/KNN");
        descriptors[ml::k_gmm].desc("Gaussian Mixture Model").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/GMMClassifier");
        descriptors[ml::k_dtree].desc("Decision Trees").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/DecisionTree");
        
        for (auto& desc : {&descriptors[ml::k_hmmc], &descriptors[ml::k_dtw]})
        {
            desc->notes(
                        "add and map messages for time series should be delimited with record messages, e.g. record 1, add 1 40 50, add 1 41 50, record 0"
            );
        }
        
        // base descriptor
        message_descriptor add(
                              "add",
                              "list comprising a class id followed by n features, <class> <feature 1> <feature 2> etc",
                               "1 0.2 0.7 0.3 0.1"
                              );

        
        message_descriptor train(
                                "train",
                                "train the model based on vectors added with 'add'"
                                );
        
        message_descriptor map(
                              "map",
                              "generate the output value(s) for the input feature vector",
                               "0.2 0.7 0.3 0.1"
                              );
        
        message_descriptor write(
                                 "write",
                                 "write training data and / or model, first argument gives path to write file",
                                 "/path/to/my_ml-lib_data"
                                 );
        
        message_descriptor read(
                                "read",
                                "read training data and / or model, first argument gives path to the read file",
                                "/path/to/my_ml-lib_data"
                                );
        
        message_descriptor clear(
                                 "clear",
                                 "clear the stored training data and model"
                                 );
        
        message_descriptor help(
                               "help",
                               "post usage statement to the console"
                               );
        
        valued_message_descriptor<int> scaling(
                                               "scaling",
                                               "sets whether values are automatically scaled",
                                               {0, 1},
                                               1
                                               );
        
        valued_message_descriptor<int> record(
                                              "record",
                                              "start or stop time series recording for a single example of a given class",
                                              {0, 1},
                                              0
                                              );
        
        ranged_message_descriptor<float> training_rate(
                                                       "training_rate",
                                                       "set the learning rate, used to update the weights at each step of learning algorithms such as stochastic gradient descent.",
                                                       0.01,
                                                       1.0,
                                                       0.1
                                                       );
        
        ranged_message_descriptor<float> min_change(
                                                    "min_change",
                                                    "set the minimum change that must be achieved between two training epochs for the training to continue",
                                                    0.0,
                                                    1.0,
                                                    1.0e-5
                                                    );
        
        ranged_message_descriptor<int> max_iterations(
                                                      "max_iterations",
                                                      "set the maximum number of training iterations",
                                                      0,
                                                      1000,
                                                      100
                                                      );
        
        record.insert_before = "add";
        descriptors[ml::k_base].add_message_descriptor(add, write, read, train, clear, map, help, scaling, training_rate, min_change, max_iterations);

        // generic classification descriptor
        valued_message_descriptor<bool> null_rejection(
                                                       "null_rejection",
                                                       "toggle NULL rejection off or on, when 'on' classification results below the NULL-rejection threshold will be discarded",
                                                       {false, true},
                                                       true
                                                       );
        
        ranged_message_descriptor<float> null_rejection_coeff(
                                                              "null_rejection_coeff",
                                                              "set a multiplier for the NULL-rejection threshold ",
                                                              0.1,
                                                              1.0,
                                                              0.9
                                                              );
        
        valued_message_descriptor<int> probs(
                                             "probs",
                                             "determines whether probabilities are sent from the right outlet",
                                             {0, 1},
                                             0
                                             );
        
        descriptors[ml::k_classification].add_message_descriptor(null_rejection_coeff, probs, null_rejection);
        
        // generic feature extraction descriptor
//        descriptors[ml::k_feature_extraction].add_message_descriptor(null_rejection_coeff, null_rejection);

        // generic regression descriptor
       
        
//        descriptors[ml::k_regression].add_message_descriptor(training_rate, min_change, max_iterations);
        
        // Object-specific descriptors
        //-- Regressifiers
        //---- ann
        valued_message_descriptor<ml::data_type> mode("mode",
                                                      "set the mode of the ANN, " + std::to_string(ml::LABELLED_CLASSIFICATION) + " for classification, " + std::to_string(ml::LABELLED_REGRESSION) + " for regression",
                                                      {ml::LABELLED_CLASSIFICATION, ml::LABELLED_REGRESSION, ml::LABELLED_TIME_SERIES_CLASSIFICATION},
                                                      ml::defaults::data_type
                                                      );
        
        
        message_descriptor add_ann(
                              "add",
                              "class id followed by n features, <class> <feature 1> <feature 2> etc when in classification mode or N output values followed by M input values when in regression mode (N = num_outputs)",
                                   "1 0.2 0.7 0.3 0.1"

                              );
      
        ranged_message_descriptor<int> num_outputs(
                                                   "num_outputs",
                                                   "set the number of neurons in the output layer",
                                                   1,
                                                   1000,
                                                   ml::defaults::num_output_dimensions
                                                   );
        
        ranged_message_descriptor<int> num_hidden(
                                                  "num_hidden",
                                                  "set the number of neurons in the hidden layer",
                                                  1,
                                                  1000,
                                                  ml::defaults::num_hidden_neurons
                                                  );
        
        ranged_message_descriptor<int> min_epochs(
                                                  "min_epochs",
                                                  "setting the minimum number of training iterations",
                                                  1,
                                                  1000,
                                                  10
                                                  );
        
        // TODO: check if the "epochs" are still needed or if we can use "iterations" as inherited from ml_regression
        ranged_message_descriptor<int> max_epochs(
                                                  "max_epochs",
                                                  "setting the maximum number of training iterations",
                                                  1,
                                                  10000,
                                                  100
                                                  );

        ranged_message_descriptor<float> momentum(
                                                  "momentum",
                                                  "set the momentum",
                                                  0.0,
                                                  1.0,
                                                  0.5
                                                  );
        
        ranged_message_descriptor<float> gamma(
                                                  "gamma",
                                                  "set the gamma",
                                                  0.0,
                                                  10.0,
                                                  2.0
                                                  );
        
        // TODO: have optional value_labels for value labels
        valued_message_descriptor<int> input_activation_function(
                                                                 "input_activation_function",
                                                                 "set the activation function for the input layer, 0:LINEAR, 1:SIGMOID, 2:BIPOLAR_SIGMOID",
                                                                 {0, 1, 2},
                                                                 0
                                                                 );
        
        valued_message_descriptor<int> hidden_activation_function(
                                                                 "hidden_activation_function",
                                                                 "set the activation function for the hidden layer, 0:LINEAR, 1:SIGMOID, 2:BIPOLAR_SIGMOID",
                                                                 {0, 1, 2},
                                                                 0
                                                                 );
        
        valued_message_descriptor<int> output_activation_function(
                                                                 "output_activation_function",
                                                                 "set the activation function for the output layer, 0:LINEAR, 1:SIGMOID, 2:BIPOLAR_SIGMOID",
                                                                 {0, 1, 2},
                                                                 0
                                                                 );

                                                                 
        ranged_message_descriptor<int> rand_training_iterations(
                                                                 "rand_training_iterations",
                                                                 "set the number of random training iterations",
                                                                 0,
                                                                 1000,
                                                                 10
                                                                 );

        valued_message_descriptor<bool> use_validation_set(
                                                           "use_validation_set",
                                                           "set whether to use a validation training set",
                                                           {false, true},
                                                           true
                                                           );
        
        ranged_message_descriptor<int> validation_set_size(
                                                           "validation_set_size",
                                                           "set the size of the validation set",
                                                           1,
                                                           100,
                                                           20
                                                           );
        
        valued_message_descriptor<bool> randomize_training_order(
                                                           "randomize_training_order",
                                                           "sets whether to randomize the training order",
                                                           {false, true},
                                                           false
                                                           );
        
        
        descriptors[ml::k_ann].add_message_descriptor(add_ann, probs, mode, null_rejection, null_rejection_coeff, num_outputs, num_hidden, min_epochs, max_epochs, momentum, gamma, input_activation_function, hidden_activation_function, output_activation_function, rand_training_iterations, use_validation_set, validation_set_size, randomize_training_order);
        
        
        //-- Classifiers
        //---- ml.svm
        ranged_message_descriptor<int> type(
                                            "type",
                                            "set SVM type,"
                                            " 0:C-SVC (multi-class),"
                                            " 1:nu-SVC (multi-class),"
                                            " 2:one-class SVM,"
                                           // " 3:epsilon-SVR (regression),"
                                           // " 4:nu-SVR (regression)"
                                            ,
                                            0,
                                            2,
                                            0
                                            //        "	0 -- C-SVC		(multi-class classification)\n"
                                            //        "	1 -- nu-SVC		(multi-class classification)\n"
                                            //        "	2 -- one-class SVM\n"
                                            //        "	3 -- epsilon-SVR	(regression)\n"
                                            //        "	4 -- nu-SVR		(regression)\n"

                                            );
        
        ranged_message_descriptor<int> kernel(
                                              "kernel",
                                              "set type of kernel function, "
                                              "0:linear, " // (u'*v),"
                                              "1:polynomial, " // (gamma*u'*v + coef0)^degree,"
                                              "2:radial basis function, " //: exp(-gamma*|u-v|^2),"
                                              "3:sigmoid, " //  tanh(gamma*u'*v + coef0),"
                                              "4:precomputed kernel (kernel values in training_set_file)",
                                              0,
                                              4,
                                              0
                                              //        "	0 -- linear: u'*v\n"
                                              //        "	1 -- polynomial: (gamma*u'*v + coef0)^degree\n"
                                              //        "	2 -- radial basis function: exp(-gamma*|u-v|^2)\n"
                                              //        "	3 -- sigmoid: tanh(gamma*u'*v + coef0)\n"
                                              //        "	4 -- precomputed kernel (kernel values in training_set_file)\n"
                                              );
        
        ranged_message_descriptor<float> degree(
                                              "degree",
                                              "set degree in kernel function",
                                              0,
                                              20,
                                              3
                                              );
        
        ranged_message_descriptor<float> svm_gamma(
                                              "gamma",
                                              "set gamma in kernel function",
                                              0.0,
                                              1.0,
                                              0.5
                                              );
        
        ranged_message_descriptor<float> coef0(
                                               "coef0",
                                               "coef0 in kernel function",
                                               INFINITY * -1.f, INFINITY,
                                               0.0
                                               );
        
        ranged_message_descriptor<float> cost(
                                               "cost",
                                               "set the parameter C of C-SVC, epsilon-SVR, and nu-SVR",
                                               INFINITY * -1.f, INFINITY,
                                               1.0
                                               );
        
        ranged_message_descriptor<float> nu(
                                              "nu",
                                              "set the parameter nu of nu-SVC, one-class SVM, and nu-SVR",
                                              INFINITY * -1.f, INFINITY,
                                              0.5
                                              );
        
        message_descriptor cross_validation(
                                            "cross_validation",
                                            "perform cross validation"
                                            );
        
        ranged_message_descriptor<int> num_folds(
                                                 "num_folds",
                                                 "set the number of folds used for cross validation",
                                                 1, 100,
                                                 10
                                                 );
        
        descriptors[ml::k_svm].add_message_descriptor(cross_validation, num_folds, type, kernel, degree, svm_gamma, coef0, cost, nu);
        
        //---- ml.adaboost        
        ranged_message_descriptor<int> num_boosting_iterations(
                                                                "num_boosting_iterations",
                                                               "set the number of boosting iterations that should be used when training the model",
                                                               0,
                                                               200,
                                                               20
                                                               );
        
        valued_message_descriptor<int> prediction_method(
                                                        "prediction_method",
                                                         "set the Adaboost prediction method, 0:MAX_VALUE, 1:MAX_POSITIVE_VALUE",
                                                         {GRT::AdaBoost::MAX_VALUE, GRT::AdaBoost::MAX_POSITIVE_VALUE},
                                                         GRT::AdaBoost::MAX_VALUE
                                                         
        );
        
        valued_message_descriptor<int> set_weak_classifier(
                                                           "set_weak_classifier",
                                                           "sets the weak classifier to be used by Adaboost, 0:DECISION_STUMP, 1:RADIAL_BASIS_FUNCTION",
                                                           {ml::weak_classifiers::DECISION_STUMP, ml::weak_classifiers::RADIAL_BASIS_FUNCTION},
                                                           ml::weak_classifiers::DECISION_STUMP
                                                           );
        
        valued_message_descriptor<int> add_weak_classifier(
                                                           "add_weak_classifier",
                                                           "add a weak classifier to the list of classifiers used by Adaboost",
                                                           {ml::weak_classifiers::DECISION_STUMP, ml::weak_classifiers::RADIAL_BASIS_FUNCTION},
                                                           ml::weak_classifiers::DECISION_STUMP
                                                           );

        descriptors[ml::k_adaboost].add_message_descriptor(num_boosting_iterations, prediction_method, set_weak_classifier, add_weak_classifier);
        
        //---- ml.anbc
        message_descriptor weights("weights",
                                   "vector of 1 integer and N floating point values where the integer is a class label and the floats are the weights for that class. Sending weights with a vector size of zero clears all weights"
                                   );
        
        descriptors[ml::k_anbc].add_message_descriptor(weights);
        
        //---- ml.dtw
        valued_message_descriptor<int> rejection_mode(
                                                      "rejection_mode",
                                                      "sets the method used for null rejection, 0:TEMPLATE_THRESHOLDS, 1:CLASS_LIKELIHOODS, 2:THRESHOLDS_AND_LIKELIHOODS",
                                                      {GRT::DTW::TEMPLATE_THRESHOLDS, GRT::DTW::CLASS_LIKELIHOODS, GRT::DTW::THRESHOLDS_AND_LIKELIHOODS},
                                                      GRT::DTW::TEMPLATE_THRESHOLDS
                                                      );
        
        ranged_message_descriptor<float> warping_radius(
                                                        "warping_radius",
                                                        "sets the radius of the warping path, which is used if the constrain_warping_path is set to 1",
                                                        0.0,
                                                        1.0,
                                                        0.2
                                                        );
        
        valued_message_descriptor<bool> offset_time_series(
                                                           "offset_time_series",
                                                           "set if each timeseries should be offset by the first sample in the time series",
                                                           {false, true},
                                                           false
                                                           );
        
        valued_message_descriptor<bool> constrain_warping_path(
                                                           "constrain_warping_path",
                                                           "sets the warping path should be constrained to within a specific radius from the main diagonal of the cost matrix",
                                                           {false, true},
                                                           true
                                                           );
        
        valued_message_descriptor<bool> enable_z_normalization(
                                                               "enable_z_normalization",
                                                               "turn z-normalization on or off for training and prediction",
                                                               {false, true},
                                                               false
                                                               );
        
        valued_message_descriptor<bool> enable_trim_training_data(
                                                               "enable_trim_training_data",
                                                               "enabling data trimming prior to training",
                                                               {false, true},
                                                               false
                                                               );
  
        descriptors[ml::k_dtw].insert_message_descriptor(record);
        descriptors[ml::k_dtw].add_message_descriptor(rejection_mode, warping_radius, offset_time_series, constrain_warping_path, enable_z_normalization, enable_trim_training_data);
        
        //---- ml.hmmc
        valued_message_descriptor<int> model_type(
                                                  "model_type",
                                                  "set the model type used, 0:ERGODIC, 1:LEFTRIGHT",
                                                  {HMM_ERGODIC, HMM_LEFTRIGHT},
                                                  HMM_LEFTRIGHT
                                                  );
        
        ranged_message_descriptor<int> delta(
                                             "delta",
                                             "control how many states a model can transition to if the LEFTRIGHT model type is used",
                                             1,
                                             100,
                                             11
                                             );
        
        ranged_message_descriptor<int> max_num_iterations(
                                                          "max_num_iterations",
                                                          "set the maximum number of training iterations",
                                                          1,
                                                          1000,
                                                          100
                                                          );
        
        ranged_message_descriptor<int> committee_size(
                                                      "committee_size",
                                                      "set the committee size for the number of votes combined to make a prediction",
                                                      1,
                                                      1000,
                                                      5
                                                      );
        
        ranged_message_descriptor<int> downsample_factor(
                                                      "downsample_factor",
                                                         "set the downsample factor for the resampling of each training time series. A factor of 5 will result in each time series being resized (smaller) by a factor of 5",
                                                      1,
                                                      1000,
                                                      5
                                                      );
        
        descriptors[ml::k_hmmc].insert_message_descriptor(record);
        descriptors[ml::k_hmmc].add_message_descriptor(model_type, delta, max_num_iterations, committee_size, downsample_factor);
        
        //---- ml.softmax
        
        //---- ml.randforest
        ranged_message_descriptor<int> num_random_splits(
                                                         "num_random_splits",
                                                         "set the number of steps that will be used to search for the best spliting value for each node",
                                                         1,
                                                         1000,
                                                         100
                                                         );
        
        ranged_message_descriptor<int> min_samples_per_node2(
                                                            "min_samples_per_node",
                                                            "set the minimum number of samples that are allowed per node",
                                                            1,
                                                            100,
                                                            5
                                                            );
        
        ranged_message_descriptor<int> max_depth(
                                                 "max_depth",
                                                 "sets the maximum depth of the tree, any node that reaches this depth will automatically become a leaf node",
                                                 1,
                                                 100,
                                                 10
                                                 );

        descriptors[ml::k_randforest].add_message_descriptor(num_random_splits, min_samples_per_node2, max_depth);
        
        //----ml.mindist
        ranged_message_descriptor<int> num_clusters(
                                                    "num_clusters",
                                                    "set how many clusters each model will try to find during the training phase",
                                                    1,
                                                    100,
                                                    10
                                                    );

        descriptors[ml::k_mindist].add_message_descriptor(num_clusters);
                
        //---- ml.knn
//        "best_k_value_search:\tbool (0 or 1) set whether k value search is enabled or not (default 0)\n";

        ranged_message_descriptor<int> k(
                                         "k",
                                         "sets the K nearest neighbours that will be searched for by the algorithm during prediction",
                                         1,
                                         500,
                                         10
                                         );
        
        ranged_message_descriptor<int> min_k_search_value(
                                         "min_k_search_value",
                                         "set the minimum K value to use when searching for the best K value",
                                         1,
                                         500,
                                         1
                                         );
        
        ranged_message_descriptor<int> max_k_search_value(
                                                          "max_k_search_value",
                                                          "set the maximum K value to use when searching for the best K value",
                                                          1,
                                                          500,
                                                          10
                                                          );
        
        valued_message_descriptor<bool> best_k_value_search(
                                                            "best_k_value_search",
                                                            "set whether k value search is enabled or not",
                                                            {false, true},
                                                            false
                                                            );
        
        descriptors[ml::k_knn].add_message_descriptor(k, min_k_search_value, max_k_search_value, best_k_value_search);
        
        //---- ml.gmm
        ranged_message_descriptor<int> num_mixture_models(
                                                          "num_mixture_models",
                                                          "sets the number of mixture models used for class",
                                                          1,
                                                          20,
                                                          2
                                                          );

        descriptors[ml::k_gmm].add_message_descriptor(num_mixture_models);

        //---- ml.dtree
        valued_message_descriptor<bool> training_mode(
                                                      "training_mode",
                                                      "set the training mode",
                                                      {GRT::Tree::BEST_ITERATIVE_SPILT, GRT::Tree::BEST_RANDOM_SPLIT},
                                                      GRT::Tree::BEST_ITERATIVE_SPILT
                                                      );
        
        ranged_message_descriptor<int> num_splitting_steps(
                                                          "num_splitting_steps",
                                                          "set the number of steps that will be used to search for the best spliting value for each node",
                                                          1,
                                                          500,
                                                          100
                                                          );
        
        ranged_message_descriptor<int> min_samples_per_node(
                                                          "min_samples_per_node",
                                                          "sets the minimum number of samples that are allowed per node, if the number of samples at a node is below this value then the node will automatically become a leaf node",
                                                          1,
                                                          100,
                                                          5
                                                          );
        
        ranged_message_descriptor<int> dtree_max_depth(
                                                 "max_depth",
                                                 "sets the maximum depth of the tree, any node that reaches this depth will automatically become a leaf node",
                                                 1,
                                                 100,
                                                 10
                                                 );
        
        valued_message_descriptor<bool> remove_features_at_each_split(
                                                               "remove_features_at_each_split",
                                                               "set if a feature is removed at each spilt so it can not be used again",
                                                               {false, true},
                                                               false
                                                               );
        descriptors[ml::k_dtree].add_message_descriptor(training_mode, num_splitting_steps, min_samples_per_node, dtree_max_depth, remove_features_at_each_split);

        //-- Feature extraction
        
        //---- ml.peak
        ranged_message_descriptor<int> search_window_size(
                                                          "search_window_size",
                                                          "set the search window size in values",
                                                          1,
                                                          500,
                                                          5
                                                          );
        
        ranged_message_descriptor<float> peak(
                                              "float",
                                              "set the current value of the peak detector, a bang will be output when a peak is detected",
                                              INFINITY * -1.f, INFINITY,
                                              1
                                              );
        
        message_descriptor reset(
                                "reset",
                                "reset the peak detector"
                                );
        
        message_descriptor peak_help(
                                 "help",
                                 "post usage statement to the console"
                                 );


        descriptors[ml::k_peak].add_message_descriptor(peak, reset, search_window_size, peak_help);
        
        //---- ml.minmax
        
        message_descriptor input(
                                 "list",
                                 "list of float values in which to find minima and maxima",
                                 "0.1 0.5 -0.3 0.1 0.2 -0.1 0.7 0.1 0.3"
                                 );
        
        ranged_message_descriptor<float> minmax_delta(
                                                      "delta",
                                                      "setting the minmax delta. Input values will be considered to be peaks if they are greater than the previous and next value by at least the delta value",
                                                      0,
                                                      1,
                                                      0.1
                                                      );
        
        descriptors[ml::k_minmax].add_message_descriptor(input, minmax_delta);
        
        //---- ml.zerox
        
        valued_message_descriptor<float> zerox_map(
                                                   "map",
                                                   "a stream of input values in which to detect zero crossings",
                                                   0.5
                                                   );
        
        ranged_message_descriptor<float> dead_zone_threshold(
                                                             "dead_zone_threshold",
                                                             "set the dead zone threshold",
                                                             0.f,
                                                             1.f,
                                                             0.01f
                                                             );
        
        ranged_message_descriptor<int> zerox_search_window_size(
                                                          "search_window_size",
                                                          "set the search window size in values",
                                                          1,
                                                          500,
                                                          20
                                                          );
        
        descriptors[ml::k_zerox].add_message_descriptor(zerox_map, dead_zone_threshold, zerox_search_window_size);
    }
Esempio n. 11
0
/*
 * Entry point.  Builds the mass-weighted Hessian of a system+environment
 * network and, depending on the print mode, writes one of several sparse
 * listings to stdout (progress messages go to stderr):
 *
 *   ptmd==2              : one line per coordinate with the centroid mass
 *   ptmd==1              : the contact list (i, j pairs with i<j)
 *   ptmd==0 && rprm==-1  : the env-env sub-Hessian HE (upper triangle)
 *   ptmd==3              : the full Hessian (HS, HX and HE blocks)
 *   otherwise            : HS - PH, where PH is accumulated from the sys-env
 *                          block HX and the matrix read from argv[rprm]
 *                          (per the original comments, the inverse of the
 *                          environmental Hessian)
 *
 * pprm, rprm and ptmd are filled in by read_command_line(); pprm is used as
 * the argv index of the parameter file and rprm as the argv index of the
 * matrix file, with -1 tested as "not given".
 *
 * All matrices/vectors use the 1-based index ranges passed to
 * dmatrix/imatrix/dvector.
 *
 * Returns 0 on success; calls exit(1) if the pseudo-Hessian stage is
 * reached without a matrix file.
 */
int main(int argc,char *argv[])
{
  Centroid *SYS,*ENV;
  Sprngmtx *Gamma;
  char *sysfile,*envfile,*massfile,*ctfile,*spgfile;
  double **GG;
  double *CUT,**HS,**HE,**HX,**PH,*P;
  double dd,x,y;
  int **CT,**PIX,nss,nen,nn,ntp,nse,r1,c1,r2,c2,i,j,k;
  int ptmd,rprm,pprm;


  /* Formalities */
  read_command_line(argc,argv,&pprm,&rprm,&ptmd,&MSCL);


  /* Read coordinate and mass data */
  sysfile=get_param(argv[pprm],"syscoords");
  envfile=get_param(argv[pprm],"envcoords");
  massfile=get_param(argv[pprm],"massfile");
  SYS=read_centroids1(sysfile,massfile,&nss);
  ENV=read_centroids1(envfile,massfile,&nen);
  nn=nss+nen;
  fprintf(stderr,"System read from %s: %d centroids\n",sysfile,nss);
  fprintf(stderr,"Environment read from %s: %d centroids\n",envfile,nen);
  fprintf(stderr,"Masses read from %s\n",massfile);
  CT=imatrix(1,nn,1,nn);


  /* Find extent of membrane */
  membounds(ENV,nen,&MHI,&MLO);


  /* Print masses, if called for: each centroid contributes three
     consecutive coordinate indices (3*i-2 .. 3*i), environment
     coordinates being numbered after the 3*nss system coordinates */
  if(ptmd==2){
    for(i=1;i<=nss;i++){
      for(j=-2;j<=0;j++){
        k=3*i+j;
        printf("%8d%8d% 25.15e\n",k,k,SYS[i].mass);
      }
    }
    for(i=1;i<=nen;i++){
      for(j=-2;j<=0;j++){
        k=3*nss+3*i+j;
        printf("%8d%8d% 25.15e\n",k,k,ENV[i].mass);
      }
    }
    return 0;
  }


  /* ---------------- Assign contacts... ----------------- */
  /* From a contact file... */
  if((ctfile=get_param(argv[pprm],"contactfile"))!=NULL){
    read_contacts(ctfile,CT,nn);
    fprintf(stderr,"Contacts read from %s\n",ctfile);
  }
  /* ...or from a cutoff file... */
  else if((ctfile=get_param(argv[pprm],"cutfile"))!=NULL){
    fprintf(stderr,"Cutoff values read from %s\n",ctfile);
    CUT=read_cutfile2(ctfile,SYS,ENV,nss,nen);
    radius_contact_sysenv(CT,SYS,ENV,nss,nen,CUT);
  }
  /* ...or from default values */
  else{
    CUT=dvector(1,nn);
    for(i=1;i<=nn;i++) CUT[i]=DEFCUT;
    fprintf(stderr,"All cutoff values set to %.3f\n",DEFCUT);
    radius_contact_sysenv(CT,SYS,ENV,nss,nen,CUT);
  }
  fprintf(stderr,"%d clusters\n",num_clusters(CT,nn));

  if(ptmd==1){
    fprintf(stderr,"Printing contacts\n");
    for(i=1;i<=nn;i++){
      for(j=i+1;j<=nn;j++){
        if(CT[i][j]!=0)
          printf("%d\t%d\n",i,j);
      }
    }
    return 0;
  }


  /* ------- Construct the matrix of force constants -------*/
  GG=dmatrix(1,nn,1,nn);

  /* Clear GG before use.  The default branch below only assigns entries
     for pairs in contact, while the whole matrix is handed to
     mwhess_sysenv(); without this, non-contact entries would be read
     uninitialized.  (PH is cleared explicitly after dmatrix() for the
     same reason further down.) */
  for(i=1;i<=nn;i++)
    for(j=1;j<=nn;j++)
      GG[i][j]=0.0;

  /* Read force constants from file... */
  if((spgfile=get_param(argv[pprm],"springfile"))!=NULL){
    fprintf(stderr,"Reading spring constants from %s\n",spgfile);
    Gamma=read_springfile_sysenv(spgfile,SYS,ENV,nss,nen,&ntp);
    spring_constants_sysenv(SYS,ENV,GG,CT,Gamma,nss,nen,ntp);
  }
  /* ...or else assign the default value to all springs */
  else{
    for(i=1;i<=nn;i++){
      for(j=i;j<=nn;j++){
        if(CT[i][j]!=0)
          GG[i][j]=GG[j][i]=DEFGAM;
      }
    }
  }


  /* Construct the mass-weighted Hessian from
     coordinates, masses, and potential matrix.
     HS: sys-sys block, HE: env-env block, HX: sys-env block. */
  fprintf(stderr,"Calculating Hessian...\n");
  HS=dmatrix(1,3*nss,1,3*nss);
  HE=dmatrix(1,3*nen,1,3*nen);
  HX=dmatrix(1,3*nss,1,3*nen);
  mwhess_sysenv(HS,HE,HX,SYS,ENV,GG,nss,nen);


  /* PRINT THE ENVIRONMENT-ENVIRONMENT SUB-HESSIAN IN SPARSE FORMAT */
  if(ptmd==0 && rprm==-1){
    fprintf(stderr,"\nPrinting env-env sub-hessian...\n\n");
    for(i=1;i<=3*nen;i++){
      for(j=i;j<=3*nen;j++){
        if(fabs(HE[i][j])>1.0e-10)
          printf("%8d%8d% 25.15e\n",i,j,HE[i][j]);
      }
    }
    return 0;
  }


  /* PRINT THE FULL HESSIAN IN SPARSE FORMAT
     (upper triangle only; environment indices are offset by 3*nss) */
  if(ptmd==3){
    for(i=1;i<=3*nss;i++){
      for(j=i;j<=3*nss;j++){
        if(fabs(HS[i][j])>1.0e-10)
          printf("%8d%8d% 20.10e\n",i,j,HS[i][j]);
      }
      for(j=1;j<=3*nen;j++){
        if(fabs(HX[i][j])>1.0e-10)
          printf("%8d%8d% 20.10e\n",i,j+3*nss,HX[i][j]);
      }
    }
    for(i=1;i<=3*nen;i++){
      for(j=i;j<=3*nen;j++){
        if(fabs(HE[i][j])>1.0e-10)
          printf("%8d%8d% 20.10e\n",i+3*nss,j+3*nss,HE[i][j]);
      }
    }
    return 0;
  }


  /* READ INVERSE OF ENVIRONMENTAL HESSIAN, OR INVERT HE */
  /* The contact and force-constant matrices are no longer needed */
  free_imatrix(CT,1,nn,1,nn);
  free_dmatrix(GG,1,nn,1,nn);
  if(rprm!=-1){
    /* HE is overwritten with the matrix stored in the file */
    fprintf(stderr,"Reading matrix from %s...\n",argv[rprm]);
    read_sparsemtx(argv[rprm],HE,3*nen,3*nen);
  }
  else{
    /* No in-program inversion is implemented: a matrix file is required */
    fprintf(stderr,"\nWell...How did I get here?\n\n");
    exit(1);
  }


  /* ---------------- CALCULATE AND PRINT THE PSEUDOHESSIAN ---------------- */

  /* COUNT THE NUMBER OF NON-ZERO TERMS IN THE PROJECTION MATRIX */
  nse=0;
  for(i=1;i<=3*nss;i++)
    for(j=1;j<=3*nen;j++)
      if(fabs(HX[i][j])>1.0e-9) nse++;
  fprintf(stderr,"%d non-zero projection elements\n",nse);

  /* Store the non-zero elements of HX as a coordinate list:
     P[k] is the value, PIX[k][1] its row and PIX[k][2] its column.
     The list is built row by row, so rows are non-decreasing in k. */
  P=dvector(1,nse);
  PIX=imatrix(1,nse,1,2);
  k=1;
  for(i=1;i<=3*nss;i++){
    for(j=1;j<=3*nen;j++){
      if(fabs(HX[i][j])>1.0e-9){
        PIX[k][1]=i;
        PIX[k][2]=j;
        P[k]=HX[i][j];
        k++;
      }
    }
  }
  free_dmatrix(HX,1,3*nss,1,3*nen);

  PH=dmatrix(1,3*nss,1,3*nss);
  for(i=1;i<=3*nss;i++)
    for(j=i;j<=3*nss;j++)
      PH[i][j]=PH[j][i]=0.0;

  /* Accumulate PH[r1][r2] += HX[r1][c1]*HE[c1][c2]*HX[r2][c2] over pairs
     of stored elements with j>=i.  Because rows are non-decreasing in the
     list, r1<=r2 always holds and only the upper triangle of PH is
     filled.  Within a single row (r1==r2) the pair (c2,c1) is never
     visited, so its contribution is added by counting the (c1,c2) term
     twice -- this presumably relies on HE being symmetric. */
  for(i=1;i<=nse;i++){
    r1=PIX[i][1];
    c1=PIX[i][2];
    x=P[i];
    for(j=i;j<=nse;j++){
      r2=PIX[j][1];
      c2=PIX[j][2];
      y=HE[c1][c2]*P[j]*x;
      PH[r1][r2]+=y;
      if(r1==r2 && c1!=c2)
        PH[r1][r2]+=y;
    }
  }

  /* Print the upper triangle of HS - PH in sparse format */
  for(i=1;i<=3*nss;i++){
    for(j=i;j<=3*nss;j++){
      dd=HS[i][j]-PH[i][j];
      if(fabs(dd)>1.0e-10)
        printf("%8d%8d% 25.15e\n",i,j,dd);
    }
  }
  return 0;
}