/**
 * Drives the EM procedure around a Naive Bayes classifier.
 *
 * The classifier's likelihood and prior are first computed from the labeled
 * documents only. The loop body is written to alternate between an E step
 * (classify_all_unlabeled_documents) and an M step (recomputing likelihood and
 * prior over feature_vectors using the combined labeled + unlabeled document
 * count) until check_if_converged() reports convergence.
 *
 * @param classifier                  classifier whose likelihood/prior are (re)computed
 * @param feature_vectors             passed to the M step together with the combined
 *                                    document count
 * @param labeled_docs                documents used for the initial fit
 * @param unlabeled_docs              documents classified in the E step
 * @param number_unique_words         second dimension of the likelihood buffers
 * @param number_unlabeled_documents  number of entries in unlabeled_docs
 * @param number_labeled_documents    number of entries in labeled_docs
 * @param number_labels               first dimension of the likelihood/prior buffers
 */
void EM::run_em(NaiveBayesClassifier* classifier, int** feature_vectors, int** labeled_docs, int** unlabeled_docs, int number_unique_words, int number_unlabeled_documents, int number_labeled_documents, int number_labels)
{
    /* Initialize arrays to store old parameters */
    long double** old_likelihood = (long double**)malloc(sizeof(long double*) * number_labels);
    long double* old_prior = (long double*)malloc(sizeof(long double) * number_labels);
    for (int i = 0; i < number_labels; i++)
        old_likelihood[i] = (long double*)malloc(sizeof(long double) * number_unique_words);

    /* Initial Step */
    // Construct classifier with labeled feature_vectors
    classifier->calculate_likelihood(labeled_docs, number_unique_words, number_labeled_documents, number_labels);
    classifier->calculate_prior(labeled_docs, number_labeled_documents, number_labels);

    for (;;)
    {
        // NOTE(review): this unconditional break leaves the loop before the first
        // E step, so the E/M code below never runs. It is kept as-is because
        // removing it changes what callers currently get; confirm whether it is
        // a debugging leftover.
        break;

        /* E Step */
        printf("Performing E Step\n");
        classify_all_unlabeled_documents(classifier, unlabeled_docs, number_unique_words, number_unlabeled_documents, number_labels);
        //ConsolePrint::print_2d_int(number_unique_words, number_unlabeled_documents, unlabeled_docs);
        //ConsolePrint::print_2d_int(number_unique_words, number_labeled_documents, labeled_docs);
        //ConsolePrint::print_2d_int(number_unique_words, number_labeled_documents+number_unlabeled_documents, feature_vectors);

        /* M Step */
        printf("Performing M Step\n");
        // Snapshot the current parameters before they are recomputed.
        copy_parameters(classifier->get_likelihood(), classifier->get_prior(), old_likelihood, old_prior, number_labels, number_unique_words);
        classifier->calculate_likelihood(feature_vectors, number_unique_words, number_labeled_documents + number_unlabeled_documents, number_labels);
        classifier->calculate_prior(feature_vectors, number_labeled_documents + number_unlabeled_documents, number_labels);

        /* Check for convergence */
        if (check_if_converged(old_likelihood, old_prior, classifier->get_likelihood(), classifier->get_prior(), number_labels, number_unique_words))
            break;
    }

    /* Release the scratch buffers; previously they were leaked on every call. */
    if (old_likelihood != NULL)
    {
        for (int i = 0; i < number_labels; i++)
            free(old_likelihood[i]);
    }
    free(old_likelihood);
    free(old_prior);

    printf("EM Process Done\n");
}
/** * A function to create a new load balancer. * @param[in] api The deltacloud_api structure representing the connection * @param[in] name The name to give to the new load balancer * @param[in] realm_id The realm ID to put the new load balancer in * @param[in] protocol The protocol to load balance * @param[in] balancer_port The port the load balancer listens on * @param[in] instance_port The port the load balancer balances to * @param[in] params An array of deltacloud_create_parameter structures that * represent any optional parameters to pass into the * create call * @param[in] params_length An integer describing the length of the params * array * @returns 0 on success, -1 on error */ int deltacloud_create_loadbalancer(struct deltacloud_api *api, const char *name, const char *realm_id, const char *protocol, int balancer_port, int instance_port, struct deltacloud_create_parameter *params, int params_length) { struct deltacloud_create_parameter *internal_params; int ret = -1; int pos; if (!valid_api(api) || !valid_arg(name) || !valid_arg(realm_id) || !valid_arg(protocol)) return -1; if (balancer_port < 0 || balancer_port > 65536) { invalid_argument_error("balancer_port must be between 0 and 65536"); return -1; } if (instance_port < 0 || instance_port > 65536) { invalid_argument_error("instance_port must be between 0 and 65536"); return -1; } if (params_length < 0) { invalid_argument_error("params_length must be >= 0"); return -1; } internal_params = calloc(params_length + 5, sizeof(struct deltacloud_create_parameter)); if (internal_params == NULL) { oom_error(); return -1; } pos = copy_parameters(internal_params, params, params_length); if (pos < 0) /* copy_parameters already set the error */ goto cleanup; if (deltacloud_prepare_parameter(&internal_params[pos++], "name", name) < 0 || deltacloud_prepare_parameter(&internal_params[pos++], "realm_id", realm_id) < 0 || deltacloud_prepare_parameter(&internal_params[pos++], "listener_protocol", protocol) < 0) /* 
deltacloud_prepare_parameter already set the error */ goto cleanup; if (prepare_int_parameter(&internal_params[pos++], "listener_balancer_port", balancer_port) < 0) /* prepare_int_parameter already set the error */ goto cleanup; if (prepare_int_parameter(&internal_params[pos++], "listener_instance_port", instance_port) < 0) /* prepare_int_parameter already set the error */ goto cleanup; if (internal_create(api, "load_balancers", internal_params, pos, NULL, NULL) < 0) /* internal_create already set the error */ goto cleanup; ret = 0; cleanup: free_parameters(internal_params, pos); SAFE_FREE(internal_params); return ret; }
static int lb_register_unregister(struct deltacloud_api *api, struct deltacloud_loadbalancer *balancer, const char *instance_id, struct deltacloud_create_parameter *params, int params_length, const char *link) { struct deltacloud_link *thislink; struct deltacloud_create_parameter *internal_params; char *href; int ret = -1; int pos; int rc; if (!valid_api(api) || !valid_arg(balancer) || !valid_arg(instance_id)) return -1; if (params_length < 0) { invalid_argument_error("params_length must be >= 0"); return -1; } thislink = api_find_link(api, "load_balancers"); if (thislink == NULL) /* api_find_link set the error */ return -1; internal_params = calloc(params_length + 1, sizeof(struct deltacloud_create_parameter)); if (internal_params == NULL) { oom_error(); return -1; } pos = copy_parameters(internal_params, params, params_length); if (pos < 0) /* copy_parameters already set the error */ goto cleanup; if (deltacloud_prepare_parameter(&internal_params[pos++], "instance_id", instance_id) < 0) /* deltacloud_prepare_parameter already set the error */ goto cleanup; if (asprintf(&href, "%s/%s/%s", thislink->href, balancer->id, link) < 0) { oom_error(); goto cleanup; } rc = internal_post(api, href, internal_params, pos, NULL, NULL); SAFE_FREE(href); if (rc < 0) /* internal_post already set the error */ goto cleanup; ret = 0; cleanup: free_parameters(internal_params, pos); SAFE_FREE(internal_params); return ret; }
void parameter_test(Tree &T, Model &Mod, long Nrep, long length, double eps, std::vector<double> &pvals, std::string data_prefix, bool save_mc_exact){ long iter; long i, r; double df, C; double distance, KL; KL=0; distance=0; double likel; Parameters Parsim, Par, Par_noperm; Alignment align; Counts data; double eps_pseudo = 0.001; // Amount added to compute the pseudo-counts. StateList sl; bool save_data = (data_prefix != ""); std::string output_filename; std::stringstream output_index; std::ofstream logfile; std::ofstream logdistfile; std::ofstream out_chi2; std::ofstream out_br; std::ofstream out_brPerc; std::ofstream out_pvals; std::ofstream out_pvals_noperm; std::ofstream out_qvals; std::ofstream out_bound; std::ofstream out_variances; std::ofstream out_qvalsComb; std::ofstream out_qvalsCombzscore; std::ofstream out_covmatrix; std::ofstream out_parest; std::ofstream out_parsim; std::vector<double> KLe; std::vector<std::vector<double> > chi2_array; // an array of chi2 for every edge. std::vector<std::vector<double> > mult_array; // an array of mult for every edge. std::vector<std::vector<double> > br_array; // an array of br. length for every edge. std::vector<std::vector<double> > br_arrayPerc; // an array of br. length for every edge. std::vector<std::vector<double> > cota_array; // an array of upper bounds of the diff in lengths for every edge. std::vector<std::vector<double> > pval_array; // an array of pvals for every edge. std::vector<std::vector<double> > pval_noperm_array; std::vector<std::vector<double> > qval_array; // an array of qvalues for every edge. std::vector<std::vector<double> > variances_array; // an array of theoretical variances. 
std::vector<std::vector<double> > parest_array; // array of estimated parameters std::vector<std::vector<double> > parsim_array; // array of simulation parameters // ci_binom ci_bin; // condfidence interval std::vector<std::vector<ci_binom> > CIbinomial ; // vector of CIs std::list<long> produced_nan; long npars = T.nedges*Mod.df + Mod.rdf; // Initializing pvals pvals.resize(T.nedges); // Initialize the parameters for simulation of K81 data for testing Par = create_parameters(T); Parsim = create_parameters(T); // Initializing data structures KLe.resize(T.nedges); pval_array.resize(T.nedges); pval_noperm_array.resize(T.nedges); qval_array.resize(T.nedges); chi2_array.resize(T.nedges); mult_array.resize(T.nedges); br_array.resize(T.nedges); br_arrayPerc.resize(T.nedges); cota_array.resize(T.nedges); variances_array.resize(npars); parest_array.resize(npars); parsim_array.resize(npars); // initialize to 0's for (i=0; i < T.nedges; i++) { pval_array[i].resize(Nrep, 0); pval_noperm_array[i].resize(Nrep, 0); qval_array[i].resize(Nrep, 0); chi2_array[i].resize(Nrep, 0); mult_array[i].resize(Nrep, 0); br_array[i].resize(Nrep, 0); br_arrayPerc[i].resize(Nrep, 0); cota_array[i].resize(Nrep, 0); } for(i=0; i < npars; i++) { variances_array[i].resize(Nrep, 0); parest_array[i].resize(Nrep, 0); parsim_array[i].resize(Nrep, 0); } // Information about the chi^2. 
df = Mod.df; C = get_scale_constant(Mod); if (save_data) { logfile.open((data_prefix + ".log").c_str(), std::ios::out); logfile << "model: " << Mod.name << std::endl; logfile << "length: " << length << std::endl; logfile << "eps: " << eps << std::endl; logfile << "nalpha: " << T.nalpha << std::endl; logfile << "leaves: " << T.nleaves << std::endl; logfile << "tree: " << T.tree_name << std::endl; logfile << std::endl; logdistfile.open((data_prefix + ".dist.log").c_str(), std::ios::out); out_chi2.open(("out_chi2-" + data_prefix + ".txt").c_str(), std::ios::out); out_br.open(("out_br-" + data_prefix + ".txt").c_str(), std::ios::out); out_brPerc.open(("out_brPerc-" + data_prefix + ".txt").c_str(), std::ios::out); out_pvals.open(("out_pvals-" + data_prefix + ".txt").c_str(), std::ios::out); out_pvals_noperm.open(("out_pvals_noperm-" + data_prefix + ".txt").c_str(), std::ios::out); out_qvals.open(("out_qvals-" + data_prefix + ".txt").c_str(), std::ios::out); out_variances.open(("out_variances-" + data_prefix + ".txt").c_str(), std::ios::out); out_parest.open(("out_params-est-" + data_prefix + ".txt").c_str(), std::ios::out); out_parsim.open(("out_params-sim-" + data_prefix + ".txt").c_str(), std::ios::out); out_bound.open(("out_bound-" + data_prefix + ".txt").c_str(), std::ios::out); out_qvalsComb.open(("out_qvalsComb-" + data_prefix + ".txt").c_str(), std::ios::out); out_qvalsCombzscore.open(("out_qvalsCombzscore-" + data_prefix + ".txt").c_str(), std::ios::out); out_parsim.precision(15); out_parest.precision(15); out_variances.precision(15); } // uncomment the 2 following lines if want to fix the parameters // random_parameters_length(T, Mod, Parsim); //random_data(T, Mod, Parsim, length, align); for (iter=0; iter < Nrep; iter++) { std::cout << "iteration: " << iter << " \n"; // Produces an alignment from random parameters random_parameters_length(T, Mod, Parsim); random_data(T, Mod, Parsim, length, align); get_counts(align, data); add_pseudocounts(eps_pseudo, data); 
// Saving data if (save_data) { output_index.str(""); output_index << iter; output_filename = data_prefix + "-" + output_index.str(); save_alignment(align, output_filename + ".fa"); save_parameters(Parsim, output_filename + ".sim.dat"); } // Runs the EM std::tie(likel, iter) = EMalgorithm(T, Mod, Par, data, eps); // If algorithm returns NaN skip this iteration. if (boost::math::isnan(likel)) { produced_nan.push_back(iter); continue; } copy_parameters(Par, Par_noperm); // Chooses the best permutation. guess_permutation(T, Mod, Par); distance = parameters_distance(Parsim, Par); // estimated counts: Par ; original: Parsim std::vector<double> counts_est; counts_est.resize(T.nalpha, 0); // calculate the cov matrix std::vector<std::vector<double> > Cov; Array2 Cov_br; full_MLE_covariance_matrix(T, Mod, Parsim, length, Cov); if(save_data) { save_matrix(Cov, output_filename + ".cov.dat"); } // Save the covariances in an array std::vector<double> param; std::vector<double> param_sim; param.resize(npars); param_sim.resize(npars); get_free_param_vector(T, Mod, Par, param); get_free_param_vector(T, Mod, Parsim, param_sim); for(i=0; i < npars; i++) { variances_array[i][iter] = Cov[i][i]; parsim_array[i][iter] = param_sim[i]; parest_array[i][iter] = param[i]; } std::vector<double> xbranca, xbranca_noperm, mubranca; double chi2_noperm; xbranca.resize(Mod.df); xbranca_noperm.resize(Mod.df); mubranca.resize(Mod.df); for (i=0; i < T.nedges; i++) { r = 0; // row to be fixed // Extracts the covariance matrix, 1 edge branch_inverted_covariance_matrix(Mod, Cov, i, Cov_br); get_branch_free_param_vector(T, Mod, Parsim, i, mubranca); get_branch_free_param_vector(T, Mod, Par, i, xbranca); get_branch_free_param_vector(T, Mod, Par_noperm, i, xbranca_noperm); chi2_array[i][iter] = chi2_mult(mubranca, xbranca, Cov_br); chi2_noperm = chi2_mult(mubranca, xbranca_noperm, Cov_br); pval_array[i][iter] = pvalue_chi2(chi2_array[i][iter], Mod.df); pval_noperm_array[i][iter] = pvalue_chi2(chi2_noperm, 
Mod.df); br_array[i][iter] = T.edges[i].br - branch_length(Par.tm[i], T.nalpha); br_arrayPerc[i][iter] = branch_length(Par.tm[i], T.nalpha)/T.edges[i].br; // Upper bound on the parameter distance using multinomial: // cota_array[i][iter] = bound_mult(Parsim.tm[i], Xm, length); // and using the L2 bound cota_array[i][iter] = branch_length_error_bound_mult(Parsim.tm[i], Par.tm[i]); out_br << br_array[i][iter] << " "; out_brPerc << br_arrayPerc[i][iter] << " "; out_bound << cota_array[i][iter] << " "; out_chi2 << chi2_array[i][iter] << " "; } out_chi2 << std::endl; out_bound << std::endl; out_br << std::endl; out_brPerc << std::endl; // Saves more data. if (save_data) { logfile << iter << ": " << distance << " " << KL << std::endl; save_parameters(Par, output_filename + ".est.dat"); logdistfile << iter << ": "; logdistfile << parameters_distance_root(Par, Parsim) << " "; for(int j=0; j < T.nedges; j++) { logdistfile << parameters_distance_edge(Par, Parsim, j) << " "; } logdistfile << std::endl; } } // close iter loop here // Correct the p-values for(i=0; i < T.nedges; i++) { BH(pval_array[i], qval_array[i]); //save them } if (save_mc_exact) { for(long iter=0; iter < Nrep; iter++) { for(long i=0; i < T.nedges; i++) { out_pvals << pval_array[i][iter] << " "; out_pvals_noperm << pval_noperm_array[i][iter] << " "; out_qvals << qval_array[i][iter] << " "; } out_pvals << std::endl; out_pvals_noperm << std::endl; out_qvals << std::endl; for(long i=0; i < npars; i++) { out_variances << variances_array[i][iter] << " "; out_parsim << parsim_array[i][iter] << " "; out_parest << parest_array[i][iter] << " "; } out_variances << std::endl; out_parsim << std::endl; out_parest << std::endl; } } // now combine the pvalues for(i=0; i < T.nedges; i++) { pvals[i] = Fisher_combined_pvalue(pval_array[i]); //using the Zscore it goes like this: pvals[i] = Zscore_combined_pvalue(pval_array[i]); if (save_mc_exact) { out_qvalsComb << pvals[i] << " " ; out_qvalsCombzscore << 
Zscore_combined_pvalue(pval_array[i]) << " "; } } // Close files if (save_data) { logdistfile.close(); logfile.close(); } if (save_mc_exact) { out_chi2.close(); out_bound.close(); out_variances.close(); out_parest.close(); out_parsim.close(); out_br.close(); out_brPerc.close(); out_pvals.close(); out_qvals.close(); out_qvalsComb.close(); out_qvalsCombzscore.close(); out_covmatrix.close(); } // Warn if some EM's produced NaN. if (produced_nan.size() > 0) { std::cout << std::endl; std::cout << "WARNING: Some iterations produced NaN." << std::endl; std::list<long>::iterator it; for (it = produced_nan.begin(); it != produced_nan.end(); it++) { std::cout << *it << ", "; } std::cout << std::endl; } }
/** * A function to create a new storage snapshot. * @param[in] api The deltacloud_api structure representing the connection * @param[in] volume The volume to take the snapshot from * @param[in] params An array of deltacloud_create_parameter structures that * represent any optional parameters to pass into the * create call * @param[in] params_length An integer describing the length of the params * array * @param[out] snap_id The snapshot_id returned by the create call * @returns 0 on success, -1 on error */ int deltacloud_create_storage_snapshot(struct deltacloud_api *api, struct deltacloud_storage_volume *volume, struct deltacloud_create_parameter *params, int params_length, char **snap_id) { struct deltacloud_create_parameter *internal_params; struct deltacloud_storage_snapshot snap; char *data = NULL; int ret = -1; int pos; if (!valid_api(api) || !valid_arg(volume)) return -1; if (params_length < 0) { invalid_argument_error("params_length must be >= 0"); return -1; } internal_params = calloc(params_length + 1, sizeof(struct deltacloud_create_parameter)); if (internal_params == NULL) { oom_error(); return -1; } pos = copy_parameters(internal_params, params, params_length); if (pos < 0) /* copy_parameters already set the error */ goto cleanup; if (deltacloud_prepare_parameter(&internal_params[pos++], "volume_id", volume->id) < 0) /* deltacloud_create_parameter already set the error */ goto cleanup; if (internal_create(api, "storage_snapshots", internal_params, pos, &data, NULL) < 0) /* internal_create already set the error */ goto cleanup; if (snap_id != NULL) { if (internal_xml_parse(data, "storage_snapshot", parse_one_storage_snapshot, 1, &snap) < 0) /* internal_xml_parse set the error */ goto cleanup; *snap_id = strdup(snap.id); deltacloud_free_storage_snapshot(&snap); if (*snap_id == NULL) { oom_error(); goto cleanup; } } ret = 0; cleanup: free_parameters(internal_params, pos); SAFE_FREE(internal_params); SAFE_FREE(data); return ret; }