// Returns every vertex i (0 <= i < n) whose summaries[i].get_max() is the
// smallest over all vertices, in increasing vertex order.
//
// The undirected graph is given as `edges` over vertices 0..n-1. The summaries
// are filled by first_pass followed by second_pass, both started at vertex 0
// with parent -1.
// NOTE(review): get_max() is presumably the height of the tree when rooted at
// that vertex (a two-pass rerooting computation) - confirm against `summary`.
//
// n     : number of vertices; for n <= 0 an empty vector is returned.
// edges : vertex pairs; both endpoints must be valid indices in [0, n).
vector<int> findMinHeightTrees(int n, vector<pair<int, int>>& edges)
        {
            // Without vertices there is nothing to traverse, and the passes
            // below are unconditionally started at vertex 0.
            if (n <= 0)
            {
                return vector<int>();
            }

            vector<vector<int>> adjacency_list(n);
            vector<summary> summaries(n);

            // Every edge is stored in both directions.
            for (const auto& edge : edges)
            {
                adjacency_list[edge.first].push_back(edge.second);
                adjacency_list[edge.second].push_back(edge.first);
            }

            first_pass(-1, 0, adjacency_list, summaries);
            second_pass(-1, 0, 0, adjacency_list, summaries);

            // Seed the minimum from a real vertex rather than a fixed sentinel,
            // so the result cannot come out empty when all values are large.
            int min_max = summaries[0].get_max();
            for (int i = 1; i < n; i++)
            {
                min_max = min(min_max, summaries[i].get_max());
            }

            // Collect all vertices that attain the minimum.
            vector<int> result;
            for (int i = 0; i < n; i++)
            {
                if (summaries[i].get_max() == min_max)
                {
                    result.push_back(i);
                }
            }
            return result;
        }
void train_one_round(const Fast5Map& name_map, size_t round)
{
    const PoreModelMap& current_models = PoreModelSet::get_models(opt::trained_model_type);

    // Initialize the training summary stats for each kmer for each model
    ModelTrainingMap model_training_data;
    for(auto current_model_iter = current_models.begin(); current_model_iter != current_models.end(); current_model_iter++) {
        // one summary entry per kmer in the model
        std::vector<StateSummary> summaries(current_model_iter->second.get_num_states());
        model_training_data[current_model_iter->first] = summaries;
    }

    // Open the BAM and iterate over reads

    // load bam file
    htsFile* bam_fh = sam_open(opt::bam_file.c_str(), "r");
    assert(bam_fh != NULL);

    // load bam index file
    std::string index_filename = opt::bam_file + ".bai";
    hts_idx_t* bam_idx = bam_index_load(index_filename.c_str());
    assert(bam_idx != NULL);

    // read the bam header
    bam_hdr_t* hdr = sam_hdr_read(bam_fh);

    // load reference fai file
    faidx_t *fai = fai_load(opt::genome_file.c_str());

    hts_itr_t* itr;

    // If processing a region of the genome, only emit events aligned to this window
    int clip_start = -1;
    int clip_end = -1;

    if(opt::region.empty()) {
        // TODO: is this valid?
        itr = sam_itr_queryi(bam_idx, HTS_IDX_START, 0, 0);
    } else {
        fprintf(stderr, "Region: %s\n", opt::region.c_str());
        itr = sam_itr_querys(bam_idx, hdr, opt::region.c_str());
        hts_parse_reg(opt::region.c_str(), &clip_start, &clip_end);
    }

#ifndef H5_HAVE_THREADSAFE
    if(opt::num_threads > 1) {
        fprintf(stderr, "You enabled multi-threading but you do not have a threadsafe HDF5\n");
        fprintf(stderr, "Please recompile nanopolish's built-in libhdf5 or run with -t 1\n");
        exit(1);
    }
#endif

    // Initialize iteration
    std::vector<bam1_t*> records(opt::batch_size, NULL);
    for(size_t i = 0; i < records.size(); ++i) {
        records[i] = bam_init1();
    }

    int result;
    size_t num_reads_realigned = 0;
    size_t num_records_buffered = 0;
    Progress progress("[methyltrain]");

    do {
        assert(num_records_buffered < records.size());
        
        // read a record into the next slot in the buffer
        result = sam_itr_next(bam_fh, itr, records[num_records_buffered]);
        num_records_buffered += result >= 0;

        // realign if we've hit the max buffer size or reached the end of file
        if(num_records_buffered == records.size() || result < 0) {
            #pragma omp parallel for            
            for(size_t i = 0; i < num_records_buffered; ++i) {
                bam1_t* record = records[i];
                size_t read_idx = num_reads_realigned + i;
                if( (record->core.flag & BAM_FUNMAP) == 0) {
                    add_aligned_events(name_map, fai, hdr, record, read_idx, clip_start, clip_end, round, model_training_data);
                }
            }

            num_reads_realigned += num_records_buffered;
            num_records_buffered = 0;
        }

        if(opt::progress) {
            fprintf(stderr, "Realigned %zu reads in %.1lfs\r", num_reads_realigned, progress.get_elapsed_seconds());
        }
    } while(result >= 0);
    
    assert(num_records_buffered == 0);
    progress.end();
    
    // open the summary file
    std::stringstream summary_fn;
    summary_fn << "methyltrain" << opt::out_suffix << ".summary";
    FILE* summary_fp = fopen(summary_fn.str().c_str(), "w");
    fprintf(summary_fp, "model_short_name\tkmer\tnum_matches\tnum_skips\t"
                         "num_stays\tnum_events_for_training\twas_trained\t"
                         "trained_level_mean\ttrained_level_stdv\n");

    // open the tsv file with the raw training data
    std::stringstream training_fn;
    training_fn << "methyltrain" << opt::out_suffix << ".round" << round << ".events.tsv";
    std::ofstream training_ofs(training_fn.str());

    // write out a header for the training data
    StateTrainingData::write_header(training_ofs);

    // iterate over models: template, complement_pop1, complement_pop2
    for(auto model_training_iter = model_training_data.begin(); 
             model_training_iter != model_training_data.end(); model_training_iter++) {
        
        // Initialize the trained model from the input model
        auto current_model_iter = current_models.find(model_training_iter->first);
        assert(current_model_iter != current_models.end());

        std::string model_name = model_training_iter->first;
        std::string model_short_name = current_model_iter->second.metadata.get_short_name();
        
        // Initialize the new model from the current model
        PoreModel updated_model = current_model_iter->second;
        uint32_t k = updated_model.k;
        const std::vector<StateSummary>& summaries = model_training_iter->second;

        // Generate the complete set of kmers
        std::string gen_kmer(k, 'A');
        std::vector<std::string> all_kmers;
        for(size_t ki = 0; ki < summaries.size(); ++ki) {
            all_kmers.push_back(gen_kmer);
            mtrain_alphabet->lexicographic_next(gen_kmer);
        }
        assert(gen_kmer == std::string(k, 'A'));
        assert(all_kmers.front() == std::string(k, 'A'));
        assert(all_kmers.back() == std::string(k, 'T'));

        // Update means for each kmer
        #pragma omp parallel for
        for(size_t ki = 0; ki < summaries.size(); ++ki) {
            assert(ki < all_kmers.size());
            std::string kmer = all_kmers[ki];

            // write the observed values to a tsv file
            #pragma omp critical
            {
                for(size_t ei = 0; ei < summaries[ki].events.size(); ++ei) {
                    summaries[ki].events[ei].write_tsv(training_ofs, model_short_name, kmer);
                }

            }

            bool is_m_kmer = kmer.find('M') != std::string::npos;
            bool update_kmer = opt::training_target == TT_ALL_KMERS ||
                               (is_m_kmer && opt::training_target == TT_METHYLATED_KMERS) ||
                               (!is_m_kmer && opt::training_target == TT_UNMETHYLATED_KMERS);
            bool trained = false;
            // only train if there are a sufficient number of events for this kmer
            if(update_kmer && summaries[ki].events.size() >= opt::min_number_of_events_to_train) {
                
                // train a mixture model where a minority of k-mers aren't methylated
                ParamMixture mixture;
                
                float incomplete_methylation_rate = 0.05f;
                std::string um_kmer = mtrain_alphabet->unmethylate(kmer);
                size_t um_ki = mtrain_alphabet->kmer_rank(um_kmer.c_str(), k);
                
                // Initialize the training parameters. If this is a kmer containing
                // a methylation site we train a two component mixture, otherwise
                // just fit a gaussian
                float major_weight = is_m_kmer ? 1 - incomplete_methylation_rate : 1.0f;
                mixture.log_weights.push_back(log(major_weight));
                mixture.params.push_back(current_model_iter->second.get_parameters(ki));
                
                if(is_m_kmer) {
                    // add second unmethylated component
                    mixture.log_weights.push_back(std::log(incomplete_methylation_rate));
                    mixture.params.push_back(current_model_iter->second.get_parameters(um_ki));
                }

                if(opt::verbose > 1) {
                    fprintf(stderr, "INIT__MIX %s\t%s\t[%.2lf %.2lf %.2lf]\t[%.2lf %.2lf %.2lf]\n", model_training_iter->first.c_str(), kmer.c_str(), 
                        std::exp(mixture.log_weights[0]), mixture.params[0].level_mean, mixture.params[0].level_stdv,
                        std::exp(mixture.log_weights[1]), mixture.params[1].level_mean, mixture.params[1].level_stdv);
                }

                ParamMixture trained_mixture = train_gaussian_mixture(summaries[ki].events, mixture);

                if(opt::verbose > 1) {
                    fprintf(stderr, "TRAIN_MIX %s\t%s\t[%.2lf %.2lf %.2lf]\t[%.2lf %.2lf %.2lf]\n", model_training_iter->first.c_str(), kmer.c_str(), 
                        std::exp(trained_mixture.log_weights[0]), trained_mixture.params[0].level_mean, trained_mixture.params[0].level_stdv,
                        std::exp(trained_mixture.log_weights[1]), trained_mixture.params[1].level_mean, trained_mixture.params[1].level_stdv);
                }

                #pragma omp critical
                updated_model.states[ki] = trained_mixture.params[0];

                if (model_stdv()) {
                    ParamMixture ig_mixture;
                    // weights
                    ig_mixture.log_weights = trained_mixture.log_weights;
                    // states
                    ig_mixture.params.emplace_back(trained_mixture.params[0]);

                    if(is_m_kmer) {
                        ig_mixture.params.emplace_back(current_model_iter->second.get_parameters(um_ki));
                    }
                    // run training
                    auto trained_ig_mixture = train_invgaussian_mixture(summaries[ki].events, ig_mixture);

                    LOG("methyltrain", debug)
                        << "IG_INIT__MIX " << model_training_iter->first.c_str() << " " << kmer.c_str() << " ["
                        << std::fixed << std::setprecision(5) << ig_mixture.params[0].sd_mean << " "
                        << ig_mixture.params[1].sd_mean << "]" << std::endl
                        << "IG_TRAIN_MIX " << model_training_iter->first.c_str() << " " << kmer.c_str() << " ["
                        << trained_ig_mixture.params[0].sd_mean << " "
                        << trained_ig_mixture.params[1].sd_mean << "]" << std::endl;

                    // update state
                    #pragma omp critical
                    {
                        updated_model.states[ki] = trained_ig_mixture.params[0];
                    }
                }

                trained = true;
            }

            #pragma omp critical
            {
                fprintf(summary_fp, "%s\t%s\t%d\t%d\t%d\t%zu\t%d\t%.2lf\t%.2lf\n",
                                        model_short_name.c_str(), kmer.c_str(), 
                                        summaries[ki].num_matches, summaries[ki].num_skips, summaries[ki].num_stays, 
                                        summaries[ki].events.size(), trained, updated_model.states[ki].level_mean, updated_model.states[ki].level_stdv);
            }

            // add the updated model into the collection (or replace what is already there)
            PoreModelSet::insert_model(opt::trained_model_type, updated_model);
        }
    }

    // cleanup records
    for(size_t i = 0; i < records.size(); ++i) {
        bam_destroy1(records[i]);
    }

    // cleanup
    sam_itr_destroy(itr);
    bam_hdr_destroy(hdr);
    fai_destroy(fai);
    sam_close(bam_fh);
    hts_idx_destroy(bam_idx);
    fclose(summary_fp);
}