예제 #1
0
/// \brief Add the pairwise leaf distances of one tree to the running sums.
///
/// On the first call, the leaf count and leaf labels are taken from \a T and both
/// accumulators are sized to N*(N-1)/2 entries (one per unordered leaf pair) and zeroed.
/// Every call then increments the sample count and, for each pair (i,j) with j<i,
/// adds the distance to m1 and the squared distance to m2.
///
/// \param T The tree whose leaf-to-leaf distances are accumulated.
void count_pair_distances::operator()(const SequenceTree& T)
{
  // First sample only: remember the leaf set and allocate the accumulators.
  if (not initialized) {
    N = T.n_leaves();
    names = T.get_leaf_labels();

    m1.resize(N*(N-1)/2);
    m1 = 0;

    m2.resize(N*(N-1)/2);
    m2 = 0;

    initialized = true;
  }

  ++n_samples;

  // NOTE: every pair is queried directly.  The original author suspected that a
  // faster approach based on partition_sets(T) is possible.

  // Pairs are visited as (1,0),(2,0),(2,1),... and stored consecutively.
  int pair_index = 0;
  for(int row=0; row<N; row++)
  {
    for(int col=0; col<row; col++)
    {
      // RF selects T.edges_distance(); otherwise T.distance() is used.
      const double dist = RF ? T.edges_distance(row,col) : T.distance(row,col);

      m1[pair_index] += dist;
      m2[pair_index] += dist*dist;
      pair_index++;
    }
  }
}
예제 #2
0
/// \brief Build a Partition over the leaves of \a T from the partition of branch \a b.
///
/// The bitmask returned by T.partition(b) is copied for the first T.n_leaves()
/// positions only, so the resulting group has exactly one bit per leaf.
///
/// \param T The leaf-labelled tree.
/// \param b The branch whose partition is requested.
/// \return A Partition constructed from the leaf labels of \a T and the truncated bitmask.
Partition partition_from_branch(const SequenceTree& T,int b) 
{
  dynamic_bitset<> group(T.n_leaves());
  const dynamic_bitset<>& with_internal = T.partition(b);

  // NOTE(review): assumes with_internal holds at least T.n_leaves() bits
  // (the name suggests it also covers internal nodes) -- confirm.
  // The index uses the bitset's own unsigned size_type so that the comparison
  // against group.size() does not mix signed and unsigned operands.
  for(dynamic_bitset<>::size_type i=0;i<group.size();i++)
    group[i] = with_internal[i];

  return Partition(T.get_leaf_labels(), group);
}
예제 #3
0
/// \brief Renumber the leaves of \a T so that they follow the order of the labels in \a names.
///
/// The mapping from the tree's current leaf labels onto \a names is obtained from
/// compute_mapping() and applied with T.standardize().  If a bad_mapping<string> is
/// thrown, a copy of it is re-thrown carrying a message that names the missing label.
///
/// \param T The leaf-labelled tree; it is re-indexed in place.
/// \param names The leaf labels in the desired order.
///
void remap_T_leaf_indices(SequenceTree& T,const vector<string>& names)
{
  assert(names.size() == T.n_leaves());

  try {
    // Map T's current leaf order onto the order given by `names`, then apply it.
    vector<int> leaf_order = compute_mapping(T.get_leaf_labels(), names);

    T.standardize(leaf_order);
  }
  catch(const bad_mapping<string>& original)
  {
    // Work on a copy: the caught exception is const.
    // clear() presumably discards the existing message text -- confirm.
    bad_mapping<string> err(original);
    err.clear();

    // `from` tells which of the two label lists contained the unmatched label.
    if (err.from != 0)
      err<<"Sequence '"<<err.missing<<"' not found in the tree.";
    else
      err<<"Couldn't find leaf sequence \""<<err.missing<<"\" in names.";

    throw err;
  }
}
예제 #4
0
int main(int argc,char* argv[]) 
{ 
  try {

    cerr.precision(10);
    cout.precision(10);

    //---------- Parse command line  -------//
    variables_map args = parse_cmd_line(argc,argv);

    //---------- Initialize random seed -----------//
    unsigned long seed = 0;
    if (args.count("seed")) {
      seed = args["seed"].as<unsigned long>();
      myrand_init(seed);
    }
    else
      seed = myrand_init();
    cout<<"random seed = "<<seed<<endl<<endl;
    
    alignment A;
    SequenceTree T;
    load_A_and_T(args,A,T);

    cout<<"Using alphabet: "<<A.get_alphabet().name<<endl<<endl;

    //------------- Show Similarity/Distances between sequences ---------//
    cout.precision(3);

    cout<<"conserved = \n";
    print_entire(cout,T.get_leaf_labels(),getConserved(A))<<"\n";

    Matrix S = getSimilarity(A);

    cout<<"%similarity = \n";
    print_lower(cout,T.get_leaf_labels(),S)<<"\n";

    owned_ptr<substitution::MultiModel> smodel_in = get_smodel(args,A);
    set_parameters(*smodel_in,args);
    cout<<"Using substitution model: "<<smodel_in->name()<<endl;
    smodel_in->set_rate(1);
    show_parameters(cout,*smodel_in);
    cout<<endl;

    cout<<"input T = "<<T<<endl;
    cout<<endl;


    //----- Prior & Posterior Rate Distributions (rate-bin probabilities) -------- //
    analyze_rates(A,T,*smodel_in);

    //------- Estimate branch lengths -------------//
    owned_ptr<substitution::MultiModel> smodel_est = smodel_in;
    SequenceTree T2 = T;

    if (args.count("search")) {

      vector<int> parameters;
      if (args["search"].as<string>() == "smodel")
	for(int i=0;i<smodel_est->n_parameters();i++)
	  if (not smodel_est->is_fixed(i))
	    parameters.push_back(i);
      
      estimate_tree(A,T2,*smodel_est,parameters);
    

      cout<<"E T = "<<T2<<endl;
      show_parameters(cout,*smodel_est);
      cout<<endl<<endl;

      analyze_rates(A,T2,*smodel_est);
    }

    //------- Set up function to maximize --------//
    Matrix S1 = getSimilarity(T,*smodel_in);
    Matrix S2 = getSimilarity(T2,*smodel_est);

    Matrix D = C(S);
    cout<<"%difference (actual) = \n";
    print_lower(cout,T.get_leaf_labels(),D)<<"\n";
    cout<<"%difference (input) = \n";
    print_lower(cout,T.get_leaf_labels(),C(S1))<<"\n";
    if (args.count("search")){
      cout<<"%difference (estimated) = \n";
      print_lower(cout,T.get_leaf_labels(),C(S2))<<"\n\n";
    }

    cout<<"tree distances (input) = \n";
    print_lower(cout,T.get_leaf_labels(),DistanceMatrix(T))<<"\n";
    if (args.count("search")){
      cout<<"tree distances (estimated) = \n";
      print_lower(cout,T.get_leaf_labels(),DistanceMatrix(T2))<<"\n\n";
    }

  }
  catch (std::exception& e) {
    cerr<<"analyze_distances: Error! "<<e.what()<<endl;
    exit(1);
  }
  return 0;

}