コード例 #1
0
ファイル: qgram.c プロジェクト: richierocks/stringdist
static void get_counts( qtree *Q, int q, int *qgrams, int nLoc, int *index, double *count ){
  if ( Q == NULL ) return ;
  memcpy(qgrams + q*index[0], Q->qgram, sizeof(int) * q);
  memcpy(count + nLoc*index[0], Q->n, sizeof(double) * nLoc);
  ++index[0];
  get_counts(Q->left, q, qgrams, nLoc, index, count);
  get_counts(Q->right,q, qgrams, nLoc, index, count);
}
コード例 #2
0
ファイル: DM_Server.cpp プロジェクト: azybler/Yahoo_LDA
bool DM_Server::remove(const string& word, string& counts, const Ice::Current&) {
    word_mutex_t::scoped_lock lock(*(_tc_table.get_lock(word)), false);
    if (!get_counts(word, counts))
        return false;
    _tc_table.erase(word);
    return true;
}
コード例 #3
0
ファイル: p111.c プロジェクト: mchalek/euler
int main(void)
{
    // first compute all 10-digit primes
    long *p, np;
    primes(PMAX, &p, &np);
    
    int max_counts[10];
    memset(max_counts, 0, sizeof(max_counts));

    uint64_t max_sums[10];
    memset(max_sums, 0, sizeof(max_sums));

    int counts[10];

    long i;
    for(i = 0; i < np; i++) {
        if(p[i] < PMIN)
            continue;

        if(p[i] > PMAX)
            break;

        get_counts(p[i], counts);
        update_maxes(p[i], counts, max_counts, max_sums);
    }

    uint64_t result = 0;
    for(i = 0; i < 10; i++)
        result += max_sums[i];

    printf("result: %ld\n", result);

    return 0;
}
コード例 #4
0
ファイル: procd_ps.c プロジェクト: aunali1/exopc
static void ps_print_level(int level, proc_p p) {
  /* pid ppid envid pgrp session   xstat stat flag nxch */
#define MAXLEVEL 4
  int i;
  int epilogue_count, yield_count;
#if 0
  char state[] = "XIRSHZ";
#endif
  if (level == -1) {
    printf("PID ");
    for (i = 0; i < MAXLEVEL; i++) printf("  ");

#if 0    
    printf("PPID  PG SES "); 
    printf("sta xst flag  nx ");
    printf("ticks ctxcnt eip      name\n");
#else
    printf ("    TICKS\tCTXCNT\tEIP\t\tNAME\t\tEID\n\n");
#endif
    return;
  }
  if (level > MAXLEVEL) level = MAXLEVEL;
  for (i = 0; i < level; i++) printf("  ");

  if (p == NULL) {printf(" null proc entry\n");return;}

  printf("%-3d",p->p_pid);
  for (i = level; i <= MAXLEVEL; i++) printf("  ");
#if 0
  printf("%3d ",(p->p_pptr) ? p->p_pptr->p_pid : -1);
  printf("%3d ",(p->p_pgrp) ? p->p_pgrp->pg_id : -1);
  printf("%3d ",(p->p_session->s_leader) ? p->p_session->s_leader->p_pid : -1);
  printf("%c %3d %4s %3d ",
	   state[p->p_stat], p->p_xstat, ps_get_flags(p->p_flag), p->nxchildren);
#endif
  epilogue_count = 0;
  yield_count = 0;
  get_counts(p->envid, &epilogue_count, &yield_count);
  printf("\t%5d\t%6d\t%08x\t%.10s",
	 __envs[envidx(p->envid)].env_ticks,
	 __envs[envidx(p->envid)].env_ctxcnt, /*  epilogue_count, */
	 (p->envid == __envid || p->p_stat == SZOMB) ? 0 : get_eip(p->envid),
	 ps_get_comm(p->envid,1));
  if (strlen (ps_get_comm (p->envid,1)) < 8)
    printf ("\t\t");
  else
    printf ("\t");
  printf ("%d\n", p->envid);
  for (p = p->p_children.lh_first; p != 0; p = p->p_sibling.le_next) {
    ps_print_level(level+1,p);
  }

}
コード例 #5
0
void parameter_cloud(Tree &T, Model &Mod, long Nrep, long length, double eps, Parameters &Parsim){

  long iter;

  float likel;


  Parameters Par;
  Alignment align;
  Counts data;

  double eps_pseudo = 0.001;     // Amount added to compute the pseudo-counts.

  // Initialize the parameters for simulation of K81 data for testing
  Par = create_parameters(T);

  // Obtaining the distribution of estimated parameters with EM

  std::ofstream estpar;
  estpar.open("est-par.dat", std::ios::out);
  estpar.precision(15);

  std::vector<double> param;
  for (iter=0; iter < Nrep; iter++) {
    random_data(T, Mod, Parsim, length, align);
    get_counts(align, data);
    add_pseudocounts(eps_pseudo, data);

    // Runs EM
    std::tie(likel, iter)= EMalgorithm(T, Mod, Par, data, eps);

    // Choses the best permutation.
    guess_permutation(T, Mod, Par);

    get_free_param_vector(T, Mod, Par, param);

    for (unsigned long k=0; k < param.size(); k++) {
      estpar << param[k] << "  ";
    }
    estpar << std::endl;
  }

}
コード例 #6
0
void parameter_test(Tree &T, Model &Mod, long Nrep, long length, double eps, std::vector<double> &pvals, std::string data_prefix, bool save_mc_exact){

  long iter;
  long i, r;

  double df, C;
  double distance, KL;
	KL=0;
	distance=0;
  double likel;


  Parameters Parsim, Par, Par_noperm;
  Alignment align;
  Counts data;

  double eps_pseudo = 0.001;     // Amount added to compute the pseudo-counts.

  StateList sl;

  bool save_data = (data_prefix != "");


  std::string output_filename;
  std::stringstream output_index;
  std::ofstream logfile;
  std::ofstream logdistfile;

  std::ofstream out_chi2;
  std::ofstream out_br;
  std::ofstream out_brPerc;

  std::ofstream out_pvals;
  std::ofstream out_pvals_noperm;
  std::ofstream out_qvals;
  std::ofstream out_bound;
  std::ofstream out_variances;
  std::ofstream out_qvalsComb;
  std::ofstream out_qvalsCombzscore;
  std::ofstream out_covmatrix;

  std::ofstream out_parest;
  std::ofstream out_parsim;

  std::vector<double> KLe;
  std::vector<std::vector<double> > chi2_array; // an array of chi2 for every edge.
  std::vector<std::vector<double> > mult_array; // an array of mult for every edge.
  std::vector<std::vector<double> > br_array; // an array of br. length for every edge.
  std::vector<std::vector<double> > br_arrayPerc; // an array of br. length for every edge.

  std::vector<std::vector<double> > cota_array; // an array of upper bounds of the diff in lengths for every edge.
  std::vector<std::vector<double> > pval_array; // an array of pvals for every edge.
  std::vector<std::vector<double> > pval_noperm_array;
  std::vector<std::vector<double> > qval_array; // an array of qvalues for every edge.
  std::vector<std::vector<double> > variances_array; // an array of theoretical variances.
  std::vector<std::vector<double> > parest_array; // array of estimated parameters
  std::vector<std::vector<double> > parsim_array; // array of simulation parameters

	//  ci_binom ci_bin; // condfidence interval
  std::vector<std::vector<ci_binom> > CIbinomial ; //  	vector of CIs

  std::list<long> produced_nan;

  long npars = T.nedges*Mod.df + Mod.rdf;

  // Initializing pvals
  pvals.resize(T.nedges);

  // Initialize the parameters for simulation of K81 data for testing
  Par = create_parameters(T);
  Parsim = create_parameters(T);

  // Initializing data structures
  KLe.resize(T.nedges);

	pval_array.resize(T.nedges);
        pval_noperm_array.resize(T.nedges);
        qval_array.resize(T.nedges);
	chi2_array.resize(T.nedges);
	mult_array.resize(T.nedges);
	br_array.resize(T.nedges);
        br_arrayPerc.resize(T.nedges);
	cota_array.resize(T.nedges);
        variances_array.resize(npars);
        parest_array.resize(npars);
        parsim_array.resize(npars);

	// initialize to 0's
  for (i=0; i < T.nedges; i++) {
      pval_array[i].resize(Nrep, 0);
      pval_noperm_array[i].resize(Nrep, 0);
      qval_array[i].resize(Nrep, 0);
          chi2_array[i].resize(Nrep, 0);
	  mult_array[i].resize(Nrep, 0);
	  br_array[i].resize(Nrep, 0);
          br_arrayPerc[i].resize(Nrep, 0);
	  cota_array[i].resize(Nrep, 0);
  }

  for(i=0; i < npars; i++) {
    variances_array[i].resize(Nrep, 0);
    parest_array[i].resize(Nrep, 0);
    parsim_array[i].resize(Nrep, 0);
  }

  // Information about the chi^2.
  df = Mod.df;
  C = get_scale_constant(Mod);


  if (save_data) {
    logfile.open((data_prefix + ".log").c_str(), std::ios::out);
    logfile << "model:  " << Mod.name << std::endl;
    logfile << "length: " << length << std::endl;
    logfile << "eps:    " << eps << std::endl;
    logfile << "nalpha: " << T.nalpha << std::endl;
    logfile << "leaves: " << T.nleaves << std::endl;
    logfile << "tree:   " << T.tree_name << std::endl;
    logfile << std::endl;
    logdistfile.open((data_prefix + ".dist.log").c_str(), std::ios::out);

    out_chi2.open(("out_chi2-" + data_prefix + ".txt").c_str(), std::ios::out);
    out_br.open(("out_br-" + data_prefix + ".txt").c_str(), std::ios::out);
    out_brPerc.open(("out_brPerc-" + data_prefix + ".txt").c_str(), std::ios::out);
    out_pvals.open(("out_pvals-" + data_prefix + ".txt").c_str(), std::ios::out);
    out_pvals_noperm.open(("out_pvals_noperm-" + data_prefix + ".txt").c_str(), std::ios::out);
    out_qvals.open(("out_qvals-" + data_prefix + ".txt").c_str(), std::ios::out);
    out_variances.open(("out_variances-" + data_prefix + ".txt").c_str(), std::ios::out);
    out_parest.open(("out_params-est-" + data_prefix + ".txt").c_str(), std::ios::out);
    out_parsim.open(("out_params-sim-" + data_prefix + ".txt").c_str(), std::ios::out);
    out_bound.open(("out_bound-" + data_prefix + ".txt").c_str(), std::ios::out);
    out_qvalsComb.open(("out_qvalsComb-" + data_prefix + ".txt").c_str(), std::ios::out);
    out_qvalsCombzscore.open(("out_qvalsCombzscore-" + data_prefix + ".txt").c_str(), std::ios::out);

    out_parsim.precision(15);
    out_parest.precision(15);
    out_variances.precision(15);
  }


	// uncomment the 2 following lines if want to fix the parameters
	// random_parameters_length(T, Mod, Parsim);
	 //random_data(T, Mod, Parsim, length, align);

  for (iter=0; iter < Nrep; iter++) {
    std::cout << "iteration: " << iter << "             \n";

    // Produces an alignment from random parameters
    random_parameters_length(T, Mod, Parsim);


    random_data(T, Mod, Parsim, length, align);
    get_counts(align, data);
    add_pseudocounts(eps_pseudo, data);

    // Saving data
    if (save_data) {
      output_index.str("");
      output_index << iter;
      output_filename = data_prefix + "-" + output_index.str();
      save_alignment(align, output_filename + ".fa");
      save_parameters(Parsim, output_filename + ".sim.dat");
    }

    // Runs the EM
    std::tie(likel, iter) = EMalgorithm(T, Mod, Par, data, eps);

    // If algorithm returns NaN skip this iteration.
    if (boost::math::isnan(likel)) {
      produced_nan.push_back(iter);
      continue;
    }
    copy_parameters(Par, Par_noperm);

    // Chooses the best permutation.
    guess_permutation(T, Mod, Par);

    distance = parameters_distance(Parsim, Par);

      // estimated counts: Par ; original: Parsim
      std::vector<double> counts_est;
      counts_est.resize(T.nalpha, 0);


      // calculate the cov matrix
      std::vector<std::vector<double> > Cov;
      Array2 Cov_br;

      full_MLE_covariance_matrix(T, Mod, Parsim, length, Cov);

      if(save_data) {
          save_matrix(Cov, output_filename + ".cov.dat");
      }

      // Save the covariances in an array
      std::vector<double> param;
      std::vector<double> param_sim;

      param.resize(npars);
      param_sim.resize(npars);

      get_free_param_vector(T, Mod, Par, param);
      get_free_param_vector(T, Mod, Parsim, param_sim);

      for(i=0; i < npars; i++) {
	variances_array[i][iter] = Cov[i][i];
        parsim_array[i][iter] = param_sim[i];
        parest_array[i][iter] = param[i];
      }

      std::vector<double> xbranca, xbranca_noperm, mubranca;
      double chi2_noperm;
      xbranca.resize(Mod.df);
      xbranca_noperm.resize(Mod.df);
      mubranca.resize(Mod.df);
      for (i=0; i < T.nedges; i++) {
		  r = 0; // row to be fixed
		  // Extracts the covariance matrix, 1 edge

				  branch_inverted_covariance_matrix(Mod, Cov, i, Cov_br);
                  get_branch_free_param_vector(T, Mod, Parsim, i, mubranca);
                  get_branch_free_param_vector(T, Mod, Par, i, xbranca);
                  get_branch_free_param_vector(T, Mod, Par_noperm, i, xbranca_noperm);

			      chi2_array[i][iter] = chi2_mult(mubranca, xbranca, Cov_br);
                  chi2_noperm = chi2_mult(mubranca, xbranca_noperm, Cov_br);


                  pval_array[i][iter] =  pvalue_chi2(chi2_array[i][iter], Mod.df);
                  pval_noperm_array[i][iter] = pvalue_chi2(chi2_noperm, Mod.df);

			      br_array[i][iter] = T.edges[i].br - branch_length(Par.tm[i], T.nalpha);
				  br_arrayPerc[i][iter] = branch_length(Par.tm[i], T.nalpha)/T.edges[i].br;


		// Upper bound on the parameter distance using multinomial:
		//  cota_array[i][iter] = bound_mult(Parsim.tm[i], Xm, length);
	    // and using the  L2 bound
		  cota_array[i][iter] = branch_length_error_bound_mult(Parsim.tm[i], Par.tm[i]);
		  out_br <<  br_array[i][iter]  << " ";
		  out_brPerc <<  br_arrayPerc[i][iter]  << " ";

		  out_bound  <<  cota_array[i][iter] << " ";
		  out_chi2 << chi2_array[i][iter] << " ";

			}
         out_chi2 << std::endl;
		 out_bound  <<  std::endl;
		 out_br << std::endl;
		 out_brPerc << std::endl;




    // Saves more data.
    if (save_data) {
      logfile << iter << ": " << distance << "   " << KL << std::endl;
      save_parameters(Par, output_filename + ".est.dat");

      logdistfile << iter << ": ";
      logdistfile << parameters_distance_root(Par, Parsim) << " ";
      for(int j=0; j < T.nedges; j++) {
        logdistfile << parameters_distance_edge(Par, Parsim, j) << " ";
      }
      logdistfile << std::endl;
    }

} // close iter loop here

  // Correct the p-values
  for(i=0; i < T.nedges; i++) {
       BH(pval_array[i], qval_array[i]);
	//save them
  }

  if (save_mc_exact) {
    for(long iter=0; iter < Nrep; iter++) {
      for(long i=0; i < T.nedges; i++) {
        out_pvals << pval_array[i][iter] << "  ";
        out_pvals_noperm << pval_noperm_array[i][iter] << "  ";
        out_qvals << qval_array[i][iter] << "  ";
      }
      out_pvals  << std::endl;
      out_pvals_noperm << std::endl;
      out_qvals  << std::endl;

      for(long i=0; i < npars; i++) {
        out_variances << variances_array[i][iter] << "  ";
        out_parsim << parsim_array[i][iter] << "  ";
        out_parest << parest_array[i][iter] << "  ";
      }
      out_variances << std::endl;
      out_parsim << std::endl;
      out_parest << std::endl;
    }
  }

	// now combine the pvalues
   for(i=0; i < T.nedges; i++) {
	pvals[i] = Fisher_combined_pvalue(pval_array[i]);
    //using the Zscore it goes like this: pvals[i] = Zscore_combined_pvalue(pval_array[i]);
	if (save_mc_exact) {	   out_qvalsComb <<  pvals[i] << "  " ;
	out_qvalsCombzscore << Zscore_combined_pvalue(pval_array[i]) << " ";
	}
  }

  // Close files
  if (save_data) {
    logdistfile.close();
    logfile.close();
  }

if (save_mc_exact) {
	out_chi2.close();
	out_bound.close();
        out_variances.close();
        out_parest.close();
        out_parsim.close();
	out_br.close();
	out_brPerc.close();

	out_pvals.close();
        out_qvals.close();
	out_qvalsComb.close();
	out_qvalsCombzscore.close();
        out_covmatrix.close();
	}

  // Warn if some EM's produced NaN.
  if (produced_nan.size() > 0) {
    std::cout << std::endl;
    std::cout << "WARNING: Some iterations produced NaN." << std::endl;
    std::list<long>::iterator it;
    for (it = produced_nan.begin(); it != produced_nan.end(); it++) {
      std::cout << *it << ", ";
    }
    std::cout << std::endl;
  }
}
コード例 #7
0
ファイル: qgram.c プロジェクト: richierocks/stringdist
SEXP R_get_qgrams(SEXP a, SEXP qq){
  PROTECT(a);
  PROTECT(qq);

  int q = INTEGER(qq)[0];

  if ( q < 0 ){
    UNPROTECT(2);
    error("q must be a nonnegative integer");
  }


  SEXP strlist;
  int nstr, nchar, nLoc = length(a);
  unsigned int *str;
  
  // set up a tree; push all the qgrams.
  qtree *Q = new_qtree( q, nLoc);
  
  for ( int iLoc = 0; iLoc < nLoc; ++iLoc ){
    strlist = VECTOR_ELT(a, iLoc);
    nstr    = length(strlist);
 
    for ( int i=0; i < nstr; ++i ){
      str   = (unsigned int *) INTEGER(VECTOR_ELT(strlist,i));
      nchar = length(VECTOR_ELT(strlist,i));
      if ( str[0] == NA_INTEGER 
          || q > nchar
          || ( q == 0 && nchar > 0 )
        ){
        continue ;
      }
      Q = push_string(str, nchar, q, Q, iLoc, nLoc);
      if ( Q == NULL ){
        UNPROTECT(2);
        error("could not allocate enough memory");
      }
    }
  }
  // pick and delete the tree

  int nqgram[1] = {0};

  // helper variable for get_counts 
  int index[1] = {0};

  count_qtree(Q,nqgram);  

  SEXP qgrams, qcount;
  PROTECT(qgrams = allocVector(INTSXP, q*nqgram[0]));
  PROTECT(qcount = allocVector(REALSXP, nLoc*nqgram[0]));

  get_counts(Q, q, INTEGER(qgrams), nLoc, index, REAL(qcount));
  
  setAttrib(qcount, install("qgrams"), qgrams);
  
  free_qtree();
  UNPROTECT(4);

  return(qcount);
}
コード例 #8
0
ファイル: DM_Server.cpp プロジェクト: azybler/Yahoo_LDA
void DM_Server::putNget_async(const AMD_DistributedMap_putNgetPtr& png_cb,
        const string& word, const string& delta, const Ice::Current&) {
    IceUtil::Monitor<IceUtil::Mutex>::Lock monitor(_monitor_mutex);
    PNGJobPtr job = new PNGJob(png_cb, word, delta);
    _png_jobs.push_back(job);
    if (_num_consumers >= NUM_CONSUMERS) {
        //cerr << _num_consumers << " are working" << endl;
        int num_jobs_to_respond = _png_jobs.size();
        if (num_jobs_to_respond < QUE_FULL) {
            //cerr << "Queue is not full. Queuing up the job. Que Size: " << num_jobs_to_respond << endl;
            if (_waiting_thrds) { /*cerr << "Waking up waiting consumers" << endl;*/
                _monitor_mutex.notify();
            }
        } else {
            //cerr << "Queue is full. Acting as consumer" << endl;
            num_jobs_to_respond = min(CHUNK, (int) _png_jobs.size());
            vector<PNGJobPtr> my_png_jobs;
            for (int i = 0; i < num_jobs_to_respond; i++) {
                my_png_jobs.push_back(_png_jobs.front());
                _png_jobs.pop_front();
            }
            //cerr << "Cleared " << CHUNK << " jobs to process. Que Size: " << _png_jobs.size() << endl;
            monitor.release();
            for (int i = 0; i < num_jobs_to_respond; i++) {
                PNGJobPtr my_job = my_png_jobs[i];
                word_mutex_t::scoped_lock lock(*(_tc_table.get_lock(
                        my_job->word)), true);
                upd_counts(my_job->word, my_job->delta);
                string counts;
                get_counts(my_job->word, counts);
                lock.release();
                my_job->png_cb->ice_response(counts);
                //cerr << "Sent Response to job " << i << endl;
            }
            my_png_jobs.clear();
        }
    } else {
        //cerr << _num_consumers << " are working" << endl;
        ++_num_consumers;
        vector<PNGJobPtr> my_png_jobs;
        while (true) {
            int num_jobs_to_respond = min(CHUNK, (int) _png_jobs.size());
            for (int i = 0; i < num_jobs_to_respond; i++) {
                my_png_jobs.push_back(_png_jobs.front());
                _png_jobs.pop_front();
            }
            //cerr << "Cleared " << num_jobs_to_respond << " jobs to process. Que Size: " << _png_jobs.size() << endl;
            monitor.release();
            for (int i = 0; i < num_jobs_to_respond; i++) {
                PNGJobPtr my_job = my_png_jobs[i];
                word_mutex_t::scoped_lock lock(*(_tc_table.get_lock(
                        my_job->word)), true);
                upd_counts(my_job->word, my_job->delta);
                string counts;
                get_counts(my_job->word, counts);
                lock.release();
                my_job->png_cb->ice_response(counts);
                //cerr << "Sent Response to job " << i << endl;
            }
            my_png_jobs.clear();
            monitor.acquire();
            while (_png_jobs.empty()) {
                try {
                    ++_waiting_thrds;
                    //cerr << "Queue empty. " << _waiting_thrds << " are waiting including me" << endl;
                    _monitor_mutex.wait();
                    --_waiting_thrds;
                    //cerr << "Woke up, " << _waiting_thrds << " are waiting" << endl;
                } catch (const Ice::Exception& ex) {
                    --_waiting_thrds;
                    throw ex;
                }
            }
        }
    }
}
コード例 #9
0
ファイル: DM_Server.cpp プロジェクト: azybler/Yahoo_LDA
bool DM_Server::get(const string& word, string& counts, const Ice::Current&) {
    word_mutex_t::scoped_lock lock(*(_tc_table.get_lock(word)), false);
    return get_counts(word, counts);
}