Exemple #1
0
/*
 * Entry point: prints relatively prime pairs of numbers.
 *
 * Command line, as parsed below (every argument is optional, decimal):
 *   argv[1]  number of pairs to generate            -> pair
 *   argv[2]  length used for the first number       -> len1
 *   argv[3]  length used for the second number      -> len2
 * When argv[3] is absent, len2 takes the same value as len1.  Anything not
 * given keeps the value DEFAULT.
 *
 * Returns the result of _usage() on bad arguments, 0 otherwise.
 */
int16_t main(uint16_t argc, char **argv) {
	if (argc > 4)
		return _usage(argv, FAILURE);
	setup();
	mpz_set_ui(len1, DEFAULT);
	mpz_set_ui(len2, DEFAULT);
	mpz_set_ui(pair, DEFAULT);
	/* The cases cascade on purpose: more arguments means "also parse the
	 * earlier ones". */
	switch (argc) {
		case 4:
			if (mpz_set_str(len2, argv[3], 10) == -1)
				return _usage(argv, FAILURE);
			/* fallthrough */
		case 3:
			if (mpz_set_str(len1, argv[2], 10) == -1)
				return _usage(argv, FAILURE);
			/* Only one length given: use it for both numbers.  The base
			 * must be 10 here as well, otherwise a leading zero would be
			 * read as octal and len2 would differ from len1. */
			if (argc == 3 && mpz_set_str(len2, argv[2], 10) == -1)
				return _usage(argv, FAILURE);
			/* fallthrough */
		case 2:
			if (mpz_set_str(pair, argv[1], 10) == -1)
				return _usage(argv, FAILURE);
			break;
		default:
			break;
	}
	assert(mpz_sgn(len1) > 0);
	assert(mpz_sgn(len2) > 0);
	assert(mpz_sgn(pair) > 0);
	gmp_printf("Generating %Zd relatively prime pairs of lengths %Zd and %Zd:\n\n", pair, len1, len2);
	setlimits();
	setseed();
	/* pair counts down once for every pair actually reported. */
	while (mpz_sgn(pair)) {
		genpair();
		if (!impossible) {
			gmp_printf("Pair Found:  %Zd\t%Zd\n", num1, num2);
			mpz_sub_ui(pair, pair, 1);
		}
	}
	return 0;
}
/* uvec_randomize - shuffle the entries of a uvec in place.
 *
 * The shuffle consists of 3*N swaps of two randomly picked slots, where N
 * is the value reported by uvec_number().  A positive seed is passed to
 * setseed() unchanged, so repeating a call with the same positive seed
 * repeats the same sequence of swaps.  A seed <= 0 is replaced by the
 * current time before seeding.
 *
 * Returns  0 on success,
 *         -1 if uvec_exists() rejects uv,
 *         -2 if the time-derived seed turned out negative.
 */
int uvec_randomize(uvec const *uv, int seed) {
	char **items;
	char **slot_a, **slot_b;
	char *held;
	int count, total_swaps, k;

	if (!uvec_exists(uv))
		return -1;

	if (seed <= 0) {
		/* no usable seed supplied: derive one from the clock */
		seed = (int) time(NULL);
		if (seed < 0)
			return -2;
	}
	setseed(seed);

	count = uvec_number(uv);
	items = uvec_vector(uv);
	total_swaps = 3 * count;

	for (k = 0; k < total_swaps; k++) {
		/* draw the two positions in this order to keep the random
		 * stream consumption unchanged */
		slot_a = items + (int) (count * urand());
		slot_b = items + (int) (count * urand());

		held = *slot_a;
		*slot_a = *slot_b;
		*slot_b = held;
	}
	return 0;
}
Exemple #3
0
/*
 * Micro-benchmark for jnext().
 *
 * argv[1] is the iteration count n (parsed by strtoll with base 0, so
 * decimal, 0x-hex and 0-octal forms are accepted).  The loop body calls
 * jnext() twice per iteration, then the elapsed user time is reported.
 *
 * Returns 0 on success, 1 when the iteration count is missing or invalid.
 */
int main( int argc, char* argv[] ) {
	if ( argc < 2 ) {
		fprintf( stderr, "Usage: %s <iterations>\n", argc > 0 ? argv[0] : "benchmark" );
		return 1;
	}

	char *end = NULL;
	const long long int n = strtoll( argv[1], &end, 0 );
	/* Reject input with no digits at all, and negative counts: the
	 * countdown loop below would otherwise run until signed overflow. */
	if ( end == argv[1] || n < 0 ) {
		fprintf( stderr, "Invalid iteration count: %s\n", argv[1] );
		return 1;
	}

	uint64_t x = 0;

	setseed(0);

	int64_t start = getusertime();
	for( long long int i = n; i-- != 0; ) {
		x ^= jnext();
		x ^= jnext();
	}
	int64_t elapsed = getusertime() - start;
	/* the division by 1E6 treats the getusertime() difference as microseconds */
	const double s = elapsed / 1E6;
	/* Make the accumulated value observable so the loop cannot be optimised away. */
	if ( x == 0 ) putchar(0);
	printf( "%f s, %.02f queries/s, %.02f ns/query\n", s, n / s, 1E9 * s / n );
	return 0;
}
Exemple #4
0
/*
 * Summarises a table of sampled values.
 *
 * Input file layout (as read below): two integers "nline ndat", followed by
 * nline*ndat floating point values, one line of ndat values per sample.
 * The first nmiss lines are skipped as burn-in.  For every column the mean,
 * median and the 2.5% / 97.5% order statistics over the remaining lines are
 * written to "<prefix>res.txt".  Finally the average absolute change and
 * the average number of changes between neighbouring columns are printed.
 *
 * Command line options (each takes one value):
 *   -input <file>   input table
 *   -burn <n>       number of burn-in lines to skip
 *   -loc <file>     file with "ns tlseq lc" followed by ns increasing positions
 *   -prefix <str>   prefix for the output file name
 * Without any option the input file name and burn-in are asked for on stdin.
 *
 * NOTE(review): nrerror() is assumed to terminate the program, as the
 * pre-existing call sites already rely on that.
 */
int main(int argc, char *argv[]) {

  int nline, ndat, i, j, mid, l95, u95, nmiss=0;
  int nkept;                       /* lines left after dropping the burn-in */
  int takes_value, len;
  long seed = -setseed();
  double **dat, av, *arr;
  double drate[4];
  double *locs = NULL;
  int ns = 0;
  double tlseq;
  char lc;
  int loc_file = 0;
  char fname[MAXNAME+1];
  char prefix[MAXNAME+1];
  char name_fmt[32];               /* "%<MAXNAME>s": bounded token read into fname */
  FILE *ifp = NULL, *ofp;
  FILE *loc = NULL;

  char *in_str;
  int ask_questions = 1;

  print_help(argc, argv);
  strcpy(prefix, "");
  snprintf(name_fmt, sizeof name_fmt, "%%%ds", (int) MAXNAME);

  /* argv[0] is the program name, so option scanning starts at 1. */
  for (i = 1; i < argc; i++)
  {
    if (*argv[i] != '-') continue;

    ask_questions = 0;
    in_str = argv[i];

    takes_value = (strcmp(in_str, "-input") == 0) || (strcmp(in_str, "-burn") == 0)
               || (strcmp(in_str, "-loc") == 0) || (strcmp(in_str, "-prefix") == 0);
    if (takes_value && (i+1 >= argc))
    {
      /* argv[argc] is NULL: never hand it to fopen/atoi/strcpy */
      nrerror("Missing value for command line option");
      continue;
    }

    if (strcmp(in_str, "-input") == 0) ifp = fopen(argv[i+1], "r");
    if (strcmp(in_str, "-burn") == 0) nmiss = atoi(argv[i+1]);
    if (strcmp(in_str, "-loc") == 0)
    {
      loc = fopen(argv[i+1], "r");
      if (loc == NULL) nrerror("Cannot open loc file");
      /* only claim to have positions when the file really opened */
      loc_file = (loc != NULL);
    }
    if (strcmp(in_str, "-prefix") == 0)
    {
      len = snprintf(prefix, sizeof prefix, "%s", argv[i+1]);
      if (len < 0 || (size_t) len >= sizeof prefix) nrerror("Prefix too long");
    }
  }

  if ((ifp == NULL) || (ask_questions == 1))
  {
    printf("\nCould not find input file in command line\n");
    printf("\nInput file name:");
    if (scanf(name_fmt, fname) != 1) nrerror("Cannot read input file name");
    else ifp = fopen(fname, "r");
  }
  if (!ifp) nrerror("Cannot open file");
  idum = &seed;

  if (loc != NULL)
  {
    if (fscanf(loc, "%i %lf %c", &ns, &tlseq, &lc) != 3 || ns < 1)
      nrerror("Error in locs file: cannot read header");
    locs = dvector(0, ns);
    locs[0] = -1.0;   /* slot 0 is paired with the first data column in the output */

    for (i=1; i<=ns; i++)
    {
      if (fscanf(loc, "%lf", &locs[i]) != 1) nrerror("Error in locs file: too few positions");
      if (i>1 && locs[i]<=locs[i-1]) nrerror("Error in locs file: SNPs must be monotonically increasing");
    }
    fclose(loc);
  }

  if (ask_questions)
  {
    printf("\nNumber of burn-in samples:");
    if (scanf("%i", &nmiss) != 1) nmiss = 0;
  }

  if (nmiss<0) nmiss=0;

  if (fscanf(ifp,"%i %i", &nline, &ndat) != 2) nrerror("Cannot read size of data from input file");
  if (nline < 2) nrerror("Too few points for analysis");
  dat = dmatrix(1,nline,1,ndat);
  /* at least one line must survive the burn-in, otherwise every average
   * below divides by zero */
  if (nmiss>=nline) nrerror("Miss more than have data points!!");
  nkept = nline-nmiss;

  printf("\n\nReading data: %i lines (miss first %i) of %i points......",nline,nmiss,ndat);

  for (i=1;i<=nline;i++) for (j=1;j<=ndat;j++) {
    /* Test the conversion itself: feof() is also set after a successful
     * read of the very last number, and is never set on malformed input. */
    if (fscanf(ifp,"%lf", &dat[i][j]) != 1) nrerror("Reached end of file unexpectedly");
  }

  printf("...Data read successfully\n\n");
  fclose(ifp);

  if ((loc_file != 0) && (ndat != ns))
  {
    printf("Loc file does not match input file. Ignoring\n\n");
    loc_file = 0;
  }

  arr = dvector(1,nkept);
  len = snprintf(fname, sizeof fname, "%sres.txt", prefix);
  if (len < 0 || (size_t) len >= sizeof fname) nrerror("Output file name too long");
  ofp = fopen(fname, "w");
  if (!ofp) nrerror("Cannot open output file");

  mid = (int) ((double) nkept/2);
  l95 = (int) ((double) nkept*0.025+1.0);
  u95 = (int) ((double) nkept*0.975);
  /* arr is indexed from 1; with a single retained line the two formulas
   * above would yield index 0 */
  if (mid < 1) mid = 1;
  if (u95 < 1) u95 = 1;

  printf("\n\nMid = %i, L95=%i, U95=%i\n\n", mid, l95, u95);


  fprintf(ofp,"Loci\tMean_rho\tMedian\tL95\tU95");
  for (i=1;i<=ndat;i++) {
    /* progress dots, 50 per line */
    printf("."); if (i%50 == 0) printf("\n");
    for (j=1+nmiss,av=0;j<=nline;j++) {
      av+=dat[j][i];
      arr[j-nmiss]=dat[j][i];
    }
    av/=(double) nkept;
    sort_farray(arr,nkept);

    if (loc_file == 0)
      fprintf(ofp,"\n%6i\t%10.5f\t%10.5f\t%10.5f\t%10.5f",i-1,av,arr[mid],arr[l95],arr[u95]);
    else
      fprintf(ofp,"\n%10.3f\t%10.5f\t%10.5f\t%10.5f\t%10.5f",locs[i-1],av,arr[mid],arr[l95],arr[u95]);
  }

  /*Count total rate jumped over region*/
  /* drate[1]/drate[3] accumulate one line, drate[0]/drate[2] the totals */
  for (i=1+nmiss, drate[0]=drate[2]=0.0;i<=nline;i++) {
    for (j=2,drate[1]=drate[3]=0.0;j<ndat-1;j++) {
      drate[1]+=(double) fabs(dat[i][j+1]-dat[i][j]);
      if (dat[i][j+1]!=dat[i][j]) drate[3]++;
    }
    drate[0]+=drate[1];
    drate[2]+=drate[3];
  }
  drate[0]/=(double) nkept;
  drate[2]/=(double) nkept;
  printf("\n\nAverage total change in rate = %.3f\nAverage total # changes = %.3f\n\n", drate[0],drate[2]);

  /* release the positions even when they were read but then ignored */
  if (locs != NULL)
    free_dvector(locs, 0, ns);
  free_dvector(arr, 1, nkept);

  fclose(ofp);
  return 0;
}
Exemple #5
0
/*
 * Entry point of the pairwise analysis.
 *
 * Steps, in the order performed below:
 *   1. parse command line options and open the sequence (-seq), location
 *      (-loc) and likelihood (-lk) files, prompting on stdin for the first
 *      two when they were not opened;
 *   2. read the sequences and count alleles, read the site locations;
 *   3. classify every pair of sites (pair_spectrum) against the types read
 *      from the likelihood file, if any;
 *   4. obtain likelihoods for types not covered by the file (lk_est,
 *      lk_miss or lk_resolve depending on ploidy / missing data);
 *   5. estimate the likelihood surface (lk_surf), print the fits, and run
 *      the optional analyses the user confirms at the prompts.
 *
 * NOTE(review): nrerror() is assumed to terminate the program, as the
 * pre-existing call sites already rely on that.
 */
int main (int argc, char *argv[]) {

	int i, j, **seqs, **nall, ns, **pij, lkf=0, npt=0, pnew=0, anc=0;
	int tcat=1, verb=1, miss=0, *flocs;
	int takes_value, len;

	int sw_flag=0, moment_flag=0, rmin_flag=0, sim_flag=0, test_flag=0;
	char fname[MAXNAME+1], **seqnames;
	char name_fmt[32];   /* "%<MAXNAME>s": bounded token read into fname */
	long seed=-setseed();
	extern int sizeofpset;
	double *locs;

	double **lkmat=NULL, *lkres;
	FILE *ifp=NULL, *ifp2=NULL, *ifp3=NULL, *tfp;
	struct site_type **pset=NULL;
	struct data_sum *data;
	int ask_questions = 1;
	char *in_str;

	print_help(argc, argv);
	idum = &seed;
	snprintf(name_fmt, sizeof name_fmt, "%%%ds", (int) MAXNAME);
	data = malloc(sizeof *data);
	if (data == NULL) nrerror("Cannot allocate memory for data summary");
	data->exact = 0;
	strcpy(data->prefix, "");

	/* argv[0] is the program name, so option scanning starts at 1. */
	for(i = 1; i < argc; i++)
	{
		if(*argv[i] != '-') continue;

		in_str = argv[i];
		ask_questions = 0;

		takes_value = (strcmp(in_str, "-seq") == 0) || (strcmp(in_str, "-loc") == 0)
		           || (strcmp(in_str, "-lk") == 0) || (strcmp(in_str, "-prefix") == 0);
		if (takes_value && (i+1 >= argc))
		{
			/* argv[argc] is NULL: never hand it to fopen/strcpy */
			nrerror("Missing value for command line option");
			continue;
		}

		if(strcmp(in_str, "-seq") == 0) ifp = fopen(argv[i+1], "r");
		if(strcmp(in_str, "-loc") == 0) ifp2 = fopen(argv[i+1], "r");
		if(strcmp(in_str, "-lk") == 0)
		{
			lkf = 1;
			ifp3 = fopen(argv[i+1], "r");
		}
		if(strcmp(in_str, "-exact") == 0) data->exact = 1;
		if(strcmp(in_str, "-concise") == 0) verb=0;
		if(strcmp(in_str, "-window") == 0) sw_flag=1;
		if(strcmp(in_str, "-moment") == 0) moment_flag=1;
		if(strcmp(in_str, "-simulate") == 0) sim_flag=1;
		if(strcmp(in_str, "-rmin_flag") == 0) rmin_flag=2;
		if(strcmp(in_str, "-test") == 0) test_flag=1;
		/* NOTE(review): the capacity of data->prefix is not visible here, so
		 * this copy is left unbounded - confirm against struct data_sum. */
		if(strcmp(in_str, "-prefix") == 0) strcpy(data->prefix, argv[i+1]);
	}
	if (ifp == NULL)
	{
		printf("\nCould not find seqs file in command line.\n");
		printf("\nInput filename for seqs:\n");
		if (scanf(name_fmt, fname) != 1) nrerror("Cannot read sequence file name");
		else ifp = fopen(fname, "r");
	}
	if (ifp == NULL) nrerror("Error in opening sequence file");


	if (fscanf(ifp,"%i%i%i", &data->nseq, &data->lseq, &data->hd) != 3) nrerror("Cannot read header of sequence file");
	if ((data->nseq < 2) || (data->lseq < 2)) {printf("\n\nInsufficient data for analysis (n > 1, L > 1) \n\n"); exit(1);}
	if (data->nseq > SEQ_MAX) {printf("\n\nMore than max no. sequences: Using first %i for analysis\n\n", SEQ_MAX); data->nseq=SEQ_MAX;}
	printf("\nAnalysing %i (n=%i) sequences of length %i seg sites\n", data->nseq, data->hd, data->lseq);
	seqs = imatrix(1, data->nseq, 1, data->lseq);
	seqnames = cmatrix(1, data->nseq+11, 1, MAXNAME+11);
	if (read_fasta(seqs, ifp, data->nseq, data->lseq, seqnames)) printf("\nSequences read succesfully\n");
	fclose(ifp);

	nall = imatrix(1, data->lseq, 1, 6);
	allele_count(seqs, data->nseq, data->lseq, nall,1, data->hd, data->prefix);

	/*Store lnfac values in array for speed of computation*/

	lnfac_array = (double *) malloc((size_t) ((int) (data->nseq+2)*(data->hd))*sizeof(double));
	if (lnfac_array == NULL) nrerror("Cannot allocate memory for lnfac array");

	lnfac_array[0]=lnfac_array[1]=0;

	for (j=2;j<=((int) data->nseq*(data->hd));j++) lnfac_array[j]=(double) lnfac_array[j-1]+log(j);


	/*Open file with location of seg sites and read in data*/
	if (ifp2 == NULL)
	{
		printf("\nCould not find locs file in command line.\n");
		printf("\nInput name of file containing location of seg sites\n\n");
		if (scanf(name_fmt, fname) != 1) nrerror("Cannot read loc file name");
		else ifp2 = fopen(fname, "r");
	}

	if (ifp2 == NULL) nrerror("Cannot open loc file");
	if (fscanf(ifp2, "%i %lf %c", &ns, &data->tlseq, &data->lc) != 3) nrerror("Cannot read header of loc file");
	if (ns != data->lseq) nrerror("Lseq and Locs disagree");
	if ((data->lc != 'C')&&(data->lc != 'L')) nrerror("Must input linear(L)/conversion(C)");
	if (data->lc == 'C') {
	  data->avc=0;
	  while (data->avc <= 0) {
	    printf("\n\nInput average tract length for conversion model: ");
	    /* stop instead of spinning forever on EOF or non-numeric input */
	    if (scanf("%lf", &(data->avc)) != 1) nrerror("Cannot read average tract length");
	  }
	}

	locs = dvector(1, data->lseq);
	flocs = ivector(1, data->lseq); /*Array to use when simulating data*/


	for (i=1; i<=data->lseq; i++) {
		if (fscanf(ifp2, "%lf", &locs[i]) != 1) nrerror("Error in locs file: too few positions");
		if ((locs[i]==0)||(locs[i]>data->tlseq)) {printf("\n\nError in Loc file\n\n%lf\n", data->tlseq); exit(1);}
		if (i>1 && locs[i]<=locs[i-1]) nrerror("Error in locs file: SNPs must be montonically increasing");
	}
	printf("\nLocation of seg sites\n\n");
	for (i=1; i<=data->lseq; i++) printf("%3i   %4.2lf\n", i, locs[i]);
	fclose(ifp2);

	/*Read in likelihood file where needed*/
	if (ask_questions)
	{
			printf("\n\nUse existing likelihood file? (yes=1, no=0):");
			scanf("%i", &lkf);  /*lkf is a flag: 1 means use existing likelihood file as starting point*/
			if (lkf)
			{
				printf("\n\nInput name of likelihood file: ");
				if (scanf(name_fmt, fname) != 1) nrerror("Cannot read likelihood file name");
				else ifp3 = fopen(fname, "r");
			}
			else
				data->exact=0;

			if (lkf == 1)
			{
				printf("\n\nIs likelihood file an exact match to data?(no=0/yes=1): ");
				scanf("%i", &data->exact);
			}
	}

	if (lkf && !ifp3) nrerror("Cannot open likelihood file");
	if (!lkf && data->hd==2) nrerror("For diploid data need complete lookup table for sequences");

	/*Store pair-types in pij matrix - classify in pair_spectrum routine*/

	data->w	= data->lseq;  /*Note for this program use all data - pair_int restricts to a smaller window*/
	pij = imatrix((int) 1,(int) data->lseq,(int) 1,(int) data->w);

	for (i=1;i<=data->lseq;i++) for (j=1;j<=data->w;j++) pij[i][j]=0;

	/* pset starts out as NULL rather than an indeterminate pointer */
	pset = init_pset(pset, lkf, ifp3, &npt, data);  /*Reads in type configurations from likelihood file*/

	printf("\n\n*** Calculating distribution of pair types ***\n\n");
	pset = pair_spectrum(seqs, data, nall, pset, &npt, &pnew, &miss, anc, pij);
	printf("\n\n *** Completed classification of pair types ***\n\n");

	if (data->exact && (pnew || miss)) nrerror("Lookup table is not exact for sequences\n(possibly generated by interval)");
	printf("\n\nOld = %i: New = %i: Missing = %i\n\n", npt,pnew,miss);
	data->ptt = (int) npt+pnew+miss;  /*npt is number from likelihood file, pnew is number new with no missing data, miss is # new with missing data*/
	if (verb) {
		len = snprintf(fname, sizeof fname, "%stype_table.txt", data->prefix);
		if (len < 0 || (size_t) len >= sizeof fname) nrerror("Type file name too long");
		tfp = fopen(fname, "w");
		if (!tfp) nrerror("Cannot open type file");
		type_print(pij, data->lseq, data->w,tfp);
		fclose(tfp);
	}
	if (verb) print_pairs(stdout, pset, npt+pnew, data->hd, data->nseq);

	/*Need a complete set for missing data or diploid data - check this*/
	if (!data->exact && (data->hd ==2 || miss)) {
		printf("\n\nMissing data or diploid: checking that likelihood table is exhaustive\n\n");
		check_exhaustive(pset,npt,(data->nseq)*((int) data->hd));
	}
	/*Read parameters and likelihoods from likelihood file - where appropriate*/
	if (lkf) {
		read_pars(ifp3, &tcat, &data->th, &data->rcat, &data->rmax);
		lkmat = dmatrix(1,npt+pnew+miss,1,data->rcat);
		read_lk(ifp3, lkmat, npt, tcat, data->rcat);
	}

	/*If haploid, but novel types, need to calculate new likelihoods and input parameter values*/
	if (data->hd ==1 && pnew) { /*Note can have pnew for diploid data, but this has been checked for already*/
		if (!lkf) {
			data->th=data->rmax=-1.0; data->rcat=0;
			printf("\n\nInput theta per site (suggest Watterson estimate of %.5lf):",(double) data->lseq/(watterson(data->nseq*data->hd)*data->tlseq));
			while (data->th<0.0) if (scanf("%lf", &data->th) != 1) nrerror("Cannot read theta");
			printf("\n\nMax 4Ner for grid (suggest 100):");
			while(data->rmax<0.0) if (scanf("%lf", &data->rmax) != 1) nrerror("Cannot read max 4Ner");
			printf("\n\nNumber of points on grid (suggest 101, min=2):");
			while(data->rcat<2) if (scanf("%i", &data->rcat) != 1) nrerror("Cannot read number of grid points");
			lkmat = dmatrix(1,npt+pnew+miss,1,data->rcat);
		}
		lk_est(pset,npt,pnew,lkmat,data->th,data->rcat,data->rmax);
		data->exact=1;
	}

	/*Sum over missing data or resolve genotypes and sum over missing data+configurations*/
	else if (miss && data->hd==1) {
		printf("\n\n*** Calculating likelihoods for missing data ***\n\n");
		for (i=1;i<=miss;i++) {
			lk_miss(pset[npt+i],lkmat[npt+i],lkmat,data);
			printf("\rType %i", i);
		}

		printf("  ...Done!\n\n");
	}


	/*Sum over resolutions for diploid data*/
	else if (data->hd==2 && !data->exact) {
	  printf("\n\n*** Resolving diploid data: %i ***\n\n",pnew+miss);
	  lkres = dvector(1,data->rcat);
	  for (i=1;i<=pnew+miss;i++) {
	    lk_resolve(lkres,pset[npt+i],lkmat[npt+i],lkmat,data);
	    printf("\rType %i", i);
	  }
	  free_dvector(lkres,1,data->rcat);

	  printf("  ...Done!\n\n");
	}

	/*If new likelihood generated can output likelihood file for future analyses*/
	if (verb) print_lks(pset, data, npt+pnew+miss, lkmat);


	/*Basic analysis - estimation of 4Ner asuming constant rate*/

	data->rme=data->rmax; data->rce=data->rcat;
	/* From here on lkf is reused as a scratch variable for yes/no answers. */
	if (1) {
		printf("\n\nDo you wish to change grid over which to estimate likelihoods for (default = %i points, 4Ner 0 - %.1lf) (1/0) :",data->rcat,data->rmax);
		scanf("%i", &lkf);
		if (lkf) {
			data->rme=-10; data->rce=0;
			printf("\n\nMax 4Ner for estimation           : ");
			while (data->rme < 0.0) if (scanf("%lf", &data->rme) != 1) nrerror("Cannot read max 4Ner for estimation");
			printf("\n\nNumber of classes to estimate for: ");
			while (data->rce < 1) if (scanf("%i", &data->rce) != 1) nrerror("Cannot read number of classes");
		}
	}
	data->lksurf = dmatrix(1,data->rce,1,2);
	lk_surf(pset, pij, data, lkmat, data->th, locs, 1);


	/*Print marginal likelihood ratio test statistics for each pair of sites*/
	printf("\n\nCalculating fits\n\n");
	fit_pwlk(data,pij,locs,lkmat,verb);

	/*Sliding windows version*/
	if (1) {
		printf("\n\nDo you wish to carry out a sliding windows analysis? (yes=1/no=0):");
		scanf("%i", &sw_flag);
	}
	if (sw_flag) lk_win(pset,pij,data,lkmat,locs,nall);

	/*Nonparametric estimation of recombination rate*/
	if (1) {
		printf("\n\nPrint out table of Rmin values?\n(0=No, 1=Total only, 2=Full table):");
		scanf("%i", &rmin_flag);
	}

	if (rmin_flag) {
		/* NOTE(review): the last argument is derived from lkf (the answer to
		 * the grid question above), not from rmin_flag - confirm which one
		 * rmin() expects. */
		rmin(data, pset, pij, locs, lkf-1);
		printf("\n\nLower bound on Rmin = %i\n\n",data->rmin);
	}

	/*Estimate 4Ner by Wakeley 1997 method*/
	if (1) {
		printf("\n\nEstimate 4Ner by moment method? (yes=1, no=0)");
		scanf("%i", &moment_flag);
	}

	if (moment_flag) wakeley_est(data, seqs, locs);

	/*Recombination tests - only available for haploid data!*/
	if (data->hd==1) {
		if (1) {
			printf("\n\nDo you wish to test for recombination? (yes=1, no=0): ");
			scanf("%i", &test_flag);
		}
		if (test_flag) {
			rec_test(data, pij, locs, lkmat, pset, npt+pnew+miss);
		}
	}

	/*Conditional simulation - only available for haploid data with a complete lk file*/
	if (data->hd==1 && !(data->exact)) {

		if (1) {
	  printf("\n\nDo you wish to test constant-rate model and estimate sampling distribution by simulation? (yes=1/no=0): ");
	  scanf("%i", &test_flag);
		}
	  if (test_flag) {
	    freq_min(locs, flocs, nall, data);
	    printf("\n\nHow many simulations? ");
	    scanf("%i", &lkf);
	    snp_sim(locs, flocs, pset, lkmat, lkf, data);
	  }
	}

	free_imatrix(pij,1,data->lseq,1,data->w);
	free_imatrix(seqs,1,data->nseq,1,data->lseq);
	free_imatrix(nall,1,data->lseq,1,6);   /* same bounds as the imatrix() call that created it */
	for (i=1;i<sizeofpset;i++) free(pset[i]);
	free(pset);
	/* data is released last: the two calls below still read data->lseq */
	free_dvector(locs, 1, data->lseq);
	free_ivector(flocs, 1, data->lseq);
	free(data);

	/* system("PAUSE"); */
	return 0;
}