Exemplo n.º 1
0
/*
 * Program entry point for the snppit analysis driver.
 *
 * Runs the whole pipeline in a fixed order:
 *   1. parse the command line into HLV->PBUO,
 *   2. read the data file in two passes and write the basic data summaries
 *      to snppit_output_BasicDataSummary.txt,
 *   3. stop here if the dry-run option is set (returns 0),
 *   4. select an s-max, assign matching single parents and parent pairs for
 *      every offspring collection,
 *   5. compute the pure-population trio collections, then write the trio
 *      posteriors to snppit_output_TrioPosteriors.txt,
 *   6. compute p-values by backward simulation, seeded from / saved to the
 *      file "snppit_seeds",
 *   7. write the FDR summary to snppit_output_FDR_Summary.txt and print the
 *      final parentage report.
 *
 * Parameters:
 *   argc, argv - the command line, forwarded unchanged to GetPBT_UserOpts().
 *
 * Returns 0 on normal completion (including a dry run).  Calls exit(1) if an
 * output file cannot be opened or if one of the allocations made directly in
 * this function fails.
 */
int main(int argc, char *argv[])
{
	int i;
	pbt_high_level_vars *HLV;
	FB_Vars  *CPFB;  /* "Compat Pairs FB_vars"  this is going to be heavily re-used space, each time we are dealing with
									an offspring and all of his non-excluded parent pairs.  It will contain 
									a Colls array that is linear with the Specified pedigrees.  */
	
	
	/* everything below hangs off HLV, so it is dereferenced immediately: make sure we actually got it */
	HLV = (pbt_high_level_vars *)malloc(sizeof(*HLV));
	if(HLV==NULL) {
		fprintf(stderr,"Error! Failed to allocate memory for the high-level variables. Exiting...\n");
		exit(1);
	}
	
	HLV->PBUO = GetPBT_UserOpts(argc, argv);
	
	
	/* open up a file stream for the basic summary data  */
	if( (HLV->BasicSummaries_File = fopen("snppit_output_BasicDataSummary.txt","w"))==NULL) {
		fprintf(stderr,"Error! Failed to open file snppit_output_BasicDataSummary.txt to write to it.  You may have it open and locked in another application. Exiting...\n");
		exit(1);
	}
	printf("\n\n");
	
	
	/* read the data file (two passes) and write all of the basic summaries */
	HLV->PFR = FirstPassThroughData(HLV->PBUO->DataFileName);
	
	#ifdef VERBOSE_PRINT_FIRST_PASS_SUMMARY
		PrintFirstPassSummaryOfPopsCollsAndIndivs(HLV->PFR);
	#endif
	CollectDataOnSecondPass(HLV->PFR, HLV->PBUO->DataFileName);
	NegotiatePiVectors(HLV->PBUO, HLV->PFR);
	PrintSummaryOfInputData(HLV->BasicSummaries_File,HLV->PFR);
	SummarizeLocusNameAndGtypRates(HLV->BasicSummaries_File, HLV->PFR);
	SummarizeAllelicTypes(HLV->BasicSummaries_File,HLV->PFR);
	CountAlleles(HLV->PFR);
	ComputeAlleleFreqsFromCounts(HLV->BasicSummaries_File, HLV->PFR);
	PrintSummaryOfAllelicCountsAndFreqs(HLV->BasicSummaries_File,HLV->PFR);
	fflush(stdout);
	fclose(HLV->BasicSummaries_File);
	
	
	/* on a dry run we stop as soon as the summaries have been written */
	if(HLV->PBUO->DryRun>0) {
		printf("\n\nData have been read in and summaries compiled on this dry run.\n");
		printf("Please check the data summaries in file \"snppit_output_BasicDataSummary.txt\"\n");
		printf("to confirm that the program is running correctly. If it all looks good,\n");
		printf("then try a full-blown run by removing the --dry-run option from the\n");
		printf("command line.\n\n");
		
		return(0);
	}
	else {
		printf("\n\n\nDATA HAVE BEEN READ.  SUMMARIES APPEAR IN:  snppit_output_BasicDataSummary.txt\n\n\n");
	}
	
	
	/* now compute the parental trios forwards probs using the Big Smax and select the smax to use 
	   for the future analyses */
	printf("COMPUTING AN APPROPRIATE S-MAX\n");
	SelectAnSmax3(HLV);
	
	
	printf("\n\n");
	for(i=0;i<HLV->PFR->NumOffColls;i++) {
		#ifdef VERBOSE_SINGLE_PARENT_COMPAT_WITH_OFFSPRING
			printf("EXCLUDING SINGLE PARENTS.  COLLECTION %d  %s   with %d indivs in collection. \nDone with individual index:\n",i+1,HLV->PFR->OffCollNames[i],HLV->PFR->NumInOffColls[i]);
		#endif
		AssignMatchingSingleParents(HLV, i);
	}
	
	
	printf("\n\n");
	for(i=0;i<HLV->PFR->NumOffColls;i++) {
		printf("FINDING NON EXCLUDED PARENT PAIRS.  COLLECTION %d  %s   with %d indivs in collection. \nDone with individual index:\n",i+1,HLV->PFR->OffCollNames[i], HLV->PFR->NumInOffColls[i]);
		fflush(stdout);
		AssignMatchingParentPairs(HLV, i, HLV->smax_to_use[2]);
	}
	
	
	printf("\n\n");
	printf("COMPUTING THE FORWARD STEP AND PREPARING FOR BACKWARD STEP FOR ALL POPULATIONS\n");
	fflush(stdout);
	ComputePurePopTrioColls(HLV);  /* this should do the forward step AND prepare for the backward step in all these */
	//ComputeCrossPopTrioColls(HLV); 
	
	
	/* this can only be done AFTER computing all the PurePopTrioColls */
	/* open up a file stream for the posteriors */
	if( (HLV->TrioPosteriors_File = fopen("snppit_output_TrioPosteriors.txt","w"))==NULL) {
		fprintf(stderr,"Error! Failed to open file snppit_output_TrioPosteriors.txt to write to it.  You may have it open and locked in another application. Exiting...\n");
		exit(1);
	}
	/* header line: fixed columns, then one posterior column per specified pedigree, then the missing-data / incompatibility columns */
	fprintf(HLV->TrioPosteriors_File,"OffspCollection\tKid\tPa\tMa\tRank\tLOD");
	{int k;
		for(k=0;k<NUM_SPEC_PEDS;k++)  {
			fprintf(HLV->TrioPosteriors_File,"\tP.Pr.%s",SpecPedIdx2Str(k));
		}
	}
	fprintf(HLV->TrioPosteriors_File,"\tKidMiss\tPaMiss\tMaMiss\tMI.Kid.Pa\tMI.Kid.Ma\tMI.Trio\n");
	
	
	printf("\n\n");
	for(i=0;i<HLV->PFR->NumOffColls;i++) {
		printf("COMPUTING POSTERIORS:  COLLECTION %d  %s      with %d indivs in collection. \nDone with individual index:\n",i+1,HLV->PFR->OffCollNames[i],HLV->PFR->NumInOffColls[i]);
		fflush(stdout);
		CalculateTrioPosteriorProbs(HLV, i);
	}
	fclose(HLV->TrioPosteriors_File);
	
	
	/* one entry per population.  calloc may legitimately return NULL for a zero-length request, so only treat NULL
	   as a failure when there is at least one population to store */
	HLV->KidsWithMaxLOD_Parents = (inds_with_max_lod_parents_from_this_pop **)calloc(HLV->PFR->NumPops,sizeof(inds_with_max_lod_parents_from_this_pop *));
	if(HLV->KidsWithMaxLOD_Parents==NULL && HLV->PFR->NumPops>0) {
		fprintf(stderr,"Error! Failed to allocate memory for the per-population max-LOD parent records. Exiting...\n");
		exit(1);
	}
	for(i=0;i<HLV->PFR->NumPops;i++)  {
		HLV->KidsWithMaxLOD_Parents[i] = RecordNonExcParentPairsFromPop(i,HLV);
	}
	
	
	/* allocate space to the areas where we will do FB algorithm successively, for each offspring with compatible parents */
	CPFB = (FB_Vars *)malloc(sizeof(*CPFB));
	if(CPFB==NULL) {
		fprintf(stderr,"Error! Failed to allocate memory for the compatible-pairs FB_Vars. Exiting...\n");
		exit(1);
	}
	CPFB->RP = HLV->PurePopTrioColls->RP;
	CPFB->NumColls = NUM_SPEC_PEDS;
	CPFB->Colls = (Collection **)calloc(CPFB->NumColls, sizeof(Collection *));
	if(CPFB->Colls==NULL && CPFB->NumColls>0) {
		fprintf(stderr,"Error! Failed to allocate memory for the compatible-pairs collections array. Exiting...\n");
		exit(1);
	}
	for(i=0;i<CPFB->NumColls;i++)  {
		CPFB->Colls[i] = AllocToCollection(CPFB->RP);
	}
	
	
	/* open up a file stream where we will store the max LOD parents  (currently disabled) */
	/*if( (HLV->MaxLodNonExpPar_File = fopen("snppit_output_MaxLodNonExParents.txt","w"))==NULL) {
		fprintf(stderr,"Error! Failed to open file snppit_output_MaxLodNonExParents.txt to write to it.  You may have it open and locked in another application. Exiting...\n");
		exit(1);
	}
	 */
	/*fprintf(HLV->MaxLodNonExpPar_File,"Kid\tPa\tMa\tPvalue\tLOD\tP.Pr.C_Se_Se\tP.Pr.Max\tMaxP.Pr.Relat\tTotPaNonExc\tTotMaNonExc\tTotUnkNonExc\tTotPairsNonExc\tKidMiss\tPaMiss\tMaMiss\tMI.Kid.Pa\tMI.Kid.Ma\tMI.Trio\tMendIncLoci\n");
	*/
	printf("\n\n");
	
	/* the simulations below use random numbers: the seeds are read here and written back out after the FDR step */
	SeedFromFile("snppit_seeds");
	
	printf("\n\n");
	for(i=0;i<HLV->PFR->NumOffColls;i++) {
		printf("COMPUTING P-VALUES BY SIMULATION:  COLLECTION %d  %s    with %d indivs in collection\nDone with individual index:\n",i+1,HLV->PFR->OffCollNames[i],HLV->PFR->NumInOffColls[i]);
		AssessFPR_and_FNR_ByBackwardSimulation(HLV, i, CPFB);
	}
	/*fclose(HLV->MaxLodNonExpPar_File);*/
	
	
	if( (HLV->FDR_Summaries_File = fopen("snppit_output_FDR_Summary.txt","w"))==NULL) {
		fprintf(stderr,"Error! Failed to open file \"snppit_output_FDR_Summary.txt\" to write to it.  You may have it open and locked in another application. Exiting...\n");
		exit(1);
	}
	printf("\n\n");
	printf("PERFORMING FALSE DISCOVERY RATE CORRECTIONS\n");
	fprintf(HLV->FDR_Summaries_File,"PopName\tRankInFDR\tKid\tPa\tMa\tFDR\tFDC.est.to.pop\tPvalue\n");
	for(i=0;i<HLV->PFR->NumPops;i++) {
		DoFDR_ForAPop(i,HLV);
		//printf("Done with FDR for Pop= %d\n",i);
	}
	fclose(HLV->FDR_Summaries_File);
	
	printf("\n\n");
	SeedToFile("snppit_seeds");
	printf("\n\n");
	
	
	printf("PRINTING FINAL PARENTAGE REPORT\n");
	PrintFinalIndivReportWithFDRs(HLV);
	
	printf("\n\n");
	printf("SNPPIT PROGRAM EXECUTION COMPLETED.\n");
	printf("\nOutput is in the following files:\n");
	printf("\tsnppit_output_ParentageAssignments.txt -- Main output file that gives false discovery rates for all offspring with the most likely parents\n");
	printf("\tsnppit_output_BasicDataSummary.txt -- Basic information about the data that got read in.\n"); 
	printf("\tsnppit_output_ChosenSMAXes.txt -- Information about the smax vectors used in the analysis.\n"); 
	printf("\tsnppit_output_FDR_Summary.txt -- Offspring assigned to parents in each population, ranked by false discovery rate.\n"); 
	printf("\tsnppit_output_PopSizesAnPiVectors.txt -- Information about the sizes of the populations and the expected fraction of different trios thereby implied.\n"); 
	printf("\tsnppit_output_TrioPosteriors.txt -- Posterior probs for all non-excluded parent pairs of every offspring in the data file.\n"); 
	printf("\n\n");
	printf("Questions, etc.? Send them to [email protected]\n\n");
	
	
	return(0);		
}
Exemplo n.º 2
0
int main(int argc, char *argv[])
{	
	char File[100];
	int temp,priorchoice;
	double temp_d;
	enum prior_type PiP = JEFFREYS, ThP = JEFFREYS;
	char gnog;
	int DoAsBurnIn = 0, DoAsReps = 0,i,g;
	time_t StartTime, CurrTime, LastTime, RealRepStartTime;
	cli_opts *CL_Opts=NULL;
	int tempSeed1,tempSeed2;
	
	hyb_data *D;
	hyb_prior *P;
	hyb_chain *C;
	
	gPiFixed = 0;  /* default is not fixed */
	
	if(argc>1) {
		CL_Opts = Get_NewHybs_CLI_Opts(argc,argv);
		
		fprintCL_Probs(stdout, CL_Opts);
	}

	else { 
		#ifdef COMPILE_NEW_HYB_WITH_NO_GUI
			fprintf(stderr,"\n\nThis version of NewHybrids was compiled with COMPILE_NEW_HYB_WITH_NO_GUI defined.  Therefore\n");
			fprintf(stderr,"you must use the command-line interface to run it.  Issue the --help option for more information.\n\n");
			exit(1);
		#endif
	
	    /* if no command line options, then do it interactively */
		/* the very first thing that we want to do is get the current working directory
			and store it in a global variable, because when it enters the GLUT 
			interface it is going to forget about that altogether... */
		sprintf(gPWD,"%s/",getenv("PWD"));
		
		printf("Welcome to the program currently named \"NewHybrids\" Version 1.1 beta");
		printf("\nPre-Released 2 OCTOBER 2002.");
		printf("\nwritten by Eric C. Anderson ([email protected])");
		printf("\nCopyright (c) by The Regents of the University of California\n");
		printf("Please see user documentation for full software agreement.");
		
			
		printf("\n-----------------------\nEnter a \n\t0 to load \"TestDat.txt\"");
		printf("\n\t1 to load \"TestDatWithOptions.dat\"");
		printf("\n\t2 to load \"TestAFLP.dat\"");
		printf("\n\t3 to load \"TestAFLPWithOptions.dat\"");
		printf("\nas the Data File\n");
		printf("\nOr enter the name of the file you wish to use.");
		printf("\n\nRemember that file must be in the directory in which\nthe program resides.\n\n->");

		
		

		/*  read the DATA FILE name or a 0 */
		temp = erdGetNext(File,&temp_d,stdin);
		
		if(temp == 1 && temp_d == 0.0) {  /*  if input was an int equal to zero set File to "TestDat.txt" */
			sprintf(File,"TestDat.dat");
		}
		if(temp == 1 && temp_d == 1.0) { 
			sprintf(File,"TestDatWithOptions.dat");
		}
		if(temp == 1 && temp_d == 2.0) {  
			sprintf(File,"TestAFLP.dat");
		}
		if(temp == 1 && temp_d == 3.0) {  
			sprintf(File,"TestAFLPWithOptions.dat");
		}
		/*  get the DATA: */
		D = GetData(File);

		
		/* Now Process Cline Info */
		ProcessClineOptions(D, gClines);
		
		
		
		/*  now read in the gtyp freq categories */
		printf("\n\n-----------------\n");
		printf("\nNow enter a 0 to read the genotype frequency classes in the file");
		printf("\"TwoGensGtypFreq.txt\".  Or enter the name of the file holding your");
		printf("\nown definitions of genotype frequency classes.\n\n->");
		
		/*  read the GTYP FILE name or a 0 */
		temp = erdGetNext(File,&temp_d,stdin);
		
		if(temp == 1 && temp_d == 0.0) {  /*  if input was an int equal to zero set File to "TestDat.txt" */
			sprintf(File,"TwoGensGtypFreq.txt");
		}
		GetGtypFreqCats(D,File);

		
		/*  now we can process the individual options a little bit  */
		ProcessIndivOptions(D);
		
		/* put alleles from indivs known to be in Purebred categories into the 
			"PriorizedAllelesPile" */
		PriorizeAllelesFromFixedZ(D);
		
		AddPriorizeAllelesFromFile(D, NULL);
		
			
				
		printf("\n\nGive me two small integers (>0) for random number seeds\n\n");
		scanf("%d%d",&tempSeed1,&tempSeed2);
		setall((long)tempSeed1,(long)tempSeed2);
		
		
		
		printf("\n\nEnter the choices for prior type for pi (mixing proportion):\n\t0 for Jeffreys\n\t1 for Uniform\n\t2 to specify it with fixed values\n->");
		scanf("%d",&priorchoice);


		switch(priorchoice) {
			case(0): 
				PiP = JEFFREYS;
				break;
			case(1):
				PiP = UNIFORM;
				break;
			case(2):
				PiP=FIXED_PRIOR;  /* I have to make a new enum for this that reports it properly */
				gPiFixed = 1; 
				gPiFixedValues = (double *)ECA_CALLOC(D->Gn->v,sizeof(double));
				temp_d = 0.0;
				printf("\nEnter values for fixed components of Pi.\nThese will be rescaled so as to sum to 1.0 if necessary.\n");
				CYCLE_g(D)
					printf("%s?  --> ",D->CategoryNames[g]);
					scanf("%lf",&(gPiFixedValues[g]));
					temp_d += gPiFixedValues[g];
				END1CYCLE
				
				printf("\nThank You! Using values:\n");
				CYCLE_g(D)
					gPiFixedValues[g] /= temp_d;
					printf("%s:  %f\n",D->CategoryNames[g],gPiFixedValues[g]);
				END1CYCLE
				break;
			default:
				PiP = JEFFREYS;
				printf("\n\nInvalid choice.  Pi Prior set to Jeffreys by default.\n\n");
		}


		P = CreatePriors(D,PiP,ThP);
		C = CreateLatentChain(D,P);
		InitializeChain(C);
		C->Seed1 = tempSeed1;
		C->Seed2 = tempSeed2;
		
		printf("\n\nData all read and ready!!!\n\n");
		
		printf("\n\nEntering GLUT interface...\n\n");
	}
	
	
	if(CL_Opts != NULL)  {  /* if we didn't do it all interactively, then we have to copy some variables, etc., over */
		/*  get the DATA: */
		printf("DATA_INITIALIZATION: Preparing to read data from %s\n",CL_Opts->DataFilePath);
		D = GetData(CL_Opts->DataFilePath);
		
		/* Now Process Cline Info */
		printf("DATA_INITIALIZATION: Processing cline options (if any)\n");
		ProcessClineOptions(D, gClines);
		
		/* then initialize the gtyp frequency category probabilities */
		printf("DATA_INITIALIZATION: Initializing genotype frequency category probabilities\n");
		if(strlen(CL_Opts->GtypCatFilePath)>0) {
			GetGtypFreqCats(D,CL_Opts->GtypCatFilePath);
		}
		else {
			CopyGtypFreqCatsFromCL(D,CL_Opts);
		}
		
		/*  now we can process the individual options a little bit  */
		ProcessIndivOptions(D);
		
		/* put alleles from indivs known to be in Purebred categories into the 
			"PriorizedAllelesPile" */
		PriorizeAllelesFromFixedZ(D);
		
		/* then add them in from a file, if there is one */
		if(strcmp(CL_Opts->AlleFreqPriorPath,"UNSET") != 0) {
			AddPriorizeAllelesFromFile(D, CL_Opts->AlleFreqPriorPath);
		}
		
		/* and copy across information about the pi priors */
		PiP = CL_Opts->PiPriType;
		ThP = CL_Opts->ThetaPriType;
		if(PiP==FIXED_PRIOR) { int i;
			gPiFixed = 1; 
			gPiFixedValues = (double *)ECA_CALLOC(D->Gn->v,sizeof(double));
			for(i=0;i<D->Gn->v;i++)  {
				gPiFixedValues[i] = CL_Opts->PiFixedValues[i];
			}
		}
		
		
		/* copy across the Trace Report request to the D structure */
		D->PiTraceReport = CL_Opts->PiTraceReport;
		
		
		P = CreatePriors(D,PiP,ThP);
		C = CreateLatentChain(D,P);
		
		if(CL_Opts->Seed1==0 && CL_Opts->Seed2==0) {
			SeedFromFile("newhyb_seeds");
		}
		else {
			printf("SEEDS_FROM_COMMAND_LINE : %ld  %ld\n",CL_Opts->Seed1,CL_Opts->Seed2);
			setall(CL_Opts->Seed1,CL_Opts->Seed1);
			C->Seed1 = (int)CL_Opts->Seed1;
			C->Seed2 = (int)CL_Opts->Seed2;
		}
		
		InitializeChain(C);
	}
	

	/* Here this is set up so that if you use the interactive version, you end up having to use the
	GUI, for now.  You have the choice if you use the command line */
	if(CL_Opts==NULL || CL_Opts->NoGui==0) {
	
		#ifndef COMPILE_NEW_HYB_WITH_NO_GUI
		/*  make the global pointer point to where it ought to */
		gC = C;
		/*  then allocate space for a sliding trace struct that holds up to 1,000 entries */
		gCompleteDataLogLike = gfduAllocSlidingTrace(1000,1);
			

		/*  here must call glutInit from within main, with the command line option */
		/*  pointer, and then call gfmInitGFM, and that does it.   */
		glutInit(&argc, argv);
		gfmInitGFM();
		#else
			fprintf(stderr,"\n\nThis version of NewHybrids was compiled with COMPILE_NEW_HYB_WITH_NO_GUI defined.  Therefore\n");
			fprintf(stderr,"it MUST be invoked with the --no-gui option.  Try adding --no-gui to your last command line.  Cheers.\n\n");
			exit(1);
		#endif
	}
	else {int dummy;
		dummy = RunWithoutGraphics(C, CL_Opts->NumBurnIn, CL_Opts->NumPostBurnIn);
	}


	return(1);
}