int main(int argc,char *argv[])
{	
	int i,j,profilenum,genelen,linelen,siglen;
	short **profileSet;
	short **indexSet;
	short gs[MAX_GENESET];
	char gsStr[1024];
	struct GSEA_RESULT *gsea_result;	
	double start,finish,duration;
	
	FILE *fp;
	char conditions[L1000_CONDITION_LEN];
	char conditionsfile[FileName_LEN];
	char offsetfile[FileName_LEN];
	char genelistfile[FileName_LEN];
	long cidnum;
	long offset;
	int input_way;
	
	// Unset flags (value -1).
	int TopN = -1;
    // Unset options (value 'UNSET').
	char * const UNSET = "unset";
    char * input   = UNSET;
	char * sample   = UNSET;
	char * reference   = UNSET;
	
	if (argc == 1) 
	{
		Usage();
		exit(0);
    }
	
	int c;
	while (1) {
		int option_index = 0;
		static struct option long_options[] = {
			{"topn",              required_argument,        0, 'n'},
			{"input",             required_argument,        0, 'i'},
			{"sample",             required_argument,        0, 's'},
			{"reference",             required_argument,        0, 'r'},
			{0, 0, 0, 0}
		};

		c = getopt_long(argc, argv, "n:i:s:r:",
            long_options, &option_index);
	
		if(c==-1)	break;
		
		switch (c) {
		
		case 0:
			// A flag was set. //
			break;

		case 'i':
			if (input == UNSET) 
			{
				input = optarg;
			}
			else 
			{
				fprintf(stderr, "%s --input set more than once\n", ERRM);
				Usage();
				exit(0);
			}
			break;
		
		case 's':
			if (sample == UNSET) 
			{
				sample = optarg;
			}
			else 
			{
				fprintf(stderr, "%s --sample set more than once\n", ERRM);
				Usage();
				exit(0);
			}
			break;
			
		case 'r':
			if (reference == UNSET) 
			{
				reference = optarg;
			}
			else 
			{
				fprintf(stderr, "%s --reference set more than once\n", ERRM);
				Usage();
				exit(0);
			}
			break;
		
		case 'n':
			if (TopN < 0) {
				TopN = atoi(optarg);
				if (TopN < 1) {
					fprintf(stderr, "%s --topn must be a positive integer\n", ERRM);
					Usage();
					exit(0);
				}
			}
			else {
				fprintf(stderr,"%s --topn set more " "than once\n", ERRM);
				Usage();
				exit(0);
			}
			break;
		default:
			// Cannot parse. //
			Usage();
			exit(0);
		}		
	}
	
	//check the parameters
	if(TopN==-1)
		TopN = 10;
	
	if((fp=fopen(sample,"r"))==NULL)
	{
		fprintf(stderr, "[ param error : -s ] can not open sample sequence number '%s' file\n",sample);
		exit(0);
	}
	fclose(fp);
	
	sprintf(genelistfile,"%s/Gene_List.txt",reference);
	
	if((fp=fopen(genelistfile,"r"))==NULL)
	{
		fprintf(stderr, "[ param error : -r ] the reference directory may be incorrect!\n");
		exit(0);
	}
	fclose(fp);
	
	sprintf(conditionsfile,"%s/Samples_Condition.txt",reference);
	sprintf(offsetfile,"%s/Samples_RowByteOffset.txt",reference);
	
	printf("Profile Set is Loading...!\n");
	
	GET_TIME(start);
	//read file parameters
	ReadFilePara(input, &profilenum, &genelen, &linelen);
	
	if( profilenum <= 0 || genelen <= 0 )
	{
		fprintf(stderr,"[ param error : -i ] this file is not exist!\n");
		exit(0);
	}
	
	printf("profilenum:%d\t genelen:%d\n",profilenum,genelen);
	
	printf("Memory check......\n");
	unsigned long memavail = memoryAvailable(1);
	printf("Available Memory:      %ld KB\n", memavail);
	unsigned long memneed = (2*sizeof(short)*profilenum*genelen + profilenum*sizeof(struct GSEA_RESULT))/1024;
	printf("Needed Memory:      %ld KB\n", memneed);
	
	if(memavail < memneed)
	{
		printf("available memory is not enough!!! Please use MPI version and more nodes!!!\n");
		return;
	}
		
	
	//malloc profile dataset memory
	profileSet = (short **)malloc(profilenum*sizeof(short *));
	for(i=0;i<profilenum;i++)
		profileSet[i] = (short *)malloc(genelen*sizeof(short));
	//malloc index set for profile dataset 
	indexSet = (short **)malloc(profilenum*sizeof(short *));
	for(i=0;i<profilenum;i++)
		indexSet[i] = (short *)malloc(genelen*sizeof(short));
	
	//malloc GSEA para Vector
	gsea_result = (struct GSEA_RESULT*)malloc(profilenum*sizeof(struct GSEA_RESULT));
	
	//load profile dataset
	ReadFile(input, linelen, 0 , profilenum , profilenum, genelen, profileSet); 
		
	//compute the index for profile sets
	for(i=0; i<profilenum; i++)
		getIndex(profileSet[i],indexSet[i],genelen);
	
	GET_TIME(finish);
	//compute the IO time and prework time
	duration = finish-start;     
	printf("loading IO and prework time: %.4f s\n",duration); 
	
	printf("which way do you want to input the GeneSet( 0 -> standard input , others -> file input ):");
	scanf("%d", &input_way);
	
	if(input_way==0)
	{
		//get the geneset , split by space
		getchar();
		printf("input the GeneSet until 'exit'( a string of each Gene Symbol split by space ):\n");
		scanf("%[^\n]",gsStr);	
	}else
	{
		printf("input the path of file that has GeneSet until 'exit'(each line has a Gene Symbol/name):\n");
		scanf("%s",gsStr);
	}
	
	while(strcmp(gsStr,"exit")!=0) 
	{
		//get the geneset
		if(input_way==0)
		{
			getGeneSet(gs,&siglen,gsStr,genelistfile);
			if(siglen==0)
			{
				getchar();    //remove the Enter from stdin
				printf("There is no gene be hitted, please make sure the GeneSet have at least one Gene in Profile!\n");
				printf("input the GeneSet until 'exit'( a string of each Gene Symbol split by space ):\n");
				scanf("%[^\n]",gsStr);
				continue;
			}
		}else
		{
			getGeneSetbyFile(gs,&siglen,gsStr,genelistfile);	
			if(siglen==0)
			{
				getchar();    //remove the Enter from stdin
				printf("There is no gene be hitted, please make sure the GeneSet have at least one Gene in Profile!\n");
				printf("input the path of file that has GeneSet until 'exit'(each line has a Gene Symbol/name):\n");
				scanf("%s",gsStr);
				continue;
			}
		}
		
		GET_TIME(start);
		/********************run the GSEA algorithm*****************************/
		//compute the global ES	
		getGlobalES( genelen, siglen , global_ES);
		
		for(i=0; i<profilenum; i++){
			GSEA( gs, indexSet[i], genelen, siglen, &(gsea_result[i].ES), &(gsea_result[i].NES), &(gsea_result[i].pv), global_ES );
			gsea_result[i].cid = i+1;
		}
		
		//printf("cid:%d  ES:%f  NES:%f  pv:%.10lf\n",gsea_result[19999].cid, gsea_result[19999].ES, gsea_result[19999].NES, gsea_result[19999].pv);
		//sort the gsea result
		quiksort_gsea(gsea_result,0,profilenum-1);
		
		/********************print the TopN GSEA result*************************/
		printf("\nprintf the high level of TopN GSEA result:\n");
		for(i = profilenum-1; i > profilenum-1-TopN; i--)
		{
			cidnum = readByteOffsetFile(sample,gsea_result[i].cid);
			offset = readByteOffsetFile(offsetfile,cidnum);
			getSampleConditions(conditionsfile, offset, conditions);
			printf("\nNO.%d -> SampleConditions: %s  ES:%f  NES:%f  pv:%.10lf\n", profilenum-i, conditions, gsea_result[i].ES, gsea_result[i].NES, gsea_result[i].pv);
		}
			
		printf("\nprintf the low level of TopN GSEA result:\n");
		for(i=0; i<TopN; i++)
		{
			cidnum = readByteOffsetFile(sample,gsea_result[i].cid);
			offset = readByteOffsetFile(offsetfile,cidnum);
			getSampleConditions(conditionsfile, offset, conditions);
			printf("\nNO.%d -> SampleConditions: %s  ES:%f  NES:%f  pv:%.10lf\n", i+1, conditions, gsea_result[i].ES, gsea_result[i].NES, gsea_result[i].pv); 
		}
			 				
		GET_TIME(finish);
		duration = finish-start;    //compute the GSEA time 
		printf("finish GSEA time: %.4f s\n",duration); 
		
		getchar();    //remove the Enter from stdin
		//get the geneset
		if(input_way==0)
		{
			//get the geneset , split by space
			printf("input the GeneSet until 'exit'( a string of each Gene Symbol split by space ):\n");
			scanf("%[^\n]",gsStr);	
		}else
		{
			printf("input the path of file that has GeneSet until 'exit'(each line has a Gene Symbol/name):\n");
			scanf("%s",gsStr);
		}		
	}
	
	//free the memory allocate dyn.
	free(gsea_result);
	for(i=0; i<profilenum; i++){
		free(profileSet[i]);
		free(indexSet[i]);
	}
	free(profileSet);
	free(indexSet);
	
	return 0;
}
int main(int argc,char *argv[])
{	
	int i,j;
	int genelen;
	int profilenum1,profilenum2;
	int linelen1,linelen2;
	struct Profile_triple *triples1,**triples2;
	float **local_ES_Matrix;		//part of the ES_Matrix in this process
	float *ES_test;  //ES used for testing without writing
	int	my_rank;   /* My process rank           */
    int	p;         /* The number of processes   */
    int source,dest;  
    int tag = 0;
    MPI_Status  status;
	int local_P;	//the data number of each processes must hand
	int begin,end;
	int parameternum;
	int corenum;
	int siglen;	
	int load_time;
	float proportion;
	int ifwrite;
	

	double start,finish,duration;
	
	/* Let the system do what it needs to start up MPI */
    MPI_Init(&argc, &argv);

    /* Get my process rank */
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    /* Find out how many processes are being used */
    MPI_Comm_size(MPI_COMM_WORLD, &p);
	
	/* check parameter*/
	if(my_rank == 0)
	{
		parameternum = argc;
		if(parameternum == 1)
			Usage();
	}
	MPI_Bcast(&parameternum, 1, MPI_INT, 0, MPI_COMM_WORLD);
	if(parameternum == 1)
	{
		MPI_Finalize();
		exit(0);
	}
	
	// Unset flags (value -1).
	corenum = -1;
	siglen = -1;
	load_time = -1;
	proportion = -1;
	ifwrite = -1;
    // Unset options (value 'UNSET').
	char * const UNSET = "unset";
    char * input1   = UNSET;
	char * input2   = UNSET;
	char * output   = UNSET;
	
	int c;
	while (1) {
		int option_index = 0;
		static struct option long_options[] = {
			{"thread",             required_argument,        0, 't'},
			{"siglen",             required_argument,        0, 'l'},
			{"loadtime",           required_argument,        0, 'a'},
			{"proportion",         required_argument,        0, 'p'},
			{"write",              required_argument,        0, 'w'},
			{"input1",             required_argument,        0, '1'},
			{"input2",             required_argument,        0, '2'},
			{"output",             required_argument,        0, 'o'},
			{0, 0, 0, 0}
		};

		c = getopt_long(argc, argv, "t:l:a:p:w:1:2:o:",
            long_options, &option_index);
	
		if(c==-1)	break;
		
		switch (c) {
		
		case 0:
			// A flag was set. //
			break;

		case '1':
			if (input1 == UNSET) 
			{
				input1 = optarg;
			}
			else 
			{
				if(my_rank==0)
				{
					fprintf(stderr, "%s --input1 set more than once\n", ERRM);
					Usage();
				}		
				MPI_Finalize();
				exit(0);
			}
			break;
		
		case '2':
			if (input2 == UNSET) 
			{
				input2 = optarg;
			}
			else 
			{
				if(my_rank==0)
				{
					fprintf(stderr, "%s --input2 set more than once\n", ERRM);
					Usage();
				}		
				MPI_Finalize();
				exit(0);
			}
			break;
		
		case 'o':
			if (output == UNSET) 
			{
				output = optarg;
			}
			else 
			{
				if(my_rank==0)
				{
					fprintf(stderr, "%s --output set more than once\n", ERRM);
					Usage();
				}		
				MPI_Finalize();
				exit(0);
			}
			break;
		
		case 't':
			if (corenum < 0) {
				corenum = atoi(optarg);
				if (corenum < 1) {
					if(my_rank==0)
					{
						fprintf(stderr, "%s --thread must be a positive integer\n", ERRM);
						Usage();
					}		
					MPI_Finalize();
					exit(0);
				}
			}
			else {
				if(my_rank==0)
				{
					fprintf(stderr,"%s --thread set more " "than once\n", ERRM);
					Usage();
				}		
				MPI_Finalize();
				exit(0);
			}
			break;
			
		case 'l':
			if (siglen < 0) {
				siglen = atoi(optarg);
				if (siglen < 1) {
					if(my_rank==0)
					{
						fprintf(stderr, "%s --siglen must be a positive integer\n", ERRM);
						Usage();
					}		
					MPI_Finalize();
					exit(0);
				}
			}
			else {
				if(my_rank==0)
				{
					fprintf(stderr,"%s --siglen set more " "than once\n", ERRM);
					Usage();
				}		
				MPI_Finalize();
				exit(0);
			}
			break;
			
		case 'a':
			if (load_time < 0) {
				load_time = atoi(optarg);
				if (load_time < 1) {
					if(my_rank==0)
					{
						fprintf(stderr, "%s --load time must be a positive integer\n", ERRM);
						Usage();
					}		
					MPI_Finalize();
					exit(0);
				}
			}
			else {
				if(my_rank==0)
				{
					fprintf(stderr,"%s --load time set more " "than once\n", ERRM);
					Usage();
				}		
				MPI_Finalize();
				exit(0);
			}
			break;
		
		case 'p':
			if (proportion < 0) {
				proportion = atof(optarg);
				if (proportion > 1 || proportion <= 0) {
					if(my_rank==0)
					{
						fprintf(stderr, "%s -- proportion must be kept in (0,1]\n", ERRM);
						Usage();
					}		
					MPI_Finalize();
					exit(0);
				}
			}
			else {
				if(my_rank==0)
				{
					fprintf(stderr,"%s --proportion set more " "than once\n", ERRM);
					Usage();
				}		
				MPI_Finalize();
				exit(0);
			}
			break;
		
		case 'w':
			if (ifwrite < 0) {
				ifwrite = atof(optarg);
			}
			else {
				if(my_rank==0)
				{
					fprintf(stderr,"%s --write set more " "than once\n", ERRM);
					Usage();
				}		
				MPI_Finalize();
				exit(0);
			}
			break;
			
		default:
			// Cannot parse. //
			if(my_rank==0)
				Usage();
			MPI_Finalize();
			exit(0);
		}		
	}

	//check the parameters
	if(corenum == -1)
		corenum = 1;
	
	if(siglen == -1)
		siglen = 50;
	
	if(load_time == -1)
		load_time = 1;
	
	if(proportion == -1)
		proportion = 1;
	
	if(ifwrite == -1)
		ifwrite = 1;
	
	if(output == UNSET)
	{
		if(my_rank==0)
			fprintf(stderr," [ param error : -o ] Not Set output parameter!\n");
		MPI_Finalize();
		exit(0);
	}
	
	triples2 = (struct Profile_triple **)malloc(load_time*sizeof(struct Profile_triple *));
	
	//barrier all processes to compute time
	MPI_Barrier(MPI_COMM_WORLD); 
	if(my_rank == 0){
		printf("Profile Set is Loading...!\n");
		GET_TIME(start);
	}
	
	//read file parameters in all processes
	ReadFilePara(input1, &profilenum1, &genelen, &linelen1);
	ReadFilePara(input2, &profilenum2, &genelen, &linelen2);
	
	profilenum1 *= proportion;
	profilenum2 *= proportion;
	
	//input file check
	if( profilenum1 <= 0 || genelen <= 0)
	{
		if(my_rank==0)
			fprintf(stderr," [ param error : -1 ] this file input1 is not exist!\n");
		MPI_Finalize();
		exit(0);
	}
	
	if( profilenum2 <= 0 || genelen <= 0)
	{
		if(my_rank==0)
			fprintf(stderr," [ param error : -2 ] this file input2 is not exist!\n");
		MPI_Finalize();
		exit(0);
	}
	
	if(my_rank==0)
	{
		printf("Genelen:	%d\n", genelen);
		printf("Profiles1 length:	%d\n", profilenum1);
		printf("Profiles2 length:	%d\n", profilenum2);
	}
	
	// compute the local size 、up boundary and down boundary for every process in dataset1
	split_data(profilenum1, p, my_rank, &begin, &end, &local_P);
	
	if(my_rank==0)
		printf("Memory check......\n");
	
	unsigned long memavail = memoryAvailable(1);
	
	unsigned long memneed = sizeof(struct Profile_triple)/1024*(local_P+profilenum2/load_time) + local_P/1024*profilenum2*sizeof(float);
	
	unsigned long memallneed = sizeof(struct Profile_triple)/1024*(profilenum1+profilenum2) + profilenum1/1024*profilenum2*sizeof(float);
	
	if(my_rank==0)
	{
		printf("Available Memory:      %ld KB\n", memavail);
		printf("Needed Memory:      %ld KB\n", memneed);
		printf("All Needed Memory:      %ld KB\n", memallneed);
	}

	unsigned long mem1 = sizeof(struct Profile_triple)/1024*(local_P+profilenum2/load_time);
	unsigned long mem2 = profilenum1/1024*profilenum2*sizeof(float);
	
	int nodenum = (int)(mem2/(memavail-mem1)+1);
	if( memneed > memavail )
	{
		if( my_rank==0 )
		{
			//printf("mem1:      %ld KB\n", mem1);
			//printf("mem2:      %ld KB\n", mem2);
			printf("available memory is not enough to store all results, recommend to use more than %d nodes!!!\n", nodenum);
		}
		if(ifwrite==1){
			MPI_Finalize();
			exit(0);
		}else{
			if( my_rank==0 )
			{
				printf("because we are just testing without writing, we will continue!!!\n");
			}
		}
	}
	
		
	/*****read the local part file of dataset1 in every process and get their triples****************/
	triples1 = (struct Profile_triple *)malloc(sizeof(struct Profile_triple)*local_P);	
	getTriples(local_P, genelen, siglen, profilenum1, linelen1, begin, end, input1, triples1);
	
	
	if(ifwrite==1)
	{
		//not test, then allocate the local_ES_Matrix memory
		local_ES_Matrix = (float **)malloc(local_P*sizeof(float *));
		for(i=0;i<local_P;i++)
			local_ES_Matrix[i] = (float *)malloc(profilenum2*sizeof(float));
	}else{
		ES_test = (float *)malloc(corenum*sizeof(float));
	}
	
	int current_time = 0;
	int begin_localfile2, end_localfile2, len_localfile2;
	while( current_time < load_time )
	{
		/********************para load profile dataset2 by openmp******************************/
		split_data(profilenum2, load_time, current_time, &begin_localfile2, &end_localfile2, &len_localfile2);
		//allocate the triples memory for local dataset2
		triples2[current_time] = (struct Profile_triple *)malloc(sizeof(struct Profile_triple)*len_localfile2);
		#pragma omp parallel num_threads(corenum)
		{
			int local_t;	//the data number of each thread must hand
			int begin_t,end_t;
			int threadID = omp_get_thread_num();
		
			// compute the local size 、up boundary and down boundary for every thread in dataset2
			split_data(len_localfile2, corenum, threadID, &begin_t, &end_t, &local_t);
		
			// compute the begin_t to end_t triples
			getFreeTriples(genelen, siglen, profilenum2, linelen2, begin_localfile2 + begin_t, begin_t, local_t, input2, triples2[current_time]);
			//getPartTriples(genelen, siglen, profilenum2, linelen2, begin_t, end_t,  input2, triples2[current_time]);
		}
		
		MPI_Barrier(MPI_COMM_WORLD);
		if(my_rank == 0){
			GET_TIME(finish);
			//compute the IO time
			duration = finish-start;     
			printf("phase %d -->  loading IO and prework time in no communication way: %.4f s\n", current_time+1, duration);

			printf("phase %d -->  Paral compute the ES_Matrix is Starting...!\n", current_time+1);
			GET_TIME(start);
		}
		
		
		/*
		if(my_rank == 0){
			int k;
			for(k=0;k<siglen;k++)
				printf("%d ",triples1[0].gsUp[k]);
			printf("\n");
			for(k=0;k<genelen;k++)
				printf("%d ",triples2[current_time][profilenum2-1].index[k]);
			printf("\n");
		}
		*/
	
		/********************para compute the part of ES_Matrix******************************/
		
		#pragma omp parallel num_threads(corenum)
		{
			int k,t;
			int local_t;	//the data number of each thread must hand
			int begin_t,end_t;
			int threadID = omp_get_thread_num();
		
			// compute the local size 、up boundary and down boundary for every thread in dataset2
			split_data(len_localfile2, corenum, threadID, &begin_t, &end_t, &local_t);
		
			// compute the part of the ES matrix
			if(ifwrite==1){
				for(k=0;k<local_P;k++)
					for(t=begin_t;t<end_t;t++)
						local_ES_Matrix[k][begin_localfile2 + t] = ES_Profile_triple(triples1[k],triples2[current_time][t],genelen,siglen);
			}else{ //just calculate for testing
				for(k=0;k<local_P;k++)
					for(t=begin_t;t<end_t;t++)
						ES_test[threadID] = ES_Profile_triple(triples1[k],triples2[current_time][t],genelen,siglen);
			}
		}
	
		MPI_Barrier(MPI_COMM_WORLD);
		if(my_rank == 0){
			GET_TIME(finish);
			//compute the compute time
			duration = finish-start;     
			printf("phase %d --> Paral compute the ES_Matrix time: %.4f s\n", current_time+1, duration);
		
			if(current_time==load_time-1)
				if(ifwrite==1)
					printf("Writing file is Starting...!\n");
			
			GET_TIME(start);		
		}
			
		free(triples2[current_time]);
		current_time++;	
	}

	
	/*
	if(my_rank == 0){
		int k;
		for(k=0;k<profilenum2;k++)
			printf("%f ",local_ES_Matrix[0][k]);
		printf("\n");
	}
	*/
	
	if(ifwrite==1)
	{
		char Res[128];
		sprintf(Res,"%s_%d.txt",output,my_rank);
		WritetxtResult(0, local_P, profilenum2, Res, local_ES_Matrix);
	
		MPI_Barrier(MPI_COMM_WORLD);
		if(my_rank == 0){
			GET_TIME(finish);
			//compute the write time
			duration = finish-start;     
			printf("Write Result spent: %.4f s\n",duration);
		}
	}else{
		if(my_rank == 0){   
			printf("Just run for test, no results output\n");
		}
	}
	
	//free the memory
	if(ifwrite==1)
	{
		for(i=0;i<local_P;i++)
			free(local_ES_Matrix[i]);
		free(local_ES_Matrix);
	}else{
		free(ES_test);
	}
	free(triples1);
	free(triples2);
	
	MPI_Finalize();
	return 0;
}
/* Rank 0 prints msg verbatim on stderr; every rank then finalizes MPI and exits with status 0. */
static void esmat_basic_exit(int my_rank, const char *msg)
{
	if (my_rank == 0)
		fprintf(stderr, "%s", msg);
	MPI_Finalize();
	exit(0);
}

/*
 * malloc() that never returns NULL: on failure the whole MPI job is aborted,
 * because a single rank leaving early would leave the others blocked in the
 * next collective. A zero-byte request is rounded up to one byte so that an
 * empty slice is not mistaken for an allocation failure.
 */
static void *esmat_basic_alloc(size_t bytes, int my_rank, const char *what)
{
	void *mem = malloc(bytes ? bytes : 1);
	if (mem == NULL) {
		fprintf(stderr, "[ rank %d ] out of memory while allocating %s\n", my_rank, what);
		MPI_Abort(MPI_COMM_WORLD, 1);
		exit(EXIT_FAILURE);
	}
	return mem;
}

/*
 * MPI + OpenMP computation of the ES matrix between two profile data sets.
 *
 * Exactly five positional arguments are expected:
 *   argv[1]  OpenMP threads requested per process (positive integer)
 *   argv[2]  signature length passed to the triple/ES routines (positive integer)
 *   argv[3]  profile data set 1, split across the MPI processes
 *   argv[4]  profile data set 2, loaded completely by every process
 *   argv[5]  output prefix; rank r writes "<prefix>_<r>.txt"
 *
 * Returns 0, also after a reported argument error; an allocation failure
 * aborts the job through MPI_Abort().
 * NOTE(review): argument errors keep the historical exit status 0 - consider
 * a non-zero status once the callers have been audited.
 */
int main(int argc,char *argv[])
{
	int i;
	int genelen;
	int genelen1 = 0, genelen2 = 0;
	int profilenum1 = 0, profilenum2 = 0;
	int linelen1 = 0, linelen2 = 0;
	struct Profile_triple *triples1,*triples2;
	float **local_ES_Matrix;		//part of the ES_Matrix in this process
	int	my_rank;   /* My process rank           */
	int	p;         /* The number of processes   */
	int local_P;	//number of data set 1 profiles this process handles
	int begin,end;
	int parameternum;
	int corenum;
	int siglen;

	double start,finish,duration;

	/* Let the system do what it needs to start up MPI */
	MPI_Init(&argc, &argv);

	/* Get my process rank */
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

	/* Find out how many processes are being used */
	MPI_Comm_size(MPI_COMM_WORLD, &p);

	/* check parameter: rank 0 decides, everybody follows */
	if(my_rank == 0)
	{
		parameternum = argc;
		if(parameternum!=6)
			Usage(argv[0]);
	}
	MPI_Bcast(&parameternum, 1, MPI_INT, 0, MPI_COMM_WORLD);
	if(parameternum!=6)
	{
		MPI_Finalize();
		exit(0);
	}

	corenum = atoi(argv[1]);
	siglen = atoi(argv[2]);
	// atoi() yields 0 for non-numeric text; a thread count below 1 is not a valid num_threads() value
	if(corenum < 1 || siglen < 1)
	{
		if(my_rank == 0)
			Usage(argv[0]);
		MPI_Finalize();
		exit(0);
	}

	//barrier all processes to compute time
	MPI_Barrier(MPI_COMM_WORLD);
	if(my_rank == 0){
		printf("Profile Set is Loading...!\n");
		GET_TIME(start);
	}

	//read file parameters in all processes; separate gene counts so each file is judged on its own
	ReadFilePara(argv[3], &profilenum1, &genelen1, &linelen1);
	ReadFilePara(argv[4], &profilenum2, &genelen2, &linelen2);

	if( profilenum1 <= 0 || genelen1 <= 0 )
		esmat_basic_exit(my_rank, " [ param error ] this file input1 is not exist!\n");

	if( profilenum2 <= 0 || genelen2 <= 0 )
		esmat_basic_exit(my_rank, " [ param error ] this file input2 is not exist!\n");

	// every later call takes a single gene count, so the two files must agree
	if( genelen1 != genelen2 )
		esmat_basic_exit(my_rank, " [ param error ] input1 and input2 have different gene lengths!\n");
	genelen = genelen1;

	// compute the local size, upper boundary and lower boundary for every process in dataset1
	split_data(profilenum1, p, my_rank, &begin, &end, &local_P);

	/*****read the local part file of dataset1 in every process and get their triples****************/
	triples1 = (struct Profile_triple *)esmat_basic_alloc(sizeof(struct Profile_triple)*local_P, my_rank, "the dataset1 triples");
	getTriples(local_P, genelen, siglen, profilenum1, linelen1, begin, end, argv[3], triples1);

	/********************para load profile dataset2 by openmp******************************/
	//allocate the triples memory for dataset2
	triples2 = (struct Profile_triple *)esmat_basic_alloc(sizeof(struct Profile_triple)*profilenum2, my_rank, "the dataset2 triples");
	#pragma omp parallel num_threads(corenum)
	{
		int local_t;	//number of profiles this thread handles
		int begin_t,end_t;
		int threadID = omp_get_thread_num();
		// num_threads() is only a request: split by the team that really runs,
		// otherwise the slices of missing threads would never be loaded
		int team_size = omp_get_num_threads();

		// compute the local size, upper boundary and lower boundary for every thread in dataset2
		split_data(profilenum2, team_size, threadID, &begin_t, &end_t, &local_t);

		// compute the begin_t to end_t triples
		getPartTriples(genelen, siglen, profilenum2, linelen2, begin_t, end_t, argv[4], triples2);
	}

	MPI_Barrier(MPI_COMM_WORLD);
	if(my_rank == 0){
		GET_TIME(finish);
		//compute the IO time
		duration = finish-start;
		printf("loading IO and prework time in no communication way: %.4f s\n",duration);

		printf("Paral compute the ES_Matrix is Starting...!\n");
		GET_TIME(start);
	}

	/********************para compute the part of ES_Matrix******************************/
	//allocate the local_ES_Matrix memory: local_P rows of profilenum2 floats
	local_ES_Matrix = (float **)esmat_basic_alloc(local_P*sizeof(float *), my_rank, "the ES matrix rows");
	for(i=0;i<local_P;i++)
		local_ES_Matrix[i] = (float *)esmat_basic_alloc(profilenum2*sizeof(float), my_rank, "an ES matrix row");
	#pragma omp parallel num_threads(corenum)
	{
		int k,t;
		int local_t;	//number of profiles this thread handles
		int begin_t,end_t;
		int threadID = omp_get_thread_num();
		int team_size = omp_get_num_threads();	// see the loading region above

		// compute the local size, upper boundary and lower boundary for every thread in dataset2
		split_data(profilenum2, team_size, threadID, &begin_t, &end_t, &local_t);

		// each thread fills its own column range of every local row
		for(k=0;k<local_P;k++)
			for(t=begin_t;t<end_t;t++)
				local_ES_Matrix[k][t] = ES_Profile_triple(triples1[k],triples2[t],genelen,siglen);
	}

	MPI_Barrier(MPI_COMM_WORLD);
	if(my_rank == 0){
		GET_TIME(finish);
		//compute the compute time
		duration = finish-start;
		printf("Paral compute the ES_Matrix time: %.4f s\n",duration);

		printf("Writing file is Starting...!\n");
		GET_TIME(start);
	}

	// size the file name from its actual content instead of a fixed buffer
	int name_len = snprintf(NULL, 0, "%s_%d.txt", argv[5], my_rank);
	if(name_len < 0)
	{
		fprintf(stderr, "[ rank %d ] can not build the result file name\n", my_rank);
		MPI_Abort(MPI_COMM_WORLD, 1);
		exit(EXIT_FAILURE);
	}
	char *Res = (char *)esmat_basic_alloc((size_t)name_len + 1, my_rank, "the result file name");
	snprintf(Res, (size_t)name_len + 1, "%s_%d.txt", argv[5], my_rank);
	WritetxtResult(0, local_P, profilenum2, Res, local_ES_Matrix);
	free(Res);

	MPI_Barrier(MPI_COMM_WORLD);
	if(my_rank == 0){
		GET_TIME(finish);
		//compute the write time
		duration = finish-start;
		printf("Write Result spent: %.4f s\n",duration);
	}

	//free the memory
	for(i=0;i<local_P;i++)
		free(local_ES_Matrix[i]);
	free(local_ES_Matrix);
	free(triples1);
	free(triples2);

	MPI_Finalize();
	return 0;
}