int main(int argc,char *argv[]) { int i,j,profilenum,genelen,linelen,siglen; short **profileSet; short **indexSet; short gs[MAX_GENESET]; char gsStr[1024]; struct GSEA_RESULT *gsea_result; double start,finish,duration; FILE *fp; char conditions[L1000_CONDITION_LEN]; char conditionsfile[FileName_LEN]; char offsetfile[FileName_LEN]; char genelistfile[FileName_LEN]; long cidnum; long offset; int input_way; // Unset flags (value -1). int TopN = -1; // Unset options (value 'UNSET'). char * const UNSET = "unset"; char * input = UNSET; char * sample = UNSET; char * reference = UNSET; if (argc == 1) { Usage(); exit(0); } int c; while (1) { int option_index = 0; static struct option long_options[] = { {"topn", required_argument, 0, 'n'}, {"input", required_argument, 0, 'i'}, {"sample", required_argument, 0, 's'}, {"reference", required_argument, 0, 'r'}, {0, 0, 0, 0} }; c = getopt_long(argc, argv, "n:i:s:r:", long_options, &option_index); if(c==-1) break; switch (c) { case 0: // A flag was set. // break; case 'i': if (input == UNSET) { input = optarg; } else { fprintf(stderr, "%s --input set more than once\n", ERRM); Usage(); exit(0); } break; case 's': if (sample == UNSET) { sample = optarg; } else { fprintf(stderr, "%s --sample set more than once\n", ERRM); Usage(); exit(0); } break; case 'r': if (reference == UNSET) { reference = optarg; } else { fprintf(stderr, "%s --reference set more than once\n", ERRM); Usage(); exit(0); } break; case 'n': if (TopN < 0) { TopN = atoi(optarg); if (TopN < 1) { fprintf(stderr, "%s --topn must be a positive integer\n", ERRM); Usage(); exit(0); } } else { fprintf(stderr,"%s --topn set more " "than once\n", ERRM); Usage(); exit(0); } break; default: // Cannot parse. // Usage(); exit(0); } } //check the parameters if(TopN==-1) TopN = 10; if((fp=fopen(sample,"r"))==NULL) { fprintf(stderr, "[ param error : -s ] can not open sample sequence number '%s' file\n",sample); exit(0); } fclose(fp); sprintf(genelistfile,"%s/Gene_List.txt",reference); if((fp=fopen(genelistfile,"r"))==NULL) { fprintf(stderr, "[ param error : -r ] the reference directory may be incorrect!\n"); exit(0); } fclose(fp); sprintf(conditionsfile,"%s/Samples_Condition.txt",reference); sprintf(offsetfile,"%s/Samples_RowByteOffset.txt",reference); printf("Profile Set is Loading...!\n"); GET_TIME(start); //read file parameters ReadFilePara(input, &profilenum, &genelen, &linelen); if( profilenum <= 0 || genelen <= 0 ) { fprintf(stderr,"[ param error : -i ] this file is not exist!\n"); exit(0); } printf("profilenum:%d\t genelen:%d\n",profilenum,genelen); printf("Memory check......\n"); unsigned long memavail = memoryAvailable(1); printf("Available Memory: %ld KB\n", memavail); unsigned long memneed = (2*sizeof(short)*profilenum*genelen + profilenum*sizeof(struct GSEA_RESULT))/1024; printf("Needed Memory: %ld KB\n", memneed); if(memavail < memneed) { printf("available memory is not enough!!! Please use MPI version and more nodes!!!\n"); return; } //malloc profile dataset memory profileSet = (short **)malloc(profilenum*sizeof(short *)); for(i=0;i<profilenum;i++) profileSet[i] = (short *)malloc(genelen*sizeof(short)); //malloc index set for profile dataset indexSet = (short **)malloc(profilenum*sizeof(short *)); for(i=0;i<profilenum;i++) indexSet[i] = (short *)malloc(genelen*sizeof(short)); //malloc GSEA para Vector gsea_result = (struct GSEA_RESULT*)malloc(profilenum*sizeof(struct GSEA_RESULT)); //load profile dataset ReadFile(input, linelen, 0 , profilenum , profilenum, genelen, profileSet); //compute the index for profile sets for(i=0; i<profilenum; i++) getIndex(profileSet[i],indexSet[i],genelen); GET_TIME(finish); //compute the IO time and prework time duration = finish-start; printf("loading IO and prework time: %.4f s\n",duration); printf("which way do you want to input the GeneSet( 0 -> standard input , others -> file input ):"); scanf("%d", &input_way); if(input_way==0) { //get the geneset , split by space getchar(); printf("input the GeneSet until 'exit'( a string of each Gene Symbol split by space ):\n"); scanf("%[^\n]",gsStr); }else { printf("input the path of file that has GeneSet until 'exit'(each line has a Gene Symbol/name):\n"); scanf("%s",gsStr); } while(strcmp(gsStr,"exit")!=0) { //get the geneset if(input_way==0) { getGeneSet(gs,&siglen,gsStr,genelistfile); if(siglen==0) { getchar(); //remove the Enter from stdin printf("There is no gene be hitted, please make sure the GeneSet have at least one Gene in Profile!\n"); printf("input the GeneSet until 'exit'( a string of each Gene Symbol split by space ):\n"); scanf("%[^\n]",gsStr); continue; } }else { getGeneSetbyFile(gs,&siglen,gsStr,genelistfile); if(siglen==0) { getchar(); //remove the Enter from stdin printf("There is no gene be hitted, please make sure the GeneSet have at least one Gene in Profile!\n"); printf("input the path of file that has GeneSet until 'exit'(each line has a Gene Symbol/name):\n"); scanf("%s",gsStr); continue; } } GET_TIME(start); /********************run the GSEA algorithm*****************************/ //compute the global ES getGlobalES( genelen, siglen , global_ES); for(i=0; i<profilenum; i++){ GSEA( gs, indexSet[i], genelen, siglen, &(gsea_result[i].ES), &(gsea_result[i].NES), &(gsea_result[i].pv), global_ES ); gsea_result[i].cid = i+1; } //printf("cid:%d ES:%f NES:%f pv:%.10lf\n",gsea_result[19999].cid, gsea_result[19999].ES, gsea_result[19999].NES, gsea_result[19999].pv); //sort the gsea result quiksort_gsea(gsea_result,0,profilenum-1); /********************print the TopN GSEA result*************************/ printf("\nprintf the high level of TopN GSEA result:\n"); for(i = profilenum-1; i > profilenum-1-TopN; i--) { cidnum = readByteOffsetFile(sample,gsea_result[i].cid); offset = readByteOffsetFile(offsetfile,cidnum); getSampleConditions(conditionsfile, offset, conditions); printf("\nNO.%d -> SampleConditions: %s ES:%f NES:%f pv:%.10lf\n", profilenum-i, conditions, gsea_result[i].ES, gsea_result[i].NES, gsea_result[i].pv); } printf("\nprintf the low level of TopN GSEA result:\n"); for(i=0; i<TopN; i++) { cidnum = readByteOffsetFile(sample,gsea_result[i].cid); offset = readByteOffsetFile(offsetfile,cidnum); getSampleConditions(conditionsfile, offset, conditions); printf("\nNO.%d -> SampleConditions: %s ES:%f NES:%f pv:%.10lf\n", i+1, conditions, gsea_result[i].ES, gsea_result[i].NES, gsea_result[i].pv); } GET_TIME(finish); duration = finish-start; //compute the GSEA time printf("finish GSEA time: %.4f s\n",duration); getchar(); //remove the Enter from stdin //get the geneset if(input_way==0) { //get the geneset , split by space printf("input the GeneSet until 'exit'( a string of each Gene Symbol split by space ):\n"); scanf("%[^\n]",gsStr); }else { printf("input the path of file that has GeneSet until 'exit'(each line has a Gene Symbol/name):\n"); scanf("%s",gsStr); } } //free the memory allocate dyn. free(gsea_result); for(i=0; i<profilenum; i++){ free(profileSet[i]); free(indexSet[i]); } free(profileSet); free(indexSet); return 0; }
int main(int argc,char *argv[]) { int i,j; int genelen; int profilenum1,profilenum2; int linelen1,linelen2; struct Profile_triple *triples1,**triples2; float **local_ES_Matrix; //part of the ES_Matrix in this process float *ES_test; //ES used for testing without writing int my_rank; /* My process rank */ int p; /* The number of processes */ int source,dest; int tag = 0; MPI_Status status; int local_P; //the data number of each processes must hand int begin,end; int parameternum; int corenum; int siglen; int load_time; float proportion; int ifwrite; double start,finish,duration; /* Let the system do what it needs to start up MPI */ MPI_Init(&argc, &argv); /* Get my process rank */ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /* Find out how many processes are being used */ MPI_Comm_size(MPI_COMM_WORLD, &p); /* check parameter*/ if(my_rank == 0) { parameternum = argc; if(parameternum == 1) Usage(); } MPI_Bcast(¶meternum, 1, MPI_INT, 0, MPI_COMM_WORLD); if(parameternum == 1) { MPI_Finalize(); exit(0); } // Unset flags (value -1). corenum = -1; siglen = -1; load_time = -1; proportion = -1; ifwrite = -1; // Unset options (value 'UNSET'). char * const UNSET = "unset"; char * input1 = UNSET; char * input2 = UNSET; char * output = UNSET; int c; while (1) { int option_index = 0; static struct option long_options[] = { {"thread", required_argument, 0, 't'}, {"siglen", required_argument, 0, 'l'}, {"loadtime", required_argument, 0, 'a'}, {"proportion", required_argument, 0, 'p'}, {"write", required_argument, 0, 'w'}, {"input1", required_argument, 0, '1'}, {"input2", required_argument, 0, '2'}, {"output", required_argument, 0, 'o'}, {0, 0, 0, 0} }; c = getopt_long(argc, argv, "t:l:a:p:w:1:2:o:", long_options, &option_index); if(c==-1) break; switch (c) { case 0: // A flag was set. // break; case '1': if (input1 == UNSET) { input1 = optarg; } else { if(my_rank==0) { fprintf(stderr, "%s --input1 set more than once\n", ERRM); Usage(); } MPI_Finalize(); exit(0); } break; case '2': if (input2 == UNSET) { input2 = optarg; } else { if(my_rank==0) { fprintf(stderr, "%s --input2 set more than once\n", ERRM); Usage(); } MPI_Finalize(); exit(0); } break; case 'o': if (output == UNSET) { output = optarg; } else { if(my_rank==0) { fprintf(stderr, "%s --output set more than once\n", ERRM); Usage(); } MPI_Finalize(); exit(0); } break; case 't': if (corenum < 0) { corenum = atoi(optarg); if (corenum < 1) { if(my_rank==0) { fprintf(stderr, "%s --thread must be a positive integer\n", ERRM); Usage(); } MPI_Finalize(); exit(0); } } else { if(my_rank==0) { fprintf(stderr,"%s --thread set more " "than once\n", ERRM); Usage(); } MPI_Finalize(); exit(0); } break; case 'l': if (siglen < 0) { siglen = atoi(optarg); if (siglen < 1) { if(my_rank==0) { fprintf(stderr, "%s --siglen must be a positive integer\n", ERRM); Usage(); } MPI_Finalize(); exit(0); } } else { if(my_rank==0) { fprintf(stderr,"%s --siglen set more " "than once\n", ERRM); Usage(); } MPI_Finalize(); exit(0); } break; case 'a': if (load_time < 0) { load_time = atoi(optarg); if (load_time < 1) { if(my_rank==0) { fprintf(stderr, "%s --load time must be a positive integer\n", ERRM); Usage(); } MPI_Finalize(); exit(0); } } else { if(my_rank==0) { fprintf(stderr,"%s --load time set more " "than once\n", ERRM); Usage(); } MPI_Finalize(); exit(0); } break; case 'p': if (proportion < 0) { proportion = atof(optarg); if (proportion > 1 || proportion <= 0) { if(my_rank==0) { fprintf(stderr, "%s -- proportion must be kept in (0,1]\n", ERRM); Usage(); } MPI_Finalize(); exit(0); } } else { if(my_rank==0) { fprintf(stderr,"%s --proportion set more " "than once\n", ERRM); Usage(); } MPI_Finalize(); exit(0); } break; case 'w': if (ifwrite < 0) { ifwrite = atof(optarg); } else { if(my_rank==0) { fprintf(stderr,"%s --write set more " "than once\n", ERRM); Usage(); } MPI_Finalize(); exit(0); } break; default: // Cannot parse. // if(my_rank==0) Usage(); MPI_Finalize(); exit(0); } } //check the parameters if(corenum == -1) corenum = 1; if(siglen == -1) siglen = 50; if(load_time == -1) load_time = 1; if(proportion == -1) proportion = 1; if(ifwrite == -1) ifwrite = 1; if(output == UNSET) { if(my_rank==0) fprintf(stderr," [ param error : -o ] Not Set output parameter!\n"); MPI_Finalize(); exit(0); } triples2 = (struct Profile_triple **)malloc(load_time*sizeof(struct Profile_triple *)); //barrier all processes to compute time MPI_Barrier(MPI_COMM_WORLD); if(my_rank == 0){ printf("Profile Set is Loading...!\n"); GET_TIME(start); } //read file parameters in all processes ReadFilePara(input1, &profilenum1, &genelen, &linelen1); ReadFilePara(input2, &profilenum2, &genelen, &linelen2); profilenum1 *= proportion; profilenum2 *= proportion; //input file check if( profilenum1 <= 0 || genelen <= 0) { if(my_rank==0) fprintf(stderr," [ param error : -1 ] this file input1 is not exist!\n"); MPI_Finalize(); exit(0); } if( profilenum2 <= 0 || genelen <= 0) { if(my_rank==0) fprintf(stderr," [ param error : -2 ] this file input2 is not exist!\n"); MPI_Finalize(); exit(0); } if(my_rank==0) { printf("Genelen: %d\n", genelen); printf("Profiles1 length: %d\n", profilenum1); printf("Profiles2 length: %d\n", profilenum2); } // compute the local size 、up boundary and down boundary for every process in dataset1 split_data(profilenum1, p, my_rank, &begin, &end, &local_P); if(my_rank==0) printf("Memory check......\n"); unsigned long memavail = memoryAvailable(1); unsigned long memneed = sizeof(struct Profile_triple)/1024*(local_P+profilenum2/load_time) + local_P/1024*profilenum2*sizeof(float); unsigned long memallneed = sizeof(struct Profile_triple)/1024*(profilenum1+profilenum2) + profilenum1/1024*profilenum2*sizeof(float); if(my_rank==0) { printf("Available Memory: %ld KB\n", memavail); printf("Needed Memory: %ld KB\n", memneed); printf("All Needed Memory: %ld KB\n", memallneed); } unsigned long mem1 = sizeof(struct Profile_triple)/1024*(local_P+profilenum2/load_time); unsigned long mem2 = profilenum1/1024*profilenum2*sizeof(float); int nodenum = (int)(mem2/(memavail-mem1)+1); if( memneed > memavail ) { if( my_rank==0 ) { //printf("mem1: %ld KB\n", mem1); //printf("mem2: %ld KB\n", mem2); printf("available memory is not enough to store all results, recommend to use more than %d nodes!!!\n", nodenum); } if(ifwrite==1){ MPI_Finalize(); exit(0); }else{ if( my_rank==0 ) { printf("because we are just testing without writing, we will continue!!!\n"); } } } /*****read the local part file of dataset1 in every process and get their triples****************/ triples1 = (struct Profile_triple *)malloc(sizeof(struct Profile_triple)*local_P); getTriples(local_P, genelen, siglen, profilenum1, linelen1, begin, end, input1, triples1); if(ifwrite==1) { //not test, then allocate the local_ES_Matrix memory local_ES_Matrix = (float **)malloc(local_P*sizeof(float *)); for(i=0;i<local_P;i++) local_ES_Matrix[i] = (float *)malloc(profilenum2*sizeof(float)); }else{ ES_test = (float *)malloc(corenum*sizeof(float)); } int current_time = 0; int begin_localfile2, end_localfile2, len_localfile2; while( current_time < load_time ) { /********************para load profile dataset2 by openmp******************************/ split_data(profilenum2, load_time, current_time, &begin_localfile2, &end_localfile2, &len_localfile2); //allocate the triples memory for local dataset2 triples2[current_time] = (struct Profile_triple *)malloc(sizeof(struct Profile_triple)*len_localfile2); #pragma omp parallel num_threads(corenum) { int local_t; //the data number of each thread must hand int begin_t,end_t; int threadID = omp_get_thread_num(); // compute the local size 、up boundary and down boundary for every thread in dataset2 split_data(len_localfile2, corenum, threadID, &begin_t, &end_t, &local_t); // compute the begin_t to end_t triples getFreeTriples(genelen, siglen, profilenum2, linelen2, begin_localfile2 + begin_t, begin_t, local_t, input2, triples2[current_time]); //getPartTriples(genelen, siglen, profilenum2, linelen2, begin_t, end_t, input2, triples2[current_time]); } MPI_Barrier(MPI_COMM_WORLD); if(my_rank == 0){ GET_TIME(finish); //compute the IO time duration = finish-start; printf("phase %d --> loading IO and prework time in no communication way: %.4f s\n", current_time+1, duration); printf("phase %d --> Paral compute the ES_Matrix is Starting...!\n", current_time+1); GET_TIME(start); } /* if(my_rank == 0){ int k; for(k=0;k<siglen;k++) printf("%d ",triples1[0].gsUp[k]); printf("\n"); for(k=0;k<genelen;k++) printf("%d ",triples2[current_time][profilenum2-1].index[k]); printf("\n"); } */ /********************para compute the part of ES_Matrix******************************/ #pragma omp parallel num_threads(corenum) { int k,t; int local_t; //the data number of each thread must hand int begin_t,end_t; int threadID = omp_get_thread_num(); // compute the local size 、up boundary and down boundary for every thread in dataset2 split_data(len_localfile2, corenum, threadID, &begin_t, &end_t, &local_t); // compute the part of the ES matrix if(ifwrite==1){ for(k=0;k<local_P;k++) for(t=begin_t;t<end_t;t++) local_ES_Matrix[k][begin_localfile2 + t] = ES_Profile_triple(triples1[k],triples2[current_time][t],genelen,siglen); }else{ //just calculate for testing for(k=0;k<local_P;k++) for(t=begin_t;t<end_t;t++) ES_test[threadID] = ES_Profile_triple(triples1[k],triples2[current_time][t],genelen,siglen); } } MPI_Barrier(MPI_COMM_WORLD); if(my_rank == 0){ GET_TIME(finish); //compute the compute time duration = finish-start; printf("phase %d --> Paral compute the ES_Matrix time: %.4f s\n", current_time+1, duration); if(current_time==load_time-1) if(ifwrite==1) printf("Writing file is Starting...!\n"); GET_TIME(start); } free(triples2[current_time]); current_time++; } /* if(my_rank == 0){ int k; for(k=0;k<profilenum2;k++) printf("%f ",local_ES_Matrix[0][k]); printf("\n"); } */ if(ifwrite==1) { char Res[128]; sprintf(Res,"%s_%d.txt",output,my_rank); WritetxtResult(0, local_P, profilenum2, Res, local_ES_Matrix); MPI_Barrier(MPI_COMM_WORLD); if(my_rank == 0){ GET_TIME(finish); //compute the write time duration = finish-start; printf("Write Result spent: %.4f s\n",duration); } }else{ if(my_rank == 0){ printf("Just run for test, no results output\n"); } } //free the memory if(ifwrite==1) { for(i=0;i<local_P;i++) free(local_ES_Matrix[i]); free(local_ES_Matrix); }else{ free(ES_test); } free(triples1); free(triples2); MPI_Finalize(); return 0; }
int main(int argc,char *argv[]) { int i,j; int genelen; int profilenum1,profilenum2; int linelen1,linelen2; struct Profile_triple *triples1,*triples2; float **local_ES_Matrix; //part of the ES_Matrix in this process int my_rank; /* My process rank */ int p; /* The number of processes */ int source,dest; int tag = 0; MPI_Status status; int local_P; //the data number of each processes must hand int begin,end; int parameternum; int corenum; int siglen; double start,finish,duration; /* Let the system do what it needs to start up MPI */ MPI_Init(&argc, &argv); /* Get my process rank */ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /* Find out how many processes are being used */ MPI_Comm_size(MPI_COMM_WORLD, &p); /* check parameter*/ if(my_rank == 0) { parameternum = argc; if(parameternum!=6) Usage(argv[0]); } MPI_Bcast(¶meternum, 1, MPI_INT, 0, MPI_COMM_WORLD); if(parameternum!=6) { MPI_Finalize(); exit(0); } corenum = atoi(argv[1]); siglen = atoi(argv[2]); //barrier all processes to compute time MPI_Barrier(MPI_COMM_WORLD); if(my_rank == 0){ printf("Profile Set is Loading...!\n"); GET_TIME(start); } //read file parameters in all processes ReadFilePara(argv[3], &profilenum1, &genelen, &linelen1); ReadFilePara(argv[4], &profilenum2, &genelen, &linelen2); // compute the local size 、up boundary and down boundary for every process in dataset1 split_data(profilenum1, p, my_rank, &begin, &end, &local_P); /*****read the local part file of dataset1 in every process and get their triples****************/ triples1 = (struct Profile_triple *)malloc(sizeof(struct Profile_triple)*local_P); getTriples(local_P, genelen, siglen, profilenum1, linelen1, begin, end, argv[3], triples1); /********************para load profile dataset2 by openmp******************************/ //allocate the triples memory for dataset2 triples2 = (struct Profile_triple *)malloc(sizeof(struct Profile_triple)*profilenum2); #pragma omp parallel num_threads(corenum) { int local_t; //the data number of each thread must hand int begin_t,end_t; int threadID = omp_get_thread_num(); // compute the local size 、up boundary and down boundary for every thread in dataset2 split_data(profilenum2, corenum, threadID, &begin_t, &end_t, &local_t); // compute the begin_t to end_t triples getPartTriples(genelen, siglen, profilenum2, linelen2, begin_t, end_t, argv[4], triples2); } MPI_Barrier(MPI_COMM_WORLD); if(my_rank == 0){ GET_TIME(finish); //compute the IO time duration = finish-start; printf("loading IO and prework time in no communication way: %.4f s\n",duration); printf("Paral compute the ES_Matrix is Starting...!\n"); GET_TIME(start); } /* if(my_rank == 0){ int k; for(k=0;k<siglen;k++) printf("%d ",triples1[0].gsUp[k]); printf("\n"); for(k=0;k<genelen;k++) printf("%d ",triples2[profilenum2-1].index[k]); printf("\n"); } */ /********************para compute the part of ES_Matrix******************************/ //allocate the local_ES_Matrix memory local_ES_Matrix = (float **)malloc(local_P*sizeof(float *)); for(i=0;i<local_P;i++) local_ES_Matrix[i] = (float *)malloc(profilenum2*sizeof(float)); #pragma omp parallel num_threads(corenum) { int k,t; int local_t; //the data number of each thread must hand int begin_t,end_t; int threadID = omp_get_thread_num(); // compute the local size 、up boundary and down boundary for every thread in dataset2 split_data(profilenum2, corenum, threadID, &begin_t, &end_t, &local_t); // compute the part of the ES matrix for(k=0;k<local_P;k++) for(t=begin_t;t<end_t;t++) local_ES_Matrix[k][t] = ES_Profile_triple(triples1[k],triples2[t],genelen,siglen); } MPI_Barrier(MPI_COMM_WORLD); if(my_rank == 0){ GET_TIME(finish); //compute the compute time duration = finish-start; printf("Paral compute the ES_Matrix time: %.4f s\n",duration); printf("Writing file is Starting...!\n"); GET_TIME(start); } /* if(my_rank == 0){ int k; for(k=0;k<profilenum2;k++) printf("%f ",local_ES_Matrix[0][k]); printf("\n"); } */ char Res[128]; sprintf(Res,"%s_%d.txt",argv[5],my_rank); WritetxtResult(0, local_P, profilenum2, Res, local_ES_Matrix); MPI_Barrier(MPI_COMM_WORLD); if(my_rank == 0){ GET_TIME(finish); //compute the write time duration = finish-start; printf("Write Result spent: %.4f s\n",duration); } //free the memory for(i=0;i<local_P;i++) free(local_ES_Matrix[i]); free(local_ES_Matrix); free(triples1); free(triples2); MPI_Finalize(); return 0; }