int main() { SYSTEM_INFO sysinfo; GetSystemInfo( &sysinfo ); std::cout << "Total Cores: " << sysinfo.dwNumberOfProcessors << std::endl; std::cout << "Total Memory: " << getTotalSystemMemory() << std::endl; return 0; }
/*
 * eatmemory entry point.
 *
 * Every command-line argument after the program name is treated as a size and
 * handed to eat(size, chunk) with a fixed chunk of 1024:
 *   <digits>   plain byte count
 *   <digits>M  megabytes (x 1024*1024)
 *   <digits>G  gigabytes (x 1024*1024*1024)
 *   <digits>%  percentage of getFreeSystemMemory() (only with MEMORY_PERCENTAGE)
 * "-h", "-?" or running without arguments prints the usage text instead.
 * An argument with any other suffix (or an empty argument) prints
 * "Invalid size format" and exits with status 0, as before.
 */
int main(int argc, char *argv[]){
#ifdef MEMORY_PERCENTAGE
    printf("Currently total memory: %zd\n",getTotalSystemMemory());
    printf("Currently avail memory: %zd\n",getFreeSystemMemory());
#endif
    int i;
    for(i=0;i<argc;i++){
        char *arg=argv[i];
        if(strcmp(arg, "-h")==0 || strcmp(arg,"-?")==0 || argc==1){
            printf("Usage: eatmemory <size>\n");
            printf("Size can be specified in megabytes or gigabytes in the following way:\n");
            printf("# # Bytes example: 1024\n");
            printf("#M # Megabytes example: 15M\n");
            printf("#G # Gigabytes example: 2G\n");
#ifdef MEMORY_PERCENTAGE
            printf("#%% # Percent example: 50%%\n");
#endif
            printf("\n");
        }else if(i>0){
            size_t len=strlen(arg);
            long size=-1;
            int chunk=1024;
            char unit;

            /* An empty argument has no last character to inspect; reading
               arg[len-1] would be out of bounds, so reject it up front. */
            if(len==0){
                printf("Invalid size format\n");
                exit(0);
            }
            unit=arg[len-1];

            /* isdigit() requires a value representable as unsigned char. */
            if(!isdigit((unsigned char)unit)){
                if(unit=='M' || unit=='G'){
                    arg[len-1]=0; /* strip the suffix so atol sees only digits */
                    size=atol(arg) * (unit=='M' ? 1024L*1024L : 1024L*1024L*1024L);
                }
#ifdef MEMORY_PERCENTAGE
                else if (unit=='%') {
                    /* atol stops at the '%' sign, so the suffix can stay. */
                    size = (atol(arg) * (long)getFreeSystemMemory())/100;
                }
#endif
                else{
                    printf("Invalid size format\n");
                    exit(0);
                }
            }else{
                /* atol (not atoi) so byte counts above INT_MAX are not
                   truncated; matches the M/G paths above. */
                size=atol(arg);
            }
            printf("Eating %ld bytes in chunks of %d...\n",size,chunk);
            if(eat(size,chunk)){
                printf("Done, press any key to free the memory\n");
                getchar();
            }else{
                printf("ERROR: Could not allocate the memory");
            }
        }
    }
    return 0;
}
/*
 * Optimise the site frequency spectrum for the saf files held in arg->saf.
 *
 * The type T used below is not declared in this block; it is presumably
 * supplied by an enclosing template declaration -- TODO confirm.
 *
 * Flow:
 *   - nSites is taken from arg->nSites, or from nsites(saf,arg) when it is 0.
 *   - The memory requirement fsizes<T>(saf,nSites) is compared with
 *     getTotalSystemMemory() and reported in megabytes and percent.
 *   - One matrix per saf is allocated, then data is read chunk by chunk with
 *     readdata(); for each accepted chunk the spectrum is initialised, run
 *     through em/emAccl, and the expected values are written to stdout.
 *     When arg->bootstrap is non-zero the estimate is repeated that many
 *     times per chunk with a freshly resampled (sorted) global `bootstrap`
 *     index array.
 *
 * Takes ownership of `arg` (destroy_args is called before returning).
 * Always returns 0.
 */
int main_opt(args *arg){
  std::vector<persaf *> &saf = arg->saf;
  for(size_t i=0;i<saf.size();i++)
    assert(saf[i]->pos!=NULL&&saf[i]->saf!=NULL);

  size_t nSites = arg->nSites;
  if(nSites == 0){//if no -nSites is specified
    nSites=nsites(saf,arg);
  }
  if(fsizes<T>(saf,nSites)>getTotalSystemMemory())
    fprintf(stderr,"\t-> Looks like you will allocate too much memory, consider starting the program with a lower -nSites argument\n");

  // nSites is a size_t, so it must be printed with %zu (not %lu).
  fprintf(stderr,"\t-> nSites: %zu\n",nSites);
  float bytes_req_megs =(float) fsizes<T>(saf,nSites)/1024/1024;
  float mem_avail_megs =(float) getTotalSystemMemory()/1024/1024;//in megabytes
  fprintf(stderr,"\t-> The choice of -nSites will require atleast: %f megabyte memory, that is at least: %.2f%% of total memory\n",bytes_req_megs,bytes_req_megs*100/mem_avail_megs);

  std::vector<Matrix<T> *> gls;
  for(size_t i=0;i<saf.size();i++)
    gls.push_back(alloc<T>(nSites,saf[i]->nChr+1));

  int ndim=(int) parspace(saf);
  double *sfs=new double[ndim];

  //temp used for checking pos are in sync
  setGloc(saf,nSites);
  while(1) {
    int ret=readdata(saf,gls,nSites,arg->chooseChr,arg->start,arg->stop,NULL,NULL);//read nsites from data

    if(ret==-2&&gls[0]->x==0)//no more data in files or in chr, either way we stop
      break;

    // Skip a partially filled chunk unless a region was requested or
    // readdata returned -3.
    if(gls[0]->x!=nSites&&arg->chooseChr==NULL&&ret!=-3)
      continue;
    if(gls[0]->x==0)
      continue;

    fprintf(stderr,"\t-> Will run optimization on nSites: %zu\n",static_cast<size_t>(gls[0]->x));

    // One pass when arg->bootstrap is 0, otherwise arg->bootstrap passes.
    // (Replaces the former backward goto; the body always runs at least once.)
    int b=0;
    do{
      if(arg->bootstrap)
        fprintf(stderr,"Will do bootstrap replicate %d/%d\n",b+1,arg->bootstrap);

      // Starting point: user supplied, evenly increasing, or random in (0,1).
      if(arg->sfsfname.size()!=0)
        readSFS(arg->sfsfname[0],ndim,sfs);
      else{
        if(arg->seed==-1){
          for(int i=0;i<ndim;i++)
            sfs[i] = (i+1)/((double)(ndim));
        }else{
          for(int i=0;i<ndim;i++){
            double r=drand48();
            while(r==0.0)
              r = drand48();
            sfs[i] = r;
          }
        }
      }
      normalize(sfs,ndim);

      // The global bootstrap index array is allocated on first use only.
      // NOTE(review): it is sized from the first chunk's site count and never
      // freed here -- confirm later chunks cannot be larger.
      if(bootstrap==NULL &&arg->bootstrap)
        bootstrap = new size_t[gls[0]->x];
      if(bootstrap){
        for(size_t i=0;i<gls[0]->x;i++)
          bootstrap[i] = lrand48() % gls[0]->x;
        std::sort(bootstrap,bootstrap+gls[0]->x);
      }

      double lik;
      if(arg->emAccl==0)
        lik = em<float>(sfs,arg->tole,arg->maxIter,arg->nThreads,ndim,gls);
      else
        lik = emAccl<float>(sfs,arg->tole,arg->maxIter,arg->nThreads,ndim,gls,arg->emAccl);
      fprintf(stderr,"likelihood: %f\n",lik);
      fprintf(stderr,"------------\n");

      //all gls have the same ->x. That means the same number of sites.
      for(int x=0;x<ndim;x++)
        fprintf(stdout,"%f ",((double)gls[0]->x)*sfs[x]);
      fprintf(stdout,"\n");
      fflush(stdout);
    }while(++b<arg->bootstrap);

    // Mark every matrix as empty before the next read.
    for(size_t i=0;i<gls.size();i++)
      gls[i]->x =0;

    if(ret==-2&&arg->chooseChr!=NULL)
      break;
    if(arg->onlyOnce)
      break;
  }

  delGloc(saf,nSites);
  destroy(gls,nSites);
  destroy_args(arg);
  delete [] sfs;

  fprintf(stderr,"\n\t-> NB NB output is no longer log probs of the frequency spectrum!\n");
  fprintf(stderr,"\t-> Output is now simply the expected values! \n");
  fprintf(stderr,"\t-> You can convert to the old format simply with log(norm(x))\n");
  return 0;
}
int main_2dsfs(int argc,char **argv){ if(argc==1){ fprintf(stderr,"./emOptim2 2dsfs pop1 pop2 nChr1 nChr2 [-start FNAME -P nThreds -tole tole -maxIter ] (only works if the two saf files covers the same region)\n"); return 0; } argv++; argc--; fname1 = *(argv++); fname2 = *(argv++); argc -=2; chr1 = atoi(*(argv++)); chr2 = atoi(*(argv++)); argc -=2; getArgs(argc,argv); if(nSites==0){ if(fsize(fname1)+fsize(fname2)>getTotalSystemMemory()) fprintf(stderr,"Looks like you will allocate too much memory, consider starting the program with a lower -nSites argument\n"); //this doesnt make sense if ppl supply a filelist containing safs nSites=calcNsites(fname1,chr1); } fprintf(stderr,"fname1:%sfname2:%s chr1:%d chr2:%d startsfs:%s nThreads=%d tole=%f maxIter=%d nSites:%lu\n",fname1,fname2,chr1,chr2,sfsfname,nThreads,tole,maxIter,nSites); float bytes_req_megs = nSites*(sizeof(double)*(chr1+1) + sizeof(double)*(chr2+1)+2*sizeof(double*))/1024/1024; float mem_avail_megs = getTotalSystemMemory()/1024/1024;//in percentile // fprintf(stderr,"en:%zu to:%f\n",bytes_req_megs,mem_avail_megs); fprintf(stderr,"The choice of -nSites will require atleast: %f megabyte memory, that is approx: %.2f%% of total memory\n",bytes_req_megs,bytes_req_megs*100/mem_avail_megs); #if 0 //read in positions, not used, YET... 
std::vector<int> p1 = getPosi(fname1); std::vector<int> p2 = getPosi(fname2); fprintf(stderr,"nSites in pop1: %zu nSites in pop2: %zu\n",p1.size(),p2.size()); #endif if(nSites==0){ if(calcNsites(fname1,chr1)!=calcNsites(fname2,chr2)){ fprintf(stderr,"Problem with number of sites in file: %s and %s\n",fname1,fname2); exit(0); } nSites=calcNsites(fname1,chr1); } gzFile gz1=getGz(fname1); gzFile gz2=getGz(fname2); dim=(chr1+1)*(chr2+1); Matrix<double> GL1=alloc(nSites,chr1+1); Matrix<double> GL2=alloc(nSites,chr2+1); dim=GL1.y*GL2.y; double *sfs = new double[dim]; while(1){ if(isList ==0){ readGL(gz1,nSites,chr1,GL1); readGL(gz2,nSites,chr2,GL2); }else{ readGL2(gz1,nSites,chr1,GL1); readGL2(gz2,nSites,chr2,GL2); } assert(GL1.x==GL2.x); if(GL1.x==0) break; if(sfsfname!=NULL){ readSFS(sfsfname,dim,sfs); }else{ for(int i=0;i<dim;i++) sfs[i] = (i+1)/((double)dim); normalize(sfs,dim); } setThreadPars(&GL1,&GL2,sfs,nThreads); if(calcLike==0){ if(SIG_COND) em2(sfs,&GL1,&GL2,tole,maxIter); } double lik; if(nThreads>1) lik = lik1_master(); else lik = lik1(sfs,&GL1,0,GL1.x); fprintf(stderr,"likelihood: %f\n",lik); #if 1 int inc=0; for(int x=0;x<chr1+1;x++){ for(int y=0;y<chr2+1;y++) fprintf(stdout,"%f ",log(sfs[inc++])); fprintf(stdout,"\n"); } #endif if(isList==1) break; } dalloc(GL1,nSites); dalloc(GL2,nSites); gzclose(gz1); gzclose(gz2); return 0; }
int main_1dsfs(int argc,char **argv){ if(argc<2){ fprintf(stderr,"Must supply afile.saf and number of chromosomes\n"); return 0; } fname1 = *(argv++); chr1 = atoi(*(argv++)); argc-=2; getArgs(argc,argv); dim=chr1+1; //hook for new EJ banded version if(isNewFormat(fname1)) return main_1dsfs_v2(fname1,chr1,nSites,nThreads,sfsfname,tole,maxIter); if(nSites==0){//if no -nSites is specified if(fsize(fname1)>getTotalSystemMemory()) fprintf(stderr,"Looks like you will allocate too much memory, consider starting the program with a lower -nSites argument\n"); //this doesnt make sense if ppl supply a filelist containing safs nSites=calcNsites(fname1,chr1); } fprintf(stderr,"fname1:%s nChr:%d startsfs:%s nThreads:%d tole=%f maxIter=%d nSites=%lu\n",fname1,chr1,sfsfname,nThreads,tole,maxIter,nSites); float bytes_req_megs = nSites*(sizeof(double)*(chr1+1)+sizeof(double*))/1024/1024; float mem_avail_megs = getTotalSystemMemory()/1024/1024;//in percentile // fprintf(stderr,"en:%zu to:%f\n",bytes_req_megs,mem_avail_megs); fprintf(stderr,"The choice of -nSites will require atleast: %f megabyte memory, that is approx: %.2f%% of total memory\n",bytes_req_megs,bytes_req_megs*100/mem_avail_megs); Matrix<double> GL1=alloc(nSites,dim); gzFile gz1=getGz(fname1); double *sfs=new double[dim]; while(1) { if(isList==0) readGL(gz1,nSites,chr1,GL1); else readGL2(gz1,nSites,chr1,GL1); if(GL1.x==0) break; fprintf(stderr,"dim(GL1)=%zu,%zu\n",GL1.x,GL1.y); if(sfsfname!=NULL){ readSFS(sfsfname,dim,sfs); }else{ for(int i=0;i<dim;i++) sfs[i] = (i+1)/((double)dim); if(doBFGS){ double ts=1; for(int i=0;i<dim-1;i++) ts += 0.01/(1.0+i); sfs[0]=1.0/ts; for(int i=0;i<dim-1;i++) sfs[i+1] = (0.01/(1.0+i))/ts; } normalize(sfs,dim); } // em2_smart(sfs2,pops,1e-6,1e3); setThreadPars(&GL1,NULL,sfs,nThreads); if(calcLike==0){ if(doBFGS==0) em1(sfs,&GL1,tole,maxIter); else bfgs(sfs,&GL1); } double lik; if(nThreads>1) lik = lik1_master(); else lik = lik1(sfs,&GL1,0,GL1.x); fprintf(stderr,"likelihood: %f\n",lik); #if 
1 for(int x=0;x<dim;x++) fprintf(stdout,"%f ",log(sfs[x])); fprintf(stdout,"\n"); fflush(stdout); #endif if(isList==1) break; } dalloc(GL1,nSites); gzclose(gz1); delete [] sfs; return 0; }
/*
 * main (OpenMP variant): builds a random sparse test problem and then runs a
 * serial randomized coordinate-descent loop on it twice.
 *
 * Visible flow:
 *   1. Opens /tmp/nesterov.txt, fixes the sizes (n, m = 5 * n, p, NMAX,
 *      samplingsize = n / 100) and fills one rand_r seed per thread.
 *   2. Fills A_h[n][p] with 2 * val - 1 and IDX_h[n][p] with indices drawn in
 *      [0, m); the inner while loop redraws until the index differs from the
 *      ones already stored in the same row.
 *   3. Draws y, divides every entry by tmp (the sum of the squared entries)
 *      and stores sum_j y[IDX_h[i][j]] * A_h[i][j] in dataToSort[i].value.
 *   4. Calls qsort on dataToSort, draws x, rescales x and the rows of A_h
 *      through alpha, and sets Li[i] = 1 / sum_j A_h[i][j]^2.
 *   5. optimalvalue = 0.5 * sum(y^2) + sum|x|.  b aliases y and has
 *      x[i] * A_h[i][j] added in place; nesterovvalue is computed from
 *      residuals = -b + A x in the same form.
 *   6. Resets residuals = -b and x = 0, then performs NMAX * n coordinate
 *      updates.  Every samplingsize steps the value
 *      0.5 * sum(residuals^2) + lambda * sum|x| is stored in analysis[] and
 *      printed together with the non-zero count and elapsed time.
 *   7. Prints/writes a summary (minimum recorded value, F(x_0), and the
 *      iterations at which the gap drops below successive tenths of epsilon).
 *   8. Reopens fp on /tmp/nesterov_time.txt and repeats the same update loop,
 *      this time only logging the elapsed time at each sampling point.
 *
 * English for the non-English comments embedded below:
 *   "Generovanie problemu"            - problem generation
 *   "vytvaranie matice A"             - building matrix A
 *   "Skutocna optimalna hodnota dana nesterovym vysledkom"
 *                                     - actual optimal value given by Nesterov's result
 *   "VYPISANIE VYSLEDKOV"             - printing the results
 *
 * NOTE(review), all visible in the code below -- please confirm intent:
 *   - seed[] is declared with omp_get_num_threads() elements outside any
 *     parallel region, but the fill loop runs to totalThreds, which was read
 *     inside one; the two counts may differ.
 *   - seed[] is unsigned, so the `seed[i]<0` test can never be true.
 *   - structs_len is sizeof(dataToSort) / sizeof(struct ...); dataToSort is a
 *     pointer, so this is not the element count n that was allocated.
 *   - After "KONIEC RIESENIA" the residual loop uses `currentvalue = ...`
 *     (assignment) inside a reduction(+:currentvalue) loop, whereas every other
 *     such loop accumulates with `+=`.
 *   - A_h and IDX_h are variable-length automatic arrays of n * p elements
 *     with n = 100000000 and p = 10.
 *   - getTotalSystemMemory() and the long value n are printed with %d.
 *   - abs() is applied to the floating-point expression 1 / value.
 *   - Neither fopen() result is checked; y, x, Li, residuals and analysis are
 *     never freed; A_dev, L_dev, x_dev, b_dev and lambda_dev are unused.
 */
int main(void) { clock_t t1, t2; t1 = clock(); FILE *fp; // fp = fopen("/exports/home/s1052689/nesterov.txt", "w"); fp = fopen("/tmp/nesterov.txt", "w"); srand(2); double lambda = 1; double diff; double rho = 1; long int n = 100000000; int n_nonzero = 160000; long int m = 5 * n; double sqrtofnonzeros = 400; int p = 10; int NMAX = 90; int samplingsize = n/100 ; printf("outside thread num is %d\n", omp_get_num_threads()); int totalThreds=0; #pragma omp parallel shared(totalThreds) { totalThreds = omp_get_num_threads(); // printf("total threds:%d",totalThreds); } printf("total threds:%d",totalThreds); int N; long int i, j, k; unsigned int s; unsigned int seed[omp_get_num_threads()]; for (i = 0; i < totalThreds; i++) { seed[i] = (int) RAND_MAX*rand(); if (seed[i]<0) seed[i]=-seed[i]; // printf("seed %d, val %d\n",i, seed[i]); } printf("texst\n"); printf("free memory:%d\n", getTotalSystemMemory()); // double* AAA; printf("Idem alokovat data s obsahom %d\n", n); // AAA = (double*) malloc(n * sizeof(double)); printf("alokacia poli start\n"); double A_h[n][p]; // host A matrix pointers printf("alokacia A done \n"); long int IDX_h[n][p]; // host Aindex matrix pointers printf("alokacia I done \n"); printf("alokacia x done \n"); double optimalvalue = 0; int analysisLength = NMAX * n / samplingsize; struct optimalityAnalysis* analysis; analysis = (struct optimalityAnalysis*) calloc(analysisLength, sizeof(struct optimalityAnalysis)); double tmp; printf("alokacia poli END\n"); print_time_message(t1, "alokacia poli"); //Generovanie problemu------------------------------------------------------------------- t1 = clock(); long int idx; int notfinished; double val; #pragma omp parallel private(i,j,idx,notfinished,val,k,s ), shared(IDX_h, A_h,n,m,p) { s = seed[omp_get_thread_num()]; // printf("thred %d, val:%f\n",omp_get_thread_num(),(double) rand_r(&s) / RAND_MAX); // printf("thred %d, val:%f\n",omp_get_thread_num(),(double) rand_r(&s) / RAND_MAX); #pragma omp for for (i = 0; i < n; 
i++) { idx = 0; for (j = 0; j < p; j++) { notfinished = 1; val = (double) rand_r(&s) / RAND_MAX; while (notfinished) { notfinished = 0; idx = ((long int) ((m) * (rand_r(&s) / (RAND_MAX + 1.0)))); for (k = 0; k < j; k++) { if (IDX_h[i][k] == idx) { notfinished = 1; } } } A_h[i][j] = 2 * val - 1; IDX_h[i][j] = idx; } } } //return 1; print_time_message(t1, "Matrix B Generated"); t1 = clock(); double* y; y = (double*) calloc(m, sizeof(double)); tmp = 0; #pragma omp parallel private(j,s), shared(y), reduction(+:tmp) { s = seed[omp_get_thread_num()]; #pragma omp for for (j = 0; j < m; j++) { y[j] = (double) rand_r(&s) / RAND_MAX; tmp += y[j] * y[j]; } } #pragma omp parallel private(j),shared(y,tmp) { #pragma omp for for (j = 0; j < m; j++) { y[j] = y[j] / tmp; } } print_time_message(t1, "vector y Generated"); struct st_sortingByAbsWithIndex* dataToSort; dataToSort = (struct st_sortingByAbsWithIndex*) calloc(n, sizeof(struct st_sortingByAbsWithIndex)); #pragma omp parallel private(i,j,tmp), shared(dataToSort,A_h,IDX_h,y) { #pragma omp for for (i = 0; i < n; i++) { dataToSort[i].idx = i; tmp = 0; for (j = 0; j < p; j++) { tmp += y[IDX_h[i][j]] * A_h[i][j]; } dataToSort[i].value = tmp; } } print_time_message(t1, "Struc created"); //Sorting B printf("SORTING START\n"); size_t structs_len = sizeof(dataToSort) / sizeof(struct st_sortingByAbsWithIndex); printf("SORTING 2\n"); qsort(dataToSort, structs_len, sizeof(struct st_sortingByAbsWithIndex), struct_cmp_by_value); printf("SORTING END\n"); // return 1; double* x; x = (double*) calloc(n, sizeof(double)); #pragma omp parallel private(i,s), shared(x) { s = seed[omp_get_thread_num()]; #pragma omp for for (i = 0; i < n; i++) { x[i] = ((double) rand_r(&s) / RAND_MAX); } } print_time_message(t1, "GENEROVANIE RANDOM X END"); double alpha = 0; #pragma omp parallel private(i,alpha,idx,j), shared(x,A_h,dataToSort,sqrtofnonzeros,rho ,n,p) { #pragma omp for for (i = 0; i < n; i++) { // vytvaranie matice A idx = dataToSort[i].idx; alpha = 
1; if (i < n_nonzero) { alpha = (double) abs(1 / dataToSort[idx].value); x[idx] = x[idx] * rho / (sqrtofnonzeros); if (dataToSort[idx].value < 0) { x[idx] = -x[idx]; } } else if (dataToSort[idx].value > 0.1 || dataToSort[idx].value < -0.1) { alpha = (double) abs(1 / dataToSort[idx].value) * x[idx]; x[idx] = 0; } else { x[idx] = 0; } for (j = 0; j < p; j++) { A_h[idx][j] = A_h[idx][j] * alpha; } } } print_time_message(t1, "A modified"); t1 = clock(); // print_double_array(&L[0],n); // print_double_array(&Li[0], 10); free(dataToSort); // Compute Li double* Li; // Lipschitz constants Li = (double*) calloc(n, sizeof(double)); print_time_message(t1, "Alokacia Li"); t1 = clock(); #pragma omp parallel private(i,j), shared(Li,A_h,p,n) { #pragma omp for for (i = 0; i < n; i++) { Li[i] = 0; for (j = 0; j < p; j++) { Li[i] += A_h[i][j] * A_h[i][j]; } Li[i] = 1 / Li[i]; } } // END compute Li print_time_message(t1, "Compute Li"); t1 = clock(); #pragma omp parallel private(i), shared(y,m), reduction(+:optimalvalue) { #pragma omp for for (i = 0; i < m; i++) { optimalvalue += y[i] * y[i]; } } print_time_message(t1, "OptVal1"); t1 = clock(); optimalvalue = optimalvalue * 0.5; double* b; b = y; for (j = 0; j < p; j++) { for (i = 0; i < n; i++) { b[IDX_h[i][j]] += x[i] * A_h[i][j]; } } print_time_message(t1, "OptVal2 serial"); t1 = clock(); #pragma omp parallel private(i), shared(n,x), reduction(+:optimalvalue) { #pragma omp for for (i = 0; i < n; i++) { if (x[i] > 0) optimalvalue += (x[i]); else optimalvalue -= x[i]; } } print_time_message(t1, "OptVal3"); t1 = clock(); printf("optval %1.16f \n", optimalvalue); t2 = clock(); diff = ((float) t2 - (float) t1) / 1000000.0F; printf("Generating END:%f\n", diff); fprintf(fp, "Generating END:%f\n", diff); //Generovanie problemu----------------------------END---------------------------------- double * residuals; residuals = (double*) calloc(m, sizeof(double)); printf("Residuals alocated"); #pragma omp parallel private(i), 
shared(m,b,residuals) { #pragma omp for for (i = 0; i < m; i++) { residuals[i] = -b[i]; } } printf("Residuals = -b"); for (i = 0; i < n; i++) { for (j = 0; j < p; j++) { residuals[IDX_h[i][j]] += x[i] * A_h[i][j]; } } printf("Residuals =updated"); double nesterovvalue = 0; #pragma omp parallel private(i), shared(m, residuals), reduction(+:nesterovvalue) { #pragma omp for for (i = 0; i < m; i++) { nesterovvalue += residuals[i] * residuals[i]; } } nesterovvalue = nesterovvalue / 2; #pragma omp parallel private(i), shared(n, x), reduction(+:nesterovvalue) { #pragma omp for for (i = 0; i < n; i++) { if (x[i] > 0) nesterovvalue += x[i]; else nesterovvalue -= x[i]; } } // Calculate residuals #pragma omp parallel private(j,i), shared(m,b,x,n,residuals) { #pragma omp for for (j = 0; j < m; j++) residuals[j] = -b[j]; #pragma omp for for (i = 0; i < n; i++) x[i] = 0; } //----------------RCDM----------serial===================================--- double tmp1; double currentvalue = 0; printf("RCDM serial"); int analisisIDX = 0; double epsilon = 0; currentvalue = 0; // print_double_array(&residuals[0],m); #pragma omp parallel private(i), shared(residuals,m), reduction(+:currentvalue) { #pragma omp for for (i = 0; i < m; i++) { currentvalue += residuals[i] * residuals[i]; } } currentvalue = currentvalue * 0.5; // printf("CV:%1.16f\n", currentvalue); // printf(" %1.16f\n", currentvalue ); double normsize = 0; #pragma omp parallel private(i), shared(lambda,n,x), reduction(+:normsize) { #pragma omp for for (i = 0; i < n; i++) { if (x[i] > 0) normsize += lambda * x[i]; else normsize -= lambda * x[i]; } } // print_double_array(&x[0],n); epsilon = currentvalue + normsize; srand(2); printf("ZACIATOK RIESENIA\n"); t1 = clock(); for (N = 0; N < NMAX; N++) { for (k = 0; k < n; k++) { // for (k = 0; k < n; k++) { int idx = (int) (n * (rand() / (RAND_MAX + 1.0))); double tmp = 0; for (j = 0; j < p; j++) { // printf("tmp:%f A:%f residual:%f \n",tmp,A_h[idx][j],residuals[IDX_h[idx][j]]); tmp 
+= A_h[idx][j] * residuals[IDX_h[idx][j]]; } // printf("Li[%d] = %f; tmp=%f \n", idx, Li[idx], tmp); tmp1 = Li[idx] * (tmp + lambda); if (x[idx] > tmp1) { tmp = -tmp1; } else { tmp1 = Li[idx] * (tmp - lambda); if (x[idx] < tmp1) { tmp = -tmp1; } else { tmp = -x[idx]; } } x[idx] += tmp; //update residuals: for (j = 0; j < p; j++) { residuals[IDX_h[idx][j]] += tmp * A_h[idx][j]; } // printf("Iteration %d, x[%d]=%f \n", N, idx, x[idx]); if (k % samplingsize == 0) { currentvalue = 0; // print_double_array(&residuals[0],m); #pragma omp parallel private(i), shared(residuals,m), reduction(+:currentvalue) { #pragma omp for for (i = 0; i < m; i++) { currentvalue += residuals[i] * residuals[i]; } } currentvalue = currentvalue * 0.5; // printf("CV:%1.16f\n", currentvalue); // printf(" %1.16f\n", currentvalue ); double normsize = 0; #pragma omp parallel private(i), shared(lambda,n,x), reduction(+:normsize) { #pragma omp for for (i = 0; i < n; i++) { if (x[i] > 0) normsize += lambda * x[i]; else normsize -= lambda * x[i]; } } // print_double_array(&x[0],n); currentvalue = currentvalue + normsize; // printf("NZ:%1.16f; :%1.16f\n", currentvalue, normsize); // printf(" %1.16f\n", currentvalue ); analysis[analisisIDX].accuracy = currentvalue; analysis[analisisIDX].nnz = 0; analysis[analisisIDX].correctnnz = 0; analysis[analisisIDX].iteration = N + (double) k / n; for (i = 0; i < n; i++) { if (x[i] != 0) analysis[analisisIDX].nnz++; // if (x_optimal[i] != 0 && x[i] != 0) // analysis[analisisIDX].correctnnz++; } t2 = clock(); diff = ((float) t2 - (float) t1) / 1000000.0F; printf("%f,%d,%d,%1.16f,TIME:%f\n", analysis[analisisIDX].iteration, analysis[analisisIDX].nnz, analysis[analisisIDX].correctnnz, currentvalue - optimalvalue, diff); // printf("%d: nnz %d correct nnz %d \n", N, analysis[analisisIDX].nnz,analysis[analisisIDX].correctnnz); // printf("%d: f^*=%1.16f, f(x)=%1.16f \n", N, optimalvalue, // currentvalue); // printf("%d: f(x)-f^*=%1.16f\n", N, currentvalue - optimalvalue); 
analisisIDX++; } } } /// SErIAL RCDM =========================================================END printf("KONIEC RIESENIA\n"); currentvalue = 0; #pragma omp parallel private(i), shared(residuals,m), reduction(+:currentvalue) { #pragma omp for for (i = 0; i < m; i++) { currentvalue = residuals[i] * residuals[i]; } } currentvalue = currentvalue / 2; #pragma omp parallel private(i), shared(x,n), reduction(+:currentvalue) { #pragma omp for for (i = 0; i < n; i++) { if (x[i] > 0) currentvalue += x[i]; else currentvalue -= x[i]; } } printf("Comparison \n"); // for (i = 0; i < n; i++) { // if (x[i] > 0 || x[i] < 0 || x_optimal[i] > 0 || x_optimal[i] < 0) { // printf("x[%d] = %1.10f ;x*[%d]=%1.10f \n", i, x[i], i, // x_optimal[i]); // } // } printf("f^*=%1.16f, f(x)=%1.16f \n", optimalvalue, currentvalue); printf("f(x)-f^*=%1.16f\n", currentvalue - optimalvalue); // Skutocna optimalna hodnota dana nesterovym vysledkom printf("=====================================\n"); printf("f^N=%1.16f, f(x)=%1.16f \n", nesterovvalue, currentvalue); printf("f(x)-f^N=%1.16f \n", -nesterovvalue + currentvalue); printf("f^N=%1.16f, f(x)=%1.16f \n", nesterovvalue, currentvalue); printf("f(x)-f^N=%1.16f \n", -nesterovvalue + currentvalue); //tmp=0; // for (i = 0; i < n; i++) { // tmp+=(x[i]-x_optimal[i])*(x[i]-x_optimal[i]); // } // printf("|x-xoptimal|^2 = %1.16f \n",tmp); // Allocation arrays on cuda device: //VYPISANIE VYSLEDKOV double minvalue = nesterovvalue; for (i = 1; i < analisisIDX; i++) { if (analysis[i].accuracy < minvalue) { minvalue = analysis[i].accuracy; } } printf("min value: %f\n", minvalue); fprintf(fp,"min value: %f\n", minvalue); // i = analisisIDX - 1; // printf("it: %d; eps: %1.16f; nnzofX: %d, basis: %f \n", // analysis[i].iteration, analysis[i].accuracy - minvalue, // analysis[i].nnz, (double) analysis[i].correctnnz / n_nonzero); printf("F(x_0): %f\n", epsilon); fprintf(fp,"F(x_0): %f\n", epsilon); epsilon=epsilon-minvalue; fprintf(fp,"F(x_0)-F^*: %f\n", epsilon); 
epsilon=epsilon*0.1; for (i = 1; i < analisisIDX; i++) { if (analysis[i].accuracy - minvalue <= epsilon && epsilon >= 0) { fprintf(fp, "it: %1.4f; eps: %1.16f; nnzofX: %d, basis: %f \n", (double) analysis[i].iteration, analysis[i].accuracy - minvalue, analysis[i].nnz, (double) analysis[i].correctnnz / n_nonzero); printf("it: %1.4f; eps: %1.16f; nnzofX: %d, basis: %f \n", (double) analysis[i].iteration, analysis[i].accuracy - minvalue, analysis[i].nnz, (double) analysis[i].correctnnz / n_nonzero); epsilon = epsilon * 0.1; printf("epsilon: %f \n", epsilon); } if (i > 10 && analysis[i].accuracy - minvalue == 0) { break; } } fclose(fp); fp = fopen("/tmp/nesterov_time.txt", "w"); // fp = fopen("/exports/home/s1052689/nesterov_time.txt", "w"); // Calculate residuals #pragma omp parallel private(j,i), shared(m,b,x,n,residuals) { #pragma omp for for (j = 0; j < m; j++) residuals[j] = -b[j]; #pragma omp for for (i = 0; i < n; i++) x[i] = 0; } //----------------RCDM----------serial===================================--- srand(2); printf("ZACIATOK RIESENIA\n"); t1 = clock(); for (N = 0; N < NMAX; N++) { for (k = 0; k < n; k++) { // for (k = 0; k < n; k++) { int idx = (int) (n * (rand() / (RAND_MAX + 1.0))); double tmp = 0; for (j = 0; j < p; j++) { // printf("tmp:%f A:%f residual:%f \n",tmp,A_h[idx][j],residuals[IDX_h[idx][j]]); tmp += A_h[idx][j] * residuals[IDX_h[idx][j]]; } // printf("Li[%d] = %f; tmp=%f \n", idx, Li[idx], tmp); tmp1 = Li[idx] * (tmp + lambda); if (x[idx] > tmp1) { tmp = -tmp1; } else { tmp1 = Li[idx] * (tmp - lambda); if (x[idx] < tmp1) { tmp = -tmp1; } else { tmp = -x[idx]; } } x[idx] += tmp; //update residuals: for (j = 0; j < p; j++) { residuals[IDX_h[idx][j]] += tmp * A_h[idx][j]; } // printf("Iteration %d, x[%d]=%f \n", N, idx, x[idx]); if (k % samplingsize == 0) { t2 = clock(); diff = ((float) t2 - (float) t1) / 1000000.0F; printf("%f,TIME:%f\n", N + (double) k / n, diff); fprintf(fp, "%f,TIME:%f\n", N + (double) k / n, diff); } } } /// SErIAL RCDM 
=========================================================END printf("KONIEC RIESENIA\n"); // return 1; double * A_dev; double * L_dev; double * x_dev; double * b_dev; double * lambda_dev; //----------------RCDM------- parallel fclose(fp); }
/*
 * main (serial variant): builds a random sparse test problem and runs a
 * randomized coordinate-descent loop on it once.
 *
 * Visible flow:
 *   1. Opens /tmp/nesterov.txt and fixes the sizes (n = 1000000, m = 10 * n,
 *      p = 15, NMAX = 60, samplingsize = n / 1).
 *   2. Fills A_h[n][p] with 2 * val - 1 and IDX_h[n][p] with indices drawn in
 *      [0, m); the inner while loop redraws until the index differs from the
 *      ones already stored in the same row.
 *   3. Draws y, divides every entry by tmp (the sum of the squared entries)
 *      and stores sum_j y[IDX_h[i][j]] * A_h[i][j] in dataToSort[i].value.
 *   4. Calls qsort on dataToSort, builds x and rescales the rows of A_h
 *      through alpha, and sets Li[i] = 1 / sum_j A_h[i][j]^2.
 *   5. optimalvalue = 0.5 * sum(y^2) + sum|x|.  b aliases y and has
 *      x[i] * A_h[i][j] added in place.
 *   6. Resets residuals = -b and x = 0, then performs NMAX * n coordinate
 *      updates.  Every samplingsize steps the value
 *      0.5 * sum(residuals^2) + lambda * sum|x| is stored in analysis[] and
 *      printed together with the non-zero count.
 *   7. Prints a summary: the minimum recorded value, the entry analysis[1],
 *      and the iterations at which the gap drops below successive tenths of
 *      epsilon (starting from 1000000000).
 *
 * English for the non-English comments embedded below:
 *   "Generovanie problemu"            - problem generation
 *   "vytvaranie matice A"             - building matrix A
 *   "Skutocna optimalna hodnota dana nesterovym vysledkom"
 *                                     - actual optimal value given by Nesterov's result
 *   "VYPISANIE VYSLEDKOV"             - printing the results
 *
 * NOTE(review), all visible in the code below -- please confirm intent:
 *   - structs_len is sizeof(dataToSort) / sizeof(struct ...); dataToSort is a
 *     pointer, so this is not the number of elements that were filled in.
 *   - The nesterovvalue loop and the final currentvalue loop use `=` where
 *     the sampling loop accumulates the same expression with `+=`.
 *   - A_h and IDX_h are variable-length automatic arrays of n * p elements.
 *   - dataToSort is allocated with m elements but only the first n are set.
 *   - getTotalSystemMemory() and the long value n are printed with %d.
 *   - abs() is applied to the floating-point expression 1 / value.
 *   - The fopen() result is not checked; y, x, Li, residuals and analysis are
 *     never freed; A_dev, L_dev, x_dev, b_dev and lambda_dev are unused.
 */
int main(void) { FILE *fp; // fp = fopen("/exports/home/s1052689/nesterov.txt", "w"); fp = fopen("/tmp/nesterov.txt", "w"); srand(2); double lambda = 1; double rho = 1; long int n = 1000000; int n_nonzero = 160000; long int m = 10 * n; double sqrtofnonzeros = 400; int p = 15; int NMAX = 60; int N; int samplingsize = n / 1; long int i, j, k; printf("texst\n"); printf("free memory:%d\n", getTotalSystemMemory()); // double* AAA; printf("Idem alokovat data s obsahom %d\n", n); // AAA = (double*) malloc(n * sizeof(double)); printf("alokacia poli start\n"); double A_h[n][p]; // host A matrix pointers printf("alokacia A done \n"); long int IDX_h[n][p]; // host Aindex matrix pointers printf("alokacia I done \n"); printf("alokacia x done \n"); double optimalvalue = 0; int analysisLength = NMAX * n / samplingsize; struct optimalityAnalysis* analysis; analysis = (struct optimalityAnalysis*) calloc(analysisLength, sizeof(struct optimalityAnalysis)); double tmp; printf("alokacia poli END\n"); printf("free memory:%d\n", getTotalSystemMemory()); //Generovanie problemu------------------------------------------------------------------- for (i = 0; i < n; i++) { long int idx = 0; for (j = 0; j < p; j++) { int notfinished = 1; double val = (double) rand() / RAND_MAX; while (notfinished) { notfinished = 0; idx = ((long int) ((m) * (rand() / (RAND_MAX + 1.0)))); for (k = 0; k < j; k++) { if (IDX_h[i][k] == idx) { notfinished = 1; } } } A_h[i][j] = 2 * val - 1; IDX_h[i][j] = idx; } } printf("Matrix B Generated\n"); printf("free memory:%d\n", getTotalSystemMemory()); double* y; y = (double*) calloc(m, sizeof(double)); tmp = 0; for (j = 0; j < m; j++) { y[j] = (double) rand() / RAND_MAX; tmp += y[j] * y[j]; } for (j = 0; j < m; j++) { y[j] = y[j] / tmp; } printf("vector y Generated\n"); struct st_sortingByAbsWithIndex* dataToSort; dataToSort = (struct st_sortingByAbsWithIndex*) calloc(m, sizeof(struct st_sortingByAbsWithIndex)); for (i = 0; i < n; i++) { dataToSort[i].idx = i; 
dataToSort[i].value = 0; } printf("Struc created\n"); for (i = 0; i < n; i++) { tmp = 0; for (j = 0; j < p; j++) { tmp += y[IDX_h[i][j]] * A_h[i][j]; } dataToSort[i].value = tmp; } //Sorting B printf("SORTING START\n"); size_t structs_len = sizeof(dataToSort) / sizeof(struct st_sortingByAbsWithIndex); printf("SORTING 2\n"); qsort(dataToSort, structs_len, sizeof(struct st_sortingByAbsWithIndex), struct_cmp_by_value); printf("SORTING END\n"); // return 1; double* x; x = (double*) calloc(n, sizeof(double)); for (i = 0; i < n; i++) { // vytvaranie matice A int idx = dataToSort[i].idx; double alpha = 1; x[idx] = 0; if (i < n_nonzero) { alpha = (double) abs(1 / dataToSort[idx].value); x[idx] = ((double) rand() / RAND_MAX) * rho / (sqrtofnonzeros); if (dataToSort[idx].value < 0) { x[idx] = -x[idx]; } } else if (dataToSort[idx].value > 0.1 || dataToSort[idx].value < -0.1) { alpha = (double) abs(1 / dataToSort[idx].value) * (double) rand() / RAND_MAX; } for (j = 0; j < p; j++) { A_h[idx][j] = A_h[idx][j] * alpha; } } // print_double_array(&L[0],n); // print_double_array(&Li[0], 10); free(dataToSort); // Compute Li double* Li; // Lipschitz constants Li = (double*) calloc(n, sizeof(double)); for (i = 0; i < n; i++) { Li[i] = 0; for (j = 0; j < p; j++) { Li[i] += A_h[i][j] * A_h[i][j]; } Li[i] = 1 / Li[i]; } // END compute Li for (i = 0; i < m; i++) { optimalvalue += y[i] * y[i]; } optimalvalue = optimalvalue * 0.5; double* b; b = y; for (i = 0; i < n; i++) { for (j = 0; j < p; j++) { b[IDX_h[i][j]] += x[i] * A_h[i][j]; } } for (i = 0; i < n; i++) { // printf("optval %1.16f \n", optimalvalue); if (x[i] > 0) optimalvalue += x[i]; else optimalvalue -= x[i]; } printf("optval %1.16f \n", optimalvalue); //Generovanie problemu----------------------------END---------------------------------- double * residuals; residuals = (double*) calloc(m, sizeof(double)); for (i = 0; i < m; i++) { residuals[i] = -b[i]; } for (i = 0; i < n; i++) { for (j = 0; j < p; j++) { residuals[IDX_h[i][j]] 
+= x[i] * A_h[i][j]; } } double nesterovvalue = 0; for (i = 0; i < m; i++) { nesterovvalue = residuals[i] * residuals[i]; } nesterovvalue = nesterovvalue / 2; for (i = 0; i < n; i++) { if (x[i] > 0) nesterovvalue += x[i]; else nesterovvalue -= x[i]; } // Calculate residuals for (j = 0; j < m; j++) residuals[j] = -b[j]; //----------------RCDM----------serial===================================--- for (i = 0; i < n; i++) x[i] = 0; double tmp1; double currentvalue = 0; int analisisIDX = 0; printf("ZACIATOK RIESENIA\n"); for (N = 0; N < NMAX; N++) { for (k = 0; k < n; k++) { // for (k = 0; k < n; k++) { int idx = (int) (n * (rand() / (RAND_MAX + 1.0))); double tmp = 0; for (j = 0; j < p; j++) { // printf("tmp:%f A:%f residual:%f \n",tmp,A_h[idx][j],residuals[IDX_h[idx][j]]); tmp += A_h[idx][j] * residuals[IDX_h[idx][j]]; } // printf("Li[%d] = %f; tmp=%f \n", idx, Li[idx], tmp); tmp1 = Li[idx] * (tmp + lambda); if (x[idx] > tmp1) { tmp = -tmp1; } else { tmp1 = Li[idx] * (tmp - lambda); if (x[idx] < tmp1) { tmp = -tmp1; } else { tmp = -x[idx]; } } x[idx] += tmp; //update residuals: for (j = 0; j < p; j++) { residuals[IDX_h[idx][j]] += tmp * A_h[idx][j]; } // printf("Iteration %d, x[%d]=%f \n", N, idx, x[idx]); if (k % samplingsize == 0) { currentvalue = 0; // print_double_array(&residuals[0],m); for (i = 0; i < m; i++) { currentvalue += residuals[i] * residuals[i]; } currentvalue = currentvalue * 0.5; // printf("CV:%1.16f\n", currentvalue); // printf(" %1.16f\n", currentvalue ); double normsize = 0; for (i = 0; i < n; i++) { if (x[i] > 0) normsize += lambda * x[i]; else normsize -= lambda * x[i]; } // print_double_array(&x[0],n); currentvalue = currentvalue + normsize; // printf("NZ:%1.16f; :%1.16f\n", currentvalue, normsize); // printf(" %1.16f\n", currentvalue ); analysis[analisisIDX].accuracy = currentvalue; analysis[analisisIDX].nnz = 0; analysis[analisisIDX].correctnnz = 0; analysis[analisisIDX].iteration = N + (double) k / n; for (i = 0; i < n; i++) { if (x[i] != 0) 
analysis[analisisIDX].nnz++; // if (x_optimal[i] != 0 && x[i] != 0) // analysis[analisisIDX].correctnnz++; } printf("%f,%d,%d,%1.16f\n", N + (double) k / n, analysis[analisisIDX].nnz, analysis[analisisIDX].correctnnz, currentvalue - optimalvalue); // printf("%d: nnz %d correct nnz %d \n", N, analysis[analisisIDX].nnz,analysis[analisisIDX].correctnnz); // printf("%d: f^*=%1.16f, f(x)=%1.16f \n", N, optimalvalue, // currentvalue); // printf("%d: f(x)-f^*=%1.16f\n", N, currentvalue - optimalvalue); analisisIDX++; } } } /// SErIAL RCDM =========================================================END printf("KONIEC RIESENIA\n"); currentvalue = 0; for (i = 0; i < m; i++) { currentvalue = residuals[i] * residuals[i]; } currentvalue = currentvalue / 2; for (i = 0; i < n; i++) { if (x[i] > 0) currentvalue += x[i]; else currentvalue -= x[i]; } printf("Comparison \n"); // for (i = 0; i < n; i++) { // if (x[i] > 0 || x[i] < 0 || x_optimal[i] > 0 || x_optimal[i] < 0) { // printf("x[%d] = %1.10f ;x*[%d]=%1.10f \n", i, x[i], i, // x_optimal[i]); // } // } printf("f^*=%1.16f, f(x)=%1.16f \n", optimalvalue, currentvalue); printf("f(x)-f^*=%1.16f\n", currentvalue - optimalvalue); // Skutocna optimalna hodnota dana nesterovym vysledkom printf("=====================================\n"); printf("f^N=%1.16f, f(x)=%1.16f \n", nesterovvalue, currentvalue); printf("f(x)-f^N=%1.16f \n", -nesterovvalue + currentvalue); printf("f^N=%1.16f, f(x)=%1.16f \n", nesterovvalue, currentvalue); printf("f(x)-f^N=%1.16f \n", -nesterovvalue + currentvalue); //tmp=0; // for (i = 0; i < n; i++) { // tmp+=(x[i]-x_optimal[i])*(x[i]-x_optimal[i]); // } // printf("|x-xoptimal|^2 = %1.16f \n",tmp); // Allocation arrays on cuda device: //VYPISANIE VYSLEDKOV double epsilon = 1000000000; double minvalue = nesterovvalue; for (i = 1; i < analisisIDX; i++) { if (analysis[i].accuracy < minvalue) { minvalue = analysis[i].accuracy; } } printf("min value: %f\n", minvalue); printf("min value: %f\n", minvalue); // i = 
analisisIDX - 1; // printf("it: %d; eps: %1.16f; nnzofX: %d, basis: %f \n", // analysis[i].iteration, analysis[i].accuracy - minvalue, // analysis[i].nnz, (double) analysis[i].correctnnz / n_nonzero); i = 1; printf("it: %1.4f; eps: %1.16f; nnzofX: %d, basis: %f \n", analysis[i].iteration, analysis[i].accuracy - minvalue, analysis[i].nnz, (double) analysis[i].correctnnz / n_nonzero); fprintf(fp, "it: %1.4f; eps: %1.16f; nnzofX: %d, basis: %f \n", analysis[i].iteration, analysis[i].accuracy - minvalue, analysis[i].nnz, (double) analysis[i].correctnnz / n_nonzero); printf("analisisIdx:%d", analisisIDX); for (i = 1; i < analisisIDX; i++) { if (analysis[i].accuracy - minvalue <= epsilon && epsilon >= 0) { fprintf(fp, "it: %1.4f; eps: %1.16f; nnzofX: %d, basis: %f \n", (double) analysis[i].iteration, analysis[i].accuracy - minvalue, analysis[i].nnz, (double) analysis[i].correctnnz / n_nonzero); printf("it: %1.4f; eps: %1.16f; nnzofX: %d, basis: %f \n", (double) analysis[i].iteration, analysis[i].accuracy - minvalue, analysis[i].nnz, (double) analysis[i].correctnnz / n_nonzero); epsilon = epsilon * 0.1; printf("epsilon: %f \n", epsilon); } if (i > 10 && analysis[i].accuracy - minvalue == 0) { break; } } // return 1; double * A_dev; double * L_dev; double * x_dev; double * b_dev; double * lambda_dev; //----------------RCDM------- parallel fclose(fp); }
/*
 * Print the value returned by getTotalSystemMemory() to stdout.
 *
 * NOTE(review): the label says "Free mem" but the value comes from
 * getTotalSystemMemory(); the message itself flags this as unfinished.
 */
void SystemFreeMem()
{
    /* The declaration of getTotalSystemMemory() is not visible here, and a
       memory size may be wider than unsigned int; converting explicitly to
       unsigned long long keeps the argument and the %llu specifier in
       agreement whatever the declared return type is. */
    printf("Free mem [Yet to perfect this value]: %llu\n",
           (unsigned long long)getTotalSystemMemory());
}