示例#1
0
文件: test.cpp 项目: PeterWang9/code
int main() {
    SYSTEM_INFO sysinfo;
    GetSystemInfo( &sysinfo );

    std::cout << "Total Cores: " << sysinfo.dwNumberOfProcessors << std::endl;
    std::cout << "Total Memory: " << getTotalSystemMemory() << std::endl;
    return 0;
}
示例#2
0
/*
 * eatmemory entry point.
 *
 * Every command-line argument after the program name is treated as a size
 * and handed to eat() together with a fixed chunk size of 1024:
 *   <digits>     size in bytes
 *   <digits>M    size in megabytes
 *   <digits>G    size in gigabytes
 *   <digits>%    percentage of getFreeSystemMemory() (MEMORY_PERCENTAGE only)
 * "-h", "-?" or running with no argument prints the usage text.
 * An argument that is empty or carries an unknown suffix prints
 * "Invalid size format" and terminates the process.
 */
int main(int argc, char *argv[]){

#ifdef MEMORY_PERCENTAGE
    printf("Currently total memory: %zd\n",getTotalSystemMemory());
    printf("Currently avail memory: %zd\n",getFreeSystemMemory());
#endif

    int i;
    for(i=0;i<argc;i++){
        char *arg=argv[i];
        if(strcmp(arg, "-h")==0 || strcmp(arg,"-?")==0  || argc==1){
            printf("Usage: eatmemory <size>\n");
            printf("Size can be specified in megabytes or gigabytes in the following way:\n");
            printf("#          # Bytes      example: 1024\n");
            printf("#M         # Megabytes  example: 15M\n");
            printf("#G         # Gigabytes  example: 2G\n");
#ifdef MEMORY_PERCENTAGE
            printf("#%%         # Percent    example: 50%%\n");
#endif
            printf("\n");
        }else if(i>0){
            size_t len=strlen(arg);
            if(len==0){
                // An empty argument has no last character to inspect;
                // reading arg[len-1] would index before the string.
                printf("Invalid size format\n");
                exit(0);
            }
            char unit=arg[len - 1];
            long size=-1;
            int chunk=1024;
            // isdigit() requires a value representable as unsigned char.
            if(!isdigit((unsigned char)unit) ){
                if(unit=='M' || unit=='G'){
                    arg[len-1]=0; // strip the suffix so atol() sees digits only
                    size=atol(arg) * (unit=='M'?1024*1024:1024*1024*1024);
                }
#ifdef MEMORY_PERCENTAGE
                else if (unit=='%') {
                    // atol() stops at the trailing '%', no need to strip it.
                    size = (atol(arg) * (long)getFreeSystemMemory())/100;
                }
#endif
                else{
                    printf("Invalid size format\n");
                    exit(0);
                }
            }else{
                // Parse as long, like the suffixed forms, so byte counts
                // that do not fit in an int are not truncated.
                size=atol(arg);
            }
            printf("Eating %ld bytes in chunks of %d...\n",size,chunk);
            if(eat(size,chunk)){
                printf("Done, press any key to free the memory\n");
                getchar();
            }else{
                printf("ERROR: Could not allocate the memory");
            }
        }
    }

    return 0;
}
示例#3
0
/*
 * Runs the SFS optimisation for the saf inputs listed in arg->saf.
 *
 * Data is read in chunks of at most nSites sites (arg->nSites, or the value
 * returned by nsites() when that is 0). For every chunk the start spectrum
 * is read from arg->sfsfname[0], or initialised (deterministic ramp when
 * arg->seed==-1, drand48() draws otherwise), normalised, and optimised with
 * em() or emAccl() depending on arg->emAccl. The scaled estimate is written
 * to stdout, diagnostics go to stderr. When arg->bootstrap is non-zero the
 * initialise/optimise/print sequence is repeated that many times per chunk,
 * each time with a freshly drawn, sorted index vector in the global
 * `bootstrap`.
 *
 * Releases gls, sfs and arg before returning. Always returns 0.
 */
int main_opt(args *arg){

  std::vector<persaf *> &saf =arg->saf;
  for(size_t i=0;i<saf.size();i++)
    assert(saf[i]->pos!=NULL&&saf[i]->saf!=NULL);
  size_t nSites = arg->nSites;
  if(nSites == 0){//if no -nSites is specified
    nSites=nsites(saf,arg);
  }
  if(fsizes<T>(saf,nSites)>getTotalSystemMemory())
    fprintf(stderr,"\t-> Looks like you will allocate too much memory, consider starting the program with a lower -nSites argument\n"); 
    
  // nSites is a size_t, so it must be printed with %zu.
  fprintf(stderr,"\t-> nSites: %zu\n",nSites);
  float bytes_req_megs =(float) fsizes<T>(saf,nSites)/1024/1024;
  float mem_avail_megs =(float) getTotalSystemMemory()/1024/1024;//total memory in megabytes
  fprintf(stderr,"\t-> The choice of -nSites will require atleast: %f megabyte memory, that is at least: %.2f%% of total memory\n",bytes_req_megs,bytes_req_megs*100/mem_avail_megs);

  // One likelihood matrix per input, nSites rows each.
  std::vector<Matrix<T> *> gls;
  for(size_t i=0;i<saf.size();i++)
    gls.push_back(alloc<T>(nSites,saf[i]->nChr+1));

  int ndim=(int) parspace(saf);
  double *sfs=new double[ndim];

  //temp used for checking pos are in sync
  setGloc(saf,nSites);
  while(1) {
    int ret=readdata(saf,gls,nSites,arg->chooseChr,arg->start,arg->stop,NULL,NULL);//read nsites from data
    int b=0;  // index of the current bootstrap replicate
    if(ret==-2&&gls[0]->x==0)//no more data in files or in chr, either way we break
      break;
    // Chunk not full yet: keep reading unless a region was requested or
    // readdata() returned -3.
    if(gls[0]->x!=nSites&&arg->chooseChr==NULL&&ret!=-3)
      continue;
    if(gls[0]->x==0)
      continue;
    
    fprintf(stderr,"\t-> Will run optimization on nSites: %lu\n",gls[0]->x);

    // Executed once, or arg->bootstrap times when bootstrapping is enabled.
    do{
      if(arg->bootstrap)
	fprintf(stderr,"Will do bootstrap replicate %d/%d\n",b+1,arg->bootstrap);

      // Starting point for the optimisation.
      if(arg->sfsfname.size()!=0)
	readSFS(arg->sfsfname[0],ndim,sfs);
      else if(arg->seed==-1){
	for(int i=0;i<ndim;i++)
	  sfs[i] = (i+1)/((double)(ndim));
      }else{
	for(int i=0;i<ndim;i++){
	  double r=drand48();
	  while(r==0.0)
	    r = drand48();
	  sfs[i] = r;
	}
      }
      normalize(sfs,ndim);
      
      if(bootstrap==NULL &&arg->bootstrap){
	// The buffer is allocated once and reused for every later chunk, so
	// it must hold the largest possible chunk (nSites), not just the
	// size of the first chunk seen.
	size_t cap = nSites>gls[0]->x ? nSites : gls[0]->x;
	bootstrap = new size_t[cap];
      }
      
      if(bootstrap){
	// Draw site indices with replacement and keep them sorted.
	for(size_t i=0;i<gls[0]->x;i++)
	  bootstrap[i] = lrand48() % gls[0]->x;
	std::sort(bootstrap,bootstrap+gls[0]->x);
      }

      double lik;
      if(arg->emAccl==0)
	lik = em<float>(sfs,arg->tole,arg->maxIter,arg->nThreads,ndim,gls);
      else
	lik = emAccl<float>(sfs,arg->tole,arg->maxIter,arg->nThreads,ndim,gls,arg->emAccl);
      fprintf(stderr,"likelihood: %f\n",lik);
      fprintf(stderr,"------------\n");

      //all gls have the same ->x. That means the same number of sites.
      for(int x=0;x<ndim;x++)
	fprintf(stdout,"%f ",((double)gls[0]->x)*sfs[x]);
      fprintf(stdout,"\n");
      fflush(stdout);
    }while(++b<arg->bootstrap);

    // Mark every matrix as empty before the next chunk is read.
    for(size_t i=0;i<gls.size();i++)
      gls[i]->x =0;
    
    if(ret==-2&&arg->chooseChr!=NULL)
      break;
    if(arg->onlyOnce)
      break;
  }
  delGloc(saf,nSites);
  destroy(gls,nSites);
  destroy_args(arg);
  delete [] sfs;
  
  fprintf(stderr,"\n\t-> NB NB output is no longer log probs of the frequency spectrum!\n");
  fprintf(stderr,"\t-> Output is now simply the expected values! \n");
  fprintf(stderr,"\t-> You can convert to the old format simply with log(norm(x))\n");
  return 0;
}
示例#4
0
/*
 * Entry point of the "2dsfs" sub-command.
 *
 * Expects argv = { <subcommand>, pop1, pop2, nChr1, nChr2, [options...] };
 * the remaining options are parsed by getArgs(). Reads the two inputs in
 * chunks of nSites sites, optimises the joint spectrum with em2() (unless
 * calcLike is set), prints the likelihood to stderr and log(sfs) as a
 * (chr1+1) x (chr2+1) table to stdout.
 *
 * Prints the usage text and returns 0 when the four positional arguments
 * are not all present. Always returns 0.
 */
int main_2dsfs(int argc,char **argv){
  // The four positional arguments are consumed unconditionally below, so
  // all of them must be present (argv[0] is skipped first).
  if(argc<5){
    fprintf(stderr,"./emOptim2 2dsfs pop1 pop2 nChr1 nChr2 [-start FNAME -P nThreds -tole tole -maxIter ] (only works if the two saf files covers the same region)\n");
    return 0;
  }
  argv++;
  argc--;
  fname1 = *(argv++);
  fname2 = *(argv++);
  argc -=2;
  chr1 = atoi(*(argv++));
  chr2 = atoi(*(argv++));
  argc -=2;
  getArgs(argc,argv);
  if(nSites==0){
    if(fsize(fname1)+fsize(fname2)>getTotalSystemMemory())
      fprintf(stderr,"Looks like you will allocate too much memory, consider starting the program with a lower -nSites argument\n");
    //this doesnt make sense if ppl supply a filelist containing safs
     nSites=calcNsites(fname1,chr1);
  }
  // sfsfname is NULL when no start file was given (it is tested against NULL
  // further down); passing NULL to %s is undefined, so print a placeholder.
  fprintf(stderr,"fname1:%sfname2:%s chr1:%d chr2:%d startsfs:%s nThreads=%d tole=%f maxIter=%d nSites:%lu\n",fname1,fname2,chr1,chr2,sfsfname!=NULL?sfsfname:"(null)",nThreads,tole,maxIter,nSites);
  float bytes_req_megs = nSites*(sizeof(double)*(chr1+1) + sizeof(double)*(chr2+1)+2*sizeof(double*))/1024/1024;
  float mem_avail_megs = getTotalSystemMemory()/1024/1024;//total memory in megabytes
  fprintf(stderr,"The choice of -nSites will require atleast: %f megabyte memory, that is approx: %.2f%% of total memory\n",bytes_req_megs,bytes_req_megs*100/mem_avail_megs);

  // Only reached with nSites still 0 when the first calcNsites() call
  // above returned 0.
  if(nSites==0){
    if(calcNsites(fname1,chr1)!=calcNsites(fname2,chr2)){
      fprintf(stderr,"Problem with number of sites in file: %s and %s\n",fname1,fname2);
      exit(0);
    }
    nSites=calcNsites(fname1,chr1);
  }
  gzFile gz1=getGz(fname1);
  gzFile gz2=getGz(fname2);
  
  dim=(chr1+1)*(chr2+1);
  
  Matrix<double> GL1=alloc(nSites,chr1+1);
  Matrix<double> GL2=alloc(nSites,chr2+1);
  dim=GL1.y*GL2.y;
  
  double *sfs = new double[dim];
  while(1){
    if(isList ==0){
      readGL(gz1,nSites,chr1,GL1);
      readGL(gz2,nSites,chr2,GL2);
    }else{
      readGL2(gz1,nSites,chr1,GL1);
      readGL2(gz2,nSites,chr2,GL2);
    }
      
    assert(GL1.x==GL2.x);
    if(GL1.x==0)
      break;
    
    // Starting point: user supplied spectrum, or a normalised ramp.
    if(sfsfname!=NULL){
      readSFS(sfsfname,dim,sfs);
    }else{
      for(int i=0;i<dim;i++)
	sfs[i] = (i+1)/((double)dim);
      normalize(sfs,dim);
    }
    
    setThreadPars(&GL1,&GL2,sfs,nThreads);
    if(calcLike==0){
      if(SIG_COND) 
	em2(sfs,&GL1,&GL2,tole,maxIter);
    }
    double lik;
    if(nThreads>1)
      lik = lik1_master();
    else
      lik = lik1(sfs,&GL1,0,GL1.x);
      
    fprintf(stderr,"likelihood: %f\n",lik);

    // One output row per pop1 category, one column per pop2 category.
    int inc=0;
    for(int x=0;x<chr1+1;x++){
      for(int y=0;y<chr2+1;y++)
	fprintf(stdout,"%f ",log(sfs[inc++]));
      fprintf(stdout,"\n");
    }

    if(isList==1)
      break;
  }
  dalloc(GL1,nSites);
  dalloc(GL2,nSites);
  gzclose(gz1);
  gzclose(gz2);
  delete [] sfs; // was leaked before
  return 0;
}
示例#5
0
/*
 * Entry point of the one-population SFS estimation.
 *
 * Expects argv = { file.saf, nChr, [options...] }; the remaining options
 * are parsed by getArgs(). Inputs for which isNewFormat() is true are
 * delegated to main_1dsfs_v2(). Otherwise the file is read in chunks of
 * nSites sites, the spectrum is optimised with em1() or bfgs() (unless
 * calcLike is set), the likelihood is printed to stderr and log(sfs) to
 * stdout.
 *
 * Returns 0, or the result of main_1dsfs_v2() for new-format input.
 */
int main_1dsfs(int argc,char **argv){
  if(argc<2){
    fprintf(stderr,"Must supply afile.saf and number of chromosomes\n");
    return 0;
  }
  fname1 = *(argv++);
  chr1 = atoi(*(argv++));
  argc-=2;
 
  getArgs(argc,argv);
  dim=chr1+1;
  //hook for new EJ banded version
  if(isNewFormat(fname1))
    return main_1dsfs_v2(fname1,chr1,nSites,nThreads,sfsfname,tole,maxIter);

  if(nSites==0){//if no -nSites is specified
    if(fsize(fname1)>getTotalSystemMemory())
      fprintf(stderr,"Looks like you will allocate too much memory, consider starting the program with a lower -nSites argument\n");
    //this doesnt make sense if ppl supply a filelist containing safs
    nSites=calcNsites(fname1,chr1);
  }
  // sfsfname is NULL when no start file was given (it is tested against NULL
  // further down); passing NULL to %s is undefined, so print a placeholder.
  fprintf(stderr,"fname1:%s nChr:%d startsfs:%s nThreads:%d tole=%f maxIter=%d nSites=%lu\n",fname1,chr1,sfsfname!=NULL?sfsfname:"(null)",nThreads,tole,maxIter,nSites);
  float bytes_req_megs = nSites*(sizeof(double)*(chr1+1)+sizeof(double*))/1024/1024;
  float mem_avail_megs = getTotalSystemMemory()/1024/1024;//total memory in megabytes
  fprintf(stderr,"The choice of -nSites will require atleast: %f megabyte memory, that is approx: %.2f%% of total memory\n",bytes_req_megs,bytes_req_megs*100/mem_avail_megs);

  Matrix<double> GL1=alloc(nSites,dim);
  gzFile gz1=getGz(fname1);  
  double *sfs=new double[dim];
  
  while(1) {
    if(isList==0)
      readGL(gz1,nSites,chr1,GL1);
    else
      readGL2(gz1,nSites,chr1,GL1);
    if(GL1.x==0)
      break;
    fprintf(stderr,"dim(GL1)=%zu,%zu\n",GL1.x,GL1.y);

    // Starting point: user supplied spectrum, or a generated one.
    if(sfsfname!=NULL){
      readSFS(sfsfname,dim,sfs);
    }else{
      for(int i=0;i<dim;i++)
	sfs[i] = (i+1)/((double)dim);
      if(doBFGS){
	// Overwrite the ramp: first entry 1/ts, later entries
	// proportional to 0.01/(1+i).
	double ts=1;
	for(int i=0;i<dim-1;i++)
	  ts += 0.01/(1.0+i);
	sfs[0]=1.0/ts;
	for(int i=0;i<dim-1;i++)
	  sfs[i+1]  = (0.01/(1.0+i))/ts;
      }
      normalize(sfs,dim);
    }

    setThreadPars(&GL1,NULL,sfs,nThreads);
    if(calcLike==0){
      if(doBFGS==0) 
	em1(sfs,&GL1,tole,maxIter);
      else
	bfgs(sfs,&GL1);
    }
    double lik;
    if(nThreads>1)
      lik = lik1_master();
    else
      lik = lik1(sfs,&GL1,0,GL1.x);
      
    fprintf(stderr,"likelihood: %f\n",lik);

    for(int x=0;x<dim;x++)
      fprintf(stdout,"%f ",log(sfs[x]));
    fprintf(stdout,"\n");
    fflush(stdout);

    if(isList==1)
      break;
  }
  dalloc(GL1,nSites);
  gzclose(gz1);
  delete [] sfs;
  return 0;
}
示例#6
0
int main(void) {
	clock_t t1, t2;
	t1 = clock();
	FILE *fp;
//				fp = fopen("/exports/home/s1052689/nesterov.txt", "w");
	fp = fopen("/tmp/nesterov.txt", "w");
	srand(2);
	double lambda = 1;
	double diff;
	double rho = 1;
	long int    n = 100000000;
	int n_nonzero = 160000;
	long int m = 5 * n;
	double sqrtofnonzeros = 400;
	int p = 10;
	int NMAX = 90;
	int samplingsize = n/100  ;

	printf("outside thread num is %d\n", omp_get_num_threads());
	int totalThreds=0;
#pragma omp parallel  shared(totalThreds)
	{
		totalThreds = omp_get_num_threads();
//		printf("total threds:%d",totalThreds);
	}
	printf("total threds:%d",totalThreds);
	int N;

	long int i, j, k;
	unsigned int s;
	unsigned int seed[omp_get_num_threads()];
	for (i = 0; i < totalThreds; i++) {
		seed[i] = (int) RAND_MAX*rand();
		if (seed[i]<0)
			seed[i]=-seed[i];
//		printf("seed %d, val %d\n",i, seed[i]);

	}

	printf("texst\n");
	printf("free memory:%d\n", getTotalSystemMemory());
	//	double* AAA;

	printf("Idem alokovat data s obsahom %d\n", n);
	//	AAA = (double*) malloc(n * sizeof(double));

	printf("alokacia poli start\n");
	double A_h[n][p]; // host A matrix pointers
	printf("alokacia A done \n");
	long int IDX_h[n][p]; // host Aindex matrix pointers
	printf("alokacia I done \n");
	printf("alokacia x done \n");
	double optimalvalue = 0;

	int analysisLength = NMAX * n / samplingsize;

	struct optimalityAnalysis* analysis;
	analysis = (struct optimalityAnalysis*) calloc(analysisLength,
			sizeof(struct optimalityAnalysis));

	double tmp;
	printf("alokacia poli END\n");

	print_time_message(t1, "alokacia poli");
	//Generovanie problemu-------------------------------------------------------------------
	t1 = clock();
	long int idx;
	int notfinished;
	double val;

#pragma omp parallel private(i,j,idx,notfinished,val,k,s ), shared(IDX_h, A_h,n,m,p)
	{
		s = seed[omp_get_thread_num()];
//		printf("thred %d, val:%f\n",omp_get_thread_num(),(double) rand_r(&s) / RAND_MAX);
//		printf("thred %d, val:%f\n",omp_get_thread_num(),(double) rand_r(&s) / RAND_MAX);
#pragma omp for
		for (i = 0; i < n; i++) {
			idx = 0;

			for (j = 0; j < p; j++) {
			notfinished = 1;
				val = (double) rand_r(&s) / RAND_MAX;
				while (notfinished) {
					notfinished = 0;
					idx = ((long int) ((m) * (rand_r(&s) / (RAND_MAX + 1.0))));
					for (k = 0; k < j; k++) {
						if (IDX_h[i][k] == idx) {
							notfinished = 1;
						}
					}
				}
				A_h[i][j] = 2 * val - 1;
				IDX_h[i][j] = idx;
			}
		}
	}
//return 1;
	print_time_message(t1, "Matrix B Generated");
	t1 = clock();
	double* y;
	y = (double*) calloc(m, sizeof(double));
	tmp = 0;

#pragma omp parallel private(j,s), shared(y), reduction(+:tmp)
	{
		s = seed[omp_get_thread_num()];
#pragma omp for
		for (j = 0; j < m; j++) {
			y[j] = (double) rand_r(&s) / RAND_MAX;
			tmp += y[j] * y[j];
		}
	}

#pragma omp parallel private(j),shared(y,tmp)
	{
#pragma omp for
		for (j = 0; j < m; j++) {
			y[j] = y[j] / tmp;
		}
	}
	print_time_message(t1, "vector y Generated");
	struct st_sortingByAbsWithIndex* dataToSort;
	dataToSort = (struct st_sortingByAbsWithIndex*) calloc(n,
			sizeof(struct st_sortingByAbsWithIndex));

#pragma omp parallel private(i,j,tmp), shared(dataToSort,A_h,IDX_h,y)
	{
#pragma omp for
		for (i = 0; i < n; i++) {
			dataToSort[i].idx = i;
			tmp = 0;
			for (j = 0; j < p; j++) {
				tmp += y[IDX_h[i][j]] * A_h[i][j];
			}
			dataToSort[i].value = tmp;
		}
	}
	print_time_message(t1, "Struc created");
	//Sorting B
	printf("SORTING START\n");

	size_t structs_len = sizeof(dataToSort)
			/ sizeof(struct st_sortingByAbsWithIndex);
	printf("SORTING 2\n");
	qsort(dataToSort, structs_len, sizeof(struct st_sortingByAbsWithIndex),
			struct_cmp_by_value);
	printf("SORTING END\n");
	//	return 1;
	double* x;
	x = (double*) calloc(n, sizeof(double));

#pragma omp parallel private(i,s), shared(x)
	{
		s = seed[omp_get_thread_num()];
#pragma omp for
		for (i = 0; i < n; i++) {
			x[i] = ((double) rand_r(&s) / RAND_MAX);
		}
	}
	print_time_message(t1, "GENEROVANIE RANDOM X END");
	double alpha = 0;

#pragma omp parallel private(i,alpha,idx,j), shared(x,A_h,dataToSort,sqrtofnonzeros,rho ,n,p)
	{
#pragma omp for
		for (i = 0; i < n; i++) { // vytvaranie matice A
			idx = dataToSort[i].idx;
			alpha = 1;
			if (i < n_nonzero) {
				alpha = (double) abs(1 / dataToSort[idx].value);
				x[idx] = x[idx] * rho / (sqrtofnonzeros);
				if (dataToSort[idx].value < 0) {
					x[idx] = -x[idx];
				}
			} else if (dataToSort[idx].value > 0.1 || dataToSort[idx].value
					< -0.1) {
				alpha = (double) abs(1 / dataToSort[idx].value) * x[idx];
				x[idx] = 0;
			} else {
				x[idx] = 0;
			}
			for (j = 0; j < p; j++) {
				A_h[idx][j] = A_h[idx][j] * alpha;
			}
		}
	}
	print_time_message(t1, "A modified");
	t1 = clock();
	//	print_double_array(&L[0],n);
	//	print_double_array(&Li[0], 10);
	free(dataToSort);
	// Compute Li
	double* Li; // Lipschitz constants
	Li = (double*) calloc(n, sizeof(double));
	print_time_message(t1, "Alokacia Li");
	t1 = clock();
#pragma omp parallel private(i,j), shared(Li,A_h,p,n)
	{
#pragma omp for
		for (i = 0; i < n; i++) {
			Li[i] = 0;
			for (j = 0; j < p; j++) {
				Li[i] += A_h[i][j] * A_h[i][j];
			}
			Li[i] = 1 / Li[i];
		}
	}
	// END compute Li
	print_time_message(t1, "Compute Li");
	t1 = clock();
#pragma omp parallel private(i), shared(y,m), reduction(+:optimalvalue)
	{
#pragma omp for
		for (i = 0; i < m; i++) {
			optimalvalue += y[i] * y[i];
		}
	}
	print_time_message(t1, "OptVal1");
	t1 = clock();
	optimalvalue = optimalvalue * 0.5;
	double* b;
	b = y;

	for (j = 0; j < p; j++) {
		for (i = 0; i < n; i++) {
			b[IDX_h[i][j]] += x[i] * A_h[i][j];
		}
	}
	print_time_message(t1, "OptVal2 serial");
	t1 = clock();

#pragma omp parallel private(i), shared(n,x), reduction(+:optimalvalue)
	{
#pragma omp for
		for (i = 0; i < n; i++) {
			if (x[i] > 0)
				optimalvalue += (x[i]);
			else
				optimalvalue -= x[i];
		}
	}
	print_time_message(t1, "OptVal3");
	t1 = clock();

	printf("optval %1.16f \n", optimalvalue);
	t2 = clock();
	diff = ((float) t2 - (float) t1) / 1000000.0F;
	printf("Generating END:%f\n", diff);
	fprintf(fp, "Generating END:%f\n", diff);
	//Generovanie problemu----------------------------END----------------------------------
	double * residuals;
	residuals = (double*) calloc(m, sizeof(double));
	printf("Residuals alocated");
#pragma omp parallel private(i), shared(m,b,residuals)
	{
#pragma omp for
		for (i = 0; i < m; i++) {
			residuals[i] = -b[i];
		}
	}
	printf("Residuals = -b");
	for (i = 0; i < n; i++) {
		for (j = 0; j < p; j++) {
			residuals[IDX_h[i][j]] += x[i] * A_h[i][j];
		}
	}
	printf("Residuals =updated");
	double nesterovvalue = 0;

#pragma omp parallel private(i), shared(m, residuals), reduction(+:nesterovvalue)
	{
#pragma omp for
		for (i = 0; i < m; i++) {
			nesterovvalue += residuals[i] * residuals[i];
		}
	}
	nesterovvalue = nesterovvalue / 2;
#pragma omp parallel private(i), shared(n, x), reduction(+:nesterovvalue)
	{
#pragma omp for
		for (i = 0; i < n; i++) {
			if (x[i] > 0)
				nesterovvalue += x[i];
			else
				nesterovvalue -= x[i];
		}
	}
	// Calculate residuals
#pragma omp parallel private(j,i), shared(m,b,x,n,residuals)
	{
#pragma omp for
		for (j = 0; j < m; j++)
			residuals[j] = -b[j];
#pragma omp for
		for (i = 0; i < n; i++)
			x[i] = 0;
	}
	//----------------RCDM----------serial===================================---
	double tmp1;
	double currentvalue = 0;
printf("RCDM serial");
	int analisisIDX = 0;
	double epsilon = 0;
	currentvalue = 0;
	//			print_double_array(&residuals[0],m);

#pragma omp parallel private(i), shared(residuals,m), reduction(+:currentvalue)
	{
#pragma omp for
		for (i = 0; i < m; i++) {
			currentvalue += residuals[i] * residuals[i];
		}
	}
	currentvalue = currentvalue * 0.5;
	//			printf("CV:%1.16f\n", currentvalue);

	//			printf(" %1.16f\n",  currentvalue  );
	double normsize = 0;
#pragma omp parallel private(i), shared(lambda,n,x), reduction(+:normsize)
	{
#pragma omp for
		for (i = 0; i < n; i++) {
			if (x[i] > 0)
				normsize += lambda * x[i];
			else
				normsize -= lambda * x[i];
		}
	}

	//			print_double_array(&x[0],n);
	epsilon = currentvalue + normsize;


	srand(2);
	printf("ZACIATOK RIESENIA\n");
	t1 = clock();
	for (N = 0; N < NMAX; N++) {
		for (k = 0; k < n; k++) {
			//		for (k = 0; k < n; k++) {
			int idx = (int) (n * (rand() / (RAND_MAX + 1.0)));
			double tmp = 0;
			for (j = 0; j < p; j++) {
				//						printf("tmp:%f  A:%f   residual:%f  \n",tmp,A_h[idx][j],residuals[IDX_h[idx][j]]);
				tmp += A_h[idx][j] * residuals[IDX_h[idx][j]];
			}
			//				printf("Li[%d] =  %f; tmp=%f  \n", idx, Li[idx], tmp);
			tmp1 = Li[idx] * (tmp + lambda);
			if (x[idx] > tmp1) {
				tmp = -tmp1;
			} else {
				tmp1 = Li[idx] * (tmp - lambda);
				if (x[idx] < tmp1) {
					tmp = -tmp1;
				} else {
					tmp = -x[idx];
				}
			}
			x[idx] += tmp;
			//update residuals:
			for (j = 0; j < p; j++) {
				residuals[IDX_h[idx][j]] += tmp * A_h[idx][j];
			}
			//				printf("Iteration %d,  x[%d]=%f  \n", N, idx, x[idx]);
			if (k % samplingsize == 0) {
				currentvalue = 0;
				//			print_double_array(&residuals[0],m);

#pragma omp parallel private(i), shared(residuals,m), reduction(+:currentvalue)
				{
#pragma omp for
					for (i = 0; i < m; i++) {
						currentvalue += residuals[i] * residuals[i];
					}
				}
				currentvalue = currentvalue * 0.5;
				//			printf("CV:%1.16f\n", currentvalue);

				//			printf(" %1.16f\n",  currentvalue  );
				double normsize = 0;
#pragma omp parallel private(i), shared(lambda,n,x), reduction(+:normsize)
				{
#pragma omp for
					for (i = 0; i < n; i++) {
						if (x[i] > 0)
							normsize += lambda * x[i];
						else
							normsize -= lambda * x[i];
					}
				}

				//			print_double_array(&x[0],n);
				currentvalue = currentvalue + normsize;
				//			printf("NZ:%1.16f; :%1.16f\n", currentvalue, normsize);

				//			printf(" %1.16f\n",  currentvalue  );
				analysis[analisisIDX].accuracy = currentvalue;
				analysis[analisisIDX].nnz = 0;
				analysis[analisisIDX].correctnnz = 0;
				analysis[analisisIDX].iteration = N + (double) k / n;
				for (i = 0; i < n; i++) {
					if (x[i] != 0)
						analysis[analisisIDX].nnz++;
					//					if (x_optimal[i] != 0 && x[i] != 0)
					//						analysis[analisisIDX].correctnnz++;

				}
				t2 = clock();
				diff = ((float) t2 - (float) t1) / 1000000.0F;

				printf("%f,%d,%d,%1.16f,TIME:%f\n",
						analysis[analisisIDX].iteration,
						analysis[analisisIDX].nnz,
						analysis[analisisIDX].correctnnz, currentvalue
								- optimalvalue, diff);

				//			printf("%d: nnz %d   correct nnz %d \n", N, analysis[analisisIDX].nnz,analysis[analisisIDX].correctnnz);
				//			printf("%d: f^*=%1.16f,   f(x)=%1.16f \n", N, optimalvalue,
				//					currentvalue);
				//			printf("%d: f(x)-f^*=%1.16f\n", N, currentvalue - optimalvalue);


				analisisIDX++;
			}
		}

	}
	/// SErIAL RCDM =========================================================END
	printf("KONIEC RIESENIA\n");

	currentvalue = 0;
#pragma omp parallel private(i), shared(residuals,m), reduction(+:currentvalue)
	{
#pragma omp for
		for (i = 0; i < m; i++) {
			currentvalue = residuals[i] * residuals[i];
		}
	}
	currentvalue = currentvalue / 2;
#pragma omp parallel private(i), shared(x,n), reduction(+:currentvalue)
	{
#pragma omp for
		for (i = 0; i < n; i++) {
			if (x[i] > 0)
				currentvalue += x[i];
			else
				currentvalue -= x[i];
		}
	}
	printf("Comparison \n");

	//	for (i = 0; i < n; i++) {
	//		if (x[i] > 0 || x[i] < 0 || x_optimal[i] > 0 || x_optimal[i] < 0) {
	//			printf("x[%d] =  %1.10f ;x*[%d]=%1.10f  \n", i, x[i], i,
	//					x_optimal[i]);
	//		}
	//	}

	printf("f^*=%1.16f,   f(x)=%1.16f \n", optimalvalue, currentvalue);
	printf("f(x)-f^*=%1.16f\n", currentvalue - optimalvalue);

	// Skutocna optimalna hodnota dana nesterovym vysledkom


	printf("=====================================\n");
	printf("f^N=%1.16f,   f(x)=%1.16f \n", nesterovvalue, currentvalue);
	printf("f(x)-f^N=%1.16f \n", -nesterovvalue + currentvalue);
	printf("f^N=%1.16f,   f(x)=%1.16f \n", nesterovvalue, currentvalue);
	printf("f(x)-f^N=%1.16f \n", -nesterovvalue + currentvalue);
	//tmp=0;
	//	for (i = 0; i < n; i++) {
	// tmp+=(x[i]-x_optimal[i])*(x[i]-x_optimal[i]);
	//	}
	//	printf("|x-xoptimal|^2 = %1.16f \n",tmp);
	// Allocation arrays on cuda device:

	//VYPISANIE VYSLEDKOV

	double minvalue = nesterovvalue;
	for (i = 1; i < analisisIDX; i++) {
		if (analysis[i].accuracy < minvalue) {
			minvalue = analysis[i].accuracy;
		}
	}
	printf("min value: %f\n", minvalue);
	fprintf(fp,"min value: %f\n", minvalue);
	//	i = analisisIDX - 1;
	//	printf("it: %d; eps: %1.16f; nnzofX: %d, basis: %f \n",
	//			analysis[i].iteration, analysis[i].accuracy - minvalue,
	//			analysis[i].nnz, (double) analysis[i].correctnnz / n_nonzero);
	printf("F(x_0): %f\n", epsilon);
	fprintf(fp,"F(x_0): %f\n", epsilon);

	epsilon=epsilon-minvalue;
	fprintf(fp,"F(x_0)-F^*: %f\n", epsilon);
	epsilon=epsilon*0.1;
	for (i = 1; i < analisisIDX; i++) {
		if (analysis[i].accuracy - minvalue <= epsilon && epsilon >= 0) {
			fprintf(fp, "it: %1.4f; eps: %1.16f; nnzofX: %d, basis: %f \n",
					(double) analysis[i].iteration, analysis[i].accuracy
							- minvalue, analysis[i].nnz,
					(double) analysis[i].correctnnz / n_nonzero);
			printf("it: %1.4f; eps: %1.16f; nnzofX: %d, basis: %f \n",
					(double) analysis[i].iteration, analysis[i].accuracy
							- minvalue, analysis[i].nnz,
					(double) analysis[i].correctnnz / n_nonzero);

			epsilon = epsilon * 0.1;
			printf("epsilon: %f \n", epsilon);
		}
		if (i > 10 && analysis[i].accuracy - minvalue == 0) {
			break;
		}
	}

	fclose(fp);
		fp = fopen("/tmp/nesterov_time.txt", "w");
//	fp = fopen("/exports/home/s1052689/nesterov_time.txt", "w");

	// Calculate residuals
#pragma omp parallel private(j,i), shared(m,b,x,n,residuals)
	{
#pragma omp for
		for (j = 0; j < m; j++)
			residuals[j] = -b[j];
#pragma omp for
		for (i = 0; i < n; i++)
			x[i] = 0;
	}
	//----------------RCDM----------serial===================================---
	srand(2);
	printf("ZACIATOK RIESENIA\n");
	t1 = clock();
	for (N = 0; N < NMAX; N++) {
		for (k = 0; k < n; k++) {
			//		for (k = 0; k < n; k++) {
			int idx = (int) (n * (rand() / (RAND_MAX + 1.0)));
			double tmp = 0;
			for (j = 0; j < p; j++) {
				//						printf("tmp:%f  A:%f   residual:%f  \n",tmp,A_h[idx][j],residuals[IDX_h[idx][j]]);
				tmp += A_h[idx][j] * residuals[IDX_h[idx][j]];
			}
			//				printf("Li[%d] =  %f; tmp=%f  \n", idx, Li[idx], tmp);
			tmp1 = Li[idx] * (tmp + lambda);
			if (x[idx] > tmp1) {
				tmp = -tmp1;
			} else {
				tmp1 = Li[idx] * (tmp - lambda);
				if (x[idx] < tmp1) {
					tmp = -tmp1;
				} else {
					tmp = -x[idx];
				}
			}
			x[idx] += tmp;
			//update residuals:
			for (j = 0; j < p; j++) {
				residuals[IDX_h[idx][j]] += tmp * A_h[idx][j];
			}
			//				printf("Iteration %d,  x[%d]=%f  \n", N, idx, x[idx]);
			if (k % samplingsize == 0) {
				t2 = clock();
				diff = ((float) t2 - (float) t1) / 1000000.0F;

				printf("%f,TIME:%f\n", N + (double) k / n, diff);
				fprintf(fp, "%f,TIME:%f\n", N + (double) k / n, diff);

			}
		}

	}
	/// SErIAL RCDM =========================================================END
	printf("KONIEC RIESENIA\n");

	//	return 1;
	double * A_dev;
	double * L_dev;
	double * x_dev;
	double * b_dev;
	double * lambda_dev;

	//----------------RCDM------- parallel


	fclose(fp);
}
示例#7
0
int main(void) {
	FILE *fp;
//	fp = fopen("/exports/home/s1052689/nesterov.txt", "w");
			fp = fopen("/tmp/nesterov.txt", "w");
	srand(2);
	double lambda = 1;
	double rho = 1;
	long int n = 1000000;
	int n_nonzero = 160000;
	long int m = 10 * n;
	double sqrtofnonzeros = 400;
	int p = 15;

	int NMAX = 60;
	int N;
	int samplingsize = n / 1;
	long int i, j, k;
	printf("texst\n");
	printf("free memory:%d\n", getTotalSystemMemory());
	//	double* AAA;

	printf("Idem alokovat data s obsahom %d\n", n);
	//	AAA = (double*) malloc(n * sizeof(double));

	printf("alokacia poli start\n");
	double A_h[n][p]; // host A matrix pointers
	printf("alokacia A done \n");
	long int IDX_h[n][p]; // host Aindex matrix pointers
	printf("alokacia I done \n");
	printf("alokacia x done \n");
	double optimalvalue = 0;

	int analysisLength = NMAX * n / samplingsize;

	struct optimalityAnalysis* analysis;
	analysis = (struct optimalityAnalysis*) calloc(analysisLength,
			sizeof(struct optimalityAnalysis));

	double tmp;
	printf("alokacia poli END\n");

	printf("free memory:%d\n", getTotalSystemMemory());

	//Generovanie problemu-------------------------------------------------------------------
	for (i = 0; i < n; i++) {
		long int idx = 0;
		for (j = 0; j < p; j++) {
			int notfinished = 1;
			double val = (double) rand() / RAND_MAX;
			while (notfinished) {
				notfinished = 0;
				idx = ((long int) ((m) * (rand() / (RAND_MAX + 1.0))));
				for (k = 0; k < j; k++) {
					if (IDX_h[i][k] == idx) {
						notfinished = 1;
					}
				}
			}
			A_h[i][j] = 2 * val - 1;
			IDX_h[i][j] = idx;
		}
	}
	printf("Matrix B Generated\n");
	printf("free memory:%d\n", getTotalSystemMemory());

	double* y;
	y = (double*) calloc(m, sizeof(double));
	tmp = 0;
	for (j = 0; j < m; j++) {
		y[j] = (double) rand() / RAND_MAX;
		tmp += y[j] * y[j];
	}
	for (j = 0; j < m; j++) {
		y[j] = y[j] / tmp;
	}
	printf("vector y Generated\n");

	struct st_sortingByAbsWithIndex* dataToSort;
	dataToSort = (struct st_sortingByAbsWithIndex*) calloc(m,
			sizeof(struct st_sortingByAbsWithIndex));

	for (i = 0; i < n; i++) {
		dataToSort[i].idx = i;
		dataToSort[i].value = 0;
	}
	printf("Struc created\n");
	for (i = 0; i < n; i++) {
		tmp = 0;
		for (j = 0; j < p; j++) {
			tmp += y[IDX_h[i][j]] * A_h[i][j];
		}
		dataToSort[i].value = tmp;
	}

	//Sorting B
	printf("SORTING START\n");

	size_t structs_len = sizeof(dataToSort)
			/ sizeof(struct st_sortingByAbsWithIndex);
	printf("SORTING 2\n");
	qsort(dataToSort, structs_len, sizeof(struct st_sortingByAbsWithIndex),
			struct_cmp_by_value);
	printf("SORTING END\n");
	//	return 1;
	double* x;
	x = (double*) calloc(n, sizeof(double));
	for (i = 0; i < n; i++) { // vytvaranie matice A
		int idx = dataToSort[i].idx;
		double alpha = 1;
		x[idx] = 0;
		if (i < n_nonzero) {
			alpha = (double) abs(1 / dataToSort[idx].value);
			x[idx] = ((double) rand() / RAND_MAX) * rho / (sqrtofnonzeros);
			if (dataToSort[idx].value < 0) {
				x[idx] = -x[idx];
			}
		} else if (dataToSort[idx].value > 0.1 || dataToSort[idx].value < -0.1) {
			alpha = (double) abs(1 / dataToSort[idx].value) * (double) rand()
					/ RAND_MAX;
		}
		for (j = 0; j < p; j++) {
			A_h[idx][j] = A_h[idx][j] * alpha;
		}
	}
	//	print_double_array(&L[0],n);
	//	print_double_array(&Li[0], 10);
	free(dataToSort);
	// Compute Li
	double* Li; // Lipschitz constants
	Li = (double*) calloc(n, sizeof(double));
	for (i = 0; i < n; i++) {
		Li[i] = 0;
		for (j = 0; j < p; j++) {
			Li[i] += A_h[i][j] * A_h[i][j];
		}
		Li[i] = 1 / Li[i];
	}
	// END compute Li

	for (i = 0; i < m; i++) {
		optimalvalue += y[i] * y[i];
	}
	optimalvalue = optimalvalue * 0.5;

	double* b;
	b = y;
	for (i = 0; i < n; i++) {
		for (j = 0; j < p; j++) {
			b[IDX_h[i][j]] += x[i] * A_h[i][j];
		}
	}
	for (i = 0; i < n; i++) {
		//		printf("optval %1.16f \n", optimalvalue);
		if (x[i] > 0)
			optimalvalue += x[i];
		else
			optimalvalue -= x[i];
	}
	printf("optval %1.16f \n", optimalvalue);

	//Generovanie problemu----------------------------END----------------------------------
	double * residuals;
	residuals = (double*) calloc(m, sizeof(double));
	for (i = 0; i < m; i++) {
		residuals[i] = -b[i];
	}
	for (i = 0; i < n; i++) {
		for (j = 0; j < p; j++) {
			residuals[IDX_h[i][j]] += x[i] * A_h[i][j];
		}
	}
	double nesterovvalue = 0;
	for (i = 0; i < m; i++) {
		nesterovvalue = residuals[i] * residuals[i];
	}
	nesterovvalue = nesterovvalue / 2;
	for (i = 0; i < n; i++) {
		if (x[i] > 0)
			nesterovvalue += x[i];
		else
			nesterovvalue -= x[i];
	}

	// Calculate residuals
	for (j = 0; j < m; j++)
		residuals[j] = -b[j];
	//----------------RCDM----------serial===================================---
	for (i = 0; i < n; i++)
		x[i] = 0;
	double tmp1;
	double currentvalue = 0;

	int analisisIDX = 0;

	printf("ZACIATOK RIESENIA\n");

	for (N = 0; N < NMAX; N++) {
		for (k = 0; k < n; k++) {
			//		for (k = 0; k < n; k++) {
			int idx = (int) (n * (rand() / (RAND_MAX + 1.0)));
			double tmp = 0;
			for (j = 0; j < p; j++) {
				//						printf("tmp:%f  A:%f   residual:%f  \n",tmp,A_h[idx][j],residuals[IDX_h[idx][j]]);
				tmp += A_h[idx][j] * residuals[IDX_h[idx][j]];
			}
			//				printf("Li[%d] =  %f; tmp=%f  \n", idx, Li[idx], tmp);
			tmp1 = Li[idx] * (tmp + lambda);
			if (x[idx] > tmp1) {
				tmp = -tmp1;
			} else {
				tmp1 = Li[idx] * (tmp - lambda);
				if (x[idx] < tmp1) {
					tmp = -tmp1;
				} else {
					tmp = -x[idx];
				}
			}
			x[idx] += tmp;
			//update residuals:
			for (j = 0; j < p; j++) {
				residuals[IDX_h[idx][j]] += tmp * A_h[idx][j];
			}
			//				printf("Iteration %d,  x[%d]=%f  \n", N, idx, x[idx]);
			if (k % samplingsize == 0) {
				currentvalue = 0;
				//			print_double_array(&residuals[0],m);
				for (i = 0; i < m; i++) {
					currentvalue += residuals[i] * residuals[i];
				}
				currentvalue = currentvalue * 0.5;
				//			printf("CV:%1.16f\n", currentvalue);

				//			printf(" %1.16f\n",  currentvalue  );
				double normsize = 0;
				for (i = 0; i < n; i++) {
					if (x[i] > 0)
						normsize += lambda * x[i];
					else
						normsize -= lambda * x[i];
				}
				//			print_double_array(&x[0],n);
				currentvalue = currentvalue + normsize;
				//			printf("NZ:%1.16f; :%1.16f\n", currentvalue, normsize);

				//			printf(" %1.16f\n",  currentvalue  );
				analysis[analisisIDX].accuracy = currentvalue;
				analysis[analisisIDX].nnz = 0;
				analysis[analisisIDX].correctnnz = 0;
				analysis[analisisIDX].iteration = N + (double) k / n;
				for (i = 0; i < n; i++) {
					if (x[i] != 0)
						analysis[analisisIDX].nnz++;
//					if (x_optimal[i] != 0 && x[i] != 0)
//						analysis[analisisIDX].correctnnz++;

				}

				printf("%f,%d,%d,%1.16f\n", N + (double) k / n,
						analysis[analisisIDX].nnz,
						analysis[analisisIDX].correctnnz, currentvalue
								- optimalvalue);

				//			printf("%d: nnz %d   correct nnz %d \n", N, analysis[analisisIDX].nnz,analysis[analisisIDX].correctnnz);
				//			printf("%d: f^*=%1.16f,   f(x)=%1.16f \n", N, optimalvalue,
				//					currentvalue);
				//			printf("%d: f(x)-f^*=%1.16f\n", N, currentvalue - optimalvalue);


				analisisIDX++;
			}
		}

	}
	/// SErIAL RCDM =========================================================END
	printf("KONIEC RIESENIA\n");

	currentvalue = 0;
	for (i = 0; i < m; i++) {
		currentvalue = residuals[i] * residuals[i];
	}
	currentvalue = currentvalue / 2;
	for (i = 0; i < n; i++) {
		if (x[i] > 0)
			currentvalue += x[i];
		else
			currentvalue -= x[i];
	}

	printf("Comparison \n");

	//	for (i = 0; i < n; i++) {
	//		if (x[i] > 0 || x[i] < 0 || x_optimal[i] > 0 || x_optimal[i] < 0) {
	//			printf("x[%d] =  %1.10f ;x*[%d]=%1.10f  \n", i, x[i], i,
	//					x_optimal[i]);
	//		}
	//	}

	printf("f^*=%1.16f,   f(x)=%1.16f \n", optimalvalue, currentvalue);
	printf("f(x)-f^*=%1.16f\n", currentvalue - optimalvalue);

	// Skutocna optimalna hodnota dana nesterovym vysledkom


	printf("=====================================\n");
	printf("f^N=%1.16f,   f(x)=%1.16f \n", nesterovvalue, currentvalue);
	printf("f(x)-f^N=%1.16f \n", -nesterovvalue + currentvalue);
	printf("f^N=%1.16f,   f(x)=%1.16f \n", nesterovvalue, currentvalue);
	printf("f(x)-f^N=%1.16f \n", -nesterovvalue + currentvalue);
	//tmp=0;
	//	for (i = 0; i < n; i++) {
	// tmp+=(x[i]-x_optimal[i])*(x[i]-x_optimal[i]);
	//	}
	//	printf("|x-xoptimal|^2 = %1.16f \n",tmp);
	// Allocation arrays on cuda device:

	//VYPISANIE VYSLEDKOV
	double epsilon = 1000000000;
	double minvalue = nesterovvalue;
	for (i = 1; i < analisisIDX; i++) {
		if (analysis[i].accuracy < minvalue) {
			minvalue = analysis[i].accuracy;
		}
	}
	printf("min value: %f\n", minvalue);
	printf("min value: %f\n", minvalue);
	//	i = analisisIDX - 1;
	//	printf("it: %d; eps: %1.16f; nnzofX: %d, basis: %f \n",
	//			analysis[i].iteration, analysis[i].accuracy - minvalue,
	//			analysis[i].nnz, (double) analysis[i].correctnnz / n_nonzero);
	i = 1;

	printf("it: %1.4f; eps: %1.16f; nnzofX: %d, basis: %f \n",
			analysis[i].iteration, analysis[i].accuracy - minvalue,
			analysis[i].nnz, (double) analysis[i].correctnnz / n_nonzero);
	fprintf(fp, "it: %1.4f; eps: %1.16f; nnzofX: %d, basis: %f \n",
			analysis[i].iteration, analysis[i].accuracy - minvalue,
			analysis[i].nnz, (double) analysis[i].correctnnz / n_nonzero);

	printf("analisisIdx:%d", analisisIDX);

	for (i = 1; i < analisisIDX; i++) {
		if (analysis[i].accuracy - minvalue <= epsilon && epsilon >= 0) {
			fprintf(fp, "it: %1.4f; eps: %1.16f; nnzofX: %d, basis: %f \n",
					(double) analysis[i].iteration, analysis[i].accuracy
							- minvalue, analysis[i].nnz,
					(double) analysis[i].correctnnz / n_nonzero);
			printf("it: %1.4f; eps: %1.16f; nnzofX: %d, basis: %f \n",
					(double) analysis[i].iteration, analysis[i].accuracy
							- minvalue, analysis[i].nnz,
					(double) analysis[i].correctnnz / n_nonzero);

			epsilon = epsilon * 0.1;
			printf("epsilon: %f \n", epsilon);
		}
		if (i > 10 && analysis[i].accuracy - minvalue == 0) {
			break;
		}
	}
	//	return 1;
	double * A_dev;
	double * L_dev;
	double * x_dev;
	double * b_dev;
	double * lambda_dev;

	//----------------RCDM------- parallel


	fclose(fp);
}
示例#8
0
/**
 * Prints a memory figure to stdout under a "Free mem" label.
 *
 * The number printed is whatever getTotalSystemMemory() returns; the label
 * text itself marks the value as provisional ("Yet to perfect this value").
 * Takes no arguments and returns nothing.
 */
void SystemFreeMem()
{
    // NOTE(review): the declaration of getTotalSystemMemory() is not visible
    // here; it is presumably a size_t-like byte count -- confirm.
    // Widen explicitly and print with %llu so the conversion specifier always
    // matches the argument. The previous "%u" expects an unsigned int, which
    // is a format/argument mismatch (undefined behaviour, truncated output)
    // whenever the helper returns a type wider than unsigned int.
    const unsigned long long reported =
        static_cast<unsigned long long>(getTotalSystemMemory());
    printf("Free mem [Yet to perfect this value]: %llu\n", reported);
}