Ejemplo n.º 1
0
int main(int argc, char **argv){   
  MPI_Init(argc, argv);
  MPI_Barrier(MPI_COMM_WORLD);
  int nb;
  int rank = -1;
  double t,start,stop;
  double* mat_A;
  double* mat_B;
  double* mat_res = alloc_mat();
  // Allocations
  mat_A   = init_mat();
  mat_B   = init_mat();
  // Init
  printf("Nb.threads\tTps.\n");
  for(nb=1;nb<=PROC;nb++){
    MPI_Comm_size(MPI_COMM_WORLD, &nb);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    start = get_time();
    multiply_mat(mat_A, mat_B, mat_res);
    stop=get_time();
    t=stop-start;
    printf("%d\t%f\n",nb,t);
  }
  // Memory free
  free(mat_A);
  free(mat_B);
  free(mat_res);
  return EXIT_SUCCESS;
  MPI_Finalize(void);
}
Ejemplo n.º 2
0
int main(int argc, char *argv[])
{ // export OMP_NUM_THREADS=1
    float **A, **B, **C;	// matrices
    int d1, d2, d3;         // dimensions of matrices
    int i, j, k;			// loop variables
    double start, end;
    start = omp_get_wtime();

    /* print user instruction */
    if (argc != 4)
    {
        printf ("Matrix multiplication: C = A x B\n");
        printf ("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]); 
        return 0;
    }

    /* read user input */
    d1 = atoi(argv[1]);		// rows of A and C
    d2 = atoi(argv[2]);     // cols of A and rows of B
    d3 = atoi(argv[3]);     // cols of B and C

    printf("Matrix sizes C[%d][%d] = A[%d][%d] x B[%d][%d]\n", d1, d3, d1, d2, d2, d3);

    /* prepare matrices */
    A = alloc_mat(d1, d2);
    init_mat(A, d1, d2); 
    B = alloc_mat(d2, d3);
    init_mat(B, d2, d3);
    C = alloc_mat(d1, d3);	// no initialisation of C, because it gets filled by matmult

    /* serial version of matmult */
    printf("Perform matrix multiplication...\n");
                      /* spezielle collapse-Schleife, um über ein mehrdimensionales Array zu iterieren
                         Schleifen müssen sehr einfach gehalten sein, damit Parallelisierung erfolgen kann
                         Alle Schleifenvariablen müssen völlig unabhängig voneinander sein. 
                         Sind es aber nicht, die Ausgabe war Fehlerhaft. */

    double sum;
    // #pragma omp parallel for collapse(3) schedule(dynamik)
    for (i = 0; i < d1; i++)
       for (j = 0; j < d3; j++)
          #pragma omp parallel for private(sum)// Rechenintensive Operation wird parallelisiert.
          for (k = 0; k < d2; k++)
            { // Nur hier darf beliebiger Code stehen! wenn collaps verwendet würde
             sum = A[i][k] * B[k][j];
             #pragma omp atomic
             C[i][j] += sum;
         }

    /* test output */
    print_mat(A, d1, d2, "A"); 
    print_mat(B, d2, d3, "B"); 
    print_mat(C, d1, d3, "C"); 

    printf ("\nDone.\n");

    end = omp_get_wtime();
    printf("This task took %f seconds\n", end-start);
    return 0;
}
Ejemplo n.º 3
0
int main(int argc, char *argv[])
{
	double **a, **b, **ret;
	clock_t start, stop;
	int i;
	int l, m, n;
	if(argc != 2) {
		printf("Usage: mm_seq <mat_size>\n");
		return 1;
	} else
		l = m = n = atoi(argv[1]);
	
	a = malloc(l * sizeof(double *));
	b = malloc(m * sizeof(double *));
	ret = malloc(l * sizeof(double *));
        if(!(a && b && ret)) {        
		printf("Out of memory; exiting...\n");
		return 1;
	}
	
	for (i = 0; i < l; i++)
		a[i] = malloc(m * sizeof(double));
	for (i = 0; i < m; i++)
		b[i] = malloc(n * sizeof(double));
	for (i = 0; i < l; i++)
		ret[i] = malloc(n * sizeof(double));
	
	printf("matrix size: %d x %d\n", n,n);
	
	a = init_mat(a,l,m);
	b = init_mat(b,m,n);
	//print_mat(a,l,m);
	//print_mat(b,m,n);
	
	if((start = clock()) == -1) {
		printf("Could not start the clock; exiting...\n");
		return 1;
	}
	
	b = transpose(b,m,n);
	ret = mm(a,b,ret,l,m,n);
	stop = clock();
	
	//print_mat(ret,l,n);
	double time = (double)(stop - start)/CLOCKS_PER_SEC;
	printf("Elapsed time: %f\n", time);

	return 0;
}
Ejemplo n.º 4
0
int main()
{
    int sum1, sum2;
    sum1 = init_mat(3);
    sum2 = mat_sum();
    printf("sum1 = %d sum2 = %d\n", sum1, sum2);
    return 0;
}
Ejemplo n.º 5
0
int main(void)
{
    init_mat();
    sq_matrix_revise();
    sq_matrix_print();
    sq_release();

    return 0;
}
Ejemplo n.º 6
0
int main(int argc, char *argv[])
{
	float **A, **B, **C;	// matrices
    int d1, d2, d3;         // dimensions of matrices
    int i, j, k;			// loop variables

    /* print user instruction */
    if (argc != 4)
    {
        printf ("Matrix multiplication: C = A x B\n");
        printf ("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]); 
        return 0;
    }

    /* read user input */
    d1 = atoi(argv[1]);		// rows of A and C
    d2 = atoi(argv[2]);     // cols of A and rows of B
    d3 = atoi(argv[3]);     // cols of B and C

    printf("Matrix sizes C[%d][%d] = A[%d][%d] x B[%d][%d]\n", d1, d3, d1, d2, d2, d3);

    /* prepare matrices */
    A = alloc_mat(d1, d2);
    init_mat(A, d1, d2); 
    B = alloc_mat(d2, d3);
    init_mat(B, d2, d3);
    C = alloc_mat(d1, d3);	// no initialisation of C, because it gets filled by matmult

    /* serial version of matmult */
    printf("Perform matrix multiplication...\n");
    for (i = 0; i < d1; i++)
       for (j = 0; j < d3; j++)
          for (k = 0; k < d2; k++)
             C[i][j] += A[i][k] * B[k][j];

    /* test output */
    print_mat(A, d1, d2, "A"); 
    print_mat(B, d2, d3, "B"); 
    print_mat(C, d1, d3, "C"); 

    printf ("\nDone.\n");

    return 0;
}
Ejemplo n.º 7
0
// Initialisation de matrice supérieure, aléatoirement
void init_matSup (mat M) {
 init_mat(M);
 register unsigned int i, j;
 srand(time(NULL));
 for (i=0; i < N; i++){
	 for(j=i; j<N; j++){
		 M[i][j] = rand_b() ;
	 }
 }
}
Ejemplo n.º 8
0
void test_cosamp(){

   mat A= init_mat("0.9528    0.5982    0.8368 ; 0.7041    0.8407    0.5187; 0.9539    0.4428    0.0222", 3, 3);
   vec b= init_vec(" 0.3759 0.8986 0.4290",3);
   int K=1;
   double epsilon =1e-3;
   vec ret = CoSaMP(A,b,K,10, epsilon,3);
   vec right = init_vec("0 1.2032 0", 3);
   double diff = norm(ret - right);
   assert(diff <1e-4);

}
Ejemplo n.º 9
0
void swap_mat(t_mat *m)
{
    t_mat *tmp;
    int    i, j;

    tmp = init_mat(m->nn, FALSE);
    for (i = 0; (i < m->nn); i++)
    {
        for (j = 0; (j < m->nn); j++)
        {
            tmp->mat[m->m_ind[i]][m->m_ind[j]] = m->mat[i][j];
        }
    }
    /*tmp->mat[i][j] =  m->mat[m->m_ind[i]][m->m_ind[j]]; */
    for (i = 0; (i < m->nn); i++)
    {
        for (j = 0; (j < m->nn); j++)
        {
            m->mat[i][j] = tmp->mat[i][j];
        }
    }
    done_mat(&tmp);
}
Ejemplo n.º 10
0
/*Command line entry point of the program*/
int main (int argc, char* argv[]){
    
    /*getopt() variables */
    int   opt;
	char* optarg_dup = NULL;
    
    /*Command line options*/
	uchar cflg=0,gflg=0,kflg=0,lflg=0,mflg=0,oflg=0, sflg=0;
	uchar pflg=0,tflg=0,vflg=0,xflg=0,zflg=0;
    InputParams* inputParams = NULL;

    /*Working variables */
	int            i,m;
	int            nbPriors;
    int            nbNodes;
	int            nbClass;
	int            nbLabeled;
	int            nbUnlabeled;
	int			   nbSimParams;
	int            actualWlen;
	int            start_cpu,stop_cpu;
	real           tmp;
	uchar*         absorbing     = NULL;
	char*          labels        = NULL;
	char*          featLabels    = NULL;
	char*          targets       = NULL;
	char*		   preds         = NULL;
	int**          classes       = NULL;
	real**         N             = NULL;
	real*          gram          = NULL;
    real**         latF          = NULL;
    real**         latB          = NULL;
	real**		   kernel		 = NULL;
	SparseMatrix*  spmat         = NULL;
	SparseMatrix*  dataset       = NULL;
	DataStat*      graphStat     = NULL;
	DataStat*      featuresStat  = NULL;
	DWInfo*        dwInfo        = NULL;
	OptiLen*       optiLen       = NULL;
	PerfMeasures*  unlabeledPerf = NULL;
	LSVMDataInfo*  dataInfo      = NULL;
	
	/*Print the program banner*/
	printBanner();

    /*Scan the command line*/
	inputParams = malloc(sizeof(InputParams));
	init_InputParams(inputParams);
	while ((opt = getopt(argc,argv,"c:g:l:xms:k:o:p:rt:v:z:h")) != EOF){
        switch(opt){
	        case 'c':
	            cflg++;
				inputParams->nbFolds = atoi(optarg);
	            break;
            case 'g':
                gflg++;
				inputParams->graphFname = (char*)malloc(FNAME_LEN*sizeof(char));
				strncpy(inputParams->graphFname,optarg,FNAME_LEN);
                break;
            case 'k':
				kflg++;
				inputParams->gramFname = (char*)malloc(FNAME_LEN*sizeof(char));
				strncpy(inputParams->gramFname,optarg,FNAME_LEN);
                break;
			case 's':
                sflg++;
                optarg_dup  = (char*)__strdup(optarg);
				nbSimParams = getNbTokens(optarg,",");
				inputParams->simParams = vec_alloc(nbSimParams+1);
				tokenizeReals(optarg_dup,",",inputParams->simParams);
				break;
            case 'l':
                lflg++;
                inputParams->wlen = atoi(optarg);
                break;
			case 'm':
				mflg++;
				break;
            case 'o':
                oflg++;
				inputParams->outFname = (char*)malloc(FNAME_LEN*sizeof(char));
				strncpy(inputParams->outFname,optarg,FNAME_LEN);
				inputParams->outPRED  = (char*)malloc(FNAME_LEN*sizeof(char));
				inputParams->outLEN   = (char*)malloc(FNAME_LEN*sizeof(char));
                addExtension(inputParams->outFname,"pred",inputParams->outPRED);
				addExtension(inputParams->outFname,"len",inputParams->outLEN);
                break;
            case 'p':
                pflg++;
                optarg_dup = (char*)__strdup(optarg);
				nbPriors   = getNbTokens(optarg,"#");
                inputParams->priors = vec_alloc(nbPriors+1);
				tokenizeReals(optarg_dup,"#",inputParams->priors);
                break;
			case 't':
				tflg++;
				inputParams->tarFname = (char*)malloc(FNAME_LEN*sizeof(char));
				strncpy(inputParams->tarFname,optarg,FNAME_LEN);
				break;
            case 'v':
                vflg++;
                inputParams->verbose = atoi(optarg);
                break;
			case 'x':
				xflg++;
				inputParams->crossWalks = 1;
				break;
			case 'z':
				zflg++;
				inputParams->cvSeed = atoi(optarg);
				break;
            case 'h':
                printHelp();
                exit(EXIT_FAILURE);
                break;
        }
    }
    
    /*Check mandatory arguments*/
    if(!gflg || !lflg || (!oflg && !mflg)){
        fprintf(stderr,"Mandatory argument(s) missing\n");
        printHelp();
        exit(EXIT_FAILURE);
    }

	if( (kflg && !sflg) || (!kflg && sflg)){
		fprintf(stderr, "Error with 'k' and 's' parameters\n");
		printHelp();
		exit(EXIT_FAILURE);
	}

	/*Check that the walk length is greater than 2*/
	if(inputParams->wlen < 2){
		fprintf(stderr,"The walkLen must be >= 2\n");
		exit(EXIT_FAILURE);
	}

	/*Check that there are the right number of similarity parameters*/
	if (kflg && sflg){
		if(inputParams->simParams){
			switch((int)inputParams->simParams[1]){
				case 1 :
					if((int)inputParams->simParams[0] != 1){
						fprintf(stderr,"The similarity type 1 must have no parameters\n");
						exit(EXIT_FAILURE);	
					}
					break;
				case 2 :
					if((int)inputParams->simParams[0] != 2){
						fprintf(stderr,"The similarity type 2 must have exactly 1 parameter\n");
						exit(EXIT_FAILURE);	
					}
					break;
				case 3 :
					if((int)inputParams->simParams[0] != 4){
						fprintf(stderr,"The similarity type 3 must have exactly 3 parameters\n");
						exit(EXIT_FAILURE);	
					}
					break;
				case 4 :
					if((int)inputParams->simParams[0] != 2){
						fprintf(stderr,"The similarity type 4 must have exactly 1 parameter\n");
						exit(EXIT_FAILURE);	
					}
					break;
				default :
					fprintf(stderr,"Unrecognized similarity type\n");
					exit(EXIT_FAILURE);	
			}
		}
	}

    /*Get the number of nodes in the graph*/
    nbNodes = readNbNodes(inputParams->graphFname);
    
	/*Get the number of distinct classes*/
	nbClass = readNbClass(inputParams->graphFname);

	/*Get info from the LIBSVM data*/
	if (kflg){
		dataInfo = malloc(sizeof(LSVMDataInfo));
		getLSVMDataInfo(inputParams->gramFname,dataInfo);
		dataset = spmat_alloc(dataInfo->nbLines,dataInfo->nbFeatures,0);
		init_SparseMat(dataset);
		featuresStat = DataStat_alloc(dataInfo->nbClass);
		featLabels = char_vec_alloc(nbNodes);
	}
	
	/*Check if the number of nodes does not exceed the limitation*/
	if(nbNodes > MAX_NODES){
		fprintf(stderr,"This version is limited to maximum %i nodes\n",MAX_NODES);
		exit(EXIT_FAILURE);
	}

	/*Check that the number of classes is lower than 128*/
	if(nbClass > 127){
		fprintf(stderr,"The number of classes must be <= 127\n");
		exit(EXIT_FAILURE);		
	}

	/*Check that the number of folds is between 2 and nbNodes*/
	if(cflg){
		if(inputParams->nbFolds == 1 || inputParams->nbFolds > nbNodes){
			fprintf(stderr,"The number of folds must be > 1 and <= number of nodes\n");
			exit(EXIT_FAILURE);
		}
	}

    /*Allocate data structure*/
    latF             = mat_alloc(inputParams->wlen+1,nbNodes);
    latB             = mat_alloc(inputParams->wlen+1,nbNodes);
	kernel			 = mat_alloc(nbNodes, nbNodes);
	classes          = (int**)malloc(sizeof(int*)*(nbClass+1));
	labels           = char_vec_alloc(nbNodes);
	absorbing        = uchar_vec_alloc(nbNodes);
	if(kflg) gram    = vec_alloc((nbNodes*(nbNodes+1))/2);
	dwInfo           = malloc(sizeof(DWInfo));
	dwInfo->mass_abs = vec_alloc(nbClass);
	spmat            = spmat_alloc(nbNodes,nbNodes,1);
	graphStat        = DataStat_alloc(nbClass+1);
	optiLen          = malloc(sizeof(OptiLen));
	
    /*Initialize the sparse transition matrix and the dataset if required*/
	init_SparseMat(spmat);

    /*Read the adjacency matrix*/
    readMat(inputParams->graphFname,spmat,labels,graphStat,inputParams->verbose);
    isSymmetric(spmat);

	/*Get the indices of the nodes in each class */
	getClasses(labels,nbNodes,classes,nbClass);

	/*Get the number of labeled nodes*/
	nbUnlabeled = classes[0][0];
	nbLabeled   = nbNodes - nbUnlabeled;

	/*If provided, check that the priors match the number of classes*/
	if(pflg){
		if(nbClass != nbPriors){
			printf("The number of priors does not match with the number of classes\n");
			exit(EXIT_FAILURE);			
		}
		/*Check that the priors sum up to 1*/
		else{
			tmp=0.0;
			for(i=1;i<=inputParams->priors[0];i++){
				tmp += inputParams->priors[i];
			}
			if(ABS(tmp-1.0) > PROB_EPS){
				textcolor(BRIGHT,RED,BLACK);
	            printf("WARNING: The class priors do not sum up to 1\n");
				textcolor(RESET,WHITE,BLACK);
			}
		}
	}
	/*If no priors provided, use empirical priors */
	else{
		inputParams->priors = vec_alloc(nbClass+1);
		inputParams->priors[0] = (real)nbClass;
		tmp = 0.0;
		for(i=1;i<=inputParams->priors[0];i++)
			tmp += graphStat->classCount[i];
		for(i=1;i<=inputParams->priors[0];i++)
			inputParams->priors[i] = (real)graphStat->classCount[i]/tmp;
	}

	/*If provided read the LIBSVM feature matrix*/
	if(kflg){
		m = readLSVMData(inputParams->gramFname,dataInfo,dataset,featLabels,featuresStat,inputParams->verbose);
		if (dataInfo->nbLines != nbNodes){
			fprintf(stderr,"Number of line on the LIBSVM (%i) file doesn't match the number of nodes (%i)\n", dataInfo->nbLines, nbNodes);
			exit(EXIT_FAILURE);
		}
		
		/* Multiply a kernel matrix based on the dataset*/
		/* TO DO : define a parameters to lower the importance of the features*/
		buildKernel2(spmat, dataset, 0.1, inputParams);
		/*Multiply adjacency matrix*/
	}

	
    
    /*Print statistics about the graph, classes, and run-mode*/
    if (inputParams->verbose > 0)
		printInputInfo(spmat,graphStat,nbClass,inputParams);               
    
	/*Minimum Covering Length mode*/
	if (mflg){
		computeMCL(spmat,absorbing,labels,classes,nbClass,latF,latB,inputParams);
		/*Exit after displaying the statistics*/
		exit(EXIT_SUCCESS);		
	}

	 /*Start the CPU chronometer*/
    start_cpu = clock();

	/*If required tune the maximum walk length by cross-validation*/
	if(cflg){
		crossValidateLength(spmat,absorbing,labels,classes,nbClass,NULL,latF,latB,inputParams,optiLen);
		actualWlen = optiLen->len;
	}
	/*Otherwise use the prescribed length*/
	else
		actualWlen = inputParams->wlen;

	/************************ALGORITHM STARTS HERE****************************/

	if(inputParams->verbose >= 1){
		textcolor(BRIGHT,RED,BLACK);
		printf("Performing discriminative walks up to length %i on full data\n",actualWlen);
		textcolor(RESET,WHITE,BLACK);
	}

	/*Allocate data structure*/
    N     = mat_alloc(nbUnlabeled,nbClass);
	preds = char_vec_alloc(nbUnlabeled);
    init_mat(N,nbUnlabeled,nbClass);

	/*Launch the D-walks*/
	dwalk(spmat,absorbing,classes,nbClass,N,latF,latB,actualWlen,inputParams->crossWalks,dwInfo,inputParams->verbose);

	/************************ALGORITHM STOPS HERE****************************/

    /*Stop the CPU chronometer*/
    stop_cpu = clock();
    
	/*Compute the predictions as the argmax on classes for each unlabeled node*/
	computePredictions(N,preds,inputParams->priors,nbUnlabeled,nbClass);

	/*Write the model predictions*/
	writePredictions_unlabeled(inputParams->outPRED,preds,N,nbClass,classes[0]);

	/*Write the class betweeness */


	/*If a target file is provided compare predictions and targets*/
	if(tflg){
		unlabeledPerf = PerfMeasures_alloc(nbClass+1);
		computeUnlabeledPerf(preds,classes[0],nbUnlabeled,nbClass,inputParams,unlabeledPerf,inputParams->verbose);
		free_PerfMeasures(unlabeledPerf,nbClass+1);
	}
    /*Print informations*/
    if (inputParams->verbose >= 1){
		for(i=0;i<nbClass;i++)
			printf("Exploration rate in class %2i : %1.2f %%\n",i+1,100*dwInfo->mass_abs[i]);
		printf("CPU Time (sec)               : %1.4f\n",((double)(stop_cpu - start_cpu)/CLOCKS_PER_SEC));
		printLineDelim();	
    }

	/*Optionally release memory here*/
	#ifdef FREE_MEMORY_AT_END
		free_classes(classes,nbClass+1);
		free_InputParams(inputParams);
		free_DataStat(graphStat);
		free_DWInfo(dwInfo);
		free(absorbing);
		free(labels);
		free_mat(N,nbUnlabeled);
		free_mat(latF,inputParams->wlen+1);
		free_mat(latB,inputParams->wlen+1);
		free_SparseMatrix(spmat);
		free(optiLen);
		free(preds);
		if(kflg) free(gram);
	#endif

    /*Exit successfully :-)*/
    exit(EXIT_SUCCESS);
}
Ejemplo n.º 11
0
/** **/
int main (int argc, char* argv[])
{
  int WORK_DIM = 2; // Wie viele Dimensionen hat der Indexraum?
  std::chrono::time_point<std::chrono::system_clock> s_start, s_end, p_start, p_end;


  // Lese den Kernel dynamisch ein: (uebernommen von Foliensatz 9, Folie 20)
  FILE *fp;
  const char *FileName = "matmult.cl";
  char *KernelSource;
  fp = fopen(FileName, "r");
  if (!fp) {
    printf("Can't open kernel source: %s", FileName); exit(1);
  }
  KernelSource = (char *)malloc(MAX_SOURCE_SIZE);
  size_t kernel_s_size = fread(KernelSource, 1, MAX_SOURCE_SIZE, fp);
  fclose(fp);

  cl_int            err;
  cl_platform_id*   platforms = NULL;
  char              platform_name[1024];
  cl_device_id      device_id = NULL;
  cl_uint           num_of_platforms = 0,
                    num_of_devices = 0;
  cl_context        context;
  cl_kernel         kernel;
  cl_command_queue  command_queue;
  cl_program        program;


  err = clGetPlatformIDs(0, NULL, &num_of_platforms);
  if (err != CL_SUCCESS) {
    printf("No platforms found. Error: %d\n", err);
    return 0;
  }

  // Liefert Plattformen
  platforms = (cl_platform_id *)malloc(num_of_platforms);
  err = clGetPlatformIDs(num_of_platforms, platforms, NULL);
  if (err != CL_SUCCESS) {
    printf("No platforms found. Error: %d\n", err);
    return 0;
  } else {
    int nvidia_platform = 0;  // Speichert den Rang der letzten NVIDIA-Plattform
    for (unsigned int i=0; i<num_of_platforms; i++) // Fuer jede Plattform:
    {
      clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(platform_name), platform_name, NULL);
      if (err != CL_SUCCESS) {
        printf("Could not get information about platform. Error: %d\n", err);
        return 0;
      }

      if (strstr(platform_name, "NVIDIA") != NULL) { // Falls die Plattform eine NVIDIA-Plattform ist: Speichere ihren Rang
        nvidia_platform = i;
        break;
      }
    }
    // Gibt die ID des Devices der NVIDIA-Plattform zurueck
    err = clGetDeviceIDs(platforms[nvidia_platform], CL_DEVICE_TYPE_GPU, 1, &device_id, &num_of_devices);
    if (err != CL_SUCCESS) {
      printf("Could not get device in platform. Error: %d\n", err);
      return 0;
    }
  }

  // Erschaffe einen OpenCl-context, in dem OpenCl-Datenobjekte verwaltet werden koennen
  context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
  if (err != CL_SUCCESS) {
    printf("Unable to create context. Error: %d\n", err);
    return 0;
  }

  // Initialisiere eine Befehlswarteschleife, die Befehle fuer OpenCl-Objekte speichern kann
  command_queue = clCreateCommandQueue(context, device_id, 0, &err);
  if (err != CL_SUCCESS) {
    printf("Unable to create command queue. Error: %d\n", err);
    return 0;
  }

  // Initialisiere ein Programm und spezifiziere, aus welchem Code dieses kompiliert werden soll
  program = clCreateProgramWithSource(context, 1, (const char **)&KernelSource, (const size_t *)& kernel_s_size, &err);
  if (err != CL_SUCCESS) {
    printf("Unable to create program. Error: %d\n", err);
    return 0;
  }

  // Kompiliere das Programm zur Laufzeit
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  if (err != CL_SUCCESS) {
    // Zeige Compilermeldungen an: (uebernommen von Foliensatz 9, Folie 23)
    char *log;
    size_t size;
    clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size);
    log = (char *)malloc(size+1);
    if (log) {
      clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL);
      log[size] = '\0';
      printf("%s", log);
      free(log);
    }

    printf("Error building program. Error: %d\n", err);
    return 0;
  }

  // Erschaffe einen Kernel und lade oben kompiliertes Programm ein
  kernel = clCreateKernel(program, "matmult", &err);
  if (err != CL_SUCCESS) {
    printf("Error setting kernel. Error: %d\n", err);
    return 0;
  }

  float **A, **B, **C; // Matrizen
  int dim1, dim2, dim3; // Matrixdimensionen
  dim1 = D1; // Zeilen von A, Zeilen von C
  dim2 = D2; // Spalten von A, Zeilen von B
  dim3 = D3; // Spalten von B, Spalten von C

  A = alloc_mat(dim1, dim2);
  B = alloc_mat(dim2, dim3);
  C = alloc_mat(dim1, dim3);

  init_mat(A, dim1, dim2);
  init_mat(B, dim2, dim3);

  cl_mem            in_A, in_B, output;
  // float             data[DATA_SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  size_t global[1] = {dim1*dim3}; // Dimensionen von C
  size_t global_two[2] = {dim1, dim3};

  in_A  = clCreateBuffer (context, CL_MEM_READ_ONLY,  sizeof(float)*dim1*dim2, NULL, &err);
  in_B  = clCreateBuffer (context, CL_MEM_READ_ONLY,  sizeof(float)*dim2*dim3, NULL, &err);
  output = clCreateBuffer (context, CL_MEM_WRITE_ONLY, sizeof(float)*dim1*dim3, NULL, &err);

  clEnqueueWriteBuffer(command_queue, in_A, CL_TRUE, 0, sizeof(float)*dim1*dim2, *A, 0, NULL, NULL);
  clEnqueueWriteBuffer(command_queue, in_B, CL_TRUE, 0, sizeof(float)*dim2*dim3, *B, 0, NULL, NULL);

  clSetKernelArg(kernel, 0, sizeof(cl_mem), &in_A);
  clSetKernelArg(kernel, 1, sizeof(cl_mem), &in_B);
  clSetKernelArg(kernel, 2, sizeof(cl_mem), &output);
  // clSetKernelArg(kernel, 3, sizeof(int), &dim2);
  // clSetKernelArg(kernel, 4, sizeof(int), &dim3);

  clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
  if (WORK_DIM == 2) {
    clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, global_two, NULL, 0, NULL, NULL);
  }

  // Zeitmessung fuer parallele Version
  p_start = std::chrono::system_clock::now();
  err = clFinish(command_queue);
  p_end = std::chrono::system_clock::now();
  std::chrono::duration<double> p_duration = p_end - p_start;


  if (err == CL_INVALID_COMMAND_QUEUE ) {
    printf("CL_INVALID_COMMAND_QUEUE: %d\n", err);
    return 0;
  }

  clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, sizeof(float)*dim1*dim3, *C, 0, NULL, NULL);

  // Ueberpruefe, ob serielle Version und parallele gleich sind:
  float **correct_matrix;
  correct_matrix = alloc_mat(dim1, dim3);

  s_start = std::chrono::system_clock::now(); // Zeitmessung fuer serielle Version
  correct_matrix = mult_mat(A, B, dim1, dim2, dim3);
  s_end = std::chrono::system_clock::now();
  std::chrono::duration<double> s_duration = s_end - s_start;

  is_correct(C, correct_matrix, dim1, dim3); // Numerischer Korrektheitsbeweis

  print_mat(C, dim1, dim3, "C = ");
  print_mat(correct_matrix, dim1, dim3, "correct_matrix = ");
  // printf("Kernel execution time: %f\n", t_end-t_start);

  clReleaseMemObject(in_A);
  clReleaseMemObject(in_B);
  clReleaseMemObject(output);
  clReleaseProgram(program);
  clReleaseKernel(kernel);
  err = clReleaseCommandQueue(command_queue); //!!
  if (err != CL_SUCCESS) {
    printf("Error releasing command queue: %d\n", err);
    return 0;
  }
  clReleaseContext(context);

  printf("Dauer der seriellen Version: %.2f Millisekunden\n", s_duration.count() * 1000);
  printf("Dauer der parallelen Version: %.2f Millisekunden\n", p_duration.count() * 1000);
  printf("Erhaltenes Speed Up: %.2f \n", p_duration.count() / p_duration.count());


  return 0;
}
Ejemplo n.º 12
0
int gmx_cluster(int argc,char *argv[])
{
  static char *desc[] = {
    "g_cluster can cluster structures with several different methods.",
    "Distances between structures can be determined from a trajectory",
    "or read from an XPM matrix file with the [TT]-dm[tt] option.",
    "RMS deviation after fitting or RMS deviation of atom-pair distances",
    "can be used to define the distance between structures.[PAR]",
    
    "single linkage: add a structure to a cluster when its distance to any",
    "element of the cluster is less than [TT]cutoff[tt].[PAR]",
    
    "Jarvis Patrick: add a structure to a cluster when this structure",
    "and a structure in the cluster have each other as neighbors and",
    "they have a least [TT]P[tt] neighbors in common. The neighbors",
    "of a structure are the M closest structures or all structures within",
    "[TT]cutoff[tt].[PAR]",
    
    "Monte Carlo: reorder the RMSD matrix using Monte Carlo.[PAR]",
    
    "diagonalization: diagonalize the RMSD matrix.[PAR]"
    
    "gromos: use algorithm as described in Daura [IT]et al.[it]",
    "([IT]Angew. Chem. Int. Ed.[it] [BB]1999[bb], [IT]38[it], pp 236-240).",
    "Count number of neighbors using cut-off, take structure with",
    "largest number of neighbors with all its neighbors as cluster",
    "and eleminate it from the pool of clusters. Repeat for remaining",
    "structures in pool.[PAR]",
    
    "When the clustering algorithm assigns each structure to exactly one",
    "cluster (single linkage, Jarvis Patrick and gromos) and a trajectory",
    "file is supplied, the structure with",
    "the smallest average distance to the others or the average structure",
    "or all structures for each cluster will be written to a trajectory",
    "file. When writing all structures, separate numbered files are made",
    "for each cluster.[PAR]"
    
    "Two output files are always written:[BR]",
    "[TT]-o[tt] writes the RMSD values in the upper left half of the matrix",
    "and a graphical depiction of the clusters in the lower right half",
    "When [TT]-minstruct[tt] = 1 the graphical depiction is black",
    "when two structures are in the same cluster.",
    "When [TT]-minstruct[tt] > 1 different colors will be used for each",
    "cluster.[BR]",
    "[TT]-g[tt] writes information on the options used and a detailed list",
    "of all clusters and their members.[PAR]",
    
    "Additionally, a number of optional output files can be written:[BR]",
    "[TT]-dist[tt] writes the RMSD distribution.[BR]",
    "[TT]-ev[tt] writes the eigenvectors of the RMSD matrix",
    "diagonalization.[BR]",
    "[TT]-sz[tt] writes the cluster sizes.[BR]",
    "[TT]-tr[tt] writes a matrix of the number transitions between",
    "cluster pairs.[BR]",
    "[TT]-ntr[tt] writes the total number of transitions to or from",
    "each cluster.[BR]",
    "[TT]-clid[tt] writes the cluster number as a function of time.[BR]",
    "[TT]-cl[tt] writes average (with option [TT]-av[tt]) or central",
    "structure of each cluster or writes numbered files with cluster members",
    "for a selected set of clusters (with option [TT]-wcl[tt], depends on",
    "[TT]-nst[tt] and [TT]-rmsmin[tt]).[BR]",
  };
  
  FILE         *fp,*log;
  int          i,i1,i2,j,nf,nrms;

  matrix       box;
  rvec         *xtps,*usextps,*x1,**xx=NULL;
  char         *fn,*trx_out_fn;
  t_clusters   clust;
  t_mat        *rms;
  real         *eigval;
  t_topology   top;
  int          ePBC;
  t_atoms      useatoms;
  t_matrix     *readmat;
  real         *tmp;
  
  int      isize=0,ifsize=0,iosize=0;
  atom_id  *index=NULL, *fitidx, *outidx;
  char     *grpname;
  real     rmsd,**d1,**d2,*time,time_invfac,*mass=NULL;
  char     buf[STRLEN],buf1[80],title[STRLEN];
  bool     bAnalyze,bUseRmsdCut,bJP_RMSD=FALSE,bReadMat,bReadTraj;

  int method,ncluster=0;  
  static char *methodname[] = { 
    NULL, "linkage", "jarvis-patrick","monte-carlo", 
    "diagonalization", "gromos", NULL
  };
  enum { m_null, m_linkage, m_jarvis_patrick, 
	 m_monte_carlo, m_diagonalize, m_gromos, m_nr };
  /* Set colors for plotting: white = zero RMS, black = maximum */
  static t_rgb rlo_top = { 1.0, 1.0, 1.0 };
  static t_rgb rhi_top = { 0.0, 0.0, 0.0 };
  static t_rgb rlo_bot = { 1.0, 1.0, 1.0 };
  static t_rgb rhi_bot = { 0.0, 0.0, 1.0 };
  static int  nlevels=40,skip=1;
  static real scalemax=-1.0,rmsdcut=0.1,rmsmin=0.0;
  static bool bRMSdist=FALSE,bBinary=FALSE,bAverage=FALSE,bFit=TRUE;
  static int  niter=10000,seed=1993,write_ncl=0,write_nst=1,minstruct=1;
  static real kT=1e-3;
  static int  M=10,P=3;
  t_pargs pa[] = {
    { "-dista", FALSE, etBOOL, {&bRMSdist},
      "Use RMSD of distances instead of RMS deviation" },
    { "-nlevels",FALSE,etINT,  {&nlevels},
      "Discretize RMSD matrix in # levels" },
    { "-cutoff",FALSE, etREAL, {&rmsdcut},
      "RMSD cut-off (nm) for two structures to be neighbor" },
    { "-fit",   FALSE, etBOOL, {&bFit},
      "Use least squares fitting before RMSD calculation" },
    { "-max",   FALSE, etREAL, {&scalemax},
      "Maximum level in RMSD matrix" },
    { "-skip",  FALSE, etINT,  {&skip},
      "Only analyze every nr-th frame" },
    { "-av",    FALSE, etBOOL, {&bAverage},
      "Write average iso middle structure for each cluster" },
    { "-wcl",   FALSE, etINT,  {&write_ncl},
      "Write all structures for first # clusters to numbered files" },
    { "-nst",   FALSE, etINT,  {&write_nst},
      "Only write all structures if more than # per cluster" },
    { "-rmsmin",FALSE, etREAL, {&rmsmin},
      "minimum rms difference with rest of cluster for writing structures" },
    { "-method",FALSE, etENUM, {methodname},
      "Method for cluster determination" },
    { "-minstruct", FALSE, etINT, {&minstruct},
      "Minimum number of structures in cluster for coloring in the xpm file" },
    { "-binary",FALSE, etBOOL, {&bBinary},
      "Treat the RMSD matrix as consisting of 0 and 1, where the cut-off "
      "is given by -cutoff" },
    { "-M",     FALSE, etINT,  {&M},
      "Number of nearest neighbors considered for Jarvis-Patrick algorithm, "
      "0 is use cutoff" },
    { "-P",     FALSE, etINT,  {&P},
      "Number of identical nearest neighbors required to form a cluster" },
    { "-seed",  FALSE, etINT,  {&seed},
      "Random number seed for Monte Carlo clustering algorithm" },
    { "-niter", FALSE, etINT,  {&niter},
      "Number of iterations for MC" },
    { "-kT",    FALSE, etREAL, {&kT},
      "Boltzmann weighting factor for Monte Carlo optimization "
      "(zero turns off uphill steps)" }
  };
  t_filenm fnm[] = {
    { efTRX, "-f",     NULL,        ffOPTRD },
    { efTPS, "-s",     NULL,        ffOPTRD },
    { efNDX, NULL,     NULL,        ffOPTRD },
    { efXPM, "-dm",   "rmsd",       ffOPTRD },     
    { efXPM, "-o",    "rmsd-clust", ffWRITE },
    { efLOG, "-g",    "cluster",    ffWRITE },
    { efXVG, "-dist", "rmsd-dist",  ffOPTWR },
    { efXVG, "-ev",   "rmsd-eig",   ffOPTWR },
    { efXVG, "-sz",   "clust-size", ffOPTWR},
    { efXPM, "-tr",   "clust-trans",ffOPTWR},
    { efXVG, "-ntr",  "clust-trans",ffOPTWR},
    { efXVG, "-clid", "clust-id.xvg",ffOPTWR},
    { efTRX, "-cl",   "clusters.pdb", ffOPTWR }
  };
#define NFILE asize(fnm)
  
  CopyRight(stderr,argv[0]);
  parse_common_args(&argc,argv,PCA_CAN_VIEW | PCA_CAN_TIME | PCA_TIME_UNIT | PCA_BE_NICE,
		    NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL);

  /* parse options */
  bReadMat   = opt2bSet("-dm",NFILE,fnm);
  bReadTraj  = opt2bSet("-f",NFILE,fnm) || !bReadMat;
  if ( opt2parg_bSet("-av",asize(pa),pa) ||
       opt2parg_bSet("-wcl",asize(pa),pa) ||
       opt2parg_bSet("-nst",asize(pa),pa) ||
       opt2parg_bSet("-rmsmin",asize(pa),pa) ||
       opt2bSet("-cl",NFILE,fnm) )
    trx_out_fn = opt2fn("-cl",NFILE,fnm);
  else
    trx_out_fn = NULL;
  if (bReadMat && time_factor()!=1) {
    fprintf(stderr,
	    "\nWarning: assuming the time unit in %s is %s\n",
	    opt2fn("-dm",NFILE,fnm),time_unit());
  }
  if (trx_out_fn && !bReadTraj)
    fprintf(stderr,"\nWarning: "
	    "cannot write cluster structures without reading trajectory\n"
	    "         ignoring option -cl %s\n", trx_out_fn);

  method=1;
  while ( method < m_nr && strcasecmp(methodname[0], methodname[method])!=0 )
    method++;
  if (method == m_nr)
    gmx_fatal(FARGS,"Invalid method");
  
  bAnalyze = (method == m_linkage || method == m_jarvis_patrick ||
	      method == m_gromos );
  
  /* Open log file */
  log = ftp2FILE(efLOG,NFILE,fnm,"w");

  fprintf(stderr,"Using %s method for clustering\n",methodname[0]);
  fprintf(log,"Using %s method for clustering\n",methodname[0]);

  /* check input and write parameters to log file */
  bUseRmsdCut = FALSE;
  if (method == m_jarvis_patrick) {
    bJP_RMSD = (M == 0) || opt2parg_bSet("-cutoff",asize(pa),pa);
    if ((M<0) || (M == 1))
      gmx_fatal(FARGS,"M (%d) must be 0 or larger than 1",M);
    if (M < 2) {
      sprintf(buf1,"Will use P=%d and RMSD cutoff (%g)",P,rmsdcut);
      bUseRmsdCut = TRUE;
    } else {
      if (P >= M)
	gmx_fatal(FARGS,"Number of neighbors required (P) must be less than M");
      if (bJP_RMSD) {
	sprintf(buf1,"Will use P=%d, M=%d and RMSD cutoff (%g)",P,M,rmsdcut);
	bUseRmsdCut = TRUE;
      } else
	sprintf(buf1,"Will use P=%d, M=%d",P,M);
    }
    ffprintf1(stderr,log,buf,"%s for determining the neighbors\n\n",buf1);
  } else /* method != m_jarvis */
    bUseRmsdCut = ( bBinary || method == m_linkage || method == m_gromos );
  if (bUseRmsdCut && method != m_jarvis_patrick)
    fprintf(log,"Using RMSD cutoff %g nm\n",rmsdcut);
  if ( method==m_monte_carlo )
    fprintf(log,"Using %d iterations\n",niter);
  
  if (skip < 1)
    gmx_fatal(FARGS,"skip (%d) should be >= 1",skip);

  /* get input */
  if (bReadTraj) {
    /* don't read mass-database as masses (and top) are not used */
    read_tps_conf(ftp2fn(efTPS,NFILE,fnm),buf,&top,&ePBC,&xtps,NULL,box,
		  bAnalyze);
    
    fprintf(stderr,"\nSelect group for least squares fit%s:\n",
	    bReadMat?"":" and RMSD calculation");
    get_index(&(top.atoms),ftp2fn_null(efNDX,NFILE,fnm),
	      1,&ifsize,&fitidx,&grpname);
    if (trx_out_fn) {
      fprintf(stderr,"\nSelect group for output:\n");
      get_index(&(top.atoms),ftp2fn_null(efNDX,NFILE,fnm),
		1,&iosize,&outidx,&grpname);
      /* merge and convert both index groups: */
      /* first copy outidx to index. let outidx refer to elements in index */
      snew(index,iosize);
      isize = iosize;
      for(i=0; i<iosize; i++) {
	index[i]=outidx[i];
	outidx[i]=i;
      }
      /* now lookup elements from fitidx in index, add them if necessary
	 and also let fitidx refer to elements in index */
      for(i=0; i<ifsize; i++) {
	j=0;
	while (j<isize && index[j]!=fitidx[i])
	  j++;
	if (j>=isize) {
	  /* slow this way, but doesn't matter much */
	  isize++;
	  srenew(index,isize);
	}
	index[j]=fitidx[i];
	fitidx[i]=j;
      }
    } else { /* !trx_out_fn */
      isize = ifsize;
      snew(index, isize);
      for(i=0; i<ifsize; i++) {
	index[i]=fitidx[i];
	fitidx[i]=i;
      }
    }
  }
  /* Initiate arrays */
  snew(d1,isize);
  snew(d2,isize);
  for(i=0; (i<isize); i++) {
    snew(d1[i],isize);
    snew(d2[i],isize);
  }

  if (bReadTraj) {
    /* Loop over first coordinate file */
    fn = opt2fn("-f",NFILE,fnm);
    
    xx = read_whole_trj(fn,isize,index,skip,&nf,&time);
    convert_times(nf, time);
    if (!bRMSdist || bAnalyze) {
      /* Center all frames on zero */
      snew(mass,isize);
      for(i=0; i<ifsize; i++)
	mass[fitidx[i]] = top.atoms.atom[index[fitidx[i]]].m;
      if (bFit)
      for(i=0; i<nf; i++)
	reset_x(ifsize,fitidx,isize,NULL,xx[i],mass);
    }
  }
  if (bReadMat) {
    fprintf(stderr,"Reading rms distance matrix ");
    read_xpm_matrix(opt2fn("-dm",NFILE,fnm),&readmat);
    fprintf(stderr,"\n");
    if (readmat[0].nx != readmat[0].ny)
      gmx_fatal(FARGS,"Matrix (%dx%d) is not square",
		  readmat[0].nx,readmat[0].ny);
    if (bReadTraj && bAnalyze && (readmat[0].nx != nf))
      gmx_fatal(FARGS,"Matrix size (%dx%d) does not match the number of "
		  "frames (%d)",readmat[0].nx,readmat[0].ny,nf);

    nf = readmat[0].nx;
    sfree(time);
    time = readmat[0].axis_x;
    time_invfac = time_invfactor();
    for(i=0; i<nf; i++)
      time[i] *= time_invfac;

    rms = init_mat(readmat[0].nx,method == m_diagonalize);
    convert_mat(&(readmat[0]),rms);
    
    nlevels = readmat[0].nmap;
  } else { /* !bReadMat */
    rms = init_mat(nf,method == m_diagonalize);
    nrms = (nf*(nf-1))/2;
    if (!bRMSdist) {
      fprintf(stderr,"Computing %dx%d RMS deviation matrix\n",nf,nf);
      snew(x1,isize);
      for(i1=0; (i1<nf); i1++) {
	for(i2=i1+1; (i2<nf); i2++) {
	  for(i=0; i<isize; i++)
	    copy_rvec(xx[i1][i],x1[i]);
	  if (bFit)
	    do_fit(isize,mass,xx[i2],x1);
	  rmsd = rmsdev(isize,mass,xx[i2],x1);
	  set_mat_entry(rms,i1,i2,rmsd);
	}
	nrms -= (nf-i1-1);
	fprintf(stderr,"\r# RMSD calculations left: %d   ",nrms);
      }
    } else { /* bRMSdist */
      fprintf(stderr,"Computing %dx%d RMS distance deviation matrix\n",nf,nf);
      for(i1=0; (i1<nf); i1++) {
	calc_dist(isize,xx[i1],d1);
	for(i2=i1+1; (i2<nf); i2++) {
	  calc_dist(isize,xx[i2],d2);
	  set_mat_entry(rms,i1,i2,rms_dist(isize,d1,d2));
      }
	nrms -= (nf-i1-1);
	fprintf(stderr,"\r# RMSD calculations left: %d   ",nrms);
      }
    }
    fprintf(stderr,"\n\n");
  }
  ffprintf2(stderr,log,buf,"The RMSD ranges from %g to %g nm\n",
	    rms->minrms,rms->maxrms);
  ffprintf1(stderr,log,buf,"Average RMSD is %g\n",2*rms->sumrms/(nf*(nf-1)));
  ffprintf1(stderr,log,buf,"Number of structures for matrix %d\n",nf);
  ffprintf1(stderr,log,buf,"Energy of the matrix is %g nm\n",mat_energy(rms));
  if (bUseRmsdCut && (rmsdcut < rms->minrms || rmsdcut > rms->maxrms) )
    fprintf(stderr,"WARNING: rmsd cutoff %g is outside range of rmsd values "
	    "%g to %g\n",rmsdcut,rms->minrms,rms->maxrms);
  if (bAnalyze && (rmsmin < rms->minrms) )
    fprintf(stderr,"WARNING: rmsd minimum %g is below lowest rmsd value %g\n",
	    rmsmin,rms->minrms);
  if (bAnalyze && (rmsmin > rmsdcut) )
    fprintf(stderr,"WARNING: rmsd minimum %g is above rmsd cutoff %g\n",
	    rmsmin,rmsdcut);
  
  /* Plot the rmsd distribution */
  rmsd_distribution(opt2fn("-dist",NFILE,fnm),rms);
  
  if (bBinary) {
    for(i1=0; (i1 < nf); i1++) 
      for(i2=0; (i2 < nf); i2++)
	if (rms->mat[i1][i2] < rmsdcut)
	  rms->mat[i1][i2] = 0;
	else
	  rms->mat[i1][i2] = 1;
  }

  snew(clust.cl,nf);
  switch (method) {
  case m_linkage: 
    /* Now sort the matrix and write it out again */
    gather(rms,rmsdcut,&clust);
    break;
  case m_diagonalize:
    /* Do a diagonalization */
      snew(eigval,nf);
      snew(tmp,nf*nf);
      memcpy(tmp,rms->mat[0],nf*nf*sizeof(real));
      eigensolver(tmp,nf,0,nf,eigval,rms->mat[0]);
      sfree(tmp);
      
      fp = xvgropen(opt2fn("-ev",NFILE,fnm),"RMSD matrix Eigenvalues",
                    "Eigenvector index","Eigenvalues (nm\\S2\\N)");
      for(i=0; (i<nf); i++)
          fprintf(fp,"%10d  %10g\n",i,eigval[i]);
          ffclose(fp);
      break;
  case m_monte_carlo:
    mc_optimize(log,rms,niter,&seed,kT);
    swap_mat(rms);
    reset_index(rms);
    break;
  case m_jarvis_patrick:
    jarvis_patrick(rms->nn,rms->mat,M,P,bJP_RMSD ? rmsdcut : -1,&clust);
    break;
  case m_gromos:
    gromos(rms->nn,rms->mat,rmsdcut,&clust);
    break;
  default:
    gmx_fatal(FARGS,"DEATH HORROR unknown method \"%s\"",methodname[0]);
  }
  
  if (method == m_monte_carlo || method == m_diagonalize)
    fprintf(stderr,"Energy of the matrix after clustering is %g nm\n",
	    mat_energy(rms));
  
  if (bAnalyze) {
    if (minstruct > 1) {
      ncluster = plot_clusters(nf,rms->mat,&clust,nlevels,minstruct);
    } else {
      mark_clusters(nf,rms->mat,rms->maxrms,&clust);
    }
    init_t_atoms(&useatoms,isize,FALSE);
    snew(usextps, isize);
    useatoms.resname=top.atoms.resname;
    for(i=0; i<isize; i++) {
      useatoms.atomname[i]=top.atoms.atomname[index[i]];
      useatoms.atom[i].resnr=top.atoms.atom[index[i]].resnr;
      useatoms.nres=max(useatoms.nres,useatoms.atom[i].resnr+1);
      copy_rvec(xtps[index[i]],usextps[i]);
    }
    useatoms.nr=isize;
    analyze_clusters(nf,&clust,rms->mat,isize,&useatoms,usextps,mass,xx,time,
		     ifsize,fitidx,iosize,outidx,
		     bReadTraj?trx_out_fn:NULL,
		     opt2fn_null("-sz",NFILE,fnm),
		     opt2fn_null("-tr",NFILE,fnm),
		     opt2fn_null("-ntr",NFILE,fnm),
		     opt2fn_null("-clid",NFILE,fnm),
		     bAverage, write_ncl, write_nst, rmsmin, bFit, log,
		     rlo_bot,rhi_bot);
  }
  ffclose(log);
  
  if (bBinary && !bAnalyze)
    /* Make the clustering visible */
    for(i2=0; (i2 < nf); i2++)
       for(i1=i2+1; (i1 < nf); i1++)
	 if (rms->mat[i1][i2])
	   rms->mat[i1][i2] = rms->maxrms;

  fp = opt2FILE("-o",NFILE,fnm,"w");
  fprintf(stderr,"Writing rms distance/clustering matrix ");
  if (bReadMat) {
    write_xpm(fp,0,readmat[0].title,readmat[0].legend,readmat[0].label_x,
	      readmat[0].label_y,nf,nf,readmat[0].axis_x,readmat[0].axis_y,
	      rms->mat,0.0,rms->maxrms,rlo_top,rhi_top,&nlevels);
  } 
  else {
    sprintf(buf,"Time (%s)",time_unit());
    sprintf(title,"RMS%sDeviation / Cluster Index",
 	    bRMSdist ? " Distance " : " ");
    if (minstruct > 1) {
      write_xpm_split(fp,0,title,"RMSD (nm)",buf,buf,
		      nf,nf,time,time,rms->mat,0.0,rms->maxrms,&nlevels,
		      rlo_top,rhi_top,0.0,(real) ncluster,
		      &ncluster,TRUE,rlo_bot,rhi_bot);
    } else {
      write_xpm(fp,0,title,"RMSD (nm)",buf,buf,
		nf,nf,time,time,rms->mat,0.0,rms->maxrms,
		rlo_top,rhi_top,&nlevels);
    }
  }
  fprintf(stderr,"\n");
  ffclose(fp);
  
  /* now show what we've done */
  do_view(opt2fn("-o",NFILE,fnm),"-nxy");
  do_view(opt2fn_null("-sz",NFILE,fnm),"-nxy");
  if (method == m_diagonalize)
    do_view(opt2fn_null("-ev",NFILE,fnm),"-nxy");
  do_view(opt2fn("-dist",NFILE,fnm),"-nxy");
  if (bAnalyze) {
    do_view(opt2fn_null("-tr",NFILE,fnm),"-nxy");
    do_view(opt2fn_null("-ntr",NFILE,fnm),"-nxy");
    do_view(opt2fn_null("-clid",NFILE,fnm),"-nxy");
  }
  
  /* Thank the user for her patience */  
  thanx(stderr);
  
  return 0;
}
Ejemplo n.º 13
0
int main(int argc, char** argv)
{
	double serial_time, openCL_time, start_time;
	cl_int err;
	cl_platform_id* platforms = NULL;
	char platform_name[1024];
	cl_device_id device_id = NULL;
	cl_uint	num_of_platforms = 0;
	cl_uint num_of_devices = 0;
	cl_context context;
	cl_kernel kernel;
	cl_command_queue command_queue;
	cl_program program;
	cl_mem input1, input2, input3, output;
	float **A, **B, **C, **serialC;	// matrices
	int d1, d2, d3;         // dimensions of matrices

							/* print user instruction */
	if (argc != 4)
	{
		printf("Matrix multiplication: C = A x B\n");
		printf("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]);
		return 0;
	}

	/* read user input */
	d1 = 1000;		// rows of A and C
	d2 = 1000;     // cols of A and rows of B
	d3 = 1000;     // cols of B and C
	int d[4] = { 0, d1, d2, d3 };
	size_t global[1] = { (size_t)d1*d3 };

	printf("Matrix sizes C[%d][%d] = A[%d][%d] x B[%d][%d]\n", d1, d3, d1, d2, d2, d3);

	/* prepare matrices */
	A = alloc_mat(d1, d2);
	init_mat(A, d1, d2);
	B = alloc_mat(d2, d3);
	init_mat(B, d2, d3);
	C = alloc_mat(d1, d3);
	serialC = alloc_mat(d1, d3);

	err = clGetPlatformIDs(0, NULL, &num_of_platforms);
	if (err != CL_SUCCESS) {
		printf("No platforms found. Error: %d\n", err);
		return 0;
	}

	platforms = (cl_platform_id *)malloc(num_of_platforms);
	err = clGetPlatformIDs(num_of_platforms, platforms, NULL);
	if (err != CL_SUCCESS) {
		printf("No platforms found. Error: %d\n", err);
		return 0;
	}
	else {
		int nvidia_platform = 0;
		for (unsigned int i = 0; i<num_of_platforms; i++) {
			clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(platform_name), platform_name, NULL);
			if (err != CL_SUCCESS) {
				printf("Could not get information about platform. Error: %d\n", err);
				return 0;
			}
			if (strstr(platform_name, "NVIDIA") != NULL) {
				nvidia_platform = i;
				break;
			}
		}
		err = clGetDeviceIDs(platforms[nvidia_platform], CL_DEVICE_TYPE_GPU, 1, &device_id, &num_of_devices);
		if (err != CL_SUCCESS) {
			printf("Could not get device in platform. Error: %d\n", err);
			return 0;
		}
	}

	context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
	if (err != CL_SUCCESS) {
		printf("Unable to create context. Error: %d\n", err);
		return 0;
	}

	command_queue = clCreateCommandQueue(context, device_id, 0, &err);
	if (err != CL_SUCCESS) {
		printf("Unable to create command queue. Error: %d\n", err);
		return 0;
	}

	program = clCreateProgramWithSource(context, 1, (const char **)&KernelSource, NULL, &err);
	if (err != CL_SUCCESS) {
		printf("Unable to create program. Error: %d\n", err);
		return 0;
	}

	if (clBuildProgram(program, 0, NULL, NULL, NULL, NULL) != CL_SUCCESS) {
		char *log;
		size_t size;
		clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size); // 1. Länge des Logbuches?
		log = (char *)malloc(size + 1);
		if (log) {
			clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL); // 2. Hole das Logbuch ab
			log[size] = '\0';
			printf("%s", log);
			free(log);
		}
		return 1;
	}


	err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
	if (err != CL_SUCCESS) {
		printf("Error building program. Error: %d\n", err);
		return 0;
	}


	kernel = clCreateKernel(program, "matmult_ocl", &err);
	if (err != CL_SUCCESS) {
		printf("Error setting kernel. Error: %d\n", err);
		return 0;
	}

	input1 = clCreateBuffer(context, CL_MEM_READ_ONLY, d1*d2*sizeof(float), NULL, &err);
	input2 = clCreateBuffer(context, CL_MEM_READ_ONLY, d2*d3*sizeof(float), NULL, &err);
	input3 = clCreateBuffer(context, CL_MEM_READ_ONLY, 4 * sizeof(int), NULL, &err);

	output = clCreateBuffer(context, CL_MEM_READ_WRITE, d1*d3*sizeof(float), NULL, &err);

	start_time = omp_get_wtime();

	clEnqueueWriteBuffer(command_queue, input1, CL_TRUE, 0, d1*d2*sizeof(float), *A, 0, NULL, NULL);
	clEnqueueWriteBuffer(command_queue, input2, CL_TRUE, 0, d2*d3*sizeof(float), *B, 0, NULL, NULL);
	clEnqueueWriteBuffer(command_queue, input3, CL_TRUE, 0, 4 * sizeof(int), d, 0, NULL, NULL);

	clSetKernelArg(kernel, 0, sizeof(cl_mem), &input1);
	clSetKernelArg(kernel, 1, sizeof(cl_mem), &input2);
	clSetKernelArg(kernel, 2, sizeof(cl_mem), &input3);
	clSetKernelArg(kernel, 3, sizeof(cl_mem), &output);

	clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);

	clFinish(command_queue);

	clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, d1*d3*sizeof(float), *C, 0, NULL, NULL);
	// for (unsigned int i = 0; i < (unsigned int) d1*d3; i++)
	//	printf("%f\n", C[0][i]);

	openCL_time = omp_get_wtime() - start_time;

	clReleaseMemObject(input1);
	clReleaseMemObject(input2);
	clReleaseMemObject(input3);
	clReleaseMemObject(output);
	clReleaseProgram(program);
	clReleaseKernel(kernel);
	clReleaseCommandQueue(command_queue);
	clReleaseContext(context);

	printf("Running serial algorithm...\n");
	start_time = omp_get_wtime();
	serialC = mult_mat(A, B, d1, d2, d3);
	serial_time = omp_get_wtime() - start_time;

	printf("Checking results... ");
	is_correct(C, serialC, d1, d3);

	printf("Showing stats...\n");
	printf("   serial runtime = %f\n", serial_time);
	printf("   OpenCL runtime = %f\n", openCL_time);
	printf("   Speedup = %f\n", serial_time / openCL_time);
	return 0;
}