예제 #1
0
/**
 * Sparse matrix-vector multiplication benchmark driver.
 *
 * Reads the sparse matrix named by argv[1], multiplies it NUM_ITER times on
 * the GPU with a random vector, optionally (EXEC_CPU) computes a CPU
 * reference product and optionally (VERIFY) compares both results.
 *
 * Output (tab separated, on one line): number of rows, number of columns,
 * average GPU time per multiplication in ms, CPU time in ms, GFLOPS and,
 * when VERIFY is enabled, "Test PASSED" / "Test FAILED".
 *
 * @param argc number of command line arguments (2 to 4 are accepted)
 * @param argv argv[1] is the path of the input matrix file
 * @return 0 on success; the process exits with -1 on a usage error, an
 *         over-long file name or an unreadable input file
 */
int main(int argc, char** argv)
{
  // timer
  struct timeval st, et;
  float gputime = 0.0, cputime = 0.0;

  // read Sparse Matrix from file or generate
  if (argc < 2 || argc > 4) {
      printf("Correct Usage: <executable> <input matrix file>\n");
      exit(-1);
  }

  // init the network
  agile::NetworkEnvironment environment(argc, argv);

  // allocate a GPU
  typedef agile::GPUCommunicator<unsigned, float, float> communicator_type;
  communicator_type com;
  com.allocateGPU();

  // Copy the file name into a local buffer; refuse names that do not fit
  // (including the terminating NUL) instead of overflowing the buffer.
  char spmfileName[256];
  if (strlen(argv[1]) >= sizeof(spmfileName))
  {
    printf("Input matrix file name too long\n");
    exit(-1);
  }
  strcpy(spmfileName, argv[1]);
  if (!fileIsReadable(spmfileName))
  {
    printf("Non-existent input matrix file\n");
    exit(-1);
  }

  unsigned m_num_rows, m_num_cols;
  std::vector<unsigned> m_row_nnz;
  std::vector<unsigned> m_column_index;
  std::vector<float> m_data;

  // read in matrix from matrix-market file
  readSparseMatrix(spmfileName, 0, m_num_rows, m_num_cols, m_row_nnz,
    m_column_index, m_data);

  std::cout << m_num_rows << "\t" << m_num_cols << "\t";
/*
  PRINT_VEC("m_row_nnz", m_row_nnz);
  PRINT_VEC("m_column_index", m_column_index);
  PRINT_VEC("m_data", m_data);
*/

  // init gpu matrix
  agile::GPUCSMatrix<float> A(m_row_nnz, m_column_index, m_data);

  // init random vector with values in [0, 1]
  std::vector<float> x_host(m_num_cols, 0);
  srand(time(NULL));
  for (unsigned i=0; i<m_num_cols; ++i)
    x_host[i] = rand() / (float)RAND_MAX;

//PRINT_VEC("RANDOM X VECTOR", x_host);

  // init gpu vector
  agile::GPUVector<float> x(m_num_cols);
  x.assignFromHost(x_host.begin(), x_host.end());

  // init result gpu vector: y
  agile::GPUVector<float> y(m_num_rows);

  // start time
  gettimeofday(&st, NULL);

  for (unsigned t=0; t<NUM_ITER; ++t)
  {
    // gpu multiplication
    agile::multiply(A, x, y);

    // Wait for the multiplication before the next iteration / the stop
    // timestamp.
    // NOTE(review): cudaThreadSynchronize is deprecated in newer CUDA
    // toolkits in favour of cudaDeviceSynchronize - confirm the minimum
    // toolkit version before switching.
    cudaThreadSynchronize();
  }

  // stop time; gputime is the average per multiplication in milliseconds
  gettimeofday(&et, NULL);
  gputime = ((et.tv_sec-st.tv_sec)*1000.0 + (et.tv_usec - st.tv_usec)/1000.0)/NUM_ITER;

  // transfer GPU multiplication result back to cpu
  std::vector<float> y_host;
  y.copyToHost(y_host);


  //----------------- CPU computation from ibm demo ---------------------------
  SpMatrix m;
  readSparseMatrix(&m, spmfileName, 0);
  unsigned int numNonZeroElements = m.numNZEntries;

  #if PADDED_CSR
    // NOTE(review): these buffers are handed out by genPaddedCSRFormat and
    // are not released below - confirm how they have to be freed.
    float *h_val;
    unsigned int *h_indices, *h_rowIndices;
    genPaddedCSRFormat(&m, &h_val, &h_rowIndices, &h_indices);
  #else
    float* h_val = (float*) malloc(sizeof(float)*numNonZeroElements);
    unsigned int* h_indices = (unsigned int*) malloc(sizeof(unsigned int)*numNonZeroElements);
    unsigned int* h_rowIndices = (unsigned int*) malloc(sizeof(unsigned int)*(m_num_rows+1));
    genCSRFormat(&m, h_val, h_rowIndices, h_indices);
  #endif

  // CPU REFERENCE
  // Zero-initialised so that the VERIFY loop below never reads
  // indeterminate memory when EXEC_CPU is disabled.
  float* reference = (float*) calloc(m_num_rows, sizeof(float));
#if EXEC_CPU
  #if TIMER
  gettimeofday(&st, NULL);
  #endif
  // compute reference solution
  #if BCSR
  // NOTE(review): the buffers returned by genBCSRFormat are not released
  // in this function - confirm how they have to be freed.
  float *val;
  unsigned int *rowIndices, *indices;
  unsigned int numblocks;
  genBCSRFormat(&m, &val, &rowIndices, &indices, &numblocks, BCSR_r, BCSR_c);
  computeSpMV_BCSR(reference, val, rowIndices, indices, &(x_host[0]), m_num_rows, m_num_cols, BCSR_r, BCSR_c);
  #else
  computeSpMV(reference, h_val, h_rowIndices, h_indices, &(x_host[0]), m_num_rows);
  #endif
  #if TIMER
  gettimeofday(&et, NULL);
  cputime = (et.tv_sec-st.tv_sec)*1000.0 + (et.tv_usec - st.tv_usec)/1000.0;
  #endif
#endif

  // Two floating point operations per non-zero entry; gputime is in ms, so
  // dividing by (ms * 1e6) yields GFLOPS. The product is formed in float so
  // that it cannot wrap around in unsigned arithmetic.
  float flops= ((2.0f * numNonZeroElements) / (gputime*1000000));
  //printf("GPU (ms) \tCPU (ms) \tGFLOPS\n");
  printf("%f\t%f\t%f\t", gputime, cputime, flops);

#if VERIFY
  // check result: relative L2 error of the GPU result against the reference
  float error_norm, ref_norm, diff;
  error_norm = 0;
  ref_norm = 0;
  for (unsigned i = 0; i < m_num_rows; ++i) {
      diff = reference[i] - y_host[i];
      error_norm += diff * diff;
      ref_norm += reference[i] * reference[i];
  }
  error_norm = (float)sqrt((double)error_norm);
  ref_norm = (float)sqrt((double)ref_norm);

  // a (near) zero reference cannot be used for a relative comparison
  if (fabs(ref_norm) < 1e-7)
    printf ("Test FAILED");
  else
    printf( "Test %s", ((error_norm / ref_norm) < 1e-6f) ? "PASSED" : "FAILED");

#endif

  free(reference);
  #if !PADDED_CSR
    free(h_val);
    free(h_indices);
    free(h_rowIndices);
  #endif

  return 0;
}
예제 #2
0
파일: axlcomp.c 프로젝트: nilqed/aldor
/*
 * Drive the compilation of every file named in the argument vector.
 *
 * The leading option arguments are consumed by cmdArguments; what remains
 * is taken as the list of files.  Each file is dispatched on its file type.
 * If files were given and none of them reported an error, the main file is
 * generated and the results are linked, interpreted and run.
 *
 * Returns the sum of the error counts of all files.
 */
int
compFilesLoop(int argc, char **argv)
{
	int		nOpts, ix, fileErrs;
	int		errTotal = 0;
	FileName	fname;
	Bool		single;

	compInit();

	/* Step over the option arguments. */
	nOpts = cmdArguments(1, argc, argv);
	argv += nOpts;
	argc -= nOpts;

	if (!argc) {
		if (comsgOkBreakLoop())
			bloopMsgFPrintf(osStdout, ALDOR_W_NoFiles, cmdName);
		comsgWarning(NULL, ALDOR_W_NoFiles, cmdName);
	}
	emitDoneOptions(argc, argv);
	ccGetReady();

	single = (1 == cmdFileCount);

	/* One slot per file plus one extra for the generated main file. */
	compFinfov = (EmitInfo *) stoAlloc((unsigned) OB_Other,
					   (cmdFileCount+1) * sizeof(EmitInfo));
	ix = 0;
	while (ix <= cmdFileCount)
		compFinfov[ix++] = 0;

	for (ix = 0; ix < cmdFileCount; ix += 1) {
		char	*arg = argv[ix];

		fileErrs = 0;
		fname = fnameParse(arg);
		compFinfov[ix] = emitInfoNew(fname);

		if (!fileIsReadable(fname)) {
			if (comsgOkBreakLoop())
				bloopMsgFPrintf(osStdout, ALDOR_F_CantOpen, arg);
			comsgFatal(NULL, ALDOR_F_CantOpen, arg);
		}

		switch (ftypeNo(fnameType(fname))) {
		case FTYPENO_INTERMED:
		case FTYPENO_FOAMEXPR:
			/* Previously saved intermediate forms. */
			if (!single)
				fprintf(osStdout, "\n%s:\n", arg);
			fileErrs = compSavedFile(compFinfov[ix]);
			break;
#if 0
		case FTYPENO_C:
			fileErrs = compCFile(compFinfov[ix]);
			break;
#endif
		case FTYPENO_AR_INT:
		case FTYPENO_AR_OBJ:
		case FTYPENO_OBJECT:
			/* No compilation step for these file types. */
			break;
		default:
			/* An unrecognised, non-empty file type is fatal. */
			if (!ftypeEqual(fnameType(fname), "")) {
				if (comsgOkBreakLoop())
					bloopMsgFPrintf(osStdout,
							ALDOR_F_BadFType,
							arg,
							fnameType(fname),
							FTYPE_SRC);
				comsgFatal(NULL, ALDOR_F_BadFType, arg,
					   fnameType(fname), FTYPE_SRC);
			}
			/* Fall through. */
		case FTYPENO_OLDABSYN:
		case FTYPENO_ABSYN:
		case FTYPENO_INCLUDED:
		case FTYPENO_SRC:
		case FTYPENO_NONE:
			if (!single)
				fprintf(osStdout, "\n%s:\n", arg);
			fileErrs = compSourceFile(compFinfov[ix]);
			break;
		}

		errTotal += fileErrs;
		fnameFree(fname);
	}

	if (errTotal == 0 && cmdFileCount > 0) {
		/* Everything compiled cleanly: build the main file and go on. */
		compFinfov[cmdFileCount] = emitInfoNewAXLmain();
		compAXLmainFile(compFinfov[cmdFileCount]);
		emitLink(cmdFileCount + 1, compFinfov);

		/* The arguments after the file names go to the program. */
		argv += cmdFileCount;
		argc -= cmdFileCount;
		emitInterp(argc, argv);
		emitRun   (argc, argv);
	}
	else if (errTotal > 0) {
		emitAllDone();
	}

	for (ix = 0; ix <= cmdFileCount; ix++)
		emitInfoFree(compFinfov[ix]);
	stoFree((Pointer) compFinfov);
	compFinfov = NULL;

	if (!single)
		phGrandTotals(cmdVerboseFlag);
	compFini();

	return errTotal;
}
예제 #3
0
파일: cmdline.c 프로젝트: nilqed/aldor
/*
 * Subsume options in response file into argument vector.
 * Only the slots 'argi0..*pargc-1 are treated as arguments.
 *
 * On return *pargv points to a newly allocated vector holding copies of
 * the original argument strings, expanded through cmdOneResponse; *pargc
 * is updated accordingly.
 *
 * Responses come from two sources: the ALDORARGS (or, failing that,
 * AXIOMXLARGS) environment variable, and every 'a' option naming a
 * response file.
 *
 * Returns true if at least one response was processed.
 */
Bool
cmdSubsumeResponseFiles(int argi0, int *pargc, String **pargv)
{
	int 	nresps = 0, i;
	String 	*argv;
	String 	envopts;

	assert(*pargc >= 1);

	/* Copy the original args into a new r/w vector.
	 * The extra slot is potentially used in handling AXIOMXLARGS.
	 */
	/* NOTE(review): the element size is written as sizeof(String *) while
	 * the elements are of type String; presumably both are pointer sized -
	 * confirm.
	 */
	argv = (String *) stoAlloc(OB_Other, (*pargc+1) * sizeof(String *));
	for (i = 0; i < *pargc; i++)
		argv[i] = strCopy((*pargv)[i]);
	*pargv  = argv;
 
	/* Check for ALDORARGS/AXIOMXLARGS environment variable. */
	envopts = osGetEnv("ALDORARGS");
	if (!envopts) envopts = osGetEnv("AXIOMXLARGS");
	if (envopts) {
		/* Work on a private copy of the environment string. */
		envopts = strCopy(envopts);
		/* Shift the arguments from argi0 on up by one slot (using the
		 * extra slot allocated above) and put a placeholder option at
		 * argi0, which is the range handed to cmdOneResponse.
		 */
		for (i = *pargc - 1; i >= argi0; i--)
			(*pargv)[i+1] = (*pargv)[i];
		(*pargv)[argi0] = strCopy("-aFake");
		(*pargc)++;
		cmdOneResponse(pargc, pargv, envopts, argi0, argi0+1);
		nresps++;
		strFree(envopts);
	}

	/* Handle one response file option per iteration until none is left. */
	while (cmdHasOption('a', NULL, *pargc, *pargv)) {
		String	 fileName = 0, fileText;
		FileName fn;
		int	 opt = 0, nextArg, startArg, j;
 
		/* Scan the options from argi0 on; cmdGetOption receives nextArg,
		 * j and fileName by address.  The loop is only left through one
		 * of the two breaks below.
		 */
		for (nextArg = argi0, j = 0; ; ) {
			startArg = nextArg;
			opt = cmdGetOption(*pargc,*pargv,
					   &nextArg,&j,&fileName);

			/* Have response file option by itself. */
			if (optIs(opt, 'a'))
				break;
 
			/* Next option is response file option. Must be last */
			else if (j > 0 && optIs((*pargv)[startArg][j], 'a')) {
				/* Remember where the 'a' sits, fetch the option
				 * (and its file name), then cut the argument off
				 * in front of the 'a'.  startArg is advanced by
				 * one - presumably so that the truncated argument
				 * itself is kept; confirm against cmdOneResponse.
				 */
				int oldStartArg = startArg, oldJ = j;
				startArg = nextArg;
				opt = cmdGetOption(*pargc,*pargv,
						   &nextArg,&j,&fileName);
				(*pargv)[oldStartArg][oldJ] = '\0';
				++startArg;
				break;
			}
		}
 
		if (! fileName || ! fileName[0])
			/* Can't use cmdUseError here - no msg db! */
			comsgFatal(NULL,ALDOR_F_CmdBadOption,"-a",cmdName);
 
		/* NOTE(review): fn is not released anywhere in this function -
		 * confirm whether it has to be freed after use.
		 */
		fn = fnameParse(fileName);
		if (! fileIsReadable(fn))
			comsgFatal(NULL, ALDOR_F_CantOpen, fileName);
 
		/* We now have an existing response file */
		fileText = fileContentsString(fn);
		cmdOneResponse(pargc, pargv, fileText, startArg, nextArg);
		fileFreeContentsString(fileText);

		nresps++;
	}
 
	return nresps > 0;
}