int main(int argc, char** argv) { // timer struct timeval st, et; float gputime = 0.0, cputime = 0.0; // read Sparse Matrix from file or generate if (argc < 2 || argc > 4) { printf("Correct Usage: <executable> <input matrix file>\n"); exit(-1); } // init the network agile::NetworkEnvironment environment(argc, argv); // allocate a GPU typedef agile::GPUCommunicator<unsigned, float, float> communicator_type; communicator_type com; com.allocateGPU(); char spmfileName[256]; strcpy(spmfileName, argv[1]); if (!fileIsReadable(spmfileName)) { printf("Non-existent input matrix file\n"); exit(-1); } unsigned m_num_rows, m_num_cols; std::vector<unsigned> m_row_nnz; std::vector<unsigned> m_column_index; std::vector<float> m_data; // read in matrix from matrix-market file readSparseMatrix(spmfileName, 0, m_num_rows, m_num_cols, m_row_nnz, m_column_index, m_data); std::cout << m_num_rows << "\t" << m_num_cols << "\t"; /* PRINT_VEC("m_row_nnz", m_row_nnz); PRINT_VEC("m_column_index", m_column_index); PRINT_VEC("m_data", m_data); */ // init gpu matrix agile::GPUCSMatrix<float> A(m_row_nnz, m_column_index, m_data); // init random vector std::vector<float> x_host(m_num_cols, 0); srand(time(NULL)); for (unsigned i=0; i<m_num_cols; ++i) x_host[i] = rand() / (float)RAND_MAX; //PRINT_VEC("RANDOM X VECTOR", x_host); // init gpu vector agile::GPUVector<float> x(m_num_cols); x.assignFromHost(x_host.begin(), x_host.end()); // init result gpu vector: y agile::GPUVector<float> y(m_num_rows); // start time gettimeofday(&st, NULL); for (unsigned t=0; t<NUM_ITER; ++t) { // gpu multiplication agile::multiply(A, x, y); cudaThreadSynchronize(); } // stop time gettimeofday(&et, NULL); gputime = ((et.tv_sec-st.tv_sec)*1000.0 + (et.tv_usec - st.tv_usec)/1000.0)/NUM_ITER; // transfer GPU multiplication result back to cpu std::vector<float> y_host; y.copyToHost(y_host); //----------------- CPU computation from ibm demo --------------------------- SpMatrix m; readSparseMatrix(&m, spmfileName, 0); 
unsigned int numNonZeroElements = m.numNZEntries; unsigned int memSize_row = sizeof(float) * m_num_rows; // allocate host memory float* h_x = (float*) malloc(memSize_row); #if PADDED_CSR float *h_val; unsigned int *h_indices, *h_rowIndices; genPaddedCSRFormat(&m, &h_val, &h_rowIndices, &h_indices); #else float* h_val = (float*) malloc(sizeof(float)*numNonZeroElements); unsigned int* h_indices = (unsigned int*) malloc(sizeof(int)*numNonZeroElements); unsigned int* h_rowIndices = (unsigned int*) malloc(sizeof(int)*(m_num_rows+1)); genCSRFormat(&m, h_val, h_rowIndices, h_indices); #endif // CPU REFERENCE float* reference = (float*) malloc(memSize_row); #if EXEC_CPU #if TIMER gettimeofday(&st, NULL); #endif // compute reference solution #if BCSR float *val; unsigned int *rowIndices, *indices; unsigned int numblocks; genBCSRFormat(&m, &val, &rowIndices, &indices, &numblocks, BCSR_r, BCSR_c); computeSpMV_BCSR(reference, val, rowIndices, indices, &(x_host[0]), m_num_rows, m_num_cols, BCSR_r, BCSR_c); #else computeSpMV(reference, h_val, h_rowIndices, h_indices, &(x_host[0]), m_num_rows); #endif #if TIMER gettimeofday(&et, NULL); cputime = (et.tv_sec-st.tv_sec)*1000.0 + (et.tv_usec - st.tv_usec)/1000.0; #endif #endif float flops= ((numNonZeroElements * 2) / (gputime*1000000)); //printf("GPU (ms) \tCPU (ms) \tGFLOPS\n"); printf("%f\t%f\t%f\t", gputime, cputime, flops); #if VERIFY // check result float error_norm, ref_norm, diff; error_norm = 0; ref_norm = 0; for (unsigned i = 0; i < m_num_rows; ++i) { diff = reference[i] - y_host[i]; error_norm += diff * diff; ref_norm += reference[i] * reference[i]; } error_norm = (float)sqrt((double)error_norm); ref_norm = (float)sqrt((double)ref_norm); if (fabs(ref_norm) < 1e-7) printf ("Test FAILED"); else printf( "Test %s", ((error_norm / ref_norm) < 1e-6f) ? "PASSED" : "FAILED"); #endif free(reference); free(h_x); #if !PADDED_CSR free(h_val); free(h_indices); free(h_rowIndices); #endif return 0; }
/* * Compile files controlled by the argument vector and * return the total error count. */ int compFilesLoop(int argc, char **argv) { int i, iargc, totErrors, nErrors; FileName fn; Bool isSolo; compInit(); iargc = cmdArguments(1, argc, argv); argc -= iargc; argv += iargc; if (argc == 0) { if (comsgOkBreakLoop()) bloopMsgFPrintf(osStdout, ALDOR_W_NoFiles, cmdName); comsgWarning(NULL, ALDOR_W_NoFiles, cmdName); } emitDoneOptions(argc, argv); ccGetReady(); isSolo = (cmdFileCount == 1); compFinfov = (EmitInfo *) stoAlloc((unsigned) OB_Other, (cmdFileCount+1) * sizeof(EmitInfo)); for (i = 0; i <= cmdFileCount; i += 1) compFinfov[i] = 0; totErrors = 0; for (i = 0; i < cmdFileCount; i++) { fn = fnameParse(argv[i]); compFinfov[i] = emitInfoNew(fn); nErrors = 0; if (!fileIsReadable(fn)) { if (comsgOkBreakLoop()) bloopMsgFPrintf(osStdout, ALDOR_F_CantOpen, argv[i]); comsgFatal(NULL, ALDOR_F_CantOpen, argv[i]); } switch (ftypeNo(fnameType(fn))) { #if 0 case FTYPENO_C: nErrors = compCFile(compFinfov[i]); break; #endif case FTYPENO_OBJECT: case FTYPENO_AR_OBJ: case FTYPENO_AR_INT: break; case FTYPENO_FOAMEXPR: case FTYPENO_INTERMED: if (!isSolo) fprintf(osStdout, "\n%s:\n", argv[i]); nErrors = compSavedFile(compFinfov[i]); break; default: if (!ftypeEqual(fnameType(fn), "")) { if (comsgOkBreakLoop()) bloopMsgFPrintf(osStdout, ALDOR_F_BadFType, argv[i], fnameType(fn), FTYPE_SRC); comsgFatal(NULL, ALDOR_F_BadFType, argv[i], fnameType(fn), FTYPE_SRC); } /* Fall through. 
*/ case FTYPENO_NONE: case FTYPENO_SRC: case FTYPENO_INCLUDED: case FTYPENO_ABSYN: case FTYPENO_OLDABSYN: if (!isSolo) fprintf(osStdout, "\n%s:\n", argv[i]); nErrors = compSourceFile(compFinfov[i]); break; } totErrors += nErrors; fnameFree(fn); } if (cmdFileCount > 0 && totErrors == 0) { compFinfov[cmdFileCount] = emitInfoNewAXLmain(); compAXLmainFile(compFinfov[cmdFileCount]); emitLink(cmdFileCount + 1, compFinfov); argc -= cmdFileCount; argv += cmdFileCount; emitInterp(argc, argv); emitRun (argc, argv); } if (totErrors > 0) emitAllDone(); for (i = 0; i < cmdFileCount + 1; i++) emitInfoFree(compFinfov[i]); stoFree((Pointer) compFinfov); compFinfov = 0; if (!isSolo) phGrandTotals(cmdVerboseFlag); compFini(); return totErrors; }
/*
 * Subsume options in response files into the argument vector.
 * Only the slots argi0..*pargc-1 are treated as arguments.
 *
 * *pargv is replaced by a newly allocated, writable copy of the vector
 * (each string copied with strCopy).  The contents of the ALDORARGS
 * environment variable (or AXIOMXLARGS if that is unset) are handled first,
 * through a placeholder "-aFake" argument inserted at argi0; after that,
 * every remaining 'a' option is replaced using the text of the file it names.
 * *pargc and *pargv are updated through cmdOneResponse.
 *
 * Returns true if at least one response (environment or file) was processed.
 * An 'a' option without a file name, or naming an unreadable file, is
 * reported through comsgFatal.
 */
Bool
cmdSubsumeResponseFiles(int argi0, int *pargc, String **pargv)
{
	int	nresps = 0, i;
	String	*argv;
	String	envopts;

	assert(*pargc >= 1);

	/* Copy the original args into a new r/w vector.
	 * The extra slot is potentially used in handling AXIOMXLARGS.
	 * The elements are of type String, so size the vector accordingly.
	 */
	argv = (String *) stoAlloc(OB_Other, (*pargc+1) * sizeof(String));
	for (i = 0; i < *pargc; i++)
		argv[i] = strCopy((*pargv)[i]);
	*pargv = argv;

	/* Check for ALDORARGS/AXIOMXLARGS environment variable. */
	envopts = osGetEnv("ALDORARGS");
	if (!envopts)
		envopts = osGetEnv("AXIOMXLARGS");

	if (envopts) {
		envopts = strCopy(envopts);

		/* Shift the arguments up by one (into the extra slot) to make
		 * room for a placeholder response option at argi0.
		 */
		for (i = *pargc - 1; i >= argi0; i--)
			(*pargv)[i+1] = (*pargv)[i];
		(*pargv)[argi0] = strCopy("-aFake");
		(*pargc)++;

		cmdOneResponse(pargc, pargv, envopts, argi0, argi0+1);
		nresps++;
		strFree(envopts);
	}

	while (cmdHasOption('a', NULL, *pargc, *pargv)) {
		String		fileName = 0, fileText;
		FileName	fn;
		int		opt = 0, nextArg, startArg, j;

		/* Scan the options until the response file option is found;
		 * startArg..nextArg then delimit the slots to be replaced.
		 */
		for (nextArg = argi0, j = 0; ; ) {
			startArg = nextArg;
			opt = cmdGetOption(*pargc,*pargv,
					   &nextArg,&j,&fileName);

			/* Have response file option by itself. */
			if (optIs(opt, 'a'))
				break;

			/* Next option is response file option. Must be last */
			else if (j > 0 && optIs((*pargv)[startArg][j], 'a')) {
				int oldStartArg = startArg, oldJ = j;
				startArg = nextArg;
				opt = cmdGetOption(*pargc,*pargv,
						   &nextArg,&j,&fileName);
				/* Cut the 'a' off the argument it was
				 * bundled in, keeping the options before it.
				 */
				(*pargv)[oldStartArg][oldJ] = '\0';
				++startArg;
				break;
			}
		}

		if (! fileName || ! fileName[0])
			/* Can't use cmdUseError here - no msg db! */
			comsgFatal(NULL,ALDOR_F_CmdBadOption,"-a",cmdName);

		fn = fnameParse(fileName);

		if (! fileIsReadable(fn))
			comsgFatal(NULL, ALDOR_F_CantOpen, fileName);

		/* We now have an existing response file */
		fileText = fileContentsString(fn);
		cmdOneResponse(pargc, pargv, fileText, startArg, nextArg);
		fileFreeContentsString(fileText);

		/* The parsed file name is not needed any more; release it so
		 * that it is not leaked once per response file.
		 */
		fnameFree(fn);
		nresps++;
	}

	return nresps > 0;
}