/** * Compare results * * @param argv Commmand line parameters * * @relates Compare * @ingroup pwglf_forward_tracklets */ void CompareResults(const char** argv) { TString newFile; TString oldFile; TString newTit(""); TString oldTit(""); const char** ptr = argv; while ((*ptr)) { TString argi = *ptr; ptr++; if (argi.Contains("help")) { Printf("Usage: CompareResults AFILE BFILE [ATITLTE [BTITLE]]"); return; } if (argi.Contains("CompareResults.C")) continue; if (argi.BeginsWith("-")) continue; if (argi.EndsWith(".root")) { if (newFile.IsNull()) newFile = argi; else oldFile = argi; } else { if (newTit.IsNull()) newTit = argi; else oldTit = argi; } } if (newTit.IsNull()) newTit = "New"; if (oldTit.IsNull()) oldTit = "Old"; CompareResults(newFile, oldFile, newTit, oldTit); }
/**
 * Sort the results of a search object in place over the inclusive index
 * range [l, r].
 *
 * This is a recursive quicksort: the element in the middle of the range
 * is taken as pivot, the range is partitioned with two converging
 * indices, and the two resulting sub-ranges are sorted recursively.
 * Ordering is decided by CompareResults(SortType, a, b), whose sign is
 * interpreted as "a before b" (< 0) or "a after b" (> 0).
 *
 * m_Results and m_ResultsPositions are kept in step: whenever two result
 * entries are exchanged, the entries at the same indices in
 * m_ResultsPositions are exchanged as well.
 *
 * @param SearchObject Search object whose m_SearchData arrays are sorted.
 * @param SortType     Sort criterion forwarded to CompareResults.
 * @param l            Index of the first element of the range.
 * @param r            Index of the last element of the range (inclusive).
 */
void CIndex::SortResults(CSearchObject& SearchObject, CSortType SortType, int l, int r)
{
    // A range with fewer than two elements is already sorted.  Returning
    // here also avoids reading the pivot from an empty range (for
    // example l = 0, r = -1), which would index outside the array.
    if (l >= r)
        return;

    int i = l;
    int j = r;

    // Same midpoint as (l + r) / 2 for valid indices, but the sum cannot
    // overflow for large ranges.
    int pivot = SearchObject.m_SearchData.m_Results[l + (r - l) / 2];

    while (i <= j)
    {
        // Skip elements that are already on the correct side of the pivot.
        while (CompareResults(SortType, SearchObject.m_SearchData.m_Results[i], pivot) < 0)
            i++;
        while (CompareResults(SortType, SearchObject.m_SearchData.m_Results[j], pivot) > 0)
            j--;

        if (i <= j)
        {
            // Exchange the two results together with their positions.
            int t  = SearchObject.m_SearchData.m_Results[i];
            int tt = SearchObject.m_SearchData.m_ResultsPositions[i];

            SearchObject.m_SearchData.m_Results[i]          = SearchObject.m_SearchData.m_Results[j];
            SearchObject.m_SearchData.m_ResultsPositions[i] = SearchObject.m_SearchData.m_ResultsPositions[j];

            SearchObject.m_SearchData.m_Results[j]          = t;
            SearchObject.m_SearchData.m_ResultsPositions[j] = tt;

            i++;
            j--;
        }
    }

    // Recurse into the two partitions that still hold more than one element.
    if (l < j)
        SortResults(SearchObject, SortType, l, j);
    if (i < r)
        SortResults(SearchObject, SortType, i, r);
}
//____________________________________________________________________ Int_t multidimfit(bool doFit = true) { cout << "*************************************************" << endl; cout << "* Multidimensional Fit *" << endl; cout << "* *" << endl; cout << "* By Christian Holm <*****@*****.**> 14/10/00 *" << endl; cout << "*************************************************" << endl; cout << endl; // Initialize global TRannom object. gRandom = new TRandom(); // Open output file TFile* output = new TFile("mdf.root", "RECREATE"); // Global data parameters Int_t nVars = 4; Int_t nData = 500; Double_t x[4]; // make fit object and set parameters on it. TMultiDimFit* fit = new TMultiDimFit(nVars, TMultiDimFit::kMonomials,"v"); Int_t mPowers[] = { 6 , 6, 6, 6 }; fit->SetMaxPowers(mPowers); fit->SetMaxFunctions(1000); fit->SetMaxStudy(1000); fit->SetMaxTerms(30); fit->SetPowerLimit(1); fit->SetMinAngle(10); fit->SetMaxAngle(10); fit->SetMinRelativeError(.01); // variables to hold the temporary input data Double_t d; Double_t e; // Print out the start parameters fit->Print("p"); printf("======================================\n"); // Create training sample Int_t i; for (i = 0; i < nData ; i++) { // Make some data makeData(x,d,e); // Add the row to the fit object fit->AddRow(x,d,e); } // Print out the statistics fit->Print("s"); // Book histograms fit->MakeHistograms(); // Find the parameterization fit->FindParameterization(); // Print coefficents fit->Print("rc"); // Get the min and max of variables from the training sample, used // for cuts in test sample. 
Double_t *xMax = new Double_t[nVars]; Double_t *xMin = new Double_t[nVars]; for (i = 0; i < nVars; i++) { xMax[i] = (*fit->GetMaxVariables())(i); xMin[i] = (*fit->GetMinVariables())(i); } nData = fit->GetNCoefficients() * 100; Int_t j; // Create test sample for (i = 0; i < nData ; i++) { // Make some data makeData(x,d,e); for (j = 0; j < nVars; j++) if (x[j] < xMin[j] || x[j] > xMax[j]) break; // If we get through the loop above, all variables are in range if (j == nVars) // Add the row to the fit object fit->AddTestRow(x,d,e); else i--; } //delete gRandom; // Test the parameterizatio and coefficents using the test sample. if (doFit) fit->Fit("M"); // Print result fit->Print("fc v"); // Write code to file fit->MakeCode(); // Write histograms to disk, and close file output->Write(); output->Close(); delete output; // Compare results with reference run Int_t compare = CompareResults(fit, doFit); if (!compare) { printf("\nmultidimfit .............................................. OK\n"); } else { printf("\nmultidimfit .............................................. fails case %d\n",compare); } // We're done delete fit; return compare; }
int main(int argc, char **argv) { l_int32 i, j, k, w, h, w2, w4, w8, w16, w32, wpl; l_int32 count1, count2, count3; l_uint32 val32, val1, val2; l_uint32 *data1, *line1, *data2, *line2; void **lines1, **linet1, **linet2; PIX *pixs, *pix1, *pix2; L_REGPARAMS *rp; if (regTestSetup(argc, argv, &rp)) return 1; pixs = pixRead("feyn-fract.tif"); pixGetDimensions(pixs, &w, &h, NULL); data1 = pixGetData(pixs); wpl = pixGetWpl(pixs); lines1 = pixGetLinePtrs(pixs, NULL); /* Get timing for the 3 different methods */ startTimer(); for (k = 0; k < 10; k++) { count1 = 0; for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { if (GET_DATA_BIT(lines1[i], j)) count1++; } } } fprintf(stderr, "Time with line ptrs = %5.3f sec, count1 = %d\n", stopTimer(), count1); startTimer(); for (k = 0; k < 10; k++) { count2 = 0; for (i = 0; i < h; i++) { line1 = data1 + i * wpl; for (j = 0; j < w; j++) { if (l_getDataBit(line1, j)) count2++; } } } fprintf(stderr, "Time with l_get* = %5.3f sec, count2 = %d\n", stopTimer(), count2); startTimer(); for (k = 0; k < 10; k++) { count3 = 0; for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { pixGetPixel(pixs, j, i, &val32); count3 += val32; } } } fprintf(stderr, "Time with pixGetPixel() = %5.3f sec, count3 = %d\n", stopTimer(), count3); pix1 = pixCreateTemplate(pixs); linet1 = pixGetLinePtrs(pix1, NULL); pix2 = pixCreateTemplate(pixs); data2 = pixGetData(pix2); linet2 = pixGetLinePtrs(pix2, NULL); /* ------------------------------------------------- */ /* Test different methods for 1 bpp */ /* ------------------------------------------------- */ count1 = 0; for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { val1 = GET_DATA_BIT(lines1[i], j); count1 += val1; if (val1) SET_DATA_BIT(linet1[i], j); } } count2 = 0; for (i = 0; i < h; i++) { line1 = data1 + i * wpl; line2 = data2 + i * wpl; for (j = 0; j < w; j++) { val2 = l_getDataBit(line1, j); count2 += val2; if (val2) l_setDataBit(line2, j); } } CompareResults(pixs, pix1, pix2, count1, count2, "1 bpp", rp); /* 
------------------------------------------------- */ /* Test different methods for 2 bpp */ /* ------------------------------------------------- */ count1 = 0; w2 = w / 2; for (i = 0; i < h; i++) { for (j = 0; j < w2; j++) { val1 = GET_DATA_DIBIT(lines1[i], j); count1 += val1; val1 += 0xbbbbbbbc; SET_DATA_DIBIT(linet1[i], j, val1); } } count2 = 0; for (i = 0; i < h; i++) { line1 = data1 + i * wpl; line2 = data2 + i * wpl; for (j = 0; j < w2; j++) { val2 = l_getDataDibit(line1, j); count2 += val2; val2 += 0xbbbbbbbc; l_setDataDibit(line2, j, val2); } } CompareResults(pixs, pix1, pix2, count1, count2, "2 bpp", rp); /* ------------------------------------------------- */ /* Test different methods for 4 bpp */ /* ------------------------------------------------- */ count1 = 0; w4 = w / 4; for (i = 0; i < h; i++) { for (j = 0; j < w4; j++) { val1 = GET_DATA_QBIT(lines1[i], j); count1 += val1; val1 += 0xbbbbbbb0; SET_DATA_QBIT(linet1[i], j, val1); } } count2 = 0; for (i = 0; i < h; i++) { line1 = data1 + i * wpl; line2 = data2 + i * wpl; for (j = 0; j < w4; j++) { val2 = l_getDataQbit(line1, j); count2 += val2; val2 += 0xbbbbbbb0; l_setDataQbit(line2, j, val2); } } CompareResults(pixs, pix1, pix2, count1, count2, "4 bpp", rp); /* ------------------------------------------------- */ /* Test different methods for 8 bpp */ /* ------------------------------------------------- */ count1 = 0; w8 = w / 8; for (i = 0; i < h; i++) { for (j = 0; j < w8; j++) { val1 = GET_DATA_BYTE(lines1[i], j); count1 += val1; val1 += 0xbbbbbb00; SET_DATA_BYTE(linet1[i], j, val1); } } count2 = 0; for (i = 0; i < h; i++) { line1 = data1 + i * wpl; line2 = data2 + i * wpl; for (j = 0; j < w8; j++) { val2 = l_getDataByte(line1, j); count2 += val2; val2 += 0xbbbbbb00; l_setDataByte(line2, j, val2); } } CompareResults(pixs, pix1, pix2, count1, count2, "8 bpp", rp); /* ------------------------------------------------- */ /* Test different methods for 16 bpp */ /* 
------------------------------------------------- */ count1 = 0; w16 = w / 16; for (i = 0; i < h; i++) { for (j = 0; j < w16; j++) { val1 = GET_DATA_TWO_BYTES(lines1[i], j); count1 += val1; val1 += 0xbbbb0000; SET_DATA_TWO_BYTES(linet1[i], j, val1); } } count2 = 0; for (i = 0; i < h; i++) { line1 = data1 + i * wpl; line2 = data2 + i * wpl; for (j = 0; j < w16; j++) { val2 = l_getDataTwoBytes(line1, j); count2 += val2; val2 += 0xbbbb0000; l_setDataTwoBytes(line2, j, val2); } } CompareResults(pixs, pix1, pix2, count1, count2, "16 bpp", rp); /* ------------------------------------------------- */ /* Test different methods for 32 bpp */ /* ------------------------------------------------- */ count1 = 0; w32 = w / 32; for (i = 0; i < h; i++) { for (j = 0; j < w32; j++) { val1 = GET_DATA_FOUR_BYTES(lines1[i], j); count1 += val1 & 0xfff; SET_DATA_FOUR_BYTES(linet1[i], j, val1); } } count2 = 0; for (i = 0; i < h; i++) { line1 = data1 + i * wpl; line2 = data2 + i * wpl; for (j = 0; j < w32; j++) { val2 = l_getDataFourBytes(line1, j); count2 += val2 & 0xfff; l_setDataFourBytes(line2, j, val2); } } CompareResults(pixs, pix1, pix2, count1, count2, "32 bpp", rp); pixDestroy(&pixs); pixDestroy(&pix1); pixDestroy(&pix2); lept_free(lines1); lept_free(linet1); lept_free(linet2); return regTestCleanup(rp); }
// Main program //***************************************************************************** int main(int argc, char** argv) { // Locals used with command line args int p = 256; // workgroup X dimension int q = 1; // workgroup Y dimension pArgc = &argc; pArgv = argv; shrQAStart(argc, argv); // latch the executable path for other funcs to use cExecutablePath = argv[0]; // start logs and show command line help shrSetLogFileName ("oclNbody.txt"); shrLog("%s Starting...\n\n", cExecutablePath); shrLog("Command line switches:\n"); shrLog(" --qatest\t\tCheck correctness of GPU execution and measure performance)\n"); shrLog(" --noprompt\t\tQuit simulation automatically after a brief period\n"); shrLog(" --n=<numbodies>\tSpecify # of bodies to simulate (default = %d)\n", numBodies); shrLog(" --double\t\tUse double precision floating point values for simulation\n"); shrLog(" --p=<workgroup X dim>\tSpecify X dimension of workgroup (default = %d)\n", p); shrLog(" --q=<workgroup Y dim>\tSpecify Y dimension of workgroup (default = %d)\n\n", q); // Get command line arguments if there are any and set vars accordingly if (argc > 0) { shrGetCmdLineArgumenti(argc, (const char**)argv, "p", &p); shrGetCmdLineArgumenti(argc, (const char**)argv, "q", &q); shrGetCmdLineArgumenti(argc, (const char**)argv, "n", &numBodies); bDouble = (shrTRUE == shrCheckCmdLineFlag(argc, (const char**)argv, "double")); bNoPrompt = shrCheckCmdLineFlag(argc, (const char**)argv, "noprompt"); bQATest = shrCheckCmdLineFlag(argc, (const char**)argv, "qatest"); } //Get the NVIDIA platform cl_int ciErrNum = oclGetPlatformID(&cpPlatform); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); shrLog("clGetPlatformID...\n\n"); if (bDouble) { shrLog("Double precision execution...\n\n"); } else { shrLog("Single precision execution...\n\n"); } flopsPerInteraction = bDouble ? 
30 : 20; //Get all the devices shrLog("Get the Device info and select Device...\n"); ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &uiNumDevices); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); cdDevices = (cl_device_id *)malloc(uiNumDevices * sizeof(cl_device_id) ); ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, uiNumDevices, cdDevices, NULL); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // Set target device and Query number of compute units on uiTargetDevice shrLog(" # of Devices Available = %u\n", uiNumDevices); if(shrGetCmdLineArgumentu(argc, (const char**)argv, "device", &uiTargetDevice)== shrTRUE) { uiTargetDevice = CLAMP(uiTargetDevice, 0, (uiNumDevices - 1)); } shrLog(" Using Device %u, ", uiTargetDevice); oclPrintDevName(LOGBOTH, cdDevices[uiTargetDevice]); cl_uint uiNumComputeUnits; clGetDeviceInfo(cdDevices[uiTargetDevice], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(uiNumComputeUnits), &uiNumComputeUnits, NULL); shrLog(" # of Compute Units = %u\n", uiNumComputeUnits); //Create the context shrLog("clCreateContext...\n"); cxContext = clCreateContext(0, uiNumDevsUsed, &cdDevices[uiTargetDevice], NULL, NULL, &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // Create a command-queue shrLog("clCreateCommandQueue...\n\n"); cqCommandQueue = clCreateCommandQueue(cxContext, cdDevices[uiTargetDevice], CL_QUEUE_PROFILING_ENABLE, &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // Log and config for number of bodies shrLog("Number of Bodies = %d\n", numBodies); switch (numBodies) { case 1024: activeParams.m_clusterScale = 1.52f; activeParams.m_velocityScale = 2.f; break; case 2048: activeParams.m_clusterScale = 1.56f; activeParams.m_velocityScale = 2.64f; break; case 4096: activeParams.m_clusterScale = 1.68f; activeParams.m_velocityScale = 2.98f; break; case 7680: case 8192: activeParams.m_clusterScale = 1.98f; activeParams.m_velocityScale = 2.9f; break; default: case 15360: case 16384: 
activeParams.m_clusterScale = 1.54f; activeParams.m_velocityScale = 8.f; break; case 30720: case 32768: activeParams.m_clusterScale = 1.44f; activeParams.m_velocityScale = 11.f; break; } if ((q * p) > 256) { p = 256 / q; shrLog("Setting p=%d to maintain %d threads per block\n", p, 256); } if ((q == 1) && (numBodies < p)) { p = numBodies; shrLog("Setting p=%d because # of bodies < p\n", p); } shrLog("Workgroup Dims = (%d x %d)\n\n", p, q); // Initialize OpenGL items if using GL if (bQATest == shrFALSE) { assert(0); /* shrLog("Calling InitGL...\n"); InitGL(&argc, argv); */ } else { shrLog("Skipping InitGL...\n"); } // CL/GL interop disabled bUsePBO = (false && (bQATest == shrFALSE)); InitNbody(cdDevices[uiTargetDevice], cxContext, cqCommandQueue, numBodies, p, q, bUsePBO, bDouble); ResetSim(nbody, numBodies, NBODY_CONFIG_SHELL, bUsePBO); // init timers shrDeltaT(DEMOTIME); // timer 0 is for timing demo periods shrDeltaT(FUNCTIME); // timer 1 is for logging function delta t's shrDeltaT(FPSTIME); // timer 2 is for fps measurement // Standard simulation if (bQATest == shrFALSE) { assert(0); /* shrLog("Running standard oclNbody simulation...\n\n"); glutDisplayFunc(DisplayGL); glutReshapeFunc(ReshapeGL); glutMouseFunc(MouseGL); glutMotionFunc(MotionGL); glutKeyboardFunc(KeyboardGL); glutSpecialFunc(SpecialGL); glutIdleFunc(IdleGL); glutMainLoop(); */ } // Compare to host, profile and write out file for regression analysis if (bQATest == shrTRUE) { bool bTestResults = false; shrLog("Running oclNbody Results Comparison...\n\n"); bTestResults = CompareResults(numBodies); //shrLog("Profiling oclNbody...\n\n"); //RunProfiling(100, (unsigned int)(p * q)); // 100 iterations shrQAFinish(argc, (const char **)argv, bTestResults ? QA_PASSED : QA_FAILED); } else { // Cleanup/exit bNoPrompt = shrTRUE; shrQAFinish2(false, *pArgc, (const char **)pArgv, QA_PASSED); } Cleanup(EXIT_SUCCESS); }
void NonTemporalStoreTimed(void) { ThreadTimer::SetThreadAffinityMask(); const int num_it = 500; const int num_alg = 3; const double et_scale = 1.0e6; double et[num_it][num_alg]; ThreadTimer tt; const int n = 1000000; const int align = 16; float* a = (float*)_aligned_malloc(n * sizeof(float), align); float* b = (float*)_aligned_malloc(n * sizeof(float), align); float* c1 = (float*)_aligned_malloc(n * sizeof(float), align); float* c2a = (float*)_aligned_malloc(n * sizeof(float), align); float* c2b = (float*)_aligned_malloc(n * sizeof(float), align); srand(67); for (int i = 0; i < n; i++) { a[i] = (float)(rand() % 100); b[i] = (float)(rand() % 100); } for (int i = 0; i < num_it; i++) { tt.Start(); CalcResultCpp(c1, a, b, n); tt.Stop(); et[i][0] = tt.GetElapsedTime() * et_scale; } for (int i = 0; i < num_it; i++) { tt.Start(); CalcResultA_(c2a, a, b, n); tt.Stop(); et[i][1] = tt.GetElapsedTime() * et_scale; } for (int i = 0; i < num_it; i++) { tt.Start(); CalcResultB_(c2b, a, b, n); tt.Stop(); et[i][2] = tt.GetElapsedTime() * et_scale; } #ifdef _WIN64 const char* fn = "__NonTemporalStore64.csv"; #else const char* fn = "__NonTemporalStore32.csv"; #endif if (!CompareResults(c1, c2a, c2b, n, false)) printf("NonTemporalStoreTimed() - array compare FAILED\n"); ThreadTimer::SaveElapsedTimeMatrix(fn, (double*)et, num_it, num_alg); printf("\nBenchmark times saved to file %s\n", fn); _aligned_free(a); _aligned_free(b); _aligned_free(c1); _aligned_free(c2a); _aligned_free(c2b); }
/** * Compare results * * @relates Compare * @ingroup pwglf_forward_tracklets */ void CompareResults() { CompareResults(const_cast<const char**>(&(gApplication->Argv()[1]))); gApplication->ClearInputFiles(); }