/**
 * Constructs the summary tree.
 *
 * Resets the bookkeeping members, creates the captioned add/delete actions
 * and the two caption-less Delete-key actions, and connects every action as
 * well as the tree's own change signals to the slots of this widget.
 *
 * @param parent Parent widget, forwarded to QTreeWidget.
 */
SummaryTree::SummaryTree(QWidget *parent)
    : QTreeWidget(parent)
{
    addCount = 0;
    curContest = 0;

    // Actions that carry a user-visible caption.
    addTaskAction        = new QAction(tr("Add a New Task"), this);
    addTestCaseAction    = new QAction(tr("Add a Test Case"), this);
    addTestCasesAction   = new QAction(tr("Add Test Cases ..."), this);
    deleteTaskAction     = new QAction(tr("Delete Current Task"), this);
    deleteTestCaseAction = new QAction(tr("Delete Current Test Case"), this);

    // Caption-less actions bound to the Delete key. Both start disabled, are
    // restricted to this widget's shortcut context, and are registered on the
    // widget in the same order as before (task first, then test case).
    deleteTaskKeyAction     = new QAction(this);
    deleteTestCaseKeyAction = new QAction(this);

    QAction *const deleteKeyActions[2] = { deleteTaskKeyAction, deleteTestCaseKeyAction };
    for (int k = 0; k < 2; ++k)
    {
        QAction *const keyAction = deleteKeyActions[k];
        keyAction->setShortcutContext(Qt::WidgetShortcut);
        keyAction->setShortcut(QKeySequence::Delete);
        keyAction->setEnabled(false);
        addAction(keyAction);
    }

    // Adding tasks / test cases.
    connect(addTaskAction, SIGNAL(triggered()),
            this, SLOT(addTask()));
    connect(addTestCaseAction, SIGNAL(triggered()),
            this, SLOT(addTestCase()));
    connect(addTestCasesAction, SIGNAL(triggered()),
            this, SLOT(addTestCases()));

    // Deleting: the captioned action and the key action share one slot each.
    connect(deleteTaskAction, SIGNAL(triggered()),
            this, SLOT(deleteTask()));
    connect(deleteTestCaseAction, SIGNAL(triggered()),
            this, SLOT(deleteTestCase()));
    connect(deleteTaskKeyAction, SIGNAL(triggered()),
            this, SLOT(deleteTask()));
    connect(deleteTestCaseKeyAction, SIGNAL(triggered()),
            this, SLOT(deleteTestCase()));

    // React to changes of the tree itself.
    connect(this, SIGNAL(currentItemChanged(QTreeWidgetItem*,QTreeWidgetItem*)),
            this, SLOT(selectionChanged()));
    connect(this, SIGNAL(itemChanged(QTreeWidgetItem*, int)),
            this, SLOT(itemChanged(QTreeWidgetItem*)));
}
int main(int argc, char** argv) { printf("\n\n\n--------------------------------------------------------------------------------\n"); printf("Running Whole-Function Vectorization Test Suite 3...\n\n"); if (NUM_INPUTS_SQRT < 8U) { printf("ERROR: NUM_INPUTS_SQRT must be at least 8!\n"); return -1; } if (NUM_INPUTS_SQRT % SIMD_WIDTH != 0) { printf("ERROR: NUM_INPUTS_SQRT must be defined as a multiple of SIMD width!\n"); return -1; } //------------------------------------------------------------------------// // create function pointers for test cases and save test case names //------------------------------------------------------------------------// std::vector<TestCase*> testCases; //add scalar and generated functions addTestCases(testCases); const unsigned testCaseNr = testCases.size(); //------------------------------------------------------------------------// // create input values //------------------------------------------------------------------------// const unsigned hardcodedInputNr = 16; // must not be changed const unsigned inputPermutations = 4; // must not be changed float scalarInputs0[NUM_INPUTS]; float scalarInputs1[NUM_INPUTS]; float scalarInputs2[NUM_INPUTS]; float scalarInputs3[NUM_INPUTS]; int scalarInputsInt0[NUM_INPUTS]; int scalarInputsInt1[NUM_INPUTS]; // 16 hardcoded input value sets scalarInputs0[0] = 0.f; scalarInputs0[1] = 3.f; scalarInputs0[2] = 2.f; scalarInputs0[3] = 8.f; scalarInputs0[4] = 10.2f; scalarInputs0[5] = -1.f; scalarInputs0[6] = 0.f; scalarInputs0[7] = 1000.23f; scalarInputs0[8] = 0.0002f; scalarInputs0[9] = -0.0002f; scalarInputs0[10] = -3.f; scalarInputs0[11] = -1.f; scalarInputs0[12] = 0.f; scalarInputs0[13] = 12.f; scalarInputs0[14] = -333.12f; scalarInputs0[15] = 0.003f; scalarInputs1[0] = 1.f; scalarInputs1[1] = 2.f; scalarInputs1[2] = 4.f; scalarInputs1[3] = 6.f; scalarInputs1[4] = -14.13f; scalarInputs1[5] = -13.f; scalarInputs1[6] = 0.f; scalarInputs1[7] = 0.0002f; scalarInputs1[8] = 420.001f; scalarInputs1[9] = 
-420.001f; scalarInputs1[10] = 3.f; scalarInputs1[11] = -1.f; scalarInputs1[12] = 0.f; scalarInputs1[13] = 12.f; scalarInputs1[14] = -33.0012f; scalarInputs1[15] = 1.0004f; scalarInputs2[0] = 2.f; scalarInputs2[1] = 1.f; scalarInputs2[2] = 6.f; scalarInputs2[3] = 4.f; scalarInputs2[4] = 999.f; scalarInputs2[5] = -5.f; scalarInputs2[6] = 0.f; scalarInputs2[7] = 420.001f; scalarInputs2[8] = 0.01f; scalarInputs2[9] = 0.01f; scalarInputs2[10] = 3.f; scalarInputs2[11] = 1.f; scalarInputs2[12] = 333.333f; scalarInputs2[13] = 4.f; scalarInputs2[14] = -20.f; scalarInputs2[15] = 20.1546f; scalarInputs3[0] = 3.f; scalarInputs3[1] = 0.f; scalarInputs3[2] = 8.f; scalarInputs3[3] = 2.f; scalarInputs3[4] = 0.f; scalarInputs3[5] = -420.001f; scalarInputs3[6] = 0.f; scalarInputs3[7] = 0.01f; scalarInputs3[8] = 1000.23f; scalarInputs3[9] = 0.01f; scalarInputs3[10] = -3.f; scalarInputs3[11] = 1.f; scalarInputs3[12] = -333.333f; scalarInputs3[13] = -4.f; scalarInputs3[14] = 777.01f; scalarInputs3[15] = -0.004f; scalarInputsInt0[0] = 0; scalarInputsInt0[1] = 1; scalarInputsInt0[2] = 2; scalarInputsInt0[3] = 3; scalarInputsInt0[4] = 8; scalarInputsInt0[5] = -13; scalarInputsInt0[6] = -222; scalarInputsInt0[7] = 99; scalarInputsInt0[8] = 0; scalarInputsInt0[9] = 111; scalarInputsInt0[10] = -32; scalarInputsInt0[11] = -1; scalarInputsInt0[12] = 99; scalarInputsInt0[13] = 1000; scalarInputsInt0[14] = -1000; scalarInputsInt0[15] = 71; scalarInputsInt1[0] = -3; scalarInputsInt1[1] = 5; scalarInputsInt1[2] = 100; scalarInputsInt1[3] = 22; scalarInputsInt1[4] = -56; scalarInputsInt1[5] = -2; scalarInputsInt1[6] = -1; scalarInputsInt1[7] = 1; scalarInputsInt1[8] = 2; scalarInputsInt1[9] = 15; scalarInputsInt1[10] = 1024; scalarInputsInt1[11] = -255; scalarInputsInt1[12] = -256; scalarInputsInt1[13] = 10; scalarInputsInt1[14] = 11; scalarInputsInt1[15] = -10; // now add as many random inputs as required to reach NUM_INPUTS #define CUSTOM_RAND_MAX 1000 //prevent too large inputs 
srand((unsigned)time(0)); for (unsigned i=hardcodedInputNr; i<NUM_INPUTS; ++i) { float r = (float)rand()/(float)RAND_MAX; float neg = rand() > (RAND_MAX/2) ? 1.f : -1.f; scalarInputs0[i] = (rand() % CUSTOM_RAND_MAX) * r * neg; r = (float)rand()/(float)RAND_MAX; neg = rand() > (RAND_MAX/2) ? 1.f : -1.f; scalarInputs1[i] = (rand() % CUSTOM_RAND_MAX) * r * neg; r = (float)rand()/(float)RAND_MAX; neg = rand() > (RAND_MAX/2) ? 1.f : -1.f; scalarInputs2[i] = (rand() % CUSTOM_RAND_MAX) * r * neg; r = (float)rand()/(float)RAND_MAX; neg = rand() > (RAND_MAX/2) ? 1.f : -1.f; scalarInputs3[i] = (rand() % CUSTOM_RAND_MAX) * r * neg; int r2 = rand()/RAND_MAX; int neg2 = rand() > (RAND_MAX/2) ? 1 : -1; scalarInputsInt0[i] = (rand() % CUSTOM_RAND_MAX) * r2 * neg2; r2 = rand()/RAND_MAX; neg2 = rand() > (RAND_MAX/2) ? 1 : -1; scalarInputsInt1[i] = (rand() % CUSTOM_RAND_MAX) * r2 * neg2; } // store pointers to arrays for random access (input permutations) float* inputArrays[4]; int* inputArraysInt[2]; inputArrays[0] = scalarInputs0; inputArrays[1] = scalarInputs1; inputArrays[2] = scalarInputs2; inputArrays[3] = scalarInputs3; inputArraysInt[0] = scalarInputsInt0; inputArraysInt[1] = scalarInputsInt1; //------------------------------------------------------------------------// // create result array (each result holds the computed arrays, which // equals the results of NUM_INPUTS scalar calls) //------------------------------------------------------------------------// const unsigned resultSetNr = inputPermutations * inputPermutations * testCaseNr; Result** results = new Result*[resultSetNr](); //------------------------------------------------------------------------// // compute results of scalar and generated functions //------------------------------------------------------------------------// unsigned testsRun = 0; for (unsigned TC=0; TC<testCaseNr; ++TC) { unsigned inputPermsRun = 0; for (unsigned i=0; i<inputPermutations; ++i) { for (unsigned i2=0; i2<inputPermutations; ++i2) 
{ // Abort if we have already run too many test cases. if (testsRun >= resultSetNr*NUM_INPUTS) { printf("\nERROR: not enough space allocated for results!\n"); return -1; } // Get function pointers of current test case. const TestCase& testCase = *testCases[TC]; // Choose input arrays. const float* input0 = inputArrays[i]; const float* input1 = inputArrays[i2]; const int* input2 = inputArraysInt[(i+i2) % 2]; // Choose uniform inputs ( = the same for the entire run!). const float input3 = scalarInputs0[i*inputPermutations + i2]; const int input4 = scalarInputsInt0[i*inputPermutations + i2]; // Create output arrays for scalar function. float* scalarOutput0 = new float[NUM_INPUTS](); float* scalarOutput1 = new float[NUM_INPUTS](); int* scalarOutput2 = new int[NUM_INPUTS](); for (unsigned idx=0; idx<NUM_INPUTS; ++idx) { scalarOutput0[idx] = 0.f; scalarOutput1[idx] = 0.f; scalarOutput2[idx] = 0; } //printf("\nexecuting scalar function...\n"); // Execute scalar function, setting the appropriate OpenCL // state in each iteration. for (unsigned x=0; x<GLOBAL_SIZE_0; ++x) { global_id_0 = x; local_id_0 = x % LOCAL_SIZE_0; group_id_0 = x / LOCAL_SIZE_0; for (unsigned y=0; y<GLOBAL_SIZE_1; ++y) { global_id_1 = y; local_id_1 = y % LOCAL_SIZE_1; group_id_1 = y / LOCAL_SIZE_1; //printf("\niteration: %d/%d\n", x, y); //printf("get_global_id_0: %d\n", get_global_id_0()); //printf("get_global_id_1: %d\n", get_global_id_1()); //printf("get_local_id_0: %d\n", get_local_id_0()); //printf("get_local_id_1: %d\n", get_local_id_1()); testCase.scalarFn(input0, input1, input2, scalarOutput0, scalarOutput1, scalarOutput2, input3, input4); //printf("result: %f\n", scalarOutput0[get_global_id_0()]); } } // Create output arrays for vector function. 
float* vectorizedOutput0 = new float[NUM_INPUTS](); float* vectorizedOutput1 = new float[NUM_INPUTS](); int* vectorizedOutput2 = new int[NUM_INPUTS](); for (unsigned idx=0; idx<NUM_INPUTS; ++idx) { vectorizedOutput0[idx] = 0.f; vectorizedOutput1[idx] = 0.f; vectorizedOutput2[idx] = 0; } //printf("\nexecuting packet function...\n"); // Execute vectorized function, setting the appropriate OpenCL // state in each iteration. for (unsigned x=0; x<GLOBAL_SIZE_0; x+=SIMD_WIDTH) { global_id_0 = x; local_id_0 = x % (LOCAL_SIZE_0 / SIMD_WIDTH); group_id_0 = x / (LOCAL_SIZE_0 / SIMD_WIDTH); for (unsigned y=0; y<GLOBAL_SIZE_1; ++y) { global_id_1 = y; local_id_1 = y % LOCAL_SIZE_1; group_id_1 = y / LOCAL_SIZE_1; //printf("\niteration: %d/%d\n", x, y); //printf("get_global_id_0: %d\n", get_global_id_0()); //printf("get_global_id_1: %d\n", get_global_id_1()); //printf("get_local_id_0: %d\n", get_local_id_0()); //printf("get_local_id_1: %d\n", get_local_id_1()); testCase.vectorizedFn(input0, input1, input2, vectorizedOutput0, vectorizedOutput1, vectorizedOutput2, input3, input4); //for (unsigned i=0; i<SIMD_WIDTH; ++i) //printf("result[%d]: %f\n", i, vectorizedOutput0[get_global_id_0()+i]); } } // Result index (between 0 and resultSetNr). const unsigned index = TC*inputPermutations*inputPermutations + i*inputPermutations + i2; // Store results & information. 
results[index] = new Result(testCase.name, input0, input1, input2, input3, input4, scalarOutput0, scalarOutput1, scalarOutput2, vectorizedOutput0, vectorizedOutput1, vectorizedOutput2); //Result* res = results[index]; //for (unsigned idx=0; idx<NUM_INPUTS; ++idx) //{ //const float scalarRes0 = res->scalarOutput0[idx]; //const float pktRes0 = res->vectorizedOutput0[idx]; //const bool success = resultMatches(scalarRes0, pktRes0); //if (!success) //{ //printf("ERROR at index %d:\n", idx); //printf(" expected result: %f\n", scalarRes0); //printf(" computed result: %f\n", pktRes0); //} //} testsRun += NUM_INPUTS; ++inputPermsRun; #define CLEAR "\033[K" printf("\rProgress: %6.2f%% | test %2d/%2d (inputs %2d/%2d) : %s" CLEAR, ((float)(testsRun / NUM_INPUTS) * 100.f) / (float)resultSetNr, TC+1, testCaseNr, inputPermsRun, inputPermutations*inputPermutations, testCase.name); fflush(stdout); } } } if (testsRun <= 0) { printf("\nERROR: need to compute at least one result! (forgot to activate test cases?)\n"); return -1; } if (testsRun != resultSetNr*NUM_INPUTS) { printf("\nERROR: unexpected number of results (testsRun != resultSetNr*NUM_INPUTS)!\n"); return -1; } printf("\nverifying results:\n\n"); bool allSuccessful = true; unsigned failedTestsNr = 0; const char* curTest = ""; for (unsigned i=0; i<resultSetNr; ++i) { Result* res = results[i]; for (unsigned j=0; j<NUM_INPUTS; ++j) { const float scalarRes0 = res->scalarOutput0[j]; const float scalarRes1 = res->scalarOutput1[j]; const int scalarRes2 = res->scalarOutput2[j]; const float pktRes0 = res->vectorizedOutput0[j]; const float pktRes1 = res->vectorizedOutput1[j]; const int pktRes2 = res->vectorizedOutput2[j]; const bool success = resultMatches(scalarRes0, pktRes0) && resultMatches(scalarRes1, pktRes1) && resultMatches(scalarRes2, pktRes2); if (!success) { ++failedTestsNr; allSuccessful = false; #if 1 // Only print failed test once. 
if (strcmp(curTest, res->name) == 0) continue; printf("test case failed: %s\n", res->name); curTest = res->name; #else // Print all failed tests. // The inputs actually don't help much, because each test case // can access arbitrary values from the arrays. const float input0 = res->input0[j]; const float input1 = res->input1[j]; const int input2 = res->input2[j]; const float input3 = res->input3; const int input4 = res->input4; const unsigned gid0 = j % GLOBAL_SIZE_0; const unsigned gid1 = j / GLOBAL_SIZE_1; printf("%s FAILED at global id %d/%d! ", res->name, gid0, gid1); printf(" %f %f %d %f %d\n", input0, input1, input2, input3, input4); printf(" expected results: [ %f %f %d ]\n", scalarRes0, scalarRes1, scalarRes2); printf(" computed results: [ %f %f %d ]\n", pktRes0, pktRes1, pktRes2); #endif } if (!success) ++failedTestsNr; } } if (allSuccessful) printf("ALL TESTS SUCCESSFUL! (%d)\n", testsRun); else printf("\n%d / %d TESTS FAILED!\n", failedTestsNr, testsRun); printf("\n\ntest-suite run complete!\n"); printf("--------------------------------------------------------------------------------\n\n"); for (unsigned i=0; i<resultSetNr; ++i) { delete results[i]; } return !allSuccessful; }