/*!
 * Feeds two fixed messages through ibrcommon::HMacStream, each time with a
 * freshly constructed stream and the same 16-character key, and compares the
 * hex rendering produced by getHex() with a known digest string.
 *
 * Throws ibrcommon::Exception on the first mismatch.
 */
void HashStreamTest::hmacstream_test01()
{
	// One row per check: the text streamed in and the digest expected back.
	struct Sample
	{
		const char *message;
		const char *digest;
	};

	static const Sample samples[] = {
		{ "Hello World",
		  "b3:7a:c5:9f:6d:2:60:bd:da:51:cc:d3:95:2:11:7:c0:f1:7b:f9:" },
		{ "Hello again my World!",
		  "a0:1d:99:f2:99:a4:b9:dc:b7:44:df:7b:b5:75:19:c6:20:8:bc:da:" }
	};
	const size_t sampleCount = sizeof(samples) / sizeof(samples[0]);

	for (size_t index = 0; index < sampleCount; ++index)
	{
		// A new key string and a new stream for every sample, so that no
		// state is carried over from the previous check.
		std::string key = "1234557890123456";
		ibrcommon::HMacStream hstream((unsigned char*)key.c_str(), key.length());

		hstream << samples[index].message << std::flush;

		if (samples[index].digest != getHex(hstream))
		{
			throw ibrcommon::Exception("unexpected hash value");
		}
	}
}
void ParallelismTraceAnalyzer::parallelism() const { typedef std::vector< ParallelismTraceGenerator::Event > EventVector; typedef std::set< long long unsigned int > InstructionSet; double averageSIMD = 0; double averageMIMD = 0; double averageCTAs = 0; for( KernelMap::const_iterator vector = _kernels.begin(); vector != _kernels.end(); ++vector ) { std::cout << " From program \"" << vector->first << "\".\n"; double localSIMD = 0; double localMIMD = 0; double localCTAs = 0; for( KernelVector::const_iterator kernel = vector->second.begin(); kernel != vector->second.end(); ++kernel ) { std::ifstream hstream( kernel->header.c_str() ); boost::archive::text_iarchive harchive( hstream ); ParallelismTraceGenerator::Header header; harchive >> header; assert( header.format == TraceGenerator::ParallelismTraceFormat ); hstream.close(); std::ifstream stream( kernel->path.c_str() ); if( !stream.is_open() ) { throw hydrazine::Exception( "Failed to open ParallelismTrace kernel trace file " + kernel->path ); } boost::archive::text_iarchive archive( stream ); EventVector events( header.dimensions ); for( EventVector::iterator event = events.begin(); event != events.end(); ++event ) { archive >> *event; } std::cout << " From file " << kernel->path << "\n"; std::cout << " kernel: " << kernel->name << "\n"; std::cout << " module: " << kernel->module << "\n"; std::cout << " statistics:\n"; std::cout << " ctas: " << header.dimensions << "\n"; std::cout << " threads: " << header.threads << "\n"; InstructionSet instructions; long long unsigned int totalInstructions = 0; double activity = 0; for( EventVector::iterator event = events.begin(); event != events.end(); ++event ) { totalInstructions += event->instructions; instructions.insert( event->instructions ); activity += event->activity * event->instructions; } activity /= totalInstructions + DBL_EPSILON; unsigned int previous = 0; unsigned int count = header.dimensions; double mimd = 0; for( InstructionSet::iterator instruction = 
instructions.begin(); instruction != instructions.end(); ++instruction ) { mimd += (*instruction - previous) * count; previous = *instruction; --count; } if( !instructions.empty() ) { mimd /= *(--instructions.end()) + DBL_EPSILON; } std::cout << " SIMD parallelism: " << activity << "\n"; std::cout << " MIMD parallelism: " << mimd << "\n"; localSIMD += activity; localMIMD += mimd; localCTAs += header.dimensions; } localSIMD /= vector->second.size() + DBL_EPSILON; localMIMD /= vector->second.size() + DBL_EPSILON; localCTAs /= vector->second.size() + DBL_EPSILON; std::cout << " Kernel " << vector->first << " statistics:\n"; std::cout << " average CTAs: " << localCTAs << "\n"; std::cout << " average SIMD parallelism: " << localSIMD << "\n"; std::cout << " average MIMD parallelism: " << localMIMD << "\n"; averageSIMD += localSIMD; averageMIMD += localMIMD; averageCTAs += localCTAs; } averageSIMD /= _kernels.size() + DBL_EPSILON; averageMIMD /= _kernels.size() + DBL_EPSILON; averageCTAs /= _kernels.size() + DBL_EPSILON; std::cout << "Aggregate statistics:\n"; std::cout << " average CTAs: " << averageCTAs << "\n"; std::cout << " average SIMD parallelism: " << averageSIMD << "\n"; std::cout << " average MIMD parallelism: " << averageMIMD << "\n"; }
void trace::InstructionTraceAnalyzer::instructions_by_kernel(bool pyList) const { // sequence of functional units trace::InstructionTraceGenerator::FunctionalUnit funcUnits[] = { InstructionTraceGenerator::Integer_arithmetic, //! integer arithmetic InstructionTraceGenerator::Integer_logical, //! itneger logical InstructionTraceGenerator::Integer_comparison, //! comparison InstructionTraceGenerator::Float_single, //! floating-point single-precision InstructionTraceGenerator::Float_double, //! floating-point, double-precision InstructionTraceGenerator::Float_comparison, //! floating-point comparison InstructionTraceGenerator::Memory_offchip, //! off-chip: {global, local} InstructionTraceGenerator::Memory_onchip, //! cached or scratchpad: {texture, shared, constant} InstructionTraceGenerator::Control, //! control-flow instructions InstructionTraceGenerator::Parallelism, //! parallelism: sync, reduction, vote InstructionTraceGenerator::Special, //! transcendental and special functions InstructionTraceGenerator::Other, //! 
not categorized InstructionTraceGenerator::FunctionalUnit_invalid }; std::cout << "# InstructionTraceAnalyzer - by kernel\n#" << std::endl; std::cout << "\n"; std::cout << "# Python object notation:\n"; std::cout << "# \n"; std::cout << "# map<program name.kernel name, \n"; std::cout << "# map<functional unit, pair<dynamic count, static count, activity> > >\n\n"; std::cout << "kernels = [\n"; for( KernelMap::const_iterator vector = _kernels.begin(); vector != _kernels.end(); ++vector ) { std::string program = vector->first; const KernelVector & kernels = vector->second; std::map< std::string, int > kernelCount; for (KernelVector::const_iterator k_it = kernels.begin(); k_it != kernels.end(); ++k_it) { if (kernelCount.find(k_it->name) == kernelCount.end()) { kernelCount[k_it->name] = 0; std::cout << " '" << program << ":" << k_it->name << "',\n"; } kernelCount[k_it->name] ++; } } std::cout << "]\n\n"; std::cout << "functional_units = [\n"; for (int n = 0; funcUnits[n] != InstructionTraceGenerator::FunctionalUnit_invalid; n++) { std::cout << " '" << trace::InstructionTraceGenerator::toString(funcUnits[n]) << "',\n"; } std::cout << "]\n\n"; std::cout << "results = {\n"; for( KernelMap::const_iterator vector = _kernels.begin(); vector != _kernels.end(); ++vector ) { std::string program = vector->first; const KernelVector & kernels = vector->second; InstructionTraceGenerator::FunctionalUnitCountMap appCounter; std::map< std::string, int > kernelCount; // loop over the kernels for (KernelVector::const_iterator k_it = kernels.begin(); k_it != kernels.end(); ++k_it) { InstructionTraceGenerator::Header header; std::ifstream hstream( k_it->header.c_str() ); boost::archive::text_iarchive harchive( hstream ); if (kernelCount.find(k_it->name) == kernelCount.end()) { kernelCount[k_it->name] = 0; } InstructionTraceGenerator::FunctionalUnitCountMap counter; try { harchive >> header; assert(header.format == TraceGenerator::InstructionTraceFormat); harchive >> counter; } catch 
(boost::archive::archive_exception &exp) { std::cout << "### boost::archive::archive_exception: application '" << program << "' kernel '" << k_it->name << "' "; std::cout << " (exception code: " << toString(exp.code) << ") - stream: '" << k_it->header.c_str() << "'\n"; continue; } // print the program name std::cout << " '" << program << ":" << k_it->name << ":" << kernelCount[k_it->name] << "': {" << std::endl; std::cout << " 'blockDim': [" << header.blockDim.x << ", " << header.blockDim.y << ", " << header.blockDim.z << "],\n"; std::cout << " 'gridDim': [" << header.gridDim.x << ", " << header.gridDim.y << ", " << header.gridDim.z << "],\n"; kernelCount[k_it->name] ++; size_t totalDynamicCount = 0; size_t totalStaticCount = 0; double totalActivity = 0; // print out one bar per functional unit for (int n = 0; funcUnits[n] != InstructionTraceGenerator::FunctionalUnit_invalid; n++) { size_t dynamicCount = 0; size_t staticCount = 0; double activity = 0; // int activeFU = 0; typedef trace::InstructionTraceGenerator::OpcodeCountMap OC; for (OC::iterator op_it = counter[funcUnits[n]].begin(); op_it != counter[funcUnits[n]].end(); ++op_it) { dynamicCount += op_it->second.dynamic_count; staticCount += op_it->second.static_count; if (op_it->second.dynamic_count) { activity += op_it->second.activity; // activeFU++; } } totalDynamicCount += dynamicCount; totalStaticCount += staticCount; totalActivity += activity; // if (activeFU) { // activity /= (double)activeFU; // } if(dynamicCount) activity /= (double)dynamicCount; // write to stdout std::cout << " '" << trace::InstructionTraceGenerator::toString(funcUnits[n]) << "': ( " << dynamicCount << ", " << staticCount << ", " << activity << " )," << std::endl; } if(totalDynamicCount) totalActivity /= (double)totalDynamicCount; std::cout << " '" << "total" << "': ( " << totalDynamicCount << ", " << totalStaticCount << ", " << totalActivity << " )," << std::endl; std::cout << " },\n"; } } std::cout << "}\n"; }
/*! Produces: histogram of dynamic instruction counts for each application x-axis: functional units y-axis: number of dynamic instructions */ void trace::InstructionTraceAnalyzer::instructions_by_application(bool pyList) const { // sequence of functional units trace::InstructionTraceGenerator::FunctionalUnit funcUnits[] = { InstructionTraceGenerator::Integer_arithmetic, //! integer arithmetic InstructionTraceGenerator::Integer_logical, //! itneger logical InstructionTraceGenerator::Integer_comparison, //! comparison InstructionTraceGenerator::Float_single, //! floating-point single-precision InstructionTraceGenerator::Float_double, //! floating-point, double-precision InstructionTraceGenerator::Float_comparison, //! floating-point comparison InstructionTraceGenerator::Memory_offchip, //! off-chip: {global, local} InstructionTraceGenerator::Memory_onchip, //! cached or scratchpad: {texture, shared, constant} InstructionTraceGenerator::Control, //! control-flow instructions InstructionTraceGenerator::Parallelism, //! parallelism: sync, reduction, vote InstructionTraceGenerator::Special, //! transcendental and special functions InstructionTraceGenerator::Other, //! 
not categorized InstructionTraceGenerator::FunctionalUnit_invalid }; std::cout << "# InstructionTraceAnalyzer - by application\n#" << std::endl; std::cout << "\n"; std::cout << "# Python object notation:\n"; std::cout << "# \n"; std::cout << "# map<program name, \n"; std::cout << "# map<functional unit, tuple<dynamic count, static count, activity> > >\n\n"; std::cout << "applications = [\n"; for( KernelMap::const_iterator vector = _kernels.begin(); vector != _kernels.end(); ++vector ) { std::string program = vector->first; std::cout << " '" << program << "',\n"; } std::cout << "]\n\n"; std::cout << "functional_units = [\n"; for (int n = 0; funcUnits[n] != InstructionTraceGenerator::FunctionalUnit_invalid; n++) { std::cout << " '" << trace::InstructionTraceGenerator::toString(funcUnits[n]) << "',\n"; } std::cout << "]\n\n"; std::cout << "results = {\n"; for( KernelMap::const_iterator vector = _kernels.begin(); vector != _kernels.end(); ++vector ) { std::set< std::string > visitedKernels; std::string program = vector->first; const KernelVector & kernels = vector->second; InstructionTraceGenerator::FunctionalUnitCountMap appCounter; // loop over the kernels for (KernelVector::const_iterator k_it = kernels.begin(); k_it != kernels.end(); ++k_it) { InstructionTraceGenerator::Header header; report("Open header file " << k_it->header.c_str()); std::ifstream hstream( k_it->header.c_str() ); boost::archive::text_iarchive harchive( hstream ); InstructionTraceGenerator::FunctionalUnitCountMap counter; try { harchive >> header; assert(header.format == TraceGenerator::InstructionTraceFormat); harchive >> counter; } catch (boost::archive::archive_exception &exp) { std::cout << "### Exception: application '" << program << "' kernel '" << k_it->name << "' "; std::cout << " (exception code: " << toString(exp.code) << ") - stream: '" << k_it->header.c_str() << "'\n"; continue; } // aggregate counts append(appCounter, counter, visitedKernels.find(k_it->name) == visitedKernels.end()); 
visitedKernels.insert(k_it->name); } std::stringstream ssDynamic, ssStatic, ssDynamicComments, ssStaticComments; if (pyList) { ssDynamic << " '" << program << "': ["; ssStatic << " '" << program << "': ["; } else { // print the program name std::cout << " '" << program << "': {" << std::endl; } size_t totalDynamicCount = 0; size_t totalStaticCount = 0; double totalActiveCount = 0; // print out one bar per functional unit for (int n = 0; funcUnits[n] != InstructionTraceGenerator::FunctionalUnit_invalid; n++) { size_t dynamicCount = 0; size_t staticCount = 0; double activity = 0; int activeFU = 0; typedef trace::InstructionTraceGenerator::OpcodeCountMap OC; for (OC::iterator op_it = appCounter[funcUnits[n]].begin(); op_it != appCounter[funcUnits[n]].end(); ++op_it) { dynamicCount += op_it->second.dynamic_count; staticCount += op_it->second.static_count; if (op_it->second.dynamic_count) { activity += op_it->second.activity; activeFU ++; } } totalDynamicCount += dynamicCount; totalStaticCount += staticCount; totalActiveCount += activity; if(dynamicCount != 0) activity /= (double)dynamicCount; if (pyList) { ssDynamic << (n ? ", " : " ") << dynamicCount; ssStatic << (n ? 
", " : " ") << staticCount; ssDynamicComments << " " << trace::InstructionTraceGenerator::toString(funcUnits[n]); ssStaticComments << " " << trace::InstructionTraceGenerator::toString(funcUnits[n]); } else { // write to stdout std::cout << " '" << trace::InstructionTraceGenerator::toString(funcUnits[n]) << "': ( " << dynamicCount << ", " << staticCount << ", " << activity << " )," << std::endl; } } if (pyList) { std::cout << "# " << program << " sequence: " << ssDynamicComments.str() << "\n"; std::cout << ssDynamic.str() << " ] # dynamic\n"; std::cout << ssStatic.str() << " ] # static\n"; } if(totalDynamicCount != 0) totalActiveCount /= (double)totalDynamicCount; std::cout << " '" << "total" << "': ( " << totalDynamicCount << ", " << totalStaticCount << ", " << totalActiveCount << " )," << std::endl; std::cout << " },\n"; } std::cout << "}\n"; }