/**
 * Builds a kernel of the requested type from the input named by f.
 *
 * All members are first put into their empty/default state, then the
 * loader matching readType is invoked.
 *
 * @param f          Passed unchanged to the selected loader (presumably a
 *                   file name or file prefix - confirm against the loaders).
 * @param readType   Selects how the kernel is obtained: read directly
 *                   (kernelGRM) or built from discrete / continuous covariates.
 * @param useColumn  Only forwarded to createKernelFromDiscreteCovariates();
 *                   ignored for every other type.
 *
 * Any other readType is reported through misc.error().
 */
Kernel::Kernel(std::string f, KernelType readType, int useColumn)
{
  // Identity and state flags.
  type = readType;
  name = "default";
  asymmetric = false;
  diagonalized = false;
  normalized = true;

  // Nothing is allocated until one of the loaders below fills it in.
  kernel = NULL;
  N = NULL;
  eigenValues = NULL;
  eigenVectors = NULL;
  genotypes = NULL;
  covariates = NULL;

  if(readType == kernelGRM)
  {
    readKernel(f);
    return;
  }
  if(readType == kernelFromDiscreteCovariates)
  {
    createKernelFromDiscreteCovariates(f, useColumn);
    return;
  }
  if(readType == kernelFromContinuousCovariates)
  {
    createKernelFromContinuousCovariates(f);
    return;
  }

  misc.error("Error: An internal error was happened. Invalid Kernel type when creating a new kernel.", 0);
}
/*
 * For every heap in m_allHeaps: allocate a 4096-byte ION buffer with
 * flags = 0, fill it through a userspace mapping, read it back with
 * readKernel() and check that the destination holds the same pattern and
 * that the byte just past the destination was not overwritten.
 */
TEST_F(Device, KernelRead)
{
    /*
     * 8192 + 1024 bytes leave room for the alignment slack, the 1024-byte
     * offset and the 4096 + 1 bytes touched below. The destination is
     * placed 1024 bytes past an ALIGN()ed address (presumably a page
     * boundary - confirm ALIGN rounds up).
     */
    auto alloc_ptr = std::make_unique<char[]>(8192 + 1024);
    void *buf = (void *)(ALIGN((unsigned long)alloc_ptr.get(), 4096) + 1024);

    for (unsigned int heapMask : m_allHeaps) {
        SCOPED_TRACE(::testing::Message() << "heap " << heapMask);
        int map_fd = -1;
        unsigned int flags = 0;

        ASSERT_EQ(0, ion_alloc_fd(m_ionFd, 4096, 0, heapMask, flags, &map_fd));
        ASSERT_GE(map_fd, 0);

        void *ptr;
        ptr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
        /* mmap() signals failure with MAP_FAILED, not NULL. */
        ASSERT_TRUE(ptr != MAP_FAILED);

        /* Source pattern: byte i holds (char)i. */
        for (int i = 0; i < 4096; i++)
            ((char *)ptr)[i] = i;

        /* Canary one byte past the region readKernel() is asked to fill. */
        ((char*)buf)[4096] = 0x12;
        readKernel(map_fd, buf, 4096);
        ASSERT_EQ(((char*)buf)[4096], 0x12);

        for (int i = 0; i < 4096; i++)
            ASSERT_EQ((char)i, ((char *)buf)[i]);

        ASSERT_EQ(0, munmap(ptr, 4096));
        ASSERT_EQ(0, close(map_fd));
    }
}
// Compresses the image prepared by readKernel() and writes, in this order,
// the setup buffer, the relocated loader and the compressed payload to fo.
// Throws via throwNotCompressible() when the final ratio check fails.
void PackVmlinuzI386::pack(OutputFile *fo)
{
    readKernel();

    // filter configuration: cover the whole uncompressed image
    Filter filter(ph.level);
    filter.buf_len = ph.u_len;
    filter.addvalue = physical_start;  // saves 4 bytes in unfilter code

    // compression configuration
    upx_compress_config_t compress_conf;
    compress_conf.reset();
    // limit stack size needed for runtime decompression
    compress_conf.conf_lzma.max_num_probs = 1846 + (768 << 4); // ushort: ~28 KiB stack
    compressWithFilters(&filter, 512, &compress_conf, getStrategy(filter));

    // symbols the loader needs before it can be relocated
    const unsigned loader_size = getLoaderSize();

    defineDecompressorSymbols();
    defineFilterSymbols(&filter);
    linker->defineSymbol("src_for_decompressor", zimage_offset + loader_size);
    linker->defineSymbol("original_entry", physical_start);
    linker->defineSymbol("stack_offset", stack_offset_during_uncompression);
    relocateLoader();

    // private copy of the loader with the pack header patched in
    MemBuffer loader(loader_size);
    memcpy(loader, getLoader(), loader_size);
    patchPackHeader(loader, loader_size);

    // update the boot sector: size in 16-byte units and payload length
    boot_sect_t * const boot_sector = (boot_sect_t *) ((unsigned char *) setup_buf);
    boot_sector->sys_size = ALIGN_UP(loader_size + ph.c_len, 16u) / 16;
    boot_sector->payload_length = ph.c_len;

    // emit the output file
    fo->write(setup_buf, setup_buf.getSize());
    fo->write(loader, loader_size);
    fo->write(obuf, ph.c_len);
#if 0
    printf("%-13s: setup : %8ld bytes\n", getName(), (long) setup_buf.getSize());
    printf("%-13s: loader : %8ld bytes\n", getName(), (long) loader_size);
    printf("%-13s: compressed : %8ld bytes\n", getName(), (long) ph.c_len);
#endif

    // verify
    verifyOverlappingDecompression();

    // finally check the compression ratio
    if (!checkFinalCompressionRatio(fo))
        throwNotCompressible();
}
int main (void) { int *a; cl_mem a_in; cl_event event; cl_kernel kernel; cl_context context; cl_program program; cl_uint devices_num; char *program_source; cl_device_id device_id; cl_platform_id platform_id; cl_command_queue command_queue; program_source = (char *) calloc (1000, sizeof (char)); program_source = readKernel (); /* number of platforms on the system */ platforms_number (); /* id of the first platform proposed by the system */ platform_id = get_platform (); /* number of devices on the platform specified by platform_id */ devices_num = devices_number (platform_id); /* id of the first device proposed by the system on the platform specified by platform_id */ device_id = create_device (platform_id); /* create a context to stablish a communication channel between the host process and the device */ context = create_context (device_id); /* create a program providing the source code */ program = create_program (context, program_source); /* compile the program for the specific device architecture */ build_program (program, device_id); /* create a kernel given the program */ kernel = create_kernel (program); /* create a memory object, in this case this will be an array of integers of length specified by the LENGTH macro */ a = create_memory_object (LENGTH, "a"); /* create a buffer, this will be allocated on the global memory of the device */ a_in = create_buffer (LENGTH, context, "a_in"); /* assign this buffer as the only kernel argument */ set_kernel_argument (kernel, a_in, 0, "a_in"); /* create a command queue, here we can enqueue tasks for the device specified by device_id */ command_queue = create_command_queue (context, device_id); /* copy the memory object allocated on the host memory into the buffer created on the global memory of the device */ enqueue_write_buffer_task (command_queue, a_in, LENGTH, a, "a_in"); /* enqueue a task to execute the kernel on the device */ event = enqueue_kernel_execution (command_queue, kernel, LENGTH, 0, NULL); 
enqueue_kernel_execution (command_queue, kernel, LENGTH, 1, &event); /* copy the content of the buffer from the global memory of the device to the host memory */ enqueue_read_buffer_task (command_queue, a_in, LENGTH, a, "a_in"); /* print the memory object with the result of the execution */ print_memory_object (a, LENGTH, "a"); return 0; }
void Device::readDMA(int fd, void *buf, size_t size) { ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, fd)); struct ion_test_rw_data ion_test_rw_data = { .ptr = (uint64_t)buf, .offset = 0, .size = size, .write = 0, }; ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_DMA_MAPPING, &ion_test_rw_data)); ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, -1)); } void Device::writeDMA(int fd, void *buf, size_t size) { ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, fd)); struct ion_test_rw_data ion_test_rw_data = { .ptr = (uint64_t)buf, .offset = 0, .size = size, .write = 1, }; ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_DMA_MAPPING, &ion_test_rw_data)); ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, -1)); } void Device::readKernel(int fd, void *buf, size_t size) { ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, fd)); struct ion_test_rw_data ion_test_rw_data = { .ptr = (uint64_t)buf, .offset = 0, .size = size, .write = 0, }; ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_KERNEL_MAPPING, &ion_test_rw_data)); ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, -1)); } void Device::writeKernel(int fd, void *buf, size_t size) { ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, fd)); struct ion_test_rw_data ion_test_rw_data = { .ptr = (uint64_t)buf, .offset = 0, .size = size, .write = 1, }; ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_KERNEL_MAPPING, &ion_test_rw_data)); ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, -1)); } void Device::blowCache() { const size_t bigger_than_cache = 8*1024*1024; void *buf1 = malloc(bigger_than_cache); void *buf2 = malloc(bigger_than_cache); memset(buf1, 0xaa, bigger_than_cache); memcpy(buf2, buf1, bigger_than_cache); free(buf1); free(buf2); } void Device::dirtyCache(void *ptr, size_t size) { /* try to dirty cache lines */ for (size_t i = size-1; i > 0; i--) { ((volatile char *)ptr)[i]; ((char *)ptr)[i] = i; } } TEST_F(Device, KernelReadCached) { auto alloc_ptr = std::make_unique<char[]>(8192 + 1024); void *buf = (void 
*)(ALIGN((unsigned long)alloc_ptr.get(), 4096) + 1024); for (unsigned int heapMask : m_allHeaps) { SCOPED_TRACE(::testing::Message() << "heap " << heapMask); int map_fd = -1; unsigned int flags = ION_FLAG_CACHED; ASSERT_EQ(0, ion_alloc_fd(m_ionFd, 4096, 0, heapMask, flags, &map_fd)); ASSERT_GE(map_fd, 0); void *ptr; ptr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0); ASSERT_TRUE(ptr != NULL); for (int i = 0; i < 4096; i++) ((char *)ptr)[i] = i; ((char*)buf)[4096] = 0x12; readKernel(map_fd, buf, 4096); ASSERT_EQ(((char*)buf)[4096], 0x12); for (int i = 0; i < 4096; i++) ASSERT_EQ((char)i, ((char *)buf)[i]); ASSERT_EQ(0, munmap(ptr, 4096)); ASSERT_EQ(0, close(map_fd)); } }
int main (void) { float *sum; cl_kernel kernel; cl_mem sum_buffer; cl_context context; cl_program program; cl_uint devices_num; char *program_source; cl_device_id device_id; cl_platform_id platform_id; cl_command_queue command_queue; sum = (float *) calloc (NUM_STEPS, sizeof (float)); program_source = (char *) calloc (1000, sizeof (char)); program_source = readKernel (); /* number of platforms on the system */ platforms_number (); /* id of the first platform proposed by the system */ platform_id = get_platform (); /* number of devices on the platform specified by platform_id */ devices_num = devices_number (platform_id); /* id of the first device proposed by the system on the platform specified by platform_id */ device_id = create_device (platform_id); /* create a context to stablish a communication channel between the host process and the device */ context = create_context (device_id); /* create a program providing the source code */ program = create_program (context, program_source); /* compile the program for the specific device architecture */ build_program (program, device_id);\ /* create a kernel given the program */ kernel = create_kernel (program); /* create a memory object, in this case this will be float number that will contain the values of the partial sums */ sum_buffer = create_buffer (context, "sum_buffer", NUM_STEPS); /* assign this buffer as the only kernel argument */ set_kernel_argument (kernel, sum_buffer, 0, "sum_buffer"); /* create a command queue, here we can enqueue tasks for the device specified by device_id */ command_queue = create_command_queue (context, device_id); /* enqueue a task to execute the kernel on the device */ enqueue_kernel_execution (command_queue, kernel, NUM_STEPS); /* copy the content of the buffer from the global memory of the device to the host memory */ enqueue_read_buffer_task (command_queue, sum_buffer, NUM_STEPS, sum, "sum"); printf (ANSI_COLOR_CYAN "\nAproximación de PI: %.10lf\n\n" ANSI_COLOR_RESET, sum[0] / 
NUM_STEPS); return 0; }
void PackBvmlinuzI386::pack(OutputFile *fo) { readKernel(); // prepare filter Filter ft(ph.level); ft.buf_len = (filter_len ? filter_len : (ph.u_len * 3)/5); // May 2008: 3/5 is heuristic to cover most .text but avoid non-instructions. // Otherwise "call trick" filter cannot find a free marker byte, // especially when it searches over tables of data. ft.addvalue = 0; // The destination buffer might be relocated at runtime. upx_compress_config_t cconf; cconf.reset(); // LINUZ001 allows most of low memory as stack for Bvmlinuz cconf.conf_lzma.max_num_probs = (0x90000 - 0x10000)>>1; // ushort: 512 KiB stack compressWithFilters(&ft, 512, &cconf, getStrategy(ft)); // align everything to dword boundary - it is easier to handle unsigned c_len = ph.c_len; memset(obuf + c_len, 0, 4); c_len = ALIGN_UP(c_len, 4u); const unsigned lsize = getLoaderSize(); if (M_IS_LZMA(ph.method)) { const lzma_compress_result_t *res = &ph.compress_result.result_lzma; upx_uint32_t properties = // lc, lp, pb, dummy (res->lit_context_bits << 0) | (res->lit_pos_bits << 8) | (res->pos_bits << 16); if (linker->bele->isBE()) // big endian - bswap32 acc_swab32s(&properties); linker->defineSymbol("lzma_properties", properties); // -2 for properties linker->defineSymbol("lzma_c_len", ph.c_len - 2); linker->defineSymbol("lzma_u_len", ph.u_len); unsigned const stack = getDecompressorWrkmemSize(); linker->defineSymbol("lzma_stack_adjust", 0u - stack); } const int e_len = getLoaderSectionStart("LZCUTPOI"); assert(e_len > 0); if (0==page_offset) { // not relocatable kernel const unsigned d_len4 = ALIGN_UP(lsize - e_len, 4u); const unsigned decompr_pos = ALIGN_UP(ph.u_len + ph.overlap_overhead, 16u); const unsigned copy_size = c_len + d_len4; const unsigned edi = decompr_pos + d_len4 - 4; // copy to const unsigned esi = ALIGN_UP(c_len + lsize, 4u) - 4; // copy from linker->defineSymbol("decompressor", decompr_pos - bzimage_offset + physical_start); linker->defineSymbol("src_for_decompressor", physical_start + 
decompr_pos - c_len); linker->defineSymbol("words_to_copy", copy_size / 4); linker->defineSymbol("copy_dest", physical_start + edi); linker->defineSymbol("copy_source", bzimage_offset + esi); } defineFilterSymbols(&ft); defineDecompressorSymbols(); if (0==page_offset) { linker->defineSymbol("original_entry", physical_start); } linker->defineSymbol("stack_offset", stack_offset_during_uncompression); relocateLoader(); MemBuffer loader(lsize); memcpy(loader, getLoader(), lsize); patchPackHeader(loader, lsize); boot_sect_t * const bs = (boot_sect_t *) ((unsigned char *) setup_buf); bs->sys_size = (ALIGN_UP(lsize + c_len, 16u) / 16); fo->write(setup_buf, setup_buf.getSize()); unsigned const e_pfx = (0==page_offset) ? 0 : getLoaderSectionStart("LINUZ110"); if (0!=page_offset) { fo->write(loader, e_pfx); } else { fo->write(loader, e_len); } fo->write(obuf, c_len); if (0!=page_offset) { fo->write(loader + e_pfx, e_len - e_pfx); } fo->write(loader + e_len, lsize - e_len); #if 0 printf("%-13s: setup : %8ld bytes\n", getName(), (long) setup_buf.getSize()); printf("%-13s: entry : %8ld bytes\n", getName(), (long) e_len); printf("%-13s: compressed : %8ld bytes\n", getName(), (long) c_len); printf("%-13s: decompressor : %8ld bytes\n", getName(), (long) (lsize - e_len)); #endif // verify verifyOverlappingDecompression(); // finally check the compression ratio if (!checkFinalCompressionRatio(fo)) throwNotCompressible(); }