Exemple #1
0
/**
 * Constructs a kernel of the requested type.
 *
 * Every member is first reset to its default; afterwards the routine that
 * matches readType is invoked to read or create the kernel. An unrecognised
 * type is reported through misc.error().
 *
 * @param f          Forwarded unchanged to the selected read/create routine.
 * @param readType   Selects which routine is used to obtain the kernel.
 * @param useColumn  Forwarded only to createKernelFromDiscreteCovariates().
 */
Kernel::Kernel(std::string f, KernelType readType, int useColumn)
{
  type = readType;
  name = "default";

  // No matrix data or decomposition is held yet.
  kernel = NULL;
  N = NULL;
  eigenValues = NULL;
  eigenVectors = NULL;

  // Default state flags.
  asymmetric = false;
  diagonalized = false;
  normalized = true;

  // No auxiliary data attached yet.
  genotypes = NULL;
  covariates = NULL;

  // Dispatch on the requested type; each recognised type returns immediately.
  if(type == kernelGRM)
  {
    readKernel(f);
    return;
  }
  if(type == kernelFromDiscreteCovariates)
  {
    createKernelFromDiscreteCovariates(f, useColumn);
    return;
  }
  if(type == kernelFromContinuousCovariates)
  {
    createKernelFromContinuousCovariates(f);
    return;
  }

  misc.error("Error: An internal error was happened. Invalid Kernel type when creating a new kernel.", 0);
}
/*
 * For every heap in m_allHeaps: allocates 4096 bytes with flags = 0, fills the
 * buffer through a userspace mapping with the pattern byte[i] = (char)i, and
 * checks that readKernel() delivers exactly that pattern into buf without
 * touching the guard byte placed right after the 4096-byte window.
 */
TEST_F(Device, KernelRead)
{
    /* Over-allocate so that buf can be placed 1024 bytes past an ALIGN()ed
     * address (presumably a 4096-byte boundary) and still hold 4096 + 1 bytes. */
    auto alloc_ptr = std::make_unique<char[]>(8192 + 1024);
    void *buf = (void *)(ALIGN((unsigned long)alloc_ptr.get(), 4096) + 1024);

    for (unsigned int heapMask : m_allHeaps) {
        SCOPED_TRACE(::testing::Message() << "heap " << heapMask);
        int map_fd = -1;
        unsigned int flags = 0;

        ASSERT_EQ(0, ion_alloc_fd(m_ionFd, 4096, 0, heapMask, flags, &map_fd));
        ASSERT_GE(map_fd, 0);

        void *ptr;
        ptr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
        /* mmap() reports failure with MAP_FAILED, never with NULL */
        ASSERT_TRUE(ptr != MAP_FAILED);

        /* fill the buffer with a known pattern */
        for (int i = 0; i < 4096; i++)
            ((char *)ptr)[i] = i;

        /* guard byte: must survive the read untouched */
        ((char*)buf)[4096] = 0x12;
        readKernel(map_fd, buf, 4096);
        ASSERT_EQ(((char*)buf)[4096], 0x12);

        for (int i = 0; i < 4096; i++)
            ASSERT_EQ((char)i, ((char *)buf)[i]);

        ASSERT_EQ(0, munmap(ptr, 4096));
        ASSERT_EQ(0, close(map_fd));
    }
}
/*
 * Compresses the kernel loaded by readKernel() and writes the packed image to
 * fo in this order: setup_buf, the relocated loader, the compressed data.
 * Calls throwNotCompressible() when checkFinalCompressionRatio() rejects the
 * result.
 */
void PackVmlinuzI386::pack(OutputFile *fo)
{
    readKernel();

    // set up the filter over ph.u_len bytes
    Filter filter(ph.level);
    filter.buf_len = ph.u_len;
    filter.addvalue = physical_start;  // saves 4 bytes in unfilter code

    // compression settings
    upx_compress_config_t compress_conf;
    compress_conf.reset();
    // keep the stack needed for runtime decompression small
    compress_conf.conf_lzma.max_num_probs = 1846 + (768 << 4); // ushort: ~28 KiB stack
    compressWithFilters(&filter, 512, &compress_conf, getStrategy(filter));

    const unsigned loader_size = getLoaderSize();

    // resolve the loader's symbols, then relocate it
    defineDecompressorSymbols();
    defineFilterSymbols(&filter);
    linker->defineSymbol("src_for_decompressor", zimage_offset + loader_size);
    linker->defineSymbol("original_entry", physical_start);
    linker->defineSymbol("stack_offset", stack_offset_during_uncompression);
    relocateLoader();

    // work on a private copy of the loader so the pack header can be patched in
    MemBuffer loader_image(loader_size);
    memcpy(loader_image, getLoader(), loader_size);
    patchPackHeader(loader_image, loader_size);

    // update the boot sector fields; sys_size is counted in 16-byte units
    boot_sect_t * const boot_sect = (boot_sect_t *) ((unsigned char *) setup_buf);
    boot_sect->sys_size = ALIGN_UP(loader_size + ph.c_len, 16u) / 16;
    boot_sect->payload_length = ph.c_len;

    // emit the output file
    fo->write(setup_buf, setup_buf.getSize());
    fo->write(loader_image, loader_size);
    fo->write(obuf, ph.c_len);
#if 0
    printf("%-13s: setup        : %8ld bytes\n", getName(), (long) setup_buf.getSize());
    printf("%-13s: loader       : %8ld bytes\n", getName(), (long) loader_size);
    printf("%-13s: compressed   : %8ld bytes\n", getName(), (long) ph.c_len);
#endif

    // verify
    verifyOverlappingDecompression();

    // last step: reject the result if the compression ratio is not acceptable
    const bool ratio_ok = checkFinalCompressionRatio(fo);
    if (!ratio_ok)
        throwNotCompressible();
}
/*
 * Host program: obtains the OpenCL kernel source from readKernel (), builds
 * it for the first device of the first platform, enqueues the kernel twice
 * over an integer array of LENGTH elements and prints the array that is read
 * back from the device.
 */
int main (void) {
  int *a;
  cl_mem a_in;
  cl_event event;
  cl_kernel kernel;
  cl_context context;
  cl_program program;
  char *program_source;
  cl_device_id device_id;
  cl_platform_id platform_id;
  cl_command_queue command_queue;

  /* readKernel () hands back the source buffer itself, so nothing is
     allocated here beforehand (a buffer allocated before this call would
     be overwritten by the assignment and leaked) */
  program_source = readKernel ();

  /* number of platforms on the system */
  platforms_number ();

  /* id of the first platform proposed by the system */
  platform_id = get_platform ();

  /* number of devices on the platform specified by platform_id; the
     returned count is not needed below */
  devices_number (platform_id);

  /* id of the first device proposed by the system on the platform
     specified by platform_id */
  device_id = create_device (platform_id);

  /* create a context to establish a communication channel between the
     host process and the device */
  context = create_context (device_id);

  /* create a program providing the source code */
  program = create_program (context, program_source);

  /* compile the program for the specific device architecture */
  build_program (program, device_id);

  /* create a kernel given the program */
  kernel = create_kernel (program);

  /* create a memory object, in this case this will be an array of
     integers of length specified by the LENGTH macro */
  a = create_memory_object (LENGTH, "a");

  /* create a buffer, this will be allocated on the global memory of
     the device */
  a_in = create_buffer (LENGTH, context, "a_in");

  /* assign this buffer as the only kernel argument */
  set_kernel_argument (kernel, a_in, 0, "a_in");

  /* create a command queue, here we can enqueue tasks for the device
     specified by device_id */
  command_queue = create_command_queue (context, device_id);

  /* copy the memory object allocated on the host memory into the
     buffer created on the global memory of the device */
  enqueue_write_buffer_task (command_queue, a_in, LENGTH, a, "a_in");

  /* enqueue the kernel twice; the second call is handed the event of
     the first one (presumably as its wait list -- confirm against
     enqueue_kernel_execution) */
  event = enqueue_kernel_execution (command_queue, kernel, LENGTH, 0, NULL);
  enqueue_kernel_execution (command_queue, kernel, LENGTH, 1, &event);

  /* copy the content of the buffer from the global memory of the
     device to the host memory */
  enqueue_read_buffer_task (command_queue, a_in, LENGTH,  a, "a_in");

  /* print the memory object with the result of the execution */
  print_memory_object (a, LENGTH, "a");

  return 0;
}
void Device::readDMA(int fd, void *buf, size_t size)
{
    ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, fd));
    struct ion_test_rw_data ion_test_rw_data = {
            .ptr = (uint64_t)buf,
            .offset = 0,
            .size = size,
            .write = 0,
    };

    ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_DMA_MAPPING, &ion_test_rw_data));
    ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, -1));
}

void Device::writeDMA(int fd, void *buf, size_t size)
{
    ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, fd));
    struct ion_test_rw_data ion_test_rw_data = {
            .ptr = (uint64_t)buf,
            .offset = 0,
            .size = size,
            .write = 1,
    };

    ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_DMA_MAPPING, &ion_test_rw_data));
    ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, -1));
}

void Device::readKernel(int fd, void *buf, size_t size)
{
    ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, fd));
    struct ion_test_rw_data ion_test_rw_data = {
            .ptr = (uint64_t)buf,
            .offset = 0,
            .size = size,
            .write = 0,
    };

    ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_KERNEL_MAPPING, &ion_test_rw_data));
    ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, -1));
}

void Device::writeKernel(int fd, void *buf, size_t size)
{
    ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, fd));
    struct ion_test_rw_data ion_test_rw_data = {
            .ptr = (uint64_t)buf,
            .offset = 0,
            .size = size,
            .write = 1,
    };

    ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_KERNEL_MAPPING, &ion_test_rw_data));
    ASSERT_EQ(0, ioctl(m_deviceFd, ION_IOC_TEST_SET_FD, -1));
}

/*
 * Best-effort attempt to push other data out of the CPU cache: fills one
 * 8 MiB scratch buffer and copies it into a second one, then releases both.
 * If either allocation fails the fill/copy is skipped instead of writing
 * through a null pointer.
 */
void Device::blowCache()
{
    const size_t bigger_than_cache = 8*1024*1024;
    void *buf1 = malloc(bigger_than_cache);
    void *buf2 = malloc(bigger_than_cache);
    /* malloc() may fail; only touch the buffers when both exist */
    if (buf1 != NULL && buf2 != NULL) {
        memset(buf1, 0xaa, bigger_than_cache);
        memcpy(buf2, buf1, bigger_than_cache);
    }
    /* free(NULL) is a no-op, so both calls are safe in every case */
    free(buf1);
    free(buf2);
}

/*
 * Reads and then rewrites every byte of [ptr, ptr + size), walking from the
 * last byte down to the first; byte i receives the value (char)i.
 * A size of 0 touches nothing.
 */
void Device::dirtyCache(void *ptr, size_t size)
{
    /* try to dirty cache lines */
    /* "i-- > 0" counts size-1 .. 0: it includes byte 0 and cannot wrap
     * around when size is 0, unlike starting at size-1 with "i > 0" */
    for (size_t i = size; i-- > 0; ) {
        ((volatile char *)ptr)[i];   /* volatile read so the load is not dropped */
        ((char *)ptr)[i] = i;
    }
}

/*
 * For every heap in m_allHeaps: allocates 4096 bytes with ION_FLAG_CACHED,
 * fills the buffer through a userspace mapping with the pattern
 * byte[i] = (char)i, and checks that readKernel() delivers exactly that
 * pattern into buf without touching the guard byte placed right after the
 * 4096-byte window.
 */
TEST_F(Device, KernelReadCached)
{
    /* Over-allocate so that buf can be placed 1024 bytes past an ALIGN()ed
     * address (presumably a 4096-byte boundary) and still hold 4096 + 1 bytes. */
    auto alloc_ptr = std::make_unique<char[]>(8192 + 1024);
    void *buf = (void *)(ALIGN((unsigned long)alloc_ptr.get(), 4096) + 1024);

    for (unsigned int heapMask : m_allHeaps) {
        SCOPED_TRACE(::testing::Message() << "heap " << heapMask);
        int map_fd = -1;
        unsigned int flags = ION_FLAG_CACHED;

        ASSERT_EQ(0, ion_alloc_fd(m_ionFd, 4096, 0, heapMask, flags, &map_fd));
        ASSERT_GE(map_fd, 0);

        void *ptr;
        ptr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
        /* mmap() reports failure with MAP_FAILED, never with NULL */
        ASSERT_TRUE(ptr != MAP_FAILED);

        /* fill the buffer with a known pattern */
        for (int i = 0; i < 4096; i++)
            ((char *)ptr)[i] = i;

        /* guard byte: must survive the read untouched */
        ((char*)buf)[4096] = 0x12;
        readKernel(map_fd, buf, 4096);
        ASSERT_EQ(((char*)buf)[4096], 0x12);

        for (int i = 0; i < 4096; i++)
            ASSERT_EQ((char)i, ((char *)buf)[i]);

        ASSERT_EQ(0, munmap(ptr, 4096));
        ASSERT_EQ(0, close(map_fd));
    }
}
/*
 * Host program: obtains the OpenCL kernel source from readKernel (), builds
 * it for the first device of the first platform, runs the kernel over
 * NUM_STEPS work items, reads the result buffer back into sum and prints
 * sum[0] / NUM_STEPS as the approximation of PI.
 */
int main (void) {
  float *sum;
  cl_kernel kernel;
  cl_mem sum_buffer;
  cl_context context;
  cl_program program;
  char *program_source;
  cl_device_id device_id;
  cl_platform_id platform_id;
  cl_command_queue command_queue;

  /* host-side array that receives the content of sum_buffer */
  sum = (float *) calloc (NUM_STEPS, sizeof (float));

  /* readKernel () hands back the source buffer itself, so nothing is
     allocated here beforehand (a buffer allocated before this call would
     be overwritten by the assignment and leaked) */
  program_source = readKernel ();

  /* number of platforms on the system */
  platforms_number ();

  /* id of the first platform proposed by the system */
  platform_id = get_platform ();

  /* number of devices on the platform specified by platform_id; the
     returned count is not needed below */
  devices_number (platform_id);

  /* id of the first device proposed by the system on the platform
     specified by platform_id */
  device_id = create_device (platform_id);

  /* create a context to establish a communication channel between the
     host process and the device */
  context = create_context (device_id);

  /* create a program providing the source code */
  program = create_program (context, program_source);

  /* compile the program for the specific device architecture */
  build_program (program, device_id);

  /* create a kernel given the program */
  kernel = create_kernel (program);

  /* create a buffer of NUM_STEPS floats that will contain the values
     of the partial sums */
  sum_buffer = create_buffer (context, "sum_buffer", NUM_STEPS);

  /* assign this buffer as the only kernel argument */
  set_kernel_argument (kernel, sum_buffer, 0, "sum_buffer");

  /* create a command queue, here we can enqueue tasks for the device
     specified by device_id */
  command_queue = create_command_queue (context, device_id);

  /* enqueue a task to execute the kernel on the device */
  enqueue_kernel_execution (command_queue, kernel, NUM_STEPS);

  /* copy the content of the buffer from the global memory of the
     device to the host memory */
  enqueue_read_buffer_task (command_queue, sum_buffer, NUM_STEPS, sum, "sum");

  printf (ANSI_COLOR_CYAN "\nAproximación de PI: %.10lf\n\n" ANSI_COLOR_RESET, sum[0] / NUM_STEPS);

  /* release the host array allocated at the top of this function */
  free (sum);

  return 0;
}
/*
 * Compresses the kernel loaded by readKernel() and writes the packed image
 * to fo.
 *
 * Output layout, as written below:
 *   page_offset == 0 : setup_buf, loader[0, e_len), compressed data,
 *                      loader[e_len, lsize)
 *   page_offset != 0 : setup_buf, loader[0, e_pfx), compressed data,
 *                      loader[e_pfx, e_len), loader[e_len, lsize)
 * where e_len is the start of loader section "LZCUTPOI" and e_pfx the start
 * of loader section "LINUZ110". The compressed data is zero-padded to a
 * multiple of 4 bytes.
 *
 * Calls throwNotCompressible() when checkFinalCompressionRatio() rejects the
 * result.
 */
void PackBvmlinuzI386::pack(OutputFile *fo)
{
    readKernel();

    // prepare filter
    Filter ft(ph.level);
    // use filter_len when it is non-zero, otherwise 3/5 of the uncompressed length
    ft.buf_len = (filter_len ? filter_len : (ph.u_len * 3)/5);
    // May 2008: 3/5 is heuristic to cover most .text but avoid non-instructions.
    // Otherwise "call trick" filter cannot find a free marker byte,
    // especially when it searches over tables of data.
    ft.addvalue = 0;  // The destination buffer might be relocated at runtime.

    upx_compress_config_t cconf; cconf.reset();
    // LINUZ001 allows most of low memory as stack for Bvmlinuz
    cconf.conf_lzma.max_num_probs = (0x90000 - 0x10000)>>1; // ushort: 512 KiB stack

    compressWithFilters(&ft, 512, &cconf, getStrategy(ft));

    // align everything to dword boundary - it is easier to handle
    // (the 4 bytes after the compressed data are zeroed first, so the padding is zero)
    unsigned c_len = ph.c_len;
    memset(obuf + c_len, 0, 4);
    c_len = ALIGN_UP(c_len, 4u);

    const unsigned lsize = getLoaderSize();

    if (M_IS_LZMA(ph.method)) {
        // pack the three LZMA parameters into one 32-bit word, one per byte
        const lzma_compress_result_t *res = &ph.compress_result.result_lzma;
        upx_uint32_t properties = // lc, lp, pb, dummy
            (res->lit_context_bits << 0) |
            (res->lit_pos_bits << 8) |
            (res->pos_bits << 16);
        if (linker->bele->isBE()) // big endian - bswap32
            acc_swab32s(&properties);
        linker->defineSymbol("lzma_properties", properties);
        // -2 for properties
        // (uses the unpadded ph.c_len, not the dword-aligned c_len)
        linker->defineSymbol("lzma_c_len", ph.c_len - 2);
        linker->defineSymbol("lzma_u_len", ph.u_len);
        unsigned const stack = getDecompressorWrkmemSize();
        // unsigned negation of the work-memory size
        linker->defineSymbol("lzma_stack_adjust", 0u - stack);
    }

    // e_len: offset of loader section "LZCUTPOI"; the loader is split at this
    // point when it is written out below
    const int e_len = getLoaderSectionStart("LZCUTPOI");
    assert(e_len > 0);

    if (0==page_offset) {  // not relocatable kernel
        // size of the loader part after the cut point, rounded up to a dword
        const unsigned d_len4 = ALIGN_UP(lsize - e_len, 4u);
        const unsigned decompr_pos = ALIGN_UP(ph.u_len + ph.overlap_overhead, 16u);
        const unsigned copy_size = c_len + d_len4;
        // edi/esi address the LAST dword of destination and source
        // (presumably the loader copies backwards -- confirm against the stub)
        const unsigned edi = decompr_pos + d_len4 - 4;          // copy to
        const unsigned esi = ALIGN_UP(c_len + lsize, 4u) - 4;   // copy from

        linker->defineSymbol("decompressor", decompr_pos - bzimage_offset + physical_start);
        linker->defineSymbol("src_for_decompressor", physical_start + decompr_pos - c_len);
        linker->defineSymbol("words_to_copy", copy_size / 4);
        linker->defineSymbol("copy_dest", physical_start + edi);
        linker->defineSymbol("copy_source", bzimage_offset + esi);
    }

    defineFilterSymbols(&ft);
    defineDecompressorSymbols();
    // "original_entry" is only defined for the non-relocatable case
    if (0==page_offset) {
        linker->defineSymbol("original_entry", physical_start);
    }
    linker->defineSymbol("stack_offset", stack_offset_during_uncompression);
    relocateLoader();

    // work on a private copy of the relocated loader so the pack header can be patched in
    MemBuffer loader(lsize);
    memcpy(loader, getLoader(), lsize);
    patchPackHeader(loader, lsize);

    // sys_size is stored in units of 16 bytes
    boot_sect_t * const bs = (boot_sect_t *) ((unsigned char *) setup_buf);
    bs->sys_size = (ALIGN_UP(lsize + c_len, 16u) / 16);

    fo->write(setup_buf, setup_buf.getSize());

    // e_pfx: offset of loader section "LINUZ110"; only used when page_offset != 0
    unsigned const e_pfx = (0==page_offset) ? 0 : getLoaderSectionStart("LINUZ110");
    if (0!=page_offset) {
        // only loader[0, e_pfx) precedes the compressed data
        fo->write(loader, e_pfx);
    }
    else {
        // all of loader[0, e_len) precedes the compressed data
        fo->write(loader, e_len);
    }
    fo->write(obuf, c_len);
    if (0!=page_offset) {
        // loader[e_pfx, e_len) follows the compressed data
        fo->write(loader + e_pfx, e_len - e_pfx);
    }
    // remainder of the loader from the cut point onwards
    fo->write(loader + e_len, lsize - e_len);
#if 0
    printf("%-13s: setup        : %8ld bytes\n", getName(), (long) setup_buf.getSize());
    printf("%-13s: entry        : %8ld bytes\n", getName(), (long) e_len);
    printf("%-13s: compressed   : %8ld bytes\n", getName(), (long) c_len);
    printf("%-13s: decompressor : %8ld bytes\n", getName(), (long) (lsize - e_len));
#endif

    // verify
    verifyOverlappingDecompression();

    // finally check the compression ratio
    if (!checkFinalCompressionRatio(fo))
        throwNotCompressible();
}