Beispiel #1
0
// System (SYS) interrupt handler.
//
// Services two sources, each only when it is both pending and enabled:
//  - DBGU RXRDY: a character arrived on the debug unit. Both the RXRDY and
//    the RTTINC interrupts are switched off; then either the "press any key"
//    wait is ended (wait_status == 1) or an XMODEM receive into
//    LINUX_BASE_ADDRESS is started, followed by run_kernel().
//  - ST RTTINC: real-time timer increment. Toggles PB27 and either prints the
//    countdown digit (calling run_kernel() once the counter reaches '6') or
//    prints 'C'.
extern void sys_handler(void) {
    // Raw status masked with the interrupt mask register of each peripheral.
    unsigned st_status = AT91C_BASE_ST->ST_SR & AT91C_BASE_ST->ST_IMR;
    // Not used below; the register reads are kept exactly as they were.
    unsigned rtc_status = AT91C_BASE_RTC->RTC_SR & AT91C_BASE_RTC->RTC_IMR;
    unsigned dbgu_status = AT91C_BASE_DBGU->DBGU_CSR & AT91C_BASE_DBGU->DBGU_IMR;
    // Countdown digit printed on every timer tick while waiting for a key.
    static unsigned char counter = '0';

    if (dbgu_status & AT91C_US_RXRDY) {
        // disable RXRDY interrupt in DBGU
        // Plain write, same as for ST_IDR below: the disable register only
        // needs the bits to switch off, and a read-modify-write would depend
        // on whatever a read of that register happens to return.
        AT91C_BASE_DBGU->DBGU_IDR = AT91C_US_RXRDY;
        // disable rtt interrupt flag
        AT91C_BASE_ST->ST_IDR = AT91C_ST_RTTINC;
        if (wait_status == 1) {
            // A key was pressed during the initial wait: just end the wait.
            wait_status = 0;
        }
        else {
            transfer_size = dbgu_xmod_recv((void *)LINUX_BASE_ADDRESS);
            run_kernel();
        }
    }

    // handler of rtt - rttinc
    if (st_status & AT91C_ST_RTTINC) {
        // Toggle PB27 on every tick.
        AT91C_BASE_PIOB->PIO_ODSR ^= AT91C_PIO_PB27;
        if (wait_status == 1) {
            put_char(counter);
            counter++;
            // No key before the counter reaches '6': start the kernel.
            if (counter == '6')
                run_kernel();
            put_char(' ');
        }
        else
            put_char('C');
    }
}
Beispiel #2
0
// Runs the fill_index and twice kernels over one device buffer, then sums
// the buffer on the host side and checks the total against arith(2, item_count).
// The result must become available within 5 seconds of entering the function.
// Always returns true; failures are reported through assert.
bool
check(misc::runner const & i_runner, host::generic_program i_program)
{
    // Fix the deadline first so the 5 second budget covers the kernel runs too.
    chrono::steady_clock::time_point const deadline =
        chrono::steady_clock::now() + chrono::seconds(5);

    using iterator = host::buffer<pfm::int_>::const_iterator;
    host::buffer<pfm::int_> buffer(i_runner.m_context, item_count);

    // Queue both kernels on the same buffer, in this order.
    i_runner.m_queue(run_kernel(i_program, fill_index(buffer), item_count));
    i_runner.m_queue(run_kernel(i_program, twice(buffer), item_count));

    // Queue a host-side reduction over the buffer contents.
    auto future = i_runner.m_queue(
        buffer.with_range(
            [](iterator i_first, iterator i_last) {
                return std::accumulate(i_first, i_last, 0);
            }));

    std::future_status result = future.wait_until(deadline);
    assert(result == std::future_status::ready);
    assert(future.get() == arith(2, item_count));

    return true;
}
Beispiel #3
0
END_TEST

START_TEST (test_builtins)
{
    // Messages for the failure codes this test knows about; code N maps to
    // entry N - 1. Every other code is handed to default_error().
    static const char *const known_failures[] = {
        "float2 cos(float2) doesn't behave correctly",
        "float cos(float) doesn't behave correctly",
        "float copysign(float) doesn't behave correctly",
        "float2 copysign(float2) doesn't behave correctly",
        "exp2() doesn't behave correctly"
    };
    const uint32_t known_count = sizeof(known_failures) / sizeof(known_failures[0]);

    const uint32_t rs = run_kernel(builtins_source, NormalKind);
    const char *errstr;

    if (rs >= 1 && rs <= known_count)
        errstr = known_failures[rs - 1];
    else
        errstr = default_error(rs);

    // A non-null message means failure and is used as the failure text.
    fail_if(errstr != 0, errstr);
}
Beispiel #4
0
END_TEST

START_TEST (test_image)
{
    // Messages for the failure codes this test knows about; code N maps to
    // entry N - 1. Every other code is handed to default_error().
    // NOTE(review): codes 1 and 2 carry the same text; code 2 presumably
    // stands for a different check - confirm against the image kernel source.
    static const char *const known_failures[] = {
        "Image1 must have width of 4",
        "Image1 must have width of 4",
        "Image2 must have type SIGNED_FLOAT16",
        "Image2 must have channel order RGBA",
        "The value read from the image is not good"
    };
    const uint32_t known_count = sizeof(known_failures) / sizeof(known_failures[0]);

    const uint32_t rs = run_kernel(image_source, ImageKind);
    const char *errstr;

    if (rs >= 1 && rs <= known_count)
        errstr = known_failures[rs - 1];
    else
        errstr = default_error(rs);

    // A non-null message means failure and is used as the failure text.
    fail_if(errstr != 0, errstr);
}
Beispiel #5
0
/*
 * Fills table_o from table_i and table_q, either on the OpenCL device
 * (when OPENCL_MU1 is defined) or with the reference C loop below.
 *
 * None of the parameters are read by the current implementation; both paths
 * work on the file-level tables only.
 */
void process_cfm_by_gpu(unsigned char *pDataDst, int nDstWidth, int nDstHeight, short *pSrcData, int nSrcWidth, int nSrcHeight) {

#ifdef OPENCL_MU1

    /* Device path: upload inputs, launch the kernel, read the result back. */
    LOGD("MU1 ---------------------- input start");
    set_input_i_to_kernel();
    set_input_o_to_kernel();
    LOGD("MU1 ---------------------- run kernel");
    run_kernel();
    LOGD("MU1 ---------------------- get output");
    get_output_from_kernel();
    LOGD("MU1 ---------------------- end");

#else

    /* Loop indices are only needed by the CPU path. */
    int i, j;

    LOGD("MU1 ---------------------- start C");
    for (i = 0; i < 512; i++) {
        for (j = 0; j < 1024; j++) {
            /* Same 16-factor product as before, wrapped for readability;
             * the factor order is unchanged. */
            table_o[i][j] = (table_i[i][j]+3)*(table_q[i][j]+3)
                           *(table_i[i][j]+2)*(table_q[i][j]+2)
                           *(table_i[i][j]+1)*(table_q[i][j]+1)
                           *(table_i[i][j])*(table_q[i][j])
                           *(table_i[i][j]+3)*(table_q[i][j]+3)
                           *(table_i[i][j]+2)*(table_q[i][j]+2)
                           *(table_i[i][j]+1)*(table_q[i][j]+1)
                           *(table_i[i][j])*(table_q[i][j]);
            table_o[i][j] += sin((100.0)/(table_i[i][j]+table_q[i][j]))*1000;
            table_o[i][j] += sqrt(table_i[i][j]) + sqrt(table_q[i][j]) + sqrt(table_i[i][j]+table_q[i][j]);
        }
    }
    LOGD("MU1 ---------------------- end");

#endif

}
Beispiel #6
0
END_TEST

START_TEST (test_barrier)
{
    // The barrier kernel passes only when it reports exactly 0x40; any other
    // value fails the test with the text produced by default_error().
    const uint32_t rs = run_kernel(barrier_source, BarrierKind);

    fail_if(rs != 0x40, default_error(rs));
}
Beispiel #7
0
// Checks two things about the `twice` function of i_program:
//  1. after the fill_index kernel has run, the host-side sum of the buffer
//     equals arith(2, item_count) and is available within 5 seconds;
//  2. launching `twice` directly from the host is rejected with
//     CL_INVALID_KERNEL_NAME.
// Always returns true; failures are reported through assert.
bool
check(misc::runner const & i_runner, host::generic_program i_program)
{
    chrono::steady_clock::time_point tp = chrono::steady_clock::now();  

    host::buffer<pfm::int_> bufWrite(i_runner.m_context, item_count);
    typedef host::buffer<pfm::int_>::const_iterator iterator;

    // Verify that it can be used from inside a kernel
    i_runner.m_queue(
        run_kernel(
            i_program, 
            fill_index(bufWrite), 
            item_count));


    auto future = 
        i_runner.m_queue(
            bufWrite.with_range(
                [](iterator i_begin, iterator i_end){
                    return std::accumulate(i_begin, i_end, 0);
                }));
    std::future_status result = 
        future.wait_until(tp + chrono::seconds(5));

    assert(result == std::future_status::ready);
    assert(future.get() == arith(2, item_count));

    // Verify that it cannot be called from the host
    try {
        int const a = 1; // gcc-4.7.2: writing twice(1) directly causes an internal compiler error
        i_runner.m_queue(
            host::run_kernel(
                i_program,
                twice(a),
                1)
        );
        // Reaching this line means the launch was accepted, which is a failure.
        assert(false); 
    }
    // Catch by const reference: no copy of the exception object is made.
    catch (cl::Error const & err) {
        assert(err.err() == CL_INVALID_KERNEL_NAME);
    }

    return true;
}
Beispiel #8
0
/**
 * Run a single-candidate self-test of the OpenCL kernels on one device.
 *
 * The known self-test password (hashconfig->st_pass) is uploaded to the
 * device, the kernel sequence for the configured attack is executed with the
 * self-test digest/salt buffers bound as kernel parameters, and one u32 is
 * read back from d_result. Afterwards the kernel parameters are pointed back
 * at the regular buffers and the device buffers used on the way are zeroed.
 *
 * Returns 0 when no self-test hash is configured or when the value read from
 * d_result is non-zero. Returns -1 when an OpenCL call or a kernel launch
 * fails, or when the value read from d_result is zero (an error is logged in
 * that case). The early -1 returns happen before the restore/cleanup section.
 */
static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
{
  hashconfig_t         *hashconfig         = hashcat_ctx->hashconfig;
  hashes_t             *hashes             = hashcat_ctx->hashes;
  status_ctx_t         *status_ctx         = hashcat_ctx->status_ctx;
  user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra;

  cl_int CL_err;

  int CL_rc;

  // nothing to test when the hash mode provides no self-test hash

  if (hashconfig->st_hash == NULL) return 0;

  // init : replace hashes with selftest hash

  device_param->kernel_params[15] = &device_param->d_st_digests_buf;
  device_param->kernel_params[17] = &device_param->d_st_salts_buf;
  device_param->kernel_params[18] = &device_param->d_st_esalts_buf;

  device_param->kernel_params_buf32[31] = 1;
  device_param->kernel_params_buf32[32] = 0;

  // password : move the known password into a fake buffer

  // only the brute-force (non-bitslice) branch below updates this value

  u32 highest_pw_len = 0;

  if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
  {
    if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
    {
      // straight : the whole password goes into the first pws slot

      device_param->kernel_params_buf32[30] = 1;

      pw_t pw; memset (&pw, 0, sizeof (pw));

      char *pw_ptr = (char *) &pw.i;

      const size_t pw_len = strlen (hashconfig->st_pass);

      memcpy (pw_ptr, hashconfig->st_pass, pw_len);

      pw.pw_len = (u32) pw_len;

      if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER)
      {
        uppercase ((u8 *) pw_ptr, pw.pw_len);
      }

      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);

      if (CL_err != CL_SUCCESS) return -1;
    }
    else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
    {
      // combinator : the password is split, all but the last character go
      // into the pws slot and the last character into the combs slot

      device_param->kernel_params_buf32[30] = 1;
      device_param->kernel_params_buf32[33] = COMBINATOR_MODE_BASE_LEFT;

      pw_t pw; memset (&pw, 0, sizeof (pw));

      char *pw_ptr = (char *) &pw.i;

      const size_t pw_len = strlen (hashconfig->st_pass);

      memcpy (pw_ptr, hashconfig->st_pass, pw_len - 1);

      pw.pw_len = (u32) pw_len - 1;

      if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER)
      {
        uppercase ((u8 *) pw_ptr, pw.pw_len);
      }

      pw_t comb; memset (&comb, 0, sizeof (comb));

      char *comb_ptr = (char *) &comb.i;

      memcpy (comb_ptr, hashconfig->st_pass + pw_len - 1, 1);

      comb.pw_len = 1;

      if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER)
      {
        uppercase ((u8 *) comb_ptr, comb.pw_len);
      }

      // optional marker byte written right behind the single character

      if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)
      {
        comb_ptr[comb.pw_len] = 0x01;
      }

      if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
      {
        comb_ptr[comb.pw_len] = 0x80;
      }

      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_combs_c, CL_TRUE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL);

      if (CL_err != CL_SUCCESS) return -1;

      CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);

      if (CL_err != CL_SUCCESS) return -1;
    }
    else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
    {
      device_param->kernel_params_buf32[30] = 1;

      if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE)
      {
        // bitslice : the whole password goes into the first pws slot

        pw_t pw; memset (&pw, 0, sizeof (pw));

        char *pw_ptr = (char *) &pw.i;

        const size_t pw_len = strlen (hashconfig->st_pass);

        memcpy (pw_ptr, hashconfig->st_pass, pw_len);

        if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER)
        {
          uppercase ((u8 *) pw_ptr, pw_len);
        }

        pw.pw_len = (u32) pw_len;

        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);

        if (CL_err != CL_SUCCESS) return -1;
      }
      else
      {
        // the first character of the password goes into the bf slot,
        // widened to two bytes for the UTF16 variants

        bf_t bf; memset (&bf, 0, sizeof (bf));

        char *bf_ptr = (char *) &bf.i;

        memcpy (bf_ptr, hashconfig->st_pass, 1);

        if (hashconfig->opts_type & OPTS_TYPE_PT_UTF16LE)
        {
          memset (bf_ptr, 0, 4);

          for (int i = 0, j = 0; i < 1; i += 1, j += 2)
          {
            bf_ptr[j + 0] = hashconfig->st_pass[i];
            bf_ptr[j + 1] = 0;
          }
        }
        else if (hashconfig->opts_type & OPTS_TYPE_PT_UTF16BE)
        {
          memset (bf_ptr, 0, 4);

          for (int i = 0, j = 0; i < 1; i += 1, j += 2)
          {
            bf_ptr[j + 0] = 0;
            bf_ptr[j + 1] = hashconfig->st_pass[i];
          }
        }

        if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER)
        {
          uppercase ((u8 *) bf_ptr, 4);
        }

        if (hashconfig->opts_type & OPTS_TYPE_PT_GENERATE_BE)
        {
          bf.i = byte_swap_32 (bf.i);
        }

        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_bfs_c, CL_TRUE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, NULL);

        if (CL_err != CL_SUCCESS) return -1;

        // the remaining characters go into the pws slot at their original
        // offset, the position of the first character stays zero

        pw_t pw; memset (&pw, 0, sizeof (pw));

        char *pw_ptr = (char *) &pw.i;

        const size_t pw_len = strlen (hashconfig->st_pass);

        memcpy (pw_ptr + 1, hashconfig->st_pass + 1, pw_len - 1);

        size_t new_pass_len = pw_len;

        if (hashconfig->opts_type & OPTS_TYPE_PT_UTF16LE)
        {
          memset (pw_ptr, 0, pw_len);

          for (size_t i = 1, j = 2; i < new_pass_len; i += 1, j += 2)
          {
            pw_ptr[j + 0] = hashconfig->st_pass[i];
            pw_ptr[j + 1] = 0;
          }

          new_pass_len *= 2;
        }
        else if (hashconfig->opts_type & OPTS_TYPE_PT_UTF16BE)
        {
          memset (pw_ptr, 0, pw_len);

          for (size_t i = 1, j = 2; i < new_pass_len; i += 1, j += 2)
          {
            pw_ptr[j + 0] = 0;
            pw_ptr[j + 1] = hashconfig->st_pass[i];
          }

          new_pass_len *= 2;
        }

        if (hashconfig->opts_type & OPTS_TYPE_PT_UPPER)
        {
          uppercase ((u8 *) pw_ptr, new_pass_len);
        }

        if (hashconfig->opti_type & OPTI_TYPE_SINGLE_HASH)
        {
          if (hashconfig->opti_type & OPTI_TYPE_APPENDED_SALT)
          {
            // the copy fills the buffer up to byte 64, the length only grows by salt_len

            memcpy (pw_ptr + new_pass_len, (char *) hashes->st_salts_buf[0].salt_buf, 64 - new_pass_len);

            new_pass_len += hashes->st_salts_buf[0].salt_len;
          }
        }

        pw.pw_len = (u32) new_pass_len;

        if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01)
        {
          pw_ptr[new_pass_len] = 0x01;
        }

        if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80)
        {
          pw_ptr[new_pass_len] = 0x80;
        }

        // length in bits stored in word 14 or word 15

        if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14)
        {
          pw.i[14] = (u32) new_pass_len * 8;
          pw.i[15] = 0;
        }

        if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15)
        {
          pw.i[14] = 0;
          pw.i[15] = (u32) new_pass_len * 8;
        }

        if (hashconfig->opts_type & OPTS_TYPE_PT_GENERATE_BE)
        {
          for (int i = 0; i < 14; i++) pw.i[i] = byte_swap_32 (pw.i[i]);
        }

        CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);

        if (CL_err != CL_SUCCESS) return -1;

        highest_pw_len = pw.pw_len;
      }
    }
  }
  else
  {
    // outside-kernel attack : the whole password goes into the first pws slot

    pw_t pw; memset (&pw, 0, sizeof (pw));

    char *pw_ptr = (char *) &pw.i;

    const size_t pw_len = strlen (hashconfig->st_pass);

    memcpy (pw_ptr, hashconfig->st_pass, pw_len);

    pw.pw_len = (u32) pw_len;

    CL_err = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL);

    if (CL_err != CL_SUCCESS) return -1;
  }

  // main : run the kernel

  if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
  {
    if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
    {
      // optimized kernels : the kernel is chosen by highest_pw_len (< 16, < 32, otherwise)

      if (highest_pw_len < 16)
      {
        CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_1, 1, false, 0);

        if (CL_rc == -1) return -1;
      }
      else if (highest_pw_len < 32)
      {
        CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_2, 1, false, 0);

        if (CL_rc == -1) return -1;
      }
      else
      {
        CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_3, 1, false, 0);

        if (CL_rc == -1) return -1;
      }
    }
    else
    {
      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_4, 1, false, 0);

      if (CL_rc == -1) return -1;
    }
  }
  else
  {
    // missing handling hooks

    CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_1, 1, false, 0);

    if (CL_rc == -1) return -1;

    if (hashconfig->opts_type & OPTS_TYPE_HOOK12)
    {
      // the hook buffer is read back and written again unchanged, no host-side processing happens here

      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_12, 1, false, 0);

      if (CL_rc == -1) return -1;

      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);

      if (CL_rc == -1) return -1;

      // do something with data

      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);

      if (CL_rc == -1) return -1;
    }

    const u32 salt_pos = 0;

    salt_t *salt_buf = &hashes->st_salts_buf[salt_pos];

    // a fixed loop count of 0 falls back to a step of 1

    const u32 kernel_loops_fixed = hashconfig_get_kernel_loops (hashcat_ctx);

    const u32 loop_step = (kernel_loops_fixed) ? kernel_loops_fixed : 1;

    const u32 iter = salt_buf->salt_iter;

    // run the loop kernel in chunks of loop_step until salt_iter iterations are covered

    for (u32 loop_pos = 0; loop_pos < iter; loop_pos += loop_step)
    {
      u32 loop_left = iter - loop_pos;

      loop_left = MIN (loop_left, loop_step);

      device_param->kernel_params_buf32[28] = loop_pos;
      device_param->kernel_params_buf32[29] = loop_left;

      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_2, 1, false, 0);

      if (CL_rc == -1) return -1;
    }

    if (hashconfig->opts_type & OPTS_TYPE_HOOK23)
    {
      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_23, 1, false, 0);

      if (CL_rc == -1) return -1;

      CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);

      if (CL_rc == -1) return -1;

      /*
       * The following section depends on the hash mode
       */

      switch (hashconfig->hash_mode)
      {
        // for 7z we only need device_param->hooks_buf, but other hooks could use any info from device_param. All of them should/must update hooks_buf
        case 11600: seven_zip_hook_func (device_param, hashes->st_hook_salts_buf, 0, 1); break;
      }

      /*
       * END of hash mode specific hook operations
       */

      CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL);

      if (CL_rc == -1) return -1;
    }

    if (hashconfig->opts_type & OPTS_TYPE_INIT2)
    {
      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_INIT2, 1, false, 0);

      if (CL_rc == -1) return -1;
    }

    if (hashconfig->opts_type & OPTS_TYPE_LOOP2)
    {
      // second loop stage : same chunking as above, driven by salt_iter2

      const u32 iter2 = salt_buf->salt_iter2;

      for (u32 loop_pos = 0; loop_pos < iter2; loop_pos += loop_step)
      {
        u32 loop_left = iter2 - loop_pos;

        loop_left = MIN (loop_left, loop_step);

        device_param->kernel_params_buf32[28] = loop_pos;
        device_param->kernel_params_buf32[29] = loop_left;

        CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_LOOP2, 1, false, 0);

        if (CL_rc == -1) return -1;
      }
    }

    // hash modes 2500 and 2501 finish with the AUX1 kernel, all others with kernel 3

    if ((hashconfig->hash_mode == 2500) || (hashconfig->hash_mode == 2501))
    {
      device_param->kernel_params_buf32[28] = 0;
      device_param->kernel_params_buf32[29] = 1;

      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_AUX1, 1, false, 0);

      if (CL_rc == -1) return -1;
    }
    else
    {
      CL_rc = run_kernel (hashcat_ctx, device_param, KERN_RUN_3, 1, false, 0);

      if (CL_rc == -1) return -1;
    }
  }

  // check : check if cracked

  u32 num_cracked;

  CL_err = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->command_queue, device_param->d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);

  if (CL_err != CL_SUCCESS) return -1;

  // finish : cleanup and restore

  device_param->kernel_params_buf32[27] = 0;
  device_param->kernel_params_buf32[28] = 0;
  device_param->kernel_params_buf32[29] = 0;
  device_param->kernel_params_buf32[30] = 0;
  device_param->kernel_params_buf32[31] = 0;
  device_param->kernel_params_buf32[32] = 0;
  device_param->kernel_params_buf32[33] = 0;
  device_param->kernel_params_buf64[34] = 0;

  // point the kernel parameters back at the regular digest/salt buffers

  device_param->kernel_params[15] = &device_param->d_digests_buf;
  device_param->kernel_params[17] = &device_param->d_salt_bufs;
  device_param->kernel_params[18] = &device_param->d_esalt_bufs;

  // zero the device buffers used during the test

  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_buf,       device_param->size_pws);      if (CL_rc == -1) return -1;
  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_tmps,          device_param->size_tmps);     if (CL_rc == -1) return -1;
  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_hooks,         device_param->size_hooks);    if (CL_rc == -1) return -1;
  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_plain_bufs,    device_param->size_plains);   if (CL_rc == -1) return -1;
  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_digests_shown, device_param->size_shown);    if (CL_rc == -1) return -1;
  CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_result,        device_param->size_results);  if (CL_rc == -1) return -1;

  // plus the attack specific buffer

  if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT)
  {
    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_rules_c, device_param->size_rules_c);

    if (CL_rc == -1) return -1;
  }
  else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
  {
    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_combs_c, device_param->size_combs);

    if (CL_rc == -1) return -1;
  }
  else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
  {
    CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_bfs_c, device_param->size_bfs);

    if (CL_rc == -1) return -1;
  }

  // check return

  if (num_cracked == 0)
  {
    // the display mutex is held while the messages are logged

    hc_thread_mutex_lock (status_ctx->mux_display);

    event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! OpenCL kernel self-test failed.", device_param->device_id + 1);

    event_log_warning (hashcat_ctx, "Your device driver installation is probably broken.");
    event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver");
    event_log_warning (hashcat_ctx, NULL);

    hc_thread_mutex_unlock (status_ctx->mux_display);

    return -1;
  }

  return 0;
}
Beispiel #9
0
/**
 * Benchmarks double-precision compute throughput and prints one GFLOPS
 * figure per vector width (double, double2, double4, double8, double16).
 *
 * @param queue    Command queue the kernels are launched on; its context is
 *                 used for the output buffer.
 * @param prog     Program containing the compute_dp_v1 ... compute_dp_v16 kernels.
 * @param devInfo  Device properties used to size the launch and the buffer.
 * @return 0 when the benchmark ran, was not requested, or the device reports
 *         no double support; -1 when an OpenCL error was caught.
 */
int clPeak::runComputeDP(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
{
    float timed, gflops;
    cl_uint workPerWI;
    cl::NDRange globalSize, localSize;
    cl_double A = 1.3f;
    int iters = devInfo.computeIters;

    if(!isComputeDP)
        return 0;

    if(!devInfo.doubleSupported)
    {
        cout << NEWLINE TAB TAB "No double precision support! Skipped" << endl;
        return 0;
    }

    try
    {
        cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();

        // One work-item per output element: start from CUs * WGs-per-CU * WG size,
        // cap the buffer at the maximum allocation size, then round via roundToPowOf2.
        uint globalWIs = (devInfo.numCUs) * (devInfo.computeWgsPerCU) * (devInfo.maxWGSize);
        uint t = MIN((globalWIs * sizeof(cl_double)), devInfo.maxAllocSize);
        t = roundToPowOf2(t);
        globalWIs = t / sizeof(cl_double);
        cl::Buffer outputBuf = cl::Buffer(ctx, CL_MEM_WRITE_ONLY, (globalWIs * sizeof(cl_double)));

        globalSize = globalWIs;
        localSize = devInfo.maxWGSize;

        // All kernels are created up front, so a missing kernel aborts the
        // whole test before any figure is printed.
        cl::Kernel kernel_v1(prog, "compute_dp_v1");
        kernel_v1.setArg(0, outputBuf), kernel_v1.setArg(1, A);

        cl::Kernel kernel_v2(prog, "compute_dp_v2");
        kernel_v2.setArg(0, outputBuf), kernel_v2.setArg(1, A);

        cl::Kernel kernel_v4(prog, "compute_dp_v4");
        kernel_v4.setArg(0, outputBuf), kernel_v4.setArg(1, A);

        cl::Kernel kernel_v8(prog, "compute_dp_v8");
        kernel_v8.setArg(0, outputBuf), kernel_v8.setArg(1, A);

        cl::Kernel kernel_v16(prog, "compute_dp_v16");
        kernel_v16.setArg(0, outputBuf), kernel_v16.setArg(1, A);

        cout << NEWLINE TAB TAB "Double-precision compute (GFLOPS)" << endl;
        cout << setprecision(2) << fixed;

        ///////////////////////////////////////////////////////////////////////////
        // Vector width 1
        cout << TAB TAB TAB "double   : ";  cout.flush();

        workPerWI = 4096;      // Indicates flops executed per work-item

        timed = run_kernel(queue, kernel_v1, globalSize, localSize, iters);

        gflops = ((float)globalWIs * workPerWI) / timed / 1e3f;
        cout << gflops << endl;
        ///////////////////////////////////////////////////////////////////////////

        // Vector width 2
        cout << TAB TAB TAB "double2  : ";  cout.flush();

        workPerWI = 4096;

        timed = run_kernel(queue, kernel_v2, globalSize, localSize, iters);

        gflops = ((float)globalWIs * workPerWI) / timed / 1e3f;
        cout << gflops << endl;
        ///////////////////////////////////////////////////////////////////////////

        // Vector width 4
        cout << TAB TAB TAB "double4  : ";  cout.flush();

        workPerWI = 4096;

        timed = run_kernel(queue, kernel_v4, globalSize, localSize, iters);

        gflops = ((float)globalWIs * workPerWI) / timed / 1e3f;
        cout << gflops << endl;
        ///////////////////////////////////////////////////////////////////////////

        // Vector width 8
        cout << TAB TAB TAB "double8  : ";  cout.flush();
        workPerWI = 4096;

        timed = run_kernel(queue, kernel_v8, globalSize, localSize, iters);

        gflops = ((float)globalWIs * workPerWI) / timed / 1e3f;
        cout << gflops << endl;
        ///////////////////////////////////////////////////////////////////////////

        // Vector width 16
        cout << TAB TAB TAB "double16 : ";  cout.flush();

        workPerWI = 4096;

        timed = run_kernel(queue, kernel_v16, globalSize, localSize, iters);

        gflops = ((float)globalWIs * workPerWI) / timed / 1e3f;
        cout << gflops << endl;
        ///////////////////////////////////////////////////////////////////////////
    }
    // Caught by const reference (no copy of the exception object), in line
    // with runComputeInteger which also catches by reference.
    catch(const cl::Error &error)
    {
        cerr << error.what() << "(" << error.err() << ")" << endl;
        cerr << TAB TAB TAB "Tests skipped" << endl;
        return -1;
    }

    return 0;
}
Beispiel #10
0
// Boot loader entry point.
//
// Sets up the SYS interrupt and the real-time timer, initialises the AT45
// data flash, waits for a key press and then runs an interactive menu:
//   l - receive a boot file into memory (done by the SYS interrupt handler)
//   w - write the received image to data flash and verify it
//   e - erase the pages covered by the image size stored in the six vector
//   r - run the kernel
//   q - arm the watchdog reset and leave the menu
// After the menu loop the CPU is parked in an idle loop.
extern void main(void) {
    // flag starts at 0 so the first test of the menu loop condition does not
    // read an indeterminate value.
    unsigned char flag = 0, numb_pages;
    unsigned i, write_size, read_size,
             temp_six_vector, six_vector, errors, write_offset;
    unsigned boot_args[AT45_PAGE_SIZE / 4];

    // Only boot_args[0] is assigned before the page is written to flash;
    // clear the rest so no stale stack contents end up in the args page.
    for (i = 0; i < AT45_PAGE_SIZE / 4; i++)
        boot_args[i] = 0;

    // calculate temp six vector
    // numb_pages becomes floor(log2(AT45_PAGE_NUMB)).
    numb_pages = 0;
    i = AT45_PAGE_NUMB;
    while(i >>= 1)
        numb_pages++;
    temp_six_vector = (numb_pages << 13)  + (AT45_PAGE_SIZE << 17);

    // setup SYS interrupt
    aic_configure_irq(AT91C_ID_SYS, AT91C_AIC_PRIOR_LOWEST,
                      AT91C_AIC_SRCTYPE_INT_LEVEL_SENSITIVE, aic_asm_sys_handler);
    // enable SYS interrupt
    aic_enable_irq(AT91C_ID_SYS);
    // setup rtt - 1Hz clock
    AT91C_BASE_ST->ST_RTMR = 0x4000;

    // Read-back area first, write area directly behind it.
    upoint_r = pt_mem_area, upoint_w = (pt_mem_area + AT45DB642D_SIZE);
    put_string("Init AT45DB642D and get device information\n");

    if (!at45_init())
        put_string("Device inited and ready\n");
    else
        put_string("Error!\n");

    put_string("Press any key to load boot menu\n: ");
    // setup rtt interrupt flag
    AT91C_BASE_ST->ST_IER = AT91C_ST_RTTINC;
    // enable RXRDY interrupt in DBGU
    AT91C_BASE_DBGU->DBGU_IER |= AT91C_US_RXRDY;
    wait_status = 1;
    get_char();

    while (flag != 'q') {
        put_string("\nload (l), write(w), run kernel(r), quit(q), erase(e): ");
        flag = get_char();
        switch(flag) {
        // loading data to sdram
        case 'l':
            put_string("Please trasfer the boot file:\n");
            transfer_size = 0;
            // setup rtt interrupt flag
            AT91C_BASE_ST->ST_IER = AT91C_ST_RTTINC;
            // enable RXRDY interrupt in DBGU
            AT91C_BASE_DBGU->DBGU_IER |= AT91C_US_RXRDY;
            // Busy-wait until transfer_size becomes non-zero.
            // NOTE(review): presumably set by the SYS interrupt handler, which
            // requires transfer_size to be declared volatile - confirm.
            while(!transfer_size);
            delay(100000);
            if (transfer_size > 0) {
                put_string("Transfer complete\n");
                util_printf("Byte's sended: %x\n", transfer_size);
            }
            break;
        // write the received image to data flash and verify it
        case 'w':
            if (transfer_size == 0) {
                put_string("Please transfer begin, write end\n");
                break;
            }
            else if (transfer_size > (AT45DB642D_SIZE)) {
                put_string("Trasfer is larger than flash size\n");
                break;
            }
            else {
                // Round the write size up to a whole number of flash pages.
                if ((unsigned)transfer_size % AT45_PAGE_SIZE)
                    write_size = ((unsigned)transfer_size / AT45_PAGE_SIZE + 1) * AT45_PAGE_SIZE;
                else
                    write_size = transfer_size;
                put_string("Write boot(b) or linux kernel(n): ");
                // NOTE(review): the answer reuses `flag`, so answering 'q' here
                // writes a linux image and then also ends the menu loop
                // without arming the reset - confirm this is intended.
                flag = get_char();
                util_printf("%c\n", flag);
                if (flag == 'b') {
                    write_offset = BOOT_OFFSET;
                    put_string("\nModification of Arm Interrupt Vector #6\n");
                    six_vector = (write_size / 512) + 1 + temp_six_vector;
                    util_printf("Six vector is 0x%x\n", six_vector);
                    // Word 5 of the image is the sixth vector entry.
                    upoint_w[5] = six_vector;
                }
                else {
                    write_offset = LINUX_OFFSET;
                    put_string("Writing args\n");

                    // The args page carries the image size in its first word.
                    boot_args[0] = write_size;
                    if (!at45_write(BOOT_2_ARGS_OFFSET, boot_args, AT45_PAGE_SIZE))
                        put_string("Write success\n");
                    else
                        put_string("Error!\n");
                }
                util_printf("Write 0x%x bytes\n", write_size);

                if (!at45_write(write_offset, upoint_w, write_size))
                    put_string("Write success\n");
                else
                    put_string("Error!\n");
                // Read the image back and compare it word by word.
                if (!at45_read(write_offset, upoint_r, write_size)) {
                    put_string("Read success\nStart verification\n");
                    six_vector = upoint_r[5];
                    if (write_offset == BOOT_OFFSET) {
                        // Only the bits above the low 12 are compared here.
                        if ((six_vector & 0xfffff000) - temp_six_vector) {
                            util_printf("Six vector is damage, current 0x%x, original 0x%x\n",
                                        six_vector, temp_six_vector);
                            break;
                        }
                        else {
                            put_string("Six vector is correct\nStart code verification\n");
                        }
                    }
                    errors = 0;
                    for (i = 0; i < write_size / 4; i++) {
                        if (upoint_r[i] != upoint_w[i]) {
                            errors++;
                            util_printf("Addr - %x, write - %x, read - %x\n", i, upoint_w[i], upoint_r[i]);
                        }
                    }
                    put_string("Stop code verification\n");
                    if (errors)
                        put_string("Verification failed!\n");
                    else
                        put_string("Verification success!\n");
                }
            }
            break;
        // erase the pages covered by the size stored in the six vector
        case 'e':
            if (!at45_read(0x0, upoint_r, 0x20)) {
                six_vector = upoint_r[5];
                // Low byte of the six vector times 512 gives the span to erase.
                read_size = (six_vector & 0xff) * 512;
                for (i = 0; i <= read_size; i += AT45_PAGE_SIZE) {
                    if (!at45_page_erase(i))
                        put_string("Erase success\n");
                    else
                        put_string("Error!\n");
                }
            }
            break;
        // run 2boot code
        case 'r':
            run_kernel();
            break;
        // exit of loop
        case 'q':
            put_string("\nQuit & Reset\n");
            // Configure the watchdog with reset enabled and restart it.
            AT91C_BASE_ST->ST_WDMR = 256 | AT91C_ST_RSTEN;
            AT91C_BASE_ST->ST_CR = AT91C_ST_WDRST;
            break;
        // undef
        default:
            put_string("\nUndefined command\n");
            break;
        }
    }
    // Infinity loop
    while (1) {
        // Disable pck for idle cpu mode
        AT91C_BASE_PMC->PMC_SCDR = AT91C_PMC_PCK;
    }
}
Beispiel #11
0
/* Calls run_kernel() 2000 times back to back. */
static void run_kernel_loop()
{
  int remaining = 2000;

  while (remaining > 0)
  {
    run_kernel();
    --remaining;
  }
}
Beispiel #12
0
/**
 * Benchmarks integer compute throughput and logs one figure per vector
 * width (int, int2, int4, int8, int16), both as text and as XML records.
 *
 * Returns 0 when the benchmark ran or was not requested, and -1 when an
 * OpenCL error was caught (the error is logged and the tests are skipped).
 */
int clPeak::runComputeInteger(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
{
  // Benchmark not requested: nothing to do.
  if(!isComputeInt)
    return 0;

  const cl_uint workPerWI = 2048;   // Indicates integer operations executed per work-item
  cl_int A = 4;                     // Passed as argument 1 of every kernel
  uint iters = devInfo.computeIters;
  cl::NDRange globalSize, localSize;

  try
  {
    log->print(NEWLINE TAB TAB "Integer compute (GIOPS)" NEWLINE);
    log->xmlOpenTag("integer_compute");
    log->xmlAppendAttribs("unit", "gflops");

    cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();

    // Work-item count: CUs * WGs-per-CU * WG size, capped so the output
    // buffer fits the maximum allocation, then passed through roundToMultipleOf.
    uint64_t globalWIs = (devInfo.numCUs) * (devInfo.computeWgsPerCU) * (devInfo.maxWGSize);
    uint64_t t = MIN((globalWIs * sizeof(cl_int)), devInfo.maxAllocSize) / sizeof(cl_int);
    globalWIs = roundToMultipleOf(t, devInfo.maxWGSize);

    cl::Buffer outputBuf = cl::Buffer(ctx, CL_MEM_WRITE_ONLY, (globalWIs * sizeof(cl_int)));

    globalSize = globalWIs;
    localSize = devInfo.maxWGSize;

    // Every kernel is created and configured before the first one runs.
    cl::Kernel kernel_v1(prog, "compute_integer_v1");
    kernel_v1.setArg(0, outputBuf);
    kernel_v1.setArg(1, A);

    cl::Kernel kernel_v2(prog, "compute_integer_v2");
    kernel_v2.setArg(0, outputBuf);
    kernel_v2.setArg(1, A);

    cl::Kernel kernel_v4(prog, "compute_integer_v4");
    kernel_v4.setArg(0, outputBuf);
    kernel_v4.setArg(1, A);

    cl::Kernel kernel_v8(prog, "compute_integer_v8");
    kernel_v8.setArg(0, outputBuf);
    kernel_v8.setArg(1, A);

    cl::Kernel kernel_v16(prog, "compute_integer_v16");
    kernel_v16.setArg(0, outputBuf);
    kernel_v16.setArg(1, A);

    // One entry per vector width, in the order the results are reported.
    struct WidthCase
    {
      cl::Kernel *kernel;   // Kernel to time
      const char *label;    // Text printed in front of the figure
      const char *xmlTag;   // Name of the XML record
    };

    WidthCase cases[] = {
      { &kernel_v1,  TAB TAB TAB "int   : ", "int"   },
      { &kernel_v2,  TAB TAB TAB "int2  : ", "int2"  },
      { &kernel_v4,  TAB TAB TAB "int4  : ", "int4"  },
      { &kernel_v8,  TAB TAB TAB "int8  : ", "int8"  },
      { &kernel_v16, TAB TAB TAB "int16 : ", "int16" }
    };
    const unsigned int caseCount = sizeof(cases) / sizeof(cases[0]);

    for(unsigned int idx = 0; idx < caseCount; ++idx)
    {
      log->print(cases[idx].label);

      const float timed = run_kernel(queue, *cases[idx].kernel, globalSize, localSize, iters);

      const float gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;

      log->print(gflops);
      log->print(NEWLINE);
      log->xmlRecord(cases[idx].xmlTag, gflops);
    }

    log->xmlCloseTag();     // integer_compute
  }
  catch(cl::Error &error)
  {
    stringstream ss;
    ss << error.what() << " (" << error.err() << ")" NEWLINE
       << TAB TAB TAB "Tests skipped" NEWLINE;
    log->print(ss.str());
    return -1;
  }

  return 0;
}