/* * Generate KAT */ void GenKat() { unsigned char out[128]; unsigned char zero_array[256]; unsigned char one_array[256]; unsigned t_cost = 3; memset(zero_array, 0, 256); memset(one_array, 1, 256); for (unsigned m_cost = MIN_MEMORY; m_cost <= 1000; m_cost *= 2) { for (unsigned p_len = 16; p_len <= 128; p_len += 16) { for (unsigned s_len = 8; s_len <= 128; s_len += 16) { for (unsigned thr = 1; thr <= 8; ++thr) { for (unsigned outlen = 8; outlen <= 128; outlen *= 4) { #ifdef _MEASURE uint64_t start_cycles, stop_cycles, delta; uint32_t ui2, ui3; clock_t start_time = clock(); start_cycles = __rdtscp(&ui2); #endif //Argon2_Context context(out, outlen, zero_array, p_len, one_array, s_len, NULL, 0, NULL, 0, t_cost, m_cost, thr); Argon2_Context context(out, outlen, zero_array, p_len, one_array, s_len, NULL, 0, NULL, 0, t_cost, m_cost, thr, CustomAllocateMemory, CustomFreeMemory); int result = Argon2d(&context); if (ARGON2_OK != result) { printf("Error %d: %s\n", result, ErrorMessage(result)); continue; } #ifdef _MEASURE stop_cycles = __rdtscp(&ui3); clock_t stop_time = clock(); delta = (stop_cycles - start_cycles) / (m_cost); float mcycles = (float) (stop_cycles - start_cycles) / (1 << 20); printf("Argon2d+2i: %d iterations %2.2f cpb %2.2f Mcycles\n", t_cost, (float) delta / 1024, mcycles); printf("Tag: "); for (unsigned i = 0; i < outlen; ++i) { printf("%2.2x ", ((unsigned char*) out)[i]); } printf("\n"); float run_time = ((float) stop_time - start_time) / (CLOCKS_PER_SEC); printf("%2.4f seconds\n", run_time); #endif } } } } } }
// RDTSCP _Use_decl_annotations_ static void VmmpHandleRdtscp( GuestContext *guest_context) { HYPERPLATFORM_PERFORMANCE_MEASURE_THIS_SCOPE(); unsigned int tsc_aux = 0; ULARGE_INTEGER tsc = {}; tsc.QuadPart = __rdtscp(&tsc_aux); guest_context->gp_regs->dx = tsc.HighPart; guest_context->gp_regs->ax = tsc.LowPart; guest_context->gp_regs->cx = tsc_aux; VmmpAdjustGuestInstructionPointer(guest_context->ip); }
/*
 * Minimal probe that issues a single RDTSCP instruction and exits with 0.
 *
 * With MSVC the __rdtscp intrinsic is used; otherwise the instruction is
 * emitted through extended inline assembly with EAX, EDX and ECX declared as
 * outputs, since those are the registers the instruction writes.
 */
int main ()
{
#ifdef _MSC_VER
    unsigned aux = 0x00;
    __rdtscp(&aux);
#else // _MSC_VER
    unsigned eax = 0x00;
    unsigned edx = 0x00;
    unsigned ecx = 0x00;
    // The template must end in a real newline escape; a doubled backslash
    // would hand the assembler the literal characters '\' and 'n'.
    __asm__ volatile (
        "rdtscp\n"
        : "=a" (eax), "=d" (edx), "=c" (ecx)
    );
    // The values are not needed; mark them used so warning-as-error builds pass.
    (void) eax;
    (void) edx;
    (void) ecx;
#endif // _MSC_VER
    return 0;
}
/* * Benchmarks Argon2 with salt length 16, password length 32, t_cost 3, and different threads and m_cost */ void Benchmark() { const uint32_t inlen = 32; unsigned char out[32]; unsigned char zero_array[inlen]; unsigned char one_array[256]; uint32_t outlen = 16; uint32_t saltlen = 16; uint32_t t_cost = 1; memset(zero_array, 0, inlen); memset(one_array, 1, 256); std::vector<uint32_t> thread_test = {1, 2, 4, 6, 8, 16}; for (uint32_t m_cost = (uint32_t) 1 << 10; m_cost <= (uint32_t) 1 << 22; m_cost *= 2) { for (uint32_t thread_n : thread_test) { #ifdef _MEASURE uint64_t start_cycles, stop_cycles, stop_cycles_i, stop_cycles_di, stop_cycles_ds; uint32_t ui1, ui2, ui3, ui4, ui5; clock_t start_time = clock(); start_cycles = __rdtscp(&ui1); #endif Argon2_Context context(out, outlen, zero_array, inlen, one_array, saltlen, NULL, 0, NULL, 0, t_cost, m_cost, thread_n,NULL,NULL,false,false, false); Argon2d(&context); #ifdef _MEASURE stop_cycles = __rdtscp(&ui2); #endif Argon2i(&context); #ifdef _MEASURE stop_cycles_i = __rdtscp(&ui3); #endif Argon2id(&context); #ifdef _MEASURE stop_cycles_di = __rdtscp(&ui4); #endif Argon2ds(&context); #ifdef _MEASURE stop_cycles_ds = __rdtscp(&ui5); clock_t stop_time = clock(); uint64_t delta_d = (stop_cycles - start_cycles) / (m_cost); uint64_t delta_i = (stop_cycles_i - stop_cycles) / (m_cost); uint64_t delta_id = (stop_cycles_di - stop_cycles_i) / m_cost; uint64_t delta_ds = (stop_cycles_ds - stop_cycles_di) / m_cost; float mcycles_d = (float) (stop_cycles - start_cycles) / (1 << 20); float mcycles_i = (float) (stop_cycles_i - stop_cycles) / (1 << 20); float mcycles_id = (float) (stop_cycles_di - stop_cycles_i) / (1 << 20); float mcycles_ds = (float) (stop_cycles_ds - stop_cycles_di) / (1 << 20); printf("Argon2d %d pass(es) %d Mbytes %d threads: %2.2f cpb %2.2f Mcycles \n", t_cost, m_cost >> 10, thread_n, (float) delta_d / 1024, mcycles_d); printf("Argon2i %d pass(es) %d Mbytes %d threads: %2.2f cpb %2.2f Mcycles \n", t_cost, m_cost >> 10, 
thread_n, (float) delta_i / 1024, mcycles_i); printf("Argon2id %d pass(es) %d Mbytes %d threads: %2.2f cpb %2.2f Mcycles \n", t_cost, m_cost >> 10, thread_n, (float) delta_id / 1024, mcycles_id); printf("Argon2ds %d pass(es) %d Mbytes %d threads: %2.2f cpb %2.2f Mcycles \n", t_cost, m_cost >> 10, thread_n, (float) delta_ds / 1024, mcycles_ds); float run_time = ((float) stop_time - start_time) / (CLOCKS_PER_SEC); printf("%2.4f seconds\n\n", run_time); #endif } } }
/* Returns the counter value produced by __rdtscp; the auxiliary output is discarded. */
inline unsigned long long refClock()
{
    unsigned int aux = 0;
    const unsigned long long ticks = __rdtscp(&aux);
    return ticks;
}
/*
 * Returns the counter value produced by __rdtscp.
 *
 * NOTE(review): `taux` is not declared in this function; presumably it is a
 * file-scope `unsigned int` that receives the auxiliary output -- confirm.
 * NOTE(review): the `volatile` qualifier on a by-value return has no effect
 * on callers; it is kept only so the declared signature stays unchanged.
 */
inline volatile unsigned long long rdtsc()
{
    const unsigned long long ticks = __rdtscp(&taux);
    return ticks;
}
/* Reads the time-stamp counter through __rdtscp and returns it; the auxiliary value is ignored. */
inline unsigned long long rdtsc()
{
    unsigned int aux_unused = 0;
    const unsigned long long now = __rdtscp(&aux_unused);
    return now;
}
int benchmark(unsigned long long plaintext_length, unsigned long long ad_length) { if ((plaintext_length >(1 << 31)) || (ad_length> (1 << 31))) return 1; Init(); //For generating plaintext unsigned char *key = (unsigned char*)malloc(key_bytes); unsigned char *nonce = (unsigned char*)malloc(nonce_bytes); unsigned char *ciphertext; unsigned long long ciphertext_length; unsigned long long decrypted_length; unsigned char *plaintext = (unsigned char*)malloc((size_t)plaintext_length); unsigned char *plaintext_decrypted = (unsigned char*)malloc((size_t)plaintext_length); plaintext_length = (size_t)plaintext_length; if (plaintext == NULL || plaintext_decrypted == NULL) return 1; unsigned char *associated_data = (unsigned char*)malloc((size_t)ad_length); if (associated_data == NULL) { free(plaintext); free(plaintext_decrypted); return 1; } //Plaintext initialization unsigned char StateIn[64]; memset(StateIn, 0, 64); unsigned char StateOut[64]; int counter = (int)plaintext_length; unsigned char *dest_pointer = plaintext; while (counter>0) { FPerm(StateIn, StateOut); unsigned to_copy = (counter<64) ? counter : 64; memcpy(dest_pointer, StateOut, to_copy); dest_pointer += to_copy; (*((unsigned*)StateIn))++; counter -= to_copy; } //AD initialization counter = (int) ad_length; dest_pointer = associated_data; while (counter>0) { FPerm(StateIn, StateOut); unsigned to_copy = (counter<64) ? 
counter : 64; memcpy(dest_pointer, StateOut, to_copy); dest_pointer += to_copy; (*((unsigned*)StateIn))++; counter -= to_copy; } //Key setting FPerm(StateIn, StateOut); memcpy(key, StateOut, key_bytes); (*((unsigned*)StateIn))++; //Nonce setting FPerm(StateIn, StateOut); memcpy(nonce, StateOut, nonce_bytes); (*((unsigned*)StateIn))++; //Ciphertext memory allocation ciphertext = (unsigned char*)malloc((size_t)(plaintext_length + tag_bytes)); if (ciphertext == NULL) { free(plaintext); free(plaintext_decrypted); free(associated_data); return 1; } uint64_t start_time, mid_time, end_time; uint32_t start_ptr, mid_ptr, end_ptr; start_time = __rdtscp(&start_ptr); #ifdef EXTRANONCE //ExtraNonce crypto_aead_encrypt_no_nonce(ciphertext, &ciphertext_length, plaintext, plaintext_length, associated_data, ad_length, NULL, nonce, key); #else crypto_aead_encrypt(ciphertext, &ciphertext_length, plaintext, plaintext_length, associated_data, ad_length, NULL, nonce, key); #endif mid_time = __rdtscp(&mid_ptr); float speed = (float)(mid_time - start_time) / (plaintext_length + ad_length); printf("PAEQ-128: %d bytes encrypted, %2.2f cpb\n", (uint32_t)(plaintext_length + ad_length), speed); mid_time = __rdtscp(&mid_ptr); int result = crypto_aead_decrypt(plaintext_decrypted, &decrypted_length, NULL, ciphertext, ciphertext_length, associated_data, ad_length, nonce, key); end_time = __rdtscp(&end_ptr); speed = (float)(end_time - mid_time) / (plaintext_length + ad_length); printf("PAEQ-128: %d bytes decrypted, %2.2f cpb\n", (uint32_t)(plaintext_length + ad_length), speed); if (decrypted_length != plaintext_length) printf("Plaintext length mismatch\n"); if (result!=0) printf("Decryption result: %d\n", result); free(ciphertext); free(plaintext_decrypted); free(associated_data); return 0; }