void xor_range(int xor_type, unsigned long start, unsigned long bytes) { unsigned long i; // sanity check test (catch problems with test specification or test harness) assert(start >= 0); assert(start + bytes <= SIZE_IN_BYTES); assert((xor_type == NONE) || (xor_type == BYTE_XOR) || (xor_type == WORD_XOR)); if (bytes == 0) return; if (xor_type == NONE) return; if (xor_type == BYTE_XOR) { bytewise_xor(&dst[start],&src[start],bytes); } else { // must be WORD_XOR aligned_word_xor(&dst[start],&src[start],bytes); } }
/*
 * CC-MAC function WUSB1.0[6.5]
 *
 * Take a data string and produce the encrypted CBC Counter-mode MIC
 *
 * Note the names for most function arguments are made to (more or
 * less) match those used in the pseudo-function definition given in
 * WUSB1.0[6.5].
 *
 * @tfm_cbc: CBC(AES) blkcipher handle (initialized)
 *
 * @tfm_aes: AES cipher handle (initialized)
 *
 * @mic: buffer for placing the computed MIC (Message Integrity
 *       Code). This is exactly 8 bytes, and we expect the buffer to
 *       be at least eight bytes in length.
 *
 * @n: CCM nonce
 *
 * @a: ASCII string, 14 bytes long (I guess zero padded if needed;
 *     we use exactly 14 bytes).
 *
 * @b: data stream to be processed; cannot be a global or const local
 *     (will confuse the scatterlists)
 *
 * @blen: size of @b in bytes
 *
 * The 128 bit symmetric key of WUSB1.0[6.5] is not an argument of this
 * function; presumably the caller has already set it on @tfm_cbc and
 * @tfm_aes (that is what "initialized" is taken to mean here -- TODO
 * confirm against the caller).
 *
 * Returns 8 (the number of MIC bytes written to @mic) on success, or
 * a negative errno: -ENOMEM if the destination buffer cannot be
 * allocated, or whatever crypto_blkcipher_encrypt() reported.
 *
 * Still not very clear how this is done, but looks like this: we
 * create block B0 (as WUSB1.0[6.5] says), then we AES-crypt it with
 * the key. We bytewise xor B0 with B1 (1) and AES-crypt that. Then we
 * take the payload and divide it in blocks (16 bytes), xor them with
 * the previous crypto result (16 bytes) and crypt it, repeat the next
 * block with the output of the previous one, rinse wash (I guess this
 * is what AES CBC mode means...but I truly have no idea). So we use
 * the CBC(AES) blkcipher, that does precisely that. The IV (Initial
 * Vector) is 16 bytes and is set to zero on every call.
 *
 * See rfc3610. Linux crypto has a CBC implementation, but the
 * documentation is scarce, to say the least, and the example code is
 * so intricate that it is difficult to understand how things work.
 * Most of this is guess work -- bite me.
 *
 * (1) Created as 6.5 says, again, using as l(a) 'Blen + 14', and
 *     using the 14 bytes of @a to fill up
 *     b1.{mac_header,e0,security_reserved,padding}.
 *
 * NOTE: The definition of l(a) in WUSB1.0[6.5] vs the definition of
 *       l(m) is orthogonal, they bear no relationship, so it is not
 *       in conflict with the parameter's relation that
 *       WUSB1.0[6.4.2] defines.
 *
 * NOTE: WUSB1.0[A.1]: Host Nonce is missing a nibble? (1e); fixed in
 *       first errata released on 2005/07.
 *
 * NOTE: we need to clean IV to zero at each invocation to make sure
 *       we start with a fresh empty Initial Vector, so that the CBC
 *       works ok.
 *
 * NOTE: blen is not necessarily aligned to a block size; when it is
 *       not we pad with zeros, that's what sg[3] is for. Maybe there
 *       is a smarter way to do this.
 */
static int wusb_ccm_mac(struct crypto_blkcipher *tfm_cbc,
			struct crypto_cipher *tfm_aes, void *mic,
			const struct aes_ccm_nonce *n,
			const struct aes_ccm_label *a, const void *b,
			size_t blen)
{
	int result = -ENOMEM;
	struct blkcipher_desc desc;
	struct aes_ccm_b0 b0;
	struct aes_ccm_b1 b1;
	struct aes_ccm_a ax;
	struct scatterlist sg[4], sg_dst;
	void *iv, *dst_buf;
	size_t ivsize, dst_size;
	const u8 bzero[16] = { 0 };
	size_t zero_padding;

	/*
	 * These checks should be compile time optimized out
	 * ensure @a fills b1's mac_header and following fields
	 */
	WARN_ON(sizeof(*a) != sizeof(b1) - sizeof(b1.la));
	WARN_ON(sizeof(b0) != sizeof(struct aes_ccm_block));
	WARN_ON(sizeof(b1) != sizeof(struct aes_ccm_block));
	WARN_ON(sizeof(ax) != sizeof(struct aes_ccm_block));

	/* Bytes of zero padding needed to round @b up to a whole block */
	zero_padding = blen % sizeof(struct aes_ccm_block);
	if (zero_padding)
		zero_padding = sizeof(struct aes_ccm_block) - zero_padding;
	dst_size = blen + sizeof(b0) + sizeof(b1) + zero_padding;
	dst_buf = kzalloc(dst_size, GFP_KERNEL);
	if (dst_buf == NULL) {
		printk(KERN_ERR "E: can't alloc destination buffer\n");
		goto error_dst_buf;
	}

	iv = crypto_blkcipher_crt(tfm_cbc)->iv;
	ivsize = crypto_blkcipher_ivsize(tfm_cbc);
	memset(iv, 0, ivsize);

	/* Setup B0 */
	b0.flags = 0x59;	/* Format B0 */
	b0.ccm_nonce = *n;
	b0.lm = cpu_to_be16(0);	/* WUSB1.0[6.5] sez l(m) is 0 */

	/* Setup B1
	 *
	 * The WUSB spec is anything but clear! WUSB1.0[6.5]
	 * says that to initialize B1 from A with 'l(a) = blen +
	 * 14'--after clarification, it means to use A's contents
	 * for MAC Header, EO, sec reserved and padding.
	 */
	b1.la = cpu_to_be16(blen + 14);
	memcpy(&b1.mac_header, a, sizeof(*a));

	sg_init_table(sg, ARRAY_SIZE(sg));
	sg_set_buf(&sg[0], &b0, sizeof(b0));
	sg_set_buf(&sg[1], &b1, sizeof(b1));
	sg_set_buf(&sg[2], b, blen);
	/* 0 if well behaved :) */
	sg_set_buf(&sg[3], bzero, zero_padding);
	sg_init_one(&sg_dst, dst_buf, dst_size);

	desc.tfm = tfm_cbc;
	desc.flags = 0;
	result = crypto_blkcipher_encrypt(&desc, &sg_dst, sg, dst_size);
	if (result < 0) {
		printk(KERN_ERR "E: can't compute CBC-MAC tag (MIC): %d\n",
		       result);
		goto error_cbc_crypt;
	}

	/* Now we crypt the MIC Tag (*iv) with Ax -- values per WUSB1.0[6.5]
	 * The procedure is to AES crypt the A0 block and XOR the MIC
	 * Tag against it; we only do the first 8 bytes and place it
	 * directly in the destination buffer.
	 *
	 * POS Crypto API: size is assumed to be AES's block size.
	 * Thanks for documenting it -- tip taken from airo.c
	 */
	ax.flags = 0x01;		/* as per WUSB 1.0 spec */
	ax.ccm_nonce = *n;
	ax.counter = 0;
	crypto_cipher_encrypt_one(tfm_aes, (void *)&ax, (void *)&ax);
	bytewise_xor(mic, &ax, iv, 8);
	result = 8;
error_cbc_crypt:
	kfree(dst_buf);
error_dst_buf:
	return result;
}
/*
 * Compute the 8-byte MIC over B0 | B1 | @b | zero padding using the
 * CBC(AES) handle, then mask it with the AES-encrypted counter block.
 *
 * @tfm_cbc: CBC(AES) blkcipher handle; its IV is zeroed on entry and
 *           read back after the encryption
 * @tfm_aes: AES cipher handle used to encrypt the counter block
 * @mic:     output buffer; 8 bytes are written
 * @n:       CCM nonce, copied into both B0 and the counter block
 * @a:       label copied into B1 starting at b1.mac_header
 * @b:       payload
 * @blen:    length of @b in bytes; need not be a multiple of the
 *           block size, the remainder is padded with zeros
 *
 * Returns 8 on success or a negative errno (-ENOMEM if the scratch
 * buffer cannot be allocated, otherwise the error reported by
 * crypto_blkcipher_encrypt()).
 *
 * NOTE(review): another definition of wusb_ccm_mac() with the same
 * signature appears earlier in this file; two definitions in one
 * translation unit will not build, so presumably only one of them is
 * meant to stay -- confirm which is authoritative.
 */
static int wusb_ccm_mac(struct crypto_blkcipher *tfm_cbc,
			struct crypto_cipher *tfm_aes, void *mic,
			const struct aes_ccm_nonce *n,
			const struct aes_ccm_label *a, const void *b,
			size_t blen)
{
	int result = -ENOMEM;
	struct blkcipher_desc desc;
	struct aes_ccm_b0 b0;
	struct aes_ccm_b1 b1;
	struct aes_ccm_a ax;
	struct scatterlist sg[4], sg_dst;
	void *iv, *dst_buf;
	size_t ivsize, dst_size;
	const u8 bzero[16] = { 0 };
	size_t zero_padding;

	/* Layout sanity checks: every CCM block type must be one block */
	WARN_ON(sizeof(*a) != sizeof(b1) - sizeof(b1.la));
	WARN_ON(sizeof(b0) != sizeof(struct aes_ccm_block));
	WARN_ON(sizeof(b1) != sizeof(struct aes_ccm_block));
	WARN_ON(sizeof(ax) != sizeof(struct aes_ccm_block));

	/* Round the payload up to a whole number of blocks */
	zero_padding = blen % sizeof(struct aes_ccm_block);
	if (zero_padding)
		zero_padding = sizeof(struct aes_ccm_block) - zero_padding;
	dst_size = blen + sizeof(b0) + sizeof(b1) + zero_padding;
	dst_buf = kzalloc(dst_size, GFP_KERNEL);
	if (dst_buf == NULL) {
		printk(KERN_ERR "E: can't alloc destination buffer\n");
		goto error_dst_buf;
	}

	/* Start every computation from an all-zero IV */
	iv = crypto_blkcipher_crt(tfm_cbc)->iv;
	ivsize = crypto_blkcipher_ivsize(tfm_cbc);
	memset(iv, 0, ivsize);

	/* B0: flags, nonce, l(m) = 0 */
	b0.flags = 0x59;
	b0.ccm_nonce = *n;
	b0.lm = cpu_to_be16(0);

	/* B1: l(a) = blen + 14, followed by the contents of @a */
	b1.la = cpu_to_be16(blen + 14);
	memcpy(&b1.mac_header, a, sizeof(*a));

	/* Source: B0, B1, payload, zero padding (possibly empty) */
	sg_init_table(sg, ARRAY_SIZE(sg));
	sg_set_buf(&sg[0], &b0, sizeof(b0));
	sg_set_buf(&sg[1], &b1, sizeof(b1));
	sg_set_buf(&sg[2], b, blen);
	sg_set_buf(&sg[3], bzero, zero_padding);
	sg_init_one(&sg_dst, dst_buf, dst_size);

	desc.tfm = tfm_cbc;
	desc.flags = 0;
	result = crypto_blkcipher_encrypt(&desc, &sg_dst, sg, dst_size);
	if (result < 0) {
		printk(KERN_ERR "E: can't compute CBC-MAC tag (MIC): %d\n",
		       result);
		goto error_cbc_crypt;
	}

	/*
	 * Encrypt the counter block in place and XOR its first 8 bytes
	 * with the first 8 bytes of the IV buffer to produce the MIC.
	 */
	ax.flags = 0x01;
	ax.ccm_nonce = *n;
	ax.counter = 0;
	crypto_cipher_encrypt_one(tfm_aes, (void *)&ax, (void *)&ax);
	bytewise_xor(mic, &ax, iv, 8);
	result = 8;
error_cbc_crypt:
	kfree(dst_buf);
error_dst_buf:
	return result;
}
// simple benchmarks to compare bytewise_xor and aligned_word_xor performance void do_benchmarks(void) { // we need better than the 1s granularity provided by time() struct timespec start_time, end_time; long long delta_ns; int retval; long long ns_per_test = 1000000000ll; // 1 second int lengths[] = {1, 4, 8, 16, 32, SIZE_IN_BYTES - sizeof(native_register_t)}; unsigned long byte_runs[6] = { 0,0,0,0,0,0}; unsigned long word_runs[6] = { 0,0,0,0,0,0}; int i,j,offset; int batch_size = 250; printf("Running benchmarks with batch size of %d\n", batch_size); printf("Each test takes %.2fs to run\n", ((float)ns_per_test) / 1000000000.0); printf("Lower scores below are better\n\n"); for (offset = sizeof(native_register_t) - 1; offset >= 0 ; --offset) { printf("Testing offset %d\n", offset); for (i=0; i < 6; ++i) { printf(" String size in bytes: %d\n",lengths[i]); // start clock retval = clock_gettime(CLOCK_REALTIME,&start_time); assert (retval == 0); do { // do a batch of xors for (j=0; j<batch_size; ++j) { bytewise_xor(&dst[offset],&src[offset],lengths[i]); } byte_runs[i] += batch_size; // end clock retval = clock_gettime(CLOCK_REALTIME,&end_time); assert (retval == 0); delta_ns = end_time.tv_nsec - start_time.tv_nsec; delta_ns += 1000000000ll * (end_time.tv_sec - start_time.tv_sec); } while (delta_ns < ns_per_test); printf(" bytewise= %f ns/byte\n", ((float) delta_ns / (lengths[i] * (float) byte_runs[i]))); // start clock retval = clock_gettime(CLOCK_REALTIME,&start_time); assert (retval == 0); do { // do a batch of xors for (j=0; j<batch_size; ++j) { aligned_word_xor(&dst[offset],&src[offset],lengths[i]); } word_runs[i] += batch_size; // end clock retval = clock_gettime(CLOCK_REALTIME,&end_time); assert (retval == 0); delta_ns = end_time.tv_nsec - start_time.tv_nsec; delta_ns += 1000000000 * (end_time.tv_sec - start_time.tv_sec); } while (delta_ns < ns_per_test); printf(" wordwise= %f ns/byte\n\n", ((float) delta_ns / (lengths[i] * (float) word_runs[i]))); } } }
// test_harness returns the number of tests passed int test_harness(void) { int passed = 0; test_entry_t *tp = tests; int is_required, invert, result; int rand_trials = 10000; unsigned long start,bytes; // do all the individual tests from the list while(tp->test_a != END_OF_TESTS) { is_required = 0; invert = 0; // check whether description starts with '*' or '!' while ((tp->desc)[0] == '*' || (tp->desc)[0] == '!') { if ((tp->desc)[0] == '*') { is_required = 1; } if ((tp->desc)[0] == '!') { invert ^= 1; } ++(tp->desc); } result = do_test(tp,invert); if (result) { ++passed; } else { if (is_required) { printf("Failed required test: aborting remaining tests\n"); return passed; } } ++tp; } // do random test: first xor with bytewise, then do same xor with // aligned_word_xor and check that dest array is zero after each // pair. srand(1); printf("Doing %d random XOR trials\n", rand_trials); init_dest(); while(rand_trials--) { start = rand() % SIZE_IN_BYTES; bytes = rand() % (SIZE_IN_BYTES - start); assert(start + bytes <= SIZE_IN_BYTES); bytewise_xor(&dst[start],&src[start],bytes); aligned_word_xor(&dst[start],&src[start],bytes); result = test_range(IS_ZERO,0,SIZE_IN_BYTES); if (!result) { printf("Failed random trials\n"); break; } } if (rand_trials <= 0) { printf("Passed random trials\n"); ++passed; } return passed; }