/*
 * RC4 key schedule (KSA): initialize key->data[0..255] to the identity
 * permutation, then mix in the key bytes with 256 swaps, cycling through
 * the key when it is shorter than 256 bytes.  Also resets the stream
 * indices key->x and key->y.
 */
void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
{
    RC4_INT *state = &key->data[0];
    RC4_INT swap;
    int key_idx = 0;
    int mix = 0;
    unsigned int i;

    key->x = 0;
    key->y = 0;

    /* Start from the identity permutation. */
    for (i = 0; i < 256; i++)
        state[i] = i;

    /* One key-dependent swap per state element. */
    for (i = 0; i < 256; i++) {
        swap = state[i];
        mix = (data[key_idx] + swap + mix) & 0xff;
        if (++key_idx == len)
            key_idx = 0;            /* cycle through the key bytes */
        state[i] = state[mix];
        state[mix] = swap;
    }
}
/*
 * RC4 key schedule, corecrypto entry point: initialize the RC4 state held
 * in skey from keylen bytes of keydata.  Builds the identity permutation,
 * then performs 256 key-dependent swaps, cycling through the key bytes.
 *
 * NOTE(review): if keylen == 0 the `++id1 == len` wrap test never fires and
 * data[id1] walks past the key buffer — callers presumably guarantee
 * keylen >= 1; confirm at the call sites.
 */
static void eay_RC4_set_key(ccrc4_ctx *skey, size_t keylen, const void *keydata)
{
    eay_RC4_INT tmp;
    unsigned int id1, id2;
    eay_RC4_INT *d;
    unsigned int i;
    /* typecast: keylen should always be small enough */
    unsigned long len = (unsigned long)keylen;
    const unsigned char *data = keydata;
    eay_RC4_KEY *key = (eay_RC4_KEY *)skey;

    d = &(key->data[0]);

    /* Start from the identity permutation. */
    for (i = 0; i < 256; i++)
        d[i] = i;
    key->x = 0;
    key->y = 0;
    id1 = id2 = 0;

/*
 * One key-schedule step: fold key byte data[id1] into the running index id2
 * and swap state elements (n) and id2.  #undef'd before and after so this
 * definition cannot clash with the other SK_LOOP macros in this file (they
 * take a different parameter list, which would otherwise be an invalid
 * macro redefinition).
 */
#undef SK_LOOP
#define SK_LOOP(n) { \
        tmp = d[(n)]; \
        id2 = (data[id1] + tmp + id2) & 0xff; \
        if (++id1 == len) id1 = 0; \
        d[(n)] = d[id2]; \
        d[id2] = tmp; }

    /* 256 key-schedule steps, unrolled by four. */
    for (i = 0; i < 256; i += 4) {
        SK_LOOP(i + 0);
        SK_LOOP(i + 1);
        SK_LOOP(i + 2);
        SK_LOOP(i + 3);
    }
#undef SK_LOOP
}
/*
 * RC4 key schedule with optional x86/x86_64 "compressed" layout.
 *
 * NOTE(review): the #endif below closes a preprocessor conditional that
 * begins above this chunk (presumably selecting between alternative
 * prototypes) — confirm against the full file.
 */
void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
#endif
{
    register RC4_INT tmp;
    register int id1,id2;
    register RC4_INT *d;
    unsigned int i;

    d= &(key->data[0]);
    key->x = 0;
    key->y = 0;
    id1=id2=0;

/*
 * One key-schedule step on state array d at index n: fold key byte
 * data[id1] into the running index id2 and swap d[n] with d[id2],
 * cycling id1 through the len key bytes.
 */
#define SK_LOOP(d,n) { \
        tmp=d[(n)]; \
        id2 = (data[id1] + tmp + id2) & 0xff; \
        if (++id1 == len) id1=0; \
        d[(n)]=d[id2]; \
        d[id2]=tmp; }

#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM)
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
     defined(__INTEL__) || \
     defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64)
    if (sizeof(RC4_INT) > 1) {
        /*
         * Unlike all other x86 [and x86_64] implementations,
         * Intel P4 core [including EM64T] was found to perform
         * poorly with wider RC4_INT. Performance improvement
         * for IA-32 hand-coded assembler turned out to be 2.8x
         * if re-coded for RC4_CHAR! It's however inappropriate
         * to just switch to RC4_CHAR for x86[_64], as non-P4
         * implementations suffer from significant performance
         * losses then, e.g. PIII exhibits >2x deterioration,
         * and so does Opteron. In order to assure optimal
         * all-round performance, we detect P4 at run-time by
         * checking upon reserved bit 20 in CPU capability
         * vector and set up compressed key schedule, which is
         * recognized by correspondingly updated assembler
         * module...  Bit 20 is set up by OPENSSL_ia32_cpuid.
         *
         * <*****@*****.**>
         */
#ifdef OPENSSL_FIPS
        /* FIPS builds reach the capability vector via an accessor. */
        unsigned long *ia32cap_ptr = OPENSSL_ia32cap_loc();
        if (ia32cap_ptr && (*ia32cap_ptr & (1<<20))) {
#else
        if (OPENSSL_ia32cap_P & (1<<20)) {
#endif
            /* P4 path: run the schedule over the state viewed as bytes. */
            unsigned char *cp=(unsigned char *)d;

            for (i=0;i<256;i++) cp[i]=i;
            for (i=0;i<256;i++) SK_LOOP(cp,i);
            /* mark schedule as compressed! */
            d[256/sizeof(RC4_INT)]=-1;
            return;
        }
    }
# endif
#endif
    /* Generic path: identity permutation, then 256 swaps unrolled by four. */
    for (i=0; i < 256; i++) d[i]=i;
    for (i=0; i < 256; i+=4) {
        SK_LOOP(d,i+0);
        SK_LOOP(d,i+1);
        SK_LOOP(d,i+2);
        SK_LOOP(d,i+3);
    }
}