static int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
  uint64_t m[16];
  uint64_t v[16];
  int i;

  for( i = 0; i < 16; ++i )
    m[i] = load64( block + i * sizeof( m[i] ) );

  for( i = 0; i < 8; ++i )
    v[i] = S->h[i];

  v[ 8] = blake2b_IV[0];
  v[ 9] = blake2b_IV[1];
  v[10] = blake2b_IV[2];
  v[11] = blake2b_IV[3];
  v[12] = S->t[0] ^ blake2b_IV[4];
  v[13] = S->t[1] ^ blake2b_IV[5];
  v[14] = S->f[0] ^ blake2b_IV[6];
  v[15] = S->f[1] ^ blake2b_IV[7];

#define G(r,i,a,b,c,d) \
  do { \
    a = a + b + m[blake2b_sigma[r][2*i+0]]; \
    d = rotr64(d ^ a, 32); \
    c = c + d; \
    b = rotr64(b ^ c, 24); \
    a = a + b + m[blake2b_sigma[r][2*i+1]]; \
    d = rotr64(d ^ a, 16); \
    c = c + d; \
    b = rotr64(b ^ c, 63); \
  } while(0)

#define ROUND(r) \
  do { \
    G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
    G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
    G(r,2,v[ 2],v[ 6],v[10],v[14]); \
    G(r,3,v[ 3],v[ 7],v[11],v[15]); \
    G(r,4,v[ 0],v[ 5],v[10],v[15]); \
    G(r,5,v[ 1],v[ 6],v[11],v[12]); \
    G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
    G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
  } while(0)

  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );
  ROUND( 10 );
  ROUND( 11 );

  for( i = 0; i < 8; ++i )
    S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];

#undef G
#undef ROUND
  return 0;
}
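/*
 * Editor's sketch: load64() and rotr64() are used above but defined elsewhere
 * in the BLAKE2 sources. Portable definitions matching the little-endian
 * convention the compression function assumes typically look like this (an
 * illustrative sketch, not the reference implementation's own helpers):
 */
#include <stdint.h>

static uint64_t load64( const uint8_t *src )
{
  uint64_t w = 0;
  int i;
  for( i = 0; i < 8; ++i )
    w |= ( uint64_t )src[i] << ( 8 * i ); /* little-endian byte order */
  return w;
}

static uint64_t rotr64( uint64_t w, unsigned c )
{
  /* rotate right by c bits; the callers above use c in {16, 24, 32, 63} */
  return ( w >> c ) | ( w << ( 64 - c ) );
}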
void JIT::compileLoadVarargs(Instruction* instruction)
{
    int thisValue = instruction[2].u.operand;
    int arguments = instruction[3].u.operand;
    int firstFreeRegister = instruction[4].u.operand;

    killLastResultRegister();

    JumpList slowCase;
    JumpList end;
    bool canOptimize = m_codeBlock->usesArguments()
        && arguments == m_codeBlock->argumentsRegister()
        && !m_codeBlock->symbolTable()->slowArguments();

    if (canOptimize) {
        emitGetVirtualRegister(arguments, regT0);
        slowCase.append(branch64(NotEqual, regT0, TrustedImm64(JSValue::encode(JSValue()))));

        emitGetFromCallFrameHeader32(JSStack::ArgumentCount, regT0);
        slowCase.append(branch32(Above, regT0, TrustedImm32(Arguments::MaxArguments + 1)));
        // regT0: argumentCountIncludingThis

        move(regT0, regT1);
        add32(TrustedImm32(firstFreeRegister + JSStack::CallFrameHeaderSize), regT1);
        lshift32(TrustedImm32(3), regT1);
        addPtr(callFrameRegister, regT1);
        // regT1: newCallFrame

        slowCase.append(branchPtr(Below, AbsoluteAddress(m_globalData->interpreter->stack().addressOfEnd()), regT1));

        // Initialize ArgumentCount.
        store32(regT0, Address(regT1, JSStack::ArgumentCount * static_cast<int>(sizeof(Register)) + OBJECT_OFFSETOF(EncodedValueDescriptor, asBits.payload)));

        // Initialize 'this'.
        emitGetVirtualRegister(thisValue, regT2);
        store64(regT2, Address(regT1, CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register))));

        // Copy arguments.
        neg32(regT0);
        signExtend32ToPtr(regT0, regT0);
        end.append(branchAdd64(Zero, TrustedImm32(1), regT0));
        // regT0: -argumentCount

        Label copyLoop = label();
        load64(BaseIndex(callFrameRegister, regT0, TimesEight, CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register))), regT2);
        store64(regT2, BaseIndex(regT1, regT0, TimesEight, CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register))));
        branchAdd64(NonZero, TrustedImm32(1), regT0).linkTo(copyLoop, this);

        end.append(jump());
    }

    if (canOptimize)
        slowCase.link(this);

    JITStubCall stubCall(this, cti_op_load_varargs);
    stubCall.addArgument(thisValue, regT0);
    stubCall.addArgument(arguments, regT0);
    stubCall.addArgument(Imm32(firstFreeRegister));
    stubCall.call(regT1);

    if (canOptimize)
        end.link(this);
}
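/*
 * Editor's sketch: the newCallFrame computation above is performed with
 * 32-bit operations (add32/lshift32) before the result is added to the
 * frame pointer. A minimal standalone C model of that arithmetic width,
 * showing how a large intermediate wraps in 32 bits but not in 64. All
 * names and values here are illustrative, not from JavaScriptCore (the
 * real argument count is also bounded by the MaxArguments check above):
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t argumentCount = 0x1fffffffu; /* hypothetical, unrealistically large */
    uint32_t firstFreeRegister = 16;
    uint32_t headerSize = 6;              /* stand-in for JSStack::CallFrameHeaderSize */

    /* 32-bit model of add32 + lshift32: the shift discards the high bits */
    uint32_t offset32 = (argumentCount + firstFreeRegister + headerSize) << 3;

    /* the same expression widened to 64 bits keeps the full value */
    uint64_t offset64 =
        ((uint64_t)argumentCount + firstFreeRegister + headerSize) << 3;

    printf("32-bit: 0x%08x\n64-bit: 0x%016llx\n",
           offset32, (unsigned long long)offset64);
    return 0;
}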
static int blake2b_compress( blake2b_state *S, const byte block[BLAKE2B_BLOCKBYTES] )
{
    int i;
#ifdef WOLFSSL_SMALL_STACK
    word64* m;
    word64* v;

    m = (word64*)XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    if ( m == NULL )
        return -1;

    v = (word64*)XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    if ( v == NULL ) {
        XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER);
        return -1;
    }
#else
    word64 m[16];
    word64 v[16];
#endif

    for( i = 0; i < 16; ++i )
        m[i] = load64( block + i * sizeof( m[i] ) );

    for( i = 0; i < 8; ++i )
        v[i] = S->h[i];

    v[ 8] = blake2b_IV[0];
    v[ 9] = blake2b_IV[1];
    v[10] = blake2b_IV[2];
    v[11] = blake2b_IV[3];
    v[12] = S->t[0] ^ blake2b_IV[4];
    v[13] = S->t[1] ^ blake2b_IV[5];
    v[14] = S->f[0] ^ blake2b_IV[6];
    v[15] = S->f[1] ^ blake2b_IV[7];

#define G(r,i,a,b,c,d) \
    do { \
        a = a + b + m[blake2b_sigma[r][2*i+0]]; \
        d = rotr64(d ^ a, 32); \
        c = c + d; \
        b = rotr64(b ^ c, 24); \
        a = a + b + m[blake2b_sigma[r][2*i+1]]; \
        d = rotr64(d ^ a, 16); \
        c = c + d; \
        b = rotr64(b ^ c, 63); \
    } while(0)

#define ROUND(r) \
    do { \
        G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
        G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
        G(r,2,v[ 2],v[ 6],v[10],v[14]); \
        G(r,3,v[ 3],v[ 7],v[11],v[15]); \
        G(r,4,v[ 0],v[ 5],v[10],v[15]); \
        G(r,5,v[ 1],v[ 6],v[11],v[12]); \
        G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
        G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
    } while(0)

    ROUND( 0 );
    ROUND( 1 );
    ROUND( 2 );
    ROUND( 3 );
    ROUND( 4 );
    ROUND( 5 );
    ROUND( 6 );
    ROUND( 7 );
    ROUND( 8 );
    ROUND( 9 );
    ROUND( 10 );
    ROUND( 11 );

    for( i = 0; i < 8; ++i )
        S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];

#undef G
#undef ROUND

#ifdef WOLFSSL_SMALL_STACK
    XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    XFREE(v, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif

    return 0;
}
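/*
 * Editor's note: under WOLFSSL_SMALL_STACK the function above performs two
 * heap allocations and two frees per compressed block. A generic model of
 * collapsing the scratch arrays into a single allocation (standard C
 * stand-ins for the XMALLOC/XFREE macros; illustrative only, not wolfSSL's
 * own code):
 */
#include <stdint.h>
#include <stdlib.h>

static int compress_with_heap_scratch(void)
{
    uint64_t *mv = (uint64_t *)malloc(sizeof(uint64_t) * 32);
    uint64_t *m, *v;

    if (mv == NULL)
        return -1;
    m = mv;      /* first 16 words: message schedule */
    v = mv + 16; /* next 16 words: working variables */

    /* ... the compression body would use m[] and v[] exactly as above ... */
    (void)m;
    (void)v;

    free(mv);    /* a single free covers both arrays */
    return 0;
}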
void JIT::compileLoadVarargs(Instruction* instruction)
{
    int thisValue = instruction[3].u.operand;
    int arguments = instruction[4].u.operand;
    int firstFreeRegister = instruction[5].u.operand;

    JumpList slowCase;
    JumpList end;
    bool canOptimize = m_codeBlock->usesArguments()
        && arguments == m_codeBlock->argumentsRegister().offset()
        && !m_codeBlock->symbolTable()->slowArguments();

    if (canOptimize) {
        emitGetVirtualRegister(arguments, regT0);
        slowCase.append(branch64(NotEqual, regT0, TrustedImm64(JSValue::encode(JSValue()))));

        emitGetFromCallFrameHeader32(JSStack::ArgumentCount, regT0);
        slowCase.append(branch32(Above, regT0, TrustedImm32(Arguments::MaxArguments + 1)));
        // regT0: argumentCountIncludingThis

        move(regT0, regT1);
        neg64(regT1);
        add64(TrustedImm32(firstFreeRegister - JSStack::CallFrameHeaderSize), regT1);
        lshift64(TrustedImm32(3), regT1);
        addPtr(callFrameRegister, regT1);
        // regT1: newCallFrame

        slowCase.append(branchPtr(Above, AbsoluteAddress(m_vm->addressOfJSStackLimit()), regT1));

        // Initialize ArgumentCount.
        store32(regT0, Address(regT1, JSStack::ArgumentCount * static_cast<int>(sizeof(Register)) + OBJECT_OFFSETOF(EncodedValueDescriptor, asBits.payload)));

        // Initialize 'this'.
        emitGetVirtualRegister(thisValue, regT2);
        store64(regT2, Address(regT1, CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register))));

        // Copy arguments.
        signExtend32ToPtr(regT0, regT0);
        end.append(branchSub64(Zero, TrustedImm32(1), regT0));
        // regT0: argumentCount

        Label copyLoop = label();
        load64(BaseIndex(callFrameRegister, regT0, TimesEight, CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register))), regT2);
        store64(regT2, BaseIndex(regT1, regT0, TimesEight, CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register))));
        branchSub64(NonZero, TrustedImm32(1), regT0).linkTo(copyLoop, this);

        end.append(jump());
    }

    if (canOptimize)
        slowCase.link(this);

    emitGetVirtualRegister(arguments, regT1);
    callOperation(operationSizeAndAllocFrameForVarargs, regT1, firstFreeRegister);
    emitGetVirtualRegister(thisValue, regT1);
    emitGetVirtualRegister(arguments, regT2);
    callOperation(operationLoadVarargs, returnValueGPR, regT1, regT2);
    move(returnValueGPR, regT1);

    if (canOptimize)
        end.link(this);
}
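/*
 * Editor's sketch: a plain C model of the optimized copy loop above, under
 * the assumptions visible in the emitted code (8-byte Register slots, 'this'
 * stored separately at index 0, and the branchSub64 loop walking slots
 * argumentCount-1 down to 1). Illustrative only; the names below do not come
 * from JavaScriptCore:
 */
#include <stdint.h>

static void copy_varargs_model(uint64_t *newFrame, const uint64_t *oldFrame,
                               int64_t thisArgumentOffset,
                               int64_t argumentCountIncludingThis)
{
    int64_t i;

    /* 'this' (index 0) is stored separately, as in the JIT code above */
    for (i = argumentCountIncludingThis - 1; i >= 1; --i)
        newFrame[thisArgumentOffset + i] = oldFrame[thisArgumentOffset + i];
}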
/* Relocate a non-PLT object with addend. */
static int
reloc_non_plt_obj(Obj_Entry *obj_rtld, Obj_Entry *obj, const Elf_Rela *rela,
    SymCache *cache, int flags, RtldLockState *lockstate)
{
    struct fptr **fptrs;
    Elf_Addr *where = (Elf_Addr *) (obj->relocbase + rela->r_offset);

    switch (ELF_R_TYPE(rela->r_info)) {
    case R_IA_64_REL64LSB:
        /*
         * We handle rtld's relocations in rtld_start.S
         */
        if (obj != obj_rtld)
            store64(where, load64(where) + (Elf_Addr) obj->relocbase);
        break;

    case R_IA_64_DIR64LSB: {
        const Elf_Sym *def;
        const Obj_Entry *defobj;
        Elf_Addr target;

        def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
            flags, cache, lockstate);
        if (def == NULL)
            return -1;

        target = (def->st_shndx != SHN_UNDEF)
            ? (Elf_Addr)(defobj->relocbase + def->st_value) : 0;
        store64(where, target + rela->r_addend);
        break;
    }

    case R_IA_64_FPTR64LSB: {
        /*
         * We have to make sure that all @fptr references to
         * the same function are identical so that code can
         * compare function pointers.
         */
        const Elf_Sym *def;
        const Obj_Entry *defobj;
        struct fptr *fptr = 0;
        Elf_Addr target, gp;
        int sym_index;

        def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
            SYMLOOK_IN_PLT | flags, cache, lockstate);
        if (def == NULL) {
            /*
             * XXX r_debug_state is problematic and find_symdef()
             * returns NULL for it. This probably has something to
             * do with symbol versioning (r_debug_state is in the
             * symbol map). If we return -1 in that case we abort
             * relocating rtld, which typically is fatal. So, for
             * now just skip the symbol when we're relocating
             * rtld. We don't care about r_debug_state unless we
             * are being debugged.
             */
            if (obj != obj_rtld)
                return -1;
            break;
        }

        if (def->st_shndx != SHN_UNDEF) {
            target = (Elf_Addr)(defobj->relocbase + def->st_value);
            gp = (Elf_Addr)defobj->pltgot;

            /* rtld is allowed to reference itself only */
            assert(!obj->rtld || obj == defobj);
            fptrs = defobj->priv;
            if (fptrs == NULL)
                fptrs = alloc_fptrs((Obj_Entry *) defobj, obj->rtld);

            sym_index = def - defobj->symtab;

            /*
             * Find the @fptr, using fptrs as a helper.
             */
            if (fptrs)
                fptr = fptrs[sym_index];
            if (!fptr) {
                fptr = alloc_fptr(target, gp);
                if (fptrs)
                    fptrs[sym_index] = fptr;
            }
        } else
            fptr = NULL;

        store64(where, (Elf_Addr)fptr);
        break;
    }

    case R_IA_64_IPLTLSB: {
        /*
         * Relocation typically used to populate C++ virtual function
         * tables. It creates a 128-bit function descriptor at the
         * specified memory address.
         */
        const Elf_Sym *def;
        const Obj_Entry *defobj;
        struct fptr *fptr;
        Elf_Addr target, gp;

        def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
            flags, cache, lockstate);
        if (def == NULL)
            return -1;

        if (def->st_shndx != SHN_UNDEF) {
            target = (Elf_Addr)(defobj->relocbase + def->st_value);
            gp = (Elf_Addr)defobj->pltgot;
        } else {
            target = 0;
            gp = 0;
        }

        fptr = (void*)where;
        store64(&fptr->target, target);
        store64(&fptr->gp, gp);
        break;
    }

    case R_IA_64_DTPMOD64LSB: {
        const Elf_Sym *def;
        const Obj_Entry *defobj;

        def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
            flags, cache, lockstate);
        if (def == NULL)
            return -1;

        store64(where, defobj->tlsindex);
        break;
    }

    case R_IA_64_DTPREL64LSB: {
        const Elf_Sym *def;
        const Obj_Entry *defobj;

        def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
            flags, cache, lockstate);
        if (def == NULL)
            return -1;

        store64(where, def->st_value + rela->r_addend);
        break;
    }

    case R_IA_64_TPREL64LSB: {
        const Elf_Sym *def;
        const Obj_Entry *defobj;

        def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
            flags, cache, lockstate);
        if (def == NULL)
            return -1;

        /*
         * We lazily allocate offsets for static TLS as we
         * see the first relocation that references the
         * TLS block. This allows us to support (small
         * amounts of) static TLS in dynamically loaded
         * modules. If we run out of space, we generate an
         * error.
         */
        if (!defobj->tls_done) {
            if (!allocate_tls_offset((Obj_Entry*) defobj)) {
                _rtld_error("%s: No space available for static "
                    "Thread Local Storage", obj->path);
                return -1;
            }
        }

        store64(where, defobj->tlsoffset + def->st_value + rela->r_addend);
        break;
    }

    case R_IA_64_NONE:
        break;

    default:
        _rtld_error("%s: Unsupported relocation type %u"
            " in non-PLT relocations\n", obj->path,
            (unsigned int)ELF_R_TYPE(rela->r_info));
        return -1;
    }

    return(0);
}
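/*
 * Editor's sketch: the R_IA_64_FPTR64LSB and R_IA_64_IPLTLSB cases above
 * manipulate ia64 function descriptors through `struct fptr`, whose
 * definition lives elsewhere in rtld. The layout implied by the accesses
 * (fptr->target, fptr->gp, and the "128-bit function descriptor" comment)
 * is two consecutive 64-bit words; the field order follows the stores above
 * and is otherwise an assumption:
 */
struct fptr {
    Elf_Addr target; /* entry point of the function */
    Elf_Addr gp;     /* global pointer of the defining object */
};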
/* Permute the state while xoring in the block of data. */
static void blake2b_compress(BLAKE2B_CTX *S,
                             const uint8_t *blocks, size_t len)
{
    uint64_t m[16];
    uint64_t v[16];
    int i;
    size_t increment;

    /*
     * There are two distinct usage vectors for this function:
     *
     * a) BLAKE2b_Update uses it to process complete blocks,
     *    possibly more than one at a time;
     *
     * b) BLAKE2b_Final uses it to process the last block, always
     *    a single and possibly incomplete one, in which case the
     *    caller pads the input with zeros.
     */
    assert(len < BLAKE2B_BLOCKBYTES || len % BLAKE2B_BLOCKBYTES == 0);

    /*
     * Since the last block is always processed with a separate call,
     * |len| can fail to be a multiple of the block size only when it
     * is less than BLAKE2B_BLOCKBYTES ("less" including even zero),
     * which is why the following assignment does not have to reside
     * inside the main loop below.
     */
    increment = len < BLAKE2B_BLOCKBYTES ? len : BLAKE2B_BLOCKBYTES;

    for (i = 0; i < 8; ++i) {
        v[i] = S->h[i];
    }

    do {
        for (i = 0; i < 16; ++i) {
            m[i] = load64(blocks + i * sizeof(m[i]));
        }

        /* blake2b_increment_counter */
        S->t[0] += increment;
        S->t[1] += (S->t[0] < increment);

        v[8]  = blake2b_IV[0];
        v[9]  = blake2b_IV[1];
        v[10] = blake2b_IV[2];
        v[11] = blake2b_IV[3];
        v[12] = S->t[0] ^ blake2b_IV[4];
        v[13] = S->t[1] ^ blake2b_IV[5];
        v[14] = S->f[0] ^ blake2b_IV[6];
        v[15] = S->f[1] ^ blake2b_IV[7];

#define G(r,i,a,b,c,d) \
        do { \
            a = a + b + m[blake2b_sigma[r][2*i+0]]; \
            d = rotr64(d ^ a, 32); \
            c = c + d; \
            b = rotr64(b ^ c, 24); \
            a = a + b + m[blake2b_sigma[r][2*i+1]]; \
            d = rotr64(d ^ a, 16); \
            c = c + d; \
            b = rotr64(b ^ c, 63); \
        } while (0)

#define ROUND(r) \
        do { \
            G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
            G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
            G(r,2,v[ 2],v[ 6],v[10],v[14]); \
            G(r,3,v[ 3],v[ 7],v[11],v[15]); \
            G(r,4,v[ 0],v[ 5],v[10],v[15]); \
            G(r,5,v[ 1],v[ 6],v[11],v[12]); \
            G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
            G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
        } while (0)

#if defined(OPENSSL_SMALL_FOOTPRINT)
        /* 3x size reduction on x86_64, almost 7x on ARMv8, 9x on ARMv4 */
        for (i = 0; i < 12; i++) {
            ROUND(i);
        }
#else
        ROUND(0);
        ROUND(1);
        ROUND(2);
        ROUND(3);
        ROUND(4);
        ROUND(5);
        ROUND(6);
        ROUND(7);
        ROUND(8);
        ROUND(9);
        ROUND(10);
        ROUND(11);
#endif

        for (i = 0; i < 8; ++i) {
            S->h[i] = v[i] ^= v[i + 8] ^ S->h[i];
        }
#undef G
#undef ROUND

        blocks += increment;
        len -= increment;
    } while (len);
}
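/*
 * Editor's sketch: the inlined blake2b_increment_counter step above keeps a
 * 128-bit message-byte counter in two 64-bit words, carrying into t[1] when
 * t[0] wraps modulo 2^64. A standalone check of that carry idiom:
 */
#include <stdint.h>
#include <assert.h>

int main(void)
{
    uint64_t t[2] = { UINT64_MAX - 10, 0 }; /* low word about to wrap */
    uint64_t increment = 128;               /* one full BLAKE2b block */

    t[0] += increment;          /* wraps: UINT64_MAX - 10 + 128 == 117 */
    t[1] += (t[0] < increment); /* wrapped result is small, so carry 1 */

    assert(t[0] == 117 && t[1] == 1);
    return 0;
}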
int crypto_aead_decrypt(
    unsigned char *m, unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c, unsigned long long clen,
    const unsigned char *ad, unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k
)
{
    ICESTATE S;
    uint64_t Tcomp[2]; /* computed authentication tag */
    uint64_t Trecv[2]; /* received authentication tag */
    unsigned int frameBit;

    /* ciphertext cannot be shorter than the tag length */
    if (clen < ICEPOLETAGLEN) {
        return -1;
    }

    initState128a(S, k, npub);

    /* secret message number is zero-length */
    frameBit = 0;
    processIceBlockRev(S, NULL, NULL, 0, frameBit);

    /* process associated data blocks */
    do {
        unsigned long long blocklen = 128;
        frameBit = (adlen <= blocklen ? 1 : 0);
        if (adlen < blocklen) {
            blocklen = adlen;
        }
        processIceBlock(S, ad, NULL, blocklen, frameBit);
        ad += blocklen;
        adlen -= blocklen;
    } while (adlen > 0);

    /* process ciphertext blocks to get auth tag */
    *mlen = 0;
    clen -= ICEPOLETAGLEN; /* need to stop before auth tag */
    do {
        unsigned long long blocklen = 128;
        frameBit = (clen <= blocklen ? 0 : 1);
        if (clen < blocklen) {
            blocklen = clen;
        }
        processIceBlockRev(S, c, &m, blocklen, frameBit);
        c += blocklen;
        *mlen += blocklen;
        clen -= blocklen;
    } while (clen > 0);

    /* compare computed and received auth tags */
    Tcomp[0] = S[0][0];
    Tcomp[1] = S[1][0];
    Trecv[0] = load64(c, 8);
    Trecv[1] = load64(c + 8, 8);
    if (Tcomp[0] != Trecv[0] || Tcomp[1] != Trecv[1]) {
        *mlen = 0;
        return -1;
    }
    return 0;
}
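/*
 * Editor's note: the tag check above compares with `!=`, which can exit
 * early and leak timing. Constant-time comparison is a common hardening for
 * AEAD tag verification; a sketch of one (not part of the ICEPOLE reference
 * code above):
 */
#include <stdint.h>

static int tag_equal_ct(const uint64_t a[2], const uint64_t b[2])
{
    /* accumulate all differing bits, then test once at the end */
    uint64_t diff = (a[0] ^ b[0]) | (a[1] ^ b[1]);
    return diff == 0; /* 1 if the tags match, 0 otherwise */
}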