Example #1
static int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
  uint64_t m[16];
  uint64_t v[16];
  int      i;

  for( i = 0; i < 16; ++i )
    m[i] = load64( block + i * sizeof( m[i] ) );

  for( i = 0; i < 8; ++i )
    v[i] = S->h[i];

  v[ 8] = blake2b_IV[0];
  v[ 9] = blake2b_IV[1];
  v[10] = blake2b_IV[2];
  v[11] = blake2b_IV[3];
  v[12] = S->t[0] ^ blake2b_IV[4];
  v[13] = S->t[1] ^ blake2b_IV[5];
  v[14] = S->f[0] ^ blake2b_IV[6];
  v[15] = S->f[1] ^ blake2b_IV[7];
#define G(r,i,a,b,c,d) \
  do { \
    a = a + b + m[blake2b_sigma[r][2*i+0]]; \
    d = rotr64(d ^ a, 32); \
    c = c + d; \
    b = rotr64(b ^ c, 24); \
    a = a + b + m[blake2b_sigma[r][2*i+1]]; \
    d = rotr64(d ^ a, 16); \
    c = c + d; \
    b = rotr64(b ^ c, 63); \
  } while(0)
#define ROUND(r)  \
  do { \
    G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
    G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
    G(r,2,v[ 2],v[ 6],v[10],v[14]); \
    G(r,3,v[ 3],v[ 7],v[11],v[15]); \
    G(r,4,v[ 0],v[ 5],v[10],v[15]); \
    G(r,5,v[ 1],v[ 6],v[11],v[12]); \
    G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
    G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
  } while(0)
  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );
  ROUND( 10 );
  ROUND( 11 );

  for( i = 0; i < 8; ++i )
    S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];

#undef G
#undef ROUND
  return 0;
}
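
This example assumes two helpers that are not shown and are presumably defined elsewhere in the surrounding sources: load64, a little-endian 64-bit load that must tolerate unaligned input, and rotr64, a 64-bit rotate right. A portable sketch consistent with that behavior (not necessarily the original code):

#include <stdint.h>

/* Little-endian 64-bit load from a possibly unaligned byte pointer. */
static inline uint64_t load64(const void *src)
{
  const uint8_t *p = (const uint8_t *)src;
  uint64_t w = 0;
  int i;

  for (i = 0; i < 8; i++)
    w |= (uint64_t)p[i] << (8 * i);
  return w;
}

/* Rotate a 64-bit word right by c bits, 0 < c < 64. */
static inline uint64_t rotr64(uint64_t w, unsigned c)
{
  return (w >> c) | (w << (64 - c));
}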
Example #2
void JIT::compileLoadVarargs(Instruction* instruction)
{
    int thisValue = instruction[2].u.operand;
    int arguments = instruction[3].u.operand;
    int firstFreeRegister = instruction[4].u.operand;

    killLastResultRegister();

    JumpList slowCase;
    JumpList end;
    bool canOptimize = m_codeBlock->usesArguments()
        && arguments == m_codeBlock->argumentsRegister()
        && !m_codeBlock->symbolTable()->slowArguments();

    if (canOptimize) {
        emitGetVirtualRegister(arguments, regT0);
        slowCase.append(branch64(NotEqual, regT0, TrustedImm64(JSValue::encode(JSValue()))));

        emitGetFromCallFrameHeader32(JSStack::ArgumentCount, regT0);
        slowCase.append(branch32(Above, regT0, TrustedImm32(Arguments::MaxArguments + 1)));
        // regT0: argumentCountIncludingThis

        move(regT0, regT1);
        add32(TrustedImm32(firstFreeRegister + JSStack::CallFrameHeaderSize), regT1);
        lshift32(TrustedImm32(3), regT1);
        addPtr(callFrameRegister, regT1);
        // regT1: newCallFrame

        slowCase.append(branchPtr(Below, AbsoluteAddress(m_globalData->interpreter->stack().addressOfEnd()), regT1));

        // Initialize ArgumentCount.
        store32(regT0, Address(regT1, JSStack::ArgumentCount * static_cast<int>(sizeof(Register)) + OBJECT_OFFSETOF(EncodedValueDescriptor, asBits.payload)));

        // Initialize 'this'.
        emitGetVirtualRegister(thisValue, regT2);
        store64(regT2, Address(regT1, CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register))));

        // Copy arguments.
        neg32(regT0);
        signExtend32ToPtr(regT0, regT0);
        end.append(branchAdd64(Zero, TrustedImm32(1), regT0));
        // regT0: -argumentCount

        Label copyLoop = label();
        load64(BaseIndex(callFrameRegister, regT0, TimesEight, CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register))), regT2);
        store64(regT2, BaseIndex(regT1, regT0, TimesEight, CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register))));
        branchAdd64(NonZero, TrustedImm32(1), regT0).linkTo(copyLoop, this);

        end.append(jump());
    }

    if (canOptimize)
        slowCase.link(this);

    JITStubCall stubCall(this, cti_op_load_varargs);
    stubCall.addArgument(thisValue, regT0);
    stubCall.addArgument(arguments, regT0);
    stubCall.addArgument(Imm32(firstFreeRegister));
    stubCall.call(regT1);

    if (canOptimize)
        end.link(this);
}
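
In the fast path above, the copy loop negates the argument count so that a single register doubles as loop counter and scaled index: it runs from -argumentCount + 1 up to zero, and branchAdd64 both advances it and tests for termination. A hypothetical C rendering of that idiom (copy_args and its parameters are illustrative names, not the JIT's own API):

#include <stdint.h>

/* Hypothetical rendering of the negative-index copy loop: src and dst
 * point at the 'this' slot of each frame, and the count - 1 arguments
 * sit at negative offsets relative to it. */
static void copy_args(uint64_t *dst, const uint64_t *src, int64_t count)
{
    int64_t i = -count;
    if (++i == 0)            /* mirrors branchAdd64(Zero, 1, regT0) */
        return;              /* only 'this': nothing left to copy */
    do {
        dst[i] = src[i];     /* mirrors the load64/store64 pair */
    } while (++i != 0);      /* mirrors branchAdd64(NonZero, 1, regT0) */
}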
Example #3
static int blake2b_compress( blake2b_state *S,
                             const byte block[BLAKE2B_BLOCKBYTES] )
{
  int i;

#ifdef WOLFSSL_SMALL_STACK
  word64* m;
  word64* v;

  m = (word64*)XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);

  if ( m == NULL ) return -1;

  v = (word64*)XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);

  if ( v == NULL )
  {
    XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    return -1;
  }
#else
  word64 m[16];
  word64 v[16];
#endif

  for( i = 0; i < 16; ++i )
    m[i] = load64( block + i * sizeof( m[i] ) );

  for( i = 0; i < 8; ++i )
    v[i] = S->h[i];

  v[ 8] = blake2b_IV[0];
  v[ 9] = blake2b_IV[1];
  v[10] = blake2b_IV[2];
  v[11] = blake2b_IV[3];
  v[12] = S->t[0] ^ blake2b_IV[4];
  v[13] = S->t[1] ^ blake2b_IV[5];
  v[14] = S->f[0] ^ blake2b_IV[6];
  v[15] = S->f[1] ^ blake2b_IV[7];
#define G(r,i,a,b,c,d) \
  do { \
    a = a + b + m[blake2b_sigma[r][2*i+0]]; \
    d = rotr64(d ^ a, 32); \
    c = c + d; \
    b = rotr64(b ^ c, 24); \
    a = a + b + m[blake2b_sigma[r][2*i+1]]; \
    d = rotr64(d ^ a, 16); \
    c = c + d; \
    b = rotr64(b ^ c, 63); \
  } while(0)
#define ROUND(r)  \
  do { \
    G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
    G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
    G(r,2,v[ 2],v[ 6],v[10],v[14]); \
    G(r,3,v[ 3],v[ 7],v[11],v[15]); \
    G(r,4,v[ 0],v[ 5],v[10],v[15]); \
    G(r,5,v[ 1],v[ 6],v[11],v[12]); \
    G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
    G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
  } while(0)
  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );
  ROUND( 10 );
  ROUND( 11 );

  for( i = 0; i < 8; ++i )
    S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];

#undef G
#undef ROUND

#ifdef WOLFSSL_SMALL_STACK
  XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  XFREE(v, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif

  return 0;
}
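
This wolfSSL variant is the same compression function as Example #1; the only difference is the WOLFSSL_SMALL_STACK path, which moves the 256 bytes of working state to the heap for targets with tiny stacks, at the price of two allocations and matching cleanup on every exit path. A hypothetical tweak that keeps a single failure point by allocating both arrays in one call (XMALLOC, XFREE, and word64 are the wolfSSL names used above):

#ifdef WOLFSSL_SMALL_STACK
  word64* mv;
  word64* m;
  word64* v;

  /* One 32-word block serves both m[16] and v[16]. */
  mv = (word64*)XMALLOC(sizeof(word64) * 32, NULL, DYNAMIC_TYPE_TMP_BUFFER);
  if (mv == NULL) return -1;
  m = mv;        /* first 16 words  */
  v = mv + 16;   /* second 16 words */
#endif
  /* ... function body unchanged ... */
#ifdef WOLFSSL_SMALL_STACK
  XFREE(mv, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif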
Example #4
void JIT::compileLoadVarargs(Instruction* instruction)
{
    int thisValue = instruction[3].u.operand;
    int arguments = instruction[4].u.operand;
    int firstFreeRegister = instruction[5].u.operand;

    JumpList slowCase;
    JumpList end;
    bool canOptimize = m_codeBlock->usesArguments()
        && arguments == m_codeBlock->argumentsRegister().offset()
        && !m_codeBlock->symbolTable()->slowArguments();

    if (canOptimize) {
        emitGetVirtualRegister(arguments, regT0);
        slowCase.append(branch64(NotEqual, regT0, TrustedImm64(JSValue::encode(JSValue()))));

        emitGetFromCallFrameHeader32(JSStack::ArgumentCount, regT0);
        slowCase.append(branch32(Above, regT0, TrustedImm32(Arguments::MaxArguments + 1)));
        // regT0: argumentCountIncludingThis

        move(regT0, regT1);
        neg64(regT1);
        add64(TrustedImm32(firstFreeRegister - JSStack::CallFrameHeaderSize), regT1);
        lshift64(TrustedImm32(3), regT1);
        addPtr(callFrameRegister, regT1);
        // regT1: newCallFrame

        slowCase.append(branchPtr(Above, AbsoluteAddress(m_vm->addressOfJSStackLimit()), regT1));

        // Initialize ArgumentCount.
        store32(regT0, Address(regT1, JSStack::ArgumentCount * static_cast<int>(sizeof(Register)) + OBJECT_OFFSETOF(EncodedValueDescriptor, asBits.payload)));

        // Initialize 'this'.
        emitGetVirtualRegister(thisValue, regT2);
        store64(regT2, Address(regT1, CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register))));

        // Copy arguments.
        signExtend32ToPtr(regT0, regT0);
        end.append(branchSub64(Zero, TrustedImm32(1), regT0));
        // regT0: argumentCount

        Label copyLoop = label();
        load64(BaseIndex(callFrameRegister, regT0, TimesEight, CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register))), regT2);
        store64(regT2, BaseIndex(regT1, regT0, TimesEight, CallFrame::thisArgumentOffset() * static_cast<int>(sizeof(Register))));
        branchSub64(NonZero, TrustedImm32(1), regT0).linkTo(copyLoop, this);

        end.append(jump());
    }

    if (canOptimize)
        slowCase.link(this);

    emitGetVirtualRegister(arguments, regT1);
    callOperation(operationSizeAndAllocFrameForVarargs, regT1, firstFreeRegister);
    emitGetVirtualRegister(thisValue, regT1);
    emitGetVirtualRegister(arguments, regT2);
    callOperation(operationLoadVarargs, returnValueGPR, regT1, regT2);
    move(returnValueGPR, regT1);

    if (canOptimize)
        end.link(this);
}
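
This is a later revision of Example #2: the copy loop now counts down from argumentCount with branchSub64 instead of up from its negation, and the slow path calls operations directly rather than going through a JITStubCall. The new-frame arithmetic is also folded differently; rendered as hypothetical C (the names are illustrative, and Register is taken to be 8 bytes, matching the lshift64(TrustedImm32(3))):

#include <stdint.h>

/* What the move/neg64/add64/lshift64/addPtr sequence above computes. */
static char *new_call_frame(char *callFrame,
                            int64_t firstFreeRegister,
                            int64_t callFrameHeaderSize,
                            int64_t argumentCountIncludingThis)
{
    int64_t slots = firstFreeRegister - callFrameHeaderSize
                    - argumentCountIncludingThis;
    return callFrame + slots * 8;   /* 8 == sizeof(Register), assumed */
}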
Example #5
/* Relocate a non-PLT object with addend. */
static int
reloc_non_plt_obj(Obj_Entry *obj_rtld, Obj_Entry *obj, const Elf_Rela *rela,
    SymCache *cache, int flags, RtldLockState *lockstate)
{
	struct fptr **fptrs;
	Elf_Addr *where = (Elf_Addr *) (obj->relocbase + rela->r_offset);

	switch (ELF_R_TYPE(rela->r_info)) {
	case R_IA_64_REL64LSB:
		/*
		 * We handle rtld's relocations in rtld_start.S
		 */
		if (obj != obj_rtld)
			store64(where,
				load64(where) + (Elf_Addr) obj->relocbase);
		break;

	case R_IA_64_DIR64LSB: {
		const Elf_Sym *def;
		const Obj_Entry *defobj;
		Elf_Addr target;

		def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
		    flags, cache, lockstate);
		if (def == NULL)
			return -1;

		target = (def->st_shndx != SHN_UNDEF)
		    ? (Elf_Addr)(defobj->relocbase + def->st_value) : 0;
		store64(where, target + rela->r_addend);
		break;
	}

	case R_IA_64_FPTR64LSB: {
		/*
		 * We have to make sure that all @fptr references to
		 * the same function are identical so that code can
		 * compare function pointers.
		 */
		const Elf_Sym *def;
		const Obj_Entry *defobj;
		struct fptr *fptr = 0;
		Elf_Addr target, gp;
		int sym_index;

		def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
		    SYMLOOK_IN_PLT | flags, cache, lockstate);
		if (def == NULL) {
			/*
			 * XXX r_debug_state is problematic and find_symdef()
			 * returns NULL for it. This probably has something to
			 * do with symbol versioning (r_debug_state is in the
			 * symbol map). If we return -1 in that case we abort
			 * relocating rtld, which typically is fatal. So, for
			 * now just skip the symbol when we're relocating
			 * rtld. We don't care about r_debug_state unless we
			 * are being debugged.
			 */
			if (obj != obj_rtld)
				return -1;
			break;
		}

		if (def->st_shndx != SHN_UNDEF) {
			target = (Elf_Addr)(defobj->relocbase + def->st_value);
			gp = (Elf_Addr)defobj->pltgot;

			/* rtld may reference only itself */
			assert(!obj->rtld || obj == defobj);
			fptrs = defobj->priv;
			if (fptrs == NULL)
				fptrs = alloc_fptrs((Obj_Entry *) defobj, 
				    obj->rtld);

			sym_index = def - defobj->symtab;

			/*
			 * Find the @fptr, using fptrs as a helper.
			 */
			if (fptrs)
				fptr = fptrs[sym_index];
			if (!fptr) {
				fptr = alloc_fptr(target, gp);
				if (fptrs)
					fptrs[sym_index] = fptr;
			}
		} else
			fptr = NULL;

		store64(where, (Elf_Addr)fptr);
		break;
	}

	case R_IA_64_IPLTLSB: {
		/*
		 * Relocation typically used to populate C++ virtual function
		 * tables. It creates a 128-bit function descriptor at the
		 * specified memory address.
		 */
		const Elf_Sym *def;
		const Obj_Entry *defobj;
		struct fptr *fptr;
		Elf_Addr target, gp;

		def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
		    flags, cache, lockstate);
		if (def == NULL)
			return -1;

		if (def->st_shndx != SHN_UNDEF) {
			target = (Elf_Addr)(defobj->relocbase + def->st_value);
			gp = (Elf_Addr)defobj->pltgot;
		} else {
			target = 0;
			gp = 0;
		}

		fptr = (void*)where;
		store64(&fptr->target, target);
		store64(&fptr->gp, gp);
		break;
	}

	case R_IA_64_DTPMOD64LSB: {
		const Elf_Sym *def;
		const Obj_Entry *defobj;

		def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
		    flags, cache, lockstate);
		if (def == NULL)
			return -1;

		store64(where, defobj->tlsindex);
		break;
	}

	case R_IA_64_DTPREL64LSB: {
		const Elf_Sym *def;
		const Obj_Entry *defobj;

		def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
		    flags, cache, lockstate);
		if (def == NULL)
			return -1;

		store64(where, def->st_value + rela->r_addend);
		break;
	}

	case R_IA_64_TPREL64LSB: {
		const Elf_Sym *def;
		const Obj_Entry *defobj;

		def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj,
		    flags, cache, lockstate);
		if (def == NULL)
			return -1;

		/*
		 * We lazily allocate offsets for static TLS as we
		 * see the first relocation that references the
		 * TLS block. This allows us to support (small
		 * amounts of) static TLS in dynamically loaded
		 * modules. If we run out of space, we generate an
		 * error.
		 */
		if (!defobj->tls_done) {
			if (!allocate_tls_offset((Obj_Entry*) defobj)) {
				_rtld_error("%s: No space available for static "
				    "Thread Local Storage", obj->path);
				return -1;
			}
		}

		store64(where, defobj->tlsoffset + def->st_value + rela->r_addend);
		break;
	}

	case R_IA_64_NONE:
		break;

	default:
		_rtld_error("%s: Unsupported relocation type %u"
			    " in non-PLT relocations\n", obj->path,
			    (unsigned int)ELF_R_TYPE(rela->r_info));
		return -1;
	}

	return(0);
}
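
Here load64 and store64 are the runtime linker's raw 64-bit word accessors rather than the byte-oriented loaders of the BLAKE2 examples, and struct fptr models the ia64 function descriptor that R_IA_64_IPLTLSB fills in. Plausible minimal shapes, assuming aligned native-endian access (not the actual FreeBSD definitions):

#include <stdint.h>

typedef uint64_t Elf_Addr;

/* ia64 function descriptor: a function pointer designates this pair,
 * not the code address itself. */
struct fptr {
	Elf_Addr target;   /* entry point of the function */
	Elf_Addr gp;       /* global-pointer value of the defining module */
};

/* Aligned, native-endian 64-bit accessors. */
static inline uint64_t load64(const uint64_t *p)    { return *p; }
static inline void store64(uint64_t *p, uint64_t v) { *p = v; }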
Example #6
/* Permute the state while xoring in the block of data. */
static void blake2b_compress(BLAKE2B_CTX *S,
                            const uint8_t *blocks,
                            size_t len)
{
    uint64_t m[16];
    uint64_t v[16];
    int i;
    size_t increment;

    /*
     * There are two distinct usage vectors for this function:
     *
     * a) BLAKE2b_Update uses it to process complete blocks,
     *    possibly more than one at a time;
     *
     * b) BLAKE2b_Final uses it to process the last block, always
     *    a single one and possibly incomplete, in which case the
     *    caller pads the input with zeros.
     */
    assert(len < BLAKE2B_BLOCKBYTES || len % BLAKE2B_BLOCKBYTES == 0);

    /*
     * Since the last block is always processed in a separate call,
     * a |len| that is not a multiple of the block size can only be
     * observed when |len| is less than BLAKE2B_BLOCKBYTES (a zero
     * |len| is also possible), which is why the following assignment
     * does not have to live inside the main loop below.
     */
    increment = len < BLAKE2B_BLOCKBYTES ? len : BLAKE2B_BLOCKBYTES;

    for (i = 0; i < 8; ++i) {
        v[i] = S->h[i];
    }

    do {
        for (i = 0; i < 16; ++i) {
            m[i] = load64(blocks + i * sizeof(m[i]));
        }

        /* blake2b_increment_counter */
        S->t[0] += increment;
        S->t[1] += (S->t[0] < increment);

        v[8]  = blake2b_IV[0];
        v[9]  = blake2b_IV[1];
        v[10] = blake2b_IV[2];
        v[11] = blake2b_IV[3];
        v[12] = S->t[0] ^ blake2b_IV[4];
        v[13] = S->t[1] ^ blake2b_IV[5];
        v[14] = S->f[0] ^ blake2b_IV[6];
        v[15] = S->f[1] ^ blake2b_IV[7];
#define G(r,i,a,b,c,d) \
        do { \
            a = a + b + m[blake2b_sigma[r][2*i+0]]; \
            d = rotr64(d ^ a, 32); \
            c = c + d; \
            b = rotr64(b ^ c, 24); \
            a = a + b + m[blake2b_sigma[r][2*i+1]]; \
            d = rotr64(d ^ a, 16); \
            c = c + d; \
            b = rotr64(b ^ c, 63); \
        } while (0)
#define ROUND(r)  \
        do { \
            G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
            G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
            G(r,2,v[ 2],v[ 6],v[10],v[14]); \
            G(r,3,v[ 3],v[ 7],v[11],v[15]); \
            G(r,4,v[ 0],v[ 5],v[10],v[15]); \
            G(r,5,v[ 1],v[ 6],v[11],v[12]); \
            G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
            G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
        } while (0)
#if defined(OPENSSL_SMALL_FOOTPRINT)
        /* 3x size reduction on x86_64, almost 7x on ARMv8, 9x on ARMv4 */
        for (i = 0; i < 12; i++) {
            ROUND(i);
        }
#else
        ROUND(0);
        ROUND(1);
        ROUND(2);
        ROUND(3);
        ROUND(4);
        ROUND(5);
        ROUND(6);
        ROUND(7);
        ROUND(8);
        ROUND(9);
        ROUND(10);
        ROUND(11);
#endif

        for (i = 0; i < 8; ++i) {
            S->h[i] = v[i] ^= v[i + 8] ^ S->h[i];
        }
#undef G
#undef ROUND
        blocks += increment;
        len -= increment;
    } while (len);
}
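
The two lines under the blake2b_increment_counter comment are a 128-bit add spelled out in portable C: the expression (S->t[0] < increment) is exactly the carry out of the low word, because the unsigned sum ends up below the addend only when it wrapped. As a standalone sketch:

#include <stdint.h>

/* 128-bit counter increment: t[0] is the low word, t[1] the high word. */
static void increment_counter128(uint64_t t[2], uint64_t inc)
{
    t[0] += inc;
    t[1] += (t[0] < inc);   /* carry: the sum wrapped iff it is now < inc */
}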
Example #7
int crypto_aead_decrypt(
	unsigned char *m,unsigned long long *mlen,
	unsigned char *nsec,
	const unsigned char *c,unsigned long long clen,
	const unsigned char *ad,unsigned long long adlen,
	const unsigned char *npub,
	const unsigned char *k
)
{
    ICESTATE S;
    uint64_t Tcomp[2]; /* computed authentication tag */
    uint64_t Trecv[2]; /* received authentication tag */
    unsigned int frameBit;
    
    /* ciphertext cannot be shorter than the tag length */
    if (clen < ICEPOLETAGLEN) {
        return -1;
    }
    
    initState128a(S, k, npub);

    /* secret message number is zero-length */
    frameBit = 0;
    processIceBlockRev(S, NULL, NULL, 0, frameBit);
   
    /* process associated data blocks */
    do {
        unsigned long long blocklen = 128;
        frameBit = (adlen <= blocklen ? 1 : 0);
        if (adlen < blocklen) {
            blocklen = adlen;
        }
        processIceBlock(S, ad, NULL, blocklen, frameBit);
        ad += blocklen;
        adlen -= blocklen;
    } while (adlen > 0);
    
    /* process ciphertext blocks to get auth tag */
    *mlen = 0;
    clen -= ICEPOLETAGLEN; /* need to stop before the auth tag */
    do {
        unsigned long long blocklen = 128;
        frameBit = (clen <= blocklen ? 0 : 1);
        if (clen < blocklen) {
            blocklen = clen;
        }
        processIceBlockRev(S, c, &m, blocklen, frameBit);
        c += blocklen;
        *mlen += blocklen;
        clen -= blocklen;    
    } while (clen > 0);

    /* compare computed and received auth tags */
    Tcomp[0] = S[0][0];
    Tcomp[1] = S[1][0];
    Trecv[0] = load64(c, 8);
    Trecv[1] = load64(c+8, 8);
    if (Tcomp[0] != Trecv[0] || Tcomp[1] != Trecv[1]) {
        *mlen = 0;
        return -1;
    }
    
    return 0;
}
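
One caveat worth noting: the tag check above compares the two words with ordinary branches, so its timing depends on where the tags first differ. Many AEAD implementations fold the comparison into a single data-independent test instead; a minimal sketch of that variant (not part of the original code):

#include <stdint.h>

/* Constant-time tag comparison: accumulate all differing bits, then do
 * one test, so timing does not reveal which word (or bit) mismatched. */
static int tags_equal(const uint64_t a[2], const uint64_t b[2])
{
    uint64_t diff = (a[0] ^ b[0]) | (a[1] ^ b[1]);
    return diff == 0;
}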