Ejemplo n.º 1
0
/// Emits IR that collects the sign (high) bit of each fw-bit field of `a`
/// using the SSE movmsk.ps intrinsic where the field/block widths allow it.
///
/// Fast paths handled here:
///   - fw == 32 with a 128-bit block: `a` is bitcast to a float vector with
///     mBitBlockWidth/32 lanes and passed straight to movmsk.ps.
///   - fw == 64 with a 256-bit block: the odd-indexed 32-bit lanes (the upper
///     half of each 64-bit field on little-endian x86) are shuffled into a
///     vector of mBitBlockWidth/64 floats, which is then passed to movmsk.ps.
/// Every other combination is delegated to IDISA_Builder::hsimd_signmask.
///
/// @param fw  field width in bits
/// @param a   value whose per-field sign bits are gathered
/// @return    the movmsk.ps call result, or whatever the base class returns
Value * IDISA_SSE_Builder::hsimd_signmask(unsigned fw, Value * a) {
    // Test both conditions up front so that no intrinsic declaration or
    // bitcast is emitted when we end up deferring to the default logic.
    if ((fw == 32) && (mBitBlockWidth == 128)) {
        Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse_movmsk_ps);
        Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
        Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
        return CreateCall(signmask_f32func, a_as_ps);
    }
    if ((fw == 64) && (mBitBlockWidth == 256)) {
        Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
        Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
        // Select lane 2*i+1 for each 64-bit field i, i.e. the 32-bit lane
        // that carries the field's sign bit.
        const unsigned fieldCount = mBitBlockWidth/fw;
        std::vector<Constant*> Idxs;
        Idxs.reserve(fieldCount);
        for (unsigned i = 0; i < fieldCount; i++) {
            Idxs.push_back(getInt32(2*i+1));
        }
        Value * packh = CreateShuffleVector(a_as_ps, UndefValue::get(bitBlock_f32type), ConstantVector::get(Idxs));
        Type * halfBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/64);
        Value * pack_as_ps = CreateBitCast(packh, halfBlock_f32type);
        Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse_movmsk_ps);
        return CreateCall(signmask_f32func, pack_as_ps);
    }
    // Otherwise use default logic.
    return IDISA_Builder::hsimd_signmask(fw, a);
}
Ejemplo n.º 2
0
/// Emits IR that collects the sign (high) bit of each fw-bit field of `a`
/// using SSE2 move-mask intrinsics where possible.
///
/// Paths, in order:
///   1. 128-bit block, fw == 64: movmsk.pd on `a` viewed as a double vector.
///   2. 128-bit block, fw == 8:  pmovmskb on `a` viewed as a byte vector.
///   3. Between 5 and 16 byte-sized-or-larger fields: the most significant
///      byte of each field is shuffled into the low lanes of a 16-byte
///      vector (remaining lanes are filled with zero) and pmovmskb is applied.
///   4. Anything else is delegated to IDISA_SSE_Builder::hsimd_signmask.
///
/// @param fw  field width in bits
/// @param a   value whose per-field sign bits are gathered
/// @return    the intrinsic call result, or whatever the SSE builder returns
Value * IDISA_SSE2_Builder::hsimd_signmask(unsigned fw, Value * a) {
    // SSE2 special cases for 128-bit blocks: movmsk.pd (fw=64) and pmovmskb (fw=8).
    if (mBitBlockWidth == 128) {
        if (fw == 64) {
            Value * signmask_f64func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_movmsk_pd);
            Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);
            Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);
            return CreateCall(signmask_f64func, a_as_pd);
        }
        if (fw == 8) {
            Value * pmovmskb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_pmovmskb_128);
            return CreateCall(pmovmskb_func, fwCast(8, a));
        }
    }
    const unsigned fieldCount = mBitBlockWidth/fw;
    // fw >= 8 keeps fieldBytes >= 1; without it hiByte would wrap below zero
    // and produce out-of-range shuffle indices.
    if ((fw >= 8) && (fieldCount > 4) && (fieldCount <= 16)) {
        Value * pmovmskb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_pmovmskb_128);
        const unsigned fieldBytes = fw/8;
        const unsigned hiByte = fieldBytes - 1;
        std::vector<Constant*> Idxs;
        Idxs.reserve(16);
        // Lane i of the result takes the top byte of field i.
        for (unsigned i = 0; i < fieldCount; i++) {
            Idxs.push_back(getInt32(fieldBytes*i+hiByte));
        }
        // Pad to 16 lanes with index mBitBlockWidth/8, which addresses the
        // first byte of the second shuffle operand (the all-zero vector).
        for (unsigned i = fieldCount; i < 16; i++) {
            Idxs.push_back(getInt32(mBitBlockWidth/8));
        }
        Value * packh = CreateShuffleVector(fwCast(8, a), fwCast(8, allZeroes()), ConstantVector::get(Idxs));
        return CreateCall(pmovmskb_func, packh);
    }
    // Otherwise use default SSE logic.
    return IDISA_SSE_Builder::hsimd_signmask(fw, a);
}
Ejemplo n.º 3
0
/*
 * Sends a call request built from `fun` and `args` to the process identified
 * by the file-level `server` pid and blocks until the request is flagged as
 * complete, then returns a copy of its result made in `env`.
 *
 * The call's mutex is taken before the message is sent, so a completion that
 * is signalled under that same mutex cannot be missed between the send and
 * the first wait. (Presumably another thread sets `complete` and signals
 * `cond` while holding the mutex -- confirm against the completion code.)
 *
 * If the message cannot be delivered, nothing would ever mark the call as
 * complete, so instead of waiting the function returns {error, send_failed}.
 * NOTE(review): the shape of this error term is a choice made here; align it
 * with whatever the callers of ErlangCall expect.
 *
 * env   environment used for sending and for building the returned term
 * fun   term handed to CreateCall
 * args  term handed to CreateCall
 */
static ERL_NIF_TERM ErlangCall(ErlNifEnv *env, ERL_NIF_TERM fun, ERL_NIF_TERM args) {
  ErlCall *erlCall = CreateCall(fun, args);
  ERL_NIF_TERM result;

  enif_mutex_lock(erlCall->mutex);
  if (enif_send(env, &server, erlCall->env, erlCall->msg)) {
    /* Loop guards against spurious wake-ups of the condition variable. */
    while (!erlCall->complete) {
      enif_cond_wait(erlCall->cond, erlCall->mutex);
    }
    enif_mutex_unlock(erlCall->mutex);
    result = enif_make_copy(env, erlCall->result);
  } else {
    /* Delivery failed: waiting here would block this thread indefinitely. */
    enif_mutex_unlock(erlCall->mutex);
    result = enif_make_tuple2(env,
                              enif_make_atom(env, "error"),
                              enif_make_atom(env, "send_failed"));
  }

  DestroyCall(erlCall);
  return result;
}