void CodeGen_X86::visit(const Select *op) { if (op->condition.type().is_vector()) { // LLVM handles selects on vector conditions much better at native width Value *cond = codegen(op->condition); Value *true_val = codegen(op->true_value); Value *false_val = codegen(op->false_value); Type t = op->true_value.type(); int slice_size = 128 / t.bits(); if (slice_size < t.lanes()) { slice_size = target.natural_vector_size(t); } vector<Value *> result; for (int i = 0; i < t.lanes(); i += slice_size) { Value *st = slice_vector(true_val, i, slice_size); Value *sf = slice_vector(false_val, i, slice_size); Value *sc = slice_vector(cond, i, slice_size); Value *slice_value = builder->CreateSelect(sc, st, sf); result.push_back(slice_value); } value = concat_vectors(result); value = slice_vector(value, 0, t.lanes()); } else { CodeGen_Posix::visit(op); } }
void CodeGen_X86::visit(const GT *op) { if (op->type.is_vector()) { // Non-native vector widths get legalized poorly by llvm. We // split it up ourselves. Type t = op->a.type(); int slice_size = 128 / t.bits(); if (slice_size < t.lanes()) { slice_size = target.natural_vector_size(t); } Value *a = codegen(op->a), *b = codegen(op->b); vector<Value *> result; for (int i = 0; i < op->type.lanes(); i += slice_size) { Value *sa = slice_vector(a, i, slice_size); Value *sb = slice_vector(b, i, slice_size); Value *slice_value; if (t.is_float()) { slice_value = builder->CreateFCmpOGT(sa, sb); } else if (t.is_int()) { slice_value = builder->CreateICmpSGT(sa, sb); } else { slice_value = builder->CreateICmpUGT(sa, sb); } result.push_back(slice_value); } value = concat_vectors(result); value = slice_vector(value, 0, t.lanes()); } else { CodeGen_Posix::visit(op); } }
void CodeGen_X86::visit(const EQ *op) { Type t = op->a.type(); int bits = t.lanes() * t.bits(); if (t.lanes() == 1 || bits % 128 == 0) { // LLVM is fine for native vector widths or scalars CodeGen_Posix::visit(op); } else { // Non-native vector widths get legalized poorly by llvm. We // split it up ourselves. Value *a = codegen(op->a), *b = codegen(op->b); int slice_size = 128 / t.bits(); if (target.has_feature(Target::AVX) && bits > 128) { slice_size = 256 / t.bits(); } vector<Value *> result; for (int i = 0; i < op->type.lanes(); i += slice_size) { Value *sa = slice_vector(a, i, slice_size); Value *sb = slice_vector(b, i, slice_size); Value *slice_value; if (t.is_float()) { slice_value = builder->CreateFCmpOEQ(sa, sb); } else { slice_value = builder->CreateICmpEQ(sa, sb); } result.push_back(slice_value); } value = concat_vectors(result); value = slice_vector(value, 0, t.lanes()); } }