/**
 * Emit LLVM IR that calls debug_printf() with the given arguments.
 *
 * \param gallivm   supplies the IR builder and the LLVM context
 * \param argcount  number of entries in \p args; must be positive
 * \param args      call arguments; args[0] must be an i8 pointer (the format
 *                  string).  Entries of float type are replaced IN PLACE by
 *                  their extension to double.
 * \return the call instruction built by LLVMBuildCall()
 */
static LLVMValueRef
lp_build_print_args(struct gallivm_state* gallivm, int argcount, LLVMValueRef* args)
{
   LLVMContextRef context = gallivm->context;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef vararg_fn_type;
   LLVMValueRef callee;
   int idx;

   assert(args);
   assert(argcount > 0);
   assert(LLVMTypeOf(args[0]) == LLVMPointerType(LLVMInt8TypeInContext(context), 0));

   /*
    * printf-style functions expect doubles: widen every float argument.
    * The format string in slot 0 is skipped.
    */
   idx = 1;
   while (idx < argcount) {
      LLVMValueRef *slot = &args[idx++];

      if (LLVMGetTypeKind(LLVMTypeOf(*slot)) != LLVMFloatTypeKind)
         continue;

      *slot = LLVMBuildFPExt(builder, *slot,
                             LLVMDoubleTypeInContext(context), "");
   }

   /* Function type returning i32, declared with no fixed parameters and
    * the var-arg flag set. */
   vararg_fn_type = LLVMFunctionType(LLVMInt32TypeInContext(context),
                                     NULL, 0, 1);

   /* Embed the address of debug_printf as a constant, then cast that
    * constant to a pointer to the function type above. */
   callee = lp_build_const_int_pointer(gallivm,
                                       func_to_pointer((func_pointer)debug_printf));
   callee = LLVMBuildBitCast(builder, callee,
                             LLVMPointerType(vararg_fn_type, 0),
                             "debug_printf");

   return LLVMBuildCall(builder, callee, args, argcount, "");
}
/**
 * Sort the list, using ``func'' to compare keys.
 *
 * The list's reference count is asserted to be exactly 1 and ``func''
 * must not be NULL.
 *
 * @param hl    the hash list to sort
 * @param func  key comparison routine, handed to sort_wrapper() as opaque data
 */
void
hash_list_sort(hash_list_t *hl, cmp_fn_t func)
{
    void *cmp_data;

    hash_list_check(hl);
    g_assert(1 == hl->refcount);
    g_assert(NULL != func);

    /* The comparison routine travels as the sort's user data pointer */
    cmp_data = func_to_pointer(func);

    elist_sort_with_data(&hl->list, sort_wrapper, cmp_data);
}
/**
 * lp_build_assert.
 *
 * Emit LLVM IR calling lp_assert(i32 condition, i8 *msg).
 *
 * The "lp_assert" function is looked up by name in the module that owns the
 * builder's current insertion block; when it is not there yet, it is
 * declared and mapped onto the C lp_assert() routine through
 * lp_build_engine.
 *
 * \param builder    IR builder, positioned inside a function
 * \param condition  should be an 'i1' or 'i32' value
 * \param msg        NUL-terminated string passed to lp_assert(); it is copied
 *                   into a module string variable, terminator included
 * \return the emitted call instruction
 */
LLVMValueRef
lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition,
                const char *msg)
{
   LLVMModuleRef owner;
   LLVMValueRef msg_global;
   LLVMValueRef assert_func;
   LLVMTypeRef arg_types[2];
   LLVMValueRef params[2];

   /* insertion block -> enclosing function -> enclosing module */
   owner = LLVMGetGlobalParent(
              LLVMGetBasicBlockParent(
                 LLVMGetInsertBlock(builder)));

   msg_global = lp_build_const_string_variable(owner, msg, strlen(msg) + 1);

   /* Signature of lp_assert: (i32, i8 *) */
   arg_types[0] = LLVMInt32Type();
   arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);

   /* Reuse an existing declaration, or create one on first use */
   assert_func = LLVMGetNamedFunction(owner, "lp_assert");
   if (assert_func == NULL) {
      LLVMTypeRef signature =
         LLVMFunctionType(LLVMVoidType(), arg_types, 2, 0);

      assert_func = LLVMAddFunction(owner, "lp_assert", signature);
      LLVMSetFunctionCallConv(assert_func, LLVMCCallConv);
      LLVMSetLinkage(assert_func, LLVMExternalLinkage);
      LLVMAddGlobalMapping(lp_build_engine, assert_func,
                           func_to_pointer((func_pointer)lp_assert));
   }
   assert(assert_func);

   /* Convert both arguments to the parameter types declared above */
   params[0] = LLVMBuildZExt(builder, condition, arg_types[0], "");
   params[1] = LLVMBuildBitCast(builder, msg_global, arg_types[1], "");

   /* Sanity-check the converted arguments */
   assert(LLVMTypeOf(params[0]) == arg_types[0]);
   assert(LLVMTypeOf(params[1]) == arg_types[1]);

   return LLVMBuildCall(builder, assert_func, params, 2, "");
}
/**
 * Fetch pixels as RGBA in AoS (array of structures) layout.
 *
 * The number of pixels fetched is type.length / 4.  Strategies are tried in
 * order and the first that applies is used:
 *   1. trivial case: the format already matches \p type (gather + swizzle);
 *   2. bit arithmetic unpacking for plain bitmask formats of at most 32 bits;
 *   3. YUV / subsampled formats;
 *   4. call format_desc->fetch_rgba_8unorm() once per pixel;
 *   5. call format_desc->fetch_rgba_float() once per pixel.
 * When none applies the function asserts and returns an undefined value.
 *
 * \param gallivm      supplies the builder and the LLVM context
 * \param format_desc  describes format of the image we're fetching from
 * \param type         type of the returned vector; its length must be a
 *                     multiple of 4 and at most LP_MAX_VECTOR_LENGTH
 * \param base_ptr     base address handed to the gather helpers
 * \param offset       offset(s) handed to the gather helpers together with
 *                     \p base_ptr (presumably byte offsets of each pixel
 *                     block -- confirm against lp_build_gather())
 * \param i, j         the sub-block pixel coordinates.  For non-compressed
 *                     formats these will always be (0, 0).
 * \return a vector of \p type holding the RGBA values of the pixels.
 */
LLVMValueRef
lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j)
{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned num_pixels = type.length / 4;   /* four channels per pixel */
   struct lp_build_context bld;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   assert(type.length % 4 == 0);

   lp_build_context_init(&bld, gallivm, type);

   /*
    * Trivial case
    *
    * The format matches the type (apart of a swizzle) so no need for
    * scaling or converting.
    */

   if (format_matches_type(format_desc, type) &&
       format_desc->block.bits <= type.width * 4 &&
       util_is_power_of_two(format_desc->block.bits)) {
      LLVMValueRef packed;

      /*
       * Gather the raw blocks, reinterpret them as a vector of the
       * requested type, and let the swizzle reorder the channels.
       */

      packed = lp_build_gather(gallivm, type.length/4,
                               format_desc->block.bits, type.width*4,
                               base_ptr, offset);

      assert(format_desc->block.bits <= type.width * type.length);

      packed = LLVMBuildBitCast(gallivm->builder, packed,
                                lp_build_vec_type(gallivm, type), "");

      return lp_build_format_swizzle_aos(format_desc, &bld, packed);
   }

   /*
    * Bit arithmetic
    *
    * Plain, non-mixed bitmask formats (RGB or ZS colorspace) with 1x1
    * blocks of a power-of-two size up to 32 bits, whose first or second
    * channel is unsigned.
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       util_is_power_of_two(format_desc->block.bits) &&
       format_desc->block.bits <= 32 &&
       format_desc->is_bitmask &&
       !format_desc->is_mixed &&
       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {

      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      unsigned k;

      /*
       * Unpack a pixel at a time into a <4 x float> RGBA vector
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef packed;

         packed = lp_build_gather_elem(gallivm, num_pixels,
                                       format_desc->block.bits, 32,
                                       base_ptr, offset, k);

         tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
                                                  format_desc,
                                                  packed);
      }

      /*
       * Type conversion.
       *
       * TODO: We could avoid floating conversion for integer to
       * integer conversions.
       */

      if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
         debug_printf("%s: unpacking %s with floating point\n",
                      __FUNCTION__, format_desc->short_name);
      }

      /* Convert the num_pixels float4 vectors into one vector of `type` */
      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return lp_build_format_swizzle_aos(format_desc, &bld, res);
   }

   /*
    * YUV / subsampled formats
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
      struct lp_type tmp_type;
      LLVMValueRef tmp;

      /* Source type for the conversion below: 8-bit normalized elements,
       * four per pixel; every other field is left zeroed. */
      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = num_pixels * 4;
      tmp_type.norm = TRUE;

      tmp = lp_build_fetch_subsampled_rgba_aos(gallivm, format_desc, num_pixels,
                                               base_ptr, offset, i, j);

      /* Convert in place: tmp serves as both source and destination */
      lp_build_conv(gallivm,
                    tmp_type, type,
                    &tmp, 1, &tmp, 1);

      return tmp;
   }

   /*
    * Fallback to util_format_description::fetch_rgba_8unorm().
    *
    * Only taken when the requested type is unsigned normalized 8-bit
    * (non-float, width 8, no sign, norm set).
    */

   if (format_desc->fetch_rgba_8unorm &&
       !type.floating && type.width == 8 && !type.sign && type.norm) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_8unorm.
       *
       * This is definitely not the most efficient way of fetching pixels, as
       * we miss the opportunity to do vectorization, but it is convenient
       * for formats or scenarios for which there was no opportunity or
       * incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      char name[256];
      LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
      LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmp;
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_8unorm().
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pi8t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* make const pointer for the C fetch_rgba_8unorm function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));

      /* cast the callee pointer to the function's type; the declaration
       * above is only used for its type */
      function = LLVMBuildBitCast(builder, callee,
                                  LLVMTypeOf(function), "cast callee");

      /* One i32 of scratch space: receives the four 8-bit channels of a pixel */
      tmp_ptr = lp_build_alloca(gallivm, i32t, "");

      res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));

      /*
       * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert
       * the fetched i32 into the result vector.
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef index = lp_build_const_int32(gallivm, k);
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         if (num_pixels == 1) {
            /* i and j are used as given, without element extraction */
            args[2] = i;
            args[3] = j;
         }
         else {
            /* i and j are vectors: pick this pixel's coordinates */
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmp = LLVMBuildLoad(builder, tmp_ptr, "");

         if (num_pixels == 1) {
            res = tmp;
         }
         else {
            res = LLVMBuildInsertElement(builder, res, tmp, index, "");
         }
      }

      /* Bitcast from <n x i32> to <4n x i8> */
      res = LLVMBuildBitCast(builder, res, bld.vec_type, "");

      return res;
   }

   /*
    * Fallback to util_format_description::fetch_rgba_float().
    */

   if (format_desc->fetch_rgba_float) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_float.
       *
       * This is definitely not the most efficient way of fetching pixels, as
       * we miss the opportunity to do vectorization, but it is convenient
       * for formats or scenarios for which there was no opportunity or
       * incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
      char name[256];
      LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
      LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
      LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
      LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_float().
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pf32t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* Note: we're using this casting here instead of LLVMAddGlobalMapping()
       * to work around a bug in LLVM 2.6.
       */

      /* make const pointer for the C fetch_rgba_float function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_float));

      /* cast the callee pointer to the function's type */
      function = LLVMBuildBitCast(builder, callee,
                                  LLVMTypeOf(function), "cast callee");

      /* Scratch <4 x float> slot that receives one pixel per call */
      tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");

      /*
       * Invoke format_desc->fetch_rgba_float() for each pixel and keep the
       * loaded <4 x float> result of each call in tmps[].
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         if (num_pixels == 1) {
            /* i and j are used as given, without element extraction */
            args[2] = i;
            args[3] = j;
         }
         else {
            /* i and j are vectors: pick this pixel's coordinates */
            LLVMValueRef index = lp_build_const_int32(gallivm, k);
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
      }

      /* Convert the num_pixels float4 vectors into one vector of `type` */
      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return res;
   }

   /* No strategy applies to this format / type combination */
   assert(0);
   return lp_build_undef(gallivm, type);
}
/**
 * Read value from database file, returning a pointer to the allocated
 * deserialized data.  These data can be modified freely and stored back,
 * but their lifetime will not exceed that of the next call to a dbmw
 * operation on the same descriptor.
 *
 * User code does not need to bother with freeing the allocated data, this
 * is managed directly by the DBM wrapper.
 *
 * The cache is consulted first; on a miss the value is looked up in the
 * underlying map, deserialized when an unpack routine is configured, and
 * then handed to the cache through allocate_entry().
 *
 * @param dw        the DBM wrapper
 * @param key       the key (constant-width, determined at open time)
 * @param lenptr    if non-NULL, writes length of (deserialized) value;
 *                  left untouched when NULL is returned
 *
 * @return pointer to value, or NULL if it was either not found or the
 * deserialization failed.  NULL is also returned on I/O error (dw->ioerr is
 * then set), and for a value stored with a zero length when no unpack
 * routine is configured.
 */
G_GNUC_HOT gpointer
dbmw_read(dbmw_t *dw, gconstpointer key, size_t *lenptr)
{
    struct cached *entry;
    dbmap_datum_t dval;

    dbmw_check(dw);
    g_assert(key);

    dw->r_access++;

    /*
     * Cache hit: serve the cached value directly.
     */

    entry = map_lookup(dw->values, key);

    if (entry != NULL) {
        dw->r_hits++;
        if (lenptr != NULL)
            *lenptr = entry->len;
        return entry->data;
    }

    /*
     * Cache miss: fetch the raw datum from the underlying map.
     */

    dw->ioerr = FALSE;
    dval = dbmap_lookup(dw->dm, key);

    if (dbmap_has_ioerr(dw->dm)) {
        dw->ioerr = TRUE;
        dw->error = errno;
        g_warning("DBMW \"%s\" I/O error whilst reading entry: %s",
            dw->name, dbmap_strerror(dw->dm));
        return NULL;
    }

    if (NULL == dval.data)
        return NULL;            /* Key is not in the database */

    /*
     * We have a value: build a zeroed cache entry to hold it.
     */

    WALLOC0(entry);

    if (dw->unpack) {
        /*
         * Deserialize into a freshly allocated arena of value_size bytes.
         */

        entry->data = walloc(dw->value_size);
        entry->len = dw->value_size;

        bstr_reset(dw->bs, dval.data, dval.len, BSTR_F_ERROR);

        if (!dbmw_deserialize(dw, dw->bs, entry->data, dw->value_size)) {
            g_carp("DBMW \"%s\" deserialization error in %s(): %s",
                dw->name,
                stacktrace_routine_name(func_to_pointer(dw->unpack), FALSE),
                bstr_error(dw->bs));
            /* The value free routine is not invoked on such failures */
            wfree(entry->data, dw->value_size);
            WFREE(entry);
            return NULL;
        }
    } else {
        /*
         * No deserialization: the cache keeps a plain copy of the datum.
         */

        g_assert(dw->value_size >= dval.len);

        if (0 == dval.len) {
            entry->data = NULL;
            entry->len = 0;
        } else {
            entry->len = dval.len;
            entry->data = wcopy(dval.data, dval.len);
        }
    }

    /*
     * On both paths above, entry->len is the length to report: value_size
     * after deserialization, the datum length otherwise.
     */

    if (lenptr != NULL)
        *lenptr = entry->len;

    g_assert((entry->len != 0) == (entry->data != NULL));

    /*
     * Hand the filled entry over to the cache.
     */

    (void) allocate_entry(dw, key, entry);

    return entry->data;
}
/**
 * Write back a dirty cached value to disk.
 *
 * An entry flagged as absent causes its key to be removed from the
 * underlying map; any other entry is stored, after serialization through
 * dw->pack when such a routine is configured.  The dirty flag is cleared
 * only when the map operation succeeds.
 *
 * @param dw        the DBM wrapper
 * @param key       key of the cached entry
 * @param value     the cached entry, which must be dirty
 *
 * @return TRUE on success, FALSE on serialization overflow or when the
 * underlying map operation failed.
 */
static gboolean
write_back(dbmw_t *dw, gconstpointer key, struct cached *value)
{
    const char *action;     /* Verb used in all the log messages below */
    dbmap_datum_t dval;
    gboolean ok;

    g_assert(value->dirty);

    action = value->absent ? "deleting" : "flushing";

    if (value->absent) {
        /* Key is to be removed: the datum is a null item */
        dval.data = NULL;
        dval.len = 0;
    } else if (dw->pack) {
        /*
         * Serialize the value into our reused message block.
         */

        pmsg_reset(dw->mb);
        (*dw->pack)(dw->mb, value->data);

        dval.data = pmsg_start(dw->mb);
        dval.len = pmsg_size(dw->mb);

        /*
         * The message block was allocated one byte larger than the
         * maximum size, so that unexpected serialization overflows
         * can be detected here.
         */

        if (dval.len > dw->value_data_size) {
            /* Don't g_carp() as this is asynchronous wrt data change */
            g_warning("DBMW \"%s\" serialization overflow in %s() "
                "whilst %s dirty entry",
                dw->name,
                stacktrace_routine_name(func_to_pointer(dw->pack), FALSE),
                action);
            return FALSE;
        }
    } else {
        /* No serialization routine: store the cached bytes as-is */
        dval.data = value->data;
        dval.len = value->len;
    }

    if (common_dbg > 4)
        g_debug("DBMW \"%s\" %s dirty value (%lu byte%s)",
            dw->name, action,
            (unsigned long) dval.len, 1 == dval.len ? "" : "s");

    /*
     * Delete the key of an absent entry, store the datum otherwise.
     */

    dw->ioerr = FALSE;

    if (value->absent)
        ok = dbmap_remove(dw->dm, key);
    else
        ok = dbmap_insert(dw->dm, key, dval);

    if (ok) {
        value->dirty = FALSE;       /* Successfully written back */
        return ok;
    }

    if (dbmap_has_ioerr(dw->dm)) {
        dw->ioerr = TRUE;
        dw->error = errno;
        g_warning("DBMW \"%s\" I/O error whilst %s dirty entry: %s",
            dw->name, action, dbmap_strerror(dw->dm));
    } else {
        g_warning("DBMW \"%s\" error whilst %s dirty entry: %s",
            dw->name, action, dbmap_strerror(dw->dm));
    }

    return ok;
}
/**
 * Common code for dbmw_foreach_trampoline() and
 * dbmw_foreach_remove_trampoline().
 *
 * @param removing  whether the removing callback (ctx->u.cbr) is used, as
 *                  opposed to the plain one (ctx->u.cb)
 * @param key       the key being iterated over
 * @param d         the raw datum held by the underlying map for that key
 * @param arg       the iteration context (a struct foreach_ctx)
 *
 * @return TRUE for a cached entry already flagged absent; otherwise the
 * status of the removing callback when ``removing'' is set, and FALSE in
 * all other cases (plain iteration, deserialization failure).
 */
static gboolean
dbmw_foreach_common(gboolean removing,
    gpointer key, dbmap_datum_t *d, gpointer arg)
{
    struct foreach_ctx *ctx = arg;
    dbmw_t *dw = ctx->dw;
    struct cached *cached;
    gboolean remove_it = FALSE;
    gpointer value;
    size_t value_len;

    dbmw_check(dw);

    cached = map_lookup(dw->values, key);

    if (cached != NULL) {
        /*
         * Key / value pair is present in the cache.
         *
         * This affects us in two ways:
         *
         * - We may already know that the key was deleted, in which case
         *   that entry is just skipped: no further access is possible
         *   through DBMW until that key is recreated.  We still return
         *   TRUE to make sure the lower layers will delete the entry
         *   physically, since deletion has not been flushed yet (that's
         *   the reason we're still iterating on it).
         *
         * - Should the cached key need to be deleted (as determined by
         *   the user callback), the entry is flagged removable so that
         *   it can be discarded from the cache after the traversal.
         */

        cached->traversed = TRUE;   /* Signal we iterated on cached value */

        if (cached->absent)
            return TRUE;            /* Deletion already known to the cache */

        if (!removing) {
            (*ctx->u.cb)(key, cached->data, cached->len, ctx->arg);
            return FALSE;
        }

        remove_it = (*ctx->u.cbr)(key, cached->data, cached->len, ctx->arg);
        if (remove_it)
            cached->removable = TRUE;   /* Discard it after traversal */

        return remove_it;
    }

    /*
     * Value is not cached: work on the raw datum, deserializing it into a
     * temporary buffer if needed.  The value is never inserted in the
     * cache, since iterating over the map must not disrupt the cache.
     */

    value = d->data;
    value_len = d->len;

    if (dw->unpack) {
        value_len = dw->value_size;
        value = walloc(value_len);

        bstr_reset(dw->bs, d->data, d->len, BSTR_F_ERROR);

        if (!dbmw_deserialize(dw, dw->bs, value, value_len)) {
            g_carp("DBMW \"%s\" deserialization error in %s(): %s",
                dw->name,
                stacktrace_routine_name(func_to_pointer(dw->unpack), FALSE),
                bstr_error(dw->bs));
            /* The value free routine is not invoked on such failures */
            wfree(value, value_len);
            return FALSE;
        }
    }

    if (removing)
        remove_it = (*ctx->u.cbr)(key, value, value_len, ctx->arg);
    else
        (*ctx->u.cb)(key, value, value_len, ctx->arg);

    /*
     * Dispose of the temporary deserialized value, if we created one.
     */

    if (dw->unpack) {
        if (dw->valfree)
            (*dw->valfree)(value, value_len);
        wfree(value, value_len);
    }

    return remove_it;
}
/**
 * Emit IR that decodes one 4x4 block through format_desc->fetch_rgba_8unorm()
 * and hands the result to store_cached_block().
 *
 * \param gallivm      supplies the builder and the LLVM context
 * \param format_desc  format description; fetch_rgba_8unorm must be set
 * \param ptr_addr     pointer passed as the source of every fetch call, and
 *                     converted to a 64-bit integer to form the tag
 * \param hash_index   forwarded unchanged to store_cached_block()
 * \param cache        forwarded unchanged to store_cached_block()
 */
static void
update_cached_block(struct gallivm_state *gallivm,
                    const struct util_format_description *format_desc,
                    LLVMValueRef ptr_addr,
                    LLVMValueRef hash_index,
                    LLVMValueRef cache)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
   LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef i32x4 = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4);
   LLVMTypeRef fetch_arg_types[4];
   LLVMTypeRef fetch_type;
   LLVMValueRef fetch_fn;
   LLVMValueRef scratch;
   LLVMValueRef tag;
   LLVMValueRef col[4];
   unsigned texel, c;

   /*
    * The block is decoded one pixel at a time with
    * format_desc->fetch_rgba_8unorm().  A routine decoding the whole block
    * at once (external C function or generated code) would make far more
    * sense, but none is used here.
    *
    * Function to call looks like:
    *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
    */
   assert(format_desc->fetch_rgba_8unorm);

   fetch_arg_types[0] = pi8t;
   fetch_arg_types[1] = pi8t;
   fetch_arg_types[2] = i32t;
   fetch_arg_types[3] = i32t;
   fetch_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
                                 fetch_arg_types,
                                 ARRAY_SIZE(fetch_arg_types), 0);

   /* Address of the C fetch_rgba_8unorm routine as a constant, cast to a
    * pointer to the function type above */
   fetch_fn = lp_build_const_int_pointer(gallivm,
      func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));
   fetch_fn = LLVMBuildBitCast(builder, fetch_fn,
                               LLVMPointerType(fetch_type, 0),
                               "cast callee");

   /* Temporary decode storage, addressed as bytes while it is filled */
   scratch = lp_build_array_alloca(gallivm, i32x4,
                                   lp_build_const_int32(gallivm, 16),
                                   "tmp_decode_store");
   scratch = LLVMBuildBitCast(builder, scratch, pi8t, "");

   /*
    * One fetch call per pixel of the block: really really slow.
    *
    * Pixels are visited with the first coordinate outermost, each result
    * being written 4 bytes after the previous one, so the block store
    * format is actually
    * x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ...
    */
   for (texel = 0; texel < 16; ++texel) {
      unsigned ci = texel / 4;
      unsigned cj = texel % 4;
      LLVMValueRef dst_offset = lp_build_const_int32(gallivm, texel * 4);
      LLVMValueRef call_args[4];

      /*
       * Note we actually supply a pointer to the start of the block,
       * not the start of the texture.
       */
      call_args[0] = LLVMBuildGEP(builder, scratch, &dst_offset, 1, "");
      call_args[1] = ptr_addr;
      call_args[2] = LLVMConstInt(i32t, ci, 0);
      call_args[3] = LLVMConstInt(i32t, cj, 0);

      LLVMBuildCall(builder, fetch_fn, call_args, ARRAY_SIZE(call_args), "");
   }

   /* Finally store the block - pointless mem copy + update tag. */
   scratch = LLVMBuildBitCast(builder, scratch, LLVMPointerType(i32x4, 0), "");
   for (c = 0; c < 4; ++c) {
      LLVMValueRef col_index = lp_build_const_int32(gallivm, c);
      LLVMValueRef col_ptr = LLVMBuildGEP(builder, scratch, &col_index, 1, "");

      col[c] = LLVMBuildLoad(builder, col_ptr, "");
   }

   /* The block address, as a 64-bit integer, is the tag of the entry */
   tag = LLVMBuildPtrToInt(builder, ptr_addr,
                           LLVMInt64TypeInContext(gallivm->context), "");

   store_cached_block(gallivm, col, tag, hash_index, cache);
}
/**
 * Main task scheduling timer.
 *
 * Runs background tasks one step at a time, for as long as there are
 * runnable tasks and the time budget for this invocation (MAX_LIFE) is not
 * exhausted.  Each pick computes a time target and a tick count for the
 * task, runs its current step and then acts on the step's return code.
 *
 * @param unused_arg    ignored
 *
 * @return always TRUE, so that we keep being called.
 */
static gboolean
bg_sched_timer(void *unused_arg)
{
	/*
	 * NOTE(review): these locals are volatile presumably so that their
	 * values remain well-defined after a longjmp() back to the setjmp()
	 * below -- confirm against bg_task_exit().
	 */
	struct bgtask * volatile bt;
	volatile int remain = MAX_LIFE;		/* Time budget left */
	volatile int target;				/* Time allotted to the picked task */
	volatile unsigned schedules = 0;	/* Number of task switches performed */
	volatile int ticks;					/* Ticks granted for the step */
	bgret_t ret;

	g_assert(current_task == NULL);
	g_assert(bg_runcount >= 0);

	(void) unused_arg;

	bg_ticker_adjust_period();

	/*
	 * Loop as long as there are tasks to be scheduled and we have some
	 * time left to spend.
	 */

	while (bg_runcount > 0 && remain > 0) {
		/*
		 * Compute how much time we can spend for this task: an equal
		 * share of MAX_LIFE among runnable tasks, capped by what remains.
		 */

		target = MAX_LIFE / bg_runcount;
		target = MIN(target, remain);

		bt = bg_sched_pick();
		g_assert(bt != NULL);		/* bg_runcount > 0 => there is a task */
		g_assert(bt->flags & TASK_F_RUNNABLE);

		bt->flags &= ~TASK_F_NOTICK;	/* We'll want tick cost update */

		/*
		 * Compute how many ticks we can ask for this processing step.
		 *
		 * We don't allow brutal variations of the amount of ticks larger
		 * than DELTA_FACTOR.
		 */

		if (bt->tick_cost > 0.0) {
			g_assert(bt->prev_ticks >= 0);
			g_assert(bt->prev_ticks <= INT_MAX / DELTA_FACTOR);

			/*
			 * Ticks fitting in the target time given the known cost of
			 * one tick, capped at INT_MAX / DELTA_FACTOR.
			 */

			if (target < bt->tick_cost * (INT_MAX / DELTA_FACTOR - 1))
				ticks = 1 + target / bt->tick_cost;
			else
				ticks = INT_MAX / DELTA_FACTOR;

			/*
			 * Clamp to [prev_ticks / DELTA_FACTOR, prev_ticks * DELTA_FACTOR],
			 * never going below 1 tick.
			 */

			if (bt->prev_ticks) {
				if (ticks > bt->prev_ticks * DELTA_FACTOR) {
					ticks = bt->prev_ticks * DELTA_FACTOR;
				} else if (ticks < bt->prev_ticks / DELTA_FACTOR) {
					if (bt->prev_ticks > DELTA_FACTOR)
						ticks = bt->prev_ticks / DELTA_FACTOR;
					else
						ticks = 1;
				}
			}

			g_assert(ticks > 0);
		} else {
			ticks = 1;		/* Tick cost not known: ask for a single tick */
		}

		bt->ticks = ticks;
		bt->ticks_used = ticks;

		/*
		 * Switch to the selected task.
		 */

		bg_task_switch(bt, 0);
		schedules++;

		g_assert(current_task == bt);
		g_assert(bt->flags & TASK_F_RUNNING);

		/*
		 * Before running the step, ensure we setjmp(), so that they
		 * may call bg_task_exit() and immediately come back here.
		 */

		if (setjmp(bt->env)) {
			/*
			 * So they exited, or someone is killing the task.
			 *
			 * Stop the task, charge its elapsed time to our budget and
			 * terminate it, then move on to the next candidate.
			 */

			if (bg_debug > 1)
				g_debug("BGTASK back from setjmp() for \"%s\"", bt->name);

			bt->flags |= TASK_F_NOTICK;
			bg_task_switch(NULL, target);

			if (bg_debug > 0 && remain < bt->elapsed) {
				g_debug("%s: remain=%d, bt->elapsed=%d",
					G_STRFUNC, remain, bt->elapsed);
			}

			remain -= MIN(remain, bt->elapsed);	/* Never goes negative */
			bg_task_terminate(bt);
			continue;
		}

		/*
		 * Run the next step.
		 */

		if (bg_debug > 2 && 0 == bt->seqno) {
			g_debug("BGTASK \"%s\" starting step #%d (%s)",
				bt->name, bt->step,
				stacktrace_routine_name(
					func_to_pointer(bt->stepvec[bt->step]), FALSE));
		}

		if (bg_debug > 4) {
			g_debug("BGTASK \"%s\" running step #%d.%d with %d tick%s",
				bt->name, bt->step, bt->seqno, ticks, ticks == 1 ? "" : "s");
		}

		bg_task_deliver_signals(bt);	/* Send any queued signal */

		/*
		 * If task is a daemon task, and we're starting at the first step,
		 * process the first item in the work queue.
		 */

		if ((bt->flags & TASK_F_DAEMON) && bt->step == 0 && bt->seqno == 0) {
			gpointer item;

			g_assert(bt->wq != NULL);	/* Runnable daemon, must have work */

			item = bt->wq->data;

			if (bg_debug > 2)
				g_debug("BGTASK daemon \"%s\" starting with item 0x%lx",
					bt->name, (gulong) item);

			(*bt->start_cb)(bt, bt->ucontext, item);
		}

		g_assert(bt->step < bt->stepcnt);

		ret = (*bt->stepvec[bt->step])(bt, bt->ucontext, ticks);

		bg_task_switch(NULL, target);	/* Stop current task, update stats */

		if (bg_debug > 0 && remain < bt->elapsed) {
			g_debug("%s: remain=%d, bt->elapsed=%d",
				G_STRFUNC, remain, bt->elapsed);
		}

		remain -= MIN(remain, bt->elapsed);	/* Never goes negative */

		if (bg_debug > 4)
			g_debug("BGTASK \"%s\" step #%d.%d ran %d tick%s "
				"in %d usecs [ret=%d]",
				bt->name, bt->step, bt->seqno,
				bt->ticks_used, bt->ticks_used == 1 ? "" : "s",
				bt->elapsed, ret);

		/*
		 * Analyse return code from processing callback.
		 *
		 * There is no default case: any other value of ``ret'' leaves
		 * the task untouched.
		 */

		switch (ret) {
		case BGR_DONE:				/* OK, end processing */
			bg_task_ended(bt);
			break;
		case BGR_NEXT:				/* OK, move to next step */
			if (bt->step == (bt->stepcnt - 1))
				bg_task_ended(bt);	/* That was the last step */
			else {
				bt->seqno = 0;
				bt->step++;
				bt->tick_cost = 0.0;	/* Don't know cost of this new step */
			}
			break;
		case BGR_MORE:				/* Same step will run again */
			bt->seqno++;
			break;
		case BGR_ERROR:
			bt->exitcode = -1;		/* Fake an exit(-1) */
			bg_task_terminate(bt);
			break;
		}
	}

	if (dead_tasks != NULL)
		bg_reclaim_dead();			/* Free dead tasks */

	/* Only log when some time was actually charged to the budget */
	if (bg_debug > 3 && MAX_LIFE != remain) {
		g_debug("BGTASK runable=%d, ran for %lu usecs, scheduling %u task%s",
			bg_runcount, MAX_LIFE - remain, schedules,
			1 == schedules ? "" : "s");
	}

	return TRUE;		/* Keep calling */
}
/**
 * Convert float32 values to half floats held in int16.
 *
 * Infs and NaNs are preserved, and rounding is towards zero (trunc).
 *
 * With f16c available, LLVM >= 3.1 and a source of 4 or 8 elements, this is
 * a single instruction (vcvtps2ph, [llvm.x86.vcvtps2ph / _mm_cvtps_ph]).
 * All other cases go through lp_build_float_to_smallfloat() followed by a
 * truncation to 16 bits.
 *
 * @param gallivm  supplies the builder and the LLVM context
 * @param src      value to convert: a float32 vector, or a scalar (handled
 *                 as having a length of 1)
 *
 * @return the converted value, with 16-bit integer elements
 */
LLVMValueRef
lp_build_float_to_half(struct gallivm_state *gallivm,
                       LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef f32_vec_type = LLVMTypeOf(src);
   unsigned length;
   struct lp_type i32_type;
   struct lp_type i16_type;
   LLVMValueRef result;

   /* A non-vector source counts as a single element */
   if (LLVMGetTypeKind(f32_vec_type) == LLVMVectorTypeKind)
      length = LLVMGetVectorSize(f32_vec_type);
   else
      length = 1;

   i32_type = lp_type_int_vec(32, 32 * length);
   i16_type = lp_type_int_vec(16, 16 * length);

   if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
       (length == 4 || length == 8)) {
      /* The intrinsic is given an 8 x i16 return type here for both the
       * 128 and the 256 variants */
      struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
      unsigned mode = 3; /* same as LP_BUILD_ROUND_TRUNCATE */
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMTypeRef ret_type = lp_build_vec_type(gallivm, i168_type);
      LLVMValueRef rounding = LLVMConstInt(i32t, mode, 0);
      const char *intrinsic = (length == 4)
                            ? "llvm.x86.vcvtps2ph.128"
                            : "llvm.x86.vcvtps2ph.256";

      result = lp_build_intrinsic_binary(builder, intrinsic,
                                         ret_type, src, rounding);

      /* Only the first 4 elements matter for a 4-element source */
      if (length == 4) {
         result = lp_build_extract_range(gallivm, result, 0, 4);
      }
   }
   else {
      LLVMValueRef wide;

      wide = lp_build_float_to_smallfloat(gallivm, i32_type, src,
                                          10, 5, 0, true);

      /* Convert int32 vector to int16 vector by trunc (might generate bad code) */
      result = LLVMBuildTrunc(builder, wide,
                              lp_build_vec_type(gallivm, i16_type), "");
   }

   /*
    * Debugging code (disabled): recomputes every element by calling the C
    * util_float_to_half() routine and prints both results side by side.
    */
   if (0) {
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMTypeRef i16t = LLVMInt16TypeInContext(gallivm->context);
      LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
      LLVMValueRef ref_result = LLVMGetUndef(LLVMVectorType(i16t, length));
      LLVMTypeRef ref_fn_type = LLVMFunctionType(i16t, &f32t, 1, 0);
      LLVMValueRef ref_fn;
      unsigned elem;

      ref_fn = lp_build_const_int_pointer(gallivm,
                  func_to_pointer((func_pointer)util_float_to_half));
      ref_fn = LLVMBuildBitCast(builder, ref_fn,
                                LLVMPointerType(ref_fn_type, 0),
                                "util_float_to_half");

      for (elem = 0; elem < length; ++elem) {
         LLVMValueRef index = LLVMConstInt(i32t, elem, 0);
         LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, "");
#if 0
         /* XXX: not really supported by backends */
         LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32);
#else
         LLVMValueRef f16 = LLVMBuildCall(builder, ref_fn, &f32, 1, "");
#endif
         ref_result = LLVMBuildInsertElement(builder, ref_result, f16, index, "");
      }

      lp_build_print_value(gallivm, "src  = ", src);
      lp_build_print_value(gallivm, "llvm = ", result);
      lp_build_print_value(gallivm, "util = ", ref_result);
      lp_build_printf(gallivm, "\n");
   }

   return result;
}