static awk_bool_t write_array(int fd, awk_array_t array) { uint32_t i; uint32_t count; awk_flat_array_t *flat_array; if (! flatten_array(array, & flat_array)) { fprintf(stderr, _("write_array: could not flatten array\n")); return awk_false; } count = htonl(flat_array->count); if (write(fd, & count, sizeof(count)) != sizeof(count)) return awk_false; for (i = 0; i < flat_array->count; i++) { if (! write_elem(fd, & flat_array->elements[i])) return awk_false; } if (! release_flattened_array(array, flat_array)) { fprintf(stderr, _("write_array: could not release flattened array\n")); return awk_false; } return awk_true; }
void write_vec(struct lp_type type, void *dst, const double *src) { unsigned i; for (i = 0; i < type.length; ++i) write_elem(type, dst, i, src[i]); }
void random_elem(struct lp_type type, void *dst, unsigned index) { double value; assert(index < type.length); value = (double)rand()/(double)RAND_MAX; if(!type.norm) { if (type.floating) { value *= 2.0; } else { unsigned long long mask; if (type.fixed) mask = ((unsigned long long)1 << (type.width / 2)) - 1; else if (type.sign) mask = ((unsigned long long)1 << (type.width - 1)) - 1; else mask = ((unsigned long long)1 << type.width) - 1; value += (double)(mask & rand()); } } if(!type.sign) if(rand() & 1) value = -value; write_elem(type, dst, index, value); }
static int write_chain(int fd, int index, NODE *bucket) { int ret; if (bucket == NULL) return 0; ret = write_chain(fd, index, bucket->ahnext); if (ret != 0) return ret; return write_elem(fd, index, bucket); }
/* convert an element to a text string */ APU_DECLARE(void) apr_xml_to_text(apr_pool_t * p, const apr_xml_elem *elem, int style, apr_array_header_t *namespaces, int *ns_map, const char **pbuf, apr_size_t *psize) { /* get the exact size, plus a null terminator */ apr_size_t size = elem_size(elem, style, namespaces, ns_map) + 1; char *s = apr_palloc(p, size); (void) write_elem(s, elem, style, namespaces, ns_map); s[size - 1] = '\0'; *pbuf = s; if (psize) *psize = size; }
ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, struct lp_type type) { LLVMModuleRef module = NULL; LLVMValueRef func = NULL; LLVMExecutionEngineRef engine = NULL; LLVMModuleProviderRef provider = NULL; LLVMPassManagerRef pass = NULL; char *error = NULL; blend_test_ptr_t blend_test_ptr; boolean success; const unsigned n = LP_TEST_NUM_SAMPLES; int64_t cycles[LP_TEST_NUM_SAMPLES]; double cycles_avg = 0.0; unsigned i, j; if(verbose >= 1) dump_blend_type(stdout, blend, mode, type); module = LLVMModuleCreateWithName("test"); func = add_blend_test(module, blend, mode, type); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); abort(); } LLVMDisposeMessage(error); provider = LLVMCreateModuleProviderForExistingModule(module); if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { if(verbose < 1) dump_blend_type(stderr, blend, mode, type); fprintf(stderr, "%s\n", error); LLVMDisposeMessage(error); abort(); } #if 0 pass = LLVMCreatePassManager(); LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ LLVMAddConstantPropagationPass(pass); LLVMAddInstructionCombiningPass(pass); LLVMAddPromoteMemoryToRegisterPass(pass); LLVMAddGVNPass(pass); LLVMAddCFGSimplificationPass(pass); LLVMRunPassManager(pass, module); #else (void)pass; #endif if(verbose >= 2) LLVMDumpModule(module); blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func); if(verbose >= 2) lp_disassemble(blend_test_ptr); success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; random_vec(type, src); random_vec(type, dst); random_vec(type, con); { double fsrc[LP_MAX_VECTOR_LENGTH]; double fdst[LP_MAX_VECTOR_LENGTH]; double fcon[LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH]; read_vec(type, src, fsrc); read_vec(type, dst, fdst); read_vec(type, con, fcon); for(j = 0; j < type.length; j += 4) compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j); write_vec(type, ref, fref); } start_counter = rdtsc(); blend_test_ptr(src, dst, con, res); end_counter = rdtsc(); cycles[i] = end_counter - start_counter; if(!compare_vec(type, res, ref)) { success = FALSE; if(verbose < 1) dump_blend_type(stderr, blend, mode, type); fprintf(stderr, "MISMATCH\n"); fprintf(stderr, " Src: "); dump_vec(stderr, type, src); fprintf(stderr, "\n"); fprintf(stderr, " Dst: "); dump_vec(stderr, type, dst); fprintf(stderr, "\n"); fprintf(stderr, " Con: "); dump_vec(stderr, type, con); fprintf(stderr, "\n"); fprintf(stderr, " Res: "); dump_vec(stderr, type, res); fprintf(stderr, "\n"); fprintf(stderr, " Ref: "); dump_vec(stderr, type, ref); fprintf(stderr, "\n"); } } if(mode == SoA) { const unsigned stride = type.length*type.width/8; ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; for(j = 0; j < 4; ++j) { random_vec(type, src + j*stride); random_vec(type, dst + j*stride); random_vec(type, con + j*stride); } { double fsrc[4]; double fdst[4]; double fcon[4]; double fref[4]; unsigned k; for(k = 0; k < type.length; ++k) { for(j = 0; j < 4; ++j) { fsrc[j] = read_elem(type, src + j*stride, k); fdst[j] = read_elem(type, dst + j*stride, k); fcon[j] = read_elem(type, con + j*stride, k); } compute_blend_ref(blend, fsrc, fdst, fcon, fref); for(j = 0; j < 4; ++j) write_elem(type, ref + j*stride, k, fref[j]); } } start_counter = rdtsc(); blend_test_ptr(src, dst, con, res); end_counter = rdtsc(); cycles[i] = end_counter - start_counter; mismatch = FALSE; for (j = 0; j < 4; ++j) if(!compare_vec(type, res + j*stride, ref + j*stride)) mismatch = TRUE; if (mismatch) { success = FALSE; if(verbose < 1) dump_blend_type(stderr, blend, mode, type); fprintf(stderr, "MISMATCH\n"); for(j = 0; j < 4; ++j) { char channel = "RGBA"[j]; fprintf(stderr, " Src%c: ", channel); dump_vec(stderr, type, src + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Dst%c: ", channel); dump_vec(stderr, type, dst + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Con%c: ", channel); dump_vec(stderr, type, con + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Res%c: ", channel); dump_vec(stderr, type, res + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Ref%c: ", channel); dump_vec(stderr, type, ref + j*stride); fprintf(stderr, "\n"); } } } } /* * Unfortunately the output of cycle counter is not very reliable as it comes * -- sometimes we get outliers (due IRQs perhaps?) which are * better removed to avoid random or biased data. */ { double sum = 0.0, sum2 = 0.0; double avg, std; unsigned m; for(i = 0; i < n; ++i) { sum += cycles[i]; sum2 += cycles[i]*cycles[i]; } avg = sum/n; std = sqrtf((sum2 - n*avg*avg)/n); m = 0; sum = 0.0; for(i = 0; i < n; ++i) { if(fabs(cycles[i] - avg) <= 4.0*std) { sum += cycles[i]; ++m; } } cycles_avg = sum/m; } if(fp) write_tsv_row(fp, blend, mode, type, cycles_avg, success); if (!success) { if(verbose < 2) LLVMDumpModule(module); LLVMWriteBitcodeToFile(module, "blend.bc"); fprintf(stderr, "blend.bc written\n"); fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n"); abort(); } LLVMFreeMachineCodeForFunction(engine, func); LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); return success; }
static char *write_elem(char *s, const apr_xml_elem *elem, int style, apr_array_header_t *namespaces, int *ns_map) { const apr_xml_elem *child; apr_size_t len; int ns; if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) { int empty = APR_XML_ELEM_IS_EMPTY(elem); const apr_xml_attr *attr; if (elem->ns == APR_XML_NS_NONE) { len = sprintf(s, "<%s", elem->name); } else { ns = ns_map ? ns_map[elem->ns] : elem->ns; len = sprintf(s, "<ns%d:%s", ns, elem->name); } s += len; for (attr = elem->attr; attr; attr = attr->next) { if (attr->ns == APR_XML_NS_NONE) len = sprintf(s, " %s=\"%s\"", attr->name, attr->value); else { ns = ns_map ? ns_map[attr->ns] : attr->ns; len = sprintf(s, " ns%d:%s=\"%s\"", ns, attr->name, attr->value); } s += len; } /* add the xml:lang value if necessary */ if (elem->lang != NULL && (style == APR_XML_X2T_FULL_NS_LANG || elem->parent == NULL || elem->lang != elem->parent->lang)) { len = sprintf(s, " xml:lang=\"%s\"", elem->lang); s += len; } /* add namespace definitions, if required */ if (style == APR_XML_X2T_FULL_NS_LANG) { int i; for (i = namespaces->nelts; i--;) { len = sprintf(s, " xmlns:ns%d=\"%s\"", i, APR_XML_GET_URI_ITEM(namespaces, i)); s += len; } } /* no more to do. close it up and go. */ if (empty) { *s++ = '/'; *s++ = '>'; return s; } /* just close it */ *s++ = '>'; } else if (style == APR_XML_X2T_LANG_INNER) { /* prepend the xml:lang value */ if (elem->lang != NULL) { len = strlen(elem->lang); memcpy(s, elem->lang, len); s += len; } *s++ = '\0'; } s = write_text(s, elem->first_cdata.first); for (child = elem->first_child; child; child = child->next) { s = write_elem(s, child, APR_XML_X2T_FULL, NULL, ns_map); s = write_text(s, child->following_cdata.first); } if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) { if (elem->ns == APR_XML_NS_NONE) { len = sprintf(s, "</%s>", elem->name); } else { ns = ns_map ? ns_map[elem->ns] : elem->ns; len = sprintf(s, "</ns%d:%s>", ns, elem->name); } s += len; } return s; }
PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, const struct pipe_blend_state *blend, enum vector_mode mode, struct lp_type type) { struct gallivm_state *gallivm; LLVMValueRef func = NULL; blend_test_ptr_t blend_test_ptr; boolean success; const unsigned n = LP_TEST_NUM_SAMPLES; int64_t cycles[LP_TEST_NUM_SAMPLES]; double cycles_avg = 0.0; unsigned i, j; const unsigned stride = lp_type_width(type)/8; if(verbose >= 1) dump_blend_type(stdout, blend, mode, type); gallivm = gallivm_create(); func = add_blend_test(gallivm, blend, mode, type); gallivm_compile_module(gallivm); blend_test_ptr = (blend_test_ptr_t)gallivm_jit_function(gallivm, func); success = TRUE; if(mode == AoS) { uint8_t *src, *dst, *con, *res, *ref; src = align_malloc(stride, stride); dst = align_malloc(stride, stride); con = align_malloc(stride, stride); res = align_malloc(stride, stride); ref = align_malloc(stride, stride); for(i = 0; i < n && success; ++i) { int64_t start_counter = 0; int64_t end_counter = 0; random_vec(type, src); random_vec(type, dst); random_vec(type, con); { double fsrc[LP_MAX_VECTOR_LENGTH]; double fdst[LP_MAX_VECTOR_LENGTH]; double fcon[LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH]; read_vec(type, src, fsrc); read_vec(type, dst, fdst); read_vec(type, con, fcon); for(j = 0; j < type.length; j += 4) compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j); write_vec(type, ref, fref); } start_counter = rdtsc(); blend_test_ptr(src, dst, con, res); end_counter = rdtsc(); cycles[i] = end_counter - start_counter; if(!compare_vec(type, res, ref)) { success = FALSE; if(verbose < 1) dump_blend_type(stderr, blend, mode, type); fprintf(stderr, "MISMATCH\n"); fprintf(stderr, " Src: "); dump_vec(stderr, type, src); fprintf(stderr, "\n"); fprintf(stderr, " Dst: "); dump_vec(stderr, type, dst); fprintf(stderr, "\n"); fprintf(stderr, " Con: "); dump_vec(stderr, type, con); fprintf(stderr, "\n"); fprintf(stderr, " Res: "); dump_vec(stderr, type, res); fprintf(stderr, "\n"); fprintf(stderr, " Ref: "); dump_vec(stderr, type, ref); fprintf(stderr, "\n"); } } align_free(src); align_free(dst); align_free(con); align_free(res); align_free(ref); } else if(mode == SoA) { uint8_t *src, *dst, *con, *res, *ref; src = align_malloc(4*stride, stride); dst = align_malloc(4*stride, stride); con = align_malloc(4*stride, stride); res = align_malloc(4*stride, stride); ref = align_malloc(4*stride, stride); for(i = 0; i < n && success; ++i) { int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; for(j = 0; j < 4; ++j) { random_vec(type, src + j*stride); random_vec(type, dst + j*stride); random_vec(type, con + j*stride); } { double fsrc[4]; double fdst[4]; double fcon[4]; double fref[4]; unsigned k; for(k = 0; k < type.length; ++k) { for(j = 0; j < 4; ++j) { fsrc[j] = read_elem(type, src + j*stride, k); fdst[j] = read_elem(type, dst + j*stride, k); fcon[j] = read_elem(type, con + j*stride, k); } compute_blend_ref(blend, fsrc, fdst, fcon, fref); for(j = 0; j < 4; ++j) write_elem(type, ref + j*stride, k, fref[j]); } } start_counter = rdtsc(); blend_test_ptr(src, dst, con, res); end_counter = rdtsc(); cycles[i] = end_counter - start_counter; mismatch = FALSE; for (j = 0; j < 4; ++j) if(!compare_vec(type, res + j*stride, ref + j*stride)) mismatch = TRUE; if (mismatch) { success = FALSE; if(verbose < 1) dump_blend_type(stderr, blend, mode, type); fprintf(stderr, "MISMATCH\n"); for(j = 0; j < 4; ++j) { char channel = "RGBA"[j]; fprintf(stderr, " Src%c: ", channel); dump_vec(stderr, type, src + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Dst%c: ", channel); dump_vec(stderr, type, dst + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Con%c: ", channel); dump_vec(stderr, type, con + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Res%c: ", channel); dump_vec(stderr, type, res + j*stride); fprintf(stderr, "\n"); fprintf(stderr, " Ref%c: ", channel); dump_vec(stderr, type, ref + j*stride); fprintf(stderr, "\n"); fprintf(stderr, "\n"); } } } align_free(src); align_free(dst); align_free(con); align_free(res); align_free(ref); } /* * Unfortunately the output of cycle counter is not very reliable as it comes * -- sometimes we get outliers (due IRQs perhaps?) which are * better removed to avoid random or biased data. */ { double sum = 0.0, sum2 = 0.0; double avg, std; unsigned m; for(i = 0; i < n; ++i) { sum += cycles[i]; sum2 += cycles[i]*cycles[i]; } avg = sum/n; std = sqrtf((sum2 - n*avg*avg)/n); m = 0; sum = 0.0; for(i = 0; i < n; ++i) { if(fabs(cycles[i] - avg) <= 4.0*std) { sum += cycles[i]; ++m; } } cycles_avg = sum/m; } if(fp) write_tsv_row(fp, blend, mode, type, cycles_avg, success); gallivm_free_function(gallivm, func, blend_test_ptr); gallivm_destroy(gallivm); return success; }