boolean
compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps)
{
   unsigned i;
   for (i = 0; i < type.length; ++i) {
      double res_elem = read_elem(type, res, i);
      double ref_elem = read_elem(type, ref, i);
      double delta = fabs(res_elem - ref_elem);
      if(delta >= 2.0*eps)
         return FALSE;
   }

   return TRUE;
}
Example #2
0
static awk_bool_t
read_array(int fd, awk_array_t array)
{
	uint32_t i;
	uint32_t count;
	awk_element_t new_elem;

	if (read(fd, & count, sizeof(count)) != sizeof(count)) {
		return awk_false;
	}
	count = ntohl(count);

	for (i = 0; i < count; i++) {
		if (read_elem(fd, & new_elem)) {
			/* add to array */
			if (! set_array_element_by_elem(array, & new_elem)) {
				fprintf(stderr, _("read_array: set_array_element failed\n"));
				return awk_false;
			}
		} else
			break;
	}

	if (i != count)
		return awk_false;

	return awk_true;
}
void
read_vec(struct lp_type type, const void *src, double *dst)
{
   unsigned i;
   for (i = 0; i < type.length; ++i)
      dst[i] = read_elem(type, src, i);
}
Example #4
0
static int
read_array(int fd, NODE *array)
{
	int i;
	uint32_t count;
	uint32_t array_sz;
	int index;
	NODE *new_elem;

	if (read(fd, & count, sizeof(count)) != sizeof(count)) {
		return -1;
	}
	array->table_size = ntohl(count);

	if (read(fd, & array_sz, sizeof(array_sz)) != sizeof(array_sz)) {
		return -1;
	}
	array->array_size = ntohl(array_sz);

	/* malloc var_array */
	array->var_array = (NODE **) malloc(array->array_size * sizeof(NODE *));
	memset(array->var_array, '\0', array->array_size * sizeof(NODE *));

	for (i = 0; i < array->table_size; i++) {
		if ((new_elem = read_elem(fd, & index, array)) != NULL) {
			new_elem->ahnext = array->var_array[index];
			array->var_array[index] = new_elem;
		} else
			break;
	}
	if (i != array->table_size)
		return -1; 
	return 0;
}
boolean
compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps)
{
   unsigned i;
   eps *= type.floating ? 8.0 : 2.0;
   for (i = 0; i < type.length; ++i) {
      double res_elem = read_elem(type, res, i);
      double ref_elem = read_elem(type, ref, i);
      double delta = res_elem - ref_elem;
      if (ref_elem < -1.0 || ref_elem > 1.0) {
	 delta /= ref_elem;
      }
      delta = fabs(delta);
      if (delta >= eps) {
         return FALSE;
      }
   }

   return TRUE;
}
Example #6
0
ALIGN_STACK
static boolean
test_one(unsigned verbose,
         FILE *fp,
         const struct pipe_blend_state *blend,
         enum vector_mode mode,
         struct lp_type type)
{
   LLVMModuleRef module = NULL;
   LLVMValueRef func = NULL;
   LLVMExecutionEngineRef engine = NULL;
   LLVMModuleProviderRef provider = NULL;
   LLVMPassManagerRef pass = NULL;
   char *error = NULL;
   blend_test_ptr_t blend_test_ptr;
   boolean success;
   const unsigned n = LP_TEST_NUM_SAMPLES;
   int64_t cycles[LP_TEST_NUM_SAMPLES];
   double cycles_avg = 0.0;
   unsigned i, j;

   if(verbose >= 1)
      dump_blend_type(stdout, blend, mode, type);

   module = LLVMModuleCreateWithName("test");

   func = add_blend_test(module, blend, mode, type);

   if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
      LLVMDumpModule(module);
      abort();
   }
   LLVMDisposeMessage(error);

   provider = LLVMCreateModuleProviderForExistingModule(module);
   if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
      if(verbose < 1)
         dump_blend_type(stderr, blend, mode, type);
      fprintf(stderr, "%s\n", error);
      LLVMDisposeMessage(error);
      abort();
   }

#if 0
   pass = LLVMCreatePassManager();
   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
    * but there are more on SVN. */
   LLVMAddConstantPropagationPass(pass);
   LLVMAddInstructionCombiningPass(pass);
   LLVMAddPromoteMemoryToRegisterPass(pass);
   LLVMAddGVNPass(pass);
   LLVMAddCFGSimplificationPass(pass);
   LLVMRunPassManager(pass, module);
#else
   (void)pass;
#endif

   if(verbose >= 2)
      LLVMDumpModule(module);

   blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func);

   if(verbose >= 2)
      lp_disassemble(blend_test_ptr);

   success = TRUE;
   for(i = 0; i < n && success; ++i) {
      if(mode == AoS) {
         ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
         int64_t start_counter = 0;
         int64_t end_counter = 0;

         random_vec(type, src);
         random_vec(type, dst);
         random_vec(type, con);

         {
            double fsrc[LP_MAX_VECTOR_LENGTH];
            double fdst[LP_MAX_VECTOR_LENGTH];
            double fcon[LP_MAX_VECTOR_LENGTH];
            double fref[LP_MAX_VECTOR_LENGTH];

            read_vec(type, src, fsrc);
            read_vec(type, dst, fdst);
            read_vec(type, con, fcon);

            for(j = 0; j < type.length; j += 4)
               compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);

            write_vec(type, ref, fref);
         }

         start_counter = rdtsc();
         blend_test_ptr(src, dst, con, res);
         end_counter = rdtsc();

         cycles[i] = end_counter - start_counter;

         if(!compare_vec(type, res, ref)) {
            success = FALSE;

            if(verbose < 1)
               dump_blend_type(stderr, blend, mode, type);
            fprintf(stderr, "MISMATCH\n");

            fprintf(stderr, "  Src: ");
            dump_vec(stderr, type, src);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Dst: ");
            dump_vec(stderr, type, dst);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Con: ");
            dump_vec(stderr, type, con);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Res: ");
            dump_vec(stderr, type, res);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Ref: ");
            dump_vec(stderr, type, ref);
            fprintf(stderr, "\n");
         }
      }

      if(mode == SoA) {
         const unsigned stride = type.length*type.width/8;
         ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
         int64_t start_counter = 0;
         int64_t end_counter = 0;
         boolean mismatch;

         for(j = 0; j < 4; ++j) {
            random_vec(type, src + j*stride);
            random_vec(type, dst + j*stride);
            random_vec(type, con + j*stride);
         }

         {
            double fsrc[4];
            double fdst[4];
            double fcon[4];
            double fref[4];
            unsigned k;

            for(k = 0; k < type.length; ++k) {
               for(j = 0; j < 4; ++j) {
                  fsrc[j] = read_elem(type, src + j*stride, k);
                  fdst[j] = read_elem(type, dst + j*stride, k);
                  fcon[j] = read_elem(type, con + j*stride, k);
               }

               compute_blend_ref(blend, fsrc, fdst, fcon, fref);

               for(j = 0; j < 4; ++j)
                  write_elem(type, ref + j*stride, k, fref[j]);
            }
         }

         start_counter = rdtsc();
         blend_test_ptr(src, dst, con, res);
         end_counter = rdtsc();

         cycles[i] = end_counter - start_counter;

         mismatch = FALSE;
         for (j = 0; j < 4; ++j)
            if(!compare_vec(type, res + j*stride, ref + j*stride))
               mismatch = TRUE;

         if (mismatch) {
            success = FALSE;

            if(verbose < 1)
               dump_blend_type(stderr, blend, mode, type);
            fprintf(stderr, "MISMATCH\n");
            for(j = 0; j < 4; ++j) {
               char channel = "RGBA"[j];
               fprintf(stderr, "  Src%c: ", channel);
               dump_vec(stderr, type, src + j*stride);
               fprintf(stderr, "\n");

               fprintf(stderr, "  Dst%c: ", channel);
               dump_vec(stderr, type, dst + j*stride);
               fprintf(stderr, "\n");

               fprintf(stderr, "  Con%c: ", channel);
               dump_vec(stderr, type, con + j*stride);
               fprintf(stderr, "\n");

               fprintf(stderr, "  Res%c: ", channel);
               dump_vec(stderr, type, res + j*stride);
               fprintf(stderr, "\n");

               fprintf(stderr, "  Ref%c: ", channel);
               dump_vec(stderr, type, ref + j*stride);
               fprintf(stderr, "\n");
            }
         }
      }
   }

   /*
    * Unfortunately the output of cycle counter is not very reliable as it comes
    * -- sometimes we get outliers (due IRQs perhaps?) which are
    * better removed to avoid random or biased data.
    */
   {
      double sum = 0.0, sum2 = 0.0;
      double avg, std;
      unsigned m;

      for(i = 0; i < n; ++i) {
         sum += cycles[i];
         sum2 += cycles[i]*cycles[i];
      }

      avg = sum/n;
      std = sqrtf((sum2 - n*avg*avg)/n);

      m = 0;
      sum = 0.0;
      for(i = 0; i < n; ++i) {
         if(fabs(cycles[i] - avg) <= 4.0*std) {
            sum += cycles[i];
            ++m;
         }
      }

      cycles_avg = sum/m;

   }

   if(fp)
      write_tsv_row(fp, blend, mode, type, cycles_avg, success);

   if (!success) {
      if(verbose < 2)
         LLVMDumpModule(module);
      LLVMWriteBitcodeToFile(module, "blend.bc");
      fprintf(stderr, "blend.bc written\n");
      fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
      abort();
   }

   LLVMFreeMachineCodeForFunction(engine, func);

   LLVMDisposeExecutionEngine(engine);
   if(pass)
      LLVMDisposePassManager(pass);

   return success;
}
Example #7
0
PIPE_ALIGN_STACK
static boolean
test_one(unsigned verbose,
         FILE *fp,
         const struct pipe_blend_state *blend,
         enum vector_mode mode,
         struct lp_type type)
{
   struct gallivm_state *gallivm;
   LLVMValueRef func = NULL;
   blend_test_ptr_t blend_test_ptr;
   boolean success;
   const unsigned n = LP_TEST_NUM_SAMPLES;
   int64_t cycles[LP_TEST_NUM_SAMPLES];
   double cycles_avg = 0.0;
   unsigned i, j;
   const unsigned stride = lp_type_width(type)/8;

   if(verbose >= 1)
      dump_blend_type(stdout, blend, mode, type);

   gallivm = gallivm_create();

   func = add_blend_test(gallivm, blend, mode, type);

   gallivm_compile_module(gallivm);

   blend_test_ptr = (blend_test_ptr_t)gallivm_jit_function(gallivm, func);

   success = TRUE;
   if(mode == AoS) {
      uint8_t *src, *dst, *con, *res, *ref;
      src = align_malloc(stride, stride);
      dst = align_malloc(stride, stride);
      con = align_malloc(stride, stride);
      res = align_malloc(stride, stride);
      ref = align_malloc(stride, stride);

      for(i = 0; i < n && success; ++i) {
         int64_t start_counter = 0;
         int64_t end_counter = 0;

         random_vec(type, src);
         random_vec(type, dst);
         random_vec(type, con);

         {
            double fsrc[LP_MAX_VECTOR_LENGTH];
            double fdst[LP_MAX_VECTOR_LENGTH];
            double fcon[LP_MAX_VECTOR_LENGTH];
            double fref[LP_MAX_VECTOR_LENGTH];

            read_vec(type, src, fsrc);
            read_vec(type, dst, fdst);
            read_vec(type, con, fcon);

            for(j = 0; j < type.length; j += 4)
               compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);

            write_vec(type, ref, fref);
         }

         start_counter = rdtsc();
         blend_test_ptr(src, dst, con, res);
         end_counter = rdtsc();

         cycles[i] = end_counter - start_counter;

         if(!compare_vec(type, res, ref)) {
            success = FALSE;

            if(verbose < 1)
               dump_blend_type(stderr, blend, mode, type);
            fprintf(stderr, "MISMATCH\n");

            fprintf(stderr, "  Src: ");
            dump_vec(stderr, type, src);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Dst: ");
            dump_vec(stderr, type, dst);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Con: ");
            dump_vec(stderr, type, con);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Res: ");
            dump_vec(stderr, type, res);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Ref: ");
            dump_vec(stderr, type, ref);
            fprintf(stderr, "\n");
         }
      }
      align_free(src);
      align_free(dst);
      align_free(con);
      align_free(res);
      align_free(ref);
   }
   else if(mode == SoA) {
      uint8_t *src, *dst, *con, *res, *ref;
      src = align_malloc(4*stride, stride);
      dst = align_malloc(4*stride, stride);
      con = align_malloc(4*stride, stride);
      res = align_malloc(4*stride, stride);
      ref = align_malloc(4*stride, stride);

      for(i = 0; i < n && success; ++i) {
         int64_t start_counter = 0;
         int64_t end_counter = 0;
         boolean mismatch;

         for(j = 0; j < 4; ++j) {
            random_vec(type, src + j*stride);
            random_vec(type, dst + j*stride);
            random_vec(type, con + j*stride);
         }

         {
            double fsrc[4];
            double fdst[4];
            double fcon[4];
            double fref[4];
            unsigned k;

            for(k = 0; k < type.length; ++k) {
               for(j = 0; j < 4; ++j) {
                  fsrc[j] = read_elem(type, src + j*stride, k);
                  fdst[j] = read_elem(type, dst + j*stride, k);
                  fcon[j] = read_elem(type, con + j*stride, k);
               }

               compute_blend_ref(blend, fsrc, fdst, fcon, fref);

               for(j = 0; j < 4; ++j)
                  write_elem(type, ref + j*stride, k, fref[j]);
            }
         }

         start_counter = rdtsc();
         blend_test_ptr(src, dst, con, res);
         end_counter = rdtsc();

         cycles[i] = end_counter - start_counter;

         mismatch = FALSE;
         for (j = 0; j < 4; ++j)
            if(!compare_vec(type, res + j*stride, ref + j*stride))
               mismatch = TRUE;

         if (mismatch) {
            success = FALSE;

            if(verbose < 1)
               dump_blend_type(stderr, blend, mode, type);
            fprintf(stderr, "MISMATCH\n");
            for(j = 0; j < 4; ++j) {
               char channel = "RGBA"[j];
               fprintf(stderr, "  Src%c: ", channel);
               dump_vec(stderr, type, src + j*stride);
               fprintf(stderr, "\n");

               fprintf(stderr, "  Dst%c: ", channel);
               dump_vec(stderr, type, dst + j*stride);
               fprintf(stderr, "\n");

               fprintf(stderr, "  Con%c: ", channel);
               dump_vec(stderr, type, con + j*stride);
               fprintf(stderr, "\n");

               fprintf(stderr, "  Res%c: ", channel);
               dump_vec(stderr, type, res + j*stride);
               fprintf(stderr, "\n");

               fprintf(stderr, "  Ref%c: ", channel);
               dump_vec(stderr, type, ref + j*stride);
               fprintf(stderr, "\n");

               fprintf(stderr, "\n");
            }
         }
      }
      align_free(src);
      align_free(dst);
      align_free(con);
      align_free(res);
      align_free(ref);
   }

   /*
    * Unfortunately the output of cycle counter is not very reliable as it comes
    * -- sometimes we get outliers (due IRQs perhaps?) which are
    * better removed to avoid random or biased data.
    */
   {
      double sum = 0.0, sum2 = 0.0;
      double avg, std;
      unsigned m;

      for(i = 0; i < n; ++i) {
         sum += cycles[i];
         sum2 += cycles[i]*cycles[i];
      }

      avg = sum/n;
      std = sqrtf((sum2 - n*avg*avg)/n);

      m = 0;
      sum = 0.0;
      for(i = 0; i < n; ++i) {
         if(fabs(cycles[i] - avg) <= 4.0*std) {
            sum += cycles[i];
            ++m;
         }
      }

      cycles_avg = sum/m;

   }

   if(fp)
      write_tsv_row(fp, blend, mode, type, cycles_avg, success);

   gallivm_free_function(gallivm, func, blend_test_ptr);

   gallivm_destroy(gallivm);

   return success;
}