Exemple #1
0
/**
 * Run the MCMC sampler and optionally dump the stored draws to disk.
 *
 * The first nit iterations are run without storing anything; the state of
 * each of the following nmc iterations (z, pi, mu, Omega) is stored in the
 * save_* members.
 *
 * @param nit       number of iterations run before any state is stored
 * @param nmc       number of subsequent iterations whose state is stored
 * @param data_file suffix used to build the output file names
 * @param record    when true, write probx and the stored draws to files
 */
void Bayes::mcmc(const int nit, const int nmc, const string& data_file, const bool record) {

  Py_BEGIN_ALLOW_THREADS

  // Pre-size the storage: exactly one slot per stored iteration.
  save_z.resize(nmc);
  save_pi.resize(nmc);
  save_mu.resize(nmc);
  save_Omega.resize(nmc);
  for (int i=0; i<nmc; ++i) {
    save_z[i].resize(n);
    save_pi[i].resize(k);
    save_mu[i].resize(k);
    save_Omega[i].resize(k);
    for (int j=0; j<k; ++j) {
      save_mu[i][j].resize(p);
      save_Omega[i][j].resize(p, p);
    }
  }

  //! MCMC routine
  for (int i=0; i<(nit + nmc); ++i) {
    sample_zpi();

    // Save MCMC samples.  The containers were already sized to nmc above, so
    // write into the matching slot; push_back() here would append behind nmc
    // empty placeholder entries and double the container length.
    if (i >= nit) {
      const int slot = i - nit;
      save_z[slot] = z;
      save_pi[slot] = pi;
      save_mu[slot] = mu;
      save_Omega[slot] = Omega;
    }

    // print out occasionally
    if (i%100 == 0) {
      cout << "======== " << i << " =========\n";
      cout << "MCMC:       " << i << "\n";
      cout << "pi          " << pi << "\n";
      cout << "mu          " << mu << "\n";
    }
    sample_muOmega();
  }

  Py_END_ALLOW_THREADS

  if (record) {
    // dump stored results to file
    write_matrix("save_probx_" + int2str(k) + "_" + data_file, probx);
    write_vec("save_z_" +  int2str(k) + "_" + data_file, save_z);
    write_vec("save_pi_" + int2str(k) + "_" + data_file, save_pi);
    write_vec_vec("save_mu_" + int2str(k) + "_" + data_file, save_mu);
    write_vec_mat("save_omega_" + int2str(k) + "_" + data_file, save_Omega);
  }

}
Exemple #2
0
/* --------------------- */
/*
 * Send a request structure, followed by its optional name payload, over
 * db->fd using write_vec().
 *
 * Returns 0 when write_vec() reports that every byte was written and -1
 * (after logging a warning) otherwise.
 */
static int send_packet(CNID_private *db, struct cnid_dbd_rqst *rqst)
{
    struct iovec iov[2];
    size_t total = sizeof(struct cnid_dbd_rqst);
    int count = 1;

    /* The fixed-size request structure always goes first. */
    iov[0].iov_base = rqst;
    iov[0].iov_len  = sizeof(struct cnid_dbd_rqst);

    /* Append the name only when the request carries one. */
    if (rqst->namelen) {
        iov[1].iov_base = (char *)rqst->name;
        iov[1].iov_len  = rqst->namelen;
        total += rqst->namelen;
        count = 2;
    }

    if (write_vec(db->fd, iov, total, count) == total) {
        LOG(log_maxdebug, logtype_cnid, "send_packet: {done}");
        return 0;
    }

    LOG(log_warning, logtype_cnid, "send_packet: Error writev rqst (db_dir %s): %s",
        db->db_dir, strerror(errno));
    return -1;
}
Exemple #3
0
/* --------------------- */
/*
 * Connect to the CNID server named in db and send the opening message:
 * the length of db->db_dir as an int, followed by the directory bytes
 * (without a terminating NUL).
 *
 * Returns the connected descriptor on success.  Returns -1 if the
 * connection could not be obtained or the message was not fully written;
 * in the latter case the descriptor is closed first.
 */
static int init_tsock(CNID_private *db)
{
    int sock;
    int dirlen;
    size_t total;
    struct iovec iov[2];

    LOG(log_debug, logtype_cnid, "init_tsock: BEGIN. Opening volume '%s', CNID Server: %s/%s",
        db->db_dir, db->cnidserver, db->cnidport);

    sock = tsock_getfd(db->cnidserver, db->cnidport);
    if (sock < 0)
        return -1;

    dirlen = strlen(db->db_dir);
    total = dirlen + sizeof(int);

    /* payload: the directory name ... */
    iov[1].iov_base = db->db_dir;
    iov[1].iov_len  = dirlen;

    /* ... preceded by its length */
    iov[0].iov_base = &dirlen;
    iov[0].iov_len  = sizeof(int);

    if (write_vec(sock, iov, total, 2) == total) {
        LOG(log_debug, logtype_cnid, "init_tsock: ok");
        return sock;
    }

    LOG(log_error, logtype_cnid, "init_tsock: Error/short write: %s", strerror(errno));
    close(sock);
    return -1;
}
Exemple #4
0
/* --------------------- */
/*
 * Connect to the CNID server configured for db->vol and send the opening
 * message: an array of DBD_NUM_OPEN_ARGS ints holding the argument lengths,
 * followed by the NUL-terminated config name and path and, if the user name
 * is not empty, the NUL-terminated user name.
 *
 * Returns the connected descriptor on success.  Returns -1 if the
 * connection could not be obtained or the message was not fully written;
 * in the latter case the descriptor is closed first.
 */
static int init_tsock(CNID_bdb_private *db)
{
    int sock;
    int arglen[DBD_NUM_OPEN_ARGS];
    int niov = 1 + DBD_NUM_OPEN_ARGS - 1;   /* header + mandatory arguments */
    struct iovec iov[DBD_NUM_OPEN_ARGS + 1] = {{0}};
    struct vol *vol = db->vol;
    ssize_t total;

    LOG(log_debug, logtype_cnid, "connecting to CNID server: %s:%s",
        vol->v_cnidserver, vol->v_cnidport);

    sock = tsock_getfd(vol->v_cnidserver, vol->v_cnidport);
    if (sock < 0)
        return -1;

    LOG(log_debug, logtype_cnid, "connecting volume '%s', path: %s, user: %s",
        vol->v_configname, vol->v_path, vol->v_obj->username[0] ? vol->v_obj->username : "******");

    /* Lengths include the terminating NUL for the mandatory strings. */
    arglen[0] = strlen(vol->v_configname) + 1;
    arglen[1] = strlen(vol->v_path) + 1;
    arglen[2] = strlen(vol->v_obj->username);

    /* The user name is optional: it is only sent when it is not empty. */
    if (arglen[2] > 0) {
        arglen[2] += 1;
        iov[3].iov_base = vol->v_obj->username;
        iov[3].iov_len  = arglen[2];
        niov++;
    }

    iov[2].iov_base = vol->v_path;
    iov[2].iov_len  = arglen[1];

    iov[1].iov_base = vol->v_configname;
    iov[1].iov_len  = arglen[0];

    /* The header carries all length fields, including an unused third one. */
    iov[0].iov_base = &arglen[0];
    iov[0].iov_len  = DBD_NUM_OPEN_ARGS * sizeof(int);

    /* iov[3].iov_len is still 0 from the initializer when no user is sent. */
    total = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len + iov[3].iov_len;

    if (write_vec(sock, iov, total, niov) == total) {
        LOG(log_debug, logtype_cnid, "init_tsock: ok");
        return sock;
    }

    LOG(log_error, logtype_cnid, "init_tsock: Error/short write: %s", strerror(errno));
    close(sock);
    return -1;
}
/**
 * Test a single src_type -> dst_type conversion.
 *
 * A conversion function is generated with add_conv_test() and JIT-compiled,
 * then run on up to LP_TEST_NUM_SAMPLES random inputs.  Each result is
 * compared, within an epsilon derived from both types, against a reference
 * obtained by reading the source vectors into doubles and writing them back
 * in the destination type.  Sampling stops at the first mismatch.
 *
 * @param verbose   verbosity level: >= 1 dumps the type pair up front,
 *                  >= 3 dumps every sample; mismatches are always dumped
 * @param fp        optional TSV output stream; nothing is written if NULL
 * @param src_type  source vector type
 * @param dst_type  destination vector type
 * @return TRUE if every sample matched or the type pair was skipped,
 *         FALSE after the first mismatch.
 */
PIPE_ALIGN_STACK
static boolean
test_one(unsigned verbose,
         FILE *fp,
         struct lp_type src_type,
         struct lp_type dst_type)
{
   struct gallivm_state *gallivm;
   LLVMValueRef func = NULL;
   conv_test_ptr_t conv_test_ptr;
   boolean success;
   const unsigned n = LP_TEST_NUM_SAMPLES;
   int64_t cycles[LP_TEST_NUM_SAMPLES];
   double cycles_avg = 0.0;
   unsigned num_samples;
   unsigned num_srcs;
   unsigned num_dsts;
   double eps;
   unsigned i, j;

   /* Skip pairs where width and length change in the same direction. */
   if ((src_type.width >= dst_type.width && src_type.length > dst_type.length) ||
       (src_type.width <= dst_type.width && src_type.length < dst_type.length)) {
      return TRUE;
   }

   /* Known failures
    * - fixed point 32 -> float 32
    * - float 32 -> signed normalised integer 32
    */
   if ((src_type.floating && !dst_type.floating && dst_type.sign && dst_type.norm && src_type.width == dst_type.width) ||
       (!src_type.floating && dst_type.floating && src_type.fixed && src_type.width == dst_type.width)) {
      return TRUE;
   }

   if(verbose >= 1)
      dump_conv_types(stderr, src_type, dst_type);

   if (src_type.length > dst_type.length) {
      num_srcs = 1;
      num_dsts = src_type.length/dst_type.length;
   }
   else if (src_type.length < dst_type.length) {
      num_dsts = 1;
      num_srcs = dst_type.length/src_type.length;
   }
   else  {
      num_dsts = 1;
      num_srcs = 1;
   }

   /* We must not lose or gain channels. Only precision */
   assert(src_type.length * num_srcs == dst_type.length * num_dsts);

   eps = MAX2(lp_const_eps(src_type), lp_const_eps(dst_type));

   gallivm = gallivm_create();

   func = add_conv_test(gallivm, src_type, num_srcs, dst_type, num_dsts);

   gallivm_compile_module(gallivm);

   conv_test_ptr = (conv_test_ptr_t)gallivm_jit_function(gallivm, func);

   success = TRUE;
   for(i = 0; i < n && success; ++i) {
      unsigned src_stride = src_type.length*src_type.width/8;
      unsigned dst_stride = dst_type.length*dst_type.width/8;
      PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
      PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
      double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
      uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
      int64_t start_counter = 0;
      int64_t end_counter = 0;

      /* Random inputs, also read back as doubles to build the reference. */
      for(j = 0; j < num_srcs; ++j) {
         random_vec(src_type, src + j*src_stride);
         read_vec(src_type, src + j*src_stride, fref + j*src_type.length);
      }

      /* Reference output: the double values written in the destination type. */
      for(j = 0; j < num_dsts; ++j) {
         write_vec(dst_type, ref + j*dst_stride, fref + j*dst_type.length);
      }

      start_counter = rdtsc();
      conv_test_ptr(src, dst);
      end_counter = rdtsc();

      cycles[i] = end_counter - start_counter;

      for(j = 0; j < num_dsts; ++j) {
         if(!compare_vec_with_eps(dst_type, dst + j*dst_stride, ref + j*dst_stride, eps))
            success = FALSE;
      }

      if (!success || verbose >= 3) {
         if(verbose < 1)
            dump_conv_types(stderr, src_type, dst_type);
         if (success) {
            fprintf(stderr, "PASS\n");
         }
         else {
            fprintf(stderr, "MISMATCH\n");
         }

         for(j = 0; j < num_srcs; ++j) {
            fprintf(stderr, "  Src%u: ", j);
            dump_vec(stderr, src_type, src + j*src_stride);
            fprintf(stderr, "\n");
         }

#if 1
         fprintf(stderr, "  Ref: ");
         for(j = 0; j < src_type.length*num_srcs; ++j)
            fprintf(stderr, " %f", fref[j]);
         fprintf(stderr, "\n");
#endif

         for(j = 0; j < num_dsts; ++j) {
            fprintf(stderr, "  Dst%u: ", j);
            dump_vec(stderr, dst_type, dst + j*dst_stride);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Ref%u: ", j);
            dump_vec(stderr, dst_type, ref + j*dst_stride);
            fprintf(stderr, "\n");
         }
      }
   }

   /*
    * The loop above stops at the first mismatch, so only the first i entries
    * of cycles[] were written.  The statistics below must not read past them.
    */
   num_samples = i;

   /*
    * Unfortunately the output of cycle counter is not very reliable as it comes
    * -- sometimes we get outliers (due IRQs perhaps?) which are
    * better removed to avoid random or biased data.
    */
   if (num_samples) {
      double sum = 0.0, sum2 = 0.0;
      double avg, var, std;
      unsigned m;

      for(i = 0; i < num_samples; ++i) {
         /* square in double so large cycle counts cannot overflow int64_t */
         sum += (double)cycles[i];
         sum2 += (double)cycles[i]*(double)cycles[i];
      }

      avg = sum/num_samples;
      var = (sum2 - num_samples*avg*avg)/num_samples;
      /* rounding can push the variance slightly below zero */
      std = var > 0.0 ? sqrt(var) : 0.0;

      m = 0;
      sum = 0.0;
      for(i = 0; i < num_samples; ++i) {
         if(fabs(cycles[i] - avg) <= 4.0*std) {
            sum += cycles[i];
            ++m;
         }
      }

      /* fall back to the plain mean if every sample was rejected */
      cycles_avg = m ? sum/m : avg;
   }

   if(fp)
      write_tsv_row(fp, src_type, dst_type, cycles_avg, success);

   gallivm_free_function(gallivm, func, conv_test_ptr);

   gallivm_destroy(gallivm);

   return success;
}
Exemple #6
0
/*
 * Wait until the connection's descriptor is ready and perform the pending
 * read and/or write.
 *
 * A non-NULL count means the caller also wants to write (vector/count are
 * handed to write_vec()); otherwise only readability is waited for.
 *
 * c->iolock is released while blocked in poll()/select() and re-acquired
 * afterwards, and cond is waited on with c->iolock, so the code appears to
 * expect the caller to hold c->iolock on entry -- NOTE(review): confirm.
 *
 * Returns 1 right after waiting on cond if another thread is already doing
 * the requested operation.  Returns 0 if poll()/select() failed with an
 * error other than EINTR (the connection is shut down in that case).
 * Otherwise the result is non-zero only if every read/write that was
 * attempted returned non-zero.
 */
int _xcb_conn_wait(xcb_connection_t *c, pthread_cond_t *cond, struct iovec **vector, int *count)
{
    int ret;
#if USE_POLL
    struct pollfd fd;
#else
    fd_set rfds, wfds;
#endif

    /* If the thing I should be doing is already being done, wait for it. */
    if(count ? c->out.writing : c->in.reading)
    {
        pthread_cond_wait(cond, &c->iolock);
        return 1;
    }

    /* Always wait for readability and mark a read as in progress. */
#if USE_POLL
    memset(&fd, 0, sizeof(fd));
    fd.fd = c->fd;
    fd.events = POLLIN;
#else
    FD_ZERO(&rfds);
    FD_SET(c->fd, &rfds);
#endif
    ++c->in.reading;

    /* Additionally wait for writability when the caller has data to send. */
#if USE_POLL
    if(count)
    {
        fd.events |= POLLOUT;
        ++c->out.writing;
    }
#else
    FD_ZERO(&wfds);
    if(count)
    {
        FD_SET(c->fd, &wfds);
        ++c->out.writing;
    }
#endif

    /* Drop the lock while blocked; retry if interrupted by a signal. */
    pthread_mutex_unlock(&c->iolock);
    do {
#if USE_POLL
        ret = poll(&fd, 1, -1);
#else
        ret = select(c->fd + 1, &rfds, &wfds, 0, 0);
#endif
    } while (ret == -1 && errno == EINTR);
    if(ret < 0)
    {
        /* Any other failure: shut the connection down and report 0. */
        _xcb_conn_shutdown(c);
        ret = 0;
    }
    pthread_mutex_lock(&c->iolock);

    if(ret)
    {
        /* Readable: ret stays non-zero only if the read succeeds. */
#if USE_POLL
        if((fd.revents & POLLIN) == POLLIN)
#else
        if(FD_ISSET(c->fd, &rfds))
#endif
            ret = ret && _xcb_in_read(c);

        /* Writable: the write is skipped if the read above already failed. */
#if USE_POLL
        if((fd.revents & POLLOUT) == POLLOUT)
#else
        if(FD_ISSET(c->fd, &wfds))
#endif
            ret = ret && write_vec(c, vector, count);
    }

    /* Undo the in-progress markers set before blocking. */
    if(count)
        --c->out.writing;
    --c->in.reading;

    return ret;
}
Exemple #7
0
/**
 * Test one blend state / vector type combination.
 *
 * A blend function is generated with add_blend_test() and JIT-compiled, then
 * run on up to LP_TEST_NUM_SAMPLES random source, destination and constant
 * vectors.  Each result is compared with compare_vec() against a reference
 * computed by compute_blend_ref() on the same inputs read as doubles.
 * Sampling stops at the first mismatch.
 *
 * @param verbose  verbosity level: >= 1 dumps the blend state and type to
 *                 stdout up front; mismatches are always dumped to stderr
 * @param fp       optional TSV output stream; nothing is written if NULL
 * @param blend    blend state under test
 * @param type     vector type under test
 * @return TRUE if every sample matched, FALSE after the first mismatch.
 */
PIPE_ALIGN_STACK
static boolean
test_one(unsigned verbose,
         FILE *fp,
         const struct pipe_blend_state *blend,
         struct lp_type type)
{
    struct gallivm_state *gallivm;
    LLVMValueRef func = NULL;
    blend_test_ptr_t blend_test_ptr;
    boolean success;
    const unsigned n = LP_TEST_NUM_SAMPLES;
    int64_t cycles[LP_TEST_NUM_SAMPLES];
    double cycles_avg = 0.0;
    unsigned num_samples;
    unsigned i, j;
    const unsigned stride = lp_type_width(type)/8;

    if(verbose >= 1)
        dump_blend_type(stdout, blend, type);

    gallivm = gallivm_create();

    func = add_blend_test(gallivm, blend, type);

    gallivm_compile_module(gallivm);

    blend_test_ptr = (blend_test_ptr_t)gallivm_jit_function(gallivm, func);

    success = TRUE;

    {
        uint8_t *src, *dst, *con, *res, *ref;
        /* one vector each, allocated with size and alignment equal to stride */
        src = align_malloc(stride, stride);
        dst = align_malloc(stride, stride);
        con = align_malloc(stride, stride);
        res = align_malloc(stride, stride);
        ref = align_malloc(stride, stride);

        for(i = 0; i < n && success; ++i) {
            int64_t start_counter = 0;
            int64_t end_counter = 0;

            random_vec(type, src);
            random_vec(type, dst);
            random_vec(type, con);

            /* Build the reference result from the inputs read as doubles. */
            {
                double fsrc[LP_MAX_VECTOR_LENGTH];
                double fdst[LP_MAX_VECTOR_LENGTH];
                double fcon[LP_MAX_VECTOR_LENGTH];
                double fref[LP_MAX_VECTOR_LENGTH];

                read_vec(type, src, fsrc);
                read_vec(type, dst, fdst);
                read_vec(type, con, fcon);

                /* the reference is computed on groups of four elements */
                for(j = 0; j < type.length; j += 4)
                    compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);

                write_vec(type, ref, fref);
            }

            start_counter = rdtsc();
            blend_test_ptr(src, dst, con, res);
            end_counter = rdtsc();

            cycles[i] = end_counter - start_counter;

            if(!compare_vec(type, res, ref)) {
                success = FALSE;

                if(verbose < 1)
                    dump_blend_type(stderr, blend, type);
                fprintf(stderr, "MISMATCH\n");

                fprintf(stderr, "  Src: ");
                dump_vec(stderr, type, src);
                fprintf(stderr, "\n");

                fprintf(stderr, "  Dst: ");
                dump_vec(stderr, type, dst);
                fprintf(stderr, "\n");

                fprintf(stderr, "  Con: ");
                dump_vec(stderr, type, con);
                fprintf(stderr, "\n");

                fprintf(stderr, "  Res: ");
                dump_vec(stderr, type, res);
                fprintf(stderr, "\n");

                fprintf(stderr, "  Ref: ");
                dump_vec(stderr, type, ref);
                fprintf(stderr, "\n");
            }
        }

        /*
         * The loop stops at the first mismatch, so only the first i entries
         * of cycles[] were written.  The statistics below must not read past
         * them.
         */
        num_samples = i;

        align_free(src);
        align_free(dst);
        align_free(con);
        align_free(res);
        align_free(ref);
    }

    /*
     * Unfortunately the output of cycle counter is not very reliable as it comes
     * -- sometimes we get outliers (due IRQs perhaps?) which are
     * better removed to avoid random or biased data.
     */
    if (num_samples) {
        double sum = 0.0, sum2 = 0.0;
        double avg, var, std;
        unsigned m;

        for(i = 0; i < num_samples; ++i) {
            /* square in double so large cycle counts cannot overflow int64_t */
            sum += (double)cycles[i];
            sum2 += (double)cycles[i]*(double)cycles[i];
        }

        avg = sum/num_samples;
        var = (sum2 - num_samples*avg*avg)/num_samples;
        /* rounding can push the variance slightly below zero */
        std = var > 0.0 ? sqrt(var) : 0.0;

        m = 0;
        sum = 0.0;
        for(i = 0; i < num_samples; ++i) {
            if(fabs(cycles[i] - avg) <= 4.0*std) {
                sum += cycles[i];
                ++m;
            }
        }

        /* fall back to the plain mean if every sample was rejected */
        cycles_avg = m ? sum/m : avg;
    }

    if(fp)
        write_tsv_row(fp, blend, type, cycles_avg, success);

    gallivm_free_function(gallivm, func, blend_test_ptr);

    gallivm_destroy(gallivm);

    return success;
}
Exemple #8
0
ALIGN_STACK
static boolean
test_one(unsigned verbose,
         FILE *fp,
         const struct pipe_blend_state *blend,
         enum vector_mode mode,
         struct lp_type type)
{
   LLVMModuleRef module = NULL;
   LLVMValueRef func = NULL;
   LLVMExecutionEngineRef engine = NULL;
   LLVMModuleProviderRef provider = NULL;
   LLVMPassManagerRef pass = NULL;
   char *error = NULL;
   blend_test_ptr_t blend_test_ptr;
   boolean success;
   const unsigned n = LP_TEST_NUM_SAMPLES;
   int64_t cycles[LP_TEST_NUM_SAMPLES];
   double cycles_avg = 0.0;
   unsigned i, j;

   if(verbose >= 1)
      dump_blend_type(stdout, blend, mode, type);

   module = LLVMModuleCreateWithName("test");

   func = add_blend_test(module, blend, mode, type);

   if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
      LLVMDumpModule(module);
      abort();
   }
   LLVMDisposeMessage(error);

   provider = LLVMCreateModuleProviderForExistingModule(module);
   if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
      if(verbose < 1)
         dump_blend_type(stderr, blend, mode, type);
      fprintf(stderr, "%s\n", error);
      LLVMDisposeMessage(error);
      abort();
   }

#if 0
   pass = LLVMCreatePassManager();
   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
    * but there are more on SVN. */
   LLVMAddConstantPropagationPass(pass);
   LLVMAddInstructionCombiningPass(pass);
   LLVMAddPromoteMemoryToRegisterPass(pass);
   LLVMAddGVNPass(pass);
   LLVMAddCFGSimplificationPass(pass);
   LLVMRunPassManager(pass, module);
#else
   (void)pass;
#endif

   if(verbose >= 2)
      LLVMDumpModule(module);

   blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func);

   if(verbose >= 2)
      lp_disassemble(blend_test_ptr);

   success = TRUE;
   for(i = 0; i < n && success; ++i) {
      if(mode == AoS) {
         ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
         int64_t start_counter = 0;
         int64_t end_counter = 0;

         random_vec(type, src);
         random_vec(type, dst);
         random_vec(type, con);

         {
            double fsrc[LP_MAX_VECTOR_LENGTH];
            double fdst[LP_MAX_VECTOR_LENGTH];
            double fcon[LP_MAX_VECTOR_LENGTH];
            double fref[LP_MAX_VECTOR_LENGTH];

            read_vec(type, src, fsrc);
            read_vec(type, dst, fdst);
            read_vec(type, con, fcon);

            for(j = 0; j < type.length; j += 4)
               compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);

            write_vec(type, ref, fref);
         }

         start_counter = rdtsc();
         blend_test_ptr(src, dst, con, res);
         end_counter = rdtsc();

         cycles[i] = end_counter - start_counter;

         if(!compare_vec(type, res, ref)) {
            success = FALSE;

            if(verbose < 1)
               dump_blend_type(stderr, blend, mode, type);
            fprintf(stderr, "MISMATCH\n");

            fprintf(stderr, "  Src: ");
            dump_vec(stderr, type, src);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Dst: ");
            dump_vec(stderr, type, dst);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Con: ");
            dump_vec(stderr, type, con);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Res: ");
            dump_vec(stderr, type, res);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Ref: ");
            dump_vec(stderr, type, ref);
            fprintf(stderr, "\n");
         }
      }

      if(mode == SoA) {
         const unsigned stride = type.length*type.width/8;
         ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
         ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
         int64_t start_counter = 0;
         int64_t end_counter = 0;
         boolean mismatch;

         for(j = 0; j < 4; ++j) {
            random_vec(type, src + j*stride);
            random_vec(type, dst + j*stride);
            random_vec(type, con + j*stride);
         }

         {
            double fsrc[4];
            double fdst[4];
            double fcon[4];
            double fref[4];
            unsigned k;

            for(k = 0; k < type.length; ++k) {
               for(j = 0; j < 4; ++j) {
                  fsrc[j] = read_elem(type, src + j*stride, k);
                  fdst[j] = read_elem(type, dst + j*stride, k);
                  fcon[j] = read_elem(type, con + j*stride, k);
               }

               compute_blend_ref(blend, fsrc, fdst, fcon, fref);

               for(j = 0; j < 4; ++j)
                  write_elem(type, ref + j*stride, k, fref[j]);
            }
         }

         start_counter = rdtsc();
         blend_test_ptr(src, dst, con, res);
         end_counter = rdtsc();

         cycles[i] = end_counter - start_counter;

         mismatch = FALSE;
         for (j = 0; j < 4; ++j)
            if(!compare_vec(type, res + j*stride, ref + j*stride))
               mismatch = TRUE;

         if (mismatch) {
            success = FALSE;

            if(verbose < 1)
               dump_blend_type(stderr, blend, mode, type);
            fprintf(stderr, "MISMATCH\n");
            for(j = 0; j < 4; ++j) {
               char channel = "RGBA"[j];
               fprintf(stderr, "  Src%c: ", channel);
               dump_vec(stderr, type, src + j*stride);
               fprintf(stderr, "\n");

               fprintf(stderr, "  Dst%c: ", channel);
               dump_vec(stderr, type, dst + j*stride);
               fprintf(stderr, "\n");

               fprintf(stderr, "  Con%c: ", channel);
               dump_vec(stderr, type, con + j*stride);
               fprintf(stderr, "\n");

               fprintf(stderr, "  Res%c: ", channel);
               dump_vec(stderr, type, res + j*stride);
               fprintf(stderr, "\n");

               fprintf(stderr, "  Ref%c: ", channel);
               dump_vec(stderr, type, ref + j*stride);
               fprintf(stderr, "\n");
            }
         }
      }
   }

   /*
    * Unfortunately the output of cycle counter is not very reliable as it comes
    * -- sometimes we get outliers (due IRQs perhaps?) which are
    * better removed to avoid random or biased data.
    */
   {
      double sum = 0.0, sum2 = 0.0;
      double avg, std;
      unsigned m;

      for(i = 0; i < n; ++i) {
         sum += cycles[i];
         sum2 += cycles[i]*cycles[i];
      }

      avg = sum/n;
      std = sqrtf((sum2 - n*avg*avg)/n);

      m = 0;
      sum = 0.0;
      for(i = 0; i < n; ++i) {
         if(fabs(cycles[i] - avg) <= 4.0*std) {
            sum += cycles[i];
            ++m;
         }
      }

      cycles_avg = sum/m;

   }

   if(fp)
      write_tsv_row(fp, blend, mode, type, cycles_avg, success);

   if (!success) {
      if(verbose < 2)
         LLVMDumpModule(module);
      LLVMWriteBitcodeToFile(module, "blend.bc");
      fprintf(stderr, "blend.bc written\n");
      fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
      abort();
   }

   LLVMFreeMachineCodeForFunction(engine, func);

   LLVMDisposeExecutionEngine(engine);
   if(pass)
      LLVMDisposePassManager(pass);

   return success;
}
Exemple #9
0
/*
 * genrmt: read a data matrix, reduce every row to its sum, and generate
 * replicate matrices with scaleboot() for each of the kk scales.
 *
 * infile  - input file name passed to openfp(), or NULL to read from STDIN
 * outfile - output file name passed to openfp(), or NULL to print everything
 *           to stdout; with a name, an ascii file (fext_vt) and a binary
 *           file (fext_rmt) are written
 *
 * Works on the file-level variables seqmode, mm, nn, kk, rr, bb, datmat,
 * datvec, bn, rr1 and repmat.  Always returns 0.
 */
int genrmt(char *infile, char *outfile)
{
  int item, col, scale, total;
  FILE *fp;
  double rowsum, t_start, t_end;
  char *cbuf, *fext;

  /* pick the input file extension for the selected format */
  if(seqmode==SEQ_MOLPHY) fext=fext_molphy;
  else if(seqmode==SEQ_PAML) fext=fext_paml;
  else if(seqmode==SEQ_PAUP) fext=fext_paup;
  else if(seqmode==SEQ_PUZZLE) fext=fext_puzzle;
  else if(seqmode==SEQ_PHYML) fext=fext_phyml;
  else fext=fext_mt; /* SEQ_MT and anything else */

  /* open the input */
  if(!infile) {
    fp=STDIN;
    printf("\n# reading from stdin");
  } else {
    fp=openfp(infile,fext,"r",&cbuf);
    printf("\n# reading %s",cbuf);
  }

  /* read the matrix with the reader that matches the format */
  mm=nn=0;
  switch(seqmode) {
  case SEQ_MOLPHY:
    datmat = fread_mat_lls(fp, &mm, &nn);
    break;
  case SEQ_PAML:
    datmat = fread_mat_lfh(fp, &mm, &nn);
    break;
  case SEQ_PAUP:
    datmat = fread_mat_paup(fp, &mm, &nn);
    break;
  case SEQ_PUZZLE:
    datmat = fread_mat_puzzle(fp, &mm, &nn);
    break;
  case SEQ_PHYML:
    datmat = fread_mat_phyml(fp, &mm, &nn);
    break;
  case SEQ_MT:
  default:
    datmat = fread_mat(fp, &mm, &nn);
    break;
  }
  if(infile) {
    fclose(fp);
    FREE(cbuf);
  }
  printf("\n# M:%d N:%d",mm,nn);

  /* working buffers */
  datvec=new_vec(mm);
  bn=new_ivec(kk);
  rr1=new_vec(kk);

  /* observation for each item: the sum over its row of datmat */
  for(item=0;item<mm;item++) {
    rowsum=0;
    for(col=0;col<nn;col++) rowsum+=datmat[item][col];
    datvec[item]=rowsum;
  }

  /* per-scale sample size, and the scale actually obtained after the
     truncation to an integer */
  for(scale=0;scale<kk;scale++) {
    bn[scale]=(int)(rr[scale]*nn);
    rr1[scale]=(double)bn[scale]/nn;
  }

  /* write the header information */
  if(!outfile) {
    /* ascii to stdout */
    printf("\n# writing to stdout");
    printf("\n# OBS:\n"); write_vec(datvec,mm);
    printf("\n# R:\n"); write_vec(rr1,kk);
    printf("\n# B:\n"); write_ivec(bb,kk);
    printf("\n# RMAT:\n");
    printf("%d\n",kk);
  } else {
    /* ascii file with the observations */
    fp=openfp(outfile,fext_vt,"w",&cbuf);
    printf("\n# writing %s",cbuf);
    fwrite_vec(fp,datvec,mm);
    fclose(fp);
    FREE(cbuf);
    /* binary file; it stays open for the replicate matrices below */
    fp=openfp(outfile,fext_rmt,"wb",&cbuf);
    printf("\n# writing %s",cbuf);
    fwrite_bvec(fp,datvec,mm);
    fwrite_bvec(fp,rr1,kk);
    fwrite_bivec(fp,bb,kk);
    fwrite_bi(fp,kk);
  }

  /* announce the total number of replicates over all scales */
  total=0;
  for(scale=0;scale<kk;scale++) total+=bb[scale];
  printf("\n# start generating total %d replicates for %d items",total,mm);
  fflush(STDOUT);
  t_start=get_time();

  /* one replicate matrix per scale, written out and released right away */
  for(scale=0;scale<kk;scale++) {
    repmat=new_lmat(mm,bb[scale]);
    scaleboot(datmat,repmat,mm,nn,bn[scale],bb[scale]);
    if(!outfile) {
      printf("\n## RMAT[%d]:\n",scale); write_mat(repmat,mm,bb[scale]);
    } else {
      fwrite_bmat(fp,repmat,mm,bb[scale]);
      putdot();
    }
    free_lmat(repmat,mm);
  }

  t_end=get_time();
  printf("\n# time elapsed for bootstrap t=%g sec",t_end-t_start);

  /* close the binary output opened above */
  if(outfile) {
    fclose(fp);
    FREE(cbuf);
  }

  /* release the buffers */
  /* NOTE(review): bn comes from new_ivec() but is released with free_vec();
     confirm the two are interchangeable. */
  free_vec(bn);
  free_vec(rr1);
  free_vec(datvec);
  free_mat(datmat);

  return 0;
}
Exemple #10
0
void			Prob::write(std::string equ_name, std::string filename_post) {

	/*if(not os.path.exists(directory)) {
	  os.makedirs(directory)
	  }*/

	std::ofstream ofs;
	ofs.open("prof_" + name_ + "_" + equ_name + filename_post + ".prof", std::ofstream::trunc);

	if(!ofs.is_open()) {
		LOG(lg, info, warning) << "file stream not open";
		return;
	}

	std::vector<real> x;// = np.zeros(0);
	std::vector<real> y;// = np.zeros(0);
	std::vector<real> z;// = np.zeros(0);
	std::vector<real> w;// = np.zeros(0);


	for(auto g : patch_groups_) {
		//print g
		//g->write(equ_name, ofs);

		for(auto f : g->faces()) {
			auto grid = f->grid(equ_name);
			//X,Y,Z,W = f.grid(equ_name);

			auto Xr = grid.X[f->glo_to_loc2(1).i]->ravel();
			auto Yr = grid.X[f->glo_to_loc2(2).i]->ravel();
			auto Zr = grid.X[f->glo_to_loc2(3).i]->ravel();
			auto Wr = grid.W->ravel();

			x.insert(x.end(), Xr.begin(), Xr.end());
			y.insert(y.end(), Yr.begin(), Yr.end());
			z.insert(z.end(), Zr.begin(), Zr.end());
			w.insert(w.end(), Wr.begin(), Wr.end());
		}	
	}



	std::string name = "prof_" + name_ + "_" + equ_name;

	int n = x.size();


	LOG(lg, info, info)
		<< "writing " << n << " points";


	ofs << "((" << name << " point " << n << ")\n";

	ofs << "(x\n";
	write_vec(ofs, x);
	ofs << ")\n";

	ofs << "(y\n";
	write_vec(ofs, y);
	ofs << ")\n";

	ofs << "(z\n";
	write_vec(ofs, z);
	ofs << ")\n";

	ofs << "(w\n";
	write_vec(ofs, w);
	ofs << ")\n";

	ofs << ")\n";


}
  /**
   * Write every column of MV to its own file.
   *
   * Each column is exported to processor 0, split into its even-indexed
   * entries (u) and odd-indexed entries (v), and handed to write_vec().
   * File names are out_path + filename followed by the 1-based column
   * number, zero-padded so that all numbers have the same width.
   *
   * @param MV        multivector whose columns are written
   * @param filename  base file name, appended to out_path
   * @throws std::runtime_error if the handler has not been initialized
   */
  void BurkardtFileIOHandler::Write( const Teuchos::RCP<const Epetra_MultiVector>& MV, const std::string& filename )
  {
    if (isInit) {

#ifdef EPETRA_MPI
      Epetra_MpiComm comm( MPI_COMM_WORLD );
#else
      Epetra_SerialComm comm;
#endif

      int i;
      int num_vecs = MV->NumVectors();
      int dim = 2*num_nodes;
      double *u = 0, *v = 0;
      Epetra_Map Map( dim, 0, comm );
      Epetra_Map* Proc0Map;
      Epetra_BLAS blas;
      const Epetra_Vector* col_newMV;
      std::string out_file;
      //
      // Number of decimal digits in the largest file number (num_vecs).
      // ceil(log10(x)) is not the digit count: it gives 0 for 1 and 1 for 10,
      // which padded the file names inconsistently.
      //
      int num_places = 0;
      for (int rest = num_vecs; rest > 0; rest /= 10) {
        num_places++;
      }
      //
      // Create map putting all elements of vector on Processor 0.
      //
      if ( comm.MyPID() == 0 ) {
        Proc0Map = new Epetra_Map( dim, dim, 0, comm );
        u = new double[ num_nodes ];
        v = new double[ num_nodes ];
      } else {
        Proc0Map = new Epetra_Map( dim, 0, 0, comm );
      }
      Epetra_Vector Proc0Vector( *Proc0Map );
      //
      // Create an exporter to get the global Epetra_Vector to a local Epetra_Vector.
      //
      Epetra_Export exporter( MV->Map(), *Proc0Map );
      //
      i = 0;
      while ( i < num_vecs ) {
        //
        // Get column of Epetra_MultiVector in terms of Epetra_Vector.
        //
        col_newMV = (*MV)( i );
        //
        Proc0Vector.Export(*col_newMV, exporter, Insert);
        //
        // Copy the singular vector into holders
        //
        i++;  // Increment counter here to get right number in output filename!
        //
        if ( comm.MyPID() == 0 ) {
          // Stride-2 copies: even-indexed entries go to u, odd-indexed to v.
          blas.COPY( num_nodes, &Proc0Vector[0], u, 2, 1 );
          blas.COPY( num_nodes, &Proc0Vector[0]+1, v, 2, 1 );
          //
          // Determine next filename.
          //
          out_file = out_path + filename;

          // Number of decimal digits in the current file number (i >= 1).
          int curr_places = 0;
          for (int rest = i; rest > 0; rest /= 10) {
            curr_places++;
          }

          // Put in the right number of zeros.
          for (int j=curr_places; j<num_places; j++) {
            out_file += "0";
          }

          // Add the file number.
          out_file += Teuchos::Utils::toString( i );

          //
          // Write out.
          //
          write_vec( out_file, num_nodes, u, v );
        }
      }
      //
      // Clean up (deleting a null pointer is a no-op).
      //
      delete [] u;
      delete [] v;
      delete Proc0Map;
    }
    else {
      TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "File I/O handler is not initialized!");
    }      
  }
Exemple #12
0
int main(int argc, char *argv[])
{
  char  global[]="global";
  char  local[]="local";

  int    proc_config[AZ_PROC_SIZE];/* Processor information.                */
  int    options[AZ_OPTIONS_SIZE]; /* Array used to select solver options.  */
  double params[AZ_PARAMS_SIZE];   /* User selected solver paramters.       */
  int    *data_org;
                                   /* Array to specify data layout          */
  double status[AZ_STATUS_SIZE];   /* Information returned from AZ_solve(). */
  int    *update;                  /* vector elements updated on this node. */
  int    *external;
                                   /* vector elements needed by this node.  */
  int    *update_index;
                                   /* ordering of update[] and external[]   */
  int    *extern_index;
                                   /* locally on this processor.            */
  int    *indx;   /* MSR format of real and imag parts */
  int    *bindx;
  int    *bpntr;
  int    *rpntr;
  int    *cpntr;
  AZ_MATRIX *Amat;
  AZ_PRECOND *Prec;
  double *val;
  double *x, *b, *xexact, *xsolve;
  int    n_nonzeros, n_blk_nonzeros;
  int    N_update;           /* # of block unknowns updated on this node    */
  int    N_local;
                                 /* Number scalar equations on this node */
  int    N_global, N_blk_global; /* Total number of equations */
  int    N_external, N_blk_eqns;

  double *val_msr;
  int *bindx_msr;
  
  double norm, d ;

  int matrix_type;

  int has_global_indices, option;
  int i, j, m, mp ;
  int ione = 1;

#ifdef TEST_SINGULAR
  double * xnull; /* will contain difference of given exact solution and computed solution*/
  double * Axnull; /* Product of A time xnull */
  double norm_Axnull;
#endif

#ifdef AZTEC_MPI
  double MPI_Wtime(void) ;
#endif
  double time ;
#ifdef AZTEC_MPI
  MPI_Init(&argc,&argv);
#endif

  /* get number of processors and the name of this processor */
 
#ifdef AZTEC_MPI
  AZ_set_proc_config(proc_config,MPI_COMM_WORLD);
#else
  AZ_set_proc_config(proc_config,0);
#endif

  printf("proc %d of %d is alive\n",
	 proc_config[AZ_node],proc_config[AZ_N_procs]) ;

#ifdef AZTEC_MPI
  MPI_Barrier(MPI_COMM_WORLD) ;
#endif

#ifdef VBRMATRIX
  if(argc != 3) 
    perror("error: enter name of data and partition file on command line") ; 
#else
  if(argc != 2) perror("error: enter name of data file on command line") ; 
#endif
  /* Set exact solution to NULL */
  xexact = NULL;

  /* Read matrix file and distribute among processors.  
     Returns with this processor's set of rows */ 

#ifdef VBRMATRIX
  read_hb(argv[1], proc_config, &N_global, &n_nonzeros, 
	  &val_msr,  &bindx_msr, &x, &b, &xexact);
  
  create_vbr(argv[2], proc_config, &N_global, &N_blk_global,
	     &n_nonzeros, &n_blk_nonzeros, &N_update, &update,
	     bindx_msr, val_msr, &val, &indx, 
	     &rpntr, &cpntr, &bpntr, &bindx);

  if(proc_config[AZ_node] == 0) 
    {
      free ((void *) val_msr);
      free ((void *) bindx_msr);
      free ((void *) cpntr);
    }
    matrix_type = AZ_VBR_MATRIX;

#ifdef AZTEC_MPI
  MPI_Barrier(MPI_COMM_WORLD) ;
#endif

  distrib_vbr_matrix( proc_config, N_global, N_blk_global, 
		      &n_nonzeros, &n_blk_nonzeros,
		      &N_update, &update, 
		      &val, &indx, &rpntr, &cpntr, &bpntr, &bindx, 
		      &x, &b, &xexact);

#else
    read_hb(argv[1], proc_config, &N_global, &n_nonzeros,
             &val,  &bindx, &x, &b, &xexact);

#ifdef AZTEC_MPI
  MPI_Barrier(MPI_COMM_WORLD) ;
#endif

  distrib_msr_matrix(proc_config, N_global, &n_nonzeros, &N_update,
		  &update, &val, &bindx, &x, &b, &xexact);

#ifdef DEBUG
  for (i = 0; i<N_update; i++)
    if (val[i] == 0.0 ) printf("Zero diagonal at row %d\n",i);
#endif
    matrix_type = AZ_MSR_MATRIX;
#endif
  /* convert matrix to a local distributed matrix */
    cpntr = NULL;
  AZ_transform(proc_config, &external, bindx, val, update,
	       &update_index, &extern_index, &data_org, 
	       N_update, indx, bpntr, rpntr, &cpntr,
               matrix_type);

  printf("Processor %d: Completed AZ_transform\n",proc_config[AZ_node]) ;
      has_global_indices = 0;
      option = AZ_LOCAL;

#ifdef VBRMATRIX
  N_local = rpntr[N_update];
#else
  N_local = N_update;
#endif

  Amat = AZ_matrix_create(N_local);

#ifdef VBRMATRIX
  AZ_set_VBR(Amat, rpntr, cpntr, bpntr, indx, bindx, val, data_org,
          N_update, update, option);
#else
  AZ_set_MSR(Amat, bindx, val, data_org, N_update, update, option);
#endif


  printf("proc %d Completed AZ_create_matrix\n",proc_config[AZ_node]) ;

#ifdef AZTEC_MPI
  MPI_Barrier(MPI_COMM_WORLD) ;
#endif

  /* initialize AZTEC options */
 
  AZ_defaults(options, params);
  options[AZ_solver]  = AZ_gmres;
  options[AZ_precond] = AZ_sym_GS; 
  options[AZ_poly_ord] = 1;
  options[AZ_graph_fill] = 1;
  params[AZ_rthresh] = 0.0E-7;
  params[AZ_athresh] = 0.0E-7;
  options[AZ_overlap] = 1;
 /*
  params[AZ_ilut_fill] = 2.0;
  params[AZ_drop] = 0.01;
  options[AZ_overlap] = 0;
  options[AZ_reorder] = 0;
  params[AZ_rthresh] = 1.0E-1;
  params[AZ_athresh] = 1.0E-1;
  options[AZ_precond] = AZ_dom_decomp ;
  options[AZ_subdomain_solve] = AZ_bilu_ifp;
  options[AZ_reorder] = 0;
  options[AZ_graph_fill] = 0;
  params[AZ_rthresh] = 1.0E-7;
  params[AZ_athresh] = 1.0E-7;
 options[AZ_poly_ord] = 1;
 options[AZ_precond] = AZ_Jacobi;
  params[AZ_omega] = 1.0;
  options[AZ_precond] = AZ_none ;

  options[AZ_poly_ord] = 1;
  options[AZ_precond] = AZ_Jacobi ;
  options[AZ_scaling] = AZ_sym_row_sum ;
  options[AZ_scaling] = AZ_sym_diag;


  options[AZ_conv] = AZ_noscaled;
  options[AZ_scaling] = AZ_Jacobi ;

  options[AZ_precond] = AZ_dom_decomp ;
  options[AZ_subdomain_solve] = AZ_icc ;
  options[AZ_subdomain_solve] = AZ_ilut ;
  params[AZ_omega] = 1.2;
  params[AZ_ilut_fill] = 2.0;
  params[AZ_drop] = 0.01;
  options[AZ_reorder] = 0;
  options[AZ_overlap] = 0;
  options[AZ_type_overlap] = AZ_symmetric;

  options[AZ_precond] = AZ_dom_decomp ;
  options[AZ_subdomain_solve] = AZ_bilu ;
  options[AZ_graph_fill] = 0;
  options[AZ_overlap] = 0;

  options[AZ_precond] = AZ_dom_decomp ;
  options[AZ_subdomain_solve] = AZ_bilu_ifp ;
  options[AZ_graph_fill] = 0;
  options[AZ_overlap] = 0;
  params[AZ_rthresh] = 1.0E-3;
  params[AZ_athresh] = 1.0E-3;

 options[AZ_poly_ord] = 1;
 options[AZ_precond] = AZ_Jacobi ; */


  options[AZ_kspace] = 600 ;

  options[AZ_max_iter] = 600 ;
  params[AZ_tol] = 1.0e-14;

#ifdef BGMRES
  options[AZ_gmres_blocksize] = 3;
  options[AZ_gmres_num_rhs] = 1;
#endif

#ifdef DEBUG
  if (proc_config[AZ_N_procs]==1)
    write_vec("rhs.dat", N_local, b);
#endif

  /* xsolve is a little longer vector needed to account for external 
     entries.  Make it and copy x (initial guess) into it. 
  */

  if (has_global_indices)
    {
      N_external = 0;
    }
  else
    {
      N_external = data_org[AZ_N_external];
    }

  xsolve  = (double *) calloc(N_local + N_external, 
			   sizeof(double)) ;

  for (i=0; i<N_local; i++) xsolve[i] = x[i];

  /* Reorder rhs and xsolve to match matrix ordering from AZ_transform */
  if (!has_global_indices)
    {
      AZ_reorder_vec(b, data_org, update_index, rpntr) ;
      AZ_reorder_vec(xsolve, data_org, update_index, rpntr) ;
    }

#ifdef VBRMATRIX
  AZ_check_vbr(N_update, data_org[AZ_N_ext_blk], AZ_LOCAL, 
	       bindx, bpntr, cpntr, rpntr, proc_config);
#else
  AZ_check_msr(bindx, N_update, N_external, AZ_LOCAL, proc_config);
#endif

  printf("Processor %d of %d N_local = %d N_external = %d NNZ = %d\n",
	 proc_config[AZ_node],proc_config[AZ_N_procs],N_local,N_external,
	 n_nonzeros);

  /* solve the system of equations using b  as the right hand side */

  Prec = AZ_precond_create(Amat,AZ_precondition, NULL);

  AZ_iterate(xsolve, b, options, params, status, proc_config,
	     Amat, Prec, NULL);
  /*AZ_ifpack_iterate(xsolve, b, options, params, status, proc_config,
    Amat);*/

  if (proc_config[AZ_node]==0)
    {
      printf("True residual norm = %22.16g\n",status[AZ_r]);
      printf("True scaled res    = %22.16g\n",status[AZ_scaled_r]);
      printf("Computed res norm  = %22.16g\n",status[AZ_rec_r]);
    }

#ifdef TEST_SINGULAR

   xnull  = (double *) calloc(N_local + N_external, sizeof(double)) ;
   Axnull  = (double *) calloc(N_local + N_external, sizeof(double)) ;
   for (i=0; i<N_local; i++) xnull[i] = xexact[i];
   if (!has_global_indices)  AZ_reorder_vec(xnull, data_org, update_index, rpntr);
   for (i=0; i<N_local; i++) xnull[i] -= xsolve[i]; /* fill with nullerence */
   Amat->matvec(xnull, Axnull, Amat, proc_config);

   norm_Axnull = AZ_gvector_norm(N_local, 2, Axnull, proc_config);

   if (proc_config[AZ_node]==0) printf("Norm of A(xexact-xsolve) = %12.4g\n",norm_Axnull);
   free((void *) xnull);
   free((void *) Axnull);
#endif


  /* Get solution back into original ordering */
   if (!has_global_indices) {
     AZ_invorder_vec(xsolve, data_org, update_index, rpntr, x);
     free((void *) xsolve);
   }
  else {
    free((void *) x);
    x = xsolve;
  }

#ifdef DEBUG
  if (proc_config[AZ_N_procs]==1)
      write_vec("solution.dat", N_local, x);
#endif
  if (xexact != NULL)
    {
      double sum = 0.0;
      double largest = 0.0;
      for (i=0; i<N_local; i++) sum += fabs(x[i]-xexact[i]);
 printf("Processor %d:  Difference between exact and computed solution = %12.4g\n",
	     proc_config[AZ_node],sum);
      for (i=0; i<N_local; i++) largest = AZ_MAX(largest,fabs(xexact[i]));
 printf("Processor %d:  Difference divided by max abs value of exact   = %12.4g\n",
	     proc_config[AZ_node],sum/largest);
    }

				       

  free((void *) val);
  free((void *) bindx);
#ifdef VBRMATRIX
  free((void *) rpntr);
  free((void *) bpntr);
  free((void *) indx);
#endif
  free((void *) b);
  free((void *) x);
  if (xexact!=NULL) free((void *) xexact);

  AZ_free((void *) update);
  AZ_free((void *) update_index);
  AZ_free((void *) external); 
  AZ_free((void *) extern_index);
  AZ_free((void *) data_org);
  if (cpntr!=NULL) AZ_free((void *) cpntr);
  AZ_precond_destroy(&Prec);
  AZ_matrix_destroy(&Amat);
  


#ifdef AZTEC_MPI
  MPI_Finalize() ;
#endif

/* end main
*/
return 0 ;
}
Exemple #13
0
/*
 * Write the external representation of obj to port.
 *
 * Objects reported as shared by is_shared_object() are emitted with datum
 * labels: the first occurrence is prefixed with "#N=" and recorded in
 * p->labels; any later occurrence is written as the back-reference "#N#"
 * and nothing else.  When p->op is OP_WRITE the label entry is removed
 * again once the object itself has been written.
 */
static void
write_core(pic_state *pic, pic_value obj, pic_value port, struct writer_control *p)
{
  pic_value labels = p->labels;

  /* datum labels for shared objects */
  if (is_shared_object(pic, obj, p)) {
    int label;

    if (pic_weak_has(pic, labels, obj)) {
      /* already labelled: emit only the back-reference */
      pic_fprintf(pic, port, "#%d#", pic_int(pic, pic_weak_ref(pic, labels, obj)));
      return;
    }

    /* first occurrence: take the next counter value as the label */
    label = p->cnt++;
    pic_fprintf(pic, port, "#%d=", label);
    pic_weak_set(pic, labels, obj, pic_int_value(pic, label));
  }

  switch (pic_type(pic, obj)) {

  /* values with a fixed spelling */
  case PIC_TYPE_NIL:
    pic_fprintf(pic, port, "()");
    break;
  case PIC_TYPE_TRUE:
    pic_fprintf(pic, port, "#t");
    break;
  case PIC_TYPE_FALSE:
    pic_fprintf(pic, port, "#f");
    break;
  case PIC_TYPE_UNDEF:
    pic_fprintf(pic, port, "#undefined");
    break;
  case PIC_TYPE_EOF:
    pic_fprintf(pic, port, "#.(eof-object)");
    break;

  /* values printed directly through a format directive */
  case PIC_TYPE_INT:
    pic_fprintf(pic, port, "%d", pic_int(pic, obj));
    break;
  case PIC_TYPE_SYMBOL:
    pic_fprintf(pic, port, "%s", pic_sym(pic, obj));
    break;
  case PIC_TYPE_ID:
    pic_fprintf(pic, port, "#<identifier %s>", pic_str(pic, pic_id_name(pic, obj)));
    break;

  /* values handed off to a dedicated writer */
  case PIC_TYPE_FLOAT:
    write_float(pic, obj, port);
    break;
  case PIC_TYPE_BLOB:
    write_blob(pic, obj, port);
    break;
  case PIC_TYPE_CHAR:
    write_char(pic, obj, port, p);
    break;
  case PIC_TYPE_STRING:
    write_str(pic, obj, port, p);
    break;
  case PIC_TYPE_PAIR:
    write_pair(pic, obj, port, p);
    break;
  case PIC_TYPE_VECTOR:
    write_vec(pic, obj, port, p);
    break;
  case PIC_TYPE_DICT:
    write_dict(pic, obj, port, p);
    break;

  /* anything else: type name plus object pointer */
  default:
    pic_fprintf(pic, port, "#<%s %p>", pic_typename(pic, pic_type(pic, obj)), pic_obj_ptr(obj));
    break;
  }

  /* in OP_WRITE mode the label entry is dropped after the object is written */
  if (p->op == OP_WRITE && is_shared_object(pic, obj, p)) {
    pic_weak_del(pic, labels, obj);
  }
}
Exemple #14
0
/**
 * Build, JIT-compile and run one src_type -> dst_type conversion test.
 *
 * A conversion function is generated with add_conv_test(), compiled, and
 * invoked up to LP_TEST_NUM_SAMPLES times on random source vectors.  Each
 * result is compared against a reference produced by read_vec()/write_vec()
 * within an epsilon derived from both types.  Sampling stops at the first
 * mismatch.
 *
 * @param verbose   verbosity level; >= 1 dumps the types, >= 2 also dumps
 *                  the module and the disassembly.
 * @param fp        if non-NULL, a TSV row with the average cycle count and
 *                  the result is written to it.
 * @param src_type  source vector type.
 * @param dst_type  destination vector type.
 * @return TRUE when every sample matched the reference, FALSE otherwise.
 *
 * The process is aborted if the module fails verification or the JIT
 * compiler cannot be created.
 */
PIPE_ALIGN_STACK
static boolean
test_one(unsigned verbose,
         FILE *fp,
         struct lp_type src_type,
         struct lp_type dst_type)
{
   LLVMModuleRef module = NULL;
   LLVMValueRef func = NULL;
   LLVMExecutionEngineRef engine = NULL;
   LLVMModuleProviderRef provider = NULL;
   LLVMPassManagerRef pass = NULL;
   char *error = NULL;
   conv_test_ptr_t conv_test_ptr;
   boolean success;
   const unsigned n = LP_TEST_NUM_SAMPLES;
   int64_t cycles[LP_TEST_NUM_SAMPLES];
   double cycles_avg = 0.0;
   unsigned num_samples;
   unsigned num_srcs;
   unsigned num_dsts;
   double eps;
   unsigned i, j;

   if(verbose >= 1)
      dump_conv_types(stdout, src_type, dst_type);

   if(src_type.length > dst_type.length) {
      num_srcs = 1;
      num_dsts = src_type.length/dst_type.length;
   }
   else  {
      num_dsts = 1;
      num_srcs = dst_type.length/src_type.length;
   }

   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);

   /* We must not lose or gain channels. Only precision */
   assert(src_type.length * num_srcs == dst_type.length * num_dsts);

   eps = MAX2(lp_const_eps(src_type), lp_const_eps(dst_type));

   module = LLVMModuleCreateWithName("test");

   func = add_conv_test(module, src_type, num_srcs, dst_type, num_dsts);

   if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
      LLVMDumpModule(module);
      abort();
   }
   LLVMDisposeMessage(error);

   provider = LLVMCreateModuleProviderForExistingModule(module);
   if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
      if(verbose < 1)
         dump_conv_types(stderr, src_type, dst_type);
      fprintf(stderr, "%s\n", error);
      LLVMDisposeMessage(error);
      abort();
   }

#if 0
   pass = LLVMCreatePassManager();
   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
    * but there are more on SVN. */
   LLVMAddConstantPropagationPass(pass);
   LLVMAddInstructionCombiningPass(pass);
   LLVMAddPromoteMemoryToRegisterPass(pass);
   LLVMAddGVNPass(pass);
   LLVMAddCFGSimplificationPass(pass);
   LLVMRunPassManager(pass, module);
#else
   (void)pass;
#endif

   if(verbose >= 2)
      LLVMDumpModule(module);

   conv_test_ptr = (conv_test_ptr_t)LLVMGetPointerToGlobal(engine, func);

   if(verbose >= 2)
      lp_disassemble(conv_test_ptr);

   success = TRUE;
   for(i = 0; i < n && success; ++i) {
      unsigned src_stride = src_type.length*src_type.width/8;
      unsigned dst_stride = dst_type.length*dst_type.width/8;
      PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
      PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
      double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
      uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
      int64_t start_counter = 0;
      int64_t end_counter = 0;

      /* Random inputs, plus their values as doubles for the reference */
      for(j = 0; j < num_srcs; ++j) {
         random_vec(src_type, src + j*src_stride);
         read_vec(src_type, src + j*src_stride, fref + j*src_type.length);
      }

      /* Expected output: the double values written in the destination type */
      for(j = 0; j < num_dsts; ++j) {
         write_vec(dst_type, ref + j*dst_stride, fref + j*dst_type.length);
      }

      start_counter = rdtsc();
      conv_test_ptr(src, dst);
      end_counter = rdtsc();

      cycles[i] = end_counter - start_counter;

      for(j = 0; j < num_dsts; ++j) {
         if(!compare_vec_with_eps(dst_type, dst + j*dst_stride, ref + j*dst_stride, eps))
            success = FALSE;
      }

      if (!success) {
         if(verbose < 1)
            dump_conv_types(stderr, src_type, dst_type);
         fprintf(stderr, "MISMATCH\n");

         for(j = 0; j < num_srcs; ++j) {
            fprintf(stderr, "  Src%u: ", j);
            dump_vec(stderr, src_type, src + j*src_stride);
            fprintf(stderr, "\n");
         }

#if 1
         fprintf(stderr, "  Ref: ");
         for(j = 0; j < src_type.length*num_srcs; ++j)
            fprintf(stderr, " %f", fref[j]);
         fprintf(stderr, "\n");
#endif

         for(j = 0; j < num_dsts; ++j) {
            fprintf(stderr, "  Dst%u: ", j);
            dump_vec(stderr, dst_type, dst + j*dst_stride);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Ref%u: ", j);
            dump_vec(stderr, dst_type, ref + j*dst_stride);
            fprintf(stderr, "\n");
         }
      }
   }

   /*
    * The loop above stops at the first mismatch, so only cycles[0..i-1]
    * have been written.  Restrict the statistics to those entries; the
    * rest of the array is uninitialized.
    */
   num_samples = i;

   /*
    * Unfortunately the output of cycle counter is not very reliable as it comes
    * -- sometimes we get outliers (due IRQs perhaps?) which are
    * better removed to avoid random or biased data.
    */
   {
      double sum = 0.0, sum2 = 0.0;
      double avg = 0.0, var = 0.0, std;
      unsigned m;

      for(i = 0; i < num_samples; ++i) {
         /* square in double so the product cannot overflow int64_t */
         const double c = (double)cycles[i];
         sum += c;
         sum2 += c*c;
      }

      if(num_samples) {
         avg = sum/num_samples;
         var = (sum2 - num_samples*avg*avg)/num_samples;
      }

      /* rounding can push the variance slightly below zero; clamp it */
      std = var > 0.0 ? sqrt(var) : 0.0;

      /* average again, keeping only samples within 4 standard deviations */
      m = 0;
      sum = 0.0;
      for(i = 0; i < num_samples; ++i) {
         if(fabs(cycles[i] - avg) <= 4.0*std) {
            sum += cycles[i];
            ++m;
         }
      }

      /* fall back to the plain mean rather than dividing by zero */
      cycles_avg = m ? sum/m : avg;

   }

   if(fp)
      write_tsv_row(fp, src_type, dst_type, cycles_avg, success);

   if (!success) {
      /* dump the module and write the bitcode only for the first failure */
      static boolean firsttime = TRUE;
      if(firsttime) {
         if(verbose < 2)
            LLVMDumpModule(module);
         LLVMWriteBitcodeToFile(module, "conv.bc");
         fprintf(stderr, "conv.bc written\n");
         fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n");
         firsttime = FALSE;
         /* abort(); */
      }
   }

   LLVMFreeMachineCodeForFunction(engine, func);

   LLVMDisposeExecutionEngine(engine);
   if(pass)
      LLVMDisposePassManager(pass);

   return success;
}