// Times remove_estimated_background_scheme<std::uint16_t>::encode on the
// fixture's sinus_ data with the scheme limited to one thread.
BENCHMARK_DEFINE_F(dynamic_default_fixture, single_thread)(benchmark::State& state) {

  // Only the benchmark thread with index 0 (re)runs the fixture set-up.
  if (state.thread_index == 0) {
    SetUp(state);
  }

  sqeazy::remove_estimated_background_scheme<std::uint16_t> scheme;
  scheme.set_n_threads(1);

  // Shared by the untimed warm-up call and the timed calls in the loop.
  const auto encode_once = [&]() {
    scheme.encode(sinus_.data(), output_.data(), shape_);
  };

  // One encode outside the measured loop.
  encode_once();

  while (state.KeepRunning()) {
    // Clearing the output buffer is excluded from the measurement.
    state.PauseTiming();
    std::fill(output_.begin(), output_.end(),0);
    state.ResumeTiming();

    encode_once();
  }

  // Input bytes consumed per encode call, scaled by the iteration count.
  const auto bytes_per_pass = static_cast<int64_t>(size_) * sizeof(sinus_.front());
  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * bytes_per_pass);
}
// Times frame_shuffle_scheme<std::uint16_t>::encode on the fixture's sinus_
// data using as many threads as std::thread::hardware_concurrency() reports.
BENCHMARK_DEFINE_F(dynamic_default_fixture, max_threads)(benchmark::State& state) {


  // Only the benchmark thread with index 0 (re)runs the fixture set-up.
  if (state.thread_index == 0) {
    SetUp(state);
  }

  // hardware_concurrency() is allowed to return 0 when the value cannot be
  // determined; fall back to a single thread instead of requesting zero.
  const unsigned detected_threads = std::thread::hardware_concurrency();
  const int nthreads = detected_threads > 0 ? static_cast<int>(detected_threads) : 1;

  sqeazy::frame_shuffle_scheme<std::uint16_t> local;
  local.set_n_threads(nthreads);


  // One encode outside the measured loop.
  local.encode(sinus_.data(),
               output_.data(),
               shape_);

  while (state.KeepRunning()) {
    // Clearing the output buffer is excluded from the measurement.
    state.PauseTiming();
    std::fill(output_.begin(), output_.end(),0);
    state.ResumeTiming();

    local.encode(sinus_.data(),
               output_.data(),
               shape_);
  }

  // Input bytes consumed per encode call, scaled by the iteration count.
  state.SetBytesProcessed(int64_t(state.iterations()) *
                          int64_t(size_)*sizeof(sinus_.front()));
}
// Benchmarks DeflateDecompressor::decode.
//
// An image whose dimensions come from areaToRectangle(state.range(0)) is
// created, compressed once with compressChunk<BPS>, and the resulting buffer
// is decoded back into the same image on every iteration. BPS::value is the
// bit depth handed to the decompressor; Pf::value selects the predictor code.
static inline void BM_DeflateDecompressor(benchmark::State& state) {
  static_assert(BPS::value > 0, "bad bps");
  static_assert(rawspeed::isAligned(BPS::value, 8), "not byte count");

  const auto dim = areaToRectangle(state.range(0));
  auto mRaw = rawspeed::RawImage::create(dim, rawspeed::TYPE_FLOAT32, 1);

  // compressChunk reports the size of the compressed data via the out-param.
  uLong compressedSize;
  auto compressed = compressChunk<BPS>(mRaw, &compressedSize);
  assert(compressed != nullptr);
  assert(compressedSize > 0);

  Buffer input(std::move(compressed), compressedSize);
  assert(input.getSize() == compressedSize);

  // Translate Pf::value into the predictor code given to the decompressor.
  // Pf::value == 0 keeps the initial value of 0.
  int predictor = 0;
  switch (Pf::value) {
  case 0:
    break;
  case 1:
    predictor = 3;
    break;
  case 2:
    predictor = 34894;
    break;
  case 4:
    predictor = 34895;
    break;
  default:
    // No other Pf::value is expected.
    __builtin_unreachable();
    break;
  }

  // Scratch buffer handed to decode(); starts out empty.
  std::unique_ptr<unsigned char[]> scratch;

  const rawspeed::ByteStream stream(input, 0, input.getSize());

  while (state.KeepRunning()) {
    // A fresh decompressor is constructed inside the timed region.
    DeflateDecompressor decompressor(stream, mRaw, predictor, BPS::value);

    decompressor.decode(&scratch, mRaw->dim.x, mRaw->dim.y, 0, 0);
  }

  state.SetComplexityN(dim.area());
  state.SetItemsProcessed(state.complexity_length_n() * state.iterations());
  state.SetBytesProcessed(BPS::value * state.items_processed() / 8);
}
// Benchmarks an atria map -> filter -> sink pipeline that is run over a
// sequence built once, outside the timed loop, from range(1, ITERATION_COUNT).
void AtriaSequence(benchmark::State& state) {
    using namespace atria::xform;
    using namespace atria::prelude;

    auto numbers = sequence(range(1, ITERATION_COUNT));
    while (state.KeepRunning()) {
        // The pipeline itself is composed anew on every iteration.
        run(comp(map([](auto value) { return ADD_VALUE(value); }),
                 filter([](auto value) { return FILTER_VALUE(value); }),
                 // doNotOptimize receives every element that reaches the sink.
                 sink([](auto value) { doNotOptimize(value); })),
            numbers);
    }
    state.SetItemsProcessed(ITERATION_COUNT * state.iterations());
    state.SetBytesProcessed(ITERATION_COUNT * state.iterations() * sizeof(ELEMENT_TYPE));
}
// Benchmarks one-way transfer over a socketpair: each iteration the client
// writes a single A_WRTE packet with state.range(0) payload bytes and the
// loop spins until the server's read callback has counted that many bytes.
void BM_Connection_Unidirectional(benchmark::State& state) {
    int sockets[2];
    if (adb_socketpair(sockets) != 0) {
        LOG(FATAL) << "failed to create socketpair";
    }

    auto client = MakeConnection<ConnectionType>(unique_fd(sockets[0]));
    auto server = MakeConnection<ConnectionType>(unique_fd(sockets[1]));

    // Payload bytes seen by the server; reset before every write below.
    std::atomic<size_t> received_bytes;

    // The client accepts and drops whatever it reads.
    client->SetReadCallback([](Connection*, std::unique_ptr<apacket>) -> bool { return true; });
    server->SetReadCallback(
        [&received_bytes](Connection*, std::unique_ptr<apacket> incoming) -> bool {
            received_bytes.fetch_add(incoming->payload.size());
            return true;
        });

    client->SetErrorCallback([](Connection*, const std::string& error) {
        LOG(INFO) << "client closed: " << error;
    });
    server->SetErrorCallback([](Connection*, const std::string& error) {
        LOG(INFO) << "server closed: " << error;
    });

    client->Start();
    server->Start();

    for (auto _ : state) {
        size_t data_size = state.range(0);

        // Build a zeroed message header followed by a payload of 0xff bytes.
        auto outgoing = std::make_unique<apacket>();
        memset(&outgoing->msg, 0, sizeof(outgoing->msg));
        outgoing->msg.command = A_WRTE;
        outgoing->msg.data_length = data_size;
        outgoing->payload.resize(data_size);
        memset(&outgoing->payload[0], 0xff, data_size);

        received_bytes.store(0);
        client->Write(std::move(outgoing));

        // Busy-wait until the whole payload has arrived at the server.
        while (received_bytes.load() < data_size) {
        }
    }
    state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * state.range(0));

    client->Stop();
    server->Stop();
}
// Benchmarks an atria pipeline whose input is produced by the transducers
// themselves: enumerate_from(1) limited by take(ITERATION_COUNT), followed by
// map -> filter -> sink.
void AtriaEnumerate(benchmark::State& state) {
    using namespace atria::xform;
    using namespace atria::prelude;

    // The generating stage is composed once, outside the timed loop.
    auto source = comp(enumerate_from(1), take(ITERATION_COUNT));
    while (state.KeepRunning()) {
        // The full pipeline is composed anew on every iteration.
        run(comp(source,
                 map([](auto value) { return ADD_VALUE(value); }),
                 filter([](auto value) { return FILTER_VALUE(value); }),
                 // doNotOptimize receives every element that reaches the sink.
                 sink([](auto value) { doNotOptimize(value); })));
    }
    state.SetItemsProcessed(ITERATION_COUNT * state.iterations());
    state.SetBytesProcessed(ITERATION_COUNT * state.iterations() * sizeof(ELEMENT_TYPE));
}
// Benchmarks a stdio transfer function against /dev/zero.
//
// state:    benchmark state; state.range_x() supplies the chunk size in bytes.
// f:        callable invoked once per iteration as f(buf, chunk_size, 1, fp).
// buffered: when false, the stream is switched to unbuffered mode (_IONBF).
void ReadWriteTest(benchmark::State& state, Fn f, bool buffered) {
  size_t chunk_size = state.range_x();

  // "r+" opens the stream for both reading and writing. The mode used to be
  // "rw", which is not one of the modes fopen defines; implementations that
  // ignore the stray 'w' open the stream read-only, so a writing callback
  // would only ever measure failing writes.
  FILE* fp = fopen("/dev/zero", "r+");
  if (fp == nullptr) {
    // Bail out rather than hand a null stream to __fsetlocking below.
    perror("fopen(\"/dev/zero\")");
    return;
  }

  // The caller takes responsibility for locking, so stdio does no per-call
  // stream locking during the measurement.
  __fsetlocking(fp, FSETLOCKING_BYCALLER);
  char* buf = new char[chunk_size];

  if (!buffered) {
    setvbuf(fp, nullptr, _IONBF, 0);
  }

  while (state.KeepRunning()) {
    f(buf, chunk_size, 1, fp);
  }

  state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(chunk_size));
  delete[] buf;
  fclose(fp);
}
// Times sqeazy::detail::serial_fill_histogram over the fixture's sinus_ data,
// where the histogram is handed back by value from each call.
BENCHMARK_DEFINE_F(uint16_fixture, stack_fill_histogram_uint16)(benchmark::State& state) {

  // Only the benchmark thread with index 0 (re)runs the fixture set-up.
  if (state.thread_index == 0) {
    SetUp(state);
  }

  // One untimed call; it also fixes the type of the histogram variable.
  auto histogram = sqeazy::detail::serial_fill_histogram(sinus_.cbegin(), sinus_.cend());

  while (state.KeepRunning()) {
    // Zeroing the previous result is excluded from the measurement.
    state.PauseTiming();
    std::fill(histogram.begin(), histogram.end(), 0);
    state.ResumeTiming();

    histogram = sqeazy::detail::serial_fill_histogram(sinus_.cbegin(), sinus_.cend());
  }

  // Input bytes scanned per call, scaled by the iteration count.
  const auto bytes_per_pass = static_cast<int64_t>(size_) * sizeof(sinus_.front());
  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * bytes_per_pass);
}
// Times remove_estimated_background_scheme<std::uint16_t>::encode on
// sin_data, with the scheme's thread count taken from state.threads.
BENCHMARK_DEFINE_F(static_default_fixture, one_thread)(benchmark::State& state) {

  sqeazy::remove_estimated_background_scheme<std::uint16_t> scheme;
  scheme.set_n_threads(state.threads);

  // One encode outside the measured loop.
  scheme.encode(sin_data.data(), output_data.data(), shape);

  while (state.KeepRunning()) {

    // Clearing the output buffer is excluded from the measurement.
    state.PauseTiming();
    std::fill(output_data.begin(), output_data.end(), 0);
    state.ResumeTiming();

    scheme.encode(sin_data.data(), output_data.data(), shape);
  }

  // Input bytes consumed per encode call, scaled by the iteration count.
  const auto bytes_per_pass = static_cast<int64_t>(sin_data.size()) * sizeof(sin_data.front());
  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * bytes_per_pass);
}
// Times sqeazy::detail::serial_fill_histogram_byref over the fixture's sinus_
// data, writing into a std::vector<std::uint32_t> allocated once up front.
BENCHMARK_DEFINE_F(uint16_fixture, heap_fill_histogram_uint16)(benchmark::State& state) {

  // Only the benchmark thread with index 0 (re)runs the fixture set-up.
  if (state.thread_index == 0) {
    SetUp(state);
  }

  using container_t = std::vector<std::uint32_t>;
  // One slot for every value a std::uint16_t can take.
  static const std::size_t n_bins = (1 << (sizeof(std::uint16_t)*CHAR_BIT));

  container_t histogram(n_bins, 0);

  // One untimed call before the measured loop.
  sqeazy::detail::serial_fill_histogram_byref(sinus_.cbegin(), sinus_.cend(), histogram.begin());

  while (state.KeepRunning()) {
    // Zeroing the bins is excluded from the measurement.
    state.PauseTiming();
    std::fill(histogram.begin(), histogram.end(), 0);
    state.ResumeTiming();

    sqeazy::detail::serial_fill_histogram_byref(sinus_.cbegin(), sinus_.cend(), histogram.begin());
  }

  // Input bytes scanned per call, scaled by the iteration count.
  const auto bytes_per_pass = static_cast<int64_t>(size_) * sizeof(sinus_.front());
  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * bytes_per_pass);
}
// Benchmarks a round trip over a socketpair: each iteration the client writes
// one A_WRTE packet with state.range(0) payload bytes, the server writes every
// packet it reads back on the same connection, and the loop spins until the
// client's read callback has counted that many bytes. Policy selects whether
// the server echoes directly from its read callback or via the fdevent main
// thread.
void BM_Connection_Echo(benchmark::State& state) {
    int fds[2];
    if (adb_socketpair(fds) != 0) {
        LOG(FATAL) << "failed to create socketpair";
    }

    auto client = MakeConnection<ConnectionType>(unique_fd(fds[0]));
    auto server = MakeConnection<ConnectionType>(unique_fd(fds[1]));

    // Payload bytes echoed back to the client; reset before every write below.
    std::atomic<size_t> received_bytes;

    // Run the fdevent loop on its own thread for the duration of the benchmark;
    // it is terminated and joined at the end of this function.
    fdevent_reset();
    std::thread fdevent_thread([]() { fdevent_loop(); });

    client->SetReadCallback([&received_bytes](Connection*, std::unique_ptr<apacket> packet) -> bool {
        received_bytes += packet->payload.size();
        return true;
    });

    // Echo: send the packet straight back on the connection it arrived on.
    // Static storage lets the capture-less lambdas below call it.
    static const auto handle_packet = [](Connection* connection, std::unique_ptr<apacket> packet) {
        connection->Write(std::move(packet));
    };

    server->SetReadCallback([](Connection* connection, std::unique_ptr<apacket> packet) -> bool {
        if (Policy == ThreadPolicy::MainThread) {
            // Ownership crosses to the main thread as a raw pointer and is
            // re-wrapped in a unique_ptr inside the closure.
            // NOTE(review): presumably because the queued callable has to be
            // copyable - confirm against fdevent_run_on_main_thread.
            auto raw_packet = packet.release();
            fdevent_run_on_main_thread([connection, raw_packet]() {
                std::unique_ptr<apacket> packet(raw_packet);
                handle_packet(connection, std::move(packet));
            });
        } else {
            handle_packet(connection, std::move(packet));
        }
        return true;
    });

    client->SetErrorCallback(
        [](Connection*, const std::string& error) { LOG(INFO) << "client closed: " << error; });
    server->SetErrorCallback(
        [](Connection*, const std::string& error) { LOG(INFO) << "server closed: " << error; });

    client->Start();
    server->Start();

    for (auto _ : state) {
        size_t data_size = state.range(0);
        // Build a zeroed message header followed by a payload of 0xff bytes.
        std::unique_ptr<apacket> packet = std::make_unique<apacket>();
        memset(&packet->msg, 0, sizeof(packet->msg));
        packet->msg.command = A_WRTE;
        packet->msg.data_length = data_size;
        packet->payload.resize(data_size);

        memset(&packet->payload[0], 0xff, data_size);

        received_bytes = 0;
        client->Write(std::move(packet));
        // Busy-wait until the whole payload has been echoed back.
        while (received_bytes < data_size) {
            continue;
        }
    }
    state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * state.range(0));

    client->Stop();
    server->Stop();

    // TODO: Make it so that you don't need to poke the fdevent loop to make it terminate?
    // The empty task queued after the terminate request is that poke.
    fdevent_terminate_loop();
    fdevent_run_on_main_thread([]() {});

    fdevent_thread.join();
}