示例#1
0
/*
 * Usage: <prog> READS_FILE QUALS_FILE
 *
 * Reads the two files in lock-step, two lines per record (a name line followed
 * by a sequence line). For every record whose poly tail is long enough, prints
 * the mean quality over that tail to stdout, one value per line ("%0.5f").
 *
 * Exits with status 1 when fewer than two file arguments are given, when either
 * file cannot be opened, when a buffer cannot be allocated, or when the name
 * lines of a read/quality pair differ. Returns 0 once either input runs out.
 */
int main( int argc, char* argv[] )
{
    if( argc < 3 ) {
        usage();
        exit(1);
    }

    /* Fail early with a readable message instead of handing NULL to fgets. */
    FILE* reads_in = fopen( argv[1], "r" );
    if( reads_in == NULL ) {
        perror( argv[1] );
        exit(1);
    }

    FILE* quals_in = fopen( argv[2], "r" );
    if( quals_in == NULL ) {
        perror( argv[2] );
        fclose(reads_in);
        exit(1);
    }

    /* Maximum line length (including the terminator) accepted by fgets below. */
    const size_t buf_size = 4096;

    char *read_name, *read_seq, *qual_name, *qual_seq;

    read_name = malloc( buf_size*sizeof(char) );
    read_seq  = malloc( buf_size*sizeof(char) );
    qual_name = malloc( buf_size*sizeof(char) );
    qual_seq  = malloc( buf_size*sizeof(char) );
    double* quals = malloc( buf_size*sizeof(double) );

    if( read_name == NULL || read_seq == NULL ||
        qual_name == NULL || qual_seq == NULL || quals == NULL ) {
        fprintf( stderr, "Out of memory.\n" );
        exit(1);
    }

    int k;
    int n;

    /* Stop as soon as either file fails to supply a complete two-line record. */
    while( fgets( read_name, buf_size, reads_in ) &&
           fgets( read_seq,  buf_size, reads_in ) &&
           fgets( qual_name, buf_size, quals_in ) &&
           fgets( qual_seq,  buf_size, quals_in ) )
    {
        /* Both files must list the records in the same order. */
        if( strcmp( read_name, qual_name ) != 0 ) {
            fprintf( stderr, "Mismatching read, quality pair.\n" );
            exit(1);
        }

        /* NOTE(review): presumably k is the start index of the poly tail
         * (negative when there is none) and n the number of parsed quality
         * values -- confirm against poly_tail() / get_quals(). */
        k = poly_tail( read_seq );
        n = get_quals( qual_seq, quals );

        /* Skip records without a tail, or with fewer than 6 values after k. */
        if( k < 0 || n - k < 6 ) continue;

        double mu = mean_qual( quals, k, n-1 );

        printf( "%0.5f\n", mu );
    }

    free(read_name);
    free(read_seq);
    free(qual_name);
    free(qual_seq);
    free(quals);

    fclose(reads_in);
    fclose(quals_in);

    return 0;
}
示例#2
0
// Executes a Planner::Result plan in two stages.
//
// Stage 1 builds an execution unit from the child aggregate plan and runs it through
// executeWorkUnit() with the caller's device_type. Stage 2 converts the rows from stage 1
// into ColumnarResults and evaluates the result plan's const-quals, quals and target
// expressions over them, using code compiled for and launched on ExecutorDeviceType::CPU.
//
// Throws std::runtime_error when the child of result_plan is not a Planner::AggPlan.
// Returns the stage 1 rows as-is when just_explain is set, and a ResultSet constructed with
// no targets when *error_code is non-zero after stage 1 or when stage 1 produced no rows.
//
// Member state touched here: row_set_mem_owner_ is replaced, lit_str_dict_proxy_ is reset,
// and is_nested_ is set to true for the second stage. *error_code is written by stage 1 (via
// executeWorkUnit) and is reset to 0 before the stage 2 launch.
RowSetPtr Executor::executeResultPlan(const Planner::Result* result_plan,
                                      const bool hoist_literals,
                                      const ExecutorDeviceType device_type,
                                      const ExecutorOptLevel opt_level,
                                      const Catalog_Namespace::Catalog& cat,
                                      size_t& max_groups_buffer_entry_guess,
                                      int32_t* error_code,
                                      const Planner::Sort* sort_plan,
                                      const bool allow_multifrag,
                                      const bool just_explain,
                                      const bool allow_loop_joins) {
  const auto agg_plan = dynamic_cast<const Planner::AggPlan*>(result_plan->get_child_plan());
  if (!agg_plan) {  // TODO(alex)
    throw std::runtime_error("Query not supported yet, child plan needs to be an aggregate plan.");
  }
  // Start from a fresh memory owner and no literal string dictionary proxy.
  row_set_mem_owner_ = std::make_shared<RowSetMemoryOwner>();
  lit_str_dict_proxy_ = nullptr;
  // Qualifiers come from the scan below the aggregate, if there is one; otherwise start empty.
  const auto scan_plan = dynamic_cast<const Planner::Scan*>(agg_plan->get_child_plan());
  auto simple_quals = scan_plan ? scan_plan->get_simple_quals() : std::list<std::shared_ptr<Analyzer::Expr>>{};
  auto quals = scan_plan ? scan_plan->get_quals() : std::list<std::shared_ptr<Analyzer::Expr>>{};
  std::vector<InputDescriptor> input_descs;
  std::list<std::shared_ptr<const InputColDescriptor>> input_col_descs;
  collect_input_descs(input_descs, input_col_descs, agg_plan, cat);
  // A join below the aggregate can contribute to both qualifier lists and supplies the join quals.
  const auto join_plan = get_join_child(agg_plan);
  if (join_plan) {
    collect_quals_from_join(simple_quals, quals, join_plan);
  }
  const auto join_quals = join_plan ? join_plan->get_quals() : std::list<std::shared_ptr<Analyzer::Expr>>{};
  CHECK(check_plan_sanity(agg_plan));
  const auto query_infos = get_table_infos(input_descs, this);
  // Stage 1 execution unit: group by the aggregate plan's group-by list and produce its targets.
  // NOTE(review): the initializer is positional; field names live in RelAlgExecutionUnit's
  // declaration, which is not visible here.
  const auto ra_exe_unit_in = RelAlgExecutionUnit{input_descs,
                                                  {},
                                                  input_col_descs,
                                                  simple_quals,
                                                  quals,
                                                  JoinType::INVALID,
                                                  {},
                                                  join_quals,
                                                  {},
                                                  agg_plan->get_groupby_list(),
                                                  get_agg_target_exprs(agg_plan),
                                                  {},
                                                  nullptr,
                                                  {{}, SortAlgorithm::Default, 0, 0},
                                                  0};
  QueryRewriter query_rewriter(ra_exe_unit_in, query_infos, this, result_plan);
  const auto ra_exe_unit = query_rewriter.rewrite();
  // The third argument is the slot where executeSelectPlan passes its is_agg flag; it is always
  // true here because the child is known to be an aggregate plan.
  auto result = executeWorkUnit(error_code,
                                max_groups_buffer_entry_guess,
                                true,
                                query_infos,
                                ra_exe_unit,
                                {device_type, hoist_literals, opt_level, g_enable_dynamic_watchdog},
                                {false,
                                 allow_multifrag,
                                 just_explain,
                                 allow_loop_joins,
                                 g_enable_watchdog,
                                 false,
                                 false,
                                 g_enable_dynamic_watchdog,
                                 g_dynamic_watchdog_time_limit},
                                cat,
                                row_set_mem_owner_,
                                nullptr,
                                true);
  // `rows` is a reference into `result`, so the std::move below moves the stored pointer out.
  auto& rows = boost::get<RowSetPtr>(result);
  CHECK(rows);
  if (just_explain) {
    return std::move(rows);
  }

  // Stage 2 input columns: one pseudo column per aggregate-plan target, numbered from 1.
  // NOTE(review): the meaning of the trailing (0, -1) constructor arguments is not visible here.
  const int in_col_count{static_cast<int>(agg_plan->get_targetlist().size())};
  std::list<std::shared_ptr<const InputColDescriptor>> pseudo_input_col_descs;
  for (int pseudo_col = 1; pseudo_col <= in_col_count; ++pseudo_col) {
    pseudo_input_col_descs.push_back(std::make_shared<const InputColDescriptor>(pseudo_col, 0, -1));
  }
  const auto order_entries = sort_plan ? sort_plan->get_order_entries() : std::list<Analyzer::OrderEntry>{};
  // Stage 2 execution unit: the result plan's own qualifiers and targets over the pseudo columns,
  // carrying the order entries from sort_plan when one was supplied.
  const RelAlgExecutionUnit res_ra_unit{{},
                                        {},
                                        pseudo_input_col_descs,
                                        result_plan->get_constquals(),
                                        result_plan->get_quals(),
                                        JoinType::INVALID,
                                        {},
                                        {},
                                        {},
                                        {nullptr},
                                        get_agg_target_exprs(result_plan),
                                        {},
                                        nullptr,
                                        {
                                            order_entries, SortAlgorithm::Default, 0, 0,
                                        },
                                        0};
  // Stage 1 reported an error: leave *error_code as it is and hand back a ResultSet with no targets.
  if (*error_code) {
    return std::make_shared<ResultSet>(
        std::vector<TargetInfo>{}, ExecutorDeviceType::CPU, QueryMemoryDescriptor{}, nullptr, this);
  }
  const auto& targets = result_plan->get_targetlist();
  CHECK(!targets.empty());
  // Floating point targets get "agg_id_double", every other type gets "agg_id".
  // NOTE(review): agg_infos is filled here but never read again in this function.
  std::vector<AggInfo> agg_infos;
  for (size_t target_idx = 0; target_idx < targets.size(); ++target_idx) {
    const auto target_entry = targets[target_idx];
    const auto target_type = target_entry->get_expr()->get_type_info().get_type();
    agg_infos.emplace_back((target_type == kFLOAT || target_type == kDOUBLE) ? "agg_id_double" : "agg_id",
                           target_entry->get_expr(),
                           0,
                           target_idx);
  }
  // Types of the stage 1 output columns, taken from the aggregate plan's targets.
  std::vector<SQLTypeInfo> target_types;
  for (auto in_col : agg_plan->get_targetlist()) {
    target_types.push_back(in_col->get_expr()->get_type_info());
  }
  CHECK(rows);
  // Convert the stage 1 rows into columnar form; these columns are the input of stage 2.
  ColumnarResults result_columns(row_set_mem_owner_, *rows, in_col_count, target_types);
  // NOTE(review): col_heads is declared but never used in this function.
  std::vector<llvm::Value*> col_heads;
  // Nested query, let the compiler know
  // NOTE(review): ResetIsNested presumably restores is_nested_ when it goes out of scope - confirm.
  ResetIsNested reset_is_nested(this);
  is_nested_ = true;
  std::vector<Analyzer::Expr*> target_exprs;
  for (auto target_entry : targets) {
    target_exprs.emplace_back(target_entry->get_expr());
  }
  // Nothing to evaluate when stage 1 produced no rows.
  const auto row_count = rows->rowCount();
  if (!row_count) {
    return std::make_shared<ResultSet>(
        std::vector<TargetInfo>{}, ExecutorDeviceType::CPU, QueryMemoryDescriptor{}, nullptr, this);
  }
  // Pair every target's byte width with its compacted width.
  std::vector<ColWidths> agg_col_widths;
  for (auto wid : get_col_byte_widths(target_exprs, {})) {
    agg_col_widths.push_back(
        {wid, int8_t(compact_byte_width(wid, pick_target_compact_width(res_ra_unit, {}, get_min_byte_width())))});
  }
  // Output buffer layout for stage 2: a projection using the column widths computed above.
  // NOTE(review): positional initializer; row_count is presumably the entry count - confirm
  // against QueryMemoryDescriptor's declaration.
  QueryMemoryDescriptor query_mem_desc{this,
                                       allow_multifrag,
                                       GroupByColRangeType::Projection,
                                       false,
                                       false,
                                       -1,
                                       0,
                                       {sizeof(int64_t)},
#ifdef ENABLE_KEY_COMPACTION
                                       0,
#endif
                                       agg_col_widths,
                                       {},
                                       row_count,
                                       small_groups_buffer_entry_count_,
                                       0,
                                       0,
                                       0,
                                       false,
                                       GroupByMemSharing::Shared,
                                       CountDistinctDescriptors{},
                                       false,
                                       true,
                                       false,
                                       false,
                                       {},
                                       {},
                                       false};
  // Compile stage 2 for the CPU regardless of the device_type requested by the caller.
  auto compilation_result =
      compileWorkUnit(false,
                      {},
                      res_ra_unit,
                      {ExecutorDeviceType::CPU, hoist_literals, opt_level, g_enable_dynamic_watchdog},
                      {false,
                       allow_multifrag,
                       just_explain,
                       allow_loop_joins,
                       g_enable_watchdog,
                       false,
                       false,
                       g_enable_dynamic_watchdog,
                       g_dynamic_watchdog_time_limit},
                      nullptr,
                      false,
                      row_set_mem_owner_,
                      row_count,
                      small_groups_buffer_entry_count_,
                      get_min_byte_width(),
                      JoinInfo(JoinImplType::Invalid, std::vector<std::shared_ptr<Analyzer::BinOper>>{}, {}, ""),
                      false);
  auto column_buffers = result_columns.getColumnBuffers();
  CHECK_EQ(column_buffers.size(), static_cast<size_t>(in_col_count));
  // One zero-initialized initial value per output column.
  std::vector<int64_t> init_agg_vals(query_mem_desc.agg_col_widths.size());
  auto query_exe_context = query_mem_desc.getQueryExecutionContext(res_ra_unit,
                                                                   init_agg_vals,
                                                                   this,
                                                                   ExecutorDeviceType::CPU,
                                                                   0,
                                                                   {},
                                                                   {},
                                                                   {},
                                                                   row_set_mem_owner_,
                                                                   false,
                                                                   false,
                                                                   nullptr);
  const auto hoist_buf = serializeLiterals(compilation_result.literal_values, 0);
  // Clear the error code so that the check after the launch only reflects stage 2.
  *error_code = 0;
  // The columnar stage 1 output is passed as the only entry of multi_frag_col_buffers.
  std::vector<std::vector<const int8_t*>> multi_frag_col_buffers{column_buffers};
  query_exe_context->launchCpuCode(res_ra_unit,
                                   compilation_result.native_functions,
                                   hoist_literals,
                                   hoist_buf,
                                   multi_frag_col_buffers,
                                   {{static_cast<int64_t>(result_columns.size())}},
                                   {{0}},
                                   1u,
                                   0,
                                   init_agg_vals,
                                   error_code,
                                   1,
                                   {});
  // A negative error code after the launch is treated as a fatal invariant violation.
  CHECK_GE(*error_code, 0);
  return query_exe_context->groupBufferToResults(0, target_exprs, false);
}
示例#3
0
// Executes a select plan by dispatching on the dynamic type of `plan`:
//
//  - Planner::Scan, Planner::AggPlan or Planner::Join: builds a RelAlgExecutionUnit from the
//    plan, rewrites it with QueryRewriter and runs it through executeWorkUnit().
//  - Planner::Result: delegates to executeResultPlan().
//  - Planner::Sort: delegates to executeSortPlan(), which receives limit and offset.
//  - anything else: abort().
//
// In the first two cases a non-zero limit or offset trims the rows with dropFirstN(offset) and,
// when limit is non-zero, keepFirstN(limit); a limit of 0 leaves the row count uncapped here.
//
// Member state touched in the first case: row_set_mem_owner_ is replaced and lit_str_dict_proxy_
// is reset. max_groups_buffer_entry_guess is an in/out reference; on the limit/offset path of
// the first case it is overwritten with the value left in the local guess after execution.
RowSetPtr Executor::executeSelectPlan(const Planner::Plan* plan,
                                      const int64_t limit,
                                      const int64_t offset,
                                      const bool hoist_literals,
                                      const ExecutorDeviceType device_type,
                                      const ExecutorOptLevel opt_level,
                                      const Catalog_Namespace::Catalog& cat,
                                      size_t& max_groups_buffer_entry_guess,
                                      int32_t* error_code,
                                      const Planner::Sort* sort_plan_in,
                                      const bool allow_multifrag,
                                      const bool just_explain,
                                      const bool allow_loop_joins,
                                      RenderAllocatorMap* render_allocator_map) {
  // Case 1: plans that translate directly into a single work unit.
  if (dynamic_cast<const Planner::Scan*>(plan) || dynamic_cast<const Planner::AggPlan*>(plan) ||
      dynamic_cast<const Planner::Join*>(plan)) {
    // Start from a fresh memory owner and no literal string dictionary proxy.
    row_set_mem_owner_ = std::make_shared<RowSetMemoryOwner>();
    lit_str_dict_proxy_ = nullptr;
    const auto target_exprs = get_agg_target_exprs(plan);
    // Qualifiers come from the scan child, if there is one; otherwise start empty.
    const auto scan_plan = get_scan_child(plan);
    auto simple_quals = scan_plan ? scan_plan->get_simple_quals() : std::list<std::shared_ptr<Analyzer::Expr>>{};
    auto quals = scan_plan ? scan_plan->get_quals() : std::list<std::shared_ptr<Analyzer::Expr>>{};
    // A plan that is not an aggregate gets a group-by list holding a single null expression.
    const auto agg_plan = dynamic_cast<const Planner::AggPlan*>(plan);
    auto groupby_exprs = agg_plan ? agg_plan->get_groupby_list() : std::list<std::shared_ptr<Analyzer::Expr>>{nullptr};
    std::vector<InputDescriptor> input_descs;
    std::list<std::shared_ptr<const InputColDescriptor>> input_col_descs;
    collect_input_descs(input_descs, input_col_descs, plan, cat);
    // A join child can contribute to both qualifier lists and supplies the join quals.
    const auto join_plan = get_join_child(plan);
    if (join_plan) {
      collect_quals_from_join(simple_quals, quals, join_plan);
    }
    const auto join_quals = join_plan ? join_plan->get_quals() : std::list<std::shared_ptr<Analyzer::Expr>>{};
    CHECK(check_plan_sanity(plan));
    // True exactly when `plan` is an AggPlan (same cast as agg_plan above, converted to bool).
    const bool is_agg = dynamic_cast<const Planner::AggPlan*>(plan);
    const auto order_entries = sort_plan_in ? sort_plan_in->get_order_entries() : std::list<Analyzer::OrderEntry>{};
    const auto query_infos = get_table_infos(input_descs, this);
    // scan_total_limit stays 0 when get_scan_limit() yields 0 for the plain limit; otherwise it
    // is recomputed from scan_limit + offset so that the skipped rows are still produced.
    const size_t scan_limit = get_scan_limit(plan, limit);
    const size_t scan_total_limit = scan_limit ? get_scan_limit(plan, scan_limit + offset) : 0;
    // NOTE(review): the initializer is positional; field names live in RelAlgExecutionUnit's
    // declaration, which is not visible here.
    const auto ra_exe_unit_in = RelAlgExecutionUnit{
        input_descs,
        {},
        input_col_descs,
        simple_quals,
        quals,
        JoinType::INVALID,
        {},
        join_quals,
        {},
        groupby_exprs,
        target_exprs,
        {},
        nullptr,
        {order_entries, SortAlgorithm::Default, static_cast<size_t>(limit), static_cast<size_t>(offset)},
        scan_total_limit};
    QueryRewriter query_rewriter(ra_exe_unit_in, query_infos, this, agg_plan);
    const auto ra_exe_unit = query_rewriter.rewrite();
    if (limit || offset) {
      // Run with a local guess seeded from scan_total_limit when that is non-zero, else from the
      // caller's guess; the caller's variable is overwritten with the local one after execution.
      size_t max_groups_buffer_entry_guess_limit{scan_total_limit ? scan_total_limit : max_groups_buffer_entry_guess};
      auto result = executeWorkUnit(error_code,
                                    max_groups_buffer_entry_guess_limit,
                                    is_agg,
                                    query_infos,
                                    ra_exe_unit,
                                    {device_type, hoist_literals, opt_level, g_enable_dynamic_watchdog},
                                    {false,
                                     allow_multifrag,
                                     just_explain,
                                     allow_loop_joins,
                                     g_enable_watchdog,
                                     false,
                                     false,
                                     g_enable_dynamic_watchdog,
                                     g_dynamic_watchdog_time_limit},
                                    cat,
                                    row_set_mem_owner_,
                                    render_allocator_map,
                                    true);
      // `rows` is a reference into `result`, so the std::move below moves the stored pointer out.
      auto& rows = boost::get<RowSetPtr>(result);
      max_groups_buffer_entry_guess = max_groups_buffer_entry_guess_limit;
      CHECK(rows);
      // Apply OFFSET first, then cap at LIMIT when one was given.
      rows->dropFirstN(offset);
      if (limit) {
        rows->keepFirstN(limit);
      }
      return std::move(rows);
    }
    // No limit and no offset: run with the caller's guess and return the rows untouched.
    auto result = executeWorkUnit(error_code,
                                  max_groups_buffer_entry_guess,
                                  is_agg,
                                  query_infos,
                                  ra_exe_unit,
                                  {device_type, hoist_literals, opt_level, g_enable_dynamic_watchdog},
                                  {false,
                                   allow_multifrag,
                                   just_explain,
                                   allow_loop_joins,
                                   g_enable_watchdog,
                                   false,
                                   false,
                                   g_enable_dynamic_watchdog,
                                   g_dynamic_watchdog_time_limit},
                                  cat,
                                  row_set_mem_owner_,
                                  render_allocator_map,
                                  true);
    auto& rows = boost::get<RowSetPtr>(result);
    CHECK(rows);
    return std::move(rows);
  }
  // Case 2: a Result plan is handled by executeResultPlan(); limit and offset are applied here
  // afterwards. render_allocator_map is not forwarded on this path.
  const auto result_plan = dynamic_cast<const Planner::Result*>(plan);
  if (result_plan) {
    if (limit || offset) {
      auto rows = executeResultPlan(result_plan,
                                    hoist_literals,
                                    device_type,
                                    opt_level,
                                    cat,
                                    max_groups_buffer_entry_guess,
                                    error_code,
                                    sort_plan_in,
                                    allow_multifrag,
                                    just_explain,
                                    allow_loop_joins);
      CHECK(rows);
      rows->dropFirstN(offset);
      if (limit) {
        rows->keepFirstN(limit);
      }
      return rows;
    }
    return executeResultPlan(result_plan,
                             hoist_literals,
                             device_type,
                             opt_level,
                             cat,
                             max_groups_buffer_entry_guess,
                             error_code,
                             sort_plan_in,
                             allow_multifrag,
                             just_explain,
                             allow_loop_joins);
  }
  // Case 3: a Sort plan; limit and offset are passed on to executeSortPlan().
  const auto sort_plan = dynamic_cast<const Planner::Sort*>(plan);
  if (sort_plan) {
    return executeSortPlan(sort_plan,
                           limit,
                           offset,
                           hoist_literals,
                           device_type,
                           opt_level,
                           cat,
                           max_groups_buffer_entry_guess,
                           error_code,
                           allow_multifrag,
                           just_explain,
                           allow_loop_joins);
  }
  // No other plan type is handled.
  abort();
}