/*
 * Reads paired records from a reads file (argv[1]) and a qualities file
 * (argv[2]). Each record is two lines in each file: a name line followed by
 * a sequence line. For every accepted record one value, formatted with
 * "%0.5f", is printed to stdout.
 *
 * The name lines of a pair must compare equal; otherwise an error is
 * reported and the program ends with status 1. A record is skipped when
 * poly_tail() returns a negative value k, or when n - k < 6, where n is the
 * value returned by get_quals().
 *
 * Returns 0 on success and 1 on bad usage, a file that cannot be opened,
 * an allocation failure, or a mismatching pair.
 */
int main( int argc, char* argv[] )
{
    if( argc < 3 ) {
        usage();
        exit(1);
    }

    /* Open both inputs up front and fail loudly: fgets() on a NULL stream
     * is undefined behavior. */
    FILE* reads_in = fopen( argv[1], "r" );
    if( reads_in == NULL ) {
        fprintf( stderr, "Can't open file %s.\n", argv[1] );
        exit(1);
    }

    FILE* quals_in = fopen( argv[2], "r" );
    if( quals_in == NULL ) {
        fprintf( stderr, "Can't open file %s.\n", argv[2] );
        fclose( reads_in );
        exit(1);
    }

    const size_t buf_size = 4096;

    char* read_name = malloc( buf_size*sizeof(char) );
    char* read_seq  = malloc( buf_size*sizeof(char) );
    char* qual_name = malloc( buf_size*sizeof(char) );
    char* qual_seq  = malloc( buf_size*sizeof(char) );
    double* quals   = malloc( buf_size*sizeof(double) );

    int status = 0;

    if( !read_name || !read_seq || !qual_name || !qual_seq || !quals ) {
        fprintf( stderr, "Out of memory.\n" );
        status = 1;
    }

    int k;
    int n;

    /* The loop ends as soon as either file runs out of complete records. */
    while( status == 0 &&
           fgets( read_name, buf_size, reads_in ) &&
           fgets( read_seq,  buf_size, reads_in ) &&
           fgets( qual_name, buf_size, quals_in ) &&
           fgets( qual_seq,  buf_size, quals_in ) ) {

        if( strcmp( read_name, qual_name ) != 0 ) {
            fprintf( stderr, "Mismatching read, quality pair.\n" );
            status = 1;
            break;
        }

        k = poly_tail( read_seq );
        n = get_quals( qual_seq, quals );

        if( k < 0 || n - k < 6 ) continue;

        double mu = mean_qual( quals, k, n-1 );
        printf( "%0.5f\n", mu );
    }

    /* Single cleanup path; free(NULL) is a no-op, so a partial allocation
     * failure is handled here as well. */
    free( quals );
    free( qual_seq );
    free( qual_name );
    free( read_seq );
    free( read_name );

    fclose( reads_in );
    fclose( quals_in );

    return status;
}
/// Executes a `Planner::Result` plan whose child is an aggregate plan.
///
/// The function runs in two stages:
///   1. The child aggregate plan is turned into a `RelAlgExecutionUnit`,
///      rewritten by `QueryRewriter`, and run through `executeWorkUnit` on
///      `device_type`.
///   2. The rows from stage 1 are converted to `ColumnarResults`, and the
///      result plan's own target expressions and quals are compiled with
///      `compileWorkUnit` and launched on CPU over those columns.
///
/// @param result_plan  Result plan to execute; its child must be an AggPlan.
/// @param error_code   Out-parameter. If it is non-zero after stage 1 an empty
///                     ResultSet is returned; it is reset to 0 before the CPU
///                     launch of stage 2.
/// @param sort_plan    Optional; when non-null its order entries are put into
///                     the stage 2 execution unit.
/// @param just_explain When true the stage 1 result is returned directly.
/// @return The stage 2 result set, the stage 1 result when `just_explain` is
///         set, or an empty ResultSet on a stage 1 error or when stage 1
///         produced no rows.
/// @throws std::runtime_error if the child plan is not an aggregate plan.
RowSetPtr Executor::executeResultPlan(const Planner::Result* result_plan,
                                      const bool hoist_literals,
                                      const ExecutorDeviceType device_type,
                                      const ExecutorOptLevel opt_level,
                                      const Catalog_Namespace::Catalog& cat,
                                      size_t& max_groups_buffer_entry_guess,
                                      int32_t* error_code,
                                      const Planner::Sort* sort_plan,
                                      const bool allow_multifrag,
                                      const bool just_explain,
                                      const bool allow_loop_joins) {
  using ExprList = std::list<std::shared_ptr<Analyzer::Expr>>;

  const auto agg_plan = dynamic_cast<const Planner::AggPlan*>(result_plan->get_child_plan());
  if (!agg_plan) {  // TODO(alex)
    throw std::runtime_error("Query not supported yet, child plan needs to be an aggregate plan.");
  }

  row_set_mem_owner_ = std::make_shared<RowSetMemoryOwner>();
  lit_str_dict_proxy_ = nullptr;

  // ---- Stage 1: execute the aggregate child plan. ----
  const auto scan_plan = dynamic_cast<const Planner::Scan*>(agg_plan->get_child_plan());
  auto simple_quals = scan_plan ? scan_plan->get_simple_quals() : ExprList{};
  auto quals = scan_plan ? scan_plan->get_quals() : ExprList{};

  std::vector<InputDescriptor> input_descs;
  std::list<std::shared_ptr<const InputColDescriptor>> input_col_descs;
  collect_input_descs(input_descs, input_col_descs, agg_plan, cat);

  const auto join_plan = get_join_child(agg_plan);
  if (join_plan) {
    collect_quals_from_join(simple_quals, quals, join_plan);
  }
  const auto join_quals = join_plan ? join_plan->get_quals() : ExprList{};

  CHECK(check_plan_sanity(agg_plan));

  const auto query_infos = get_table_infos(input_descs, this);
  const RelAlgExecutionUnit ra_exe_unit_in{input_descs,
                                           {},
                                           input_col_descs,
                                           simple_quals,
                                           quals,
                                           JoinType::INVALID,
                                           {},
                                           join_quals,
                                           {},
                                           agg_plan->get_groupby_list(),
                                           get_agg_target_exprs(agg_plan),
                                           {},
                                           nullptr,
                                           {{}, SortAlgorithm::Default, 0, 0},
                                           0};
  QueryRewriter query_rewriter(ra_exe_unit_in, query_infos, this, result_plan);
  const auto ra_exe_unit = query_rewriter.rewrite();

  auto agg_result = executeWorkUnit(error_code,
                                    max_groups_buffer_entry_guess,
                                    true,
                                    query_infos,
                                    ra_exe_unit,
                                    {device_type, hoist_literals, opt_level, g_enable_dynamic_watchdog},
                                    {false,
                                     allow_multifrag,
                                     just_explain,
                                     allow_loop_joins,
                                     g_enable_watchdog,
                                     false,
                                     false,
                                     g_enable_dynamic_watchdog,
                                     g_dynamic_watchdog_time_limit},
                                    cat,
                                    row_set_mem_owner_,
                                    nullptr,
                                    true);
  auto& agg_rows = boost::get<RowSetPtr>(agg_result);
  CHECK(agg_rows);
  if (just_explain) {
    // `agg_rows` is a reference into the variant, so the explicit move is
    // what turns this return into a move instead of a copy.
    return std::move(agg_rows);
  }

  // ---- Stage 2: evaluate the result plan over the aggregate output. ----
  const int in_col_count{static_cast<int>(agg_plan->get_targetlist().size())};

  // One descriptor per aggregate output column, numbered from 1.
  std::list<std::shared_ptr<const InputColDescriptor>> pseudo_input_col_descs;
  for (int col_id = 1; col_id <= in_col_count; ++col_id) {
    pseudo_input_col_descs.push_back(std::make_shared<const InputColDescriptor>(col_id, 0, -1));
  }

  const auto order_entries = sort_plan ? sort_plan->get_order_entries() : std::list<Analyzer::OrderEntry>{};
  const RelAlgExecutionUnit res_ra_unit{{},
                                        {},
                                        pseudo_input_col_descs,
                                        result_plan->get_constquals(),
                                        result_plan->get_quals(),
                                        JoinType::INVALID,
                                        {},
                                        {},
                                        {},
                                        {nullptr},
                                        get_agg_target_exprs(result_plan),
                                        {},
                                        nullptr,
                                        {
                                            order_entries, SortAlgorithm::Default, 0, 0,
                                        },
                                        0};

  // Both early exits below hand back the same empty CPU result set.
  const auto make_empty_result = [this]() -> RowSetPtr {
    return std::make_shared<ResultSet>(
        std::vector<TargetInfo>{}, ExecutorDeviceType::CPU, QueryMemoryDescriptor{}, nullptr, this);
  };

  if (*error_code) {
    return make_empty_result();
  }

  const auto& targets = result_plan->get_targetlist();
  CHECK(!targets.empty());

  std::vector<AggInfo> agg_infos;
  for (size_t idx = 0; idx < targets.size(); ++idx) {
    const auto entry = targets[idx];
    const auto entry_type = entry->get_expr()->get_type_info().get_type();
    const bool is_floating_point = (entry_type == kFLOAT || entry_type == kDOUBLE);
    agg_infos.emplace_back(is_floating_point ? "agg_id_double" : "agg_id", entry->get_expr(), 0, idx);
  }

  std::vector<SQLTypeInfo> target_types;
  for (const auto& in_col : agg_plan->get_targetlist()) {
    target_types.push_back(in_col->get_expr()->get_type_info());
  }

  CHECK(agg_rows);
  ColumnarResults result_columns(row_set_mem_owner_, *agg_rows, in_col_count, target_types);
  std::vector<llvm::Value*> col_heads;

  // Nested query, let the compiler know
  ResetIsNested reset_is_nested(this);
  is_nested_ = true;

  std::vector<Analyzer::Expr*> target_exprs;
  for (const auto& entry : targets) {
    target_exprs.emplace_back(entry->get_expr());
  }

  const auto row_count = agg_rows->rowCount();
  if (!row_count) {
    return make_empty_result();
  }

  std::vector<ColWidths> agg_col_widths;
  for (auto width : get_col_byte_widths(target_exprs, {})) {
    agg_col_widths.push_back(
        {width,
         int8_t(compact_byte_width(width, pick_target_compact_width(res_ra_unit, {}, get_min_byte_width())))});
  }

  QueryMemoryDescriptor query_mem_desc{this,
                                       allow_multifrag,
                                       GroupByColRangeType::Projection,
                                       false,
                                       false,
                                       -1,
                                       0,
                                       {sizeof(int64_t)},
#ifdef ENABLE_KEY_COMPACTION
                                       0,
#endif
                                       agg_col_widths,
                                       {},
                                       row_count,
                                       small_groups_buffer_entry_count_,
                                       0,
                                       0,
                                       0,
                                       false,
                                       GroupByMemSharing::Shared,
                                       CountDistinctDescriptors{},
                                       false,
                                       true,
                                       false,
                                       false,
                                       {},
                                       {},
                                       false};

  auto compilation_result =
      compileWorkUnit(false,
                      {},
                      res_ra_unit,
                      {ExecutorDeviceType::CPU, hoist_literals, opt_level, g_enable_dynamic_watchdog},
                      {false,
                       allow_multifrag,
                       just_explain,
                       allow_loop_joins,
                       g_enable_watchdog,
                       false,
                       false,
                       g_enable_dynamic_watchdog,
                       g_dynamic_watchdog_time_limit},
                      nullptr,
                      false,
                      row_set_mem_owner_,
                      row_count,
                      small_groups_buffer_entry_count_,
                      get_min_byte_width(),
                      JoinInfo(JoinImplType::Invalid, std::vector<std::shared_ptr<Analyzer::BinOper>>{}, {}, ""),
                      false);

  auto column_buffers = result_columns.getColumnBuffers();
  CHECK_EQ(column_buffers.size(), static_cast<size_t>(in_col_count));

  std::vector<int64_t> init_agg_vals(query_mem_desc.agg_col_widths.size());
  auto query_exe_context = query_mem_desc.getQueryExecutionContext(res_ra_unit,
                                                                   init_agg_vals,
                                                                   this,
                                                                   ExecutorDeviceType::CPU,
                                                                   0,
                                                                   {},
                                                                   {},
                                                                   {},
                                                                   row_set_mem_owner_,
                                                                   false,
                                                                   false,
                                                                   nullptr);

  const auto hoist_buf = serializeLiterals(compilation_result.literal_values, 0);
  *error_code = 0;

  std::vector<std::vector<const int8_t*>> multi_frag_col_buffers{column_buffers};
  query_exe_context->launchCpuCode(res_ra_unit,
                                   compilation_result.native_functions,
                                   hoist_literals,
                                   hoist_buf,
                                   multi_frag_col_buffers,
                                   {{static_cast<int64_t>(result_columns.size())}},
                                   {{0}},
                                   1u,
                                   0,
                                   init_agg_vals,
                                   error_code,
                                   1,
                                   {});
  CHECK_GE(*error_code, 0);

  return query_exe_context->groupBufferToResults(0, target_exprs, false);
}
/// Executes a select plan, dispatching on the dynamic type of `plan`.
///
///  - Scan / AggPlan / Join: builds a `RelAlgExecutionUnit` from the plan,
///    rewrites it with `QueryRewriter` and runs it through `executeWorkUnit`.
///  - Result: delegates to `executeResultPlan`.
///  - Sort: delegates to `executeSortPlan`.
///  - Anything else: calls `abort()`.
///
/// When `limit` or `offset` is non-zero, the first `offset` rows are dropped
/// from the Scan/AggPlan/Join or Result output and, if `limit` is non-zero,
/// only the first `limit` remaining rows are kept.
///
/// @param max_groups_buffer_entry_guess  In/out. On the Scan/AggPlan/Join path
///        with a limit or offset, a local copy (replaced by the scan total
///        limit when that is non-zero) is passed to `executeWorkUnit` and
///        written back afterwards.
/// @param sort_plan_in  Optional; supplies order entries when non-null.
/// @return The resulting row set.
RowSetPtr Executor::executeSelectPlan(const Planner::Plan* plan,
                                      const int64_t limit,
                                      const int64_t offset,
                                      const bool hoist_literals,
                                      const ExecutorDeviceType device_type,
                                      const ExecutorOptLevel opt_level,
                                      const Catalog_Namespace::Catalog& cat,
                                      size_t& max_groups_buffer_entry_guess,
                                      int32_t* error_code,
                                      const Planner::Sort* sort_plan_in,
                                      const bool allow_multifrag,
                                      const bool just_explain,
                                      const bool allow_loop_joins,
                                      RenderAllocatorMap* render_allocator_map) {
  using ExprList = std::list<std::shared_ptr<Analyzer::Expr>>;

  const bool has_window = limit || offset;

  const bool is_work_unit_plan = dynamic_cast<const Planner::Scan*>(plan) ||
                                 dynamic_cast<const Planner::AggPlan*>(plan) ||
                                 dynamic_cast<const Planner::Join*>(plan);
  if (is_work_unit_plan) {
    row_set_mem_owner_ = std::make_shared<RowSetMemoryOwner>();
    lit_str_dict_proxy_ = nullptr;

    const auto target_exprs = get_agg_target_exprs(plan);

    const auto scan_plan = get_scan_child(plan);
    auto simple_quals = scan_plan ? scan_plan->get_simple_quals() : ExprList{};
    auto quals = scan_plan ? scan_plan->get_quals() : ExprList{};

    const auto agg_plan = dynamic_cast<const Planner::AggPlan*>(plan);
    // Without an aggregate plan the group-by list holds a single null entry.
    auto groupby_exprs = agg_plan ? agg_plan->get_groupby_list() : ExprList{nullptr};

    std::vector<InputDescriptor> input_descs;
    std::list<std::shared_ptr<const InputColDescriptor>> input_col_descs;
    collect_input_descs(input_descs, input_col_descs, plan, cat);

    const auto join_plan = get_join_child(plan);
    if (join_plan) {
      collect_quals_from_join(simple_quals, quals, join_plan);
    }
    const auto join_quals = join_plan ? join_plan->get_quals() : ExprList{};

    CHECK(check_plan_sanity(plan));

    const bool is_agg = (agg_plan != nullptr);
    const auto order_entries =
        sort_plan_in ? sort_plan_in->get_order_entries() : std::list<Analyzer::OrderEntry>{};
    const auto query_infos = get_table_infos(input_descs, this);

    const size_t scan_limit = get_scan_limit(plan, limit);
    const size_t scan_total_limit = scan_limit ? get_scan_limit(plan, scan_limit + offset) : 0;

    const RelAlgExecutionUnit ra_exe_unit_in{
        input_descs,
        {},
        input_col_descs,
        simple_quals,
        quals,
        JoinType::INVALID,
        {},
        join_quals,
        {},
        groupby_exprs,
        target_exprs,
        {},
        nullptr,
        {order_entries, SortAlgorithm::Default, static_cast<size_t>(limit), static_cast<size_t>(offset)},
        scan_total_limit};
    QueryRewriter query_rewriter(ra_exe_unit_in, query_infos, this, agg_plan);
    const auto ra_exe_unit = query_rewriter.rewrite();

    if (!has_window) {
      // No limit/offset: run with the caller's guess and return everything.
      auto result = executeWorkUnit(error_code,
                                    max_groups_buffer_entry_guess,
                                    is_agg,
                                    query_infos,
                                    ra_exe_unit,
                                    {device_type, hoist_literals, opt_level, g_enable_dynamic_watchdog},
                                    {false,
                                     allow_multifrag,
                                     just_explain,
                                     allow_loop_joins,
                                     g_enable_watchdog,
                                     false,
                                     false,
                                     g_enable_dynamic_watchdog,
                                     g_dynamic_watchdog_time_limit},
                                    cat,
                                    row_set_mem_owner_,
                                    render_allocator_map,
                                    true);
      auto& all_rows = boost::get<RowSetPtr>(result);
      CHECK(all_rows);
      return std::move(all_rows);
    }

    // Limit/offset present: run with a local guess, then publish it back to
    // the caller and trim the rows.
    size_t bounded_guess{scan_total_limit ? scan_total_limit : max_groups_buffer_entry_guess};
    auto result = executeWorkUnit(error_code,
                                  bounded_guess,
                                  is_agg,
                                  query_infos,
                                  ra_exe_unit,
                                  {device_type, hoist_literals, opt_level, g_enable_dynamic_watchdog},
                                  {false,
                                   allow_multifrag,
                                   just_explain,
                                   allow_loop_joins,
                                   g_enable_watchdog,
                                   false,
                                   false,
                                   g_enable_dynamic_watchdog,
                                   g_dynamic_watchdog_time_limit},
                                  cat,
                                  row_set_mem_owner_,
                                  render_allocator_map,
                                  true);
    auto& window_rows = boost::get<RowSetPtr>(result);
    max_groups_buffer_entry_guess = bounded_guess;
    CHECK(window_rows);
    window_rows->dropFirstN(offset);
    if (limit) {
      window_rows->keepFirstN(limit);
    }
    return std::move(window_rows);
  }

  if (const auto result_plan = dynamic_cast<const Planner::Result*>(plan)) {
    auto rows = executeResultPlan(result_plan,
                                  hoist_literals,
                                  device_type,
                                  opt_level,
                                  cat,
                                  max_groups_buffer_entry_guess,
                                  error_code,
                                  sort_plan_in,
                                  allow_multifrag,
                                  just_explain,
                                  allow_loop_joins);
    if (has_window) {
      CHECK(rows);
      rows->dropFirstN(offset);
      if (limit) {
        rows->keepFirstN(limit);
      }
    }
    return rows;
  }

  if (const auto sort_plan = dynamic_cast<const Planner::Sort*>(plan)) {
    return executeSortPlan(sort_plan,
                           limit,
                           offset,
                           hoist_literals,
                           device_type,
                           opt_level,
                           cat,
                           max_groups_buffer_entry_guess,
                           error_code,
                           allow_multifrag,
                           just_explain,
                           allow_loop_joins);
  }

  // Unsupported plan type.
  abort();
}