void from_recordset_stream(const std::string &reliable_storage_local_root, const std::string &reliable_storage_remote_root, Input_Stream &input, Output_Stream &output, const std::vector<rel::rlang::token> &tokens) { /* Get the headers. */ record headings(input.parse_headings()); // Get the resource id field id, this is the only field that we're interested // in, the rest are just placeholders. size_t resource_id_field_id = headings.mandatory_find_field( NP1_REL_DISTRIBUTED_RESOURCE_ID_FIELD_NAME); // Create the target recordset. std::string target_recordset_name(rel::rlang::compiler::eval_to_string_only(tokens)); np1::io::reliable_storage::id target_recordset_resource_id(target_recordset_name); np1::io::reliable_storage rs(reliable_storage_local_root, reliable_storage_remote_root); np1::io::reliable_storage::stream target_recordset_stream(rs); NP1_ASSERT(rs.create_wo(target_recordset_resource_id, target_recordset_stream), "Unable to create recordset " + target_recordset_name); // Read all the input recordset chunks ids and write them to the recordset file. input.parse_records( shallow_copy_chunk_record_callback( rs, target_recordset_stream, resource_id_field_id, headings.ref())); NP1_ASSERT(target_recordset_stream.close(), "Unable to close target recordset stream"); }
// Build a recordset from a raw data stream: evaluate the arguments to get the
// recordset name (and optionally a chunk-size hint), create the recordset in
// reliable storage, then split the incoming records into chunks of
// approximately approx_max_chunk_size bytes, recording each chunk's id in the
// recordset file.
void from_data_stream(const std::string &reliable_storage_local_root,
                      const std::string &reliable_storage_remote_root,
                      Input_Stream &input, Output_Stream &output,
                      const std::vector<rel::rlang::token> &tokens) {
  /* Get the headers. */
  record headings(input.parse_headings());

  // Interpret the arguments: 1 mandatory (recordset name) + 1 optional
  // (approximate max chunk size in bytes).
  std::vector<std::pair<std::string, rlang::dt::data_type> > args =
    rel::rlang::compiler::eval_to_strings(tokens);
  NP1_ASSERT((args.size() > 0) && (args.size() <= 2),
             "Incorrect number of arguments to rel.recordset.create");
  // token::assert attaches the offending token's position to the error.
  tokens[0].assert(rlang::dt::data_type::TYPE_STRING == args[0].second,
                   "First argument to rel.recordset.create is not a string");
  std::string target_recordset_name = args[0].first;

  uint64_t approx_max_chunk_size = DEFAULT_APPROX_MAX_CHUNK_SIZE_BYTES;
  if (args.size() > 1) {
    tokens[0].assert((rlang::dt::data_type::TYPE_INT == args[1].second)
                       || (rlang::dt::data_type::TYPE_UINT == args[1].second),
                     "Second argument to rel.recordset.create is not an integer");
    // NOTE(review): dec_to_int64 on a uint64_t target — presumably callers
    // never pass a negative or > INT64_MAX size; confirm.
    approx_max_chunk_size = str::dec_to_int64(args[1].first);
  }

  // Get the stream that will hold the recordset (the list of chunk ids).
  np1::io::reliable_storage::id target_recordset_resource_id(target_recordset_name);
  np1::io::reliable_storage rs(reliable_storage_local_root,
                               reliable_storage_remote_root);
  np1::io::reliable_storage::stream target_recordset_stream(rs);
  NP1_ASSERT(rs.create_wo(target_recordset_resource_id, target_recordset_stream),
             "Unable to create recordset " + target_recordset_name
               + " in reliable storage '" + reliable_storage_local_root + "'");

  // Wrapper chain for the chunk currently being written: raw reliable-storage
  // stream -> buffered -> mandatory (asserting). Construction order matters —
  // each layer holds a reference to the one below it.
  np1::io::reliable_storage::stream current_target_chunk_stream(rs);
  buffered_reliable_storage_stream_type buffered_current_target_chunk_stream(
    current_target_chunk_stream);
  mandatory_reliable_storage_stream_type mandatory_current_target_chunk_stream(
    buffered_current_target_chunk_stream);
  np1::io::reliable_storage::id current_target_chunk_id;
  uint64_t current_target_chunk_size = 0;

  // Read all the input data and redistribute it into recordset chunks.  The
  // callback mutates the chunk id/stream/size state declared above as it
  // rolls over to a new chunk whenever the size hint is exceeded.
  input.parse_records(
    chunk_data_record_callback(rs, target_recordset_stream,
                               current_target_chunk_id,
                               current_target_chunk_stream,
                               mandatory_current_target_chunk_stream,
                               current_target_chunk_size, approx_max_chunk_size,
                               headings.ref()));

  // Close everything.  The mandatory stream's close asserts internally, hence
  // no NP1_ASSERT around it.
  mandatory_current_target_chunk_stream.close();
  NP1_ASSERT(target_recordset_stream.close(),
             "Unable to close target recordset stream");
}
// rel.record_count: emit the number of data records in the input stream as
// decimal text.  Takes no arguments.
void operator()(Input_Stream &input, Output_Stream &output,
                const rstd::vector<rel::rlang::token> &tokens) {
  NP1_ASSERT(tokens.size() == 0, "rel.record_count accepts no arguments");

  // The headings line is irrelevant to the count; consume and discard it.
  input.parse_headings();

  // Count every data record, then write out the total.
  uint64_t record_total = 0;
  input.parse_records(record_counter_callback(record_total));
  output.write(str::to_dec_str(record_total).c_str());
}
// Explicit-mapping parallel runner: each input record names a file and the
// host that should process it.  All records are turned into command lines,
// queued in a process pool, and executed to completion.
static void run(Input_Stream &input, Final_Output_Stream &output,
                const rstd::vector<rel::rlang::token> &tokens) {
  // Parse the headings and locate the two columns that drive the mapping.
  rel::record input_headings(input.parse_headings());
  size_t file_name_field = input_headings.mandatory_find_heading(
    NP1_META_PARALLEL_EXPLICIT_MAPPING_HEADING_FILE_NAME);
  size_t host_name_field = input_headings.mandatory_find_heading(
    NP1_META_PARALLEL_EXPLICIT_MAPPING_HEADING_HOST_NAME);

  // Queue one process per input record, then run the whole pool.  The
  // callback also writes the output headings exactly once, tracked via
  // headings_written.
  process_pool_map_type pool;
  bool headings_written = false;
  input.parse_records(input_record_callback(file_name_field, host_name_field,
                                            tokens, pool, output,
                                            headings_written));
  pool.wait_all();
}
void operator()(Input_Stream &input, Output_Stream &output, const std::vector<rel::rlang::token> &tokens, sort_type_type sort_type, sort_order_type sort_order) { NP1_ASSERT(tokens.size() > 0, "Unexpected empty stream operator argument list"); // Read the first line of input, we need it to add meaning to the arguments. record headings(input.parse_headings()); std::vector<std::string> arg_headings; rlang::compiler::compile_heading_name_list( tokens, headings.ref(), arg_headings); // Create the compare specs. detail::compare_specs comp_specs(headings, arg_headings); // Write out the headings then do the actual sorting. headings.write(output); less_than lt(comp_specs); greater_than gt(comp_specs); switch (sort_type) { case TYPE_MERGE_SORT: switch (sort_order) { case ORDER_ASCENDING: sort<detail::merge_sort>(input, output, lt); break; case ORDER_DESCENDING: sort<detail::merge_sort>(input, output, gt); break; } break; case TYPE_QUICK_SORT: //TODO: why the dickens isn't this quick sort and why is quick sort broken? switch (sort_order) { case ORDER_ASCENDING: sort<detail::merge_sort>(input, output, lt); break; case ORDER_DESCENDING: sort<detail::merge_sort>(input, output, gt); break; } break; } }
void operator()(const std::string &reliable_storage_local_root, const std::string &reliable_storage_remote_root, const std::string &listen_endpoint, Input_Stream &input, Output_Stream &output, const std::vector<rel::rlang::token> &tokens) { log_info("Reading headers and compiling expression against headers."); /* Get the headers. */ record headings(input.parse_headings()); // Compile, just to check that the headings & tokens are likely to work // when we distribute. record empty_headings; rlang::vm vm = rlang::compiler::compile_single_expression( tokens, headings.ref(), empty_headings.ref()); // Check that the expression is actually a boolean expression. NP1_ASSERT(vm.return_type() == rlang::dt::TYPE_BOOL, "Expression is not a boolean expression"); // Do the distribution. distributed::distribute(log_id(), headings, headings, "rel.where", reliable_storage_local_root, reliable_storage_remote_root, listen_endpoint, input, output, tokens); }