예제 #1
0
  void from_recordset_stream(const std::string &reliable_storage_local_root,
                              const std::string &reliable_storage_remote_root,
                              Input_Stream &input, Output_Stream &output,
                              const std::vector<rel::rlang::token> &tokens) {
    /* Get the headers. */
    record headings(input.parse_headings());

    // Get the resource id field id, this is the only field that we're interested
    // in, the rest are just placeholders.
    size_t resource_id_field_id = headings.mandatory_find_field(
                                      NP1_REL_DISTRIBUTED_RESOURCE_ID_FIELD_NAME);

    // Create the target recordset.
    std::string target_recordset_name(rel::rlang::compiler::eval_to_string_only(tokens));
    np1::io::reliable_storage::id target_recordset_resource_id(target_recordset_name);

    np1::io::reliable_storage rs(reliable_storage_local_root, reliable_storage_remote_root);
    np1::io::reliable_storage::stream target_recordset_stream(rs);
    NP1_ASSERT(rs.create_wo(target_recordset_resource_id, target_recordset_stream),
                "Unable to create recordset " + target_recordset_name);


    // Read all the input recordset chunks ids and write them to the recordset file.
    input.parse_records(
      shallow_copy_chunk_record_callback(
        rs, target_recordset_stream,  resource_id_field_id, headings.ref()));

    NP1_ASSERT(target_recordset_stream.close(),
                "Unable to close target recordset stream");
  }
예제 #2
0
  // Creates a recordset in reliable storage from a stream of data records,
  // redistributing the input into recordset chunks.
  //
  // Arguments (evaluated from tokens):
  //   1. the target recordset name (string, mandatory);
  //   2. the approximate maximum chunk size (integer, optional; defaults to
  //      DEFAULT_APPROX_MAX_CHUNK_SIZE_BYTES). Must not be negative.
  //
  // The output stream is not used by this function. Argument errors are
  // reported through the first token's assert; storage failures through
  // NP1_ASSERT.
  void from_data_stream(const std::string &reliable_storage_local_root,
                          const std::string &reliable_storage_remote_root,
                          Input_Stream &input, Output_Stream &output,
                          const std::vector<rel::rlang::token> &tokens) {
    /* Get the headers. */
    record headings(input.parse_headings());

    // Interpret the arguments.
    std::vector<std::pair<std::string, rlang::dt::data_type> > args = rel::rlang::compiler::eval_to_strings(tokens);
    NP1_ASSERT((args.size() > 0) && (args.size() <= 2), "Incorrect number of arguments to rel.recordset.create");
    tokens[0].assert(rlang::dt::data_type::TYPE_STRING == args[0].second,
                      "First argument to rel.recordset.create is not a string");
    std::string target_recordset_name = args[0].first;

    uint64_t approx_max_chunk_size = DEFAULT_APPROX_MAX_CHUNK_SIZE_BYTES;
    if (args.size() > 1) {
      tokens[0].assert((rlang::dt::data_type::TYPE_INT == args[1].second)
                        || (rlang::dt::data_type::TYPE_UINT == args[1].second),
                        "Second argument to rel.recordset.create is not an integer");

      const int64_t requested_chunk_size = str::dec_to_int64(args[1].first);

      // A signed argument may be negative; converting that to uint64_t would
      // silently wrap around to an enormous chunk size, so reject it here.
      // Unsigned arguments are left alone so that their handling is unchanged.
      tokens[0].assert((rlang::dt::data_type::TYPE_UINT == args[1].second)
                        || (requested_chunk_size >= 0),
                        "Second argument to rel.recordset.create must not be negative");

      approx_max_chunk_size = static_cast<uint64_t>(requested_chunk_size);
    }

    // Get the stream that will hold the recordset.
    np1::io::reliable_storage::id target_recordset_resource_id(target_recordset_name);

    np1::io::reliable_storage rs(reliable_storage_local_root, reliable_storage_remote_root);
    np1::io::reliable_storage::stream target_recordset_stream(rs);
    NP1_ASSERT(rs.create_wo(target_recordset_resource_id, target_recordset_stream),
                "Unable to create recordset " + target_recordset_name
                  + " in reliable storage '" + reliable_storage_local_root + "'");

    // The chunk currently being written: a raw reliable storage stream wrapped
    // first by the buffered stream type, then by the mandatory stream type.
    np1::io::reliable_storage::stream current_target_chunk_stream(rs);
    buffered_reliable_storage_stream_type buffered_current_target_chunk_stream(
                                            current_target_chunk_stream);

    mandatory_reliable_storage_stream_type mandatory_current_target_chunk_stream(
                                            buffered_current_target_chunk_stream);

    // Per-chunk state handed to the callback along with the streams.
    np1::io::reliable_storage::id current_target_chunk_id;
    uint64_t current_target_chunk_size = 0;

    // Read all the input data and redistribute it into recordset chunks.
    input.parse_records(
        chunk_data_record_callback(
          rs, target_recordset_stream, current_target_chunk_id,
          current_target_chunk_stream, mandatory_current_target_chunk_stream,
          current_target_chunk_size, approx_max_chunk_size, headings.ref()));

    // Close everything.
    mandatory_current_target_chunk_stream.close();

    NP1_ASSERT(target_recordset_stream.close(),
                "Unable to close target recordset stream");
  }
예제 #3
0
  // rel.record_count: consumes the whole input stream and writes the number
  // of records it contained (formatted by str::to_dec_str) to the output.
  // The operator takes no arguments; passing any is reported via NP1_ASSERT.
  void operator()(Input_Stream &input, Output_Stream &output,
                  const rstd::vector<rel::rlang::token> &tokens) {
    NP1_ASSERT(0 == tokens.size(), "rel.record_count accepts no arguments");

    // The headings must be consumed before the records, but their content is
    // of no interest here.
    input.parse_headings();

    // The callback is handed the tally, which is written out once parsing
    // has finished.
    uint64_t record_total = 0;
    input.parse_records(record_counter_callback(record_total));

    output.write(str::to_dec_str(record_total).c_str());
  }
예제 #4
0
    // Reads the explicit-mapping input (records with a file name and a host
    // name heading), passes each record to input_record_callback together with
    // the process pool map, and then waits for every process in the pool.
    static void run(Input_Stream &input, Final_Output_Stream &output,
                    const rstd::vector<rel::rlang::token> &tokens) {
        // Parse the headings and locate the two fields we need; both are
        // mandatory.
        rel::record mapping_headings(input.parse_headings());
        size_t file_name_column =
            mapping_headings.mandatory_find_heading(
                NP1_META_PARALLEL_EXPLICIT_MAPPING_HEADING_FILE_NAME);
        size_t host_name_column =
            mapping_headings.mandatory_find_heading(
                NP1_META_PARALLEL_EXPLICIT_MAPPING_HEADING_HOST_NAME);

        // Manages all the processes.
        process_pool_map_type pools;

        // Handed to the callback along with the output stream; starts out false.
        bool headings_already_written = false;

        // Turn every input record into a command line and queue it in the pool.
        input.parse_records(
            input_record_callback(file_name_column, host_name_column, tokens,
                                  pools, output, headings_already_written));

        // Run all the processes in the pool.
        pools.wait_all();
    }
예제 #5
0
  // Sorts the input records on the headings named by the argument tokens and
  // writes the headings followed by the sorted records to the output.
  //
  // sort_type selects the algorithm and sort_order the direction. Note that
  // both TYPE_MERGE_SORT and TYPE_QUICK_SORT currently run detail::merge_sort.
  // An unrecognised sort_type or sort_order writes the headings only.
  void operator()(Input_Stream &input, Output_Stream &output,
                  const std::vector<rel::rlang::token> &tokens,
                  sort_type_type sort_type,
                  sort_order_type sort_order) {
    NP1_ASSERT(tokens.size() > 0, "Unexpected empty stream operator argument list");

    // The headings are needed to give meaning to the arguments.
    record headings(input.parse_headings());

    // Resolve the argument tokens into the list of heading names to sort on.
    std::vector<std::string> sort_heading_names;
    rlang::compiler::compile_heading_name_list(
        tokens, headings.ref(), sort_heading_names);

    // Build the comparison specs from those headings.
    detail::compare_specs specs(headings, sort_heading_names);

    // Headings go out first, then the sorted records.
    headings.write(output);

    less_than ascending_compare(specs);
    greater_than descending_compare(specs);

    //TODO: why the dickens isn't this quick sort and why is quick sort broken?
    // Until that is resolved, a quick sort request takes the merge sort path.
    const bool known_sort_type =
        (TYPE_MERGE_SORT == sort_type) || (TYPE_QUICK_SORT == sort_type);

    if (known_sort_type) {
      if (ORDER_ASCENDING == sort_order) {
        sort<detail::merge_sort>(input, output, ascending_compare);
      } else if (ORDER_DESCENDING == sort_order) {
        sort<detail::merge_sort>(input, output, descending_compare);
      }
    }
  }
예제 #6
0
  // Distributed rel.where: validates the expression locally against the input
  // headings, then hands the work to distributed::distribute.
  //
  // The expression in tokens must compile against the input headings and
  // return a boolean; a non-boolean expression is reported via NP1_ASSERT.
  void operator()(const std::string &reliable_storage_local_root,
                  const std::string &reliable_storage_remote_root,
                  const std::string &listen_endpoint,
                  Input_Stream &input, Output_Stream &output,
                  const std::vector<rel::rlang::token> &tokens) {
    log_info("Reading headers and compiling expression against headers.");

    // Get the headers.
    record headings(input.parse_headings());

    // Compile up front purely as a check that the headings and tokens are
    // likely to work once the job is distributed. The second set of headings
    // given to the compiler is an empty record.
    record no_headings;
    rlang::vm trial_vm(
        rlang::compiler::compile_single_expression(
            tokens, headings.ref(), no_headings.ref()));

    // Only a boolean expression is acceptable.
    NP1_ASSERT(rlang::dt::TYPE_BOOL == trial_vm.return_type(),
                "Expression is not a boolean expression");

    // Do the distribution. The same headings are passed for both heading
    // arguments.
    distributed::distribute(
        log_id(), headings, headings, "rel.where",
        reliable_storage_local_root, reliable_storage_remote_root,
        listen_endpoint, input, output, tokens);
  }