int main( int ac, char** av ) { try { comma::command_line_options options( ac, av ); if( options.exists( "--help" ) || options.exists( "-h" ) || ac == 1 ) { usage(); } options.assert_mutually_exclusive( "--by-lower,--by-upper,--nearest" ); bool by_upper = options.exists( "--by-upper" ); bool nearest = options.exists( "--nearest" ); bool by_lower = ( options.exists( "--by-lower" ) || !by_upper ) && !nearest; //bool nearest_only = options.exists( "--nearest-only" ); bool timestamp_only = options.exists( "--timestamp-only,--time-only" ); bool discard = !options.exists( "--no-discard" ); boost::optional< boost::posix_time::time_duration > bound; if( options.exists( "--bound" ) ) { bound = boost::posix_time::microseconds( options.value< double >( "--bound" ) * 1000000 ); } comma::csv::options stdin_csv( options, "t" ); //bool has_block = stdin_csv.has_field( "block" ); comma::csv::input_stream< Point > stdin_stream( std::cin, stdin_csv ); std::vector< std::string > unnamed = options.unnamed( "--by-lower,--by-upper,--nearest,--timestamp-only,--time-only,--no-discard", "--binary,-b,--delimiter,-d,--fields,-f,--bound" ); std::string properties; bool bounded_first = true; switch( unnamed.size() ) { case 0: std::cerr << "csv-time-join: please specify bounding source" << std::endl; return 1; case 1: properties = unnamed[0]; break; case 2: if( unnamed[0] == "-" ) { properties = unnamed[1]; } else if( unnamed[1] == "-" ) { properties = unnamed[0]; bounded_first = false; } else { std::cerr << "csv-time-join: expected either '- <bounding>' or '<bounding> -'; got : " << comma::join( unnamed, ' ' ) << std::endl; return 1; } break; default: std::cerr << "csv-time-join: expected either '- <bounding>' or '<bounding> -'; got : " << comma::join( unnamed, ' ' ) << std::endl; return 1; } comma::io::istream is( comma::split( properties, ';' )[0] ); comma::name_value::parser parser( "filename" ); comma::csv::options csv = parser.get< comma::csv::options >( properties ); if( csv.fields.empty() ) { csv.fields = "t"; } comma::csv::input_stream< Point > istream( *is, csv ); std::pair< std::string, std::string > last; std::pair< boost::posix_time::ptime, boost::posix_time::ptime > last_timestamp; comma::signal_flag is_shutdown; #ifdef WIN32 if( stdin_csv.binary() ) { _setmode( _fileno( stdout ), _O_BINARY ); } #endif while( !is_shutdown && std::cin.good() && !std::cin.eof() && is->good() && !is->eof() ) { const Point* p = stdin_stream.read(); if( !p ) { break; } bool eof = false; while( last_timestamp.first.is_not_a_date_time() || p->timestamp >= last_timestamp.second ) { last_timestamp.first = last_timestamp.second; last.first = last.second; const Point* q = istream.read(); if( !q ) { eof = true; break; } last_timestamp.second = q->timestamp; if( !timestamp_only ) { if( csv.binary() ) { last.second = std::string( istream.binary().last(), csv.format().size() ); } else { last.second = comma::join( istream.ascii().last(), stdin_csv.delimiter ); } } } if( eof ) { break; } if( discard && p->timestamp < last_timestamp.first ) { continue; } bool is_first = by_lower || ( nearest && ( p->timestamp - last_timestamp.first ) < ( last_timestamp.second - p->timestamp ) ); const boost::posix_time::ptime& t = is_first ? last_timestamp.first : last_timestamp.second; if( bound && !( ( t - *bound ) <= p->timestamp && p->timestamp <= ( t + *bound ) ) ) { continue; } const std::string& s = is_first ? last.first : last.second; if( stdin_csv.binary() ) { if( bounded_first ) { std::cout.write( stdin_stream.binary().last(), stdin_csv.format().size() ); } if( timestamp_only ) { static comma::csv::binary< Point > b; std::vector< char > v( b.format().size() ); b.put( Point( t ), &v[0] ); std::cout.write( &v[0], b.format().size() ); } else { std::cout.write( &s[0], s.size() ); } if( !bounded_first ) { std::cout.write( stdin_stream.binary().last(), stdin_csv.format().size() ); } std::cout.flush(); } else { if( bounded_first ) { std::cout << comma::join( stdin_stream.ascii().last(), stdin_csv.delimiter ) << stdin_csv.delimiter; } if( timestamp_only ) { std::cout << boost::posix_time::to_iso_string( t ); } else { std::cout << s; } if( !bounded_first ) { std::cout << stdin_csv.delimiter << comma::join( stdin_stream.ascii().last(), stdin_csv.delimiter ); } std::cout << std::endl; } } if( is_shutdown ) { std::cerr << "csv-time-join: interrupted by signal" << std::endl; } return 0; } catch( std::exception& ex ) { std::cerr << "csv-time-join: " << ex.what() << std::endl; } catch( ... ) { std::cerr << "csv-time-join: unknown exception" << std::endl; } usage(); }
int sort( const comma::command_line_options& options ) { input_t::map sorted_map; input_t default_input; std::vector< std::string > v = comma::split( stdin_csv.fields, ',' ); std::vector< std::string > order = options.exists("--order") ? comma::split( options.value< std::string >( "--order" ), ',' ) : v; std::vector< std::string > w (v.size()); bool unique = options.exists("--unique,-u"); std::string first_line; comma::csv::format f; if( stdin_csv.binary() ) { f = stdin_csv.format(); } else if( options.exists( "--format" ) ) { f = comma::csv::format( options.value< std::string >( "--format" ) ); } else { while( std::cin.good() && first_line.empty() ) { std::getline( std::cin, first_line ); } if( first_line.empty() ) { return 0; } f = comma::csv::impl::unstructured::guess_format( first_line, stdin_csv.delimiter ); if( verbose ) { std::cerr << "csv-sort: guessed format: " << f.string() << std::endl; } } for( std::size_t i = 0; i < order.size(); ++i ) // quick and dirty, wasteful, but who cares { if (order[i].empty()) continue; for( std::size_t k = 0; k < v.size(); ++k ) { if( v[k].empty() || v[k] != order[i] ) { if ( k + 1 == v.size()) { std::cerr << "csv-sort: order field name \"" << order[i] << "\" not found in input fields \"" << stdin_csv.fields << "\"" << std::endl; return 1; } continue; } std::string type = default_input.keys.append( f.offset( k ).type ); w[k] = "keys/" + type; ordering_t o; if ( type[0] == 's' ) { o.type = ordering_t::str_type; o.index = default_input.keys.strings.size() - 1; } else if ( type[0] == 'l' ) { o.type = ordering_t::long_type; o.index = default_input.keys.longs.size() - 1; } else if ( type[0] == 'd' ) { o.type = ordering_t::double_type; o.index = default_input.keys.doubles.size() - 1; } else if ( type[0] == 't' ) { o.type = ordering_t::time_type; o.index = default_input.keys.time.size() - 1; } else { std::cerr << "csv-sort: cannot sort on field " << v[k] << " of type \"" << type << "\"" << std::endl; return 1; } ordering.push_back(o); break; } } stdin_csv.fields = comma::join( w, ',' ); if ( verbose ) { std::cerr << "csv-sort: fields: " << stdin_csv.fields << std::endl; } comma::csv::input_stream< input_t > stdin_stream( std::cin, stdin_csv, default_input ); #ifdef WIN32 if( stdin_stream.is_binary() ) { _setmode( _fileno( stdout ), _O_BINARY ); } #endif if (!first_line.empty()) { input_t::map::mapped_type& d = sorted_map[ comma::csv::ascii< input_t >(stdin_csv,default_input).get(first_line) ]; d.push_back( first_line + "\n" ); } while( stdin_stream.ready() || ( std::cin.good() && !std::cin.eof() ) ) { const input_t* p = stdin_stream.read(); if( !p ) { break; } if( stdin_stream.is_binary() ) { input_t::map::mapped_type& d = sorted_map[ *p ]; if (unique && !d.empty()) continue; d.push_back( std::string() ); d.back().resize( stdin_csv.format().size() ); ::memcpy( &d.back()[0], stdin_stream.binary().last(), stdin_csv.format().size() ); } else { input_t::map::mapped_type& d = sorted_map[ *p ]; if (unique && !d.empty()) continue; d.push_back( comma::join( stdin_stream.ascii().last(), stdin_csv.delimiter ) + "\n" ); } } if( options.exists( "--reverse,-r" ) ) { output_( sorted_map.rbegin(), sorted_map.rend() ); } else { output_( sorted_map.begin(), sorted_map.end() ); } return 0; }
int main( int ac, char** av ) { try { comma::signal_flag is_shutdown(comma::signal_flag::hard); comma::command_line_options options( ac, av, usage ); if( options.exists( "--bash-completion" )) bash_completion( ac, av ); options.assert_mutually_exclusive( "--by-lower,--by-upper,--nearest,--realtime" ); if( options.exists( "--by-upper" )) { method = how::by_upper; } if( options.exists( "--nearest" )) { method = how::nearest; } if( options.exists( "--realtime" )) { method = how::realtime; } timestamp_only = options.exists( "--timestamp-only,--time-only" ); select_only = options.exists( "--do-not-append,--select" ); if( select_only && timestamp_only ) { std::cerr << "csv-time-join: --timestamp-only specified with --select, ignoring --timestamp-only" << std::endl; } bool discard_bounding = options.exists( "--discard-bounding" ); boost::optional< unsigned int > buffer_size = options.optional< unsigned int >( "--buffer" ); if( options.exists( "--bound" ) ) { bound = boost::posix_time::microseconds( static_cast<unsigned int>(options.value< double >( "--bound" ) * 1000000 )); } stdin_csv = comma::csv::options( options, "t" ); std::vector< std::string > unnamed = options.unnamed( "--by-lower,--by-upper,--nearest,--realtime,--select,--do-not-append,--timestamp-only,--time-only,--discard-bounding", "--binary,-b,--delimiter,-d,--fields,-f,--bound,--buffer,--verbose,-v" ); std::string properties; bool stdin_first = true; switch( unnamed.size() ) { case 0: std::cerr << "csv-time-join: please specify bounding source" << std::endl; return 1; case 1: properties = unnamed[0]; break; case 2: if( unnamed[0] == "-" ) { properties = unnamed[1]; } else if( unnamed[1] == "-" ) { properties = unnamed[0]; stdin_first = false; } else { std::cerr << "csv-time-join: expected either '- <bounding>' or '<bounding> -'; got : " << comma::join( unnamed, ' ' ) << std::endl; return 1; } break; default: std::cerr << "csv-time-join: expected either '- <bounding>' or '<bounding> -'; got : " << comma::join( unnamed, ' ' ) << std::endl; return 1; } comma::name_value::parser parser( "filename" ); bounding_csv = parser.get< comma::csv::options >( properties ); if( bounding_csv.fields.empty() ) { bounding_csv.fields = "t"; } comma::csv::input_stream< Point > stdin_stream( std::cin, stdin_csv ); #ifdef WIN32 if( stdin_csv.binary() ) { _setmode( _fileno( stdout ), _O_BINARY ); } #endif // #ifdef WIN32 comma::io::istream bounding_istream( comma::split( properties, ';' )[0] , bounding_csv.binary() ? comma::io::mode::binary : comma::io::mode::ascii ); comma::csv::input_stream< Point > bounding_stream( *bounding_istream, bounding_csv ); #ifndef WIN32 comma::io::select select; comma::io::select bounding_stream_select; select.read().add( 0 ); select.read().add( bounding_istream.fd() ); bounding_stream_select.read().add( bounding_istream.fd() ); #endif // #ifndef WIN32 const Point* p = NULL; if( method == how::realtime ) { #ifndef WIN32 bool end_of_input = false; bool end_of_bounds = false; boost::optional<timestring_t> joined_line; while (!is_shutdown && !end_of_input) { if ( !bounding_stream.ready() && !stdin_stream.ready() ) { select.wait(boost::posix_time::milliseconds(1)); } if ( !is_shutdown && !end_of_input && ( stdin_stream.ready() || ( select.check() && select.read().ready( comma::io::stdin_fd ) ) ) ) { p = stdin_stream.read(); if( p ) { timestring_t input_line = std::make_pair( get_time( *p ), stdin_stream.last() ); if( joined_line ) { output( input_line, *joined_line, stdin_first ); } } else { comma::verbose << "end of input stream" << std::endl; end_of_input = true; } } if ( !is_shutdown && !end_of_bounds && ( bounding_stream.ready() || ( select.check() && select.read().ready( bounding_istream.fd() )))) { p = bounding_stream.read(); if( p ) { joined_line = std::make_pair( get_time( *p ), bounding_stream.last() ); } else { comma::verbose << "end of bounding stream" << std::endl; end_of_bounds = true; } } } if (is_shutdown) { comma::verbose << "got a signal" << std::endl; return 0; } #else COMMA_THROW(comma::exception, "--realtime mode not supported in WIN32"); #endif } else { std::deque<timestring_t> bounding_queue; bool next = true; bool bounding_data_available; bool upper_bound_added = false; // add a fake entry for an lower bound to allow stdin before first bound to match bounding_queue.push_back( std::make_pair( boost::posix_time::neg_infin, "" )); while( ( stdin_stream.ready() || ( std::cin.good() && !std::cin.eof() ) ) ) { if( !std::cin.good() ) { select.read().remove( 0 ); } if( !bounding_istream->good() ) { select.read().remove( bounding_istream.fd() ); } bounding_data_available = bounding_stream.ready() || ( bounding_istream->good() && !bounding_istream->eof() ); #ifdef WIN32 bool bounding_stream_ready = true; bool stdin_stream_ready = true; #else // #ifdef WIN32 //check so we do not block bool bounding_stream_ready = bounding_stream.ready(); bool stdin_stream_ready = stdin_stream.ready(); if( next ) { if( !bounding_stream_ready || !stdin_stream_ready ) { if( !bounding_stream_ready && !stdin_stream_ready ) { select.wait( boost::posix_time::milliseconds(10) ); } else { select.check(); } if( select.read().ready( bounding_istream.fd() )) { bounding_stream_ready = true; } if( select.read().ready(0) ) { stdin_stream_ready=true; } } } else { if( !bounding_stream_ready ) { bounding_stream_select.wait( boost::posix_time::milliseconds(10) ); if( bounding_stream_select.read().ready( bounding_istream.fd() )) { bounding_stream_ready=true; } } } #endif //#ifdef WIN32 //keep storing available bounding data if( bounding_stream_ready ) { if( !buffer_size || bounding_queue.size() < *buffer_size || discard_bounding ) { const Point* q = bounding_stream.read(); if( q ) { bounding_queue.push_back( std::make_pair( get_time( *q ), bounding_stream.last() )); } else { bounding_data_available=false; } } if( buffer_size && bounding_queue.size() > *buffer_size && discard_bounding ) { bounding_queue.pop_front(); } } if( !upper_bound_added && bounding_istream->eof() ) { // add a fake entry for an upper bound to allow stdin data above last bound to match bounding_queue.push_back( std::make_pair( boost::posix_time::pos_infin, "" )); upper_bound_added = true; } //if we are done with the last bounded point get next if( next ) { if(!stdin_stream_ready) { continue; } p = stdin_stream.read(); if( !p ) { break; } } boost::posix_time::ptime t = get_time(*p); //get bound while(bounding_queue.size()>=2) { if( t < bounding_queue[1].first ) { break; } bounding_queue.pop_front(); } if(bounding_queue.size()<2) { //bound not found //do we have more data? if(!bounding_data_available) { break; } next=false; continue; } //bound available if( method == how::by_lower && t < bounding_queue.front().first ) { next = true; continue; } bool is_first = ( method == how::by_lower ) || ( method == how::nearest && ( t - bounding_queue[0].first ) < ( bounding_queue[1].first - t )); const timestring_t& chosen_bound = is_first ? bounding_queue[0] : bounding_queue[1];; timestring_t input_line = std::make_pair( t, stdin_stream.last() ); output( input_line, chosen_bound, stdin_first ); next=true; } } return 0; } catch( std::exception& ex ) { std::cerr << "csv-time-join: " << ex.what() << std::endl; } catch( ... ) { std::cerr << "csv-time-join: unknown exception" << std::endl; } }