Esempio n. 1
0
int main( int ac, char** av )
{
    try
    {
        comma::command_line_options options( ac, av );
        if( options.exists( "--help" ) || options.exists( "-h" ) || ac == 1 ) { usage(); }
        options.assert_mutually_exclusive( "--by-lower,--by-upper,--nearest" );
        bool by_upper = options.exists( "--by-upper" );
        bool nearest = options.exists( "--nearest" );
        bool by_lower = ( options.exists( "--by-lower" ) || !by_upper ) && !nearest;
        //bool nearest_only = options.exists( "--nearest-only" );
        bool timestamp_only = options.exists( "--timestamp-only,--time-only" );
        bool discard = !options.exists( "--no-discard" );
        boost::optional< boost::posix_time::time_duration > bound;
        if( options.exists( "--bound" ) ) { bound = boost::posix_time::microseconds( options.value< double >( "--bound" ) * 1000000 ); }
        comma::csv::options stdin_csv( options, "t" );
        //bool has_block = stdin_csv.has_field( "block" );
        comma::csv::input_stream< Point > stdin_stream( std::cin, stdin_csv );
        std::vector< std::string > unnamed = options.unnamed( "--by-lower,--by-upper,--nearest,--timestamp-only,--time-only,--no-discard", "--binary,-b,--delimiter,-d,--fields,-f,--bound" );
        std::string properties;
        bool bounded_first = true;
        switch( unnamed.size() )
        {
            case 0:
                std::cerr << "csv-time-join: please specify bounding source" << std::endl;
                return 1;
            case 1:
                properties = unnamed[0];
                break;
            case 2:
                if( unnamed[0] == "-" ) { properties = unnamed[1]; }
                else if( unnamed[1] == "-" ) { properties = unnamed[0]; bounded_first = false; }
                else { std::cerr << "csv-time-join: expected either '- <bounding>' or '<bounding> -'; got : " << comma::join( unnamed, ' ' ) << std::endl; return 1; }
                break;
            default:
                std::cerr << "csv-time-join: expected either '- <bounding>' or '<bounding> -'; got : " << comma::join( unnamed, ' ' ) << std::endl;
                return 1;
        }
        comma::io::istream is( comma::split( properties, ';' )[0] );
        comma::name_value::parser parser( "filename" );
        comma::csv::options csv = parser.get< comma::csv::options >( properties );
        if( csv.fields.empty() ) { csv.fields = "t"; }
        comma::csv::input_stream< Point > istream( *is, csv );
        std::pair< std::string, std::string > last;
        std::pair< boost::posix_time::ptime, boost::posix_time::ptime > last_timestamp;
        comma::signal_flag is_shutdown;

        #ifdef WIN32
        if( stdin_csv.binary() ) { _setmode( _fileno( stdout ), _O_BINARY ); }
        #endif
        
        while( !is_shutdown && std::cin.good() && !std::cin.eof() && is->good() && !is->eof() )
        {
            const Point* p = stdin_stream.read();
            if( !p ) { break; }
            bool eof = false;
            while( last_timestamp.first.is_not_a_date_time() || p->timestamp >= last_timestamp.second )
            {
                last_timestamp.first = last_timestamp.second;
                last.first = last.second;
                const Point* q = istream.read();
                if( !q ) { eof = true; break; }
                last_timestamp.second = q->timestamp;
                if( !timestamp_only )
                {
                    if( csv.binary() ) { last.second = std::string( istream.binary().last(), csv.format().size() ); }
                    else { last.second = comma::join( istream.ascii().last(), stdin_csv.delimiter ); }
                }
            }
            if( eof ) { break; }
            if( discard && p->timestamp < last_timestamp.first ) { continue; }
            bool is_first = by_lower || ( nearest && ( p->timestamp - last_timestamp.first ) < ( last_timestamp.second - p->timestamp ) );
            const boost::posix_time::ptime& t = is_first ? last_timestamp.first : last_timestamp.second;
            if( bound && !( ( t - *bound ) <= p->timestamp && p->timestamp <= ( t + *bound ) ) ) { continue; }
            const std::string& s = is_first ? last.first : last.second;
            if( stdin_csv.binary() )
            {
                if( bounded_first ) { std::cout.write( stdin_stream.binary().last(), stdin_csv.format().size() ); }
                if( timestamp_only )
                {
                    static comma::csv::binary< Point > b;
                    std::vector< char > v( b.format().size() );
                    b.put( Point( t ), &v[0] );
                    std::cout.write( &v[0], b.format().size() );
                }
                else
                {
                    std::cout.write( &s[0], s.size() );
                }
                if( !bounded_first ) { std::cout.write( stdin_stream.binary().last(), stdin_csv.format().size() ); }
                std::cout.flush();
            }
            else
            {
                if( bounded_first ) { std::cout << comma::join( stdin_stream.ascii().last(), stdin_csv.delimiter ) << stdin_csv.delimiter; }
                if( timestamp_only ) { std::cout << boost::posix_time::to_iso_string( t ); }
                else { std::cout << s; }
                if( !bounded_first ) { std::cout << stdin_csv.delimiter << comma::join( stdin_stream.ascii().last(), stdin_csv.delimiter ); }
                std::cout << std::endl;
            }
        }
        if( is_shutdown ) { std::cerr << "csv-time-join: interrupted by signal" << std::endl; }
        return 0;     
    }
    catch( std::exception& ex ) { std::cerr << "csv-time-join: " << ex.what() << std::endl; }
    catch( ... ) { std::cerr << "csv-time-join: unknown exception" << std::endl; }
    usage();
}
Esempio n. 2
0
// Reads every record from stdin into a map keyed on the sort fields, then passes the map
// to output_ in forward order, or in reverse order when --reverse/-r is given.
//
// The sort fields are those listed in --order; without --order all input fields are used.
// Each sort field is renamed to "keys/<type>" in stdin_csv.fields; all other fields are blanked.
// The record format is taken from the binary format, from --format, or is otherwise guessed
// from the first non-empty input line. With --unique/-u only the first record per key is kept.
//
// Returns 0 on success (including empty input), 1 if an --order field is not among the input
// fields or has a type that cannot be sorted on.
int sort( const comma::command_line_options& options )
{
    input_t::map records_by_key;
    input_t prototype;
    std::vector< std::string > field_names = comma::split( stdin_csv.fields, ',' );
    std::vector< std::string > key_names = field_names;
    if( options.exists( "--order" ) ) { key_names = comma::split( options.value< std::string >( "--order" ), ',' ); }
    std::vector< std::string > key_fields( field_names.size() );
    const bool unique = options.exists( "--unique,-u" );

    // work out the record format
    std::string first_line;
    comma::csv::format record_format;
    if( stdin_csv.binary() )
    {
        record_format = stdin_csv.format();
    }
    else if( options.exists( "--format" ) )
    {
        record_format = comma::csv::format( options.value< std::string >( "--format" ) );
    }
    else
    {
        // no format given: consume input up to the first non-empty line and guess from it
        while( std::cin.good() && first_line.empty() )
        {
            std::getline( std::cin, first_line );
        }
        if( first_line.empty() ) { return 0; }
        record_format = comma::csv::impl::unstructured::guess_format( first_line, stdin_csv.delimiter );
        if( verbose ) { std::cerr << "csv-sort: guessed format: " << record_format.string() << std::endl; }
    }

    // map each requested sort field onto a typed key slot
    for( std::size_t i = 0; i < key_names.size(); ++i )
    {
        const std::string& name = key_names[i];
        if( name.empty() ) { continue; }

        // index of the first input field carrying this name
        std::size_t k = 0;
        while( k < field_names.size() && field_names[k] != name ) { ++k; }
        if( k == field_names.size() )
        {
            if( field_names.empty() ) { continue; }
            std::cerr << "csv-sort: order field name \"" << name << "\" not found in input fields \"" << stdin_csv.fields << "\"" << std::endl;
            return 1;
        }

        const std::string type = prototype.keys.append( record_format.offset( k ).type );
        key_fields[k] = "keys/" + type;

        // the key just appended is the last element of the container for its type
        ordering_t o;
        switch( type[0] )
        {
            case 's':
                o.type = ordering_t::str_type;
                o.index = prototype.keys.strings.size() - 1;
                break;
            case 'l':
                o.type = ordering_t::long_type;
                o.index = prototype.keys.longs.size() - 1;
                break;
            case 'd':
                o.type = ordering_t::double_type;
                o.index = prototype.keys.doubles.size() - 1;
                break;
            case 't':
                o.type = ordering_t::time_type;
                o.index = prototype.keys.time.size() - 1;
                break;
            default:
                std::cerr << "csv-sort: cannot sort on field " << field_names[k] << " of type \"" << type << "\"" << std::endl;
                return 1;
        }
        ordering.push_back( o );
    }

    stdin_csv.fields = comma::join( key_fields, ',' );
    if( verbose ) { std::cerr << "csv-sort: fields: " << stdin_csv.fields << std::endl; }
    comma::csv::input_stream< input_t > stdin_stream( std::cin, stdin_csv, prototype );
    #ifdef WIN32
    if( stdin_stream.is_binary() ) { _setmode( _fileno( stdout ), _O_BINARY ); }
    #endif

    // the line consumed for format guessing is a record too: file it under its key
    if( !first_line.empty() )
    {
        comma::csv::ascii< input_t > first_line_parser( stdin_csv, prototype );
        records_by_key[ first_line_parser.get( first_line ) ].push_back( first_line + "\n" );
    }

    while( stdin_stream.ready() || ( std::cin.good() && !std::cin.eof() ) )
    {
        const input_t* record = stdin_stream.read();
        if( !record ) { break; }
        input_t::map::mapped_type& bucket = records_by_key[ *record ];
        if( unique && !bucket.empty() ) { continue; }
        if( stdin_stream.is_binary() )
        {
            // keep a raw copy of the binary record
            bucket.push_back( std::string( stdin_stream.binary().last(), stdin_csv.format().size() ) );
        }
        else
        {
            bucket.push_back( comma::join( stdin_stream.ascii().last(), stdin_csv.delimiter ) + "\n" );
        }
    }

    if( options.exists( "--reverse,-r" ) )
    {
        output_( records_by_key.rbegin(), records_by_key.rend() );
    }
    else
    {
        output_( records_by_key.begin(), records_by_key.end() );
    }
    return 0;
}
Esempio n. 3
0
/// csv-time-join entry point.
///
/// Joins timestamped records read from stdin with records of a second, "bounding" timestamped
/// source named on the command line, and writes the result through output().
///
/// Two modes are implemented here:
///  - --realtime (non-WIN32 only): every stdin record is output together with the bounding
///    record most recently read, once at least one bounding record has been read;
///  - otherwise (--by-lower by default, --by-upper, --nearest): bounding records are queued and
///    every stdin record is matched against the pair of consecutive bounding records around it.
///
/// The settings method, timestamp_only, select_only, bound, stdin_csv and bounding_csv assigned
/// below are declared outside this function.
///
/// @return 0 on normal completion or shutdown signal, 1 on invalid command line or exception.
int main( int ac, char** av )
{
    try
    {
        comma::signal_flag is_shutdown(comma::signal_flag::hard);
        comma::command_line_options options( ac, av, usage );

        if( options.exists( "--bash-completion" )) bash_completion( ac, av );
        options.assert_mutually_exclusive( "--by-lower,--by-upper,--nearest,--realtime" );
        if( options.exists( "--by-upper" )) { method = how::by_upper; }
        if( options.exists( "--nearest" )) { method = how::nearest; }
        if( options.exists( "--realtime" )) { method = how::realtime; }
        timestamp_only = options.exists( "--timestamp-only,--time-only" );
        select_only = options.exists( "--do-not-append,--select" );
        if( select_only && timestamp_only ) { std::cerr << "csv-time-join: --timestamp-only specified with --select, ignoring --timestamp-only" << std::endl; }
        bool discard_bounding = options.exists( "--discard-bounding" );
        boost::optional< unsigned int > buffer_size = options.optional< unsigned int >( "--buffer" );
        // --bound is given in seconds; convert to whole microseconds
        // (64-bit cast: a 32-bit unsigned count would overflow for bounds above ~4294 seconds)
        if( options.exists( "--bound" ) ) { bound = boost::posix_time::microseconds( static_cast< long long >( options.value< double >( "--bound" ) * 1000000 ) ); }
        stdin_csv = comma::csv::options( options, "t" );

        std::vector< std::string > unnamed = options.unnamed(
            "--by-lower,--by-upper,--nearest,--realtime,--select,--do-not-append,--timestamp-only,--time-only,--discard-bounding",
            "--binary,-b,--delimiter,-d,--fields,-f,--bound,--buffer,--verbose,-v" );
        std::string properties;
        // passed to output(); false when the bounding source is named before '-' on the command line
        bool stdin_first = true;
        switch( unnamed.size() )
        {
            case 0:
                std::cerr << "csv-time-join: please specify bounding source" << std::endl;
                return 1;
            case 1:
                properties = unnamed[0];
                break;
            case 2:
                if( unnamed[0] == "-" ) { properties = unnamed[1]; }
                else if( unnamed[1] == "-" ) { properties = unnamed[0]; stdin_first = false; }
                else { std::cerr << "csv-time-join: expected either '- <bounding>' or '<bounding> -'; got : " << comma::join( unnamed, ' ' ) << std::endl; return 1; }
                break;
            default:
                std::cerr << "csv-time-join: expected either '- <bounding>' or '<bounding> -'; got : " << comma::join( unnamed, ' ' ) << std::endl;
                return 1;
        }
        comma::name_value::parser parser( "filename" );
        bounding_csv = parser.get< comma::csv::options >( properties );
        if( bounding_csv.fields.empty() ) { bounding_csv.fields = "t"; }

        comma::csv::input_stream< Point > stdin_stream( std::cin, stdin_csv );
        #ifdef WIN32
        if( stdin_csv.binary() ) { _setmode( _fileno( stdout ), _O_BINARY ); }
        #endif // #ifdef WIN32

        // the part of the properties string before the first ';' names the stream to open
        comma::io::istream bounding_istream( comma::split( properties, ';' )[0]
                                           , bounding_csv.binary() ? comma::io::mode::binary : comma::io::mode::ascii );
        comma::csv::input_stream< Point > bounding_stream( *bounding_istream, bounding_csv );

        #ifndef WIN32
        // select: watches stdin (fd 0) and the bounding stream; bounding_stream_select: bounding stream only
        comma::io::select select;
        comma::io::select bounding_stream_select;
        select.read().add( 0 );
        select.read().add( bounding_istream.fd() );
        bounding_stream_select.read().add( bounding_istream.fd() );
        #endif // #ifndef WIN32

        const Point* p = NULL;

        if( method == how::realtime )
        {
            #ifndef WIN32
            bool end_of_input = false;
            bool end_of_bounds = false;
            
            // most recently read bounding record; empty until the first one arrives
            boost::optional<timestring_t> joined_line;
            
            while (!is_shutdown && !end_of_input)
            {
                // nothing buffered on either stream: wait briefly for data
                if ( !bounding_stream.ready() && !stdin_stream.ready() )
                {
                    select.wait(boost::posix_time::milliseconds(1));
                }
                
                if ( !is_shutdown && !end_of_input && ( stdin_stream.ready() || ( select.check() && select.read().ready( comma::io::stdin_fd ) ) ) )
                {
                    p = stdin_stream.read();
                    if( p )
                    {
                        timestring_t input_line = std::make_pair( get_time( *p ), stdin_stream.last() );
                        // stdin records read before any bounding record are not output
                        if( joined_line ) { output( input_line, *joined_line, stdin_first ); }
                    }
                    else
                    {
                        comma::verbose << "end of input stream" << std::endl;
                        end_of_input = true;
                    }
                }
                
                if ( !is_shutdown && !end_of_bounds &&
                   ( bounding_stream.ready() || ( select.check() && select.read().ready( bounding_istream.fd() ))))
                {
                    p = bounding_stream.read();
                    if( p )
                    {
                        joined_line = std::make_pair( get_time( *p ), bounding_stream.last() );
                    }
                    else
                    {
                        comma::verbose << "end of bounding stream" << std::endl;
                        end_of_bounds = true;
                    }
                }
            }
            if (is_shutdown) { comma::verbose << "got a signal" << std::endl; return 0; }
            #else
            COMMA_THROW(comma::exception, "--realtime mode not supported in WIN32");
            #endif
        }
        else
        {
            // bounding records still to be matched; [0] and [1] are the current lower and upper bound
            std::deque<timestring_t> bounding_queue;
            // true: read a new stdin record in this iteration; false: keep matching the current one
            bool next = true;
            bool bounding_data_available;
            bool upper_bound_added = false;

            // add a fake entry for an lower bound to allow stdin before first bound to match
            bounding_queue.push_back( std::make_pair( boost::posix_time::neg_infin, "" ));

            while( ( stdin_stream.ready() || ( std::cin.good() && !std::cin.eof() ) ) )
            {
                #ifndef WIN32
                // select is declared on non-WIN32 builds only: stop watching streams that are no longer good
                if( !std::cin.good() ) { select.read().remove( 0 ); }
                if( !bounding_istream->good() ) { select.read().remove( bounding_istream.fd() ); }
                #endif // #ifndef WIN32
                bounding_data_available = bounding_stream.ready() || ( bounding_istream->good() && !bounding_istream->eof() );
                #ifdef WIN32
                bool bounding_stream_ready = true;
                bool stdin_stream_ready = true;
                #else // #ifdef WIN32
                //check so we do not block
                bool bounding_stream_ready = bounding_stream.ready();
                bool stdin_stream_ready = stdin_stream.ready();

                if( next )
                {
                    if( !bounding_stream_ready || !stdin_stream_ready )
                    {
                        // wait only when neither stream has buffered data; otherwise just poll
                        if( !bounding_stream_ready && !stdin_stream_ready )
                        {
                            select.wait( boost::posix_time::milliseconds(10) );
                        }
                        else
                        {
                            select.check();
                        }
                        if( select.read().ready( bounding_istream.fd() )) { bounding_stream_ready = true; }
                        if( select.read().ready(0) ) { stdin_stream_ready=true; }
                    }
                }
                else
                {
                    // still matching the current stdin record: only bounding data is of interest
                    if( !bounding_stream_ready )
                    {
                        bounding_stream_select.wait( boost::posix_time::milliseconds(10) );
                        if( bounding_stream_select.read().ready( bounding_istream.fd() )) { bounding_stream_ready=true; }
                    }
                }
                #endif //#ifdef WIN32

                //keep storing available bounding data
                if( bounding_stream_ready )
                {
                    // read unless the queue has reached --buffer size; with --discard-bounding always read
                    if( !buffer_size || bounding_queue.size() < *buffer_size || discard_bounding )
                    {
                        const Point* q = bounding_stream.read();
                        if( q )
                        {
                            bounding_queue.push_back( std::make_pair( get_time( *q ), bounding_stream.last() ));
                        }
                        else
                        {
                            bounding_data_available=false;
                        }
                    }
                    // with --discard-bounding, drop the oldest entry once the queue exceeds --buffer size
                    if( buffer_size && bounding_queue.size() > *buffer_size && discard_bounding )
                    {
                        bounding_queue.pop_front();
                    }
                }
                if( !upper_bound_added && bounding_istream->eof() )
                {
                    // add a fake entry for an upper bound to allow stdin data above last bound to match
                    bounding_queue.push_back( std::make_pair( boost::posix_time::pos_infin, "" ));
                    upper_bound_added = true;
                }

                //if we are done with the last bounded point get next
                if( next )
                {
                    if(!stdin_stream_ready) { continue; }
                    p = stdin_stream.read();
                    if( !p ) { break; }
                }

                boost::posix_time::ptime t = get_time(*p);
              
                //get bound
                // drop bounding records until t is strictly below the second entry in the queue
                while(bounding_queue.size()>=2)
                {
                    if( t < bounding_queue[1].first ) { break; }
                    bounding_queue.pop_front();
                }

                if(bounding_queue.size()<2)
                {
                    //bound not found
                    //do we have more data?
                    if(!bounding_data_available) { break; }
                    // keep the current stdin record and retry once more bounding data has been read
                    next=false;
                    continue;
                }

                //bound available

                // by-lower: a stdin record earlier than the lower bound is skipped
                if( method == how::by_lower && t < bounding_queue.front().first )
                {
                    next = true;
                    continue;
                }

                // is_first: join with the lower bound; with nearest, only if the lower bound is strictly closer
                bool is_first = ( method == how::by_lower )
                    || ( method == how::nearest && ( t - bounding_queue[0].first ) < ( bounding_queue[1].first - t ));

                const timestring_t& chosen_bound = is_first ? bounding_queue[0] : bounding_queue[1];
                timestring_t input_line = std::make_pair( t, stdin_stream.last() );

                output( input_line, chosen_bound, stdin_first );
                next=true;
            }
        }
        return 0;     
    }
    catch( const std::exception& ex ) { std::cerr << "csv-time-join: " << ex.what() << std::endl; }
    catch( ... ) { std::cerr << "csv-time-join: unknown exception" << std::endl; }
    // an exception was reported above: exit with a failure status instead of falling off main with 0
    return 1;
}