Exemplo n.º 1
0
void replica::assign_primary(configuration_update_request& proposal)
{
    dassert(proposal.node == primary_address(), "");

    if (status() == PS_PRIMARY)
    {
        dwarn(
            "%s: invalid assgin primary proposal as the node is in %s",
            name(),
            enum_to_string(status()));
        return;
    }

    if (proposal.type == CT_UPGRADE_TO_PRIMARY 
        && (status() != PS_SECONDARY || _secondary_states.checkpoint_task != nullptr))
    {
        dwarn(
            "%s: invalid upgrade to primary proposal as the node is in %s or during checkpointing",
            name(),
            enum_to_string(status()));

        // TODO: tell meta server so new primary is built more quickly
        return;
    }

    proposal.config.primary = primary_address();
    replica_helper::remove_node(primary_address(), proposal.config.secondaries);

    update_configuration_on_meta_server(proposal.type, proposal.node, proposal.config);
}
Exemplo n.º 2
0
bool failure_detector::end_ping_internal(::dsn::error_code err, const beacon_ack& ack)
{
    /*
     * the caller of the end_ping_internal should lock necessarily!!!
     */
    uint64_t beacon_send_time = ack.time;
    auto node = ack.this_node;
    uint64_t now = now_ms();

    master_map::iterator itr = _masters.find(node);

    if (itr == _masters.end())
    {
        dwarn("received beacon ack without corresponding master, ignore it, "
            "remote_master[%s], local_worker[%s]",
            node.to_string(), primary_address().to_string());
        return false;
    }

    master_record& record = itr->second;
    if (!ack.allowed)
    {
        dwarn("worker rejected, stop sending beacon message, "
            "remote_master[%s], local_worker[%s]",
            node.to_string(), primary_address().to_string());
        record.rejected = true;
        record.send_beacon_timer->cancel(true);
        return false;
    }

    if (!is_time_greater_than(beacon_send_time, record.last_send_time_for_beacon_with_ack))
    {
        // out-dated beacon acks, do nothing
        dinfo("ignore out dated beacon acks");
        return false;
    }

    // now the ack is applicable

    if (err != ERR_OK)
    {
        dwarn("ping master failed, err=%s", err.to_string());
        return true;
    }

    // update last_send_time_for_beacon_with_ack
    record.last_send_time_for_beacon_with_ack = beacon_send_time;
    record.rejected = false;

    if (record.is_alive == false
        && now - record.last_send_time_for_beacon_with_ack <= _lease_milliseconds)
    {
        // report master connected
        report(node, true, true);
        itr->second.is_alive = true;
        on_master_connected(node);
    }
    return true;
}
Exemplo n.º 3
0
void meta_server_failure_detector::acquire_leader_lock()
{
    //
    // try to get the leader lock until it is done
    //
    dsn::dist::distributed_lock_service::lock_options opt = {true, true};
    std::string local_owner_id;
    while (true)
    {
        error_code err;
        auto tasks =
            _lock_svc->lock(
            _primary_lock_id,
            primary_address().to_std_string(), 
            // lock granted
            LPC_META_SERVER_LEADER_LOCK_CALLBACK,
            [this, &err, &local_owner_id](error_code ec, const std::string& owner, uint64_t version)
            {
                err = ec;
                local_owner_id = owner;
            },

            // lease expire
            LPC_META_SERVER_LEADER_LOCK_CALLBACK,
            [this](error_code ec, const std::string& owner, uint64_t version)
            {
                // let's take the easy way right now
                dsn_exit(0);
            },
            opt
        );

        _lock_grant_task = tasks.first;
        _lock_expire_task = tasks.second;

        _lock_grant_task->wait();
        if (err == ERR_OK)
        {
            rpc_address addr;
            if (addr.from_string_ipv4(local_owner_id.c_str()))
            {
                dassert(primary_address() == addr, "");
                set_primary(addr);
                break;
            }
        }
    }
}
void meta_server_failure_detector::on_ping(const fd::beacon_msg& beacon, ::dsn::service::rpc_replier<fd::beacon_ack>& reply)
{
    fd::beacon_ack ack;
    ack.this_node = beacon.to;
    if (!is_primary())
    {
        end_point master;
        if (_state->get_meta_server_primary(master))
        {
            ack.time = beacon.time;
            ack.is_master = false;
            ack.primary_node = master;
        }
        else
        {
            ack.time = beacon.time;
            ack.is_master = false;
            ack.primary_node =  end_point::INVALID;
        }
    }
    else
    {
        failure_detector::on_ping_internal(beacon, ack);
        ack.primary_node = primary_address();
    }

    reply(ack);
}
Exemplo n.º 5
0
void replica::on_config_sync(const partition_configuration& config)
{
    ddebug( "%s: configuration sync", name());

    // no outdated update
    if (config.ballot < get_ballot())
        return;

    if (status() == PS_PRIMARY || nullptr != _primary_states.reconfiguration_task)
    {
        // nothing to do as pirmary always holds the truth
    }
    else
    {
        update_configuration(config);

        if (status() == PS_INACTIVE && !_inactive_is_transient)
        {
            if (config.primary == primary_address() // dead primary
                || config.primary.is_invalid() // primary is dead (otherwise let primary remove this)
                )
            {
                _stub->remove_replica_on_meta_server(config);
            }
        }
    }
}
Exemplo n.º 6
0
void meta_server_failure_detector::set_primary(rpc_address primary)
{
    bool old = _is_primary;
    {
        utils::auto_lock<zlock> l(_primary_address_lock);
        _primary_address = primary;
        _is_primary = (primary == primary_address());
    }

    if (!old && _is_primary)
    {
        node_states ns;
        _state->get_node_state(ns);

        for (auto& pr : ns)
        {
            register_worker(pr.first, pr.second);
        }
    }

    if (old && !_is_primary)
    {
        clear_workers();
    }
}
Exemplo n.º 7
0
void meta_service::on_log_completed(error_code err, size_t size,
    blob buffer, 
    std::shared_ptr<configuration_update_request> req, dsn_message_t resp)
{
    dassert(err == ERR_OK, "log operation failed, cannot proceed, err = %s", err.to_string());
    dassert(buffer.length() == size, "log size must equal to the specified buffer size");

    configuration_update_response response;    
    update_configuration(*req, response);

    if (resp != nullptr)
    {
        meta_response_header rhdr;
        rhdr.err = err;
        rhdr.primary_address = primary_address();

        marshall(resp, rhdr);
        marshall(resp, response);

        dsn_rpc_reply(resp);
    }
    else
    {
        err.end_tracking();
    }
}
Exemplo n.º 8
0
void meta_service::on_request(message_ptr& msg)
{
    meta_request_header hdr;
    unmarshall(msg, hdr);

    meta_response_header rhdr;
    bool is_primary = _state->get_meta_server_primary(rhdr.primary_address);
    if (is_primary) is_primary = (primary_address() == rhdr.primary_address);
    rhdr.err = ERR_SUCCESS;

    message_ptr resp = msg->create_response();
    if (!is_primary)
    {
        rhdr.err = ERR_TALK_TO_OTHERS;
        
        marshall(resp, rhdr);
    }
    else if (hdr.rpc_tag == RPC_CM_QUERY_NODE_PARTITIONS)
    {
        configuration_query_by_node_request request;
        configuration_query_by_node_response response;
        unmarshall(msg, request);

        query_configuration_by_node(request, response);

        marshall(resp, rhdr);
        marshall(resp, response);
    }

    else if (hdr.rpc_tag == RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX)
    {
        configuration_query_by_index_request request;
        configuration_query_by_index_response response;
        unmarshall(msg, request);

        query_configuration_by_index(request, response);
        
        marshall(resp, rhdr);
        marshall(resp, response);
    }

    else  if (hdr.rpc_tag == RPC_CM_UPDATE_PARTITION_CONFIGURATION)
    {
        update_configuration(msg, resp);
        return;
    }
    
    else
    {
        dassert(false, "unknown rpc tag %x (%s)", hdr.rpc_tag, task_code(hdr.rpc_tag).to_string());
    }

    rpc::reply(resp);
}
Exemplo n.º 9
0
void replica::broadcast_group_check()
{
    dassert (nullptr != _primary_states.group_check_task, "");
    if (_primary_states.group_check_pending_replies.size() > 0)
    {
        dwarn(
            "%s: %u group check replies are still pending when doing next round check",
            name(), static_cast<int>(_primary_states.group_check_pending_replies.size())
            );

        for (auto it = _primary_states.group_check_pending_replies.begin(); it != _primary_states.group_check_pending_replies.end(); it++)
        {
            it->second->cancel(true);
        }
        _primary_states.group_check_pending_replies.clear();
    }

    for (auto it = _primary_states.statuses.begin(); it != _primary_states.statuses.end(); it++)
    {
        if (it->first == primary_address())
            continue;

        end_point addr = it->first;
        std::shared_ptr<group_check_request> request(new group_check_request);

        request->app_type = _primary_states.membership.app_type;
        request->node = addr;
        _primary_states.get_replica_config(addr, request->config);
        request->last_committed_decree = last_committed_decree();
        request->learner_signature = 0;
        if (it->second == PS_POTENTIAL_SECONDARY)
        {
            auto it2 = _primary_states.learners.find(it->first);
            dassert (it2 != _primary_states.learners.end(), "");
            request->learner_signature = it2->second.signature;
        }

        task_ptr callback_task = rpc::call_typed(
            addr,
            RPC_GROUP_CHECK,
            request,            
            this,
            &replica::on_group_check_reply,
            gpid_to_hash(get_gpid())
            );

        _primary_states.group_check_pending_replies[addr] = callback_task;

        ddebug(
            "%s: init_group_check for %s:%d", name(), addr.name.c_str(), addr.port
        );
    }
}
Exemplo n.º 10
0
void replica::on_group_check(const group_check_request& request, __out_param group_check_response& response)
{
    ddebug(
        "%s: on_group_check from %s:%d",
        name(), request.config.primary.name.c_str(), request.config.primary.port
        );
    
    if (request.config.ballot < get_ballot())
    {
        response.err = ERR_VERSION_OUTDATED;
        return;
    }
    else if (request.config.ballot > get_ballot())
    {
        update_local_configuration(request.config);
    }
    else if (is_same_ballot_status_change_allowed(status(), request.config.status))
    {
        update_local_configuration(request.config, true);
    }
    
    switch (status())
    {
    case PS_INACTIVE:
        break;
    case PS_SECONDARY:
        if (request.last_committed_decree > last_committed_decree())
        {
            _prepare_list->commit(request.last_committed_decree, true);
        }
        break;
    case PS_POTENTIAL_SECONDARY:
        init_learn(request.learner_signature);
        break;
    case PS_ERROR:
        break;
    default:
        dassert (false, "");
    }
    
    response.gpid = get_gpid();
    response.node = primary_address();
    response.err = ERR_SUCCESS;
    if (status() == PS_ERROR)
    {
        response.err = ERR_INVALID_STATE;
    }

    response.last_committed_decree_in_app = _app->last_committed_decree();
    response.last_committed_decree_in_prepare_list = last_committed_decree();
    response.learner_status_ = _potential_secondary_states.learning_status;
    response.learner_signature = _potential_secondary_states.learning_signature;
}
Exemplo n.º 11
0
void meta_service::update_configuration(dsn_message_t req, dsn_message_t resp)
{
    if (_state->freezed())
    {
        meta_response_header rhdr;
        rhdr.err = ERR_OK;
        rhdr.primary_address = primary_address();

        configuration_update_request request;
        configuration_update_response response;
        
        ::unmarshall(req, request);

        response.err = ERR_STATE_FREEZED;
        _state->query_configuration_by_gpid(request.config.gpid, response.config);

        ::marshall(resp, rhdr);
        ::marshall(resp, response);

        dsn_rpc_reply(resp);
        return;
    }

    void* ptr;
    size_t sz;
    dsn_msg_read_next(req, &ptr, &sz);
    dsn_msg_read_commit(req, 0); // commit 0 so we can read again

    uint64_t offset;
    int len = (int)sz + sizeof(int32_t);
    
    char* buffer = new char[len];
    *(int32_t*)buffer = (int)sz;
    memcpy(buffer + sizeof(int32_t), ptr, sz);

    auto tmp = std::shared_ptr<char>(buffer);
    blob bb2(tmp, 0, len);

    auto request = std::shared_ptr<configuration_update_request>(new configuration_update_request());
    ::unmarshall(req, *request);

    {

        zauto_lock l(_log_lock);
        offset = _offset;
        _offset += len;

        file::write(_log, buffer, len, offset, LPC_CM_LOG_UPDATE, this,
            std::bind(&meta_service::on_log_completed, this, 
            std::placeholders::_1, std::placeholders::_2, bb2, request, resp));
    }
}
Exemplo n.º 12
0
void meta_service::on_config_changed(global_partition_id gpid)
{
    end_point primary;
    if (_state->get_meta_server_primary(primary) && primary == primary_address())
    {
        _failure_detector->set_primary(true);
        _balancer->run(gpid);
    }
    else
    {
        _failure_detector->set_primary(false);
    }
}
Exemplo n.º 13
0
bool meta_server_failure_detector::acquire_leader_lock()
{
    error_code err;
    auto tasks = 
    _lock_svc->lock(
        _primary_lock_id,
        _local_owner_id,
        true,

        // lock granted
        LPC_META_SERVER_LEADER_LOCK_CALLBACK,
        [this, &err](error_code ec, const std::string& owner, uint64_t version)
        {
            err = ec;
        },

        // lease expire
        LPC_META_SERVER_LEADER_LOCK_CALLBACK,
        [this](error_code ec, const std::string& owner, uint64_t version)
        {
            // let's take the easy way right now
            dsn_terminate();

            // reset primary
            //rpc_address addr;
            //set_primary(addr);
            //_state->on_become_non_leader();

            //// set another round of service
            //acquire_leader_lock();            
        }
    );

    _lock_grant_task = tasks.first;
    _lock_expire_task = tasks.second;

    _lock_grant_task->wait();
    if (err == ERR_OK)
    {
        rpc_address addr;
        if (addr.from_string_ipv4(_local_owner_id.c_str()))
        {
            dassert(primary_address() == addr, "");            
            set_primary(addr);
            return true;
        }
    }

    return false;
}
Exemplo n.º 14
0
meta_server_failure_detector::meta_server_failure_detector(server_state* state, meta_service* svc)
{
    _state = state;
    _svc = svc;
    _is_primary = false;

    // TODO: config
    _lock_svc = dsn::utils::factory_store< ::dsn::dist::distributed_lock_service>::create(
        "distributed_lock_service_simple",
        PROVIDER_TYPE_MAIN
        );
    _primary_lock_id = "dsn.meta.server.leader";
    _local_owner_id = primary_address().to_string();
}
Exemplo n.º 15
0
bool replica::update_configuration(const partition_configuration& config)
{
    dassert (config.ballot >= get_ballot(), "");
    
    replica_configuration rconfig;
    replica_helper::get_replica_config(config, primary_address(), rconfig);

    if (rconfig.status == PS_PRIMARY &&
        (rconfig.ballot > get_ballot() || status() != PS_PRIMARY)
        )
    {
        _primary_states.reset_membership(config, config.primary != primary_address());
    }

    if (config.ballot > get_ballot() ||
        is_same_ballot_status_change_allowed(status(), rconfig.status)
        )
    {
        return update_local_configuration(rconfig, true);
    }
    else
        return false;
}
Exemplo n.º 16
0
// local timers
void meta_service::on_load_balance_timer()
{
    if (_state->freezed())
        return;

    ::dsn::rpc_address primary;
    if (_state->get_meta_server_primary(primary) && primary == primary_address())
    {
        _failure_detector->set_primary(true);
        _balancer->run();
    }
    else
    {
        _failure_detector->set_primary(false);
    }
}
Exemplo n.º 17
0
void failure_detector::on_ping_internal(const beacon_msg& beacon, /*out*/ beacon_ack& ack)
{
    ack.time = beacon.time;
    ack.this_node = beacon.to_addr;
    ack.primary_node = primary_address();
    ack.is_master = true;
    ack.allowed = true;

    zauto_lock l(_lock);

    uint64_t now = now_ms();
    auto node = beacon.from_addr;

    worker_map::iterator itr = _workers.find(node);
    if (itr == _workers.end())
    {
        // if is a new worker, check allow list first if need
        if (_use_allow_list && _allow_list.find(node) == _allow_list.end())
        {
            dwarn("new worker[%s] is rejected", node.to_string());
            ack.allowed = false;
            return;
        }

        // create new entry for node
        worker_record record(node, now);
        record.is_alive = true;
        _workers.insert(std::make_pair(node, record));

        report(node, false, true);
        on_worker_connected(node);
    }
    else if (is_time_greater_than(now, itr->second.last_beacon_recv_time))
    {
        // update last_beacon_recv_time
        itr->second.last_beacon_recv_time = now;

        if (itr->second.is_alive == false)
        {
            itr->second.is_alive = true;

            report(node, false, true);
            on_worker_connected(node);
        }
    }
}
Exemplo n.º 18
0
void replica::handle_remote_failure(partition_status st, const end_point& node, int error)
{    
    ddebug(
        "%s: handle remote failure error %u, status = %s, node = %s:%d",
        name(),
        error,
        enum_to_string(st),
        node.name.c_str(), static_cast<int>(node.port)
        );

    dassert (status() == PS_PRIMARY, "");
    dassert(node != primary_address(), "");

    switch (st)
    {
    case PS_SECONDARY:
        dassert (_primary_states.check_exist(node, PS_SECONDARY), "");
        {
            configuration_update_request request;
            request.node = node;
            request.type = CT_DOWNGRADE_TO_INACTIVE;
            request.config = _primary_states.membership;
            downgrade_to_inactive_on_primary(request);
        }
        break;
    case PS_POTENTIAL_SECONDARY:
        // potential secondary failure does not lead to ballot change
        // therefore, it is possible to have multiple exec here
        if (_primary_states.learners.erase(node) > 0)
        {
            if (_primary_states.check_exist(node, PS_INACTIVE))
                _primary_states.statuses[node] = PS_INACTIVE;
            else
                _primary_states.statuses.erase(node);
        }
        
        break;
    case PS_INACTIVE:
    case PS_ERROR:
        break;
    default:
        dassert (false, "");
        break;
    }
}
Exemplo n.º 19
0
void meta_server_failure_detector::on_ping(const fd::beacon_msg& beacon, ::dsn::rpc_replier<fd::beacon_ack>& reply)
{
    fd::beacon_ack ack;
    ack.this_node = beacon.to;
    if (!is_primary())
    {
        ack.time = beacon.time;
        ack.is_master = false;
        ack.primary_node = _primary_address;
    }
    else
    {
        failure_detector::on_ping_internal(beacon, ack);
        ack.primary_node = primary_address();
    }

    reply(ack);
}
Exemplo n.º 20
0
void meta_server_failure_detector::set_primary(rpc_address primary)
{
    /*
    * we don't do register worker things in set_primary
    * as only nodes sync from meta_state_service are useful, 
    * but currently, we haven't do sync yet
    */
    bool old = _is_primary;
    {
        utils::auto_lock<zlock> l(_primary_address_lock);
        _primary_address = primary;
        _is_primary = (primary == primary_address());
    }

    if (old && !_is_primary)
    {
        clear_workers();
    }
}
Exemplo n.º 21
0
void meta_service::on_log_completed(error_code err, int size, char* buffer, message_ptr req, message_ptr resp)
{
    free(buffer);
    dassert(err == ERR_SUCCESS, "log operation failed, cannot proceed, err = %s", err.to_string());

    configuration_update_request request;
    configuration_update_response response;
    unmarshall(req, request);

    update_configuration(request, response);

    meta_response_header rhdr;
    rhdr.err = err;
    rhdr.primary_address = primary_address();

    marshall(resp, rhdr);
    marshall(resp, response);  

    rpc::reply(resp);
}
Exemplo n.º 22
0
void meta_service::update_configuration(message_ptr req, message_ptr resp)
{
    if (_state->freezed())
    {
        meta_response_header rhdr;
        rhdr.err = 0;
        rhdr.primary_address = primary_address();

        configuration_update_request request;
        configuration_update_response response;
        
        unmarshall(req, request);

        response.err = ERR_STATE_FREEZED;
        _state->query_configuration_by_gpid(request.config.gpid, response.config);

        marshall(resp, rhdr);
        marshall(resp, response);

        rpc::reply(resp);
        return;
    }

    auto bb = req->reader().get_remaining_buffer();
    uint64_t offset;
    int len = bb.length() + sizeof(int32_t);
    
    char* buffer = (char*)malloc(len);
    *(int32_t*)buffer = bb.length();
    memcpy(buffer + sizeof(int32_t), bb.data(), bb.length());

    {

        zauto_lock l(_log_lock);
        offset = _offset;
        _offset += len;

        file::write(_log, buffer, len, offset, LPC_CM_LOG_UPDATE, this,
            std::bind(&meta_service::on_log_completed, this, std::placeholders::_1, std::placeholders::_2, buffer, req, resp));
    }
}
Exemplo n.º 23
0
// local timers
void meta_service::on_load_balance_timer()
{
    // first time is to activate the LB (an initial period of time is presevered for most machine to join in)
    if (!_started)
    {
        _started = true;
        _state->benign_unfree();
        return;
    }

    end_point primary;
    if (_state->get_meta_server_primary(primary) && primary == primary_address())
    {
        _failure_detector->set_primary(true);
        _balancer->run();
    }
    else
    {
        _failure_detector->set_primary(false);
    }
}
Exemplo n.º 24
0
void failure_detector::send_beacon(::dsn::rpc_address target, uint64_t time)
{
    beacon_msg beacon;
    beacon.time = time;
    beacon.from_addr= primary_address();
    beacon.to_addr = target;

    dinfo("send ping message, from[%s], to[%s], time[%" PRId64 "]",
          beacon.from_addr.to_string(), beacon.to_addr.to_string(), time);

    ::dsn::rpc::call(
        target,
        RPC_FD_FAILURE_DETECTOR_PING,
        beacon,
        this,
        [=](error_code err, beacon_ack&& resp)
        {
            if (err != ::dsn::ERR_OK)
            {
                beacon_ack ack;
                ack.time = beacon.time;
                ack.this_node = beacon.to_addr;
                ack.primary_node.set_invalid();
                ack.is_master = false;
                ack.allowed = true;
                end_ping(err, ack, nullptr);
            }
            else
            {
                end_ping(err, std::move(resp), nullptr);
            }
        },
        0,
        std::chrono::milliseconds(_check_interval_milliseconds),
        0
        );
}
/*----------------------------------------------------------------------
 |       NPT_NetworkInterface::GetNetworkInterfaces
 +---------------------------------------------------------------------*/
NPT_Result
NPT_NetworkInterface::GetNetworkInterfaces(NPT_List<NPT_NetworkInterface*>& interfaces)
{
    int result = 0;
    struct ifaddrs * ifaddrsList = NULL;
    struct ifaddrs * ifaddr = NULL;
    NPT_Flags flags = 0;
    
    result = getifaddrs(&ifaddrsList);
    
    if( result != 0 || ifaddrsList == NULL)
        return NPT_FAILURE;
    
    for(ifaddr = ifaddrsList; NULL!= ifaddr; ifaddr = ifaddr->ifa_next) {
        
        if(ifaddr->ifa_addr == NULL /*|| ifaddr->ifa_addr->sa_family == AF_INET6*/)
            continue;
        
        // process the flags
        if ((ifaddr->ifa_flags & IFF_UP) == 0) {
            // the interface is not up, ignore it
            continue;
        }
        if (ifaddr->ifa_flags & IFF_BROADCAST) {
            flags |= NPT_NETWORK_INTERFACE_FLAG_BROADCAST;
        }
        if (ifaddr->ifa_flags & IFF_LOOPBACK) {
            flags |= NPT_NETWORK_INTERFACE_FLAG_LOOPBACK;
        }
#if defined(IFF_POINTOPOINT)
        if (ifaddr->ifa_flags & IFF_POINTOPOINT) {
            flags |= NPT_NETWORK_INTERFACE_FLAG_POINT_TO_POINT;
        }
#endif // defined(IFF_POINTOPOINT)
        if (ifaddr->ifa_flags & IFF_PROMISC) {
            flags |= NPT_NETWORK_INTERFACE_FLAG_PROMISCUOUS;
        }
        if (ifaddr->ifa_flags & IFF_MULTICAST) {
            flags |= NPT_NETWORK_INTERFACE_FLAG_MULTICAST;
        }
        
        NPT_NetworkInterface* interface = NULL;
        for (NPT_List<NPT_NetworkInterface*>::Iterator iface_iter = interfaces.GetFirstItem();
             iface_iter;
             ++iface_iter) {
            if ((*iface_iter)->GetName() == (const char*)ifaddr->ifa_name) {
                interface = *iface_iter;
                break;
            }
        }
        
        // create a new interface object
        if(interface == NULL)
            interface = new NPT_NetworkInterface(ifaddr->ifa_name, flags);
        
        if (interface == NULL)
            continue;
            
            // get the mac address
            NPT_MacAddress::Type mac_addr_type;
            unsigned int         mac_addr_length = IFHWADDRLEN;
            switch (ifaddr->ifa_addr->sa_family) {
                case AF_LOCAL:
                case AF_INET:
#if defined(AF_LINK)
                case AF_LINK:
#endif
                    mac_addr_type = NPT_MacAddress::TYPE_ETHERNET;
#if defined(ARPHRD_LOOPBACK)      
                    mac_addr_type = NPT_MacAddress::TYPE_LOOPBACK;
                    length = 0;
#endif               
                    break;
#if defined(AF_PPP)
                case AF_PPP:
                    mac_addr_type = NPT_MacAddress::TYPE_PPP;
                    mac_addr_length = 0;
                    break;
#endif
#if defined(AF_IEEE80211)
                case AF_IEEE80211:
                    mac_addr_type = NPT_MacAddress::TYPE_IEEE_802_11;
                    break;
#endif                    
                default:
                    mac_addr_type = NPT_MacAddress::TYPE_UNKNOWN;
                    mac_addr_length = sizeof(ifaddr->ifa_addr->sa_data);
                    break;
            }


            if(interface->GetMacAddress().GetLength() == 0)
                interface->SetMacAddress(mac_addr_type, (const unsigned char*)ifaddr->ifa_addr->sa_data, mac_addr_length);
        
#if defined(NPT_CONFIG_HAVE_NET_IF_DL_H)        
        if (ifaddr->ifa_addr->sa_family == AF_LINK) {
            //Refer to LLADDR
            struct sockaddr_dl * socket_dl = (struct sockaddr_dl *)ifaddr->ifa_addr;
            
            interface->SetMacAddress(mac_addr_type, (const unsigned char*)socket_dl->sdl_data+socket_dl->sdl_nlen, socket_dl->sdl_alen);
        }
#endif
            switch (ifaddr->ifa_addr->sa_family) {
                case AF_INET: {
                    // primary address
                    NPT_IpAddress primary_address( ntohl(((struct sockaddr_in *)ifaddr->ifa_addr)->sin_addr.s_addr));
                    
                    // broadcast address
                    NPT_IpAddress broadcast_address;
                    if ((flags & NPT_NETWORK_INTERFACE_FLAG_BROADCAST) && ifaddr->ifa_dstaddr) {
                        broadcast_address.Set(ntohl(((struct sockaddr_in*)ifaddr->ifa_dstaddr)->sin_addr.s_addr));
                    }
                    
                    // point to point address
                    NPT_IpAddress destination_address;
                    if ((flags & NPT_NETWORK_INTERFACE_FLAG_POINT_TO_POINT) && ifaddr->ifa_dstaddr) {
                        destination_address.Set(ntohl(((struct sockaddr_in*)ifaddr->ifa_dstaddr)->sin_addr.s_addr));
                    }
                    
                    // netmask
                    NPT_IpAddress netmask(0xFFFFFFFF);
                    if(ifaddr->ifa_netmask)
                        netmask.Set(ntohl(((struct sockaddr_in*)ifaddr->ifa_netmask)->sin_addr.s_addr));
                    
                    // add the address to the interface
                    NPT_NetworkInterfaceAddress iface_address(primary_address,
                                                              broadcast_address,
                                                              destination_address,
                                                              netmask);

                    interface->AddAddress(iface_address);
                    
                    break;
                }
                    
            }
            // add the interface to the list
            interfaces.Add(interface);
        
    }
    freeifaddrs(ifaddrsList);
    
    return NPT_SUCCESS;
}
Exemplo n.º 26
0
void replica::on_update_configuration_on_meta_server_reply(error_code err, dsn_message_t request, dsn_message_t response, std::shared_ptr<configuration_update_request> req)
{
    check_hashed_access();

    if (PS_INACTIVE != status() || _stub->is_connected() == false)
    {
        _primary_states.reconfiguration_task = nullptr;
        err.end_tracking();
        return;
    }

    configuration_update_response resp;
    if (err == ERR_OK)
    {
        ::unmarshall(response, resp);
        err = resp.err;
    }

    if (err != ERR_OK)
    {
        ddebug(
            "%s: update configuration reply with err %s, request ballot %lld",
            name(),
            err.to_string(),
            req->config.ballot
            );

        if (err != ERR_INVALID_VERSION)
        {
            rpc_address target(_stub->_failure_detector->get_servers());
            dsn_msg_add_ref(request); // added for another round of rpc::call
            _primary_states.reconfiguration_task = rpc::call(
                target,
                request,
                this,
                std::bind(&replica::on_update_configuration_on_meta_server_reply, this,
                std::placeholders::_1,
                std::placeholders::_2,
                std::placeholders::_3,
                req),
                gpid_to_hash(get_gpid())
                );
            return;
        }        
    }

    ddebug(
        "%s: update configuration reply with err %s, ballot %lld, local %lld",
        name(),
        resp.err.to_string(),
        resp.config.ballot,
        get_ballot()
        );
    
    if (resp.config.ballot < get_ballot())
    {
        _primary_states.reconfiguration_task = nullptr;
        return;
    }        
    
    // post-update work items?
    if (resp.err == ERR_OK)
    {        
        dassert (req->config.gpid == resp.config.gpid, "");
        dassert (req->config.app_type == resp.config.app_type, "");
        dassert (req->config.primary == resp.config.primary, "");
        dassert (req->config.secondaries == resp.config.secondaries, "");

        switch (req->type)
        {        
        case CT_UPGRADE_TO_PRIMARY:
            _primary_states.last_prepare_decree_on_new_primary = _prepare_list->max_decree();
            break;
        case CT_ASSIGN_PRIMARY:
        case CT_DOWNGRADE_TO_SECONDARY:
        case CT_DOWNGRADE_TO_INACTIVE:
        case CT_UPGRADE_TO_SECONDARY:
            break;
        case CT_REMOVE:
            if (req->node != primary_address())
            {
                replica_configuration rconfig;
                replica_helper::get_replica_config(resp.config, req->node, rconfig);
                rpc::call_one_way_typed(req->node, RPC_REMOVE_REPLICA, rconfig, gpid_to_hash(get_gpid()));
            }
            break;
        default:
            dassert (false, "");
        }
    }
    
    update_configuration(resp.config);
    _primary_states.reconfiguration_task = nullptr;
}
Exemplo n.º 27
0
/*----------------------------------------------------------------------
|   NPT_NetworkInterface::GetNetworkInterfaces
+---------------------------------------------------------------------*/
NPT_Result
NPT_NetworkInterface::GetNetworkInterfaces(NPT_List<NPT_NetworkInterface*>& interfaces)
{
    // create a socket to talk to the TCP/IP stack
    SOCKET net;
    if((net = WSASocket(AF_INET, SOCK_DGRAM, IPPROTO_UDP, NULL, 0, 0)) == INVALID_SOCKET) {
        return NPT_FAILURE;
    }

    // get a list of interfaces
    INTERFACE_INFO query[32];  // get up to 32 interfaces 
    DWORD bytes_returned;
    int io_result = WSAIoctl(net, 
                             SIO_GET_INTERFACE_LIST, 
                             NULL, 0, 
                             &query, sizeof(query), 
                             &bytes_returned, 
                             NULL, NULL);
    if (io_result == SOCKET_ERROR) {
        closesocket(net);
        return NPT_FAILURE;
    }

    // we don't need the socket anymore
    closesocket(net);

    // Display interface information
    int interface_count = (bytes_returned/sizeof(INTERFACE_INFO));
    unsigned int iface_index = 0;
    for (int i=0; i<interface_count; i++) {
        SOCKADDR_IN* address;
        NPT_Flags    flags = 0;

        // primary address
        address = (SOCKADDR_IN*)&query[i].iiAddress;
        NPT_IpAddress primary_address(ntohl(address->sin_addr.s_addr));

        // netmask
        address = (SOCKADDR_IN*)&query[i].iiNetmask;
        NPT_IpAddress netmask(ntohl(address->sin_addr.s_addr));

        // broadcast address
        address = (SOCKADDR_IN*)&query[i].iiBroadcastAddress;
        NPT_IpAddress broadcast_address(ntohl(address->sin_addr.s_addr));

        {
            // broadcast address is incorrect
            unsigned char addr[4];
            for(int i=0; i<4; i++) {
                addr[i] = (primary_address.AsBytes()[i] & netmask.AsBytes()[i]) | 
                    ~netmask.AsBytes()[i];
            }
            broadcast_address.Set(addr);
        }

        // ignore interfaces that are not up
        if (!(query[i].iiFlags & IFF_UP)) {
            continue;
        }
        if (query[i].iiFlags & IFF_BROADCAST) {
            flags |= NPT_NETWORK_INTERFACE_FLAG_BROADCAST;
        }
        if (query[i].iiFlags & IFF_MULTICAST) {
            flags |= NPT_NETWORK_INTERFACE_FLAG_MULTICAST;
        }
        if (query[i].iiFlags & IFF_LOOPBACK) {
            flags |= NPT_NETWORK_INTERFACE_FLAG_LOOPBACK;
        }
        if (query[i].iiFlags & IFF_POINTTOPOINT) {
            flags |= NPT_NETWORK_INTERFACE_FLAG_POINT_TO_POINT;
        }

        // mac address (no support for this for now)
        NPT_MacAddress mac;

        // create an interface object
        char iface_name[5];
        iface_name[0] = 'i';
        iface_name[1] = 'f';
        iface_name[2] = '0'+(iface_index/10);
        iface_name[3] = '0'+(iface_index%10);
        iface_name[4] = '\0';
        NPT_NetworkInterface* iface = new NPT_NetworkInterface(iface_name, mac, flags);

        // set the interface address
        NPT_NetworkInterfaceAddress iface_address(
            primary_address,
            broadcast_address,
            NPT_IpAddress::Any,
            netmask);
        iface->AddAddress(iface_address);  
         
        // add the interface to the list
        interfaces.Add(iface);   

        // increment the index (used for generating the name
        iface_index++;
    }

    return NPT_SUCCESS;
}
Exemplo n.º 28
0
/*----------------------------------------------------------------------
|       NPT_NetworkInterface::GetNetworkInterfaces
+---------------------------------------------------------------------*/
NPT_Result
NPT_NetworkInterface::GetNetworkInterfaces(NPT_List<NPT_NetworkInterface*>& interfaces)
{
    int net = socket(AF_INET, SOCK_DGRAM, 0);

    // Try to get the config until we have enough memory for it
    // According to "Unix Network Programming", some implementations
    // do not return an error when the supplied buffer is too small
    // so we need to try, increasing the buffer size every time,
    // until we get the same size twice. We cannot assume success when
    // the returned size is smaller than the supplied buffer, because
    // some implementations can return less that the buffer size if
    // another structure does not fit.
    unsigned int buffer_size = 4096; // initial guess
    unsigned int last_size = 0;
    struct ifconf config;
    unsigned char* buffer;
    for (; buffer_size < 65536;) {
        buffer = new unsigned char[buffer_size];
        config.ifc_len = buffer_size;
        config.ifc_buf = (char*)buffer;
        if (ioctl(net, SIOCGIFCONF, &config) < 0) {
            if (errno != EINVAL || last_size != 0) {
                return NPT_ERROR_BASE_UNIX-errno;
            }
        } else {
            if ((unsigned int)config.ifc_len == last_size) {
                // same size, we can use the buffer
                break;
            }
            // different size, we need to reallocate
            last_size = config.ifc_len;
        }

        // supply 4096 more bytes more next time around
        buffer_size += 4096;
        delete[] buffer;
    }

    // iterate over all objects
    unsigned char *entries;
    for (entries = buffer; entries < buffer+config.ifc_len;) {
        struct ifreq* entry = (struct ifreq*)entries;
        // get the size of the addresses
        unsigned int address_length;
#if defined(NPT_CONFIG_HAVE_SOCKADDR_SA_LEN)
        address_length = sizeof(struct sockaddr) > entry->ifr_addr.sa_len ?
                         sizeof(sockaddr) : entry->ifr_addr.sa_len;
#else
        switch (entry->ifr_addr.sa_family) {
#if defined(AF_INET6)
        case AF_INET6:
            address_length = sizeof(struct sockaddr_in6);
            break;
#endif // defined(AF_INET6)

        default:
            address_length = sizeof(struct sockaddr);
            break;
        }
#endif

        // point to the next entry
        entries += address_length + sizeof(entry->ifr_name);

        // ignore anything except AF_INET and AF_LINK addresses
        if (entry->ifr_addr.sa_family != AF_INET
#if defined(AF_LINK)
                && entry->ifr_addr.sa_family != AF_LINK
#endif
           ) {
            continue;
        }

        // get detailed info about the interface
        NPT_Flags flags = 0;
#if defined(SIOCGIFFLAGS)
        struct ifreq query = *entry;
        if (ioctl(net, SIOCGIFFLAGS, &query) < 0) continue;

        // process the flags
        if ((query.ifr_flags & IFF_UP) == 0) {
            // the interface is not up, ignore it
            continue;
        }
        if (query.ifr_flags & IFF_BROADCAST) {
            flags |= NPT_NETWORK_INTERFACE_FLAG_BROADCAST;
        }
        if (query.ifr_flags & IFF_LOOPBACK) {
            flags |= NPT_NETWORK_INTERFACE_FLAG_LOOPBACK;
        }
#if defined(IFF_POINTOPOINT)
        if (query.ifr_flags & IFF_POINTOPOINT) {
            flags |= NPT_NETWORK_INTERFACE_FLAG_POINT_TO_POINT;
        }
#endif // defined(IFF_POINTOPOINT)
        if (query.ifr_flags & IFF_PROMISC) {
            flags |= NPT_NETWORK_INTERFACE_FLAG_PROMISCUOUS;
        }
        if (query.ifr_flags & IFF_MULTICAST) {
            flags |= NPT_NETWORK_INTERFACE_FLAG_MULTICAST;
        }
#endif // defined(SIOCGIFFLAGS)

        // get a pointer to an interface we've looped over before
        // or create a new one
        NPT_NetworkInterface* interface = NULL;
        for (NPT_List<NPT_NetworkInterface*>::Iterator iface_iter = interfaces.GetFirstItem();
                iface_iter;
                ++iface_iter) {
            if ((*iface_iter)->GetName() == (const char*)entry->ifr_name) {
                interface = *iface_iter;
                break;
            }
        }
        if (interface == NULL) {
            // create a new interface object
            interface = new NPT_NetworkInterface(entry->ifr_name, flags);

            // add the interface to the list
            interfaces.Add(interface);

            // get the mac address
#if defined(SIOCGIFHWADDR)
            if (ioctl(net, SIOCGIFHWADDR, &query) == 0) {
                NPT_MacAddress::Type mac_addr_type;
                unsigned int         mac_addr_length = IFHWADDRLEN;
                switch (query.ifr_addr.sa_family) {
#if defined(ARPHRD_ETHER)
                case ARPHRD_ETHER:
                    mac_addr_type = NPT_MacAddress::TYPE_ETHERNET;
                    break;
#endif

#if defined(ARPHRD_LOOPBACK)
                case ARPHRD_LOOPBACK:
                    mac_addr_type = NPT_MacAddress::TYPE_LOOPBACK;
                    length = 0;
                    break;
#endif

#if defined(ARPHRD_PPP)
                case ARPHRD_PPP:
                    mac_addr_type = NPT_MacAddress::TYPE_PPP;
                    mac_addr_length = 0;
                    break;
#endif

#if defined(ARPHRD_IEEE80211)
                case ARPHRD_IEEE80211:
                    mac_addr_type = NPT_MacAddress::TYPE_IEEE_802_11;
                    break;
#endif

                default:
                    mac_addr_type = NPT_MacAddress::TYPE_UNKNOWN;
                    mac_addr_length = sizeof(query.ifr_addr.sa_data);
                    break;
                }

                interface->SetMacAddress(mac_addr_type, (const unsigned char*)query.ifr_addr.sa_data, mac_addr_length);
            }
#endif
        }

        switch (entry->ifr_addr.sa_family) {
        case AF_INET: {
            // primary address
            NPT_IpAddress primary_address(ntohl(((struct sockaddr_in*)&entry->ifr_addr)->sin_addr.s_addr));

            // broadcast address
            NPT_IpAddress broadcast_address;
#if defined(SIOCGIFBRDADDR)
            if (flags & NPT_NETWORK_INTERFACE_FLAG_BROADCAST) {
                if (ioctl(net, SIOCGIFBRDADDR, &query) == 0) {
                    broadcast_address.Set(ntohl(((struct sockaddr_in*)&query.ifr_addr)->sin_addr.s_addr));
                }
            }
#endif

            // point to point address
            NPT_IpAddress destination_address;
#if defined(SIOCGIFDSTADDR)
            if (flags & NPT_NETWORK_INTERFACE_FLAG_POINT_TO_POINT) {
                if (ioctl(net, SIOCGIFDSTADDR, &query) == 0) {
                    destination_address.Set(ntohl(((struct sockaddr_in*)&query.ifr_addr)->sin_addr.s_addr));
                }
            }
#endif

            // netmask
            NPT_IpAddress netmask(0xFFFFFFFF);
#if defined(SIOCGIFNETMASK)
            if (ioctl(net, SIOCGIFNETMASK, &query) == 0) {
                netmask.Set(ntohl(((struct sockaddr_in*)&query.ifr_addr)->sin_addr.s_addr));
            }
#endif

            // add the address to the interface
            NPT_NetworkInterfaceAddress iface_address(
                primary_address,
                broadcast_address,
                destination_address,
                netmask);
            interface->AddAddress(iface_address);

            break;
        }

#if defined(AF_LINK) && defined(NPT_CONFIG_HAVE_SOCKADDR_DL)
        case AF_LINK: {
            struct sockaddr_dl* mac_addr = (struct sockaddr_dl*)&entry->ifr_addr;
            NPT_MacAddress::Type mac_addr_type = NPT_MacAddress::TYPE_UNKNOWN;
            switch (mac_addr->sdl_type) {
#if defined(IFT_LOOP)
            case IFT_LOOP:
                mac_addr_type = NPT_MacAddress::TYPE_LOOPBACK;
                break;
#endif
#if defined(IFT_ETHER)
            case IFT_ETHER:
                mac_addr_type = NPT_MacAddress::TYPE_ETHERNET;
                break;
#endif
#if defined(IFT_PPP)
            case IFT_PPP:
                mac_addr_type = NPT_MacAddress::TYPE_PPP;
                break;
#endif
            }
            interface->SetMacAddress(mac_addr_type,
                                     (const unsigned char*)(&mac_addr->sdl_data[mac_addr->sdl_nlen]),
                                     mac_addr->sdl_alen);
            break;
        }
#endif
        }
    }

    // free resources
    delete[] buffer;
    close(net);

    return NPT_SUCCESS;
}
Exemplo n.º 29
0
void meta_service::start(const char* data_dir, bool clean_state)
{
    dassert(!_started, "meta service is already started");

    _data_dir = data_dir;
	std::string checkpoint_path = _data_dir + "/checkpoint";
	std::string oplog_path = _data_dir + "/oplog";


    if (clean_state)
    {
        try {
			if (!dsn::utils::filesystem::remove_path(checkpoint_path))
			{
				dassert(false, "Fail to remove file %s.", checkpoint_path.c_str());
			}

			if (!dsn::utils::filesystem::remove_path(oplog_path))
			{
				dassert(false, "Fail to remove file %s.", oplog_path.c_str());
			}
        }
        catch (std::exception& ex)
        {
            ex;
        }
    }
    else
    {
		if (!dsn::utils::filesystem::create_directory(_data_dir))
		{
			dassert(false, "Fail to create directory %s.", _data_dir.c_str());
		}

        if (dsn::utils::filesystem::file_exists(checkpoint_path))
        {
            _state->load(checkpoint_path.c_str());
        }

        if (dsn::utils::filesystem::file_exists(oplog_path))
        {
            replay_log(oplog_path.c_str());
            _state->save(checkpoint_path.c_str());
			if (!dsn::utils::filesystem::remove_path(oplog_path))
			{
				dassert(false, "Fail to remove file %s.", oplog_path.c_str());
			}
        }
    }

    _log = dsn_file_open((_data_dir + "/oplog").c_str(), O_RDWR | O_CREAT, 0666);

    _balancer = new load_balancer(_state);            
    _failure_detector = new meta_server_failure_detector(_state, this);
    
    // TODO: use zookeeper for leader election
    _failure_detector->set_primary(primary_address());

    // make sure the delay is larger than fd.grace to ensure 
    // all machines are in the correct state (assuming connected initially)
    tasking::enqueue(LPC_LBM_START, this, &meta_service::on_load_balance_start, 0, 
        _opts.fd_grace_seconds * 1000);

    auto err = _failure_detector->start(
        _opts.fd_check_interval_seconds,
        _opts.fd_beacon_interval_seconds,
        _opts.fd_lease_seconds,
        _opts.fd_grace_seconds,
        false
        );

    dassert(err == ERR_OK, "FD start failed, err = %s", err.to_string());

    register_rpc_handler(
        RPC_CM_QUERY_NODE_PARTITIONS,
        "RPC_CM_QUERY_NODE_PARTITIONS",
        &meta_service::on_query_configuration_by_node
        );

    register_rpc_handler(
        RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX,
        "RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX",
        &meta_service::on_query_configuration_by_index
        );

    register_rpc_handler(
        RPC_CM_UPDATE_PARTITION_CONFIGURATION,
        "RPC_CM_UPDATE_PARTITION_CONFIGURATION",
        &meta_service::on_update_configuration
        );
}
Exemplo n.º 30
0
void meta_service::start(const char* data_dir, bool clean_state)
{
    _data_dir = data_dir;

    if (clean_state)
    {
        try {
            boost::filesystem::remove(_data_dir + "/checkpoint");
            boost::filesystem::remove(_data_dir + "/oplog");
        }
        catch (std::exception& ex)
        {
            ex;
        }
    }
    else
    {
        if (!boost::filesystem::exists(_data_dir))
        {
            boost::filesystem::create_directory(_data_dir);
        }

        if (boost::filesystem::exists(_data_dir + "/checkpoint"))
        {
            _state->load((_data_dir + "/checkpoint").c_str());
        }

        if (boost::filesystem::exists(_data_dir + "/oplog"))
        {
            replay_log((_data_dir + "/oplog").c_str());
            _state->save((_data_dir + "/checkpoint").c_str());
            boost::filesystem::remove(_data_dir + "/oplog");
        }
    }

    _log = file::open((_data_dir + "/oplog").c_str(), O_RDWR | O_CREAT, 0666);

    _balancer = new load_balancer(_state);            
    _failure_detector = new meta_server_failure_detector(_state);
    
    end_point primary;
    if (_state->get_meta_server_primary(primary) && primary == primary_address())
    {
        _failure_detector->set_primary(true);
    }   
    else
        _failure_detector->set_primary(false);

    register_rpc_handler(RPC_CM_CALL, "RPC_CM_CALL", &meta_service::on_request);

    _balancer_timer = tasking::enqueue(LPC_LBM_RUN, this, &meta_service::on_load_balance_timer, 0, 
        _opts.fd_grace_seconds * 1000 + 1, // delay
        10000
        );

    _failure_detector->start(
        _opts.fd_check_interval_seconds,
        _opts.fd_beacon_interval_seconds,
        _opts.fd_lease_seconds,
        _opts.fd_grace_seconds,
        false
        );
}