예제 #1
0
void replica::send_prepare_message(const dsn_address_t& addr, partition_status status, mutation_ptr& mu, int timeout_milliseconds)
{
    dsn_message_t msg = dsn_msg_create_request(RPC_PREPARE, timeout_milliseconds, gpid_to_hash(get_gpid()));
    replica_configuration rconfig;
    _primary_states.get_replica_config(status, rconfig);

    {
        msg_binary_writer writer(msg);
        marshall(writer, get_gpid());
        marshall(writer, rconfig);
        mu->write_to(writer);
    }

    mu->remote_tasks()[addr] = rpc::call(addr, msg,
                                         this,
                                         std::bind(&replica::on_prepare_reply,
                                                 this,
                                                 std::make_pair(mu, rconfig.status),
                                                 std::placeholders::_1,
                                                 std::placeholders::_2,
                                                 std::placeholders::_3),
                                         gpid_to_hash(get_gpid())
                                        );

    ddebug(
        "%s: mutation %s send_prepare_message to %s:%hu as %s",
        name(), mu->name(),
        addr.name, addr.port,
        enum_to_string(rconfig.status)
    );
}
예제 #2
0
파일: replica_2pc.cpp 프로젝트: Abioy/rDSN
void replica::send_prepare_message(const end_point& addr, partition_status status, mutation_ptr& mu, int timeout_milliseconds)
{
    message_ptr msg = message::create_request(RPC_PREPARE, timeout_milliseconds, gpid_to_hash(get_gpid()));
    marshall(msg, get_gpid());
    
    replica_configuration rconfig;
    _primary_states.get_replica_config(status, rconfig);

    marshall(msg, rconfig);
    mu->write_to(msg);

    dbg_dassert (mu->remote_tasks().find(addr) == mu->remote_tasks().end());

    mu->remote_tasks()[addr] = rpc::call(addr, msg, 
        this,
        std::bind(&replica::on_prepare_reply, 
            this,
            std::make_pair(mu, rconfig.status),
            std::placeholders::_1, 
            std::placeholders::_2, 
            std::placeholders::_3),
        gpid_to_hash(get_gpid())
        );

    ddebug( 
        "%s: mutation %s send_prepare_message to %s:%d as %s", 
        name(), mu->name(),
        addr.name.c_str(), static_cast<int>(addr.port),
        enum_to_string(rconfig.status)
        );
}
예제 #3
0
파일: replica_2pc.cpp 프로젝트: Jupige/rDSN
// Sends an RPC_PREPARE request carrying `mu` to the replica at `addr`.
//
// `learn_signature` is forwarded to _primary_states.get_replica_config when
// the replica configuration for `status` is built. The request body holds
// this replica's gpid, that configuration and the mutation, in this order.
// The task returned by rpc::call is stored in mu->remote_tasks() under `addr`
// and the reply is forwarded to on_prepare_reply.
void replica::send_prepare_message(
    ::dsn::rpc_address addr, 
    partition_status status, 
    mutation_ptr& mu, 
    int timeout_milliseconds,
    int64_t learn_signature)
{
    dsn_message_t request = dsn_msg_create_request(RPC_PREPARE, timeout_milliseconds, gpid_to_hash(get_gpid()));

    replica_configuration target_config;
    _primary_states.get_replica_config(status, target_config, learn_signature);

    // The stream lives in its own scope, so it is gone before rpc::call runs.
    {
        rpc_write_stream payload(request);
        marshall(payload, get_gpid());
        marshall(payload, target_config);
        mu->write_to(payload);
    }

    // The callback keeps its own copies of the mutation pointer and of the
    // configuration, so it does not refer to this function's locals.
    mu->remote_tasks()[addr] = rpc::call(
        addr,
        request,
        this,
        [this, mu, target_config](error_code err, dsn_message_t req, dsn_message_t resp)
        {
            on_prepare_reply(std::make_pair(mu, target_config.status), err, req, resp);
        },
        gpid_to_hash(get_gpid()));

    ddebug(
        "%s: mutation %s send_prepare_message to %s as %s",  
        name(), mu->name(),
        addr.to_string(),
        enum_to_string(target_config.status)
    );
}
예제 #4
0
파일: replica.cpp 프로젝트: SunnyGyb/rDSN
// Applies a mutation to the local app according to the current partition
// status, then reports any resulting error through handle_local_failure.
//
// - PS_INACTIVE: written only when it is the next decree after the app's
//   last committed one.
// - PS_PRIMARY / PS_SECONDARY: must be the next decree (asserted); the client
//   is acknowledged only on the primary, and only when a client request with
//   a non-zero source ip is attached.
// - PS_POTENTIAL_SECONDARY: written once learning has succeeded and it is the
//   next decree; otherwise it is skipped.
// - PS_ERROR: nothing is written.
void replica::execute_mutation(mutation_ptr& mu)
{
    dassert (nullptr != _app, "");

    int err = ERR_SUCCESS;

    switch (status())
    {
    case PS_INACTIVE:
    {
        if (_app->last_committed_decree() + 1 == mu->data.header.decree)
        {
            err = _app->write_internal(mu, false);
        }
        break;
    }

    case PS_PRIMARY:
    case PS_SECONDARY:
    {
        dassert (_app->last_committed_decree() + 1 == mu->data.header.decree, "");

        // Short-circuit evaluation keeps the null check ahead of the header access.
        const bool reply_to_client =
            status() == PS_PRIMARY
            && !(mu->client_request == nullptr)
            && !(mu->client_request->header().from_address.ip == 0);

        err = _app->write_internal(mu, reply_to_client);
        break;
    }

    case PS_POTENTIAL_SECONDARY:
    {
        if (LearningSucceeded != _potential_secondary_states.learning_status)
        {
            // drop mutations as learning will catch up
            ddebug("%s: mutation %s skipped coz learing buffer overflow", name(), mu->name());
        }
        else if (mu->data.header.decree == _app->last_committed_decree() + 1)
        {
            err = _app->write_internal(mu, false);
        }
        else
        {
            dassert (mu->data.header.decree <= _app->last_committed_decree(), "");
        }
        break;
    }

    case PS_ERROR:
        break;
    }

    ddebug("TwoPhaseCommit, %s: mutation %s committed, err = %x", name(), mu->name(), err);

    if (err == ERR_SUCCESS)
    {
        return;
    }
    handle_local_failure(err);
}
예제 #5
0
// Completion callback for appending `mu` to the replication log.
//
// On success the mutation is marked as logged. Mutations whose ballot is
// older than the current one, or that complete while the replica is
// PS_INACTIVE, are ignored afterwards. Otherwise:
// - primary: tries to commit on success, reports a local failure on error;
// - secondary / potential secondary: reports a local failure on error, and
//   acks the prepare unless acking before logging is allowed by the options;
// - PS_ERROR: nothing further.
// `size` is not used here.
void replica::on_append_log_completed(mutation_ptr& mu, error_code err, size_t size)
{
    check_hashed_access();

    ddebug( "%s: mutation %s on_append_log_completed, err = %s", name(), mu->name(), err.to_string());

    const bool append_ok = (err == ERR_OK);
    if (append_ok)
    {
        mu->set_logged();
    }

    // skip old mutations
    const bool outdated = mu->data.header.ballot < get_ballot() || status() == PS_INACTIVE;
    if (outdated)
    {
        return;
    }

    switch (status())
    {
    case PS_PRIMARY:
    {
        if (!append_ok)
        {
            handle_local_failure(err);
            break;
        }
        do_possible_commit_on_primary(mu);
        break;
    }

    case PS_SECONDARY:
    case PS_POTENTIAL_SECONDARY:
    {
        if (!append_ok)
        {
            handle_local_failure(err);
        }

        const bool acked_before_logging = _options.prepare_ack_on_secondary_before_logging_allowed;
        if (!acked_before_logging)
        {
            ack_prepare_message(err, mu);
        }
        break;
    }

    case PS_ERROR:
        break;

    default:
        dassert (false, "");
        break;
    }
}
예제 #6
0
파일: mutation.cpp 프로젝트: ykwd/rDSN
// Copies the payload of `old` into this mutation.
//
// Copied fields: data.updates, client_requests, _appro_data_bytes,
// _create_ts_ns and the prepare message. Every non-null client request and a
// non-null prepare message receive an extra reference via dsn_msg_add_ref.
// The logged state and log offset of `old` are intentionally not copied.
void mutation::copy_from(mutation_ptr& old)
{
    data.updates = old->data.updates;
    client_requests = old->client_requests;
    _appro_data_bytes = old->_appro_data_bytes;
    _create_ts_ns = old->_create_ts_ns;

    for (auto& request : client_requests)
    {
        if (nullptr == request)
        {
            continue;
        }
        dsn_msg_add_ref(request); // released in the destructor
    }

    // Always re-append the mutation to the replication logs, because the
    // ballot number has changed. This keeps the invariant:
    //   if decree(A) >= decree(B) then ballot(A) >= ballot(B)
    // Hence the logged state is deliberately NOT carried over:
    /*if (old->is_logged())
    {
        set_logged();
        data.header.log_offset = old->data.header.log_offset;
    }
    */

    _prepare_request = old->prepare_msg();
    if (nullptr != _prepare_request)
    {
        dsn_msg_add_ref(_prepare_request);
    }
}
예제 #7
0
파일: replica_2pc.cpp 프로젝트: Jupige/rDSN
// Replies to the prepare request attached to `mu` with a prepare_ack.
//
// The ack carries this replica's gpid, `err`, the current ballot, the decree
// of the mutation, and the last committed decrees of the app and of the
// prepare list. The mutation must have a prepare message (asserted).
void replica::ack_prepare_message(error_code err, mutation_ptr& mu)
{
    prepare_ack ack;

    ack.gpid = get_gpid();
    ack.err = err;
    ack.ballot = get_ballot();
    ack.decree = mu->data.header.decree;

    // The two fields below are for PS_POTENTIAL_SECONDARY only.
    ack.last_committed_decree_in_app = _app->last_committed_decree();
    ack.last_committed_decree_in_prepare_list = last_committed_decree();

    dassert(nullptr != mu->prepare_msg(), "");
    reply(mu->prepare_msg(), ack);

    ddebug("%s: mutation %s ack_prepare_message, err = %s", name(), mu->name(), err.to_string());
}
예제 #8
0
// Moves the payload of `old` into this mutation.
//
// data.updates is moved and rpc_code is copied. The client and prepare
// messages are taken from `old`; whenever one of them is non-null, the
// matching pointer in `old` is reset to nullptr.
void mutation::move_from(mutation_ptr& old)
{
    data.updates = std::move(old->data.updates);
    rpc_code = old->rpc_code;

    _client_request = old->client_msg();
    if (nullptr != _client_request)
    {
        old->_client_request = nullptr;
    }

    _prepare_request = old->prepare_msg();
    if (nullptr != _prepare_request)
    {
        old->_prepare_request = nullptr;
    }
}
예제 #9
0
파일: replica_2pc.cpp 프로젝트: Abioy/rDSN
// Completion callback for appending `mu` to the replication log.
//
// On success the mutation is marked as logged. Mutations whose ballot is
// older than the current one, or that complete while the replica is
// PS_INACTIVE, are ignored afterwards. Otherwise:
// - primary: tries to commit on success, reports a local failure on error;
// - secondary / potential secondary: reports a local failure on error and
//   always acks the prepare with `err`;
// - PS_ERROR: nothing further.
// `size` is not used here.
void replica::on_append_log_completed(mutation_ptr& mu, uint32_t err, uint32_t size)
{
    check_hashed_access();

    ddebug( "%s: mutation %s on_append_log_completed, err = %u", name(), mu->name(), err);

    const bool append_ok = (err == ERR_SUCCESS);
    if (append_ok)
    {
        mu->set_logged();
    }

    // skip old mutations
    const bool outdated = mu->data.header.ballot < get_ballot() || status() == PS_INACTIVE;
    if (outdated)
    {
        return;
    }

    switch (status())
    {
    case PS_PRIMARY:
    {
        if (!append_ok)
        {
            handle_local_failure(err);
            break;
        }
        do_possible_commit_on_primary(mu);
        break;
    }

    case PS_SECONDARY:
    case PS_POTENTIAL_SECONDARY:
    {
        if (!append_ok)
        {
            handle_local_failure(err);
        }
        ack_prepare_message(err, mu);
        break;
    }

    case PS_ERROR:
        break;

    default:
        dassert (false, "");
        break;
    }
}
예제 #10
0
파일: replica_2pc.cpp 프로젝트: Jupige/rDSN
// Commits `mu` on the primary once the mutation reports it is ready.
//
// Asserts that the mutation's ballot equals the configured ballot and that
// this replica is the primary. When ready, the prepare list is asked to
// commit the mutation's decree with COMMIT_ALL_READY.
void replica::do_possible_commit_on_primary(mutation_ptr& mu)
{
    dassert (_config.ballot == mu->data.header.ballot, "");
    dassert (PS_PRIMARY == status(), "");

    if (!mu->is_ready_for_commit())
    {
        return;
    }

    _prepare_list->commit(mu->data.header.decree, COMMIT_ALL_READY);
}
예제 #11
0
// Commits `mu` on the primary once the mutation reports it is ready.
//
// Asserts that the mutation's ballot equals the configured ballot and that
// this replica is the primary. Readiness is evaluated with the
// prepare_ack_on_secondary_before_logging_allowed option; when ready, the
// prepare list is asked to commit the mutation's decree (second argument
// `false`).
void replica::do_possible_commit_on_primary(mutation_ptr& mu)
{
    dassert (_config.ballot == mu->data.header.ballot, "");
    dassert (PS_PRIMARY == status(), "");

    const bool ready = mu->is_ready_for_commit(_options.prepare_ack_on_secondary_before_logging_allowed);
    if (!ready)
    {
        return;
    }

    _prepare_list->commit(mu->data.header.decree, false);
}
예제 #12
0
// Copies the payload of `old` into this mutation.
//
// data.updates and rpc_code are copied. If `old` is already logged, this
// mutation is marked as logged too and inherits the log offset. The client
// and prepare messages are shared with `old`; each non-null message receives
// an extra reference via dsn_msg_add_ref.
void mutation::copy_from(mutation_ptr& old)
{
    data.updates = old->data.updates;
    rpc_code = old->rpc_code;

    const bool already_logged = old->is_logged();
    if (already_logged)
    {
        set_logged();
        data.header.log_offset = old->data.header.log_offset;
    }

    _client_request = old->client_msg();
    if (nullptr != _client_request)
    {
        dsn_msg_add_ref(_client_request);
    }

    _prepare_request = old->prepare_msg();
    if (nullptr != _prepare_request)
    {
        dsn_msg_add_ref(_prepare_request);
    }
}
예제 #13
0
// Applies `mu` to the local app.
//
// The mutation must carry the decree right after the last committed one
// (asserted). A non-empty write is dispatched through dispatch_rpc_call with
// a reader over the first update and, when a client message is attached, a
// response created from it; an empty write only triggers on_empty_write().
//
// Returns ERR_OK when _physical_error is zero afterwards, otherwise logs the
// error and returns ERR_LOCAL_APP_FAILURE.
error_code replication_app_base::write_internal(mutation_ptr& mu)
{
    dassert (mu->data.header.decree == last_committed_decree() + 1, "");

    if (mu->rpc_code != RPC_REPLICATION_WRITE_EMPTY)
    {
        binary_reader reader(mu->data.updates[0]);

        dsn_message_t resp = nullptr;
        if (mu->client_msg())
        {
            resp = dsn_msg_create_response(mu->client_msg());
        }

        dispatch_rpc_call(mu->rpc_code, reader, resp);
    }
    else
    {
        on_empty_write();
    }

    if (_physical_error == 0)
    {
        return ERR_OK;
    }

    derror("physical error %d occurs in replication local app %s", _physical_error, data_dir().c_str());
    return ERR_LOCAL_APP_FAILURE;
}
예제 #14
0
// Applies every client request batched in `mu` to the local app.
//
// Preconditions (asserted): the mutation carries the decree right after the
// last committed one, and it holds at least one client request with exactly
// one update per request.
//
// _batch_state is BS_NOT_BATCH for a single request; for several requests it
// starts as BS_BATCH and is switched to BS_BATCH_LAST before the final one.
// Each non-empty request is dispatched through dispatch_rpc_call and the
// elapsed time is recorded in _app_commit_latency. If _physical_error is
// non-zero after any request, the function logs it and returns
// ERR_LOCAL_APP_FAILURE without advancing the committed decree. Otherwise the
// committed decree and the commit counters are updated and ERR_OK is returned.
error_code replication_app_base::write_internal(mutation_ptr& mu)
{
    dassert (mu->data.header.decree == last_committed_decree() + 1, "");
    dassert(mu->client_requests.size() == mu->data.updates.size()
        && mu->client_requests.size() > 0, 
        "data inconsistency in mutation");

    const int request_count = static_cast<int>(mu->client_requests.size());
    _batch_state = (request_count == 1 ? BS_NOT_BATCH : BS_BATCH);

    for (int idx = 0; idx < request_count; ++idx)
    {
        const bool is_last = (idx + 1 == request_count);
        if (_batch_state == BS_BATCH && is_last)
        {
            _batch_state = BS_BATCH_LAST;
        }

        auto& request = mu->client_requests[idx];
        if (request.code != RPC_REPLICATION_WRITE_EMPTY)
        {
            dinfo("%s: mutation %s dispatch rpc call: %s",
                  _replica->name(), mu->name(), dsn_task_code_to_string(request.code));

            binary_reader reader(mu->data.updates[idx]);

            dsn_message_t resp = nullptr;
            if (request.req)
            {
                resp = dsn_msg_create_response(request.req);
            }

            const uint64_t started_ns = dsn_now_ns();
            dispatch_rpc_call(request.code, reader, resp);
            const uint64_t elapsed_ns = dsn_now_ns() - started_ns;

            _app_commit_latency.set(elapsed_ns);
        }
        // An empty mutation write has nothing to dispatch.

        if (_physical_error != 0)
        {
            derror("%s: physical error %d occurs in replication local app %s",
                   _replica->name(), _physical_error, data_dir().c_str());
            return ERR_LOCAL_APP_FAILURE;
        }
    }

    ++_last_committed_decree;

    _replica->update_commit_statistics(request_count);
    _app_commit_throughput.add((uint64_t)request_count);
    _app_commit_decree.increment();

    return ERR_OK;
}
예제 #15
0
// Starts two-phase commit for `mu` on the primary.
//
// Steps, in order:
//  1. reject with ERR_NOT_ENOUGH_MEMBER when secondaries + 1 is below
//     mutation_2pc_min_replica_count;
//  2. stamp the mutation with the last committed decree and its
//     (ballot, decree) id, allocating max_decree + 1 if no decree is set;
//  3. reject with ERR_CAPACITY_EXCEEDED when the decree runs further ahead of
//     the last committed decree than staleness_for_commit allows;
//  4. prepare locally, then send prepare messages to every secondary and to
//     each learner whose prepare_start_decree covers this decree;
//  5. append the mutation to the log, with on_append_log_completed as the
//     completion callback.
// Every rejection is reported to the client via response_client_message.
void replica::init_prepare(mutation_ptr& mu)
{
    dassert (PS_PRIMARY == status(), "");

    error_code err = ERR_OK;

    if (static_cast<int>(_primary_states.membership.secondaries.size()) + 1 < _options.mutation_2pc_min_replica_count)
    {
        err = ERR_NOT_ENOUGH_MEMBER;
        response_client_message(mu->client_msg(), err);
        return;
    }

    mu->data.header.last_committed_decree = last_committed_decree();
    if (mu->data.header.decree == invalid_decree)
    {
        mu->set_id(get_ballot(), _prepare_list->max_decree() + 1);
    }
    else
    {
        mu->set_id(get_ballot(), mu->data.header.decree);
    }

    ddebug("%s: mutation %s init_prepare", name(), mu->name());

    if (mu->data.header.decree > last_committed_decree() + _options.staleness_for_commit)
    {
        // check bounded staleness
        err = ERR_CAPACITY_EXCEEDED;
    }
    else
    {
        dassert (mu->data.header.decree > last_committed_decree(), "");

        // local prepare
        err = _prepare_list->prepare(mu, PS_PRIMARY);
    }

    if (err != ERR_OK)
    {
        response_client_message(mu->client_msg(), err);
        return;
    }

    // remote prepare
    mu->set_prepare_ts();
    mu->set_left_secondary_ack_count((unsigned int)_primary_states.membership.secondaries.size());
    for (const auto& secondary : _primary_states.membership.secondaries)
    {
        send_prepare_message(secondary, PS_SECONDARY, mu, _options.prepare_timeout_ms_for_secondaries);
    }

    uint8_t count = 0;
    for (const auto& learner : _primary_states.learners)
    {
        const bool wants_prepare =
            learner.second.prepare_start_decree != invalid_decree
            && mu->data.header.decree >= learner.second.prepare_start_decree;
        if (!wants_prepare)
        {
            continue;
        }

        send_prepare_message(learner.first, PS_POTENTIAL_SECONDARY, mu, _options.prepare_timeout_ms_for_potential_secondaries);
        count++;
    }
    mu->set_left_potential_secondary_ack_count(count);

    // it is possible to do commit here when logging is not required for acking prepare.
    // however, it is only possible when replica count == 1 at this moment in the
    // replication group, and we don't want to do this as it is too fragile now.
    // do_possible_commit_on_primary(mu);

    // local log
    dassert (mu->data.header.log_offset == invalid_offset, "");
    dassert (mu->log_task() == nullptr, "");
    mu->log_task() = _stub->_log->append(
        mu,
        LPC_WRITE_REPLICATION_LOG,
        this,
        std::bind(&replica::on_append_log_completed, this, mu,
                  std::placeholders::_1,
                  std::placeholders::_2),
        gpid_to_hash(get_gpid()));

    dassert(nullptr != mu->log_task(), "");
}
예제 #16
0
파일: replica_2pc.cpp 프로젝트: Abioy/rDSN
// Starts two-phase commit for `mu` on the primary.
//
// Steps, in order:
//  1. reject with ERR_NOT_ENOUGH_MEMBER when secondaries + 1 is below
//     mutation_2pc_min_replica_count;
//  2. stamp the mutation with the last committed decree and its
//     (ballot, decree) id, allocating max_decree + 1 if no decree is set;
//  3. reject with ERR_CAPACITY_EXCEEDED when the decree is beyond the prepare
//     list's max decree while the list already holds staleness_for_commit
//     entries or more;
//  4. prepare locally (without log), then send prepare messages to every
//     secondary and to each learner whose prepare_start_decree covers this
//     decree;
//  5. append the mutation to the log; a null log task is treated as
//     ERR_FILE_OPERATION_FAILED and reported through handle_local_failure.
// Every rejection is reported to the client via response_client_message.
void replica::init_prepare(mutation_ptr& mu)
{
    dassert (PS_PRIMARY == status(), "");

    error_code err = ERR_SUCCESS;

    if (static_cast<int>(_primary_states.membership.secondaries.size()) + 1 < _options.mutation_2pc_min_replica_count)
    {
        err = ERR_NOT_ENOUGH_MEMBER;
        response_client_message(mu->client_request, err);
        return;
    }

    mu->data.header.last_committed_decree = last_committed_decree();
    if (mu->data.header.decree == invalid_decree)
    {
        mu->set_id(get_ballot(), _prepare_list->max_decree() + 1);
    }
    else
    {
        mu->set_id(get_ballot(), mu->data.header.decree);
    }

    if (mu->data.header.decree > _prepare_list->max_decree() && _prepare_list->count() >= _options.staleness_for_commit)
    {
        err = ERR_CAPACITY_EXCEEDED;
    }
    else
    {
        dassert (mu->data.header.decree > last_committed_decree(), "");

        // local prepare without log
        err = _prepare_list->prepare(mu, PS_PRIMARY);
    }

    if (err != ERR_SUCCESS)
    {
        response_client_message(mu->client_request, err);
        return;
    }

    ddebug("%s: mutation %s init_prepare", name(), mu->name());

    //
    // TODO: bounded staleness on secondaries
    //
    dassert (mu->data.header.decree <= last_committed_decree() + _options.staleness_for_commit, "");

    // remote prepare
    dassert (mu->remote_tasks().size() == 0, "");
    mu->set_left_secondary_ack_count((unsigned int)_primary_states.membership.secondaries.size());
    for (const auto& secondary : _primary_states.membership.secondaries)
    {
        send_prepare_message(secondary, PS_SECONDARY, mu, _options.prepare_timeout_ms_for_secondaries);
    }

    uint8_t count = 0;
    for (const auto& learner : _primary_states.learners)
    {
        const bool wants_prepare =
            learner.second.prepare_start_decree != invalid_decree
            && mu->data.header.decree >= learner.second.prepare_start_decree;
        if (!wants_prepare)
        {
            continue;
        }

        send_prepare_message(learner.first, PS_POTENTIAL_SECONDARY, mu, _options.prepare_timeout_ms_for_potential_secondaries);
        count++;
    }
    mu->set_left_potential_secondary_ack_count(count);

    // local log
    dassert (mu->data.header.log_offset == invalid_offset, "");
    dassert (mu->log_task() == nullptr, "");
    mu->log_task() = _stub->_log->append(
        mu,
        LPC_WRITE_REPLICATION_LOG,
        this,
        std::bind(&replica::on_append_log_completed, this, mu,
                  std::placeholders::_1,
                  std::placeholders::_2),
        gpid_to_hash(get_gpid()));

    if (nullptr == mu->log_task())
    {
        err = ERR_FILE_OPERATION_FAILED;
        handle_local_failure(err);
        response_client_message(mu->client_request, err);
    }
}
예제 #17
0
파일: replica_2pc.cpp 프로젝트: Jupige/rDSN
// Starts two-phase commit for `mu` on the primary.
//
// Steps, in order:
//  1. stamp the mutation with the last committed decree and its
//     (ballot, decree) id, allocating max_decree + 1 if no decree is set;
//  2. reject with ERR_CAPACITY_EXCEEDED when the decree runs further ahead of
//     the last committed decree than staleness_for_commit allows;
//  3. prepare locally, then send prepare messages to every secondary and to
//     each learner whose prepare_start_decree covers this decree (the
//     learner's signature is passed along);
//  4. if the mutation is already logged, try to commit right away; otherwise
//     append it to the log with on_append_log_completed as the callback.
// On rejection every client request of the mutation is answered with the
// error via response_client_message.
void replica::init_prepare(mutation_ptr& mu)
{
    dassert (PS_PRIMARY == status(), "");

    error_code err = ERR_OK;

    mu->data.header.last_committed_decree = last_committed_decree();
    if (mu->data.header.decree == invalid_decree)
    {
        mu->set_id(get_ballot(), _prepare_list->max_decree() + 1);
    }
    else
    {
        mu->set_id(get_ballot(), mu->data.header.decree);
    }

    dinfo("%s: mutation %s init_prepare, mutation_tid=%" PRIu64, name(), mu->name(), mu->tid());

    if (mu->data.header.decree > last_committed_decree() + _options->staleness_for_commit)
    {
        // check bounded staleness
        err = ERR_CAPACITY_EXCEEDED;
    }
    else
    {
        dassert (mu->data.header.decree > last_committed_decree(), "");

        // local prepare
        err = _prepare_list->prepare(mu, PS_PRIMARY);
    }

    if (err != ERR_OK)
    {
        for (auto& r : mu->client_requests)
        {
            response_client_message(r, err);
        }
        return;
    }

    // remote prepare
    mu->set_prepare_ts();
    mu->set_left_secondary_ack_count((unsigned int)_primary_states.membership.secondaries.size());
    for (const auto& secondary : _primary_states.membership.secondaries)
    {
        send_prepare_message(secondary, PS_SECONDARY, mu, _options->prepare_timeout_ms_for_secondaries);
    }

    uint8_t count = 0;
    for (const auto& learner : _primary_states.learners)
    {
        const bool wants_prepare =
            learner.second.prepare_start_decree != invalid_decree
            && mu->data.header.decree >= learner.second.prepare_start_decree;
        if (!wants_prepare)
        {
            continue;
        }

        send_prepare_message(learner.first, PS_POTENTIAL_SECONDARY, mu, _options->prepare_timeout_ms_for_potential_secondaries, learner.second.signature);
        count++;
    }
    mu->set_left_potential_secondary_ack_count(count);

    if (mu->is_logged())
    {
        do_possible_commit_on_primary(mu);
        return;
    }

    dassert(mu->data.header.log_offset == invalid_offset, "");
    dassert(mu->log_task() == nullptr, "");

    mu->log_task() = _stub->_log->append(
        mu,
        LPC_WRITE_REPLICATION_LOG,
        this,
        std::bind(&replica::on_append_log_completed, this, mu,
                  std::placeholders::_1,
                  std::placeholders::_2),
        gpid_to_hash(get_gpid()));

    dassert(nullptr != mu->log_task(), "");
}
예제 #18
0
파일: replica_2pc.cpp 프로젝트: fxfslm/rDSN
// Completion callback for appending `mu` to the shared replication log.
//
// On success the mutation is marked as logged. Mutations whose ballot is
// older than the current one, or that complete while the replica is
// PS_INACTIVE, are ignored afterwards (including the private-log step).
// Otherwise:
// - primary: tries to commit on success, reports a local failure on error;
// - secondary / potential secondary: reports a local failure on error and
//   always acks the prepare with `err`;
// - PS_ERROR: nothing further.
// Finally a failed append is passed to the stub's handle_log_failure, while a
// successful one is also appended to the private log when there is one and
// the replica is not in PS_ERROR. `size` is not used here.
void replica::on_append_log_completed(mutation_ptr& mu, error_code err, size_t size)
{
    check_hashed_access();

    ddebug( "%s: mutation %s on_append_log_completed, err = %s", name(), mu->name(), err.to_string());

    const bool append_ok = (err == ERR_OK);
    if (append_ok)
    {
        mu->set_logged();
    }

    // skip old mutations
    const bool outdated = mu->data.header.ballot < get_ballot() || status() == PS_INACTIVE;
    if (outdated)
    {
        return;
    }

    switch (status())
    {
    case PS_PRIMARY:
    {
        if (!append_ok)
        {
            handle_local_failure(err);
            break;
        }
        do_possible_commit_on_primary(mu);
        break;
    }

    case PS_SECONDARY:
    case PS_POTENTIAL_SECONDARY:
    {
        if (!append_ok)
        {
            handle_local_failure(err);
        }
        ack_prepare_message(err, mu);
        break;
    }

    case PS_ERROR:
        break;

    default:
        dassert (false, "");
        break;
    }

    // mutation log failure, propagated to all replicas
    if (!append_ok)
    {
        _stub->handle_log_failure(err);
        return;
    }

    // write local private log if necessary
    if (_private_log && status() != PS_ERROR)
    {
        _private_log->append(
            mu,
            LPC_WRITE_REPLICATION_LOG,
            this,
            [this](error_code private_err, size_t /*private_size*/)
            {
                if (private_err != ERR_OK)
                {
                    handle_local_failure(private_err);
                }
            },
            gpid_to_hash(get_gpid()));
    }
}
예제 #19
0
파일: replica_2pc.cpp 프로젝트: Jupige/rDSN
// Completion callback for appending `mu` to the shared replication log.
//
// On success the mutation is marked as logged; on failure the error is
// logged. Unless the mutation's ballot is older than the current one or the
// replica is PS_INACTIVE:
// - primary: tries to commit on success, reports a local failure on error;
// - secondary / potential secondary: reports a local failure on error and
//   always acks the prepare with `err`;
// - PS_ERROR: nothing further.
// Independently of that check, a failed append is passed to the stub's
// handle_log_failure, and a successful one is also appended to the private
// log when there is one and the replica is not in PS_ERROR.
//
// `size` is a size_t but the log format uses %u, so it is cast to
// unsigned int before being handed to the printf-style log calls.
void replica::on_append_log_completed(mutation_ptr& mu, error_code err, size_t size)
{
    check_hashed_access();

    dinfo("%s: append shared log completed for mutation %s, size = %u, err = %s",
          name(), mu->name(), static_cast<unsigned int>(size), err.to_string());

    if (err == ERR_OK)
    {
        mu->set_logged();
    }
    else
    {
        derror("%s: append shared log failed for mutation %s, err = %s",
               name(), mu->name(), err.to_string());
    }

    // skip old mutations
    if (mu->data.header.ballot >= get_ballot() && status() != PS_INACTIVE)
    {
        switch (status())
        {
        case PS_PRIMARY:
            if (err == ERR_OK)
            {
                do_possible_commit_on_primary(mu);
            }
            else
            {
                handle_local_failure(err);
            }
            break;
        case PS_SECONDARY:
        case PS_POTENTIAL_SECONDARY:
            if (err != ERR_OK)
            {
                handle_local_failure(err);
            }
            // always ack
            ack_prepare_message(err, mu);
            break;
        case PS_ERROR:
            break;
        default:
            dassert(false, "");
            break;
        }
    }

    if (err != ERR_OK)
    {
        // mutation log failure, propagate to all replicas
        _stub->handle_log_failure(err);
    }
   
    // write local private log if necessary
    if (err == ERR_OK && _private_log && status() != PS_ERROR)
    {
        _private_log->append(mu,
            LPC_WRITE_REPLICATION_LOG,
            nullptr,
            [this, mu](error_code err, size_t size)
            {
                //
                // DO NOT CHANGE THIS CALLBACK HERE UNLESS
                // YOU FULLY UNDERSTAND WHAT WE DO HERE
                // 
                // AS PRIVATE LOG IS BATCHED, WE ONLY EXECUTE
                // THE FIRST CALLBACK IF THERE IS FAILURE TO
                // NOTIFY FAILURE. ALL OTHER TASKS ARE SIMPLY
                // CANCELLED!!!
                //
                // TODO: we do not need so many callbacks
                //

                // Same %u / size_t mismatch as above: cast before logging.
                dinfo("%s: append private log completed for mutation %s, size = %u, err = %s",
                      name(), mu->name(), static_cast<unsigned int>(size), err.to_string());

                if (err != ERR_OK)
                {
                    derror("%s: append private log failed for mutation %s, err = %s",
                           name(), mu->name(), err.to_string());
                    handle_local_failure(err);
                }
            },
            gpid_to_hash(get_gpid())
            );
    }
}
예제 #20
0
// Completion callback for appending `mu` to the shared replication log.
//
// On success the mutation is marked as logged; on failure the error is
// logged. Unless the mutation's ballot is older than the current one or the
// replica is PS_INACTIVE:
// - primary: tries to commit on success, reports a local failure on error;
// - secondary / potential secondary: reports a local failure on error and
//   always acks the prepare with `err`;
// - PS_ERROR: nothing further.
// Independently of that check, a failed append is passed to the stub's
// handle_log_failure, and a successful one is also appended to the private
// log (with no callback) when there is one and the replica is not in
// PS_ERROR.
//
// `size` is a size_t but the log format uses %u, so it is cast to
// unsigned int before being handed to the printf-style log call.
void replica::on_append_log_completed(mutation_ptr& mu, error_code err, size_t size)
{
    check_hashed_access();

    dinfo("%s: append shared log completed for mutation %s, size = %u, err = %s",
          name(), mu->name(), static_cast<unsigned int>(size), err.to_string());

    if (err == ERR_OK)
    {
        mu->set_logged();
    }
    else
    {
        derror("%s: append shared log failed for mutation %s, err = %s",
               name(), mu->name(), err.to_string());
    }

    // skip old mutations
    if (mu->data.header.ballot >= get_ballot() && status() != partition_status::PS_INACTIVE)
    {
        switch (status())
        {
        case partition_status::PS_PRIMARY:
            if (err == ERR_OK)
            {
                do_possible_commit_on_primary(mu);
            }
            else
            {
                handle_local_failure(err);
            }
            break;
        case partition_status::PS_SECONDARY:
        case partition_status::PS_POTENTIAL_SECONDARY:
            if (err != ERR_OK)
            {
                handle_local_failure(err);
            }
            // always ack
            ack_prepare_message(err, mu);
            break;
        case partition_status::PS_ERROR:
            break;
        default:
            dassert(false, "");
            break;
        }
    }

    if (err != ERR_OK)
    {
        // mutation log failure, propagate to all replicas
        _stub->handle_log_failure(err);
    }
   
    // write local private log if necessary
    if (err == ERR_OK && _private_log && status() != partition_status::PS_ERROR)
    {
        _private_log->append(mu,
            LPC_WRITE_REPLICATION_LOG,
            nullptr,
            nullptr,
            gpid_to_hash(get_gpid())
            );
    }
}
예제 #21
0
파일: replica.cpp 프로젝트: ykwd/rDSN
// Applies a committed mutation to the local app according to the current
// partition status, records the commit latency, reports any error through
// handle_local_failure and, on the primary, starts the next queued write.
//
// - PS_INACTIVE: written only when it is the next decree after the app's
//   last committed one; otherwise skipped with a log line.
// - PS_PRIMARY: must be the next decree (asserted), then written.
// - PS_SECONDARY: written unless a checkpoint is running, in which case it is
//   skipped and a private log must exist (asserted).
// - PS_POTENTIAL_SECONDARY: written when learning has succeeded or is in the
//   LearningWithPrepareTransient state; otherwise skipped with a log line.
// - PS_ERROR: nothing is written.
void replica::execute_mutation(mutation_ptr& mu)
{
    dinfo("%s: execute mutation %s: request_count = %u",
        name(), 
        mu->name(), 
        static_cast<int>(mu->client_requests.size())
        );

    error_code err = ERR_OK;
    decree d = mu->data.header.decree;

    switch (status())
    {
    case partition_status::PS_INACTIVE:
    {
        if (_app->last_committed_decree() + 1 != d)
        {
            ddebug(
                "%s: mutation %s commit to %s skipped, app.last_committed_decree = %" PRId64,
                name(), mu->name(),
                enum_to_string(status()),
                _app->last_committed_decree()
                );
        }
        else
        {
            err = _app->write_internal(mu);
        }
        break;
    }

    case partition_status::PS_PRIMARY:
    {
        check_state_completeness();
        dassert(_app->last_committed_decree() + 1 == d, "");
        err = _app->write_internal(mu);
        break;
    }

    case partition_status::PS_SECONDARY:
    {
        if (_secondary_states.checkpoint_is_running)
        {
            ddebug(
                "%s: mutation %s commit to %s skipped, app.last_committed_decree = %" PRId64,
                name(), mu->name(),
                enum_to_string(status()),
                _app->last_committed_decree()
                );

            // The private log has to keep this state; catch-up is done
            // later, after the checkpoint task has finished.
            dassert(_private_log != nullptr, "");          
        }
        else
        {
            check_state_completeness();
            dassert (_app->last_committed_decree() + 1 == d, "");
            err = _app->write_internal(mu);
        }
        break;
    }

    case partition_status::PS_POTENTIAL_SECONDARY:
    {
        const auto learning = _potential_secondary_states.learning_status;
        const bool apply_now =
            learning == learner_status::LearningSucceeded ||
            learning == learner_status::LearningWithPrepareTransient;

        if (apply_now)
        {
            dassert(_app->last_committed_decree() + 1 == d, "");
            err = _app->write_internal(mu);
        }
        else
        {
            // Prepare also happens with learner_status::LearningWithPrepare;
            // in that case the private log must keep the state, and catch-up
            // is done later, after the checkpoint task has finished.
            ddebug(
                "%s: mutation %s commit to %s skipped, app.last_committed_decree = %" PRId64,
                name(), mu->name(),
                enum_to_string(status()),
                _app->last_committed_decree()
                );
        }
        break;
    }

    case partition_status::PS_ERROR:
        break;
    }

    ddebug("TwoPhaseCommit, %s: mutation %s committed, err = %s", name(), mu->name(), err.to_string());

    _counter_commit_latency.set(dsn_now_ns() - mu->create_ts_ns());

    if (err != ERR_OK)
    {
        handle_local_failure(err);
    }

    if (status() != partition_status::PS_PRIMARY)
    {
        return;
    }

    // On the primary, ask the write queue whether another mutation can start,
    // passing how far the prepare list's max decree is ahead of this one.
    mutation_ptr next = _primary_states.write_queue.check_possible_work(
        static_cast<int>(_prepare_list->max_decree() - d)
        );
    if (next)
    {
        init_prepare(next);
    }
}