/*
 * Begin a transaction on a remote environment.
 *
 * Sends a txn_begin request for the remote handle of |db|, and on success
 * allocates a local ham_txn_t, initialises it with txn_begin() and stores
 * the server-side transaction handle in it.
 *
 * @param env   environment that owns the connection and the allocator
 * @param db    database whose remote handle is sent with the request
 * @param txn   out: receives the new transaction; set to 0 if txn_begin fails
 * @param flags flags forwarded to the server and to txn_begin()
 * @return 0 on success, the transport/server status on failure, or
 *         HAM_OUT_OF_MEMORY if the local transaction cannot be allocated
 */
static ham_status_t
_remote_fun_txn_begin(ham_env_t *env, ham_db_t *db,
                ham_txn_t **txn, ham_u32_t flags)
{
    ham_status_t st;
    proto_wrapper_t *request;
    /* start from a known value: the failure path below tests |reply| */
    proto_wrapper_t *reply=0;

    request=proto_init_txn_begin_request(db_get_remote_handle(db), flags);

    st=_perform_request(env, env_get_curl(env), request, &reply);
    proto_delete(request);
    if (st) {
        if (reply)
            proto_delete(reply);
        return (st);
    }

    ham_assert(reply!=0, (""));
    ham_assert(proto_has_txn_begin_reply(reply), (""));

    st=proto_txn_begin_reply_get_status(reply);
    if (st) {
        proto_delete(reply);
        return (st);
    }

    *txn=(ham_txn_t *)allocator_alloc(env_get_allocator(env),
                    sizeof(ham_txn_t));
    if (!(*txn)) {
        /* the reply is still owned by us; release it before bailing out */
        proto_delete(reply);
        return (HAM_OUT_OF_MEMORY);
    }

    st=txn_begin(*txn, env, flags);
    if (st) {
        allocator_free(env_get_allocator(env), *txn);
        *txn=0;
    }
    else {
        txn_set_remote_handle(*txn,
                    proto_txn_begin_reply_get_txn_handle(reply));
    }

    proto_delete(reply);

    return (st);
}
bool engine::create(bool numeric) { if (numeric) m_config->numeric=true; for (int i=0; i<2; i++) { ham_status_t st=m_db[i]->create(); if (st) { TRACE(("db[%d]: create failed w/ status %d\n", i, st)); return (false); } } if (m_config->txn_group) { return (txn_begin()); } return (true); }
/**
 * Collect key count and on-disk size for an object store.
 *
 * Reads the btree statistics of the main database and of the sequence
 * database inside one read-committed transaction.
 *
 * @param store the store to inspect
 * @param stat  out: numkeys is taken from the main database; size is the
 *              sum of page count * page size over both databases
 * @return 0 on success, -1 if the transaction or either stat call fails
 */
static int
bdb_la_storage_stat(la_storage_object_store *store, la_storage_stat_t *stat)
{
    DB_TXN *txn;
    DB_BTREE_STAT *mainStat = NULL;
    DB_BTREE_STAT *seqStat = NULL;

    if (txn_begin(store->env->env, NULL, &txn, DB_READ_COMMITTED | DB_TXN_NOSYNC) != 0)
        return -1;

    if (store->db->stat(store->db, txn, &mainStat, DB_FAST_STAT | DB_READ_COMMITTED) != 0)
    {
        txn_abort(txn);
        return -1;
    }

    /* Dispatch through the sequence database's own handle. */
    if (store->seq_db->stat(store->seq_db, txn, &seqStat, DB_FAST_STAT | DB_READ_COMMITTED) != 0)
    {
        /* The first statistics block was allocated for us; do not leak it. */
        free(mainStat);
        txn_abort(txn);
        return -1;
    }

    stat->numkeys = mainStat->bt_nkeys;
    stat->size = ((uint64_t) mainStat->bt_pagecnt * (uint64_t) mainStat->bt_pagesize)
        + ((uint64_t) seqStat->bt_pagecnt * (uint64_t) seqStat->bt_pagesize);

    free(mainStat);
    free(seqStat);

    /* Read-only transaction: the statistics are already captured, so the
     * commit result does not change what is reported. */
    txn_commit(txn, DB_TXN_NOSYNC);
    return 0;
}
/* * This is a generic message handling loop that is used both by the * master to accept messages from a client and vice versa. */ static void * msg_mng_loop(void *args) { msg_mng_loop_args *mma; int eid; int ret = 0, retr = 0; /*Number of connection retries*/ int fd, m_index; /*Index in machtab list*/ char *buffer, * cmd, * cookie, * metric, *cmd_t; struct node_struct * nd; long long ballot = -1, instance_proposed = -1, round = -1; char mes[BUF_SIZE], log_rec[BUF_SIZE]; char id[5]; pthread_t cnt_thr; nd = &node; mma = (msg_mng_loop_args *) args; fd = mma->fd; eid = mma->eid; m_index = mma ->m_index; buffer = (char *) malloc(sizeof (char) * MTR_BUF); if (buffer == NULL) { pax_log(LOG_ERR,"msg_mng_loop: malloc: %s\n", strerror(errno)); exit(-1); } if (_PAXOS_DEBUG>2)pax_log(LOG_DEBUG,"Message loop for %d\n", m_index); for (ret = 0; ret == 0;) { memset(buffer, 0, MTR_BUF); if (_PAXOS_DEBUG)pax_log(LOG_DEBUG,"Ready for next mesg\n"); ret = get_next_message(fd, buffer, BTRUE); if (_PAXOS_DEBUG>2)pax_log(LOG_DEBUG,"Get next message ret %d\n", ret); if (ret == -1 || retr > NRETR) {/*Connection lost*/ if(fd)close(fd); if (_PAXOS_DEBUG) pax_log(LOG_DEBUG,"Connection with %d lost.\n", nd->machtab[eid].id); if ((ret = machtab_rem(&node, eid, 1)) != 0) break; if (_PAXOS_DEBUG>2) machtab_print(); sleep(rand()%3); /*Triing to reestablish the connection*/ if ((ret = pthread_create(&cnt_thr, NULL, contact_group, NULL)) != 0) { pax_log(LOG_ERR,"can't create connection thread: %s\n", strerror(errno)); goto err; } if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } if(m_index == nd->updater_id){ nd->updating = BFALSE; } if (nd->machtab[m_index].id == nd->master_id || ((nd->current)) < QUORUM(nd)) { pax_log(LOG_ERR, "Connection with master or majority lost.\n"); nd->master_id = -1; if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } sleep(1); /*Sleep some time to 
wait for others to notice the same event*/ election(nd); } else if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } break; } if (ret == -2) {/*Connection timeouted*/ if (retr <= NRETR) ret = send_mes("PNG;", fd); /*Ping message*/ else { pax_log(LOG_ERR, "Ping retried %d times. Node probably dead, closing connection.\n", NRETR); goto err; } retr++; continue; } if (buffer != NULL || strlen(buffer) != 0) { strncpy(log_rec, buffer, BUF_SIZE); if (_PAXOS_DEBUG>2) pax_log(LOG_DEBUG,"Message %s received %d\n", buffer, nd->machtab[eid].id); if ((cmd = strtok_r(buffer, ";", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: Command not found\n"); continue; } if (strncmp(cmd, "SHU", 3) == 0) { if(_PAXOS_DEBUG)pax_log(LOG_DEBUG, "Shutting down\n"); exit(0); } /*Upon log - the remote node asked for log*/ if (strncmp(cmd, "LOG", 3) == 0) { if ((cmd_t = strtok_r(cmd, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: LOG: Command not found:\n"); continue; } if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } if(nd->non_voting==BTRUE){ continue; } if (update_remote_node(m_index, node)==-1) { nd->master_id = -1; if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } pax_log(LOG_DEBUG>2, "Updating remote node failed, remote node connection lost, election starting\n"); election(nd); }else if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } pax_log(LOG_DEBUG>2, "Updating started\n"); continue; } /*Upon outdated message*/ if (strncmp(cmd, "OUT", 3) == 0) { if ((cmd_t = strtok_r(cmd, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: OUT: Command not found:\n"); continue; } if (strncmp(cookie, "BEG", 3) == 0) {/*The transfer started*/ nd->updater_id = m_index; clear_logs(); lock_snapshotting(); pax_log(LOG_ERR, "Logs cleared:\n"); } else if 
(strncmp(cookie, "SSM", 3) == 0) { if ((cmd = strtok_r(cookie, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: OUT: Command not found:\n"); continue; } write_snapshot_mark(cookie); } else if (strncmp(cookie, "SST", 3) == 0) {/*Snapshot*/ char * met_name; if ((cmd = strtok_r(cookie, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: OUT: Command not found:\n"); continue; } if ((met_name = strtok_r(cookie, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: OUT: Command not found:\n"); continue; } write_metric_snap(met_name, cookie); }else if (strncmp(cookie, "FIN", 3) == 0) { /*The transfer finnished*/ int rt = 0; if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } rt = write_log_out_hash(); rt+= out_hash(); if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } rt += process_logs(&node); unlock_snapshotting(); if (rt==0) { if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } if(_PAXOS_DEBUG) pax_log(LOG_DEBUG, "Voting allowed\n"); nd->updating = BFALSE; nd->non_voting = BFALSE; /*The node can vote again*/ if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } }else { if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } if(_PAXOS_DEBUG) pax_log(LOG_DEBUG, "Wrong update\n"); send_mes("LOG;", fd); nd->updater_id = m_index; nd->updating = BTRUE; nd->non_voting = BTRUE; /*The node can vote again*/ if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } } } else write_log_out_record(cookie); continue; } /*UPON NextBallot(ballot, instance proposed)*/ /*NXB:%lld:%lld;*/ if (strncmp(cmd, "NXB", 3) == 0) { if ((cmd_t = strtok_r(cmd, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: NXB: Command not found:\n"); 
continue; } if ((cmd_t = strtok_r(cookie, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: NXB: Command not found:\n"); continue; } ballot = atoll(cmd_t); if ((cmd_t = strtok_r(cookie, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: NXB: Command not found:\n"); continue; } instance_proposed = atoll(cmd_t); round = atoll(cookie); sprintf(mes, "%lld:%lld", ballot, paxos.last_instance); if (ballot >= paxos.nextBallot) { paxos.nextBallot = ballot; if (instance_proposed == (paxos.last_instance + 1) && round == (paxos.previous_round+1)) { char * ack_mes; ack_mes = malloc((4 + sizeof (char)) * strlen(mes)); if (!ack_mes) { pax_log(LOG_ERR,"NXB response message creation: %s\n", strerror(errno)); exit(EX_OSERR); } sprintf(ack_mes, "N_A:%s;", mes); /*Acknowledge the message*/ if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } if(!nd->non_voting) { send_mes(ack_mes, fd); } if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } free(ack_mes); } else if (instance_proposed <= paxos.last_instance) { /*Remote node is outdated*/ char * nack_mes; nack_mes = malloc((4 + sizeof (char)) * strlen(mes)); if (!nack_mes) { pax_log(LOG_ERR,"NXB response message creation: %s\n", strerror(errno)); exit(EX_OSERR); } if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } sprintf(nack_mes, "N_N:%s;", mes); /*Nack the message*/ if(!nd->non_voting) { send_mes(nack_mes, fd); } if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } free(nack_mes); } else if(instance_proposed > (paxos.last_instance + 1) || round > (paxos.previous_round+1)){ /*Local node is outdated*/ /*char * ack_mes;*/ if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } nd->non_voting = BTRUE; /*The local node cannot vote*/ if 
(!nd->updating) { if (_PAXOS_DEBUG) pax_log(LOG_DEBUG, "NXB\n"); send_mes("LOG;", fd); nd->updater_id = m_index; nd->updating = BTRUE; } if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } } } else { char * nack_mes; nack_mes = malloc((4 + sizeof (char)) * strlen(mes)); if (!nack_mes) { pax_log(LOG_ERR,"NXB response message creation: %s\n", strerror(errno)); exit(EX_OSERR); } if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } sprintf(nack_mes, "N_N:%s;", mes); /*Nack the message*/ if(!nd->non_voting) { send_mes(nack_mes, fd); } if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } free(nack_mes); } continue; } /*UPON BeginBallot(ballot, instance,round, previous_instances, metric)*/ if (strncmp(cmd, "BEB", 3) == 0) { if ((cmd_t = strtok_r(cmd, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: BEB: Command not found:\n"); continue; } if ((cmd_t = strtok_r(cookie, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: BEB: Command not found:\n"); continue; } ballot = atoll(cmd_t); if ((cmd_t = strtok_r(cookie, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: BEB: Command not found:\n"); continue; } instance_proposed = atoll(cmd_t); if ((cmd_t = strtok_r(cookie, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: BEB: Command not found:\n"); continue; } round = atoll(cmd_t); metric = strdup(cookie); sprintf(mes, "%lld:%lld:%d:", instance_proposed, ballot, m_index); if (ballot >= paxos.nextBallot) { if ((instance_proposed >= paxos.last_instance && round > paxos.previous_round)) { if (instance_proposed > (paxos.last_instance + 1) || round > (paxos.previous_round + 1)) { /*Local node is out-dated*/ if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } nd->non_voting = BTRUE; /*Local node cannot vote*/ if(_PAXOS_DEBUG) 
pax_log(LOG_DEBUG, "BEB received - voting forbidden\n"); if (!nd->updating) { send_mes("LOG;", fd); nd->updater_id = m_index; nd->updating = BTRUE; } if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } } if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } write_log_record(log_rec, BFALSE, BFALSE); /*Begin transaction*/ if (txn_begin(nd->machtab[m_index].id - 1, instance_proposed, round, metric) != 0) { /*In the case of failure*/ sprintf(mes, "B_N:%lld:%lld:%lld:%d;", paxos.last_instance, paxos.previous_round, paxos.nextBallot, m_index); /*Nack the message*/ send_mes(mes, fd); if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } continue; } pax_log(LOG_DEBUG>2, "BEB txn done\n"); if(!nd->non_voting) { sprintf(mes, "B_A:%lld:0:%lld:%d;", instance_proposed, ballot, m_index); /*Acknowledge the message*/ send_mes(mes, fd); } if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } } } else { /*remote node is outdated*/ /*Nack the message*/ if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } if(!nd->non_voting) { sprintf(mes, "B_N:%lld:%lld:%lld:%d;", paxos.last_instance, paxos.previous_round, paxos.nextBallot, m_index); send_mes(mes, fd); } if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } } continue; } /*Ping message*/ if (strncmp(cmd, "PNG", 3) == 0) { retr = 0; } //UPON Success(round , instance, previous_instances) //SUC:%lld:%lld:%s; if (strncmp(cmd, "SUC", 3) == 0) { if ((cmd_t = strtok_r(cmd, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: SUC: Command not found:\n"); continue; } if ((cmd_t = strtok_r(cookie, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: SUC: Command not found:\n"); 
continue; } round = atoll(cmd_t); if ((cmd_t = strtok_r(cookie, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: SUC: Command not found:\n"); continue; } instance_proposed = atoll(cmd_t); if (instance_proposed >= paxos.last_instance && round > paxos.previous_round) { char mtc[MTR_BUF]; if (instance_proposed > (paxos.last_instance + 1) || round > (paxos.previous_round + 1)) { /*Local node is out-dated*/ if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } if(_PAXOS_DEBUG>2) pax_log(LOG_DEBUG, "SUC received - voting forbidden\n"); nd->non_voting = BTRUE; /*Local node cannot vote*/ if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } } if (pthread_mutex_lock(&nd->mtmutex) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } paxos.last_instance = instance_proposed; paxos.previous_round = round; nd->master_id = nd->machtab[m_index].id; nd->paxos_state = 0; write_log_record(log_rec, BFALSE, BTRUE); if (txn_commit( nd->machtab[m_index].id - 1, instance_proposed, round) == NULL) { pax_log(LOG_ERR, "Local value cannot be changed to requested value. 
(null) value commited.\n"); pax_log(LOG_ERR, "Maybe something lost, voting forbidden.\n"); if(_PAXOS_DEBUG>2) pax_log(LOG_DEBUG, "SUC received - voting forbidden\n"); nd->non_voting = BTRUE; /*Local node cannot vote*/ } else { strcpy(mtc, txn_commit( nd->machtab[m_index].id - 1, instance_proposed, round)); if (strncmp(mtc, "NA", 2) != 0) {/*If not voting round*/ if (strncmp(mtc, "REM:", 4) == 0) {/*If not removing*/ if (memory_remove_local(mtc) != 0) { pax_log(LOG_ERR, "Local value cannot be changed to requested value.\n"); } } else if (memory_update_local(mtc) != 0) { pax_log(LOG_ERR, "Local metric cannot be removed.\n"); if (_PAXOS_DEBUG>2) pax_log(LOG_DEBUG,"Value updated %s\n", mtc); } } if(_PAXOS_DEBUG>2)print_db_state(); if (nd->non_voting) if (nd->master_id == nd->machtab[m_index].id && !nd->updating) { send_mes("LOG;", fd); nd->updater_id = m_index; nd->updating = BTRUE; } if (_PAXOS_DEBUG>1) pax_log(LOG_DEBUG,"Commited %s\n", txn_commit(nd->machtab[m_index].id - 1, instance_proposed, round)); } if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } }else{ if (_PAXOS_DEBUG>1) pax_log(LOG_DEBUG,"SUC ignoring. 
Should be inst: %lld >= %lld, round: %lld > %lld\n", instance_proposed, paxos.last_instance, round, paxos.previous_round); } if (_PAXOS_DEBUG>1) pax_log(LOG_DEBUG,"New master is %d\n", nd->master_id); } /*B_A, B_N, N_A, N_N*/ /*BEB acknowlegdes and NACKs*/ if ((strncmp(cmd, "B_A", 3) == 0) || (strncmp(cmd, "B_N", 3) == 0)){ if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } if(nd->paxos_state != 4) { if(_PAXOS_DEBUG>2) pax_log(LOG_DEBUG, "Not allowed BA/BN\n"); if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } continue; } if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } sprintf(mes, "%s:%d;", cmd, m_index); /*Inform the initiate consensus thread*/ write(paxpipe[m_index][1], mes, strlen(mes) * sizeof (char)); } if ((strncmp(cmd, "N_A", 3) == 0) || (strncmp(cmd, "N_N", 3) == 0)) { if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } if(nd->paxos_state != 2) { if(_PAXOS_DEBUG>2) pax_log(LOG_DEBUG, "Not allowed NA/NN, state %d\n", nd->paxos_state); if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } continue; } if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } sprintf(mes, "%s:%d;", cmd, m_index); /*Inform the initiate consensus thread*/ write(paxpipe[m_index][1], mes, strlen(mes) * sizeof (char)); } /*Who is the master?*/ if (strncmp(cmd, "WIM", 3) == 0) { if (pthread_mutex_lock(&nd->mtmutex) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } if (nd->machtab[m_index].id == nd->master_id) { nd->master_id = -1; nd->updating = BFALSE; } sprintf(id, "%d", node.master_id); if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } 
sprintf(mes, "MIS:%s:%lld:%lld;", id, paxos.last_instance, paxos.previous_round); send_mes(mes, fd); } /*master is id*/ /*MIS:id:inst:round;*/ if (strncmp(cmd, "MIS", 3) == 0) { long long rd, inst; if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } if(nd->paxos_state != 1) { if(_PAXOS_DEBUG>2) pax_log(LOG_DEBUG, "Not allowed MIS\n"); if (_PAXOS_DEBUG>2) pax_log(LOG_DEBUG,"State is %d.\n", (int) nd->paxos_state); if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } continue; } if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } if ((cmd = strtok_r(cmd, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: MIS: Command not found:\n"); continue; } if ((cmd = strtok_r(cookie, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: MIS: Command not found:\n"); continue; } sprintf(mes, "ID:%d;", atoi(cmd)); if ((cmd = strtok_r(cookie, ":", &cookie)) == NULL) { pax_log(LOG_ERR, "Receiver: MIS: Command not found:\n"); continue; } inst = atoll(cmd); rd = atoll(cookie); if ((inst > (paxos.last_instance) && rd > (paxos.previous_round)) || (inst == (paxos.last_instance) && rd > (paxos.previous_round))) { /*Local node is out-dated*/ if ((ret = pthread_mutex_lock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't lock mutex\n"); exit(EX_OSERR); } if(_PAXOS_DEBUG>2) pax_log(LOG_DEBUG, "MIS - voting forbidden\n"); nd->non_voting = BTRUE; /*Local node cannot vote*/ if (!nd->updating) { send_mes("LOG;", fd); nd->updater_id = m_index; nd->updating = BTRUE; } if ((ret = pthread_mutex_unlock(&nd->mtmutex)) != 0) { pax_log(LOG_ERR, "can't unlock mutex\n"); exit(EX_OSERR); } } /*Inform ask_for_master*/ write(wimpipe[m_index][1], mes, strlen(mes) * sizeof (char)); } } } err: if (_PAXOS_DEBUG>1) pax_log(LOG_DEBUG,"Connection thread exiting %d\n", m_index); if (buffer != NULL) free(buffer); free(mma); return ((void 
*) (uintptr_t) ret); }
int b_txn(int argc, char *argv[]) { extern char *optarg; extern int optind; DB_ENV *dbenv; DB_TXN *txn; int tabort, ch, i, count; count = 1000; tabort = 0; while ((ch = getopt(argc, argv, "ac:")) != EOF) switch (ch) { case 'a': tabort = 1; break; case 'c': count = atoi(optarg); break; case '?': default: return (usage()); } argc -= optind; argv += optind; if (argc != 0) return (usage()); /* Create the environment. */ DB_BENCH_ASSERT(db_env_create(&dbenv, 0) == 0); dbenv->set_errfile(dbenv, stderr); #if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR < 1 DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, NULL, DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_PRIVATE, 0666) == 0); #else DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_PRIVATE, 0666) == 0); #endif /* Start and commit/abort a transaction count times. */ TIMER_START; if (tabort) for (i = 0; i < count; ++i) { #if DB_VERSION_MAJOR < 4 DB_BENCH_ASSERT(txn_begin(dbenv, NULL, &txn, 0) == 0); DB_BENCH_ASSERT(txn_abort(txn) == 0); #else DB_BENCH_ASSERT( dbenv->txn_begin(dbenv, NULL, &txn, 0) == 0); DB_BENCH_ASSERT(txn->abort(txn) == 0); #endif } else for (i = 0; i < count; ++i) { #if DB_VERSION_MAJOR < 4 DB_BENCH_ASSERT(txn_begin(dbenv, NULL, &txn, 0) == 0); DB_BENCH_ASSERT(txn_commit(txn, 0) == 0); #else DB_BENCH_ASSERT( dbenv->txn_begin(dbenv, NULL, &txn, 0) == 0); DB_BENCH_ASSERT(txn->commit(txn, 0) == 0); #endif } TIMER_STOP; printf("# %d empty transaction start/%s pairs\n", count, tabort ? "abort" : "commit"); TIMER_DISPLAY(count); DB_BENCH_ASSERT(dbenv->close(dbenv, 0) == 0); return (0); }
int b_recover(int argc, char *argv[]) { extern char *optarg; extern int optind; DB *dbp; DBT key, data; DB_ENV *dbenv; DB_TXN *txn; u_int32_t cachesize; int ch, i, count; /* * Recover was too slow before release 4.0 that it's not worth * running the test. */ #if DB_VERSION_MAJOR < 4 return (0); #endif cachesize = MEGABYTE; count = 1000; while ((ch = getopt(argc, argv, "C:c:")) != EOF) switch (ch) { case 'C': cachesize = (u_int32_t)atoi(optarg); break; case 'c': count = atoi(optarg); break; case '?': default: return (usage()); } argc -= optind; argv += optind; if (argc != 0) return (usage()); /* Create the environment. */ DB_BENCH_ASSERT(db_env_create(&dbenv, 0) == 0); dbenv->set_errfile(dbenv, stderr); DB_BENCH_ASSERT(dbenv->set_cachesize(dbenv, 0, cachesize, 0) == 0); #define OFLAGS \ (DB_CREATE | DB_INIT_LOCK | \ DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_PRIVATE) #if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR == 0 DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, NULL, OFLAGS, 0666) == 0); #endif #if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR == 1 DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, OFLAGS, 0666) == 0); #endif #if DB_VERSION_MAJOR > 3 || DB_VERSION_MINOR > 1 DB_BENCH_ASSERT(dbenv->open(dbenv, TESTDIR, OFLAGS, 0666) == 0); #endif /* Create the database. */ DB_BENCH_ASSERT(db_create(&dbp, dbenv, 0) == 0); #if DB_VERSION_MAJOR >= 4 && DB_VERSION_MINOR >= 1 DB_BENCH_ASSERT(dbp->open(dbp, NULL, TESTFILE, NULL, DB_BTREE, DB_CREATE | DB_AUTO_COMMIT, 0666) == 0); #else DB_BENCH_ASSERT( dbp->open(dbp, TESTFILE, NULL, DB_BTREE, DB_CREATE, 0666) == 0); #endif /* Initialize the data. */ memset(&key, 0, sizeof(key)); memset(&data, 0, sizeof(data)); key.size = data.size = 20; key.data = data.data = "01234567890123456789"; /* Start/commit a transaction count times. 
*/ for (i = 0; i < count; ++i) { #if DB_VERSION_MAJOR < 4 DB_BENCH_ASSERT( txn_begin(dbenv, NULL, &txn, DB_TXN_NOSYNC) == 0); DB_BENCH_ASSERT(dbp->put(dbp, txn, &key, &data, 0) == 0); DB_BENCH_ASSERT(txn_commit(txn, 0) == 0); #else DB_BENCH_ASSERT( dbenv->txn_begin(dbenv, NULL, &txn, DB_TXN_NOSYNC) == 0); DB_BENCH_ASSERT(dbp->put(dbp, txn, &key, &data, 0) == 0); DB_BENCH_ASSERT(txn->commit(txn, 0) == 0); #endif } DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0); DB_BENCH_ASSERT(dbenv->close(dbenv, 0) == 0); /* Create a new DB_ENV handle. */ DB_BENCH_ASSERT(db_env_create(&dbenv, 0) == 0); dbenv->set_errfile(dbenv, stderr); DB_BENCH_ASSERT( dbenv->set_cachesize(dbenv, 0, 1048576 /* 1MB */, 0) == 0); /* Now run recovery. */ TIMER_START; #if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR == 0 DB_BENCH_ASSERT(dbenv->open( dbenv, TESTDIR, NULL, OFLAGS | DB_RECOVER, 0666) == 0); #endif #if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR == 1 DB_BENCH_ASSERT( dbenv->open(dbenv, TESTDIR, OFLAGS | DB_RECOVER, 0666) == 0); #endif #if DB_VERSION_MAJOR > 3 || DB_VERSION_MINOR > 1 DB_BENCH_ASSERT( dbenv->open(dbenv, TESTDIR, OFLAGS | DB_RECOVER, 0666) == 0); #endif TIMER_STOP; /* * We divide the time by the number of transactions, so an "operation" * is the recovery of a single transaction. */ printf("# recovery after %d transactions\n", count); TIMER_DISPLAY(count); DB_BENCH_ASSERT(dbenv->close(dbenv, 0) == 0); return (0); }
// // XXX Figure out the appropriate way to pick out IDs. // int TpcbExample::txn(Db *adb, Db *bdb, Db *tdb, Db *hdb, int accounts, int branches, int tellers) { Dbc *acurs = NULL; Dbc *bcurs = NULL; Dbc *tcurs = NULL; DbTxn *t = NULL; db_recno_t key; Defrec rec; Histrec hrec; int account, branch, teller; Dbt d_dbt; Dbt d_histdbt; Dbt k_dbt; Dbt k_histdbt(&key, sizeof(key)); // // XXX We could move a lot of this into the driver to make this // faster. // account = random_id(ACCOUNT, accounts, branches, tellers); branch = random_id(BRANCH, accounts, branches, tellers); teller = random_id(TELLER, accounts, branches, tellers); k_dbt.set_size(sizeof(int)); d_dbt.set_flags(DB_DBT_USERMEM); d_dbt.set_data(&rec); d_dbt.set_ulen(sizeof(rec)); hrec.aid = account; hrec.bid = branch; hrec.tid = teller; hrec.amount = 10; // Request 0 bytes since we're just positioning. d_histdbt.set_flags(DB_DBT_PARTIAL); // START TIMING if (txn_begin(NULL, &t, 0) != 0) goto err; if (adb->cursor(t, &acurs, 0) != 0 || bdb->cursor(t, &bcurs, 0) != 0 || tdb->cursor(t, &tcurs, 0) != 0) goto err; // Account record k_dbt.set_data(&account); if (acurs->get(&k_dbt, &d_dbt, DB_SET) != 0) goto err; rec.balance += 10; if (acurs->put(&k_dbt, &d_dbt, DB_CURRENT) != 0) goto err; // Branch record k_dbt.set_data(&branch); if (bcurs->get(&k_dbt, &d_dbt, DB_SET) != 0) goto err; rec.balance += 10; if (bcurs->put(&k_dbt, &d_dbt, DB_CURRENT) != 0) goto err; // Teller record k_dbt.set_data(&teller); if (tcurs->get(&k_dbt, &d_dbt, DB_SET) != 0) goto err; rec.balance += 10; if (tcurs->put(&k_dbt, &d_dbt, DB_CURRENT) != 0) goto err; // History record d_histdbt.set_flags(0); d_histdbt.set_data(&hrec); d_histdbt.set_ulen(sizeof(hrec)); if (hdb->put(t, &k_histdbt, &d_histdbt, DB_APPEND) != 0) goto err; if (acurs->close() != 0 || bcurs->close() != 0 || tcurs->close() != 0) goto err; if (t->commit(0) != 0) goto err; // END TIMING return (0); err: if (acurs != NULL) (void)acurs->close(); if (bcurs != NULL) 
(void)bcurs->close(); if (tcurs != NULL) (void)tcurs->close(); if (t != NULL) (void)t->abort(); if (verbose) cout << "Transaction A=" << (long)account << " B=" << (long)branch << " T=" << (long)teller << " failed\n"; return (-1); }
/**
 * Put an object into the store.
 *
 * Reads the stored header for obj->key inside a transaction.  If a record
 * exists, the caller's rev must match the stored revision; the stored
 * revision is then pushed onto the front of the object's revision list
 * (growing the object by one revision slot while the stored count is below
 * LA_OBJECT_MAX_REVISION_COUNT) and doc_seq is incremented.  If no record
 * exists, doc_seq starts at 1 with no previous revisions.  A new sequence
 * number is drawn and the object is written.
 *
 * obj->header may be reallocated by this call.
 *
 * @param store the store to write to
 * @param rev   revision the caller believes is current; may be NULL for a
 *              first write
 * @param obj   the object to store
 * @return LA_STORAGE_OBJECT_PUT_SUCCESS on commit,
 *         LA_STORAGE_OBJECT_PUT_CONFLICT if the lock was not granted or the
 *         revision does not match, LA_STORAGE_OBJECT_PUT_ERROR otherwise
 */
static la_storage_object_put_result
bdb_la_storage_put(la_storage_object_store *store, const la_storage_rev_t *rev, la_storage_object *obj)
{
    DB_TXN *txn;
    la_storage_object_header header;
    DBT db_key;
    DBT db_value_read, db_value_write;
    db_seq_t seq;
    int result;

#if DEBUG
    printf("putting %u bytes:\n", obj->data_length);
    la_hexdump(la_storage_object_get_data(obj), obj->data_length);
#endif

    memset(&db_key, 0, sizeof(DBT));
    memset(&db_value_read, 0, sizeof(DBT));
    memset(&db_value_write, 0, sizeof(DBT));

    db_key.data = obj->key;
    db_key.size = (u_int32_t) strlen(obj->key);
    db_key.ulen = (u_int32_t) strlen(obj->key);
    db_key.flags = DB_DBT_USERMEM;

    /* Only the fixed-size header is needed to validate the revision. */
    db_value_read.data = &header;
    db_value_read.ulen = sizeof(la_storage_object_header);
    db_value_read.dlen = sizeof(la_storage_object_header);
    db_value_read.doff = 0;
    db_value_read.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;

    if (txn_begin(store->env->env, NULL, &txn, DB_TXN_NOSYNC | DB_TXN_NOWAIT) != 0)
        return LA_STORAGE_OBJECT_PUT_ERROR;

    result = store->db->get(store->db, txn, &db_key, &db_value_read, DB_RMW);
    if (result != 0 && result != DB_NOTFOUND)
    {
        txn_abort(txn);
        if (result == DB_LOCK_NOTGRANTED)
            return LA_STORAGE_OBJECT_PUT_CONFLICT;
        return LA_STORAGE_OBJECT_PUT_ERROR;
    }

    if (result != DB_NOTFOUND)
    {
        debug("data size: %d, data: %s\n", db_value_read.size, db_value_read.data);
        if (rev == NULL || memcmp(rev, &header.rev, sizeof(la_storage_rev_t)) != 0)
        {
            txn_abort(txn);
            return LA_STORAGE_OBJECT_PUT_CONFLICT;
        }
        obj->header->doc_seq = header.doc_seq + 1;
        if (header.rev_count < LA_OBJECT_MAX_REVISION_COUNT)
        {
            /* Keep the old pointer until the grow is known to have worked. */
            void *grown = realloc(obj->header, la_storage_object_total_size(obj) + sizeof(la_storage_rev_t));
            if (grown == NULL)
            {
                txn_abort(txn);
                return LA_STORAGE_OBJECT_PUT_ERROR;
            }
            obj->header = grown;
            // If we added a revision, move the object data up to make room.
            memmove(la_storage_object_get_data(obj) + sizeof(la_storage_rev_t),
                    la_storage_object_get_data(obj), obj->data_length);
            obj->header->rev_count = header.rev_count + 1;
        }
        // Move the rev_count-1 previous revisions over...
        memmove(obj->header->revs_data + sizeof(la_storage_rev_t), obj->header->revs_data,
                sizeof(la_storage_rev_t) * (obj->header->rev_count - 1));
        // Copy the previous revision in.
        memcpy(obj->header->revs_data, &header.rev, sizeof(la_storage_rev_t));
#if DEBUG
        printf("After moving data and revisions:\nOld revisions:\n");
        la_hexdump(obj->header->revs_data, obj->header->rev_count * sizeof(la_storage_rev_t));
        printf("data:\n");
        la_hexdump(la_storage_object_get_data(obj), obj->data_length);
#endif
    }
    else
    {
        obj->header->doc_seq = 1;
        obj->header->rev_count = 0;
    }

    /* Without a sequence number the record must not be written. */
    if (store->seq->get(store->seq, txn, 1, &seq, 0) != 0)
    {
        txn_abort(txn);
        return LA_STORAGE_OBJECT_PUT_ERROR;
    }
    obj->header->seq = seq;

    db_value_write.size = (u_int32_t) la_storage_object_total_size(obj);
    db_value_write.ulen = (u_int32_t) la_storage_object_total_size(obj);
    db_value_write.data = obj->header;
    db_value_write.flags = DB_DBT_USERMEM;

    debug("putting { size: %u, ulen: %u, data: %s, flags: %x }\n", db_value_write.size,
          db_value_write.ulen, db_value_write.data, db_value_write.flags);
    result = store->db->put(store->db, txn, &db_key, &db_value_write, 0);
    if (result != 0)
    {
        txn_abort(txn);
        return LA_STORAGE_OBJECT_PUT_ERROR;
    }

    /* A failed commit means the write did not take effect. */
    if (txn_commit(txn, DB_TXN_NOSYNC) != 0)
        return LA_STORAGE_OBJECT_PUT_ERROR;
    return LA_STORAGE_OBJECT_PUT_SUCCESS;
}
/**
 * Replace the revision list of a stored object.
 *
 * The stored record is laid out as: header, rev_count revisions, data.
 * This reads the whole record, moves the data so that exactly revcount
 * revision slots precede it, copies revs into those slots, updates
 * rev_count and writes the record back in one transaction.
 *
 * @param store    the store holding the object
 * @param key      NUL-terminated key of the object
 * @param revs     revisions to install
 * @param revcount number of entries in revs; clamped to
 *                 LA_OBJECT_MAX_REVISION_COUNT
 * @return LA_STORAGE_OBJECT_PUT_SUCCESS on commit,
 *         LA_STORAGE_OBJECT_PUT_CONFLICT if the lock was not granted,
 *         LA_STORAGE_OBJECT_PUT_ERROR otherwise (including key not found)
 */
static la_storage_object_put_result
bdb_la_storage_set_revs(la_storage_object_store *store, const char *key, la_storage_rev_t *revs, size_t revcount)
{
    DB_TXN *txn;
    DBT db_key;
    DBT db_value;
    int result;
    la_storage_object object;
    long shift;      /* signed distance, in bytes, the data has to move */
    size_t new_size; /* record size after the revision list is resized */

    revcount = la_min(revcount, LA_OBJECT_MAX_REVISION_COUNT);

    memset(&db_key, 0, sizeof(DBT));
    memset(&db_value, 0, sizeof(DBT));

    db_key.data = (void *) key; /* the key is only read */
    db_key.size = strlen(key);
    db_key.ulen = strlen(key);
    db_key.flags = DB_DBT_USERMEM;

    /* Let the library allocate a buffer for the whole record. */
    db_value.data = NULL;
    db_value.ulen = 0;
    db_value.flags = DB_DBT_MALLOC;

    if (txn_begin(store->env->env, NULL, &txn, DB_TXN_NOSYNC | DB_TXN_NOWAIT) != 0)
        return LA_STORAGE_OBJECT_PUT_ERROR;

    result = store->db->get(store->db, txn, &db_key, &db_value, DB_RMW);
    if (result != 0)
    {
        txn_abort(txn);
        if (result == DB_LOCK_NOTGRANTED)
            return LA_STORAGE_OBJECT_PUT_CONFLICT;
        return LA_STORAGE_OBJECT_PUT_ERROR;
    }

    object.header = (la_storage_object_header *) db_value.data;
    object.data_length = db_value.size - sizeof(la_storage_object_header)
        - (object.header->rev_count * sizeof(la_storage_rev_t));

    /* The difference in revision count has to be scaled to bytes. */
    shift = ((long) revcount - (long) object.header->rev_count) * (long) sizeof(la_storage_rev_t);
    new_size = (size_t) ((long) db_value.size + shift);

    if (shift > 0)
    {
        void *grown = realloc(object.header, new_size);
        if (grown == NULL)
        {
            /* The original buffer is still ours; release it and the txn. */
            free(object.header);
            txn_abort(txn);
            return LA_STORAGE_OBJECT_PUT_ERROR;
        }
        object.header = grown;
    }

    /* Move the data first (located via the old rev_count), then install
     * the new revisions and record how many there are. */
    memmove(la_storage_object_get_data(&object) + shift, la_storage_object_get_data(&object), object.data_length);
    memcpy(object.header->revs_data, revs, revcount * sizeof(la_storage_rev_t));
    object.header->rev_count = revcount;

    db_value.data = object.header;
    db_value.size = (u_int32_t) new_size;
    db_value.ulen = db_value.size;
    db_value.flags = DB_DBT_USERMEM;

    result = store->db->put(store->db, txn, &db_key, &db_value, 0);
    free(db_value.data);
    if (result != 0)
    {
        txn_abort(txn);
        return LA_STORAGE_OBJECT_PUT_ERROR;
    }

    if (txn_commit(txn, DB_TXN_NOSYNC) != 0)
        return LA_STORAGE_OBJECT_PUT_ERROR;
    return LA_STORAGE_OBJECT_PUT_SUCCESS;
}
/*
 * Close an object store and remove its main and sequence databases from
 * the environment inside a single transaction.
 *
 * The database file names are looked up from the open handles and joined
 * with the environment home directory before the store is closed, because
 * the names returned by get_dbname are read from the handles themselves.
 *
 * store  the store to close and delete; it is passed to la_storage_close()
 *        once the paths have been built, so the caller must not use it
 *        after a call that got that far.
 *
 * Returns 0 when both databases were removed and the transaction was
 * committed, -1 on any failure. If a name lookup or path allocation
 * fails, the store is left open.
 */
static int bdb_la_storage_object_store_delete(la_storage_object_store *store)
{
    const char *dbname;
    const char *seqdbname;
    const char *home;
    const char *parts[2];
    char *dbpath = NULL;
    char *seqdbpath = NULL;
    DB_TXN *txn;
    DB_ENV *env = store->env->env;
    int status = -1;
    int ret;

    if ((ret = store->db->get_dbname(store->db, &dbname, NULL)) != 0)
    {
        syslog(LOG_NOTICE, "could not get main db name: %d", ret);
        return -1;
    }
    if ((ret = store->seq_db->get_dbname(store->seq_db, &seqdbname, NULL)) != 0)
    {
        syslog(LOG_NOTICE, "could not get sequence db name: %d", ret);
        return -1;
    }
    if ((ret = env->get_home(env, &home)) != 0)
    {
        syslog(LOG_NOTICE, "could not get db home %d", ret);
        return -1;
    }

    /* Build "<home>/<file>" for both databases; these are heap strings
       owned by this function and released at `out`. */
    parts[0] = home;
    parts[1] = dbname;
    dbpath = string_join("/", parts, 2);
    parts[1] = seqdbname;
    seqdbpath = string_join("/", parts, 2);
    if (dbpath == NULL || seqdbpath == NULL)
        goto out;

    syslog(LOG_NOTICE, "deleting db %s and sequence db %s", dbpath, seqdbpath);

    /* The handles must be closed before the databases can be removed.
       `env` was captured above, so `store` is not touched after this. */
    la_storage_close(store);

    if ((ret = txn_begin(env, NULL, &txn, DB_TXN_NOWAIT | DB_TXN_WRITE_NOSYNC)) != 0)
    {
        syslog(LOG_NOTICE, "delete db begin transaction %d", ret);
        goto out;
    }

    syslog(LOG_NOTICE, "env %p txn %p home %s name %s", (void *) env, (void *) txn, home, dbpath);
    if ((ret = env->dbremove(env, txn, dbpath, NULL, 0)) != 0)
    {
        syslog(LOG_NOTICE, "deleting main DB: %d", ret);
        txn_abort(txn);
        goto out;
    }

    syslog(LOG_NOTICE, "env %p txn %p home %s name %s", (void *) env, (void *) txn, home, seqdbpath);
    if ((ret = env->dbremove(env, txn, seqdbpath, NULL, 0)) != 0)
    {
        syslog(LOG_NOTICE, "deleting sequence DB: %d", ret);
        txn_abort(txn);
        goto out;
    }

    txn_commit(txn, DB_TXN_NOSYNC);
    status = 0;

out:
    /* Single exit point so the joined paths are freed on every path. */
    free(seqdbpath);
    free(dbpath);
    return status;
}
/*
 * Open (and optionally create) an object store at `path`.
 *
 * Three Berkeley DB objects are opened inside one transaction:
 *   - the primary B-tree database at `path`;
 *   - a secondary B-tree database at "<path>.seq", ordered by compare_seq
 *     and associated with the primary through the seqindex callback;
 *   - a sequence stored in the primary under the 4-byte key "\0SEQ",
 *     with an initial value of 1.
 *
 * env     storage environment that owns the underlying DB_ENV
 * path    file name of the primary database
 * flags   LA_STORAGE_OPEN_FLAG_CREATE and/or LA_STORAGE_OPEN_FLAG_EXCL
 * _store  receives the newly allocated store on success; left untouched
 *         on failure
 *
 * Returns LA_STORAGE_OPEN_CREATED when both CREATE and EXCL were requested
 * and the open succeeded, LA_STORAGE_OPEN_OK on any other success,
 * LA_STORAGE_OPEN_EXISTS / LA_STORAGE_OPEN_NOT_FOUND when opening the
 * primary database fails with EEXIST / ENOENT, and LA_STORAGE_OPEN_ERROR
 * otherwise. On failure the transaction is aborted and every handle
 * created so far is closed.
 */
static la_storage_open_result_t bdb_la_storage_open(la_storage_env *env, const char *path, int flags, la_storage_object_store **_store)
{
    la_storage_object_store *store;
    la_storage_open_result_t result = LA_STORAGE_OPEN_ERROR;
    DB_TXN *txn;
    char *seqpath = NULL;
    DBT seq_key;
    char seq_name[4];
    int dbflags;
    int ret;

    store = malloc(sizeof(*store));
    if (store == NULL)
        return LA_STORAGE_OPEN_ERROR;

    /* NULL handles tell the cleanup code which ones were never created. */
    store->env = env;
    store->db = NULL;
    store->seq_db = NULL;
    store->seq = NULL;

    if (txn_begin(env->env, NULL, &txn, DB_TXN_WRITE_NOSYNC) != 0)
    {
        free(store);
        return LA_STORAGE_OPEN_ERROR;
    }

    if (db_create(&store->db, env->env, 0) != 0)
    {
        store->db = NULL;
        goto fail;
    }
    if (db_create(&store->seq_db, env->env, 0) != 0)
    {
        store->seq_db = NULL;
        goto fail;
    }

    dbflags = 0;
    if (flags & LA_STORAGE_OPEN_FLAG_CREATE)
        dbflags = DB_CREATE;
    if (flags & LA_STORAGE_OPEN_FLAG_EXCL)
        dbflags |= DB_EXCL;

    ret = store->db->open(store->db, txn, path, NULL, DB_BTREE, dbflags | DB_MULTIVERSION | DB_THREAD, 0);
    if (ret != 0)
    {
        if (ret == EEXIST)
            result = LA_STORAGE_OPEN_EXISTS;
        else if (ret == ENOENT)
            result = LA_STORAGE_OPEN_NOT_FOUND;
        goto fail;
    }

    seqpath = string_append(path, ".seq");
    if (seqpath == NULL)
        goto fail;

    /* The comparison function has to be installed before the open. */
    if (store->seq_db->set_bt_compare(store->seq_db, compare_seq) != 0)
        goto fail;
    if (store->seq_db->open(store->seq_db, txn, seqpath, NULL, DB_BTREE, DB_CREATE | DB_THREAD, 0) != 0)
        goto fail;
    free(seqpath);
    seqpath = NULL;

    /* Without the association the secondary index would silently stop
       tracking the primary, so treat a failure here as fatal. */
    if (store->db->associate(store->db, txn, store->seq_db, seqindex, 0) != 0)
        goto fail;

    if (db_sequence_create(&store->seq, store->db, 0) != 0)
    {
        store->seq = NULL;
        goto fail;
    }
    if (store->seq->initial_value(store->seq, 1) != 0)
        goto fail;

    /* The sequence lives in the primary database under the key "\0SEQ".
       The DBT is zeroed first so no field is passed uninitialized. */
    seq_name[0] = '\0';
    seq_name[1] = 'S';
    seq_name[2] = 'E';
    seq_name[3] = 'Q';
    memset(&seq_key, 0, sizeof(DBT));
    seq_key.data = seq_name;
    seq_key.size = 4;
    seq_key.ulen = 4;
    seq_key.flags = DB_DBT_USERMEM;
    if (store->seq->open(store->seq, txn, &seq_key, DB_CREATE | DB_THREAD) != 0)
        goto fail;

    txn_commit(txn, DB_TXN_NOSYNC);
    *_store = store;

    if ((flags & (LA_STORAGE_OPEN_FLAG_CREATE|LA_STORAGE_OPEN_FLAG_EXCL))
        == (LA_STORAGE_OPEN_FLAG_CREATE|LA_STORAGE_OPEN_FLAG_EXCL))
        return LA_STORAGE_OPEN_CREATED;
    return LA_STORAGE_OPEN_OK;

fail:
    /* Resolve the transaction first: per the Berkeley DB DB->close
       documentation, handles should not be closed while a transaction
       that used them is unresolved. Then close the sequence, the
       secondary, and finally the primary. A handle whose open failed
       still has to be closed to be released. */
    txn_abort(txn);
    if (store->seq != NULL)
        store->seq->close(store->seq, 0);
    if (store->seq_db != NULL)
        store->seq_db->close(store->seq_db, 0);
    if (store->db != NULL)
        store->db->close(store->db, 0);
    free(seqpath);
    free(store);
    return result;
}