/*
 * JNI binding: fetch the value stored under a key.
 *
 * jenv:  JNI environment.
 * clazz: receiver, unused.
 * ptr:   native database handle (a struct nessdb pointer carried in a jlong).
 * jkey:  Java byte array holding the key.
 * jklen: number of bytes of jkey that make up the key.
 *
 * Returns a newly created Java byte array holding the value, or NULL when the
 * handle is null, the key or its length is invalid, memory cannot be
 * obtained, or db_get() produced no data (sv.len == 0).
 */
JNIEXPORT jbyteArray JNICALL Java_org_nessdb_DB_get(JNIEnv *jenv, jobject clazz, jlong ptr, jbyteArray jkey, jint jklen )
{
	(void)clazz;
	char *key, *ktmp;
	struct slice sk, sv;
	jbyteArray jval = NULL;
	struct nessdb *_db = (struct nessdb *)ptr;

	if (!_db) {
		__ERROR("get-->db is null, pls open first");
		return NULL;
	}

	if (jklen >= NESSDB_MAX_KEY_SIZE) {
		__ERROR("key length too long...%d", jklen);
		return NULL;
	}

	/*
	 * A negative length, or one larger than the array itself, would make
	 * the memcpy below read outside the Java array.
	 */
	if (jkey == NULL || jklen < 0 || jklen > (*jenv)->GetArrayLength(jenv, jkey)) {
		__ERROR("invalid key or key length...%d", jklen);
		return NULL;
	}

	memset(&sk, 0, sizeof(struct slice));
	memset(&sv, 0, sizeof(struct slice));

	key = (char*)(*jenv)->GetByteArrayElements(jenv, jkey, 0);
	if (key == NULL)
		return NULL;

	/* private NUL-terminated copy of the key */
	ktmp = malloc(jklen + 1);
	if (ktmp != NULL) {
		memcpy(ktmp, key, jklen);
		ktmp[jklen] = '\0';
	}

	/* the elements were only read, so JNI_ABORT avoids a pointless copy-back */
	(*jenv)->ReleaseByteArrayElements(jenv, jkey, (jbyte*)key, JNI_ABORT);

	if (ktmp == NULL) {
		__ERROR("alloc key buffer(%d) failed...", jklen);
		return NULL;
	}

	sk.data = ktmp;
	sk.len = jklen;
	db_get(_db, &sk, &sv);

	if (sv.len > 0) {
		jval = (*jenv)->NewByteArray(jenv, sv.len);
		if (jval != NULL) {
			(*jenv)->SetByteArrayRegion(jenv, jval, 0, sv.len, (jbyte*)sv.data);
		} else {
			__ERROR("jenv new bytearray(%d) failed...", sv.len);
		}
	}

	/* release */
	free(ktmp);
	if (sv.data)
		db_free_data(sv.data);

	return jval;
}
/*
 * Send exactly `length` bytes from `data` over `socket`, calling send() as
 * many times as needed.
 *
 * Returns 1 once everything has been sent, or -1 when send() fails with an
 * error other than EINTR. An interrupted call is retried.
 */
int networkSendAll( int socket, void *data, int length )
{
	const char *bytes = data;	/* byte pointer: arithmetic on void * is not standard C */
	int totalSent = 0;
	int bytesLeft = length;
	int sent;

	__DEBUG( "Sending %d bytes", length );

	/* Sends some amount of data at the time until all is sent */
	while( totalSent < length ) {
		sent = send( socket, bytes + totalSent, bytesLeft, 0 );

		if( sent == -1 ) {
			if( errno == EINTR )
				continue;

			/* a persistent error would otherwise make this loop spin forever */
			__ERROR( "Failed to send %d bytes ", bytesLeft );
			return -1;
		}

		totalSent = totalSent + sent;
		bytesLeft = bytesLeft - sent;
	}

	return 1;
}
/* search in a node's child */ int _search_child(struct cursor *cur, struct search *so, struct node *n, int childnum) { int ret; NID child_nid; int child_to_search; struct node *child; nassert(n->height > 0); ancestors_append(cur, n->parts[childnum].msgbuf); child_nid = n->parts[childnum].child_nid; if (!cache_get_and_pin(cur->tree->cf, child_nid, (void**)&child, L_READ)) { __ERROR("cache get node error, nid [%" PRIu64 "]", child_nid); return NESS_ERR; } child_to_search = _search_in_which_child(so, child); ret = _search_node(cur, so, child, child_to_search); /* unpin */ cache_unpin(cur->tree->cf, child->cpair); return ret; }
void _add_hiraishin(struct silopit *silopit, int fd, int count, int max_len) { int i; int blk_sizes; struct inner_block{ char key[max_len]; char offset[8]; }; struct inner_block *blks; blk_sizes = count * sizeof(struct inner_block); blks= mmap(0, blk_sizes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (blks == MAP_FAILED) { __PANIC("Error:Can't mmap file when add hiraishin"); return; } for (i = 0; i < count; i++) hiraishin_add(silopit->hiraishin, blks[i].key); if (munmap(blks, blk_sizes) == -1) __ERROR("Error:un-mmapping the file"); }
/* * EFFECT: * - flush in background thread * ENTER: * - parent is already locked * EXIT: * - nodes are all unlocked */ void tree_flush_node_on_background(struct tree *t, struct node *parent) { LOG; int childnum; enum reactivity re; struct node *child; struct partition *part; nassert(parent->height > 0); childnum = node_find_heaviest_idx(parent); part = &parent->parts[childnum]; /* pin the child */ if (cache_get_and_pin(t->cf, part->child_nid, (void**)&child, L_WRITE) != NESS_OK) { __ERROR("cache get node error, nid [%" PRIu64 "]", part->child_nid); return; } re = get_reactivity(t, child); if (re == STABLE) { /* detach buffer from parent */ struct nmb *buf = part->ptr.u.nonleaf->buffer; node_set_dirty(parent); part->ptr.u.nonleaf->buffer = nmb_new(t->e); /* flush it in background thread */ _place_node_and_buffer_on_background(t, child, buf); cache_unpin(t->cf, parent->cpair, make_cpair_attr(parent)); } else { /* the child is reactive, we deal it in main thread */ _child_maybe_reactivity(t, parent, child); } }
/*
 * Start a database transaction for `transaction` and attach a shadow copy of
 * `conflictSet` to it.
 *
 * Steps: begin a transaction on databaseEnvironment, lock the mutex stored
 * under conflictSet->dboid in __conf.transactionLocks, flag the conflict set
 * as having an active transaction, then store a copy in
 * transaction->conflictSet.
 *
 * NOTE(review): a txn_begin failure is only reported; execution continues
 * with transaction->handler left NULL. The looked-up mutex and the result of
 * ConflictSet_createCopy() are used without a NULL check. Confirm that
 * callers guarantee these.
 */
void Transaction_begin( Transaction *transaction, DB_ENV *databaseEnvironment, ConflictSet *conflictSet )
{
	pthread_mutex_t *setLock;
	int rc;

	transaction->handler = NULL;

	rc = databaseEnvironment->txn_begin( databaseEnvironment, NULL, &transaction->handler, 0 );
	if( rc != 0 ) {
		__ERROR( "Failed to create a new transaction" );
		databaseEnvironment->err( databaseEnvironment, rc, "Transaction begin failed!" );
	}

	/* one mutex per database object id */
	setLock = g_hash_table_lookup( __conf.transactionLocks, conflictSet->dboid );
	pthread_mutex_lock( setLock );
	__DEBUG( "Locked conflict set for a local transaction" );

	/* mark the conflict set as being processed by a transaction */
	conflictSet->activeTransaction = 1;

	/* shadow copy, then overwritten member-wise with the current contents */
	transaction->conflictSet = ConflictSet_createCopy( conflictSet );
	*(transaction->conflictSet) = *conflictSet;

	__DEBUG( "Shadow copy check: gen %d = %d", transaction->conflictSet->maxGeneration, conflictSet->maxGeneration );
}
/*
 * Store a locally produced update in a newly created generation of the
 * conflict set.
 *
 * conflictSet->writeLock is held for the whole operation. If the conflict set
 * is full the process is terminated with exit(1).
 * methodCallObject->generationNumber is set to the number of the generation
 * that received the update.
 */
void ConflictSet_insertLocalUpdate( ConflictSet *conflictSet, MethodCallObject *methodCallObject)
{
	Generation *slot;

	pthread_mutex_lock( &conflictSet->writeLock );
	__DEBUG( "Locking conflict set for local update" );

	if( ConflictSet_isFull( conflictSet ) ) {
		__ERROR( "Conflict set is full!" );
		exit( 1 );
	}

	slot = ConflictSet_createNewGeneration( conflictSet );

	/* this replica's entry of the generation carries the update */
	slot->generationData[__conf.id].methodCallObject = methodCallObject;
	slot->generationType[__conf.id] = GEN_UPDATE;

	/* let the update know where it was stored */
	methodCallObject->generationNumber = slot->number;

	__DEBUG( "Added generation %d for method <%s>", slot->number, methodCallObject->methodName );
	__DEBUG( "Unlocking conflict set for writing local update" );

	pthread_mutex_unlock( &conflictSet->writeLock );
}
/*
 * JNI binding: remove a key from the database.
 *
 * jenv:  JNI environment.
 * clazz: receiver, unused.
 * ptr:   native database handle (a struct nessdb pointer carried in a jlong).
 * jkey:  Java byte array holding the key.
 * jklen: number of bytes of jkey that make up the key.
 *
 * Returns 1 after db_remove() has been called, -1 when the key length is out
 * of range, and 0 when the handle is null, the key is empty or null, or
 * memory cannot be obtained.
 */
JNIEXPORT jint JNICALL Java_org_nessdb_DB_remove(JNIEnv *jenv, jobject clazz, jlong ptr, jbyteArray jkey, jint jklen)
{
	(void)clazz;
	char *key, *ktmp;
	struct slice sk;
	struct nessdb *_db = (struct nessdb *)ptr;

	if (!_db) {
		__ERROR("remove-->db is null, pls open first\n");
		return 0;
	}

	if (jklen >= NESSDB_MAX_KEY_SIZE) {
		__ERROR("key length too long...%d", jklen);
		return (-1);
	}

	if (jklen == 0 || jkey == NULL)
		return 0;

	/*
	 * A negative length, or one larger than the array itself, would make
	 * the memcpy below read outside the Java array.
	 */
	if (jklen < 0 || jklen > (*jenv)->GetArrayLength(jenv, jkey)) {
		__ERROR("invalid key length...%d", jklen);
		return (-1);
	}

	memset(&sk, 0, sizeof(struct slice));

	key = (char*)(*jenv)->GetByteArrayElements(jenv, jkey, 0);
	if (key == NULL)
		return 0;

	/* private NUL-terminated copy of the key */
	ktmp = malloc(jklen + 1);
	if (ktmp != NULL) {
		memcpy(ktmp, key, jklen);
		ktmp[jklen] = '\0';
	}

	/* the elements were only read, so JNI_ABORT avoids a pointless copy-back */
	(*jenv)->ReleaseByteArrayElements(jenv, jkey, (jbyte*)key, JNI_ABORT);

	if (ktmp == NULL) {
		__ERROR("alloc key buffer(%d) failed...", jklen);
		return 0;
	}

	sk.data = ktmp;
	sk.len = jklen;
	db_remove(_db, &sk);

	free(ktmp);

	return 1;
}
/*
 * Compress `src_size` bytes of `src` into `dst` with method `m`.
 *
 * The first byte of `dst` is set to the method id and the payload follows it.
 * `*dst_size` receives the total number of bytes written (method byte
 * included).
 *
 * Returns NESS_OK on success and 0 when snappy fails or the method is not
 * supported. For an unsupported method `dst` and `*dst_size` are left
 * untouched.
 */
int ness_compress(ness_compress_method_t m, const char *src, uint32_t src_size, char *dst, uint32_t *dst_size)
{
	int ret = NESS_OK;

	switch (m) {
	case NESS_NO_COMPRESS:
		memcpy(dst + 1, src, src_size);
		*dst_size = src_size + 1;
		dst[0] = NESS_NO_COMPRESS;
		break;

	case NESS_SNAPPY_METHOD:
		if (src_size == 0) {
			/* nothing to compress: only the method byte is emitted */
			*dst_size = 1;
		} else {
			/* initialised so a failed compress cannot leak an indeterminate size */
			size_t out_size = 0;
			int status;
			struct snappy_env env;

			snappy_init_env(&env);
			status = snappy_compress(&env, src, src_size, dst + 1, &out_size);
			snappy_free_env(&env);
			if (status != 0) {
				/* log the sizes themselves, not the dst_size pointer */
				__ERROR("snappy compress error %d, src_size %u, dst_size %u",
					status, (unsigned)src_size, (unsigned)out_size);
				ret = 0;
			}
			*dst_size = out_size + 1;
		}
		dst[0] = NESS_SNAPPY_METHOD;
		break;

	default:
		ret = 0;
		__ERROR("%s", "no compress method support!");
		break;
	}

	return ret;
}
struct jikukan *_read_mmap(struct silopit *silopit, size_t count) { int i; int fd; int result; int fcount; int blk_sizes; char file[FILE_PATH_SIZE]; struct jikukan *merge = NULL; struct footer footer; int fsize = sizeof(struct footer); memset(file, 0, FILE_PATH_SIZE); snprintf(file, FILE_PATH_SIZE, "%s/%s", silopit->basedir, silopit->name); fd = open(file, O_RDWR, 0644); if (fd == -1) __PANIC("error opening silopit when read map"); result = lseek(fd, -fsize, SEEK_END); if (result == -1) __PANIC("error lseek footer"); result = read(fd, &footer, fsize); if (result != fsize) { __PANIC("error reading when read footer process"); } struct inner_block{ char key[from_be32(footer.max_len)]; char offset[8]; }; struct inner_block *blks; fcount = from_be32(footer.count); blk_sizes = from_be32(footer.size); blks= mmap(0, blk_sizes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (blks == MAP_FAILED) { __PANIC("error map when read process"); goto out; } merge = jikukan_new(fcount + count + 1); for (i = 0; i < fcount; i++) { jikukan_insert(merge, blks[i].key, u64_from_big((unsigned char*)blks[i].offset), ADD); } if (munmap(blks, blk_sizes) == -1) __ERROR("Un-mmapping the file"); out: close(fd); return merge; }
int ness_decompress(const char *src, uint32_t src_size, char *dst, uint32_t dst_size) { int ret = NESS_OK; /* compressed data is NULL */ if (src_size == 1) return NESS_ERR; switch (src[0] & 0xF) { case NESS_NO_COMPRESS: memcpy(dst, src + 1, src_size - 1); break; case NESS_SNAPPY_METHOD: { int status; struct snappy_env env; snappy_init_env(&env); status = snappy_uncompress(src + 1, src_size - 1, dst); snappy_free_env(&env); if (status != 0) { __ERROR("snappy uncompress error %d", status); ret = 0; goto ERR; } (void)dst_size; } break; default: ret = 0; __ERROR("%s", "no decompress method support!"); break; } ERR: return ret; }
int main(int argc, char **argv) { (void) argc; (void) argv; _svr.bindaddr = HOST; _svr.port = PORT; _svr.db = silokatana_open(); _svr.el = aeCreateEventLoop(11024); _svr.fd = anetTcpServer(_svr.neterr, _svr.port, _svr.bindaddr); if (_svr.fd == ANET_ERR) { __PANIC("openning port error #%d:%s", _svr.port, _svr.neterr); exit(1); } if (anetNonBlock(_svr.neterr, _svr.fd) == ANET_ERR) { __ERROR("set nonblock #%s",_svr.neterr); exit(1); } aeCreateTimeEvent(_svr.el, 3000, server_cron, NULL, NULL); if (aeCreateFileEvent(_svr.el, _svr.fd, AE_READABLE, accept_handler, NULL) == AE_ERR) __ERROR("creating file event"); __INFO("siloserver starting, port:%d, pid:%d", PORT, (long)getpid()); printf("%s", _ascii_logo); aeMain(_svr.el); __ERROR("oops,exit"); aeDeleteEventLoop(_svr.el); silokatana_close(_svr.db); return 1; }
/*
 * Propagate every local update that has not been propagated yet.
 *
 * Starting at the first unpropagated generation, the local method call
 * objects are collected into a list, conflictSet->propagatedGeneration is
 * advanced, and the list is handed to propagateList() for all replicas.
 *
 * Returns early when nothing needs propagating or when the position of the
 * first unpropagated generation cannot be determined.
 *
 * NOTE(review): the loop index wraps modulo numberOfGenerations while the
 * exit test is `<= maxPosition`; if maxPosition can equal
 * numberOfGenerations - 1 the loop cannot terminate. The local
 * methodCallObject of a generation is also dereferenced without a NULL
 * check. Confirm both against the conflict set invariants.
 */
void ConflictSet_notifyPropagation( ConflictSet *conflictSet )
{
	int generationPosition, currentGenPos;
	MethodCallObject *methodCallObject;
	GSList *methodCalls = NULL;

	/* Check if no prior propagation has been performed */
	if( conflictSet->propagatedGeneration == -1 ) {
		generationPosition = 0;
	}
	else if( conflictSet->propagatedGeneration >= conflictSet->maxGeneration ) {
		__DEBUG( "No need to propagate!" );
		return;
	}
	else {
		/* Get the first generation that is not propagated */
		generationPosition = ConflictSet_getGenerationPosition( conflictSet, conflictSet->propagatedGeneration + 1 );

		if( generationPosition == -1 ) {
			__ERROR( "Failed to get generation position in notifyPropagation()" );
			/* -1 is not a valid index into generations[] */
			return;
		}
	}

	__DEBUG( "Starting to propagate from generation set with index: %d", generationPosition );

	for( currentGenPos = generationPosition;
	     currentGenPos <= conflictSet->maxPosition;
	     currentGenPos = (currentGenPos + 1 ) % conflictSet->numberOfGenerations ) {

		methodCallObject = conflictSet->generations[ currentGenPos ].generationData[__conf.id].methodCallObject;
		methodCalls = g_slist_append( methodCalls, methodCallObject );

		conflictSet->propagatedGeneration = methodCallObject->generationNumber;
		__DEBUG( "Propagted generation %d for object with dboid %s", methodCallObject->generationNumber, methodCallObject->databaseObjectId );
	}

	/* Sends all the updates to all nodes on the network */
	propagateList( methodCalls, __conf.replicas, conflictSet->dboid );

	__DEBUG( "Propagated generation %d", conflictSet->propagatedGeneration );

	g_slist_free( methodCalls );
}
/*
 * Read the tree header from `fd`, trying two on-disk copies.
 *
 * The copy at offset 0 is tried first. If that fails, the copy at offset
 * ALIGN(512) is tried and its result is returned.
 */
int deserialize_hdr_from_disk(int fd, struct block *b, struct hdr **h)
{
	DISKOFF first_off = 0UL;
	DISKOFF second_off = ALIGN(512);
	int rc = read_hdr_from_disk(fd, b, h, first_off);

	if (rc == NESS_OK)
		return rc;

	__ERROR("1st header broken, "
		"try to read next, nxt-off[%"PRIu64"]",
		second_off);

	return read_hdr_from_disk(fd, b, h, second_off);
}
/*
 * Start the cron thread.
 *
 * cron: cron object; its `arg` field is set to `arg` and its thread runs
 *       _do_cron(cron).
 *
 * Returns NESS_OK when the thread was created, NESS_ERR when the cron is
 * already alive or the thread cannot be created. On a creation failure the
 * cron is marked not alive again so a later start can be attempted.
 */
int cron_start(struct cron *cron, void *arg)
{
	int rc;
	pthread_attr_t attr;

	if (cron->isalive)
		return NESS_ERR;

	/* set before the thread exists so the new thread sees it from the start */
	cron->isalive = 1U;
	cron->arg = arg;

	pthread_attr_init(&attr);
	rc = pthread_create(&cron->thread, &attr, _do_cron, cron);
	/* the attribute object is not needed once pthread_create has returned */
	pthread_attr_destroy(&attr);

	if (rc != 0) {
		cron->isalive = 0U;
		__ERROR("%s", "can't initialize cron.");
		return NESS_ERR;
	}

	return NESS_OK;
}
/*
 * Send `dataSize` bytes of `data` to every replica in the list.
 *
 * A replica whose tcpSocket is -1 is connected first; connection attempts are
 * repeated every 25 ms (usleep(25000)) until one succeeds, and the socket is
 * kept in the replica for later calls. Connections are left open afterwards.
 */
void networkSendDataToAll( GSList *replicas, void *data, int dataSize )
{
	GSList *node = replicas;

	while( node != NULL ) {
		Replica *target = node->data;

		/* connect on demand */
		if( target->tcpSocket == -1 ) {
			int fd;

			for( ;; ) {
				fd = networkCreateTCPSocket( target->host, target->port );
				if( fd != -1 )
					break;

				__DEBUG( "Failed to connect to host %s on port %d", target->host, target->port );
				usleep( 25000 );
			}

			__DEBUG( "Connection successful to host %s on port %d", target->host, target->port );
			target->tcpSocket = fd;
		}

		if( networkSendAll( target->tcpSocket, data, dataSize ) == -1 ) {
			__ERROR( "Failed to send propagation data: %s", strerror( errno ) );
		}

		node = g_slist_next( node );
	}
}
/*
 * JNI binding: open the database located at `jpath`.
 *
 * Returns the native database pointer from db_open() carried in a jlong, or 0
 * when `jpath` is null or its characters cannot be obtained.
 */
JNIEXPORT jlong JNICALL Java_org_nessdb_DB_open(JNIEnv *jenv, jobject clazz, jstring jpath)
{
	(void) clazz;
	struct nessdb *_db = NULL;
	const char *path;

	/* GetStringUTFChars must not be given a null string reference */
	if (jpath == NULL) {
		__ERROR("...jpath is null");
		return 0;
	}

	path = (*jenv)->GetStringUTFChars(jenv, jpath, NULL);
	if (path == NULL) {
		__ERROR("...jpath is null");
		return 0;
	}

	_db = db_open(path);
	(*jenv)->ReleaseStringUTFChars(jenv, jpath, path);

	return (jlong)_db;
}
/* * |key44, key88| * / \ * |key10, key20| |key90| * / | \ \ * |basement0| |basement1| |basement2| |basement3| * * (a tree with height 2) * * cursor search is very similar to depth-first-search algorithm. * for cursor_seektofirst operation, the root-to-leaf path is: * key44 -> key10 -> basement0 * and do the inner sliding along with the basement. * if we get the end of one leaf, CURSOR_EOF will be returned to upper on, * and we also set search->pivot_bound = key10, for the next time, * the root-to-leaf path(restart with a jump) will be: * key44 -> key10 -> basement1 */ void _tree_search(struct cursor *cur, struct search *so) { int r; NID root_nid; int child_to_search; struct tree *t; struct node *root; struct cache_operations *c_op = cur->tree->cache->c_op; t = cur->tree; try_again: root_nid = t->hdr->root_nid; if (c_op->cache_get_and_pin(t->cache, root_nid, &root, L_READ) < 0) { __ERROR("cache get root node error, nid [%" PRIu64 "]", root_nid); return; } child_to_search = _search_in_which_child(so, root); r = _search_node(cur, so, root, child_to_search); /* unpin */ c_op->cache_unpin_readonly(t->cache, root); switch (r) { case CURSOR_CONTINUE: break; case CURSOR_TRY_AGAIN: goto try_again; break; case CURSOR_EOF: break; default: break; } }
/*
 * JNI binding: return the database statistics text.
 *
 * db_stats() is given a 10 KiB stack buffer through a slice; the slice
 * contents (stats.len bytes) are copied into a new Java byte array.
 *
 * Returns the new byte array, or NULL when the handle is null or the array
 * cannot be allocated.
 */
JNIEXPORT jbyteArray JNICALL Java_org_nessdb_DB_stats(JNIEnv *jenv, jobject clazz, jlong ptr)
{
	(void)clazz;
	char buf[1024 * 10] = {0};
	struct slice stats;
	jbyteArray jval;
	struct nessdb *_db = (struct nessdb *)ptr;

	if (!_db) {
		__ERROR("info-->db is null, pls open first\n");
		return NULL;
	}

	stats.len = 1024 * 10;
	stats.data = buf;
	db_stats(_db, &stats);

	jval = (*jenv)->NewByteArray(jenv, stats.len);
	if (jval == NULL) {
		/* allocation failed: the array must not be written to */
		__ERROR("jenv new bytearray(%d) failed...", stats.len);
		return NULL;
	}
	(*jenv)->SetByteArrayRegion(jenv, jval, 0, stats.len, (jbyte*)stats.data);

	return jval;
}
/* * |key44, key88| * / \ * |key10, key20| |key90| * / | \ \ * |msgbuf0| |msgbuf1| |msgbuf2| |msgbuf3| * * (a tree with height 2) * * cursor search is very similar to depth-first-search algorithm. * for cursor_seektofirst operation, the root-to-leaf path is: * key44 -> key10 -> msgbuf0 * and do the inner sliding along with the msgbuf. * if we get the end of one leaf, CURSOR_EOF will be returned to upper on, * and we also set search->pivot_bound = key10, for the next time, * the root-to-leaf path(restart with a jump) will be: * key44 -> key10 -> msgbuf1 */ void _tree_search(struct cursor * cur, struct search * so) { int r; NID root_nid; int child_to_search; struct buftree *t; struct node *root; t = cur->tree; TRY_AGAIN: root_nid = t->hdr->root_nid; if (!cache_get_and_pin(t->cf, root_nid, (void**)&root, L_READ)) { __ERROR("cache get root node error, nid [%" PRIu64 "]", root_nid); return; } child_to_search = _search_in_which_child(so, root); r = _search_node(cur, so, root, child_to_search); /* unpin */ cache_unpin(t->cf, root->cpair); switch (r) { case CURSOR_CONTINUE: /* got the end of leaf */ goto TRY_AGAIN; break; case CURSOR_TRY_AGAIN: /* got the end of node */ goto TRY_AGAIN; break; case CURSOR_EOF: break; default: break; } }
/* search in a node's child */ int _search_child(struct cursor *cur, struct search *so, struct node *n, int childnum) { int ret; int child_to_search; NID child_nid; struct node *child; struct cache_operations *c_op = cur->tree->cache->c_op; nassert(n->height > 0); /* add basement to ances */ ancestors_append(cur, n->u.n.parts[childnum].buffer); child_nid = n->u.n.parts[childnum].child_nid; if (c_op->cache_get_and_pin(cur->tree->cache, child_nid, &child, L_READ) < 0) { __ERROR("cache get node error, nid [%" PRIu64 "]", child_nid); return NESS_ERR; } child_to_search = _search_in_which_child(so, child); ret = _search_node(cur, so, child, child_to_search); /* unpin */ c_op->cache_unpin_readonly(cur->tree->cache, child); return ret; }
void *_write_mmap(struct silopit *silopit, struct skipnode *x, size_t count, int need_new) { int i, j, c_clone; int fd; int sizes; int result; char file[FILE_PATH_SIZE]; struct skipnode *last; struct footer footer; struct stats stats; int fsize = sizeof(struct footer); memset(&footer, 0, fsize); _prepare_stats(x, count, &stats); sizes = stats.mmap_size; struct inner_block { char key[stats.max_len]; char offset[8]; }; struct inner_block *blks; memset(file, 0, FILE_PATH_SIZE); snprintf(file, FILE_PATH_SIZE, "%s/%s", silopit->basedir, silopit->name); fd = open(file, O_RDWR | O_CREAT | O_TRUNC, 0644); if (fd == -1) __PANIC("error creating silopit file"); if (lseek(fd, sizes - 1, SEEK_SET) == -1) __PANIC("error lseek silopit"); result = write(fd, "", 1); if (result == -1) __PANIC("error writing empty"); blks = mmap(0, sizes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (blks == MAP_FAILED) { __PANIC("error mapping block when write on process"); } last = x; c_clone = count; for (i = 0, j = 0; i < c_clone; i++) { if (x->opt == ADD) { buffer_putstr(silopit->buf, x->key); buffer_putc(silopit->buf, 0); buffer_putlong(silopit->buf, x->val); j++; } else count--; last = x; x = x->forward[0]; } char *strings = buffer_detach(silopit->buf); memcpy(blks, strings, sizes); #ifdef MSYNC if (msync(blks, sizes, MS_SYNC) == -1) { __ERROR("Error Msync"); } #endif if (munmap(blks, sizes) == -1) { __ERROR("Un-mmapping the file"); } footer.count = to_be32(count); footer.crc = to_be32(F_CRC); footer.size = to_be32(sizes); footer.max_len = to_be32(stats.max_len); memcpy(footer.key, last->key, strlen(last->key)); result = write(fd, &footer, fsize); if (result == -1) __PANIC("writing the footer"); struct meta_node mn; mn.count = count; memset(mn.end, 0, SILOKATANA_MAX_KEY_SIZE); memcpy(mn.end, last->key, SILOKATANA_MAX_KEY_SIZE); memset(mn.index_name, 0, FILE_NAME_SIZE); memcpy(mn.index_name, silopit->name, FILE_NAME_SIZE); if (need_new) meta_set(silopit->meta, &mn); else 
meta_set_byname(silopit->meta, &mn); close(fd); return x; }
/*
 * Create a TCP socket listening on `port` on any local address.
 *
 * The first address returned by getaddrinfo() that can be bound is used.
 *
 * Returns the listening socket, or -1 when no address could be bound or
 * listen() fails. The process is terminated with exit(1) when getaddrinfo()
 * or setsockopt() fails.
 */
int networkCreateTCPServer( int port )
{
	int listenSocket = -1;
	struct addrinfo hints, *servinfo, *p;
	char port_str[10];
	int rv, yes=1;
	int bound;

	memset( &hints, 0, sizeof hints );
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_PASSIVE; /* use my IP */

	snprintf( port_str, sizeof(port_str), "%d", port );

	if ((rv = getaddrinfo(NULL, port_str, &hints, &servinfo)) != 0) {
		fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(rv));
		exit( 1 );
	}

	/* loop through all the results and bind to the first we can */
	for(p = servinfo; p != NULL; p = p->ai_next) {
		if ((listenSocket = socket(p->ai_family, p->ai_socktype, p->ai_protocol)) == -1) {
			perror("server: socket");
			continue;
		}

		if (setsockopt(listenSocket, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(int)) == -1) {
			perror("setsockopt");
			exit(1);
		}

		if (bind(listenSocket, p->ai_addr, p->ai_addrlen) == -1) {
			close( listenSocket );
			__DEBUG( "bind: %s", strerror(errno));
			continue;
		}

		break;
	}

	/* remember the outcome, then release the list on every path */
	bound = (p != NULL);
	freeaddrinfo(servinfo);

	if( !bound ) {
		__ERROR( "Failed to bind to port: %s", port_str );
		return -1;
	}

	if (listen(listenSocket, 10) == -1) {
		perror("listen");
		/* do not leak the bound socket */
		close( listenSocket );
		return -1;
	}

	return listenSocket;
}
uint64_t _read_offset(struct silopit *silopit, struct slice *sk) { int fd; int fcount; int blk_sizes; int result; uint64_t off = 0UL; char file[FILE_PATH_SIZE]; struct footer footer; int fsize = sizeof(struct footer); memset(file, 0, FILE_PATH_SIZE); snprintf(file, FILE_PATH_SIZE, "%s/%s", silopit->basedir, silopit->name); fd = open(file, O_RDWR, 0644); if (fd == -1) { __ERROR("error opening silopit when read offset process"); return 0UL; } result = lseek(fd, -fsize, SEEK_END); if (result == -1) { __ERROR("error lseek when read offset process"); close(fd); return off; } result = read(fd, &footer, fsize); if (result == -1) { __ERROR("error reading footer when read offset process"); close(fd); return off; } int max_len = from_be32(footer.max_len); struct inner_block { char key[max_len]; char offset[8]; }; struct inner_block *blks; fcount = from_be32(footer.count); blk_sizes = from_be32(footer.size); blks= mmap(0, blk_sizes, PROT_READ, MAP_PRIVATE, fd, 0); if (blks == MAP_FAILED) { __ERROR("Map_failed when read process"); close(fd); return off; } size_t left = 0, right = fcount, i = 0; while (left < right) { i = (right -left) / 2 +left; int cmp = strcmp(sk->data, blks[i].key); if (cmp == 0) { off = u64_from_big((unsigned char*)blks[i].offset); break ; } if (cmp < 0) right = i; else left = i + 1; } if (munmap(blks, blk_sizes) == -1) __ERROR("un-mmapping the file"); close(fd); return off; }
void _process_cmd(int fd, struct request *req) { char sent_buf[BUF_SIZE]; struct response *resp; memset(sent_buf, 0, BUF_SIZE); switch(req->cmd){ case CMD_PING:{ resp = response_new(0,OK_PONG); response_detch(resp,sent_buf); write(fd,sent_buf,strlen(sent_buf)); request_free_value(req); response_free(resp); break; } case CMD_SET:{ struct slice sk, sv; if(req->argc == 3) { sk.len = req->argv[1]->len; sk.data = req->argv[1]->data; sv.len = req->argv[2]->len; sv.data = req->argv[2]->data; db_add(_svr.db, &sk, &sv); resp = response_new(0,OK); response_detch(resp,sent_buf); write(fd,sent_buf,strlen(sent_buf)); request_free_value(req); response_free(resp); break; } goto __default; } case CMD_MSET:{ int i; int c = req->argc; for (i = 1; i < c; i += 2) { struct slice sk, sv; sk.len = req->argv[i]->len; sk.data = req->argv[i]->data; sv.len = req->argv[i+1]->len; sv.data = req->argv[i+1]->data; db_add(_svr.db, &sk, &sv); } resp = response_new(0, OK); response_detch(resp, sent_buf); write(fd,sent_buf, strlen(sent_buf)); request_free_value(req); response_free(resp); break; } case CMD_GET:{ int ret; struct slice sk; struct slice sv; if (req->argc == 2) { sk.len = req->argv[1]->len; sk.data = req->argv[1]->data; ret = db_get(_svr.db, &sk, &sv); if (ret == 1) { resp = response_new(1,OK_200); resp->argv[0] = sv.data; } else { resp = response_new(0,OK_404); resp->argv[0] = NULL; } response_detch(resp, sent_buf); write(fd,sent_buf,strlen(sent_buf)); request_free_value(req); response_free(resp); if (ret == 1) free(sv.data); break; } goto __default; } case CMD_MGET:{ int i; int ret; int c=req->argc; int sub_c=c-1; char **vals = calloc(c, sizeof(char*)); resp=response_new(sub_c, OK_200); for (i = 1; i < c; i++){ struct slice sk; struct slice sv; sk.len = req->argv[i]->len; sk.data = req->argv[i]->data; ret = db_get(_svr.db, &sk, &sv); if (ret == 1) vals[i-1] = sv.data; else vals[i-1] = NULL; resp->argv[i-1] = vals[i-1]; } response_detch(resp, sent_buf); write(fd, sent_buf, 
strlen(sent_buf)); request_free_value(req); response_free(resp); for (i = 0; i < sub_c; i++){ if (vals[i]) free(vals[i]); } free(vals); break; } case CMD_INFO:{ char *infos; infos = db_info(_svr.db); resp = response_new(1, OK_200); resp->argv[0] = infos; response_detch(resp, sent_buf); write(fd,sent_buf, strlen(sent_buf)); request_free_value(req); response_free(resp); break; } case CMD_DEL:{ int i; for (i = 1; i < req->argc; i++){ struct slice sk; sk.len = req->argv[i]->len; sk.data = req->argv[i]->data; db_remove(_svr.db, &sk); } resp = response_new(0, OK); response_detch(resp, sent_buf); write(fd, sent_buf, strlen(sent_buf)); request_free_value(req); response_free(resp); break; } case CMD_EXISTS:{ struct slice sk; sk.len = req->argv[1]->len; sk.data = req->argv[1]->data; int ret= db_exists(_svr.db, &sk); if(ret) write(fd,":1\r\n",4); else write(fd,":-1\r\n",5); } break; case CMD_SHUTDOWN: __ERROR("siloserver shutdown..."); db_close(_svr.db); exit(2); break; __default: default:{ resp = response_new(0, ERR); response_detch(resp, sent_buf); write(fd, sent_buf, strlen(sent_buf)); request_free_value(req); response_free(resp); break; } } }
/*
 * Insert an update received from replica `sourceReplicaId`, made in
 * generation `sourceGeneration`, into the conflict set.
 *
 * Three cases, all under conflictSet->writeLock:
 *  - empty conflict set: a new generation is created for the update;
 *  - the generation exists locally: the update is stored in it;
 *  - the generation is ahead of the local maximum but within the allowed
 *    span: the missing generations are created (marked GEN_NO_UPDATE for this
 *    replica), the update goes into the last one, and a stabilization
 *    notification is sent.
 * Any other generation, or a generation whose position cannot be resolved, is
 * rejected and nothing is stored.
 */
void ConflictSet_insertRemoteUpdate( ConflictSet *conflictSet, MethodCallObject *methodCallObject, int sourceReplicaId, int sourceGeneration )
{
	Generation *gen;
	int generationPosition = -1;
	int maxGen;
	int it;
	int failure = 0;

	/* Lock the structure */
	pthread_mutex_lock( &conflictSet->writeLock );
	__DEBUG( "Locking conflict set for inserting remote update" );

	if( ConflictSet_isEmpty( conflictSet ) ) {
		gen = ConflictSet_createNewGeneration( conflictSet );

		/* Insert the data into the newly created generation */
		ConflictSet_setRemoteData( conflictSet, gen, sourceReplicaId, methodCallObject );

		/* Set local info to NO UPDATE since the generation has been created */
		gen->generationType[__conf.id] = GEN_NO_UPDATE;

		__DEBUG( "ConflictSet is empty, inserting generation information into generation %d", gen->number );

		/* Perform conflict resolution if complete */
		ConflictSet_checkGenerationComplete( conflictSet, gen );
	}
	else if( sourceGeneration >= conflictSet->minGeneration &&
		 sourceGeneration <= conflictSet->maxGeneration ) {
		/* The needed generation exists locally */
		generationPosition = ConflictSet_getGenerationPosition( conflictSet, sourceGeneration );

		if( generationPosition == -1 ) {
			__ERROR( "Failed to get generation position in ConflictSet_insertRemoteUpdate()" );
			/* -1 is not a valid index into generations[]: reject the update */
			failure = 1;
		}
		else {
			gen = &conflictSet->generations[generationPosition];

			/* Insert the data into the existing generation */
			ConflictSet_setRemoteData( conflictSet, gen, sourceReplicaId, methodCallObject );

			__DEBUG( "Inserting generation information into generation %d", gen->number );

			/* Perform conflict resolution if complete */
			ConflictSet_checkGenerationComplete( conflictSet, gen );
		}
	}
	else if( sourceGeneration >= conflictSet->minGeneration &&
		 ( sourceGeneration - conflictSet->minGeneration ) <= conflictSet->numberOfGenerations ) {
		/*
		 * The generation is within the allowed span of valid generations.
		 * Reaching this branch implies sourceGeneration > maxGeneration,
		 * so the loop below runs at least once and sets gen.
		 */
		maxGen = conflictSet->maxGeneration;

		/* Create the generations needed to store the remote generation */
		for( it = 0; it < (sourceGeneration - maxGen); it++ ) {
			gen = ConflictSet_createNewGeneration( conflictSet );
			__DEBUG(" Creating generation since gen %d is not available", sourceGeneration );

			/* This replica has no update on this generation */
			gen->generationType[__conf.id] = GEN_NO_UPDATE;

			/* Perform conflict resolution if complete */
			ConflictSet_checkGenerationComplete( conflictSet, gen );
		}

		/* Insert the data into the last created generation */
		ConflictSet_setRemoteData( conflictSet, gen, sourceReplicaId, methodCallObject );

		/* Set local info to NO UPDATE since the generation has been created */
		gen->generationType[__conf.id] = GEN_NO_UPDATE;

		__DEBUG( "Inserting generation information into generation %d", gen->number );

		/* Perform conflict resolution if complete */
		ConflictSet_checkGenerationComplete( conflictSet, gen );

		/* Send stabilization message for the generations that have been created */
		ConflictSet_notifyStabilization( conflictSet, gen->number );
	}
	else {
		/* The generation is outside the allowed span: notify about the failure */
		failure = 1;
	}

	if( failure == 0 ) {
		__DEBUG( "Adding remote update from replica %d on generation %d", sourceReplicaId, conflictSet->maxGeneration );
	}

	ConflictSet_showState( conflictSet );

	__DEBUG( "Unlocking the conflict set for writing remote update" );
	pthread_mutex_unlock( &conflictSet->writeLock );
}
int _deserialize_blockpairs_from_disk(int fd, struct block *b, struct hdr *hdr) { int r = NESS_ERR; uint32_t read_size; uint32_t align_size; struct buffer *rbuf; struct block_pair *pairs; read_size = hdr->blocksize; align_size = ALIGN(read_size); rbuf = buf_new(align_size); if (ness_os_pread(fd, rbuf->buf, align_size, hdr->blockoff) != (ssize_t)align_size) { r = NESS_READ_ERR; goto ERR; } if (!buf_seek(rbuf, read_size - CRC_SIZE)) goto ERR; uint32_t exp_xsum, act_xsum; if (!buf_getuint32(rbuf, &exp_xsum)) goto ERR; if (!buf_xsum(rbuf->buf, hdr->blocksize - CRC_SIZE, &act_xsum)) goto ERR; if (exp_xsum != act_xsum) { __ERROR("blockpairs xsum check error," "exp_xsum: [%" PRIu32 "]," "act_xsum: [%" PRIu32 "]", exp_xsum, act_xsum); goto ERR; } buf_seekfirst(rbuf); /* * skip magic with 8bytes */ if (!buf_skip(rbuf, 8)) goto ERR; uint32_t i; uint32_t block_count = 0U; if (!buf_getuint32(rbuf, &block_count)) goto ERR; pairs = xcalloc(block_count, sizeof(*pairs)); for (i = 0; i < block_count; i++) { if (!buf_getuint64(rbuf, &pairs[i].nid)) goto ERR1; if (!buf_getuint64(rbuf, &pairs[i].offset)) goto ERR1; if (!buf_getuint32(rbuf, &pairs[i].real_size)) goto ERR1; if (!buf_getuint32(rbuf, &pairs[i].skeleton_size)) goto ERR1; if (!buf_getuint32(rbuf, &pairs[i].height)) goto ERR1; pairs[i].used = 1; } if (block_count > 0) block_init(b, pairs, block_count); xfree(pairs); buf_free(rbuf); return NESS_OK; ERR: buf_free(rbuf); return r; ERR1: buf_free(rbuf); xfree(pairs); return r; }
void _flush_buffer_to_child(struct tree *t, struct node *child, struct nmb *buf) { struct mb_iter iter; mb_iter_init(&iter, buf->pma); while (mb_iter_next(&iter)) { /* TODO(BohuTANG): check msn */ struct nmb_values nvalues; nmb_get_values(&iter, &nvalues); struct bt_cmd cmd = { .msn = nvalues.msn, .type = nvalues.type, .key = &nvalues.key, .val = &nvalues.val, .xidpair = nvalues.xidpair }; node_put_cmd(t, child, &cmd); } } void _flush_some_child(struct tree *t, struct node *parent); /* * PROCESS: * - check child reactivity * - if FISSIBLE: split child * - if FLUSHBLE: flush buffer from child * ENTER: * - parent is already locked * - child is already locked * EXIT: * - parent is unlocked * - no nodes are locked */ void _child_maybe_reactivity(struct tree *t, struct node *parent, struct node *child) { enum reactivity re = get_reactivity(t, child); switch (re) { case STABLE: cache_unpin(t->cf, child->cpair, make_cpair_attr(child)); cache_unpin(t->cf, parent->cpair, make_cpair_attr(parent)); break; case FISSIBLE: node_split_child(t, parent, child); cache_unpin(t->cf, child->cpair, make_cpair_attr(child)); cache_unpin(t->cf, parent->cpair, make_cpair_attr(parent)); break; case FLUSHBLE: cache_unpin(t->cf, parent->cpair, make_cpair_attr(parent)); _flush_some_child(t, child); break; } } /* * PROCESS: * - pick a heaviest child of parent * - flush from parent to child * - maybe split/flush child recursively * ENTER: * - parent is already locked * EXIT: * - parent is unlocked * - no nodes are locked */ void _flush_some_child(struct tree *t, struct node *parent) { int childnum; enum reactivity re; struct node *child; struct partition *part; struct nmb *buffer; struct timespec t1, t2; childnum = node_find_heaviest_idx(parent); nassert(childnum < parent->n_children); part = &parent->parts[childnum]; buffer = part->ptr.u.nonleaf->buffer; if (cache_get_and_pin(t->cf, part->child_nid, (void**)&child, L_WRITE) != NESS_OK) { __ERROR("cache get node error, nid [%" PRIu64 "]", 
part->child_nid); return; } ngettime(&t1); re = get_reactivity(t, child); if (re == STABLE) { node_set_dirty(parent); part->ptr.u.nonleaf->buffer = nmb_new(t->e); _flush_buffer_to_child(t, child, buffer); nmb_free(buffer); } ngettime(&t2); status_add(&t->e->status->tree_flush_child_costs, time_diff_ms(t1, t2)); status_increment(&t->e->status->tree_flush_child_nums); _child_maybe_reactivity(t, parent, child); }
/*
 * Read and validate the tree header stored at offset `off` of `fd`.
 *
 * The header checksum is verified, the fields are decoded, the layout version
 * is checked against LAYOUT_MIN_SUPPORTED_VERSION, and the block pairs the
 * header points to are loaded into `b`.
 *
 * On success `*h` receives a newly allocated header and NESS_OK is returned.
 * On failure the header is freed, `*h` is left untouched, and one of
 * NESS_READ_ERR, NESS_HDR_XSUM_ERR, NESS_LAYOUT_VERSION_OLDER_ERR,
 * NESS_DESERIAL_BLOCKPAIR_ERR or NESS_ERR is returned.
 */
int read_hdr_from_disk(int fd, struct block *b, struct hdr **h, DISKOFF off)
{
	int r = NESS_ERR;
	struct hdr *hdr = NULL;
	struct buffer *rbuf = NULL;
	uint32_t exp_xsum, act_xsum;
	uint32_t read_size, align_size;

	hdr = xcalloc(1, sizeof(*hdr));
	read_size = (
	                + 8		/* magic */
	                + 8		/* last nid */
	                + 8		/* root nid */
	                + 4		/* version */
	                + 4		/* block size */
	                + 8		/* block offset */
	                + CRC_SIZE);	/* checksum */

	align_size = ALIGN(read_size);
	rbuf = buf_new(align_size);
	if (ness_os_pread(fd, rbuf->buf, align_size, off) != (ssize_t)align_size) {
		/* report the offset that was actually read */
		__ERROR("ness pread error, read size [%" PRIu32 "], "
			"offset [%" PRIu64 "]",
			align_size,
			(uint64_t)off);
		r = NESS_READ_ERR;
		goto ERR;
	}

	/* the checksum sits in the last CRC_SIZE bytes of the header */
	if (!buf_seek(rbuf, read_size - CRC_SIZE))
		goto ERR;
	if (!buf_getuint32(rbuf, &exp_xsum))
		goto ERR;
	if (!buf_xsum(rbuf->buf, read_size - CRC_SIZE, &act_xsum))
		goto ERR;
	if (exp_xsum != act_xsum) {
		__ERROR("header xsum check error, "
			"exp_xsum: [%" PRIu32 "], "
			"act_xsum: [%" PRIu32 "], ",
			exp_xsum,
			act_xsum);
		r = NESS_HDR_XSUM_ERR;
		goto ERR;
	}

	buf_seekfirst(rbuf);

	/* skip the 8-byte magic */
	if (!buf_skip(rbuf, 8))
		goto ERR;
	if (!buf_getuint32(rbuf, &hdr->version))
		goto ERR;
	if (!buf_getuint64(rbuf, &hdr->last_nid))
		goto ERR;
	if (!buf_getuint64(rbuf, &hdr->root_nid))
		goto ERR;
	if (!buf_getuint32(rbuf, &hdr->blocksize))
		goto ERR;
	if (!buf_getuint64(rbuf, &hdr->blockoff))
		goto ERR;

	nassert(hdr->root_nid >= NID_START);

	if (hdr->version < LAYOUT_MIN_SUPPORTED_VERSION) {
		r = NESS_LAYOUT_VERSION_OLDER_ERR;
		__ERROR("tree layout too older [%" PRIu32 "], "
			"min_support_version [%d]",
			hdr->version,
			LAYOUT_MIN_SUPPORTED_VERSION);
		goto ERR;
	}

	/* block pairs */
	r = _deserialize_blockpairs_from_disk(fd, b, hdr);
	if (r != NESS_OK) {
		r = NESS_DESERIAL_BLOCKPAIR_ERR;
		goto ERR;
	}

	*h = hdr;

	buf_free(rbuf);
	return NESS_OK;

ERR:
	buf_free(rbuf);
	xfree(hdr);
	return r;
}
int logr_read(struct logr *lgr, struct msg *k, struct msg *v, msgtype_t *t, uint32_t *tbn) { int r; uint32_t size = 0U; if (lgr->read < lgr->fsize) { char *base; uint32_t read; if (ness_os_read(lgr->fd, lgr->base, 4) != 4) { r = NESS_LOG_READ_SIZE_ERR; __ERROR(" log read size error, errno [%d]", r); goto ERR; } getuint32(lgr->base, &size); if (size > lgr->base_size) { lgr->base_size = size; lgr->base = xrealloc(lgr->base, lgr->base_size); } /* skip the length */ read = (size - 4); if (ness_os_read(lgr->fd, lgr->base, read) != read) { r = NESS_LOG_READ_DATA_ERR; __ERROR(" log read entry error, errno [%d]", r); goto ERR; } base = lgr->base; uint32_t exp_xsum; uint32_t act_xsum; getuint32(base + read - 4, &exp_xsum); buf_xsum(base, read - 4, &act_xsum); if (exp_xsum != act_xsum) { r = NESS_LOG_READ_XSUM_ERR; __ERROR("log read xsum error, exp_xsum [%" PRIu32 "],act_xsum [%" PRIu32 "]", exp_xsum, act_xsum); goto ERR; } int pos = 0; /* tbn */ getuint32(base + pos, tbn); pos += 4; /* key */ uint32_t fixsize; getuint32(base + pos, &fixsize); pos += 4; k->size = (fixsize >> 8); *t = (msgtype_t)(fixsize & 0xff); getnstr(base + pos, k->size, (char**)&k->data); pos += k->size; if (*t != MSG_DELETE) { /* value */ getuint32(base + pos, &v->size); pos += 4; getnstr(base + pos, v->size, (char**)&v->data); pos += v->size; } } else {