// Process one queue's batch requests.
void*
batch_process_queue(void* q_to_wait_on)
{
	cf_queue* worker_queue = (cf_queue*)q_to_wait_on;
	batch_transaction btr;
	uint64_t start;

	while (1) {
		if (cf_queue_pop(worker_queue, &btr, CF_QUEUE_FOREVER) != 0) {
			cf_crash(AS_BATCH, "Failed to pop from batch worker queue.");
		}

		// Check for timeouts.
		if (btr.end_time != 0 && cf_getns() > btr.end_time) {
			cf_atomic_int_incr(&g_config.batch_timeout);

			if (btr.fd_h) {
				as_msg_send_reply(btr.fd_h, AS_PROTO_RESULT_FAIL_TIMEOUT,
						0, 0, 0, 0, 0, 0, 0, btr.trid, NULL);
				btr.fd_h = 0;
			}

			batch_transaction_done(&btr);
			continue;
		}

		// Process batch request.
		start = cf_getns();
		batch_process_request(&btr);
		histogram_insert_data_point(g_config.batch_q_process_hist, start);
	}

	return 0;
}
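// A minimal usage sketch (illustrative, not from the source): each batch
// worker queue is serviced by a dedicated thread running
// batch_process_queue(), which blocks in cf_queue_pop() forever. The names
// below (g_batch_queue_example, g_batch_thread_example) are assumptions for
// illustration only.
#include <pthread.h>

static cf_queue* g_batch_queue_example;
static pthread_t g_batch_thread_example;

static void
batch_worker_start_example(void)
{
	// A thread-safe queue of batch_transaction elements, filled by
	// producers such as as_batch_queue_task()...
	g_batch_queue_example = cf_queue_create(sizeof(batch_transaction), true);

	// ...and drained by one worker thread until process exit.
	pthread_create(&g_batch_thread_example, NULL, batch_process_queue,
			(void*)g_batch_queue_example);
}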
/*
 * Code to init the fields in a transaction. Use this instead of memset.
 *
 * NB: DO NOT SHUFFLE INIT ORDER .. it is based on elements found in the
 * structure.
 */
void
as_transaction_init(as_transaction *tr, cf_digest *keyd, cl_msg *msgp)
{
	tr->msgp = msgp;

	if (keyd) {
		tr->keyd = *keyd;
		tr->preprocessed = true;
	}
	else {
		tr->keyd = cf_digest_zero;
		tr->preprocessed = false;
	}

	tr->flag = 0;
	tr->generation = 0;
	tr->result_code = AS_PROTO_RESULT_OK;
	tr->proto_fd_h = 0;
	tr->start_time = cf_getns();
	tr->end_time = 0;

	AS_PARTITION_RESERVATION_INIT(tr->rsv);

	tr->microbenchmark_time = 0;
	tr->microbenchmark_is_resolve = false;
	tr->proxy_node = 0;
	tr->proxy_msg = 0;

	UREQ_DATA_INIT(&tr->udata);

	tr->batch_shared = 0;
	tr->batch_index = 0;
	tr->void_time = 0;
}
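// A minimal usage sketch showing the two initialization modes: with no
// digest the transaction is marked preprocessed = false (the digest still
// has to be computed from the key fields); with a digest it is marked
// preprocessed = true. The function name and parameters here are
// illustrative assumptions, not part of the source.
static void
transaction_init_usage_example(cl_msg *msgp, cf_digest *known_digest)
{
	as_transaction tr;

	// Client-originated message, digest not yet known.
	as_transaction_init(&tr, NULL, msgp);

	// Internally routed message whose digest is already known.
	as_transaction_init(&tr, known_digest, msgp);
}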
// Process one queue's batch requests.
static void
batch_worker(void* udata)
{
	batch_transaction* btr = (batch_transaction*)udata;

	// Check for timeouts.
	if (btr->end_time != 0 && cf_getns() > btr->end_time) {
		cf_atomic_int_incr(&g_config.batch_timeout);

		if (btr->fd_h) {
			as_msg_send_reply(btr->fd_h, AS_PROTO_RESULT_FAIL_TIMEOUT,
					0, 0, 0, 0, 0, 0, 0, btr->trid, NULL);
			btr->fd_h = 0;
		}

		batch_transaction_done(btr, false);
		return;
	}

	// Process batch request.
	uint64_t start = cf_getns();
	batch_process_request(btr);
	histogram_insert_data_point(g_config.batch_q_process_hist, start);
}
//------------------------------------------------
// Insert a time interval data point. The interval
// is time elapsed since start_ns, converted to
// milliseconds or microseconds as appropriate.
// Assumes start_ns was obtained via cf_getns()
// some time ago. Generates a histogram with
// either:
//
//		bucket	millisecond range
//		------	-----------------
//		0		0 to 1   (more exactly, 0.999999)
//		1		1 to 2   (more exactly, 1.999999)
//		2		2 to 4   (more exactly, 3.999999)
//		3		4 to 8   (more exactly, 7.999999)
//		4		8 to 16  (more exactly, 15.999999)
//		etc.
//
// or:
//
//		bucket	microsecond range
//		------	-----------------
//		0		0 to 1   (more exactly, 0.999)
//		1		1 to 2   (more exactly, 1.999)
//		2		2 to 4   (more exactly, 3.999)
//		3		4 to 8   (more exactly, 7.999)
//		4		8 to 16  (more exactly, 15.999)
//		etc.
//
void
histogram_insert_data_point(histogram *h, uint64_t start_ns)
{
	uint64_t end_ns = cf_getns();
	uint64_t delta_t = (end_ns - start_ns) / h->time_div;

	int bucket = 0;

	if (delta_t != 0) {
		bucket = msb(delta_t);

		if (start_ns > end_ns) {
			// Either the clock went backwards, or wrapped. (Assume the
			// former, since it takes ~580 years from 0 to wrap.)
			cf_warning(AS_INFO, "clock went backwards: start %lu end %lu",
					start_ns, end_ns);
			bucket = 0;
		}
	}

	cf_atomic64_incr(&h->counts[bucket]);
}
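// A minimal sketch (an assumption, not necessarily the source's msb()) of
// the most-significant-bit helper the bucketing above relies on. To match
// the bucket table, msb() must return the 1-based position of the highest
// set bit: msb(1) == 1, msb(2..3) == 2, msb(4..7) == 3, and so on. A 6 ms
// delta (binary 110) then lands in bucket 3, the "4 to 8" range.
static inline int
msb_sketch(uint64_t n)
{
	// __builtin_clzll() (GCC/Clang) counts leading zero bits of a nonzero
	// 64-bit value; 64 minus that count is the 1-based position of the
	// highest set bit. The delta_t != 0 check in the caller keeps n == 0
	// (for which clzll is undefined) out of this path.
	return 64 - __builtin_clzll(n);
}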
// Set of threads which talk to client over the connection for doing the needful
// processing. Note that once fd is assigned to a thread all the work on that fd
// is done by that thread. Fair fd usage is expected of the client. First thread
// is special - also does accept [listens for new connections]. It is the only
// thread which does it.
void *
thr_demarshal(void *arg)
{
	cf_socket_cfg *s, *ls;
	// Create my epoll fd, register in the global list.
	struct epoll_event ev;
	int nevents, i, n, epoll_fd;
	cf_clock last_fd_print = 0;

#if defined(USE_SYSTEMTAP)
	uint64_t nodeid = g_config.self_node;
#endif

	// Early stage aborts; these will cause faults in process scope.
	cf_assert(arg, AS_DEMARSHAL, CF_CRITICAL, "invalid argument");
	s = &g_config.socket;
	ls = &g_config.localhost_socket;

#ifdef USE_JEM
	int orig_arena;

	if (0 > (orig_arena = jem_get_arena())) {
		cf_crash(AS_DEMARSHAL, "Failed to get original arena for thr_demarshal()!");
	}
	else {
		cf_info(AS_DEMARSHAL, "Saved original JEMalloc arena #%d for thr_demarshal()", orig_arena);
	}
#endif

	// Figure out my thread index.
	pthread_t self = pthread_self();
	int thr_id;

	for (thr_id = 0; thr_id < MAX_DEMARSHAL_THREADS; thr_id++) {
		if (0 != pthread_equal(g_demarshal_args->dm_th[thr_id], self))
			break;
	}

	if (thr_id == MAX_DEMARSHAL_THREADS) {
		cf_debug(AS_FABRIC, "Demarshal thread could not figure own ID, bogus, exit, fu!");
		return(0);
	}

	// First thread accepts new connection at interface socket.
	if (thr_id == 0) {
		demarshal_file_handle_init();
		epoll_fd = epoll_create(EPOLL_SZ);

		if (epoll_fd == -1)
			cf_crash(AS_DEMARSHAL, "epoll_create(): %s", cf_strerror(errno));

		memset(&ev, 0, sizeof(ev));
		ev.events = EPOLLIN | EPOLLERR | EPOLLHUP;
		ev.data.fd = s->sock;

		if (0 > epoll_ctl(epoll_fd, EPOLL_CTL_ADD, s->sock, &ev))
			cf_crash(AS_DEMARSHAL, "epoll_ctl(): %s", cf_strerror(errno));

		cf_info(AS_DEMARSHAL, "Service started: socket %s:%d", s->addr, s->port);

		if (ls->sock) {
			ev.events = EPOLLIN | EPOLLERR | EPOLLHUP;
			ev.data.fd = ls->sock;

			if (0 > epoll_ctl(epoll_fd, EPOLL_CTL_ADD, ls->sock, &ev))
				cf_crash(AS_DEMARSHAL, "epoll_ctl(): %s", cf_strerror(errno));

			cf_info(AS_DEMARSHAL, "Service also listening on localhost socket %s:%d", ls->addr, ls->port);
		}
	}
	else {
		epoll_fd = epoll_create(EPOLL_SZ);

		if (epoll_fd == -1)
			cf_crash(AS_DEMARSHAL, "epoll_create(): %s", cf_strerror(errno));
	}

	g_demarshal_args->epoll_fd[thr_id] = epoll_fd;
	cf_detail(AS_DEMARSHAL, "demarshal thread started: id %d", thr_id);

	int id_cntr = 0;

	// Demarshal transactions from the socket.
	for ( ; ; ) {
		struct epoll_event events[EPOLL_SZ];

		cf_detail(AS_DEMARSHAL, "calling epoll");

		nevents = epoll_wait(epoll_fd, events, EPOLL_SZ, -1);

		if (0 > nevents) {
			cf_debug(AS_DEMARSHAL, "epoll_wait() returned %d ; errno = %d (%s)", nevents, errno, cf_strerror(errno));
		}

		cf_detail(AS_DEMARSHAL, "epoll event received: nevents %d", nevents);

		uint64_t now_ns = cf_getns();
		uint64_t now_ms = now_ns / 1000000;

		// Iterate over all events.
		for (i = 0; i < nevents; i++) {
			if ((s->sock == events[i].data.fd) || (ls->sock == events[i].data.fd)) {
				// Accept new connections on the service socket.
				int csocket = -1;
				struct sockaddr_in caddr;
				socklen_t clen = sizeof(caddr);
				char cpaddr[64];

				if (-1 == (csocket = accept(events[i].data.fd, (struct sockaddr *)&caddr, &clen))) {
					// This means we're out of file descriptors - could be a
					// SYN flood attack or misbehaving client. Eventually we'd
					// like to make the reaper fairer, but for now we'll just
					// have to ignore the accept error and move on.
					if ((errno == EMFILE) || (errno == ENFILE)) {
						if (last_fd_print != (cf_getms() / 1000L)) {
							cf_info(AS_DEMARSHAL, "warning: hit OS file descriptor limit (EMFILE on accept), consider raising limit");
							last_fd_print = cf_getms() / 1000L;
						}

						continue;
					}

					cf_crash(AS_DEMARSHAL, "accept: %s (errno %d)", cf_strerror(errno), errno);
				}

				// Get the client IP address in string form.
				if (caddr.sin_family == AF_INET) {
					if (NULL == inet_ntop(AF_INET, &caddr.sin_addr.s_addr, (char *)cpaddr, sizeof(cpaddr))) {
						cf_crash(AS_DEMARSHAL, "inet_ntop(): %s (errno %d)", cf_strerror(errno), errno);
					}
				}
				else if (caddr.sin_family == AF_INET6) {
					struct sockaddr_in6* addr_in6 = (struct sockaddr_in6*)&caddr;

					if (NULL == inet_ntop(AF_INET6, &addr_in6->sin6_addr, (char *)cpaddr, sizeof(cpaddr))) {
						cf_crash(AS_DEMARSHAL, "inet_ntop(): %s (errno %d)", cf_strerror(errno), errno);
					}
				}
				else {
					cf_crash(AS_DEMARSHAL, "unknown address family %u", caddr.sin_family);
				}

				cf_detail(AS_DEMARSHAL, "new connection: %s (fd %d)", cpaddr, csocket);

				// Validate the limit of protocol connections we allow.
				uint32_t conns_open = g_config.proto_connections_opened - g_config.proto_connections_closed;

				if (conns_open > g_config.n_proto_fd_max) {
					if ((last_fd_print + 5000L) < cf_getms()) { // no more than 5 secs
						cf_warning(AS_DEMARSHAL, "dropping incoming client connection: hit limit %d connections", conns_open);
						last_fd_print = cf_getms();
					}

					shutdown(csocket, SHUT_RDWR);
					close(csocket);
					csocket = -1;
					continue;
				}

				// Set the socket to nonblocking.
				if (-1 == cf_socket_set_nonblocking(csocket)) {
					cf_info(AS_DEMARSHAL, "unable to set client socket to nonblocking mode");
					shutdown(csocket, SHUT_RDWR);
					close(csocket);
					csocket = -1;
					continue;
				}

				// Create as_file_handle and queue it up in epoll_fd for
				// further communication on one of the demarshal threads.
				as_file_handle *fd_h = cf_rc_alloc(sizeof(as_file_handle));

				if (!fd_h) {
					cf_crash(AS_DEMARSHAL, "malloc");
				}

				sprintf(fd_h->client, "%s:%d", cpaddr, ntohs(caddr.sin_port));
				fd_h->fd = csocket;
				fd_h->last_used = cf_getms();
				fd_h->reap_me = false;
				fd_h->trans_active = false;
				fd_h->proto = 0;
				fd_h->proto_unread = 0;
				fd_h->fh_info = 0;
				fd_h->security_filter = as_security_filter_create();

				// Insert into the global table so the reaper can manage it.
				// Do this before queueing it up for demarshal threads - once
				// EPOLL_CTL_ADD is done it's difficult to back out (if insert
				// into global table fails) because fd state could be anything.
				cf_rc_reserve(fd_h);

				pthread_mutex_lock(&g_file_handle_a_LOCK);

				int j;
				bool inserted = true;

				if (0 != cf_queue_pop(g_freeslot, &j, CF_QUEUE_NOWAIT)) {
					inserted = false;
				}
				else {
					g_file_handle_a[j] = fd_h;
				}

				pthread_mutex_unlock(&g_file_handle_a_LOCK);

				if (!inserted) {
					cf_info(AS_DEMARSHAL, "unable to add socket to file handle table");
					shutdown(csocket, SHUT_RDWR);
					close(csocket);
					csocket = -1;
					cf_rc_free(fd_h); // will free even with ref-count of 2
				}
				else {
					// Place the client socket in the event queue.
					memset(&ev, 0, sizeof(ev));
					ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
					ev.data.ptr = fd_h;

					// Round-robin pick up demarshal thread epoll_fd and add
					// this new connection to epoll.
					int id;

					while (true) {
						id = (id_cntr++) % g_demarshal_args->num_threads;

						if (g_demarshal_args->epoll_fd[id] != 0) {
							break;
						}
					}

					fd_h->epoll_fd = g_demarshal_args->epoll_fd[id];

					if (0 > (n = epoll_ctl(fd_h->epoll_fd, EPOLL_CTL_ADD, csocket, &ev))) {
						cf_info(AS_DEMARSHAL, "unable to add socket to event queue of demarshal thread %d %d", id, g_demarshal_args->num_threads);
						pthread_mutex_lock(&g_file_handle_a_LOCK);
						fd_h->reap_me = true;
						as_release_file_handle(fd_h);
						fd_h = 0;
						pthread_mutex_unlock(&g_file_handle_a_LOCK);
					}
					else {
						cf_atomic_int_incr(&g_config.proto_connections_opened);
					}
				}
			}
			else {
				bool has_extra_ref = false;
				as_file_handle *fd_h = events[i].data.ptr;

				if (fd_h == 0) {
					cf_info(AS_DEMARSHAL, "event with null handle, continuing");
					goto NextEvent;
				}

				cf_detail(AS_DEMARSHAL, "epoll connection event: fd %d, events 0x%x", fd_h->fd, events[i].events);

				// Process data on an existing connection: this might be more
				// activity on an already existing transaction, so we have
				// some state to manage.
				as_proto *proto_p = 0;
				int fd = fd_h->fd;

				if (events[i].events & (EPOLLRDHUP | EPOLLERR | EPOLLHUP)) {
					cf_detail(AS_DEMARSHAL, "proto socket: remote close: fd %d event %x", fd, events[i].events);
					// no longer in use: out of epoll etc
					goto NextEvent_FD_Cleanup;
				}

				if (fd_h->trans_active) {
					goto NextEvent;
				}

				// If pointer is NULL, then we need to create a transaction
				// and store it in the buffer.
				if (fd_h->proto == NULL) {
					as_proto proto;
					int sz;

					/* Get the number of available bytes */
					if (-1 == ioctl(fd, FIONREAD, &sz)) {
						cf_info(AS_DEMARSHAL, "unable to get number of available bytes");
						goto NextEvent_FD_Cleanup;
					}

					// If we don't have enough data to fill the message
					// buffer, just wait and we'll come back to this one.
					// However, we'll let messages with zero size through,
					// since they are likely errors. We don't cleanup the FD
					// in this case since we'll get more data on it.
					if (sz < sizeof(as_proto) && sz != 0) {
						goto NextEvent;
					}

					// Do a preliminary read of the header into a stack-
					// allocated structure, so that later on we can allocate
					// the entire message buffer.
					if (0 >= (n = cf_socket_recv(fd, &proto, sizeof(as_proto), MSG_WAITALL))) {
						cf_detail(AS_DEMARSHAL, "proto socket: read header fail: error: rv %d sz was %d errno %d", n, sz, errno);
						goto NextEvent_FD_Cleanup;
					}

					if (proto.version != PROTO_VERSION &&
							// For backward compatibility, allow version 0
							// with security messages.
							! (proto.version == 0 && proto.type == PROTO_TYPE_SECURITY)) {
						cf_warning(AS_DEMARSHAL, "proto input from %s: unsupported proto version %u", fd_h->client, proto.version);
						goto NextEvent_FD_Cleanup;
					}

					// Swap the necessary elements of the as_proto.
					as_proto_swap(&proto);

					if (proto.sz > PROTO_SIZE_MAX) {
						cf_warning(AS_DEMARSHAL, "proto input from %s: msg greater than %d, likely request from non-Aerospike client, rejecting: sz %"PRIu64, fd_h->client, PROTO_SIZE_MAX, proto.sz);
						goto NextEvent_FD_Cleanup;
					}

#ifdef USE_JEM
					// Attempt to peek the namespace and set the JEMalloc
					// arena accordingly.
					size_t peeked_data_sz = 0;
					size_t min_field_sz = sizeof(uint32_t) + sizeof(char);
					size_t min_as_msg_sz = sizeof(as_msg) + min_field_sz;
					size_t peekbuf_sz = 2048; // (Arbitrary "large enough" size for peeking the fields of "most" AS_MSGs.)
					uint8_t peekbuf[peekbuf_sz];

					if (PROTO_TYPE_AS_MSG == proto.type) {
						size_t offset = sizeof(as_msg);
						// Number of bytes to peek from the socket.
//						size_t peek_sz = peekbuf_sz; // Peek up to the size of the peek buffer.
						size_t peek_sz = MIN(proto.sz, peekbuf_sz); // Peek only up to the minimum necessary number of bytes.

						if (!(peeked_data_sz = cf_socket_recv(fd, peekbuf, peek_sz, 0))) {
							// That's actually legitimate. The as_proto may
							// have gone into one packet, the as_msg into the
							// next one, which we haven't yet received. This
							// just "never happened" without async.
							cf_detail(AS_DEMARSHAL, "could not peek the as_msg header, expected %zu byte(s)", peek_sz);
						}

						if (peeked_data_sz > min_as_msg_sz) {
//							cf_debug(AS_DEMARSHAL, "(Peeked %zu bytes.)", peeked_data_sz);
							if (peeked_data_sz > proto.sz) {
								cf_warning(AS_DEMARSHAL, "Received unexpected extra data from client %s socket %d when peeking as_proto!", fd_h->client, fd);
								log_as_proto_and_peeked_data(&proto, peekbuf, peeked_data_sz);
								goto NextEvent_FD_Cleanup;
							}

							if (((as_msg*)peekbuf)->info1 & AS_MSG_INFO1_BATCH) {
								jem_set_arena(orig_arena);
							}
							else {
								uint16_t n_fields = ntohs(((as_msg *)peekbuf)->n_fields), field_num = 0;
								bool found = false;
//								cf_debug(AS_DEMARSHAL, "Found %d AS_MSG fields", n_fields);
								while (!found && (field_num < n_fields)) {
									as_msg_field *field = (as_msg_field *)(&peekbuf[offset]);
									uint32_t value_sz = ntohl(field->field_sz) - 1;
//									cf_debug(AS_DEMARSHAL, "Field #%d offset: %lu", field_num, offset);
//									cf_debug(AS_DEMARSHAL, "\tvalue_sz %u", value_sz);
//									cf_debug(AS_DEMARSHAL, "\ttype %d", field->type);
									if (AS_MSG_FIELD_TYPE_NAMESPACE == field->type) {
										if (value_sz >= AS_ID_NAMESPACE_SZ) {
											cf_warning(AS_DEMARSHAL, "namespace too long (%u) in as_msg", value_sz);
											goto NextEvent_FD_Cleanup;
										}

										char ns[AS_ID_NAMESPACE_SZ];
										found = true;
										memcpy(ns, field->data, value_sz);
										ns[value_sz] = '\0';
//										cf_debug(AS_DEMARSHAL, "Found ns \"%s\" in field #%d.", ns, field_num);
										jem_set_arena(as_namespace_get_jem_arena(ns));
									}
									else {
//										cf_debug(AS_DEMARSHAL, "Message field %d is not namespace (type %d) ~~ Reading next field", field_num, field->type);
										field_num++;
										offset += sizeof(as_msg_field) + value_sz;

										if (offset >= peeked_data_sz) {
											break;
										}
									}
								}

								if (!found) {
									cf_warning(AS_DEMARSHAL, "Can't get namespace from AS_MSG (peeked %zu bytes) ~~ Using default thr_demarshal arena.", peeked_data_sz);
									jem_set_arena(orig_arena);
								}
							}
						}
						else {
							jem_set_arena(orig_arena);
						}
					}
					else {
						jem_set_arena(orig_arena);
					}
#endif

					// Allocate the complete message buffer.
					proto_p = cf_malloc(sizeof(as_proto) + proto.sz);
					cf_assert(proto_p, AS_DEMARSHAL, CF_CRITICAL, "allocation: %zu %s", (sizeof(as_proto) + proto.sz), cf_strerror(errno));
					memcpy(proto_p, &proto, sizeof(as_proto));

#ifdef USE_JEM
					// Jam in the peeked data.
					if (peeked_data_sz) {
						memcpy(proto_p->data, &peekbuf, peeked_data_sz);
					}

					fd_h->proto_unread = proto_p->sz - peeked_data_sz;
#else
					fd_h->proto_unread = proto_p->sz;
#endif
					fd_h->proto = (void *)proto_p;
				}
				else {
					proto_p = fd_h->proto;
				}

				if (fd_h->proto_unread > 0) {
					// Read the data.
					n = cf_socket_recv(fd, proto_p->data + (proto_p->sz - fd_h->proto_unread), fd_h->proto_unread, 0);

					if (0 >= n) {
						if (errno == EAGAIN) {
							continue;
						}

						cf_info(AS_DEMARSHAL, "receive socket: fail? n %d errno %d %s closing connection.", n, errno, cf_strerror(errno));
						goto NextEvent_FD_Cleanup;
					}

					// Decrement bytes-unread counter.
					cf_detail(AS_DEMARSHAL, "read fd %d (%d %d)", fd, n, fd_h->proto_unread);
					fd_h->proto_unread -= n;
				}

				// Check for a finished read.
				if (0 == fd_h->proto_unread) {
					// It's only really live if it's injecting a transaction.
					fd_h->last_used = now_ms;

					thr_demarshal_pause(fd_h); // pause reading while the transaction is in progress

					fd_h->proto = 0;
					fd_h->proto_unread = 0;

					// INIT_TR
					as_transaction tr;
					as_transaction_init(&tr, NULL, (cl_msg *)proto_p);

					cf_rc_reserve(fd_h);
					has_extra_ref = true;
					tr.proto_fd_h = fd_h;
					tr.start_time = now_ns; // set transaction start time
					tr.preprocessed = false;

					if (! as_proto_is_valid_type(proto_p)) {
						cf_warning(AS_DEMARSHAL, "unsupported proto message type %u", proto_p->type);
						// We got a proto message type we don't recognize, so
						// it may not do any good to send back an as_msg
						// error, but it's the best we can do. At least we can
						// keep the fd.
						as_transaction_demarshal_error(&tr, AS_PROTO_RESULT_FAIL_UNKNOWN);
						cf_atomic_int_incr(&g_config.proto_transactions);
						goto NextEvent;
					}

					if (g_config.microbenchmarks) {
						histogram_insert_data_point(g_config.demarshal_hist, now_ns);
						tr.microbenchmark_time = cf_getns();
					}

					// Check if it's compressed.
					if (tr.msgp->proto.type == PROTO_TYPE_AS_MSG_COMPRESSED) {
						// Decompress it - allocate buffer to hold
						// decompressed packet.
						uint8_t *decompressed_buf = NULL;
						size_t decompressed_buf_size = 0;
						int rv = 0;

						if ((rv = as_packet_decompression((uint8_t *)proto_p, &decompressed_buf, &decompressed_buf_size))) {
							cf_warning(AS_DEMARSHAL, "as_proto decompression failed! (rv %d)", rv);
							cf_warning_binary(AS_DEMARSHAL, proto_p, sizeof(as_proto) + proto_p->sz, CF_DISPLAY_HEX_SPACED, "compressed proto_p");
							as_transaction_demarshal_error(&tr, AS_PROTO_RESULT_FAIL_UNKNOWN);
							cf_atomic_int_incr(&g_config.proto_transactions);
							goto NextEvent;
						}

						// Count the packets.
						cf_atomic_int_add(&g_config.stat_compressed_pkts_received, 1);

						// Free the compressed packet since we'll be using the
						// decompressed packet from now on.
						cf_free(proto_p);
						proto_p = NULL;

						// Get original packet.
						tr.msgp = (cl_msg *)decompressed_buf;
						as_proto_swap(&(tr.msgp->proto));

						if (! as_proto_wrapped_is_valid(&tr.msgp->proto, decompressed_buf_size)) {
							cf_warning(AS_DEMARSHAL, "decompressed unusable proto: version %u, type %u, sz %lu [%lu]", tr.msgp->proto.version, tr.msgp->proto.type, tr.msgp->proto.sz, decompressed_buf_size);
							as_transaction_demarshal_error(&tr, AS_PROTO_RESULT_FAIL_UNKNOWN);
							cf_atomic_int_incr(&g_config.proto_transactions);
							goto NextEvent;
						}
					}

					// Security protocol transactions.
					if (tr.msgp->proto.type == PROTO_TYPE_SECURITY) {
						as_security_transact(&tr);
						cf_atomic_int_incr(&g_config.proto_transactions);
						goto NextEvent;
					}

					// Info protocol requests.
					if (tr.msgp->proto.type == PROTO_TYPE_INFO) {
						if (as_info(&tr)) {
							cf_warning(AS_DEMARSHAL, "Info request failed to be enqueued ~~ Freeing protocol buffer");
							goto NextEvent_FD_Cleanup;
						}

						cf_atomic_int_incr(&g_config.proto_transactions);
						goto NextEvent;
					}

					ASD_TRANS_DEMARSHAL(nodeid, (uint64_t)tr.msgp);

					// Fast path for batch requests.
					if (tr.msgp->msg.info1 & AS_MSG_INFO1_BATCH) {
						as_batch_queue_task(&tr);
						cf_atomic_int_incr(&g_config.proto_transactions);
						goto NextEvent;
					}

					// Either process the transaction directly in this thread,
					// or queue it for processing by another thread
					// (tsvc/info).
					if (0 != thr_tsvc_process_or_enqueue(&tr)) {
						cf_warning(AS_DEMARSHAL, "Failed to queue transaction to the service thread");
						goto NextEvent_FD_Cleanup;
					}
					else {
						cf_atomic_int_incr(&g_config.proto_transactions);
					}
				}

				// Jump the proto message free & FD cleanup. If we get here,
				// the above operations went smoothly. The message free & FD
				// cleanup job is handled elsewhere as directed by
				// thr_tsvc_process_or_enqueue().
				goto NextEvent;

			NextEvent_FD_Cleanup:
				// If we allocated memory for the incoming message, free it.
				if (proto_p) {
					cf_free(proto_p);
					fd_h->proto = 0;
				}

				// If fd has extra reference for transaction, release it.
				if (has_extra_ref) {
					cf_rc_release(fd_h);
				}

				// Remove the fd from the events list.
				if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, 0) < 0) {
					cf_crash(AS_DEMARSHAL, "unable to remove socket FD %d from epoll instance FD %d: %d (%s)", fd, epoll_fd, errno, cf_strerror(errno));
				}

				pthread_mutex_lock(&g_file_handle_a_LOCK);
				fd_h->reap_me = true;
				as_release_file_handle(fd_h);
				fd_h = 0;
				pthread_mutex_unlock(&g_file_handle_a_LOCK);

			NextEvent:
				;
			}

			// We should never be canceled externally, but just in case...
			pthread_testcancel();
		}
	}

	return NULL;
}
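// A plausible sketch (an assumption - not the source's actual
// cf_socket_set_nonblocking()) of the nonblocking-mode helper used above,
// following the standard fcntl() O_NONBLOCK idiom. Nonblocking fds are what
// make the edge-triggered (EPOLLIN | EPOLLET) loop above safe: reads can
// drain a socket until EAGAIN without ever stalling the demarshal thread.
#include <fcntl.h>

static int
socket_set_nonblocking_sketch(int fd)
{
	int flags = fcntl(fd, F_GETFL, 0);

	if (flags == -1) {
		return -1;
	}

	return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}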
/* Create an internal transaction.
 * parameters:
 *		tr_create_data : Details for creating transaction
 *		tr             : to be filled
 *
 * return :  0 on success
 *          -1 on failure
 */
int
as_transaction_create_internal(as_transaction *tr, tr_create_data *trc_data)
{
	tr_create_data *d = (tr_create_data *)trc_data;

	// Get defensive.
	memset(tr, 0, sizeof(as_transaction));

	if (d->fd_h) {
		cf_warning(AS_PROTO, "Foreground internal transaction .. ignoring");
		return -1;
	}
	else {
		tr->proto_fd_h = NULL;
	}

	tr->start_time = cf_getns();
	tr->flag = AS_TRANSACTION_FLAG_INTERNAL;
	tr->keyd = d->digest;
	tr->preprocessed = true;
	tr->result_code = AS_PROTO_RESULT_OK;

	AS_PARTITION_RESERVATION_INIT(tr->rsv);
	UREQ_DATA_INIT(&tr->udata);

	// Get namespace and set lengths.
	int ns_len = strlen(d->ns->name);
	int set_len = strlen(d->set);

	// Figure out the size of the message.
	size_t msg_sz = sizeof(cl_msg);
	msg_sz += sizeof(as_msg_field) + ns_len;

	if (set_len != 0) {
		msg_sz += sizeof(as_msg_field) + set_len;
	}

	msg_sz += sizeof(as_msg_field) + sizeof(cf_digest);
	msg_sz += sizeof(as_msg_field) + sizeof(d->trid);

	// Udf call structure will go as a part of the transaction udata.
	// Do not pack it in the message.
	cf_debug(AS_PROTO, "UDF : Msg size for internal transaction is %zu", msg_sz);

	// Allocate space in the buffer - check for failure before touching it.
	uint8_t *buf = cf_malloc(msg_sz);

	if (!buf) {
		return -1;
	}

	memset(buf, 0, msg_sz);

	uint8_t *buf_r = buf;

	// Calculation of number of fields:
	// n_fields = (ns ? 1 : 0) + (set ? 1 : 0) + (digest ? 1 : 0) + (trid ? 1 : 0) + (call ? 3 : 0);

	// Write the header in case the request gets proxied. Is it enough??
	buf = as_msg_write_header(buf, msg_sz, 0, d->msg_type, 0, 0, 0, 0, 2 /*n_fields*/, 0);
	buf = as_msg_write_fields(buf, d->ns->name, ns_len, d->set, set_len, &(d->digest), 0, 0, 0, 0);

	tr->msgp = (cl_msg *)buf_r;

	return 0;
}