cf_queue_priority *
cf_queue_priority_create(size_t element_sz, bool threadsafe)
{
	cf_queue_priority *q = (cf_queue_priority*)cf_malloc(sizeof(cf_queue_priority));

	if (! q) {
		return NULL;
	}

	q->threadsafe = threadsafe;

	if (! (q->low_q = cf_queue_create(element_sz, false))) {
		goto Fail1;
	}

	if (! (q->medium_q = cf_queue_create(element_sz, false))) {
		goto Fail2;
	}

	if (! (q->high_q = cf_queue_create(element_sz, false))) {
		goto Fail3;
	}

	if (! threadsafe) {
		return q;
	}

	if (0 != pthread_mutex_init(&q->LOCK, NULL)) {
		goto Fail4;
	}

	if (0 != pthread_cond_init(&q->CV, NULL)) {
		goto Fail5;
	}

	return q;

Fail5:
	pthread_mutex_destroy(&q->LOCK);
Fail4:
	cf_queue_destroy(q->high_q);
Fail3:
	cf_queue_destroy(q->medium_q);
Fail2:
	cf_queue_destroy(q->low_q);
Fail1:
	cf_free(q);

	return NULL;
}
void
as_netio_init()
{
	g_netio_queue = cf_queue_create(sizeof(as_netio), true);

	if (! g_netio_queue) {
		cf_crash(AS_PROTO, "Failed to create netio queue");
	}

	if (pthread_create(&g_netio_th, NULL, as_netio_th, (void *)g_netio_queue)) {
		cf_crash(AS_PROTO, "Failed to create netio thread");
	}

	g_netio_slow_queue = cf_queue_create(sizeof(as_netio), true);

	if (! g_netio_slow_queue) {
		cf_crash(AS_PROTO, "Failed to create netio slow queue");
	}

	if (pthread_create(&g_netio_slow_th, NULL, as_netio_th, (void *)g_netio_slow_queue)) {
		cf_crash(AS_PROTO, "Failed to create netio slow thread");
	}
}
as_node*
as_node_create(as_cluster* cluster, const char* name, struct sockaddr_in* addr)
{
	as_node* node = cf_malloc(sizeof(as_node));

	if (!node) {
		return 0;
	}

	node->ref_count = 1;
	node->partition_generation = 0xFFFFFFFF;
	node->cluster = cluster;
	strcpy(node->name, name);
	node->address_index = 0;

	as_vector_init(&node->addresses, sizeof(as_address), 2);
	as_node_add_address(node, addr);

	node->conn_q = cf_queue_create(sizeof(int), true);
	// node->conn_q_asyncfd = cf_queue_create(sizeof(int), true);
	// node->asyncwork_q = cf_queue_create(sizeof(cl_async_work*), true);

	node->info_fd = -1;
	node->friends = 0;
	node->failures = 0;
	node->index = 0;
	node->active = true;

	return node;
}
cf_queue_priority *
cf_queue_priority_create(size_t elementsz, bool threadsafe)
{
	cf_queue_priority *q = (cf_queue_priority*)malloc(sizeof(cf_queue_priority));
	if (!q)	return(0);

	q->threadsafe = threadsafe;

	q->low_q = cf_queue_create(elementsz, false);
	if (!q->low_q)		goto Fail1;

	q->medium_q = cf_queue_create(elementsz, false);
	if (!q->medium_q)	goto Fail2;

	q->high_q = cf_queue_create(elementsz, false);
	if (!q->high_q)		goto Fail3;

	if (threadsafe == false)
		return(q);

#ifdef EXTERNAL_LOCKS
	q->LOCK = cf_hooked_mutex_alloc();
	if (!q->LOCK)		goto Fail5;
#else
	if (0 != pthread_mutex_init(&q->LOCK, NULL))
		goto Fail4;

	if (0 != pthread_cond_init(&q->CV, NULL))
		goto Fail5;
#endif // EXTERNAL_LOCKS

	return(q);

Fail5:
#ifdef EXTERNAL_LOCKS
	cf_hooked_mutex_free(q->LOCK);
#else
	pthread_mutex_destroy(&q->LOCK);
Fail4:
#endif // EXTERNAL_LOCKS
	cf_queue_destroy(q->high_q);
Fail3:
	cf_queue_destroy(q->medium_q);
Fail2:
	cf_queue_destroy(q->low_q);
Fail1:
	free(q);
	return(0);
}
cf_queue_priority *
cf_queue_priority_create(size_t elementsz, bool threadsafe)
{
	cf_queue_priority *q = malloc(sizeof(cf_queue_priority));
	if (!q)	return(0);

	q->threadsafe = threadsafe;

	q->low_q = cf_queue_create(elementsz, false);
	if (!q->low_q)		goto Fail1;

	q->medium_q = cf_queue_create(elementsz, false);
	if (!q->medium_q)	goto Fail2;

	q->high_q = cf_queue_create(elementsz, false);
	if (!q->high_q)		goto Fail3;

	if (threadsafe == false)
		return(q);

	if (0 != pthread_mutex_init(&q->LOCK, NULL))
		goto Fail4;

	if (0 != pthread_cond_init(&q->CV, NULL))
		goto Fail5;

	return(q);

Fail5:
	pthread_mutex_destroy(&q->LOCK);
Fail4:
	cf_queue_destroy(q->high_q);
Fail3:
	cf_queue_destroy(q->medium_q);
Fail2:
	cf_queue_destroy(q->low_q);
Fail1:
	free(q);
	return(0);
}
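A minimal usage sketch for the priority-queue constructors above. It assumes the usual companions from the same library (cf_queue_priority_push, cf_queue_priority_pop, cf_queue_priority_destroy) and the CF_QUEUE_PRIORITY_* / CF_QUEUE_FOREVER constants; none of those appear in these excerpts, so treat the exact signatures as assumptions rather than confirmed API.

// Hedged usage sketch (not taken from the sources above): push work items at
// two priorities and drain them with a blocking pop.
typedef struct { int op; } work;

static void
priority_queue_example(void)
{
	cf_queue_priority *q = cf_queue_priority_create(sizeof(work), true);

	if (! q) {
		return;
	}

	work low = { .op = 1 };
	work high = { .op = 2 };

	cf_queue_priority_push(q, &low, CF_QUEUE_PRIORITY_LOW);
	cf_queue_priority_push(q, &high, CF_QUEUE_PRIORITY_HIGH);

	work next;

	// The high-priority element is expected to pop first.
	cf_queue_priority_pop(q, &next, CF_QUEUE_FOREVER);

	cf_queue_priority_destroy(q);
}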
// Initialize batch queues and worker threads.
void
as_batch_init()
{
	if (cf_atomic32_incr(&g_batch_init) != 1) {
		return;
	}

	cf_info(AS_BATCH, "Initialize %d batch worker threads.", g_config.n_batch_threads);
	g_batch_queue = cf_queue_create(sizeof(batch_transaction), true);

	int max = g_config.n_batch_threads;

	for (int i = 0; i < max; i++) {
		pthread_create(&g_batch_threads[i], 0, batch_process_queue, (void*)g_batch_queue);
	}
}
/**
 * Initializes a cl_scan
 */
cl_scan * cl_scan_init(cl_scan * scan, const char * ns, const char * setname, uint64_t *job_id)
{
	if ( scan == NULL ) return scan;

	cf_queue * result_queue = cf_queue_create(sizeof(void *), true);
	if ( !result_queue ) {
		scan->res_streamq = NULL;
		return scan;
	}

	scan->res_streamq = result_queue;
	scan->job_id = (cf_get_rand64())/2;
	*job_id = scan->job_id;
	scan->setname = setname == NULL ? NULL : strdup(setname);
	scan->ns = ns == NULL ? NULL : strdup(ns);

	cl_scan_params_init(&scan->params, NULL);
	cl_scan_udf_init(&scan->udf, CL_SCAN_UDF_NONE, NULL, NULL, NULL);

	return scan;
}
static void
create_async_info_queue()
{
	int i;
	uintptr_t info;
	as_async_info_t *temp_info;

	async_info_queue = cf_queue_create(sizeof(uintptr_t), true);

	async_info_array = (as_async_info_t*)malloc(MAX_READ_REQS_QUEUED * sizeof(as_async_info_t));
	if (async_info_array == NULL) {
		fprintf(stdout, "Error: Malloc info structs failed.\n Exiting. \n");
		cf_queue_destroy(async_info_queue);
		exit(-1);
	}

	for (i = 0; i < MAX_READ_REQS_QUEUED; i++) {
		temp_info = async_info_array + i;
		info = (uintptr_t)temp_info;
		cf_queue_push(async_info_queue, (void*)&info);
	}
}
int
cf_queue_test_1()
{
	pthread_t	write_th;
	pthread_t	read_th;
	cf_queue	*q;

	q = cf_queue_create(sizeof(int), true);

	pthread_create(&write_th, 0, cf_queue_test_1_write, q);
	pthread_create(&read_th, 0, cf_queue_test_1_read, q);

	void *th_return;

	if (0 != pthread_join(write_th, &th_return)) {
		fprintf(stderr, "queue test 1: could not join1 %d\n", errno);
		return(-1);
	}

	if (0 != th_return) {
		fprintf(stderr, "queue test 1: returned error %p\n", th_return);
		return(-1);
	}

	if (0 != pthread_join(read_th, &th_return)) {
		fprintf(stderr, "queue test 1: could not join2 %d\n", errno);
		return(-1);
	}

	if (0 != th_return) {
		fprintf(stderr, "queue test 1: returned error 2 %p\n", th_return);
		return(-1);
	}

	cf_queue_destroy(q);

	return(0);
}
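The writer and reader thread bodies referenced by cf_queue_test_1 are not included in these excerpts. The following is a hypothetical sketch of what they could look like, using only cf_queue_push and the blocking cf_queue_pop seen elsewhere above; TEST1_COUNT is an illustrative constant, not from the source.

// Hypothetical thread bodies for the test above: the writer pushes a fixed
// number of ints, the reader pops them back with a blocking pop. Each returns
// NULL on success and a non-zero pointer on failure, matching the join checks.
#define TEST1_COUNT 10000

void *
cf_queue_test_1_write(void *arg)
{
	cf_queue *q = (cf_queue *)arg;

	for (int i = 0; i < TEST1_COUNT; i++) {
		if (0 != cf_queue_push(q, &i)) {
			return((void *)-1);
		}
	}

	return((void *)0);
}

void *
cf_queue_test_1_read(void *arg)
{
	cf_queue *q = (cf_queue *)arg;

	for (int i = 0; i < TEST1_COUNT; i++) {
		int v;

		if (0 != cf_queue_pop(q, &v, CF_QUEUE_FOREVER)) {
			return((void *)-1);
		}
	}

	return((void *)0);
}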
int
cl_cluster_scan_init(cl_cluster* asc)
{
	// We do this lazily, during the first scan request, so make sure it's only
	// done once.
	if (cf_atomic32_incr(&asc->scan_initialized) > 1 || asc->scan_q) {
		return 0;
	}

	if (cf_debug_enabled()) {
		LOG("[DEBUG] cl_cluster_scan_init: creating %d threads\n", NUM_SCAN_THREADS);
	}

	// Create dispatch queue.
	asc->scan_q = cf_queue_create(sizeof(cl_scan_task), true);

	// Create thread pool.
	for (int i = 0; i < NUM_SCAN_THREADS; i++) {
		pthread_create(&asc->scan_threads[i], 0, cl_scan_worker, (void*)asc);
	}

	return 0;
}
int
main(int argc, char* argv[])
{
	signal(SIGSEGV, as_sig_handle_segv);
	signal(SIGTERM, as_sig_handle_term);

	fprintf(stdout, "\nAerospike act - device IO test\n");
	fprintf(stdout, "Copyright 2011 by Aerospike. All rights reserved.\n\n");

	if (! configure(argc, argv)) {
		exit(-1);
	}

	set_schedulers();
	srand(time(NULL));
	// rand_seed(g_rand_64_buffer);

	salter salters[g_num_write_buffers ? g_num_write_buffers : 1];

	g_salters = salters;

	if (! create_salters()) {
		exit(-1);
	}

	device devices[g_num_devices];
	readq readqs[g_num_queues];

	g_devices = devices;
	g_readqs = readqs;

	// TODO - 'salt' drive?

	g_p_large_block_read_histogram = histogram_create();
	g_p_large_block_write_histogram = histogram_create();
	g_p_raw_read_histogram = histogram_create();
	g_p_read_histogram = histogram_create();

	g_run_start_us = cf_getus();

	uint64_t run_stop_us = g_run_start_us + g_run_us;

	g_running = 1;

	for (int n = 0; n < g_num_devices; n++) {
		device* p_device = &g_devices[n];

		p_device->name = g_device_names[n];
		p_device->p_fd_queue = cf_queue_create(sizeof(int), true);
		discover_num_blocks(p_device);
		create_large_block_read_buffer(p_device);
		p_device->p_raw_read_histogram = histogram_create();
		sprintf(p_device->histogram_tag, "%-18s", p_device->name);

		if (pthread_create(&p_device->large_block_read_thread, NULL,
				run_large_block_reads, (void*)p_device)) {
			fprintf(stdout, "ERROR: create large block read thread %d\n", n);
			exit(-1);
		}

		if (pthread_create(&p_device->large_block_write_thread, NULL,
				run_large_block_writes, (void*)p_device)) {
			fprintf(stdout, "ERROR: create write thread %d\n", n);
			exit(-1);
		}
	}

	for (int i = 0; i < g_num_queues; i++) {
		readq* p_readq = &g_readqs[i];

		p_readq->p_req_queue = cf_queue_create(sizeof(readreq*), true);
		p_readq->threads = malloc(sizeof(pthread_t) * g_threads_per_queue);

		for (int j = 0; j < g_threads_per_queue; j++) {
			if (pthread_create(&p_readq->threads[j], NULL, run_reads,
					(void*)p_readq->p_req_queue)) {
				fprintf(stdout, "ERROR: create read thread %d:%d\n", i, j);
				exit(-1);
			}
		}
	}

	pthread_t thr_add_readreqs;

	if (pthread_create(&thr_add_readreqs, NULL, run_add_readreqs, NULL)) {
		fprintf(stdout, "ERROR: create thread thr_add_readreqs\n");
		exit(-1);
	}

	fprintf(stdout, "\n");

	uint64_t now_us;
	uint64_t count = 0;

	while ((now_us = cf_getus()) < run_stop_us && g_running) {
		count++;

		int sleep_us = (int)
			((count * g_report_interval_us) - (now_us - g_run_start_us));

		if (sleep_us > 0) {
			usleep((uint32_t)sleep_us);
		}

		fprintf(stdout, "After %" PRIu64 " sec:\n",
			(count * g_report_interval_us) / 1000000);

		fprintf(stdout, "read-reqs queued: %" PRIu64 "\n",
			cf_atomic_int_get(g_read_reqs_queued));

		histogram_dump(g_p_large_block_read_histogram, "LARGE BLOCK READS ");
		histogram_dump(g_p_large_block_write_histogram, "LARGE BLOCK WRITES");
		histogram_dump(g_p_raw_read_histogram, "RAW READS ");

		for (int d = 0; d < g_num_devices; d++) {
			histogram_dump(g_devices[d].p_raw_read_histogram,
				g_devices[d].histogram_tag);
		}

		histogram_dump(g_p_read_histogram, "READS ");
		fprintf(stdout, "\n");
		fflush(stdout);
	}

	g_running = 0;

	void* pv_value;

	pthread_join(thr_add_readreqs, &pv_value);

	for (int i = 0; i < g_num_queues; i++) {
		readq* p_readq = &g_readqs[i];

		for (int j = 0; j < g_threads_per_queue; j++) {
			pthread_join(p_readq->threads[j], &pv_value);
		}

		cf_queue_destroy(p_readq->p_req_queue);
		free(p_readq->threads);
	}

	for (int d = 0; d < g_num_devices; d++) {
		device* p_device = &g_devices[d];

		pthread_join(p_device->large_block_read_thread, &pv_value);
		pthread_join(p_device->large_block_write_thread, &pv_value);

		fd_close_all(p_device);
		cf_queue_destroy(p_device->p_fd_queue);
		free(p_device->p_large_block_read_buffer);
		free(p_device->p_raw_read_histogram);
	}

	free(g_p_large_block_read_histogram);
	free(g_p_large_block_write_histogram);
	free(g_p_raw_read_histogram);
	free(g_p_read_histogram);

	destroy_salters();

	return (0);
}
cf_vector *
cl_scan_execute(cl_cluster * cluster, const cl_scan * scan, char * node_name, cl_rv * res, int (* callback)(as_val *, void *), void * udata)
{
	cl_rv		rc = CITRUSLEAF_OK;
	uint8_t		wr_stack_buf[STACK_BUF_SZ] = { 0 };
	uint8_t *	wr_buf = wr_stack_buf;
	size_t		wr_buf_sz = sizeof(wr_stack_buf);
	int		node_count = 0;
	cl_node_response	response;

	rc = scan_compile(scan, &wr_buf, &wr_buf_sz);

	if ( rc != CITRUSLEAF_OK ) {
		LOG("[ERROR] cl_scan_execute: scan compile failed: \n");
		*res = rc;
		return NULL;
	}

	// Setup worker
	cl_scan_task task = {
		.asc		= cluster,
		.ns		= scan->ns,
		.scan_buf	= wr_buf,
		.scan_sz	= wr_buf_sz,
		.udata		= udata,
		.callback	= callback,
		.job_id		= scan->job_id,
		.type		= scan->udf.type,
	};

	task.complete_q = cf_queue_create(sizeof(cl_node_response), true);
	cf_vector * result_v = NULL;

	// If node_name is not null, we are executing scan on a particular node
	if (node_name) {
		// Copy the node name in the task and push it in the global scan queue.
		// One task for each node.
		strcpy(task.node_name, node_name);
		cf_queue_push(cluster->scan_q, &task);
		node_count = 1;
	}
	else {
		// Node name is NULL, we have to scan all nodes
		char *node_names = NULL;

		// Get a list of the node names, so we can send work to each node
		cl_cluster_get_node_names(cluster, &node_count, &node_names);
		if ( node_count == 0 ) {
			LOG("[ERROR] cl_scan_execute: don't have any nodes?\n");
			*res = CITRUSLEAF_FAIL_CLIENT;
			goto Cleanup;
		}

		// Dispatch work to the worker queue to allow the transactions in parallel.
		// NOTE: if a new node is introduced in the middle, it is NOT taken care of.
		node_name = node_names;
		for ( int i=0; i < node_count; i++ ) {
			// fill in per-request specifics
			strcpy(task.node_name, node_name);
			cf_queue_push(cluster->scan_q, &task);
			node_name += NODE_NAME_SIZE;
		}
		free(node_names);
		node_names = NULL;
	}

	// Wait for the work to complete from all the nodes.
	// For every node, fill in the return value in the result vector.
	result_v = cf_vector_create(sizeof(cl_node_response), node_count, 0);
	for ( int i=0; i < node_count; i++ ) {
		// Pop the response structure
		cf_queue_pop(task.complete_q, &response, CF_QUEUE_FOREVER);
		cf_vector_append(result_v, &response);
	}

Cleanup:
	if ( wr_buf && (wr_buf != wr_stack_buf) ) {
		free(wr_buf);
		wr_buf = 0;
	}
	cf_queue_destroy(task.complete_q);

	return result_v;
}

/**
 * Allocates and initializes a new cl_scan.
 */
cl_scan *
cl_scan_new(const char * ns, const char * setname, uint64_t *job_id)
{
	cl_scan * scan = (cl_scan*) malloc(sizeof(cl_scan));
	memset(scan, 0, sizeof(cl_scan));
	return cl_scan_init(scan, ns, setname, job_id);
}
void
ai_btree_init(void)
{
	if (!g_q_dig_arr) {
		g_q_dig_arr = cf_queue_create(sizeof(void *), true);
	}
}
int
ccnl_sched_create_default_rnet(struct ccnl_sched_s *sched, int inter_packet_interval)
{
	char name[32];
	int law, k1, k2, e0;
	struct cf_molecule *s, *e, *es, *p;
	struct cf_reaction *r1, *r2;
	cf_time now;

	DEBUGMSG(TRACE, "%s()\n", __FUNCTION__);

	if (inter_packet_interval) {
		law = CF_LAW_MASS_ACTION;
		k1 = 100;
		k2 = 10;
		e0 = 1000000 / (k2 * inter_packet_interval);
	} else {
		law = CF_LAW_IMMEDIATE;
		k1 = 0;
		k2 = 0;
		e0 = 1;
	}

	// create reaction network
	sprintf(name, "%p", sched);
	sched->rn = cf_rnet_create(engine, name, cf_handle_null);
	if (!sched->rn)
		goto err_out;

	// create the queue abstraction
	sched->q = cf_queue_create(sched->rn, "Q", cf_handle_null);
	if (!sched->q)
		goto err_out;
	if (cf_queue_set_molecules_per_packet(sched->q, 1) ||
	    cf_queue_set_op_callback(sched->q, &qcb, sched))
		goto err_out;

	// create molecules and reactions
	s = cf_molecule_create(sched->rn, "S", cf_handle_null);
	e = cf_molecule_create(sched->rn, "E", cf_handle_null);
	es = cf_molecule_create(sched->rn, "ES", cf_handle_null);
	p = cf_molecule_create(sched->rn, "P", cf_handle_null);
	r1 = cf_reaction_create(sched->rn, "r1", cf_handle_null);
	r2 = cf_reaction_create(sched->rn, "r2", cf_handle_null);
	if (!s || !e || !es || !p || !r1 || !r2)
		goto err_out;

	// configure molecules and reactions
	if (cf_molecule_set_initial_concentration(e, e0) ||
	    cf_reaction_set_law(r1, law, cf_uint_to_dfp(k1), 0) ||
	    cf_reaction_add_reactant(r1, s) ||
	    cf_reaction_add_reactant(r1, e) ||
	    cf_reaction_add_product(r1, es) ||
	    cf_reaction_set_law(r2, law, cf_uint_to_dfp(k2), 0) ||
	    cf_reaction_add_reactant(r2, es) ||
	    cf_reaction_add_product(r2, e) ||
	    cf_reaction_add_product(r2, p))
		goto err_out;

	/* set workbench position */
	cf_object_set_pos(&s->obj, 10, 170);
	cf_object_set_pos(&e->obj, 220, 90);
	cf_object_set_pos(&es->obj, 220, 170);
	cf_object_set_pos(&p->obj, 430, 170);
	cf_object_set_pos(&r1->obj, 130, 170);
	cf_object_set_pos(&r2->obj, 320, 170);
	cf_object_set_pos(&sched->q->obj, 220, 280);

	// link queue to input and output molecule
	if (cf_queue_set_input_molecule(sched->q, s) ||
	    cf_queue_set_output_molecule(sched->q, p))
		goto err_out;

	// prevent destruction of the created reaction network, queue and
	// linked molecules by the user via the chemflow configuration interface
	sched->rn->obj.destroylock = 1;
	sched->q->obj.destroylock = 1;
	s->obj.destroylock = 1;
	p->obj.destroylock = 1;

	now = ccnl_cf_now();
	cf_rnet_reset(sched->rn, now);
	cf_engine_reschedule_and_set_timer(engine, now);

	return 0;

err_out:
	// destroy reaction network and all its children
	if (sched->rn) {
		cf_rnet_destroy(sched->rn);
		sched->rn = NULL;
		sched->q = NULL;
	}
	return -1;
}
int
main(int argc, char* argv[])
{
	signal(SIGSEGV, as_sig_handle_segv);
	signal(SIGTERM, as_sig_handle_term);

	fprintf(stdout, "\nAerospike act - device IO test\n");
	fprintf(stdout, "Copyright 2011 by Aerospike. All rights reserved.\n\n");

	if (! configure(argc, argv)) {
		exit(-1);
	}

	set_schedulers();
	srand(time(NULL));
	// rand_seed(g_rand_64_buffer);

	salter salters[g_num_write_buffers ? g_num_write_buffers : 1];

	g_salters = salters;

	if (! create_salters()) {
		exit(-1);
	}

	device devices[g_num_devices];
	g_devices = devices;

	g_p_large_block_read_histogram = histogram_create();
	g_p_large_block_write_histogram = histogram_create();
	g_p_raw_read_histogram = histogram_create();
	g_p_read_histogram = histogram_create();

	g_run_start_ms = cf_getms();

	uint64_t run_stop_ms = g_run_start_ms + g_run_ms;

	g_running = 1;

	int n;
	for (n = 0; n < g_num_devices; n++) {
		device* p_device = &g_devices[n];

		p_device->name = g_device_names[n];
		p_device->p_fd_queue = cf_queue_create(sizeof(int), true);
		discover_num_blocks(p_device);
		create_large_block_read_buffer(p_device);
		p_device->p_raw_read_histogram = histogram_create();
		sprintf(p_device->histogram_tag, "%-18s", p_device->name);

		if (pthread_create(&p_device->large_block_read_thread, NULL,
				run_large_block_reads, (void*)p_device)) {
			fprintf(stdout, "Error: create large block read thread %d\n", n);
			exit(-1);
		}

		if (pthread_create(&p_device->large_block_write_thread, NULL,
				run_large_block_writes, (void*)p_device)) {
			fprintf(stdout, "Error: create write thread %d\n", n);
			exit(-1);
		}
	}

	aio_context_t aio_context = 0;
	if (io_setup(MAXEVENTS, &aio_context) != 0) {
		fprintf(stdout, "Error: AIO context not set up \n");
		exit(-1);
	}
	create_async_info_queue();

	/* read events generating thread */
	pthread_t read_generator;
	if (pthread_create(&read_generator, NULL, &generate_async_reads, (void*)&aio_context)) {
		fprintf(stdout, "Error: create read generator thread\n");
		exit(-1);
	}

	/* Create the worker threads */
	pthread_t workers[g_worker_threads];
	int j;
	for (j = 0; j < g_worker_threads; j++) {
		if (pthread_create(&workers[j], NULL, &worker_func, (void *)(&aio_context))) {
			fprintf(stdout, "Error: creating worker thread %d failed\n", j);
			exit(-1);
		}
	}

	fprintf(stdout, "\n");

	uint64_t now_ms;
	uint64_t time_count = 0;
	int nanosleep_ret = -1;
	struct timespec initial, remaining;

	while ((now_ms = cf_getms()) < run_stop_ms && g_running) {
		time_count++;

		int sleep_ms = (int)
			((time_count * g_report_interval_ms) - (now_ms - g_run_start_ms));

		if (sleep_ms > 0) {
			initial.tv_sec = sleep_ms / 1000;
			initial.tv_nsec = (sleep_ms % 1000) * 1000000;

retry:
			memset(&remaining, 0, sizeof(remaining));
			nanosleep_ret = nanosleep(&initial, &remaining);
			if (nanosleep_ret == -1 && errno == EINTR) {
				/* Interrupted by a signal */
				initial.tv_sec = remaining.tv_sec;
				initial.tv_nsec = remaining.tv_nsec;
				goto retry;
			}
		}

		fprintf(stdout, "After %" PRIu64 " sec:\n",
			(time_count * g_report_interval_ms) / 1000);

		fprintf(stdout, "read-reqs queued: %" PRIu64 "\n",
			cf_atomic_int_get(g_read_reqs_queued));

		histogram_dump(g_p_large_block_read_histogram, "LARGE BLOCK READS ");
		histogram_dump(g_p_large_block_write_histogram, "LARGE BLOCK WRITES");
		histogram_dump(g_p_raw_read_histogram, "RAW READS ");

		int d;
		for (d = 0; d < g_num_devices; d++) {
			histogram_dump(g_devices[d].p_raw_read_histogram,
				g_devices[d].histogram_tag);
		}

		histogram_dump(g_p_read_histogram, "READS ");
		fprintf(stdout, "\n");
		fflush(stdout);
	}

	fprintf(stdout, "\nTEST COMPLETED \n");

	g_running = 0;

	int i;
	// TODO aio_destroy?

	/* Freeing resources used by async */
	void* ret_value;
	for (i = 0; i < g_worker_threads; i++) {
		pthread_join(workers[i], &ret_value);
	}

	destroy_async_info_queue();

	int d;
	for (d = 0; d < g_num_devices; d++) {
		device* p_device = &g_devices[d];

		pthread_join(p_device->large_block_read_thread, &ret_value);
		pthread_join(p_device->large_block_write_thread, &ret_value);

		fd_close_all(p_device);
		cf_queue_destroy(p_device->p_fd_queue);
		free(p_device->p_large_block_read_buffer);
		free(p_device->p_raw_read_histogram);
	}

	free(g_p_large_block_read_histogram);
	free(g_p_large_block_write_histogram);
	free(g_p_raw_read_histogram);
	free(g_p_read_histogram);

	destroy_salters();

	return (0);
}
//Same as do_the_full_monte, but only till the command is sent to the node.
//Most of the code is duplicated. Bad.
int
cl_do_async_monte(cl_cluster *asc, int info1, int info2, const char *ns, const char *set,
	const cl_object *key, const cf_digest *digest, cl_bin **values, cl_operator operator,
	cl_operation **operations, int *n_values, uint32_t *cl_gen,
	const cl_write_parameters *cl_w_p, uint64_t *trid, void *udata)
{
	cl_async_work	*workitem = NULL;

	uint8_t		wr_stack_buf[STACK_BUF_SZ];
	uint8_t		*wr_buf = wr_stack_buf;
	size_t		wr_buf_sz = sizeof(wr_stack_buf);

	int		progress_timeout_ms;
	uint64_t	deadline_ms;
	uint64_t	starttime, endtime;
	bool		network_error;
	int		fd = -1;
	int		rv = CITRUSLEAF_FAIL_CLIENT;	//Assume that this is a failure

	// as_msg	msg;
	cf_digest	d_ret;
	cl_cluster_node	*node = 0;

#if ONEASYNCFD
	if (shash_get_size(g_cl_async_hashtab) >= g_async_h_szlimit) {
		//cf_error("Async hashtab is full. Cannot insert any more elements");
		return CITRUSLEAF_FAIL_ASYNCQ_FULL;
	}
#else
	//If the async buffer is at the max limit, do not entertain more requests.
	if (cf_queue_sz(g_cl_async_q) >= cf_atomic32_get(g_async_q_szlimit)) {
		//cf_error("Async buffer is full. Cannot insert any more elements");
		return CITRUSLEAF_FAIL_ASYNCQ_FULL;
	}
#endif

	//Allocate memory for work item that will be added to the async work list
	if (cf_queue_sz(g_cl_workitems_freepool_q) > 0) {
		cf_queue_pop(g_cl_workitems_freepool_q, &workitem, CF_QUEUE_FOREVER);
	} else {
		workitem = malloc(sizeof(cl_async_work));
		if (workitem == NULL) {
			return CITRUSLEAF_FAIL_CLIENT;
		}
	}

	//Compile the write buffer to be sent to the cluster
	if (n_values && (values || operations)) {
		cl_compile(info1, info2, 0, ns, set, key, digest, values ? *values : NULL, operator,
				operations ? *operations : NULL, *n_values, &wr_buf, &wr_buf_sz, cl_w_p,
				&d_ret, *trid, NULL, NULL, 0 /*udf_type*/);
	} else {
		cl_compile(info1, info2, 0, ns, set, key, digest, 0, 0, 0, 0, &wr_buf, &wr_buf_sz,
				cl_w_p, &d_ret, *trid, NULL, NULL, 0 /*udf_type*/);
	}

	deadline_ms = 0;
	progress_timeout_ms = 0;
	if (cl_w_p && cl_w_p->timeout_ms) {
		deadline_ms = cf_getms() + cl_w_p->timeout_ms;
		// policy: if asking for a long timeout, give enough time to try twice
		if (cl_w_p->timeout_ms > 700) {
			progress_timeout_ms = cl_w_p->timeout_ms / 2;
		}
		else {
			progress_timeout_ms = cl_w_p->timeout_ms;
		}
	}
	else {
		progress_timeout_ms = g_async_nw_progress_timeout;
	}

	//Initialize the async work unit
	workitem->trid = *trid;
	workitem->deadline = deadline_ms;
	workitem->starttime = cf_getms();
	workitem->udata = udata;

	as_msg *msgp;
	// Hate special cases, but we have to clear the verify bit on delete verify
	if ((info2 & CL_MSG_INFO2_DELETE) && (info1 & CL_MSG_INFO1_VERIFY)) {
		msgp = (as_msg *)wr_buf;
		msgp->m.info1 &= ~CL_MSG_INFO1_VERIFY;
	}

	if (asc->compression_stat.compression_threshold > 0
			&& wr_buf_sz > (size_t)asc->compression_stat.compression_threshold) {
		/* Compression is enabled.
		 * Packet size is above threshold.
		 * Compress the data.
		 */
		uint8_t *compressed_buf = NULL;
		size_t compressed_buf_sz = 0;

		// Construct packet for compressed data.
		cf_packet_compression(wr_buf, wr_buf_sz, &compressed_buf, &compressed_buf_sz);
		if (compressed_buf) {
			// If original packet size is > 16k, cl_compile had allocated memory for it.
			// Free that memory.
			// cf_packet_compression will allocate memory for compressed packet.
			if (wr_buf != wr_stack_buf) {
				free(wr_buf);
			}
			// Update stats.
			citrusleaf_cluster_put_compression_stat(asc, wr_buf_sz, compressed_buf_sz);
			wr_buf = compressed_buf;
			wr_buf_sz = compressed_buf_sz;
			//memcpy (wr_buf, compressed_buf, compressed_buf_sz);
			//wr_buf_sz = compressed_buf_sz;
			//free (compressed_buf);
		}
		//else compression failed, continue with uncompressed packet
		else {
			// Set compression stat
			citrusleaf_cluster_put_compression_stat(asc, wr_buf_sz, wr_buf_sz);
		}
	}

	int try = 0;

	// retry request based on the write_policy
	do {
		network_error = false;
		try++;
#ifdef DEBUG
		if (try > 1) {
			cf_debug("request retrying try %d tid %zu", try, (uint64_t)pthread_self());
		}
#endif

		// Get an FD from a cluster. First get the probable node for the given digest.
		node = cl_cluster_node_get(asc, ns, &d_ret, info2 & CL_MSG_INFO2_WRITE ? true : false);
		if (!node) {
#ifdef DEBUG
			cf_debug("warning: no healthy nodes in cluster, retrying");
#endif
			usleep(10000);	//Sleep for 10ms
			goto Retry;
		}

		// Now get the dedicated async FD of this node
		starttime = cf_getms();
		fd = cl_cluster_node_fd_get(node, true);
		endtime = cf_getms();
		if ((endtime - starttime) > 10) {
			cf_debug("Time to get FD for a node (>10ms)=%"PRIu64, (endtime - starttime));
		}
		if (fd == -1) {
#ifdef DEBUG
			cf_debug("warning: node %s has no async file descriptors, retrying transaction (tid %zu)",
					node->name, (uint64_t)pthread_self());
#endif
			usleep(1000);
			goto Retry;
		}

		// Send the command to the node
		starttime = cf_getms();
		rv = cf_socket_write_timeout(fd, wr_buf, wr_buf_sz, deadline_ms, progress_timeout_ms);
		endtime = cf_getms();
		if ((endtime - starttime) > 10) {
			cf_debug("Time to write to the socket (>10ms)=%"PRIu64, (endtime - starttime));
		}
		if (rv != 0) {
			cf_debug("Citrusleaf: write timeout or error when writing header to server - %d fd %d errno %d (tid %zu)",
					rv, fd, errno, (uint64_t)pthread_self());
			if (rv != ETIMEDOUT)
				network_error = true;
			goto Retry;
		}
		goto Ok;

Retry:
		if (network_error == true) {
			/*
			 * In case of Async work (for XDS), it may be extreme to
			 * dun a node in case of network error. We just clean up
			 * things and retry to connect to the remote cluster.
			 * The network error may be a transient one. As this is a
			 * network error, it is better to wait for some significant
			 * time before retrying.
			 */
			sleep(1);	//Sleep for 1sec
#if ONEASYNCFD
			//Do not close the FD
#else
			cf_error("async sender: Closing the fd %d because of network error", fd);
			cf_close(fd);
			fd = -1;
#endif
		}

		if (fd != -1) {
			cf_error("async sender: Closing the fd %d because of retry", fd);
			cf_close(fd);
			fd = -1;
		}

		if (node) {
			cl_cluster_node_put(node);
			node = 0;
		}

		if (deadline_ms && (deadline_ms < cf_getms())) {
#ifdef DEBUG
			cf_debug("async sender: out of time : deadline %"PRIu64" now %"PRIu64,
					deadline_ms, cf_getms());
#endif
			rv = CITRUSLEAF_FAIL_TIMEOUT;
			goto Error;
		}
	} while ((cl_w_p == 0) || (cl_w_p->w_pol == CL_WRITE_RETRY));

Error:
#ifdef DEBUG
	cf_debug("exiting with failure: network_error %d wpol %d timeleft %d rv %d",
			(int)network_error, (int)(cl_w_p ? cl_w_p->w_pol : 0),
			(int)(deadline_ms - cf_getms()), rv);
#endif

	if (wr_buf != wr_stack_buf) {
		free(wr_buf);
	}

#if ONEASYNCFD
	//Do not close the FD
#else
	//If it is a network error, the fd would be closed and set to -1.
	//So, we reach this place with a valid FD in case of timeout.
	if (fd != -1) {
		cf_error("async sender: Closing the fd %d because of timeout", fd);
		cf_close(fd);
	}
#endif

	return(rv);

Ok:
	/*
	 * We cannot release the node here as the async FD associated
	 * with this node may get closed. We should do it only when
	 * we got back the ack for the async command that we just did.
	 */

	//As we sent the command successfully, add it to the async work list
	workitem->node = node;
	workitem->fd = fd;
	//We are storing only the pointer to the workitem
#if ONEASYNCFD
	if (shash_put_unique(g_cl_async_hashtab, trid, &workitem) != SHASH_OK) {
		//This should always succeed.
		cf_error("Unable to add unique entry into the hash table");
	}
	cf_queue_push(node->asyncwork_q, &workitem);	//Also put in the node's q
#else
	cf_queue_push(g_cl_async_q, &workitem);
#endif

	if (wr_buf != wr_stack_buf) {
		free(wr_buf);
	}

	rv = CITRUSLEAF_OK;
	return rv;
}

int
citrusleaf_async_reinit(int size_limit, unsigned int num_receiver_threads)
{
	// int num_threads;

	if (0 == cf_atomic32_get(g_async_initialized)) {
		cf_error("Async client not initialized cannot reinit");
		return -1;
	}

	if (num_receiver_threads > MAX_ASYNC_RECEIVER_THREADS) {
		//Limit the threads to the max value even if caller asks for it
		num_receiver_threads = MAX_ASYNC_RECEIVER_THREADS;
	}

	// If the number of threads is increased, create more threads
	if (num_receiver_threads > g_async_num_threads) {
		unsigned int i;
		for (i = g_async_num_threads; i < num_receiver_threads; i++) {
			pthread_create(&g_async_reciever[i], 0, async_receiver_fn, NULL);
		}
	}
	else {
		// else just reset the number; the async threads will kill themselves
		cf_atomic32_set(&g_async_num_threads, num_receiver_threads);
	}

	cf_atomic32_set(&g_async_q_szlimit, size_limit);
	return ( 0 );
}

int
citrusleaf_async_init(int size_limit, int num_receiver_threads, cl_async_fail_cb fail_cb_fn, cl_async_success_cb success_cb_fn)
{
	int i, num_threads;

	//Make sure that we do the initialization only once
	if (1 == cf_atomic32_incr(&g_async_initialized)) {

		// Start the receiver threads
		num_threads = num_receiver_threads;
		if (num_threads > MAX_ASYNC_RECEIVER_THREADS) {
			//Limit the threads to the max value even if caller asks for it
			num_threads = MAX_ASYNC_RECEIVER_THREADS;
		}

#if ONEASYNCFD
		g_async_h_szlimit = size_limit * 3;		//Max number of elements in the hash table
		g_async_h_buckets = g_async_h_szlimit / 10;	//Number of buckets in the hash table

		if (shash_create(&g_cl_async_hashtab, async_trid_hash, sizeof(uint64_t),
				sizeof(cl_async_work *), g_async_h_buckets, SHASH_CR_MT_BIGLOCK) != SHASH_OK) {
			cf_error("Failed to initialize the async work hashtable");
			cf_atomic32_decr(&g_async_initialized);
			return -1;
		}
#else
		// create work queue
		g_async_q_szlimit = size_limit;
		if ((g_cl_async_q = cf_queue_create(sizeof(cl_async_work *), true)) == NULL) {
			cf_error("Failed to initialize the async work queue");
			cf_atomic32_decr(&g_async_initialized);
			return -1;
		}

		for (i = 0; i < num_threads; i++) {
			pthread_create(&g_async_reciever[i], 0, async_receiver_fn, NULL);
		}
		g_async_num_threads = num_threads;
#endif

		if ((g_cl_workitems_freepool_q = cf_queue_create(sizeof(cl_async_work *), true)) == NULL) {
			cf_error("Failed to create memory pool for workitems");
			return -1;
		}

		g_fail_cb_fn = fail_cb_fn;
		g_success_cb_fn = success_cb_fn;

		// Initialize the stats
		g_async_stats.retries = 0;
		g_async_stats.dropouts = 0;
	}

	return(0);
}
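The receiver side of this async pipeline (async_receiver_fn) is not shown above. The following is a simplified, hypothetical consumer loop reduced to the queue mechanics only: block on g_cl_async_q, handle the work item, then recycle it through g_cl_workitems_freepool_q rather than freeing it. The response handling and thread-exit logic are elided and assumed.

// Hypothetical consumer sketch - not the real async_receiver_fn.
void *
async_receiver_sketch(void *arg)
{
	cl_async_work *workitem = NULL;

	for (;;) {
		// Block until a sender pushes a workitem pointer.
		// (The real thread would also check whether it should exit when
		// g_async_num_threads is reduced.)
		if (0 != cf_queue_pop(g_cl_async_q, &workitem, CF_QUEUE_FOREVER)) {
			continue;
		}

		// ... read the reply on workitem->fd, invoke g_success_cb_fn or
		// g_fail_cb_fn, and release workitem->node (elided) ...

		// Recycle the workitem instead of freeing it.
		cf_queue_push(g_cl_workitems_freepool_q, &workitem);
	}
}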
static as_status
as_scan_generic(
	aerospike* as, as_error* err, const as_policy_scan* policy, const as_scan* scan,
	aerospike_scan_foreach_callback callback, void* udata, uint64_t* task_id_ptr)
{
	as_error_reset(err);

	if (! policy) {
		policy = &as->config.policies.scan;
	}

	as_cluster* cluster = as->cluster;
	as_nodes* nodes = as_nodes_reserve(cluster);
	uint32_t n_nodes = nodes->size;

	if (n_nodes == 0) {
		as_nodes_release(nodes);
		return as_error_set_message(err, AEROSPIKE_ERR_SERVER, "Scan command failed because cluster is empty.");
	}

	// Reserve each node in cluster.
	for (uint32_t i = 0; i < n_nodes; i++) {
		as_node_reserve(nodes->array[i]);
	}

	uint64_t task_id;
	if (task_id_ptr) {
		if (*task_id_ptr == 0) {
			*task_id_ptr = cf_get_rand64() / 2;
		}
		task_id = *task_id_ptr;
	}
	else {
		task_id = cf_get_rand64() / 2;
	}

	// Create scan command
	as_buffer argbuffer;
	uint16_t n_fields = 0;
	size_t size = as_scan_command_size(scan, &n_fields, &argbuffer);
	uint8_t* cmd = as_command_init(size);
	size = as_scan_command_init(cmd, policy, scan, task_id, n_fields, &argbuffer);

	// Initialize task.
	uint32_t error_mutex = 0;
	as_scan_task task;
	task.cluster = as->cluster;
	task.policy = policy;
	task.scan = scan;
	task.callback = callback;
	task.udata = udata;
	task.err = err;
	task.error_mutex = &error_mutex;
	task.task_id = task_id;
	task.cmd = cmd;
	task.cmd_size = size;

	as_status status = AEROSPIKE_OK;

	if (scan->concurrent) {
		uint32_t n_wait_nodes = n_nodes;
		task.complete_q = cf_queue_create(sizeof(as_scan_complete_task), true);

		// Run node scans in parallel.
		for (uint32_t i = 0; i < n_nodes; i++) {
			// Stack allocate task for each node. It should be fine since the task
			// only needs to be valid within this function.
			as_scan_task* task_node = alloca(sizeof(as_scan_task));
			memcpy(task_node, &task, sizeof(as_scan_task));
			task_node->node = nodes->array[i];

			int rc = as_thread_pool_queue_task(&cluster->thread_pool, as_scan_worker, task_node);

			if (rc) {
				// Thread could not be added. Abort entire scan.
				if (ck_pr_fas_32(task.error_mutex, 1) == 0) {
					status = as_error_update(task.err, AEROSPIKE_ERR_CLIENT, "Failed to add scan thread: %d", rc);
				}

				// Reset node count to threads that were run.
				n_wait_nodes = i;
				break;
			}
		}

		// Wait for tasks to complete.
		for (uint32_t i = 0; i < n_wait_nodes; i++) {
			as_scan_complete_task complete;
			cf_queue_pop(task.complete_q, &complete, CF_QUEUE_FOREVER);

			if (complete.result != AEROSPIKE_OK && status == AEROSPIKE_OK) {
				status = complete.result;
			}
		}

		// Release temporary queue.
		cf_queue_destroy(task.complete_q);
	}
	else {
		task.complete_q = 0;

		// Run node scans in series.
		for (uint32_t i = 0; i < n_nodes && status == AEROSPIKE_OK; i++) {
			task.node = nodes->array[i];
			status = as_scan_command_execute(&task);
		}
	}

	// Release each node in cluster.
	for (uint32_t i = 0; i < n_nodes; i++) {
		as_node_release(nodes->array[i]);
	}

	// Release nodes array.
	as_nodes_release(nodes);

	// Free command memory.
	as_command_free(cmd, size);

	// If user aborts query, command is considered successful.
	if (status == AEROSPIKE_ERR_CLIENT_ABORT) {
		status = AEROSPIKE_OK;
	}

	// If completely successful, make the callback that signals completion.
	if (callback && status == AEROSPIKE_OK) {
		callback(NULL, udata);
	}

	return status;
}
// Initialize the demarshal service, start demarshal threads.
int
as_demarshal_start()
{
	demarshal_args *dm = cf_malloc(sizeof(demarshal_args));
	memset(dm, 0, sizeof(demarshal_args));
	g_demarshal_args = dm;

	dm->num_threads = g_config.n_service_threads;

	g_freeslot = cf_queue_create(sizeof(int), true);
	if (!g_freeslot) {
		cf_crash(AS_DEMARSHAL, " Couldn't create reaper free list ");
	}

	// Start the listener socket: note that because this is done after privilege
	// de-escalation, we can't use privileged ports.
	g_config.socket.reuse_addr = g_config.socket_reuse_addr;

	if (0 != cf_socket_init_svc(&g_config.socket)) {
		cf_crash(AS_DEMARSHAL, "couldn't initialize service socket");
	}

	if (-1 == cf_socket_set_nonblocking(g_config.socket.sock)) {
		cf_crash(AS_DEMARSHAL, "couldn't set service socket nonblocking");
	}

	// Note: The localhost socket address will only be set if the main service
	// socket is not already (effectively) listening on the localhost address.
	if (g_config.localhost_socket.addr) {
		cf_debug(AS_DEMARSHAL, "Opening a localhost service socket");

		g_config.localhost_socket.reuse_addr = g_config.socket_reuse_addr;

		if (0 != cf_socket_init_svc(&g_config.localhost_socket)) {
			cf_crash(AS_DEMARSHAL, "couldn't initialize localhost service socket");
		}

		if (-1 == cf_socket_set_nonblocking(g_config.localhost_socket.sock)) {
			cf_crash(AS_DEMARSHAL, "couldn't set localhost service socket nonblocking");
		}
	}

	// Create first thread which is the listener, and wait for it to come up
	// before others are spawned.
	if (0 != pthread_create(&(dm->dm_th[0]), 0, thr_demarshal, &g_config.socket)) {
		cf_crash(AS_DEMARSHAL, "Can't create demarshal threads");
	}

	while (dm->epoll_fd[0] == 0) {
		sleep(1);
	}

	// Create all the epoll_fds and wait for all the threads to come up.
	int i;

	for (i = 1; i < dm->num_threads; i++) {
		if (0 != pthread_create(&(dm->dm_th[i]), 0, thr_demarshal, &g_config.socket)) {
			cf_crash(AS_DEMARSHAL, "Can't create demarshal threads");
		}
	}

	for (i = 1; i < dm->num_threads; i++) {
		while (dm->epoll_fd[i] == 0) {
			sleep(1);
			cf_info(AS_DEMARSHAL, "Waiting to spawn demarshal threads ...");
		}
	}

	cf_info(AS_DEMARSHAL, "Started %d Demarshal Threads", dm->num_threads);

	return 0;
}
// Initialize the demarshal service, start demarshal threads.
int
as_demarshal_start()
{
	demarshal_args *dm = cf_malloc(sizeof(demarshal_args));
	memset(dm, 0, sizeof(demarshal_args));
	g_demarshal_args = dm;

	dm->num_threads = g_config.n_service_threads;

	g_freeslot = cf_queue_create(sizeof(int), true);
	if (!g_freeslot) {
		cf_crash(AS_DEMARSHAL, " Couldn't create reaper free list ");
	}

	// Start the listener socket: note that because this is done after privilege
	// de-escalation, we can't use privileged ports.
	g_config.socket.reuse_addr = g_config.socket_reuse_addr;

	if (0 != cf_socket_init_server(&g_config.socket)) {
		cf_crash(AS_DEMARSHAL, "couldn't initialize service socket");
	}

	cf_socket_disable_blocking(g_config.socket.sock);

	// Note: The localhost socket address will only be set if the main service
	// socket is not already (effectively) listening on the localhost address.
	if (g_config.localhost_socket.addr) {
		cf_debug(AS_DEMARSHAL, "Opening a localhost service socket");

		g_config.localhost_socket.reuse_addr = g_config.socket_reuse_addr;

		if (0 != cf_socket_init_server(&g_config.localhost_socket)) {
			cf_crash(AS_DEMARSHAL, "couldn't initialize localhost service socket");
		}

		cf_socket_disable_blocking(g_config.localhost_socket.sock);
	}

	g_config.xdr_socket.port = as_xdr_info_port();

	if (g_config.xdr_socket.port != 0) {
		cf_debug(AS_DEMARSHAL, "Opening XDR service socket");

		g_config.xdr_socket.reuse_addr = g_config.socket_reuse_addr;

		if (0 != cf_socket_init_server(&g_config.xdr_socket)) {
			cf_crash(AS_DEMARSHAL, "Couldn't initialize XDR service socket");
		}

		cf_socket_disable_blocking(g_config.xdr_socket.sock);
	}

	// Create all the epoll_fds and wait for all the threads to come up.
	int i;

	for (i = 1; i < dm->num_threads; i++) {
		if (0 != pthread_create(&(dm->dm_th[i]), 0, thr_demarshal, &g_config.socket)) {
			cf_crash(AS_DEMARSHAL, "Can't create demarshal threads");
		}
	}

	for (i = 1; i < dm->num_threads; i++) {
		while (CEFD(dm->polls[i]) == 0) {
			sleep(1);
			cf_info(AS_DEMARSHAL, "Waiting to spawn demarshal threads ...");
		}
	}

	// Create first thread which is the listener. We do this one last, as it
	// requires the other threads' epoll instances.
	if (0 != pthread_create(&(dm->dm_th[0]), 0, thr_demarshal, &g_config.socket)) {
		cf_crash(AS_DEMARSHAL, "Can't create demarshal threads");
	}

	while (CEFD(dm->polls[0]) == 0) {
		sleep(1);
	}

	cf_info(AS_DEMARSHAL, "Started %d Demarshal Threads", dm->num_threads);

	return 0;
}