/*
 * Evaluate whether a namespace has breached its high-water marks and whether
 * writes to it must be stopped.
 *
 * Parameters:
 *   ns           - namespace to evaluate (must not be NULL).
 *   hwm_breached - out: set true when memory used exceeds the memory
 *                  high-water mark or disk used exceeds the disk high-water
 *                  mark; false otherwise.
 *   stop_writes  - out: set true when memory used exceeds the stop-writes
 *                  threshold or the available disk percentage is below
 *                  ns->storage_min_avail_pct; false otherwise.
 *
 * Side effects:
 *   - Resets ns->n_bytes_memory to 0 if it exceeds CL_TERA_BYTES.
 *   - Logs the evaluation at warning level when either flag is set, otherwise
 *     at debug level.
 */
void as_namespace_eval_write_state(as_namespace *ns, bool *hwm_breached, bool *stop_writes)
{
	cf_assert(ns, AS_NAMESPACE, CF_WARNING, "NULL namespace");
	cf_assert(hwm_breached, AS_NAMESPACE, CF_WARNING, "NULL parameter, hwm_breached");
	cf_assert(stop_writes, AS_NAMESPACE, CF_WARNING, "NULL parameter, stop_writes");

	*hwm_breached = false;
	*stop_writes = false;

	// Space limits configured on this namespace.
	uint64_t mem_lim = ns->memory_size;
	uint64_t ssd_lim = ns->ssd_size;

	// Memory thresholds: the memory limit scaled by the configured factors.
	// NOTE(review): presumably hwm_memory / stop_writes_pct are fractions in
	// [0, 1] - confirm against the config parser.
	uint64_t mem_hwm = mem_lim * ns->hwm_memory;
	uint64_t mem_stop_writes = mem_lim * ns->stop_writes_pct;

	// Disk threshold: the disk limit scaled by the configured factor.
	uint64_t ssd_hwm = ssd_lim * ns->hwm_disk;

	// Disk usage of the namespace, as reported by the storage layer.
	uint64_t disk_sz = 0;
	int disk_avail_pct = 0;

	as_storage_stats(ns, &disk_avail_pct, &disk_sz);

	// Protection check! An implausibly large disk counter is treated as a
	// wrapped counter - ignore it rather than erroneously evict.
	if (disk_sz > CL_PETA_BYTES) {
		cf_warning(AS_NAMESPACE, "namespace disk bytes big %"PRIu64" please bring node down to reset counter", disk_sz);
		disk_sz = 0;
	}

	// Protection check! Same idea for the in-memory data counter, except that
	// here the shared counter itself is reset.
	if (cf_atomic_int_get(ns->n_bytes_memory) > CL_TERA_BYTES) {
		cf_warning(AS_NAMESPACE, "namespace memory bytes big %"PRIu64" please bring node down to reset counter", cf_atomic_int_get(ns->n_bytes_memory));
		cf_atomic_int_set(&ns->n_bytes_memory, 0);
	}

	// Memory usage of the namespace. The index is always stored in memory, so
	// it counts towards memory regardless of where the data lives.
	uint64_t index_sz = cf_atomic_int_get(ns->n_objects) * as_index_size_get(ns);
	uint64_t sub_index_sz = cf_atomic_int_get(ns->n_sub_objects) * as_index_size_get(ns);
	uint64_t sindex_sz = as_sindex_get_ns_memory_used(ns);
	uint64_t data_in_memory_sz = cf_atomic_int_get(ns->n_bytes_memory);
	uint64_t memory_sz = index_sz + sub_index_sz + data_in_memory_sz + sindex_sz;

	// Possible reasons for eviction or stopping writes, indexed by a bitmask:
	// 0x1 = memory, 0x2 = disk, 0x4 = disk avail pct.
	// (We don't use all combinations, but in case we change our minds...)
	static const char* reasons[] = {
		"",
		" (memory)",
		" (disk)",
		" (memory & disk)",
		" (disk avail pct)",
		" (memory & disk avail pct)",
		" (disk & disk avail pct)",
		" (all)"
	};

	// Check if the high water mark is breached.
	uint32_t how_breached = 0x0;

	if (memory_sz > mem_hwm) {
		*hwm_breached = true;
		how_breached = 0x1;
	}

	if (disk_sz > ssd_hwm) {
		*hwm_breached = true;
		how_breached |= 0x2;
	}

	// Check if the writes should be stopped.
	uint32_t why_stopped = 0x0;

	if (memory_sz > mem_stop_writes) {
		*stop_writes = true;
		why_stopped = 0x1;
	}

	if (disk_avail_pct < (int)ns->storage_min_avail_pct) {
		*stop_writes = true;
		why_stopped |= 0x4;
	}

	// The parenthesised breakdown lists every component of memory_sz, in the
	// order they are summed above (index + sub-index + data + sindex), so the
	// logged parts add up to the logged total.
	if (*hwm_breached || *stop_writes) {
		cf_warning(AS_NAMESPACE, "{%s} hwm_breached %s%s, stop_writes %s%s, memory sz:%"PRIu64" (%"PRIu64" + %"PRIu64" + %"PRIu64" + %"PRIu64") hwm:%"PRIu64" sw:%"PRIu64", disk sz:%"PRIu64" hwm:%"PRIu64,
				ns->name,
				*hwm_breached ? "true" : "false", reasons[how_breached],
				*stop_writes ? "true" : "false", reasons[why_stopped],
				memory_sz, index_sz, sub_index_sz, data_in_memory_sz, sindex_sz,
				mem_hwm, mem_stop_writes,
				disk_sz, ssd_hwm);
	}
	else {
		cf_debug(AS_NAMESPACE, "{%s} hwm_breached %s%s, stop_writes %s%s, memory sz:%"PRIu64" (%"PRIu64" + %"PRIu64" + %"PRIu64" + %"PRIu64") hwm:%"PRIu64" sw:%"PRIu64", disk sz:%"PRIu64" hwm:%"PRIu64,
				ns->name,
				*hwm_breached ? "true" : "false", reasons[how_breached],
				*stop_writes ? "true" : "false", reasons[why_stopped],
				memory_sz, index_sz, sub_index_sz, data_in_memory_sz, sindex_sz,
				mem_hwm, mem_stop_writes,
				disk_sz, ssd_hwm);
	}
}
int main(int argc, char* argv[]) { signal(SIGSEGV, as_sig_handle_segv); signal(SIGTERM , as_sig_handle_term); fprintf(stdout, "\nAerospike act - device IO test\n"); fprintf(stdout, "Copyright 2011 by Aerospike. All rights reserved.\n\n"); if (! configure(argc, argv)) { exit(-1); } set_schedulers(); srand(time(NULL)); // rand_seed(g_rand_64_buffer); salter salters[g_num_write_buffers ? g_num_write_buffers : 1]; g_salters = salters; if (! create_salters()) { exit(-1); } device devices[g_num_devices]; readq readqs[g_num_queues]; g_devices = devices; g_readqs = readqs; // TODO - 'salt' drive? g_p_large_block_read_histogram = histogram_create(); g_p_large_block_write_histogram = histogram_create(); g_p_raw_read_histogram = histogram_create(); g_p_read_histogram = histogram_create(); g_run_start_us = cf_getus(); uint64_t run_stop_us = g_run_start_us + g_run_us; g_running = 1; for (int n = 0; n < g_num_devices; n++) { device* p_device = &g_devices[n]; p_device->name = g_device_names[n]; p_device->p_fd_queue = cf_queue_create(sizeof(int), true); discover_num_blocks(p_device); create_large_block_read_buffer(p_device); p_device->p_raw_read_histogram = histogram_create(); sprintf(p_device->histogram_tag, "%-18s", p_device->name); if (pthread_create(&p_device->large_block_read_thread, NULL, run_large_block_reads, (void*)p_device)) { fprintf(stdout, "ERROR: create large block read thread %d\n", n); exit(-1); } if (pthread_create(&p_device->large_block_write_thread, NULL, run_large_block_writes, (void*)p_device)) { fprintf(stdout, "ERROR: create write thread %d\n", n); exit(-1); } } for (int i = 0; i < g_num_queues; i++) { readq* p_readq = &g_readqs[i]; p_readq->p_req_queue = cf_queue_create(sizeof(readreq*), true); p_readq->threads = malloc(sizeof(pthread_t) * g_threads_per_queue); for (int j = 0; j < g_threads_per_queue; j++) { if (pthread_create(&p_readq->threads[j], NULL, run_reads, (void*)p_readq->p_req_queue)) { fprintf(stdout, "ERROR: create read thread %d:%d\n", i, j); 
exit(-1); } } } pthread_t thr_add_readreqs; if (pthread_create(&thr_add_readreqs, NULL, run_add_readreqs, NULL)) { fprintf(stdout, "ERROR: create thread thr_add_readreqs\n"); exit(-1); } fprintf(stdout, "\n"); uint64_t now_us; uint64_t count = 0; while ((now_us = cf_getus()) < run_stop_us && g_running) { count++; int sleep_us = (int) ((count * g_report_interval_us) - (now_us - g_run_start_us)); if (sleep_us > 0) { usleep((uint32_t)sleep_us); } fprintf(stdout, "After %" PRIu64 " sec:\n", (count * g_report_interval_us) / 1000000); fprintf(stdout, "read-reqs queued: %" PRIu64 "\n", cf_atomic_int_get(g_read_reqs_queued)); histogram_dump(g_p_large_block_read_histogram, "LARGE BLOCK READS "); histogram_dump(g_p_large_block_write_histogram, "LARGE BLOCK WRITES"); histogram_dump(g_p_raw_read_histogram, "RAW READS "); for (int d = 0; d < g_num_devices; d++) { histogram_dump(g_devices[d].p_raw_read_histogram, g_devices[d].histogram_tag); } histogram_dump(g_p_read_histogram, "READS "); fprintf(stdout, "\n"); fflush(stdout); } g_running = 0; void* pv_value; pthread_join(thr_add_readreqs, &pv_value); for (int i = 0; i < g_num_queues; i++) { readq* p_readq = &g_readqs[i]; for (int j = 0; j < g_threads_per_queue; j++) { pthread_join(p_readq->threads[j], &pv_value); } cf_queue_destroy(p_readq->p_req_queue); free(p_readq->threads); } for (int d = 0; d < g_num_devices; d++) { device* p_device = &g_devices[d]; pthread_join(p_device->large_block_read_thread, &pv_value); pthread_join(p_device->large_block_write_thread, &pv_value); fd_close_all(p_device); cf_queue_destroy(p_device->p_fd_queue); free(p_device->p_large_block_read_buffer); free(p_device->p_raw_read_histogram); } free(g_p_large_block_read_histogram); free(g_p_large_block_write_histogram); free(g_p_raw_read_histogram); free(g_p_read_histogram); destroy_salters(); return (0); }
int main(int argc, char* argv[]) { signal(SIGSEGV, as_sig_handle_segv); signal(SIGTERM, as_sig_handle_term); fprintf(stdout, "\nAerospike act - device IO test\n"); fprintf(stdout, "Copyright 2011 by Aerospike. All rights reserved.\n\n"); if (! configure(argc, argv)) { exit(-1); } set_schedulers(); srand(time(NULL)); // rand_seed(g_rand_64_buffer); salter salters[g_num_write_buffers ? g_num_write_buffers : 1]; g_salters = salters; if (! create_salters()) { exit(-1); } device devices[g_num_devices]; g_devices = devices; g_p_large_block_read_histogram = histogram_create(); g_p_large_block_write_histogram = histogram_create(); g_p_raw_read_histogram = histogram_create(); g_p_read_histogram = histogram_create(); g_run_start_ms = cf_getms(); uint64_t run_stop_ms = g_run_start_ms + g_run_ms; g_running = 1; int n; for (n = 0; n < g_num_devices; n++) { device* p_device = &g_devices[n]; p_device->name = g_device_names[n]; p_device->p_fd_queue = cf_queue_create(sizeof(int), true); discover_num_blocks(p_device); create_large_block_read_buffer(p_device); p_device->p_raw_read_histogram = histogram_create(); sprintf(p_device->histogram_tag, "%-18s", p_device->name); if (pthread_create(&p_device->large_block_read_thread, NULL, run_large_block_reads, (void*)p_device)) { fprintf(stdout, "Error: create large block read thread %d\n", n); exit(-1); } if (pthread_create(&p_device->large_block_write_thread, NULL, run_large_block_writes, (void*)p_device)) { fprintf(stdout, "Error: create write thread %d\n", n); exit(-1); } } aio_context_t aio_context = 0; if(io_setup(MAXEVENTS, &aio_context) != 0) { fprintf(stdout, "Error: AIO context not set up \n"); exit(-1); } create_async_info_queue(); /* read events generating thread */ pthread_t read_generator; if (pthread_create(&read_generator, NULL, &generate_async_reads, (void*)&aio_context)) { fprintf(stdout, "Error: create read generator thread\n"); exit(-1); } /* Create the worker threads */ pthread_t workers[g_worker_threads]; int j; for (j 
= 0; j < g_worker_threads; j++) { if (pthread_create(&workers[j], NULL, &worker_func , (void *)(&aio_context))) { fprintf(stdout, "Error: creating worker thread %d failed\n", j); exit(-1); } } fprintf(stdout, "\n"); uint64_t now_ms; uint64_t time_count = 0; int nanosleep_ret = -1; struct timespec initial,remaining; while ((now_ms = cf_getms()) < run_stop_ms && g_running) { time_count++; int sleep_ms = (int) ((time_count * g_report_interval_ms) - (now_ms - g_run_start_ms)); if (sleep_ms > 0) { initial.tv_sec = sleep_ms / 1000; initial.tv_nsec = (sleep_ms % 1000) * 1000000; retry: memset(&remaining, 0, sizeof(remaining)); nanosleep_ret = nanosleep(&initial, &remaining); if(nanosleep_ret == -1 && errno == EINTR) { /* Interrupted by a signal */ initial.tv_sec = remaining.tv_sec; initial.tv_nsec = remaining.tv_nsec; goto retry; } } fprintf(stdout, "After %" PRIu64 " sec:\n", (time_count * g_report_interval_ms) / 1000); fprintf(stdout, "read-reqs queued: %" PRIu64 "\n", cf_atomic_int_get(g_read_reqs_queued)); histogram_dump(g_p_large_block_read_histogram, "LARGE BLOCK READS "); histogram_dump(g_p_large_block_write_histogram, "LARGE BLOCK WRITES"); histogram_dump(g_p_raw_read_histogram, "RAW READS "); int d; for (d = 0; d < g_num_devices; d++) { histogram_dump(g_devices[d].p_raw_read_histogram, g_devices[d].histogram_tag); } histogram_dump(g_p_read_histogram, "READS "); fprintf(stdout, "\n"); fflush(stdout); } fprintf(stdout, "\nTEST COMPLETED \n"); g_running = 0; int i; //TODO aio_destroy? 
/* Freeing resources used by async */ void* ret_value; for (i = 0; i < g_worker_threads; i++) { pthread_join(workers[i], &ret_value); } destroy_async_info_queue(); int d; for (d = 0; d < g_num_devices; d++) { device* p_device = &g_devices[d]; pthread_join(p_device->large_block_read_thread, &ret_value); pthread_join(p_device->large_block_write_thread, &ret_value); fd_close_all(p_device); cf_queue_destroy(p_device->p_fd_queue); free(p_device->p_large_block_read_buffer); free(p_device->p_raw_read_histogram); } free(g_p_large_block_read_histogram); free(g_p_large_block_write_histogram); free(g_p_raw_read_histogram); free(g_p_read_histogram); destroy_salters(); return (0); }