void hs_write_checkpoints(hs_checkpoint_writer* cpw, hs_checkpoint_reader* cpr) { static int cnt = 0; FILE* tsv = NULL; bool sample = (cnt % 6 == 0); // sample performance 10 times a minute if (cnt == 0) { // write the stats once a minute just after the load tsv = fopen(cpw->tsv_path, "wb"); if (tsv) { fprintf(tsv, "Plugin\t" "Inject Message Count\tInject Message Bytes\t" "Process Message Count\tProcess Message Failures\t" "Current Memory\t" "Max Memory\tMax Output\tMax Instructions\t" "Message Matcher Avg (s)\tMessage Matcher SD (s)\t" "Process Message Avg (s)\tProcess Message SD (s)\t" "Timer Event Avg (s)\tTimer Event SD (s)\n"); } } cpw->fh = freopen(NULL, "wb", cpw->fh); if (!cpw->fh) { hs_log(g_module, 1, "checkpoint_writer freopen() error: %d", ferror(cpw->fh)); return; } if (cpw->input_plugins) { hs_input_plugin* p; pthread_mutex_lock(&cpw->input_plugins->list_lock); for (int i = 0; i < cpw->input_plugins->list_cap; ++i) { p = cpw->input_plugins->list[i]; if (p) { pthread_mutex_lock(&p->cp.lock); hs_update_checkpoint(cpr, p->sb->name, &p->cp); pthread_mutex_unlock(&p->cp.lock); if (tsv) { pthread_mutex_lock(&cpw->input_plugins->output.lock); fprintf(tsv, "%s\t" "%zu\t%zu\t" "%zu\t%zu\t" "%u\t" "%u\t%u\t%u\t" "0\t0\t" "0\t0\t" "0\t0\t\n", p->sb->name, p->sb->stats.im_cnt, p->sb->stats.im_bytes, p->sb->stats.pm_cnt, p->sb->stats.pm_failures, p->sb->stats.cur_memory, p->sb->stats.max_memory, p->sb->stats.max_output, p->sb->stats.max_instructions); pthread_mutex_unlock(&cpw->input_plugins->output.lock); } } } pthread_mutex_unlock(&cpw->input_plugins->list_lock); pthread_mutex_lock(&cpw->input_plugins->output.lock); fflush(cpw->input_plugins->output.fh); pthread_mutex_unlock(&cpw->input_plugins->output.lock); } if (cpw->analysis_plugins) { hs_checkpoint cp; for (int i = 0; i < cpw->analysis_plugins->thread_cnt; ++i) { hs_analysis_thread* at = &cpw->analysis_plugins->list[i]; pthread_mutex_lock(&at->cp_lock); cp = at->cp; pthread_mutex_unlock(&at->cp_lock); hs_update_input_checkpoint(cpr, hs_input_dir, at->input.name, &cp); pthread_mutex_lock(&cpw->analysis_plugins->output.lock); cpw->analysis_plugins->sample = sample; fflush(cpw->analysis_plugins->output.fh); pthread_mutex_unlock(&cpw->analysis_plugins->output.lock); if (tsv) { hs_analysis_plugin* p; pthread_mutex_lock(&at->list_lock); for (int i = 0; i < at->list_cap; ++i) { p = at->list[i]; if (!p) continue; fprintf(tsv, "%s\t" "%zu\t%zu\t" "%zu\t%zu\t" "%zu\t%zu\t%zu\t%zu\t" "%g\t%g\t" "%g\t%g\t" "%g\t%g\t\n", p->sb->name, p->sb->stats.im_cnt, p->sb->stats.im_bytes, p->sb->stats.pm_cnt, p->sb->stats.pm_failures, // the sandbox is not in use here, it is safe to grab the // values directly lsb_usage(p->sb->lsb, LSB_UT_MEMORY, LSB_US_CURRENT), lsb_usage(p->sb->lsb, LSB_UT_MEMORY, LSB_US_MAXIMUM), lsb_usage(p->sb->lsb, LSB_UT_OUTPUT, LSB_US_MAXIMUM), lsb_usage(p->sb->lsb, LSB_UT_INSTRUCTION, LSB_US_MAXIMUM), p->sb->stats.mm.mean, hs_sd_running_stats(&p->sb->stats.mm), p->sb->stats.pm.mean, hs_sd_running_stats(&p->sb->stats.pm), p->sb->stats.te.mean, hs_sd_running_stats(&p->sb->stats.te)); } pthread_mutex_unlock(&at->list_lock); } } } if (cpw->output_plugins) { pthread_mutex_lock(&cpw->output_plugins->list_lock); for (int i = 0; i < cpw->output_plugins->list_cap; ++i) { hs_output_plugin* p = cpw->output_plugins->list[i]; if (!p) continue; pthread_mutex_lock(&p->cp_lock); p->sample = sample; hs_update_input_checkpoint(cpr, hs_input_dir, p->sb->name, &p->cp.input); hs_update_input_checkpoint(cpr, hs_analysis_dir, p->sb->name, &p->cp.analysis); if (tsv) { fprintf(tsv, "%s\t" "%zu\t%zu\t" "%zu\t%zu\t" "%u\t" "%u\t%u\t%u\t" "%g\t%g\t" "%g\t%g\t" "%g\t%g\t\n", p->sb->name, p->sb->stats.im_cnt, p->sb->stats.im_bytes, p->sb->stats.pm_cnt, p->sb->stats.pm_failures, p->sb->stats.cur_memory, p->sb->stats.max_memory, p->sb->stats.max_output, p->sb->stats.max_instructions, p->sb->stats.mm.mean, hs_sd_running_stats(&p->sb->stats.mm), p->sb->stats.pm.mean, hs_sd_running_stats(&p->sb->stats.pm), p->sb->stats.te.mean, hs_sd_running_stats(&p->sb->stats.te)); } pthread_mutex_unlock(&p->cp_lock); } pthread_mutex_unlock(&cpw->output_plugins->list_lock); } hs_output_checkpoints(cpr, cpw->fh); fflush(cpw->fh); if (tsv) fclose(tsv); if (++cnt == 60) cnt = 0; }
static void* input_thread(void* arg) { hs_heka_message im, *pim = NULL; hs_init_heka_message(&im, 8); hs_heka_message am, *pam = NULL; hs_init_heka_message(&am, 8); hs_output_plugin* p = (hs_output_plugin*)arg; hs_log(g_module, 6, "starting: %s", p->sb->name); size_t bytes_read[2] = { 0 }; #ifdef HINDSIGHT_CLI bool input_stop = false, analysis_stop = false; while (!(p->stop && input_stop && analysis_stop)) { #else while (!p->stop) { #endif if (p->input.fh && !pim) { if (hs_find_message(&im, &p->input.ib, true)) { pim = &im; } else { bytes_read[0] = hs_read_file(&p->input); } if (!bytes_read[0]) { #ifdef HINDSIGHT_CLI size_t cid = p->input.ib.cp.id; #endif // see if the next file is there yet hs_open_file(&p->input, hs_input_dir, p->input.ib.cp.id + 1); #ifdef HINDSIGHT_CLI if (cid == p->input.ib.cp.id && p->stop) { input_stop = true; } #endif } } else if (!p->input.fh) { // still waiting on the first file hs_open_file(&p->input, hs_input_dir, p->input.ib.cp.id); #ifdef HINDSIGHT_CLI if (!p->input.fh && p->stop) { input_stop = true; } #endif } if (p->analysis.fh && !pam) { if (hs_find_message(&am, &p->analysis.ib, true)) { pam = &am; } else { bytes_read[1] = hs_read_file(&p->analysis); } if (!bytes_read[1]) { #ifdef HINDSIGHT_CLI size_t cid = p->analysis.ib.cp.id; #endif // see if the next file is there yet hs_open_file(&p->analysis, hs_analysis_dir, p->analysis.ib.cp.id + 1); #ifdef HINDSIGHT_CLI if (cid == p->analysis.ib.cp.id && p->stop) { analysis_stop = true; } #endif } } else if (!p->analysis.fh) { // still waiting on the first file hs_open_file(&p->analysis, hs_analysis_dir, p->analysis.ib.cp.id); #ifdef HINDSIGHT_CLI if (!p->analysis.fh && p->stop) { analysis_stop = true; } #endif } // if we have one send the oldest first if (pim) { if (pam) { if (pim->timestamp <= pam->timestamp) { p->msg = pim; } else { p->msg = pam; } } else { p->msg = pim; } } else if (pam) { p->msg = pam; } if (p->msg) { if (p->msg == pim) { pim = NULL; p->cur.input.id = p->input.ib.cp.id; p->cur.input.offset = p->input.ib.cp.offset - (p->input.ib.readpos - p->input.ib.scanpos); } else { pam = NULL; p->cur.analysis.id = p->analysis.ib.cp.id; p->cur.analysis.offset = p->analysis.ib.cp.offset - (p->analysis.ib.readpos - p->analysis.ib.scanpos); } int ret = output_message(p); if (ret == -4) { while (!p->stop && ret == -4) { hs_log(g_module, 7, "retry message %zu", p->sb->stats.pm_cnt); sleep(1); ret = output_message(p); } } if (ret > 0) { break; // fatal error } p->msg = NULL; } else if (!bytes_read[0] && !bytes_read[1]) { // trigger any pending timer events hs_clear_heka_message(&im); // create an idle/empty message p->msg = &im; output_message(p); p->msg = NULL; sleep(1); } } shutdown_timer_event(p); hs_free_heka_message(&am); hs_free_heka_message(&im); // hold the current checkpoints in memory incase we restart it hs_update_input_checkpoint(&p->plugins->cfg->cp_reader, hs_input_dir, p->sb->name, &p->cp.input); hs_update_input_checkpoint(&p->plugins->cfg->cp_reader, hs_analysis_dir, p->sb->name, &p->cp.analysis); if (p->stop) { hs_log(g_module, 6, "shutting down: %s", p->sb->name); } else { hs_log(g_module, 6, "detaching: %s", p->sb->name); pthread_mutex_lock(&p->plugins->list_lock); hs_output_plugins* plugins = p->plugins; plugins->list[p->list_index] = NULL; if (pthread_detach(p->thread)) { hs_log(g_module, 3, "thread could not be detached"); } free_output_plugin(p); free(p); --plugins->list_cnt; pthread_mutex_unlock(&plugins->list_lock); } pthread_exit(NULL); } static void remove_plugin(hs_output_plugins* plugins, int idx) { hs_output_plugin* p = plugins->list[idx]; plugins->list[idx] = NULL; p->stop = true; if (pthread_join(p->thread, NULL)) { hs_log(g_module, 3, "remove_plugin could not pthread_join"); } free_output_plugin(p); free(p); --plugins->list_cnt; }
static void* input_thread(void *arg) { lsb_heka_message *msg = NULL; lsb_heka_message im, *pim = NULL; lsb_init_heka_message(&im, 8); lsb_heka_message am, *pam = NULL; lsb_init_heka_message(&am, 8); hs_output_plugin *p = (hs_output_plugin *)arg; hs_log(NULL, p->name, 6, "starting"); size_t discarded_bytes; size_t bytes_read[2] = { 0 }; int ret = 0; lsb_logger logger = {.context = NULL, .cb = hs_log}; #ifdef HINDSIGHT_CLI bool input_stop = false, analysis_stop = false; while (!(p->stop && input_stop && analysis_stop)) { #else while (!p->stop) { #endif if (p->input.fh && !pim) { if (lsb_find_heka_message(&im, &p->input.ib, true, &discarded_bytes, &logger)) { pim = &im; } else { bytes_read[0] = hs_read_file(&p->input); } if (!bytes_read[0]) { #ifdef HINDSIGHT_CLI size_t cid = p->input.cp.id; #endif // see if the next file is there yet hs_open_file(&p->input, hs_input_dir, p->input.cp.id + 1); #ifdef HINDSIGHT_CLI if (cid == p->input.cp.id && p->stop) { input_stop = true; } #endif } } else if (!p->input.fh) { // still waiting on the first file hs_open_file(&p->input, hs_input_dir, p->input.cp.id); #ifdef HINDSIGHT_CLI if (!p->input.fh && p->stop) { input_stop = true; } #endif } if (p->analysis.fh && !pam) { if (lsb_find_heka_message(&am, &p->analysis.ib, true, &discarded_bytes, &logger)) { pam = &am; } else { bytes_read[1] = hs_read_file(&p->analysis); } if (!bytes_read[1]) { #ifdef HINDSIGHT_CLI size_t cid = p->analysis.cp.id; #endif // see if the next file is there yet hs_open_file(&p->analysis, hs_analysis_dir, p->analysis.cp.id + 1); #ifdef HINDSIGHT_CLI if (cid == p->analysis.cp.id && p->stop) { analysis_stop = true; } #endif } } else if (!p->analysis.fh) { // still waiting on the first file hs_open_file(&p->analysis, hs_analysis_dir, p->analysis.cp.id); #ifdef HINDSIGHT_CLI if (!p->analysis.fh && p->stop) { analysis_stop = true; } #endif } // if we have one send the oldest first if (pim) { if (pam) { if (pim->timestamp <= pam->timestamp) { msg = pim; } else { msg = pam; } } else { msg = pim; } } else if (pam) { msg = pam; } if (msg) { if (msg == pim) { pim = NULL; p->cur.input.id = p->input.cp.id; p->cur.input.offset = p->input.cp.offset - (p->input.ib.readpos - p->input.ib.scanpos); } else { pam = NULL; p->cur.analysis.id = p->analysis.cp.id; p->cur.analysis.offset = p->analysis.cp.offset - (p->analysis.ib.readpos - p->analysis.ib.scanpos); } ret = output_message(p, msg); if (ret == LSB_HEKA_PM_RETRY) { while (!p->stop && ret == LSB_HEKA_PM_RETRY) { const char *err = lsb_heka_get_error(p->hsb); hs_log(NULL, p->name, 7, "retry message %llu err: %s", p->sequence_id, err); sleep(1); ret = output_message(p, msg); } } if (ret > 0) { break; // fatal error } msg = NULL; } else if (!bytes_read[0] && !bytes_read[1]) { // trigger any pending timer events lsb_clear_heka_message(&im); // create an idle/empty message msg = &im; output_message(p, msg); msg = NULL; sleep(1); } } shutdown_timer_event(p); lsb_free_heka_message(&am); lsb_free_heka_message(&im); // hold the current checkpoints in memory incase we restart it hs_update_input_checkpoint(&p->plugins->cfg->cp_reader, hs_input_dir, p->name, &p->cp.input); hs_update_input_checkpoint(&p->plugins->cfg->cp_reader, hs_analysis_dir, p->name, &p->cp.analysis); if (p->stop) { hs_log(NULL, p->name, 6, "shutting down"); } else { hs_log(NULL, p->name, 6, "detaching received: %d msg: %s", ret, lsb_heka_get_error(p->hsb)); pthread_mutex_lock(&p->plugins->list_lock); hs_output_plugins *plugins = p->plugins; plugins->list[p->list_index] = NULL; if (pthread_detach(p->thread)) { hs_log(NULL, p->name, 3, "thread could not be detached"); } destroy_output_plugin(p); --plugins->list_cnt; pthread_mutex_unlock(&plugins->list_lock); } pthread_exit(NULL); } static void remove_plugin(hs_output_plugins *plugins, int idx) { hs_output_plugin *p = plugins->list[idx]; plugins->list[idx] = NULL; p->stop = true; if (pthread_join(p->thread, NULL)) { hs_log(NULL, p->name, 3, "remove_plugin could not pthread_join"); } destroy_output_plugin(p); --plugins->list_cnt; }