int main(int argc, char **argv) { bool argerr = false; bool follow = false; bool use_stdin = false; long num = -1; char *matcher = "TRUE"; char *filename = NULL; output_function ofn = output_text; int c; while ((c = getopt(argc, argv, "tchfn:m:")) != -1) { switch (c) { case 't': ofn = output_text; break; case 'c': ofn = NULL; break; case 'h': ofn = output_heka; break; case 'f': follow = true; break; case 'n': num = strtol(optarg, NULL, 10); if (num < 0) argerr = true; break; case 'm': matcher = optarg; break; default: argerr = true; break; } } if (argc - optind == 1) { filename = argv[optind]; use_stdin = strcmp("-", filename) == 0; } else { argerr = true; } if (argerr) { log_cb(NULL, NULL, 0, "usage: %s [-t|-c|-h] [-m message_matcher] [-f] [-n #] <FILE>\n" "description:\n" " -t output the messages in text format (default)\n" " -c only output the message count\n" " -h output the messages as a Heka protobuf stream\n" " -f output appended data as the file grows\n" " -n output the last # of messages (simple header check so not " "100%% accurate)\n" " -m message_matcher expression (default \"TRUE\")\n" " FILE name of the file to cat or '-' for stdin\n" "notes:\n" " All output is written to stdout and all log/error messages are " "written to stderr.\n", argv[0]); return EXIT_FAILURE; } lsb_message_matcher *mm = lsb_create_message_matcher(matcher); if (!mm) { log_cb(NULL, NULL, 0, "invalid message matcher: %s", matcher); return EXIT_FAILURE; } FILE *fh = stdin; if (!use_stdin) { fh = fopen(filename, "r"); if (!fh) { log_cb(NULL, NULL, 0, "error opening: %s", filename); return EXIT_FAILURE; } if (num >= 0) { move_to_offset(fh, num); } } size_t discarded_bytes; size_t bytes_read = 0; size_t pcnt = 0; size_t mcnt = 0; lsb_input_buffer ib; lsb_init_input_buffer(&ib, 1024 * 1024 * 1024); lsb_heka_message msg; lsb_init_heka_message(&msg, 8); do { if (lsb_find_heka_message(&msg, &ib, true, &discarded_bytes, &logger)) { if (lsb_eval_message_matcher(mm, &msg)) { if (ofn) { 
ofn(&msg); } ++mcnt; } ++pcnt; } else { bytes_read = read_file(fh, &ib); } if (bytes_read == 0 && follow && !use_stdin) { sleep(1); } } while (bytes_read > 0 || follow); lsb_free_heka_message(&msg); lsb_free_input_buffer(&ib); lsb_destroy_message_matcher(mm); if (!use_stdin) { fclose(fh); } if (ofn) { log_cb(NULL, NULL, 0, "Processed: %zu, matched: %zu messages\n", pcnt, mcnt); } else { printf("Processed: %zu, matched: %zu messages\n", pcnt, mcnt); } }
/**
 * Locates the next framed message in the input buffer.
 *
 * A frame is a 0x1e byte, a one byte header length, the header, a 0x1f byte
 * and then the message. Candidate frames that fail validation (wrong
 * terminator, header decode failure, message decode failure) are skipped one
 * byte at a time and the scan continues.
 *
 * The scan is iterative so that a buffer containing many false 0x1e bytes
 * cannot grow the call stack (the previous recursive form used one stack
 * frame per rejected candidate).
 *
 * @param m Message that receives the result. When decode is false the message
 *          is cleared and m->raw is pointed at the framed bytes inside the
 *          input buffer.
 * @param ib Input buffer to scan; scanpos is advanced past consumed data and
 *           msglen caches the decoded header length while the message body
 *           is still incomplete.
 * @param decode true to decode the message with lsb_decode_heka_message,
 *               false to return the raw framed bytes.
 * @param discarded_bytes Set to the number of bytes skipped during this call.
 * @param logger Optional logger; used to report NULL arguments and passed to
 *               the message decoder.
 * @return true when a message was found (and decoded when requested), false
 *         when more data is needed, nothing was found or an argument is NULL.
 */
bool lsb_find_heka_message(lsb_heka_message *m,
                           lsb_input_buffer *ib,
                           bool decode,
                           size_t *discarded_bytes,
                           lsb_logger *logger)
{
  if (!m || !ib || !discarded_bytes) {
    if (logger && logger->cb) {
      logger->cb(logger->context, __func__, 4, LSB_ERR_UTIL_NULL);
    }
    return false;
  }

  *discarded_bytes = 0;
  for (;;) {
    if (ib->readpos == ib->scanpos) {
      return false; // empty buffer
    }

    char *p = memchr(&ib->buf[ib->scanpos], 0x1e, ib->readpos - ib->scanpos);
    if (!p) {
      // full buffer skipped since no header was located
      *discarded_bytes += ib->readpos - ib->scanpos;
      ib->scanpos = ib->readpos = 0;
      return false;
    }

    if (p != ib->buf + ib->scanpos) {
      // partial buffer skipped before locating a possible header
      *discarded_bytes += p - ib->buf - ib->scanpos;
    }
    ib->scanpos = p - ib->buf;

    if (ib->readpos - ib->scanpos < 2) {
      return false; // header length is not in buf
    }

    size_t hlen = (unsigned char)ib->buf[ib->scanpos + 1];
    size_t hend = ib->scanpos + hlen + 3;
    if (hend > ib->readpos) {
      return false; // header is not in buf
    }

    if (ib->buf[hend - 1] != 0x1f) {
      // invalid header length: step over this 0x1e and rescan
      ++ib->scanpos;
      ++*discarded_bytes;
      continue;
    }

    if (!ib->msglen) {
      ib->msglen = decode_header(&ib->buf[ib->scanpos + 2], hlen,
                                 ib->maxsize - LSB_MAX_HDR_SIZE);
    }
    if (!ib->msglen) {
      // header decode failure: step over this 0x1e and rescan
      ++ib->scanpos;
      ++*discarded_bytes;
      continue;
    }

    size_t mend = hend + ib->msglen;
    if (mend > ib->readpos) {
      return false; // message is not in buf (msglen stays cached)
    }

    if (!decode) {
      // allow a framed message in a non Heka protobuf format
      lsb_clear_heka_message(m);
      m->raw.s = &ib->buf[hend];
      m->raw.len = ib->msglen;
      ib->scanpos = mend;
      ib->msglen = 0;
      return true;
    }

    if (lsb_decode_heka_message(m, &ib->buf[hend], ib->msglen, logger)) {
      ib->scanpos = mend;
      ib->msglen = 0;
      return true;
    }

    // message decode failure: step over this 0x1e and rescan
    ++ib->scanpos;
    ++*discarded_bytes;
    ib->msglen = 0;
  }
}
static void* input_thread(void *arg) { lsb_heka_message *msg = NULL; lsb_heka_message im, *pim = NULL; lsb_init_heka_message(&im, 8); lsb_heka_message am, *pam = NULL; lsb_init_heka_message(&am, 8); hs_output_plugin *p = (hs_output_plugin *)arg; hs_log(NULL, p->name, 6, "starting"); size_t discarded_bytes; size_t bytes_read[2] = { 0 }; int ret = 0; lsb_logger logger = {.context = NULL, .cb = hs_log}; #ifdef HINDSIGHT_CLI bool input_stop = false, analysis_stop = false; while (!(p->stop && input_stop && analysis_stop)) { #else while (!p->stop) { #endif if (p->input.fh && !pim) { if (lsb_find_heka_message(&im, &p->input.ib, true, &discarded_bytes, &logger)) { pim = &im; } else { bytes_read[0] = hs_read_file(&p->input); } if (!bytes_read[0]) { #ifdef HINDSIGHT_CLI size_t cid = p->input.cp.id; #endif // see if the next file is there yet hs_open_file(&p->input, hs_input_dir, p->input.cp.id + 1); #ifdef HINDSIGHT_CLI if (cid == p->input.cp.id && p->stop) { input_stop = true; } #endif } } else if (!p->input.fh) { // still waiting on the first file hs_open_file(&p->input, hs_input_dir, p->input.cp.id); #ifdef HINDSIGHT_CLI if (!p->input.fh && p->stop) { input_stop = true; } #endif } if (p->analysis.fh && !pam) { if (lsb_find_heka_message(&am, &p->analysis.ib, true, &discarded_bytes, &logger)) { pam = &am; } else { bytes_read[1] = hs_read_file(&p->analysis); } if (!bytes_read[1]) { #ifdef HINDSIGHT_CLI size_t cid = p->analysis.cp.id; #endif // see if the next file is there yet hs_open_file(&p->analysis, hs_analysis_dir, p->analysis.cp.id + 1); #ifdef HINDSIGHT_CLI if (cid == p->analysis.cp.id && p->stop) { analysis_stop = true; } #endif } } else if (!p->analysis.fh) { // still waiting on the first file hs_open_file(&p->analysis, hs_analysis_dir, p->analysis.cp.id); #ifdef HINDSIGHT_CLI if (!p->analysis.fh && p->stop) { analysis_stop = true; } #endif } // if we have one send the oldest first if (pim) { if (pam) { if (pim->timestamp <= pam->timestamp) { msg = pim; } else { msg 
= pam; } } else { msg = pim; } } else if (pam) { msg = pam; } if (msg) { if (msg == pim) { pim = NULL; p->cur.input.id = p->input.cp.id; p->cur.input.offset = p->input.cp.offset - (p->input.ib.readpos - p->input.ib.scanpos); } else { pam = NULL; p->cur.analysis.id = p->analysis.cp.id; p->cur.analysis.offset = p->analysis.cp.offset - (p->analysis.ib.readpos - p->analysis.ib.scanpos); } ret = output_message(p, msg); if (ret == LSB_HEKA_PM_RETRY) { while (!p->stop && ret == LSB_HEKA_PM_RETRY) { const char *err = lsb_heka_get_error(p->hsb); hs_log(NULL, p->name, 7, "retry message %llu err: %s", p->sequence_id, err); sleep(1); ret = output_message(p, msg); } } if (ret > 0) { break; // fatal error } msg = NULL; } else if (!bytes_read[0] && !bytes_read[1]) { // trigger any pending timer events lsb_clear_heka_message(&im); // create an idle/empty message msg = &im; output_message(p, msg); msg = NULL; sleep(1); } } shutdown_timer_event(p); lsb_free_heka_message(&am); lsb_free_heka_message(&im); // hold the current checkpoints in memory incase we restart it hs_update_input_checkpoint(&p->plugins->cfg->cp_reader, hs_input_dir, p->name, &p->cp.input); hs_update_input_checkpoint(&p->plugins->cfg->cp_reader, hs_analysis_dir, p->name, &p->cp.analysis); if (p->stop) { hs_log(NULL, p->name, 6, "shutting down"); } else { hs_log(NULL, p->name, 6, "detaching received: %d msg: %s", ret, lsb_heka_get_error(p->hsb)); pthread_mutex_lock(&p->plugins->list_lock); hs_output_plugins *plugins = p->plugins; plugins->list[p->list_index] = NULL; if (pthread_detach(p->thread)) { hs_log(NULL, p->name, 3, "thread could not be detached"); } destroy_output_plugin(p); --plugins->list_cnt; pthread_mutex_unlock(&plugins->list_lock); } pthread_exit(NULL); } static void remove_plugin(hs_output_plugins *plugins, int idx) { hs_output_plugin *p = plugins->list[idx]; plugins->list[idx] = NULL; p->stop = true; if (pthread_join(p->thread, NULL)) { hs_log(NULL, p->name, 3, "remove_plugin could not pthread_join"); 
} destroy_output_plugin(p); --plugins->list_cnt; }