void hs_write_checkpoints(hs_checkpoint_writer* cpw, hs_checkpoint_reader* cpr)
{
  static int cnt = 0;

  FILE* tsv = NULL;
  bool sample = (cnt % 6 == 0); // sample performance 10 times a minute
  if (cnt == 0) { // write the stats once a minute just after the load
    tsv = fopen(cpw->tsv_path, "wb");
    if (tsv) {
      fprintf(tsv, "Plugin\t"
              "Inject Message Count\tInject Message Bytes\t"
              "Process Message Count\tProcess Message Failures\t"
              "Current Memory\t"
              "Max Memory\tMax Output\tMax Instructions\t"
              "Message Matcher Avg (s)\tMessage Matcher SD (s)\t"
              "Process Message Avg (s)\tProcess Message SD (s)\t"
              "Timer Event Avg (s)\tTimer Event SD (s)\n");
    }
  }
  cpw->fh = freopen(NULL, "wb", cpw->fh);
  if (!cpw->fh) {
    hs_log(g_module, 1, "checkpoint_writer freopen() error: %d",
           ferror(cpw->fh));
    return;
  }
  if (cpw->input_plugins) {
    hs_input_plugin* p;
    pthread_mutex_lock(&cpw->input_plugins->list_lock);
    for (int i = 0; i < cpw->input_plugins->list_cap; ++i) {
      p = cpw->input_plugins->list[i];
      if (p) {
        pthread_mutex_lock(&p->cp.lock);
        hs_update_checkpoint(cpr, p->sb->name, &p->cp);
        pthread_mutex_unlock(&p->cp.lock);

        if (tsv) {
          pthread_mutex_lock(&cpw->input_plugins->output.lock);
          fprintf(tsv, "%s\t"
                  "%zu\t%zu\t"
                  "%zu\t%zu\t"
                  "%u\t"
                  "%u\t%u\t%u\t"
                  "0\t0\t"
                  "0\t0\t"
                  "0\t0\t\n",
                  p->sb->name,
                  p->sb->stats.im_cnt, p->sb->stats.im_bytes,
                  p->sb->stats.pm_cnt, p->sb->stats.pm_failures,
                  p->sb->stats.cur_memory,
                  p->sb->stats.max_memory,
                  p->sb->stats.max_output,
                  p->sb->stats.max_instructions);
          pthread_mutex_unlock(&cpw->input_plugins->output.lock);
        }
      }
    }
    pthread_mutex_unlock(&cpw->input_plugins->list_lock);

    pthread_mutex_lock(&cpw->input_plugins->output.lock);
    fflush(cpw->input_plugins->output.fh);
    pthread_mutex_unlock(&cpw->input_plugins->output.lock);
  }

  if (cpw->analysis_plugins) {
    hs_checkpoint cp;

    for (int i = 0; i < cpw->analysis_plugins->thread_cnt; ++i) {
      hs_analysis_thread* at = &cpw->analysis_plugins->list[i];
      pthread_mutex_lock(&at->cp_lock);
      cp = at->cp;
      pthread_mutex_unlock(&at->cp_lock);
      hs_update_input_checkpoint(cpr, hs_input_dir, at->input.name, &cp);

      pthread_mutex_lock(&cpw->analysis_plugins->output.lock);
      cpw->analysis_plugins->sample = sample;
      fflush(cpw->analysis_plugins->output.fh);
      pthread_mutex_unlock(&cpw->analysis_plugins->output.lock);

      if (tsv) {
        hs_analysis_plugin* p;
        pthread_mutex_lock(&at->list_lock);
        for (int i = 0; i < at->list_cap; ++i) {
          p = at->list[i];
          if (!p) continue;
          fprintf(tsv, "%s\t"
                  "%zu\t%zu\t"
                  "%zu\t%zu\t"
                  "%zu\t%zu\t%zu\t%zu\t"
                  "%g\t%g\t"
                  "%g\t%g\t"
                  "%g\t%g\t\n",
                  p->sb->name,
                  p->sb->stats.im_cnt, p->sb->stats.im_bytes,
                  p->sb->stats.pm_cnt, p->sb->stats.pm_failures,
                  // the sandbox is not in use here, it is safe to grab the
                  // values directly
                  lsb_usage(p->sb->lsb, LSB_UT_MEMORY, LSB_US_CURRENT),
                  lsb_usage(p->sb->lsb, LSB_UT_MEMORY, LSB_US_MAXIMUM),
                  lsb_usage(p->sb->lsb, LSB_UT_OUTPUT, LSB_US_MAXIMUM),
                  lsb_usage(p->sb->lsb, LSB_UT_INSTRUCTION, LSB_US_MAXIMUM),
                  p->sb->stats.mm.mean, hs_sd_running_stats(&p->sb->stats.mm),
                  p->sb->stats.pm.mean, hs_sd_running_stats(&p->sb->stats.pm),
                  p->sb->stats.te.mean, hs_sd_running_stats(&p->sb->stats.te));
        }
        pthread_mutex_unlock(&at->list_lock);
      }
    }
  }

  if (cpw->output_plugins) {
    pthread_mutex_lock(&cpw->output_plugins->list_lock);
    for (int i = 0; i < cpw->output_plugins->list_cap; ++i) {
      hs_output_plugin* p = cpw->output_plugins->list[i];
      if (!p) continue;

      pthread_mutex_lock(&p->cp_lock);
      p->sample = sample;
      hs_update_input_checkpoint(cpr,
                                 hs_input_dir,
                                 p->sb->name,
                                 &p->cp.input);
      hs_update_input_checkpoint(cpr,
                                 hs_analysis_dir,
                                 p->sb->name,
                                 &p->cp.analysis);

      if (tsv) {
        fprintf(tsv, "%s\t"
                "%zu\t%zu\t"
                "%zu\t%zu\t"
                "%u\t"
                "%u\t%u\t%u\t"
                "%g\t%g\t"
                "%g\t%g\t"
                "%g\t%g\t\n",
                p->sb->name,
                p->sb->stats.im_cnt, p->sb->stats.im_bytes,
                p->sb->stats.pm_cnt, p->sb->stats.pm_failures,
                p->sb->stats.cur_memory,
                p->sb->stats.max_memory,
                p->sb->stats.max_output,
                p->sb->stats.max_instructions,
                p->sb->stats.mm.mean, hs_sd_running_stats(&p->sb->stats.mm),
                p->sb->stats.pm.mean, hs_sd_running_stats(&p->sb->stats.pm),
                p->sb->stats.te.mean, hs_sd_running_stats(&p->sb->stats.te));
      }

      pthread_mutex_unlock(&p->cp_lock);
    }
    pthread_mutex_unlock(&cpw->output_plugins->list_lock);
  }
  hs_output_checkpoints(cpr, cpw->fh);
  fflush(cpw->fh);
  if (tsv) fclose(tsv);
  if (++cnt == 60) cnt = 0;
}
static void* input_thread(void* arg)
{
  hs_heka_message im, *pim = NULL;
  hs_init_heka_message(&im, 8);

  hs_heka_message am, *pam = NULL;
  hs_init_heka_message(&am, 8);

  hs_output_plugin* p = (hs_output_plugin*)arg;
  hs_log(g_module, 6, "starting: %s", p->sb->name);

  size_t bytes_read[2] = { 0 };
#ifdef HINDSIGHT_CLI
  bool input_stop = false, analysis_stop = false;
  while (!(p->stop && input_stop && analysis_stop)) {
#else
  while (!p->stop) {
#endif
    if (p->input.fh && !pim) {
      if (hs_find_message(&im, &p->input.ib, true)) {
        pim = &im;
      } else {
        bytes_read[0] = hs_read_file(&p->input);
      }

      if (!bytes_read[0]) {
#ifdef HINDSIGHT_CLI
        size_t cid = p->input.ib.cp.id;
#endif
        // see if the next file is there yet
        hs_open_file(&p->input, hs_input_dir, p->input.ib.cp.id + 1);
#ifdef HINDSIGHT_CLI
        if (cid == p->input.ib.cp.id && p->stop) {
          input_stop = true;
        }
#endif
      }
    } else if (!p->input.fh) { // still waiting on the first file
      hs_open_file(&p->input, hs_input_dir, p->input.ib.cp.id);
#ifdef HINDSIGHT_CLI
      if (!p->input.fh && p->stop) {
        input_stop = true;
      }
#endif
    }

    if (p->analysis.fh && !pam) {
      if (hs_find_message(&am, &p->analysis.ib, true)) {
        pam = &am;
      } else {
        bytes_read[1] = hs_read_file(&p->analysis);
      }

      if (!bytes_read[1]) {
#ifdef HINDSIGHT_CLI
        size_t cid = p->analysis.ib.cp.id;
#endif
        // see if the next file is there yet
        hs_open_file(&p->analysis, hs_analysis_dir, p->analysis.ib.cp.id + 1);
#ifdef HINDSIGHT_CLI
        if (cid == p->analysis.ib.cp.id && p->stop) {
          analysis_stop = true;
        }
#endif
      }
    } else if (!p->analysis.fh) { // still waiting on the first file
      hs_open_file(&p->analysis, hs_analysis_dir, p->analysis.ib.cp.id);
#ifdef HINDSIGHT_CLI
      if (!p->analysis.fh && p->stop) {
        analysis_stop = true;
      }
#endif
    }

    // if we have one send the oldest first
    if (pim) {
      if (pam) {
        if (pim->timestamp <= pam->timestamp) {
          p->msg = pim;
        } else {
          p->msg = pam;
        }
      } else {
        p->msg = pim;
      }
    } else if (pam) {
      p->msg = pam;
    }

    if (p->msg) {
      if (p->msg == pim) {
        pim = NULL;
        p->cur.input.id = p->input.ib.cp.id;
        p->cur.input.offset = p->input.ib.cp.offset -
          (p->input.ib.readpos - p->input.ib.scanpos);
      } else {
        pam = NULL;
        p->cur.analysis.id = p->analysis.ib.cp.id;
        p->cur.analysis.offset = p->analysis.ib.cp.offset -
          (p->analysis.ib.readpos - p->analysis.ib.scanpos);
      }
      int ret = output_message(p);
      if (ret == -4) {
        while (!p->stop && ret == -4) {
          hs_log(g_module, 7, "retry message %zu", p->sb->stats.pm_cnt);
          sleep(1);
          ret = output_message(p);
        }
      }
      if (ret > 0) {
        break; // fatal error
      }
      p->msg = NULL;
    } else if (!bytes_read[0] && !bytes_read[1]) {
      // trigger any pending timer events
      hs_clear_heka_message(&im); // create an idle/empty message
      p->msg = &im;
      output_message(p);
      p->msg = NULL;
      sleep(1);
    }
  }

  shutdown_timer_event(p);
  hs_free_heka_message(&am);
  hs_free_heka_message(&im);

  // hold the current checkpoints in memory incase we restart it
  hs_update_input_checkpoint(&p->plugins->cfg->cp_reader,
                             hs_input_dir,
                             p->sb->name,
                             &p->cp.input);

  hs_update_input_checkpoint(&p->plugins->cfg->cp_reader,
                             hs_analysis_dir,
                             p->sb->name,
                             &p->cp.analysis);

  if (p->stop) {
    hs_log(g_module, 6, "shutting down: %s", p->sb->name);
  } else {
    hs_log(g_module, 6, "detaching: %s", p->sb->name);
    pthread_mutex_lock(&p->plugins->list_lock);
    hs_output_plugins* plugins = p->plugins;
    plugins->list[p->list_index] = NULL;
    if (pthread_detach(p->thread)) {
      hs_log(g_module, 3, "thread could not be detached");
    }
    free_output_plugin(p);
    free(p);
    --plugins->list_cnt;
    pthread_mutex_unlock(&plugins->list_lock);
  }
  pthread_exit(NULL);
}


static void remove_plugin(hs_output_plugins* plugins, int idx)
{
  hs_output_plugin* p = plugins->list[idx];
  plugins->list[idx] = NULL;
  p->stop = true;
  if (pthread_join(p->thread, NULL)) {
    hs_log(g_module, 3, "remove_plugin could not pthread_join");
  }
  free_output_plugin(p);
  free(p);
  --plugins->list_cnt;
}
Exemple #3
0
static void* input_thread(void *arg)
{
  lsb_heka_message *msg = NULL;

  lsb_heka_message im, *pim = NULL;
  lsb_init_heka_message(&im, 8);

  lsb_heka_message am, *pam = NULL;
  lsb_init_heka_message(&am, 8);

  hs_output_plugin *p = (hs_output_plugin *)arg;
  hs_log(NULL, p->name, 6, "starting");

  size_t discarded_bytes;
  size_t bytes_read[2] = { 0 };
  int ret = 0;
  lsb_logger logger = {.context = NULL, .cb = hs_log};
#ifdef HINDSIGHT_CLI
  bool input_stop = false, analysis_stop = false;
  while (!(p->stop && input_stop && analysis_stop)) {
#else
  while (!p->stop) {
#endif
    if (p->input.fh && !pim) {
      if (lsb_find_heka_message(&im, &p->input.ib, true, &discarded_bytes,
                                &logger)) {
        pim = &im;
      } else {
        bytes_read[0] = hs_read_file(&p->input);
      }

      if (!bytes_read[0]) {
#ifdef HINDSIGHT_CLI
        size_t cid = p->input.cp.id;
#endif
        // see if the next file is there yet
        hs_open_file(&p->input, hs_input_dir, p->input.cp.id + 1);
#ifdef HINDSIGHT_CLI
        if (cid == p->input.cp.id && p->stop) {
          input_stop = true;
        }
#endif
      }
    } else if (!p->input.fh) { // still waiting on the first file
      hs_open_file(&p->input, hs_input_dir, p->input.cp.id);
#ifdef HINDSIGHT_CLI
      if (!p->input.fh && p->stop) {
        input_stop = true;
      }
#endif
    }

    if (p->analysis.fh && !pam) {
      if (lsb_find_heka_message(&am, &p->analysis.ib, true, &discarded_bytes,
                                &logger)) {
        pam = &am;
      } else {
        bytes_read[1] = hs_read_file(&p->analysis);
      }

      if (!bytes_read[1]) {
#ifdef HINDSIGHT_CLI
        size_t cid = p->analysis.cp.id;
#endif
        // see if the next file is there yet
        hs_open_file(&p->analysis, hs_analysis_dir, p->analysis.cp.id + 1);
#ifdef HINDSIGHT_CLI
        if (cid == p->analysis.cp.id && p->stop) {
          analysis_stop = true;
        }
#endif
      }
    } else if (!p->analysis.fh) { // still waiting on the first file
      hs_open_file(&p->analysis, hs_analysis_dir, p->analysis.cp.id);
#ifdef HINDSIGHT_CLI
      if (!p->analysis.fh && p->stop) {
        analysis_stop = true;
      }
#endif
    }

    // if we have one send the oldest first
    if (pim) {
      if (pam) {
        if (pim->timestamp <= pam->timestamp) {
          msg = pim;
        } else {
          msg = pam;
        }
      } else {
        msg = pim;
      }
    } else if (pam) {
      msg = pam;
    }

    if (msg) {
      if (msg == pim) {
        pim = NULL;
        p->cur.input.id = p->input.cp.id;
        p->cur.input.offset = p->input.cp.offset -
            (p->input.ib.readpos - p->input.ib.scanpos);
      } else {
        pam = NULL;
        p->cur.analysis.id = p->analysis.cp.id;
        p->cur.analysis.offset = p->analysis.cp.offset -
            (p->analysis.ib.readpos - p->analysis.ib.scanpos);
      }
      ret = output_message(p, msg);
      if (ret == LSB_HEKA_PM_RETRY) {
        while (!p->stop && ret == LSB_HEKA_PM_RETRY) {
          const char *err = lsb_heka_get_error(p->hsb);
          hs_log(NULL, p->name, 7, "retry message %llu err: %s", p->sequence_id,
                 err);
          sleep(1);
          ret = output_message(p, msg);
        }
      }
      if (ret > 0) {
        break; // fatal error
      }
      msg = NULL;
    } else if (!bytes_read[0] && !bytes_read[1]) {
      // trigger any pending timer events
      lsb_clear_heka_message(&im); // create an idle/empty message
      msg = &im;
      output_message(p, msg);
      msg = NULL;
      sleep(1);
    }
  }

  shutdown_timer_event(p);
  lsb_free_heka_message(&am);
  lsb_free_heka_message(&im);

  // hold the current checkpoints in memory incase we restart it
  hs_update_input_checkpoint(&p->plugins->cfg->cp_reader,
                             hs_input_dir,
                             p->name,
                             &p->cp.input);

  hs_update_input_checkpoint(&p->plugins->cfg->cp_reader,
                             hs_analysis_dir,
                             p->name,
                             &p->cp.analysis);

  if (p->stop) {
    hs_log(NULL, p->name, 6, "shutting down");
  } else {
    hs_log(NULL, p->name, 6, "detaching received: %d msg: %s", ret,
           lsb_heka_get_error(p->hsb));
    pthread_mutex_lock(&p->plugins->list_lock);
    hs_output_plugins *plugins = p->plugins;
    plugins->list[p->list_index] = NULL;
    if (pthread_detach(p->thread)) {
      hs_log(NULL, p->name, 3, "thread could not be detached");
    }
    destroy_output_plugin(p);
    --plugins->list_cnt;
    pthread_mutex_unlock(&plugins->list_lock);
  }
  pthread_exit(NULL);
}


static void remove_plugin(hs_output_plugins *plugins, int idx)
{
  hs_output_plugin *p = plugins->list[idx];
  plugins->list[idx] = NULL;
  p->stop = true;
  if (pthread_join(p->thread, NULL)) {
    hs_log(NULL, p->name, 3, "remove_plugin could not pthread_join");
  }
  destroy_output_plugin(p);
  --plugins->list_cnt;
}