Exemplo n.º 1
0
/* Tell the caller whether a parsed log entry survives crawler filtering.
 *
 * Returns 0 when conf.ignore_crawlers is set and is_crawler() reports a
 * non-zero value for the entry's user agent; returns 1 in every other
 * case.  is_crawler() is only consulted when the option is enabled. */
static int
exclude_crawler (GLogItem * glog)
{
  if (!conf.ignore_crawlers)
    return 1;

  if (is_crawler (glog->agent))
    return 0;

  return 1;
}
Exemplo n.º 2
0
/* Process a single line from the log and store its data accordingly.
 *
 * logger - running counters for this parse (process, invalid, exclude_ip,
 *          resp_size); updated in place.
 * line   - the raw log line. A NULL or empty line is counted as invalid;
 *          a line starting with '#' or '\n' is skipped without counting.
 * test   - when non-zero, the function stops right after the line has
 *          been parsed and its mandatory fields validated.
 *
 * Always returns 0. */
static int
process_log (GLog * logger, char *line, int test)
{
  GLogItem *glog;
  char buf[DATE_LEN];
  char *qmark = NULL, *req_key = NULL;
  int not_found = 0;            /* 404s */
  int locked = 0;               /* did we take the spinner mutex? */

  /* start from an empty date so a conversion that wrote nothing can be
   * detected further down */
  memset (buf, 0, sizeof (buf));

  if ((line == NULL) || (*line == '\0')) {
    logger->invalid++;
    return 0;
  }

  /* ignore comments */
  if (*line == '#' || *line == '\n')
    return 0;

  /* Evaluate the spinner condition once: re-reading the state for the
   * unlock could disagree with the lock decision and leave the mutex
   * held, or unlock a mutex that was never taken. */
  locked = parsing_spinner != NULL && parsing_spinner->state == SPN_RUN;
  if (locked)
    pthread_mutex_lock (&parsing_spinner->mutex);

  logger->process++;

#ifdef TCB_BTREE
  process_generic_data (ht_general_stats, "total_requests");
#endif

  if (locked)
    pthread_mutex_unlock (&parsing_spinner->mutex);

  glog = init_log_item (logger);
  if (parse_format (glog, conf.log_format, conf.date_format, line) == 1) {
    logger->invalid++;

#ifdef TCB_BTREE
    process_generic_data (ht_general_stats, "failed_requests");
#endif
    goto cleanup;
  }

  /* must have the following fields */
  if (glog->host == NULL || glog->date == NULL || glog->status == NULL ||
      glog->req == NULL) {
    logger->invalid++;
    goto cleanup;
  }

  if (test)
    goto cleanup;

  convert_date (buf, glog->date, conf.date_format, "%Y%m%d", DATE_LEN);
  /* buf is an array and can never be NULL; an empty buffer is what tells
   * us nothing was written.
   * NOTE(review): assumes convert_date leaves buf untouched on failure --
   * confirm against its implementation. */
  if (*buf == '\0')
    goto cleanup;

  /* ignore host */
  if (conf.ignore_ip_idx && ip_in_range (glog->host)) {
    logger->exclude_ip++;
#ifdef TCB_BTREE
    process_generic_data (ht_general_stats, "exclude_ip");
#endif
    goto cleanup;
  }
  /* ignore crawlers; agent may still be NULL at this point (it is only
   * defaulted below), so never hand a NULL pointer to is_crawler */
  if (conf.ignore_crawlers && glog->agent != NULL && is_crawler (glog->agent))
    goto cleanup;

  /* agent will be null in cases where %u is not specified */
  if (glog->agent == NULL)
    glog->agent = alloc_string ("-");
  /* process visitors, browsers, and OS */
  process_unique_data (glog->host, buf, glog->agent);

  /* process agents that are part of a host */
  if (conf.list_agents)
    process_host_agents (glog->host, glog->agent);

  /* strncmp stops at the terminating NUL, so a status shorter than three
   * characters is not read past its end (memcmp would always read 3) */

  /* is this a 404? */
  if (strncmp (glog->status, "404", 3) == 0) {
    not_found = 1;
  }
  /* treat 444 as 404? */
  else if (strncmp (glog->status, "444", 3) == 0 && conf.code444_as_404) {
    not_found = 1;
  }
  /* check if we need to remove the request's query string */
  else if (conf.ignore_qstr) {
    /* only cut when '?' is not the very first character of the request */
    if ((qmark = strchr (glog->req, '?')) != NULL) {
      if ((qmark - glog->req) > 0)
        *qmark = '\0';
    }
  }

  req_key = xstrdup (glog->req);
  /* include HTTP method/protocol to request */
  if (conf.append_method && glog->method) {
    str_to_upper (glog->method);
    append_method_to_request (&req_key, glog->method);
  }
  if (conf.append_protocol && glog->protocol) {
    str_to_upper (glog->protocol);
    append_protocol_to_request (&req_key, glog->protocol);
  }
  if ((conf.append_method) || (conf.append_protocol))
    req_key = deblank (req_key);

  /* process 404s */
  if (not_found)
    process_request (ht_not_found_requests, req_key, glog);
  /* process static files */
  else if (verify_static_content (glog->req))
    process_request (ht_requests_static, req_key, glog);
  /* process regular files */
  else
    process_request (ht_requests, req_key, glog);

  /* process referrers */
  process_referrers (glog->ref);
  /* process status codes */
  process_generic_data (ht_status_code, glog->status);
  /* process hosts */
  process_generic_data (ht_hosts, glog->host);
  /* process bandwidth  */
  process_request_meta (ht_date_bw, buf, glog->resp_size);
  process_request_meta (ht_file_bw, req_key, glog->resp_size);
  process_request_meta (ht_host_bw, glog->host, glog->resp_size);
  /* process time taken to serve the request, in microseconds */
  process_request_meta (ht_file_serve_usecs, req_key, glog->serve_time);
  process_request_meta (ht_host_serve_usecs, glog->host, glog->serve_time);
  logger->resp_size += glog->resp_size;
#ifdef TCB_BTREE
  process_request_meta (ht_general_stats, "bandwidth", glog->resp_size);
#endif

cleanup:
  free_logger (glog);
  /* req_key is still NULL on the early-exit paths; free (NULL) is a no-op */
  free (req_key);

  return 0;
}